diff --git a/configure b/configure
index 67440a2257c1f7..19ec28f92f1d01 100755
--- a/configure
+++ b/configure
@@ -357,11 +357,63 @@ def try_check_compiler(cc, lang):
   return (True, is_clang, clang_version, gcc_version)
 
 
+#
+# The version of the asm compiler is needed for building the openssl asm files.
+# See deps/openssl/openssl.gypi for details.
+# The commands and regular expressions used to obtain its version number are
+# taken from
+# https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-x86_64.pl#L112-L129
+#
+def get_llvm_version(cc):
+  try:
+    proc = subprocess.Popen(shlex.split(cc) + ['-v'], stdin=subprocess.PIPE,
+                            stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+  except OSError:
+    print '''io.js configure error: No acceptable C compiler found!
+
+       Please make sure you have a C compiler installed on your system and/or
+       consider adjusting the CC environment variable if you installed
+       it in a non-standard prefix.
+       '''
+    sys.exit()
+
+  match = re.search(r"(^clang version|based on LLVM) ([3-9]\.[0-9]+)",
+                    proc.communicate()[1])
+
+  if match:
+    return match.group(2)
+  else:
+    return 0
+
+
+def get_gas_version(cc):
+  try:
+    proc = subprocess.Popen(shlex.split(cc) + ['-Wa,-v', '-c', '-o',
+                                               '/dev/null', '-x',
+                                               'assembler', '/dev/null'],
+                            stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+                            stdout=subprocess.PIPE)
+  except OSError:
+    print '''io.js configure error: No acceptable C compiler found!
+
+       Please make sure you have a C compiler installed on your system and/or
+       consider adjusting the CC environment variable if you installed
+       it in a non-standard prefix.
+       '''
+    sys.exit()
+
+  match = re.match(r"GNU assembler version ([2-9]\.[0-9]+)",
+                   proc.communicate()[1])
+
+  if match:
+    return match.group(1)
+  else:
+    return 0
+
+
 # Note: Apple clang self-reports as clang 4.2.0 and gcc 4.2.1.  It passes
 # the version check more by accident than anything else but a more rigorous
 # check involves checking the build number against a whitelist.  I'm not
 # quite prepared to go that far yet.
-def check_compiler():
+def check_compiler(o):
   if sys.platform == 'win32':
     return
 
@@ -380,6 +432,15 @@ def check_compiler():
     # to a version that is not completely ancient.
     warn('C compiler too old, need gcc 4.2 or clang 3.2 (CC=%s)' % CC)
 
+  # Need llvm_version or gas_version when openssl asm files are compiled
+  if options.without_ssl or options.openssl_no_asm or options.shared_openssl:
+    return
+
+  if is_clang:
+    o['variables']['llvm_version'] = get_llvm_version(CC)
+  else:
+    o['variables']['gas_version'] = get_gas_version(CC)
+
 
 def cc_macros():
   """Checks predefined macros using the CC command."""
@@ -390,7 +451,7 @@ def cc_macros():
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
   except OSError:
-    print '''Node.js configure error: No acceptable C compiler found!
+    print '''io.js configure error: No acceptable C compiler found!
 
        Please make sure you have a C compiler installed on your system and/or
        consider adjusting the CC environment variable if you installed
       it in a non-standard prefix.
@@ -533,13 +594,6 @@ def configure_node(o):
 
   if target_arch == 'arm':
     configure_arm(o)
-  elif (target_arch == 'arm64' and
-        not options.shared_openssl and
-        not options.without_ssl):
-    # FIXME(bnoordhuis) It's not possible to build the bundled openssl due to
-    # deps/openssl/asm/arm-elf-gas/modes/ghash-armv4.S, which is 32 bits only.
-    warn('not building openssl, arm64 not yet supported')
-    options.without_ssl = True
   elif target_arch in ('mips', 'mipsel'):
     configure_mips(o)
 
@@ -935,8 +989,16 @@ def configure_intl(o):
     pprint.pformat(icu_config, indent=2) + '\n')
   return  # end of configure_intl
 
+output = {
+  'variables': { 'python': sys.executable },
+  'include_dirs': [],
+  'libraries': [],
+  'defines': [],
+  'cflags': [],
+}
+
 # Print a warning when the compiler is too old.
-check_compiler()
+check_compiler(output)
 
 # determine the "flavor" (operating system) we're building for,
 # leveraging gyp's GetFlavor function
@@ -945,14 +1007,6 @@ if (options.dest_os):
   flavor_params['flavor'] = options.dest_os
 flavor = GetFlavor(flavor_params)
 
-output = {
-  'variables': { 'python': sys.executable },
-  'include_dirs': [],
-  'libraries': [],
-  'defines': [],
-  'cflags': [],
-}
-
 configure_node(output)
 configure_libz(output)
 configure_http_parser(output)
diff --git a/deps/openssl/asm/Makefile b/deps/openssl/asm/Makefile
index 19a36bb77dc9cb..5da1f3b99c8958 100644
--- a/deps/openssl/asm/Makefile
+++ b/deps/openssl/asm/Makefile
@@ -1,21 +1,25 @@
-SED ?= sed
 PERL ?= perl
 PERL += -I../openssl/crypto/perlasm -I../openssl/crypto/bn/asm
 
+# OPENSSL_IA32_SSE2 flag is needed for checking the sse2 feature on ia32
+# see https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-586.pl#L56
+SSE2 = -DOPENSSL_IA32_SSE2
+
 OUTPUTS = \
 	x86-elf-gas/aes/aes-586.s \
 	x86-elf-gas/aes/aesni-x86.s \
 	x86-elf-gas/aes/vpaes-x86.s \
-	x86-elf-gas/bf/bf-686.s \
+	x86-elf-gas/bf/bf-586.s \
+	x86-elf-gas/bn/bn-586.s \
+	x86-elf-gas/bn/co-586.s \
 	x86-elf-gas/bn/x86-mont.s \
-	x86-elf-gas/bn/x86.s \
+	x86-elf-gas/bn/x86-gf2m.s \
 	x86-elf-gas/camellia/cmll-x86.s \
 	x86-elf-gas/cast/cast-586.s \
 	x86-elf-gas/des/crypt586.s \
 	x86-elf-gas/des/des-586.s \
 	x86-elf-gas/md5/md5-586.s \
 	x86-elf-gas/rc4/rc4-586.s \
-	x86-elf-gas/rc5/rc5-586.s \
 	x86-elf-gas/ripemd/rmd-586.s \
 	x86-elf-gas/sha/sha1-586.s \
 	x86-elf-gas/sha/sha256-586.s \
@@ -24,44 +28,60 @@ OUTPUTS = \
 	x86-elf-gas/modes/ghash-x86.s \
 	x86-elf-gas/x86cpuid.s \
 	x64-elf-gas/aes/aes-x86_64.s \
+	x64-elf-gas/aes/aesni-mb-x86_64.s \
+	x64-elf-gas/aes/aesni-sha256-x86_64.s \
 	x64-elf-gas/aes/aesni-x86_64.s \
 	x64-elf-gas/aes/vpaes-x86_64.s \
 	x64-elf-gas/aes/bsaes-x86_64.s \
 	x64-elf-gas/aes/aesni-sha1-x86_64.s \
-	x64-elf-gas/bn/modexp512-x86_64.s \
+	x64-elf-gas/bn/rsaz-avx2.s \
+	x64-elf-gas/bn/rsaz-x86_64.s \
 	x64-elf-gas/bn/x86_64-mont.s \
 	x64-elf-gas/bn/x86_64-mont5.s \
 	x64-elf-gas/bn/x86_64-gf2m.s \
 	x64-elf-gas/camellia/cmll-x86_64.s \
+	x64-elf-gas/ec/ecp_nistz256-x86_64.s \
 	x64-elf-gas/md5/md5-x86_64.s \
 	x64-elf-gas/rc4/rc4-x86_64.s \
 	x64-elf-gas/rc4/rc4-md5-x86_64.s \
+	x64-elf-gas/sha/sha1-mb-x86_64.s \
 	x64-elf-gas/sha/sha1-x86_64.s \
+	x64-elf-gas/sha/sha256-mb-x86_64.s \
 	x64-elf-gas/sha/sha256-x86_64.s \
 	x64-elf-gas/sha/sha512-x86_64.s \
 	x64-elf-gas/whrlpool/wp-x86_64.s \
+	x64-elf-gas/modes/aesni-gcm-x86_64.s \
 	x64-elf-gas/modes/ghash-x86_64.s \
 	x64-elf-gas/x86_64cpuid.s \
-	arm-elf-gas/aes/aes-armv4.S \
-	arm-elf-gas/bn/armv4-mont.S \
-	arm-elf-gas/bn/armv4-gf2m.S \
-	arm-elf-gas/sha/sha1-armv4-large.S \
-	arm-elf-gas/sha/sha256-armv4.S \
-	arm-elf-gas/sha/sha512-armv4.S \
-	arm-elf-gas/modes/ghash-armv4.S \
+	arm-void-gas/aes/aes-armv4.S \
+	arm-void-gas/aes/bsaes-armv7.S \
+	arm-void-gas/aes/aesv8-armx.S \
+	arm-void-gas/bn/armv4-mont.S \
+	arm-void-gas/bn/armv4-gf2m.S \
+	arm-void-gas/sha/sha1-armv4-large.S \
+	arm-void-gas/sha/sha256-armv4.S \
+	arm-void-gas/sha/sha512-armv4.S \
+	arm-void-gas/modes/ghash-armv4.S \
+	arm-void-gas/modes/ghashv8-armx.S \
+	arm64-linux64-gas/aes/aesv8-armx.S \
+	arm64-linux64-gas/modes/ghashv8-armx.S \
+	arm64-linux64-gas/sha/sha1-armv8.S \
+	arm64-linux64-gas/sha/sha256-armv8.S \
+	arm64-linux64-gas/sha/sha512-armv8.S \
 	x86-macosx-gas/aes/aes-586.s \
 	x86-macosx-gas/aes/aesni-x86.s \
 	x86-macosx-gas/aes/vpaes-x86.s \
-	x86-macosx-gas/bf/bf-686.s \
+	x86-macosx-gas/bf/bf-586.s \
+	x86-macosx-gas/bn/bn-586.s \
+	x86-macosx-gas/bn/co-586.s \
 	x86-macosx-gas/bn/x86-mont.s \
-	x86-macosx-gas/bn/x86.s \
+	x86-macosx-gas/bn/x86-gf2m.s \
 	x86-macosx-gas/camellia/cmll-x86.s \
 	x86-macosx-gas/cast/cast-586.s \
 	x86-macosx-gas/des/crypt586.s \
 	x86-macosx-gas/des/des-586.s \
 	x86-macosx-gas/md5/md5-586.s \
 	x86-macosx-gas/rc4/rc4-586.s \
-	x86-macosx-gas/rc5/rc5-586.s \
 	x86-macosx-gas/ripemd/rmd-586.s \
 	x86-macosx-gas/sha/sha1-586.s \
 	x86-macosx-gas/sha/sha256-586.s \
@@ -72,35 +92,41 @@ OUTPUTS = \
 	x64-macosx-gas/aes/aes-x86_64.s \
 	x64-macosx-gas/aes/aesni-x86_64.s \
 	x64-macosx-gas/aes/vpaes-x86_64.s \
+	x64-macosx-gas/aes/aesni-mb-x86_64.s \
+	x64-macosx-gas/aes/aesni-sha256-x86_64.s \
 	x64-macosx-gas/aes/bsaes-x86_64.s \
 	x64-macosx-gas/aes/aesni-sha1-x86_64.s \
-	x64-macosx-gas/bn/modexp512-x86_64.s \
+	x64-macosx-gas/bn/rsaz-avx2.s \
+	x64-macosx-gas/bn/rsaz-x86_64.s \
 	x64-macosx-gas/bn/x86_64-mont.s \
 	x64-macosx-gas/bn/x86_64-mont5.s \
 	x64-macosx-gas/bn/x86_64-gf2m.s \
 	x64-macosx-gas/camellia/cmll-x86_64.s \
+	x64-macosx-gas/ec/ecp_nistz256-x86_64.s \
 	x64-macosx-gas/md5/md5-x86_64.s \
-	x64-macosx-gas/rc4/rc4-x86_64.s \
-	x64-macosx-gas/rc4/rc4-md5-x86_64.s \
+	x64-macosx-gas/sha/sha1-mb-x86_64.s \
 	x64-macosx-gas/sha/sha1-x86_64.s \
+	x64-macosx-gas/sha/sha256-mb-x86_64.s \
 	x64-macosx-gas/sha/sha256-x86_64.s \
 	x64-macosx-gas/sha/sha512-x86_64.s \
 	x64-macosx-gas/whrlpool/wp-x86_64.s \
+	x64-macosx-gas/modes/aesni-gcm-x86_64.s \
 	x64-macosx-gas/modes/ghash-x86_64.s \
 	x64-macosx-gas/x86_64cpuid.s \
 	x86-win32-masm/aes/aes-586.asm \
 	x86-win32-masm/aes/aesni-x86.asm \
 	x86-win32-masm/aes/vpaes-x86.asm \
-	x86-win32-masm/bf/bf-686.asm \
+	x86-win32-masm/bf/bf-586.asm \
+	x86-win32-masm/bn/bn-586.asm \
+	x86-win32-masm/bn/co-586.asm \
 	x86-win32-masm/bn/x86-mont.asm \
-	x86-win32-masm/bn/x86.asm \
+	x86-win32-masm/bn/x86-gf2m.asm \
 	x86-win32-masm/camellia/cmll-x86.asm \
 	x86-win32-masm/cast/cast-586.asm \
 	x86-win32-masm/des/crypt586.asm \
 	x86-win32-masm/des/des-586.asm \
 	x86-win32-masm/md5/md5-586.asm \
 	x86-win32-masm/rc4/rc4-586.asm \
-	x86-win32-masm/rc5/rc5-586.asm \
 	x86-win32-masm/ripemd/rmd-586.asm \
 	x86-win32-masm/sha/sha1-586.asm \
 	x86-win32-masm/sha/sha256-586.asm \
@@ -109,112 +135,183 @@ OUTPUTS = \
 	x86-win32-masm/modes/ghash-x86.asm \
 	x86-win32-masm/x86cpuid.asm \
 	x64-win32-masm/aes/aes-x86_64.asm \
+	x64-win32-masm/aes/aesni-mb-x86_64.asm \
+	x64-win32-masm/aes/aesni-sha256-x86_64.asm \
 	x64-win32-masm/aes/aesni-x86_64.asm \
 	x64-win32-masm/aes/vpaes-x86_64.asm \
 	x64-win32-masm/aes/bsaes-x86_64.asm \
 	x64-win32-masm/aes/aesni-sha1-x86_64.asm \
-	x64-win32-masm/bn/modexp512-x86_64.asm \
+	x64-win32-masm/bn/rsaz-avx2.asm \
+	x64-win32-masm/bn/rsaz-x86_64.asm \
 	x64-win32-masm/bn/x86_64-mont.asm \
 	x64-win32-masm/bn/x86_64-mont5.asm \
 	x64-win32-masm/bn/x86_64-gf2m.asm \
 	x64-win32-masm/camellia/cmll-x86_64.asm \
+	x64-win32-masm/ec/ecp_nistz256-x86_64.asm \
 	x64-win32-masm/md5/md5-x86_64.asm \
 	x64-win32-masm/rc4/rc4-x86_64.asm \
 	x64-win32-masm/rc4/rc4-md5-x86_64.asm \
+	x64-win32-masm/sha/sha1-mb-x86_64.asm \
 	x64-win32-masm/sha/sha1-x86_64.asm \
+	x64-win32-masm/sha/sha256-mb-x86_64.asm \
 	x64-win32-masm/sha/sha256-x86_64.asm \
-	x64-win32-masm/sha/sha512-x86_64.asm \
+	x64-win32-masm/sha/sha512-x86_64.asm \
 	x64-win32-masm/whrlpool/wp-x86_64.asm \
+	x64-win32-masm/modes/aesni-gcm-x86_64.asm \
 	x64-win32-masm/modes/ghash-x86_64.asm \
 	x64-win32-masm/x86_64cpuid.asm \
 
-arm-elf-gas/%.S x64-elf-gas/%.s x86-elf-gas/%.s:
+# The sha512 asm files for x86_64 need 512 in their output filenames, so we
+# add new rules that generate the sha512 asm files by specifying the output
+# filename as the second argument. See
+# https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-x86_64.pl#L137-L149
+
+x64-elf-gas/sha/sha512-%.s:
+	$(PERL) $< elf $@
+
+x64-elf-gas/%.s:
 	$(PERL) $< elf > $@
 
-x64-macosx-gas/%.s x86-macosx-gas/%.s:
-	$(PERL) $< macosx > $@
+arm-void-gas/%.S:
+	$(PERL) $< void > $@
+
+arm64-linux64-gas/sha/sha512-%.S:
+	$(PERL) $< linux64 $@
+
+arm64-linux64-gas/%.S:
+	$(PERL) $< linux64 > $@
+
+x64-macosx-gas/sha/sha512-%.s:
+	$(PERL) $< macosx $@
 
-x64-macosx-gas/%.s x86-macosx-gas/%.s:
+x64-macosx-gas/%.s:
 	$(PERL) $< macosx > $@
 
+x64-win32-masm/sha/sha512-%.asm:
+	$(PERL) $< masm $@
+
 x64-win32-masm/%.asm:
 	$(PERL) $< masm > $@
 
+x86-elf-gas/%.s:
+	$(PERL) $< elf $(SSE2) > $@
+
+x86-macosx-gas/%.s:
+	$(PERL) $< macosx $(SSE2) > $@
+
 x86-win32-masm/%.asm:
-	$(PERL) $< win32 > $@
+	$(PERL) $< win32 $(SSE2) > $@
+
+.PHONY: all outputs clean check-cc check-asm
+all: check-cc check-asm outputs
 
-.PHONY: all
-all: $(OUTPUTS)
+outputs: $(OUTPUTS)
 	# strip trailing whitespace and final blank newline
-	$(SED) -sri -e 's/\s+$$/\n/' -e '$$ { /^$$/d }' $^
+	$(PERL) -pi -e 's/\s+$$/\n/; s/^\n$$// if eof' $^
 
 clean:
 	find . -iname '*.asm' -exec rm "{}" \;
 	find . -iname '*.s' -exec rm "{}" \;
+	find . -iname '*.S' -exec rm "{}" \;
+
+# The CC and ASM environment variables are needed to generate asm files from
+# perl. Use gcc and nasm on unix. See
+# https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-x86_64.pl#L112-L129
+
+check-cc:
+ifndef CC
+	$(error CC is not set.)
+
+check-asm:
+ifndef ASM
+	$(error ASM is not set.)
+endif x64-elf-gas/aes/aes-x86_64.s: ../openssl/crypto/aes/asm/aes-x86_64.pl x64-elf-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl +x64-elf-gas/aes/aesni-mb-x86_64.s: ../openssl/crypto/aes/asm/aesni-mb-x86_64.pl +x64-elf-gas/aes/aesni-sha256-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha256-x86_64.pl x64-elf-gas/aes/vpaes-x86_64.s: ../openssl/crypto/aes/asm/vpaes-x86_64.pl x64-elf-gas/aes/bsaes-x86_64.s: ../openssl/crypto/aes/asm/bsaes-x86_64.pl x64-elf-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl -x64-elf-gas/bn/modexp512-x86_64.s: ../openssl/crypto/bn/asm/modexp512-x86_64.pl +x64-elf-gas/bn/rsaz-avx2.s: ../openssl/crypto/bn/asm/rsaz-avx2.pl +x64-elf-gas/bn/rsaz-x86_64.s: ../openssl/crypto/bn/asm/rsaz-x86_64.pl x64-elf-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl x64-elf-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl x64-elf-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl x64-elf-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl +x64-elf-gas/ec/ecp_nistz256-x86_64.s: ../openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl x64-elf-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl x64-elf-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl x64-elf-gas/rc4/rc4-md5-x86_64.s: ../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl +x64-elf-gas/sha/sha1-mb-x86_64.s: ../openssl/crypto/sha/asm/sha1-mb-x86_64.pl x64-elf-gas/sha/sha1-x86_64.s: ../openssl/crypto/sha/asm/sha1-x86_64.pl x64-elf-gas/sha/sha512-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl -x64-elf-gas/sha/sha256-x86_64.s: ../openssl/crypto/sha/asm/sha256-x86_64.pl +x64-elf-gas/sha/sha256-mb-x86_64.s: ../openssl/crypto/sha/asm/sha256-mb-x86_64.pl +x64-elf-gas/sha/sha256-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-elf-gas/whrlpool/wp-x86_64.s: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl +x64-elf-gas/modes/aesni-gcm-x86_64.s: ../openssl/crypto/modes/asm/aesni-gcm-x86_64.pl x64-elf-gas/modes/ghash-x86_64.s: ../openssl/crypto/modes/asm/ghash-x86_64.pl x64-elf-gas/x86_64cpuid.s: ../openssl/crypto/x86_64cpuid.pl x64-macosx-gas/aes/aes-x86_64.s: ../openssl/crypto/aes/asm/aes-x86_64.pl x64-macosx-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl x64-macosx-gas/aes/vpaes-x86_64.s: ../openssl/crypto/aes/asm/vpaes-x86_64.pl +x64-macosx-gas/aes/aesni-mb-x86_64.s: ../openssl/crypto/aes/asm/aesni-mb-x86_64.pl +x64-macosx-gas/aes/aesni-sha256-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha256-x86_64.pl x64-macosx-gas/aes/bsaes-x86_64.s: ../openssl/crypto/aes/asm/bsaes-x86_64.pl x64-macosx-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl -x64-macosx-gas/bn/modexp512-x86_64.s: ../openssl/crypto/bn/asm/modexp512-x86_64.pl +x64-macosx-gas/bn/rsaz-avx2.s: ../openssl/crypto/bn/asm/rsaz-avx2.pl +x64-macosx-gas/bn/rsaz-x86_64.s: ../openssl/crypto/bn/asm/rsaz-x86_64.pl x64-macosx-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl x64-macosx-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl x64-macosx-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl x64-macosx-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl +x64-macosx-gas/ec/ecp_nistz256-x86_64.s: ../openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl x64-macosx-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl -x64-macosx-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl -x64-macosx-gas/rc4/rc4-md5-x86_64.s: 
../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl x64-macosx-gas/sha/sha1-x86_64.s: ../openssl/crypto/sha/asm/sha1-x86_64.pl -x64-macosx-gas/sha/sha256-x86_64.s: ../openssl/crypto/sha/asm/sha256-x86_64.pl +x64-macosx-gas/sha/sha1-mb-x86_64.s: ../openssl/crypto/sha/asm/sha1-mb-x86_64.pl +x64-macosx-gas/sha/sha256-mb-x86_64.s: ../openssl/crypto/sha/asm/sha256-mb-x86_64.pl +x64-macosx-gas/sha/sha256-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-macosx-gas/sha/sha512-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-macosx-gas/whrlpool/wp-x86_64.s: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl +x64-macosx-gas/modes/aesni-gcm-x86_64.s: ../openssl/crypto/modes/asm/aesni-gcm-x86_64.pl x64-macosx-gas/modes/ghash-x86_64.s: ../openssl/crypto/modes/asm/ghash-x86_64.pl x64-macosx-gas/x86_64cpuid.s: ../openssl/crypto/x86_64cpuid.pl x64-win32-masm/aes/aes-x86_64.asm: ../openssl/crypto/aes/asm/aes-x86_64.pl x64-win32-masm/aes/aesni-x86_64.asm: ../openssl/crypto/aes/asm/aesni-x86_64.pl +x64-win32-masm/aes/aesni-mb-x86_64.asm: ../openssl/crypto/aes/asm/aesni-mb-x86_64.pl +x64-win32-masm/aes/aesni-sha256-x86_64.asm: ../openssl/crypto/aes/asm/aesni-sha256-x86_64.pl x64-win32-masm/aes/vpaes-x86_64.asm: ../openssl/crypto/aes/asm/vpaes-x86_64.pl x64-win32-masm/aes/bsaes-x86_64.asm: ../openssl/crypto/aes/asm/bsaes-x86_64.pl x64-win32-masm/aes/aesni-sha1-x86_64.asm: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl -x64-win32-masm/bn/modexp512-x86_64.asm: ../openssl/crypto/bn/asm/modexp512-x86_64.pl +x64-win32-masm/bn/rsaz-avx2.asm: ../openssl/crypto/bn/asm/rsaz-avx2.pl +x64-win32-masm/bn/rsaz-x86_64.asm: ../openssl/crypto/bn/asm/rsaz-x86_64.pl x64-win32-masm/bn/x86_64-mont.asm: ../openssl/crypto/bn/asm/x86_64-mont.pl x64-win32-masm/bn/x86_64-mont5.asm: ../openssl/crypto/bn/asm/x86_64-mont5.pl x64-win32-masm/bn/x86_64-gf2m.asm: ../openssl/crypto/bn/asm/x86_64-gf2m.pl x64-win32-masm/camellia/cmll-x86_64.asm: ../openssl/crypto/camellia/asm/cmll-x86_64.pl +x64-win32-masm/ec/ecp_nistz256-x86_64.asm: ../openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl x64-win32-masm/md5/md5-x86_64.asm: ../openssl/crypto/md5/asm/md5-x86_64.pl x64-win32-masm/rc4/rc4-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-x86_64.pl x64-win32-masm/rc4/rc4-md5-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl +x64-win32-masm/sha/sha1-mb-x86_64.asm: ../openssl/crypto/sha/asm/sha1-mb-x86_64.pl x64-win32-masm/sha/sha1-x86_64.asm: ../openssl/crypto/sha/asm/sha1-x86_64.pl -x64-win32-masm/sha/sha256-x86_64.asm: ../openssl/crypto/sha/asm/sha256-x86_64.pl +x64-win32-masm/sha/sha256-mb-x86_64.asm: ../openssl/crypto/sha/asm/sha256-mb-x86_64.pl +x64-win32-masm/sha/sha256-x86_64.asm: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-win32-masm/sha/sha512-x86_64.asm: ../openssl/crypto/sha/asm/sha512-x86_64.pl x64-win32-masm/whrlpool/wp-x86_64.asm: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl +x64-win32-masm/modes/aesni-gcm-x86_64.asm: ../openssl/crypto/modes/asm/aesni-gcm-x86_64.pl x64-win32-masm/modes/ghash-x86_64.asm: ../openssl/crypto/modes/asm/ghash-x86_64.pl x64-win32-masm/x86_64cpuid.asm: ../openssl/crypto/x86_64cpuid.pl x86-elf-gas/aes/aes-586.s: ../openssl/crypto/aes/asm/aes-586.pl x86-elf-gas/aes/aesni-x86.s: ../openssl/crypto/aes/asm/aesni-x86.pl x86-elf-gas/aes/vpaes-x86.s: ../openssl/crypto/aes/asm/vpaes-x86.pl -x86-elf-gas/bf/bf-686.s: ../openssl/crypto/bf/asm/bf-686.pl +x86-elf-gas/bf/bf-586.s: ../openssl/crypto/bf/asm/bf-586.pl +x86-elf-gas/bn/bn-586.s: ../openssl/crypto/bn/asm/bn-586.pl +x86-elf-gas/bn/co-586.s: 
../openssl/crypto/bn/asm/co-586.pl x86-elf-gas/bn/x86-mont.s: ../openssl/crypto/bn/asm/x86-mont.pl -x86-elf-gas/bn/x86.s: ../openssl/crypto/bn/asm/x86.pl +x86-elf-gas/bn/x86-gf2m.s: ../openssl/crypto/bn/asm/x86-gf2m.pl x86-elf-gas/camellia/cmll-x86.s: ../openssl/crypto/camellia/asm/cmll-x86.pl x86-elf-gas/cast/cast-586.s: ../openssl/crypto/cast/asm/cast-586.pl x86-elf-gas/des/crypt586.s: ../openssl/crypto/des/asm/crypt586.pl @@ -232,9 +329,11 @@ x86-elf-gas/x86cpuid.s: ../openssl/crypto/x86cpuid.pl x86-macosx-gas/aes/aes-586.s: ../openssl/crypto/aes/asm/aes-586.pl x86-macosx-gas/aes/aesni-x86.s: ../openssl/crypto/aes/asm/aesni-x86.pl x86-macosx-gas/aes/vpaes-x86.s: ../openssl/crypto/aes/asm/vpaes-x86.pl -x86-macosx-gas/bf/bf-686.s: ../openssl/crypto/bf/asm/bf-686.pl +x86-macosx-gas/bf/bf-586.s: ../openssl/crypto/bf/asm/bf-686.pl +x86-macosx-gas/bn/bn-586.s: ../openssl/crypto/bn/asm/bn-586.pl +x86-macosx-gas/bn/co-586.s: ../openssl/crypto/bn/asm/co-586.pl x86-macosx-gas/bn/x86-mont.s: ../openssl/crypto/bn/asm/x86-mont.pl -x86-macosx-gas/bn/x86.s: ../openssl/crypto/bn/asm/x86.pl +x86-macosx-gas/bn/x86-gf2m.s: ../openssl/crypto/bn/asm/x86-gf2m.pl x86-macosx-gas/camellia/cmll-x86.s: ../openssl/crypto/camellia/asm/cmll-x86.pl x86-macosx-gas/cast/cast-586.s: ../openssl/crypto/cast/asm/cast-586.pl x86-macosx-gas/des/crypt586.s: ../openssl/crypto/des/asm/crypt586.pl @@ -252,8 +351,10 @@ x86-macosx-gas/x86cpuid.s: ../openssl/crypto/x86cpuid.pl x86-win32-masm/aes/aes-586.asm: ../openssl/crypto/aes/asm/aes-586.pl x86-win32-masm/aes/aesni-x86.asm: ../openssl/crypto/aes/asm/aesni-x86.pl x86-win32-masm/aes/vpaes-x86.asm: ../openssl/crypto/aes/asm/vpaes-x86.pl -x86-win32-masm/bf/bf-686.asm: ../openssl/crypto/bf/asm/bf-686.pl -x86-win32-masm/bn/x86.asm: ../openssl/crypto/bn/asm/x86.pl +x86-win32-masm/bf/bf-586.asm: ../openssl/crypto/bf/asm/bf-586.pl +x86-win32-masm/bn/bn-586.asm: ../openssl/crypto/bn/asm/bn-586.pl +x86-win32-masm/bn/co-586.asm: ../openssl/crypto/bn/asm/co-586.pl +x86-win32-masm/bn/x86-gf2m.asm: ../openssl/crypto/bn/asm/x86-gf2m.pl x86-win32-masm/bn/x86-mont.asm: ../openssl/crypto/bn/asm/x86-mont.pl x86-win32-masm/camellia/cmll-x86.asm: ../openssl/crypto/camellia/asm/cmll-x86.pl x86-win32-masm/cast/cast-586.asm: ../openssl/crypto/cast/asm/cast-586.pl @@ -261,7 +362,6 @@ x86-win32-masm/des/crypt586.asm: ../openssl/crypto/des/asm/crypt586.pl x86-win32-masm/des/des-586.asm: ../openssl/crypto/des/asm/des-586.pl x86-win32-masm/md5/md5-586.asm: ../openssl/crypto/md5/asm/md5-586.pl x86-win32-masm/rc4/rc4-586.asm: ../openssl/crypto/rc4/asm/rc4-586.pl -x86-win32-masm/rc5/rc5-586.asm: ../openssl/crypto/rc5/asm/rc5-586.pl x86-win32-masm/ripemd/rmd-586.asm: ../openssl/crypto/ripemd/asm/rmd-586.pl x86-win32-masm/sha/sha1-586.asm: ../openssl/crypto/sha/asm/sha1-586.pl x86-win32-masm/sha/sha256-586.asm: ../openssl/crypto/sha/asm/sha256-586.pl @@ -269,10 +369,18 @@ x86-win32-masm/sha/sha512-586.asm: ../openssl/crypto/sha/asm/sha512-586.pl x86-win32-masm/whrlpool/wp-mmx.asm: ../openssl/crypto/whrlpool/asm/wp-mmx.pl x86-win32-masm/modes/ghash-x86.asm: ../openssl/crypto/modes/asm/ghash-x86.pl x86-win32-masm/x86cpuid.asm: ../openssl/crypto/x86cpuid.pl -arm-elf-gas/aes/aes-armv4.S: ../openssl/crypto/aes/asm/aes-armv4.pl -arm-elf-gas/bn/armv4-mont.S: ../openssl/crypto/bn/asm/armv4-mont.pl -arm-elf-gas/bn/armv4-gf2m.S: ../openssl/crypto/bn/asm/armv4-gf2m.pl -arm-elf-gas/sha/sha1-armv4-large.S: ../openssl/crypto/sha/asm/sha1-armv4-large.pl -arm-elf-gas/sha/sha512-armv4.S: ../openssl/crypto/sha/asm/sha512-armv4.pl 
-arm-elf-gas/sha/sha256-armv4.S: ../openssl/crypto/sha/asm/sha256-armv4.pl -arm-elf-gas/modes/ghash-armv4.S: ../openssl/crypto/modes/asm/ghash-armv4.pl +arm-void-gas/aes/aes-armv4.S: ../openssl/crypto/aes/asm/aes-armv4.pl +arm-void-gas/aes/bsaes-armv7.S: ../openssl/crypto/aes/asm/bsaes-armv7.pl +arm-void-gas/aes/aesv8-armx.S: ../openssl/crypto/aes/asm/aesv8-armx.pl +arm-void-gas/bn/armv4-mont.S: ../openssl/crypto/bn/asm/armv4-mont.pl +arm-void-gas/bn/armv4-gf2m.S: ../openssl/crypto/bn/asm/armv4-gf2m.pl +arm-void-gas/sha/sha1-armv4-large.S: ../openssl/crypto/sha/asm/sha1-armv4-large.pl +arm-void-gas/sha/sha512-armv4.S: ../openssl/crypto/sha/asm/sha512-armv4.pl +arm-void-gas/sha/sha256-armv4.S: ../openssl/crypto/sha/asm/sha256-armv4.pl +arm-void-gas/modes/ghash-armv4.S: ../openssl/crypto/modes/asm/ghash-armv4.pl +arm-void-gas/modes/ghashv8-armx.S: ../openssl/crypto/modes/asm/ghashv8-armx.pl +arm64-linux64-gas/aes/aesv8-armx.S: ../openssl/crypto/aes/asm/aesv8-armx.pl +arm64-linux64-gas/modes/ghashv8-armx.S: ../openssl/crypto/modes/asm/ghashv8-armx.pl +arm64-linux64-gas/sha/sha1-armv8.S: ../openssl/crypto/sha/asm/sha1-armv8.pl +arm64-linux64-gas/sha/sha256-armv8.S: ../openssl/crypto/sha/asm/sha512-armv8.pl +arm64-linux64-gas/sha/sha512-armv8.S: ../openssl/crypto/sha/asm/sha512-armv8.pl diff --git a/deps/openssl/asm/arm-elf-gas/bn/armv4-mont.S b/deps/openssl/asm/arm-elf-gas/bn/armv4-mont.S deleted file mode 100644 index ba4cb874ff9e1f..00000000000000 --- a/deps/openssl/asm/arm-elf-gas/bn/armv4-mont.S +++ /dev/null @@ -1,147 +0,0 @@ -.text - -.global bn_mul_mont -.type bn_mul_mont,%function - -.align 2 -bn_mul_mont: - stmdb sp!,{r0,r2} @ sp points at argument block - ldr r0,[sp,#3*4] @ load num - cmp r0,#2 - movlt r0,#0 - addlt sp,sp,#2*4 - blt .Labrt - - stmdb sp!,{r4-r12,lr} @ save 10 registers - - mov r0,r0,lsl#2 @ rescale r0 for byte count - sub sp,sp,r0 @ alloca(4*num) - sub sp,sp,#4 @ +extra dword - sub r0,r0,#4 @ "num=num-1" - add r4,r2,r0 @ &bp[num-1] - - add r0,sp,r0 @ r0 to point at &tp[num-1] - ldr r8,[r0,#14*4] @ &n0 - ldr r2,[r2] @ bp[0] - ldr r5,[r1],#4 @ ap[0],ap++ - ldr r6,[r3],#4 @ np[0],np++ - ldr r8,[r8] @ *n0 - str r4,[r0,#15*4] @ save &bp[num] - - umull r10,r11,r5,r2 @ ap[0]*bp[0] - str r8,[r0,#14*4] @ save n0 value - mul r8,r10,r8 @ "tp[0]"*n0 - mov r12,#0 - umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" - mov r4,sp - -.L1st: - ldr r5,[r1],#4 @ ap[j],ap++ - mov r10,r11 - ldr r6,[r3],#4 @ np[j],np++ - mov r11,#0 - umlal r10,r11,r5,r2 @ ap[j]*bp[0] - mov r14,#0 - umlal r12,r14,r6,r8 @ np[j]*n0 - adds r12,r12,r10 - str r12,[r4],#4 @ tp[j-1]=,tp++ - adc r12,r14,#0 - cmp r4,r0 - bne .L1st - - adds r12,r12,r11 - ldr r4,[r0,#13*4] @ restore bp - mov r14,#0 - ldr r8,[r0,#14*4] @ restore n0 - adc r14,r14,#0 - str r12,[r0] @ tp[num-1]= - str r14,[r0,#4] @ tp[num]= - - -.Louter: - sub r7,r0,sp @ "original" r0-1 value - sub r1,r1,r7 @ "rewind" ap to &ap[1] - ldr r2,[r4,#4]! 
@ *(++bp) - sub r3,r3,r7 @ "rewind" np to &np[1] - ldr r5,[r1,#-4] @ ap[0] - ldr r10,[sp] @ tp[0] - ldr r6,[r3,#-4] @ np[0] - ldr r7,[sp,#4] @ tp[1] - - mov r11,#0 - umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] - str r4,[r0,#13*4] @ save bp - mul r8,r10,r8 - mov r12,#0 - umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" - mov r4,sp - -.Linner: - ldr r5,[r1],#4 @ ap[j],ap++ - adds r10,r11,r7 @ +=tp[j] - ldr r6,[r3],#4 @ np[j],np++ - mov r11,#0 - umlal r10,r11,r5,r2 @ ap[j]*bp[i] - mov r14,#0 - umlal r12,r14,r6,r8 @ np[j]*n0 - adc r11,r11,#0 - ldr r7,[r4,#8] @ tp[j+1] - adds r12,r12,r10 - str r12,[r4],#4 @ tp[j-1]=,tp++ - adc r12,r14,#0 - cmp r4,r0 - bne .Linner - - adds r12,r12,r11 - mov r14,#0 - ldr r4,[r0,#13*4] @ restore bp - adc r14,r14,#0 - ldr r8,[r0,#14*4] @ restore n0 - adds r12,r12,r7 - ldr r7,[r0,#15*4] @ restore &bp[num] - adc r14,r14,#0 - str r12,[r0] @ tp[num-1]= - str r14,[r0,#4] @ tp[num]= - - cmp r4,r7 - bne .Louter - - - ldr r2,[r0,#12*4] @ pull rp - add r0,r0,#4 @ r0 to point at &tp[num] - sub r5,r0,sp @ "original" num value - mov r4,sp @ "rewind" r4 - mov r1,r4 @ "borrow" r1 - sub r3,r3,r5 @ "rewind" r3 to &np[0] - - subs r7,r7,r7 @ "clear" carry flag -.Lsub: ldr r7,[r4],#4 - ldr r6,[r3],#4 - sbcs r7,r7,r6 @ tp[j]-np[j] - str r7,[r2],#4 @ rp[j]= - teq r4,r0 @ preserve carry - bne .Lsub - sbcs r14,r14,#0 @ upmost carry - mov r4,sp @ "rewind" r4 - sub r2,r2,r5 @ "rewind" r2 - - and r1,r4,r14 - bic r3,r2,r14 - orr r1,r1,r3 @ ap=borrow?tp:rp - -.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh - str sp,[r4],#4 @ zap tp - str r7,[r2],#4 - cmp r4,r0 - bne .Lcopy - - add sp,r0,#4 @ skip over tp[num+1] - ldmia sp!,{r4-r12,lr} @ restore registers - add sp,sp,#2*4 @ skip over {r0,r2} - mov r0,#1 -.Labrt: tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -.size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " -.align 2 diff --git a/deps/openssl/asm/arm-elf-gas/sha/sha1-armv4-large.S b/deps/openssl/asm/arm-elf-gas/sha/sha1-armv4-large.S deleted file mode 100644 index 639ae78aaf9b9e..00000000000000 --- a/deps/openssl/asm/arm-elf-gas/sha/sha1-armv4-large.S +++ /dev/null @@ -1,452 +0,0 @@ -#include "arm_arch.h" - -.text - -.global sha1_block_data_order -.type sha1_block_data_order,%function - -.align 2 -sha1_block_data_order: - stmdb sp!,{r4-r12,lr} - add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 - ldmia r0,{r3,r4,r5,r6,r7} -.Lloop: - ldr r8,.LK_00_19 - mov r14,sp - sub sp,sp,#15*4 - mov r5,r5,ror#30 - mov r6,r6,ror#30 - mov r7,r7,ror#30 @ [6] -.L_00_15: -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! 
- add r7,r7,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r6,r8,r6,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r4,r5 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r6,r8,r6,ror#2 @ E+=K_00_19 - eor r10,r4,r5 @ F_xx_xx - add r6,r6,r7,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r3,r10,ror#2 - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r6,r6,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r5,r8,r5,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r3,r4 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r5,r8,r5,ror#2 @ E+=K_00_19 - eor r10,r3,r4 @ F_xx_xx - add r5,r5,r6,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r7,r10,ror#2 - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r5,r5,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r4,r8,r4,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r7,r3 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r4,r8,r4,ror#2 @ E+=K_00_19 - eor r10,r7,r3 @ F_xx_xx - add r4,r4,r5,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r6,r10,ror#2 - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r4,r4,r10 @ E+=F_00_19(B,C,D) -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r3,r8,r3,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r6,r7 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r3,r8,r3,ror#2 @ E+=K_00_19 - eor r10,r6,r7 @ F_xx_xx - add r3,r3,r4,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r5,r10,ror#2 - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r3,r3,r10 @ E+=F_00_19(B,C,D) - teq r14,sp - bne .L_00_15 @ [((11+4)*5+2)*3] - sub sp,sp,#25*4 -#if __ARM_ARCH__<7 - ldrb r10,[r1,#2] - ldrb r9,[r1,#3] - ldrb r11,[r1,#1] - add r7,r8,r7,ror#2 @ E+=K_00_19 - ldrb r12,[r1],#4 - orr r9,r9,r10,lsl#8 - eor r10,r5,r6 @ F_xx_xx - orr r9,r9,r11,lsl#16 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - orr r9,r9,r12,lsl#24 -#else - ldr r9,[r1],#4 @ handles unaligned - add r7,r8,r7,ror#2 @ E+=K_00_19 - eor r10,r5,r6 @ F_xx_xx - add r7,r7,r3,ror#27 @ E+=ROR(A,27) -#ifdef __ARMEL__ - rev r9,r9 @ byte swap -#endif -#endif - and r10,r4,r10,ror#2 - add r7,r7,r9 @ E+=X[i] - eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) - str r9,[r14,#-4]! - add r7,r7,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- and r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) - add r6,r6,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) - add r5,r5,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) - add r4,r4,r10 @ E+=F_00_19(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) - add r3,r3,r10 @ E+=F_00_19(B,C,D) - - ldr r8,.LK_20_39 @ [+15+16*4] - cmn sp,#0 @ [+3], clear carry to denote 20_39 -.L_20_39_or_60_79: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r4,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r3,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r7,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - eor r10,r6,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_20_39(B,C,D) - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- eor r10,r5,r10,ror#2 @ F_xx_xx - @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_20_39(B,C,D) - teq r14,sp @ preserve carry - bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] - bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes - - ldr r8,.LK_40_59 - sub sp,sp,#20*4 @ [+2] -.L_40_59: - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r7,r8,r7,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r5,r6 @ F_xx_xx - mov r9,r9,ror#31 - add r7,r7,r3,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r4,r10,ror#2 @ F_xx_xx - and r11,r5,r6 @ F_xx_xx - add r7,r7,r9 @ E+=X[i] - add r7,r7,r10 @ E+=F_40_59(B,C,D) - add r7,r7,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r6,r8,r6,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r4,r5 @ F_xx_xx - mov r9,r9,ror#31 - add r6,r6,r7,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r3,r10,ror#2 @ F_xx_xx - and r11,r4,r5 @ F_xx_xx - add r6,r6,r9 @ E+=X[i] - add r6,r6,r10 @ E+=F_40_59(B,C,D) - add r6,r6,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r5,r8,r5,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r3,r4 @ F_xx_xx - mov r9,r9,ror#31 - add r5,r5,r6,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r7,r10,ror#2 @ F_xx_xx - and r11,r3,r4 @ F_xx_xx - add r5,r5,r9 @ E+=X[i] - add r5,r5,r10 @ E+=F_40_59(B,C,D) - add r5,r5,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r4,r8,r4,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r7,r3 @ F_xx_xx - mov r9,r9,ror#31 - add r4,r4,r5,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! - and r10,r6,r10,ror#2 @ F_xx_xx - and r11,r7,r3 @ F_xx_xx - add r4,r4,r9 @ E+=X[i] - add r4,r4,r10 @ E+=F_40_59(B,C,D) - add r4,r4,r11,ror#2 - ldr r9,[r14,#15*4] - ldr r10,[r14,#13*4] - ldr r11,[r14,#7*4] - add r3,r8,r3,ror#2 @ E+=K_xx_xx - ldr r12,[r14,#2*4] - eor r9,r9,r10 - eor r11,r11,r12 @ 1 cycle stall - eor r10,r6,r7 @ F_xx_xx - mov r9,r9,ror#31 - add r3,r3,r4,ror#27 @ E+=ROR(A,27) - eor r9,r9,r11,ror#31 - str r9,[r14,#-4]! 
- and r10,r5,r10,ror#2 @ F_xx_xx - and r11,r6,r7 @ F_xx_xx - add r3,r3,r9 @ E+=X[i] - add r3,r3,r10 @ E+=F_40_59(B,C,D) - add r3,r3,r11,ror#2 - teq r14,sp - bne .L_40_59 @ [+((12+5)*5+2)*4] - - ldr r8,.LK_60_79 - sub sp,sp,#20*4 - cmp sp,#0 @ set carry to denote 60_79 - b .L_20_39_or_60_79 @ [+4], spare 300 bytes -.L_done: - add sp,sp,#80*4 @ "deallocate" stack frame - ldmia r0,{r8,r9,r10,r11,r12} - add r3,r8,r3 - add r4,r9,r4 - add r5,r10,r5,ror#2 - add r6,r11,r6,ror#2 - add r7,r12,r7,ror#2 - stmia r0,{r3,r4,r5,r6,r7} - teq r1,r2 - bne .Lloop @ [+18], total 1307 - -#if __ARM_ARCH__>=5 - ldmia sp!,{r4-r12,pc} -#else - ldmia sp!,{r4-r12,lr} - tst lr,#1 - moveq pc,lr @ be binary compatible with V4, yet - .word 0xe12fff1e @ interoperable with Thumb ISA:-) -#endif -.align 2 -.LK_00_19: .word 0x5a827999 -.LK_20_39: .word 0x6ed9eba1 -.LK_40_59: .word 0x8f1bbcdc -.LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 diff --git a/deps/openssl/asm/arm-elf-gas/sha/sha256-armv4.S b/deps/openssl/asm/arm-elf-gas/sha/sha256-armv4.S deleted file mode 100644 index 9c20a63cac30de..00000000000000 --- a/deps/openssl/asm/arm-elf-gas/sha/sha256-armv4.S +++ /dev/null @@ -1,1517 +0,0 @@ -#include "arm_arch.h" - -.text -.code 32 - -.type K256,%object -.align 5 -K256: -.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.size K256,.-K256 - -.global sha256_block_data_order -.type sha256_block_data_order,%function -sha256_block_data_order: - sub r3,pc,#8 @ sha256_block_data_order - add r2,r1,r2,lsl#6 @ len to point at the end of inp - stmdb sp!,{r0,r1,r2,r4-r11,lr} - ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} - sub r14,r3,#256 @ K256 - sub sp,sp,#16*4 @ alloca(X[16]) -.Loop: -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 0 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r8,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 - eor r2,r9,r10 -#if 0>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 0==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) - and r2,r2,r8 - str r3,[sp,#0*4] - add r3,r3,r0 - eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 0>=15 - ldr r1,[sp,#2*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 1 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr 
r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 - eor r2,r8,r9 -#if 1>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 1==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) - and r2,r2,r7 - str r3,[sp,#1*4] - add r3,r3,r0 - eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 1>=15 - ldr r1,[sp,#3*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 2 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r6,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 - eor r2,r7,r8 -#if 2>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 2==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) - and r2,r2,r6 - str r3,[sp,#2*4] - add r3,r3,r0 - eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 2>=15 - ldr r1,[sp,#4*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 3 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 - eor r2,r6,r7 -#if 3>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 3==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) - and r2,r2,r5 - str r3,[sp,#3*4] - add r3,r3,r0 - eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 3>=15 - ldr r1,[sp,#5*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 4 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r4,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 - eor r2,r5,r6 -#if 4>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 4==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) - and r2,r2,r4 - str r3,[sp,#4*4] - add r3,r3,r0 - eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 4>=15 - ldr r1,[sp,#6*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 5 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 - 
eor r2,r4,r5 -#if 5>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 5==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) - and r2,r2,r11 - str r3,[sp,#5*4] - add r3,r3,r0 - eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 5>=15 - ldr r1,[sp,#7*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 6 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r10,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 - eor r2,r11,r4 -#if 6>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 6==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) - and r2,r2,r10 - str r3,[sp,#6*4] - add r3,r3,r0 - eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 6>=15 - ldr r1,[sp,#8*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 7 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 - eor r2,r10,r11 -#if 7>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 7==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) - and r2,r2,r9 - str r3,[sp,#7*4] - add r3,r3,r0 - eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 7>=15 - ldr r1,[sp,#9*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 8 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r8,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 - eor r2,r9,r10 -#if 8>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 8==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) - and r2,r2,r8 - str r3,[sp,#8*4] - add r3,r3,r0 - eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 8>=15 - ldr r1,[sp,#10*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 9 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 - eor r2,r8,r9 -#if 9>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif 
-#if 9==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) - and r2,r2,r7 - str r3,[sp,#9*4] - add r3,r3,r0 - eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 9>=15 - ldr r1,[sp,#11*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 10 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r6,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 - eor r2,r7,r8 -#if 10>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 10==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) - and r2,r2,r6 - str r3,[sp,#10*4] - add r3,r3,r0 - eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 10>=15 - ldr r1,[sp,#12*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 11 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 - eor r2,r6,r7 -#if 11>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 11==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) - and r2,r2,r5 - str r3,[sp,#11*4] - add r3,r3,r0 - eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 11>=15 - ldr r1,[sp,#13*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 12 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r4,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 - eor r2,r5,r6 -#if 12>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 12==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) - and r2,r2,r4 - str r3,[sp,#12*4] - add r3,r3,r0 - eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 12>=15 - ldr r1,[sp,#14*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 13 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 - eor r2,r4,r5 -#if 13>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 13==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) 
- and r2,r2,r11 - str r3,[sp,#13*4] - add r3,r3,r0 - eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 13>=15 - ldr r1,[sp,#15*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 14 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r10,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 - eor r2,r11,r4 -#if 14>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 14==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) - and r2,r2,r10 - str r3,[sp,#14*4] - add r3,r3,r0 - eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 14>=15 - ldr r1,[sp,#0*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 -#if __ARM_ARCH__>=7 - ldr r3,[r1],#4 -#else - ldrb r3,[r1,#3] @ 15 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 - eor r2,r10,r11 -#if 15>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 15==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) - and r2,r2,r9 - str r3,[sp,#15*4] - add r3,r3,r0 - eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 15>=15 - ldr r1,[sp,#1*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 -.Lrounds_16_xx: - @ ldr r1,[sp,#1*4] @ 16 - ldr r12,[sp,#14*4] - mov r0,r1,ror#7 - ldr r3,[sp,#0*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#9*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r8,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 - eor r2,r9,r10 -#if 16>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 16==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) - and r2,r2,r8 - str r3,[sp,#0*4] - add r3,r3,r0 - eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 16>=15 - ldr r1,[sp,#2*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 - @ ldr r1,[sp,#2*4] @ 17 - ldr r12,[sp,#15*4] - mov r0,r1,ror#7 - ldr r3,[sp,#1*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#10*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 - eor r2,r8,r9 -#if 17>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev 
r3,r3 -#endif -#if 17==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) - and r2,r2,r7 - str r3,[sp,#1*4] - add r3,r3,r0 - eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 17>=15 - ldr r1,[sp,#3*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 - @ ldr r1,[sp,#3*4] @ 18 - ldr r12,[sp,#0*4] - mov r0,r1,ror#7 - ldr r3,[sp,#2*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#11*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r6,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 - eor r2,r7,r8 -#if 18>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 18==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) - and r2,r2,r6 - str r3,[sp,#2*4] - add r3,r3,r0 - eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 18>=15 - ldr r1,[sp,#4*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 - @ ldr r1,[sp,#4*4] @ 19 - ldr r12,[sp,#1*4] - mov r0,r1,ror#7 - ldr r3,[sp,#3*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#12*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 - eor r2,r6,r7 -#if 19>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 19==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) - and r2,r2,r5 - str r3,[sp,#3*4] - add r3,r3,r0 - eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 19>=15 - ldr r1,[sp,#5*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 - @ ldr r1,[sp,#5*4] @ 20 - ldr r12,[sp,#2*4] - mov r0,r1,ror#7 - ldr r3,[sp,#4*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#13*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r4,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 - eor r2,r5,r6 -#if 20>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 20==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) - and r2,r2,r4 - str r3,[sp,#4*4] - add r3,r3,r0 - eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 20>=15 - ldr r1,[sp,#6*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 - @ ldr r1,[sp,#6*4] @ 21 - ldr r12,[sp,#3*4] - mov r0,r1,ror#7 - ldr r3,[sp,#5*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#14*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add 
r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 - eor r2,r4,r5 -#if 21>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 21==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) - and r2,r2,r11 - str r3,[sp,#5*4] - add r3,r3,r0 - eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 21>=15 - ldr r1,[sp,#7*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 - @ ldr r1,[sp,#7*4] @ 22 - ldr r12,[sp,#4*4] - mov r0,r1,ror#7 - ldr r3,[sp,#6*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#15*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r10,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 - eor r2,r11,r4 -#if 22>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 22==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) - and r2,r2,r10 - str r3,[sp,#6*4] - add r3,r3,r0 - eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 22>=15 - ldr r1,[sp,#8*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 - @ ldr r1,[sp,#8*4] @ 23 - ldr r12,[sp,#5*4] - mov r0,r1,ror#7 - ldr r3,[sp,#7*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#0*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 - eor r2,r10,r11 -#if 23>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 23==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) - and r2,r2,r9 - str r3,[sp,#7*4] - add r3,r3,r0 - eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 23>=15 - ldr r1,[sp,#9*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 - @ ldr r1,[sp,#9*4] @ 24 - ldr r12,[sp,#6*4] - mov r0,r1,ror#7 - ldr r3,[sp,#8*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#1*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r8,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 - eor r2,r9,r10 -#if 24>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 24==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) - and r2,r2,r8 - str r3,[sp,#8*4] - add r3,r3,r0 - eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 24>=15 - ldr r1,[sp,#10*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and 
r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 - @ ldr r1,[sp,#10*4] @ 25 - ldr r12,[sp,#7*4] - mov r0,r1,ror#7 - ldr r3,[sp,#9*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#2*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 - eor r2,r8,r9 -#if 25>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 25==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) - and r2,r2,r7 - str r3,[sp,#9*4] - add r3,r3,r0 - eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 25>=15 - ldr r1,[sp,#11*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 - @ ldr r1,[sp,#11*4] @ 26 - ldr r12,[sp,#8*4] - mov r0,r1,ror#7 - ldr r3,[sp,#10*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#3*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r6,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 - eor r2,r7,r8 -#if 26>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 26==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) - and r2,r2,r6 - str r3,[sp,#10*4] - add r3,r3,r0 - eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 26>=15 - ldr r1,[sp,#12*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 - @ ldr r1,[sp,#12*4] @ 27 - ldr r12,[sp,#9*4] - mov r0,r1,ror#7 - ldr r3,[sp,#11*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#4*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 - eor r2,r6,r7 -#if 27>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 27==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) - and r2,r2,r5 - str r3,[sp,#11*4] - add r3,r3,r0 - eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 27>=15 - ldr r1,[sp,#13*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 - @ ldr r1,[sp,#13*4] @ 28 - ldr r12,[sp,#10*4] - mov r0,r1,ror#7 - ldr r3,[sp,#12*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#5*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r4,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 - eor r2,r5,r6 -#if 28>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 28==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor 
r0,r0,r4,ror#25 @ Sigma1(e) - and r2,r2,r4 - str r3,[sp,#12*4] - add r3,r3,r0 - eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 28>=15 - ldr r1,[sp,#14*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 - @ ldr r1,[sp,#14*4] @ 29 - ldr r12,[sp,#11*4] - mov r0,r1,ror#7 - ldr r3,[sp,#13*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#6*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 - eor r2,r4,r5 -#if 29>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 29==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) - and r2,r2,r11 - str r3,[sp,#13*4] - add r3,r3,r0 - eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 29>=15 - ldr r1,[sp,#15*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 - @ ldr r1,[sp,#15*4] @ 30 - ldr r12,[sp,#12*4] - mov r0,r1,ror#7 - ldr r3,[sp,#14*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#7*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r10,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 - eor r2,r11,r4 -#if 30>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 30==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) - and r2,r2,r10 - str r3,[sp,#14*4] - add r3,r3,r0 - eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 30>=15 - ldr r1,[sp,#0*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 - @ ldr r1,[sp,#0*4] @ 31 - ldr r12,[sp,#13*4] - mov r0,r1,ror#7 - ldr r3,[sp,#15*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#8*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 - eor r2,r10,r11 -#if 31>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 31==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) - and r2,r2,r9 - str r3,[sp,#15*4] - add r3,r3,r0 - eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 31>=15 - ldr r1,[sp,#1*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 - and r12,r12,#0xff - cmp r12,#0xf2 - bne .Lrounds_16_xx - - ldr r3,[sp,#16*4] @ pull ctx - ldr r0,[r3,#0] - ldr r2,[r3,#4] - ldr r12,[r3,#8] - add r4,r4,r0 - ldr r0,[r3,#12] - add r5,r5,r2 - ldr r2,[r3,#16] - add r6,r6,r12 - ldr r12,[r3,#20] - add r7,r7,r0 - ldr 
r0,[r3,#24]
-	add r8,r8,r2
-	ldr r2,[r3,#28]
-	add r9,r9,r12
-	ldr r1,[sp,#17*4] @ pull inp
-	ldr r12,[sp,#18*4] @ pull inp+len
-	add r10,r10,r0
-	add r11,r11,r2
-	stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
-	cmp r1,r12
-	sub r14,r14,#256 @ rewind Ktbl
-	bne .Loop
-
-	add sp,sp,#19*4 @ destroy frame
-#if __ARM_ARCH__>=5
-	ldmia sp!,{r4-r11,pc}
-#else
-	ldmia sp!,{r4-r11,lr}
-	tst lr,#1
-	moveq pc,lr @ be binary compatible with V4, yet
-	.word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size sha256_block_data_order,.-sha256_block_data_order
-.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by "
-.align 2
diff --git a/deps/openssl/asm/arm-elf-gas/aes/aes-armv4.S b/deps/openssl/asm/arm-void-gas/aes/aes-armv4.S
similarity index 89%
rename from deps/openssl/asm/arm-elf-gas/aes/aes-armv4.S
rename to deps/openssl/asm/arm-void-gas/aes/aes-armv4.S
index 2697d4ce4c5e01..333a522730ba6c 100644
--- a/deps/openssl/asm/arm-elf-gas/aes/aes-armv4.S
+++ b/deps/openssl/asm/arm-void-gas/aes/aes-armv4.S
@@ -1,6 +1,53 @@
-#include "arm_arch.h"
+
+@ ====================================================================
+@ Written by Andy Polyakov for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@ ====================================================================
+
+@ AES for ARMv4
+
+@ January 2007.
+@
+@ Code uses single 1K S-box and is >2 times faster than code generated
+@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
+@ allows to merge logical or arithmetic operation with shift or rotate
+@ in one instruction and emit combined result every cycle. The module
+@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit
+@ key [on single-issue Xscale PXA250 core].
+
+@ May 2007.
+@
+@ AES_set_[en|de]crypt_key is added.
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 12% improvement on
+@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~21.5 cycles per byte.
+ +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + .text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else .code 32 +# endif +#endif .type AES_Te,%object .align 5 @@ -114,7 +161,11 @@ AES_Te: .type AES_encrypt,%function .align 5 AES_encrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt +#else + adr r3,AES_encrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -356,11 +407,21 @@ _armv4_AES_encrypt: .align 5 private_AES_set_encrypt_key: _armv4_AES_set_encrypt_key: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key +#else + adr r3,private_AES_set_encrypt_key +#endif teq r0,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt teq r2,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt @@ -369,6 +430,9 @@ _armv4_AES_set_encrypt_key: teq r1,#192 beq .Lok teq r1,#256 +#if __ARM_ARCH__>=7 + itt ne @ Thumb2 thing, sanity check in ARM +#endif movne r0,#-1 bne .Labrt @@ -523,6 +587,9 @@ _armv4_AES_set_encrypt_key: str r2,[r11,#-16] subs r12,r12,#1 str r3,[r11,#-12] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,r11,#216 beq .Ldone @@ -592,6 +659,9 @@ _armv4_AES_set_encrypt_key: str r2,[r11,#-24] subs r12,r12,#1 str r3,[r11,#-20] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,r11,#256 beq .Ldone @@ -621,11 +691,17 @@ _armv4_AES_set_encrypt_key: str r9,[r11,#-4] b .L256_loop +.align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .global private_AES_set_decrypt_key @@ -635,34 +711,57 @@ private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 - ldrne lr,[sp],#4 @ pop lr + ldr lr,[sp],#4 @ pop lr bne .Labrt - stmdb sp!,{r4-r12} + mov r0,r2 @ AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key + +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.global AES_set_enc2dec_key +.type AES_set_enc2dec_key,%function +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4-r12,lr} + + ldr r12,[r0,#240] + mov r7,r0 @ input + add r8,r0,r12,lsl#4 + mov r11,r1 @ ouput + add r10,r1,r12,lsl#4 + str r12,[r1,#240] - ldr r12,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov r11,r2 @ which is AES_KEY *key - mov r7,r2 - add r8,r2,r12,lsl#4 +.Linv: ldr r0,[r7],#16 + ldr r1,[r7,#-12] + ldr r2,[r7,#-8] + ldr r3,[r7,#-4] + ldr r4,[r8],#-16 + ldr r5,[r8,#16+4] + ldr r6,[r8,#16+8] + ldr r9,[r8,#16+12] + str r0,[r10],#-16 + str r1,[r10,#16+4] + str r2,[r10,#16+8] + str r3,[r10,#16+12] + str r4,[r11],#16 + str r5,[r11,#-12] + str r6,[r11,#-8] + str r9,[r11,#-4] + teq r7,r8 + bne .Linv -.Linv: ldr r0,[r7] + ldr r0,[r7] ldr r1,[r7,#4] ldr r2,[r7,#8] ldr r3,[r7,#12] - ldr r4,[r8] - ldr r5,[r8,#4] - ldr r6,[r8,#8] - ldr r9,[r8,#12] - str r0,[r8],#-16 - str r1,[r8,#16+4] - str r2,[r8,#16+8] - str r3,[r8,#16+12] - str r4,[r7],#16 - str r5,[r7,#-12] - str r6,[r7,#-8] - str r9,[r7,#-4] - teq r7,r8 - bne .Linv + str r0,[r11] + str r1,[r11,#4] + str r2,[r11,#8] + str r3,[r11,#12] + sub r11,r11,r12,lsl#3 ldr r0,[r11,#16]! 
@ prefetch tp1 mov r7,#0x80 mov r8,#0x1b @@ -715,7 +814,7 @@ private_AES_set_decrypt_key: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.size AES_set_enc2dec_key,.-AES_set_enc2dec_key .type AES_Td,%object .align 5 @@ -825,7 +924,11 @@ AES_Td: .type AES_decrypt,%function .align 5 AES_decrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt +#else + adr r3,AES_decrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov r12,r0 @ inp mov r11,r2 @@ -1022,8 +1125,9 @@ _armv4_AES_decrypt: ldrb r6,[r10,r9] @ Td4[s0>>0] and r9,lr,r1,lsr#8 + add r1,r10,r1,lsr#24 ldrb r7,[r10,r7] @ Td4[s1>>0] - ldrb r1,[r10,r1,lsr#24] @ Td4[s1>>24] + ldrb r1,[r1] @ Td4[s1>>24] ldrb r8,[r10,r8] @ Td4[s1>>16] eor r0,r7,r0,lsl#24 ldrb r9,[r10,r9] @ Td4[s1>>8] @@ -1036,7 +1140,8 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s2>>0] and r9,lr,r2,lsr#16 - ldrb r2,[r10,r2,lsr#24] @ Td4[s2>>24] + add r2,r10,r2,lsr#24 + ldrb r2,[r2] @ Td4[s2>>24] eor r0,r0,r7,lsl#8 ldrb r9,[r10,r9] @ Td4[s2>>16] eor r1,r8,r1,lsl#16 @@ -1048,8 +1153,9 @@ _armv4_AES_decrypt: ldrb r8,[r10,r8] @ Td4[s3>>8] and r9,lr,r3 @ i2 + add r3,r10,r3,lsr#24 ldrb r9,[r10,r9] @ Td4[s3>>0] - ldrb r3,[r10,r3,lsr#24] @ Td4[s3>>24] + ldrb r3,[r3] @ Td4[s3>>24] eor r0,r0,r7,lsl#16 ldr r7,[r11,#0] eor r1,r1,r8,lsl#8 diff --git a/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S b/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S new file mode 100644 index 00000000000000..732ba3d9c88b94 --- /dev/null +++ b/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S @@ -0,0 +1,732 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv7-a +.fpu neon +.code 32 +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + mov r3,#-1 + cmp r0,#0 + beq .Lenc_key_abort + cmp r2,#0 + beq .Lenc_key_abort + mov r3,#-2 + cmp r1,#128 + blt .Lenc_key_abort + cmp r1,#256 + bgt .Lenc_key_abort + tst r1,#0x3f + bne .Lenc_key_abort + + adr r3,rcon + cmp r1,#192 + + veor q0,q0,q0 + vld1.8 {q3},[r0]! + mov r1,#8 @ reuse r1 + vld1.32 {q1,q2},[r3]! + + blt .Loop128 + beq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + bne .Loop128 + + vld1.32 {q1},[r3] + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + veor q3,q3,q10 + vst1.32 {q3},[r2] + add r2,r2,#0x50 + + mov r12,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {d16},[r0]! + vmov.i8 q10,#8 @ borrow q10 + vst1.32 {q3},[r2]! + vsub.i8 q2,q2,q10 @ adjust the mask + +.Loop192: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {d16},[r2]! 
+ .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + + vdup.32 q9,d7[1] + veor q9,q9,q8 + veor q10,q10,q1 + vext.8 q8,q0,q8,#12 + vshl.u8 q1,q1,#1 + veor q8,q8,q9 + veor q3,q3,q10 + veor q8,q8,q10 + vst1.32 {q3},[r2]! + bne .Loop192 + + mov r12,#12 + add r2,r2,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {q8},[r0] + mov r1,#7 + mov r12,#14 + vst1.32 {q3},[r2]! + +.Loop256: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q8},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + vst1.32 {q3},[r2]! + beq .Ldone + + vdup.32 q10,d7[1] + vext.8 q9,q0,q8,#12 + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + + veor q8,q8,q10 + b .Loop256 + +.Ldone: + str r12,[r2] + mov r3,#0 + +.Lenc_key_abort: + mov r0,r3 @ return value + + bx lr +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stmdb sp!,{r4,lr} + bl .Lenc_key + + cmp r0,#0 + bne .Ldec_key_abort + + sub r2,r2,#240 @ restore original r2 + mov r4,#-16 + add r0,r2,r12,lsl#4 @ end of key schedule + + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + +.Loop_imc: + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + cmp r0,r2 + bhi .Loop_imc + + vld1.32 {q0},[r2] + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vst1.32 {q0},[r0] + + eor r0,r0,r0 @ return value +.Ldec_key_abort: + ldmia sp!,{r4,pc} +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_enc: + .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 + vld1.32 {q0},[r2]! + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + subs r3,r3,#2 + .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + vld1.32 {q1},[r2]! + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + bgt .Loop_enc + + .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 + vld1.32 {q0},[r2] + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_dec: + .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 + vld1.32 {q0},[r2]! + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + subs r3,r3,#2 + .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + vld1.32 {q1},[r2]! 
+ .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + bgt .Loop_dec + + .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 + vld1.32 {q0},[r2] + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + mov ip,sp + stmdb sp!,{r4-r8,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load remaining args + subs r2,r2,#16 + mov r8,#16 + blo .Lcbc_abort + moveq r8,#0 + + cmp r5,#0 @ en- or decrypting? + ldr r5,[r3,#240] + and r2,r2,#-16 + vld1.8 {q6},[r4] + vld1.8 {q0},[r0],r8 + + vld1.32 {q8-q9},[r3] @ load key schedule... + sub r5,r5,#6 + add r7,r3,r5,lsl#4 @ pointer to last 7 round keys + sub r5,r5,#2 + vld1.32 {q10-q11},[r7]! + vld1.32 {q12-q13},[r7]! + vld1.32 {q14-q15},[r7]! + vld1.32 {q7},[r7] + + add r7,r3,#32 + mov r6,r5 + beq .Lcbc_dec + + cmp r5,#2 + veor q0,q0,q6 + veor q5,q8,q7 + beq .Lcbc_enc128 + +.Loop_cbc_enc: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + bgt .Loop_cbc_enc + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r7,r3,#16 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + + mov r6,r5 + veor q6,q0,q7 + vst1.8 {q6},[r1]! + bhs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {q2-q3},[r7] + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc128: + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 + .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 + .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc128 + + vst1.8 {q6},[r1]! + b .Lcbc_done +.align 5 +.Lcbc_dec: + vld1.8 {q10},[r0]! + subs r2,r2,#32 @ bias + add r6,r5,#2 + vorr q3,q0,q0 + vorr q1,q0,q0 + vorr q11,q10,q10 + blo .Lcbc_dec_tail + + vorr q1,q10,q10 + vld1.8 {q10},[r0]! 
+ vorr q2,q0,q0 + vorr q3,q1,q1 + vorr q11,q10,q10 + +.Loop3x_cbc_dec: + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + vld1.32 {q8},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + subs r6,r6,#2 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + vld1.32 {q9},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + bgt .Loop3x_cbc_dec + + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + veor q4,q6,q7 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q5,q2,q7 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + veor q9,q3,q7 + subs r2,r2,#0x30 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vorr q6,q11,q11 + movlo r6,r2 @ r6, r6, is zero at this point + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 + add r0,r0,r6 @ r0 is adjusted in such way that + @ at exit from the loop q1-q10 + @ are loaded with last "words" + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + mov r7,r3 + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 + vld1.8 {q2},[r0]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q3},[r0]! + .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 + vld1.8 {q11},[r0]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 + .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 + .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + + add r6,r5,#2 + veor q4,q4,q0 + veor q5,q5,q1 + veor q10,q10,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vorr q0,q2,q2 + vst1.8 {q4},[r1]! + vorr q1,q3,q3 + vst1.8 {q5},[r1]! + vst1.8 {q10},[r1]! + vorr q10,q11,q11 + bhs .Loop3x_cbc_dec + + cmn r2,#0x30 + beq .Lcbc_done + nop + +.Lcbc_dec_tail: + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + vld1.32 {q8},[r7]! + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + subs r6,r6,#2 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + vld1.32 {q9},[r7]! 
+ .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + bgt .Lcbc_dec_tail + + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + cmn r2,#0x20 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q5,q6,q7 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q9,q3,q7 + .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 + .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + beq .Lcbc_dec_one + veor q5,q5,q1 + veor q9,q9,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + vst1.8 {q9},[r1]! + b .Lcbc_done + +.Lcbc_dec_one: + veor q5,q5,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + +.Lcbc_done: + vst1.8 {q6},[r4] +.Lcbc_abort: + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r8,pc} +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + mov ip,sp + stmdb sp!,{r4-r10,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg + ldr r5,[r3,#240] + + ldr r8, [r4, #12] + vld1.32 {q0},[r4] + + vld1.32 {q8-q9},[r3] @ load key schedule... + sub r5,r5,#4 + mov r12,#16 + cmp r2,#2 + add r7,r3,r5,lsl#4 @ pointer to last 5 round keys + sub r5,r5,#2 + vld1.32 {q12-q13},[r7]! + vld1.32 {q14-q15},[r7]! + vld1.32 {q7},[r7] + add r7,r3,#32 + mov r6,r5 + movlo r12,#0 +#ifndef __ARMEB__ + rev r8, r8 +#endif + vorr q1,q0,q0 + add r10, r8, #1 + vorr q10,q0,q0 + add r8, r8, #2 + vorr q6,q0,q0 + rev r10, r10 + vmov.32 d3[1],r10 + bls .Lctr32_tail + rev r12, r8 + sub r2,r2,#3 @ bias + vmov.32 d21[1],r12 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + bgt .Loop3x_ctr32 + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 + mov r7,r3 + .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 + vld1.8 {q2},[r0]! + .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vorr q0,q6,q6 + .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 + vld1.8 {q3},[r0]! + .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 + vorr q1,q6,q6 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vld1.8 {q11},[r0]! 
+ .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 + vorr q10,q6,q6 + add r9,r8,#1 + .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 + .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 + .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 + veor q2,q2,q7 + add r10,r8,#2 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + veor q3,q3,q7 + add r8,r8,#3 + .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 + .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 + .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 + veor q11,q11,q7 + rev r9,r9 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vmov.32 d1[1], r9 + rev r10,r10 + .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 + .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 + .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 + vmov.32 d3[1], r10 + rev r12,r8 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vmov.32 d21[1], r12 + subs r2,r2,#3 + .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 + .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 + .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 + + mov r6,r5 + veor q2,q2,q4 + veor q3,q3,q5 + veor q11,q11,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vst1.8 {q2},[r1]! + vst1.8 {q3},[r1]! + vst1.8 {q11},[r1]! + bhs .Loop3x_ctr32 + + adds r2,r2,#3 + beq .Lctr32_done + cmp r2,#1 + mov r12,#16 + moveq r12,#0 + +.Lctr32_tail: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + bgt .Lctr32_tail + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q2},[r0],r12 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 + vld1.8 {q3},[r0] + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 + veor q2,q2,q7 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q3,q3,q7 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 + + cmp r2,#1 + veor q2,q2,q0 + veor q3,q3,q1 + vst1.8 {q2},[r1]! 
+	beq .Lctr32_done
+	vst1.8 {q3},[r1]
+
+.Lctr32_done:
+	vldmia sp!,{d8-d15}
+	ldmia sp!,{r4-r10,pc}
+.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
+#endif
diff --git a/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S b/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S
new file mode 100644
index 00000000000000..9738ed50235b3e
--- /dev/null
+++ b/deps/openssl/asm/arm-void-gas/aes/bsaes-armv7.S
@@ -0,0 +1,2546 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel
+@ . Permission to use under GPL terms is
+@ granted.
+@ ====================================================================
+
+@ Bit-sliced AES for ARM NEON
+@
+@ February 2012.
+@
+@ This implementation is direct adaptation of bsaes-x86_64 module for
+@ ARM NEON. Except that this module is endian-neutral [in sense that
+@ it can be compiled for either endianness] by courtesy of vld1.8's
+@ neutrality. Initial version doesn't implement interface to OpenSSL,
+@ only low-level primitives and unsupported entry points, just enough
+@ to collect performance results, which for Cortex-A8 core are:
+@
+@ encrypt 19.5 cycles per byte processed with 128-bit key
+@ decrypt 22.1 cycles per byte processed with 128-bit key
+@ key conv. 440 cycles per 128-bit key/0.18 of 8x block
+@
+@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7,
+@ which is [much] worse than anticipated (for further details see
+@ http://www.openssl.org/~appro/Snapdragon-S4.html).
+@
+@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code
+@ manages in 20.0 cycles].
+@
+@ When comparing to x86_64 results keep in mind that NEON unit is
+@ [mostly] single-issue and thus can't [fully] benefit from
+@ instruction-level parallelism. And when comparing to aes-armv4
+@ results keep in mind key schedule conversion overhead (see
+@ bsaes-x86_64.pl for further details)...
+@
+@
+
+@ April-August 2013
+@
+@ Add CBC, CTR and XTS subroutines, adapt for kernel use.
+@ +@ + +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key + add r6,r6,#.LM0ISR-_bsaes_decrypt8 + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, 
d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + 
vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size 
_bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key + sub r6,r6,#_bsaes_encrypt8-.LM0SR + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr 
q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + 
vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key + sub r6,r6,#_bsaes_key_convert-.LM0 + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! 
@ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0-q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0-q1}, [r0]! @ load input + vld1.8 {q2-q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4-q5}, [r0]! + mov r5, r10 + vld1.8 {q6-q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14-q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! 
@ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0-q1}, [r1]! 
@ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 +0: add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter + sub r6, r8, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8-q9}, [r0]! @ load input + vld1.8 {q10-q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14-q15}, [r0]! + veor q3, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! 
+ vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0-q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! 
@ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! 
+ veor q9, q6, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! 
+ + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! 
+ mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! 
+ vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! 
+ + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! 
+ mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif diff --git a/deps/openssl/asm/arm-elf-gas/bn/armv4-gf2m.S b/deps/openssl/asm/arm-void-gas/bn/armv4-gf2m.S similarity index 68% rename from deps/openssl/asm/arm-elf-gas/bn/armv4-gf2m.S rename to deps/openssl/asm/arm-void-gas/bn/armv4-gf2m.S index 552a883709f43e..32610558ac5853 100644 --- a/deps/openssl/asm/arm-elf-gas/bn/armv4-gf2m.S +++ b/deps/openssl/asm/arm-void-gas/bn/armv4-gf2m.S @@ -2,36 +2,6 @@ .text .code 32 - -#if __ARM_ARCH__>=7 -.fpu neon - -.type mul_1x1_neon,%function -.align 5 -mul_1x1_neon: - vshl.u64 d2,d16,#8 @ q1-q3 are slided - - vmull.p8 q0,d16,d17 @ a·bb - vshl.u64 d4,d16,#16 - vmull.p8 q1,d2,d17 @ a<<8·bb - vshl.u64 d6,d16,#24 - vmull.p8 q2,d4,d17 @ a<<16·bb - vshr.u64 d2,#8 - vmull.p8 q3,d6,d17 @ a<<24·bb - vshl.u64 d3,#24 - veor d0,d2 - vshr.u64 d4,#16 - veor d0,d3 - vshl.u64 d5,#16 - veor d0,d4 - vshr.u64 d6,#24 - veor d0,d5 - vshl.u64 d7,#8 - veor d0,d6 - veor d0,d7 - .word 0xe12fff1e -.size mul_1x1_neon,.-mul_1x1_neon -#endif .type mul_1x1_ialu,%function .align 5 mul_1x1_ialu: @@ -115,48 +85,11 @@ mul_1x1_ialu: .type bn_GF2m_mul_2x2,%function .align 5 bn_GF2m_mul_2x2: -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 ldr r12,.LOPENSSL_armcap .Lpic: ldr r12,[pc,r12] tst r12,#1 - beq .Lialu - - veor d18,d18 - vmov.32 d19,r3,r3 @ two copies of b1 - vmov.32 d18[0],r1 @ a1 - - veor d20,d20 - vld1.32 d21[],[sp,:32] @ two copies of b0 - vmov.32 d20[0],r2 @ a0 - mov r12,lr - - vmov d16,d18 - vmov d17,d19 - bl mul_1x1_neon @ a1·b1 - vmov d22,d0 - - vmov d16,d20 - vmov d17,d21 - bl mul_1x1_neon @ a0·b0 - vmov d23,d0 - - veor d16,d20,d18 - veor d17,d21,d19 - veor d20,d23,d22 - bl mul_1x1_neon @ (a0+a1)·(b0+b1) - - veor d0,d20 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 - vshl.u64 d1,d0,#32 - vshr.u64 d0,d0,#32 - veor d23,d1 - veor d22,d0 - vst1.32 {d23[0]},[r0,:32]! - vst1.32 {d23[1]},[r0,:32]! - vst1.32 {d22[0]},[r0,:32]! 
- vst1.32 {d22[1]},[r0,:32] - bx r12 -.align 4 -.Lialu: + bne .LNEON #endif stmdb sp!,{r4-r10,lr} mov r10,r0 @ reassign 1st argument @@ -202,8 +135,62 @@ bn_GF2m_mul_2x2: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.align 5 +.LNEON: + ldr r12, [sp] @ 5th argument + vmov.32 d26, r2, r1 + vmov.32 d27, r12, r3 + vmov.i64 d28, #0x0000ffffffffffff + vmov.i64 d29, #0x00000000ffffffff + vmov.i64 d30, #0x000000000000ffff + + vext.8 d2, d26, d26, #1 @ A1 + vmull.p8 q1, d2, d27 @ F = A1*B + vext.8 d0, d27, d27, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d4, d26, d26, #2 @ A2 + vmull.p8 q2, d4, d27 @ H = A2*B + vext.8 d16, d27, d27, #2 @ B2 + vmull.p8 q8, d26, d16 @ G = A*B2 + vext.8 d6, d26, d26, #3 @ A3 + veor q1, q1, q0 @ L = E + F + vmull.p8 q3, d6, d27 @ J = A3*B + vext.8 d0, d27, d27, #3 @ B3 + veor q2, q2, q8 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8 + vand d3, d3, d28 + vext.8 d16, d27, d27, #4 @ B4 + veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16 + vand d5, d5, d29 + vmull.p8 q8, d26, d16 @ K = A*B4 + veor q3, q3, q0 @ N = I + J + veor d2, d2, d3 + veor d4, d4, d5 + veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24 + vand d7, d7, d30 + vext.8 q1, q1, q1, #15 + veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d17, #0 + vext.8 q2, q2, q2, #14 + veor d6, d6, d7 + vmull.p8 q0, d26, d27 @ D = A*B + vext.8 q8, q8, q8, #12 + vext.8 q3, q3, q3, #13 + veor q1, q1, q2 + veor q3, q3, q8 + veor q0, q0, q1 + veor q0, q0, q3 + + vst1.32 {q0}, [r0] + bx lr @ bx lr +#endif .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: .word OPENSSL_armcap_P-(.Lpic+8) @@ -211,4 +198,6 @@ bn_GF2m_mul_2x2: .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " .align 5 +#if __ARM_MAX_ARCH__>=7 .comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm/arm-void-gas/bn/armv4-mont.S b/deps/openssl/asm/arm-void-gas/bn/armv4-mont.S new file mode 100644 index 00000000000000..71fc296fcab181 --- /dev/null +++ b/deps/openssl/asm/arm-void-gas/bn/armv4-mont.S @@ -0,0 +1,580 @@ +#include "arm_arch.h" + +.text +.code 32 + +#if __ARM_MAX_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-bn_mul_mont +#endif + +.global bn_mul_mont +.type bn_mul_mont,%function + +.align 5 +bn_mul_mont: + ldr ip,[sp,#4] @ load num + stmdb sp!,{r0,r2} @ sp points at argument block +#if __ARM_MAX_ARCH__>=7 + tst ip,#7 + bne .Lialu + adr r0,bn_mul_mont + ldr r2,.LOPENSSL_armcap + ldr r0,[r0,r2] + tst r0,#1 @ NEON available? 
+ ldmia sp, {r0,r2} + beq .Lialu + add sp,sp,#8 + b bn_mul8x_mont_neon +.align 4 +.Lialu: +#endif + cmp ip,#2 + mov r0,ip @ load num + movlt r0,#0 + addlt sp,sp,#2*4 + blt .Labrt + + stmdb sp!,{r4-r12,lr} @ save 10 registers + + mov r0,r0,lsl#2 @ rescale r0 for byte count + sub sp,sp,r0 @ alloca(4*num) + sub sp,sp,#4 @ +extra dword + sub r0,r0,#4 @ "num=num-1" + add r4,r2,r0 @ &bp[num-1] + + add r0,sp,r0 @ r0 to point at &tp[num-1] + ldr r8,[r0,#14*4] @ &n0 + ldr r2,[r2] @ bp[0] + ldr r5,[r1],#4 @ ap[0],ap++ + ldr r6,[r3],#4 @ np[0],np++ + ldr r8,[r8] @ *n0 + str r4,[r0,#15*4] @ save &bp[num] + + umull r10,r11,r5,r2 @ ap[0]*bp[0] + str r8,[r0,#14*4] @ save n0 value + mul r8,r10,r8 @ "tp[0]"*n0 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" + mov r4,sp + +.L1st: + ldr r5,[r1],#4 @ ap[j],ap++ + mov r10,r11 + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[0] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .L1st + + adds r12,r12,r11 + ldr r4,[r0,#13*4] @ restore bp + mov r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + +.Louter: + sub r7,r0,sp @ "original" r0-1 value + sub r1,r1,r7 @ "rewind" ap to &ap[1] + ldr r2,[r4,#4]! @ *(++bp) + sub r3,r3,r7 @ "rewind" np to &np[1] + ldr r5,[r1,#-4] @ ap[0] + ldr r10,[sp] @ tp[0] + ldr r6,[r3,#-4] @ np[0] + ldr r7,[sp,#4] @ tp[1] + + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] + str r4,[r0,#13*4] @ save bp + mul r8,r10,r8 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" + mov r4,sp + +.Linner: + ldr r5,[r1],#4 @ ap[j],ap++ + adds r10,r11,r7 @ +=tp[j] + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[i] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adc r11,r11,#0 + ldr r7,[r4,#8] @ tp[j+1] + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .Linner + + adds r12,r12,r11 + mov r14,#0 + ldr r4,[r0,#13*4] @ restore bp + adc r14,r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adds r12,r12,r7 + ldr r7,[r0,#15*4] @ restore &bp[num] + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + + cmp r4,r7 + bne .Louter + + ldr r2,[r0,#12*4] @ pull rp + add r0,r0,#4 @ r0 to point at &tp[num] + sub r5,r0,sp @ "original" num value + mov r4,sp @ "rewind" r4 + mov r1,r4 @ "borrow" r1 + sub r3,r3,r5 @ "rewind" r3 to &np[0] + + subs r7,r7,r7 @ "clear" carry flag +.Lsub: ldr r7,[r4],#4 + ldr r6,[r3],#4 + sbcs r7,r7,r6 @ tp[j]-np[j] + str r7,[r2],#4 @ rp[j]= + teq r4,r0 @ preserve carry + bne .Lsub + sbcs r14,r14,#0 @ upmost carry + mov r4,sp @ "rewind" r4 + sub r2,r2,r5 @ "rewind" r2 + + and r1,r4,r14 + bic r3,r2,r14 + orr r1,r1,r3 @ ap=borrow?tp:rp + +.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh + str sp,[r4],#4 @ zap tp + str r7,[r2],#4 + cmp r4,r0 + bne .Lcopy + + add sp,r0,#4 @ skip over tp[num+1] + ldmia sp!,{r4-r12,lr} @ restore registers + add sp,sp,#2*4 @ skip over {r0,r2} + mov r0,#1 +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size bn_mul_mont,.-bn_mul_mont +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4-r11} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load rest of parameter block + + sub r7,sp,#16 + vld1.32 {d28[0]}, 
[r2,:32]! + sub r7,r7,r5,lsl#4 + vld1.32 {d0-d3}, [r1]! @ can't specify :32 :-( + and r7,r7,#-64 + vld1.32 {d30[0]}, [r4,:32] + mov sp,r7 @ alloca + veor d8,d8,d8 + subs r8,r5,#8 + vzip.16 d28,d8 + + vmull.u32 q6,d28,d0[0] + vmull.u32 q7,d28,d0[1] + vmull.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmull.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + vmul.u32 d29,d10,d30 + + vmull.u32 q10,d28,d2[0] + vld1.32 {d4-d7}, [r3]! + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + @ special case for num=8, everything is in register bank... + + vmlal.u32 q6,d29,d4[0] + sub r9,r5,#1 + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {d28[0]}, [r2,:32]! + veor d8,d8,d8 + vzip.16 d28,d8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmlal.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + subs r9,r9,#1 + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + + vmlal.u32 q6,d29,d4[0] + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + bne .LNEON_outer8 + + vadd.u64 d12,d12,d10 + mov r7,sp + vshr.u64 d10,d12,#16 + mov r8,r5 + vadd.u64 d13,d13,d10 + add r6,sp,#16 + vshr.u64 d10,d13,#16 + vzip.16 d12,d13 + + b .LNEON_tail2 + +.align 4 +.LNEON_1st: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0-d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vld1.32 {d4-d5}, [r3]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vmlal.u32 q13,d29,d7[1] + vst1.64 {q8-q9}, [r7,:256]! + + vmull.u32 q6,d28,d0[0] + vld1.32 {d6-d7}, [r3]! + vmull.u32 q7,d28,d0[1] + vst1.64 {q10-q11}, [r7,:256]! + vmull.u32 q8,d28,d1[0] + vmull.u32 q9,d28,d1[1] + vst1.64 {q12-q13}, [r7,:256]! + + vmull.u32 q10,d28,d2[0] + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + sub r9,r5,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vshr.u64 d10,d10,#16 + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q12,d29,d7[0] + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10-q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + veor q4,q4,q4 + vst1.64 {q12-q13}, [r7,:256]! + vld1.64 {q7-q8}, [r6, :256]! + vst1.64 {q4}, [r7,:128] + vshr.u64 d10,d10,#16 + + b .LNEON_outer + +.align 4 +.LNEON_outer: + vld1.32 {d28[0]}, [r2,:32]! + sub r3,r3,r5,lsl#2 @ rewind r3 + vld1.32 {d0-d3}, [r1]! 
+ veor d8,d8,d8 + mov r7,sp + vzip.16 d28,d8 + sub r8,r5,#8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9-q10},[r6,:256]! + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11-q12},[r6,:256]! + vmlal.u32 q9,d28,d1[1] + + vshl.i64 d10,d13,#16 + veor d8,d8,d8 + vadd.u64 d10,d10,d12 + vld1.64 {q13},[r6,:128]! + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vld1.32 {d4-d7}, [r3]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + +.LNEON_inner: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0-d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + vst1.64 {q6-q7}, [r7,:256]! + + vmlal.u32 q10,d29,d6[0] + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7-q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + vst1.64 {q10-q11}, [r7,:256]! + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9-q10}, [r6, :256]! + vmlal.u32 q7,d28,d0[1] + vst1.64 {q12-q13}, [r7,:256]! + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11-q12}, [r6, :256]! + vmlal.u32 q9,d28,d1[1] + vld1.32 {d4-d7}, [r3]! + + vmlal.u32 q10,d28,d2[0] + vld1.64 {q13}, [r6, :128]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vmlal.u32 q13,d28,d3[1] + + bne .LNEON_inner + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + subs r9,r9,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d10,#16 + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7-q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10-q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + vst1.64 {q12-q13}, [r7,:256]! + vshr.u64 d10,d10,#16 + + bne .LNEON_outer + + mov r7,sp + mov r8,r5 + +.LNEON_tail: + vadd.u64 d12,d12,d10 + vld1.64 {q9-q10}, [r6, :256]! + vshr.u64 d10,d12,#16 + vadd.u64 d13,d13,d10 + vld1.64 {q11-q12}, [r6, :256]! + vshr.u64 d10,d13,#16 + vld1.64 {q13}, [r6, :128]! + vzip.16 d12,d13 + +.LNEON_tail2: + vadd.u64 d14,d14,d10 + vst1.32 {d12[0]}, [r7, :32]! + vshr.u64 d10,d14,#16 + vadd.u64 d15,d15,d10 + vshr.u64 d10,d15,#16 + vzip.16 d14,d15 + + vadd.u64 d16,d16,d10 + vst1.32 {d14[0]}, [r7, :32]! + vshr.u64 d10,d16,#16 + vadd.u64 d17,d17,d10 + vshr.u64 d10,d17,#16 + vzip.16 d16,d17 + + vadd.u64 d18,d18,d10 + vst1.32 {d16[0]}, [r7, :32]! + vshr.u64 d10,d18,#16 + vadd.u64 d19,d19,d10 + vshr.u64 d10,d19,#16 + vzip.16 d18,d19 + + vadd.u64 d20,d20,d10 + vst1.32 {d18[0]}, [r7, :32]! + vshr.u64 d10,d20,#16 + vadd.u64 d21,d21,d10 + vshr.u64 d10,d21,#16 + vzip.16 d20,d21 + + vadd.u64 d22,d22,d10 + vst1.32 {d20[0]}, [r7, :32]! + vshr.u64 d10,d22,#16 + vadd.u64 d23,d23,d10 + vshr.u64 d10,d23,#16 + vzip.16 d22,d23 + + vadd.u64 d24,d24,d10 + vst1.32 {d22[0]}, [r7, :32]! + vshr.u64 d10,d24,#16 + vadd.u64 d25,d25,d10 + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d25,#16 + vzip.16 d24,d25 + + vadd.u64 d26,d26,d10 + vst1.32 {d24[0]}, [r7, :32]! + vshr.u64 d10,d26,#16 + vadd.u64 d27,d27,d10 + vld1.64 {q7-q8}, [r6, :256]! + vshr.u64 d10,d27,#16 + vzip.16 d26,d27 + subs r8,r8,#8 + vst1.32 {d26[0]}, [r7, :32]! 
+ + bne .LNEON_tail + + vst1.32 {d10[0]}, [r7, :32] @ top-most bit + sub r3,r3,r5,lsl#2 @ rewind r3 + subs r1,sp,#0 @ clear carry flag + add r2,sp,r5,lsl#2 + +.LNEON_sub: + ldmia r1!, {r4-r7} + ldmia r3!, {r8-r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq r1,r2 @ preserves carry + stmia r0!, {r8-r11} + bne .LNEON_sub + + ldr r10, [r1] @ load top-most bit + veor q0,q0,q0 + sub r11,r2,sp @ this is num*4 + veor q1,q1,q1 + mov r1,sp + sub r0,r0,r11 @ rewind r0 + mov r3,r2 @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia r1!, {r4-r7} + ldmia r0, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r11,r7 + ldmia r1, {r4-r7} + stmia r0!, {r8-r11} + sub r1,r1,#16 + ldmia r0, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [r1,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r11,r7 + teq r1,r2 @ preserves carry + stmia r0!, {r8-r11} + bne .LNEON_copy_n_zap + + sub sp,ip,#96 + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r11} + bx lr @ .word 0xe12fff1e +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " +.align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm/arm-elf-gas/modes/ghash-armv4.S b/deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S similarity index 55% rename from deps/openssl/asm/arm-elf-gas/modes/ghash-armv4.S rename to deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S index 872aff1727fdda..c54f5149974c6f 100644 --- a/deps/openssl/asm/arm-elf-gas/modes/ghash-armv4.S +++ b/deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S @@ -145,7 +145,6 @@ gcm_ghash_4bit: strb r11,[r0,#4] #endif - #if __ARM_ARCH__>=7 && defined(__ARMEL__) rev r7,r7 str r7,[r0,#0] @@ -161,7 +160,6 @@ gcm_ghash_4bit: strb r11,[r0,#0] #endif - bne .Louter add sp,sp,#36 @@ -254,7 +252,6 @@ gcm_gmult_4bit: strb r11,[r0,#12] #endif - #if __ARM_ARCH__>=7 && defined(__ARMEL__) rev r5,r5 str r5,[r0,#8] @@ -270,7 +267,6 @@ gcm_gmult_4bit: strb r11,[r0,#8] #endif - #if __ARM_ARCH__>=7 && defined(__ARMEL__) rev r6,r6 str r6,[r0,#4] @@ -286,7 +282,6 @@ gcm_gmult_4bit: strb r11,[r0,#4] #endif - #if __ARM_ARCH__>=7 && defined(__ARMEL__) rev r7,r7 str r7,[r0,#0] @@ -302,7 +297,6 @@ gcm_gmult_4bit: strb r11,[r0,#0] #endif - #if __ARM_ARCH__>=5 ldmia sp!,{r4-r11,pc} #else @@ -312,103 +306,217 @@ gcm_gmult_4bit: .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif .size gcm_gmult_4bit,.-gcm_gmult_4bit -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a .fpu neon +.global gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 d7,[r1,:64]! @ load H + vmov.i8 q8,#0xe1 + vld1.64 d6,[r1,:64] + vshl.i64 d17,#57 + vshr.u64 d16,#63 @ t0=0xc2....01 + vdup.8 q9,d7[7] + vshr.u64 d26,d6,#63 + vshr.s8 q9,#7 @ broadcast carry bit + vshl.i64 q3,q3,#1 + vand q8,q8,q9 + vorr d7,d26 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vstmia r0,{q3} + + bx lr @ bx lr +.size gcm_init_neon,.-gcm_init_neon + .global gcm_gmult_neon .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - sub r1,#16 @ point at H in GCM128_CTX - vld1.64 d29,[r0,:64]!@ load Xi - vmov.i32 d5,#0xe1 @ our irreducible polynomial - vld1.64 d28,[r0,:64]! - vshr.u64 d5,#32 - vldmia r1,{d0-d1} @ load H - veor q12,q12 + vld1.64 d7,[r0,:64]! @ load Xi + vld1.64 d6,[r0,:64]! 
+ vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26-d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff #ifdef __ARMEL__ - vrev64.8 q14,q14 + vrev64.8 q3,q3 #endif - veor q13,q13 - veor q11,q11 - mov r1,#16 - veor q10,q10 + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing mov r3,#16 - veor d2,d2 - vdup.8 d4,d28[0] @ broadcast lowest byte - b .Linner_neon + b .Lgmult_neon .size gcm_gmult_neon,.-gcm_gmult_neon .global gcm_ghash_neon .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 d21,[r0,:64]! @ load Xi - vmov.i32 d5,#0xe1 @ our irreducible polynomial - vld1.64 d20,[r0,:64]! - vshr.u64 d5,#32 - vldmia r0,{d0-d1} @ load H - veor q12,q12 - nop + vld1.64 d1,[r0,:64]! @ load Xi + vld1.64 d0,[r0,:64]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26-d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff #ifdef __ARMEL__ - vrev64.8 q10,q10 + vrev64.8 q0,q0 #endif -.Louter_neon: - vld1.64 d29,[r2]! @ load inp - veor q13,q13 - vld1.64 d28,[r2]! - veor q11,q11 - mov r1,#16 + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 d7,[r2]! @ load inp + vld1.64 d6,[r2]! #ifdef __ARMEL__ - vrev64.8 q14,q14 + vrev64.8 q3,q3 #endif - veor d2,d2 - veor q14,q10 @ inp^=Xi - veor q10,q10 - vdup.8 d4,d28[0] @ broadcast lowest byte -.Linner_neon: - subs r1,r1,#1 - vmull.p8 q9,d1,d4 @ H.lo·Xi[i] - vmull.p8 q8,d0,d4 @ H.hi·Xi[i] - vext.8 q14,q12,#1 @ IN>>=8 - - veor q10,q13 @ modulo-scheduled part - vshl.i64 d22,#48 - vdup.8 d4,d28[0] @ broadcast lowest byte - veor d3,d18,d20 - - veor d21,d22 - vuzp.8 q9,q8 - vsli.8 d2,d3,#1 @ compose the "carry" byte - vext.8 q10,q12,#1 @ Z>>=8 - - vmull.p8 q11,d2,d5 @ "carry"·0xe1 - vshr.u8 d2,d3,#7 @ save Z's bottom bit - vext.8 q13,q9,q12,#1 @ Qlo>>=8 - veor q10,q8 - bne .Linner_neon - - veor q10,q13 @ modulo-scheduled artefact - vshl.i64 d22,#48 - veor d21,d22 - - @ finalization, normalize Z:Zo - vand d2,d5 @ suffices to mask the bit - vshr.u64 d3,d20,#63 - vshl.i64 q10,#1 + veor q3,q0 @ inp^=Xi +.Lgmult_neon: + vext.8 d16, d26, d26, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d0, d6, d6, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d18, d26, d26, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d26, d22 @ G = A*B2 + vext.8 d20, d26, d26, #3 @ A3 + veor q8, q8, q0 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d0, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d26, d22 @ K = A*B4 + veor q10, q10, q0 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q0, d26, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q0, q0, q8 + veor q0, q0, q10 + veor d6,d6,d7 @ Karatsuba pre-processing + vext.8 d16, d28, d28, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d2, d6, d6, #1 @ B1 + vmull.p8 q1, d28, d2 @ E = A*B1 + vext.8 d18, d28, d28, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d28, d22 @ G = A*B2 + vext.8 d20, d28, d28, #3 @ A3 + veor q8, q8, q1 @ L = E + F + vmull.p8 q10, d20, 
d6 @ J = A3*B + vext.8 d2, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q1, d28, d2 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d28, d22 @ K = A*B4 + veor q10, q10, q1 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q1, d28, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q1, q1, q8 + veor q1, q1, q10 + vext.8 d16, d27, d27, #1 @ A1 + vmull.p8 q8, d16, d7 @ F = A1*B + vext.8 d4, d7, d7, #1 @ B1 + vmull.p8 q2, d27, d4 @ E = A*B1 + vext.8 d18, d27, d27, #2 @ A2 + vmull.p8 q9, d18, d7 @ H = A2*B + vext.8 d22, d7, d7, #2 @ B2 + vmull.p8 q11, d27, d22 @ G = A*B2 + vext.8 d20, d27, d27, #3 @ A3 + veor q8, q8, q2 @ L = E + F + vmull.p8 q10, d20, d7 @ J = A3*B + vext.8 d4, d7, d7, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q2, d27, d4 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d7, d7, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d27, d22 @ K = A*B4 + veor q10, q10, q2 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q2, d27, d7 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q2, q2, q8 + veor q2, q2, q10 + veor q1,q1,q0 @ Karatsuba post-processing + veor q1,q1,q2 + veor d1,d1,d2 + veor d4,d4,d3 @ Xh|Xl - 256-bit result + + @ equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 q9,q0,#57 @ 1st phase + vshl.i64 q10,q0,#62 + veor q10,q10,q9 @ + vshl.i64 q9,q0,#63 + veor q10, q10, q9 @ + veor d1,d1,d20 @ + veor d4,d4,d21 + + vshr.u64 q10,q0,#1 @ 2nd phase + veor q2,q2,q0 + veor q0,q0,q10 @ + vshr.u64 q10,q10,#6 + vshr.u64 q0,q0,#1 @ + veor q0,q0,q2 @ + veor q0,q0,q10 @ + subs r3,#16 - vorr q10,q1 @ Z=Z:Zo<<1 - bne .Louter_neon + bne .Loop_neon #ifdef __ARMEL__ - vrev64.8 q10,q10 + vrev64.8 q0,q0 #endif sub r0,#16 + vst1.64 d1,[r0,:64]! @ write out Xi + vst1.64 d0,[r0,:64] - vst1.64 d21,[r0,:64]! 
@ write out Xi - vst1.64 d20,[r0,:64] - - .word 0xe12fff1e + bx lr @ bx lr .size gcm_ghash_neon,.-gcm_ghash_neon #endif .asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " diff --git a/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S b/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S new file mode 100644 index 00000000000000..570d9175c47605 --- /dev/null +++ b/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S @@ -0,0 +1,116 @@ +#include "arm_arch.h" + +.text +.fpu neon +.code 32 +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {q9},[r1] @ load H + vmov.i8 q8,#0xe1 + vext.8 q3,q9,q9,#8 + vshl.i64 q8,q8,#57 + vshr.u64 q10,q8,#63 + vext.8 q8,q10,q8,#8 @ t0=0xc2....01 + vdup.32 q9,d18[1] + vshr.u64 q11,q3,#63 + vshr.s32 q9,q9,#31 @ broadcast carry bit + vand q11,q11,q8 + vshl.i64 q3,q3,#1 + vext.8 q11,q11,q11,#8 + vand q8,q8,q9 + vorr q3,q3,q11 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vst1.64 {q3},[r0] + + bx lr +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {q9},[r0] @ load Xi + vmov.i8 q11,#0xe1 + vld1.64 {q12},[r1] @ load twisted H + vshl.u64 q11,q11,#57 +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q13,q12,q12,#8 + mov r3,#0 + vext.8 q3,q9,q9,#8 + mov r12,#0 + veor q13,q13,q12 @ Karatsuba pre-processing + mov r2,r0 + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + vld1.64 {q0},[r0] @ load [rotated] Xi + subs r3,r3,#16 + vmov.i8 q11,#0xe1 + mov r12,#16 + vld1.64 {q12},[r1] @ load twisted H + moveq r12,#0 + vext.8 q0,q0,q0,#8 + vshl.u64 q11,q11,#57 + vld1.64 {q9},[r2],r12 @ load [rotated] inp + vext.8 q13,q12,q12,#8 +#ifndef __ARMEB__ + vrev64.8 q0,q0 + vrev64.8 q9,q9 +#endif + veor q13,q13,q12 @ Karatsuba pre-processing + vext.8 q3,q9,q9,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + vext.8 q10,q0,q0,#8 + veor q3,q3,q0 @ inp^=Xi + veor q9,q9,q10 @ q9 is rotated inp^Xi + +.Lgmult_v8: + .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi + subs r3,r3,#16 + .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + moveq r12,#0 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] inp + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + veor q0,q1,q10 + vext.8 q3,q9,q9,#8 + + vext.8 q10,q0,q0,#8 @ 2nd phase + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + bhs .Loop_v8 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_ghash_v8,.-gcm_ghash_v8 +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 diff --git a/deps/openssl/asm/arm-void-gas/sha/sha1-armv4-large.S b/deps/openssl/asm/arm-void-gas/sha/sha1-armv4-large.S new file mode 100644 index 00000000000000..b0893359cac772 --- /dev/null +++ b/deps/openssl/asm/arm-void-gas/sha/sha1-armv4-large.S @@ -0,0 +1,1455 @@ +#include "arm_arch.h" + +.text +.code 32 + +.global sha1_block_data_order +.type sha1_block_data_order,%function + +.align 5 +sha1_block_data_order: +#if __ARM_MAX_ARCH__>=7 + sub r3,pc,#8 @ sha1_block_data_order + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA1 + bne .LARMv8 + tst 
r12,#ARMV7_NEON + bne .LNEON +#endif + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! 
+ add r3,r3,r10 @ E+=F_00_19(B,C,D) + teq r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + teq r14,sp @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + teq r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha1_block_data_order,.-sha1_block_data_order + +.align 5 +.LK_00_19: .word 0x5a827999 +.LK_20_39: .word 0x6ed9eba1 +.LK_40_59: .word 0x8f1bbcdc +.LK_60_79: .word 0xca62c1d6 +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha1_block_data_order +#endif +.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 5 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov r14,sp + sub sp,sp,#64 @ alloca + adr r8,.LK_00_19 + bic sp,sp,#15 @ align for 128-bit stores + + ldmia r0,{r3,r4,r5,r6,r7} @ load context + mov r12,sp + + vld1.8 {q0-q1},[r1]! @ handles unaligned + veor q15,q15,q15 + vld1.8 {q2-q3},[r1]! + vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 + vrev32.8 q0,q0 @ yes, even on + vrev32.8 q1,q1 @ big-endian... + vrev32.8 q2,q2 + vadd.i32 q8,q0,q14 + vrev32.8 q3,q3 + vadd.i32 q9,q1,q14 + vst1.32 {q8},[r12,:128]! + vadd.i32 q10,q2,q14 + vst1.32 {q9},[r12,:128]! + vst1.32 {q10},[r12,:128]! + ldr r9,[sp] @ big RAW stall + +.Loop_neon: + vext.8 q8,q0,q1,#8 + bic r10,r6,r4 + add r7,r7,r9 + and r11,r5,r4 + vadd.i32 q13,q3,q14 + ldr r9,[sp,#4] + add r7,r7,r3,ror#27 + vext.8 q12,q3,q15,#4 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q8,q8,q0 + bic r10,r5,r3 + add r6,r6,r9 + veor q12,q12,q2 + and r11,r4,r3 + ldr r9,[sp,#8] + veor q12,q12,q8 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q13,q15,q12,#4 + bic r10,r4,r7 + add r5,r5,r9 + vadd.i32 q8,q12,q12 + and r11,r3,r7 + ldr r9,[sp,#12] + vsri.32 q8,q12,#31 + add r5,r5,r6,ror#27 + eor r11,r11,r10 + mov r7,r7,ror#2 + vshr.u32 q12,q13,#30 + add r5,r5,r11 + bic r10,r3,r6 + vshl.u32 q13,q13,#2 + add r4,r4,r9 + and r11,r7,r6 + veor q8,q8,q12 + ldr r9,[sp,#16] + add r4,r4,r5,ror#27 + veor q8,q8,q13 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q9,q1,q2,#8 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + vadd.i32 q13,q8,q14 + ldr r9,[sp,#20] + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r4,ror#27 + vext.8 q12,q8,q15,#4 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + veor q9,q9,q1 + bic r10,r6,r4 + add r7,r7,r9 + veor q12,q12,q3 + and r11,r5,r4 + ldr r9,[sp,#24] + veor q12,q12,q9 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! 
+ mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q13,q15,q12,#4 + bic r10,r5,r3 + add r6,r6,r9 + vadd.i32 q9,q12,q12 + and r11,r4,r3 + ldr r9,[sp,#28] + vsri.32 q9,q12,#31 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + mov r3,r3,ror#2 + vshr.u32 q12,q13,#30 + add r6,r6,r11 + bic r10,r4,r7 + vshl.u32 q13,q13,#2 + add r5,r5,r9 + and r11,r3,r7 + veor q9,q9,q12 + ldr r9,[sp,#32] + add r5,r5,r6,ror#27 + veor q9,q9,q13 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q10,q2,q3,#8 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + vadd.i32 q13,q9,q14 + ldr r9,[sp,#36] + add r4,r4,r5,ror#27 + vext.8 q12,q9,q15,#4 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q10,q10,q2 + bic r10,r7,r5 + add r3,r3,r9 + veor q12,q12,q8 + and r11,r6,r5 + ldr r9,[sp,#40] + veor q12,q12,q10 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q13,q15,q12,#4 + bic r10,r6,r4 + add r7,r7,r9 + vadd.i32 q10,q12,q12 + and r11,r5,r4 + ldr r9,[sp,#44] + vsri.32 q10,q12,#31 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + mov r4,r4,ror#2 + vshr.u32 q12,q13,#30 + add r7,r7,r11 + bic r10,r5,r3 + vshl.u32 q13,q13,#2 + add r6,r6,r9 + and r11,r4,r3 + veor q10,q10,q12 + ldr r9,[sp,#48] + add r6,r6,r7,ror#27 + veor q10,q10,q13 + eor r11,r11,r10 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q11,q3,q8,#8 + bic r10,r4,r7 + add r5,r5,r9 + and r11,r3,r7 + vadd.i32 q13,q10,q14 + ldr r9,[sp,#52] + add r5,r5,r6,ror#27 + vext.8 q12,q10,q15,#4 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q11,q11,q3 + bic r10,r3,r6 + add r4,r4,r9 + veor q12,q12,q9 + and r11,r7,r6 + ldr r9,[sp,#56] + veor q12,q12,q11 + add r4,r4,r5,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q13,q15,q12,#4 + bic r10,r7,r5 + add r3,r3,r9 + vadd.i32 q11,q12,q12 + and r11,r6,r5 + ldr r9,[sp,#60] + vsri.32 q11,q12,#31 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + vshr.u32 q12,q13,#30 + add r3,r3,r11 + bic r10,r6,r4 + vshl.u32 q13,q13,#2 + add r7,r7,r9 + and r11,r5,r4 + veor q11,q11,q12 + ldr r9,[sp,#0] + add r7,r7,r3,ror#27 + veor q11,q11,q13 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q10,q11,#8 + bic r10,r5,r3 + add r6,r6,r9 + and r11,r4,r3 + veor q0,q0,q8 + ldr r9,[sp,#4] + add r6,r6,r7,ror#27 + veor q0,q0,q1 + eor r11,r11,r10 + mov r3,r3,ror#2 + vadd.i32 q13,q11,q14 + add r6,r6,r11 + bic r10,r4,r7 + veor q12,q12,q0 + add r5,r5,r9 + and r11,r3,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r11,r10 + mov r7,r7,ror#2 + vsli.32 q0,q12,#2 + add r5,r5,r11 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + ldr r9,[sp,#12] + add r4,r4,r5,ror#27 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + ldr r9,[sp,#16] + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q11,q0,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#20] + veor q1,q1,q9 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q1,q1,q2 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q0,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q1 + ldr r9,[sp,#24] + eor r11,r10,r4 + vshr.u32 q1,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! 
+ add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q1,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#28] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#32] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q0,q1,#8 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#36] + veor q2,q2,q10 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + veor q2,q2,q3 + mov r5,r5,ror#2 + add r3,r3,r11 + vadd.i32 q13,q1,q14 + eor r10,r4,r6 + vld1.32 {d28[],d29[]},[r8,:32]! + add r7,r7,r9 + veor q12,q12,q2 + ldr r9,[sp,#40] + eor r11,r10,r5 + vshr.u32 q2,q12,#30 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + vst1.32 {q13},[r12,:128]! + add r7,r7,r11 + eor r10,r3,r5 + vsli.32 q2,q12,#2 + add r6,r6,r9 + ldr r9,[sp,#44] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#48] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q1,q2,#8 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r7 + add r4,r4,r5,ror#27 + veor q3,q3,q8 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q2,q14 + eor r10,r5,r7 + add r3,r3,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r6 + vshr.u32 q3,q12,#30 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vst1.32 {q13},[r12,:128]! + add r3,r3,r11 + eor r10,r4,r6 + vsli.32 q3,q12,#2 + add r7,r7,r9 + ldr r9,[sp,#60] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#0] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q2,q3,#8 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#4] + veor q8,q8,q0 + eor r11,r10,r3 + add r5,r5,r6,ror#27 + veor q8,q8,q9 + mov r7,r7,ror#2 + add r5,r5,r11 + vadd.i32 q13,q3,q14 + eor r10,r6,r3 + add r4,r4,r9 + veor q12,q12,q8 + ldr r9,[sp,#8] + eor r11,r10,r7 + vshr.u32 q8,q12,#30 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + add r4,r4,r11 + eor r10,r5,r7 + vsli.32 q8,q12,#2 + add r3,r3,r9 + ldr r9,[sp,#12] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#16] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q3,q8,#8 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#20] + veor q9,q9,q1 + eor r11,r10,r4 + add r6,r6,r7,ror#27 + veor q9,q9,q10 + mov r3,r3,ror#2 + add r6,r6,r11 + vadd.i32 q13,q8,q14 + eor r10,r7,r4 + add r5,r5,r9 + veor q12,q12,q9 + ldr r9,[sp,#24] + eor r11,r10,r3 + vshr.u32 q9,q12,#30 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + vst1.32 {q13},[r12,:128]! + add r5,r5,r11 + eor r10,r6,r3 + vsli.32 q9,q12,#2 + add r4,r4,r9 + ldr r9,[sp,#28] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#32] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q8,q9,#8 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#36] + veor q10,q10,q2 + add r7,r7,r3,ror#27 + eor r11,r5,r6 + veor q10,q10,q11 + add r7,r7,r10 + and r11,r11,r4 + vadd.i32 q13,q9,q14 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q12,q12,q10 + add r6,r6,r9 + and r10,r4,r5 + vshr.u32 q10,q12,#30 + ldr r9,[sp,#40] + add r6,r6,r7,ror#27 + vst1.32 {q13},[r12,:128]! 
+ eor r11,r4,r5 + add r6,r6,r10 + vsli.32 q10,q12,#2 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#44] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#48] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q9,q10,#8 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#52] + veor q11,q11,q3 + add r3,r3,r4,ror#27 + eor r11,r6,r7 + veor q11,q11,q0 + add r3,r3,r10 + and r11,r11,r5 + vadd.i32 q13,q10,q14 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + veor q12,q12,q11 + add r7,r7,r9 + and r10,r5,r6 + vshr.u32 q11,q12,#30 + ldr r9,[sp,#56] + add r7,r7,r3,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r5,r6 + add r7,r7,r10 + vsli.32 q11,q12,#2 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#60] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#0] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q10,q11,#8 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#4] + veor q0,q0,q8 + add r4,r4,r5,ror#27 + eor r11,r7,r3 + veor q0,q0,q1 + add r4,r4,r10 + and r11,r11,r6 + vadd.i32 q13,q11,q14 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q12,q12,q0 + add r3,r3,r9 + and r10,r6,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r3,r3,r4,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r6,r7 + add r3,r3,r10 + vsli.32 q0,q12,#2 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#12] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#16] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q11,q0,#8 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#20] + veor q1,q1,q9 + add r5,r5,r6,ror#27 + eor r11,r3,r4 + veor q1,q1,q2 + add r5,r5,r10 + and r11,r11,r7 + vadd.i32 q13,q0,q14 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q12,q12,q1 + add r4,r4,r9 + and r10,r7,r3 + vshr.u32 q1,q12,#30 + ldr r9,[sp,#24] + add r4,r4,r5,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r7,r3 + add r4,r4,r10 + vsli.32 q1,q12,#2 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#28] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#32] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q0,q1,#8 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#36] + veor q2,q2,q10 + add r6,r6,r7,ror#27 + eor r11,r4,r5 + veor q2,q2,q3 + add r6,r6,r10 + and r11,r11,r3 + vadd.i32 q13,q1,q14 + mov r3,r3,ror#2 + add r6,r6,r11 + veor q12,q12,q2 + add r5,r5,r9 + and r10,r3,r4 + vshr.u32 q2,q12,#30 + ldr r9,[sp,#40] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! 
+ eor r11,r3,r4 + add r5,r5,r10 + vsli.32 q2,q12,#2 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#44] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#48] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q1,q2,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q3,q3,q8 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q2,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r4 + vshr.u32 q3,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q3,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#60] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#0] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q3,q14 + eor r10,r5,r7 + add r3,r3,r9 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + teq r1,r2 + sub r8,r8,#16 + subeq r1,r1,#64 + vld1.8 {q0-q1},[r1]! + ldr r9,[sp,#4] + eor r11,r10,r6 + vld1.8 {q2-q3},[r1]! + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + eor r10,r4,r6 + vrev32.8 q0,q0 + add r7,r7,r9 + ldr r9,[sp,#8] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#12] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#16] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vrev32.8 q1,q1 + eor r10,r6,r3 + add r4,r4,r9 + vadd.i32 q8,q0,q14 + ldr r9,[sp,#20] + eor r11,r10,r7 + vst1.32 {q8},[r12,:128]! + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#24] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#28] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#32] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vrev32.8 q2,q2 + eor r10,r7,r4 + add r5,r5,r9 + vadd.i32 q9,q1,q14 + ldr r9,[sp,#36] + eor r11,r10,r3 + vst1.32 {q9},[r12,:128]! + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#40] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#44] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#48] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vrev32.8 q3,q3 + eor r10,r3,r5 + add r6,r6,r9 + vadd.i32 q10,q2,q14 + ldr r9,[sp,#52] + eor r11,r10,r4 + vst1.32 {q10},[r12,:128]! 
+ add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#56] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#60] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + ldmia r0,{r9,r10,r11,r12} @ accumulate context + add r3,r3,r9 + ldr r9,[r0,#16] + add r4,r4,r10 + add r5,r5,r11 + add r6,r6,r12 + moveq sp,r14 + add r7,r7,r9 + ldrne r9,[sp] + stmia r0,{r3,r4,r5,r6,r7} + addne r12,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4-r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 +.type sha1_block_data_order_armv8,%function +.align 5 +sha1_block_data_order_armv8: +.LARMv8: + vstmdb sp!,{d8-d15} @ ABI specification says so + + veor q1,q1,q1 + adr r3,.LK_00_19 + vld1.32 {q0},[r0]! + vld1.32 {d2[0]},[r0] + sub r0,r0,#16 + vld1.32 {d16[],d17[]},[r3,:32]! + vld1.32 {d18[],d19[]},[r3,:32]! + vld1.32 {d20[],d21[]},[r3,:32]! + vld1.32 {d22[],d23[]},[r3,:32] + +.Loop_v8: + vld1.8 {q4-q5},[r1]! + vld1.8 {q6-q7},[r1]! + vrev32.8 q4,q4 + vrev32.8 q5,q5 + + vadd.i32 q12,q8,q4 + vrev32.8 q6,q6 + vmov q14,q0 @ offload + subs r2,r2,#1 + + vadd.i32 q13,q8,q5 + vrev32.8 q7,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0 + .byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12 + vadd.i32 q12,q8,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1 + .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q8,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2 + .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q8,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3 + .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q9,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4 + .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q9,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q9,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q10,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q10,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 
0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11 + .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q10,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13 + .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q11,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q11,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q11,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q7 + + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + + vadd.i32 q1,q1,q2 + vadd.i32 q0,q0,q14 + bne .Loop_v8 + + vst1.32 {q0},[r0]! + vst1.32 {d2[0]},[r0] + + vldmia sp!,{d8-d15} + bx lr @ bx lr +.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 +#endif +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S b/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S new file mode 100644 index 00000000000000..bf1ce4f997e7b7 --- /dev/null +++ b/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S @@ -0,0 +1,2695 @@ +#include "arm_arch.h" + +.text +.code 32 + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: + sub r3,pc,#8 @ sha256_block_data_order + add r2,r1,r2,lsl#6 @ len to point at the end of inp +#if __ARM_MAX_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia 
r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + mov r12,sp + sub sp,sp,#16*4+16 @ alloca + sub r14,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! 
+ + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! 
+ add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! 
+ add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! 
+ add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! 
+ and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] + sub r3,r3,#sha256_block_data_order-K256 + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! 
+ vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! 
+ vadd.i32 q13,q13,q9 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm/arm-elf-gas/sha/sha512-armv4.S b/deps/openssl/asm/arm-void-gas/sha/sha512-armv4.S similarity index 74% rename from deps/openssl/asm/arm-elf-gas/sha/sha512-armv4.S rename to deps/openssl/asm/arm-void-gas/sha/sha512-armv4.S index 573019225ec8e5..1889fc701ac094 100644 --- a/deps/openssl/asm/arm-elf-gas/sha/sha512-armv4.S +++ b/deps/openssl/asm/arm-void-gas/sha/sha512-armv4.S @@ -55,16 +55,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 .LOPENSSL_armcap: .word OPENSSL_armcap_P-sha512_block_data_order .skip 32-4 +#else +.skip 32 +#endif .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: sub r3,pc,#8 @ sha512_block_data_order add r2,r1,r2,lsl#7 @ len to point at the end of inp -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#1 @@ -437,7 +441,8 @@ sha512_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a .fpu neon .align 4 @@ -452,598 +457,599 @@ sha512_block_data_order: vld1.64 {d0},[r1]! @ handles unaligned #endif vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d20,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 + vmov d29,d20 vsli.64 d26,d20,#23 #if 0<16 && defined(__ARMEL__) vrev64.8 d0,d0 #endif - vadd.i64 d27,d28,d23 - veor d29,d21,d22 - veor d24,d25 - vand d29,d20 - veor d24,d26 @ Sigma1(e) - veor d29,d22 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 - vshr.u64 d26,d16,#39 vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 vsli.64 d25,d16,#30 + veor d30,d16,d17 vsli.64 d26,d16,#25 - vadd.i64 d27,d0 - vorr d30,d16,d18 - vand d29,d16,d18 veor d23,d24,d25 - vand d30,d17 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d23,d27 vadd.i64 d19,d27 - vadd.i64 d23,d30 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 1 #if 1<16 vld1.64 {d1},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 + vmov d29,d19 vsli.64 d26,d19,#23 #if 1<16 && defined(__ARMEL__) vrev64.8 d1,d1 #endif - vadd.i64 d27,d28,d22 - veor d29,d20,d21 - veor d24,d25 - vand d29,d19 - veor d24,d26 @ Sigma1(e) - veor d29,d21 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 - vshr.u64 d26,d23,#39 vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 vsli.64 d25,d23,#30 + veor d30,d23,d16 vsli.64 d26,d23,#25 - vadd.i64 d27,d1 - vorr d30,d23,d17 - vand d29,d23,d17 veor d22,d24,d25 - vand d30,d16 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d22,d27 vadd.i64 d18,d27 - vadd.i64 d22,d30 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 vshr.u64 d24,d18,#14 @ 2 #if 2<16 vld1.64 {d2},[r1]! @ handles unaligned #endif vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d18,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 + vmov d29,d18 vsli.64 d26,d18,#23 #if 2<16 && defined(__ARMEL__) vrev64.8 d2,d2 #endif - vadd.i64 d27,d28,d21 - veor d29,d19,d20 - veor d24,d25 - vand d29,d18 - veor d24,d26 @ Sigma1(e) - veor d29,d20 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 - vshr.u64 d26,d22,#39 vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 vsli.64 d25,d22,#30 + veor d30,d22,d23 vsli.64 d26,d22,#25 - vadd.i64 d27,d2 - vorr d30,d22,d16 - vand d29,d22,d16 veor d21,d24,d25 - vand d30,d23 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d21,d27 vadd.i64 d17,d27 - vadd.i64 d21,d30 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 3 #if 3<16 vld1.64 {d3},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 + vmov d29,d17 vsli.64 d26,d17,#23 #if 3<16 && defined(__ARMEL__) vrev64.8 d3,d3 #endif - vadd.i64 d27,d28,d20 - veor d29,d18,d19 - veor d24,d25 - vand d29,d17 - veor d24,d26 @ Sigma1(e) - veor d29,d19 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 - vshr.u64 d26,d21,#39 vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 vsli.64 d25,d21,#30 + veor d30,d21,d22 vsli.64 d26,d21,#25 - vadd.i64 d27,d3 - vorr d30,d21,d23 - vand d29,d21,d23 veor d20,d24,d25 - vand d30,d22 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d20,d27 vadd.i64 d16,d27 - vadd.i64 d20,d30 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 vshr.u64 d24,d16,#14 @ 4 #if 4<16 vld1.64 {d4},[r1]! @ handles unaligned #endif vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d16,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 + vmov d29,d16 vsli.64 d26,d16,#23 #if 4<16 && defined(__ARMEL__) vrev64.8 d4,d4 #endif - vadd.i64 d27,d28,d19 - veor d29,d17,d18 - veor d24,d25 - vand d29,d16 - veor d24,d26 @ Sigma1(e) - veor d29,d18 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 - vshr.u64 d26,d20,#39 vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 vsli.64 d25,d20,#30 + veor d30,d20,d21 vsli.64 d26,d20,#25 - vadd.i64 d27,d4 - vorr d30,d20,d22 - vand d29,d20,d22 veor d19,d24,d25 - vand d30,d21 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d19,d27 vadd.i64 d23,d27 - vadd.i64 d19,d30 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 5 #if 5<16 vld1.64 {d5},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 + vmov d29,d23 vsli.64 d26,d23,#23 #if 5<16 && defined(__ARMEL__) vrev64.8 d5,d5 #endif - vadd.i64 d27,d28,d18 - veor d29,d16,d17 - veor d24,d25 - vand d29,d23 - veor d24,d26 @ Sigma1(e) - veor d29,d17 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 - vshr.u64 d26,d19,#39 vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 vsli.64 d25,d19,#30 + veor d30,d19,d20 vsli.64 d26,d19,#25 - vadd.i64 d27,d5 - vorr d30,d19,d21 - vand d29,d19,d21 veor d18,d24,d25 - vand d30,d20 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d18,d27 vadd.i64 d22,d27 - vadd.i64 d18,d30 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 vshr.u64 d24,d22,#14 @ 6 #if 6<16 vld1.64 {d6},[r1]! @ handles unaligned #endif vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d22,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 + vmov d29,d22 vsli.64 d26,d22,#23 #if 6<16 && defined(__ARMEL__) vrev64.8 d6,d6 #endif - vadd.i64 d27,d28,d17 - veor d29,d23,d16 - veor d24,d25 - vand d29,d22 - veor d24,d26 @ Sigma1(e) - veor d29,d16 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 - vshr.u64 d26,d18,#39 vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 vsli.64 d25,d18,#30 + veor d30,d18,d19 vsli.64 d26,d18,#25 - vadd.i64 d27,d6 - vorr d30,d18,d20 - vand d29,d18,d20 veor d17,d24,d25 - vand d30,d19 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d17,d27 vadd.i64 d21,d27 - vadd.i64 d17,d30 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 7 #if 7<16 vld1.64 {d7},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 + vmov d29,d21 vsli.64 d26,d21,#23 #if 7<16 && defined(__ARMEL__) vrev64.8 d7,d7 #endif - vadd.i64 d27,d28,d16 - veor d29,d22,d23 - veor d24,d25 - vand d29,d21 - veor d24,d26 @ Sigma1(e) - veor d29,d23 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 - vshr.u64 d26,d17,#39 vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 vsli.64 d25,d17,#30 + veor d30,d17,d18 vsli.64 d26,d17,#25 - vadd.i64 d27,d7 - vorr d30,d17,d19 - vand d29,d17,d19 veor d16,d24,d25 - vand d30,d18 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d16,d27 vadd.i64 d20,d27 - vadd.i64 d16,d30 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 vshr.u64 d24,d20,#14 @ 8 #if 8<16 vld1.64 {d8},[r1]! @ handles unaligned #endif vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d20,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 + vmov d29,d20 vsli.64 d26,d20,#23 #if 8<16 && defined(__ARMEL__) vrev64.8 d8,d8 #endif - vadd.i64 d27,d28,d23 - veor d29,d21,d22 - veor d24,d25 - vand d29,d20 - veor d24,d26 @ Sigma1(e) - veor d29,d22 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 - vshr.u64 d26,d16,#39 vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 vsli.64 d25,d16,#30 + veor d30,d16,d17 vsli.64 d26,d16,#25 - vadd.i64 d27,d8 - vorr d30,d16,d18 - vand d29,d16,d18 veor d23,d24,d25 - vand d30,d17 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d23,d27 vadd.i64 d19,d27 - vadd.i64 d23,d30 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 9 #if 9<16 vld1.64 {d9},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 + vmov d29,d19 vsli.64 d26,d19,#23 #if 9<16 && defined(__ARMEL__) vrev64.8 d9,d9 #endif - vadd.i64 d27,d28,d22 - veor d29,d20,d21 - veor d24,d25 - vand d29,d19 - veor d24,d26 @ Sigma1(e) - veor d29,d21 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 - vshr.u64 d26,d23,#39 vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 vsli.64 d25,d23,#30 + veor d30,d23,d16 vsli.64 d26,d23,#25 - vadd.i64 d27,d9 - vorr d30,d23,d17 - vand d29,d23,d17 veor d22,d24,d25 - vand d30,d16 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d22,d27 vadd.i64 d18,d27 - vadd.i64 d22,d30 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 vshr.u64 d24,d18,#14 @ 10 #if 10<16 vld1.64 {d10},[r1]! @ handles unaligned #endif vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d18,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 + vmov d29,d18 vsli.64 d26,d18,#23 #if 10<16 && defined(__ARMEL__) vrev64.8 d10,d10 #endif - vadd.i64 d27,d28,d21 - veor d29,d19,d20 - veor d24,d25 - vand d29,d18 - veor d24,d26 @ Sigma1(e) - veor d29,d20 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 - vshr.u64 d26,d22,#39 vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 vsli.64 d25,d22,#30 + veor d30,d22,d23 vsli.64 d26,d22,#25 - vadd.i64 d27,d10 - vorr d30,d22,d16 - vand d29,d22,d16 veor d21,d24,d25 - vand d30,d23 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d21,d27 vadd.i64 d17,d27 - vadd.i64 d21,d30 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 11 #if 11<16 vld1.64 {d11},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 + vmov d29,d17 vsli.64 d26,d17,#23 #if 11<16 && defined(__ARMEL__) vrev64.8 d11,d11 #endif - vadd.i64 d27,d28,d20 - veor d29,d18,d19 - veor d24,d25 - vand d29,d17 - veor d24,d26 @ Sigma1(e) - veor d29,d19 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 - vshr.u64 d26,d21,#39 vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 vsli.64 d25,d21,#30 + veor d30,d21,d22 vsli.64 d26,d21,#25 - vadd.i64 d27,d11 - vorr d30,d21,d23 - vand d29,d21,d23 veor d20,d24,d25 - vand d30,d22 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d20,d27 vadd.i64 d16,d27 - vadd.i64 d20,d30 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 vshr.u64 d24,d16,#14 @ 12 #if 12<16 vld1.64 {d12},[r1]! @ handles unaligned #endif vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d16,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 + vmov d29,d16 vsli.64 d26,d16,#23 #if 12<16 && defined(__ARMEL__) vrev64.8 d12,d12 #endif - vadd.i64 d27,d28,d19 - veor d29,d17,d18 - veor d24,d25 - vand d29,d16 - veor d24,d26 @ Sigma1(e) - veor d29,d18 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 - vshr.u64 d26,d20,#39 vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 vsli.64 d25,d20,#30 + veor d30,d20,d21 vsli.64 d26,d20,#25 - vadd.i64 d27,d12 - vorr d30,d20,d22 - vand d29,d20,d22 veor d19,d24,d25 - vand d30,d21 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d19,d27 vadd.i64 d23,d27 - vadd.i64 d19,d30 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 13 #if 13<16 vld1.64 {d13},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 + vmov d29,d23 vsli.64 d26,d23,#23 #if 13<16 && defined(__ARMEL__) vrev64.8 d13,d13 #endif - vadd.i64 d27,d28,d18 - veor d29,d16,d17 - veor d24,d25 - vand d29,d23 - veor d24,d26 @ Sigma1(e) - veor d29,d17 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 - vshr.u64 d26,d19,#39 vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 vsli.64 d25,d19,#30 + veor d30,d19,d20 vsli.64 d26,d19,#25 - vadd.i64 d27,d13 - vorr d30,d19,d21 - vand d29,d19,d21 veor d18,d24,d25 - vand d30,d20 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d18,d27 vadd.i64 d22,d27 - vadd.i64 d18,d30 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 vshr.u64 d24,d22,#14 @ 14 #if 14<16 vld1.64 {d14},[r1]! @ handles unaligned #endif vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d22,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 + vmov d29,d22 vsli.64 d26,d22,#23 #if 14<16 && defined(__ARMEL__) vrev64.8 d14,d14 #endif - vadd.i64 d27,d28,d17 - veor d29,d23,d16 - veor d24,d25 - vand d29,d22 - veor d24,d26 @ Sigma1(e) - veor d29,d16 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 - vshr.u64 d26,d18,#39 vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 vsli.64 d25,d18,#30 + veor d30,d18,d19 vsli.64 d26,d18,#25 - vadd.i64 d27,d14 - vorr d30,d18,d20 - vand d29,d18,d20 veor d17,d24,d25 - vand d30,d19 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d17,d27 vadd.i64 d21,d27 - vadd.i64 d17,d30 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 15 #if 15<16 vld1.64 {d15},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 + vmov d29,d21 vsli.64 d26,d21,#23 #if 15<16 && defined(__ARMEL__) vrev64.8 d15,d15 #endif - vadd.i64 d27,d28,d16 - veor d29,d22,d23 - veor d24,d25 - vand d29,d21 - veor d24,d26 @ Sigma1(e) - veor d29,d23 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 - vshr.u64 d26,d17,#39 vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 vsli.64 d25,d17,#30 + veor d30,d17,d18 vsli.64 d26,d17,#25 - vadd.i64 d27,d15 - vorr d30,d17,d19 - vand d29,d17,d19 veor d16,d24,d25 - vand d30,d18 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d16,d27 vadd.i64 d20,d27 - vadd.i64 d16,d30 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 mov r12,#4 .L16_79_neon: subs r12,#1 vshr.u64 q12,q7,#19 vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past vshr.u64 q15,q7,#6 vsli.64 q12,q7,#45 vext.8 q14,q0,q1,#8 @ X[i+1] @@ -1067,73 +1073,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 + vmov d29,d20 vsli.64 d26,d20,#23 #if 16<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d23 - veor d29,d21,d22 - veor d24,d25 - vand d29,d20 - veor d24,d26 @ Sigma1(e) - veor d29,d22 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 - vshr.u64 d26,d16,#39 vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 vsli.64 d25,d16,#30 + veor d30,d16,d17 vsli.64 d26,d16,#25 - vadd.i64 d27,d0 - vorr d30,d16,d18 - vand d29,d16,d18 veor d23,d24,d25 - vand d30,d17 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d23,d27 vadd.i64 d19,d27 - vadd.i64 d23,d30 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 17 #if 17<16 vld1.64 {d1},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 + vmov d29,d19 vsli.64 d26,d19,#23 #if 17<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d22 - veor d29,d20,d21 - veor d24,d25 - vand d29,d19 - veor d24,d26 @ Sigma1(e) - veor d29,d21 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 - vshr.u64 d26,d23,#39 vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 vsli.64 d25,d23,#30 + veor d30,d23,d16 vsli.64 d26,d23,#25 - vadd.i64 d27,d1 - vorr d30,d23,d17 - vand d29,d23,d17 veor d22,d24,d25 - vand d30,d16 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d22,d27 vadd.i64 d18,d27 - vadd.i64 d22,d30 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 vshr.u64 q12,q0,#19 vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past vshr.u64 q15,q0,#6 vsli.64 q12,q0,#45 vext.8 q14,q1,q2,#8 @ X[i+1] @@ -1157,73 +1161,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 + vmov d29,d18 vsli.64 d26,d18,#23 #if 18<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d21 - veor d29,d19,d20 - veor d24,d25 - vand d29,d18 - veor d24,d26 @ Sigma1(e) - veor d29,d20 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 - vshr.u64 d26,d22,#39 vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 vsli.64 d25,d22,#30 + veor d30,d22,d23 vsli.64 d26,d22,#25 - vadd.i64 d27,d2 - vorr d30,d22,d16 - vand d29,d22,d16 veor d21,d24,d25 - vand d30,d23 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d21,d27 vadd.i64 d17,d27 - vadd.i64 d21,d30 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 19 #if 19<16 vld1.64 {d3},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 + vmov d29,d17 vsli.64 d26,d17,#23 #if 19<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d20 - veor d29,d18,d19 - veor d24,d25 - vand d29,d17 - veor d24,d26 @ Sigma1(e) - veor d29,d19 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 - vshr.u64 d26,d21,#39 vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 vsli.64 d25,d21,#30 + veor d30,d21,d22 vsli.64 d26,d21,#25 - vadd.i64 d27,d3 - vorr d30,d21,d23 - vand d29,d21,d23 veor d20,d24,d25 - vand d30,d22 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d20,d27 vadd.i64 d16,d27 - vadd.i64 d20,d30 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 vshr.u64 q12,q1,#19 vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past vshr.u64 q15,q1,#6 vsli.64 q12,q1,#45 vext.8 q14,q2,q3,#8 @ X[i+1] @@ -1247,73 +1249,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 + vmov d29,d16 vsli.64 d26,d16,#23 #if 20<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d19 - veor d29,d17,d18 - veor d24,d25 - vand d29,d16 - veor d24,d26 @ Sigma1(e) - veor d29,d18 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 - vshr.u64 d26,d20,#39 vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 vsli.64 d25,d20,#30 + veor d30,d20,d21 vsli.64 d26,d20,#25 - vadd.i64 d27,d4 - vorr d30,d20,d22 - vand d29,d20,d22 veor d19,d24,d25 - vand d30,d21 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d19,d27 vadd.i64 d23,d27 - vadd.i64 d19,d30 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 21 #if 21<16 vld1.64 {d5},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 + vmov d29,d23 vsli.64 d26,d23,#23 #if 21<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d18 - veor d29,d16,d17 - veor d24,d25 - vand d29,d23 - veor d24,d26 @ Sigma1(e) - veor d29,d17 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 - vshr.u64 d26,d19,#39 vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 vsli.64 d25,d19,#30 + veor d30,d19,d20 vsli.64 d26,d19,#25 - vadd.i64 d27,d5 - vorr d30,d19,d21 - vand d29,d19,d21 veor d18,d24,d25 - vand d30,d20 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d18,d27 vadd.i64 d22,d27 - vadd.i64 d18,d30 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 vshr.u64 q12,q2,#19 vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past vshr.u64 q15,q2,#6 vsli.64 q12,q2,#45 vext.8 q14,q3,q4,#8 @ X[i+1] @@ -1337,73 +1337,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 + vmov d29,d22 vsli.64 d26,d22,#23 #if 22<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d17 - veor d29,d23,d16 - veor d24,d25 - vand d29,d22 - veor d24,d26 @ Sigma1(e) - veor d29,d16 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 - vshr.u64 d26,d18,#39 vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 vsli.64 d25,d18,#30 + veor d30,d18,d19 vsli.64 d26,d18,#25 - vadd.i64 d27,d6 - vorr d30,d18,d20 - vand d29,d18,d20 veor d17,d24,d25 - vand d30,d19 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d17,d27 vadd.i64 d21,d27 - vadd.i64 d17,d30 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 23 #if 23<16 vld1.64 {d7},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 + vmov d29,d21 vsli.64 d26,d21,#23 #if 23<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d16 - veor d29,d22,d23 - veor d24,d25 - vand d29,d21 - veor d24,d26 @ Sigma1(e) - veor d29,d23 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 - vshr.u64 d26,d17,#39 vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 vsli.64 d25,d17,#30 + veor d30,d17,d18 vsli.64 d26,d17,#25 - vadd.i64 d27,d7 - vorr d30,d17,d19 - vand d29,d17,d19 veor d16,d24,d25 - vand d30,d18 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d16,d27 vadd.i64 d20,d27 - vadd.i64 d16,d30 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 vshr.u64 q12,q3,#19 vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past vshr.u64 q15,q3,#6 vsli.64 q12,q3,#45 vext.8 q14,q4,q5,#8 @ X[i+1] @@ -1427,73 +1425,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d20,#50 vsli.64 d25,d20,#46 + vmov d29,d20 vsli.64 d26,d20,#23 #if 24<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d23 - veor d29,d21,d22 - veor d24,d25 - vand d29,d20 - veor d24,d26 @ Sigma1(e) - veor d29,d22 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) vshr.u64 d24,d16,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 vshr.u64 d25,d16,#34 - vshr.u64 d26,d16,#39 vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 vsli.64 d25,d16,#30 + veor d30,d16,d17 vsli.64 d26,d16,#25 - vadd.i64 d27,d8 - vorr d30,d16,d18 - vand d29,d16,d18 veor d23,d24,d25 - vand d30,d17 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) veor d23,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d23,d27 vadd.i64 d19,d27 - vadd.i64 d23,d30 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 vshr.u64 d24,d19,#14 @ 25 #if 25<16 vld1.64 {d9},[r1]! @ handles unaligned #endif vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d19,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d19,#50 vsli.64 d25,d19,#46 + vmov d29,d19 vsli.64 d26,d19,#23 #if 25<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d22 - veor d29,d20,d21 - veor d24,d25 - vand d29,d19 - veor d24,d26 @ Sigma1(e) - veor d29,d21 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) vshr.u64 d24,d23,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 vshr.u64 d25,d23,#34 - vshr.u64 d26,d23,#39 vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 vsli.64 d25,d23,#30 + veor d30,d23,d16 vsli.64 d26,d23,#25 - vadd.i64 d27,d9 - vorr d30,d23,d17 - vand d29,d23,d17 veor d22,d24,d25 - vand d30,d16 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) veor d22,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d22,d27 vadd.i64 d18,d27 - vadd.i64 d22,d30 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 vshr.u64 q12,q4,#19 vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past vshr.u64 q15,q4,#6 vsli.64 q12,q4,#45 vext.8 q14,q5,q6,#8 @ X[i+1] @@ -1517,73 +1513,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d18,#50 vsli.64 d25,d18,#46 + vmov d29,d18 vsli.64 d26,d18,#23 #if 26<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d21 - veor d29,d19,d20 - veor d24,d25 - vand d29,d18 - veor d24,d26 @ Sigma1(e) - veor d29,d20 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) vshr.u64 d24,d22,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 vshr.u64 d25,d22,#34 - vshr.u64 d26,d22,#39 vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 vsli.64 d25,d22,#30 + veor d30,d22,d23 vsli.64 d26,d22,#25 - vadd.i64 d27,d10 - vorr d30,d22,d16 - vand d29,d22,d16 veor d21,d24,d25 - vand d30,d23 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) veor d21,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d21,d27 vadd.i64 d17,d27 - vadd.i64 d21,d30 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 vshr.u64 d24,d17,#14 @ 27 #if 27<16 vld1.64 {d11},[r1]! @ handles unaligned #endif vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d17,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d17,#50 vsli.64 d25,d17,#46 + vmov d29,d17 vsli.64 d26,d17,#23 #if 27<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d20 - veor d29,d18,d19 - veor d24,d25 - vand d29,d17 - veor d24,d26 @ Sigma1(e) - veor d29,d19 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) vshr.u64 d24,d21,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 vshr.u64 d25,d21,#34 - vshr.u64 d26,d21,#39 vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 vsli.64 d25,d21,#30 + veor d30,d21,d22 vsli.64 d26,d21,#25 - vadd.i64 d27,d11 - vorr d30,d21,d23 - vand d29,d21,d23 veor d20,d24,d25 - vand d30,d22 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) veor d20,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d20,d27 vadd.i64 d16,d27 - vadd.i64 d20,d30 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 vshr.u64 q12,q5,#19 vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past vshr.u64 q15,q5,#6 vsli.64 q12,q5,#45 vext.8 q14,q6,q7,#8 @ X[i+1] @@ -1607,73 +1601,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d16,#50 vsli.64 d25,d16,#46 + vmov d29,d16 vsli.64 d26,d16,#23 #if 28<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d19 - veor d29,d17,d18 - veor d24,d25 - vand d29,d16 - veor d24,d26 @ Sigma1(e) - veor d29,d18 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) vshr.u64 d24,d20,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 vshr.u64 d25,d20,#34 - vshr.u64 d26,d20,#39 vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 vsli.64 d25,d20,#30 + veor d30,d20,d21 vsli.64 d26,d20,#25 - vadd.i64 d27,d12 - vorr d30,d20,d22 - vand d29,d20,d22 veor d19,d24,d25 - vand d30,d21 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) veor d19,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d19,d27 vadd.i64 d23,d27 - vadd.i64 d19,d30 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 vshr.u64 d24,d23,#14 @ 29 #if 29<16 vld1.64 {d13},[r1]! @ handles unaligned #endif vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d23,#41 vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d23,#50 vsli.64 d25,d23,#46 + vmov d29,d23 vsli.64 d26,d23,#23 #if 29<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d18 - veor d29,d16,d17 - veor d24,d25 - vand d29,d23 - veor d24,d26 @ Sigma1(e) - veor d29,d17 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) vshr.u64 d24,d19,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 vshr.u64 d25,d19,#34 - vshr.u64 d26,d19,#39 vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 vsli.64 d25,d19,#30 + veor d30,d19,d20 vsli.64 d26,d19,#25 - vadd.i64 d27,d13 - vorr d30,d19,d21 - vand d29,d19,d21 veor d18,d24,d25 - vand d30,d20 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) veor d18,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d18,d27 vadd.i64 d22,d27 - vadd.i64 d18,d30 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 vshr.u64 q12,q6,#19 vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past vshr.u64 q15,q6,#6 vsli.64 q12,q6,#45 vext.8 q14,q7,q0,#8 @ X[i+1] @@ -1697,73 +1689,71 @@ sha512_block_data_order: vld1.64 {d28},[r3,:64]! @ K[i++] vsli.64 d24,d22,#50 vsli.64 d25,d22,#46 + vmov d29,d22 vsli.64 d26,d22,#23 #if 30<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d17 - veor d29,d23,d16 - veor d24,d25 - vand d29,d22 - veor d24,d26 @ Sigma1(e) - veor d29,d16 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) vshr.u64 d24,d18,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 vshr.u64 d25,d18,#34 - vshr.u64 d26,d18,#39 vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 vsli.64 d25,d18,#30 + veor d30,d18,d19 vsli.64 d26,d18,#25 - vadd.i64 d27,d14 - vorr d30,d18,d20 - vand d29,d18,d20 veor d17,d24,d25 - vand d30,d19 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) veor d17,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d17,d27 vadd.i64 d21,d27 - vadd.i64 d17,d30 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 vshr.u64 d24,d21,#14 @ 31 #if 31<16 vld1.64 {d15},[r1]! @ handles unaligned #endif vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif vshr.u64 d26,d21,#41 vld1.64 {d28},[r3,:64]! 
@ K[i++] vsli.64 d24,d21,#50 vsli.64 d25,d21,#46 + vmov d29,d21 vsli.64 d26,d21,#23 #if 31<16 && defined(__ARMEL__) vrev64.8 , #endif - vadd.i64 d27,d28,d16 - veor d29,d22,d23 - veor d24,d25 - vand d29,d21 - veor d24,d26 @ Sigma1(e) - veor d29,d23 @ Ch(e,f,g) - vadd.i64 d27,d24 + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) vshr.u64 d24,d17,#28 - vadd.i64 d27,d29 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 vshr.u64 d25,d17,#34 - vshr.u64 d26,d17,#39 vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 vsli.64 d25,d17,#30 + veor d30,d17,d18 vsli.64 d26,d17,#25 - vadd.i64 d27,d15 - vorr d30,d17,d19 - vand d29,d17,d19 veor d16,d24,d25 - vand d30,d18 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) veor d16,d26 @ Sigma0(a) - vorr d30,d29 @ Maj(a,b,c) - vadd.i64 d16,d27 vadd.i64 d20,d27 - vadd.i64 d16,d30 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 bne .L16_79_neon + vadd.i64 d16,d30 @ h+=Maj from the past vldmia r0,{d24-d31} @ load context to temp vadd.i64 q8,q12 @ vectorized accumulate vadd.i64 q9,q13 @@ -1775,9 +1765,11 @@ sha512_block_data_order: bne .Loop_neon vldmia sp!,{d8-d15} @ epilogue - .word 0xe12fff1e + bx lr @ .word 0xe12fff1e #endif .size sha512_block_data_order,.-sha512_block_data_order .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " .align 2 +#if __ARM_MAX_ARCH__>=7 .comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S b/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S new file mode 100644 index 00000000000000..0a4b1ac4c40082 --- /dev/null +++ b/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S @@ -0,0 +1,725 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv8-a+crypto +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + stp x29,x30,[sp,#-16]! 
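The sha512-armv4.S hunks above make two changes to the NEON rounds: the old veor/vand/vorr sequences for Ch(e,f,g) and Maj(a,b,c) are replaced by single vbsl bit-selects (vmov d29,e; vbsl d29,f,g and veor d30,a,b; vbsl d30,c,b), and the final "h += Maj" addition is deferred into the following round (the commented-out "@ vadd.i64" lines plus the "h+=Maj from the past" adds, with a final flush right after "bne .L16_79_neon"), which takes that add off the current round's critical path. A minimal C sketch of the two bit-select identities the new code relies on; BSL, Ch and Maj are illustrative names, not OpenSSL symbols:

    #include <stdint.h>

    /* BSL(sel, x, y) models NEON VBSL: bits of x where sel is 1, of y where it is 0. */
    static uint64_t BSL(uint64_t sel, uint64_t x, uint64_t y)
    {
        return (sel & x) | (~sel & y);
    }

    /* Ch(e,f,g) = (e & f) ^ (~e & g): one vbsl d29,f,g after vmov d29,e. */
    static uint64_t Ch(uint64_t e, uint64_t f, uint64_t g)
    {
        return BSL(e, f, g);
    }

    /* Maj(a,b,c): when a == b the majority is a (= b), otherwise it is c,
     * so veor d30,a,b followed by vbsl d30,c,b selects the right value. */
    static uint64_t Maj(uint64_t a, uint64_t b, uint64_t c)
    {
        return BSL(a ^ b, c, b);
    }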
+ add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq .Lenc_key_abort + cmp x2,#0 + b.eq .Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt .Lenc_key_abort + cmp w1,#256 + b.gt .Lenc_key_abort + tst w1,#0x3f + b.ne .Lenc_key_abort + + adr x3,rcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + mov x3,#0 + +.Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stp x29,x30,[sp,#-16]! 
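In the new aesv8-armx.S, aes_v8_set_encrypt_key builds the schedule with AESE against an all-zero round key (AddRoundKey with zero is the identity, leaving just SubBytes and ShiftRows) plus a TBL through the rcon block's "rotate-n-splat" mask, and doubles the round constant with a shl on each pass. Roughly, that vectorizes the textbook AES-128 recurrence sketched below; aes128_expand_sketch is an illustrative name, the sub_word callback (S-box applied to each byte) is left to the caller, and words hold their first byte in the least significant position, as loaded on a little-endian machine.

    #include <stdint.h>

    /* AES-128: 4 key words in, 44 round-key words out (10 rounds plus the
     * initial whitening key).  sub_word applies the S-box to each byte. */
    static void aes128_expand_sketch(const uint32_t key[4], uint32_t rk[44],
                                     uint32_t (*sub_word)(uint32_t))
    {
        static const uint32_t rcon[10] =
            { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };

        for (int i = 0; i < 4; i++)
            rk[i] = key[i];
        for (int i = 4; i < 44; i++) {
            uint32_t t = rk[i - 1];
            if (i % 4 == 0)   /* RotWord + SubWord + Rcon, the .Loop128 step */
                t = sub_word((t >> 8) | (t << 24)) ^ rcon[i / 4 - 1];
            rk[i] = rk[i - 4] ^ t;
        }
    }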
+ add x29,sp,#0 + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +.Ldec_key_abort: + ldp x29,x30,[sp],#16 + ret +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + ld1 {v0.4s},[x2],#16 + aesmc v2.16b,v2.16b + subs w3,w3,#2 + aese v2.16b,v1.16b + ld1 {v1.4s},[x2],#16 + aesmc v2.16b,v2.16b + b.gt .Loop_enc + + aese v2.16b,v0.16b + ld1 {v0.4s},[x2] + aesmc v2.16b,v2.16b + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + ld1 {v0.4s},[x2],#16 + aesimc v2.16b,v2.16b + subs w3,w3,#2 + aesd v2.16b,v1.16b + ld1 {v1.4s},[x2],#16 + aesimc v2.16b,v2.16b + b.gt .Loop_dec + + aesd v2.16b,v0.16b + ld1 {v0.4s},[x2] + aesimc v2.16b,v2.16b + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s-v17.4s},[x3] // load key schedule... 
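aes_v8_encrypt and aes_v8_decrypt above pair AESE with AESMC (AESD with AESIMC for decryption, using the schedule that aes_v8_set_decrypt_key inverts in .Loop_imc), handle the last two rounds outside the loop, and finish with a plain XOR of the final round key, since AESE already folds AddRoundKey into the start of each round. A minimal sketch of that instruction pattern with the ACLE intrinsics, built with -march=armv8-a+crypto; aes_encrypt_block_sketch is an illustrative name, not the OpenSSL entry point:

    #include <arm_neon.h>   /* needs a compiler targeting armv8-a+crypto */

    /* nr = 10/12/14; rk[0..nr] are the expanded round keys.
     * AESE = AddRoundKey + SubBytes + ShiftRows, AESMC = MixColumns;
     * the last round skips MixColumns and ends with AddRoundKey(rk[nr]). */
    static uint8x16_t aes_encrypt_block_sketch(uint8x16_t block,
                                               const uint8x16_t *rk, int nr)
    {
        for (int i = 0; i < nr - 1; i++)
            block = vaesmcq_u8(vaeseq_u8(block, rk[i]));
        block = vaeseq_u8(block, rk[nr - 1]);
        return veorq_u8(block, rk[nr]);
    }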
+ sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s-v19.4s},[x7],#32 + ld1 {v20.4s-v21.4s},[x7],#32 + ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + +.Loop_cbc_enc: + aese v0.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + b.gt .Loop_cbc_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + add x7,x3,#16 + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + + mov w6,w5 + eor v6.16b,v0.16b,v7.16b + st1 {v6.16b},[x1],#16 + b.hs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s-v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done +.align 5 +.Lcbc_dec: + ld1 {v18.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v19.16b,v18.16b,v18.16b + b.lo .Lcbc_dec_tail + + orr v1.16b,v18.16b,v18.16b + ld1 {v18.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v19.16b,v18.16b,v18.16b + +.Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + b.gt .Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + eor v4.16b,v6.16b,v7.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + eor v5.16b,v2.16b,v7.16b + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + eor v17.16b,v3.16b,v7.16b + subs x2,x2,#0x30 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v20.16b + aesd v1.16b,v20.16b + aesd v18.16b,v20.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v18.16b + // are loaded with last "words" + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + mov x7,x3 + aesd v0.16b,v21.16b + aesd v1.16b,v21.16b + aesd v18.16b,v21.16b + ld1 {v2.16b},[x0],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + 
aesd v1.16b,v22.16b + aesd v18.16b,v22.16b + ld1 {v19.16b},[x0],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v18.16b,v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + orr v0.16b,v2.16b,v2.16b + st1 {v4.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v5.16b},[x1],#16 + st1 {v18.16b},[x1],#16 + orr v18.16b,v19.16b,v19.16b + b.hs .Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + b.gt .Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v20.16b + aesd v18.16b,v20.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesd v18.16b,v21.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesd v18.16b,v22.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + b.eq .Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lcbc_done + +.Lcbc_dec_one: + eor v5.16b,v5.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s-v17.4s},[x3] // load key schedule... 
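aes_v8_ctr32_encrypt_blocks, whose prologue appears just above, keeps three counter blocks in flight (.Loop3x_ctr32) and treats only the last 32 bits of the IV as a big-endian counter, which is why w8 is loaded from [x4,#12], byte-reversed on little-endian targets, and written back per block with mov v.s[3]. An endian-neutral sketch of that 32-bit increment; ctr32_increment is an illustrative helper, not part of the patch:

    #include <stdint.h>

    /* Bump the big-endian 32-bit counter held in bytes 12..15 of the IV.
     * Carries never propagate past byte 12, matching the ctr32 contract. */
    static void ctr32_increment(uint8_t ivec[16])
    {
        for (int i = 15; i >= 12; i--)
            if (++ivec[i] != 0)
                break;   /* stop as soon as a byte does not wrap */
    }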
+ sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s-v21.4s},[x7],#32 + ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo +#ifndef __ARMEB__ + rev w8, w8 +#endif + orr v1.16b,v0.16b,v0.16b + add w10, w8, #1 + orr v18.16b,v0.16b,v0.16b + add w8, w8, #2 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v1.s[3],w10 + b.ls .Lctr32_tail + rev w12, w8 + sub x2,x2,#3 // bias + mov v18.s[3],w12 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aese v18.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aesmc v18.16b,v18.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + aese v1.16b,v17.16b + aese v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aesmc v18.16b,v18.16b + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aese v18.16b,v16.16b + mov x7,x3 + aesmc v4.16b,v0.16b + ld1 {v2.16b},[x0],#16 + aesmc v5.16b,v1.16b + aesmc v18.16b,v18.16b + orr v0.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + ld1 {v3.16b},[x0],#16 + aese v5.16b,v17.16b + aese v18.16b,v17.16b + orr v1.16b,v6.16b,v6.16b + aesmc v4.16b,v4.16b + ld1 {v19.16b},[x0],#16 + aesmc v5.16b,v5.16b + aesmc v17.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + add w9,w8,#1 + aese v4.16b,v20.16b + aese v5.16b,v20.16b + aese v17.16b,v20.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aese v5.16b,v21.16b + aese v17.16b,v21.16b + eor v19.16b,v19.16b,v7.16b + rev w9,w9 + aesmc v4.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + aesmc v5.16b,v5.16b + aesmc v17.16b,v17.16b + mov v0.s[3], w9 + rev w10,w10 + aese v4.16b,v22.16b + aese v5.16b,v22.16b + aese v17.16b,v22.16b + mov v1.s[3], w10 + rev w12,w8 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + aesmc v17.16b,v17.16b + mov v18.s[3], w12 + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + mov w6,w5 + eor v2.16b,v2.16b,v4.16b + eor v3.16b,v3.16b,v5.16b + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v2.16b},[x1],#16 + st1 {v3.16b},[x1],#16 + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +.Lctr32_tail: + aese v0.16b,v16.16b + aese v1.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + aese v1.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aese v1.16b,v17.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aese v1.16b,v20.16b + ld1 {v3.16b},[x0] + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v21.16b + aese v1.16b,v21.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v22.16b + aese v1.16b,v22.16b + eor v2.16b,v2.16b,v7.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif diff --git 
a/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S b/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S new file mode 100644 index 00000000000000..1bfb26340a6e9e --- /dev/null +++ b/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S @@ -0,0 +1,115 @@ +#include "arm_arch.h" + +.text +.arch armv8-a+crypto +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + ld1 {v17.2d},[x1] //load H + movi v16.16b,#0xe1 + ext v3.16b,v17.16b,v17.16b,#8 + shl v16.2d,v16.2d,#57 + ushr v18.2d,v16.2d,#63 + ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01 + dup v17.4s,v17.s[1] + ushr v19.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v19.16b,v19.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v19.16b,v19.16b,v19.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v19.16b //H<<<=1 + eor v3.16b,v3.16b,v16.16b //twisted H + st1 {v3.2d},[x0] + + ret +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d},[x1] //load twisted H + shl v19.2d,v19.2d,#57 +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ext v21.16b,v20.16b,v20.16b,#8 + mov x3,#0 + ext v3.16b,v17.16b,v17.16b,#8 + mov x12,#0 + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing + mov x2,x0 + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + ld1 {v0.2d},[x0] //load [rotated] Xi + subs x3,x3,#16 + movi v19.16b,#0xe1 + mov x12,#16 + ld1 {v20.2d},[x1] //load twisted H + csel x12,xzr,x12,eq + ext v0.16b,v0.16b,v0.16b,#8 + shl v19.2d,v19.2d,#57 + ld1 {v17.2d},[x2],x12 //load [rotated] inp + ext v21.16b,v20.16b,v20.16b,#8 +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b + rev64 v17.16b,v17.16b +#endif + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing + ext v3.16b,v17.16b,v17.16b,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi + +.Lgmult_v8: + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + subs x3,x3,#16 + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + csel x12,xzr,x12,eq + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] inp + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + eor v0.16b,v1.16b,v18.16b + ext v3.16b,v17.16b,v17.16b,#8 + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + b.hs .Loop_v8 + +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 diff --git a/deps/openssl/asm/arm64-linux64-gas/sha/sha1-armv8.S b/deps/openssl/asm/arm64-linux64-gas/sha/sha1-armv8.S new file mode 100644 index 00000000000000..f9d126252ef2cd --- /dev/null +++ b/deps/openssl/asm/arm64-linux64-gas/sha/sha1-armv8.S @@ -0,0 +1,1211 @@ +#include "arm_arch.h" + +.text + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst 
w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +.Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x15,x15,#32 +#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and 
w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add 
w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // 
future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor 
w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w16,w16,w13 + add 
w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w13,w13,w10 + add w21,w21,w12 // future 
e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! 
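The scalar sha1_block_data_order above unrolls all 80 rounds, switching the boolean function and constant every 20 rounds (the movz/movk pairs build 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc and 0xca62c1d6, the same values as the .Lconst table further down) and interleaving each round's "future e+=K" and "e+=X[i]" adds with the previous round to hide latency. Stripped of that scheduling, one round is the textbook update below; sha1_round and rol32 are illustrative names and xi is assumed to be the already-expanded schedule word W[t]:

    #include <stdint.h>

    static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* One SHA-1 round, t = 0..79; the assembly's "ror #2" equals rol32(b, 30) here. */
    static void sha1_round(uint32_t s[5], uint32_t xi, int t)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f, k;
        if (t < 20)      { f = (b & c) | (~b & d);          k = 0x5a827999; }
        else if (t < 40) { f = b ^ c ^ d;                   k = 0x6ed9eba1; }
        else if (t < 60) { f = (b & c) | (b & d) | (c & d); k = 0x8f1bbcdc; }
        else             { f = b ^ c ^ d;                   k = 0xca62c1d6; }
        s[4] = d;
        s[3] = c;
        s[2] = rol32(b, 30);
        s[1] = a;
        s[0] = rol32(a, 5) + f + e + k + xi;
    }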
+ add x29,sp,#0 + + adr x4,.Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s-v19.4s},[x4] + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b + .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 + .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 + .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 + .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 + .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 + .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 + .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + 
.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 15
+	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v19.4s,v5.4s
+	.inst	0x5e2818a6	//sha1su1 v6.16b,v5.16b
+	.inst	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b
+	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 16
+	.inst	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s
+	add	v20.4s,v19.4s,v6.4s
+	.inst	0x5e2818c7	//sha1su1 v7.16b,v6.16b
+	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 17
+	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v19.4s,v7.4s
+
+	.inst	0x5e280803	//sha1h v3.16b,v0.16b		// 18
+	.inst	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s
+
+	.inst	0x5e280802	//sha1h v2.16b,v0.16b		// 19
+	.inst	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+
+	add	v1.4s,v1.4s,v2.4s
+	add	v0.4s,v0.4s,v22.4s
+
+	cbnz	x2,.Loop_hw
+
+	st1	{v0.4s},[x0],#16
+	st1	{v1.s}[0],[x0]
+
+	ldr	x29,[sp],#16
+	ret
+.size	sha1_block_armv8,.-sha1_block_armv8
+.align	6
+.Lconst:
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79
+.LOPENSSL_armcap_P:
+.quad	OPENSSL_armcap_P-.
+.asciz	"SHA1 block transform for ARMv8, CRYPTOGAMS by "
+.align	2
+.comm	OPENSSL_armcap_P,4,4
diff --git a/deps/openssl/asm/arm64-linux64-gas/sha/sha256-armv8.S b/deps/openssl/asm/arm64-linux64-gas/sha/sha256-armv8.S
new file mode 100644
index 00000000000000..bd43b1fe76dd34
--- /dev/null
+++ b/deps/openssl/asm/arm64-linux64-gas/sha/sha256-armv8.S
@@ -0,0 +1,1141 @@
+#include "arm_arch.h"
+
+.text
+
+.globl	sha256_block_data_order
+.type	sha256_block_data_order,%function
+.align	6
+sha256_block_data_order:
+	ldr	x16,.LOPENSSL_armcap_P
+	adr	x17,.LOPENSSL_armcap_P
+	add	x16,x16,x17
+	ldr	w16,[x16]
+	tst	w16,#ARMV8_SHA256
+	b.ne	.Lv8_entry
+	stp	x29,x30,[sp,#-128]!
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adr x30,K256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor 
w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and 
w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // 
(b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // 
d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 
+ ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add 
w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor 
w10,w10,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w8,w8,w1,ror#19
+	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w10,w21,ror#22	// Sigma0(a)
+	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
+	add	w3,w3,w12
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w3,w3,w9
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w3,w3,w8
+	cbnz	w19,.Loop_16_xx
+
+	ldp	x0,x2,[x29,#96]
+	ldr	x1,[x29,#112]
+	sub	x30,x30,#260		// rewind
+
+	ldp	w3,w4,[x0]
+	ldp	w5,w6,[x0,#2*4]
+	add	x1,x1,#14*4			// advance input pointer
+	ldp	w7,w8,[x0,#4*4]
+	add	w20,w20,w3
+	ldp	w9,w10,[x0,#6*4]
+	add	w21,w21,w4
+	add	w22,w22,w5
+	add	w23,w23,w6
+	stp	w20,w21,[x0]
+	add	w24,w24,w7
+	add	w25,w25,w8
+	stp	w22,w23,[x0,#2*4]
+	add	w26,w26,w9
+	add	w27,w27,w10
+	cmp	x1,x2
+	stp	w24,w25,[x0,#4*4]
+	stp	w26,w27,[x0,#6*4]
+	b.ne	.Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*4
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	ret
+.size	sha256_block_data_order,.-sha256_block_data_order
+
+.align	6
+.type	K256,%object
+K256:
+	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+	.long	0	//terminator
+.size	K256,.-K256
+.align	3
+.LOPENSSL_armcap_P:
+	.quad	OPENSSL_armcap_P-.
+.asciz	"SHA256 block transform for ARMv8, CRYPTOGAMS by "
+.align	2
+.type	sha256_block_armv8,%function
+.align	6
+sha256_block_armv8:
+.Lv8_entry:
+	stp	x29,x30,[sp,#-16]!
+ add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,K256 + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 
0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +.comm OPENSSL_armcap_P,4,4 diff --git a/deps/openssl/asm/arm64-linux64-gas/sha/sha512-armv8.S b/deps/openssl/asm/arm64-linux64-gas/sha/sha512-armv8.S new file mode 100644 index 00000000000000..6b0d1940c6bef1 --- /dev/null +++ b/deps/openssl/asm/arm64-linux64-gas/sha/sha512-armv8.S @@ -0,0 +1,1021 @@ +#include "arm_arch.h" + +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,%function +.align 6 +sha512_block_data_order: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adr x30,K512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add 
x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add 
x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 
// (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 
+ ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr 
x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + 
and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor 
x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha512_block_data_order,.-sha512_block_data_order + +.align 6 +.type K512,%object +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + 
.quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +.size K512,.-K512 +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s index 49bf32eed31eee..0bdfe91fc530bc 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/aes-x86_64.s @@ -1,5 +1,4 @@ .text - .type _x86_64_AES_encrypt,@function .align 16 _x86_64_AES_encrypt: @@ -152,7 +151,6 @@ _x86_64_AES_encrypt: xorl %r12d,%ecx xorl %r8d,%edx .byte 0xf3,0xc3 - .size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt .type _x86_64_AES_encrypt_compact,@function .align 16 @@ -177,80 +175,78 @@ _x86_64_AES_encrypt_compact: movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d - movzbl (%r14,%r10,1),%r10d - movzbl (%r14,%r11,1),%r11d - movzbl (%r14,%r12,1),%r12d - movzbl %dl,%r8d movzbl %bh,%esi movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d - movzbl (%r14,%rsi,1),%r9d - movzbl (%r14,%rdi,1),%r13d - movzbl %dh,%ebp + movzbl (%r14,%rsi,1),%r9d movzbl %ah,%esi - shrl $16,%ecx + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi - shrl $16,%edx - movzbl %cl,%edi shll $8,%r9d + shrl $16,%edx shll $8,%r13d - movzbl (%r14,%rdi,1),%edi xorl %r9d,%r10d - xorl %r13d,%r11d - - movzbl %dl,%r9d shrl $16,%eax + movzbl %dl,%r9d shrl $16,%ebx - movzbl %al,%r13d + xorl %r13d,%r11d shll $8,%ebp - shll $8,%esi - movzbl (%r14,%r9,1),%r9d - movzbl (%r14,%r13,1),%r13d + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d - xorl %esi,%r8d + shll $8,%esi movzbl %bl,%ebp - movzbl %dh,%esi shll $16,%edi - movzbl (%r14,%rbp,1),%ebp - movzbl (%r14,%rsi,1),%esi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d xorl %edi,%r10d - movzbl %ah,%edi shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rcx,1),%edx movzbl (%r14,%rbx,1),%ecx - shll $16,%r9d - shll $16,%r13d + shll $16,%ebp - xorl %r9d,%r11d xorl %r13d,%r12d - xorl %ebp,%r8d - shll $24,%esi + xorl %ebp,%r8d shll $24,%edi - shll $24,%edx xorl %esi,%r10d - shll $24,%ecx + shll $24,%edx xorl %edi,%r11d + shll $24,%ecx movl %r10d,%eax movl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 je .Lenc_compact_done - movl 
%eax,%esi - movl %ebx,%edi - andl $2155905152,%esi - andl $2155905152,%edi - movl %esi,%r10d - movl %edi,%r11d + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi shrl $7,%r10d leal (%rax,%rax,1),%r8d shrl $7,%r11d @@ -268,25 +264,25 @@ _x86_64_AES_encrypt_compact: xorl %r8d,%eax xorl %r9d,%ebx - movl %ecx,%esi - movl %edx,%edi + movl $2155905152,%r12d roll $24,%eax + movl $2155905152,%ebp roll $24,%ebx - andl $2155905152,%esi - andl $2155905152,%edi + andl %ecx,%r12d + andl %edx,%ebp xorl %r8d,%eax xorl %r9d,%ebx - movl %esi,%r12d - movl %edi,%ebp + movl %r12d,%esi rorl $16,%r10d + movl %ebp,%edi rorl $16,%r11d - shrl $7,%r12d leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d xorl %r10d,%eax - xorl %r11d,%ebx shrl $7,%ebp - leal (%rdx,%rdx,1),%r9d + xorl %r11d,%ebx rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d rorl $8,%r11d subl %r12d,%esi subl %ebp,%edi @@ -302,23 +298,23 @@ _x86_64_AES_encrypt_compact: xorl %esi,%r8d xorl %edi,%r9d + rorl $16,%r12d xorl %r8d,%ecx + rorl $16,%ebp xorl %r9d,%edx roll $24,%ecx + movl 0(%r14),%esi roll $24,%edx xorl %r8d,%ecx - xorl %r9d,%edx - movl 0(%r14),%esi - rorl $16,%r12d - rorl $16,%ebp movl 64(%r14),%edi - xorl %r12d,%ecx - xorl %ebp,%edx + xorl %r9d,%edx movl 128(%r14),%r8d + xorl %r12d,%ecx rorl $8,%r12d + xorl %ebp,%edx rorl $8,%ebp - movl 192(%r14),%r9d xorl %r12d,%ecx + movl 192(%r14),%r9d xorl %ebp,%edx jmp .Lenc_loop_compact .align 16 @@ -328,7 +324,6 @@ _x86_64_AES_encrypt_compact: xorl 8(%r15),%ecx xorl 12(%r15),%edx .byte 0xf3,0xc3 - .size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact .globl AES_encrypt .type AES_encrypt,@function @@ -551,7 +546,6 @@ _x86_64_AES_decrypt: xorl %r12d,%ecx xorl %r8d,%edx .byte 0xf3,0xc3 - .size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt .type _x86_64_AES_decrypt_compact,@function .align 16 @@ -577,70 +571,69 @@ _x86_64_AES_decrypt_compact: movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d - movzbl (%r14,%r10,1),%r10d - movzbl (%r14,%r11,1),%r11d - movzbl (%r14,%r12,1),%r12d - movzbl %dl,%r8d movzbl %dh,%esi movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d - movzbl (%r14,%rsi,1),%r9d - movzbl (%r14,%rdi,1),%r13d - movzbl %bh,%ebp + movzbl (%r14,%rsi,1),%r9d movzbl %ch,%esi - shrl $16,%ecx + movzbl (%r14,%rdi,1),%r13d movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi - shrl $16,%edx - movzbl %cl,%edi - shll $8,%r9d + shrl $16,%ecx shll $8,%r13d - movzbl (%r14,%rdi,1),%edi - xorl %r9d,%r10d - xorl %r13d,%r11d - - movzbl %dl,%r9d + shll $8,%r9d + movzbl %cl,%edi shrl $16,%eax + xorl %r9d,%r10d shrl $16,%ebx - movzbl %al,%r13d + movzbl %dl,%r9d + shll $8,%ebp + xorl %r13d,%r11d shll $8,%esi - movzbl (%r14,%r9,1),%r9d - movzbl (%r14,%r13,1),%r13d + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d - xorl %esi,%r8d - movzbl %bl,%ebp - movzbl %bh,%esi + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi movzbl (%r14,%rbp,1),%ebp - movzbl (%r14,%rsi,1),%esi xorl %edi,%r10d - + movzbl (%r14,%r13,1),%r13d movzbl %ch,%edi + + shll $16,%ebp shll $16,%r9d shll $16,%r13d - movzbl (%r14,%rdi,1),%ebx + xorl %ebp,%r8d + movzbl %dh,%ebp xorl %r9d,%r11d + shrl $8,%eax xorl %r13d,%r12d - movzbl %dh,%edi - shrl $8,%eax - shll $16,%ebp - movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx movzbl (%r14,%rax,1),%edx - xorl %ebp,%r8d + movl %r10d,%eax shll 
$24,%esi shll $24,%ebx shll $24,%ecx - xorl %esi,%r10d + xorl %esi,%eax shll $24,%edx xorl %r11d,%ebx - movl %r10d,%eax xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 @@ -653,12 +646,12 @@ _x86_64_AES_decrypt_compact: orq %rbx,%rax orq %rdx,%rcx movq 256+16(%r14),%rbp - movq %rax,%rbx - movq %rcx,%rdx - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r9 - movq %rdx,%r12 + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 @@ -669,15 +662,15 @@ _x86_64_AES_decrypt_compact: andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx - xorq %r8,%rbx - xorq %r11,%rdx - movq %rbx,%r8 - movq %rdx,%r11 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 @@ -688,15 +681,15 @@ _x86_64_AES_decrypt_compact: andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx - xorq %r9,%rbx - xorq %r12,%rdx - movq %rbx,%r9 - movq %rdx,%r12 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 @@ -721,51 +714,51 @@ _x86_64_AES_decrypt_compact: movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 - xorq %r13,%r12 shrq $32,%rbx + xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 - xorq %r11,%r13 roll $8,%eax + xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 + roll $8,%ebx xorq %r12,%r13 - roll $8,%ebx roll $8,%edx xorl %r10d,%eax - xorl %r13d,%ecx shrq $32,%r10 + xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 - movq %r11,%r13 - shrq $32,%r10 - shrq $32,%r13 roll $24,%r8d + movq %r11,%r13 roll $24,%r11d - roll $24,%r10d - roll $24,%r13d + shrq $32,%r10 xorl %r8d,%eax + shrq $32,%r13 xorl %r11d,%ecx + roll $24,%r10d movq %r9,%r8 + roll $24,%r13d movq %r12,%r11 + shrq $32,%r8 xorl %r10d,%ebx + shrq $32,%r11 xorl %r13d,%edx movq 0(%r14),%rsi - shrq $32,%r8 - shrq $32,%r11 - movq 64(%r14),%rdi roll $16,%r9d + movq 64(%r14),%rdi roll $16,%r12d movq 128(%r14),%rbp roll $16,%r8d - roll $16,%r11d movq 192(%r14),%r10 xorl %r9d,%eax + roll $16,%r11d xorl %r12d,%ecx movq 256(%r14),%r13 xorl %r8d,%ebx @@ -778,7 +771,6 @@ _x86_64_AES_decrypt_compact: xorl 8(%r15),%ecx xorl 12(%r15),%edx .byte 0xf3,0xc3 - .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact .globl AES_decrypt .type AES_decrypt,@function @@ -864,10 +856,6 @@ private_AES_set_encrypt_key: call _x86_64_AES_set_encrypt_key - movq 8(%rsp),%r15 - movq 16(%rsp),%r14 - movq 24(%rsp),%r13 - movq 32(%rsp),%r12 movq 40(%rsp),%rbp movq 48(%rsp),%rbx addq $56,%rsp @@ -1113,7 +1101,6 @@ _x86_64_AES_set_encrypt_key: movq $-1,%rax .Lexit: .byte 0xf3,0xc3 - .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key .globl private_AES_set_decrypt_key .type private_AES_set_decrypt_key,@function @@ -1166,12 +1153,12 @@ private_AES_set_decrypt_key: leaq 16(%r15),%r15 movq 0(%r15),%rax movq 8(%r15),%rcx - movq %rax,%rbx - movq %rcx,%rdx - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r9 - movq %rdx,%r12 + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 @@ -1182,15 +1169,15 @@ private_AES_set_decrypt_key: andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx - xorq %r8,%rbx - xorq %r11,%rdx - movq %rbx,%r8 - movq %rdx,%r11 - - 
andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 @@ -1201,15 +1188,15 @@ private_AES_set_decrypt_key: andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx - xorq %r9,%rbx - xorq %r12,%rdx - movq %rbx,%r9 - movq %rdx,%r12 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 @@ -1234,51 +1221,51 @@ private_AES_set_decrypt_key: movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 - xorq %r13,%r12 shrq $32,%rbx + xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 - xorq %r11,%r13 roll $8,%eax + xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 + roll $8,%ebx xorq %r12,%r13 - roll $8,%ebx roll $8,%edx xorl %r10d,%eax - xorl %r13d,%ecx shrq $32,%r10 + xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 - movq %r11,%r13 - shrq $32,%r10 - shrq $32,%r13 roll $24,%r8d + movq %r11,%r13 roll $24,%r11d - roll $24,%r10d - roll $24,%r13d + shrq $32,%r10 xorl %r8d,%eax + shrq $32,%r13 xorl %r11d,%ecx + roll $24,%r10d movq %r9,%r8 + roll $24,%r13d movq %r12,%r11 + shrq $32,%r8 xorl %r10d,%ebx + shrq $32,%r11 xorl %r13d,%edx - shrq $32,%r8 - shrq $32,%r11 - roll $16,%r9d + roll $16,%r12d roll $16,%r8d - roll $16,%r11d xorl %r9d,%eax + roll $16,%r11d xorl %r12d,%ecx xorl %r8d,%ebx @@ -1395,7 +1382,6 @@ AES_cbc_encrypt: leaq 80(%rsp),%r15 movl $30,%ecx .long 0x90A548F3 - movl %eax,(%rdi) .Lcbc_skip_ecopy: movq %r15,0(%rsp) @@ -1559,7 +1545,6 @@ AES_cbc_encrypt: xorq %rax,%rax .long 0x90AB48F3 - jmp .Lcbc_exit @@ -1615,7 +1600,6 @@ AES_cbc_encrypt: movl 12(%rbp),%edx jz .Lcbc_slow_enc_tail - .align 4 .Lcbc_slow_enc_loop: xorl 0(%r8),%eax @@ -1660,19 +1644,16 @@ AES_cbc_encrypt: movq %r8,%rsi movq %r9,%rdi .long 0x9066A4F3 - movq $16,%rcx subq %r10,%rcx xorq %rax,%rax .long 0x9066AAF3 - movq %r9,%r8 movq $16,%r10 movq %r11,%rax movq %r12,%rcx jmp .Lcbc_slow_enc_loop - .align 16 .LSLOW_DECRYPT: shrq $3,%rax @@ -1748,7 +1729,6 @@ AES_cbc_encrypt: leaq 64(%rsp),%rsi leaq 16(%r10),%rcx .long 0x9066A4F3 - jmp .Lcbc_exit .align 16 diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-mb-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-mb-x86_64.s new file mode 100644 index 00000000000000..543b58831656cb --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-mb-x86_64.s @@ -0,0 +1,1435 @@ +.text + + + +.globl aesni_multi_cbc_encrypt +.type aesni_multi_cbc_encrypt,@function +.align 32 +aesni_multi_cbc_encrypt: + cmpl $2,%edx + jb .Lenc_non_avx + movl OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_enc_shortcut + jmp .Lenc_non_avx +.align 16 +.Lenc_non_avx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +.Lenc4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Lenc4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 
+ movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Lenc4x_done + + movups 16-120(%rsi),%xmm1 + pxor %xmm12,%xmm2 + movups 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm3 + movl 240-120(%rsi),%eax + pxor %xmm12,%xmm4 + movdqu (%r8),%xmm6 + pxor %xmm12,%xmm5 + movdqu (%r9),%xmm7 + pxor %xmm6,%xmm2 + movdqu (%r10),%xmm8 + pxor %xmm7,%xmm3 + movdqu (%r11),%xmm9 + pxor %xmm8,%xmm4 + pxor %xmm9,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_enc4x + +.align 32 +.Loop_enc4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,220,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,220,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r10,%rbx,1) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,220,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,220,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,220,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,220,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,220,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,220,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,220,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,220,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 160-120(%rsi),%xmm0 + + jb .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 192-120(%rsi),%xmm0 + + je .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 224-120(%rsi),%xmm0 + jmp .Lenc4x_tail + +.align 32 +.Lenc4x_tail: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqu (%r8,%rbx,1),%xmm6 + movdqu 16-120(%rsi),%xmm1 + +.byte 102,15,56,221,208 + movdqu (%r9,%rbx,1),%xmm7 + pxor %xmm12,%xmm6 +.byte 102,15,56,221,216 + movdqu (%r10,%rbx,1),%xmm8 + pxor %xmm12,%xmm7 +.byte 102,15,56,221,224 + movdqu (%r11,%rbx,1),%xmm9 + pxor %xmm12,%xmm8 +.byte 102,15,56,221,232 + movdqu 
32-120(%rsi),%xmm0 + pxor %xmm12,%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + pxor %xmm6,%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + pxor %xmm7,%xmm3 + movups %xmm4,-16(%r14,%rbx,1) + pxor %xmm8,%xmm4 + movups %xmm5,-16(%r15,%rbx,1) + pxor %xmm9,%xmm5 + + decl %edx + jnz .Loop_enc4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + + + + + + + + + + leaq 160(%rdi),%rdi + decl %edx + jnz .Lenc4x_loop_grande + +.Lenc4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lenc4x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt + +.globl aesni_multi_cbc_decrypt +.type aesni_multi_cbc_decrypt,@function +.align 32 +aesni_multi_cbc_decrypt: + cmpl $2,%edx + jb .Ldec_non_avx + movl OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_dec_shortcut + jmp .Ldec_non_avx +.align 16 +.Ldec_non_avx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +.Ldec4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Ldec4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm6 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm7 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm8 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm9 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Ldec4x_done + + movups 16-120(%rsi),%xmm1 + movups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + movdqu (%r8),%xmm2 + movdqu (%r9),%xmm3 + pxor %xmm12,%xmm2 + movdqu (%r10),%xmm4 + pxor %xmm12,%xmm3 + movdqu (%r11),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_dec4x + +.align 32 +.Loop_dec4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,222,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,222,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r11,%rbx,1) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,222,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,222,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,222,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,222,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,222,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,222,216 + prefetcht0 
15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,222,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,222,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 160-120(%rsi),%xmm0 + + jb .Ldec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 192-120(%rsi),%xmm0 + + je .Ldec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 224-120(%rsi),%xmm0 + jmp .Ldec4x_tail + +.align 32 +.Ldec4x_tail: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,233 + movdqu 16-120(%rsi),%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 + movdqu 32-120(%rsi),%xmm0 + +.byte 102,15,56,223,214 +.byte 102,15,56,223,223 + movdqu -16(%r8,%rbx,1),%xmm6 + movdqu -16(%r9,%rbx,1),%xmm7 +.byte 102,65,15,56,223,224 +.byte 102,65,15,56,223,233 + movdqu -16(%r10,%rbx,1),%xmm8 + movdqu -16(%r11,%rbx,1),%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + movdqu (%r8,%rbx,1),%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + movdqu (%r9,%rbx,1),%xmm3 + pxor %xmm12,%xmm2 + movups %xmm4,-16(%r14,%rbx,1) + movdqu (%r10,%rbx,1),%xmm4 + pxor %xmm12,%xmm3 + movups %xmm5,-16(%r15,%rbx,1) + movdqu (%r11,%rbx,1),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + + decl %edx + jnz .Loop_dec4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + leaq 160(%rdi),%rdi + decl %edx + jnz .Ldec4x_loop_grande + +.Ldec4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Ldec4x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt +.type aesni_multi_cbc_encrypt_avx,@function +.align 32 +aesni_multi_cbc_encrypt_avx: +_avx_cbc_enc_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + + + subq $192,%rsp + andq $-128,%rsp + movq %rax,16(%rsp) + +.Lenc8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +.Lenc8x_loop_grande: + + xorl %edx,%edx + movl -144(%rdi),%ecx + movq -160(%rdi),%r8 + cmpl %edx,%ecx + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + movl -104(%rdi),%ecx + movq -120(%rdi),%r9 + cmpl %edx,%ecx + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + movl -64(%rdi),%ecx + movq -80(%rdi),%r10 + cmpl %edx,%ecx + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + movl -24(%rdi),%ecx + movq -40(%rdi),%r11 + cmpl %edx,%ecx + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + 
vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + movl 16(%rdi),%ecx + movq 0(%rdi),%r12 + cmpl %edx,%ecx + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + movl 56(%rdi),%ecx + movq 40(%rdi),%r13 + cmpl %edx,%ecx + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + movl 96(%rdi),%ecx + movq 80(%rdi),%r14 + cmpl %edx,%ecx + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + movl 136(%rdi),%ecx + movq 120(%rdi),%r15 + cmpl %edx,%ecx + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + testl %edx,%edx + jz .Lenc8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + + vpxor (%r8),%xmm15,%xmm10 + leaq 128(%rsp),%rbp + vpxor (%r9),%xmm15,%xmm11 + vpxor (%r10),%xmm15,%xmm12 + vpxor (%r11),%xmm15,%xmm13 + vpxor %xmm10,%xmm2,%xmm2 + vpxor (%r12),%xmm15,%xmm10 + vpxor %xmm11,%xmm3,%xmm3 + vpxor (%r13),%xmm15,%xmm11 + vpxor %xmm12,%xmm4,%xmm4 + vpxor (%r14),%xmm15,%xmm12 + vpxor %xmm13,%xmm5,%xmm5 + vpxor (%r15),%xmm15,%xmm13 + vpxor %xmm10,%xmm6,%xmm6 + movl $1,%ecx + vpxor %xmm11,%xmm7,%xmm7 + vpxor %xmm12,%xmm8,%xmm8 + vpxor %xmm13,%xmm9,%xmm9 + jmp .Loop_enc8x + +.align 32 +.Loop_enc8x: + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r8),%xmm15,%xmm10 + movq %rbx,64+0(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,0(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r9),%xmm15,%xmm11 + movq %rbx,64+8(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,16(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r10),%xmm15,%xmm12 + movq %rbx,64+16(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,32(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r11),%xmm15,%xmm13 
+ movq %rbx,64+24(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,48(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r12),%xmm15,%xmm10 + movq %rbx,64+32(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r13),%xmm15,%xmm11 + movq %rbx,64+40(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r14),%xmm15,%xmm12 + movq %rbx,64+48(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r15),%xmm15,%xmm13 + movq %rbx,64+56(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb .Lenc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je .Lenc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +.Lenc8x_tail: + vaesenc %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + 
vmovdqu 48(%rsp),%xmm14 + vaesenc %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesenclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesenclast %xmm0,%xmm7,%xmm7 + vaesenclast %xmm0,%xmm8,%xmm8 + vmovdqa %xmm14,48(%rsp) + vaesenclast %xmm0,%xmm9,%xmm9 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vpxor 0(%rbp),%xmm2,%xmm2 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vpxor 16(%rbp),%xmm3,%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vpxor 32(%rbp),%xmm4,%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vpxor 48(%rbp),%xmm5,%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vpxor %xmm10,%xmm6,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vpxor %xmm11,%xmm7,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vpxor %xmm12,%xmm8,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vpxor %xmm13,%xmm9,%xmm9 + + decl %edx + jnz .Loop_enc8x + + movq 16(%rsp),%rax + + + + + +.Lenc8x_done: + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lenc8x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx + +.type aesni_multi_cbc_decrypt_avx,@function +.align 32 +aesni_multi_cbc_decrypt_avx: +_avx_cbc_dec_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + + + + subq $256,%rsp + andq $-256,%rsp + subq $192,%rsp + movq %rax,16(%rsp) + +.Ldec8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +.Ldec8x_loop_grande: + + xorl %edx,%edx + movl -144(%rdi),%ecx + movq -160(%rdi),%r8 + cmpl %edx,%ecx + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + vmovdqu %xmm2,192(%rsp) + movl -104(%rdi),%ecx + movq -120(%rdi),%r9 + cmpl %edx,%ecx + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + vmovdqu %xmm3,208(%rsp) + movl -64(%rdi),%ecx + movq -80(%rdi),%r10 + cmpl %edx,%ecx + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + vmovdqu %xmm4,224(%rsp) + movl -24(%rdi),%ecx + movq -40(%rdi),%r11 + cmpl %edx,%ecx + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + vmovdqu %xmm5,240(%rsp) + movl 16(%rdi),%ecx + movq 0(%rdi),%r12 + cmpl %edx,%ecx + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + vmovdqu %xmm6,256(%rsp) + movl 56(%rdi),%ecx + movq 40(%rdi),%r13 + cmpl %edx,%ecx + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + vmovdqu %xmm7,272(%rsp) + movl 96(%rdi),%ecx + movq 80(%rdi),%r14 + 
cmpl %edx,%ecx + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + vmovdqu %xmm8,288(%rsp) + movl 136(%rdi),%ecx + movq 120(%rdi),%r15 + cmpl %edx,%ecx + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + vmovdqu %xmm9,304(%rsp) + testl %edx,%edx + jz .Ldec8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + leaq 192+128(%rsp),%rbp + + vmovdqu (%r8),%xmm2 + vmovdqu (%r9),%xmm3 + vmovdqu (%r10),%xmm4 + vmovdqu (%r11),%xmm5 + vmovdqu (%r12),%xmm6 + vmovdqu (%r13),%xmm7 + vmovdqu (%r14),%xmm8 + vmovdqu (%r15),%xmm9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm6,64(%rbp) + vpxor %xmm15,%xmm6,%xmm6 + vmovdqu %xmm7,80(%rbp) + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm8,96(%rbp) + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu %xmm9,112(%rbp) + vpxor %xmm15,%xmm9,%xmm9 + xorq $128,%rbp + movl $1,%ecx + jmp .Loop_dec8x + +.align 32 +.Loop_dec8x: + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r8),%xmm10 + movq %rbx,64+0(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,128(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r9),%xmm11 + movq %rbx,64+8(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,144(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r10),%xmm12 + movq %rbx,64+16(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,160(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r11),%xmm13 + movq %rbx,64+24(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,176(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq 
%rsp,%r12 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r12),%xmm10 + movq %rbx,64+32(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r13),%xmm11 + movq %rbx,64+40(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r14),%xmm12 + movq %rbx,64+48(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r15),%xmm13 + movq %rbx,64+56(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb .Ldec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je .Ldec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +.Ldec8x_tail: + vaesdec %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + vmovdqu 48(%rsp),%xmm14 + vaesdec %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesdeclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor 0(%rbp),%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor 16(%rbp),%xmm3,%xmm3 + vpcmpgtd 
%xmm15,%xmm14,%xmm15 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor 32(%rbp),%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor 48(%rbp),%xmm5,%xmm5 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor 64(%rbp),%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm8,%xmm8 + vpxor 80(%rbp),%xmm7,%xmm7 + vmovdqa %xmm14,48(%rsp) + vaesdeclast %xmm0,%xmm9,%xmm9 + vpxor 96(%rbp),%xmm8,%xmm8 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vmovdqu 128+0(%rsp),%xmm2 + vpxor 112(%rbp),%xmm9,%xmm9 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu 128+16(%rsp),%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu 128+32(%rsp),%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu 128+48(%rsp),%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm10,64(%rbp) + vpxor %xmm10,%xmm15,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vmovdqu %xmm11,80(%rbp) + vpxor %xmm11,%xmm15,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vmovdqu %xmm12,96(%rbp) + vpxor %xmm12,%xmm15,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vmovdqu %xmm13,112(%rbp) + vpxor %xmm13,%xmm15,%xmm9 + + xorq $128,%rbp + decl %edx + jnz .Loop_dec8x + + movq 16(%rsp),%rax + + + + + +.Ldec8x_done: + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Ldec8x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s index 8f0475e0d28bcf..88042248685372 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-sha1-x86_64.s @@ -1,19 +1,25 @@ .text - .globl aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc,@function -.align 16 +.align 32 aesni_cbc_sha1_enc: movl OPENSSL_ia32cap_P+0(%rip),%r10d - movl OPENSSL_ia32cap_P+4(%rip),%r11d + movq OPENSSL_ia32cap_P+4(%rip),%r11 + btq $61,%r11 + jc aesni_cbc_sha1_enc_shaext + andl $268435456,%r11d + andl $1073741824,%r10d + orl %r11d,%r10d + cmpl $1342177280,%r10d + je aesni_cbc_sha1_enc_avx jmp aesni_cbc_sha1_enc_ssse3 .byte 0xf3,0xc3 .size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc_ssse3,@function -.align 16 +.align 32 aesni_cbc_sha1_enc_ssse3: movq 8(%rsp),%r10 @@ -30,12 +36,12 @@ aesni_cbc_sha1_enc_ssse3: movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 - movq %rcx,%r15 - movdqu (%r8),%xmm11 + leaq 112(%rcx),%r15 + movdqu (%r8),%xmm2 movq %r8,88(%rsp) shlq $6,%r14 subq %r12,%r13 - movl 240(%r15),%r8d + movl 240-112(%r15),%r8d addq %r10,%r14 leaq K_XX_XX(%rip),%r11 @@ -45,1188 +51,2464 @@ aesni_cbc_sha1_enc_ssse3: movl 12(%r9),%edx movl %ebx,%esi movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi - movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 - movdqu 0(%r10),%xmm0 - movdqu 16(%r10),%xmm1 - movdqu 32(%r10),%xmm2 - movdqu 48(%r10),%xmm3 -.byte 102,15,56,0,198 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 addq $64,%r10 -.byte 102,15,56,0,206 -.byte 102,15,56,0,214 -.byte 102,15,56,0,222 - 
paddd %xmm9,%xmm0 - paddd %xmm9,%xmm1 - paddd %xmm9,%xmm2 - movdqa %xmm0,0(%rsp) - psubd %xmm9,%xmm0 - movdqa %xmm1,16(%rsp) - psubd %xmm9,%xmm1 - movdqa %xmm2,32(%rsp) - psubd %xmm9,%xmm2 - movups (%r15),%xmm13 - movups 16(%r15),%xmm14 + paddd %xmm13,%xmm4 +.byte 102,15,56,0,251 + paddd %xmm13,%xmm5 + paddd %xmm13,%xmm6 + movdqa %xmm4,0(%rsp) + psubd %xmm13,%xmm4 + movdqa %xmm5,16(%rsp) + psubd %xmm13,%xmm5 + movdqa %xmm6,32(%rsp) + psubd %xmm13,%xmm6 + movups -112(%r15),%xmm15 + movups 16-112(%r15),%xmm0 jmp .Loop_ssse3 -.align 16 +.align 32 .Loop_ssse3: - movdqa %xmm1,%xmm4 - addl 0(%rsp),%ebp - movups 0(%r12),%xmm12 - xorps %xmm13,%xmm12 - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 - xorl %edx,%ecx - movdqa %xmm3,%xmm8 -.byte 102,15,58,15,224,8 + rorl $2,%ebx + movups 0(%r12),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm4,%xmm8 + xorl %edx,%esi + movdqa %xmm7,%xmm12 + paddd %xmm7,%xmm13 movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm5,%xmm8 + xorl %ecx,%ebx roll $5,%eax - paddd %xmm3,%xmm9 - andl %ecx,%esi - xorl %edx,%ecx - psrldq $4,%xmm8 - xorl %edx,%esi - addl %eax,%ebp - pxor %xmm0,%xmm4 - rorl $2,%ebx addl %esi,%ebp - pxor %xmm2,%xmm8 - addl 4(%rsp),%edx - xorl %ecx,%ebx - movl %ebp,%esi - roll $5,%ebp - pxor %xmm8,%xmm4 + psrldq $4,%xmm12 andl %ebx,%edi xorl %ecx,%ebx - movdqa %xmm9,48(%rsp) - xorl %ecx,%edi -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 - addl %ebp,%edx - movdqa %xmm4,%xmm10 - movdqa %xmm4,%xmm8 + pxor %xmm4,%xmm8 + addl %eax,%ebp rorl $7,%eax - addl %edi,%edx - addl 8(%rsp),%ecx + pxor %xmm6,%xmm12 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm12,%xmm8 xorl %ebx,%eax - pslldq $12,%xmm10 - paddd %xmm4,%xmm4 - movl %edx,%edi - roll $5,%edx + roll $5,%ebp + movdqa %xmm13,48(%rsp) + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 andl %eax,%esi + movdqa %xmm8,%xmm3 xorl %ebx,%eax - psrld $31,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx - movdqa %xmm10,%xmm9 + addl %ebp,%edx rorl $7,%ebp - addl %esi,%ecx - psrld $30,%xmm10 - por %xmm8,%xmm4 - addl 12(%rsp),%ebx + movdqa %xmm8,%xmm12 + xorl %ebx,%esi + pslldq $12,%xmm3 + paddd %xmm8,%xmm8 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm12 xorl %eax,%ebp - movl %ecx,%esi - roll $5,%ecx -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 - pslld $2,%xmm9 - pxor %xmm10,%xmm4 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm13 andl %ebp,%edi xorl %eax,%ebp - movdqa 0(%r11),%xmm10 - xorl %eax,%edi - addl %ecx,%ebx - pxor %xmm9,%xmm4 + psrld $30,%xmm3 + addl %edx,%ecx rorl $7,%edx - addl %edi,%ebx - movdqa %xmm2,%xmm5 - addl 16(%rsp),%eax + por %xmm12,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm13 + pxor %xmm3,%xmm8 xorl %ebp,%edx - movdqa %xmm4,%xmm9 -.byte 102,15,58,15,233,8 - movl %ebx,%edi - roll $5,%ebx - paddd %xmm4,%xmm10 + movdqa 0(%r11),%xmm3 + roll $5,%ecx + addl %edi,%ebx andl %edx,%esi + pxor %xmm13,%xmm8 xorl %ebp,%edx - psrldq $4,%xmm9 - xorl %ebp,%esi - addl %ebx,%eax - pxor %xmm1,%xmm5 + addl %ecx,%ebx rorl $7,%ecx - addl %esi,%eax - pxor %xmm3,%xmm9 - addl 20(%rsp),%ebp -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 + pshufd $238,%xmm5,%xmm9 + xorl %ebp,%esi + movdqa %xmm8,%xmm13 + paddd %xmm8,%xmm3 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm6,%xmm9 xorl %edx,%ecx - movl %eax,%esi - roll $5,%eax - pxor %xmm9,%xmm5 + roll $5,%ebx + addl %esi,%eax + psrldq 
$4,%xmm13 andl %ecx,%edi xorl %edx,%ecx - movdqa %xmm10,0(%rsp) - xorl %edx,%edi - addl %eax,%ebp - movdqa %xmm5,%xmm8 - movdqa %xmm5,%xmm9 + pxor %xmm5,%xmm9 + addl %ebx,%eax rorl $7,%ebx - addl %edi,%ebp - addl 24(%rsp),%edx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + pxor %xmm7,%xmm13 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm13,%xmm9 xorl %ecx,%ebx - pslldq $12,%xmm8 - paddd %xmm5,%xmm5 - movl %ebp,%edi - roll $5,%ebp + roll $5,%eax + movdqa %xmm3,0(%rsp) + addl %edi,%ebp andl %ebx,%esi + movdqa %xmm9,%xmm12 xorl %ecx,%ebx - psrld $31,%xmm9 - xorl %ecx,%esi -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 - addl %ebp,%edx - movdqa %xmm8,%xmm10 + addl %eax,%ebp rorl $7,%eax - addl %esi,%edx - psrld $30,%xmm8 - por %xmm9,%xmm5 - addl 28(%rsp),%ecx + movdqa %xmm9,%xmm13 + xorl %ecx,%esi + pslldq $12,%xmm12 + paddd %xmm9,%xmm9 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm13 xorl %ebx,%eax - movl %edx,%esi - roll $5,%edx - pslld $2,%xmm10 - pxor %xmm8,%xmm5 + roll $5,%ebp + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm3 andl %eax,%edi xorl %ebx,%eax - movdqa 16(%r11),%xmm8 - xorl %ebx,%edi - addl %edx,%ecx - pxor %xmm10,%xmm5 + psrld $30,%xmm12 + addl %ebp,%edx rorl $7,%ebp - addl %edi,%ecx - movdqa %xmm3,%xmm6 - addl 32(%rsp),%ebx + por %xmm13,%xmm9 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm3 + pxor %xmm12,%xmm9 xorl %eax,%ebp - movdqa %xmm5,%xmm10 -.byte 102,15,58,15,242,8 - movl %ecx,%edi - roll $5,%ecx -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 - paddd %xmm5,%xmm8 + movdqa 16(%r11),%xmm12 + roll $5,%edx + addl %edi,%ecx andl %ebp,%esi + pxor %xmm3,%xmm9 xorl %eax,%ebp - psrldq $4,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx - pxor %xmm2,%xmm6 + addl %edx,%ecx rorl $7,%edx - addl %esi,%ebx - pxor %xmm4,%xmm10 - addl 36(%rsp),%eax + pshufd $238,%xmm6,%xmm10 + xorl %eax,%esi + movdqa %xmm9,%xmm3 + paddd %xmm9,%xmm12 + movl %ecx,%edi + addl 32(%rsp),%ebx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm7,%xmm10 xorl %ebp,%edx - movl %ebx,%esi - roll $5,%ebx - pxor %xmm10,%xmm6 + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm3 andl %edx,%edi xorl %ebp,%edx - movdqa %xmm8,16(%rsp) - xorl %ebp,%edi - addl %ebx,%eax - movdqa %xmm6,%xmm9 - movdqa %xmm6,%xmm10 + pxor %xmm6,%xmm10 + addl %ecx,%ebx rorl $7,%ecx - addl %edi,%eax - addl 40(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 + pxor %xmm8,%xmm3 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm3,%xmm10 xorl %edx,%ecx - pslldq $12,%xmm9 - paddd %xmm6,%xmm6 - movl %eax,%edi - roll $5,%eax + roll $5,%ebx + movdqa %xmm12,16(%rsp) + addl %edi,%eax andl %ecx,%esi + movdqa %xmm10,%xmm13 xorl %edx,%ecx - psrld $31,%xmm10 - xorl %edx,%esi - addl %eax,%ebp - movdqa %xmm9,%xmm8 + addl %ebx,%eax rorl $7,%ebx - addl %esi,%ebp - psrld $30,%xmm9 - por %xmm10,%xmm6 - addl 44(%rsp),%edx + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm10,%xmm3 + xorl %edx,%esi + pslldq $12,%xmm13 + paddd %xmm10,%xmm10 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm3 xorl %ecx,%ebx - movl %ebp,%esi - roll $5,%ebp - pslld $2,%xmm8 - pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm13,%xmm12 andl %ebx,%edi xorl %ecx,%ebx - movdqa 16(%r11),%xmm9 - xorl %ecx,%edi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %ebp,%edx - pxor %xmm8,%xmm6 + psrld $30,%xmm13 + addl %eax,%ebp rorl $7,%eax - addl %edi,%edx - movdqa %xmm4,%xmm7 - addl 48(%rsp),%ecx + por %xmm3,%xmm10 + 
xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm12 + pxor %xmm13,%xmm10 xorl %ebx,%eax - movdqa %xmm6,%xmm8 -.byte 102,15,58,15,251,8 - movl %edx,%edi - roll $5,%edx - paddd %xmm6,%xmm9 + movdqa 16(%r11),%xmm13 + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 andl %eax,%esi + pxor %xmm12,%xmm10 xorl %ebx,%eax - psrldq $4,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx - pxor %xmm3,%xmm7 + addl %ebp,%edx rorl $7,%ebp - addl %esi,%ecx - pxor %xmm5,%xmm8 - addl 52(%rsp),%ebx + pshufd $238,%xmm7,%xmm11 + xorl %ebx,%esi + movdqa %xmm10,%xmm12 + paddd %xmm10,%xmm13 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm8,%xmm11 xorl %eax,%ebp - movl %ecx,%esi - roll $5,%ecx -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 - pxor %xmm8,%xmm7 + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm12 andl %ebp,%edi xorl %eax,%ebp - movdqa %xmm9,32(%rsp) - xorl %eax,%edi - addl %ecx,%ebx - movdqa %xmm7,%xmm10 - movdqa %xmm7,%xmm8 + pxor %xmm7,%xmm11 + addl %edx,%ecx rorl $7,%edx - addl %edi,%ebx - addl 56(%rsp),%eax + pxor %xmm9,%xmm12 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm11 xorl %ebp,%edx - pslldq $12,%xmm10 - paddd %xmm7,%xmm7 - movl %ebx,%edi - roll $5,%ebx + roll $5,%ecx + movdqa %xmm13,32(%rsp) + addl %edi,%ebx andl %edx,%esi + movdqa %xmm11,%xmm3 xorl %ebp,%edx - psrld $31,%xmm8 - xorl %ebp,%esi - addl %ebx,%eax - movdqa %xmm10,%xmm9 + addl %ecx,%ebx rorl $7,%ecx + movdqa %xmm11,%xmm12 + xorl %ebp,%esi + pslldq $12,%xmm3 + paddd %xmm11,%xmm11 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm12 + xorl %edx,%ecx + roll $5,%ebx addl %esi,%eax - psrld $30,%xmm10 - por %xmm8,%xmm7 - addl 60(%rsp),%ebp + movdqa %xmm3,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx cmpl $11,%r8d jb .Laesenclast1 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 je .Laesenclast1 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 .Laesenclast1: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 - xorl %edx,%ecx +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + por %xmm12,%xmm11 + xorl %edx,%edi movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm13 + pxor %xmm3,%xmm11 + xorl %ecx,%ebx + movdqa 16(%r11),%xmm3 roll $5,%eax - pslld $2,%xmm9 - pxor %xmm10,%xmm7 - andl %ecx,%edi - xorl %edx,%ecx - movdqa 16(%r11),%xmm10 - xorl %edx,%edi - addl %eax,%ebp - pxor %xmm9,%xmm7 - rorl $7,%ebx addl %edi,%ebp - movdqa %xmm7,%xmm9 - addl 0(%rsp),%edx - pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,206,8 - xorl %ecx,%ebx - movl %ebp,%edi - roll $5,%ebp - pxor %xmm1,%xmm0 andl %ebx,%esi + pxor %xmm13,%xmm11 + pshufd $238,%xmm10,%xmm13 xorl %ecx,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm7,%xmm10 - xorl %ecx,%esi - movups 16(%r12),%xmm12 - xorps %xmm13,%xmm12 - movups %xmm11,0(%r13,%r12,1) - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 - addl %ebp,%edx - pxor %xmm9,%xmm0 + addl %eax,%ebp rorl $7,%eax - addl %esi,%edx - addl 4(%rsp),%ecx + pxor %xmm8,%xmm4 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm11,%xmm13 xorl %ebx,%eax - movdqa %xmm0,%xmm9 - movdqa %xmm10,48(%rsp) - movl %edx,%esi - roll $5,%edx + roll 
$5,%ebp + pxor %xmm5,%xmm4 + addl %esi,%edx + movups 16(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%r12,%r13,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 andl %eax,%edi + movdqa %xmm3,%xmm12 xorl %ebx,%eax - pslld $2,%xmm0 - xorl %ebx,%edi - addl %edx,%ecx - psrld $30,%xmm9 + paddd %xmm11,%xmm3 + addl %ebp,%edx + pxor %xmm13,%xmm4 rorl $7,%ebp - addl %edi,%ecx - addl 8(%rsp),%ebx + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm4,%xmm13 xorl %eax,%ebp - movl %ecx,%edi - roll $5,%ecx -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 - por %xmm9,%xmm0 + roll $5,%edx + movdqa %xmm3,48(%rsp) + addl %edi,%ecx andl %ebp,%esi xorl %eax,%ebp - movdqa %xmm0,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx + pslld $2,%xmm4 + addl %edx,%ecx rorl $7,%edx - addl %esi,%ebx - addl 12(%rsp),%eax + psrld $30,%xmm13 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + por %xmm13,%xmm4 xorl %ebp,%edx - movl %ebx,%esi - roll $5,%ebx + roll $5,%ecx + pshufd $238,%xmm11,%xmm3 + addl %esi,%ebx andl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax xorl %ebp,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + roll $5,%ebx addl %edi,%eax - addl 16(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 - pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,215,8 xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm9,%xmm5 + addl 16(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm4,%xmm3 movl %eax,%edi roll $5,%eax - pxor %xmm2,%xmm1 - xorl %ecx,%esi - addl %eax,%ebp - movdqa %xmm8,%xmm9 - paddd %xmm0,%xmm8 - rorl $7,%ebx + pxor %xmm6,%xmm5 addl %esi,%ebp - pxor %xmm10,%xmm1 - addl 20(%rsp),%edx xorl %ecx,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebx + paddd %xmm4,%xmm12 + addl %eax,%ebp + pxor %xmm3,%xmm5 + addl 20(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - movdqa %xmm1,%xmm10 - movdqa %xmm8,0(%rsp) - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movdqa %xmm5,%xmm3 addl %edi,%edx - pslld $2,%xmm1 - addl 24(%rsp),%ecx xorl %ebx,%esi - psrld $30,%xmm10 + movdqa %xmm12,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm5 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm3 roll $5,%edx - xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - por %xmm10,%xmm1 - addl 28(%rsp),%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%edi - movdqa %xmm1,%xmm8 + rorl $7,%ebp + por %xmm3,%xmm5 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm4,%xmm12 + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 32(%rsp),%eax - pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,192,8 xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm10,%xmm6 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm5,%xmm12 movl %ebx,%edi roll $5,%ebx - pxor %xmm3,%xmm2 - xorl %edx,%esi - addl %ebx,%eax - movdqa 32(%r11),%xmm10 - paddd %xmm1,%xmm9 - rorl $7,%ecx + pxor %xmm7,%xmm6 addl %esi,%eax - pxor %xmm8,%xmm2 - addl 36(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 xorl %edx,%edi + movdqa 32(%r11),%xmm3 + rorl $7,%ecx + paddd %xmm5,%xmm13 + addl %ebx,%eax + pxor %xmm12,%xmm6 + addl 36(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - movdqa %xmm2,%xmm8 - movdqa %xmm9,16(%rsp) - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx + movdqa 
%xmm6,%xmm12 addl %edi,%ebp - pslld $2,%xmm2 - addl 40(%rsp),%edx xorl %ecx,%esi - psrld $30,%xmm8 + movdqa %xmm13,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm6 + xorl %ebx,%esi movl %ebp,%edi + psrld $30,%xmm12 roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx - por %xmm8,%xmm2 - addl 44(%rsp),%ecx xorl %ebx,%edi - movdqa %xmm2,%xmm9 + rorl $7,%eax + por %xmm12,%xmm6 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm5,%xmm13 + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 48(%rsp),%ebx - pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,201,8 + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm6,%xmm13 movl %ecx,%edi roll $5,%ecx - pxor %xmm4,%xmm3 - xorl %ebp,%esi - addl %ecx,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm2,%xmm10 - rorl $7,%edx + pxor %xmm8,%xmm7 addl %esi,%ebx - pxor %xmm9,%xmm3 - addl 52(%rsp),%eax xorl %ebp,%edi + movdqa %xmm3,%xmm12 + rorl $7,%edx + paddd %xmm6,%xmm3 + addl %ecx,%ebx + pxor %xmm13,%xmm7 + addl 52(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - movdqa %xmm3,%xmm9 - movdqa %xmm10,32(%rsp) - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movdqa %xmm7,%xmm13 addl %edi,%eax - pslld $2,%xmm3 - addl 56(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 xorl %edx,%esi - psrld $30,%xmm9 + movdqa %xmm3,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm7 + xorl %ecx,%esi movl %eax,%edi + psrld $30,%xmm13 roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp - por %xmm9,%xmm3 - addl 60(%rsp),%edx xorl %ecx,%edi - movdqa %xmm3,%xmm10 + rorl $7,%ebx + por %xmm13,%xmm7 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm6,%xmm3 + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 0(%rsp),%ecx - pxor %xmm0,%xmm4 -.byte 102,68,15,58,15,210,8 xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm4,%xmm8 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm7,%xmm3 movl %edx,%edi roll $5,%edx - pxor %xmm5,%xmm4 - xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %edx,%ecx - movdqa %xmm8,%xmm9 - paddd %xmm3,%xmm8 - rorl $7,%ebp + pxor %xmm9,%xmm8 addl %esi,%ecx - pxor %xmm10,%xmm4 - addl 4(%rsp),%ebx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebp + paddd %xmm7,%xmm12 + addl %edx,%ecx + pxor %xmm3,%xmm8 + addl 4(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - movdqa %xmm4,%xmm10 - movdqa %xmm8,48(%rsp) - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx + movdqa %xmm8,%xmm3 addl %edi,%ebx - pslld $2,%xmm4 - addl 8(%rsp),%eax xorl %ebp,%esi - psrld $30,%xmm10 + movdqa %xmm12,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm8 + xorl %edx,%esi movl %ebx,%edi + psrld $30,%xmm3 roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - por %xmm10,%xmm4 - addl 12(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 xorl %edx,%edi - movdqa %xmm4,%xmm8 + rorl $7,%ecx + por %xmm3,%xmm8 + addl %ebx,%eax + addl 12(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm7,%xmm12 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - 
addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 16(%rsp),%edx - pxor %xmm1,%xmm5 -.byte 102,68,15,58,15,195,8 xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm5,%xmm9 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm8,%xmm12 movl %ebp,%edi roll $5,%ebp - pxor %xmm6,%xmm5 - xorl %ebx,%esi - addl %ebp,%edx - movdqa %xmm9,%xmm10 - paddd %xmm4,%xmm9 - rorl $7,%eax + pxor %xmm10,%xmm9 addl %esi,%edx - pxor %xmm8,%xmm5 - addl 20(%rsp),%ecx xorl %ebx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%eax + paddd %xmm8,%xmm13 + addl %ebp,%edx + pxor %xmm12,%xmm9 + addl 20(%rsp),%ecx + xorl %eax,%edi movl %edx,%esi roll $5,%edx - movdqa %xmm5,%xmm8 - movdqa %xmm9,0(%rsp) - xorl %eax,%edi + movdqa %xmm9,%xmm12 + addl %edi,%ecx cmpl $11,%r8d jb .Laesenclast2 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 je .Laesenclast2 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 .Laesenclast2: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 - addl %edx,%ecx +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + movdqa %xmm13,0(%rsp) rorl $7,%ebp - addl %edi,%ecx - pslld $2,%xmm5 + addl %edx,%ecx addl 24(%rsp),%ebx - xorl %eax,%esi - psrld $30,%xmm8 + pslld $2,%xmm9 + xorl %ebp,%esi movl %ecx,%edi + psrld $30,%xmm12 roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - por %xmm8,%xmm5 - addl 28(%rsp),%eax xorl %ebp,%edi - movdqa %xmm5,%xmm9 + rorl $7,%edx + por %xmm12,%xmm9 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm8,%xmm13 + rorl $7,%ecx movl %ebx,%esi - roll $5,%ebx xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + roll $5,%ebx addl %edi,%eax - movl %ecx,%edi - movups 32(%r12),%xmm12 - xorps %xmm13,%xmm12 - movups %xmm11,16(%r13,%r12,1) - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 - pxor %xmm2,%xmm6 -.byte 102,68,15,58,15,204,8 + xorl %ecx,%esi xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm6,%xmm10 addl 32(%rsp),%ebp - andl %edx,%edi - pxor %xmm7,%xmm6 + movups 32(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 andl %ecx,%esi + xorl %edx,%ecx rorl $7,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm5,%xmm10 - addl %edi,%ebp + punpcklqdq %xmm9,%xmm13 movl %eax,%edi - pxor %xmm9,%xmm6 + xorl %ecx,%esi + pxor %xmm11,%xmm10 roll $5,%eax addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movdqa %xmm6,%xmm9 - movdqa %xmm10,16(%rsp) - movl %ebx,%esi + movdqa %xmm3,%xmm12 + xorl %ebx,%edi + paddd %xmm9,%xmm3 xorl %ecx,%ebx + pxor %xmm13,%xmm10 + addl %eax,%ebp addl 36(%rsp),%edx - andl %ecx,%esi - pslld $2,%xmm6 andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - psrld $30,%xmm9 - addl %esi,%edx + movdqa %xmm10,%xmm13 movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm3,16(%rsp) roll $5,%ebp -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 addl %edi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - por %xmm9,%xmm6 - movl %eax,%edi + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + pslld $2,%xmm10 xorl %ebx,%eax - movdqa %xmm6,%xmm10 + addl %ebp,%edx + psrld $30,%xmm13 addl 40(%rsp),%ecx - andl %ebx,%edi andl %eax,%esi + xorl %ebx,%eax + por %xmm13,%xmm10 rorl $7,%ebp - addl %edi,%ecx movl %edx,%edi + xorl %eax,%esi roll $5,%edx + 
pshufd $238,%xmm9,%xmm3 addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %ebp,%esi + xorl %ebp,%edi xorl %eax,%ebp + addl %edx,%ecx addl 44(%rsp),%ebx - andl %eax,%esi andl %ebp,%edi -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 + xorl %eax,%ebp rorl $7,%edx - addl %esi,%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 movl %ecx,%esi + xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movl %edx,%edi - pxor %xmm3,%xmm7 -.byte 102,68,15,58,15,213,8 + xorl %edx,%esi xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm7,%xmm11 addl 48(%rsp),%eax - andl %ebp,%edi - pxor %xmm0,%xmm7 andl %edx,%esi + xorl %ebp,%edx rorl $7,%ecx - movdqa 48(%r11),%xmm9 - paddd %xmm6,%xmm8 - addl %edi,%eax + punpcklqdq %xmm10,%xmm3 movl %ebx,%edi - pxor %xmm10,%xmm7 + xorl %edx,%esi + pxor %xmm4,%xmm11 roll $5,%ebx addl %esi,%eax - xorl %ebp,%edx - addl %ebx,%eax - movdqa %xmm7,%xmm10 - movdqa %xmm8,32(%rsp) - movl %ecx,%esi -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 + movdqa 48(%r11),%xmm13 + xorl %ecx,%edi + paddd %xmm10,%xmm12 xorl %edx,%ecx + pxor %xmm3,%xmm11 + addl %ebx,%eax addl 52(%rsp),%ebp - andl %edx,%esi - pslld $2,%xmm7 + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - psrld $30,%xmm10 - addl %esi,%ebp + movdqa %xmm11,%xmm3 movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm12,32(%rsp) roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - por %xmm10,%xmm7 - movl %ebx,%edi + xorl %ebx,%esi + pslld $2,%xmm11 xorl %ecx,%ebx - movdqa %xmm7,%xmm8 + addl %eax,%ebp + psrld $30,%xmm3 addl 56(%rsp),%edx - andl %ecx,%edi andl %ebx,%esi + xorl %ecx,%ebx + por %xmm3,%xmm11 rorl $7,%eax - addl %edi,%edx movl %ebp,%edi + xorl %ebx,%esi roll $5,%ebp -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 + pshufd $238,%xmm10,%xmm12 addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movl %eax,%esi + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %eax,%edi xorl %ebx,%eax + addl %ebp,%edx addl 60(%rsp),%ecx - andl %ebx,%esi andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - addl %esi,%ecx movl %edx,%esi + xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %ebp,%edi - pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,198,8 + xorl %ebp,%esi xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm8,%xmm4 addl 0(%rsp),%ebx - andl %eax,%edi - pxor %xmm1,%xmm0 andl %ebp,%esi -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 + xorl %eax,%ebp rorl $7,%edx - movdqa %xmm9,%xmm10 - paddd %xmm7,%xmm9 - addl %edi,%ebx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm11,%xmm12 movl %ecx,%edi - pxor %xmm8,%xmm0 + xorl %ebp,%esi + pxor %xmm5,%xmm4 roll $5,%ecx addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movdqa %xmm0,%xmm8 - movdqa %xmm9,48(%rsp) - movl %edx,%esi + movdqa %xmm13,%xmm3 + xorl %edx,%edi + paddd %xmm11,%xmm13 xorl %ebp,%edx + pxor %xmm12,%xmm4 + addl %ecx,%ebx addl 4(%rsp),%eax - andl %ebp,%esi - pslld $2,%xmm0 andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - psrld $30,%xmm8 - addl %esi,%eax + movdqa %xmm4,%xmm12 movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm13,48(%rsp) roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx - addl %ebx,%eax - por %xmm8,%xmm0 - movl %ecx,%edi -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 + xorl %ecx,%esi + pslld $2,%xmm4 xorl %edx,%ecx - movdqa %xmm0,%xmm9 + addl %ebx,%eax + psrld $30,%xmm12 addl 8(%rsp),%ebp - andl %edx,%edi + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 andl %ecx,%esi + xorl %edx,%ecx + por %xmm12,%xmm4 rorl $7,%ebx - addl %edi,%ebp movl 
%eax,%edi + xorl %ecx,%esi roll $5,%eax + pshufd $238,%xmm11,%xmm13 addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movl %ebx,%esi + xorl %ebx,%edi xorl %ecx,%ebx + addl %eax,%ebp addl 12(%rsp),%edx - andl %ecx,%esi andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - addl %esi,%edx movl %ebp,%esi + xorl %ebx,%edi roll $5,%ebp -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 addl %edi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movl %eax,%edi - pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,207,8 + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm9,%xmm5 addl 16(%rsp),%ecx - andl %ebx,%edi - pxor %xmm2,%xmm1 andl %eax,%esi + xorl %ebx,%eax rorl $7,%ebp - movdqa %xmm10,%xmm8 - paddd %xmm0,%xmm10 - addl %edi,%ecx + punpcklqdq %xmm4,%xmm13 movl %edx,%edi - pxor %xmm9,%xmm1 + xorl %eax,%esi + pxor %xmm6,%xmm5 roll $5,%edx addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movdqa %xmm1,%xmm9 - movdqa %xmm10,0(%rsp) - movl %ebp,%esi + movdqa %xmm3,%xmm12 + xorl %ebp,%edi + paddd %xmm4,%xmm3 xorl %eax,%ebp + pxor %xmm13,%xmm5 + addl %edx,%ecx addl 20(%rsp),%ebx - andl %eax,%esi - pslld $2,%xmm1 andl %ebp,%edi -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 + xorl %eax,%ebp rorl $7,%edx - psrld $30,%xmm9 - addl %esi,%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm5,%xmm13 movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm3,0(%rsp) roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - por %xmm9,%xmm1 - movl %edx,%edi + xorl %edx,%esi + pslld $2,%xmm5 xorl %ebp,%edx - movdqa %xmm1,%xmm10 + addl %ecx,%ebx + psrld $30,%xmm13 addl 24(%rsp),%eax - andl %ebp,%edi andl %edx,%esi + xorl %ebp,%edx + por %xmm13,%xmm5 rorl $7,%ecx - addl %edi,%eax movl %ebx,%edi + xorl %edx,%esi roll $5,%ebx + pshufd $238,%xmm4,%xmm3 addl %esi,%eax - xorl %ebp,%edx + xorl %ecx,%edi + xorl %edx,%ecx addl %ebx,%eax - movl %ecx,%esi + addl 28(%rsp),%ebp cmpl $11,%r8d jb .Laesenclast3 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 je .Laesenclast3 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 .Laesenclast3: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 - xorl %edx,%ecx - addl 28(%rsp),%ebp - andl %edx,%esi +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - addl %esi,%ebp movl %eax,%esi + xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movl %ebx,%edi - pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,208,8 + xorl %ebx,%esi xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm6 addl 32(%rsp),%edx - andl %ecx,%edi - pxor %xmm3,%xmm2 andl %ebx,%esi + xorl %ecx,%ebx rorl $7,%eax - movdqa %xmm8,%xmm9 - paddd %xmm1,%xmm8 - addl %edi,%edx + punpcklqdq %xmm5,%xmm3 movl %ebp,%edi - pxor %xmm10,%xmm2 + xorl %ebx,%esi + pxor %xmm7,%xmm6 roll $5,%ebp - movups 48(%r12),%xmm12 - xorps %xmm13,%xmm12 - movups %xmm11,32(%r13,%r12,1) - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movdqa %xmm2,%xmm10 - movdqa %xmm8,16(%rsp) - movl %eax,%esi + movups 48(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa 
%xmm12,%xmm13 + xorl %eax,%edi + paddd %xmm5,%xmm12 xorl %ebx,%eax + pxor %xmm3,%xmm6 + addl %ebp,%edx addl 36(%rsp),%ecx - andl %ebx,%esi - pslld $2,%xmm2 andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - psrld $30,%xmm10 - addl %esi,%ecx + movdqa %xmm6,%xmm3 movl %edx,%esi + xorl %eax,%edi + movdqa %xmm12,16(%rsp) roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - por %xmm10,%xmm2 - movl %ebp,%edi + xorl %ebp,%esi + pslld $2,%xmm6 xorl %eax,%ebp - movdqa %xmm2,%xmm8 + addl %edx,%ecx + psrld $30,%xmm3 addl 40(%rsp),%ebx - andl %eax,%edi andl %ebp,%esi -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 + xorl %eax,%ebp + por %xmm3,%xmm6 rorl $7,%edx - addl %edi,%ebx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 movl %ecx,%edi + xorl %ebp,%esi roll $5,%ecx + pshufd $238,%xmm5,%xmm12 addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movl %edx,%esi + xorl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx addl 44(%rsp),%eax - andl %ebp,%esi andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - addl %esi,%eax movl %ebx,%esi + xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx + xorl %edx,%esi addl %ebx,%eax + pxor %xmm11,%xmm7 addl 48(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 - pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,193,8 - xorl %edx,%esi + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm6,%xmm12 movl %eax,%edi roll $5,%eax - pxor %xmm4,%xmm3 - xorl %ecx,%esi - addl %eax,%ebp - movdqa %xmm9,%xmm10 - paddd %xmm2,%xmm9 - rorl $7,%ebx + pxor %xmm8,%xmm7 addl %esi,%ebp - pxor %xmm8,%xmm3 - addl 52(%rsp),%edx xorl %ecx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%ebx + paddd %xmm6,%xmm13 + addl %eax,%ebp + pxor %xmm12,%xmm7 + addl 52(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - movdqa %xmm3,%xmm8 - movdqa %xmm9,32(%rsp) - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movdqa %xmm7,%xmm12 addl %edi,%edx - pslld $2,%xmm3 - addl 56(%rsp),%ecx xorl %ebx,%esi - psrld $30,%xmm8 + movdqa %xmm13,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm7 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm12 roll $5,%edx - xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - por %xmm8,%xmm3 - addl 60(%rsp),%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%edi + rorl $7,%ebp + por %xmm12,%xmm7 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 0(%rsp),%eax - paddd %xmm3,%xmm10 xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi - movdqa %xmm10,48(%rsp) - addl %ebx,%eax - rorl $7,%ecx + paddd %xmm7,%xmm3 addl %esi,%eax - addl 4(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 xorl %edx,%edi + movdqa %xmm3,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 
0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,235 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm13,%xmm4 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm4,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm13,%xmm4 + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,243 + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm13,%xmm5 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm5,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm13,%xmm5 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,251 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm13,%xmm6 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm6,32(%rsp) + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast4 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast4 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast4: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm13,%xmm6 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp .Loop_ssse3 + +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + 
xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast5 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast5 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast5: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm2,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.type aesni_cbc_sha1_enc_avx,@function +.align 32 +aesni_cbc_sha1_enc_avx: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + vzeroall + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + vmovdqu (%r8),%xmm12 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r11),%xmm6 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r10 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm10,%xmm0,%xmm4 + vpaddd %xmm10,%xmm1,%xmm5 + vpaddd %xmm10,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + jmp .Loop_avx +.align 32 +.Loop_avx: + shrdl $2,%ebx,%ebx + vmovdqu 0(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm10,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + 
vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm9 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpor %xmm8,%xmm4,%xmm4 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + vpxor %xmm9,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm10,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm9 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpor %xmm8,%xmm5,%xmm5 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm9,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa 16(%r11),%xmm10 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpaddd %xmm5,%xmm10,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm9 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpor %xmm8,%xmm6,%xmm6 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm9,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm10,%xmm9 + xorl %eax,%ebp + shldl 
$5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm9 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpor %xmm8,%xmm7,%xmm7 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + cmpl $11,%r8d + jb .Lvaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast6: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm9,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm10,%xmm9 + addl %esi,%edx + vmovdqu 16(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,0(%r12,%r13,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor 
%xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm10,%xmm9 + vmovdqa 32(%r11),%xmm10 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + vpaddd %xmm3,%xmm10,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm10,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + cmpl $11,%r8d + jb .Lvaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast7: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + 
vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + vmovdqu 32(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,16(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm10,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm10,%xmm9 + vmovdqa 48(%r11),%xmm10 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm10,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl 
%edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm10,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb .Lvaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast8: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm10,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vmovdqu 48(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,32(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl 
%esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm10,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax addl %edi,%ebp - addl 8(%rsp),%edx xorl %ecx,%esi - movl %ebp,%edi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%edi + shldl $5,%ebp,%ebp addl %esi,%edx - addl 12(%rsp),%ecx xorl %ebx,%edi - movl %edx,%esi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx xorl %eax,%edi -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp + movl %edx,%esi + shldl $5,%edx,%edx addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx cmpq %r14,%r10 - je .Ldone_ssse3 - movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 - movdqu 0(%r10),%xmm0 - movdqu 16(%r10),%xmm1 - movdqu 32(%r10),%xmm2 - movdqu 48(%r10),%xmm3 -.byte 102,15,56,0,198 + je .Ldone_avx + vmovdqa 64(%r11),%xmm9 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm9,%xmm0,%xmm0 addq $64,%r10 addl 16(%rsp),%ebx - xorl %eax,%esi -.byte 102,15,56,0,206 - movl %ecx,%edi - roll $5,%ecx - paddd %xmm9,%xmm0 xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + vpshufb %xmm9,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm10,%xmm0,%xmm8 addl %esi,%ebx - movdqa %xmm0,0(%rsp) - addl 20(%rsp),%eax xorl %ebp,%edi - psubd %xmm9,%xmm0 - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm8,0(%rsp) + addl 20(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - addl 24(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 xorl %edx,%esi - movl %eax,%edi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%edi + shldl $5,%eax,%eax addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi - movl %ebp,%esi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%esi + shldl $5,%ebp,%ebp addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi -.byte 102,15,56,0,214 - movl %edx,%edi - roll $5,%edx - paddd %xmm9,%xmm1 + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp + vpshufb %xmm9,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + 
vpaddd %xmm10,%xmm1,%xmm8 addl %esi,%ecx - movdqa %xmm1,16(%rsp) - addl 36(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 xorl %eax,%edi - psubd %xmm9,%xmm1 - movl %ecx,%esi - roll $5,%ecx + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm8,16(%rsp) + addl 36(%rsp),%ebx xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%esi + shldl $5,%ecx,%ecx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi - movl %ebx,%edi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%edi + shldl $5,%ebx,%ebx addl %esi,%eax - addl 44(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 xorl %edx,%edi - movl %eax,%esi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%esi + shldl $5,%eax,%eax addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi -.byte 102,15,56,0,222 - movl %ebp,%edi - roll $5,%ebp - paddd %xmm9,%xmm2 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax + vpshufb %xmm9,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm10,%xmm2,%xmm8 addl %esi,%edx - movdqa %xmm2,32(%rsp) - addl 52(%rsp),%ecx xorl %ebx,%edi - psubd %xmm9,%xmm2 - movl %edx,%esi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm8,32(%rsp) + addl 52(%rsp),%ecx xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx cmpl $11,%r8d - jb .Laesenclast4 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 - je .Laesenclast4 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 -.Laesenclast4: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 + jb .Lvaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast9: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp addl %edx,%ecx - rorl $7,%ebp - addl %edi,%ecx addl 56(%rsp),%ebx - xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%edi + shldl $5,%ecx,%ecx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - movups %xmm11,48(%r13,%r12,1) + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) leaq 64(%r12),%r12 addl 0(%r9),%eax @@ -1238,129 +2520,131 @@ aesni_cbc_sha1_enc_ssse3: movl %esi,4(%r9) movl %esi,%ebx movl %ecx,8(%r9) + movl %ecx,%edi movl %edx,12(%r9) + xorl %edx,%edi movl %ebp,16(%r9) - jmp .Loop_ssse3 + andl %edi,%esi + jmp .Loop_avx -.align 16 -.Ldone_ssse3: +.Ldone_avx: addl 16(%rsp),%ebx - xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%edi + shldl $5,%ecx,%ecx addl %esi,%ebx - addl 20(%rsp),%eax xorl %ebp,%edi - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - addl 
24(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 xorl %edx,%esi - movl %eax,%edi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%edi + shldl $5,%eax,%eax addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi - movl %ebp,%esi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%esi + shldl $5,%ebp,%ebp addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi - movl %edx,%edi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp + movl %edx,%edi + shldl $5,%edx,%edx addl %esi,%ecx - addl 36(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 xorl %eax,%edi - movl %ecx,%esi - roll $5,%ecx + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%esi + shldl $5,%ecx,%ecx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi - movl %ebx,%edi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%edi + shldl $5,%ebx,%ebx addl %esi,%eax - addl 44(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 xorl %edx,%edi - movl %eax,%esi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%esi + shldl $5,%eax,%eax addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi - movl %ebp,%edi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%edi + shldl $5,%ebp,%ebp addl %esi,%edx - addl 52(%rsp),%ecx xorl %ebx,%edi - movl %edx,%esi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx cmpl $11,%r8d - jb .Laesenclast5 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 - je .Laesenclast5 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 -.Laesenclast5: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 + jb .Lvaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je .Lvaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +.Lvaesenclast10: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp addl %edx,%ecx - rorl $7,%ebp - addl %edi,%ecx addl 56(%rsp),%ebx - xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%edi + shldl $5,%ecx,%ecx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - movups %xmm11,48(%r13,%r12,1) + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) movq 88(%rsp),%r8 addl 0(%r9),%eax @@ -1373,7 +2657,8 @@ aesni_cbc_sha1_enc_ssse3: movl %ecx,8(%r9) movl %edx,12(%r9) movl %ebp,16(%r9) - 
movups %xmm11,(%r8) + vmovups %xmm12,(%r8) + vzeroall leaq 104(%rsp),%rsi movq 0(%rsi),%r15 movq 8(%rsi),%r14 @@ -1382,21 +2667,318 @@ aesni_cbc_sha1_enc_ssse3: movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp -.Lepilogue_ssse3: +.Lepilogue_avx: .byte 0xf3,0xc3 -.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx .align 64 K_XX_XX: .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 - .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 - .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc - .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 - .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f - +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 +.type aesni_cbc_sha1_enc_shaext,@function +.align 32 +aesni_cbc_sha1_enc_shaext: + movq 8(%rsp),%r10 + movdqu (%r9),%xmm8 + movd 16(%r9),%xmm9 + movdqa K_XX_XX+80(%rip),%xmm7 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm0 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqu (%r10),%xmm3 + movdqa %xmm9,%xmm12 +.byte 102,15,56,0,223 + movdqu 16(%r10),%xmm4 + movdqa %xmm8,%xmm11 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,0,231 + + paddd %xmm3,%xmm9 + movdqu 32(%r10),%xmm5 + leaq 64(%r10),%r10 + pxor %xmm12,%xmm3 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm3 + movdqa %xmm8,%xmm10 +.byte 102,15,56,0,239 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 15,56,201,220 + movdqu -16(%r10),%xmm6 + movdqa %xmm8,%xmm9 +.byte 102,15,56,0,247 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,205 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,203 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + cmpl $11,%r11d + jb .Laesenclast11 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast11 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast11: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa 
%xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,214 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,203 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,212 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + cmpl $11,%r11d + jb .Laesenclast12 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast12 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast12: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,203 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,212 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,205 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + cmpl $11,%r11d + jb .Laesenclast13 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast13 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast13: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,203 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,212 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 +.byte 15,56,202,245 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm12,%xmm5 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 
68,15,56,200,205 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 + cmpl $11,%r11d + jb .Laesenclast14 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast14 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast14: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + decq %rdx + + paddd %xmm11,%xmm8 + movups %xmm2,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + movups %xmm2,(%r8) + movdqu %xmm8,(%r9) + movd %xmm9,16(%r9) + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-sha256-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-sha256-x86_64.s new file mode 100644 index 00000000000000..14c1f7a7e2660c --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-sha256-x86_64.s @@ -0,0 +1,4357 @@ +.text + + +.globl aesni_cbc_sha256_enc +.type aesni_cbc_sha256_enc,@function +.align 16 +aesni_cbc_sha256_enc: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl $1,%eax + cmpq $0,%rdi + je .Lprobe + movl 0(%r11),%eax + movq 4(%r11),%r10 + btq $61,%r10 + jc aesni_cbc_sha256_enc_shaext + movq %r10,%r11 + shrq $32,%r11 + + testl $2048,%r10d + jnz aesni_cbc_sha256_enc_xop + andl $296,%r11d + cmpl $296,%r11d + je aesni_cbc_sha256_enc_avx2 + andl $1073741824,%eax + andl $268435968,%r10d + orl %eax,%r10d + cmpl $1342177792,%r10d + je aesni_cbc_sha256_enc_avx + ud2 + xorl %eax,%eax + cmpq $0,%rdi + je .Lprobe + ud2 +.Lprobe: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc + +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 +.long 0,0,0,0, 0,0,0,0 +.byte 
65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.type aesni_cbc_sha256_enc_xop,@function +.align 64 +aesni_cbc_sha256_enc_xop: +.Lxop_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +.Lprologue_xop: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm2,%xmm3,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm0,%xmm0 + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,251,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm3,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm0,%xmm0 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,248,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm0,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor 
%xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm0,%xmm0 + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm3,%xmm0,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm1,%xmm1 + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,248,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm0,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm1,%xmm1 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,249,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm1,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm1,%xmm1 + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm0,%xmm1,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm2,%xmm2 + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,249,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm1,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm2,%xmm2 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,250,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm2,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm2,%xmm2 + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm1,%xmm2,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm3,%xmm3 + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 
+ xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,250,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm2,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm3,%xmm3 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,251,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm3,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm3,%xmm3 + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lxop_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + 
rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl 
%r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl 
%ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jb .Lloop_xop + + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_xop,.-aesni_cbc_sha256_enc_xop +.type aesni_cbc_sha256_enc_avx,@function +.align 64 +aesni_cbc_sha256_enc_avx: +.Lavx_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +.Lprologue_avx: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 
0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm3,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpaddd %xmm6,%xmm0,%xmm0 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm0,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 0(%rbp),%xmm0,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm0,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor 
%xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpaddd %xmm6,%xmm1,%xmm1 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm1,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 32(%rbp),%xmm1,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm1,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl 
%r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpaddd %xmm6,%xmm2,%xmm2 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm2,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 64(%rbp),%xmm2,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm2,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl 
%ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpaddd %xmm6,%xmm3,%xmm3 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm3,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 96(%rbp),%xmm3,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne .Lavx_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl 
%ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + 
xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl 
%r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + jb .Lloop_avx + + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_avx,.-aesni_cbc_sha256_enc_avx +.type aesni_cbc_sha256_enc_avx2,@function +.align 64 +aesni_cbc_sha256_enc_avx2: +.Lavx2_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $576,%rsp + andq $-1024,%rsp + addq $448,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +.Lprologue_avx2: + vzeroall + + movq %rdi,%r13 + vpinsrq $1,%rsi,%xmm15,%xmm15 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r12 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + leaq -9(%r14),%r14 + + vmovdqa 0(%r12,%r14,8),%xmm14 + vmovdqa 16(%r12,%r14,8),%xmm13 + vmovdqa 32(%r12,%r14,8),%xmm12 + + subq $-64,%r13 + movl 0(%r15),%eax + leaq (%rsi,%r13,1),%r12 + movl 4(%r15),%ebx + cmpq %rdx,%r13 + movl 8(%r15),%ecx + cmoveq %rsp,%r12 + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + vmovdqu 0-128(%rdi),%xmm10 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi,%r13,1),%xmm0 + vmovdqu -64+16(%rsi,%r13,1),%xmm1 + vmovdqu -64+32(%rsi,%r13,1),%xmm2 + vmovdqu -64+48(%rsi,%r13,1),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + leaq -64(%r13),%r13 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%esi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%esi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl 
%r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm0,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm0,%ymm0 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 0(%rbp),%ymm0,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm1,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm1,%ymm1 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 32(%rbp),%ymm1,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal 
(%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm2,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm2,%ymm2 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 64(%rbp),%ymm2,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + 
vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm3,%ymm7 + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm3,%ymm3 + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 96(%rbp),%ymm3,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + 
leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl 
%r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx 
+ andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vpextrq $1,%xmm15,%r12 + vmovq %xmm15,%r13 + movq 552(%rsp),%r15 + addl %r14d,%eax + leaq 448(%rsp),%rbp + + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r13),%r13 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + cmpq 80(%rbp),%r13 + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%esi + movl %r9d,%r12d + xorl %ecx,%esi + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + 
movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 
4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl 
%edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + leaq -64(%rbp),%rbp + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 552(%rsp),%r15 + leaq 64(%r13),%r13 + movq 560(%rsp),%rsi + addl %r14d,%eax + leaq 448(%rsp),%rsp + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + leaq (%rsi,%r13,1),%r12 + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r13 + + movl %eax,0(%r15) + cmoveq %rsp,%r12 + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_avx2,.-aesni_cbc_sha256_enc_avx2 +.type aesni_cbc_sha256_enc_shaext,@function +.align 32 +aesni_cbc_sha256_enc_shaext: + movq 8(%rsp),%r10 + leaq K256+128(%rip),%rax + movdqu (%r9),%xmm1 + movdqu 16(%r9),%xmm2 + movdqa 512-128(%rax),%xmm3 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm4 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%r10),%xmm10 + movdqu 16(%r10),%xmm11 + movdqu 32(%r10),%xmm12 +.byte 102,68,15,56,0,211 + movdqu 48(%r10),%xmm13 + + movdqa 0-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 102,68,15,56,0,219 + movdqa %xmm2,%xmm9 + movdqa %xmm1,%xmm8 + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 32-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 102,68,15,56,0,227 + leaq 64(%r10),%r10 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 64-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 102,68,15,56,0,235 +.byte 69,15,56,204,211 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 96-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd 
$14,%xmm0,%xmm0 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 +.byte 15,56,203,202 + movdqa 128-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + cmpl $11,%r11d + jb .Laesenclast1 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast1 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast1: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,0(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 160-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 192-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 224-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 256-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast2 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast2 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast2: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,16(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 288-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 320-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 352-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 
+ paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 384-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 416-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + cmpl $11,%r11d + jb .Laesenclast3 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast3 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast3: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,32(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 448-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 + movdqa %xmm7,%xmm3 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 480-128(%rax),%xmm0 + paddd %xmm13,%xmm0 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb .Laesenclast4 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je .Laesenclast4 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.Laesenclast4: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop + + paddd %xmm9,%xmm2 + paddd %xmm8,%xmm1 + + decq %rdx + movups %xmm6,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm3 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,211,8 + + movups %xmm6,(%r8) + movdqu %xmm1,(%r9) + movdqu %xmm2,16(%r9) + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc_shaext,.-aesni_cbc_sha256_enc_shaext diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s index 2d24b7b28d69a5..84708afbbb352f 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s @@ -16,7 +16,6 @@ aesni_encrypt: movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz .Loop_enc1_1 - .byte 102,15,56,221,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 @@ -38,34 +37,92 @@ aesni_decrypt: movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz .Loop_dec1_2 - .byte 102,15,56,223,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 .size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop2 + +.byte 
102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 +.size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax .Lenc_loop3: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop3 .byte 102,15,56,220,209 @@ -80,25 +137,26 @@ _aesni_encrypt3: .align 16 _aesni_decrypt3: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax .Ldec_loop3: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop3 .byte 102,15,56,222,209 @@ -113,28 +171,30 @@ _aesni_decrypt3: .align 16 _aesni_encrypt4: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax .Lenc_loop4: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop4 .byte 102,15,56,220,209 @@ -151,28 +211,30 @@ _aesni_encrypt4: .align 16 _aesni_decrypt4: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax .Ldec_loop4: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 - movups (%rcx),%xmm0 + movups 
-16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop4 .byte 102,15,56,222,209 @@ -189,43 +251,43 @@ _aesni_decrypt4: .align 16 _aesni_encrypt6: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax .byte 102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,225 pxor %xmm0,%xmm7 - decl %eax + addq $16,%rax +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%rcx),%xmm0 .byte 102,15,56,220,249 + movups -16(%rcx,%rax,1),%xmm0 jmp .Lenc_loop6_enter .align 16 .Lenc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .Lenc_loop6_enter: - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop6 .byte 102,15,56,220,209 @@ -246,43 +308,43 @@ _aesni_encrypt6: .align 16 _aesni_decrypt6: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax .byte 102,15,56,222,217 pxor %xmm0,%xmm5 -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,225 pxor %xmm0,%xmm7 - decl %eax + addq $16,%rax +.byte 102,15,56,222,233 .byte 102,15,56,222,241 - movups (%rcx),%xmm0 .byte 102,15,56,222,249 + movups -16(%rcx,%rax,1),%xmm0 jmp .Ldec_loop6_enter .align 16 .Ldec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .Ldec_loop6_enter: - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop6 .byte 102,15,56,222,209 @@ -303,52 +365,51 @@ _aesni_decrypt6: .align 16 _aesni_encrypt8: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + addq $16,%rax pxor %xmm0,%xmm7 - decl %eax -.byte 102,15,56,220,241 +.byte 102,15,56,220,217 pxor %xmm0,%xmm8 -.byte 102,15,56,220,249 pxor %xmm0,%xmm9 - movups (%rcx),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 - movups 16(%rcx),%xmm1 + movups -16(%rcx,%rax,1),%xmm0 jmp .Lenc_loop8_enter .align 16 .Lenc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 - movups 16(%rcx),%xmm1 .Lenc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 
102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Lenc_loop8 .byte 102,15,56,220,209 @@ -373,52 +434,51 @@ _aesni_encrypt8: .align 16 _aesni_decrypt8: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 -.byte 102,15,56,222,217 pxor %xmm0,%xmm5 -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + addq $16,%rax pxor %xmm0,%xmm7 - decl %eax -.byte 102,15,56,222,241 +.byte 102,15,56,222,217 pxor %xmm0,%xmm8 -.byte 102,15,56,222,249 pxor %xmm0,%xmm9 - movups (%rcx),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movups 16(%rcx),%xmm1 + movups -16(%rcx,%rax,1),%xmm0 jmp .Ldec_loop8_enter .align 16 .Ldec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movups 16(%rcx),%xmm1 .Ldec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Ldec_loop8 .byte 102,15,56,222,209 @@ -548,14 +608,12 @@ aesni_ecb_encrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_3 - .byte 102,15,56,221,209 movups %xmm2,(%rsi) jmp .Lecb_ret .align 16 .Lecb_enc_two: - xorps %xmm4,%xmm4 - call _aesni_encrypt3 + call _aesni_encrypt2 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) jmp .Lecb_ret @@ -694,14 +752,12 @@ aesni_ecb_encrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_4 - .byte 102,15,56,223,209 movups %xmm2,(%rsi) jmp .Lecb_ret .align 16 .Lecb_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 + call _aesni_decrypt2 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) jmp .Lecb_ret @@ -748,53 +804,53 @@ aesni_ecb_encrypt: .align 16 aesni_ccm64_encrypt_blocks: movl 240(%rcx),%eax - movdqu (%r8),%xmm9 - movdqa .Lincrement64(%rip),%xmm6 + movdqu (%r8),%xmm6 + movdqa .Lincrement64(%rip),%xmm9 movdqa .Lbswap_mask(%rip),%xmm7 - shrl $1,%eax + shll $4,%eax + movl $16,%r10d leaq 0(%rcx),%r11 movdqu (%r9),%xmm3 - movdqa %xmm9,%xmm2 - movl %eax,%r10d -.byte 102,68,15,56,0,207 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 jmp .Lccm64_enc_outer .align 16 .Lccm64_enc_outer: movups (%r11),%xmm0 - movl %r10d,%eax + movq %r10,%rax movups (%rdi),%xmm8 xorps %xmm0,%xmm2 movups 16(%r11),%xmm1 xorps %xmm8,%xmm0 - leaq 32(%r11),%rcx xorps %xmm0,%xmm3 - movups (%rcx),%xmm0 + movups 32(%r11),%xmm0 .Lccm64_enc2_loop: .byte 102,15,56,220,209 - decl %eax .byte 102,15,56,220,217 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 - leaq 32(%rcx),%rcx .byte 102,15,56,220,216 - movups 0(%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Lccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 - paddq %xmm6,%xmm9 + paddq %xmm9,%xmm6 + decq %rdx .byte 102,15,56,221,208 .byte 102,15,56,221,216 - decq %rdx leaq 
16(%rdi),%rdi xorps %xmm2,%xmm8 - movdqa %xmm9,%xmm2 + movdqa %xmm6,%xmm2 movups %xmm8,(%rsi) - leaq 16(%rsi),%rsi .byte 102,15,56,0,215 + leaq 16(%rsi),%rsi jnz .Lccm64_enc_outer movups %xmm3,(%r9) @@ -805,15 +861,15 @@ aesni_ccm64_encrypt_blocks: .align 16 aesni_ccm64_decrypt_blocks: movl 240(%rcx),%eax - movups (%r8),%xmm9 + movups (%r8),%xmm6 movdqu (%r9),%xmm3 - movdqa .Lincrement64(%rip),%xmm6 + movdqa .Lincrement64(%rip),%xmm9 movdqa .Lbswap_mask(%rip),%xmm7 - movaps %xmm9,%xmm2 + movaps %xmm6,%xmm2 movl %eax,%r10d movq %rcx,%r11 -.byte 102,68,15,56,0,207 +.byte 102,15,56,0,247 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -824,17 +880,20 @@ aesni_ccm64_decrypt_blocks: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_5 - .byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax movups (%rdi),%xmm8 - paddq %xmm6,%xmm9 + paddq %xmm9,%xmm6 leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 jmp .Lccm64_dec_outer .align 16 .Lccm64_dec_outer: xorps %xmm2,%xmm8 - movdqa %xmm9,%xmm2 - movl %r10d,%eax + movdqa %xmm6,%xmm2 movups %xmm8,(%rsi) leaq 16(%rsi),%rsi .byte 102,15,56,0,215 @@ -843,36 +902,36 @@ aesni_ccm64_decrypt_blocks: jz .Lccm64_dec_break movups (%r11),%xmm0 - shrl $1,%eax + movq %r10,%rax movups 16(%r11),%xmm1 xorps %xmm0,%xmm8 - leaq 32(%r11),%rcx xorps %xmm0,%xmm2 xorps %xmm8,%xmm3 - movups (%rcx),%xmm0 - + movups 32(%r11),%xmm0 + jmp .Lccm64_dec2_loop +.align 16 .Lccm64_dec2_loop: .byte 102,15,56,220,209 - decl %eax .byte 102,15,56,220,217 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 - leaq 32(%rcx),%rcx .byte 102,15,56,220,216 - movups 0(%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz .Lccm64_dec2_loop movups (%rdi),%xmm8 - paddq %xmm6,%xmm9 + paddq %xmm9,%xmm6 .byte 102,15,56,220,209 .byte 102,15,56,220,217 - leaq 16(%rdi),%rdi .byte 102,15,56,221,208 .byte 102,15,56,221,216 + leaq 16(%rdi),%rdi jmp .Lccm64_dec_outer .align 16 .Lccm64_dec_break: + movl 240(%r11),%eax movups (%r11),%xmm0 movups 16(%r11),%xmm1 xorps %xmm0,%xmm8 @@ -884,7 +943,6 @@ aesni_ccm64_decrypt_blocks: movups (%r11),%xmm1 leaq 16(%r11),%r11 jnz .Loop_enc1_6 - .byte 102,15,56,221,217 movups %xmm3,(%r9) .byte 0xf3,0xc3 @@ -893,199 +951,518 @@ aesni_ccm64_decrypt_blocks: .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: + leaq (%rsp),%rax + pushq %rbp + subq $128,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + cmpq $1,%rdx je .Lctr32_one_shortcut - movdqu (%r8),%xmm14 - movdqa .Lbswap_mask(%rip),%xmm15 - xorl %eax,%eax -.byte 102,69,15,58,22,242,3 -.byte 102,68,15,58,34,240,3 - + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%r11d + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %r11d,%eax + xorl %r11d,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %r11d,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %r11d,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 movl 240(%rcx),%eax + xorl %r11d,%r9d bswapl %r10d - pxor %xmm12,%xmm12 - pxor %xmm13,%xmm13 -.byte 102,69,15,58,34,226,0 - leaq 3(%r10),%r11 -.byte 
102,69,15,58,34,235,0 - incl %r10d -.byte 102,69,15,58,34,226,1 - incq %r11 -.byte 102,69,15,58,34,235,1 - incl %r10d -.byte 102,69,15,58,34,226,2 - incq %r11 -.byte 102,69,15,58,34,235,2 - movdqa %xmm12,-40(%rsp) -.byte 102,69,15,56,0,231 - movdqa %xmm13,-24(%rsp) -.byte 102,69,15,56,0,239 - - pshufd $192,%xmm12,%xmm2 - pshufd $128,%xmm12,%xmm3 - pshufd $64,%xmm12,%xmm4 - cmpq $6,%rdx + movl %r9d,80+12(%rsp) + xorl %r11d,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %r11d,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx jb .Lctr32_tail - shrl $1,%eax - movq %rcx,%r11 - movl %eax,%r10d + subq $6,%rdx + cmpl $4194304,%r10d + je .Lctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp .Lctr32_loop8 + +.align 16 +.Lctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %r11d + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 jmp .Lctr32_loop6 .align 16 .Lctr32_loop6: - pshufd $192,%xmm13,%xmm5 - por %xmm14,%xmm2 - movups (%r11),%xmm0 - pshufd $128,%xmm13,%xmm6 - por %xmm14,%xmm3 - movups 16(%r11),%xmm1 - pshufd $64,%xmm13,%xmm7 - por %xmm14,%xmm4 - por %xmm14,%xmm5 - xorps %xmm0,%xmm2 - por %xmm14,%xmm6 - por %xmm14,%xmm7 + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %r11d,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %r11d,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 102,15,56,220,224 + leal 5(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + call .Lenc_loop6 + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + + subq $6,%rdx + jnc .Lctr32_loop6 + addq $6,%rdx + jz .Lctr32_done - pxor %xmm0,%xmm3 + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp .Lctr32_tail + +.align 32 +.Lctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 .byte 102,15,56,220,209 - leaq 32(%r11),%rcx - pxor %xmm0,%xmm4 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 .byte 102,15,56,220,217 - movdqa .Lincrement32(%rip),%xmm13 - pxor %xmm0,%xmm5 + bswapl %r9d + movups 32-128(%rcx),%xmm0 .byte 102,15,56,220,225 - movdqa -40(%rsp),%xmm12 - pxor %xmm0,%xmm6 + xorl %r11d,%r9d + nop .byte 102,15,56,220,233 - pxor %xmm0,%xmm7 - movups (%rcx),%xmm0 - decl %eax + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 - jmp 
.Lctr32_enc_loop6_enter -.align 16 -.Lctr32_enc_loop6: +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax + xorl %r11d,%r9d +.byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.Lctr32_enc_loop6_enter: - movups 16(%rcx),%xmm1 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx + xorl %r11d,%r9d +.byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%rcx),%xmm0 - jnz .Lctr32_enc_loop6 - +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d .byte 102,15,56,220,209 - paddd %xmm13,%xmm12 .byte 102,15,56,220,217 - paddd -24(%rsp),%xmm13 + xorl %r11d,%r9d +.byte 0x66,0x90 .byte 102,15,56,220,225 - movdqa %xmm12,-40(%rsp) .byte 102,15,56,220,233 - movdqa %xmm13,-24(%rsp) + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 .byte 102,15,56,220,241 -.byte 102,69,15,56,0,231 .byte 102,15,56,220,249 -.byte 102,69,15,56,0,239 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %r11d,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 -.byte 102,15,56,221,208 - movups (%rdi),%xmm8 -.byte 102,15,56,221,216 - movups 16(%rdi),%xmm9 -.byte 102,15,56,221,224 - movups 32(%rdi),%xmm10 -.byte 102,15,56,221,232 - movups 48(%rdi),%xmm11 -.byte 102,15,56,221,240 - movups 64(%rdi),%xmm1 -.byte 102,15,56,221,248 - movups 80(%rdi),%xmm0 - leaq 96(%rdi),%rdi + jb .Lctr32_enc_done - xorps %xmm2,%xmm8 - pshufd $192,%xmm12,%xmm2 - xorps %xmm3,%xmm9 - pshufd $128,%xmm12,%xmm3 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - pshufd $64,%xmm12,%xmm4 - movups %xmm9,16(%rsi) - xorps %xmm5,%xmm11 - movups %xmm10,32(%rsi) - xorps %xmm6,%xmm1 - movups %xmm11,48(%rsi) - xorps %xmm7,%xmm0 - movups %xmm1,64(%rsi) - movups %xmm0,80(%rsi) - leaq 96(%rsi),%rsi - movl %r10d,%eax - subq $6,%rdx - jnc .Lctr32_loop6 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 
102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 - addq $6,%rdx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp .Lctr32_enc_done + +.align 16 +.Lctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc .Lctr32_loop8 + + addq $8,%rdx jz .Lctr32_done - movq %r11,%rcx - leal 1(%rax,%rax,1),%eax + leaq -128(%rcx),%rcx .Lctr32_tail: - por %xmm14,%xmm2 - movups (%rdi),%xmm8 - cmpq $2,%rdx - jb .Lctr32_one + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb .Lctr32_loop3 + je .Lctr32_loop4 - por %xmm14,%xmm3 - movups 16(%rdi),%xmm9 - je .Lctr32_two + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 - pshufd $192,%xmm13,%xmm5 - por %xmm14,%xmm4 - movups 32(%rdi),%xmm10 - cmpq $4,%rdx - jb .Lctr32_three + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 - pshufd $128,%xmm13,%xmm6 - por %xmm14,%xmm5 - movups 48(%rdi),%xmm11 - je .Lctr32_four + call .Lenc_loop8_enter - por %xmm14,%xmm6 - xorps %xmm7,%xmm7 + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb .Lctr32_done - 
call _aesni_encrypt6 + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je .Lctr32_done - movups 64(%rdi),%xmm1 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - movups %xmm9,16(%rsi) - xorps %xmm5,%xmm11 - movups %xmm10,32(%rsi) - xorps %xmm6,%xmm1 - movups %xmm11,48(%rsi) - movups %xmm1,64(%rsi) + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz .Lctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz .Lctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb .Lctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je .Lctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) jmp .Lctr32_done .align 16 .Lctr32_one_shortcut: movups (%r8),%xmm2 - movups (%rdi),%xmm8 + movups (%rdi),%xmm10 movl 240(%rcx),%eax -.Lctr32_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -1096,290 +1473,302 @@ aesni_ctr32_encrypt_blocks: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_7 - .byte 102,15,56,221,209 - xorps %xmm2,%xmm8 - movups %xmm8,(%rsi) - jmp .Lctr32_done - -.align 16 -.Lctr32_two: - xorps %xmm4,%xmm4 - call _aesni_encrypt3 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - movups %xmm9,16(%rsi) - jmp .Lctr32_done - -.align 16 -.Lctr32_three: - call _aesni_encrypt3 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - movups %xmm9,16(%rsi) - movups %xmm10,32(%rsi) + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) jmp .Lctr32_done .align 16 -.Lctr32_four: - call _aesni_encrypt4 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - movups %xmm9,16(%rsi) - xorps %xmm5,%xmm11 - movups %xmm10,32(%rsi) - movups %xmm11,48(%rsi) - .Lctr32_done: + leaq (%rbp),%rsp + popq %rbp +.Lctr32_epilogue: .byte 0xf3,0xc3 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks .globl aesni_xts_encrypt .type aesni_xts_encrypt,@function .align 16 aesni_xts_encrypt: - leaq -104(%rsp),%rsp - movups (%r9),%xmm15 + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 movl 240(%r8),%eax movl 240(%rcx),%r10d movups (%r8),%xmm0 movups 16(%r8),%xmm1 leaq 32(%r8),%r8 - xorps %xmm0,%xmm15 + xorps %xmm0,%xmm2 .Loop_enc1_8: -.byte 102,68,15,56,220,249 +.byte 102,15,56,220,209 decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 jnz .Loop_enc1_8 - -.byte 102,68,15,56,221,249 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax + shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx + movups 16(%rcx,%r10,1),%xmm1 + movdqa .Lxts_magic(%rip),%xmm8 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - pshufd $19,%xmm14,%xmm9 - 
pxor %xmm14,%xmm14 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm10 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm11 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm12 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 + pxor %xmm0,%xmm14 pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + subq $96,%rdx jc .Lxts_enc_short - shrl $1,%eax - subl $1,%eax - movl %eax,%r10d + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 jmp .Lxts_enc_grandloop -.align 16 +.align 32 .Lxts_enc_grandloop: - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - pand %xmm8,%xmm9 + movdqa %xmm0,%xmm8 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 - movdqu 48(%rdi),%xmm5 + movdqu 32(%rdi),%xmm4 pxor %xmm11,%xmm3 - movdqu 64(%rdi),%xmm6 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 pxor %xmm12,%xmm4 - movdqu 80(%rdi),%xmm7 - leaq 96(%rdi),%rdi +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 pxor %xmm13,%xmm5 - movups (%r11),%xmm0 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 pxor %xmm14,%xmm6 - pxor %xmm15,%xmm7 - - +.byte 102,15,56,220,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 - movups 16(%r11),%xmm1 - pxor %xmm0,%xmm2 - pxor %xmm0,%xmm3 + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 movdqa %xmm10,0(%rsp) -.byte 102,15,56,220,209 - leaq 32(%r11),%rcx - pxor %xmm0,%xmm4 +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 movdqa %xmm11,16(%rsp) -.byte 102,15,56,220,217 - pxor %xmm0,%xmm5 +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 movdqa %xmm12,32(%rsp) -.byte 102,15,56,220,225 - pxor %xmm0,%xmm6 - movdqa %xmm13,48(%rsp) -.byte 102,15,56,220,233 - pxor %xmm0,%xmm7 - movups (%rcx),%xmm0 - decl %eax +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 movdqa %xmm14,64(%rsp) -.byte 102,15,56,220,241 - movdqa %xmm15,80(%rsp) -.byte 102,15,56,220,249 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - jmp .Lxts_enc_loop6_enter - -.align 16 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp .Lxts_enc_loop6 +.align 32 .Lxts_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.Lxts_enc_loop6_enter: - movups 16(%rcx),%xmm1 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + .byte 
102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%rcx),%xmm0 + movups -80(%rcx,%rax,1),%xmm0 jnz .Lxts_enc_loop6 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - paddq %xmm15,%xmm15 + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 .byte 102,15,56,220,209 - pand %xmm8,%xmm9 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 .byte 102,15,56,220,217 - pcmpgtd %xmm15,%xmm14 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 .byte 102,15,56,220,225 - pxor %xmm9,%xmm15 .byte 102,15,56,220,233 .byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 .byte 102,15,56,220,249 - movups 16(%rcx),%xmm1 + movups -64(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm10 - paddq %xmm15,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,220,208 - pand %xmm8,%xmm9 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 .byte 102,15,56,220,216 - pcmpgtd %xmm15,%xmm14 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 .byte 102,15,56,220,224 - pxor %xmm9,%xmm15 .byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 .byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,220,248 - movups 32(%rcx),%xmm0 + movups -48(%rcx),%xmm0 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm11 - paddq %xmm15,%xmm15 + paddd %xmm9,%xmm9 .byte 102,15,56,220,209 - pand %xmm8,%xmm9 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 .byte 102,15,56,220,217 - pcmpgtd %xmm15,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 .byte 102,15,56,220,225 - pxor %xmm9,%xmm15 .byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 .byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 .byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm12 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,220,216 paddq %xmm15,%xmm15 -.byte 102,15,56,221,208 - pand %xmm8,%xmm9 -.byte 102,15,56,221,216 - pcmpgtd %xmm15,%xmm14 -.byte 102,15,56,221,224 - pxor %xmm9,%xmm15 -.byte 102,15,56,221,232 -.byte 102,15,56,221,240 -.byte 102,15,56,221,248 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm13 + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 paddq %xmm15,%xmm15 - xorps 0(%rsp),%xmm2 +.byte 102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 pand %xmm8,%xmm9 - xorps 16(%rsp),%xmm3 - pcmpgtd %xmm15,%xmm14 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 pxor %xmm9,%xmm15 - xorps 32(%rsp),%xmm4 - movups %xmm2,0(%rsi) - xorps 48(%rsp),%xmm5 - movups %xmm3,16(%rsi) - xorps 64(%rsp),%xmm6 - movups %xmm4,32(%rsi) - xorps 80(%rsp),%xmm7 - movups %xmm5,48(%rsi) - movl %r10d,%eax - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + 
movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_enc_grandloop - leal 3(%rax,%rax,1),%eax + movl $16+96,%eax + subl %r10d,%eax movq %r11,%rcx - movl %eax,%r10d + shrl $4,%eax .Lxts_enc_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 addq $96,%rdx jz .Lxts_enc_done + pxor %xmm0,%xmm11 cmpq $32,%rdx jb .Lxts_enc_one + pxor %xmm0,%xmm12 je .Lxts_enc_two + pxor %xmm0,%xmm13 cmpq $64,%rdx jb .Lxts_enc_three + pxor %xmm0,%xmm14 je .Lxts_enc_four - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 - pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1421,7 +1810,6 @@ aesni_xts_encrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_9 - .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1437,7 +1825,7 @@ aesni_xts_encrypt: xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 - call _aesni_encrypt3 + call _aesni_encrypt2 xorps %xmm10,%xmm2 movdqa %xmm12,%xmm10 @@ -1483,15 +1871,15 @@ aesni_xts_encrypt: call _aesni_encrypt4 - xorps %xmm10,%xmm2 - movdqa %xmm15,%xmm10 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - movups %xmm2,(%rsi) - xorps %xmm13,%xmm5 - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_enc_done @@ -1527,13 +1915,13 @@ aesni_xts_encrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_10 - .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movups %xmm2,-16(%rsi) .Lxts_enc_ret: - leaq 104(%rsp),%rsp + leaq (%rbp),%rsp + popq %rbp .Lxts_enc_epilogue: .byte 0xf3,0xc3 .size aesni_xts_encrypt,.-aesni_xts_encrypt @@ -1541,250 +1929,292 @@ aesni_xts_encrypt: .type aesni_xts_decrypt,@function .align 16 aesni_xts_decrypt: - leaq -104(%rsp),%rsp - movups (%r9),%xmm15 + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 movl 240(%r8),%eax movl 240(%rcx),%r10d movups (%r8),%xmm0 movups 16(%r8),%xmm1 leaq 32(%r8),%r8 - xorps %xmm0,%xmm15 + xorps %xmm0,%xmm2 .Loop_enc1_11: -.byte 102,68,15,56,220,249 +.byte 102,15,56,220,209 decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 jnz .Loop_enc1_11 - -.byte 102,68,15,56,221,249 +.byte 102,15,56,221,209 xorl %eax,%eax testq $15,%rdx setnz %al shlq $4,%rax subq %rax,%rdx + movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax + shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx + movups 16(%rcx,%r10,1),%xmm1 + movdqa .Lxts_magic(%rip),%xmm8 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm10 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm11 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm12 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + 
pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 + pxor %xmm0,%xmm14 pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + subq $96,%rdx jc .Lxts_dec_short - shrl $1,%eax - subl $1,%eax - movl %eax,%r10d + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 jmp .Lxts_dec_grandloop -.align 16 +.align 32 .Lxts_dec_grandloop: - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - pand %xmm8,%xmm9 + movdqa %xmm0,%xmm8 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 - movdqu 48(%rdi),%xmm5 + movdqu 32(%rdi),%xmm4 pxor %xmm11,%xmm3 - movdqu 64(%rdi),%xmm6 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 pxor %xmm12,%xmm4 - movdqu 80(%rdi),%xmm7 - leaq 96(%rdi),%rdi +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 pxor %xmm13,%xmm5 - movups (%r11),%xmm0 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 pxor %xmm14,%xmm6 - pxor %xmm15,%xmm7 - - +.byte 102,15,56,222,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 - movups 16(%r11),%xmm1 - pxor %xmm0,%xmm2 - pxor %xmm0,%xmm3 + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 movdqa %xmm10,0(%rsp) -.byte 102,15,56,222,209 - leaq 32(%r11),%rcx - pxor %xmm0,%xmm4 +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 movdqa %xmm11,16(%rsp) -.byte 102,15,56,222,217 - pxor %xmm0,%xmm5 +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 movdqa %xmm12,32(%rsp) -.byte 102,15,56,222,225 - pxor %xmm0,%xmm6 - movdqa %xmm13,48(%rsp) -.byte 102,15,56,222,233 - pxor %xmm0,%xmm7 - movups (%rcx),%xmm0 - decl %eax +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 movdqa %xmm14,64(%rsp) -.byte 102,15,56,222,241 - movdqa %xmm15,80(%rsp) -.byte 102,15,56,222,249 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - jmp .Lxts_dec_loop6_enter - -.align 16 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp .Lxts_dec_loop6 +.align 32 .Lxts_dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -.Lxts_dec_loop6_enter: - movups 16(%rcx),%xmm1 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%rcx),%xmm0 + movups -80(%rcx,%rax,1),%xmm0 jnz .Lxts_dec_loop6 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - paddq %xmm15,%xmm15 + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 .byte 102,15,56,222,209 - pand %xmm8,%xmm9 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 .byte 102,15,56,222,217 - pcmpgtd %xmm15,%xmm14 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 .byte 102,15,56,222,225 - pxor %xmm9,%xmm15 .byte 102,15,56,222,233 .byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 .byte 102,15,56,222,249 - movups 16(%rcx),%xmm1 + movups -64(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm10 - paddq %xmm15,%xmm15 + movdqa %xmm9,%xmm14 .byte 
102,15,56,222,208 - pand %xmm8,%xmm9 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 .byte 102,15,56,222,216 - pcmpgtd %xmm15,%xmm14 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 .byte 102,15,56,222,224 - pxor %xmm9,%xmm15 .byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 .byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,222,248 - movups 32(%rcx),%xmm0 + movups -48(%rcx),%xmm0 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm11 - paddq %xmm15,%xmm15 + paddd %xmm9,%xmm9 .byte 102,15,56,222,209 - pand %xmm8,%xmm9 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 .byte 102,15,56,222,217 - pcmpgtd %xmm15,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 .byte 102,15,56,222,225 - pxor %xmm9,%xmm15 .byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 .byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 .byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm12 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 paddq %xmm15,%xmm15 -.byte 102,15,56,223,208 - pand %xmm8,%xmm9 -.byte 102,15,56,223,216 - pcmpgtd %xmm15,%xmm14 -.byte 102,15,56,223,224 - pxor %xmm9,%xmm15 -.byte 102,15,56,223,232 -.byte 102,15,56,223,240 -.byte 102,15,56,223,248 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm13 + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 paddq %xmm15,%xmm15 - xorps 0(%rsp),%xmm2 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 pand %xmm8,%xmm9 - xorps 16(%rsp),%xmm3 - pcmpgtd %xmm15,%xmm14 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 pxor %xmm9,%xmm15 - xorps 32(%rsp),%xmm4 - movups %xmm2,0(%rsi) - xorps 48(%rsp),%xmm5 - movups %xmm3,16(%rsi) - xorps 64(%rsp),%xmm6 - movups %xmm4,32(%rsi) - xorps 80(%rsp),%xmm7 - movups %xmm5,48(%rsi) - movl %r10d,%eax - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) subq $96,%rdx jnc .Lxts_dec_grandloop - leal 3(%rax,%rax,1),%eax + movl $16+96,%eax + subl %r10d,%eax movq %r11,%rcx - movl %eax,%r10d + shrl $4,%eax .Lxts_dec_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 addq $96,%rdx jz .Lxts_dec_done + pxor %xmm0,%xmm12 cmpq $32,%rdx jb .Lxts_dec_one + pxor %xmm0,%xmm13 je .Lxts_dec_two + pxor %xmm0,%xmm14 cmpq $64,%rdx jb .Lxts_dec_three je .Lxts_dec_four - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 - pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1835,7 +2265,6 @@ aesni_xts_decrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_12 - .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1852,7 +2281,7 @@ aesni_xts_decrypt: xorps 
%xmm10,%xmm2 xorps %xmm11,%xmm3 - call _aesni_decrypt3 + call _aesni_decrypt2 xorps %xmm10,%xmm2 movdqa %xmm12,%xmm10 @@ -1878,7 +2307,7 @@ aesni_xts_decrypt: xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 - movdqa %xmm15,%xmm11 + movdqa %xmm14,%xmm11 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -1888,14 +2317,8 @@ aesni_xts_decrypt: .align 16 .Lxts_dec_four: - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movups (%rdi),%xmm2 - pand %xmm8,%xmm9 movups 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 @@ -1906,16 +2329,16 @@ aesni_xts_decrypt: call _aesni_decrypt4 - xorps %xmm10,%xmm2 + pxor %xmm10,%xmm2 movdqa %xmm14,%xmm10 - xorps %xmm11,%xmm3 + pxor %xmm11,%xmm3 movdqa %xmm15,%xmm11 - xorps %xmm12,%xmm4 - movups %xmm2,(%rsi) - xorps %xmm13,%xmm5 - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp .Lxts_dec_done @@ -1940,7 +2363,6 @@ aesni_xts_decrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_13 - .byte 102,15,56,223,209 xorps %xmm11,%xmm2 movups %xmm2,(%rsi) @@ -1971,13 +2393,13 @@ aesni_xts_decrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_14 - .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) .Lxts_dec_ret: - leaq 104(%rsp),%rsp + leaq (%rbp),%rsp + popq %rbp .Lxts_dec_epilogue: .byte 0xf3,0xc3 .size aesni_xts_decrypt,.-aesni_xts_decrypt @@ -2015,7 +2437,6 @@ aesni_cbc_encrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_enc1_15 - .byte 102,15,56,221,209 movl %r10d,%eax movq %r11,%rcx @@ -2032,12 +2453,10 @@ aesni_cbc_encrypt: movq %rdx,%rcx xchgq %rdi,%rsi .long 0x9066A4F3 - movl $16,%ecx subq %rdx,%rcx xorl %eax,%eax .long 0x9066AAF3 - leaq -16(%rdi),%rdi movl %r10d,%eax movq %rdi,%rsi @@ -2045,152 +2464,385 @@ aesni_cbc_encrypt: xorq %rdx,%rdx jmp .Lcbc_enc_loop - .align 16 .Lcbc_decrypt: - movups (%r8),%xmm9 + leaq (%rsp),%rax + pushq %rbp + subq $16,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r8),%xmm10 movl %r10d,%eax - cmpq $112,%rdx + cmpq $80,%rdx jbe .Lcbc_dec_tail - shrl $1,%r10d - subq $112,%rdx - movl %r10d,%eax - movaps %xmm9,-24(%rsp) + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $112,%rdx + jbe .Lcbc_dec_six_or_seven + + andl $71303168,%r9d + subq $80,%rdx + cmpl $4194304,%r9d + je .Lcbc_dec_loop6_enter + subq $32,%rdx + leaq 112(%rcx),%rcx jmp .Lcbc_dec_loop8_enter .align 16 -.Lcbc_dec_loop8: - movaps %xmm0,-24(%rsp) - movups %xmm9,(%rsi) - leaq 16(%rsi),%rsi -.Lcbc_dec_loop8_enter: - movups (%rcx),%xmm0 - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - movups 16(%rcx),%xmm1 - - leaq 32(%rcx),%rcx - movdqu 32(%rdi),%xmm4 - xorps %xmm0,%xmm2 - movdqu 48(%rdi),%xmm5 - xorps %xmm0,%xmm3 - movdqu 64(%rdi),%xmm6 +.Lcbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + xorq %r11,%r11 + cmpq $112,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor 
%xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + setnc %r11b + shlq $7,%r11 +.byte 102,68,15,56,222,201 + addq %rdi,%r11 + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_done: .byte 102,15,56,222,209 - pxor %xmm0,%xmm4 - movdqu 80(%rdi),%xmm7 .byte 102,15,56,222,217 - pxor %xmm0,%xmm5 - movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 .byte 102,15,56,222,225 - pxor %xmm0,%xmm6 - movdqu 112(%rdi),%xmm9 .byte 102,15,56,222,233 - pxor %xmm0,%xmm7 - decl %eax + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 .byte 102,15,56,222,241 - pxor %xmm0,%xmm8 .byte 102,15,56,222,249 - pxor %xmm0,%xmm9 - movups (%rcx),%xmm0 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 .byte 102,68,15,56,222,193 .byte 
102,68,15,56,222,201 - movups 16(%rcx),%xmm1 - - call .Ldec_loop8_enter + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%r11),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%r11),%xmm12 + movdqu 32(%r11),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%r11),%xmm14 + movdqu 64(%r11),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%r11),%xmm1 + movups -112(%rcx),%xmm0 - movups (%rdi),%xmm1 - movups 16(%rdi),%xmm0 - xorps -24(%rsp),%xmm2 - xorps %xmm1,%xmm3 - movups 32(%rdi),%xmm1 - xorps %xmm0,%xmm4 - movups 48(%rdi),%xmm0 - xorps %xmm1,%xmm5 - movups 64(%rdi),%xmm1 - xorps %xmm0,%xmm6 - movups 80(%rdi),%xmm0 - xorps %xmm1,%xmm7 - movups 96(%rdi),%xmm1 - xorps %xmm0,%xmm8 - movups 112(%rdi),%xmm0 - xorps %xmm1,%xmm9 movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) - movl %r10d,%eax + movdqa %xmm14,%xmm5 movups %xmm6,64(%rsi) - movq %r11,%rcx + movdqa %xmm15,%xmm6 movups %xmm7,80(%rsi) - leaq 128(%rdi),%rdi + movdqa %xmm1,%xmm7 movups %xmm8,96(%rsi) leaq 112(%rsi),%rsi + subq $128,%rdx ja .Lcbc_dec_loop8 movaps %xmm9,%xmm2 - movaps %xmm0,%xmm9 + leaq -112(%rcx),%rcx addq $112,%rdx jle .Lcbc_dec_tail_collected - movups %xmm2,(%rsi) - leal 1(%r10,%r10,1),%eax + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $80,%rdx + jbe .Lcbc_dec_tail + + movaps %xmm11,%xmm2 +.Lcbc_dec_six_or_seven: + cmpq $96,%rdx + ja .Lcbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +.Lcbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %r11,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $96,%rdx + ja .Lcbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $80,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm7,(%rsi) leaq 16(%rsi),%rsi + .Lcbc_dec_tail: movups (%rdi),%xmm2 - movaps %xmm2,%xmm8 - cmpq $16,%rdx + subq 
$16,%rdx jbe .Lcbc_dec_one movups 16(%rdi),%xmm3 - movaps %xmm3,%xmm7 - cmpq $32,%rdx + movaps %xmm2,%xmm11 + subq $16,%rdx jbe .Lcbc_dec_two movups 32(%rdi),%xmm4 - movaps %xmm4,%xmm6 - cmpq $48,%rdx + movaps %xmm3,%xmm12 + subq $16,%rdx jbe .Lcbc_dec_three movups 48(%rdi),%xmm5 - cmpq $64,%rdx + movaps %xmm4,%xmm13 + subq $16,%rdx jbe .Lcbc_dec_four movups 64(%rdi),%xmm6 - cmpq $80,%rdx - jbe .Lcbc_dec_five - - movups 80(%rdi),%xmm7 - cmpq $96,%rdx - jbe .Lcbc_dec_six - - movups 96(%rdi),%xmm8 - movaps %xmm9,-24(%rsp) - call _aesni_decrypt8 - movups (%rdi),%xmm1 - movups 16(%rdi),%xmm0 - xorps -24(%rsp),%xmm2 - xorps %xmm1,%xmm3 - movups 32(%rdi),%xmm1 - xorps %xmm0,%xmm4 - movups 48(%rdi),%xmm0 - xorps %xmm1,%xmm5 - movups 64(%rdi),%xmm1 - xorps %xmm0,%xmm6 - movups 80(%rdi),%xmm0 - xorps %xmm1,%xmm7 - movups 96(%rdi),%xmm9 - xorps %xmm0,%xmm8 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) - leaq 96(%rsi),%rsi - movaps %xmm8,%xmm2 - subq $112,%rdx + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + subq $16,%rdx jmp .Lcbc_dec_tail_collected + .align 16 .Lcbc_dec_one: + movaps %xmm2,%xmm11 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -2201,114 +2853,69 @@ aesni_cbc_encrypt: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz .Loop_dec1_16 - .byte 102,15,56,223,209 - xorps %xmm9,%xmm2 - movaps %xmm8,%xmm9 - subq $16,%rdx + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - movups %xmm2,(%rsi) - movaps %xmm7,%xmm9 - movaps %xmm3,%xmm2 + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 leaq 16(%rsi),%rsi - subq $32,%rdx jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_three: + movaps %xmm4,%xmm13 call _aesni_decrypt3 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - movups %xmm2,(%rsi) - xorps %xmm7,%xmm4 - movups %xmm3,16(%rsi) - movaps %xmm6,%xmm9 - movaps %xmm4,%xmm2 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + movdqa %xmm4,%xmm2 leaq 32(%rsi),%rsi - subq $48,%rdx jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_four: + movaps %xmm5,%xmm14 call _aesni_decrypt4 - xorps %xmm9,%xmm2 - movups 48(%rdi),%xmm9 - xorps %xmm8,%xmm3 - movups %xmm2,(%rsi) - xorps %xmm7,%xmm4 - movups %xmm3,16(%rsi) - xorps %xmm6,%xmm5 - movups %xmm4,32(%rsi) - movaps %xmm5,%xmm2 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + movdqa %xmm5,%xmm2 leaq 48(%rsi),%rsi - subq $64,%rdx - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_five: - xorps %xmm7,%xmm7 - call _aesni_decrypt6 - movups 16(%rdi),%xmm1 - movups 32(%rdi),%xmm0 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - xorps %xmm1,%xmm4 - movups 48(%rdi),%xmm1 - xorps %xmm0,%xmm5 - movups 64(%rdi),%xmm9 - xorps %xmm1,%xmm6 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - movaps 
%xmm6,%xmm2 - subq $80,%rdx - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_six: - call _aesni_decrypt6 - movups 16(%rdi),%xmm1 - movups 32(%rdi),%xmm0 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - xorps %xmm1,%xmm4 - movups 48(%rdi),%xmm1 - xorps %xmm0,%xmm5 - movups 64(%rdi),%xmm0 - xorps %xmm1,%xmm6 - movups 80(%rdi),%xmm9 - xorps %xmm0,%xmm7 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - movups %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - movaps %xmm7,%xmm2 - subq $96,%rdx jmp .Lcbc_dec_tail_collected + .align 16 .Lcbc_dec_tail_collected: + movups %xmm10,(%r8) andq $15,%rdx - movups %xmm9,(%r8) jnz .Lcbc_dec_tail_partial movups %xmm2,(%rsi) jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: - movaps %xmm2,-24(%rsp) + movaps %xmm2,(%rsp) movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx - leaq -24(%rsp),%rsi + leaq (%rsp),%rsi .long 0x9066A4F3 - .Lcbc_dec_ret: + leaq (%rbp),%rsp + popq %rbp .Lcbc_ret: .byte 0xf3,0xc3 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt @@ -2317,7 +2924,6 @@ aesni_cbc_encrypt: .align 16 aesni_set_decrypt_key: .byte 0x48,0x83,0xEC,0x08 - call __aesni_set_encrypt_key shll $4,%esi testl %eax,%eax @@ -2357,7 +2963,6 @@ aesni_set_decrypt_key: aesni_set_encrypt_key: __aesni_set_encrypt_key: .byte 0x48,0x83,0xEC,0x08 - movq $-1,%rax testq %rdi,%rdi jz .Lenc_key_ret @@ -2553,6 +3158,8 @@ __aesni_set_encrypt_key: .long 1,0,0,0 .Lxts_magic: .long 0x87,0,1,0 +.Lincrement1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 diff --git a/deps/openssl/asm/x64-elf-gas/aes/bsaes-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/bsaes-x86_64.s index 77754a3ef9dd31..5b363a5eef9020 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/bsaes-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/bsaes-x86_64.s @@ -3,7 +3,6 @@ - .type _bsaes_encrypt8,@function .align 64 _bsaes_encrypt8: @@ -14,18 +13,18 @@ _bsaes_encrypt8: movdqa 80(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 -.byte 102,68,15,56,0,255 pxor %xmm8,%xmm1 -.byte 102,15,56,0,199 pxor %xmm8,%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor %xmm8,%xmm3 -.byte 102,15,56,0,215 pxor %xmm8,%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor %xmm8,%xmm5 -.byte 102,15,56,0,231 pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 _bsaes_encrypt8_bitslice: @@ -122,21 +121,21 @@ _bsaes_encrypt8_bitslice: .Lenc_loop: pxor 0(%rax),%xmm15 pxor 16(%rax),%xmm0 -.byte 102,68,15,56,0,255 pxor 32(%rax),%xmm1 -.byte 102,15,56,0,199 pxor 48(%rax),%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor 64(%rax),%xmm3 -.byte 102,15,56,0,215 pxor 80(%rax),%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor 96(%rax),%xmm5 -.byte 102,15,56,0,231 pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 - leaq 128(%rax),%rax .byte 102,15,56,0,247 + leaq 128(%rax),%rax .Lenc_sbox: pxor %xmm5,%xmm4 pxor %xmm0,%xmm1 @@ -486,18 +485,18 @@ _bsaes_decrypt8: movdqa -48(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 -.byte 102,68,15,56,0,255 pxor %xmm8,%xmm1 -.byte 102,15,56,0,199 pxor %xmm8,%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor %xmm8,%xmm3 -.byte 102,15,56,0,215 pxor %xmm8,%xmm4 -.byte 102,15,56,0,223 
+.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor %xmm8,%xmm5 -.byte 102,15,56,0,231 pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 movdqa 0(%r11),%xmm7 @@ -593,21 +592,21 @@ _bsaes_decrypt8: .Ldec_loop: pxor 0(%rax),%xmm15 pxor 16(%rax),%xmm0 -.byte 102,68,15,56,0,255 pxor 32(%rax),%xmm1 -.byte 102,15,56,0,199 pxor 48(%rax),%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor 64(%rax),%xmm3 -.byte 102,15,56,0,215 pxor 80(%rax),%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor 96(%rax),%xmm5 -.byte 102,15,56,0,231 pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 - leaq 128(%rax),%rax .byte 102,15,56,0,247 + leaq 128(%rax),%rax .Ldec_sbox: pxor %xmm3,%xmm2 @@ -1286,7 +1285,6 @@ bsaes_cbc_encrypt: leaq 32(%rbp),%rsi leaq (%r15),%rdx call asm_AES_decrypt - pxor 32(%rbp),%xmm14 movdqu %xmm14,(%r13) movdqa %xmm15,%xmm14 @@ -1384,21 +1382,21 @@ bsaes_ctr32_encrypt_blocks: movdqa -16(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 -.byte 102,68,15,56,0,255 pxor %xmm8,%xmm1 -.byte 102,15,56,0,199 pxor %xmm8,%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor %xmm8,%xmm3 -.byte 102,15,56,0,215 pxor %xmm8,%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor %xmm8,%xmm5 -.byte 102,15,56,0,231 pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 - leaq .LBS0(%rip),%r11 .byte 102,15,56,0,247 + leaq .LBS0(%rip),%r11 movl %ebx,%r10d call _bsaes_encrypt8_bitslice @@ -1538,7 +1536,6 @@ bsaes_xts_encrypt: leaq (%r8),%rdx call asm_AES_encrypt - movl 240(%r15),%eax movq %r14,%rbx @@ -1908,7 +1905,6 @@ bsaes_xts_encrypt: leaq 32(%rbp),%rsi leaq (%r15),%rdx call asm_AES_encrypt - pxor 32(%rbp),%xmm15 @@ -1942,7 +1938,6 @@ bsaes_xts_encrypt: movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call asm_AES_encrypt - pxor 32(%rbp),%xmm6 movdqu %xmm6,-16(%r13) @@ -1993,7 +1988,6 @@ bsaes_xts_decrypt: leaq (%r8),%rdx call asm_AES_encrypt - movl 240(%r15),%eax movq %r14,%rbx @@ -2370,7 +2364,6 @@ bsaes_xts_decrypt: leaq 32(%rbp),%rsi leaq (%r15),%rdx call asm_AES_decrypt - pxor 32(%rbp),%xmm15 @@ -2402,7 +2395,6 @@ bsaes_xts_decrypt: movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call asm_AES_decrypt - pxor 32(%rbp),%xmm6 movq %r13,%rdx movdqu %xmm6,(%r13) @@ -2424,7 +2416,6 @@ bsaes_xts_decrypt: movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call asm_AES_decrypt - pxor 32(%rbp),%xmm5 movdqu %xmm5,(%r13) diff --git a/deps/openssl/asm/x64-elf-gas/aes/vpaes-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/vpaes-x86_64.s index fb201b6649d8a5..b9d6df5134ec60 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/vpaes-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/vpaes-x86_64.s @@ -15,7 +15,6 @@ - .type _vpaes_encrypt_core,@function .align 16 _vpaes_encrypt_core: @@ -32,8 +31,8 @@ _vpaes_encrypt_core: movdqa .Lk_ipt+16(%rip),%xmm0 .byte 102,15,56,0,193 pxor %xmm5,%xmm2 - pxor %xmm2,%xmm0 addq $16,%r9 + pxor %xmm2,%xmm0 leaq .Lk_mc_backward(%rip),%r10 jmp .Lenc_entry @@ -41,19 +40,19 @@ _vpaes_encrypt_core: .Lenc_loop: movdqa %xmm13,%xmm4 -.byte 102,15,56,0,226 - pxor %xmm5,%xmm4 movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 .byte 102,15,56,0,195 - pxor %xmm4,%xmm0 + pxor %xmm5,%xmm4 movdqa %xmm15,%xmm5 -.byte 102,15,56,0,234 + pxor %xmm4,%xmm0 movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 movdqa %xmm14,%xmm2 .byte 102,15,56,0,211 - pxor %xmm5,%xmm2 - movdqa (%r11,%r10,1),%xmm4 
movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addq $16,%r9 pxor %xmm2,%xmm0 @@ -62,30 +61,30 @@ _vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andq $48,%r11 - pxor %xmm3,%xmm0 subq $1,%rax + pxor %xmm3,%xmm0 .Lenc_entry: movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 - movdqa %xmm11,%xmm5 .byte 102,15,56,0,232 - pxor %xmm1,%xmm0 movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm5,%xmm3 movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 .byte 102,15,56,0,224 - pxor %xmm5,%xmm4 movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm10,%xmm3 - movdqu (%r9),%xmm5 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 + movdqu (%r9),%xmm5 pxor %xmm1,%xmm3 jnz .Lenc_loop @@ -138,62 +137,61 @@ _vpaes_decrypt_core: movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa -16(%r10),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - addq $16,%r9 - -.byte 102,15,56,0,197 movdqa 0(%r10),%xmm4 -.byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 16(%r10),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 - subq $1,%rax + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 -.byte 102,15,56,0,197 - movdqa 32(%r10),%xmm4 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 48(%r10),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 +.byte 102,15,56,0,226 .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 80(%r10),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - + addq $16,%r9 .byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax .Ldec_entry: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 psrld $4,%xmm1 pand %xmm9,%xmm0 - movdqa %xmm11,%xmm2 .byte 102,15,56,0,208 - pxor %xmm1,%xmm0 movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm2,%xmm3 movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 - pxor %xmm1,%xmm3 movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 jnz .Ldec_loop @@ -222,7 +220,6 @@ _vpaes_schedule_core: call _vpaes_preheat - movdqa .Lk_rcon(%rip),%xmm8 movdqu (%rdi),%xmm0 @@ -269,7 +266,6 @@ _vpaes_schedule_core: decq %rsi jz .Lschedule_mangle_last call _vpaes_schedule_mangle - jmp .Loop_schedule_128 @@ -291,7 +287,6 @@ _vpaes_schedule_core: .Lschedule_192: movdqu 8(%rdi),%xmm0 call _vpaes_schedule_transform - movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 @@ -301,15 +296,12 @@ _vpaes_schedule_core: call _vpaes_schedule_round .byte 102,15,58,15,198,8 call _vpaes_schedule_mangle - call _vpaes_schedule_192_smear call _vpaes_schedule_mangle - call _vpaes_schedule_round decq %rsi jz .Lschedule_mangle_last call _vpaes_schedule_mangle - call _vpaes_schedule_192_smear jmp .Loop_schedule_192 @@ -327,12 +319,10 @@ _vpaes_schedule_core: .Lschedule_256: movdqu 16(%rdi),%xmm0 call _vpaes_schedule_transform - movl $7,%esi .Loop_schedule_256: call _vpaes_schedule_mangle - movdqa %xmm0,%xmm6 @@ -342,7 +332,6 @@ _vpaes_schedule_core: call _vpaes_schedule_mangle - pshufd $255,%xmm0,%xmm0 movdqa %xmm7,%xmm5 movdqa %xmm6,%xmm7 @@ -379,7 +368,6 @@ _vpaes_schedule_core: addq $-16,%rdx pxor .Lk_s63(%rip),%xmm0 call 
_vpaes_schedule_transform - movdqu %xmm0,(%rdx) @@ -411,12 +399,12 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm0 - pxor %xmm0,%xmm6 + pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 - pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear diff --git a/deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s b/deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s deleted file mode 100644 index c980dd07332a8a..00000000000000 --- a/deps/openssl/asm/x64-elf-gas/bn/modexp512-x86_64.s +++ /dev/null @@ -1,1776 +0,0 @@ -.text - - -.type MULADD_128x512,@function -.align 16 -MULADD_128x512: - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - movq %r8,0(%rcx) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq 8(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - movq %r9,8(%rcx) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%r9 - .byte 0xf3,0xc3 -.size MULADD_128x512,.-MULADD_128x512 -.type mont_reduce,@function -.align 16 -mont_reduce: - leaq 192(%rsp),%rdi - movq 32(%rsp),%rsi - addq $576,%rsi - leaq 520(%rsp),%rcx - - movq 96(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - movq (%rcx),%r8 - addq %rax,%r8 - adcq $0,%rdx - movq %r8,0(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - movq 8(%rcx),%r9 - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - movq 16(%rcx),%r10 - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - movq 24(%rcx),%r11 - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - movq 32(%rcx),%r12 - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - movq 
40(%rcx),%r13 - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - movq 48(%rcx),%r14 - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - movq 56(%rcx),%r15 - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq 104(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - movq %r9,8(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%r9 - movq 112(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - movq %r10,16(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%r10 - movq 120(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - movq %r11,24(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%r11 - xorq %rax,%rax - - addq 64(%rcx),%r8 - adcq 72(%rcx),%r9 - adcq 80(%rcx),%r10 - adcq 88(%rcx),%r11 - adcq $0,%rax - - - - - movq %r8,64(%rdi) - movq %r9,72(%rdi) - movq %r10,%rbp - movq %r11,88(%rdi) - - movq %rax,384(%rsp) - - movq 0(%rdi),%r8 - movq 8(%rdi),%r9 - movq 16(%rdi),%r10 - movq 24(%rdi),%r11 - - - - - - - - - addq $80,%rdi - - addq $64,%rsi - leaq 296(%rsp),%rcx - - call 
MULADD_128x512 - - - movq 384(%rsp),%rax - - - addq -16(%rdi),%r8 - adcq -8(%rdi),%r9 - movq %r8,64(%rcx) - movq %r9,72(%rcx) - - adcq %rax,%rax - movq %rax,384(%rsp) - - leaq 192(%rsp),%rdi - addq $64,%rsi - - - - - - movq (%rsi),%r8 - movq 8(%rsi),%rbx - - movq (%rcx),%rax - mulq %r8 - movq %rax,%rbp - movq %rdx,%r9 - - movq 8(%rcx),%rax - mulq %r8 - addq %rax,%r9 - - movq (%rcx),%rax - mulq %rbx - addq %rax,%r9 - - movq %r9,8(%rdi) - - - subq $192,%rsi - - movq (%rcx),%r8 - movq 8(%rcx),%r9 - - call MULADD_128x512 - - - - - - movq 0(%rsi),%rax - movq 8(%rsi),%rbx - movq 16(%rsi),%rdi - movq 24(%rsi),%rdx - - - movq 384(%rsp),%rbp - - addq 64(%rcx),%r8 - adcq 72(%rcx),%r9 - - - adcq %rbp,%rbp - - - - shlq $3,%rbp - movq 32(%rsp),%rcx - addq %rcx,%rbp - - - xorq %rsi,%rsi - - addq 0(%rbp),%r10 - adcq 64(%rbp),%r11 - adcq 128(%rbp),%r12 - adcq 192(%rbp),%r13 - adcq 256(%rbp),%r14 - adcq 320(%rbp),%r15 - adcq 384(%rbp),%r8 - adcq 448(%rbp),%r9 - - - - sbbq $0,%rsi - - - andq %rsi,%rax - andq %rsi,%rbx - andq %rsi,%rdi - andq %rsi,%rdx - - movq $1,%rbp - subq %rax,%r10 - sbbq %rbx,%r11 - sbbq %rdi,%r12 - sbbq %rdx,%r13 - - - - - sbbq $0,%rbp - - - - addq $512,%rcx - movq 32(%rcx),%rax - movq 40(%rcx),%rbx - movq 48(%rcx),%rdi - movq 56(%rcx),%rdx - - - - andq %rsi,%rax - andq %rsi,%rbx - andq %rsi,%rdi - andq %rsi,%rdx - - - - subq $1,%rbp - - sbbq %rax,%r14 - sbbq %rbx,%r15 - sbbq %rdi,%r8 - sbbq %rdx,%r9 - - - - movq 144(%rsp),%rsi - movq %r10,0(%rsi) - movq %r11,8(%rsi) - movq %r12,16(%rsi) - movq %r13,24(%rsi) - movq %r14,32(%rsi) - movq %r15,40(%rsi) - movq %r8,48(%rsi) - movq %r9,56(%rsi) - - .byte 0xf3,0xc3 -.size mont_reduce,.-mont_reduce -.type mont_mul_a3b,@function -.align 16 -mont_mul_a3b: - - - - - movq 0(%rdi),%rbp - - movq %r10,%rax - mulq %rbp - movq %rax,520(%rsp) - movq %rdx,%r10 - movq %r11,%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - movq %rdx,%r11 - movq %r12,%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - movq %rdx,%r12 - movq %r13,%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - movq %rdx,%r13 - movq %r14,%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - movq %rdx,%r14 - movq %r15,%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - movq %rdx,%r15 - movq %r8,%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq %r9,%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - movq %rdx,%r9 - movq 8(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - movq %r10,528(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%r10 - movq 16(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - movq %r11,536(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 
16(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%r11 - movq 24(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - movq %r12,544(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%r12 - movq 32(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - movq %r13,552(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%r13 - movq 40(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - movq %r14,560(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%r14 - movq 48(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq 
$0,%rdx - movq %r15,568(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%r15 - movq 56(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - movq %r8,576(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq %r9,584(%rsp) - movq %r10,592(%rsp) - movq %r11,600(%rsp) - movq %r12,608(%rsp) - movq %r13,616(%rsp) - movq %r14,624(%rsp) - movq %r15,632(%rsp) - movq %r8,640(%rsp) - - - - - - jmp mont_reduce - - -.size mont_mul_a3b,.-mont_mul_a3b -.type sqr_reduce,@function -.align 16 -sqr_reduce: - movq 16(%rsp),%rcx - - - - movq %r10,%rbx - - movq %r11,%rax - mulq %rbx - movq %rax,528(%rsp) - movq %rdx,%r10 - movq %r12,%rax - mulq %rbx - addq %rax,%r10 - adcq $0,%rdx - movq %rdx,%r11 - movq %r13,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - movq %rdx,%r12 - movq %r14,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - movq %rdx,%r13 - movq %r15,%rax - mulq %rbx - addq %rax,%r13 - adcq $0,%rdx - movq %rdx,%r14 - movq %r8,%rax - mulq %rbx - addq %rax,%r14 - adcq $0,%rdx - movq %rdx,%r15 - movq %r9,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - movq %rdx,%rsi - - movq %r10,536(%rsp) - - - - - - movq 8(%rcx),%rbx - - movq 16(%rcx),%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - movq %r11,544(%rsp) - - movq %rdx,%r10 - movq 24(%rcx),%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - addq %r10,%r12 - adcq $0,%rdx - movq %r12,552(%rsp) - - movq %rdx,%r10 - movq 32(%rcx),%rax - mulq %rbx - addq %rax,%r13 - adcq $0,%rdx - addq %r10,%r13 - adcq $0,%rdx - - movq %rdx,%r10 - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%r14 - adcq $0,%rdx - addq %r10,%r14 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - addq %r10,%r15 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%rsi - adcq $0,%rdx - addq %r10,%rsi - adcq $0,%rdx - - movq %rdx,%r11 - - - - - movq 16(%rcx),%rbx - - movq 24(%rcx),%rax - mulq %rbx - addq %rax,%r13 - adcq $0,%rdx - movq %r13,560(%rsp) - - movq %rdx,%r10 - movq 
32(%rcx),%rax - mulq %rbx - addq %rax,%r14 - adcq $0,%rdx - addq %r10,%r14 - adcq $0,%rdx - movq %r14,568(%rsp) - - movq %rdx,%r10 - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - addq %r10,%r15 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%rsi - adcq $0,%rdx - addq %r10,%rsi - adcq $0,%rdx - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - addq %r10,%r11 - adcq $0,%rdx - - movq %rdx,%r12 - - - - - - movq 24(%rcx),%rbx - - movq 32(%rcx),%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - movq %r15,576(%rsp) - - movq %rdx,%r10 - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%rsi - adcq $0,%rdx - addq %r10,%rsi - adcq $0,%rdx - movq %rsi,584(%rsp) - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - addq %r10,%r11 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - addq %r10,%r12 - adcq $0,%rdx - - movq %rdx,%r15 - - - - - movq 32(%rcx),%rbx - - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - movq %r11,592(%rsp) - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - addq %r10,%r12 - adcq $0,%rdx - movq %r12,600(%rsp) - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - addq %r10,%r15 - adcq $0,%rdx - - movq %rdx,%r11 - - - - - movq 40(%rcx),%rbx - - movq %r8,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - movq %r15,608(%rsp) - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - addq %r10,%r11 - adcq $0,%rdx - movq %r11,616(%rsp) - - movq %rdx,%r12 - - - - - movq %r8,%rbx - - movq %r9,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - movq %r12,624(%rsp) - - movq %rdx,632(%rsp) - - - movq 528(%rsp),%r10 - movq 536(%rsp),%r11 - movq 544(%rsp),%r12 - movq 552(%rsp),%r13 - movq 560(%rsp),%r14 - movq 568(%rsp),%r15 - - movq 24(%rcx),%rax - mulq %rax - movq %rax,%rdi - movq %rdx,%r8 - - addq %r10,%r10 - adcq %r11,%r11 - adcq %r12,%r12 - adcq %r13,%r13 - adcq %r14,%r14 - adcq %r15,%r15 - adcq $0,%r8 - - movq 0(%rcx),%rax - mulq %rax - movq %rax,520(%rsp) - movq %rdx,%rbx - - movq 8(%rcx),%rax - mulq %rax - - addq %rbx,%r10 - adcq %rax,%r11 - adcq $0,%rdx - - movq %rdx,%rbx - movq %r10,528(%rsp) - movq %r11,536(%rsp) - - movq 16(%rcx),%rax - mulq %rax - - addq %rbx,%r12 - adcq %rax,%r13 - adcq $0,%rdx - - movq %rdx,%rbx - - movq %r12,544(%rsp) - movq %r13,552(%rsp) - - xorq %rbp,%rbp - addq %rbx,%r14 - adcq %rdi,%r15 - adcq $0,%rbp - - movq %r14,560(%rsp) - movq %r15,568(%rsp) - - - - - movq 576(%rsp),%r10 - movq 584(%rsp),%r11 - movq 592(%rsp),%r12 - movq 600(%rsp),%r13 - movq 608(%rsp),%r14 - movq 616(%rsp),%r15 - movq 624(%rsp),%rdi - movq 632(%rsp),%rsi - - movq %r9,%rax - mulq %rax - movq %rax,%r9 - movq %rdx,%rbx - - addq %r10,%r10 - adcq %r11,%r11 - adcq %r12,%r12 - adcq %r13,%r13 - adcq %r14,%r14 - adcq %r15,%r15 - adcq %rdi,%rdi - adcq %rsi,%rsi - adcq $0,%rbx - - addq %rbp,%r10 - - movq 32(%rcx),%rax - mulq %rax - - addq %r8,%r10 - adcq %rax,%r11 - adcq $0,%rdx - - movq %rdx,%rbp - - movq %r10,576(%rsp) - movq %r11,584(%rsp) - - movq 40(%rcx),%rax - mulq %rax - - addq %rbp,%r12 - adcq %rax,%r13 - adcq $0,%rdx - - movq %rdx,%rbp - - movq %r12,592(%rsp) - movq %r13,600(%rsp) - - movq 48(%rcx),%rax - mulq %rax - - addq %rbp,%r14 - adcq %rax,%r15 - adcq $0,%rdx - - movq %r14,608(%rsp) - movq %r15,616(%rsp) - - addq %rdx,%rdi - adcq %r9,%rsi - adcq $0,%rbx - - movq %rdi,624(%rsp) - movq %rsi,632(%rsp) - movq %rbx,640(%rsp) - - jmp mont_reduce - - -.size 
sqr_reduce,.-sqr_reduce -.globl mod_exp_512 -.type mod_exp_512,@function -mod_exp_512: - pushq %rbp - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - - movq %rsp,%r8 - subq $2688,%rsp - andq $-64,%rsp - - - movq %r8,0(%rsp) - movq %rdi,8(%rsp) - movq %rsi,16(%rsp) - movq %rcx,24(%rsp) -.Lbody: - - - - pxor %xmm4,%xmm4 - movdqu 0(%rsi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqu 32(%rsi),%xmm2 - movdqu 48(%rsi),%xmm3 - movdqa %xmm4,512(%rsp) - movdqa %xmm4,528(%rsp) - movdqa %xmm4,608(%rsp) - movdqa %xmm4,624(%rsp) - movdqa %xmm0,544(%rsp) - movdqa %xmm1,560(%rsp) - movdqa %xmm2,576(%rsp) - movdqa %xmm3,592(%rsp) - - - movdqu 0(%rdx),%xmm0 - movdqu 16(%rdx),%xmm1 - movdqu 32(%rdx),%xmm2 - movdqu 48(%rdx),%xmm3 - - leaq 384(%rsp),%rbx - movq %rbx,136(%rsp) - call mont_reduce - - - leaq 448(%rsp),%rcx - xorq %rax,%rax - movq %rax,0(%rcx) - movq %rax,8(%rcx) - movq %rax,24(%rcx) - movq %rax,32(%rcx) - movq %rax,40(%rcx) - movq %rax,48(%rcx) - movq %rax,56(%rcx) - movq %rax,128(%rsp) - movq $1,16(%rcx) - - leaq 640(%rsp),%rbp - movq %rcx,%rsi - movq %rbp,%rdi - movq $8,%rax -loop_0: - movq (%rcx),%rbx - movw %bx,(%rdi) - shrq $16,%rbx - movw %bx,64(%rdi) - shrq $16,%rbx - movw %bx,128(%rdi) - shrq $16,%rbx - movw %bx,192(%rdi) - leaq 8(%rcx),%rcx - leaq 256(%rdi),%rdi - decq %rax - jnz loop_0 - movq $31,%rax - movq %rax,32(%rsp) - movq %rbp,40(%rsp) - - movq %rsi,136(%rsp) - movq 0(%rsi),%r10 - movq 8(%rsi),%r11 - movq 16(%rsi),%r12 - movq 24(%rsi),%r13 - movq 32(%rsi),%r14 - movq 40(%rsi),%r15 - movq 48(%rsi),%r8 - movq 56(%rsi),%r9 -init_loop: - leaq 384(%rsp),%rdi - call mont_mul_a3b - leaq 448(%rsp),%rsi - movq 40(%rsp),%rbp - addq $2,%rbp - movq %rbp,40(%rsp) - movq %rsi,%rcx - movq $8,%rax -loop_1: - movq (%rcx),%rbx - movw %bx,(%rbp) - shrq $16,%rbx - movw %bx,64(%rbp) - shrq $16,%rbx - movw %bx,128(%rbp) - shrq $16,%rbx - movw %bx,192(%rbp) - leaq 8(%rcx),%rcx - leaq 256(%rbp),%rbp - decq %rax - jnz loop_1 - movq 32(%rsp),%rax - subq $1,%rax - movq %rax,32(%rsp) - jne init_loop - - - - movdqa %xmm0,64(%rsp) - movdqa %xmm1,80(%rsp) - movdqa %xmm2,96(%rsp) - movdqa %xmm3,112(%rsp) - - - - - - movl 126(%rsp),%eax - movq %rax,%rdx - shrq $11,%rax - andl $2047,%edx - movl %edx,126(%rsp) - leaq 640(%rsp,%rax,2),%rsi - movq 8(%rsp),%rdx - movq $4,%rbp -loop_2: - movzwq 192(%rsi),%rbx - movzwq 448(%rsi),%rax - shlq $16,%rbx - shlq $16,%rax - movw 128(%rsi),%bx - movw 384(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 64(%rsi),%bx - movw 320(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 0(%rsi),%bx - movw 256(%rsi),%ax - movq %rbx,0(%rdx) - movq %rax,8(%rdx) - leaq 512(%rsi),%rsi - leaq 16(%rdx),%rdx - subq $1,%rbp - jnz loop_2 - movq $505,48(%rsp) - - movq 8(%rsp),%rcx - movq %rcx,136(%rsp) - movq 0(%rcx),%r10 - movq 8(%rcx),%r11 - movq 16(%rcx),%r12 - movq 24(%rcx),%r13 - movq 32(%rcx),%r14 - movq 40(%rcx),%r15 - movq 48(%rcx),%r8 - movq 56(%rcx),%r9 - jmp sqr_2 - -main_loop_a3b: - call sqr_reduce - call sqr_reduce - call sqr_reduce -sqr_2: - call sqr_reduce - call sqr_reduce - - - - movq 48(%rsp),%rcx - movq %rcx,%rax - shrq $4,%rax - movl 64(%rsp,%rax,2),%edx - andq $15,%rcx - shrq %cl,%rdx - andq $31,%rdx - - leaq 640(%rsp,%rdx,2),%rsi - leaq 448(%rsp),%rdx - movq %rdx,%rdi - movq $4,%rbp -loop_3: - movzwq 192(%rsi),%rbx - movzwq 448(%rsi),%rax - shlq $16,%rbx - shlq $16,%rax - movw 128(%rsi),%bx - movw 384(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 64(%rsi),%bx - movw 320(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 0(%rsi),%bx - movw 256(%rsi),%ax - movq 
%rbx,0(%rdx) - movq %rax,8(%rdx) - leaq 512(%rsi),%rsi - leaq 16(%rdx),%rdx - subq $1,%rbp - jnz loop_3 - movq 8(%rsp),%rsi - call mont_mul_a3b - - - - movq 48(%rsp),%rcx - subq $5,%rcx - movq %rcx,48(%rsp) - jge main_loop_a3b - - - -end_main_loop_a3b: - - - movq 8(%rsp),%rdx - pxor %xmm4,%xmm4 - movdqu 0(%rdx),%xmm0 - movdqu 16(%rdx),%xmm1 - movdqu 32(%rdx),%xmm2 - movdqu 48(%rdx),%xmm3 - movdqa %xmm4,576(%rsp) - movdqa %xmm4,592(%rsp) - movdqa %xmm4,608(%rsp) - movdqa %xmm4,624(%rsp) - movdqa %xmm0,512(%rsp) - movdqa %xmm1,528(%rsp) - movdqa %xmm2,544(%rsp) - movdqa %xmm3,560(%rsp) - call mont_reduce - - - - movq 8(%rsp),%rax - movq 0(%rax),%r8 - movq 8(%rax),%r9 - movq 16(%rax),%r10 - movq 24(%rax),%r11 - movq 32(%rax),%r12 - movq 40(%rax),%r13 - movq 48(%rax),%r14 - movq 56(%rax),%r15 - - - movq 24(%rsp),%rbx - addq $512,%rbx - - subq 0(%rbx),%r8 - sbbq 8(%rbx),%r9 - sbbq 16(%rbx),%r10 - sbbq 24(%rbx),%r11 - sbbq 32(%rbx),%r12 - sbbq 40(%rbx),%r13 - sbbq 48(%rbx),%r14 - sbbq 56(%rbx),%r15 - - - movq 0(%rax),%rsi - movq 8(%rax),%rdi - movq 16(%rax),%rcx - movq 24(%rax),%rdx - cmovncq %r8,%rsi - cmovncq %r9,%rdi - cmovncq %r10,%rcx - cmovncq %r11,%rdx - movq %rsi,0(%rax) - movq %rdi,8(%rax) - movq %rcx,16(%rax) - movq %rdx,24(%rax) - - movq 32(%rax),%rsi - movq 40(%rax),%rdi - movq 48(%rax),%rcx - movq 56(%rax),%rdx - cmovncq %r12,%rsi - cmovncq %r13,%rdi - cmovncq %r14,%rcx - cmovncq %r15,%rdx - movq %rsi,32(%rax) - movq %rdi,40(%rax) - movq %rcx,48(%rax) - movq %rdx,56(%rax) - - movq 0(%rsp),%rsi - movq 0(%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbx - movq 40(%rsi),%rbp - leaq 48(%rsi),%rsp -.Lepilogue: - .byte 0xf3,0xc3 -.size mod_exp_512, . - mod_exp_512 diff --git a/deps/openssl/asm/x64-elf-gas/bn/rsaz-avx2.s b/deps/openssl/asm/x64-elf-gas/bn/rsaz-avx2.s new file mode 100644 index 00000000000000..8f356fc3d51a4e --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/bn/rsaz-avx2.s @@ -0,0 +1,1632 @@ +.text + +.globl rsaz_1024_sqr_avx2 +.type rsaz_1024_sqr_avx2,@function +.align 64 +rsaz_1024_sqr_avx2: + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + movq %rax,%rbp + movq %rdx,%r13 + subq $832,%rsp + movq %r13,%r15 + subq $-128,%rdi + subq $-128,%rsi + subq $-128,%r13 + + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + vpxor %ymm9,%ymm9,%ymm9 + jz .Lsqr_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%r13),%ymm0 + andq $-2048,%rsp + vmovdqu 32-128(%r13),%ymm1 + vmovdqu 64-128(%r13),%ymm2 + vmovdqu 96-128(%r13),%ymm3 + vmovdqu 128-128(%r13),%ymm4 + vmovdqu 160-128(%r13),%ymm5 + vmovdqu 192-128(%r13),%ymm6 + vmovdqu 224-128(%r13),%ymm7 + vmovdqu 256-128(%r13),%ymm8 + leaq 832+128(%rsp),%r13 + vmovdqu %ymm0,0-128(%r13) + vmovdqu %ymm1,32-128(%r13) + vmovdqu %ymm2,64-128(%r13) + vmovdqu %ymm3,96-128(%r13) + vmovdqu %ymm4,128-128(%r13) + vmovdqu %ymm5,160-128(%r13) + vmovdqu %ymm6,192-128(%r13) + vmovdqu %ymm7,224-128(%r13) + vmovdqu %ymm8,256-128(%r13) + vmovdqu %ymm9,288-128(%r13) + +.Lsqr_1024_no_n_copy: + andq $-1024,%rsp + + vmovdqu 32-128(%rsi),%ymm1 + vmovdqu 64-128(%rsi),%ymm2 + vmovdqu 96-128(%rsi),%ymm3 + vmovdqu 128-128(%rsi),%ymm4 + vmovdqu 160-128(%rsi),%ymm5 + vmovdqu 192-128(%rsi),%ymm6 + vmovdqu 224-128(%rsi),%ymm7 + vmovdqu 256-128(%rsi),%ymm8 + + leaq 192(%rsp),%rbx + vpbroadcastq .Land_mask(%rip),%ymm15 + jmp .LOOP_GRANDE_SQR_1024 + +.align 32 +.LOOP_GRANDE_SQR_1024: + leaq 576+128(%rsp),%r9 + leaq 448(%rsp),%r12 + + + + + vpaddq %ymm1,%ymm1,%ymm1 + 
vpbroadcastq 0-128(%rsi),%ymm10 + vpaddq %ymm2,%ymm2,%ymm2 + vmovdqa %ymm1,0-128(%r9) + vpaddq %ymm3,%ymm3,%ymm3 + vmovdqa %ymm2,32-128(%r9) + vpaddq %ymm4,%ymm4,%ymm4 + vmovdqa %ymm3,64-128(%r9) + vpaddq %ymm5,%ymm5,%ymm5 + vmovdqa %ymm4,96-128(%r9) + vpaddq %ymm6,%ymm6,%ymm6 + vmovdqa %ymm5,128-128(%r9) + vpaddq %ymm7,%ymm7,%ymm7 + vmovdqa %ymm6,160-128(%r9) + vpaddq %ymm8,%ymm8,%ymm8 + vmovdqa %ymm7,192-128(%r9) + vpxor %ymm9,%ymm9,%ymm9 + vmovdqa %ymm8,224-128(%r9) + + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpbroadcastq 32-128(%rsi),%ymm11 + vmovdqu %ymm9,288-192(%rbx) + vpmuludq %ymm10,%ymm1,%ymm1 + vmovdqu %ymm9,320-448(%r12) + vpmuludq %ymm10,%ymm2,%ymm2 + vmovdqu %ymm9,352-448(%r12) + vpmuludq %ymm10,%ymm3,%ymm3 + vmovdqu %ymm9,384-448(%r12) + vpmuludq %ymm10,%ymm4,%ymm4 + vmovdqu %ymm9,416-448(%r12) + vpmuludq %ymm10,%ymm5,%ymm5 + vmovdqu %ymm9,448-448(%r12) + vpmuludq %ymm10,%ymm6,%ymm6 + vmovdqu %ymm9,480-448(%r12) + vpmuludq %ymm10,%ymm7,%ymm7 + vmovdqu %ymm9,512-448(%r12) + vpmuludq %ymm10,%ymm8,%ymm8 + vpbroadcastq 64-128(%rsi),%ymm10 + vmovdqu %ymm9,544-448(%r12) + + movq %rsi,%r15 + movl $4,%r14d + jmp .Lsqr_entry_1024 +.align 32 +.LOOP_SQR_1024: + vpbroadcastq 32-128(%r15),%ymm11 + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpaddq 0-192(%rbx),%ymm0,%ymm0 + vpmuludq 0-128(%r9),%ymm10,%ymm1 + vpaddq 32-192(%rbx),%ymm1,%ymm1 + vpmuludq 32-128(%r9),%ymm10,%ymm2 + vpaddq 64-192(%rbx),%ymm2,%ymm2 + vpmuludq 64-128(%r9),%ymm10,%ymm3 + vpaddq 96-192(%rbx),%ymm3,%ymm3 + vpmuludq 96-128(%r9),%ymm10,%ymm4 + vpaddq 128-192(%rbx),%ymm4,%ymm4 + vpmuludq 128-128(%r9),%ymm10,%ymm5 + vpaddq 160-192(%rbx),%ymm5,%ymm5 + vpmuludq 160-128(%r9),%ymm10,%ymm6 + vpaddq 192-192(%rbx),%ymm6,%ymm6 + vpmuludq 192-128(%r9),%ymm10,%ymm7 + vpaddq 224-192(%rbx),%ymm7,%ymm7 + vpmuludq 224-128(%r9),%ymm10,%ymm8 + vpbroadcastq 64-128(%r15),%ymm10 + vpaddq 256-192(%rbx),%ymm8,%ymm8 +.Lsqr_entry_1024: + vmovdqu %ymm0,0-192(%rbx) + vmovdqu %ymm1,32-192(%rbx) + + vpmuludq 32-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 32-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 64-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 96-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 128-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 160-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 192-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 224-128(%r9),%ymm11,%ymm0 + vpbroadcastq 96-128(%r15),%ymm11 + vpaddq 288-192(%rbx),%ymm0,%ymm0 + + vmovdqu %ymm2,64-192(%rbx) + vmovdqu %ymm3,96-192(%rbx) + + vpmuludq 64-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 64-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 96-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 128-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 160-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 224-128(%r9),%ymm10,%ymm1 + vpbroadcastq 128-128(%r15),%ymm10 + vpaddq 320-448(%r12),%ymm1,%ymm1 + + vmovdqu %ymm4,128-192(%rbx) + vmovdqu %ymm5,160-192(%rbx) + + vpmuludq 96-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 96-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq 128-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm0,%ymm0 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq 224-128(%r9),%ymm11,%ymm2 + vpbroadcastq 
160-128(%r15),%ymm11 + vpaddq 352-448(%r12),%ymm2,%ymm2 + + vmovdqu %ymm6,192-192(%rbx) + vmovdqu %ymm7,224-192(%rbx) + + vpmuludq 128-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 128-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 160-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 192-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 224-128(%r9),%ymm10,%ymm3 + vpbroadcastq 192-128(%r15),%ymm10 + vpaddq 384-448(%r12),%ymm3,%ymm3 + + vmovdqu %ymm8,256-192(%rbx) + vmovdqu %ymm0,288-192(%rbx) + leaq 8(%rbx),%rbx + + vpmuludq 160-128(%rsi),%ymm11,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 224-128(%r9),%ymm11,%ymm4 + vpbroadcastq 224-128(%r15),%ymm11 + vpaddq 416-448(%r12),%ymm4,%ymm4 + + vmovdqu %ymm1,320-448(%r12) + vmovdqu %ymm2,352-448(%r12) + + vpmuludq 192-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpbroadcastq 256-128(%r15),%ymm0 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq 224-128(%r9),%ymm10,%ymm5 + vpbroadcastq 0+8-128(%r15),%ymm10 + vpaddq 448-448(%r12),%ymm5,%ymm5 + + vmovdqu %ymm3,384-448(%r12) + vmovdqu %ymm4,416-448(%r12) + leaq 8(%r15),%r15 + + vpmuludq 224-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 224-128(%r9),%ymm11,%ymm6 + vpaddq 480-448(%r12),%ymm6,%ymm6 + + vpmuludq 256-128(%rsi),%ymm0,%ymm7 + vmovdqu %ymm5,448-448(%r12) + vpaddq 512-448(%r12),%ymm7,%ymm7 + vmovdqu %ymm6,480-448(%r12) + vmovdqu %ymm7,512-448(%r12) + leaq 8(%r12),%r12 + + decl %r14d + jnz .LOOP_SQR_1024 + + vmovdqu 256(%rsp),%ymm8 + vmovdqu 288(%rsp),%ymm1 + vmovdqu 320(%rsp),%ymm2 + leaq 192(%rsp),%rbx + + vpsrlq $29,%ymm8,%ymm14 + vpand %ymm15,%ymm8,%ymm8 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + + vpermq $147,%ymm14,%ymm14 + vpxor %ymm9,%ymm9,%ymm9 + vpermq $147,%ymm11,%ymm11 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm8,%ymm8 + vpblendd $3,%ymm11,%ymm9,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,288-192(%rbx) + vmovdqu %ymm2,320-192(%rbx) + + movq (%rsp),%rax + movq 8(%rsp),%r10 + movq 16(%rsp),%r11 + movq 24(%rsp),%r12 + vmovdqu 32(%rsp),%ymm1 + vmovdqu 64-192(%rbx),%ymm2 + vmovdqu 96-192(%rbx),%ymm3 + vmovdqu 128-192(%rbx),%ymm4 + vmovdqu 160-192(%rbx),%ymm5 + vmovdqu 192-192(%rbx),%ymm6 + vmovdqu 224-192(%rbx),%ymm7 + + movq %rax,%r9 + imull %ecx,%eax + andl $536870911,%eax + vmovd %eax,%xmm12 + + movq %rax,%rdx + imulq -128(%r13),%rax + vpbroadcastq %xmm12,%ymm12 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax + shrq $29,%r9 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + addq %r9,%r10 + addq %rax,%r11 + imulq 24-128(%r13),%rdx + addq %rdx,%r12 + + movq %r10,%rax + imull %ecx,%eax + andl $536870911,%eax + + movl $9,%r14d + jmp .LOOP_REDUCE_1024 + +.align 32 +.LOOP_REDUCE_1024: + vmovd %eax,%xmm13 + vpbroadcastq %xmm13,%ymm13 + + vpmuludq 32-128(%r13),%ymm12,%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm10,%ymm1,%ymm1 + addq %rax,%r10 + vpmuludq 64-128(%r13),%ymm12,%ymm14 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm14,%ymm2,%ymm2 + vpmuludq 96-128(%r13),%ymm12,%ymm11 +.byte 0x67 + addq %rax,%r11 +.byte 0x67 + movq %rdx,%rax + imulq 16-128(%r13),%rax + shrq $29,%r10 + vpaddq %ymm11,%ymm3,%ymm3 + vpmuludq 128-128(%r13),%ymm12,%ymm10 + addq %rax,%r12 + addq %r10,%r11 + vpaddq %ymm10,%ymm4,%ymm4 + vpmuludq 
160-128(%r13),%ymm12,%ymm14 + movq %r11,%rax + imull %ecx,%eax + vpaddq %ymm14,%ymm5,%ymm5 + vpmuludq 192-128(%r13),%ymm12,%ymm11 + andl $536870911,%eax + vpaddq %ymm11,%ymm6,%ymm6 + vpmuludq 224-128(%r13),%ymm12,%ymm10 + vpaddq %ymm10,%ymm7,%ymm7 + vpmuludq 256-128(%r13),%ymm12,%ymm14 + vmovd %eax,%xmm12 + + vpaddq %ymm14,%ymm8,%ymm8 + + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 32-8-128(%r13),%ymm13,%ymm11 + vmovdqu 96-8-128(%r13),%ymm14 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm1,%ymm1 + vpmuludq 64-8-128(%r13),%ymm13,%ymm10 + vmovdqu 128-8-128(%r13),%ymm11 + addq %rax,%r11 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm10,%ymm2,%ymm2 + addq %r12,%rax + shrq $29,%r11 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 160-8-128(%r13),%ymm10 + addq %r11,%rax + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 192-8-128(%r13),%ymm14 +.byte 0x67 + movq %rax,%r12 + imull %ecx,%eax + vpaddq %ymm11,%ymm4,%ymm4 + vpmuludq %ymm13,%ymm10,%ymm10 +.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + andl $536870911,%eax + vpaddq %ymm10,%ymm5,%ymm5 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 256-8-128(%r13),%ymm10 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 288-8-128(%r13),%ymm9 + vmovd %eax,%xmm0 + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm7,%ymm7 + vpmuludq %ymm13,%ymm10,%ymm10 + vmovdqu 32-16-128(%r13),%ymm14 + vpbroadcastq %xmm0,%ymm0 + vpaddq %ymm10,%ymm8,%ymm8 + vpmuludq %ymm13,%ymm9,%ymm9 + vmovdqu 64-16-128(%r13),%ymm11 + addq %rax,%r12 + + vmovdqu 32-24-128(%r13),%ymm13 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 96-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq %ymm0,%ymm13,%ymm13 + vpmuludq %ymm12,%ymm11,%ymm11 +.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq %ymm1,%ymm13,%ymm13 + vpaddq %ymm11,%ymm2,%ymm2 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 160-16-128(%r13),%ymm11 +.byte 0x67 + vmovq %xmm13,%rax + vmovdqu %ymm13,(%rsp) + vpaddq %ymm10,%ymm3,%ymm3 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 192-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq %ymm12,%ymm11,%ymm11 + vmovdqu 224-16-128(%r13),%ymm14 + vpaddq %ymm11,%ymm5,%ymm5 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 256-16-128(%r13),%ymm11 + vpaddq %ymm10,%ymm6,%ymm6 + vpmuludq %ymm12,%ymm14,%ymm14 + shrq $29,%r12 + vmovdqu 288-16-128(%r13),%ymm10 + addq %r12,%rax + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq %ymm12,%ymm11,%ymm11 + + movq %rax,%r9 + imull %ecx,%eax + vpaddq %ymm11,%ymm8,%ymm8 + vpmuludq %ymm12,%ymm10,%ymm10 + andl $536870911,%eax + vmovd %eax,%xmm12 + vmovdqu 96-24-128(%r13),%ymm11 +.byte 0x67 + vpaddq %ymm10,%ymm9,%ymm9 + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 64-24-128(%r13),%ymm0,%ymm14 + vmovdqu 128-24-128(%r13),%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + movq 8(%rsp),%r10 + vpaddq %ymm14,%ymm2,%ymm1 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 160-24-128(%r13),%ymm14 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax +.byte 0x67 + shrq $29,%r9 + movq 16(%rsp),%r11 + vpaddq %ymm11,%ymm3,%ymm2 + vpmuludq %ymm0,%ymm10,%ymm10 + vmovdqu 192-24-128(%r13),%ymm11 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + vpaddq %ymm10,%ymm4,%ymm3 + vpmuludq %ymm0,%ymm14,%ymm14 + vmovdqu 224-24-128(%r13),%ymm10 + imulq 24-128(%r13),%rdx + addq %rax,%r11 + leaq (%r9,%r10,1),%rax + vpaddq %ymm14,%ymm5,%ymm4 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 256-24-128(%r13),%ymm14 + movq %rax,%r10 + imull %ecx,%eax + vpmuludq %ymm0,%ymm10,%ymm10 + vpaddq %ymm11,%ymm6,%ymm5 + vmovdqu 288-24-128(%r13),%ymm11 + andl $536870911,%eax + vpaddq 
%ymm10,%ymm7,%ymm6 + vpmuludq %ymm0,%ymm14,%ymm14 + addq 24(%rsp),%rdx + vpaddq %ymm14,%ymm8,%ymm7 + vpmuludq %ymm0,%ymm11,%ymm11 + vpaddq %ymm11,%ymm9,%ymm8 + vmovq %r12,%xmm9 + movq %rdx,%r12 + + decl %r14d + jnz .LOOP_REDUCE_1024 + leaq 448(%rsp),%r12 + vpaddq %ymm9,%ymm13,%ymm0 + vpxor %ymm9,%ymm9,%ymm9 + + vpaddq 288-192(%rbx),%ymm0,%ymm0 + vpaddq 320-448(%r12),%ymm1,%ymm1 + vpaddq 352-448(%r12),%ymm2,%ymm2 + vpaddq 384-448(%r12),%ymm3,%ymm3 + vpaddq 416-448(%r12),%ymm4,%ymm4 + vpaddq 448-448(%r12),%ymm5,%ymm5 + vpaddq 480-448(%r12),%ymm6,%ymm6 + vpaddq 512-448(%r12),%ymm7,%ymm7 + vpaddq 544-448(%r12),%ymm8,%ymm8 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $147,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm13,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $147,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vmovdqu %ymm0,0-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,32-128(%rdi) + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vmovdqu %ymm2,64-128(%rdi) + vpaddq %ymm13,%ymm4,%ymm4 + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vmovdqu %ymm4,128-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vmovdqu %ymm5,160-128(%rdi) + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vmovdqu %ymm6,192-128(%rdi) + vpaddq %ymm13,%ymm8,%ymm8 + 
vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + + movq %rdi,%rsi + decl %r8d + jne .LOOP_GRANDE_SQR_1024 + + vzeroall + movq %rbp,%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lsqr_1024_epilogue: + .byte 0xf3,0xc3 +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.type rsaz_1024_mul_avx2,@function +.align 64 +rsaz_1024_mul_avx2: + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rax,%rbp + vzeroall + movq %rdx,%r13 + subq $64,%rsp + + + + + + +.byte 0x67,0x67 + movq %rsi,%r15 + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + movq %rsi,%r15 + cmovnzq %r13,%rsi + cmovnzq %r15,%r13 + + movq %rcx,%r15 + subq $-128,%rsi + subq $-128,%rcx + subq $-128,%rdi + + andq $4095,%r15 + addq $320,%r15 +.byte 0x67,0x67 + shrq $12,%r15 + jz .Lmul_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%rcx),%ymm0 + andq $-512,%rsp + vmovdqu 32-128(%rcx),%ymm1 + vmovdqu 64-128(%rcx),%ymm2 + vmovdqu 96-128(%rcx),%ymm3 + vmovdqu 128-128(%rcx),%ymm4 + vmovdqu 160-128(%rcx),%ymm5 + vmovdqu 192-128(%rcx),%ymm6 + vmovdqu 224-128(%rcx),%ymm7 + vmovdqu 256-128(%rcx),%ymm8 + leaq 64+128(%rsp),%rcx + vmovdqu %ymm0,0-128(%rcx) + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm1,32-128(%rcx) + vpxor %ymm1,%ymm1,%ymm1 + vmovdqu %ymm2,64-128(%rcx) + vpxor %ymm2,%ymm2,%ymm2 + vmovdqu %ymm3,96-128(%rcx) + vpxor %ymm3,%ymm3,%ymm3 + vmovdqu %ymm4,128-128(%rcx) + vpxor %ymm4,%ymm4,%ymm4 + vmovdqu %ymm5,160-128(%rcx) + vpxor %ymm5,%ymm5,%ymm5 + vmovdqu %ymm6,192-128(%rcx) + vpxor %ymm6,%ymm6,%ymm6 + vmovdqu %ymm7,224-128(%rcx) + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu %ymm8,256-128(%rcx) + vmovdqa %ymm0,%ymm8 + vmovdqu %ymm9,288-128(%rcx) +.Lmul_1024_no_n_copy: + andq $-64,%rsp + + movq (%r13),%rbx + vpbroadcastq (%r13),%ymm10 + vmovdqu %ymm0,(%rsp) + xorq %r9,%r9 +.byte 0x67 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + + vmovdqu .Land_mask(%rip),%ymm15 + movl $9,%r14d + vmovdqu %ymm9,288-128(%rdi) + jmp .Loop_mul_1024 + +.align 32 +.Loop_mul_1024: + vpsrlq $29,%ymm3,%ymm9 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r9,%rax + movq %rbx,%r10 + imulq 8-128(%rsi),%r10 + addq 8(%rsp),%r10 + + movq %rax,%r9 + imull %r8d,%eax + andl $536870911,%eax + + movq %rbx,%r11 + imulq 16-128(%rsi),%r11 + addq 16(%rsp),%r11 + + movq %rbx,%r12 + imulq 24-128(%rsi),%r12 + addq 24(%rsp),%r12 + vpmuludq 32-128(%rsi),%ymm10,%ymm0 + vmovd %eax,%xmm11 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq 64-128(%rsi),%ymm10,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 96-128(%rsi),%ymm10,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq 128-128(%rsi),%ymm10,%ymm0 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq 160-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 192-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq 224-128(%rsi),%ymm10,%ymm0 + vpermq $147,%ymm9,%ymm9 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 256-128(%rsi),%ymm10,%ymm12 + vpbroadcastq 8(%r13),%ymm10 + vpaddq %ymm12,%ymm8,%ymm8 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%rcx),%rax + addq %rax,%r11 + shrq $29,%r9 + imulq 24-128(%rcx),%rdx + addq %rdx,%r12 + addq %r9,%r10 + + vpmuludq 32-128(%rcx),%ymm11,%ymm13 + vmovq %xmm10,%rbx + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 64-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm2,%ymm2 + vpmuludq 
96-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 128-128(%rcx),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 160-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm5,%ymm5 + vpmuludq 192-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 224-128(%rcx),%ymm11,%ymm13 + vpblendd $3,%ymm14,%ymm9,%ymm9 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 256-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm9,%ymm3,%ymm3 + vpaddq %ymm0,%ymm8,%ymm8 + + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rsi),%ymm12 + movq %rbx,%rax + imulq 8-128(%rsi),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rsi),%ymm13 + + movq %r10,%rax + imull %r8d,%eax + andl $536870911,%eax + + imulq 16-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovd %eax,%xmm11 + vmovdqu -8+96-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -8+128-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+160-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+192-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -8+224-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+256-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+288-128(%rsi),%ymm9 + vpaddq %ymm12,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm13,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm9,%ymm9 + vpbroadcastq 16(%r13),%ymm10 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rcx),%ymm0 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rcx),%ymm12 + shrq $29,%r10 + imulq 16-128(%rcx),%rdx + addq %rdx,%r12 + addq %r10,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -8+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rsi),%ymm0 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r11,%rax + + vmovdqu -16+64-128(%rsi),%ymm12 + movq %rax,%r11 + imull %r8d,%eax + andl $536870911,%eax + + imulq 8-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -16+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -16+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -16+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 
%ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 24(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rcx),%ymm0 + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r11 + vmovdqu -16+64-128(%rcx),%ymm12 + imulq 8-128(%rcx),%rdx + addq %rdx,%r12 + shrq $29,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -16+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+32-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+64-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm9,%ymm9 + + addq %r11,%r12 + imulq -128(%rsi),%rbx + addq %rbx,%r12 + + movq %r12,%rax + imull %r8d,%eax + andl $536870911,%eax + + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -24+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -24+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -24+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 32(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + addq $32,%r13 + + vmovdqu -24+32-128(%rcx),%ymm0 + imulq -128(%rcx),%rax + addq %rax,%r12 + shrq $29,%r12 + + vmovdqu -24+64-128(%rcx),%ymm12 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -24+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm0 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu %ymm0,(%rsp) + vpaddq %ymm12,%ymm2,%ymm1 + vmovdqu -24+128-128(%rcx),%ymm0 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm2 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm3 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm4 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm5 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+288-128(%rcx),%ymm13 + movq %r12,%r9 + vpaddq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm11,%ymm12,%ymm12 + addq (%rsp),%r9 + vpaddq %ymm12,%ymm8,%ymm7 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovq %r12,%xmm12 + vpaddq %ymm13,%ymm9,%ymm8 + + decl %r14d + jnz .Loop_mul_1024 + vpermq $0,%ymm15,%ymm15 + vpaddq (%rsp),%ymm12,%ymm0 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + + vpblendd 
$3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm10,%ymm10 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpermq $147,%ymm11,%ymm11 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpermq $147,%ymm10,%ymm10 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm11,%ymm11 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vmovdqu %ymm0,0-128(%rdi) + vmovdqu %ymm1,32-128(%rdi) + vmovdqu %ymm2,64-128(%rdi) + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vmovdqu %ymm4,128-128(%rdi) + vmovdqu %ymm5,160-128(%rdi) + vmovdqu %ymm6,192-128(%rdi) + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + vzeroupper + + movq %rbp,%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_1024_epilogue: + .byte 0xf3,0xc3 +.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 +.globl rsaz_1024_red2norm_avx2 +.type rsaz_1024_red2norm_avx2,@function +.align 32 +rsaz_1024_red2norm_avx2: + subq $-128,%rsi + xorq %rax,%rax + movq -128(%rsi),%r8 + movq -120(%rsi),%r9 + movq -112(%rsi),%r10 + shlq $0,%r8 + shlq $29,%r9 + movq %r10,%r11 + shlq $58,%r10 + shrq $6,%r11 + addq %r8,%rax + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,0(%rdi) + movq %r11,%rax + movq -104(%rsi),%r8 + movq -96(%rsi),%r9 + shlq $23,%r8 + movq %r9,%r10 + shlq $52,%r9 + shrq $12,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,8(%rdi) + movq %r10,%rax + 
movq -88(%rsi),%r11 + movq -80(%rsi),%r8 + shlq $17,%r11 + movq %r8,%r9 + shlq $46,%r8 + shrq $18,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,16(%rdi) + movq %r9,%rax + movq -72(%rsi),%r10 + movq -64(%rsi),%r11 + shlq $11,%r10 + movq %r11,%r8 + shlq $40,%r11 + shrq $24,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,24(%rdi) + movq %r8,%rax + movq -56(%rsi),%r9 + movq -48(%rsi),%r10 + movq -40(%rsi),%r11 + shlq $5,%r9 + shlq $34,%r10 + movq %r11,%r8 + shlq $63,%r11 + shrq $1,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,32(%rdi) + movq %r8,%rax + movq -32(%rsi),%r9 + movq -24(%rsi),%r10 + shlq $28,%r9 + movq %r10,%r11 + shlq $57,%r10 + shrq $7,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,40(%rdi) + movq %r11,%rax + movq -16(%rsi),%r8 + movq -8(%rsi),%r9 + shlq $22,%r8 + movq %r9,%r10 + shlq $51,%r9 + shrq $13,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,48(%rdi) + movq %r10,%rax + movq 0(%rsi),%r11 + movq 8(%rsi),%r8 + shlq $16,%r11 + movq %r8,%r9 + shlq $45,%r8 + shrq $19,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,56(%rdi) + movq %r9,%rax + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + shlq $10,%r10 + movq %r11,%r8 + shlq $39,%r11 + shrq $25,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,64(%rdi) + movq %r8,%rax + movq 32(%rsi),%r9 + movq 40(%rsi),%r10 + movq 48(%rsi),%r11 + shlq $4,%r9 + shlq $33,%r10 + movq %r11,%r8 + shlq $62,%r11 + shrq $2,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,72(%rdi) + movq %r8,%rax + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + shlq $27,%r9 + movq %r10,%r11 + shlq $56,%r10 + shrq $8,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,80(%rdi) + movq %r11,%rax + movq 72(%rsi),%r8 + movq 80(%rsi),%r9 + shlq $21,%r8 + movq %r9,%r10 + shlq $50,%r9 + shrq $14,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,88(%rdi) + movq %r10,%rax + movq 88(%rsi),%r11 + movq 96(%rsi),%r8 + shlq $15,%r11 + movq %r8,%r9 + shlq $44,%r8 + shrq $20,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,96(%rdi) + movq %r9,%rax + movq 104(%rsi),%r10 + movq 112(%rsi),%r11 + shlq $9,%r10 + movq %r11,%r8 + shlq $38,%r11 + shrq $26,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,104(%rdi) + movq %r8,%rax + movq 120(%rsi),%r9 + movq 128(%rsi),%r10 + movq 136(%rsi),%r11 + shlq $3,%r9 + shlq $32,%r10 + movq %r11,%r8 + shlq $61,%r11 + shrq $3,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,112(%rdi) + movq %r8,%rax + movq 144(%rsi),%r9 + movq 152(%rsi),%r10 + shlq $26,%r9 + movq %r10,%r11 + shlq $55,%r10 + shrq $9,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,120(%rdi) + movq %r11,%rax + .byte 0xf3,0xc3 +.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 + +.globl rsaz_1024_norm2red_avx2 +.type rsaz_1024_norm2red_avx2,@function +.align 32 +rsaz_1024_norm2red_avx2: + subq $-128,%rdi + movq (%rsi),%r8 + movl $536870911,%eax + movq 8(%rsi),%r9 + movq %r8,%r11 + shrq $0,%r11 + andq %rax,%r11 + movq %r11,-128(%rdi) + movq %r8,%r10 + shrq $29,%r10 + andq %rax,%r10 + movq %r10,-120(%rdi) + shrdq $58,%r9,%r8 + andq %rax,%r8 + movq %r8,-112(%rdi) + movq 16(%rsi),%r10 + movq %r9,%r8 + shrq $23,%r8 + andq %rax,%r8 + movq %r8,-104(%rdi) + shrdq $52,%r10,%r9 + andq %rax,%r9 + movq %r9,-96(%rdi) + movq 24(%rsi),%r11 + movq %r10,%r9 + shrq $17,%r9 + andq %rax,%r9 + movq %r9,-88(%rdi) + shrdq $46,%r11,%r10 + andq %rax,%r10 + movq 
%r10,-80(%rdi) + movq 32(%rsi),%r8 + movq %r11,%r10 + shrq $11,%r10 + andq %rax,%r10 + movq %r10,-72(%rdi) + shrdq $40,%r8,%r11 + andq %rax,%r11 + movq %r11,-64(%rdi) + movq 40(%rsi),%r9 + movq %r8,%r11 + shrq $5,%r11 + andq %rax,%r11 + movq %r11,-56(%rdi) + movq %r8,%r10 + shrq $34,%r10 + andq %rax,%r10 + movq %r10,-48(%rdi) + shrdq $63,%r9,%r8 + andq %rax,%r8 + movq %r8,-40(%rdi) + movq 48(%rsi),%r10 + movq %r9,%r8 + shrq $28,%r8 + andq %rax,%r8 + movq %r8,-32(%rdi) + shrdq $57,%r10,%r9 + andq %rax,%r9 + movq %r9,-24(%rdi) + movq 56(%rsi),%r11 + movq %r10,%r9 + shrq $22,%r9 + andq %rax,%r9 + movq %r9,-16(%rdi) + shrdq $51,%r11,%r10 + andq %rax,%r10 + movq %r10,-8(%rdi) + movq 64(%rsi),%r8 + movq %r11,%r10 + shrq $16,%r10 + andq %rax,%r10 + movq %r10,0(%rdi) + shrdq $45,%r8,%r11 + andq %rax,%r11 + movq %r11,8(%rdi) + movq 72(%rsi),%r9 + movq %r8,%r11 + shrq $10,%r11 + andq %rax,%r11 + movq %r11,16(%rdi) + shrdq $39,%r9,%r8 + andq %rax,%r8 + movq %r8,24(%rdi) + movq 80(%rsi),%r10 + movq %r9,%r8 + shrq $4,%r8 + andq %rax,%r8 + movq %r8,32(%rdi) + movq %r9,%r11 + shrq $33,%r11 + andq %rax,%r11 + movq %r11,40(%rdi) + shrdq $62,%r10,%r9 + andq %rax,%r9 + movq %r9,48(%rdi) + movq 88(%rsi),%r11 + movq %r10,%r9 + shrq $27,%r9 + andq %rax,%r9 + movq %r9,56(%rdi) + shrdq $56,%r11,%r10 + andq %rax,%r10 + movq %r10,64(%rdi) + movq 96(%rsi),%r8 + movq %r11,%r10 + shrq $21,%r10 + andq %rax,%r10 + movq %r10,72(%rdi) + shrdq $50,%r8,%r11 + andq %rax,%r11 + movq %r11,80(%rdi) + movq 104(%rsi),%r9 + movq %r8,%r11 + shrq $15,%r11 + andq %rax,%r11 + movq %r11,88(%rdi) + shrdq $44,%r9,%r8 + andq %rax,%r8 + movq %r8,96(%rdi) + movq 112(%rsi),%r10 + movq %r9,%r8 + shrq $9,%r8 + andq %rax,%r8 + movq %r8,104(%rdi) + shrdq $38,%r10,%r9 + andq %rax,%r9 + movq %r9,112(%rdi) + movq 120(%rsi),%r11 + movq %r10,%r9 + shrq $3,%r9 + andq %rax,%r9 + movq %r9,120(%rdi) + movq %r10,%r8 + shrq $32,%r8 + andq %rax,%r8 + movq %r8,128(%rdi) + shrdq $61,%r11,%r10 + andq %rax,%r10 + movq %r10,136(%rdi) + xorq %r8,%r8 + movq %r11,%r10 + shrq $26,%r10 + andq %rax,%r10 + movq %r10,144(%rdi) + shrdq $55,%r8,%r11 + andq %rax,%r11 + movq %r11,152(%rdi) + movq %r8,160(%rdi) + movq %r8,168(%rdi) + movq %r8,176(%rdi) + movq %r8,184(%rdi) + .byte 0xf3,0xc3 +.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 +.globl rsaz_1024_scatter5_avx2 +.type rsaz_1024_scatter5_avx2,@function +.align 32 +rsaz_1024_scatter5_avx2: + vzeroupper + vmovdqu .Lscatter_permd(%rip),%ymm5 + shll $4,%edx + leaq (%rdi,%rdx,1),%rdi + movl $9,%eax + jmp .Loop_scatter_1024 + +.align 32 +.Loop_scatter_1024: + vmovdqu (%rsi),%ymm0 + leaq 32(%rsi),%rsi + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,(%rdi) + leaq 512(%rdi),%rdi + decl %eax + jnz .Loop_scatter_1024 + + vzeroupper + .byte 0xf3,0xc3 +.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 + +.globl rsaz_1024_gather5_avx2 +.type rsaz_1024_gather5_avx2,@function +.align 32 +rsaz_1024_gather5_avx2: + leaq .Lgather_table(%rip),%r11 + movl %edx,%eax + andl $3,%edx + shrl $2,%eax + shll $4,%edx + + vmovdqu -32(%r11),%ymm7 + vpbroadcastb 8(%r11,%rax,1),%xmm8 + vpbroadcastb 7(%r11,%rax,1),%xmm9 + vpbroadcastb 6(%r11,%rax,1),%xmm10 + vpbroadcastb 5(%r11,%rax,1),%xmm11 + vpbroadcastb 4(%r11,%rax,1),%xmm12 + vpbroadcastb 3(%r11,%rax,1),%xmm13 + vpbroadcastb 2(%r11,%rax,1),%xmm14 + vpbroadcastb 1(%r11,%rax,1),%xmm15 + + leaq 64(%rsi,%rdx,1),%rsi + movq $64,%r11 + movl $9,%eax + jmp .Loop_gather_1024 + +.align 32 +.Loop_gather_1024: + vpand -64(%rsi),%xmm8,%xmm0 + vpand (%rsi),%xmm9,%xmm1 + vpand 64(%rsi),%xmm10,%xmm2 + 
vpand (%rsi,%r11,2),%xmm11,%xmm3 + vpor %xmm0,%xmm1,%xmm1 + vpand 64(%rsi,%r11,2),%xmm12,%xmm4 + vpor %xmm2,%xmm3,%xmm3 + vpand (%rsi,%r11,4),%xmm13,%xmm5 + vpor %xmm1,%xmm3,%xmm3 + vpand 64(%rsi,%r11,4),%xmm14,%xmm6 + vpor %xmm4,%xmm5,%xmm5 + vpand -128(%rsi,%r11,8),%xmm15,%xmm2 + leaq (%rsi,%r11,8),%rsi + vpor %xmm3,%xmm5,%xmm5 + vpor %xmm2,%xmm6,%xmm6 + vpor %xmm5,%xmm6,%xmm6 + vpermd %ymm6,%ymm7,%ymm6 + vmovdqu %ymm6,(%rdi) + leaq 32(%rdi),%rdi + decl %eax + jnz .Loop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 + +.globl rsaz_avx2_eligible +.type rsaz_avx2_eligible,@function +.align 32 +rsaz_avx2_eligible: + movl OPENSSL_ia32cap_P+8(%rip),%eax + movl $524544,%ecx + movl $0,%edx + andl %eax,%ecx + cmpl $524544,%ecx + cmovel %edx,%eax + andl $32,%eax + shrl $5,%eax + .byte 0xf3,0xc3 +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.align 64 +.Land_mask: +.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 +.Lscatter_permd: +.long 0,2,4,6,7,7,7,7 +.Lgather_permd: +.long 0,7,1,7,2,7,3,7 +.Lgather_table: +.byte 0,0,0,0,0,0,0,0, 0xff,0,0,0,0,0,0,0 +.align 64 diff --git a/deps/openssl/asm/x64-elf-gas/bn/rsaz-x86_64.s b/deps/openssl/asm/x64-elf-gas/bn/rsaz-x86_64.s new file mode 100644 index 00000000000000..f42075571e66b8 --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/bn/rsaz-x86_64.s @@ -0,0 +1,1755 @@ +.text + + + +.globl rsaz_512_sqr +.type rsaz_512_sqr,@function +.align 32 +rsaz_512_sqr: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lsqr_body: + movq %rdx,%rbp + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + movl $524544,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je .Loop_sqrx + jmp .Loop_sqr + +.align 32 +.Loop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + movq %rdx,%r15 + adcq $0,%r15 + + addq %r8,%r8 + movq %r9,%rcx + adcq %r9,%r9 + + mulq %rax + movq %rax,(%rsp) + addq %rdx,%r8 + adcq $0,%r9 + + movq %r8,8(%rsp) + shrq $63,%rcx + + + movq 8(%rsi),%r8 + movq 16(%rsi),%rax + mulq %r8 + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r15 + movq %r8,%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%r8 + movq %r10,%rdx + adcq $0,%r8 + + addq %rdx,%rdx + leaq (%rcx,%r10,2),%r10 + movq %r11,%rbx + adcq %r11,%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + shrq $63,%rbx + + + movq 16(%rsi),%r9 + movq 24(%rsi),%rax + mulq 
%r9 + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r14 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + movq %r12,%r10 + leaq (%rbx,%r12,2),%r12 + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + shrq $63,%r10 + addq %rax,%r8 + movq %r9,%rax + adcq $0,%rdx + addq %rcx,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + movq %r13,%rcx + leaq (%r10,%r13,2),%r13 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + shrq $63,%rcx + + + movq 24(%rsi),%r10 + movq 32(%rsi),%rax + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + movq %r14,%r12 + leaq (%rcx,%r14,2),%r14 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + shrq $63,%r12 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + movq %r15,%rbx + leaq (%r12,%r15,2),%r15 + + mulq %rax + addq %rax,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + shrq $63,%rbx + + + movq 32(%rsi),%r11 + movq 40(%rsi),%rax + mulq %r11 + addq %rax,%r8 + movq 48(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + movq %r8,%r12 + leaq (%rbx,%r8,2),%r8 + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + shrq $63,%r12 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + movq %r9,%rcx + leaq (%r12,%r9,2),%r9 + + mulq %rax + addq %rax,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + shrq $63,%rcx + + + movq 40(%rsi),%r12 + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + movq 56(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + movq %r10,%r15 + leaq (%rcx,%r10,2),%r10 + adcq $0,%rdx + shrq $63,%r15 + addq %rbx,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + movq %r11,%rbx + leaq (%r15,%r11,2),%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + movq 48(%rsi),%r13 + movq 56(%rsi),%rax + mulq %r13 + addq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + adcq $0,%r13 + + xorq %r14,%r14 + shlq $1,%rbx + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + movq 56(%rsi),%rax + mulq %rax + addq %rax,%r13 + adcq $0,%rdx + + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqr + jmp .Lsqr_tail + +.align 32 +.Loop_sqrx: + movl %r8d,128+8(%rsp) +.byte 102,72,15,110,199 +.byte 102,72,15,110,205 + + 
mulxq %rax,%r8,%r9 + + mulxq 16(%rsi),%rcx,%r10 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r11 + adcxq %rcx,%r9 + + mulxq 32(%rsi),%rcx,%r12 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rcx,%r11 + +.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r12 + adcxq %rcx,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adcxq %rbp,%r15 + + movq %r9,%rcx + shldq $1,%r8,%r9 + shlq $1,%r8 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rdx,%r8 + movq 8(%rsi),%rdx + adcxq %rbp,%r9 + + movq %rax,(%rsp) + movq %r8,8(%rsp) + + + mulxq 16(%rsi),%rax,%rbx + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %r8,%r12 + + mulxq 32(%rsi),%rax,%rbx + adoxq %rax,%r12 + adcxq %rbx,%r13 + + mulxq 40(%rsi),%rdi,%r8 + adoxq %rdi,%r13 + adcxq %r8,%r14 + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 + adoxq %rdi,%r15 + adcxq %rbp,%r8 + adoxq %rbp,%r8 + + movq %r11,%rbx + shldq $1,%r10,%r11 + shldq $1,%rcx,%r10 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rcx + movq 16(%rsi),%rdx + adcxq %rax,%r9 + adcxq %rcx,%r10 + adcxq %rbp,%r11 + + movq %r9,16(%rsp) +.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 + + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 + adoxq %rdi,%r12 + adcxq %r9,%r13 + + mulxq 32(%rsi),%rax,%rcx + adoxq %rax,%r13 + adcxq %rcx,%r14 + + mulxq 40(%rsi),%rdi,%r9 + adoxq %rdi,%r14 + adcxq %r9,%r15 + +.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 + adoxq %rax,%r15 + adcxq %rcx,%r8 + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00 + adoxq %rdi,%r8 + adcxq %rbp,%r9 + adoxq %rbp,%r9 + + movq %r13,%rcx + shldq $1,%r12,%r13 + shldq $1,%rbx,%r12 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r11 + adcxq %rdx,%r12 + movq 24(%rsi),%rdx + adcxq %rbp,%r13 + + movq %r11,32(%rsp) +.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + + mulxq 40(%rsi),%rdi,%r10 + adoxq %rdi,%r15 + adcxq %r10,%r8 + + mulxq 48(%rsi),%rax,%rbx + adoxq %rax,%r8 + adcxq %rbx,%r9 + + mulxq 56(%rsi),%rdi,%r10 + adoxq %rdi,%r9 + adcxq %rbp,%r10 + adoxq %rbp,%r10 + +.byte 0x66 + movq %r15,%rbx + shldq $1,%r14,%r15 + shldq $1,%rcx,%r14 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r13 + adcxq %rdx,%r14 + movq 32(%rsi),%rdx + adcxq %rbp,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + + +.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 + adoxq %rdi,%r8 + adcxq %r11,%r9 + + mulxq 48(%rsi),%rax,%rcx + adoxq %rax,%r9 + adcxq %rcx,%r10 + + mulxq 56(%rsi),%rdi,%r11 + adoxq %rdi,%r10 + adcxq %rbp,%r11 + adoxq %rbp,%r11 + + movq %r9,%rcx + shldq $1,%r8,%r9 + shldq $1,%rbx,%r8 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r15 + adcxq %rdx,%r8 + movq 40(%rsi),%rdx + adcxq %rbp,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %rbp,%r12 + adoxq %rbp,%r12 + + movq %r11,%rbx + shldq $1,%r10,%r11 + shldq $1,%rcx,%r10 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r9 + adcxq %rdx,%r10 + movq 48(%rsi),%rdx + adcxq %rbp,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + +.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 + adoxq %rax,%r12 + adoxq %rbp,%r13 + + xorq %r14,%r14 + shldq $1,%r13,%r14 + shldq $1,%r12,%r13 + shldq $1,%rbx,%r12 + + xorl 
%ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r11 + adcxq %rdx,%r12 + movq 56(%rsi),%rdx + adcxq %rbp,%r13 + +.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 +.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 + + + mulxq %rdx,%rax,%rdx + adoxq %rax,%r13 + adoxq %rbp,%rdx + +.byte 0x66 + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqrx + +.Lsqr_tail: + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lsqr_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_sqr,.-rsaz_512_sqr +.globl rsaz_512_mul +.type rsaz_512_mul,@function +.align 32 +rsaz_512_mul: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lmul_body: +.byte 102,72,15,110,199 +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + movl $524544,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je .Lmulx + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp .Lmul_tail + +.align 32 +.Lmulx: + movq %rdx,%rbp + movq (%rdx),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex +.Lmul_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul,.-rsaz_512_mul +.globl rsaz_512_mul_gather4 +.type rsaz_512_mul_gather4,@function +.align 32 +rsaz_512_mul_gather4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +.Lmul_gather4_body: + movl $524544,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je .Lmulx_gather + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + movl (%rdx,%r9,4),%ebx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rbx + movq (%rsi),%rax + movq 8(%rsi),%rcx + leaq 128(%rdx,%r9,4),%rbp + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq 
$4,%xmm5 + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + leaq 128(%rbp),%rbp + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp .Loop_mul_gather + +.align 32 +.Loop_mul_gather: + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 128(%rbp),%rbp + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp .Lmul_gather_tail + +.align 32 +.Lmulx_gather: + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + leaq 128(%rdx,%r9,4),%rbp + movl (%rdx,%r9,4),%edx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rdx + mulxq (%rsi),%rbx,%r8 + movq %rbx,(%rsp) + xorl %edi,%edi + + mulxq 8(%rsi),%rax,%r9 + movd (%rbp),%xmm4 + + mulxq 16(%rsi),%rbx,%r10 + movd 64(%rbp),%xmm5 + adcxq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + pslldq $4,%xmm5 + adcxq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + por %xmm5,%xmm4 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + leaq 128(%rbp),%rbp + adcxq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 +.byte 102,72,15,126,226 + adcxq %rbx,%r13 + adcxq %rax,%r14 + movq %r8,%rbx + adcxq %rdi,%r15 + + movq $-7,%rcx + jmp .Loop_mulx_gather + +.align 32 +.Loop_mulx_gather: + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 +.byte 0x66,0x0f,0x6e,0xa5,0x00,0x00,0x00,0x00 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + movd 64(%rbp),%xmm5 + leaq 128(%rbp),%rbp + adcxq %rax,%r9 + adoxq %r11,%r10 + +.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 + pslldq $4,%xmm5 + por %xmm5,%xmm4 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 
56(%rsi),%rax,%r15 +.byte 102,72,15,126,226 + movq %rbx,64(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + movq %r8,%rbx + adcxq %rdi,%r15 + + incq %rcx + jnz .Loop_mulx_gather + + movq %r8,64(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +.Lmul_gather_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_gather4_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 +.globl rsaz_512_mul_scatter4 +.type rsaz_512_mul_scatter4,@function +.align 32 +rsaz_512_mul_scatter4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +.Lmul_scatter4_body: + leaq (%r8,%r9,4),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movl $524544,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je .Lmulx_scatter + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp .Lmul_scatter_tail + +.align 32 +.Lmulx_scatter: + movq (%rdi),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +.Lmul_scatter_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movl %r8d,0(%rsi) + shrq $32,%r8 + movl %r9d,128(%rsi) + shrq $32,%r9 + movl %r10d,256(%rsi) + shrq $32,%r10 + movl %r11d,384(%rsi) + shrq $32,%r11 + movl %r12d,512(%rsi) + shrq $32,%r12 + movl %r13d,640(%rsi) + shrq $32,%r13 + movl %r14d,768(%rsi) + shrq $32,%r14 + movl %r15d,896(%rsi) + shrq $32,%r15 + movl %r8d,64(%rsi) + movl %r9d,192(%rsi) + movl %r10d,320(%rsi) + movl %r11d,448(%rsi) + movl %r12d,576(%rsi) + movl %r13d,704(%rsi) + movl %r14d,832(%rsi) + movl %r15d,960(%rsi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_scatter4_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 +.globl rsaz_512_mul_by_one +.type rsaz_512_mul_by_one,@function +.align 32 +rsaz_512_mul_by_one: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lmul_by_one_body: + movl 
OPENSSL_ia32cap_P+8(%rip),%eax + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + andl $524544,%eax + cmpl $524544,%eax + je .Lby_one_callx + call __rsaz_512_reduce + jmp .Lby_one_tail +.align 32 +.Lby_one_callx: + movq 128(%rsp),%rdx + call __rsaz_512_reducex +.Lby_one_tail: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_by_one_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one +.type __rsaz_512_reduce,@function +.align 32 +__rsaz_512_reduce: + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .Lreduction_loop + +.align 32 +.Lreduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jne .Lreduction_loop + + .byte 0xf3,0xc3 +.size __rsaz_512_reduce,.-__rsaz_512_reduce +.type __rsaz_512_reducex,@function +.align 32 +__rsaz_512_reducex: + + imulq %r8,%rdx + xorq %rsi,%rsi + movl $8,%ecx + jmp .Lreduction_loopx + +.align 32 +.Lreduction_loopx: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 128+8(%rsp),%rbx,%rdx + movq %rax,%rdx + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + + decl %ecx + jne .Lreduction_loopx + + .byte 0xf3,0xc3 +.size __rsaz_512_reducex,.-__rsaz_512_reducex +.type __rsaz_512_subtract,@function +.align 32 +__rsaz_512_subtract: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + 
notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.size __rsaz_512_subtract,.-__rsaz_512_subtract +.type __rsaz_512_mul,@function +.align 32 +__rsaz_512_mul: + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp .Loop_mul + +.align 32 +.Loop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.size __rsaz_512_mul,.-__rsaz_512_mul +.type __rsaz_512_mulx,@function +.align 32 +__rsaz_512_mulx: + mulxq (%rsi),%rbx,%r8 + movq $-6,%rcx + + mulxq 8(%rsi),%rax,%r9 + movq %rbx,8(%rsp) + + mulxq 16(%rsi),%rbx,%r10 + adcq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + adcq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + adcq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + adcq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 + movq 8(%rbp),%rdx + adcq %rbx,%r13 + adcq %rax,%r14 + adcq $0,%r15 + + xorq %rdi,%rdi + jmp .Loop_mulx + +.align 32 +.Loop_mulx: + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 
24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rsi),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 + movq 64(%rbp,%rcx,8),%rdx + movq %rbx,8+64-8(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + incq %rcx + jnz .Loop_mulx + + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + +.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 + adcxq %rax,%r8 + adoxq %r10,%r9 + +.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + movq %rbx,8+64-8(%rsp) + movq %r8,8+64(%rsp) + movq %r9,8+64+8(%rsp) + movq %r10,8+64+16(%rsp) + movq %r11,8+64+24(%rsp) + movq %r12,8+64+32(%rsp) + movq %r13,8+64+40(%rsp) + movq %r14,8+64+48(%rsp) + movq %r15,8+64+56(%rsp) + + .byte 0xf3,0xc3 +.size __rsaz_512_mulx,.-__rsaz_512_mulx +.globl rsaz_512_scatter4 +.type rsaz_512_scatter4,@function +.align 16 +rsaz_512_scatter4: + leaq (%rdi,%rdx,4),%rdi + movl $8,%r9d + jmp .Loop_scatter +.align 16 +.Loop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movl %eax,(%rdi) + shrq $32,%rax + movl %eax,64(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz .Loop_scatter + .byte 0xf3,0xc3 +.size rsaz_512_scatter4,.-rsaz_512_scatter4 + +.globl rsaz_512_gather4 +.type rsaz_512_gather4,@function +.align 16 +rsaz_512_gather4: + leaq (%rsi,%rdx,4),%rsi + movl $8,%r9d + jmp .Loop_gather +.align 16 +.Loop_gather: + movl (%rsi),%eax + movl 64(%rsi),%r8d + leaq 128(%rsi),%rsi + shlq $32,%r8 + orq %r8,%rax + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz .Loop_gather + .byte 0xf3,0xc3 +.size rsaz_512_gather4,.-rsaz_512_gather4 diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s index 4f84013f2b61e2..eed057ad6a1a8a 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-gf2m.s @@ -1,6 +1,5 @@ .text - .type _mul_1x1,@function .align 16 _mul_1x1: @@ -247,14 +246,12 @@ bn_GF2m_mul_2x2: movq %rsi,%rax movq %rcx,%rbp call _mul_1x1 - movq %rax,16(%rsp) movq %rdx,24(%rsp) movq 48(%rsp),%rax movq 64(%rsp),%rbp call _mul_1x1 - movq %rax,0(%rsp) movq %rdx,8(%rsp) @@ -263,7 +260,6 @@ bn_GF2m_mul_2x2: xorq 48(%rsp),%rax xorq 64(%rsp),%rbp call _mul_1x1 - movq 0(%rsp),%rbx movq 8(%rsp),%rcx movq 16(%rsp),%rdi diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s index ea12bd408cb594..45d19cd8b5fc2e 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont.s @@ -1,6 +1,7 @@ .text + .globl bn_mul_mont .type bn_mul_mont,@function .align 16 @@ -9,9 +10,12 @@ bn_mul_mont: jnz .Lmul_enter cmpl $8,%r9d jb .Lmul_enter + movl OPENSSL_ia32cap_P+8(%rip),%r11d cmpq %rsi,%rdx jne .Lmul4x_enter - jmp .Lsqr4x_enter + testl $7,%r9d + jz .Lsqr8x_enter + jmp .Lmul4x_enter .align 16 .Lmul_enter: @@ -164,7 +168,7 @@ bn_mul_mont: leaq 1(%r14),%r14 cmpq %r9,%r14 - jl .Louter + jb .Louter xorq %r14,%r14 
movq (%rsp),%rax @@ -212,6 +216,9 @@ bn_mul_mont: .align 16 bn_mul4x_mont: .Lmul4x_enter: + andl $524544,%r11d + cmpl $524544,%r11d + je .Lmulx4x_enter pushq %rbx pushq %rbp pushq %r12 @@ -330,7 +337,7 @@ bn_mul4x_mont: movq %rdi,-32(%rsp,%r15,8) movq %rdx,%r13 cmpq %r9,%r15 - jl .L1st4x + jb .L1st4x mulq %rbx addq %rax,%r10 @@ -478,7 +485,7 @@ bn_mul4x_mont: movq %rdi,-32(%rsp,%r15,8) movq %rdx,%r13 cmpq %r9,%r15 - jl .Linner4x + jb .Linner4x mulq %rbx addq %rax,%r10 @@ -524,7 +531,7 @@ bn_mul4x_mont: movq %rdi,(%rsp,%r15,8) cmpq %r9,%r14 - jl .Louter4x + jb .Louter4x movq 16(%rsp,%r9,8),%rdi movq 0(%rsp),%rax pxor %xmm0,%xmm0 @@ -606,10 +613,14 @@ bn_mul4x_mont: .Lmul4x_epilogue: .byte 0xf3,0xc3 .size bn_mul4x_mont,.-bn_mul4x_mont -.type bn_sqr4x_mont,@function -.align 16 -bn_sqr4x_mont: -.Lsqr4x_enter: + + + +.type bn_sqr8x_mont,@function +.align 32 +bn_sqr8x_mont: +.Lsqr8x_enter: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 @@ -617,759 +628,428 @@ bn_sqr4x_mont: pushq %r14 pushq %r15 + movl %r9d,%r10d shll $3,%r9d - xorq %r10,%r10 - movq %rsp,%r11 - subq %r9,%r10 - movq (%r8),%r8 - leaq -72(%rsp,%r10,2),%rsp - andq $-1024,%rsp - - - - - - - - - - - - movq %rdi,32(%rsp) - movq %rcx,40(%rsp) - movq %r8,48(%rsp) - movq %r11,56(%rsp) -.Lsqr4x_body: - - - - + shlq $3+2,%r10 + negq %r9 - leaq 32(%r10),%rbp - leaq (%rsi,%r9,1),%rsi - movq %r9,%rcx - movq -32(%rsi,%rbp,1),%r14 - leaq 64(%rsp,%r9,2),%rdi - movq -24(%rsi,%rbp,1),%rax - leaq -32(%rdi,%rbp,1),%rdi - movq -16(%rsi,%rbp,1),%rbx - movq %rax,%r15 - - mulq %r14 - movq %rax,%r10 - movq %rbx,%rax - movq %rdx,%r11 - movq %r10,-24(%rdi,%rbp,1) - - xorq %r10,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,-16(%rdi,%rbp,1) - - leaq -16(%rbp),%rcx - - - movq 8(%rsi,%rcx,1),%rbx - mulq %r15 - movq %rax,%r12 - movq %rbx,%rax - movq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 - leaq 16(%rcx),%rcx - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi,%rcx,1) - jmp .Lsqr4x_1st - -.align 16 -.Lsqr4x_1st: - movq (%rsi,%rcx,1),%rbx - xorq %r12,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 - - xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,(%rdi,%rcx,1) - - - movq 8(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,8(%rdi,%rcx,1) - - movq 16(%rsi,%rcx,1),%rbx - xorq %r12,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 - - xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,16(%rdi,%rcx,1) - - - movq 24(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 + leaq -64(%rsp,%r9,4),%r11 + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lsqr8x_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,4),%rsp + jmp .Lsqr8x_sp_done + +.align 32 +.Lsqr8x_sp_alt: + leaq 4096-64(,%r9,4),%r10 + leaq -64(%rsp,%r9,4),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lsqr8x_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + leaq 64(%rsp,%r9,2),%r11 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lsqr8x_body: + + movq %r9,%rbp +.byte 102,73,15,110,211 + shrq $3+2,%rbp + movl OPENSSL_ia32cap_P+8(%rip),%eax + jmp 
.Lsqr8x_copy_n + +.align 32 +.Lsqr8x_copy_n: + movq 0(%rcx),%xmm0 + movq 8(%rcx),%xmm1 + movq 16(%rcx),%xmm3 + movq 24(%rcx),%xmm4 leaq 32(%rcx),%rcx - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi,%rcx,1) + movdqa %xmm0,0(%r11) + movdqa %xmm1,16(%r11) + movdqa %xmm3,32(%r11) + movdqa %xmm4,48(%r11) + leaq 64(%r11),%r11 + decq %rbp + jnz .Lsqr8x_copy_n - cmpq $0,%rcx - jne .Lsqr4x_1st - - xorq %r12,%r12 - addq %r11,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - adcq %rdx,%r12 - - movq %r13,(%rdi) - leaq 16(%rbp),%rbp - movq %r12,8(%rdi) - jmp .Lsqr4x_outer - -.align 16 -.Lsqr4x_outer: - movq -32(%rsi,%rbp,1),%r14 - leaq 64(%rsp,%r9,2),%rdi - movq -24(%rsi,%rbp,1),%rax - leaq -32(%rdi,%rbp,1),%rdi - movq -16(%rsi,%rbp,1),%rbx - movq %rax,%r15 - - movq -24(%rdi,%rbp,1),%r10 - xorq %r11,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-24(%rdi,%rbp,1) - - xorq %r10,%r10 - addq -16(%rdi,%rbp,1),%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,-16(%rdi,%rbp,1) - - leaq -16(%rbp),%rcx - xorq %r12,%r12 + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + andl $524544,%eax + cmpl $524544,%eax + jne .Lsqr8x_nox + call bn_sqrx8x_internal - movq 8(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq 8(%rdi,%rcx,1),%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp .Lsqr8x_zero - xorq %r11,%r11 - addq %r12,%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,8(%rdi,%rcx,1) +.align 32 +.Lsqr8x_nox: + call bn_sqr8x_internal - leaq 16(%rcx),%rcx - jmp .Lsqr4x_inner + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp .Lsqr8x_zero + +.align 32 +.Lsqr8x_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + movdqa %xmm0,0(%rdx) + movdqa %xmm0,16(%rdx) + movdqa %xmm0,32(%rdx) + movdqa %xmm0,48(%rdx) + leaq 64(%rdx),%rdx + decq %r9 + jnz .Lsqr8x_zero -.align 16 -.Lsqr4x_inner: - movq (%rsi,%rcx,1),%rbx - xorq %r12,%r12 - addq (%rdi,%rcx,1),%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lsqr8x_epilogue: + .byte 0xf3,0xc3 +.size bn_sqr8x_mont,.-bn_sqr8x_mont +.type bn_mulx4x_mont,@function +.align 32 +bn_mulx4x_mont: +.Lmulx4x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + shll $3,%r9d +.byte 0x67 xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,(%rdi,%rcx,1) - - movq 8(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq 8(%rdi,%rcx,1),%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 - leaq 16(%rcx),%rcx - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi,%rcx,1) - - cmpq $0,%rcx - jne .Lsqr4x_inner - - xorq %r12,%r12 - addq %r11,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - adcq %rdx,%r12 + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,1),%rsp + leaq (%rdx,%r9,1),%r10 + andq $-128,%rsp - movq %r13,(%rdi) - movq %r12,8(%rdi) - 
addq $16,%rbp - jnz .Lsqr4x_outer - movq -32(%rsi),%r14 - leaq 64(%rsp,%r9,2),%rdi - movq -24(%rsi),%rax - leaq -32(%rdi,%rbp,1),%rdi - movq -16(%rsi),%rbx - movq %rax,%r15 - xorq %r11,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-24(%rdi) - xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,-16(%rdi) - movq -8(%rsi),%rbx - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq $0,%rdx - - xorq %r11,%r11 - addq %r12,%r10 - movq %rdx,%r13 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi) - - xorq %r12,%r12 - addq %r11,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq -16(%rsi),%rax - adcq %rdx,%r12 - movq %r13,(%rdi) - movq %r12,8(%rdi) - mulq %rbx - addq $16,%rbp - xorq %r14,%r14 - subq %r9,%rbp - xorq %r15,%r15 - addq %r12,%rax - adcq $0,%rdx - movq %rax,8(%rdi) - movq %rdx,16(%rdi) - movq %r15,24(%rdi) - movq -16(%rsi,%rbp,1),%rax - leaq 64(%rsp,%r9,2),%rdi - xorq %r10,%r10 - movq -24(%rdi,%rbp,2),%r11 - - leaq (%r14,%r10,2),%r12 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq -16(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq -8(%rdi,%rbp,2),%r11 - adcq %rax,%r12 - movq -8(%rsi,%rbp,1),%rax - movq %r12,-32(%rdi,%rbp,2) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,-24(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - movq 0(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 8(%rdi,%rbp,2),%r11 - adcq %rax,%rbx - movq 0(%rsi,%rbp,1),%rax - movq %rbx,-16(%rdi,%rbp,2) - adcq %rdx,%r8 - leaq 16(%rbp),%rbp - movq %r8,-40(%rdi,%rbp,2) - sbbq %r15,%r15 - jmp .Lsqr4x_shift_n_add -.align 16 -.Lsqr4x_shift_n_add: - leaq (%r14,%r10,2),%r12 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq -16(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq -8(%rdi,%rbp,2),%r11 - adcq %rax,%r12 - movq -8(%rsi,%rbp,1),%rax - movq %r12,-32(%rdi,%rbp,2) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,-24(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - movq 0(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 8(%rdi,%rbp,2),%r11 - adcq %rax,%rbx - movq 0(%rsi,%rbp,1),%rax - movq %rbx,-16(%rdi,%rbp,2) - adcq %rdx,%r8 - - leaq (%r14,%r10,2),%r12 - movq %r8,-8(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq 16(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 24(%rdi,%rbp,2),%r11 - adcq %rax,%r12 - movq 8(%rsi,%rbp,1),%rax - movq %r12,0(%rdi,%rbp,2) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,8(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - movq 32(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 40(%rdi,%rbp,2),%r11 - adcq %rax,%rbx - movq 16(%rsi,%rbp,1),%rax - movq %rbx,16(%rdi,%rbp,2) - adcq %rdx,%r8 - movq %r8,24(%rdi,%rbp,2) - sbbq %r15,%r15 - addq $32,%rbp - jnz .Lsqr4x_shift_n_add - - leaq (%r14,%r10,2),%r12 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq -16(%rdi),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq -8(%rdi),%r11 - adcq %rax,%r12 - movq -8(%rsi),%rax - movq %r12,-32(%rdi) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,-24(%rdi) - sbbq %r15,%r15 - shrq $63,%r10 - leaq 
(%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - mulq %rax - negq %r15 - adcq %rax,%rbx - adcq %rdx,%r8 - movq %rbx,-16(%rdi) - movq %r8,-8(%rdi) - movq 40(%rsp),%rsi - movq 48(%rsp),%r8 - xorq %rcx,%rcx movq %r9,0(%rsp) - subq %r9,%rcx - movq 64(%rsp),%r10 - movq %r8,%r14 - leaq 64(%rsp,%r9,2),%rax - leaq 64(%rsp,%r9,1),%rdi - movq %rax,8(%rsp) - leaq (%rsi,%r9,1),%rsi - xorq %rbp,%rbp - - movq 0(%rsi,%rcx,1),%rax - movq 8(%rsi,%rcx,1),%r9 - imulq %r10,%r14 - movq %rax,%rbx - jmp .Lsqr4x_mont_outer - -.align 16 -.Lsqr4x_mont_outer: - xorq %r11,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 - movq %r8,%r15 - - xorq %r10,%r10 - addq 8(%rdi,%rcx,1),%r11 - adcq $0,%r10 - mulq %r14 + shrq $5,%r9 + movq %r10,16(%rsp) + subq $1,%r9 + movq %r8,24(%rsp) + movq %rdi,32(%rsp) + movq %rax,40(%rsp) + movq %r9,48(%rsp) + jmp .Lmulx4x_body + +.align 32 +.Lmulx4x_body: + leaq 8(%rdx),%rdi + movq (%rdx),%rdx + leaq 64+32(%rsp),%rbx + movq %rdx,%r9 + + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r14 addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - - imulq %r11,%r15 - - movq 16(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq %r11,%r12 + movq %rdi,8(%rsp) + mulxq 16(%rsi),%r12,%r13 + adcq %r14,%r12 adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - movq %r12,8(%rdi,%rcx,1) - - xorq %r11,%r11 - addq 16(%rdi,%rcx,1),%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 - movq 24(%rsi,%rcx,1),%r9 - xorq %r12,%r12 - addq %r10,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %r9,%rax - adcq %rdx,%r12 - movq %r13,16(%rdi,%rcx,1) + movq %r8,%rdi + imulq 24(%rsp),%r8 + xorq %rbp,%rbp - xorq %r10,%r10 - addq 24(%rdi,%rcx,1),%r11 + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%rdi + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 +.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + movq 48(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 leaq 32(%rcx),%rcx - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - jmp .Lsqr4x_mont_inner - -.align 16 -.Lsqr4x_mont_inner: - movq (%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq %r11,%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - movq %r12,-8(%rdi,%rcx,1) - - xorq %r11,%r11 - addq (%rdi,%rcx,1),%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 - - movq 8(%rsi,%rcx,1),%r9 - xorq %r12,%r12 - addq %r10,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %r9,%rax - adcq %rdx,%r12 - movq %r13,(%rdi,%rcx,1) - - xorq %r10,%r10 - addq 8(%rdi,%rcx,1),%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - + movq %r12,-16(%rbx) + + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq 
%r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) - movq 16(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq %r11,%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - movq %r12,8(%rdi,%rcx,1) - - xorq %r11,%r11 - addq 16(%rdi,%rcx,1),%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 + decq %rdi + jnz .Lmulx4x_1st - movq 24(%rsi,%rcx,1),%r9 - xorq %r12,%r12 - addq %r10,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %r9,%rax - adcq %rdx,%r12 - movq %r13,16(%rdi,%rcx,1) - - xorq %r10,%r10 - addq 24(%rdi,%rcx,1),%r11 + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + addq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + movq (%rdi),%rdx + leaq 8(%rdi),%rdi + subq %rax,%rsi + movq %r15,(%rbx) + leaq 64+32(%rsp),%rbx + subq %rax,%rcx + + mulxq 0(%rsi),%r8,%r11 + xorl %ebp,%ebp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + adoxq %rbp,%r12 + adcxq %rbp,%r13 + + movq %rdi,8(%rsp) +.byte 0x67 + movq %r8,%r15 + imulq 24(%rsp),%r8 + xorl %ebp,%ebp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + adoxq -16(%rbx),%r12 + adcxq %rax,%r13 + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + adoxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) leaq 32(%rcx),%rcx - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - cmpq $0,%rcx - jne .Lsqr4x_mont_inner + adcxq %rax,%r12 + adoxq %rbp,%r15 + movq 48(%rsp),%rdi + movq %r12,-16(%rbx) + + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-32(%rbx) + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) - subq 0(%rsp),%rcx - movq %r8,%r14 + decq %rdi + jnz .Lmulx4x_inner - xorq %r13,%r13 - addq %r11,%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %r9,%rax - adcq %rdx,%r13 - movq %r12,-8(%rdi) - - xorq %r11,%r11 - addq (%rdi),%r10 - adcq $0,%r11 - movq 0(%rsi,%rcx,1),%rbx - addq %rbp,%r10 - adcq $0,%r11 + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + subq 0(%rbx),%rbp + adcq %r15,%r14 + movq -8(%rcx),%r8 + sbbq %r15,%r15 + movq %r14,-8(%rbx) - imulq 16(%rdi,%rcx,1),%r14 - xorq %r12,%r12 - movq 8(%rsi,%rcx,1),%r9 - addq %r10,%r13 - movq 16(%rdi,%rcx,1),%r10 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 - movq %r13,(%rdi) + cmpq 16(%rsp),%rdi + jne .Lmulx4x_outer - xorq %rbp,%rbp - addq 8(%rdi),%r12 - adcq %rbp,%rbp - addq %r11,%r12 - leaq 16(%rdi),%rdi - adcq $0,%rbp - 
movq %r12,-8(%rdi) - cmpq 8(%rsp),%rdi - jb .Lsqr4x_mont_outer - - movq 0(%rsp),%r9 - movq %rbp,(%rdi) - movq 64(%rsp,%r9,1),%rax - leaq 64(%rsp,%r9,1),%rbx - movq 40(%rsp),%rsi - shrq $5,%r9 - movq 8(%rbx),%rdx - xorq %rbp,%rbp + subq %r14,%r8 + sbbq %r8,%r8 + orq %r8,%r15 + negq %rax + xorq %rdx,%rdx movq 32(%rsp),%rdi - subq 0(%rsi),%rax - movq 16(%rbx),%r10 - movq 24(%rbx),%r11 - sbbq 8(%rsi),%rdx - leaq -1(%r9),%rcx - jmp .Lsqr4x_sub -.align 16 -.Lsqr4x_sub: - movq %rax,0(%rdi,%rbp,8) - movq %rdx,8(%rdi,%rbp,8) - sbbq 16(%rsi,%rbp,8),%r10 - movq 32(%rbx,%rbp,8),%rax - movq 40(%rbx,%rbp,8),%rdx - sbbq 24(%rsi,%rbp,8),%r11 - movq %r10,16(%rdi,%rbp,8) - movq %r11,24(%rdi,%rbp,8) - sbbq 32(%rsi,%rbp,8),%rax - movq 48(%rbx,%rbp,8),%r10 - movq 56(%rbx,%rbp,8),%r11 - sbbq 40(%rsi,%rbp,8),%rdx - leaq 4(%rbp),%rbp - decq %rcx - jnz .Lsqr4x_sub - - movq %rax,0(%rdi,%rbp,8) - movq 32(%rbx,%rbp,8),%rax - sbbq 16(%rsi,%rbp,8),%r10 - movq %rdx,8(%rdi,%rbp,8) - sbbq 24(%rsi,%rbp,8),%r11 - movq %r10,16(%rdi,%rbp,8) - - sbbq $0,%rax - movq %r11,24(%rdi,%rbp,8) - xorq %rbp,%rbp - andq %rax,%rbx - notq %rax - movq %rdi,%rsi - andq %rax,%rsi - leaq -1(%r9),%rcx - orq %rsi,%rbx + leaq 64(%rsp),%rbx pxor %xmm0,%xmm0 - leaq 64(%rsp,%r9,8),%rsi - movdqu (%rbx),%xmm1 - leaq (%rsi,%r9,8),%rsi - movdqa %xmm0,64(%rsp) - movdqa %xmm0,(%rsi) - movdqu %xmm1,(%rdi) - jmp .Lsqr4x_copy -.align 16 -.Lsqr4x_copy: - movdqu 16(%rbx,%rbp,1),%xmm2 - movdqu 32(%rbx,%rbp,1),%xmm1 - movdqa %xmm0,80(%rsp,%rbp,1) - movdqa %xmm0,96(%rsp,%rbp,1) - movdqa %xmm0,16(%rsi,%rbp,1) - movdqa %xmm0,32(%rsi,%rbp,1) - movdqu %xmm2,16(%rdi,%rbp,1) - movdqu %xmm1,32(%rdi,%rbp,1) - leaq 32(%rbp),%rbp - decq %rcx - jnz .Lsqr4x_copy - - movdqu 16(%rbx,%rbp,1),%xmm2 - movdqa %xmm0,80(%rsp,%rbp,1) - movdqa %xmm0,16(%rsi,%rbp,1) - movdqu %xmm2,16(%rdi,%rbp,1) - movq 56(%rsp),%rsi + movq 0(%rcx,%rax,1),%r8 + movq 8(%rcx,%rax,1),%r9 + negq %r8 + jmp .Lmulx4x_sub_entry + +.align 32 +.Lmulx4x_sub: + movq 0(%rcx,%rax,1),%r8 + movq 8(%rcx,%rax,1),%r9 + notq %r8 +.Lmulx4x_sub_entry: + movq 16(%rcx,%rax,1),%r10 + notq %r9 + andq %r15,%r8 + movq 24(%rcx,%rax,1),%r11 + notq %r10 + andq %r15,%r9 + notq %r11 + andq %r15,%r10 + andq %r15,%r11 + + negq %rdx + adcq 0(%rbx),%r8 + adcq 8(%rbx),%r9 + movdqa %xmm0,(%rbx) + adcq 16(%rbx),%r10 + adcq 24(%rbx),%r11 + movdqa %xmm0,16(%rbx) + leaq 32(%rbx),%rbx + sbbq %rdx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + leaq 32(%rdi),%rdi + + addq $32,%rax + jnz .Lmulx4x_sub + + movq 40(%rsp),%rsi movq $1,%rax - movq 0(%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -.Lsqr4x_epilogue: + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmulx4x_epilogue: .byte 0xf3,0xc3 -.size bn_sqr4x_mont,.-bn_sqr4x_mont +.size bn_mulx4x_mont,.-bn_mulx4x_mont .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 16 diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s index 875911c0fd4f98..84dd72075d3007 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s @@ -1,19 +1,20 @@ .text + .globl 
bn_mul_mont_gather5 .type bn_mul_mont_gather5,@function .align 64 bn_mul_mont_gather5: - testl $3,%r9d + testl $7,%r9d jnz .Lmul_enter - cmpl $8,%r9d - jb .Lmul_enter + movl OPENSSL_ia32cap_P+8(%rip),%r11d jmp .Lmul4x_enter .align 16 .Lmul_enter: movl %r9d,%r9d + movq %rsp,%rax movl 8(%rsp),%r10d pushq %rbx pushq %rbp @@ -21,7 +22,6 @@ bn_mul_mont_gather5: pushq %r13 pushq %r14 pushq %r15 - movq %rsp,%rax leaq 2(%r9),%r11 negq %r11 leaq (%rsp,%r11,8),%rsp @@ -222,7 +222,7 @@ bn_mul_mont_gather5: leaq 1(%r14),%r14 cmpq %r9,%r14 - jl .Louter + jb .Louter xorq %r14,%r14 movq (%rsp),%rax @@ -256,477 +256,2649 @@ bn_mul_mont_gather5: movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lmul_epilogue: .byte 0xf3,0xc3 .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 .type bn_mul4x_mont_gather5,@function -.align 16 +.align 32 bn_mul4x_mont_gather5: .Lmul4x_enter: - movl %r9d,%r9d - movl 8(%rsp),%r10d + andl $524544,%r11d + cmpl $524544,%r11d + je .Lmulx4x_enter +.byte 0x67 + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 - movq %rsp,%rax - leaq 4(%r9),%r11 - negq %r11 - leaq (%rsp,%r11,8),%rsp - andq $-1024,%rsp +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 - movq %rax,8(%rsp,%r9,8) + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmul4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lmul4xsp_done + +.align 32 +.Lmul4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lmul4xsp_done: + andq $-64,%rsp + negq %r9 + + movq %rax,40(%rsp) .Lmul4x_body: - movq %rdi,16(%rsp,%r9,8) - movq %rdx,%r12 + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 + +.type mul4x_internal,@function +.align 32 +mul4x_internal: + shlq $5,%r9 + movl 8(%rax),%r10d + leaq 256(%rdx,%r9,1),%r13 + shrq $5,%r9 movq %r10,%r11 shrq $3,%r10 andq $7,%r11 notq %r10 leaq .Lmagic_masks(%rip),%rax andq $3,%r10 - leaq 96(%r12,%r11,8),%r12 + leaq 96(%rdx,%r11,8),%r12 movq 0(%rax,%r10,8),%xmm4 movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 movq 16(%rax,%r10,8),%xmm6 movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 movq -96(%r12),%xmm0 + leaq 256(%r12),%r14 movq -32(%r12),%xmm1 pand %xmm4,%xmm0 movq 32(%r12),%xmm2 pand %xmm5,%xmm1 movq 96(%r12),%xmm3 pand %xmm6,%xmm2 +.byte 0x67 por %xmm1,%xmm0 + movq -96(%r14),%xmm1 +.byte 0x67 pand %xmm7,%xmm3 +.byte 0x67 por %xmm2,%xmm0 - leaq 256(%r12),%r12 + movq -32(%r14),%xmm2 +.byte 0x67 + pand %xmm4,%xmm1 +.byte 0x67 por %xmm3,%xmm0 + movq 32(%r14),%xmm3 .byte 102,72,15,126,195 + movq 96(%r14),%xmm0 + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + movq (%r8),%r8 movq (%rsi),%rax - - xorq %r14,%r14 - xorq %r15,%r15 - - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 + leaq (%rsi,%r9,1),%rsi + negq %r9 movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + pand 
%xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 imulq %r10,%rbp + + + + + + + + leaq 64+8(%rsp,%r11,8),%r14 movq %rdx,%r11 - por %xmm2,%xmm0 - leaq 256(%r12),%r12 - por %xmm3,%xmm0 + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + leaq 512(%r12),%r12 + por %xmm1,%xmm0 mulq %rbp addq %rax,%r10 - movq 8(%rsi),%rax + movq 8(%rsi,%r9,1),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx),%rax + movq 16(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq 16(%rsi),%rax + movq 16(%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi - leaq 4(%r15),%r15 + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx adcq $0,%rdx - movq %rdi,(%rsp) + movq %rdi,(%r14) movq %rdx,%r13 jmp .L1st4x -.align 16 + +.align 32 .L1st4x: mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq -16(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi,%r15,8),%rax + movq (%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) movq %rdx,%r13 mulq %rbx addq %rax,%r10 - movq (%rcx,%r15,8),%rax + movq 0(%rcx),%rax adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq 8(%rsi,%r15,8),%rax + movq 8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %r13,-8(%rsp,%r15,8) + movq %r13,-8(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx,%r15,8),%rax + movq 16(%rcx),%rax adcq $0,%rdx - leaq 4(%r15),%r15 movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq -16(%rsi,%r15,8),%rax + movq 16(%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi + leaq 64(%rcx),%rcx adcq $0,%rdx - movq %rdi,-32(%rsp,%r15,8) + movq %rdi,(%r14) movq %rdx,%r13 - cmpq %r9,%r15 - jl .L1st4x + + addq $32,%r15 + jnz .L1st4x mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq -16(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi),%rax + movq (%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) movq %rdx,%r13 .byte 102,72,15,126,195 + leaq (%rcx,%r9,2),%rcx xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi - movq %r13,-8(%rsp,%r15,8) - movq %rdi,(%rsp,%r15,8) + movq %r13,-8(%r14) - leaq 1(%r14),%r14 -.align 4 -.Louter4x: - xorq %r15,%r15 - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 + jmp .Louter4x - movq (%rsp),%r10 +.align 32 +.Louter4x: + movq (%r14,%r9,1),%r10 movq %r8,%rbp mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 imulq %r10,%rbp +.byte 0x67 movq %rdx,%r11 + movq %rdi,(%r14) + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 por %xmm2,%xmm0 + leaq (%r14,%r9,1),%r14 leaq 256(%r12),%r12 por %xmm3,%xmm0 mulq %rbp addq %rax,%r10 - movq 8(%rsi),%rax + movq 8(%rsi,%r9,1),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx 
addq %rax,%r11 - movq 8(%rcx),%rax + movq 16(%rcx),%rax adcq $0,%rdx - addq 8(%rsp),%r11 + addq 8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq 16(%rsi),%rax + movq 16(%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi - leaq 4(%r15),%r15 + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx adcq $0,%rdx movq %rdx,%r13 jmp .Linner4x -.align 16 + +.align 32 .Linner4x: mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax adcq $0,%rdx - addq -16(%rsp,%r15,8),%r10 + addq 16(%r14),%r10 + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %rdi,-32(%rsp,%r15,8) + movq %rdi,-32(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq -16(%rcx),%rax adcq $0,%rdx - addq -8(%rsp,%r15,8),%r11 + addq -8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi,%r15,8),%rax + movq (%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%r13 mulq %rbx addq %rax,%r10 - movq (%rcx,%r15,8),%rax + movq 0(%rcx),%rax adcq $0,%rdx - addq (%rsp,%r15,8),%r10 + addq (%r14),%r10 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq 8(%rsi,%r15,8),%rax + movq 8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx,%r15,8),%rax + movq 16(%rcx),%rax adcq $0,%rdx - addq 8(%rsp,%r15,8),%r11 + addq 8(%r14),%r11 adcq $0,%rdx - leaq 4(%r15),%r15 movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq -16(%rsi,%r15,8),%rax + movq 16(%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi + leaq 64(%rcx),%rcx adcq $0,%rdx - movq %r13,-40(%rsp,%r15,8) + movq %r13,-8(%r14) movq %rdx,%r13 - cmpq %r9,%r15 - jl .Linner4x + + addq $32,%r15 + jnz .Linner4x mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax adcq $0,%rdx - addq -16(%rsp,%r15,8),%r10 + addq 16(%r14),%r10 + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %rdi,-32(%rsp,%r15,8) + movq %rdi,-32(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq %rbp,%rax + movq -16(%rcx),%rbp adcq $0,%rdx - addq -8(%rsp,%r15,8),%r11 + addq -8(%r14),%r11 adcq $0,%rdx - leaq 1(%r14),%r14 movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi),%rax + movq (%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%r13 .byte 102,72,15,126,195 - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) + leaq (%rcx,%r9,2),%rcx xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi - addq (%rsp,%r9,8),%r13 + addq (%r14),%r13 adcq $0,%rdi - movq %r13,-8(%rsp,%r15,8) - movq %rdi,(%rsp,%r15,8) + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb .Louter4x + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + xorq $1,%rdi + leaq (%r14,%r9,1),%rbx + leaq (%rcx,%rdi,8),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + jmp .Lsqr4x_sub +.size mul4x_internal,.-mul4x_internal +.globl bn_power5 +.type bn_power5,@function +.align 32 +bn_power5: + movl OPENSSL_ia32cap_P+8(%rip),%r11d + andl $524544,%r11d + cmpl $524544,%r11d + je .Lpowerx5_enter + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 - cmpq %r9,%r14 - jl 
.Louter4x - movq 16(%rsp,%r9,8),%rdi - movq 0(%rsp),%rax - pxor %xmm0,%xmm0 - movq 8(%rsp),%rdx - shrq $2,%r9 - leaq (%rsp),%rsi - xorq %r14,%r14 - subq 0(%rcx),%rax - movq 16(%rsi),%rbx - movq 24(%rsi),%rbp - sbbq 8(%rcx),%rdx - leaq -1(%r9),%r15 - jmp .Lsub4x -.align 16 -.Lsub4x: - movq %rax,0(%rdi,%r14,8) - movq %rdx,8(%rdi,%r14,8) - sbbq 16(%rcx,%r14,8),%rbx - movq 32(%rsi,%r14,8),%rax - movq 40(%rsi,%r14,8),%rdx - sbbq 24(%rcx,%r14,8),%rbp - movq %rbx,16(%rdi,%r14,8) - movq %rbp,24(%rdi,%r14,8) - sbbq 32(%rcx,%r14,8),%rax - movq 48(%rsi,%r14,8),%rbx - movq 56(%rsi,%r14,8),%rbp - sbbq 40(%rcx,%r14,8),%rdx - leaq 4(%r14),%r14 - decq %r15 - jnz .Lsub4x - movq %rax,0(%rdi,%r14,8) - movq 32(%rsi,%r14,8),%rax - sbbq 16(%rcx,%r14,8),%rbx - movq %rdx,8(%rdi,%r14,8) - sbbq 24(%rcx,%r14,8),%rbp - movq %rbx,16(%rdi,%r14,8) - sbbq $0,%rax - movq %rbp,24(%rdi,%r14,8) - xorq %r14,%r14 - andq %rax,%rsi - notq %rax - movq %rdi,%rcx - andq %rax,%rcx - leaq -1(%r9),%r15 - orq %rcx,%rsi - movdqu (%rsi),%xmm1 - movdqa %xmm0,(%rsp) - movdqu %xmm1,(%rdi) - jmp .Lcopy4x -.align 16 -.Lcopy4x: - movdqu 16(%rsi,%r14,1),%xmm2 - movdqu 32(%rsi,%r14,1),%xmm1 - movdqa %xmm0,16(%rsp,%r14,1) - movdqu %xmm2,16(%rdi,%r14,1) - movdqa %xmm0,32(%rsp,%r14,1) - movdqu %xmm1,32(%rdi,%r14,1) - leaq 32(%r14),%r14 - decq %r15 - jnz .Lcopy4x - shlq $2,%r9 - movdqu 16(%rsi,%r14,1),%xmm2 - movdqa %xmm0,16(%rsp,%r14,1) - movdqu %xmm2,16(%rdi,%r14,1) - movq 8(%rsp,%r9,8),%rsi + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwr_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lpwr_sp_done + +.align 32 +.Lpwr_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lpwr_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lpower5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -.Lmul4x_epilogue: + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lpower5_epilogue: .byte 0xf3,0xc3 -.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 +.size bn_power5,.-bn_power5 + +.globl bn_sqr8x_internal +.hidden bn_sqr8x_internal +.type bn_sqr8x_internal,@function +.align 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + 
movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 32 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp .Lsqr4x_outer + +.align 32 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp .Lsqr4x_inner + +.align 32 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq 
%rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp .Lsqr4x_shift_n_add + +.align 32 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +sqr8x_reduction: + xorq %rax,%rax + leaq (%rbp,%r9,2),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp .L8x_reduction_loop + +.align 32 +.L8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 
48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .L8x_reduce + +.align 32 +.L8x_reduce: + mulq %rbx + movq 16(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_reduce + + leaq 128(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp .L8x_tail + +.align 32 +.L8x_tail: + mulq %rbx + addq %rax,%r8 + movq 16(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_tail + + leaq 128(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp .L8x_tail + +.align 32 +.L8x_tail_done: + addq (%rdx),%r8 + xorq %rax,%rax + + negq %rsi +.L8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -16(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb 
.L8x_reduction_loop + + subq %r15,%rcx + leaq (%rdi,%r9,1),%rbx + adcq %rsi,%rsi + movq %r9,%rcx + orq %rsi,%rax +.byte 102,72,15,126,207 + xorq $1,%rax +.byte 102,72,15,126,206 + leaq (%rbp,%rax,8),%rbp + sarq $3+2,%rcx + jmp .Lsqr4x_sub + +.align 32 +.Lsqr4x_sub: +.byte 0x66 + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rbx),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 32(%rbp),%r14 + movq %r12,0(%rdi) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz .Lsqr4x_sub + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 +.size bn_sqr8x_internal,.-bn_sqr8x_internal +.globl bn_from_montgomery +.type bn_from_montgomery,@function +.align 32 +bn_from_montgomery: + testl $7,%r9d + jz bn_from_mont8x + xorl %eax,%eax + .byte 0xf3,0xc3 +.size bn_from_montgomery,.-bn_from_montgomery + +.type bn_from_mont8x,@function +.align 32 +bn_from_mont8x: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lfrom_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lfrom_sp_done + +.align 32 +.Lfrom_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lfrom_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lfrom_body: + movq %r9,%r11 + leaq 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp .Lmul_by_1 + +.align 32 +.Lmul_by_1: + movdqu (%rsi),%xmm1 + movdqu 16(%rsi),%xmm2 + movdqu 32(%rsi),%xmm3 + movdqa %xmm0,(%rax,%r9,1) + movdqu 48(%rsi),%xmm4 + movdqa %xmm0,16(%rax,%r9,1) +.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,%r9,1) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,%r9,1) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + leaq 64(%rax),%rax + subq $64,%r11 + jnz .Lmul_by_1 + +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 0x67 + movq %rcx,%rbp +.byte 102,73,15,110,218 + movl OPENSSL_ia32cap_P+8(%rip),%r11d + andl $524544,%r11d + cmpl $524544,%r11d + jne .Lfrom_mont_nox + + leaq (%rax,%r9,1),%rdi + call sqrx8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_nox: + call sqr8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + subq $32,%r9 + jnz .Lfrom_mont_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lfrom_epilogue: + .byte 0xf3,0xc3 +.size bn_from_mont8x,.-bn_from_mont8x +.type bn_mulx4x_mont_gather5,@function +.align 32 +bn_mulx4x_mont_gather5: +.Lmulx4x_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmulx4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lmulx4xsp_done + +.align 32 
+.Lmulx4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lmulx4xsp_done: + andq $-64,%rsp + + + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lmulx4x_body: + call mulx4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmulx4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 + +.type mulx4x_internal,@function +.align 32 +mulx4x_internal: +.byte 0x4c,0x89,0x8c,0x24,0x08,0x00,0x00,0x00 +.byte 0x67 + negq %r9 + shlq $5,%r9 + leaq 256(%rdx,%r9,1),%r13 + shrq $5+5,%r9 + movl 8(%rax),%r10d + subq $1,%r9 + movq %r13,16+8(%rsp) + movq %r9,24+8(%rsp) + movq %rdi,56+8(%rsp) + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%rdx,%r11,8),%rdi + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 + + movq -96(%rdi),%xmm0 + leaq 256(%rdi),%rbx + movq -32(%rdi),%xmm1 + pand %xmm4,%xmm0 + movq 32(%rdi),%xmm2 + pand %xmm5,%xmm1 + movq 96(%rdi),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + movq -96(%rbx),%xmm1 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + movq -32(%rbx),%xmm2 + por %xmm3,%xmm0 +.byte 0x67,0x67 + pand %xmm4,%xmm1 + movq 32(%rbx),%xmm3 + +.byte 102,72,15,126,194 + movq 96(%rbx),%xmm0 + leaq 512(%rdi),%rdi + pand %xmm5,%xmm2 +.byte 0x67,0x67 + pand %xmm6,%xmm3 + + + + + + + + leaq 64+32+8(%rsp,%r11,8),%rbx + + movq %rdx,%r9 + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r12 + addq %rax,%r11 + mulxq 16(%rsi),%rax,%r13 + adcq %rax,%r12 + adcq $0,%r13 + mulxq 24(%rsi),%rax,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + xorq %rbp,%rbp + movq %r8,%rdx + + por %xmm2,%xmm1 + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + movq %rdi,8+8(%rsp) + por %xmm1,%xmm0 + +.byte 0x48,0x8d,0xb6,0x20,0x00,0x00,0x00 + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 16(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 32(%rcx),%rax,%r12 + movq 24+8(%rsp),%rdi +.byte 0x66 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 48(%rcx),%rax,%r15 +.byte 0x67,0x67 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 +.byte 0x48,0x8d,0x89,0x40,0x00,0x00,0x00 + movq %r12,-16(%rbx) + + +.align 32 +.Lmulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 16(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 32(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 48(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 64(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_1st + + movq 8(%rsp),%rax +.byte 102,72,15,126,194 + adcq %rbp,%r15 + leaq (%rsi,%rax,1),%rsi + addq %r15,%r14 + movq 8+8(%rsp),%rdi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + movq %rbp,(%rbx) + leaq 32(%rbx,%rax,1),%rbx + mulxq 0(%rsi),%r8,%r11 + xorq 
%rbp,%rbp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + mulxq 24(%rsi),%rdx,%r14 + adoxq -16(%rbx),%r12 + adcxq %rdx,%r13 + leaq (%rcx,%rax,2),%rcx + leaq 32(%rsi),%rsi + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + adoxq %rbp,%r14 + +.byte 0x67 + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + + movq -96(%rdi),%xmm0 +.byte 0x67,0x67 + movq %r8,%rdx + movq -32(%rdi),%xmm1 +.byte 0x67 + pand %xmm4,%xmm0 + movq 32(%rdi),%xmm2 +.byte 0x67 + pand %xmm5,%xmm1 + movq 96(%rdi),%xmm3 + addq $256,%rdi +.byte 0x67 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + xorq %rbp,%rbp + movq %rdi,8+8(%rsp) + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 16(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 32(%rcx),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 48(%rcx),%rax,%r15 + movq %r9,%rdx + por %xmm2,%xmm0 + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + por %xmm3,%xmm0 + adcxq %rax,%r12 + movq %r11,-24(%rbx) + adoxq %rbp,%r15 + movq %r12,-16(%rbx) + leaq 64(%rcx),%rcx + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 16(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 32(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + movq %r11,-32(%rbx) + mulxq 48(%rcx),%rax,%r15 + movq %r9,%rdx + leaq 64(%rcx),%rcx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + movq %r13,-16(%rbx) + + decq %rdi + jnz .Lmulx4x_inner + + movq 0+8(%rsp),%rax +.byte 102,72,15,126,194 + adcq %rbp,%r15 + subq 0(%rbx),%rdi + movq 8+8(%rsp),%rdi + movq 16+8(%rsp),%r10 + adcq %r15,%r14 + leaq (%rsi,%rax,1),%rsi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + + cmpq %r10,%rdi + jb .Lmulx4x_outer + + movq -16(%rcx),%r10 + xorq %r15,%r15 + subq %r14,%r10 + adcq %r15,%r15 + orq %r15,%rbp + xorq $1,%rbp + leaq (%rbx,%rax,1),%rdi + leaq (%rcx,%rax,2),%rcx +.byte 0x67,0x67 + sarq $3+2,%rax + leaq (%rcx,%rbp,8),%rbp + movq 56+8(%rsp),%rdx + movq %rax,%rcx + jmp .Lsqrx4x_sub +.size mulx4x_internal,.-mulx4x_internal +.type bn_powerx5,@function +.align 32 +bn_powerx5: +.Lpowerx5_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwrx_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lpwrx_sp_done + +.align 32 +.Lpwrx_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lpwrx_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lpowerx5_body: + + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + + movq 
%r10,%r9 + movq %rsi,%rdi +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq 40(%rsp),%rax + + call mulx4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lpowerx5_epilogue: + .byte 0xf3,0xc3 +.size bn_powerx5,.-bn_powerx5 + +.globl bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.type bn_sqrx8x_internal,@function +.align 32 +bn_sqrx8x_internal: +__bn_sqrx8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 48+8(%rsp),%rdi + leaq (%rsi,%r9,1),%rbp + movq %r9,0+8(%rsp) + movq %rbp,8+8(%rsp) + jmp .Lsqr8x_zero_start + +.align 32 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +.Lsqrx8x_zero: +.byte 0x3e + movdqa %xmm0,0(%rdi) + movdqa %xmm0,16(%rdi) + movdqa %xmm0,32(%rdi) + movdqa %xmm0,48(%rdi) +.Lsqr8x_zero_start: + movdqa %xmm0,64(%rdi) + movdqa %xmm0,80(%rdi) + movdqa %xmm0,96(%rdi) + movdqa %xmm0,112(%rdi) + leaq 128(%rdi),%rdi + subq $64,%r9 + jnz .Lsqrx8x_zero + + movq 0(%rsi),%rdx + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + leaq 48+8(%rsp),%rdi + xorq %rbp,%rbp + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_loop: + mulxq 8(%rsi),%r8,%rax + adcxq %r9,%r8 + adoxq %rax,%r10 + mulxq 16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 +.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcxq %r11,%r10 + adoxq %rax,%r12 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcxq %r12,%r11 + adoxq %rax,%r13 + mulxq 40(%rsi),%r12,%rax + adcxq %r13,%r12 + adoxq %rax,%r14 + mulxq 48(%rsi),%r13,%rax + adcxq %r14,%r13 + adoxq %r15,%rax + mulxq 56(%rsi),%r14,%r15 + movq 8(%rsi),%rdx + adcxq %rax,%r14 + adoxq %rbp,%r15 + adcq 64(%rdi),%r15 + movq %r8,8(%rdi) + movq %r9,16(%rdi) + sbbq %rcx,%rcx + xorq %rbp,%rbp + + + mulxq 16(%rsi),%r8,%rbx + mulxq 24(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 32(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %rbx,%r11 +.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcxq %r13,%r11 + adoxq %r14,%r12 +.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + movq 16(%rsi),%rdx + adcxq %rax,%r12 + adoxq %rbx,%r13 + adcxq %r15,%r13 + adoxq %rbp,%r14 + adcxq %rbp,%r14 + + movq %r8,24(%rdi) + movq %r9,32(%rdi) + + mulxq 24(%rsi),%r8,%rbx + mulxq 32(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 40(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %r13,%r11 +.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +.byte 0x3e + movq 24(%rsi),%rdx + adcxq %rbx,%r11 + adoxq %rax,%r12 + adcxq %r14,%r12 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + mulxq 32(%rsi),%r8,%rax + adoxq %rbp,%r13 + adcxq %rbp,%r13 + + mulxq 40(%rsi),%r9,%rbx + adcxq %r10,%r8 + adoxq %rax,%r9 + mulxq 48(%rsi),%r10,%rax + adcxq %r11,%r9 + adoxq %r12,%r10 + mulxq 56(%rsi),%r11,%r12 + movq 32(%rsi),%rdx + movq 40(%rsi),%r14 + adcxq %rbx,%r10 + adoxq %rax,%r11 + movq 48(%rsi),%r15 + adcxq %r13,%r11 + adoxq %rbp,%r12 + adcxq %rbp,%r12 + + movq %r8,56(%rdi) + movq %r9,64(%rdi) + + mulxq %r14,%r9,%rax + movq 56(%rsi),%r8 + adcxq %r10,%r9 + mulxq %r15,%r10,%rbx + adoxq %rax,%r10 + adcxq %r11,%r10 + mulxq %r8,%r11,%rax + movq %r14,%rdx + adoxq %rbx,%r11 + adcxq %r12,%r11 + + adcxq %rbp,%rax + + mulxq %r15,%r14,%rbx + mulxq %r8,%r12,%r13 + movq 
%r15,%rdx + leaq 64(%rsi),%rsi + adcxq %r14,%r11 + adoxq %rbx,%r12 + adcxq %rax,%r12 + adoxq %rbp,%r13 + +.byte 0x67,0x67 + mulxq %r8,%r8,%r14 + adcxq %r8,%r13 + adcxq %rbp,%r14 + + cmpq 8+8(%rsp),%rsi + je .Lsqrx8x_outer_break + + negq %rcx + movq $-8,%rcx + movq %rbp,%r15 + movq 64(%rdi),%r8 + adcxq 72(%rdi),%r9 + adcxq 80(%rdi),%r10 + adcxq 88(%rdi),%r11 + adcq 96(%rdi),%r12 + adcq 104(%rdi),%r13 + adcq 112(%rdi),%r14 + adcq 120(%rdi),%r15 + leaq (%rsi),%rbp + leaq 128(%rdi),%rdi + sbbq %rax,%rax + + movq -64(%rsi),%rdx + movq %rax,16+8(%rsp) + movq %rdi,24+8(%rsp) + + + xorl %eax,%eax + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_loop: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + movq %rbx,(%rdi,%rcx,8) + movl $0,%ebx + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + movq 8(%rsi,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rbx,%r15 + adcxq %rbx,%r15 + +.byte 0x67 + incq %rcx + jnz .Lsqrx8x_loop + + leaq 64(%rbp),%rbp + movq $-8,%rcx + cmpq 8+8(%rsp),%rbp + je .Lsqrx8x_break + + subq 16+8(%rsp),%rbx +.byte 0x66 + movq -64(%rsi),%rdx + adcxq 0(%rdi),%r8 + adcxq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi +.byte 0x67 + sbbq %rax,%rax + xorl %ebx,%ebx + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_break: + subq 16+8(%rsp),%r8 + movq 24+8(%rsp),%rcx + movq 0(%rsi),%rdx + xorl %ebp,%ebp + movq %r8,0(%rdi) + cmpq %rcx,%rdi + je .Lsqrx8x_outer_loop + + movq %r9,8(%rdi) + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + movq 40(%rcx),%r13 + movq %r14,48(%rdi) + movq 48(%rcx),%r14 + movq %r15,56(%rdi) + movq 56(%rcx),%r15 + movq %rcx,%rdi + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_break: + movq %r9,72(%rdi) +.byte 102,72,15,126,217 + movq %r10,80(%rdi) + movq %r11,88(%rdi) + movq %r12,96(%rdi) + movq %r13,104(%rdi) + movq %r14,112(%rdi) + leaq 48+8(%rsp),%rdi + movq (%rsi,%rcx,1),%rdx + + movq 8(%rdi),%r11 + xorq %r10,%r10 + movq 0+8(%rsp),%r9 + adoxq %r11,%r11 + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + + +.align 32 +.Lsqrx4x_shift_n_add: + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax +.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 40(%rdi),%r11 + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + movq 16(%rsi,%rcx,1),%rdx + movq 48(%rdi),%r12 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 56(%rdi),%r13 + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax + movq 24(%rsi,%rcx,1),%rdx + leaq 32(%rcx),%rcx + movq 64(%rdi),%r10 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 72(%rdi),%r11 + movq %rax,32(%rdi) + movq %rbx,40(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + jrcxz .Lsqrx4x_shift_n_add_break +.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 80(%rdi),%r12 
+ movq 88(%rdi),%r13 + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi + nop + jmp .Lsqrx4x_shift_n_add + +.align 32 +.Lsqrx4x_shift_n_add_break: + adcxq %r13,%rbx + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi +.byte 102,72,15,126,213 +sqrx8x_reduction: + xorl %eax,%eax + movq 32+8(%rsp),%rbx + movq 48+8(%rsp),%rdx + leaq -128(%rbp,%r9,2),%rcx + + movq %rcx,0+8(%rsp) + movq %rdi,8+8(%rsp) + + leaq 48+8(%rsp),%rdi + jmp .Lsqrx8x_reduction_loop + +.align 32 +.Lsqrx8x_reduction_loop: + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq %rdx,%r8 + imulq %rbx,%rdx + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,24+8(%rsp) + + leaq 64(%rdi),%rdi + xorq %rsi,%rsi + movq $-8,%rcx + jmp .Lsqrx8x_reduce + +.align 32 +.Lsqrx8x_reduce: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 16(%rbp),%rbx,%r9 + adcxq %rbx,%r8 + adoxq %r10,%r9 + + mulxq 32(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 48(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x40,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 32+8(%rsp),%rbx,%rdx + movq %rax,%rdx + movq %rax,64+48+8(%rsp,%rcx,8) + + mulxq 80(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 96(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 112(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + +.byte 0x67,0x67,0x67 + incq %rcx + jnz .Lsqrx8x_reduce + + movq %rsi,%rax + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_no_tail + + movq 48+8(%rsp),%rdx + addq 0(%rdi),%r8 + leaq 128(%rbp),%rbp + movq $-8,%rcx + adcxq 8(%rdi),%r9 + adcxq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 16(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 32(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 48(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x40,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 80(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 96(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 112(%rbp),%rax,%r15 + movq 72+48+8(%rsp,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + movq %rbx,(%rdi,%rcx,8) + movq %r8,%rbx + adcxq %rsi,%r15 + + incq %rcx + jnz .Lsqrx8x_tail + + cmpq 0+8(%rsp),%rbp + jae .Lsqrx8x_tail_done + + subq 16+8(%rsp),%rsi + movq 48+8(%rsp),%rdx + leaq 128(%rbp),%rbp + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + subq $8,%rcx + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail_done: + addq 24+8(%rsp),%r8 + movq %rsi,%rax + + subq 16+8(%rsp),%rsi +.Lsqrx8x_no_tail: + adcq 0(%rdi),%r8 +.byte 102,72,15,126,217 + adcq 8(%rdi),%r9 + movq 112(%rbp),%rsi +.byte 102,72,15,126,213 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq %rax,%rax + + movq 32+8(%rsp),%rbx + movq 64(%rdi,%rcx,1),%rdx + + movq %r8,0(%rdi) + leaq 64(%rdi),%r8 + 
movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 64(%rdi,%rcx,1),%rdi + cmpq 8+8(%rsp),%r8 + jb .Lsqrx8x_reduction_loop + xorq %rbx,%rbx + subq %r15,%rsi + adcq %rbx,%rbx + movq %rcx,%r10 +.byte 0x67 + orq %rbx,%rax +.byte 0x67 + movq %rcx,%r9 + xorq $1,%rax + sarq $3+2,%rcx + + leaq (%rbp,%rax,8),%rbp +.byte 102,72,15,126,202 +.byte 102,72,15,126,206 + jmp .Lsqrx4x_sub + +.align 32 +.Lsqrx4x_sub: +.byte 0x66 + movq 0(%rdi),%r12 + movq 8(%rdi),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rdi),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rdi),%r15 + leaq 32(%rdi),%rdi + sbbq 32(%rbp),%r14 + movq %r12,0(%rdx) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdx) + movq %r14,16(%rdx) + movq %r15,24(%rdx) + leaq 32(%rdx),%rdx + + incq %rcx + jnz .Lsqrx4x_sub + negq %r9 + + .byte 0xf3,0xc3 +.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +.globl bn_get_bits5 +.type bn_get_bits5,@function +.align 16 +bn_get_bits5: + movq %rdi,%r10 + movl %esi,%ecx + shrl $3,%esi + movzwl (%r10,%rsi,1),%eax + andl $7,%ecx + shrl %cl,%eax + andl $31,%eax + .byte 0xf3,0xc3 +.size bn_get_bits5,.-bn_get_bits5 + .globl bn_scatter5 .type bn_scatter5,@function .align 16 bn_scatter5: - cmpq $0,%rsi + cmpl $0,%esi jz .Lscatter_epilogue leaq (%rdx,%rcx,8),%rdx .Lscatter: @@ -734,7 +2906,7 @@ bn_scatter5: leaq 8(%rdi),%rdi movq %rax,(%rdx) leaq 256(%rdx),%rdx - subq $1,%rsi + subl $1,%esi jnz .Lscatter .Lscatter_epilogue: .byte 0xf3,0xc3 @@ -744,13 +2916,13 @@ bn_scatter5: .type bn_gather5,@function .align 16 bn_gather5: - movq %rcx,%r11 - shrq $3,%rcx + movl %ecx,%r11d + shrl $3,%ecx andq $7,%r11 - notq %rcx + notl %ecx leaq .Lmagic_masks(%rip),%rax - andq $3,%rcx - leaq 96(%rdx,%r11,8),%rdx + andl $3,%ecx + leaq 128(%rdx,%r11,8),%rdx movq 0(%rax,%rcx,8),%xmm4 movq 8(%rax,%rcx,8),%xmm5 movq 16(%rax,%rcx,8),%xmm6 @@ -758,22 +2930,23 @@ bn_gather5: jmp .Lgather .align 16 .Lgather: - movq -96(%rdx),%xmm0 - movq -32(%rdx),%xmm1 + movq -128(%rdx),%xmm0 + movq -64(%rdx),%xmm1 pand %xmm4,%xmm0 - movq 32(%rdx),%xmm2 + movq 0(%rdx),%xmm2 pand %xmm5,%xmm1 - movq 96(%rdx),%xmm3 + movq 64(%rdx),%xmm3 pand %xmm6,%xmm2 por %xmm1,%xmm0 pand %xmm7,%xmm3 +.byte 0x67,0x67 por %xmm2,%xmm0 leaq 256(%rdx),%rdx por %xmm3,%xmm0 movq %xmm0,(%rdi) leaq 8(%rdi),%rdi - subq $1,%rsi + subl $1,%esi jnz .Lgather .byte 0xf3,0xc3 .LSEH_end_bn_gather5: diff --git a/deps/openssl/asm/x64-elf-gas/camellia/cmll-x86_64.s b/deps/openssl/asm/x64-elf-gas/camellia/cmll-x86_64.s index 3a5f4c4230a2ea..ac7da4dfc2d19e 100644 --- a/deps/openssl/asm/x64-elf-gas/camellia/cmll-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/camellia/cmll-x86_64.s @@ -1,7 +1,6 @@ .text - .globl Camellia_EncryptBlock .type Camellia_EncryptBlock,@function .align 16 @@ -268,7 +267,6 @@ _x86_64_Camellia_encrypt: movl %edx,%r11d .byte 0xf3,0xc3 - .size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt @@ -539,7 +537,6 @@ _x86_64_Camellia_decrypt: movl %ebx,%r11d .byte 0xf3,0xc3 - .size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt .globl Camellia_Ekeygen .type Camellia_Ekeygen,@function @@ -552,7 +549,7 @@ Camellia_Ekeygen: pushq %r15 .Lkey_prologue: - movq %rdi,%r15 + movl %edi,%r15d movq %rdx,%r13 movl 0(%rsi),%r8d @@ -1727,7 +1724,6 @@ Camellia_cbc_encrypt: movq %r12,%rsi leaq 8+24(%rsp),%rdi .long 0x9066A4F3 - popfq .Lcbc_enc_popf: @@ -1736,7 +1732,6 @@ Camellia_cbc_encrypt: movq %rax,8(%rsp) jmp .Lcbc_eloop - .align 16 .LCBC_DECRYPT: xchgq %r14,%r15 @@ -1819,7 +1814,6 @@ Camellia_cbc_encrypt: 
leaq 8+24(%rsp),%rsi leaq (%r13),%rdi .long 0x9066A4F3 - popfq .Lcbc_dec_popf: diff --git a/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s new file mode 100644 index 00000000000000..3a999664c74dc6 --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s @@ -0,0 +1,3504 @@ +.text + + + +.align 64 +.Lpoly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + + +.LRR: +.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd + +.LOne: +.long 1,1,1,1,1,1,1,1 +.LTwo: +.long 2,2,2,2,2,2,2,2 +.LThree: +.long 3,3,3,3,3,3,3,3 +.LONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +.globl ecp_nistz256_mul_by_2 +.type ecp_nistz256_mul_by_2,@function +.align 64 +ecp_nistz256_mul_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + sbbq %r13,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 + + + +.globl ecp_nistz256_div_by_2 +.type ecp_nistz256_div_by_2,@function +.align 32 +ecp_nistz256_div_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r8,%rax + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + movq %r9,%rdx + xorq %r13,%r13 + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + adcq $0,%r13 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + cmovzq %rsi,%r13 + + movq %r9,%rax + shrq $1,%r8 + shlq $63,%rax + movq %r10,%rdx + shrq $1,%r9 + orq %rax,%r8 + shlq $63,%rdx + movq %r11,%rcx + shrq $1,%r10 + orq %rdx,%r9 + shlq $63,%rcx + shrq $1,%r11 + shlq $63,%r13 + orq %rcx,%r10 + orq %r13,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 + + + +.globl ecp_nistz256_mul_by_3 +.type ecp_nistz256_mul_by_3,@function +.align 32 +ecp_nistz256_mul_by_3: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq .Lpoly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq .Lpoly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + + xorq %r13,%r13 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + movq %r8,%rax + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq .Lpoly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq .Lpoly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size 
ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 + + + +.globl ecp_nistz256_add +.type ecp_nistz256_add,@function +.align 32 +ecp_nistz256_add: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_add,.-ecp_nistz256_add + + + +.globl ecp_nistz256_sub +.type ecp_nistz256_sub,@function +.align 32 +ecp_nistz256_sub: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + movq %r8,%rax + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_sub,.-ecp_nistz256_sub + + + +.globl ecp_nistz256_neg +.type ecp_nistz256_neg,@function +.align 32 +ecp_nistz256_neg: + pushq %r12 + pushq %r13 + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_neg,.-ecp_nistz256_neg + + + + +.globl ecp_nistz256_to_mont +.type ecp_nistz256_to_mont,@function +.align 32 +ecp_nistz256_to_mont: + movl $524544,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + leaq .LRR(%rip),%rdx + jmp .Lmul_mont +.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont + + + + + + + +.globl ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,@function +.align 32 +ecp_nistz256_mul_mont: + movl $524544,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx +.Lmul_mont: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + cmpl $524544,%ecx + je .Lmul_montx + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq + jmp .Lmul_mont_done + +.align 32 +.Lmul_montx: + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_mul_montx +.Lmul_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +.type __ecp_nistz256_mul_montq,@function +.align 32 +__ecp_nistz256_mul_montq: + + + movq %rax,%rbp + mulq %r9 + movq .Lpoly+8(%rip),%r14 + movq %rax,%r8 + movq 
%rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq .Lpoly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp + shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq + + + + + + + + +.globl ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,@function +.align 32 +ecp_nistz256_sqr_mont: + movl $524544,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + cmpl $524544,%ecx + je .Lsqr_montx + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq + jmp .Lsqr_mont_done + +.align 32 +.Lsqr_montx: + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_sqr_montx +.Lsqr_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +.type __ecp_nistz256_sqr_montq,@function 
+.align 32 +__ecp_nistz256_sqr_montq: + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq %rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq .Lpoly+8(%rip),%rsi + movq .Lpoly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq +.type __ecp_nistz256_mul_montx,@function +.align 32 +__ecp_nistz256_mul_montx: + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + movq $32,%r14 + xorq %r13,%r13 + mulxq %r11,%rbp,%r11 + movq .Lpoly+24(%rip),%r15 + adcq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + adcq %rbp,%r10 + shlxq %r14,%r8,%rbp + adcq %rcx,%r11 + shrxq %r14,%r8,%rcx + adcq $0,%r12 + + + + addq %rbp,%r9 + adcq %rcx,%r10 + + mulxq %r15,%rcx,%rbp + movq 8(%rbx),%rdx + adcq %rcx,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + adcxq %rcx,%r12 + shlxq %r14,%r9,%rcx + adoxq %rbp,%r13 + shrxq %r14,%r9,%rbp + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + + addq %rcx,%r10 + adcq %rbp,%r11 + + mulxq %r15,%rcx,%rbp + movq 16(%rbx),%rdx + adcq %rcx,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + adcxq %rcx,%r13 + shlxq %r14,%r10,%rcx + adoxq 
%rbp,%r8 + shrxq %r14,%r10,%rbp + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + + addq %rcx,%r11 + adcq %rbp,%r12 + + mulxq %r15,%rcx,%rbp + movq 24(%rbx),%rdx + adcq %rcx,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + adcxq %rcx,%r8 + shlxq %r14,%r11,%rcx + adoxq %rbp,%r9 + shrxq %r14,%r11,%rbp + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + + addq %rcx,%r12 + adcq %rbp,%r13 + + mulxq %r15,%rcx,%rbp + movq %r12,%rbx + movq .Lpoly+8(%rip),%r14 + adcq %rcx,%r8 + movq %r13,%rdx + adcq %rbp,%r9 + adcq $0,%r10 + + + + xorl %eax,%eax + movq %r8,%rcx + sbbq $-1,%r12 + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rbp + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + movq %r12,0(%rdi) + cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %rbp,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx + +.type __ecp_nistz256_sqr_montx,@function +.align 32 +__ecp_nistz256_sqr_montx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + xorl %eax,%eax + adcq %rcx,%r10 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + + mulxq %r8,%rcx,%r14 + movq 0+128(%rsi),%rdx + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + mulxq %rdx,%r8,%rbp + movq 8+128(%rsi),%rdx + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax + movq 16+128(%rsi),%rdx + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 +.byte 0x67 + mulxq %rdx,%rcx,%rbp + movq 24+128(%rsi),%rdx + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + movq $32,%rsi + adoxq %rbp,%r13 +.byte 0x67,0x67 + mulxq %rdx,%rcx,%rax + movq %r8,%rdx + adoxq %rcx,%r14 + shlxq %rsi,%r8,%rcx + adoxq %rax,%r15 + shrxq %rsi,%r8,%rax + movq .Lpoly+24(%rip),%rbp + + + addq %rcx,%r9 + adcq %rax,%r10 + + mulxq %rbp,%rcx,%r8 + movq %r9,%rdx + adcq %rcx,%r11 + shlxq %rsi,%r9,%rcx + adcq $0,%r8 + shrxq %rsi,%r9,%rax + + + addq %rcx,%r10 + adcq %rax,%r11 + + mulxq %rbp,%rcx,%r9 + movq %r10,%rdx + adcq %rcx,%r8 + shlxq %rsi,%r10,%rcx + adcq $0,%r9 + shrxq %rsi,%r10,%rax + + + addq %rcx,%r11 + adcq %rax,%r8 + + mulxq %rbp,%rcx,%r10 + movq %r11,%rdx + adcq %rcx,%r9 + shlxq %rsi,%r11,%rcx + adcq $0,%r10 + shrxq %rsi,%r11,%rax + + + addq %rcx,%r8 + adcq %rax,%r9 + + mulxq %rbp,%rcx,%r11 + adcq %rcx,%r10 + adcq $0,%r11 + + xorq %rdx,%rdx + adcq %r8,%r12 + movq .Lpoly+8(%rip),%rsi + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r13,%r9 + adcq $0,%rdx + + xorl %eax,%eax + sbbq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%r11 + sbbq %rbp,%r15 + sbbq $0,%rdx + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %r11,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx + + + + + + +.globl ecp_nistz256_from_mont +.type ecp_nistz256_from_mont,@function +.align 32 +ecp_nistz256_from_mont: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%rax + movq .Lpoly+24(%rip),%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 
24(%rsi),%r11 + movq %rax,%r8 + movq .Lpoly+8(%rip),%r12 + + + + movq %rax,%rcx + shlq $32,%r8 + mulq %r13 + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %r13 + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %r13 + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %r13 + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + movq %r8,%rcx + adcq %rax,%r10 + movq %r9,%rsi + adcq $0,%rdx + + + + subq $-1,%r8 + movq %r10,%rax + sbbq %r12,%r9 + sbbq $0,%r10 + movq %rdx,%r11 + sbbq %r13,%rdx + sbbq %r13,%r13 + + cmovnzq %rcx,%r8 + cmovnzq %rsi,%r9 + movq %r8,0(%rdi) + cmovnzq %rax,%r10 + movq %r9,8(%rdi) + cmovzq %rdx,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont + + +.globl ecp_nistz256_select_w5 +.type ecp_nistz256_select_w5,@function +.align 32 +ecp_nistz256_select_w5: + movl OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz .Lavx2_select_w5 + movdqa .LOne(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +.Lselect_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz .Lselect_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + .byte 0xf3,0xc3 +.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 + + + +.globl ecp_nistz256_select_w7 +.type ecp_nistz256_select_w7,@function +.align 32 +ecp_nistz256_select_w7: + movl OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz .Lavx2_select_w7 + movdqa .LOne(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +.Lselect_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz .Lselect_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + .byte 0xf3,0xc3 +.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 + + +.type ecp_nistz256_avx2_select_w5,@function +.align 32 +ecp_nistz256_avx2_select_w5: +.Lavx2_select_w5: + vzeroupper + vmovdqa .LTwo(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + vpxor %ymm4,%ymm4,%ymm4 + + vmovdqa .LOne(%rip),%ymm5 + vmovdqa .LTwo(%rip),%ymm10 + + vmovd %edx,%xmm1 + 
vpermd %ymm1,%ymm2,%ymm1 + + movq $8,%rax +.Lselect_loop_avx2_w5: + + vmovdqa 0(%rsi),%ymm6 + vmovdqa 32(%rsi),%ymm7 + vmovdqa 64(%rsi),%ymm8 + + vmovdqa 96(%rsi),%ymm11 + vmovdqa 128(%rsi),%ymm12 + vmovdqa 160(%rsi),%ymm13 + + vpcmpeqd %ymm1,%ymm5,%ymm9 + vpcmpeqd %ymm1,%ymm10,%ymm14 + + vpaddd %ymm0,%ymm5,%ymm5 + vpaddd %ymm0,%ymm10,%ymm10 + leaq 192(%rsi),%rsi + + vpand %ymm9,%ymm6,%ymm6 + vpand %ymm9,%ymm7,%ymm7 + vpand %ymm9,%ymm8,%ymm8 + vpand %ymm14,%ymm11,%ymm11 + vpand %ymm14,%ymm12,%ymm12 + vpand %ymm14,%ymm13,%ymm13 + + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm7,%ymm3,%ymm3 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm11,%ymm2,%ymm2 + vpxor %ymm12,%ymm3,%ymm3 + vpxor %ymm13,%ymm4,%ymm4 + + decq %rax + jnz .Lselect_loop_avx2_w5 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 + + + +.globl ecp_nistz256_avx2_select_w7 +.type ecp_nistz256_avx2_select_w7,@function +.align 32 +ecp_nistz256_avx2_select_w7: +.Lavx2_select_w7: + vzeroupper + vmovdqa .LThree(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + + vmovdqa .LOne(%rip),%ymm4 + vmovdqa .LTwo(%rip),%ymm8 + vmovdqa .LThree(%rip),%ymm12 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + + movq $21,%rax +.Lselect_loop_avx2_w7: + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vmovdqa 64(%rsi),%ymm9 + vmovdqa 96(%rsi),%ymm10 + + vmovdqa 128(%rsi),%ymm13 + vmovdqa 160(%rsi),%ymm14 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + vpcmpeqd %ymm1,%ymm8,%ymm11 + vpcmpeqd %ymm1,%ymm12,%ymm15 + + vpaddd %ymm0,%ymm4,%ymm4 + vpaddd %ymm0,%ymm8,%ymm8 + vpaddd %ymm0,%ymm12,%ymm12 + leaq 192(%rsi),%rsi + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + vpand %ymm11,%ymm9,%ymm9 + vpand %ymm11,%ymm10,%ymm10 + vpand %ymm15,%ymm13,%ymm13 + vpand %ymm15,%ymm14,%ymm14 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm9,%ymm2,%ymm2 + vpxor %ymm10,%ymm3,%ymm3 + vpxor %ymm13,%ymm2,%ymm2 + vpxor %ymm14,%ymm3,%ymm3 + + decq %rax + jnz .Lselect_loop_avx2_w7 + + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +.type __ecp_nistz256_add_toq,@function +.align 32 +__ecp_nistz256_add_toq: + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq + +.type __ecp_nistz256_sub_fromq,@function +.align 32 +__ecp_nistz256_sub_fromq: + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq + +.type 
__ecp_nistz256_subq,@function +.align 32 +__ecp_nistz256_subq: + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + .byte 0xf3,0xc3 +.size __ecp_nistz256_subq,.-__ecp_nistz256_subq + +.type __ecp_nistz256_mul_by_2q,@function +.align 32 +__ecp_nistz256_mul_by_2q: + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q +.globl ecp_nistz256_point_double +.type ecp_nistz256_point_double,@function +.align 32 +ecp_nistz256_point_double: + movl $524544,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $524544,%ecx + je .Lpoint_doublex + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + 
movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_double,.-ecp_nistz256_point_double +.globl ecp_nistz256_point_add +.type ecp_nistz256_point_add,@function +.align 32 +ecp_nistz256_point_add: + movl $524544,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $524544,%ecx + je .Lpoint_addx + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 
16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz .Ladd_proceedq +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz .Ladd_proceedq + testq %r9,%r9 + jz .Ladd_proceedq + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp .Ladd_doneq + +.align 32 +.Ladd_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + 
movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_doneq: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_add,.-ecp_nistz256_point_add +.globl ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,@function +.align 32 +ecp_nistz256_point_add_affine: + movl $524544,%ecx + andl OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $524544,%ecx + je .Lpoint_add_affinex + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + 
movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 
384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine +.type __ecp_nistz256_add_tox,@function +.align 32 +__ecp_nistz256_add_tox: + xorq %r11,%r11 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox + +.type __ecp_nistz256_sub_fromx,@function +.align 32 +__ecp_nistz256_sub_fromx: + xorq %r11,%r11 + sbbq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq $0,%r11 + + xorq %r10,%r10 + adcq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx + +.type __ecp_nistz256_subx,@function +.align 32 +__ecp_nistz256_subx: + xorq %r11,%r11 + sbbq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq $0,%r11 + + xorq %r9,%r9 + adcq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + + btq $0,%r11 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + cmovcq %rcx,%r8 + cmovcq %r10,%r9 + + .byte 0xf3,0xc3 +.size __ecp_nistz256_subx,.-__ecp_nistz256_subx + +.type __ecp_nistz256_mul_by_2x,@function +.align 32 +__ecp_nistz256_mul_by_2x: + xorq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x +.type ecp_nistz256_point_doublex,@function +.align 
32 +ecp_nistz256_point_doublex: +.Lpoint_doublex: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-128(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 32(%rbx),%rdx + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-128(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rdx + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 0+32(%rsp),%rdx + movq 8+32(%rsp),%r14 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subx + + movq 32(%rsp),%rdx + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-128(%rsp),%rsi + cmovzq 
%r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex +.type ecp_nistz256_point_addx,@function +.align 32 +ecp_nistz256_point_addx: +.Lpoint_addx: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + movq %rdx,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rdx + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 + + leaq 64-128(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 416(%rsp),%rdx + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 512(%rsp),%rdx + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq -128+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 480(%rsp),%rdx + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call 
__ecp_nistz256_sub_fromx + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz .Ladd_proceedx +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz .Ladd_proceedx + testq %r9,%r9 + jz .Ladd_proceedx + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp .Ladd_donex + +.align 32 +.Ladd_proceedx: + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq -128+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0(%rsp),%rdx + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 160(%rsp),%rdx + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 
+ pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_donex: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx +.type ecp_nistz256_point_add_affinex,@function +.align 32 +ecp_nistz256_point_add_affinex: +.Lpoint_add_affinex: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + movq 0(%rbx),%rdx + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-128(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montx + 
+ movq 0+96(%rsp),%rdx + movq 8+96(%rsp),%r14 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq -128+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rdx + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq -128+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex diff --git 
a/deps/openssl/asm/x64-elf-gas/md5/md5-x86_64.s b/deps/openssl/asm/x64-elf-gas/md5/md5-x86_64.s index 81b0c7a1178b4e..53f44ff5f180bc 100644 --- a/deps/openssl/asm/x64-elf-gas/md5/md5-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/md5/md5-x86_64.s @@ -1,5 +1,4 @@ .text - .align 16 .globl md5_block_asm_data_order @@ -33,7 +32,6 @@ md5_block_asm_data_order: je .Lend - .Lloop: movl %eax,%r8d movl %ebx,%r9d @@ -653,7 +651,6 @@ md5_block_asm_data_order: jb .Lloop - .Lend: movl %eax,0(%rbp) movl %ebx,4(%rbp) diff --git a/deps/openssl/asm/x64-elf-gas/modes/aesni-gcm-x86_64.s b/deps/openssl/asm/x64-elf-gas/modes/aesni-gcm-x86_64.s new file mode 100644 index 00000000000000..4e82736a3eaf3a --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/modes/aesni-gcm-x86_64.s @@ -0,0 +1,753 @@ +.text + +.type _aesni_ctr32_ghash_6x,@function +.align 32 +_aesni_ctr32_ghash_6x: + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp .Loop6x + +.align 32 +.Loop6x: + addl $100663296,%ebx + jc .Lhandle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +.Lresume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $16,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $1,%xmm3,%xmm7,%xmm6 + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $17,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $96,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $16,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $1,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $17,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $16,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $1,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $17,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $16,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $1,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $17,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq 
%r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $16,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $1,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $17,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $16,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $1,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $17,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $16,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $11,%ebp + jb .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + je .Lenc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp .Lenc_tail + +.align 32 +.Lhandle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 
+ vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp .Lresume_ctr32 + +.align 32 +.Lenc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $16,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $96,%r10 + subq $6,%rdx + jc .L6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp .Loop6x + +.L6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + .byte 0xf3,0xc3 +.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +.align 32 +aesni_gcm_decrypt: + xorq %r10,%r10 + cmpq $96,%rdx + jb .Lgcm_dec_abort + + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $3968,%r15 + vmovdqu (%r9),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32+32(%r9),%r9 + movl 240-128(%rcx),%ebp + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Ldec_no_key_aliasing + cmpq $768,%r15 + jnc .Ldec_no_key_aliasing + subq %r15,%rsp +.Ldec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + leaq (%rdi),%r14 + vmovdqu 64(%rdi),%xmm4 + leaq -192(%rdi,%rdx,1),%r15 + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %r10,%r10 + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups %xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lgcm_dec_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +.type _aesni_ctr32_6x,@function +.align 32 +_aesni_ctr32_6x: + vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 
32(%r11),%xmm2 + leaq -1(%rbp),%r13 + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc .Lhandle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 + +.align 16 +.Loop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz .Loop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + .byte 0xf3,0xc3 +.align 32 +.Lhandle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp .Loop_ctr32 +.size _aesni_ctr32_6x,.-_aesni_ctr32_6x + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +.align 32 +aesni_gcm_encrypt: + xorq %r10,%r10 + cmpq $288,%rdx + jb .Lgcm_enc_abort + + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq .Lbswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $3968,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 240-128(%rcx),%ebp + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc .Lenc_no_key_aliasing + cmpq $768,%r15 + jnc .Lenc_no_key_aliasing + subq %r15,%rsp +.Lenc_no_key_aliasing: + + leaq (%rsi),%r14 + leaq -192(%rsi,%rdx,1),%r15 + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + vmovdqu (%r9),%xmm8 + leaq 32+32(%r9),%r9 + subq $12,%rdx + movq $192,%r10 + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + 
vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $17,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $17,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $16,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $17,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $17,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $16,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $17,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $17,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $16,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $17,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0,%xmm15,%xmm1,%xmm1 + vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $17,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $16,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $17,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq $16,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0,%xmm0,%xmm11,%xmm5 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $17,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $16,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + 
vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $16,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $17,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0,%xmm0,%xmm8,%xmm5 + vpclmulqdq $17,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $16,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $16,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $16,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lgcm_enc_abort: + movq %r10,%rax + .byte 0xf3,0xc3 +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lpoly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lone_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Ltwo_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lone_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm/x64-elf-gas/modes/ghash-x86_64.s b/deps/openssl/asm/x64-elf-gas/modes/ghash-x86_64.s index f5411f39a75788..1cfe19cb55ecf5 100644 --- a/deps/openssl/asm/x64-elf-gas/modes/ghash-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/modes/ghash-x86_64.s @@ -659,6 +659,7 @@ gcm_ghash_4bit: .type gcm_init_clmul,@function .align 16 gcm_init_clmul: +.L_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -677,15 +678,15 @@ gcm_init_clmul: pxor %xmm5,%xmm2 + pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,220,0 +.byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -695,44 +696,134 @@ gcm_init_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq 
$1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - movdqu %xmm2,(%rdi) - movdqu %xmm0,16(%rdi) + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) .byte 0xf3,0xc3 .size gcm_init_clmul,.-gcm_init_clmul .globl gcm_gmult_clmul .type gcm_gmult_clmul,@function .align 16 gcm_gmult_clmul: +.L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 @@ -745,201 +836,379 @@ gcm_gmult_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .size gcm_gmult_clmul,.-gcm_gmult_clmul .globl gcm_ghash_clmul .type gcm_ghash_clmul,@function -.align 16 +.align 32 gcm_ghash_clmul: - movdqa .Lbswap_mask(%rip),%xmm5 +.L_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm10 movdqu (%rdi),%xmm0 movdqu (%rsi),%xmm2 -.byte 102,15,56,0,197 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 subq $16,%rcx jz .Lodd_tail - movdqu 16(%rsi),%xmm8 + movdqu 16(%rsi),%xmm6 + movl OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $48,%rcx + jb .Lskip4x + andl $71303168,%eax + cmpl $4194304,%eax + je .Lskip4x + subq $48,%rcx + movq $11547335547999543296,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 - movdqu (%rdx),%xmm3 - movdqu 16(%rdx),%xmm6 -.byte 102,15,56,0,221 -.byte 102,15,56,0,245 - pxor %xmm3,%xmm0 - movdqa %xmm6,%xmm7 - pshufd $78,%xmm6,%xmm3 - pshufd $78,%xmm2,%xmm4 - pxor %xmm6,%xmm3 - pxor %xmm2,%xmm4 -.byte 102,15,58,68,242,0 -.byte 102,15,58,68,250,17 -.byte 102,15,58,68,220,0 - pxor %xmm6,%xmm3 - pxor %xmm7,%xmm3 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm7 - pxor %xmm4,%xmm6 + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 
102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm8,%xmm4 - pxor %xmm0,%xmm3 - pxor %xmm8,%xmm4 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 - leaq 32(%rdx),%rdx - subq $32,%rcx - jbe .Leven_tail + leaq 64(%rdx),%rdx + subq $64,%rcx + jc .Ltail4x -.Lmod_loop: -.byte 102,65,15,58,68,192,0 -.byte 102,65,15,58,68,200,17 -.byte 102,15,58,68,220,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 + jmp .Lmod4_loop +.align 32 +.Lmod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - movdqu (%rdx),%xmm3 - pxor %xmm6,%xmm0 - pxor %xmm7,%xmm1 - - movdqu 16(%rdx),%xmm6 -.byte 102,15,56,0,221 -.byte 102,15,56,0,245 - - movdqa %xmm6,%xmm7 - pshufd $78,%xmm6,%xmm9 - pshufd $78,%xmm2,%xmm10 - pxor %xmm6,%xmm9 - pxor %xmm2,%xmm10 - pxor %xmm3,%xmm1 + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa .L7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jnc .Lmod4_loop + +.Ltail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 
- psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 -.byte 102,15,58,68,242,0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + -.byte 102,15,58,68,250,17 movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + addq $64,%rcx + jz .Ldone + movdqu 32(%rsi),%xmm7 + subq $16,%rcx + jz .Lodd_tail +.Lskip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $32,%rcx + jbe .Leven_tail + nop + jmp .Lmod_loop -.byte 102,69,15,58,68,202,0 +.align 32 +.Lmod_loop: movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm8,%xmm4 - pxor %xmm0,%xmm3 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 - pxor %xmm6,%xmm9 - pxor %xmm7,%xmm9 - movdqa %xmm9,%xmm10 - psrldq $8,%xmm9 - pslldq $8,%xmm10 - pxor %xmm9,%xmm7 - pxor %xmm10,%xmm6 + movdqa %xmm3,%xmm5 + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + subq $32,%rcx ja .Lmod_loop .Leven_tail: -.byte 102,65,15,58,68,192,0 -.byte 102,65,15,58,68,200,17 -.byte 102,15,58,68,220,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 pslldq $8,%xmm4 - pxor %xmm3,%xmm1 + pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 - pxor %xmm6,%xmm0 - pxor %xmm7,%xmm1 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 testq %rcx,%rcx jnz .Ldone .Lodd_tail: - movdqu (%rdx),%xmm3 -.byte 102,15,56,0,221 - pxor %xmm3,%xmm0 + 
movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,220,0 +.byte 102,15,58,68,223,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -949,38 +1218,531 @@ gcm_ghash_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 .Ldone: -.byte 102,15,56,0,197 +.byte 102,65,15,56,0,194 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 -.LSEH_end_gcm_ghash_clmul: .size gcm_ghash_clmul,.-gcm_ghash_clmul +.globl gcm_init_avx +.type gcm_init_avx,@function +.align 32 +gcm_init_avx: + vzeroupper + + vmovdqu (%rsi),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp .Linit_start_avx +.align 32 +.Linit_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +.Linit_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz .Linit_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + .byte 0xf3,0xc3 +.size gcm_init_avx,.-gcm_init_avx +.globl gcm_gmult_avx +.type 
gcm_gmult_avx,@function +.align 32 +gcm_gmult_avx: + jmp .L_gmult_clmul +.size gcm_gmult_avx,.-gcm_gmult_avx +.globl gcm_ghash_avx +.type gcm_ghash_avx,@function +.align 32 +gcm_ghash_avx: + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq .L0x1c2_polynomial(%rip),%r10 + leaq 64(%rsi),%rsi + vmovdqu .Lbswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $128,%rcx + jb .Lshort_avx + subq $128,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $16,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $128,%rcx + jb .Ltail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $128,%rcx + jmp .Loop8x_avx + +.align 32 +.Loop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 
16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $16,(%r10),%xmm10,%xmm10 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $16,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $16,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $128,%rcx + jnc .Loop8x_avx + + addq $128,%rcx + jmp .Ltail_no_xor_avx + +.align 32 +.Lshort_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $16,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $16,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + 
vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $16,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $16,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $16,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $16,%rcx + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $16,%rcx + jmp .Ltail_avx + +.align 32 +.Ltail_avx: + vpxor %xmm10,%xmm15,%xmm15 +.Ltail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $16,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $16,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne .Lshort_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + .byte 0xf3,0xc3 +.size gcm_ghash_avx,.-gcm_ghash_avx .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: +.long 7,0,7,0 +.L7_mask_poly: +.long 7,0,450,0 .align 64 .type .Lrem_4bit,@object .Lrem_4bit: diff --git a/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s b/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s index 501027a801124e..9c7110f4ef09c3 100644 --- a/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/rc4/rc4-md5-x86_64.s @@ -1,5 +1,4 @@ .text - .align 16 .globl rc4_md5_enc diff --git a/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s b/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s index f2b8a8bc04d630..d52224afcfa806 100644 --- a/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/rc4/rc4-x86_64.s @@ -1,7 +1,6 @@ .text - .globl RC4 .type RC4,@function .align 16 @@ -48,7 
+47,7 @@ RC4: orq %rsi,%rsi movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl - movb %dl,(%r13,%r12,1) + movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %rbx jnz .Loop8_warmup @@ -127,7 +126,7 @@ RC4: orq %rsi,%rsi subq $8,%r11 xorq (%r12),%r8 - movq %r8,(%r13,%r12,1) + movq %r8,(%r12,%r13,1) leaq 8(%r12),%r12 testq $-8,%r11 @@ -153,7 +152,7 @@ RC4: orq %rsi,%rsi movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl - movb %dl,(%r13,%r12,1) + movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %rbx jnz .Loop16_warmup @@ -190,7 +189,7 @@ RC4: orq %rsi,%rsi pxor %xmm1,%xmm2 addb %bl,%cl pinsrw $0,(%rdi,%rax,4),%xmm0 - movdqu %xmm2,(%r13,%r12,1) + movdqu %xmm2,(%r12,%r13,1) leaq 16(%r12),%r12 .Loop16_enter: movl (%rdi,%rcx,4),%edx @@ -326,7 +325,7 @@ RC4: orq %rsi,%rsi psllq $8,%xmm1 pxor %xmm0,%xmm2 pxor %xmm1,%xmm2 - movdqu %xmm2,(%r13,%r12,1) + movdqu %xmm2,(%r12,%r13,1) leaq 16(%r12),%r12 cmpq $0,%r11 @@ -344,7 +343,7 @@ RC4: orq %rsi,%rsi movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl - movb %dl,(%r13,%r12,1) + movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %r11 jnz .Lloop1 @@ -370,7 +369,6 @@ RC4: orq %rsi,%rsi cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov0 - movq %rax,%rbx .Lcmov0: addb %al,%dl @@ -385,7 +383,6 @@ RC4: orq %rsi,%rsi cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov1 - movq %rbx,%rax .Lcmov1: addb %bl,%dl @@ -400,7 +397,6 @@ RC4: orq %rsi,%rsi cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov2 - movq %rax,%rbx .Lcmov2: addb %al,%dl @@ -415,7 +411,6 @@ RC4: orq %rsi,%rsi cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov3 - movq %rbx,%rax .Lcmov3: addb %bl,%dl @@ -430,7 +425,6 @@ RC4: orq %rsi,%rsi cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov4 - movq %rax,%rbx .Lcmov4: addb %al,%dl @@ -445,7 +439,6 @@ RC4: orq %rsi,%rsi cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov5 - movq %rbx,%rax .Lcmov5: addb %bl,%dl @@ -460,7 +453,6 @@ RC4: orq %rsi,%rsi cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) jne .Lcmov6 - movq %rax,%rbx .Lcmov6: addb %al,%dl @@ -475,7 +467,6 @@ RC4: orq %rsi,%rsi cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) jne .Lcmov7 - movq %rbx,%rax .Lcmov7: addb %bl,%dl diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha1-mb-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha1-mb-x86_64.s new file mode 100644 index 00000000000000..8da489ea45f931 --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/sha/sha1-mb-x86_64.s @@ -0,0 +1,7221 @@ +.text + + + +.globl sha1_multi_block +.type sha1_multi_block,@function +.align 32 +sha1_multi_block: + movq OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + movdqu 32(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 96(%rdi),%xmm13 + movdqu 
128(%rdi),%xmm14 + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + jmp .Loop + +.align 32 +.Loop: + movd (%r8),%xmm0 + leaq 64(%r8),%r8 + movd (%r9),%xmm2 + leaq 64(%r9),%r9 + movd (%r10),%xmm3 + leaq 64(%r10),%r10 + movd (%r11),%xmm4 + leaq 64(%r11),%r11 + punpckldq %xmm3,%xmm0 + movd -60(%r8),%xmm1 + punpckldq %xmm4,%xmm2 + movd -60(%r9),%xmm9 + punpckldq %xmm2,%xmm0 + movd -60(%r10),%xmm8 +.byte 102,15,56,0,197 + movd -60(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,0-128(%rax) + paddd %xmm0,%xmm14 + movd -56(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -56(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -56(%r10),%xmm8 + por %xmm7,%xmm11 + movd -56(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,16-128(%rax) + paddd %xmm1,%xmm13 + movd -52(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -52(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -52(%r10),%xmm8 + por %xmm7,%xmm10 + movd -52(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,32-128(%rax) + paddd %xmm2,%xmm12 + movd -48(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -48(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -48(%r10),%xmm8 + por %xmm7,%xmm14 + movd -48(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,48-128(%rax) + paddd %xmm3,%xmm11 + movd -44(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -44(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -44(%r10),%xmm8 + por %xmm7,%xmm13 + movd -44(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,64-128(%rax) + paddd %xmm4,%xmm10 + movd -40(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -40(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -40(%r10),%xmm8 + por %xmm7,%xmm12 + movd -40(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + 
punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,80-128(%rax) + paddd %xmm0,%xmm14 + movd -36(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -36(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -36(%r10),%xmm8 + por %xmm7,%xmm11 + movd -36(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,96-128(%rax) + paddd %xmm1,%xmm13 + movd -32(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -32(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -32(%r10),%xmm8 + por %xmm7,%xmm10 + movd -32(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,112-128(%rax) + paddd %xmm2,%xmm12 + movd -28(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -28(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -28(%r10),%xmm8 + por %xmm7,%xmm14 + movd -28(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,128-128(%rax) + paddd %xmm3,%xmm11 + movd -24(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -24(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -24(%r10),%xmm8 + por %xmm7,%xmm13 + movd -24(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,144-128(%rax) + paddd %xmm4,%xmm10 + movd -20(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -20(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -20(%r10),%xmm8 + por %xmm7,%xmm12 + movd -20(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,160-128(%rax) + paddd %xmm0,%xmm14 + movd -16(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -16(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -16(%r10),%xmm8 + por %xmm7,%xmm11 + movd -16(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa 
%xmm1,176-128(%rax) + paddd %xmm1,%xmm13 + movd -12(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -12(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -12(%r10),%xmm8 + por %xmm7,%xmm10 + movd -12(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,192-128(%rax) + paddd %xmm2,%xmm12 + movd -8(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -8(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -8(%r10),%xmm8 + por %xmm7,%xmm14 + movd -8(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,208-128(%rax) + paddd %xmm3,%xmm11 + movd -4(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -4(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -4(%r10),%xmm8 + por %xmm7,%xmm13 + movdqa 0-128(%rax),%xmm1 + movd -4(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + prefetcht0 63(%r8) + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,224-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + prefetcht0 63(%r9) + + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + prefetcht0 63(%r10) + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + prefetcht0 63(%r11) + por %xmm7,%xmm12 + movdqa 16-128(%rax),%xmm2 + pxor %xmm3,%xmm1 + movdqa 32-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + pxor 128-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + movdqa %xmm11,%xmm7 + pslld $5,%xmm8 + pxor %xmm3,%xmm1 + movdqa %xmm11,%xmm6 + pandn %xmm13,%xmm7 + movdqa %xmm1,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm10,%xmm9 + psrld $31,%xmm5 + paddd %xmm1,%xmm1 + + movdqa %xmm0,240-128(%rax) + paddd %xmm0,%xmm14 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm11,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 48-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + pxor 144-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + movdqa %xmm10,%xmm7 + pslld $5,%xmm8 + pxor %xmm4,%xmm2 + movdqa %xmm10,%xmm6 + pandn %xmm12,%xmm7 + movdqa %xmm2,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm14,%xmm9 + psrld $31,%xmm5 + paddd %xmm2,%xmm2 + + movdqa %xmm1,0-128(%rax) + paddd %xmm1,%xmm13 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm10,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 64-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + pxor 160-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + movdqa %xmm14,%xmm7 + pslld $5,%xmm8 + pxor %xmm0,%xmm3 + movdqa %xmm14,%xmm6 + pandn %xmm11,%xmm7 + movdqa %xmm3,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm13,%xmm9 + psrld 
$31,%xmm5 + paddd %xmm3,%xmm3 + + movdqa %xmm2,16-128(%rax) + paddd %xmm2,%xmm12 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm14,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 80-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + pxor 176-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + movdqa %xmm13,%xmm7 + pslld $5,%xmm8 + pxor %xmm1,%xmm4 + movdqa %xmm13,%xmm6 + pandn %xmm10,%xmm7 + movdqa %xmm4,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm12,%xmm9 + psrld $31,%xmm5 + paddd %xmm4,%xmm4 + + movdqa %xmm3,32-128(%rax) + paddd %xmm3,%xmm11 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm13,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 96-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + pxor 192-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + movdqa %xmm12,%xmm7 + pslld $5,%xmm8 + pxor %xmm2,%xmm0 + movdqa %xmm12,%xmm6 + pandn %xmm14,%xmm7 + movdqa %xmm0,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm11,%xmm9 + psrld $31,%xmm5 + paddd %xmm0,%xmm0 + + movdqa %xmm4,48-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm12,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 0(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 112-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 208-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,64-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 128-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 224-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,80-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 144-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 240-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,96-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 160-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 0-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,112-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + 
pxor %xmm2,%xmm0 + movdqa 176-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 16-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,128-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 192-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 32-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,144-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 208-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 48-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,160-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 224-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 64-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,176-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 240-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 80-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,192-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 0-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 96-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,208-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 16-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 112-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,224-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 
+ psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 32-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 128-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,240-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 48-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 144-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,0-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 64-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 160-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,16-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 80-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 176-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,32-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 96-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 192-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,48-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 112-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 208-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,64-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 128-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 224-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,80-128(%rax) + paddd %xmm2,%xmm12 + 
pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 144-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 240-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,96-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 160-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 0-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,112-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 32(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 176-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 16-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,128-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 192-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 32-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,144-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 208-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 48-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,160-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 224-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 64-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,176-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand 
%xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 240-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 80-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,192-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 0-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 96-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,208-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 16-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 112-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,224-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 32-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 128-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,240-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 48-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 144-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,0-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 64-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 160-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,16-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand 
%xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 80-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 176-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,32-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 96-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 192-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,48-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 112-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 208-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,64-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 128-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 224-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,80-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 144-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 240-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,96-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 160-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 0-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,112-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand 
%xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 176-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 16-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,128-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 192-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 32-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,144-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 208-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 48-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,160-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 224-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 64-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,176-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 64(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 240-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 80-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,192-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 0-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 96-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,208-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + 
psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 16-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 112-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,224-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 32-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 128-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,240-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 48-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 144-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,0-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 64-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 160-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,16-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 80-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 176-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,32-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 96-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 192-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,48-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 112-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 208-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,64-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + 
movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 128-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 224-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,80-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 144-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 240-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,96-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 160-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 0-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,112-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 176-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 16-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 192-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 32-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 208-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 48-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 224-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 64-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld 
$27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 240-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 80-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 0-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 96-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 16-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 112-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + movdqa %xmm12,%xmm7 + pxor %xmm13,%xmm6 + + pslld $30,%xmm7 + por %xmm9,%xmm8 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm7,%xmm12 + movdqa (%rbx),%xmm0 + movl $1,%ecx + cmpl 0(%rbx),%ecx + pxor %xmm8,%xmm8 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm0,%xmm1 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm8,%xmm1 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm1,%xmm0 + cmovgeq %rbp,%r11 + + movdqu 0(%rdi),%xmm6 + pand %xmm1,%xmm10 + movdqu 32(%rdi),%xmm7 + pand %xmm1,%xmm11 + paddd %xmm6,%xmm10 + movdqu 64(%rdi),%xmm8 + pand %xmm1,%xmm12 + paddd %xmm7,%xmm11 + movdqu 96(%rdi),%xmm9 + pand %xmm1,%xmm13 + paddd %xmm8,%xmm12 + movdqu 128(%rdi),%xmm5 + pand %xmm1,%xmm14 + movdqu %xmm10,0(%rdi) + paddd %xmm9,%xmm13 + movdqu %xmm11,32(%rdi) + paddd %xmm5,%xmm14 + movdqu %xmm12,64(%rdi) + movdqu %xmm13,96(%rdi) + movdqu %xmm14,128(%rdi) + + movdqa %xmm0,(%rbx) + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_multi_block,.-sha1_multi_block +.type sha1_multi_block_shaext,@function +.align 32 +sha1_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 64(%rdi),%rdi + movq %rax,272(%rsp) +.Lbody_shaext: + leaq 256(%rsp),%rbx + movdqa K_XX_XX+128(%rip),%xmm3 + +.Loop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + 
movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz .Ldone_shaext + + movq 0-64(%rdi),%xmm0 + movq 32-64(%rdi),%xmm4 + movq 64-64(%rdi),%xmm5 + movq 96-64(%rdi),%xmm6 + movq 128-64(%rdi),%xmm7 + + punpckldq %xmm4,%xmm0 + punpckldq %xmm6,%xmm5 + + movdqa %xmm0,%xmm8 + punpcklqdq %xmm5,%xmm0 + punpckhqdq %xmm5,%xmm8 + + pshufd $63,%xmm7,%xmm1 + pshufd $127,%xmm7,%xmm9 + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm11 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm12 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm13 +.byte 102,68,15,56,0,219 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 +.byte 102,15,56,0,235 + movdqu 48(%r9),%xmm14 + leaq 64(%r9),%r9 +.byte 102,68,15,56,0,227 + + movdqa %xmm1,80(%rsp) + paddd %xmm4,%xmm1 + movdqa %xmm9,112(%rsp) + paddd %xmm11,%xmm9 + movdqa %xmm0,64(%rsp) + movdqa %xmm0,%xmm2 + movdqa %xmm8,96(%rsp) + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 102,15,56,0,243 + prefetcht0 127(%r8) +.byte 15,56,201,229 +.byte 102,68,15,56,0,235 + prefetcht0 127(%r9) +.byte 69,15,56,201,220 + +.byte 102,15,56,0,251 + movdqa %xmm0,%xmm1 +.byte 102,68,15,56,0,243 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,206 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,205 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,215 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,204 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,215 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,204 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,213 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 
69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,204 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,213 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,206 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,213 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + + movl $1,%ecx + pxor %xmm4,%xmm4 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,206 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + movq (%rbx),%xmm6 + + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,215 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,214 + + pshufd $0,%xmm6,%xmm11 + pshufd $85,%xmm6,%xmm12 + movdqa %xmm6,%xmm7 + pcmpgtd %xmm4,%xmm11 + pcmpgtd %xmm4,%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,204 + + pcmpgtd %xmm4,%xmm7 + pand %xmm11,%xmm0 + pand %xmm11,%xmm1 + pand %xmm12,%xmm8 + pand %xmm12,%xmm9 + paddd %xmm7,%xmm6 + + paddd 64(%rsp),%xmm0 + paddd 80(%rsp),%xmm1 + paddd 96(%rsp),%xmm8 + paddd 112(%rsp),%xmm9 + + movq %xmm6,(%rbx) + decl %edx + jnz .Loop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + + movdqa %xmm0,%xmm6 + punpckldq %xmm8,%xmm0 + punpckhdq %xmm8,%xmm6 + punpckhdq %xmm9,%xmm1 + movq %xmm0,0-64(%rdi) + psrldq $8,%xmm0 + movq %xmm6,64-64(%rdi) + psrldq $8,%xmm6 + movq %xmm0,32-64(%rdi) + psrldq $8,%xmm1 + movq %xmm6,96-64(%rdi) + movq %xmm1,128-64(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz .Loop_grande_shaext + +.Ldone_shaext: + + movq 
-16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_shaext: + .byte 0xf3,0xc3 +.size sha1_multi_block_shaext,.-sha1_multi_block_shaext +.type sha1_multi_block_avx,@function +.align 32 +sha1_multi_block_avx: +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb .Lavx + testl $32,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody_avx: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + + vzeroupper +.Loop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone_avx + + vmovdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%xmm11 + vmovdqu 64(%rdi),%xmm12 + vmovdqu 96(%rdi),%xmm13 + vmovdqu 128(%rdi),%xmm14 + vmovdqu 96(%rbp),%xmm5 + jmp .Loop_avx + +.align 32 +.Loop_avx: + vmovdqa -32(%rbp),%xmm15 + vmovd (%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd (%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vmovd -60(%r8),%xmm1 + vpunpckldq %xmm2,%xmm0,%xmm0 + vmovd -60(%r9),%xmm9 + vpshufb %xmm5,%xmm0,%xmm0 + vpinsrd $1,-60(%r10),%xmm1,%xmm1 + vpinsrd $1,-60(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,0-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -56(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -56(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-56(%r10),%xmm2,%xmm2 + vpinsrd $1,-56(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,16-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -52(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -52(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-52(%r10),%xmm3,%xmm3 + vpinsrd $1,-52(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,32-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -48(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -48(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm4,%xmm4 + vpinsrd $1,-48(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand 
%xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,48-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -44(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -44(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-44(%r10),%xmm0,%xmm0 + vpinsrd $1,-44(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,64-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -40(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -40(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-40(%r10),%xmm1,%xmm1 + vpinsrd $1,-40(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,80-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -36(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -36(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-36(%r10),%xmm2,%xmm2 + vpinsrd $1,-36(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,96-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -32(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -32(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-32(%r10),%xmm3,%xmm3 + vpinsrd $1,-32(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,112-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -28(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -28(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm4,%xmm4 + vpinsrd $1,-28(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,128-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -24(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -24(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-24(%r10),%xmm0,%xmm0 + vpinsrd $1,-24(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,144-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor 
%xmm7,%xmm6,%xmm6 + vmovd -20(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -20(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-20(%r10),%xmm1,%xmm1 + vpinsrd $1,-20(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,160-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -16(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -16(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-16(%r10),%xmm2,%xmm2 + vpinsrd $1,-16(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,176-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -12(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -12(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-12(%r10),%xmm3,%xmm3 + vpinsrd $1,-12(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,192-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -8(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -8(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm4,%xmm4 + vpinsrd $1,-8(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,208-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -4(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -4(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vmovdqa 0-128(%rax),%xmm1 + vpinsrd $1,-4(%r10),%xmm0,%xmm0 + vpinsrd $1,-4(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + prefetcht0 63(%r8) + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,224-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + prefetcht0 63(%r9) + vpxor %xmm7,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + prefetcht0 63(%r10) + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + prefetcht0 63(%r11) + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 16-128(%rax),%xmm2 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 32-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,240-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 128-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + + vpslld $30,%xmm11,%xmm7 + 
vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 48-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,0-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 144-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 64-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,16-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 160-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 80-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,32-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 176-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 96-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,48-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 192-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 0(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 112-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,64-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 208-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 128-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,80-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 224-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd 
%xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 144-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,96-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 240-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 160-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,112-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 0-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 176-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,128-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 16-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 192-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,144-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 32-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 208-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,160-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 48-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 224-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,176-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 64-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 
240-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,192-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 80-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 0-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,208-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 96-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 16-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,224-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 112-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 32-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,240-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 128-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 48-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,0-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 144-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 64-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,16-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 160-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 80-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,32-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 176-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor 
%xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 96-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,48-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 192-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 112-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,64-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 208-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 128-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,80-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 224-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 144-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,96-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 240-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 160-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,112-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 0-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 32(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 176-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 16-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,128-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld 
$30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 192-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 32-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,144-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 208-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 48-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,160-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 224-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 64-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,176-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 240-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 80-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,192-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 0-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 96-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,208-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 16-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 112-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,224-128(%rax) + 
vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 32-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 128-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,240-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 48-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 144-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,0-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 64-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 160-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,16-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 80-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 176-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,32-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 96-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 192-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,48-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 112-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 208-128(%rax),%xmm3,%xmm3 + + 
vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,64-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 128-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 224-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,80-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 144-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 240-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,96-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 160-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 0-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,112-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 176-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 16-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,128-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 192-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 32-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,144-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 
208-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 48-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,160-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 224-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 64-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,176-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 64(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 240-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,192-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 80-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 0-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,208-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 96-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 16-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,224-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 112-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 32-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,240-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 128-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 48-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + 
vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,0-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 144-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 64-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,16-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 160-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 80-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,32-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 176-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 96-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,48-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 192-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 112-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,64-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 208-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 128-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,80-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 224-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 144-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,96-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 240-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + 
vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 160-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,112-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 0-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 176-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 16-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 192-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 32-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 208-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 48-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 224-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 64-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 240-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 80-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 0-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd 
%xmm2,%xmm12,%xmm12 + vpxor 96-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 16-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 112-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + + vpsrld $27,%xmm11,%xmm9 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor %xmm13,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm7,%xmm12,%xmm12 + movl $1,%ecx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%xmm6 + vpxor %xmm8,%xmm8,%xmm8 + vmovdqa %xmm6,%xmm7 + vpcmpgtd %xmm8,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + + vpand %xmm7,%xmm10,%xmm10 + vpand %xmm7,%xmm11,%xmm11 + vpaddd 0(%rdi),%xmm10,%xmm10 + vpand %xmm7,%xmm12,%xmm12 + vpaddd 32(%rdi),%xmm11,%xmm11 + vpand %xmm7,%xmm13,%xmm13 + vpaddd 64(%rdi),%xmm12,%xmm12 + vpand %xmm7,%xmm14,%xmm14 + vpaddd 96(%rdi),%xmm13,%xmm13 + vpaddd 128(%rdi),%xmm14,%xmm14 + vmovdqu %xmm10,0(%rdi) + vmovdqu %xmm11,32(%rdi) + vmovdqu %xmm12,64(%rdi) + vmovdqu %xmm13,96(%rdi) + vmovdqu %xmm14,128(%rdi) + + vmovdqu %xmm6,(%rbx) + vmovdqu 96(%rbp),%xmm5 + decl %edx + jnz .Loop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande_avx + +.Ldone_avx: + movq 272(%rsp),%rax + vzeroupper + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha1_multi_block_avx,.-sha1_multi_block_avx +.type sha1_multi_block_avx2,@function +.align 32 +sha1_multi_block_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +.Lbody_avx2: + leaq K_XX_XX(%rip),%rbp + shrl $1,%edx + + vzeroupper +.Loop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + movq 0(%rsi),%r12 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + movq 16(%rsi),%r13 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + movq 32(%rsi),%r14 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + movq 48(%rsi),%r15 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + movq 64(%rsi),%r8 + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + movq 80(%rsi),%r9 + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + 
movq 96(%rsi),%r10 + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + movq 112(%rsi),%r11 + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0(%rdi),%ymm0 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%ymm1 + leaq 256+128(%rsp),%rbx + vmovdqu 64(%rdi),%ymm2 + vmovdqu 96(%rdi),%ymm3 + vmovdqu 128(%rdi),%ymm4 + vmovdqu 96(%rbp),%ymm9 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vmovdqa -32(%rbp),%ymm15 + vmovd (%r12),%xmm10 + leaq 64(%r12),%r12 + vmovd (%r8),%xmm12 + leaq 64(%r8),%r8 + vmovd (%r13),%xmm7 + leaq 64(%r13),%r13 + vmovd (%r9),%xmm6 + leaq 64(%r9),%r9 + vpinsrd $1,(%r14),%xmm10,%xmm10 + leaq 64(%r14),%r14 + vpinsrd $1,(%r10),%xmm12,%xmm12 + leaq 64(%r10),%r10 + vpinsrd $1,(%r15),%xmm7,%xmm7 + leaq 64(%r15),%r15 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,(%r11),%xmm6,%xmm6 + leaq 64(%r11),%r11 + vpunpckldq %ymm6,%ymm12,%ymm12 + vmovd -60(%r12),%xmm11 + vinserti128 $1,%xmm12,%ymm10,%ymm10 + vmovd -60(%r8),%xmm8 + vpshufb %ymm9,%ymm10,%ymm10 + vmovd -60(%r13),%xmm7 + vmovd -60(%r9),%xmm6 + vpinsrd $1,-60(%r14),%xmm11,%xmm11 + vpinsrd $1,-60(%r10),%xmm8,%xmm8 + vpinsrd $1,-60(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-60(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,0-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -56(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -56(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -56(%r13),%xmm7 + vmovd -56(%r9),%xmm6 + vpinsrd $1,-56(%r14),%xmm12,%xmm12 + vpinsrd $1,-56(%r10),%xmm8,%xmm8 + vpinsrd $1,-56(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-56(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,32-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -52(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -52(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -52(%r13),%xmm7 + vmovd -52(%r9),%xmm6 + vpinsrd $1,-52(%r14),%xmm13,%xmm13 + vpinsrd $1,-52(%r10),%xmm8,%xmm8 + vpinsrd $1,-52(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-52(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,64-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -48(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -48(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -48(%r13),%xmm7 + vmovd -48(%r9),%xmm6 + vpinsrd $1,-48(%r14),%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm8,%xmm8 + vpinsrd $1,-48(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-48(%r11),%xmm6,%xmm6 + 
vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,96-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -44(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -44(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -44(%r13),%xmm7 + vmovd -44(%r9),%xmm6 + vpinsrd $1,-44(%r14),%xmm10,%xmm10 + vpinsrd $1,-44(%r10),%xmm8,%xmm8 + vpinsrd $1,-44(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-44(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,128-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -40(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -40(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -40(%r13),%xmm7 + vmovd -40(%r9),%xmm6 + vpinsrd $1,-40(%r14),%xmm11,%xmm11 + vpinsrd $1,-40(%r10),%xmm8,%xmm8 + vpinsrd $1,-40(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-40(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,160-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -36(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -36(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -36(%r13),%xmm7 + vmovd -36(%r9),%xmm6 + vpinsrd $1,-36(%r14),%xmm12,%xmm12 + vpinsrd $1,-36(%r10),%xmm8,%xmm8 + vpinsrd $1,-36(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-36(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,192-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -32(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -32(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -32(%r13),%xmm7 + vmovd -32(%r9),%xmm6 + vpinsrd $1,-32(%r14),%xmm13,%xmm13 + vpinsrd $1,-32(%r10),%xmm8,%xmm8 + vpinsrd $1,-32(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-32(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,224-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -28(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -28(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -28(%r13),%xmm7 + vmovd -28(%r9),%xmm6 + vpinsrd 
$1,-28(%r14),%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm8,%xmm8 + vpinsrd $1,-28(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-28(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,256-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -24(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -24(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -24(%r13),%xmm7 + vmovd -24(%r9),%xmm6 + vpinsrd $1,-24(%r14),%xmm10,%xmm10 + vpinsrd $1,-24(%r10),%xmm8,%xmm8 + vpinsrd $1,-24(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-24(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,288-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -20(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -20(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -20(%r13),%xmm7 + vmovd -20(%r9),%xmm6 + vpinsrd $1,-20(%r14),%xmm11,%xmm11 + vpinsrd $1,-20(%r10),%xmm8,%xmm8 + vpinsrd $1,-20(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-20(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,320-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -16(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -16(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -16(%r13),%xmm7 + vmovd -16(%r9),%xmm6 + vpinsrd $1,-16(%r14),%xmm12,%xmm12 + vpinsrd $1,-16(%r10),%xmm8,%xmm8 + vpinsrd $1,-16(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-16(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,352-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -12(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -12(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -12(%r13),%xmm7 + vmovd -12(%r9),%xmm6 + vpinsrd $1,-12(%r14),%xmm13,%xmm13 + vpinsrd $1,-12(%r10),%xmm8,%xmm8 + vpinsrd $1,-12(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-12(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,384-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -8(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -8(%r8),%xmm8 + vpaddd 
%ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -8(%r13),%xmm7 + vmovd -8(%r9),%xmm6 + vpinsrd $1,-8(%r14),%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm8,%xmm8 + vpinsrd $1,-8(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-8(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,416-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -4(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -4(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovdqa 0-128(%rax),%ymm11 + vmovd -4(%r13),%xmm7 + vmovd -4(%r9),%xmm6 + vpinsrd $1,-4(%r14),%xmm10,%xmm10 + vpinsrd $1,-4(%r10),%xmm8,%xmm8 + vpinsrd $1,-4(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-4(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + prefetcht0 63(%r12) + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,448-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + prefetcht0 63(%r13) + vpxor %ymm6,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + prefetcht0 63(%r15) + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32-128(%rax),%ymm12 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 64-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + prefetcht0 63(%r8) + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,480-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 256-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + prefetcht0 63(%r9) + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + prefetcht0 63(%r10) + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + prefetcht0 63(%r11) + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 96-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,0-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 288-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 128-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,32-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 320-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor 
%ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 160-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,64-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 352-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 192-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,96-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 384-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 0(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 224-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,128-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 416-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 256-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,160-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 448-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 288-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,192-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 480-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 320-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,224-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 0-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 352-256-128(%rbx),%ymm12 + + vpslld 
$5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,256-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 32-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 384-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,288-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 64-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 416-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,320-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 96-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 448-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,352-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 128-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 480-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,384-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 160-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 0-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,416-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 192-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 32-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,448-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 224-128(%rax),%ymm11,%ymm11 + vpsrld 
$27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 64-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,480-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 256-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 96-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,0-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 288-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 128-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,32-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 320-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 160-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,64-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 352-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 192-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,96-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 384-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 224-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,128-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 416-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd 
%ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 256-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,160-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 448-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 288-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,192-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 480-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 320-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,224-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 0-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 352-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 32-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,256-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 384-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 64-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,288-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 416-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 96-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,320-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld 
$30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 448-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 128-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,352-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 480-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 160-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,384-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 0-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 192-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,416-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 32-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 224-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,448-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 64-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 256-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,480-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 96-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 288-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,0-128(%rax) 
+ vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 128-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 320-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,32-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 160-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 352-256-128(%rbx),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,64-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 192-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 384-256-128(%rbx),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,96-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 224-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 416-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,128-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 256-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 448-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,160-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 288-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 
480-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,192-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 320-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 0-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,224-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 352-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 32-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,256-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 384-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 64-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,288-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 416-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 96-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,320-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 448-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 128-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,352-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor 
%ymm6,%ymm2,%ymm2 + vmovdqa 64(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 480-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,384-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 160-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 0-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,416-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 192-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 32-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,448-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 224-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 64-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,480-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 256-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 96-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,0-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 288-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 128-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,32-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 320-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 160-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa 
%ymm11,64-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 352-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 192-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,96-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 384-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 224-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,128-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 416-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 256-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,160-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 448-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 288-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,192-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 480-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 320-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,224-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 0-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 352-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 32-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + 
vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 384-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 64-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 416-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 96-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 448-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 128-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 480-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 160-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 0-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 192-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 32-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 224-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + + vpsrld $27,%ymm1,%ymm8 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor %ymm3,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + 
vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm6,%ymm2,%ymm2 + movl $1,%ecx + leaq 512(%rsp),%rbx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%ymm5 + vpxor %ymm7,%ymm7,%ymm7 + vmovdqa %ymm5,%ymm6 + vpcmpgtd %ymm7,%ymm6,%ymm6 + vpaddd %ymm6,%ymm5,%ymm5 + + vpand %ymm6,%ymm0,%ymm0 + vpand %ymm6,%ymm1,%ymm1 + vpaddd 0(%rdi),%ymm0,%ymm0 + vpand %ymm6,%ymm2,%ymm2 + vpaddd 32(%rdi),%ymm1,%ymm1 + vpand %ymm6,%ymm3,%ymm3 + vpaddd 64(%rdi),%ymm2,%ymm2 + vpand %ymm6,%ymm4,%ymm4 + vpaddd 96(%rdi),%ymm3,%ymm3 + vpaddd 128(%rdi),%ymm4,%ymm4 + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + + vmovdqu %ymm5,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu 96(%rbp),%ymm9 + decl %edx + jnz .Loop_avx2 + + + + + + + +.Ldone_avx2: + movq 544(%rsp),%rax + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha1_multi_block_avx2,.-sha1_multi_block_avx2 + +.align 256 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +K_XX_XX: +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s index c11c6f650b9884..22a031f368e505 100644 --- a/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/sha/sha1-x86_64.s @@ -1,30 +1,41 @@ .text - .globl sha1_block_data_order .type sha1_block_data_order,@function .align 16 sha1_block_data_order: movl OPENSSL_ia32cap_P+0(%rip),%r9d movl OPENSSL_ia32cap_P+4(%rip),%r8d + movl OPENSSL_ia32cap_P+8(%rip),%r10d testl $512,%r8d jz .Lialu + testl $536870912,%r10d + jnz _shaext_shortcut + andl $296,%r10d + cmpl $296,%r10d + je _avx2_shortcut + andl $268435456,%r8d + andl $1073741824,%r9d + orl %r9d,%r8d + cmpl $1342177280,%r8d + je _avx_shortcut jmp _ssse3_shortcut .align 16 .Lialu: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 - movq %rsp,%r11 + pushq %r14 movq %rdi,%r8 subq $72,%rsp movq %rsi,%r9 andq $-64,%rsp movq %rdx,%r10 - movq %r11,64(%rsp) + movq %rax,64(%rsp) .Lprologue: movl 0(%r8),%esi @@ -38,1230 +49,1168 @@ sha1_block_data_order: .Lloop: movl 0(%r9),%edx bswapl %edx - movl %edx,0(%rsp) - movl %r11d,%eax movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) movl %esi,%ecx - xorl %r12d,%eax bswapl %ebp + xorl %r11d,%eax roll $5,%ecx - leal 
1518500249(%rdx,%r13,1),%r13d andl %edi,%eax - movl %ebp,4(%rsp) + leal 1518500249(%rdx,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d - movl %edi,%eax - movl 8(%r9),%edx + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) movl %r13d,%ecx - xorl %r11d,%eax - bswapl %edx + bswapl %r14d + xorl %edi,%eax roll $5,%ecx - leal 1518500249(%rbp,%r12,1),%r12d andl %esi,%eax - movl %edx,8(%rsp) + leal 1518500249(%rbp,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d - movl %esi,%eax - movl 12(%r9),%ebp + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) movl %r12d,%ecx - xorl %edi,%eax - bswapl %ebp + bswapl %edx + xorl %esi,%eax roll $5,%ecx - leal 1518500249(%rdx,%r11,1),%r11d andl %r13d,%eax - movl %ebp,12(%rsp) + leal 1518500249(%r14,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d - movl %r13d,%eax - movl 16(%r9),%edx + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) movl %r11d,%ecx - xorl %esi,%eax - bswapl %edx + bswapl %ebp + xorl %r13d,%eax roll $5,%ecx - leal 1518500249(%rbp,%rdi,1),%edi andl %r12d,%eax - movl %edx,16(%rsp) + leal 1518500249(%rdx,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi - movl %r12d,%eax - movl 20(%r9),%ebp + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) movl %edi,%ecx - xorl %r13d,%eax - bswapl %ebp + bswapl %r14d + xorl %r12d,%eax roll $5,%ecx - leal 1518500249(%rdx,%rsi,1),%esi andl %r11d,%eax - movl %ebp,20(%rsp) + leal 1518500249(%rbp,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi - movl %r11d,%eax movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) movl %esi,%ecx - xorl %r12d,%eax bswapl %edx + xorl %r11d,%eax roll $5,%ecx - leal 1518500249(%rbp,%r13,1),%r13d andl %edi,%eax - movl %edx,24(%rsp) + leal 1518500249(%r14,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d - movl %edi,%eax movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) movl %r13d,%ecx - xorl %r11d,%eax bswapl %ebp + xorl %edi,%eax roll $5,%ecx - leal 1518500249(%rdx,%r12,1),%r12d andl %esi,%eax - movl %ebp,28(%rsp) + leal 1518500249(%rdx,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d - movl %esi,%eax - movl 32(%r9),%edx + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) movl %r12d,%ecx - xorl %edi,%eax - bswapl %edx + bswapl %r14d + xorl %esi,%eax roll $5,%ecx - leal 1518500249(%rbp,%r11,1),%r11d andl %r13d,%eax - movl %edx,32(%rsp) + leal 1518500249(%rbp,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d - movl %r13d,%eax - movl 36(%r9),%ebp + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) movl %r11d,%ecx - xorl %esi,%eax - bswapl %ebp + bswapl %edx + xorl %r13d,%eax roll $5,%ecx - leal 1518500249(%rdx,%rdi,1),%edi andl %r12d,%eax - movl %ebp,36(%rsp) + leal 1518500249(%r14,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi - movl %r12d,%eax - movl 40(%r9),%edx + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) movl %edi,%ecx - xorl %r13d,%eax - bswapl %edx + bswapl %ebp + xorl %r12d,%eax roll $5,%ecx - leal 1518500249(%rbp,%rsi,1),%esi andl %r11d,%eax - movl %edx,40(%rsp) + leal 1518500249(%rdx,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi - movl %r11d,%eax - movl 44(%r9),%ebp + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) movl %esi,%ecx - xorl %r12d,%eax - bswapl %ebp + bswapl %r14d + xorl %r11d,%eax roll $5,%ecx - leal 1518500249(%rdx,%r13,1),%r13d 
andl %edi,%eax - movl %ebp,44(%rsp) + leal 1518500249(%rbp,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d - movl %edi,%eax movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) movl %r13d,%ecx - xorl %r11d,%eax bswapl %edx + xorl %edi,%eax roll $5,%ecx - leal 1518500249(%rbp,%r12,1),%r12d andl %esi,%eax - movl %edx,48(%rsp) + leal 1518500249(%r14,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d - movl %esi,%eax movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) movl %r12d,%ecx - xorl %edi,%eax bswapl %ebp + xorl %esi,%eax roll $5,%ecx - leal 1518500249(%rdx,%r11,1),%r11d andl %r13d,%eax - movl %ebp,52(%rsp) + leal 1518500249(%rdx,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d - movl %r13d,%eax - movl 56(%r9),%edx + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) movl %r11d,%ecx - xorl %esi,%eax - bswapl %edx + bswapl %r14d + xorl %r13d,%eax roll $5,%ecx - leal 1518500249(%rbp,%rdi,1),%edi andl %r12d,%eax - movl %edx,56(%rsp) + leal 1518500249(%rbp,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi - movl %r12d,%eax - movl 60(%r9),%ebp + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) movl %edi,%ecx - xorl %r13d,%eax - bswapl %ebp + bswapl %edx + xorl %r12d,%eax roll $5,%ecx - leal 1518500249(%rdx,%rsi,1),%esi andl %r11d,%eax - movl %ebp,60(%rsp) + leal 1518500249(%r14,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi - movl 0(%rsp),%edx - movl %r11d,%eax + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) movl %esi,%ecx - xorl 8(%rsp),%edx - xorl %r12d,%eax + xorl 8(%rsp),%ebp + xorl %r11d,%eax roll $5,%ecx - xorl 32(%rsp),%edx + xorl 32(%rsp),%ebp andl %edi,%eax - leal 1518500249(%rbp,%r13,1),%r13d - xorl 52(%rsp),%edx + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi xorl %r12d,%eax - roll $1,%edx addl %ecx,%r13d - roll $30,%edi - movl %edx,0(%rsp) + roll $1,%ebp addl %eax,%r13d - movl 4(%rsp),%ebp - movl %edi,%eax + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) movl %r13d,%ecx - xorl 12(%rsp),%ebp - xorl %r11d,%eax + xorl 12(%rsp),%r14d + xorl %edi,%eax roll $5,%ecx - xorl 36(%rsp),%ebp + xorl 36(%rsp),%r14d andl %esi,%eax - leal 1518500249(%rdx,%r12,1),%r12d - xorl 56(%rsp),%ebp + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi xorl %r11d,%eax - roll $1,%ebp addl %ecx,%r12d - roll $30,%esi - movl %ebp,4(%rsp) + roll $1,%r14d addl %eax,%r12d - movl 8(%rsp),%edx - movl %esi,%eax + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) movl %r12d,%ecx xorl 16(%rsp),%edx - xorl %edi,%eax + xorl %esi,%eax roll $5,%ecx xorl 40(%rsp),%edx andl %r13d,%eax - leal 1518500249(%rbp,%r11,1),%r11d - xorl 60(%rsp),%edx + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d xorl %edi,%eax - roll $1,%edx addl %ecx,%r11d - roll $30,%r13d - movl %edx,8(%rsp) + roll $1,%edx addl %eax,%r11d - movl 12(%rsp),%ebp - movl %r13d,%eax + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) movl %r11d,%ecx xorl 20(%rsp),%ebp - xorl %esi,%eax + xorl %r13d,%eax roll $5,%ecx xorl 44(%rsp),%ebp andl %r12d,%eax leal 1518500249(%rdx,%rdi,1),%edi - xorl 0(%rsp),%ebp + roll $30,%r12d xorl %esi,%eax - roll $1,%ebp addl %ecx,%edi - roll $30,%r12d - movl %ebp,12(%rsp) + roll $1,%ebp addl %eax,%edi - movl 16(%rsp),%edx - movl %r12d,%eax + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) movl %edi,%ecx - xorl 24(%rsp),%edx - xorl %r13d,%eax + xorl 24(%rsp),%r14d + xorl %r12d,%eax roll $5,%ecx - xorl 48(%rsp),%edx + xorl 48(%rsp),%r14d 
andl %r11d,%eax leal 1518500249(%rbp,%rsi,1),%esi - xorl 4(%rsp),%edx + roll $30,%r11d xorl %r13d,%eax - roll $1,%edx addl %ecx,%esi - roll $30,%r11d - movl %edx,16(%rsp) + roll $1,%r14d addl %eax,%esi - movl 20(%rsp),%ebp - movl %r11d,%eax + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) movl %esi,%ecx - xorl 28(%rsp),%ebp - xorl %edi,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r13,1),%r13d - xorl 52(%rsp),%ebp + xorl 28(%rsp),%edx xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 8(%rsp),%ebp roll $30,%edi addl %eax,%r13d - roll $1,%ebp - movl %ebp,20(%rsp) - movl 24(%rsp),%edx - movl %edi,%eax + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) movl %r13d,%ecx - xorl 32(%rsp),%edx - xorl %esi,%eax - roll $5,%ecx - leal 1859775393(%rbp,%r12,1),%r12d - xorl 56(%rsp),%edx + xorl 32(%rsp),%ebp xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 12(%rsp),%edx roll $30,%esi addl %eax,%r12d - roll $1,%edx - movl %edx,24(%rsp) - movl 28(%rsp),%ebp - movl %esi,%eax + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) movl %r12d,%ecx - xorl 36(%rsp),%ebp - xorl %r13d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r11,1),%r11d - xorl 60(%rsp),%ebp + xorl 36(%rsp),%r14d xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 16(%rsp),%ebp roll $30,%r13d addl %eax,%r11d - roll $1,%ebp - movl %ebp,28(%rsp) - movl 32(%rsp),%edx - movl %r13d,%eax + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) movl %r11d,%ecx xorl 40(%rsp),%edx - xorl %r12d,%eax + xorl %esi,%eax roll $5,%ecx - leal 1859775393(%rbp,%rdi,1),%edi xorl 0(%rsp),%edx - xorl %esi,%eax + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 20(%rsp),%edx roll $30,%r12d addl %eax,%edi roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax movl %edx,32(%rsp) - movl 36(%rsp),%ebp - movl %r12d,%eax movl %edi,%ecx xorl 44(%rsp),%ebp - xorl %r11d,%eax + xorl %r13d,%eax roll $5,%ecx - leal 1859775393(%rdx,%rsi,1),%esi xorl 4(%rsp),%ebp - xorl %r13d,%eax + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 24(%rsp),%ebp roll $30,%r11d addl %eax,%esi roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax movl %ebp,36(%rsp) - movl 40(%rsp),%edx - movl %r11d,%eax movl %esi,%ecx - xorl 48(%rsp),%edx - xorl %edi,%eax + xorl 48(%rsp),%r14d + xorl %r12d,%eax roll $5,%ecx + xorl 8(%rsp),%r14d leal 1859775393(%rbp,%r13,1),%r13d - xorl 8(%rsp),%edx - xorl %r12d,%eax + xorl %r11d,%eax addl %ecx,%r13d - xorl 28(%rsp),%edx roll $30,%edi addl %eax,%r13d - roll $1,%edx - movl %edx,40(%rsp) - movl 44(%rsp),%ebp - movl %edi,%eax + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) movl %r13d,%ecx - xorl 52(%rsp),%ebp - xorl %esi,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r12,1),%r12d - xorl 12(%rsp),%ebp + xorl 52(%rsp),%edx xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 32(%rsp),%ebp roll $30,%esi addl %eax,%r12d - roll $1,%ebp - movl %ebp,44(%rsp) - movl 48(%rsp),%edx - movl %esi,%eax + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) movl %r12d,%ecx - xorl 56(%rsp),%edx - xorl %r13d,%eax - roll $5,%ecx - leal 1859775393(%rbp,%r11,1),%r11d - xorl 16(%rsp),%edx + xorl 56(%rsp),%ebp xorl %edi,%eax 
+ roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 36(%rsp),%edx roll $30,%r13d addl %eax,%r11d - roll $1,%edx - movl %edx,48(%rsp) - movl 52(%rsp),%ebp - movl %r13d,%eax + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) movl %r11d,%ecx - xorl 60(%rsp),%ebp - xorl %r12d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%rdi,1),%edi - xorl 20(%rsp),%ebp + xorl 60(%rsp),%r14d xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 40(%rsp),%ebp roll $30,%r12d addl %eax,%edi - roll $1,%ebp - movl %ebp,52(%rsp) - movl 56(%rsp),%edx - movl %r12d,%eax + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) movl %edi,%ecx xorl 0(%rsp),%edx - xorl %r11d,%eax + xorl %r13d,%eax roll $5,%ecx - leal 1859775393(%rbp,%rsi,1),%esi xorl 24(%rsp),%edx - xorl %r13d,%eax + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 44(%rsp),%edx roll $30,%r11d addl %eax,%esi roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax movl %edx,56(%rsp) - movl 60(%rsp),%ebp - movl %r11d,%eax movl %esi,%ecx xorl 4(%rsp),%ebp - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal 1859775393(%rdx,%r13,1),%r13d xorl 28(%rsp),%ebp - xorl %r12d,%eax + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 48(%rsp),%ebp roll $30,%edi addl %eax,%r13d roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax movl %ebp,60(%rsp) - movl 0(%rsp),%edx - movl %edi,%eax movl %r13d,%ecx - xorl 8(%rsp),%edx - xorl %esi,%eax + xorl 8(%rsp),%r14d + xorl %r11d,%eax roll $5,%ecx + xorl 32(%rsp),%r14d leal 1859775393(%rbp,%r12,1),%r12d - xorl 32(%rsp),%edx - xorl %r11d,%eax + xorl %edi,%eax addl %ecx,%r12d - xorl 52(%rsp),%edx roll $30,%esi addl %eax,%r12d - roll $1,%edx - movl %edx,0(%rsp) - movl 4(%rsp),%ebp - movl %esi,%eax + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) movl %r12d,%ecx - xorl 12(%rsp),%ebp - xorl %r13d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r11,1),%r11d - xorl 36(%rsp),%ebp + xorl 12(%rsp),%edx xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 56(%rsp),%ebp roll $30,%r13d addl %eax,%r11d - roll $1,%ebp - movl %ebp,4(%rsp) - movl 8(%rsp),%edx - movl %r13d,%eax + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) movl %r11d,%ecx - xorl 16(%rsp),%edx - xorl %r12d,%eax - roll $5,%ecx - leal 1859775393(%rbp,%rdi,1),%edi - xorl 40(%rsp),%edx + xorl 16(%rsp),%ebp xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 60(%rsp),%edx roll $30,%r12d addl %eax,%edi - roll $1,%edx - movl %edx,8(%rsp) - movl 12(%rsp),%ebp - movl %r12d,%eax + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) movl %edi,%ecx - xorl 20(%rsp),%ebp - xorl %r11d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%rsi,1),%esi - xorl 44(%rsp),%ebp + xorl 20(%rsp),%r14d xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 0(%rsp),%ebp roll $30,%r11d addl %eax,%esi - roll $1,%ebp - movl %ebp,12(%rsp) - movl 16(%rsp),%edx - movl %r11d,%eax + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) movl %esi,%ecx xorl 24(%rsp),%edx - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal 1859775393(%rbp,%r13,1),%r13d xorl 48(%rsp),%edx - xorl %r12d,%eax + leal 
1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 4(%rsp),%edx roll $30,%edi addl %eax,%r13d roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax movl %edx,16(%rsp) - movl 20(%rsp),%ebp - movl %edi,%eax movl %r13d,%ecx xorl 28(%rsp),%ebp - xorl %esi,%eax + xorl %r11d,%eax roll $5,%ecx - leal 1859775393(%rdx,%r12,1),%r12d xorl 52(%rsp),%ebp - xorl %r11d,%eax + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 8(%rsp),%ebp roll $30,%esi addl %eax,%r12d roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax movl %ebp,20(%rsp) - movl 24(%rsp),%edx - movl %esi,%eax movl %r12d,%ecx - xorl 32(%rsp),%edx - xorl %r13d,%eax + xorl 32(%rsp),%r14d + xorl %edi,%eax roll $5,%ecx + xorl 56(%rsp),%r14d leal 1859775393(%rbp,%r11,1),%r11d - xorl 56(%rsp),%edx - xorl %edi,%eax + xorl %esi,%eax addl %ecx,%r11d - xorl 12(%rsp),%edx roll $30,%r13d addl %eax,%r11d - roll $1,%edx - movl %edx,24(%rsp) - movl 28(%rsp),%ebp - movl %r13d,%eax + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) movl %r11d,%ecx - xorl 36(%rsp),%ebp - xorl %r12d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%rdi,1),%edi - xorl 60(%rsp),%ebp + xorl 36(%rsp),%edx xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 16(%rsp),%ebp roll $30,%r12d addl %eax,%edi - roll $1,%ebp - movl %ebp,28(%rsp) - movl 32(%rsp),%edx - movl %r12d,%eax + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) movl %edi,%ecx - xorl 40(%rsp),%edx - xorl %r11d,%eax - roll $5,%ecx - leal 1859775393(%rbp,%rsi,1),%esi - xorl 0(%rsp),%edx + xorl 40(%rsp),%ebp xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 20(%rsp),%edx roll $30,%r11d addl %eax,%esi - roll $1,%edx - movl %edx,32(%rsp) - movl 36(%rsp),%ebp - movl %r11d,%eax - movl %r11d,%ebx - xorl 44(%rsp),%ebp - andl %r12d,%eax + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax movl %esi,%ecx - xorl 4(%rsp),%ebp - xorl %r12d,%ebx - leal -1894007588(%rdx,%r13,1),%r13d + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 24(%rsp),%ebp addl %eax,%r13d + roll $1,%r14d andl %edi,%ebx - roll $1,%ebp - addl %ebx,%r13d - roll $30,%edi - movl %ebp,36(%rsp) addl %ecx,%r13d - movl 40(%rsp),%edx - movl %edi,%eax - movl %edi,%ebx + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx xorl 48(%rsp),%edx - andl %r11d,%eax + andl %edi,%eax movl %r13d,%ecx xorl 8(%rsp),%edx - xorl %r11d,%ebx - leal -1894007588(%rbp,%r12,1),%r12d + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 28(%rsp),%edx addl %eax,%r12d - andl %esi,%ebx roll $1,%edx - addl %ebx,%r12d + andl %esi,%ebx + addl %ecx,%r12d roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax movl %edx,40(%rsp) - addl %ecx,%r12d - movl 44(%rsp),%ebp - movl %esi,%eax - movl %esi,%ebx + movl %edi,%ebx xorl 52(%rsp),%ebp - andl %edi,%eax + andl %esi,%eax movl %r12d,%ecx xorl 12(%rsp),%ebp - xorl %edi,%ebx leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 32(%rsp),%ebp addl %eax,%r11d - andl %r13d,%ebx roll $1,%ebp - addl %ebx,%r11d + andl %r13d,%ebx + addl %ecx,%r11d roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax movl %ebp,44(%rsp) - addl %ecx,%r11d - movl 48(%rsp),%edx - movl %r13d,%eax 
- movl %r13d,%ebx - xorl 56(%rsp),%edx - andl %esi,%eax + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax movl %r11d,%ecx - xorl 16(%rsp),%edx - xorl %esi,%ebx + xorl 16(%rsp),%r14d leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 36(%rsp),%edx addl %eax,%edi + roll $1,%r14d andl %r12d,%ebx - roll $1,%edx - addl %ebx,%edi - roll $30,%r12d - movl %edx,48(%rsp) addl %ecx,%edi - movl 52(%rsp),%ebp - movl %r12d,%eax - movl %r12d,%ebx - xorl 60(%rsp),%ebp - andl %r13d,%eax + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax movl %edi,%ecx - xorl 20(%rsp),%ebp - xorl %r13d,%ebx - leal -1894007588(%rdx,%rsi,1),%esi + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 40(%rsp),%ebp addl %eax,%esi + roll $1,%edx andl %r11d,%ebx - roll $1,%ebp - addl %ebx,%esi - roll $30,%r11d - movl %ebp,52(%rsp) addl %ecx,%esi - movl 56(%rsp),%edx - movl %r11d,%eax - movl %r11d,%ebx - xorl 0(%rsp),%edx - andl %r12d,%eax + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax movl %esi,%ecx - xorl 24(%rsp),%edx - xorl %r12d,%ebx - leal -1894007588(%rbp,%r13,1),%r13d + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 44(%rsp),%edx addl %eax,%r13d + roll $1,%ebp andl %edi,%ebx - roll $1,%edx - addl %ebx,%r13d - roll $30,%edi - movl %edx,56(%rsp) addl %ecx,%r13d - movl 60(%rsp),%ebp - movl %edi,%eax - movl %edi,%ebx - xorl 4(%rsp),%ebp - andl %r11d,%eax + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax movl %r13d,%ecx - xorl 28(%rsp),%ebp - xorl %r11d,%ebx - leal -1894007588(%rdx,%r12,1),%r12d + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 48(%rsp),%ebp addl %eax,%r12d + roll $1,%r14d andl %esi,%ebx - roll $1,%ebp - addl %ebx,%r12d - roll $30,%esi - movl %ebp,60(%rsp) addl %ecx,%r12d - movl 0(%rsp),%edx - movl %esi,%eax - movl %esi,%ebx + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx xorl 8(%rsp),%edx - andl %edi,%eax + andl %esi,%eax movl %r12d,%ecx xorl 32(%rsp),%edx - xorl %edi,%ebx - leal -1894007588(%rbp,%r11,1),%r11d + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 52(%rsp),%edx addl %eax,%r11d - andl %r13d,%ebx roll $1,%edx - addl %ebx,%r11d + andl %r13d,%ebx + addl %ecx,%r11d roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax movl %edx,0(%rsp) - addl %ecx,%r11d - movl 4(%rsp),%ebp - movl %r13d,%eax - movl %r13d,%ebx + movl %esi,%ebx xorl 12(%rsp),%ebp - andl %esi,%eax + andl %r13d,%eax movl %r11d,%ecx xorl 36(%rsp),%ebp - xorl %esi,%ebx leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 56(%rsp),%ebp addl %eax,%edi - andl %r12d,%ebx roll $1,%ebp - addl %ebx,%edi + andl %r12d,%ebx + addl %ecx,%edi roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax movl %ebp,4(%rsp) - addl %ecx,%edi - movl 8(%rsp),%edx - movl %r12d,%eax - movl %r12d,%ebx - xorl 16(%rsp),%edx - andl %r13d,%eax + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax movl %edi,%ecx - xorl 40(%rsp),%edx - xorl %r13d,%ebx + xorl 40(%rsp),%r14d leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 60(%rsp),%edx addl %eax,%esi + 
roll $1,%r14d andl %r11d,%ebx - roll $1,%edx - addl %ebx,%esi - roll $30,%r11d - movl %edx,8(%rsp) addl %ecx,%esi - movl 12(%rsp),%ebp - movl %r11d,%eax - movl %r11d,%ebx - xorl 20(%rsp),%ebp - andl %r12d,%eax + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax movl %esi,%ecx - xorl 44(%rsp),%ebp - xorl %r12d,%ebx - leal -1894007588(%rdx,%r13,1),%r13d + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 0(%rsp),%ebp addl %eax,%r13d + roll $1,%edx andl %edi,%ebx - roll $1,%ebp - addl %ebx,%r13d - roll $30,%edi - movl %ebp,12(%rsp) addl %ecx,%r13d - movl 16(%rsp),%edx - movl %edi,%eax - movl %edi,%ebx - xorl 24(%rsp),%edx - andl %r11d,%eax + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax movl %r13d,%ecx - xorl 48(%rsp),%edx - xorl %r11d,%ebx - leal -1894007588(%rbp,%r12,1),%r12d + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 4(%rsp),%edx addl %eax,%r12d + roll $1,%ebp andl %esi,%ebx - roll $1,%edx - addl %ebx,%r12d - roll $30,%esi - movl %edx,16(%rsp) addl %ecx,%r12d - movl 20(%rsp),%ebp - movl %esi,%eax - movl %esi,%ebx - xorl 28(%rsp),%ebp - andl %edi,%eax + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax movl %r12d,%ecx - xorl 52(%rsp),%ebp - xorl %edi,%ebx - leal -1894007588(%rdx,%r11,1),%r11d + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 8(%rsp),%ebp addl %eax,%r11d + roll $1,%r14d andl %r13d,%ebx - roll $1,%ebp - addl %ebx,%r11d - roll $30,%r13d - movl %ebp,20(%rsp) addl %ecx,%r11d - movl 24(%rsp),%edx - movl %r13d,%eax - movl %r13d,%ebx + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx xorl 32(%rsp),%edx - andl %esi,%eax + andl %r13d,%eax movl %r11d,%ecx xorl 56(%rsp),%edx - xorl %esi,%ebx - leal -1894007588(%rbp,%rdi,1),%edi + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 12(%rsp),%edx addl %eax,%edi - andl %r12d,%ebx roll $1,%edx - addl %ebx,%edi + andl %r12d,%ebx + addl %ecx,%edi roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax movl %edx,24(%rsp) - addl %ecx,%edi - movl 28(%rsp),%ebp - movl %r12d,%eax - movl %r12d,%ebx + movl %r13d,%ebx xorl 36(%rsp),%ebp - andl %r13d,%eax + andl %r12d,%eax movl %edi,%ecx xorl 60(%rsp),%ebp - xorl %r13d,%ebx leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 16(%rsp),%ebp addl %eax,%esi - andl %r11d,%ebx roll $1,%ebp - addl %ebx,%esi + andl %r11d,%ebx + addl %ecx,%esi roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax movl %ebp,28(%rsp) - addl %ecx,%esi - movl 32(%rsp),%edx - movl %r11d,%eax - movl %r11d,%ebx - xorl 40(%rsp),%edx - andl %r12d,%eax + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax movl %esi,%ecx - xorl 0(%rsp),%edx - xorl %r12d,%ebx + xorl 0(%rsp),%r14d leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 20(%rsp),%edx addl %eax,%r13d + roll $1,%r14d andl %edi,%ebx - roll $1,%edx - addl %ebx,%r13d - roll $30,%edi - movl %edx,32(%rsp) addl %ecx,%r13d - movl 36(%rsp),%ebp - movl %edi,%eax - movl %edi,%ebx - xorl 44(%rsp),%ebp - andl %r11d,%eax + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + 
movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax movl %r13d,%ecx - xorl 4(%rsp),%ebp - xorl %r11d,%ebx - leal -1894007588(%rdx,%r12,1),%r12d + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 24(%rsp),%ebp addl %eax,%r12d + roll $1,%edx andl %esi,%ebx - roll $1,%ebp - addl %ebx,%r12d - roll $30,%esi - movl %ebp,36(%rsp) addl %ecx,%r12d - movl 40(%rsp),%edx - movl %esi,%eax - movl %esi,%ebx - xorl 48(%rsp),%edx - andl %edi,%eax + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax movl %r12d,%ecx - xorl 8(%rsp),%edx - xorl %edi,%ebx - leal -1894007588(%rbp,%r11,1),%r11d + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 28(%rsp),%edx addl %eax,%r11d + roll $1,%ebp andl %r13d,%ebx - roll $1,%edx - addl %ebx,%r11d - roll $30,%r13d - movl %edx,40(%rsp) addl %ecx,%r11d - movl 44(%rsp),%ebp - movl %r13d,%eax - movl %r13d,%ebx - xorl 52(%rsp),%ebp - andl %esi,%eax + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax movl %r11d,%ecx - xorl 12(%rsp),%ebp - xorl %esi,%ebx - leal -1894007588(%rdx,%rdi,1),%edi + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 32(%rsp),%ebp addl %eax,%edi + roll $1,%r14d andl %r12d,%ebx - roll $1,%ebp - addl %ebx,%edi - roll $30,%r12d - movl %ebp,44(%rsp) addl %ecx,%edi - movl 48(%rsp),%edx - movl %r12d,%eax - movl %r12d,%ebx + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx xorl 56(%rsp),%edx - andl %r13d,%eax + andl %r12d,%eax movl %edi,%ecx xorl 16(%rsp),%edx - xorl %r13d,%ebx - leal -1894007588(%rbp,%rsi,1),%esi + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 36(%rsp),%edx addl %eax,%esi - andl %r11d,%ebx roll $1,%edx - addl %ebx,%esi + andl %r11d,%ebx + addl %ecx,%esi roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax movl %edx,48(%rsp) - addl %ecx,%esi - movl 52(%rsp),%ebp - movl %r11d,%eax movl %esi,%ecx xorl 60(%rsp),%ebp - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal -899497514(%rdx,%r13,1),%r13d xorl 20(%rsp),%ebp - xorl %r12d,%eax + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 40(%rsp),%ebp roll $30,%edi addl %eax,%r13d roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax movl %ebp,52(%rsp) - movl 56(%rsp),%edx - movl %edi,%eax movl %r13d,%ecx - xorl 0(%rsp),%edx - xorl %esi,%eax + xorl 0(%rsp),%r14d + xorl %r11d,%eax roll $5,%ecx + xorl 24(%rsp),%r14d leal -899497514(%rbp,%r12,1),%r12d - xorl 24(%rsp),%edx - xorl %r11d,%eax + xorl %edi,%eax addl %ecx,%r12d - xorl 44(%rsp),%edx roll $30,%esi addl %eax,%r12d - roll $1,%edx - movl %edx,56(%rsp) - movl 60(%rsp),%ebp - movl %esi,%eax + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) movl %r12d,%ecx - xorl 4(%rsp),%ebp - xorl %r13d,%eax - roll $5,%ecx - leal -899497514(%rdx,%r11,1),%r11d - xorl 28(%rsp),%ebp + xorl 4(%rsp),%edx xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 48(%rsp),%ebp roll $30,%r13d addl %eax,%r11d - roll $1,%ebp - movl %ebp,60(%rsp) - movl 0(%rsp),%edx - movl %r13d,%eax + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) movl %r11d,%ecx - xorl 8(%rsp),%edx - xorl 
%r12d,%eax - roll $5,%ecx - leal -899497514(%rbp,%rdi,1),%edi - xorl 32(%rsp),%edx + xorl 8(%rsp),%ebp xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 52(%rsp),%edx roll $30,%r12d addl %eax,%edi - roll $1,%edx - movl %edx,0(%rsp) - movl 4(%rsp),%ebp - movl %r12d,%eax + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) movl %edi,%ecx - xorl 12(%rsp),%ebp - xorl %r11d,%eax - roll $5,%ecx - leal -899497514(%rdx,%rsi,1),%esi - xorl 36(%rsp),%ebp + xorl 12(%rsp),%r14d xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 56(%rsp),%ebp roll $30,%r11d addl %eax,%esi - roll $1,%ebp - movl %ebp,4(%rsp) - movl 8(%rsp),%edx - movl %r11d,%eax + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) movl %esi,%ecx xorl 16(%rsp),%edx - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal -899497514(%rbp,%r13,1),%r13d xorl 40(%rsp),%edx - xorl %r12d,%eax + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 60(%rsp),%edx roll $30,%edi addl %eax,%r13d roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax movl %edx,8(%rsp) - movl 12(%rsp),%ebp - movl %edi,%eax movl %r13d,%ecx xorl 20(%rsp),%ebp - xorl %esi,%eax + xorl %r11d,%eax roll $5,%ecx - leal -899497514(%rdx,%r12,1),%r12d xorl 44(%rsp),%ebp - xorl %r11d,%eax + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 0(%rsp),%ebp roll $30,%esi addl %eax,%r12d roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax movl %ebp,12(%rsp) - movl 16(%rsp),%edx - movl %esi,%eax movl %r12d,%ecx - xorl 24(%rsp),%edx - xorl %r13d,%eax + xorl 24(%rsp),%r14d + xorl %edi,%eax roll $5,%ecx + xorl 48(%rsp),%r14d leal -899497514(%rbp,%r11,1),%r11d - xorl 48(%rsp),%edx - xorl %edi,%eax + xorl %esi,%eax addl %ecx,%r11d - xorl 4(%rsp),%edx roll $30,%r13d addl %eax,%r11d - roll $1,%edx - movl %edx,16(%rsp) - movl 20(%rsp),%ebp - movl %r13d,%eax + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) movl %r11d,%ecx - xorl 28(%rsp),%ebp - xorl %r12d,%eax - roll $5,%ecx - leal -899497514(%rdx,%rdi,1),%edi - xorl 52(%rsp),%ebp + xorl 28(%rsp),%edx xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 8(%rsp),%ebp roll $30,%r12d addl %eax,%edi - roll $1,%ebp - movl %ebp,20(%rsp) - movl 24(%rsp),%edx - movl %r12d,%eax + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) movl %edi,%ecx - xorl 32(%rsp),%edx - xorl %r11d,%eax - roll $5,%ecx - leal -899497514(%rbp,%rsi,1),%esi - xorl 56(%rsp),%edx + xorl 32(%rsp),%ebp xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 12(%rsp),%edx roll $30,%r11d addl %eax,%esi - roll $1,%edx - movl %edx,24(%rsp) - movl 28(%rsp),%ebp - movl %r11d,%eax + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) movl %esi,%ecx - xorl 36(%rsp),%ebp - xorl %edi,%eax - roll $5,%ecx - leal -899497514(%rdx,%r13,1),%r13d - xorl 60(%rsp),%ebp + xorl 36(%rsp),%r14d xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 16(%rsp),%ebp roll $30,%edi addl %eax,%r13d - roll $1,%ebp - movl %ebp,28(%rsp) - movl 32(%rsp),%edx - movl %edi,%eax + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) movl %r13d,%ecx xorl 40(%rsp),%edx - xorl 
%esi,%eax + xorl %r11d,%eax roll $5,%ecx - leal -899497514(%rbp,%r12,1),%r12d xorl 0(%rsp),%edx - xorl %r11d,%eax + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 20(%rsp),%edx roll $30,%esi addl %eax,%r12d roll $1,%edx - movl %edx,32(%rsp) - movl 36(%rsp),%ebp - movl %esi,%eax + xorl 36(%rsp),%ebp + movl %r13d,%eax + movl %r12d,%ecx xorl 44(%rsp),%ebp - xorl %r13d,%eax + xorl %edi,%eax roll $5,%ecx - leal -899497514(%rdx,%r11,1),%r11d xorl 4(%rsp),%ebp - xorl %edi,%eax + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 24(%rsp),%ebp roll $30,%r13d addl %eax,%r11d roll $1,%ebp - movl %ebp,36(%rsp) - movl 40(%rsp),%edx - movl %r13d,%eax + xorl 40(%rsp),%r14d + movl %r12d,%eax + movl %r11d,%ecx - xorl 48(%rsp),%edx - xorl %r12d,%eax + xorl 48(%rsp),%r14d + xorl %esi,%eax roll $5,%ecx + xorl 8(%rsp),%r14d leal -899497514(%rbp,%rdi,1),%edi - xorl 8(%rsp),%edx - xorl %esi,%eax + xorl %r13d,%eax addl %ecx,%edi - xorl 28(%rsp),%edx roll $30,%r12d addl %eax,%edi - roll $1,%edx - movl %edx,40(%rsp) - movl 44(%rsp),%ebp - movl %r12d,%eax + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + movl %edi,%ecx - xorl 52(%rsp),%ebp - xorl %r11d,%eax - roll $5,%ecx - leal -899497514(%rdx,%rsi,1),%esi - xorl 12(%rsp),%ebp + xorl 52(%rsp),%edx xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 32(%rsp),%ebp roll $30,%r11d addl %eax,%esi - roll $1,%ebp - movl %ebp,44(%rsp) - movl 48(%rsp),%edx - movl %r11d,%eax - movl %esi,%ecx - xorl 56(%rsp),%edx - xorl %edi,%eax - roll $5,%ecx - leal -899497514(%rbp,%r13,1),%r13d - xorl 16(%rsp),%edx + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 36(%rsp),%edx roll $30,%edi addl %eax,%r13d - roll $1,%edx - movl %edx,48(%rsp) - movl 52(%rsp),%ebp - movl %edi,%eax + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + movl %r13d,%ecx - xorl 60(%rsp),%ebp - xorl %esi,%eax - roll $5,%ecx - leal -899497514(%rdx,%r12,1),%r12d - xorl 20(%rsp),%ebp + xorl 60(%rsp),%r14d xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 40(%rsp),%ebp roll $30,%esi addl %eax,%r12d - roll $1,%ebp - movl 56(%rsp),%edx - movl %esi,%eax + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + movl %r12d,%ecx xorl 0(%rsp),%edx - xorl %r13d,%eax + xorl %edi,%eax roll $5,%ecx - leal -899497514(%rbp,%r11,1),%r11d xorl 24(%rsp),%edx - xorl %edi,%eax + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 44(%rsp),%edx roll $30,%r13d addl %eax,%r11d roll $1,%edx - movl 60(%rsp),%ebp - movl %r13d,%eax + xorl 60(%rsp),%ebp + movl %r12d,%eax + movl %r11d,%ecx xorl 4(%rsp),%ebp - xorl %r12d,%eax + xorl %esi,%eax roll $5,%ecx - leal -899497514(%rdx,%rdi,1),%edi xorl 28(%rsp),%ebp - xorl %esi,%eax + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 48(%rsp),%ebp roll $30,%r12d addl %eax,%edi roll $1,%ebp - movl %r12d,%eax + movl %r11d,%eax movl %edi,%ecx - xorl %r11d,%eax + xorl %r13d,%eax leal -899497514(%rbp,%rsi,1),%esi roll $5,%ecx - xorl %r13d,%eax + xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi @@ -1281,29 +1230,202 @@ sha1_block_data_order: jnz .Lloop movq 64(%rsp),%rsi - movq (%rsi),%r13 - movq 8(%rsi),%r12 - movq 16(%rsi),%rbp - movq 24(%rsi),%rbx - leaq 
32(%rsi),%rsp + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lepilogue: .byte 0xf3,0xc3 .size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_shaext,@function +.align 32 +sha1_block_data_order_shaext: +_shaext_shortcut: + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + decq %rdx + leaq 64(%rsi),%rax + paddd %xmm4,%xmm1 + cmovneq %rax,%rsi + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz .Loop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + .byte 0xf3,0xc3 +.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext .type 
sha1_block_data_order_ssse3,@function .align 16 sha1_block_data_order_ssse3: _ssse3_shortcut: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 + pushq %r13 + pushq %r14 leaq -64(%rsp),%rsp + movq %rax,%r14 + andq $-64,%rsp movq %rdi,%r8 movq %rsi,%r9 movq %rdx,%r10 shlq $6,%r10 addq %r9,%r10 - leaq K_XX_XX(%rip),%r11 + leaq K_XX_XX+64(%rip),%r11 movl 0(%r8),%eax movl 4(%r8),%ebx @@ -1311,19 +1433,22 @@ _ssse3_shortcut: movl 12(%r8),%edx movl %ebx,%esi movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 + movdqa -64(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 movdqu 48(%r9),%xmm3 .byte 102,15,56,0,198 - addq $64,%r9 .byte 102,15,56,0,206 .byte 102,15,56,0,214 -.byte 102,15,56,0,222 + addq $64,%r9 paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 paddd %xmm9,%xmm1 paddd %xmm9,%xmm2 movdqa %xmm0,0(%rsp) @@ -1335,904 +1460,882 @@ _ssse3_shortcut: jmp .Loop_ssse3 .align 16 .Loop_ssse3: - movdqa %xmm1,%xmm4 - addl 0(%rsp),%ebp - xorl %edx,%ecx + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi movdqa %xmm3,%xmm8 -.byte 102,15,58,15,224,8 + paddd %xmm3,%xmm9 movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx roll $5,%eax - paddd %xmm3,%xmm9 - andl %ecx,%esi - xorl %edx,%ecx + addl %esi,%ebp psrldq $4,%xmm8 - xorl %edx,%esi - addl %eax,%ebp + andl %ebx,%edi + xorl %ecx,%ebx pxor %xmm0,%xmm4 - rorl $2,%ebx - addl %esi,%ebp + addl %eax,%ebp + rorl $7,%eax pxor %xmm2,%xmm8 - addl 4(%rsp),%edx - xorl %ecx,%ebx + xorl %ecx,%edi movl %ebp,%esi - roll $5,%ebp + addl 4(%rsp),%edx pxor %xmm8,%xmm4 - andl %ebx,%edi - xorl %ecx,%ebx + xorl %ebx,%eax + roll $5,%ebp movdqa %xmm9,48(%rsp) - xorl %ecx,%edi - addl %ebp,%edx - movdqa %xmm4,%xmm10 - movdqa %xmm4,%xmm8 - rorl $7,%eax addl %edi,%edx - addl 8(%rsp),%ecx + andl %eax,%esi + movdqa %xmm4,%xmm10 xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi pslldq $12,%xmm10 paddd %xmm4,%xmm4 movl %edx,%edi - roll $5,%edx - andl %eax,%esi - xorl %ebx,%eax + addl 8(%rsp),%ecx psrld $31,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx - movdqa %xmm10,%xmm9 - rorl $7,%ebp + xorl %eax,%ebp + roll $5,%edx addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx por %xmm8,%xmm4 - addl 12(%rsp),%ebx - xorl %eax,%ebp + xorl %eax,%edi movl %ecx,%esi - roll $5,%ecx + addl 12(%rsp),%ebx pslld $2,%xmm9 pxor %xmm10,%xmm4 - andl %ebp,%edi - xorl %eax,%ebp - movdqa 0(%r11),%xmm10 - xorl %eax,%edi - addl %ecx,%ebx - pxor %xmm9,%xmm4 - rorl $7,%edx + xorl %ebp,%edx + movdqa -64(%r11),%xmm10 + roll $5,%ecx addl %edi,%ebx - movdqa %xmm2,%xmm5 - addl 16(%rsp),%eax + andl %edx,%esi + pxor %xmm9,%xmm4 xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi movdqa %xmm4,%xmm9 -.byte 102,15,58,15,233,8 + paddd %xmm4,%xmm10 movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx roll $5,%ebx - paddd %xmm4,%xmm10 - andl %edx,%esi - xorl %ebp,%edx + addl %esi,%eax psrldq $4,%xmm9 - xorl %ebp,%esi - addl %ebx,%eax + andl %ecx,%edi + xorl %edx,%ecx pxor %xmm1,%xmm5 - rorl $7,%ecx - addl %esi,%eax + addl %ebx,%eax + rorl $7,%ebx pxor %xmm3,%xmm9 - addl 20(%rsp),%ebp - xorl %edx,%ecx + xorl %edx,%edi movl %eax,%esi - roll $5,%eax + addl 20(%rsp),%ebp pxor %xmm9,%xmm5 - andl %ecx,%edi - xorl %edx,%ecx + xorl %ecx,%ebx + roll $5,%eax movdqa %xmm10,0(%rsp) - xorl %edx,%edi - addl %eax,%ebp - movdqa %xmm5,%xmm8 - movdqa %xmm5,%xmm9 - rorl 
$7,%ebx addl %edi,%ebp - addl 24(%rsp),%edx + andl %ebx,%esi + movdqa %xmm5,%xmm8 xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi pslldq $12,%xmm8 paddd %xmm5,%xmm5 movl %ebp,%edi - roll $5,%ebp - andl %ebx,%esi - xorl %ecx,%ebx + addl 24(%rsp),%edx psrld $31,%xmm9 - xorl %ecx,%esi - addl %ebp,%edx - movdqa %xmm8,%xmm10 - rorl $7,%eax + xorl %ebx,%eax + roll $5,%ebp addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp por %xmm9,%xmm5 - addl 28(%rsp),%ecx - xorl %ebx,%eax + xorl %ebx,%edi movl %edx,%esi - roll $5,%edx + addl 28(%rsp),%ecx pslld $2,%xmm10 pxor %xmm8,%xmm5 - andl %eax,%edi - xorl %ebx,%eax - movdqa 16(%r11),%xmm8 - xorl %ebx,%edi - addl %edx,%ecx - pxor %xmm10,%xmm5 - rorl $7,%ebp + xorl %eax,%ebp + movdqa -32(%r11),%xmm8 + roll $5,%edx addl %edi,%ecx - movdqa %xmm3,%xmm6 - addl 32(%rsp),%ebx + andl %ebp,%esi + pxor %xmm10,%xmm5 xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi movdqa %xmm5,%xmm10 -.byte 102,15,58,15,242,8 + paddd %xmm5,%xmm8 movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx roll $5,%ecx - paddd %xmm5,%xmm8 - andl %ebp,%esi - xorl %eax,%ebp + addl %esi,%ebx psrldq $4,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx + andl %edx,%edi + xorl %ebp,%edx pxor %xmm2,%xmm6 - rorl $7,%edx - addl %esi,%ebx + addl %ecx,%ebx + rorl $7,%ecx pxor %xmm4,%xmm10 - addl 36(%rsp),%eax - xorl %ebp,%edx + xorl %ebp,%edi movl %ebx,%esi - roll $5,%ebx + addl 36(%rsp),%eax pxor %xmm10,%xmm6 - andl %edx,%edi - xorl %ebp,%edx + xorl %edx,%ecx + roll $5,%ebx movdqa %xmm8,16(%rsp) - xorl %ebp,%edi - addl %ebx,%eax - movdqa %xmm6,%xmm9 - movdqa %xmm6,%xmm10 - rorl $7,%ecx addl %edi,%eax - addl 40(%rsp),%ebp + andl %ecx,%esi + movdqa %xmm6,%xmm9 xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi pslldq $12,%xmm9 paddd %xmm6,%xmm6 movl %eax,%edi - roll $5,%eax - andl %ecx,%esi - xorl %edx,%ecx + addl 40(%rsp),%ebp psrld $31,%xmm10 - xorl %edx,%esi - addl %eax,%ebp - movdqa %xmm9,%xmm8 - rorl $7,%ebx + xorl %ecx,%ebx + roll $5,%eax addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax por %xmm10,%xmm6 - addl 44(%rsp),%edx - xorl %ecx,%ebx + xorl %ecx,%edi movl %ebp,%esi - roll $5,%ebp + addl 44(%rsp),%edx pslld $2,%xmm8 pxor %xmm9,%xmm6 - andl %ebx,%edi - xorl %ecx,%ebx - movdqa 16(%r11),%xmm9 - xorl %ecx,%edi - addl %ebp,%edx - pxor %xmm8,%xmm6 - rorl $7,%eax + xorl %ebx,%eax + movdqa -32(%r11),%xmm9 + roll $5,%ebp addl %edi,%edx - movdqa %xmm4,%xmm7 - addl 48(%rsp),%ecx + andl %eax,%esi + pxor %xmm8,%xmm6 xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi movdqa %xmm6,%xmm8 -.byte 102,15,58,15,251,8 + paddd %xmm6,%xmm9 movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp roll $5,%edx - paddd %xmm6,%xmm9 - andl %eax,%esi - xorl %ebx,%eax + addl %esi,%ecx psrldq $4,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx + andl %ebp,%edi + xorl %eax,%ebp pxor %xmm3,%xmm7 - rorl $7,%ebp - addl %esi,%ecx + addl %edx,%ecx + rorl $7,%edx pxor %xmm5,%xmm8 - addl 52(%rsp),%ebx - xorl %eax,%ebp + xorl %eax,%edi movl %ecx,%esi - roll $5,%ecx + addl 52(%rsp),%ebx pxor %xmm8,%xmm7 - andl %ebp,%edi - xorl %eax,%ebp + xorl %ebp,%edx + roll $5,%ecx movdqa %xmm9,32(%rsp) - xorl %eax,%edi - addl %ecx,%ebx - movdqa %xmm7,%xmm10 - movdqa %xmm7,%xmm8 - rorl $7,%edx addl %edi,%ebx - addl 56(%rsp),%eax + 
andl %edx,%esi + movdqa %xmm7,%xmm10 xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi pslldq $12,%xmm10 paddd %xmm7,%xmm7 movl %ebx,%edi - roll $5,%ebx - andl %edx,%esi - xorl %ebp,%edx + addl 56(%rsp),%eax psrld $31,%xmm8 - xorl %ebp,%esi - addl %ebx,%eax - movdqa %xmm10,%xmm9 - rorl $7,%ecx + xorl %edx,%ecx + roll $5,%ebx addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx por %xmm8,%xmm7 - addl 60(%rsp),%ebp - xorl %edx,%ecx + xorl %edx,%edi movl %eax,%esi - roll $5,%eax + addl 60(%rsp),%ebp pslld $2,%xmm9 pxor %xmm10,%xmm7 - andl %ecx,%edi - xorl %edx,%ecx - movdqa 16(%r11),%xmm10 - xorl %edx,%edi - addl %eax,%ebp - pxor %xmm9,%xmm7 - rorl $7,%ebx + xorl %ecx,%ebx + movdqa -32(%r11),%xmm10 + roll $5,%eax addl %edi,%ebp - movdqa %xmm7,%xmm9 - addl 0(%rsp),%edx - pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,206,8 + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax roll $5,%ebp pxor %xmm1,%xmm0 - andl %ebx,%esi - xorl %ecx,%ebx + addl %esi,%edx + andl %eax,%edi movdqa %xmm10,%xmm8 + xorl %ebx,%eax paddd %xmm7,%xmm10 - xorl %ecx,%esi addl %ebp,%edx pxor %xmm9,%xmm0 - rorl $7,%eax - addl %esi,%edx + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi addl 4(%rsp),%ecx - xorl %ebx,%eax movdqa %xmm0,%xmm9 - movdqa %xmm10,48(%rsp) - movl %edx,%esi + xorl %eax,%ebp roll $5,%edx - andl %eax,%edi - xorl %ebx,%eax + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp pslld $2,%xmm0 - xorl %ebx,%edi addl %edx,%ecx + rorl $7,%edx psrld $30,%xmm9 - rorl $7,%ebp - addl %edi,%ecx - addl 8(%rsp),%ebx - xorl %eax,%ebp + xorl %eax,%esi movl %ecx,%edi - roll $5,%ecx + addl 8(%rsp),%ebx por %xmm9,%xmm0 - andl %ebp,%esi - xorl %eax,%ebp - movdqa %xmm0,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx - rorl $7,%edx - addl %esi,%ebx - addl 12(%rsp),%eax xorl %ebp,%edx - movl %ebx,%esi - roll $5,%ebx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx andl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax xorl %ebp,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + roll $5,%ebx addl %edi,%eax - addl 16(%rsp),%ebp - pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,215,8 xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 movl %eax,%edi roll $5,%eax pxor %xmm2,%xmm1 - xorl %ecx,%esi - addl %eax,%ebp + addl %esi,%ebp + xorl %ecx,%edi movdqa %xmm8,%xmm9 - paddd %xmm0,%xmm8 rorl $7,%ebx - addl %esi,%ebp + paddd %xmm0,%xmm8 + addl %eax,%ebp pxor %xmm10,%xmm1 addl 20(%rsp),%edx - xorl %ecx,%edi + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi movdqa %xmm8,0(%rsp) - xorl %ebx,%edi - addl %ebp,%edx rorl $7,%eax - addl %edi,%edx - pslld $2,%xmm1 + addl %ebp,%edx addl 24(%rsp),%ecx - xorl %ebx,%esi - psrld $30,%xmm10 + pslld $2,%xmm1 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm10 roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp por %xmm10,%xmm1 + addl %edx,%ecx addl 28(%rsp),%ebx - xorl %eax,%edi - movdqa %xmm1,%xmm8 + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 32(%rsp),%eax - pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,192,8 xorl %ebp,%esi + rorl 
$7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 movl %ebx,%edi roll $5,%ebx pxor %xmm3,%xmm2 - xorl %edx,%esi - addl %ebx,%eax - movdqa 32(%r11),%xmm10 - paddd %xmm1,%xmm9 - rorl $7,%ecx addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax pxor %xmm8,%xmm2 addl 36(%rsp),%ebp - xorl %edx,%edi + xorl %ecx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi movdqa %xmm9,16(%rsp) - xorl %ecx,%edi - addl %eax,%ebp rorl $7,%ebx - addl %edi,%ebp - pslld $2,%xmm2 + addl %eax,%ebp addl 40(%rsp),%edx - xorl %ecx,%esi - psrld $30,%xmm8 + pslld $2,%xmm2 + xorl %ebx,%esi movl %ebp,%edi + psrld $30,%xmm8 roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax por %xmm8,%xmm2 + addl %ebp,%edx addl 44(%rsp),%ecx - xorl %ebx,%edi - movdqa %xmm2,%xmm9 + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 48(%rsp),%ebx - pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,201,8 xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 movl %ecx,%edi roll $5,%ecx pxor %xmm4,%xmm3 - xorl %ebp,%esi - addl %ecx,%ebx + addl %esi,%ebx + xorl %ebp,%edi movdqa %xmm10,%xmm8 - paddd %xmm2,%xmm10 rorl $7,%edx - addl %esi,%ebx + paddd %xmm2,%xmm10 + addl %ecx,%ebx pxor %xmm9,%xmm3 addl 52(%rsp),%eax - xorl %ebp,%edi + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi movdqa %xmm10,32(%rsp) - xorl %edx,%edi - addl %ebx,%eax rorl $7,%ecx - addl %edi,%eax - pslld $2,%xmm3 + addl %ebx,%eax addl 56(%rsp),%ebp - xorl %edx,%esi - psrld $30,%xmm9 + pslld $2,%xmm3 + xorl %ecx,%esi movl %eax,%edi + psrld $30,%xmm9 roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx por %xmm9,%xmm3 + addl %eax,%ebp addl 60(%rsp),%edx - xorl %ecx,%edi - movdqa %xmm3,%xmm10 + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 0(%rsp),%ecx - pxor %xmm0,%xmm4 -.byte 102,68,15,58,15,210,8 xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 movl %edx,%edi roll $5,%edx pxor %xmm5,%xmm4 - xorl %eax,%esi - addl %edx,%ecx + addl %esi,%ecx + xorl %eax,%edi movdqa %xmm8,%xmm9 - paddd %xmm3,%xmm8 rorl $7,%ebp - addl %esi,%ecx + paddd %xmm3,%xmm8 + addl %edx,%ecx pxor %xmm10,%xmm4 addl 4(%rsp),%ebx - xorl %eax,%edi + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi movdqa %xmm8,48(%rsp) - xorl %ebp,%edi - addl %ecx,%ebx rorl $7,%edx - addl %edi,%ebx - pslld $2,%xmm4 + addl %ecx,%ebx addl 8(%rsp),%eax - xorl %ebp,%esi - psrld $30,%xmm10 + pslld $2,%xmm4 + xorl %edx,%esi movl %ebx,%edi + psrld $30,%xmm10 roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx por %xmm10,%xmm4 + addl %ebx,%eax addl 12(%rsp),%ebp - xorl %edx,%edi - movdqa %xmm4,%xmm8 + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 16(%rsp),%edx - pxor %xmm1,%xmm5 -.byte 102,68,15,58,15,195,8 xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + 
punpcklqdq %xmm4,%xmm8 movl %ebp,%edi roll $5,%ebp pxor %xmm6,%xmm5 - xorl %ebx,%esi - addl %ebp,%edx + addl %esi,%edx + xorl %ebx,%edi movdqa %xmm9,%xmm10 - paddd %xmm4,%xmm9 rorl $7,%eax - addl %esi,%edx + paddd %xmm4,%xmm9 + addl %ebp,%edx pxor %xmm8,%xmm5 addl 20(%rsp),%ecx - xorl %ebx,%edi + xorl %eax,%edi movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi movdqa %xmm9,0(%rsp) - xorl %eax,%edi - addl %edx,%ecx rorl $7,%ebp - addl %edi,%ecx - pslld $2,%xmm5 + addl %edx,%ecx addl 24(%rsp),%ebx - xorl %eax,%esi - psrld $30,%xmm8 + pslld $2,%xmm5 + xorl %ebp,%esi movl %ecx,%edi + psrld $30,%xmm8 roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx por %xmm8,%xmm5 + addl %ecx,%ebx addl 28(%rsp),%eax - xorl %ebp,%edi - movdqa %xmm5,%xmm9 + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx movl %ebx,%esi - roll $5,%ebx xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + roll $5,%ebx addl %edi,%eax - movl %ecx,%edi - pxor %xmm2,%xmm6 -.byte 102,68,15,58,15,204,8 + xorl %ecx,%esi xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 addl 32(%rsp),%ebp - andl %edx,%edi - pxor %xmm7,%xmm6 andl %ecx,%esi + xorl %edx,%ecx rorl $7,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm5,%xmm10 - addl %edi,%ebp + punpcklqdq %xmm5,%xmm9 movl %eax,%edi - pxor %xmm9,%xmm6 + xorl %ecx,%esi + pxor %xmm7,%xmm6 roll $5,%eax addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movdqa %xmm6,%xmm9 - movdqa %xmm10,16(%rsp) - movl %ebx,%esi + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp addl 36(%rsp),%edx - andl %ecx,%esi - pslld $2,%xmm6 andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - psrld $30,%xmm9 - addl %esi,%edx + movdqa %xmm6,%xmm9 movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) roll $5,%ebp addl %edi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - por %xmm9,%xmm6 - movl %eax,%edi + xorl %eax,%esi + pslld $2,%xmm6 xorl %ebx,%eax - movdqa %xmm6,%xmm10 + addl %ebp,%edx + psrld $30,%xmm9 addl 40(%rsp),%ecx - andl %ebx,%edi andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 rorl $7,%ebp - addl %edi,%ecx movl %edx,%edi + xorl %eax,%esi roll $5,%edx + pshufd $238,%xmm5,%xmm10 addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %ebp,%esi + xorl %ebp,%edi xorl %eax,%ebp + addl %edx,%ecx addl 44(%rsp),%ebx - andl %eax,%esi andl %ebp,%edi + xorl %eax,%ebp rorl $7,%edx - addl %esi,%ebx movl %ecx,%esi + xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp + xorl %edx,%esi + xorl %ebp,%edx addl %ecx,%ebx - movl %edx,%edi pxor %xmm3,%xmm7 -.byte 102,68,15,58,15,213,8 - xorl %ebp,%edx addl 48(%rsp),%eax - andl %ebp,%edi - pxor %xmm0,%xmm7 andl %edx,%esi + xorl %ebp,%edx rorl $7,%ecx - movdqa 48(%r11),%xmm9 - paddd %xmm6,%xmm8 - addl %edi,%eax + punpcklqdq %xmm6,%xmm10 movl %ebx,%edi - pxor %xmm10,%xmm7 + xorl %edx,%esi + pxor %xmm0,%xmm7 roll $5,%ebx addl %esi,%eax - xorl %ebp,%edx - addl %ebx,%eax - movdqa %xmm7,%xmm10 - movdqa %xmm8,32(%rsp) - movl %ecx,%esi + movdqa 32(%r11),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax addl 52(%rsp),%ebp - andl %edx,%esi - pslld $2,%xmm7 andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - psrld $30,%xmm10 - addl %esi,%ebp + movdqa %xmm7,%xmm10 movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - por %xmm10,%xmm7 - movl %ebx,%edi + xorl %ebx,%esi + pslld $2,%xmm7 xorl %ecx,%ebx - movdqa %xmm7,%xmm8 + addl %eax,%ebp + psrld $30,%xmm10 addl 
56(%rsp),%edx - andl %ecx,%edi andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 rorl $7,%eax - addl %edi,%edx movl %ebp,%edi + xorl %ebx,%esi roll $5,%ebp + pshufd $238,%xmm6,%xmm8 addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movl %eax,%esi + xorl %eax,%edi xorl %ebx,%eax + addl %ebp,%edx addl 60(%rsp),%ecx - andl %ebx,%esi andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - addl %esi,%ecx movl %edx,%esi + xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax + xorl %ebp,%esi + xorl %eax,%ebp addl %edx,%ecx - movl %ebp,%edi pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,198,8 - xorl %eax,%ebp addl 0(%rsp),%ebx - andl %eax,%edi - pxor %xmm1,%xmm0 andl %ebp,%esi + xorl %eax,%ebp rorl $7,%edx - movdqa %xmm9,%xmm10 - paddd %xmm7,%xmm9 - addl %edi,%ebx + punpcklqdq %xmm7,%xmm8 movl %ecx,%edi - pxor %xmm8,%xmm0 + xorl %ebp,%esi + pxor %xmm1,%xmm0 roll $5,%ecx addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movdqa %xmm0,%xmm8 - movdqa %xmm9,48(%rsp) - movl %edx,%esi + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx addl 4(%rsp),%eax - andl %ebp,%esi - pslld $2,%xmm0 andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - psrld $30,%xmm8 - addl %esi,%eax + movdqa %xmm0,%xmm8 movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx - addl %ebx,%eax - por %xmm8,%xmm0 - movl %ecx,%edi + xorl %ecx,%esi + pslld $2,%xmm0 xorl %edx,%ecx - movdqa %xmm0,%xmm9 + addl %ebx,%eax + psrld $30,%xmm8 addl 8(%rsp),%ebp - andl %edx,%edi andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 rorl $7,%ebx - addl %edi,%ebp movl %eax,%edi + xorl %ecx,%esi roll $5,%eax + pshufd $238,%xmm7,%xmm9 addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movl %ebx,%esi + xorl %ebx,%edi xorl %ecx,%ebx + addl %eax,%ebp addl 12(%rsp),%edx - andl %ecx,%esi andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - addl %esi,%edx movl %ebp,%esi + xorl %ebx,%edi roll $5,%ebp addl %edi,%edx - xorl %ecx,%ebx + xorl %eax,%esi + xorl %ebx,%eax addl %ebp,%edx - movl %eax,%edi pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,207,8 - xorl %ebx,%eax addl 16(%rsp),%ecx - andl %ebx,%edi - pxor %xmm2,%xmm1 andl %eax,%esi + xorl %ebx,%eax rorl $7,%ebp - movdqa %xmm10,%xmm8 - paddd %xmm0,%xmm10 - addl %edi,%ecx + punpcklqdq %xmm0,%xmm9 movl %edx,%edi - pxor %xmm9,%xmm1 + xorl %eax,%esi + pxor %xmm2,%xmm1 roll $5,%edx addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movdqa %xmm1,%xmm9 - movdqa %xmm10,0(%rsp) - movl %ebp,%esi + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx addl 20(%rsp),%ebx - andl %eax,%esi - pslld $2,%xmm1 andl %ebp,%edi + xorl %eax,%ebp rorl $7,%edx - psrld $30,%xmm9 - addl %esi,%ebx + movdqa %xmm1,%xmm9 movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - por %xmm9,%xmm1 - movl %edx,%edi + xorl %edx,%esi + pslld $2,%xmm1 xorl %ebp,%edx - movdqa %xmm1,%xmm10 + addl %ecx,%ebx + psrld $30,%xmm9 addl 24(%rsp),%eax - andl %ebp,%edi andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 rorl $7,%ecx - addl %edi,%eax movl %ebx,%edi + xorl %edx,%esi roll $5,%ebx + pshufd $238,%xmm0,%xmm10 addl %esi,%eax - xorl %ebp,%edx - addl %ebx,%eax - movl %ecx,%esi + xorl %ecx,%edi xorl %edx,%ecx + addl %ebx,%eax addl 28(%rsp),%ebp - andl %edx,%esi andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - addl %esi,%ebp movl %eax,%esi + xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx + xorl %ebx,%esi + xorl %ecx,%ebx addl %eax,%ebp - movl %ebx,%edi 
pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,208,8 - xorl %ecx,%ebx addl 32(%rsp),%edx - andl %ecx,%edi - pxor %xmm3,%xmm2 andl %ebx,%esi + xorl %ecx,%ebx rorl $7,%eax - movdqa %xmm8,%xmm9 - paddd %xmm1,%xmm8 - addl %edi,%edx + punpcklqdq %xmm1,%xmm10 movl %ebp,%edi - pxor %xmm10,%xmm2 + xorl %ebx,%esi + pxor %xmm3,%xmm2 roll $5,%ebp addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movdqa %xmm2,%xmm10 - movdqa %xmm8,16(%rsp) - movl %eax,%esi + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx addl 36(%rsp),%ecx - andl %ebx,%esi - pslld $2,%xmm2 andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - psrld $30,%xmm10 - addl %esi,%ecx + movdqa %xmm2,%xmm10 movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - por %xmm10,%xmm2 - movl %ebp,%edi + xorl %ebp,%esi + pslld $2,%xmm2 xorl %eax,%ebp - movdqa %xmm2,%xmm8 + addl %edx,%ecx + psrld $30,%xmm10 addl 40(%rsp),%ebx - andl %eax,%edi andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 rorl $7,%edx - addl %edi,%ebx movl %ecx,%edi + xorl %ebp,%esi roll $5,%ecx + pshufd $238,%xmm1,%xmm8 addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movl %edx,%esi + xorl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx addl 44(%rsp),%eax - andl %ebp,%esi andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - addl %esi,%eax movl %ebx,%esi + xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx + xorl %edx,%esi addl %ebx,%eax - addl 48(%rsp),%ebp pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,193,8 - xorl %edx,%esi + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 movl %eax,%edi roll $5,%eax pxor %xmm4,%xmm3 - xorl %ecx,%esi - addl %eax,%ebp + addl %esi,%ebp + xorl %ecx,%edi movdqa %xmm9,%xmm10 - paddd %xmm2,%xmm9 rorl $7,%ebx - addl %esi,%ebp + paddd %xmm2,%xmm9 + addl %eax,%ebp pxor %xmm8,%xmm3 addl 52(%rsp),%edx - xorl %ecx,%edi + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi movdqa %xmm9,32(%rsp) - xorl %ebx,%edi - addl %ebp,%edx rorl $7,%eax - addl %edi,%edx - pslld $2,%xmm3 + addl %ebp,%edx addl 56(%rsp),%ecx - xorl %ebx,%esi - psrld $30,%xmm8 + pslld $2,%xmm3 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm8 roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp por %xmm8,%xmm3 + addl %edx,%ecx addl 60(%rsp),%ebx - xorl %eax,%edi + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 0(%rsp),%eax - paddd %xmm3,%xmm10 xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi movdqa %xmm10,48(%rsp) - addl %ebx,%eax rorl $7,%ecx - addl %esi,%eax + addl %ebx,%eax addl 4(%rsp),%ebp - xorl %edx,%edi + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 8(%rsp),%edx xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx - addl 12(%rsp),%ecx xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx cmpq %r10,%r9 je .Ldone_ssse3 movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 + movdqa -64(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 
16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -2240,113 +2343,112 @@ _ssse3_shortcut: .byte 102,15,56,0,198 addq $64,%r9 addl 16(%rsp),%ebx - xorl %eax,%esi -.byte 102,15,56,0,206 + xorl %ebp,%esi movl %ecx,%edi +.byte 102,15,56,0,206 roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx paddd %xmm9,%xmm0 - xorl %ebp,%esi addl %ecx,%ebx - rorl $7,%edx - addl %esi,%ebx - movdqa %xmm0,0(%rsp) addl 20(%rsp),%eax - xorl %ebp,%edi - psubd %xmm9,%xmm0 + xorl %edx,%edi movl %ebx,%esi + movdqa %xmm0,0(%rsp) roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax - addl 24(%rsp),%ebp xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi movl %eax,%edi roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi -.byte 102,15,56,0,214 + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi movl %edx,%edi +.byte 102,15,56,0,214 roll $5,%edx - paddd %xmm9,%xmm1 - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - movdqa %xmm1,16(%rsp) - addl 36(%rsp),%ebx xorl %eax,%edi - psubd %xmm9,%xmm1 + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi + movdqa %xmm1,16(%rsp) roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 44(%rsp),%ebp xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi -.byte 102,15,56,0,222 + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi movl %ebp,%edi +.byte 102,15,56,0,222 roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax paddd %xmm9,%xmm2 - xorl %ebx,%esi addl %ebp,%edx - rorl $7,%eax - addl %esi,%edx - movdqa %xmm2,32(%rsp) addl 52(%rsp),%ecx - xorl %ebx,%edi - psubd %xmm9,%xmm2 + xorl %eax,%edi movl %edx,%esi + movdqa %xmm2,32(%rsp) roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 56(%rsp),%ebx xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2356,108 +2458,110 @@ _ssse3_shortcut: movl %esi,4(%r8) movl %esi,%ebx movl %ecx,8(%r8) + movl %ecx,%edi movl %edx,12(%r8) + xorl %edx,%edi movl %ebp,16(%r8) + andl %edi,%esi jmp .Loop_ssse3 .align 16 .Ldone_ssse3: addl 16(%rsp),%ebx - xorl %eax,%esi + xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 20(%rsp),%eax xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax - 
addl 24(%rsp),%ebp xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi movl %eax,%edi roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi movl %edx,%edi roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - addl 36(%rsp),%ebx xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 44(%rsp),%ebp xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx - addl 52(%rsp),%ecx xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 56(%rsp),%ebx xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2468,25 +2572,2825 @@ _ssse3_shortcut: movl %ecx,8(%r8) movl %edx,12(%r8) movl %ebp,16(%r8) - leaq 64(%rsp),%rsi - movq 0(%rsi),%r12 - movq 8(%rsi),%rbp - movq 16(%rsi),%rbx - leaq 24(%rsi),%rsp + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp .Lepilogue_ssse3: .byte 0xf3,0xc3 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 -.align 64 -K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 - -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 - -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.type sha1_block_data_order_avx,@function +.align 16 +sha1_block_data_order_avx: +_avx_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + vzeroupper + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + vmovdqa 64(%r11),%xmm6 + vmovdqa -64(%r11),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 
+ vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp .Loop_avx +.align 16 +.Loop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r11),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp 
+ addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r11),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl 
$5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl 
%ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r11),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl 
%ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_avx + vmovdqa 64(%r11),%xmm6 + vmovdqa -64(%r11),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl 
$7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_avx + +.align 16 +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha1_block_data_order_avx,.-sha1_block_data_order_avx +.type sha1_block_data_order_avx2,@function +.align 16 
+sha1_block_data_order_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + vzeroupper + movq %rax,%r14 + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r11),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r11),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r11),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp .Loop_avx2 +.align 32 +.Loop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp .Lalign32_1 +.align 32 +.Lalign32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd %ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl 
%eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r11),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx 
+ leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld $30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r11),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal 
(%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp .Lalign32_2 +.align 32 +.Lalign32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp + xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl 
%r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl 
$27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je .Ldone_avx2 + vmovdqu 64(%r11),%ymm6 + cmpq %r10,%rdi + ja .Last_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp .Last_avx2 + +.align 32 +.Last_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 
-52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r11),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 
-120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp .Lalign32_3 +.align 32 +.Lalign32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + movl 
%edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r11),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + vpslldq 
$12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%rsp),%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + jbe .Loop_avx2 + +.Ldone_avx2: + vzeroupper + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 
0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha256-mb-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha256-mb-x86_64.s new file mode 100644 index 00000000000000..0c06094084e48a --- /dev/null +++ b/deps/openssl/asm/x64-elf-gas/sha/sha256-mb-x86_64.s @@ -0,0 +1,7902 @@ +.text + + + +.globl sha256_multi_block +.type sha256_multi_block,@function +.align 32 +sha256_multi_block: + movq OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu .Lpbswap(%rip),%xmm6 + jmp .Loop + +.align 32 +.Loop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 
-96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn 
%xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd 
%xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld 
$13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 
+ movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + prefetcht0 63(%r8) + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + prefetcht0 63(%r9) + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + prefetcht0 63(%r10) + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + prefetcht0 63(%r11) + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx +.align 32 +.Loop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + 
movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld 
$22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld 
$25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld 
$21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + 
paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld 
$14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + 
movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz .Loop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa 
.Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_multi_block,.-sha256_multi_block +.type sha256_multi_block_shaext,@function +.align 32 +sha256_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 128(%rdi),%rdi + movq %rax,272(%rsp) +.Lbody_shaext: + leaq 256(%rsp),%rbx + leaq K256_shaext+128(%rip),%rbp + +.Loop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz .Ldone_shaext + + movq 0-128(%rdi),%xmm12 + movq 32-128(%rdi),%xmm4 + movq 64-128(%rdi),%xmm13 + movq 96-128(%rdi),%xmm5 + movq 128-128(%rdi),%xmm8 + movq 160-128(%rdi),%xmm9 + movq 192-128(%rdi),%xmm10 + movq 224-128(%rdi),%xmm11 + + punpckldq %xmm4,%xmm12 + punpckldq %xmm5,%xmm13 + punpckldq %xmm9,%xmm8 + punpckldq %xmm11,%xmm10 + movdqa K256_shaext-16(%rip),%xmm3 + + movdqa %xmm12,%xmm14 + movdqa %xmm13,%xmm15 + punpcklqdq %xmm8,%xmm12 + punpcklqdq %xmm10,%xmm13 + punpckhqdq %xmm8,%xmm14 + punpckhqdq %xmm10,%xmm15 + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm8 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm9 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm10 +.byte 102,68,15,56,0,195 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 + movdqu 48(%r9),%xmm11 + leaq 64(%r9),%r9 + + movdqa 0-128(%rbp),%xmm0 +.byte 102,15,56,0,235 + paddd %xmm4,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm0,%xmm1 + movdqa 0-128(%rbp),%xmm2 +.byte 102,68,15,56,0,203 + paddd %xmm8,%xmm2 + movdqa %xmm13,80(%rsp) +.byte 69,15,56,203,236 + pxor %xmm14,%xmm8 + movdqa %xmm2,%xmm0 + movdqa %xmm15,112(%rsp) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm12,64(%rsp) +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pxor %xmm14,%xmm8 + movdqa %xmm14,96(%rsp) + movdqa 16-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 102,15,56,0,243 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 16-128(%rbp),%xmm2 + paddd %xmm9,%xmm2 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + prefetcht0 127(%r8) +.byte 102,15,56,0,251 +.byte 102,68,15,56,0,211 + prefetcht0 127(%r9) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,68,15,56,0,219 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 32-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 32-128(%rbp),%xmm2 + paddd %xmm10,%xmm2 +.byte 69,15,56,203,236 +.byte 69,15,56,204,193 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,15,58,15,222,4 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 48-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + + movdqa %xmm1,%xmm0 + movdqa 48-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + 
movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 64-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 64-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 80-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 80-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 96-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 96-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 112-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 112-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 128-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 128-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 144-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 144-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 160-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 160-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 
15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 176-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 176-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 192-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 192-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 208-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 208-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 224-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 224-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 + nop +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movl $1,%ecx + pxor %xmm6,%xmm6 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + movdqa 240-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 + movq (%rbx),%xmm7 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 240-128(%rbp),%xmm2 + paddd %xmm11,%xmm2 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + pshufd $0,%xmm7,%xmm9 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + pshufd $85,%xmm7,%xmm10 + movdqa %xmm7,%xmm11 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pcmpgtd %xmm6,%xmm9 + pcmpgtd %xmm6,%xmm10 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pcmpgtd %xmm6,%xmm11 + movdqa K256_shaext-16(%rip),%xmm3 +.byte 69,15,56,203,247 + + pand %xmm9,%xmm13 + pand %xmm10,%xmm15 + pand %xmm9,%xmm12 + pand %xmm10,%xmm14 + paddd %xmm7,%xmm11 + + paddd 80(%rsp),%xmm13 + paddd 112(%rsp),%xmm15 + paddd 64(%rsp),%xmm12 + paddd 96(%rsp),%xmm14 + + movq %xmm11,(%rbx) + decl %edx + jnz .Loop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + + movdqa %xmm12,%xmm5 + movdqa %xmm13,%xmm6 + punpckldq %xmm14,%xmm12 + punpckhdq %xmm14,%xmm5 + punpckldq %xmm15,%xmm13 + punpckhdq %xmm15,%xmm6 + + movq %xmm12,0-128(%rdi) + psrldq $8,%xmm12 + movq %xmm5,128-128(%rdi) + psrldq $8,%xmm5 + movq %xmm12,32-128(%rdi) + movq %xmm5,160-128(%rdi) + + movq %xmm13,64-128(%rdi) + 
psrldq $8,%xmm13 + movq %xmm6,192-128(%rdi) + psrldq $8,%xmm6 + movq %xmm13,96-128(%rdi) + movq %xmm6,224-128(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz .Loop_grande_shaext + +.Ldone_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_shaext: + .byte 0xf3,0xc3 +.size sha256_multi_block_shaext,.-sha256_multi_block_shaext +.type sha256_multi_block_avx,@function +.align 32 +sha256_multi_block_avx: +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb .Lavx + testl $32,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody_avx: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone_avx + + vmovdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + vmovdqu 96-128(%rdi),%xmm11 + vmovdqu 128-128(%rdi),%xmm12 + vmovdqu 160-128(%rdi),%xmm13 + vmovdqu 192-128(%rdi),%xmm14 + vmovdqu 224-128(%rdi),%xmm15 + vmovdqu .Lpbswap(%rip),%xmm6 + jmp .Loop_avx + +.align 32 +.Loop_avx: + vpxor %xmm9,%xmm10,%xmm4 + vmovd 0(%r8),%xmm5 + vmovd 0(%r9),%xmm0 + vpinsrd $1,0(%r10),%xmm5,%xmm5 + vpinsrd $1,0(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 4(%r8),%xmm5 + vmovd 4(%r9),%xmm0 + vpinsrd $1,4(%r10),%xmm5,%xmm5 + vpinsrd $1,4(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,16-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + 
vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 8(%r8),%xmm5 + vmovd 8(%r9),%xmm0 + vpinsrd $1,8(%r10),%xmm5,%xmm5 + vpinsrd $1,8(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 12(%r8),%xmm5 + vmovd 12(%r9),%xmm0 + vpinsrd $1,12(%r10),%xmm5,%xmm5 + vpinsrd $1,12(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,48-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 16(%r8),%xmm5 + vmovd 16(%r9),%xmm0 + vpinsrd $1,16(%r10),%xmm5,%xmm5 + vpinsrd $1,16(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + 
vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 20(%r8),%xmm5 + vmovd 20(%r9),%xmm0 + vpinsrd $1,20(%r10),%xmm5,%xmm5 + vpinsrd $1,20(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,80-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 24(%r8),%xmm5 + vmovd 24(%r9),%xmm0 + vpinsrd $1,24(%r10),%xmm5,%xmm5 + vpinsrd $1,24(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 28(%r8),%xmm5 + vmovd 28(%r9),%xmm0 + vpinsrd $1,28(%r10),%xmm5,%xmm5 + vpinsrd $1,28(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,112-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld 
$13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovd 32(%r8),%xmm5 + vmovd 32(%r9),%xmm0 + vpinsrd $1,32(%r10),%xmm5,%xmm5 + vpinsrd $1,32(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 36(%r8),%xmm5 + vmovd 36(%r9),%xmm0 + vpinsrd $1,36(%r10),%xmm5,%xmm5 + vpinsrd $1,36(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,144-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 40(%r8),%xmm5 + vmovd 40(%r9),%xmm0 + vpinsrd $1,40(%r10),%xmm5,%xmm5 + vpinsrd $1,40(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd 
%xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 44(%r8),%xmm5 + vmovd 44(%r9),%xmm0 + vpinsrd $1,44(%r10),%xmm5,%xmm5 + vpinsrd $1,44(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,176-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 48(%r8),%xmm5 + vmovd 48(%r9),%xmm0 + vpinsrd $1,48(%r10),%xmm5,%xmm5 + vpinsrd $1,48(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 52(%r8),%xmm5 + vmovd 52(%r9),%xmm0 + vpinsrd $1,52(%r10),%xmm5,%xmm5 + vpinsrd $1,52(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,208-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + 
vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 56(%r8),%xmm5 + vmovd 56(%r9),%xmm0 + vpinsrd $1,56(%r10),%xmm5,%xmm5 + vpinsrd $1,56(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + vmovd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r10),%xmm5,%xmm5 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r11),%xmm0,%xmm0 + leaq 64(%r11),%r11 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,240-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r8) + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + prefetcht0 63(%r9) + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r10) + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + prefetcht0 63(%r11) + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx_avx +.align 32 +.Loop_16_xx_avx: + vmovdqu 16-128(%rax),%xmm6 + vpaddd 144-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 224-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld 
$11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 32-128(%rax),%xmm5 + vpaddd 160-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 240-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,16-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 48-128(%rax),%xmm6 + vpaddd 176-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 0-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld 
$30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 64-128(%rax),%xmm5 + vpaddd 192-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 16-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,48-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 80-128(%rax),%xmm6 + vpaddd 208-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 32-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor 
%xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 96-128(%rax),%xmm5 + vpaddd 224-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 48-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,80-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 112-128(%rax),%xmm6 + vpaddd 240-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 64-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 128-128(%rax),%xmm5 + vpaddd 0-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor 
%xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 80-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,112-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 144-128(%rax),%xmm6 + vpaddd 16-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 96-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 160-128(%rax),%xmm5 + vpaddd 32-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 112-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 
+ vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,144-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 176-128(%rax),%xmm6 + vpaddd 48-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 128-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 192-128(%rax),%xmm5 + vpaddd 64-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 144-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,176-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + 
+ vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 208-128(%rax),%xmm6 + vpaddd 80-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 160-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 224-128(%rax),%xmm5 + vpaddd 96-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 176-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,208-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + 
vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 240-128(%rax),%xmm6 + vpaddd 112-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 192-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 0-128(%rax),%xmm5 + vpaddd 128-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 208-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,240-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + decl %ecx + jnz 
.Loop_16_xx_avx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%xmm7 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa %xmm7,%xmm6 + vpcmpgtd %xmm0,%xmm6,%xmm6 + vpaddd %xmm6,%xmm7,%xmm7 + + vmovdqu 0-128(%rdi),%xmm0 + vpand %xmm6,%xmm8,%xmm8 + vmovdqu 32-128(%rdi),%xmm1 + vpand %xmm6,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm2 + vpand %xmm6,%xmm10,%xmm10 + vmovdqu 96-128(%rdi),%xmm5 + vpand %xmm6,%xmm11,%xmm11 + vpaddd %xmm0,%xmm8,%xmm8 + vmovdqu 128-128(%rdi),%xmm0 + vpand %xmm6,%xmm12,%xmm12 + vpaddd %xmm1,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm1 + vpand %xmm6,%xmm13,%xmm13 + vpaddd %xmm2,%xmm10,%xmm10 + vmovdqu 192-128(%rdi),%xmm2 + vpand %xmm6,%xmm14,%xmm14 + vpaddd %xmm5,%xmm11,%xmm11 + vmovdqu 224-128(%rdi),%xmm5 + vpand %xmm6,%xmm15,%xmm15 + vpaddd %xmm0,%xmm12,%xmm12 + vpaddd %xmm1,%xmm13,%xmm13 + vmovdqu %xmm8,0-128(%rdi) + vpaddd %xmm2,%xmm14,%xmm14 + vmovdqu %xmm9,32-128(%rdi) + vpaddd %xmm5,%xmm15,%xmm15 + vmovdqu %xmm10,64-128(%rdi) + vmovdqu %xmm11,96-128(%rdi) + vmovdqu %xmm12,128-128(%rdi) + vmovdqu %xmm13,160-128(%rdi) + vmovdqu %xmm14,192-128(%rdi) + vmovdqu %xmm15,224-128(%rdi) + + vmovdqu %xmm7,(%rbx) + vmovdqu .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande_avx + +.Ldone_avx: + movq 272(%rsp),%rax + vzeroupper + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha256_multi_block_avx,.-sha256_multi_block_avx +.type sha256_multi_block_avx2,@function +.align 32 +sha256_multi_block_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +.Lbody_avx2: + leaq K256+128(%rip),%rbp + leaq 128(%rdi),%rdi + +.Loop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + movq 0(%rsi),%r12 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + movq 16(%rsi),%r13 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + movq 32(%rsi),%r14 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + movq 48(%rsi),%r15 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + movq 64(%rsi),%r8 + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + movq 80(%rsi),%r9 + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + movq 96(%rsi),%r10 + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + movq 112(%rsi),%r11 + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0-128(%rdi),%ymm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%ymm9 + leaq 256+128(%rsp),%rbx + vmovdqu 64-128(%rdi),%ymm10 + vmovdqu 96-128(%rdi),%ymm11 + vmovdqu 128-128(%rdi),%ymm12 + vmovdqu 160-128(%rdi),%ymm13 + vmovdqu 192-128(%rdi),%ymm14 + vmovdqu 224-128(%rdi),%ymm15 + vmovdqu .Lpbswap(%rip),%ymm6 + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vpxor %ymm9,%ymm10,%ymm4 + vmovd 0(%r12),%xmm5 
+ vmovd 0(%r8),%xmm0 + vmovd 0(%r13),%xmm1 + vmovd 0(%r9),%xmm2 + vpinsrd $1,0(%r14),%xmm5,%xmm5 + vpinsrd $1,0(%r10),%xmm0,%xmm0 + vpinsrd $1,0(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,0(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 4(%r12),%xmm5 + vmovd 4(%r8),%xmm0 + vmovd 4(%r13),%xmm1 + vmovd 4(%r9),%xmm2 + vpinsrd $1,4(%r14),%xmm5,%xmm5 + vpinsrd $1,4(%r10),%xmm0,%xmm0 + vpinsrd $1,4(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,4(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,32-128(%rax) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 8(%r12),%xmm5 + vmovd 8(%r8),%xmm0 + vmovd 8(%r13),%xmm1 + vmovd 8(%r9),%xmm2 + vpinsrd $1,8(%r14),%xmm5,%xmm5 + vpinsrd $1,8(%r10),%xmm0,%xmm0 + vpinsrd $1,8(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,8(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor 
%ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 12(%r12),%xmm5 + vmovd 12(%r8),%xmm0 + vmovd 12(%r13),%xmm1 + vmovd 12(%r9),%xmm2 + vpinsrd $1,12(%r14),%xmm5,%xmm5 + vpinsrd $1,12(%r10),%xmm0,%xmm0 + vpinsrd $1,12(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,12(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,96-128(%rax) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 16(%r12),%xmm5 + vmovd 16(%r8),%xmm0 + vmovd 16(%r13),%xmm1 + vmovd 16(%r9),%xmm2 + vpinsrd $1,16(%r14),%xmm5,%xmm5 + vpinsrd $1,16(%r10),%xmm0,%xmm0 + vpinsrd $1,16(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,16(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 20(%r12),%xmm5 + vmovd 20(%r8),%xmm0 + vmovd 20(%r13),%xmm1 + vmovd 20(%r9),%xmm2 + vpinsrd $1,20(%r14),%xmm5,%xmm5 + vpinsrd $1,20(%r10),%xmm0,%xmm0 + vpinsrd $1,20(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,20(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,160-128(%rax) + vpaddd 
%ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 24(%r12),%xmm5 + vmovd 24(%r8),%xmm0 + vmovd 24(%r13),%xmm1 + vmovd 24(%r9),%xmm2 + vpinsrd $1,24(%r14),%xmm5,%xmm5 + vpinsrd $1,24(%r10),%xmm0,%xmm0 + vpinsrd $1,24(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,24(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 28(%r12),%xmm5 + vmovd 28(%r8),%xmm0 + vmovd 28(%r13),%xmm1 + vmovd 28(%r9),%xmm2 + vpinsrd $1,28(%r14),%xmm5,%xmm5 + vpinsrd $1,28(%r10),%xmm0,%xmm0 + vpinsrd $1,28(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,28(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,224-128(%rax) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd 
%ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovd 32(%r12),%xmm5 + vmovd 32(%r8),%xmm0 + vmovd 32(%r13),%xmm1 + vmovd 32(%r9),%xmm2 + vpinsrd $1,32(%r14),%xmm5,%xmm5 + vpinsrd $1,32(%r10),%xmm0,%xmm0 + vpinsrd $1,32(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,32(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 36(%r12),%xmm5 + vmovd 36(%r8),%xmm0 + vmovd 36(%r13),%xmm1 + vmovd 36(%r9),%xmm2 + vpinsrd $1,36(%r14),%xmm5,%xmm5 + vpinsrd $1,36(%r10),%xmm0,%xmm0 + vpinsrd $1,36(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,36(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,288-256-128(%rbx) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 40(%r12),%xmm5 + vmovd 40(%r8),%xmm0 + vmovd 40(%r13),%xmm1 + vmovd 40(%r9),%xmm2 + vpinsrd $1,40(%r14),%xmm5,%xmm5 + vpinsrd $1,40(%r10),%xmm0,%xmm0 + vpinsrd $1,40(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,40(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + 
vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 44(%r12),%xmm5 + vmovd 44(%r8),%xmm0 + vmovd 44(%r13),%xmm1 + vmovd 44(%r9),%xmm2 + vpinsrd $1,44(%r14),%xmm5,%xmm5 + vpinsrd $1,44(%r10),%xmm0,%xmm0 + vpinsrd $1,44(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,44(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,352-256-128(%rbx) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 48(%r12),%xmm5 + vmovd 48(%r8),%xmm0 + vmovd 48(%r13),%xmm1 + vmovd 48(%r9),%xmm2 + vpinsrd $1,48(%r14),%xmm5,%xmm5 + vpinsrd $1,48(%r10),%xmm0,%xmm0 + vpinsrd $1,48(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,48(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 52(%r12),%xmm5 + vmovd 52(%r8),%xmm0 + vmovd 52(%r13),%xmm1 + vmovd 52(%r9),%xmm2 + vpinsrd $1,52(%r14),%xmm5,%xmm5 + vpinsrd $1,52(%r10),%xmm0,%xmm0 + vpinsrd $1,52(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,52(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb 
%ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,416-256-128(%rbx) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 56(%r12),%xmm5 + vmovd 56(%r8),%xmm0 + vmovd 56(%r13),%xmm1 + vmovd 56(%r9),%xmm2 + vpinsrd $1,56(%r14),%xmm5,%xmm5 + vpinsrd $1,56(%r10),%xmm0,%xmm0 + vpinsrd $1,56(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,56(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 60(%r12),%xmm5 + leaq 64(%r12),%r12 + vmovd 60(%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd 60(%r13),%xmm1 + leaq 64(%r13),%r13 + vmovd 60(%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r14),%xmm5,%xmm5 + leaq 64(%r14),%r14 + vpinsrd $1,60(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r15),%xmm1,%xmm1 + leaq 64(%r15),%r15 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,60(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,480-256-128(%rbx) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r12) + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + prefetcht0 63(%r13) + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + prefetcht0 63(%r15) + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + 
vpsrld $13,%ymm9,%ymm1 + prefetcht0 63(%r8) + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + prefetcht0 63(%r9) + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r10) + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + prefetcht0 63(%r11) + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%ymm5 + movl $3,%ecx + jmp .Loop_16_xx_avx2 +.align 32 +.Loop_16_xx_avx2: + vmovdqu 32-128(%rax),%ymm6 + vpaddd 288-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 448-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 64-128(%rax),%ymm5 + vpaddd 320-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 480-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,32-128(%rax) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 
+ vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 96-128(%rax),%ymm6 + vpaddd 352-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 0-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 128-128(%rax),%ymm5 + vpaddd 384-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 32-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,96-128(%rax) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 160-128(%rax),%ymm6 + vpaddd 416-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld 
$25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 64-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 192-128(%rax),%ymm5 + vpaddd 448-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 96-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,160-128(%rax) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 224-128(%rax),%ymm6 + vpaddd 480-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 128-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld 
$19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 256-256-128(%rbx),%ymm5 + vpaddd 0-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 160-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,224-128(%rax) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 288-256-128(%rbx),%ymm6 + vpaddd 32-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 192-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd 
-128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 320-256-128(%rbx),%ymm5 + vpaddd 64-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 224-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,288-256-128(%rbx) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 352-256-128(%rbx),%ymm6 + vpaddd 96-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 256-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor 
%ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 384-256-128(%rbx),%ymm5 + vpaddd 128-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 288-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,352-256-128(%rbx) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 416-256-128(%rbx),%ymm6 + vpaddd 160-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 320-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd 
%ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 448-256-128(%rbx),%ymm5 + vpaddd 192-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 352-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,416-256-128(%rbx) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 480-256-128(%rbx),%ymm6 + vpaddd 224-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 384-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 0-128(%rax),%ymm5 + vpaddd 256-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor 
%ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 416-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,480-256-128(%rbx) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + decl %ecx + jnz .Loop_16_xx_avx2 + + movl $1,%ecx + leaq 512(%rsp),%rbx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%ymm7 + vpxor %ymm0,%ymm0,%ymm0 + vmovdqa %ymm7,%ymm6 + vpcmpgtd %ymm0,%ymm6,%ymm6 + vpaddd %ymm6,%ymm7,%ymm7 + + vmovdqu 0-128(%rdi),%ymm0 + vpand %ymm6,%ymm8,%ymm8 + vmovdqu 32-128(%rdi),%ymm1 + vpand %ymm6,%ymm9,%ymm9 + vmovdqu 64-128(%rdi),%ymm2 + vpand %ymm6,%ymm10,%ymm10 + vmovdqu 96-128(%rdi),%ymm5 + vpand %ymm6,%ymm11,%ymm11 + vpaddd %ymm0,%ymm8,%ymm8 + vmovdqu 128-128(%rdi),%ymm0 + vpand %ymm6,%ymm12,%ymm12 + vpaddd %ymm1,%ymm9,%ymm9 + vmovdqu 160-128(%rdi),%ymm1 + vpand %ymm6,%ymm13,%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vmovdqu 192-128(%rdi),%ymm2 + vpand %ymm6,%ymm14,%ymm14 + vpaddd %ymm5,%ymm11,%ymm11 + vmovdqu 224-128(%rdi),%ymm5 + vpand %ymm6,%ymm15,%ymm15 + vpaddd %ymm0,%ymm12,%ymm12 + vpaddd %ymm1,%ymm13,%ymm13 + vmovdqu %ymm8,0-128(%rdi) + vpaddd %ymm2,%ymm14,%ymm14 + vmovdqu %ymm9,32-128(%rdi) + vpaddd %ymm5,%ymm15,%ymm15 + vmovdqu %ymm10,64-128(%rdi) + vmovdqu %ymm11,96-128(%rdi) + vmovdqu %ymm12,128-128(%rdi) + vmovdqu %ymm13,160-128(%rdi) + vmovdqu %ymm14,192-128(%rdi) + vmovdqu %ymm15,224-128(%rdi) + + vmovdqu %ymm7,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu .Lpbswap(%rip),%ymm6 + decl %edx + jnz .Loop_avx2 + + + + + + + +.Ldone_avx2: + movq 544(%rsp),%rax + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha256_multi_block_avx2,.-sha256_multi_block_avx2 +.align 256 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 
3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 
1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3600352804,3600352804,3600352804,3600352804 +.long 3600352804,3600352804,3600352804,3600352804 +.long 4094571909,4094571909,4094571909,4094571909 +.long 4094571909,4094571909,4094571909,4094571909 +.long 275423344,275423344,275423344,275423344 +.long 275423344,275423344,275423344,275423344 +.long 430227734,430227734,430227734,430227734 +.long 430227734,430227734,430227734,430227734 +.long 506948616,506948616,506948616,506948616 +.long 506948616,506948616,506948616,506948616 +.long 659060556,659060556,659060556,659060556 +.long 659060556,659060556,659060556,659060556 +.long 883997877,883997877,883997877,883997877 +.long 883997877,883997877,883997877,883997877 +.long 958139571,958139571,958139571,958139571 +.long 958139571,958139571,958139571,958139571 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1955562222,1955562222,1955562222,1955562222 +.long 1955562222,1955562222,1955562222,1955562222 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2756734187,2756734187,2756734187,2756734187 +.long 2756734187,2756734187,2756734187,2756734187 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3329325298,3329325298,3329325298,3329325298 +.long 3329325298,3329325298,3329325298,3329325298 +.Lpbswap: +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +K256_shaext: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 
0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha256-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha256-x86_64.s index 576d7d8bfdb897..a2fbedaf8c6b9e 100644 --- a/deps/openssl/asm/x64-elf-gas/sha/sha256-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/sha/sha256-x86_64.s @@ -5,6 +5,22 @@ .type sha256_block_data_order,@function .align 16 sha256_block_data_order: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $536870912,%r11d + jnz _shaext_shortcut + andl $296,%r11d + cmpl $296,%r11d + je .Lavx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je .Lavx_shortcut + testl $512,%r10d + jnz .Lssse3_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -22,8 +38,6 @@ sha256_block_data_order: movq %r11,64+24(%rsp) .Lprologue: - leaq K256(%rip),%rbp - movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx @@ -36,1694 +50,1632 @@ sha256_block_data_order: .align 16 .Lloop: - xorq %rdi,%rdi + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi movl 0(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,0(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,0(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp + addl %r14d,%r11d movl 4(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,4(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,4(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp + addl %r14d,%r10d movl 8(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d - movl %r12d,8(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,8(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl 
(%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp + addl %r14d,%r9d movl 12(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,12(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,12(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp + addl %r14d,%r8d movl 16(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,16(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,16(%rsp) xorl %r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d - xorl %ecx,%r15d + addl %r15d,%r12d - xorl %r10d,%edx + movl %r8d,%r15d + addl (%rbp),%r12d xorl %r8d,%r14d - addl %r15d,%r12d - movl %r9d,%r15d + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp + addl %r14d,%edx movl 20(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,20(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,20(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp + addl %r14d,%ecx movl 24(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,24(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,24(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl 
%edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp + addl %r14d,%ebx movl 28(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,28(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,28(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax + leaq 20(%rbp),%rbp + addl %r14d,%eax movl 32(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,32(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,32(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp + addl %r14d,%r11d movl 36(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,36(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,36(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp + addl %r14d,%r10d movl 40(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d - movl %r12d,40(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,40(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl 
%r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp + addl %r14d,%r9d movl 44(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,44(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,44(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp + addl %r14d,%r8d movl 48(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,48(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,48(%rsp) xorl %r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx - rorl $11,%r14d - xorl %eax,%r13d + rorl $5,%r13d + addl %edx,%r12d xorl %ecx,%r15d - xorl %r10d,%edx - xorl %r8d,%r14d + rorl $11,%r14d + xorl %eax,%r13d addl %r15d,%r12d - movl %r9d,%r15d + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp + addl %r14d,%edx movl 52(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,52(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,52(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp + addl %r14d,%ecx movl 56(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,56(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,56(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp + addl %r14d,%ebx movl 60(%rsi),%r12d movl %r9d,%r13d movl 
%ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,60(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,60(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax + leaq 20(%rbp),%rbp jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 56(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 36(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d addl 0(%rsp),%r12d movl %r8d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,0(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,0(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp movl 8(%rsp),%r13d - movl 60(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 60(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 40(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d addl 4(%rsp),%r12d movl %edx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,4(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,4(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d 
- leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp movl 12(%rsp),%r13d - movl 0(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 0(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 44(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d addl 8(%rsp),%r12d movl %ecx,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d - movl %r12d,8(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,8(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp movl 16(%rsp),%r13d - movl 4(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 4(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 48(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d addl 12(%rsp),%r12d movl %ebx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,12(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,12(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp movl 20(%rsp),%r13d - movl 8(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 8(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 52(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d addl 16(%rsp),%r12d movl %eax,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,16(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,16(%rsp) xorl 
%r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d - xorl %ecx,%r15d + addl %r15d,%r12d - xorl %r10d,%edx + movl %r8d,%r15d + addl (%rbp),%r12d xorl %r8d,%r14d - addl %r15d,%r12d - movl %r9d,%r15d + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp movl 24(%rsp),%r13d - movl 12(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 12(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 56(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d addl 20(%rsp),%r12d movl %r11d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,20(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,20(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp movl 28(%rsp),%r13d - movl 16(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 16(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 60(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d addl 24(%rsp),%r12d movl %r10d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,24(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,24(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp movl 32(%rsp),%r13d - movl 20(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 20(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi - rorl 
$7,%r12d xorl %r12d,%r13d - movl 0(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d addl 28(%rsp),%r12d movl %r9d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,28(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,28(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax + leaq 20(%rbp),%rbp movl 36(%rsp),%r13d - movl 24(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 24(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 4(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d addl 32(%rsp),%r12d movl %r8d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,32(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,32(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp movl 40(%rsp),%r13d - movl 28(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 28(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 8(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d addl 36(%rsp),%r12d movl %edx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,36(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,36(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl 
%ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp movl 44(%rsp),%r13d - movl 32(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 32(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 12(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d addl 40(%rsp),%r12d movl %ecx,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d - movl %r12d,40(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,40(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp movl 48(%rsp),%r13d - movl 36(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 36(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 16(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d addl 44(%rsp),%r12d movl %ebx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,44(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,44(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp movl 52(%rsp),%r13d - movl 40(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 40(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 20(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl 
%r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d addl 48(%rsp),%r12d movl %eax,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,48(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,48(%rsp) xorl %r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d - xorl %ecx,%r15d + addl %r15d,%r12d - xorl %r10d,%edx + movl %r8d,%r15d + addl (%rbp),%r12d xorl %r8d,%r14d - addl %r15d,%r12d - movl %r9d,%r15d + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp movl 56(%rsp),%r13d - movl 44(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 44(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 24(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d addl 52(%rsp),%r12d movl %r11d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,52(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,52(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp movl 60(%rsp),%r13d - movl 48(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 48(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 28(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d addl 56(%rsp),%r12d movl %r10d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,56(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,56(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl 
%r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp movl 0(%rsp),%r13d - movl 52(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 52(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 32(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d addl 60(%rsp),%r12d movl %r9d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,60(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,60(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax - cmpq $64,%rdi - jb .Lrounds_16_xx + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx movq 64+0(%rsp),%rdi + addl %r14d,%eax leaq 64(%rsi),%rsi addl 0(%rdi),%eax @@ -1762,18 +1714,3645 @@ sha256_block_data_order: .type K256,@object K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 
0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.type sha256_block_data_order_shaext,@function +.align 64 +sha256_block_data_order_shaext: +_shaext_shortcut: + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + 
movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shaext + + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + .byte 0xf3,0xc3 +.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext +.type sha256_block_data_order_ssse3,@function +.align 64 +sha256_block_data_order_ssse3: +.Lssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd 
%xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl 
%r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa 
%xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl 
%r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl 
%r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 +.type sha256_block_data_order_avx,@function +.align 64 +sha256_block_data_order_avx: +.Lavx_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq 
%rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl 
%ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + 
movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + 
addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lavx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 
12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + 
movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_avx + + movq 64+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha256_block_data_order_avx,.-sha256_block_data_order_avx +.type 
sha256_block_data_order_avx2,@function +.align 64 +sha256_block_data_order_avx2: +.Lavx2_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $544,%rsp + shlq $4,%rdx + andq $-1024,%rsp + leaq (%rsi,%rdx,4),%rdx + addq $448,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_avx2: + + vzeroupper + subq $-64,%rsi + movl 0(%rdi),%eax + movq %rsi,%r12 + movl 4(%rdi),%ebx + cmpq %rdx,%rsi + movl 8(%rdi),%ecx + cmoveq %rsp,%r12 + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%ymm8 + vmovdqa K256+512+64(%rip),%ymm9 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi),%xmm0 + vmovdqu -64+16(%rsi),%xmm1 + vmovdqu -64+32(%rsi),%xmm2 + vmovdqu -64+48(%rsi),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%edi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%edi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd 
$80,%ymm0,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 0(%rbp),%ymm0,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm1,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd 
%ymm6,%ymm1,%ymm1 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 32(%rbp),%ymm1,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm2,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 64(%rbp),%ymm2,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl 
$6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm3,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 96(%rbp),%ymm3,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl 
%eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal 
(%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + 
leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rbp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + cmpq 80(%rbp),%rsi + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%edi + xorl %ecx,%edi + movl %r9d,%r12d + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal 
(%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rsp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + leaq 128(%rsi),%rsi + addl 24(%rdi),%r10d + movq %rsi,%r12 + addl 28(%rdi),%r11d + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + cmoveq %rsp,%r12 + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 64+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha256_block_data_order_avx2,.-sha256_block_data_order_avx2 diff --git a/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s b/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s index 7e578547ee50e0..a1021c17a966b8 100644 --- a/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/sha/sha512-x86_64.s @@ -5,6 +5,20 @@ .type sha512_block_data_order,@function .align 16 sha512_block_data_order: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $2048,%r10d + jnz .Lxop_shortcut + andl $296,%r11d + cmpl $296,%r11d + je .Lavx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je .Lavx_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -22,8 +36,6 @@ sha512_block_data_order: movq %r11,128+24(%rsp) .Lprologue: - leaq K512(%rip),%rbp - movq 0(%rdi),%rax movq 8(%rdi),%rbx movq 16(%rdi),%rcx @@ -36,1694 +48,1632 @@ sha512_block_data_order: .align 16 .Lloop: - xorq %rdi,%rdi + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi movq 0(%rsi),%r12 movq %r8,%r13 movq 
%rax,%r14 bswapq %r12 rorq $23,%r13 movq %r9,%r15 - movq %r12,0(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,0(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp + addq %r14,%r11 movq 8(%rsi),%r12 movq %rdx,%r13 movq %r11,%r14 bswapq %r12 rorq $23,%r13 - movq %r8,%r15 - movq %r12,8(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,8(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp + addq %r14,%r10 movq 16(%rsi),%r12 movq %rcx,%r13 movq %r10,%r14 bswapq %r12 rorq $23,%r13 movq %rdx,%r15 - movq %r12,16(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,16(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp + addq %r14,%r9 movq 24(%rsi),%r12 movq %rbx,%r13 movq %r9,%r14 bswapq %r12 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,24(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,24(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp + addq %r14,%r8 movq 32(%rsi),%r12 movq %rax,%r13 movq %r8,%r14 bswapq %r12 rorq $23,%r13 movq %rbx,%r15 - movq %r12,32(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,32(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 - xorq %rcx,%r15 + addq %r15,%r12 - 
xorq %r10,%rdx + movq %r8,%r15 + addq (%rbp),%r12 xorq %r8,%r14 - addq %r15,%r12 - movq %r9,%r15 + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp + addq %r14,%rdx movq 40(%rsi),%r12 movq %r11,%r13 movq %rdx,%r14 bswapq %r12 rorq $23,%r13 - movq %rax,%r15 - movq %r12,40(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,40(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp + addq %r14,%rcx movq 48(%rsi),%r12 movq %r10,%r13 movq %rcx,%r14 bswapq %r12 rorq $23,%r13 movq %r11,%r15 - movq %r12,48(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,48(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp + addq %r14,%rbx movq 56(%rsi),%r12 movq %r9,%r13 movq %rbx,%r14 bswapq %r12 rorq $23,%r13 - movq %r10,%r15 - movq %r12,56(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,56(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax + leaq 24(%rbp),%rbp + addq %r14,%rax movq 64(%rsi),%r12 movq %r8,%r13 movq %rax,%r14 bswapq %r12 rorq $23,%r13 movq %r9,%r15 - movq %r12,64(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,64(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp + 
addq %r14,%r11 movq 72(%rsi),%r12 movq %rdx,%r13 movq %r11,%r14 bswapq %r12 rorq $23,%r13 - movq %r8,%r15 - movq %r12,72(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,72(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp + addq %r14,%r10 movq 80(%rsi),%r12 movq %rcx,%r13 movq %r10,%r14 bswapq %r12 rorq $23,%r13 movq %rdx,%r15 - movq %r12,80(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,80(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp + addq %r14,%r9 movq 88(%rsi),%r12 movq %rbx,%r13 movq %r9,%r14 bswapq %r12 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,88(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,88(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp + addq %r14,%r8 movq 96(%rsi),%r12 movq %rax,%r13 movq %r8,%r14 bswapq %r12 rorq $23,%r13 movq %rbx,%r15 - movq %r12,96(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,96(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx - rorq $6,%r14 - xorq %rax,%r13 + rorq $4,%r13 + addq %rdx,%r12 xorq %rcx,%r15 - xorq %r10,%rdx - xorq %r8,%r14 + rorq $6,%r14 + xorq %rax,%r13 addq %r15,%r12 - movq %r9,%r15 + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp + addq %r14,%rdx movq 104(%rsi),%r12 movq %r11,%r13 movq %rdx,%r14 bswapq %r12 rorq $23,%r13 - movq %rax,%r15 - movq %r12,104(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,104(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 
- movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp + addq %r14,%rcx movq 112(%rsi),%r12 movq %r10,%r13 movq %rcx,%r14 bswapq %r12 rorq $23,%r13 movq %r11,%r15 - movq %r12,112(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,112(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp + addq %r14,%rbx movq 120(%rsi),%r12 movq %r9,%r13 movq %rbx,%r14 bswapq %r12 rorq $23,%r13 - movq %r10,%r15 - movq %r12,120(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,120(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax + leaq 24(%rbp),%rbp jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: movq 8(%rsp),%r13 - movq 112(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 112(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 72(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 addq 0(%rsp),%r12 movq %r8,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rax,%r14 rorq $23,%r13 movq %r9,%r15 - movq %r12,0(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,0(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp movq 16(%rsp),%r13 - movq 120(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 120(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 
+ movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 80(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 addq 8(%rsp),%r12 movq %rdx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r11,%r14 rorq $23,%r13 - movq %r8,%r15 - movq %r12,8(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,8(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp movq 24(%rsp),%r13 - movq 0(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 0(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 88(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 addq 16(%rsp),%r12 movq %rcx,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r10,%r14 rorq $23,%r13 movq %rdx,%r15 - movq %r12,16(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,16(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp movq 32(%rsp),%r13 - movq 8(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 8(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 96(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 addq 24(%rsp),%r12 movq %rbx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r9,%r14 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,24(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,24(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq 
%r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp movq 40(%rsp),%r13 - movq 16(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 16(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 104(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 addq 32(%rsp),%r12 movq %rax,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r8,%r14 rorq $23,%r13 movq %rbx,%r15 - movq %r12,32(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,32(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 - xorq %rcx,%r15 + addq %r15,%r12 - xorq %r10,%rdx + movq %r8,%r15 + addq (%rbp),%r12 xorq %r8,%r14 - addq %r15,%r12 - movq %r9,%r15 + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp movq 48(%rsp),%r13 - movq 24(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 24(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 112(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 addq 40(%rsp),%r12 movq %r11,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rdx,%r14 rorq $23,%r13 - movq %rax,%r15 - movq %r12,40(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,40(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp movq 56(%rsp),%r13 - movq 32(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 32(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 120(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 addq 48(%rsp),%r12 movq %r10,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rcx,%r14 rorq $23,%r13 movq %r11,%r15 - movq %r12,48(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,48(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq 
%r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp movq 64(%rsp),%r13 - movq 40(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 40(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 0(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 addq 56(%rsp),%r12 movq %r9,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rbx,%r14 rorq $23,%r13 - movq %r10,%r15 - movq %r12,56(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,56(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax + leaq 24(%rbp),%rbp movq 72(%rsp),%r13 - movq 48(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 48(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 8(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 addq 64(%rsp),%r12 movq %r8,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rax,%r14 rorq $23,%r13 movq %r9,%r15 - movq %r12,64(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,64(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp movq 80(%rsp),%r13 - movq 56(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 56(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 16(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 addq 72(%rsp),%r12 movq %rdx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r11,%r14 rorq $23,%r13 - movq %r8,%r15 - movq %r12,72(%rsp) + movq %r8,%rdi - 
rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,72(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp movq 88(%rsp),%r13 - movq 64(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 64(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 24(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 addq 80(%rsp),%r12 movq %rcx,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r10,%r14 rorq $23,%r13 movq %rdx,%r15 - movq %r12,80(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,80(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp movq 96(%rsp),%r13 - movq 72(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 72(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 32(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 addq 88(%rsp),%r12 movq %rbx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r9,%r14 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,88(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,88(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp movq 104(%rsp),%r13 - movq 80(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 80(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 40(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq 
%r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 addq 96(%rsp),%r12 movq %rax,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r8,%r14 rorq $23,%r13 movq %rbx,%r15 - movq %r12,96(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,96(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 - xorq %rcx,%r15 + addq %r15,%r12 - xorq %r10,%rdx + movq %r8,%r15 + addq (%rbp),%r12 xorq %r8,%r14 - addq %r15,%r12 - movq %r9,%r15 + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp movq 112(%rsp),%r13 - movq 88(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 88(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 48(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 addq 104(%rsp),%r12 movq %r11,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rdx,%r14 rorq $23,%r13 - movq %rax,%r15 - movq %r12,104(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,104(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp movq 120(%rsp),%r13 - movq 96(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 96(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 56(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 addq 112(%rsp),%r12 movq %r10,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rcx,%r14 rorq $23,%r13 movq %r11,%r15 - movq %r12,112(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,112(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp movq 0(%rsp),%r13 - movq 104(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 
104(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 64(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 addq 120(%rsp),%r12 movq %r9,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rbx,%r14 rorq $23,%r13 - movq %r10,%r15 - movq %r12,120(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,120(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax - cmpq $80,%rdi - jb .Lrounds_16_xx + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz .Lrounds_16_xx movq 128+0(%rsp),%rdi + addq %r14,%rax leaq 128(%rsi),%rsi addq 0(%rdi),%rax @@ -1762,42 +1712,3654 @@ sha512_block_data_order: .type K512,@object K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe .quad 0xd807aa98a3030242,0x12835b0145706fbe .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 .quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 .quad 0x983e5152ee66dfab,0xa831c66d2db43210 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 .quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 .quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 .quad 0xd192e819d6ef5218,0xd69906245565a910 .quad 
0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 .quad 0x90befffa23631e28,0xa4506cebde82bde9 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 .quad 0xca273eceea26619c,0xd186b8c721c0c207 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 .quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 .quad 0x28db77f523047d84,0x32caab7b40c72493 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.type sha512_block_data_order_xop,@function +.align 64 +sha512_block_data_order_xop: +.Lxop_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue_xop: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_xop +.align 16 +.Lloop_xop: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 
+ rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm0,%xmm0 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,223,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm7,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm0,%xmm0 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm1,%xmm1 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,216,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm0,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm1,%xmm1 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm2,%xmm2 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,217,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm1,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor 
%xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm2,%xmm2 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm3,%xmm3 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,218,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm2,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm3,%xmm3 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm4,%xmm4 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,219,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm3,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm4,%xmm4 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm5,%xmm5 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor 
%xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,220,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm4,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm5,%xmm5 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm6,%xmm6 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,221,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm5,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm6,%xmm6 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm7,%xmm7 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,222,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm6,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm7,%xmm7 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq 
%r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lxop_00_47 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq 
%rax,%r14 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi 
+ addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_xop + + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_xop: + .byte 0xf3,0xc3 +.size sha512_block_data_order_xop,.-sha512_block_data_order_xop +.type sha512_block_data_order_avx,@function +.align 64 +sha512_block_data_order_avx: +.Lavx_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq 
%r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + 
movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq 
%r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + 
vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lavx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq 
%rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 
+ addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop_avx + + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx: + .byte 0xf3,0xc3 +.size sha512_block_data_order_avx,.-sha512_block_data_order_avx +.type sha512_block_data_order_avx2,@function +.align 64 +sha512_block_data_order_avx2: +.Lavx2_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $1312,%rsp + shlq $4,%rdx + andq $-2048,%rsp + leaq (%rsi,%rdx,8),%rdx + addq $1152,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue_avx2: + + vzeroupper + subq $-128,%rsi + movq 0(%rdi),%rax + movq %rsi,%r12 + movq 8(%rdi),%rbx + cmpq %rdx,%rsi + movq 16(%rdi),%rcx + cmoveq %rsp,%r12 + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqu -128(%rsi),%xmm0 + vmovdqu -128+16(%rsi),%xmm1 + vmovdqu -128+32(%rsi),%xmm2 + leaq K512+128(%rip),%rbp + vmovdqu -128+48(%rsi),%xmm3 + vmovdqu -128+64(%rsi),%xmm4 + vmovdqu -128+80(%rsi),%xmm5 + vmovdqu -128+96(%rsi),%xmm6 + vmovdqu -128+112(%rsi),%xmm7 + + vmovdqa 1152(%rbp),%ymm10 + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm10,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm10,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + vpshufb %ymm10,%ymm2,%ymm2 + vinserti128 $1,64(%r12),%ymm4,%ymm4 + vpshufb %ymm10,%ymm3,%ymm3 + vinserti128 $1,80(%r12),%ymm5,%ymm5 + vpshufb 
%ymm10,%ymm4,%ymm4 + vinserti128 $1,96(%r12),%ymm6,%ymm6 + vpshufb %ymm10,%ymm5,%ymm5 + vinserti128 $1,112(%r12),%ymm7,%ymm7 + + vpaddq -128(%rbp),%ymm0,%ymm8 + vpshufb %ymm10,%ymm6,%ymm6 + vpaddq -96(%rbp),%ymm1,%ymm9 + vpshufb %ymm10,%ymm7,%ymm7 + vpaddq -64(%rbp),%ymm2,%ymm10 + vpaddq -32(%rbp),%ymm3,%ymm11 + vmovdqa %ymm8,0(%rsp) + vpaddq 0(%rbp),%ymm4,%ymm8 + vmovdqa %ymm9,32(%rsp) + vpaddq 32(%rbp),%ymm5,%ymm9 + vmovdqa %ymm10,64(%rsp) + vpaddq 64(%rbp),%ymm6,%ymm10 + vmovdqa %ymm11,96(%rsp) + leaq -128(%rsp),%rsp + vpaddq 96(%rbp),%ymm7,%ymm11 + vmovdqa %ymm8,0(%rsp) + xorq %r14,%r14 + vmovdqa %ymm9,32(%rsp) + movq %rbx,%rdi + vmovdqa %ymm10,64(%rsp) + xorq %rcx,%rdi + vmovdqa %ymm11,96(%rsp) + movq %r9,%r12 + addq $32*8,%rbp + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + leaq -128(%rsp),%rsp + vpalignr $8,%ymm0,%ymm1,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm4,%ymm5,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm0,%ymm0 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm7,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm7,%ymm10 + vpaddq %ymm8,%ymm0,%ymm0 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm7,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm0,%ymm0 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq -128(%rbp),%ymm0,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm5,%ymm6,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm1,%ymm1 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm0,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm0,%ymm10 + vpaddq %ymm8,%ymm1,%ymm1 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm0,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 
+ leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm1,%ymm1 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq -96(%rbp),%ymm1,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm6,%ymm7,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm2,%ymm2 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm1,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm1,%ymm10 + vpaddq %ymm8,%ymm2,%ymm2 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm1,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm2,%ymm2 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq -64(%rbp),%ymm2,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm7,%ymm0,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm3,%ymm3 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm2,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm2,%ymm10 + vpaddq %ymm8,%ymm3,%ymm3 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm2,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + 
leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm3,%ymm3 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq -32(%rbp),%ymm3,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq -128(%rsp),%rsp + vpalignr $8,%ymm4,%ymm5,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm0,%ymm1,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm4,%ymm4 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm3,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm3,%ymm10 + vpaddq %ymm8,%ymm4,%ymm4 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm3,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm4,%ymm4 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq 0(%rbp),%ymm4,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm5,%ymm6,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm1,%ymm2,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm5,%ymm5 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm4,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm4,%ymm10 + vpaddq %ymm8,%ymm5,%ymm5 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm4,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm5,%ymm5 + rorxq 
$34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq 32(%rbp),%ymm5,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm6,%ymm7,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm2,%ymm3,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm6,%ymm6 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm5,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm5,%ymm10 + vpaddq %ymm8,%ymm6,%ymm6 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm5,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm6,%ymm6 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq 64(%rbp),%ymm6,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm7,%ymm0,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm3,%ymm4,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm7,%ymm7 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm6,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm6,%ymm10 + vpaddq %ymm8,%ymm7,%ymm7 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm6,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm7,%ymm7 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq 96(%rbp),%ymm7,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq 
%r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq 256(%rbp),%rbp + cmpb $0,-121(%rbp) + jne .Lavx2_00_47 + addq 0+128(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+128(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+128(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+128(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+128(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+128(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+128(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq 
(%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+128(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + addq 0(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96(%rsp),%rbx + andq %r10,%r12 + rorxq 
$41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rbp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + cmpq 144(%rbp),%rsi + je .Ldone_avx2 + + xorq %r14,%r14 + movq %rbx,%rdi + xorq %rcx,%rdi + movq %r9,%r12 + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + addq 0+16(%rbp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+16(%rbp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+16(%rbp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+16(%rbp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+16(%rbp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq 
$18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+16(%rbp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+16(%rbp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+16(%rbp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + leaq -128(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rsp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + leaq 256(%rsi),%rsi + addq 48(%rdi),%r10 + movq %rsi,%r12 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + cmoveq %rsp,%r12 + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.Ldone_avx2: + leaq (%rbp),%rsp + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_avx2: + .byte 0xf3,0xc3 +.size sha512_block_data_order_avx2,.-sha512_block_data_order_avx2 diff --git a/deps/openssl/asm/x64-elf-gas/whrlpool/wp-x86_64.s b/deps/openssl/asm/x64-elf-gas/whrlpool/wp-x86_64.s index 6c2656c9a1fd22..f83130ea68634b 100644 --- a/deps/openssl/asm/x64-elf-gas/whrlpool/wp-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/whrlpool/wp-x86_64.s @@ -1,6 +1,5 @@ .text - .globl whirlpool_block .type whirlpool_block,@function .align 16 @@ -63,233 +62,236 @@ whirlpool_block: movq %r15,64+56(%rsp) xorq %rsi,%rsi movq %rsi,24(%rbx) + jmp .Lround .align 16 .Lround: movq 4096(%rbp,%rsi,8),%r8 movl 0(%rsp),%eax movl 4(%rsp),%ebx - movb %al,%cl - movb %ah,%dl + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl 
%ah,%edx xorq 0(%rbp,%rsi,8),%r8 movq 7(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl movl 0+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx movq 6(%rbp,%rsi,8),%r10 movq 5(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx movq 4(%rbp,%rsi,8),%r12 movq 3(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl movl 0+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx movq 2(%rbp,%rsi,8),%r14 movq 1(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r9 xorq 7(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl movl 8+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r11 xorq 5(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r13 xorq 3(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl movl 8+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r15 xorq 1(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r10 xorq 7(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl movl 16+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r12 xorq 5(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r14 xorq 3(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl movl 16+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r8 xorq 1(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r11 xorq 7(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl movl 24+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r13 xorq 5(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r15 xorq 3(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl movl 24+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r9 xorq 1(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r12 xorq 7(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl movl 32+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r14 xorq 5(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r8 xorq 3(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl movl 32+8+4(%rsp),%ebx leaq 
(%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r10 xorq 1(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r13 xorq 7(%rbp,%rdi,8),%r14 - movb %al,%cl - movb %ah,%dl movl 40+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r15 xorq 5(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r9 xorq 3(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl movl 40+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r11 xorq 1(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r14 xorq 7(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl movl 48+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r8 xorq 5(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r10 xorq 3(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl movl 48+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r12 xorq 1(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r15 xorq 7(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl movl 56+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r9 xorq 5(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r11 xorq 3(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl movl 56+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r13 xorq 1(%rbp,%rdi,8),%r14 movq %r8,0(%rsp) @@ -300,228 +302,228 @@ whirlpool_block: movq %r13,40(%rsp) movq %r14,48(%rsp) movq %r15,56(%rsp) - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r8 xorq 7(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl movl 64+0+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r10 xorq 5(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r12 xorq 3(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl movl 64+0+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r14 xorq 1(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r9 xorq 7(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl movl 64+8+8(%rsp),%eax leaq 
(%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r11 xorq 5(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r13 xorq 3(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl movl 64+8+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r15 xorq 1(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r10 xorq 7(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl movl 64+16+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r12 xorq 5(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r14 xorq 3(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl movl 64+16+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r8 xorq 1(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r11 xorq 7(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl movl 64+24+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r13 xorq 5(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r15 xorq 3(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl movl 64+24+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r9 xorq 1(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r12 xorq 7(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl movl 64+32+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r14 xorq 5(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r8 xorq 3(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl movl 64+32+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r10 xorq 1(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r13 xorq 7(%rbp,%rdi,8),%r14 - movb %al,%cl - movb %ah,%dl movl 64+40+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r15 xorq 5(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r9 xorq 3(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl movl 64+40+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 
2(%rbp,%rsi,8),%r11 xorq 1(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r14 xorq 7(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl movl 64+48+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r8 xorq 5(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r10 xorq 3(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl movl 64+48+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r12 xorq 1(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r15 xorq 7(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r9 xorq 5(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r11 xorq 3(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r13 xorq 1(%rbp,%rdi,8),%r14 leaq 128(%rsp),%rbx diff --git a/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s b/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s index e0a82870859d08..656a5ce85576e0 100644 --- a/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s +++ b/deps/openssl/asm/x64-elf-gas/x86_64cpuid.s @@ -4,11 +4,10 @@ call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 .text - .globl OPENSSL_atomic_add .type OPENSSL_atomic_add,@function .align 16 @@ -16,12 +15,10 @@ OPENSSL_atomic_add: movl (%rdi),%eax .Lspin: leaq (%rsi,%rax,1),%r8 .byte 0xf0 - cmpxchgl %r8d,(%rdi) jne .Lspin movl %r8d,%eax .byte 0x48,0x98 - .byte 0xf3,0xc3 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add @@ -42,6 +39,7 @@ OPENSSL_ia32_cpuid: movq %rbx,%r8 xorl %eax,%eax + movl %eax,8(%rdi) cpuid movl %eax,%r11d @@ -109,6 +107,14 @@ OPENSSL_ia32_cpuid: shrl $14,%r10d andl $4095,%r10d + cmpl $7,%r11d + jb .Lnocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + .Lnocacheinfo: movl $1,%eax cpuid @@ -142,13 +148,13 @@ OPENSSL_ia32_cpuid: jnc .Lclear_avx xorl %ecx,%ecx .byte 0x0f,0x01,0xd0 - andl $6,%eax cmpl $6,%eax je .Ldone .Lclear_avx: movl $4026525695,%eax andl %eax,%r9d + andl $4294967263,8(%rdi) .Ldone: shlq $32,%r9 movl %r10d,%eax @@ -236,3 +242,18 @@ OPENSSL_ia32_rdrand: cmoveq %rcx,%rax .byte 0xf3,0xc3 .size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand + +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,@function +.align 16 +OPENSSL_ia32_rdseed: + movl $8,%ecx +.Loop_rdseed: +.byte 72,15,199,248 + jc .Lbreak_rdseed + loop .Loop_rdseed +.Lbreak_rdseed: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s index ebe9bc1f50471d..a50170a9a201f5 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/aes-x86_64.s @@ -1,6 +1,5 @@ .text - .p2align 4 _x86_64_AES_encrypt: xorl 
0(%r15),%eax @@ -154,7 +153,6 @@ L$enc_loop: .byte 0xf3,0xc3 - .p2align 4 _x86_64_AES_encrypt_compact: leaq 128(%r14),%r8 @@ -177,80 +175,78 @@ L$enc_loop_compact: movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d - movzbl (%r14,%r10,1),%r10d - movzbl (%r14,%r11,1),%r11d - movzbl (%r14,%r12,1),%r12d - movzbl %dl,%r8d movzbl %bh,%esi movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d - movzbl (%r14,%rsi,1),%r9d - movzbl (%r14,%rdi,1),%r13d - movzbl %dh,%ebp + movzbl (%r14,%rsi,1),%r9d movzbl %ah,%esi - shrl $16,%ecx + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi - shrl $16,%edx - movzbl %cl,%edi shll $8,%r9d + shrl $16,%edx shll $8,%r13d - movzbl (%r14,%rdi,1),%edi xorl %r9d,%r10d - xorl %r13d,%r11d - - movzbl %dl,%r9d shrl $16,%eax + movzbl %dl,%r9d shrl $16,%ebx - movzbl %al,%r13d + xorl %r13d,%r11d shll $8,%ebp - shll $8,%esi - movzbl (%r14,%r9,1),%r9d - movzbl (%r14,%r13,1),%r13d + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d - xorl %esi,%r8d + shll $8,%esi movzbl %bl,%ebp - movzbl %dh,%esi shll $16,%edi - movzbl (%r14,%rbp,1),%ebp - movzbl (%r14,%rsi,1),%esi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d xorl %edi,%r10d - movzbl %ah,%edi shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi movzbl (%r14,%rdi,1),%edi movzbl (%r14,%rcx,1),%edx movzbl (%r14,%rbx,1),%ecx - shll $16,%r9d - shll $16,%r13d + shll $16,%ebp - xorl %r9d,%r11d xorl %r13d,%r12d - xorl %ebp,%r8d - shll $24,%esi + xorl %ebp,%r8d shll $24,%edi - shll $24,%edx xorl %esi,%r10d - shll $24,%ecx + shll $24,%edx xorl %edi,%r11d + shll $24,%ecx movl %r10d,%eax movl %r11d,%ebx xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 je L$enc_compact_done - movl %eax,%esi - movl %ebx,%edi - andl $2155905152,%esi - andl $2155905152,%edi - movl %esi,%r10d - movl %edi,%r11d + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi shrl $7,%r10d leal (%rax,%rax,1),%r8d shrl $7,%r11d @@ -268,25 +264,25 @@ L$enc_loop_compact: xorl %r8d,%eax xorl %r9d,%ebx - movl %ecx,%esi - movl %edx,%edi + movl $2155905152,%r12d roll $24,%eax + movl $2155905152,%ebp roll $24,%ebx - andl $2155905152,%esi - andl $2155905152,%edi + andl %ecx,%r12d + andl %edx,%ebp xorl %r8d,%eax xorl %r9d,%ebx - movl %esi,%r12d - movl %edi,%ebp + movl %r12d,%esi rorl $16,%r10d + movl %ebp,%edi rorl $16,%r11d - shrl $7,%r12d leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d xorl %r10d,%eax - xorl %r11d,%ebx shrl $7,%ebp - leal (%rdx,%rdx,1),%r9d + xorl %r11d,%ebx rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d rorl $8,%r11d subl %r12d,%esi subl %ebp,%edi @@ -302,23 +298,23 @@ L$enc_loop_compact: xorl %esi,%r8d xorl %edi,%r9d + rorl $16,%r12d xorl %r8d,%ecx + rorl $16,%ebp xorl %r9d,%edx roll $24,%ecx + movl 0(%r14),%esi roll $24,%edx xorl %r8d,%ecx - xorl %r9d,%edx - movl 0(%r14),%esi - rorl $16,%r12d - rorl $16,%ebp movl 64(%r14),%edi - xorl %r12d,%ecx - xorl %ebp,%edx + xorl %r9d,%edx movl 128(%r14),%r8d + xorl %r12d,%ecx rorl $8,%r12d + xorl %ebp,%edx rorl $8,%ebp - movl 192(%r14),%r9d xorl %r12d,%ecx + movl 192(%r14),%r9d xorl %ebp,%edx jmp L$enc_loop_compact .p2align 4 @@ -329,7 +325,6 @@ L$enc_compact_done: xorl 12(%r15),%edx .byte 0xf3,0xc3 - .globl _AES_encrypt .p2align 4 @@ -553,7 +548,6 @@ L$dec_loop: 
.byte 0xf3,0xc3 - .p2align 4 _x86_64_AES_decrypt_compact: leaq 128(%r14),%r8 @@ -577,70 +571,69 @@ L$dec_loop_compact: movzbl %al,%r10d movzbl %bl,%r11d movzbl %cl,%r12d - movzbl (%r14,%r10,1),%r10d - movzbl (%r14,%r11,1),%r11d - movzbl (%r14,%r12,1),%r12d - movzbl %dl,%r8d movzbl %dh,%esi movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d movzbl (%r14,%r8,1),%r8d - movzbl (%r14,%rsi,1),%r9d - movzbl (%r14,%rdi,1),%r13d - movzbl %bh,%ebp + movzbl (%r14,%rsi,1),%r9d movzbl %ch,%esi - shrl $16,%ecx + movzbl (%r14,%rdi,1),%r13d movzbl (%r14,%rbp,1),%ebp movzbl (%r14,%rsi,1),%esi - shrl $16,%edx - movzbl %cl,%edi - shll $8,%r9d + shrl $16,%ecx shll $8,%r13d - movzbl (%r14,%rdi,1),%edi - xorl %r9d,%r10d - xorl %r13d,%r11d - - movzbl %dl,%r9d + shll $8,%r9d + movzbl %cl,%edi shrl $16,%eax + xorl %r9d,%r10d shrl $16,%ebx - movzbl %al,%r13d + movzbl %dl,%r9d + shll $8,%ebp + xorl %r13d,%r11d shll $8,%esi - movzbl (%r14,%r9,1),%r9d - movzbl (%r14,%r13,1),%r13d + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi xorl %ebp,%r12d - xorl %esi,%r8d - movzbl %bl,%ebp - movzbl %bh,%esi + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi movzbl (%r14,%rbp,1),%ebp - movzbl (%r14,%rsi,1),%esi xorl %edi,%r10d - + movzbl (%r14,%r13,1),%r13d movzbl %ch,%edi + + shll $16,%ebp shll $16,%r9d shll $16,%r13d - movzbl (%r14,%rdi,1),%ebx + xorl %ebp,%r8d + movzbl %dh,%ebp xorl %r9d,%r11d + shrl $8,%eax xorl %r13d,%r12d - movzbl %dh,%edi - shrl $8,%eax - shll $16,%ebp - movzbl (%r14,%rdi,1),%ecx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx movzbl (%r14,%rax,1),%edx - xorl %ebp,%r8d + movl %r10d,%eax shll $24,%esi shll $24,%ebx shll $24,%ecx - xorl %esi,%r10d + xorl %esi,%eax shll $24,%edx xorl %r11d,%ebx - movl %r10d,%eax xorl %r12d,%ecx xorl %r8d,%edx cmpq 16(%rsp),%r15 @@ -653,12 +646,12 @@ L$dec_loop_compact: orq %rbx,%rax orq %rdx,%rcx movq 256+16(%r14),%rbp - movq %rax,%rbx - movq %rcx,%rdx - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r9 - movq %rdx,%r12 + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 @@ -669,15 +662,15 @@ L$dec_loop_compact: andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx - xorq %r8,%rbx - xorq %r11,%rdx - movq %rbx,%r8 - movq %rdx,%r11 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 @@ -688,15 +681,15 @@ L$dec_loop_compact: andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx - xorq %r9,%rbx - xorq %r12,%rdx - movq %rbx,%r9 - movq %rdx,%r12 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 @@ -721,51 +714,51 @@ L$dec_loop_compact: movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 - xorq %r13,%r12 shrq $32,%rbx + xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 - xorq %r11,%r13 roll $8,%eax + xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 + roll $8,%ebx xorq %r12,%r13 - roll $8,%ebx roll $8,%edx xorl %r10d,%eax - xorl %r13d,%ecx shrq $32,%r10 + xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 - movq %r11,%r13 - shrq $32,%r10 - shrq $32,%r13 roll $24,%r8d + movq 
%r11,%r13 roll $24,%r11d - roll $24,%r10d - roll $24,%r13d + shrq $32,%r10 xorl %r8d,%eax + shrq $32,%r13 xorl %r11d,%ecx + roll $24,%r10d movq %r9,%r8 + roll $24,%r13d movq %r12,%r11 + shrq $32,%r8 xorl %r10d,%ebx + shrq $32,%r11 xorl %r13d,%edx movq 0(%r14),%rsi - shrq $32,%r8 - shrq $32,%r11 - movq 64(%r14),%rdi roll $16,%r9d + movq 64(%r14),%rdi roll $16,%r12d movq 128(%r14),%rbp roll $16,%r8d - roll $16,%r11d movq 192(%r14),%r10 xorl %r9d,%eax + roll $16,%r11d xorl %r12d,%ecx movq 256(%r14),%r13 xorl %r8d,%ebx @@ -779,7 +772,6 @@ L$dec_compact_done: xorl 12(%r15),%edx .byte 0xf3,0xc3 - .globl _AES_decrypt .p2align 4 @@ -864,10 +856,6 @@ L$enc_key_prologue: call _x86_64_AES_set_encrypt_key - movq 8(%rsp),%r15 - movq 16(%rsp),%r14 - movq 24(%rsp),%r13 - movq 32(%rsp),%r12 movq 40(%rsp),%rbp movq 48(%rsp),%rbx addq $56,%rsp @@ -1114,7 +1102,6 @@ L$badpointer: L$exit: .byte 0xf3,0xc3 - .globl _private_AES_set_decrypt_key .p2align 4 @@ -1166,12 +1153,12 @@ L$permute: leaq 16(%r15),%r15 movq 0(%r15),%rax movq 8(%r15),%rcx - movq %rax,%rbx - movq %rcx,%rdx - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r9 - movq %rdx,%r12 + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx shrq $7,%r9 leaq (%rax,%rax,1),%r8 shrq $7,%r12 @@ -1182,15 +1169,15 @@ L$permute: andq %rdi,%r11 andq %rbp,%rbx andq %rbp,%rdx - xorq %r8,%rbx - xorq %r11,%rdx - movq %rbx,%r8 - movq %rdx,%r11 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 leaq (%r8,%r8,1),%r9 shrq $7,%r13 @@ -1201,15 +1188,15 @@ L$permute: andq %rdi,%r12 andq %rbp,%rbx andq %rbp,%rdx - xorq %r9,%rbx - xorq %r12,%rdx - movq %rbx,%r9 - movq %rdx,%r12 - - andq %rsi,%rbx - andq %rsi,%rdx - movq %rbx,%r10 - movq %rdx,%r13 + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx shrq $7,%r10 xorq %rax,%r8 shrq $7,%r13 @@ -1234,51 +1221,51 @@ L$permute: movq %rax,%rbx movq %rcx,%rdx xorq %r10,%r9 - xorq %r13,%r12 shrq $32,%rbx + xorq %r13,%r12 shrq $32,%rdx xorq %r8,%r10 - xorq %r11,%r13 roll $8,%eax + xorq %r11,%r13 roll $8,%ecx xorq %r9,%r10 + roll $8,%ebx xorq %r12,%r13 - roll $8,%ebx roll $8,%edx xorl %r10d,%eax - xorl %r13d,%ecx shrq $32,%r10 + xorl %r13d,%ecx shrq $32,%r13 xorl %r10d,%ebx xorl %r13d,%edx movq %r8,%r10 - movq %r11,%r13 - shrq $32,%r10 - shrq $32,%r13 roll $24,%r8d + movq %r11,%r13 roll $24,%r11d - roll $24,%r10d - roll $24,%r13d + shrq $32,%r10 xorl %r8d,%eax + shrq $32,%r13 xorl %r11d,%ecx + roll $24,%r10d movq %r9,%r8 + roll $24,%r13d movq %r12,%r11 + shrq $32,%r8 xorl %r10d,%ebx + shrq $32,%r11 xorl %r13d,%edx - shrq $32,%r8 - shrq $32,%r11 - roll $16,%r9d + roll $16,%r12d roll $16,%r8d - roll $16,%r11d xorl %r9d,%eax + roll $16,%r11d xorl %r12d,%ecx xorl %r8d,%ebx @@ -1395,7 +1382,6 @@ L$cbc_do_ecopy: leaq 80(%rsp),%r15 movl $30,%ecx .long 0x90A548F3 - movl %eax,(%rdi) L$cbc_skip_ecopy: movq %r15,0(%rsp) @@ -1559,7 +1545,6 @@ L$cbc_fast_cleanup: xorq %rax,%rax .long 0x90AB48F3 - jmp L$cbc_exit @@ -1615,7 +1600,6 @@ L$cbc_slow_body: movl 12(%rbp),%edx jz L$cbc_slow_enc_tail - .p2align 2 L$cbc_slow_enc_loop: xorl 0(%r8),%eax @@ -1660,19 +1644,16 @@ L$cbc_slow_enc_tail: movq %r8,%rsi movq %r9,%rdi .long 0x9066A4F3 - movq $16,%rcx subq %r10,%rcx xorq %rax,%rax .long 0x9066AAF3 - movq %r9,%r8 movq $16,%r10 movq %r11,%rax movq %r12,%rcx jmp 
L$cbc_slow_enc_loop - .p2align 4 L$SLOW_DECRYPT: shrq $3,%rax @@ -1748,7 +1729,6 @@ L$cbc_slow_dec_partial: leaq 64(%rsp),%rsi leaq 16(%r10),%rcx .long 0x9066A4F3 - jmp L$cbc_exit .p2align 4 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-mb-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-mb-x86_64.s new file mode 100644 index 00000000000000..ccd3c7090025ff --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-mb-x86_64.s @@ -0,0 +1,1434 @@ +.text + + + +.globl _aesni_multi_cbc_encrypt + +.p2align 5 +_aesni_multi_cbc_encrypt: + cmpl $2,%edx + jb L$enc_non_avx + movl _OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_enc_shortcut + jmp L$enc_non_avx +.p2align 4 +L$enc_non_avx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +L$enc4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +L$enc4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz L$enc4x_done + + movups 16-120(%rsi),%xmm1 + pxor %xmm12,%xmm2 + movups 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm3 + movl 240-120(%rsi),%eax + pxor %xmm12,%xmm4 + movdqu (%r8),%xmm6 + pxor %xmm12,%xmm5 + movdqu (%r9),%xmm7 + pxor %xmm6,%xmm2 + movdqu (%r10),%xmm8 + pxor %xmm7,%xmm3 + movdqu (%r11),%xmm9 + pxor %xmm8,%xmm4 + pxor %xmm9,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp L$oop_enc4x + +.p2align 5 +L$oop_enc4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,220,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,220,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r10,%rbx,1) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,220,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,220,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,220,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,220,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,220,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,220,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + 
+.byte 102,15,56,220,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,220,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 160-120(%rsi),%xmm0 + + jb L$enc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 192-120(%rsi),%xmm0 + + je L$enc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 224-120(%rsi),%xmm0 + jmp L$enc4x_tail + +.p2align 5 +L$enc4x_tail: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqu (%r8,%rbx,1),%xmm6 + movdqu 16-120(%rsi),%xmm1 + +.byte 102,15,56,221,208 + movdqu (%r9,%rbx,1),%xmm7 + pxor %xmm12,%xmm6 +.byte 102,15,56,221,216 + movdqu (%r10,%rbx,1),%xmm8 + pxor %xmm12,%xmm7 +.byte 102,15,56,221,224 + movdqu (%r11,%rbx,1),%xmm9 + pxor %xmm12,%xmm8 +.byte 102,15,56,221,232 + movdqu 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + pxor %xmm6,%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + pxor %xmm7,%xmm3 + movups %xmm4,-16(%r14,%rbx,1) + pxor %xmm8,%xmm4 + movups %xmm5,-16(%r15,%rbx,1) + pxor %xmm9,%xmm5 + + decl %edx + jnz L$oop_enc4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + + + + + + + + + + leaq 160(%rdi),%rdi + decl %edx + jnz L$enc4x_loop_grande + +L$enc4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$enc4x_epilogue: + .byte 0xf3,0xc3 + + +.globl _aesni_multi_cbc_decrypt + +.p2align 5 +_aesni_multi_cbc_decrypt: + cmpl $2,%edx + jb L$dec_non_avx + movl _OPENSSL_ia32cap_P+4(%rip),%ecx + testl $268435456,%ecx + jnz _avx_cbc_dec_shortcut + jmp L$dec_non_avx +.p2align 4 +L$dec_non_avx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +L$dec4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +L$dec4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm6 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm7 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm8 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm9 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz L$dec4x_done + + movups 16-120(%rsi),%xmm1 + movups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + movdqu (%r8),%xmm2 + movdqu (%r9),%xmm3 + pxor %xmm12,%xmm2 + movdqu (%r10),%xmm4 + pxor %xmm12,%xmm3 + movdqu (%r11),%xmm5 + pxor 
%xmm12,%xmm4 + pxor %xmm12,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp L$oop_dec4x + +.p2align 5 +L$oop_dec4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,222,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,222,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r11,%rbx,1) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,222,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,222,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,222,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,222,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,222,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,222,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,222,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,222,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 160-120(%rsi),%xmm0 + + jb L$dec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 192-120(%rsi),%xmm0 + + je L$dec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 224-120(%rsi),%xmm0 + jmp L$dec4x_tail + +.p2align 5 +L$dec4x_tail: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,233 + movdqu 16-120(%rsi),%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 + movdqu 32-120(%rsi),%xmm0 + +.byte 102,15,56,223,214 +.byte 102,15,56,223,223 + movdqu -16(%r8,%rbx,1),%xmm6 + movdqu -16(%r9,%rbx,1),%xmm7 +.byte 102,65,15,56,223,224 +.byte 102,65,15,56,223,233 + movdqu -16(%r10,%rbx,1),%xmm8 + movdqu -16(%r11,%rbx,1),%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + movdqu (%r8,%rbx,1),%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + movdqu (%r9,%rbx,1),%xmm3 + pxor %xmm12,%xmm2 + movups %xmm4,-16(%r14,%rbx,1) + movdqu (%r10,%rbx,1),%xmm4 + pxor %xmm12,%xmm3 + movups %xmm5,-16(%r15,%rbx,1) + movdqu (%r11,%rbx,1),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + + decl %edx + jnz L$oop_dec4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + leaq 160(%rdi),%rdi + decl %edx + jnz L$dec4x_loop_grande + +L$dec4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp 
+L$dec4x_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +aesni_multi_cbc_encrypt_avx: +_avx_cbc_enc_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + + + subq $192,%rsp + andq $-128,%rsp + movq %rax,16(%rsp) + +L$enc8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +L$enc8x_loop_grande: + + xorl %edx,%edx + movl -144(%rdi),%ecx + movq -160(%rdi),%r8 + cmpl %edx,%ecx + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + movl -104(%rdi),%ecx + movq -120(%rdi),%r9 + cmpl %edx,%ecx + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + movl -64(%rdi),%ecx + movq -80(%rdi),%r10 + cmpl %edx,%ecx + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + movl -24(%rdi),%ecx + movq -40(%rdi),%r11 + cmpl %edx,%ecx + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + movl 16(%rdi),%ecx + movq 0(%rdi),%r12 + cmpl %edx,%ecx + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + movl 56(%rdi),%ecx + movq 40(%rdi),%r13 + cmpl %edx,%ecx + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + movl 96(%rdi),%ecx + movq 80(%rdi),%r14 + cmpl %edx,%ecx + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + movl 136(%rdi),%ecx + movq 120(%rdi),%r15 + cmpl %edx,%ecx + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + testl %edx,%edx + jz L$enc8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + + vpxor (%r8),%xmm15,%xmm10 + leaq 128(%rsp),%rbp + vpxor (%r9),%xmm15,%xmm11 + vpxor (%r10),%xmm15,%xmm12 + vpxor (%r11),%xmm15,%xmm13 + vpxor %xmm10,%xmm2,%xmm2 + vpxor (%r12),%xmm15,%xmm10 + vpxor %xmm11,%xmm3,%xmm3 + vpxor (%r13),%xmm15,%xmm11 + vpxor %xmm12,%xmm4,%xmm4 + vpxor (%r14),%xmm15,%xmm12 + vpxor %xmm13,%xmm5,%xmm5 + vpxor (%r15),%xmm15,%xmm13 + vpxor %xmm10,%xmm6,%xmm6 + movl $1,%ecx + vpxor %xmm11,%xmm7,%xmm7 + vpxor %xmm12,%xmm8,%xmm8 + vpxor %xmm13,%xmm9,%xmm9 + jmp L$oop_enc8x + +.p2align 5 +L$oop_enc8x: + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r8),%xmm15,%xmm10 + movq %rbx,64+0(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,0(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx 
+ cmovgeq %rsp,%r9 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r9),%xmm15,%xmm11 + movq %rbx,64+8(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,16(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r10),%xmm15,%xmm12 + movq %rbx,64+16(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,32(%rbp) + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r11),%xmm15,%xmm13 + movq %rbx,64+24(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,48(%rbp) + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r12),%xmm15,%xmm10 + movq %rbx,64+32(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r13),%xmm15,%xmm11 + movq %rbx,64+40(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesenc %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesenc %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesenc %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesenc %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesenc %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vpxor 16(%r14),%xmm15,%xmm12 + movq %rbx,64+48(%rsp) + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesenc %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesenc %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesenc %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesenc %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesenc %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesenc %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesenc %xmm0,%xmm8,%xmm8 + vpxor 16(%r15),%xmm15,%xmm13 + movq %rbx,64+56(%rsp) + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb L$enc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc 
%xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je L$enc8x_tail + + vaesenc %xmm1,%xmm2,%xmm2 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vaesenc %xmm1,%xmm7,%xmm7 + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vaesenc %xmm0,%xmm8,%xmm8 + vaesenc %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +L$enc8x_tail: + vaesenc %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesenc %xmm1,%xmm3,%xmm3 + vaesenc %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenc %xmm1,%xmm5,%xmm5 + vaesenc %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + vmovdqu 48(%rsp),%xmm14 + vaesenc %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesenc %xmm1,%xmm8,%xmm8 + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesenclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesenclast %xmm0,%xmm7,%xmm7 + vaesenclast %xmm0,%xmm8,%xmm8 + vmovdqa %xmm14,48(%rsp) + vaesenclast %xmm0,%xmm9,%xmm9 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vpxor 0(%rbp),%xmm2,%xmm2 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vpxor 16(%rbp),%xmm3,%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vpxor 32(%rbp),%xmm4,%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vpxor 48(%rbp),%xmm5,%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vpxor %xmm10,%xmm6,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vpxor %xmm11,%xmm7,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vpxor %xmm12,%xmm8,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vpxor %xmm13,%xmm9,%xmm9 + + decl %edx + jnz L$oop_enc8x + + movq 16(%rsp),%rax + + + + + +L$enc8x_done: + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$enc8x_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 5 +aesni_multi_cbc_decrypt_avx: +_avx_cbc_dec_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + + + + subq $256,%rsp + andq $-256,%rsp + subq $192,%rsp + movq %rax,16(%rsp) + +L$dec8x_body: + vzeroupper + vmovdqu (%rsi),%xmm15 + leaq 120(%rsi),%rsi + leaq 160(%rdi),%rdi + shrl $1,%edx + +L$dec8x_loop_grande: + + xorl %edx,%edx + movl -144(%rdi),%ecx + movq -160(%rdi),%r8 + cmpl %edx,%ecx + movq -152(%rdi),%rbx + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -136(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + subq %r8,%rbx + movq %rbx,64(%rsp) + vmovdqu %xmm2,192(%rsp) + movl -104(%rdi),%ecx + movq -120(%rdi),%r9 + cmpl %edx,%ecx + movq -112(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -96(%rdi),%xmm3 + movl %ecx,36(%rsp) + 
cmovleq %rsp,%r9 + subq %r9,%rbp + movq %rbp,72(%rsp) + vmovdqu %xmm3,208(%rsp) + movl -64(%rdi),%ecx + movq -80(%rdi),%r10 + cmpl %edx,%ecx + movq -72(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -56(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + subq %r10,%rbp + movq %rbp,80(%rsp) + vmovdqu %xmm4,224(%rsp) + movl -24(%rdi),%ecx + movq -40(%rdi),%r11 + cmpl %edx,%ecx + movq -32(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu -16(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + subq %r11,%rbp + movq %rbp,88(%rsp) + vmovdqu %xmm5,240(%rsp) + movl 16(%rdi),%ecx + movq 0(%rdi),%r12 + cmpl %edx,%ecx + movq 8(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 24(%rdi),%xmm6 + movl %ecx,48(%rsp) + cmovleq %rsp,%r12 + subq %r12,%rbp + movq %rbp,96(%rsp) + vmovdqu %xmm6,256(%rsp) + movl 56(%rdi),%ecx + movq 40(%rdi),%r13 + cmpl %edx,%ecx + movq 48(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 64(%rdi),%xmm7 + movl %ecx,52(%rsp) + cmovleq %rsp,%r13 + subq %r13,%rbp + movq %rbp,104(%rsp) + vmovdqu %xmm7,272(%rsp) + movl 96(%rdi),%ecx + movq 80(%rdi),%r14 + cmpl %edx,%ecx + movq 88(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 104(%rdi),%xmm8 + movl %ecx,56(%rsp) + cmovleq %rsp,%r14 + subq %r14,%rbp + movq %rbp,112(%rsp) + vmovdqu %xmm8,288(%rsp) + movl 136(%rdi),%ecx + movq 120(%rdi),%r15 + cmpl %edx,%ecx + movq 128(%rdi),%rbp + cmovgl %ecx,%edx + testl %ecx,%ecx + vmovdqu 144(%rdi),%xmm9 + movl %ecx,60(%rsp) + cmovleq %rsp,%r15 + subq %r15,%rbp + movq %rbp,120(%rsp) + vmovdqu %xmm9,304(%rsp) + testl %edx,%edx + jz L$dec8x_done + + vmovups 16-120(%rsi),%xmm1 + vmovups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + leaq 192+128(%rsp),%rbp + + vmovdqu (%r8),%xmm2 + vmovdqu (%r9),%xmm3 + vmovdqu (%r10),%xmm4 + vmovdqu (%r11),%xmm5 + vmovdqu (%r12),%xmm6 + vmovdqu (%r13),%xmm7 + vmovdqu (%r14),%xmm8 + vmovdqu (%r15),%xmm9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm6,64(%rbp) + vpxor %xmm15,%xmm6,%xmm6 + vmovdqu %xmm7,80(%rbp) + vpxor %xmm15,%xmm7,%xmm7 + vmovdqu %xmm8,96(%rbp) + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu %xmm9,112(%rbp) + vpxor %xmm15,%xmm9,%xmm9 + xorq $128,%rbp + movl $1,%ecx + jmp L$oop_dec8x + +.p2align 5 +L$oop_dec8x: + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+0(%rsp),%ecx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r8) + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r8,%rbx,1),%rbx + cmovgeq %rsp,%r8 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r8,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r8),%xmm10 + movq %rbx,64+0(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -72(%rsi),%xmm1 + leaq 16(%r8,%rbx,1),%r8 + vmovdqu %xmm10,128(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+4(%rsp),%ecx + movq 64+8(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r9) + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r9,%rbx,1),%rbx + cmovgeq %rsp,%r9 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r9,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r9),%xmm11 + movq %rbx,64+8(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -56(%rsi),%xmm0 + leaq 16(%r9,%rbx,1),%r9 + vmovdqu %xmm11,144(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+8(%rsp),%ecx + movq 64+16(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r10) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r8) + 
vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r10,%rbx,1),%rbx + cmovgeq %rsp,%r10 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r10,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r10),%xmm12 + movq %rbx,64+16(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -40(%rsi),%xmm1 + leaq 16(%r10,%rbx,1),%r10 + vmovdqu %xmm12,160(%rsp) + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+12(%rsp),%ecx + movq 64+24(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r11) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r9) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r11,%rbx,1),%rbx + cmovgeq %rsp,%r11 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r11,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r11),%xmm13 + movq %rbx,64+24(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups -24(%rsi),%xmm0 + leaq 16(%r11,%rbx,1),%r11 + vmovdqu %xmm13,176(%rsp) + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+16(%rsp),%ecx + movq 64+32(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r12) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r10) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r12,%rbx,1),%rbx + cmovgeq %rsp,%r12 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r12,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r12),%xmm10 + movq %rbx,64+32(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups -8(%rsi),%xmm1 + leaq 16(%r12,%rbx,1),%r12 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+20(%rsp),%ecx + movq 64+40(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r13) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r11) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%rbx,%r13,1),%rbx + cmovgeq %rsp,%r13 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r13,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r13),%xmm11 + movq %rbx,64+40(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 8(%rsi),%xmm0 + leaq 16(%r13,%rbx,1),%r13 + vaesdec %xmm1,%xmm2,%xmm2 + cmpl 32+24(%rsp),%ecx + movq 64+48(%rsp),%rbx + vaesdec %xmm1,%xmm3,%xmm3 + prefetcht0 31(%r14) + vaesdec %xmm1,%xmm4,%xmm4 + prefetcht0 15(%r12) + vaesdec %xmm1,%xmm5,%xmm5 + leaq (%r14,%rbx,1),%rbx + cmovgeq %rsp,%r14 + vaesdec %xmm1,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm1,%xmm7,%xmm7 + subq %r14,%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vmovdqu 16(%r14),%xmm12 + movq %rbx,64+48(%rsp) + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 24(%rsi),%xmm1 + leaq 16(%r14,%rbx,1),%r14 + vaesdec %xmm0,%xmm2,%xmm2 + cmpl 32+28(%rsp),%ecx + movq 64+56(%rsp),%rbx + vaesdec %xmm0,%xmm3,%xmm3 + prefetcht0 31(%r15) + vaesdec %xmm0,%xmm4,%xmm4 + prefetcht0 15(%r13) + vaesdec %xmm0,%xmm5,%xmm5 + leaq (%r15,%rbx,1),%rbx + cmovgeq %rsp,%r15 + vaesdec %xmm0,%xmm6,%xmm6 + cmovgq %rsp,%rbx + vaesdec %xmm0,%xmm7,%xmm7 + subq %r15,%rbx + vaesdec %xmm0,%xmm8,%xmm8 + vmovdqu 16(%r15),%xmm13 + movq %rbx,64+56(%rsp) + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 40(%rsi),%xmm0 + leaq 16(%r15,%rbx,1),%r15 + vmovdqu 32(%rsp),%xmm14 + prefetcht0 15(%r14) + prefetcht0 15(%r15) + cmpl $11,%eax + jb L$dec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 176-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 192-120(%rsi),%xmm0 + je L$dec8x_tail + + vaesdec %xmm1,%xmm2,%xmm2 + 
vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vaesdec %xmm1,%xmm7,%xmm7 + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 208-120(%rsi),%xmm1 + + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vaesdec %xmm0,%xmm8,%xmm8 + vaesdec %xmm0,%xmm9,%xmm9 + vmovups 224-120(%rsi),%xmm0 + +L$dec8x_tail: + vaesdec %xmm1,%xmm2,%xmm2 + vpxor %xmm15,%xmm15,%xmm15 + vaesdec %xmm1,%xmm3,%xmm3 + vaesdec %xmm1,%xmm4,%xmm4 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdec %xmm1,%xmm5,%xmm5 + vaesdec %xmm1,%xmm6,%xmm6 + vpaddd %xmm14,%xmm15,%xmm15 + vmovdqu 48(%rsp),%xmm14 + vaesdec %xmm1,%xmm7,%xmm7 + movq 64(%rsp),%rbx + vaesdec %xmm1,%xmm8,%xmm8 + vaesdec %xmm1,%xmm9,%xmm9 + vmovups 16-120(%rsi),%xmm1 + + vaesdeclast %xmm0,%xmm2,%xmm2 + vmovdqa %xmm15,32(%rsp) + vpxor %xmm15,%xmm15,%xmm15 + vaesdeclast %xmm0,%xmm3,%xmm3 + vpxor 0(%rbp),%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm4,%xmm4 + vpxor 16(%rbp),%xmm3,%xmm3 + vpcmpgtd %xmm15,%xmm14,%xmm15 + vaesdeclast %xmm0,%xmm5,%xmm5 + vpxor 32(%rbp),%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm6,%xmm6 + vpxor 48(%rbp),%xmm5,%xmm5 + vpaddd %xmm15,%xmm14,%xmm14 + vmovdqu -120(%rsi),%xmm15 + vaesdeclast %xmm0,%xmm7,%xmm7 + vpxor 64(%rbp),%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm8,%xmm8 + vpxor 80(%rbp),%xmm7,%xmm7 + vmovdqa %xmm14,48(%rsp) + vaesdeclast %xmm0,%xmm9,%xmm9 + vpxor 96(%rbp),%xmm8,%xmm8 + vmovups 32-120(%rsi),%xmm0 + + vmovups %xmm2,-16(%r8) + subq %rbx,%r8 + vmovdqu 128+0(%rsp),%xmm2 + vpxor 112(%rbp),%xmm9,%xmm9 + vmovups %xmm3,-16(%r9) + subq 72(%rsp),%r9 + vmovdqu %xmm2,0(%rbp) + vpxor %xmm15,%xmm2,%xmm2 + vmovdqu 128+16(%rsp),%xmm3 + vmovups %xmm4,-16(%r10) + subq 80(%rsp),%r10 + vmovdqu %xmm3,16(%rbp) + vpxor %xmm15,%xmm3,%xmm3 + vmovdqu 128+32(%rsp),%xmm4 + vmovups %xmm5,-16(%r11) + subq 88(%rsp),%r11 + vmovdqu %xmm4,32(%rbp) + vpxor %xmm15,%xmm4,%xmm4 + vmovdqu 128+48(%rsp),%xmm5 + vmovups %xmm6,-16(%r12) + subq 96(%rsp),%r12 + vmovdqu %xmm5,48(%rbp) + vpxor %xmm15,%xmm5,%xmm5 + vmovdqu %xmm10,64(%rbp) + vpxor %xmm10,%xmm15,%xmm6 + vmovups %xmm7,-16(%r13) + subq 104(%rsp),%r13 + vmovdqu %xmm11,80(%rbp) + vpxor %xmm11,%xmm15,%xmm7 + vmovups %xmm8,-16(%r14) + subq 112(%rsp),%r14 + vmovdqu %xmm12,96(%rbp) + vpxor %xmm12,%xmm15,%xmm8 + vmovups %xmm9,-16(%r15) + subq 120(%rsp),%r15 + vmovdqu %xmm13,112(%rbp) + vpxor %xmm13,%xmm15,%xmm9 + + xorq $128,%rbp + decl %edx + jnz L$oop_dec8x + + movq 16(%rsp),%rax + + + + + +L$dec8x_done: + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$dec8x_epilogue: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s index 5ae41e019e7264..c7606aec49a95b 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s @@ -1,19 +1,25 @@ .text - .globl _aesni_cbc_sha1_enc -.p2align 4 +.p2align 5 _aesni_cbc_sha1_enc: movl _OPENSSL_ia32cap_P+0(%rip),%r10d - movl _OPENSSL_ia32cap_P+4(%rip),%r11d + movq _OPENSSL_ia32cap_P+4(%rip),%r11 + btq $61,%r11 + jc aesni_cbc_sha1_enc_shaext + andl $268435456,%r11d + andl $1073741824,%r10d + orl %r11d,%r10d + cmpl $1342177280,%r10d + je aesni_cbc_sha1_enc_avx jmp aesni_cbc_sha1_enc_ssse3 .byte 0xf3,0xc3 -.p2align 4 +.p2align 5 
aesni_cbc_sha1_enc_ssse3: movq 8(%rsp),%r10 @@ -30,12 +36,12 @@ aesni_cbc_sha1_enc_ssse3: movq %rdi,%r12 movq %rsi,%r13 movq %rdx,%r14 - movq %rcx,%r15 - movdqu (%r8),%xmm11 + leaq 112(%rcx),%r15 + movdqu (%r8),%xmm2 movq %r8,88(%rsp) shlq $6,%r14 subq %r12,%r13 - movl 240(%r15),%r8d + movl 240-112(%r15),%r8d addq %r10,%r14 leaq K_XX_XX(%rip),%r11 @@ -45,1188 +51,2464 @@ aesni_cbc_sha1_enc_ssse3: movl 12(%r9),%edx movl %ebx,%esi movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi - movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 - movdqu 0(%r10),%xmm0 - movdqu 16(%r10),%xmm1 - movdqu 32(%r10),%xmm2 - movdqu 48(%r10),%xmm3 -.byte 102,15,56,0,198 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 addq $64,%r10 -.byte 102,15,56,0,206 -.byte 102,15,56,0,214 -.byte 102,15,56,0,222 - paddd %xmm9,%xmm0 - paddd %xmm9,%xmm1 - paddd %xmm9,%xmm2 - movdqa %xmm0,0(%rsp) - psubd %xmm9,%xmm0 - movdqa %xmm1,16(%rsp) - psubd %xmm9,%xmm1 - movdqa %xmm2,32(%rsp) - psubd %xmm9,%xmm2 - movups (%r15),%xmm13 - movups 16(%r15),%xmm14 + paddd %xmm13,%xmm4 +.byte 102,15,56,0,251 + paddd %xmm13,%xmm5 + paddd %xmm13,%xmm6 + movdqa %xmm4,0(%rsp) + psubd %xmm13,%xmm4 + movdqa %xmm5,16(%rsp) + psubd %xmm13,%xmm5 + movdqa %xmm6,32(%rsp) + psubd %xmm13,%xmm6 + movups -112(%r15),%xmm15 + movups 16-112(%r15),%xmm0 jmp L$oop_ssse3 -.p2align 4 +.p2align 5 L$oop_ssse3: - movdqa %xmm1,%xmm4 - addl 0(%rsp),%ebp - movups 0(%r12),%xmm12 - xorps %xmm13,%xmm12 - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 - xorl %edx,%ecx - movdqa %xmm3,%xmm8 -.byte 102,15,58,15,224,8 + rorl $2,%ebx + movups 0(%r12),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm4,%xmm8 + xorl %edx,%esi + movdqa %xmm7,%xmm12 + paddd %xmm7,%xmm13 movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm5,%xmm8 + xorl %ecx,%ebx roll $5,%eax - paddd %xmm3,%xmm9 - andl %ecx,%esi - xorl %edx,%ecx - psrldq $4,%xmm8 - xorl %edx,%esi - addl %eax,%ebp - pxor %xmm0,%xmm4 - rorl $2,%ebx addl %esi,%ebp - pxor %xmm2,%xmm8 - addl 4(%rsp),%edx - xorl %ecx,%ebx - movl %ebp,%esi - roll $5,%ebp - pxor %xmm8,%xmm4 + psrldq $4,%xmm12 andl %ebx,%edi xorl %ecx,%ebx - movdqa %xmm9,48(%rsp) - xorl %ecx,%edi -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 - addl %ebp,%edx - movdqa %xmm4,%xmm10 - movdqa %xmm4,%xmm8 + pxor %xmm4,%xmm8 + addl %eax,%ebp rorl $7,%eax - addl %edi,%edx - addl 8(%rsp),%ecx + pxor %xmm6,%xmm12 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm12,%xmm8 xorl %ebx,%eax - pslldq $12,%xmm10 - paddd %xmm4,%xmm4 - movl %edx,%edi - roll $5,%edx + roll $5,%ebp + movdqa %xmm13,48(%rsp) + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 andl %eax,%esi + movdqa %xmm8,%xmm3 xorl %ebx,%eax - psrld $31,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx - movdqa %xmm10,%xmm9 + addl %ebp,%edx rorl $7,%ebp - addl %esi,%ecx - psrld $30,%xmm10 - por %xmm8,%xmm4 - addl 12(%rsp),%ebx + movdqa %xmm8,%xmm12 + xorl %ebx,%esi + pslldq $12,%xmm3 + paddd %xmm8,%xmm8 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm12 xorl %eax,%ebp - movl %ecx,%esi - roll $5,%ecx -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 - pslld $2,%xmm9 - pxor %xmm10,%xmm4 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm13 andl %ebp,%edi xorl %eax,%ebp - movdqa 0(%r11),%xmm10 - xorl %eax,%edi - addl %ecx,%ebx - pxor 
%xmm9,%xmm4 + psrld $30,%xmm3 + addl %edx,%ecx rorl $7,%edx - addl %edi,%ebx - movdqa %xmm2,%xmm5 - addl 16(%rsp),%eax + por %xmm12,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm13 + pxor %xmm3,%xmm8 xorl %ebp,%edx - movdqa %xmm4,%xmm9 -.byte 102,15,58,15,233,8 - movl %ebx,%edi - roll $5,%ebx - paddd %xmm4,%xmm10 + movdqa 0(%r11),%xmm3 + roll $5,%ecx + addl %edi,%ebx andl %edx,%esi + pxor %xmm13,%xmm8 xorl %ebp,%edx - psrldq $4,%xmm9 - xorl %ebp,%esi - addl %ebx,%eax - pxor %xmm1,%xmm5 + addl %ecx,%ebx rorl $7,%ecx - addl %esi,%eax - pxor %xmm3,%xmm9 - addl 20(%rsp),%ebp -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 + pshufd $238,%xmm5,%xmm9 + xorl %ebp,%esi + movdqa %xmm8,%xmm13 + paddd %xmm8,%xmm3 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm6,%xmm9 xorl %edx,%ecx - movl %eax,%esi - roll $5,%eax - pxor %xmm9,%xmm5 + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm13 andl %ecx,%edi xorl %edx,%ecx - movdqa %xmm10,0(%rsp) - xorl %edx,%edi - addl %eax,%ebp - movdqa %xmm5,%xmm8 - movdqa %xmm5,%xmm9 + pxor %xmm5,%xmm9 + addl %ebx,%eax rorl $7,%ebx - addl %edi,%ebp - addl 24(%rsp),%edx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + pxor %xmm7,%xmm13 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm13,%xmm9 xorl %ecx,%ebx - pslldq $12,%xmm8 - paddd %xmm5,%xmm5 - movl %ebp,%edi - roll $5,%ebp + roll $5,%eax + movdqa %xmm3,0(%rsp) + addl %edi,%ebp andl %ebx,%esi + movdqa %xmm9,%xmm12 xorl %ecx,%ebx - psrld $31,%xmm9 - xorl %ecx,%esi -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 - addl %ebp,%edx - movdqa %xmm8,%xmm10 + addl %eax,%ebp rorl $7,%eax - addl %esi,%edx - psrld $30,%xmm8 - por %xmm9,%xmm5 - addl 28(%rsp),%ecx + movdqa %xmm9,%xmm13 + xorl %ecx,%esi + pslldq $12,%xmm12 + paddd %xmm9,%xmm9 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm13 xorl %ebx,%eax - movl %edx,%esi - roll $5,%edx - pslld $2,%xmm10 - pxor %xmm8,%xmm5 + roll $5,%ebp + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm3 andl %eax,%edi xorl %ebx,%eax - movdqa 16(%r11),%xmm8 - xorl %ebx,%edi - addl %edx,%ecx - pxor %xmm10,%xmm5 + psrld $30,%xmm12 + addl %ebp,%edx rorl $7,%ebp - addl %edi,%ecx - movdqa %xmm3,%xmm6 - addl 32(%rsp),%ebx + por %xmm13,%xmm9 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm3 + pxor %xmm12,%xmm9 xorl %eax,%ebp - movdqa %xmm5,%xmm10 -.byte 102,15,58,15,242,8 - movl %ecx,%edi - roll $5,%ecx -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 - paddd %xmm5,%xmm8 + movdqa 16(%r11),%xmm12 + roll $5,%edx + addl %edi,%ecx andl %ebp,%esi + pxor %xmm3,%xmm9 xorl %eax,%ebp - psrldq $4,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx - pxor %xmm2,%xmm6 + addl %edx,%ecx rorl $7,%edx - addl %esi,%ebx - pxor %xmm4,%xmm10 - addl 36(%rsp),%eax + pshufd $238,%xmm6,%xmm10 + xorl %eax,%esi + movdqa %xmm9,%xmm3 + paddd %xmm9,%xmm12 + movl %ecx,%edi + addl 32(%rsp),%ebx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm7,%xmm10 xorl %ebp,%edx - movl %ebx,%esi - roll $5,%ebx - pxor %xmm10,%xmm6 + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm3 andl %edx,%edi xorl %ebp,%edx - movdqa %xmm8,16(%rsp) - xorl %ebp,%edi - addl %ebx,%eax - movdqa %xmm6,%xmm9 - movdqa %xmm6,%xmm10 + pxor %xmm6,%xmm10 + addl %ecx,%ebx rorl $7,%ecx - addl %edi,%eax - addl 40(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 + pxor %xmm8,%xmm3 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm3,%xmm10 xorl %edx,%ecx - pslldq 
$12,%xmm9 - paddd %xmm6,%xmm6 - movl %eax,%edi - roll $5,%eax + roll $5,%ebx + movdqa %xmm12,16(%rsp) + addl %edi,%eax andl %ecx,%esi + movdqa %xmm10,%xmm13 xorl %edx,%ecx - psrld $31,%xmm10 - xorl %edx,%esi - addl %eax,%ebp - movdqa %xmm9,%xmm8 + addl %ebx,%eax rorl $7,%ebx - addl %esi,%ebp - psrld $30,%xmm9 - por %xmm10,%xmm6 - addl 44(%rsp),%edx + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm10,%xmm3 + xorl %edx,%esi + pslldq $12,%xmm13 + paddd %xmm10,%xmm10 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm3 xorl %ecx,%ebx - movl %ebp,%esi - roll $5,%ebp - pslld $2,%xmm8 - pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm13,%xmm12 andl %ebx,%edi xorl %ecx,%ebx - movdqa 16(%r11),%xmm9 - xorl %ecx,%edi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %ebp,%edx - pxor %xmm8,%xmm6 + psrld $30,%xmm13 + addl %eax,%ebp rorl $7,%eax - addl %edi,%edx - movdqa %xmm4,%xmm7 - addl 48(%rsp),%ecx + por %xmm3,%xmm10 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm12 + pxor %xmm13,%xmm10 xorl %ebx,%eax - movdqa %xmm6,%xmm8 -.byte 102,15,58,15,251,8 - movl %edx,%edi - roll $5,%edx - paddd %xmm6,%xmm9 + movdqa 16(%r11),%xmm13 + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 andl %eax,%esi + pxor %xmm12,%xmm10 xorl %ebx,%eax - psrldq $4,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx - pxor %xmm3,%xmm7 + addl %ebp,%edx rorl $7,%ebp - addl %esi,%ecx - pxor %xmm5,%xmm8 - addl 52(%rsp),%ebx + pshufd $238,%xmm7,%xmm11 + xorl %ebx,%esi + movdqa %xmm10,%xmm12 + paddd %xmm10,%xmm13 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm8,%xmm11 xorl %eax,%ebp - movl %ecx,%esi - roll $5,%ecx -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 - pxor %xmm8,%xmm7 + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm12 andl %ebp,%edi xorl %eax,%ebp - movdqa %xmm9,32(%rsp) - xorl %eax,%edi - addl %ecx,%ebx - movdqa %xmm7,%xmm10 - movdqa %xmm7,%xmm8 + pxor %xmm7,%xmm11 + addl %edx,%ecx rorl $7,%edx - addl %edi,%ebx - addl 56(%rsp),%eax + pxor %xmm9,%xmm12 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm11 xorl %ebp,%edx - pslldq $12,%xmm10 - paddd %xmm7,%xmm7 - movl %ebx,%edi - roll $5,%ebx + roll $5,%ecx + movdqa %xmm13,32(%rsp) + addl %edi,%ebx andl %edx,%esi + movdqa %xmm11,%xmm3 xorl %ebp,%edx - psrld $31,%xmm8 - xorl %ebp,%esi - addl %ebx,%eax - movdqa %xmm10,%xmm9 + addl %ecx,%ebx rorl $7,%ecx + movdqa %xmm11,%xmm12 + xorl %ebp,%esi + pslldq $12,%xmm3 + paddd %xmm11,%xmm11 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm12 + xorl %edx,%ecx + roll $5,%ebx addl %esi,%eax - psrld $30,%xmm10 - por %xmm8,%xmm7 - addl 60(%rsp),%ebp + movdqa %xmm3,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx cmpl $11,%r8d jb L$aesenclast1 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 je L$aesenclast1 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 L$aesenclast1: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 - xorl %edx,%ecx +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + por %xmm12,%xmm11 + xorl %edx,%edi movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm13 + pxor %xmm3,%xmm11 + xorl %ecx,%ebx + 
movdqa 16(%r11),%xmm3 roll $5,%eax - pslld $2,%xmm9 - pxor %xmm10,%xmm7 - andl %ecx,%edi - xorl %edx,%ecx - movdqa 16(%r11),%xmm10 - xorl %edx,%edi - addl %eax,%ebp - pxor %xmm9,%xmm7 - rorl $7,%ebx addl %edi,%ebp - movdqa %xmm7,%xmm9 - addl 0(%rsp),%edx - pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,206,8 - xorl %ecx,%ebx - movl %ebp,%edi - roll $5,%ebp - pxor %xmm1,%xmm0 andl %ebx,%esi + pxor %xmm13,%xmm11 + pshufd $238,%xmm10,%xmm13 xorl %ecx,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm7,%xmm10 - xorl %ecx,%esi - movups 16(%r12),%xmm12 - xorps %xmm13,%xmm12 - movups %xmm11,0(%r13,%r12,1) - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 - addl %ebp,%edx - pxor %xmm9,%xmm0 + addl %eax,%ebp rorl $7,%eax - addl %esi,%edx - addl 4(%rsp),%ecx + pxor %xmm8,%xmm4 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm11,%xmm13 xorl %ebx,%eax - movdqa %xmm0,%xmm9 - movdqa %xmm10,48(%rsp) - movl %edx,%esi - roll $5,%edx + roll $5,%ebp + pxor %xmm5,%xmm4 + addl %esi,%edx + movups 16(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%r12,%r13,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 andl %eax,%edi + movdqa %xmm3,%xmm12 xorl %ebx,%eax - pslld $2,%xmm0 - xorl %ebx,%edi - addl %edx,%ecx - psrld $30,%xmm9 + paddd %xmm11,%xmm3 + addl %ebp,%edx + pxor %xmm13,%xmm4 rorl $7,%ebp - addl %edi,%ecx - addl 8(%rsp),%ebx + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm4,%xmm13 xorl %eax,%ebp - movl %ecx,%edi - roll $5,%ecx -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 - por %xmm9,%xmm0 + roll $5,%edx + movdqa %xmm3,48(%rsp) + addl %edi,%ecx andl %ebp,%esi xorl %eax,%ebp - movdqa %xmm0,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx + pslld $2,%xmm4 + addl %edx,%ecx rorl $7,%edx - addl %esi,%ebx - addl 12(%rsp),%eax + psrld $30,%xmm13 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + por %xmm13,%xmm4 xorl %ebp,%edx - movl %ebx,%esi - roll $5,%ebx + roll $5,%ecx + pshufd $238,%xmm11,%xmm3 + addl %esi,%ebx andl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax xorl %ebp,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + roll $5,%ebx addl %edi,%eax - addl 16(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 - pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,215,8 xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm9,%xmm5 + addl 16(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm4,%xmm3 movl %eax,%edi roll $5,%eax - pxor %xmm2,%xmm1 - xorl %ecx,%esi - addl %eax,%ebp - movdqa %xmm8,%xmm9 - paddd %xmm0,%xmm8 - rorl $7,%ebx + pxor %xmm6,%xmm5 addl %esi,%ebp - pxor %xmm10,%xmm1 - addl 20(%rsp),%edx xorl %ecx,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebx + paddd %xmm4,%xmm12 + addl %eax,%ebp + pxor %xmm3,%xmm5 + addl 20(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - movdqa %xmm1,%xmm10 - movdqa %xmm8,0(%rsp) - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movdqa %xmm5,%xmm3 addl %edi,%edx - pslld $2,%xmm1 - addl 24(%rsp),%ecx xorl %ebx,%esi - psrld $30,%xmm10 + movdqa %xmm12,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm5 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm3 roll $5,%edx - xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - por %xmm10,%xmm1 - addl 28(%rsp),%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%edi - movdqa %xmm1,%xmm8 + rorl $7,%ebp + por %xmm3,%xmm5 + addl 
%edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm4,%xmm12 + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 32(%rsp),%eax - pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,192,8 xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm10,%xmm6 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm5,%xmm12 movl %ebx,%edi roll $5,%ebx - pxor %xmm3,%xmm2 - xorl %edx,%esi - addl %ebx,%eax - movdqa 32(%r11),%xmm10 - paddd %xmm1,%xmm9 - rorl $7,%ecx + pxor %xmm7,%xmm6 addl %esi,%eax - pxor %xmm8,%xmm2 - addl 36(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 xorl %edx,%edi + movdqa 32(%r11),%xmm3 + rorl $7,%ecx + paddd %xmm5,%xmm13 + addl %ebx,%eax + pxor %xmm12,%xmm6 + addl 36(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - movdqa %xmm2,%xmm8 - movdqa %xmm9,16(%rsp) - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx + movdqa %xmm6,%xmm12 addl %edi,%ebp - pslld $2,%xmm2 - addl 40(%rsp),%edx xorl %ecx,%esi - psrld $30,%xmm8 + movdqa %xmm13,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm6 + xorl %ebx,%esi movl %ebp,%edi + psrld $30,%xmm12 roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx - por %xmm8,%xmm2 - addl 44(%rsp),%ecx xorl %ebx,%edi - movdqa %xmm2,%xmm9 + rorl $7,%eax + por %xmm12,%xmm6 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm5,%xmm13 + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 48(%rsp),%ebx - pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,201,8 + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm6,%xmm13 movl %ecx,%edi roll $5,%ecx - pxor %xmm4,%xmm3 - xorl %ebp,%esi - addl %ecx,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm2,%xmm10 - rorl $7,%edx + pxor %xmm8,%xmm7 addl %esi,%ebx - pxor %xmm9,%xmm3 - addl 52(%rsp),%eax xorl %ebp,%edi + movdqa %xmm3,%xmm12 + rorl $7,%edx + paddd %xmm6,%xmm3 + addl %ecx,%ebx + pxor %xmm13,%xmm7 + addl 52(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - movdqa %xmm3,%xmm9 - movdqa %xmm10,32(%rsp) - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movdqa %xmm7,%xmm13 addl %edi,%eax - pslld $2,%xmm3 - addl 56(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 xorl %edx,%esi - psrld $30,%xmm9 + movdqa %xmm3,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm7 + xorl %ecx,%esi movl %eax,%edi + psrld $30,%xmm13 roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp - por %xmm9,%xmm3 - addl 60(%rsp),%edx xorl %ecx,%edi - movdqa %xmm3,%xmm10 + rorl $7,%ebx + por %xmm13,%xmm7 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm6,%xmm3 + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 0(%rsp),%ecx - pxor %xmm0,%xmm4 -.byte 102,68,15,58,15,210,8 xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm4,%xmm8 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm7,%xmm3 movl %edx,%edi roll $5,%edx - pxor %xmm5,%xmm4 - xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %edx,%ecx - movdqa %xmm8,%xmm9 - paddd %xmm3,%xmm8 - rorl $7,%ebp + pxor %xmm9,%xmm8 addl %esi,%ecx - pxor %xmm10,%xmm4 - addl 4(%rsp),%ebx + movups 
32(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebp + paddd %xmm7,%xmm12 + addl %edx,%ecx + pxor %xmm3,%xmm8 + addl 4(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - movdqa %xmm4,%xmm10 - movdqa %xmm8,48(%rsp) - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx + movdqa %xmm8,%xmm3 addl %edi,%ebx - pslld $2,%xmm4 - addl 8(%rsp),%eax xorl %ebp,%esi - psrld $30,%xmm10 + movdqa %xmm12,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm8 + xorl %edx,%esi movl %ebx,%edi + psrld $30,%xmm3 roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - por %xmm10,%xmm4 - addl 12(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 xorl %edx,%edi - movdqa %xmm4,%xmm8 + rorl $7,%ecx + por %xmm3,%xmm8 + addl %ebx,%eax + addl 12(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm7,%xmm12 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 16(%rsp),%edx - pxor %xmm1,%xmm5 -.byte 102,68,15,58,15,195,8 xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm5,%xmm9 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm8,%xmm12 movl %ebp,%edi roll $5,%ebp - pxor %xmm6,%xmm5 - xorl %ebx,%esi - addl %ebp,%edx - movdqa %xmm9,%xmm10 - paddd %xmm4,%xmm9 - rorl $7,%eax + pxor %xmm10,%xmm9 addl %esi,%edx - pxor %xmm8,%xmm5 - addl 20(%rsp),%ecx xorl %ebx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%eax + paddd %xmm8,%xmm13 + addl %ebp,%edx + pxor %xmm12,%xmm9 + addl 20(%rsp),%ecx + xorl %eax,%edi movl %edx,%esi roll $5,%edx - movdqa %xmm5,%xmm8 - movdqa %xmm9,0(%rsp) - xorl %eax,%edi + movdqa %xmm9,%xmm12 + addl %edi,%ecx cmpl $11,%r8d jb L$aesenclast2 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 je L$aesenclast2 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 L$aesenclast2: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 - addl %edx,%ecx +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + movdqa %xmm13,0(%rsp) rorl $7,%ebp - addl %edi,%ecx - pslld $2,%xmm5 + addl %edx,%ecx addl 24(%rsp),%ebx - xorl %eax,%esi - psrld $30,%xmm8 + pslld $2,%xmm9 + xorl %ebp,%esi movl %ecx,%edi + psrld $30,%xmm12 roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - por %xmm8,%xmm5 - addl 28(%rsp),%eax xorl %ebp,%edi - movdqa %xmm5,%xmm9 + rorl $7,%edx + por %xmm12,%xmm9 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm8,%xmm13 + rorl $7,%ecx movl %ebx,%esi - roll $5,%ebx xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + roll $5,%ebx addl %edi,%eax - movl %ecx,%edi - movups 32(%r12),%xmm12 - xorps %xmm13,%xmm12 - movups %xmm11,16(%r13,%r12,1) - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 - pxor %xmm2,%xmm6 -.byte 102,68,15,58,15,204,8 + xorl %ecx,%esi xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm6,%xmm10 addl 32(%rsp),%ebp - andl %edx,%edi - pxor %xmm7,%xmm6 + movups 32(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 andl %ecx,%esi + xorl %edx,%ecx rorl $7,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm5,%xmm10 - addl %edi,%ebp + punpcklqdq %xmm9,%xmm13 movl %eax,%edi - 
pxor %xmm9,%xmm6 + xorl %ecx,%esi + pxor %xmm11,%xmm10 roll $5,%eax addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movdqa %xmm6,%xmm9 - movdqa %xmm10,16(%rsp) - movl %ebx,%esi + movdqa %xmm3,%xmm12 + xorl %ebx,%edi + paddd %xmm9,%xmm3 xorl %ecx,%ebx + pxor %xmm13,%xmm10 + addl %eax,%ebp addl 36(%rsp),%edx - andl %ecx,%esi - pslld $2,%xmm6 andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - psrld $30,%xmm9 - addl %esi,%edx + movdqa %xmm10,%xmm13 movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm3,16(%rsp) roll $5,%ebp -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 addl %edi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - por %xmm9,%xmm6 - movl %eax,%edi + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + pslld $2,%xmm10 xorl %ebx,%eax - movdqa %xmm6,%xmm10 + addl %ebp,%edx + psrld $30,%xmm13 addl 40(%rsp),%ecx - andl %ebx,%edi andl %eax,%esi + xorl %ebx,%eax + por %xmm13,%xmm10 rorl $7,%ebp - addl %edi,%ecx movl %edx,%edi + xorl %eax,%esi roll $5,%edx + pshufd $238,%xmm9,%xmm3 addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %ebp,%esi + xorl %ebp,%edi xorl %eax,%ebp + addl %edx,%ecx addl 44(%rsp),%ebx - andl %eax,%esi andl %ebp,%edi -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 + xorl %eax,%ebp rorl $7,%edx - addl %esi,%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 movl %ecx,%esi + xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movl %edx,%edi - pxor %xmm3,%xmm7 -.byte 102,68,15,58,15,213,8 + xorl %edx,%esi xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm7,%xmm11 addl 48(%rsp),%eax - andl %ebp,%edi - pxor %xmm0,%xmm7 andl %edx,%esi + xorl %ebp,%edx rorl $7,%ecx - movdqa 48(%r11),%xmm9 - paddd %xmm6,%xmm8 - addl %edi,%eax + punpcklqdq %xmm10,%xmm3 movl %ebx,%edi - pxor %xmm10,%xmm7 + xorl %edx,%esi + pxor %xmm4,%xmm11 roll $5,%ebx addl %esi,%eax - xorl %ebp,%edx - addl %ebx,%eax - movdqa %xmm7,%xmm10 - movdqa %xmm8,32(%rsp) - movl %ecx,%esi -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 + movdqa 48(%r11),%xmm13 + xorl %ecx,%edi + paddd %xmm10,%xmm12 xorl %edx,%ecx + pxor %xmm3,%xmm11 + addl %ebx,%eax addl 52(%rsp),%ebp - andl %edx,%esi - pslld $2,%xmm7 + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - psrld $30,%xmm10 - addl %esi,%ebp + movdqa %xmm11,%xmm3 movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm12,32(%rsp) roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - por %xmm10,%xmm7 - movl %ebx,%edi + xorl %ebx,%esi + pslld $2,%xmm11 xorl %ecx,%ebx - movdqa %xmm7,%xmm8 + addl %eax,%ebp + psrld $30,%xmm3 addl 56(%rsp),%edx - andl %ecx,%edi andl %ebx,%esi + xorl %ecx,%ebx + por %xmm3,%xmm11 rorl $7,%eax - addl %edi,%edx movl %ebp,%edi + xorl %ebx,%esi roll $5,%ebp -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 + pshufd $238,%xmm10,%xmm12 addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movl %eax,%esi + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %eax,%edi xorl %ebx,%eax + addl %ebp,%edx addl 60(%rsp),%ecx - andl %ebx,%esi andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - addl %esi,%ecx movl %edx,%esi + xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %ebp,%edi - pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,198,8 + xorl %ebp,%esi xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm8,%xmm4 addl 0(%rsp),%ebx - andl %eax,%edi - pxor %xmm1,%xmm0 andl %ebp,%esi -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 + xorl %eax,%ebp rorl $7,%edx - movdqa %xmm9,%xmm10 - paddd %xmm7,%xmm9 - addl %edi,%ebx + movups 0(%r15),%xmm0 +.byte 
102,15,56,220,209 + punpcklqdq %xmm11,%xmm12 movl %ecx,%edi - pxor %xmm8,%xmm0 + xorl %ebp,%esi + pxor %xmm5,%xmm4 roll $5,%ecx addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movdqa %xmm0,%xmm8 - movdqa %xmm9,48(%rsp) - movl %edx,%esi + movdqa %xmm13,%xmm3 + xorl %edx,%edi + paddd %xmm11,%xmm13 xorl %ebp,%edx + pxor %xmm12,%xmm4 + addl %ecx,%ebx addl 4(%rsp),%eax - andl %ebp,%esi - pslld $2,%xmm0 andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - psrld $30,%xmm8 - addl %esi,%eax + movdqa %xmm4,%xmm12 movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm13,48(%rsp) roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx - addl %ebx,%eax - por %xmm8,%xmm0 - movl %ecx,%edi -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 + xorl %ecx,%esi + pslld $2,%xmm4 xorl %edx,%ecx - movdqa %xmm0,%xmm9 + addl %ebx,%eax + psrld $30,%xmm12 addl 8(%rsp),%ebp - andl %edx,%edi + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 andl %ecx,%esi + xorl %edx,%ecx + por %xmm12,%xmm4 rorl $7,%ebx - addl %edi,%ebp movl %eax,%edi + xorl %ecx,%esi roll $5,%eax + pshufd $238,%xmm11,%xmm13 addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movl %ebx,%esi + xorl %ebx,%edi xorl %ecx,%ebx + addl %eax,%ebp addl 12(%rsp),%edx - andl %ecx,%esi andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - addl %esi,%edx movl %ebp,%esi + xorl %ebx,%edi roll $5,%ebp -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 addl %edi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movl %eax,%edi - pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,207,8 + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm9,%xmm5 addl 16(%rsp),%ecx - andl %ebx,%edi - pxor %xmm2,%xmm1 andl %eax,%esi + xorl %ebx,%eax rorl $7,%ebp - movdqa %xmm10,%xmm8 - paddd %xmm0,%xmm10 - addl %edi,%ecx + punpcklqdq %xmm4,%xmm13 movl %edx,%edi - pxor %xmm9,%xmm1 + xorl %eax,%esi + pxor %xmm6,%xmm5 roll $5,%edx addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movdqa %xmm1,%xmm9 - movdqa %xmm10,0(%rsp) - movl %ebp,%esi + movdqa %xmm3,%xmm12 + xorl %ebp,%edi + paddd %xmm4,%xmm3 xorl %eax,%ebp + pxor %xmm13,%xmm5 + addl %edx,%ecx addl 20(%rsp),%ebx - andl %eax,%esi - pslld $2,%xmm1 andl %ebp,%edi -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 + xorl %eax,%ebp rorl $7,%edx - psrld $30,%xmm9 - addl %esi,%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm5,%xmm13 movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm3,0(%rsp) roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - por %xmm9,%xmm1 - movl %edx,%edi + xorl %edx,%esi + pslld $2,%xmm5 xorl %ebp,%edx - movdqa %xmm1,%xmm10 + addl %ecx,%ebx + psrld $30,%xmm13 addl 24(%rsp),%eax - andl %ebp,%edi andl %edx,%esi + xorl %ebp,%edx + por %xmm13,%xmm5 rorl $7,%ecx - addl %edi,%eax movl %ebx,%edi + xorl %edx,%esi roll $5,%ebx + pshufd $238,%xmm4,%xmm3 addl %esi,%eax - xorl %ebp,%edx + xorl %ecx,%edi + xorl %edx,%ecx addl %ebx,%eax - movl %ecx,%esi + addl 28(%rsp),%ebp cmpl $11,%r8d jb L$aesenclast3 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 je L$aesenclast3 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 L$aesenclast3: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 - xorl %edx,%ecx - addl 28(%rsp),%ebp - andl %edx,%esi +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 
andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - addl %esi,%ebp movl %eax,%esi + xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movl %ebx,%edi - pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,208,8 + xorl %ebx,%esi xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm6 addl 32(%rsp),%edx - andl %ecx,%edi - pxor %xmm3,%xmm2 andl %ebx,%esi + xorl %ecx,%ebx rorl $7,%eax - movdqa %xmm8,%xmm9 - paddd %xmm1,%xmm8 - addl %edi,%edx + punpcklqdq %xmm5,%xmm3 movl %ebp,%edi - pxor %xmm10,%xmm2 + xorl %ebx,%esi + pxor %xmm7,%xmm6 roll $5,%ebp - movups 48(%r12),%xmm12 - xorps %xmm13,%xmm12 - movups %xmm11,32(%r13,%r12,1) - xorps %xmm12,%xmm11 -.byte 102,69,15,56,220,222 - movups 32(%r15),%xmm15 addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movdqa %xmm2,%xmm10 - movdqa %xmm8,16(%rsp) - movl %eax,%esi + movups 48(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm13 + xorl %eax,%edi + paddd %xmm5,%xmm12 xorl %ebx,%eax + pxor %xmm3,%xmm6 + addl %ebp,%edx addl 36(%rsp),%ecx - andl %ebx,%esi - pslld $2,%xmm2 andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - psrld $30,%xmm10 - addl %esi,%ecx + movdqa %xmm6,%xmm3 movl %edx,%esi + xorl %eax,%edi + movdqa %xmm12,16(%rsp) roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - por %xmm10,%xmm2 - movl %ebp,%edi + xorl %ebp,%esi + pslld $2,%xmm6 xorl %eax,%ebp - movdqa %xmm2,%xmm8 + addl %edx,%ecx + psrld $30,%xmm3 addl 40(%rsp),%ebx - andl %eax,%edi andl %ebp,%esi -.byte 102,69,15,56,220,223 - movups 48(%r15),%xmm14 + xorl %eax,%ebp + por %xmm3,%xmm6 rorl $7,%edx - addl %edi,%ebx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 movl %ecx,%edi + xorl %ebp,%esi roll $5,%ecx + pshufd $238,%xmm5,%xmm12 addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movl %edx,%esi + xorl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx addl 44(%rsp),%eax - andl %ebp,%esi andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - addl %esi,%eax movl %ebx,%esi + xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx + xorl %edx,%esi addl %ebx,%eax + pxor %xmm11,%xmm7 addl 48(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 64(%r15),%xmm15 - pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,193,8 - xorl %edx,%esi + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm6,%xmm12 movl %eax,%edi roll $5,%eax - pxor %xmm4,%xmm3 - xorl %ecx,%esi - addl %eax,%ebp - movdqa %xmm9,%xmm10 - paddd %xmm2,%xmm9 - rorl $7,%ebx + pxor %xmm8,%xmm7 addl %esi,%ebp - pxor %xmm8,%xmm3 - addl 52(%rsp),%edx xorl %ecx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%ebx + paddd %xmm6,%xmm13 + addl %eax,%ebp + pxor %xmm12,%xmm7 + addl 52(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - movdqa %xmm3,%xmm8 - movdqa %xmm9,32(%rsp) - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movdqa %xmm7,%xmm12 addl %edi,%edx - pslld $2,%xmm3 - addl 56(%rsp),%ecx xorl %ebx,%esi - psrld $30,%xmm8 + movdqa %xmm13,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm7 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm12 roll $5,%edx - xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 80(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - por %xmm8,%xmm3 - addl 60(%rsp),%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 xorl %eax,%edi + rorl $7,%ebp + por %xmm12,%xmm7 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 0(%rsp),%eax - paddd %xmm3,%xmm10 
xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi - movdqa %xmm10,48(%rsp) - addl %ebx,%eax - rorl $7,%ecx + paddd %xmm7,%xmm3 addl %esi,%eax - addl 4(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 96(%r15),%xmm15 xorl %edx,%edi + movdqa %xmm3,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je L$done_ssse3 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,235 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm13,%xmm4 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm4,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm13,%xmm4 + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,243 + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm13,%xmm5 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm5,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm13,%xmm5 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,251 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm13,%xmm6 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm6,32(%rsp) + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb L$aesenclast4 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast4 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast4: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm13,%xmm6 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx 
+ addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp L$oop_ssse3 + +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb L$aesenclast5 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast5 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast5: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm2,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + + +.p2align 5 +aesni_cbc_sha1_enc_avx: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + vzeroall + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + vmovdqu (%r8),%xmm12 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r11),%xmm6 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r10 
+ vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm10,%xmm0,%xmm4 + vpaddd %xmm10,%xmm1,%xmm5 + vpaddd %xmm10,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + jmp L$oop_avx +.p2align 5 +L$oop_avx: + shrdl $2,%ebx,%ebx + vmovdqu 0(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm10,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm9 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpor %xmm8,%xmm4,%xmm4 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + vpxor %xmm9,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm10,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm9 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpor %xmm8,%xmm5,%xmm5 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm9,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa 16(%r11),%xmm10 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpaddd %xmm5,%xmm10,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + 
vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm9 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpor %xmm8,%xmm6,%xmm6 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm9,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm10,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm9 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpor %xmm8,%xmm7,%xmm7 + vpsrld $30,%xmm9,%xmm8 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + cmpl $11,%r8d + jb L$vaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je L$vaesenclast6 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +L$vaesenclast6: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm9,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm10,%xmm9 + addl %esi,%edx + vmovdqu 16(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,0(%r12,%r13,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + vaesenc 
%xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm10,%xmm9 + vmovdqa 32(%r11),%xmm10 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%edi + vpaddd %xmm3,%xmm10,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + xorl %ecx,%edi + movl 
%eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm10,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + cmpl $11,%r8d + jb L$vaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je L$vaesenclast7 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +L$vaesenclast7: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + vmovdqu 32(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,16(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm10,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm10,%xmm9 + vmovdqa 48(%r11),%xmm10 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + vaesenc 
%xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm10,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm10,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb L$vaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je L$vaesenclast8 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +L$vaesenclast8: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm10,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vmovdqu 48(%r12),%xmm13 + vpxor %xmm15,%xmm13,%xmm13 + vmovups %xmm12,32(%r13,%r12,1) + vpxor %xmm13,%xmm12,%xmm12 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -80(%r15),%xmm15 + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa 
%xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -64(%r15),%xmm14 + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -48(%r15),%xmm15 + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm10,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups -32(%r15),%xmm14 + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm10,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups -16(%r15),%xmm15 + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax addl %edi,%ebp - addl 8(%rsp),%edx xorl %ecx,%esi - movl %ebp,%edi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%edi + shldl $5,%ebp,%ebp addl %esi,%edx - addl 12(%rsp),%ecx xorl %ebx,%edi - movl %edx,%esi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx xorl %eax,%edi -.byte 102,69,15,56,220,223 - movups 112(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp + movl %edx,%esi + shldl $5,%edx,%edx addl %edi,%ecx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 0(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx cmpq %r14,%r10 - je L$done_ssse3 - movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 - movdqu 0(%r10),%xmm0 - movdqu 16(%r10),%xmm1 - movdqu 32(%r10),%xmm2 - movdqu 48(%r10),%xmm3 -.byte 102,15,56,0,198 + je L$done_avx + vmovdqa 64(%r11),%xmm9 + vmovdqa 0(%r11),%xmm10 + vmovdqu 0(%r10),%xmm0 + vmovdqu 16(%r10),%xmm1 + vmovdqu 32(%r10),%xmm2 + vmovdqu 48(%r10),%xmm3 + vpshufb %xmm9,%xmm0,%xmm0 addq $64,%r10 addl 16(%rsp),%ebx - xorl %eax,%esi -.byte 102,15,56,0,206 - movl %ecx,%edi - roll $5,%ecx - paddd %xmm9,%xmm0 xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + vpshufb %xmm9,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm10,%xmm0,%xmm8 addl %esi,%ebx - movdqa %xmm0,0(%rsp) - addl 20(%rsp),%eax xorl %ebp,%edi - psubd %xmm9,%xmm0 - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm8,0(%rsp) + addl 20(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl 
%edi,%eax - addl 24(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 xorl %edx,%esi - movl %eax,%edi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%edi + shldl $5,%eax,%eax addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi - movl %ebp,%esi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%esi + shldl $5,%ebp,%ebp addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi -.byte 102,15,56,0,214 - movl %edx,%edi - roll $5,%edx - paddd %xmm9,%xmm1 + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp + vpshufb %xmm9,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm10,%xmm1,%xmm8 addl %esi,%ecx - movdqa %xmm1,16(%rsp) - addl 36(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 xorl %eax,%edi - psubd %xmm9,%xmm1 - movl %ecx,%esi - roll $5,%ecx + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm8,16(%rsp) + addl 36(%rsp),%ebx xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%esi + shldl $5,%ecx,%ecx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi - movl %ebx,%edi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%edi + shldl $5,%ebx,%ebx addl %esi,%eax - addl 44(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 xorl %edx,%edi - movl %eax,%esi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%esi + shldl $5,%eax,%eax addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi -.byte 102,15,56,0,222 - movl %ebp,%edi - roll $5,%ebp - paddd %xmm9,%xmm2 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax + vpshufb %xmm9,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm10,%xmm2,%xmm8 addl %esi,%edx - movdqa %xmm2,32(%rsp) - addl 52(%rsp),%ecx xorl %ebx,%edi - psubd %xmm9,%xmm2 - movl %edx,%esi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm8,32(%rsp) + addl 52(%rsp),%ecx xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx cmpl $11,%r8d - jb L$aesenclast4 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 - je L$aesenclast4 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 -L$aesenclast4: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 + jb L$vaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je L$vaesenclast9 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +L$vaesenclast9: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp addl %edx,%ecx - rorl $7,%ebp - addl %edi,%ecx addl 56(%rsp),%ebx - xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%edi + shldl $5,%ecx,%ecx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 
60(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - movups %xmm11,48(%r13,%r12,1) + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) leaq 64(%r12),%r12 addl 0(%r9),%eax @@ -1238,129 +2520,131 @@ L$aesenclast4: movl %esi,4(%r9) movl %esi,%ebx movl %ecx,8(%r9) + movl %ecx,%edi movl %edx,12(%r9) + xorl %edx,%edi movl %ebp,16(%r9) - jmp L$oop_ssse3 + andl %edi,%esi + jmp L$oop_avx -.p2align 4 -L$done_ssse3: +L$done_avx: addl 16(%rsp),%ebx - xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%edi + shldl $5,%ecx,%ecx addl %esi,%ebx - addl 20(%rsp),%eax xorl %ebp,%edi - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - addl 24(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 128(%r15),%xmm15 xorl %edx,%esi - movl %eax,%edi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 16(%r15),%xmm15 xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%edi + shldl $5,%eax,%eax addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi - movl %ebp,%esi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%esi + shldl $5,%ebp,%ebp addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi - movl %edx,%edi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx xorl %eax,%esi -.byte 102,69,15,56,220,223 - movups 144(%r15),%xmm14 - addl %edx,%ecx - rorl $7,%ebp + movl %edx,%edi + shldl $5,%edx,%edx addl %esi,%ecx - addl 36(%rsp),%ebx + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 32(%r15),%xmm14 xorl %eax,%edi - movl %ecx,%esi - roll $5,%ecx + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%esi + shldl $5,%ecx,%ecx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi - movl %ebx,%edi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%edi + shldl $5,%ebx,%ebx addl %esi,%eax - addl 44(%rsp),%ebp -.byte 102,69,15,56,220,222 - movups 160(%r15),%xmm15 xorl %edx,%edi - movl %eax,%esi - roll $5,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 48(%r15),%xmm15 xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx + movl %eax,%esi + shldl $5,%eax,%eax addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi - movl %ebp,%edi - roll $5,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax + movl %ebp,%edi + shldl $5,%ebp,%ebp addl %esi,%edx - addl 52(%rsp),%ecx xorl %ebx,%edi - movl %edx,%esi - roll $5,%edx + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx cmpl $11,%r8d - jb L$aesenclast5 - movups 176(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 192(%r15),%xmm15 -.byte 102,69,15,56,220,222 - je L$aesenclast5 - movups 208(%r15),%xmm14 -.byte 102,69,15,56,220,223 - movups 224(%r15),%xmm15 -.byte 102,69,15,56,220,222 -L$aesenclast5: -.byte 102,69,15,56,221,223 - movups 16(%r15),%xmm14 + jb L$vaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 64(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 80(%r15),%xmm15 + je L$vaesenclast10 + vaesenc %xmm15,%xmm12,%xmm12 + vmovups 
96(%r15),%xmm14 + vaesenc %xmm14,%xmm12,%xmm12 + vmovups 112(%r15),%xmm15 +L$vaesenclast10: + vaesenclast %xmm15,%xmm12,%xmm12 + vmovups -112(%r15),%xmm15 + vmovups 16-112(%r15),%xmm14 + xorl %eax,%esi + shrdl $7,%ebp,%ebp addl %edx,%ecx - rorl $7,%ebp - addl %edi,%ecx addl 56(%rsp),%ebx - xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx + movl %ecx,%edi + shldl $5,%ecx,%ecx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi - movl %ebx,%esi - roll $5,%ebx + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + shldl $5,%ebx,%ebx addl %edi,%eax - movups %xmm11,48(%r13,%r12,1) + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vmovups %xmm12,48(%r13,%r12,1) movq 88(%rsp),%r8 addl 0(%r9),%eax @@ -1373,7 +2657,8 @@ L$aesenclast5: movl %ecx,8(%r9) movl %edx,12(%r9) movl %ebp,16(%r9) - movups %xmm11,(%r8) + vmovups %xmm12,(%r8) + vzeroall leaq 104(%rsp),%rsi movq 0(%rsi),%r15 movq 8(%rsi),%r14 @@ -1382,21 +2667,317 @@ L$aesenclast5: movq 32(%rsi),%rbp movq 40(%rsi),%rbx leaq 48(%rsi),%rsp -L$epilogue_ssse3: +L$epilogue_avx: .byte 0xf3,0xc3 .p2align 6 K_XX_XX: .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 - .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 - .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc - .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 - .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f - +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 + +.p2align 5 +aesni_cbc_sha1_enc_shaext: + movq 8(%rsp),%r10 + movdqu (%r9),%xmm8 + movd 16(%r9),%xmm9 + movdqa K_XX_XX+80(%rip),%xmm7 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm0 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqu (%r10),%xmm3 + movdqa %xmm9,%xmm12 +.byte 102,15,56,0,223 + movdqu 16(%r10),%xmm4 + movdqa %xmm8,%xmm11 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,0,231 + + paddd %xmm3,%xmm9 + movdqu 32(%r10),%xmm5 + leaq 64(%r10),%r10 + pxor %xmm12,%xmm3 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm3 + movdqa %xmm8,%xmm10 +.byte 102,15,56,0,239 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 15,56,201,220 + movdqu -16(%r10),%xmm6 + movdqa %xmm8,%xmm9 +.byte 102,15,56,0,247 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,205 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,203 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + cmpl $11,%r11d + jb L$aesenclast11 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast11 + movups 
96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast11: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,214 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,203 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,212 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + cmpl $11,%r11d + jb L$aesenclast12 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast12 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast12: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,203 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,212 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,205 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + cmpl $11,%r11d + jb L$aesenclast13 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast13 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast13: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + 
movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,203 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,212 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 +.byte 15,56,202,245 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm12,%xmm5 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 + cmpl $11,%r11d + jb L$aesenclast14 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast14 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast14: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + decq %rdx + + paddd %xmm11,%xmm8 + movups %xmm2,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz L$oop_shaext + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + movups %xmm2,(%r8) + movdqu %xmm8,(%r9) + movd %xmm9,16(%r9) + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha256-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha256-x86_64.s new file mode 100644 index 00000000000000..6c6685fba4f79e --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-sha256-x86_64.s @@ -0,0 +1,4356 @@ +.text + + +.globl _aesni_cbc_sha256_enc + +.p2align 4 +_aesni_cbc_sha256_enc: + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl $1,%eax + cmpq $0,%rdi + je L$probe + movl 0(%r11),%eax + movq 4(%r11),%r10 + btq $61,%r10 + jc aesni_cbc_sha256_enc_shaext + movq %r10,%r11 + shrq $32,%r11 + + testl $2048,%r10d + jnz aesni_cbc_sha256_enc_xop + andl $296,%r11d + cmpl $296,%r11d + je aesni_cbc_sha256_enc_avx2 + andl $1073741824,%eax + andl $268435968,%r10d + orl %eax,%r10d + cmpl $1342177792,%r10d + je aesni_cbc_sha256_enc_avx + ud2 + xorl %eax,%eax + cmpq $0,%rdi + je L$probe + ud2 +L$probe: + .byte 0xf3,0xc3 + + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 +.long 0,0,0,0, 0,0,0,0 +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.p2align 6 +aesni_cbc_sha256_enc_xop: +L$xop_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +L$prologue_xop: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp L$loop_xop +.p2align 4 +L$loop_xop: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$xop_00_47 + +.p2align 4 +L$xop_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm2,%xmm3,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm0,%xmm0 + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,251,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm3,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm0,%xmm0 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor 
%xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,248,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm0,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm0,%xmm0 + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm3,%xmm0,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm1,%xmm1 + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,248,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm0,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm1,%xmm1 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,249,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm1,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd 
%xmm7,%xmm1,%xmm1 + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorl $14,%r13d + movl %r14d,%eax + vpalignr $4,%xmm0,%xmm1,%xmm7 + movl %r9d,%r12d + xorl %r8d,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %r10d,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %eax,%r14d + vpaddd %xmm7,%xmm2,%xmm2 + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %r10d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi +.byte 143,232,120,194,249,13 + xorl %eax,%r14d + addl %r13d,%r11d + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%esi + addl %r11d,%edx + vpsrld $10,%xmm1,%xmm6 + rorl $2,%r14d + addl %esi,%r11d + vpaddd %xmm4,%xmm2,%xmm2 + movl %edx,%r13d + addl %r11d,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%r11d + vpxor %xmm6,%xmm7,%xmm7 + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d +.byte 143,232,120,194,250,13 + xorl %r11d,%r14d + addl %r13d,%r10d + vpsrld $10,%xmm2,%xmm6 + xorl %eax,%r15d + addl %r10d,%ecx +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%r10d + vpxor %xmm6,%xmm7,%xmm7 + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + vpxor %xmm5,%xmm7,%xmm7 + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + vpaddd %xmm7,%xmm2,%xmm2 + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl 
%r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorl $14,%r13d + movl %r14d,%r8d + vpalignr $4,%xmm1,%xmm2,%xmm7 + movl %ebx,%r12d + xorl %eax,%r13d +.byte 143,232,120,194,236,14 + rorl $9,%r14d + xorl %ecx,%r12d + vpsrld $3,%xmm4,%xmm4 + rorl $5,%r13d + xorl %r8d,%r14d + vpaddd %xmm7,%xmm3,%xmm3 + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d +.byte 143,232,120,194,245,11 + rorl $11,%r14d + xorl %ecx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi +.byte 143,232,120,194,250,13 + xorl %r8d,%r14d + addl %r13d,%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %r9d,%esi + addl %edx,%r11d + vpsrld $10,%xmm2,%xmm6 + rorl $2,%r14d + addl %esi,%edx + vpaddd %xmm4,%xmm3,%xmm3 + movl %r11d,%r13d + addl %edx,%r14d +.byte 143,232,120,194,239,2 + rorl $14,%r13d + movl %r14d,%edx + vpxor %xmm6,%xmm7,%xmm7 + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + vpxor %xmm5,%xmm7,%xmm7 + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrldq $8,%xmm7,%xmm7 + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d +.byte 143,232,120,194,251,13 + xorl %edx,%r14d + addl %r13d,%ecx + vpsrld $10,%xmm3,%xmm6 + xorl %r8d,%r15d + addl %ecx,%r10d +.byte 143,232,120,194,239,2 + rorl $2,%r14d + addl %r15d,%ecx + vpxor %xmm6,%xmm7,%xmm7 + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + vpxor %xmm5,%xmm7,%xmm7 + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + vpslldq $8,%xmm7,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + vpaddd %xmm7,%xmm3,%xmm3 + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne L$xop_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + rorl 
$11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 
112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + rorl $6,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + rorl $2,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + rorl $11,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + rorl $6,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + rorl $2,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + rorl $6,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + rorl $2,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + rorl $11,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + rorl $6,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + rorl $2,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + rorl $6,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + rorl $2,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + rorl $11,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + rorl $6,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + rorl $2,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl 
%ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + rorl $6,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + rorl $2,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + rorl $11,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + rorl $6,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + rorl $2,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jb L$loop_xop + + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_xop: + .byte 0xf3,0xc3 + + +.p2align 6 +aesni_cbc_sha256_enc_avx: +L$avx_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $128,%rsp + andq $-64,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +L$prologue_avx: + vzeroall + + movq %rdi,%r12 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r13 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + subq $9,%r14 + + movl 0(%r15),%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + + vmovdqa 0(%r13,%r14,8),%xmm14 + vmovdqa 16(%r13,%r14,8),%xmm13 + vmovdqa 32(%r13,%r14,8),%xmm12 + vmovdqu 0-128(%rdi),%xmm10 + jmp L$loop_avx +.p2align 4 +L$loop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi,%r12,1),%xmm0 + vmovdqu 16(%rsi,%r12,1),%xmm1 + vmovdqu 32(%rsi,%r12,1),%xmm2 + vmovdqu 48(%rsi,%r12,1),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%esi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%esi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$avx_00_47 + +.p2align 4 
+L$avx_00_47: + subq $-32*4,%rbp + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm3,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpaddd %xmm6,%xmm0,%xmm0 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm0,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 0(%rbp),%xmm0,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx 
+ movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm0,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpaddd %xmm6,%xmm1,%xmm1 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm1,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 32(%rbp),%xmm1,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + vpshufd $250,%xmm1,%xmm7 + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl 
$14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpaddd %xmm6,%xmm2,%xmm2 + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + vpshufd $80,%xmm2,%xmm7 + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + vpsrlq $17,%xmm7,%xmm7 + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpslldq $8,%xmm6,%xmm6 + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + vpaddd 64(%rbp),%xmm2,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + vpshufd $250,%xmm2,%xmm7 + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + vpslld $11,%xmm5,%xmm5 + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + 
vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + vpxor %xmm5,%xmm4,%xmm4 + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + vpshufd $132,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpsrldq $8,%xmm6,%xmm6 + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpaddd %xmm6,%xmm3,%xmm3 + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + vpshufd $80,%xmm3,%xmm7 + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + vpsrld $10,%xmm7,%xmm6 + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + vpsrlq $17,%xmm7,%xmm7 + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpsrlq $2,%xmm7,%xmm7 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + vpshufd $232,%xmm6,%xmm6 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpslldq $8,%xmm6,%xmm6 + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + vpaddd 96(%rbp),%xmm3,%xmm6 + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + movq 64+0(%rsp),%r12 + vpand %xmm14,%xmm11,%xmm11 + movq 64+8(%rsp),%r15 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r12,1) + leaq 16(%r12),%r12 + cmpb $0,131(%rbp) + jne L$avx_00_47 + vmovdqu (%r12),%xmm9 + movq %r12,64+0(%rsp) + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vpxor %xmm8,%xmm9,%xmm9 + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + 
vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + xorl %r8d,%r13d + shrdl $9,%r14d,%r14d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + shrdl $11,%r14d,%r14d + xorl %r10d,%r12d + xorl %ebx,%r15d + 
shrdl $6,%r13d,%r13d + addl %r12d,%r11d + andl %r15d,%esi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%esi + addl %r11d,%edx + shrdl $2,%r14d,%r14d + addl %esi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + xorl %edx,%r13d + shrdl $9,%r14d,%r14d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%esi + shrdl $11,%r14d,%r14d + xorl %r9d,%r12d + xorl %eax,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r10d + andl %esi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + addl %r10d,%ecx + shrdl $2,%r14d,%r14d + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + xorl %ecx,%r13d + shrdl $9,%r14d,%r14d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + shrdl $11,%r14d,%r14d + xorl %r8d,%r12d + xorl %r11d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%r9d + andl %r15d,%esi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%esi + addl %r9d,%ebx + shrdl $2,%r14d,%r14d + addl %esi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + xorl %ebx,%r13d + shrdl $9,%r14d,%r14d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%esi + shrdl $11,%r14d,%r14d + xorl %edx,%r12d + xorl %r10d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%r8d + andl %esi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + addl %r8d,%eax + shrdl $2,%r14d,%r14d + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + xorl %eax,%r13d + shrdl $9,%r14d,%r14d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + shrdl $11,%r14d,%r14d + xorl %ecx,%r12d + xorl %r9d,%r15d + shrdl $6,%r13d,%r13d + addl %r12d,%edx + andl %r15d,%esi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%esi + addl %edx,%r11d + shrdl $2,%r14d,%r14d + addl %esi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + xorl %r11d,%r13d + shrdl $9,%r14d,%r14d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%esi + shrdl $11,%r14d,%r14d + xorl %ebx,%r12d + xorl %r8d,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%ecx + andl %esi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + addl %ecx,%r10d + shrdl $2,%r14d,%r14d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + xorl %r10d,%r13d + shrdl $9,%r14d,%r14d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + shrdl $11,%r14d,%r14d + xorl %eax,%r12d + xorl %edx,%r15d + shrdl $6,%r13d,%r13d + addl 
%r12d,%ebx + andl %r15d,%esi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%esi + addl %ebx,%r9d + shrdl $2,%r14d,%r14d + addl %esi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + xorl %r9d,%r13d + shrdl $9,%r14d,%r14d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%esi + shrdl $11,%r14d,%r14d + xorl %r11d,%r12d + xorl %ecx,%esi + shrdl $6,%r13d,%r13d + addl %r12d,%eax + andl %esi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + addl %eax,%r8d + shrdl $2,%r14d,%r14d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%r12 + movq 64+8(%rsp),%r13 + movq 64+40(%rsp),%r15 + movq 64+48(%rsp),%rsi + + vpand %xmm14,%xmm11,%xmm11 + movl %r14d,%eax + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r12),%r12 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r12 + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + jb L$loop_avx + + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_avx: + .byte 0xf3,0xc3 + + +.p2align 6 +aesni_cbc_sha256_enc_avx2: +L$avx2_shortcut: + movq 8(%rsp),%r10 + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $576,%rsp + andq $-1024,%rsp + addq $448,%rsp + + shlq $6,%rdx + subq %rdi,%rsi + subq %rdi,%r10 + addq %rdi,%rdx + + + + movq %rdx,64+16(%rsp) + + movq %r8,64+32(%rsp) + movq %r9,64+40(%rsp) + movq %r10,64+48(%rsp) + movq %r11,64+56(%rsp) +L$prologue_avx2: + vzeroall + + movq %rdi,%r13 + vpinsrq $1,%rsi,%xmm15,%xmm15 + leaq 128(%rcx),%rdi + leaq K256+544(%rip),%r12 + movl 240-128(%rdi),%r14d + movq %r9,%r15 + movq %r10,%rsi + vmovdqu (%r8),%xmm8 + leaq -9(%r14),%r14 + + vmovdqa 0(%r12,%r14,8),%xmm14 + vmovdqa 16(%r12,%r14,8),%xmm13 + vmovdqa 32(%r12,%r14,8),%xmm12 + + subq $-64,%r13 + movl 0(%r15),%eax + leaq (%rsi,%r13,1),%r12 + movl 4(%r15),%ebx + cmpq %rdx,%r13 + movl 8(%r15),%ecx + cmoveq %rsp,%r12 + movl 12(%r15),%edx + movl 16(%r15),%r8d + movl 20(%r15),%r9d + movl 24(%r15),%r10d + movl 28(%r15),%r11d + vmovdqu 0-128(%rdi),%xmm10 + jmp L$oop_avx2 +.p2align 4 +L$oop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi,%r13,1),%xmm0 + vmovdqu -64+16(%rsi,%r13,1),%xmm1 + vmovdqu -64+32(%rsi,%r13,1),%xmm2 + vmovdqu -64+48(%rsi,%r13,1),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + leaq -64(%r13),%r13 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%esi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%esi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp L$avx2_00_47 + +.p2align 4 
+L$avx2_00_47: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm0,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm0,%ymm0 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 0(%rbp),%ymm0,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + 
vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm1,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm1,%ymm1 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 32(%rbp),%ymm1,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + 
vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpshufd $80,%ymm2,%ymm7 + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpsrlq $2,%ymm7,%ymm7 + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + vpaddd %ymm6,%ymm2,%ymm2 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + vpaddd 64(%rbp),%ymm2,%ymm6 + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor 
%ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufd $132,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpsrldq $8,%ymm6,%ymm6 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpshufd $80,%ymm3,%ymm7 + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + vpsrld $10,%ymm7,%ymm6 + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + vpsrlq $17,%ymm7,%ymm7 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpxor %ymm7,%ymm6,%ymm6 + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpsrlq $2,%ymm7,%ymm7 + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + vpxor %ymm7,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + vpshufd $232,%ymm6,%ymm6 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + vpslldq $8,%ymm6,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + vpaddd %ymm6,%ymm3,%ymm3 + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + vpaddd 96(%rbp),%ymm3,%ymm6 + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne L$avx2_00_47 + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal 
(%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal 
(%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal 
(%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vpextrq $1,%xmm15,%r12 + vmovq %xmm15,%r13 + movq 552(%rsp),%r15 + addl %r14d,%eax + leaq 448(%rsp),%rbp + + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + vmovdqu %xmm8,(%r12,%r13,1) + leaq 16(%r13),%r13 + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + addl 28(%r15),%r11d + + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + cmpq 80(%rbp),%r13 + je L$done_avx2 + + xorl %r14d,%r14d + movl %ebx,%esi + movl %r9d,%r12d + xorl %ecx,%esi + jmp L$ower_avx2 +.p2align 4 +L$ower_avx2: + vmovdqu (%r13),%xmm9 + vpinsrq $0,%r13,%xmm15,%xmm15 + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vpxor %xmm10,%xmm9,%xmm9 + vmovdqu 16-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vpxor %xmm8,%xmm9,%xmm9 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl 
$6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 32-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 48-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 80-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 96-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 112-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal 
(%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 128-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ebx,%esi + xorl %r13d,%r14d + leal (%r11,%rsi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%esi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %esi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%esi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%esi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %esi,%r15d + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 144-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%esi + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r11d,%esi + xorl %r13d,%r14d + leal (%r9,%rsi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%esi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %esi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%esi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%esi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 176-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%esi + vpand %xmm12,%xmm11,%xmm8 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 192-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r9d,%esi + xorl %r13d,%r14d + leal (%rdx,%rsi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%esi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %esi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%esi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%esi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %esi,%r15d + vaesenclast %xmm10,%xmm9,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 208-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl 
%eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%esi + vpand %xmm13,%xmm11,%xmm11 + vaesenc %xmm10,%xmm9,%xmm9 + vmovdqu 224-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %edx,%esi + xorl %r13d,%r14d + leal (%rbx,%rsi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%esi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %esi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%esi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%esi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %esi,%r15d + vpor %xmm11,%xmm8,%xmm8 + vaesenclast %xmm10,%xmm9,%xmm11 + vmovdqu 0-128(%rdi),%xmm10 + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovq %xmm15,%r13 + vpextrq $1,%xmm15,%r15 + vpand %xmm14,%xmm11,%xmm11 + vpor %xmm11,%xmm8,%xmm8 + leaq -64(%rbp),%rbp + vmovdqu %xmm8,(%r15,%r13,1) + leaq 16(%r13),%r13 + cmpq %rsp,%rbp + jae L$ower_avx2 + + movq 552(%rsp),%r15 + leaq 64(%r13),%r13 + movq 560(%rsp),%rsi + addl %r14d,%eax + leaq 448(%rsp),%rsp + + addl 0(%r15),%eax + addl 4(%r15),%ebx + addl 8(%r15),%ecx + addl 12(%r15),%edx + addl 16(%r15),%r8d + addl 20(%r15),%r9d + addl 24(%r15),%r10d + leaq (%rsi,%r13,1),%r12 + addl 28(%r15),%r11d + + cmpq 64+16(%rsp),%r13 + + movl %eax,0(%r15) + cmoveq %rsp,%r12 + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + movl %r8d,16(%r15) + movl %r9d,20(%r15) + movl %r10d,24(%r15) + movl %r11d,28(%r15) + + jbe L$oop_avx2 + leaq (%rsp),%rbp + +L$done_avx2: + leaq (%rbp),%rsp + movq 64+32(%rsp),%r8 + movq 64+56(%rsp),%rsi + vmovdqu %xmm8,(%r8) + vzeroall + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_avx2: + .byte 0xf3,0xc3 + + +.p2align 5 +aesni_cbc_sha256_enc_shaext: + movq 8(%rsp),%r10 + leaq K256+128(%rip),%rax + movdqu (%r9),%xmm1 + movdqu 16(%r9),%xmm2 + movdqa 512-128(%rax),%xmm3 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm4 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movdqu (%r10),%xmm10 + movdqu 16(%r10),%xmm11 + movdqu 32(%r10),%xmm12 +.byte 102,68,15,56,0,211 + movdqu 48(%r10),%xmm13 + + movdqa 0-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 102,68,15,56,0,219 + movdqa %xmm2,%xmm9 + movdqa %xmm1,%xmm8 + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 32-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 102,68,15,56,0,227 + leaq 64(%r10),%r10 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 64-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 102,68,15,56,0,235 +.byte 69,15,56,204,211 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 
+.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 96-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 +.byte 15,56,203,202 + movdqa 128-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + cmpl $11,%r11d + jb L$aesenclast1 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je L$aesenclast1 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +L$aesenclast1: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,0(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 160-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 192-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 224-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 256-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb L$aesenclast2 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je L$aesenclast2 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +L$aesenclast2: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,202 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,16(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movdqa 288-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 320-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 +.byte 69,15,56,204,211 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm13,%xmm3 +.byte 102,65,15,58,15,220,4 + paddd %xmm3,%xmm10 + movups 
-16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 352-128(%rax),%xmm0 + paddd %xmm13,%xmm0 +.byte 69,15,56,205,213 +.byte 69,15,56,204,220 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,221,4 + paddd %xmm3,%xmm11 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 384-128(%rax),%xmm0 + paddd %xmm10,%xmm0 +.byte 69,15,56,205,218 +.byte 69,15,56,204,229 + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 + paddd %xmm3,%xmm12 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + movdqa 416-128(%rax),%xmm0 + paddd %xmm11,%xmm0 +.byte 69,15,56,205,227 +.byte 69,15,56,204,234 + cmpl $11,%r11d + jb L$aesenclast3 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je L$aesenclast3 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +L$aesenclast3: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm12,%xmm3 +.byte 102,65,15,58,15,219,4 + paddd %xmm3,%xmm13 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm6,32(%rsi,%rdi,1) + xorps %xmm14,%xmm6 + movups -80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups -64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 448-128(%rax),%xmm0 + paddd %xmm12,%xmm0 +.byte 69,15,56,205,236 + movdqa %xmm7,%xmm3 + movups -48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups -32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,202 + + movdqa 480-128(%rax),%xmm0 + paddd %xmm13,%xmm0 + movups -16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + movups 0(%rcx),%xmm4 + aesenc %xmm5,%xmm6 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movups 16(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +.byte 15,56,203,202 + + movups 32(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 48(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + cmpl $11,%r11d + jb L$aesenclast4 + movups 64(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 80(%rcx),%xmm5 + aesenc %xmm4,%xmm6 + je L$aesenclast4 + movups 96(%rcx),%xmm4 + aesenc %xmm5,%xmm6 + movups 112(%rcx),%xmm5 + aesenc %xmm4,%xmm6 +L$aesenclast4: + aesenclast %xmm5,%xmm6 + movups 16-112(%rcx),%xmm4 + nop + + paddd %xmm9,%xmm2 + paddd %xmm8,%xmm1 + + decq %rdx + movups %xmm6,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz L$oop_shaext + + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm3 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,211,8 + + movups %xmm6,(%r8) + movdqu %xmm1,(%r9) + movdqu %xmm2,16(%r9) + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s index 2ea2d3460ad310..57509ae7196c08 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s @@ -16,7 +16,6 @@ L$oop_enc1_1: movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz L$oop_enc1_1 - .byte 102,15,56,221,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 @@ -38,34 +37,92 @@ L$oop_dec1_2: movups (%rdx),%xmm1 leaq 16(%rdx),%rdx jnz L$oop_dec1_2 - .byte 102,15,56,223,209 movups %xmm2,(%rsi) .byte 0xf3,0xc3 +.p2align 4 +_aesni_encrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop2: +.byte 102,15,56,220,209 +.byte 
102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 + + .p2align 4 _aesni_encrypt3: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax L$enc_loop3: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop3 .byte 102,15,56,220,209 @@ -80,25 +137,26 @@ L$enc_loop3: .p2align 4 _aesni_decrypt3: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax L$dec_loop3: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$dec_loop3 .byte 102,15,56,222,209 @@ -113,28 +171,30 @@ L$dec_loop3: .p2align 4 _aesni_encrypt4: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax L$enc_loop4: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop4 .byte 102,15,56,220,209 @@ -151,28 +211,30 @@ L$enc_loop4: .p2align 4 _aesni_decrypt4: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 xorps %xmm0,%xmm4 xorps %xmm0,%xmm5 - movups (%rcx),%xmm0 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax L$dec_loop4: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 - movups 
(%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$dec_loop4 .byte 102,15,56,222,209 @@ -189,43 +251,43 @@ L$dec_loop4: .p2align 4 _aesni_encrypt6: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax .byte 102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,225 pxor %xmm0,%xmm7 - decl %eax + addq $16,%rax +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%rcx),%xmm0 .byte 102,15,56,220,249 + movups -16(%rcx,%rax,1),%xmm0 jmp L$enc_loop6_enter .p2align 4 L$enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 L$enc_loop6_enter: - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop6 .byte 102,15,56,220,209 @@ -246,43 +308,43 @@ L$enc_loop6_enter: .p2align 4 _aesni_decrypt6: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax .byte 102,15,56,222,217 pxor %xmm0,%xmm5 -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,225 pxor %xmm0,%xmm7 - decl %eax + addq $16,%rax +.byte 102,15,56,222,233 .byte 102,15,56,222,241 - movups (%rcx),%xmm0 .byte 102,15,56,222,249 + movups -16(%rcx,%rax,1),%xmm0 jmp L$dec_loop6_enter .p2align 4 L$dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 L$dec_loop6_enter: - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$dec_loop6 .byte 102,15,56,222,209 @@ -303,52 +365,51 @@ L$dec_loop6_enter: .p2align 4 _aesni_encrypt8: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + addq $16,%rax pxor %xmm0,%xmm7 - decl %eax -.byte 102,15,56,220,241 +.byte 102,15,56,220,217 pxor %xmm0,%xmm8 -.byte 102,15,56,220,249 pxor %xmm0,%xmm9 - movups (%rcx),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 - movups 16(%rcx),%xmm1 + movups -16(%rcx,%rax,1),%xmm0 jmp L$enc_loop8_enter .p2align 4 L$enc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 .byte 102,68,15,56,220,193 .byte 102,68,15,56,220,201 - movups 16(%rcx),%xmm1 L$enc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 
102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 .byte 102,68,15,56,220,192 .byte 102,68,15,56,220,200 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$enc_loop8 .byte 102,15,56,220,209 @@ -373,52 +434,51 @@ L$enc_loop8_enter: .p2align 4 _aesni_decrypt8: movups (%rcx),%xmm0 - shrl $1,%eax + shll $4,%eax movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 xorps %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 -.byte 102,15,56,222,217 pxor %xmm0,%xmm5 -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + addq $16,%rax pxor %xmm0,%xmm7 - decl %eax -.byte 102,15,56,222,241 +.byte 102,15,56,222,217 pxor %xmm0,%xmm8 -.byte 102,15,56,222,249 pxor %xmm0,%xmm9 - movups (%rcx),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movups 16(%rcx),%xmm1 + movups -16(%rcx,%rax,1),%xmm0 jmp L$dec_loop8_enter .p2align 4 L$dec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movups 16(%rcx),%xmm1 L$dec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 .byte 102,68,15,56,222,192 .byte 102,68,15,56,222,200 - movups (%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$dec_loop8 .byte 102,15,56,222,209 @@ -548,14 +608,12 @@ L$oop_enc1_3: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_3 - .byte 102,15,56,221,209 movups %xmm2,(%rsi) jmp L$ecb_ret .p2align 4 L$ecb_enc_two: - xorps %xmm4,%xmm4 - call _aesni_encrypt3 + call _aesni_encrypt2 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) jmp L$ecb_ret @@ -694,14 +752,12 @@ L$oop_dec1_4: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_dec1_4 - .byte 102,15,56,223,209 movups %xmm2,(%rsi) jmp L$ecb_ret .p2align 4 L$ecb_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 + call _aesni_decrypt2 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) jmp L$ecb_ret @@ -748,53 +804,53 @@ L$ecb_ret: .p2align 4 _aesni_ccm64_encrypt_blocks: movl 240(%rcx),%eax - movdqu (%r8),%xmm9 - movdqa L$increment64(%rip),%xmm6 + movdqu (%r8),%xmm6 + movdqa L$increment64(%rip),%xmm9 movdqa L$bswap_mask(%rip),%xmm7 - shrl $1,%eax + shll $4,%eax + movl $16,%r10d leaq 0(%rcx),%r11 movdqu (%r9),%xmm3 - movdqa %xmm9,%xmm2 - movl %eax,%r10d -.byte 102,68,15,56,0,207 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 jmp L$ccm64_enc_outer .p2align 4 L$ccm64_enc_outer: movups (%r11),%xmm0 - movl %r10d,%eax + movq %r10,%rax movups (%rdi),%xmm8 xorps %xmm0,%xmm2 movups 16(%r11),%xmm1 xorps %xmm8,%xmm0 - leaq 32(%r11),%rcx xorps %xmm0,%xmm3 - movups (%rcx),%xmm0 + movups 32(%r11),%xmm0 L$ccm64_enc2_loop: .byte 102,15,56,220,209 - decl %eax .byte 102,15,56,220,217 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 - leaq 32(%rcx),%rcx .byte 102,15,56,220,216 - movups 0(%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 - paddq %xmm6,%xmm9 + paddq %xmm9,%xmm6 + decq %rdx .byte 102,15,56,221,208 .byte 102,15,56,221,216 - 
decq %rdx leaq 16(%rdi),%rdi xorps %xmm2,%xmm8 - movdqa %xmm9,%xmm2 + movdqa %xmm6,%xmm2 movups %xmm8,(%rsi) - leaq 16(%rsi),%rsi .byte 102,15,56,0,215 + leaq 16(%rsi),%rsi jnz L$ccm64_enc_outer movups %xmm3,(%r9) @@ -805,15 +861,15 @@ L$ccm64_enc2_loop: .p2align 4 _aesni_ccm64_decrypt_blocks: movl 240(%rcx),%eax - movups (%r8),%xmm9 + movups (%r8),%xmm6 movdqu (%r9),%xmm3 - movdqa L$increment64(%rip),%xmm6 + movdqa L$increment64(%rip),%xmm9 movdqa L$bswap_mask(%rip),%xmm7 - movaps %xmm9,%xmm2 + movaps %xmm6,%xmm2 movl %eax,%r10d movq %rcx,%r11 -.byte 102,68,15,56,0,207 +.byte 102,15,56,0,247 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -824,17 +880,20 @@ L$oop_enc1_5: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_5 - .byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax movups (%rdi),%xmm8 - paddq %xmm6,%xmm9 + paddq %xmm9,%xmm6 leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 jmp L$ccm64_dec_outer .p2align 4 L$ccm64_dec_outer: xorps %xmm2,%xmm8 - movdqa %xmm9,%xmm2 - movl %r10d,%eax + movdqa %xmm6,%xmm2 movups %xmm8,(%rsi) leaq 16(%rsi),%rsi .byte 102,15,56,0,215 @@ -843,36 +902,36 @@ L$ccm64_dec_outer: jz L$ccm64_dec_break movups (%r11),%xmm0 - shrl $1,%eax + movq %r10,%rax movups 16(%r11),%xmm1 xorps %xmm0,%xmm8 - leaq 32(%r11),%rcx xorps %xmm0,%xmm2 xorps %xmm8,%xmm3 - movups (%rcx),%xmm0 - + movups 32(%r11),%xmm0 + jmp L$ccm64_dec2_loop +.p2align 4 L$ccm64_dec2_loop: .byte 102,15,56,220,209 - decl %eax .byte 102,15,56,220,217 - movups 16(%rcx),%xmm1 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax .byte 102,15,56,220,208 - leaq 32(%rcx),%rcx .byte 102,15,56,220,216 - movups 0(%rcx),%xmm0 + movups -16(%rcx,%rax,1),%xmm0 jnz L$ccm64_dec2_loop movups (%rdi),%xmm8 - paddq %xmm6,%xmm9 + paddq %xmm9,%xmm6 .byte 102,15,56,220,209 .byte 102,15,56,220,217 - leaq 16(%rdi),%rdi .byte 102,15,56,221,208 .byte 102,15,56,221,216 + leaq 16(%rdi),%rdi jmp L$ccm64_dec_outer .p2align 4 L$ccm64_dec_break: + movl 240(%r11),%eax movups (%r11),%xmm0 movups 16(%r11),%xmm1 xorps %xmm0,%xmm8 @@ -884,7 +943,6 @@ L$oop_enc1_6: movups (%r11),%xmm1 leaq 16(%r11),%r11 jnz L$oop_enc1_6 - .byte 102,15,56,221,217 movups %xmm3,(%r9) .byte 0xf3,0xc3 @@ -893,199 +951,518 @@ L$oop_enc1_6: .p2align 4 _aesni_ctr32_encrypt_blocks: + leaq (%rsp),%rax + pushq %rbp + subq $128,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + cmpq $1,%rdx je L$ctr32_one_shortcut - movdqu (%r8),%xmm14 - movdqa L$bswap_mask(%rip),%xmm15 - xorl %eax,%eax -.byte 102,69,15,58,22,242,3 -.byte 102,68,15,58,34,240,3 - + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%r11d + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %r11d,%eax + xorl %r11d,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %r11d,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %r11d,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 movl 240(%rcx),%eax + xorl %r11d,%r9d bswapl %r10d - pxor %xmm12,%xmm12 - pxor %xmm13,%xmm13 -.byte 102,69,15,58,34,226,0 - leaq 3(%r10),%r11 -.byte 102,69,15,58,34,235,0 - incl %r10d -.byte 102,69,15,58,34,226,1 - incq %r11 -.byte 
102,69,15,58,34,235,1 - incl %r10d -.byte 102,69,15,58,34,226,2 - incq %r11 -.byte 102,69,15,58,34,235,2 - movdqa %xmm12,-40(%rsp) -.byte 102,69,15,56,0,231 - movdqa %xmm13,-24(%rsp) -.byte 102,69,15,56,0,239 - - pshufd $192,%xmm12,%xmm2 - pshufd $128,%xmm12,%xmm3 - pshufd $64,%xmm12,%xmm4 - cmpq $6,%rdx + movl %r9d,80+12(%rsp) + xorl %r11d,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %r11d,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx jb L$ctr32_tail - shrl $1,%eax - movq %rcx,%r11 - movl %eax,%r10d + subq $6,%rdx + cmpl $4194304,%r10d + je L$ctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp L$ctr32_loop8 + +.p2align 4 +L$ctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %r11d + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 jmp L$ctr32_loop6 .p2align 4 L$ctr32_loop6: - pshufd $192,%xmm13,%xmm5 - por %xmm14,%xmm2 - movups (%r11),%xmm0 - pshufd $128,%xmm13,%xmm6 - por %xmm14,%xmm3 - movups 16(%r11),%xmm1 - pshufd $64,%xmm13,%xmm7 - por %xmm14,%xmm4 - por %xmm14,%xmm5 - xorps %xmm0,%xmm2 - por %xmm14,%xmm6 - por %xmm14,%xmm7 + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %r11d,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %r11d,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 102,15,56,220,224 + leal 5(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + + call L$enc_loop6 + + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + subq $6,%rdx + jnc L$ctr32_loop6 + addq $6,%rdx + jz L$ctr32_done + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp L$ctr32_tail - pxor %xmm0,%xmm3 +.p2align 5 +L$ctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 .byte 102,15,56,220,209 - leaq 32(%r11),%rcx - pxor %xmm0,%xmm4 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 .byte 102,15,56,220,217 - movdqa L$increment32(%rip),%xmm13 - pxor %xmm0,%xmm5 + bswapl %r9d + movups 32-128(%rcx),%xmm0 .byte 102,15,56,220,225 - movdqa -40(%rsp),%xmm12 - pxor %xmm0,%xmm6 + xorl %r11d,%r9d + nop .byte 102,15,56,220,233 - pxor %xmm0,%xmm7 - movups (%rcx),%xmm0 - decl %eax + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 - jmp L$ctr32_enc_loop6_enter -.p2align 4 -L$ctr32_enc_loop6: +.byte 102,68,15,56,220,193 +.byte 
102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax + xorl %r11d,%r9d +.byte 0x66,0x90 .byte 102,15,56,220,225 .byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -L$ctr32_enc_loop6_enter: - movups 16(%rcx),%xmm1 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx + xorl %r11d,%r9d +.byte 0x66,0x90 .byte 102,15,56,220,224 .byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%rcx),%xmm0 - jnz L$ctr32_enc_loop6 - +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d .byte 102,15,56,220,209 - paddd %xmm13,%xmm12 .byte 102,15,56,220,217 - paddd -24(%rsp),%xmm13 + xorl %r11d,%r9d +.byte 0x66,0x90 .byte 102,15,56,220,225 - movdqa %xmm12,-40(%rsp) .byte 102,15,56,220,233 - movdqa %xmm13,-24(%rsp) + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 .byte 102,15,56,220,241 -.byte 102,69,15,56,0,231 .byte 102,15,56,220,249 -.byte 102,69,15,56,0,239 - -.byte 102,15,56,221,208 - movups (%rdi),%xmm8 -.byte 102,15,56,221,216 - movups 16(%rdi),%xmm9 -.byte 102,15,56,221,224 - movups 32(%rdi),%xmm10 -.byte 102,15,56,221,232 - movups 48(%rdi),%xmm11 -.byte 102,15,56,221,240 - movups 64(%rdi),%xmm1 -.byte 102,15,56,221,248 - movups 80(%rdi),%xmm0 - leaq 96(%rdi),%rdi - - xorps %xmm2,%xmm8 - pshufd $192,%xmm12,%xmm2 - xorps %xmm3,%xmm9 - pshufd $128,%xmm12,%xmm3 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - pshufd $64,%xmm12,%xmm4 - movups %xmm9,16(%rsi) - xorps %xmm5,%xmm11 - movups %xmm10,32(%rsi) - xorps %xmm6,%xmm1 - movups %xmm11,48(%rsi) - xorps %xmm7,%xmm0 - movups %xmm1,64(%rsi) - movups %xmm0,80(%rsi) - leaq 96(%rsi),%rsi - movl %r10d,%eax - subq $6,%rdx - jnc L$ctr32_loop6 - - addq $6,%rdx - jz L$ctr32_done - movq %r11,%rcx - leal 1(%rax,%rax,1),%eax - -L$ctr32_tail: - por %xmm14,%xmm2 - movups (%rdi),%xmm8 - cmpq $2,%rdx - jb L$ctr32_one - - por %xmm14,%xmm3 - movups 16(%rdi),%xmm9 - je L$ctr32_two +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %r11d,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 
160-128(%rcx),%xmm0 + + jb L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je L$ctr32_enc_done - pshufd $192,%xmm13,%xmm5 - por %xmm14,%xmm4 - movups 32(%rdi),%xmm10 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp L$ctr32_enc_done + +.p2align 4 +L$ctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc L$ctr32_loop8 + + addq $8,%rdx + jz L$ctr32_done + leaq -128(%rcx),%rcx + +L$ctr32_tail: + leaq 16(%rcx),%rcx cmpq $4,%rdx - jb L$ctr32_three + jb L$ctr32_loop3 + je L$ctr32_loop4 - pshufd $128,%xmm13,%xmm6 - por %xmm14,%xmm5 - movups 48(%rdi),%xmm11 - je L$ctr32_four + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 - por %xmm14,%xmm6 - xorps %xmm7,%xmm7 + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 - call _aesni_encrypt6 + call L$enc_loop8_enter - movups 64(%rdi),%xmm1 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - movups %xmm9,16(%rsi) - xorps %xmm5,%xmm11 - movups %xmm10,32(%rsi) - xorps %xmm6,%xmm1 - movups %xmm11,48(%rsi) - movups %xmm1,64(%rsi) + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor 
%xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb L$ctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je L$ctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz L$ctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz L$ctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb L$ctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je L$ctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) jmp L$ctr32_done .p2align 4 L$ctr32_one_shortcut: movups (%r8),%xmm2 - movups (%rdi),%xmm8 + movups (%rdi),%xmm10 movl 240(%rcx),%eax -L$ctr32_one: movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -1096,290 +1473,302 @@ L$oop_enc1_7: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_7 - .byte 102,15,56,221,209 - xorps %xmm2,%xmm8 - movups %xmm8,(%rsi) - jmp L$ctr32_done - -.p2align 4 -L$ctr32_two: - xorps %xmm4,%xmm4 - call _aesni_encrypt3 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - movups %xmm9,16(%rsi) - jmp L$ctr32_done - -.p2align 4 -L$ctr32_three: - call _aesni_encrypt3 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - movups %xmm9,16(%rsi) - movups %xmm10,32(%rsi) + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) jmp L$ctr32_done .p2align 4 -L$ctr32_four: - call _aesni_encrypt4 - xorps %xmm2,%xmm8 - xorps %xmm3,%xmm9 - movups %xmm8,(%rsi) - xorps %xmm4,%xmm10 - movups %xmm9,16(%rsi) - xorps %xmm5,%xmm11 - movups %xmm10,32(%rsi) - movups %xmm11,48(%rsi) - L$ctr32_done: + leaq (%rbp),%rsp + popq %rbp +L$ctr32_epilogue: .byte 0xf3,0xc3 .globl _aesni_xts_encrypt .p2align 4 _aesni_xts_encrypt: - leaq -104(%rsp),%rsp - movups (%r9),%xmm15 + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 movl 240(%r8),%eax movl 240(%rcx),%r10d movups (%r8),%xmm0 movups 16(%r8),%xmm1 leaq 32(%r8),%r8 - xorps %xmm0,%xmm15 + xorps %xmm0,%xmm2 L$oop_enc1_8: -.byte 102,68,15,56,220,249 +.byte 102,15,56,220,209 decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 jnz L$oop_enc1_8 - -.byte 102,68,15,56,221,249 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax + shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx + movups 16(%rcx,%r10,1),%xmm1 + movdqa L$xts_magic(%rip),%xmm8 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa 
%xmm15,%xmm10 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm11 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm12 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 + pxor %xmm0,%xmm14 pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + subq $96,%rdx jc L$xts_enc_short - shrl $1,%eax - subl $1,%eax - movl %eax,%r10d + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq L$xts_magic(%rip),%r8 jmp L$xts_enc_grandloop -.p2align 4 +.p2align 5 L$xts_enc_grandloop: - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - pand %xmm8,%xmm9 + movdqa %xmm0,%xmm8 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 - movdqu 48(%rdi),%xmm5 + movdqu 32(%rdi),%xmm4 pxor %xmm11,%xmm3 - movdqu 64(%rdi),%xmm6 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 pxor %xmm12,%xmm4 - movdqu 80(%rdi),%xmm7 - leaq 96(%rdi),%rdi +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 pxor %xmm13,%xmm5 - movups (%r11),%xmm0 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 pxor %xmm14,%xmm6 - pxor %xmm15,%xmm7 - - +.byte 102,15,56,220,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 - movups 16(%r11),%xmm1 - pxor %xmm0,%xmm2 - pxor %xmm0,%xmm3 + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 movdqa %xmm10,0(%rsp) -.byte 102,15,56,220,209 - leaq 32(%r11),%rcx - pxor %xmm0,%xmm4 +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 movdqa %xmm11,16(%rsp) -.byte 102,15,56,220,217 - pxor %xmm0,%xmm5 +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 movdqa %xmm12,32(%rsp) -.byte 102,15,56,220,225 - pxor %xmm0,%xmm6 - movdqa %xmm13,48(%rsp) -.byte 102,15,56,220,233 - pxor %xmm0,%xmm7 - movups (%rcx),%xmm0 - decl %eax +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 movdqa %xmm14,64(%rsp) -.byte 102,15,56,220,241 - movdqa %xmm15,80(%rsp) -.byte 102,15,56,220,249 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - jmp L$xts_enc_loop6_enter - -.p2align 4 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp L$xts_enc_loop6 +.p2align 5 L$xts_enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %eax .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -L$xts_enc_loop6_enter: - movups 16(%rcx),%xmm1 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leaq 32(%rcx),%rcx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 
.byte 102,15,56,220,248 - movups (%rcx),%xmm0 + movups -80(%rcx,%rax,1),%xmm0 jnz L$xts_enc_loop6 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - paddq %xmm15,%xmm15 + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 .byte 102,15,56,220,209 - pand %xmm8,%xmm9 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 .byte 102,15,56,220,217 - pcmpgtd %xmm15,%xmm14 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 .byte 102,15,56,220,225 - pxor %xmm9,%xmm15 .byte 102,15,56,220,233 .byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 .byte 102,15,56,220,249 - movups 16(%rcx),%xmm1 + movups -64(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm10 - paddq %xmm15,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,220,208 - pand %xmm8,%xmm9 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 .byte 102,15,56,220,216 - pcmpgtd %xmm15,%xmm14 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 .byte 102,15,56,220,224 - pxor %xmm9,%xmm15 .byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 .byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,220,248 - movups 32(%rcx),%xmm0 + movups -48(%rcx),%xmm0 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm11 - paddq %xmm15,%xmm15 + paddd %xmm9,%xmm9 .byte 102,15,56,220,209 - pand %xmm8,%xmm9 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 .byte 102,15,56,220,217 - pcmpgtd %xmm15,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 .byte 102,15,56,220,225 - pxor %xmm9,%xmm15 .byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 .byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 .byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm12 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,220,216 paddq %xmm15,%xmm15 -.byte 102,15,56,221,208 - pand %xmm8,%xmm9 -.byte 102,15,56,221,216 - pcmpgtd %xmm15,%xmm14 -.byte 102,15,56,221,224 - pxor %xmm9,%xmm15 -.byte 102,15,56,221,232 -.byte 102,15,56,221,240 -.byte 102,15,56,221,248 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm13 + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 paddq %xmm15,%xmm15 - xorps 0(%rsp),%xmm2 +.byte 102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 pand %xmm8,%xmm9 - xorps 16(%rsp),%xmm3 - pcmpgtd %xmm15,%xmm14 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 pxor %xmm9,%xmm15 - xorps 32(%rsp),%xmm4 - movups %xmm2,0(%rsi) - xorps 48(%rsp),%xmm5 - movups %xmm3,16(%rsi) - xorps 64(%rsp),%xmm6 - movups %xmm4,32(%rsi) - xorps 80(%rsp),%xmm7 - movups %xmm5,48(%rsi) - movl %r10d,%eax - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) subq $96,%rdx jnc L$xts_enc_grandloop - leal 3(%rax,%rax,1),%eax + movl $16+96,%eax + subl %r10d,%eax movq 
%r11,%rcx - movl %eax,%r10d + shrl $4,%eax L$xts_enc_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 addq $96,%rdx jz L$xts_enc_done + pxor %xmm0,%xmm11 cmpq $32,%rdx jb L$xts_enc_one + pxor %xmm0,%xmm12 je L$xts_enc_two + pxor %xmm0,%xmm13 cmpq $64,%rdx jb L$xts_enc_three + pxor %xmm0,%xmm14 je L$xts_enc_four - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 - pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1421,7 +1810,6 @@ L$oop_enc1_9: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_9 - .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1437,7 +1825,7 @@ L$xts_enc_two: xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 - call _aesni_encrypt3 + call _aesni_encrypt2 xorps %xmm10,%xmm2 movdqa %xmm12,%xmm10 @@ -1483,15 +1871,15 @@ L$xts_enc_four: call _aesni_encrypt4 - xorps %xmm10,%xmm2 - movdqa %xmm15,%xmm10 - xorps %xmm11,%xmm3 - xorps %xmm12,%xmm4 - movups %xmm2,(%rsi) - xorps %xmm13,%xmm5 - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp L$xts_enc_done @@ -1527,13 +1915,13 @@ L$oop_enc1_10: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_10 - .byte 102,15,56,221,209 xorps %xmm10,%xmm2 movups %xmm2,-16(%rsi) L$xts_enc_ret: - leaq 104(%rsp),%rsp + leaq (%rbp),%rsp + popq %rbp L$xts_enc_epilogue: .byte 0xf3,0xc3 @@ -1541,250 +1929,292 @@ L$xts_enc_epilogue: .p2align 4 _aesni_xts_decrypt: - leaq -104(%rsp),%rsp - movups (%r9),%xmm15 + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 movl 240(%r8),%eax movl 240(%rcx),%r10d movups (%r8),%xmm0 movups 16(%r8),%xmm1 leaq 32(%r8),%r8 - xorps %xmm0,%xmm15 + xorps %xmm0,%xmm2 L$oop_enc1_11: -.byte 102,68,15,56,220,249 +.byte 102,15,56,220,209 decl %eax movups (%r8),%xmm1 leaq 16(%r8),%r8 jnz L$oop_enc1_11 - -.byte 102,68,15,56,221,249 +.byte 102,15,56,221,209 xorl %eax,%eax testq $15,%rdx setnz %al shlq $4,%rax subq %rax,%rdx + movups (%rcx),%xmm0 movq %rcx,%r11 movl %r10d,%eax + shll $4,%r10d movq %rdx,%r9 andq $-16,%rdx + movups 16(%rcx,%r10,1),%xmm1 + movdqa L$xts_magic(%rip),%xmm8 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm10 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm11 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm12 + psrad $31,%xmm14 paddq %xmm15,%xmm15 - pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 - pxor %xmm9,%xmm15 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa 
%xmm15,%xmm14 + psrad $31,%xmm9 paddq %xmm15,%xmm15 pand %xmm8,%xmm9 - pcmpgtd %xmm15,%xmm14 + pxor %xmm0,%xmm14 pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + subq $96,%rdx jc L$xts_dec_short - shrl $1,%eax - subl $1,%eax - movl %eax,%r10d + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq L$xts_magic(%rip),%r8 jmp L$xts_dec_grandloop -.p2align 4 +.p2align 5 L$xts_dec_grandloop: - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu 0(%rdi),%xmm2 - pand %xmm8,%xmm9 + movdqa %xmm0,%xmm8 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 - movdqu 48(%rdi),%xmm5 + movdqu 32(%rdi),%xmm4 pxor %xmm11,%xmm3 - movdqu 64(%rdi),%xmm6 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 pxor %xmm12,%xmm4 - movdqu 80(%rdi),%xmm7 - leaq 96(%rdi),%rdi +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 pxor %xmm13,%xmm5 - movups (%r11),%xmm0 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 pxor %xmm14,%xmm6 - pxor %xmm15,%xmm7 - - +.byte 102,15,56,222,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 - movups 16(%r11),%xmm1 - pxor %xmm0,%xmm2 - pxor %xmm0,%xmm3 + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 movdqa %xmm10,0(%rsp) -.byte 102,15,56,222,209 - leaq 32(%r11),%rcx - pxor %xmm0,%xmm4 +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 movdqa %xmm11,16(%rsp) -.byte 102,15,56,222,217 - pxor %xmm0,%xmm5 +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 movdqa %xmm12,32(%rsp) -.byte 102,15,56,222,225 - pxor %xmm0,%xmm6 - movdqa %xmm13,48(%rsp) -.byte 102,15,56,222,233 - pxor %xmm0,%xmm7 - movups (%rcx),%xmm0 - decl %eax +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 movdqa %xmm14,64(%rsp) -.byte 102,15,56,222,241 - movdqa %xmm15,80(%rsp) -.byte 102,15,56,222,249 - pxor %xmm14,%xmm14 - pcmpgtd %xmm15,%xmm14 - jmp L$xts_dec_loop6_enter - -.p2align 4 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp L$xts_dec_loop6 +.p2align 5 L$xts_dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %eax .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -L$xts_dec_loop6_enter: - movups 16(%rcx),%xmm1 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leaq 32(%rcx),%rcx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%rcx),%xmm0 + movups -80(%rcx,%rax,1),%xmm0 jnz L$xts_dec_loop6 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - paddq %xmm15,%xmm15 + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 .byte 102,15,56,222,209 - pand %xmm8,%xmm9 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 .byte 102,15,56,222,217 - pcmpgtd %xmm15,%xmm14 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 .byte 102,15,56,222,225 - pxor %xmm9,%xmm15 .byte 102,15,56,222,233 .byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 .byte 102,15,56,222,249 - movups 16(%rcx),%xmm1 + movups -64(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm10 - paddq %xmm15,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,222,208 - pand %xmm8,%xmm9 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 .byte 102,15,56,222,216 - pcmpgtd %xmm15,%xmm14 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 .byte 102,15,56,222,224 - pxor %xmm9,%xmm15 
.byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 .byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 .byte 102,15,56,222,248 - movups 32(%rcx),%xmm0 + movups -48(%rcx),%xmm0 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm11 - paddq %xmm15,%xmm15 + paddd %xmm9,%xmm9 .byte 102,15,56,222,209 - pand %xmm8,%xmm9 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 .byte 102,15,56,222,217 - pcmpgtd %xmm15,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 .byte 102,15,56,222,225 - pxor %xmm9,%xmm15 .byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 .byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 .byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm12 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 paddq %xmm15,%xmm15 -.byte 102,15,56,223,208 - pand %xmm8,%xmm9 -.byte 102,15,56,223,216 - pcmpgtd %xmm15,%xmm14 -.byte 102,15,56,223,224 - pxor %xmm9,%xmm15 -.byte 102,15,56,223,232 -.byte 102,15,56,223,240 -.byte 102,15,56,223,248 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 - pshufd $19,%xmm14,%xmm9 - pxor %xmm14,%xmm14 - movdqa %xmm15,%xmm13 + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 paddq %xmm15,%xmm15 - xorps 0(%rsp),%xmm2 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 pand %xmm8,%xmm9 - xorps 16(%rsp),%xmm3 - pcmpgtd %xmm15,%xmm14 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 pxor %xmm9,%xmm15 - xorps 32(%rsp),%xmm4 - movups %xmm2,0(%rsi) - xorps 48(%rsp),%xmm5 - movups %xmm3,16(%rsi) - xorps 64(%rsp),%xmm6 - movups %xmm4,32(%rsi) - xorps 80(%rsp),%xmm7 - movups %xmm5,48(%rsi) - movl %r10d,%eax - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) subq $96,%rdx jnc L$xts_dec_grandloop - leal 3(%rax,%rax,1),%eax + movl $16+96,%eax + subl %r10d,%eax movq %r11,%rcx - movl %eax,%r10d + shrl $4,%eax L$xts_dec_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 addq $96,%rdx jz L$xts_dec_done + pxor %xmm0,%xmm12 cmpq $32,%rdx jb L$xts_dec_one + pxor %xmm0,%xmm13 je L$xts_dec_two + pxor %xmm0,%xmm14 cmpq $64,%rdx jb L$xts_dec_three je L$xts_dec_four - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movdqu (%rdi),%xmm2 - pand %xmm8,%xmm9 movdqu 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - movdqu 32(%rdi),%xmm4 pxor %xmm10,%xmm2 movdqu 48(%rdi),%xmm5 @@ -1835,7 +2265,6 @@ L$oop_dec1_12: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_dec1_12 - .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movdqa %xmm11,%xmm10 @@ -1852,7 +2281,7 @@ L$xts_dec_two: xorps %xmm10,%xmm2 xorps %xmm11,%xmm3 - call _aesni_decrypt3 + call _aesni_decrypt2 xorps %xmm10,%xmm2 movdqa %xmm12,%xmm10 @@ -1878,7 +2307,7 @@ L$xts_dec_three: xorps %xmm10,%xmm2 movdqa %xmm13,%xmm10 xorps %xmm11,%xmm3 - movdqa 
%xmm15,%xmm11 + movdqa %xmm14,%xmm11 xorps %xmm12,%xmm4 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -1888,14 +2317,8 @@ L$xts_dec_three: .p2align 4 L$xts_dec_four: - pshufd $19,%xmm14,%xmm9 - movdqa %xmm15,%xmm14 - paddq %xmm15,%xmm15 movups (%rdi),%xmm2 - pand %xmm8,%xmm9 movups 16(%rdi),%xmm3 - pxor %xmm9,%xmm15 - movups 32(%rdi),%xmm4 xorps %xmm10,%xmm2 movups 48(%rdi),%xmm5 @@ -1906,16 +2329,16 @@ L$xts_dec_four: call _aesni_decrypt4 - xorps %xmm10,%xmm2 + pxor %xmm10,%xmm2 movdqa %xmm14,%xmm10 - xorps %xmm11,%xmm3 + pxor %xmm11,%xmm3 movdqa %xmm15,%xmm11 - xorps %xmm12,%xmm4 - movups %xmm2,(%rsi) - xorps %xmm13,%xmm5 - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) leaq 64(%rsi),%rsi jmp L$xts_dec_done @@ -1940,7 +2363,6 @@ L$oop_dec1_13: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_dec1_13 - .byte 102,15,56,223,209 xorps %xmm11,%xmm2 movups %xmm2,(%rsi) @@ -1971,13 +2393,13 @@ L$oop_dec1_14: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_dec1_14 - .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movups %xmm2,(%rsi) L$xts_dec_ret: - leaq 104(%rsp),%rsp + leaq (%rbp),%rsp + popq %rbp L$xts_dec_epilogue: .byte 0xf3,0xc3 @@ -2015,7 +2437,6 @@ L$oop_enc1_15: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_enc1_15 - .byte 102,15,56,221,209 movl %r10d,%eax movq %r11,%rcx @@ -2032,12 +2453,10 @@ L$cbc_enc_tail: movq %rdx,%rcx xchgq %rdi,%rsi .long 0x9066A4F3 - movl $16,%ecx subq %rdx,%rcx xorl %eax,%eax .long 0x9066AAF3 - leaq -16(%rdi),%rdi movl %r10d,%eax movq %rdi,%rsi @@ -2045,152 +2464,385 @@ L$cbc_enc_tail: xorq %rdx,%rdx jmp L$cbc_enc_loop - .p2align 4 L$cbc_decrypt: - movups (%r8),%xmm9 + leaq (%rsp),%rax + pushq %rbp + subq $16,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r8),%xmm10 movl %r10d,%eax - cmpq $112,%rdx + cmpq $80,%rdx jbe L$cbc_dec_tail - shrl $1,%r10d - subq $112,%rdx - movl %r10d,%eax - movaps %xmm9,-24(%rsp) + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl _OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $112,%rdx + jbe L$cbc_dec_six_or_seven + + andl $71303168,%r9d + subq $80,%rdx + cmpl $4194304,%r9d + je L$cbc_dec_loop6_enter + subq $32,%rdx + leaq 112(%rcx),%rcx jmp L$cbc_dec_loop8_enter .p2align 4 -L$cbc_dec_loop8: - movaps %xmm0,-24(%rsp) - movups %xmm9,(%rsi) - leaq 16(%rsi),%rsi -L$cbc_dec_loop8_enter: - movups (%rcx),%xmm0 - movups (%rdi),%xmm2 - movups 16(%rdi),%xmm3 - movups 16(%rcx),%xmm1 - - leaq 32(%rcx),%rcx - movdqu 32(%rdi),%xmm4 - xorps %xmm0,%xmm2 - movdqu 48(%rdi),%xmm5 - xorps %xmm0,%xmm3 - movdqu 64(%rdi),%xmm6 +L$cbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +L$cbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + xorq %r11,%r11 + cmpq $112,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + setnc %r11b + shlq $7,%r11 +.byte 102,68,15,56,222,201 + addq %rdi,%r11 + movups 48-112(%rcx),%xmm1 
+.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_done: .byte 102,15,56,222,209 - pxor %xmm0,%xmm4 - movdqu 80(%rdi),%xmm7 .byte 102,15,56,222,217 - pxor %xmm0,%xmm5 - movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 .byte 102,15,56,222,225 - pxor %xmm0,%xmm6 - movdqu 112(%rdi),%xmm9 .byte 102,15,56,222,233 - pxor %xmm0,%xmm7 - decl %eax + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 .byte 102,15,56,222,241 - pxor %xmm0,%xmm8 .byte 102,15,56,222,249 - pxor %xmm0,%xmm9 - movups (%rcx),%xmm0 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 .byte 102,68,15,56,222,193 .byte 102,68,15,56,222,201 - movups 16(%rcx),%xmm1 - - call L$dec_loop8_enter + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 
0(%r11),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%r11),%xmm12 + movdqu 32(%r11),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%r11),%xmm14 + movdqu 64(%r11),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%r11),%xmm1 + movups -112(%rcx),%xmm0 - movups (%rdi),%xmm1 - movups 16(%rdi),%xmm0 - xorps -24(%rsp),%xmm2 - xorps %xmm1,%xmm3 - movups 32(%rdi),%xmm1 - xorps %xmm0,%xmm4 - movups 48(%rdi),%xmm0 - xorps %xmm1,%xmm5 - movups 64(%rdi),%xmm1 - xorps %xmm0,%xmm6 - movups 80(%rdi),%xmm0 - xorps %xmm1,%xmm7 - movups 96(%rdi),%xmm1 - xorps %xmm0,%xmm8 - movups 112(%rdi),%xmm0 - xorps %xmm1,%xmm9 movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 movups %xmm5,48(%rsi) - movl %r10d,%eax + movdqa %xmm14,%xmm5 movups %xmm6,64(%rsi) - movq %r11,%rcx + movdqa %xmm15,%xmm6 movups %xmm7,80(%rsi) - leaq 128(%rdi),%rdi + movdqa %xmm1,%xmm7 movups %xmm8,96(%rsi) leaq 112(%rsi),%rsi + subq $128,%rdx ja L$cbc_dec_loop8 movaps %xmm9,%xmm2 - movaps %xmm0,%xmm9 + leaq -112(%rcx),%rcx addq $112,%rdx jle L$cbc_dec_tail_collected - movups %xmm2,(%rsi) - leal 1(%r10,%r10,1),%eax + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $80,%rdx + jbe L$cbc_dec_tail + + movaps %xmm11,%xmm2 +L$cbc_dec_six_or_seven: + cmpq $96,%rdx + ja L$cbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +L$cbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %r11,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $96,%rdx + ja L$cbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $80,%rdx + jle L$cbc_dec_tail_collected + movups %xmm7,(%rsi) leaq 16(%rsi),%rsi + L$cbc_dec_tail: movups (%rdi),%xmm2 - movaps %xmm2,%xmm8 - cmpq $16,%rdx + subq $16,%rdx jbe L$cbc_dec_one movups 16(%rdi),%xmm3 - movaps %xmm3,%xmm7 - cmpq $32,%rdx + movaps %xmm2,%xmm11 + subq $16,%rdx jbe L$cbc_dec_two movups 32(%rdi),%xmm4 - movaps %xmm4,%xmm6 - cmpq $48,%rdx + movaps %xmm3,%xmm12 + subq $16,%rdx jbe L$cbc_dec_three movups 48(%rdi),%xmm5 - cmpq $64,%rdx + movaps 
%xmm4,%xmm13 + subq $16,%rdx jbe L$cbc_dec_four movups 64(%rdi),%xmm6 - cmpq $80,%rdx - jbe L$cbc_dec_five - - movups 80(%rdi),%xmm7 - cmpq $96,%rdx - jbe L$cbc_dec_six - - movups 96(%rdi),%xmm8 - movaps %xmm9,-24(%rsp) - call _aesni_decrypt8 - movups (%rdi),%xmm1 - movups 16(%rdi),%xmm0 - xorps -24(%rsp),%xmm2 - xorps %xmm1,%xmm3 - movups 32(%rdi),%xmm1 - xorps %xmm0,%xmm4 - movups 48(%rdi),%xmm0 - xorps %xmm1,%xmm5 - movups 64(%rdi),%xmm1 - xorps %xmm0,%xmm6 - movups 80(%rdi),%xmm0 - xorps %xmm1,%xmm7 - movups 96(%rdi),%xmm9 - xorps %xmm0,%xmm8 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - movups %xmm6,64(%rsi) - movups %xmm7,80(%rsi) - leaq 96(%rsi),%rsi - movaps %xmm8,%xmm2 - subq $112,%rdx + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + subq $16,%rdx jmp L$cbc_dec_tail_collected + .p2align 4 L$cbc_dec_one: + movaps %xmm2,%xmm11 movups (%rcx),%xmm0 movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx @@ -2201,114 +2853,69 @@ L$oop_dec1_16: movups (%rcx),%xmm1 leaq 16(%rcx),%rcx jnz L$oop_dec1_16 - .byte 102,15,56,223,209 - xorps %xmm9,%xmm2 - movaps %xmm8,%xmm9 - subq $16,%rdx + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - movups %xmm2,(%rsi) - movaps %xmm7,%xmm9 - movaps %xmm3,%xmm2 + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 leaq 16(%rsi),%rsi - subq $32,%rdx jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_three: + movaps %xmm4,%xmm13 call _aesni_decrypt3 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - movups %xmm2,(%rsi) - xorps %xmm7,%xmm4 - movups %xmm3,16(%rsi) - movaps %xmm6,%xmm9 - movaps %xmm4,%xmm2 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + movdqa %xmm4,%xmm2 leaq 32(%rsi),%rsi - subq $48,%rdx jmp L$cbc_dec_tail_collected .p2align 4 L$cbc_dec_four: + movaps %xmm5,%xmm14 call _aesni_decrypt4 - xorps %xmm9,%xmm2 - movups 48(%rdi),%xmm9 - xorps %xmm8,%xmm3 - movups %xmm2,(%rsi) - xorps %xmm7,%xmm4 - movups %xmm3,16(%rsi) - xorps %xmm6,%xmm5 - movups %xmm4,32(%rsi) - movaps %xmm5,%xmm2 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + movdqa %xmm5,%xmm2 leaq 48(%rsi),%rsi - subq $64,%rdx - jmp L$cbc_dec_tail_collected -.p2align 4 -L$cbc_dec_five: - xorps %xmm7,%xmm7 - call _aesni_decrypt6 - movups 16(%rdi),%xmm1 - movups 32(%rdi),%xmm0 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - xorps %xmm1,%xmm4 - movups 48(%rdi),%xmm1 - xorps %xmm0,%xmm5 - movups 64(%rdi),%xmm9 - xorps %xmm1,%xmm6 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - leaq 64(%rsi),%rsi - movaps %xmm6,%xmm2 - subq $80,%rdx - jmp L$cbc_dec_tail_collected -.p2align 4 -L$cbc_dec_six: - call _aesni_decrypt6 - movups 16(%rdi),%xmm1 - movups 32(%rdi),%xmm0 - xorps %xmm9,%xmm2 - xorps %xmm8,%xmm3 - xorps %xmm1,%xmm4 - movups 48(%rdi),%xmm1 - xorps %xmm0,%xmm5 - movups 64(%rdi),%xmm0 - xorps %xmm1,%xmm6 - 
movups 80(%rdi),%xmm9 - xorps %xmm0,%xmm7 - movups %xmm2,(%rsi) - movups %xmm3,16(%rsi) - movups %xmm4,32(%rsi) - movups %xmm5,48(%rsi) - movups %xmm6,64(%rsi) - leaq 80(%rsi),%rsi - movaps %xmm7,%xmm2 - subq $96,%rdx jmp L$cbc_dec_tail_collected + .p2align 4 L$cbc_dec_tail_collected: + movups %xmm10,(%r8) andq $15,%rdx - movups %xmm9,(%r8) jnz L$cbc_dec_tail_partial movups %xmm2,(%rsi) jmp L$cbc_dec_ret .p2align 4 L$cbc_dec_tail_partial: - movaps %xmm2,-24(%rsp) + movaps %xmm2,(%rsp) movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx - leaq -24(%rsp),%rsi + leaq (%rsp),%rsi .long 0x9066A4F3 - L$cbc_dec_ret: + leaq (%rbp),%rsp + popq %rbp L$cbc_ret: .byte 0xf3,0xc3 @@ -2317,7 +2924,6 @@ L$cbc_ret: .p2align 4 _aesni_set_decrypt_key: .byte 0x48,0x83,0xEC,0x08 - call __aesni_set_encrypt_key shll $4,%esi testl %eax,%eax @@ -2357,7 +2963,6 @@ L$SEH_end_set_decrypt_key: _aesni_set_encrypt_key: __aesni_set_encrypt_key: .byte 0x48,0x83,0xEC,0x08 - movq $-1,%rax testq %rdi,%rdi jz L$enc_key_ret @@ -2553,6 +3158,8 @@ L$increment64: .long 1,0,0,0 L$xts_magic: .long 0x87,0,1,0 +L$increment1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/bsaes-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/bsaes-x86_64.s index c337107abc52b6..2af36a90b05f91 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/bsaes-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/bsaes-x86_64.s @@ -4,7 +4,6 @@ - .p2align 6 _bsaes_encrypt8: leaq L$BS0(%rip),%r11 @@ -14,18 +13,18 @@ _bsaes_encrypt8: movdqa 80(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 -.byte 102,68,15,56,0,255 pxor %xmm8,%xmm1 -.byte 102,15,56,0,199 pxor %xmm8,%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor %xmm8,%xmm3 -.byte 102,15,56,0,215 pxor %xmm8,%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor %xmm8,%xmm5 -.byte 102,15,56,0,231 pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 _bsaes_encrypt8_bitslice: @@ -122,21 +121,21 @@ _bsaes_encrypt8_bitslice: L$enc_loop: pxor 0(%rax),%xmm15 pxor 16(%rax),%xmm0 -.byte 102,68,15,56,0,255 pxor 32(%rax),%xmm1 -.byte 102,15,56,0,199 pxor 48(%rax),%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor 64(%rax),%xmm3 -.byte 102,15,56,0,215 pxor 80(%rax),%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor 96(%rax),%xmm5 -.byte 102,15,56,0,231 pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 - leaq 128(%rax),%rax .byte 102,15,56,0,247 + leaq 128(%rax),%rax L$enc_sbox: pxor %xmm5,%xmm4 pxor %xmm0,%xmm1 @@ -486,18 +485,18 @@ _bsaes_decrypt8: movdqa -48(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 -.byte 102,68,15,56,0,255 pxor %xmm8,%xmm1 -.byte 102,15,56,0,199 pxor %xmm8,%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor %xmm8,%xmm3 -.byte 102,15,56,0,215 pxor %xmm8,%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor %xmm8,%xmm5 -.byte 102,15,56,0,231 pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 .byte 102,15,56,0,247 movdqa 0(%r11),%xmm7 @@ -593,21 +592,21 @@ _bsaes_decrypt8: L$dec_loop: pxor 0(%rax),%xmm15 pxor 16(%rax),%xmm0 -.byte 102,68,15,56,0,255 pxor 32(%rax),%xmm1 -.byte 
102,15,56,0,199 pxor 48(%rax),%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor 64(%rax),%xmm3 -.byte 102,15,56,0,215 pxor 80(%rax),%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor 96(%rax),%xmm5 -.byte 102,15,56,0,231 pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 - leaq 128(%rax),%rax .byte 102,15,56,0,247 + leaq 128(%rax),%rax L$dec_sbox: pxor %xmm3,%xmm2 @@ -1286,7 +1285,6 @@ L$cbc_dec_one: leaq 32(%rbp),%rsi leaq (%r15),%rdx call _asm_AES_decrypt - pxor 32(%rbp),%xmm14 movdqu %xmm14,(%r13) movdqa %xmm15,%xmm14 @@ -1384,21 +1382,21 @@ L$ctr_enc_loop: movdqa -16(%r11),%xmm7 pxor %xmm8,%xmm15 pxor %xmm8,%xmm0 -.byte 102,68,15,56,0,255 pxor %xmm8,%xmm1 -.byte 102,15,56,0,199 pxor %xmm8,%xmm2 -.byte 102,15,56,0,207 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 pxor %xmm8,%xmm3 -.byte 102,15,56,0,215 pxor %xmm8,%xmm4 -.byte 102,15,56,0,223 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 pxor %xmm8,%xmm5 -.byte 102,15,56,0,231 pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 .byte 102,15,56,0,239 - leaq L$BS0(%rip),%r11 .byte 102,15,56,0,247 + leaq L$BS0(%rip),%r11 movl %ebx,%r10d call _bsaes_encrypt8_bitslice @@ -1538,7 +1536,6 @@ L$xts_enc_prologue: leaq (%r8),%rdx call _asm_AES_encrypt - movl 240(%r15),%eax movq %r14,%rbx @@ -1908,7 +1905,6 @@ L$xts_enc_1: leaq 32(%rbp),%rsi leaq (%r15),%rdx call _asm_AES_encrypt - pxor 32(%rbp),%xmm15 @@ -1942,7 +1938,6 @@ L$xts_enc_steal: movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call _asm_AES_encrypt - pxor 32(%rbp),%xmm6 movdqu %xmm6,-16(%r13) @@ -1993,7 +1988,6 @@ L$xts_dec_prologue: leaq (%r8),%rdx call _asm_AES_encrypt - movl 240(%r15),%eax movq %r14,%rbx @@ -2370,7 +2364,6 @@ L$xts_dec_1: leaq 32(%rbp),%rsi leaq (%r15),%rdx call _asm_AES_decrypt - pxor 32(%rbp),%xmm15 @@ -2402,7 +2395,6 @@ L$xts_dec_done: movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call _asm_AES_decrypt - pxor 32(%rbp),%xmm6 movq %r13,%rdx movdqu %xmm6,(%r13) @@ -2424,7 +2416,6 @@ L$xts_dec_steal: movdqa %xmm15,32(%rbp) leaq (%r15),%rdx call _asm_AES_decrypt - pxor 32(%rbp),%xmm5 movdqu %xmm5,(%r13) diff --git a/deps/openssl/asm/x64-macosx-gas/aes/vpaes-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/vpaes-x86_64.s index 803ab41c504320..c724170ce99e1b 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/vpaes-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/vpaes-x86_64.s @@ -16,7 +16,6 @@ - .p2align 4 _vpaes_encrypt_core: movq %rdx,%r9 @@ -32,8 +31,8 @@ _vpaes_encrypt_core: movdqa L$k_ipt+16(%rip),%xmm0 .byte 102,15,56,0,193 pxor %xmm5,%xmm2 - pxor %xmm2,%xmm0 addq $16,%r9 + pxor %xmm2,%xmm0 leaq L$k_mc_backward(%rip),%r10 jmp L$enc_entry @@ -41,19 +40,19 @@ _vpaes_encrypt_core: L$enc_loop: movdqa %xmm13,%xmm4 -.byte 102,15,56,0,226 - pxor %xmm5,%xmm4 movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 .byte 102,15,56,0,195 - pxor %xmm4,%xmm0 + pxor %xmm5,%xmm4 movdqa %xmm15,%xmm5 -.byte 102,15,56,0,234 + pxor %xmm4,%xmm0 movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 movdqa %xmm14,%xmm2 .byte 102,15,56,0,211 - pxor %xmm5,%xmm2 - movdqa (%r11,%r10,1),%xmm4 movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addq $16,%r9 pxor %xmm2,%xmm0 @@ -62,30 +61,30 @@ L$enc_loop: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andq $48,%r11 - pxor %xmm3,%xmm0 subq $1,%rax + pxor %xmm3,%xmm0 L$enc_entry: movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm9,%xmm0 - movdqa %xmm11,%xmm5 .byte 102,15,56,0,232 - pxor %xmm1,%xmm0 
movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm5,%xmm3 movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 .byte 102,15,56,0,224 - pxor %xmm5,%xmm4 movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm10,%xmm3 - movdqu (%r9),%xmm5 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 + movdqu (%r9),%xmm5 pxor %xmm1,%xmm3 jnz L$enc_loop @@ -138,62 +137,61 @@ L$dec_loop: movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa -16(%r10),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - addq $16,%r9 - -.byte 102,15,56,0,197 movdqa 0(%r10),%xmm4 -.byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 16(%r10),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 - subq $1,%rax + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 -.byte 102,15,56,0,197 - movdqa 32(%r10),%xmm4 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 48(%r10),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 +.byte 102,15,56,0,226 .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 80(%r10),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - + addq $16,%r9 .byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax L$dec_entry: movdqa %xmm9,%xmm1 pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 psrld $4,%xmm1 pand %xmm9,%xmm0 - movdqa %xmm11,%xmm2 .byte 102,15,56,0,208 - pxor %xmm1,%xmm0 movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm2,%xmm3 movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm10,%xmm2 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 - pxor %xmm1,%xmm3 movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 jnz L$dec_loop @@ -222,7 +220,6 @@ _vpaes_schedule_core: call _vpaes_preheat - movdqa L$k_rcon(%rip),%xmm8 movdqu (%rdi),%xmm0 @@ -269,7 +266,6 @@ L$oop_schedule_128: decq %rsi jz L$schedule_mangle_last call _vpaes_schedule_mangle - jmp L$oop_schedule_128 @@ -291,7 +287,6 @@ L$oop_schedule_128: L$schedule_192: movdqu 8(%rdi),%xmm0 call _vpaes_schedule_transform - movdqa %xmm0,%xmm6 pxor %xmm4,%xmm4 movhlps %xmm4,%xmm6 @@ -301,15 +296,12 @@ L$oop_schedule_192: call _vpaes_schedule_round .byte 102,15,58,15,198,8 call _vpaes_schedule_mangle - call _vpaes_schedule_192_smear call _vpaes_schedule_mangle - call _vpaes_schedule_round decq %rsi jz L$schedule_mangle_last call _vpaes_schedule_mangle - call _vpaes_schedule_192_smear jmp L$oop_schedule_192 @@ -327,12 +319,10 @@ L$oop_schedule_192: L$schedule_256: movdqu 16(%rdi),%xmm0 call _vpaes_schedule_transform - movl $7,%esi L$oop_schedule_256: call _vpaes_schedule_mangle - movdqa %xmm0,%xmm6 @@ -342,7 +332,6 @@ L$oop_schedule_256: call _vpaes_schedule_mangle - pshufd $255,%xmm0,%xmm0 movdqa %xmm7,%xmm5 movdqa %xmm6,%xmm7 @@ -379,7 +368,6 @@ L$schedule_mangle_last_dec: addq $-16,%rdx pxor L$k_s63(%rip),%xmm0 call _vpaes_schedule_transform - movdqu %xmm0,(%rdx) @@ -411,12 +399,12 @@ L$schedule_mangle_last_dec: .p2align 4 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm0 - pxor %xmm0,%xmm6 + pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 - pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 .byte 0xf3,0xc3 diff --git 
a/deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s b/deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s deleted file mode 100644 index 00c529a0791979..00000000000000 --- a/deps/openssl/asm/x64-macosx-gas/bn/modexp512-x86_64.s +++ /dev/null @@ -1,1775 +0,0 @@ -.text - - - -.p2align 4 -MULADD_128x512: - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - movq %r8,0(%rcx) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq 8(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - movq %r9,8(%rcx) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%r9 - .byte 0xf3,0xc3 - - -.p2align 4 -mont_reduce: - leaq 192(%rsp),%rdi - movq 32(%rsp),%rsi - addq $576,%rsi - leaq 520(%rsp),%rcx - - movq 96(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - movq (%rcx),%r8 - addq %rax,%r8 - adcq $0,%rdx - movq %r8,0(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - movq 8(%rcx),%r9 - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - movq 16(%rcx),%r10 - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - movq 24(%rcx),%r11 - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - movq 32(%rcx),%r12 - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - movq 40(%rcx),%r13 - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - movq 48(%rcx),%r14 - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - movq 56(%rcx),%r15 - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq 104(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - movq %r9,8(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - 
addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%r9 - movq 112(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - movq %r10,16(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%r10 - movq 120(%rcx),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - movq %r11,24(%rdi) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%r11 - xorq %rax,%rax - - addq 64(%rcx),%r8 - adcq 72(%rcx),%r9 - adcq 80(%rcx),%r10 - adcq 88(%rcx),%r11 - adcq $0,%rax - - - - - movq %r8,64(%rdi) - movq %r9,72(%rdi) - movq %r10,%rbp - movq %r11,88(%rdi) - - movq %rax,384(%rsp) - - movq 0(%rdi),%r8 - movq 8(%rdi),%r9 - movq 16(%rdi),%r10 - movq 24(%rdi),%r11 - - - - - - - - - addq $80,%rdi - - addq $64,%rsi - leaq 296(%rsp),%rcx - - call MULADD_128x512 - - - movq 384(%rsp),%rax - - - addq -16(%rdi),%r8 - adcq -8(%rdi),%r9 - movq %r8,64(%rcx) - movq %r9,72(%rcx) - - adcq %rax,%rax - movq %rax,384(%rsp) - - leaq 192(%rsp),%rdi - addq $64,%rsi - - - - - - movq (%rsi),%r8 - movq 8(%rsi),%rbx - - movq (%rcx),%rax - mulq %r8 - movq %rax,%rbp - movq %rdx,%r9 - - movq 8(%rcx),%rax - mulq %r8 - addq %rax,%r9 - - movq (%rcx),%rax - mulq %rbx - addq %rax,%r9 - - movq %r9,8(%rdi) - - - subq $192,%rsi - - movq (%rcx),%r8 - movq 8(%rcx),%r9 - - call MULADD_128x512 - - - - - - movq 0(%rsi),%rax - movq 
8(%rsi),%rbx - movq 16(%rsi),%rdi - movq 24(%rsi),%rdx - - - movq 384(%rsp),%rbp - - addq 64(%rcx),%r8 - adcq 72(%rcx),%r9 - - - adcq %rbp,%rbp - - - - shlq $3,%rbp - movq 32(%rsp),%rcx - addq %rcx,%rbp - - - xorq %rsi,%rsi - - addq 0(%rbp),%r10 - adcq 64(%rbp),%r11 - adcq 128(%rbp),%r12 - adcq 192(%rbp),%r13 - adcq 256(%rbp),%r14 - adcq 320(%rbp),%r15 - adcq 384(%rbp),%r8 - adcq 448(%rbp),%r9 - - - - sbbq $0,%rsi - - - andq %rsi,%rax - andq %rsi,%rbx - andq %rsi,%rdi - andq %rsi,%rdx - - movq $1,%rbp - subq %rax,%r10 - sbbq %rbx,%r11 - sbbq %rdi,%r12 - sbbq %rdx,%r13 - - - - - sbbq $0,%rbp - - - - addq $512,%rcx - movq 32(%rcx),%rax - movq 40(%rcx),%rbx - movq 48(%rcx),%rdi - movq 56(%rcx),%rdx - - - - andq %rsi,%rax - andq %rsi,%rbx - andq %rsi,%rdi - andq %rsi,%rdx - - - - subq $1,%rbp - - sbbq %rax,%r14 - sbbq %rbx,%r15 - sbbq %rdi,%r8 - sbbq %rdx,%r9 - - - - movq 144(%rsp),%rsi - movq %r10,0(%rsi) - movq %r11,8(%rsi) - movq %r12,16(%rsi) - movq %r13,24(%rsi) - movq %r14,32(%rsi) - movq %r15,40(%rsi) - movq %r8,48(%rsi) - movq %r9,56(%rsi) - - .byte 0xf3,0xc3 - - -.p2align 4 -mont_mul_a3b: - - - - - movq 0(%rdi),%rbp - - movq %r10,%rax - mulq %rbp - movq %rax,520(%rsp) - movq %rdx,%r10 - movq %r11,%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - movq %rdx,%r11 - movq %r12,%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - movq %rdx,%r12 - movq %r13,%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - movq %rdx,%r13 - movq %r14,%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - movq %rdx,%r14 - movq %r15,%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - movq %rdx,%r15 - movq %r8,%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq %r9,%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - movq %rdx,%r9 - movq 8(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - movq %r10,528(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%r10 - movq 16(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - movq %r11,536(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq 
$0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%r11 - movq 24(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - movq %r12,544(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%r12 - movq 32(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - movq %r13,552(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%r13 - movq 40(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - movq %r14,560(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%r14 - movq 48(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - movq %r15,568(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - addq %rbx,%r8 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - 
movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%r15 - movq 56(%rdi),%rbp - movq 0(%rsi),%rax - mulq %rbp - addq %rax,%r8 - adcq $0,%rdx - movq %r8,576(%rsp) - movq %rdx,%rbx - - movq 8(%rsi),%rax - mulq %rbp - addq %rax,%r9 - adcq $0,%rdx - addq %rbx,%r9 - adcq $0,%rdx - movq %rdx,%rbx - - movq 16(%rsi),%rax - mulq %rbp - addq %rax,%r10 - adcq $0,%rdx - addq %rbx,%r10 - adcq $0,%rdx - movq %rdx,%rbx - - movq 24(%rsi),%rax - mulq %rbp - addq %rax,%r11 - adcq $0,%rdx - addq %rbx,%r11 - adcq $0,%rdx - movq %rdx,%rbx - - movq 32(%rsi),%rax - mulq %rbp - addq %rax,%r12 - adcq $0,%rdx - addq %rbx,%r12 - adcq $0,%rdx - movq %rdx,%rbx - - movq 40(%rsi),%rax - mulq %rbp - addq %rax,%r13 - adcq $0,%rdx - addq %rbx,%r13 - adcq $0,%rdx - movq %rdx,%rbx - - movq 48(%rsi),%rax - mulq %rbp - addq %rax,%r14 - adcq $0,%rdx - addq %rbx,%r14 - adcq $0,%rdx - movq %rdx,%rbx - - movq 56(%rsi),%rax - mulq %rbp - addq %rax,%r15 - adcq $0,%rdx - addq %rbx,%r15 - adcq $0,%rdx - movq %rdx,%r8 - movq %r9,584(%rsp) - movq %r10,592(%rsp) - movq %r11,600(%rsp) - movq %r12,608(%rsp) - movq %r13,616(%rsp) - movq %r14,624(%rsp) - movq %r15,632(%rsp) - movq %r8,640(%rsp) - - - - - - jmp mont_reduce - - - - -.p2align 4 -sqr_reduce: - movq 16(%rsp),%rcx - - - - movq %r10,%rbx - - movq %r11,%rax - mulq %rbx - movq %rax,528(%rsp) - movq %rdx,%r10 - movq %r12,%rax - mulq %rbx - addq %rax,%r10 - adcq $0,%rdx - movq %rdx,%r11 - movq %r13,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - movq %rdx,%r12 - movq %r14,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - movq %rdx,%r13 - movq %r15,%rax - mulq %rbx - addq %rax,%r13 - adcq $0,%rdx - movq %rdx,%r14 - movq %r8,%rax - mulq %rbx - addq %rax,%r14 - adcq $0,%rdx - movq %rdx,%r15 - movq %r9,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - movq %rdx,%rsi - - movq %r10,536(%rsp) - - - - - - movq 8(%rcx),%rbx - - movq 16(%rcx),%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - movq %r11,544(%rsp) - - movq %rdx,%r10 - movq 24(%rcx),%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - addq %r10,%r12 - adcq $0,%rdx - movq %r12,552(%rsp) - - movq %rdx,%r10 - movq 32(%rcx),%rax - mulq %rbx - addq %rax,%r13 - adcq $0,%rdx - addq %r10,%r13 - adcq $0,%rdx - - movq %rdx,%r10 - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%r14 - adcq $0,%rdx - addq %r10,%r14 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - addq %r10,%r15 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%rsi - adcq $0,%rdx - addq %r10,%rsi - adcq $0,%rdx - - movq %rdx,%r11 - - - - - movq 16(%rcx),%rbx - - movq 24(%rcx),%rax - mulq %rbx - addq %rax,%r13 - adcq $0,%rdx - movq %r13,560(%rsp) - - movq %rdx,%r10 - movq 32(%rcx),%rax - mulq %rbx - addq %rax,%r14 - adcq $0,%rdx - addq %r10,%r14 - adcq $0,%rdx - movq %r14,568(%rsp) - - movq %rdx,%r10 - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - addq %r10,%r15 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%rsi - adcq $0,%rdx - addq %r10,%rsi - adcq $0,%rdx - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - addq %r10,%r11 - adcq $0,%rdx - - movq %rdx,%r12 - - - - - - movq 24(%rcx),%rbx - - movq 32(%rcx),%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - movq %r15,576(%rsp) - - movq %rdx,%r10 - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%rsi - adcq $0,%rdx - addq 
%r10,%rsi - adcq $0,%rdx - movq %rsi,584(%rsp) - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - addq %r10,%r11 - adcq $0,%rdx - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - addq %r10,%r12 - adcq $0,%rdx - - movq %rdx,%r15 - - - - - movq 32(%rcx),%rbx - - movq 40(%rcx),%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - movq %r11,592(%rsp) - - movq %rdx,%r10 - movq %r8,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - addq %r10,%r12 - adcq $0,%rdx - movq %r12,600(%rsp) - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - addq %r10,%r15 - adcq $0,%rdx - - movq %rdx,%r11 - - - - - movq 40(%rcx),%rbx - - movq %r8,%rax - mulq %rbx - addq %rax,%r15 - adcq $0,%rdx - movq %r15,608(%rsp) - - movq %rdx,%r10 - movq %r9,%rax - mulq %rbx - addq %rax,%r11 - adcq $0,%rdx - addq %r10,%r11 - adcq $0,%rdx - movq %r11,616(%rsp) - - movq %rdx,%r12 - - - - - movq %r8,%rbx - - movq %r9,%rax - mulq %rbx - addq %rax,%r12 - adcq $0,%rdx - movq %r12,624(%rsp) - - movq %rdx,632(%rsp) - - - movq 528(%rsp),%r10 - movq 536(%rsp),%r11 - movq 544(%rsp),%r12 - movq 552(%rsp),%r13 - movq 560(%rsp),%r14 - movq 568(%rsp),%r15 - - movq 24(%rcx),%rax - mulq %rax - movq %rax,%rdi - movq %rdx,%r8 - - addq %r10,%r10 - adcq %r11,%r11 - adcq %r12,%r12 - adcq %r13,%r13 - adcq %r14,%r14 - adcq %r15,%r15 - adcq $0,%r8 - - movq 0(%rcx),%rax - mulq %rax - movq %rax,520(%rsp) - movq %rdx,%rbx - - movq 8(%rcx),%rax - mulq %rax - - addq %rbx,%r10 - adcq %rax,%r11 - adcq $0,%rdx - - movq %rdx,%rbx - movq %r10,528(%rsp) - movq %r11,536(%rsp) - - movq 16(%rcx),%rax - mulq %rax - - addq %rbx,%r12 - adcq %rax,%r13 - adcq $0,%rdx - - movq %rdx,%rbx - - movq %r12,544(%rsp) - movq %r13,552(%rsp) - - xorq %rbp,%rbp - addq %rbx,%r14 - adcq %rdi,%r15 - adcq $0,%rbp - - movq %r14,560(%rsp) - movq %r15,568(%rsp) - - - - - movq 576(%rsp),%r10 - movq 584(%rsp),%r11 - movq 592(%rsp),%r12 - movq 600(%rsp),%r13 - movq 608(%rsp),%r14 - movq 616(%rsp),%r15 - movq 624(%rsp),%rdi - movq 632(%rsp),%rsi - - movq %r9,%rax - mulq %rax - movq %rax,%r9 - movq %rdx,%rbx - - addq %r10,%r10 - adcq %r11,%r11 - adcq %r12,%r12 - adcq %r13,%r13 - adcq %r14,%r14 - adcq %r15,%r15 - adcq %rdi,%rdi - adcq %rsi,%rsi - adcq $0,%rbx - - addq %rbp,%r10 - - movq 32(%rcx),%rax - mulq %rax - - addq %r8,%r10 - adcq %rax,%r11 - adcq $0,%rdx - - movq %rdx,%rbp - - movq %r10,576(%rsp) - movq %r11,584(%rsp) - - movq 40(%rcx),%rax - mulq %rax - - addq %rbp,%r12 - adcq %rax,%r13 - adcq $0,%rdx - - movq %rdx,%rbp - - movq %r12,592(%rsp) - movq %r13,600(%rsp) - - movq 48(%rcx),%rax - mulq %rax - - addq %rbp,%r14 - adcq %rax,%r15 - adcq $0,%rdx - - movq %r14,608(%rsp) - movq %r15,616(%rsp) - - addq %rdx,%rdi - adcq %r9,%rsi - adcq $0,%rbx - - movq %rdi,624(%rsp) - movq %rsi,632(%rsp) - movq %rbx,640(%rsp) - - jmp mont_reduce - - - -.globl _mod_exp_512 - -_mod_exp_512: - pushq %rbp - pushq %rbx - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - - movq %rsp,%r8 - subq $2688,%rsp - andq $-64,%rsp - - - movq %r8,0(%rsp) - movq %rdi,8(%rsp) - movq %rsi,16(%rsp) - movq %rcx,24(%rsp) -L$body: - - - - pxor %xmm4,%xmm4 - movdqu 0(%rsi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqu 32(%rsi),%xmm2 - movdqu 48(%rsi),%xmm3 - movdqa %xmm4,512(%rsp) - movdqa %xmm4,528(%rsp) - movdqa %xmm4,608(%rsp) - movdqa %xmm4,624(%rsp) - movdqa %xmm0,544(%rsp) - movdqa %xmm1,560(%rsp) - movdqa %xmm2,576(%rsp) - movdqa %xmm3,592(%rsp) - - - movdqu 0(%rdx),%xmm0 - movdqu 16(%rdx),%xmm1 - movdqu 32(%rdx),%xmm2 - movdqu 48(%rdx),%xmm3 
- - leaq 384(%rsp),%rbx - movq %rbx,136(%rsp) - call mont_reduce - - - leaq 448(%rsp),%rcx - xorq %rax,%rax - movq %rax,0(%rcx) - movq %rax,8(%rcx) - movq %rax,24(%rcx) - movq %rax,32(%rcx) - movq %rax,40(%rcx) - movq %rax,48(%rcx) - movq %rax,56(%rcx) - movq %rax,128(%rsp) - movq $1,16(%rcx) - - leaq 640(%rsp),%rbp - movq %rcx,%rsi - movq %rbp,%rdi - movq $8,%rax -loop_0: - movq (%rcx),%rbx - movw %bx,(%rdi) - shrq $16,%rbx - movw %bx,64(%rdi) - shrq $16,%rbx - movw %bx,128(%rdi) - shrq $16,%rbx - movw %bx,192(%rdi) - leaq 8(%rcx),%rcx - leaq 256(%rdi),%rdi - decq %rax - jnz loop_0 - movq $31,%rax - movq %rax,32(%rsp) - movq %rbp,40(%rsp) - - movq %rsi,136(%rsp) - movq 0(%rsi),%r10 - movq 8(%rsi),%r11 - movq 16(%rsi),%r12 - movq 24(%rsi),%r13 - movq 32(%rsi),%r14 - movq 40(%rsi),%r15 - movq 48(%rsi),%r8 - movq 56(%rsi),%r9 -init_loop: - leaq 384(%rsp),%rdi - call mont_mul_a3b - leaq 448(%rsp),%rsi - movq 40(%rsp),%rbp - addq $2,%rbp - movq %rbp,40(%rsp) - movq %rsi,%rcx - movq $8,%rax -loop_1: - movq (%rcx),%rbx - movw %bx,(%rbp) - shrq $16,%rbx - movw %bx,64(%rbp) - shrq $16,%rbx - movw %bx,128(%rbp) - shrq $16,%rbx - movw %bx,192(%rbp) - leaq 8(%rcx),%rcx - leaq 256(%rbp),%rbp - decq %rax - jnz loop_1 - movq 32(%rsp),%rax - subq $1,%rax - movq %rax,32(%rsp) - jne init_loop - - - - movdqa %xmm0,64(%rsp) - movdqa %xmm1,80(%rsp) - movdqa %xmm2,96(%rsp) - movdqa %xmm3,112(%rsp) - - - - - - movl 126(%rsp),%eax - movq %rax,%rdx - shrq $11,%rax - andl $2047,%edx - movl %edx,126(%rsp) - leaq 640(%rsp,%rax,2),%rsi - movq 8(%rsp),%rdx - movq $4,%rbp -loop_2: - movzwq 192(%rsi),%rbx - movzwq 448(%rsi),%rax - shlq $16,%rbx - shlq $16,%rax - movw 128(%rsi),%bx - movw 384(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 64(%rsi),%bx - movw 320(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 0(%rsi),%bx - movw 256(%rsi),%ax - movq %rbx,0(%rdx) - movq %rax,8(%rdx) - leaq 512(%rsi),%rsi - leaq 16(%rdx),%rdx - subq $1,%rbp - jnz loop_2 - movq $505,48(%rsp) - - movq 8(%rsp),%rcx - movq %rcx,136(%rsp) - movq 0(%rcx),%r10 - movq 8(%rcx),%r11 - movq 16(%rcx),%r12 - movq 24(%rcx),%r13 - movq 32(%rcx),%r14 - movq 40(%rcx),%r15 - movq 48(%rcx),%r8 - movq 56(%rcx),%r9 - jmp sqr_2 - -main_loop_a3b: - call sqr_reduce - call sqr_reduce - call sqr_reduce -sqr_2: - call sqr_reduce - call sqr_reduce - - - - movq 48(%rsp),%rcx - movq %rcx,%rax - shrq $4,%rax - movl 64(%rsp,%rax,2),%edx - andq $15,%rcx - shrq %cl,%rdx - andq $31,%rdx - - leaq 640(%rsp,%rdx,2),%rsi - leaq 448(%rsp),%rdx - movq %rdx,%rdi - movq $4,%rbp -loop_3: - movzwq 192(%rsi),%rbx - movzwq 448(%rsi),%rax - shlq $16,%rbx - shlq $16,%rax - movw 128(%rsi),%bx - movw 384(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 64(%rsi),%bx - movw 320(%rsi),%ax - shlq $16,%rbx - shlq $16,%rax - movw 0(%rsi),%bx - movw 256(%rsi),%ax - movq %rbx,0(%rdx) - movq %rax,8(%rdx) - leaq 512(%rsi),%rsi - leaq 16(%rdx),%rdx - subq $1,%rbp - jnz loop_3 - movq 8(%rsp),%rsi - call mont_mul_a3b - - - - movq 48(%rsp),%rcx - subq $5,%rcx - movq %rcx,48(%rsp) - jge main_loop_a3b - - - -end_main_loop_a3b: - - - movq 8(%rsp),%rdx - pxor %xmm4,%xmm4 - movdqu 0(%rdx),%xmm0 - movdqu 16(%rdx),%xmm1 - movdqu 32(%rdx),%xmm2 - movdqu 48(%rdx),%xmm3 - movdqa %xmm4,576(%rsp) - movdqa %xmm4,592(%rsp) - movdqa %xmm4,608(%rsp) - movdqa %xmm4,624(%rsp) - movdqa %xmm0,512(%rsp) - movdqa %xmm1,528(%rsp) - movdqa %xmm2,544(%rsp) - movdqa %xmm3,560(%rsp) - call mont_reduce - - - - movq 8(%rsp),%rax - movq 0(%rax),%r8 - movq 8(%rax),%r9 - movq 16(%rax),%r10 - movq 24(%rax),%r11 - movq 32(%rax),%r12 - 
movq 40(%rax),%r13 - movq 48(%rax),%r14 - movq 56(%rax),%r15 - - - movq 24(%rsp),%rbx - addq $512,%rbx - - subq 0(%rbx),%r8 - sbbq 8(%rbx),%r9 - sbbq 16(%rbx),%r10 - sbbq 24(%rbx),%r11 - sbbq 32(%rbx),%r12 - sbbq 40(%rbx),%r13 - sbbq 48(%rbx),%r14 - sbbq 56(%rbx),%r15 - - - movq 0(%rax),%rsi - movq 8(%rax),%rdi - movq 16(%rax),%rcx - movq 24(%rax),%rdx - cmovncq %r8,%rsi - cmovncq %r9,%rdi - cmovncq %r10,%rcx - cmovncq %r11,%rdx - movq %rsi,0(%rax) - movq %rdi,8(%rax) - movq %rcx,16(%rax) - movq %rdx,24(%rax) - - movq 32(%rax),%rsi - movq 40(%rax),%rdi - movq 48(%rax),%rcx - movq 56(%rax),%rdx - cmovncq %r12,%rsi - cmovncq %r13,%rdi - cmovncq %r14,%rcx - cmovncq %r15,%rdx - movq %rsi,32(%rax) - movq %rdi,40(%rax) - movq %rcx,48(%rax) - movq %rdx,56(%rax) - - movq 0(%rsp),%rsi - movq 0(%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbx - movq 40(%rsi),%rbp - leaq 48(%rsi),%rsp -L$epilogue: - .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/rsaz-avx2.s b/deps/openssl/asm/x64-macosx-gas/bn/rsaz-avx2.s new file mode 100644 index 00000000000000..1819757f0b4f4f --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/bn/rsaz-avx2.s @@ -0,0 +1,1632 @@ +.text + +.globl _rsaz_1024_sqr_avx2 + +.p2align 6 +_rsaz_1024_sqr_avx2: + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + movq %rax,%rbp + movq %rdx,%r13 + subq $832,%rsp + movq %r13,%r15 + subq $-128,%rdi + subq $-128,%rsi + subq $-128,%r13 + + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + vpxor %ymm9,%ymm9,%ymm9 + jz L$sqr_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%r13),%ymm0 + andq $-2048,%rsp + vmovdqu 32-128(%r13),%ymm1 + vmovdqu 64-128(%r13),%ymm2 + vmovdqu 96-128(%r13),%ymm3 + vmovdqu 128-128(%r13),%ymm4 + vmovdqu 160-128(%r13),%ymm5 + vmovdqu 192-128(%r13),%ymm6 + vmovdqu 224-128(%r13),%ymm7 + vmovdqu 256-128(%r13),%ymm8 + leaq 832+128(%rsp),%r13 + vmovdqu %ymm0,0-128(%r13) + vmovdqu %ymm1,32-128(%r13) + vmovdqu %ymm2,64-128(%r13) + vmovdqu %ymm3,96-128(%r13) + vmovdqu %ymm4,128-128(%r13) + vmovdqu %ymm5,160-128(%r13) + vmovdqu %ymm6,192-128(%r13) + vmovdqu %ymm7,224-128(%r13) + vmovdqu %ymm8,256-128(%r13) + vmovdqu %ymm9,288-128(%r13) + +L$sqr_1024_no_n_copy: + andq $-1024,%rsp + + vmovdqu 32-128(%rsi),%ymm1 + vmovdqu 64-128(%rsi),%ymm2 + vmovdqu 96-128(%rsi),%ymm3 + vmovdqu 128-128(%rsi),%ymm4 + vmovdqu 160-128(%rsi),%ymm5 + vmovdqu 192-128(%rsi),%ymm6 + vmovdqu 224-128(%rsi),%ymm7 + vmovdqu 256-128(%rsi),%ymm8 + + leaq 192(%rsp),%rbx + vpbroadcastq L$and_mask(%rip),%ymm15 + jmp L$OOP_GRANDE_SQR_1024 + +.p2align 5 +L$OOP_GRANDE_SQR_1024: + leaq 576+128(%rsp),%r9 + leaq 448(%rsp),%r12 + + + + + vpaddq %ymm1,%ymm1,%ymm1 + vpbroadcastq 0-128(%rsi),%ymm10 + vpaddq %ymm2,%ymm2,%ymm2 + vmovdqa %ymm1,0-128(%r9) + vpaddq %ymm3,%ymm3,%ymm3 + vmovdqa %ymm2,32-128(%r9) + vpaddq %ymm4,%ymm4,%ymm4 + vmovdqa %ymm3,64-128(%r9) + vpaddq %ymm5,%ymm5,%ymm5 + vmovdqa %ymm4,96-128(%r9) + vpaddq %ymm6,%ymm6,%ymm6 + vmovdqa %ymm5,128-128(%r9) + vpaddq %ymm7,%ymm7,%ymm7 + vmovdqa %ymm6,160-128(%r9) + vpaddq %ymm8,%ymm8,%ymm8 + vmovdqa %ymm7,192-128(%r9) + vpxor %ymm9,%ymm9,%ymm9 + vmovdqa %ymm8,224-128(%r9) + + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpbroadcastq 32-128(%rsi),%ymm11 + vmovdqu %ymm9,288-192(%rbx) + vpmuludq %ymm10,%ymm1,%ymm1 + vmovdqu %ymm9,320-448(%r12) + vpmuludq %ymm10,%ymm2,%ymm2 + vmovdqu %ymm9,352-448(%r12) + vpmuludq %ymm10,%ymm3,%ymm3 + vmovdqu %ymm9,384-448(%r12) + vpmuludq %ymm10,%ymm4,%ymm4 + vmovdqu 
%ymm9,416-448(%r12) + vpmuludq %ymm10,%ymm5,%ymm5 + vmovdqu %ymm9,448-448(%r12) + vpmuludq %ymm10,%ymm6,%ymm6 + vmovdqu %ymm9,480-448(%r12) + vpmuludq %ymm10,%ymm7,%ymm7 + vmovdqu %ymm9,512-448(%r12) + vpmuludq %ymm10,%ymm8,%ymm8 + vpbroadcastq 64-128(%rsi),%ymm10 + vmovdqu %ymm9,544-448(%r12) + + movq %rsi,%r15 + movl $4,%r14d + jmp L$sqr_entry_1024 +.p2align 5 +L$OOP_SQR_1024: + vpbroadcastq 32-128(%r15),%ymm11 + vpmuludq 0-128(%rsi),%ymm10,%ymm0 + vpaddq 0-192(%rbx),%ymm0,%ymm0 + vpmuludq 0-128(%r9),%ymm10,%ymm1 + vpaddq 32-192(%rbx),%ymm1,%ymm1 + vpmuludq 32-128(%r9),%ymm10,%ymm2 + vpaddq 64-192(%rbx),%ymm2,%ymm2 + vpmuludq 64-128(%r9),%ymm10,%ymm3 + vpaddq 96-192(%rbx),%ymm3,%ymm3 + vpmuludq 96-128(%r9),%ymm10,%ymm4 + vpaddq 128-192(%rbx),%ymm4,%ymm4 + vpmuludq 128-128(%r9),%ymm10,%ymm5 + vpaddq 160-192(%rbx),%ymm5,%ymm5 + vpmuludq 160-128(%r9),%ymm10,%ymm6 + vpaddq 192-192(%rbx),%ymm6,%ymm6 + vpmuludq 192-128(%r9),%ymm10,%ymm7 + vpaddq 224-192(%rbx),%ymm7,%ymm7 + vpmuludq 224-128(%r9),%ymm10,%ymm8 + vpbroadcastq 64-128(%r15),%ymm10 + vpaddq 256-192(%rbx),%ymm8,%ymm8 +L$sqr_entry_1024: + vmovdqu %ymm0,0-192(%rbx) + vmovdqu %ymm1,32-192(%rbx) + + vpmuludq 32-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 32-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 64-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 96-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 128-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 160-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 192-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 224-128(%r9),%ymm11,%ymm0 + vpbroadcastq 96-128(%r15),%ymm11 + vpaddq 288-192(%rbx),%ymm0,%ymm0 + + vmovdqu %ymm2,64-192(%rbx) + vmovdqu %ymm3,96-192(%rbx) + + vpmuludq 64-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 64-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 96-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq 128-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 160-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 224-128(%r9),%ymm10,%ymm1 + vpbroadcastq 128-128(%r15),%ymm10 + vpaddq 320-448(%r12),%ymm1,%ymm1 + + vmovdqu %ymm4,128-192(%rbx) + vmovdqu %ymm5,160-192(%rbx) + + vpmuludq 96-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 96-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq 128-128(%r9),%ymm11,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm0,%ymm0 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq 224-128(%r9),%ymm11,%ymm2 + vpbroadcastq 160-128(%r15),%ymm11 + vpaddq 352-448(%r12),%ymm2,%ymm2 + + vmovdqu %ymm6,192-192(%rbx) + vmovdqu %ymm7,224-192(%rbx) + + vpmuludq 128-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq 128-128(%r9),%ymm10,%ymm14 + vpaddq %ymm14,%ymm0,%ymm0 + vpmuludq 160-128(%r9),%ymm10,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 192-128(%r9),%ymm10,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 224-128(%r9),%ymm10,%ymm3 + vpbroadcastq 192-128(%r15),%ymm10 + vpaddq 384-448(%r12),%ymm3,%ymm3 + + vmovdqu %ymm8,256-192(%rbx) + vmovdqu %ymm0,288-192(%rbx) + leaq 8(%rbx),%rbx + + vpmuludq 160-128(%rsi),%ymm11,%ymm13 + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 160-128(%r9),%ymm11,%ymm12 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 192-128(%r9),%ymm11,%ymm14 + vpaddq %ymm14,%ymm3,%ymm3 + vpmuludq 224-128(%r9),%ymm11,%ymm4 
+ vpbroadcastq 224-128(%r15),%ymm11 + vpaddq 416-448(%r12),%ymm4,%ymm4 + + vmovdqu %ymm1,320-448(%r12) + vmovdqu %ymm2,352-448(%r12) + + vpmuludq 192-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 192-128(%r9),%ymm10,%ymm14 + vpbroadcastq 256-128(%r15),%ymm0 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq 224-128(%r9),%ymm10,%ymm5 + vpbroadcastq 0+8-128(%r15),%ymm10 + vpaddq 448-448(%r12),%ymm5,%ymm5 + + vmovdqu %ymm3,384-448(%r12) + vmovdqu %ymm4,416-448(%r12) + leaq 8(%r15),%r15 + + vpmuludq 224-128(%rsi),%ymm11,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 224-128(%r9),%ymm11,%ymm6 + vpaddq 480-448(%r12),%ymm6,%ymm6 + + vpmuludq 256-128(%rsi),%ymm0,%ymm7 + vmovdqu %ymm5,448-448(%r12) + vpaddq 512-448(%r12),%ymm7,%ymm7 + vmovdqu %ymm6,480-448(%r12) + vmovdqu %ymm7,512-448(%r12) + leaq 8(%r12),%r12 + + decl %r14d + jnz L$OOP_SQR_1024 + + vmovdqu 256(%rsp),%ymm8 + vmovdqu 288(%rsp),%ymm1 + vmovdqu 320(%rsp),%ymm2 + leaq 192(%rsp),%rbx + + vpsrlq $29,%ymm8,%ymm14 + vpand %ymm15,%ymm8,%ymm8 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + + vpermq $147,%ymm14,%ymm14 + vpxor %ymm9,%ymm9,%ymm9 + vpermq $147,%ymm11,%ymm11 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm8,%ymm8 + vpblendd $3,%ymm11,%ymm9,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,288-192(%rbx) + vmovdqu %ymm2,320-192(%rbx) + + movq (%rsp),%rax + movq 8(%rsp),%r10 + movq 16(%rsp),%r11 + movq 24(%rsp),%r12 + vmovdqu 32(%rsp),%ymm1 + vmovdqu 64-192(%rbx),%ymm2 + vmovdqu 96-192(%rbx),%ymm3 + vmovdqu 128-192(%rbx),%ymm4 + vmovdqu 160-192(%rbx),%ymm5 + vmovdqu 192-192(%rbx),%ymm6 + vmovdqu 224-192(%rbx),%ymm7 + + movq %rax,%r9 + imull %ecx,%eax + andl $536870911,%eax + vmovd %eax,%xmm12 + + movq %rax,%rdx + imulq -128(%r13),%rax + vpbroadcastq %xmm12,%ymm12 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax + shrq $29,%r9 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + addq %r9,%r10 + addq %rax,%r11 + imulq 24-128(%r13),%rdx + addq %rdx,%r12 + + movq %r10,%rax + imull %ecx,%eax + andl $536870911,%eax + + movl $9,%r14d + jmp L$OOP_REDUCE_1024 + +.p2align 5 +L$OOP_REDUCE_1024: + vmovd %eax,%xmm13 + vpbroadcastq %xmm13,%ymm13 + + vpmuludq 32-128(%r13),%ymm12,%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm10,%ymm1,%ymm1 + addq %rax,%r10 + vpmuludq 64-128(%r13),%ymm12,%ymm14 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm14,%ymm2,%ymm2 + vpmuludq 96-128(%r13),%ymm12,%ymm11 +.byte 0x67 + addq %rax,%r11 +.byte 0x67 + movq %rdx,%rax + imulq 16-128(%r13),%rax + shrq $29,%r10 + vpaddq %ymm11,%ymm3,%ymm3 + vpmuludq 128-128(%r13),%ymm12,%ymm10 + addq %rax,%r12 + addq %r10,%r11 + vpaddq %ymm10,%ymm4,%ymm4 + vpmuludq 160-128(%r13),%ymm12,%ymm14 + movq %r11,%rax + imull %ecx,%eax + vpaddq %ymm14,%ymm5,%ymm5 + vpmuludq 192-128(%r13),%ymm12,%ymm11 + andl $536870911,%eax + vpaddq %ymm11,%ymm6,%ymm6 + vpmuludq 224-128(%r13),%ymm12,%ymm10 + vpaddq %ymm10,%ymm7,%ymm7 + vpmuludq 256-128(%r13),%ymm12,%ymm14 + vmovd %eax,%xmm12 + + vpaddq %ymm14,%ymm8,%ymm8 + + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 32-8-128(%r13),%ymm13,%ymm11 + vmovdqu 96-8-128(%r13),%ymm14 + movq %rax,%rdx + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm1,%ymm1 + vpmuludq 64-8-128(%r13),%ymm13,%ymm10 + vmovdqu 128-8-128(%r13),%ymm11 + addq %rax,%r11 + movq %rdx,%rax + imulq 8-128(%r13),%rax + vpaddq %ymm10,%ymm2,%ymm2 + addq %r12,%rax + shrq $29,%r11 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 160-8-128(%r13),%ymm10 + addq %r11,%rax + vpaddq %ymm14,%ymm3,%ymm3 
+ vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 192-8-128(%r13),%ymm14 +.byte 0x67 + movq %rax,%r12 + imull %ecx,%eax + vpaddq %ymm11,%ymm4,%ymm4 + vpmuludq %ymm13,%ymm10,%ymm10 +.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 + andl $536870911,%eax + vpaddq %ymm10,%ymm5,%ymm5 + vpmuludq %ymm13,%ymm14,%ymm14 + vmovdqu 256-8-128(%r13),%ymm10 + vpaddq %ymm14,%ymm6,%ymm6 + vpmuludq %ymm13,%ymm11,%ymm11 + vmovdqu 288-8-128(%r13),%ymm9 + vmovd %eax,%xmm0 + imulq -128(%r13),%rax + vpaddq %ymm11,%ymm7,%ymm7 + vpmuludq %ymm13,%ymm10,%ymm10 + vmovdqu 32-16-128(%r13),%ymm14 + vpbroadcastq %xmm0,%ymm0 + vpaddq %ymm10,%ymm8,%ymm8 + vpmuludq %ymm13,%ymm9,%ymm9 + vmovdqu 64-16-128(%r13),%ymm11 + addq %rax,%r12 + + vmovdqu 32-24-128(%r13),%ymm13 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 96-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm1,%ymm1 + vpmuludq %ymm0,%ymm13,%ymm13 + vpmuludq %ymm12,%ymm11,%ymm11 +.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff + vpaddq %ymm1,%ymm13,%ymm13 + vpaddq %ymm11,%ymm2,%ymm2 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 160-16-128(%r13),%ymm11 +.byte 0x67 + vmovq %xmm13,%rax + vmovdqu %ymm13,(%rsp) + vpaddq %ymm10,%ymm3,%ymm3 + vpmuludq %ymm12,%ymm14,%ymm14 + vmovdqu 192-16-128(%r13),%ymm10 + vpaddq %ymm14,%ymm4,%ymm4 + vpmuludq %ymm12,%ymm11,%ymm11 + vmovdqu 224-16-128(%r13),%ymm14 + vpaddq %ymm11,%ymm5,%ymm5 + vpmuludq %ymm12,%ymm10,%ymm10 + vmovdqu 256-16-128(%r13),%ymm11 + vpaddq %ymm10,%ymm6,%ymm6 + vpmuludq %ymm12,%ymm14,%ymm14 + shrq $29,%r12 + vmovdqu 288-16-128(%r13),%ymm10 + addq %r12,%rax + vpaddq %ymm14,%ymm7,%ymm7 + vpmuludq %ymm12,%ymm11,%ymm11 + + movq %rax,%r9 + imull %ecx,%eax + vpaddq %ymm11,%ymm8,%ymm8 + vpmuludq %ymm12,%ymm10,%ymm10 + andl $536870911,%eax + vmovd %eax,%xmm12 + vmovdqu 96-24-128(%r13),%ymm11 +.byte 0x67 + vpaddq %ymm10,%ymm9,%ymm9 + vpbroadcastq %xmm12,%ymm12 + + vpmuludq 64-24-128(%r13),%ymm0,%ymm14 + vmovdqu 128-24-128(%r13),%ymm10 + movq %rax,%rdx + imulq -128(%r13),%rax + movq 8(%rsp),%r10 + vpaddq %ymm14,%ymm2,%ymm1 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 160-24-128(%r13),%ymm14 + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%r13),%rax +.byte 0x67 + shrq $29,%r9 + movq 16(%rsp),%r11 + vpaddq %ymm11,%ymm3,%ymm2 + vpmuludq %ymm0,%ymm10,%ymm10 + vmovdqu 192-24-128(%r13),%ymm11 + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%r13),%rax + vpaddq %ymm10,%ymm4,%ymm3 + vpmuludq %ymm0,%ymm14,%ymm14 + vmovdqu 224-24-128(%r13),%ymm10 + imulq 24-128(%r13),%rdx + addq %rax,%r11 + leaq (%r9,%r10,1),%rax + vpaddq %ymm14,%ymm5,%ymm4 + vpmuludq %ymm0,%ymm11,%ymm11 + vmovdqu 256-24-128(%r13),%ymm14 + movq %rax,%r10 + imull %ecx,%eax + vpmuludq %ymm0,%ymm10,%ymm10 + vpaddq %ymm11,%ymm6,%ymm5 + vmovdqu 288-24-128(%r13),%ymm11 + andl $536870911,%eax + vpaddq %ymm10,%ymm7,%ymm6 + vpmuludq %ymm0,%ymm14,%ymm14 + addq 24(%rsp),%rdx + vpaddq %ymm14,%ymm8,%ymm7 + vpmuludq %ymm0,%ymm11,%ymm11 + vpaddq %ymm11,%ymm9,%ymm8 + vmovq %r12,%xmm9 + movq %rdx,%r12 + + decl %r14d + jnz L$OOP_REDUCE_1024 + leaq 448(%rsp),%r12 + vpaddq %ymm9,%ymm13,%ymm0 + vpxor %ymm9,%ymm9,%ymm9 + + vpaddq 288-192(%rbx),%ymm0,%ymm0 + vpaddq 320-448(%r12),%ymm1,%ymm1 + vpaddq 352-448(%r12),%ymm2,%ymm2 + vpaddq 384-448(%r12),%ymm3,%ymm3 + vpaddq 416-448(%r12),%ymm4,%ymm4 + vpaddq 448-448(%r12),%ymm5,%ymm5 + vpaddq 480-448(%r12),%ymm6,%ymm6 + vpaddq 512-448(%r12),%ymm7,%ymm7 + vpaddq 544-448(%r12),%ymm8,%ymm8 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + 
vpsrlq $29,%ymm3,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $147,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vpaddq %ymm13,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm14 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm11 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm3,%ymm3 + vpermq $147,%ymm12,%ymm12 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm13,%ymm13 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm0,%ymm0 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm1,%ymm1 + vmovdqu %ymm0,0-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm2,%ymm2 + vmovdqu %ymm1,32-128(%rdi) + vpblendd $3,%ymm13,%ymm9,%ymm13 + vpaddq %ymm12,%ymm3,%ymm3 + vmovdqu %ymm2,64-128(%rdi) + vpaddq %ymm13,%ymm4,%ymm4 + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vpaddq %ymm13,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm14 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm11 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm12 + vpermq $147,%ymm14,%ymm14 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm13 + vpermq $147,%ymm11,%ymm11 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm13,%ymm13 + + vpblendd $3,%ymm9,%ymm14,%ymm10 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm14,%ymm11,%ymm14 + vpaddq %ymm10,%ymm4,%ymm4 + vpblendd $3,%ymm11,%ymm12,%ymm11 + vpaddq %ymm14,%ymm5,%ymm5 + vmovdqu %ymm4,128-128(%rdi) + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm11,%ymm6,%ymm6 + vmovdqu %ymm5,160-128(%rdi) + vpblendd $3,%ymm13,%ymm0,%ymm13 + vpaddq %ymm12,%ymm7,%ymm7 + vmovdqu %ymm6,192-128(%rdi) + vpaddq %ymm13,%ymm8,%ymm8 + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + + movq %rdi,%rsi + decl %r8d + jne L$OOP_GRANDE_SQR_1024 + + vzeroall + movq %rbp,%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$sqr_1024_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_1024_mul_avx2 + +.p2align 6 +_rsaz_1024_mul_avx2: + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rax,%rbp + vzeroall + movq %rdx,%r13 + subq $64,%rsp + + + + + + +.byte 0x67,0x67 + movq %rsi,%r15 + andq $4095,%r15 + addq $320,%r15 + shrq $12,%r15 + movq %rsi,%r15 + cmovnzq %r13,%rsi + cmovnzq %r15,%r13 + + movq %rcx,%r15 + subq $-128,%rsi + subq $-128,%rcx + subq $-128,%rdi + + andq $4095,%r15 + addq 
$320,%r15 +.byte 0x67,0x67 + shrq $12,%r15 + jz L$mul_1024_no_n_copy + + + + + + subq $320,%rsp + vmovdqu 0-128(%rcx),%ymm0 + andq $-512,%rsp + vmovdqu 32-128(%rcx),%ymm1 + vmovdqu 64-128(%rcx),%ymm2 + vmovdqu 96-128(%rcx),%ymm3 + vmovdqu 128-128(%rcx),%ymm4 + vmovdqu 160-128(%rcx),%ymm5 + vmovdqu 192-128(%rcx),%ymm6 + vmovdqu 224-128(%rcx),%ymm7 + vmovdqu 256-128(%rcx),%ymm8 + leaq 64+128(%rsp),%rcx + vmovdqu %ymm0,0-128(%rcx) + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm1,32-128(%rcx) + vpxor %ymm1,%ymm1,%ymm1 + vmovdqu %ymm2,64-128(%rcx) + vpxor %ymm2,%ymm2,%ymm2 + vmovdqu %ymm3,96-128(%rcx) + vpxor %ymm3,%ymm3,%ymm3 + vmovdqu %ymm4,128-128(%rcx) + vpxor %ymm4,%ymm4,%ymm4 + vmovdqu %ymm5,160-128(%rcx) + vpxor %ymm5,%ymm5,%ymm5 + vmovdqu %ymm6,192-128(%rcx) + vpxor %ymm6,%ymm6,%ymm6 + vmovdqu %ymm7,224-128(%rcx) + vpxor %ymm7,%ymm7,%ymm7 + vmovdqu %ymm8,256-128(%rcx) + vmovdqa %ymm0,%ymm8 + vmovdqu %ymm9,288-128(%rcx) +L$mul_1024_no_n_copy: + andq $-64,%rsp + + movq (%r13),%rbx + vpbroadcastq (%r13),%ymm10 + vmovdqu %ymm0,(%rsp) + xorq %r9,%r9 +.byte 0x67 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + + vmovdqu L$and_mask(%rip),%ymm15 + movl $9,%r14d + vmovdqu %ymm9,288-128(%rdi) + jmp L$oop_mul_1024 + +.p2align 5 +L$oop_mul_1024: + vpsrlq $29,%ymm3,%ymm9 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r9,%rax + movq %rbx,%r10 + imulq 8-128(%rsi),%r10 + addq 8(%rsp),%r10 + + movq %rax,%r9 + imull %r8d,%eax + andl $536870911,%eax + + movq %rbx,%r11 + imulq 16-128(%rsi),%r11 + addq 16(%rsp),%r11 + + movq %rbx,%r12 + imulq 24-128(%rsi),%r12 + addq 24(%rsp),%r12 + vpmuludq 32-128(%rsi),%ymm10,%ymm0 + vmovd %eax,%xmm11 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq 64-128(%rsi),%ymm10,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq 96-128(%rsi),%ymm10,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq 128-128(%rsi),%ymm10,%ymm0 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq 160-128(%rsi),%ymm10,%ymm12 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq 192-128(%rsi),%ymm10,%ymm13 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq 224-128(%rsi),%ymm10,%ymm0 + vpermq $147,%ymm9,%ymm9 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq 256-128(%rsi),%ymm10,%ymm12 + vpbroadcastq 8(%r13),%ymm10 + vpaddq %ymm12,%ymm8,%ymm8 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r9 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r10 + movq %rdx,%rax + imulq 16-128(%rcx),%rax + addq %rax,%r11 + shrq $29,%r9 + imulq 24-128(%rcx),%rdx + addq %rdx,%r12 + addq %r9,%r10 + + vpmuludq 32-128(%rcx),%ymm11,%ymm13 + vmovq %xmm10,%rbx + vpaddq %ymm13,%ymm1,%ymm1 + vpmuludq 64-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm2,%ymm2 + vpmuludq 96-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm3,%ymm3 + vpmuludq 128-128(%rcx),%ymm11,%ymm13 + vpaddq %ymm13,%ymm4,%ymm4 + vpmuludq 160-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm0,%ymm5,%ymm5 + vpmuludq 192-128(%rcx),%ymm11,%ymm12 + vpaddq %ymm12,%ymm6,%ymm6 + vpmuludq 224-128(%rcx),%ymm11,%ymm13 + vpblendd $3,%ymm14,%ymm9,%ymm9 + vpaddq %ymm13,%ymm7,%ymm7 + vpmuludq 256-128(%rcx),%ymm11,%ymm0 + vpaddq %ymm9,%ymm3,%ymm3 + vpaddq %ymm0,%ymm8,%ymm8 + + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rsi),%ymm12 + movq %rbx,%rax + imulq 8-128(%rsi),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rsi),%ymm13 + + movq %r10,%rax + imull %r8d,%eax + andl $536870911,%eax + + imulq 16-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovd %eax,%xmm11 + vmovdqu -8+96-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm13,%ymm13 + 
vpbroadcastq %xmm11,%ymm11 + vmovdqu -8+128-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+160-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+192-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -8+224-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -8+256-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -8+288-128(%rsi),%ymm9 + vpaddq %ymm12,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm13,%ymm13 + vpaddq %ymm13,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm9,%ymm9 + vpbroadcastq 16(%r13),%ymm10 + + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r10 + vmovdqu -8+32-128(%rcx),%ymm0 + movq %rdx,%rax + imulq 8-128(%rcx),%rax + addq %rax,%r11 + vmovdqu -8+64-128(%rcx),%ymm12 + shrq $29,%r10 + imulq 16-128(%rcx),%rdx + addq %rdx,%r12 + addq %r10,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -8+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -8+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -8+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -8+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rsi),%ymm0 + movq %rbx,%rax + imulq -128(%rsi),%rax + addq %r11,%rax + + vmovdqu -16+64-128(%rsi),%ymm12 + movq %rax,%r11 + imull %r8d,%eax + andl $536870911,%eax + + imulq 8-128(%rsi),%rbx + addq %rbx,%r12 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -16+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -16+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -16+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -16+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -16+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 24(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + + vmovdqu -16+32-128(%rcx),%ymm0 + movq %rax,%rdx + imulq -128(%rcx),%rax + addq %rax,%r11 + vmovdqu -16+64-128(%rcx),%ymm12 + imulq 8-128(%rcx),%rdx + addq %rdx,%r12 + shrq $29,%r11 + + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -16+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+128-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -16+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -16+256-128(%rcx),%ymm12 + vpaddq 
%ymm13,%ymm6,%ymm6 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -16+288-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+32-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+64-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm9,%ymm9 + + addq %r11,%r12 + imulq -128(%rsi),%rbx + addq %rbx,%r12 + + movq %r12,%rax + imull %r8d,%eax + andl $536870911,%eax + + vpmuludq %ymm10,%ymm0,%ymm0 + vmovd %eax,%xmm11 + vmovdqu -24+96-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm1,%ymm1 + vpmuludq %ymm10,%ymm12,%ymm12 + vpbroadcastq %xmm11,%ymm11 + vmovdqu -24+128-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm2,%ymm2 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+160-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm3,%ymm3 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+192-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm4,%ymm4 + vpmuludq %ymm10,%ymm12,%ymm12 + vmovdqu -24+224-128(%rsi),%ymm0 + vpaddq %ymm12,%ymm5,%ymm5 + vpmuludq %ymm10,%ymm13,%ymm13 + vmovdqu -24+256-128(%rsi),%ymm12 + vpaddq %ymm13,%ymm6,%ymm6 + vpmuludq %ymm10,%ymm0,%ymm0 + vmovdqu -24+288-128(%rsi),%ymm13 + vpaddq %ymm0,%ymm7,%ymm7 + vpmuludq %ymm10,%ymm12,%ymm12 + vpaddq %ymm12,%ymm8,%ymm8 + vpmuludq %ymm10,%ymm13,%ymm13 + vpbroadcastq 32(%r13),%ymm10 + vpaddq %ymm13,%ymm9,%ymm9 + addq $32,%r13 + + vmovdqu -24+32-128(%rcx),%ymm0 + imulq -128(%rcx),%rax + addq %rax,%r12 + shrq $29,%r12 + + vmovdqu -24+64-128(%rcx),%ymm12 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovq %xmm10,%rbx + vmovdqu -24+96-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm1,%ymm0 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu %ymm0,(%rsp) + vpaddq %ymm12,%ymm2,%ymm1 + vmovdqu -24+128-128(%rcx),%ymm0 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+160-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm3,%ymm2 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+192-128(%rcx),%ymm13 + vpaddq %ymm0,%ymm4,%ymm3 + vpmuludq %ymm11,%ymm12,%ymm12 + vmovdqu -24+224-128(%rcx),%ymm0 + vpaddq %ymm12,%ymm5,%ymm4 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovdqu -24+256-128(%rcx),%ymm12 + vpaddq %ymm13,%ymm6,%ymm5 + vpmuludq %ymm11,%ymm0,%ymm0 + vmovdqu -24+288-128(%rcx),%ymm13 + movq %r12,%r9 + vpaddq %ymm0,%ymm7,%ymm6 + vpmuludq %ymm11,%ymm12,%ymm12 + addq (%rsp),%r9 + vpaddq %ymm12,%ymm8,%ymm7 + vpmuludq %ymm11,%ymm13,%ymm13 + vmovq %r12,%xmm12 + vpaddq %ymm13,%ymm9,%ymm8 + + decl %r14d + jnz L$oop_mul_1024 + vpermq $0,%ymm15,%ymm15 + vpaddq (%rsp),%ymm12,%ymm0 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm10,%ymm10 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpermq $147,%ymm11,%ymm11 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vpsrlq $29,%ymm0,%ymm12 + vpand %ymm15,%ymm0,%ymm0 + vpsrlq $29,%ymm1,%ymm13 + vpand %ymm15,%ymm1,%ymm1 + vpsrlq $29,%ymm2,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm2,%ymm2 + vpsrlq $29,%ymm3,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm3,%ymm3 + vpermq $147,%ymm10,%ymm10 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm11,%ymm11 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm0,%ymm0 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm1,%ymm1 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq 
%ymm13,%ymm2,%ymm2 + vpblendd $3,%ymm11,%ymm14,%ymm11 + vpaddq %ymm10,%ymm3,%ymm3 + vpaddq %ymm11,%ymm4,%ymm4 + + vmovdqu %ymm0,0-128(%rdi) + vmovdqu %ymm1,32-128(%rdi) + vmovdqu %ymm2,64-128(%rdi) + vmovdqu %ymm3,96-128(%rdi) + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vpsrlq $29,%ymm4,%ymm12 + vpand %ymm15,%ymm4,%ymm4 + vpsrlq $29,%ymm5,%ymm13 + vpand %ymm15,%ymm5,%ymm5 + vpsrlq $29,%ymm6,%ymm10 + vpermq $147,%ymm12,%ymm12 + vpand %ymm15,%ymm6,%ymm6 + vpsrlq $29,%ymm7,%ymm11 + vpermq $147,%ymm13,%ymm13 + vpand %ymm15,%ymm7,%ymm7 + vpsrlq $29,%ymm8,%ymm0 + vpermq $147,%ymm10,%ymm10 + vpand %ymm15,%ymm8,%ymm8 + vpermq $147,%ymm11,%ymm11 + + vpblendd $3,%ymm14,%ymm12,%ymm9 + vpermq $147,%ymm0,%ymm0 + vpblendd $3,%ymm12,%ymm13,%ymm12 + vpaddq %ymm9,%ymm4,%ymm4 + vpblendd $3,%ymm13,%ymm10,%ymm13 + vpaddq %ymm12,%ymm5,%ymm5 + vpblendd $3,%ymm10,%ymm11,%ymm10 + vpaddq %ymm13,%ymm6,%ymm6 + vpblendd $3,%ymm11,%ymm0,%ymm11 + vpaddq %ymm10,%ymm7,%ymm7 + vpaddq %ymm11,%ymm8,%ymm8 + + vmovdqu %ymm4,128-128(%rdi) + vmovdqu %ymm5,160-128(%rdi) + vmovdqu %ymm6,192-128(%rdi) + vmovdqu %ymm7,224-128(%rdi) + vmovdqu %ymm8,256-128(%rdi) + vzeroupper + + movq %rbp,%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_1024_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_1024_red2norm_avx2 + +.p2align 5 +_rsaz_1024_red2norm_avx2: + subq $-128,%rsi + xorq %rax,%rax + movq -128(%rsi),%r8 + movq -120(%rsi),%r9 + movq -112(%rsi),%r10 + shlq $0,%r8 + shlq $29,%r9 + movq %r10,%r11 + shlq $58,%r10 + shrq $6,%r11 + addq %r8,%rax + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,0(%rdi) + movq %r11,%rax + movq -104(%rsi),%r8 + movq -96(%rsi),%r9 + shlq $23,%r8 + movq %r9,%r10 + shlq $52,%r9 + shrq $12,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,8(%rdi) + movq %r10,%rax + movq -88(%rsi),%r11 + movq -80(%rsi),%r8 + shlq $17,%r11 + movq %r8,%r9 + shlq $46,%r8 + shrq $18,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,16(%rdi) + movq %r9,%rax + movq -72(%rsi),%r10 + movq -64(%rsi),%r11 + shlq $11,%r10 + movq %r11,%r8 + shlq $40,%r11 + shrq $24,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,24(%rdi) + movq %r8,%rax + movq -56(%rsi),%r9 + movq -48(%rsi),%r10 + movq -40(%rsi),%r11 + shlq $5,%r9 + shlq $34,%r10 + movq %r11,%r8 + shlq $63,%r11 + shrq $1,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,32(%rdi) + movq %r8,%rax + movq -32(%rsi),%r9 + movq -24(%rsi),%r10 + shlq $28,%r9 + movq %r10,%r11 + shlq $57,%r10 + shrq $7,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,40(%rdi) + movq %r11,%rax + movq -16(%rsi),%r8 + movq -8(%rsi),%r9 + shlq $22,%r8 + movq %r9,%r10 + shlq $51,%r9 + shrq $13,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq 
%rax,48(%rdi) + movq %r10,%rax + movq 0(%rsi),%r11 + movq 8(%rsi),%r8 + shlq $16,%r11 + movq %r8,%r9 + shlq $45,%r8 + shrq $19,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,56(%rdi) + movq %r9,%rax + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + shlq $10,%r10 + movq %r11,%r8 + shlq $39,%r11 + shrq $25,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,64(%rdi) + movq %r8,%rax + movq 32(%rsi),%r9 + movq 40(%rsi),%r10 + movq 48(%rsi),%r11 + shlq $4,%r9 + shlq $33,%r10 + movq %r11,%r8 + shlq $62,%r11 + shrq $2,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,72(%rdi) + movq %r8,%rax + movq 56(%rsi),%r9 + movq 64(%rsi),%r10 + shlq $27,%r9 + movq %r10,%r11 + shlq $56,%r10 + shrq $8,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,80(%rdi) + movq %r11,%rax + movq 72(%rsi),%r8 + movq 80(%rsi),%r9 + shlq $21,%r8 + movq %r9,%r10 + shlq $50,%r9 + shrq $14,%r10 + addq %r8,%rax + addq %r9,%rax + adcq $0,%r10 + movq %rax,88(%rdi) + movq %r10,%rax + movq 88(%rsi),%r11 + movq 96(%rsi),%r8 + shlq $15,%r11 + movq %r8,%r9 + shlq $44,%r8 + shrq $20,%r9 + addq %r11,%rax + addq %r8,%rax + adcq $0,%r9 + movq %rax,96(%rdi) + movq %r9,%rax + movq 104(%rsi),%r10 + movq 112(%rsi),%r11 + shlq $9,%r10 + movq %r11,%r8 + shlq $38,%r11 + shrq $26,%r8 + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,104(%rdi) + movq %r8,%rax + movq 120(%rsi),%r9 + movq 128(%rsi),%r10 + movq 136(%rsi),%r11 + shlq $3,%r9 + shlq $32,%r10 + movq %r11,%r8 + shlq $61,%r11 + shrq $3,%r8 + addq %r9,%rax + addq %r10,%rax + addq %r11,%rax + adcq $0,%r8 + movq %rax,112(%rdi) + movq %r8,%rax + movq 144(%rsi),%r9 + movq 152(%rsi),%r10 + shlq $26,%r9 + movq %r10,%r11 + shlq $55,%r10 + shrq $9,%r11 + addq %r9,%rax + addq %r10,%rax + adcq $0,%r11 + movq %rax,120(%rdi) + movq %r11,%rax + .byte 0xf3,0xc3 + + +.globl _rsaz_1024_norm2red_avx2 + +.p2align 5 +_rsaz_1024_norm2red_avx2: + subq $-128,%rdi + movq (%rsi),%r8 + movl $536870911,%eax + movq 8(%rsi),%r9 + movq %r8,%r11 + shrq $0,%r11 + andq %rax,%r11 + movq %r11,-128(%rdi) + movq %r8,%r10 + shrq $29,%r10 + andq %rax,%r10 + movq %r10,-120(%rdi) + shrdq $58,%r9,%r8 + andq %rax,%r8 + movq %r8,-112(%rdi) + movq 16(%rsi),%r10 + movq %r9,%r8 + shrq $23,%r8 + andq %rax,%r8 + movq %r8,-104(%rdi) + shrdq $52,%r10,%r9 + andq %rax,%r9 + movq %r9,-96(%rdi) + movq 24(%rsi),%r11 + movq %r10,%r9 + shrq $17,%r9 + andq %rax,%r9 + movq %r9,-88(%rdi) + shrdq $46,%r11,%r10 + andq %rax,%r10 + movq %r10,-80(%rdi) + movq 32(%rsi),%r8 + movq %r11,%r10 + shrq $11,%r10 + andq %rax,%r10 + movq %r10,-72(%rdi) + shrdq $40,%r8,%r11 + andq %rax,%r11 + movq %r11,-64(%rdi) + movq 40(%rsi),%r9 + movq %r8,%r11 + shrq $5,%r11 + andq %rax,%r11 + movq %r11,-56(%rdi) + movq %r8,%r10 + shrq $34,%r10 + andq %rax,%r10 + movq %r10,-48(%rdi) + shrdq $63,%r9,%r8 + andq %rax,%r8 + movq %r8,-40(%rdi) + movq 48(%rsi),%r10 + movq %r9,%r8 + shrq $28,%r8 + andq %rax,%r8 + movq %r8,-32(%rdi) + shrdq $57,%r10,%r9 + andq %rax,%r9 + movq %r9,-24(%rdi) + movq 56(%rsi),%r11 + movq %r10,%r9 + shrq $22,%r9 + andq %rax,%r9 + movq %r9,-16(%rdi) + shrdq $51,%r11,%r10 + andq %rax,%r10 + movq %r10,-8(%rdi) + movq 64(%rsi),%r8 + movq %r11,%r10 + shrq $16,%r10 + andq %rax,%r10 + movq %r10,0(%rdi) + shrdq $45,%r8,%r11 + andq %rax,%r11 + movq %r11,8(%rdi) + movq 72(%rsi),%r9 + movq %r8,%r11 + shrq $10,%r11 + andq %rax,%r11 + movq %r11,16(%rdi) + shrdq $39,%r9,%r8 + andq %rax,%r8 + movq %r8,24(%rdi) + movq 80(%rsi),%r10 + movq %r9,%r8 + shrq $4,%r8 + andq %rax,%r8 + movq %r8,32(%rdi) + 
movq %r9,%r11 + shrq $33,%r11 + andq %rax,%r11 + movq %r11,40(%rdi) + shrdq $62,%r10,%r9 + andq %rax,%r9 + movq %r9,48(%rdi) + movq 88(%rsi),%r11 + movq %r10,%r9 + shrq $27,%r9 + andq %rax,%r9 + movq %r9,56(%rdi) + shrdq $56,%r11,%r10 + andq %rax,%r10 + movq %r10,64(%rdi) + movq 96(%rsi),%r8 + movq %r11,%r10 + shrq $21,%r10 + andq %rax,%r10 + movq %r10,72(%rdi) + shrdq $50,%r8,%r11 + andq %rax,%r11 + movq %r11,80(%rdi) + movq 104(%rsi),%r9 + movq %r8,%r11 + shrq $15,%r11 + andq %rax,%r11 + movq %r11,88(%rdi) + shrdq $44,%r9,%r8 + andq %rax,%r8 + movq %r8,96(%rdi) + movq 112(%rsi),%r10 + movq %r9,%r8 + shrq $9,%r8 + andq %rax,%r8 + movq %r8,104(%rdi) + shrdq $38,%r10,%r9 + andq %rax,%r9 + movq %r9,112(%rdi) + movq 120(%rsi),%r11 + movq %r10,%r9 + shrq $3,%r9 + andq %rax,%r9 + movq %r9,120(%rdi) + movq %r10,%r8 + shrq $32,%r8 + andq %rax,%r8 + movq %r8,128(%rdi) + shrdq $61,%r11,%r10 + andq %rax,%r10 + movq %r10,136(%rdi) + xorq %r8,%r8 + movq %r11,%r10 + shrq $26,%r10 + andq %rax,%r10 + movq %r10,144(%rdi) + shrdq $55,%r8,%r11 + andq %rax,%r11 + movq %r11,152(%rdi) + movq %r8,160(%rdi) + movq %r8,168(%rdi) + movq %r8,176(%rdi) + movq %r8,184(%rdi) + .byte 0xf3,0xc3 + +.globl _rsaz_1024_scatter5_avx2 + +.p2align 5 +_rsaz_1024_scatter5_avx2: + vzeroupper + vmovdqu L$scatter_permd(%rip),%ymm5 + shll $4,%edx + leaq (%rdi,%rdx,1),%rdi + movl $9,%eax + jmp L$oop_scatter_1024 + +.p2align 5 +L$oop_scatter_1024: + vmovdqu (%rsi),%ymm0 + leaq 32(%rsi),%rsi + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,(%rdi) + leaq 512(%rdi),%rdi + decl %eax + jnz L$oop_scatter_1024 + + vzeroupper + .byte 0xf3,0xc3 + + +.globl _rsaz_1024_gather5_avx2 + +.p2align 5 +_rsaz_1024_gather5_avx2: + leaq L$gather_table(%rip),%r11 + movl %edx,%eax + andl $3,%edx + shrl $2,%eax + shll $4,%edx + + vmovdqu -32(%r11),%ymm7 + vpbroadcastb 8(%r11,%rax,1),%xmm8 + vpbroadcastb 7(%r11,%rax,1),%xmm9 + vpbroadcastb 6(%r11,%rax,1),%xmm10 + vpbroadcastb 5(%r11,%rax,1),%xmm11 + vpbroadcastb 4(%r11,%rax,1),%xmm12 + vpbroadcastb 3(%r11,%rax,1),%xmm13 + vpbroadcastb 2(%r11,%rax,1),%xmm14 + vpbroadcastb 1(%r11,%rax,1),%xmm15 + + leaq 64(%rsi,%rdx,1),%rsi + movq $64,%r11 + movl $9,%eax + jmp L$oop_gather_1024 + +.p2align 5 +L$oop_gather_1024: + vpand -64(%rsi),%xmm8,%xmm0 + vpand (%rsi),%xmm9,%xmm1 + vpand 64(%rsi),%xmm10,%xmm2 + vpand (%rsi,%r11,2),%xmm11,%xmm3 + vpor %xmm0,%xmm1,%xmm1 + vpand 64(%rsi,%r11,2),%xmm12,%xmm4 + vpor %xmm2,%xmm3,%xmm3 + vpand (%rsi,%r11,4),%xmm13,%xmm5 + vpor %xmm1,%xmm3,%xmm3 + vpand 64(%rsi,%r11,4),%xmm14,%xmm6 + vpor %xmm4,%xmm5,%xmm5 + vpand -128(%rsi,%r11,8),%xmm15,%xmm2 + leaq (%rsi,%r11,8),%rsi + vpor %xmm3,%xmm5,%xmm5 + vpor %xmm2,%xmm6,%xmm6 + vpor %xmm5,%xmm6,%xmm6 + vpermd %ymm6,%ymm7,%ymm6 + vmovdqu %ymm6,(%rdi) + leaq 32(%rdi),%rdi + decl %eax + jnz L$oop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,(%rdi) + vzeroupper + .byte 0xf3,0xc3 + + +.globl _rsaz_avx2_eligible + +.p2align 5 +_rsaz_avx2_eligible: + movl _OPENSSL_ia32cap_P+8(%rip),%eax + movl $524544,%ecx + movl $0,%edx + andl %eax,%ecx + cmpl $524544,%ecx + cmovel %edx,%eax + andl $32,%eax + shrl $5,%eax + .byte 0xf3,0xc3 + + +.p2align 6 +L$and_mask: +.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 +L$scatter_permd: +.long 0,2,4,6,7,7,7,7 +L$gather_permd: +.long 0,7,1,7,2,7,3,7 +L$gather_table: +.byte 0,0,0,0,0,0,0,0, 0xff,0,0,0,0,0,0,0 +.p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/rsaz-x86_64.s b/deps/openssl/asm/x64-macosx-gas/bn/rsaz-x86_64.s new file mode 100644 index 00000000000000..23c540d3acb221 --- /dev/null +++ 
b/deps/openssl/asm/x64-macosx-gas/bn/rsaz-x86_64.s @@ -0,0 +1,1754 @@ +.text + + + +.globl _rsaz_512_sqr + +.p2align 5 +_rsaz_512_sqr: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$sqr_body: + movq %rdx,%rbp + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + movl $524544,%r11d + andl _OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je L$oop_sqrx + jmp L$oop_sqr + +.p2align 5 +L$oop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + movq %rdx,%r15 + adcq $0,%r15 + + addq %r8,%r8 + movq %r9,%rcx + adcq %r9,%r9 + + mulq %rax + movq %rax,(%rsp) + addq %rdx,%r8 + adcq $0,%r9 + + movq %r8,8(%rsp) + shrq $63,%rcx + + + movq 8(%rsi),%r8 + movq 16(%rsi),%rax + mulq %r8 + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r15 + movq %r8,%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%r8 + movq %r10,%rdx + adcq $0,%r8 + + addq %rdx,%rdx + leaq (%rcx,%r10,2),%r10 + movq %r11,%rbx + adcq %r11,%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + shrq $63,%rbx + + + movq 16(%rsi),%r9 + movq 24(%rsi),%rax + mulq %r9 + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r14 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + movq %r12,%r10 + leaq (%rbx,%r12,2),%r12 + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + shrq $63,%r10 + addq %rax,%r8 + movq %r9,%rax + adcq $0,%rdx + addq %rcx,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + movq %r13,%rcx + leaq (%r10,%r13,2),%r13 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + shrq $63,%rcx + + + movq 24(%rsi),%r10 + movq 32(%rsi),%rax + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + movq %r14,%r12 + leaq (%rcx,%r14,2),%r14 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + shrq $63,%r12 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + movq %r15,%rbx + leaq (%r12,%r15,2),%r15 + + mulq %rax + addq %rax,%r13 + adcq %rdx,%r14 
+ adcq $0,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + shrq $63,%rbx + + + movq 32(%rsi),%r11 + movq 40(%rsi),%rax + mulq %r11 + addq %rax,%r8 + movq 48(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + movq %r8,%r12 + leaq (%rbx,%r8,2),%r8 + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + shrq $63,%r12 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + movq %r9,%rcx + leaq (%r12,%r9,2),%r9 + + mulq %rax + addq %rax,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + shrq $63,%rcx + + + movq 40(%rsi),%r12 + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + movq 56(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + movq %r10,%r15 + leaq (%rcx,%r10,2),%r10 + adcq $0,%rdx + shrq $63,%r15 + addq %rbx,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + movq %r11,%rbx + leaq (%r15,%r11,2),%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + movq 48(%rsi),%r13 + movq 56(%rsi),%rax + mulq %r13 + addq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + adcq $0,%r13 + + xorq %r14,%r14 + shlq $1,%rbx + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + movq 56(%rsi),%rax + mulq %rax + addq %rax,%r13 + adcq $0,%rdx + + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz L$oop_sqr + jmp L$sqr_tail + +.p2align 5 +L$oop_sqrx: + movl %r8d,128+8(%rsp) +.byte 102,72,15,110,199 +.byte 102,72,15,110,205 + + mulxq %rax,%r8,%r9 + + mulxq 16(%rsi),%rcx,%r10 + xorq %rbp,%rbp + + mulxq 24(%rsi),%rax,%r11 + adcxq %rcx,%r9 + + mulxq 32(%rsi),%rcx,%r12 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rcx,%r11 + +.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r12 + adcxq %rcx,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adcxq %rbp,%r15 + + movq %r9,%rcx + shldq $1,%r8,%r9 + shlq $1,%r8 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rdx,%r8 + movq 8(%rsi),%rdx + adcxq %rbp,%r9 + + movq %rax,(%rsp) + movq %r8,8(%rsp) + + + mulxq 16(%rsi),%rax,%rbx + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %r8,%r12 + + mulxq 32(%rsi),%rax,%rbx + adoxq %rax,%r12 + adcxq %rbx,%r13 + + mulxq 40(%rsi),%rdi,%r8 + adoxq %rdi,%r13 + adcxq %r8,%r14 + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + +.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 + adoxq %rdi,%r15 + adcxq %rbp,%r8 + adoxq %rbp,%r8 + + movq %r11,%rbx + shldq $1,%r10,%r11 + shldq $1,%rcx,%r10 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rcx + movq 16(%rsi),%rdx + adcxq %rax,%r9 + adcxq %rcx,%r10 + adcxq %rbp,%r11 + + movq %r9,16(%rsp) +.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 + + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 + adoxq %rdi,%r12 
+ adcxq %r9,%r13 + + mulxq 32(%rsi),%rax,%rcx + adoxq %rax,%r13 + adcxq %rcx,%r14 + + mulxq 40(%rsi),%rdi,%r9 + adoxq %rdi,%r14 + adcxq %r9,%r15 + +.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 + adoxq %rax,%r15 + adcxq %rcx,%r8 + +.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00 + adoxq %rdi,%r8 + adcxq %rbp,%r9 + adoxq %rbp,%r9 + + movq %r13,%rcx + shldq $1,%r12,%r13 + shldq $1,%rbx,%r12 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r11 + adcxq %rdx,%r12 + movq 24(%rsi),%rdx + adcxq %rbp,%r13 + + movq %r11,32(%rsp) +.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 + adoxq %rax,%r14 + adcxq %rbx,%r15 + + mulxq 40(%rsi),%rdi,%r10 + adoxq %rdi,%r15 + adcxq %r10,%r8 + + mulxq 48(%rsi),%rax,%rbx + adoxq %rax,%r8 + adcxq %rbx,%r9 + + mulxq 56(%rsi),%rdi,%r10 + adoxq %rdi,%r9 + adcxq %rbp,%r10 + adoxq %rbp,%r10 + +.byte 0x66 + movq %r15,%rbx + shldq $1,%r14,%r15 + shldq $1,%rcx,%r14 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r13 + adcxq %rdx,%r14 + movq 32(%rsi),%rdx + adcxq %rbp,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + + +.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 + adoxq %rdi,%r8 + adcxq %r11,%r9 + + mulxq 48(%rsi),%rax,%rcx + adoxq %rax,%r9 + adcxq %rcx,%r10 + + mulxq 56(%rsi),%rdi,%r11 + adoxq %rdi,%r10 + adcxq %rbp,%r11 + adoxq %rbp,%r11 + + movq %r9,%rcx + shldq $1,%r8,%r9 + shldq $1,%rbx,%r8 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r15 + adcxq %rdx,%r8 + movq 40(%rsi),%rdx + adcxq %rbp,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + + +.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 + adoxq %rax,%r10 + adcxq %rbx,%r11 + +.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 + adoxq %rdi,%r11 + adcxq %rbp,%r12 + adoxq %rbp,%r12 + + movq %r11,%rbx + shldq $1,%r10,%r11 + shldq $1,%rcx,%r10 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r9 + adcxq %rdx,%r10 + movq 48(%rsi),%rdx + adcxq %rbp,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + +.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 + adoxq %rax,%r12 + adoxq %rbp,%r13 + + xorq %r14,%r14 + shldq $1,%r13,%r14 + shldq $1,%r12,%r13 + shldq $1,%rbx,%r12 + + xorl %ebp,%ebp + mulxq %rdx,%rax,%rdx + adcxq %rax,%r11 + adcxq %rdx,%r12 + movq 56(%rsi),%rdx + adcxq %rbp,%r13 + +.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 +.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 + + + mulxq %rdx,%rax,%rdx + adoxq %rax,%r13 + adoxq %rbp,%rdx + +.byte 0x66 + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz L$oop_sqrx + +L$sqr_tail: + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$sqr_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul + +.p2align 5 +_rsaz_512_mul: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$mul_body: +.byte 102,72,15,110,199 +.byte 
102,72,15,110,201 + movq %r8,128(%rsp) + movl $524544,%r11d + andl _OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je L$mulx + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp L$mul_tail + +.p2align 5 +L$mulx: + movq %rdx,%rbp + movq (%rdx),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex +L$mul_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_gather4 + +.p2align 5 +_rsaz_512_mul_gather4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +L$mul_gather4_body: + movl $524544,%r11d + andl _OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je L$mulx_gather + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + movl (%rdx,%r9,4),%ebx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rbx + movq (%rsi),%rax + movq 8(%rsi),%rcx + leaq 128(%rdx,%r9,4),%rbp + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + leaq 128(%rbp),%rbp + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp L$oop_mul_gather + +.p2align 5 +L$oop_mul_gather: + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq 
%rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 128(%rbp),%rbp + leaq 8(%rdi),%rdi + + decl %ecx + jnz L$oop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp L$mul_gather_tail + +.p2align 5 +L$mulx_gather: + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + leaq 128(%rdx,%r9,4),%rbp + movl (%rdx,%r9,4),%edx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rdx + mulxq (%rsi),%rbx,%r8 + movq %rbx,(%rsp) + xorl %edi,%edi + + mulxq 8(%rsi),%rax,%r9 + movd (%rbp),%xmm4 + + mulxq 16(%rsi),%rbx,%r10 + movd 64(%rbp),%xmm5 + adcxq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + pslldq $4,%xmm5 + adcxq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + por %xmm5,%xmm4 + adcxq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + leaq 128(%rbp),%rbp + adcxq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 +.byte 102,72,15,126,226 + adcxq %rbx,%r13 + adcxq %rax,%r14 + movq %r8,%rbx + adcxq %rdi,%r15 + + movq $-7,%rcx + jmp L$oop_mulx_gather + +.p2align 5 +L$oop_mulx_gather: + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 +.byte 0x66,0x0f,0x6e,0xa5,0x00,0x00,0x00,0x00 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + movd 64(%rbp),%xmm5 + leaq 128(%rbp),%rbp + adcxq %rax,%r9 + adoxq %r11,%r10 + +.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 + pslldq $4,%xmm5 + por %xmm5,%xmm4 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 +.byte 102,72,15,126,226 + movq %rbx,64(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + movq %r8,%rbx + adcxq %rdi,%r15 + + incq %rcx + jnz L$oop_mulx_gather + + movq %r8,64(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +L$mul_gather_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_gather4_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_scatter4 + +.p2align 5 +_rsaz_512_mul_scatter4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +L$mul_scatter4_body: + leaq (%r8,%r9,4),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movl $524544,%r11d + andl 
_OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl $524544,%r11d + je L$mulx_scatter + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + jmp L$mul_scatter_tail + +.p2align 5 +L$mulx_scatter: + movq (%rdi),%rdx + call __rsaz_512_mulx + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq 128(%rsp),%rdx + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reducex + +L$mul_scatter_tail: + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movl %r8d,0(%rsi) + shrq $32,%r8 + movl %r9d,128(%rsi) + shrq $32,%r9 + movl %r10d,256(%rsi) + shrq $32,%r10 + movl %r11d,384(%rsi) + shrq $32,%r11 + movl %r12d,512(%rsi) + shrq $32,%r12 + movl %r13d,640(%rsi) + shrq $32,%r13 + movl %r14d,768(%rsi) + shrq $32,%r14 + movl %r15d,896(%rsi) + shrq $32,%r15 + movl %r8d,64(%rsi) + movl %r9d,192(%rsi) + movl %r10d,320(%rsi) + movl %r11d,448(%rsi) + movl %r12d,576(%rsi) + movl %r13d,704(%rsi) + movl %r14d,832(%rsi) + movl %r15d,960(%rsi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_scatter4_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_by_one + +.p2align 5 +_rsaz_512_mul_by_one: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$mul_by_one_body: + movl _OPENSSL_ia32cap_P+8(%rip),%eax + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + andl $524544,%eax + cmpl $524544,%eax + je L$by_one_callx + call __rsaz_512_reduce + jmp L$by_one_tail +.p2align 5 +L$by_one_callx: + movq 128(%rsp),%rdx + call __rsaz_512_reducex +L$by_one_tail: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_by_one_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_reduce: + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$reduction_loop + +.p2align 5 +L$reduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 
40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jne L$reduction_loop + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_reducex: + + imulq %r8,%rdx + xorq %rsi,%rsi + movl $8,%ecx + jmp L$reduction_loopx + +.p2align 5 +L$reduction_loopx: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 128+8(%rsp),%rbx,%rdx + movq %rax,%rdx + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + + decl %ecx + jne L$reduction_loopx + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_subtract: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_mul: + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp L$oop_mul + +.p2align 5 +L$oop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + 
movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz L$oop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_mulx: + mulxq (%rsi),%rbx,%r8 + movq $-6,%rcx + + mulxq 8(%rsi),%rax,%r9 + movq %rbx,8(%rsp) + + mulxq 16(%rsi),%rbx,%r10 + adcq %rax,%r8 + + mulxq 24(%rsi),%rax,%r11 + adcq %rbx,%r9 + + mulxq 32(%rsi),%rbx,%r12 + adcq %rax,%r10 + + mulxq 40(%rsi),%rax,%r13 + adcq %rbx,%r11 + + mulxq 48(%rsi),%rbx,%r14 + adcq %rax,%r12 + + mulxq 56(%rsi),%rax,%r15 + movq 8(%rbp),%rdx + adcq %rbx,%r13 + adcq %rax,%r14 + adcq $0,%r15 + + xorq %rdi,%rdi + jmp L$oop_mulx + +.p2align 5 +L$oop_mulx: + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rsi),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rsi),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rsi),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 56(%rsi),%rax,%r15 + movq 64(%rbp,%rcx,8),%rdx + movq %rbx,8+64-8(%rsp,%rcx,8) + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + incq %rcx + jnz L$oop_mulx + + movq %r8,%rbx + mulxq (%rsi),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + +.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 + adcxq %rax,%r8 + adoxq %r10,%r9 + +.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rsi),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + + mulxq 32(%rsi),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rsi),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + +.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 + adcxq %rax,%r14 + adoxq %rdi,%r15 + adcxq %rdi,%r15 + + movq %rbx,8+64-8(%rsp) + movq %r8,8+64(%rsp) + movq %r9,8+64+8(%rsp) + movq %r10,8+64+16(%rsp) + movq %r11,8+64+24(%rsp) + movq %r12,8+64+32(%rsp) + movq %r13,8+64+40(%rsp) + movq %r14,8+64+48(%rsp) + movq %r15,8+64+56(%rsp) + + .byte 0xf3,0xc3 + +.globl _rsaz_512_scatter4 + +.p2align 4 +_rsaz_512_scatter4: + leaq (%rdi,%rdx,4),%rdi + movl $8,%r9d + jmp L$oop_scatter +.p2align 4 +L$oop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movl %eax,(%rdi) + shrq $32,%rax + movl %eax,64(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz L$oop_scatter + .byte 0xf3,0xc3 + + +.globl _rsaz_512_gather4 + +.p2align 4 +_rsaz_512_gather4: + leaq (%rsi,%rdx,4),%rsi + movl $8,%r9d + jmp L$oop_gather +.p2align 4 +L$oop_gather: + movl (%rsi),%eax + movl 64(%rsi),%r8d + leaq 128(%rsi),%rsi + shlq $32,%r8 + orq %r8,%rax + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz L$oop_gather + .byte 0xf3,0xc3 diff --git 
a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s index 59268018c44008..040c324c49a753 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-gf2m.s @@ -1,7 +1,6 @@ .text - .p2align 4 _mul_1x1: subq $128+8,%rsp @@ -247,14 +246,12 @@ L$body_mul_2x2: movq %rsi,%rax movq %rcx,%rbp call _mul_1x1 - movq %rax,16(%rsp) movq %rdx,24(%rsp) movq 48(%rsp),%rax movq 64(%rsp),%rbp call _mul_1x1 - movq %rax,0(%rsp) movq %rdx,8(%rsp) @@ -263,7 +260,6 @@ L$body_mul_2x2: xorq 48(%rsp),%rax xorq 64(%rsp),%rbp call _mul_1x1 - movq 0(%rsp),%rbx movq 8(%rsp),%rcx movq 16(%rsp),%rdi diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s index ece106c49842d7..03b9c7d949de52 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont.s @@ -1,6 +1,7 @@ .text + .globl _bn_mul_mont .p2align 4 @@ -9,9 +10,12 @@ _bn_mul_mont: jnz L$mul_enter cmpl $8,%r9d jb L$mul_enter + movl _OPENSSL_ia32cap_P+8(%rip),%r11d cmpq %rsi,%rdx jne L$mul4x_enter - jmp L$sqr4x_enter + testl $7,%r9d + jz L$sqr8x_enter + jmp L$mul4x_enter .p2align 4 L$mul_enter: @@ -164,7 +168,7 @@ L$inner_enter: leaq 1(%r14),%r14 cmpq %r9,%r14 - jl L$outer + jb L$outer xorq %r14,%r14 movq (%rsp),%rax @@ -212,6 +216,9 @@ L$mul_epilogue: .p2align 4 bn_mul4x_mont: L$mul4x_enter: + andl $524544,%r11d + cmpl $524544,%r11d + je L$mulx4x_enter pushq %rbx pushq %rbp pushq %r12 @@ -330,7 +337,7 @@ L$1st4x: movq %rdi,-32(%rsp,%r15,8) movq %rdx,%r13 cmpq %r9,%r15 - jl L$1st4x + jb L$1st4x mulq %rbx addq %rax,%r10 @@ -478,7 +485,7 @@ L$inner4x: movq %rdi,-32(%rsp,%r15,8) movq %rdx,%r13 cmpq %r9,%r15 - jl L$inner4x + jb L$inner4x mulq %rbx addq %rax,%r10 @@ -524,7 +531,7 @@ L$inner4x: movq %rdi,(%rsp,%r15,8) cmpq %r9,%r14 - jl L$outer4x + jb L$outer4x movq 16(%rsp,%r9,8),%rdi movq 0(%rsp),%rax pxor %xmm0,%xmm0 @@ -607,9 +614,13 @@ L$mul4x_epilogue: .byte 0xf3,0xc3 -.p2align 4 -bn_sqr4x_mont: -L$sqr4x_enter: + + + +.p2align 5 +bn_sqr8x_mont: +L$sqr8x_enter: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 @@ -617,758 +628,427 @@ L$sqr4x_enter: pushq %r14 pushq %r15 + movl %r9d,%r10d shll $3,%r9d - xorq %r10,%r10 - movq %rsp,%r11 - subq %r9,%r10 - movq (%r8),%r8 - leaq -72(%rsp,%r10,2),%rsp - andq $-1024,%rsp - - - - - - - - - - - - movq %rdi,32(%rsp) - movq %rcx,40(%rsp) - movq %r8,48(%rsp) - movq %r11,56(%rsp) -L$sqr4x_body: - - - - - - - - leaq 32(%r10),%rbp - leaq (%rsi,%r9,1),%rsi - - movq %r9,%rcx - - - movq -32(%rsi,%rbp,1),%r14 - leaq 64(%rsp,%r9,2),%rdi - movq -24(%rsi,%rbp,1),%rax - leaq -32(%rdi,%rbp,1),%rdi - movq -16(%rsi,%rbp,1),%rbx - movq %rax,%r15 - - mulq %r14 - movq %rax,%r10 - movq %rbx,%rax - movq %rdx,%r11 - movq %r10,-24(%rdi,%rbp,1) - - xorq %r10,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,-16(%rdi,%rbp,1) - - leaq -16(%rbp),%rcx - - - movq 8(%rsi,%rcx,1),%rbx - mulq %r15 - movq %rax,%r12 - movq %rbx,%rax - movq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 - leaq 16(%rcx),%rcx - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi,%rcx,1) - jmp L$sqr4x_1st - -.p2align 4 -L$sqr4x_1st: - movq (%rsi,%rcx,1),%rbx - xorq %r12,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 - - xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,(%rdi,%rcx,1) + shlq $3+2,%r10 + negq %r9 - 
movq 8(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - xorq %r11,%r11 - addq %r12,%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,8(%rdi,%rcx,1) - movq 16(%rsi,%rcx,1),%rbx - xorq %r12,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 - - xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,16(%rdi,%rcx,1) - movq 24(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 + leaq -64(%rsp,%r9,4),%r11 + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$sqr8x_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,4),%rsp + jmp L$sqr8x_sp_done + +.p2align 5 +L$sqr8x_sp_alt: + leaq 4096-64(,%r9,4),%r10 + leaq -64(%rsp,%r9,4),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$sqr8x_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + leaq 64(%rsp,%r9,2),%r11 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$sqr8x_body: + + movq %r9,%rbp +.byte 102,73,15,110,211 + shrq $3+2,%rbp + movl _OPENSSL_ia32cap_P+8(%rip),%eax + jmp L$sqr8x_copy_n + +.p2align 5 +L$sqr8x_copy_n: + movq 0(%rcx),%xmm0 + movq 8(%rcx),%xmm1 + movq 16(%rcx),%xmm3 + movq 24(%rcx),%xmm4 leaq 32(%rcx),%rcx - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi,%rcx,1) - - cmpq $0,%rcx - jne L$sqr4x_1st + movdqa %xmm0,0(%r11) + movdqa %xmm1,16(%r11) + movdqa %xmm3,32(%r11) + movdqa %xmm4,48(%r11) + leaq 64(%r11),%r11 + decq %rbp + jnz L$sqr8x_copy_n - xorq %r12,%r12 - addq %r11,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - adcq %rdx,%r12 - - movq %r13,(%rdi) - leaq 16(%rbp),%rbp - movq %r12,8(%rdi) - jmp L$sqr4x_outer - -.p2align 4 -L$sqr4x_outer: - movq -32(%rsi,%rbp,1),%r14 - leaq 64(%rsp,%r9,2),%rdi - movq -24(%rsi,%rbp,1),%rax - leaq -32(%rdi,%rbp,1),%rdi - movq -16(%rsi,%rbp,1),%rbx - movq %rax,%r15 - - movq -24(%rdi,%rbp,1),%r10 - xorq %r11,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-24(%rdi,%rbp,1) + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + andl $524544,%eax + cmpl $524544,%eax + jne L$sqr8x_nox - xorq %r10,%r10 - addq -16(%rdi,%rbp,1),%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,-16(%rdi,%rbp,1) + call _bn_sqrx8x_internal - leaq -16(%rbp),%rcx - xorq %r12,%r12 + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp L$sqr8x_zero +.p2align 5 +L$sqr8x_nox: + call _bn_sqr8x_internal - movq 8(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq 8(%rdi,%rcx,1),%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp L$sqr8x_zero + +.p2align 5 +L$sqr8x_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + movdqa %xmm0,0(%rdx) + movdqa %xmm0,16(%rdx) + movdqa %xmm0,32(%rdx) + movdqa %xmm0,48(%rdx) + leaq 64(%rdx),%rdx + decq %r9 + jnz L$sqr8x_zero - xorq %r11,%r11 - addq %r12,%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,8(%rdi,%rcx,1) + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq 
(%rsi),%rsp +L$sqr8x_epilogue: + .byte 0xf3,0xc3 - leaq 16(%rcx),%rcx - jmp L$sqr4x_inner -.p2align 4 -L$sqr4x_inner: - movq (%rsi,%rcx,1),%rbx - xorq %r12,%r12 - addq (%rdi,%rcx,1),%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 +.p2align 5 +bn_mulx4x_mont: +L$mulx4x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + shll $3,%r9d +.byte 0x67 xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,(%rdi,%rcx,1) - - movq 8(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq 8(%rdi,%rcx,1),%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - - xorq %r11,%r11 - addq %r12,%r10 - leaq 16(%rcx),%rcx - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi,%rcx,1) - - cmpq $0,%rcx - jne L$sqr4x_inner - - xorq %r12,%r12 - addq %r11,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - adcq %rdx,%r12 - - movq %r13,(%rdi) - movq %r12,8(%rdi) + subq %r9,%r10 + movq (%r8),%r8 + leaq -72(%rsp,%r10,1),%rsp + leaq (%rdx,%r9,1),%r10 + andq $-128,%rsp - addq $16,%rbp - jnz L$sqr4x_outer - movq -32(%rsi),%r14 - leaq 64(%rsp,%r9,2),%rdi - movq -24(%rsi),%rax - leaq -32(%rdi,%rbp,1),%rdi - movq -16(%rsi),%rbx - movq %rax,%r15 - xorq %r11,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-24(%rdi) - xorq %r10,%r10 - addq %r13,%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - movq %r11,-16(%rdi) - movq -8(%rsi),%rbx - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq $0,%rdx - xorq %r11,%r11 - addq %r12,%r10 - movq %rdx,%r13 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %rbx,%rax - adcq %rdx,%r11 - movq %r10,-8(%rdi) - xorq %r12,%r12 - addq %r11,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq -16(%rsi),%rax - adcq %rdx,%r12 - movq %r13,(%rdi) - movq %r12,8(%rdi) - mulq %rbx - addq $16,%rbp - xorq %r14,%r14 - subq %r9,%rbp - xorq %r15,%r15 - - addq %r12,%rax - adcq $0,%rdx - movq %rax,8(%rdi) - movq %rdx,16(%rdi) - movq %r15,24(%rdi) - movq -16(%rsi,%rbp,1),%rax - leaq 64(%rsp,%r9,2),%rdi - xorq %r10,%r10 - movq -24(%rdi,%rbp,2),%r11 - - leaq (%r14,%r10,2),%r12 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq -16(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq -8(%rdi,%rbp,2),%r11 - adcq %rax,%r12 - movq -8(%rsi,%rbp,1),%rax - movq %r12,-32(%rdi,%rbp,2) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,-24(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - movq 0(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 8(%rdi,%rbp,2),%r11 - adcq %rax,%rbx - movq 0(%rsi,%rbp,1),%rax - movq %rbx,-16(%rdi,%rbp,2) - adcq %rdx,%r8 - leaq 16(%rbp),%rbp - movq %r8,-40(%rdi,%rbp,2) - sbbq %r15,%r15 - jmp L$sqr4x_shift_n_add -.p2align 4 -L$sqr4x_shift_n_add: - leaq (%r14,%r10,2),%r12 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq -16(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq -8(%rdi,%rbp,2),%r11 - adcq %rax,%r12 - movq -8(%rsi,%rbp,1),%rax - movq %r12,-32(%rdi,%rbp,2) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,-24(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - movq 0(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 8(%rdi,%rbp,2),%r11 - adcq %rax,%rbx - movq 
0(%rsi,%rbp,1),%rax - movq %rbx,-16(%rdi,%rbp,2) - adcq %rdx,%r8 - - leaq (%r14,%r10,2),%r12 - movq %r8,-8(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq 16(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 24(%rdi,%rbp,2),%r11 - adcq %rax,%r12 - movq 8(%rsi,%rbp,1),%rax - movq %r12,0(%rdi,%rbp,2) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,8(%rdi,%rbp,2) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - movq 32(%rdi,%rbp,2),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq 40(%rdi,%rbp,2),%r11 - adcq %rax,%rbx - movq 16(%rsi,%rbp,1),%rax - movq %rbx,16(%rdi,%rbp,2) - adcq %rdx,%r8 - movq %r8,24(%rdi,%rbp,2) - sbbq %r15,%r15 - addq $32,%rbp - jnz L$sqr4x_shift_n_add - - leaq (%r14,%r10,2),%r12 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r13 - shrq $63,%r11 - orq %r10,%r13 - movq -16(%rdi),%r10 - movq %r11,%r14 - mulq %rax - negq %r15 - movq -8(%rdi),%r11 - adcq %rax,%r12 - movq -8(%rsi),%rax - movq %r12,-32(%rdi) - adcq %rdx,%r13 - - leaq (%r14,%r10,2),%rbx - movq %r13,-24(%rdi) - sbbq %r15,%r15 - shrq $63,%r10 - leaq (%rcx,%r11,2),%r8 - shrq $63,%r11 - orq %r10,%r8 - mulq %rax - negq %r15 - adcq %rax,%rbx - adcq %rdx,%r8 - movq %rbx,-16(%rdi) - movq %r8,-8(%rdi) - movq 40(%rsp),%rsi - movq 48(%rsp),%r8 - xorq %rcx,%rcx movq %r9,0(%rsp) - subq %r9,%rcx - movq 64(%rsp),%r10 - movq %r8,%r14 - leaq 64(%rsp,%r9,2),%rax - leaq 64(%rsp,%r9,1),%rdi - movq %rax,8(%rsp) - leaq (%rsi,%r9,1),%rsi - xorq %rbp,%rbp - - movq 0(%rsi,%rcx,1),%rax - movq 8(%rsi,%rcx,1),%r9 - imulq %r10,%r14 - movq %rax,%rbx - jmp L$sqr4x_mont_outer - -.p2align 4 -L$sqr4x_mont_outer: - xorq %r11,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 - movq %r8,%r15 - - xorq %r10,%r10 - addq 8(%rdi,%rcx,1),%r11 - adcq $0,%r10 - mulq %r14 + shrq $5,%r9 + movq %r10,16(%rsp) + subq $1,%r9 + movq %r8,24(%rsp) + movq %rdi,32(%rsp) + movq %rax,40(%rsp) + movq %r9,48(%rsp) + jmp L$mulx4x_body + +.p2align 5 +L$mulx4x_body: + leaq 8(%rdx),%rdi + movq (%rdx),%rdx + leaq 64+32(%rsp),%rbx + movq %rdx,%r9 + + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r14 addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - - imulq %r11,%r15 - - movq 16(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq %r11,%r12 + movq %rdi,8(%rsp) + mulxq 16(%rsi),%r12,%r13 + adcq %r14,%r12 adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - movq %r12,8(%rdi,%rcx,1) - - xorq %r11,%r11 - addq 16(%rdi,%rcx,1),%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 - movq 24(%rsi,%rcx,1),%r9 - xorq %r12,%r12 - addq %r10,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %r9,%rax - adcq %rdx,%r12 - movq %r13,16(%rdi,%rcx,1) + movq %r8,%rdi + imulq 24(%rsp),%r8 + xorq %rbp,%rbp - xorq %r10,%r10 - addq 24(%rdi,%rcx,1),%r11 + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + leaq 32(%rsi),%rsi + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%rdi + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 +.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 + movq 48(%rsp),%rdi + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 leaq 32(%rcx),%rcx - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - jmp L$sqr4x_mont_inner - -.p2align 4 -L$sqr4x_mont_inner: - movq (%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq %r11,%r12 - 
adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - movq %r12,-8(%rdi,%rcx,1) - - xorq %r11,%r11 - addq (%rdi,%rcx,1),%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 - - movq 8(%rsi,%rcx,1),%r9 - xorq %r12,%r12 - addq %r10,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %r9,%rax - adcq %rdx,%r12 - movq %r13,(%rdi,%rcx,1) - - xorq %r10,%r10 - addq 8(%rdi,%rcx,1),%r11 - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - + movq %r12,-16(%rbx) + + jmp L$mulx4x_1st + +.p2align 5 +L$mulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) - movq 16(%rsi,%rcx,1),%rbx - xorq %r13,%r13 - addq %r11,%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %rbx,%rax - adcq %rdx,%r13 - movq %r12,8(%rdi,%rcx,1) - - xorq %r11,%r11 - addq 16(%rdi,%rcx,1),%r10 - adcq $0,%r11 - mulq %r14 - addq %rax,%r10 - movq %r9,%rax - adcq %rdx,%r11 + decq %rdi + jnz L$mulx4x_1st - movq 24(%rsi,%rcx,1),%r9 - xorq %r12,%r12 - addq %r10,%r13 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %r9,%rax - adcq %rdx,%r12 - movq %r13,16(%rdi,%rcx,1) - - xorq %r10,%r10 - addq 24(%rdi,%rcx,1),%r11 + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + addq %r15,%r14 + sbbq %r15,%r15 + movq %r14,-8(%rbx) + jmp L$mulx4x_outer + +.p2align 5 +L$mulx4x_outer: + movq (%rdi),%rdx + leaq 8(%rdi),%rdi + subq %rax,%rsi + movq %r15,(%rbx) + leaq 64+32(%rsp),%rbx + subq %rax,%rcx + + mulxq 0(%rsi),%r8,%r11 + xorl %ebp,%ebp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + adoxq %rbp,%r12 + adcxq %rbp,%r13 + + movq %rdi,8(%rsp) +.byte 0x67 + movq %r8,%r15 + imulq 24(%rsp),%r8 + xorl %ebp,%ebp + + mulxq 24(%rsi),%rax,%r14 + movq %r8,%rdx + adoxq -16(%rbx),%r12 + adcxq %rax,%r13 + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + adoxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 8(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 16(%rcx),%rax,%r12 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-24(%rbx) leaq 32(%rcx),%rcx - adcq $0,%r10 - mulq %r14 - addq %rax,%r11 - movq %rbx,%rax - adcq %rdx,%r10 - cmpq $0,%rcx - jne L$sqr4x_mont_inner + adcxq %rax,%r12 + adoxq %rbp,%r15 + movq 48(%rsp),%rdi + movq %r12,-16(%rbx) + + jmp L$mulx4x_inner + +.p2align 5 +L$mulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq 
%rax,%r10 + adoxq %r15,%r11 + mulxq 8(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 16(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + mulxq 24(%rcx),%rax,%r15 + movq %r9,%rdx + movq %r11,-32(%rbx) + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 32(%rcx),%rcx + movq %r13,-16(%rbx) - subq 0(%rsp),%rcx - movq %r8,%r14 + decq %rdi + jnz L$mulx4x_inner - xorq %r13,%r13 - addq %r11,%r12 - adcq $0,%r13 - mulq %r15 - addq %rax,%r12 - movq %r9,%rax - adcq %rdx,%r13 - movq %r12,-8(%rdi) - - xorq %r11,%r11 - addq (%rdi),%r10 - adcq $0,%r11 - movq 0(%rsi,%rcx,1),%rbx - addq %rbp,%r10 - adcq $0,%r11 + movq 0(%rsp),%rax + movq 8(%rsp),%rdi + adcq %rbp,%r15 + subq 0(%rbx),%rbp + adcq %r15,%r14 + movq -8(%rcx),%r8 + sbbq %r15,%r15 + movq %r14,-8(%rbx) - imulq 16(%rdi,%rcx,1),%r14 - xorq %r12,%r12 - movq 8(%rsi,%rcx,1),%r9 - addq %r10,%r13 - movq 16(%rdi,%rcx,1),%r10 - adcq $0,%r12 - mulq %r15 - addq %rax,%r13 - movq %rbx,%rax - adcq %rdx,%r12 - movq %r13,(%rdi) + cmpq 16(%rsp),%rdi + jne L$mulx4x_outer - xorq %rbp,%rbp - addq 8(%rdi),%r12 - adcq %rbp,%rbp - addq %r11,%r12 - leaq 16(%rdi),%rdi - adcq $0,%rbp - movq %r12,-8(%rdi) - cmpq 8(%rsp),%rdi - jb L$sqr4x_mont_outer - - movq 0(%rsp),%r9 - movq %rbp,(%rdi) - movq 64(%rsp,%r9,1),%rax - leaq 64(%rsp,%r9,1),%rbx - movq 40(%rsp),%rsi - shrq $5,%r9 - movq 8(%rbx),%rdx - xorq %rbp,%rbp + subq %r14,%r8 + sbbq %r8,%r8 + orq %r8,%r15 + negq %rax + xorq %rdx,%rdx movq 32(%rsp),%rdi - subq 0(%rsi),%rax - movq 16(%rbx),%r10 - movq 24(%rbx),%r11 - sbbq 8(%rsi),%rdx - leaq -1(%r9),%rcx - jmp L$sqr4x_sub -.p2align 4 -L$sqr4x_sub: - movq %rax,0(%rdi,%rbp,8) - movq %rdx,8(%rdi,%rbp,8) - sbbq 16(%rsi,%rbp,8),%r10 - movq 32(%rbx,%rbp,8),%rax - movq 40(%rbx,%rbp,8),%rdx - sbbq 24(%rsi,%rbp,8),%r11 - movq %r10,16(%rdi,%rbp,8) - movq %r11,24(%rdi,%rbp,8) - sbbq 32(%rsi,%rbp,8),%rax - movq 48(%rbx,%rbp,8),%r10 - movq 56(%rbx,%rbp,8),%r11 - sbbq 40(%rsi,%rbp,8),%rdx - leaq 4(%rbp),%rbp - decq %rcx - jnz L$sqr4x_sub - - movq %rax,0(%rdi,%rbp,8) - movq 32(%rbx,%rbp,8),%rax - sbbq 16(%rsi,%rbp,8),%r10 - movq %rdx,8(%rdi,%rbp,8) - sbbq 24(%rsi,%rbp,8),%r11 - movq %r10,16(%rdi,%rbp,8) - - sbbq $0,%rax - movq %r11,24(%rdi,%rbp,8) - xorq %rbp,%rbp - andq %rax,%rbx - notq %rax - movq %rdi,%rsi - andq %rax,%rsi - leaq -1(%r9),%rcx - orq %rsi,%rbx + leaq 64(%rsp),%rbx pxor %xmm0,%xmm0 - leaq 64(%rsp,%r9,8),%rsi - movdqu (%rbx),%xmm1 - leaq (%rsi,%r9,8),%rsi - movdqa %xmm0,64(%rsp) - movdqa %xmm0,(%rsi) - movdqu %xmm1,(%rdi) - jmp L$sqr4x_copy -.p2align 4 -L$sqr4x_copy: - movdqu 16(%rbx,%rbp,1),%xmm2 - movdqu 32(%rbx,%rbp,1),%xmm1 - movdqa %xmm0,80(%rsp,%rbp,1) - movdqa %xmm0,96(%rsp,%rbp,1) - movdqa %xmm0,16(%rsi,%rbp,1) - movdqa %xmm0,32(%rsi,%rbp,1) - movdqu %xmm2,16(%rdi,%rbp,1) - movdqu %xmm1,32(%rdi,%rbp,1) - leaq 32(%rbp),%rbp - decq %rcx - jnz L$sqr4x_copy - - movdqu 16(%rbx,%rbp,1),%xmm2 - movdqa %xmm0,80(%rsp,%rbp,1) - movdqa %xmm0,16(%rsi,%rbp,1) - movdqu %xmm2,16(%rdi,%rbp,1) - movq 56(%rsp),%rsi + movq 0(%rcx,%rax,1),%r8 + movq 8(%rcx,%rax,1),%r9 + negq %r8 + jmp L$mulx4x_sub_entry + +.p2align 5 +L$mulx4x_sub: + movq 0(%rcx,%rax,1),%r8 + movq 8(%rcx,%rax,1),%r9 + notq %r8 +L$mulx4x_sub_entry: + movq 16(%rcx,%rax,1),%r10 + notq %r9 + andq %r15,%r8 + movq 24(%rcx,%rax,1),%r11 + notq %r10 + andq %r15,%r9 + notq %r11 + andq %r15,%r10 + andq %r15,%r11 + + negq %rdx + adcq 0(%rbx),%r8 + adcq 8(%rbx),%r9 + movdqa %xmm0,(%rbx) + adcq 16(%rbx),%r10 + adcq 24(%rbx),%r11 + movdqa %xmm0,16(%rbx) + leaq 32(%rbx),%rbx + sbbq 
%rdx,%rdx + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + leaq 32(%rdi),%rdi + + addq $32,%rax + jnz L$mulx4x_sub + + movq 40(%rsp),%rsi movq $1,%rax - movq 0(%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -L$sqr4x_epilogue: + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mulx4x_epilogue: .byte 0xf3,0xc3 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s index f3bfb046d3b32b..65cf9993d80396 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s @@ -1,19 +1,20 @@ .text + .globl _bn_mul_mont_gather5 .p2align 6 _bn_mul_mont_gather5: - testl $3,%r9d + testl $7,%r9d jnz L$mul_enter - cmpl $8,%r9d - jb L$mul_enter + movl _OPENSSL_ia32cap_P+8(%rip),%r11d jmp L$mul4x_enter .p2align 4 L$mul_enter: movl %r9d,%r9d + movq %rsp,%rax movl 8(%rsp),%r10d pushq %rbx pushq %rbp @@ -21,7 +22,6 @@ L$mul_enter: pushq %r13 pushq %r14 pushq %r15 - movq %rsp,%rax leaq 2(%r9),%r11 negq %r11 leaq (%rsp,%r11,8),%rsp @@ -222,7 +222,7 @@ L$inner_enter: leaq 1(%r14),%r14 cmpq %r9,%r14 - jl L$outer + jb L$outer xorq %r14,%r14 movq (%rsp),%rax @@ -256,477 +256,2649 @@ L$copy: movq 8(%rsp,%r9,8),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$mul_epilogue: .byte 0xf3,0xc3 -.p2align 4 +.p2align 5 bn_mul4x_mont_gather5: L$mul4x_enter: - movl %r9d,%r9d - movl 8(%rsp),%r10d + andl $524544,%r11d + cmpl $524544,%r11d + je L$mulx4x_enter +.byte 0x67 + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 pushq %r15 - movq %rsp,%rax - leaq 4(%r9),%r11 - negq %r11 - leaq (%rsp,%r11,8),%rsp - andq $-1024,%rsp +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 - movq %rax,8(%rsp,%r9,8) + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$mul4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$mul4xsp_done + +.p2align 5 +L$mul4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$mul4xsp_done: + andq $-64,%rsp + negq %r9 + + movq %rax,40(%rsp) L$mul4x_body: - movq %rdi,16(%rsp,%r9,8) - movq %rdx,%r12 + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mul4x_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 5 +mul4x_internal: + shlq $5,%r9 + movl 8(%rax),%r10d + leaq 256(%rdx,%r9,1),%r13 + shrq $5,%r9 movq %r10,%r11 shrq $3,%r10 andq $7,%r11 notq %r10 leaq L$magic_masks(%rip),%rax andq $3,%r10 - leaq 96(%r12,%r11,8),%r12 + leaq 96(%rdx,%r11,8),%r12 movq 0(%rax,%r10,8),%xmm4 movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 movq 
16(%rax,%r10,8),%xmm6 movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 movq -96(%r12),%xmm0 + leaq 256(%r12),%r14 movq -32(%r12),%xmm1 pand %xmm4,%xmm0 movq 32(%r12),%xmm2 pand %xmm5,%xmm1 movq 96(%r12),%xmm3 pand %xmm6,%xmm2 +.byte 0x67 por %xmm1,%xmm0 + movq -96(%r14),%xmm1 +.byte 0x67 pand %xmm7,%xmm3 +.byte 0x67 por %xmm2,%xmm0 - leaq 256(%r12),%r12 + movq -32(%r14),%xmm2 +.byte 0x67 + pand %xmm4,%xmm1 +.byte 0x67 por %xmm3,%xmm0 + movq 32(%r14),%xmm3 .byte 102,72,15,126,195 + movq 96(%r14),%xmm0 + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + movq (%r8),%r8 movq (%rsi),%rax - - xorq %r14,%r14 - xorq %r15,%r15 - - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 + leaq (%rsi,%r9,1),%rsi + negq %r9 movq %r8,%rbp mulq %rbx movq %rax,%r10 movq (%rcx),%rax - movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + pand %xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 imulq %r10,%rbp + + + + + + + + leaq 64+8(%rsp,%r11,8),%r14 movq %rdx,%r11 - por %xmm2,%xmm0 - leaq 256(%r12),%r12 - por %xmm3,%xmm0 + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + leaq 512(%r12),%r12 + por %xmm1,%xmm0 mulq %rbp addq %rax,%r10 - movq 8(%rsi),%rax + movq 8(%rsi,%r9,1),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx),%rax + movq 16(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq 16(%rsi),%rax + movq 16(%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi - leaq 4(%r15),%r15 + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx adcq $0,%rdx - movq %rdi,(%rsp) + movq %rdi,(%r14) movq %rdx,%r13 jmp L$1st4x -.p2align 4 + +.p2align 5 L$1st4x: mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq -16(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi,%r15,8),%rax + movq (%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) movq %rdx,%r13 mulq %rbx addq %rax,%r10 - movq (%rcx,%r15,8),%rax + movq 0(%rcx),%rax adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq 8(%rsi,%r15,8),%rax + movq 8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %r13,-8(%rsp,%r15,8) + movq %r13,-8(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx,%r15,8),%rax + movq 16(%rcx),%rax adcq $0,%rdx - leaq 4(%r15),%r15 movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq -16(%rsi,%r15,8),%rax + movq 16(%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi + leaq 64(%rcx),%rcx adcq $0,%rdx - movq %rdi,-32(%rsp,%r15,8) + movq %rdi,(%r14) movq %rdx,%r13 - cmpq %r9,%r15 - jl L$1st4x + + addq $32,%r15 + jnz L$1st4x mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq -16(%rcx),%rax adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi),%rax + movq (%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) movq %rdx,%r13 .byte 102,72,15,126,195 + leaq (%rcx,%r9,2),%rcx xorq %rdi,%rdi addq 
%r10,%r13 adcq $0,%rdi - movq %r13,-8(%rsp,%r15,8) - movq %rdi,(%rsp,%r15,8) + movq %r13,-8(%r14) - leaq 1(%r14),%r14 -.p2align 2 -L$outer4x: - xorq %r15,%r15 - movq -96(%r12),%xmm0 - movq -32(%r12),%xmm1 - pand %xmm4,%xmm0 - movq 32(%r12),%xmm2 - pand %xmm5,%xmm1 + jmp L$outer4x - movq (%rsp),%r10 +.p2align 5 +L$outer4x: + movq (%r14,%r9,1),%r10 movq %r8,%rbp mulq %rbx addq %rax,%r10 movq (%rcx),%rax adcq $0,%rdx + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 movq 96(%r12),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 imulq %r10,%rbp +.byte 0x67 movq %rdx,%r11 + movq %rdi,(%r14) + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 por %xmm2,%xmm0 + leaq (%r14,%r9,1),%r14 leaq 256(%r12),%r12 por %xmm3,%xmm0 mulq %rbp addq %rax,%r10 - movq 8(%rsi),%rax + movq 8(%rsi,%r9,1),%rax adcq $0,%rdx movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx),%rax + movq 16(%rcx),%rax adcq $0,%rdx - addq 8(%rsp),%r11 + addq 8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq 16(%rsi),%rax + movq 16(%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi - leaq 4(%r15),%r15 + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx adcq $0,%rdx movq %rdx,%r13 jmp L$inner4x -.p2align 4 + +.p2align 5 L$inner4x: mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax adcq $0,%rdx - addq -16(%rsp,%r15,8),%r10 + addq 16(%r14),%r10 + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %rdi,-32(%rsp,%r15,8) + movq %rdi,-32(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq -16(%rcx),%rax adcq $0,%rdx - addq -8(%rsp,%r15,8),%r11 + addq -8(%r14),%r11 adcq $0,%rdx movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi,%r15,8),%rax + movq (%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%r13 mulq %rbx addq %rax,%r10 - movq (%rcx,%r15,8),%rax + movq 0(%rcx),%rax adcq $0,%rdx - addq (%rsp,%r15,8),%r10 + addq (%r14),%r10 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq 8(%rsi,%r15,8),%rax + movq 8(%rsi,%r15,1),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq 8(%rcx,%r15,8),%rax + movq 16(%rcx),%rax adcq $0,%rdx - addq 8(%rsp,%r15,8),%r11 + addq 8(%r14),%r11 adcq $0,%rdx - leaq 4(%r15),%r15 movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq -16(%rsi,%r15,8),%rax + movq 16(%rsi,%r15,1),%rax adcq $0,%rdx addq %r11,%rdi + leaq 64(%rcx),%rcx adcq $0,%rdx - movq %r13,-40(%rsp,%r15,8) + movq %r13,-8(%r14) movq %rdx,%r13 - cmpq %r9,%r15 - jl L$inner4x + + addq $32,%r15 + jnz L$inner4x mulq %rbx addq %rax,%r10 - movq -16(%rcx,%r15,8),%rax + movq -32(%rcx),%rax adcq $0,%rdx - addq -16(%rsp,%r15,8),%r10 + addq 16(%r14),%r10 + leaq 32(%r14),%r14 adcq $0,%rdx movq %rdx,%r11 mulq %rbp addq %rax,%r13 - movq -8(%rsi,%r15,8),%rax + movq -8(%rsi),%rax adcq $0,%rdx addq %r10,%r13 adcq $0,%rdx - movq %rdi,-32(%rsp,%r15,8) + movq %rdi,-32(%r14) movq %rdx,%rdi mulq %rbx addq %rax,%r11 - movq -8(%rcx,%r15,8),%rax + movq %rbp,%rax + movq -16(%rcx),%rbp adcq $0,%rdx - addq -8(%rsp,%r15,8),%r11 + addq -8(%r14),%r11 adcq $0,%rdx - leaq 1(%r14),%r14 movq %rdx,%r10 mulq %rbp addq %rax,%rdi - movq (%rsi),%rax + movq (%rsi,%r9,1),%rax adcq $0,%rdx addq %r11,%rdi adcq $0,%rdx - movq %r13,-24(%rsp,%r15,8) + movq %r13,-24(%r14) movq %rdx,%r13 
.byte 102,72,15,126,195 - movq %rdi,-16(%rsp,%r15,8) + movq %rdi,-16(%r14) + leaq (%rcx,%r9,2),%rcx xorq %rdi,%rdi addq %r10,%r13 adcq $0,%rdi - addq (%rsp,%r9,8),%r13 + addq (%r14),%r13 adcq $0,%rdi - movq %r13,-8(%rsp,%r15,8) - movq %rdi,(%rsp,%r15,8) + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb L$outer4x + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + xorq $1,%rdi + leaq (%r14,%r9,1),%rbx + leaq (%rcx,%rdi,8),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + jmp L$sqr4x_sub + +.globl _bn_power5 + +.p2align 5 +_bn_power5: + movl _OPENSSL_ia32cap_P+8(%rip),%r11d + andl $524544,%r11d + cmpl $524544,%r11d + je L$powerx5_enter + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 - cmpq %r9,%r14 - jl L$outer4x - movq 16(%rsp,%r9,8),%rdi - movq 0(%rsp),%rax - pxor %xmm0,%xmm0 - movq 8(%rsp),%rdx - shrq $2,%r9 - leaq (%rsp),%rsi - xorq %r14,%r14 - subq 0(%rcx),%rax - movq 16(%rsi),%rbx - movq 24(%rsi),%rbp - sbbq 8(%rcx),%rdx - leaq -1(%r9),%r15 - jmp L$sub4x -.p2align 4 -L$sub4x: - movq %rax,0(%rdi,%r14,8) - movq %rdx,8(%rdi,%r14,8) - sbbq 16(%rcx,%r14,8),%rbx - movq 32(%rsi,%r14,8),%rax - movq 40(%rsi,%r14,8),%rdx - sbbq 24(%rcx,%r14,8),%rbp - movq %rbx,16(%rdi,%r14,8) - movq %rbp,24(%rdi,%r14,8) - sbbq 32(%rcx,%r14,8),%rax - movq 48(%rsi,%r14,8),%rbx - movq 56(%rsi,%r14,8),%rbp - sbbq 40(%rcx,%r14,8),%rdx - leaq 4(%r14),%r14 - decq %r15 - jnz L$sub4x - movq %rax,0(%rdi,%r14,8) - movq 32(%rsi,%r14,8),%rax - sbbq 16(%rcx,%r14,8),%rbx - movq %rdx,8(%rdi,%r14,8) - sbbq 24(%rcx,%r14,8),%rbp - movq %rbx,16(%rdi,%r14,8) - sbbq $0,%rax - movq %rbp,24(%rdi,%r14,8) - xorq %r14,%r14 - andq %rax,%rsi - notq %rax - movq %rdi,%rcx - andq %rax,%rcx - leaq -1(%r9),%r15 - orq %rcx,%rsi - movdqu (%rsi),%xmm1 - movdqa %xmm0,(%rsp) - movdqu %xmm1,(%rdi) - jmp L$copy4x -.p2align 4 -L$copy4x: - movdqu 16(%rsi,%r14,1),%xmm2 - movdqu 32(%rsi,%r14,1),%xmm1 - movdqa %xmm0,16(%rsp,%r14,1) - movdqu %xmm2,16(%rdi,%r14,1) - movdqa %xmm0,32(%rsp,%r14,1) - movdqu %xmm1,32(%rdi,%r14,1) - leaq 32(%r14),%r14 - decq %r15 - jnz L$copy4x - shlq $2,%r9 - movdqu 16(%rsi,%r14,1),%xmm2 - movdqa %xmm0,16(%rsp,%r14,1) - movdqu %xmm2,16(%rdi,%r14,1) - movq 8(%rsp,%r9,8),%rsi + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$pwr_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$pwr_sp_done + +.p2align 5 +L$pwr_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$pwr_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$power5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi movq $1,%rax - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -L$mul4x_epilogue: + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$power5_epilogue: .byte 0xf3,0xc3 + +.globl _bn_sqr8x_internal 
+.private_extern _bn_sqr8x_internal + +.p2align 5 +_bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp L$sqr4x_1st + +.p2align 5 +L$sqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp L$sqr4x_outer + +.p2align 5 +L$sqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp L$sqr4x_inner + +.p2align 5 +L$sqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + 
movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz L$sqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp L$sqr4x_shift_n_add + +.p2align 5 +L$sqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz L$sqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + 
adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +sqr8x_reduction: + xorq %rax,%rax + leaq (%rbp,%r9,2),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp L$8x_reduction_loop + +.p2align 5 +L$8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$8x_reduce + +.p2align 5 +L$8x_reduce: + mulq %rbx + movq 16(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_reduce + + leaq 128(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp L$8x_tail + +.p2align 5 +L$8x_tail: + mulq %rbx + addq %rax,%r8 + movq 16(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_tail + + leaq 128(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + 
adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp L$8x_tail + +.p2align 5 +L$8x_tail_done: + addq (%rdx),%r8 + xorq %rax,%rax + + negq %rsi +L$8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -16(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb L$8x_reduction_loop + + subq %r15,%rcx + leaq (%rdi,%r9,1),%rbx + adcq %rsi,%rsi + movq %r9,%rcx + orq %rsi,%rax +.byte 102,72,15,126,207 + xorq $1,%rax +.byte 102,72,15,126,206 + leaq (%rbp,%rax,8),%rbp + sarq $3+2,%rcx + jmp L$sqr4x_sub + +.p2align 5 +L$sqr4x_sub: +.byte 0x66 + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rbx),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 32(%rbp),%r14 + movq %r12,0(%rdi) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz L$sqr4x_sub + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 + +.globl _bn_from_montgomery + +.p2align 5 +_bn_from_montgomery: + testl $7,%r9d + jz bn_from_mont8x + xorl %eax,%eax + .byte 0xf3,0xc3 + + + +.p2align 5 +bn_from_mont8x: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$from_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$from_sp_done + +.p2align 5 +L$from_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$from_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$from_body: + movq %r9,%r11 + leaq 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp L$mul_by_1 + +.p2align 5 +L$mul_by_1: + movdqu (%rsi),%xmm1 + movdqu 16(%rsi),%xmm2 + movdqu 32(%rsi),%xmm3 + movdqa %xmm0,(%rax,%r9,1) + movdqu 48(%rsi),%xmm4 + movdqa %xmm0,16(%rax,%r9,1) +.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,%r9,1) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,%r9,1) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + leaq 64(%rax),%rax + subq $64,%r11 + jnz L$mul_by_1 + +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 0x67 + movq %rcx,%rbp +.byte 102,73,15,110,218 + movl _OPENSSL_ia32cap_P+8(%rip),%r11d + andl $524544,%r11d + cmpl $524544,%r11d + jne L$from_mont_nox + + leaq (%rax,%r9,1),%rdi + call sqrx8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp L$from_mont_zero + +.p2align 5 +L$from_mont_nox: + call sqr8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp L$from_mont_zero + +.p2align 5 +L$from_mont_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + subq $32,%r9 + jnz L$from_mont_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq 
-8(%rsi),%rbx + leaq (%rsi),%rsp +L$from_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +bn_mulx4x_mont_gather5: +L$mulx4x_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$mulx4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$mulx4xsp_done + +.p2align 5 +L$mulx4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$mulx4xsp_done: + andq $-64,%rsp + + + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$mulx4x_body: + call mulx4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mulx4x_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 5 +mulx4x_internal: +.byte 0x4c,0x89,0x8c,0x24,0x08,0x00,0x00,0x00 +.byte 0x67 + negq %r9 + shlq $5,%r9 + leaq 256(%rdx,%r9,1),%r13 + shrq $5+5,%r9 + movl 8(%rax),%r10d + subq $1,%r9 + movq %r13,16+8(%rsp) + movq %r9,24+8(%rsp) + movq %rdi,56+8(%rsp) + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq L$magic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%rdx,%r11,8),%rdi + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 + + movq -96(%rdi),%xmm0 + leaq 256(%rdi),%rbx + movq -32(%rdi),%xmm1 + pand %xmm4,%xmm0 + movq 32(%rdi),%xmm2 + pand %xmm5,%xmm1 + movq 96(%rdi),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + movq -96(%rbx),%xmm1 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + movq -32(%rbx),%xmm2 + por %xmm3,%xmm0 +.byte 0x67,0x67 + pand %xmm4,%xmm1 + movq 32(%rbx),%xmm3 + +.byte 102,72,15,126,194 + movq 96(%rbx),%xmm0 + leaq 512(%rdi),%rdi + pand %xmm5,%xmm2 +.byte 0x67,0x67 + pand %xmm6,%xmm3 + + + + + + + + leaq 64+32+8(%rsp,%r11,8),%rbx + + movq %rdx,%r9 + mulxq 0(%rsi),%r8,%rax + mulxq 8(%rsi),%r11,%r12 + addq %rax,%r11 + mulxq 16(%rsi),%rax,%r13 + adcq %rax,%r12 + adcq $0,%r13 + mulxq 24(%rsi),%rax,%r14 + + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + xorq %rbp,%rbp + movq %r8,%rdx + + por %xmm2,%xmm1 + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + movq %rdi,8+8(%rsp) + por %xmm1,%xmm0 + +.byte 0x48,0x8d,0xb6,0x20,0x00,0x00,0x00 + adcxq %rax,%r13 + adcxq %rbp,%r14 + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 16(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 32(%rcx),%rax,%r12 + movq 24+8(%rsp),%rdi +.byte 0x66 + movq %r10,-32(%rbx) + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 48(%rcx),%rax,%r15 +.byte 0x67,0x67 + movq %r9,%rdx + movq %r11,-24(%rbx) + adcxq %rax,%r12 + adoxq %rbp,%r15 +.byte 0x48,0x8d,0x89,0x40,0x00,0x00,0x00 + movq %r12,-16(%rbx) + + +.p2align 5 +L$mulx4x_1st: + adcxq %rbp,%r15 + mulxq 0(%rsi),%r10,%rax + adcxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 +.byte 0x67,0x67 + movq %r8,%rdx + adcxq %rax,%r13 + adcxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 16(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 32(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + movq %r11,-32(%rbx) + adoxq %r15,%r13 + mulxq 48(%rcx),%rax,%r15 + movq 
%r9,%rdx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + leaq 64(%rcx),%rcx + movq %r13,-16(%rbx) + + decq %rdi + jnz L$mulx4x_1st + + movq 8(%rsp),%rax +.byte 102,72,15,126,194 + adcq %rbp,%r15 + leaq (%rsi,%rax,1),%rsi + addq %r15,%r14 + movq 8+8(%rsp),%rdi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + jmp L$mulx4x_outer + +.p2align 5 +L$mulx4x_outer: + movq %rbp,(%rbx) + leaq 32(%rbx,%rax,1),%rbx + mulxq 0(%rsi),%r8,%r11 + xorq %rbp,%rbp + movq %rdx,%r9 + mulxq 8(%rsi),%r14,%r12 + adoxq -32(%rbx),%r8 + adcxq %r14,%r11 + mulxq 16(%rsi),%r15,%r13 + adoxq -24(%rbx),%r11 + adcxq %r15,%r12 + mulxq 24(%rsi),%rdx,%r14 + adoxq -16(%rbx),%r12 + adcxq %rdx,%r13 + leaq (%rcx,%rax,2),%rcx + leaq 32(%rsi),%rsi + adoxq -8(%rbx),%r13 + adcxq %rbp,%r14 + adoxq %rbp,%r14 + +.byte 0x67 + movq %r8,%r15 + imulq 32+8(%rsp),%r8 + + movq -96(%rdi),%xmm0 +.byte 0x67,0x67 + movq %r8,%rdx + movq -32(%rdi),%xmm1 +.byte 0x67 + pand %xmm4,%xmm0 + movq 32(%rdi),%xmm2 +.byte 0x67 + pand %xmm5,%xmm1 + movq 96(%rdi),%xmm3 + addq $256,%rdi +.byte 0x67 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + xorq %rbp,%rbp + movq %rdi,8+8(%rsp) + + mulxq 0(%rcx),%rax,%r10 + adcxq %rax,%r15 + adoxq %r11,%r10 + mulxq 16(%rcx),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + mulxq 32(%rcx),%rax,%r12 + adcxq %rax,%r11 + adoxq %r13,%r12 + mulxq 48(%rcx),%rax,%r15 + movq %r9,%rdx + por %xmm2,%xmm0 + movq 24+8(%rsp),%rdi + movq %r10,-32(%rbx) + por %xmm3,%xmm0 + adcxq %rax,%r12 + movq %r11,-24(%rbx) + adoxq %rbp,%r15 + movq %r12,-16(%rbx) + leaq 64(%rcx),%rcx + jmp L$mulx4x_inner + +.p2align 5 +L$mulx4x_inner: + mulxq 0(%rsi),%r10,%rax + adcxq %rbp,%r15 + adoxq %r14,%r10 + mulxq 8(%rsi),%r11,%r14 + adcxq 0(%rbx),%r10 + adoxq %rax,%r11 + mulxq 16(%rsi),%r12,%rax + adcxq 8(%rbx),%r11 + adoxq %r14,%r12 + mulxq 24(%rsi),%r13,%r14 + movq %r8,%rdx + adcxq 16(%rbx),%r12 + adoxq %rax,%r13 + adcxq 24(%rbx),%r13 + adoxq %rbp,%r14 + leaq 32(%rsi),%rsi + leaq 32(%rbx),%rbx + adcxq %rbp,%r14 + + adoxq %r15,%r10 + mulxq 0(%rcx),%rax,%r15 + adcxq %rax,%r10 + adoxq %r15,%r11 + mulxq 16(%rcx),%rax,%r15 + adcxq %rax,%r11 + adoxq %r15,%r12 + mulxq 32(%rcx),%rax,%r15 + movq %r10,-40(%rbx) + adcxq %rax,%r12 + adoxq %r15,%r13 + movq %r11,-32(%rbx) + mulxq 48(%rcx),%rax,%r15 + movq %r9,%rdx + leaq 64(%rcx),%rcx + movq %r12,-24(%rbx) + adcxq %rax,%r13 + adoxq %rbp,%r15 + movq %r13,-16(%rbx) + + decq %rdi + jnz L$mulx4x_inner + + movq 0+8(%rsp),%rax +.byte 102,72,15,126,194 + adcq %rbp,%r15 + subq 0(%rbx),%rdi + movq 8+8(%rsp),%rdi + movq 16+8(%rsp),%r10 + adcq %r15,%r14 + leaq (%rsi,%rax,1),%rsi + adcq %rbp,%rbp + movq %r14,-8(%rbx) + + cmpq %r10,%rdi + jb L$mulx4x_outer + + movq -16(%rcx),%r10 + xorq %r15,%r15 + subq %r14,%r10 + adcq %r15,%r15 + orq %r15,%rbp + xorq $1,%rbp + leaq (%rbx,%rax,1),%rdi + leaq (%rcx,%rax,2),%rcx +.byte 0x67,0x67 + sarq $3+2,%rax + leaq (%rcx,%rbp,8),%rbp + movq 56+8(%rsp),%rdx + movq %rax,%rcx + jmp L$sqrx4x_sub + + +.p2align 5 +bn_powerx5: +L$powerx5_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$pwrx_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$pwrx_sp_done + +.p2align 5 +L$pwrx_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$pwrx_sp_done: + andq $-64,%rsp + 
movq %r9,%r10 + negq %r9 + + + + + + + + + + + + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$powerx5_body: + + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + + movq %r10,%r9 + movq %rsi,%rdi +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq 40(%rsp),%rax + + call mulx4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$powerx5_epilogue: + .byte 0xf3,0xc3 + + +.globl _bn_sqrx8x_internal +.private_extern _bn_sqrx8x_internal + +.p2align 5 +_bn_sqrx8x_internal: +__bn_sqrx8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 48+8(%rsp),%rdi + leaq (%rsi,%r9,1),%rbp + movq %r9,0+8(%rsp) + movq %rbp,8+8(%rsp) + jmp L$sqr8x_zero_start + +.p2align 5 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +L$sqrx8x_zero: +.byte 0x3e + movdqa %xmm0,0(%rdi) + movdqa %xmm0,16(%rdi) + movdqa %xmm0,32(%rdi) + movdqa %xmm0,48(%rdi) +L$sqr8x_zero_start: + movdqa %xmm0,64(%rdi) + movdqa %xmm0,80(%rdi) + movdqa %xmm0,96(%rdi) + movdqa %xmm0,112(%rdi) + leaq 128(%rdi),%rdi + subq $64,%r9 + jnz L$sqrx8x_zero + + movq 0(%rsi),%rdx + + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r12,%r12 + xorq %r13,%r13 + xorq %r14,%r14 + xorq %r15,%r15 + leaq 48+8(%rsp),%rdi + xorq %rbp,%rbp + jmp L$sqrx8x_outer_loop + +.p2align 5 +L$sqrx8x_outer_loop: + mulxq 8(%rsi),%r8,%rax + adcxq %r9,%r8 + adoxq %rax,%r10 + mulxq 16(%rsi),%r9,%rax + adcxq %r10,%r9 + adoxq %rax,%r11 +.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 + adcxq %r11,%r10 + adoxq %rax,%r12 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 + adcxq %r12,%r11 + adoxq %rax,%r13 + mulxq 40(%rsi),%r12,%rax + adcxq %r13,%r12 + adoxq %rax,%r14 + mulxq 48(%rsi),%r13,%rax + adcxq %r14,%r13 + adoxq %r15,%rax + mulxq 56(%rsi),%r14,%r15 + movq 8(%rsi),%rdx + adcxq %rax,%r14 + adoxq %rbp,%r15 + adcq 64(%rdi),%r15 + movq %r8,8(%rdi) + movq %r9,16(%rdi) + sbbq %rcx,%rcx + xorq %rbp,%rbp + + + mulxq 16(%rsi),%r8,%rbx + mulxq 24(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 32(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %rbx,%r11 +.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 + adcxq %r13,%r11 + adoxq %r14,%r12 +.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 + movq 16(%rsi),%rdx + adcxq %rax,%r12 + adoxq %rbx,%r13 + adcxq %r15,%r13 + adoxq %rbp,%r14 + adcxq %rbp,%r14 + + movq %r8,24(%rdi) + movq %r9,32(%rdi) + + mulxq 24(%rsi),%r8,%rbx + mulxq 32(%rsi),%r9,%rax + adcxq %r10,%r8 + adoxq %rbx,%r9 + mulxq 40(%rsi),%r10,%rbx + adcxq %r11,%r9 + adoxq %rax,%r10 +.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 + adcxq %r12,%r10 + adoxq %r13,%r11 +.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +.byte 0x3e + movq 24(%rsi),%rdx + adcxq %rbx,%r11 + adoxq %rax,%r12 + adcxq %r14,%r12 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + mulxq 32(%rsi),%r8,%rax + adoxq %rbp,%r13 + adcxq %rbp,%r13 + + mulxq 40(%rsi),%r9,%rbx + adcxq %r10,%r8 + adoxq %rax,%r9 + mulxq 48(%rsi),%r10,%rax + adcxq %r11,%r9 + adoxq %r12,%r10 + mulxq 56(%rsi),%r11,%r12 + movq 32(%rsi),%rdx + movq 40(%rsi),%r14 + adcxq %rbx,%r10 + adoxq %rax,%r11 + movq 48(%rsi),%r15 + adcxq %r13,%r11 + adoxq %rbp,%r12 
+ adcxq %rbp,%r12 + + movq %r8,56(%rdi) + movq %r9,64(%rdi) + + mulxq %r14,%r9,%rax + movq 56(%rsi),%r8 + adcxq %r10,%r9 + mulxq %r15,%r10,%rbx + adoxq %rax,%r10 + adcxq %r11,%r10 + mulxq %r8,%r11,%rax + movq %r14,%rdx + adoxq %rbx,%r11 + adcxq %r12,%r11 + + adcxq %rbp,%rax + + mulxq %r15,%r14,%rbx + mulxq %r8,%r12,%r13 + movq %r15,%rdx + leaq 64(%rsi),%rsi + adcxq %r14,%r11 + adoxq %rbx,%r12 + adcxq %rax,%r12 + adoxq %rbp,%r13 + +.byte 0x67,0x67 + mulxq %r8,%r8,%r14 + adcxq %r8,%r13 + adcxq %rbp,%r14 + + cmpq 8+8(%rsp),%rsi + je L$sqrx8x_outer_break + + negq %rcx + movq $-8,%rcx + movq %rbp,%r15 + movq 64(%rdi),%r8 + adcxq 72(%rdi),%r9 + adcxq 80(%rdi),%r10 + adcxq 88(%rdi),%r11 + adcq 96(%rdi),%r12 + adcq 104(%rdi),%r13 + adcq 112(%rdi),%r14 + adcq 120(%rdi),%r15 + leaq (%rsi),%rbp + leaq 128(%rdi),%rdi + sbbq %rax,%rax + + movq -64(%rsi),%rdx + movq %rax,16+8(%rsp) + movq %rdi,24+8(%rsp) + + + xorl %eax,%eax + jmp L$sqrx8x_loop + +.p2align 5 +L$sqrx8x_loop: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 8(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 16(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 24(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 40(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 48(%rbp),%rax,%r14 + movq %rbx,(%rdi,%rcx,8) + movl $0,%ebx + adcxq %rax,%r13 + adoxq %r15,%r14 + +.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 + movq 8(%rsi,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rbx,%r15 + adcxq %rbx,%r15 + +.byte 0x67 + incq %rcx + jnz L$sqrx8x_loop + + leaq 64(%rbp),%rbp + movq $-8,%rcx + cmpq 8+8(%rsp),%rbp + je L$sqrx8x_break + + subq 16+8(%rsp),%rbx +.byte 0x66 + movq -64(%rsi),%rdx + adcxq 0(%rdi),%r8 + adcxq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi +.byte 0x67 + sbbq %rax,%rax + xorl %ebx,%ebx + movq %rax,16+8(%rsp) + jmp L$sqrx8x_loop + +.p2align 5 +L$sqrx8x_break: + subq 16+8(%rsp),%r8 + movq 24+8(%rsp),%rcx + movq 0(%rsi),%rdx + xorl %ebp,%ebp + movq %r8,0(%rdi) + cmpq %rcx,%rdi + je L$sqrx8x_outer_loop + + movq %r9,8(%rdi) + movq 8(%rcx),%r9 + movq %r10,16(%rdi) + movq 16(%rcx),%r10 + movq %r11,24(%rdi) + movq 24(%rcx),%r11 + movq %r12,32(%rdi) + movq 32(%rcx),%r12 + movq %r13,40(%rdi) + movq 40(%rcx),%r13 + movq %r14,48(%rdi) + movq 48(%rcx),%r14 + movq %r15,56(%rdi) + movq 56(%rcx),%r15 + movq %rcx,%rdi + jmp L$sqrx8x_outer_loop + +.p2align 5 +L$sqrx8x_outer_break: + movq %r9,72(%rdi) +.byte 102,72,15,126,217 + movq %r10,80(%rdi) + movq %r11,88(%rdi) + movq %r12,96(%rdi) + movq %r13,104(%rdi) + movq %r14,112(%rdi) + leaq 48+8(%rsp),%rdi + movq (%rsi,%rcx,1),%rdx + + movq 8(%rdi),%r11 + xorq %r10,%r10 + movq 0+8(%rsp),%r9 + adoxq %r11,%r11 + movq 16(%rdi),%r12 + movq 24(%rdi),%r13 + + +.p2align 5 +L$sqrx4x_shift_n_add: + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax +.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 40(%rdi),%r11 + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + movq 16(%rsi,%rcx,1),%rdx + movq 48(%rdi),%r12 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 56(%rdi),%r13 + movq %rax,16(%rdi) + movq %rbx,24(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r12,%r12 + adcxq %r10,%rax + movq 24(%rsi,%rcx,1),%rdx + leaq 
32(%rcx),%rcx + movq 64(%rdi),%r10 + adoxq %r13,%r13 + adcxq %r11,%rbx + movq 72(%rdi),%r11 + movq %rax,32(%rdi) + movq %rbx,40(%rdi) + + mulxq %rdx,%rax,%rbx + adoxq %r10,%r10 + adcxq %r12,%rax + jrcxz L$sqrx4x_shift_n_add_break +.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 + adoxq %r11,%r11 + adcxq %r13,%rbx + movq 80(%rdi),%r12 + movq 88(%rdi),%r13 + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi + nop + jmp L$sqrx4x_shift_n_add + +.p2align 5 +L$sqrx4x_shift_n_add_break: + adcxq %r13,%rbx + movq %rax,48(%rdi) + movq %rbx,56(%rdi) + leaq 64(%rdi),%rdi +.byte 102,72,15,126,213 +sqrx8x_reduction: + xorl %eax,%eax + movq 32+8(%rsp),%rbx + movq 48+8(%rsp),%rdx + leaq -128(%rbp,%r9,2),%rcx + + movq %rcx,0+8(%rsp) + movq %rdi,8+8(%rsp) + + leaq 48+8(%rsp),%rdi + jmp L$sqrx8x_reduction_loop + +.p2align 5 +L$sqrx8x_reduction_loop: + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq %rdx,%r8 + imulq %rbx,%rdx + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,24+8(%rsp) + + leaq 64(%rdi),%rdi + xorq %rsi,%rsi + movq $-8,%rcx + jmp L$sqrx8x_reduce + +.p2align 5 +L$sqrx8x_reduce: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rbx,%rax + adoxq %r9,%r8 + + mulxq 16(%rbp),%rbx,%r9 + adcxq %rbx,%r8 + adoxq %r10,%r9 + + mulxq 32(%rbp),%rbx,%r10 + adcxq %rbx,%r9 + adoxq %r11,%r10 + + mulxq 48(%rbp),%rbx,%r11 + adcxq %rbx,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x40,0x00,0x00,0x00 + movq %rdx,%rax + movq %r8,%rdx + adcxq %rbx,%r11 + adoxq %r13,%r12 + + mulxq 32+8(%rsp),%rbx,%rdx + movq %rax,%rdx + movq %rax,64+48+8(%rsp,%rcx,8) + + mulxq 80(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 96(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 112(%rbp),%rax,%r15 + movq %rbx,%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + adcxq %rsi,%r15 + +.byte 0x67,0x67,0x67 + incq %rcx + jnz L$sqrx8x_reduce + + movq %rsi,%rax + cmpq 0+8(%rsp),%rbp + jae L$sqrx8x_no_tail + + movq 48+8(%rsp),%rdx + addq 0(%rdi),%r8 + leaq 128(%rbp),%rbp + movq $-8,%rcx + adcxq 8(%rdi),%r9 + adcxq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp L$sqrx8x_tail + +.p2align 5 +L$sqrx8x_tail: + movq %r8,%rbx + mulxq 0(%rbp),%rax,%r8 + adcxq %rax,%rbx + adoxq %r9,%r8 + + mulxq 16(%rbp),%rax,%r9 + adcxq %rax,%r8 + adoxq %r10,%r9 + + mulxq 32(%rbp),%rax,%r10 + adcxq %rax,%r9 + adoxq %r11,%r10 + + mulxq 48(%rbp),%rax,%r11 + adcxq %rax,%r10 + adoxq %r12,%r11 + +.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x40,0x00,0x00,0x00 + adcxq %rax,%r11 + adoxq %r13,%r12 + + mulxq 80(%rbp),%rax,%r13 + adcxq %rax,%r12 + adoxq %r14,%r13 + + mulxq 96(%rbp),%rax,%r14 + adcxq %rax,%r13 + adoxq %r15,%r14 + + mulxq 112(%rbp),%rax,%r15 + movq 72+48+8(%rsp,%rcx,8),%rdx + adcxq %rax,%r14 + adoxq %rsi,%r15 + movq %rbx,(%rdi,%rcx,8) + movq %r8,%rbx + adcxq %rsi,%r15 + + incq %rcx + jnz L$sqrx8x_tail + + cmpq 0+8(%rsp),%rbp + jae L$sqrx8x_tail_done + + subq 16+8(%rsp),%rsi + movq 48+8(%rsp),%rdx + leaq 128(%rbp),%rbp + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + leaq 64(%rdi),%rdi + sbbq %rax,%rax + subq $8,%rcx + + xorq %rsi,%rsi + movq %rax,16+8(%rsp) + jmp L$sqrx8x_tail + +.p2align 5 +L$sqrx8x_tail_done: + addq 24+8(%rsp),%r8 + movq %rsi,%rax + + subq 16+8(%rsp),%rsi +L$sqrx8x_no_tail: + adcq 
0(%rdi),%r8 +.byte 102,72,15,126,217 + adcq 8(%rdi),%r9 + movq 112(%rbp),%rsi +.byte 102,72,15,126,213 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq %rax,%rax + + movq 32+8(%rsp),%rbx + movq 64(%rdi,%rcx,1),%rdx + + movq %r8,0(%rdi) + leaq 64(%rdi),%r8 + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 64(%rdi,%rcx,1),%rdi + cmpq 8+8(%rsp),%r8 + jb L$sqrx8x_reduction_loop + xorq %rbx,%rbx + subq %r15,%rsi + adcq %rbx,%rbx + movq %rcx,%r10 +.byte 0x67 + orq %rbx,%rax +.byte 0x67 + movq %rcx,%r9 + xorq $1,%rax + sarq $3+2,%rcx + + leaq (%rbp,%rax,8),%rbp +.byte 102,72,15,126,202 +.byte 102,72,15,126,206 + jmp L$sqrx4x_sub + +.p2align 5 +L$sqrx4x_sub: +.byte 0x66 + movq 0(%rdi),%r12 + movq 8(%rdi),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rdi),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rdi),%r15 + leaq 32(%rdi),%rdi + sbbq 32(%rbp),%r14 + movq %r12,0(%rdx) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdx) + movq %r14,16(%rdx) + movq %r15,24(%rdx) + leaq 32(%rdx),%rdx + + incq %rcx + jnz L$sqrx4x_sub + negq %r9 + + .byte 0xf3,0xc3 + +.globl _bn_get_bits5 + +.p2align 4 +_bn_get_bits5: + movq %rdi,%r10 + movl %esi,%ecx + shrl $3,%esi + movzwl (%r10,%rsi,1),%eax + andl $7,%ecx + shrl %cl,%eax + andl $31,%eax + .byte 0xf3,0xc3 + + .globl _bn_scatter5 .p2align 4 _bn_scatter5: - cmpq $0,%rsi + cmpl $0,%esi jz L$scatter_epilogue leaq (%rdx,%rcx,8),%rdx L$scatter: @@ -734,7 +2906,7 @@ L$scatter: leaq 8(%rdi),%rdi movq %rax,(%rdx) leaq 256(%rdx),%rdx - subq $1,%rsi + subl $1,%esi jnz L$scatter L$scatter_epilogue: .byte 0xf3,0xc3 @@ -744,13 +2916,13 @@ L$scatter_epilogue: .p2align 4 _bn_gather5: - movq %rcx,%r11 - shrq $3,%rcx + movl %ecx,%r11d + shrl $3,%ecx andq $7,%r11 - notq %rcx + notl %ecx leaq L$magic_masks(%rip),%rax - andq $3,%rcx - leaq 96(%rdx,%r11,8),%rdx + andl $3,%ecx + leaq 128(%rdx,%r11,8),%rdx movq 0(%rax,%rcx,8),%xmm4 movq 8(%rax,%rcx,8),%xmm5 movq 16(%rax,%rcx,8),%xmm6 @@ -758,22 +2930,23 @@ _bn_gather5: jmp L$gather .p2align 4 L$gather: - movq -96(%rdx),%xmm0 - movq -32(%rdx),%xmm1 + movq -128(%rdx),%xmm0 + movq -64(%rdx),%xmm1 pand %xmm4,%xmm0 - movq 32(%rdx),%xmm2 + movq 0(%rdx),%xmm2 pand %xmm5,%xmm1 - movq 96(%rdx),%xmm3 + movq 64(%rdx),%xmm3 pand %xmm6,%xmm2 por %xmm1,%xmm0 pand %xmm7,%xmm3 +.byte 0x67,0x67 por %xmm2,%xmm0 leaq 256(%rdx),%rdx por %xmm3,%xmm0 movq %xmm0,(%rdi) leaq 8(%rdi),%rdi - subq $1,%rsi + subl $1,%esi jnz L$gather .byte 0xf3,0xc3 L$SEH_end_bn_gather5: diff --git a/deps/openssl/asm/x64-macosx-gas/camellia/cmll-x86_64.s b/deps/openssl/asm/x64-macosx-gas/camellia/cmll-x86_64.s index dfc8d592e81abf..0a3145ad4b8969 100644 --- a/deps/openssl/asm/x64-macosx-gas/camellia/cmll-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/camellia/cmll-x86_64.s @@ -1,7 +1,6 @@ .text - .globl _Camellia_EncryptBlock .p2align 4 @@ -271,7 +270,6 @@ L$edone: - .globl _Camellia_DecryptBlock .p2align 4 @@ -540,7 +538,6 @@ L$ddone: .byte 0xf3,0xc3 - .globl _Camellia_Ekeygen .p2align 4 @@ -552,7 +549,7 @@ _Camellia_Ekeygen: pushq %r15 L$key_prologue: - movq %rdi,%r15 + movl %edi,%r15d movq %rdx,%r13 movl 0(%rsi),%r8d @@ -1727,7 +1724,6 @@ L$cbc_enc_pushf: movq %r12,%rsi leaq 8+24(%rsp),%rdi .long 0x9066A4F3 - popfq L$cbc_enc_popf: @@ -1736,7 +1732,6 @@ L$cbc_enc_popf: movq %rax,8(%rsp) jmp L$cbc_eloop - .p2align 4 L$CBC_DECRYPT: xchgq %r14,%r15 @@ -1819,7 +1814,6 @@ L$cbc_dec_pushf: leaq 8+24(%rsp),%rsi 
leaq (%r13),%rdi .long 0x9066A4F3 - popfq L$cbc_dec_popf: diff --git a/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s new file mode 100644 index 00000000000000..f2eb8554e87067 --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s @@ -0,0 +1,3503 @@ +.text + + + +.p2align 6 +L$poly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + + +L$RR: +.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd + +L$One: +.long 1,1,1,1,1,1,1,1 +L$Two: +.long 2,2,2,2,2,2,2,2 +L$Three: +.long 3,3,3,3,3,3,3,3 +L$ONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +.globl _ecp_nistz256_mul_by_2 + +.p2align 6 +_ecp_nistz256_mul_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + sbbq %r13,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_div_by_2 + +.p2align 5 +_ecp_nistz256_div_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r8,%rax + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + + movq %r9,%rdx + xorq %r13,%r13 + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + adcq $0,%r13 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + cmovzq %rsi,%r13 + + movq %r9,%rax + shrq $1,%r8 + shlq $63,%rax + movq %r10,%rdx + shrq $1,%r9 + orq %rax,%r8 + shlq $63,%rdx + movq %r11,%rcx + shrq $1,%r10 + orq %rdx,%r9 + shlq $63,%rcx + shrq $1,%r11 + shlq $63,%r13 + orq %rcx,%r10 + orq %r13,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_mul_by_3 + +.p2align 5 +_ecp_nistz256_mul_by_3: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq L$poly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq L$poly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + + xorq %r13,%r13 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + movq %r8,%rax + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq L$poly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq L$poly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_add + +.p2align 5 +_ecp_nistz256_add: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + + addq 0(%rdx),%r8 + adcq 
8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_sub + +.p2align 5 +_ecp_nistz256_sub: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + movq %r8,%rax + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_neg + +.p2align 5 +_ecp_nistz256_neg: + pushq %r12 + pushq %r13 + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + + +.globl _ecp_nistz256_to_mont + +.p2align 5 +_ecp_nistz256_to_mont: + movl $524544,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + leaq L$RR(%rip),%rdx + jmp L$mul_mont + + + + + + + + +.globl _ecp_nistz256_mul_mont + +.p2align 5 +_ecp_nistz256_mul_mont: + movl $524544,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx +L$mul_mont: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + cmpl $524544,%ecx + je L$mul_montx + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq + jmp L$mul_mont_done + +.p2align 5 +L$mul_montx: + movq %rdx,%rbx + movq 0(%rdx),%rdx + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_mul_montx +L$mul_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_mul_montq: + + + movq %rax,%rbp + mulq %r9 + movq L$poly+8(%rip),%r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq L$poly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq 
%rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp + shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + + + + + + + +.globl _ecp_nistz256_sqr_mont + +.p2align 5 +_ecp_nistz256_sqr_mont: + movl $524544,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + cmpl $524544,%ecx + je L$sqr_montx + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq + jmp L$sqr_mont_done + +.p2align 5 +L$sqr_montx: + movq 0(%rsi),%rdx + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + leaq -128(%rsi),%rsi + + call __ecp_nistz256_sqr_montx +L$sqr_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_sqr_montq: + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq 
%rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq L$poly+8(%rip),%rsi + movq L$poly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 + + +.p2align 5 +__ecp_nistz256_mul_montx: + + + mulxq %r9,%r8,%r9 + mulxq %r10,%rcx,%r10 + movq $32,%r14 + xorq %r13,%r13 + mulxq %r11,%rbp,%r11 + movq L$poly+24(%rip),%r15 + adcq %rcx,%r9 + mulxq %r12,%rcx,%r12 + movq %r8,%rdx + adcq %rbp,%r10 + shlxq %r14,%r8,%rbp + adcq %rcx,%r11 + shrxq %r14,%r8,%rcx + adcq $0,%r12 + + + + addq %rbp,%r9 + adcq %rcx,%r10 + + mulxq %r15,%rcx,%rbp + movq 8(%rbx),%rdx + adcq %rcx,%r11 + adcq %rbp,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r9 + adoxq %rbp,%r10 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r9,%rdx + adcxq %rcx,%r12 + shlxq %r14,%r9,%rcx + adoxq %rbp,%r13 + shrxq %r14,%r9,%rbp + + adcxq %r8,%r13 + adoxq %r8,%r8 + adcq $0,%r8 + + + + addq %rcx,%r10 + adcq %rbp,%r11 + + mulxq %r15,%rcx,%rbp + movq 16(%rbx),%rdx + adcq %rcx,%r12 + adcq %rbp,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r10 + adoxq %rbp,%r11 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r10,%rdx + adcxq %rcx,%r13 + shlxq %r14,%r10,%rcx + adoxq %rbp,%r8 + shrxq %r14,%r10,%rbp + + adcxq %r9,%r8 + adoxq %r9,%r9 + adcq $0,%r9 + + + + addq %rcx,%r11 + adcq %rbp,%r12 + + mulxq %r15,%rcx,%rbp + movq 24(%rbx),%rdx + adcq %rcx,%r13 + adcq %rbp,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + mulxq 0+128(%rsi),%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq 8+128(%rsi),%rcx,%rbp + adcxq %rcx,%r12 + adoxq %rbp,%r13 + + mulxq 16+128(%rsi),%rcx,%rbp + adcxq %rcx,%r13 + adoxq %rbp,%r8 + + mulxq 24+128(%rsi),%rcx,%rbp + movq %r11,%rdx + adcxq %rcx,%r8 + shlxq %r14,%r11,%rcx + adoxq %rbp,%r9 + shrxq %r14,%r11,%rbp + + adcxq %r10,%r9 + adoxq %r10,%r10 + adcq $0,%r10 + + + + addq %rcx,%r12 + adcq %rbp,%r13 + + mulxq %r15,%rcx,%rbp + movq %r12,%rbx + movq L$poly+8(%rip),%r14 + adcq %rcx,%r8 + movq %r13,%rdx + adcq %rbp,%r9 + adcq $0,%r10 + + + + xorl %eax,%eax + movq %r8,%rcx + sbbq $-1,%r12 + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rbp + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rbx,%r12 + cmovcq %rdx,%r13 + movq %r12,0(%rdi) + 
cmovcq %rcx,%r8 + movq %r13,8(%rdi) + cmovcq %rbp,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_sqr_montx: + mulxq %r14,%r9,%r10 + mulxq %r15,%rcx,%r11 + xorl %eax,%eax + adcq %rcx,%r10 + mulxq %r8,%rbp,%r12 + movq %r14,%rdx + adcq %rbp,%r11 + adcq $0,%r12 + xorq %r13,%r13 + + + mulxq %r15,%rcx,%rbp + adcxq %rcx,%r11 + adoxq %rbp,%r12 + + mulxq %r8,%rcx,%rbp + movq %r15,%rdx + adcxq %rcx,%r12 + adoxq %rbp,%r13 + adcq $0,%r13 + + + mulxq %r8,%rcx,%r14 + movq 0+128(%rsi),%rdx + xorq %r15,%r15 + adcxq %r9,%r9 + adoxq %rcx,%r13 + adcxq %r10,%r10 + adoxq %r15,%r14 + + mulxq %rdx,%r8,%rbp + movq 8+128(%rsi),%rdx + adcxq %r11,%r11 + adoxq %rbp,%r9 + adcxq %r12,%r12 + mulxq %rdx,%rcx,%rax + movq 16+128(%rsi),%rdx + adcxq %r13,%r13 + adoxq %rcx,%r10 + adcxq %r14,%r14 +.byte 0x67 + mulxq %rdx,%rcx,%rbp + movq 24+128(%rsi),%rdx + adoxq %rax,%r11 + adcxq %r15,%r15 + adoxq %rcx,%r12 + movq $32,%rsi + adoxq %rbp,%r13 +.byte 0x67,0x67 + mulxq %rdx,%rcx,%rax + movq %r8,%rdx + adoxq %rcx,%r14 + shlxq %rsi,%r8,%rcx + adoxq %rax,%r15 + shrxq %rsi,%r8,%rax + movq L$poly+24(%rip),%rbp + + + addq %rcx,%r9 + adcq %rax,%r10 + + mulxq %rbp,%rcx,%r8 + movq %r9,%rdx + adcq %rcx,%r11 + shlxq %rsi,%r9,%rcx + adcq $0,%r8 + shrxq %rsi,%r9,%rax + + + addq %rcx,%r10 + adcq %rax,%r11 + + mulxq %rbp,%rcx,%r9 + movq %r10,%rdx + adcq %rcx,%r8 + shlxq %rsi,%r10,%rcx + adcq $0,%r9 + shrxq %rsi,%r10,%rax + + + addq %rcx,%r11 + adcq %rax,%r8 + + mulxq %rbp,%rcx,%r10 + movq %r11,%rdx + adcq %rcx,%r9 + shlxq %rsi,%r11,%rcx + adcq $0,%r10 + shrxq %rsi,%r11,%rax + + + addq %rcx,%r8 + adcq %rax,%r9 + + mulxq %rbp,%rcx,%r11 + adcq %rcx,%r10 + adcq $0,%r11 + + xorq %rdx,%rdx + adcq %r8,%r12 + movq L$poly+8(%rip),%rsi + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r13,%r9 + adcq $0,%rdx + + xorl %eax,%eax + sbbq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%r11 + sbbq %rbp,%r15 + sbbq $0,%rdx + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %r11,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 + + + + + + + +.globl _ecp_nistz256_from_mont + +.p2align 5 +_ecp_nistz256_from_mont: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%rax + movq L$poly+24(%rip),%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %rax,%r8 + movq L$poly+8(%rip),%r12 + + + + movq %rax,%rcx + shlq $32,%r8 + mulq %r13 + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %r13 + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %r13 + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %r13 + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + movq %r8,%rcx + adcq %rax,%r10 + movq %r9,%rsi + adcq $0,%rdx + + + + subq $-1,%r8 + movq %r10,%rax + sbbq %r12,%r9 + sbbq $0,%r10 + movq %rdx,%r11 + sbbq %r13,%rdx + sbbq %r13,%r13 + + cmovnzq %rcx,%r8 + cmovnzq %rsi,%r9 + movq %r8,0(%rdi) + cmovnzq %rax,%r10 + movq %r9,8(%rdi) + cmovzq %rdx,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + +.globl _ecp_nistz256_select_w5 + +.p2align 5 +_ecp_nistz256_select_w5: + movl _OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz 
L$avx2_select_w5 + movdqa L$One(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +L$select_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz L$select_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_select_w7 + +.p2align 5 +_ecp_nistz256_select_w7: + movl _OPENSSL_ia32cap_P+8(%rip),%eax + testl $32,%eax + jnz L$avx2_select_w7 + movdqa L$One(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +L$select_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz L$select_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + .byte 0xf3,0xc3 + + + + +.p2align 5 +ecp_nistz256_avx2_select_w5: +L$avx2_select_w5: + vzeroupper + vmovdqa L$Two(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + vpxor %ymm4,%ymm4,%ymm4 + + vmovdqa L$One(%rip),%ymm5 + vmovdqa L$Two(%rip),%ymm10 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + movq $8,%rax +L$select_loop_avx2_w5: + + vmovdqa 0(%rsi),%ymm6 + vmovdqa 32(%rsi),%ymm7 + vmovdqa 64(%rsi),%ymm8 + + vmovdqa 96(%rsi),%ymm11 + vmovdqa 128(%rsi),%ymm12 + vmovdqa 160(%rsi),%ymm13 + + vpcmpeqd %ymm1,%ymm5,%ymm9 + vpcmpeqd %ymm1,%ymm10,%ymm14 + + vpaddd %ymm0,%ymm5,%ymm5 + vpaddd %ymm0,%ymm10,%ymm10 + leaq 192(%rsi),%rsi + + vpand %ymm9,%ymm6,%ymm6 + vpand %ymm9,%ymm7,%ymm7 + vpand %ymm9,%ymm8,%ymm8 + vpand %ymm14,%ymm11,%ymm11 + vpand %ymm14,%ymm12,%ymm12 + vpand %ymm14,%ymm13,%ymm13 + + vpxor %ymm6,%ymm2,%ymm2 + vpxor %ymm7,%ymm3,%ymm3 + vpxor %ymm8,%ymm4,%ymm4 + vpxor %ymm11,%ymm2,%ymm2 + vpxor %ymm12,%ymm3,%ymm3 + vpxor %ymm13,%ymm4,%ymm4 + + decq %rax + jnz L$select_loop_avx2_w5 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vmovdqu %ymm4,64(%rdi) + vzeroupper + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_avx2_select_w7 + +.p2align 5 +_ecp_nistz256_avx2_select_w7: +L$avx2_select_w7: + vzeroupper + vmovdqa L$Three(%rip),%ymm0 + + vpxor %ymm2,%ymm2,%ymm2 + vpxor %ymm3,%ymm3,%ymm3 + + vmovdqa L$One(%rip),%ymm4 + vmovdqa L$Two(%rip),%ymm8 + vmovdqa L$Three(%rip),%ymm12 + + vmovd %edx,%xmm1 + vpermd %ymm1,%ymm2,%ymm1 + + + movq $21,%rax +L$select_loop_avx2_w7: + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vmovdqa 64(%rsi),%ymm9 + vmovdqa 96(%rsi),%ymm10 + + vmovdqa 128(%rsi),%ymm13 + vmovdqa 160(%rsi),%ymm14 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + vpcmpeqd 
%ymm1,%ymm8,%ymm11 + vpcmpeqd %ymm1,%ymm12,%ymm15 + + vpaddd %ymm0,%ymm4,%ymm4 + vpaddd %ymm0,%ymm8,%ymm8 + vpaddd %ymm0,%ymm12,%ymm12 + leaq 192(%rsi),%rsi + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + vpand %ymm11,%ymm9,%ymm9 + vpand %ymm11,%ymm10,%ymm10 + vpand %ymm15,%ymm13,%ymm13 + vpand %ymm15,%ymm14,%ymm14 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + vpxor %ymm9,%ymm2,%ymm2 + vpxor %ymm10,%ymm3,%ymm3 + vpxor %ymm13,%ymm2,%ymm2 + vpxor %ymm14,%ymm3,%ymm3 + + decq %rax + jnz L$select_loop_avx2_w7 + + + vmovdqa 0(%rsi),%ymm5 + vmovdqa 32(%rsi),%ymm6 + + vpcmpeqd %ymm1,%ymm4,%ymm7 + + vpand %ymm7,%ymm5,%ymm5 + vpand %ymm7,%ymm6,%ymm6 + + vpxor %ymm5,%ymm2,%ymm2 + vpxor %ymm6,%ymm3,%ymm3 + + vmovdqu %ymm2,0(%rdi) + vmovdqu %ymm3,32(%rdi) + vzeroupper + .byte 0xf3,0xc3 + + +.p2align 5 +__ecp_nistz256_add_toq: + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_sub_fromq: + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_subq: + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_mul_by_2q: + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_point_double + +.p2align 5 +_ecp_nistz256_point_double: + movl $524544,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $524544,%ecx + je L$point_doublex + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq L$poly+8(%rip),%r14 + movq L$poly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + 
leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_point_add + +.p2align 5 +_ecp_nistz256_point_add: + movl $524544,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $524544,%ecx + je L$point_addx + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa 
%xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz L$add_proceedq +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz L$add_proceedq + testq %r9,%r9 + jz L$add_proceedq + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp L$add_doneq + +.p2align 5 +L$add_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + 
movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +L$add_doneq: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 
+ popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_point_add_affine + +.p2align 5 +_ecp_nistz256_point_add_affine: + movl $524544,%ecx + andl _OPENSSL_ia32cap_P+8(%rip),%ecx + cmpl $524544,%ecx + je L$point_add_affinex + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 
8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand L$ONE_mont(%rip),%xmm2 + pand L$ONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + +.p2align 5 +__ecp_nistz256_add_tox: + xorq %r11,%r11 + adcq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_sub_fromx: + xorq %r11,%r11 + sbbq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq $0,%r11 + + xorq %r10,%r10 + adcq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 
+__ecp_nistz256_subx: + xorq %r11,%r11 + sbbq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq $0,%r11 + + xorq %r9,%r9 + adcq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + + btq $0,%r11 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 + cmovcq %rcx,%r8 + cmovcq %r10,%r9 + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_mul_by_2x: + xorq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + adcq $0,%r11 + + xorq %r10,%r10 + sbbq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + + btq $0,%r11 + cmovncq %rax,%r12 + cmovncq %rbp,%r13 + movq %r12,0(%rdi) + cmovncq %rcx,%r8 + movq %r13,8(%rdi) + cmovncq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + +.p2align 5 +ecp_nistz256_point_doublex: +L$point_doublex: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq L$poly+8(%rip),%r14 + movq L$poly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-128(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 32(%rbx),%rdx + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-128(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rdx + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_tox + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + 
movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2x + + movq 0+32(%rsp),%rdx + movq 8+32(%rsp),%r14 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subx + + movq 32(%rsp),%rdx + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-128(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + +.p2align 5 +ecp_nistz256_point_addx: +L$point_addx: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + movq %rdx,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rdx + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 + + leaq 64-128(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 416(%rsp),%rdx + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 512(%rsp),%rdx + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq -128+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + 
movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 480(%rsp),%rdx + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz L$add_proceedx +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz L$add_proceedx + testq %r9,%r9 + jz L$add_proceedx + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp L$add_donex + +.p2align 5 +L$add_proceedx: + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0+0(%rsp),%rdx + movq 8+0(%rsp),%r14 + leaq -128+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 544(%rsp),%rdx + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq -128+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 0(%rsp),%rdx + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 160(%rsp),%rdx + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq -128+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 
16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +L$add_donex: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + +.p2align 5 +ecp_nistz256_point_add_affinex: +L$point_add_affinex: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rdx + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-128(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + movq 0(%rbx),%rdx + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-128(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 
384(%rsp),%rdx + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 448(%rsp),%rdx + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq -128+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+64(%rsp),%rdx + movq 8+64(%rsp),%r14 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 0+96(%rsp),%rdx + movq 8+96(%rsp),%r14 + leaq -128+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montx + + movq 128(%rsp),%rdx + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 320(%rsp),%rdx + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq -128+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montx + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subx + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromx + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subx + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rdx + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq -128+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montx + + movq 96(%rsp),%rdx + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq -128+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montx + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromx + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand L$ONE_mont(%rip),%xmm2 + pand L$ONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu 
%xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s b/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s index cdecac7b4c8a1e..712a871d343760 100644 --- a/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/md5/md5-x86_64.s @@ -1,5 +1,4 @@ .text - .p2align 4 .globl _md5_block_asm_data_order @@ -33,7 +32,6 @@ L$prologue: je L$end - L$loop: movl %eax,%r8d movl %ebx,%r9d @@ -653,7 +651,6 @@ L$loop: jb L$loop - L$end: movl %eax,0(%rbp) movl %ebx,4(%rbp) diff --git a/deps/openssl/asm/x64-macosx-gas/modes/aesni-gcm-x86_64.s b/deps/openssl/asm/x64-macosx-gas/modes/aesni-gcm-x86_64.s new file mode 100644 index 00000000000000..86665d6e995628 --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/modes/aesni-gcm-x86_64.s @@ -0,0 +1,753 @@ +.text + + +.p2align 5 +_aesni_ctr32_ghash_6x: + vmovdqu 32(%r11),%xmm2 + subq $6,%rdx + vpxor %xmm4,%xmm4,%xmm4 + vmovdqu 0-128(%rcx),%xmm15 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpaddb %xmm2,%xmm11,%xmm12 + vpaddb %xmm2,%xmm12,%xmm13 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm15,%xmm1,%xmm9 + vmovdqu %xmm4,16+8(%rsp) + jmp L$oop6x + +.p2align 5 +L$oop6x: + addl $100663296,%ebx + jc L$handle_ctr32 + vmovdqu 0-32(%r9),%xmm3 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm15,%xmm10,%xmm10 + vpxor %xmm15,%xmm11,%xmm11 + +L$resume_ctr32: + vmovdqu %xmm1,(%r8) + vpclmulqdq $16,%xmm3,%xmm7,%xmm5 + vpxor %xmm15,%xmm12,%xmm12 + vmovups 16-128(%rcx),%xmm2 + vpclmulqdq $1,%xmm3,%xmm7,%xmm6 + xorq %r12,%r12 + cmpq %r14,%r15 + + vaesenc %xmm2,%xmm9,%xmm9 + vmovdqu 48+8(%rsp),%xmm0 + vpxor %xmm15,%xmm13,%xmm13 + vpclmulqdq $0,%xmm3,%xmm7,%xmm1 + vaesenc %xmm2,%xmm10,%xmm10 + vpxor %xmm15,%xmm14,%xmm14 + setnc %r12b + vpclmulqdq $17,%xmm3,%xmm7,%xmm7 + vaesenc %xmm2,%xmm11,%xmm11 + vmovdqu 16-32(%r9),%xmm3 + negq %r12 + vaesenc %xmm2,%xmm12,%xmm12 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $0,%xmm3,%xmm0,%xmm5 + vpxor %xmm4,%xmm8,%xmm8 + vaesenc %xmm2,%xmm13,%xmm13 + vpxor %xmm5,%xmm1,%xmm4 + andq $96,%r12 + vmovups 32-128(%rcx),%xmm15 + vpclmulqdq $16,%xmm3,%xmm0,%xmm1 + vaesenc %xmm2,%xmm14,%xmm14 + + vpclmulqdq $1,%xmm3,%xmm0,%xmm2 + leaq (%r14,%r12,1),%r14 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpclmulqdq $17,%xmm3,%xmm0,%xmm3 + vmovdqu 64+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 88(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 80(%r14),%r12 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,32+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,40+8(%rsp) + vmovdqu 48-32(%r9),%xmm5 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 48-128(%rcx),%xmm15 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $0,%xmm5,%xmm0,%xmm1 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $16,%xmm5,%xmm0,%xmm2 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm3,%xmm7,%xmm7 + vpclmulqdq $1,%xmm5,%xmm0,%xmm3 + vaesenc %xmm15,%xmm11,%xmm11 + vpclmulqdq $17,%xmm5,%xmm0,%xmm5 + vmovdqu 80+8(%rsp),%xmm0 + vaesenc 
%xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqu 64-32(%r9),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 64-128(%rcx),%xmm15 + vpxor %xmm2,%xmm6,%xmm6 + vpclmulqdq $0,%xmm1,%xmm0,%xmm2 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $16,%xmm1,%xmm0,%xmm3 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 72(%r14),%r13 + vpxor %xmm5,%xmm7,%xmm7 + vpclmulqdq $1,%xmm1,%xmm0,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 64(%r14),%r12 + vpclmulqdq $17,%xmm1,%xmm0,%xmm1 + vmovdqu 96+8(%rsp),%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,48+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,56+8(%rsp) + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 96-32(%r9),%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 80-128(%rcx),%xmm15 + vpxor %xmm3,%xmm6,%xmm6 + vpclmulqdq $0,%xmm2,%xmm0,%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $16,%xmm2,%xmm0,%xmm5 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 56(%r14),%r13 + vpxor %xmm1,%xmm7,%xmm7 + vpclmulqdq $1,%xmm2,%xmm0,%xmm1 + vpxor 112+8(%rsp),%xmm8,%xmm8 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 48(%r14),%r12 + vpclmulqdq $17,%xmm2,%xmm0,%xmm2 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,64+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,72+8(%rsp) + vpxor %xmm3,%xmm4,%xmm4 + vmovdqu 112-32(%r9),%xmm3 + vaesenc %xmm15,%xmm14,%xmm14 + + vmovups 96-128(%rcx),%xmm15 + vpxor %xmm5,%xmm6,%xmm6 + vpclmulqdq $16,%xmm3,%xmm8,%xmm5 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm1,%xmm6,%xmm6 + vpclmulqdq $1,%xmm3,%xmm8,%xmm1 + vaesenc %xmm15,%xmm10,%xmm10 + movbeq 40(%r14),%r13 + vpxor %xmm2,%xmm7,%xmm7 + vpclmulqdq $0,%xmm3,%xmm8,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 32(%r14),%r12 + vpclmulqdq $17,%xmm3,%xmm8,%xmm8 + vaesenc %xmm15,%xmm12,%xmm12 + movq %r13,80+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + movq %r12,88+8(%rsp) + vpxor %xmm5,%xmm6,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor %xmm1,%xmm6,%xmm6 + + vmovups 112-128(%rcx),%xmm15 + vpslldq $8,%xmm6,%xmm5 + vpxor %xmm2,%xmm4,%xmm4 + vmovdqu 16(%r11),%xmm3 + + vaesenc %xmm15,%xmm9,%xmm9 + vpxor %xmm8,%xmm7,%xmm7 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor %xmm5,%xmm4,%xmm4 + movbeq 24(%r14),%r13 + vaesenc %xmm15,%xmm11,%xmm11 + movbeq 16(%r14),%r12 + vpalignr $8,%xmm4,%xmm4,%xmm0 + vpclmulqdq $16,%xmm3,%xmm4,%xmm4 + movq %r13,96+8(%rsp) + vaesenc %xmm15,%xmm12,%xmm12 + movq %r12,104+8(%rsp) + vaesenc %xmm15,%xmm13,%xmm13 + vmovups 128-128(%rcx),%xmm1 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vmovups 144-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm10,%xmm10 + vpsrldq $8,%xmm6,%xmm6 + vaesenc %xmm1,%xmm11,%xmm11 + vpxor %xmm6,%xmm7,%xmm7 + vaesenc %xmm1,%xmm12,%xmm12 + vpxor %xmm0,%xmm4,%xmm4 + movbeq 8(%r14),%r13 + vaesenc %xmm1,%xmm13,%xmm13 + movbeq 0(%r14),%r12 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 160-128(%rcx),%xmm1 + cmpl $11,%ebp + jb L$enc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 176-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 192-128(%rcx),%xmm1 + je L$enc_tail + + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + + vaesenc %xmm1,%xmm9,%xmm9 + vaesenc %xmm1,%xmm10,%xmm10 + 
vaesenc %xmm1,%xmm11,%xmm11 + vaesenc %xmm1,%xmm12,%xmm12 + vaesenc %xmm1,%xmm13,%xmm13 + vmovups 208-128(%rcx),%xmm15 + vaesenc %xmm1,%xmm14,%xmm14 + vmovups 224-128(%rcx),%xmm1 + jmp L$enc_tail + +.p2align 5 +L$handle_ctr32: + vmovdqu (%r11),%xmm0 + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vmovdqu 0-32(%r9),%xmm3 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm15,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm15,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpshufb %xmm0,%xmm14,%xmm14 + vpshufb %xmm0,%xmm1,%xmm1 + jmp L$resume_ctr32 + +.p2align 5 +L$enc_tail: + vaesenc %xmm15,%xmm9,%xmm9 + vmovdqu %xmm7,16+8(%rsp) + vpalignr $8,%xmm4,%xmm4,%xmm8 + vaesenc %xmm15,%xmm10,%xmm10 + vpclmulqdq $16,%xmm3,%xmm4,%xmm4 + vpxor 0(%rdi),%xmm1,%xmm2 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 16(%rdi),%xmm1,%xmm0 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 32(%rdi),%xmm1,%xmm5 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 48(%rdi),%xmm1,%xmm6 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 64(%rdi),%xmm1,%xmm7 + vpxor 80(%rdi),%xmm1,%xmm3 + vmovdqu (%r8),%xmm1 + + vaesenclast %xmm2,%xmm9,%xmm9 + vmovdqu 32(%r11),%xmm2 + vaesenclast %xmm0,%xmm10,%xmm10 + vpaddb %xmm2,%xmm1,%xmm0 + movq %r13,112+8(%rsp) + leaq 96(%rdi),%rdi + vaesenclast %xmm5,%xmm11,%xmm11 + vpaddb %xmm2,%xmm0,%xmm5 + movq %r12,120+8(%rsp) + leaq 96(%rsi),%rsi + vmovdqu 0-128(%rcx),%xmm15 + vaesenclast %xmm6,%xmm12,%xmm12 + vpaddb %xmm2,%xmm5,%xmm6 + vaesenclast %xmm7,%xmm13,%xmm13 + vpaddb %xmm2,%xmm6,%xmm7 + vaesenclast %xmm3,%xmm14,%xmm14 + vpaddb %xmm2,%xmm7,%xmm3 + + addq $96,%r10 + subq $6,%rdx + jc L$6x_done + + vmovups %xmm9,-96(%rsi) + vpxor %xmm15,%xmm1,%xmm9 + vmovups %xmm10,-80(%rsi) + vmovdqa %xmm0,%xmm10 + vmovups %xmm11,-64(%rsi) + vmovdqa %xmm5,%xmm11 + vmovups %xmm12,-48(%rsi) + vmovdqa %xmm6,%xmm12 + vmovups %xmm13,-32(%rsi) + vmovdqa %xmm7,%xmm13 + vmovups %xmm14,-16(%rsi) + vmovdqa %xmm3,%xmm14 + vmovdqu 32+8(%rsp),%xmm7 + jmp L$oop6x + +L$6x_done: + vpxor 16+8(%rsp),%xmm8,%xmm8 + vpxor %xmm4,%xmm8,%xmm8 + + .byte 0xf3,0xc3 + +.globl _aesni_gcm_decrypt + +.p2align 5 +_aesni_gcm_decrypt: + xorq %r10,%r10 + cmpq $96,%rdx + jb L$gcm_dec_abort + + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq L$bswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $3968,%r15 + vmovdqu (%r9),%xmm8 + andq $-128,%rsp + vmovdqu (%r11),%xmm0 + leaq 128(%rcx),%rcx + leaq 32+32(%r9),%r9 + movl 240-128(%rcx),%ebp + vpshufb %xmm0,%xmm8,%xmm8 + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc L$dec_no_key_aliasing + cmpq $768,%r15 + jnc L$dec_no_key_aliasing + subq %r15,%rsp +L$dec_no_key_aliasing: + + vmovdqu 80(%rdi),%xmm7 + leaq (%rdi),%r14 + vmovdqu 64(%rdi),%xmm4 + leaq -192(%rdi,%rdx,1),%r15 + vmovdqu 48(%rdi),%xmm5 + shrq $4,%rdx + xorq %r10,%r10 + vmovdqu 32(%rdi),%xmm6 + vpshufb %xmm0,%xmm7,%xmm7 + vmovdqu 16(%rdi),%xmm2 + vpshufb %xmm0,%xmm4,%xmm4 + vmovdqu (%rdi),%xmm3 + vpshufb %xmm0,%xmm5,%xmm5 + vmovdqu %xmm4,48(%rsp) + vpshufb %xmm0,%xmm6,%xmm6 + vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm2,%xmm2 + vmovdqu %xmm6,80(%rsp) + vpshufb %xmm0,%xmm3,%xmm3 + vmovdqu %xmm2,96(%rsp) + vmovdqu %xmm3,112(%rsp) + + call _aesni_ctr32_ghash_6x + + vmovups %xmm9,-96(%rsi) + vmovups %xmm10,-80(%rsi) + vmovups 
%xmm11,-64(%rsi) + vmovups %xmm12,-48(%rsi) + vmovups %xmm13,-32(%rsi) + vmovups %xmm14,-16(%rsi) + + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$gcm_dec_abort: + movq %r10,%rax + .byte 0xf3,0xc3 + + +.p2align 5 +_aesni_ctr32_6x: + vmovdqu 0-128(%rcx),%xmm4 + vmovdqu 32(%r11),%xmm2 + leaq -1(%rbp),%r13 + vmovups 16-128(%rcx),%xmm15 + leaq 32-128(%rcx),%r12 + vpxor %xmm4,%xmm1,%xmm9 + addl $100663296,%ebx + jc L$handle_ctr32_2 + vpaddb %xmm2,%xmm1,%xmm10 + vpaddb %xmm2,%xmm10,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddb %xmm2,%xmm11,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddb %xmm2,%xmm12,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpaddb %xmm2,%xmm13,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpaddb %xmm2,%xmm14,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp L$oop_ctr32 + +.p2align 4 +L$oop_ctr32: + vaesenc %xmm15,%xmm9,%xmm9 + vaesenc %xmm15,%xmm10,%xmm10 + vaesenc %xmm15,%xmm11,%xmm11 + vaesenc %xmm15,%xmm12,%xmm12 + vaesenc %xmm15,%xmm13,%xmm13 + vaesenc %xmm15,%xmm14,%xmm14 + vmovups (%r12),%xmm15 + leaq 16(%r12),%r12 + decl %r13d + jnz L$oop_ctr32 + + vmovdqu (%r12),%xmm3 + vaesenc %xmm15,%xmm9,%xmm9 + vpxor 0(%rdi),%xmm3,%xmm4 + vaesenc %xmm15,%xmm10,%xmm10 + vpxor 16(%rdi),%xmm3,%xmm5 + vaesenc %xmm15,%xmm11,%xmm11 + vpxor 32(%rdi),%xmm3,%xmm6 + vaesenc %xmm15,%xmm12,%xmm12 + vpxor 48(%rdi),%xmm3,%xmm8 + vaesenc %xmm15,%xmm13,%xmm13 + vpxor 64(%rdi),%xmm3,%xmm2 + vaesenc %xmm15,%xmm14,%xmm14 + vpxor 80(%rdi),%xmm3,%xmm3 + leaq 96(%rdi),%rdi + + vaesenclast %xmm4,%xmm9,%xmm9 + vaesenclast %xmm5,%xmm10,%xmm10 + vaesenclast %xmm6,%xmm11,%xmm11 + vaesenclast %xmm8,%xmm12,%xmm12 + vaesenclast %xmm2,%xmm13,%xmm13 + vaesenclast %xmm3,%xmm14,%xmm14 + vmovups %xmm9,0(%rsi) + vmovups %xmm10,16(%rsi) + vmovups %xmm11,32(%rsi) + vmovups %xmm12,48(%rsi) + vmovups %xmm13,64(%rsi) + vmovups %xmm14,80(%rsi) + leaq 96(%rsi),%rsi + + .byte 0xf3,0xc3 +.p2align 5 +L$handle_ctr32_2: + vpshufb %xmm0,%xmm1,%xmm6 + vmovdqu 48(%r11),%xmm5 + vpaddd 64(%r11),%xmm6,%xmm10 + vpaddd %xmm5,%xmm6,%xmm11 + vpaddd %xmm5,%xmm10,%xmm12 + vpshufb %xmm0,%xmm10,%xmm10 + vpaddd %xmm5,%xmm11,%xmm13 + vpshufb %xmm0,%xmm11,%xmm11 + vpxor %xmm4,%xmm10,%xmm10 + vpaddd %xmm5,%xmm12,%xmm14 + vpshufb %xmm0,%xmm12,%xmm12 + vpxor %xmm4,%xmm11,%xmm11 + vpaddd %xmm5,%xmm13,%xmm1 + vpshufb %xmm0,%xmm13,%xmm13 + vpxor %xmm4,%xmm12,%xmm12 + vpshufb %xmm0,%xmm14,%xmm14 + vpxor %xmm4,%xmm13,%xmm13 + vpshufb %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm14,%xmm14 + jmp L$oop_ctr32 + + +.globl _aesni_gcm_encrypt + +.p2align 5 +_aesni_gcm_encrypt: + xorq %r10,%r10 + cmpq $288,%rdx + jb L$gcm_enc_abort + + leaq (%rsp),%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + vzeroupper + + vmovdqu (%r8),%xmm1 + addq $-128,%rsp + movl 12(%r8),%ebx + leaq L$bswap_mask(%rip),%r11 + leaq -128(%rcx),%r14 + movq $3968,%r15 + leaq 128(%rcx),%rcx + vmovdqu (%r11),%xmm0 + andq $-128,%rsp + movl 240-128(%rcx),%ebp + + andq %r15,%r14 + andq %rsp,%r15 + subq %r14,%r15 + jc L$enc_no_key_aliasing + cmpq $768,%r15 + jnc L$enc_no_key_aliasing + subq %r15,%rsp +L$enc_no_key_aliasing: + + leaq (%rsi),%r14 + leaq -192(%rsi,%rdx,1),%r15 + shrq $4,%rdx + + call _aesni_ctr32_6x + vpshufb %xmm0,%xmm9,%xmm8 + vpshufb %xmm0,%xmm10,%xmm2 + vmovdqu %xmm8,112(%rsp) + vpshufb %xmm0,%xmm11,%xmm4 + vmovdqu %xmm2,96(%rsp) + vpshufb %xmm0,%xmm12,%xmm5 + vmovdqu %xmm4,80(%rsp) + vpshufb %xmm0,%xmm13,%xmm6 + 
vmovdqu %xmm5,64(%rsp) + vpshufb %xmm0,%xmm14,%xmm7 + vmovdqu %xmm6,48(%rsp) + + call _aesni_ctr32_6x + + vmovdqu (%r9),%xmm8 + leaq 32+32(%r9),%r9 + subq $12,%rdx + movq $192,%r10 + vpshufb %xmm0,%xmm8,%xmm8 + + call _aesni_ctr32_ghash_6x + vmovdqu 32(%rsp),%xmm7 + vmovdqu (%r11),%xmm0 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm7,%xmm7,%xmm1 + vmovdqu 32-32(%r9),%xmm15 + vmovups %xmm9,-96(%rsi) + vpshufb %xmm0,%xmm9,%xmm9 + vpxor %xmm7,%xmm1,%xmm1 + vmovups %xmm10,-80(%rsi) + vpshufb %xmm0,%xmm10,%xmm10 + vmovups %xmm11,-64(%rsi) + vpshufb %xmm0,%xmm11,%xmm11 + vmovups %xmm12,-48(%rsi) + vpshufb %xmm0,%xmm12,%xmm12 + vmovups %xmm13,-32(%rsi) + vpshufb %xmm0,%xmm13,%xmm13 + vmovups %xmm14,-16(%rsi) + vpshufb %xmm0,%xmm14,%xmm14 + vmovdqu %xmm9,16(%rsp) + vmovdqu 48(%rsp),%xmm6 + vmovdqu 16-32(%r9),%xmm0 + vpunpckhqdq %xmm6,%xmm6,%xmm2 + vpclmulqdq $0,%xmm3,%xmm7,%xmm5 + vpxor %xmm6,%xmm2,%xmm2 + vpclmulqdq $17,%xmm3,%xmm7,%xmm7 + vpclmulqdq $0,%xmm15,%xmm1,%xmm1 + + vmovdqu 64(%rsp),%xmm9 + vpclmulqdq $0,%xmm0,%xmm6,%xmm4 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm9,%xmm9,%xmm5 + vpclmulqdq $17,%xmm0,%xmm6,%xmm6 + vpxor %xmm9,%xmm5,%xmm5 + vpxor %xmm7,%xmm6,%xmm6 + vpclmulqdq $16,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vmovdqu 80(%rsp),%xmm1 + vpclmulqdq $0,%xmm3,%xmm9,%xmm7 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm4,%xmm7,%xmm7 + vpunpckhqdq %xmm1,%xmm1,%xmm4 + vpclmulqdq $17,%xmm3,%xmm9,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpxor %xmm6,%xmm9,%xmm9 + vpclmulqdq $0,%xmm15,%xmm5,%xmm5 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 96(%rsp),%xmm2 + vpclmulqdq $0,%xmm0,%xmm1,%xmm6 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm7,%xmm6,%xmm6 + vpunpckhqdq %xmm2,%xmm2,%xmm7 + vpclmulqdq $17,%xmm0,%xmm1,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpxor %xmm9,%xmm1,%xmm1 + vpclmulqdq $16,%xmm15,%xmm4,%xmm4 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm5,%xmm4,%xmm4 + + vpxor 112(%rsp),%xmm8,%xmm8 + vpclmulqdq $0,%xmm3,%xmm2,%xmm5 + vmovdqu 112-32(%r9),%xmm0 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpxor %xmm6,%xmm5,%xmm5 + vpclmulqdq $17,%xmm3,%xmm2,%xmm2 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm1,%xmm2,%xmm2 + vpclmulqdq $0,%xmm15,%xmm7,%xmm7 + vpxor %xmm4,%xmm7,%xmm4 + + vpclmulqdq $0,%xmm0,%xmm8,%xmm6 + vmovdqu 0-32(%r9),%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm1 + vpclmulqdq $17,%xmm0,%xmm8,%xmm8 + vpxor %xmm14,%xmm1,%xmm1 + vpxor %xmm5,%xmm6,%xmm5 + vpclmulqdq $16,%xmm15,%xmm9,%xmm9 + vmovdqu 32-32(%r9),%xmm15 + vpxor %xmm2,%xmm8,%xmm7 + vpxor %xmm4,%xmm9,%xmm6 + + vmovdqu 16-32(%r9),%xmm0 + vpxor %xmm5,%xmm7,%xmm9 + vpclmulqdq $0,%xmm3,%xmm14,%xmm4 + vpxor %xmm9,%xmm6,%xmm6 + vpunpckhqdq %xmm13,%xmm13,%xmm2 + vpclmulqdq $17,%xmm3,%xmm14,%xmm14 + vpxor %xmm13,%xmm2,%xmm2 + vpslldq $8,%xmm6,%xmm9 + vpclmulqdq $0,%xmm15,%xmm1,%xmm1 + vpxor %xmm9,%xmm5,%xmm8 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm6,%xmm7,%xmm7 + + vpclmulqdq $0,%xmm0,%xmm13,%xmm5 + vmovdqu 48-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm12,%xmm12,%xmm9 + vpclmulqdq $17,%xmm0,%xmm13,%xmm13 + vpxor %xmm12,%xmm9,%xmm9 + vpxor %xmm14,%xmm13,%xmm13 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpclmulqdq $16,%xmm15,%xmm2,%xmm2 + vmovdqu 80-32(%r9),%xmm15 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0,%xmm3,%xmm12,%xmm4 + vmovdqu 64-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm11,%xmm11,%xmm1 + vpclmulqdq $17,%xmm3,%xmm12,%xmm12 + vpxor %xmm11,%xmm1,%xmm1 + vpxor %xmm13,%xmm12,%xmm12 + vxorps 16(%rsp),%xmm7,%xmm7 + vpclmulqdq $0,%xmm15,%xmm9,%xmm9 + vpxor %xmm2,%xmm9,%xmm9 + + vpclmulqdq 
$16,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0,%xmm0,%xmm11,%xmm5 + vmovdqu 96-32(%r9),%xmm3 + vpxor %xmm4,%xmm5,%xmm5 + vpunpckhqdq %xmm10,%xmm10,%xmm2 + vpclmulqdq $17,%xmm0,%xmm11,%xmm11 + vpxor %xmm10,%xmm2,%xmm2 + vpalignr $8,%xmm8,%xmm8,%xmm14 + vpxor %xmm12,%xmm11,%xmm11 + vpclmulqdq $16,%xmm15,%xmm1,%xmm1 + vmovdqu 128-32(%r9),%xmm15 + vpxor %xmm9,%xmm1,%xmm1 + + vxorps %xmm7,%xmm14,%xmm14 + vpclmulqdq $16,16(%r11),%xmm8,%xmm8 + vxorps %xmm14,%xmm8,%xmm8 + + vpclmulqdq $0,%xmm3,%xmm10,%xmm4 + vmovdqu 112-32(%r9),%xmm0 + vpxor %xmm5,%xmm4,%xmm4 + vpunpckhqdq %xmm8,%xmm8,%xmm9 + vpclmulqdq $17,%xmm3,%xmm10,%xmm10 + vpxor %xmm8,%xmm9,%xmm9 + vpxor %xmm11,%xmm10,%xmm10 + vpclmulqdq $0,%xmm15,%xmm2,%xmm2 + vpxor %xmm1,%xmm2,%xmm2 + + vpclmulqdq $0,%xmm0,%xmm8,%xmm5 + vpclmulqdq $17,%xmm0,%xmm8,%xmm7 + vpxor %xmm4,%xmm5,%xmm5 + vpclmulqdq $16,%xmm15,%xmm9,%xmm6 + vpxor %xmm10,%xmm7,%xmm7 + vpxor %xmm2,%xmm6,%xmm6 + + vpxor %xmm5,%xmm7,%xmm4 + vpxor %xmm4,%xmm6,%xmm6 + vpslldq $8,%xmm6,%xmm1 + vmovdqu 16(%r11),%xmm3 + vpsrldq $8,%xmm6,%xmm6 + vpxor %xmm1,%xmm5,%xmm8 + vpxor %xmm6,%xmm7,%xmm7 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $16,%xmm3,%xmm8,%xmm8 + vpxor %xmm2,%xmm8,%xmm8 + + vpalignr $8,%xmm8,%xmm8,%xmm2 + vpclmulqdq $16,%xmm3,%xmm8,%xmm8 + vpxor %xmm7,%xmm2,%xmm2 + vpxor %xmm2,%xmm8,%xmm8 + vpshufb (%r11),%xmm8,%xmm8 + vmovdqu %xmm8,-64(%r9) + + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$gcm_enc_abort: + movq %r10,%rax + .byte 0xf3,0xc3 + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$poly: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +L$one_msb: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +L$two_lsb: +.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +L$one_lsb: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/modes/ghash-x86_64.s b/deps/openssl/asm/x64-macosx-gas/modes/ghash-x86_64.s index 189f17f450c0b0..09ac73bc97a349 100644 --- a/deps/openssl/asm/x64-macosx-gas/modes/ghash-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/modes/ghash-x86_64.s @@ -659,6 +659,7 @@ L$ghash_epilogue: .p2align 4 _gcm_init_clmul: +L$_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -677,15 +678,15 @@ _gcm_init_clmul: pxor %xmm5,%xmm2 + pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,220,0 +.byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -695,44 +696,134 @@ _gcm_init_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 
+ pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - movdqu %xmm2,(%rdi) - movdqu %xmm0,16(%rdi) + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) .byte 0xf3,0xc3 .globl _gcm_gmult_clmul .p2align 4 _gcm_gmult_clmul: +L$_gmult_clmul: movdqu (%rdi),%xmm0 movdqa L$bswap_mask(%rip),%xmm5 movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 @@ -745,201 +836,379 @@ _gcm_gmult_clmul: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .globl _gcm_ghash_clmul -.p2align 4 +.p2align 5 _gcm_ghash_clmul: - movdqa L$bswap_mask(%rip),%xmm5 +L$_ghash_clmul: + movdqa L$bswap_mask(%rip),%xmm10 movdqu (%rdi),%xmm0 movdqu (%rsi),%xmm2 -.byte 102,15,56,0,197 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 subq $16,%rcx jz L$odd_tail - movdqu 16(%rsi),%xmm8 + movdqu 16(%rsi),%xmm6 + movl _OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $48,%rcx + jb L$skip4x + + andl $71303168,%eax + cmpl $4194304,%eax + je L$skip4x + + subq $48,%rcx + movq $11547335547999543296,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd 
$78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jc L$tail4x + + jmp L$mod4_loop +.p2align 5 +L$mod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa L$7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jnc L$mod4_loop + +L$tail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $64,%rcx + jz L$done + movdqu 32(%rsi),%xmm7 + subq $16,%rcx + jz L$odd_tail +L$skip4x: - movdqu (%rdx),%xmm3 - movdqu 16(%rdx),%xmm6 -.byte 102,15,56,0,221 -.byte 102,15,56,0,245 - pxor %xmm3,%xmm0 - movdqa %xmm6,%xmm7 - pshufd $78,%xmm6,%xmm3 - pshufd $78,%xmm2,%xmm4 - pxor %xmm6,%xmm3 - pxor %xmm2,%xmm4 -.byte 102,15,58,68,242,0 -.byte 102,15,58,68,250,17 -.byte 102,15,58,68,220,0 - pxor %xmm6,%xmm3 - pxor %xmm7,%xmm3 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq 
$8,%xmm4 - pxor %xmm3,%xmm7 - pxor %xmm4,%xmm6 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm8,%xmm4 - pxor %xmm0,%xmm3 - pxor %xmm8,%xmm4 + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 leaq 32(%rdx),%rdx + nop subq $32,%rcx jbe L$even_tail + nop + jmp L$mod_loop +.p2align 5 L$mod_loop: -.byte 102,65,15,58,68,192,0 -.byte 102,65,15,58,68,200,17 -.byte 102,15,58,68,220,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 pslldq $8,%xmm4 - pxor %xmm3,%xmm1 + pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 - movdqu (%rdx),%xmm3 - pxor %xmm6,%xmm0 - pxor %xmm7,%xmm1 - - movdqu 16(%rdx),%xmm6 -.byte 102,15,56,0,221 -.byte 102,15,56,0,245 - - movdqa %xmm6,%xmm7 - pshufd $78,%xmm6,%xmm9 - pshufd $78,%xmm2,%xmm10 - pxor %xmm6,%xmm9 - pxor %xmm2,%xmm10 - pxor %xmm3,%xmm1 - movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 psllq $5,%xmm0 - pxor %xmm3,%xmm0 -.byte 102,15,58,68,242,0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm8 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 -.byte 102,15,58,68,250,17 - movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm9 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx psrlq $1,%xmm0 - pxor %xmm4,%xmm0 - -.byte 102,69,15,58,68,202,0 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm8,%xmm4 - pxor %xmm0,%xmm3 - pxor %xmm8,%xmm4 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 - pxor %xmm6,%xmm9 - pxor %xmm7,%xmm9 - movdqa %xmm9,%xmm10 - psrldq $8,%xmm9 - pslldq $8,%xmm10 - pxor %xmm9,%xmm7 - pxor %xmm10,%xmm6 - - leaq 32(%rdx),%rdx subq $32,%rcx ja L$mod_loop L$even_tail: -.byte 102,65,15,58,68,192,0 -.byte 102,65,15,58,68,200,17 -.byte 102,15,58,68,220,0 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 pslldq $8,%xmm4 - pxor %xmm3,%xmm1 + pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 - pxor %xmm6,%xmm0 - pxor %xmm7,%xmm1 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 - 
psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 testq %rcx,%rcx jnz L$done L$odd_tail: - movdqu (%rdx),%xmm3 -.byte 102,15,56,0,221 - pxor %xmm3,%xmm0 + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,220,0 +.byte 102,15,58,68,223,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -949,38 +1218,531 @@ L$odd_tail: pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 - pxor %xmm4,%xmm0 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 L$done: -.byte 102,15,56,0,197 +.byte 102,65,15,56,0,194 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 -L$SEH_end_gcm_ghash_clmul: + +.globl _gcm_init_avx + +.p2align 5 +_gcm_init_avx: + vzeroupper + + vmovdqu (%rsi),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand L$0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp L$init_start_avx +.p2align 5 +L$init_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rdi) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +L$init_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rdi) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu 
%xmm0,16(%rdi) + leaq 48(%rdi),%rdi + subq $1,%r10 + jnz L$init_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rdi) + + vzeroupper + .byte 0xf3,0xc3 + +.globl _gcm_gmult_avx + +.p2align 5 +_gcm_gmult_avx: + jmp L$_gmult_clmul + +.globl _gcm_ghash_avx + +.p2align 5 +_gcm_ghash_avx: + vzeroupper + + vmovdqu (%rdi),%xmm10 + leaq L$0x1c2_polynomial(%rip),%r10 + leaq 64(%rsi),%rsi + vmovdqu L$bswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $128,%rcx + jb L$short_avx + subq $128,%rcx + + vmovdqu 112(%rdx),%xmm14 + vmovdqu 0-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rsi),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%rdx),%xmm14 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rsi),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%rdx),%xmm15 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%rdx),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%rdx),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $16,%xmm7,%xmm9,%xmm2 + + leaq 128(%rdx),%rdx + cmpq $128,%rcx + jb L$tail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $128,%rcx + jmp L$oop8x_avx + +.p2align 5 +L$oop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%rdx),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq $0,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb 
%xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%rdx),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%rdx),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%rdx),%xmm14 + vpclmulqdq $16,(%r10),%xmm10,%xmm10 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rsi),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%rdx),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $17,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rsi),%xmm6 + vpclmulqdq $16,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $16,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rsi),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%rdx),%xmm15 + vpclmulqdq $0,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $17,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rsi),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $16,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%rdx),%rdx + subq $128,%rcx + jnc L$oop8x_avx + + addq $128,%rcx + jmp L$tail_no_xor_avx + +.p2align 5 +L$short_avx: + vmovdqu -16(%rdx,%rcx,1),%xmm14 + leaq (%rdx,%rcx,1),%rdx + vmovdqu 0-64(%rsi),%xmm6 + vmovdqu 32-64(%rsi),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $16,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -32(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $16,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + 
vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rsi),%xmm7 + subq $16,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $16,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rsi),%xmm7 + subq $16,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $16,%rcx + jz L$tail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%rdx),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rsi),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rsi),%xmm7 + subq $16,%rcx + jmp L$tail_avx + +.p2align 5 +L$tail_avx: + vpxor %xmm10,%xmm15,%xmm15 +L$tail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $17,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $16,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $16,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%rcx + jne L$short_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rdi) + vzeroupper + .byte 0xf3,0xc3 .p2align 6 L$bswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 L$0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +L$7_mask: +.long 7,0,7,0 +L$7_mask_poly: +.long 7,0,450,0 .p2align 6 L$rem_4bit: diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha1-mb-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha1-mb-x86_64.s new file mode 100644 index 00000000000000..c164fc3c42bb38 --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha1-mb-x86_64.s @@ -0,0 +1,7221 @@ +.text + + + +.globl _sha1_multi_block + +.p2align 5 +_sha1_multi_block: + movq _OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +L$body: + leaq K_XX_XX(%rip),%rbp + leaq 
256(%rsp),%rbx + +L$oop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done + + movdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + movdqu 32(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 96(%rdi),%xmm13 + movdqu 128(%rdi),%xmm14 + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + jmp L$oop + +.p2align 5 +L$oop: + movd (%r8),%xmm0 + leaq 64(%r8),%r8 + movd (%r9),%xmm2 + leaq 64(%r9),%r9 + movd (%r10),%xmm3 + leaq 64(%r10),%r10 + movd (%r11),%xmm4 + leaq 64(%r11),%r11 + punpckldq %xmm3,%xmm0 + movd -60(%r8),%xmm1 + punpckldq %xmm4,%xmm2 + movd -60(%r9),%xmm9 + punpckldq %xmm2,%xmm0 + movd -60(%r10),%xmm8 +.byte 102,15,56,0,197 + movd -60(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,0-128(%rax) + paddd %xmm0,%xmm14 + movd -56(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -56(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -56(%r10),%xmm8 + por %xmm7,%xmm11 + movd -56(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,16-128(%rax) + paddd %xmm1,%xmm13 + movd -52(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -52(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -52(%r10),%xmm8 + por %xmm7,%xmm10 + movd -52(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,32-128(%rax) + paddd %xmm2,%xmm12 + movd -48(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -48(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -48(%r10),%xmm8 + por %xmm7,%xmm14 + movd -48(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,48-128(%rax) + paddd %xmm3,%xmm11 + movd -44(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -44(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -44(%r10),%xmm8 + por %xmm7,%xmm13 + movd -44(%r11),%xmm7 + punpckldq 
%xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,64-128(%rax) + paddd %xmm4,%xmm10 + movd -40(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -40(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -40(%r10),%xmm8 + por %xmm7,%xmm12 + movd -40(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,80-128(%rax) + paddd %xmm0,%xmm14 + movd -36(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -36(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -36(%r10),%xmm8 + por %xmm7,%xmm11 + movd -36(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,96-128(%rax) + paddd %xmm1,%xmm13 + movd -32(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -32(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -32(%r10),%xmm8 + por %xmm7,%xmm10 + movd -32(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,112-128(%rax) + paddd %xmm2,%xmm12 + movd -28(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -28(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -28(%r10),%xmm8 + por %xmm7,%xmm14 + movd -28(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,128-128(%rax) + paddd %xmm3,%xmm11 + movd -24(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -24(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -24(%r10),%xmm8 + por %xmm7,%xmm13 + movd -24(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,144-128(%rax) + paddd %xmm4,%xmm10 + movd -20(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -20(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -20(%r10),%xmm8 + por %xmm7,%xmm12 + movd -20(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + 
punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,160-128(%rax) + paddd %xmm0,%xmm14 + movd -16(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -16(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -16(%r10),%xmm8 + por %xmm7,%xmm11 + movd -16(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,176-128(%rax) + paddd %xmm1,%xmm13 + movd -12(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -12(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -12(%r10),%xmm8 + por %xmm7,%xmm10 + movd -12(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,192-128(%rax) + paddd %xmm2,%xmm12 + movd -8(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -8(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -8(%r10),%xmm8 + por %xmm7,%xmm14 + movd -8(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,208-128(%rax) + paddd %xmm3,%xmm11 + movd -4(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -4(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -4(%r10),%xmm8 + por %xmm7,%xmm13 + movdqa 0-128(%rax),%xmm1 + movd -4(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + prefetcht0 63(%r8) + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,224-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + prefetcht0 63(%r9) + + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + prefetcht0 63(%r10) + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + prefetcht0 63(%r11) + por %xmm7,%xmm12 + movdqa 16-128(%rax),%xmm2 + pxor %xmm3,%xmm1 + movdqa 32-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + pxor 128-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + movdqa %xmm11,%xmm7 + pslld $5,%xmm8 + pxor %xmm3,%xmm1 + movdqa %xmm11,%xmm6 + pandn %xmm13,%xmm7 + movdqa %xmm1,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm10,%xmm9 + psrld $31,%xmm5 + paddd %xmm1,%xmm1 + + movdqa %xmm0,240-128(%rax) + paddd %xmm0,%xmm14 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm11,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 48-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + pxor 
144-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + movdqa %xmm10,%xmm7 + pslld $5,%xmm8 + pxor %xmm4,%xmm2 + movdqa %xmm10,%xmm6 + pandn %xmm12,%xmm7 + movdqa %xmm2,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm14,%xmm9 + psrld $31,%xmm5 + paddd %xmm2,%xmm2 + + movdqa %xmm1,0-128(%rax) + paddd %xmm1,%xmm13 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm10,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 64-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + pxor 160-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + movdqa %xmm14,%xmm7 + pslld $5,%xmm8 + pxor %xmm0,%xmm3 + movdqa %xmm14,%xmm6 + pandn %xmm11,%xmm7 + movdqa %xmm3,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm13,%xmm9 + psrld $31,%xmm5 + paddd %xmm3,%xmm3 + + movdqa %xmm2,16-128(%rax) + paddd %xmm2,%xmm12 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm14,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 80-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + pxor 176-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + movdqa %xmm13,%xmm7 + pslld $5,%xmm8 + pxor %xmm1,%xmm4 + movdqa %xmm13,%xmm6 + pandn %xmm10,%xmm7 + movdqa %xmm4,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm12,%xmm9 + psrld $31,%xmm5 + paddd %xmm4,%xmm4 + + movdqa %xmm3,32-128(%rax) + paddd %xmm3,%xmm11 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm13,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 96-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + pxor 192-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + movdqa %xmm12,%xmm7 + pslld $5,%xmm8 + pxor %xmm2,%xmm0 + movdqa %xmm12,%xmm6 + pandn %xmm14,%xmm7 + movdqa %xmm0,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm11,%xmm9 + psrld $31,%xmm5 + paddd %xmm0,%xmm0 + + movdqa %xmm4,48-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm12,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 0(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 112-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 208-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,64-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 128-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 224-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,80-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 144-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 240-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,96-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + 
psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 160-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 0-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,112-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 176-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 16-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,128-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 192-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 32-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,144-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 208-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 48-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,160-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 224-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 64-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,176-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 240-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 80-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,192-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 0-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 96-128(%rax),%xmm0 + paddd 
%xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,208-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 16-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 112-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,224-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 32-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 128-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,240-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 48-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 144-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,0-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 64-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 160-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,16-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 80-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 176-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,32-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 96-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 192-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,48-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por 
%xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 112-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 208-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,64-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 128-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 224-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,80-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 144-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 240-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,96-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 160-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 0-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,112-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 32(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 176-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 16-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,128-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 192-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 32-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,144-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 208-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 48-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 
+ + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,160-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 224-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 64-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,176-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 240-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 80-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,192-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 0-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 96-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,208-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 16-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 112-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,224-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 32-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 128-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,240-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 48-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 144-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand 
%xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,0-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 64-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 160-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,16-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 80-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 176-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,32-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 96-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 192-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,48-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 112-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 208-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,64-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 128-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 224-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,80-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 144-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 240-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand 
%xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,96-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 160-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 0-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,112-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 176-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 16-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,128-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 192-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 32-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,144-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 208-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 48-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,160-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 224-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 64-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,176-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 64(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 240-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 80-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + 
+ movdqa %xmm10,%xmm9 + movdqa %xmm0,192-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 0-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 96-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,208-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 16-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 112-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,224-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 32-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 128-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,240-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 48-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 144-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,0-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 64-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 160-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,16-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 80-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 176-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,32-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 
96-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 192-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,48-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 112-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 208-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,64-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 128-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 224-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,80-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 144-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 240-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,96-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 160-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 0-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,112-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 176-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 16-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 192-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 32-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + 
paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 208-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 48-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 224-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 64-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 240-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 80-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 0-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 96-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 16-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 112-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + movdqa %xmm12,%xmm7 + pxor %xmm13,%xmm6 + + pslld $30,%xmm7 + por %xmm9,%xmm8 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm7,%xmm12 + movdqa (%rbx),%xmm0 + movl $1,%ecx + cmpl 0(%rbx),%ecx + pxor %xmm8,%xmm8 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm0,%xmm1 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm8,%xmm1 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm1,%xmm0 + cmovgeq %rbp,%r11 + + movdqu 0(%rdi),%xmm6 + pand %xmm1,%xmm10 + movdqu 32(%rdi),%xmm7 + pand %xmm1,%xmm11 + paddd %xmm6,%xmm10 + movdqu 64(%rdi),%xmm8 + pand %xmm1,%xmm12 + paddd %xmm7,%xmm11 + movdqu 96(%rdi),%xmm9 + pand %xmm1,%xmm13 + paddd %xmm8,%xmm12 + movdqu 128(%rdi),%xmm5 + pand %xmm1,%xmm14 + movdqu %xmm10,0(%rdi) + paddd %xmm9,%xmm13 + movdqu %xmm11,32(%rdi) + paddd %xmm5,%xmm14 + movdqu %xmm12,64(%rdi) 
+ movdqu %xmm13,96(%rdi) + movdqu %xmm14,128(%rdi) + + movdqa %xmm0,(%rbx) + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + decl %edx + jnz L$oop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande + +L$done: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +sha1_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 64(%rdi),%rdi + movq %rax,272(%rsp) +L$body_shaext: + leaq 256(%rsp),%rbx + movdqa K_XX_XX+128(%rip),%xmm3 + +L$oop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz L$done_shaext + + movq 0-64(%rdi),%xmm0 + movq 32-64(%rdi),%xmm4 + movq 64-64(%rdi),%xmm5 + movq 96-64(%rdi),%xmm6 + movq 128-64(%rdi),%xmm7 + + punpckldq %xmm4,%xmm0 + punpckldq %xmm6,%xmm5 + + movdqa %xmm0,%xmm8 + punpcklqdq %xmm5,%xmm0 + punpckhqdq %xmm5,%xmm8 + + pshufd $63,%xmm7,%xmm1 + pshufd $127,%xmm7,%xmm9 + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + jmp L$oop_shaext + +.p2align 5 +L$oop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm11 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm12 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm13 +.byte 102,68,15,56,0,219 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 +.byte 102,15,56,0,235 + movdqu 48(%r9),%xmm14 + leaq 64(%r9),%r9 +.byte 102,68,15,56,0,227 + + movdqa %xmm1,80(%rsp) + paddd %xmm4,%xmm1 + movdqa %xmm9,112(%rsp) + paddd %xmm11,%xmm9 + movdqa %xmm0,64(%rsp) + movdqa %xmm0,%xmm2 + movdqa %xmm8,96(%rsp) + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 102,15,56,0,243 + prefetcht0 127(%r8) +.byte 15,56,201,229 +.byte 102,68,15,56,0,235 + prefetcht0 127(%r9) +.byte 69,15,56,201,220 + +.byte 102,15,56,0,251 + movdqa %xmm0,%xmm1 +.byte 102,68,15,56,0,243 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,206 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,205 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,215 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,204 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 
15,56,200,215 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,204 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,213 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,204 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,213 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,206 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,213 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + + movl $1,%ecx + pxor %xmm4,%xmm4 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,206 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + movq (%rbx),%xmm6 + + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,215 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,214 + + pshufd $0,%xmm6,%xmm11 + pshufd $85,%xmm6,%xmm12 + movdqa %xmm6,%xmm7 + pcmpgtd %xmm4,%xmm11 + pcmpgtd %xmm4,%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,204 + + pcmpgtd %xmm4,%xmm7 
+ pand %xmm11,%xmm0 + pand %xmm11,%xmm1 + pand %xmm12,%xmm8 + pand %xmm12,%xmm9 + paddd %xmm7,%xmm6 + + paddd 64(%rsp),%xmm0 + paddd 80(%rsp),%xmm1 + paddd 96(%rsp),%xmm8 + paddd 112(%rsp),%xmm9 + + movq %xmm6,(%rbx) + decl %edx + jnz L$oop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + + movdqa %xmm0,%xmm6 + punpckldq %xmm8,%xmm0 + punpckhdq %xmm8,%xmm6 + punpckhdq %xmm9,%xmm1 + movq %xmm0,0-64(%rdi) + psrldq $8,%xmm0 + movq %xmm6,64-64(%rdi) + psrldq $8,%xmm6 + movq %xmm0,32-64(%rdi) + psrldq $8,%xmm1 + movq %xmm6,96-64(%rdi) + movq %xmm1,128-64(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz L$oop_grande_shaext + +L$done_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_shaext: + .byte 0xf3,0xc3 + + +.p2align 5 +sha1_multi_block_avx: +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb L$avx + testl $32,%ecx + jnz _avx2_shortcut + jmp L$avx +.p2align 5 +L$avx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +L$body_avx: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + + vzeroupper +L$oop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done_avx + + vmovdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%xmm11 + vmovdqu 64(%rdi),%xmm12 + vmovdqu 96(%rdi),%xmm13 + vmovdqu 128(%rdi),%xmm14 + vmovdqu 96(%rbp),%xmm5 + jmp L$oop_avx + +.p2align 5 +L$oop_avx: + vmovdqa -32(%rbp),%xmm15 + vmovd (%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd (%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vmovd -60(%r8),%xmm1 + vpunpckldq %xmm2,%xmm0,%xmm0 + vmovd -60(%r9),%xmm9 + vpshufb %xmm5,%xmm0,%xmm0 + vpinsrd $1,-60(%r10),%xmm1,%xmm1 + vpinsrd $1,-60(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,0-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -56(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -56(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-56(%r10),%xmm2,%xmm2 + vpinsrd $1,-56(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,16-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -52(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -52(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-52(%r10),%xmm3,%xmm3 + vpinsrd $1,-52(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld 
$5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,32-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -48(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -48(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm4,%xmm4 + vpinsrd $1,-48(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,48-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -44(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -44(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-44(%r10),%xmm0,%xmm0 + vpinsrd $1,-44(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,64-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -40(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -40(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-40(%r10),%xmm1,%xmm1 + vpinsrd $1,-40(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,80-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -36(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -36(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-36(%r10),%xmm2,%xmm2 + vpinsrd $1,-36(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,96-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -32(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -32(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-32(%r10),%xmm3,%xmm3 + vpinsrd $1,-32(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,112-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -28(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -28(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm4,%xmm4 + vpinsrd $1,-28(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,128-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq 
%xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -24(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -24(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpinsrd $1,-24(%r10),%xmm0,%xmm0 + vpinsrd $1,-24(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,144-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -20(%r8),%xmm1 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -20(%r9),%xmm9 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpinsrd $1,-20(%r10),%xmm1,%xmm1 + vpinsrd $1,-20(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,160-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpunpckldq %xmm9,%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -16(%r8),%xmm2 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -16(%r9),%xmm9 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpshufb %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpinsrd $1,-16(%r10),%xmm2,%xmm2 + vpinsrd $1,-16(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,176-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpunpckldq %xmm9,%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -12(%r8),%xmm3 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -12(%r9),%xmm9 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpshufb %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpinsrd $1,-12(%r10),%xmm3,%xmm3 + vpinsrd $1,-12(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,192-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpunpckldq %xmm9,%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -8(%r8),%xmm4 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -8(%r9),%xmm9 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpshufb %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm4,%xmm4 + vpinsrd $1,-8(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,208-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpunpckldq %xmm9,%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vmovd -4(%r8),%xmm0 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vmovd -4(%r9),%xmm9 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpshufb %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vmovdqa 0-128(%rax),%xmm1 + vpinsrd $1,-4(%r10),%xmm0,%xmm0 + vpinsrd $1,-4(%r11),%xmm9,%xmm9 + vpaddd %xmm15,%xmm10,%xmm10 + prefetcht0 63(%r8) + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,224-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpunpckldq %xmm9,%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + prefetcht0 63(%r9) + vpxor %xmm7,%xmm6,%xmm6 + + vpslld 
$30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + prefetcht0 63(%r10) + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + prefetcht0 63(%r11) + vpshufb %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 16-128(%rax),%xmm2 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 32-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpandn %xmm13,%xmm11,%xmm7 + + vpand %xmm12,%xmm11,%xmm6 + + vmovdqa %xmm0,240-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 128-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 48-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpandn %xmm12,%xmm10,%xmm7 + + vpand %xmm11,%xmm10,%xmm6 + + vmovdqa %xmm1,0-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 144-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 64-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpandn %xmm11,%xmm14,%xmm7 + + vpand %xmm10,%xmm14,%xmm6 + + vmovdqa %xmm2,16-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 160-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 80-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpandn %xmm10,%xmm13,%xmm7 + + vpand %xmm14,%xmm13,%xmm6 + + vmovdqa %xmm3,32-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 176-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 96-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpandn %xmm14,%xmm12,%xmm7 + + vpand %xmm13,%xmm12,%xmm6 + + vmovdqa %xmm4,48-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 192-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm7,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 0(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 112-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,64-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 208-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + 
+ vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 128-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,80-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 224-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 144-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,96-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 240-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 160-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,112-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 0-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 176-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,128-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 16-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 192-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,144-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 32-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 208-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,160-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 48-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor 
%xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 224-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,176-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 64-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 240-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,192-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 80-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 0-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,208-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 96-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 16-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,224-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 112-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 32-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,240-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 128-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 48-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,0-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 144-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 64-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,16-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 
160-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 80-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,32-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 176-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 96-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,48-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 192-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 112-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,64-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 208-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 128-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,80-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 224-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 144-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,96-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 240-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 160-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,112-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 0-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd 
%xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 32(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 176-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 16-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,128-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 192-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 32-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,144-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 208-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 48-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,160-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 224-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 64-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,176-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 240-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 80-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,192-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 0-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 96-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,208-128(%rax) + 
vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 16-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 112-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,224-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 32-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 128-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,240-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 48-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 144-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,0-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 64-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 160-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,16-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 80-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 176-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,32-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 96-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 192-128(%rax),%xmm2,%xmm2 + + 
vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,48-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 112-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 208-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,64-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 128-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 224-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,80-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 144-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 240-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,96-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 160-128(%rax),%xmm3 + + vpaddd %xmm15,%xmm14,%xmm14 + vpslld $5,%xmm10,%xmm8 + vpand %xmm12,%xmm13,%xmm7 + vpxor 0-128(%rax),%xmm1,%xmm1 + + vpaddd %xmm7,%xmm14,%xmm14 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm13,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vmovdqu %xmm0,112-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm1,%xmm5 + vpand %xmm11,%xmm6,%xmm6 + vpaddd %xmm1,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpaddd %xmm6,%xmm14,%xmm14 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 176-128(%rax),%xmm4 + + vpaddd %xmm15,%xmm13,%xmm13 + vpslld $5,%xmm14,%xmm8 + vpand %xmm11,%xmm12,%xmm7 + vpxor 16-128(%rax),%xmm2,%xmm2 + + vpaddd %xmm7,%xmm13,%xmm13 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm12,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vmovdqu %xmm1,128-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm2,%xmm5 + vpand %xmm10,%xmm6,%xmm6 + vpaddd %xmm2,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpaddd %xmm6,%xmm13,%xmm13 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 
192-128(%rax),%xmm0 + + vpaddd %xmm15,%xmm12,%xmm12 + vpslld $5,%xmm13,%xmm8 + vpand %xmm10,%xmm11,%xmm7 + vpxor 32-128(%rax),%xmm3,%xmm3 + + vpaddd %xmm7,%xmm12,%xmm12 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm11,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vmovdqu %xmm2,144-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm3,%xmm5 + vpand %xmm14,%xmm6,%xmm6 + vpaddd %xmm3,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpaddd %xmm6,%xmm12,%xmm12 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 208-128(%rax),%xmm1 + + vpaddd %xmm15,%xmm11,%xmm11 + vpslld $5,%xmm12,%xmm8 + vpand %xmm14,%xmm10,%xmm7 + vpxor 48-128(%rax),%xmm4,%xmm4 + + vpaddd %xmm7,%xmm11,%xmm11 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm10,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vmovdqu %xmm3,160-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm4,%xmm5 + vpand %xmm13,%xmm6,%xmm6 + vpaddd %xmm4,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpaddd %xmm6,%xmm11,%xmm11 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 224-128(%rax),%xmm2 + + vpaddd %xmm15,%xmm10,%xmm10 + vpslld $5,%xmm11,%xmm8 + vpand %xmm13,%xmm14,%xmm7 + vpxor 64-128(%rax),%xmm0,%xmm0 + + vpaddd %xmm7,%xmm10,%xmm10 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm14,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vmovdqu %xmm4,176-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpor %xmm9,%xmm8,%xmm8 + vpsrld $31,%xmm0,%xmm5 + vpand %xmm12,%xmm6,%xmm6 + vpaddd %xmm0,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vmovdqa 64(%rbp),%xmm15 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 240-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,192-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 80-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 0-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,208-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 96-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 16-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,224-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 112-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 
32-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,240-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 128-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 48-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,0-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 144-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 64-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,16-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 160-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 80-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,32-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 176-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 96-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vmovdqa %xmm2,48-128(%rax) + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 192-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 112-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vmovdqa %xmm3,64-128(%rax) + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 208-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 128-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vmovdqa %xmm4,80-128(%rax) + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 224-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + 
vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 144-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vmovdqa %xmm0,96-128(%rax) + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 240-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 160-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vmovdqa %xmm1,112-128(%rax) + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 0-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 176-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 16-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 192-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 32-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpxor %xmm2,%xmm0,%xmm0 + vmovdqa 208-128(%rax),%xmm2 + + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor 48-128(%rax),%xmm0,%xmm0 + vpsrld $27,%xmm11,%xmm9 + vpxor %xmm13,%xmm6,%xmm6 + vpxor %xmm2,%xmm0,%xmm0 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + vpsrld $31,%xmm0,%xmm5 + vpaddd %xmm0,%xmm0,%xmm0 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm5,%xmm0,%xmm0 + vpor %xmm7,%xmm12,%xmm12 + vpxor %xmm3,%xmm1,%xmm1 + vmovdqa 224-128(%rax),%xmm3 + + vpslld $5,%xmm10,%xmm8 + vpaddd %xmm15,%xmm14,%xmm14 + vpxor %xmm11,%xmm13,%xmm6 + vpaddd %xmm0,%xmm14,%xmm14 + vpxor 64-128(%rax),%xmm1,%xmm1 + vpsrld $27,%xmm10,%xmm9 + vpxor %xmm12,%xmm6,%xmm6 + vpxor %xmm3,%xmm1,%xmm1 + + vpslld $30,%xmm11,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm14,%xmm14 + vpsrld $31,%xmm1,%xmm5 + vpaddd %xmm1,%xmm1,%xmm1 + + vpsrld $2,%xmm11,%xmm11 + vpaddd %xmm8,%xmm14,%xmm14 + vpor %xmm5,%xmm1,%xmm1 + vpor %xmm7,%xmm11,%xmm11 + vpxor %xmm4,%xmm2,%xmm2 + vmovdqa 
240-128(%rax),%xmm4 + + vpslld $5,%xmm14,%xmm8 + vpaddd %xmm15,%xmm13,%xmm13 + vpxor %xmm10,%xmm12,%xmm6 + vpaddd %xmm1,%xmm13,%xmm13 + vpxor 80-128(%rax),%xmm2,%xmm2 + vpsrld $27,%xmm14,%xmm9 + vpxor %xmm11,%xmm6,%xmm6 + vpxor %xmm4,%xmm2,%xmm2 + + vpslld $30,%xmm10,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm13,%xmm13 + vpsrld $31,%xmm2,%xmm5 + vpaddd %xmm2,%xmm2,%xmm2 + + vpsrld $2,%xmm10,%xmm10 + vpaddd %xmm8,%xmm13,%xmm13 + vpor %xmm5,%xmm2,%xmm2 + vpor %xmm7,%xmm10,%xmm10 + vpxor %xmm0,%xmm3,%xmm3 + vmovdqa 0-128(%rax),%xmm0 + + vpslld $5,%xmm13,%xmm8 + vpaddd %xmm15,%xmm12,%xmm12 + vpxor %xmm14,%xmm11,%xmm6 + vpaddd %xmm2,%xmm12,%xmm12 + vpxor 96-128(%rax),%xmm3,%xmm3 + vpsrld $27,%xmm13,%xmm9 + vpxor %xmm10,%xmm6,%xmm6 + vpxor %xmm0,%xmm3,%xmm3 + + vpslld $30,%xmm14,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + vpsrld $31,%xmm3,%xmm5 + vpaddd %xmm3,%xmm3,%xmm3 + + vpsrld $2,%xmm14,%xmm14 + vpaddd %xmm8,%xmm12,%xmm12 + vpor %xmm5,%xmm3,%xmm3 + vpor %xmm7,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vmovdqa 16-128(%rax),%xmm1 + + vpslld $5,%xmm12,%xmm8 + vpaddd %xmm15,%xmm11,%xmm11 + vpxor %xmm13,%xmm10,%xmm6 + vpaddd %xmm3,%xmm11,%xmm11 + vpxor 112-128(%rax),%xmm4,%xmm4 + vpsrld $27,%xmm12,%xmm9 + vpxor %xmm14,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm4 + + vpslld $30,%xmm13,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm11,%xmm11 + vpsrld $31,%xmm4,%xmm5 + vpaddd %xmm4,%xmm4,%xmm4 + + vpsrld $2,%xmm13,%xmm13 + vpaddd %xmm8,%xmm11,%xmm11 + vpor %xmm5,%xmm4,%xmm4 + vpor %xmm7,%xmm13,%xmm13 + vpslld $5,%xmm11,%xmm8 + vpaddd %xmm15,%xmm10,%xmm10 + vpxor %xmm12,%xmm14,%xmm6 + + vpsrld $27,%xmm11,%xmm9 + vpaddd %xmm4,%xmm10,%xmm10 + vpxor %xmm13,%xmm6,%xmm6 + + vpslld $30,%xmm12,%xmm7 + vpor %xmm9,%xmm8,%xmm8 + vpaddd %xmm6,%xmm10,%xmm10 + + vpsrld $2,%xmm12,%xmm12 + vpaddd %xmm8,%xmm10,%xmm10 + vpor %xmm7,%xmm12,%xmm12 + movl $1,%ecx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%xmm6 + vpxor %xmm8,%xmm8,%xmm8 + vmovdqa %xmm6,%xmm7 + vpcmpgtd %xmm8,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + + vpand %xmm7,%xmm10,%xmm10 + vpand %xmm7,%xmm11,%xmm11 + vpaddd 0(%rdi),%xmm10,%xmm10 + vpand %xmm7,%xmm12,%xmm12 + vpaddd 32(%rdi),%xmm11,%xmm11 + vpand %xmm7,%xmm13,%xmm13 + vpaddd 64(%rdi),%xmm12,%xmm12 + vpand %xmm7,%xmm14,%xmm14 + vpaddd 96(%rdi),%xmm13,%xmm13 + vpaddd 128(%rdi),%xmm14,%xmm14 + vmovdqu %xmm10,0(%rdi) + vmovdqu %xmm11,32(%rdi) + vmovdqu %xmm12,64(%rdi) + vmovdqu %xmm13,96(%rdi) + vmovdqu %xmm14,128(%rdi) + + vmovdqu %xmm6,(%rbx) + vmovdqu 96(%rbp),%xmm5 + decl %edx + jnz L$oop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande_avx + +L$done_avx: + movq 272(%rsp),%rax + vzeroupper + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_avx: + .byte 0xf3,0xc3 + + +.p2align 5 +sha1_multi_block_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +L$body_avx2: + leaq K_XX_XX(%rip),%rbp + shrl $1,%edx + + vzeroupper +L$oop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + movq 0(%rsi),%r12 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + movq 16(%rsi),%r13 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq 
%rbp,%r13 + movq 32(%rsi),%r14 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + movq 48(%rsi),%r15 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + movq 64(%rsi),%r8 + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + movq 80(%rsi),%r9 + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + movq 96(%rsi),%r10 + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + movq 112(%rsi),%r11 + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0(%rdi),%ymm0 + leaq 128(%rsp),%rax + vmovdqu 32(%rdi),%ymm1 + leaq 256+128(%rsp),%rbx + vmovdqu 64(%rdi),%ymm2 + vmovdqu 96(%rdi),%ymm3 + vmovdqu 128(%rdi),%ymm4 + vmovdqu 96(%rbp),%ymm9 + jmp L$oop_avx2 + +.p2align 5 +L$oop_avx2: + vmovdqa -32(%rbp),%ymm15 + vmovd (%r12),%xmm10 + leaq 64(%r12),%r12 + vmovd (%r8),%xmm12 + leaq 64(%r8),%r8 + vmovd (%r13),%xmm7 + leaq 64(%r13),%r13 + vmovd (%r9),%xmm6 + leaq 64(%r9),%r9 + vpinsrd $1,(%r14),%xmm10,%xmm10 + leaq 64(%r14),%r14 + vpinsrd $1,(%r10),%xmm12,%xmm12 + leaq 64(%r10),%r10 + vpinsrd $1,(%r15),%xmm7,%xmm7 + leaq 64(%r15),%r15 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,(%r11),%xmm6,%xmm6 + leaq 64(%r11),%r11 + vpunpckldq %ymm6,%ymm12,%ymm12 + vmovd -60(%r12),%xmm11 + vinserti128 $1,%xmm12,%ymm10,%ymm10 + vmovd -60(%r8),%xmm8 + vpshufb %ymm9,%ymm10,%ymm10 + vmovd -60(%r13),%xmm7 + vmovd -60(%r9),%xmm6 + vpinsrd $1,-60(%r14),%xmm11,%xmm11 + vpinsrd $1,-60(%r10),%xmm8,%xmm8 + vpinsrd $1,-60(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-60(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,0-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -56(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -56(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -56(%r13),%xmm7 + vmovd -56(%r9),%xmm6 + vpinsrd $1,-56(%r14),%xmm12,%xmm12 + vpinsrd $1,-56(%r10),%xmm8,%xmm8 + vpinsrd $1,-56(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-56(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,32-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -52(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -52(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -52(%r13),%xmm7 + vmovd -52(%r9),%xmm6 + vpinsrd $1,-52(%r14),%xmm13,%xmm13 + vpinsrd $1,-52(%r10),%xmm8,%xmm8 + vpinsrd $1,-52(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-52(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,64-128(%rax) + vpaddd 
%ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -48(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -48(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -48(%r13),%xmm7 + vmovd -48(%r9),%xmm6 + vpinsrd $1,-48(%r14),%xmm14,%xmm14 + vpinsrd $1,-48(%r10),%xmm8,%xmm8 + vpinsrd $1,-48(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-48(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,96-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -44(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -44(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -44(%r13),%xmm7 + vmovd -44(%r9),%xmm6 + vpinsrd $1,-44(%r14),%xmm10,%xmm10 + vpinsrd $1,-44(%r10),%xmm8,%xmm8 + vpinsrd $1,-44(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-44(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,128-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -40(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -40(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -40(%r13),%xmm7 + vmovd -40(%r9),%xmm6 + vpinsrd $1,-40(%r14),%xmm11,%xmm11 + vpinsrd $1,-40(%r10),%xmm8,%xmm8 + vpinsrd $1,-40(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-40(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,160-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -36(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -36(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -36(%r13),%xmm7 + vmovd -36(%r9),%xmm6 + vpinsrd $1,-36(%r14),%xmm12,%xmm12 + vpinsrd $1,-36(%r10),%xmm8,%xmm8 + vpinsrd $1,-36(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-36(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,192-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -32(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -32(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -32(%r13),%xmm7 + vmovd -32(%r9),%xmm6 + vpinsrd $1,-32(%r14),%xmm13,%xmm13 + vpinsrd $1,-32(%r10),%xmm8,%xmm8 + vpinsrd $1,-32(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-32(%r11),%xmm6,%xmm6 + vpunpckldq 
%ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,224-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -28(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -28(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -28(%r13),%xmm7 + vmovd -28(%r9),%xmm6 + vpinsrd $1,-28(%r14),%xmm14,%xmm14 + vpinsrd $1,-28(%r10),%xmm8,%xmm8 + vpinsrd $1,-28(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-28(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,256-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -24(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -24(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovd -24(%r13),%xmm7 + vmovd -24(%r9),%xmm6 + vpinsrd $1,-24(%r14),%xmm10,%xmm10 + vpinsrd $1,-24(%r10),%xmm8,%xmm8 + vpinsrd $1,-24(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-24(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,288-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -20(%r12),%xmm11 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -20(%r8),%xmm8 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovd -20(%r13),%xmm7 + vmovd -20(%r9),%xmm6 + vpinsrd $1,-20(%r14),%xmm11,%xmm11 + vpinsrd $1,-20(%r10),%xmm8,%xmm8 + vpinsrd $1,-20(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm11,%ymm11 + vpinsrd $1,-20(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,320-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vinserti128 $1,%xmm8,%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -16(%r12),%xmm12 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -16(%r8),%xmm8 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpshufb %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vmovd -16(%r13),%xmm7 + vmovd -16(%r9),%xmm6 + vpinsrd $1,-16(%r14),%xmm12,%xmm12 + vpinsrd $1,-16(%r10),%xmm8,%xmm8 + vpinsrd $1,-16(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm12,%ymm12 + vpinsrd $1,-16(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,352-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vinserti128 $1,%xmm8,%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -12(%r12),%xmm13 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -12(%r8),%xmm8 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpshufb %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vmovd -12(%r13),%xmm7 + vmovd -12(%r9),%xmm6 + vpinsrd 
$1,-12(%r14),%xmm13,%xmm13 + vpinsrd $1,-12(%r10),%xmm8,%xmm8 + vpinsrd $1,-12(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm13,%ymm13 + vpinsrd $1,-12(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,384-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vinserti128 $1,%xmm8,%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -8(%r12),%xmm14 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -8(%r8),%xmm8 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpshufb %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vmovd -8(%r13),%xmm7 + vmovd -8(%r9),%xmm6 + vpinsrd $1,-8(%r14),%xmm14,%xmm14 + vpinsrd $1,-8(%r10),%xmm8,%xmm8 + vpinsrd $1,-8(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm14,%ymm14 + vpinsrd $1,-8(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,416-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vinserti128 $1,%xmm8,%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vmovd -4(%r12),%xmm10 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vmovd -4(%r8),%xmm8 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpshufb %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vmovdqa 0-128(%rax),%ymm11 + vmovd -4(%r13),%xmm7 + vmovd -4(%r9),%xmm6 + vpinsrd $1,-4(%r14),%xmm10,%xmm10 + vpinsrd $1,-4(%r10),%xmm8,%xmm8 + vpinsrd $1,-4(%r15),%xmm7,%xmm7 + vpunpckldq %ymm7,%ymm10,%ymm10 + vpinsrd $1,-4(%r11),%xmm6,%xmm6 + vpunpckldq %ymm6,%ymm8,%ymm8 + vpaddd %ymm15,%ymm0,%ymm0 + prefetcht0 63(%r12) + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,448-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vinserti128 $1,%xmm8,%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + prefetcht0 63(%r13) + vpxor %ymm6,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + prefetcht0 63(%r15) + vpshufb %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32-128(%rax),%ymm12 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 64-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpandn %ymm3,%ymm1,%ymm6 + prefetcht0 63(%r8) + vpand %ymm2,%ymm1,%ymm5 + + vmovdqa %ymm10,480-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 256-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + prefetcht0 63(%r9) + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + prefetcht0 63(%r10) + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + prefetcht0 63(%r11) + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 96-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpandn %ymm2,%ymm0,%ymm6 + + vpand %ymm1,%ymm0,%ymm5 + + vmovdqa %ymm11,0-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 288-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor 
%ymm10,%ymm13,%ymm13 + vmovdqa 128-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpandn %ymm1,%ymm4,%ymm6 + + vpand %ymm0,%ymm4,%ymm5 + + vmovdqa %ymm12,32-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 320-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 160-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpandn %ymm0,%ymm3,%ymm6 + + vpand %ymm4,%ymm3,%ymm5 + + vmovdqa %ymm13,64-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 352-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 192-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpandn %ymm4,%ymm2,%ymm6 + + vpand %ymm3,%ymm2,%ymm5 + + vmovdqa %ymm14,96-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 384-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm6,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 0(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 224-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,128-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 416-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 256-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,160-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 448-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 288-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,192-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 480-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 320-256-128(%rbx),%ymm11 + + 
vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,224-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 0-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 352-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,256-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 32-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 384-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,288-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 64-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 416-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,320-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 96-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 448-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,352-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 128-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 480-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,384-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 160-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 0-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,416-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 192-128(%rax),%ymm10,%ymm10 + vpsrld 
$27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 32-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,448-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 224-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 64-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,480-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 256-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 96-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,0-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 288-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 128-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,32-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 320-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 160-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,64-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 352-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 192-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,96-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 384-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd 
%ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 224-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,128-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 416-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 256-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,160-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 448-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 288-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,192-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 480-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 320-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,224-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 0-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 32(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 352-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 32-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,256-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 384-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 64-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,288-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + 
vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 416-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 96-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,320-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 448-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 128-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,352-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 480-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 160-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,384-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 0-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 192-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,416-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 32-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 224-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,448-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 64-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 256-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,480-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld 
$31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 96-128(%rax),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 288-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,0-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 128-128(%rax),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 320-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,32-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 160-128(%rax),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 352-256-128(%rbx),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,64-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 192-128(%rax),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 384-256-128(%rbx),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,96-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 224-128(%rax),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 416-256-128(%rbx),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,128-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 256-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 448-256-128(%rbx),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor 
%ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,160-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 288-256-128(%rbx),%ymm12 + + vpaddd %ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 480-256-128(%rbx),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,192-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 320-256-128(%rbx),%ymm13 + + vpaddd %ymm15,%ymm4,%ymm4 + vpslld $5,%ymm0,%ymm7 + vpand %ymm2,%ymm3,%ymm6 + vpxor 0-128(%rax),%ymm11,%ymm11 + + vpaddd %ymm6,%ymm4,%ymm4 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm3,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vmovdqu %ymm10,224-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm11,%ymm9 + vpand %ymm1,%ymm5,%ymm5 + vpaddd %ymm11,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpaddd %ymm5,%ymm4,%ymm4 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 352-256-128(%rbx),%ymm14 + + vpaddd %ymm15,%ymm3,%ymm3 + vpslld $5,%ymm4,%ymm7 + vpand %ymm1,%ymm2,%ymm6 + vpxor 32-128(%rax),%ymm12,%ymm12 + + vpaddd %ymm6,%ymm3,%ymm3 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm2,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vmovdqu %ymm11,256-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm12,%ymm9 + vpand %ymm0,%ymm5,%ymm5 + vpaddd %ymm12,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpaddd %ymm5,%ymm3,%ymm3 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 384-256-128(%rbx),%ymm10 + + vpaddd %ymm15,%ymm2,%ymm2 + vpslld $5,%ymm3,%ymm7 + vpand %ymm0,%ymm1,%ymm6 + vpxor 64-128(%rax),%ymm13,%ymm13 + + vpaddd %ymm6,%ymm2,%ymm2 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm1,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vmovdqu %ymm12,288-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm13,%ymm9 + vpand %ymm4,%ymm5,%ymm5 + vpaddd %ymm13,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpaddd %ymm5,%ymm2,%ymm2 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 416-256-128(%rbx),%ymm11 + + vpaddd %ymm15,%ymm1,%ymm1 + vpslld $5,%ymm2,%ymm7 + vpand %ymm4,%ymm0,%ymm6 + vpxor 96-128(%rax),%ymm14,%ymm14 + + vpaddd %ymm6,%ymm1,%ymm1 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm0,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vmovdqu %ymm13,320-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm14,%ymm9 + vpand %ymm3,%ymm5,%ymm5 + vpaddd %ymm14,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpaddd %ymm5,%ymm1,%ymm1 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 448-256-128(%rbx),%ymm12 + + vpaddd 
%ymm15,%ymm0,%ymm0 + vpslld $5,%ymm1,%ymm7 + vpand %ymm3,%ymm4,%ymm6 + vpxor 128-128(%rax),%ymm10,%ymm10 + + vpaddd %ymm6,%ymm0,%ymm0 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm4,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vmovdqu %ymm14,352-256-128(%rbx) + vpaddd %ymm14,%ymm0,%ymm0 + vpor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm10,%ymm9 + vpand %ymm2,%ymm5,%ymm5 + vpaddd %ymm10,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vmovdqa 64(%rbp),%ymm15 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 480-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,384-256-128(%rbx) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 160-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 0-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,416-256-128(%rbx) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 192-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 32-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,448-256-128(%rbx) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 224-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 64-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,480-256-128(%rbx) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 256-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 96-128(%rax),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,0-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 288-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 128-128(%rax),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,32-128(%rax) + vpaddd 
%ymm10,%ymm4,%ymm4 + vpxor 320-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 160-128(%rax),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,64-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 352-256-128(%rbx),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 192-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vmovdqa %ymm12,96-128(%rax) + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 384-256-128(%rbx),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 224-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vmovdqa %ymm13,128-128(%rax) + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 416-256-128(%rbx),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 256-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vmovdqa %ymm14,160-128(%rax) + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 448-256-128(%rbx),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 288-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vmovdqa %ymm10,192-128(%rax) + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 480-256-128(%rbx),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 320-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vmovdqa %ymm11,224-128(%rax) + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 0-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + 
vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 352-256-128(%rbx),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 32-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 384-256-128(%rbx),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 64-128(%rax),%ymm14,%ymm14 + vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpxor %ymm12,%ymm10,%ymm10 + vmovdqa 416-256-128(%rbx),%ymm12 + + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor 96-128(%rax),%ymm10,%ymm10 + vpsrld $27,%ymm1,%ymm8 + vpxor %ymm3,%ymm5,%ymm5 + vpxor %ymm12,%ymm10,%ymm10 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + vpsrld $31,%ymm10,%ymm9 + vpaddd %ymm10,%ymm10,%ymm10 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm9,%ymm10,%ymm10 + vpor %ymm6,%ymm2,%ymm2 + vpxor %ymm13,%ymm11,%ymm11 + vmovdqa 448-256-128(%rbx),%ymm13 + + vpslld $5,%ymm0,%ymm7 + vpaddd %ymm15,%ymm4,%ymm4 + vpxor %ymm1,%ymm3,%ymm5 + vpaddd %ymm10,%ymm4,%ymm4 + vpxor 128-128(%rax),%ymm11,%ymm11 + vpsrld $27,%ymm0,%ymm8 + vpxor %ymm2,%ymm5,%ymm5 + vpxor %ymm13,%ymm11,%ymm11 + + vpslld $30,%ymm1,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm4,%ymm4 + vpsrld $31,%ymm11,%ymm9 + vpaddd %ymm11,%ymm11,%ymm11 + + vpsrld $2,%ymm1,%ymm1 + vpaddd %ymm7,%ymm4,%ymm4 + vpor %ymm9,%ymm11,%ymm11 + vpor %ymm6,%ymm1,%ymm1 + vpxor %ymm14,%ymm12,%ymm12 + vmovdqa 480-256-128(%rbx),%ymm14 + + vpslld $5,%ymm4,%ymm7 + vpaddd %ymm15,%ymm3,%ymm3 + vpxor %ymm0,%ymm2,%ymm5 + vpaddd %ymm11,%ymm3,%ymm3 + vpxor 160-128(%rax),%ymm12,%ymm12 + vpsrld $27,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm14,%ymm12,%ymm12 + + vpslld $30,%ymm0,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm3,%ymm3 + vpsrld $31,%ymm12,%ymm9 + vpaddd %ymm12,%ymm12,%ymm12 + + vpsrld $2,%ymm0,%ymm0 + vpaddd %ymm7,%ymm3,%ymm3 + vpor %ymm9,%ymm12,%ymm12 + vpor %ymm6,%ymm0,%ymm0 + vpxor %ymm10,%ymm13,%ymm13 + vmovdqa 0-128(%rax),%ymm10 + + vpslld $5,%ymm3,%ymm7 + vpaddd %ymm15,%ymm2,%ymm2 + vpxor %ymm4,%ymm1,%ymm5 + vpaddd %ymm12,%ymm2,%ymm2 + vpxor 192-128(%rax),%ymm13,%ymm13 + vpsrld $27,%ymm3,%ymm8 + vpxor %ymm0,%ymm5,%ymm5 + vpxor %ymm10,%ymm13,%ymm13 + + vpslld $30,%ymm4,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm2,%ymm2 + vpsrld $31,%ymm13,%ymm9 + vpaddd %ymm13,%ymm13,%ymm13 + + vpsrld $2,%ymm4,%ymm4 + vpaddd %ymm7,%ymm2,%ymm2 + vpor %ymm9,%ymm13,%ymm13 + vpor %ymm6,%ymm4,%ymm4 + vpxor %ymm11,%ymm14,%ymm14 + vmovdqa 32-128(%rax),%ymm11 + + vpslld $5,%ymm2,%ymm7 + vpaddd %ymm15,%ymm1,%ymm1 + vpxor %ymm3,%ymm0,%ymm5 + vpaddd %ymm13,%ymm1,%ymm1 + vpxor 224-128(%rax),%ymm14,%ymm14 + 
vpsrld $27,%ymm2,%ymm8 + vpxor %ymm4,%ymm5,%ymm5 + vpxor %ymm11,%ymm14,%ymm14 + + vpslld $30,%ymm3,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm1,%ymm1 + vpsrld $31,%ymm14,%ymm9 + vpaddd %ymm14,%ymm14,%ymm14 + + vpsrld $2,%ymm3,%ymm3 + vpaddd %ymm7,%ymm1,%ymm1 + vpor %ymm9,%ymm14,%ymm14 + vpor %ymm6,%ymm3,%ymm3 + vpslld $5,%ymm1,%ymm7 + vpaddd %ymm15,%ymm0,%ymm0 + vpxor %ymm2,%ymm4,%ymm5 + + vpsrld $27,%ymm1,%ymm8 + vpaddd %ymm14,%ymm0,%ymm0 + vpxor %ymm3,%ymm5,%ymm5 + + vpslld $30,%ymm2,%ymm6 + vpor %ymm8,%ymm7,%ymm7 + vpaddd %ymm5,%ymm0,%ymm0 + + vpsrld $2,%ymm2,%ymm2 + vpaddd %ymm7,%ymm0,%ymm0 + vpor %ymm6,%ymm2,%ymm2 + movl $1,%ecx + leaq 512(%rsp),%rbx + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqu (%rbx),%ymm5 + vpxor %ymm7,%ymm7,%ymm7 + vmovdqa %ymm5,%ymm6 + vpcmpgtd %ymm7,%ymm6,%ymm6 + vpaddd %ymm6,%ymm5,%ymm5 + + vpand %ymm6,%ymm0,%ymm0 + vpand %ymm6,%ymm1,%ymm1 + vpaddd 0(%rdi),%ymm0,%ymm0 + vpand %ymm6,%ymm2,%ymm2 + vpaddd 32(%rdi),%ymm1,%ymm1 + vpand %ymm6,%ymm3,%ymm3 + vpaddd 64(%rdi),%ymm2,%ymm2 + vpand %ymm6,%ymm4,%ymm4 + vpaddd 96(%rdi),%ymm3,%ymm3 + vpaddd 128(%rdi),%ymm4,%ymm4 + vmovdqu %ymm0,0(%rdi) + vmovdqu %ymm1,32(%rdi) + vmovdqu %ymm2,64(%rdi) + vmovdqu %ymm3,96(%rdi) + vmovdqu %ymm4,128(%rdi) + + vmovdqu %ymm5,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu 96(%rbp),%ymm9 + decl %edx + jnz L$oop_avx2 + + + + + + + +L$done_avx2: + movq 544(%rsp),%rax + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_avx2: + .byte 0xf3,0xc3 + + +.p2align 8 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +K_XX_XX: +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s index 9bb9bf0f222f81..c89ffe3df60f54 100644 --- a/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha1-x86_64.s @@ -1,30 +1,41 @@ .text - .globl _sha1_block_data_order .p2align 4 _sha1_block_data_order: movl _OPENSSL_ia32cap_P+0(%rip),%r9d movl _OPENSSL_ia32cap_P+4(%rip),%r8d + movl _OPENSSL_ia32cap_P+8(%rip),%r10d testl $512,%r8d jz L$ialu + testl $536870912,%r10d + jnz _shaext_shortcut + andl $296,%r10d + cmpl $296,%r10d + je _avx2_shortcut + andl $268435456,%r8d + andl $1073741824,%r9d + orl %r9d,%r8d + cmpl $1342177280,%r8d + je _avx_shortcut jmp _ssse3_shortcut .p2align 4 L$ialu: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 
pushq %r13 - movq %rsp,%r11 + pushq %r14 movq %rdi,%r8 subq $72,%rsp movq %rsi,%r9 andq $-64,%rsp movq %rdx,%r10 - movq %r11,64(%rsp) + movq %rax,64(%rsp) L$prologue: movl 0(%r8),%esi @@ -38,1230 +49,1168 @@ L$prologue: L$loop: movl 0(%r9),%edx bswapl %edx - movl %edx,0(%rsp) - movl %r11d,%eax movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) movl %esi,%ecx - xorl %r12d,%eax bswapl %ebp + xorl %r11d,%eax roll $5,%ecx - leal 1518500249(%rdx,%r13,1),%r13d andl %edi,%eax - movl %ebp,4(%rsp) + leal 1518500249(%rdx,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d - movl %edi,%eax - movl 8(%r9),%edx + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) movl %r13d,%ecx - xorl %r11d,%eax - bswapl %edx + bswapl %r14d + xorl %edi,%eax roll $5,%ecx - leal 1518500249(%rbp,%r12,1),%r12d andl %esi,%eax - movl %edx,8(%rsp) + leal 1518500249(%rbp,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d - movl %esi,%eax - movl 12(%r9),%ebp + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) movl %r12d,%ecx - xorl %edi,%eax - bswapl %ebp + bswapl %edx + xorl %esi,%eax roll $5,%ecx - leal 1518500249(%rdx,%r11,1),%r11d andl %r13d,%eax - movl %ebp,12(%rsp) + leal 1518500249(%r14,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d - movl %r13d,%eax - movl 16(%r9),%edx + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) movl %r11d,%ecx - xorl %esi,%eax - bswapl %edx + bswapl %ebp + xorl %r13d,%eax roll $5,%ecx - leal 1518500249(%rbp,%rdi,1),%edi andl %r12d,%eax - movl %edx,16(%rsp) + leal 1518500249(%rdx,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi - movl %r12d,%eax - movl 20(%r9),%ebp + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) movl %edi,%ecx - xorl %r13d,%eax - bswapl %ebp + bswapl %r14d + xorl %r12d,%eax roll $5,%ecx - leal 1518500249(%rdx,%rsi,1),%esi andl %r11d,%eax - movl %ebp,20(%rsp) + leal 1518500249(%rbp,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi - movl %r11d,%eax movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) movl %esi,%ecx - xorl %r12d,%eax bswapl %edx + xorl %r11d,%eax roll $5,%ecx - leal 1518500249(%rbp,%r13,1),%r13d andl %edi,%eax - movl %edx,24(%rsp) + leal 1518500249(%r14,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d - movl %edi,%eax movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) movl %r13d,%ecx - xorl %r11d,%eax bswapl %ebp + xorl %edi,%eax roll $5,%ecx - leal 1518500249(%rdx,%r12,1),%r12d andl %esi,%eax - movl %ebp,28(%rsp) + leal 1518500249(%rdx,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d - movl %esi,%eax - movl 32(%r9),%edx + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) movl %r12d,%ecx - xorl %edi,%eax - bswapl %edx + bswapl %r14d + xorl %esi,%eax roll $5,%ecx - leal 1518500249(%rbp,%r11,1),%r11d andl %r13d,%eax - movl %edx,32(%rsp) + leal 1518500249(%rbp,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d - movl %r13d,%eax - movl 36(%r9),%ebp + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) movl %r11d,%ecx - xorl %esi,%eax - bswapl %ebp + bswapl %edx + xorl %r13d,%eax roll $5,%ecx - leal 1518500249(%rdx,%rdi,1),%edi andl %r12d,%eax - movl %ebp,36(%rsp) + leal 1518500249(%r14,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi - movl %r12d,%eax - movl 40(%r9),%edx + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) movl %edi,%ecx - xorl %r13d,%eax - bswapl %edx + bswapl %ebp 
+ xorl %r12d,%eax roll $5,%ecx - leal 1518500249(%rbp,%rsi,1),%esi andl %r11d,%eax - movl %edx,40(%rsp) + leal 1518500249(%rdx,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi - movl %r11d,%eax - movl 44(%r9),%ebp + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) movl %esi,%ecx - xorl %r12d,%eax - bswapl %ebp + bswapl %r14d + xorl %r11d,%eax roll $5,%ecx - leal 1518500249(%rdx,%r13,1),%r13d andl %edi,%eax - movl %ebp,44(%rsp) + leal 1518500249(%rbp,%r13,1),%r13d addl %ecx,%r13d xorl %r12d,%eax roll $30,%edi addl %eax,%r13d - movl %edi,%eax movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) movl %r13d,%ecx - xorl %r11d,%eax bswapl %edx + xorl %edi,%eax roll $5,%ecx - leal 1518500249(%rbp,%r12,1),%r12d andl %esi,%eax - movl %edx,48(%rsp) + leal 1518500249(%r14,%r12,1),%r12d addl %ecx,%r12d xorl %r11d,%eax roll $30,%esi addl %eax,%r12d - movl %esi,%eax movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) movl %r12d,%ecx - xorl %edi,%eax bswapl %ebp + xorl %esi,%eax roll $5,%ecx - leal 1518500249(%rdx,%r11,1),%r11d andl %r13d,%eax - movl %ebp,52(%rsp) + leal 1518500249(%rdx,%r11,1),%r11d addl %ecx,%r11d xorl %edi,%eax roll $30,%r13d addl %eax,%r11d - movl %r13d,%eax - movl 56(%r9),%edx + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) movl %r11d,%ecx - xorl %esi,%eax - bswapl %edx + bswapl %r14d + xorl %r13d,%eax roll $5,%ecx - leal 1518500249(%rbp,%rdi,1),%edi andl %r12d,%eax - movl %edx,56(%rsp) + leal 1518500249(%rbp,%rdi,1),%edi addl %ecx,%edi xorl %esi,%eax roll $30,%r12d addl %eax,%edi - movl %r12d,%eax - movl 60(%r9),%ebp + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) movl %edi,%ecx - xorl %r13d,%eax - bswapl %ebp + bswapl %edx + xorl %r12d,%eax roll $5,%ecx - leal 1518500249(%rdx,%rsi,1),%esi andl %r11d,%eax - movl %ebp,60(%rsp) + leal 1518500249(%r14,%rsi,1),%esi addl %ecx,%esi xorl %r13d,%eax roll $30,%r11d addl %eax,%esi - movl 0(%rsp),%edx - movl %r11d,%eax + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) movl %esi,%ecx - xorl 8(%rsp),%edx - xorl %r12d,%eax + xorl 8(%rsp),%ebp + xorl %r11d,%eax roll $5,%ecx - xorl 32(%rsp),%edx + xorl 32(%rsp),%ebp andl %edi,%eax - leal 1518500249(%rbp,%r13,1),%r13d - xorl 52(%rsp),%edx + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi xorl %r12d,%eax - roll $1,%edx addl %ecx,%r13d - roll $30,%edi - movl %edx,0(%rsp) + roll $1,%ebp addl %eax,%r13d - movl 4(%rsp),%ebp - movl %edi,%eax + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) movl %r13d,%ecx - xorl 12(%rsp),%ebp - xorl %r11d,%eax + xorl 12(%rsp),%r14d + xorl %edi,%eax roll $5,%ecx - xorl 36(%rsp),%ebp + xorl 36(%rsp),%r14d andl %esi,%eax - leal 1518500249(%rdx,%r12,1),%r12d - xorl 56(%rsp),%ebp + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi xorl %r11d,%eax - roll $1,%ebp addl %ecx,%r12d - roll $30,%esi - movl %ebp,4(%rsp) + roll $1,%r14d addl %eax,%r12d - movl 8(%rsp),%edx - movl %esi,%eax + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) movl %r12d,%ecx xorl 16(%rsp),%edx - xorl %edi,%eax + xorl %esi,%eax roll $5,%ecx xorl 40(%rsp),%edx andl %r13d,%eax - leal 1518500249(%rbp,%r11,1),%r11d - xorl 60(%rsp),%edx + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d xorl %edi,%eax - roll $1,%edx addl %ecx,%r11d - roll $30,%r13d - movl %edx,8(%rsp) + roll $1,%edx addl %eax,%r11d - movl 12(%rsp),%ebp - movl %r13d,%eax + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) movl %r11d,%ecx xorl 20(%rsp),%ebp - xorl %esi,%eax + xorl %r13d,%eax roll $5,%ecx xorl 44(%rsp),%ebp andl %r12d,%eax leal 
1518500249(%rdx,%rdi,1),%edi - xorl 0(%rsp),%ebp + roll $30,%r12d xorl %esi,%eax - roll $1,%ebp addl %ecx,%edi - roll $30,%r12d - movl %ebp,12(%rsp) + roll $1,%ebp addl %eax,%edi - movl 16(%rsp),%edx - movl %r12d,%eax + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) movl %edi,%ecx - xorl 24(%rsp),%edx - xorl %r13d,%eax + xorl 24(%rsp),%r14d + xorl %r12d,%eax roll $5,%ecx - xorl 48(%rsp),%edx + xorl 48(%rsp),%r14d andl %r11d,%eax leal 1518500249(%rbp,%rsi,1),%esi - xorl 4(%rsp),%edx + roll $30,%r11d xorl %r13d,%eax - roll $1,%edx addl %ecx,%esi - roll $30,%r11d - movl %edx,16(%rsp) + roll $1,%r14d addl %eax,%esi - movl 20(%rsp),%ebp - movl %r11d,%eax + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) movl %esi,%ecx - xorl 28(%rsp),%ebp - xorl %edi,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r13,1),%r13d - xorl 52(%rsp),%ebp + xorl 28(%rsp),%edx xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 8(%rsp),%ebp roll $30,%edi addl %eax,%r13d - roll $1,%ebp - movl %ebp,20(%rsp) - movl 24(%rsp),%edx - movl %edi,%eax + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) movl %r13d,%ecx - xorl 32(%rsp),%edx - xorl %esi,%eax - roll $5,%ecx - leal 1859775393(%rbp,%r12,1),%r12d - xorl 56(%rsp),%edx + xorl 32(%rsp),%ebp xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 12(%rsp),%edx roll $30,%esi addl %eax,%r12d - roll $1,%edx - movl %edx,24(%rsp) - movl 28(%rsp),%ebp - movl %esi,%eax + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) movl %r12d,%ecx - xorl 36(%rsp),%ebp - xorl %r13d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r11,1),%r11d - xorl 60(%rsp),%ebp + xorl 36(%rsp),%r14d xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 16(%rsp),%ebp roll $30,%r13d addl %eax,%r11d - roll $1,%ebp - movl %ebp,28(%rsp) - movl 32(%rsp),%edx - movl %r13d,%eax + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) movl %r11d,%ecx xorl 40(%rsp),%edx - xorl %r12d,%eax + xorl %esi,%eax roll $5,%ecx - leal 1859775393(%rbp,%rdi,1),%edi xorl 0(%rsp),%edx - xorl %esi,%eax + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 20(%rsp),%edx roll $30,%r12d addl %eax,%edi roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax movl %edx,32(%rsp) - movl 36(%rsp),%ebp - movl %r12d,%eax movl %edi,%ecx xorl 44(%rsp),%ebp - xorl %r11d,%eax + xorl %r13d,%eax roll $5,%ecx - leal 1859775393(%rdx,%rsi,1),%esi xorl 4(%rsp),%ebp - xorl %r13d,%eax + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 24(%rsp),%ebp roll $30,%r11d addl %eax,%esi roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax movl %ebp,36(%rsp) - movl 40(%rsp),%edx - movl %r11d,%eax movl %esi,%ecx - xorl 48(%rsp),%edx - xorl %edi,%eax + xorl 48(%rsp),%r14d + xorl %r12d,%eax roll $5,%ecx + xorl 8(%rsp),%r14d leal 1859775393(%rbp,%r13,1),%r13d - xorl 8(%rsp),%edx - xorl %r12d,%eax + xorl %r11d,%eax addl %ecx,%r13d - xorl 28(%rsp),%edx roll $30,%edi addl %eax,%r13d - roll $1,%edx - movl %edx,40(%rsp) - movl 44(%rsp),%ebp - movl %edi,%eax + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) movl %r13d,%ecx - xorl 52(%rsp),%ebp - xorl %esi,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r12,1),%r12d - xorl 12(%rsp),%ebp + xorl 52(%rsp),%edx xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + 
leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 32(%rsp),%ebp roll $30,%esi addl %eax,%r12d - roll $1,%ebp - movl %ebp,44(%rsp) - movl 48(%rsp),%edx - movl %esi,%eax + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) movl %r12d,%ecx - xorl 56(%rsp),%edx - xorl %r13d,%eax - roll $5,%ecx - leal 1859775393(%rbp,%r11,1),%r11d - xorl 16(%rsp),%edx + xorl 56(%rsp),%ebp xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 36(%rsp),%edx roll $30,%r13d addl %eax,%r11d - roll $1,%edx - movl %edx,48(%rsp) - movl 52(%rsp),%ebp - movl %r13d,%eax + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) movl %r11d,%ecx - xorl 60(%rsp),%ebp - xorl %r12d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%rdi,1),%edi - xorl 20(%rsp),%ebp + xorl 60(%rsp),%r14d xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 40(%rsp),%ebp roll $30,%r12d addl %eax,%edi - roll $1,%ebp - movl %ebp,52(%rsp) - movl 56(%rsp),%edx - movl %r12d,%eax + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) movl %edi,%ecx xorl 0(%rsp),%edx - xorl %r11d,%eax + xorl %r13d,%eax roll $5,%ecx - leal 1859775393(%rbp,%rsi,1),%esi xorl 24(%rsp),%edx - xorl %r13d,%eax + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 44(%rsp),%edx roll $30,%r11d addl %eax,%esi roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax movl %edx,56(%rsp) - movl 60(%rsp),%ebp - movl %r11d,%eax movl %esi,%ecx xorl 4(%rsp),%ebp - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal 1859775393(%rdx,%r13,1),%r13d xorl 28(%rsp),%ebp - xorl %r12d,%eax + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 48(%rsp),%ebp roll $30,%edi addl %eax,%r13d roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax movl %ebp,60(%rsp) - movl 0(%rsp),%edx - movl %edi,%eax movl %r13d,%ecx - xorl 8(%rsp),%edx - xorl %esi,%eax + xorl 8(%rsp),%r14d + xorl %r11d,%eax roll $5,%ecx + xorl 32(%rsp),%r14d leal 1859775393(%rbp,%r12,1),%r12d - xorl 32(%rsp),%edx - xorl %r11d,%eax + xorl %edi,%eax addl %ecx,%r12d - xorl 52(%rsp),%edx roll $30,%esi addl %eax,%r12d - roll $1,%edx - movl %edx,0(%rsp) - movl 4(%rsp),%ebp - movl %esi,%eax + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) movl %r12d,%ecx - xorl 12(%rsp),%ebp - xorl %r13d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%r11,1),%r11d - xorl 36(%rsp),%ebp + xorl 12(%rsp),%edx xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 56(%rsp),%ebp roll $30,%r13d addl %eax,%r11d - roll $1,%ebp - movl %ebp,4(%rsp) - movl 8(%rsp),%edx - movl %r13d,%eax + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) movl %r11d,%ecx - xorl 16(%rsp),%edx - xorl %r12d,%eax - roll $5,%ecx - leal 1859775393(%rbp,%rdi,1),%edi - xorl 40(%rsp),%edx + xorl 16(%rsp),%ebp xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 60(%rsp),%edx roll $30,%r12d addl %eax,%edi - roll $1,%edx - movl %edx,8(%rsp) - movl 12(%rsp),%ebp - movl %r12d,%eax + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) movl %edi,%ecx - xorl 20(%rsp),%ebp - xorl %r11d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%rsi,1),%esi - xorl 44(%rsp),%ebp + xorl 20(%rsp),%r14d xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + 
leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 0(%rsp),%ebp roll $30,%r11d addl %eax,%esi - roll $1,%ebp - movl %ebp,12(%rsp) - movl 16(%rsp),%edx - movl %r11d,%eax + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) movl %esi,%ecx xorl 24(%rsp),%edx - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal 1859775393(%rbp,%r13,1),%r13d xorl 48(%rsp),%edx - xorl %r12d,%eax + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 4(%rsp),%edx roll $30,%edi addl %eax,%r13d roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax movl %edx,16(%rsp) - movl 20(%rsp),%ebp - movl %edi,%eax movl %r13d,%ecx xorl 28(%rsp),%ebp - xorl %esi,%eax + xorl %r11d,%eax roll $5,%ecx - leal 1859775393(%rdx,%r12,1),%r12d xorl 52(%rsp),%ebp - xorl %r11d,%eax + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 8(%rsp),%ebp roll $30,%esi addl %eax,%r12d roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax movl %ebp,20(%rsp) - movl 24(%rsp),%edx - movl %esi,%eax movl %r12d,%ecx - xorl 32(%rsp),%edx - xorl %r13d,%eax + xorl 32(%rsp),%r14d + xorl %edi,%eax roll $5,%ecx + xorl 56(%rsp),%r14d leal 1859775393(%rbp,%r11,1),%r11d - xorl 56(%rsp),%edx - xorl %edi,%eax + xorl %esi,%eax addl %ecx,%r11d - xorl 12(%rsp),%edx roll $30,%r13d addl %eax,%r11d - roll $1,%edx - movl %edx,24(%rsp) - movl 28(%rsp),%ebp - movl %r13d,%eax + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) movl %r11d,%ecx - xorl 36(%rsp),%ebp - xorl %r12d,%eax - roll $5,%ecx - leal 1859775393(%rdx,%rdi,1),%edi - xorl 60(%rsp),%ebp + xorl 36(%rsp),%edx xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 16(%rsp),%ebp roll $30,%r12d addl %eax,%edi - roll $1,%ebp - movl %ebp,28(%rsp) - movl 32(%rsp),%edx - movl %r12d,%eax + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) movl %edi,%ecx - xorl 40(%rsp),%edx - xorl %r11d,%eax - roll $5,%ecx - leal 1859775393(%rbp,%rsi,1),%esi - xorl 0(%rsp),%edx + xorl 40(%rsp),%ebp xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 20(%rsp),%edx roll $30,%r11d addl %eax,%esi - roll $1,%edx - movl %edx,32(%rsp) - movl 36(%rsp),%ebp - movl %r11d,%eax - movl %r11d,%ebx - xorl 44(%rsp),%ebp - andl %r12d,%eax + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax movl %esi,%ecx - xorl 4(%rsp),%ebp - xorl %r12d,%ebx - leal -1894007588(%rdx,%r13,1),%r13d + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 24(%rsp),%ebp addl %eax,%r13d + roll $1,%r14d andl %edi,%ebx - roll $1,%ebp - addl %ebx,%r13d - roll $30,%edi - movl %ebp,36(%rsp) addl %ecx,%r13d - movl 40(%rsp),%edx - movl %edi,%eax - movl %edi,%ebx + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx xorl 48(%rsp),%edx - andl %r11d,%eax + andl %edi,%eax movl %r13d,%ecx xorl 8(%rsp),%edx - xorl %r11d,%ebx - leal -1894007588(%rbp,%r12,1),%r12d + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 28(%rsp),%edx addl %eax,%r12d - andl %esi,%ebx roll $1,%edx - addl %ebx,%r12d + andl %esi,%ebx + addl %ecx,%r12d roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax movl %edx,40(%rsp) - addl %ecx,%r12d - movl 44(%rsp),%ebp - movl %esi,%eax - movl %esi,%ebx + movl %edi,%ebx xorl 
52(%rsp),%ebp - andl %edi,%eax + andl %esi,%eax movl %r12d,%ecx xorl 12(%rsp),%ebp - xorl %edi,%ebx leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 32(%rsp),%ebp addl %eax,%r11d - andl %r13d,%ebx roll $1,%ebp - addl %ebx,%r11d + andl %r13d,%ebx + addl %ecx,%r11d roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax movl %ebp,44(%rsp) - addl %ecx,%r11d - movl 48(%rsp),%edx - movl %r13d,%eax - movl %r13d,%ebx - xorl 56(%rsp),%edx - andl %esi,%eax + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax movl %r11d,%ecx - xorl 16(%rsp),%edx - xorl %esi,%ebx + xorl 16(%rsp),%r14d leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 36(%rsp),%edx addl %eax,%edi + roll $1,%r14d andl %r12d,%ebx - roll $1,%edx - addl %ebx,%edi - roll $30,%r12d - movl %edx,48(%rsp) addl %ecx,%edi - movl 52(%rsp),%ebp - movl %r12d,%eax - movl %r12d,%ebx - xorl 60(%rsp),%ebp - andl %r13d,%eax + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax movl %edi,%ecx - xorl 20(%rsp),%ebp - xorl %r13d,%ebx - leal -1894007588(%rdx,%rsi,1),%esi + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 40(%rsp),%ebp addl %eax,%esi + roll $1,%edx andl %r11d,%ebx - roll $1,%ebp - addl %ebx,%esi - roll $30,%r11d - movl %ebp,52(%rsp) addl %ecx,%esi - movl 56(%rsp),%edx - movl %r11d,%eax - movl %r11d,%ebx - xorl 0(%rsp),%edx - andl %r12d,%eax + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax movl %esi,%ecx - xorl 24(%rsp),%edx - xorl %r12d,%ebx - leal -1894007588(%rbp,%r13,1),%r13d + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 44(%rsp),%edx addl %eax,%r13d + roll $1,%ebp andl %edi,%ebx - roll $1,%edx - addl %ebx,%r13d - roll $30,%edi - movl %edx,56(%rsp) addl %ecx,%r13d - movl 60(%rsp),%ebp - movl %edi,%eax - movl %edi,%ebx - xorl 4(%rsp),%ebp - andl %r11d,%eax + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax movl %r13d,%ecx - xorl 28(%rsp),%ebp - xorl %r11d,%ebx - leal -1894007588(%rdx,%r12,1),%r12d + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 48(%rsp),%ebp addl %eax,%r12d + roll $1,%r14d andl %esi,%ebx - roll $1,%ebp - addl %ebx,%r12d - roll $30,%esi - movl %ebp,60(%rsp) addl %ecx,%r12d - movl 0(%rsp),%edx - movl %esi,%eax - movl %esi,%ebx + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx xorl 8(%rsp),%edx - andl %edi,%eax + andl %esi,%eax movl %r12d,%ecx xorl 32(%rsp),%edx - xorl %edi,%ebx - leal -1894007588(%rbp,%r11,1),%r11d + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 52(%rsp),%edx addl %eax,%r11d - andl %r13d,%ebx roll $1,%edx - addl %ebx,%r11d + andl %r13d,%ebx + addl %ecx,%r11d roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax movl %edx,0(%rsp) - addl %ecx,%r11d - movl 4(%rsp),%ebp - movl %r13d,%eax - movl %r13d,%ebx + movl %esi,%ebx xorl 12(%rsp),%ebp - andl %esi,%eax + andl %r13d,%eax movl %r11d,%ecx xorl 36(%rsp),%ebp - xorl %esi,%ebx leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 56(%rsp),%ebp addl %eax,%edi - andl %r12d,%ebx roll $1,%ebp - addl %ebx,%edi + andl %r12d,%ebx + addl %ecx,%edi 
roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax movl %ebp,4(%rsp) - addl %ecx,%edi - movl 8(%rsp),%edx - movl %r12d,%eax - movl %r12d,%ebx - xorl 16(%rsp),%edx - andl %r13d,%eax + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax movl %edi,%ecx - xorl 40(%rsp),%edx - xorl %r13d,%ebx + xorl 40(%rsp),%r14d leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 60(%rsp),%edx addl %eax,%esi + roll $1,%r14d andl %r11d,%ebx - roll $1,%edx - addl %ebx,%esi - roll $30,%r11d - movl %edx,8(%rsp) addl %ecx,%esi - movl 12(%rsp),%ebp - movl %r11d,%eax - movl %r11d,%ebx - xorl 20(%rsp),%ebp - andl %r12d,%eax + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax movl %esi,%ecx - xorl 44(%rsp),%ebp - xorl %r12d,%ebx - leal -1894007588(%rdx,%r13,1),%r13d + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 0(%rsp),%ebp addl %eax,%r13d + roll $1,%edx andl %edi,%ebx - roll $1,%ebp - addl %ebx,%r13d - roll $30,%edi - movl %ebp,12(%rsp) addl %ecx,%r13d - movl 16(%rsp),%edx - movl %edi,%eax - movl %edi,%ebx - xorl 24(%rsp),%edx - andl %r11d,%eax + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax movl %r13d,%ecx - xorl 48(%rsp),%edx - xorl %r11d,%ebx - leal -1894007588(%rbp,%r12,1),%r12d + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 4(%rsp),%edx addl %eax,%r12d + roll $1,%ebp andl %esi,%ebx - roll $1,%edx - addl %ebx,%r12d - roll $30,%esi - movl %edx,16(%rsp) addl %ecx,%r12d - movl 20(%rsp),%ebp - movl %esi,%eax - movl %esi,%ebx - xorl 28(%rsp),%ebp - andl %edi,%eax + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax movl %r12d,%ecx - xorl 52(%rsp),%ebp - xorl %edi,%ebx - leal -1894007588(%rdx,%r11,1),%r11d + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 8(%rsp),%ebp addl %eax,%r11d + roll $1,%r14d andl %r13d,%ebx - roll $1,%ebp - addl %ebx,%r11d - roll $30,%r13d - movl %ebp,20(%rsp) addl %ecx,%r11d - movl 24(%rsp),%edx - movl %r13d,%eax - movl %r13d,%ebx + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx xorl 32(%rsp),%edx - andl %esi,%eax + andl %r13d,%eax movl %r11d,%ecx xorl 56(%rsp),%edx - xorl %esi,%ebx - leal -1894007588(%rbp,%rdi,1),%edi + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 12(%rsp),%edx addl %eax,%edi - andl %r12d,%ebx roll $1,%edx - addl %ebx,%edi + andl %r12d,%ebx + addl %ecx,%edi roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax movl %edx,24(%rsp) - addl %ecx,%edi - movl 28(%rsp),%ebp - movl %r12d,%eax - movl %r12d,%ebx + movl %r13d,%ebx xorl 36(%rsp),%ebp - andl %r13d,%eax + andl %r12d,%eax movl %edi,%ecx xorl 60(%rsp),%ebp - xorl %r13d,%ebx leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 16(%rsp),%ebp addl %eax,%esi - andl %r11d,%ebx roll $1,%ebp - addl %ebx,%esi + andl %r11d,%ebx + addl %ecx,%esi roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax movl %ebp,28(%rsp) - addl %ecx,%esi - movl 32(%rsp),%edx - movl %r11d,%eax - movl %r11d,%ebx - xorl 40(%rsp),%edx - andl %r12d,%eax + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax movl %esi,%ecx - xorl 
0(%rsp),%edx - xorl %r12d,%ebx + xorl 0(%rsp),%r14d leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx roll $5,%ecx - xorl 20(%rsp),%edx addl %eax,%r13d + roll $1,%r14d andl %edi,%ebx - roll $1,%edx - addl %ebx,%r13d - roll $30,%edi - movl %edx,32(%rsp) addl %ecx,%r13d - movl 36(%rsp),%ebp - movl %edi,%eax - movl %edi,%ebx - xorl 44(%rsp),%ebp - andl %r11d,%eax + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax movl %r13d,%ecx - xorl 4(%rsp),%ebp - xorl %r11d,%ebx - leal -1894007588(%rdx,%r12,1),%r12d + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx roll $5,%ecx - xorl 24(%rsp),%ebp addl %eax,%r12d + roll $1,%edx andl %esi,%ebx - roll $1,%ebp - addl %ebx,%r12d - roll $30,%esi - movl %ebp,36(%rsp) addl %ecx,%r12d - movl 40(%rsp),%edx - movl %esi,%eax - movl %esi,%ebx - xorl 48(%rsp),%edx - andl %edi,%eax + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax movl %r12d,%ecx - xorl 8(%rsp),%edx - xorl %edi,%ebx - leal -1894007588(%rbp,%r11,1),%r11d + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx roll $5,%ecx - xorl 28(%rsp),%edx addl %eax,%r11d + roll $1,%ebp andl %r13d,%ebx - roll $1,%edx - addl %ebx,%r11d - roll $30,%r13d - movl %edx,40(%rsp) addl %ecx,%r11d - movl 44(%rsp),%ebp - movl %r13d,%eax - movl %r13d,%ebx - xorl 52(%rsp),%ebp - andl %esi,%eax + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax movl %r11d,%ecx - xorl 12(%rsp),%ebp - xorl %esi,%ebx - leal -1894007588(%rdx,%rdi,1),%edi + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx roll $5,%ecx - xorl 32(%rsp),%ebp addl %eax,%edi + roll $1,%r14d andl %r12d,%ebx - roll $1,%ebp - addl %ebx,%edi - roll $30,%r12d - movl %ebp,44(%rsp) addl %ecx,%edi - movl 48(%rsp),%edx - movl %r12d,%eax - movl %r12d,%ebx + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx xorl 56(%rsp),%edx - andl %r13d,%eax + andl %r12d,%eax movl %edi,%ecx xorl 16(%rsp),%edx - xorl %r13d,%ebx - leal -1894007588(%rbp,%rsi,1),%esi + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx roll $5,%ecx - xorl 36(%rsp),%edx addl %eax,%esi - andl %r11d,%ebx roll $1,%edx - addl %ebx,%esi + andl %r11d,%ebx + addl %ecx,%esi roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax movl %edx,48(%rsp) - addl %ecx,%esi - movl 52(%rsp),%ebp - movl %r11d,%eax movl %esi,%ecx xorl 60(%rsp),%ebp - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal -899497514(%rdx,%r13,1),%r13d xorl 20(%rsp),%ebp - xorl %r12d,%eax + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 40(%rsp),%ebp roll $30,%edi addl %eax,%r13d roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax movl %ebp,52(%rsp) - movl 56(%rsp),%edx - movl %edi,%eax movl %r13d,%ecx - xorl 0(%rsp),%edx - xorl %esi,%eax + xorl 0(%rsp),%r14d + xorl %r11d,%eax roll $5,%ecx + xorl 24(%rsp),%r14d leal -899497514(%rbp,%r12,1),%r12d - xorl 24(%rsp),%edx - xorl %r11d,%eax + xorl %edi,%eax addl %ecx,%r12d - xorl 44(%rsp),%edx roll $30,%esi addl %eax,%r12d - roll $1,%edx - movl %edx,56(%rsp) - movl 60(%rsp),%ebp - movl %esi,%eax + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) movl %r12d,%ecx - xorl 4(%rsp),%ebp - xorl %r13d,%eax - roll $5,%ecx - leal 
-899497514(%rdx,%r11,1),%r11d - xorl 28(%rsp),%ebp + xorl 4(%rsp),%edx xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 48(%rsp),%ebp roll $30,%r13d addl %eax,%r11d - roll $1,%ebp - movl %ebp,60(%rsp) - movl 0(%rsp),%edx - movl %r13d,%eax + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) movl %r11d,%ecx - xorl 8(%rsp),%edx - xorl %r12d,%eax - roll $5,%ecx - leal -899497514(%rbp,%rdi,1),%edi - xorl 32(%rsp),%edx + xorl 8(%rsp),%ebp xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 52(%rsp),%edx roll $30,%r12d addl %eax,%edi - roll $1,%edx - movl %edx,0(%rsp) - movl 4(%rsp),%ebp - movl %r12d,%eax + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) movl %edi,%ecx - xorl 12(%rsp),%ebp - xorl %r11d,%eax - roll $5,%ecx - leal -899497514(%rdx,%rsi,1),%esi - xorl 36(%rsp),%ebp + xorl 12(%rsp),%r14d xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 56(%rsp),%ebp roll $30,%r11d addl %eax,%esi - roll $1,%ebp - movl %ebp,4(%rsp) - movl 8(%rsp),%edx - movl %r11d,%eax + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) movl %esi,%ecx xorl 16(%rsp),%edx - xorl %edi,%eax + xorl %r12d,%eax roll $5,%ecx - leal -899497514(%rbp,%r13,1),%r13d xorl 40(%rsp),%edx - xorl %r12d,%eax + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 60(%rsp),%edx roll $30,%edi addl %eax,%r13d roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax movl %edx,8(%rsp) - movl 12(%rsp),%ebp - movl %edi,%eax movl %r13d,%ecx xorl 20(%rsp),%ebp - xorl %esi,%eax + xorl %r11d,%eax roll $5,%ecx - leal -899497514(%rdx,%r12,1),%r12d xorl 44(%rsp),%ebp - xorl %r11d,%eax + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 0(%rsp),%ebp roll $30,%esi addl %eax,%r12d roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax movl %ebp,12(%rsp) - movl 16(%rsp),%edx - movl %esi,%eax movl %r12d,%ecx - xorl 24(%rsp),%edx - xorl %r13d,%eax + xorl 24(%rsp),%r14d + xorl %edi,%eax roll $5,%ecx + xorl 48(%rsp),%r14d leal -899497514(%rbp,%r11,1),%r11d - xorl 48(%rsp),%edx - xorl %edi,%eax + xorl %esi,%eax addl %ecx,%r11d - xorl 4(%rsp),%edx roll $30,%r13d addl %eax,%r11d - roll $1,%edx - movl %edx,16(%rsp) - movl 20(%rsp),%ebp - movl %r13d,%eax + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) movl %r11d,%ecx - xorl 28(%rsp),%ebp - xorl %r12d,%eax - roll $5,%ecx - leal -899497514(%rdx,%rdi,1),%edi - xorl 52(%rsp),%ebp + xorl 28(%rsp),%edx xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 8(%rsp),%ebp roll $30,%r12d addl %eax,%edi - roll $1,%ebp - movl %ebp,20(%rsp) - movl 24(%rsp),%edx - movl %r12d,%eax + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) movl %edi,%ecx - xorl 32(%rsp),%edx - xorl %r11d,%eax - roll $5,%ecx - leal -899497514(%rbp,%rsi,1),%esi - xorl 56(%rsp),%edx + xorl 32(%rsp),%ebp xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 12(%rsp),%edx roll $30,%r11d addl %eax,%esi - roll $1,%edx - movl %edx,24(%rsp) - movl 28(%rsp),%ebp - movl %r11d,%eax + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) movl %esi,%ecx - xorl 36(%rsp),%ebp - xorl %edi,%eax - roll $5,%ecx - leal 
-899497514(%rdx,%r13,1),%r13d - xorl 60(%rsp),%ebp + xorl 36(%rsp),%r14d xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 16(%rsp),%ebp roll $30,%edi addl %eax,%r13d - roll $1,%ebp - movl %ebp,28(%rsp) - movl 32(%rsp),%edx - movl %edi,%eax + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) movl %r13d,%ecx xorl 40(%rsp),%edx - xorl %esi,%eax + xorl %r11d,%eax roll $5,%ecx - leal -899497514(%rbp,%r12,1),%r12d xorl 0(%rsp),%edx - xorl %r11d,%eax + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 20(%rsp),%edx roll $30,%esi addl %eax,%r12d roll $1,%edx - movl %edx,32(%rsp) - movl 36(%rsp),%ebp - movl %esi,%eax + xorl 36(%rsp),%ebp + movl %r13d,%eax + movl %r12d,%ecx xorl 44(%rsp),%ebp - xorl %r13d,%eax + xorl %edi,%eax roll $5,%ecx - leal -899497514(%rdx,%r11,1),%r11d xorl 4(%rsp),%ebp - xorl %edi,%eax + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 24(%rsp),%ebp roll $30,%r13d addl %eax,%r11d roll $1,%ebp - movl %ebp,36(%rsp) - movl 40(%rsp),%edx - movl %r13d,%eax + xorl 40(%rsp),%r14d + movl %r12d,%eax + movl %r11d,%ecx - xorl 48(%rsp),%edx - xorl %r12d,%eax + xorl 48(%rsp),%r14d + xorl %esi,%eax roll $5,%ecx + xorl 8(%rsp),%r14d leal -899497514(%rbp,%rdi,1),%edi - xorl 8(%rsp),%edx - xorl %esi,%eax + xorl %r13d,%eax addl %ecx,%edi - xorl 28(%rsp),%edx roll $30,%r12d addl %eax,%edi - roll $1,%edx - movl %edx,40(%rsp) - movl 44(%rsp),%ebp - movl %r12d,%eax + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + movl %edi,%ecx - xorl 52(%rsp),%ebp - xorl %r11d,%eax - roll $5,%ecx - leal -899497514(%rdx,%rsi,1),%esi - xorl 12(%rsp),%ebp + xorl 52(%rsp),%edx xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax addl %ecx,%esi - xorl 32(%rsp),%ebp roll $30,%r11d addl %eax,%esi - roll $1,%ebp - movl %ebp,44(%rsp) - movl 48(%rsp),%edx - movl %r11d,%eax - movl %esi,%ecx - xorl 56(%rsp),%edx - xorl %edi,%eax - roll $5,%ecx - leal -899497514(%rbp,%r13,1),%r13d - xorl 16(%rsp),%edx + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax addl %ecx,%r13d - xorl 36(%rsp),%edx roll $30,%edi addl %eax,%r13d - roll $1,%edx - movl %edx,48(%rsp) - movl 52(%rsp),%ebp - movl %edi,%eax + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + movl %r13d,%ecx - xorl 60(%rsp),%ebp - xorl %esi,%eax - roll $5,%ecx - leal -899497514(%rdx,%r12,1),%r12d - xorl 20(%rsp),%ebp + xorl 60(%rsp),%r14d xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax addl %ecx,%r12d - xorl 40(%rsp),%ebp roll $30,%esi addl %eax,%r12d - roll $1,%ebp - movl 56(%rsp),%edx - movl %esi,%eax + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + movl %r12d,%ecx xorl 0(%rsp),%edx - xorl %r13d,%eax + xorl %edi,%eax roll $5,%ecx - leal -899497514(%rbp,%r11,1),%r11d xorl 24(%rsp),%edx - xorl %edi,%eax + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax addl %ecx,%r11d - xorl 44(%rsp),%edx roll $30,%r13d addl %eax,%r11d roll $1,%edx - movl 60(%rsp),%ebp - movl %r13d,%eax + xorl 60(%rsp),%ebp + movl %r12d,%eax + movl %r11d,%ecx xorl 4(%rsp),%ebp - xorl %r12d,%eax + xorl %esi,%eax roll $5,%ecx - leal -899497514(%rdx,%rdi,1),%edi xorl 28(%rsp),%ebp - xorl %esi,%eax + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax addl %ecx,%edi - xorl 
48(%rsp),%ebp roll $30,%r12d addl %eax,%edi roll $1,%ebp - movl %r12d,%eax + movl %r11d,%eax movl %edi,%ecx - xorl %r11d,%eax + xorl %r13d,%eax leal -899497514(%rbp,%rsi,1),%esi roll $5,%ecx - xorl %r13d,%eax + xorl %r12d,%eax addl %ecx,%esi roll $30,%r11d addl %eax,%esi @@ -1281,29 +1230,202 @@ L$loop: jnz L$loop movq 64(%rsp),%rsi - movq (%rsi),%r13 - movq 8(%rsi),%r12 - movq 16(%rsi),%rbp - movq 24(%rsi),%rbx - leaq 32(%rsi),%rsp + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$epilogue: .byte 0xf3,0xc3 +.p2align 5 +sha1_block_data_order_shaext: +_shaext_shortcut: + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + decq %rdx + leaq 64(%rsi),%rax + paddd %xmm4,%xmm1 + cmovneq %rax,%rsi + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 
+.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz L$oop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + .byte 0xf3,0xc3 + + .p2align 4 sha1_block_data_order_ssse3: _ssse3_shortcut: + movq %rsp,%rax pushq %rbx pushq %rbp pushq %r12 + pushq %r13 + pushq %r14 leaq -64(%rsp),%rsp + movq %rax,%r14 + andq $-64,%rsp movq %rdi,%r8 movq %rsi,%r9 movq %rdx,%r10 shlq $6,%r10 addq %r9,%r10 - leaq K_XX_XX(%rip),%r11 + leaq K_XX_XX+64(%rip),%r11 movl 0(%r8),%eax movl 4(%r8),%ebx @@ -1311,19 +1433,22 @@ _ssse3_shortcut: movl 12(%r8),%edx movl %ebx,%esi movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 + movdqa -64(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 movdqu 48(%r9),%xmm3 .byte 102,15,56,0,198 - addq $64,%r9 .byte 102,15,56,0,206 .byte 102,15,56,0,214 -.byte 102,15,56,0,222 + addq $64,%r9 paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 paddd %xmm9,%xmm1 paddd %xmm9,%xmm2 movdqa %xmm0,0(%rsp) @@ -1335,904 +1460,882 @@ _ssse3_shortcut: jmp L$oop_ssse3 .p2align 4 L$oop_ssse3: - movdqa %xmm1,%xmm4 - addl 0(%rsp),%ebp - xorl %edx,%ecx + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi movdqa %xmm3,%xmm8 -.byte 102,15,58,15,224,8 + paddd %xmm3,%xmm9 movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx roll $5,%eax - paddd %xmm3,%xmm9 - andl %ecx,%esi - xorl %edx,%ecx + addl %esi,%ebp psrldq $4,%xmm8 - xorl %edx,%esi - addl %eax,%ebp + andl %ebx,%edi + xorl %ecx,%ebx pxor %xmm0,%xmm4 - rorl $2,%ebx - addl %esi,%ebp + addl %eax,%ebp + rorl $7,%eax pxor %xmm2,%xmm8 - addl 4(%rsp),%edx - xorl %ecx,%ebx + xorl %ecx,%edi movl %ebp,%esi - roll $5,%ebp + addl 4(%rsp),%edx pxor %xmm8,%xmm4 - andl %ebx,%edi - xorl %ecx,%ebx + xorl %ebx,%eax + roll $5,%ebp movdqa %xmm9,48(%rsp) - xorl %ecx,%edi - addl %ebp,%edx - movdqa %xmm4,%xmm10 - movdqa %xmm4,%xmm8 - rorl $7,%eax addl %edi,%edx - addl 8(%rsp),%ecx + andl %eax,%esi + movdqa %xmm4,%xmm10 xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi pslldq $12,%xmm10 paddd %xmm4,%xmm4 movl %edx,%edi - roll $5,%edx - andl %eax,%esi - xorl %ebx,%eax + addl 8(%rsp),%ecx psrld $31,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx - movdqa %xmm10,%xmm9 - rorl $7,%ebp + xorl %eax,%ebp + roll $5,%edx addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx por %xmm8,%xmm4 - addl 12(%rsp),%ebx - xorl %eax,%ebp + xorl %eax,%edi movl %ecx,%esi - roll $5,%ecx + addl 12(%rsp),%ebx pslld $2,%xmm9 pxor %xmm10,%xmm4 - andl %ebp,%edi - xorl %eax,%ebp - movdqa 0(%r11),%xmm10 - xorl %eax,%edi - addl %ecx,%ebx - pxor %xmm9,%xmm4 - rorl $7,%edx + xorl %ebp,%edx + movdqa -64(%r11),%xmm10 + roll $5,%ecx addl %edi,%ebx - movdqa %xmm2,%xmm5 - addl 16(%rsp),%eax + andl %edx,%esi + pxor %xmm9,%xmm4 xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi movdqa %xmm4,%xmm9 -.byte 102,15,58,15,233,8 + paddd %xmm4,%xmm10 movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx roll $5,%ebx - paddd %xmm4,%xmm10 - andl %edx,%esi - xorl %ebp,%edx + addl %esi,%eax psrldq $4,%xmm9 - xorl %ebp,%esi - addl %ebx,%eax + andl %ecx,%edi + xorl %edx,%ecx pxor %xmm1,%xmm5 - rorl $7,%ecx - addl %esi,%eax + addl %ebx,%eax + rorl $7,%ebx pxor %xmm3,%xmm9 - addl 20(%rsp),%ebp - xorl %edx,%ecx + xorl %edx,%edi movl %eax,%esi - roll $5,%eax 
+ addl 20(%rsp),%ebp pxor %xmm9,%xmm5 - andl %ecx,%edi - xorl %edx,%ecx + xorl %ecx,%ebx + roll $5,%eax movdqa %xmm10,0(%rsp) - xorl %edx,%edi - addl %eax,%ebp - movdqa %xmm5,%xmm8 - movdqa %xmm5,%xmm9 - rorl $7,%ebx addl %edi,%ebp - addl 24(%rsp),%edx + andl %ebx,%esi + movdqa %xmm5,%xmm8 xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi pslldq $12,%xmm8 paddd %xmm5,%xmm5 movl %ebp,%edi - roll $5,%ebp - andl %ebx,%esi - xorl %ecx,%ebx + addl 24(%rsp),%edx psrld $31,%xmm9 - xorl %ecx,%esi - addl %ebp,%edx - movdqa %xmm8,%xmm10 - rorl $7,%eax + xorl %ebx,%eax + roll $5,%ebp addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp por %xmm9,%xmm5 - addl 28(%rsp),%ecx - xorl %ebx,%eax + xorl %ebx,%edi movl %edx,%esi - roll $5,%edx + addl 28(%rsp),%ecx pslld $2,%xmm10 pxor %xmm8,%xmm5 - andl %eax,%edi - xorl %ebx,%eax - movdqa 16(%r11),%xmm8 - xorl %ebx,%edi - addl %edx,%ecx - pxor %xmm10,%xmm5 - rorl $7,%ebp + xorl %eax,%ebp + movdqa -32(%r11),%xmm8 + roll $5,%edx addl %edi,%ecx - movdqa %xmm3,%xmm6 - addl 32(%rsp),%ebx + andl %ebp,%esi + pxor %xmm10,%xmm5 xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi movdqa %xmm5,%xmm10 -.byte 102,15,58,15,242,8 + paddd %xmm5,%xmm8 movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx roll $5,%ecx - paddd %xmm5,%xmm8 - andl %ebp,%esi - xorl %eax,%ebp + addl %esi,%ebx psrldq $4,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx + andl %edx,%edi + xorl %ebp,%edx pxor %xmm2,%xmm6 - rorl $7,%edx - addl %esi,%ebx + addl %ecx,%ebx + rorl $7,%ecx pxor %xmm4,%xmm10 - addl 36(%rsp),%eax - xorl %ebp,%edx + xorl %ebp,%edi movl %ebx,%esi - roll $5,%ebx + addl 36(%rsp),%eax pxor %xmm10,%xmm6 - andl %edx,%edi - xorl %ebp,%edx + xorl %edx,%ecx + roll $5,%ebx movdqa %xmm8,16(%rsp) - xorl %ebp,%edi - addl %ebx,%eax - movdqa %xmm6,%xmm9 - movdqa %xmm6,%xmm10 - rorl $7,%ecx addl %edi,%eax - addl 40(%rsp),%ebp + andl %ecx,%esi + movdqa %xmm6,%xmm9 xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi pslldq $12,%xmm9 paddd %xmm6,%xmm6 movl %eax,%edi - roll $5,%eax - andl %ecx,%esi - xorl %edx,%ecx + addl 40(%rsp),%ebp psrld $31,%xmm10 - xorl %edx,%esi - addl %eax,%ebp - movdqa %xmm9,%xmm8 - rorl $7,%ebx + xorl %ecx,%ebx + roll $5,%eax addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax por %xmm10,%xmm6 - addl 44(%rsp),%edx - xorl %ecx,%ebx + xorl %ecx,%edi movl %ebp,%esi - roll $5,%ebp + addl 44(%rsp),%edx pslld $2,%xmm8 pxor %xmm9,%xmm6 - andl %ebx,%edi - xorl %ecx,%ebx - movdqa 16(%r11),%xmm9 - xorl %ecx,%edi - addl %ebp,%edx - pxor %xmm8,%xmm6 - rorl $7,%eax + xorl %ebx,%eax + movdqa -32(%r11),%xmm9 + roll $5,%ebp addl %edi,%edx - movdqa %xmm4,%xmm7 - addl 48(%rsp),%ecx + andl %eax,%esi + pxor %xmm8,%xmm6 xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi movdqa %xmm6,%xmm8 -.byte 102,15,58,15,251,8 + paddd %xmm6,%xmm9 movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp roll $5,%edx - paddd %xmm6,%xmm9 - andl %eax,%esi - xorl %ebx,%eax + addl %esi,%ecx psrldq $4,%xmm8 - xorl %ebx,%esi - addl %edx,%ecx + andl %ebp,%edi + xorl %eax,%ebp pxor %xmm3,%xmm7 - rorl $7,%ebp - addl %esi,%ecx + addl %edx,%ecx + rorl $7,%edx pxor %xmm5,%xmm8 - addl 52(%rsp),%ebx - xorl %eax,%ebp + xorl %eax,%edi movl %ecx,%esi - roll $5,%ecx + addl 52(%rsp),%ebx pxor %xmm8,%xmm7 - andl 
%ebp,%edi - xorl %eax,%ebp + xorl %ebp,%edx + roll $5,%ecx movdqa %xmm9,32(%rsp) - xorl %eax,%edi - addl %ecx,%ebx - movdqa %xmm7,%xmm10 - movdqa %xmm7,%xmm8 - rorl $7,%edx addl %edi,%ebx - addl 56(%rsp),%eax + andl %edx,%esi + movdqa %xmm7,%xmm10 xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi pslldq $12,%xmm10 paddd %xmm7,%xmm7 movl %ebx,%edi - roll $5,%ebx - andl %edx,%esi - xorl %ebp,%edx + addl 56(%rsp),%eax psrld $31,%xmm8 - xorl %ebp,%esi - addl %ebx,%eax - movdqa %xmm10,%xmm9 - rorl $7,%ecx + xorl %edx,%ecx + roll $5,%ebx addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx por %xmm8,%xmm7 - addl 60(%rsp),%ebp - xorl %edx,%ecx + xorl %edx,%edi movl %eax,%esi - roll $5,%eax + addl 60(%rsp),%ebp pslld $2,%xmm9 pxor %xmm10,%xmm7 - andl %ecx,%edi - xorl %edx,%ecx - movdqa 16(%r11),%xmm10 - xorl %edx,%edi - addl %eax,%ebp - pxor %xmm9,%xmm7 - rorl $7,%ebx + xorl %ecx,%ebx + movdqa -32(%r11),%xmm10 + roll $5,%eax addl %edi,%ebp - movdqa %xmm7,%xmm9 - addl 0(%rsp),%edx - pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,206,8 + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax roll $5,%ebp pxor %xmm1,%xmm0 - andl %ebx,%esi - xorl %ecx,%ebx + addl %esi,%edx + andl %eax,%edi movdqa %xmm10,%xmm8 + xorl %ebx,%eax paddd %xmm7,%xmm10 - xorl %ecx,%esi addl %ebp,%edx pxor %xmm9,%xmm0 - rorl $7,%eax - addl %esi,%edx + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi addl 4(%rsp),%ecx - xorl %ebx,%eax movdqa %xmm0,%xmm9 - movdqa %xmm10,48(%rsp) - movl %edx,%esi + xorl %eax,%ebp roll $5,%edx - andl %eax,%edi - xorl %ebx,%eax + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp pslld $2,%xmm0 - xorl %ebx,%edi addl %edx,%ecx + rorl $7,%edx psrld $30,%xmm9 - rorl $7,%ebp - addl %edi,%ecx - addl 8(%rsp),%ebx - xorl %eax,%ebp + xorl %eax,%esi movl %ecx,%edi - roll $5,%ecx + addl 8(%rsp),%ebx por %xmm9,%xmm0 - andl %ebp,%esi - xorl %eax,%ebp - movdqa %xmm0,%xmm10 - xorl %eax,%esi - addl %ecx,%ebx - rorl $7,%edx - addl %esi,%ebx - addl 12(%rsp),%eax xorl %ebp,%edx - movl %ebx,%esi - roll $5,%ebx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx andl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax xorl %ebp,%edi - addl %ebx,%eax - rorl $7,%ecx + movl %ebx,%esi + roll $5,%ebx addl %edi,%eax - addl 16(%rsp),%ebp - pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,215,8 xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 movl %eax,%edi roll $5,%eax pxor %xmm2,%xmm1 - xorl %ecx,%esi - addl %eax,%ebp + addl %esi,%ebp + xorl %ecx,%edi movdqa %xmm8,%xmm9 - paddd %xmm0,%xmm8 rorl $7,%ebx - addl %esi,%ebp + paddd %xmm0,%xmm8 + addl %eax,%ebp pxor %xmm10,%xmm1 addl 20(%rsp),%edx - xorl %ecx,%edi + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi movdqa %xmm8,0(%rsp) - xorl %ebx,%edi - addl %ebp,%edx rorl $7,%eax - addl %edi,%edx - pslld $2,%xmm1 + addl %ebp,%edx addl 24(%rsp),%ecx - xorl %ebx,%esi - psrld $30,%xmm10 + pslld $2,%xmm1 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm10 roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp por %xmm10,%xmm1 + addl %edx,%ecx addl 28(%rsp),%ebx - xorl %eax,%edi - movdqa %xmm1,%xmm8 + pshufd 
$238,%xmm0,%xmm8 + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 32(%rsp),%eax - pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,192,8 xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 movl %ebx,%edi roll $5,%ebx pxor %xmm3,%xmm2 - xorl %edx,%esi - addl %ebx,%eax - movdqa 32(%r11),%xmm10 - paddd %xmm1,%xmm9 - rorl $7,%ecx addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax pxor %xmm8,%xmm2 addl 36(%rsp),%ebp - xorl %edx,%edi + xorl %ecx,%edi movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi movdqa %xmm9,16(%rsp) - xorl %ecx,%edi - addl %eax,%ebp rorl $7,%ebx - addl %edi,%ebp - pslld $2,%xmm2 + addl %eax,%ebp addl 40(%rsp),%edx - xorl %ecx,%esi - psrld $30,%xmm8 + pslld $2,%xmm2 + xorl %ebx,%esi movl %ebp,%edi + psrld $30,%xmm8 roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax por %xmm8,%xmm2 + addl %ebp,%edx addl 44(%rsp),%ecx - xorl %ebx,%edi - movdqa %xmm2,%xmm9 + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 48(%rsp),%ebx - pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,201,8 xorl %eax,%esi - movl %ecx,%edi - roll $5,%ecx - pxor %xmm4,%xmm3 - xorl %ebp,%esi - addl %ecx,%ebx + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi movdqa %xmm10,%xmm8 - paddd %xmm2,%xmm10 rorl $7,%edx - addl %esi,%ebx + paddd %xmm2,%xmm10 + addl %ecx,%ebx pxor %xmm9,%xmm3 addl 52(%rsp),%eax - xorl %ebp,%edi + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi movdqa %xmm10,32(%rsp) - xorl %edx,%edi - addl %ebx,%eax rorl $7,%ecx - addl %edi,%eax - pslld $2,%xmm3 + addl %ebx,%eax addl 56(%rsp),%ebp - xorl %edx,%esi - psrld $30,%xmm9 + pslld $2,%xmm3 + xorl %ecx,%esi movl %eax,%edi + psrld $30,%xmm9 roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx por %xmm9,%xmm3 + addl %eax,%ebp addl 60(%rsp),%edx - xorl %ecx,%edi - movdqa %xmm3,%xmm10 + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 0(%rsp),%ecx - pxor %xmm0,%xmm4 -.byte 102,68,15,58,15,210,8 xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 movl %edx,%edi roll $5,%edx pxor %xmm5,%xmm4 - xorl %eax,%esi - addl %edx,%ecx + addl %esi,%ecx + xorl %eax,%edi movdqa %xmm8,%xmm9 - paddd %xmm3,%xmm8 rorl $7,%ebp - addl %esi,%ecx + paddd %xmm3,%xmm8 + addl %edx,%ecx pxor %xmm10,%xmm4 addl 4(%rsp),%ebx - xorl %eax,%edi + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi movdqa %xmm8,48(%rsp) - xorl %ebp,%edi - addl %ecx,%ebx rorl $7,%edx - addl %edi,%ebx - pslld $2,%xmm4 + addl %ecx,%ebx addl 8(%rsp),%eax - xorl %ebp,%esi - psrld $30,%xmm10 + pslld $2,%xmm4 + xorl %edx,%esi movl %ebx,%edi + psrld $30,%xmm10 roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx por %xmm10,%xmm4 + addl %ebx,%eax addl 12(%rsp),%ebp - xorl %edx,%edi - movdqa %xmm4,%xmm8 + pshufd $238,%xmm3,%xmm8 + xorl 
%ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 16(%rsp),%edx - pxor %xmm1,%xmm5 -.byte 102,68,15,58,15,195,8 xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 movl %ebp,%edi roll $5,%ebp pxor %xmm6,%xmm5 - xorl %ebx,%esi - addl %ebp,%edx + addl %esi,%edx + xorl %ebx,%edi movdqa %xmm9,%xmm10 - paddd %xmm4,%xmm9 rorl $7,%eax - addl %esi,%edx + paddd %xmm4,%xmm9 + addl %ebp,%edx pxor %xmm8,%xmm5 addl 20(%rsp),%ecx - xorl %ebx,%edi + xorl %eax,%edi movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi movdqa %xmm9,0(%rsp) - xorl %eax,%edi - addl %edx,%ecx rorl $7,%ebp - addl %edi,%ecx - pslld $2,%xmm5 + addl %edx,%ecx addl 24(%rsp),%ebx - xorl %eax,%esi - psrld $30,%xmm8 + pslld $2,%xmm5 + xorl %ebp,%esi movl %ecx,%edi + psrld $30,%xmm8 roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx por %xmm8,%xmm5 + addl %ecx,%ebx addl 28(%rsp),%eax - xorl %ebp,%edi - movdqa %xmm5,%xmm9 + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx movl %ebx,%esi - roll $5,%ebx xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx + roll $5,%ebx addl %edi,%eax - movl %ecx,%edi - pxor %xmm2,%xmm6 -.byte 102,68,15,58,15,204,8 + xorl %ecx,%esi xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 addl 32(%rsp),%ebp - andl %edx,%edi - pxor %xmm7,%xmm6 andl %ecx,%esi + xorl %edx,%ecx rorl $7,%ebx - movdqa %xmm10,%xmm8 - paddd %xmm5,%xmm10 - addl %edi,%ebp + punpcklqdq %xmm5,%xmm9 movl %eax,%edi - pxor %xmm9,%xmm6 + xorl %ecx,%esi + pxor %xmm7,%xmm6 roll $5,%eax addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movdqa %xmm6,%xmm9 - movdqa %xmm10,16(%rsp) - movl %ebx,%esi + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp addl 36(%rsp),%edx - andl %ecx,%esi - pslld $2,%xmm6 andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - psrld $30,%xmm9 - addl %esi,%edx + movdqa %xmm6,%xmm9 movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) roll $5,%ebp addl %edi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - por %xmm9,%xmm6 - movl %eax,%edi + xorl %eax,%esi + pslld $2,%xmm6 xorl %ebx,%eax - movdqa %xmm6,%xmm10 + addl %ebp,%edx + psrld $30,%xmm9 addl 40(%rsp),%ecx - andl %ebx,%edi andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 rorl $7,%ebp - addl %edi,%ecx movl %edx,%edi + xorl %eax,%esi roll $5,%edx + pshufd $238,%xmm5,%xmm10 addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %ebp,%esi + xorl %ebp,%edi xorl %eax,%ebp + addl %edx,%ecx addl 44(%rsp),%ebx - andl %eax,%esi andl %ebp,%edi + xorl %eax,%ebp rorl $7,%edx - addl %esi,%ebx movl %ecx,%esi + xorl %ebp,%edi roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp + xorl %edx,%esi + xorl %ebp,%edx addl %ecx,%ebx - movl %edx,%edi pxor %xmm3,%xmm7 -.byte 102,68,15,58,15,213,8 - xorl %ebp,%edx addl 48(%rsp),%eax - andl %ebp,%edi - pxor %xmm0,%xmm7 andl %edx,%esi + xorl %ebp,%edx rorl $7,%ecx - movdqa 48(%r11),%xmm9 - paddd %xmm6,%xmm8 - addl %edi,%eax + punpcklqdq %xmm6,%xmm10 movl %ebx,%edi - pxor %xmm10,%xmm7 + xorl %edx,%esi + pxor %xmm0,%xmm7 roll $5,%ebx addl %esi,%eax - xorl %ebp,%edx - addl %ebx,%eax - movdqa %xmm7,%xmm10 - movdqa %xmm8,32(%rsp) - movl %ecx,%esi + movdqa 32(%r11),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax addl 52(%rsp),%ebp - andl %edx,%esi - pslld $2,%xmm7 andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - psrld $30,%xmm10 - addl %esi,%ebp + movdqa %xmm7,%xmm10 
movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - por %xmm10,%xmm7 - movl %ebx,%edi + xorl %ebx,%esi + pslld $2,%xmm7 xorl %ecx,%ebx - movdqa %xmm7,%xmm8 + addl %eax,%ebp + psrld $30,%xmm10 addl 56(%rsp),%edx - andl %ecx,%edi andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 rorl $7,%eax - addl %edi,%edx movl %ebp,%edi + xorl %ebx,%esi roll $5,%ebp + pshufd $238,%xmm6,%xmm8 addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movl %eax,%esi + xorl %eax,%edi xorl %ebx,%eax + addl %ebp,%edx addl 60(%rsp),%ecx - andl %ebx,%esi andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - addl %esi,%ecx movl %edx,%esi + xorl %eax,%edi roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax + xorl %ebp,%esi + xorl %eax,%ebp addl %edx,%ecx - movl %ebp,%edi pxor %xmm4,%xmm0 -.byte 102,68,15,58,15,198,8 - xorl %eax,%ebp addl 0(%rsp),%ebx - andl %eax,%edi - pxor %xmm1,%xmm0 andl %ebp,%esi + xorl %eax,%ebp rorl $7,%edx - movdqa %xmm9,%xmm10 - paddd %xmm7,%xmm9 - addl %edi,%ebx + punpcklqdq %xmm7,%xmm8 movl %ecx,%edi - pxor %xmm8,%xmm0 + xorl %ebp,%esi + pxor %xmm1,%xmm0 roll $5,%ecx addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movdqa %xmm0,%xmm8 - movdqa %xmm9,48(%rsp) - movl %edx,%esi + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx addl 4(%rsp),%eax - andl %ebp,%esi - pslld $2,%xmm0 andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - psrld $30,%xmm8 - addl %esi,%eax + movdqa %xmm0,%xmm8 movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx - addl %ebx,%eax - por %xmm8,%xmm0 - movl %ecx,%edi + xorl %ecx,%esi + pslld $2,%xmm0 xorl %edx,%ecx - movdqa %xmm0,%xmm9 + addl %ebx,%eax + psrld $30,%xmm8 addl 8(%rsp),%ebp - andl %edx,%edi andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 rorl $7,%ebx - addl %edi,%ebp movl %eax,%edi + xorl %ecx,%esi roll $5,%eax + pshufd $238,%xmm7,%xmm9 addl %esi,%ebp - xorl %edx,%ecx - addl %eax,%ebp - movl %ebx,%esi + xorl %ebx,%edi xorl %ecx,%ebx + addl %eax,%ebp addl 12(%rsp),%edx - andl %ecx,%esi andl %ebx,%edi + xorl %ecx,%ebx rorl $7,%eax - addl %esi,%edx movl %ebp,%esi + xorl %ebx,%edi roll $5,%ebp addl %edi,%edx - xorl %ecx,%ebx + xorl %eax,%esi + xorl %ebx,%eax addl %ebp,%edx - movl %eax,%edi pxor %xmm5,%xmm1 -.byte 102,68,15,58,15,207,8 - xorl %ebx,%eax addl 16(%rsp),%ecx - andl %ebx,%edi - pxor %xmm2,%xmm1 andl %eax,%esi + xorl %ebx,%eax rorl $7,%ebp - movdqa %xmm10,%xmm8 - paddd %xmm0,%xmm10 - addl %edi,%ecx + punpcklqdq %xmm0,%xmm9 movl %edx,%edi - pxor %xmm9,%xmm1 + xorl %eax,%esi + pxor %xmm2,%xmm1 roll $5,%edx addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movdqa %xmm1,%xmm9 - movdqa %xmm10,0(%rsp) - movl %ebp,%esi + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx addl 20(%rsp),%ebx - andl %eax,%esi - pslld $2,%xmm1 andl %ebp,%edi + xorl %eax,%ebp rorl $7,%edx - psrld $30,%xmm9 - addl %esi,%ebx + movdqa %xmm1,%xmm9 movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) roll $5,%ecx addl %edi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - por %xmm9,%xmm1 - movl %edx,%edi + xorl %edx,%esi + pslld $2,%xmm1 xorl %ebp,%edx - movdqa %xmm1,%xmm10 + addl %ecx,%ebx + psrld $30,%xmm9 addl 24(%rsp),%eax - andl %ebp,%edi andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 rorl $7,%ecx - addl %edi,%eax movl %ebx,%edi + xorl %edx,%esi roll $5,%ebx + pshufd $238,%xmm0,%xmm10 addl %esi,%eax - xorl %ebp,%edx - addl %ebx,%eax - movl %ecx,%esi + xorl %ecx,%edi xorl 
%edx,%ecx + addl %ebx,%eax addl 28(%rsp),%ebp - andl %edx,%esi andl %ecx,%edi + xorl %edx,%ecx rorl $7,%ebx - addl %esi,%ebp movl %eax,%esi + xorl %ecx,%edi roll $5,%eax addl %edi,%ebp - xorl %edx,%ecx + xorl %ebx,%esi + xorl %ecx,%ebx addl %eax,%ebp - movl %ebx,%edi pxor %xmm6,%xmm2 -.byte 102,68,15,58,15,208,8 - xorl %ecx,%ebx addl 32(%rsp),%edx - andl %ecx,%edi - pxor %xmm3,%xmm2 andl %ebx,%esi + xorl %ecx,%ebx rorl $7,%eax - movdqa %xmm8,%xmm9 - paddd %xmm1,%xmm8 - addl %edi,%edx + punpcklqdq %xmm1,%xmm10 movl %ebp,%edi - pxor %xmm10,%xmm2 + xorl %ebx,%esi + pxor %xmm3,%xmm2 roll $5,%ebp addl %esi,%edx - xorl %ecx,%ebx - addl %ebp,%edx - movdqa %xmm2,%xmm10 - movdqa %xmm8,16(%rsp) - movl %eax,%esi + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx addl 36(%rsp),%ecx - andl %ebx,%esi - pslld $2,%xmm2 andl %eax,%edi + xorl %ebx,%eax rorl $7,%ebp - psrld $30,%xmm10 - addl %esi,%ecx + movdqa %xmm2,%xmm10 movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) roll $5,%edx addl %edi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - por %xmm10,%xmm2 - movl %ebp,%edi + xorl %ebp,%esi + pslld $2,%xmm2 xorl %eax,%ebp - movdqa %xmm2,%xmm8 + addl %edx,%ecx + psrld $30,%xmm10 addl 40(%rsp),%ebx - andl %eax,%edi andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 rorl $7,%edx - addl %edi,%ebx movl %ecx,%edi + xorl %ebp,%esi roll $5,%ecx + pshufd $238,%xmm1,%xmm8 addl %esi,%ebx - xorl %eax,%ebp - addl %ecx,%ebx - movl %edx,%esi + xorl %edx,%edi xorl %ebp,%edx + addl %ecx,%ebx addl 44(%rsp),%eax - andl %ebp,%esi andl %edx,%edi + xorl %ebp,%edx rorl $7,%ecx - addl %esi,%eax movl %ebx,%esi + xorl %edx,%edi roll $5,%ebx addl %edi,%eax - xorl %ebp,%edx + xorl %edx,%esi addl %ebx,%eax - addl 48(%rsp),%ebp pxor %xmm7,%xmm3 -.byte 102,68,15,58,15,193,8 - xorl %edx,%esi + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 movl %eax,%edi roll $5,%eax pxor %xmm4,%xmm3 - xorl %ecx,%esi - addl %eax,%ebp + addl %esi,%ebp + xorl %ecx,%edi movdqa %xmm9,%xmm10 - paddd %xmm2,%xmm9 rorl $7,%ebx - addl %esi,%ebp + paddd %xmm2,%xmm9 + addl %eax,%ebp pxor %xmm8,%xmm3 addl 52(%rsp),%edx - xorl %ecx,%edi + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi movdqa %xmm9,32(%rsp) - xorl %ebx,%edi - addl %ebp,%edx rorl $7,%eax - addl %edi,%edx - pslld $2,%xmm3 + addl %ebp,%edx addl 56(%rsp),%ecx - xorl %ebx,%esi - psrld $30,%xmm8 + pslld $2,%xmm3 + xorl %eax,%esi movl %edx,%edi + psrld $30,%xmm8 roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp por %xmm8,%xmm3 + addl %edx,%ecx addl 60(%rsp),%ebx - xorl %eax,%edi + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 0(%rsp),%eax - paddd %xmm3,%xmm10 xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi movdqa %xmm10,48(%rsp) - addl %ebx,%eax rorl $7,%ecx - addl %esi,%eax + addl %ebx,%eax addl 4(%rsp),%ebp - xorl %edx,%edi + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 8(%rsp),%edx xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx - addl 12(%rsp),%ecx xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi 
movl %edx,%esi roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx cmpq %r10,%r9 je L$done_ssse3 movdqa 64(%r11),%xmm6 - movdqa 0(%r11),%xmm9 + movdqa -64(%r11),%xmm9 movdqu 0(%r9),%xmm0 movdqu 16(%r9),%xmm1 movdqu 32(%r9),%xmm2 @@ -2240,113 +2343,112 @@ L$oop_ssse3: .byte 102,15,56,0,198 addq $64,%r9 addl 16(%rsp),%ebx - xorl %eax,%esi -.byte 102,15,56,0,206 + xorl %ebp,%esi movl %ecx,%edi +.byte 102,15,56,0,206 roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx paddd %xmm9,%xmm0 - xorl %ebp,%esi addl %ecx,%ebx - rorl $7,%edx - addl %esi,%ebx - movdqa %xmm0,0(%rsp) addl 20(%rsp),%eax - xorl %ebp,%edi - psubd %xmm9,%xmm0 + xorl %edx,%edi movl %ebx,%esi + movdqa %xmm0,0(%rsp) roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax - addl 24(%rsp),%ebp xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi movl %eax,%edi roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi -.byte 102,15,56,0,214 - movl %edx,%edi - roll $5,%edx - paddd %xmm9,%xmm1 + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx addl %esi,%ecx - movdqa %xmm1,16(%rsp) - addl 36(%rsp),%ebx xorl %eax,%edi - psubd %xmm9,%xmm1 + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi + movdqa %xmm1,16(%rsp) roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 44(%rsp),%ebp xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi -.byte 102,15,56,0,222 + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi movl %ebp,%edi +.byte 102,15,56,0,222 roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax paddd %xmm9,%xmm2 - xorl %ebx,%esi addl %ebp,%edx - rorl $7,%eax - addl %esi,%edx - movdqa %xmm2,32(%rsp) addl 52(%rsp),%ecx - xorl %ebx,%edi - psubd %xmm9,%xmm2 + xorl %eax,%edi movl %edx,%esi + movdqa %xmm2,32(%rsp) roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 56(%rsp),%ebx xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2356,108 +2458,110 @@ L$oop_ssse3: movl %esi,4(%r8) movl %esi,%ebx movl %ecx,8(%r8) + movl %ecx,%edi movl %edx,12(%r8) + xorl %edx,%edi movl %ebp,16(%r8) + andl %edi,%esi jmp L$oop_ssse3 .p2align 4 L$done_ssse3: addl 16(%rsp),%ebx - xorl %eax,%esi + xorl %ebp,%esi movl %ecx,%edi 
roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 20(%rsp),%eax xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax - addl 24(%rsp),%ebp xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi movl %eax,%edi roll $5,%eax - xorl %ecx,%esi - addl %eax,%ebp - rorl $7,%ebx addl %esi,%ebp - addl 28(%rsp),%edx xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi movl %ebp,%esi roll $5,%ebp - xorl %ebx,%edi - addl %ebp,%edx - rorl $7,%eax addl %edi,%edx - addl 32(%rsp),%ecx xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi movl %edx,%edi roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%ebp addl %esi,%ecx - addl 36(%rsp),%ebx xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi movl %ecx,%esi roll $5,%ecx - xorl %ebp,%edi - addl %ecx,%ebx - rorl $7,%edx addl %edi,%ebx - addl 40(%rsp),%eax xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi movl %ebx,%edi roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 44(%rsp),%ebp xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi movl %eax,%esi roll $5,%eax - xorl %ecx,%edi - addl %eax,%ebp - rorl $7,%ebx addl %edi,%ebp - addl 48(%rsp),%edx xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi movl %ebp,%edi roll $5,%ebp - xorl %ebx,%esi - addl %ebp,%edx - rorl $7,%eax addl %esi,%edx - addl 52(%rsp),%ecx xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi movl %edx,%esi roll $5,%edx - xorl %eax,%edi - addl %edx,%ecx - rorl $7,%ebp addl %edi,%ecx - addl 56(%rsp),%ebx xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi movl %ecx,%edi roll $5,%ecx - xorl %ebp,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 60(%rsp),%eax xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi movl %ebx,%esi roll $5,%ebx - xorl %edx,%edi - addl %ebx,%eax - rorl $7,%ecx addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax addl 0(%r8),%eax addl 4(%r8),%esi addl 8(%r8),%ecx @@ -2468,25 +2572,2825 @@ L$done_ssse3: movl %ecx,8(%r8) movl %edx,12(%r8) movl %ebp,16(%r8) - leaq 64(%rsp),%rsi - movq 0(%rsi),%r12 - movq 8(%rsi),%rbp - movq 16(%rsi),%rbx - leaq 24(%rsi),%rsp + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp L$epilogue_ssse3: .byte 0xf3,0xc3 -.p2align 6 -K_XX_XX: -.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 - -.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 -.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.p2align 4 +sha1_block_data_order_avx: +_avx_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + vzeroupper + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 -.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + vmovdqa 64(%r11),%xmm6 + vmovdqa -64(%r11),%xmm11 + vmovdqu 0(%r9),%xmm0 + 
vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp L$oop_avx +.p2align 4 +L$oop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r11),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld 
$2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r11),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa 
%xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 
40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r11),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr 
$8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je L$done_avx + vmovdqa 64(%r11),%xmm6 + vmovdqa -64(%r11),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl 
$7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp L$oop_avx + +.p2align 4 +L$done_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq 
-8(%rsi),%rbx + leaq (%rsi),%rsp +L$epilogue_avx: + .byte 0xf3,0xc3 + + +.p2align 4 +sha1_block_data_order_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + vzeroupper + movq %rax,%r14 + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r11),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r11),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r11),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp L$oop_avx2 +.p2align 5 +L$oop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp L$align32_1 +.p2align 5 +L$align32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd 
%ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r11),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl 
%ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld $30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r11),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx 
+ xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp L$align32_2 +.p2align 5 +L$align32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp + xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp 
+ rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax 
+ addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je L$done_avx2 + vmovdqu 64(%r11),%ymm6 + cmpq %r10,%rdi + ja L$ast_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp L$ast_avx2 + +.p2align 5 +L$ast_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl 
%edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r11),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + 
addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp L$align32_3 +.p2align 5 +L$align32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + 
addl 32(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r11),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx 
+ leal (%rdx,%rax,1),%edx + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%rsp),%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + jbe L$oop_avx2 + +L$done_avx2: + vzeroupper + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$epilogue_avx2: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 
0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha256-mb-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha256-mb-x86_64.s new file mode 100644 index 00000000000000..77c24f1cf5d973 --- /dev/null +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha256-mb-x86_64.s @@ -0,0 +1,7902 @@ +.text + + + +.globl _sha256_multi_block + +.p2align 5 +_sha256_multi_block: + movq _OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + testl $268435456,%ecx + jnz _avx_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +L$body: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +L$oop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu L$pbswap(%rip),%xmm6 + jmp L$oop + +.p2align 5 +L$oop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa 
%xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa 
%xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa 
%xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld 
$19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor 
%xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + prefetcht0 63(%r8) + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + prefetcht0 63(%r9) + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + prefetcht0 63(%r10) + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + prefetcht0 63(%r11) + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp L$oop_16_xx +.p2align 5 +L$oop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa 
%xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + 
pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa 
%xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld 
$25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor 
%xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 
+ pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + 
pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz L$oop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa L$pbswap(%rip),%xmm6 + decl %edx + jnz L$oop + + movl 280(%rsp),%edx 
+ leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande + +L$done: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +sha256_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 128(%rdi),%rdi + movq %rax,272(%rsp) +L$body_shaext: + leaq 256(%rsp),%rbx + leaq K256_shaext+128(%rip),%rbp + +L$oop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz L$done_shaext + + movq 0-128(%rdi),%xmm12 + movq 32-128(%rdi),%xmm4 + movq 64-128(%rdi),%xmm13 + movq 96-128(%rdi),%xmm5 + movq 128-128(%rdi),%xmm8 + movq 160-128(%rdi),%xmm9 + movq 192-128(%rdi),%xmm10 + movq 224-128(%rdi),%xmm11 + + punpckldq %xmm4,%xmm12 + punpckldq %xmm5,%xmm13 + punpckldq %xmm9,%xmm8 + punpckldq %xmm11,%xmm10 + movdqa K256_shaext-16(%rip),%xmm3 + + movdqa %xmm12,%xmm14 + movdqa %xmm13,%xmm15 + punpcklqdq %xmm8,%xmm12 + punpcklqdq %xmm10,%xmm13 + punpckhqdq %xmm8,%xmm14 + punpckhqdq %xmm10,%xmm15 + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + jmp L$oop_shaext + +.p2align 5 +L$oop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm8 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm9 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm10 +.byte 102,68,15,56,0,195 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 + movdqu 48(%r9),%xmm11 + leaq 64(%r9),%r9 + + movdqa 0-128(%rbp),%xmm0 +.byte 102,15,56,0,235 + paddd %xmm4,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm0,%xmm1 + movdqa 0-128(%rbp),%xmm2 +.byte 102,68,15,56,0,203 + paddd %xmm8,%xmm2 + movdqa %xmm13,80(%rsp) +.byte 69,15,56,203,236 + pxor %xmm14,%xmm8 + movdqa %xmm2,%xmm0 + movdqa %xmm15,112(%rsp) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm12,64(%rsp) +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pxor %xmm14,%xmm8 + movdqa %xmm14,96(%rsp) + movdqa 16-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 102,15,56,0,243 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 16-128(%rbp),%xmm2 + paddd %xmm9,%xmm2 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + prefetcht0 127(%r8) +.byte 102,15,56,0,251 +.byte 102,68,15,56,0,211 + prefetcht0 127(%r9) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,68,15,56,0,219 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 32-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 32-128(%rbp),%xmm2 + paddd %xmm10,%xmm2 +.byte 69,15,56,203,236 +.byte 69,15,56,204,193 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,15,58,15,222,4 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 48-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + + movdqa %xmm1,%xmm0 + movdqa 48-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd 
%xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 64-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 64-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 80-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 80-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 96-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 96-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 112-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 112-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 128-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 128-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 144-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 144-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 160-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 160-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 
69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 176-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 176-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 192-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 192-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 208-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 208-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 224-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 224-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 + nop +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movl $1,%ecx + pxor %xmm6,%xmm6 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + movdqa 240-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 + movq (%rbx),%xmm7 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 240-128(%rbp),%xmm2 + paddd %xmm11,%xmm2 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + pshufd $0,%xmm7,%xmm9 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + pshufd $85,%xmm7,%xmm10 + movdqa %xmm7,%xmm11 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pcmpgtd %xmm6,%xmm9 + pcmpgtd %xmm6,%xmm10 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pcmpgtd %xmm6,%xmm11 + movdqa K256_shaext-16(%rip),%xmm3 +.byte 69,15,56,203,247 + + pand %xmm9,%xmm13 + pand %xmm10,%xmm15 + pand %xmm9,%xmm12 + pand %xmm10,%xmm14 + paddd %xmm7,%xmm11 + + paddd 80(%rsp),%xmm13 + paddd 112(%rsp),%xmm15 + paddd 64(%rsp),%xmm12 + paddd 96(%rsp),%xmm14 + + movq %xmm11,(%rbx) + decl %edx + jnz L$oop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + + movdqa %xmm12,%xmm5 + movdqa %xmm13,%xmm6 + punpckldq %xmm14,%xmm12 + punpckhdq %xmm14,%xmm5 + punpckldq %xmm15,%xmm13 + punpckhdq %xmm15,%xmm6 + + movq %xmm12,0-128(%rdi) + psrldq $8,%xmm12 + movq %xmm5,128-128(%rdi) + psrldq $8,%xmm5 + movq %xmm12,32-128(%rdi) + movq %xmm5,160-128(%rdi) + + movq %xmm13,64-128(%rdi) + psrldq $8,%xmm13 + movq %xmm6,192-128(%rdi) + psrldq $8,%xmm6 + movq %xmm13,96-128(%rdi) + movq %xmm6,224-128(%rdi) + + leaq 8(%rdi),%rdi + 
leaq 32(%rsi),%rsi + decl %edx + jnz L$oop_grande_shaext + +L$done_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_shaext: + .byte 0xf3,0xc3 + + +.p2align 5 +sha256_multi_block_avx: +_avx_shortcut: + shrq $32,%rcx + cmpl $2,%edx + jb L$avx + testl $32,%ecx + jnz _avx2_shortcut + jmp L$avx +.p2align 5 +L$avx: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +L$body_avx: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +L$oop_grande_avx: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done_avx + + vmovdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%xmm9 + vmovdqu 64-128(%rdi),%xmm10 + vmovdqu 96-128(%rdi),%xmm11 + vmovdqu 128-128(%rdi),%xmm12 + vmovdqu 160-128(%rdi),%xmm13 + vmovdqu 192-128(%rdi),%xmm14 + vmovdqu 224-128(%rdi),%xmm15 + vmovdqu L$pbswap(%rip),%xmm6 + jmp L$oop_avx + +.p2align 5 +L$oop_avx: + vpxor %xmm9,%xmm10,%xmm4 + vmovd 0(%r8),%xmm5 + vmovd 0(%r9),%xmm0 + vpinsrd $1,0(%r10),%xmm5,%xmm5 + vpinsrd $1,0(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 4(%r8),%xmm5 + vmovd 4(%r9),%xmm0 + vpinsrd $1,4(%r10),%xmm5,%xmm5 + vpinsrd $1,4(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,16-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand 
%xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 8(%r8),%xmm5 + vmovd 8(%r9),%xmm0 + vpinsrd $1,8(%r10),%xmm5,%xmm5 + vpinsrd $1,8(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 12(%r8),%xmm5 + vmovd 12(%r9),%xmm0 + vpinsrd $1,12(%r10),%xmm5,%xmm5 + vpinsrd $1,12(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,48-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 16(%r8),%xmm5 + vmovd 16(%r9),%xmm0 + vpinsrd $1,16(%r10),%xmm5,%xmm5 + vpinsrd $1,16(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor 
%xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 20(%r8),%xmm5 + vmovd 20(%r9),%xmm0 + vpinsrd $1,20(%r10),%xmm5,%xmm5 + vpinsrd $1,20(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,80-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovd 24(%r8),%xmm5 + vmovd 24(%r9),%xmm0 + vpinsrd $1,24(%r10),%xmm5,%xmm5 + vpinsrd $1,24(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 28(%r8),%xmm5 + vmovd 28(%r9),%xmm0 + vpinsrd $1,28(%r10),%xmm5,%xmm5 + vpinsrd $1,28(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,112-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd 
%xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovd 32(%r8),%xmm5 + vmovd 32(%r9),%xmm0 + vpinsrd $1,32(%r10),%xmm5,%xmm5 + vpinsrd $1,32(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovd 36(%r8),%xmm5 + vmovd 36(%r9),%xmm0 + vpinsrd $1,36(%r10),%xmm5,%xmm5 + vpinsrd $1,36(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm5,144-128(%rax) + vpaddd %xmm14,%xmm5,%xmm5 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm5,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovd 40(%r8),%xmm5 + vmovd 40(%r9),%xmm0 + vpinsrd $1,40(%r10),%xmm5,%xmm5 + vpinsrd $1,40(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + 
vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovd 44(%r8),%xmm5 + vmovd 44(%r9),%xmm0 + vpinsrd $1,44(%r10),%xmm5,%xmm5 + vpinsrd $1,44(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm5,176-128(%rax) + vpaddd %xmm12,%xmm5,%xmm5 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm5,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovd 48(%r8),%xmm5 + vmovd 48(%r9),%xmm0 + vpinsrd $1,48(%r10),%xmm5,%xmm5 + vpinsrd $1,48(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovd 52(%r8),%xmm5 + vmovd 52(%r9),%xmm0 + vpinsrd $1,52(%r10),%xmm5,%xmm5 + vpinsrd $1,52(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm5,208-128(%rax) + vpaddd %xmm10,%xmm5,%xmm5 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm5,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm10,%xmm10 + vpaddd 
%xmm7,%xmm10,%xmm10 + vmovd 56(%r8),%xmm5 + vmovd 56(%r9),%xmm0 + vpinsrd $1,56(%r10),%xmm5,%xmm5 + vpinsrd $1,56(%r11),%xmm0,%xmm0 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + vmovd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r10),%xmm5,%xmm5 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r11),%xmm0,%xmm0 + leaq 64(%r11),%r11 + vpunpckldq %xmm0,%xmm5,%xmm5 + vpshufb %xmm6,%xmm5,%xmm5 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm5,240-128(%rax) + vpaddd %xmm8,%xmm5,%xmm5 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r8) + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + prefetcht0 63(%r9) + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + prefetcht0 63(%r10) + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + prefetcht0 63(%r11) + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm5,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp L$oop_16_xx_avx +.p2align 5 +L$oop_16_xx_avx: + vmovdqu 16-128(%rax),%xmm6 + vpaddd 144-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 224-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,0-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn 
%xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 32-128(%rax),%xmm5 + vpaddd 160-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 240-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,16-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 48-128(%rax),%xmm6 + vpaddd 176-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 0-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,32-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand 
%xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 64-128(%rax),%xmm5 + vpaddd 192-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 16-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,48-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 80-128(%rax),%xmm6 + vpaddd 208-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 32-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,64-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 96-128(%rax),%xmm5 + vpaddd 224-128(%rax),%xmm6,%xmm6 + + 
vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 48-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,80-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 112-128(%rax),%xmm6 + vpaddd 240-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 64-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,96-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 128-128(%rax),%xmm5 + vpaddd 0-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 80-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor 
%xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,112-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + vmovdqu 144-128(%rax),%xmm6 + vpaddd 16-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 96-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm12,%xmm7 + vpslld $26,%xmm12,%xmm2 + vmovdqu %xmm5,128-128(%rax) + vpaddd %xmm15,%xmm5,%xmm5 + + vpsrld $11,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm12,%xmm2 + vpaddd -128(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm12,%xmm2 + vpandn %xmm14,%xmm12,%xmm0 + vpand %xmm13,%xmm12,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm8,%xmm15 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm8,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm8,%xmm9,%xmm3 + + vpxor %xmm1,%xmm15,%xmm15 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm8,%xmm1 + + vpslld $19,%xmm8,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm15,%xmm7 + + vpsrld $22,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm8,%xmm2 + vpxor %xmm4,%xmm9,%xmm15 + vpaddd %xmm5,%xmm11,%xmm11 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm15,%xmm15 + vpaddd %xmm7,%xmm15,%xmm15 + vmovdqu 160-128(%rax),%xmm5 + vpaddd 32-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 112-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm11,%xmm7 + vpslld $26,%xmm11,%xmm2 + vmovdqu %xmm6,144-128(%rax) + vpaddd %xmm14,%xmm6,%xmm6 + + vpsrld $11,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld 
$21,%xmm11,%xmm2 + vpaddd -96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm11,%xmm2 + vpandn %xmm13,%xmm11,%xmm0 + vpand %xmm12,%xmm11,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm15,%xmm14 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm15,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm15,%xmm8,%xmm4 + + vpxor %xmm1,%xmm14,%xmm14 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm15,%xmm1 + + vpslld $19,%xmm15,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm14,%xmm7 + + vpsrld $22,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm15,%xmm2 + vpxor %xmm3,%xmm8,%xmm14 + vpaddd %xmm6,%xmm10,%xmm10 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm14,%xmm14 + vpaddd %xmm7,%xmm14,%xmm14 + vmovdqu 176-128(%rax),%xmm6 + vpaddd 48-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 128-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm10,%xmm7 + vpslld $26,%xmm10,%xmm2 + vmovdqu %xmm5,160-128(%rax) + vpaddd %xmm13,%xmm5,%xmm5 + + vpsrld $11,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm10,%xmm2 + vpaddd -64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm10,%xmm2 + vpandn %xmm12,%xmm10,%xmm0 + vpand %xmm11,%xmm10,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm14,%xmm13 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm14,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm14,%xmm15,%xmm3 + + vpxor %xmm1,%xmm13,%xmm13 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm14,%xmm1 + + vpslld $19,%xmm14,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm13,%xmm7 + + vpsrld $22,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm14,%xmm2 + vpxor %xmm4,%xmm15,%xmm13 + vpaddd %xmm5,%xmm9,%xmm9 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm13,%xmm13 + vpaddd %xmm7,%xmm13,%xmm13 + vmovdqu 192-128(%rax),%xmm5 + vpaddd 64-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 144-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm9,%xmm7 + vpslld $26,%xmm9,%xmm2 + vmovdqu %xmm6,176-128(%rax) + vpaddd %xmm12,%xmm6,%xmm6 + + vpsrld $11,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm9,%xmm2 + vpaddd -32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm9,%xmm2 + vpandn %xmm11,%xmm9,%xmm0 + vpand %xmm10,%xmm9,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm13,%xmm12 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm13,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor 
%xmm13,%xmm14,%xmm4 + + vpxor %xmm1,%xmm12,%xmm12 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm13,%xmm1 + + vpslld $19,%xmm13,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm12,%xmm7 + + vpsrld $22,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm13,%xmm2 + vpxor %xmm3,%xmm14,%xmm12 + vpaddd %xmm6,%xmm8,%xmm8 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm12,%xmm12 + vpaddd %xmm7,%xmm12,%xmm12 + vmovdqu 208-128(%rax),%xmm6 + vpaddd 80-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 160-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm8,%xmm7 + vpslld $26,%xmm8,%xmm2 + vmovdqu %xmm5,192-128(%rax) + vpaddd %xmm11,%xmm5,%xmm5 + + vpsrld $11,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm8,%xmm2 + vpaddd 0(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm8,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm8,%xmm2 + vpandn %xmm10,%xmm8,%xmm0 + vpand %xmm9,%xmm8,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm12,%xmm11 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm12,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm12,%xmm13,%xmm3 + + vpxor %xmm1,%xmm11,%xmm11 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm12,%xmm1 + + vpslld $19,%xmm12,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm11,%xmm7 + + vpsrld $22,%xmm12,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm12,%xmm2 + vpxor %xmm4,%xmm13,%xmm11 + vpaddd %xmm5,%xmm15,%xmm15 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm11,%xmm11 + vpaddd %xmm7,%xmm11,%xmm11 + vmovdqu 224-128(%rax),%xmm5 + vpaddd 96-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 176-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm15,%xmm7 + vpslld $26,%xmm15,%xmm2 + vmovdqu %xmm6,208-128(%rax) + vpaddd %xmm10,%xmm6,%xmm6 + + vpsrld $11,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm15,%xmm2 + vpaddd 32(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm15,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm15,%xmm2 + vpandn %xmm9,%xmm15,%xmm0 + vpand %xmm8,%xmm15,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm11,%xmm10 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm11,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm11,%xmm12,%xmm4 + + vpxor %xmm1,%xmm10,%xmm10 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm11,%xmm1 + + vpslld $19,%xmm11,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm10,%xmm7 + + vpsrld $22,%xmm11,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm11,%xmm2 + vpxor %xmm3,%xmm12,%xmm10 + vpaddd %xmm6,%xmm14,%xmm14 + + 
vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm10,%xmm10 + vpaddd %xmm7,%xmm10,%xmm10 + vmovdqu 240-128(%rax),%xmm6 + vpaddd 112-128(%rax),%xmm5,%xmm5 + + vpsrld $3,%xmm6,%xmm7 + vpsrld $7,%xmm6,%xmm1 + vpslld $25,%xmm6,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm6,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm6,%xmm2 + vmovdqu 192-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm5,%xmm5 + vpxor %xmm1,%xmm3,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm5,%xmm5 + vpsrld $6,%xmm14,%xmm7 + vpslld $26,%xmm14,%xmm2 + vmovdqu %xmm5,224-128(%rax) + vpaddd %xmm9,%xmm5,%xmm5 + + vpsrld $11,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm14,%xmm2 + vpaddd 64(%rbp),%xmm5,%xmm5 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm14,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm14,%xmm2 + vpandn %xmm8,%xmm14,%xmm0 + vpand %xmm15,%xmm14,%xmm3 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm10,%xmm9 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm10,%xmm1 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm10,%xmm11,%xmm3 + + vpxor %xmm1,%xmm9,%xmm9 + vpaddd %xmm7,%xmm5,%xmm5 + + vpsrld $13,%xmm10,%xmm1 + + vpslld $19,%xmm10,%xmm2 + vpaddd %xmm0,%xmm5,%xmm5 + vpand %xmm3,%xmm4,%xmm4 + + vpxor %xmm1,%xmm9,%xmm7 + + vpsrld $22,%xmm10,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm10,%xmm2 + vpxor %xmm4,%xmm11,%xmm9 + vpaddd %xmm5,%xmm13,%xmm13 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm5,%xmm9,%xmm9 + vpaddd %xmm7,%xmm9,%xmm9 + vmovdqu 0-128(%rax),%xmm5 + vpaddd 128-128(%rax),%xmm6,%xmm6 + + vpsrld $3,%xmm5,%xmm7 + vpsrld $7,%xmm5,%xmm1 + vpslld $25,%xmm5,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $18,%xmm5,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $14,%xmm5,%xmm2 + vmovdqu 208-128(%rax),%xmm0 + vpsrld $10,%xmm0,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + vpsrld $17,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $15,%xmm0,%xmm2 + vpaddd %xmm7,%xmm6,%xmm6 + vpxor %xmm1,%xmm4,%xmm7 + vpsrld $19,%xmm0,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $13,%xmm0,%xmm2 + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + vpaddd %xmm7,%xmm6,%xmm6 + vpsrld $6,%xmm13,%xmm7 + vpslld $26,%xmm13,%xmm2 + vmovdqu %xmm6,240-128(%rax) + vpaddd %xmm8,%xmm6,%xmm6 + + vpsrld $11,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + vpslld $21,%xmm13,%xmm2 + vpaddd 96(%rbp),%xmm6,%xmm6 + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $25,%xmm13,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $7,%xmm13,%xmm2 + vpandn %xmm15,%xmm13,%xmm0 + vpand %xmm14,%xmm13,%xmm4 + + vpxor %xmm1,%xmm7,%xmm7 + + vpsrld $2,%xmm9,%xmm8 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $30,%xmm9,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm9,%xmm10,%xmm4 + + vpxor %xmm1,%xmm8,%xmm8 + vpaddd %xmm7,%xmm6,%xmm6 + + vpsrld $13,%xmm9,%xmm1 + + vpslld $19,%xmm9,%xmm2 + vpaddd %xmm0,%xmm6,%xmm6 + vpand %xmm4,%xmm3,%xmm3 + + vpxor %xmm1,%xmm8,%xmm7 + + vpsrld $22,%xmm9,%xmm1 + vpxor %xmm2,%xmm7,%xmm7 + + vpslld $10,%xmm9,%xmm2 + vpxor %xmm3,%xmm10,%xmm8 + vpaddd %xmm6,%xmm12,%xmm12 + + vpxor %xmm1,%xmm7,%xmm7 + vpxor %xmm2,%xmm7,%xmm7 + + vpaddd %xmm6,%xmm8,%xmm8 + vpaddd %xmm7,%xmm8,%xmm8 + addq $256,%rbp + decl %ecx + jnz L$oop_16_xx_avx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa 
(%rbx),%xmm7 + vpxor %xmm0,%xmm0,%xmm0 + vmovdqa %xmm7,%xmm6 + vpcmpgtd %xmm0,%xmm6,%xmm6 + vpaddd %xmm6,%xmm7,%xmm7 + + vmovdqu 0-128(%rdi),%xmm0 + vpand %xmm6,%xmm8,%xmm8 + vmovdqu 32-128(%rdi),%xmm1 + vpand %xmm6,%xmm9,%xmm9 + vmovdqu 64-128(%rdi),%xmm2 + vpand %xmm6,%xmm10,%xmm10 + vmovdqu 96-128(%rdi),%xmm5 + vpand %xmm6,%xmm11,%xmm11 + vpaddd %xmm0,%xmm8,%xmm8 + vmovdqu 128-128(%rdi),%xmm0 + vpand %xmm6,%xmm12,%xmm12 + vpaddd %xmm1,%xmm9,%xmm9 + vmovdqu 160-128(%rdi),%xmm1 + vpand %xmm6,%xmm13,%xmm13 + vpaddd %xmm2,%xmm10,%xmm10 + vmovdqu 192-128(%rdi),%xmm2 + vpand %xmm6,%xmm14,%xmm14 + vpaddd %xmm5,%xmm11,%xmm11 + vmovdqu 224-128(%rdi),%xmm5 + vpand %xmm6,%xmm15,%xmm15 + vpaddd %xmm0,%xmm12,%xmm12 + vpaddd %xmm1,%xmm13,%xmm13 + vmovdqu %xmm8,0-128(%rdi) + vpaddd %xmm2,%xmm14,%xmm14 + vmovdqu %xmm9,32-128(%rdi) + vpaddd %xmm5,%xmm15,%xmm15 + vmovdqu %xmm10,64-128(%rdi) + vmovdqu %xmm11,96-128(%rdi) + vmovdqu %xmm12,128-128(%rdi) + vmovdqu %xmm13,160-128(%rdi) + vmovdqu %xmm14,192-128(%rdi) + vmovdqu %xmm15,224-128(%rdi) + + vmovdqu %xmm7,(%rbx) + vmovdqu L$pbswap(%rip),%xmm6 + decl %edx + jnz L$oop_avx + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande_avx + +L$done_avx: + movq 272(%rsp),%rax + vzeroupper + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_avx: + .byte 0xf3,0xc3 + + +.p2align 5 +sha256_multi_block_avx2: +_avx2_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576,%rsp + andq $-256,%rsp + movq %rax,544(%rsp) +L$body_avx2: + leaq K256+128(%rip),%rbp + leaq 128(%rdi),%rdi + +L$oop_grande_avx2: + movl %edx,552(%rsp) + xorl %edx,%edx + leaq 512(%rsp),%rbx + movq 0(%rsi),%r12 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r12 + movq 16(%rsi),%r13 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r13 + movq 32(%rsi),%r14 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r14 + movq 48(%rsi),%r15 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r15 + movq 64(%rsi),%r8 + movl 72(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,16(%rbx) + cmovleq %rbp,%r8 + movq 80(%rsi),%r9 + movl 88(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,20(%rbx) + cmovleq %rbp,%r9 + movq 96(%rsi),%r10 + movl 104(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,24(%rbx) + cmovleq %rbp,%r10 + movq 112(%rsi),%r11 + movl 120(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,28(%rbx) + cmovleq %rbp,%r11 + vmovdqu 0-128(%rdi),%ymm8 + leaq 128(%rsp),%rax + vmovdqu 32-128(%rdi),%ymm9 + leaq 256+128(%rsp),%rbx + vmovdqu 64-128(%rdi),%ymm10 + vmovdqu 96-128(%rdi),%ymm11 + vmovdqu 128-128(%rdi),%ymm12 + vmovdqu 160-128(%rdi),%ymm13 + vmovdqu 192-128(%rdi),%ymm14 + vmovdqu 224-128(%rdi),%ymm15 + vmovdqu L$pbswap(%rip),%ymm6 + jmp L$oop_avx2 + +.p2align 5 +L$oop_avx2: + vpxor %ymm9,%ymm10,%ymm4 + vmovd 0(%r12),%xmm5 + vmovd 0(%r8),%xmm0 + vmovd 0(%r13),%xmm1 + vmovd 0(%r9),%xmm2 + vpinsrd $1,0(%r14),%xmm5,%xmm5 + vpinsrd $1,0(%r10),%xmm0,%xmm0 + vpinsrd $1,0(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,0(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb 
%ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 4(%r12),%xmm5 + vmovd 4(%r8),%xmm0 + vmovd 4(%r13),%xmm1 + vmovd 4(%r9),%xmm2 + vpinsrd $1,4(%r14),%xmm5,%xmm5 + vpinsrd $1,4(%r10),%xmm0,%xmm0 + vpinsrd $1,4(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,4(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,32-128(%rax) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 8(%r12),%xmm5 + vmovd 8(%r8),%xmm0 + vmovd 8(%r13),%xmm1 + vmovd 8(%r9),%xmm2 + vpinsrd $1,8(%r14),%xmm5,%xmm5 + vpinsrd $1,8(%r10),%xmm0,%xmm0 + vpinsrd $1,8(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,8(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd 
%ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 12(%r12),%xmm5 + vmovd 12(%r8),%xmm0 + vmovd 12(%r13),%xmm1 + vmovd 12(%r9),%xmm2 + vpinsrd $1,12(%r14),%xmm5,%xmm5 + vpinsrd $1,12(%r10),%xmm0,%xmm0 + vpinsrd $1,12(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,12(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,96-128(%rax) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 16(%r12),%xmm5 + vmovd 16(%r8),%xmm0 + vmovd 16(%r13),%xmm1 + vmovd 16(%r9),%xmm2 + vpinsrd $1,16(%r14),%xmm5,%xmm5 + vpinsrd $1,16(%r10),%xmm0,%xmm0 + vpinsrd $1,16(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,16(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 20(%r12),%xmm5 + vmovd 20(%r8),%xmm0 + vmovd 20(%r13),%xmm1 + vmovd 20(%r9),%xmm2 + vpinsrd $1,20(%r14),%xmm5,%xmm5 + vpinsrd $1,20(%r10),%xmm0,%xmm0 + vpinsrd $1,20(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,20(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,160-128(%rax) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor 
%ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 24(%r12),%xmm5 + vmovd 24(%r8),%xmm0 + vmovd 24(%r13),%xmm1 + vmovd 24(%r9),%xmm2 + vpinsrd $1,24(%r14),%xmm5,%xmm5 + vpinsrd $1,24(%r10),%xmm0,%xmm0 + vpinsrd $1,24(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,24(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 28(%r12),%xmm5 + vmovd 28(%r8),%xmm0 + vmovd 28(%r13),%xmm1 + vmovd 28(%r9),%xmm2 + vpinsrd $1,28(%r14),%xmm5,%xmm5 + vpinsrd $1,28(%r10),%xmm0,%xmm0 + vpinsrd $1,28(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,28(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,224-128(%rax) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm5,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovd 32(%r12),%xmm5 + vmovd 32(%r8),%xmm0 + vmovd 32(%r13),%xmm1 + vmovd 32(%r9),%xmm2 + vpinsrd $1,32(%r14),%xmm5,%xmm5 + vpinsrd $1,32(%r10),%xmm0,%xmm0 + vpinsrd $1,32(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,32(%r11),%xmm2,%xmm2 + vpunpckldq 
%ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovd 36(%r12),%xmm5 + vmovd 36(%r8),%xmm0 + vmovd 36(%r13),%xmm1 + vmovd 36(%r9),%xmm2 + vpinsrd $1,36(%r14),%xmm5,%xmm5 + vpinsrd $1,36(%r10),%xmm0,%xmm0 + vpinsrd $1,36(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,36(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm5,288-256-128(%rbx) + vpaddd %ymm14,%ymm5,%ymm5 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm5,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovd 40(%r12),%xmm5 + vmovd 40(%r8),%xmm0 + vmovd 40(%r13),%xmm1 + vmovd 40(%r9),%xmm2 + vpinsrd $1,40(%r14),%xmm5,%xmm5 + vpinsrd $1,40(%r10),%xmm0,%xmm0 + vpinsrd $1,40(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,40(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + 
vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovd 44(%r12),%xmm5 + vmovd 44(%r8),%xmm0 + vmovd 44(%r13),%xmm1 + vmovd 44(%r9),%xmm2 + vpinsrd $1,44(%r14),%xmm5,%xmm5 + vpinsrd $1,44(%r10),%xmm0,%xmm0 + vpinsrd $1,44(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,44(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm5,352-256-128(%rbx) + vpaddd %ymm12,%ymm5,%ymm5 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm5,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovd 48(%r12),%xmm5 + vmovd 48(%r8),%xmm0 + vmovd 48(%r13),%xmm1 + vmovd 48(%r9),%xmm2 + vpinsrd $1,48(%r14),%xmm5,%xmm5 + vpinsrd $1,48(%r10),%xmm0,%xmm0 + vpinsrd $1,48(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,48(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovd 52(%r12),%xmm5 + vmovd 52(%r8),%xmm0 + vmovd 52(%r13),%xmm1 + vmovd 52(%r9),%xmm2 + vpinsrd $1,52(%r14),%xmm5,%xmm5 + vpinsrd $1,52(%r10),%xmm0,%xmm0 + vpinsrd $1,52(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,52(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm5,416-256-128(%rbx) + vpaddd %ymm10,%ymm5,%ymm5 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor 
%ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm5,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovd 56(%r12),%xmm5 + vmovd 56(%r8),%xmm0 + vmovd 56(%r13),%xmm1 + vmovd 56(%r9),%xmm2 + vpinsrd $1,56(%r14),%xmm5,%xmm5 + vpinsrd $1,56(%r10),%xmm0,%xmm0 + vpinsrd $1,56(%r15),%xmm1,%xmm1 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,56(%r11),%xmm2,%xmm2 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovd 60(%r12),%xmm5 + leaq 64(%r12),%r12 + vmovd 60(%r8),%xmm0 + leaq 64(%r8),%r8 + vmovd 60(%r13),%xmm1 + leaq 64(%r13),%r13 + vmovd 60(%r9),%xmm2 + leaq 64(%r9),%r9 + vpinsrd $1,60(%r14),%xmm5,%xmm5 + leaq 64(%r14),%r14 + vpinsrd $1,60(%r10),%xmm0,%xmm0 + leaq 64(%r10),%r10 + vpinsrd $1,60(%r15),%xmm1,%xmm1 + leaq 64(%r15),%r15 + vpunpckldq %ymm1,%ymm5,%ymm5 + vpinsrd $1,60(%r11),%xmm2,%xmm2 + leaq 64(%r11),%r11 + vpunpckldq %ymm2,%ymm0,%ymm0 + vinserti128 $1,%xmm0,%ymm5,%ymm5 + vpshufb %ymm6,%ymm5,%ymm5 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm5,480-256-128(%rbx) + vpaddd %ymm8,%ymm5,%ymm5 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r12) + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + prefetcht0 63(%r13) + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r14) + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + prefetcht0 63(%r15) + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm9,%ymm1 + prefetcht0 63(%r8) + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm4,%ymm3,%ymm3 + prefetcht0 63(%r9) + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + prefetcht0 63(%r10) + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + 
vpaddd %ymm5,%ymm12,%ymm12 + prefetcht0 63(%r11) + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 0-128(%rax),%ymm5 + movl $3,%ecx + jmp L$oop_16_xx_avx2 +.p2align 5 +L$oop_16_xx_avx2: + vmovdqu 32-128(%rax),%ymm6 + vpaddd 288-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 448-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,0-128(%rax) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 64-128(%rax),%ymm5 + vpaddd 320-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 480-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,32-128(%rax) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 96-128(%rax),%ymm6 + vpaddd 352-256-128(%rbx),%ymm5,%ymm5 + + 
vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 0-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,64-128(%rax) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 128-128(%rax),%ymm5 + vpaddd 384-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 32-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,96-128(%rax) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 160-128(%rax),%ymm6 + vpaddd 416-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 64-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd 
%ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,128-128(%rax) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 192-128(%rax),%ymm5 + vpaddd 448-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 96-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,160-128(%rax) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 224-128(%rax),%ymm6 + vpaddd 480-256-128(%rbx),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 128-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,192-128(%rax) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor 
%ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 256-256-128(%rbx),%ymm5 + vpaddd 0-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 160-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,224-128(%rax) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + vmovdqu 288-256-128(%rbx),%ymm6 + vpaddd 32-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 192-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm12,%ymm7 + vpslld $26,%ymm12,%ymm2 + vmovdqu %ymm5,256-256-128(%rbx) + vpaddd %ymm15,%ymm5,%ymm5 + + vpsrld $11,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm12,%ymm2 + vpaddd -128(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm12,%ymm2 + vpandn %ymm14,%ymm12,%ymm0 + vpand %ymm13,%ymm12,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm8,%ymm15 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm8,%ymm1 
+ vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm8,%ymm9,%ymm3 + + vpxor %ymm1,%ymm15,%ymm15 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm8,%ymm1 + + vpslld $19,%ymm8,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm15,%ymm7 + + vpsrld $22,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm8,%ymm2 + vpxor %ymm4,%ymm9,%ymm15 + vpaddd %ymm5,%ymm11,%ymm11 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm15,%ymm15 + vpaddd %ymm7,%ymm15,%ymm15 + vmovdqu 320-256-128(%rbx),%ymm5 + vpaddd 64-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 224-128(%rax),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm11,%ymm7 + vpslld $26,%ymm11,%ymm2 + vmovdqu %ymm6,288-256-128(%rbx) + vpaddd %ymm14,%ymm6,%ymm6 + + vpsrld $11,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm11,%ymm2 + vpaddd -96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm11,%ymm2 + vpandn %ymm13,%ymm11,%ymm0 + vpand %ymm12,%ymm11,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm15,%ymm14 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm15,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm15,%ymm8,%ymm4 + + vpxor %ymm1,%ymm14,%ymm14 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm15,%ymm1 + + vpslld $19,%ymm15,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm14,%ymm7 + + vpsrld $22,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm15,%ymm2 + vpxor %ymm3,%ymm8,%ymm14 + vpaddd %ymm6,%ymm10,%ymm10 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm14,%ymm14 + vpaddd %ymm7,%ymm14,%ymm14 + vmovdqu 352-256-128(%rbx),%ymm6 + vpaddd 96-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 256-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm10,%ymm7 + vpslld $26,%ymm10,%ymm2 + vmovdqu %ymm5,320-256-128(%rbx) + vpaddd %ymm13,%ymm5,%ymm5 + + vpsrld $11,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm10,%ymm2 + vpaddd -64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm10,%ymm2 + vpandn %ymm12,%ymm10,%ymm0 + vpand %ymm11,%ymm10,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm14,%ymm13 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm14,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm14,%ymm15,%ymm3 + + vpxor %ymm1,%ymm13,%ymm13 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm14,%ymm1 + + vpslld $19,%ymm14,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm13,%ymm7 + + vpsrld $22,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm14,%ymm2 + 
vpxor %ymm4,%ymm15,%ymm13 + vpaddd %ymm5,%ymm9,%ymm9 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm13,%ymm13 + vpaddd %ymm7,%ymm13,%ymm13 + vmovdqu 384-256-128(%rbx),%ymm5 + vpaddd 128-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 288-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm9,%ymm7 + vpslld $26,%ymm9,%ymm2 + vmovdqu %ymm6,352-256-128(%rbx) + vpaddd %ymm12,%ymm6,%ymm6 + + vpsrld $11,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm9,%ymm2 + vpaddd -32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm9,%ymm2 + vpandn %ymm11,%ymm9,%ymm0 + vpand %ymm10,%ymm9,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm13,%ymm12 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm13,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm13,%ymm14,%ymm4 + + vpxor %ymm1,%ymm12,%ymm12 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm13,%ymm1 + + vpslld $19,%ymm13,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm12,%ymm7 + + vpsrld $22,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm13,%ymm2 + vpxor %ymm3,%ymm14,%ymm12 + vpaddd %ymm6,%ymm8,%ymm8 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm12,%ymm12 + vpaddd %ymm7,%ymm12,%ymm12 + vmovdqu 416-256-128(%rbx),%ymm6 + vpaddd 160-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 320-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm8,%ymm7 + vpslld $26,%ymm8,%ymm2 + vmovdqu %ymm5,384-256-128(%rbx) + vpaddd %ymm11,%ymm5,%ymm5 + + vpsrld $11,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm8,%ymm2 + vpaddd 0(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm8,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm8,%ymm2 + vpandn %ymm10,%ymm8,%ymm0 + vpand %ymm9,%ymm8,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm12,%ymm11 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm12,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm12,%ymm13,%ymm3 + + vpxor %ymm1,%ymm11,%ymm11 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm12,%ymm1 + + vpslld $19,%ymm12,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm11,%ymm7 + + vpsrld $22,%ymm12,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm12,%ymm2 + vpxor %ymm4,%ymm13,%ymm11 + vpaddd %ymm5,%ymm15,%ymm15 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm11,%ymm11 + vpaddd %ymm7,%ymm11,%ymm11 + vmovdqu 448-256-128(%rbx),%ymm5 + vpaddd 192-128(%rax),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + 
vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 352-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm15,%ymm7 + vpslld $26,%ymm15,%ymm2 + vmovdqu %ymm6,416-256-128(%rbx) + vpaddd %ymm10,%ymm6,%ymm6 + + vpsrld $11,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm15,%ymm2 + vpaddd 32(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm15,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm15,%ymm2 + vpandn %ymm9,%ymm15,%ymm0 + vpand %ymm8,%ymm15,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm11,%ymm10 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm11,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm11,%ymm12,%ymm4 + + vpxor %ymm1,%ymm10,%ymm10 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm11,%ymm1 + + vpslld $19,%ymm11,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm10,%ymm7 + + vpsrld $22,%ymm11,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm11,%ymm2 + vpxor %ymm3,%ymm12,%ymm10 + vpaddd %ymm6,%ymm14,%ymm14 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm10,%ymm10 + vpaddd %ymm7,%ymm10,%ymm10 + vmovdqu 480-256-128(%rbx),%ymm6 + vpaddd 224-128(%rax),%ymm5,%ymm5 + + vpsrld $3,%ymm6,%ymm7 + vpsrld $7,%ymm6,%ymm1 + vpslld $25,%ymm6,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm6,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm6,%ymm2 + vmovdqu 384-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm5,%ymm5 + vpxor %ymm1,%ymm3,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm5,%ymm5 + vpsrld $6,%ymm14,%ymm7 + vpslld $26,%ymm14,%ymm2 + vmovdqu %ymm5,448-256-128(%rbx) + vpaddd %ymm9,%ymm5,%ymm5 + + vpsrld $11,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm14,%ymm2 + vpaddd 64(%rbp),%ymm5,%ymm5 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm14,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm14,%ymm2 + vpandn %ymm8,%ymm14,%ymm0 + vpand %ymm15,%ymm14,%ymm3 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm10,%ymm9 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm10,%ymm1 + vpxor %ymm3,%ymm0,%ymm0 + vpxor %ymm10,%ymm11,%ymm3 + + vpxor %ymm1,%ymm9,%ymm9 + vpaddd %ymm7,%ymm5,%ymm5 + + vpsrld $13,%ymm10,%ymm1 + + vpslld $19,%ymm10,%ymm2 + vpaddd %ymm0,%ymm5,%ymm5 + vpand %ymm3,%ymm4,%ymm4 + + vpxor %ymm1,%ymm9,%ymm7 + + vpsrld $22,%ymm10,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm10,%ymm2 + vpxor %ymm4,%ymm11,%ymm9 + vpaddd %ymm5,%ymm13,%ymm13 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm5,%ymm9,%ymm9 + vpaddd %ymm7,%ymm9,%ymm9 + vmovdqu 0-128(%rax),%ymm5 + vpaddd 256-256-128(%rbx),%ymm6,%ymm6 + + vpsrld $3,%ymm5,%ymm7 + vpsrld $7,%ymm5,%ymm1 + vpslld $25,%ymm5,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $18,%ymm5,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $14,%ymm5,%ymm2 + vmovdqu 416-256-128(%rbx),%ymm0 + vpsrld $10,%ymm0,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + vpsrld $17,%ymm0,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $15,%ymm0,%ymm2 + vpaddd %ymm7,%ymm6,%ymm6 + vpxor %ymm1,%ymm4,%ymm7 + vpsrld $19,%ymm0,%ymm1 + vpxor 
%ymm2,%ymm7,%ymm7 + vpslld $13,%ymm0,%ymm2 + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + vpaddd %ymm7,%ymm6,%ymm6 + vpsrld $6,%ymm13,%ymm7 + vpslld $26,%ymm13,%ymm2 + vmovdqu %ymm6,480-256-128(%rbx) + vpaddd %ymm8,%ymm6,%ymm6 + + vpsrld $11,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + vpslld $21,%ymm13,%ymm2 + vpaddd 96(%rbp),%ymm6,%ymm6 + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $25,%ymm13,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $7,%ymm13,%ymm2 + vpandn %ymm15,%ymm13,%ymm0 + vpand %ymm14,%ymm13,%ymm4 + + vpxor %ymm1,%ymm7,%ymm7 + + vpsrld $2,%ymm9,%ymm8 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $30,%ymm9,%ymm1 + vpxor %ymm4,%ymm0,%ymm0 + vpxor %ymm9,%ymm10,%ymm4 + + vpxor %ymm1,%ymm8,%ymm8 + vpaddd %ymm7,%ymm6,%ymm6 + + vpsrld $13,%ymm9,%ymm1 + + vpslld $19,%ymm9,%ymm2 + vpaddd %ymm0,%ymm6,%ymm6 + vpand %ymm4,%ymm3,%ymm3 + + vpxor %ymm1,%ymm8,%ymm7 + + vpsrld $22,%ymm9,%ymm1 + vpxor %ymm2,%ymm7,%ymm7 + + vpslld $10,%ymm9,%ymm2 + vpxor %ymm3,%ymm10,%ymm8 + vpaddd %ymm6,%ymm12,%ymm12 + + vpxor %ymm1,%ymm7,%ymm7 + vpxor %ymm2,%ymm7,%ymm7 + + vpaddd %ymm6,%ymm8,%ymm8 + vpaddd %ymm7,%ymm8,%ymm8 + addq $256,%rbp + decl %ecx + jnz L$oop_16_xx_avx2 + + movl $1,%ecx + leaq 512(%rsp),%rbx + leaq K256+128(%rip),%rbp + cmpl 0(%rbx),%ecx + cmovgeq %rbp,%r12 + cmpl 4(%rbx),%ecx + cmovgeq %rbp,%r13 + cmpl 8(%rbx),%ecx + cmovgeq %rbp,%r14 + cmpl 12(%rbx),%ecx + cmovgeq %rbp,%r15 + cmpl 16(%rbx),%ecx + cmovgeq %rbp,%r8 + cmpl 20(%rbx),%ecx + cmovgeq %rbp,%r9 + cmpl 24(%rbx),%ecx + cmovgeq %rbp,%r10 + cmpl 28(%rbx),%ecx + cmovgeq %rbp,%r11 + vmovdqa (%rbx),%ymm7 + vpxor %ymm0,%ymm0,%ymm0 + vmovdqa %ymm7,%ymm6 + vpcmpgtd %ymm0,%ymm6,%ymm6 + vpaddd %ymm6,%ymm7,%ymm7 + + vmovdqu 0-128(%rdi),%ymm0 + vpand %ymm6,%ymm8,%ymm8 + vmovdqu 32-128(%rdi),%ymm1 + vpand %ymm6,%ymm9,%ymm9 + vmovdqu 64-128(%rdi),%ymm2 + vpand %ymm6,%ymm10,%ymm10 + vmovdqu 96-128(%rdi),%ymm5 + vpand %ymm6,%ymm11,%ymm11 + vpaddd %ymm0,%ymm8,%ymm8 + vmovdqu 128-128(%rdi),%ymm0 + vpand %ymm6,%ymm12,%ymm12 + vpaddd %ymm1,%ymm9,%ymm9 + vmovdqu 160-128(%rdi),%ymm1 + vpand %ymm6,%ymm13,%ymm13 + vpaddd %ymm2,%ymm10,%ymm10 + vmovdqu 192-128(%rdi),%ymm2 + vpand %ymm6,%ymm14,%ymm14 + vpaddd %ymm5,%ymm11,%ymm11 + vmovdqu 224-128(%rdi),%ymm5 + vpand %ymm6,%ymm15,%ymm15 + vpaddd %ymm0,%ymm12,%ymm12 + vpaddd %ymm1,%ymm13,%ymm13 + vmovdqu %ymm8,0-128(%rdi) + vpaddd %ymm2,%ymm14,%ymm14 + vmovdqu %ymm9,32-128(%rdi) + vpaddd %ymm5,%ymm15,%ymm15 + vmovdqu %ymm10,64-128(%rdi) + vmovdqu %ymm11,96-128(%rdi) + vmovdqu %ymm12,128-128(%rdi) + vmovdqu %ymm13,160-128(%rdi) + vmovdqu %ymm14,192-128(%rdi) + vmovdqu %ymm15,224-128(%rdi) + + vmovdqu %ymm7,(%rbx) + leaq 256+128(%rsp),%rbx + vmovdqu L$pbswap(%rip),%ymm6 + decl %edx + jnz L$oop_avx2 + + + + + + + +L$done_avx2: + movq 544(%rsp),%rax + vzeroupper + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_avx2: + .byte 0xf3,0xc3 + +.p2align 8 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 
1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 
2820302411,2820302411,2820302411,2820302411
+.long 2820302411,2820302411,2820302411,2820302411
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3600352804,3600352804,3600352804,3600352804
+.long 3600352804,3600352804,3600352804,3600352804
+.long 4094571909,4094571909,4094571909,4094571909
+.long 4094571909,4094571909,4094571909,4094571909
+.long 275423344,275423344,275423344,275423344
+.long 275423344,275423344,275423344,275423344
+.long 430227734,430227734,430227734,430227734
+.long 430227734,430227734,430227734,430227734
+.long 506948616,506948616,506948616,506948616
+.long 506948616,506948616,506948616,506948616
+.long 659060556,659060556,659060556,659060556
+.long 659060556,659060556,659060556,659060556
+.long 883997877,883997877,883997877,883997877
+.long 883997877,883997877,883997877,883997877
+.long 958139571,958139571,958139571,958139571
+.long 958139571,958139571,958139571,958139571
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1955562222,1955562222,1955562222,1955562222
+.long 1955562222,1955562222,1955562222,1955562222
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2756734187,2756734187,2756734187,2756734187
+.long 2756734187,2756734187,2756734187,2756734187
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3329325298,3329325298,3329325298,3329325298
+.long 3329325298,3329325298,3329325298,3329325298
+L$pbswap:
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+K256_shaext:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git
a/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s index dda5a96e9dc4bb..b66bd34406cc74 100644 --- a/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha256-x86_64.s @@ -5,6 +5,22 @@ .p2align 4 _sha256_block_data_order: + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $536870912,%r11d + jnz _shaext_shortcut + andl $296,%r11d + cmpl $296,%r11d + je L$avx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je L$avx_shortcut + testl $512,%r10d + jnz L$ssse3_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -22,8 +38,6 @@ _sha256_block_data_order: movq %r11,64+24(%rsp) L$prologue: - leaq K256(%rip),%rbp - movl 0(%rdi),%eax movl 4(%rdi),%ebx movl 8(%rdi),%ecx @@ -36,1694 +50,1632 @@ L$prologue: .p2align 4 L$loop: - xorq %rdi,%rdi + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi movl 0(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,0(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,0(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp + addl %r14d,%r11d movl 4(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,4(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,4(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp + addl %r14d,%r10d movl 8(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d - movl %r12d,8(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,8(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp + addl %r14d,%r9d movl 
12(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,12(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,12(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp + addl %r14d,%r8d movl 16(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,16(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,16(%rsp) xorl %r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d - xorl %ecx,%r15d + addl %r15d,%r12d - xorl %r10d,%edx + movl %r8d,%r15d + addl (%rbp),%r12d xorl %r8d,%r14d - addl %r15d,%r12d - movl %r9d,%r15d + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp + addl %r14d,%edx movl 20(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,20(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,20(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp + addl %r14d,%ecx movl 24(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,24(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,24(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp + addl %r14d,%ebx movl 28(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,28(%rsp) + movl %r10d,%edi - rorl 
$9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,28(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax + leaq 20(%rbp),%rbp + addl %r14d,%eax movl 32(%rsi),%r12d movl %r8d,%r13d movl %eax,%r14d bswapl %r12d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,32(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,32(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp + addl %r14d,%r11d movl 36(%rsi),%r12d movl %edx,%r13d movl %r11d,%r14d bswapl %r12d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,36(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,36(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp + addl %r14d,%r10d movl 40(%rsi),%r12d movl %ecx,%r13d movl %r10d,%r14d bswapl %r12d rorl $14,%r13d movl %edx,%r15d - movl %r12d,40(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,40(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp + addl %r14d,%r9d movl 44(%rsi),%r12d movl %ebx,%r13d movl %r9d,%r14d bswapl %r12d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,44(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl 
%r12d,44(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp + addl %r14d,%r8d movl 48(%rsi),%r12d movl %eax,%r13d movl %r8d,%r14d bswapl %r12d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,48(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,48(%rsp) xorl %r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx - rorl $11,%r14d - xorl %eax,%r13d + rorl $5,%r13d + addl %edx,%r12d xorl %ecx,%r15d - xorl %r10d,%edx - xorl %r8d,%r14d + rorl $11,%r14d + xorl %eax,%r13d addl %r15d,%r12d - movl %r9d,%r15d + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp + addl %r14d,%edx movl 52(%rsi),%r12d movl %r11d,%r13d movl %edx,%r14d bswapl %r12d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,52(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,52(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp + addl %r14d,%ecx movl 56(%rsi),%r12d movl %r10d,%r13d movl %ecx,%r14d bswapl %r12d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,56(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,56(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp + addl %r14d,%ebx movl 60(%rsi),%r12d movl %r9d,%r13d movl %ebx,%r14d bswapl %r12d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,60(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,60(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl 
$5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax + leaq 20(%rbp),%rbp jmp L$rounds_16_xx .p2align 4 L$rounds_16_xx: movl 4(%rsp),%r13d - movl 56(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 56(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 36(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d addl 0(%rsp),%r12d movl %r8d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,0(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,0(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp movl 8(%rsp),%r13d - movl 60(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 60(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 40(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d addl 4(%rsp),%r12d movl %edx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,4(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,4(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d + leaq 4(%rbp),%rbp movl 12(%rsp),%r13d - movl 0(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 0(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 
44(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d addl 8(%rsp),%r12d movl %ecx,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d - movl %r12d,8(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,8(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp movl 16(%rsp),%r13d - movl 4(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 4(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 48(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d addl 12(%rsp),%r12d movl %ebx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,12(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,12(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp movl 20(%rsp),%r13d - movl 8(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 8(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 52(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d addl 16(%rsp),%r12d movl %eax,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,16(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,16(%rsp) xorl %r8d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d - xorl %ecx,%r15d + addl %r15d,%r12d - xorl %r10d,%edx + movl %r8d,%r15d + addl (%rbp),%r12d xorl %r8d,%r14d - addl %r15d,%r12d - movl %r9d,%r15d + xorl %r9d,%r15d rorl $6,%r13d - 
andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp movl 24(%rsp),%r13d - movl 12(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 12(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 56(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d addl 20(%rsp),%r12d movl %r11d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,20(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,20(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp movl 28(%rsp),%r13d - movl 16(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 16(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 60(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d addl 24(%rsp),%r12d movl %r10d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,24(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,24(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp movl 32(%rsp),%r13d - movl 20(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 20(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 0(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d addl 28(%rsp),%r12d movl %r9d,%r13d - addl %r14d,%r12d + addl 
%edi,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,28(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,28(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax + leaq 20(%rbp),%rbp movl 36(%rsp),%r13d - movl 24(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 24(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 4(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d addl 32(%rsp),%r12d movl %r8d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %eax,%r14d rorl $14,%r13d movl %r9d,%r15d - movl %r12d,32(%rsp) - rorl $9,%r14d xorl %r8d,%r13d + rorl $9,%r14d xorl %r10d,%r15d - rorl $5,%r13d - addl %r11d,%r12d + movl %r12d,32(%rsp) xorl %eax,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r8d,%r15d - movl %ebx,%r11d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d rorl $11,%r14d xorl %r8d,%r13d - xorl %r10d,%r15d + addl %r15d,%r12d - xorl %ecx,%r11d + movl %eax,%r15d + addl (%rbp),%r12d xorl %eax,%r14d - addl %r15d,%r12d - movl %ebx,%r15d + xorl %ebx,%r15d rorl $6,%r13d - andl %eax,%r11d - andl %ecx,%r15d + movl %ebx,%r11d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r11d + xorl %edi,%r11d addl %r12d,%edx addl %r12d,%r11d - leaq 1(%rdi),%rdi - addl %r14d,%r11d + leaq 4(%rbp),%rbp movl 40(%rsp),%r13d - movl 28(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 28(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 8(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d addl 36(%rsp),%r12d movl %edx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r11d,%r14d rorl $14,%r13d - movl %r8d,%r15d - movl %r12d,36(%rsp) + movl %r8d,%edi - rorl $9,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + rorl $9,%r14d + xorl %r9d,%edi - rorl $5,%r13d - addl %r10d,%r12d + movl %r12d,36(%rsp) xorl %r11d,%r14d + andl %edx,%edi - addl (%rbp,%rdi,4),%r12d - andl %edx,%r15d - movl %eax,%r10d + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi rorl $11,%r14d xorl %edx,%r13d - xorl %r9d,%r15d + addl %edi,%r12d - xorl %ebx,%r10d + movl %r11d,%edi + addl (%rbp),%r12d xorl %r11d,%r14d - addl %r15d,%r12d - movl %eax,%r15d + xorl %eax,%edi rorl $6,%r13d - andl %r11d,%r10d - andl %ebx,%r15d + movl %eax,%r10d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r10d + xorl %r15d,%r10d addl %r12d,%ecx addl %r12d,%r10d - leaq 1(%rdi),%rdi - addl %r14d,%r10d 
+ leaq 4(%rbp),%rbp movl 44(%rsp),%r13d - movl 32(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 32(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 12(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d addl 40(%rsp),%r12d movl %ecx,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r10d,%r14d rorl $14,%r13d movl %edx,%r15d - movl %r12d,40(%rsp) - rorl $9,%r14d xorl %ecx,%r13d + rorl $9,%r14d xorl %r8d,%r15d - rorl $5,%r13d - addl %r9d,%r12d + movl %r12d,40(%rsp) xorl %r10d,%r14d - - addl (%rbp,%rdi,4),%r12d andl %ecx,%r15d - movl %r11d,%r9d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d rorl $11,%r14d xorl %ecx,%r13d - xorl %r8d,%r15d + addl %r15d,%r12d - xorl %eax,%r9d + movl %r10d,%r15d + addl (%rbp),%r12d xorl %r10d,%r14d - addl %r15d,%r12d - movl %r11d,%r15d + xorl %r11d,%r15d rorl $6,%r13d - andl %r10d,%r9d - andl %eax,%r15d + movl %r11d,%r9d + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r9d + xorl %edi,%r9d addl %r12d,%ebx addl %r12d,%r9d - leaq 1(%rdi),%rdi - addl %r14d,%r9d + leaq 4(%rbp),%rbp movl 48(%rsp),%r13d - movl 36(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 36(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 16(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d addl 44(%rsp),%r12d movl %ebx,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %r9d,%r14d rorl $14,%r13d - movl %ecx,%r15d - movl %r12d,44(%rsp) + movl %ecx,%edi - rorl $9,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + rorl $9,%r14d + xorl %edx,%edi - rorl $5,%r13d - addl %r8d,%r12d + movl %r12d,44(%rsp) xorl %r9d,%r14d + andl %ebx,%edi - addl (%rbp,%rdi,4),%r12d - andl %ebx,%r15d - movl %r10d,%r8d + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi rorl $11,%r14d xorl %ebx,%r13d - xorl %edx,%r15d + addl %edi,%r12d - xorl %r11d,%r8d + movl %r9d,%edi + addl (%rbp),%r12d xorl %r9d,%r14d - addl %r15d,%r12d - movl %r10d,%r15d + xorl %r10d,%edi rorl $6,%r13d - andl %r9d,%r8d - andl %r11d,%r15d + movl %r10d,%r8d + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%r8d + xorl %r15d,%r8d addl %r12d,%eax addl %r12d,%r8d - leaq 1(%rdi),%rdi - addl %r14d,%r8d + leaq 20(%rbp),%rbp movl 52(%rsp),%r13d - movl 40(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 40(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 20(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d addl 48(%rsp),%r12d movl %eax,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %r8d,%r14d rorl $14,%r13d movl %ebx,%r15d - movl %r12d,48(%rsp) - rorl $9,%r14d xorl %eax,%r13d + rorl $9,%r14d xorl %ecx,%r15d - rorl $5,%r13d - addl %edx,%r12d + movl %r12d,48(%rsp) xorl %r8d,%r14d - - addl 
(%rbp,%rdi,4),%r12d andl %eax,%r15d - movl %r9d,%edx + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d rorl $11,%r14d xorl %eax,%r13d - xorl %ecx,%r15d + addl %r15d,%r12d - xorl %r10d,%edx + movl %r8d,%r15d + addl (%rbp),%r12d xorl %r8d,%r14d - addl %r15d,%r12d - movl %r9d,%r15d + xorl %r9d,%r15d rorl $6,%r13d - andl %r8d,%edx - andl %r10d,%r15d + movl %r9d,%edx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%edx + xorl %edi,%edx addl %r12d,%r11d addl %r12d,%edx - leaq 1(%rdi),%rdi - addl %r14d,%edx + leaq 4(%rbp),%rbp movl 56(%rsp),%r13d - movl 44(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 44(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d - movl 24(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d addl 52(%rsp),%r12d movl %r11d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %edx,%r14d rorl $14,%r13d - movl %eax,%r15d - movl %r12d,52(%rsp) + movl %eax,%edi - rorl $9,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + rorl $9,%r14d + xorl %ebx,%edi - rorl $5,%r13d - addl %ecx,%r12d + movl %r12d,52(%rsp) xorl %edx,%r14d + andl %r11d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r11d,%r15d - movl %r8d,%ecx + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi rorl $11,%r14d xorl %r11d,%r13d - xorl %ebx,%r15d + addl %edi,%r12d - xorl %r9d,%ecx + movl %edx,%edi + addl (%rbp),%r12d xorl %edx,%r14d - addl %r15d,%r12d - movl %r8d,%r15d + xorl %r8d,%edi rorl $6,%r13d - andl %edx,%ecx - andl %r9d,%r15d + movl %r8d,%ecx + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ecx + xorl %r15d,%ecx addl %r12d,%r10d addl %r12d,%ecx - leaq 1(%rdi),%rdi - addl %r14d,%ecx + leaq 4(%rbp),%rbp movl 60(%rsp),%r13d - movl 48(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 48(%rsp),%r15d - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d - rorl $7,%r12d xorl %r12d,%r13d - movl 28(%rsp),%r12d - - rorl $2,%r15d + shrl $3,%r12d + rorl $7,%r13d xorl %r14d,%r15d shrl $10,%r14d rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d addl 56(%rsp),%r12d movl %r10d,%r13d - addl %r14d,%r12d + addl %r15d,%r12d movl %ecx,%r14d rorl $14,%r13d movl %r11d,%r15d - movl %r12d,56(%rsp) - rorl $9,%r14d xorl %r10d,%r13d + rorl $9,%r14d xorl %eax,%r15d - rorl $5,%r13d - addl %ebx,%r12d + movl %r12d,56(%rsp) xorl %ecx,%r14d - - addl (%rbp,%rdi,4),%r12d andl %r10d,%r15d - movl %edx,%ebx + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d rorl $11,%r14d xorl %r10d,%r13d - xorl %eax,%r15d + addl %r15d,%r12d - xorl %r8d,%ebx + movl %ecx,%r15d + addl (%rbp),%r12d xorl %ecx,%r14d - addl %r15d,%r12d - movl %edx,%r15d + xorl %edx,%r15d rorl $6,%r13d - andl %ecx,%ebx - andl %r8d,%r15d + movl %edx,%ebx + andl %r15d,%edi rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%ebx + xorl %edi,%ebx addl %r12d,%r9d addl %r12d,%ebx - leaq 1(%rdi),%rdi - addl %r14d,%ebx + leaq 4(%rbp),%rbp movl 0(%rsp),%r13d - movl 52(%rsp),%r14d - movl %r13d,%r12d - movl %r14d,%r15d + movl 52(%rsp),%edi - rorl $11,%r12d - xorl %r13d,%r12d - shrl $3,%r13d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi - rorl $7,%r12d xorl %r12d,%r13d 
- movl 32(%rsp),%r12d - - rorl $2,%r15d - xorl %r14d,%r15d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi shrl $10,%r14d - rorl $17,%r15d - addl %r13d,%r12d - xorl %r15d,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d addl 60(%rsp),%r12d movl %r9d,%r13d - addl %r14d,%r12d + addl %edi,%r12d movl %ebx,%r14d rorl $14,%r13d - movl %r10d,%r15d - movl %r12d,60(%rsp) + movl %r10d,%edi - rorl $9,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + rorl $9,%r14d + xorl %r11d,%edi - rorl $5,%r13d - addl %eax,%r12d + movl %r12d,60(%rsp) xorl %ebx,%r14d + andl %r9d,%edi - addl (%rbp,%rdi,4),%r12d - andl %r9d,%r15d - movl %ecx,%eax + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi rorl $11,%r14d xorl %r9d,%r13d - xorl %r11d,%r15d + addl %edi,%r12d - xorl %edx,%eax + movl %ebx,%edi + addl (%rbp),%r12d xorl %ebx,%r14d - addl %r15d,%r12d - movl %ecx,%r15d + xorl %ecx,%edi rorl $6,%r13d - andl %ebx,%eax - andl %edx,%r15d + movl %ecx,%eax + andl %edi,%r15d rorl $2,%r14d addl %r13d,%r12d - addl %r15d,%eax + xorl %r15d,%eax addl %r12d,%r8d addl %r12d,%eax - leaq 1(%rdi),%rdi - addl %r14d,%eax - cmpq $64,%rdi - jb L$rounds_16_xx + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz L$rounds_16_xx movq 64+0(%rsp),%rdi + addl %r14d,%eax leaq 64(%rsi),%rsi addl 0(%rdi),%eax @@ -1762,18 +1714,3644 @@ L$epilogue: K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.p2align 6 +sha256_block_data_order_shaext: 
+_shaext_shortcut: + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 
+ + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz L$oop_shaext + + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + .byte 0xf3,0xc3 + + +.p2align 6 +sha256_block_data_order_ssse3: +L$ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp L$loop_ssse3 +.p2align 4 +L$loop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$ssse3_00_47 + +.p2align 4 +L$ssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 
+ andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl 
$14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + 
andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$ssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + 
xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl 
%r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + + +.p2align 6 +sha256_block_data_order_avx: +L$avx_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp L$loop_avx +.p2align 4 +L$loop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq 
K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$avx_00_47 + +.p2align 4 +L$avx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d 
+ vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + 
andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd 
%xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$avx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl 
%r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl 
%r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_avx + + movq 64+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_avx: + .byte 0xf3,0xc3 + + +.p2align 6 +sha256_block_data_order_avx2: +L$avx2_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $544,%rsp + shlq $4,%rdx + andq $-1024,%rsp + leaq (%rsi,%rdx,4),%rdx + addq $448,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue_avx2: + + vzeroupper + subq $-64,%rsi + movl 0(%rdi),%eax + movq %rsi,%r12 + movl 4(%rdi),%ebx + cmpq %rdx,%rsi + movl 8(%rdi),%ecx + cmoveq %rsp,%r12 + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%ymm8 + vmovdqa K256+512+64(%rip),%ymm9 + jmp L$oop_avx2 +.p2align 4 +L$oop_avx2: + vmovdqa 
K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi),%xmm0 + vmovdqu -64+16(%rsi),%xmm1 + vmovdqu -64+32(%rsi),%xmm2 + vmovdqu -64+48(%rsi),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + leaq -64(%rsp),%rsp + movl %ebx,%edi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%edi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp L$avx2_00_47 + +.p2align 4 +L$avx2_00_47: + leaq -64(%rsp),%rsp + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm0,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 0(%rbp),%ymm0,%ymm6 + rorxl $13,%r9d,%r14d + 
rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm1,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 32(%rbp),%ymm1,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl 
$22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm2,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 64(%rbp),%ymm2,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal 
(%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm3,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 96(%rbp),%ymm3,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne L$avx2_00_47 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl 
$25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + 
xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rbp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl 
%r10d,24(%rdi) + movl %r11d,28(%rdi) + + cmpq 80(%rbp),%rsi + je L$done_avx2 + + xorl %r14d,%r14d + movl %ebx,%edi + xorl %ecx,%edi + movl %r9d,%r12d + jmp L$ower_avx2 +.p2align 4 +L$ower_avx2: + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + 
leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + cmpq %rsp,%rbp + jae L$ower_avx2 + + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rsp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + leaq 128(%rsi),%rsi + addl 24(%rdi),%r10d + movq %rsi,%r12 + addl 28(%rdi),%r11d + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + cmoveq %rsp,%r12 + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + jbe L$oop_avx2 + leaq (%rsp),%rbp + +L$done_avx2: + leaq (%rbp),%rsp + movq 64+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_avx2: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s b/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s index b5882eaf3c0bb5..91821da1264e86 100644 --- a/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/sha/sha512-x86_64.s @@ -5,6 +5,20 @@ .p2align 4 _sha512_block_data_order: + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $2048,%r10d + jnz L$xop_shortcut + andl $296,%r11d + cmpl $296,%r11d + je L$avx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je L$avx_shortcut pushq %rbx pushq %rbp pushq %r12 @@ -22,8 +36,6 @@ _sha512_block_data_order: movq %r11,128+24(%rsp) L$prologue: - leaq K512(%rip),%rbp - movq 0(%rdi),%rax movq 8(%rdi),%rbx movq 16(%rdi),%rcx @@ -36,1694 +48,1632 @@ L$prologue: .p2align 4 L$loop: - xorq %rdi,%rdi + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi movq 0(%rsi),%r12 movq %r8,%r13 movq %rax,%r14 bswapq %r12 rorq $23,%r13 movq %r9,%r15 - movq %r12,0(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,0(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp + addq %r14,%r11 movq 8(%rsi),%r12 movq %rdx,%r13 movq %r11,%r14 bswapq %r12 rorq $23,%r13 - movq %r8,%r15 - movq %r12,8(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq 
%rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,8(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp + addq %r14,%r10 movq 16(%rsi),%r12 movq %rcx,%r13 movq %r10,%r14 bswapq %r12 rorq $23,%r13 movq %rdx,%r15 - movq %r12,16(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,16(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp + addq %r14,%r9 movq 24(%rsi),%r12 movq %rbx,%r13 movq %r9,%r14 bswapq %r12 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,24(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,24(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp + addq %r14,%r8 movq 32(%rsi),%r12 movq %rax,%r13 movq %r8,%r14 bswapq %r12 rorq $23,%r13 movq %rbx,%r15 - movq %r12,32(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,32(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 - xorq %rcx,%r15 + addq %r15,%r12 - xorq %r10,%rdx + movq %r8,%r15 + addq (%rbp),%r12 xorq %r8,%r14 - addq %r15,%r12 - movq %r9,%r15 + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp + addq %r14,%rdx movq 40(%rsi),%r12 movq %r11,%r13 movq %rdx,%r14 bswapq %r12 rorq $23,%r13 - movq %rax,%r15 - movq %r12,40(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,40(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq 
%rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp + addq %r14,%rcx movq 48(%rsi),%r12 movq %r10,%r13 movq %rcx,%r14 bswapq %r12 rorq $23,%r13 movq %r11,%r15 - movq %r12,48(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,48(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp + addq %r14,%rbx movq 56(%rsi),%r12 movq %r9,%r13 movq %rbx,%r14 bswapq %r12 rorq $23,%r13 - movq %r10,%r15 - movq %r12,56(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,56(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax + leaq 24(%rbp),%rbp + addq %r14,%rax movq 64(%rsi),%r12 movq %r8,%r13 movq %rax,%r14 bswapq %r12 rorq $23,%r13 movq %r9,%r15 - movq %r12,64(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,64(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp + addq %r14,%r11 movq 72(%rsi),%r12 movq %rdx,%r13 movq %r11,%r14 bswapq %r12 rorq $23,%r13 - movq %r8,%r15 - movq %r12,72(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,72(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp + addq %r14,%r10 movq 80(%rsi),%r12 movq %rcx,%r13 movq 
%r10,%r14 bswapq %r12 rorq $23,%r13 movq %rdx,%r15 - movq %r12,80(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,80(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp + addq %r14,%r9 movq 88(%rsi),%r12 movq %rbx,%r13 movq %r9,%r14 bswapq %r12 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,88(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,88(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp + addq %r14,%r8 movq 96(%rsi),%r12 movq %rax,%r13 movq %r8,%r14 bswapq %r12 rorq $23,%r13 movq %rbx,%r15 - movq %r12,96(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,96(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx - rorq $6,%r14 - xorq %rax,%r13 + rorq $4,%r13 + addq %rdx,%r12 xorq %rcx,%r15 - xorq %r10,%rdx - xorq %r8,%r14 + rorq $6,%r14 + xorq %rax,%r13 addq %r15,%r12 - movq %r9,%r15 + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp + addq %r14,%rdx movq 104(%rsi),%r12 movq %r11,%r13 movq %rdx,%r14 bswapq %r12 rorq $23,%r13 - movq %rax,%r15 - movq %r12,104(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,104(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp + addq %r14,%rcx movq 112(%rsi),%r12 movq %r10,%r13 movq %rcx,%r14 bswapq %r12 rorq $23,%r13 movq %r11,%r15 - movq %r12,112(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,112(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq 
%rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp + addq %r14,%rbx movq 120(%rsi),%r12 movq %r9,%r13 movq %rbx,%r14 bswapq %r12 rorq $23,%r13 - movq %r10,%r15 - movq %r12,120(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,120(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax + leaq 24(%rbp),%rbp jmp L$rounds_16_xx .p2align 4 L$rounds_16_xx: movq 8(%rsp),%r13 - movq 112(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 112(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 72(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 addq 0(%rsp),%r12 movq %r8,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rax,%r14 rorq $23,%r13 movq %r9,%r15 - movq %r12,0(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,0(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp movq 16(%rsp),%r13 - movq 120(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 120(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 80(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 addq 8(%rsp),%r12 movq %rdx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r11,%r14 rorq $23,%r13 - movq %r8,%r15 - movq %r12,8(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,8(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq 
%rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp movq 24(%rsp),%r13 - movq 0(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 0(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 88(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 addq 16(%rsp),%r12 movq %rcx,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r10,%r14 rorq $23,%r13 movq %rdx,%r15 - movq %r12,16(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,16(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp movq 32(%rsp),%r13 - movq 8(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 8(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 96(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 addq 24(%rsp),%r12 movq %rbx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r9,%r14 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,24(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,24(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp movq 40(%rsp),%r13 - movq 16(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 16(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 104(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 addq 32(%rsp),%r12 movq %rax,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r8,%r14 rorq $23,%r13 movq %rbx,%r15 - movq %r12,32(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,32(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - 
movq %r9,%rdx + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 - xorq %rcx,%r15 + addq %r15,%r12 - xorq %r10,%rdx + movq %r8,%r15 + addq (%rbp),%r12 xorq %r8,%r14 - addq %r15,%r12 - movq %r9,%r15 + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - addq %r14,%rdx + leaq 8(%rbp),%rbp movq 48(%rsp),%r13 - movq 24(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 24(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 112(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 addq 40(%rsp),%r12 movq %r11,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rdx,%r14 rorq $23,%r13 - movq %rax,%r15 - movq %r12,40(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,40(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp movq 56(%rsp),%r13 - movq 32(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 32(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 120(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 addq 48(%rsp),%r12 movq %r10,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rcx,%r14 rorq $23,%r13 movq %r11,%r15 - movq %r12,48(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,48(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp movq 64(%rsp),%r13 - movq 40(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 40(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 0(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 addq 56(%rsp),%r12 movq 
%r9,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rbx,%r14 rorq $23,%r13 - movq %r10,%r15 - movq %r12,56(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,56(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq %r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax + leaq 24(%rbp),%rbp movq 72(%rsp),%r13 - movq 48(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 48(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 8(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 addq 64(%rsp),%r12 movq %r8,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rax,%r14 rorq $23,%r13 movq %r9,%r15 - movq %r12,64(%rsp) - rorq $5,%r14 xorq %r8,%r13 + rorq $5,%r14 xorq %r10,%r15 - rorq $4,%r13 - addq %r11,%r12 + movq %r12,64(%rsp) xorq %rax,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r8,%r15 - movq %rbx,%r11 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 rorq $6,%r14 xorq %r8,%r13 - xorq %r10,%r15 + addq %r15,%r12 - xorq %rcx,%r11 + movq %rax,%r15 + addq (%rbp),%r12 xorq %rax,%r14 - addq %r15,%r12 - movq %rbx,%r15 + xorq %rbx,%r15 rorq $14,%r13 - andq %rax,%r11 - andq %rcx,%r15 + movq %rbx,%r11 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r11 + xorq %rdi,%r11 addq %r12,%rdx addq %r12,%r11 - leaq 1(%rdi),%rdi - addq %r14,%r11 + leaq 8(%rbp),%rbp movq 80(%rsp),%r13 - movq 56(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 56(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 16(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 addq 72(%rsp),%r12 movq %rdx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r11,%r14 rorq $23,%r13 - movq %r8,%r15 - movq %r12,72(%rsp) + movq %r8,%rdi - rorq $5,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + rorq $5,%r14 + xorq %r9,%rdi - rorq $4,%r13 - addq %r10,%r12 + movq %r12,72(%rsp) xorq %r11,%r14 + andq %rdx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rdx,%r15 - movq %rax,%r10 + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi rorq $6,%r14 xorq %rdx,%r13 - xorq %r9,%r15 + addq %rdi,%r12 - xorq %rbx,%r10 + movq %r11,%rdi + addq (%rbp),%r12 xorq %r11,%r14 - addq %r15,%r12 - movq %rax,%r15 + xorq %rax,%rdi rorq $14,%r13 - andq %r11,%r10 - andq %rbx,%r15 + movq %rax,%r10 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r10 + xorq %r15,%r10 addq %r12,%rcx addq %r12,%r10 - leaq 1(%rdi),%rdi - addq %r14,%r10 + leaq 24(%rbp),%rbp movq 88(%rsp),%r13 - movq 64(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 64(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq 
%r14,%r10 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 24(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 addq 80(%rsp),%r12 movq %rcx,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r10,%r14 rorq $23,%r13 movq %rdx,%r15 - movq %r12,80(%rsp) - rorq $5,%r14 xorq %rcx,%r13 + rorq $5,%r14 xorq %r8,%r15 - rorq $4,%r13 - addq %r9,%r12 + movq %r12,80(%rsp) xorq %r10,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rcx,%r15 - movq %r11,%r9 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 rorq $6,%r14 xorq %rcx,%r13 - xorq %r8,%r15 + addq %r15,%r12 - xorq %rax,%r9 + movq %r10,%r15 + addq (%rbp),%r12 xorq %r10,%r14 - addq %r15,%r12 - movq %r11,%r15 + xorq %r11,%r15 rorq $14,%r13 - andq %r10,%r9 - andq %rax,%r15 + movq %r11,%r9 + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%r9 + xorq %rdi,%r9 addq %r12,%rbx addq %r12,%r9 - leaq 1(%rdi),%rdi - addq %r14,%r9 + leaq 8(%rbp),%rbp movq 96(%rsp),%r13 - movq 72(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 72(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 32(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 addq 88(%rsp),%r12 movq %rbx,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %r9,%r14 rorq $23,%r13 - movq %rcx,%r15 - movq %r12,88(%rsp) + movq %rcx,%rdi - rorq $5,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + rorq $5,%r14 + xorq %rdx,%rdi - rorq $4,%r13 - addq %r8,%r12 + movq %r12,88(%rsp) xorq %r9,%r14 + andq %rbx,%rdi - addq (%rbp,%rdi,8),%r12 - andq %rbx,%r15 - movq %r10,%r8 + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi rorq $6,%r14 xorq %rbx,%r13 - xorq %rdx,%r15 + addq %rdi,%r12 - xorq %r11,%r8 + movq %r9,%rdi + addq (%rbp),%r12 xorq %r9,%r14 - addq %r15,%r12 - movq %r10,%r15 + xorq %r10,%rdi rorq $14,%r13 - andq %r9,%r8 - andq %r11,%r15 + movq %r10,%r8 + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%r8 + xorq %r15,%r8 addq %r12,%rax addq %r12,%r8 - leaq 1(%rdi),%rdi - addq %r14,%r8 + leaq 24(%rbp),%rbp movq 104(%rsp),%r13 - movq 80(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 80(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 40(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 addq 96(%rsp),%r12 movq %rax,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %r8,%r14 rorq $23,%r13 movq %rbx,%r15 - movq %r12,96(%rsp) - rorq $5,%r14 xorq %rax,%r13 + rorq $5,%r14 xorq %rcx,%r15 - rorq $4,%r13 - addq %rdx,%r12 + movq %r12,96(%rsp) xorq %r8,%r14 - - addq (%rbp,%rdi,8),%r12 andq %rax,%r15 - movq %r9,%rdx + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 rorq $6,%r14 xorq %rax,%r13 - xorq %rcx,%r15 + addq %r15,%r12 - xorq %r10,%rdx + movq %r8,%r15 + addq (%rbp),%r12 xorq %r8,%r14 - addq %r15,%r12 - movq %r9,%r15 + xorq %r9,%r15 rorq $14,%r13 - andq %r8,%rdx - andq %r10,%r15 + movq %r9,%rdx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rdx + xorq %rdi,%rdx addq %r12,%r11 addq %r12,%rdx - leaq 1(%rdi),%rdi - 
addq %r14,%rdx + leaq 8(%rbp),%rbp movq 112(%rsp),%r13 - movq 88(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 88(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 48(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 addq 104(%rsp),%r12 movq %r11,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rdx,%r14 rorq $23,%r13 - movq %rax,%r15 - movq %r12,104(%rsp) + movq %rax,%rdi - rorq $5,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + rorq $5,%r14 + xorq %rbx,%rdi - rorq $4,%r13 - addq %rcx,%r12 + movq %r12,104(%rsp) xorq %rdx,%r14 + andq %r11,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r11,%r15 - movq %r8,%rcx + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi rorq $6,%r14 xorq %r11,%r13 - xorq %rbx,%r15 + addq %rdi,%r12 - xorq %r9,%rcx + movq %rdx,%rdi + addq (%rbp),%r12 xorq %rdx,%r14 - addq %r15,%r12 - movq %r8,%r15 + xorq %r8,%rdi rorq $14,%r13 - andq %rdx,%rcx - andq %r9,%r15 + movq %r8,%rcx + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rcx + xorq %r15,%rcx addq %r12,%r10 addq %r12,%rcx - leaq 1(%rdi),%rdi - addq %r14,%rcx + leaq 24(%rbp),%rbp movq 120(%rsp),%r13 - movq 96(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 96(%rsp),%r15 - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 - rorq $1,%r12 xorq %r12,%r13 - movq 56(%rsp),%r12 - - rorq $42,%r15 + shrq $7,%r12 + rorq $1,%r13 xorq %r14,%r15 shrq $6,%r14 rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 addq 112(%rsp),%r12 movq %r10,%r13 - addq %r14,%r12 + addq %r15,%r12 movq %rcx,%r14 rorq $23,%r13 movq %r11,%r15 - movq %r12,112(%rsp) - rorq $5,%r14 xorq %r10,%r13 + rorq $5,%r14 xorq %rax,%r15 - rorq $4,%r13 - addq %rbx,%r12 + movq %r12,112(%rsp) xorq %rcx,%r14 - - addq (%rbp,%rdi,8),%r12 andq %r10,%r15 - movq %rdx,%rbx + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 rorq $6,%r14 xorq %r10,%r13 - xorq %rax,%r15 + addq %r15,%r12 - xorq %r8,%rbx + movq %rcx,%r15 + addq (%rbp),%r12 xorq %rcx,%r14 - addq %r15,%r12 - movq %rdx,%r15 + xorq %rdx,%r15 rorq $14,%r13 - andq %rcx,%rbx - andq %r8,%r15 + movq %rdx,%rbx + andq %r15,%rdi rorq $28,%r14 addq %r13,%r12 - addq %r15,%rbx + xorq %rdi,%rbx addq %r12,%r9 addq %r12,%rbx - leaq 1(%rdi),%rdi - addq %r14,%rbx + leaq 8(%rbp),%rbp movq 0(%rsp),%r13 - movq 104(%rsp),%r14 - movq %r13,%r12 - movq %r14,%r15 + movq 104(%rsp),%rdi - rorq $7,%r12 - xorq %r13,%r12 - shrq $7,%r13 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi - rorq $1,%r12 xorq %r12,%r13 - movq 64(%rsp),%r12 - - rorq $42,%r15 - xorq %r14,%r15 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi shrq $6,%r14 - rorq $19,%r15 - addq %r13,%r12 - xorq %r15,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 addq 120(%rsp),%r12 movq %r9,%r13 - addq %r14,%r12 + addq %rdi,%r12 movq %rbx,%r14 rorq $23,%r13 - movq %r10,%r15 - movq %r12,120(%rsp) + movq %r10,%rdi - rorq $5,%r14 xorq %r9,%r13 - xorq %r11,%r15 + rorq $5,%r14 + xorq %r11,%rdi - rorq $4,%r13 - addq %rax,%r12 + movq %r12,120(%rsp) xorq %rbx,%r14 + andq %r9,%rdi - addq (%rbp,%rdi,8),%r12 - andq %r9,%r15 - movq %rcx,%rax + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi rorq $6,%r14 xorq %r9,%r13 - xorq 
%r11,%r15 + addq %rdi,%r12 - xorq %rdx,%rax + movq %rbx,%rdi + addq (%rbp),%r12 xorq %rbx,%r14 - addq %r15,%r12 - movq %rcx,%r15 + xorq %rcx,%rdi rorq $14,%r13 - andq %rbx,%rax - andq %rdx,%r15 + movq %rcx,%rax + andq %rdi,%r15 rorq $28,%r14 addq %r13,%r12 - addq %r15,%rax + xorq %r15,%rax addq %r12,%r8 addq %r12,%rax - leaq 1(%rdi),%rdi - addq %r14,%rax - cmpq $80,%rdi - jb L$rounds_16_xx + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz L$rounds_16_xx movq 128+0(%rsp),%rdi + addq %r14,%rax leaq 128(%rsi),%rsi addq 0(%rdi),%rax @@ -1762,42 +1712,3653 @@ L$epilogue: K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe .quad 0xd807aa98a3030242,0x12835b0145706fbe .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 .quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 .quad 0x983e5152ee66dfab,0xa831c66d2db43210 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 .quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 .quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 .quad 0xd192e819d6ef5218,0xd69906245565a910 .quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 .quad 0x90befffa23631e28,0xa4506cebde82bde9 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 .quad 0xca273eceea26619c,0xd186b8c721c0c207 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 .quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 .quad 0x28db77f523047d84,0x32caab7b40c72493 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.p2align 6 +sha512_block_data_order_xop: +L$xop_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue_xop: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop_xop +.p2align 4 +L$loop_xop: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp L$xop_00_47 + +.p2align 4 +L$xop_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm0,%xmm0 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,223,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm7,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq 
%xmm11,%xmm0,%xmm0 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm1,%xmm1 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,216,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm0,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm1,%xmm1 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm2,%xmm2 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,217,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm1,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm2,%xmm2 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm3,%xmm3 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 
143,104,120,195,218,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm2,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm3,%xmm3 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm4,%xmm4 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,219,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm3,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm4,%xmm4 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm5,%xmm5 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,220,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm4,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm5,%xmm5 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + 
rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm6,%xmm6 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,221,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm5,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm6,%xmm6 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm7,%xmm7 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,222,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm6,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm7,%xmm7 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne L$xop_00_47 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq 
%r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq 
%r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop_xop + + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_xop: + .byte 0xf3,0xc3 + + +.p2align 6 +sha512_block_data_order_avx: +L$avx_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 
8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop_avx +.p2align 4 +L$loop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp L$avx_00_47 + +.p2align 4 +L$avx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq 
$28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor 
%xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor 
%xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne L$avx_00_47 
+ shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq 
%r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq 
%r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop_avx + + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_avx: + .byte 0xf3,0xc3 + + +.p2align 6 +sha512_block_data_order_avx2: +L$avx2_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + subq $1312,%rsp + shlq $4,%rdx + andq $-2048,%rsp + leaq (%rsi,%rdx,8),%rdx + addq $1152,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue_avx2: + + vzeroupper + subq $-128,%rsi + movq 0(%rdi),%rax + movq %rsi,%r12 + movq 8(%rdi),%rbx + cmpq %rdx,%rsi + movq 16(%rdi),%rcx + cmoveq %rsp,%r12 + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$oop_avx2 +.p2align 4 +L$oop_avx2: + vmovdqu -128(%rsi),%xmm0 + vmovdqu -128+16(%rsi),%xmm1 + vmovdqu -128+32(%rsi),%xmm2 + leaq K512+128(%rip),%rbp + vmovdqu -128+48(%rsi),%xmm3 + vmovdqu -128+64(%rsi),%xmm4 + vmovdqu -128+80(%rsi),%xmm5 + vmovdqu -128+96(%rsi),%xmm6 + vmovdqu -128+112(%rsi),%xmm7 + + vmovdqa 1152(%rbp),%ymm10 + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm10,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm10,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + vpshufb %ymm10,%ymm2,%ymm2 + vinserti128 $1,64(%r12),%ymm4,%ymm4 + vpshufb %ymm10,%ymm3,%ymm3 + vinserti128 $1,80(%r12),%ymm5,%ymm5 + vpshufb %ymm10,%ymm4,%ymm4 + vinserti128 $1,96(%r12),%ymm6,%ymm6 + vpshufb %ymm10,%ymm5,%ymm5 + vinserti128 $1,112(%r12),%ymm7,%ymm7 + + vpaddq -128(%rbp),%ymm0,%ymm8 + vpshufb %ymm10,%ymm6,%ymm6 + vpaddq -96(%rbp),%ymm1,%ymm9 + vpshufb %ymm10,%ymm7,%ymm7 + vpaddq -64(%rbp),%ymm2,%ymm10 + vpaddq -32(%rbp),%ymm3,%ymm11 + vmovdqa %ymm8,0(%rsp) + vpaddq 0(%rbp),%ymm4,%ymm8 + vmovdqa %ymm9,32(%rsp) + vpaddq 32(%rbp),%ymm5,%ymm9 + vmovdqa %ymm10,64(%rsp) + vpaddq 64(%rbp),%ymm6,%ymm10 + vmovdqa %ymm11,96(%rsp) + leaq -128(%rsp),%rsp + vpaddq 96(%rbp),%ymm7,%ymm11 + vmovdqa %ymm8,0(%rsp) + xorq %r14,%r14 + vmovdqa %ymm9,32(%rsp) + movq %rbx,%rdi + vmovdqa %ymm10,64(%rsp) + xorq %rcx,%rdi + vmovdqa %ymm11,96(%rsp) + movq %r9,%r12 + addq $32*8,%rbp + jmp L$avx2_00_47 + +.p2align 4 +L$avx2_00_47: + leaq -128(%rsp),%rsp + vpalignr $8,%ymm0,%ymm1,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm4,%ymm5,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm0,%ymm0 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq 
%rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm7,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm7,%ymm10 + vpaddq %ymm8,%ymm0,%ymm0 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm7,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm0,%ymm0 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq -128(%rbp),%ymm0,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm5,%ymm6,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm1,%ymm1 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm0,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm0,%ymm10 + vpaddq %ymm8,%ymm1,%ymm1 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm0,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm1,%ymm1 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq -96(%rbp),%ymm1,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm6,%ymm7,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm2,%ymm2 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + 
rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm1,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm1,%ymm10 + vpaddq %ymm8,%ymm2,%ymm2 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm1,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm2,%ymm2 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq -64(%rbp),%ymm2,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm7,%ymm0,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm3,%ymm3 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm2,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm2,%ymm10 + vpaddq %ymm8,%ymm3,%ymm3 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm2,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm3,%ymm3 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq -32(%rbp),%ymm3,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq -128(%rsp),%rsp + vpalignr $8,%ymm4,%ymm5,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm0,%ymm1,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm4,%ymm4 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq 
$6,%ymm3,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm3,%ymm10 + vpaddq %ymm8,%ymm4,%ymm4 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm3,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm4,%ymm4 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq 0(%rbp),%ymm4,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm5,%ymm6,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm1,%ymm2,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm5,%ymm5 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm4,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm4,%ymm10 + vpaddq %ymm8,%ymm5,%ymm5 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm4,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm5,%ymm5 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq 32(%rbp),%ymm5,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm6,%ymm7,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm2,%ymm3,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm6,%ymm6 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm5,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm5,%ymm10 + vpaddq %ymm8,%ymm6,%ymm6 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq 
$41,%r11,%r13 + vpsrlq $19,%ymm5,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm6,%ymm6 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq 64(%rbp),%ymm6,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm7,%ymm0,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm3,%ymm4,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm7,%ymm7 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm6,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm6,%ymm10 + vpaddq %ymm8,%ymm7,%ymm7 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm6,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm7,%ymm7 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq 96(%rbp),%ymm7,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq 256(%rbp),%rbp + cmpb $0,-121(%rbp) + jne L$avx2_00_47 + addq 0+128(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+128(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+128(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq 
$18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+128(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+128(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+128(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+128(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+128(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + addq 0(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq 
%r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rbp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 
56(%rdi),%r11 + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + cmpq 144(%rbp),%rsi + je L$done_avx2 + + xorq %r14,%r14 + movq %rbx,%rdi + xorq %rcx,%rdi + movq %r9,%r12 + jmp L$ower_avx2 +.p2align 4 +L$ower_avx2: + addq 0+16(%rbp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+16(%rbp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+16(%rbp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+16(%rbp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+16(%rbp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+16(%rbp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+16(%rbp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq 
%r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+16(%rbp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + leaq -128(%rbp),%rbp + cmpq %rsp,%rbp + jae L$ower_avx2 + + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rsp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + leaq 256(%rsi),%rsi + addq 48(%rdi),%r10 + movq %rsi,%r12 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + cmoveq %rsp,%r12 + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + jbe L$oop_avx2 + leaq (%rsp),%rbp + +L$done_avx2: + leaq (%rbp),%rsp + movq 128+24(%rsp),%rsi + vzeroupper + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_avx2: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/whrlpool/wp-x86_64.s b/deps/openssl/asm/x64-macosx-gas/whrlpool/wp-x86_64.s index 5e87e554ed41fb..ad43b5a1b35149 100644 --- a/deps/openssl/asm/x64-macosx-gas/whrlpool/wp-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/whrlpool/wp-x86_64.s @@ -1,6 +1,5 @@ .text - .globl _whirlpool_block .p2align 4 @@ -63,233 +62,236 @@ L$outerloop: movq %r15,64+56(%rsp) xorq %rsi,%rsi movq %rsi,24(%rbx) + jmp L$round .p2align 4 L$round: movq 4096(%rbp,%rsi,8),%r8 movl 0(%rsp),%eax movl 4(%rsp),%ebx - movb %al,%cl - movb %ah,%dl + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r8 movq 7(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl movl 0+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx movq 6(%rbp,%rsi,8),%r10 movq 5(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx movq 4(%rbp,%rsi,8),%r12 movq 3(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl movl 0+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx movq 2(%rbp,%rsi,8),%r14 movq 1(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r9 xorq 7(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl movl 8+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r11 xorq 5(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r13 xorq 3(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl movl 8+8+4(%rsp),%ebx 
leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r15 xorq 1(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r10 xorq 7(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl movl 16+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r12 xorq 5(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r14 xorq 3(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl movl 16+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r8 xorq 1(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r11 xorq 7(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl movl 24+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r13 xorq 5(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r15 xorq 3(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl movl 24+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r9 xorq 1(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r12 xorq 7(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl movl 32+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r14 xorq 5(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r8 xorq 3(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl movl 32+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r10 xorq 1(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r13 xorq 7(%rbp,%rdi,8),%r14 - movb %al,%cl - movb %ah,%dl movl 40+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r15 xorq 5(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r9 xorq 3(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl movl 40+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r11 xorq 1(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r14 xorq 7(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl movl 48+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r8 xorq 
5(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r10 xorq 3(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl movl 48+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r12 xorq 1(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r15 xorq 7(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl movl 56+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r9 xorq 5(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r11 xorq 3(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl movl 56+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r13 xorq 1(%rbp,%rdi,8),%r14 movq %r8,0(%rsp) @@ -300,228 +302,228 @@ L$round: movq %r13,40(%rsp) movq %r14,48(%rsp) movq %r15,56(%rsp) - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r8 xorq 7(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl movl 64+0+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r10 xorq 5(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r12 xorq 3(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl movl 64+0+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r14 xorq 1(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r9 xorq 7(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl movl 64+8+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r11 xorq 5(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r13 xorq 3(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl movl 64+8+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r15 xorq 1(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r10 xorq 7(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl movl 64+16+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r12 xorq 5(%rbp,%rdi,8),%r13 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r14 xorq 3(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl movl 64+16+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r8 xorq 
1(%rbp,%rdi,8),%r9 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r11 xorq 7(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl movl 64+24+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r13 xorq 5(%rbp,%rdi,8),%r14 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r15 xorq 3(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl movl 64+24+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r9 xorq 1(%rbp,%rdi,8),%r10 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r12 xorq 7(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl movl 64+32+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r14 xorq 5(%rbp,%rdi,8),%r15 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r8 xorq 3(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl movl 64+32+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r10 xorq 1(%rbp,%rdi,8),%r11 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r13 xorq 7(%rbp,%rdi,8),%r14 - movb %al,%cl - movb %ah,%dl movl 64+40+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r15 xorq 5(%rbp,%rdi,8),%r8 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r9 xorq 3(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl movl 64+40+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r11 xorq 1(%rbp,%rdi,8),%r12 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r14 xorq 7(%rbp,%rdi,8),%r15 - movb %al,%cl - movb %ah,%dl movl 64+48+8(%rsp),%eax leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r8 xorq 5(%rbp,%rdi,8),%r9 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r10 xorq 3(%rbp,%rdi,8),%r11 - movb %bl,%cl - movb %bh,%dl movl 64+48+8+4(%rsp),%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r12 xorq 1(%rbp,%rdi,8),%r13 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi - shrl $16,%eax + movzbl %ah,%edx xorq 0(%rbp,%rsi,8),%r15 xorq 7(%rbp,%rdi,8),%r8 - movb %al,%cl - movb %ah,%dl leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx xorq 6(%rbp,%rsi,8),%r9 xorq 5(%rbp,%rdi,8),%r10 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx leaq 
(%rdx,%rdx,1),%rdi - shrl $16,%ebx + movzbl %bh,%edx xorq 4(%rbp,%rsi,8),%r11 xorq 3(%rbp,%rdi,8),%r12 - movb %bl,%cl - movb %bh,%dl leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx xorq 2(%rbp,%rsi,8),%r13 xorq 1(%rbp,%rdi,8),%r14 leaq 128(%rsp),%rbx diff --git a/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s b/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s index 21e8a8fc2ec4e2..5d69baad8f4ab7 100644 --- a/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s +++ b/deps/openssl/asm/x64-macosx-gas/x86_64cpuid.s @@ -5,11 +5,10 @@ .quad _OPENSSL_cpuid_setup .private_extern _OPENSSL_ia32cap_P -.comm _OPENSSL_ia32cap_P,8,2 +.comm _OPENSSL_ia32cap_P,16,2 .text - .globl _OPENSSL_atomic_add .p2align 4 @@ -17,12 +16,10 @@ _OPENSSL_atomic_add: movl (%rdi),%eax L$spin: leaq (%rsi,%rax,1),%r8 .byte 0xf0 - cmpxchgl %r8d,(%rdi) jne L$spin movl %r8d,%eax .byte 0x48,0x98 - .byte 0xf3,0xc3 @@ -43,6 +40,7 @@ _OPENSSL_ia32_cpuid: movq %rbx,%r8 xorl %eax,%eax + movl %eax,8(%rdi) cpuid movl %eax,%r11d @@ -110,6 +108,14 @@ L$intel: shrl $14,%r10d andl $4095,%r10d + cmpl $7,%r11d + jb L$nocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + L$nocacheinfo: movl $1,%eax cpuid @@ -143,13 +149,13 @@ L$generic: jnc L$clear_avx xorl %ecx,%ecx .byte 0x0f,0x01,0xd0 - andl $6,%eax cmpl $6,%eax je L$done L$clear_avx: movl $4026525695,%eax andl %eax,%r9d + andl $4294967263,8(%rdi) L$done: shlq $32,%r9 movl %r10d,%eax @@ -236,3 +242,18 @@ L$break_rdrand: cmpq $0,%rax cmoveq %rcx,%rax .byte 0xf3,0xc3 + + +.globl _OPENSSL_ia32_rdseed + +.p2align 4 +_OPENSSL_ia32_rdseed: + movl $8,%ecx +L$oop_rdseed: +.byte 72,15,199,248 + jc L$break_rdseed + loop L$oop_rdseed +L$break_rdseed: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm index ff8ee9429156be..64f07ad589a53d 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/aes-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' ALIGN 16 _x86_64_AES_encrypt PROC PRIVATE @@ -152,7 +152,6 @@ $L$enc_loop:: xor ecx,r12d xor edx,r8d DB 0f3h,0c3h - _x86_64_AES_encrypt ENDP ALIGN 16 @@ -177,80 +176,78 @@ $L$enc_loop_compact:: movzx r10d,al movzx r11d,bl movzx r12d,cl - movzx r10d,BYTE PTR[r10*1+r14] - movzx r11d,BYTE PTR[r11*1+r14] - movzx r12d,BYTE PTR[r12*1+r14] - movzx r8d,dl movzx esi,bh movzx edi,ch + shr ecx,16 + movzx ebp,dh + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] movzx r8d,BYTE PTR[r8*1+r14] - movzx r9d,BYTE PTR[rsi*1+r14] - movzx r13d,BYTE PTR[rdi*1+r14] - movzx ebp,dh + movzx r9d,BYTE PTR[rsi*1+r14] movzx esi,ah - shr ecx,16 + movzx r13d,BYTE PTR[rdi*1+r14] + movzx edi,cl movzx ebp,BYTE PTR[rbp*1+r14] movzx esi,BYTE PTR[rsi*1+r14] - shr edx,16 - movzx edi,cl shl r9d,8 + shr edx,16 shl r13d,8 - movzx edi,BYTE PTR[rdi*1+r14] xor r10d,r9d - xor r11d,r13d - - movzx r9d,dl shr eax,16 + movzx r9d,dl shr ebx,16 - movzx r13d,al + xor r11d,r13d shl ebp,8 - shl esi,8 - movzx r9d,BYTE PTR[r9*1+r14] - movzx r13d,BYTE PTR[r13*1+r14] + movzx r13d,al + movzx edi,BYTE PTR[rdi*1+r14] xor r12d,ebp - xor r8d,esi + shl esi,8 movzx ebp,bl - movzx esi,dh shl edi,16 - movzx ebp,BYTE PTR[rbp*1+r14] - movzx esi,BYTE PTR[rsi*1+r14] + xor r8d,esi + movzx r9d,BYTE PTR[r9*1+r14] + movzx esi,dh + movzx r13d,BYTE PTR[r13*1+r14] xor r10d,edi - movzx edi,ah shr ecx,8 + movzx edi,ah + shl 
r9d,16 shr ebx,8 + shl r13d,16 + xor r11d,r9d + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] movzx edi,BYTE PTR[rdi*1+r14] movzx edx,BYTE PTR[rcx*1+r14] movzx ecx,BYTE PTR[rbx*1+r14] - shl r9d,16 - shl r13d,16 + shl ebp,16 - xor r11d,r9d xor r12d,r13d - xor r8d,ebp - shl esi,24 + xor r8d,ebp shl edi,24 - shl edx,24 xor r10d,esi - shl ecx,24 + shl edx,24 xor r11d,edi + shl ecx,24 mov eax,r10d mov ebx,r11d xor ecx,r12d xor edx,r8d cmp r15,QWORD PTR[16+rsp] je $L$enc_compact_done - mov esi,eax - mov edi,ebx - and esi,080808080h - and edi,080808080h - mov r10d,esi - mov r11d,edi + mov r10d,080808080h + mov r11d,080808080h + and r10d,eax + and r11d,ebx + mov esi,r10d + mov edi,r11d shr r10d,7 lea r8d,DWORD PTR[rax*1+rax] shr r11d,7 @@ -268,25 +265,25 @@ $L$enc_loop_compact:: xor eax,r8d xor ebx,r9d - mov esi,ecx - mov edi,edx + mov r12d,080808080h rol eax,24 + mov ebp,080808080h rol ebx,24 - and esi,080808080h - and edi,080808080h + and r12d,ecx + and ebp,edx xor eax,r8d xor ebx,r9d - mov r12d,esi - mov ebp,edi + mov esi,r12d ror r10d,16 + mov edi,ebp ror r11d,16 - shr r12d,7 lea r8d,DWORD PTR[rcx*1+rcx] + shr r12d,7 xor eax,r10d - xor ebx,r11d shr ebp,7 - lea r9d,DWORD PTR[rdx*1+rdx] + xor ebx,r11d ror r10d,8 + lea r9d,DWORD PTR[rdx*1+rdx] ror r11d,8 sub esi,r12d sub edi,ebp @@ -302,23 +299,23 @@ $L$enc_loop_compact:: xor r8d,esi xor r9d,edi + ror r12d,16 xor ecx,r8d + ror ebp,16 xor edx,r9d rol ecx,24 + mov esi,DWORD PTR[r14] rol edx,24 xor ecx,r8d - xor edx,r9d - mov esi,DWORD PTR[r14] - ror r12d,16 - ror ebp,16 mov edi,DWORD PTR[64+r14] - xor ecx,r12d - xor edx,ebp + xor edx,r9d mov r8d,DWORD PTR[128+r14] + xor ecx,r12d ror r12d,8 + xor edx,ebp ror ebp,8 - mov r9d,DWORD PTR[192+r14] xor ecx,r12d + mov r9d,DWORD PTR[192+r14] xor edx,ebp jmp $L$enc_loop_compact ALIGN 16 @@ -328,7 +325,6 @@ $L$enc_compact_done:: xor ecx,DWORD PTR[8+r15] xor edx,DWORD PTR[12+r15] DB 0f3h,0c3h - _x86_64_AES_encrypt_compact ENDP PUBLIC AES_encrypt @@ -563,7 +559,6 @@ $L$dec_loop:: xor ecx,r12d xor edx,r8d DB 0f3h,0c3h - _x86_64_AES_decrypt ENDP ALIGN 16 @@ -589,70 +584,69 @@ $L$dec_loop_compact:: movzx r10d,al movzx r11d,bl movzx r12d,cl - movzx r10d,BYTE PTR[r10*1+r14] - movzx r11d,BYTE PTR[r11*1+r14] - movzx r12d,BYTE PTR[r12*1+r14] - movzx r8d,dl movzx esi,dh movzx edi,ah + shr edx,16 + movzx ebp,bh + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] movzx r8d,BYTE PTR[r8*1+r14] - movzx r9d,BYTE PTR[rsi*1+r14] - movzx r13d,BYTE PTR[rdi*1+r14] - movzx ebp,bh + movzx r9d,BYTE PTR[rsi*1+r14] movzx esi,ch - shr ecx,16 + movzx r13d,BYTE PTR[rdi*1+r14] movzx ebp,BYTE PTR[rbp*1+r14] movzx esi,BYTE PTR[rsi*1+r14] - shr edx,16 - movzx edi,cl - shl r9d,8 + shr ecx,16 shl r13d,8 - movzx edi,BYTE PTR[rdi*1+r14] - xor r10d,r9d - xor r11d,r13d - - movzx r9d,dl + shl r9d,8 + movzx edi,cl shr eax,16 + xor r10d,r9d shr ebx,16 - movzx r13d,al + movzx r9d,dl + shl ebp,8 + xor r11d,r13d shl esi,8 - movzx r9d,BYTE PTR[r9*1+r14] - movzx r13d,BYTE PTR[r13*1+r14] + movzx r13d,al + movzx edi,BYTE PTR[rdi*1+r14] xor r12d,ebp - xor r8d,esi - movzx ebp,bl - movzx esi,bh + shl edi,16 + xor r8d,esi + movzx r9d,BYTE PTR[r9*1+r14] + movzx esi,bh movzx ebp,BYTE PTR[rbp*1+r14] - movzx esi,BYTE PTR[rsi*1+r14] xor r10d,edi - + movzx r13d,BYTE PTR[r13*1+r14] movzx edi,ch + + shl ebp,16 shl r9d,16 shl r13d,16 - movzx ebx,BYTE PTR[rdi*1+r14] + xor r8d,ebp + movzx ebp,dh xor r11d,r9d + shr eax,8 xor r12d,r13d - movzx edi,dh - shr eax,8 - shl ebp,16 - movzx ecx,BYTE PTR[rdi*1+r14] + 
movzx esi,BYTE PTR[rsi*1+r14] + movzx ebx,BYTE PTR[rdi*1+r14] + movzx ecx,BYTE PTR[rbp*1+r14] movzx edx,BYTE PTR[rax*1+r14] - xor r8d,ebp + mov eax,r10d shl esi,24 shl ebx,24 shl ecx,24 - xor r10d,esi + xor eax,esi shl edx,24 xor ebx,r11d - mov eax,r10d xor ecx,r12d xor edx,r8d cmp r15,QWORD PTR[16+rsp] @@ -665,12 +659,12 @@ $L$dec_loop_compact:: or rax,rbx or rcx,rdx mov rbp,QWORD PTR[((256+16))+r14] - mov rbx,rax - mov rdx,rcx - and rbx,rsi - and rdx,rsi - mov r9,rbx - mov r12,rdx + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 shr r9,7 lea r8,QWORD PTR[rax*1+rax] shr r12,7 @@ -681,15 +675,15 @@ $L$dec_loop_compact:: and r11,rdi and rbx,rbp and rdx,rbp - xor rbx,r8 - xor rdx,r11 - mov r8,rbx - mov r11,rdx - - and rbx,rsi - and rdx,rsi - mov r10,rbx - mov r13,rdx + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 shr r10,7 lea r9,QWORD PTR[r8*1+r8] shr r13,7 @@ -700,15 +694,15 @@ $L$dec_loop_compact:: and r12,rdi and rbx,rbp and rdx,rbp - xor rbx,r9 - xor rdx,r12 - mov r9,rbx - mov r12,rdx - - and rbx,rsi - and rdx,rsi - mov r10,rbx - mov r13,rdx + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 shr r10,7 xor r8,rax shr r13,7 @@ -733,51 +727,51 @@ $L$dec_loop_compact:: mov rbx,rax mov rdx,rcx xor r9,r10 - xor r12,r13 shr rbx,32 + xor r12,r13 shr rdx,32 xor r10,r8 - xor r13,r11 rol eax,8 + xor r13,r11 rol ecx,8 xor r10,r9 + rol ebx,8 xor r13,r12 - rol ebx,8 rol edx,8 xor eax,r10d - xor ecx,r13d shr r10,32 + xor ecx,r13d shr r13,32 xor ebx,r10d xor edx,r13d mov r10,r8 - mov r13,r11 - shr r10,32 - shr r13,32 rol r8d,24 + mov r13,r11 rol r11d,24 - rol r10d,24 - rol r13d,24 + shr r10,32 xor eax,r8d + shr r13,32 xor ecx,r11d + rol r10d,24 mov r8,r9 + rol r13d,24 mov r11,r12 + shr r8,32 xor ebx,r10d + shr r11,32 xor edx,r13d mov rsi,QWORD PTR[r14] - shr r8,32 - shr r11,32 - mov rdi,QWORD PTR[64+r14] rol r9d,16 + mov rdi,QWORD PTR[64+r14] rol r12d,16 mov rbp,QWORD PTR[128+r14] rol r8d,16 - rol r11d,16 mov r10,QWORD PTR[192+r14] xor eax,r9d + rol r11d,16 xor ecx,r12d mov r13,QWORD PTR[256+r14] xor ebx,r8d @@ -790,7 +784,6 @@ $L$dec_compact_done:: xor ecx,DWORD PTR[8+r15] xor edx,DWORD PTR[12+r15] DB 0f3h,0c3h - _x86_64_AES_decrypt_compact ENDP PUBLIC AES_decrypt @@ -897,10 +890,6 @@ $L$enc_key_prologue:: call _x86_64_AES_set_encrypt_key - mov r15,QWORD PTR[8+rsp] - mov r14,QWORD PTR[16+rsp] - mov r13,QWORD PTR[24+rsp] - mov r12,QWORD PTR[32+rsp] mov rbp,QWORD PTR[40+rsp] mov rbx,QWORD PTR[48+rsp] add rsp,56 @@ -1149,7 +1138,6 @@ $L$badpointer:: mov rax,-1 $L$exit:: DB 0f3h,0c3h - _x86_64_AES_set_encrypt_key ENDP PUBLIC private_AES_set_decrypt_key @@ -1211,12 +1199,12 @@ $L$permute:: lea r15,QWORD PTR[16+r15] mov rax,QWORD PTR[r15] mov rcx,QWORD PTR[8+r15] - mov rbx,rax - mov rdx,rcx - and rbx,rsi - and rdx,rsi - mov r9,rbx - mov r12,rdx + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 shr r9,7 lea r8,QWORD PTR[rax*1+rax] shr r12,7 @@ -1227,15 +1215,15 @@ $L$permute:: and r11,rdi and rbx,rbp and rdx,rbp - xor rbx,r8 - xor rdx,r11 - mov r8,rbx - mov r11,rdx - - and rbx,rsi - and rdx,rsi - mov r10,rbx - mov r13,rdx + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 shr r10,7 lea r9,QWORD PTR[r8*1+r8] shr r13,7 @@ -1246,15 +1234,15 @@ $L$permute:: and r12,rdi and rbx,rbp and rdx,rbp - xor rbx,r9 - xor rdx,r12 - mov r9,rbx - mov r12,rdx - - and rbx,rsi - and 
rdx,rsi - mov r10,rbx - mov r13,rdx + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 shr r10,7 xor r8,rax shr r13,7 @@ -1279,51 +1267,51 @@ $L$permute:: mov rbx,rax mov rdx,rcx xor r9,r10 - xor r12,r13 shr rbx,32 + xor r12,r13 shr rdx,32 xor r10,r8 - xor r13,r11 rol eax,8 + xor r13,r11 rol ecx,8 xor r10,r9 + rol ebx,8 xor r13,r12 - rol ebx,8 rol edx,8 xor eax,r10d - xor ecx,r13d shr r10,32 + xor ecx,r13d shr r13,32 xor ebx,r10d xor edx,r13d mov r10,r8 - mov r13,r11 - shr r10,32 - shr r13,32 rol r8d,24 + mov r13,r11 rol r11d,24 - rol r10d,24 - rol r13d,24 + shr r10,32 xor eax,r8d + shr r13,32 xor ecx,r11d + rol r10d,24 mov r8,r9 + rol r13d,24 mov r11,r12 + shr r8,32 xor ebx,r10d + shr r11,32 xor edx,r13d - shr r8,32 - shr r11,32 - rol r9d,16 + rol r12d,16 rol r8d,16 - rol r11d,16 xor eax,r9d + rol r11d,16 xor ecx,r12d xor ebx,r8d @@ -1455,7 +1443,6 @@ $L$cbc_do_ecopy:: lea r15,QWORD PTR[80+rsp] mov ecx,240/8 DD 090A548F3h - mov DWORD PTR[rdi],eax $L$cbc_skip_ecopy:: mov QWORD PTR[rsp],r15 @@ -1619,7 +1606,6 @@ $L$cbc_fast_cleanup:: xor rax,rax DD 090AB48F3h - jmp $L$cbc_exit @@ -1675,7 +1661,6 @@ $L$cbc_slow_body:: mov edx,DWORD PTR[12+rbp] jz $L$cbc_slow_enc_tail - ALIGN 4 $L$cbc_slow_enc_loop:: xor eax,DWORD PTR[r8] @@ -1720,19 +1705,16 @@ $L$cbc_slow_enc_tail:: mov rsi,r8 mov rdi,r9 DD 09066A4F3h - mov rcx,16 sub rcx,r10 xor rax,rax DD 09066AAF3h - mov r8,r9 mov r10,16 mov rax,r11 mov rcx,r12 jmp $L$cbc_slow_enc_loop - ALIGN 16 $L$SLOW_DECRYPT:: shr rax,3 @@ -1808,7 +1790,6 @@ $L$cbc_slow_dec_partial:: lea rsi,QWORD PTR[64+rsp] lea rcx,QWORD PTR[16+r10] DD 09066A4F3h - jmp $L$cbc_exit ALIGN 16 @@ -2811,7 +2792,6 @@ $L$common_seh_exit:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -2870,22 +2850,18 @@ $L$SEH_info_AES_encrypt:: DB 9,0,0,0 DD imagerel block_se_handler DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue - $L$SEH_info_AES_decrypt:: DB 9,0,0,0 DD imagerel block_se_handler DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue - $L$SEH_info_private_AES_set_encrypt_key:: DB 9,0,0,0 DD imagerel key_se_handler DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue - $L$SEH_info_private_AES_set_decrypt_key:: DB 9,0,0,0 DD imagerel key_se_handler DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue - $L$SEH_info_AES_cbc_encrypt:: DB 9,0,0,0 DD imagerel cbc_se_handler diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-mb-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-mb-x86_64.asm new file mode 100644 index 00000000000000..f482b8566399e2 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-mb-x86_64.asm @@ -0,0 +1,1698 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC aesni_multi_cbc_encrypt + +ALIGN 32 +aesni_multi_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_multi_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + cmp edx,2 + jb $L$enc_non_avx + mov ecx,DWORD PTR[((OPENSSL_ia32cap_P+4))] + test ecx,268435456 + jnz _avx_cbc_enc_shortcut + jmp $L$enc_non_avx +ALIGN 16 +$L$enc_non_avx:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD 
PTR[96+rsp],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + + + + + + + sub rsp,48 + and rsp,-64 + mov QWORD PTR[16+rsp],rax + +$L$enc4x_body:: + movdqu xmm12,XMMWORD PTR[rsi] + lea rsi,QWORD PTR[120+rsi] + lea rdi,QWORD PTR[80+rdi] + +$L$enc4x_loop_grande:: + mov DWORD PTR[24+rsp],edx + xor edx,edx + mov ecx,DWORD PTR[((-64))+rdi] + mov r8,QWORD PTR[((-80))+rdi] + cmp ecx,edx + mov r12,QWORD PTR[((-72))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm2,XMMWORD PTR[((-56))+rdi] + mov DWORD PTR[32+rsp],ecx + cmovle r8,rsp + mov ecx,DWORD PTR[((-24))+rdi] + mov r9,QWORD PTR[((-40))+rdi] + cmp ecx,edx + mov r13,QWORD PTR[((-32))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm3,XMMWORD PTR[((-16))+rdi] + mov DWORD PTR[36+rsp],ecx + cmovle r9,rsp + mov ecx,DWORD PTR[16+rdi] + mov r10,QWORD PTR[rdi] + cmp ecx,edx + mov r14,QWORD PTR[8+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm4,XMMWORD PTR[24+rdi] + mov DWORD PTR[40+rsp],ecx + cmovle r10,rsp + mov ecx,DWORD PTR[56+rdi] + mov r11,QWORD PTR[40+rdi] + cmp ecx,edx + mov r15,QWORD PTR[48+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm5,XMMWORD PTR[64+rdi] + mov DWORD PTR[44+rsp],ecx + cmovle r11,rsp + test edx,edx + jz $L$enc4x_done + + movups xmm1,XMMWORD PTR[((16-120))+rsi] + pxor xmm2,xmm12 + movups xmm0,XMMWORD PTR[((32-120))+rsi] + pxor xmm3,xmm12 + mov eax,DWORD PTR[((240-120))+rsi] + pxor xmm4,xmm12 + movdqu xmm6,XMMWORD PTR[r8] + pxor xmm5,xmm12 + movdqu xmm7,XMMWORD PTR[r9] + pxor xmm2,xmm6 + movdqu xmm8,XMMWORD PTR[r10] + pxor xmm3,xmm7 + movdqu xmm9,XMMWORD PTR[r11] + pxor xmm4,xmm8 + pxor xmm5,xmm9 + movdqa xmm10,XMMWORD PTR[32+rsp] + xor rbx,rbx + jmp $L$oop_enc4x + +ALIGN 32 +$L$oop_enc4x:: + add rbx,16 + lea rbp,QWORD PTR[16+rsp] + mov ecx,1 + sub rbp,rbx + +DB 102,15,56,220,209 + prefetcht0 [31+rbx*1+r8] + prefetcht0 [31+rbx*1+r9] +DB 102,15,56,220,217 + prefetcht0 [31+rbx*1+r10] + prefetcht0 [31+rbx*1+r10] +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((48-120))+rsi] + cmp ecx,DWORD PTR[32+rsp] +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + cmovge r8,rbp + cmovg r12,rbp +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((-56))+rsi] + cmp ecx,DWORD PTR[36+rsp] +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + cmovge r9,rbp + cmovg r13,rbp +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((-40))+rsi] + cmp ecx,DWORD PTR[40+rsp] +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + cmovge r10,rbp + cmovg r14,rbp +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((-24))+rsi] + cmp ecx,DWORD PTR[44+rsp] +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + cmovge r11,rbp + cmovg r15,rbp +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((-8))+rsi] + movdqa xmm11,xmm10 +DB 102,15,56,220,208 + prefetcht0 [15+rbx*1+r12] + prefetcht0 [15+rbx*1+r13] +DB 102,15,56,220,216 + prefetcht0 [15+rbx*1+r14] + prefetcht0 [15+rbx*1+r15] +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((128-120))+rsi] + pxor xmm12,xmm12 + +DB 102,15,56,220,209 + pcmpgtd xmm11,xmm12 + movdqu xmm12,XMMWORD PTR[((-120))+rsi] +DB 102,15,56,220,217 + paddd xmm10,xmm11 + movdqa XMMWORD PTR[32+rsp],xmm10 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((144-120))+rsi] + + cmp eax,11 + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((160-120))+rsi] + + jb $L$enc4x_tail + +DB 102,15,56,220,209 +DB 
102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((176-120))+rsi] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((192-120))+rsi] + + je $L$enc4x_tail + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((208-120))+rsi] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((224-120))+rsi] + jmp $L$enc4x_tail + +ALIGN 32 +$L$enc4x_tail:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movdqu xmm6,XMMWORD PTR[rbx*1+r8] + movdqu xmm1,XMMWORD PTR[((16-120))+rsi] + +DB 102,15,56,221,208 + movdqu xmm7,XMMWORD PTR[rbx*1+r9] + pxor xmm6,xmm12 +DB 102,15,56,221,216 + movdqu xmm8,XMMWORD PTR[rbx*1+r10] + pxor xmm7,xmm12 +DB 102,15,56,221,224 + movdqu xmm9,XMMWORD PTR[rbx*1+r11] + pxor xmm8,xmm12 +DB 102,15,56,221,232 + movdqu xmm0,XMMWORD PTR[((32-120))+rsi] + pxor xmm9,xmm12 + + movups XMMWORD PTR[(-16)+rbx*1+r12],xmm2 + pxor xmm2,xmm6 + movups XMMWORD PTR[(-16)+rbx*1+r13],xmm3 + pxor xmm3,xmm7 + movups XMMWORD PTR[(-16)+rbx*1+r14],xmm4 + pxor xmm4,xmm8 + movups XMMWORD PTR[(-16)+rbx*1+r15],xmm5 + pxor xmm5,xmm9 + + dec edx + jnz $L$oop_enc4x + + mov rax,QWORD PTR[16+rsp] + mov edx,DWORD PTR[24+rsp] + + + + + + + + + + + lea rdi,QWORD PTR[160+rdi] + dec edx + jnz $L$enc4x_loop_grande + +$L$enc4x_done:: + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + + + + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$enc4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_multi_cbc_encrypt:: +aesni_multi_cbc_encrypt ENDP + +PUBLIC aesni_multi_cbc_decrypt + +ALIGN 32 +aesni_multi_cbc_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_multi_cbc_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + cmp edx,2 + jb $L$dec_non_avx + mov ecx,DWORD PTR[((OPENSSL_ia32cap_P+4))] + test ecx,268435456 + jnz _avx_cbc_dec_shortcut + jmp $L$dec_non_avx +ALIGN 16 +$L$dec_non_avx:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[96+rsp],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + + + + + + + sub rsp,48 + and rsp,-64 + mov QWORD PTR[16+rsp],rax + +$L$dec4x_body:: + movdqu xmm12,XMMWORD PTR[rsi] + lea rsi,QWORD PTR[120+rsi] + lea rdi,QWORD PTR[80+rdi] + +$L$dec4x_loop_grande:: + mov DWORD PTR[24+rsp],edx + xor edx,edx + mov ecx,DWORD PTR[((-64))+rdi] + mov r8,QWORD PTR[((-80))+rdi] + cmp ecx,edx + mov r12,QWORD PTR[((-72))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm6,XMMWORD PTR[((-56))+rdi] + mov DWORD PTR[32+rsp],ecx + cmovle r8,rsp + mov 
ecx,DWORD PTR[((-24))+rdi] + mov r9,QWORD PTR[((-40))+rdi] + cmp ecx,edx + mov r13,QWORD PTR[((-32))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm7,XMMWORD PTR[((-16))+rdi] + mov DWORD PTR[36+rsp],ecx + cmovle r9,rsp + mov ecx,DWORD PTR[16+rdi] + mov r10,QWORD PTR[rdi] + cmp ecx,edx + mov r14,QWORD PTR[8+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm8,XMMWORD PTR[24+rdi] + mov DWORD PTR[40+rsp],ecx + cmovle r10,rsp + mov ecx,DWORD PTR[56+rdi] + mov r11,QWORD PTR[40+rdi] + cmp ecx,edx + mov r15,QWORD PTR[48+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm9,XMMWORD PTR[64+rdi] + mov DWORD PTR[44+rsp],ecx + cmovle r11,rsp + test edx,edx + jz $L$dec4x_done + + movups xmm1,XMMWORD PTR[((16-120))+rsi] + movups xmm0,XMMWORD PTR[((32-120))+rsi] + mov eax,DWORD PTR[((240-120))+rsi] + movdqu xmm2,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[r9] + pxor xmm2,xmm12 + movdqu xmm4,XMMWORD PTR[r10] + pxor xmm3,xmm12 + movdqu xmm5,XMMWORD PTR[r11] + pxor xmm4,xmm12 + pxor xmm5,xmm12 + movdqa xmm10,XMMWORD PTR[32+rsp] + xor rbx,rbx + jmp $L$oop_dec4x + +ALIGN 32 +$L$oop_dec4x:: + add rbx,16 + lea rbp,QWORD PTR[16+rsp] + mov ecx,1 + sub rbp,rbx + +DB 102,15,56,222,209 + prefetcht0 [31+rbx*1+r8] + prefetcht0 [31+rbx*1+r9] +DB 102,15,56,222,217 + prefetcht0 [31+rbx*1+r10] + prefetcht0 [31+rbx*1+r11] +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((48-120))+rsi] + cmp ecx,DWORD PTR[32+rsp] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + cmovge r8,rbp + cmovg r12,rbp +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((-56))+rsi] + cmp ecx,DWORD PTR[36+rsp] +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + cmovge r9,rbp + cmovg r13,rbp +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((-40))+rsi] + cmp ecx,DWORD PTR[40+rsp] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + cmovge r10,rbp + cmovg r14,rbp +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((-24))+rsi] + cmp ecx,DWORD PTR[44+rsp] +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + cmovge r11,rbp + cmovg r15,rbp +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((-8))+rsi] + movdqa xmm11,xmm10 +DB 102,15,56,222,208 + prefetcht0 [15+rbx*1+r12] + prefetcht0 [15+rbx*1+r13] +DB 102,15,56,222,216 + prefetcht0 [15+rbx*1+r14] + prefetcht0 [15+rbx*1+r15] +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((128-120))+rsi] + pxor xmm12,xmm12 + +DB 102,15,56,222,209 + pcmpgtd xmm11,xmm12 + movdqu xmm12,XMMWORD PTR[((-120))+rsi] +DB 102,15,56,222,217 + paddd xmm10,xmm11 + movdqa XMMWORD PTR[32+rsp],xmm10 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((144-120))+rsi] + + cmp eax,11 + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((160-120))+rsi] + + jb $L$dec4x_tail + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((176-120))+rsi] + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((192-120))+rsi] + + je $L$dec4x_tail + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((208-120))+rsi] + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((224-120))+rsi] + jmp $L$dec4x_tail + +ALIGN 32 +$L$dec4x_tail:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + pxor xmm6,xmm0 + pxor xmm7,xmm0 +DB 
102,15,56,222,233 + movdqu xmm1,XMMWORD PTR[((16-120))+rsi] + pxor xmm8,xmm0 + pxor xmm9,xmm0 + movdqu xmm0,XMMWORD PTR[((32-120))+rsi] + +DB 102,15,56,223,214 +DB 102,15,56,223,223 + movdqu xmm6,XMMWORD PTR[((-16))+rbx*1+r8] + movdqu xmm7,XMMWORD PTR[((-16))+rbx*1+r9] +DB 102,65,15,56,223,224 +DB 102,65,15,56,223,233 + movdqu xmm8,XMMWORD PTR[((-16))+rbx*1+r10] + movdqu xmm9,XMMWORD PTR[((-16))+rbx*1+r11] + + movups XMMWORD PTR[(-16)+rbx*1+r12],xmm2 + movdqu xmm2,XMMWORD PTR[rbx*1+r8] + movups XMMWORD PTR[(-16)+rbx*1+r13],xmm3 + movdqu xmm3,XMMWORD PTR[rbx*1+r9] + pxor xmm2,xmm12 + movups XMMWORD PTR[(-16)+rbx*1+r14],xmm4 + movdqu xmm4,XMMWORD PTR[rbx*1+r10] + pxor xmm3,xmm12 + movups XMMWORD PTR[(-16)+rbx*1+r15],xmm5 + movdqu xmm5,XMMWORD PTR[rbx*1+r11] + pxor xmm4,xmm12 + pxor xmm5,xmm12 + + dec edx + jnz $L$oop_dec4x + + mov rax,QWORD PTR[16+rsp] + mov edx,DWORD PTR[24+rsp] + + lea rdi,QWORD PTR[160+rdi] + dec edx + jnz $L$dec4x_loop_grande + +$L$dec4x_done:: + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + + + + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$dec4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_multi_cbc_decrypt:: +aesni_multi_cbc_decrypt ENDP + +ALIGN 32 +aesni_multi_cbc_encrypt_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_multi_cbc_encrypt_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx_cbc_enc_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[(-120)+rax],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + + + + + + + + + sub rsp,192 + and rsp,-128 + mov QWORD PTR[16+rsp],rax + +$L$enc8x_body:: + vzeroupper + vmovdqu xmm15,XMMWORD PTR[rsi] + lea rsi,QWORD PTR[120+rsi] + lea rdi,QWORD PTR[160+rdi] + shr edx,1 + +$L$enc8x_loop_grande:: + + xor edx,edx + mov ecx,DWORD PTR[((-144))+rdi] + mov r8,QWORD PTR[((-160))+rdi] + cmp ecx,edx + mov rbx,QWORD PTR[((-152))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm2,XMMWORD PTR[((-136))+rdi] + mov DWORD PTR[32+rsp],ecx + cmovle r8,rsp + sub rbx,r8 + mov QWORD PTR[64+rsp],rbx + mov ecx,DWORD PTR[((-104))+rdi] + mov r9,QWORD PTR[((-120))+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[((-112))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm3,XMMWORD PTR[((-96))+rdi] + mov DWORD PTR[36+rsp],ecx + cmovle r9,rsp + sub rbp,r9 + mov QWORD PTR[72+rsp],rbp + mov ecx,DWORD PTR[((-64))+rdi] + mov r10,QWORD PTR[((-80))+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[((-72))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm4,XMMWORD PTR[((-56))+rdi] + mov DWORD PTR[40+rsp],ecx + cmovle r10,rsp + sub rbp,r10 + mov QWORD PTR[80+rsp],rbp + mov ecx,DWORD PTR[((-24))+rdi] + mov r11,QWORD PTR[((-40))+rdi] + cmp ecx,edx + mov 
rbp,QWORD PTR[((-32))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm5,XMMWORD PTR[((-16))+rdi] + mov DWORD PTR[44+rsp],ecx + cmovle r11,rsp + sub rbp,r11 + mov QWORD PTR[88+rsp],rbp + mov ecx,DWORD PTR[16+rdi] + mov r12,QWORD PTR[rdi] + cmp ecx,edx + mov rbp,QWORD PTR[8+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm6,XMMWORD PTR[24+rdi] + mov DWORD PTR[48+rsp],ecx + cmovle r12,rsp + sub rbp,r12 + mov QWORD PTR[96+rsp],rbp + mov ecx,DWORD PTR[56+rdi] + mov r13,QWORD PTR[40+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[48+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm7,XMMWORD PTR[64+rdi] + mov DWORD PTR[52+rsp],ecx + cmovle r13,rsp + sub rbp,r13 + mov QWORD PTR[104+rsp],rbp + mov ecx,DWORD PTR[96+rdi] + mov r14,QWORD PTR[80+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[88+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm8,XMMWORD PTR[104+rdi] + mov DWORD PTR[56+rsp],ecx + cmovle r14,rsp + sub rbp,r14 + mov QWORD PTR[112+rsp],rbp + mov ecx,DWORD PTR[136+rdi] + mov r15,QWORD PTR[120+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[128+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm9,XMMWORD PTR[144+rdi] + mov DWORD PTR[60+rsp],ecx + cmovle r15,rsp + sub rbp,r15 + mov QWORD PTR[120+rsp],rbp + test edx,edx + jz $L$enc8x_done + + vmovups xmm1,XMMWORD PTR[((16-120))+rsi] + vmovups xmm0,XMMWORD PTR[((32-120))+rsi] + mov eax,DWORD PTR[((240-120))+rsi] + + vpxor xmm10,xmm15,XMMWORD PTR[r8] + lea rbp,QWORD PTR[128+rsp] + vpxor xmm11,xmm15,XMMWORD PTR[r9] + vpxor xmm12,xmm15,XMMWORD PTR[r10] + vpxor xmm13,xmm15,XMMWORD PTR[r11] + vpxor xmm2,xmm2,xmm10 + vpxor xmm10,xmm15,XMMWORD PTR[r12] + vpxor xmm3,xmm3,xmm11 + vpxor xmm11,xmm15,XMMWORD PTR[r13] + vpxor xmm4,xmm4,xmm12 + vpxor xmm12,xmm15,XMMWORD PTR[r14] + vpxor xmm5,xmm5,xmm13 + vpxor xmm13,xmm15,XMMWORD PTR[r15] + vpxor xmm6,xmm6,xmm10 + mov ecx,1 + vpxor xmm7,xmm7,xmm11 + vpxor xmm8,xmm8,xmm12 + vpxor xmm9,xmm9,xmm13 + jmp $L$oop_enc8x + +ALIGN 32 +$L$oop_enc8x:: + vaesenc xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+0))+rsp] + vaesenc xmm3,xmm3,xmm1 + prefetcht0 [31+r8] + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r8] + cmovge r8,rsp + vaesenc xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm1 + sub rbx,r8 + vaesenc xmm8,xmm8,xmm1 + vpxor xmm10,xmm15,XMMWORD PTR[16+r8] + mov QWORD PTR[((64+0))+rsp],rbx + vaesenc xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((-72))+rsi] + lea r8,QWORD PTR[16+rbx*1+r8] + vmovdqu XMMWORD PTR[rbp],xmm10 + vaesenc xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+4))+rsp] + mov rbx,QWORD PTR[((64+8))+rsp] + vaesenc xmm3,xmm3,xmm0 + prefetcht0 [31+r9] + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[rbx*1+r9] + cmovge r9,rsp + vaesenc xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm0 + sub rbx,r9 + vaesenc xmm8,xmm8,xmm0 + vpxor xmm11,xmm15,XMMWORD PTR[16+r9] + mov QWORD PTR[((64+8))+rsp],rbx + vaesenc xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((-56))+rsi] + lea r9,QWORD PTR[16+rbx*1+r9] + vmovdqu XMMWORD PTR[16+rbp],xmm11 + vaesenc xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+8))+rsp] + mov rbx,QWORD PTR[((64+16))+rsp] + vaesenc xmm3,xmm3,xmm1 + prefetcht0 [31+r10] + vaesenc xmm4,xmm4,xmm1 + prefetcht0 [15+r8] + vaesenc xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r10] + cmovge r10,rsp + vaesenc xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm1 + sub rbx,r10 + vaesenc xmm8,xmm8,xmm1 + vpxor xmm12,xmm15,XMMWORD PTR[16+r10] + mov QWORD PTR[((64+16))+rsp],rbx + vaesenc xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((-40))+rsi] + lea r10,QWORD PTR[16+rbx*1+r10] + vmovdqu XMMWORD 
PTR[32+rbp],xmm12 + vaesenc xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+12))+rsp] + mov rbx,QWORD PTR[((64+24))+rsp] + vaesenc xmm3,xmm3,xmm0 + prefetcht0 [31+r11] + vaesenc xmm4,xmm4,xmm0 + prefetcht0 [15+r9] + vaesenc xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[rbx*1+r11] + cmovge r11,rsp + vaesenc xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm0 + sub rbx,r11 + vaesenc xmm8,xmm8,xmm0 + vpxor xmm13,xmm15,XMMWORD PTR[16+r11] + mov QWORD PTR[((64+24))+rsp],rbx + vaesenc xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((-24))+rsi] + lea r11,QWORD PTR[16+rbx*1+r11] + vmovdqu XMMWORD PTR[48+rbp],xmm13 + vaesenc xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+16))+rsp] + mov rbx,QWORD PTR[((64+32))+rsp] + vaesenc xmm3,xmm3,xmm1 + prefetcht0 [31+r12] + vaesenc xmm4,xmm4,xmm1 + prefetcht0 [15+r10] + vaesenc xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r12] + cmovge r12,rsp + vaesenc xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm1 + sub rbx,r12 + vaesenc xmm8,xmm8,xmm1 + vpxor xmm10,xmm15,XMMWORD PTR[16+r12] + mov QWORD PTR[((64+32))+rsp],rbx + vaesenc xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((-8))+rsi] + lea r12,QWORD PTR[16+rbx*1+r12] + vaesenc xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+20))+rsp] + mov rbx,QWORD PTR[((64+40))+rsp] + vaesenc xmm3,xmm3,xmm0 + prefetcht0 [31+r13] + vaesenc xmm4,xmm4,xmm0 + prefetcht0 [15+r11] + vaesenc xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[r13*1+rbx] + cmovge r13,rsp + vaesenc xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm0 + sub rbx,r13 + vaesenc xmm8,xmm8,xmm0 + vpxor xmm11,xmm15,XMMWORD PTR[16+r13] + mov QWORD PTR[((64+40))+rsp],rbx + vaesenc xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[8+rsi] + lea r13,QWORD PTR[16+rbx*1+r13] + vaesenc xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+24))+rsp] + mov rbx,QWORD PTR[((64+48))+rsp] + vaesenc xmm3,xmm3,xmm1 + prefetcht0 [31+r14] + vaesenc xmm4,xmm4,xmm1 + prefetcht0 [15+r12] + vaesenc xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r14] + cmovge r14,rsp + vaesenc xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm1 + sub rbx,r14 + vaesenc xmm8,xmm8,xmm1 + vpxor xmm12,xmm15,XMMWORD PTR[16+r14] + mov QWORD PTR[((64+48))+rsp],rbx + vaesenc xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[24+rsi] + lea r14,QWORD PTR[16+rbx*1+r14] + vaesenc xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+28))+rsp] + mov rbx,QWORD PTR[((64+56))+rsp] + vaesenc xmm3,xmm3,xmm0 + prefetcht0 [31+r15] + vaesenc xmm4,xmm4,xmm0 + prefetcht0 [15+r13] + vaesenc xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[rbx*1+r15] + cmovge r15,rsp + vaesenc xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesenc xmm7,xmm7,xmm0 + sub rbx,r15 + vaesenc xmm8,xmm8,xmm0 + vpxor xmm13,xmm15,XMMWORD PTR[16+r15] + mov QWORD PTR[((64+56))+rsp],rbx + vaesenc xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[40+rsi] + lea r15,QWORD PTR[16+rbx*1+r15] + vmovdqu xmm14,XMMWORD PTR[32+rsp] + prefetcht0 [15+r14] + prefetcht0 [15+r15] + cmp eax,11 + jb $L$enc8x_tail + + vaesenc xmm2,xmm2,xmm1 + vaesenc xmm3,xmm3,xmm1 + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm5,xmm5,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm8,xmm8,xmm1 + vaesenc xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((176-120))+rsi] + + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vaesenc xmm8,xmm8,xmm0 + vaesenc xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((192-120))+rsi] + je $L$enc8x_tail + + vaesenc xmm2,xmm2,xmm1 + vaesenc xmm3,xmm3,xmm1 + vaesenc xmm4,xmm4,xmm1 + vaesenc xmm5,xmm5,xmm1 + vaesenc xmm6,xmm6,xmm1 + vaesenc xmm7,xmm7,xmm1 + vaesenc xmm8,xmm8,xmm1 + vaesenc 
xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((208-120))+rsi] + + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vaesenc xmm8,xmm8,xmm0 + vaesenc xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((224-120))+rsi] + +$L$enc8x_tail:: + vaesenc xmm2,xmm2,xmm1 + vpxor xmm15,xmm15,xmm15 + vaesenc xmm3,xmm3,xmm1 + vaesenc xmm4,xmm4,xmm1 + vpcmpgtd xmm15,xmm14,xmm15 + vaesenc xmm5,xmm5,xmm1 + vaesenc xmm6,xmm6,xmm1 + vpaddd xmm15,xmm15,xmm14 + vmovdqu xmm14,XMMWORD PTR[48+rsp] + vaesenc xmm7,xmm7,xmm1 + mov rbx,QWORD PTR[64+rsp] + vaesenc xmm8,xmm8,xmm1 + vaesenc xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((16-120))+rsi] + + vaesenclast xmm2,xmm2,xmm0 + vmovdqa XMMWORD PTR[32+rsp],xmm15 + vpxor xmm15,xmm15,xmm15 + vaesenclast xmm3,xmm3,xmm0 + vaesenclast xmm4,xmm4,xmm0 + vpcmpgtd xmm15,xmm14,xmm15 + vaesenclast xmm5,xmm5,xmm0 + vaesenclast xmm6,xmm6,xmm0 + vpaddd xmm14,xmm14,xmm15 + vmovdqu xmm15,XMMWORD PTR[((-120))+rsi] + vaesenclast xmm7,xmm7,xmm0 + vaesenclast xmm8,xmm8,xmm0 + vmovdqa XMMWORD PTR[48+rsp],xmm14 + vaesenclast xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((32-120))+rsi] + + vmovups XMMWORD PTR[(-16)+r8],xmm2 + sub r8,rbx + vpxor xmm2,xmm2,XMMWORD PTR[rbp] + vmovups XMMWORD PTR[(-16)+r9],xmm3 + sub r9,QWORD PTR[72+rsp] + vpxor xmm3,xmm3,XMMWORD PTR[16+rbp] + vmovups XMMWORD PTR[(-16)+r10],xmm4 + sub r10,QWORD PTR[80+rsp] + vpxor xmm4,xmm4,XMMWORD PTR[32+rbp] + vmovups XMMWORD PTR[(-16)+r11],xmm5 + sub r11,QWORD PTR[88+rsp] + vpxor xmm5,xmm5,XMMWORD PTR[48+rbp] + vmovups XMMWORD PTR[(-16)+r12],xmm6 + sub r12,QWORD PTR[96+rsp] + vpxor xmm6,xmm6,xmm10 + vmovups XMMWORD PTR[(-16)+r13],xmm7 + sub r13,QWORD PTR[104+rsp] + vpxor xmm7,xmm7,xmm11 + vmovups XMMWORD PTR[(-16)+r14],xmm8 + sub r14,QWORD PTR[112+rsp] + vpxor xmm8,xmm8,xmm12 + vmovups XMMWORD PTR[(-16)+r15],xmm9 + sub r15,QWORD PTR[120+rsp] + vpxor xmm9,xmm9,xmm13 + + dec edx + jnz $L$oop_enc8x + + mov rax,QWORD PTR[16+rsp] + + + + + +$L$enc8x_done:: + vzeroupper + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$enc8x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_multi_cbc_encrypt_avx:: +aesni_multi_cbc_encrypt_avx ENDP + + +ALIGN 32 +aesni_multi_cbc_decrypt_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_multi_cbc_decrypt_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx_cbc_dec_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[(-120)+rax],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + 
movaps XMMWORD PTR[(-72)+rax],xmm15 + + + + + + + + + + sub rsp,256 + and rsp,-256 + sub rsp,192 + mov QWORD PTR[16+rsp],rax + +$L$dec8x_body:: + vzeroupper + vmovdqu xmm15,XMMWORD PTR[rsi] + lea rsi,QWORD PTR[120+rsi] + lea rdi,QWORD PTR[160+rdi] + shr edx,1 + +$L$dec8x_loop_grande:: + + xor edx,edx + mov ecx,DWORD PTR[((-144))+rdi] + mov r8,QWORD PTR[((-160))+rdi] + cmp ecx,edx + mov rbx,QWORD PTR[((-152))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm2,XMMWORD PTR[((-136))+rdi] + mov DWORD PTR[32+rsp],ecx + cmovle r8,rsp + sub rbx,r8 + mov QWORD PTR[64+rsp],rbx + vmovdqu XMMWORD PTR[192+rsp],xmm2 + mov ecx,DWORD PTR[((-104))+rdi] + mov r9,QWORD PTR[((-120))+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[((-112))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm3,XMMWORD PTR[((-96))+rdi] + mov DWORD PTR[36+rsp],ecx + cmovle r9,rsp + sub rbp,r9 + mov QWORD PTR[72+rsp],rbp + vmovdqu XMMWORD PTR[208+rsp],xmm3 + mov ecx,DWORD PTR[((-64))+rdi] + mov r10,QWORD PTR[((-80))+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[((-72))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm4,XMMWORD PTR[((-56))+rdi] + mov DWORD PTR[40+rsp],ecx + cmovle r10,rsp + sub rbp,r10 + mov QWORD PTR[80+rsp],rbp + vmovdqu XMMWORD PTR[224+rsp],xmm4 + mov ecx,DWORD PTR[((-24))+rdi] + mov r11,QWORD PTR[((-40))+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[((-32))+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm5,XMMWORD PTR[((-16))+rdi] + mov DWORD PTR[44+rsp],ecx + cmovle r11,rsp + sub rbp,r11 + mov QWORD PTR[88+rsp],rbp + vmovdqu XMMWORD PTR[240+rsp],xmm5 + mov ecx,DWORD PTR[16+rdi] + mov r12,QWORD PTR[rdi] + cmp ecx,edx + mov rbp,QWORD PTR[8+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm6,XMMWORD PTR[24+rdi] + mov DWORD PTR[48+rsp],ecx + cmovle r12,rsp + sub rbp,r12 + mov QWORD PTR[96+rsp],rbp + vmovdqu XMMWORD PTR[256+rsp],xmm6 + mov ecx,DWORD PTR[56+rdi] + mov r13,QWORD PTR[40+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[48+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm7,XMMWORD PTR[64+rdi] + mov DWORD PTR[52+rsp],ecx + cmovle r13,rsp + sub rbp,r13 + mov QWORD PTR[104+rsp],rbp + vmovdqu XMMWORD PTR[272+rsp],xmm7 + mov ecx,DWORD PTR[96+rdi] + mov r14,QWORD PTR[80+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[88+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm8,XMMWORD PTR[104+rdi] + mov DWORD PTR[56+rsp],ecx + cmovle r14,rsp + sub rbp,r14 + mov QWORD PTR[112+rsp],rbp + vmovdqu XMMWORD PTR[288+rsp],xmm8 + mov ecx,DWORD PTR[136+rdi] + mov r15,QWORD PTR[120+rdi] + cmp ecx,edx + mov rbp,QWORD PTR[128+rdi] + cmovg edx,ecx + test ecx,ecx + vmovdqu xmm9,XMMWORD PTR[144+rdi] + mov DWORD PTR[60+rsp],ecx + cmovle r15,rsp + sub rbp,r15 + mov QWORD PTR[120+rsp],rbp + vmovdqu XMMWORD PTR[304+rsp],xmm9 + test edx,edx + jz $L$dec8x_done + + vmovups xmm1,XMMWORD PTR[((16-120))+rsi] + vmovups xmm0,XMMWORD PTR[((32-120))+rsi] + mov eax,DWORD PTR[((240-120))+rsi] + lea rbp,QWORD PTR[((192+128))+rsp] + + vmovdqu xmm2,XMMWORD PTR[r8] + vmovdqu xmm3,XMMWORD PTR[r9] + vmovdqu xmm4,XMMWORD PTR[r10] + vmovdqu xmm5,XMMWORD PTR[r11] + vmovdqu xmm6,XMMWORD PTR[r12] + vmovdqu xmm7,XMMWORD PTR[r13] + vmovdqu xmm8,XMMWORD PTR[r14] + vmovdqu xmm9,XMMWORD PTR[r15] + vmovdqu XMMWORD PTR[rbp],xmm2 + vpxor xmm2,xmm2,xmm15 + vmovdqu XMMWORD PTR[16+rbp],xmm3 + vpxor xmm3,xmm3,xmm15 + vmovdqu XMMWORD PTR[32+rbp],xmm4 + vpxor xmm4,xmm4,xmm15 + vmovdqu XMMWORD PTR[48+rbp],xmm5 + vpxor xmm5,xmm5,xmm15 + vmovdqu XMMWORD PTR[64+rbp],xmm6 + vpxor xmm6,xmm6,xmm15 + vmovdqu XMMWORD PTR[80+rbp],xmm7 + vpxor xmm7,xmm7,xmm15 + vmovdqu XMMWORD PTR[96+rbp],xmm8 + vpxor xmm8,xmm8,xmm15 + vmovdqu 
XMMWORD PTR[112+rbp],xmm9 + vpxor xmm9,xmm9,xmm15 + xor rbp,080h + mov ecx,1 + jmp $L$oop_dec8x + +ALIGN 32 +$L$oop_dec8x:: + vaesdec xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+0))+rsp] + vaesdec xmm3,xmm3,xmm1 + prefetcht0 [31+r8] + vaesdec xmm4,xmm4,xmm1 + vaesdec xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r8] + cmovge r8,rsp + vaesdec xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm1 + sub rbx,r8 + vaesdec xmm8,xmm8,xmm1 + vmovdqu xmm10,XMMWORD PTR[16+r8] + mov QWORD PTR[((64+0))+rsp],rbx + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((-72))+rsi] + lea r8,QWORD PTR[16+rbx*1+r8] + vmovdqu XMMWORD PTR[128+rsp],xmm10 + vaesdec xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+4))+rsp] + mov rbx,QWORD PTR[((64+8))+rsp] + vaesdec xmm3,xmm3,xmm0 + prefetcht0 [31+r9] + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[rbx*1+r9] + cmovge r9,rsp + vaesdec xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm0 + sub rbx,r9 + vaesdec xmm8,xmm8,xmm0 + vmovdqu xmm11,XMMWORD PTR[16+r9] + mov QWORD PTR[((64+8))+rsp],rbx + vaesdec xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((-56))+rsi] + lea r9,QWORD PTR[16+rbx*1+r9] + vmovdqu XMMWORD PTR[144+rsp],xmm11 + vaesdec xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+8))+rsp] + mov rbx,QWORD PTR[((64+16))+rsp] + vaesdec xmm3,xmm3,xmm1 + prefetcht0 [31+r10] + vaesdec xmm4,xmm4,xmm1 + prefetcht0 [15+r8] + vaesdec xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r10] + cmovge r10,rsp + vaesdec xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm1 + sub rbx,r10 + vaesdec xmm8,xmm8,xmm1 + vmovdqu xmm12,XMMWORD PTR[16+r10] + mov QWORD PTR[((64+16))+rsp],rbx + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((-40))+rsi] + lea r10,QWORD PTR[16+rbx*1+r10] + vmovdqu XMMWORD PTR[160+rsp],xmm12 + vaesdec xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+12))+rsp] + mov rbx,QWORD PTR[((64+24))+rsp] + vaesdec xmm3,xmm3,xmm0 + prefetcht0 [31+r11] + vaesdec xmm4,xmm4,xmm0 + prefetcht0 [15+r9] + vaesdec xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[rbx*1+r11] + cmovge r11,rsp + vaesdec xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm0 + sub rbx,r11 + vaesdec xmm8,xmm8,xmm0 + vmovdqu xmm13,XMMWORD PTR[16+r11] + mov QWORD PTR[((64+24))+rsp],rbx + vaesdec xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((-24))+rsi] + lea r11,QWORD PTR[16+rbx*1+r11] + vmovdqu XMMWORD PTR[176+rsp],xmm13 + vaesdec xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+16))+rsp] + mov rbx,QWORD PTR[((64+32))+rsp] + vaesdec xmm3,xmm3,xmm1 + prefetcht0 [31+r12] + vaesdec xmm4,xmm4,xmm1 + prefetcht0 [15+r10] + vaesdec xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r12] + cmovge r12,rsp + vaesdec xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm1 + sub rbx,r12 + vaesdec xmm8,xmm8,xmm1 + vmovdqu xmm10,XMMWORD PTR[16+r12] + mov QWORD PTR[((64+32))+rsp],rbx + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((-8))+rsi] + lea r12,QWORD PTR[16+rbx*1+r12] + vaesdec xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+20))+rsp] + mov rbx,QWORD PTR[((64+40))+rsp] + vaesdec xmm3,xmm3,xmm0 + prefetcht0 [31+r13] + vaesdec xmm4,xmm4,xmm0 + prefetcht0 [15+r11] + vaesdec xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[r13*1+rbx] + cmovge r13,rsp + vaesdec xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm0 + sub rbx,r13 + vaesdec xmm8,xmm8,xmm0 + vmovdqu xmm11,XMMWORD PTR[16+r13] + mov QWORD PTR[((64+40))+rsp],rbx + vaesdec xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[8+rsi] + lea r13,QWORD PTR[16+rbx*1+r13] + vaesdec xmm2,xmm2,xmm1 + cmp ecx,DWORD PTR[((32+24))+rsp] + mov rbx,QWORD PTR[((64+48))+rsp] + vaesdec xmm3,xmm3,xmm1 + prefetcht0 [31+r14] + vaesdec xmm4,xmm4,xmm1 + 
prefetcht0 [15+r12] + vaesdec xmm5,xmm5,xmm1 + lea rbx,QWORD PTR[rbx*1+r14] + cmovge r14,rsp + vaesdec xmm6,xmm6,xmm1 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm1 + sub rbx,r14 + vaesdec xmm8,xmm8,xmm1 + vmovdqu xmm12,XMMWORD PTR[16+r14] + mov QWORD PTR[((64+48))+rsp],rbx + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[24+rsi] + lea r14,QWORD PTR[16+rbx*1+r14] + vaesdec xmm2,xmm2,xmm0 + cmp ecx,DWORD PTR[((32+28))+rsp] + mov rbx,QWORD PTR[((64+56))+rsp] + vaesdec xmm3,xmm3,xmm0 + prefetcht0 [31+r15] + vaesdec xmm4,xmm4,xmm0 + prefetcht0 [15+r13] + vaesdec xmm5,xmm5,xmm0 + lea rbx,QWORD PTR[rbx*1+r15] + cmovge r15,rsp + vaesdec xmm6,xmm6,xmm0 + cmovg rbx,rsp + vaesdec xmm7,xmm7,xmm0 + sub rbx,r15 + vaesdec xmm8,xmm8,xmm0 + vmovdqu xmm13,XMMWORD PTR[16+r15] + mov QWORD PTR[((64+56))+rsp],rbx + vaesdec xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[40+rsi] + lea r15,QWORD PTR[16+rbx*1+r15] + vmovdqu xmm14,XMMWORD PTR[32+rsp] + prefetcht0 [15+r14] + prefetcht0 [15+r15] + cmp eax,11 + jb $L$dec8x_tail + + vaesdec xmm2,xmm2,xmm1 + vaesdec xmm3,xmm3,xmm1 + vaesdec xmm4,xmm4,xmm1 + vaesdec xmm5,xmm5,xmm1 + vaesdec xmm6,xmm6,xmm1 + vaesdec xmm7,xmm7,xmm1 + vaesdec xmm8,xmm8,xmm1 + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((176-120))+rsi] + + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vaesdec xmm8,xmm8,xmm0 + vaesdec xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((192-120))+rsi] + je $L$dec8x_tail + + vaesdec xmm2,xmm2,xmm1 + vaesdec xmm3,xmm3,xmm1 + vaesdec xmm4,xmm4,xmm1 + vaesdec xmm5,xmm5,xmm1 + vaesdec xmm6,xmm6,xmm1 + vaesdec xmm7,xmm7,xmm1 + vaesdec xmm8,xmm8,xmm1 + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((208-120))+rsi] + + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vaesdec xmm8,xmm8,xmm0 + vaesdec xmm9,xmm9,xmm0 + vmovups xmm0,XMMWORD PTR[((224-120))+rsi] + +$L$dec8x_tail:: + vaesdec xmm2,xmm2,xmm1 + vpxor xmm15,xmm15,xmm15 + vaesdec xmm3,xmm3,xmm1 + vaesdec xmm4,xmm4,xmm1 + vpcmpgtd xmm15,xmm14,xmm15 + vaesdec xmm5,xmm5,xmm1 + vaesdec xmm6,xmm6,xmm1 + vpaddd xmm15,xmm15,xmm14 + vmovdqu xmm14,XMMWORD PTR[48+rsp] + vaesdec xmm7,xmm7,xmm1 + mov rbx,QWORD PTR[64+rsp] + vaesdec xmm8,xmm8,xmm1 + vaesdec xmm9,xmm9,xmm1 + vmovups xmm1,XMMWORD PTR[((16-120))+rsi] + + vaesdeclast xmm2,xmm2,xmm0 + vmovdqa XMMWORD PTR[32+rsp],xmm15 + vpxor xmm15,xmm15,xmm15 + vaesdeclast xmm3,xmm3,xmm0 + vpxor xmm2,xmm2,XMMWORD PTR[rbp] + vaesdeclast xmm4,xmm4,xmm0 + vpxor xmm3,xmm3,XMMWORD PTR[16+rbp] + vpcmpgtd xmm15,xmm14,xmm15 + vaesdeclast xmm5,xmm5,xmm0 + vpxor xmm4,xmm4,XMMWORD PTR[32+rbp] + vaesdeclast xmm6,xmm6,xmm0 + vpxor xmm5,xmm5,XMMWORD PTR[48+rbp] + vpaddd xmm14,xmm14,xmm15 + vmovdqu xmm15,XMMWORD PTR[((-120))+rsi] + vaesdeclast xmm7,xmm7,xmm0 + vpxor xmm6,xmm6,XMMWORD PTR[64+rbp] + vaesdeclast xmm8,xmm8,xmm0 + vpxor xmm7,xmm7,XMMWORD PTR[80+rbp] + vmovdqa XMMWORD PTR[48+rsp],xmm14 + vaesdeclast xmm9,xmm9,xmm0 + vpxor xmm8,xmm8,XMMWORD PTR[96+rbp] + vmovups xmm0,XMMWORD PTR[((32-120))+rsi] + + vmovups XMMWORD PTR[(-16)+r8],xmm2 + sub r8,rbx + vmovdqu xmm2,XMMWORD PTR[((128+0))+rsp] + vpxor xmm9,xmm9,XMMWORD PTR[112+rbp] + vmovups XMMWORD PTR[(-16)+r9],xmm3 + sub r9,QWORD PTR[72+rsp] + vmovdqu XMMWORD PTR[rbp],xmm2 + vpxor xmm2,xmm2,xmm15 + vmovdqu xmm3,XMMWORD PTR[((128+16))+rsp] + vmovups XMMWORD PTR[(-16)+r10],xmm4 + sub r10,QWORD PTR[80+rsp] + vmovdqu XMMWORD PTR[16+rbp],xmm3 + 
vpxor xmm3,xmm3,xmm15 + vmovdqu xmm4,XMMWORD PTR[((128+32))+rsp] + vmovups XMMWORD PTR[(-16)+r11],xmm5 + sub r11,QWORD PTR[88+rsp] + vmovdqu XMMWORD PTR[32+rbp],xmm4 + vpxor xmm4,xmm4,xmm15 + vmovdqu xmm5,XMMWORD PTR[((128+48))+rsp] + vmovups XMMWORD PTR[(-16)+r12],xmm6 + sub r12,QWORD PTR[96+rsp] + vmovdqu XMMWORD PTR[48+rbp],xmm5 + vpxor xmm5,xmm5,xmm15 + vmovdqu XMMWORD PTR[64+rbp],xmm10 + vpxor xmm6,xmm15,xmm10 + vmovups XMMWORD PTR[(-16)+r13],xmm7 + sub r13,QWORD PTR[104+rsp] + vmovdqu XMMWORD PTR[80+rbp],xmm11 + vpxor xmm7,xmm15,xmm11 + vmovups XMMWORD PTR[(-16)+r14],xmm8 + sub r14,QWORD PTR[112+rsp] + vmovdqu XMMWORD PTR[96+rbp],xmm12 + vpxor xmm8,xmm15,xmm12 + vmovups XMMWORD PTR[(-16)+r15],xmm9 + sub r15,QWORD PTR[120+rsp] + vmovdqu XMMWORD PTR[112+rbp],xmm13 + vpxor xmm9,xmm15,xmm13 + + xor rbp,128 + dec edx + jnz $L$oop_dec8x + + mov rax,QWORD PTR[16+rsp] + + + + + +$L$dec8x_done:: + vzeroupper + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$dec8x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_multi_cbc_decrypt_avx:: +aesni_multi_cbc_decrypt_avx ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[16+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea rsi,QWORD PTR[((-56-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel 
$L$SEH_begin_aesni_multi_cbc_encrypt + DD imagerel $L$SEH_end_aesni_multi_cbc_encrypt + DD imagerel $L$SEH_info_aesni_multi_cbc_encrypt + DD imagerel $L$SEH_begin_aesni_multi_cbc_decrypt + DD imagerel $L$SEH_end_aesni_multi_cbc_decrypt + DD imagerel $L$SEH_info_aesni_multi_cbc_decrypt + DD imagerel $L$SEH_begin_aesni_multi_cbc_encrypt_avx + DD imagerel $L$SEH_end_aesni_multi_cbc_encrypt_avx + DD imagerel $L$SEH_info_aesni_multi_cbc_encrypt_avx + DD imagerel $L$SEH_begin_aesni_multi_cbc_decrypt_avx + DD imagerel $L$SEH_end_aesni_multi_cbc_decrypt_avx + DD imagerel $L$SEH_info_aesni_multi_cbc_decrypt_avx +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_aesni_multi_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc4x_body,imagerel $L$enc4x_epilogue +$L$SEH_info_aesni_multi_cbc_decrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec4x_body,imagerel $L$dec4x_epilogue +$L$SEH_info_aesni_multi_cbc_encrypt_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc8x_body,imagerel $L$enc8x_epilogue +$L$SEH_info_aesni_multi_cbc_decrypt_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec8x_body,imagerel $L$dec8x_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm index 3f205a16a03530..7403dc3ee6bac1 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm @@ -1,19 +1,26 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC aesni_cbc_sha1_enc -ALIGN 16 +ALIGN 32 aesni_cbc_sha1_enc PROC PUBLIC mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))] - mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + mov r11,QWORD PTR[((OPENSSL_ia32cap_P+4))] + bt r11,61 + jc aesni_cbc_sha1_enc_shaext + and r11d,268435456 + and r10d,1073741824 + or r10d,r11d + cmp r10d,1342177280 + je aesni_cbc_sha1_enc_avx jmp aesni_cbc_sha1_enc_ssse3 DB 0F3h,0C3h ;repret aesni_cbc_sha1_enc ENDP -ALIGN 16 +ALIGN 32 aesni_cbc_sha1_enc_ssse3 PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi @@ -53,12 +60,12 @@ $L$prologue_ssse3:: mov r12,rdi mov r13,rsi mov r14,rdx - mov r15,rcx - movdqu xmm11,XMMWORD PTR[r8] + lea r15,QWORD PTR[112+rcx] + movdqu xmm2,XMMWORD PTR[r8] mov QWORD PTR[88+rsp],r8 shl r14,6 sub r13,r12 - mov r8d,DWORD PTR[240+r15] + mov r8d,DWORD PTR[((240-112))+r15] add r14,r10 lea r11,QWORD PTR[K_XX_XX] @@ -68,1188 +75,2500 @@ $L$prologue_ssse3:: mov edx,DWORD PTR[12+r9] mov esi,ebx mov ebp,DWORD PTR[16+r9] + mov edi,ecx + xor edi,edx + and esi,edi - movdqa xmm6,XMMWORD PTR[64+r11] - movdqa xmm9,XMMWORD PTR[r11] - movdqu xmm0,XMMWORD PTR[r10] - movdqu xmm1,XMMWORD PTR[16+r10] - movdqu xmm2,XMMWORD PTR[32+r10] - movdqu xmm3,XMMWORD PTR[48+r10] -DB 102,15,56,0,198 + movdqa xmm3,XMMWORD PTR[64+r11] + movdqa xmm13,XMMWORD PTR[r11] + movdqu xmm4,XMMWORD PTR[r10] + movdqu xmm5,XMMWORD PTR[16+r10] + movdqu xmm6,XMMWORD PTR[32+r10] + movdqu xmm7,XMMWORD PTR[48+r10] +DB 102,15,56,0,227 +DB 102,15,56,0,235 +DB 102,15,56,0,243 add r10,64 -DB 102,15,56,0,206 -DB 102,15,56,0,214 -DB 102,15,56,0,222 - paddd xmm0,xmm9 - paddd xmm1,xmm9 - paddd xmm2,xmm9 - movdqa XMMWORD PTR[rsp],xmm0 - psubd xmm0,xmm9 - movdqa XMMWORD PTR[16+rsp],xmm1 - psubd xmm1,xmm9 - movdqa XMMWORD PTR[32+rsp],xmm2 - psubd xmm2,xmm9 - movups xmm13,XMMWORD PTR[r15] - movups xmm14,XMMWORD PTR[16+r15] + paddd xmm4,xmm13 +DB 102,15,56,0,251 
+ paddd xmm5,xmm13 + paddd xmm6,xmm13 + movdqa XMMWORD PTR[rsp],xmm4 + psubd xmm4,xmm13 + movdqa XMMWORD PTR[16+rsp],xmm5 + psubd xmm5,xmm13 + movdqa XMMWORD PTR[32+rsp],xmm6 + psubd xmm6,xmm13 + movups xmm15,XMMWORD PTR[((-112))+r15] + movups xmm0,XMMWORD PTR[((16-112))+r15] jmp $L$oop_ssse3 -ALIGN 16 +ALIGN 32 $L$oop_ssse3:: - movdqa xmm4,xmm1 - add ebp,DWORD PTR[rsp] - movups xmm12,XMMWORD PTR[r12] - xorps xmm12,xmm13 - xorps xmm11,xmm12 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[32+r15] - xor ecx,edx - movdqa xmm8,xmm3 -DB 102,15,58,15,224,8 + ror ebx,2 + movups xmm14,XMMWORD PTR[r12] + xorps xmm14,xmm15 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 + pshufd xmm8,xmm4,238 + xor esi,edx + movdqa xmm12,xmm7 + paddd xmm13,xmm7 mov edi,eax + add ebp,DWORD PTR[rsp] + punpcklqdq xmm8,xmm5 + xor ebx,ecx rol eax,5 - paddd xmm9,xmm3 - and esi,ecx - xor ecx,edx - psrldq xmm8,4 - xor esi,edx - add ebp,eax - pxor xmm4,xmm0 - ror ebx,2 add ebp,esi - pxor xmm8,xmm2 - add edx,DWORD PTR[4+rsp] - xor ebx,ecx - mov esi,ebp - rol ebp,5 - pxor xmm4,xmm8 + psrldq xmm12,4 and edi,ebx xor ebx,ecx - movdqa XMMWORD PTR[48+rsp],xmm9 - xor edi,ecx -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[48+r15] - add edx,ebp - movdqa xmm10,xmm4 - movdqa xmm8,xmm4 + pxor xmm8,xmm4 + add ebp,eax ror eax,7 - add edx,edi - add ecx,DWORD PTR[8+rsp] + pxor xmm12,xmm6 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[4+rsp] + pxor xmm8,xmm12 xor eax,ebx - pslldq xmm10,12 - paddd xmm4,xmm4 - mov edi,edx - rol edx,5 + rol ebp,5 + movdqa XMMWORD PTR[48+rsp],xmm13 + add edx,edi + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 and esi,eax + movdqa xmm3,xmm8 xor eax,ebx - psrld xmm8,31 - xor esi,ebx - add ecx,edx - movdqa xmm9,xmm10 + add edx,ebp ror ebp,7 - add ecx,esi - psrld xmm10,30 - por xmm4,xmm8 - add ebx,DWORD PTR[12+rsp] + movdqa xmm12,xmm8 + xor esi,ebx + pslldq xmm3,12 + paddd xmm8,xmm8 + mov edi,edx + add ecx,DWORD PTR[8+rsp] + psrld xmm12,31 xor ebp,eax - mov esi,ecx - rol ecx,5 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[64+r15] - pslld xmm9,2 - pxor xmm4,xmm10 + rol edx,5 + add ecx,esi + movdqa xmm13,xmm3 and edi,ebp xor ebp,eax - movdqa xmm10,XMMWORD PTR[r11] - xor edi,eax - add ebx,ecx - pxor xmm4,xmm9 + psrld xmm3,30 + add ecx,edx ror edx,7 - add ebx,edi - movdqa xmm5,xmm2 - add eax,DWORD PTR[16+rsp] + por xmm8,xmm12 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[12+rsp] + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + pslld xmm13,2 + pxor xmm8,xmm3 xor edx,ebp - movdqa xmm9,xmm4 -DB 102,15,58,15,233,8 - mov edi,ebx - rol ebx,5 - paddd xmm10,xmm4 + movdqa xmm3,XMMWORD PTR[r11] + rol ecx,5 + add ebx,edi and esi,edx + pxor xmm8,xmm13 xor edx,ebp - psrldq xmm9,4 - xor esi,ebp - add eax,ebx - pxor xmm5,xmm1 + add ebx,ecx ror ecx,7 - add eax,esi - pxor xmm9,xmm3 - add ebp,DWORD PTR[20+rsp] -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[80+r15] + pshufd xmm9,xmm5,238 + xor esi,ebp + movdqa xmm13,xmm8 + paddd xmm3,xmm8 + mov edi,ebx + add eax,DWORD PTR[16+rsp] + punpcklqdq xmm9,xmm6 xor ecx,edx - mov esi,eax - rol eax,5 - pxor xmm5,xmm9 + rol ebx,5 + add eax,esi + psrldq xmm13,4 and edi,ecx xor ecx,edx - movdqa XMMWORD PTR[rsp],xmm10 - xor edi,edx - add ebp,eax - movdqa xmm8,xmm5 - movdqa xmm9,xmm5 + pxor xmm9,xmm5 + add eax,ebx ror ebx,7 - add ebp,edi - add edx,DWORD PTR[24+rsp] + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 + pxor xmm13,xmm7 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[20+rsp] + pxor xmm9,xmm13 xor 
ebx,ecx - pslldq xmm8,12 - paddd xmm5,xmm5 - mov edi,ebp - rol ebp,5 + rol eax,5 + movdqa XMMWORD PTR[rsp],xmm3 + add ebp,edi and esi,ebx + movdqa xmm12,xmm9 xor ebx,ecx - psrld xmm9,31 - xor esi,ecx -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[96+r15] - add edx,ebp - movdqa xmm10,xmm8 + add ebp,eax ror eax,7 - add edx,esi - psrld xmm8,30 - por xmm5,xmm9 - add ecx,DWORD PTR[28+rsp] + movdqa xmm13,xmm9 + xor esi,ecx + pslldq xmm12,12 + paddd xmm9,xmm9 + mov edi,ebp + add edx,DWORD PTR[24+rsp] + psrld xmm13,31 xor eax,ebx - mov esi,edx - rol edx,5 - pslld xmm10,2 - pxor xmm5,xmm8 + rol ebp,5 + add edx,esi + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + movdqa xmm3,xmm12 and edi,eax xor eax,ebx - movdqa xmm8,XMMWORD PTR[16+r11] - xor edi,ebx - add ecx,edx - pxor xmm5,xmm10 + psrld xmm12,30 + add edx,ebp ror ebp,7 - add ecx,edi - movdqa xmm6,xmm3 - add ebx,DWORD PTR[32+rsp] + por xmm9,xmm13 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[28+rsp] + pslld xmm3,2 + pxor xmm9,xmm12 xor ebp,eax - movdqa xmm10,xmm5 -DB 102,15,58,15,242,8 - mov edi,ecx - rol ecx,5 -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[112+r15] - paddd xmm8,xmm5 + movdqa xmm12,XMMWORD PTR[16+r11] + rol edx,5 + add ecx,edi and esi,ebp + pxor xmm9,xmm3 xor ebp,eax - psrldq xmm10,4 - xor esi,eax - add ebx,ecx - pxor xmm6,xmm2 + add ecx,edx ror edx,7 - add ebx,esi - pxor xmm10,xmm4 - add eax,DWORD PTR[36+rsp] + pshufd xmm10,xmm6,238 + xor esi,eax + movdqa xmm3,xmm9 + paddd xmm12,xmm9 + mov edi,ecx + add ebx,DWORD PTR[32+rsp] + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + punpcklqdq xmm10,xmm7 xor edx,ebp - mov esi,ebx - rol ebx,5 - pxor xmm6,xmm10 + rol ecx,5 + add ebx,esi + psrldq xmm3,4 and edi,edx xor edx,ebp - movdqa XMMWORD PTR[16+rsp],xmm8 - xor edi,ebp - add eax,ebx - movdqa xmm9,xmm6 - movdqa xmm10,xmm6 + pxor xmm10,xmm6 + add ebx,ecx ror ecx,7 - add eax,edi - add ebp,DWORD PTR[40+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[128+r15] + pxor xmm3,xmm8 + xor edi,ebp + mov esi,ebx + add eax,DWORD PTR[36+rsp] + pxor xmm10,xmm3 xor ecx,edx - pslldq xmm9,12 - paddd xmm6,xmm6 - mov edi,eax - rol eax,5 + rol ebx,5 + movdqa XMMWORD PTR[16+rsp],xmm12 + add eax,edi and esi,ecx + movdqa xmm13,xmm10 xor ecx,edx - psrld xmm10,31 - xor esi,edx - add ebp,eax - movdqa xmm8,xmm9 + add eax,ebx ror ebx,7 - add ebp,esi - psrld xmm9,30 - por xmm6,xmm10 - add edx,DWORD PTR[44+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + movdqa xmm3,xmm10 + xor esi,edx + pslldq xmm13,12 + paddd xmm10,xmm10 + mov edi,eax + add ebp,DWORD PTR[40+rsp] + psrld xmm3,31 xor ebx,ecx - mov esi,ebp - rol ebp,5 - pslld xmm8,2 - pxor xmm6,xmm9 + rol eax,5 + add ebp,esi + movdqa xmm12,xmm13 and edi,ebx xor ebx,ecx - movdqa xmm9,XMMWORD PTR[16+r11] - xor edi,ecx -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[144+r15] - add edx,ebp - pxor xmm6,xmm8 + psrld xmm13,30 + add ebp,eax ror eax,7 - add edx,edi - movdqa xmm7,xmm4 - add ecx,DWORD PTR[48+rsp] + por xmm10,xmm3 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[44+rsp] + pslld xmm12,2 + pxor xmm10,xmm13 xor eax,ebx - movdqa xmm8,xmm6 -DB 102,15,58,15,251,8 - mov edi,edx - rol edx,5 - paddd xmm9,xmm6 + movdqa xmm13,XMMWORD PTR[16+r11] + rol ebp,5 + add edx,edi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 and esi,eax + pxor xmm10,xmm12 xor eax,ebx - psrldq xmm8,4 - xor esi,ebx - add ecx,edx - pxor xmm7,xmm3 + add edx,ebp ror ebp,7 - add ecx,esi - pxor xmm8,xmm5 - add ebx,DWORD PTR[52+rsp] + pshufd xmm11,xmm7,238 + xor esi,ebx + movdqa xmm12,xmm10 + 
paddd xmm13,xmm10 + mov edi,edx + add ecx,DWORD PTR[48+rsp] + punpcklqdq xmm11,xmm8 xor ebp,eax - mov esi,ecx - rol ecx,5 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[160+r15] - pxor xmm7,xmm8 + rol edx,5 + add ecx,esi + psrldq xmm12,4 and edi,ebp xor ebp,eax - movdqa XMMWORD PTR[32+rsp],xmm9 - xor edi,eax - add ebx,ecx - movdqa xmm10,xmm7 - movdqa xmm8,xmm7 + pxor xmm11,xmm7 + add ecx,edx ror edx,7 - add ebx,edi - add eax,DWORD PTR[56+rsp] + pxor xmm12,xmm9 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[52+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + pxor xmm11,xmm12 xor edx,ebp - pslldq xmm10,12 - paddd xmm7,xmm7 - mov edi,ebx - rol ebx,5 + rol ecx,5 + movdqa XMMWORD PTR[32+rsp],xmm13 + add ebx,edi and esi,edx + movdqa xmm3,xmm11 xor edx,ebp - psrld xmm8,31 - xor esi,ebp - add eax,ebx - movdqa xmm9,xmm10 + add ebx,ecx ror ecx,7 + movdqa xmm12,xmm11 + xor esi,ebp + pslldq xmm3,12 + paddd xmm11,xmm11 + mov edi,ebx + add eax,DWORD PTR[56+rsp] + psrld xmm12,31 + xor ecx,edx + rol ebx,5 add eax,esi - psrld xmm10,30 - por xmm7,xmm8 - add ebp,DWORD PTR[60+rsp] + movdqa xmm13,xmm3 + and edi,ecx + xor ecx,edx + psrld xmm3,30 + add eax,ebx + ror ebx,7 cmp r8d,11 jb $L$aesenclast1 - movups xmm14,XMMWORD PTR[176+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[192+r15] -DB 102,69,15,56,220,222 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 je $L$aesenclast1 - movups xmm14,XMMWORD PTR[208+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[224+r15] -DB 102,69,15,56,220,222 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 $L$aesenclast1:: -DB 102,69,15,56,221,223 - movups xmm14,XMMWORD PTR[16+r15] - xor ecx,edx +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + por xmm11,xmm12 + xor edi,edx mov esi,eax + add ebp,DWORD PTR[60+rsp] + pslld xmm13,2 + pxor xmm11,xmm3 + xor ebx,ecx + movdqa xmm3,XMMWORD PTR[16+r11] rol eax,5 - pslld xmm9,2 - pxor xmm7,xmm10 - and edi,ecx - xor ecx,edx - movdqa xmm10,XMMWORD PTR[16+r11] - xor edi,edx - add ebp,eax - pxor xmm7,xmm9 - ror ebx,7 add ebp,edi - movdqa xmm9,xmm7 - add edx,DWORD PTR[rsp] - pxor xmm0,xmm4 -DB 102,68,15,58,15,206,8 - xor ebx,ecx - mov edi,ebp - rol ebp,5 - pxor xmm0,xmm1 and esi,ebx + pxor xmm11,xmm13 + pshufd xmm13,xmm10,238 xor ebx,ecx - movdqa xmm8,xmm10 - paddd xmm10,xmm7 - xor esi,ecx - movups xmm12,XMMWORD PTR[16+r12] - xorps xmm12,xmm13 - movups XMMWORD PTR[r12*1+r13],xmm11 - xorps xmm11,xmm12 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[32+r15] - add edx,ebp - pxor xmm0,xmm9 + add ebp,eax ror eax,7 - add edx,esi - add ecx,DWORD PTR[4+rsp] + pxor xmm4,xmm8 + xor esi,ecx + mov edi,ebp + add edx,DWORD PTR[rsp] + punpcklqdq xmm13,xmm11 xor eax,ebx - movdqa xmm9,xmm0 - movdqa XMMWORD PTR[48+rsp],xmm10 - mov esi,edx - rol edx,5 + rol ebp,5 + pxor xmm4,xmm5 + add edx,esi + movups xmm14,XMMWORD PTR[16+r12] + xorps xmm14,xmm15 + movups XMMWORD PTR[r13*1+r12],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 and edi,eax + movdqa xmm12,xmm3 xor eax,ebx - pslld xmm0,2 - xor edi,ebx - add ecx,edx - psrld xmm9,30 + paddd xmm3,xmm11 + add edx,ebp + pxor xmm4,xmm13 ror ebp,7 - add ecx,edi - add ebx,DWORD PTR[8+rsp] + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[4+rsp] + movdqa xmm13,xmm4 xor ebp,eax - mov edi,ecx - rol ecx,5 -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[48+r15] - por xmm0,xmm9 + rol edx,5 + movdqa XMMWORD 
PTR[48+rsp],xmm3 + add ecx,edi and esi,ebp xor ebp,eax - movdqa xmm10,xmm0 - xor esi,eax - add ebx,ecx + pslld xmm4,2 + add ecx,edx ror edx,7 + psrld xmm13,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD PTR[8+rsp] + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 + por xmm4,xmm13 + xor edx,ebp + rol ecx,5 + pshufd xmm3,xmm11,238 add ebx,esi - add eax,DWORD PTR[12+rsp] + and edi,edx xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] + xor edi,ebp mov esi,ebx rol ebx,5 - and edi,edx - xor edx,ebp - xor edi,ebp - add eax,ebx - ror ecx,7 add eax,edi - add ebp,DWORD PTR[16+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[64+r15] - pxor xmm1,xmm5 -DB 102,68,15,58,15,215,8 xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm5,xmm9 + add ebp,DWORD PTR[16+rsp] + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + xor esi,ecx + punpcklqdq xmm3,xmm4 mov edi,eax rol eax,5 - pxor xmm1,xmm2 - xor esi,ecx - add ebp,eax - movdqa xmm9,xmm8 - paddd xmm8,xmm0 - ror ebx,7 + pxor xmm5,xmm6 add ebp,esi - pxor xmm1,xmm10 - add edx,DWORD PTR[20+rsp] xor edi,ecx + movdqa xmm13,xmm12 + ror ebx,7 + paddd xmm12,xmm4 + add ebp,eax + pxor xmm5,xmm3 + add edx,DWORD PTR[20+rsp] + xor edi,ebx mov esi,ebp rol ebp,5 - movdqa xmm10,xmm1 - movdqa XMMWORD PTR[rsp],xmm8 - xor edi,ebx - add edx,ebp - ror eax,7 + movdqa xmm3,xmm5 add edx,edi - pslld xmm1,2 - add ecx,DWORD PTR[24+rsp] xor esi,ebx - psrld xmm10,30 + movdqa XMMWORD PTR[rsp],xmm12 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[24+rsp] + pslld xmm5,2 + xor esi,eax mov edi,edx + psrld xmm3,30 rol edx,5 - xor esi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[80+r15] - add ecx,edx - ror ebp,7 add ecx,esi - por xmm1,xmm10 - add ebx,DWORD PTR[28+rsp] + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 xor edi,eax - movdqa xmm8,xmm1 + ror ebp,7 + por xmm5,xmm3 + add ecx,edx + add ebx,DWORD PTR[28+rsp] + pshufd xmm12,xmm4,238 + xor edi,ebp mov esi,ecx rol ecx,5 - xor edi,ebp - add ebx,ecx - ror edx,7 add ebx,edi - add eax,DWORD PTR[32+rsp] - pxor xmm2,xmm6 -DB 102,68,15,58,15,192,8 xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm6,xmm10 + add eax,DWORD PTR[32+rsp] + xor esi,edx + punpcklqdq xmm12,xmm5 mov edi,ebx rol ebx,5 - pxor xmm2,xmm3 - xor esi,edx - add eax,ebx - movdqa xmm10,XMMWORD PTR[32+r11] - paddd xmm9,xmm1 - ror ecx,7 + pxor xmm6,xmm7 add eax,esi - pxor xmm2,xmm8 - add ebp,DWORD PTR[36+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[96+r15] xor edi,edx + movdqa xmm3,XMMWORD PTR[32+r11] + ror ecx,7 + paddd xmm13,xmm5 + add eax,ebx + pxor xmm6,xmm12 + add ebp,DWORD PTR[36+rsp] + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + xor edi,ecx mov esi,eax rol eax,5 - movdqa xmm8,xmm2 - movdqa XMMWORD PTR[16+rsp],xmm9 - xor edi,ecx - add ebp,eax - ror ebx,7 + movdqa xmm12,xmm6 add ebp,edi - pslld xmm2,2 - add edx,DWORD PTR[40+rsp] xor esi,ecx - psrld xmm8,30 + movdqa XMMWORD PTR[16+rsp],xmm13 + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[40+rsp] + pslld xmm6,2 + xor esi,ebx mov edi,ebp + psrld xmm12,30 rol ebp,5 - xor esi,ebx - add edx,ebp - ror eax,7 add edx,esi - por xmm2,xmm8 - add ecx,DWORD PTR[44+rsp] xor edi,ebx - movdqa xmm9,xmm2 + ror eax,7 + por xmm6,xmm12 + add edx,ebp + add ecx,DWORD PTR[44+rsp] + pshufd xmm13,xmm5,238 + xor edi,eax mov esi,edx rol edx,5 - xor edi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[112+r15] - add ecx,edx - ror ebp,7 add ecx,edi - add ebx,DWORD PTR[48+rsp] - pxor xmm3,xmm7 -DB 102,68,15,58,15,201,8 + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 xor 
esi,eax + ror ebp,7 + add ecx,edx + pxor xmm7,xmm11 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + punpcklqdq xmm13,xmm6 mov edi,ecx rol ecx,5 - pxor xmm3,xmm4 - xor esi,ebp - add ebx,ecx - movdqa xmm8,xmm10 - paddd xmm10,xmm2 - ror edx,7 + pxor xmm7,xmm8 add ebx,esi - pxor xmm3,xmm9 - add eax,DWORD PTR[52+rsp] xor edi,ebp + movdqa xmm12,xmm3 + ror edx,7 + paddd xmm3,xmm6 + add ebx,ecx + pxor xmm7,xmm13 + add eax,DWORD PTR[52+rsp] + xor edi,edx mov esi,ebx rol ebx,5 - movdqa xmm9,xmm3 - movdqa XMMWORD PTR[32+rsp],xmm10 - xor edi,edx - add eax,ebx - ror ecx,7 + movdqa xmm13,xmm7 add eax,edi - pslld xmm3,2 - add ebp,DWORD PTR[56+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[128+r15] xor esi,edx - psrld xmm9,30 + movdqa XMMWORD PTR[32+rsp],xmm3 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[56+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + pslld xmm7,2 + xor esi,ecx mov edi,eax + psrld xmm13,30 rol eax,5 - xor esi,ecx - add ebp,eax - ror ebx,7 add ebp,esi - por xmm3,xmm9 - add edx,DWORD PTR[60+rsp] xor edi,ecx - movdqa xmm10,xmm3 + ror ebx,7 + por xmm7,xmm13 + add ebp,eax + add edx,DWORD PTR[60+rsp] + pshufd xmm3,xmm6,238 + xor edi,ebx mov esi,ebp rol ebp,5 - xor edi,ebx - add edx,ebp - ror eax,7 add edx,edi - add ecx,DWORD PTR[rsp] - pxor xmm4,xmm0 -DB 102,68,15,58,15,210,8 xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm8,xmm4 + add ecx,DWORD PTR[rsp] + xor esi,eax + punpcklqdq xmm3,xmm7 mov edi,edx rol edx,5 - pxor xmm4,xmm5 - xor esi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[144+r15] - add ecx,edx - movdqa xmm9,xmm8 - paddd xmm8,xmm3 - ror ebp,7 + pxor xmm8,xmm9 add ecx,esi - pxor xmm4,xmm10 - add ebx,DWORD PTR[4+rsp] + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 xor edi,eax + movdqa xmm13,xmm12 + ror ebp,7 + paddd xmm12,xmm7 + add ecx,edx + pxor xmm8,xmm3 + add ebx,DWORD PTR[4+rsp] + xor edi,ebp mov esi,ecx rol ecx,5 - movdqa xmm10,xmm4 - movdqa XMMWORD PTR[48+rsp],xmm8 - xor edi,ebp - add ebx,ecx - ror edx,7 + movdqa xmm3,xmm8 add ebx,edi - pslld xmm4,2 - add eax,DWORD PTR[8+rsp] xor esi,ebp - psrld xmm10,30 + movdqa XMMWORD PTR[48+rsp],xmm12 + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[8+rsp] + pslld xmm8,2 + xor esi,edx mov edi,ebx + psrld xmm3,30 rol ebx,5 - xor esi,edx - add eax,ebx - ror ecx,7 add eax,esi - por xmm4,xmm10 - add ebp,DWORD PTR[12+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[160+r15] xor edi,edx - movdqa xmm8,xmm4 + ror ecx,7 + por xmm8,xmm3 + add eax,ebx + add ebp,DWORD PTR[12+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + pshufd xmm12,xmm7,238 + xor edi,ecx mov esi,eax rol eax,5 - xor edi,ecx - add ebp,eax - ror ebx,7 add ebp,edi - add edx,DWORD PTR[16+rsp] - pxor xmm5,xmm1 -DB 102,68,15,58,15,195,8 xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm9,xmm5 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + punpcklqdq xmm12,xmm8 mov edi,ebp rol ebp,5 - pxor xmm5,xmm6 - xor esi,ebx - add edx,ebp - movdqa xmm10,xmm9 - paddd xmm9,xmm4 - ror eax,7 + pxor xmm9,xmm10 add edx,esi - pxor xmm5,xmm8 - add ecx,DWORD PTR[20+rsp] xor edi,ebx + movdqa xmm3,xmm13 + ror eax,7 + paddd xmm13,xmm8 + add edx,ebp + pxor xmm9,xmm12 + add ecx,DWORD PTR[20+rsp] + xor edi,eax mov esi,edx rol edx,5 - movdqa xmm8,xmm5 - movdqa XMMWORD PTR[rsp],xmm9 - xor edi,eax + movdqa xmm12,xmm9 + add ecx,edi cmp r8d,11 jb $L$aesenclast2 - movups xmm14,XMMWORD PTR[176+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[192+r15] -DB 102,69,15,56,220,222 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD 
PTR[80+r15] +DB 102,15,56,220,208 je $L$aesenclast2 - movups xmm14,XMMWORD PTR[208+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[224+r15] -DB 102,69,15,56,220,222 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 $L$aesenclast2:: -DB 102,69,15,56,221,223 - movups xmm14,XMMWORD PTR[16+r15] - add ecx,edx +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + xor esi,eax + movdqa XMMWORD PTR[rsp],xmm13 ror ebp,7 - add ecx,edi - pslld xmm5,2 + add ecx,edx add ebx,DWORD PTR[24+rsp] - xor esi,eax - psrld xmm8,30 + pslld xmm9,2 + xor esi,ebp mov edi,ecx + psrld xmm12,30 rol ecx,5 - xor esi,ebp - add ebx,ecx - ror edx,7 add ebx,esi - por xmm5,xmm8 - add eax,DWORD PTR[28+rsp] xor edi,ebp - movdqa xmm9,xmm5 + ror edx,7 + por xmm9,xmm12 + add ebx,ecx + add eax,DWORD PTR[28+rsp] + pshufd xmm13,xmm8,238 + ror ecx,7 mov esi,ebx - rol ebx,5 xor edi,edx - add eax,ebx - ror ecx,7 + rol ebx,5 add eax,edi - mov edi,ecx - movups xmm12,XMMWORD PTR[32+r12] - xorps xmm12,xmm13 - movups XMMWORD PTR[16+r12*1+r13],xmm11 - xorps xmm11,xmm12 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[32+r15] - pxor xmm6,xmm2 -DB 102,68,15,58,15,204,8 + xor esi,ecx xor ecx,edx + add eax,ebx + pxor xmm10,xmm6 add ebp,DWORD PTR[32+rsp] - and edi,edx - pxor xmm6,xmm7 + movups xmm14,XMMWORD PTR[32+r12] + xorps xmm14,xmm15 + movups XMMWORD PTR[16+r12*1+r13],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 and esi,ecx + xor ecx,edx ror ebx,7 - movdqa xmm8,xmm10 - paddd xmm10,xmm5 - add ebp,edi + punpcklqdq xmm13,xmm9 mov edi,eax - pxor xmm6,xmm9 + xor esi,ecx + pxor xmm10,xmm11 rol eax,5 add ebp,esi - xor ecx,edx - add ebp,eax - movdqa xmm9,xmm6 - movdqa XMMWORD PTR[16+rsp],xmm10 - mov esi,ebx + movdqa xmm12,xmm3 + xor edi,ebx + paddd xmm3,xmm9 xor ebx,ecx + pxor xmm10,xmm13 + add ebp,eax add edx,DWORD PTR[36+rsp] - and esi,ecx - pslld xmm6,2 and edi,ebx + xor ebx,ecx ror eax,7 - psrld xmm9,30 - add edx,esi + movdqa xmm13,xmm10 mov esi,ebp + xor edi,ebx + movdqa XMMWORD PTR[16+rsp],xmm3 rol ebp,5 -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[48+r15] add edx,edi - xor ebx,ecx - add edx,ebp - por xmm6,xmm9 - mov edi,eax + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 + xor esi,eax + pslld xmm10,2 xor eax,ebx - movdqa xmm10,xmm6 + add edx,ebp + psrld xmm13,30 add ecx,DWORD PTR[40+rsp] - and edi,ebx and esi,eax + xor eax,ebx + por xmm10,xmm13 ror ebp,7 - add ecx,edi mov edi,edx + xor esi,eax rol edx,5 + pshufd xmm3,xmm9,238 add ecx,esi - xor eax,ebx - add ecx,edx - mov esi,ebp + xor edi,ebp xor ebp,eax + add ecx,edx add ebx,DWORD PTR[44+rsp] - and esi,eax and edi,ebp -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[64+r15] + xor ebp,eax ror edx,7 - add ebx,esi + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 mov esi,ecx + xor edi,ebp rol ecx,5 add ebx,edi - xor ebp,eax - add ebx,ecx - mov edi,edx - pxor xmm7,xmm3 -DB 102,68,15,58,15,213,8 + xor esi,edx xor edx,ebp + add ebx,ecx + pxor xmm11,xmm7 add eax,DWORD PTR[48+rsp] - and edi,ebp - pxor xmm7,xmm0 and esi,edx + xor edx,ebp ror ecx,7 - movdqa xmm9,XMMWORD PTR[48+r11] - paddd xmm8,xmm6 - add eax,edi + punpcklqdq xmm3,xmm10 mov edi,ebx - pxor xmm7,xmm10 + xor esi,edx + pxor xmm11,xmm4 rol ebx,5 add eax,esi - xor edx,ebp - add eax,ebx - movdqa xmm10,xmm7 - movdqa XMMWORD PTR[32+rsp],xmm8 - mov esi,ecx -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[80+r15] + movdqa xmm13,XMMWORD PTR[48+r11] + xor edi,ecx + paddd xmm12,xmm10 xor 
ecx,edx + pxor xmm11,xmm3 + add eax,ebx add ebp,DWORD PTR[52+rsp] - and esi,edx - pslld xmm7,2 + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 and edi,ecx + xor ecx,edx ror ebx,7 - psrld xmm10,30 - add ebp,esi + movdqa xmm3,xmm11 mov esi,eax + xor edi,ecx + movdqa XMMWORD PTR[32+rsp],xmm12 rol eax,5 add ebp,edi - xor ecx,edx - add ebp,eax - por xmm7,xmm10 - mov edi,ebx + xor esi,ebx + pslld xmm11,2 xor ebx,ecx - movdqa xmm8,xmm7 + add ebp,eax + psrld xmm3,30 add edx,DWORD PTR[56+rsp] - and edi,ecx and esi,ebx + xor ebx,ecx + por xmm11,xmm3 ror eax,7 - add edx,edi mov edi,ebp + xor esi,ebx rol ebp,5 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[96+r15] + pshufd xmm12,xmm10,238 add edx,esi - xor ebx,ecx - add edx,ebp - mov esi,eax + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + xor edi,eax xor eax,ebx + add edx,ebp add ecx,DWORD PTR[60+rsp] - and esi,ebx and edi,eax + xor eax,ebx ror ebp,7 - add ecx,esi mov esi,edx + xor edi,eax rol edx,5 add ecx,edi - xor eax,ebx - add ecx,edx - mov edi,ebp - pxor xmm0,xmm4 -DB 102,68,15,58,15,198,8 + xor esi,ebp xor ebp,eax + add ecx,edx + pxor xmm4,xmm8 add ebx,DWORD PTR[rsp] - and edi,eax - pxor xmm0,xmm1 and esi,ebp -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[112+r15] + xor ebp,eax ror edx,7 - movdqa xmm10,xmm9 - paddd xmm9,xmm7 - add ebx,edi + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + punpcklqdq xmm12,xmm11 mov edi,ecx - pxor xmm0,xmm8 + xor esi,ebp + pxor xmm4,xmm5 rol ecx,5 add ebx,esi - xor ebp,eax - add ebx,ecx - movdqa xmm8,xmm0 - movdqa XMMWORD PTR[48+rsp],xmm9 - mov esi,edx + movdqa xmm3,xmm13 + xor edi,edx + paddd xmm13,xmm11 xor edx,ebp + pxor xmm4,xmm12 + add ebx,ecx add eax,DWORD PTR[4+rsp] - and esi,ebp - pslld xmm0,2 and edi,edx + xor edx,ebp ror ecx,7 - psrld xmm8,30 - add eax,esi + movdqa xmm12,xmm4 mov esi,ebx + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm13 rol ebx,5 add eax,edi - xor edx,ebp - add eax,ebx - por xmm0,xmm8 - mov edi,ecx -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[128+r15] + xor esi,ecx + pslld xmm4,2 xor ecx,edx - movdqa xmm9,xmm0 + add eax,ebx + psrld xmm12,30 add ebp,DWORD PTR[8+rsp] - and edi,edx + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 and esi,ecx + xor ecx,edx + por xmm4,xmm12 ror ebx,7 - add ebp,edi mov edi,eax + xor esi,ecx rol eax,5 + pshufd xmm13,xmm11,238 add ebp,esi - xor ecx,edx - add ebp,eax - mov esi,ebx + xor edi,ebx xor ebx,ecx + add ebp,eax add edx,DWORD PTR[12+rsp] - and esi,ecx and edi,ebx + xor ebx,ecx ror eax,7 - add edx,esi mov esi,ebp + xor edi,ebx rol ebp,5 -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[144+r15] add edx,edi - xor ebx,ecx - add edx,ebp - mov edi,eax - pxor xmm1,xmm5 -DB 102,68,15,58,15,207,8 + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor esi,eax xor eax,ebx + add edx,ebp + pxor xmm5,xmm9 add ecx,DWORD PTR[16+rsp] - and edi,ebx - pxor xmm1,xmm2 and esi,eax + xor eax,ebx ror ebp,7 - movdqa xmm8,xmm10 - paddd xmm10,xmm0 - add ecx,edi + punpcklqdq xmm13,xmm4 mov edi,edx - pxor xmm1,xmm9 + xor esi,eax + pxor xmm5,xmm6 rol edx,5 add ecx,esi - xor eax,ebx - add ecx,edx - movdqa xmm9,xmm1 - movdqa XMMWORD PTR[rsp],xmm10 - mov esi,ebp + movdqa xmm12,xmm3 + xor edi,ebp + paddd xmm3,xmm4 xor ebp,eax + pxor xmm5,xmm13 + add ecx,edx add ebx,DWORD PTR[20+rsp] - and esi,eax - pslld xmm1,2 and edi,ebp -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[160+r15] + xor ebp,eax ror edx,7 - psrld xmm9,30 - add ebx,esi + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + movdqa xmm13,xmm5 mov esi,ecx + 
xor edi,ebp + movdqa XMMWORD PTR[rsp],xmm3 rol ecx,5 add ebx,edi - xor ebp,eax - add ebx,ecx - por xmm1,xmm9 - mov edi,edx + xor esi,edx + pslld xmm5,2 xor edx,ebp - movdqa xmm10,xmm1 + add ebx,ecx + psrld xmm13,30 add eax,DWORD PTR[24+rsp] - and edi,ebp and esi,edx + xor edx,ebp + por xmm5,xmm13 ror ecx,7 - add eax,edi mov edi,ebx + xor esi,edx rol ebx,5 + pshufd xmm3,xmm4,238 add eax,esi - xor edx,ebp + xor edi,ecx + xor ecx,edx add eax,ebx - mov esi,ecx + add ebp,DWORD PTR[28+rsp] cmp r8d,11 jb $L$aesenclast3 - movups xmm14,XMMWORD PTR[176+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[192+r15] -DB 102,69,15,56,220,222 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 je $L$aesenclast3 - movups xmm14,XMMWORD PTR[208+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[224+r15] -DB 102,69,15,56,220,222 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 $L$aesenclast3:: -DB 102,69,15,56,221,223 - movups xmm14,XMMWORD PTR[16+r15] - xor ecx,edx - add ebp,DWORD PTR[28+rsp] - and esi,edx +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] and edi,ecx + xor ecx,edx ror ebx,7 - add ebp,esi mov esi,eax + xor edi,ecx rol eax,5 add ebp,edi - xor ecx,edx - add ebp,eax - mov edi,ebx - pxor xmm2,xmm6 -DB 102,68,15,58,15,208,8 + xor esi,ebx xor ebx,ecx + add ebp,eax + pxor xmm6,xmm10 add edx,DWORD PTR[32+rsp] - and edi,ecx - pxor xmm2,xmm3 and esi,ebx + xor ebx,ecx ror eax,7 - movdqa xmm9,xmm8 - paddd xmm8,xmm1 - add edx,edi + punpcklqdq xmm3,xmm5 mov edi,ebp - pxor xmm2,xmm10 + xor esi,ebx + pxor xmm6,xmm7 rol ebp,5 - movups xmm12,XMMWORD PTR[48+r12] - xorps xmm12,xmm13 - movups XMMWORD PTR[32+r12*1+r13],xmm11 - xorps xmm11,xmm12 -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[32+r15] add edx,esi - xor ebx,ecx - add edx,ebp - movdqa xmm10,xmm2 - movdqa XMMWORD PTR[16+rsp],xmm8 - mov esi,eax + movups xmm14,XMMWORD PTR[48+r12] + xorps xmm14,xmm15 + movups XMMWORD PTR[32+r12*1+r13],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 + movdqa xmm13,xmm12 + xor edi,eax + paddd xmm12,xmm5 xor eax,ebx + pxor xmm6,xmm3 + add edx,ebp add ecx,DWORD PTR[36+rsp] - and esi,ebx - pslld xmm2,2 and edi,eax + xor eax,ebx ror ebp,7 - psrld xmm10,30 - add ecx,esi + movdqa xmm3,xmm6 mov esi,edx + xor edi,eax + movdqa XMMWORD PTR[16+rsp],xmm12 rol edx,5 add ecx,edi - xor eax,ebx - add ecx,edx - por xmm2,xmm10 - mov edi,ebp + xor esi,ebp + pslld xmm6,2 xor ebp,eax - movdqa xmm8,xmm2 + add ecx,edx + psrld xmm3,30 add ebx,DWORD PTR[40+rsp] - and edi,eax and esi,ebp -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[48+r15] + xor ebp,eax + por xmm6,xmm3 ror edx,7 - add ebx,edi + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 mov edi,ecx + xor esi,ebp rol ecx,5 + pshufd xmm12,xmm5,238 add ebx,esi - xor ebp,eax - add ebx,ecx - mov esi,edx + xor edi,edx xor edx,ebp + add ebx,ecx add eax,DWORD PTR[44+rsp] - and esi,ebp and edi,edx + xor edx,ebp ror ecx,7 - add eax,esi mov esi,ebx + xor edi,edx rol ebx,5 add eax,edi - xor edx,ebp + xor esi,edx add eax,ebx + pxor xmm7,xmm11 add ebp,DWORD PTR[48+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[64+r15] - pxor xmm3,xmm7 -DB 102,68,15,58,15,193,8 - xor esi,edx + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + xor esi,ecx + punpcklqdq xmm12,xmm6 mov edi,eax rol eax,5 - pxor xmm3,xmm4 - xor esi,ecx - add ebp,eax - movdqa xmm10,xmm9 - paddd xmm9,xmm2 - ror ebx,7 + pxor 
xmm7,xmm8 add ebp,esi - pxor xmm3,xmm8 - add edx,DWORD PTR[52+rsp] xor edi,ecx + movdqa xmm3,xmm13 + ror ebx,7 + paddd xmm13,xmm6 + add ebp,eax + pxor xmm7,xmm12 + add edx,DWORD PTR[52+rsp] + xor edi,ebx mov esi,ebp rol ebp,5 - movdqa xmm8,xmm3 - movdqa XMMWORD PTR[32+rsp],xmm9 - xor edi,ebx - add edx,ebp - ror eax,7 + movdqa xmm12,xmm7 add edx,edi - pslld xmm3,2 - add ecx,DWORD PTR[56+rsp] xor esi,ebx - psrld xmm8,30 + movdqa XMMWORD PTR[32+rsp],xmm13 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[56+rsp] + pslld xmm7,2 + xor esi,eax mov edi,edx + psrld xmm12,30 rol edx,5 - xor esi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[80+r15] - add ecx,edx - ror ebp,7 add ecx,esi - por xmm3,xmm8 - add ebx,DWORD PTR[60+rsp] + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 xor edi,eax + ror ebp,7 + por xmm7,xmm12 + add ecx,edx + add ebx,DWORD PTR[60+rsp] + xor edi,ebp mov esi,ecx rol ecx,5 - xor edi,ebp - add ebx,ecx - ror edx,7 add ebx,edi - add eax,DWORD PTR[rsp] - paddd xmm10,xmm3 xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] + xor esi,edx mov edi,ebx rol ebx,5 - xor esi,edx - movdqa XMMWORD PTR[48+rsp],xmm10 - add eax,ebx - ror ecx,7 + paddd xmm3,xmm7 add eax,esi - add ebp,DWORD PTR[4+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[96+r15] xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm3 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[4+rsp] + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + xor edi,ecx mov esi,eax rol eax,5 - xor edi,ecx - add ebp,eax - ror ebx,7 add ebp,edi - add edx,DWORD PTR[8+rsp] xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx mov edi,ebp rol ebp,5 - xor esi,ebx - add edx,ebp - ror eax,7 add edx,esi - add ecx,DWORD PTR[12+rsp] xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[12+rsp] + xor edi,eax mov esi,edx rol edx,5 - xor edi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[112+r15] - add ecx,edx - ror ebp,7 add ecx,edi + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + xor esi,eax + ror ebp,7 + add ecx,edx cmp r10,r14 je $L$done_ssse3 - movdqa xmm6,XMMWORD PTR[64+r11] - movdqa xmm9,XMMWORD PTR[r11] - movdqu xmm0,XMMWORD PTR[r10] - movdqu xmm1,XMMWORD PTR[16+r10] - movdqu xmm2,XMMWORD PTR[32+r10] - movdqu xmm3,XMMWORD PTR[48+r10] -DB 102,15,56,0,198 + movdqa xmm3,XMMWORD PTR[64+r11] + movdqa xmm13,XMMWORD PTR[r11] + movdqu xmm4,XMMWORD PTR[r10] + movdqu xmm5,XMMWORD PTR[16+r10] + movdqu xmm6,XMMWORD PTR[32+r10] + movdqu xmm7,XMMWORD PTR[48+r10] +DB 102,15,56,0,227 add r10,64 add ebx,DWORD PTR[16+rsp] - xor esi,eax -DB 102,15,56,0,206 + xor esi,ebp mov edi,ecx +DB 102,15,56,0,235 rol ecx,5 - paddd xmm0,xmm9 - xor esi,ebp + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm4,xmm13 add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD PTR[rsp],xmm4 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm4,xmm13 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,243 + rol edx,5 + add ecx,esi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor edi,eax + ror ebp,7 + paddd xmm5,xmm13 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm5 + rol ecx,5 + 
add ebx,edi + xor esi,ebp ror edx,7 - add ebx,esi - movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm5,xmm13 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,251 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm6,xmm13 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD PTR[32+rsp],xmm6 + rol edx,5 + add ecx,edi + cmp r8d,11 + jb $L$aesenclast4 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast4 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast4:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + xor esi,eax + ror ebp,7 + psubd xmm6,xmm13 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + movups XMMWORD PTR[48+r12*1+r13],xmm2 + lea r12,QWORD PTR[64+r12] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + add edx,DWORD PTR[12+r9] + mov DWORD PTR[r9],eax + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[4+r9],esi + mov ebx,esi + mov DWORD PTR[8+r9],ecx + mov edi,ecx + mov DWORD PTR[12+r9],edx + xor edi,edx + mov DWORD PTR[16+r9],ebp + and esi,edi + jmp $L$oop_ssse3 + +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + cmp r8d,11 + jb $L$aesenclast5 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast5 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast5:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp 
+ mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + movups XMMWORD PTR[48+r12*1+r13],xmm2 + mov r8,QWORD PTR[88+rsp] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + mov DWORD PTR[r9],eax + add edx,DWORD PTR[12+r9] + mov DWORD PTR[4+r9],esi + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + mov DWORD PTR[16+r9],ebp + movups XMMWORD PTR[r8],xmm2 + movaps xmm6,XMMWORD PTR[((96+0))+rsp] + movaps xmm7,XMMWORD PTR[((96+16))+rsp] + movaps xmm8,XMMWORD PTR[((96+32))+rsp] + movaps xmm9,XMMWORD PTR[((96+48))+rsp] + movaps xmm10,XMMWORD PTR[((96+64))+rsp] + movaps xmm11,XMMWORD PTR[((96+80))+rsp] + movaps xmm12,XMMWORD PTR[((96+96))+rsp] + movaps xmm13,XMMWORD PTR[((96+112))+rsp] + movaps xmm14,XMMWORD PTR[((96+128))+rsp] + movaps xmm15,XMMWORD PTR[((96+144))+rsp] + lea rsi,QWORD PTR[264+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha1_enc_ssse3:: +aesni_cbc_sha1_enc_ssse3 ENDP + +ALIGN 32 +aesni_cbc_sha1_enc_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha1_enc_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-264))+rsp] + + + movaps XMMWORD PTR[(96+0)+rsp],xmm6 + movaps XMMWORD PTR[(96+16)+rsp],xmm7 + movaps XMMWORD PTR[(96+32)+rsp],xmm8 + movaps XMMWORD PTR[(96+48)+rsp],xmm9 + movaps XMMWORD PTR[(96+64)+rsp],xmm10 + movaps XMMWORD PTR[(96+80)+rsp],xmm11 + movaps XMMWORD PTR[(96+96)+rsp],xmm12 + movaps XMMWORD PTR[(96+112)+rsp],xmm13 + movaps XMMWORD PTR[(96+128)+rsp],xmm14 + movaps XMMWORD PTR[(96+144)+rsp],xmm15 +$L$prologue_avx:: + vzeroall + mov r12,rdi + mov r13,rsi + mov r14,rdx + lea r15,QWORD PTR[112+rcx] + vmovdqu xmm12,XMMWORD PTR[r8] + mov QWORD PTR[88+rsp],r8 + shl r14,6 + sub r13,r12 + mov r8d,DWORD PTR[((240-112))+r15] + add r14,r10 + + lea r11,QWORD PTR[K_XX_XX] + mov eax,DWORD PTR[r9] + mov ebx,DWORD PTR[4+r9] + mov ecx,DWORD PTR[8+r9] + mov edx,DWORD PTR[12+r9] + mov esi,ebx + mov ebp,DWORD PTR[16+r9] + mov edi,ecx + xor edi,edx + and esi,edi + + vmovdqa xmm6,XMMWORD PTR[64+r11] + vmovdqa xmm10,XMMWORD PTR[r11] + vmovdqu xmm0,XMMWORD PTR[r10] + vmovdqu xmm1,XMMWORD PTR[16+r10] + vmovdqu xmm2,XMMWORD PTR[32+r10] + vmovdqu xmm3,XMMWORD PTR[48+r10] + vpshufb xmm0,xmm0,xmm6 + add r10,64 + vpshufb xmm1,xmm1,xmm6 + vpshufb xmm2,xmm2,xmm6 + vpshufb xmm3,xmm3,xmm6 + vpaddd xmm4,xmm0,xmm10 + vpaddd xmm5,xmm1,xmm10 + vpaddd xmm6,xmm2,xmm10 + vmovdqa XMMWORD PTR[rsp],xmm4 + vmovdqa XMMWORD PTR[16+rsp],xmm5 + vmovdqa XMMWORD PTR[32+rsp],xmm6 + vmovups xmm15,XMMWORD PTR[((-112))+r15] + vmovups xmm14,XMMWORD PTR[((16-112))+r15] + jmp $L$oop_avx +ALIGN 32 +$L$oop_avx:: + shrd ebx,ebx,2 + vmovdqu xmm13,XMMWORD PTR[r12] + vpxor xmm13,xmm13,xmm15 + vpxor xmm12,xmm12,xmm13 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-80))+r15] + xor esi,edx + vpalignr xmm4,xmm1,xmm0,8 + mov edi,eax + add ebp,DWORD PTR[rsp] + vpaddd xmm9,xmm10,xmm3 + xor 
ebx,ecx + shld eax,eax,5 + vpsrldq xmm8,xmm3,4 + add ebp,esi + and edi,ebx + vpxor xmm4,xmm4,xmm0 + xor ebx,ecx + add ebp,eax + vpxor xmm8,xmm8,xmm2 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[4+rsp] + vpxor xmm4,xmm4,xmm8 + xor eax,ebx + shld ebp,ebp,5 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + add edx,edi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-64))+r15] + and esi,eax + vpsrld xmm8,xmm4,31 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpslldq xmm9,xmm4,12 + vpaddd xmm4,xmm4,xmm4 + mov edi,edx + add ecx,DWORD PTR[8+rsp] + xor ebp,eax + shld edx,edx,5 + vpor xmm4,xmm4,xmm8 + vpsrld xmm8,xmm9,30 + add ecx,esi + and edi,ebp + xor ebp,eax + add ecx,edx + vpslld xmm9,xmm9,2 + vpxor xmm4,xmm4,xmm8 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[12+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-48))+r15] + vpxor xmm4,xmm4,xmm9 + xor edx,ebp + shld ecx,ecx,5 + add ebx,edi + and esi,edx + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpalignr xmm5,xmm2,xmm1,8 + mov edi,ebx + add eax,DWORD PTR[16+rsp] + vpaddd xmm9,xmm10,xmm4 + xor ecx,edx + shld ebx,ebx,5 + vpsrldq xmm8,xmm4,4 + add eax,esi + and edi,ecx + vpxor xmm5,xmm5,xmm1 + xor ecx,edx + add eax,ebx + vpxor xmm8,xmm8,xmm3 + shrd ebx,ebx,7 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-32))+r15] + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[20+rsp] + vpxor xmm5,xmm5,xmm8 + xor ebx,ecx + shld eax,eax,5 + vmovdqa XMMWORD PTR[rsp],xmm9 + add ebp,edi + and esi,ebx + vpsrld xmm8,xmm5,31 + xor ebx,ecx + add ebp,eax + shrd eax,eax,7 + xor esi,ecx + vpslldq xmm9,xmm5,12 + vpaddd xmm5,xmm5,xmm5 + mov edi,ebp + add edx,DWORD PTR[24+rsp] + xor eax,ebx + shld ebp,ebp,5 + vpor xmm5,xmm5,xmm8 + vpsrld xmm8,xmm9,30 + add edx,esi + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-16))+r15] + and edi,eax + xor eax,ebx + add edx,ebp + vpslld xmm9,xmm9,2 + vpxor xmm5,xmm5,xmm8 + shrd ebp,ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[28+rsp] + vpxor xmm5,xmm5,xmm9 + xor ebp,eax + shld edx,edx,5 + vmovdqa xmm10,XMMWORD PTR[16+r11] + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + vpalignr xmm6,xmm3,xmm2,8 + mov edi,ecx + add ebx,DWORD PTR[32+rsp] + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[r15] + vpaddd xmm9,xmm10,xmm5 + xor edx,ebp + shld ecx,ecx,5 + vpsrldq xmm8,xmm5,4 + add ebx,esi + and edi,edx + vpxor xmm6,xmm6,xmm2 + xor edx,ebp + add ebx,ecx + vpxor xmm8,xmm8,xmm4 + shrd ecx,ecx,7 + xor edi,ebp + mov esi,ebx + add eax,DWORD PTR[36+rsp] + vpxor xmm6,xmm6,xmm8 + xor ecx,edx + shld ebx,ebx,5 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + add eax,edi + and esi,ecx + vpsrld xmm8,xmm6,31 + xor ecx,edx + add eax,ebx + shrd ebx,ebx,7 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[16+r15] + xor esi,edx + vpslldq xmm9,xmm6,12 + vpaddd xmm6,xmm6,xmm6 + mov edi,eax + add ebp,DWORD PTR[40+rsp] + xor ebx,ecx + shld eax,eax,5 + vpor xmm6,xmm6,xmm8 + vpsrld xmm8,xmm9,30 + add ebp,esi + and edi,ebx + xor ebx,ecx + add ebp,eax + vpslld xmm9,xmm9,2 + vpxor xmm6,xmm6,xmm8 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[44+rsp] + vpxor xmm6,xmm6,xmm9 + xor eax,ebx + shld ebp,ebp,5 + add edx,edi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[32+r15] + and esi,eax + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpalignr xmm7,xmm4,xmm3,8 + mov edi,edx + add ecx,DWORD PTR[48+rsp] + vpaddd xmm9,xmm10,xmm6 + xor ebp,eax + shld edx,edx,5 + vpsrldq xmm8,xmm6,4 + add ecx,esi 
+ and edi,ebp + vpxor xmm7,xmm7,xmm3 + xor ebp,eax + add ecx,edx + vpxor xmm8,xmm8,xmm5 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[52+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[48+r15] + vpxor xmm7,xmm7,xmm8 + xor edx,ebp + shld ecx,ecx,5 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + add ebx,edi + and esi,edx + vpsrld xmm8,xmm7,31 + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpslldq xmm9,xmm7,12 + vpaddd xmm7,xmm7,xmm7 + mov edi,ebx + add eax,DWORD PTR[56+rsp] + xor ecx,edx + shld ebx,ebx,5 + vpor xmm7,xmm7,xmm8 + vpsrld xmm8,xmm9,30 + add eax,esi + and edi,ecx + xor ecx,edx + add eax,ebx + vpslld xmm9,xmm9,2 + vpxor xmm7,xmm7,xmm8 + shrd ebx,ebx,7 + cmp r8d,11 + jb $L$vaesenclast6 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[64+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[80+r15] + je $L$vaesenclast6 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[96+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[112+r15] +$L$vaesenclast6:: + vaesenclast xmm12,xmm12,xmm15 + vmovups xmm15,XMMWORD PTR[((-112))+r15] + vmovups xmm14,XMMWORD PTR[((16-112))+r15] + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[60+rsp] + vpxor xmm7,xmm7,xmm9 + xor ebx,ecx + shld eax,eax,5 + add ebp,edi + and esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + shrd eax,eax,7 + xor esi,ecx + mov edi,ebp + add edx,DWORD PTR[rsp] + vpxor xmm0,xmm0,xmm1 + xor eax,ebx + shld ebp,ebp,5 + vpaddd xmm9,xmm10,xmm7 + add edx,esi + vmovdqu xmm13,XMMWORD PTR[16+r12] + vpxor xmm13,xmm13,xmm15 + vmovups XMMWORD PTR[r13*1+r12],xmm12 + vpxor xmm12,xmm12,xmm13 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-80))+r15] + and edi,eax + vpxor xmm0,xmm0,xmm8 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor edi,ebx + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + mov esi,edx + add ecx,DWORD PTR[4+rsp] + xor ebp,eax + shld edx,edx,5 + vpslld xmm0,xmm0,2 + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + mov edi,ecx + add ebx,DWORD PTR[8+rsp] + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-64))+r15] + vpor xmm0,xmm0,xmm8 + xor edx,ebp + shld ecx,ecx,5 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] + xor edi,ebp + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ebp,DWORD PTR[16+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-48))+r15] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm1,xmm1,xmm2 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm10,xmm0 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm1,xmm1,xmm8 + add edx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD PTR[rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm1,xmm1,2 + add ecx,DWORD PTR[24+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-32))+r15] + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm1,xmm1,xmm8 + add ebx,DWORD PTR[28+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add eax,DWORD PTR[32+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + vpxor xmm2,xmm2,xmm3 + add eax,esi + xor edi,edx + vpaddd xmm9,xmm10,xmm1 + vmovdqa xmm10,XMMWORD PTR[32+r11] + shrd ecx,ecx,7 + 
add eax,ebx + vpxor xmm2,xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-16))+r15] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpslld xmm2,xmm2,2 + add edx,DWORD PTR[40+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vpor xmm2,xmm2,xmm8 + add ecx,DWORD PTR[44+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[r15] + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + vpxor xmm3,xmm3,xmm4 + add ebx,esi + xor edi,ebp + vpaddd xmm9,xmm10,xmm2 + shrd edx,edx,7 + add ebx,ecx + vpxor xmm3,xmm3,xmm8 + add eax,DWORD PTR[52+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpslld xmm3,xmm3,2 + add ebp,DWORD PTR[56+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[16+r15] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpor xmm3,xmm3,xmm8 + add edx,DWORD PTR[60+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpalignr xmm8,xmm3,xmm2,8 + vpxor xmm4,xmm4,xmm0 + add ecx,DWORD PTR[rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + vpxor xmm4,xmm4,xmm5 + add ecx,esi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[32+r15] + xor edi,eax + vpaddd xmm9,xmm10,xmm3 + shrd ebp,ebp,7 + add ecx,edx + vpxor xmm4,xmm4,xmm8 + add ebx,DWORD PTR[4+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + vpsrld xmm8,xmm4,30 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpslld xmm4,xmm4,2 + add eax,DWORD PTR[8+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + vpor xmm4,xmm4,xmm8 + add ebp,DWORD PTR[12+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[48+r15] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpalignr xmm8,xmm4,xmm3,8 + vpxor xmm5,xmm5,xmm1 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + vpxor xmm5,xmm5,xmm6 + add edx,esi + xor edi,ebx + vpaddd xmm9,xmm10,xmm4 + shrd eax,eax,7 + add edx,ebp + vpxor xmm5,xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + vpsrld xmm8,xmm5,30 + vmovdqa XMMWORD PTR[rsp],xmm9 + add ecx,edi + cmp r8d,11 + jb $L$vaesenclast7 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[64+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[80+r15] + je $L$vaesenclast7 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[96+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[112+r15] +$L$vaesenclast7:: + vaesenclast xmm12,xmm12,xmm15 + vmovups xmm15,XMMWORD PTR[((-112))+r15] + vmovups xmm14,XMMWORD PTR[((16-112))+r15] + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpslld xmm5,xmm5,2 + add ebx,DWORD PTR[24+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vpor xmm5,xmm5,xmm8 + add eax,DWORD PTR[28+rsp] + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add 
eax,ebx + vpalignr xmm8,xmm5,xmm4,8 + vpxor xmm6,xmm6,xmm2 + add ebp,DWORD PTR[32+rsp] + vmovdqu xmm13,XMMWORD PTR[32+r12] + vpxor xmm13,xmm13,xmm15 + vmovups XMMWORD PTR[16+r12*1+r13],xmm12 + vpxor xmm12,xmm12,xmm13 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-80))+r15] + and esi,ecx + xor ecx,edx + shrd ebx,ebx,7 + vpxor xmm6,xmm6,xmm7 + mov edi,eax + xor esi,ecx + vpaddd xmm9,xmm10,xmm5 + shld eax,eax,5 + add ebp,esi + vpxor xmm6,xmm6,xmm8 + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[36+rsp] + vpsrld xmm8,xmm6,30 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + vpslld xmm6,xmm6,2 + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-64))+r15] + xor esi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[40+rsp] + and esi,eax + vpor xmm6,xmm6,xmm8 + xor eax,ebx + shrd ebp,ebp,7 + mov edi,edx + xor esi,eax + shld edx,edx,5 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[44+rsp] + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-48))+r15] + mov esi,ecx + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + vpalignr xmm8,xmm6,xmm5,8 + vpxor xmm7,xmm7,xmm3 + add eax,DWORD PTR[48+rsp] + and esi,edx + xor edx,ebp + shrd ecx,ecx,7 + vpxor xmm7,xmm7,xmm0 + mov edi,ebx + xor esi,edx + vpaddd xmm9,xmm10,xmm6 + vmovdqa xmm10,XMMWORD PTR[48+r11] + shld ebx,ebx,5 + add eax,esi + vpxor xmm7,xmm7,xmm8 + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[52+rsp] + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-32))+r15] + vpsrld xmm8,xmm7,30 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + vpslld xmm7,xmm7,2 + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[56+rsp] + and esi,ebx + vpor xmm7,xmm7,xmm8 + xor ebx,ecx + shrd eax,eax,7 + mov edi,ebp + xor esi,ebx + shld ebp,ebp,5 + add edx,esi + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-16))+r15] + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[60+rsp] + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + add ebx,DWORD PTR[rsp] + and esi,ebp + xor ebp,eax + shrd edx,edx,7 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[r15] + vpxor xmm0,xmm0,xmm1 + mov edi,ecx + xor esi,ebp + vpaddd xmm9,xmm10,xmm7 + shld ecx,ecx,5 + add ebx,esi + vpxor xmm0,xmm0,xmm8 + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[4+rsp] + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + vpslld xmm0,xmm0,2 + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[8+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[16+r15] + and esi,ecx + vpor xmm0,xmm0,xmm8 + xor ecx,edx + shrd ebx,ebx,7 + mov edi,eax + xor esi,ecx + shld eax,eax,5 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[12+rsp] + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[32+r15] + xor esi,eax + xor eax,ebx + add edx,ebp + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ecx,DWORD PTR[16+rsp] + and esi,eax + 
xor eax,ebx + shrd ebp,ebp,7 + vpxor xmm1,xmm1,xmm2 + mov edi,edx + xor esi,eax + vpaddd xmm9,xmm10,xmm0 + shld edx,edx,5 + add ecx,esi + vpxor xmm1,xmm1,xmm8 + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[20+rsp] + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD PTR[rsp],xmm9 + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[48+r15] + mov esi,ecx + vpslld xmm1,xmm1,2 + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[24+rsp] + and esi,edx + vpor xmm1,xmm1,xmm8 + xor edx,ebp + shrd ecx,ecx,7 + mov edi,ebx + xor esi,edx + shld ebx,ebx,5 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[28+rsp] + cmp r8d,11 + jb $L$vaesenclast8 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[64+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[80+r15] + je $L$vaesenclast8 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[96+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[112+r15] +$L$vaesenclast8:: + vaesenclast xmm12,xmm12,xmm15 + vmovups xmm15,XMMWORD PTR[((-112))+r15] + vmovups xmm14,XMMWORD PTR[((16-112))+r15] + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add edx,DWORD PTR[32+rsp] + and esi,ebx + xor ebx,ecx + shrd eax,eax,7 + vpxor xmm2,xmm2,xmm3 + mov edi,ebp + xor esi,ebx + vpaddd xmm9,xmm10,xmm1 + shld ebp,ebp,5 + add edx,esi + vmovdqu xmm13,XMMWORD PTR[48+r12] + vpxor xmm13,xmm13,xmm15 + vmovups XMMWORD PTR[32+r12*1+r13],xmm12 + vpxor xmm12,xmm12,xmm13 + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-80))+r15] + vpxor xmm2,xmm2,xmm8 + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[36+rsp] + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + vpslld xmm2,xmm2,2 + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[40+rsp] + and esi,ebp + vpor xmm2,xmm2,xmm8 + xor ebp,eax + shrd edx,edx,7 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-64))+r15] + mov edi,ecx + xor esi,ebp + shld ecx,ecx,5 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[44+rsp] + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebp,DWORD PTR[48+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-48))+r15] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm3,xmm3,xmm4 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm10,xmm2 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm3,xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm3,xmm3,2 + add ecx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[((-32))+r15] + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm3,xmm3,xmm8 + add ebx,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] + vpaddd xmm9,xmm10,xmm3 + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + vmovdqa XMMWORD PTR[48+rsp],xmm9 + xor edi,edx + 
shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[4+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[((-16))+r15] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[12+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[r15] + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + cmp r10,r14 + je $L$done_avx + vmovdqa xmm9,XMMWORD PTR[64+r11] + vmovdqa xmm10,XMMWORD PTR[r11] + vmovdqu xmm0,XMMWORD PTR[r10] + vmovdqu xmm1,XMMWORD PTR[16+r10] + vmovdqu xmm2,XMMWORD PTR[32+r10] + vmovdqu xmm3,XMMWORD PTR[48+r10] + vpshufb xmm0,xmm0,xmm9 + add r10,64 + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + vpshufb xmm1,xmm1,xmm9 + mov edi,ecx + shld ecx,ecx,5 + vpaddd xmm8,xmm0,xmm10 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vmovdqa XMMWORD PTR[rsp],xmm8 add eax,DWORD PTR[20+rsp] - xor edi,ebp - psubd xmm0,xmm9 - mov esi,ebx - rol ebx,5 xor edi,edx - add eax,ebx - ror ecx,7 + mov esi,ebx + shld ebx,ebx,5 add eax,edi - add ebp,DWORD PTR[24+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[128+r15] xor esi,edx - mov edi,eax - rol eax,5 + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[16+r15] xor esi,ecx - add ebp,eax - ror ebx,7 + mov edi,eax + shld eax,eax,5 add ebp,esi - add edx,DWORD PTR[28+rsp] xor edi,ecx - mov esi,ebp - rol ebp,5 + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] xor edi,ebx - add edx,ebp - ror eax,7 + mov esi,ebp + shld ebp,ebp,5 add edx,edi - add ecx,DWORD PTR[32+rsp] xor esi,ebx -DB 102,15,56,0,214 - mov edi,edx - rol edx,5 - paddd xmm1,xmm9 + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] xor esi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[144+r15] - add ecx,edx - ror ebp,7 + vpshufb xmm2,xmm2,xmm9 + mov edi,edx + shld edx,edx,5 + vpaddd xmm8,xmm1,xmm10 add ecx,esi - movdqa XMMWORD PTR[16+rsp],xmm1 - add ebx,DWORD PTR[36+rsp] + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[32+r15] xor edi,eax - psubd xmm1,xmm9 - mov esi,ecx - rol ecx,5 + shrd ebp,ebp,7 + add ecx,edx + vmovdqa XMMWORD PTR[16+rsp],xmm8 + add ebx,DWORD PTR[36+rsp] xor edi,ebp - add ebx,ecx - ror edx,7 + mov esi,ecx + shld ecx,ecx,5 add ebx,edi - add eax,DWORD PTR[40+rsp] xor esi,ebp - mov edi,ebx - rol ebx,5 + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] xor esi,edx - add eax,ebx - ror ecx,7 + mov edi,ebx + shld ebx,ebx,5 add eax,esi - add ebp,DWORD PTR[44+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[160+r15] xor edi,edx - mov esi,eax - rol eax,5 + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[48+r15] xor edi,ecx - add ebp,eax - ror ebx,7 + mov esi,eax + shld eax,eax,5 add ebp,edi - add edx,DWORD PTR[48+rsp] xor esi,ecx -DB 102,15,56,0,222 - mov edi,ebp - rol ebp,5 - paddd xmm2,xmm9 + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] xor esi,ebx - add edx,ebp - ror eax,7 + vpshufb xmm3,xmm3,xmm9 + mov edi,ebp + shld ebp,ebp,5 + vpaddd xmm8,xmm2,xmm10 add edx,esi - movdqa XMMWORD PTR[32+rsp],xmm2 - add ecx,DWORD PTR[52+rsp] xor edi,ebx - psubd xmm2,xmm9 - mov esi,edx - rol edx,5 + shrd eax,eax,7 + add edx,ebp + vmovdqa XMMWORD PTR[32+rsp],xmm8 + add ecx,DWORD PTR[52+rsp] xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi 
cmp r8d,11 - jb $L$aesenclast4 - movups xmm14,XMMWORD PTR[176+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[192+r15] -DB 102,69,15,56,220,222 - je $L$aesenclast4 - movups xmm14,XMMWORD PTR[208+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[224+r15] -DB 102,69,15,56,220,222 -$L$aesenclast4:: -DB 102,69,15,56,221,223 - movups xmm14,XMMWORD PTR[16+r15] + jb $L$vaesenclast9 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[64+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[80+r15] + je $L$vaesenclast9 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[96+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[112+r15] +$L$vaesenclast9:: + vaesenclast xmm12,xmm12,xmm15 + vmovups xmm15,XMMWORD PTR[((-112))+r15] + vmovups xmm14,XMMWORD PTR[((16-112))+r15] + xor esi,eax + shrd ebp,ebp,7 add ecx,edx - ror ebp,7 - add ecx,edi add ebx,DWORD PTR[56+rsp] - xor esi,eax - mov edi,ecx - rol ecx,5 xor esi,ebp - add ebx,ecx - ror edx,7 + mov edi,ecx + shld ecx,ecx,5 add ebx,esi - add eax,DWORD PTR[60+rsp] xor edi,ebp - mov esi,ebx - rol ebx,5 + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] xor edi,edx - add eax,ebx - ror ecx,7 + mov esi,ebx + shld ebx,ebx,5 add eax,edi - movups XMMWORD PTR[48+r12*1+r13],xmm11 + shrd ecx,ecx,7 + add eax,ebx + vmovups XMMWORD PTR[48+r12*1+r13],xmm12 lea r12,QWORD PTR[64+r12] add eax,DWORD PTR[r9] @@ -1261,129 +2580,131 @@ DB 102,69,15,56,221,223 mov DWORD PTR[4+r9],esi mov ebx,esi mov DWORD PTR[8+r9],ecx + mov edi,ecx mov DWORD PTR[12+r9],edx + xor edi,edx mov DWORD PTR[16+r9],ebp - jmp $L$oop_ssse3 + and esi,edi + jmp $L$oop_avx -ALIGN 16 -$L$done_ssse3:: +$L$done_avx:: add ebx,DWORD PTR[16+rsp] - xor esi,eax - mov edi,ecx - rol ecx,5 xor esi,ebp - add ebx,ecx - ror edx,7 + mov edi,ecx + shld ecx,ecx,5 add ebx,esi - add eax,DWORD PTR[20+rsp] xor edi,ebp - mov esi,ebx - rol ebx,5 + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] xor edi,edx - add eax,ebx - ror ecx,7 + mov esi,ebx + shld ebx,ebx,5 add eax,edi - add ebp,DWORD PTR[24+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[128+r15] xor esi,edx - mov edi,eax - rol eax,5 + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[16+r15] xor esi,ecx - add ebp,eax - ror ebx,7 + mov edi,eax + shld eax,eax,5 add ebp,esi - add edx,DWORD PTR[28+rsp] xor edi,ecx - mov esi,ebp - rol ebp,5 + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] xor edi,ebx - add edx,ebp - ror eax,7 + mov esi,ebp + shld ebp,ebp,5 add edx,edi - add ecx,DWORD PTR[32+rsp] xor esi,ebx - mov edi,edx - rol edx,5 + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] xor esi,eax -DB 102,69,15,56,220,223 - movups xmm14,XMMWORD PTR[144+r15] - add ecx,edx - ror ebp,7 + mov edi,edx + shld edx,edx,5 add ecx,esi - add ebx,DWORD PTR[36+rsp] + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[32+r15] xor edi,eax - mov esi,ecx - rol ecx,5 + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] xor edi,ebp - add ebx,ecx - ror edx,7 + mov esi,ecx + shld ecx,ecx,5 add ebx,edi - add eax,DWORD PTR[40+rsp] xor esi,ebp - mov edi,ebx - rol ebx,5 + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] xor esi,edx - add eax,ebx - ror ecx,7 + mov edi,ebx + shld ebx,ebx,5 add eax,esi - add ebp,DWORD PTR[44+rsp] -DB 102,69,15,56,220,222 - movups xmm15,XMMWORD PTR[160+r15] xor edi,edx - mov esi,eax - rol eax,5 + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD 
PTR[48+r15] xor edi,ecx - add ebp,eax - ror ebx,7 + mov esi,eax + shld eax,eax,5 add ebp,edi - add edx,DWORD PTR[48+rsp] xor esi,ecx - mov edi,ebp - rol ebp,5 + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] xor esi,ebx - add edx,ebp - ror eax,7 + mov edi,ebp + shld ebp,ebp,5 add edx,esi - add ecx,DWORD PTR[52+rsp] xor edi,ebx - mov esi,edx - rol edx,5 + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi cmp r8d,11 - jb $L$aesenclast5 - movups xmm14,XMMWORD PTR[176+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[192+r15] -DB 102,69,15,56,220,222 - je $L$aesenclast5 - movups xmm14,XMMWORD PTR[208+r15] -DB 102,69,15,56,220,223 - movups xmm15,XMMWORD PTR[224+r15] -DB 102,69,15,56,220,222 -$L$aesenclast5:: -DB 102,69,15,56,221,223 - movups xmm14,XMMWORD PTR[16+r15] + jb $L$vaesenclast10 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[64+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[80+r15] + je $L$vaesenclast10 + vaesenc xmm12,xmm12,xmm15 + vmovups xmm14,XMMWORD PTR[96+r15] + vaesenc xmm12,xmm12,xmm14 + vmovups xmm15,XMMWORD PTR[112+r15] +$L$vaesenclast10:: + vaesenclast xmm12,xmm12,xmm15 + vmovups xmm15,XMMWORD PTR[((-112))+r15] + vmovups xmm14,XMMWORD PTR[((16-112))+r15] + xor esi,eax + shrd ebp,ebp,7 add ecx,edx - ror ebp,7 - add ecx,edi add ebx,DWORD PTR[56+rsp] - xor esi,eax - mov edi,ecx - rol ecx,5 xor esi,ebp - add ebx,ecx - ror edx,7 + mov edi,ecx + shld ecx,ecx,5 add ebx,esi - add eax,DWORD PTR[60+rsp] xor edi,ebp - mov esi,ebx - rol ebx,5 + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] xor edi,edx - add eax,ebx - ror ecx,7 + mov esi,ebx + shld ebx,ebx,5 add eax,edi - movups XMMWORD PTR[48+r12*1+r13],xmm11 + shrd ecx,ecx,7 + add eax,ebx + vmovups XMMWORD PTR[48+r12*1+r13],xmm12 mov r8,QWORD PTR[88+rsp] add eax,DWORD PTR[r9] @@ -1396,7 +2717,8 @@ DB 102,69,15,56,221,223 mov DWORD PTR[8+r9],ecx mov DWORD PTR[12+r9],edx mov DWORD PTR[16+r9],ebp - movups XMMWORD PTR[r8],xmm11 + vmovups XMMWORD PTR[r8],xmm12 + vzeroall movaps xmm6,XMMWORD PTR[((96+0))+rsp] movaps xmm7,XMMWORD PTR[((96+16))+rsp] movaps xmm8,XMMWORD PTR[((96+32))+rsp] @@ -1415,24 +2737,20 @@ DB 102,69,15,56,221,223 mov rbp,QWORD PTR[32+rsi] mov rbx,QWORD PTR[40+rsi] lea rsp,QWORD PTR[48+rsi] -$L$epilogue_ssse3:: +$L$epilogue_avx:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_aesni_cbc_sha1_enc_ssse3:: -aesni_cbc_sha1_enc_ssse3 ENDP +$L$SEH_end_aesni_cbc_sha1_enc_avx:: +aesni_cbc_sha1_enc_avx ENDP ALIGN 64 K_XX_XX:: DD 05a827999h,05a827999h,05a827999h,05a827999h - DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h - DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch - DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h - DD 000010203h,004050607h,008090a0bh,00c0d0e0fh - +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115 DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52 @@ -1440,6 +2758,346 @@ DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 DB 114,103,62,0 ALIGN 64 + +ALIGN 32 +aesni_cbc_sha1_enc_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha1_enc_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD 
PTR[(-8-160)+rax],xmm6 + movaps XMMWORD PTR[(-8-144)+rax],xmm7 + movaps XMMWORD PTR[(-8-128)+rax],xmm8 + movaps XMMWORD PTR[(-8-112)+rax],xmm9 + movaps XMMWORD PTR[(-8-96)+rax],xmm10 + movaps XMMWORD PTR[(-8-80)+rax],xmm11 + movaps XMMWORD PTR[(-8-64)+rax],xmm12 + movaps XMMWORD PTR[(-8-48)+rax],xmm13 + movaps XMMWORD PTR[(-8-32)+rax],xmm14 + movaps XMMWORD PTR[(-8-16)+rax],xmm15 +$L$prologue_shaext:: + movdqu xmm8,XMMWORD PTR[r9] + movd xmm9,DWORD PTR[16+r9] + movdqa xmm7,XMMWORD PTR[((K_XX_XX+80))] + + mov r11d,DWORD PTR[240+rcx] + sub rsi,rdi + movups xmm15,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[112+rcx] + + pshufd xmm8,xmm8,27 + pshufd xmm9,xmm9,27 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + movups xmm14,XMMWORD PTR[rdi] + xorps xmm14,xmm15 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 + movdqu xmm3,XMMWORD PTR[r10] + movdqa xmm12,xmm9 +DB 102,15,56,0,223 + movdqu xmm4,XMMWORD PTR[16+r10] + movdqa xmm11,xmm8 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 +DB 102,15,56,0,231 + + paddd xmm9,xmm3 + movdqu xmm5,XMMWORD PTR[32+r10] + lea r10,QWORD PTR[64+r10] + pxor xmm3,xmm12 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 + pxor xmm3,xmm12 + movdqa xmm10,xmm8 +DB 102,15,56,0,239 +DB 69,15,58,204,193,0 +DB 68,15,56,200,212 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 +DB 15,56,201,220 + movdqu xmm6,XMMWORD PTR[((-16))+r10] + movdqa xmm9,xmm8 +DB 102,15,56,0,247 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 +DB 69,15,58,204,194,0 +DB 68,15,56,200,205 + pxor xmm3,xmm5 +DB 15,56,201,229 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,0 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,0 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + cmp r11d,11 + jb $L$aesenclast11 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast11 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast11:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + movdqa xmm10,xmm8 +DB 69,15,58,204,193,0 +DB 68,15,56,200,212 + movups xmm14,XMMWORD PTR[16+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[rdi*1+rsi],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,236 + pxor xmm6,xmm4 +DB 15,56,201,220 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,1 +DB 68,15,56,200,205 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,245 + pxor xmm3,xmm5 +DB 15,56,201,229 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,1 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,1 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,1 +DB 
68,15,56,200,212 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 +DB 15,56,202,236 + pxor xmm6,xmm4 +DB 15,56,201,220 + cmp r11d,11 + jb $L$aesenclast12 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast12 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast12:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + movdqa xmm9,xmm8 +DB 69,15,58,204,194,1 +DB 68,15,56,200,205 + movups xmm14,XMMWORD PTR[32+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[16+rdi*1+rsi],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,245 + pxor xmm3,xmm5 +DB 15,56,201,229 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,2 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,2 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,2 +DB 68,15,56,200,212 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 15,56,202,236 + pxor xmm6,xmm4 +DB 15,56,201,220 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,2 +DB 68,15,56,200,205 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 +DB 15,56,202,245 + pxor xmm3,xmm5 +DB 15,56,201,229 + cmp r11d,11 + jb $L$aesenclast13 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast13 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast13:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + movdqa xmm10,xmm8 +DB 69,15,58,204,193,2 +DB 68,15,56,200,214 + movups xmm14,XMMWORD PTR[48+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[32+rdi*1+rsi],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,3 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,3 +DB 68,15,56,200,212 +DB 15,56,202,236 + pxor xmm6,xmm4 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,3 +DB 68,15,56,200,205 +DB 15,56,202,245 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm5,xmm12 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,3 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,3 +DB 68,15,56,200,205 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 + cmp r11d,11 + jb $L$aesenclast14 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast14 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 
102,15,56,220,208 +$L$aesenclast14:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + dec rdx + + paddd xmm8,xmm11 + movups XMMWORD PTR[48+rdi*1+rsi],xmm2 + lea rdi,QWORD PTR[64+rdi] + jnz $L$oop_shaext + + pshufd xmm8,xmm8,27 + pshufd xmm9,xmm9,27 + movups XMMWORD PTR[r8],xmm2 + movdqu XMMWORD PTR[r9],xmm8 + movd DWORD PTR[16+r9],xmm9 + movaps xmm6,XMMWORD PTR[((-8-160))+rax] + movaps xmm7,XMMWORD PTR[((-8-144))+rax] + movaps xmm8,XMMWORD PTR[((-8-128))+rax] + movaps xmm9,XMMWORD PTR[((-8-112))+rax] + movaps xmm10,XMMWORD PTR[((-8-96))+rax] + movaps xmm11,XMMWORD PTR[((-8-80))+rax] + movaps xmm12,XMMWORD PTR[((-8-64))+rax] + movaps xmm13,XMMWORD PTR[((-8-48))+rax] + movaps xmm14,XMMWORD PTR[((-8-32))+rax] + movaps xmm15,XMMWORD PTR[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha1_enc_shaext:: +aesni_cbc_sha1_enc_shaext ENDP EXTERN __imp_RtlVirtualUnwind:NEAR ALIGN 16 @@ -1472,12 +3130,21 @@ ssse3_handler PROC PRIVATE lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$common_seh_tail + lea r10,QWORD PTR[aesni_cbc_sha1_enc_shaext] + cmp rbx,r10 + jb $L$seh_no_shaext + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[168+rax] + jmp $L$common_seh_tail +$L$seh_no_shaext:: lea rsi,QWORD PTR[96+rax] lea rdi,QWORD PTR[512+r8] mov ecx,20 DD 0a548f3fch - lea rax,QWORD PTR[264+rax] mov r15,QWORD PTR[rax] @@ -1506,7 +3173,6 @@ $L$common_seh_tail:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -1541,6 +3207,12 @@ ALIGN 4 DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3 DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3 DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_avx + DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_avx + DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_avx + DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_shaext + DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_shaext + DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_shaext .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 @@ -1548,7 +3220,14 @@ $L$SEH_info_aesni_cbc_sha1_enc_ssse3:: DB 9,0,0,0 DD imagerel ssse3_handler DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 - +$L$SEH_info_aesni_cbc_sha1_enc_avx:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx +$L$SEH_info_aesni_cbc_sha1_enc_shaext:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_shaext,imagerel $L$epilogue_shaext .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-sha256-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-sha256-x86_64.asm new file mode 100644 index 00000000000000..700d7f58f5c731 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-sha256-x86_64.asm @@ -0,0 +1,4656 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC aesni_cbc_sha256_enc + +ALIGN 16 +aesni_cbc_sha256_enc PROC PUBLIC + lea r11,QWORD PTR[OPENSSL_ia32cap_P] + mov eax,1 + cmp rcx,0 + je $L$probe + mov eax,DWORD PTR[r11] + mov r10,QWORD PTR[4+r11] + bt r10,61 + jc aesni_cbc_sha256_enc_shaext + mov r11,r10 + shr r11,32 + + test r10d,2048 + jnz aesni_cbc_sha256_enc_xop + and r11d,296 + cmp r11d,296 + je aesni_cbc_sha256_enc_avx2 + and eax,1073741824 + and r10d,268435968 + or r10d,eax + cmp r10d,1342177792 + je aesni_cbc_sha256_enc_avx + ud2 + xor eax,eax + cmp rcx,0 + je $L$probe + ud2 +$L$probe:: + DB 0F3h,0C3h 
;repret +aesni_cbc_sha256_enc ENDP + +ALIGN 64 + +K256:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1 + DD 0,0,0,0,0,0,0,0 +DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54 +DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95 +DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98 +DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108 +DB 46,111,114,103,62,0 +ALIGN 64 + +ALIGN 64 +aesni_cbc_sha256_enc_xop PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha256_enc_xop:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$xop_shortcut:: + mov r10,QWORD PTR[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + sub rsp,288 + and rsp,-64 + + shl rdx,6 + sub rsi,rdi + sub r10,rdi + add rdx,rdi + + + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + + mov QWORD PTR[((64+32))+rsp],r8 + mov QWORD PTR[((64+40))+rsp],r9 + mov QWORD PTR[((64+48))+rsp],r10 + mov QWORD PTR[((64+56))+rsp],r11 + movaps XMMWORD PTR[128+rsp],xmm6 + movaps XMMWORD PTR[144+rsp],xmm7 + movaps XMMWORD PTR[160+rsp],xmm8 + movaps XMMWORD PTR[176+rsp],xmm9 + movaps XMMWORD PTR[192+rsp],xmm10 + movaps XMMWORD PTR[208+rsp],xmm11 + movaps XMMWORD PTR[224+rsp],xmm12 + movaps XMMWORD PTR[240+rsp],xmm13 + movaps XMMWORD PTR[256+rsp],xmm14 + movaps XMMWORD PTR[272+rsp],xmm15 +$L$prologue_xop:: + vzeroall + + mov r12,rdi + lea rdi,QWORD PTR[128+rcx] + lea r13,QWORD PTR[((K256+544))] + mov r14d,DWORD PTR[((240-128))+rdi] + mov r15,r9 + mov rsi,r10 + vmovdqu xmm8,XMMWORD PTR[r8] + sub r14,9 + + mov eax,DWORD PTR[r15] + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + mov edx,DWORD PTR[12+r15] + mov r8d,DWORD PTR[16+r15] + mov r9d,DWORD PTR[20+r15] + mov r10d,DWORD PTR[24+r15] + mov r11d,DWORD PTR[28+r15] + + 
vmovdqa xmm14,XMMWORD PTR[r14*8+r13] + vmovdqa xmm13,XMMWORD PTR[16+r14*8+r13] + vmovdqa xmm12,XMMWORD PTR[32+r14*8+r13] + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + jmp $L$loop_xop +ALIGN 16 +$L$loop_xop:: + vmovdqa xmm7,XMMWORD PTR[((K256+512))] + vmovdqu xmm0,XMMWORD PTR[r12*1+rsi] + vmovdqu xmm1,XMMWORD PTR[16+r12*1+rsi] + vmovdqu xmm2,XMMWORD PTR[32+r12*1+rsi] + vmovdqu xmm3,XMMWORD PTR[48+r12*1+rsi] + vpshufb xmm0,xmm0,xmm7 + lea rbp,QWORD PTR[K256] + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD PTR[rbp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp] + vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] + vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp] + vmovdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + vmovdqa XMMWORD PTR[16+rsp],xmm5 + mov esi,ebx + vmovdqa XMMWORD PTR[32+rsp],xmm6 + xor esi,ecx + vmovdqa XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$xop_00_47 + +ALIGN 16 +$L$xop_00_47:: + sub rbp,-16*2*4 + vmovdqu xmm9,XMMWORD PTR[r12] + mov QWORD PTR[((64+0))+rsp],r12 + vpalignr xmm4,xmm1,xmm0,4 + ror r13d,14 + mov eax,r14d + vpalignr xmm7,xmm3,xmm2,4 + mov r12d,r9d + xor r13d,r8d +DB 143,232,120,194,236,14 + ror r14d,9 + xor r12d,r10d + vpsrld xmm4,xmm4,3 + ror r13d,5 + xor r14d,eax + vpaddd xmm0,xmm0,xmm7 + and r12d,r8d + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax +DB 143,232,120,194,245,11 + ror r14d,11 + xor r12d,r10d + vpxor xmm4,xmm4,xmm5 + xor r15d,ebx + ror r13d,6 + add r11d,r12d + and esi,r15d +DB 143,232,120,194,251,13 + xor r14d,eax + add r11d,r13d + vpxor xmm4,xmm4,xmm6 + xor esi,ebx + add edx,r11d + vpsrld xmm6,xmm3,10 + ror r14d,2 + add r11d,esi + vpaddd xmm0,xmm0,xmm4 + mov r13d,edx + add r14d,r11d +DB 143,232,120,194,239,2 + ror r13d,14 + mov r11d,r14d + vpxor xmm7,xmm7,xmm6 + mov r12d,r8d + xor r13d,edx + ror r14d,9 + xor r12d,r9d + vpxor xmm7,xmm7,xmm5 + ror r13d,5 + xor r14d,r11d + and r12d,edx + vpxor xmm9,xmm9,xmm8 + xor r13d,edx + vpsrldq xmm7,xmm7,8 + add r10d,DWORD PTR[4+rsp] + mov esi,r11d + ror r14d,11 + xor r12d,r9d + vpaddd xmm0,xmm0,xmm7 + xor esi,eax + ror r13d,6 + add r10d,r12d + and r15d,esi +DB 143,232,120,194,248,13 + xor r14d,r11d + add r10d,r13d + vpsrld xmm6,xmm0,10 + xor r15d,eax + add ecx,r10d +DB 143,232,120,194,239,2 + ror r14d,2 + add r10d,r15d + vpxor xmm7,xmm7,xmm6 + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + vpxor xmm7,xmm7,xmm5 + mov r12d,edx + xor r13d,ecx + ror r14d,9 + xor r12d,r8d + vpslldq xmm7,xmm7,8 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r13d,ecx + vpaddd xmm0,xmm0,xmm7 + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + ror r14d,11 + xor r12d,r8d + vpaddd xmm6,xmm0,XMMWORD PTR[rbp] + xor r15d,r11d + ror r13d,6 + add r9d,r12d + and esi,r15d + xor r14d,r10d + add r9d,r13d + xor esi,r11d + add ebx,r9d + ror r14d,2 + add r9d,esi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + xor r13d,ebx + ror r14d,9 + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov esi,r9d + ror r14d,11 + xor r12d,edx + xor esi,r10d + ror r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + ror r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD PTR[rsp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + ror r13d,14 + mov r8d,r14d + vpalignr xmm7,xmm0,xmm3,4 + mov r12d,ebx + 
xor r13d,eax +DB 143,232,120,194,236,14 + ror r14d,9 + xor r12d,ecx + vpsrld xmm4,xmm4,3 + ror r13d,5 + xor r14d,r8d + vpaddd xmm1,xmm1,xmm7 + and r12d,eax + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d +DB 143,232,120,194,245,11 + ror r14d,11 + xor r12d,ecx + vpxor xmm4,xmm4,xmm5 + xor r15d,r9d + ror r13d,6 + add edx,r12d + and esi,r15d +DB 143,232,120,194,248,13 + xor r14d,r8d + add edx,r13d + vpxor xmm4,xmm4,xmm6 + xor esi,r9d + add r11d,edx + vpsrld xmm6,xmm0,10 + ror r14d,2 + add edx,esi + vpaddd xmm1,xmm1,xmm4 + mov r13d,r11d + add r14d,edx +DB 143,232,120,194,239,2 + ror r13d,14 + mov edx,r14d + vpxor xmm7,xmm7,xmm6 + mov r12d,eax + xor r13d,r11d + ror r14d,9 + xor r12d,ebx + vpxor xmm7,xmm7,xmm5 + ror r13d,5 + xor r14d,edx + and r12d,r11d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r13d,r11d + vpsrldq xmm7,xmm7,8 + add ecx,DWORD PTR[20+rsp] + mov esi,edx + ror r14d,11 + xor r12d,ebx + vpaddd xmm1,xmm1,xmm7 + xor esi,r8d + ror r13d,6 + add ecx,r12d + and r15d,esi +DB 143,232,120,194,249,13 + xor r14d,edx + add ecx,r13d + vpsrld xmm6,xmm1,10 + xor r15d,r8d + add r10d,ecx +DB 143,232,120,194,239,2 + ror r14d,2 + add ecx,r15d + vpxor xmm7,xmm7,xmm6 + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + vpxor xmm7,xmm7,xmm5 + mov r12d,r11d + xor r13d,r10d + ror r14d,9 + xor r12d,eax + vpslldq xmm7,xmm7,8 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r13d,r10d + vpaddd xmm1,xmm1,xmm7 + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + ror r14d,11 + xor r12d,eax + vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp] + xor r15d,edx + ror r13d,6 + add ebx,r12d + and esi,r15d + xor r14d,ecx + add ebx,r13d + xor esi,edx + add r9d,ebx + ror r14d,2 + add ebx,esi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + xor r13d,r9d + ror r14d,9 + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov esi,ebx + ror r14d,11 + xor r12d,r11d + xor esi,ecx + ror r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add r8d,eax + ror r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD PTR[16+rsp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + ror r13d,14 + mov eax,r14d + vpalignr xmm7,xmm1,xmm0,4 + mov r12d,r9d + xor r13d,r8d +DB 143,232,120,194,236,14 + ror r14d,9 + xor r12d,r10d + vpsrld xmm4,xmm4,3 + ror r13d,5 + xor r14d,eax + vpaddd xmm2,xmm2,xmm7 + and r12d,r8d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax +DB 143,232,120,194,245,11 + ror r14d,11 + xor r12d,r10d + vpxor xmm4,xmm4,xmm5 + xor r15d,ebx + ror r13d,6 + add r11d,r12d + and esi,r15d +DB 143,232,120,194,249,13 + xor r14d,eax + add r11d,r13d + vpxor xmm4,xmm4,xmm6 + xor esi,ebx + add edx,r11d + vpsrld xmm6,xmm1,10 + ror r14d,2 + add r11d,esi + vpaddd xmm2,xmm2,xmm4 + mov r13d,edx + add r14d,r11d +DB 143,232,120,194,239,2 + ror r13d,14 + mov r11d,r14d + vpxor xmm7,xmm7,xmm6 + mov r12d,r8d + xor r13d,edx + ror r14d,9 + xor r12d,r9d + vpxor xmm7,xmm7,xmm5 + ror r13d,5 + xor r14d,r11d + and r12d,edx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r13d,edx + vpsrldq xmm7,xmm7,8 + add r10d,DWORD PTR[36+rsp] + mov esi,r11d + ror r14d,11 + xor r12d,r9d + vpaddd xmm2,xmm2,xmm7 + xor esi,eax + 
ror r13d,6 + add r10d,r12d + and r15d,esi +DB 143,232,120,194,250,13 + xor r14d,r11d + add r10d,r13d + vpsrld xmm6,xmm2,10 + xor r15d,eax + add ecx,r10d +DB 143,232,120,194,239,2 + ror r14d,2 + add r10d,r15d + vpxor xmm7,xmm7,xmm6 + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + vpxor xmm7,xmm7,xmm5 + mov r12d,edx + xor r13d,ecx + ror r14d,9 + xor r12d,r8d + vpslldq xmm7,xmm7,8 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r13d,ecx + vpaddd xmm2,xmm2,xmm7 + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + ror r14d,11 + xor r12d,r8d + vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] + xor r15d,r11d + ror r13d,6 + add r9d,r12d + and esi,r15d + xor r14d,r10d + add r9d,r13d + xor esi,r11d + add ebx,r9d + ror r14d,2 + add r9d,esi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + xor r13d,ebx + ror r14d,9 + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov esi,r9d + ror r14d,11 + xor r12d,edx + xor esi,r10d + ror r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + ror r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD PTR[32+rsp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + ror r13d,14 + mov r8d,r14d + vpalignr xmm7,xmm2,xmm1,4 + mov r12d,ebx + xor r13d,eax +DB 143,232,120,194,236,14 + ror r14d,9 + xor r12d,ecx + vpsrld xmm4,xmm4,3 + ror r13d,5 + xor r14d,r8d + vpaddd xmm3,xmm3,xmm7 + and r12d,eax + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d +DB 143,232,120,194,245,11 + ror r14d,11 + xor r12d,ecx + vpxor xmm4,xmm4,xmm5 + xor r15d,r9d + ror r13d,6 + add edx,r12d + and esi,r15d +DB 143,232,120,194,250,13 + xor r14d,r8d + add edx,r13d + vpxor xmm4,xmm4,xmm6 + xor esi,r9d + add r11d,edx + vpsrld xmm6,xmm2,10 + ror r14d,2 + add edx,esi + vpaddd xmm3,xmm3,xmm4 + mov r13d,r11d + add r14d,edx +DB 143,232,120,194,239,2 + ror r13d,14 + mov edx,r14d + vpxor xmm7,xmm7,xmm6 + mov r12d,eax + xor r13d,r11d + ror r14d,9 + xor r12d,ebx + vpxor xmm7,xmm7,xmm5 + ror r13d,5 + xor r14d,edx + and r12d,r11d + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r13d,r11d + vpsrldq xmm7,xmm7,8 + add ecx,DWORD PTR[52+rsp] + mov esi,edx + ror r14d,11 + xor r12d,ebx + vpaddd xmm3,xmm3,xmm7 + xor esi,r8d + ror r13d,6 + add ecx,r12d + and r15d,esi +DB 143,232,120,194,251,13 + xor r14d,edx + add ecx,r13d + vpsrld xmm6,xmm3,10 + xor r15d,r8d + add r10d,ecx +DB 143,232,120,194,239,2 + ror r14d,2 + add ecx,r15d + vpxor xmm7,xmm7,xmm6 + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + vpxor xmm7,xmm7,xmm5 + mov r12d,r11d + xor r13d,r10d + ror r14d,9 + xor r12d,eax + vpslldq xmm7,xmm7,8 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r13d,r10d + vpaddd xmm3,xmm3,xmm7 + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + ror r14d,11 + xor r12d,eax + vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp] + xor r15d,edx + ror r13d,6 + add ebx,r12d + and esi,r15d + xor r14d,ecx + add ebx,r13d + xor esi,edx + add r9d,ebx + ror r14d,2 + add ebx,esi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + xor r13d,r9d + ror r14d,9 + xor r12d,r11d + ror r13d,5 + xor 
r14d,ebx + and r12d,r9d + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov esi,ebx + ror r14d,11 + xor r12d,r11d + xor esi,ecx + ror r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add r8d,eax + ror r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD PTR[48+rsp],xmm6 + mov r12,QWORD PTR[((64+0))+rsp] + vpand xmm11,xmm11,xmm14 + mov r15,QWORD PTR[((64+8))+rsp] + vpor xmm8,xmm8,xmm11 + vmovdqu XMMWORD PTR[r12*1+r15],xmm8 + lea r12,QWORD PTR[16+r12] + cmp BYTE PTR[131+rbp],0 + jne $L$xop_00_47 + vmovdqu xmm9,XMMWORD PTR[r12] + mov QWORD PTR[((64+0))+rsp],r12 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + xor r13d,r8d + ror r14d,9 + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + ror r14d,11 + xor r12d,r10d + xor r15d,ebx + ror r13d,6 + add r11d,r12d + and esi,r15d + xor r14d,eax + add r11d,r13d + xor esi,ebx + add edx,r11d + ror r14d,2 + add r11d,esi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + xor r13d,edx + ror r14d,9 + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + vpxor xmm9,xmm9,xmm8 + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov esi,r11d + ror r14d,11 + xor r12d,r9d + xor esi,eax + ror r13d,6 + add r10d,r12d + and r15d,esi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + add ecx,r10d + ror r14d,2 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + xor r13d,ecx + ror r14d,9 + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + ror r14d,11 + xor r12d,r8d + xor r15d,r11d + ror r13d,6 + add r9d,r12d + and esi,r15d + xor r14d,r10d + add r9d,r13d + xor esi,r11d + add ebx,r9d + ror r14d,2 + add r9d,esi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + xor r13d,ebx + ror r14d,9 + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov esi,r9d + ror r14d,11 + xor r12d,edx + xor esi,r10d + ror r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + ror r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + xor r13d,eax + ror r14d,9 + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + ror r14d,11 + xor r12d,ecx + xor r15d,r9d + ror r13d,6 + add edx,r12d + and esi,r15d + xor r14d,r8d + add edx,r13d + xor esi,r9d + add r11d,edx + ror r14d,2 + add edx,esi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + xor r13d,r11d + ror r14d,9 + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov esi,edx + ror r14d,11 + xor r12d,ebx + xor esi,r8d + ror r13d,6 + add ecx,r12d + and r15d,esi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + add r10d,ecx + ror r14d,2 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + xor r13d,r10d + ror r14d,9 + xor r12d,eax + ror r13d,5 + 
xor r14d,ecx + and r12d,r10d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + ror r14d,11 + xor r12d,eax + xor r15d,edx + ror r13d,6 + add ebx,r12d + and esi,r15d + xor r14d,ecx + add ebx,r13d + xor esi,edx + add r9d,ebx + ror r14d,2 + add ebx,esi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + xor r13d,r9d + ror r14d,9 + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov esi,ebx + ror r14d,11 + xor r12d,r11d + xor esi,ecx + ror r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add r8d,eax + ror r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + xor r13d,r8d + ror r14d,9 + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + ror r14d,11 + xor r12d,r10d + xor r15d,ebx + ror r13d,6 + add r11d,r12d + and esi,r15d + xor r14d,eax + add r11d,r13d + xor esi,ebx + add edx,r11d + ror r14d,2 + add r11d,esi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + xor r13d,edx + ror r14d,9 + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov esi,r11d + ror r14d,11 + xor r12d,r9d + xor esi,eax + ror r13d,6 + add r10d,r12d + and r15d,esi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + add ecx,r10d + ror r14d,2 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + xor r13d,ecx + ror r14d,9 + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + ror r14d,11 + xor r12d,r8d + xor r15d,r11d + ror r13d,6 + add r9d,r12d + and esi,r15d + xor r14d,r10d + add r9d,r13d + xor esi,r11d + add ebx,r9d + ror r14d,2 + add r9d,esi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + xor r13d,ebx + ror r14d,9 + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov esi,r9d + ror r14d,11 + xor r12d,edx + xor esi,r10d + ror r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + ror r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + xor r13d,eax + ror r14d,9 + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + ror r14d,11 + xor r12d,ecx + xor r15d,r9d + ror r13d,6 + add edx,r12d + and esi,r15d + xor r14d,r8d + add edx,r13d + xor esi,r9d + add r11d,edx + ror r14d,2 + add edx,esi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + xor r13d,r11d + ror r14d,9 + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov esi,edx + ror r14d,11 + xor r12d,ebx + xor esi,r8d + ror 
r13d,6 + add ecx,r12d + and r15d,esi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + add r10d,ecx + ror r14d,2 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + xor r13d,r10d + ror r14d,9 + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + ror r14d,11 + xor r12d,eax + xor r15d,edx + ror r13d,6 + add ebx,r12d + and esi,r15d + xor r14d,ecx + add ebx,r13d + xor esi,edx + add r9d,ebx + ror r14d,2 + add ebx,esi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + xor r13d,r9d + ror r14d,9 + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov esi,ebx + ror r14d,11 + xor r12d,r11d + xor esi,ecx + ror r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add r8d,eax + ror r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + mov r12,QWORD PTR[((64+0))+rsp] + mov r13,QWORD PTR[((64+8))+rsp] + mov r15,QWORD PTR[((64+40))+rsp] + mov rsi,QWORD PTR[((64+48))+rsp] + + vpand xmm11,xmm11,xmm14 + mov eax,r14d + vpor xmm8,xmm8,xmm11 + vmovdqu XMMWORD PTR[r13*1+r12],xmm8 + lea r12,QWORD PTR[16+r12] + + add eax,DWORD PTR[r15] + add ebx,DWORD PTR[4+r15] + add ecx,DWORD PTR[8+r15] + add edx,DWORD PTR[12+r15] + add r8d,DWORD PTR[16+r15] + add r9d,DWORD PTR[20+r15] + add r10d,DWORD PTR[24+r15] + add r11d,DWORD PTR[28+r15] + + cmp r12,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[r15],eax + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + mov DWORD PTR[16+r15],r8d + mov DWORD PTR[20+r15],r9d + mov DWORD PTR[24+r15],r10d + mov DWORD PTR[28+r15],r11d + + jb $L$loop_xop + + mov r8,QWORD PTR[((64+32))+rsp] + mov rsi,QWORD PTR[((64+56))+rsp] + vmovdqu XMMWORD PTR[r8],xmm8 + vzeroall + movaps xmm6,XMMWORD PTR[128+rsp] + movaps xmm7,XMMWORD PTR[144+rsp] + movaps xmm8,XMMWORD PTR[160+rsp] + movaps xmm9,XMMWORD PTR[176+rsp] + movaps xmm10,XMMWORD PTR[192+rsp] + movaps xmm11,XMMWORD PTR[208+rsp] + movaps xmm12,XMMWORD PTR[224+rsp] + movaps xmm13,XMMWORD PTR[240+rsp] + movaps xmm14,XMMWORD PTR[256+rsp] + movaps xmm15,XMMWORD PTR[272+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_xop:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha256_enc_xop:: +aesni_cbc_sha256_enc_xop ENDP + +ALIGN 64 +aesni_cbc_sha256_enc_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha256_enc_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$avx_shortcut:: + mov r10,QWORD PTR[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + sub rsp,288 + and rsp,-64 + + shl rdx,6 + sub rsi,rdi + sub r10,rdi + add rdx,rdi + + + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + + mov QWORD PTR[((64+32))+rsp],r8 + mov QWORD PTR[((64+40))+rsp],r9 + mov QWORD PTR[((64+48))+rsp],r10 + mov QWORD PTR[((64+56))+rsp],r11 + movaps XMMWORD PTR[128+rsp],xmm6 + movaps XMMWORD PTR[144+rsp],xmm7 + movaps XMMWORD 
PTR[160+rsp],xmm8 + movaps XMMWORD PTR[176+rsp],xmm9 + movaps XMMWORD PTR[192+rsp],xmm10 + movaps XMMWORD PTR[208+rsp],xmm11 + movaps XMMWORD PTR[224+rsp],xmm12 + movaps XMMWORD PTR[240+rsp],xmm13 + movaps XMMWORD PTR[256+rsp],xmm14 + movaps XMMWORD PTR[272+rsp],xmm15 +$L$prologue_avx:: + vzeroall + + mov r12,rdi + lea rdi,QWORD PTR[128+rcx] + lea r13,QWORD PTR[((K256+544))] + mov r14d,DWORD PTR[((240-128))+rdi] + mov r15,r9 + mov rsi,r10 + vmovdqu xmm8,XMMWORD PTR[r8] + sub r14,9 + + mov eax,DWORD PTR[r15] + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + mov edx,DWORD PTR[12+r15] + mov r8d,DWORD PTR[16+r15] + mov r9d,DWORD PTR[20+r15] + mov r10d,DWORD PTR[24+r15] + mov r11d,DWORD PTR[28+r15] + + vmovdqa xmm14,XMMWORD PTR[r14*8+r13] + vmovdqa xmm13,XMMWORD PTR[16+r14*8+r13] + vmovdqa xmm12,XMMWORD PTR[32+r14*8+r13] + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + jmp $L$loop_avx +ALIGN 16 +$L$loop_avx:: + vmovdqa xmm7,XMMWORD PTR[((K256+512))] + vmovdqu xmm0,XMMWORD PTR[r12*1+rsi] + vmovdqu xmm1,XMMWORD PTR[16+r12*1+rsi] + vmovdqu xmm2,XMMWORD PTR[32+r12*1+rsi] + vmovdqu xmm3,XMMWORD PTR[48+r12*1+rsi] + vpshufb xmm0,xmm0,xmm7 + lea rbp,QWORD PTR[K256] + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD PTR[rbp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,XMMWORD PTR[32+rbp] + vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] + vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp] + vmovdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + vmovdqa XMMWORD PTR[16+rsp],xmm5 + mov esi,ebx + vmovdqa XMMWORD PTR[32+rsp],xmm6 + xor esi,ecx + vmovdqa XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47:: + sub rbp,-16*2*4 + vmovdqu xmm9,XMMWORD PTR[r12] + mov QWORD PTR[((64+0))+rsp],r12 + vpalignr xmm4,xmm1,xmm0,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm3,xmm2,4 + xor r13d,r8d + shrd r14d,r14d,9 + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm0,xmm0,xmm7 + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + shrd r14d,r14d,11 + xor r12d,r10d + xor r15d,ebx + vpslld xmm5,xmm4,14 + shrd r13d,r13d,6 + add r11d,r12d + and esi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor esi,ebx + vpshufd xmm7,xmm3,250 + add edx,r11d + shrd r14d,r14d,2 + add r11d,esi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + xor r13d,edx + vpslld xmm5,xmm5,11 + shrd r14d,r14d,9 + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + vpxor xmm9,xmm9,xmm8 + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD PTR[4+rsp] + mov esi,r11d + shrd r14d,r14d,11 + vpxor xmm4,xmm4,xmm5 + xor r12d,r9d + xor esi,eax + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + add r10d,r12d + and r15d,esi + xor r14d,r11d + vpaddd xmm0,xmm0,xmm4 + add r10d,r13d + xor r15d,eax + add ecx,r10d + vpxor xmm6,xmm6,xmm7 + shrd r14d,r14d,2 + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + xor r13d,ecx + shrd r14d,r14d,9 + vpshufd xmm6,xmm6,132 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpsrldq xmm6,xmm6,8 + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + vpaddd xmm0,xmm0,xmm6 + mov r15d,r10d + shrd r14d,r14d,11 + xor r12d,r8d + vpshufd xmm7,xmm0,80 + xor r15d,r11d + shrd 
r13d,r13d,6 + add r9d,r12d + vpsrld xmm6,xmm7,10 + and esi,r15d + xor r14d,r10d + add r9d,r13d + vpsrlq xmm7,xmm7,17 + xor esi,r11d + add ebx,r9d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r9d,esi + mov r13d,ebx + add r14d,r9d + vpsrlq xmm7,xmm7,2 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpxor xmm6,xmm6,xmm7 + xor r13d,ebx + shrd r14d,r14d,9 + xor r12d,edx + vpshufd xmm6,xmm6,232 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpslldq xmm6,xmm6,8 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov esi,r9d + vpaddd xmm0,xmm0,xmm6 + shrd r14d,r14d,11 + xor r12d,edx + xor esi,r10d + vpaddd xmm6,xmm0,XMMWORD PTR[rbp] + shrd r13d,r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + shrd r14d,r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD PTR[rsp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm0,xmm3,4 + xor r13d,eax + shrd r14d,r14d,9 + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm1,xmm1,xmm7 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + shrd r14d,r14d,11 + xor r12d,ecx + xor r15d,r9d + vpslld xmm5,xmm4,14 + shrd r13d,r13d,6 + add edx,r12d + and esi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor esi,r9d + vpshufd xmm7,xmm0,250 + add r11d,edx + shrd r14d,r14d,2 + add edx,esi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + xor r13d,r11d + vpslld xmm5,xmm5,11 + shrd r14d,r14d,9 + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD PTR[20+rsp] + mov esi,edx + shrd r14d,r14d,11 + vpxor xmm4,xmm4,xmm5 + xor r12d,ebx + xor esi,r8d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + add ecx,r12d + and r15d,esi + xor r14d,edx + vpaddd xmm1,xmm1,xmm4 + add ecx,r13d + xor r15d,r8d + add r10d,ecx + vpxor xmm6,xmm6,xmm7 + shrd r14d,r14d,2 + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + xor r13d,r10d + shrd r14d,r14d,9 + vpshufd xmm6,xmm6,132 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpsrldq xmm6,xmm6,8 + and r12d,r10d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + vpaddd xmm1,xmm1,xmm6 + mov r15d,ecx + shrd r14d,r14d,11 + xor r12d,eax + vpshufd xmm7,xmm1,80 + xor r15d,edx + shrd r13d,r13d,6 + add ebx,r12d + vpsrld xmm6,xmm7,10 + and esi,r15d + xor r14d,ecx + add ebx,r13d + vpsrlq xmm7,xmm7,17 + xor esi,edx + add r9d,ebx + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ebx,esi + mov r13d,r9d + add r14d,ebx + vpsrlq xmm7,xmm7,2 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpxor xmm6,xmm6,xmm7 + xor r13d,r9d + shrd r14d,r14d,9 + xor r12d,r11d + vpshufd xmm6,xmm6,232 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpslldq xmm6,xmm6,8 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov esi,ebx + vpaddd xmm1,xmm1,xmm6 + shrd r14d,r14d,11 + xor r12d,r11d + xor esi,ecx + vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp] + shrd r13d,r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx 
+ add eax,r13d + xor r15d,ecx + add r8d,eax + shrd r14d,r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD PTR[16+rsp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm1,xmm0,4 + xor r13d,r8d + shrd r14d,r14d,9 + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm2,xmm2,xmm7 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + shrd r14d,r14d,11 + xor r12d,r10d + xor r15d,ebx + vpslld xmm5,xmm4,14 + shrd r13d,r13d,6 + add r11d,r12d + and esi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor esi,ebx + vpshufd xmm7,xmm1,250 + add edx,r11d + shrd r14d,r14d,2 + add r11d,esi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + xor r13d,edx + vpslld xmm5,xmm5,11 + shrd r14d,r14d,9 + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD PTR[36+rsp] + mov esi,r11d + shrd r14d,r14d,11 + vpxor xmm4,xmm4,xmm5 + xor r12d,r9d + xor esi,eax + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + add r10d,r12d + and r15d,esi + xor r14d,r11d + vpaddd xmm2,xmm2,xmm4 + add r10d,r13d + xor r15d,eax + add ecx,r10d + vpxor xmm6,xmm6,xmm7 + shrd r14d,r14d,2 + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + xor r13d,ecx + shrd r14d,r14d,9 + vpshufd xmm6,xmm6,132 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpsrldq xmm6,xmm6,8 + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + vpaddd xmm2,xmm2,xmm6 + mov r15d,r10d + shrd r14d,r14d,11 + xor r12d,r8d + vpshufd xmm7,xmm2,80 + xor r15d,r11d + shrd r13d,r13d,6 + add r9d,r12d + vpsrld xmm6,xmm7,10 + and esi,r15d + xor r14d,r10d + add r9d,r13d + vpsrlq xmm7,xmm7,17 + xor esi,r11d + add ebx,r9d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r9d,esi + mov r13d,ebx + add r14d,r9d + vpsrlq xmm7,xmm7,2 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpxor xmm6,xmm6,xmm7 + xor r13d,ebx + shrd r14d,r14d,9 + xor r12d,edx + vpshufd xmm6,xmm6,232 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpslldq xmm6,xmm6,8 + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov esi,r9d + vpaddd xmm2,xmm2,xmm6 + shrd r14d,r14d,11 + xor r12d,edx + xor esi,r10d + vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] + shrd r13d,r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + shrd r14d,r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD PTR[32+rsp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm2,xmm1,4 + xor r13d,eax + shrd r14d,r14d,9 + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm3,xmm3,xmm7 + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + shrd r14d,r14d,11 + xor r12d,ecx + xor r15d,r9d + vpslld xmm5,xmm4,14 + shrd r13d,r13d,6 + add edx,r12d + and esi,r15d + vpxor xmm4,xmm7,xmm6 + xor 
r14d,r8d + add edx,r13d + xor esi,r9d + vpshufd xmm7,xmm2,250 + add r11d,edx + shrd r14d,r14d,2 + add edx,esi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + xor r13d,r11d + vpslld xmm5,xmm5,11 + shrd r14d,r14d,9 + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD PTR[52+rsp] + mov esi,edx + shrd r14d,r14d,11 + vpxor xmm4,xmm4,xmm5 + xor r12d,ebx + xor esi,r8d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + add ecx,r12d + and r15d,esi + xor r14d,edx + vpaddd xmm3,xmm3,xmm4 + add ecx,r13d + xor r15d,r8d + add r10d,ecx + vpxor xmm6,xmm6,xmm7 + shrd r14d,r14d,2 + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + xor r13d,r10d + shrd r14d,r14d,9 + vpshufd xmm6,xmm6,132 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpsrldq xmm6,xmm6,8 + and r12d,r10d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + vpaddd xmm3,xmm3,xmm6 + mov r15d,ecx + shrd r14d,r14d,11 + xor r12d,eax + vpshufd xmm7,xmm3,80 + xor r15d,edx + shrd r13d,r13d,6 + add ebx,r12d + vpsrld xmm6,xmm7,10 + and esi,r15d + xor r14d,ecx + add ebx,r13d + vpsrlq xmm7,xmm7,17 + xor esi,edx + add r9d,ebx + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ebx,esi + mov r13d,r9d + add r14d,ebx + vpsrlq xmm7,xmm7,2 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpxor xmm6,xmm6,xmm7 + xor r13d,r9d + shrd r14d,r14d,9 + xor r12d,r11d + vpshufd xmm6,xmm6,232 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpslldq xmm6,xmm6,8 + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov esi,ebx + vpaddd xmm3,xmm3,xmm6 + shrd r14d,r14d,11 + xor r12d,r11d + xor esi,ecx + vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp] + shrd r13d,r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add r8d,eax + shrd r14d,r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD PTR[48+rsp],xmm6 + mov r12,QWORD PTR[((64+0))+rsp] + vpand xmm11,xmm11,xmm14 + mov r15,QWORD PTR[((64+8))+rsp] + vpor xmm8,xmm8,xmm11 + vmovdqu XMMWORD PTR[r12*1+r15],xmm8 + lea r12,QWORD PTR[16+r12] + cmp BYTE PTR[131+rbp],0 + jne $L$avx_00_47 + vmovdqu xmm9,XMMWORD PTR[r12] + mov QWORD PTR[((64+0))+rsp],r12 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + xor r13d,r8d + shrd r14d,r14d,9 + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + shrd r14d,r14d,11 + xor r12d,r10d + xor r15d,ebx + shrd r13d,r13d,6 + add r11d,r12d + and esi,r15d + xor r14d,eax + add r11d,r13d + xor esi,ebx + add edx,r11d + shrd r14d,r14d,2 + add r11d,esi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + xor r13d,edx + shrd r14d,r14d,9 + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + vpxor xmm9,xmm9,xmm8 + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov esi,r11d + shrd r14d,r14d,11 + xor r12d,r9d + xor esi,eax + shrd r13d,r13d,6 + add r10d,r12d + and r15d,esi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + add ecx,r10d + shrd r14d,r14d,2 + add r10d,r15d + mov 
r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + xor r13d,ecx + shrd r14d,r14d,9 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + shrd r14d,r14d,11 + xor r12d,r8d + xor r15d,r11d + shrd r13d,r13d,6 + add r9d,r12d + and esi,r15d + xor r14d,r10d + add r9d,r13d + xor esi,r11d + add ebx,r9d + shrd r14d,r14d,2 + add r9d,esi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + xor r13d,ebx + shrd r14d,r14d,9 + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov esi,r9d + shrd r14d,r14d,11 + xor r12d,edx + xor esi,r10d + shrd r13d,r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + shrd r14d,r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + xor r13d,eax + shrd r14d,r14d,9 + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + shrd r14d,r14d,11 + xor r12d,ecx + xor r15d,r9d + shrd r13d,r13d,6 + add edx,r12d + and esi,r15d + xor r14d,r8d + add edx,r13d + xor esi,r9d + add r11d,edx + shrd r14d,r14d,2 + add edx,esi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + xor r13d,r11d + shrd r14d,r14d,9 + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov esi,edx + shrd r14d,r14d,11 + xor r12d,ebx + xor esi,r8d + shrd r13d,r13d,6 + add ecx,r12d + and r15d,esi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + add r10d,ecx + shrd r14d,r14d,2 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + xor r13d,r10d + shrd r14d,r14d,9 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + shrd r14d,r14d,11 + xor r12d,eax + xor r15d,edx + shrd r13d,r13d,6 + add ebx,r12d + and esi,r15d + xor r14d,ecx + add ebx,r13d + xor esi,edx + add r9d,ebx + shrd r14d,r14d,2 + add ebx,esi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + xor r13d,r9d + shrd r14d,r14d,9 + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov esi,ebx + shrd r14d,r14d,11 + xor r12d,r11d + xor esi,ecx + shrd r13d,r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add r8d,eax + shrd r14d,r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + xor r13d,r8d + shrd r14d,r14d,9 + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + shrd r14d,r14d,11 + xor r12d,r10d + xor r15d,ebx + shrd r13d,r13d,6 + add r11d,r12d + and esi,r15d + xor r14d,eax + add r11d,r13d + xor esi,ebx + add edx,r11d + shrd r14d,r14d,2 + add r11d,esi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov 
r11d,r14d + mov r12d,r8d + xor r13d,edx + shrd r14d,r14d,9 + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov esi,r11d + shrd r14d,r14d,11 + xor r12d,r9d + xor esi,eax + shrd r13d,r13d,6 + add r10d,r12d + and r15d,esi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + add ecx,r10d + shrd r14d,r14d,2 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + xor r13d,ecx + shrd r14d,r14d,9 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + shrd r14d,r14d,11 + xor r12d,r8d + xor r15d,r11d + shrd r13d,r13d,6 + add r9d,r12d + and esi,r15d + xor r14d,r10d + add r9d,r13d + xor esi,r11d + add ebx,r9d + shrd r14d,r14d,2 + add r9d,esi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + xor r13d,ebx + shrd r14d,r14d,9 + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov esi,r9d + shrd r14d,r14d,11 + xor r12d,edx + xor esi,r10d + shrd r13d,r13d,6 + add r8d,r12d + and r15d,esi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + add eax,r8d + shrd r14d,r14d,2 + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + xor r13d,eax + shrd r14d,r14d,9 + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + shrd r14d,r14d,11 + xor r12d,ecx + xor r15d,r9d + shrd r13d,r13d,6 + add edx,r12d + and esi,r15d + xor r14d,r8d + add edx,r13d + xor esi,r9d + add r11d,edx + shrd r14d,r14d,2 + add edx,esi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + xor r13d,r11d + shrd r14d,r14d,9 + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov esi,edx + shrd r14d,r14d,11 + xor r12d,ebx + xor esi,r8d + shrd r13d,r13d,6 + add ecx,r12d + and r15d,esi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + add r10d,ecx + shrd r14d,r14d,2 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + xor r13d,r10d + shrd r14d,r14d,9 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + shrd r14d,r14d,11 + xor r12d,eax + xor r15d,edx + shrd r13d,r13d,6 + add ebx,r12d + and esi,r15d + xor r14d,ecx + add ebx,r13d + xor esi,edx + add r9d,ebx + shrd r14d,r14d,2 + add ebx,esi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + xor r13d,r9d + shrd r14d,r14d,9 + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov esi,ebx + shrd r14d,r14d,11 + xor r12d,r11d + xor esi,ecx + shrd r13d,r13d,6 + add eax,r12d + and r15d,esi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + add 
r8d,eax + shrd r14d,r14d,2 + add eax,r15d + mov r13d,r8d + add r14d,eax + mov r12,QWORD PTR[((64+0))+rsp] + mov r13,QWORD PTR[((64+8))+rsp] + mov r15,QWORD PTR[((64+40))+rsp] + mov rsi,QWORD PTR[((64+48))+rsp] + + vpand xmm11,xmm11,xmm14 + mov eax,r14d + vpor xmm8,xmm8,xmm11 + vmovdqu XMMWORD PTR[r13*1+r12],xmm8 + lea r12,QWORD PTR[16+r12] + + add eax,DWORD PTR[r15] + add ebx,DWORD PTR[4+r15] + add ecx,DWORD PTR[8+r15] + add edx,DWORD PTR[12+r15] + add r8d,DWORD PTR[16+r15] + add r9d,DWORD PTR[20+r15] + add r10d,DWORD PTR[24+r15] + add r11d,DWORD PTR[28+r15] + + cmp r12,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[r15],eax + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + mov DWORD PTR[16+r15],r8d + mov DWORD PTR[20+r15],r9d + mov DWORD PTR[24+r15],r10d + mov DWORD PTR[28+r15],r11d + jb $L$loop_avx + + mov r8,QWORD PTR[((64+32))+rsp] + mov rsi,QWORD PTR[((64+56))+rsp] + vmovdqu XMMWORD PTR[r8],xmm8 + vzeroall + movaps xmm6,XMMWORD PTR[128+rsp] + movaps xmm7,XMMWORD PTR[144+rsp] + movaps xmm8,XMMWORD PTR[160+rsp] + movaps xmm9,XMMWORD PTR[176+rsp] + movaps xmm10,XMMWORD PTR[192+rsp] + movaps xmm11,XMMWORD PTR[208+rsp] + movaps xmm12,XMMWORD PTR[224+rsp] + movaps xmm13,XMMWORD PTR[240+rsp] + movaps xmm14,XMMWORD PTR[256+rsp] + movaps xmm15,XMMWORD PTR[272+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_avx:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha256_enc_avx:: +aesni_cbc_sha256_enc_avx ENDP + +ALIGN 64 +aesni_cbc_sha256_enc_avx2 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha256_enc_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$avx2_shortcut:: + mov r10,QWORD PTR[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + sub rsp,736 + and rsp,-256*4 + add rsp,448 + + shl rdx,6 + sub rsi,rdi + sub r10,rdi + add rdx,rdi + + + + mov QWORD PTR[((64+16))+rsp],rdx + + mov QWORD PTR[((64+32))+rsp],r8 + mov QWORD PTR[((64+40))+rsp],r9 + mov QWORD PTR[((64+48))+rsp],r10 + mov QWORD PTR[((64+56))+rsp],r11 + movaps XMMWORD PTR[128+rsp],xmm6 + movaps XMMWORD PTR[144+rsp],xmm7 + movaps XMMWORD PTR[160+rsp],xmm8 + movaps XMMWORD PTR[176+rsp],xmm9 + movaps XMMWORD PTR[192+rsp],xmm10 + movaps XMMWORD PTR[208+rsp],xmm11 + movaps XMMWORD PTR[224+rsp],xmm12 + movaps XMMWORD PTR[240+rsp],xmm13 + movaps XMMWORD PTR[256+rsp],xmm14 + movaps XMMWORD PTR[272+rsp],xmm15 +$L$prologue_avx2:: + vzeroall + + mov r13,rdi + vpinsrq xmm15,xmm15,rsi,1 + lea rdi,QWORD PTR[128+rcx] + lea r12,QWORD PTR[((K256+544))] + mov r14d,DWORD PTR[((240-128))+rdi] + mov r15,r9 + mov rsi,r10 + vmovdqu xmm8,XMMWORD PTR[r8] + lea r14,QWORD PTR[((-9))+r14] + + vmovdqa xmm14,XMMWORD PTR[r14*8+r12] + vmovdqa xmm13,XMMWORD PTR[16+r14*8+r12] + vmovdqa xmm12,XMMWORD PTR[32+r14*8+r12] + + sub r13,-16*4 + mov eax,DWORD PTR[r15] + lea r12,QWORD PTR[r13*1+rsi] + mov ebx,DWORD PTR[4+r15] + cmp r13,rdx + mov ecx,DWORD PTR[8+r15] + cmove r12,rsp + mov edx,DWORD PTR[12+r15] + mov r8d,DWORD PTR[16+r15] + mov r9d,DWORD PTR[20+r15] + mov r10d,DWORD PTR[24+r15] + mov r11d,DWORD PTR[28+r15] + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + jmp $L$oop_avx2 +ALIGN 16 +$L$oop_avx2:: + vmovdqa ymm7,YMMWORD 
PTR[((K256+512))] + vmovdqu xmm0,XMMWORD PTR[((-64+0))+r13*1+rsi] + vmovdqu xmm1,XMMWORD PTR[((-64+16))+r13*1+rsi] + vmovdqu xmm2,XMMWORD PTR[((-64+32))+r13*1+rsi] + vmovdqu xmm3,XMMWORD PTR[((-64+48))+r13*1+rsi] + + vinserti128 ymm0,ymm0,XMMWORD PTR[r12],1 + vinserti128 ymm1,ymm1,XMMWORD PTR[16+r12],1 + vpshufb ymm0,ymm0,ymm7 + vinserti128 ymm2,ymm2,XMMWORD PTR[32+r12],1 + vpshufb ymm1,ymm1,ymm7 + vinserti128 ymm3,ymm3,XMMWORD PTR[48+r12],1 + + lea rbp,QWORD PTR[K256] + vpshufb ymm2,ymm2,ymm7 + lea r13,QWORD PTR[((-64))+r13] + vpaddd ymm4,ymm0,YMMWORD PTR[rbp] + vpshufb ymm3,ymm3,ymm7 + vpaddd ymm5,ymm1,YMMWORD PTR[32+rbp] + vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp] + vpaddd ymm7,ymm3,YMMWORD PTR[96+rbp] + vmovdqa YMMWORD PTR[rsp],ymm4 + xor r14d,r14d + vmovdqa YMMWORD PTR[32+rsp],ymm5 + lea rsp,QWORD PTR[((-64))+rsp] + mov esi,ebx + vmovdqa YMMWORD PTR[rsp],ymm6 + xor esi,ecx + vmovdqa YMMWORD PTR[32+rsp],ymm7 + mov r12d,r9d + sub rbp,-16*2*4 + jmp $L$avx2_00_47 + +ALIGN 16 +$L$avx2_00_47:: + vmovdqu xmm9,XMMWORD PTR[r13] + vpinsrq xmm15,xmm15,r13,0 + lea rsp,QWORD PTR[((-64))+rsp] + vpalignr ymm4,ymm1,ymm0,4 + add r11d,DWORD PTR[((0+128))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + vpalignr ymm7,ymm3,ymm2,4 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + vpsrld ymm6,ymm4,7 + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + vpaddd ymm0,ymm0,ymm7 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + vpsrld ymm7,ymm4,3 + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + vpslld ymm5,ymm4,14 + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + vpxor ymm4,ymm7,ymm6 + and esi,r15d + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r14d,r12d + xor esi,ebx + vpshufd ymm7,ymm3,250 + xor r14d,r13d + lea r11d,DWORD PTR[rsi*1+r11] + mov r12d,r8d + vpsrld ymm6,ymm6,11 + add r10d,DWORD PTR[((4+128))+rsp] + and r12d,edx + rorx r13d,edx,25 + vpxor ymm4,ymm4,ymm5 + rorx esi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + vpslld ymm5,ymm5,11 + andn r12d,edx,r9d + xor r13d,esi + rorx r14d,edx,6 + vpxor ymm4,ymm4,ymm6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov esi,r11d + vpsrld ymm6,ymm7,10 + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor esi,eax + vpxor ymm4,ymm4,ymm5 + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + vpsrlq ymm7,ymm7,17 + and r15d,esi + vpxor xmm9,xmm9,xmm8 + xor r14d,r12d + xor r15d,eax + vpaddd ymm0,ymm0,ymm4 + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + vpxor ymm6,ymm6,ymm7 + add r9d,DWORD PTR[((8+128))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + vpxor ymm6,ymm6,ymm7 + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + vpshufd ymm6,ymm6,132 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + vpsrldq ymm6,ymm6,8 + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + vpaddd ymm0,ymm0,ymm6 + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + vpshufd ymm7,ymm0,80 + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r14d,r12d + xor esi,r11d + vpsrld ymm6,ymm7,10 + xor r14d,r13d + lea r9d,DWORD PTR[rsi*1+r9] + mov r12d,ecx + vpsrlq ymm7,ymm7,17 + add r8d,DWORD PTR[((12+128))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + vpxor ymm6,ymm6,ymm7 + rorx esi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + vpsrlq 
ymm7,ymm7,2 + andn r12d,ebx,edx + xor r13d,esi + rorx r14d,ebx,6 + vpxor ymm6,ymm6,ymm7 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov esi,r9d + vpshufd ymm6,ymm6,232 + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor esi,r10d + vpslldq ymm6,ymm6,8 + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + vpaddd ymm0,ymm0,ymm6 + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r14d,r12d + xor r15d,r10d + vpaddd ymm6,ymm0,YMMWORD PTR[rbp] + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + vmovdqa YMMWORD PTR[rsp],ymm6 + vpalignr ymm4,ymm2,ymm1,4 + add edx,DWORD PTR[((32+128))+rsp] + and r12d,eax + rorx r13d,eax,25 + vpalignr ymm7,ymm0,ymm3,4 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + vpsrld ymm6,ymm4,7 + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + vpaddd ymm1,ymm1,ymm7 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + vpsrld ymm7,ymm4,3 + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + vpslld ymm5,ymm4,14 + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + vpxor ymm4,ymm7,ymm6 + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r14d,r12d + xor esi,r9d + vpshufd ymm7,ymm0,250 + xor r14d,r13d + lea edx,DWORD PTR[rsi*1+rdx] + mov r12d,eax + vpsrld ymm6,ymm6,11 + add ecx,DWORD PTR[((36+128))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + vpxor ymm4,ymm4,ymm5 + rorx esi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + vpslld ymm5,ymm5,11 + andn r12d,r11d,ebx + xor r13d,esi + rorx r14d,r11d,6 + vpxor ymm4,ymm4,ymm6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov esi,edx + vpsrld ymm6,ymm7,10 + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor esi,r8d + vpxor ymm4,ymm4,ymm5 + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + vpsrlq ymm7,ymm7,17 + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r14d,r12d + xor r15d,r8d + vpaddd ymm1,ymm1,ymm4 + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + vpxor ymm6,ymm6,ymm7 + add ebx,DWORD PTR[((40+128))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + vpxor ymm6,ymm6,ymm7 + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + vpshufd ymm6,ymm6,132 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + vpsrldq ymm6,ymm6,8 + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + vpaddd ymm1,ymm1,ymm6 + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + vpshufd ymm7,ymm1,80 + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r14d,r12d + xor esi,edx + vpsrld ymm6,ymm7,10 + xor r14d,r13d + lea ebx,DWORD PTR[rsi*1+rbx] + mov r12d,r10d + vpsrlq ymm7,ymm7,17 + add eax,DWORD PTR[((44+128))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + vpxor ymm6,ymm6,ymm7 + rorx esi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + vpsrlq ymm7,ymm7,2 + andn r12d,r9d,r11d + xor r13d,esi + rorx r14d,r9d,6 + vpxor ymm6,ymm6,ymm7 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov esi,ebx + vpshufd ymm6,ymm6,232 + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor esi,ecx + vpslldq ymm6,ymm6,8 + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + vpaddd ymm1,ymm1,ymm6 + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r14d,r12d + xor 
r15d,ecx + vpaddd ymm6,ymm1,YMMWORD PTR[32+rbp] + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + vmovdqa YMMWORD PTR[32+rsp],ymm6 + lea rsp,QWORD PTR[((-64))+rsp] + vpalignr ymm4,ymm3,ymm2,4 + add r11d,DWORD PTR[((0+128))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + vpalignr ymm7,ymm1,ymm0,4 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + vpsrld ymm6,ymm4,7 + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + vpaddd ymm2,ymm2,ymm7 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + vpsrld ymm7,ymm4,3 + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + vpslld ymm5,ymm4,14 + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + vpxor ymm4,ymm7,ymm6 + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r14d,r12d + xor esi,ebx + vpshufd ymm7,ymm1,250 + xor r14d,r13d + lea r11d,DWORD PTR[rsi*1+r11] + mov r12d,r8d + vpsrld ymm6,ymm6,11 + add r10d,DWORD PTR[((4+128))+rsp] + and r12d,edx + rorx r13d,edx,25 + vpxor ymm4,ymm4,ymm5 + rorx esi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + vpslld ymm5,ymm5,11 + andn r12d,edx,r9d + xor r13d,esi + rorx r14d,edx,6 + vpxor ymm4,ymm4,ymm6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov esi,r11d + vpsrld ymm6,ymm7,10 + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor esi,eax + vpxor ymm4,ymm4,ymm5 + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + vpsrlq ymm7,ymm7,17 + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r14d,r12d + xor r15d,eax + vpaddd ymm2,ymm2,ymm4 + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + vpxor ymm6,ymm6,ymm7 + add r9d,DWORD PTR[((8+128))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + vpxor ymm6,ymm6,ymm7 + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + vpshufd ymm6,ymm6,132 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + vpsrldq ymm6,ymm6,8 + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + vpaddd ymm2,ymm2,ymm6 + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + vpshufd ymm7,ymm2,80 + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r14d,r12d + xor esi,r11d + vpsrld ymm6,ymm7,10 + xor r14d,r13d + lea r9d,DWORD PTR[rsi*1+r9] + mov r12d,ecx + vpsrlq ymm7,ymm7,17 + add r8d,DWORD PTR[((12+128))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + vpxor ymm6,ymm6,ymm7 + rorx esi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + vpsrlq ymm7,ymm7,2 + andn r12d,ebx,edx + xor r13d,esi + rorx r14d,ebx,6 + vpxor ymm6,ymm6,ymm7 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov esi,r9d + vpshufd ymm6,ymm6,232 + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor esi,r10d + vpslldq ymm6,ymm6,8 + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + vpaddd ymm2,ymm2,ymm6 + and r15d,esi + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r14d,r12d + xor r15d,r10d + vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp] + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + vmovdqa YMMWORD PTR[rsp],ymm6 + vpalignr ymm4,ymm0,ymm3,4 + add edx,DWORD PTR[((32+128))+rsp] + and r12d,eax + rorx r13d,eax,25 + vpalignr ymm7,ymm2,ymm1,4 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + vpsrld ymm6,ymm4,7 + andn 
r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + vpaddd ymm3,ymm3,ymm7 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + vpsrld ymm7,ymm4,3 + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + vpslld ymm5,ymm4,14 + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + vpxor ymm4,ymm7,ymm6 + and esi,r15d + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + xor r14d,r12d + xor esi,r9d + vpshufd ymm7,ymm2,250 + xor r14d,r13d + lea edx,DWORD PTR[rsi*1+rdx] + mov r12d,eax + vpsrld ymm6,ymm6,11 + add ecx,DWORD PTR[((36+128))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + vpxor ymm4,ymm4,ymm5 + rorx esi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + vpslld ymm5,ymm5,11 + andn r12d,r11d,ebx + xor r13d,esi + rorx r14d,r11d,6 + vpxor ymm4,ymm4,ymm6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov esi,edx + vpsrld ymm6,ymm7,10 + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor esi,r8d + vpxor ymm4,ymm4,ymm5 + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + vpsrlq ymm7,ymm7,17 + and r15d,esi + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r14d,r12d + xor r15d,r8d + vpaddd ymm3,ymm3,ymm4 + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + vpxor ymm6,ymm6,ymm7 + add ebx,DWORD PTR[((40+128))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + vpxor ymm6,ymm6,ymm7 + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + vpshufd ymm6,ymm6,132 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + vpsrldq ymm6,ymm6,8 + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + vpaddd ymm3,ymm3,ymm6 + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + vpshufd ymm7,ymm3,80 + and esi,r15d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r14d,r12d + xor esi,edx + vpsrld ymm6,ymm7,10 + xor r14d,r13d + lea ebx,DWORD PTR[rsi*1+rbx] + mov r12d,r10d + vpsrlq ymm7,ymm7,17 + add eax,DWORD PTR[((44+128))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + vpxor ymm6,ymm6,ymm7 + rorx esi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + vpsrlq ymm7,ymm7,2 + andn r12d,r9d,r11d + xor r13d,esi + rorx r14d,r9d,6 + vpxor ymm6,ymm6,ymm7 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov esi,ebx + vpshufd ymm6,ymm6,232 + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor esi,ecx + vpslldq ymm6,ymm6,8 + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + vpaddd ymm3,ymm3,ymm6 + and r15d,esi + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r14d,r12d + xor r15d,ecx + vpaddd ymm6,ymm3,YMMWORD PTR[96+rbp] + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + vmovdqa YMMWORD PTR[32+rsp],ymm6 + vmovq r13,xmm15 + vpextrq r15,xmm15,1 + vpand xmm11,xmm11,xmm14 + vpor xmm8,xmm8,xmm11 + vmovdqu XMMWORD PTR[r13*1+r15],xmm8 + lea r13,QWORD PTR[16+r13] + lea rbp,QWORD PTR[128+rbp] + cmp BYTE PTR[3+rbp],0 + jne $L$avx2_00_47 + vmovdqu xmm9,XMMWORD PTR[r13] + vpinsrq xmm15,xmm15,r13,0 + add r11d,DWORD PTR[((0+64))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 
+ lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and esi,r15d + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r14d,r12d + xor esi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rsi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[((4+64))+rsp] + and r12d,edx + rorx r13d,edx,25 + rorx esi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,esi + rorx r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov esi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor esi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,esi + vpxor xmm9,xmm9,xmm8 + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[((8+64))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r14d,r12d + xor esi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rsi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[((12+64))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + rorx esi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,esi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov esi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor esi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[((32+64))+rsp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r14d,r12d + xor esi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rsi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[((36+64))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + rorx esi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,esi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov esi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor esi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[((40+64))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and esi,r15d + 
vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r14d,r12d + xor esi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rsi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[((44+64))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + rorx esi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,esi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov esi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor esi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + add r11d,DWORD PTR[rsp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r14d,r12d + xor esi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rsi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[4+rsp] + and r12d,edx + rorx r13d,edx,25 + rorx esi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,esi + rorx r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov esi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor esi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[8+rsp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r14d,r12d + xor esi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rsi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[12+rsp] + and r12d,ebx + rorx r13d,ebx,25 + rorx esi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,esi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov esi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor esi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,esi + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[32+rsp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and esi,r15d + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + 
xor r14d,r12d + xor esi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rsi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[36+rsp] + and r12d,r11d + rorx r13d,r11d,25 + rorx esi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,esi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov esi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor esi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,esi + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[40+rsp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and esi,r15d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r14d,r12d + xor esi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rsi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[44+rsp] + and r12d,r9d + rorx r13d,r9d,25 + rorx esi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,esi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov esi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor esi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,esi + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + vpextrq r12,xmm15,1 + vmovq r13,xmm15 + mov r15,QWORD PTR[552+rsp] + add eax,r14d + lea rbp,QWORD PTR[448+rsp] + + vpand xmm11,xmm11,xmm14 + vpor xmm8,xmm8,xmm11 + vmovdqu XMMWORD PTR[r13*1+r12],xmm8 + lea r13,QWORD PTR[16+r13] + + add eax,DWORD PTR[r15] + add ebx,DWORD PTR[4+r15] + add ecx,DWORD PTR[8+r15] + add edx,DWORD PTR[12+r15] + add r8d,DWORD PTR[16+r15] + add r9d,DWORD PTR[20+r15] + add r10d,DWORD PTR[24+r15] + add r11d,DWORD PTR[28+r15] + + mov DWORD PTR[r15],eax + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + mov DWORD PTR[16+r15],r8d + mov DWORD PTR[20+r15],r9d + mov DWORD PTR[24+r15],r10d + mov DWORD PTR[28+r15],r11d + + cmp r13,QWORD PTR[80+rbp] + je $L$done_avx2 + + xor r14d,r14d + mov esi,ebx + mov r12d,r9d + xor esi,ecx + jmp $L$ower_avx2 +ALIGN 16 +$L$ower_avx2:: + vmovdqu xmm9,XMMWORD PTR[r13] + vpinsrq xmm15,xmm15,r13,0 + add r11d,DWORD PTR[((0+16))+rbp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and esi,r15d + vpxor xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((16-128))+rdi] + xor r14d,r12d + xor esi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rsi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[((4+16))+rbp] + and r12d,edx + rorx r13d,edx,25 + rorx esi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,esi + rorx 
r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov esi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor esi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,esi + vpxor xmm9,xmm9,xmm8 + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[((8+16))+rbp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((32-128))+rdi] + xor r14d,r12d + xor esi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rsi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[((12+16))+rbp] + and r12d,ebx + rorx r13d,ebx,25 + rorx esi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,esi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov esi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor esi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((48-128))+rdi] + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[((32+16))+rbp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + xor r14d,r12d + xor esi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rsi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[((36+16))+rbp] + and r12d,r11d + rorx r13d,r11d,25 + rorx esi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,esi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov esi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor esi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((80-128))+rdi] + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[((40+16))+rbp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((96-128))+rdi] + xor r14d,r12d + xor esi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rsi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[((44+16))+rbp] + and r12d,r9d + rorx r13d,r9d,25 + rorx esi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,esi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov esi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor esi,ecx + 
rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((112-128))+rdi] + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + lea rbp,QWORD PTR[((-64))+rbp] + add r11d,DWORD PTR[((0+16))+rbp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((128-128))+rdi] + xor r14d,r12d + xor esi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rsi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[((4+16))+rbp] + and r12d,edx + rorx r13d,edx,25 + rorx esi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,esi + rorx r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov esi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor esi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,esi + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((144-128))+rdi] + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[((8+16))+rbp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and esi,r15d + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((160-128))+rdi] + xor r14d,r12d + xor esi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rsi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[((12+16))+rbp] + and r12d,ebx + rorx r13d,ebx,25 + rorx esi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,esi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov esi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor esi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,esi + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((176-128))+rdi] + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[((32+16))+rbp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and esi,r15d + vpand xmm8,xmm11,xmm12 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((192-128))+rdi] + xor r14d,r12d + xor esi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rsi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[((36+16))+rbp] + and r12d,r11d + rorx r13d,r11d,25 + rorx esi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,esi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov esi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor esi,r8d 
+ rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,esi + vaesenclast xmm11,xmm9,xmm10 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((208-128))+rdi] + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[((40+16))+rbp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and esi,r15d + vpand xmm11,xmm11,xmm13 + vaesenc xmm9,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((224-128))+rdi] + xor r14d,r12d + xor esi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rsi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[((44+16))+rbp] + and r12d,r9d + rorx r13d,r9d,25 + rorx esi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,esi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov esi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor esi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,esi + vpor xmm8,xmm8,xmm11 + vaesenclast xmm11,xmm9,xmm10 + vmovdqu xmm10,XMMWORD PTR[((0-128))+rdi] + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + vmovq r13,xmm15 + vpextrq r15,xmm15,1 + vpand xmm11,xmm11,xmm14 + vpor xmm8,xmm8,xmm11 + lea rbp,QWORD PTR[((-64))+rbp] + vmovdqu XMMWORD PTR[r13*1+r15],xmm8 + lea r13,QWORD PTR[16+r13] + cmp rbp,rsp + jae $L$ower_avx2 + + mov r15,QWORD PTR[552+rsp] + lea r13,QWORD PTR[64+r13] + mov rsi,QWORD PTR[560+rsp] + add eax,r14d + lea rsp,QWORD PTR[448+rsp] + + add eax,DWORD PTR[r15] + add ebx,DWORD PTR[4+r15] + add ecx,DWORD PTR[8+r15] + add edx,DWORD PTR[12+r15] + add r8d,DWORD PTR[16+r15] + add r9d,DWORD PTR[20+r15] + add r10d,DWORD PTR[24+r15] + lea r12,QWORD PTR[r13*1+rsi] + add r11d,DWORD PTR[28+r15] + + cmp r13,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[r15],eax + cmove r12,rsp + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + mov DWORD PTR[16+r15],r8d + mov DWORD PTR[20+r15],r9d + mov DWORD PTR[24+r15],r10d + mov DWORD PTR[28+r15],r11d + + jbe $L$oop_avx2 + lea rbp,QWORD PTR[rsp] + +$L$done_avx2:: + lea rsp,QWORD PTR[rbp] + mov r8,QWORD PTR[((64+32))+rsp] + mov rsi,QWORD PTR[((64+56))+rsp] + vmovdqu XMMWORD PTR[r8],xmm8 + vzeroall + movaps xmm6,XMMWORD PTR[128+rsp] + movaps xmm7,XMMWORD PTR[144+rsp] + movaps xmm8,XMMWORD PTR[160+rsp] + movaps xmm9,XMMWORD PTR[176+rsp] + movaps xmm10,XMMWORD PTR[192+rsp] + movaps xmm11,XMMWORD PTR[208+rsp] + movaps xmm12,XMMWORD PTR[224+rsp] + movaps xmm13,XMMWORD PTR[240+rsp] + movaps xmm14,XMMWORD PTR[256+rsp] + movaps xmm15,XMMWORD PTR[272+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_avx2:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha256_enc_avx2:: +aesni_cbc_sha256_enc_avx2 ENDP + +ALIGN 32 +aesni_cbc_sha256_enc_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha256_enc_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov 
r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[(-8-160)+rax],xmm6 + movaps XMMWORD PTR[(-8-144)+rax],xmm7 + movaps XMMWORD PTR[(-8-128)+rax],xmm8 + movaps XMMWORD PTR[(-8-112)+rax],xmm9 + movaps XMMWORD PTR[(-8-96)+rax],xmm10 + movaps XMMWORD PTR[(-8-80)+rax],xmm11 + movaps XMMWORD PTR[(-8-64)+rax],xmm12 + movaps XMMWORD PTR[(-8-48)+rax],xmm13 + movaps XMMWORD PTR[(-8-32)+rax],xmm14 + movaps XMMWORD PTR[(-8-16)+rax],xmm15 +$L$prologue_shaext:: + lea rax,QWORD PTR[((K256+128))] + movdqu xmm1,XMMWORD PTR[r9] + movdqu xmm2,XMMWORD PTR[16+r9] + movdqa xmm3,XMMWORD PTR[((512-128))+rax] + + mov r11d,DWORD PTR[240+rcx] + sub rsi,rdi + movups xmm15,XMMWORD PTR[rcx] + movups xmm4,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[112+rcx] + + pshufd xmm0,xmm1,01bh + pshufd xmm1,xmm1,1h + pshufd xmm2,xmm2,01bh + movdqa xmm7,xmm3 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + movdqu xmm10,XMMWORD PTR[r10] + movdqu xmm11,XMMWORD PTR[16+r10] + movdqu xmm12,XMMWORD PTR[32+r10] +DB 102,68,15,56,0,211 + movdqu xmm13,XMMWORD PTR[48+r10] + + movdqa xmm0,XMMWORD PTR[((0-128))+rax] + paddd xmm0,xmm10 +DB 102,68,15,56,0,219 + movdqa xmm9,xmm2 + movdqa xmm8,xmm1 + movups xmm14,XMMWORD PTR[rdi] + xorps xmm14,xmm15 + xorps xmm6,xmm14 + movups xmm5,XMMWORD PTR[((-80))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movups xmm4,XMMWORD PTR[((-64))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((32-128))+rax] + paddd xmm0,xmm11 +DB 102,68,15,56,0,227 + lea r10,QWORD PTR[64+r10] + movups xmm5,XMMWORD PTR[((-48))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movups xmm4,XMMWORD PTR[((-32))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((64-128))+rax] + paddd xmm0,xmm12 +DB 102,68,15,56,0,235 +DB 69,15,56,204,211 + movups xmm5,XMMWORD PTR[((-16))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm13 +DB 102,65,15,58,15,220,4 + paddd xmm10,xmm3 + movups xmm4,XMMWORD PTR[rcx] + aesenc xmm6,xmm5 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((96-128))+rax] + paddd xmm0,xmm13 +DB 69,15,56,205,213 +DB 69,15,56,204,220 + movups xmm5,XMMWORD PTR[16+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movups xmm4,XMMWORD PTR[32+rcx] + aesenc xmm6,xmm5 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,221,4 + paddd xmm11,xmm3 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((128-128))+rax] + paddd xmm0,xmm10 +DB 69,15,56,205,218 +DB 69,15,56,204,229 + movups xmm5,XMMWORD PTR[48+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 + paddd xmm12,xmm3 + cmp r11d,11 + jb $L$aesenclast1 + movups xmm4,XMMWORD PTR[64+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[80+rcx] + aesenc xmm6,xmm4 + je $L$aesenclast1 + movups xmm4,XMMWORD PTR[96+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[112+rcx] + aesenc xmm6,xmm4 +$L$aesenclast1:: + aesenclast xmm6,xmm5 + movups xmm4,XMMWORD PTR[((16-112))+rcx] + nop +DB 15,56,203,202 + movups xmm14,XMMWORD PTR[16+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[rdi*1+rsi],xmm6 + xorps xmm6,xmm14 + movups xmm5,XMMWORD PTR[((-80))+rcx] + aesenc xmm6,xmm4 + movdqa xmm0,XMMWORD PTR[((160-128))+rax] + paddd xmm0,xmm11 +DB 69,15,56,205,227 +DB 69,15,56,204,234 + movups xmm4,XMMWORD PTR[((-64))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm12 +DB 
102,65,15,58,15,219,4 + paddd xmm13,xmm3 + movups xmm5,XMMWORD PTR[((-48))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((192-128))+rax] + paddd xmm0,xmm12 +DB 69,15,56,205,236 +DB 69,15,56,204,211 + movups xmm4,XMMWORD PTR[((-32))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm13 +DB 102,65,15,58,15,220,4 + paddd xmm10,xmm3 + movups xmm5,XMMWORD PTR[((-16))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((224-128))+rax] + paddd xmm0,xmm13 +DB 69,15,56,205,213 +DB 69,15,56,204,220 + movups xmm4,XMMWORD PTR[rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm10 +DB 102,65,15,58,15,221,4 + paddd xmm11,xmm3 + movups xmm5,XMMWORD PTR[16+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((256-128))+rax] + paddd xmm0,xmm10 +DB 69,15,56,205,218 +DB 69,15,56,204,229 + movups xmm4,XMMWORD PTR[32+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 + paddd xmm12,xmm3 + movups xmm5,XMMWORD PTR[48+rcx] + aesenc xmm6,xmm4 + cmp r11d,11 + jb $L$aesenclast2 + movups xmm4,XMMWORD PTR[64+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[80+rcx] + aesenc xmm6,xmm4 + je $L$aesenclast2 + movups xmm4,XMMWORD PTR[96+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[112+rcx] + aesenc xmm6,xmm4 +$L$aesenclast2:: + aesenclast xmm6,xmm5 + movups xmm4,XMMWORD PTR[((16-112))+rcx] + nop +DB 15,56,203,202 + movups xmm14,XMMWORD PTR[32+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[16+rdi*1+rsi],xmm6 + xorps xmm6,xmm14 + movups xmm5,XMMWORD PTR[((-80))+rcx] + aesenc xmm6,xmm4 + movdqa xmm0,XMMWORD PTR[((288-128))+rax] + paddd xmm0,xmm11 +DB 69,15,56,205,227 +DB 69,15,56,204,234 + movups xmm4,XMMWORD PTR[((-64))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm12 +DB 102,65,15,58,15,219,4 + paddd xmm13,xmm3 + movups xmm5,XMMWORD PTR[((-48))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((320-128))+rax] + paddd xmm0,xmm12 +DB 69,15,56,205,236 +DB 69,15,56,204,211 + movups xmm4,XMMWORD PTR[((-32))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm13 +DB 102,65,15,58,15,220,4 + paddd xmm10,xmm3 + movups xmm5,XMMWORD PTR[((-16))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((352-128))+rax] + paddd xmm0,xmm13 +DB 69,15,56,205,213 +DB 69,15,56,204,220 + movups xmm4,XMMWORD PTR[rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm10 +DB 102,65,15,58,15,221,4 + paddd xmm11,xmm3 + movups xmm5,XMMWORD PTR[16+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((384-128))+rax] + paddd xmm0,xmm10 +DB 69,15,56,205,218 +DB 69,15,56,204,229 + movups xmm4,XMMWORD PTR[32+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 + paddd xmm12,xmm3 + movups xmm5,XMMWORD PTR[48+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((416-128))+rax] + paddd xmm0,xmm11 +DB 69,15,56,205,227 +DB 69,15,56,204,234 + cmp r11d,11 + jb $L$aesenclast3 + movups xmm4,XMMWORD PTR[64+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[80+rcx] + aesenc xmm6,xmm4 + je $L$aesenclast3 + movups xmm4,XMMWORD PTR[96+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[112+rcx] + aesenc xmm6,xmm4 +$L$aesenclast3:: + aesenclast xmm6,xmm5 + movups xmm4,XMMWORD PTR[((16-112))+rcx] + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm3,xmm12 +DB 
102,65,15,58,15,219,4 + paddd xmm13,xmm3 + movups xmm14,XMMWORD PTR[48+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[32+rdi*1+rsi],xmm6 + xorps xmm6,xmm14 + movups xmm5,XMMWORD PTR[((-80))+rcx] + aesenc xmm6,xmm4 + movups xmm4,XMMWORD PTR[((-64))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((448-128))+rax] + paddd xmm0,xmm12 +DB 69,15,56,205,236 + movdqa xmm3,xmm7 + movups xmm5,XMMWORD PTR[((-48))+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movups xmm4,XMMWORD PTR[((-32))+rcx] + aesenc xmm6,xmm5 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((480-128))+rax] + paddd xmm0,xmm13 + movups xmm5,XMMWORD PTR[((-16))+rcx] + aesenc xmm6,xmm4 + movups xmm4,XMMWORD PTR[rcx] + aesenc xmm6,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movups xmm5,XMMWORD PTR[16+rcx] + aesenc xmm6,xmm4 +DB 15,56,203,202 + + movups xmm4,XMMWORD PTR[32+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[48+rcx] + aesenc xmm6,xmm4 + cmp r11d,11 + jb $L$aesenclast4 + movups xmm4,XMMWORD PTR[64+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[80+rcx] + aesenc xmm6,xmm4 + je $L$aesenclast4 + movups xmm4,XMMWORD PTR[96+rcx] + aesenc xmm6,xmm5 + movups xmm5,XMMWORD PTR[112+rcx] + aesenc xmm6,xmm4 +$L$aesenclast4:: + aesenclast xmm6,xmm5 + movups xmm4,XMMWORD PTR[((16-112))+rcx] + nop + + paddd xmm2,xmm9 + paddd xmm1,xmm8 + + dec rdx + movups XMMWORD PTR[48+rdi*1+rsi],xmm6 + lea rdi,QWORD PTR[64+rdi] + jnz $L$oop_shaext + + pshufd xmm2,xmm2,1h + pshufd xmm3,xmm1,01bh + pshufd xmm1,xmm1,1h + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,211,8 + + movups XMMWORD PTR[r8],xmm6 + movdqu XMMWORD PTR[r9],xmm1 + movdqu XMMWORD PTR[16+r9],xmm2 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[((8+160))+rsp] +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha256_enc_shaext:: +aesni_cbc_sha256_enc_shaext ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + lea r10,QWORD PTR[aesni_cbc_sha256_enc_shaext] + cmp rbx,r10 + jb $L$not_in_shaext + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[168+rax] + jmp $L$in_prologue +$L$not_in_shaext:: + lea r10,QWORD PTR[$L$avx2_shortcut] + cmp rbx,r10 + jb $L$not_in_avx2 + + and rax,-256*4 + add rax,448 +$L$not_in_avx2:: + mov rsi,rax + mov rax,QWORD PTR[((64+56))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD 
PTR[240+r8],r15 + + lea rsi,QWORD PTR[((64+64))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) + DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_xop + DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_xop + DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_xop + + DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx + DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx + DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx + DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx2 + DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx2 + DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx2 + DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_shaext + DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_shaext + DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_shaext +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_aesni_cbc_sha256_enc_xop:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_xop,imagerel $L$epilogue_xop + +$L$SEH_info_aesni_cbc_sha256_enc_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx +$L$SEH_info_aesni_cbc_sha256_enc_avx2:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 +$L$SEH_info_aesni_cbc_sha256_enc_shaext:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_shaext,imagerel $L$epilogue_shaext + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm index e114c5eb1f5e59..53d8afc950f174 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm @@ -1,5 +1,6 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC aesni_encrypt ALIGN 16 @@ -16,7 +17,6 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[r8] lea r8,QWORD PTR[16+r8] jnz $L$oop_enc1_1 - DB 102,15,56,221,209 movups XMMWORD PTR[rdx],xmm2 DB 0F3h,0C3h ;repret @@ -38,34 +38,92 @@ DB 102,15,56,222,209 movups xmm1,XMMWORD PTR[r8] lea r8,QWORD PTR[16+r8] jnz $L$oop_dec1_2 - DB 102,15,56,223,209 movups XMMWORD PTR[rdx],xmm2 DB 0F3h,0C3h ;repret aesni_decrypt ENDP +ALIGN 16 +_aesni_encrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop2:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop2 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + DB 0F3h,0C3h ;repret +_aesni_encrypt2 
ENDP + +ALIGN 16 +_aesni_decrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop2:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop2 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,223,208 +DB 102,15,56,223,216 + DB 0F3h,0C3h ;repret +_aesni_decrypt2 ENDP + ALIGN 16 _aesni_encrypt3 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 xorps xmm3,xmm0 xorps xmm4,xmm0 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 $L$enc_loop3:: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec eax DB 102,15,56,220,225 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,220,224 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$enc_loop3 DB 102,15,56,220,209 @@ -80,25 +138,26 @@ _aesni_encrypt3 ENDP ALIGN 16 _aesni_decrypt3 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 xorps xmm3,xmm0 xorps xmm4,xmm0 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 $L$dec_loop3:: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec eax DB 102,15,56,222,225 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,222,224 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$dec_loop3 DB 102,15,56,222,209 @@ -113,28 +172,30 @@ _aesni_decrypt3 ENDP ALIGN 16 _aesni_encrypt4 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 xorps xmm3,xmm0 xorps xmm4,xmm0 xorps xmm5,xmm0 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 00fh,01fh,000h + add rax,16 $L$enc_loop4:: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec eax DB 102,15,56,220,225 DB 102,15,56,220,233 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,220,224 DB 102,15,56,220,232 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$enc_loop4 DB 102,15,56,220,209 @@ -151,28 +212,30 @@ _aesni_encrypt4 ENDP ALIGN 16 _aesni_decrypt4 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 xorps xmm3,xmm0 xorps xmm4,xmm0 xorps xmm5,xmm0 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 00fh,01fh,000h + add rax,16 $L$dec_loop4:: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec eax DB 102,15,56,222,225 DB 102,15,56,222,233 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,222,224 DB 
102,15,56,222,232 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$dec_loop4 DB 102,15,56,222,209 @@ -189,43 +252,43 @@ _aesni_decrypt4 ENDP ALIGN 16 _aesni_encrypt6 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 pxor xmm3,xmm0 -DB 102,15,56,220,209 pxor xmm4,xmm0 +DB 102,15,56,220,209 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax DB 102,15,56,220,217 pxor xmm5,xmm0 -DB 102,15,56,220,225 pxor xmm6,xmm0 -DB 102,15,56,220,233 +DB 102,15,56,220,225 pxor xmm7,xmm0 - dec eax + add rax,16 +DB 102,15,56,220,233 DB 102,15,56,220,241 - movups xmm0,XMMWORD PTR[rcx] DB 102,15,56,220,249 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$enc_loop6_enter ALIGN 16 $L$enc_loop6:: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec eax DB 102,15,56,220,225 DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 $L$enc_loop6_enter:: - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,220,224 DB 102,15,56,220,232 DB 102,15,56,220,240 DB 102,15,56,220,248 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$enc_loop6 DB 102,15,56,220,209 @@ -246,43 +309,43 @@ _aesni_encrypt6 ENDP ALIGN 16 _aesni_decrypt6 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 pxor xmm3,xmm0 -DB 102,15,56,222,209 pxor xmm4,xmm0 +DB 102,15,56,222,209 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax DB 102,15,56,222,217 pxor xmm5,xmm0 -DB 102,15,56,222,225 pxor xmm6,xmm0 -DB 102,15,56,222,233 +DB 102,15,56,222,225 pxor xmm7,xmm0 - dec eax + add rax,16 +DB 102,15,56,222,233 DB 102,15,56,222,241 - movups xmm0,XMMWORD PTR[rcx] DB 102,15,56,222,249 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$dec_loop6_enter ALIGN 16 $L$dec_loop6:: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec eax DB 102,15,56,222,225 DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 $L$dec_loop6_enter:: - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,222,224 DB 102,15,56,222,232 DB 102,15,56,222,240 DB 102,15,56,222,248 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$dec_loop6 DB 102,15,56,222,209 @@ -303,52 +366,51 @@ _aesni_decrypt6 ENDP ALIGN 16 _aesni_encrypt8 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 xorps xmm3,xmm0 -DB 102,15,56,220,209 pxor xmm4,xmm0 -DB 102,15,56,220,217 pxor xmm5,xmm0 -DB 102,15,56,220,225 pxor xmm6,xmm0 -DB 102,15,56,220,233 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,220,209 + add rax,16 pxor xmm7,xmm0 - dec eax -DB 102,15,56,220,241 +DB 102,15,56,220,217 pxor xmm8,xmm0 -DB 102,15,56,220,249 pxor xmm9,xmm0 - movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 DB 102,68,15,56,220,193 DB 102,68,15,56,220,201 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$enc_loop8_enter ALIGN 16 $L$enc_loop8:: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec eax DB 102,15,56,220,225 DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 DB 102,68,15,56,220,193 DB 102,68,15,56,220,201 - movups xmm1,XMMWORD 
PTR[16+rcx] $L$enc_loop8_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,220,224 DB 102,15,56,220,232 DB 102,15,56,220,240 DB 102,15,56,220,248 DB 102,68,15,56,220,192 DB 102,68,15,56,220,200 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$enc_loop8 DB 102,15,56,220,209 @@ -373,52 +435,51 @@ _aesni_encrypt8 ENDP ALIGN 16 _aesni_decrypt8 PROC PRIVATE movups xmm0,XMMWORD PTR[rcx] - shr eax,1 + shl eax,4 movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 xorps xmm3,xmm0 -DB 102,15,56,222,209 pxor xmm4,xmm0 -DB 102,15,56,222,217 pxor xmm5,xmm0 -DB 102,15,56,222,225 pxor xmm6,xmm0 -DB 102,15,56,222,233 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,222,209 + add rax,16 pxor xmm7,xmm0 - dec eax -DB 102,15,56,222,241 +DB 102,15,56,222,217 pxor xmm8,xmm0 -DB 102,15,56,222,249 pxor xmm9,xmm0 - movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 DB 102,68,15,56,222,193 DB 102,68,15,56,222,201 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$dec_loop8_enter ALIGN 16 $L$dec_loop8:: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec eax DB 102,15,56,222,225 DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 DB 102,68,15,56,222,193 DB 102,68,15,56,222,201 - movups xmm1,XMMWORD PTR[16+rcx] $L$dec_loop8_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,222,224 DB 102,15,56,222,232 DB 102,15,56,222,240 DB 102,15,56,222,248 DB 102,68,15,56,222,192 DB 102,68,15,56,222,200 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$dec_loop8 DB 102,15,56,222,209 @@ -565,14 +626,12 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_enc1_3 - DB 102,15,56,221,209 movups XMMWORD PTR[rsi],xmm2 jmp $L$ecb_ret ALIGN 16 $L$ecb_enc_two:: - xorps xmm4,xmm4 - call _aesni_encrypt3 + call _aesni_encrypt2 movups XMMWORD PTR[rsi],xmm2 movups XMMWORD PTR[16+rsi],xmm3 jmp $L$ecb_ret @@ -711,14 +770,12 @@ DB 102,15,56,222,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_dec1_4 - DB 102,15,56,223,209 movups XMMWORD PTR[rsi],xmm2 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_two:: - xorps xmm4,xmm4 - call _aesni_decrypt3 + call _aesni_decrypt2 movups XMMWORD PTR[rsi],xmm2 movups XMMWORD PTR[16+rsi],xmm3 jmp $L$ecb_ret @@ -792,53 +849,53 @@ $L$SEH_begin_aesni_ccm64_encrypt_blocks:: movaps XMMWORD PTR[48+rsp],xmm9 $L$ccm64_enc_body:: mov eax,DWORD PTR[240+rcx] - movdqu xmm9,XMMWORD PTR[r8] - movdqa xmm6,XMMWORD PTR[$L$increment64] + movdqu xmm6,XMMWORD PTR[r8] + movdqa xmm9,XMMWORD PTR[$L$increment64] movdqa xmm7,XMMWORD PTR[$L$bswap_mask] - shr eax,1 + shl eax,4 + mov r10d,16 lea r11,QWORD PTR[rcx] movdqu xmm3,XMMWORD PTR[r9] - movdqa xmm2,xmm9 - mov r10d,eax -DB 102,68,15,56,0,207 + movdqa xmm2,xmm6 + lea rcx,QWORD PTR[32+rax*1+rcx] +DB 102,15,56,0,247 + sub r10,rax jmp $L$ccm64_enc_outer ALIGN 16 $L$ccm64_enc_outer:: movups xmm0,XMMWORD PTR[r11] - mov eax,r10d + mov rax,r10 movups xmm8,XMMWORD PTR[rdi] xorps xmm2,xmm0 movups xmm1,XMMWORD PTR[16+r11] xorps xmm0,xmm8 - lea rcx,QWORD PTR[32+r11] xorps xmm3,xmm0 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[32+r11] $L$ccm64_enc2_loop:: DB 102,15,56,220,209 - dec eax DB 102,15,56,220,217 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD 
PTR[rax*1+rcx] + add rax,32 DB 102,15,56,220,208 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,220,216 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$ccm64_enc2_loop DB 102,15,56,220,209 DB 102,15,56,220,217 - paddq xmm9,xmm6 + paddq xmm6,xmm9 + dec rdx DB 102,15,56,221,208 DB 102,15,56,221,216 - dec rdx lea rdi,QWORD PTR[16+rdi] xorps xmm8,xmm2 - movdqa xmm2,xmm9 + movdqa xmm2,xmm6 movups XMMWORD PTR[rsi],xmm8 - lea rsi,QWORD PTR[16+rsi] DB 102,15,56,0,215 + lea rsi,QWORD PTR[16+rsi] jnz $L$ccm64_enc_outer movups XMMWORD PTR[r9],xmm3 @@ -876,15 +933,15 @@ $L$SEH_begin_aesni_ccm64_decrypt_blocks:: movaps XMMWORD PTR[48+rsp],xmm9 $L$ccm64_dec_body:: mov eax,DWORD PTR[240+rcx] - movups xmm9,XMMWORD PTR[r8] + movups xmm6,XMMWORD PTR[r8] movdqu xmm3,XMMWORD PTR[r9] - movdqa xmm6,XMMWORD PTR[$L$increment64] + movdqa xmm9,XMMWORD PTR[$L$increment64] movdqa xmm7,XMMWORD PTR[$L$bswap_mask] - movaps xmm2,xmm9 + movaps xmm2,xmm6 mov r10d,eax mov r11,rcx -DB 102,68,15,56,0,207 +DB 102,15,56,0,247 movups xmm0,XMMWORD PTR[rcx] movups xmm1,XMMWORD PTR[16+rcx] lea rcx,QWORD PTR[32+rcx] @@ -895,17 +952,20 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_enc1_5 - DB 102,15,56,221,209 + shl r10d,4 + mov eax,16 movups xmm8,XMMWORD PTR[rdi] - paddq xmm9,xmm6 + paddq xmm6,xmm9 lea rdi,QWORD PTR[16+rdi] + sub rax,r10 + lea rcx,QWORD PTR[32+r10*1+r11] + mov r10,rax jmp $L$ccm64_dec_outer ALIGN 16 $L$ccm64_dec_outer:: xorps xmm8,xmm2 - movdqa xmm2,xmm9 - mov eax,r10d + movdqa xmm2,xmm6 movups XMMWORD PTR[rsi],xmm8 lea rsi,QWORD PTR[16+rsi] DB 102,15,56,0,215 @@ -914,36 +974,36 @@ DB 102,15,56,0,215 jz $L$ccm64_dec_break movups xmm0,XMMWORD PTR[r11] - shr eax,1 + mov rax,r10 movups xmm1,XMMWORD PTR[16+r11] xorps xmm8,xmm0 - lea rcx,QWORD PTR[32+r11] xorps xmm2,xmm0 xorps xmm3,xmm8 - movups xmm0,XMMWORD PTR[rcx] - + movups xmm0,XMMWORD PTR[32+r11] + jmp $L$ccm64_dec2_loop +ALIGN 16 $L$ccm64_dec2_loop:: DB 102,15,56,220,209 - dec eax DB 102,15,56,220,217 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 DB 102,15,56,220,208 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,220,216 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jnz $L$ccm64_dec2_loop movups xmm8,XMMWORD PTR[rdi] - paddq xmm9,xmm6 + paddq xmm6,xmm9 DB 102,15,56,220,209 DB 102,15,56,220,217 - lea rdi,QWORD PTR[16+rdi] DB 102,15,56,221,208 DB 102,15,56,221,216 + lea rdi,QWORD PTR[16+rdi] jmp $L$ccm64_dec_outer ALIGN 16 $L$ccm64_dec_break:: + mov eax,DWORD PTR[240+r11] movups xmm0,XMMWORD PTR[r11] movups xmm1,XMMWORD PTR[16+r11] xorps xmm8,xmm0 @@ -955,7 +1015,6 @@ DB 102,15,56,220,217 movups xmm1,XMMWORD PTR[r11] lea r11,QWORD PTR[16+r11] jnz $L$oop_enc1_6 - DB 102,15,56,221,217 movups XMMWORD PTR[r9],xmm3 movaps xmm6,XMMWORD PTR[rsp] @@ -984,211 +1043,529 @@ $L$SEH_begin_aesni_ctr32_encrypt_blocks:: mov r8,QWORD PTR[40+rsp] - lea rsp,QWORD PTR[((-200))+rsp] - movaps XMMWORD PTR[32+rsp],xmm6 - movaps XMMWORD PTR[48+rsp],xmm7 - movaps XMMWORD PTR[64+rsp],xmm8 - movaps XMMWORD PTR[80+rsp],xmm9 - movaps XMMWORD PTR[96+rsp],xmm10 - movaps XMMWORD PTR[112+rsp],xmm11 - movaps XMMWORD PTR[128+rsp],xmm12 - movaps XMMWORD PTR[144+rsp],xmm13 - movaps XMMWORD PTR[160+rsp],xmm14 - movaps XMMWORD PTR[176+rsp],xmm15 + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,288 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps 
XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 $L$ctr32_body:: + lea rbp,QWORD PTR[((-8))+rax] + cmp rdx,1 je $L$ctr32_one_shortcut - movdqu xmm14,XMMWORD PTR[r8] - movdqa xmm15,XMMWORD PTR[$L$bswap_mask] - xor eax,eax -DB 102,69,15,58,22,242,3 -DB 102,68,15,58,34,240,3 - + movdqu xmm2,XMMWORD PTR[r8] + movdqu xmm0,XMMWORD PTR[rcx] + mov r8d,DWORD PTR[12+r8] + pxor xmm2,xmm0 + mov r11d,DWORD PTR[12+rcx] + movdqa XMMWORD PTR[rsp],xmm2 + bswap r8d + movdqa xmm3,xmm2 + movdqa xmm4,xmm2 + movdqa xmm5,xmm2 + movdqa XMMWORD PTR[64+rsp],xmm2 + movdqa XMMWORD PTR[80+rsp],xmm2 + movdqa XMMWORD PTR[96+rsp],xmm2 + mov r10,rdx + movdqa XMMWORD PTR[112+rsp],xmm2 + + lea rax,QWORD PTR[1+r8] + lea rdx,QWORD PTR[2+r8] + bswap eax + bswap edx + xor eax,r11d + xor edx,r11d +DB 102,15,58,34,216,3 + lea rax,QWORD PTR[3+r8] + movdqa XMMWORD PTR[16+rsp],xmm3 +DB 102,15,58,34,226,3 + bswap eax + mov rdx,r10 + lea r10,QWORD PTR[4+r8] + movdqa XMMWORD PTR[32+rsp],xmm4 + xor eax,r11d + bswap r10d +DB 102,15,58,34,232,3 + xor r10d,r11d + movdqa XMMWORD PTR[48+rsp],xmm5 + lea r9,QWORD PTR[5+r8] + mov DWORD PTR[((64+12))+rsp],r10d + bswap r9d + lea r10,QWORD PTR[6+r8] mov eax,DWORD PTR[240+rcx] + xor r9d,r11d bswap r10d - pxor xmm12,xmm12 - pxor xmm13,xmm13 -DB 102,69,15,58,34,226,0 - lea r11,QWORD PTR[3+r10] -DB 102,69,15,58,34,235,0 - inc r10d -DB 102,69,15,58,34,226,1 - inc r11 -DB 102,69,15,58,34,235,1 - inc r10d -DB 102,69,15,58,34,226,2 - inc r11 -DB 102,69,15,58,34,235,2 - movdqa XMMWORD PTR[rsp],xmm12 -DB 102,69,15,56,0,231 - movdqa XMMWORD PTR[16+rsp],xmm13 -DB 102,69,15,56,0,239 - - pshufd xmm2,xmm12,192 - pshufd xmm3,xmm12,128 - pshufd xmm4,xmm12,64 - cmp rdx,6 + mov DWORD PTR[((80+12))+rsp],r9d + xor r10d,r11d + lea r9,QWORD PTR[7+r8] + mov DWORD PTR[((96+12))+rsp],r10d + bswap r9d + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + xor r9d,r11d + and r10d,71303168 + mov DWORD PTR[((112+12))+rsp],r9d + + movups xmm1,XMMWORD PTR[16+rcx] + + movdqa xmm6,XMMWORD PTR[64+rsp] + movdqa xmm7,XMMWORD PTR[80+rsp] + + cmp rdx,8 jb $L$ctr32_tail - shr eax,1 - mov r11,rcx - mov r10d,eax + sub rdx,6 + cmp r10d,4194304 + je $L$ctr32_6x + + lea rcx,QWORD PTR[128+rcx] + sub rdx,2 + jmp $L$ctr32_loop8 + +ALIGN 16 +$L$ctr32_6x:: + shl eax,4 + mov r10d,48 + bswap r11d + lea rcx,QWORD PTR[32+rax*1+rcx] + sub r10,rax jmp $L$ctr32_loop6 ALIGN 16 $L$ctr32_loop6:: - pshufd xmm5,xmm13,192 - por xmm2,xmm14 - movups xmm0,XMMWORD PTR[r11] - pshufd xmm6,xmm13,128 - por xmm3,xmm14 - movups xmm1,XMMWORD PTR[16+r11] - pshufd xmm7,xmm13,64 - por xmm4,xmm14 - por xmm5,xmm14 - xorps xmm2,xmm0 - por xmm6,xmm14 - por xmm7,xmm14 + add r8d,6 + movups xmm0,XMMWORD PTR[((-48))+r10*1+rcx] +DB 102,15,56,220,209 + mov eax,r8d + xor eax,r11d +DB 102,15,56,220,217 +DB 00fh,038h,0f1h,044h,024h,12 + lea eax,DWORD PTR[1+r8] +DB 102,15,56,220,225 + xor eax,r11d +DB 00fh,038h,0f1h,044h,024h,28 +DB 102,15,56,220,233 + lea eax,DWORD PTR[2+r8] + xor eax,r11d +DB 102,15,56,220,241 +DB 00fh,038h,0f1h,044h,024h,44 + lea eax,DWORD PTR[3+r8] +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-32))+r10*1+rcx] + xor eax,r11d + +DB 102,15,56,220,208 +DB 00fh,038h,0f1h,044h,024h,60 + lea eax,DWORD PTR[4+r8] +DB 102,15,56,220,216 + xor eax,r11d +DB 00fh,038h,0f1h,044h,024h,76 +DB 102,15,56,220,224 + lea eax,DWORD PTR[5+r8] + xor eax,r11d +DB 102,15,56,220,232 +DB 00fh,038h,0f1h,044h,024h,92 + mov rax,r10 
+DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-16))+r10*1+rcx] + + call $L$enc_loop6 + + movdqu xmm8,XMMWORD PTR[rdi] + movdqu xmm9,XMMWORD PTR[16+rdi] + movdqu xmm10,XMMWORD PTR[32+rdi] + movdqu xmm11,XMMWORD PTR[48+rdi] + movdqu xmm12,XMMWORD PTR[64+rdi] + movdqu xmm13,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + movups xmm1,XMMWORD PTR[((-64))+r10*1+rcx] + pxor xmm8,xmm2 + movaps xmm2,XMMWORD PTR[rsp] + pxor xmm9,xmm3 + movaps xmm3,XMMWORD PTR[16+rsp] + pxor xmm10,xmm4 + movaps xmm4,XMMWORD PTR[32+rsp] + pxor xmm11,xmm5 + movaps xmm5,XMMWORD PTR[48+rsp] + pxor xmm12,xmm6 + movaps xmm6,XMMWORD PTR[64+rsp] + pxor xmm13,xmm7 + movaps xmm7,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[rsi],xmm8 + movdqu XMMWORD PTR[16+rsi],xmm9 + movdqu XMMWORD PTR[32+rsi],xmm10 + movdqu XMMWORD PTR[48+rsi],xmm11 + movdqu XMMWORD PTR[64+rsi],xmm12 + movdqu XMMWORD PTR[80+rsi],xmm13 + lea rsi,QWORD PTR[96+rsi] + sub rdx,6 + jnc $L$ctr32_loop6 + add rdx,6 + jz $L$ctr32_done + lea eax,DWORD PTR[((-48))+r10] + lea rcx,QWORD PTR[((-80))+r10*1+rcx] + neg eax + shr eax,4 + jmp $L$ctr32_tail - pxor xmm3,xmm0 +ALIGN 32 +$L$ctr32_loop8:: + add r8d,8 + movdqa xmm8,XMMWORD PTR[96+rsp] DB 102,15,56,220,209 - lea rcx,QWORD PTR[32+r11] - pxor xmm4,xmm0 + mov r9d,r8d + movdqa xmm9,XMMWORD PTR[112+rsp] DB 102,15,56,220,217 - movdqa xmm13,XMMWORD PTR[$L$increment32] - pxor xmm5,xmm0 + bswap r9d + movups xmm0,XMMWORD PTR[((32-128))+rcx] DB 102,15,56,220,225 - movdqa xmm12,XMMWORD PTR[rsp] - pxor xmm6,xmm0 + xor r9d,r11d + nop DB 102,15,56,220,233 - pxor xmm7,xmm0 - movups xmm0,XMMWORD PTR[rcx] - dec eax + mov DWORD PTR[((0+12))+rsp],r9d + lea r9,QWORD PTR[1+r8] DB 102,15,56,220,241 DB 102,15,56,220,249 - jmp $L$ctr32_enc_loop6_enter -ALIGN 16 -$L$ctr32_enc_loop6:: +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((48-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((16+12))+rsp],r9d + lea r9,QWORD PTR[2+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((64-128))+rcx] + bswap r9d DB 102,15,56,220,209 DB 102,15,56,220,217 - dec eax + xor r9d,r11d +DB 066h,090h DB 102,15,56,220,225 DB 102,15,56,220,233 + mov DWORD PTR[((32+12))+rsp],r9d + lea r9,QWORD PTR[3+r8] DB 102,15,56,220,241 DB 102,15,56,220,249 -$L$ctr32_enc_loop6_enter:: - movups xmm1,XMMWORD PTR[16+rcx] +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((80-128))+rcx] + bswap r9d DB 102,15,56,220,208 DB 102,15,56,220,216 - lea rcx,QWORD PTR[32+rcx] + xor r9d,r11d +DB 066h,090h DB 102,15,56,220,224 DB 102,15,56,220,232 + mov DWORD PTR[((48+12))+rsp],r9d + lea r9,QWORD PTR[4+r8] DB 102,15,56,220,240 DB 102,15,56,220,248 - movups xmm0,XMMWORD PTR[rcx] - jnz $L$ctr32_enc_loop6 - +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((96-128))+rcx] + bswap r9d DB 102,15,56,220,209 - paddd xmm12,xmm13 DB 102,15,56,220,217 - paddd xmm13,XMMWORD PTR[16+rsp] + xor r9d,r11d +DB 066h,090h DB 102,15,56,220,225 - movdqa XMMWORD PTR[rsp],xmm12 DB 102,15,56,220,233 - movdqa XMMWORD PTR[16+rsp],xmm13 + mov DWORD PTR[((64+12))+rsp],r9d + lea r9,QWORD PTR[5+r8] DB 102,15,56,220,241 -DB 102,69,15,56,0,231 DB 102,15,56,220,249 -DB 102,69,15,56,0,239 - -DB 102,15,56,221,208 - movups xmm8,XMMWORD PTR[rdi] -DB 102,15,56,221,216 - movups xmm9,XMMWORD PTR[16+rdi] -DB 102,15,56,221,224 - movups xmm10,XMMWORD 
PTR[32+rdi] -DB 102,15,56,221,232 - movups xmm11,XMMWORD PTR[48+rdi] -DB 102,15,56,221,240 - movups xmm1,XMMWORD PTR[64+rdi] -DB 102,15,56,221,248 - movups xmm0,XMMWORD PTR[80+rdi] - lea rdi,QWORD PTR[96+rdi] - - xorps xmm8,xmm2 - pshufd xmm2,xmm12,192 - xorps xmm9,xmm3 - pshufd xmm3,xmm12,128 - movups XMMWORD PTR[rsi],xmm8 - xorps xmm10,xmm4 - pshufd xmm4,xmm12,64 - movups XMMWORD PTR[16+rsi],xmm9 - xorps xmm11,xmm5 - movups XMMWORD PTR[32+rsi],xmm10 - xorps xmm1,xmm6 - movups XMMWORD PTR[48+rsi],xmm11 - xorps xmm0,xmm7 - movups XMMWORD PTR[64+rsi],xmm1 - movups XMMWORD PTR[80+rsi],xmm0 - lea rsi,QWORD PTR[96+rsi] - mov eax,r10d - sub rdx,6 - jnc $L$ctr32_loop6 - - add rdx,6 - jz $L$ctr32_done - mov rcx,r11 - lea eax,DWORD PTR[1+rax*1+rax] - -$L$ctr32_tail:: - por xmm2,xmm14 - movups xmm8,XMMWORD PTR[rdi] - cmp rdx,2 - jb $L$ctr32_one +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((112-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((80+12))+rsp],r9d + lea r9,QWORD PTR[6+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((128-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((96+12))+rsp],r9d + lea r9,QWORD PTR[7+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((144-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + xor r9d,r11d + movdqu xmm10,XMMWORD PTR[rdi] +DB 102,15,56,220,232 + mov DWORD PTR[((112+12))+rsp],r9d + cmp eax,11 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((160-128))+rcx] + + jb $L$ctr32_enc_done - por xmm3,xmm14 - movups xmm9,XMMWORD PTR[16+rdi] - je $L$ctr32_two +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((176-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((192-128))+rcx] + je $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((208-128))+rcx] - pshufd xmm5,xmm13,192 - por xmm4,xmm14 - movups xmm10,XMMWORD PTR[32+rdi] +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((224-128))+rcx] + jmp $L$ctr32_enc_done + +ALIGN 16 +$L$ctr32_enc_done:: + movdqu xmm11,XMMWORD PTR[16+rdi] + pxor xmm10,xmm0 + movdqu xmm12,XMMWORD PTR[32+rdi] + pxor xmm11,xmm0 + movdqu xmm13,XMMWORD PTR[48+rdi] + pxor xmm12,xmm0 + movdqu xmm14,XMMWORD PTR[64+rdi] + pxor xmm13,xmm0 + movdqu xmm15,XMMWORD PTR[80+rdi] + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movdqu 
xmm1,XMMWORD PTR[96+rdi] + lea rdi,QWORD PTR[128+rdi] + +DB 102,65,15,56,221,210 + pxor xmm1,xmm0 + movdqu xmm10,XMMWORD PTR[((112-128))+rdi] +DB 102,65,15,56,221,219 + pxor xmm10,xmm0 + movdqa xmm11,XMMWORD PTR[rsp] +DB 102,65,15,56,221,228 +DB 102,65,15,56,221,237 + movdqa xmm12,XMMWORD PTR[16+rsp] + movdqa xmm13,XMMWORD PTR[32+rsp] +DB 102,65,15,56,221,246 +DB 102,65,15,56,221,255 + movdqa xmm14,XMMWORD PTR[48+rsp] + movdqa xmm15,XMMWORD PTR[64+rsp] +DB 102,68,15,56,221,193 + movdqa xmm0,XMMWORD PTR[80+rsp] + movups xmm1,XMMWORD PTR[((16-128))+rcx] +DB 102,69,15,56,221,202 + + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD PTR[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD PTR[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD PTR[80+rsi],xmm7 + movdqa xmm7,xmm0 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + + sub rdx,8 + jnc $L$ctr32_loop8 + + add rdx,8 + jz $L$ctr32_done + lea rcx,QWORD PTR[((-128))+rcx] + +$L$ctr32_tail:: + lea rcx,QWORD PTR[16+rcx] cmp rdx,4 - jb $L$ctr32_three + jb $L$ctr32_loop3 + je $L$ctr32_loop4 - pshufd xmm6,xmm13,128 - por xmm5,xmm14 - movups xmm11,XMMWORD PTR[48+rdi] - je $L$ctr32_four + shl eax,4 + movdqa xmm8,XMMWORD PTR[96+rsp] + pxor xmm9,xmm9 - por xmm6,xmm14 - xorps xmm7,xmm7 + movups xmm0,XMMWORD PTR[16+rcx] +DB 102,15,56,220,209 +DB 102,15,56,220,217 + lea rcx,QWORD PTR[((32-16))+rax*1+rcx] + neg rax +DB 102,15,56,220,225 + add rax,16 + movups xmm10,XMMWORD PTR[rdi] +DB 102,15,56,220,233 +DB 102,15,56,220,241 + movups xmm11,XMMWORD PTR[16+rdi] + movups xmm12,XMMWORD PTR[32+rdi] +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 - call _aesni_encrypt6 + call $L$enc_loop8_enter - movups xmm1,XMMWORD PTR[64+rdi] - xorps xmm8,xmm2 - xorps xmm9,xmm3 - movups XMMWORD PTR[rsi],xmm8 - xorps xmm10,xmm4 - movups XMMWORD PTR[16+rsi],xmm9 - xorps xmm11,xmm5 - movups XMMWORD PTR[32+rsi],xmm10 - xorps xmm1,xmm6 - movups XMMWORD PTR[48+rsi],xmm11 - movups XMMWORD PTR[64+rsi],xmm1 + movdqu xmm13,XMMWORD PTR[48+rdi] + pxor xmm2,xmm10 + movdqu xmm10,XMMWORD PTR[64+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm10 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + cmp rdx,6 + jb $L$ctr32_done + + movups xmm11,XMMWORD PTR[80+rdi] + xorps xmm7,xmm11 + movups XMMWORD PTR[80+rsi],xmm7 + je $L$ctr32_done + + movups xmm12,XMMWORD PTR[96+rdi] + xorps xmm8,xmm12 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop4:: +DB 102,15,56,220,209 + lea rcx,QWORD PTR[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[rcx] + jnz $L$ctr32_loop4 +DB 102,15,56,221,209 +DB 102,15,56,221,217 + movups xmm10,XMMWORD PTR[rdi] + movups xmm11,XMMWORD PTR[16+rdi] +DB 102,15,56,221,225 +DB 102,15,56,221,233 + movups xmm12,XMMWORD PTR[32+rdi] + movups xmm13,XMMWORD PTR[48+rdi] + + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm3,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[48+rsi],xmm5 + jmp $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop3:: +DB 102,15,56,220,209 + lea rcx,QWORD PTR[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR[rcx] + jnz $L$ctr32_loop3 +DB 
102,15,56,221,209 +DB 102,15,56,221,217 +DB 102,15,56,221,225 + + movups xmm10,XMMWORD PTR[rdi] + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + cmp rdx,2 + jb $L$ctr32_done + + movups xmm11,XMMWORD PTR[16+rdi] + xorps xmm3,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + je $L$ctr32_done + + movups xmm12,XMMWORD PTR[32+rdi] + xorps xmm4,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 jmp $L$ctr32_done ALIGN 16 $L$ctr32_one_shortcut:: movups xmm2,XMMWORD PTR[r8] - movups xmm8,XMMWORD PTR[rdi] + movups xmm10,XMMWORD PTR[rdi] mov eax,DWORD PTR[240+rcx] -$L$ctr32_one:: movups xmm0,XMMWORD PTR[rcx] movups xmm1,XMMWORD PTR[16+rcx] lea rcx,QWORD PTR[32+rcx] @@ -1199,58 +1576,26 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_enc1_7 - DB 102,15,56,221,209 - xorps xmm8,xmm2 - movups XMMWORD PTR[rsi],xmm8 - jmp $L$ctr32_done - -ALIGN 16 -$L$ctr32_two:: - xorps xmm4,xmm4 - call _aesni_encrypt3 - xorps xmm8,xmm2 - xorps xmm9,xmm3 - movups XMMWORD PTR[rsi],xmm8 - movups XMMWORD PTR[16+rsi],xmm9 - jmp $L$ctr32_done - -ALIGN 16 -$L$ctr32_three:: - call _aesni_encrypt3 - xorps xmm8,xmm2 - xorps xmm9,xmm3 - movups XMMWORD PTR[rsi],xmm8 - xorps xmm10,xmm4 - movups XMMWORD PTR[16+rsi],xmm9 - movups XMMWORD PTR[32+rsi],xmm10 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 jmp $L$ctr32_done ALIGN 16 -$L$ctr32_four:: - call _aesni_encrypt4 - xorps xmm8,xmm2 - xorps xmm9,xmm3 - movups XMMWORD PTR[rsi],xmm8 - xorps xmm10,xmm4 - movups XMMWORD PTR[16+rsi],xmm9 - xorps xmm11,xmm5 - movups XMMWORD PTR[32+rsi],xmm10 - movups XMMWORD PTR[48+rsi],xmm11 - $L$ctr32_done:: - movaps xmm6,XMMWORD PTR[32+rsp] - movaps xmm7,XMMWORD PTR[48+rsp] - movaps xmm8,XMMWORD PTR[64+rsp] - movaps xmm9,XMMWORD PTR[80+rsp] - movaps xmm10,XMMWORD PTR[96+rsp] - movaps xmm11,XMMWORD PTR[112+rsp] - movaps xmm12,XMMWORD PTR[128+rsp] - movaps xmm13,XMMWORD PTR[144+rsp] - movaps xmm14,XMMWORD PTR[160+rsp] - movaps xmm15,XMMWORD PTR[176+rsp] - lea rsp,QWORD PTR[200+rsp] -$L$ctr32_ret:: + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$ctr32_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret @@ -1272,255 +1617,297 @@ $L$SEH_begin_aesni_xts_encrypt:: mov r9,QWORD PTR[48+rsp] - lea rsp,QWORD PTR[((-264))+rsp] - movaps XMMWORD PTR[96+rsp],xmm6 - movaps XMMWORD PTR[112+rsp],xmm7 - movaps XMMWORD PTR[128+rsp],xmm8 - movaps XMMWORD PTR[144+rsp],xmm9 - movaps XMMWORD PTR[160+rsp],xmm10 - movaps XMMWORD PTR[176+rsp],xmm11 - movaps XMMWORD PTR[192+rsp],xmm12 - movaps XMMWORD PTR[208+rsp],xmm13 - movaps XMMWORD PTR[224+rsp],xmm14 - movaps XMMWORD PTR[240+rsp],xmm15 + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 $L$xts_enc_body:: - movups xmm15,XMMWORD PTR[r9] + lea rbp,QWORD PTR[((-8))+rax] + movups 
xmm2,XMMWORD PTR[r9] mov eax,DWORD PTR[240+r8] mov r10d,DWORD PTR[240+rcx] movups xmm0,XMMWORD PTR[r8] movups xmm1,XMMWORD PTR[16+r8] lea r8,QWORD PTR[32+r8] - xorps xmm15,xmm0 + xorps xmm2,xmm0 $L$oop_enc1_8:: -DB 102,68,15,56,220,249 +DB 102,15,56,220,209 dec eax movups xmm1,XMMWORD PTR[r8] lea r8,QWORD PTR[16+r8] jnz $L$oop_enc1_8 - -DB 102,68,15,56,221,249 +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[rcx] mov r11,rcx mov eax,r10d + shl r10d,4 mov r9,rdx and rdx,-16 + movups xmm1,XMMWORD PTR[16+r10*1+rcx] + movdqa xmm8,XMMWORD PTR[$L$xts_magic] - pxor xmm14,xmm14 - pcmpgtd xmm14,xmm15 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + movdqa xmm15,xmm2 + pshufd xmm9,xmm2,05fh + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm10,xmm15 + psrad xmm14,31 paddq xmm15,xmm15 - pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm11,xmm15 + psrad xmm14,31 paddq xmm15,xmm15 - pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm12,xmm15 + psrad xmm14,31 paddq xmm15,xmm15 - pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 paddq xmm15,xmm15 pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 + pxor xmm14,xmm0 pxor xmm15,xmm9 + movaps XMMWORD PTR[96+rsp],xmm1 + sub rdx,16*6 jc $L$xts_enc_short - shr eax,1 - sub eax,1 - mov r10d,eax + mov eax,16+96 + lea rcx,QWORD PTR[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + mov r10,rax + lea r8,QWORD PTR[$L$xts_magic] jmp $L$xts_enc_grandloop -ALIGN 16 +ALIGN 32 $L$xts_enc_grandloop:: - pshufd xmm9,xmm14,013h - movdqa xmm14,xmm15 - paddq xmm15,xmm15 movdqu xmm2,XMMWORD PTR[rdi] - pand xmm9,xmm8 + movdqa xmm8,xmm0 movdqu xmm3,XMMWORD PTR[16+rdi] - pxor xmm15,xmm9 - - movdqu xmm4,XMMWORD PTR[32+rdi] pxor xmm2,xmm10 - movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] pxor xmm3,xmm11 - movdqu xmm6,XMMWORD PTR[64+rdi] +DB 102,15,56,220,209 + movdqu xmm5,XMMWORD PTR[48+rdi] pxor xmm4,xmm12 - movdqu xmm7,XMMWORD PTR[80+rdi] - lea rdi,QWORD PTR[96+rdi] +DB 102,15,56,220,217 + movdqu xmm6,XMMWORD PTR[64+rdi] pxor xmm5,xmm13 - movups xmm0,XMMWORD PTR[r11] +DB 102,15,56,220,225 + movdqu xmm7,XMMWORD PTR[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD PTR[96+rsp] pxor xmm6,xmm14 - pxor xmm7,xmm15 - - +DB 102,15,56,220,233 + movups xmm0,XMMWORD PTR[32+r11] + lea rdi,QWORD PTR[96+rdi] + pxor xmm7,xmm8 - movups xmm1,XMMWORD PTR[16+r11] - pxor xmm2,xmm0 - pxor xmm3,xmm0 + pxor xmm10,xmm9 +DB 102,15,56,220,241 + pxor xmm11,xmm9 movdqa XMMWORD PTR[rsp],xmm10 -DB 102,15,56,220,209 - lea rcx,QWORD PTR[32+r11] - pxor xmm4,xmm0 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,220,208 + pxor xmm13,xmm9 movdqa XMMWORD PTR[16+rsp],xmm11 -DB 102,15,56,220,217 - pxor xmm5,xmm0 +DB 102,15,56,220,216 + pxor xmm14,xmm9 movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pxor xmm8,xmm9 + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[64+r11] + movdqa XMMWORD 
PTR[80+rsp],xmm8 + pshufd xmm9,xmm15,05fh + jmp $L$xts_enc_loop6 +ALIGN 32 +$L$xts_enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 DB 102,15,56,220,225 - pxor xmm6,xmm0 - movdqa XMMWORD PTR[48+rsp],xmm13 DB 102,15,56,220,233 - pxor xmm7,xmm0 - movups xmm0,XMMWORD PTR[rcx] - dec eax - movdqa XMMWORD PTR[64+rsp],xmm14 DB 102,15,56,220,241 - movdqa XMMWORD PTR[80+rsp],xmm15 DB 102,15,56,220,249 - pxor xmm14,xmm14 - pcmpgtd xmm14,xmm15 - jmp $L$xts_enc_loop6_enter + movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx] + add rax,32 -ALIGN 16 -$L$xts_enc_loop6:: +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx] + jnz $L$xts_enc_loop6 + + movdqa xmm8,XMMWORD PTR[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 DB 102,15,56,220,209 + paddq xmm15,xmm15 + psrad xmm14,31 DB 102,15,56,220,217 - dec eax + pand xmm14,xmm8 + movups xmm10,XMMWORD PTR[r11] DB 102,15,56,220,225 DB 102,15,56,220,233 DB 102,15,56,220,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 DB 102,15,56,220,249 -$L$xts_enc_loop6_enter:: - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[((-64))+rcx] + + movdqa xmm14,xmm9 DB 102,15,56,220,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 DB 102,15,56,220,216 - lea rcx,QWORD PTR[32+rcx] + psrad xmm14,31 + paddq xmm15,xmm15 DB 102,15,56,220,224 DB 102,15,56,220,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 DB 102,15,56,220,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 DB 102,15,56,220,248 - movups xmm0,XMMWORD PTR[rcx] - jnz $L$xts_enc_loop6 + movups xmm0,XMMWORD PTR[((-48))+rcx] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - paddq xmm15,xmm15 + paddd xmm9,xmm9 DB 102,15,56,220,209 - pand xmm9,xmm8 + pxor xmm11,xmm15 + psrad xmm14,31 DB 102,15,56,220,217 - pcmpgtd xmm14,xmm15 + paddq xmm15,xmm15 + pand xmm14,xmm8 DB 102,15,56,220,225 - pxor xmm15,xmm9 DB 102,15,56,220,233 + movdqa XMMWORD PTR[48+rsp],xmm13 + pxor xmm15,xmm14 DB 102,15,56,220,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 DB 102,15,56,220,249 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[((-32))+rcx] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm10,xmm15 - paddq xmm15,xmm15 + paddd xmm9,xmm9 DB 102,15,56,220,208 - pand xmm9,xmm8 + pxor xmm12,xmm15 + psrad xmm14,31 DB 102,15,56,220,216 - pcmpgtd xmm14,xmm15 + paddq xmm15,xmm15 + pand xmm14,xmm8 DB 102,15,56,220,224 - pxor xmm15,xmm9 DB 102,15,56,220,232 DB 102,15,56,220,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 DB 102,15,56,220,248 - movups xmm0,XMMWORD PTR[32+rcx] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm11,xmm15 - paddq xmm15,xmm15 + movdqa xmm0,xmm9 + paddd xmm9,xmm9 DB 102,15,56,220,209 - pand xmm9,xmm8 + pxor xmm13,xmm15 + psrad xmm0,31 DB 102,15,56,220,217 - pcmpgtd xmm14,xmm15 + paddq xmm15,xmm15 + pand xmm0,xmm8 DB 102,15,56,220,225 - pxor xmm15,xmm9 DB 102,15,56,220,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD PTR[r11] DB 102,15,56,220,241 DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[16+r11] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm12,xmm15 + pxor xmm14,xmm15 +DB 102,15,56,221,84,36,0 + psrad xmm9,31 paddq xmm15,xmm15 -DB 102,15,56,221,208 +DB 102,15,56,221,92,36,16 +DB 102,15,56,221,100,36,32 pand xmm9,xmm8 -DB 102,15,56,221,216 - pcmpgtd xmm14,xmm15 -DB 102,15,56,221,224 + mov rax,r10 +DB 102,15,56,221,108,36,48 +DB 102,15,56,221,116,36,64 +DB 102,15,56,221,124,36,80 pxor xmm15,xmm9 -DB 102,15,56,221,232 -DB 102,15,56,221,240 -DB 102,15,56,221,248 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa 
xmm13,xmm15 - paddq xmm15,xmm15 - xorps xmm2,XMMWORD PTR[rsp] - pand xmm9,xmm8 - xorps xmm3,XMMWORD PTR[16+rsp] - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - - xorps xmm4,XMMWORD PTR[32+rsp] - movups XMMWORD PTR[rsi],xmm2 - xorps xmm5,XMMWORD PTR[48+rsp] - movups XMMWORD PTR[16+rsi],xmm3 - xorps xmm6,XMMWORD PTR[64+rsp] - movups XMMWORD PTR[32+rsi],xmm4 - xorps xmm7,XMMWORD PTR[80+rsp] - movups XMMWORD PTR[48+rsi],xmm5 - mov eax,r10d - movups XMMWORD PTR[64+rsi],xmm6 - movups XMMWORD PTR[80+rsi],xmm7 lea rsi,QWORD PTR[96+rsi] + movups XMMWORD PTR[(-96)+rsi],xmm2 + movups XMMWORD PTR[(-80)+rsi],xmm3 + movups XMMWORD PTR[(-64)+rsi],xmm4 + movups XMMWORD PTR[(-48)+rsi],xmm5 + movups XMMWORD PTR[(-32)+rsi],xmm6 + movups XMMWORD PTR[(-16)+rsi],xmm7 sub rdx,16*6 jnc $L$xts_enc_grandloop - lea eax,DWORD PTR[3+rax*1+rax] + mov eax,16+96 + sub eax,r10d mov rcx,r11 - mov r10d,eax + shr eax,4 $L$xts_enc_short:: + mov r10d,eax + pxor xmm10,xmm0 add rdx,16*6 jz $L$xts_enc_done + pxor xmm11,xmm0 cmp rdx,020h jb $L$xts_enc_one + pxor xmm12,xmm0 je $L$xts_enc_two + pxor xmm13,xmm0 cmp rdx,040h jb $L$xts_enc_three + pxor xmm14,xmm0 je $L$xts_enc_four - pshufd xmm9,xmm14,013h - movdqa xmm14,xmm15 - paddq xmm15,xmm15 movdqu xmm2,XMMWORD PTR[rdi] - pand xmm9,xmm8 movdqu xmm3,XMMWORD PTR[16+rdi] - pxor xmm15,xmm9 - movdqu xmm4,XMMWORD PTR[32+rdi] pxor xmm2,xmm10 movdqu xmm5,XMMWORD PTR[48+rdi] @@ -1562,7 +1949,6 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_enc1_9 - DB 102,15,56,221,209 xorps xmm2,xmm10 movdqa xmm10,xmm11 @@ -1578,7 +1964,7 @@ $L$xts_enc_two:: xorps xmm2,xmm10 xorps xmm3,xmm11 - call _aesni_encrypt3 + call _aesni_encrypt2 xorps xmm2,xmm10 movdqa xmm10,xmm12 @@ -1624,15 +2010,15 @@ $L$xts_enc_four:: call _aesni_encrypt4 - xorps xmm2,xmm10 - movdqa xmm10,xmm15 - xorps xmm3,xmm11 - xorps xmm4,xmm12 - movups XMMWORD PTR[rsi],xmm2 - xorps xmm5,xmm13 - movups XMMWORD PTR[16+rsi],xmm3 - movups XMMWORD PTR[32+rsi],xmm4 - movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 lea rsi,QWORD PTR[64+rsi] jmp $L$xts_enc_done @@ -1668,23 +2054,23 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_enc1_10 - DB 102,15,56,221,209 xorps xmm2,xmm10 movups XMMWORD PTR[(-16)+rsi],xmm2 $L$xts_enc_ret:: - movaps xmm6,XMMWORD PTR[96+rsp] - movaps xmm7,XMMWORD PTR[112+rsp] - movaps xmm8,XMMWORD PTR[128+rsp] - movaps xmm9,XMMWORD PTR[144+rsp] - movaps xmm10,XMMWORD PTR[160+rsp] - movaps xmm11,XMMWORD PTR[176+rsp] - movaps xmm12,XMMWORD PTR[192+rsp] - movaps xmm13,XMMWORD PTR[208+rsp] - movaps xmm14,XMMWORD PTR[224+rsp] - movaps xmm15,XMMWORD PTR[240+rsp] - lea rsp,QWORD PTR[264+rsp] + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp $L$xts_enc_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -1707,261 +2093,303 @@ $L$SEH_begin_aesni_xts_decrypt:: mov r9,QWORD PTR[48+rsp] - lea rsp,QWORD PTR[((-264))+rsp] - movaps XMMWORD 
PTR[96+rsp],xmm6 - movaps XMMWORD PTR[112+rsp],xmm7 - movaps XMMWORD PTR[128+rsp],xmm8 - movaps XMMWORD PTR[144+rsp],xmm9 - movaps XMMWORD PTR[160+rsp],xmm10 - movaps XMMWORD PTR[176+rsp],xmm11 - movaps XMMWORD PTR[192+rsp],xmm12 - movaps XMMWORD PTR[208+rsp],xmm13 - movaps XMMWORD PTR[224+rsp],xmm14 - movaps XMMWORD PTR[240+rsp],xmm15 + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 $L$xts_dec_body:: - movups xmm15,XMMWORD PTR[r9] + lea rbp,QWORD PTR[((-8))+rax] + movups xmm2,XMMWORD PTR[r9] mov eax,DWORD PTR[240+r8] mov r10d,DWORD PTR[240+rcx] movups xmm0,XMMWORD PTR[r8] movups xmm1,XMMWORD PTR[16+r8] lea r8,QWORD PTR[32+r8] - xorps xmm15,xmm0 + xorps xmm2,xmm0 $L$oop_enc1_11:: -DB 102,68,15,56,220,249 +DB 102,15,56,220,209 dec eax movups xmm1,XMMWORD PTR[r8] lea r8,QWORD PTR[16+r8] jnz $L$oop_enc1_11 - -DB 102,68,15,56,221,249 +DB 102,15,56,221,209 xor eax,eax test rdx,15 setnz al shl rax,4 sub rdx,rax + movups xmm0,XMMWORD PTR[rcx] mov r11,rcx mov eax,r10d + shl r10d,4 mov r9,rdx and rdx,-16 + movups xmm1,XMMWORD PTR[16+r10*1+rcx] + movdqa xmm8,XMMWORD PTR[$L$xts_magic] - pxor xmm14,xmm14 - pcmpgtd xmm14,xmm15 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + movdqa xmm15,xmm2 + pshufd xmm9,xmm2,05fh + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm10,xmm15 + psrad xmm14,31 paddq xmm15,xmm15 - pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm11,xmm15 + psrad xmm14,31 paddq xmm15,xmm15 - pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm12,xmm15 + psrad xmm14,31 paddq xmm15,xmm15 - pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 - pxor xmm15,xmm9 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 paddq xmm15,xmm15 pand xmm9,xmm8 - pcmpgtd xmm14,xmm15 + pxor xmm14,xmm0 pxor xmm15,xmm9 + movaps XMMWORD PTR[96+rsp],xmm1 + sub rdx,16*6 jc $L$xts_dec_short - shr eax,1 - sub eax,1 - mov r10d,eax + mov eax,16+96 + lea rcx,QWORD PTR[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + mov r10,rax + lea r8,QWORD PTR[$L$xts_magic] jmp $L$xts_dec_grandloop -ALIGN 16 +ALIGN 32 $L$xts_dec_grandloop:: - pshufd xmm9,xmm14,013h - movdqa xmm14,xmm15 - paddq xmm15,xmm15 movdqu xmm2,XMMWORD PTR[rdi] - pand xmm9,xmm8 + movdqa xmm8,xmm0 movdqu xmm3,XMMWORD PTR[16+rdi] - pxor xmm15,xmm9 - - movdqu xmm4,XMMWORD PTR[32+rdi] pxor xmm2,xmm10 - movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] pxor xmm3,xmm11 - movdqu xmm6,XMMWORD PTR[64+rdi] - pxor xmm4,xmm12 - movdqu xmm7,XMMWORD PTR[80+rdi] - lea rdi,QWORD PTR[96+rdi] - pxor xmm5,xmm13 - movups xmm0,XMMWORD PTR[r11] - pxor xmm6,xmm14 - pxor xmm7,xmm15 - - - - movups xmm1,XMMWORD PTR[16+r11] - pxor xmm2,xmm0 - pxor 
xmm3,xmm0 - movdqa XMMWORD PTR[rsp],xmm10 DB 102,15,56,222,209 - lea rcx,QWORD PTR[32+r11] - pxor xmm4,xmm0 - movdqa XMMWORD PTR[16+rsp],xmm11 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm4,xmm12 DB 102,15,56,222,217 - pxor xmm5,xmm0 - movdqa XMMWORD PTR[32+rsp],xmm12 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm5,xmm13 DB 102,15,56,222,225 - pxor xmm6,xmm0 - movdqa XMMWORD PTR[48+rsp],xmm13 -DB 102,15,56,222,233 - pxor xmm7,xmm0 - movups xmm0,XMMWORD PTR[rcx] - dec eax - movdqa XMMWORD PTR[64+rsp],xmm14 + movdqu xmm7,XMMWORD PTR[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD PTR[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,222,233 + movups xmm0,XMMWORD PTR[32+r11] + lea rdi,QWORD PTR[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 DB 102,15,56,222,241 - movdqa XMMWORD PTR[80+rsp],xmm15 + pxor xmm11,xmm9 + movdqa XMMWORD PTR[rsp],xmm10 DB 102,15,56,222,249 - pxor xmm14,xmm14 - pcmpgtd xmm14,xmm15 - jmp $L$xts_dec_loop6_enter + movups xmm1,XMMWORD PTR[48+r11] + pxor xmm12,xmm9 -ALIGN 16 +DB 102,15,56,222,208 + pxor xmm13,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,222,216 + pxor xmm14,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pxor xmm8,xmm9 + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[64+r11] + movdqa XMMWORD PTR[80+rsp],xmm8 + pshufd xmm9,xmm15,05fh + jmp $L$xts_dec_loop6 +ALIGN 32 $L$xts_dec_loop6:: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec eax DB 102,15,56,222,225 DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 -$L$xts_dec_loop6_enter:: - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx] + add rax,32 + DB 102,15,56,222,208 DB 102,15,56,222,216 - lea rcx,QWORD PTR[32+rcx] DB 102,15,56,222,224 DB 102,15,56,222,232 DB 102,15,56,222,240 DB 102,15,56,222,248 - movups xmm0,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx] jnz $L$xts_dec_loop6 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - paddq xmm15,xmm15 + movdqa xmm8,XMMWORD PTR[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 DB 102,15,56,222,209 - pand xmm9,xmm8 + paddq xmm15,xmm15 + psrad xmm14,31 DB 102,15,56,222,217 - pcmpgtd xmm14,xmm15 + pand xmm14,xmm8 + movups xmm10,XMMWORD PTR[r11] DB 102,15,56,222,225 - pxor xmm15,xmm9 DB 102,15,56,222,233 DB 102,15,56,222,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 DB 102,15,56,222,249 - movups xmm1,XMMWORD PTR[16+rcx] + movups xmm1,XMMWORD PTR[((-64))+rcx] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm10,xmm15 - paddq xmm15,xmm15 + movdqa xmm14,xmm9 DB 102,15,56,222,208 - pand xmm9,xmm8 + paddd xmm9,xmm9 + pxor xmm10,xmm15 DB 102,15,56,222,216 - pcmpgtd xmm14,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 DB 102,15,56,222,224 - pxor xmm15,xmm9 DB 102,15,56,222,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 DB 102,15,56,222,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 DB 102,15,56,222,248 - movups xmm0,XMMWORD PTR[32+rcx] + movups xmm0,XMMWORD PTR[((-48))+rcx] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm11,xmm15 - paddq xmm15,xmm15 + paddd xmm9,xmm9 DB 102,15,56,222,209 - pand xmm9,xmm8 + pxor xmm11,xmm15 + psrad xmm14,31 DB 102,15,56,222,217 - pcmpgtd xmm14,xmm15 + paddq xmm15,xmm15 + pand xmm14,xmm8 DB 102,15,56,222,225 - pxor xmm15,xmm9 DB 102,15,56,222,233 + movdqa XMMWORD PTR[48+rsp],xmm13 + pxor xmm15,xmm14 DB 102,15,56,222,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-32))+rcx] - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm12,xmm15 + paddd xmm9,xmm9 +DB 
102,15,56,222,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,222,216 paddq xmm15,xmm15 -DB 102,15,56,223,208 - pand xmm9,xmm8 -DB 102,15,56,223,216 - pcmpgtd xmm14,xmm15 -DB 102,15,56,223,224 - pxor xmm15,xmm9 -DB 102,15,56,223,232 -DB 102,15,56,223,240 -DB 102,15,56,223,248 + pand xmm14,xmm8 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,222,248 - pshufd xmm9,xmm14,013h - pxor xmm14,xmm14 - movdqa xmm13,xmm15 + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD PTR[r11] +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,223,84,36,0 + psrad xmm9,31 paddq xmm15,xmm15 - xorps xmm2,XMMWORD PTR[rsp] +DB 102,15,56,223,92,36,16 +DB 102,15,56,223,100,36,32 pand xmm9,xmm8 - xorps xmm3,XMMWORD PTR[16+rsp] - pcmpgtd xmm14,xmm15 + mov rax,r10 +DB 102,15,56,223,108,36,48 +DB 102,15,56,223,116,36,64 +DB 102,15,56,223,124,36,80 pxor xmm15,xmm9 - xorps xmm4,XMMWORD PTR[32+rsp] - movups XMMWORD PTR[rsi],xmm2 - xorps xmm5,XMMWORD PTR[48+rsp] - movups XMMWORD PTR[16+rsi],xmm3 - xorps xmm6,XMMWORD PTR[64+rsp] - movups XMMWORD PTR[32+rsi],xmm4 - xorps xmm7,XMMWORD PTR[80+rsp] - movups XMMWORD PTR[48+rsi],xmm5 - mov eax,r10d - movups XMMWORD PTR[64+rsi],xmm6 - movups XMMWORD PTR[80+rsi],xmm7 lea rsi,QWORD PTR[96+rsi] + movups XMMWORD PTR[(-96)+rsi],xmm2 + movups XMMWORD PTR[(-80)+rsi],xmm3 + movups XMMWORD PTR[(-64)+rsi],xmm4 + movups XMMWORD PTR[(-48)+rsi],xmm5 + movups XMMWORD PTR[(-32)+rsi],xmm6 + movups XMMWORD PTR[(-16)+rsi],xmm7 sub rdx,16*6 jnc $L$xts_dec_grandloop - lea eax,DWORD PTR[3+rax*1+rax] + mov eax,16+96 + sub eax,r10d mov rcx,r11 - mov r10d,eax + shr eax,4 $L$xts_dec_short:: + mov r10d,eax + pxor xmm10,xmm0 + pxor xmm11,xmm0 add rdx,16*6 jz $L$xts_dec_done + pxor xmm12,xmm0 cmp rdx,020h jb $L$xts_dec_one + pxor xmm13,xmm0 je $L$xts_dec_two + pxor xmm14,xmm0 cmp rdx,040h jb $L$xts_dec_three je $L$xts_dec_four - pshufd xmm9,xmm14,013h - movdqa xmm14,xmm15 - paddq xmm15,xmm15 movdqu xmm2,XMMWORD PTR[rdi] - pand xmm9,xmm8 movdqu xmm3,XMMWORD PTR[16+rdi] - pxor xmm15,xmm9 - movdqu xmm4,XMMWORD PTR[32+rdi] pxor xmm2,xmm10 movdqu xmm5,XMMWORD PTR[48+rdi] @@ -2012,7 +2440,6 @@ DB 102,15,56,222,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_dec1_12 - DB 102,15,56,223,209 xorps xmm2,xmm10 movdqa xmm10,xmm11 @@ -2029,7 +2456,7 @@ $L$xts_dec_two:: xorps xmm2,xmm10 xorps xmm3,xmm11 - call _aesni_decrypt3 + call _aesni_decrypt2 xorps xmm2,xmm10 movdqa xmm10,xmm12 @@ -2055,7 +2482,7 @@ $L$xts_dec_three:: xorps xmm2,xmm10 movdqa xmm10,xmm13 xorps xmm3,xmm11 - movdqa xmm11,xmm15 + movdqa xmm11,xmm14 xorps xmm4,xmm12 movups XMMWORD PTR[rsi],xmm2 movups XMMWORD PTR[16+rsi],xmm3 @@ -2065,14 +2492,8 @@ $L$xts_dec_three:: ALIGN 16 $L$xts_dec_four:: - pshufd xmm9,xmm14,013h - movdqa xmm14,xmm15 - paddq xmm15,xmm15 movups xmm2,XMMWORD PTR[rdi] - pand xmm9,xmm8 movups xmm3,XMMWORD PTR[16+rdi] - pxor xmm15,xmm9 - movups xmm4,XMMWORD PTR[32+rdi] xorps xmm2,xmm10 movups xmm5,XMMWORD PTR[48+rdi] @@ -2083,16 +2504,16 @@ $L$xts_dec_four:: call _aesni_decrypt4 - xorps xmm2,xmm10 + pxor xmm2,xmm10 movdqa xmm10,xmm14 - xorps xmm3,xmm11 + pxor xmm3,xmm11 movdqa xmm11,xmm15 - xorps xmm4,xmm12 - movups XMMWORD PTR[rsi],xmm2 - xorps xmm5,xmm13 - movups XMMWORD PTR[16+rsi],xmm3 - movups 
XMMWORD PTR[32+rsi],xmm4 - movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 lea rsi,QWORD PTR[64+rsi] jmp $L$xts_dec_done @@ -2117,7 +2538,6 @@ DB 102,15,56,222,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_dec1_13 - DB 102,15,56,223,209 xorps xmm2,xmm11 movups XMMWORD PTR[rsi],xmm2 @@ -2148,23 +2568,23 @@ DB 102,15,56,222,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_dec1_14 - DB 102,15,56,223,209 xorps xmm2,xmm10 movups XMMWORD PTR[rsi],xmm2 $L$xts_dec_ret:: - movaps xmm6,XMMWORD PTR[96+rsp] - movaps xmm7,XMMWORD PTR[112+rsp] - movaps xmm8,XMMWORD PTR[128+rsp] - movaps xmm9,XMMWORD PTR[144+rsp] - movaps xmm10,XMMWORD PTR[160+rsp] - movaps xmm11,XMMWORD PTR[176+rsp] - movaps xmm12,XMMWORD PTR[192+rsp] - movaps xmm13,XMMWORD PTR[208+rsp] - movaps xmm14,XMMWORD PTR[224+rsp] - movaps xmm15,XMMWORD PTR[240+rsp] - lea rsp,QWORD PTR[264+rsp] + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp $L$xts_dec_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -2217,7 +2637,6 @@ DB 102,15,56,220,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_enc1_15 - DB 102,15,56,221,209 mov eax,r10d mov rcx,r11 @@ -2234,12 +2653,10 @@ $L$cbc_enc_tail:: mov rcx,rdx xchg rsi,rdi DD 09066A4F3h - mov ecx,16 sub rcx,rdx xor eax,eax DD 09066AAF3h - lea rdi,QWORD PTR[((-16))+rdi] mov eax,r10d mov rsi,rdi @@ -2247,158 +2664,396 @@ $L$cbc_enc_tail:: xor rdx,rdx jmp $L$cbc_enc_loop - ALIGN 16 $L$cbc_decrypt:: - lea rsp,QWORD PTR[((-88))+rsp] - movaps XMMWORD PTR[rsp],xmm6 - movaps XMMWORD PTR[16+rsp],xmm7 - movaps XMMWORD PTR[32+rsp],xmm8 - movaps XMMWORD PTR[48+rsp],xmm9 + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,176 + and rsp,-16 + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 $L$cbc_decrypt_body:: - movups xmm9,XMMWORD PTR[r8] + lea rbp,QWORD PTR[((-8))+rax] + movups xmm10,XMMWORD PTR[r8] mov eax,r10d - cmp rdx,070h + cmp rdx,050h jbe $L$cbc_dec_tail - shr r10d,1 - sub rdx,070h - mov eax,r10d - movaps XMMWORD PTR[64+rsp],xmm9 + + movups xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqa xmm15,xmm6 + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + cmp rdx,070h + jbe $L$cbc_dec_six_or_seven + + and r9d,71303168 + sub rdx,050h + cmp r9d,4194304 + je $L$cbc_dec_loop6_enter + sub rdx,020h + lea rcx,QWORD PTR[112+rcx] jmp $L$cbc_dec_loop8_enter ALIGN 16 -$L$cbc_dec_loop8:: - movaps XMMWORD PTR[64+rsp],xmm0 - movups XMMWORD PTR[rsi],xmm9 - lea rsi,QWORD 
PTR[16+rsi] -$L$cbc_dec_loop8_enter:: - movups xmm0,XMMWORD PTR[rcx] - movups xmm2,XMMWORD PTR[rdi] - movups xmm3,XMMWORD PTR[16+rdi] - movups xmm1,XMMWORD PTR[16+rcx] - - lea rcx,QWORD PTR[32+rcx] - movdqu xmm4,XMMWORD PTR[32+rdi] - xorps xmm2,xmm0 - movdqu xmm5,XMMWORD PTR[48+rdi] - xorps xmm3,xmm0 - movdqu xmm6,XMMWORD PTR[64+rdi] +$L$cbc_dec_loop8:: + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_loop8_enter:: + movdqu xmm8,XMMWORD PTR[96+rdi] + pxor xmm2,xmm0 + movdqu xmm9,XMMWORD PTR[112+rdi] + pxor xmm3,xmm0 + movups xmm1,XMMWORD PTR[((16-112))+rcx] + pxor xmm4,xmm0 + xor r11,r11 + cmp rdx,070h + pxor xmm5,xmm0 + pxor xmm6,xmm0 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + +DB 102,15,56,222,209 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[((32-112))+rcx] +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 + setnc r11b + shl r11,7 +DB 102,68,15,56,222,201 + add r11,rdi + movups xmm1,XMMWORD PTR[((48-112))+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((64-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((80-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((96-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((112-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((128-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((144-112))+rcx] + cmp eax,11 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((160-112))+rcx] + jb $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((176-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((192-112))+rcx] + je $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((208-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + 
movups xmm0,XMMWORD PTR[((224-112))+rcx] + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_done:: DB 102,15,56,222,209 - pxor xmm4,xmm0 - movdqu xmm7,XMMWORD PTR[80+rdi] DB 102,15,56,222,217 - pxor xmm5,xmm0 - movdqu xmm8,XMMWORD PTR[96+rdi] + pxor xmm10,xmm0 + pxor xmm11,xmm0 DB 102,15,56,222,225 - pxor xmm6,xmm0 - movdqu xmm9,XMMWORD PTR[112+rdi] DB 102,15,56,222,233 - pxor xmm7,xmm0 - dec eax + pxor xmm12,xmm0 + pxor xmm13,xmm0 DB 102,15,56,222,241 - pxor xmm8,xmm0 DB 102,15,56,222,249 - pxor xmm9,xmm0 - movups xmm0,XMMWORD PTR[rcx] + pxor xmm14,xmm0 + pxor xmm15,xmm0 DB 102,68,15,56,222,193 DB 102,68,15,56,222,201 - movups xmm1,XMMWORD PTR[16+rcx] - - call $L$dec_loop8_enter + movdqu xmm1,XMMWORD PTR[80+rdi] + +DB 102,65,15,56,223,210 + movdqu xmm10,XMMWORD PTR[96+rdi] + pxor xmm1,xmm0 +DB 102,65,15,56,223,219 + pxor xmm10,xmm0 + movdqu xmm0,XMMWORD PTR[112+rdi] +DB 102,65,15,56,223,228 + lea rdi,QWORD PTR[128+rdi] + movdqu xmm11,XMMWORD PTR[r11] +DB 102,65,15,56,223,237 +DB 102,65,15,56,223,246 + movdqu xmm12,XMMWORD PTR[16+r11] + movdqu xmm13,XMMWORD PTR[32+r11] +DB 102,65,15,56,223,255 +DB 102,68,15,56,223,193 + movdqu xmm14,XMMWORD PTR[48+r11] + movdqu xmm15,XMMWORD PTR[64+r11] +DB 102,69,15,56,223,202 + movdqa xmm10,xmm0 + movdqu xmm1,XMMWORD PTR[80+r11] + movups xmm0,XMMWORD PTR[((-112))+rcx] - movups xmm1,XMMWORD PTR[rdi] - movups xmm0,XMMWORD PTR[16+rdi] - xorps xmm2,XMMWORD PTR[64+rsp] - xorps xmm3,xmm1 - movups xmm1,XMMWORD PTR[32+rdi] - xorps xmm4,xmm0 - movups xmm0,XMMWORD PTR[48+rdi] - xorps xmm5,xmm1 - movups xmm1,XMMWORD PTR[64+rdi] - xorps xmm6,xmm0 - movups xmm0,XMMWORD PTR[80+rdi] - xorps xmm7,xmm1 - movups xmm1,XMMWORD PTR[96+rdi] - xorps xmm8,xmm0 - movups xmm0,XMMWORD PTR[112+rdi] - xorps xmm9,xmm1 movups XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm11 movups XMMWORD PTR[16+rsi],xmm3 + movdqa xmm3,xmm12 movups XMMWORD PTR[32+rsi],xmm4 + movdqa xmm4,xmm13 movups XMMWORD PTR[48+rsi],xmm5 - mov eax,r10d + movdqa xmm5,xmm14 movups XMMWORD PTR[64+rsi],xmm6 - mov rcx,r11 + movdqa xmm6,xmm15 movups XMMWORD PTR[80+rsi],xmm7 - lea rdi,QWORD PTR[128+rdi] + movdqa xmm7,xmm1 movups XMMWORD PTR[96+rsi],xmm8 lea rsi,QWORD PTR[112+rsi] + sub rdx,080h ja $L$cbc_dec_loop8 movaps xmm2,xmm9 - movaps xmm9,xmm0 + lea rcx,QWORD PTR[((-112))+rcx] add rdx,070h jle $L$cbc_dec_tail_collected - movups XMMWORD PTR[rsi],xmm2 - lea eax,DWORD PTR[1+r10*1+r10] + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] + cmp rdx,050h + jbe $L$cbc_dec_tail + + movaps xmm2,xmm11 +$L$cbc_dec_six_or_seven:: + cmp rdx,060h + ja $L$cbc_dec_seven + + movaps xmm8,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + movdqa xmm2,xmm7 + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_seven:: + movups xmm8,XMMWORD PTR[96+rdi] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups xmm9,XMMWORD PTR[80+rdi] + pxor xmm2,xmm10 + movups xmm10,XMMWORD PTR[96+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + pxor xmm8,xmm9 + movdqu XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + movdqa xmm2,xmm8 + jmp 
$L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_loop6:: + movups XMMWORD PTR[rsi],xmm7 + lea rsi,QWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqa xmm15,xmm6 +$L$cbc_dec_loop6_enter:: + lea rdi,QWORD PTR[96+rdi] + movdqa xmm8,xmm7 + + call _aesni_decrypt6 + + pxor xmm2,xmm10 + movdqa xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + mov rcx,r11 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + mov eax,r10d + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + sub rdx,060h + ja $L$cbc_dec_loop6 + + movdqa xmm2,xmm7 + add rdx,050h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm7 lea rsi,QWORD PTR[16+rsi] + $L$cbc_dec_tail:: movups xmm2,XMMWORD PTR[rdi] - movaps xmm8,xmm2 - cmp rdx,010h + sub rdx,010h jbe $L$cbc_dec_one movups xmm3,XMMWORD PTR[16+rdi] - movaps xmm7,xmm3 - cmp rdx,020h + movaps xmm11,xmm2 + sub rdx,010h jbe $L$cbc_dec_two movups xmm4,XMMWORD PTR[32+rdi] - movaps xmm6,xmm4 - cmp rdx,030h + movaps xmm12,xmm3 + sub rdx,010h jbe $L$cbc_dec_three movups xmm5,XMMWORD PTR[48+rdi] - cmp rdx,040h + movaps xmm13,xmm4 + sub rdx,010h jbe $L$cbc_dec_four movups xmm6,XMMWORD PTR[64+rdi] - cmp rdx,050h - jbe $L$cbc_dec_five - - movups xmm7,XMMWORD PTR[80+rdi] - cmp rdx,060h - jbe $L$cbc_dec_six - - movups xmm8,XMMWORD PTR[96+rdi] - movaps XMMWORD PTR[64+rsp],xmm9 - call _aesni_decrypt8 - movups xmm1,XMMWORD PTR[rdi] - movups xmm0,XMMWORD PTR[16+rdi] - xorps xmm2,XMMWORD PTR[64+rsp] - xorps xmm3,xmm1 - movups xmm1,XMMWORD PTR[32+rdi] - xorps xmm4,xmm0 - movups xmm0,XMMWORD PTR[48+rdi] - xorps xmm5,xmm1 - movups xmm1,XMMWORD PTR[64+rdi] - xorps xmm6,xmm0 - movups xmm0,XMMWORD PTR[80+rdi] - xorps xmm7,xmm1 - movups xmm9,XMMWORD PTR[96+rdi] - xorps xmm8,xmm0 - movups XMMWORD PTR[rsi],xmm2 - movups XMMWORD PTR[16+rsi],xmm3 - movups XMMWORD PTR[32+rsi],xmm4 - movups XMMWORD PTR[48+rsi],xmm5 - movups XMMWORD PTR[64+rsi],xmm6 - movups XMMWORD PTR[80+rsi],xmm7 - lea rsi,QWORD PTR[96+rsi] - movaps xmm2,xmm8 - sub rdx,070h + movaps xmm14,xmm5 + movaps xmm15,xmm6 + xorps xmm7,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm15 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + movdqa xmm2,xmm6 + sub rdx,010h jmp $L$cbc_dec_tail_collected + ALIGN 16 $L$cbc_dec_one:: + movaps xmm11,xmm2 movups xmm0,XMMWORD PTR[rcx] movups xmm1,XMMWORD PTR[16+rcx] lea rcx,QWORD PTR[32+rcx] @@ -2409,119 +3064,79 @@ DB 102,15,56,222,209 movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] jnz $L$oop_dec1_16 - DB 102,15,56,223,209 - xorps xmm2,xmm9 - movaps xmm9,xmm8 - sub rdx,010h + xorps xmm2,xmm10 + movaps xmm10,xmm11 jmp $L$cbc_dec_tail_collected ALIGN 16 $L$cbc_dec_two:: - xorps xmm4,xmm4 - call _aesni_decrypt3 - xorps xmm2,xmm9 - xorps xmm3,xmm8 - movups XMMWORD PTR[rsi],xmm2 - movaps xmm9,xmm7 - movaps xmm2,xmm3 + movaps xmm12,xmm3 + call _aesni_decrypt2 + pxor xmm2,xmm10 + movaps xmm10,xmm12 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm3 lea rsi,QWORD PTR[16+rsi] - sub rdx,020h jmp 
$L$cbc_dec_tail_collected ALIGN 16 $L$cbc_dec_three:: + movaps xmm13,xmm4 call _aesni_decrypt3 - xorps xmm2,xmm9 - xorps xmm3,xmm8 - movups XMMWORD PTR[rsi],xmm2 - xorps xmm4,xmm7 - movups XMMWORD PTR[16+rsi],xmm3 - movaps xmm9,xmm6 - movaps xmm2,xmm4 + pxor xmm2,xmm10 + movaps xmm10,xmm13 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqa xmm2,xmm4 lea rsi,QWORD PTR[32+rsi] - sub rdx,030h jmp $L$cbc_dec_tail_collected ALIGN 16 $L$cbc_dec_four:: + movaps xmm14,xmm5 call _aesni_decrypt4 - xorps xmm2,xmm9 - movups xmm9,XMMWORD PTR[48+rdi] - xorps xmm3,xmm8 - movups XMMWORD PTR[rsi],xmm2 - xorps xmm4,xmm7 - movups XMMWORD PTR[16+rsi],xmm3 - xorps xmm5,xmm6 - movups XMMWORD PTR[32+rsi],xmm4 - movaps xmm2,xmm5 + pxor xmm2,xmm10 + movaps xmm10,xmm14 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqa xmm2,xmm5 lea rsi,QWORD PTR[48+rsi] - sub rdx,040h - jmp $L$cbc_dec_tail_collected -ALIGN 16 -$L$cbc_dec_five:: - xorps xmm7,xmm7 - call _aesni_decrypt6 - movups xmm1,XMMWORD PTR[16+rdi] - movups xmm0,XMMWORD PTR[32+rdi] - xorps xmm2,xmm9 - xorps xmm3,xmm8 - xorps xmm4,xmm1 - movups xmm1,XMMWORD PTR[48+rdi] - xorps xmm5,xmm0 - movups xmm9,XMMWORD PTR[64+rdi] - xorps xmm6,xmm1 - movups XMMWORD PTR[rsi],xmm2 - movups XMMWORD PTR[16+rsi],xmm3 - movups XMMWORD PTR[32+rsi],xmm4 - movups XMMWORD PTR[48+rsi],xmm5 - lea rsi,QWORD PTR[64+rsi] - movaps xmm2,xmm6 - sub rdx,050h - jmp $L$cbc_dec_tail_collected -ALIGN 16 -$L$cbc_dec_six:: - call _aesni_decrypt6 - movups xmm1,XMMWORD PTR[16+rdi] - movups xmm0,XMMWORD PTR[32+rdi] - xorps xmm2,xmm9 - xorps xmm3,xmm8 - xorps xmm4,xmm1 - movups xmm1,XMMWORD PTR[48+rdi] - xorps xmm5,xmm0 - movups xmm0,XMMWORD PTR[64+rdi] - xorps xmm6,xmm1 - movups xmm9,XMMWORD PTR[80+rdi] - xorps xmm7,xmm0 - movups XMMWORD PTR[rsi],xmm2 - movups XMMWORD PTR[16+rsi],xmm3 - movups XMMWORD PTR[32+rsi],xmm4 - movups XMMWORD PTR[48+rsi],xmm5 - movups XMMWORD PTR[64+rsi],xmm6 - lea rsi,QWORD PTR[80+rsi] - movaps xmm2,xmm7 - sub rdx,060h jmp $L$cbc_dec_tail_collected + ALIGN 16 $L$cbc_dec_tail_collected:: + movups XMMWORD PTR[r8],xmm10 and rdx,15 - movups XMMWORD PTR[r8],xmm9 jnz $L$cbc_dec_tail_partial movups XMMWORD PTR[rsi],xmm2 jmp $L$cbc_dec_ret ALIGN 16 $L$cbc_dec_tail_partial:: - movaps XMMWORD PTR[64+rsp],xmm2 + movaps XMMWORD PTR[rsp],xmm2 mov rcx,16 mov rdi,rsi sub rcx,rdx - lea rsi,QWORD PTR[64+rsp] + lea rsi,QWORD PTR[rsp] DD 09066A4F3h - $L$cbc_dec_ret:: - movaps xmm6,XMMWORD PTR[rsp] - movaps xmm7,XMMWORD PTR[16+rsp] - movaps xmm8,XMMWORD PTR[32+rsp] - movaps xmm9,XMMWORD PTR[48+rsp] - lea rsp,QWORD PTR[88+rsp] + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[rbp] + pop rbp $L$cbc_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -2533,7 +3148,6 @@ PUBLIC aesni_set_decrypt_key ALIGN 16 aesni_set_decrypt_key PROC PUBLIC DB 048h,083h,0ECh,008h - call __aesni_set_encrypt_key shl edx,4 test eax,eax @@ -2573,7 +3187,6 @@ ALIGN 16 aesni_set_encrypt_key PROC PUBLIC __aesni_set_encrypt_key:: DB 048h,083h,0ECh,008h - mov rax,-1 test rcx,rcx jz $L$enc_key_ret @@ -2769,6 +3382,8 @@ 
$L$increment64:: DD 1,0,0,0 $L$xts_magic:: DD 087h,0,1,0 +$L$increment1:: +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 @@ -2812,7 +3427,6 @@ ecb_ccm64_se_handler PROC PRIVATE lea rdi,QWORD PTR[512+r8] mov ecx,8 DD 0a548f3fch - lea rax,QWORD PTR[88+rax] jmp $L$common_seh_tail @@ -2820,44 +3434,7 @@ ecb_ccm64_se_handler ENDP ALIGN 16 -ctr32_se_handler PROC PRIVATE - push rsi - push rdi - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - pushfq - sub rsp,64 - - mov rax,QWORD PTR[120+r8] - mov rbx,QWORD PTR[248+r8] - - lea r10,QWORD PTR[$L$ctr32_body] - cmp rbx,r10 - jb $L$common_seh_tail - - mov rax,QWORD PTR[152+r8] - - lea r10,QWORD PTR[$L$ctr32_ret] - cmp rbx,r10 - jae $L$common_seh_tail - - lea rsi,QWORD PTR[32+rax] - lea rdi,QWORD PTR[512+r8] - mov ecx,20 - DD 0a548f3fch - - lea rax,QWORD PTR[200+rax] - - jmp $L$common_seh_tail -ctr32_se_handler ENDP - - -ALIGN 16 -xts_se_handler PROC PRIVATE +ctr_xts_se_handler PROC PRIVATE push rsi push rdi push rbx @@ -2887,15 +3464,14 @@ xts_se_handler PROC PRIVATE cmp rbx,r10 jae $L$common_seh_tail - lea rsi,QWORD PTR[96+rax] + mov rax,QWORD PTR[160+r8] + lea rsi,QWORD PTR[((-160))+rax] lea rdi,QWORD PTR[512+r8] mov ecx,20 DD 0a548f3fch - lea rax,QWORD PTR[((104+160))+rax] - - jmp $L$common_seh_tail -xts_se_handler ENDP + jmp $L$common_rbp_tail +ctr_xts_se_handler ENDP ALIGN 16 cbc_se_handler PROC PRIVATE @@ -2925,12 +3501,16 @@ cbc_se_handler PROC PRIVATE cmp rbx,r10 jae $L$common_seh_tail - lea rsi,QWORD PTR[rax] + lea rsi,QWORD PTR[16+rax] lea rdi,QWORD PTR[512+r8] - mov ecx,8 + mov ecx,20 DD 0a548f3fch - lea rax,QWORD PTR[88+rax] +$L$common_rbp_tail:: + mov rax,QWORD PTR[160+r8] + mov rbp,QWORD PTR[rax] + lea rax,QWORD PTR[8+rax] + mov QWORD PTR[160+r8],rbp jmp $L$common_seh_tail $L$restore_cbc_rax:: @@ -2948,7 +3528,6 @@ $L$common_seh_tail:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -3021,30 +3600,26 @@ $L$SEH_info_ecb:: DB 9,0,0,0 DD imagerel ecb_ccm64_se_handler DD imagerel $L$ecb_enc_body,imagerel $L$ecb_enc_ret - $L$SEH_info_ccm64_enc:: DB 9,0,0,0 DD imagerel ecb_ccm64_se_handler DD imagerel $L$ccm64_enc_body,imagerel $L$ccm64_enc_ret - $L$SEH_info_ccm64_dec:: DB 9,0,0,0 DD imagerel ecb_ccm64_se_handler DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret - $L$SEH_info_ctr32:: DB 9,0,0,0 - DD imagerel ctr32_se_handler + DD imagerel ctr_xts_se_handler + DD imagerel $L$ctr32_body,imagerel $L$ctr32_epilogue $L$SEH_info_xts_enc:: DB 9,0,0,0 - DD imagerel xts_se_handler + DD imagerel ctr_xts_se_handler DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue - $L$SEH_info_xts_dec:: DB 9,0,0,0 - DD imagerel xts_se_handler + DD imagerel ctr_xts_se_handler DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue - $L$SEH_info_cbc:: DB 9,0,0,0 DD imagerel cbc_se_handler @@ -3052,6 +3627,5 @@ $L$SEH_info_key:: DB 001h,004h,001h,000h DB 004h,002h,000h,000h - .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/aes/bsaes-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/bsaes-x86_64.asm index bea2f038ca6482..3346a7e25b4657 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/bsaes-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/bsaes-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' EXTERN asm_AES_encrypt:NEAR EXTERN asm_AES_decrypt:NEAR @@ -14,18 +14,18 @@ _bsaes_encrypt8 PROC PRIVATE movdqa xmm7,XMMWORD PTR[80+r11] pxor xmm15,xmm8 
pxor xmm0,xmm8 -DB 102,68,15,56,0,255 pxor xmm1,xmm8 -DB 102,15,56,0,199 pxor xmm2,xmm8 -DB 102,15,56,0,207 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 pxor xmm3,xmm8 -DB 102,15,56,0,215 pxor xmm4,xmm8 -DB 102,15,56,0,223 +DB 102,15,56,0,207 +DB 102,15,56,0,215 pxor xmm5,xmm8 -DB 102,15,56,0,231 pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 DB 102,15,56,0,239 DB 102,15,56,0,247 _bsaes_encrypt8_bitslice:: @@ -122,21 +122,21 @@ ALIGN 16 $L$enc_loop:: pxor xmm15,XMMWORD PTR[rax] pxor xmm0,XMMWORD PTR[16+rax] -DB 102,68,15,56,0,255 pxor xmm1,XMMWORD PTR[32+rax] -DB 102,15,56,0,199 pxor xmm2,XMMWORD PTR[48+rax] -DB 102,15,56,0,207 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 pxor xmm3,XMMWORD PTR[64+rax] -DB 102,15,56,0,215 pxor xmm4,XMMWORD PTR[80+rax] -DB 102,15,56,0,223 +DB 102,15,56,0,207 +DB 102,15,56,0,215 pxor xmm5,XMMWORD PTR[96+rax] -DB 102,15,56,0,231 pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 DB 102,15,56,0,239 - lea rax,QWORD PTR[128+rax] DB 102,15,56,0,247 + lea rax,QWORD PTR[128+rax] $L$enc_sbox:: pxor xmm4,xmm5 pxor xmm1,xmm0 @@ -486,18 +486,18 @@ _bsaes_decrypt8 PROC PRIVATE movdqa xmm7,XMMWORD PTR[((-48))+r11] pxor xmm15,xmm8 pxor xmm0,xmm8 -DB 102,68,15,56,0,255 pxor xmm1,xmm8 -DB 102,15,56,0,199 pxor xmm2,xmm8 -DB 102,15,56,0,207 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 pxor xmm3,xmm8 -DB 102,15,56,0,215 pxor xmm4,xmm8 -DB 102,15,56,0,223 +DB 102,15,56,0,207 +DB 102,15,56,0,215 pxor xmm5,xmm8 -DB 102,15,56,0,231 pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 DB 102,15,56,0,239 DB 102,15,56,0,247 movdqa xmm7,XMMWORD PTR[r11] @@ -593,21 +593,21 @@ ALIGN 16 $L$dec_loop:: pxor xmm15,XMMWORD PTR[rax] pxor xmm0,XMMWORD PTR[16+rax] -DB 102,68,15,56,0,255 pxor xmm1,XMMWORD PTR[32+rax] -DB 102,15,56,0,199 pxor xmm2,XMMWORD PTR[48+rax] -DB 102,15,56,0,207 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 pxor xmm3,XMMWORD PTR[64+rax] -DB 102,15,56,0,215 pxor xmm4,XMMWORD PTR[80+rax] -DB 102,15,56,0,223 +DB 102,15,56,0,207 +DB 102,15,56,0,215 pxor xmm5,XMMWORD PTR[96+rax] -DB 102,15,56,0,231 pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 DB 102,15,56,0,239 - lea rax,QWORD PTR[128+rax] DB 102,15,56,0,247 + lea rax,QWORD PTR[128+rax] $L$dec_sbox:: pxor xmm2,xmm3 @@ -1300,7 +1300,6 @@ $L$cbc_dec_one:: lea rdx,QWORD PTR[32+rbp] lea r8,QWORD PTR[r15] call asm_AES_decrypt - pxor xmm14,XMMWORD PTR[32+rbp] movdqu XMMWORD PTR[r13],xmm14 movdqa xmm14,xmm15 @@ -1422,21 +1421,21 @@ $L$ctr_enc_loop:: movdqa xmm7,XMMWORD PTR[((-16))+r11] pxor xmm15,xmm8 pxor xmm0,xmm8 -DB 102,68,15,56,0,255 pxor xmm1,xmm8 -DB 102,15,56,0,199 pxor xmm2,xmm8 -DB 102,15,56,0,207 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 pxor xmm3,xmm8 -DB 102,15,56,0,215 pxor xmm4,xmm8 -DB 102,15,56,0,223 +DB 102,15,56,0,207 +DB 102,15,56,0,215 pxor xmm5,xmm8 -DB 102,15,56,0,231 pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 DB 102,15,56,0,239 - lea r11,QWORD PTR[$L$BS0] DB 102,15,56,0,247 + lea r11,QWORD PTR[$L$BS0] mov r10d,ebx call _bsaes_encrypt8_bitslice @@ -1601,7 +1600,6 @@ $L$xts_enc_body:: lea r8,QWORD PTR[r10] call asm_AES_encrypt - mov eax,DWORD PTR[240+r15] mov rbx,r14 @@ -1971,7 +1969,6 @@ $L$xts_enc_1:: lea rdx,QWORD PTR[32+rbp] lea r8,QWORD PTR[r15] call asm_AES_encrypt - pxor xmm15,XMMWORD PTR[32+rbp] @@ -2005,7 +2002,6 @@ $L$xts_enc_steal:: movdqa XMMWORD PTR[32+rbp],xmm15 lea r8,QWORD PTR[r15] call asm_AES_encrypt - pxor xmm6,XMMWORD PTR[32+rbp] movdqu XMMWORD PTR[(-16)+r13],xmm6 @@ -2081,7 +2077,6 @@ $L$xts_dec_body:: lea r8,QWORD PTR[r10] call 
asm_AES_encrypt - mov eax,DWORD PTR[240+r15] mov rbx,r14 @@ -2458,7 +2453,6 @@ $L$xts_dec_1:: lea rdx,QWORD PTR[32+rbp] lea r8,QWORD PTR[r15] call asm_AES_decrypt - pxor xmm15,XMMWORD PTR[32+rbp] @@ -2490,7 +2484,6 @@ $L$xts_dec_done:: movdqa XMMWORD PTR[32+rbp],xmm15 lea r8,QWORD PTR[r15] call asm_AES_decrypt - pxor xmm6,XMMWORD PTR[32+rbp] mov rdx,r13 movdqu XMMWORD PTR[r13],xmm6 @@ -2512,7 +2505,6 @@ $L$xts_dec_steal:: movdqa XMMWORD PTR[32+rbp],xmm15 lea r8,QWORD PTR[r15] call asm_AES_decrypt - pxor xmm5,XMMWORD PTR[32+rbp] movdqu XMMWORD PTR[r13],xmm5 @@ -2647,7 +2639,6 @@ se_handler PROC PRIVATE lea rdi,QWORD PTR[512+r8] mov ecx,20 DD 0a548f3fch - lea rax,QWORD PTR[160+rax] mov rbp,QWORD PTR[112+rax] @@ -2672,7 +2663,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -2727,22 +2717,18 @@ $L$cbc_dec_info:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue - $L$ctr_enc_info:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue - $L$xts_enc_info:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue - $L$xts_dec_info:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue - .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/aes/vpaes-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/vpaes-x86_64.asm index d904c807523343..292f64d58debb1 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/vpaes-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/vpaes-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' @@ -32,8 +32,8 @@ DB 102,15,56,0,208 movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))] DB 102,15,56,0,193 pxor xmm2,xmm5 - pxor xmm0,xmm2 add r9,16 + pxor xmm0,xmm2 lea r10,QWORD PTR[$L$k_mc_backward] jmp $L$enc_entry @@ -41,19 +41,19 @@ ALIGN 16 $L$enc_loop:: movdqa xmm4,xmm13 -DB 102,15,56,0,226 - pxor xmm4,xmm5 movdqa xmm0,xmm12 +DB 102,15,56,0,226 DB 102,15,56,0,195 - pxor xmm0,xmm4 + pxor xmm4,xmm5 movdqa xmm5,xmm15 -DB 102,15,56,0,234 + pxor xmm0,xmm4 movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11] +DB 102,15,56,0,234 + movdqa xmm4,XMMWORD PTR[r10*1+r11] movdqa xmm2,xmm14 DB 102,15,56,0,211 - pxor xmm2,xmm5 - movdqa xmm4,XMMWORD PTR[r10*1+r11] movdqa xmm3,xmm0 + pxor xmm2,xmm5 DB 102,15,56,0,193 add r9,16 pxor xmm0,xmm2 @@ -62,30 +62,30 @@ DB 102,15,56,0,220 pxor xmm3,xmm0 DB 102,15,56,0,193 and r11,030h - pxor xmm0,xmm3 sub rax,1 + pxor xmm0,xmm3 $L$enc_entry:: movdqa xmm1,xmm9 + movdqa xmm5,xmm11 pandn xmm1,xmm0 psrld xmm1,4 pand xmm0,xmm9 - movdqa xmm5,xmm11 DB 102,15,56,0,232 - pxor xmm0,xmm1 movdqa xmm3,xmm10 + pxor xmm0,xmm1 DB 102,15,56,0,217 - pxor xmm3,xmm5 movdqa xmm4,xmm10 + pxor xmm3,xmm5 DB 102,15,56,0,224 - pxor xmm4,xmm5 movdqa xmm2,xmm10 + pxor xmm4,xmm5 DB 102,15,56,0,211 - pxor xmm2,xmm0 movdqa xmm3,xmm10 - movdqu xmm5,XMMWORD PTR[r9] + pxor xmm2,xmm0 DB 102,15,56,0,220 + movdqu xmm5,XMMWORD PTR[r9] pxor xmm3,xmm1 jnz $L$enc_loop @@ -138,62 +138,61 @@ $L$dec_loop:: movdqa xmm4,XMMWORD PTR[((-32))+r10] + movdqa xmm1,XMMWORD PTR[((-16))+r10] DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR[((-16))+r10] -DB 102,15,56,0,195 +DB 102,15,56,0,203 pxor xmm0,xmm4 - add r9,16 - -DB 102,15,56,0,197 movdqa xmm4,XMMWORD PTR[r10] -DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR[16+r10] -DB 102,15,56,0,195 - pxor xmm0,xmm4 - sub rax,1 + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[16+r10] -DB 102,15,56,0,197 
- movdqa xmm4,XMMWORD PTR[32+r10] DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR[48+r10] -DB 102,15,56,0,195 +DB 102,15,56,0,197 +DB 102,15,56,0,203 pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[32+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[48+r10] +DB 102,15,56,0,226 DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 movdqa xmm4,XMMWORD PTR[64+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[80+r10] + DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR[80+r10] -DB 102,15,56,0,195 +DB 102,15,56,0,197 +DB 102,15,56,0,203 pxor xmm0,xmm4 - + add r9,16 DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub rax,1 $L$dec_entry:: movdqa xmm1,xmm9 pandn xmm1,xmm0 + movdqa xmm2,xmm11 psrld xmm1,4 pand xmm0,xmm9 - movdqa xmm2,xmm11 DB 102,15,56,0,208 - pxor xmm0,xmm1 movdqa xmm3,xmm10 + pxor xmm0,xmm1 DB 102,15,56,0,217 - pxor xmm3,xmm2 movdqa xmm4,xmm10 + pxor xmm3,xmm2 DB 102,15,56,0,224 pxor xmm4,xmm2 movdqa xmm2,xmm10 DB 102,15,56,0,211 - pxor xmm2,xmm0 movdqa xmm3,xmm10 + pxor xmm2,xmm0 DB 102,15,56,0,220 - pxor xmm3,xmm1 movdqu xmm0,XMMWORD PTR[r9] + pxor xmm3,xmm1 jnz $L$dec_loop @@ -222,7 +221,6 @@ _vpaes_schedule_core PROC PRIVATE call _vpaes_preheat - movdqa xmm8,XMMWORD PTR[$L$k_rcon] movdqu xmm0,XMMWORD PTR[rdi] @@ -269,7 +267,6 @@ $L$oop_schedule_128:: dec rsi jz $L$schedule_mangle_last call _vpaes_schedule_mangle - jmp $L$oop_schedule_128 @@ -291,7 +288,6 @@ ALIGN 16 $L$schedule_192:: movdqu xmm0,XMMWORD PTR[8+rdi] call _vpaes_schedule_transform - movdqa xmm6,xmm0 pxor xmm4,xmm4 movhlps xmm6,xmm4 @@ -301,15 +297,12 @@ $L$oop_schedule_192:: call _vpaes_schedule_round DB 102,15,58,15,198,8 call _vpaes_schedule_mangle - call _vpaes_schedule_192_smear call _vpaes_schedule_mangle - call _vpaes_schedule_round dec rsi jz $L$schedule_mangle_last call _vpaes_schedule_mangle - call _vpaes_schedule_192_smear jmp $L$oop_schedule_192 @@ -327,12 +320,10 @@ ALIGN 16 $L$schedule_256:: movdqu xmm0,XMMWORD PTR[16+rdi] call _vpaes_schedule_transform - mov esi,7 $L$oop_schedule_256:: call _vpaes_schedule_mangle - movdqa xmm6,xmm0 @@ -342,7 +333,6 @@ $L$oop_schedule_256:: call _vpaes_schedule_mangle - pshufd xmm0,xmm0,0FFh movdqa xmm5,xmm7 movdqa xmm7,xmm6 @@ -379,7 +369,6 @@ $L$schedule_mangle_last_dec:: add rdx,-16 pxor xmm0,XMMWORD PTR[$L$k_s63] call _vpaes_schedule_transform - movdqu XMMWORD PTR[rdx],xmm0 @@ -411,12 +400,12 @@ _vpaes_schedule_core ENDP ALIGN 16 _vpaes_schedule_192_smear PROC PRIVATE - pshufd xmm0,xmm6,080h - pxor xmm6,xmm0 + pshufd xmm1,xmm6,080h pshufd xmm0,xmm7,0FEh + pxor xmm6,xmm1 + pxor xmm1,xmm1 pxor xmm6,xmm0 movdqa xmm0,xmm6 - pxor xmm1,xmm1 movhlps xmm6,xmm1 DB 0F3h,0C3h ;repret _vpaes_schedule_192_smear ENDP @@ -1061,7 +1050,6 @@ se_handler PROC PRIVATE lea rdi,QWORD PTR[512+r8] mov ecx,20 DD 0a548f3fch - lea rax,QWORD PTR[184+rax] $L$in_prologue:: @@ -1076,7 +1064,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -1135,27 +1122,22 @@ $L$SEH_info_vpaes_set_encrypt_key:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue - $L$SEH_info_vpaes_set_decrypt_key:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue - $L$SEH_info_vpaes_encrypt:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$enc_body,imagerel $L$enc_epilogue - $L$SEH_info_vpaes_decrypt:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$dec_body,imagerel $L$dec_epilogue - $L$SEH_info_vpaes_cbc_encrypt:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$cbc_body,imagerel 
$L$cbc_epilogue - .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm b/deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm deleted file mode 100644 index b83aa18d46e24f..00000000000000 --- a/deps/openssl/asm/x64-win32-masm/bn/modexp512-x86_64.asm +++ /dev/null @@ -1,1890 +0,0 @@ -OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' - - -ALIGN 16 -MULADD_128x512 PROC PRIVATE - mov rax,QWORD PTR[rsi] - mul rbp - add r8,rax - adc rdx,0 - mov QWORD PTR[rcx],r8 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov r8,rdx - mov rbp,QWORD PTR[8+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r9,rax - adc rdx,0 - mov QWORD PTR[8+rcx],r9 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov r9,rdx - DB 0F3h,0C3h ;repret -MULADD_128x512 ENDP - -ALIGN 16 -mont_reduce PROC PRIVATE - lea rdi,QWORD PTR[192+rsp] - mov rsi,QWORD PTR[32+rsp] - add rsi,576 - lea rcx,QWORD PTR[520+rsp] - - mov rbp,QWORD PTR[96+rcx] - mov rax,QWORD PTR[rsi] - mul rbp - mov r8,QWORD PTR[rcx] - add r8,rax - adc rdx,0 - mov QWORD PTR[rdi],r8 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - mov r9,QWORD PTR[8+rcx] - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - mov r10,QWORD PTR[16+rcx] - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - mov r11,QWORD PTR[24+rcx] - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - mov r12,QWORD PTR[32+rcx] - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - mov r13,QWORD PTR[40+rcx] - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - mov r14,QWORD PTR[48+rcx] - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - mov r15,QWORD PTR[56+rcx] - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov r8,rdx - mov rbp,QWORD PTR[104+rcx] - mov rax,QWORD PTR[rsi] - mul rbp - add r9,rax - adc rdx,0 - mov QWORD PTR[8+rdi],r9 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add 
r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov r9,rdx - mov rbp,QWORD PTR[112+rcx] - mov rax,QWORD PTR[rsi] - mul rbp - add r10,rax - adc rdx,0 - mov QWORD PTR[16+rdi],r10 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov r10,rdx - mov rbp,QWORD PTR[120+rcx] - mov rax,QWORD PTR[rsi] - mul rbp - add r11,rax - adc rdx,0 - mov QWORD PTR[24+rdi],r11 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov r11,rdx - xor rax,rax - - add r8,QWORD PTR[64+rcx] - adc r9,QWORD PTR[72+rcx] - adc r10,QWORD PTR[80+rcx] - adc r11,QWORD PTR[88+rcx] - adc rax,0 - - - - - mov QWORD PTR[64+rdi],r8 - mov QWORD PTR[72+rdi],r9 - mov rbp,r10 - mov QWORD PTR[88+rdi],r11 - - mov QWORD PTR[384+rsp],rax - - mov r8,QWORD PTR[rdi] - mov r9,QWORD PTR[8+rdi] - mov r10,QWORD PTR[16+rdi] - mov r11,QWORD PTR[24+rdi] - - - - - - - - - add rdi,8*10 - - add rsi,64 - lea rcx,QWORD PTR[296+rsp] - - call MULADD_128x512 - - - mov rax,QWORD PTR[384+rsp] - - - add r8,QWORD PTR[((-16))+rdi] - adc r9,QWORD PTR[((-8))+rdi] - mov QWORD PTR[64+rcx],r8 - mov QWORD PTR[72+rcx],r9 - - adc rax,rax - mov QWORD PTR[384+rsp],rax - - lea rdi,QWORD PTR[192+rsp] - add rsi,64 - - - - - - mov r8,QWORD PTR[rsi] - mov rbx,QWORD PTR[8+rsi] - - mov rax,QWORD PTR[rcx] - mul r8 - mov rbp,rax - mov r9,rdx - - mov rax,QWORD PTR[8+rcx] - mul r8 - add r9,rax - - mov rax,QWORD PTR[rcx] - mul rbx - add r9,rax - - mov QWORD PTR[8+rdi],r9 - - - sub rsi,192 - - mov r8,QWORD PTR[rcx] - mov r9,QWORD PTR[8+rcx] - - call MULADD_128x512 - - - - - - mov rax,QWORD PTR[rsi] - mov rbx,QWORD 
PTR[8+rsi] - mov rdi,QWORD PTR[16+rsi] - mov rdx,QWORD PTR[24+rsi] - - - mov rbp,QWORD PTR[384+rsp] - - add r8,QWORD PTR[64+rcx] - adc r9,QWORD PTR[72+rcx] - - - adc rbp,rbp - - - - shl rbp,3 - mov rcx,QWORD PTR[32+rsp] - add rbp,rcx - - - xor rsi,rsi - - add r10,QWORD PTR[rbp] - adc r11,QWORD PTR[64+rbp] - adc r12,QWORD PTR[128+rbp] - adc r13,QWORD PTR[192+rbp] - adc r14,QWORD PTR[256+rbp] - adc r15,QWORD PTR[320+rbp] - adc r8,QWORD PTR[384+rbp] - adc r9,QWORD PTR[448+rbp] - - - - sbb rsi,0 - - - and rax,rsi - and rbx,rsi - and rdi,rsi - and rdx,rsi - - mov rbp,1 - sub r10,rax - sbb r11,rbx - sbb r12,rdi - sbb r13,rdx - - - - - sbb rbp,0 - - - - add rcx,512 - mov rax,QWORD PTR[32+rcx] - mov rbx,QWORD PTR[40+rcx] - mov rdi,QWORD PTR[48+rcx] - mov rdx,QWORD PTR[56+rcx] - - - - and rax,rsi - and rbx,rsi - and rdi,rsi - and rdx,rsi - - - - sub rbp,1 - - sbb r14,rax - sbb r15,rbx - sbb r8,rdi - sbb r9,rdx - - - - mov rsi,QWORD PTR[144+rsp] - mov QWORD PTR[rsi],r10 - mov QWORD PTR[8+rsi],r11 - mov QWORD PTR[16+rsi],r12 - mov QWORD PTR[24+rsi],r13 - mov QWORD PTR[32+rsi],r14 - mov QWORD PTR[40+rsi],r15 - mov QWORD PTR[48+rsi],r8 - mov QWORD PTR[56+rsi],r9 - - DB 0F3h,0C3h ;repret -mont_reduce ENDP - -ALIGN 16 -mont_mul_a3b PROC PRIVATE - - - - - mov rbp,QWORD PTR[rdi] - - mov rax,r10 - mul rbp - mov QWORD PTR[520+rsp],rax - mov r10,rdx - mov rax,r11 - mul rbp - add r10,rax - adc rdx,0 - mov r11,rdx - mov rax,r12 - mul rbp - add r11,rax - adc rdx,0 - mov r12,rdx - mov rax,r13 - mul rbp - add r12,rax - adc rdx,0 - mov r13,rdx - mov rax,r14 - mul rbp - add r13,rax - adc rdx,0 - mov r14,rdx - mov rax,r15 - mul rbp - add r14,rax - adc rdx,0 - mov r15,rdx - mov rax,r8 - mul rbp - add r15,rax - adc rdx,0 - mov r8,rdx - mov rax,r9 - mul rbp - add r8,rax - adc rdx,0 - mov r9,rdx - mov rbp,QWORD PTR[8+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r10,rax - adc rdx,0 - mov QWORD PTR[528+rsp],r10 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov r10,rdx - mov rbp,QWORD PTR[16+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r11,rax - adc rdx,0 - mov QWORD PTR[536+rsp],r11 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov r11,rdx - mov rbp,QWORD 
PTR[24+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r12,rax - adc rdx,0 - mov QWORD PTR[544+rsp],r12 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov r12,rdx - mov rbp,QWORD PTR[32+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r13,rax - adc rdx,0 - mov QWORD PTR[552+rsp],r13 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov r13,rdx - mov rbp,QWORD PTR[40+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r14,rax - adc rdx,0 - mov QWORD PTR[560+rsp],r14 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov r14,rdx - mov rbp,QWORD PTR[48+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r15,rax - adc rdx,0 - mov QWORD PTR[568+rsp],r15 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r8,rax - adc rdx,0 - add r8,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov r15,rdx - mov rbp,QWORD PTR[56+rdi] - mov rax,QWORD PTR[rsi] - mul rbp - add r8,rax - adc rdx,0 - mov 
QWORD PTR[576+rsp],r8 - mov rbx,rdx - - mov rax,QWORD PTR[8+rsi] - mul rbp - add r9,rax - adc rdx,0 - add r9,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[16+rsi] - mul rbp - add r10,rax - adc rdx,0 - add r10,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[24+rsi] - mul rbp - add r11,rax - adc rdx,0 - add r11,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[32+rsi] - mul rbp - add r12,rax - adc rdx,0 - add r12,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[40+rsi] - mul rbp - add r13,rax - adc rdx,0 - add r13,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[48+rsi] - mul rbp - add r14,rax - adc rdx,0 - add r14,rbx - adc rdx,0 - mov rbx,rdx - - mov rax,QWORD PTR[56+rsi] - mul rbp - add r15,rax - adc rdx,0 - add r15,rbx - adc rdx,0 - mov r8,rdx - mov QWORD PTR[584+rsp],r9 - mov QWORD PTR[592+rsp],r10 - mov QWORD PTR[600+rsp],r11 - mov QWORD PTR[608+rsp],r12 - mov QWORD PTR[616+rsp],r13 - mov QWORD PTR[624+rsp],r14 - mov QWORD PTR[632+rsp],r15 - mov QWORD PTR[640+rsp],r8 - - - - - - jmp mont_reduce - - -mont_mul_a3b ENDP - -ALIGN 16 -sqr_reduce PROC PRIVATE - mov rcx,QWORD PTR[16+rsp] - - - - mov rbx,r10 - - mov rax,r11 - mul rbx - mov QWORD PTR[528+rsp],rax - mov r10,rdx - mov rax,r12 - mul rbx - add r10,rax - adc rdx,0 - mov r11,rdx - mov rax,r13 - mul rbx - add r11,rax - adc rdx,0 - mov r12,rdx - mov rax,r14 - mul rbx - add r12,rax - adc rdx,0 - mov r13,rdx - mov rax,r15 - mul rbx - add r13,rax - adc rdx,0 - mov r14,rdx - mov rax,r8 - mul rbx - add r14,rax - adc rdx,0 - mov r15,rdx - mov rax,r9 - mul rbx - add r15,rax - adc rdx,0 - mov rsi,rdx - - mov QWORD PTR[536+rsp],r10 - - - - - - mov rbx,QWORD PTR[8+rcx] - - mov rax,QWORD PTR[16+rcx] - mul rbx - add r11,rax - adc rdx,0 - mov QWORD PTR[544+rsp],r11 - - mov r10,rdx - mov rax,QWORD PTR[24+rcx] - mul rbx - add r12,rax - adc rdx,0 - add r12,r10 - adc rdx,0 - mov QWORD PTR[552+rsp],r12 - - mov r10,rdx - mov rax,QWORD PTR[32+rcx] - mul rbx - add r13,rax - adc rdx,0 - add r13,r10 - adc rdx,0 - - mov r10,rdx - mov rax,QWORD PTR[40+rcx] - mul rbx - add r14,rax - adc rdx,0 - add r14,r10 - adc rdx,0 - - mov r10,rdx - mov rax,r8 - mul rbx - add r15,rax - adc rdx,0 - add r15,r10 - adc rdx,0 - - mov r10,rdx - mov rax,r9 - mul rbx - add rsi,rax - adc rdx,0 - add rsi,r10 - adc rdx,0 - - mov r11,rdx - - - - - mov rbx,QWORD PTR[16+rcx] - - mov rax,QWORD PTR[24+rcx] - mul rbx - add r13,rax - adc rdx,0 - mov QWORD PTR[560+rsp],r13 - - mov r10,rdx - mov rax,QWORD PTR[32+rcx] - mul rbx - add r14,rax - adc rdx,0 - add r14,r10 - adc rdx,0 - mov QWORD PTR[568+rsp],r14 - - mov r10,rdx - mov rax,QWORD PTR[40+rcx] - mul rbx - add r15,rax - adc rdx,0 - add r15,r10 - adc rdx,0 - - mov r10,rdx - mov rax,r8 - mul rbx - add rsi,rax - adc rdx,0 - add rsi,r10 - adc rdx,0 - - mov r10,rdx - mov rax,r9 - mul rbx - add r11,rax - adc rdx,0 - add r11,r10 - adc rdx,0 - - mov r12,rdx - - - - - - mov rbx,QWORD PTR[24+rcx] - - mov rax,QWORD PTR[32+rcx] - mul rbx - add r15,rax - adc rdx,0 - mov QWORD PTR[576+rsp],r15 - - mov r10,rdx - mov rax,QWORD PTR[40+rcx] - mul rbx - add rsi,rax - adc rdx,0 - add rsi,r10 - adc rdx,0 - mov QWORD PTR[584+rsp],rsi - - mov r10,rdx - mov rax,r8 - mul rbx - add r11,rax - adc rdx,0 - add r11,r10 - adc rdx,0 - - mov r10,rdx - mov rax,r9 - mul rbx - add r12,rax - adc rdx,0 - add r12,r10 - adc rdx,0 - - mov r15,rdx - - - - - mov rbx,QWORD PTR[32+rcx] - - mov rax,QWORD PTR[40+rcx] - mul rbx - add r11,rax - adc rdx,0 - mov QWORD PTR[592+rsp],r11 - - mov r10,rdx - mov rax,r8 - mul rbx - add r12,rax - adc rdx,0 - add r12,r10 - adc 
rdx,0 - mov QWORD PTR[600+rsp],r12 - - mov r10,rdx - mov rax,r9 - mul rbx - add r15,rax - adc rdx,0 - add r15,r10 - adc rdx,0 - - mov r11,rdx - - - - - mov rbx,QWORD PTR[40+rcx] - - mov rax,r8 - mul rbx - add r15,rax - adc rdx,0 - mov QWORD PTR[608+rsp],r15 - - mov r10,rdx - mov rax,r9 - mul rbx - add r11,rax - adc rdx,0 - add r11,r10 - adc rdx,0 - mov QWORD PTR[616+rsp],r11 - - mov r12,rdx - - - - - mov rbx,r8 - - mov rax,r9 - mul rbx - add r12,rax - adc rdx,0 - mov QWORD PTR[624+rsp],r12 - - mov QWORD PTR[632+rsp],rdx - - - mov r10,QWORD PTR[528+rsp] - mov r11,QWORD PTR[536+rsp] - mov r12,QWORD PTR[544+rsp] - mov r13,QWORD PTR[552+rsp] - mov r14,QWORD PTR[560+rsp] - mov r15,QWORD PTR[568+rsp] - - mov rax,QWORD PTR[24+rcx] - mul rax - mov rdi,rax - mov r8,rdx - - add r10,r10 - adc r11,r11 - adc r12,r12 - adc r13,r13 - adc r14,r14 - adc r15,r15 - adc r8,0 - - mov rax,QWORD PTR[rcx] - mul rax - mov QWORD PTR[520+rsp],rax - mov rbx,rdx - - mov rax,QWORD PTR[8+rcx] - mul rax - - add r10,rbx - adc r11,rax - adc rdx,0 - - mov rbx,rdx - mov QWORD PTR[528+rsp],r10 - mov QWORD PTR[536+rsp],r11 - - mov rax,QWORD PTR[16+rcx] - mul rax - - add r12,rbx - adc r13,rax - adc rdx,0 - - mov rbx,rdx - - mov QWORD PTR[544+rsp],r12 - mov QWORD PTR[552+rsp],r13 - - xor rbp,rbp - add r14,rbx - adc r15,rdi - adc rbp,0 - - mov QWORD PTR[560+rsp],r14 - mov QWORD PTR[568+rsp],r15 - - - - - mov r10,QWORD PTR[576+rsp] - mov r11,QWORD PTR[584+rsp] - mov r12,QWORD PTR[592+rsp] - mov r13,QWORD PTR[600+rsp] - mov r14,QWORD PTR[608+rsp] - mov r15,QWORD PTR[616+rsp] - mov rdi,QWORD PTR[624+rsp] - mov rsi,QWORD PTR[632+rsp] - - mov rax,r9 - mul rax - mov r9,rax - mov rbx,rdx - - add r10,r10 - adc r11,r11 - adc r12,r12 - adc r13,r13 - adc r14,r14 - adc r15,r15 - adc rdi,rdi - adc rsi,rsi - adc rbx,0 - - add r10,rbp - - mov rax,QWORD PTR[32+rcx] - mul rax - - add r10,r8 - adc r11,rax - adc rdx,0 - - mov rbp,rdx - - mov QWORD PTR[576+rsp],r10 - mov QWORD PTR[584+rsp],r11 - - mov rax,QWORD PTR[40+rcx] - mul rax - - add r12,rbp - adc r13,rax - adc rdx,0 - - mov rbp,rdx - - mov QWORD PTR[592+rsp],r12 - mov QWORD PTR[600+rsp],r13 - - mov rax,QWORD PTR[48+rcx] - mul rax - - add r14,rbp - adc r15,rax - adc rdx,0 - - mov QWORD PTR[608+rsp],r14 - mov QWORD PTR[616+rsp],r15 - - add rdi,rdx - adc rsi,r9 - adc rbx,0 - - mov QWORD PTR[624+rsp],rdi - mov QWORD PTR[632+rsp],rsi - mov QWORD PTR[640+rsp],rbx - - jmp mont_reduce - - -sqr_reduce ENDP -PUBLIC mod_exp_512 - -mod_exp_512 PROC PUBLIC - mov QWORD PTR[8+rsp],rdi ;WIN64 prologue - mov QWORD PTR[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_mod_exp_512:: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - - - push rbp - push rbx - push r12 - push r13 - push r14 - push r15 - - - mov r8,rsp - sub rsp,2688 - and rsp,-64 - - - mov QWORD PTR[rsp],r8 - mov QWORD PTR[8+rsp],rdi - mov QWORD PTR[16+rsp],rsi - mov QWORD PTR[24+rsp],rcx -$L$body:: - - - - pxor xmm4,xmm4 - movdqu xmm0,XMMWORD PTR[rsi] - movdqu xmm1,XMMWORD PTR[16+rsi] - movdqu xmm2,XMMWORD PTR[32+rsi] - movdqu xmm3,XMMWORD PTR[48+rsi] - movdqa XMMWORD PTR[512+rsp],xmm4 - movdqa XMMWORD PTR[528+rsp],xmm4 - movdqa XMMWORD PTR[608+rsp],xmm4 - movdqa XMMWORD PTR[624+rsp],xmm4 - movdqa XMMWORD PTR[544+rsp],xmm0 - movdqa XMMWORD PTR[560+rsp],xmm1 - movdqa XMMWORD PTR[576+rsp],xmm2 - movdqa XMMWORD PTR[592+rsp],xmm3 - - - movdqu xmm0,XMMWORD PTR[rdx] - movdqu xmm1,XMMWORD PTR[16+rdx] - movdqu xmm2,XMMWORD PTR[32+rdx] - movdqu xmm3,XMMWORD PTR[48+rdx] - - lea rbx,QWORD PTR[384+rsp] - mov QWORD PTR[136+rsp],rbx - call mont_reduce - - - lea 
rcx,QWORD PTR[448+rsp] - xor rax,rax - mov QWORD PTR[rcx],rax - mov QWORD PTR[8+rcx],rax - mov QWORD PTR[24+rcx],rax - mov QWORD PTR[32+rcx],rax - mov QWORD PTR[40+rcx],rax - mov QWORD PTR[48+rcx],rax - mov QWORD PTR[56+rcx],rax - mov QWORD PTR[128+rsp],rax - mov QWORD PTR[16+rcx],1 - - lea rbp,QWORD PTR[640+rsp] - mov rsi,rcx - mov rdi,rbp - mov rax,8 -loop_0:: - mov rbx,QWORD PTR[rcx] - mov WORD PTR[rdi],bx - shr rbx,16 - mov WORD PTR[64+rdi],bx - shr rbx,16 - mov WORD PTR[128+rdi],bx - shr rbx,16 - mov WORD PTR[192+rdi],bx - lea rcx,QWORD PTR[8+rcx] - lea rdi,QWORD PTR[256+rdi] - dec rax - jnz loop_0 - mov rax,31 - mov QWORD PTR[32+rsp],rax - mov QWORD PTR[40+rsp],rbp - - mov QWORD PTR[136+rsp],rsi - mov r10,QWORD PTR[rsi] - mov r11,QWORD PTR[8+rsi] - mov r12,QWORD PTR[16+rsi] - mov r13,QWORD PTR[24+rsi] - mov r14,QWORD PTR[32+rsi] - mov r15,QWORD PTR[40+rsi] - mov r8,QWORD PTR[48+rsi] - mov r9,QWORD PTR[56+rsi] -init_loop:: - lea rdi,QWORD PTR[384+rsp] - call mont_mul_a3b - lea rsi,QWORD PTR[448+rsp] - mov rbp,QWORD PTR[40+rsp] - add rbp,2 - mov QWORD PTR[40+rsp],rbp - mov rcx,rsi - mov rax,8 -loop_1:: - mov rbx,QWORD PTR[rcx] - mov WORD PTR[rbp],bx - shr rbx,16 - mov WORD PTR[64+rbp],bx - shr rbx,16 - mov WORD PTR[128+rbp],bx - shr rbx,16 - mov WORD PTR[192+rbp],bx - lea rcx,QWORD PTR[8+rcx] - lea rbp,QWORD PTR[256+rbp] - dec rax - jnz loop_1 - mov rax,QWORD PTR[32+rsp] - sub rax,1 - mov QWORD PTR[32+rsp],rax - jne init_loop - - - - movdqa XMMWORD PTR[64+rsp],xmm0 - movdqa XMMWORD PTR[80+rsp],xmm1 - movdqa XMMWORD PTR[96+rsp],xmm2 - movdqa XMMWORD PTR[112+rsp],xmm3 - - - - - - mov eax,DWORD PTR[126+rsp] - mov rdx,rax - shr rax,11 - and edx,007FFh - mov DWORD PTR[126+rsp],edx - lea rsi,QWORD PTR[640+rax*2+rsp] - mov rdx,QWORD PTR[8+rsp] - mov rbp,4 -loop_2:: - movzx rbx,WORD PTR[192+rsi] - movzx rax,WORD PTR[448+rsi] - shl rbx,16 - shl rax,16 - mov bx,WORD PTR[128+rsi] - mov ax,WORD PTR[384+rsi] - shl rbx,16 - shl rax,16 - mov bx,WORD PTR[64+rsi] - mov ax,WORD PTR[320+rsi] - shl rbx,16 - shl rax,16 - mov bx,WORD PTR[rsi] - mov ax,WORD PTR[256+rsi] - mov QWORD PTR[rdx],rbx - mov QWORD PTR[8+rdx],rax - lea rsi,QWORD PTR[512+rsi] - lea rdx,QWORD PTR[16+rdx] - sub rbp,1 - jnz loop_2 - mov QWORD PTR[48+rsp],505 - - mov rcx,QWORD PTR[8+rsp] - mov QWORD PTR[136+rsp],rcx - mov r10,QWORD PTR[rcx] - mov r11,QWORD PTR[8+rcx] - mov r12,QWORD PTR[16+rcx] - mov r13,QWORD PTR[24+rcx] - mov r14,QWORD PTR[32+rcx] - mov r15,QWORD PTR[40+rcx] - mov r8,QWORD PTR[48+rcx] - mov r9,QWORD PTR[56+rcx] - jmp sqr_2 - -main_loop_a3b:: - call sqr_reduce - call sqr_reduce - call sqr_reduce -sqr_2:: - call sqr_reduce - call sqr_reduce - - - - mov rcx,QWORD PTR[48+rsp] - mov rax,rcx - shr rax,4 - mov edx,DWORD PTR[64+rax*2+rsp] - and rcx,15 - shr rdx,cl - and rdx,01Fh - - lea rsi,QWORD PTR[640+rdx*2+rsp] - lea rdx,QWORD PTR[448+rsp] - mov rdi,rdx - mov rbp,4 -loop_3:: - movzx rbx,WORD PTR[192+rsi] - movzx rax,WORD PTR[448+rsi] - shl rbx,16 - shl rax,16 - mov bx,WORD PTR[128+rsi] - mov ax,WORD PTR[384+rsi] - shl rbx,16 - shl rax,16 - mov bx,WORD PTR[64+rsi] - mov ax,WORD PTR[320+rsi] - shl rbx,16 - shl rax,16 - mov bx,WORD PTR[rsi] - mov ax,WORD PTR[256+rsi] - mov QWORD PTR[rdx],rbx - mov QWORD PTR[8+rdx],rax - lea rsi,QWORD PTR[512+rsi] - lea rdx,QWORD PTR[16+rdx] - sub rbp,1 - jnz loop_3 - mov rsi,QWORD PTR[8+rsp] - call mont_mul_a3b - - - - mov rcx,QWORD PTR[48+rsp] - sub rcx,5 - mov QWORD PTR[48+rsp],rcx - jge main_loop_a3b - - - -end_main_loop_a3b:: - - - mov rdx,QWORD PTR[8+rsp] - pxor xmm4,xmm4 - movdqu 
xmm0,XMMWORD PTR[rdx] - movdqu xmm1,XMMWORD PTR[16+rdx] - movdqu xmm2,XMMWORD PTR[32+rdx] - movdqu xmm3,XMMWORD PTR[48+rdx] - movdqa XMMWORD PTR[576+rsp],xmm4 - movdqa XMMWORD PTR[592+rsp],xmm4 - movdqa XMMWORD PTR[608+rsp],xmm4 - movdqa XMMWORD PTR[624+rsp],xmm4 - movdqa XMMWORD PTR[512+rsp],xmm0 - movdqa XMMWORD PTR[528+rsp],xmm1 - movdqa XMMWORD PTR[544+rsp],xmm2 - movdqa XMMWORD PTR[560+rsp],xmm3 - call mont_reduce - - - - mov rax,QWORD PTR[8+rsp] - mov r8,QWORD PTR[rax] - mov r9,QWORD PTR[8+rax] - mov r10,QWORD PTR[16+rax] - mov r11,QWORD PTR[24+rax] - mov r12,QWORD PTR[32+rax] - mov r13,QWORD PTR[40+rax] - mov r14,QWORD PTR[48+rax] - mov r15,QWORD PTR[56+rax] - - - mov rbx,QWORD PTR[24+rsp] - add rbx,512 - - sub r8,QWORD PTR[rbx] - sbb r9,QWORD PTR[8+rbx] - sbb r10,QWORD PTR[16+rbx] - sbb r11,QWORD PTR[24+rbx] - sbb r12,QWORD PTR[32+rbx] - sbb r13,QWORD PTR[40+rbx] - sbb r14,QWORD PTR[48+rbx] - sbb r15,QWORD PTR[56+rbx] - - - mov rsi,QWORD PTR[rax] - mov rdi,QWORD PTR[8+rax] - mov rcx,QWORD PTR[16+rax] - mov rdx,QWORD PTR[24+rax] - cmovnc rsi,r8 - cmovnc rdi,r9 - cmovnc rcx,r10 - cmovnc rdx,r11 - mov QWORD PTR[rax],rsi - mov QWORD PTR[8+rax],rdi - mov QWORD PTR[16+rax],rcx - mov QWORD PTR[24+rax],rdx - - mov rsi,QWORD PTR[32+rax] - mov rdi,QWORD PTR[40+rax] - mov rcx,QWORD PTR[48+rax] - mov rdx,QWORD PTR[56+rax] - cmovnc rsi,r12 - cmovnc rdi,r13 - cmovnc rcx,r14 - cmovnc rdx,r15 - mov QWORD PTR[32+rax],rsi - mov QWORD PTR[40+rax],rdi - mov QWORD PTR[48+rax],rcx - mov QWORD PTR[56+rax],rdx - - mov rsi,QWORD PTR[rsp] - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbx,QWORD PTR[32+rsi] - mov rbp,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] -$L$epilogue:: - mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue - mov rsi,QWORD PTR[16+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_mod_exp_512:: -mod_exp_512 ENDP -EXTERN __imp_RtlVirtualUnwind:NEAR - -ALIGN 16 -mod_exp_512_se_handler PROC PRIVATE - push rsi - push rdi - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - pushfq - sub rsp,64 - - mov rax,QWORD PTR[120+r8] - mov rbx,QWORD PTR[248+r8] - - lea r10,QWORD PTR[$L$body] - cmp rbx,r10 - jb $L$in_prologue - - mov rax,QWORD PTR[152+r8] - - lea r10,QWORD PTR[$L$epilogue] - cmp rbx,r10 - jae $L$in_prologue - - mov rax,QWORD PTR[rax] - - mov rbx,QWORD PTR[32+rax] - mov rbp,QWORD PTR[40+rax] - mov r12,QWORD PTR[24+rax] - mov r13,QWORD PTR[16+rax] - mov r14,QWORD PTR[8+rax] - mov r15,QWORD PTR[rax] - lea rax,QWORD PTR[48+rax] - mov QWORD PTR[144+r8],rbx - mov QWORD PTR[160+r8],rbp - mov QWORD PTR[216+r8],r12 - mov QWORD PTR[224+r8],r13 - mov QWORD PTR[232+r8],r14 - mov QWORD PTR[240+r8],r15 - -$L$in_prologue:: - mov rdi,QWORD PTR[8+rax] - mov rsi,QWORD PTR[16+rax] - mov QWORD PTR[152+r8],rax - mov QWORD PTR[168+r8],rsi - mov QWORD PTR[176+r8],rdi - - mov rdi,QWORD PTR[40+r9] - mov rsi,r8 - mov ecx,154 - DD 0a548f3fch - - - mov rsi,r9 - xor rcx,rcx - mov rdx,QWORD PTR[8+rsi] - mov r8,QWORD PTR[rsi] - mov r9,QWORD PTR[16+rsi] - mov r10,QWORD PTR[40+rsi] - lea r11,QWORD PTR[56+rsi] - lea r12,QWORD PTR[24+rsi] - mov QWORD PTR[32+rsp],r10 - mov QWORD PTR[40+rsp],r11 - mov QWORD PTR[48+rsp],r12 - mov QWORD PTR[56+rsp],rcx - call QWORD PTR[__imp_RtlVirtualUnwind] - - mov eax,1 - add rsp,64 - popfq - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - pop rdi - pop rsi - DB 0F3h,0C3h ;repret -mod_exp_512_se_handler ENDP - -.text$ ENDS -.pdata SEGMENT READONLY ALIGN(4) -ALIGN 4 - DD imagerel $L$SEH_begin_mod_exp_512 - DD imagerel 
$L$SEH_end_mod_exp_512 - DD imagerel $L$SEH_info_mod_exp_512 - -.pdata ENDS -.xdata SEGMENT READONLY ALIGN(8) -ALIGN 8 -$L$SEH_info_mod_exp_512:: -DB 9,0,0,0 - DD imagerel mod_exp_512_se_handler - -.xdata ENDS -END diff --git a/deps/openssl/asm/x64-win32-masm/bn/rsaz-avx2.asm b/deps/openssl/asm/x64-win32-masm/bn/rsaz-avx2.asm new file mode 100644 index 00000000000000..0d3107834e6983 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/bn/rsaz-avx2.asm @@ -0,0 +1,1867 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC rsaz_1024_sqr_avx2 + +ALIGN 64 +rsaz_1024_sqr_avx2 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_1024_sqr_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rax,QWORD PTR[rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + vzeroupper + lea rsp,QWORD PTR[((-168))+rsp] + vmovaps XMMWORD PTR[(-216)+rax],xmm6 + vmovaps XMMWORD PTR[(-200)+rax],xmm7 + vmovaps XMMWORD PTR[(-184)+rax],xmm8 + vmovaps XMMWORD PTR[(-168)+rax],xmm9 + vmovaps XMMWORD PTR[(-152)+rax],xmm10 + vmovaps XMMWORD PTR[(-136)+rax],xmm11 + vmovaps XMMWORD PTR[(-120)+rax],xmm12 + vmovaps XMMWORD PTR[(-104)+rax],xmm13 + vmovaps XMMWORD PTR[(-88)+rax],xmm14 + vmovaps XMMWORD PTR[(-72)+rax],xmm15 +$L$sqr_1024_body:: + mov rbp,rax + mov r13,rdx + sub rsp,832 + mov r15,r13 + sub rdi,-128 + sub rsi,-128 + sub r13,-128 + + and r15,4095 + add r15,32*10 + shr r15,12 + vpxor ymm9,ymm9,ymm9 + jz $L$sqr_1024_no_n_copy + + + + + + sub rsp,32*10 + vmovdqu ymm0,YMMWORD PTR[((0-128))+r13] + and rsp,-2048 + vmovdqu ymm1,YMMWORD PTR[((32-128))+r13] + vmovdqu ymm2,YMMWORD PTR[((64-128))+r13] + vmovdqu ymm3,YMMWORD PTR[((96-128))+r13] + vmovdqu ymm4,YMMWORD PTR[((128-128))+r13] + vmovdqu ymm5,YMMWORD PTR[((160-128))+r13] + vmovdqu ymm6,YMMWORD PTR[((192-128))+r13] + vmovdqu ymm7,YMMWORD PTR[((224-128))+r13] + vmovdqu ymm8,YMMWORD PTR[((256-128))+r13] + lea r13,QWORD PTR[((832+128))+rsp] + vmovdqu YMMWORD PTR[(0-128)+r13],ymm0 + vmovdqu YMMWORD PTR[(32-128)+r13],ymm1 + vmovdqu YMMWORD PTR[(64-128)+r13],ymm2 + vmovdqu YMMWORD PTR[(96-128)+r13],ymm3 + vmovdqu YMMWORD PTR[(128-128)+r13],ymm4 + vmovdqu YMMWORD PTR[(160-128)+r13],ymm5 + vmovdqu YMMWORD PTR[(192-128)+r13],ymm6 + vmovdqu YMMWORD PTR[(224-128)+r13],ymm7 + vmovdqu YMMWORD PTR[(256-128)+r13],ymm8 + vmovdqu YMMWORD PTR[(288-128)+r13],ymm9 + +$L$sqr_1024_no_n_copy:: + and rsp,-1024 + + vmovdqu ymm1,YMMWORD PTR[((32-128))+rsi] + vmovdqu ymm2,YMMWORD PTR[((64-128))+rsi] + vmovdqu ymm3,YMMWORD PTR[((96-128))+rsi] + vmovdqu ymm4,YMMWORD PTR[((128-128))+rsi] + vmovdqu ymm5,YMMWORD PTR[((160-128))+rsi] + vmovdqu ymm6,YMMWORD PTR[((192-128))+rsi] + vmovdqu ymm7,YMMWORD PTR[((224-128))+rsi] + vmovdqu ymm8,YMMWORD PTR[((256-128))+rsi] + + lea rbx,QWORD PTR[192+rsp] + vpbroadcastq ymm15,QWORD PTR[$L$and_mask] + jmp $L$OOP_GRANDE_SQR_1024 + +ALIGN 32 +$L$OOP_GRANDE_SQR_1024:: + lea r9,QWORD PTR[((576+128))+rsp] + lea r12,QWORD PTR[448+rsp] + + + + + vpaddq ymm1,ymm1,ymm1 + vpbroadcastq ymm10,QWORD PTR[((0-128))+rsi] + vpaddq ymm2,ymm2,ymm2 + vmovdqa YMMWORD PTR[(0-128)+r9],ymm1 + vpaddq ymm3,ymm3,ymm3 + vmovdqa YMMWORD PTR[(32-128)+r9],ymm2 + vpaddq ymm4,ymm4,ymm4 + vmovdqa YMMWORD PTR[(64-128)+r9],ymm3 + vpaddq ymm5,ymm5,ymm5 + vmovdqa YMMWORD PTR[(96-128)+r9],ymm4 + vpaddq ymm6,ymm6,ymm6 + vmovdqa YMMWORD PTR[(128-128)+r9],ymm5 + vpaddq ymm7,ymm7,ymm7 + vmovdqa YMMWORD PTR[(160-128)+r9],ymm6 + vpaddq ymm8,ymm8,ymm8 + vmovdqa YMMWORD 
PTR[(192-128)+r9],ymm7 + vpxor ymm9,ymm9,ymm9 + vmovdqa YMMWORD PTR[(224-128)+r9],ymm8 + + vpmuludq ymm0,ymm10,YMMWORD PTR[((0-128))+rsi] + vpbroadcastq ymm11,QWORD PTR[((32-128))+rsi] + vmovdqu YMMWORD PTR[(288-192)+rbx],ymm9 + vpmuludq ymm1,ymm1,ymm10 + vmovdqu YMMWORD PTR[(320-448)+r12],ymm9 + vpmuludq ymm2,ymm2,ymm10 + vmovdqu YMMWORD PTR[(352-448)+r12],ymm9 + vpmuludq ymm3,ymm3,ymm10 + vmovdqu YMMWORD PTR[(384-448)+r12],ymm9 + vpmuludq ymm4,ymm4,ymm10 + vmovdqu YMMWORD PTR[(416-448)+r12],ymm9 + vpmuludq ymm5,ymm5,ymm10 + vmovdqu YMMWORD PTR[(448-448)+r12],ymm9 + vpmuludq ymm6,ymm6,ymm10 + vmovdqu YMMWORD PTR[(480-448)+r12],ymm9 + vpmuludq ymm7,ymm7,ymm10 + vmovdqu YMMWORD PTR[(512-448)+r12],ymm9 + vpmuludq ymm8,ymm8,ymm10 + vpbroadcastq ymm10,QWORD PTR[((64-128))+rsi] + vmovdqu YMMWORD PTR[(544-448)+r12],ymm9 + + mov r15,rsi + mov r14d,4 + jmp $L$sqr_entry_1024 +ALIGN 32 +$L$OOP_SQR_1024:: + vpbroadcastq ymm11,QWORD PTR[((32-128))+r15] + vpmuludq ymm0,ymm10,YMMWORD PTR[((0-128))+rsi] + vpaddq ymm0,ymm0,YMMWORD PTR[((0-192))+rbx] + vpmuludq ymm1,ymm10,YMMWORD PTR[((0-128))+r9] + vpaddq ymm1,ymm1,YMMWORD PTR[((32-192))+rbx] + vpmuludq ymm2,ymm10,YMMWORD PTR[((32-128))+r9] + vpaddq ymm2,ymm2,YMMWORD PTR[((64-192))+rbx] + vpmuludq ymm3,ymm10,YMMWORD PTR[((64-128))+r9] + vpaddq ymm3,ymm3,YMMWORD PTR[((96-192))+rbx] + vpmuludq ymm4,ymm10,YMMWORD PTR[((96-128))+r9] + vpaddq ymm4,ymm4,YMMWORD PTR[((128-192))+rbx] + vpmuludq ymm5,ymm10,YMMWORD PTR[((128-128))+r9] + vpaddq ymm5,ymm5,YMMWORD PTR[((160-192))+rbx] + vpmuludq ymm6,ymm10,YMMWORD PTR[((160-128))+r9] + vpaddq ymm6,ymm6,YMMWORD PTR[((192-192))+rbx] + vpmuludq ymm7,ymm10,YMMWORD PTR[((192-128))+r9] + vpaddq ymm7,ymm7,YMMWORD PTR[((224-192))+rbx] + vpmuludq ymm8,ymm10,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm10,QWORD PTR[((64-128))+r15] + vpaddq ymm8,ymm8,YMMWORD PTR[((256-192))+rbx] +$L$sqr_entry_1024:: + vmovdqu YMMWORD PTR[(0-192)+rbx],ymm0 + vmovdqu YMMWORD PTR[(32-192)+rbx],ymm1 + + vpmuludq ymm12,ymm11,YMMWORD PTR[((32-128))+rsi] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm14,ymm11,YMMWORD PTR[((32-128))+r9] + vpaddq ymm3,ymm3,ymm14 + vpmuludq ymm13,ymm11,YMMWORD PTR[((64-128))+r9] + vpaddq ymm4,ymm4,ymm13 + vpmuludq ymm12,ymm11,YMMWORD PTR[((96-128))+r9] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm14,ymm11,YMMWORD PTR[((128-128))+r9] + vpaddq ymm6,ymm6,ymm14 + vpmuludq ymm13,ymm11,YMMWORD PTR[((160-128))+r9] + vpaddq ymm7,ymm7,ymm13 + vpmuludq ymm12,ymm11,YMMWORD PTR[((192-128))+r9] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm0,ymm11,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm11,QWORD PTR[((96-128))+r15] + vpaddq ymm0,ymm0,YMMWORD PTR[((288-192))+rbx] + + vmovdqu YMMWORD PTR[(64-192)+rbx],ymm2 + vmovdqu YMMWORD PTR[(96-192)+rbx],ymm3 + + vpmuludq ymm13,ymm10,YMMWORD PTR[((64-128))+rsi] + vpaddq ymm4,ymm4,ymm13 + vpmuludq ymm12,ymm10,YMMWORD PTR[((64-128))+r9] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm14,ymm10,YMMWORD PTR[((96-128))+r9] + vpaddq ymm6,ymm6,ymm14 + vpmuludq ymm13,ymm10,YMMWORD PTR[((128-128))+r9] + vpaddq ymm7,ymm7,ymm13 + vpmuludq ymm12,ymm10,YMMWORD PTR[((160-128))+r9] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm14,ymm10,YMMWORD PTR[((192-128))+r9] + vpaddq ymm0,ymm0,ymm14 + vpmuludq ymm1,ymm10,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm10,QWORD PTR[((128-128))+r15] + vpaddq ymm1,ymm1,YMMWORD PTR[((320-448))+r12] + + vmovdqu YMMWORD PTR[(128-192)+rbx],ymm4 + vmovdqu YMMWORD PTR[(160-192)+rbx],ymm5 + + vpmuludq ymm12,ymm11,YMMWORD PTR[((96-128))+rsi] + vpaddq ymm6,ymm6,ymm12 + vpmuludq ymm14,ymm11,YMMWORD PTR[((96-128))+r9] + 
vpaddq ymm7,ymm7,ymm14 + vpmuludq ymm13,ymm11,YMMWORD PTR[((128-128))+r9] + vpaddq ymm8,ymm8,ymm13 + vpmuludq ymm12,ymm11,YMMWORD PTR[((160-128))+r9] + vpaddq ymm0,ymm0,ymm12 + vpmuludq ymm14,ymm11,YMMWORD PTR[((192-128))+r9] + vpaddq ymm1,ymm1,ymm14 + vpmuludq ymm2,ymm11,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm11,QWORD PTR[((160-128))+r15] + vpaddq ymm2,ymm2,YMMWORD PTR[((352-448))+r12] + + vmovdqu YMMWORD PTR[(192-192)+rbx],ymm6 + vmovdqu YMMWORD PTR[(224-192)+rbx],ymm7 + + vpmuludq ymm12,ymm10,YMMWORD PTR[((128-128))+rsi] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm14,ymm10,YMMWORD PTR[((128-128))+r9] + vpaddq ymm0,ymm0,ymm14 + vpmuludq ymm13,ymm10,YMMWORD PTR[((160-128))+r9] + vpaddq ymm1,ymm1,ymm13 + vpmuludq ymm12,ymm10,YMMWORD PTR[((192-128))+r9] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm3,ymm10,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm10,QWORD PTR[((192-128))+r15] + vpaddq ymm3,ymm3,YMMWORD PTR[((384-448))+r12] + + vmovdqu YMMWORD PTR[(256-192)+rbx],ymm8 + vmovdqu YMMWORD PTR[(288-192)+rbx],ymm0 + lea rbx,QWORD PTR[8+rbx] + + vpmuludq ymm13,ymm11,YMMWORD PTR[((160-128))+rsi] + vpaddq ymm1,ymm1,ymm13 + vpmuludq ymm12,ymm11,YMMWORD PTR[((160-128))+r9] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm14,ymm11,YMMWORD PTR[((192-128))+r9] + vpaddq ymm3,ymm3,ymm14 + vpmuludq ymm4,ymm11,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm11,QWORD PTR[((224-128))+r15] + vpaddq ymm4,ymm4,YMMWORD PTR[((416-448))+r12] + + vmovdqu YMMWORD PTR[(320-448)+r12],ymm1 + vmovdqu YMMWORD PTR[(352-448)+r12],ymm2 + + vpmuludq ymm12,ymm10,YMMWORD PTR[((192-128))+rsi] + vpaddq ymm3,ymm3,ymm12 + vpmuludq ymm14,ymm10,YMMWORD PTR[((192-128))+r9] + vpbroadcastq ymm0,QWORD PTR[((256-128))+r15] + vpaddq ymm4,ymm4,ymm14 + vpmuludq ymm5,ymm10,YMMWORD PTR[((224-128))+r9] + vpbroadcastq ymm10,QWORD PTR[((0+8-128))+r15] + vpaddq ymm5,ymm5,YMMWORD PTR[((448-448))+r12] + + vmovdqu YMMWORD PTR[(384-448)+r12],ymm3 + vmovdqu YMMWORD PTR[(416-448)+r12],ymm4 + lea r15,QWORD PTR[8+r15] + + vpmuludq ymm12,ymm11,YMMWORD PTR[((224-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm6,ymm11,YMMWORD PTR[((224-128))+r9] + vpaddq ymm6,ymm6,YMMWORD PTR[((480-448))+r12] + + vpmuludq ymm7,ymm0,YMMWORD PTR[((256-128))+rsi] + vmovdqu YMMWORD PTR[(448-448)+r12],ymm5 + vpaddq ymm7,ymm7,YMMWORD PTR[((512-448))+r12] + vmovdqu YMMWORD PTR[(480-448)+r12],ymm6 + vmovdqu YMMWORD PTR[(512-448)+r12],ymm7 + lea r12,QWORD PTR[8+r12] + + dec r14d + jnz $L$OOP_SQR_1024 + + vmovdqu ymm8,YMMWORD PTR[256+rsp] + vmovdqu ymm1,YMMWORD PTR[288+rsp] + vmovdqu ymm2,YMMWORD PTR[320+rsp] + lea rbx,QWORD PTR[192+rsp] + + vpsrlq ymm14,ymm8,29 + vpand ymm8,ymm8,ymm15 + vpsrlq ymm11,ymm1,29 + vpand ymm1,ymm1,ymm15 + + vpermq ymm14,ymm14,093h + vpxor ymm9,ymm9,ymm9 + vpermq ymm11,ymm11,093h + + vpblendd ymm10,ymm14,ymm9,3 + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm8,ymm8,ymm10 + vpblendd ymm11,ymm9,ymm11,3 + vpaddq ymm1,ymm1,ymm14 + vpaddq ymm2,ymm2,ymm11 + vmovdqu YMMWORD PTR[(288-192)+rbx],ymm1 + vmovdqu YMMWORD PTR[(320-192)+rbx],ymm2 + + mov rax,QWORD PTR[rsp] + mov r10,QWORD PTR[8+rsp] + mov r11,QWORD PTR[16+rsp] + mov r12,QWORD PTR[24+rsp] + vmovdqu ymm1,YMMWORD PTR[32+rsp] + vmovdqu ymm2,YMMWORD PTR[((64-192))+rbx] + vmovdqu ymm3,YMMWORD PTR[((96-192))+rbx] + vmovdqu ymm4,YMMWORD PTR[((128-192))+rbx] + vmovdqu ymm5,YMMWORD PTR[((160-192))+rbx] + vmovdqu ymm6,YMMWORD PTR[((192-192))+rbx] + vmovdqu ymm7,YMMWORD PTR[((224-192))+rbx] + + mov r9,rax + imul eax,ecx + and eax,01fffffffh + vmovd xmm12,eax + + mov rdx,rax + imul rax,QWORD PTR[((-128))+r13] + vpbroadcastq 
ymm12,xmm12 + add r9,rax + mov rax,rdx + imul rax,QWORD PTR[((8-128))+r13] + shr r9,29 + add r10,rax + mov rax,rdx + imul rax,QWORD PTR[((16-128))+r13] + add r10,r9 + add r11,rax + imul rdx,QWORD PTR[((24-128))+r13] + add r12,rdx + + mov rax,r10 + imul eax,ecx + and eax,01fffffffh + + mov r14d,9 + jmp $L$OOP_REDUCE_1024 + +ALIGN 32 +$L$OOP_REDUCE_1024:: + vmovd xmm13,eax + vpbroadcastq ymm13,xmm13 + + vpmuludq ymm10,ymm12,YMMWORD PTR[((32-128))+r13] + mov rdx,rax + imul rax,QWORD PTR[((-128))+r13] + vpaddq ymm1,ymm1,ymm10 + add r10,rax + vpmuludq ymm14,ymm12,YMMWORD PTR[((64-128))+r13] + mov rax,rdx + imul rax,QWORD PTR[((8-128))+r13] + vpaddq ymm2,ymm2,ymm14 + vpmuludq ymm11,ymm12,YMMWORD PTR[((96-128))+r13] +DB 067h + add r11,rax +DB 067h + mov rax,rdx + imul rax,QWORD PTR[((16-128))+r13] + shr r10,29 + vpaddq ymm3,ymm3,ymm11 + vpmuludq ymm10,ymm12,YMMWORD PTR[((128-128))+r13] + add r12,rax + add r11,r10 + vpaddq ymm4,ymm4,ymm10 + vpmuludq ymm14,ymm12,YMMWORD PTR[((160-128))+r13] + mov rax,r11 + imul eax,ecx + vpaddq ymm5,ymm5,ymm14 + vpmuludq ymm11,ymm12,YMMWORD PTR[((192-128))+r13] + and eax,01fffffffh + vpaddq ymm6,ymm6,ymm11 + vpmuludq ymm10,ymm12,YMMWORD PTR[((224-128))+r13] + vpaddq ymm7,ymm7,ymm10 + vpmuludq ymm14,ymm12,YMMWORD PTR[((256-128))+r13] + vmovd xmm12,eax + + vpaddq ymm8,ymm8,ymm14 + + vpbroadcastq ymm12,xmm12 + + vpmuludq ymm11,ymm13,YMMWORD PTR[((32-8-128))+r13] + vmovdqu ymm14,YMMWORD PTR[((96-8-128))+r13] + mov rdx,rax + imul rax,QWORD PTR[((-128))+r13] + vpaddq ymm1,ymm1,ymm11 + vpmuludq ymm10,ymm13,YMMWORD PTR[((64-8-128))+r13] + vmovdqu ymm11,YMMWORD PTR[((128-8-128))+r13] + add r11,rax + mov rax,rdx + imul rax,QWORD PTR[((8-128))+r13] + vpaddq ymm2,ymm2,ymm10 + add rax,r12 + shr r11,29 + vpmuludq ymm14,ymm14,ymm13 + vmovdqu ymm10,YMMWORD PTR[((160-8-128))+r13] + add rax,r11 + vpaddq ymm3,ymm3,ymm14 + vpmuludq ymm11,ymm11,ymm13 + vmovdqu ymm14,YMMWORD PTR[((192-8-128))+r13] +DB 067h + mov r12,rax + imul eax,ecx + vpaddq ymm4,ymm4,ymm11 + vpmuludq ymm10,ymm10,ymm13 +DB 0c4h,041h,07eh,06fh,09dh,058h,000h,000h,000h + and eax,01fffffffh + vpaddq ymm5,ymm5,ymm10 + vpmuludq ymm14,ymm14,ymm13 + vmovdqu ymm10,YMMWORD PTR[((256-8-128))+r13] + vpaddq ymm6,ymm6,ymm14 + vpmuludq ymm11,ymm11,ymm13 + vmovdqu ymm9,YMMWORD PTR[((288-8-128))+r13] + vmovd xmm0,eax + imul rax,QWORD PTR[((-128))+r13] + vpaddq ymm7,ymm7,ymm11 + vpmuludq ymm10,ymm10,ymm13 + vmovdqu ymm14,YMMWORD PTR[((32-16-128))+r13] + vpbroadcastq ymm0,xmm0 + vpaddq ymm8,ymm8,ymm10 + vpmuludq ymm9,ymm9,ymm13 + vmovdqu ymm11,YMMWORD PTR[((64-16-128))+r13] + add r12,rax + + vmovdqu ymm13,YMMWORD PTR[((32-24-128))+r13] + vpmuludq ymm14,ymm14,ymm12 + vmovdqu ymm10,YMMWORD PTR[((96-16-128))+r13] + vpaddq ymm1,ymm1,ymm14 + vpmuludq ymm13,ymm13,ymm0 + vpmuludq ymm11,ymm11,ymm12 +DB 0c4h,041h,07eh,06fh,0b5h,0f0h,0ffh,0ffh,0ffh + vpaddq ymm13,ymm13,ymm1 + vpaddq ymm2,ymm2,ymm11 + vpmuludq ymm10,ymm10,ymm12 + vmovdqu ymm11,YMMWORD PTR[((160-16-128))+r13] +DB 067h + vmovq rax,xmm13 + vmovdqu YMMWORD PTR[rsp],ymm13 + vpaddq ymm3,ymm3,ymm10 + vpmuludq ymm14,ymm14,ymm12 + vmovdqu ymm10,YMMWORD PTR[((192-16-128))+r13] + vpaddq ymm4,ymm4,ymm14 + vpmuludq ymm11,ymm11,ymm12 + vmovdqu ymm14,YMMWORD PTR[((224-16-128))+r13] + vpaddq ymm5,ymm5,ymm11 + vpmuludq ymm10,ymm10,ymm12 + vmovdqu ymm11,YMMWORD PTR[((256-16-128))+r13] + vpaddq ymm6,ymm6,ymm10 + vpmuludq ymm14,ymm14,ymm12 + shr r12,29 + vmovdqu ymm10,YMMWORD PTR[((288-16-128))+r13] + add rax,r12 + vpaddq ymm7,ymm7,ymm14 + vpmuludq ymm11,ymm11,ymm12 + + mov r9,rax + imul eax,ecx + 
vpaddq ymm8,ymm8,ymm11 + vpmuludq ymm10,ymm10,ymm12 + and eax,01fffffffh + vmovd xmm12,eax + vmovdqu ymm11,YMMWORD PTR[((96-24-128))+r13] +DB 067h + vpaddq ymm9,ymm9,ymm10 + vpbroadcastq ymm12,xmm12 + + vpmuludq ymm14,ymm0,YMMWORD PTR[((64-24-128))+r13] + vmovdqu ymm10,YMMWORD PTR[((128-24-128))+r13] + mov rdx,rax + imul rax,QWORD PTR[((-128))+r13] + mov r10,QWORD PTR[8+rsp] + vpaddq ymm1,ymm2,ymm14 + vpmuludq ymm11,ymm11,ymm0 + vmovdqu ymm14,YMMWORD PTR[((160-24-128))+r13] + add r9,rax + mov rax,rdx + imul rax,QWORD PTR[((8-128))+r13] +DB 067h + shr r9,29 + mov r11,QWORD PTR[16+rsp] + vpaddq ymm2,ymm3,ymm11 + vpmuludq ymm10,ymm10,ymm0 + vmovdqu ymm11,YMMWORD PTR[((192-24-128))+r13] + add r10,rax + mov rax,rdx + imul rax,QWORD PTR[((16-128))+r13] + vpaddq ymm3,ymm4,ymm10 + vpmuludq ymm14,ymm14,ymm0 + vmovdqu ymm10,YMMWORD PTR[((224-24-128))+r13] + imul rdx,QWORD PTR[((24-128))+r13] + add r11,rax + lea rax,QWORD PTR[r10*1+r9] + vpaddq ymm4,ymm5,ymm14 + vpmuludq ymm11,ymm11,ymm0 + vmovdqu ymm14,YMMWORD PTR[((256-24-128))+r13] + mov r10,rax + imul eax,ecx + vpmuludq ymm10,ymm10,ymm0 + vpaddq ymm5,ymm6,ymm11 + vmovdqu ymm11,YMMWORD PTR[((288-24-128))+r13] + and eax,01fffffffh + vpaddq ymm6,ymm7,ymm10 + vpmuludq ymm14,ymm14,ymm0 + add rdx,QWORD PTR[24+rsp] + vpaddq ymm7,ymm8,ymm14 + vpmuludq ymm11,ymm11,ymm0 + vpaddq ymm8,ymm9,ymm11 + vmovq xmm9,r12 + mov r12,rdx + + dec r14d + jnz $L$OOP_REDUCE_1024 + lea r12,QWORD PTR[448+rsp] + vpaddq ymm0,ymm13,ymm9 + vpxor ymm9,ymm9,ymm9 + + vpaddq ymm0,ymm0,YMMWORD PTR[((288-192))+rbx] + vpaddq ymm1,ymm1,YMMWORD PTR[((320-448))+r12] + vpaddq ymm2,ymm2,YMMWORD PTR[((352-448))+r12] + vpaddq ymm3,ymm3,YMMWORD PTR[((384-448))+r12] + vpaddq ymm4,ymm4,YMMWORD PTR[((416-448))+r12] + vpaddq ymm5,ymm5,YMMWORD PTR[((448-448))+r12] + vpaddq ymm6,ymm6,YMMWORD PTR[((480-448))+r12] + vpaddq ymm7,ymm7,YMMWORD PTR[((512-448))+r12] + vpaddq ymm8,ymm8,YMMWORD PTR[((544-448))+r12] + + vpsrlq ymm14,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm11,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm12,ymm2,29 + vpermq ymm14,ymm14,093h + vpand ymm2,ymm2,ymm15 + vpsrlq ymm13,ymm3,29 + vpermq ymm11,ymm11,093h + vpand ymm3,ymm3,ymm15 + vpermq ymm12,ymm12,093h + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm13,ymm13,093h + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm0,ymm0,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm1,ymm1,ymm14 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm2,ymm2,ymm11 + vpblendd ymm13,ymm9,ymm13,3 + vpaddq ymm3,ymm3,ymm12 + vpaddq ymm4,ymm4,ymm13 + + vpsrlq ymm14,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm11,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm12,ymm2,29 + vpermq ymm14,ymm14,093h + vpand ymm2,ymm2,ymm15 + vpsrlq ymm13,ymm3,29 + vpermq ymm11,ymm11,093h + vpand ymm3,ymm3,ymm15 + vpermq ymm12,ymm12,093h + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm13,ymm13,093h + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm0,ymm0,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm1,ymm1,ymm14 + vmovdqu YMMWORD PTR[(0-128)+rdi],ymm0 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm2,ymm2,ymm11 + vmovdqu YMMWORD PTR[(32-128)+rdi],ymm1 + vpblendd ymm13,ymm9,ymm13,3 + vpaddq ymm3,ymm3,ymm12 + vmovdqu YMMWORD PTR[(64-128)+rdi],ymm2 + vpaddq ymm4,ymm4,ymm13 + vmovdqu YMMWORD PTR[(96-128)+rdi],ymm3 + vpsrlq ymm14,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm11,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm12,ymm6,29 + vpermq ymm14,ymm14,093h + vpand ymm6,ymm6,ymm15 + vpsrlq ymm13,ymm7,29 + vpermq ymm11,ymm11,093h + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm12,ymm12,093h + vpand ymm8,ymm8,ymm15 + 
vpermq ymm13,ymm13,093h + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm0,ymm0,093h + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm4,ymm4,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm5,ymm5,ymm14 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm6,ymm6,ymm11 + vpblendd ymm13,ymm0,ymm13,3 + vpaddq ymm7,ymm7,ymm12 + vpaddq ymm8,ymm8,ymm13 + + vpsrlq ymm14,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm11,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm12,ymm6,29 + vpermq ymm14,ymm14,093h + vpand ymm6,ymm6,ymm15 + vpsrlq ymm13,ymm7,29 + vpermq ymm11,ymm11,093h + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm12,ymm12,093h + vpand ymm8,ymm8,ymm15 + vpermq ymm13,ymm13,093h + + vpblendd ymm10,ymm14,ymm9,3 + vpermq ymm0,ymm0,093h + vpblendd ymm14,ymm11,ymm14,3 + vpaddq ymm4,ymm4,ymm10 + vpblendd ymm11,ymm12,ymm11,3 + vpaddq ymm5,ymm5,ymm14 + vmovdqu YMMWORD PTR[(128-128)+rdi],ymm4 + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm6,ymm6,ymm11 + vmovdqu YMMWORD PTR[(160-128)+rdi],ymm5 + vpblendd ymm13,ymm0,ymm13,3 + vpaddq ymm7,ymm7,ymm12 + vmovdqu YMMWORD PTR[(192-128)+rdi],ymm6 + vpaddq ymm8,ymm8,ymm13 + vmovdqu YMMWORD PTR[(224-128)+rdi],ymm7 + vmovdqu YMMWORD PTR[(256-128)+rdi],ymm8 + + mov rsi,rdi + dec r8d + jne $L$OOP_GRANDE_SQR_1024 + + vzeroall + mov rax,rbp + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$sqr_1024_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_1024_sqr_avx2:: +rsaz_1024_sqr_avx2 ENDP +PUBLIC rsaz_1024_mul_avx2 + +ALIGN 64 +rsaz_1024_mul_avx2 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_1024_mul_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rax,QWORD PTR[rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + vzeroupper + lea rsp,QWORD PTR[((-168))+rsp] + vmovaps XMMWORD PTR[(-216)+rax],xmm6 + vmovaps XMMWORD PTR[(-200)+rax],xmm7 + vmovaps XMMWORD PTR[(-184)+rax],xmm8 + vmovaps XMMWORD PTR[(-168)+rax],xmm9 + vmovaps XMMWORD PTR[(-152)+rax],xmm10 + vmovaps XMMWORD PTR[(-136)+rax],xmm11 + vmovaps XMMWORD PTR[(-120)+rax],xmm12 + vmovaps XMMWORD PTR[(-104)+rax],xmm13 + vmovaps XMMWORD PTR[(-88)+rax],xmm14 + vmovaps XMMWORD PTR[(-72)+rax],xmm15 +$L$mul_1024_body:: + mov rbp,rax + vzeroall + mov r13,rdx + sub rsp,64 + + + + + + +DB 067h,067h + mov r15,rsi + and r15,4095 + add r15,32*10 + shr r15,12 + mov r15,rsi + cmovnz rsi,r13 + cmovnz r13,r15 + + mov r15,rcx + sub rsi,-128 + sub rcx,-128 + sub rdi,-128 + + and r15,4095 + add r15,32*10 +DB 067h,067h + shr r15,12 + jz $L$mul_1024_no_n_copy + + + + + + sub rsp,32*10 + vmovdqu ymm0,YMMWORD PTR[((0-128))+rcx] + and rsp,-512 + vmovdqu ymm1,YMMWORD PTR[((32-128))+rcx] + vmovdqu ymm2,YMMWORD PTR[((64-128))+rcx] + vmovdqu ymm3,YMMWORD PTR[((96-128))+rcx] + vmovdqu ymm4,YMMWORD PTR[((128-128))+rcx] + vmovdqu ymm5,YMMWORD PTR[((160-128))+rcx] + vmovdqu ymm6,YMMWORD 
PTR[((192-128))+rcx] + vmovdqu ymm7,YMMWORD PTR[((224-128))+rcx] + vmovdqu ymm8,YMMWORD PTR[((256-128))+rcx] + lea rcx,QWORD PTR[((64+128))+rsp] + vmovdqu YMMWORD PTR[(0-128)+rcx],ymm0 + vpxor ymm0,ymm0,ymm0 + vmovdqu YMMWORD PTR[(32-128)+rcx],ymm1 + vpxor ymm1,ymm1,ymm1 + vmovdqu YMMWORD PTR[(64-128)+rcx],ymm2 + vpxor ymm2,ymm2,ymm2 + vmovdqu YMMWORD PTR[(96-128)+rcx],ymm3 + vpxor ymm3,ymm3,ymm3 + vmovdqu YMMWORD PTR[(128-128)+rcx],ymm4 + vpxor ymm4,ymm4,ymm4 + vmovdqu YMMWORD PTR[(160-128)+rcx],ymm5 + vpxor ymm5,ymm5,ymm5 + vmovdqu YMMWORD PTR[(192-128)+rcx],ymm6 + vpxor ymm6,ymm6,ymm6 + vmovdqu YMMWORD PTR[(224-128)+rcx],ymm7 + vpxor ymm7,ymm7,ymm7 + vmovdqu YMMWORD PTR[(256-128)+rcx],ymm8 + vmovdqa ymm8,ymm0 + vmovdqu YMMWORD PTR[(288-128)+rcx],ymm9 +$L$mul_1024_no_n_copy:: + and rsp,-64 + + mov rbx,QWORD PTR[r13] + vpbroadcastq ymm10,QWORD PTR[r13] + vmovdqu YMMWORD PTR[rsp],ymm0 + xor r9,r9 +DB 067h + xor r10,r10 + xor r11,r11 + xor r12,r12 + + vmovdqu ymm15,YMMWORD PTR[$L$and_mask] + mov r14d,9 + vmovdqu YMMWORD PTR[(288-128)+rdi],ymm9 + jmp $L$oop_mul_1024 + +ALIGN 32 +$L$oop_mul_1024:: + vpsrlq ymm9,ymm3,29 + mov rax,rbx + imul rax,QWORD PTR[((-128))+rsi] + add rax,r9 + mov r10,rbx + imul r10,QWORD PTR[((8-128))+rsi] + add r10,QWORD PTR[8+rsp] + + mov r9,rax + imul eax,r8d + and eax,01fffffffh + + mov r11,rbx + imul r11,QWORD PTR[((16-128))+rsi] + add r11,QWORD PTR[16+rsp] + + mov r12,rbx + imul r12,QWORD PTR[((24-128))+rsi] + add r12,QWORD PTR[24+rsp] + vpmuludq ymm0,ymm10,YMMWORD PTR[((32-128))+rsi] + vmovd xmm11,eax + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm10,YMMWORD PTR[((64-128))+rsi] + vpbroadcastq ymm11,xmm11 + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm10,YMMWORD PTR[((96-128))+rsi] + vpand ymm3,ymm3,ymm15 + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm10,YMMWORD PTR[((128-128))+rsi] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm10,YMMWORD PTR[((160-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm10,YMMWORD PTR[((192-128))+rsi] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm10,YMMWORD PTR[((224-128))+rsi] + vpermq ymm9,ymm9,093h + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm10,YMMWORD PTR[((256-128))+rsi] + vpbroadcastq ymm10,QWORD PTR[8+r13] + vpaddq ymm8,ymm8,ymm12 + + mov rdx,rax + imul rax,QWORD PTR[((-128))+rcx] + add r9,rax + mov rax,rdx + imul rax,QWORD PTR[((8-128))+rcx] + add r10,rax + mov rax,rdx + imul rax,QWORD PTR[((16-128))+rcx] + add r11,rax + shr r9,29 + imul rdx,QWORD PTR[((24-128))+rcx] + add r12,rdx + add r10,r9 + + vpmuludq ymm13,ymm11,YMMWORD PTR[((32-128))+rcx] + vmovq rbx,xmm10 + vpaddq ymm1,ymm1,ymm13 + vpmuludq ymm0,ymm11,YMMWORD PTR[((64-128))+rcx] + vpaddq ymm2,ymm2,ymm0 + vpmuludq ymm12,ymm11,YMMWORD PTR[((96-128))+rcx] + vpaddq ymm3,ymm3,ymm12 + vpmuludq ymm13,ymm11,YMMWORD PTR[((128-128))+rcx] + vpaddq ymm4,ymm4,ymm13 + vpmuludq ymm0,ymm11,YMMWORD PTR[((160-128))+rcx] + vpaddq ymm5,ymm5,ymm0 + vpmuludq ymm12,ymm11,YMMWORD PTR[((192-128))+rcx] + vpaddq ymm6,ymm6,ymm12 + vpmuludq ymm13,ymm11,YMMWORD PTR[((224-128))+rcx] + vpblendd ymm9,ymm9,ymm14,3 + vpaddq ymm7,ymm7,ymm13 + vpmuludq ymm0,ymm11,YMMWORD PTR[((256-128))+rcx] + vpaddq ymm3,ymm3,ymm9 + vpaddq ymm8,ymm8,ymm0 + + mov rax,rbx + imul rax,QWORD PTR[((-128))+rsi] + add r10,rax + vmovdqu ymm12,YMMWORD PTR[((-8+32-128))+rsi] + mov rax,rbx + imul rax,QWORD PTR[((8-128))+rsi] + add r11,rax + vmovdqu ymm13,YMMWORD PTR[((-8+64-128))+rsi] + + mov rax,r10 + imul eax,r8d + and eax,01fffffffh + + imul rbx,QWORD PTR[((16-128))+rsi] + add r12,rbx + vpmuludq ymm12,ymm12,ymm10 + vmovd xmm11,eax + 
vmovdqu ymm0,YMMWORD PTR[((-8+96-128))+rsi] + vpaddq ymm1,ymm1,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpbroadcastq ymm11,xmm11 + vmovdqu ymm12,YMMWORD PTR[((-8+128-128))+rsi] + vpaddq ymm2,ymm2,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD PTR[((-8+160-128))+rsi] + vpaddq ymm3,ymm3,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm0,YMMWORD PTR[((-8+192-128))+rsi] + vpaddq ymm4,ymm4,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD PTR[((-8+224-128))+rsi] + vpaddq ymm5,ymm5,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD PTR[((-8+256-128))+rsi] + vpaddq ymm6,ymm6,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm9,YMMWORD PTR[((-8+288-128))+rsi] + vpaddq ymm7,ymm7,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpaddq ymm8,ymm8,ymm13 + vpmuludq ymm9,ymm9,ymm10 + vpbroadcastq ymm10,QWORD PTR[16+r13] + + mov rdx,rax + imul rax,QWORD PTR[((-128))+rcx] + add r10,rax + vmovdqu ymm0,YMMWORD PTR[((-8+32-128))+rcx] + mov rax,rdx + imul rax,QWORD PTR[((8-128))+rcx] + add r11,rax + vmovdqu ymm12,YMMWORD PTR[((-8+64-128))+rcx] + shr r10,29 + imul rdx,QWORD PTR[((16-128))+rcx] + add r12,rdx + add r11,r10 + + vpmuludq ymm0,ymm0,ymm11 + vmovq rbx,xmm10 + vmovdqu ymm13,YMMWORD PTR[((-8+96-128))+rcx] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD PTR[((-8+128-128))+rcx] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-8+160-128))+rcx] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD PTR[((-8+192-128))+rcx] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD PTR[((-8+224-128))+rcx] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-8+256-128))+rcx] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD PTR[((-8+288-128))+rcx] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vpaddq ymm9,ymm9,ymm13 + + vmovdqu ymm0,YMMWORD PTR[((-16+32-128))+rsi] + mov rax,rbx + imul rax,QWORD PTR[((-128))+rsi] + add rax,r11 + + vmovdqu ymm12,YMMWORD PTR[((-16+64-128))+rsi] + mov r11,rax + imul eax,r8d + and eax,01fffffffh + + imul rbx,QWORD PTR[((8-128))+rsi] + add r12,rbx + vpmuludq ymm0,ymm0,ymm10 + vmovd xmm11,eax + vmovdqu ymm13,YMMWORD PTR[((-16+96-128))+rsi] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpbroadcastq ymm11,xmm11 + vmovdqu ymm0,YMMWORD PTR[((-16+128-128))+rsi] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD PTR[((-16+160-128))+rsi] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD PTR[((-16+192-128))+rsi] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm0,YMMWORD PTR[((-16+224-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD PTR[((-16+256-128))+rsi] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD PTR[((-16+288-128))+rsi] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpbroadcastq ymm10,QWORD PTR[24+r13] + vpaddq ymm9,ymm9,ymm13 + + vmovdqu ymm0,YMMWORD PTR[((-16+32-128))+rcx] + mov rdx,rax + imul rax,QWORD PTR[((-128))+rcx] + add r11,rax + vmovdqu ymm12,YMMWORD PTR[((-16+64-128))+rcx] + imul rdx,QWORD PTR[((8-128))+rcx] + add r12,rdx + shr r11,29 + + vpmuludq ymm0,ymm0,ymm11 + vmovq rbx,xmm10 + vmovdqu ymm13,YMMWORD PTR[((-16+96-128))+rcx] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD PTR[((-16+128-128))+rcx] + vpaddq 
ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-16+160-128))+rcx] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD PTR[((-16+192-128))+rcx] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD PTR[((-16+224-128))+rcx] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-16+256-128))+rcx] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD PTR[((-16+288-128))+rcx] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD PTR[((-24+32-128))+rsi] + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-24+64-128))+rsi] + vpaddq ymm9,ymm9,ymm13 + + add r12,r11 + imul rbx,QWORD PTR[((-128))+rsi] + add r12,rbx + + mov rax,r12 + imul eax,r8d + and eax,01fffffffh + + vpmuludq ymm0,ymm0,ymm10 + vmovd xmm11,eax + vmovdqu ymm13,YMMWORD PTR[((-24+96-128))+rsi] + vpaddq ymm1,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpbroadcastq ymm11,xmm11 + vmovdqu ymm0,YMMWORD PTR[((-24+128-128))+rsi] + vpaddq ymm2,ymm2,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD PTR[((-24+160-128))+rsi] + vpaddq ymm3,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD PTR[((-24+192-128))+rsi] + vpaddq ymm4,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vmovdqu ymm0,YMMWORD PTR[((-24+224-128))+rsi] + vpaddq ymm5,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vmovdqu ymm12,YMMWORD PTR[((-24+256-128))+rsi] + vpaddq ymm6,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm10 + vmovdqu ymm13,YMMWORD PTR[((-24+288-128))+rsi] + vpaddq ymm7,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm10 + vpaddq ymm8,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm10 + vpbroadcastq ymm10,QWORD PTR[32+r13] + vpaddq ymm9,ymm9,ymm13 + add r13,32 + + vmovdqu ymm0,YMMWORD PTR[((-24+32-128))+rcx] + imul rax,QWORD PTR[((-128))+rcx] + add r12,rax + shr r12,29 + + vmovdqu ymm12,YMMWORD PTR[((-24+64-128))+rcx] + vpmuludq ymm0,ymm0,ymm11 + vmovq rbx,xmm10 + vmovdqu ymm13,YMMWORD PTR[((-24+96-128))+rcx] + vpaddq ymm0,ymm1,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu YMMWORD PTR[rsp],ymm0 + vpaddq ymm1,ymm2,ymm12 + vmovdqu ymm0,YMMWORD PTR[((-24+128-128))+rcx] + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-24+160-128))+rcx] + vpaddq ymm2,ymm3,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD PTR[((-24+192-128))+rcx] + vpaddq ymm3,ymm4,ymm0 + vpmuludq ymm12,ymm12,ymm11 + vmovdqu ymm0,YMMWORD PTR[((-24+224-128))+rcx] + vpaddq ymm4,ymm5,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovdqu ymm12,YMMWORD PTR[((-24+256-128))+rcx] + vpaddq ymm5,ymm6,ymm13 + vpmuludq ymm0,ymm0,ymm11 + vmovdqu ymm13,YMMWORD PTR[((-24+288-128))+rcx] + mov r9,r12 + vpaddq ymm6,ymm7,ymm0 + vpmuludq ymm12,ymm12,ymm11 + add r9,QWORD PTR[rsp] + vpaddq ymm7,ymm8,ymm12 + vpmuludq ymm13,ymm13,ymm11 + vmovq xmm12,r12 + vpaddq ymm8,ymm9,ymm13 + + dec r14d + jnz $L$oop_mul_1024 + vpermq ymm15,ymm15,0 + vpaddq ymm0,ymm12,YMMWORD PTR[rsp] + + vpsrlq ymm12,ymm0,29 + vpand ymm0,ymm0,ymm15 + vpsrlq ymm13,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm10,ymm2,29 + vpermq ymm12,ymm12,093h + vpand ymm2,ymm2,ymm15 + vpsrlq ymm11,ymm3,29 + vpermq ymm13,ymm13,093h + vpand ymm3,ymm3,ymm15 + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm10,ymm10,093h + vpblendd ymm12,ymm13,ymm12,3 + vpermq ymm11,ymm11,093h + vpaddq ymm0,ymm0,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm1,ymm1,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm2,ymm2,ymm13 + vpblendd ymm11,ymm14,ymm11,3 + vpaddq ymm3,ymm3,ymm10 + vpaddq ymm4,ymm4,ymm11 + + vpsrlq ymm12,ymm0,29 + vpand 
ymm0,ymm0,ymm15 + vpsrlq ymm13,ymm1,29 + vpand ymm1,ymm1,ymm15 + vpsrlq ymm10,ymm2,29 + vpermq ymm12,ymm12,093h + vpand ymm2,ymm2,ymm15 + vpsrlq ymm11,ymm3,29 + vpermq ymm13,ymm13,093h + vpand ymm3,ymm3,ymm15 + vpermq ymm10,ymm10,093h + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm11,ymm11,093h + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm0,ymm0,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm1,ymm1,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm2,ymm2,ymm13 + vpblendd ymm11,ymm14,ymm11,3 + vpaddq ymm3,ymm3,ymm10 + vpaddq ymm4,ymm4,ymm11 + + vmovdqu YMMWORD PTR[(0-128)+rdi],ymm0 + vmovdqu YMMWORD PTR[(32-128)+rdi],ymm1 + vmovdqu YMMWORD PTR[(64-128)+rdi],ymm2 + vmovdqu YMMWORD PTR[(96-128)+rdi],ymm3 + vpsrlq ymm12,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm13,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm10,ymm6,29 + vpermq ymm12,ymm12,093h + vpand ymm6,ymm6,ymm15 + vpsrlq ymm11,ymm7,29 + vpermq ymm13,ymm13,093h + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm10,ymm10,093h + vpand ymm8,ymm8,ymm15 + vpermq ymm11,ymm11,093h + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm0,ymm0,093h + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm4,ymm4,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm5,ymm5,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm6,ymm6,ymm13 + vpblendd ymm11,ymm0,ymm11,3 + vpaddq ymm7,ymm7,ymm10 + vpaddq ymm8,ymm8,ymm11 + + vpsrlq ymm12,ymm4,29 + vpand ymm4,ymm4,ymm15 + vpsrlq ymm13,ymm5,29 + vpand ymm5,ymm5,ymm15 + vpsrlq ymm10,ymm6,29 + vpermq ymm12,ymm12,093h + vpand ymm6,ymm6,ymm15 + vpsrlq ymm11,ymm7,29 + vpermq ymm13,ymm13,093h + vpand ymm7,ymm7,ymm15 + vpsrlq ymm0,ymm8,29 + vpermq ymm10,ymm10,093h + vpand ymm8,ymm8,ymm15 + vpermq ymm11,ymm11,093h + + vpblendd ymm9,ymm12,ymm14,3 + vpermq ymm0,ymm0,093h + vpblendd ymm12,ymm13,ymm12,3 + vpaddq ymm4,ymm4,ymm9 + vpblendd ymm13,ymm10,ymm13,3 + vpaddq ymm5,ymm5,ymm12 + vpblendd ymm10,ymm11,ymm10,3 + vpaddq ymm6,ymm6,ymm13 + vpblendd ymm11,ymm0,ymm11,3 + vpaddq ymm7,ymm7,ymm10 + vpaddq ymm8,ymm8,ymm11 + + vmovdqu YMMWORD PTR[(128-128)+rdi],ymm4 + vmovdqu YMMWORD PTR[(160-128)+rdi],ymm5 + vmovdqu YMMWORD PTR[(192-128)+rdi],ymm6 + vmovdqu YMMWORD PTR[(224-128)+rdi],ymm7 + vmovdqu YMMWORD PTR[(256-128)+rdi],ymm8 + vzeroupper + + mov rax,rbp + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_1024_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_1024_mul_avx2:: +rsaz_1024_mul_avx2 ENDP +PUBLIC rsaz_1024_red2norm_avx2 + +ALIGN 32 +rsaz_1024_red2norm_avx2 PROC PUBLIC + sub rdx,-128 + xor rax,rax + mov r8,QWORD PTR[((-128))+rdx] + mov r9,QWORD PTR[((-120))+rdx] + mov r10,QWORD PTR[((-112))+rdx] + shl r8,0 + shl r9,29 + mov r11,r10 + shl r10,58 + shr r11,6 + add rax,r8 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD PTR[rcx],rax + mov rax,r11 + mov r8,QWORD PTR[((-104))+rdx] + mov r9,QWORD PTR[((-96))+rdx] + shl r8,23 + mov r10,r9 + shl r9,52 + shr r10,12 + add rax,r8 + add rax,r9 + adc r10,0 + mov 
QWORD PTR[8+rcx],rax + mov rax,r10 + mov r11,QWORD PTR[((-88))+rdx] + mov r8,QWORD PTR[((-80))+rdx] + shl r11,17 + mov r9,r8 + shl r8,46 + shr r9,18 + add rax,r11 + add rax,r8 + adc r9,0 + mov QWORD PTR[16+rcx],rax + mov rax,r9 + mov r10,QWORD PTR[((-72))+rdx] + mov r11,QWORD PTR[((-64))+rdx] + shl r10,11 + mov r8,r11 + shl r11,40 + shr r8,24 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD PTR[24+rcx],rax + mov rax,r8 + mov r9,QWORD PTR[((-56))+rdx] + mov r10,QWORD PTR[((-48))+rdx] + mov r11,QWORD PTR[((-40))+rdx] + shl r9,5 + shl r10,34 + mov r8,r11 + shl r11,63 + shr r8,1 + add rax,r9 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD PTR[32+rcx],rax + mov rax,r8 + mov r9,QWORD PTR[((-32))+rdx] + mov r10,QWORD PTR[((-24))+rdx] + shl r9,28 + mov r11,r10 + shl r10,57 + shr r11,7 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD PTR[40+rcx],rax + mov rax,r11 + mov r8,QWORD PTR[((-16))+rdx] + mov r9,QWORD PTR[((-8))+rdx] + shl r8,22 + mov r10,r9 + shl r9,51 + shr r10,13 + add rax,r8 + add rax,r9 + adc r10,0 + mov QWORD PTR[48+rcx],rax + mov rax,r10 + mov r11,QWORD PTR[rdx] + mov r8,QWORD PTR[8+rdx] + shl r11,16 + mov r9,r8 + shl r8,45 + shr r9,19 + add rax,r11 + add rax,r8 + adc r9,0 + mov QWORD PTR[56+rcx],rax + mov rax,r9 + mov r10,QWORD PTR[16+rdx] + mov r11,QWORD PTR[24+rdx] + shl r10,10 + mov r8,r11 + shl r11,39 + shr r8,25 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD PTR[64+rcx],rax + mov rax,r8 + mov r9,QWORD PTR[32+rdx] + mov r10,QWORD PTR[40+rdx] + mov r11,QWORD PTR[48+rdx] + shl r9,4 + shl r10,33 + mov r8,r11 + shl r11,62 + shr r8,2 + add rax,r9 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD PTR[72+rcx],rax + mov rax,r8 + mov r9,QWORD PTR[56+rdx] + mov r10,QWORD PTR[64+rdx] + shl r9,27 + mov r11,r10 + shl r10,56 + shr r11,8 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD PTR[80+rcx],rax + mov rax,r11 + mov r8,QWORD PTR[72+rdx] + mov r9,QWORD PTR[80+rdx] + shl r8,21 + mov r10,r9 + shl r9,50 + shr r10,14 + add rax,r8 + add rax,r9 + adc r10,0 + mov QWORD PTR[88+rcx],rax + mov rax,r10 + mov r11,QWORD PTR[88+rdx] + mov r8,QWORD PTR[96+rdx] + shl r11,15 + mov r9,r8 + shl r8,44 + shr r9,20 + add rax,r11 + add rax,r8 + adc r9,0 + mov QWORD PTR[96+rcx],rax + mov rax,r9 + mov r10,QWORD PTR[104+rdx] + mov r11,QWORD PTR[112+rdx] + shl r10,9 + mov r8,r11 + shl r11,38 + shr r8,26 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD PTR[104+rcx],rax + mov rax,r8 + mov r9,QWORD PTR[120+rdx] + mov r10,QWORD PTR[128+rdx] + mov r11,QWORD PTR[136+rdx] + shl r9,3 + shl r10,32 + mov r8,r11 + shl r11,61 + shr r8,3 + add rax,r9 + add rax,r10 + add rax,r11 + adc r8,0 + mov QWORD PTR[112+rcx],rax + mov rax,r8 + mov r9,QWORD PTR[144+rdx] + mov r10,QWORD PTR[152+rdx] + shl r9,26 + mov r11,r10 + shl r10,55 + shr r11,9 + add rax,r9 + add rax,r10 + adc r11,0 + mov QWORD PTR[120+rcx],rax + mov rax,r11 + DB 0F3h,0C3h ;repret +rsaz_1024_red2norm_avx2 ENDP + +PUBLIC rsaz_1024_norm2red_avx2 + +ALIGN 32 +rsaz_1024_norm2red_avx2 PROC PUBLIC + sub rcx,-128 + mov r8,QWORD PTR[rdx] + mov eax,01fffffffh + mov r9,QWORD PTR[8+rdx] + mov r11,r8 + shr r11,0 + and r11,rax + mov QWORD PTR[((-128))+rcx],r11 + mov r10,r8 + shr r10,29 + and r10,rax + mov QWORD PTR[((-120))+rcx],r10 + shrd r8,r9,58 + and r8,rax + mov QWORD PTR[((-112))+rcx],r8 + mov r10,QWORD PTR[16+rdx] + mov r8,r9 + shr r8,23 + and r8,rax + mov QWORD PTR[((-104))+rcx],r8 + shrd r9,r10,52 + and r9,rax + mov QWORD PTR[((-96))+rcx],r9 + mov r11,QWORD PTR[24+rdx] + mov r9,r10 + shr r9,17 + and r9,rax + mov QWORD PTR[((-88))+rcx],r9 + shrd r10,r11,46 + 
and r10,rax + mov QWORD PTR[((-80))+rcx],r10 + mov r8,QWORD PTR[32+rdx] + mov r10,r11 + shr r10,11 + and r10,rax + mov QWORD PTR[((-72))+rcx],r10 + shrd r11,r8,40 + and r11,rax + mov QWORD PTR[((-64))+rcx],r11 + mov r9,QWORD PTR[40+rdx] + mov r11,r8 + shr r11,5 + and r11,rax + mov QWORD PTR[((-56))+rcx],r11 + mov r10,r8 + shr r10,34 + and r10,rax + mov QWORD PTR[((-48))+rcx],r10 + shrd r8,r9,63 + and r8,rax + mov QWORD PTR[((-40))+rcx],r8 + mov r10,QWORD PTR[48+rdx] + mov r8,r9 + shr r8,28 + and r8,rax + mov QWORD PTR[((-32))+rcx],r8 + shrd r9,r10,57 + and r9,rax + mov QWORD PTR[((-24))+rcx],r9 + mov r11,QWORD PTR[56+rdx] + mov r9,r10 + shr r9,22 + and r9,rax + mov QWORD PTR[((-16))+rcx],r9 + shrd r10,r11,51 + and r10,rax + mov QWORD PTR[((-8))+rcx],r10 + mov r8,QWORD PTR[64+rdx] + mov r10,r11 + shr r10,16 + and r10,rax + mov QWORD PTR[rcx],r10 + shrd r11,r8,45 + and r11,rax + mov QWORD PTR[8+rcx],r11 + mov r9,QWORD PTR[72+rdx] + mov r11,r8 + shr r11,10 + and r11,rax + mov QWORD PTR[16+rcx],r11 + shrd r8,r9,39 + and r8,rax + mov QWORD PTR[24+rcx],r8 + mov r10,QWORD PTR[80+rdx] + mov r8,r9 + shr r8,4 + and r8,rax + mov QWORD PTR[32+rcx],r8 + mov r11,r9 + shr r11,33 + and r11,rax + mov QWORD PTR[40+rcx],r11 + shrd r9,r10,62 + and r9,rax + mov QWORD PTR[48+rcx],r9 + mov r11,QWORD PTR[88+rdx] + mov r9,r10 + shr r9,27 + and r9,rax + mov QWORD PTR[56+rcx],r9 + shrd r10,r11,56 + and r10,rax + mov QWORD PTR[64+rcx],r10 + mov r8,QWORD PTR[96+rdx] + mov r10,r11 + shr r10,21 + and r10,rax + mov QWORD PTR[72+rcx],r10 + shrd r11,r8,50 + and r11,rax + mov QWORD PTR[80+rcx],r11 + mov r9,QWORD PTR[104+rdx] + mov r11,r8 + shr r11,15 + and r11,rax + mov QWORD PTR[88+rcx],r11 + shrd r8,r9,44 + and r8,rax + mov QWORD PTR[96+rcx],r8 + mov r10,QWORD PTR[112+rdx] + mov r8,r9 + shr r8,9 + and r8,rax + mov QWORD PTR[104+rcx],r8 + shrd r9,r10,38 + and r9,rax + mov QWORD PTR[112+rcx],r9 + mov r11,QWORD PTR[120+rdx] + mov r9,r10 + shr r9,3 + and r9,rax + mov QWORD PTR[120+rcx],r9 + mov r8,r10 + shr r8,32 + and r8,rax + mov QWORD PTR[128+rcx],r8 + shrd r10,r11,61 + and r10,rax + mov QWORD PTR[136+rcx],r10 + xor r8,r8 + mov r10,r11 + shr r10,26 + and r10,rax + mov QWORD PTR[144+rcx],r10 + shrd r11,r8,55 + and r11,rax + mov QWORD PTR[152+rcx],r11 + mov QWORD PTR[160+rcx],r8 + mov QWORD PTR[168+rcx],r8 + mov QWORD PTR[176+rcx],r8 + mov QWORD PTR[184+rcx],r8 + DB 0F3h,0C3h ;repret +rsaz_1024_norm2red_avx2 ENDP +PUBLIC rsaz_1024_scatter5_avx2 + +ALIGN 32 +rsaz_1024_scatter5_avx2 PROC PUBLIC + vzeroupper + vmovdqu ymm5,YMMWORD PTR[$L$scatter_permd] + shl r8d,4 + lea rcx,QWORD PTR[r8*1+rcx] + mov eax,9 + jmp $L$oop_scatter_1024 + +ALIGN 32 +$L$oop_scatter_1024:: + vmovdqu ymm0,YMMWORD PTR[rdx] + lea rdx,QWORD PTR[32+rdx] + vpermd ymm0,ymm5,ymm0 + vmovdqu XMMWORD PTR[rcx],xmm0 + lea rcx,QWORD PTR[512+rcx] + dec eax + jnz $L$oop_scatter_1024 + + vzeroupper + DB 0F3h,0C3h ;repret +rsaz_1024_scatter5_avx2 ENDP + +PUBLIC rsaz_1024_gather5_avx2 + +ALIGN 32 +rsaz_1024_gather5_avx2 PROC PUBLIC + lea rax,QWORD PTR[((-136))+rsp] + vzeroupper +$L$SEH_begin_rsaz_1024_gather5:: + +DB 048h,08dh,060h,0e0h +DB 0c5h,0f8h,029h,070h,0e0h +DB 0c5h,0f8h,029h,078h,0f0h +DB 0c5h,078h,029h,040h,000h +DB 0c5h,078h,029h,048h,010h +DB 0c5h,078h,029h,050h,020h +DB 0c5h,078h,029h,058h,030h +DB 0c5h,078h,029h,060h,040h +DB 0c5h,078h,029h,068h,050h +DB 0c5h,078h,029h,070h,060h +DB 0c5h,078h,029h,078h,070h + lea r11,QWORD PTR[$L$gather_table] + mov eax,r8d + and r8d,3 + shr eax,2 + shl r8d,4 + + vmovdqu ymm7,YMMWORD PTR[((-32))+r11] + vpbroadcastb 
xmm8,BYTE PTR[8+rax*1+r11] + vpbroadcastb xmm9,BYTE PTR[7+rax*1+r11] + vpbroadcastb xmm10,BYTE PTR[6+rax*1+r11] + vpbroadcastb xmm11,BYTE PTR[5+rax*1+r11] + vpbroadcastb xmm12,BYTE PTR[4+rax*1+r11] + vpbroadcastb xmm13,BYTE PTR[3+rax*1+r11] + vpbroadcastb xmm14,BYTE PTR[2+rax*1+r11] + vpbroadcastb xmm15,BYTE PTR[1+rax*1+r11] + + lea rdx,QWORD PTR[64+r8*1+rdx] + mov r11,64 + mov eax,9 + jmp $L$oop_gather_1024 + +ALIGN 32 +$L$oop_gather_1024:: + vpand xmm0,xmm8,XMMWORD PTR[((-64))+rdx] + vpand xmm1,xmm9,XMMWORD PTR[rdx] + vpand xmm2,xmm10,XMMWORD PTR[64+rdx] + vpand xmm3,xmm11,XMMWORD PTR[r11*2+rdx] + vpor xmm1,xmm1,xmm0 + vpand xmm4,xmm12,XMMWORD PTR[64+r11*2+rdx] + vpor xmm3,xmm3,xmm2 + vpand xmm5,xmm13,XMMWORD PTR[r11*4+rdx] + vpor xmm3,xmm3,xmm1 + vpand xmm6,xmm14,XMMWORD PTR[64+r11*4+rdx] + vpor xmm5,xmm5,xmm4 + vpand xmm2,xmm15,XMMWORD PTR[((-128))+r11*8+rdx] + lea rdx,QWORD PTR[r11*8+rdx] + vpor xmm5,xmm5,xmm3 + vpor xmm6,xmm6,xmm2 + vpor xmm6,xmm6,xmm5 + vpermd ymm6,ymm7,ymm6 + vmovdqu YMMWORD PTR[rcx],ymm6 + lea rcx,QWORD PTR[32+rcx] + dec eax + jnz $L$oop_gather_1024 + + vpxor ymm0,ymm0,ymm0 + vmovdqu YMMWORD PTR[rcx],ymm0 + vzeroupper + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_rsaz_1024_gather5:: + DB 0F3h,0C3h ;repret +rsaz_1024_gather5_avx2 ENDP +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC rsaz_avx2_eligible + +ALIGN 32 +rsaz_avx2_eligible PROC PUBLIC + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + mov ecx,524544 + mov edx,0 + and ecx,eax + cmp ecx,524544 + cmove eax,edx + and eax,32 + shr eax,5 + DB 0F3h,0C3h ;repret +rsaz_avx2_eligible ENDP + +ALIGN 64 +$L$and_mask:: + DQ 01fffffffh,01fffffffh,01fffffffh,-1 +$L$scatter_permd:: + DD 0,2,4,6,7,7,7,7 +$L$gather_permd:: + DD 0,7,1,7,2,7,3,7 +$L$gather_table:: +DB 0,0,0,0,0,0,0,0,0ffh,0,0,0,0,0,0,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +rsaz_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[160+r8] + + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + mov QWORD PTR[240+r8],r15 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[144+r8],rbx + + lea rsi,QWORD PTR[((-216))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + 
lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +rsaz_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rsaz_1024_sqr_avx2 + DD imagerel $L$SEH_end_rsaz_1024_sqr_avx2 + DD imagerel $L$SEH_info_rsaz_1024_sqr_avx2 + + DD imagerel $L$SEH_begin_rsaz_1024_mul_avx2 + DD imagerel $L$SEH_end_rsaz_1024_mul_avx2 + DD imagerel $L$SEH_info_rsaz_1024_mul_avx2 + + DD imagerel $L$SEH_begin_rsaz_1024_gather5 + DD imagerel $L$SEH_end_rsaz_1024_gather5 + DD imagerel $L$SEH_info_rsaz_1024_gather5 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rsaz_1024_sqr_avx2:: +DB 9,0,0,0 + DD imagerel rsaz_se_handler + DD imagerel $L$sqr_1024_body,imagerel $L$sqr_1024_epilogue +$L$SEH_info_rsaz_1024_mul_avx2:: +DB 9,0,0,0 + DD imagerel rsaz_se_handler + DD imagerel $L$mul_1024_body,imagerel $L$mul_1024_epilogue +$L$SEH_info_rsaz_1024_gather5:: +DB 001h,033h,016h,000h +DB 036h,0f8h,009h,000h +DB 031h,0e8h,008h,000h +DB 02ch,0d8h,007h,000h +DB 027h,0c8h,006h,000h +DB 022h,0b8h,005h,000h +DB 01dh,0a8h,004h,000h +DB 018h,098h,003h,000h +DB 013h,088h,002h,000h +DB 00eh,078h,001h,000h +DB 009h,068h,000h,000h +DB 004h,001h,015h,000h + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/bn/rsaz-x86_64.asm b/deps/openssl/asm/x64-win32-masm/bn/rsaz-x86_64.asm new file mode 100644 index 00000000000000..1c6440470d9992 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/bn/rsaz-x86_64.asm @@ -0,0 +1,1964 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC rsaz_512_sqr + +ALIGN 32 +rsaz_512_sqr PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_sqr:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$sqr_body:: + mov rbp,rdx + mov rdx,QWORD PTR[rsi] + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[128+rsp],rcx + mov r11d,080100h + and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp r11d,080100h + je $L$oop_sqrx + jmp $L$oop_sqr + +ALIGN 32 +$L$oop_sqr:: + mov DWORD PTR[((128+8))+rsp],r8d + + mov rbx,rdx + mul rdx + mov r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,rbx + mov r15,rdx + adc r15,0 + + add r8,r8 + mov rcx,r9 + adc r9,r9 + + mul rax + mov QWORD PTR[rsp],rax + add r8,rdx + adc r9,0 + + mov QWORD PTR[8+rsp],r8 + shr rcx,63 + + + mov r8,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mul r8 + add r10,rax + mov rax,QWORD PTR[24+rsi] + mov rbx,rdx + adc rbx,0 + + mul r8 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r11,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r12,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + 
add r13,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r14,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r15,rax + mov rax,r8 + adc rdx,0 + add r15,rbx + mov r8,rdx + mov rdx,r10 + adc r8,0 + + add rdx,rdx + lea r10,QWORD PTR[r10*2+rcx] + mov rbx,r11 + adc r11,r11 + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[16+rsp],r9 + mov QWORD PTR[24+rsp],r10 + shr rbx,63 + + + mov r9,QWORD PTR[16+rsi] + mov rax,QWORD PTR[24+rsi] + mul r9 + add r12,rax + mov rax,QWORD PTR[32+rsi] + mov rcx,rdx + adc rcx,0 + + mul r9 + add r13,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r13,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + add r14,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r14,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + mov r10,r12 + lea r12,QWORD PTR[r12*2+rbx] + add r15,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r15,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + shr r10,63 + add r8,rax + mov rax,r9 + adc rdx,0 + add r8,rcx + mov r9,rdx + adc r9,0 + + mov rcx,r13 + lea r13,QWORD PTR[r13*2+r10] + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[32+rsp],r11 + mov QWORD PTR[40+rsp],r12 + shr rcx,63 + + + mov r10,QWORD PTR[24+rsi] + mov rax,QWORD PTR[32+rsi] + mul r10 + add r14,rax + mov rax,QWORD PTR[40+rsi] + mov rbx,rdx + adc rbx,0 + + mul r10 + add r15,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r15,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + mov r12,r14 + lea r14,QWORD PTR[r14*2+rcx] + add r8,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r8,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + shr r12,63 + add r9,rax + mov rax,r10 + adc rdx,0 + add r9,rbx + mov r10,rdx + adc r10,0 + + mov rbx,r15 + lea r15,QWORD PTR[r15*2+r12] + + mul rax + add r13,rax + adc r14,rdx + adc r15,0 + + mov QWORD PTR[48+rsp],r13 + mov QWORD PTR[56+rsp],r14 + shr rbx,63 + + + mov r11,QWORD PTR[32+rsi] + mov rax,QWORD PTR[40+rsi] + mul r11 + add r8,rax + mov rax,QWORD PTR[48+rsi] + mov rcx,rdx + adc rcx,0 + + mul r11 + add r9,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + mov r12,r8 + lea r8,QWORD PTR[r8*2+rbx] + add r9,rcx + mov rcx,rdx + adc rcx,0 + + mul r11 + shr r12,63 + add r10,rax + mov rax,r11 + adc rdx,0 + add r10,rcx + mov r11,rdx + adc r11,0 + + mov rcx,r9 + lea r9,QWORD PTR[r9*2+r12] + + mul rax + add r15,rax + adc r8,rdx + adc r9,0 + + mov QWORD PTR[64+rsp],r15 + mov QWORD PTR[72+rsp],r8 + shr rcx,63 + + + mov r12,QWORD PTR[40+rsi] + mov rax,QWORD PTR[48+rsi] + mul r12 + add r10,rax + mov rax,QWORD PTR[56+rsi] + mov rbx,rdx + adc rbx,0 + + mul r12 + add r11,rax + mov rax,r12 + mov r15,r10 + lea r10,QWORD PTR[r10*2+rcx] + adc rdx,0 + shr r15,63 + add r11,rbx + mov r12,rdx + adc r12,0 + + mov rbx,r11 + lea r11,QWORD PTR[r11*2+r15] + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[80+rsp],r9 + mov QWORD PTR[88+rsp],r10 + + + mov r13,QWORD PTR[48+rsi] + mov rax,QWORD PTR[56+rsi] + mul r13 + add r12,rax + mov rax,r13 + mov r13,rdx + adc r13,0 + + xor r14,r14 + shl rbx,1 + adc r12,r12 + adc r13,r13 + adc r14,r14 + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[96+rsp],r11 + mov QWORD PTR[104+rsp],r12 + + + mov rax,QWORD PTR[56+rsi] + mul rax + add r13,rax + adc rdx,0 + + add r14,rdx + + mov QWORD PTR[112+rsp],r13 + mov QWORD PTR[120+rsp],r14 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call 
__rsaz_512_reduce + + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + mov rdx,r8 + mov rax,r9 + mov r8d,DWORD PTR[((128+8))+rsp] + mov rsi,rdi + + dec r8d + jnz $L$oop_sqr + jmp $L$sqr_tail + +ALIGN 32 +$L$oop_sqrx:: + mov DWORD PTR[((128+8))+rsp],r8d +DB 102,72,15,110,199 +DB 102,72,15,110,205 + + mulx r9,r8,rax + + mulx r10,rcx,QWORD PTR[16+rsi] + xor rbp,rbp + + mulx r11,rax,QWORD PTR[24+rsi] + adcx r9,rcx + + mulx r12,rcx,QWORD PTR[32+rsi] + adcx r10,rax + + mulx r13,rax,QWORD PTR[40+rsi] + adcx r11,rcx + +DB 0c4h,062h,0f3h,0f6h,0b6h,030h,000h,000h,000h + adcx r12,rax + adcx r13,rcx + +DB 0c4h,062h,0fbh,0f6h,0beh,038h,000h,000h,000h + adcx r14,rax + adcx r15,rbp + + mov rcx,r9 + shld r9,r8,1 + shl r8,1 + + xor ebp,ebp + mulx rdx,rax,rdx + adcx r8,rdx + mov rdx,QWORD PTR[8+rsi] + adcx r9,rbp + + mov QWORD PTR[rsp],rax + mov QWORD PTR[8+rsp],r8 + + + mulx rbx,rax,QWORD PTR[16+rsi] + adox r10,rax + adcx r11,rbx + +DB 0c4h,062h,0c3h,0f6h,086h,018h,000h,000h,000h + adox r11,rdi + adcx r12,r8 + + mulx rbx,rax,QWORD PTR[32+rsi] + adox r12,rax + adcx r13,rbx + + mulx r8,rdi,QWORD PTR[40+rsi] + adox r13,rdi + adcx r14,r8 + +DB 0c4h,0e2h,0fbh,0f6h,09eh,030h,000h,000h,000h + adox r14,rax + adcx r15,rbx + +DB 0c4h,062h,0c3h,0f6h,086h,038h,000h,000h,000h + adox r15,rdi + adcx r8,rbp + adox r8,rbp + + mov rbx,r11 + shld r11,r10,1 + shld r10,rcx,1 + + xor ebp,ebp + mulx rcx,rax,rdx + mov rdx,QWORD PTR[16+rsi] + adcx r9,rax + adcx r10,rcx + adcx r11,rbp + + mov QWORD PTR[16+rsp],r9 +DB 04ch,089h,094h,024h,018h,000h,000h,000h + + +DB 0c4h,062h,0c3h,0f6h,08eh,018h,000h,000h,000h + adox r12,rdi + adcx r13,r9 + + mulx rcx,rax,QWORD PTR[32+rsi] + adox r13,rax + adcx r14,rcx + + mulx r9,rdi,QWORD PTR[40+rsi] + adox r14,rdi + adcx r15,r9 + +DB 0c4h,0e2h,0fbh,0f6h,08eh,030h,000h,000h,000h + adox r15,rax + adcx r8,rcx + +DB 0c4h,062h,0c3h,0f6h,08eh,038h,000h,000h,000h + adox r8,rdi + adcx r9,rbp + adox r9,rbp + + mov rcx,r13 + shld r13,r12,1 + shld r12,rbx,1 + + xor ebp,ebp + mulx rdx,rax,rdx + adcx r11,rax + adcx r12,rdx + mov rdx,QWORD PTR[24+rsi] + adcx r13,rbp + + mov QWORD PTR[32+rsp],r11 +DB 04ch,089h,0a4h,024h,028h,000h,000h,000h + + +DB 0c4h,0e2h,0fbh,0f6h,09eh,020h,000h,000h,000h + adox r14,rax + adcx r15,rbx + + mulx r10,rdi,QWORD PTR[40+rsi] + adox r15,rdi + adcx r8,r10 + + mulx rbx,rax,QWORD PTR[48+rsi] + adox r8,rax + adcx r9,rbx + + mulx r10,rdi,QWORD PTR[56+rsi] + adox r9,rdi + adcx r10,rbp + adox r10,rbp + +DB 066h + mov rbx,r15 + shld r15,r14,1 + shld r14,rcx,1 + + xor ebp,ebp + mulx rdx,rax,rdx + adcx r13,rax + adcx r14,rdx + mov rdx,QWORD PTR[32+rsi] + adcx r15,rbp + + mov QWORD PTR[48+rsp],r13 + mov QWORD PTR[56+rsp],r14 + + +DB 0c4h,062h,0c3h,0f6h,09eh,028h,000h,000h,000h + adox r8,rdi + adcx r9,r11 + + mulx rcx,rax,QWORD PTR[48+rsi] + adox r9,rax + adcx r10,rcx + + mulx r11,rdi,QWORD PTR[56+rsi] + adox r10,rdi + adcx r11,rbp + adox r11,rbp + + mov rcx,r9 + shld r9,r8,1 + shld r8,rbx,1 + + xor ebp,ebp + mulx rdx,rax,rdx + adcx r15,rax + adcx r8,rdx + mov rdx,QWORD PTR[40+rsi] + adcx r9,rbp + + mov QWORD PTR[64+rsp],r15 + mov QWORD PTR[72+rsp],r8 + + +DB 0c4h,0e2h,0fbh,0f6h,09eh,030h,000h,000h,000h + adox r10,rax + adcx r11,rbx + +DB 0c4h,062h,0c3h,0f6h,0a6h,038h,000h,000h,000h + adox r11,rdi + adcx r12,rbp + adox r12,rbp + + mov rbx,r11 + shld r11,r10,1 + shld r10,rcx,1 + 
+ xor ebp,ebp + mulx rdx,rax,rdx + adcx r9,rax + adcx r10,rdx + mov rdx,QWORD PTR[48+rsi] + adcx r11,rbp + + mov QWORD PTR[80+rsp],r9 + mov QWORD PTR[88+rsp],r10 + + +DB 0c4h,062h,0fbh,0f6h,0aeh,038h,000h,000h,000h + adox r12,rax + adox r13,rbp + + xor r14,r14 + shld r14,r13,1 + shld r13,r12,1 + shld r12,rbx,1 + + xor ebp,ebp + mulx rdx,rax,rdx + adcx r11,rax + adcx r12,rdx + mov rdx,QWORD PTR[56+rsi] + adcx r13,rbp + +DB 04ch,089h,09ch,024h,060h,000h,000h,000h +DB 04ch,089h,0a4h,024h,068h,000h,000h,000h + + + mulx rdx,rax,rdx + adox r13,rax + adox rdx,rbp + +DB 066h + add r14,rdx + + mov QWORD PTR[112+rsp],r13 + mov QWORD PTR[120+rsp],r14 +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov rdx,QWORD PTR[128+rsp] + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reducex + + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + mov rdx,r8 + mov rax,r9 + mov r8d,DWORD PTR[((128+8))+rsp] + mov rsi,rdi + + dec r8d + jnz $L$oop_sqrx + +$L$sqr_tail:: + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$sqr_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_sqr:: +rsaz_512_sqr ENDP +PUBLIC rsaz_512_mul + +ALIGN 32 +rsaz_512_mul PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_body:: +DB 102,72,15,110,199 +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + mov r11d,080100h + and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp r11d,080100h + je $L$mulx + mov rbx,QWORD PTR[rdx] + mov rbp,rdx + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + jmp $L$mul_tail + +ALIGN 32 +$L$mulx:: + mov rbp,rdx + mov rdx,QWORD PTR[rdx] + call __rsaz_512_mulx + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov rdx,QWORD PTR[128+rsp] + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reducex +$L$mul_tail:: + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov 
rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul:: +rsaz_512_mul ENDP +PUBLIC rsaz_512_mul_gather4 + +ALIGN 32 +rsaz_512_mul_gather4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_gather4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_gather4_body:: + mov r11d,080100h + and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp r11d,080100h + je $L$mulx_gather + mov eax,DWORD PTR[64+r9*4+rdx] +DB 102,72,15,110,199 + mov ebx,DWORD PTR[r9*4+rdx] +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + + shl rax,32 + or rbx,rax + mov rax,QWORD PTR[rsi] + mov rcx,QWORD PTR[8+rsi] + lea rbp,QWORD PTR[128+r9*4+rdx] + mul rbx + mov QWORD PTR[rsp],rax + mov rax,rcx + mov r8,rdx + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + lea rbp,QWORD PTR[128+rbp] + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rsp] + mov ecx,7 + jmp $L$oop_mul_gather + +ALIGN 32 +$L$oop_mul_gather:: + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[128+rbp] + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul_gather + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + jmp $L$mul_gather_tail + +ALIGN 32 +$L$mulx_gather:: + mov eax,DWORD PTR[64+r9*4+rdx] +DB 102,72,15,110,199 + lea rbp,QWORD 
PTR[128+r9*4+rdx] + mov edx,DWORD PTR[r9*4+rdx] +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + + shl rax,32 + or rdx,rax + mulx r8,rbx,QWORD PTR[rsi] + mov QWORD PTR[rsp],rbx + xor edi,edi + + mulx r9,rax,QWORD PTR[8+rsi] + movd xmm4,DWORD PTR[rbp] + + mulx r10,rbx,QWORD PTR[16+rsi] + movd xmm5,DWORD PTR[64+rbp] + adcx r8,rax + + mulx r11,rax,QWORD PTR[24+rsi] + pslldq xmm5,4 + adcx r9,rbx + + mulx r12,rbx,QWORD PTR[32+rsi] + por xmm4,xmm5 + adcx r10,rax + + mulx r13,rax,QWORD PTR[40+rsi] + adcx r11,rbx + + mulx r14,rbx,QWORD PTR[48+rsi] + lea rbp,QWORD PTR[128+rbp] + adcx r12,rax + + mulx r15,rax,QWORD PTR[56+rsi] +DB 102,72,15,126,226 + adcx r13,rbx + adcx r14,rax + mov rbx,r8 + adcx r15,rdi + + mov rcx,-7 + jmp $L$oop_mulx_gather + +ALIGN 32 +$L$oop_mulx_gather:: + mulx r8,rax,QWORD PTR[rsi] + adcx rbx,rax + adox r8,r9 + + mulx r9,rax,QWORD PTR[8+rsi] +DB 066h,00fh,06eh,0a5h,000h,000h,000h,000h + adcx r8,rax + adox r9,r10 + + mulx r10,rax,QWORD PTR[16+rsi] + movd xmm5,DWORD PTR[64+rbp] + lea rbp,QWORD PTR[128+rbp] + adcx r9,rax + adox r10,r11 + +DB 0c4h,062h,0fbh,0f6h,09eh,018h,000h,000h,000h + pslldq xmm5,4 + por xmm4,xmm5 + adcx r10,rax + adox r11,r12 + + mulx r12,rax,QWORD PTR[32+rsi] + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD PTR[40+rsi] + adcx r12,rax + adox r13,r14 + +DB 0c4h,062h,0fbh,0f6h,0b6h,030h,000h,000h,000h + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD PTR[56+rsi] +DB 102,72,15,126,226 + mov QWORD PTR[64+rcx*8+rsp],rbx + adcx r14,rax + adox r15,rdi + mov rbx,r8 + adcx r15,rdi + + inc rcx + jnz $L$oop_mulx_gather + + mov QWORD PTR[64+rsp],r8 + mov QWORD PTR[((64+8))+rsp],r9 + mov QWORD PTR[((64+16))+rsp],r10 + mov QWORD PTR[((64+24))+rsp],r11 + mov QWORD PTR[((64+32))+rsp],r12 + mov QWORD PTR[((64+40))+rsp],r13 + mov QWORD PTR[((64+48))+rsp],r14 + mov QWORD PTR[((64+56))+rsp],r15 + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov rdx,QWORD PTR[128+rsp] + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reducex + +$L$mul_gather_tail:: + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_gather4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_gather4:: +rsaz_512_mul_gather4 ENDP +PUBLIC rsaz_512_mul_scatter4 + +ALIGN 32 +rsaz_512_mul_scatter4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_scatter4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_scatter4_body:: + lea r8,QWORD PTR[r9*4+r8] +DB 102,72,15,110,199 +DB 102,72,15,110,202 +DB 102,73,15,110,208 + mov QWORD PTR[128+rsp],rcx + + mov rbp,rdi + mov r11d,080100h + and r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp 
r11d,080100h + je $L$mulx_scatter + mov rbx,QWORD PTR[rdi] + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + jmp $L$mul_scatter_tail + +ALIGN 32 +$L$mulx_scatter:: + mov rdx,QWORD PTR[rdi] + call __rsaz_512_mulx + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov rdx,QWORD PTR[128+rsp] + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reducex + +$L$mul_scatter_tail:: + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] +DB 102,72,15,126,214 + sbb rcx,rcx + + call __rsaz_512_subtract + + mov DWORD PTR[rsi],r8d + shr r8,32 + mov DWORD PTR[128+rsi],r9d + shr r9,32 + mov DWORD PTR[256+rsi],r10d + shr r10,32 + mov DWORD PTR[384+rsi],r11d + shr r11,32 + mov DWORD PTR[512+rsi],r12d + shr r12,32 + mov DWORD PTR[640+rsi],r13d + shr r13,32 + mov DWORD PTR[768+rsi],r14d + shr r14,32 + mov DWORD PTR[896+rsi],r15d + shr r15,32 + mov DWORD PTR[64+rsi],r8d + mov DWORD PTR[192+rsi],r9d + mov DWORD PTR[320+rsi],r10d + mov DWORD PTR[448+rsi],r11d + mov DWORD PTR[576+rsi],r12d + mov DWORD PTR[704+rsi],r13d + mov DWORD PTR[832+rsi],r14d + mov DWORD PTR[960+rsi],r15d + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_scatter4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_scatter4:: +rsaz_512_mul_scatter4 ENDP +PUBLIC rsaz_512_mul_by_one + +ALIGN 32 +rsaz_512_mul_by_one PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_by_one:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_by_one_body:: + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + mov rbp,rdx + mov QWORD PTR[128+rsp],rcx + + mov r8,QWORD PTR[rsi] + pxor xmm0,xmm0 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + mov r15,QWORD PTR[56+rsi] + + movdqa XMMWORD PTR[rsp],xmm0 + movdqa XMMWORD PTR[16+rsp],xmm0 + movdqa XMMWORD PTR[32+rsp],xmm0 + movdqa XMMWORD PTR[48+rsp],xmm0 + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm0 + movdqa XMMWORD PTR[96+rsp],xmm0 + and eax,080100h + cmp eax,080100h + je $L$by_one_callx + call __rsaz_512_reduce + jmp $L$by_one_tail +ALIGN 32 +$L$by_one_callx:: + mov rdx,QWORD PTR[128+rsp] + call __rsaz_512_reducex +$L$by_one_tail:: + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD 
PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_by_one_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_by_one:: +rsaz_512_mul_by_one ENDP + +ALIGN 32 +__rsaz_512_reduce PROC PRIVATE + mov rbx,r8 + imul rbx,QWORD PTR[((128+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$reduction_loop + +ALIGN 32 +$L$reduction_loop:: + mul rbx + mov rax,QWORD PTR[8+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rbp] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r10,r11 + mov rsi,QWORD PTR[((128+8))+rsp] + + + adc rdx,0 + mov r11,rdx + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rbp] + adc rdx,0 + imul rsi,r8 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jne $L$reduction_loop + + DB 0F3h,0C3h ;repret +__rsaz_512_reduce ENDP + +ALIGN 32 +__rsaz_512_reducex PROC PRIVATE + + imul rdx,r8 + xor rsi,rsi + mov ecx,8 + jmp $L$reduction_loopx + +ALIGN 32 +$L$reduction_loopx:: + mov rbx,r8 + mulx r8,rax,QWORD PTR[rbp] + adcx rax,rbx + adox r8,r9 + + mulx r9,rax,QWORD PTR[8+rbp] + adcx r8,rax + adox r9,r10 + + mulx r10,rbx,QWORD PTR[16+rbp] + adcx r9,rbx + adox r10,r11 + + mulx r11,rbx,QWORD PTR[24+rbp] + adcx r10,rbx + adox r11,r12 + +DB 0c4h,062h,0e3h,0f6h,0a5h,020h,000h,000h,000h + mov rax,rdx + mov rdx,r8 + adcx r11,rbx + adox r12,r13 + + mulx rdx,rbx,QWORD PTR[((128+8))+rsp] + mov rdx,rax + + mulx r13,rax,QWORD PTR[40+rbp] + adcx r12,rax + adox r13,r14 + +DB 0c4h,062h,0fbh,0f6h,0b5h,030h,000h,000h,000h + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD PTR[56+rbp] + mov rdx,rbx + adcx r14,rax + adox r15,rsi + adcx r15,rsi + + dec ecx + jne $L$reduction_loopx + + DB 0F3h,0C3h ;repret +__rsaz_512_reducex ENDP + +ALIGN 32 +__rsaz_512_subtract PROC PRIVATE + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + mov r8,QWORD PTR[rbp] + mov r9,QWORD PTR[8+rbp] + neg r8 + not r9 + and r8,rcx + mov r10,QWORD PTR[16+rbp] + and r9,rcx + not r10 + mov r11,QWORD PTR[24+rbp] + and r10,rcx + not r11 + mov r12,QWORD PTR[32+rbp] + and r11,rcx + not r12 + mov r13,QWORD PTR[40+rbp] + and r12,rcx + not r13 + mov r14,QWORD PTR[48+rbp] + and r13,rcx + not r14 + mov r15,QWORD PTR[56+rbp] + and r14,rcx + not r15 + and r15,rcx + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h 
;repret +__rsaz_512_subtract ENDP + +ALIGN 32 +__rsaz_512_mul PROC PRIVATE + lea rdi,QWORD PTR[8+rsp] + + mov rax,QWORD PTR[rsi] + mul rbx + mov QWORD PTR[rdi],rax + mov rax,QWORD PTR[8+rsi] + mov r8,rdx + + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[8+rbp] + lea rdi,QWORD PTR[8+rdi] + + mov ecx,7 + jmp $L$oop_mul + +ALIGN 32 +$L$oop_mul:: + mov rbx,QWORD PTR[rbp] + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + lea rbp,QWORD PTR[8+rbp] + adc r14,0 + + mul rbx + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_mul ENDP + +ALIGN 32 +__rsaz_512_mulx PROC PRIVATE + mulx r8,rbx,QWORD PTR[rsi] + mov rcx,-6 + + mulx r9,rax,QWORD PTR[8+rsi] + mov QWORD PTR[8+rsp],rbx + + mulx r10,rbx,QWORD PTR[16+rsi] + adc r8,rax + + mulx r11,rax,QWORD PTR[24+rsi] + adc r9,rbx + + mulx r12,rbx,QWORD PTR[32+rsi] + adc r10,rax + + mulx r13,rax,QWORD PTR[40+rsi] + adc r11,rbx + + mulx r14,rbx,QWORD PTR[48+rsi] + adc r12,rax + + mulx r15,rax,QWORD PTR[56+rsi] + mov rdx,QWORD PTR[8+rbp] + adc r13,rbx + adc r14,rax + adc r15,0 + + xor rdi,rdi + jmp $L$oop_mulx + +ALIGN 32 +$L$oop_mulx:: + mov rbx,r8 + mulx r8,rax,QWORD PTR[rsi] + adcx rbx,rax + adox r8,r9 + + mulx r9,rax,QWORD PTR[8+rsi] + adcx r8,rax + adox r9,r10 + + mulx r10,rax,QWORD PTR[16+rsi] + adcx r9,rax + adox r10,r11 + + mulx r11,rax,QWORD PTR[24+rsi] + adcx r10,rax + adox r11,r12 + +DB 03eh,0c4h,062h,0fbh,0f6h,0a6h,020h,000h,000h,000h + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD PTR[40+rsi] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD PTR[48+rsi] + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD PTR[56+rsi] + mov rdx,QWORD PTR[64+rcx*8+rbp] + mov QWORD PTR[((8+64-8))+rcx*8+rsp],rbx + adcx r14,rax + adox r15,rdi + adcx r15,rdi + + inc rcx + jnz $L$oop_mulx + + mov rbx,r8 + mulx r8,rax,QWORD PTR[rsi] + adcx rbx,rax + adox r8,r9 + +DB 0c4h,062h,0fbh,0f6h,08eh,008h,000h,000h,000h + adcx r8,rax + adox r9,r10 + +DB 0c4h,062h,0fbh,0f6h,096h,010h,000h,000h,000h + adcx r9,rax + adox r10,r11 + + mulx r11,rax,QWORD PTR[24+rsi] 
+ adcx r10,rax + adox r11,r12 + + mulx r12,rax,QWORD PTR[32+rsi] + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD PTR[40+rsi] + adcx r12,rax + adox r13,r14 + +DB 0c4h,062h,0fbh,0f6h,0b6h,030h,000h,000h,000h + adcx r13,rax + adox r14,r15 + +DB 0c4h,062h,0fbh,0f6h,0beh,038h,000h,000h,000h + adcx r14,rax + adox r15,rdi + adcx r15,rdi + + mov QWORD PTR[((8+64-8))+rsp],rbx + mov QWORD PTR[((8+64))+rsp],r8 + mov QWORD PTR[((8+64+8))+rsp],r9 + mov QWORD PTR[((8+64+16))+rsp],r10 + mov QWORD PTR[((8+64+24))+rsp],r11 + mov QWORD PTR[((8+64+32))+rsp],r12 + mov QWORD PTR[((8+64+40))+rsp],r13 + mov QWORD PTR[((8+64+48))+rsp],r14 + mov QWORD PTR[((8+64+56))+rsp],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_mulx ENDP +PUBLIC rsaz_512_scatter4 + +ALIGN 16 +rsaz_512_scatter4 PROC PUBLIC + lea rcx,QWORD PTR[r8*4+rcx] + mov r9d,8 + jmp $L$oop_scatter +ALIGN 16 +$L$oop_scatter:: + mov rax,QWORD PTR[rdx] + lea rdx,QWORD PTR[8+rdx] + mov DWORD PTR[rcx],eax + shr rax,32 + mov DWORD PTR[64+rcx],eax + lea rcx,QWORD PTR[128+rcx] + dec r9d + jnz $L$oop_scatter + DB 0F3h,0C3h ;repret +rsaz_512_scatter4 ENDP + +PUBLIC rsaz_512_gather4 + +ALIGN 16 +rsaz_512_gather4 PROC PUBLIC + lea rdx,QWORD PTR[r8*4+rdx] + mov r9d,8 + jmp $L$oop_gather +ALIGN 16 +$L$oop_gather:: + mov eax,DWORD PTR[rdx] + mov r8d,DWORD PTR[64+rdx] + lea rdx,QWORD PTR[128+rdx] + shl r8,32 + or rax,r8 + mov QWORD PTR[rcx],rax + lea rcx,QWORD PTR[8+rcx] + dec r9d + jnz $L$oop_gather + DB 0F3h,0C3h ;repret +rsaz_512_gather4 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rax,QWORD PTR[((128+24+48))+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rsaz_512_sqr + DD imagerel $L$SEH_end_rsaz_512_sqr + DD imagerel $L$SEH_info_rsaz_512_sqr + + DD imagerel $L$SEH_begin_rsaz_512_mul + DD imagerel $L$SEH_end_rsaz_512_mul + DD imagerel $L$SEH_info_rsaz_512_mul + + DD imagerel $L$SEH_begin_rsaz_512_mul_gather4 + DD imagerel $L$SEH_end_rsaz_512_mul_gather4 + DD 
imagerel $L$SEH_info_rsaz_512_mul_gather4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_end_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_info_rsaz_512_mul_scatter4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_by_one + DD imagerel $L$SEH_end_rsaz_512_mul_by_one + DD imagerel $L$SEH_info_rsaz_512_mul_by_one + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rsaz_512_sqr:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$sqr_body,imagerel $L$sqr_epilogue +$L$SEH_info_rsaz_512_mul:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_rsaz_512_mul_gather4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_gather4_body,imagerel $L$mul_gather4_epilogue +$L$SEH_info_rsaz_512_mul_scatter4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_scatter4_body,imagerel $L$mul_scatter4_epilogue +$L$SEH_info_rsaz_512_mul_by_one:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_by_one_body,imagerel $L$mul_by_one_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm index a58049d3c80ce5..d2e9c6600bfc47 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-gf2m.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' ALIGN 16 @@ -250,14 +250,12 @@ $L$body_mul_2x2:: mov rax,rdx mov rbp,r9 call _mul_1x1 - mov QWORD PTR[16+rsp],rax mov QWORD PTR[24+rsp],rdx mov rax,QWORD PTR[48+rsp] mov rbp,QWORD PTR[64+rsp] call _mul_1x1 - mov QWORD PTR[rsp],rax mov QWORD PTR[8+rsp],rdx @@ -266,7 +264,6 @@ $L$body_mul_2x2:: xor rax,QWORD PTR[48+rsp] xor rbp,QWORD PTR[64+rsp] call _mul_1x1 - mov rbx,QWORD PTR[rsp] mov rcx,QWORD PTR[8+rsp] mov rdi,QWORD PTR[16+rsp] @@ -350,7 +347,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -395,7 +391,6 @@ ALIGN 8 $L$SEH_info_1x1:: DB 001h,007h,002h,000h DB 007h,001h,011h,000h - $L$SEH_info_2x2:: DB 9,0,0,0 DD imagerel se_handler diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm index f4518aa3bdb04f..f2527450608007 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont.asm @@ -1,5 +1,7 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC bn_mul_mont @@ -21,9 +23,12 @@ $L$SEH_begin_bn_mul_mont:: jnz $L$mul_enter cmp r9d,8 jb $L$mul_enter + mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] cmp rdx,rsi jne $L$mul4x_enter - jmp $L$sqr4x_enter + test r9d,7 + jz $L$sqr8x_enter + jmp $L$mul4x_enter ALIGN 16 $L$mul_enter:: @@ -176,7 +181,7 @@ $L$inner_enter:: lea r14,QWORD PTR[1+r14] cmp r14,r9 - jl $L$outer + jb $L$outer xor r14,r14 mov rax,QWORD PTR[rsp] @@ -239,6 +244,9 @@ $L$SEH_begin_bn_mul4x_mont:: $L$mul4x_enter:: + and r11d,080100h + cmp r11d,080100h + je $L$mulx4x_enter push rbx push rbp push r12 @@ -357,7 +365,7 @@ $L$1st4x:: mov QWORD PTR[((-32))+r15*8+rsp],rdi mov r13,rdx cmp r15,r9 - jl $L$1st4x + jb $L$1st4x mul rbx add r10,rax @@ -505,7 +513,7 @@ $L$inner4x:: mov QWORD PTR[((-32))+r15*8+rsp],rdi mov r13,rdx cmp r15,r9 - jl $L$inner4x + jb $L$inner4x mul rbx add r10,rax @@ -551,7 +559,7 @@ $L$inner4x:: mov QWORD PTR[r15*8+rsp],rdi cmp r14,r9 - jl $L$outer4x + jb $L$outer4x mov rdi,QWORD PTR[16+r9*8+rsp] mov rax,QWORD PTR[rsp] pxor 
xmm0,xmm0 @@ -636,13 +644,16 @@ $L$mul4x_epilogue:: DB 0F3h,0C3h ;repret $L$SEH_end_bn_mul4x_mont:: bn_mul4x_mont ENDP +EXTERN bn_sqrx8x_internal:NEAR +EXTERN bn_sqr8x_internal:NEAR -ALIGN 16 -bn_sqr4x_mont PROC PRIVATE + +ALIGN 32 +bn_sqr8x_mont PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi mov rax,rsp -$L$SEH_begin_bn_sqr4x_mont:: +$L$SEH_begin_bn_sqr8x_mont:: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -651,7 +662,8 @@ $L$SEH_begin_bn_sqr4x_mont:: mov r9,QWORD PTR[48+rsp] -$L$sqr4x_enter:: +$L$sqr8x_enter:: + mov rax,rsp push rbx push rbp push r12 @@ -659,763 +671,447 @@ $L$sqr4x_enter:: push r14 push r15 + mov r10d,r9d shl r9d,3 - xor r10,r10 - mov r11,rsp - sub r10,r9 - mov r8,QWORD PTR[r8] - lea rsp,QWORD PTR[((-72))+r10*2+rsp] - and rsp,-1024 - - - + shl r10,3+2 + neg r9 + lea r11,QWORD PTR[((-64))+r9*4+rsp] + mov r8,QWORD PTR[r8] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$sqr8x_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + jmp $L$sqr8x_sp_done + +ALIGN 32 +$L$sqr8x_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*4] + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$sqr8x_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + lea r11,QWORD PTR[64+r9*2+rsp] + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$sqr8x_body:: + + mov rbp,r9 +DB 102,73,15,110,211 + shr rbp,3+2 + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + jmp $L$sqr8x_copy_n + +ALIGN 32 +$L$sqr8x_copy_n:: + movq xmm0,QWORD PTR[rcx] + movq xmm1,QWORD PTR[8+rcx] + movq xmm3,QWORD PTR[16+rcx] + movq xmm4,QWORD PTR[24+rcx] + lea rcx,QWORD PTR[32+rcx] + movdqa XMMWORD PTR[r11],xmm0 + movdqa XMMWORD PTR[16+r11],xmm1 + movdqa XMMWORD PTR[32+r11],xmm3 + movdqa XMMWORD PTR[48+r11],xmm4 + lea r11,QWORD PTR[64+r11] + dec rbp + jnz $L$sqr8x_copy_n + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,73,15,110,218 + and eax,080100h + cmp eax,080100h + jne $L$sqr8x_nox - mov QWORD PTR[32+rsp],rdi - mov QWORD PTR[40+rsp],rcx - mov QWORD PTR[48+rsp],r8 - mov QWORD PTR[56+rsp],r11 -$L$sqr4x_body:: - - - + call bn_sqrx8x_internal + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + lea rdx,QWORD PTR[64+r9*2+rsp] + shr r9,3+2 + mov rsi,QWORD PTR[40+rsp] + jmp $L$sqr8x_zero +ALIGN 32 +$L$sqr8x_nox:: + call bn_sqr8x_internal + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + lea rdx,QWORD PTR[64+r9*2+rsp] + shr r9,3+2 + mov rsi,QWORD PTR[40+rsp] + jmp $L$sqr8x_zero + +ALIGN 32 +$L$sqr8x_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + movdqa XMMWORD PTR[rdx],xmm0 + movdqa XMMWORD PTR[16+rdx],xmm0 + movdqa XMMWORD PTR[32+rdx],xmm0 + movdqa XMMWORD PTR[48+rdx],xmm0 + lea rdx,QWORD PTR[64+rdx] + dec r9 + jnz $L$sqr8x_zero - lea rbp,QWORD PTR[32+r10] - lea rsi,QWORD PTR[r9*1+rsi] + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$sqr8x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_sqr8x_mont:: +bn_sqr8x_mont ENDP +ALIGN 32 +bn_mulx4x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mulx4x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] - mov r14,QWORD 
PTR[((-32))+rbp*1+rsi] - lea rdi,QWORD PTR[64+r9*2+rsp] - mov rax,QWORD PTR[((-24))+rbp*1+rsi] - lea rdi,QWORD PTR[((-32))+rbp*1+rdi] - mov rbx,QWORD PTR[((-16))+rbp*1+rsi] - mov r15,rax - - mul r14 - mov r10,rax - mov rax,rbx - mov r11,rdx - mov QWORD PTR[((-24))+rbp*1+rdi],r10 - - xor r10,r10 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - mov QWORD PTR[((-16))+rbp*1+rdi],r11 - - lea rcx,QWORD PTR[((-16))+rbp] - - - mov rbx,QWORD PTR[8+rcx*1+rsi] - mul r15 - mov r12,rax - mov rax,rbx - mov r13,rdx - - xor r11,r11 - add r10,r12 - lea rcx,QWORD PTR[16+rcx] - adc r11,0 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[((-8))+rcx*1+rdi],r10 - jmp $L$sqr4x_1st - -ALIGN 16 -$L$sqr4x_1st:: - mov rbx,QWORD PTR[rcx*1+rsi] - xor r12,r12 - mul r15 - add r13,rax - mov rax,rbx - adc r12,rdx - - xor r10,r10 - add r11,r13 - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - mov QWORD PTR[rcx*1+rdi],r11 - - - mov rbx,QWORD PTR[8+rcx*1+rsi] - xor r13,r13 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - - xor r11,r11 - add r10,r12 - adc r11,0 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[8+rcx*1+rdi],r10 - - mov rbx,QWORD PTR[16+rcx*1+rsi] - xor r12,r12 - mul r15 - add r13,rax - mov rax,rbx - adc r12,rdx +$L$mulx4x_enter:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + shl r9d,3 +DB 067h xor r10,r10 - add r11,r13 - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - mov QWORD PTR[16+rcx*1+rdi],r11 - + sub r10,r9 + mov r8,QWORD PTR[r8] + lea rsp,QWORD PTR[((-72))+r10*1+rsp] + lea r10,QWORD PTR[r9*1+rdx] + and rsp,-128 - mov rbx,QWORD PTR[24+rcx*1+rsi] - xor r13,r13 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - xor r11,r11 - add r10,r12 - lea rcx,QWORD PTR[32+rcx] - adc r11,0 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[((-8))+rcx*1+rdi],r10 - cmp rcx,0 - jne $L$sqr4x_1st - xor r12,r12 - add r13,r11 - adc r12,0 - mul r15 - add r13,rax - adc r12,rdx - mov QWORD PTR[rdi],r13 - lea rbp,QWORD PTR[16+rbp] - mov QWORD PTR[8+rdi],r12 - jmp $L$sqr4x_outer -ALIGN 16 -$L$sqr4x_outer:: - mov r14,QWORD PTR[((-32))+rbp*1+rsi] - lea rdi,QWORD PTR[64+r9*2+rsp] - mov rax,QWORD PTR[((-24))+rbp*1+rsi] - lea rdi,QWORD PTR[((-32))+rbp*1+rdi] - mov rbx,QWORD PTR[((-16))+rbp*1+rsi] - mov r15,rax - - mov r10,QWORD PTR[((-24))+rbp*1+rdi] - xor r11,r11 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[((-24))+rbp*1+rdi],r10 - xor r10,r10 - add r11,QWORD PTR[((-16))+rbp*1+rdi] - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - mov QWORD PTR[((-16))+rbp*1+rdi],r11 - lea rcx,QWORD PTR[((-16))+rbp] - xor r12,r12 - mov rbx,QWORD PTR[8+rcx*1+rsi] - xor r13,r13 - add r12,QWORD PTR[8+rcx*1+rdi] - adc r13,0 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - xor r11,r11 - add r10,r12 - adc r11,0 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[8+rcx*1+rdi],r10 - lea rcx,QWORD PTR[16+rcx] - jmp $L$sqr4x_inner - -ALIGN 16 -$L$sqr4x_inner:: - mov rbx,QWORD PTR[rcx*1+rsi] - xor r12,r12 - add r13,QWORD PTR[rcx*1+rdi] - adc r12,0 - mul r15 - add r13,rax - mov rax,rbx - adc r12,rdx - - xor r10,r10 - add r11,r13 - adc r10,0 - mul r14 + mov QWORD PTR[rsp],r9 + shr r9,5 + mov QWORD PTR[16+rsp],r10 + sub r9,1 + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],rdi + mov QWORD PTR[40+rsp],rax + mov QWORD PTR[48+rsp],r9 + jmp $L$mulx4x_body + +ALIGN 32 +$L$mulx4x_body:: + lea rdi,QWORD PTR[8+rdx] + mov rdx,QWORD PTR[rdx] + lea rbx,QWORD PTR[((64+32))+rsp] + mov 
r9,rdx + + mulx rax,r8,QWORD PTR[rsi] + mulx r14,r11,QWORD PTR[8+rsi] add r11,rax - mov rax,rbx - adc r10,rdx - mov QWORD PTR[rcx*1+rdi],r11 - - mov rbx,QWORD PTR[8+rcx*1+rsi] - xor r13,r13 - add r12,QWORD PTR[8+rcx*1+rdi] + mov QWORD PTR[8+rsp],rdi + mulx r13,r12,QWORD PTR[16+rsi] + adc r12,r14 adc r13,0 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - - xor r11,r11 - add r10,r12 - lea rcx,QWORD PTR[16+rcx] - adc r11,0 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[((-8))+rcx*1+rdi],r10 - - cmp rcx,0 - jne $L$sqr4x_inner - - xor r12,r12 - add r13,r11 - adc r12,0 - mul r15 - add r13,rax - adc r12,rdx - - mov QWORD PTR[rdi],r13 - mov QWORD PTR[8+rdi],r12 - - add rbp,16 - jnz $L$sqr4x_outer - - - mov r14,QWORD PTR[((-32))+rsi] - lea rdi,QWORD PTR[64+r9*2+rsp] - mov rax,QWORD PTR[((-24))+rsi] - lea rdi,QWORD PTR[((-32))+rbp*1+rdi] - mov rbx,QWORD PTR[((-16))+rsi] - mov r15,rax - - xor r11,r11 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[((-24))+rdi],r10 - - xor r10,r10 - add r11,r13 - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - mov QWORD PTR[((-16))+rdi],r11 - - mov rbx,QWORD PTR[((-8))+rsi] - mul r15 - add r12,rax - mov rax,rbx - adc rdx,0 - - xor r11,r11 - add r10,r12 - mov r13,rdx - adc r11,0 - mul r14 - add r10,rax - mov rax,rbx - adc r11,rdx - mov QWORD PTR[((-8))+rdi],r10 - - xor r12,r12 - add r13,r11 - adc r12,0 - mul r15 - add r13,rax - mov rax,QWORD PTR[((-16))+rsi] - adc r12,rdx - mov QWORD PTR[rdi],r13 - mov QWORD PTR[8+rdi],r12 - - mul rbx - add rbp,16 - xor r14,r14 - sub rbp,r9 - xor r15,r15 + mov rdi,r8 + imul r8,QWORD PTR[24+rsp] + xor rbp,rbp - add rax,r12 - adc rdx,0 - mov QWORD PTR[8+rdi],rax - mov QWORD PTR[16+rdi],rdx - mov QWORD PTR[24+rdi],r15 + mulx r14,rax,QWORD PTR[24+rsi] + mov rdx,r8 + lea rsi,QWORD PTR[32+rsi] + adcx r13,rax + adcx r14,rbp + + mulx r10,rax,QWORD PTR[rcx] + adcx rdi,rax + adox r10,r11 + mulx r11,rax,QWORD PTR[8+rcx] + adcx r10,rax + adox r11,r12 +DB 0c4h,062h,0fbh,0f6h,0a1h,010h,000h,000h,000h + mov rdi,QWORD PTR[48+rsp] + mov QWORD PTR[((-32))+rbx],r10 + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD PTR[24+rcx] + mov rdx,r9 + mov QWORD PTR[((-24))+rbx],r11 + adcx r12,rax + adox r15,rbp + lea rcx,QWORD PTR[32+rcx] + mov QWORD PTR[((-16))+rbx],r12 + + jmp $L$mulx4x_1st + +ALIGN 32 +$L$mulx4x_1st:: + adcx r15,rbp + mulx rax,r10,QWORD PTR[rsi] + adcx r10,r14 + mulx r14,r11,QWORD PTR[8+rsi] + adcx r11,rax + mulx rax,r12,QWORD PTR[16+rsi] + adcx r12,r14 + mulx r14,r13,QWORD PTR[24+rsi] +DB 067h,067h + mov rdx,r8 + adcx r13,rax + adcx r14,rbp + lea rsi,QWORD PTR[32+rsi] + lea rbx,QWORD PTR[32+rbx] + + adox r10,r15 + mulx r15,rax,QWORD PTR[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD PTR[8+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD PTR[16+rcx] + mov QWORD PTR[((-40))+rbx],r10 + adcx r12,rax + mov QWORD PTR[((-32))+rbx],r11 + adox r13,r15 + mulx r15,rax,QWORD PTR[24+rcx] + mov rdx,r9 + mov QWORD PTR[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + lea rcx,QWORD PTR[32+rcx] + mov QWORD PTR[((-16))+rbx],r13 - mov rax,QWORD PTR[((-16))+rbp*1+rsi] - lea rdi,QWORD PTR[64+r9*2+rsp] - xor r10,r10 - mov r11,QWORD PTR[((-24))+rbp*2+rdi] - - lea r12,QWORD PTR[r10*2+r14] - shr r10,63 - lea r13,QWORD PTR[r11*2+rcx] - shr r11,63 - or r13,r10 - mov r10,QWORD PTR[((-16))+rbp*2+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[((-8))+rbp*2+rdi] - adc r12,rax - mov rax,QWORD PTR[((-8))+rbp*1+rsi] - mov QWORD PTR[((-32))+rbp*2+rdi],r12 - adc r13,rdx - - lea rbx,QWORD 
PTR[r10*2+r14] - mov QWORD PTR[((-24))+rbp*2+rdi],r13 - sbb r15,r15 - shr r10,63 - lea r8,QWORD PTR[r11*2+rcx] - shr r11,63 - or r8,r10 - mov r10,QWORD PTR[rbp*2+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[8+rbp*2+rdi] - adc rbx,rax - mov rax,QWORD PTR[rbp*1+rsi] - mov QWORD PTR[((-16))+rbp*2+rdi],rbx - adc r8,rdx - lea rbp,QWORD PTR[16+rbp] - mov QWORD PTR[((-40))+rbp*2+rdi],r8 - sbb r15,r15 - jmp $L$sqr4x_shift_n_add + dec rdi + jnz $L$mulx4x_1st -ALIGN 16 -$L$sqr4x_shift_n_add:: - lea r12,QWORD PTR[r10*2+r14] - shr r10,63 - lea r13,QWORD PTR[r11*2+rcx] - shr r11,63 - or r13,r10 - mov r10,QWORD PTR[((-16))+rbp*2+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[((-8))+rbp*2+rdi] - adc r12,rax - mov rax,QWORD PTR[((-8))+rbp*1+rsi] - mov QWORD PTR[((-32))+rbp*2+rdi],r12 - adc r13,rdx - - lea rbx,QWORD PTR[r10*2+r14] - mov QWORD PTR[((-24))+rbp*2+rdi],r13 - sbb r15,r15 - shr r10,63 - lea r8,QWORD PTR[r11*2+rcx] - shr r11,63 - or r8,r10 - mov r10,QWORD PTR[rbp*2+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[8+rbp*2+rdi] - adc rbx,rax - mov rax,QWORD PTR[rbp*1+rsi] - mov QWORD PTR[((-16))+rbp*2+rdi],rbx - adc r8,rdx - - lea r12,QWORD PTR[r10*2+r14] - mov QWORD PTR[((-8))+rbp*2+rdi],r8 - sbb r15,r15 - shr r10,63 - lea r13,QWORD PTR[r11*2+rcx] - shr r11,63 - or r13,r10 - mov r10,QWORD PTR[16+rbp*2+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[24+rbp*2+rdi] - adc r12,rax - mov rax,QWORD PTR[8+rbp*1+rsi] - mov QWORD PTR[rbp*2+rdi],r12 - adc r13,rdx - - lea rbx,QWORD PTR[r10*2+r14] - mov QWORD PTR[8+rbp*2+rdi],r13 - sbb r15,r15 - shr r10,63 - lea r8,QWORD PTR[r11*2+rcx] - shr r11,63 - or r8,r10 - mov r10,QWORD PTR[32+rbp*2+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[40+rbp*2+rdi] - adc rbx,rax - mov rax,QWORD PTR[16+rbp*1+rsi] - mov QWORD PTR[16+rbp*2+rdi],rbx - adc r8,rdx - mov QWORD PTR[24+rbp*2+rdi],r8 - sbb r15,r15 - add rbp,32 - jnz $L$sqr4x_shift_n_add - - lea r12,QWORD PTR[r10*2+r14] - shr r10,63 - lea r13,QWORD PTR[r11*2+rcx] - shr r11,63 - or r13,r10 - mov r10,QWORD PTR[((-16))+rdi] - mov r14,r11 - mul rax - neg r15 - mov r11,QWORD PTR[((-8))+rdi] - adc r12,rax - mov rax,QWORD PTR[((-8))+rsi] - mov QWORD PTR[((-32))+rdi],r12 - adc r13,rdx - - lea rbx,QWORD PTR[r10*2+r14] - mov QWORD PTR[((-24))+rdi],r13 + mov rax,QWORD PTR[rsp] + mov rdi,QWORD PTR[8+rsp] + adc r15,rbp + add r14,r15 sbb r15,r15 - shr r10,63 - lea r8,QWORD PTR[r11*2+rcx] - shr r11,63 - or r8,r10 - mul rax - neg r15 - adc rbx,rax - adc r8,rdx - mov QWORD PTR[((-16))+rdi],rbx - mov QWORD PTR[((-8))+rdi],r8 - mov rsi,QWORD PTR[40+rsp] - mov r8,QWORD PTR[48+rsp] - xor rcx,rcx - mov QWORD PTR[rsp],r9 - sub rcx,r9 - mov r10,QWORD PTR[64+rsp] - mov r14,r8 - lea rax,QWORD PTR[64+r9*2+rsp] - lea rdi,QWORD PTR[64+r9*1+rsp] - mov QWORD PTR[8+rsp],rax - lea rsi,QWORD PTR[r9*1+rsi] - xor rbp,rbp - - mov rax,QWORD PTR[rcx*1+rsi] - mov r9,QWORD PTR[8+rcx*1+rsi] - imul r14,r10 - mov rbx,rax - jmp $L$sqr4x_mont_outer - -ALIGN 16 -$L$sqr4x_mont_outer:: - xor r11,r11 - mul r14 - add r10,rax - mov rax,r9 - adc r11,rdx + mov QWORD PTR[((-8))+rbx],r14 + jmp $L$mulx4x_outer + +ALIGN 32 +$L$mulx4x_outer:: + mov rdx,QWORD PTR[rdi] + lea rdi,QWORD PTR[8+rdi] + sub rsi,rax + mov QWORD PTR[rbx],r15 + lea rbx,QWORD PTR[((64+32))+rsp] + sub rcx,rax + + mulx r11,r8,QWORD PTR[rsi] + xor ebp,ebp + mov r9,rdx + mulx r12,r14,QWORD PTR[8+rsi] + adox r8,QWORD PTR[((-32))+rbx] + adcx r11,r14 + mulx r13,r15,QWORD PTR[16+rsi] + adox r11,QWORD PTR[((-24))+rbx] + adcx r12,r15 + adox r12,rbp + adcx 
r13,rbp + + mov QWORD PTR[8+rsp],rdi +DB 067h mov r15,r8 + imul r8,QWORD PTR[24+rsp] + xor ebp,ebp - xor r10,r10 - add r11,QWORD PTR[8+rcx*1+rdi] - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - - imul r15,r11 - - mov rbx,QWORD PTR[16+rcx*1+rsi] - xor r13,r13 - add r12,r11 - adc r13,0 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - mov QWORD PTR[8+rcx*1+rdi],r12 - - xor r11,r11 - add r10,QWORD PTR[16+rcx*1+rdi] - adc r11,0 - mul r14 - add r10,rax - mov rax,r9 - adc r11,rdx - - mov r9,QWORD PTR[24+rcx*1+rsi] - xor r12,r12 - add r13,r10 - adc r12,0 - mul r15 - add r13,rax - mov rax,r9 - adc r12,rdx - mov QWORD PTR[16+rcx*1+rdi],r13 - - xor r10,r10 - add r11,QWORD PTR[24+rcx*1+rdi] + mulx r14,rax,QWORD PTR[24+rsi] + mov rdx,r8 + adox r12,QWORD PTR[((-16))+rbx] + adcx r13,rax + adox r13,QWORD PTR[((-8))+rbx] + adcx r14,rbp + lea rsi,QWORD PTR[32+rsi] + adox r14,rbp + + mulx r10,rax,QWORD PTR[rcx] + adcx r15,rax + adox r10,r11 + mulx r11,rax,QWORD PTR[8+rcx] + adcx r10,rax + adox r11,r12 + mulx r12,rax,QWORD PTR[16+rcx] + mov QWORD PTR[((-32))+rbx],r10 + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD PTR[24+rcx] + mov rdx,r9 + mov QWORD PTR[((-24))+rbx],r11 lea rcx,QWORD PTR[32+rcx] - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - jmp $L$sqr4x_mont_inner - -ALIGN 16 -$L$sqr4x_mont_inner:: - mov rbx,QWORD PTR[rcx*1+rsi] - xor r13,r13 - add r12,r11 - adc r13,0 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - mov QWORD PTR[((-8))+rcx*1+rdi],r12 - - xor r11,r11 - add r10,QWORD PTR[rcx*1+rdi] - adc r11,0 - mul r14 - add r10,rax - mov rax,r9 - adc r11,rdx - - mov r9,QWORD PTR[8+rcx*1+rsi] - xor r12,r12 - add r13,r10 - adc r12,0 - mul r15 - add r13,rax - mov rax,r9 - adc r12,rdx - mov QWORD PTR[rcx*1+rdi],r13 - - xor r10,r10 - add r11,QWORD PTR[8+rcx*1+rdi] - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - - - mov rbx,QWORD PTR[16+rcx*1+rsi] - xor r13,r13 - add r12,r11 - adc r13,0 - mul r15 - add r12,rax - mov rax,rbx - adc r13,rdx - mov QWORD PTR[8+rcx*1+rdi],r12 - - xor r11,r11 - add r10,QWORD PTR[16+rcx*1+rdi] - adc r11,0 - mul r14 - add r10,rax - mov rax,r9 - adc r11,rdx - - mov r9,QWORD PTR[24+rcx*1+rsi] - xor r12,r12 - add r13,r10 - adc r12,0 - mul r15 - add r13,rax - mov rax,r9 - adc r12,rdx - mov QWORD PTR[16+rcx*1+rdi],r13 - - xor r10,r10 - add r11,QWORD PTR[24+rcx*1+rdi] + adcx r12,rax + adox r15,rbp + mov rdi,QWORD PTR[48+rsp] + mov QWORD PTR[((-16))+rbx],r12 + + jmp $L$mulx4x_inner + +ALIGN 32 +$L$mulx4x_inner:: + mulx rax,r10,QWORD PTR[rsi] + adcx r15,rbp + adox r10,r14 + mulx r14,r11,QWORD PTR[8+rsi] + adcx r10,QWORD PTR[rbx] + adox r11,rax + mulx rax,r12,QWORD PTR[16+rsi] + adcx r11,QWORD PTR[8+rbx] + adox r12,r14 + mulx r14,r13,QWORD PTR[24+rsi] + mov rdx,r8 + adcx r12,QWORD PTR[16+rbx] + adox r13,rax + adcx r13,QWORD PTR[24+rbx] + adox r14,rbp + lea rsi,QWORD PTR[32+rsi] + lea rbx,QWORD PTR[32+rbx] + adcx r14,rbp + + adox r10,r15 + mulx r15,rax,QWORD PTR[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD PTR[8+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD PTR[16+rcx] + mov QWORD PTR[((-40))+rbx],r10 + adcx r12,rax + adox r13,r15 + mulx r15,rax,QWORD PTR[24+rcx] + mov rdx,r9 + mov QWORD PTR[((-32))+rbx],r11 + mov QWORD PTR[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp lea rcx,QWORD PTR[32+rcx] - adc r10,0 - mul r14 - add r11,rax - mov rax,rbx - adc r10,rdx - cmp rcx,0 - jne $L$sqr4x_mont_inner + mov QWORD PTR[((-16))+rbx],r13 - sub rcx,QWORD PTR[rsp] - mov r14,r8 + dec rdi + jnz $L$mulx4x_inner - xor r13,r13 
- add r12,r11 - adc r13,0 - mul r15 - add r12,rax - mov rax,r9 - adc r13,rdx - mov QWORD PTR[((-8))+rdi],r12 - - xor r11,r11 - add r10,QWORD PTR[rdi] - adc r11,0 - mov rbx,QWORD PTR[rcx*1+rsi] - add r10,rbp - adc r11,0 + mov rax,QWORD PTR[rsp] + mov rdi,QWORD PTR[8+rsp] + adc r15,rbp + sub rbp,QWORD PTR[rbx] + adc r14,r15 + mov r8,QWORD PTR[((-8))+rcx] + sbb r15,r15 + mov QWORD PTR[((-8))+rbx],r14 - imul r14,QWORD PTR[16+rcx*1+rdi] - xor r12,r12 - mov r9,QWORD PTR[8+rcx*1+rsi] - add r13,r10 - mov r10,QWORD PTR[16+rcx*1+rdi] - adc r12,0 - mul r15 - add r13,rax - mov rax,rbx - adc r12,rdx - mov QWORD PTR[rdi],r13 + cmp rdi,QWORD PTR[16+rsp] + jne $L$mulx4x_outer - xor rbp,rbp - add r12,QWORD PTR[8+rdi] - adc rbp,rbp - add r12,r11 - lea rdi,QWORD PTR[16+rdi] - adc rbp,0 - mov QWORD PTR[((-8))+rdi],r12 - cmp rdi,QWORD PTR[8+rsp] - jb $L$sqr4x_mont_outer - - mov r9,QWORD PTR[rsp] - mov QWORD PTR[rdi],rbp - mov rax,QWORD PTR[64+r9*1+rsp] - lea rbx,QWORD PTR[64+r9*1+rsp] - mov rsi,QWORD PTR[40+rsp] - shr r9,5 - mov rdx,QWORD PTR[8+rbx] - xor rbp,rbp + sub r8,r14 + sbb r8,r8 + or r15,r8 + neg rax + xor rdx,rdx mov rdi,QWORD PTR[32+rsp] - sub rax,QWORD PTR[rsi] - mov r10,QWORD PTR[16+rbx] - mov r11,QWORD PTR[24+rbx] - sbb rdx,QWORD PTR[8+rsi] - lea rcx,QWORD PTR[((-1))+r9] - jmp $L$sqr4x_sub -ALIGN 16 -$L$sqr4x_sub:: - mov QWORD PTR[rbp*8+rdi],rax - mov QWORD PTR[8+rbp*8+rdi],rdx - sbb r10,QWORD PTR[16+rbp*8+rsi] - mov rax,QWORD PTR[32+rbp*8+rbx] - mov rdx,QWORD PTR[40+rbp*8+rbx] - sbb r11,QWORD PTR[24+rbp*8+rsi] - mov QWORD PTR[16+rbp*8+rdi],r10 - mov QWORD PTR[24+rbp*8+rdi],r11 - sbb rax,QWORD PTR[32+rbp*8+rsi] - mov r10,QWORD PTR[48+rbp*8+rbx] - mov r11,QWORD PTR[56+rbp*8+rbx] - sbb rdx,QWORD PTR[40+rbp*8+rsi] - lea rbp,QWORD PTR[4+rbp] - dec rcx - jnz $L$sqr4x_sub - - mov QWORD PTR[rbp*8+rdi],rax - mov rax,QWORD PTR[32+rbp*8+rbx] - sbb r10,QWORD PTR[16+rbp*8+rsi] - mov QWORD PTR[8+rbp*8+rdi],rdx - sbb r11,QWORD PTR[24+rbp*8+rsi] - mov QWORD PTR[16+rbp*8+rdi],r10 - - sbb rax,0 - mov QWORD PTR[24+rbp*8+rdi],r11 - xor rbp,rbp - and rbx,rax - not rax - mov rsi,rdi - and rsi,rax - lea rcx,QWORD PTR[((-1))+r9] - or rbx,rsi + lea rbx,QWORD PTR[64+rsp] pxor xmm0,xmm0 - lea rsi,QWORD PTR[64+r9*8+rsp] - movdqu xmm1,XMMWORD PTR[rbx] - lea rsi,QWORD PTR[r9*8+rsi] - movdqa XMMWORD PTR[64+rsp],xmm0 - movdqa XMMWORD PTR[rsi],xmm0 - movdqu XMMWORD PTR[rdi],xmm1 - jmp $L$sqr4x_copy -ALIGN 16 -$L$sqr4x_copy:: - movdqu xmm2,XMMWORD PTR[16+rbp*1+rbx] - movdqu xmm1,XMMWORD PTR[32+rbp*1+rbx] - movdqa XMMWORD PTR[80+rbp*1+rsp],xmm0 - movdqa XMMWORD PTR[96+rbp*1+rsp],xmm0 - movdqa XMMWORD PTR[16+rbp*1+rsi],xmm0 - movdqa XMMWORD PTR[32+rbp*1+rsi],xmm0 - movdqu XMMWORD PTR[16+rbp*1+rdi],xmm2 - movdqu XMMWORD PTR[32+rbp*1+rdi],xmm1 - lea rbp,QWORD PTR[32+rbp] - dec rcx - jnz $L$sqr4x_copy - - movdqu xmm2,XMMWORD PTR[16+rbp*1+rbx] - movdqa XMMWORD PTR[80+rbp*1+rsp],xmm0 - movdqa XMMWORD PTR[16+rbp*1+rsi],xmm0 - movdqu XMMWORD PTR[16+rbp*1+rdi],xmm2 - mov rsi,QWORD PTR[56+rsp] + mov r8,QWORD PTR[rax*1+rcx] + mov r9,QWORD PTR[8+rax*1+rcx] + neg r8 + jmp $L$mulx4x_sub_entry + +ALIGN 32 +$L$mulx4x_sub:: + mov r8,QWORD PTR[rax*1+rcx] + mov r9,QWORD PTR[8+rax*1+rcx] + not r8 +$L$mulx4x_sub_entry:: + mov r10,QWORD PTR[16+rax*1+rcx] + not r9 + and r8,r15 + mov r11,QWORD PTR[24+rax*1+rcx] + not r10 + and r9,r15 + not r11 + and r10,r15 + and r11,r15 + + neg rdx + adc r8,QWORD PTR[rbx] + adc r9,QWORD PTR[8+rbx] + movdqa XMMWORD PTR[rbx],xmm0 + adc r10,QWORD PTR[16+rbx] + adc r11,QWORD PTR[24+rbx] + movdqa XMMWORD PTR[16+rbx],xmm0 + 
lea rbx,QWORD PTR[32+rbx] + sbb rdx,rdx + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + lea rdi,QWORD PTR[32+rdi] + + add rax,32 + jnz $L$mulx4x_sub + + mov rsi,QWORD PTR[40+rsp] mov rax,1 - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] -$L$sqr4x_epilogue:: + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mulx4x_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_sqr4x_mont:: -bn_sqr4x_mont ENDP +$L$SEH_end_bn_mulx4x_mont:: +bn_mulx4x_mont ENDP DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 @@ -1492,18 +1188,22 @@ sqr_handler PROC PRIVATE mov rax,QWORD PTR[120+r8] mov rbx,QWORD PTR[248+r8] - lea r10,QWORD PTR[$L$sqr4x_body] + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jb $L$common_seh_tail mov rax,QWORD PTR[152+r8] - lea r10,QWORD PTR[$L$sqr4x_epilogue] + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$common_seh_tail - mov rax,QWORD PTR[56+rax] - lea rax,QWORD PTR[48+rax] + mov rax,QWORD PTR[40+rax] mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] @@ -1530,7 +1230,6 @@ $L$common_seh_tail:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -1570,10 +1269,12 @@ ALIGN 4 DD imagerel $L$SEH_end_bn_mul4x_mont DD imagerel $L$SEH_info_bn_mul4x_mont - DD imagerel $L$SEH_begin_bn_sqr4x_mont - DD imagerel $L$SEH_end_bn_sqr4x_mont - DD imagerel $L$SEH_info_bn_sqr4x_mont - + DD imagerel $L$SEH_begin_bn_sqr8x_mont + DD imagerel $L$SEH_end_bn_sqr8x_mont + DD imagerel $L$SEH_info_bn_sqr8x_mont + DD imagerel $L$SEH_begin_bn_mulx4x_mont + DD imagerel $L$SEH_end_bn_mulx4x_mont + DD imagerel $L$SEH_info_bn_mulx4x_mont .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 @@ -1581,15 +1282,18 @@ $L$SEH_info_bn_mul_mont:: DB 9,0,0,0 DD imagerel mul_handler DD imagerel $L$mul_body,imagerel $L$mul_epilogue - $L$SEH_info_bn_mul4x_mont:: DB 9,0,0,0 DD imagerel mul_handler DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue - -$L$SEH_info_bn_sqr4x_mont:: +$L$SEH_info_bn_sqr8x_mont:: +DB 9,0,0,0 + DD imagerel sqr_handler + DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue +$L$SEH_info_bn_mulx4x_mont:: DB 9,0,0,0 DD imagerel sqr_handler + DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm index e7107f0af7c616..64a1b42cfeedcb 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm @@ -1,5 +1,7 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC bn_mul_mont_gather5 @@ -17,15 +19,15 @@ $L$SEH_begin_bn_mul_mont_gather5:: mov r9,QWORD PTR[48+rsp] - test r9d,3 + test r9d,7 jnz $L$mul_enter - cmp r9d,8 - jb $L$mul_enter + mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] jmp $L$mul4x_enter ALIGN 16 $L$mul_enter:: mov r9d,r9d + mov rax,rsp mov 
r10d,DWORD PTR[56+rsp] push rbx push rbp @@ -36,8 +38,6 @@ $L$mul_enter:: lea rsp,QWORD PTR[((-40))+rsp] movaps XMMWORD PTR[rsp],xmm6 movaps XMMWORD PTR[16+rsp],xmm7 -$L$mul_alloca:: - mov rax,rsp lea r11,QWORD PTR[2+r9] neg r11 lea rsp,QWORD PTR[r11*8+rsp] @@ -238,7 +238,7 @@ DB 102,72,15,126,195 lea r14,QWORD PTR[1+r14] cmp r14,r9 - jl $L$outer + jb $L$outer xor r14,r14 mov rax,QWORD PTR[rsp] @@ -272,16 +272,15 @@ $L$copy:: mov rsi,QWORD PTR[8+r9*8+rsp] mov rax,1 - movaps xmm6,XMMWORD PTR[rsi] - movaps xmm7,XMMWORD PTR[16+rsi] - lea rsi,QWORD PTR[40+rsi] - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] $L$mul_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -289,7 +288,7 @@ $L$mul_epilogue:: $L$SEH_end_bn_mul_mont_gather5:: bn_mul_mont_gather5 ENDP -ALIGN 16 +ALIGN 32 bn_mul4x_mont_gather5 PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue mov QWORD PTR[16+rsp],rsi @@ -304,8 +303,11 @@ $L$SEH_begin_bn_mul4x_mont_gather5:: $L$mul4x_enter:: - mov r9d,r9d - mov r10d,DWORD PTR[56+rsp] + and r11d,080100h + cmp r11d,080100h + je $L$mulx4x_enter +DB 067h + mov rax,rsp push rbx push rbp push r12 @@ -315,580 +317,2822 @@ $L$mul4x_enter:: lea rsp,QWORD PTR[((-40))+rsp] movaps XMMWORD PTR[rsp],xmm6 movaps XMMWORD PTR[16+rsp],xmm7 -$L$mul4x_alloca:: - mov rax,rsp - lea r11,QWORD PTR[4+r9] - neg r11 - lea rsp,QWORD PTR[r11*8+rsp] - and rsp,-1024 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 - mov QWORD PTR[8+r9*8+rsp],rax + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$mul4xsp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$mul4xsp_done + +ALIGN 32 +$L$mul4xsp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$mul4xsp_done:: + and rsp,-64 + neg r9 + + mov QWORD PTR[40+rsp],rax $L$mul4x_body:: - mov QWORD PTR[16+r9*8+rsp],rdi - mov r12,rdx + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont_gather5:: +bn_mul4x_mont_gather5 ENDP + + +ALIGN 32 +mul4x_internal PROC PRIVATE + shl r9,5 + mov r10d,DWORD PTR[56+rax] + lea r13,QWORD PTR[256+r9*1+rdx] + shr r9,5 mov r11,r10 shr r10,3 and r11,7 not r10 lea rax,QWORD PTR[$L$magic_masks] and r10,3 - lea r12,QWORD PTR[96+r11*8+r12] + lea r12,QWORD PTR[96+r11*8+rdx] movq xmm4,QWORD PTR[r10*8+rax] movq xmm5,QWORD PTR[8+r10*8+rax] + add r11,7 movq xmm6,QWORD PTR[16+r10*8+rax] movq xmm7,QWORD PTR[24+r10*8+rax] + and r11,7 movq xmm0,QWORD PTR[((-96))+r12] + lea r14,QWORD PTR[256+r12] movq xmm1,QWORD PTR[((-32))+r12] pand xmm0,xmm4 movq xmm2,QWORD PTR[32+r12] pand xmm1,xmm5 movq 
xmm3,QWORD PTR[96+r12] pand xmm2,xmm6 +DB 067h por xmm0,xmm1 + movq xmm1,QWORD PTR[((-96))+r14] +DB 067h pand xmm3,xmm7 +DB 067h por xmm0,xmm2 - lea r12,QWORD PTR[256+r12] + movq xmm2,QWORD PTR[((-32))+r14] +DB 067h + pand xmm1,xmm4 +DB 067h por xmm0,xmm3 + movq xmm3,QWORD PTR[32+r14] DB 102,72,15,126,195 + movq xmm0,QWORD PTR[96+r14] + mov QWORD PTR[((16+8))+rsp],r13 + mov QWORD PTR[((56+8))+rsp],rdi + mov r8,QWORD PTR[r8] mov rax,QWORD PTR[rsi] - - xor r14,r14 - xor r15,r15 - - movq xmm0,QWORD PTR[((-96))+r12] - movq xmm1,QWORD PTR[((-32))+r12] - pand xmm0,xmm4 - movq xmm2,QWORD PTR[32+r12] - pand xmm1,xmm5 + lea rsi,QWORD PTR[r9*1+rsi] + neg r9 mov rbp,r8 mul rbx mov r10,rax mov rax,QWORD PTR[rcx] - movq xmm3,QWORD PTR[96+r12] - pand xmm2,xmm6 - por xmm0,xmm1 - pand xmm3,xmm7 + pand xmm2,xmm5 + pand xmm3,xmm6 + por xmm1,xmm2 imul rbp,r10 + + + + + + + + lea r14,QWORD PTR[((64+8))+r11*8+rsp] mov r11,rdx - por xmm0,xmm2 - lea r12,QWORD PTR[256+r12] - por xmm0,xmm3 + pand xmm0,xmm7 + por xmm1,xmm3 + lea r12,QWORD PTR[512+r12] + por xmm0,xmm1 mul rbp add r10,rax - mov rax,QWORD PTR[8+rsi] + mov rax,QWORD PTR[8+r9*1+rsi] adc rdx,0 mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[8+rcx] + mov rax,QWORD PTR[16+rcx] adc rdx,0 mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[16+rsi] + mov rax,QWORD PTR[16+r9*1+rsi] adc rdx,0 add rdi,r11 - lea r15,QWORD PTR[4+r15] + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] adc rdx,0 - mov QWORD PTR[rsp],rdi + mov QWORD PTR[r14],rdi mov r13,rdx jmp $L$1st4x -ALIGN 16 + +ALIGN 32 $L$1st4x:: mul rbx add r10,rax - mov rax,QWORD PTR[((-16))+r15*8+rcx] + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] adc rdx,0 mov r11,rdx mul rbp add r13,rax - mov rax,QWORD PTR[((-8))+r15*8+rsi] + mov rax,QWORD PTR[((-8))+r15*1+rsi] adc rdx,0 add r13,r10 adc rdx,0 - mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov QWORD PTR[((-24))+r14],r13 mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[((-8))+r15*8+rcx] + mov rax,QWORD PTR[((-16))+rcx] adc rdx,0 mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[r15*8+rsi] + mov rax,QWORD PTR[r15*1+rsi] adc rdx,0 add rdi,r11 adc rdx,0 - mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov QWORD PTR[((-16))+r14],rdi mov r13,rdx mul rbx add r10,rax - mov rax,QWORD PTR[r15*8+rcx] + mov rax,QWORD PTR[rcx] adc rdx,0 mov r11,rdx mul rbp add r13,rax - mov rax,QWORD PTR[8+r15*8+rsi] + mov rax,QWORD PTR[8+r15*1+rsi] adc rdx,0 add r13,r10 adc rdx,0 - mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[((-8))+r14],r13 mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[8+r15*8+rcx] + mov rax,QWORD PTR[16+rcx] adc rdx,0 - lea r15,QWORD PTR[4+r15] mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[((-16))+r15*8+rsi] + mov rax,QWORD PTR[16+r15*1+rsi] adc rdx,0 add rdi,r11 + lea rcx,QWORD PTR[64+rcx] adc rdx,0 - mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov QWORD PTR[r14],rdi mov r13,rdx - cmp r15,r9 - jl $L$1st4x + + add r15,32 + jnz $L$1st4x mul rbx add r10,rax - mov rax,QWORD PTR[((-16))+r15*8+rcx] + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] adc rdx,0 mov r11,rdx mul rbp add r13,rax - mov rax,QWORD PTR[((-8))+r15*8+rsi] + mov rax,QWORD PTR[((-8))+rsi] adc rdx,0 add r13,r10 adc rdx,0 - mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov QWORD PTR[((-24))+r14],r13 mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[((-8))+r15*8+rcx] + mov rax,QWORD PTR[((-16))+rcx] adc rdx,0 mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[rsi] + mov rax,QWORD PTR[r9*1+rsi] adc rdx,0 add rdi,r11 adc rdx,0 - mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov 
QWORD PTR[((-16))+r14],rdi mov r13,rdx DB 102,72,15,126,195 + lea rcx,QWORD PTR[r9*2+rcx] xor rdi,rdi add r13,r10 adc rdi,0 - mov QWORD PTR[((-8))+r15*8+rsp],r13 - mov QWORD PTR[r15*8+rsp],rdi + mov QWORD PTR[((-8))+r14],r13 - lea r14,QWORD PTR[1+r14] -ALIGN 4 -$L$outer4x:: - xor r15,r15 - movq xmm0,QWORD PTR[((-96))+r12] - movq xmm1,QWORD PTR[((-32))+r12] - pand xmm0,xmm4 - movq xmm2,QWORD PTR[32+r12] - pand xmm1,xmm5 + jmp $L$outer4x - mov r10,QWORD PTR[rsp] +ALIGN 32 +$L$outer4x:: + mov r10,QWORD PTR[r9*1+r14] mov rbp,r8 mul rbx add r10,rax mov rax,QWORD PTR[rcx] adc rdx,0 + movq xmm0,QWORD PTR[((-96))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 movq xmm3,QWORD PTR[96+r12] - pand xmm2,xmm6 - por xmm0,xmm1 - pand xmm3,xmm7 imul rbp,r10 +DB 067h mov r11,rdx + mov QWORD PTR[r14],rdi + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 por xmm0,xmm2 + lea r14,QWORD PTR[r9*1+r14] lea r12,QWORD PTR[256+r12] por xmm0,xmm3 mul rbp add r10,rax - mov rax,QWORD PTR[8+rsi] + mov rax,QWORD PTR[8+r9*1+rsi] adc rdx,0 mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[8+rcx] + mov rax,QWORD PTR[16+rcx] adc rdx,0 - add r11,QWORD PTR[8+rsp] + add r11,QWORD PTR[8+r14] adc rdx,0 mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[16+rsi] + mov rax,QWORD PTR[16+r9*1+rsi] adc rdx,0 add rdi,r11 - lea r15,QWORD PTR[4+r15] + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] adc rdx,0 mov r13,rdx jmp $L$inner4x -ALIGN 16 + +ALIGN 32 $L$inner4x:: mul rbx add r10,rax - mov rax,QWORD PTR[((-16))+r15*8+rcx] + mov rax,QWORD PTR[((-32))+rcx] adc rdx,0 - add r10,QWORD PTR[((-16))+r15*8+rsp] + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] adc rdx,0 mov r11,rdx mul rbp add r13,rax - mov rax,QWORD PTR[((-8))+r15*8+rsi] + mov rax,QWORD PTR[((-8))+r15*1+rsi] adc rdx,0 add r13,r10 adc rdx,0 - mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov QWORD PTR[((-32))+r14],rdi mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[((-8))+r15*8+rcx] + mov rax,QWORD PTR[((-16))+rcx] adc rdx,0 - add r11,QWORD PTR[((-8))+r15*8+rsp] + add r11,QWORD PTR[((-8))+r14] adc rdx,0 mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[r15*8+rsi] + mov rax,QWORD PTR[r15*1+rsi] adc rdx,0 add rdi,r11 adc rdx,0 - mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov QWORD PTR[((-24))+r14],r13 mov r13,rdx mul rbx add r10,rax - mov rax,QWORD PTR[r15*8+rcx] + mov rax,QWORD PTR[rcx] adc rdx,0 - add r10,QWORD PTR[r15*8+rsp] + add r10,QWORD PTR[r14] adc rdx,0 mov r11,rdx mul rbp add r13,rax - mov rax,QWORD PTR[8+r15*8+rsi] + mov rax,QWORD PTR[8+r15*1+rsi] adc rdx,0 add r13,r10 adc rdx,0 - mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov QWORD PTR[((-16))+r14],rdi mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[8+r15*8+rcx] + mov rax,QWORD PTR[16+rcx] adc rdx,0 - add r11,QWORD PTR[8+r15*8+rsp] + add r11,QWORD PTR[8+r14] adc rdx,0 - lea r15,QWORD PTR[4+r15] mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[((-16))+r15*8+rsi] + mov rax,QWORD PTR[16+r15*1+rsi] adc rdx,0 add rdi,r11 + lea rcx,QWORD PTR[64+rcx] adc rdx,0 - mov QWORD PTR[((-40))+r15*8+rsp],r13 + mov QWORD PTR[((-8))+r14],r13 mov r13,rdx - cmp r15,r9 - jl $L$inner4x + + add r15,32 + jnz $L$inner4x mul rbx add r10,rax - mov rax,QWORD PTR[((-16))+r15*8+rcx] + mov rax,QWORD PTR[((-32))+rcx] adc rdx,0 - add r10,QWORD PTR[((-16))+r15*8+rsp] + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] adc rdx,0 mov r11,rdx mul rbp add r13,rax - mov rax,QWORD PTR[((-8))+r15*8+rsi] + mov rax,QWORD PTR[((-8))+rsi] adc rdx,0 add r13,r10 adc rdx,0 - mov QWORD 
PTR[((-32))+r15*8+rsp],rdi + mov QWORD PTR[((-32))+r14],rdi mov rdi,rdx mul rbx add r11,rax - mov rax,QWORD PTR[((-8))+r15*8+rcx] + mov rax,rbp + mov rbp,QWORD PTR[((-16))+rcx] adc rdx,0 - add r11,QWORD PTR[((-8))+r15*8+rsp] + add r11,QWORD PTR[((-8))+r14] adc rdx,0 - lea r14,QWORD PTR[1+r14] mov r10,rdx mul rbp add rdi,rax - mov rax,QWORD PTR[rsi] + mov rax,QWORD PTR[r9*1+rsi] adc rdx,0 add rdi,r11 adc rdx,0 - mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov QWORD PTR[((-24))+r14],r13 mov r13,rdx DB 102,72,15,126,195 - mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov QWORD PTR[((-16))+r14],rdi + lea rcx,QWORD PTR[r9*2+rcx] xor rdi,rdi add r13,r10 adc rdi,0 - add r13,QWORD PTR[r9*8+rsp] + add r13,QWORD PTR[r14] adc rdi,0 - mov QWORD PTR[((-8))+r15*8+rsp],r13 - mov QWORD PTR[r15*8+rsp],rdi + mov QWORD PTR[((-8))+r14],r13 + + cmp r12,QWORD PTR[((16+8))+rsp] + jb $L$outer4x + sub rbp,r13 + adc r15,r15 + or rdi,r15 + xor rdi,1 + lea rbx,QWORD PTR[r9*1+r14] + lea rbp,QWORD PTR[rdi*8+rcx] + mov rcx,r9 + sar rcx,3+2 + mov rdi,QWORD PTR[((56+8))+rsp] + jmp $L$sqr4x_sub +mul4x_internal ENDP +PUBLIC bn_power5 + +ALIGN 32 +bn_power5 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_power5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] - cmp r14,r9 - jl $L$outer4x - mov rdi,QWORD PTR[16+r9*8+rsp] - mov rax,QWORD PTR[rsp] - pxor xmm0,xmm0 - mov rdx,QWORD PTR[8+rsp] - shr r9,2 - lea rsi,QWORD PTR[rsp] - xor r14,r14 - sub rax,QWORD PTR[rcx] - mov rbx,QWORD PTR[16+rsi] - mov rbp,QWORD PTR[24+rsi] - sbb rdx,QWORD PTR[8+rcx] - lea r15,QWORD PTR[((-1))+r9] - jmp $L$sub4x -ALIGN 16 -$L$sub4x:: - mov QWORD PTR[r14*8+rdi],rax - mov QWORD PTR[8+r14*8+rdi],rdx - sbb rbx,QWORD PTR[16+r14*8+rcx] - mov rax,QWORD PTR[32+r14*8+rsi] - mov rdx,QWORD PTR[40+r14*8+rsi] - sbb rbp,QWORD PTR[24+r14*8+rcx] - mov QWORD PTR[16+r14*8+rdi],rbx - mov QWORD PTR[24+r14*8+rdi],rbp - sbb rax,QWORD PTR[32+r14*8+rcx] - mov rbx,QWORD PTR[48+r14*8+rsi] - mov rbp,QWORD PTR[56+r14*8+rsi] - sbb rdx,QWORD PTR[40+r14*8+rcx] - lea r14,QWORD PTR[4+r14] - dec r15 - jnz $L$sub4x + mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + and r11d,080100h + cmp r11d,080100h + je $L$powerx5_enter + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] - mov QWORD PTR[r14*8+rdi],rax - mov rax,QWORD PTR[32+r14*8+rsi] - sbb rbx,QWORD PTR[16+r14*8+rcx] - mov QWORD PTR[8+r14*8+rdi],rdx - sbb rbp,QWORD PTR[24+r14*8+rcx] - mov QWORD PTR[16+r14*8+rdi],rbx - sbb rax,0 - mov QWORD PTR[24+r14*8+rdi],rbp - xor r14,r14 - and rsi,rax - not rax - mov rcx,rdi - and rcx,rax - lea r15,QWORD PTR[((-1))+r9] - or rsi,rcx - movdqu xmm1,XMMWORD PTR[rsi] - movdqa XMMWORD PTR[rsp],xmm0 - movdqu XMMWORD PTR[rdi],xmm1 - jmp $L$copy4x -ALIGN 16 -$L$copy4x:: - movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] - movdqu xmm1,XMMWORD PTR[32+r14*1+rsi] - movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 - movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 - movdqa XMMWORD PTR[32+r14*1+rsp],xmm0 - movdqu XMMWORD PTR[32+r14*1+rdi],xmm1 - lea r14,QWORD PTR[32+r14] - dec r15 - jnz $L$copy4x - shl r9,2 - movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] - movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 - movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 - mov rsi,QWORD PTR[8+r9*8+rsp] + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp 
r10,r11 + jb $L$pwr_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$pwr_sp_done + +ALIGN 32 +$L$pwr_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$pwr_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$power5_body:: +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rdi,rsi + mov rax,QWORD PTR[40+rsp] + lea r8,QWORD PTR[32+rsp] + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] mov rax,1 - movaps xmm6,XMMWORD PTR[rsi] - movaps xmm7,XMMWORD PTR[16+rsi] - lea rsi,QWORD PTR[40+rsi] - mov r15,QWORD PTR[rsi] - mov r14,QWORD PTR[8+rsi] - mov r13,QWORD PTR[16+rsi] - mov r12,QWORD PTR[24+rsi] - mov rbp,QWORD PTR[32+rsi] - mov rbx,QWORD PTR[40+rsi] - lea rsp,QWORD PTR[48+rsi] -$L$mul4x_epilogue:: + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$power5_epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_bn_mul4x_mont_gather5:: -bn_mul4x_mont_gather5 ENDP -PUBLIC bn_scatter5 +$L$SEH_end_bn_power5:: +bn_power5 ENDP -ALIGN 16 -bn_scatter5 PROC PUBLIC - cmp rdx,0 - jz $L$scatter_epilogue - lea r8,QWORD PTR[r9*8+r8] -$L$scatter:: - mov rax,QWORD PTR[rcx] - lea rcx,QWORD PTR[8+rcx] - mov QWORD PTR[r8],rax - lea r8,QWORD PTR[256+r8] - sub rdx,1 - jnz $L$scatter -$L$scatter_epilogue:: - DB 0F3h,0C3h ;repret -bn_scatter5 ENDP +PUBLIC bn_sqr8x_internal -PUBLIC bn_gather5 -ALIGN 16 -bn_gather5 PROC PUBLIC -$L$SEH_begin_bn_gather5:: +ALIGN 32 +bn_sqr8x_internal PROC PUBLIC +__bn_sqr8x_internal:: -DB 048h,083h,0ech,028h -DB 00fh,029h,034h,024h -DB 00fh,029h,07ch,024h,010h - mov r11,r9 - shr r9,3 - and r11,7 - not r9 - lea rax,QWORD PTR[$L$magic_masks] - and r9,3 - lea r8,QWORD PTR[96+r11*8+r8] - movq xmm4,QWORD PTR[r9*8+rax] - movq xmm5,QWORD PTR[8+r9*8+rax] - movq xmm6,QWORD PTR[16+r9*8+rax] - movq xmm7,QWORD PTR[24+r9*8+rax] - jmp $L$gather -ALIGN 16 -$L$gather:: - movq xmm0,QWORD PTR[((-96))+r8] - movq xmm1,QWORD PTR[((-32))+r8] - pand xmm0,xmm4 - movq xmm2,QWORD PTR[32+r8] - pand xmm1,xmm5 - movq xmm3,QWORD PTR[96+r8] - pand xmm2,xmm6 - por xmm0,xmm1 - pand xmm3,xmm7 - por xmm0,xmm2 - lea r8,QWORD PTR[256+r8] - por xmm0,xmm3 - movq QWORD PTR[rcx],xmm0 - lea rcx,QWORD PTR[8+rcx] - sub rdx,1 - jnz $L$gather - movaps xmm6,XMMWORD PTR[rsp] - movaps xmm7,XMMWORD PTR[16+rsp] - lea rsp,QWORD PTR[40+rsp] - DB 0F3h,0C3h ;repret -$L$SEH_end_bn_gather5:: -bn_gather5 ENDP -ALIGN 64 -$L$magic_masks:: - DD 0,0,0,0,0,0,-1,-1 - DD 0,0,0,0,0,0,0,0 -DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 -DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 -DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 -DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 -DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 -DB 112,101,110,115,115,108,46,111,114,103,62,0 -EXTERN __imp_RtlVirtualUnwind:NEAR -ALIGN 16 -mul_handler PROC PRIVATE - push rsi - push rdi - push rbx - push rbp - push r12 - push r13 - push r14 - push r15 - pushfq - sub rsp,64 - mov 
rax,QWORD PTR[120+r8] - mov rbx,QWORD PTR[248+r8] - mov rsi,QWORD PTR[8+r9] - mov r11,QWORD PTR[56+r9] - mov r10d,DWORD PTR[r11] - lea r10,QWORD PTR[r10*1+rsi] - cmp rbx,r10 - jb $L$common_seh_tail - lea rax,QWORD PTR[88+rax] - mov r10d,DWORD PTR[4+r11] - lea r10,QWORD PTR[r10*1+rsi] - cmp rbx,r10 - jb $L$common_seh_tail - mov rax,QWORD PTR[152+r8] - mov r10d,DWORD PTR[8+r11] - lea r10,QWORD PTR[r10*1+rsi] - cmp rbx,r10 - jae $L$common_seh_tail + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rbp,QWORD PTR[32+r10] + lea rsi,QWORD PTR[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,rax + mov rax,rbx + mov r11,rdx + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + mov r10,rdx + + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx + mov r13,rdx + + lea rcx,QWORD PTR[rbp] + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + jmp $L$sqr4x_1st + +ALIGN 32 +$L$sqr4x_1st:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov rbx,QWORD PTR[16+rcx*1+rsi] + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + + mul r15 + add r13,rax + mov rax,rbx + mov QWORD PTR[8+rcx*1+rdi],r10 + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[24+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[16+rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + lea rcx,QWORD PTR[32+rcx] + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_1st + + mul r15 + add r13,rax + lea rbp,QWORD PTR[16+rbp] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + jmp $L$sqr4x_outer + +ALIGN 32 +$L$sqr4x_outer:: + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,QWORD PTR[((-24))+rbp*1+rdi] + add r10,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + mov r11,rdx + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + add r11,QWORD PTR[((-16))+rbp*1+rdi] + mov r10,rdx + adc r10,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + + xor r12,r12 + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + add r12,rax + mov rax,rbx + adc rdx,0 + add r12,QWORD PTR[((-8))+rbp*1+rdi] + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rbp*1+rdi],r10 + + lea rcx,QWORD PTR[rbp] + jmp $L$sqr4x_inner + +ALIGN 32 +$L$sqr4x_inner:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + add r13,QWORD PTR[rcx*1+rdi] + adc r12,0 + +DB 
067h + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + mul r15 + add r12,rax + mov QWORD PTR[rcx*1+rdi],r11 + mov rax,rbx + mov r13,rdx + adc r13,0 + add r12,QWORD PTR[8+rcx*1+rdi] + lea rcx,QWORD PTR[16+rcx] + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_inner + +DB 067h + mul r15 + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + add rbp,16 + jnz $L$sqr4x_outer + + + mov r14,QWORD PTR[((-32))+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rsi] + mov r15,rax + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + + mul r14 + add r11,rax + mov rax,rbx + mov QWORD PTR[((-24))+rdi],r10 + mov r10,rdx + adc r10,0 + add r11,r13 + mov rbx,QWORD PTR[((-8))+rsi] + adc r10,0 + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[((-16))+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rdi],r10 + + mul r15 + add r13,rax + mov rax,QWORD PTR[((-16))+rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 + adc rdx,0 + mov QWORD PTR[8+rdi],rax + mov QWORD PTR[16+rdi],rdx + mov QWORD PTR[24+rdi],r15 + + mov rax,QWORD PTR[((-16))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+rsp] + xor r10,r10 + mov r11,QWORD PTR[8+rdi] + + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + lea rbp,QWORD PTR[16+rbp] + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + jmp $L$sqr4x_shift_n_add + +ALIGN 32 +$L$sqr4x_shift_n_add:: + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[8+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[((-16))+rdi],rbx + adc r8,rdx + + lea r12,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-8))+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[8+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD 
PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[16+rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + add rbp,32 + jnz $L$sqr4x_shift_n_add + + lea r12,QWORD PTR[r10*2+r14] +DB 067h + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD PTR[((-16))+rdi],rbx + mov QWORD PTR[((-8))+rdi],r8 +DB 102,72,15,126,213 +sqr8x_reduction:: + xor rax,rax + lea rcx,QWORD PTR[r9*2+rbp] + lea rdx,QWORD PTR[((48+8))+r9*2+rsp] + mov QWORD PTR[((0+8))+rsp],rcx + lea rdi,QWORD PTR[((48+8))+r9*1+rsp] + mov QWORD PTR[((8+8))+rsp],rdx + neg r9 + jmp $L$8x_reduction_loop + +ALIGN 32 +$L$8x_reduction_loop:: + lea rdi,QWORD PTR[r9*1+rdi] +DB 066h + mov rbx,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov QWORD PTR[rdx],rax + lea rdi,QWORD PTR[64+rdi] + +DB 067h + mov r8,rbx + imul rbx,QWORD PTR[((32+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$8x_reduce + +ALIGN 32 +$L$8x_reduce:: + mul rbx + mov rax,QWORD PTR[16+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov rsi,QWORD PTR[((32+8))+rsp] + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + imul rsi,r8 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_reduce + + lea rbp,QWORD PTR[128+rbp] + xor rax,rax + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_no_tail + +DB 066h + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov rbx,QWORD PTR[((48+56+8))+rsp] + mov ecx,8 + mov rax,QWORD PTR[rbp] + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail:: + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rbp] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + lea rdi,QWORD PTR[8+rdi] + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov 
rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] + add r15,rax + adc rdx,0 + add r14,r15 + mov rax,QWORD PTR[rbp] + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_tail + + lea rbp,QWORD PTR[128+rbp] + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_tail_done + + mov rbx,QWORD PTR[((48+56+8))+rsp] + neg rsi + mov rax,QWORD PTR[rbp] + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov ecx,8 + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail_done:: + add r8,QWORD PTR[rdx] + xor rax,rax + + neg rsi +$L$8x_no_tail:: + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + adc rax,0 + mov rcx,QWORD PTR[((-16))+rbp] + xor rsi,rsi + +DB 102,72,15,126,213 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 +DB 102,73,15,126,217 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + lea rdi,QWORD PTR[64+rdi] + + cmp rdi,rdx + jb $L$8x_reduction_loop + + sub rcx,r15 + lea rbx,QWORD PTR[r9*1+rdi] + adc rsi,rsi + mov rcx,r9 + or rax,rsi +DB 102,72,15,126,207 + xor rax,1 +DB 102,72,15,126,206 + lea rbp,QWORD PTR[rax*8+rbp] + sar rcx,3+2 + jmp $L$sqr4x_sub + +ALIGN 32 +$L$sqr4x_sub:: +DB 066h + mov r12,QWORD PTR[rbx] + mov r13,QWORD PTR[8+rbx] + sbb r12,QWORD PTR[rbp] + mov r14,QWORD PTR[16+rbx] + sbb r13,QWORD PTR[16+rbp] + mov r15,QWORD PTR[24+rbx] + lea rbx,QWORD PTR[32+rbx] + sbb r14,QWORD PTR[32+rbp] + mov QWORD PTR[rdi],r12 + sbb r15,QWORD PTR[48+rbp] + lea rbp,QWORD PTR[64+rbp] + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + lea rdi,QWORD PTR[32+rdi] + + inc rcx + jnz $L$sqr4x_sub + mov r10,r9 + neg r9 + DB 0F3h,0C3h ;repret +bn_sqr8x_internal ENDP +PUBLIC bn_from_montgomery + +ALIGN 32 +bn_from_montgomery PROC PUBLIC + test DWORD PTR[48+rsp],7 + jz bn_from_mont8x + xor eax,eax + DB 0F3h,0C3h ;repret +bn_from_montgomery ENDP + + +ALIGN 32 +bn_from_mont8x PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_from_mont8x:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$from_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$from_sp_done + +ALIGN 32 +$L$from_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$from_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 
+ mov QWORD PTR[40+rsp],rax +$L$from_body:: + mov r11,r9 + lea rax,QWORD PTR[48+rsp] + pxor xmm0,xmm0 + jmp $L$mul_by_1 + +ALIGN 32 +$L$mul_by_1:: + movdqu xmm1,XMMWORD PTR[rsi] + movdqu xmm2,XMMWORD PTR[16+rsi] + movdqu xmm3,XMMWORD PTR[32+rsi] + movdqa XMMWORD PTR[r9*1+rax],xmm0 + movdqu xmm4,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[16+r9*1+rax],xmm0 +DB 048h,08dh,0b6h,040h,000h,000h,000h + movdqa XMMWORD PTR[rax],xmm1 + movdqa XMMWORD PTR[32+r9*1+rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm2 + movdqa XMMWORD PTR[48+r9*1+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm3 + movdqa XMMWORD PTR[48+rax],xmm4 + lea rax,QWORD PTR[64+rax] + sub r11,64 + jnz $L$mul_by_1 + +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 067h + mov rbp,rcx +DB 102,73,15,110,218 + mov r11d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + and r11d,080100h + cmp r11d,080100h + jne $L$from_mont_nox + + lea rdi,QWORD PTR[r9*1+rax] + call sqrx8x_reduction + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + mov rsi,QWORD PTR[40+rsp] + jmp $L$from_mont_zero + +ALIGN 32 +$L$from_mont_nox:: + call sqr8x_reduction + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + mov rsi,QWORD PTR[40+rsp] + jmp $L$from_mont_zero + +ALIGN 32 +$L$from_mont_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + sub r9,32 + jnz $L$from_mont_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$from_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_from_mont8x:: +bn_from_mont8x ENDP + +ALIGN 32 +bn_mulx4x_mont_gather5 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mulx4x_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mulx4x_enter:: +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$mulx4xsp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$mulx4xsp_done + +ALIGN 32 +$L$mulx4xsp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$mulx4xsp_done:: + and rsp,-64 + + + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$mulx4x_body:: + call mulx4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mulx4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mulx4x_mont_gather5:: +bn_mulx4x_mont_gather5 ENDP + + +ALIGN 32 +mulx4x_internal PROC PRIVATE +DB 04ch,089h,08ch,024h,008h,000h,000h,000h +DB 067h + neg r9 + shl r9,5 + lea r13,QWORD 
PTR[256+r9*1+rdx] + shr r9,5+5 + mov r10d,DWORD PTR[56+rax] + sub r9,1 + mov QWORD PTR[((16+8))+rsp],r13 + mov QWORD PTR[((24+8))+rsp],r9 + mov QWORD PTR[((56+8))+rsp],rdi + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea rdi,QWORD PTR[96+r11*8+rdx] + movq xmm4,QWORD PTR[r10*8+rax] + movq xmm5,QWORD PTR[8+r10*8+rax] + add r11,7 + movq xmm6,QWORD PTR[16+r10*8+rax] + movq xmm7,QWORD PTR[24+r10*8+rax] + and r11,7 + + movq xmm0,QWORD PTR[((-96))+rdi] + lea rbx,QWORD PTR[256+rdi] + movq xmm1,QWORD PTR[((-32))+rdi] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+rdi] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+rdi] + pand xmm2,xmm6 + por xmm0,xmm1 + movq xmm1,QWORD PTR[((-96))+rbx] + pand xmm3,xmm7 + por xmm0,xmm2 + movq xmm2,QWORD PTR[((-32))+rbx] + por xmm0,xmm3 +DB 067h,067h + pand xmm1,xmm4 + movq xmm3,QWORD PTR[32+rbx] + +DB 102,72,15,126,194 + movq xmm0,QWORD PTR[96+rbx] + lea rdi,QWORD PTR[512+rdi] + pand xmm2,xmm5 +DB 067h,067h + pand xmm3,xmm6 + + + + + + + + lea rbx,QWORD PTR[((64+32+8))+r11*8+rsp] + + mov r9,rdx + mulx rax,r8,QWORD PTR[rsi] + mulx r12,r11,QWORD PTR[8+rsi] + add r11,rax + mulx r13,rax,QWORD PTR[16+rsi] + adc r12,rax + adc r13,0 + mulx r14,rax,QWORD PTR[24+rsi] + + mov r15,r8 + imul r8,QWORD PTR[((32+8))+rsp] + xor rbp,rbp + mov rdx,r8 + + por xmm1,xmm2 + pand xmm0,xmm7 + por xmm1,xmm3 + mov QWORD PTR[((8+8))+rsp],rdi + por xmm0,xmm1 + +DB 048h,08dh,0b6h,020h,000h,000h,000h + adcx r13,rax + adcx r14,rbp + + mulx r10,rax,QWORD PTR[rcx] + adcx r15,rax + adox r10,r11 + mulx r11,rax,QWORD PTR[16+rcx] + adcx r10,rax + adox r11,r12 + mulx r12,rax,QWORD PTR[32+rcx] + mov rdi,QWORD PTR[((24+8))+rsp] +DB 066h + mov QWORD PTR[((-32))+rbx],r10 + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD PTR[48+rcx] +DB 067h,067h + mov rdx,r9 + mov QWORD PTR[((-24))+rbx],r11 + adcx r12,rax + adox r15,rbp +DB 048h,08dh,089h,040h,000h,000h,000h + mov QWORD PTR[((-16))+rbx],r12 + + +ALIGN 32 +$L$mulx4x_1st:: + adcx r15,rbp + mulx rax,r10,QWORD PTR[rsi] + adcx r10,r14 + mulx r14,r11,QWORD PTR[8+rsi] + adcx r11,rax + mulx rax,r12,QWORD PTR[16+rsi] + adcx r12,r14 + mulx r14,r13,QWORD PTR[24+rsi] +DB 067h,067h + mov rdx,r8 + adcx r13,rax + adcx r14,rbp + lea rsi,QWORD PTR[32+rsi] + lea rbx,QWORD PTR[32+rbx] + + adox r10,r15 + mulx r15,rax,QWORD PTR[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD PTR[16+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD PTR[32+rcx] + mov QWORD PTR[((-40))+rbx],r10 + adcx r12,rax + mov QWORD PTR[((-32))+rbx],r11 + adox r13,r15 + mulx r15,rax,QWORD PTR[48+rcx] + mov rdx,r9 + mov QWORD PTR[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + lea rcx,QWORD PTR[64+rcx] + mov QWORD PTR[((-16))+rbx],r13 + + dec rdi + jnz $L$mulx4x_1st + + mov rax,QWORD PTR[8+rsp] +DB 102,72,15,126,194 + adc r15,rbp + lea rsi,QWORD PTR[rax*1+rsi] + add r14,r15 + mov rdi,QWORD PTR[((8+8))+rsp] + adc rbp,rbp + mov QWORD PTR[((-8))+rbx],r14 + jmp $L$mulx4x_outer + +ALIGN 32 +$L$mulx4x_outer:: + mov QWORD PTR[rbx],rbp + lea rbx,QWORD PTR[32+rax*1+rbx] + mulx r11,r8,QWORD PTR[rsi] + xor rbp,rbp + mov r9,rdx + mulx r12,r14,QWORD PTR[8+rsi] + adox r8,QWORD PTR[((-32))+rbx] + adcx r11,r14 + mulx r13,r15,QWORD PTR[16+rsi] + adox r11,QWORD PTR[((-24))+rbx] + adcx r12,r15 + mulx r14,rdx,QWORD PTR[24+rsi] + adox r12,QWORD PTR[((-16))+rbx] + adcx r13,rdx + lea rcx,QWORD PTR[rax*2+rcx] + lea rsi,QWORD PTR[32+rsi] + adox r13,QWORD PTR[((-8))+rbx] + adcx r14,rbp + adox r14,rbp + +DB 067h + mov r15,r8 + imul r8,QWORD PTR[((32+8))+rsp] + + movq xmm0,QWORD 
PTR[((-96))+rdi] +DB 067h,067h + mov rdx,r8 + movq xmm1,QWORD PTR[((-32))+rdi] +DB 067h + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+rdi] +DB 067h + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+rdi] + add rdi,256 +DB 067h + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + xor rbp,rbp + mov QWORD PTR[((8+8))+rsp],rdi + + mulx r10,rax,QWORD PTR[rcx] + adcx r15,rax + adox r10,r11 + mulx r11,rax,QWORD PTR[16+rcx] + adcx r10,rax + adox r11,r12 + mulx r12,rax,QWORD PTR[32+rcx] + adcx r11,rax + adox r12,r13 + mulx r15,rax,QWORD PTR[48+rcx] + mov rdx,r9 + por xmm0,xmm2 + mov rdi,QWORD PTR[((24+8))+rsp] + mov QWORD PTR[((-32))+rbx],r10 + por xmm0,xmm3 + adcx r12,rax + mov QWORD PTR[((-24))+rbx],r11 + adox r15,rbp + mov QWORD PTR[((-16))+rbx],r12 + lea rcx,QWORD PTR[64+rcx] + jmp $L$mulx4x_inner + +ALIGN 32 +$L$mulx4x_inner:: + mulx rax,r10,QWORD PTR[rsi] + adcx r15,rbp + adox r10,r14 + mulx r14,r11,QWORD PTR[8+rsi] + adcx r10,QWORD PTR[rbx] + adox r11,rax + mulx rax,r12,QWORD PTR[16+rsi] + adcx r11,QWORD PTR[8+rbx] + adox r12,r14 + mulx r14,r13,QWORD PTR[24+rsi] + mov rdx,r8 + adcx r12,QWORD PTR[16+rbx] + adox r13,rax + adcx r13,QWORD PTR[24+rbx] + adox r14,rbp + lea rsi,QWORD PTR[32+rsi] + lea rbx,QWORD PTR[32+rbx] + adcx r14,rbp + + adox r10,r15 + mulx r15,rax,QWORD PTR[rcx] + adcx r10,rax + adox r11,r15 + mulx r15,rax,QWORD PTR[16+rcx] + adcx r11,rax + adox r12,r15 + mulx r15,rax,QWORD PTR[32+rcx] + mov QWORD PTR[((-40))+rbx],r10 + adcx r12,rax + adox r13,r15 + mov QWORD PTR[((-32))+rbx],r11 + mulx r15,rax,QWORD PTR[48+rcx] + mov rdx,r9 + lea rcx,QWORD PTR[64+rcx] + mov QWORD PTR[((-24))+rbx],r12 + adcx r13,rax + adox r15,rbp + mov QWORD PTR[((-16))+rbx],r13 + + dec rdi + jnz $L$mulx4x_inner + + mov rax,QWORD PTR[((0+8))+rsp] +DB 102,72,15,126,194 + adc r15,rbp + sub rdi,QWORD PTR[rbx] + mov rdi,QWORD PTR[((8+8))+rsp] + mov r10,QWORD PTR[((16+8))+rsp] + adc r14,r15 + lea rsi,QWORD PTR[rax*1+rsi] + adc rbp,rbp + mov QWORD PTR[((-8))+rbx],r14 + + cmp rdi,r10 + jb $L$mulx4x_outer + + mov r10,QWORD PTR[((-16))+rcx] + xor r15,r15 + sub r10,r14 + adc r15,r15 + or rbp,r15 + xor rbp,1 + lea rdi,QWORD PTR[rax*1+rbx] + lea rcx,QWORD PTR[rax*2+rcx] +DB 067h,067h + sar rax,3+2 + lea rbp,QWORD PTR[rbp*8+rcx] + mov rdx,QWORD PTR[((56+8))+rsp] + mov rcx,rax + jmp $L$sqrx4x_sub +mulx4x_internal ENDP + +ALIGN 32 +bn_powerx5 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_powerx5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$powerx5_enter:: +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$pwrx_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$pwrx_sp_done + +ALIGN 32 +$L$pwrx_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$pwrx_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + + + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$powerx5_body:: + + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call 
__bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + + mov r9,r10 + mov rdi,rsi +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rax,QWORD PTR[40+rsp] + + call mulx4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$powerx5_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_powerx5:: +bn_powerx5 ENDP + +PUBLIC bn_sqrx8x_internal + + +ALIGN 32 +bn_sqrx8x_internal PROC PUBLIC +__bn_sqrx8x_internal:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rdi,QWORD PTR[((48+8))+rsp] + lea rbp,QWORD PTR[r9*1+rsi] + mov QWORD PTR[((0+8))+rsp],r9 + mov QWORD PTR[((8+8))+rsp],rbp + jmp $L$sqr8x_zero_start + +ALIGN 32 +DB 066h,066h,066h,02eh,00fh,01fh,084h,000h,000h,000h,000h,000h +$L$sqrx8x_zero:: +DB 03eh + movdqa XMMWORD PTR[rdi],xmm0 + movdqa XMMWORD PTR[16+rdi],xmm0 + movdqa XMMWORD PTR[32+rdi],xmm0 + movdqa XMMWORD PTR[48+rdi],xmm0 +$L$sqr8x_zero_start:: + movdqa XMMWORD PTR[64+rdi],xmm0 + movdqa XMMWORD PTR[80+rdi],xmm0 + movdqa XMMWORD PTR[96+rdi],xmm0 + movdqa XMMWORD PTR[112+rdi],xmm0 + lea rdi,QWORD PTR[128+rdi] + sub r9,64 + jnz $L$sqrx8x_zero + + mov rdx,QWORD PTR[rsi] + + xor r10,r10 + xor r11,r11 + xor r12,r12 + xor r13,r13 + xor r14,r14 + xor r15,r15 + lea rdi,QWORD PTR[((48+8))+rsp] + xor rbp,rbp + jmp $L$sqrx8x_outer_loop + +ALIGN 32 +$L$sqrx8x_outer_loop:: + mulx rax,r8,QWORD PTR[8+rsi] + adcx r8,r9 + adox r10,rax + mulx rax,r9,QWORD PTR[16+rsi] + adcx r9,r10 + adox r11,rax +DB 0c4h,0e2h,0abh,0f6h,086h,018h,000h,000h,000h + adcx r10,r11 + adox r12,rax +DB 0c4h,0e2h,0a3h,0f6h,086h,020h,000h,000h,000h + adcx r11,r12 + adox r13,rax + mulx rax,r12,QWORD PTR[40+rsi] + adcx r12,r13 + adox r14,rax + mulx rax,r13,QWORD PTR[48+rsi] + adcx r13,r14 + adox rax,r15 + mulx r15,r14,QWORD PTR[56+rsi] + mov rdx,QWORD PTR[8+rsi] + adcx r14,rax + adox r15,rbp + adc r15,QWORD PTR[64+rdi] + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[16+rdi],r9 + sbb rcx,rcx + xor rbp,rbp + + + mulx rbx,r8,QWORD PTR[16+rsi] + mulx rax,r9,QWORD PTR[24+rsi] + adcx r8,r10 + adox r9,rbx + mulx rbx,r10,QWORD PTR[32+rsi] + adcx r9,r11 + adox r10,rax +DB 0c4h,0e2h,0a3h,0f6h,086h,028h,000h,000h,000h + adcx r10,r12 + adox r11,rbx +DB 0c4h,0e2h,09bh,0f6h,09eh,030h,000h,000h,000h + adcx r11,r13 + adox r12,r14 +DB 0c4h,062h,093h,0f6h,0b6h,038h,000h,000h,000h + mov rdx,QWORD PTR[16+rsi] + adcx r12,rax + adox r13,rbx + adcx r13,r15 + adox r14,rbp + adcx r14,rbp + + mov QWORD PTR[24+rdi],r8 + mov QWORD PTR[32+rdi],r9 + + mulx rbx,r8,QWORD PTR[24+rsi] + mulx rax,r9,QWORD PTR[32+rsi] + adcx r8,r10 + adox r9,rbx + mulx rbx,r10,QWORD PTR[40+rsi] + adcx r9,r11 + adox r10,rax +DB 0c4h,0e2h,0a3h,0f6h,086h,030h,000h,000h,000h + adcx r10,r12 + adox r11,r13 +DB 0c4h,062h,09bh,0f6h,0aeh,038h,000h,000h,000h +DB 03eh + mov rdx,QWORD PTR[24+rsi] + adcx r11,rbx + adox r12,rax + adcx r12,r14 + mov QWORD PTR[40+rdi],r8 + mov QWORD PTR[48+rdi],r9 + mulx rax,r8,QWORD PTR[32+rsi] + adox r13,rbp + adcx r13,rbp + + mulx rbx,r9,QWORD PTR[40+rsi] + adcx r8,r10 + adox r9,rax + mulx rax,r10,QWORD PTR[48+rsi] + adcx r9,r11 + adox r10,r12 + mulx r12,r11,QWORD PTR[56+rsi] + mov rdx,QWORD PTR[32+rsi] + mov r14,QWORD PTR[40+rsi] + adcx 
r10,rbx + adox r11,rax + mov r15,QWORD PTR[48+rsi] + adcx r11,r13 + adox r12,rbp + adcx r12,rbp + + mov QWORD PTR[56+rdi],r8 + mov QWORD PTR[64+rdi],r9 + + mulx rax,r9,r14 + mov r8,QWORD PTR[56+rsi] + adcx r9,r10 + mulx rbx,r10,r15 + adox r10,rax + adcx r10,r11 + mulx rax,r11,r8 + mov rdx,r14 + adox r11,rbx + adcx r11,r12 + + adcx rax,rbp + + mulx rbx,r14,r15 + mulx r13,r12,r8 + mov rdx,r15 + lea rsi,QWORD PTR[64+rsi] + adcx r11,r14 + adox r12,rbx + adcx r12,rax + adox r13,rbp + +DB 067h,067h + mulx r14,r8,r8 + adcx r13,r8 + adcx r14,rbp + + cmp rsi,QWORD PTR[((8+8))+rsp] + je $L$sqrx8x_outer_break + + neg rcx + mov rcx,-8 + mov r15,rbp + mov r8,QWORD PTR[64+rdi] + adcx r9,QWORD PTR[72+rdi] + adcx r10,QWORD PTR[80+rdi] + adcx r11,QWORD PTR[88+rdi] + adc r12,QWORD PTR[96+rdi] + adc r13,QWORD PTR[104+rdi] + adc r14,QWORD PTR[112+rdi] + adc r15,QWORD PTR[120+rdi] + lea rbp,QWORD PTR[rsi] + lea rdi,QWORD PTR[128+rdi] + sbb rax,rax + + mov rdx,QWORD PTR[((-64))+rsi] + mov QWORD PTR[((16+8))+rsp],rax + mov QWORD PTR[((24+8))+rsp],rdi + + + xor eax,eax + jmp $L$sqrx8x_loop + +ALIGN 32 +$L$sqrx8x_loop:: + mov rbx,r8 + mulx r8,rax,QWORD PTR[rbp] + adcx rbx,rax + adox r8,r9 + + mulx r9,rax,QWORD PTR[8+rbp] + adcx r8,rax + adox r9,r10 + + mulx r10,rax,QWORD PTR[16+rbp] + adcx r9,rax + adox r10,r11 + + mulx r11,rax,QWORD PTR[24+rbp] + adcx r10,rax + adox r11,r12 + +DB 0c4h,062h,0fbh,0f6h,0a5h,020h,000h,000h,000h + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD PTR[40+rbp] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD PTR[48+rbp] + mov QWORD PTR[rcx*8+rdi],rbx + mov ebx,0 + adcx r13,rax + adox r14,r15 + +DB 0c4h,062h,0fbh,0f6h,0bdh,038h,000h,000h,000h + mov rdx,QWORD PTR[8+rcx*8+rsi] + adcx r14,rax + adox r15,rbx + adcx r15,rbx + +DB 067h + inc rcx + jnz $L$sqrx8x_loop + + lea rbp,QWORD PTR[64+rbp] + mov rcx,-8 + cmp rbp,QWORD PTR[((8+8))+rsp] + je $L$sqrx8x_break + + sub rbx,QWORD PTR[((16+8))+rsp] +DB 066h + mov rdx,QWORD PTR[((-64))+rsi] + adcx r8,QWORD PTR[rdi] + adcx r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + lea rdi,QWORD PTR[64+rdi] +DB 067h + sbb rax,rax + xor ebx,ebx + mov QWORD PTR[((16+8))+rsp],rax + jmp $L$sqrx8x_loop + +ALIGN 32 +$L$sqrx8x_break:: + sub r8,QWORD PTR[((16+8))+rsp] + mov rcx,QWORD PTR[((24+8))+rsp] + mov rdx,QWORD PTR[rsi] + xor ebp,ebp + mov QWORD PTR[rdi],r8 + cmp rdi,rcx + je $L$sqrx8x_outer_loop + + mov QWORD PTR[8+rdi],r9 + mov r9,QWORD PTR[8+rcx] + mov QWORD PTR[16+rdi],r10 + mov r10,QWORD PTR[16+rcx] + mov QWORD PTR[24+rdi],r11 + mov r11,QWORD PTR[24+rcx] + mov QWORD PTR[32+rdi],r12 + mov r12,QWORD PTR[32+rcx] + mov QWORD PTR[40+rdi],r13 + mov r13,QWORD PTR[40+rcx] + mov QWORD PTR[48+rdi],r14 + mov r14,QWORD PTR[48+rcx] + mov QWORD PTR[56+rdi],r15 + mov r15,QWORD PTR[56+rcx] + mov rdi,rcx + jmp $L$sqrx8x_outer_loop + +ALIGN 32 +$L$sqrx8x_outer_break:: + mov QWORD PTR[72+rdi],r9 +DB 102,72,15,126,217 + mov QWORD PTR[80+rdi],r10 + mov QWORD PTR[88+rdi],r11 + mov QWORD PTR[96+rdi],r12 + mov QWORD PTR[104+rdi],r13 + mov QWORD PTR[112+rdi],r14 + lea rdi,QWORD PTR[((48+8))+rsp] + mov rdx,QWORD PTR[rcx*1+rsi] + + mov r11,QWORD PTR[8+rdi] + xor r10,r10 + mov r9,QWORD PTR[((0+8))+rsp] + adox r11,r11 + mov r12,QWORD PTR[16+rdi] + mov r13,QWORD PTR[24+rdi] + + +ALIGN 32 +$L$sqrx4x_shift_n_add:: + mulx rbx,rax,rdx + adox r12,r12 + adcx rax,r10 +DB 048h,08bh,094h,00eh,008h,000h,000h,000h +DB 04ch,08bh,097h,020h,000h,000h,000h 
+ adox r13,r13 + adcx rbx,r11 + mov r11,QWORD PTR[40+rdi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + + mulx rbx,rax,rdx + adox r10,r10 + adcx rax,r12 + mov rdx,QWORD PTR[16+rcx*1+rsi] + mov r12,QWORD PTR[48+rdi] + adox r11,r11 + adcx rbx,r13 + mov r13,QWORD PTR[56+rdi] + mov QWORD PTR[16+rdi],rax + mov QWORD PTR[24+rdi],rbx + + mulx rbx,rax,rdx + adox r12,r12 + adcx rax,r10 + mov rdx,QWORD PTR[24+rcx*1+rsi] + lea rcx,QWORD PTR[32+rcx] + mov r10,QWORD PTR[64+rdi] + adox r13,r13 + adcx rbx,r11 + mov r11,QWORD PTR[72+rdi] + mov QWORD PTR[32+rdi],rax + mov QWORD PTR[40+rdi],rbx + + mulx rbx,rax,rdx + adox r10,r10 + adcx rax,r12 + jrcxz $L$sqrx4x_shift_n_add_break +DB 048h,08bh,094h,00eh,000h,000h,000h,000h + adox r11,r11 + adcx rbx,r13 + mov r12,QWORD PTR[80+rdi] + mov r13,QWORD PTR[88+rdi] + mov QWORD PTR[48+rdi],rax + mov QWORD PTR[56+rdi],rbx + lea rdi,QWORD PTR[64+rdi] + nop + jmp $L$sqrx4x_shift_n_add + +ALIGN 32 +$L$sqrx4x_shift_n_add_break:: + adcx rbx,r13 + mov QWORD PTR[48+rdi],rax + mov QWORD PTR[56+rdi],rbx + lea rdi,QWORD PTR[64+rdi] +DB 102,72,15,126,213 +sqrx8x_reduction:: + xor eax,eax + mov rbx,QWORD PTR[((32+8))+rsp] + mov rdx,QWORD PTR[((48+8))+rsp] + lea rcx,QWORD PTR[((-128))+r9*2+rbp] + + mov QWORD PTR[((0+8))+rsp],rcx + mov QWORD PTR[((8+8))+rsp],rdi + + lea rdi,QWORD PTR[((48+8))+rsp] + jmp $L$sqrx8x_reduction_loop + +ALIGN 32 +$L$sqrx8x_reduction_loop:: + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r8,rdx + imul rdx,rbx + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov QWORD PTR[((24+8))+rsp],rax + + lea rdi,QWORD PTR[64+rdi] + xor rsi,rsi + mov rcx,-8 + jmp $L$sqrx8x_reduce + +ALIGN 32 +$L$sqrx8x_reduce:: + mov rbx,r8 + mulx r8,rax,QWORD PTR[rbp] + adcx rax,rbx + adox r8,r9 + + mulx r9,rbx,QWORD PTR[16+rbp] + adcx r8,rbx + adox r9,r10 + + mulx r10,rbx,QWORD PTR[32+rbp] + adcx r9,rbx + adox r10,r11 + + mulx r11,rbx,QWORD PTR[48+rbp] + adcx r10,rbx + adox r11,r12 + +DB 0c4h,062h,0e3h,0f6h,0a5h,040h,000h,000h,000h + mov rax,rdx + mov rdx,r8 + adcx r11,rbx + adox r12,r13 + + mulx rdx,rbx,QWORD PTR[((32+8))+rsp] + mov rdx,rax + mov QWORD PTR[((64+48+8))+rcx*8+rsp],rax + + mulx r13,rax,QWORD PTR[80+rbp] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD PTR[96+rbp] + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD PTR[112+rbp] + mov rdx,rbx + adcx r14,rax + adox r15,rsi + adcx r15,rsi + +DB 067h,067h,067h + inc rcx + jnz $L$sqrx8x_reduce + + mov rax,rsi + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$sqrx8x_no_tail + + mov rdx,QWORD PTR[((48+8))+rsp] + add r8,QWORD PTR[rdi] + lea rbp,QWORD PTR[128+rbp] + mov rcx,-8 + adcx r9,QWORD PTR[8+rdi] + adcx r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + lea rdi,QWORD PTR[64+rdi] + sbb rax,rax + + xor rsi,rsi + mov QWORD PTR[((16+8))+rsp],rax + jmp $L$sqrx8x_tail + +ALIGN 32 +$L$sqrx8x_tail:: + mov rbx,r8 + mulx r8,rax,QWORD PTR[rbp] + adcx rbx,rax + adox r8,r9 + + mulx r9,rax,QWORD PTR[16+rbp] + adcx r8,rax + adox r9,r10 + + mulx r10,rax,QWORD PTR[32+rbp] + adcx r9,rax + adox r10,r11 + + mulx r11,rax,QWORD PTR[48+rbp] + adcx r10,rax + adox r11,r12 + +DB 0c4h,062h,0fbh,0f6h,0a5h,040h,000h,000h,000h + adcx r11,rax + adox r12,r13 + + mulx r13,rax,QWORD PTR[80+rbp] + adcx r12,rax + adox r13,r14 + + mulx r14,rax,QWORD PTR[96+rbp] + adcx r13,rax + adox r14,r15 + + mulx r15,rax,QWORD PTR[112+rbp] + mov 
rdx,QWORD PTR[((72+48+8))+rcx*8+rsp] + adcx r14,rax + adox r15,rsi + mov QWORD PTR[rcx*8+rdi],rbx + mov rbx,r8 + adcx r15,rsi + + inc rcx + jnz $L$sqrx8x_tail + + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$sqrx8x_tail_done + + sub rsi,QWORD PTR[((16+8))+rsp] + mov rdx,QWORD PTR[((48+8))+rsp] + lea rbp,QWORD PTR[128+rbp] + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + lea rdi,QWORD PTR[64+rdi] + sbb rax,rax + sub rcx,8 + + xor rsi,rsi + mov QWORD PTR[((16+8))+rsp],rax + jmp $L$sqrx8x_tail + +ALIGN 32 +$L$sqrx8x_tail_done:: + add r8,QWORD PTR[((24+8))+rsp] + mov rax,rsi + + sub rsi,QWORD PTR[((16+8))+rsp] +$L$sqrx8x_no_tail:: + adc r8,QWORD PTR[rdi] +DB 102,72,15,126,217 + adc r9,QWORD PTR[8+rdi] + mov rsi,QWORD PTR[112+rbp] +DB 102,72,15,126,213 + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + adc rax,rax + + mov rbx,QWORD PTR[((32+8))+rsp] + mov rdx,QWORD PTR[64+rcx*1+rdi] + + mov QWORD PTR[rdi],r8 + lea r8,QWORD PTR[64+rdi] + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + lea rdi,QWORD PTR[64+rcx*1+rdi] + cmp r8,QWORD PTR[((8+8))+rsp] + jb $L$sqrx8x_reduction_loop + xor rbx,rbx + sub rsi,r15 + adc rbx,rbx + mov r10,rcx +DB 067h + or rax,rbx +DB 067h + mov r9,rcx + xor rax,1 + sar rcx,3+2 + + lea rbp,QWORD PTR[rax*8+rbp] +DB 102,72,15,126,202 +DB 102,72,15,126,206 + jmp $L$sqrx4x_sub + +ALIGN 32 +$L$sqrx4x_sub:: +DB 066h + mov r12,QWORD PTR[rdi] + mov r13,QWORD PTR[8+rdi] + sbb r12,QWORD PTR[rbp] + mov r14,QWORD PTR[16+rdi] + sbb r13,QWORD PTR[16+rbp] + mov r15,QWORD PTR[24+rdi] + lea rdi,QWORD PTR[32+rdi] + sbb r14,QWORD PTR[32+rbp] + mov QWORD PTR[rdx],r12 + sbb r15,QWORD PTR[48+rbp] + lea rbp,QWORD PTR[64+rbp] + mov QWORD PTR[8+rdx],r13 + mov QWORD PTR[16+rdx],r14 + mov QWORD PTR[24+rdx],r15 + lea rdx,QWORD PTR[32+rdx] + + inc rcx + jnz $L$sqrx4x_sub + neg r9 + + DB 0F3h,0C3h ;repret +bn_sqrx8x_internal ENDP +PUBLIC bn_get_bits5 + +ALIGN 16 +bn_get_bits5 PROC PUBLIC + mov r10,rcx + mov ecx,edx + shr edx,3 + movzx eax,WORD PTR[rdx*1+r10] + and ecx,7 + shr eax,cl + and eax,31 + DB 0F3h,0C3h ;repret +bn_get_bits5 ENDP + +PUBLIC bn_scatter5 + +ALIGN 16 +bn_scatter5 PROC PUBLIC + cmp edx,0 + jz $L$scatter_epilogue + lea r8,QWORD PTR[r9*8+r8] +$L$scatter:: + mov rax,QWORD PTR[rcx] + lea rcx,QWORD PTR[8+rcx] + mov QWORD PTR[r8],rax + lea r8,QWORD PTR[256+r8] + sub edx,1 + jnz $L$scatter +$L$scatter_epilogue:: + DB 0F3h,0C3h ;repret +bn_scatter5 ENDP + +PUBLIC bn_gather5 + +ALIGN 16 +bn_gather5 PROC PUBLIC +$L$SEH_begin_bn_gather5:: + +DB 048h,083h,0ech,028h +DB 00fh,029h,034h,024h +DB 00fh,029h,07ch,024h,010h + mov r11d,r9d + shr r9d,3 + and r11,7 + not r9d + lea rax,QWORD PTR[$L$magic_masks] + and r9d,3 + lea r8,QWORD PTR[128+r11*8+r8] + movq xmm4,QWORD PTR[r9*8+rax] + movq xmm5,QWORD PTR[8+r9*8+rax] + movq xmm6,QWORD PTR[16+r9*8+rax] + movq xmm7,QWORD PTR[24+r9*8+rax] + jmp $L$gather +ALIGN 16 +$L$gather:: + movq xmm0,QWORD PTR[((-128))+r8] + movq xmm1,QWORD PTR[((-64))+r8] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[r8] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[64+r8] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 +DB 067h,067h + por xmm0,xmm2 + lea r8,QWORD 
PTR[256+r8] + por xmm0,xmm3 + + movq QWORD PTR[rcx],xmm0 + lea rcx,QWORD PTR[8+rcx] + sub edx,1 + jnz $L$gather + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + lea rsp,QWORD PTR[40+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_gather5:: +bn_gather5 ENDP +ALIGN 64 +$L$magic_masks:: + DD 0,0,0,0,0,0,-1,-1 + DD 0,0,0,0,0,0,0,0 +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 +DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 +DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 +DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 +DB 112,101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea r10,QWORD PTR[$L$mul_epilogue] + cmp rbx,r10 + jb $L$body_40 mov r10,QWORD PTR[192+r8] mov rax,QWORD PTR[8+r10*8+rax] + jmp $L$body_proceed + +$L$body_40:: + mov rax,QWORD PTR[40+rax] +$L$body_proceed:: - movaps xmm0,XMMWORD PTR[rax] - movaps xmm1,XMMWORD PTR[16+rax] - lea rax,QWORD PTR[88+rax] + movaps xmm0,XMMWORD PTR[((-88))+rax] + movaps xmm1,XMMWORD PTR[((-72))+rax] mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] @@ -917,7 +3161,6 @@ $L$common_seh_tail:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -957,6 +3200,20 @@ ALIGN 4 DD imagerel $L$SEH_end_bn_mul4x_mont_gather5 DD imagerel $L$SEH_info_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_begin_bn_power5 + DD imagerel $L$SEH_end_bn_power5 + DD imagerel $L$SEH_info_bn_power5 + + DD imagerel $L$SEH_begin_bn_from_mont8x + DD imagerel $L$SEH_end_bn_from_mont8x + DD imagerel $L$SEH_info_bn_from_mont8x + DD imagerel $L$SEH_begin_bn_mulx4x_mont_gather5 + DD imagerel $L$SEH_end_bn_mulx4x_mont_gather5 + DD imagerel $L$SEH_info_bn_mulx4x_mont_gather5 + + DD imagerel $L$SEH_begin_bn_powerx5 + DD imagerel $L$SEH_end_bn_powerx5 + DD imagerel $L$SEH_info_bn_powerx5 DD imagerel $L$SEH_begin_bn_gather5 DD imagerel $L$SEH_end_bn_gather5 DD imagerel $L$SEH_info_bn_gather5 @@ -967,23 +3224,38 @@ ALIGN 8 $L$SEH_info_bn_mul_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mul_alloca,imagerel $L$mul_body,imagerel $L$mul_epilogue - + DD imagerel $L$mul_body,imagerel $L$mul_epilogue ALIGN 8 $L$SEH_info_bn_mul4x_mont_gather5:: DB 9,0,0,0 DD imagerel mul_handler - DD imagerel $L$mul4x_alloca,imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue - + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +ALIGN 8 +$L$SEH_info_bn_power5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$power5_body,imagerel $L$power5_epilogue +ALIGN 8 +$L$SEH_info_bn_from_mont8x:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$from_body,imagerel $L$from_epilogue +ALIGN 8 +$L$SEH_info_bn_mulx4x_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mulx4x_body,imagerel $L$mulx4x_epilogue +ALIGN 8 +$L$SEH_info_bn_powerx5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$powerx5_body,imagerel $L$powerx5_epilogue ALIGN 8 $L$SEH_info_bn_gather5:: DB 001h,00dh,005h,000h DB 00dh,078h,001h,000h - 
DB 008h,068h,000h,000h - DB 004h,042h,000h,000h - ALIGN 8 .xdata ENDS diff --git a/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm b/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm index 0ea789b6aeaf1b..5e6d45157645dc 100644 --- a/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/camellia/cmll-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' PUBLIC Camellia_EncryptBlock @@ -281,7 +281,6 @@ $L$edone:: mov r11d,edx DB 0f3h,0c3h - _x86_64_Camellia_encrypt ENDP @@ -565,7 +564,6 @@ $L$ddone:: mov r11d,ebx DB 0f3h,0c3h - _x86_64_Camellia_decrypt ENDP PUBLIC Camellia_Ekeygen @@ -587,7 +585,7 @@ $L$SEH_begin_Camellia_Ekeygen:: push r15 $L$key_prologue:: - mov r15,rdi + mov r15d,edi mov r13,rdx mov r8d,DWORD PTR[rsi] @@ -1777,7 +1775,6 @@ $L$cbc_enc_pushf:: mov rsi,r12 lea rdi,QWORD PTR[((8+24))+rsp] DD 09066A4F3h - popfq $L$cbc_enc_popf:: @@ -1786,7 +1783,6 @@ $L$cbc_enc_popf:: mov QWORD PTR[8+rsp],rax jmp $L$cbc_eloop - ALIGN 16 $L$CBC_DECRYPT:: xchg r15,r14 @@ -1869,7 +1865,6 @@ $L$cbc_dec_pushf:: lea rsi,QWORD PTR[((8+24))+rsp] lea rdi,QWORD PTR[r13] DD 09066A4F3h - popfq $L$cbc_dec_popf:: @@ -2034,7 +2029,6 @@ $L$common_seh_exit:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -2089,17 +2083,14 @@ $L$SEH_info_Camellia_EncryptBlock_Rounds:: DB 9,0,0,0 DD imagerel common_se_handler DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue - $L$SEH_info_Camellia_DecryptBlock_Rounds:: DB 9,0,0,0 DD imagerel common_se_handler DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue - $L$SEH_info_Camellia_Ekeygen:: DB 9,0,0,0 DD imagerel common_se_handler DD imagerel $L$key_prologue,imagerel $L$key_epilogue - $L$SEH_info_Camellia_cbc_encrypt:: DB 9,0,0,0 DD imagerel cbc_se_handler diff --git a/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm b/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm new file mode 100644 index 00000000000000..3fa69816b50e7b --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm @@ -0,0 +1,3789 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + + +ALIGN 64 +$L$poly:: + DQ 0ffffffffffffffffh,000000000ffffffffh,00000000000000000h,0ffffffff00000001h + + +$L$RR:: + DQ 00000000000000003h,0fffffffbffffffffh,0fffffffffffffffeh,000000004fffffffdh + +$L$One:: + DD 1,1,1,1,1,1,1,1 +$L$Two:: + DD 2,2,2,2,2,2,2,2 +$L$Three:: + DD 3,3,3,3,3,3,3,3 +$L$ONE_mont:: + DQ 00000000000000001h,0ffffffff00000000h,0ffffffffffffffffh,000000000fffffffeh + +PUBLIC ecp_nistz256_mul_by_2 + +ALIGN 64 +ecp_nistz256_mul_by_2 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_by_2:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + add r8,r8 + mov r10,QWORD PTR[16+rsi] + adc r9,r9 + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + mov rax,r8 + adc r10,r10 + adc r11,r11 + mov rdx,r9 + sbb r13,r13 + + sub r8,QWORD PTR[rsi] + mov rcx,r10 + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + mov r12,r11 + sbb r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret 
+$L$SEH_end_ecp_nistz256_mul_by_2:: +ecp_nistz256_mul_by_2 ENDP + + + +PUBLIC ecp_nistz256_div_by_2 + +ALIGN 32 +ecp_nistz256_div_by_2 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_div_by_2:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov rax,r8 + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + + mov rdx,r9 + xor r13,r13 + add r8,QWORD PTR[rsi] + mov rcx,r10 + adc r9,QWORD PTR[8+rsi] + adc r10,QWORD PTR[16+rsi] + mov r12,r11 + adc r11,QWORD PTR[24+rsi] + adc r13,0 + xor rsi,rsi + test rax,1 + + cmovz r8,rax + cmovz r9,rdx + cmovz r10,rcx + cmovz r11,r12 + cmovz r13,rsi + + mov rax,r9 + shr r8,1 + shl rax,63 + mov rdx,r10 + shr r9,1 + or r8,rax + shl rdx,63 + mov rcx,r11 + shr r10,1 + or r9,rdx + shl rcx,63 + shr r11,1 + shl r13,63 + or r10,rcx + or r11,r13 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_div_by_2:: +ecp_nistz256_div_by_2 ENDP + + + +PUBLIC ecp_nistz256_mul_by_3 + +ALIGN 32 +ecp_nistz256_mul_by_3 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_by_3:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + xor r13,r13 + mov r9,QWORD PTR[8+rsi] + add r8,r8 + mov r10,QWORD PTR[16+rsi] + adc r9,r9 + mov r11,QWORD PTR[24+rsi] + mov rax,r8 + adc r10,r10 + adc r11,r11 + mov rdx,r9 + adc r13,0 + + sub r8,-1 + mov rcx,r10 + sbb r9,QWORD PTR[(($L$poly+8))] + sbb r10,0 + mov r12,r11 + sbb r11,QWORD PTR[(($L$poly+24))] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + cmovz r10,rcx + cmovz r11,r12 + + xor r13,r13 + add r8,QWORD PTR[rsi] + adc r9,QWORD PTR[8+rsi] + mov rax,r8 + adc r10,QWORD PTR[16+rsi] + adc r11,QWORD PTR[24+rsi] + mov rdx,r9 + adc r13,0 + + sub r8,-1 + mov rcx,r10 + sbb r9,QWORD PTR[(($L$poly+8))] + sbb r10,0 + mov r12,r11 + sbb r11,QWORD PTR[(($L$poly+24))] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_mul_by_3:: +ecp_nistz256_mul_by_3 ENDP + + + +PUBLIC ecp_nistz256_add + +ALIGN 32 +ecp_nistz256_add PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_add:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + xor r13,r13 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + mov rax,r8 + adc r10,QWORD PTR[16+rdx] + adc r11,QWORD PTR[24+rdx] + mov rdx,r9 + adc r13,0 + + sub r8,QWORD PTR[rsi] + mov rcx,r10 + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + mov r12,r11 + sbb r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret 
+$L$SEH_end_ecp_nistz256_add:: +ecp_nistz256_add ENDP + + + +PUBLIC ecp_nistz256_sub + +ALIGN 32 +ecp_nistz256_sub PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_sub:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + xor r13,r13 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + + sub r8,QWORD PTR[rdx] + sbb r9,QWORD PTR[8+rdx] + mov rax,r8 + sbb r10,QWORD PTR[16+rdx] + sbb r11,QWORD PTR[24+rdx] + mov rdx,r9 + sbb r13,0 + + add r8,QWORD PTR[rsi] + mov rcx,r10 + adc r9,QWORD PTR[8+rsi] + adc r10,QWORD PTR[16+rsi] + mov r12,r11 + adc r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_sub:: +ecp_nistz256_sub ENDP + + + +PUBLIC ecp_nistz256_neg + +ALIGN 32 +ecp_nistz256_neg PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_neg:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + xor r8,r8 + xor r9,r9 + xor r10,r10 + xor r11,r11 + xor r13,r13 + + sub r8,QWORD PTR[rsi] + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + mov rax,r8 + sbb r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + mov rdx,r9 + sbb r13,0 + + add r8,QWORD PTR[rsi] + mov rcx,r10 + adc r9,QWORD PTR[8+rsi] + adc r10,QWORD PTR[16+rsi] + mov r12,r11 + adc r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_neg:: +ecp_nistz256_neg ENDP + + + + +PUBLIC ecp_nistz256_to_mont + +ALIGN 32 +ecp_nistz256_to_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_to_mont:: + mov rdi,rcx + mov rsi,rdx + + + mov ecx,080100h + and ecx,DWORD PTR[((OPENSSL_ia32cap_P+8))] + lea rdx,QWORD PTR[$L$RR] + jmp $L$mul_mont +$L$SEH_end_ecp_nistz256_to_mont:: +ecp_nistz256_to_mont ENDP + + + + + + + +PUBLIC ecp_nistz256_mul_mont + +ALIGN 32 +ecp_nistz256_mul_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov ecx,080100h + and ecx,DWORD PTR[((OPENSSL_ia32cap_P+8))] +$L$mul_mont:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + cmp ecx,080100h + je $L$mul_montx + mov rbx,rdx + mov rax,QWORD PTR[rdx] + mov r9,QWORD PTR[rsi] + mov r10,QWORD PTR[8+rsi] + mov r11,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + + call __ecp_nistz256_mul_montq + jmp $L$mul_mont_done + +ALIGN 32 +$L$mul_montx:: + mov rbx,rdx + mov rdx,QWORD PTR[rdx] + mov r9,QWORD PTR[rsi] + mov r10,QWORD PTR[8+rsi] + mov r11,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + + call __ecp_nistz256_mul_montx +$L$mul_mont_done:: + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret 
+$L$SEH_end_ecp_nistz256_mul_mont:: +ecp_nistz256_mul_mont ENDP + + +ALIGN 32 +__ecp_nistz256_mul_montq PROC PRIVATE + + + mov rbp,rax + mul r9 + mov r14,QWORD PTR[(($L$poly+8))] + mov r8,rax + mov rax,rbp + mov r9,rdx + + mul r10 + mov r15,QWORD PTR[(($L$poly+24))] + add r9,rax + mov rax,rbp + adc rdx,0 + mov r10,rdx + + mul r11 + add r10,rax + mov rax,rbp + adc rdx,0 + mov r11,rdx + + mul r12 + add r11,rax + mov rax,r8 + adc rdx,0 + xor r13,r13 + mov r12,rdx + + + + + + + + + + + mov rbp,r8 + shl r8,32 + mul r15 + shr rbp,32 + add r9,r8 + adc r10,rbp + adc r11,rax + mov rax,QWORD PTR[8+rbx] + adc r12,rdx + adc r13,0 + xor r8,r8 + + + + mov rbp,rax + mul QWORD PTR[rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[16+rsi] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[24+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,r9 + adc r13,rdx + adc r8,0 + + + + mov rbp,r9 + shl r9,32 + mul r15 + shr rbp,32 + add r10,r9 + adc r11,rbp + adc r12,rax + mov rax,QWORD PTR[16+rbx] + adc r13,rdx + adc r8,0 + xor r9,r9 + + + + mov rbp,rax + mul QWORD PTR[rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[16+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[24+rsi] + add r13,rcx + adc rdx,0 + add r13,rax + mov rax,r10 + adc r8,rdx + adc r9,0 + + + + mov rbp,r10 + shl r10,32 + mul r15 + shr rbp,32 + add r11,r10 + adc r12,rbp + adc r13,rax + mov rax,QWORD PTR[24+rbx] + adc r8,rdx + adc r9,0 + xor r10,r10 + + + + mov rbp,rax + mul QWORD PTR[rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[16+rsi] + add r13,rcx + adc rdx,0 + add r13,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[24+rsi] + add r8,rcx + adc rdx,0 + add r8,rax + mov rax,r11 + adc r9,rdx + adc r10,0 + + + + mov rbp,r11 + shl r11,32 + mul r15 + shr rbp,32 + add r12,r11 + adc r13,rbp + mov rcx,r12 + adc r8,rax + adc r9,rdx + mov rbp,r13 + adc r10,0 + + + + sub r12,-1 + mov rbx,r8 + sbb r13,r14 + sbb r8,0 + mov rdx,r9 + sbb r9,r15 + sbb r10,0 + + cmovc r12,rcx + cmovc r13,rbp + mov QWORD PTR[rdi],r12 + cmovc r8,rbx + mov QWORD PTR[8+rdi],r13 + cmovc r9,rdx + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_mul_montq ENDP + + + + + + + + +PUBLIC ecp_nistz256_sqr_mont + +ALIGN 32 +ecp_nistz256_sqr_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_sqr_mont:: + mov rdi,rcx + mov rsi,rdx + + + mov ecx,080100h + and ecx,DWORD PTR[((OPENSSL_ia32cap_P+8))] + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + cmp ecx,080100h + je $L$sqr_montx + mov rax,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r15,QWORD PTR[16+rsi] + mov r8,QWORD PTR[24+rsi] + + call __ecp_nistz256_sqr_montq + jmp $L$sqr_mont_done + +ALIGN 32 +$L$sqr_montx:: + mov rdx,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r15,QWORD PTR[16+rsi] + mov r8,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[((-128))+rsi] + + call __ecp_nistz256_sqr_montx +$L$sqr_mont_done:: + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 
epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_sqr_mont:: +ecp_nistz256_sqr_mont ENDP + + +ALIGN 32 +__ecp_nistz256_sqr_montq PROC PRIVATE + mov r13,rax + mul r14 + mov r9,rax + mov rax,r15 + mov r10,rdx + + mul r13 + add r10,rax + mov rax,r8 + adc rdx,0 + mov r11,rdx + + mul r13 + add r11,rax + mov rax,r15 + adc rdx,0 + mov r12,rdx + + + mul r14 + add r11,rax + mov rax,r8 + adc rdx,0 + mov rbp,rdx + + mul r14 + add r12,rax + mov rax,r8 + adc rdx,0 + add r12,rbp + mov r13,rdx + adc r13,0 + + + mul r15 + xor r15,r15 + add r13,rax + mov rax,QWORD PTR[rsi] + mov r14,rdx + adc r14,0 + + add r9,r9 + adc r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,0 + + mul rax + mov r8,rax + mov rax,QWORD PTR[8+rsi] + mov rcx,rdx + + mul rax + add r9,rcx + adc r10,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + mov rcx,rdx + + mul rax + add r11,rcx + adc r12,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + mov rcx,rdx + + mul rax + add r13,rcx + adc r14,rax + mov rax,r8 + adc r15,rdx + + mov rsi,QWORD PTR[(($L$poly+8))] + mov rbp,QWORD PTR[(($L$poly+24))] + + + + + mov rcx,r8 + shl r8,32 + mul rbp + shr rcx,32 + add r9,r8 + adc r10,rcx + adc r11,rax + mov rax,r9 + adc rdx,0 + + + + mov rcx,r9 + shl r9,32 + mov r8,rdx + mul rbp + shr rcx,32 + add r10,r9 + adc r11,rcx + adc r8,rax + mov rax,r10 + adc rdx,0 + + + + mov rcx,r10 + shl r10,32 + mov r9,rdx + mul rbp + shr rcx,32 + add r11,r10 + adc r8,rcx + adc r9,rax + mov rax,r11 + adc rdx,0 + + + + mov rcx,r11 + shl r11,32 + mov r10,rdx + mul rbp + shr rcx,32 + add r8,r11 + adc r9,rcx + adc r10,rax + adc rdx,0 + xor r11,r11 + + + + add r12,r8 + adc r13,r9 + mov r8,r12 + adc r14,r10 + adc r15,rdx + mov r9,r13 + adc r11,0 + + sub r12,-1 + mov r10,r14 + sbb r13,rsi + sbb r14,0 + mov rcx,r15 + sbb r15,rbp + sbb r11,0 + + cmovc r12,r8 + cmovc r13,r9 + mov QWORD PTR[rdi],r12 + cmovc r14,r10 + mov QWORD PTR[8+rdi],r13 + cmovc r15,rcx + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_sqr_montq ENDP + +ALIGN 32 +__ecp_nistz256_mul_montx PROC PRIVATE + + + mulx r9,r8,r9 + mulx r10,rcx,r10 + mov r14,32 + xor r13,r13 + mulx r11,rbp,r11 + mov r15,QWORD PTR[(($L$poly+24))] + adc r9,rcx + mulx r12,rcx,r12 + mov rdx,r8 + adc r10,rbp + shlx rbp,r8,r14 + adc r11,rcx + shrx rcx,r8,r14 + adc r12,0 + + + + add r9,rbp + adc r10,rcx + + mulx rbp,rcx,r15 + mov rdx,QWORD PTR[8+rbx] + adc r11,rcx + adc r12,rbp + adc r13,0 + xor r8,r8 + + + + mulx rbp,rcx,QWORD PTR[((0+128))+rsi] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD PTR[((8+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD PTR[((16+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD PTR[((24+128))+rsi] + mov rdx,r9 + adcx r12,rcx + shlx rcx,r9,r14 + adox r13,rbp + shrx rbp,r9,r14 + + adcx r13,r8 + adox r8,r8 + adc r8,0 + + + + add r10,rcx + adc r11,rbp + + mulx rbp,rcx,r15 + mov rdx,QWORD PTR[16+rbx] + adc r12,rcx + adc r13,rbp + adc r8,0 + xor r9,r9 + + + + mulx rbp,rcx,QWORD PTR[((0+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD PTR[((8+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD PTR[((16+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD PTR[((24+128))+rsi] + mov rdx,r10 + adcx r13,rcx + shlx rcx,r10,r14 + adox r8,rbp + shrx rbp,r10,r14 + + adcx r8,r9 + adox r9,r9 + adc r9,0 + + + + add r11,rcx + adc r12,rbp + + mulx rbp,rcx,r15 + mov rdx,QWORD PTR[24+rbx] + adc r13,rcx + adc r8,rbp + adc r9,0 + xor r10,r10 + + + + mulx rbp,rcx,QWORD 
PTR[((0+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD PTR[((8+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD PTR[((16+128))+rsi] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD PTR[((24+128))+rsi] + mov rdx,r11 + adcx r8,rcx + shlx rcx,r11,r14 + adox r9,rbp + shrx rbp,r11,r14 + + adcx r9,r10 + adox r10,r10 + adc r10,0 + + + + add r12,rcx + adc r13,rbp + + mulx rbp,rcx,r15 + mov rbx,r12 + mov r14,QWORD PTR[(($L$poly+8))] + adc r8,rcx + mov rdx,r13 + adc r9,rbp + adc r10,0 + + + + xor eax,eax + mov rcx,r8 + sbb r12,-1 + sbb r13,r14 + sbb r8,0 + mov rbp,r9 + sbb r9,r15 + sbb r10,0 + + cmovc r12,rbx + cmovc r13,rdx + mov QWORD PTR[rdi],r12 + cmovc r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovc r9,rbp + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_mul_montx ENDP + + +ALIGN 32 +__ecp_nistz256_sqr_montx PROC PRIVATE + mulx r10,r9,r14 + mulx r11,rcx,r15 + xor eax,eax + adc r10,rcx + mulx r12,rbp,r8 + mov rdx,r14 + adc r11,rbp + adc r12,0 + xor r13,r13 + + + mulx rbp,rcx,r15 + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,r8 + mov rdx,r15 + adcx r12,rcx + adox r13,rbp + adc r13,0 + + + mulx r14,rcx,r8 + mov rdx,QWORD PTR[((0+128))+rsi] + xor r15,r15 + adcx r9,r9 + adox r13,rcx + adcx r10,r10 + adox r14,r15 + + mulx rbp,r8,rdx + mov rdx,QWORD PTR[((8+128))+rsi] + adcx r11,r11 + adox r9,rbp + adcx r12,r12 + mulx rax,rcx,rdx + mov rdx,QWORD PTR[((16+128))+rsi] + adcx r13,r13 + adox r10,rcx + adcx r14,r14 +DB 067h + mulx rbp,rcx,rdx + mov rdx,QWORD PTR[((24+128))+rsi] + adox r11,rax + adcx r15,r15 + adox r12,rcx + mov rsi,32 + adox r13,rbp +DB 067h,067h + mulx rax,rcx,rdx + mov rdx,r8 + adox r14,rcx + shlx rcx,r8,rsi + adox r15,rax + shrx rax,r8,rsi + mov rbp,QWORD PTR[(($L$poly+24))] + + + add r9,rcx + adc r10,rax + + mulx r8,rcx,rbp + mov rdx,r9 + adc r11,rcx + shlx rcx,r9,rsi + adc r8,0 + shrx rax,r9,rsi + + + add r10,rcx + adc r11,rax + + mulx r9,rcx,rbp + mov rdx,r10 + adc r8,rcx + shlx rcx,r10,rsi + adc r9,0 + shrx rax,r10,rsi + + + add r11,rcx + adc r8,rax + + mulx r10,rcx,rbp + mov rdx,r11 + adc r9,rcx + shlx rcx,r11,rsi + adc r10,0 + shrx rax,r11,rsi + + + add r8,rcx + adc r9,rax + + mulx r11,rcx,rbp + adc r10,rcx + adc r11,0 + + xor rdx,rdx + adc r12,r8 + mov rsi,QWORD PTR[(($L$poly+8))] + adc r13,r9 + mov r8,r12 + adc r14,r10 + adc r15,r11 + mov r9,r13 + adc rdx,0 + + xor eax,eax + sbb r12,-1 + mov r10,r14 + sbb r13,rsi + sbb r14,0 + mov r11,r15 + sbb r15,rbp + sbb rdx,0 + + cmovc r12,r8 + cmovc r13,r9 + mov QWORD PTR[rdi],r12 + cmovc r14,r10 + mov QWORD PTR[8+rdi],r13 + cmovc r15,r11 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_sqr_montx ENDP + + + + + + +PUBLIC ecp_nistz256_from_mont + +ALIGN 32 +ecp_nistz256_from_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_from_mont:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov rax,QWORD PTR[rsi] + mov r13,QWORD PTR[(($L$poly+24))] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r8,rax + mov r12,QWORD PTR[(($L$poly+8))] + + + + mov rcx,rax + shl r8,32 + mul r13 + shr rcx,32 + add r9,r8 + adc r10,rcx + adc r11,rax + mov rax,r9 + adc rdx,0 + + + + mov rcx,r9 + shl r9,32 + mov r8,rdx + mul r13 + shr rcx,32 + add r10,r9 + adc r11,rcx + adc r8,rax + mov rax,r10 + adc rdx,0 + + + + mov rcx,r10 + shl r10,32 + mov r9,rdx + mul r13 + shr rcx,32 + add r11,r10 + adc r8,rcx + adc r9,rax + mov 
rax,r11 + adc rdx,0 + + + + mov rcx,r11 + shl r11,32 + mov r10,rdx + mul r13 + shr rcx,32 + add r8,r11 + adc r9,rcx + mov rcx,r8 + adc r10,rax + mov rsi,r9 + adc rdx,0 + + + + sub r8,-1 + mov rax,r10 + sbb r9,r12 + sbb r10,0 + mov r11,rdx + sbb rdx,r13 + sbb r13,r13 + + cmovnz r8,rcx + cmovnz r9,rsi + mov QWORD PTR[rdi],r8 + cmovnz r10,rax + mov QWORD PTR[8+rdi],r9 + cmovz r11,rdx + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_from_mont:: +ecp_nistz256_from_mont ENDP + + +PUBLIC ecp_nistz256_select_w5 + +ALIGN 32 +ecp_nistz256_select_w5 PROC PUBLIC + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + test eax,32 + jnz $L$avx2_select_w5 + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_select_w5:: +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm0,XMMWORD PTR[$L$One] + movd xmm1,r8d + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + + movdqa xmm8,xmm0 + pshufd xmm1,xmm1,0 + + mov rax,16 +$L$select_loop_sse_w5:: + + movdqa xmm15,xmm8 + paddd xmm8,xmm0 + pcmpeqd xmm15,xmm1 + + movdqa xmm9,XMMWORD PTR[rdx] + movdqa xmm10,XMMWORD PTR[16+rdx] + movdqa xmm11,XMMWORD PTR[32+rdx] + movdqa xmm12,XMMWORD PTR[48+rdx] + movdqa xmm13,XMMWORD PTR[64+rdx] + movdqa xmm14,XMMWORD PTR[80+rdx] + lea rdx,QWORD PTR[96+rdx] + + pand xmm9,xmm15 + pand xmm10,xmm15 + por xmm2,xmm9 + pand xmm11,xmm15 + por xmm3,xmm10 + pand xmm12,xmm15 + por xmm4,xmm11 + pand xmm13,xmm15 + por xmm5,xmm12 + pand xmm14,xmm15 + por xmm6,xmm13 + por xmm7,xmm14 + + dec rax + jnz $L$select_loop_sse_w5 + + movdqu XMMWORD PTR[rcx],xmm2 + movdqu XMMWORD PTR[16+rcx],xmm3 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqu XMMWORD PTR[48+rcx],xmm5 + movdqu XMMWORD PTR[64+rcx],xmm6 + movdqu XMMWORD PTR[80+rcx],xmm7 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_ecp_nistz256_select_w5:: + DB 0F3h,0C3h ;repret +ecp_nistz256_select_w5 ENDP + + + +PUBLIC ecp_nistz256_select_w7 + +ALIGN 32 +ecp_nistz256_select_w7 PROC PUBLIC + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + test eax,32 + jnz $L$avx2_select_w7 + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_select_w7:: +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm8,XMMWORD PTR[$L$One] + movd xmm1,r8d + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + + movdqa xmm0,xmm8 + pshufd xmm1,xmm1,0 + mov rax,64 + +$L$select_loop_sse_w7:: + movdqa xmm15,xmm8 + paddd xmm8,xmm0 + movdqa xmm9,XMMWORD PTR[rdx] + movdqa xmm10,XMMWORD PTR[16+rdx] + pcmpeqd xmm15,xmm1 + movdqa xmm11,XMMWORD PTR[32+rdx] + movdqa xmm12,XMMWORD PTR[48+rdx] + 
lea rdx,QWORD PTR[64+rdx] + + pand xmm9,xmm15 + pand xmm10,xmm15 + por xmm2,xmm9 + pand xmm11,xmm15 + por xmm3,xmm10 + pand xmm12,xmm15 + por xmm4,xmm11 + prefetcht0 [255+rdx] + por xmm5,xmm12 + + dec rax + jnz $L$select_loop_sse_w7 + + movdqu XMMWORD PTR[rcx],xmm2 + movdqu XMMWORD PTR[16+rcx],xmm3 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqu XMMWORD PTR[48+rcx],xmm5 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_ecp_nistz256_select_w7:: + DB 0F3h,0C3h ;repret +ecp_nistz256_select_w7 ENDP + + + +ALIGN 32 +ecp_nistz256_avx2_select_w5 PROC PRIVATE +$L$avx2_select_w5:: + vzeroupper + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_avx2_select_w5:: +DB 048h,08dh,060h,0e0h +DB 0c5h,0f8h,029h,070h,0e0h +DB 0c5h,0f8h,029h,078h,0f0h +DB 0c5h,078h,029h,040h,000h +DB 0c5h,078h,029h,048h,010h +DB 0c5h,078h,029h,050h,020h +DB 0c5h,078h,029h,058h,030h +DB 0c5h,078h,029h,060h,040h +DB 0c5h,078h,029h,068h,050h +DB 0c5h,078h,029h,070h,060h +DB 0c5h,078h,029h,078h,070h + vmovdqa ymm0,YMMWORD PTR[$L$Two] + + vpxor ymm2,ymm2,ymm2 + vpxor ymm3,ymm3,ymm3 + vpxor ymm4,ymm4,ymm4 + + vmovdqa ymm5,YMMWORD PTR[$L$One] + vmovdqa ymm10,YMMWORD PTR[$L$Two] + + vmovd xmm1,r8d + vpermd ymm1,ymm2,ymm1 + + mov rax,8 +$L$select_loop_avx2_w5:: + + vmovdqa ymm6,YMMWORD PTR[rdx] + vmovdqa ymm7,YMMWORD PTR[32+rdx] + vmovdqa ymm8,YMMWORD PTR[64+rdx] + + vmovdqa ymm11,YMMWORD PTR[96+rdx] + vmovdqa ymm12,YMMWORD PTR[128+rdx] + vmovdqa ymm13,YMMWORD PTR[160+rdx] + + vpcmpeqd ymm9,ymm5,ymm1 + vpcmpeqd ymm14,ymm10,ymm1 + + vpaddd ymm5,ymm5,ymm0 + vpaddd ymm10,ymm10,ymm0 + lea rdx,QWORD PTR[192+rdx] + + vpand ymm6,ymm6,ymm9 + vpand ymm7,ymm7,ymm9 + vpand ymm8,ymm8,ymm9 + vpand ymm11,ymm11,ymm14 + vpand ymm12,ymm12,ymm14 + vpand ymm13,ymm13,ymm14 + + vpxor ymm2,ymm2,ymm6 + vpxor ymm3,ymm3,ymm7 + vpxor ymm4,ymm4,ymm8 + vpxor ymm2,ymm2,ymm11 + vpxor ymm3,ymm3,ymm12 + vpxor ymm4,ymm4,ymm13 + + dec rax + jnz $L$select_loop_avx2_w5 + + vmovdqu YMMWORD PTR[rcx],ymm2 + vmovdqu YMMWORD PTR[32+rcx],ymm3 + vmovdqu YMMWORD PTR[64+rcx],ymm4 + vzeroupper + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_ecp_nistz256_avx2_select_w5:: + DB 0F3h,0C3h ;repret +ecp_nistz256_avx2_select_w5 ENDP + + + +PUBLIC ecp_nistz256_avx2_select_w7 + +ALIGN 32 +ecp_nistz256_avx2_select_w7 PROC PUBLIC +$L$avx2_select_w7:: + vzeroupper + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_avx2_select_w7:: +DB 048h,08dh,060h,0e0h +DB 0c5h,0f8h,029h,070h,0e0h +DB 0c5h,0f8h,029h,078h,0f0h +DB 0c5h,078h,029h,040h,000h +DB 0c5h,078h,029h,048h,010h +DB 0c5h,078h,029h,050h,020h +DB 0c5h,078h,029h,058h,030h +DB 0c5h,078h,029h,060h,040h +DB 0c5h,078h,029h,068h,050h +DB 0c5h,078h,029h,070h,060h +DB 0c5h,078h,029h,078h,070h + vmovdqa ymm0,YMMWORD PTR[$L$Three] + + vpxor ymm2,ymm2,ymm2 + vpxor ymm3,ymm3,ymm3 + + vmovdqa ymm4,YMMWORD PTR[$L$One] + vmovdqa ymm8,YMMWORD PTR[$L$Two] + vmovdqa 
ymm12,YMMWORD PTR[$L$Three] + + vmovd xmm1,r8d + vpermd ymm1,ymm2,ymm1 + + + mov rax,21 +$L$select_loop_avx2_w7:: + + vmovdqa ymm5,YMMWORD PTR[rdx] + vmovdqa ymm6,YMMWORD PTR[32+rdx] + + vmovdqa ymm9,YMMWORD PTR[64+rdx] + vmovdqa ymm10,YMMWORD PTR[96+rdx] + + vmovdqa ymm13,YMMWORD PTR[128+rdx] + vmovdqa ymm14,YMMWORD PTR[160+rdx] + + vpcmpeqd ymm7,ymm4,ymm1 + vpcmpeqd ymm11,ymm8,ymm1 + vpcmpeqd ymm15,ymm12,ymm1 + + vpaddd ymm4,ymm4,ymm0 + vpaddd ymm8,ymm8,ymm0 + vpaddd ymm12,ymm12,ymm0 + lea rdx,QWORD PTR[192+rdx] + + vpand ymm5,ymm5,ymm7 + vpand ymm6,ymm6,ymm7 + vpand ymm9,ymm9,ymm11 + vpand ymm10,ymm10,ymm11 + vpand ymm13,ymm13,ymm15 + vpand ymm14,ymm14,ymm15 + + vpxor ymm2,ymm2,ymm5 + vpxor ymm3,ymm3,ymm6 + vpxor ymm2,ymm2,ymm9 + vpxor ymm3,ymm3,ymm10 + vpxor ymm2,ymm2,ymm13 + vpxor ymm3,ymm3,ymm14 + + dec rax + jnz $L$select_loop_avx2_w7 + + + vmovdqa ymm5,YMMWORD PTR[rdx] + vmovdqa ymm6,YMMWORD PTR[32+rdx] + + vpcmpeqd ymm7,ymm4,ymm1 + + vpand ymm5,ymm5,ymm7 + vpand ymm6,ymm6,ymm7 + + vpxor ymm2,ymm2,ymm5 + vpxor ymm3,ymm3,ymm6 + + vmovdqu YMMWORD PTR[rcx],ymm2 + vmovdqu YMMWORD PTR[32+rcx],ymm3 + vzeroupper + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_ecp_nistz256_avx2_select_w7:: + DB 0F3h,0C3h ;repret +ecp_nistz256_avx2_select_w7 ENDP + +ALIGN 32 +__ecp_nistz256_add_toq PROC PRIVATE + add r12,QWORD PTR[rbx] + adc r13,QWORD PTR[8+rbx] + mov rax,r12 + adc r8,QWORD PTR[16+rbx] + adc r9,QWORD PTR[24+rbx] + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD PTR[rdi],r12 + cmovz r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovz r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_add_toq ENDP + + +ALIGN 32 +__ecp_nistz256_sub_fromq PROC PRIVATE + sub r12,QWORD PTR[rbx] + sbb r13,QWORD PTR[8+rbx] + mov rax,r12 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + mov rbp,r13 + sbb r11,r11 + + add r12,-1 + mov rcx,r8 + adc r13,r14 + adc r8,0 + mov r10,r9 + adc r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD PTR[rdi],r12 + cmovz r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovz r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_sub_fromq ENDP + + +ALIGN 32 +__ecp_nistz256_subq PROC PRIVATE + sub rax,r12 + sbb rbp,r13 + mov r12,rax + sbb rcx,r8 + sbb r10,r9 + mov r13,rbp + sbb r11,r11 + + add rax,-1 + mov r8,rcx + adc rbp,r14 + adc rcx,0 + mov r9,r10 + adc r10,r15 + test r11,r11 + + cmovnz r12,rax + cmovnz r13,rbp + cmovnz r8,rcx + cmovnz r9,r10 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_subq ENDP + + +ALIGN 32 +__ecp_nistz256_mul_by_2q PROC PRIVATE + add r12,r12 + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD PTR[rdi],r12 + cmovz r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovz r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_mul_by_2q ENDP +PUBLIC ecp_nistz256_point_double + +ALIGN 32 +ecp_nistz256_point_double PROC PUBLIC + 
mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_double:: + mov rdi,rcx + mov rsi,rdx + + + mov ecx,080100h + and ecx,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp ecx,080100h + je $L$point_doublex + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*5+8 + + movdqu xmm0,XMMWORD PTR[rsi] + mov rbx,rsi + movdqu xmm1,XMMWORD PTR[16+rsi] + mov r12,QWORD PTR[((32+0))+rsi] + mov r13,QWORD PTR[((32+8))+rsi] + mov r8,QWORD PTR[((32+16))+rsi] + mov r9,QWORD PTR[((32+24))+rsi] + mov r14,QWORD PTR[(($L$poly+8))] + mov r15,QWORD PTR[(($L$poly+24))] + movdqa XMMWORD PTR[96+rsp],xmm0 + movdqa XMMWORD PTR[(96+16)+rsp],xmm1 + lea r10,QWORD PTR[32+rdi] + lea r11,QWORD PTR[64+rdi] +DB 102,72,15,110,199 +DB 102,73,15,110,202 +DB 102,73,15,110,211 + + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_by_2q + + mov rax,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + lea rsi,QWORD PTR[((64-0))+rsi] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[32+rbx] + mov r9,QWORD PTR[((64+0))+rbx] + mov r10,QWORD PTR[((64+8))+rbx] + mov r11,QWORD PTR[((64+16))+rbx] + mov r12,QWORD PTR[((64+24))+rbx] + lea rsi,QWORD PTR[((64-0))+rbx] + lea rbx,QWORD PTR[32+rbx] +DB 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + mov r12,QWORD PTR[((96+0))+rsp] + mov r13,QWORD PTR[((96+8))+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r8,QWORD PTR[((96+16))+rsp] + mov r9,QWORD PTR[((96+24))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_add_toq + + mov r12,QWORD PTR[((96+0))+rsp] + mov r13,QWORD PTR[((96+8))+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r8,QWORD PTR[((96+16))+rsp] + mov r9,QWORD PTR[((96+24))+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] +DB 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xor r9,r9 + mov rax,r12 + add r12,-1 + mov r10,r13 + adc r13,rsi + mov rcx,r14 + adc r14,0 + mov r8,r15 + adc r15,rbp + adc r9,0 + xor rsi,rsi + test rax,1 + + cmovz r12,rax + cmovz r13,r10 + cmovz r14,rcx + cmovz r15,r8 + cmovz r9,rsi + + mov rax,r13 + shr r12,1 + shl rax,63 + mov r10,r14 + shr r13,1 + or r12,rax + shl r10,63 + mov rcx,r15 + shr r14,1 + or r13,r10 + shl rcx,63 + mov QWORD PTR[rdi],r12 + shr r15,1 + mov QWORD PTR[8+rdi],r13 + shl r9,63 + or r14,rcx + or r15,r9 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + mov rax,QWORD PTR[64+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_by_2q + + lea rbx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_add_toq + + mov rax,QWORD PTR[96+rsp] + lea rbx,QWORD PTR[96+rsp] + mov r9,QWORD PTR[((0+0))+rsp] + mov r10,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r11,QWORD PTR[((16+0))+rsp] + mov r12,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD 
PTR[rsp] + call __ecp_nistz256_mul_montq + + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_by_2q + + mov rax,QWORD PTR[((0+32))+rsp] + mov r14,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r15,QWORD PTR[((16+32))+rsp] + mov r8,QWORD PTR[((24+32))+rsp] +DB 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + lea rbx,QWORD PTR[128+rsp] + mov r8,r14 + mov r9,r15 + mov r14,rsi + mov r15,rbp + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+0))+rsp] + mov rbp,QWORD PTR[((0+8))+rsp] + mov rcx,QWORD PTR[((0+16))+rsp] + mov r10,QWORD PTR[((0+24))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_subq + + mov rax,QWORD PTR[32+rsp] + lea rbx,QWORD PTR[32+rsp] + mov r14,r12 + xor ecx,ecx + mov QWORD PTR[((0+0))+rsp],r12 + mov r10,r13 + mov QWORD PTR[((0+8))+rsp],r13 + cmovz r11,r8 + mov QWORD PTR[((0+16))+rsp],r8 + lea rsi,QWORD PTR[((0-0))+rsp] + cmovz r12,r9 + mov QWORD PTR[((0+24))+rsp],r9 + mov r9,r14 + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + +DB 102,72,15,126,203 +DB 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + add rsp,32*5+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_double:: +ecp_nistz256_point_double ENDP +PUBLIC ecp_nistz256_point_add + +ALIGN 32 +ecp_nistz256_point_add PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov ecx,080100h + and ecx,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp ecx,080100h + je $L$point_addx + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*18+8 + + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqu xmm4,XMMWORD PTR[64+rsi] + movdqu xmm5,XMMWORD PTR[80+rsi] + mov rbx,rsi + mov rsi,rdx + movdqa XMMWORD PTR[384+rsp],xmm0 + movdqa XMMWORD PTR[(384+16)+rsp],xmm1 + por xmm1,xmm0 + movdqa XMMWORD PTR[416+rsp],xmm2 + movdqa XMMWORD PTR[(416+16)+rsp],xmm3 + por xmm3,xmm2 + movdqa XMMWORD PTR[448+rsp],xmm4 + movdqa XMMWORD PTR[(448+16)+rsp],xmm5 + por xmm3,xmm1 + + movdqu xmm0,XMMWORD PTR[rsi] + pshufd xmm5,xmm3,1h + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + por xmm5,xmm3 + movdqu xmm3,XMMWORD PTR[48+rsi] + mov rax,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + movdqa XMMWORD PTR[480+rsp],xmm0 + pshufd xmm4,xmm5,01eh + movdqa XMMWORD PTR[(480+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD PTR[512+rsp],xmm2 + movdqa XMMWORD PTR[(512+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,QWORD PTR[((64-0))+rsi] + mov QWORD PTR[((544+0))+rsp],rax + mov QWORD PTR[((544+8))+rsp],r14 + mov QWORD PTR[((544+16))+rsp],r15 + mov QWORD PTR[((544+24))+rsp],r8 + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sqr_montq + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,1h + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,01eh + por xmm4,xmm3 + pxor xmm3,xmm3 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + mov rax,QWORD PTR[((64+0))+rbx] + mov r14,QWORD PTR[((64+8))+rbx] + mov r15,QWORD PTR[((64+16))+rbx] + mov r8,QWORD PTR[((64+24))+rbx] + + lea rsi,QWORD PTR[((64-0))+rbx] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[544+rsp] + lea rbx,QWORD 
PTR[544+rsp] + mov r9,QWORD PTR[((0+96))+rsp] + mov r10,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((0+96))+rsp] + mov r11,QWORD PTR[((16+96))+rsp] + mov r12,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[416+rsp] + lea rbx,QWORD PTR[416+rsp] + mov r9,QWORD PTR[((0+224))+rsp] + mov r10,QWORD PTR[((8+224))+rsp] + lea rsi,QWORD PTR[((0+224))+rsp] + mov r11,QWORD PTR[((16+224))+rsp] + mov r12,QWORD PTR[((24+224))+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[512+rsp] + lea rbx,QWORD PTR[512+rsp] + mov r9,QWORD PTR[((0+256))+rsp] + mov r10,QWORD PTR[((8+256))+rsp] + lea rsi,QWORD PTR[((0+256))+rsp] + mov r11,QWORD PTR[((16+256))+rsp] + mov r12,QWORD PTR[((24+256))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[224+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromq + + or r12,r13 + movdqa xmm2,xmm4 + or r12,r8 + or r12,r9 + por xmm2,xmm5 +DB 102,73,15,110,220 + + mov rax,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+96))+rsp] + mov r10,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((0+96))+rsp] + mov r11,QWORD PTR[((16+96))+rsp] + mov r12,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[160+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[480+rsp] + lea rbx,QWORD PTR[480+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[160+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_sub_fromq + + or r12,r13 + or r12,r8 + or r12,r9 + +DB 03eh + jnz $L$add_proceedq +DB 102,73,15,126,208 +DB 102,73,15,126,217 + test r8,r8 + jnz $L$add_proceedq + test r9,r9 + jz $L$add_proceedq + +DB 102,72,15,126,199 + pxor xmm0,xmm0 + movdqu XMMWORD PTR[rdi],xmm0 + movdqu XMMWORD PTR[16+rdi],xmm0 + movdqu XMMWORD PTR[32+rdi],xmm0 + movdqu XMMWORD PTR[48+rdi],xmm0 + movdqu XMMWORD PTR[64+rdi],xmm0 + movdqu XMMWORD PTR[80+rdi],xmm0 + jmp $L$add_doneq + +ALIGN 32 +$L$add_proceedq:: + mov rax,QWORD PTR[((0+64))+rsp] + mov r14,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r15,QWORD PTR[((16+64))+rsp] + mov r8,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+0))+rsp] + mov r10,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r11,QWORD PTR[((16+0))+rsp] + mov r12,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[352+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[544+rsp] + lea rbx,QWORD PTR[544+rsp] + mov r9,QWORD PTR[((0+352))+rsp] + mov r10,QWORD PTR[((8+352))+rsp] + lea rsi,QWORD PTR[((0+352))+rsp] + mov r11,QWORD PTR[((16+352))+rsp] + mov r12,QWORD PTR[((24+352))+rsp] + lea rdi,QWORD PTR[352+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[rsp] + lea rbx,QWORD 
PTR[rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[160+rsp] + lea rbx,QWORD PTR[160+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_mul_montq + + + + + add r12,r12 + lea rsi,QWORD PTR[96+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + mov rax,QWORD PTR[rsi] + cmovz r13,rbp + mov rbp,QWORD PTR[8+rsi] + cmovz r8,rcx + mov rcx,QWORD PTR[16+rsi] + cmovz r9,r10 + mov r10,QWORD PTR[24+rsi] + + call __ecp_nistz256_subq + + lea rbx,QWORD PTR[128+rsp] + lea rdi,QWORD PTR[288+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((192+0))+rsp] + mov rbp,QWORD PTR[((192+8))+rsp] + mov rcx,QWORD PTR[((192+16))+rsp] + mov r10,QWORD PTR[((192+24))+rsp] + lea rdi,QWORD PTR[320+rsp] + + call __ecp_nistz256_subq + + mov QWORD PTR[rdi],r12 + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov rax,QWORD PTR[128+rsp] + lea rbx,QWORD PTR[128+rsp] + mov r9,QWORD PTR[((0+224))+rsp] + mov r10,QWORD PTR[((8+224))+rsp] + lea rsi,QWORD PTR[((0+224))+rsp] + mov r11,QWORD PTR[((16+224))+rsp] + mov r12,QWORD PTR[((24+224))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[320+rsp] + lea rbx,QWORD PTR[320+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[320+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[256+rsp] + lea rdi,QWORD PTR[320+rsp] + call __ecp_nistz256_sub_fromq + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[352+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((352+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[544+rsp] + pand xmm3,XMMWORD PTR[((544+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[448+rsp] + pand xmm3,XMMWORD PTR[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[64+rdi],xmm2 + movdqu XMMWORD PTR[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[480+rsp] + pand xmm3,XMMWORD PTR[((480+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[384+rsp] + pand xmm3,XMMWORD PTR[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[rdi],xmm2 + movdqu XMMWORD PTR[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[320+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((320+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[512+rsp] + pand xmm3,XMMWORD PTR[((512+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[416+rsp] 
+ pand xmm3,XMMWORD PTR[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[32+rdi],xmm2 + movdqu XMMWORD PTR[48+rdi],xmm3 + +$L$add_doneq:: + add rsp,32*18+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_add:: +ecp_nistz256_point_add ENDP +PUBLIC ecp_nistz256_point_add_affine + +ALIGN 32 +ecp_nistz256_point_add_affine PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add_affine:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov ecx,080100h + and ecx,DWORD PTR[((OPENSSL_ia32cap_P+8))] + cmp ecx,080100h + je $L$point_add_affinex + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*15+8 + + movdqu xmm0,XMMWORD PTR[rsi] + mov rbx,rdx + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqu xmm4,XMMWORD PTR[64+rsi] + movdqu xmm5,XMMWORD PTR[80+rsi] + mov rax,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + movdqa XMMWORD PTR[320+rsp],xmm0 + movdqa XMMWORD PTR[(320+16)+rsp],xmm1 + por xmm1,xmm0 + movdqa XMMWORD PTR[352+rsp],xmm2 + movdqa XMMWORD PTR[(352+16)+rsp],xmm3 + por xmm3,xmm2 + movdqa XMMWORD PTR[384+rsp],xmm4 + movdqa XMMWORD PTR[(384+16)+rsp],xmm5 + por xmm3,xmm1 + + movdqu xmm0,XMMWORD PTR[rbx] + pshufd xmm5,xmm3,1h + movdqu xmm1,XMMWORD PTR[16+rbx] + movdqu xmm2,XMMWORD PTR[32+rbx] + por xmm5,xmm3 + movdqu xmm3,XMMWORD PTR[48+rbx] + movdqa XMMWORD PTR[416+rsp],xmm0 + pshufd xmm4,xmm5,01eh + movdqa XMMWORD PTR[(416+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD PTR[448+rsp],xmm2 + movdqa XMMWORD PTR[(448+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,QWORD PTR[((64-0))+rsi] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montq + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,1h + mov rax,QWORD PTR[rbx] + + mov r9,r12 + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,01eh + mov r10,r13 + por xmm4,xmm3 + pxor xmm3,xmm3 + mov r11,r14 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + + lea rsi,QWORD PTR[((32-0))+rsp] + mov r12,r15 + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[320+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[288+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[352+rsp] + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+64))+rsp] + mov r14,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov 
r15,QWORD PTR[((16+64))+rsp] + mov r8,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[((0+96))+rsp] + mov r14,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((0+96))+rsp] + mov r15,QWORD PTR[((16+96))+rsp] + mov r8,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[128+rsp] + lea rbx,QWORD PTR[128+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[160+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[320+rsp] + lea rbx,QWORD PTR[320+rsp] + mov r9,QWORD PTR[((0+128))+rsp] + mov r10,QWORD PTR[((8+128))+rsp] + lea rsi,QWORD PTR[((0+128))+rsp] + mov r11,QWORD PTR[((16+128))+rsp] + mov r12,QWORD PTR[((24+128))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + + + + + add r12,r12 + lea rsi,QWORD PTR[192+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + mov rax,QWORD PTR[rsi] + cmovz r13,rbp + mov rbp,QWORD PTR[8+rsi] + cmovz r8,rcx + mov rcx,QWORD PTR[16+rsi] + cmovz r9,r10 + mov r10,QWORD PTR[24+rsi] + + call __ecp_nistz256_subq + + lea rbx,QWORD PTR[160+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+0))+rsp] + mov rbp,QWORD PTR[((0+8))+rsp] + mov rcx,QWORD PTR[((0+16))+rsp] + mov r10,QWORD PTR[((0+24))+rsp] + lea rdi,QWORD PTR[64+rsp] + + call __ecp_nistz256_subq + + mov QWORD PTR[rdi],r12 + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov rax,QWORD PTR[352+rsp] + lea rbx,QWORD PTR[352+rsp] + mov r9,QWORD PTR[((0+160))+rsp] + mov r10,QWORD PTR[((8+160))+rsp] + lea rsi,QWORD PTR[((0+160))+rsp] + mov r11,QWORD PTR[((16+160))+rsp] + mov r12,QWORD PTR[((24+160))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[96+rsp] + lea rbx,QWORD PTR[96+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_sub_fromq + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[$L$ONE_mont] + pand xmm3,XMMWORD PTR[(($L$ONE_mont+16))] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[384+rsp] + pand xmm3,XMMWORD PTR[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[64+rdi],xmm2 + movdqu XMMWORD PTR[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[224+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((224+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[416+rsp] + pand xmm3,XMMWORD PTR[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[320+rsp] + pand xmm3,XMMWORD PTR[((320+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[rdi],xmm2 + movdqu XMMWORD PTR[16+rdi],xmm3 
+ + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[256+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((256+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[448+rsp] + pand xmm3,XMMWORD PTR[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[352+rsp] + pand xmm3,XMMWORD PTR[((352+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[32+rdi],xmm2 + movdqu XMMWORD PTR[48+rdi],xmm3 + + add rsp,32*15+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_add_affine:: +ecp_nistz256_point_add_affine ENDP + +ALIGN 32 +__ecp_nistz256_add_tox PROC PRIVATE + xor r11,r11 + adc r12,QWORD PTR[rbx] + adc r13,QWORD PTR[8+rbx] + mov rax,r12 + adc r8,QWORD PTR[16+rbx] + adc r9,QWORD PTR[24+rbx] + mov rbp,r13 + adc r11,0 + + xor r10,r10 + sbb r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + + bt r11,0 + cmovnc r12,rax + cmovnc r13,rbp + mov QWORD PTR[rdi],r12 + cmovnc r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovnc r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_add_tox ENDP + + +ALIGN 32 +__ecp_nistz256_sub_fromx PROC PRIVATE + xor r11,r11 + sbb r12,QWORD PTR[rbx] + sbb r13,QWORD PTR[8+rbx] + mov rax,r12 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + mov rbp,r13 + sbb r11,0 + + xor r10,r10 + adc r12,-1 + mov rcx,r8 + adc r13,r14 + adc r8,0 + mov r10,r9 + adc r9,r15 + + bt r11,0 + cmovnc r12,rax + cmovnc r13,rbp + mov QWORD PTR[rdi],r12 + cmovnc r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovnc r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_sub_fromx ENDP + + +ALIGN 32 +__ecp_nistz256_subx PROC PRIVATE + xor r11,r11 + sbb rax,r12 + sbb rbp,r13 + mov r12,rax + sbb rcx,r8 + sbb r10,r9 + mov r13,rbp + sbb r11,0 + + xor r9,r9 + adc rax,-1 + mov r8,rcx + adc rbp,r14 + adc rcx,0 + mov r9,r10 + adc r10,r15 + + bt r11,0 + cmovc r12,rax + cmovc r13,rbp + cmovc r8,rcx + cmovc r9,r10 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_subx ENDP + + +ALIGN 32 +__ecp_nistz256_mul_by_2x PROC PRIVATE + xor r11,r11 + adc r12,r12 + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + adc r11,0 + + xor r10,r10 + sbb r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + + bt r11,0 + cmovnc r12,rax + cmovnc r13,rbp + mov QWORD PTR[rdi],r12 + cmovnc r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovnc r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_mul_by_2x ENDP + +ALIGN 32 +ecp_nistz256_point_doublex PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_doublex:: + mov rdi,rcx + mov rsi,rdx + + +$L$point_doublex:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*5+8 + + movdqu xmm0,XMMWORD PTR[rsi] + mov rbx,rsi + movdqu xmm1,XMMWORD PTR[16+rsi] + mov r12,QWORD PTR[((32+0))+rsi] + mov r13,QWORD PTR[((32+8))+rsi] + mov r8,QWORD PTR[((32+16))+rsi] + mov r9,QWORD PTR[((32+24))+rsi] + mov r14,QWORD PTR[(($L$poly+8))] + mov r15,QWORD PTR[(($L$poly+24))] + movdqa XMMWORD PTR[96+rsp],xmm0 + movdqa XMMWORD PTR[(96+16)+rsp],xmm1 + lea r10,QWORD PTR[32+rdi] + lea r11,QWORD PTR[64+rdi] +DB 102,72,15,110,199 +DB 102,73,15,110,202 +DB 102,73,15,110,211 
+ + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_by_2x + + mov rdx,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + lea rsi,QWORD PTR[((64-128))+rsi] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((-128+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[32+rbx] + mov r9,QWORD PTR[((64+0))+rbx] + mov r10,QWORD PTR[((64+8))+rbx] + mov r11,QWORD PTR[((64+16))+rbx] + mov r12,QWORD PTR[((64+24))+rbx] + lea rsi,QWORD PTR[((64-128))+rbx] + lea rbx,QWORD PTR[32+rbx] +DB 102,72,15,126,215 + call __ecp_nistz256_mul_montx + call __ecp_nistz256_mul_by_2x + + mov r12,QWORD PTR[((96+0))+rsp] + mov r13,QWORD PTR[((96+8))+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r8,QWORD PTR[((96+16))+rsp] + mov r9,QWORD PTR[((96+24))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_add_tox + + mov r12,QWORD PTR[((96+0))+rsp] + mov r13,QWORD PTR[((96+8))+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r8,QWORD PTR[((96+16))+rsp] + mov r9,QWORD PTR[((96+24))+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromx + + mov rdx,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((-128+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] +DB 102,72,15,126,207 + call __ecp_nistz256_sqr_montx + xor r9,r9 + mov rax,r12 + add r12,-1 + mov r10,r13 + adc r13,rsi + mov rcx,r14 + adc r14,0 + mov r8,r15 + adc r15,rbp + adc r9,0 + xor rsi,rsi + test rax,1 + + cmovz r12,rax + cmovz r13,r10 + cmovz r14,rcx + cmovz r15,r8 + cmovz r9,rsi + + mov rax,r13 + shr r12,1 + shl rax,63 + mov r10,r14 + shr r13,1 + or r12,rax + shl r10,63 + mov rcx,r15 + shr r14,1 + or r13,r10 + shl rcx,63 + mov QWORD PTR[rdi],r12 + shr r15,1 + mov QWORD PTR[8+rdi],r13 + shl r9,63 + or r14,rcx + or r15,r9 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + mov rdx,QWORD PTR[64+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montx + + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_by_2x + + lea rbx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_add_tox + + mov rdx,QWORD PTR[96+rsp] + lea rbx,QWORD PTR[96+rsp] + mov r9,QWORD PTR[((0+0))+rsp] + mov r10,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((-128+0))+rsp] + mov r11,QWORD PTR[((16+0))+rsp] + mov r12,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montx + + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_by_2x + + mov rdx,QWORD PTR[((0+32))+rsp] + mov r14,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r15,QWORD PTR[((16+32))+rsp] + mov r8,QWORD PTR[((24+32))+rsp] +DB 102,72,15,126,199 + call __ecp_nistz256_sqr_montx + + lea rbx,QWORD PTR[128+rsp] + mov r8,r14 + mov r9,r15 + mov r14,rsi + mov r15,rbp + call __ecp_nistz256_sub_fromx + + mov rax,QWORD PTR[((0+0))+rsp] + mov rbp,QWORD PTR[((0+8))+rsp] + mov rcx,QWORD PTR[((0+16))+rsp] + mov r10,QWORD PTR[((0+24))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_subx + + mov rdx,QWORD PTR[32+rsp] + lea rbx,QWORD PTR[32+rsp] + mov r14,r12 + xor ecx,ecx + mov QWORD PTR[((0+0))+rsp],r12 + mov r10,r13 + mov QWORD PTR[((0+8))+rsp],r13 + cmovz 
r11,r8 + mov QWORD PTR[((0+16))+rsp],r8 + lea rsi,QWORD PTR[((0-128))+rsp] + cmovz r12,r9 + mov QWORD PTR[((0+24))+rsp],r9 + mov r9,r14 + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montx + +DB 102,72,15,126,203 +DB 102,72,15,126,207 + call __ecp_nistz256_sub_fromx + + add rsp,32*5+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_doublex:: +ecp_nistz256_point_doublex ENDP + +ALIGN 32 +ecp_nistz256_point_addx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_addx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$point_addx:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*18+8 + + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqu xmm4,XMMWORD PTR[64+rsi] + movdqu xmm5,XMMWORD PTR[80+rsi] + mov rbx,rsi + mov rsi,rdx + movdqa XMMWORD PTR[384+rsp],xmm0 + movdqa XMMWORD PTR[(384+16)+rsp],xmm1 + por xmm1,xmm0 + movdqa XMMWORD PTR[416+rsp],xmm2 + movdqa XMMWORD PTR[(416+16)+rsp],xmm3 + por xmm3,xmm2 + movdqa XMMWORD PTR[448+rsp],xmm4 + movdqa XMMWORD PTR[(448+16)+rsp],xmm5 + por xmm3,xmm1 + + movdqu xmm0,XMMWORD PTR[rsi] + pshufd xmm5,xmm3,1h + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + por xmm5,xmm3 + movdqu xmm3,XMMWORD PTR[48+rsi] + mov rdx,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + movdqa XMMWORD PTR[480+rsp],xmm0 + pshufd xmm4,xmm5,01eh + movdqa XMMWORD PTR[(480+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD PTR[512+rsp],xmm2 + movdqa XMMWORD PTR[(512+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,QWORD PTR[((64-128))+rsi] + mov QWORD PTR[((544+0))+rsp],rdx + mov QWORD PTR[((544+8))+rsp],r14 + mov QWORD PTR[((544+16))+rsp],r15 + mov QWORD PTR[((544+24))+rsp],r8 + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sqr_montx + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,1h + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,01eh + por xmm4,xmm3 + pxor xmm3,xmm3 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + mov rdx,QWORD PTR[((64+0))+rbx] + mov r14,QWORD PTR[((64+8))+rbx] + mov r15,QWORD PTR[((64+16))+rbx] + mov r8,QWORD PTR[((64+24))+rbx] + + lea rsi,QWORD PTR[((64-128))+rbx] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[544+rsp] + lea rbx,QWORD PTR[544+rsp] + mov r9,QWORD PTR[((0+96))+rsp] + mov r10,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((-128+96))+rsp] + mov r11,QWORD PTR[((16+96))+rsp] + mov r12,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[416+rsp] + lea rbx,QWORD PTR[416+rsp] + mov r9,QWORD PTR[((0+224))+rsp] + mov r10,QWORD PTR[((8+224))+rsp] + lea rsi,QWORD PTR[((-128+224))+rsp] + mov r11,QWORD PTR[((16+224))+rsp] + mov r12,QWORD PTR[((24+224))+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[512+rsp] + lea rbx,QWORD PTR[512+rsp] + mov r9,QWORD 
PTR[((0+256))+rsp] + mov r10,QWORD PTR[((8+256))+rsp] + lea rsi,QWORD PTR[((-128+256))+rsp] + mov r11,QWORD PTR[((16+256))+rsp] + mov r12,QWORD PTR[((24+256))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,QWORD PTR[224+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromx + + or r12,r13 + movdqa xmm2,xmm4 + or r12,r8 + or r12,r9 + por xmm2,xmm5 +DB 102,73,15,110,220 + + mov rdx,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+96))+rsp] + mov r10,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((-128+96))+rsp] + mov r11,QWORD PTR[((16+96))+rsp] + mov r12,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[160+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[480+rsp] + lea rbx,QWORD PTR[480+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,QWORD PTR[160+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_sub_fromx + + or r12,r13 + or r12,r8 + or r12,r9 + +DB 03eh + jnz $L$add_proceedx +DB 102,73,15,126,208 +DB 102,73,15,126,217 + test r8,r8 + jnz $L$add_proceedx + test r9,r9 + jz $L$add_proceedx + +DB 102,72,15,126,199 + pxor xmm0,xmm0 + movdqu XMMWORD PTR[rdi],xmm0 + movdqu XMMWORD PTR[16+rdi],xmm0 + movdqu XMMWORD PTR[32+rdi],xmm0 + movdqu XMMWORD PTR[48+rdi],xmm0 + movdqu XMMWORD PTR[64+rdi],xmm0 + movdqu XMMWORD PTR[80+rdi],xmm0 + jmp $L$add_donex + +ALIGN 32 +$L$add_proceedx:: + mov rdx,QWORD PTR[((0+64))+rsp] + mov r14,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((-128+64))+rsp] + mov r15,QWORD PTR[((16+64))+rsp] + mov r8,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+0))+rsp] + mov r10,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((-128+0))+rsp] + mov r11,QWORD PTR[((16+0))+rsp] + mov r12,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[352+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((-128+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[544+rsp] + lea rbx,QWORD PTR[544+rsp] + mov r9,QWORD PTR[((0+352))+rsp] + mov r10,QWORD PTR[((8+352))+rsp] + lea rsi,QWORD PTR[((-128+352))+rsp] + mov r11,QWORD PTR[((16+352))+rsp] + mov r12,QWORD PTR[((24+352))+rsp] + lea rdi,QWORD PTR[352+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[rsp] + lea rbx,QWORD PTR[rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[160+rsp] + lea rbx,QWORD PTR[160+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_mul_montx + + + + + add r12,r12 + lea rsi,QWORD PTR[96+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + mov rax,QWORD PTR[rsi] + cmovz r13,rbp + mov rbp,QWORD PTR[8+rsi] + cmovz r8,rcx + mov rcx,QWORD 
PTR[16+rsi] + cmovz r9,r10 + mov r10,QWORD PTR[24+rsi] + + call __ecp_nistz256_subx + + lea rbx,QWORD PTR[128+rsp] + lea rdi,QWORD PTR[288+rsp] + call __ecp_nistz256_sub_fromx + + mov rax,QWORD PTR[((192+0))+rsp] + mov rbp,QWORD PTR[((192+8))+rsp] + mov rcx,QWORD PTR[((192+16))+rsp] + mov r10,QWORD PTR[((192+24))+rsp] + lea rdi,QWORD PTR[320+rsp] + + call __ecp_nistz256_subx + + mov QWORD PTR[rdi],r12 + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov rdx,QWORD PTR[128+rsp] + lea rbx,QWORD PTR[128+rsp] + mov r9,QWORD PTR[((0+224))+rsp] + mov r10,QWORD PTR[((8+224))+rsp] + lea rsi,QWORD PTR[((-128+224))+rsp] + mov r11,QWORD PTR[((16+224))+rsp] + mov r12,QWORD PTR[((24+224))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[320+rsp] + lea rbx,QWORD PTR[320+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((-128+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[320+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,QWORD PTR[256+rsp] + lea rdi,QWORD PTR[320+rsp] + call __ecp_nistz256_sub_fromx + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[352+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((352+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[544+rsp] + pand xmm3,XMMWORD PTR[((544+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[448+rsp] + pand xmm3,XMMWORD PTR[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[64+rdi],xmm2 + movdqu XMMWORD PTR[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[480+rsp] + pand xmm3,XMMWORD PTR[((480+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[384+rsp] + pand xmm3,XMMWORD PTR[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[rdi],xmm2 + movdqu XMMWORD PTR[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[320+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((320+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[512+rsp] + pand xmm3,XMMWORD PTR[((512+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[416+rsp] + pand xmm3,XMMWORD PTR[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[32+rdi],xmm2 + movdqu XMMWORD PTR[48+rdi],xmm3 + +$L$add_donex:: + add rsp,32*18+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_addx:: +ecp_nistz256_point_addx ENDP + +ALIGN 32 +ecp_nistz256_point_add_affinex PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add_affinex:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$point_add_affinex:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*15+8 + + movdqu xmm0,XMMWORD PTR[rsi] + mov rbx,rdx + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu 
xmm3,XMMWORD PTR[48+rsi] + movdqu xmm4,XMMWORD PTR[64+rsi] + movdqu xmm5,XMMWORD PTR[80+rsi] + mov rdx,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + movdqa XMMWORD PTR[320+rsp],xmm0 + movdqa XMMWORD PTR[(320+16)+rsp],xmm1 + por xmm1,xmm0 + movdqa XMMWORD PTR[352+rsp],xmm2 + movdqa XMMWORD PTR[(352+16)+rsp],xmm3 + por xmm3,xmm2 + movdqa XMMWORD PTR[384+rsp],xmm4 + movdqa XMMWORD PTR[(384+16)+rsp],xmm5 + por xmm3,xmm1 + + movdqu xmm0,XMMWORD PTR[rbx] + pshufd xmm5,xmm3,1h + movdqu xmm1,XMMWORD PTR[16+rbx] + movdqu xmm2,XMMWORD PTR[32+rbx] + por xmm5,xmm3 + movdqu xmm3,XMMWORD PTR[48+rbx] + movdqa XMMWORD PTR[416+rsp],xmm0 + pshufd xmm4,xmm5,01eh + movdqa XMMWORD PTR[(416+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD PTR[448+rsp],xmm2 + movdqa XMMWORD PTR[(448+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,QWORD PTR[((64-128))+rsi] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montx + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,1h + mov rdx,QWORD PTR[rbx] + + mov r9,r12 + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,01eh + mov r10,r13 + por xmm4,xmm3 + pxor xmm3,xmm3 + mov r11,r14 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + + lea rsi,QWORD PTR[((32-128))+rsp] + mov r12,r15 + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montx + + lea rbx,QWORD PTR[320+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromx + + mov rdx,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((-128+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[288+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((-128+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,QWORD PTR[352+rsp] + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sub_fromx + + mov rdx,QWORD PTR[((0+64))+rsp] + mov r14,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((-128+64))+rsp] + mov r15,QWORD PTR[((16+64))+rsp] + mov r8,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[((0+96))+rsp] + mov r14,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((-128+96))+rsp] + mov r15,QWORD PTR[((16+96))+rsp] + mov r8,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_sqr_montx + + mov rdx,QWORD PTR[128+rsp] + lea rbx,QWORD PTR[128+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((-128+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[160+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[320+rsp] + lea rbx,QWORD PTR[320+rsp] + mov r9,QWORD PTR[((0+128))+rsp] + mov r10,QWORD PTR[((8+128))+rsp] + lea rsi,QWORD PTR[((-128+128))+rsp] + mov r11,QWORD PTR[((16+128))+rsp] + mov r12,QWORD PTR[((24+128))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montx + + + + + add 
r12,r12 + lea rsi,QWORD PTR[192+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + mov rax,QWORD PTR[rsi] + cmovz r13,rbp + mov rbp,QWORD PTR[8+rsi] + cmovz r8,rcx + mov rcx,QWORD PTR[16+rsi] + cmovz r9,r10 + mov r10,QWORD PTR[24+rsi] + + call __ecp_nistz256_subx + + lea rbx,QWORD PTR[160+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_sub_fromx + + mov rax,QWORD PTR[((0+0))+rsp] + mov rbp,QWORD PTR[((0+8))+rsp] + mov rcx,QWORD PTR[((0+16))+rsp] + mov r10,QWORD PTR[((0+24))+rsp] + lea rdi,QWORD PTR[64+rsp] + + call __ecp_nistz256_subx + + mov QWORD PTR[rdi],r12 + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov rdx,QWORD PTR[352+rsp] + lea rbx,QWORD PTR[352+rsp] + mov r9,QWORD PTR[((0+160))+rsp] + mov r10,QWORD PTR[((8+160))+rsp] + lea rsi,QWORD PTR[((-128+160))+rsp] + mov r11,QWORD PTR[((16+160))+rsp] + mov r12,QWORD PTR[((24+160))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montx + + mov rdx,QWORD PTR[96+rsp] + lea rbx,QWORD PTR[96+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((-128+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_mul_montx + + lea rbx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_sub_fromx + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[$L$ONE_mont] + pand xmm3,XMMWORD PTR[(($L$ONE_mont+16))] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[384+rsp] + pand xmm3,XMMWORD PTR[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[64+rdi],xmm2 + movdqu XMMWORD PTR[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[224+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((224+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[416+rsp] + pand xmm3,XMMWORD PTR[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[320+rsp] + pand xmm3,XMMWORD PTR[((320+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[rdi],xmm2 + movdqu XMMWORD PTR[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[256+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((256+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[448+rsp] + pand xmm3,XMMWORD PTR[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[352+rsp] + pand xmm3,XMMWORD PTR[((352+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[32+rdi],xmm2 + movdqu XMMWORD PTR[48+rdi],xmm3 + + add rsp,32*15+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_add_affinex:: +ecp_nistz256_point_add_affinex ENDP + +.text$ ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm b/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm index 
8ddad41c84f118..d2faa88e8a5f9d 100644 --- a/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/md5/md5-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' ALIGN 16 PUBLIC md5_block_asm_data_order @@ -42,7 +42,6 @@ $L$prologue:: je $L$end - $L$loop:: mov r8d,eax mov r9d,ebx @@ -662,7 +661,6 @@ $L$loop:: jb $L$loop - $L$end:: mov DWORD PTR[rbp],eax mov DWORD PTR[4+rbp],ebx @@ -734,7 +732,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] diff --git a/deps/openssl/asm/x64-win32-masm/modes/aesni-gcm-x86_64.asm b/deps/openssl/asm/x64-win32-masm/modes/aesni-gcm-x86_64.asm new file mode 100644 index 00000000000000..0626d8f7824646 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/modes/aesni-gcm-x86_64.asm @@ -0,0 +1,948 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + +ALIGN 32 +_aesni_ctr32_ghash_6x PROC PRIVATE + vmovdqu xmm2,XMMWORD PTR[32+r11] + sub rdx,6 + vpxor xmm4,xmm4,xmm4 + vmovdqu xmm15,XMMWORD PTR[((0-128))+rcx] + vpaddb xmm10,xmm1,xmm2 + vpaddb xmm11,xmm10,xmm2 + vpaddb xmm12,xmm11,xmm2 + vpaddb xmm13,xmm12,xmm2 + vpaddb xmm14,xmm13,xmm2 + vpxor xmm9,xmm1,xmm15 + vmovdqu XMMWORD PTR[(16+8)+rsp],xmm4 + jmp $L$oop6x + +ALIGN 32 +$L$oop6x:: + add ebx,100663296 + jc $L$handle_ctr32 + vmovdqu xmm3,XMMWORD PTR[((0-32))+r9] + vpaddb xmm1,xmm14,xmm2 + vpxor xmm10,xmm10,xmm15 + vpxor xmm11,xmm11,xmm15 + +$L$resume_ctr32:: + vmovdqu XMMWORD PTR[r8],xmm1 + vpclmulqdq xmm5,xmm7,xmm3,010h + vpxor xmm12,xmm12,xmm15 + vmovups xmm2,XMMWORD PTR[((16-128))+rcx] + vpclmulqdq xmm6,xmm7,xmm3,001h + xor r12,r12 + cmp r15,r14 + + vaesenc xmm9,xmm9,xmm2 + vmovdqu xmm0,XMMWORD PTR[((48+8))+rsp] + vpxor xmm13,xmm13,xmm15 + vpclmulqdq xmm1,xmm7,xmm3,000h + vaesenc xmm10,xmm10,xmm2 + vpxor xmm14,xmm14,xmm15 + setnc r12b + vpclmulqdq xmm7,xmm7,xmm3,011h + vaesenc xmm11,xmm11,xmm2 + vmovdqu xmm3,XMMWORD PTR[((16-32))+r9] + neg r12 + vaesenc xmm12,xmm12,xmm2 + vpxor xmm6,xmm6,xmm5 + vpclmulqdq xmm5,xmm0,xmm3,000h + vpxor xmm8,xmm8,xmm4 + vaesenc xmm13,xmm13,xmm2 + vpxor xmm4,xmm1,xmm5 + and r12,060h + vmovups xmm15,XMMWORD PTR[((32-128))+rcx] + vpclmulqdq xmm1,xmm0,xmm3,010h + vaesenc xmm14,xmm14,xmm2 + + vpclmulqdq xmm2,xmm0,xmm3,001h + lea r14,QWORD PTR[r12*1+r14] + vaesenc xmm9,xmm9,xmm15 + vpxor xmm8,xmm8,XMMWORD PTR[((16+8))+rsp] + vpclmulqdq xmm3,xmm0,xmm3,011h + vmovdqu xmm0,XMMWORD PTR[((64+8))+rsp] + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD PTR[88+r14] + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD PTR[80+r14] + vaesenc xmm12,xmm12,xmm15 + mov QWORD PTR[((32+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD PTR[((40+8))+rsp],r12 + vmovdqu xmm5,XMMWORD PTR[((48-32))+r9] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD PTR[((48-128))+rcx] + vpxor xmm6,xmm6,xmm1 + vpclmulqdq xmm1,xmm0,xmm5,000h + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm2 + vpclmulqdq xmm2,xmm0,xmm5,010h + vaesenc xmm10,xmm10,xmm15 + vpxor xmm7,xmm7,xmm3 + vpclmulqdq xmm3,xmm0,xmm5,001h + vaesenc xmm11,xmm11,xmm15 + vpclmulqdq xmm5,xmm0,xmm5,011h + vmovdqu xmm0,XMMWORD PTR[((80+8))+rsp] + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vpxor xmm4,xmm4,xmm1 + vmovdqu xmm1,XMMWORD PTR[((64-32))+r9] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD PTR[((64-128))+rcx] + vpxor xmm6,xmm6,xmm2 + vpclmulqdq xmm2,xmm0,xmm1,000h + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm3 + vpclmulqdq xmm3,xmm0,xmm1,010h + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD PTR[72+r14] + vpxor 
xmm7,xmm7,xmm5 + vpclmulqdq xmm5,xmm0,xmm1,001h + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD PTR[64+r14] + vpclmulqdq xmm1,xmm0,xmm1,011h + vmovdqu xmm0,XMMWORD PTR[((96+8))+rsp] + vaesenc xmm12,xmm12,xmm15 + mov QWORD PTR[((48+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD PTR[((56+8))+rsp],r12 + vpxor xmm4,xmm4,xmm2 + vmovdqu xmm2,XMMWORD PTR[((96-32))+r9] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD PTR[((80-128))+rcx] + vpxor xmm6,xmm6,xmm3 + vpclmulqdq xmm3,xmm0,xmm2,000h + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm5 + vpclmulqdq xmm5,xmm0,xmm2,010h + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD PTR[56+r14] + vpxor xmm7,xmm7,xmm1 + vpclmulqdq xmm1,xmm0,xmm2,001h + vpxor xmm8,xmm8,XMMWORD PTR[((112+8))+rsp] + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD PTR[48+r14] + vpclmulqdq xmm2,xmm0,xmm2,011h + vaesenc xmm12,xmm12,xmm15 + mov QWORD PTR[((64+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD PTR[((72+8))+rsp],r12 + vpxor xmm4,xmm4,xmm3 + vmovdqu xmm3,XMMWORD PTR[((112-32))+r9] + vaesenc xmm14,xmm14,xmm15 + + vmovups xmm15,XMMWORD PTR[((96-128))+rcx] + vpxor xmm6,xmm6,xmm5 + vpclmulqdq xmm5,xmm8,xmm3,010h + vaesenc xmm9,xmm9,xmm15 + vpxor xmm6,xmm6,xmm1 + vpclmulqdq xmm1,xmm8,xmm3,001h + vaesenc xmm10,xmm10,xmm15 + movbe r13,QWORD PTR[40+r14] + vpxor xmm7,xmm7,xmm2 + vpclmulqdq xmm2,xmm8,xmm3,000h + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD PTR[32+r14] + vpclmulqdq xmm8,xmm8,xmm3,011h + vaesenc xmm12,xmm12,xmm15 + mov QWORD PTR[((80+8))+rsp],r13 + vaesenc xmm13,xmm13,xmm15 + mov QWORD PTR[((88+8))+rsp],r12 + vpxor xmm6,xmm6,xmm5 + vaesenc xmm14,xmm14,xmm15 + vpxor xmm6,xmm6,xmm1 + + vmovups xmm15,XMMWORD PTR[((112-128))+rcx] + vpslldq xmm5,xmm6,8 + vpxor xmm4,xmm4,xmm2 + vmovdqu xmm3,XMMWORD PTR[16+r11] + + vaesenc xmm9,xmm9,xmm15 + vpxor xmm7,xmm7,xmm8 + vaesenc xmm10,xmm10,xmm15 + vpxor xmm4,xmm4,xmm5 + movbe r13,QWORD PTR[24+r14] + vaesenc xmm11,xmm11,xmm15 + movbe r12,QWORD PTR[16+r14] + vpalignr xmm0,xmm4,xmm4,8 + vpclmulqdq xmm4,xmm4,xmm3,010h + mov QWORD PTR[((96+8))+rsp],r13 + vaesenc xmm12,xmm12,xmm15 + mov QWORD PTR[((104+8))+rsp],r12 + vaesenc xmm13,xmm13,xmm15 + vmovups xmm1,XMMWORD PTR[((128-128))+rcx] + vaesenc xmm14,xmm14,xmm15 + + vaesenc xmm9,xmm9,xmm1 + vmovups xmm15,XMMWORD PTR[((144-128))+rcx] + vaesenc xmm10,xmm10,xmm1 + vpsrldq xmm6,xmm6,8 + vaesenc xmm11,xmm11,xmm1 + vpxor xmm7,xmm7,xmm6 + vaesenc xmm12,xmm12,xmm1 + vpxor xmm4,xmm4,xmm0 + movbe r13,QWORD PTR[8+r14] + vaesenc xmm13,xmm13,xmm1 + movbe r12,QWORD PTR[r14] + vaesenc xmm14,xmm14,xmm1 + vmovups xmm1,XMMWORD PTR[((160-128))+rcx] + cmp ebp,11 + jb $L$enc_tail + + vaesenc xmm9,xmm9,xmm15 + vaesenc xmm10,xmm10,xmm15 + vaesenc xmm11,xmm11,xmm15 + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vaesenc xmm14,xmm14,xmm15 + + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + vmovups xmm15,XMMWORD PTR[((176-128))+rcx] + vaesenc xmm14,xmm14,xmm1 + vmovups xmm1,XMMWORD PTR[((192-128))+rcx] + je $L$enc_tail + + vaesenc xmm9,xmm9,xmm15 + vaesenc xmm10,xmm10,xmm15 + vaesenc xmm11,xmm11,xmm15 + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vaesenc xmm14,xmm14,xmm15 + + vaesenc xmm9,xmm9,xmm1 + vaesenc xmm10,xmm10,xmm1 + vaesenc xmm11,xmm11,xmm1 + vaesenc xmm12,xmm12,xmm1 + vaesenc xmm13,xmm13,xmm1 + vmovups xmm15,XMMWORD PTR[((208-128))+rcx] + vaesenc xmm14,xmm14,xmm1 + vmovups xmm1,XMMWORD PTR[((224-128))+rcx] + jmp $L$enc_tail + +ALIGN 32 +$L$handle_ctr32:: + vmovdqu xmm0,XMMWORD PTR[r11] + 
vpshufb xmm6,xmm1,xmm0 + vmovdqu xmm5,XMMWORD PTR[48+r11] + vpaddd xmm10,xmm6,XMMWORD PTR[64+r11] + vpaddd xmm11,xmm6,xmm5 + vmovdqu xmm3,XMMWORD PTR[((0-32))+r9] + vpaddd xmm12,xmm10,xmm5 + vpshufb xmm10,xmm10,xmm0 + vpaddd xmm13,xmm11,xmm5 + vpshufb xmm11,xmm11,xmm0 + vpxor xmm10,xmm10,xmm15 + vpaddd xmm14,xmm12,xmm5 + vpshufb xmm12,xmm12,xmm0 + vpxor xmm11,xmm11,xmm15 + vpaddd xmm1,xmm13,xmm5 + vpshufb xmm13,xmm13,xmm0 + vpshufb xmm14,xmm14,xmm0 + vpshufb xmm1,xmm1,xmm0 + jmp $L$resume_ctr32 + +ALIGN 32 +$L$enc_tail:: + vaesenc xmm9,xmm9,xmm15 + vmovdqu XMMWORD PTR[(16+8)+rsp],xmm7 + vpalignr xmm8,xmm4,xmm4,8 + vaesenc xmm10,xmm10,xmm15 + vpclmulqdq xmm4,xmm4,xmm3,010h + vpxor xmm2,xmm1,XMMWORD PTR[rdi] + vaesenc xmm11,xmm11,xmm15 + vpxor xmm0,xmm1,XMMWORD PTR[16+rdi] + vaesenc xmm12,xmm12,xmm15 + vpxor xmm5,xmm1,XMMWORD PTR[32+rdi] + vaesenc xmm13,xmm13,xmm15 + vpxor xmm6,xmm1,XMMWORD PTR[48+rdi] + vaesenc xmm14,xmm14,xmm15 + vpxor xmm7,xmm1,XMMWORD PTR[64+rdi] + vpxor xmm3,xmm1,XMMWORD PTR[80+rdi] + vmovdqu xmm1,XMMWORD PTR[r8] + + vaesenclast xmm9,xmm9,xmm2 + vmovdqu xmm2,XMMWORD PTR[32+r11] + vaesenclast xmm10,xmm10,xmm0 + vpaddb xmm0,xmm1,xmm2 + mov QWORD PTR[((112+8))+rsp],r13 + lea rdi,QWORD PTR[96+rdi] + vaesenclast xmm11,xmm11,xmm5 + vpaddb xmm5,xmm0,xmm2 + mov QWORD PTR[((120+8))+rsp],r12 + lea rsi,QWORD PTR[96+rsi] + vmovdqu xmm15,XMMWORD PTR[((0-128))+rcx] + vaesenclast xmm12,xmm12,xmm6 + vpaddb xmm6,xmm5,xmm2 + vaesenclast xmm13,xmm13,xmm7 + vpaddb xmm7,xmm6,xmm2 + vaesenclast xmm14,xmm14,xmm3 + vpaddb xmm3,xmm7,xmm2 + + add r10,060h + sub rdx,06h + jc $L$6x_done + + vmovups XMMWORD PTR[(-96)+rsi],xmm9 + vpxor xmm9,xmm1,xmm15 + vmovups XMMWORD PTR[(-80)+rsi],xmm10 + vmovdqa xmm10,xmm0 + vmovups XMMWORD PTR[(-64)+rsi],xmm11 + vmovdqa xmm11,xmm5 + vmovups XMMWORD PTR[(-48)+rsi],xmm12 + vmovdqa xmm12,xmm6 + vmovups XMMWORD PTR[(-32)+rsi],xmm13 + vmovdqa xmm13,xmm7 + vmovups XMMWORD PTR[(-16)+rsi],xmm14 + vmovdqa xmm14,xmm3 + vmovdqu xmm7,XMMWORD PTR[((32+8))+rsp] + jmp $L$oop6x + +$L$6x_done:: + vpxor xmm8,xmm8,XMMWORD PTR[((16+8))+rsp] + vpxor xmm8,xmm8,xmm4 + + DB 0F3h,0C3h ;repret +_aesni_ctr32_ghash_6x ENDP +PUBLIC aesni_gcm_decrypt + +ALIGN 32 +aesni_gcm_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_gcm_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + xor r10,r10 + cmp rdx,060h + jb $L$gcm_dec_abort + + lea rax,QWORD PTR[rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[(-216)+rax],xmm6 + movaps XMMWORD PTR[(-200)+rax],xmm7 + movaps XMMWORD PTR[(-184)+rax],xmm8 + movaps XMMWORD PTR[(-168)+rax],xmm9 + movaps XMMWORD PTR[(-152)+rax],xmm10 + movaps XMMWORD PTR[(-136)+rax],xmm11 + movaps XMMWORD PTR[(-120)+rax],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 +$L$gcm_dec_body:: + vzeroupper + + vmovdqu xmm1,XMMWORD PTR[r8] + add rsp,-128 + mov ebx,DWORD PTR[12+r8] + lea r11,QWORD PTR[$L$bswap_mask] + lea r14,QWORD PTR[((-128))+rcx] + mov r15,0f80h + vmovdqu xmm8,XMMWORD PTR[r9] + and rsp,-128 + vmovdqu xmm0,XMMWORD PTR[r11] + lea rcx,QWORD PTR[128+rcx] + lea r9,QWORD PTR[((32+32))+r9] + mov ebp,DWORD PTR[((240-128))+rcx] + vpshufb xmm8,xmm8,xmm0 + + and r14,r15 + and r15,rsp + sub r15,r14 + jc $L$dec_no_key_aliasing + cmp r15,768 + jnc $L$dec_no_key_aliasing + sub rsp,r15 
+$L$dec_no_key_aliasing:: + + vmovdqu xmm7,XMMWORD PTR[80+rdi] + lea r14,QWORD PTR[rdi] + vmovdqu xmm4,XMMWORD PTR[64+rdi] + lea r15,QWORD PTR[((-192))+rdx*1+rdi] + vmovdqu xmm5,XMMWORD PTR[48+rdi] + shr rdx,4 + xor r10,r10 + vmovdqu xmm6,XMMWORD PTR[32+rdi] + vpshufb xmm7,xmm7,xmm0 + vmovdqu xmm2,XMMWORD PTR[16+rdi] + vpshufb xmm4,xmm4,xmm0 + vmovdqu xmm3,XMMWORD PTR[rdi] + vpshufb xmm5,xmm5,xmm0 + vmovdqu XMMWORD PTR[48+rsp],xmm4 + vpshufb xmm6,xmm6,xmm0 + vmovdqu XMMWORD PTR[64+rsp],xmm5 + vpshufb xmm2,xmm2,xmm0 + vmovdqu XMMWORD PTR[80+rsp],xmm6 + vpshufb xmm3,xmm3,xmm0 + vmovdqu XMMWORD PTR[96+rsp],xmm2 + vmovdqu XMMWORD PTR[112+rsp],xmm3 + + call _aesni_ctr32_ghash_6x + + vmovups XMMWORD PTR[(-96)+rsi],xmm9 + vmovups XMMWORD PTR[(-80)+rsi],xmm10 + vmovups XMMWORD PTR[(-64)+rsi],xmm11 + vmovups XMMWORD PTR[(-48)+rsi],xmm12 + vmovups XMMWORD PTR[(-32)+rsi],xmm13 + vmovups XMMWORD PTR[(-16)+rsi],xmm14 + + vpshufb xmm8,xmm8,XMMWORD PTR[r11] + vmovdqu XMMWORD PTR[(-64)+r9],xmm8 + + vzeroupper + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-216))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$gcm_dec_abort:: + mov rax,r10 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_gcm_decrypt:: +aesni_gcm_decrypt ENDP + +ALIGN 32 +_aesni_ctr32_6x PROC PRIVATE + vmovdqu xmm4,XMMWORD PTR[((0-128))+rcx] + vmovdqu xmm2,XMMWORD PTR[32+r11] + lea r13,QWORD PTR[((-1))+rbp] + vmovups xmm15,XMMWORD PTR[((16-128))+rcx] + lea r12,QWORD PTR[((32-128))+rcx] + vpxor xmm9,xmm1,xmm4 + add ebx,100663296 + jc $L$handle_ctr32_2 + vpaddb xmm10,xmm1,xmm2 + vpaddb xmm11,xmm10,xmm2 + vpxor xmm10,xmm10,xmm4 + vpaddb xmm12,xmm11,xmm2 + vpxor xmm11,xmm11,xmm4 + vpaddb xmm13,xmm12,xmm2 + vpxor xmm12,xmm12,xmm4 + vpaddb xmm14,xmm13,xmm2 + vpxor xmm13,xmm13,xmm4 + vpaddb xmm1,xmm14,xmm2 + vpxor xmm14,xmm14,xmm4 + jmp $L$oop_ctr32 + +ALIGN 16 +$L$oop_ctr32:: + vaesenc xmm9,xmm9,xmm15 + vaesenc xmm10,xmm10,xmm15 + vaesenc xmm11,xmm11,xmm15 + vaesenc xmm12,xmm12,xmm15 + vaesenc xmm13,xmm13,xmm15 + vaesenc xmm14,xmm14,xmm15 + vmovups xmm15,XMMWORD PTR[r12] + lea r12,QWORD PTR[16+r12] + dec r13d + jnz $L$oop_ctr32 + + vmovdqu xmm3,XMMWORD PTR[r12] + vaesenc xmm9,xmm9,xmm15 + vpxor xmm4,xmm3,XMMWORD PTR[rdi] + vaesenc xmm10,xmm10,xmm15 + vpxor xmm5,xmm3,XMMWORD PTR[16+rdi] + vaesenc xmm11,xmm11,xmm15 + vpxor xmm6,xmm3,XMMWORD PTR[32+rdi] + vaesenc xmm12,xmm12,xmm15 + vpxor xmm8,xmm3,XMMWORD PTR[48+rdi] + vaesenc xmm13,xmm13,xmm15 + vpxor xmm2,xmm3,XMMWORD PTR[64+rdi] + vaesenc xmm14,xmm14,xmm15 + vpxor xmm3,xmm3,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + + vaesenclast xmm9,xmm9,xmm4 + vaesenclast xmm10,xmm10,xmm5 + vaesenclast xmm11,xmm11,xmm6 + vaesenclast xmm12,xmm12,xmm8 + vaesenclast xmm13,xmm13,xmm2 + vaesenclast xmm14,xmm14,xmm3 + vmovups XMMWORD PTR[rsi],xmm9 + vmovups XMMWORD PTR[16+rsi],xmm10 + vmovups XMMWORD PTR[32+rsi],xmm11 + vmovups XMMWORD PTR[48+rsi],xmm12 + vmovups XMMWORD PTR[64+rsi],xmm13 + vmovups XMMWORD PTR[80+rsi],xmm14 + 
lea rsi,QWORD PTR[96+rsi] + + DB 0F3h,0C3h ;repret +ALIGN 32 +$L$handle_ctr32_2:: + vpshufb xmm6,xmm1,xmm0 + vmovdqu xmm5,XMMWORD PTR[48+r11] + vpaddd xmm10,xmm6,XMMWORD PTR[64+r11] + vpaddd xmm11,xmm6,xmm5 + vpaddd xmm12,xmm10,xmm5 + vpshufb xmm10,xmm10,xmm0 + vpaddd xmm13,xmm11,xmm5 + vpshufb xmm11,xmm11,xmm0 + vpxor xmm10,xmm10,xmm4 + vpaddd xmm14,xmm12,xmm5 + vpshufb xmm12,xmm12,xmm0 + vpxor xmm11,xmm11,xmm4 + vpaddd xmm1,xmm13,xmm5 + vpshufb xmm13,xmm13,xmm0 + vpxor xmm12,xmm12,xmm4 + vpshufb xmm14,xmm14,xmm0 + vpxor xmm13,xmm13,xmm4 + vpshufb xmm1,xmm1,xmm0 + vpxor xmm14,xmm14,xmm4 + jmp $L$oop_ctr32 +_aesni_ctr32_6x ENDP + +PUBLIC aesni_gcm_encrypt + +ALIGN 32 +aesni_gcm_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_gcm_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + xor r10,r10 + cmp rdx,060h*3 + jb $L$gcm_enc_abort + + lea rax,QWORD PTR[rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[(-216)+rax],xmm6 + movaps XMMWORD PTR[(-200)+rax],xmm7 + movaps XMMWORD PTR[(-184)+rax],xmm8 + movaps XMMWORD PTR[(-168)+rax],xmm9 + movaps XMMWORD PTR[(-152)+rax],xmm10 + movaps XMMWORD PTR[(-136)+rax],xmm11 + movaps XMMWORD PTR[(-120)+rax],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 +$L$gcm_enc_body:: + vzeroupper + + vmovdqu xmm1,XMMWORD PTR[r8] + add rsp,-128 + mov ebx,DWORD PTR[12+r8] + lea r11,QWORD PTR[$L$bswap_mask] + lea r14,QWORD PTR[((-128))+rcx] + mov r15,0f80h + lea rcx,QWORD PTR[128+rcx] + vmovdqu xmm0,XMMWORD PTR[r11] + and rsp,-128 + mov ebp,DWORD PTR[((240-128))+rcx] + + and r14,r15 + and r15,rsp + sub r15,r14 + jc $L$enc_no_key_aliasing + cmp r15,768 + jnc $L$enc_no_key_aliasing + sub rsp,r15 +$L$enc_no_key_aliasing:: + + lea r14,QWORD PTR[rsi] + lea r15,QWORD PTR[((-192))+rdx*1+rsi] + shr rdx,4 + + call _aesni_ctr32_6x + vpshufb xmm8,xmm9,xmm0 + vpshufb xmm2,xmm10,xmm0 + vmovdqu XMMWORD PTR[112+rsp],xmm8 + vpshufb xmm4,xmm11,xmm0 + vmovdqu XMMWORD PTR[96+rsp],xmm2 + vpshufb xmm5,xmm12,xmm0 + vmovdqu XMMWORD PTR[80+rsp],xmm4 + vpshufb xmm6,xmm13,xmm0 + vmovdqu XMMWORD PTR[64+rsp],xmm5 + vpshufb xmm7,xmm14,xmm0 + vmovdqu XMMWORD PTR[48+rsp],xmm6 + + call _aesni_ctr32_6x + + vmovdqu xmm8,XMMWORD PTR[r9] + lea r9,QWORD PTR[((32+32))+r9] + sub rdx,12 + mov r10,060h*2 + vpshufb xmm8,xmm8,xmm0 + + call _aesni_ctr32_ghash_6x + vmovdqu xmm7,XMMWORD PTR[32+rsp] + vmovdqu xmm0,XMMWORD PTR[r11] + vmovdqu xmm3,XMMWORD PTR[((0-32))+r9] + vpunpckhqdq xmm1,xmm7,xmm7 + vmovdqu xmm15,XMMWORD PTR[((32-32))+r9] + vmovups XMMWORD PTR[(-96)+rsi],xmm9 + vpshufb xmm9,xmm9,xmm0 + vpxor xmm1,xmm1,xmm7 + vmovups XMMWORD PTR[(-80)+rsi],xmm10 + vpshufb xmm10,xmm10,xmm0 + vmovups XMMWORD PTR[(-64)+rsi],xmm11 + vpshufb xmm11,xmm11,xmm0 + vmovups XMMWORD PTR[(-48)+rsi],xmm12 + vpshufb xmm12,xmm12,xmm0 + vmovups XMMWORD PTR[(-32)+rsi],xmm13 + vpshufb xmm13,xmm13,xmm0 + vmovups XMMWORD PTR[(-16)+rsi],xmm14 + vpshufb xmm14,xmm14,xmm0 + vmovdqu XMMWORD PTR[16+rsp],xmm9 + vmovdqu xmm6,XMMWORD PTR[48+rsp] + vmovdqu xmm0,XMMWORD PTR[((16-32))+r9] + vpunpckhqdq xmm2,xmm6,xmm6 + vpclmulqdq xmm5,xmm7,xmm3,000h + vpxor xmm2,xmm2,xmm6 + vpclmulqdq xmm7,xmm7,xmm3,011h + vpclmulqdq xmm1,xmm1,xmm15,000h + + vmovdqu xmm9,XMMWORD PTR[64+rsp] + vpclmulqdq xmm4,xmm6,xmm0,000h + vmovdqu xmm3,XMMWORD PTR[((48-32))+r9] + vpxor 
xmm4,xmm4,xmm5 + vpunpckhqdq xmm5,xmm9,xmm9 + vpclmulqdq xmm6,xmm6,xmm0,011h + vpxor xmm5,xmm5,xmm9 + vpxor xmm6,xmm6,xmm7 + vpclmulqdq xmm2,xmm2,xmm15,010h + vmovdqu xmm15,XMMWORD PTR[((80-32))+r9] + vpxor xmm2,xmm2,xmm1 + + vmovdqu xmm1,XMMWORD PTR[80+rsp] + vpclmulqdq xmm7,xmm9,xmm3,000h + vmovdqu xmm0,XMMWORD PTR[((64-32))+r9] + vpxor xmm7,xmm7,xmm4 + vpunpckhqdq xmm4,xmm1,xmm1 + vpclmulqdq xmm9,xmm9,xmm3,011h + vpxor xmm4,xmm4,xmm1 + vpxor xmm9,xmm9,xmm6 + vpclmulqdq xmm5,xmm5,xmm15,000h + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm2,XMMWORD PTR[96+rsp] + vpclmulqdq xmm6,xmm1,xmm0,000h + vmovdqu xmm3,XMMWORD PTR[((96-32))+r9] + vpxor xmm6,xmm6,xmm7 + vpunpckhqdq xmm7,xmm2,xmm2 + vpclmulqdq xmm1,xmm1,xmm0,011h + vpxor xmm7,xmm7,xmm2 + vpxor xmm1,xmm1,xmm9 + vpclmulqdq xmm4,xmm4,xmm15,010h + vmovdqu xmm15,XMMWORD PTR[((128-32))+r9] + vpxor xmm4,xmm4,xmm5 + + vpxor xmm8,xmm8,XMMWORD PTR[112+rsp] + vpclmulqdq xmm5,xmm2,xmm3,000h + vmovdqu xmm0,XMMWORD PTR[((112-32))+r9] + vpunpckhqdq xmm9,xmm8,xmm8 + vpxor xmm5,xmm5,xmm6 + vpclmulqdq xmm2,xmm2,xmm3,011h + vpxor xmm9,xmm9,xmm8 + vpxor xmm2,xmm2,xmm1 + vpclmulqdq xmm7,xmm7,xmm15,000h + vpxor xmm4,xmm7,xmm4 + + vpclmulqdq xmm6,xmm8,xmm0,000h + vmovdqu xmm3,XMMWORD PTR[((0-32))+r9] + vpunpckhqdq xmm1,xmm14,xmm14 + vpclmulqdq xmm8,xmm8,xmm0,011h + vpxor xmm1,xmm1,xmm14 + vpxor xmm5,xmm6,xmm5 + vpclmulqdq xmm9,xmm9,xmm15,010h + vmovdqu xmm15,XMMWORD PTR[((32-32))+r9] + vpxor xmm7,xmm8,xmm2 + vpxor xmm6,xmm9,xmm4 + + vmovdqu xmm0,XMMWORD PTR[((16-32))+r9] + vpxor xmm9,xmm7,xmm5 + vpclmulqdq xmm4,xmm14,xmm3,000h + vpxor xmm6,xmm6,xmm9 + vpunpckhqdq xmm2,xmm13,xmm13 + vpclmulqdq xmm14,xmm14,xmm3,011h + vpxor xmm2,xmm2,xmm13 + vpslldq xmm9,xmm6,8 + vpclmulqdq xmm1,xmm1,xmm15,000h + vpxor xmm8,xmm5,xmm9 + vpsrldq xmm6,xmm6,8 + vpxor xmm7,xmm7,xmm6 + + vpclmulqdq xmm5,xmm13,xmm0,000h + vmovdqu xmm3,XMMWORD PTR[((48-32))+r9] + vpxor xmm5,xmm5,xmm4 + vpunpckhqdq xmm9,xmm12,xmm12 + vpclmulqdq xmm13,xmm13,xmm0,011h + vpxor xmm9,xmm9,xmm12 + vpxor xmm13,xmm13,xmm14 + vpalignr xmm14,xmm8,xmm8,8 + vpclmulqdq xmm2,xmm2,xmm15,010h + vmovdqu xmm15,XMMWORD PTR[((80-32))+r9] + vpxor xmm2,xmm2,xmm1 + + vpclmulqdq xmm4,xmm12,xmm3,000h + vmovdqu xmm0,XMMWORD PTR[((64-32))+r9] + vpxor xmm4,xmm4,xmm5 + vpunpckhqdq xmm1,xmm11,xmm11 + vpclmulqdq xmm12,xmm12,xmm3,011h + vpxor xmm1,xmm1,xmm11 + vpxor xmm12,xmm12,xmm13 + vxorps xmm7,xmm7,XMMWORD PTR[16+rsp] + vpclmulqdq xmm9,xmm9,xmm15,000h + vpxor xmm9,xmm9,xmm2 + + vpclmulqdq xmm8,xmm8,XMMWORD PTR[16+r11],010h + vxorps xmm8,xmm8,xmm14 + + vpclmulqdq xmm5,xmm11,xmm0,000h + vmovdqu xmm3,XMMWORD PTR[((96-32))+r9] + vpxor xmm5,xmm5,xmm4 + vpunpckhqdq xmm2,xmm10,xmm10 + vpclmulqdq xmm11,xmm11,xmm0,011h + vpxor xmm2,xmm2,xmm10 + vpalignr xmm14,xmm8,xmm8,8 + vpxor xmm11,xmm11,xmm12 + vpclmulqdq xmm1,xmm1,xmm15,010h + vmovdqu xmm15,XMMWORD PTR[((128-32))+r9] + vpxor xmm1,xmm1,xmm9 + + vxorps xmm14,xmm14,xmm7 + vpclmulqdq xmm8,xmm8,XMMWORD PTR[16+r11],010h + vxorps xmm8,xmm8,xmm14 + + vpclmulqdq xmm4,xmm10,xmm3,000h + vmovdqu xmm0,XMMWORD PTR[((112-32))+r9] + vpxor xmm4,xmm4,xmm5 + vpunpckhqdq xmm9,xmm8,xmm8 + vpclmulqdq xmm10,xmm10,xmm3,011h + vpxor xmm9,xmm9,xmm8 + vpxor xmm10,xmm10,xmm11 + vpclmulqdq xmm2,xmm2,xmm15,000h + vpxor xmm2,xmm2,xmm1 + + vpclmulqdq xmm5,xmm8,xmm0,000h + vpclmulqdq xmm7,xmm8,xmm0,011h + vpxor xmm5,xmm5,xmm4 + vpclmulqdq xmm6,xmm9,xmm15,010h + vpxor xmm7,xmm7,xmm10 + vpxor xmm6,xmm6,xmm2 + + vpxor xmm4,xmm7,xmm5 + vpxor xmm6,xmm6,xmm4 + vpslldq xmm1,xmm6,8 + vmovdqu xmm3,XMMWORD PTR[16+r11] + vpsrldq 
xmm6,xmm6,8 + vpxor xmm8,xmm5,xmm1 + vpxor xmm7,xmm7,xmm6 + + vpalignr xmm2,xmm8,xmm8,8 + vpclmulqdq xmm8,xmm8,xmm3,010h + vpxor xmm8,xmm8,xmm2 + + vpalignr xmm2,xmm8,xmm8,8 + vpclmulqdq xmm8,xmm8,xmm3,010h + vpxor xmm2,xmm2,xmm7 + vpxor xmm8,xmm8,xmm2 + vpshufb xmm8,xmm8,XMMWORD PTR[r11] + vmovdqu XMMWORD PTR[(-64)+r9],xmm8 + + vzeroupper + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$gcm_enc_abort:: + mov rax,r10 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_gcm_encrypt:: +aesni_gcm_encrypt ENDP +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$poly:: +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h +$L$one_msb:: +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +$L$two_lsb:: +DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +$L$one_lsb:: +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108 +DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 +DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +gcm_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[120+r8] + + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + mov QWORD PTR[240+r8],r15 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[144+r8],rbx + + lea rsi,QWORD PTR[((-216))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +gcm_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_gcm_decrypt + DD imagerel 
$L$SEH_end_aesni_gcm_decrypt + DD imagerel $L$SEH_gcm_dec_info + + DD imagerel $L$SEH_begin_aesni_gcm_encrypt + DD imagerel $L$SEH_end_aesni_gcm_encrypt + DD imagerel $L$SEH_gcm_enc_info +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_gcm_dec_info:: +DB 9,0,0,0 + DD imagerel gcm_se_handler + DD imagerel $L$gcm_dec_body,imagerel $L$gcm_dec_abort +$L$SEH_gcm_enc_info:: +DB 9,0,0,0 + DD imagerel gcm_se_handler + DD imagerel $L$gcm_enc_body,imagerel $L$gcm_enc_abort + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/modes/ghash-x86_64.asm b/deps/openssl/asm/x64-win32-masm/modes/ghash-x86_64.asm index 01fe3071a8c2db..fd6300dc985a44 100644 --- a/deps/openssl/asm/x64-win32-masm/modes/ghash-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/modes/ghash-x86_64.asm @@ -1,5 +1,6 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC gcm_gmult_4bit @@ -683,6 +684,11 @@ PUBLIC gcm_init_clmul ALIGN 16 gcm_init_clmul PROC PUBLIC +$L$_init_clmul:: +$L$SEH_begin_gcm_init_clmul:: + +DB 048h,083h,0ech,018h +DB 00fh,029h,034h,024h movdqu xmm2,XMMWORD PTR[rdx] pshufd xmm2,xmm2,78 @@ -701,15 +707,15 @@ gcm_init_clmul PROC PUBLIC pxor xmm2,xmm5 + pshufd xmm6,xmm2,78 movdqa xmm0,xmm2 + pxor xmm6,xmm2 movdqa xmm1,xmm0 pshufd xmm3,xmm0,78 - pshufd xmm4,xmm2,78 pxor xmm3,xmm0 - pxor xmm4,xmm2 DB 102,15,58,68,194,0 DB 102,15,58,68,202,17 -DB 102,15,58,68,220,0 +DB 102,15,58,68,222,0 pxor xmm3,xmm0 pxor xmm3,xmm1 @@ -719,44 +725,137 @@ DB 102,15,58,68,220,0 pxor xmm1,xmm3 pxor xmm0,xmm4 + movdqa xmm4,xmm0 movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 psllq xmm0,1 pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu XMMWORD PTR[rcx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD PTR[16+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 pxor xmm0,xmm3 psllq xmm0,57 - movdqa xmm4,xmm0 + movdqa xmm3,xmm0 pslldq xmm0,8 - psrldq xmm4,8 - pxor xmm0,xmm3 - pxor xmm1,xmm4 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 psrlq xmm0,5 pxor xmm0,xmm4 psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm5,xmm0 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 pxor xmm0,xmm4 - pxor xmm4,xmm1 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 pxor xmm0,xmm4 - movdqu XMMWORD PTR[rcx],xmm2 - movdqu XMMWORD PTR[16+rcx],xmm0 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm5,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm5 + movdqu XMMWORD PTR[48+rcx],xmm5 + pxor xmm4,xmm0 + 
movdqu XMMWORD PTR[64+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR[80+rcx],xmm4 + movaps xmm6,XMMWORD PTR[rsp] + lea rsp,QWORD PTR[24+rsp] +$L$SEH_end_gcm_init_clmul:: DB 0F3h,0C3h ;repret gcm_init_clmul ENDP PUBLIC gcm_gmult_clmul ALIGN 16 gcm_gmult_clmul PROC PUBLIC +$L$_gmult_clmul:: movdqu xmm0,XMMWORD PTR[rcx] movdqa xmm5,XMMWORD PTR[$L$bswap_mask] movdqu xmm2,XMMWORD PTR[rdx] + movdqu xmm4,XMMWORD PTR[32+rdx] DB 102,15,56,0,197 movdqa xmm1,xmm0 pshufd xmm3,xmm0,78 - pshufd xmm4,xmm2,78 pxor xmm3,xmm0 - pxor xmm4,xmm2 DB 102,15,58,68,194,0 DB 102,15,58,68,202,17 DB 102,15,58,68,220,0 @@ -769,215 +868,393 @@ DB 102,15,58,68,220,0 pxor xmm1,xmm3 pxor xmm0,xmm4 + movdqa xmm4,xmm0 movdqa xmm3,xmm0 - psllq xmm0,1 - pxor xmm0,xmm3 psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 pxor xmm0,xmm3 psllq xmm0,57 - movdqa xmm4,xmm0 + movdqa xmm3,xmm0 pslldq xmm0,8 - psrldq xmm4,8 - pxor xmm0,xmm3 - pxor xmm1,xmm4 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 movdqa xmm4,xmm0 - psrlq xmm0,5 - pxor xmm0,xmm4 psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 pxor xmm0,xmm4 - pxor xmm4,xmm1 psrlq xmm0,1 - pxor xmm0,xmm4 + pxor xmm0,xmm1 DB 102,15,56,0,197 movdqu XMMWORD PTR[rcx],xmm0 DB 0F3h,0C3h ;repret gcm_gmult_clmul ENDP PUBLIC gcm_ghash_clmul -ALIGN 16 +ALIGN 32 gcm_ghash_clmul PROC PUBLIC +$L$_ghash_clmul:: + lea rax,QWORD PTR[((-136))+rsp] $L$SEH_begin_gcm_ghash_clmul:: -DB 048h,083h,0ech,058h - -DB 00fh,029h,034h,024h +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm10,XMMWORD PTR[$L$bswap_mask] -DB 00fh,029h,07ch,024h,010h + movdqu xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdx] + movdqu xmm7,XMMWORD PTR[32+rdx] +DB 102,65,15,56,0,194 -DB 044h,00fh,029h,044h,024h,020h + sub r9,010h + jz $L$odd_tail -DB 044h,00fh,029h,04ch,024h,030h + movdqu xmm6,XMMWORD PTR[16+rdx] + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+4))] + cmp r9,030h + jb $L$skip4x + + and eax,71303168 + cmp eax,4194304 + je $L$skip4x + + sub r9,030h + mov rax,0A040608020C0E000h + movdqu xmm14,XMMWORD PTR[48+rdx] + movdqu xmm15,XMMWORD PTR[64+rdx] + + + + + movdqu xmm3,XMMWORD PTR[48+r8] + movdqu xmm11,XMMWORD PTR[32+r8] +DB 102,65,15,56,0,218 +DB 102,69,15,56,0,218 + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm12,xmm11 +DB 102,68,15,58,68,222,0 +DB 102,68,15,58,68,238,17 +DB 102,68,15,58,68,231,16 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + movups xmm7,XMMWORD PTR[80+rdx] + xorps xmm4,xmm12 + + movdqu xmm11,XMMWORD PTR[16+r8] + movdqu xmm8,XMMWORD PTR[r8] +DB 102,69,15,56,0,218 +DB 102,69,15,56,0,194 + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + movdqa xmm1,xmm0 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 +DB 102,69,15,58,68,238,17 +DB 102,68,15,58,68,231,0 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + + lea r8,QWORD PTR[64+r8] + sub r9,040h + jc $L$tail4x + + jmp $L$mod4_loop +ALIGN 32 +$L$mod4_loop:: +DB 102,65,15,58,68,199,0 + xorps xmm4,xmm12 + movdqu xmm11,XMMWORD PTR[48+r8] +DB 102,69,15,56,0,218 +DB 102,65,15,58,68,207,17 + xorps xmm0,xmm3 + movdqu xmm3,XMMWORD PTR[32+r8] + movdqa xmm13,xmm11 +DB 102,68,15,58,68,199,16 + pshufd xmm12,xmm11,78 + xorps xmm1,xmm5 + 
pxor xmm12,xmm11 +DB 102,65,15,56,0,218 + movups xmm7,XMMWORD PTR[32+rdx] + xorps xmm8,xmm4 +DB 102,68,15,58,68,218,0 + pshufd xmm4,xmm3,78 + + pxor xmm8,xmm0 + movdqa xmm5,xmm3 + pxor xmm8,xmm1 + pxor xmm4,xmm3 + movdqa xmm9,xmm8 +DB 102,68,15,58,68,234,17 + pslldq xmm8,8 + psrldq xmm9,8 + pxor xmm0,xmm8 + movdqa xmm8,XMMWORD PTR[$L$7_mask] + pxor xmm1,xmm9 +DB 102,76,15,110,200 + + pand xmm8,xmm0 +DB 102,69,15,56,0,200 + pxor xmm9,xmm0 +DB 102,68,15,58,68,231,0 + psllq xmm9,57 + movdqa xmm8,xmm9 + pslldq xmm9,8 +DB 102,15,58,68,222,0 + psrldq xmm8,8 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + movdqu xmm8,XMMWORD PTR[r8] + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,238,17 + xorps xmm3,xmm11 + movdqu xmm11,XMMWORD PTR[16+r8] +DB 102,69,15,56,0,218 +DB 102,15,58,68,231,16 + xorps xmm5,xmm13 + movups xmm7,XMMWORD PTR[80+rdx] +DB 102,69,15,56,0,194 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 -DB 044h,00fh,029h,054h,024h,040h + movdqa xmm13,xmm11 + pxor xmm4,xmm12 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm1,xmm0 +DB 102,69,15,58,68,238,17 + xorps xmm3,xmm11 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 + +DB 102,68,15,58,68,231,0 + xorps xmm5,xmm13 + + lea r8,QWORD PTR[64+r8] + sub r9,040h + jnc $L$mod4_loop + +$L$tail4x:: +DB 102,65,15,58,68,199,0 +DB 102,65,15,58,68,207,17 +DB 102,68,15,58,68,199,16 + xorps xmm4,xmm12 + xorps xmm0,xmm3 + xorps xmm1,xmm5 + pxor xmm1,xmm0 + pxor xmm8,xmm4 + + pxor xmm8,xmm1 + pxor xmm1,xmm0 + + movdqa xmm9,xmm8 + psrldq xmm8,8 + pslldq xmm9,8 + pxor xmm1,xmm8 + pxor xmm0,xmm9 - movdqa xmm5,XMMWORD PTR[$L$bswap_mask] + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 - movdqu xmm0,XMMWORD PTR[rcx] - movdqu xmm2,XMMWORD PTR[rdx] -DB 102,15,56,0,197 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + add r9,040h + jz $L$done + movdqu xmm7,XMMWORD PTR[32+rdx] sub r9,010h jz $L$odd_tail +$L$skip4x:: - movdqu xmm8,XMMWORD PTR[16+rdx] + movdqu xmm8,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[16+r8] +DB 102,69,15,56,0,194 +DB 102,65,15,56,0,218 + pxor xmm0,xmm8 - movdqu xmm3,XMMWORD PTR[r8] - movdqu xmm6,XMMWORD PTR[16+r8] -DB 102,15,56,0,221 -DB 102,15,56,0,245 - pxor xmm0,xmm3 - movdqa xmm7,xmm6 - pshufd xmm3,xmm6,78 - pshufd xmm4,xmm2,78 - pxor xmm3,xmm6 - pxor xmm4,xmm2 -DB 102,15,58,68,242,0 -DB 102,15,58,68,250,17 -DB 102,15,58,68,220,0 - pxor xmm3,xmm6 - pxor xmm3,xmm7 - - movdqa xmm4,xmm3 - psrldq xmm3,8 - pslldq xmm4,8 - pxor xmm7,xmm3 - pxor xmm6,xmm4 - movdqa xmm1,xmm0 - pshufd xmm3,xmm0,78 - pshufd xmm4,xmm8,78 - pxor xmm3,xmm0 - pxor xmm4,xmm8 + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 lea r8,QWORD PTR[32+r8] + nop sub r9,020h jbe $L$even_tail + nop + jmp $L$mod_loop +ALIGN 32 $L$mod_loop:: -DB 102,65,15,58,68,192,0 -DB 102,65,15,58,68,200,17 -DB 102,15,58,68,220,0 - pxor xmm3,xmm0 - pxor xmm3,xmm1 + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 - movdqa xmm4,xmm3 - psrldq xmm3,8 +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + movdqu xmm9,XMMWORD PTR[r8] + pxor xmm8,xmm0 +DB 102,69,15,56,0,202 + movdqu xmm3,XMMWORD PTR[16+r8] + + pxor xmm8,xmm1 + pxor 
xmm1,xmm9 + pxor xmm4,xmm8 +DB 102,65,15,56,0,218 + movdqa xmm8,xmm4 + psrldq xmm8,8 pslldq xmm4,8 - pxor xmm1,xmm3 + pxor xmm1,xmm8 pxor xmm0,xmm4 - movdqu xmm3,XMMWORD PTR[r8] - pxor xmm0,xmm6 - pxor xmm1,xmm7 - - movdqu xmm6,XMMWORD PTR[16+r8] -DB 102,15,56,0,221 -DB 102,15,56,0,245 - - movdqa xmm7,xmm6 - pshufd xmm9,xmm6,78 - pshufd xmm10,xmm2,78 - pxor xmm9,xmm6 - pxor xmm10,xmm2 - pxor xmm1,xmm3 - movdqa xmm3,xmm0 - psllq xmm0,1 - pxor xmm0,xmm3 + movdqa xmm5,xmm3 + + movdqa xmm9,xmm0 + movdqa xmm8,xmm0 psllq xmm0,5 - pxor xmm0,xmm3 -DB 102,15,58,68,242,0 + pxor xmm8,xmm0 +DB 102,15,58,68,218,0 + psllq xmm0,1 + pxor xmm0,xmm8 psllq xmm0,57 - movdqa xmm4,xmm0 + movdqa xmm8,xmm0 pslldq xmm0,8 - psrldq xmm4,8 - pxor xmm0,xmm3 - pxor xmm1,xmm4 + psrldq xmm8,8 + pxor xmm0,xmm9 + pshufd xmm4,xmm5,78 + pxor xmm1,xmm8 + pxor xmm4,xmm5 -DB 102,15,58,68,250,17 - movdqa xmm4,xmm0 - psrlq xmm0,5 - pxor xmm0,xmm4 + movdqa xmm9,xmm0 psrlq xmm0,1 - pxor xmm0,xmm4 - pxor xmm4,xmm1 +DB 102,15,58,68,234,17 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm9 + lea r8,QWORD PTR[32+r8] psrlq xmm0,1 - pxor xmm0,xmm4 - -DB 102,69,15,58,68,202,0 - movdqa xmm1,xmm0 - pshufd xmm3,xmm0,78 - pshufd xmm4,xmm8,78 - pxor xmm3,xmm0 - pxor xmm4,xmm8 - - pxor xmm9,xmm6 - pxor xmm9,xmm7 - movdqa xmm10,xmm9 - psrldq xmm9,8 - pslldq xmm10,8 - pxor xmm7,xmm9 - pxor xmm6,xmm10 +DB 102,15,58,68,231,0 + pxor xmm0,xmm1 - lea r8,QWORD PTR[32+r8] sub r9,020h ja $L$mod_loop $L$even_tail:: -DB 102,65,15,58,68,192,0 -DB 102,65,15,58,68,200,17 -DB 102,15,58,68,220,0 - pxor xmm3,xmm0 - pxor xmm3,xmm1 + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 - movdqa xmm4,xmm3 - psrldq xmm3,8 +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + pxor xmm8,xmm0 + pxor xmm8,xmm1 + pxor xmm4,xmm8 + movdqa xmm8,xmm4 + psrldq xmm8,8 pslldq xmm4,8 - pxor xmm1,xmm3 + pxor xmm1,xmm8 pxor xmm0,xmm4 - pxor xmm0,xmm6 - pxor xmm1,xmm7 + movdqa xmm4,xmm0 movdqa xmm3,xmm0 - psllq xmm0,1 - pxor xmm0,xmm3 psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 pxor xmm0,xmm3 psllq xmm0,57 - movdqa xmm4,xmm0 + movdqa xmm3,xmm0 pslldq xmm0,8 - psrldq xmm4,8 - pxor xmm0,xmm3 - pxor xmm1,xmm4 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 movdqa xmm4,xmm0 - psrlq xmm0,5 - pxor xmm0,xmm4 psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 pxor xmm0,xmm4 - pxor xmm4,xmm1 psrlq xmm0,1 - pxor xmm0,xmm4 + pxor xmm0,xmm1 test r9,r9 jnz $L$done $L$odd_tail:: - movdqu xmm3,XMMWORD PTR[r8] -DB 102,15,56,0,221 - pxor xmm0,xmm3 + movdqu xmm8,XMMWORD PTR[r8] +DB 102,69,15,56,0,194 + pxor xmm0,xmm8 movdqa xmm1,xmm0 pshufd xmm3,xmm0,78 - pshufd xmm4,xmm2,78 pxor xmm3,xmm0 - pxor xmm4,xmm2 DB 102,15,58,68,194,0 DB 102,15,58,68,202,17 -DB 102,15,58,68,220,0 +DB 102,15,58,68,223,0 pxor xmm3,xmm0 pxor xmm3,xmm1 @@ -987,44 +1264,576 @@ DB 102,15,58,68,220,0 pxor xmm1,xmm3 pxor xmm0,xmm4 + movdqa xmm4,xmm0 movdqa xmm3,xmm0 - psllq xmm0,1 - pxor xmm0,xmm3 psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 pxor xmm0,xmm3 psllq xmm0,57 - movdqa xmm4,xmm0 + movdqa xmm3,xmm0 pslldq xmm0,8 - psrldq xmm4,8 - pxor xmm0,xmm3 - pxor xmm1,xmm4 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 movdqa xmm4,xmm0 - psrlq xmm0,5 - pxor xmm0,xmm4 psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 pxor xmm0,xmm4 - pxor xmm4,xmm1 psrlq xmm0,1 - pxor xmm0,xmm4 + pxor xmm0,xmm1 $L$done:: -DB 102,15,56,0,197 +DB 102,65,15,56,0,194 movdqu XMMWORD PTR[rcx],xmm0 movaps xmm6,XMMWORD PTR[rsp] movaps 
xmm7,XMMWORD PTR[16+rsp] movaps xmm8,XMMWORD PTR[32+rsp] movaps xmm9,XMMWORD PTR[48+rsp] movaps xmm10,XMMWORD PTR[64+rsp] - add rsp,058h - DB 0F3h,0C3h ;repret + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] $L$SEH_end_gcm_ghash_clmul:: + DB 0F3h,0C3h ;repret gcm_ghash_clmul ENDP +PUBLIC gcm_init_avx + +ALIGN 32 +gcm_init_avx PROC PUBLIC +$L$SEH_begin_gcm_init_avx:: + +DB 048h,083h,0ech,018h +DB 00fh,029h,034h,024h + vzeroupper + + vmovdqu xmm2,XMMWORD PTR[rdx] + vpshufd xmm2,xmm2,78 + + + vpshufd xmm4,xmm2,255 + vpsrlq xmm3,xmm2,63 + vpsllq xmm2,xmm2,1 + vpxor xmm5,xmm5,xmm5 + vpcmpgtd xmm5,xmm5,xmm4 + vpslldq xmm3,xmm3,8 + vpor xmm2,xmm2,xmm3 + + + vpand xmm5,xmm5,XMMWORD PTR[$L$0x1c2_polynomial] + vpxor xmm2,xmm2,xmm5 + + vpunpckhqdq xmm6,xmm2,xmm2 + vmovdqa xmm0,xmm2 + vpxor xmm6,xmm6,xmm2 + mov r10,4 + jmp $L$init_start_avx +ALIGN 32 +$L$init_loop_avx:: + vpalignr xmm5,xmm4,xmm3,8 + vmovdqu XMMWORD PTR[(-16)+rcx],xmm5 + vpunpckhqdq xmm3,xmm0,xmm0 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm1,xmm0,xmm2,011h + vpclmulqdq xmm0,xmm0,xmm2,000h + vpclmulqdq xmm3,xmm3,xmm6,000h + vpxor xmm4,xmm1,xmm0 + vpxor xmm3,xmm3,xmm4 + + vpslldq xmm4,xmm3,8 + vpsrldq xmm3,xmm3,8 + vpxor xmm0,xmm0,xmm4 + vpxor xmm1,xmm1,xmm3 + vpsllq xmm3,xmm0,57 + vpsllq xmm4,xmm0,62 + vpxor xmm4,xmm4,xmm3 + vpsllq xmm3,xmm0,63 + vpxor xmm4,xmm4,xmm3 + vpslldq xmm3,xmm4,8 + vpsrldq xmm4,xmm4,8 + vpxor xmm0,xmm0,xmm3 + vpxor xmm1,xmm1,xmm4 + + vpsrlq xmm4,xmm0,1 + vpxor xmm1,xmm1,xmm0 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm4,xmm4,5 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm0,xmm0,1 + vpxor xmm0,xmm0,xmm1 +$L$init_start_avx:: + vmovdqa xmm5,xmm0 + vpunpckhqdq xmm3,xmm0,xmm0 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm1,xmm0,xmm2,011h + vpclmulqdq xmm0,xmm0,xmm2,000h + vpclmulqdq xmm3,xmm3,xmm6,000h + vpxor xmm4,xmm1,xmm0 + vpxor xmm3,xmm3,xmm4 + + vpslldq xmm4,xmm3,8 + vpsrldq xmm3,xmm3,8 + vpxor xmm0,xmm0,xmm4 + vpxor xmm1,xmm1,xmm3 + vpsllq xmm3,xmm0,57 + vpsllq xmm4,xmm0,62 + vpxor xmm4,xmm4,xmm3 + vpsllq xmm3,xmm0,63 + vpxor xmm4,xmm4,xmm3 + vpslldq xmm3,xmm4,8 + vpsrldq xmm4,xmm4,8 + vpxor xmm0,xmm0,xmm3 + vpxor xmm1,xmm1,xmm4 + + vpsrlq xmm4,xmm0,1 + vpxor xmm1,xmm1,xmm0 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm4,xmm4,5 + vpxor xmm0,xmm0,xmm4 + vpsrlq xmm0,xmm0,1 + vpxor xmm0,xmm0,xmm1 + vpshufd xmm3,xmm5,78 + vpshufd xmm4,xmm0,78 + vpxor xmm3,xmm3,xmm5 + vmovdqu XMMWORD PTR[rcx],xmm5 + vpxor xmm4,xmm4,xmm0 + vmovdqu XMMWORD PTR[16+rcx],xmm0 + lea rcx,QWORD PTR[48+rcx] + sub r10,1 + jnz $L$init_loop_avx + + vpalignr xmm5,xmm3,xmm4,8 + vmovdqu XMMWORD PTR[(-16)+rcx],xmm5 + + vzeroupper + movaps xmm6,XMMWORD PTR[rsp] + lea rsp,QWORD PTR[24+rsp] +$L$SEH_end_gcm_init_avx:: + DB 0F3h,0C3h ;repret +gcm_init_avx ENDP +PUBLIC gcm_gmult_avx + +ALIGN 32 +gcm_gmult_avx PROC PUBLIC + jmp $L$_gmult_clmul +gcm_gmult_avx ENDP +PUBLIC gcm_ghash_avx + +ALIGN 32 +gcm_ghash_avx PROC PUBLIC + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_gcm_ghash_avx:: + +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + vzeroupper + + vmovdqu xmm10,XMMWORD PTR[rcx] + lea r10,QWORD PTR[$L$0x1c2_polynomial] + lea rdx,QWORD PTR[64+rdx] + vmovdqu xmm13,XMMWORD PTR[$L$bswap_mask] 
+ vpshufb xmm10,xmm10,xmm13 + cmp r9,080h + jb $L$short_avx + sub r9,080h + + vmovdqu xmm14,XMMWORD PTR[112+r8] + vmovdqu xmm6,XMMWORD PTR[((0-64))+rdx] + vpshufb xmm14,xmm14,xmm13 + vmovdqu xmm7,XMMWORD PTR[((32-64))+rdx] + + vpunpckhqdq xmm9,xmm14,xmm14 + vmovdqu xmm15,XMMWORD PTR[96+r8] + vpclmulqdq xmm0,xmm14,xmm6,000h + vpxor xmm9,xmm9,xmm14 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((16-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vmovdqu xmm14,XMMWORD PTR[80+r8] + vpclmulqdq xmm2,xmm9,xmm7,000h + vpxor xmm8,xmm8,xmm15 + + vpshufb xmm14,xmm14,xmm13 + vpclmulqdq xmm3,xmm15,xmm6,000h + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm4,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((48-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vmovdqu xmm15,XMMWORD PTR[64+r8] + vpclmulqdq xmm5,xmm8,xmm7,010h + vmovdqu xmm7,XMMWORD PTR[((80-64))+rdx] + + vpshufb xmm15,xmm15,xmm13 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm14,xmm6,000h + vpxor xmm4,xmm4,xmm1 + vpunpckhqdq xmm8,xmm15,xmm15 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((64-64))+rdx] + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm9,xmm7,000h + vpxor xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD PTR[48+r8] + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm3,xmm15,xmm6,000h + vpxor xmm1,xmm1,xmm4 + vpshufb xmm14,xmm14,xmm13 + vpclmulqdq xmm4,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((96-64))+rdx] + vpxor xmm2,xmm2,xmm5 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm5,xmm8,xmm7,010h + vmovdqu xmm7,XMMWORD PTR[((128-64))+rdx] + vpxor xmm9,xmm9,xmm14 + + vmovdqu xmm15,XMMWORD PTR[32+r8] + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm14,xmm6,000h + vpxor xmm4,xmm4,xmm1 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((112-64))+rdx] + vpxor xmm5,xmm5,xmm2 + vpunpckhqdq xmm8,xmm15,xmm15 + vpclmulqdq xmm2,xmm9,xmm7,000h + vpxor xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD PTR[16+r8] + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm3,xmm15,xmm6,000h + vpxor xmm1,xmm1,xmm4 + vpshufb xmm14,xmm14,xmm13 + vpclmulqdq xmm4,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((144-64))+rdx] + vpxor xmm2,xmm2,xmm5 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm5,xmm8,xmm7,010h + vmovdqu xmm7,XMMWORD PTR[((176-64))+rdx] + vpxor xmm9,xmm9,xmm14 + + vmovdqu xmm15,XMMWORD PTR[r8] + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm14,xmm6,000h + vpxor xmm4,xmm4,xmm1 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((160-64))+rdx] + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm9,xmm7,010h + + lea r8,QWORD PTR[128+r8] + cmp r9,080h + jb $L$tail_avx + + vpxor xmm15,xmm15,xmm10 + sub r9,080h + jmp $L$oop8x_avx + +ALIGN 32 +$L$oop8x_avx:: + vpunpckhqdq xmm8,xmm15,xmm15 + vmovdqu xmm14,XMMWORD PTR[112+r8] + vpxor xmm3,xmm3,xmm0 + vpxor xmm8,xmm8,xmm15 + vpclmulqdq xmm10,xmm15,xmm6,000h + vpshufb xmm14,xmm14,xmm13 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm11,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((0-64))+rdx] + vpunpckhqdq xmm9,xmm14,xmm14 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm12,xmm8,xmm7,000h + vmovdqu xmm7,XMMWORD PTR[((32-64))+rdx] + vpxor xmm9,xmm9,xmm14 + + vmovdqu xmm15,XMMWORD PTR[96+r8] + vpclmulqdq xmm0,xmm14,xmm6,000h + vpxor xmm10,xmm10,xmm3 + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,011h + vxorps xmm11,xmm11,xmm4 + vmovdqu xmm6,XMMWORD PTR[((16-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vpclmulqdq xmm2,xmm9,xmm7,000h + vpxor xmm12,xmm12,xmm5 + vxorps xmm8,xmm8,xmm15 + + vmovdqu xmm14,XMMWORD PTR[80+r8] + vpxor xmm12,xmm12,xmm10 + vpclmulqdq xmm3,xmm15,xmm6,000h + 
vpxor xmm12,xmm12,xmm11 + vpslldq xmm9,xmm12,8 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm4,xmm15,xmm6,011h + vpsrldq xmm12,xmm12,8 + vpxor xmm10,xmm10,xmm9 + vmovdqu xmm6,XMMWORD PTR[((48-64))+rdx] + vpshufb xmm14,xmm14,xmm13 + vxorps xmm11,xmm11,xmm12 + vpxor xmm4,xmm4,xmm1 + vpunpckhqdq xmm9,xmm14,xmm14 + vpclmulqdq xmm5,xmm8,xmm7,010h + vmovdqu xmm7,XMMWORD PTR[((80-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm15,XMMWORD PTR[64+r8] + vpalignr xmm12,xmm10,xmm10,8 + vpclmulqdq xmm0,xmm14,xmm6,000h + vpshufb xmm15,xmm15,xmm13 + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((64-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm2,xmm9,xmm7,000h + vxorps xmm8,xmm8,xmm15 + vpxor xmm2,xmm2,xmm5 + + vmovdqu xmm14,XMMWORD PTR[48+r8] + vpclmulqdq xmm10,xmm10,XMMWORD PTR[r10],010h + vpclmulqdq xmm3,xmm15,xmm6,000h + vpshufb xmm14,xmm14,xmm13 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm4,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((96-64))+rdx] + vpunpckhqdq xmm9,xmm14,xmm14 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm5,xmm8,xmm7,010h + vmovdqu xmm7,XMMWORD PTR[((128-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm15,XMMWORD PTR[32+r8] + vpclmulqdq xmm0,xmm14,xmm6,000h + vpshufb xmm15,xmm15,xmm13 + vpxor xmm0,xmm0,xmm3 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((112-64))+rdx] + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm1,xmm1,xmm4 + vpclmulqdq xmm2,xmm9,xmm7,000h + vpxor xmm8,xmm8,xmm15 + vpxor xmm2,xmm2,xmm5 + vxorps xmm10,xmm10,xmm12 + + vmovdqu xmm14,XMMWORD PTR[16+r8] + vpalignr xmm12,xmm10,xmm10,8 + vpclmulqdq xmm3,xmm15,xmm6,000h + vpshufb xmm14,xmm14,xmm13 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm4,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((144-64))+rdx] + vpclmulqdq xmm10,xmm10,XMMWORD PTR[r10],010h + vxorps xmm12,xmm12,xmm11 + vpunpckhqdq xmm9,xmm14,xmm14 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm5,xmm8,xmm7,010h + vmovdqu xmm7,XMMWORD PTR[((176-64))+rdx] + vpxor xmm9,xmm9,xmm14 + vpxor xmm5,xmm5,xmm2 + + vmovdqu xmm15,XMMWORD PTR[r8] + vpclmulqdq xmm0,xmm14,xmm6,000h + vpshufb xmm15,xmm15,xmm13 + vpclmulqdq xmm1,xmm14,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((160-64))+rdx] + vpxor xmm15,xmm15,xmm12 + vpclmulqdq xmm2,xmm9,xmm7,010h + vpxor xmm15,xmm15,xmm10 + + lea r8,QWORD PTR[128+r8] + sub r9,080h + jnc $L$oop8x_avx + + add r9,080h + jmp $L$tail_no_xor_avx + +ALIGN 32 +$L$short_avx:: + vmovdqu xmm14,XMMWORD PTR[((-16))+r9*1+r8] + lea r8,QWORD PTR[r9*1+r8] + vmovdqu xmm6,XMMWORD PTR[((0-64))+rdx] + vmovdqu xmm7,XMMWORD PTR[((32-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + + vmovdqa xmm3,xmm0 + vmovdqa xmm4,xmm1 + vmovdqa xmm5,xmm2 + sub r9,010h + jz $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD PTR[((-32))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((16-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + vpsrldq xmm7,xmm7,8 + sub r9,010h + jz $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD PTR[((-48))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((48-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + vmovdqu xmm7,XMMWORD PTR[((80-64))+rdx] + sub r9,010h + jz $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + 
vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD PTR[((-64))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((64-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + vpsrldq xmm7,xmm7,8 + sub r9,010h + jz $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD PTR[((-80))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((96-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + vmovdqu xmm7,XMMWORD PTR[((128-64))+rdx] + sub r9,010h + jz $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD PTR[((-96))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((112-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + vpsrldq xmm7,xmm7,8 + sub r9,010h + jz $L$tail_avx + + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vmovdqu xmm14,XMMWORD PTR[((-112))+r8] + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vmovdqu xmm6,XMMWORD PTR[((144-64))+rdx] + vpshufb xmm15,xmm14,xmm13 + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + vmovq xmm7,QWORD PTR[((184-64))+rdx] + sub r9,010h + jmp $L$tail_avx + +ALIGN 32 +$L$tail_avx:: + vpxor xmm15,xmm15,xmm10 +$L$tail_no_xor_avx:: + vpunpckhqdq xmm8,xmm15,xmm15 + vpxor xmm3,xmm3,xmm0 + vpclmulqdq xmm0,xmm15,xmm6,000h + vpxor xmm8,xmm8,xmm15 + vpxor xmm4,xmm4,xmm1 + vpclmulqdq xmm1,xmm15,xmm6,011h + vpxor xmm5,xmm5,xmm2 + vpclmulqdq xmm2,xmm8,xmm7,000h + + vmovdqu xmm12,XMMWORD PTR[r10] + + vpxor xmm10,xmm3,xmm0 + vpxor xmm11,xmm4,xmm1 + vpxor xmm5,xmm5,xmm2 + + vpxor xmm5,xmm5,xmm10 + vpxor xmm5,xmm5,xmm11 + vpslldq xmm9,xmm5,8 + vpsrldq xmm5,xmm5,8 + vpxor xmm10,xmm10,xmm9 + vpxor xmm11,xmm11,xmm5 + + vpclmulqdq xmm9,xmm10,xmm12,010h + vpalignr xmm10,xmm10,xmm10,8 + vpxor xmm10,xmm10,xmm9 + + vpclmulqdq xmm9,xmm10,xmm12,010h + vpalignr xmm10,xmm10,xmm10,8 + vpxor xmm10,xmm10,xmm11 + vpxor xmm10,xmm10,xmm9 + + cmp r9,0 + jne $L$short_avx + + vpshufb xmm10,xmm10,xmm13 + vmovdqu XMMWORD PTR[rcx],xmm10 + vzeroupper + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_gcm_ghash_avx:: + DB 0F3h,0C3h ;repret +gcm_ghash_avx ENDP ALIGN 64 $L$bswap_mask:: DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 $L$0x1c2_polynomial:: DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h +$L$7_mask:: + DD 7,0,7,0 +$L$7_mask_poly:: + DD 7,0,450,0 ALIGN 64 $L$rem_4bit:: @@ -1126,7 +1935,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -1166,10 +1974,20 @@ ALIGN 4 DD imagerel $L$SEH_end_gcm_ghash_4bit DD imagerel $L$SEH_info_gcm_ghash_4bit + DD imagerel $L$SEH_begin_gcm_init_clmul + DD imagerel $L$SEH_end_gcm_init_clmul + DD imagerel $L$SEH_info_gcm_init_clmul + DD imagerel $L$SEH_begin_gcm_ghash_clmul DD imagerel $L$SEH_end_gcm_ghash_clmul DD imagerel 
$L$SEH_info_gcm_ghash_clmul + DD imagerel $L$SEH_begin_gcm_init_avx + DD imagerel $L$SEH_end_gcm_init_avx + DD imagerel $L$SEH_info_gcm_init_clmul + DD imagerel $L$SEH_begin_gcm_ghash_avx + DD imagerel $L$SEH_end_gcm_ghash_avx + DD imagerel $L$SEH_info_gcm_ghash_clmul .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 @@ -1177,26 +1995,27 @@ $L$SEH_info_gcm_gmult_4bit:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue - $L$SEH_info_gcm_ghash_4bit:: DB 9,0,0,0 DD imagerel se_handler DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue - +$L$SEH_info_gcm_init_clmul:: +DB 001h,008h,003h,000h +DB 008h,068h,000h,000h +DB 004h,022h,000h,000h $L$SEH_info_gcm_ghash_clmul:: -DB 001h,01fh,00bh,000h -DB 01fh,0a8h,004h,000h - -DB 019h,098h,003h,000h - -DB 013h,088h,002h,000h - -DB 00dh,078h,001h,000h - +DB 001h,033h,016h,000h +DB 033h,0f8h,009h,000h +DB 02eh,0e8h,008h,000h +DB 029h,0d8h,007h,000h +DB 024h,0c8h,006h,000h +DB 01fh,0b8h,005h,000h +DB 01ah,0a8h,004h,000h +DB 015h,098h,003h,000h +DB 010h,088h,002h,000h +DB 00ch,078h,001h,000h DB 008h,068h,000h,000h - -DB 004h,0a2h,000h,000h - +DB 004h,001h,015h,000h .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm b/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm index 4af838e3e9a7b6..9d823aed573007 100644 --- a/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' ALIGN 16 PUBLIC rc4_md5_enc @@ -1328,7 +1328,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] diff --git a/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm b/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm index aea304fbad64ef..a0e5553247b032 100644 --- a/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/rc4/rc4-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC RC4 @@ -60,7 +60,7 @@ $L$oop8_warmup:: mov edx,DWORD PTR[rax*4+rdi] mov eax,DWORD PTR[r10*4+rdi] xor dl,BYTE PTR[r12] - mov BYTE PTR[r12*1+r13],dl + mov BYTE PTR[r13*1+r12],dl lea r12,QWORD PTR[1+r12] dec rbx jnz $L$oop8_warmup @@ -139,7 +139,7 @@ $L$oop8:: sub r11,8 xor r8,QWORD PTR[r12] - mov QWORD PTR[r12*1+r13],r8 + mov QWORD PTR[r13*1+r12],r8 lea r12,QWORD PTR[8+r12] test r11,-8 @@ -165,7 +165,7 @@ $L$oop16_warmup:: mov edx,DWORD PTR[rax*4+rdi] mov eax,DWORD PTR[r10*4+rdi] xor dl,BYTE PTR[r12] - mov BYTE PTR[r12*1+r13],dl + mov BYTE PTR[r13*1+r12],dl lea r12,QWORD PTR[1+r12] dec rbx jnz $L$oop16_warmup @@ -202,7 +202,7 @@ $L$oop16:: pxor xmm2,xmm1 add cl,bl pinsrw xmm0,WORD PTR[rax*4+rdi],0 - movdqu XMMWORD PTR[r12*1+r13],xmm2 + movdqu XMMWORD PTR[r13*1+r12],xmm2 lea r12,QWORD PTR[16+r12] $L$oop16_enter:: mov edx,DWORD PTR[rcx*4+rdi] @@ -338,7 +338,7 @@ $L$oop16_enter:: psllq xmm1,8 pxor xmm2,xmm0 pxor xmm2,xmm1 - movdqu XMMWORD PTR[r12*1+r13],xmm2 + movdqu XMMWORD PTR[r13*1+r12],xmm2 lea r12,QWORD PTR[16+r12] cmp r11,0 @@ -356,7 +356,7 @@ $L$loop1:: mov edx,DWORD PTR[rax*4+rdi] mov eax,DWORD PTR[r10*4+rdi] xor dl,BYTE PTR[r12] - mov BYTE PTR[r12*1+r13],dl + mov BYTE PTR[r13*1+r12],dl lea r12,QWORD PTR[1+r12] dec r11 jnz $L$loop1 @@ -382,7 +382,6 @@ $L$cloop8:: cmp rcx,rsi mov BYTE PTR[r10*1+rdi],dl jne $L$cmov0 - mov rbx,rax $L$cmov0:: add dl,al @@ -397,7 +396,6 @@ $L$cmov0:: cmp rcx,r10 mov BYTE 
PTR[rsi*1+rdi],dl jne $L$cmov1 - mov rax,rbx $L$cmov1:: add dl,bl @@ -412,7 +410,6 @@ $L$cmov1:: cmp rcx,rsi mov BYTE PTR[r10*1+rdi],dl jne $L$cmov2 - mov rbx,rax $L$cmov2:: add dl,al @@ -427,7 +424,6 @@ $L$cmov2:: cmp rcx,r10 mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov3 - mov rax,rbx $L$cmov3:: add dl,bl @@ -442,7 +438,6 @@ $L$cmov3:: cmp rcx,rsi mov BYTE PTR[r10*1+rdi],dl jne $L$cmov4 - mov rbx,rax $L$cmov4:: add dl,al @@ -457,7 +452,6 @@ $L$cmov4:: cmp rcx,r10 mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov5 - mov rax,rbx $L$cmov5:: add dl,bl @@ -472,7 +466,6 @@ $L$cmov5:: cmp rcx,rsi mov BYTE PTR[r10*1+rdi],dl jne $L$cmov6 - mov rbx,rax $L$cmov6:: add dl,al @@ -487,7 +480,6 @@ $L$cmov6:: cmp rcx,r10 mov BYTE PTR[rsi*1+rdi],dl jne $L$cmov7 - mov rax,rbx $L$cmov7:: add dl,bl @@ -726,7 +718,6 @@ $L$common_seh_exit:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha1-mb-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha1-mb-x86_64.asm new file mode 100644 index 00000000000000..fe49fbcadb0171 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/sha/sha1-mb-x86_64.asm @@ -0,0 +1,7533 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC sha1_multi_block + +ALIGN 32 +sha1_multi_block PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))] + bt rcx,61 + jc _shaext_shortcut + test ecx,268435456 + jnz _avx_shortcut + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD PTR[272+rsp],rax +$L$body:: + lea rbp,QWORD PTR[K_XX_XX] + lea rbx,QWORD PTR[256+rsp] + +$L$oop_grande:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz $L$done + + movdqu xmm10,XMMWORD PTR[rdi] + lea rax,QWORD PTR[128+rsp] + movdqu xmm11,XMMWORD PTR[32+rdi] + movdqu xmm12,XMMWORD PTR[64+rdi] + movdqu xmm13,XMMWORD PTR[96+rdi] + movdqu xmm14,XMMWORD PTR[128+rdi] + movdqa xmm5,XMMWORD PTR[96+rbp] + movdqa xmm15,XMMWORD PTR[((-32))+rbp] + jmp $L$oop + +ALIGN 32 +$L$oop:: + movd xmm0,DWORD PTR[r8] + lea r8,QWORD PTR[64+r8] + movd xmm2,DWORD PTR[r9] + lea r9,QWORD PTR[64+r9] + movd xmm3,DWORD PTR[r10] + lea r10,QWORD PTR[64+r10] + movd xmm4,DWORD PTR[r11] + lea r11,QWORD PTR[64+r11] + punpckldq xmm0,xmm3 + movd xmm1,DWORD PTR[((-60))+r8] + punpckldq xmm2,xmm4 + movd xmm9,DWORD PTR[((-60))+r9] + punpckldq xmm0,xmm2 + movd xmm8,DWORD PTR[((-60))+r10] +DB 102,15,56,0,197 + movd 
xmm7,DWORD PTR[((-60))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD PTR[(0-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD PTR[((-56))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-56))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD PTR[((-56))+r10] + por xmm11,xmm7 + movd xmm7,DWORD PTR[((-56))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD PTR[(16-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD PTR[((-52))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-52))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD PTR[((-52))+r10] + por xmm10,xmm7 + movd xmm7,DWORD PTR[((-52))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD PTR[(32-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD PTR[((-48))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-48))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD PTR[((-48))+r10] + por xmm14,xmm7 + movd xmm7,DWORD PTR[((-48))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD PTR[(48-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD PTR[((-44))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-44))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD PTR[((-44))+r10] + por xmm13,xmm7 + movd xmm7,DWORD PTR[((-44))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD PTR[(64-128)+rax],xmm4 + paddd xmm10,xmm4 + movd xmm1,DWORD PTR[((-40))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-40))+r9] + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + movd xmm8,DWORD PTR[((-40))+r10] + por xmm12,xmm7 + movd xmm7,DWORD PTR[((-40))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD PTR[(80-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD PTR[((-36))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-36))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD 
PTR[((-36))+r10] + por xmm11,xmm7 + movd xmm7,DWORD PTR[((-36))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD PTR[(96-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD PTR[((-32))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-32))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD PTR[((-32))+r10] + por xmm10,xmm7 + movd xmm7,DWORD PTR[((-32))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD PTR[(112-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD PTR[((-28))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-28))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD PTR[((-28))+r10] + por xmm14,xmm7 + movd xmm7,DWORD PTR[((-28))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD PTR[(128-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD PTR[((-24))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-24))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD PTR[((-24))+r10] + por xmm13,xmm7 + movd xmm7,DWORD PTR[((-24))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD PTR[(144-128)+rax],xmm4 + paddd xmm10,xmm4 + movd xmm1,DWORD PTR[((-20))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-20))+r9] + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + movd xmm8,DWORD PTR[((-20))+r10] + por xmm12,xmm7 + movd xmm7,DWORD PTR[((-20))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD PTR[(160-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD PTR[((-16))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-16))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD PTR[((-16))+r10] + por xmm11,xmm7 + movd xmm7,DWORD PTR[((-16))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD PTR[(176-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD PTR[((-12))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-12))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 
102,15,56,0,213 + movd xmm8,DWORD PTR[((-12))+r10] + por xmm10,xmm7 + movd xmm7,DWORD PTR[((-12))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD PTR[(192-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD PTR[((-8))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-8))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD PTR[((-8))+r10] + por xmm14,xmm7 + movd xmm7,DWORD PTR[((-8))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD PTR[(208-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD PTR[((-4))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-4))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD PTR[((-4))+r10] + por xmm13,xmm7 + movdqa xmm1,XMMWORD PTR[((0-128))+rax] + movd xmm7,DWORD PTR[((-4))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + prefetcht0 [63+r8] + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD PTR[(224-128)+rax],xmm4 + paddd xmm10,xmm4 + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + prefetcht0 [63+r9] + + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm10,xmm6 + prefetcht0 [63+r10] + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + prefetcht0 [63+r11] + por xmm12,xmm7 + movdqa xmm2,XMMWORD PTR[((16-128))+rax] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm10 + pxor xmm1,XMMWORD PTR[((128-128))+rax] + paddd xmm14,xmm15 + movdqa xmm7,xmm11 + pslld xmm8,5 + pxor xmm1,xmm3 + movdqa xmm6,xmm11 + pandn xmm7,xmm13 + movdqa xmm5,xmm1 + pand xmm6,xmm12 + movdqa xmm9,xmm10 + psrld xmm5,31 + paddd xmm1,xmm1 + + movdqa XMMWORD PTR[(240-128)+rax],xmm0 + paddd xmm14,xmm0 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm11 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm14 + pxor xmm2,XMMWORD PTR[((144-128))+rax] + paddd xmm13,xmm15 + movdqa xmm7,xmm10 + pslld xmm8,5 + pxor xmm2,xmm4 + movdqa xmm6,xmm10 + pandn xmm7,xmm12 + movdqa xmm5,xmm2 + pand xmm6,xmm11 + movdqa xmm9,xmm14 + psrld xmm5,31 + paddd xmm2,xmm2 + + movdqa XMMWORD PTR[(0-128)+rax],xmm1 + paddd xmm13,xmm1 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm10 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm13 + pxor xmm3,XMMWORD PTR[((160-128))+rax] + paddd xmm12,xmm15 + movdqa xmm7,xmm14 + pslld xmm8,5 + pxor xmm3,xmm0 + movdqa xmm6,xmm14 + pandn xmm7,xmm11 + movdqa xmm5,xmm3 + pand xmm6,xmm10 + movdqa xmm9,xmm13 + psrld xmm5,31 + paddd xmm3,xmm3 + + movdqa XMMWORD PTR[(16-128)+rax],xmm2 + paddd xmm12,xmm2 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm14 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + 
paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm12 + pxor xmm4,XMMWORD PTR[((176-128))+rax] + paddd xmm11,xmm15 + movdqa xmm7,xmm13 + pslld xmm8,5 + pxor xmm4,xmm1 + movdqa xmm6,xmm13 + pandn xmm7,xmm10 + movdqa xmm5,xmm4 + pand xmm6,xmm14 + movdqa xmm9,xmm12 + psrld xmm5,31 + paddd xmm4,xmm4 + + movdqa XMMWORD PTR[(32-128)+rax],xmm3 + paddd xmm11,xmm3 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm13 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm11 + pxor xmm0,XMMWORD PTR[((192-128))+rax] + paddd xmm10,xmm15 + movdqa xmm7,xmm12 + pslld xmm8,5 + pxor xmm0,xmm2 + movdqa xmm6,xmm12 + pandn xmm7,xmm14 + movdqa xmm5,xmm0 + pand xmm6,xmm13 + movdqa xmm9,xmm11 + psrld xmm5,31 + paddd xmm0,xmm0 + + movdqa XMMWORD PTR[(48-128)+rax],xmm4 + paddd xmm10,xmm4 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm12 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD PTR[rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((208-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(64-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((224-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(80-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((240-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(96-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((0-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(112-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((16-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD 
PTR[(128-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((32-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(144-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((48-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(160-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((64-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(176-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((80-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(192-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((96-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(208-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((112-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(224-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD 
PTR[((128-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(240-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((144-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(0-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((160-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(16-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((176-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(32-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((192-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(48-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((208-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(64-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((224-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(80-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor 
xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((240-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(96-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((0-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(112-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD PTR[32+rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((16-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(128-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((32-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(144-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((48-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(160-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((64-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(176-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((80-128))+rax] + 
pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(192-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((96-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(208-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((112-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(224-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((128-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(240-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((144-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(0-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((160-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(16-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((176-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa 
xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(32-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((192-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(48-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((208-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(64-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((224-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(80-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((240-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(96-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((0-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(112-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((16-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa 
xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(128-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((32-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(144-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((48-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(160-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((64-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(176-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD PTR[64+rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((80-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(192-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((96-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(208-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((112-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(224-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + 
psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((128-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(240-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((144-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(0-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((160-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(16-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((176-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(32-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((192-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(48-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((208-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(64-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((224-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(80-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 
+ psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((240-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(96-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((0-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(112-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((16-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((32-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((48-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((64-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((80-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + 
por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((96-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((112-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + movdqa xmm6,xmm14 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + paddd xmm10,xmm4 + psrld xmm9,27 + movdqa xmm7,xmm12 + pxor xmm6,xmm13 + + pslld xmm7,30 + por xmm8,xmm9 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm12,xmm7 + movdqa xmm0,XMMWORD PTR[rbx] + mov ecx,1 + cmp ecx,DWORD PTR[rbx] + pxor xmm8,xmm8 + cmovge r8,rbp + cmp ecx,DWORD PTR[4+rbx] + movdqa xmm1,xmm0 + cmovge r9,rbp + cmp ecx,DWORD PTR[8+rbx] + pcmpgtd xmm1,xmm8 + cmovge r10,rbp + cmp ecx,DWORD PTR[12+rbx] + paddd xmm0,xmm1 + cmovge r11,rbp + + movdqu xmm6,XMMWORD PTR[rdi] + pand xmm10,xmm1 + movdqu xmm7,XMMWORD PTR[32+rdi] + pand xmm11,xmm1 + paddd xmm10,xmm6 + movdqu xmm8,XMMWORD PTR[64+rdi] + pand xmm12,xmm1 + paddd xmm11,xmm7 + movdqu xmm9,XMMWORD PTR[96+rdi] + pand xmm13,xmm1 + paddd xmm12,xmm8 + movdqu xmm5,XMMWORD PTR[128+rdi] + pand xmm14,xmm1 + movdqu XMMWORD PTR[rdi],xmm10 + paddd xmm13,xmm9 + movdqu XMMWORD PTR[32+rdi],xmm11 + paddd xmm14,xmm5 + movdqu XMMWORD PTR[64+rdi],xmm12 + movdqu XMMWORD PTR[96+rdi],xmm13 + movdqu XMMWORD PTR[128+rdi],xmm14 + + movdqa XMMWORD PTR[rbx],xmm0 + movdqa xmm5,XMMWORD PTR[96+rbp] + movdqa xmm15,XMMWORD PTR[((-32))+rbp] + dec edx + jnz $L$oop + + mov edx,DWORD PTR[280+rsp] + lea rdi,QWORD PTR[16+rdi] + lea rsi,QWORD PTR[64+rsi] + dec edx + jnz $L$oop_grande + +$L$done:: + mov rax,QWORD PTR[272+rsp] + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_multi_block:: +sha1_multi_block ENDP + +ALIGN 32 +sha1_multi_block_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD 
PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + shl edx,1 + and rsp,-256 + lea rdi,QWORD PTR[64+rdi] + mov QWORD PTR[272+rsp],rax +$L$body_shaext:: + lea rbx,QWORD PTR[256+rsp] + movdqa xmm3,XMMWORD PTR[((K_XX_XX+128))] + +$L$oop_grande_shaext:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rsp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rsp + test edx,edx + jz $L$done_shaext + + movq xmm0,QWORD PTR[((0-64))+rdi] + movq xmm4,QWORD PTR[((32-64))+rdi] + movq xmm5,QWORD PTR[((64-64))+rdi] + movq xmm6,QWORD PTR[((96-64))+rdi] + movq xmm7,QWORD PTR[((128-64))+rdi] + + punpckldq xmm0,xmm4 + punpckldq xmm5,xmm6 + + movdqa xmm8,xmm0 + punpcklqdq xmm0,xmm5 + punpckhqdq xmm8,xmm5 + + pshufd xmm1,xmm7,63 + pshufd xmm9,xmm7,127 + pshufd xmm0,xmm0,27 + pshufd xmm8,xmm8,27 + jmp $L$oop_shaext + +ALIGN 32 +$L$oop_shaext:: + movdqu xmm4,XMMWORD PTR[r8] + movdqu xmm11,XMMWORD PTR[r9] + movdqu xmm5,XMMWORD PTR[16+r8] + movdqu xmm12,XMMWORD PTR[16+r9] + movdqu xmm6,XMMWORD PTR[32+r8] +DB 102,15,56,0,227 + movdqu xmm13,XMMWORD PTR[32+r9] +DB 102,68,15,56,0,219 + movdqu xmm7,XMMWORD PTR[48+r8] + lea r8,QWORD PTR[64+r8] +DB 102,15,56,0,235 + movdqu xmm14,XMMWORD PTR[48+r9] + lea r9,QWORD PTR[64+r9] +DB 102,68,15,56,0,227 + + movdqa XMMWORD PTR[80+rsp],xmm1 + paddd xmm1,xmm4 + movdqa XMMWORD PTR[112+rsp],xmm9 + paddd xmm9,xmm11 + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa xmm2,xmm0 + movdqa XMMWORD PTR[96+rsp],xmm8 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,213 +DB 69,15,58,204,193,0 +DB 69,15,56,200,212 +DB 102,15,56,0,243 + prefetcht0 [127+r8] +DB 15,56,201,229 +DB 102,68,15,56,0,235 + prefetcht0 [127+r9] +DB 69,15,56,201,220 + +DB 102,15,56,0,251 + movdqa xmm1,xmm0 +DB 102,68,15,56,0,243 + movdqa xmm9,xmm8 +DB 15,58,204,194,0 +DB 15,56,200,206 +DB 69,15,58,204,194,0 +DB 69,15,56,200,205 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,215 +DB 69,15,58,204,193,0 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,0 +DB 15,56,200,204 +DB 69,15,58,204,194,0 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,213 +DB 69,15,58,204,193,0 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,206 +DB 69,15,58,204,194,1 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,1 +DB 15,56,200,215 +DB 69,15,58,204,193,1 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + 
movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,204 +DB 69,15,58,204,194,1 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,1 +DB 15,56,200,213 +DB 69,15,58,204,193,1 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,206 +DB 69,15,58,204,194,1 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,215 +DB 69,15,58,204,193,2 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,2 +DB 15,56,200,204 +DB 69,15,58,204,194,2 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,213 +DB 69,15,58,204,193,2 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,2 +DB 15,56,200,206 +DB 69,15,58,204,194,2 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,215 +DB 69,15,58,204,193,2 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,204 +DB 69,15,58,204,194,3 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,3 +DB 15,56,200,213 +DB 69,15,58,204,193,3 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + + mov ecx,1 + pxor xmm4,xmm4 + cmp ecx,DWORD PTR[rbx] + cmovge r8,rsp + + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,206 +DB 69,15,58,204,194,3 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + + cmp ecx,DWORD PTR[4+rbx] + cmovge r9,rsp + movq xmm6,QWORD PTR[rbx] + + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,3 +DB 15,56,200,215 +DB 69,15,58,204,193,3 +DB 69,15,56,200,214 + + pshufd xmm11,xmm6,000h + pshufd xmm12,xmm6,055h + movdqa xmm7,xmm6 + pcmpgtd xmm11,xmm4 + pcmpgtd xmm12,xmm4 + + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,204 +DB 69,15,58,204,194,3 +DB 68,15,56,200,204 + + pcmpgtd xmm7,xmm4 + pand xmm0,xmm11 + pand xmm1,xmm11 + pand xmm8,xmm12 + pand xmm9,xmm12 + paddd xmm6,xmm7 + + paddd xmm0,XMMWORD PTR[64+rsp] + paddd xmm1,XMMWORD PTR[80+rsp] + paddd xmm8,XMMWORD PTR[96+rsp] + paddd xmm9,XMMWORD PTR[112+rsp] + + movq QWORD PTR[rbx],xmm6 + dec edx + jnz $L$oop_shaext + + mov edx,DWORD PTR[280+rsp] + + pshufd xmm0,xmm0,27 + pshufd xmm8,xmm8,27 + + movdqa xmm6,xmm0 + punpckldq xmm0,xmm8 + punpckhdq xmm6,xmm8 + punpckhdq xmm1,xmm9 + movq QWORD PTR[(0-64)+rdi],xmm0 + psrldq xmm0,8 + movq QWORD PTR[(64-64)+rdi],xmm6 + psrldq xmm6,8 + movq QWORD PTR[(32-64)+rdi],xmm0 
+ psrldq xmm1,8 + movq QWORD PTR[(96-64)+rdi],xmm6 + movq QWORD PTR[(128-64)+rdi],xmm1 + + lea rdi,QWORD PTR[8+rdi] + lea rsi,QWORD PTR[32+rsi] + dec edx + jnz $L$oop_grande_shaext + +$L$done_shaext:: + + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_multi_block_shaext:: +sha1_multi_block_shaext ENDP + +ALIGN 32 +sha1_multi_block_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx_shortcut:: + shr rcx,32 + cmp edx,2 + jb $L$avx + test ecx,32 + jnz _avx2_shortcut + jmp $L$avx +ALIGN 32 +$L$avx:: + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD PTR[272+rsp],rax +$L$body_avx:: + lea rbp,QWORD PTR[K_XX_XX] + lea rbx,QWORD PTR[256+rsp] + + vzeroupper +$L$oop_grande_avx:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz $L$done_avx + + vmovdqu xmm10,XMMWORD PTR[rdi] + lea rax,QWORD PTR[128+rsp] + vmovdqu xmm11,XMMWORD PTR[32+rdi] + vmovdqu xmm12,XMMWORD PTR[64+rdi] + vmovdqu xmm13,XMMWORD PTR[96+rdi] + vmovdqu xmm14,XMMWORD PTR[128+rdi] + vmovdqu xmm5,XMMWORD PTR[96+rbp] + jmp $L$oop_avx + +ALIGN 32 +$L$oop_avx:: + vmovdqa xmm15,XMMWORD PTR[((-32))+rbp] + vmovd xmm0,DWORD PTR[r8] + lea r8,QWORD PTR[64+r8] + vmovd xmm2,DWORD PTR[r9] + lea r9,QWORD PTR[64+r9] + vpinsrd xmm0,xmm0,DWORD PTR[r10],1 + lea r10,QWORD PTR[64+r10] + vpinsrd xmm2,xmm2,DWORD PTR[r11],1 + lea r11,QWORD PTR[64+r11] + vmovd xmm1,DWORD PTR[((-60))+r8] + vpunpckldq xmm0,xmm0,xmm2 + vmovd xmm9,DWORD PTR[((-60))+r9] + vpshufb xmm0,xmm0,xmm5 + vpinsrd xmm1,xmm1,DWORD PTR[((-60))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-60))+r11],1 + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD PTR[(0-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpunpckldq xmm1,xmm1,xmm9 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm2,DWORD PTR[((-56))+r8] + + vpslld xmm7,xmm11,30 + vpor 
xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-56))+r9] + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpshufb xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpinsrd xmm2,xmm2,DWORD PTR[((-56))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-56))+r11],1 + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD PTR[(16-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpunpckldq xmm2,xmm2,xmm9 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm3,DWORD PTR[((-52))+r8] + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-52))+r9] + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpshufb xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpinsrd xmm3,xmm3,DWORD PTR[((-52))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-52))+r11],1 + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD PTR[(32-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpunpckldq xmm3,xmm3,xmm9 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm4,DWORD PTR[((-48))+r8] + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-48))+r9] + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpshufb xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpinsrd xmm4,xmm4,DWORD PTR[((-48))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-48))+r11],1 + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD PTR[(48-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpunpckldq xmm4,xmm4,xmm9 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm0,DWORD PTR[((-44))+r8] + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-44))+r9] + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpshufb xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpinsrd xmm0,xmm0,DWORD PTR[((-44))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-44))+r11],1 + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD PTR[(64-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpunpckldq xmm0,xmm0,xmm9 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm1,DWORD PTR[((-40))+r8] + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-40))+r9] + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpshufb xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpinsrd xmm1,xmm1,DWORD PTR[((-40))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-40))+r11],1 + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD PTR[(80-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpunpckldq xmm1,xmm1,xmm9 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm2,DWORD PTR[((-36))+r8] + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-36))+r9] + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpshufb xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpinsrd xmm2,xmm2,DWORD PTR[((-36))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-36))+r11],1 + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD PTR[(96-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpunpckldq xmm2,xmm2,xmm9 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm3,DWORD PTR[((-32))+r8] + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-32))+r9] + vpaddd xmm13,xmm13,xmm6 + + vpsrld 
xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpshufb xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpinsrd xmm3,xmm3,DWORD PTR[((-32))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-32))+r11],1 + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD PTR[(112-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpunpckldq xmm3,xmm3,xmm9 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm4,DWORD PTR[((-28))+r8] + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-28))+r9] + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpshufb xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpinsrd xmm4,xmm4,DWORD PTR[((-28))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-28))+r11],1 + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD PTR[(128-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpunpckldq xmm4,xmm4,xmm9 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm0,DWORD PTR[((-24))+r8] + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-24))+r9] + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpshufb xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpinsrd xmm0,xmm0,DWORD PTR[((-24))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-24))+r11],1 + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD PTR[(144-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpunpckldq xmm0,xmm0,xmm9 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm1,DWORD PTR[((-20))+r8] + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-20))+r9] + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpshufb xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpinsrd xmm1,xmm1,DWORD PTR[((-20))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-20))+r11],1 + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD PTR[(160-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpunpckldq xmm1,xmm1,xmm9 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm2,DWORD PTR[((-16))+r8] + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-16))+r9] + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpshufb xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpinsrd xmm2,xmm2,DWORD PTR[((-16))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-16))+r11],1 + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD PTR[(176-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpunpckldq xmm2,xmm2,xmm9 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm3,DWORD PTR[((-12))+r8] + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-12))+r9] + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpshufb xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpinsrd xmm3,xmm3,DWORD PTR[((-12))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-12))+r11],1 + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD PTR[(192-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpunpckldq xmm3,xmm3,xmm9 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm4,DWORD PTR[((-8))+r8] + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-8))+r9] + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpshufb xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + 
vpinsrd xmm4,xmm4,DWORD PTR[((-8))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-8))+r11],1 + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD PTR[(208-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpunpckldq xmm4,xmm4,xmm9 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vmovd xmm0,DWORD PTR[((-4))+r8] + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vmovd xmm9,DWORD PTR[((-4))+r9] + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpshufb xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vmovdqa xmm1,XMMWORD PTR[((0-128))+rax] + vpinsrd xmm0,xmm0,DWORD PTR[((-4))+r10],1 + vpinsrd xmm9,xmm9,DWORD PTR[((-4))+r11],1 + vpaddd xmm10,xmm10,xmm15 + prefetcht0 [63+r8] + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD PTR[(224-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpunpckldq xmm0,xmm0,xmm9 + vpsrld xmm9,xmm11,27 + prefetcht0 [63+r9] + vpxor xmm6,xmm6,xmm7 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + prefetcht0 [63+r10] + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + prefetcht0 [63+r11] + vpshufb xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm2,XMMWORD PTR[((16-128))+rax] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((32-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpandn xmm7,xmm11,xmm13 + + vpand xmm6,xmm11,xmm12 + + vmovdqa XMMWORD PTR[(240-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((128-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm1,xmm1,xmm3 + + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((48-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpandn xmm7,xmm10,xmm12 + + vpand xmm6,xmm10,xmm11 + + vmovdqa XMMWORD PTR[(0-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((144-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm2,xmm2,xmm4 + + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((64-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpandn xmm7,xmm14,xmm11 + + vpand xmm6,xmm14,xmm10 + + vmovdqa XMMWORD PTR[(16-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((160-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm3,xmm3,xmm0 + + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((80-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpandn xmm7,xmm13,xmm10 + + vpand xmm6,xmm13,xmm14 + + vmovdqa XMMWORD PTR[(32-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((176-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm4,xmm4,xmm1 + + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 
+ vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((96-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpandn xmm7,xmm12,xmm14 + + vpand xmm6,xmm12,xmm13 + + vmovdqa XMMWORD PTR[(48-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((192-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm7 + vpxor xmm0,xmm0,xmm2 + + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm15,XMMWORD PTR[rbp] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((112-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(64-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((208-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((128-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(80-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((224-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((144-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD PTR[(96-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((240-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((160-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD PTR[(112-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((0-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((176-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD PTR[(128-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((16-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((192-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(144-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor 
xmm1,xmm1,XMMWORD PTR[((32-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((208-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(160-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((48-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((224-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD PTR[(176-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((64-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((240-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD PTR[(192-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((80-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((0-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD PTR[(208-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((96-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((16-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(224-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((112-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((32-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(240-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((128-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor 
xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((48-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD PTR[(0-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((144-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((64-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD PTR[(16-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((160-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((80-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD PTR[(32-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((176-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((96-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(48-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((192-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((112-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(64-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((208-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((128-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD PTR[(80-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((224-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((144-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD PTR[(96-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((240-128))+rax] + 
vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((160-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD PTR[(112-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((0-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm15,XMMWORD PTR[32+rbp] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((176-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD PTR[((16-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD PTR[(128-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((192-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD PTR[((32-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD PTR[(144-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((208-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD PTR[((48-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD PTR[(160-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((224-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD PTR[((64-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD PTR[(176-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((240-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD PTR[((80-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu 
XMMWORD PTR[(192-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((0-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD PTR[((96-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD PTR[(208-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((16-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD PTR[((112-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD PTR[(224-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((32-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD PTR[((128-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD PTR[(240-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((48-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD PTR[((144-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD PTR[(0-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((64-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD PTR[((160-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD PTR[(16-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((80-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD PTR[((176-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + 
vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD PTR[(32-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((96-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD PTR[((192-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD PTR[(48-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((112-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD PTR[((208-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD PTR[(64-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((128-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD PTR[((224-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD PTR[(80-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((144-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD PTR[((240-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD PTR[(96-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((160-128))+rax] + + vpaddd xmm14,xmm14,xmm15 + vpslld xmm8,xmm10,5 + vpand xmm7,xmm13,xmm12 + vpxor xmm1,xmm1,XMMWORD PTR[((0-128))+rax] + + vpaddd xmm14,xmm14,xmm7 + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm13,xmm12 + vpxor xmm1,xmm1,xmm3 + + vmovdqu XMMWORD PTR[(112-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm1,31 + vpand xmm6,xmm6,xmm11 + vpaddd xmm1,xmm1,xmm1 + + vpslld xmm7,xmm11,30 + vpaddd xmm14,xmm14,xmm6 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((176-128))+rax] + + vpaddd xmm13,xmm13,xmm15 + vpslld xmm8,xmm14,5 + vpand xmm7,xmm12,xmm11 + vpxor xmm2,xmm2,XMMWORD PTR[((16-128))+rax] + + vpaddd xmm13,xmm13,xmm7 + vpsrld 
xmm9,xmm14,27 + vpxor xmm6,xmm12,xmm11 + vpxor xmm2,xmm2,xmm4 + + vmovdqu XMMWORD PTR[(128-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm2,31 + vpand xmm6,xmm6,xmm10 + vpaddd xmm2,xmm2,xmm2 + + vpslld xmm7,xmm10,30 + vpaddd xmm13,xmm13,xmm6 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((192-128))+rax] + + vpaddd xmm12,xmm12,xmm15 + vpslld xmm8,xmm13,5 + vpand xmm7,xmm11,xmm10 + vpxor xmm3,xmm3,XMMWORD PTR[((32-128))+rax] + + vpaddd xmm12,xmm12,xmm7 + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm11,xmm10 + vpxor xmm3,xmm3,xmm0 + + vmovdqu XMMWORD PTR[(144-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm3,31 + vpand xmm6,xmm6,xmm14 + vpaddd xmm3,xmm3,xmm3 + + vpslld xmm7,xmm14,30 + vpaddd xmm12,xmm12,xmm6 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((208-128))+rax] + + vpaddd xmm11,xmm11,xmm15 + vpslld xmm8,xmm12,5 + vpand xmm7,xmm10,xmm14 + vpxor xmm4,xmm4,XMMWORD PTR[((48-128))+rax] + + vpaddd xmm11,xmm11,xmm7 + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm10,xmm14 + vpxor xmm4,xmm4,xmm1 + + vmovdqu XMMWORD PTR[(160-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm4,31 + vpand xmm6,xmm6,xmm13 + vpaddd xmm4,xmm4,xmm4 + + vpslld xmm7,xmm13,30 + vpaddd xmm11,xmm11,xmm6 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((224-128))+rax] + + vpaddd xmm10,xmm10,xmm15 + vpslld xmm8,xmm11,5 + vpand xmm7,xmm14,xmm13 + vpxor xmm0,xmm0,XMMWORD PTR[((64-128))+rax] + + vpaddd xmm10,xmm10,xmm7 + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm14,xmm13 + vpxor xmm0,xmm0,xmm2 + + vmovdqu XMMWORD PTR[(176-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpor xmm8,xmm8,xmm9 + vpsrld xmm5,xmm0,31 + vpand xmm6,xmm6,xmm12 + vpaddd xmm0,xmm0,xmm0 + + vpslld xmm7,xmm12,30 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vmovdqa xmm15,XMMWORD PTR[64+rbp] + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((240-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(192-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((80-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((0-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(208-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((96-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((16-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD PTR[(224-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((112-128))+rax] + vpsrld 
xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((32-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD PTR[(240-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((128-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((48-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD PTR[(0-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((144-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((64-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(16-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((160-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((80-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(32-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((176-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((96-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vmovdqa XMMWORD PTR[(48-128)+rax],xmm2 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((192-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((112-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vmovdqa XMMWORD PTR[(64-128)+rax],xmm3 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((208-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + 
vmovdqa xmm2,XMMWORD PTR[((128-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vmovdqa XMMWORD PTR[(80-128)+rax],xmm4 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((224-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((144-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vmovdqa XMMWORD PTR[(96-128)+rax],xmm0 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((240-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((160-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vmovdqa XMMWORD PTR[(112-128)+rax],xmm1 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((0-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((176-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((16-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((192-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((32-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpxor xmm0,xmm0,xmm2 + vmovdqa xmm2,XMMWORD PTR[((208-128))+rax] + + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm0,xmm0,XMMWORD PTR[((48-128))+rax] + vpsrld xmm9,xmm11,27 + vpxor xmm6,xmm6,xmm13 + vpxor xmm0,xmm0,xmm2 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + vpsrld xmm5,xmm0,31 + vpaddd xmm0,xmm0,xmm0 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm0,xmm0,xmm5 + vpor xmm12,xmm12,xmm7 + vpxor xmm1,xmm1,xmm3 + vmovdqa xmm3,XMMWORD PTR[((224-128))+rax] + + vpslld xmm8,xmm10,5 + vpaddd xmm14,xmm14,xmm15 + vpxor xmm6,xmm13,xmm11 + vpaddd xmm14,xmm14,xmm0 + vpxor xmm1,xmm1,XMMWORD PTR[((64-128))+rax] + vpsrld xmm9,xmm10,27 + vpxor xmm6,xmm6,xmm12 + vpxor xmm1,xmm1,xmm3 + + vpslld xmm7,xmm11,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm14,xmm14,xmm6 + vpsrld xmm5,xmm1,31 + vpaddd xmm1,xmm1,xmm1 + + vpsrld xmm11,xmm11,2 + vpaddd 
xmm14,xmm14,xmm8 + vpor xmm1,xmm1,xmm5 + vpor xmm11,xmm11,xmm7 + vpxor xmm2,xmm2,xmm4 + vmovdqa xmm4,XMMWORD PTR[((240-128))+rax] + + vpslld xmm8,xmm14,5 + vpaddd xmm13,xmm13,xmm15 + vpxor xmm6,xmm12,xmm10 + vpaddd xmm13,xmm13,xmm1 + vpxor xmm2,xmm2,XMMWORD PTR[((80-128))+rax] + vpsrld xmm9,xmm14,27 + vpxor xmm6,xmm6,xmm11 + vpxor xmm2,xmm2,xmm4 + + vpslld xmm7,xmm10,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm13,xmm13,xmm6 + vpsrld xmm5,xmm2,31 + vpaddd xmm2,xmm2,xmm2 + + vpsrld xmm10,xmm10,2 + vpaddd xmm13,xmm13,xmm8 + vpor xmm2,xmm2,xmm5 + vpor xmm10,xmm10,xmm7 + vpxor xmm3,xmm3,xmm0 + vmovdqa xmm0,XMMWORD PTR[((0-128))+rax] + + vpslld xmm8,xmm13,5 + vpaddd xmm12,xmm12,xmm15 + vpxor xmm6,xmm11,xmm14 + vpaddd xmm12,xmm12,xmm2 + vpxor xmm3,xmm3,XMMWORD PTR[((96-128))+rax] + vpsrld xmm9,xmm13,27 + vpxor xmm6,xmm6,xmm10 + vpxor xmm3,xmm3,xmm0 + + vpslld xmm7,xmm14,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm12,xmm12,xmm6 + vpsrld xmm5,xmm3,31 + vpaddd xmm3,xmm3,xmm3 + + vpsrld xmm14,xmm14,2 + vpaddd xmm12,xmm12,xmm8 + vpor xmm3,xmm3,xmm5 + vpor xmm14,xmm14,xmm7 + vpxor xmm4,xmm4,xmm1 + vmovdqa xmm1,XMMWORD PTR[((16-128))+rax] + + vpslld xmm8,xmm12,5 + vpaddd xmm11,xmm11,xmm15 + vpxor xmm6,xmm10,xmm13 + vpaddd xmm11,xmm11,xmm3 + vpxor xmm4,xmm4,XMMWORD PTR[((112-128))+rax] + vpsrld xmm9,xmm12,27 + vpxor xmm6,xmm6,xmm14 + vpxor xmm4,xmm4,xmm1 + + vpslld xmm7,xmm13,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm11,xmm11,xmm6 + vpsrld xmm5,xmm4,31 + vpaddd xmm4,xmm4,xmm4 + + vpsrld xmm13,xmm13,2 + vpaddd xmm11,xmm11,xmm8 + vpor xmm4,xmm4,xmm5 + vpor xmm13,xmm13,xmm7 + vpslld xmm8,xmm11,5 + vpaddd xmm10,xmm10,xmm15 + vpxor xmm6,xmm14,xmm12 + + vpsrld xmm9,xmm11,27 + vpaddd xmm10,xmm10,xmm4 + vpxor xmm6,xmm6,xmm13 + + vpslld xmm7,xmm12,30 + vpor xmm8,xmm8,xmm9 + vpaddd xmm10,xmm10,xmm6 + + vpsrld xmm12,xmm12,2 + vpaddd xmm10,xmm10,xmm8 + vpor xmm12,xmm12,xmm7 + mov ecx,1 + cmp ecx,DWORD PTR[rbx] + cmovge r8,rbp + cmp ecx,DWORD PTR[4+rbx] + cmovge r9,rbp + cmp ecx,DWORD PTR[8+rbx] + cmovge r10,rbp + cmp ecx,DWORD PTR[12+rbx] + cmovge r11,rbp + vmovdqu xmm6,XMMWORD PTR[rbx] + vpxor xmm8,xmm8,xmm8 + vmovdqa xmm7,xmm6 + vpcmpgtd xmm7,xmm7,xmm8 + vpaddd xmm6,xmm6,xmm7 + + vpand xmm10,xmm10,xmm7 + vpand xmm11,xmm11,xmm7 + vpaddd xmm10,xmm10,XMMWORD PTR[rdi] + vpand xmm12,xmm12,xmm7 + vpaddd xmm11,xmm11,XMMWORD PTR[32+rdi] + vpand xmm13,xmm13,xmm7 + vpaddd xmm12,xmm12,XMMWORD PTR[64+rdi] + vpand xmm14,xmm14,xmm7 + vpaddd xmm13,xmm13,XMMWORD PTR[96+rdi] + vpaddd xmm14,xmm14,XMMWORD PTR[128+rdi] + vmovdqu XMMWORD PTR[rdi],xmm10 + vmovdqu XMMWORD PTR[32+rdi],xmm11 + vmovdqu XMMWORD PTR[64+rdi],xmm12 + vmovdqu XMMWORD PTR[96+rdi],xmm13 + vmovdqu XMMWORD PTR[128+rdi],xmm14 + + vmovdqu XMMWORD PTR[rbx],xmm6 + vmovdqu xmm5,XMMWORD PTR[96+rbp] + dec edx + jnz $L$oop_avx + + mov edx,DWORD PTR[280+rsp] + lea rdi,QWORD PTR[16+rdi] + lea rsi,QWORD PTR[64+rsi] + dec edx + jnz $L$oop_grande_avx + +$L$done_avx:: + mov rax,QWORD PTR[272+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_avx:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h 
;repret +$L$SEH_end_sha1_multi_block_avx:: +sha1_multi_block_avx ENDP + +ALIGN 32 +sha1_multi_block_avx2 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx2_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[(-120)+rax],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + sub rsp,576 + and rsp,-256 + mov QWORD PTR[544+rsp],rax +$L$body_avx2:: + lea rbp,QWORD PTR[K_XX_XX] + shr edx,1 + + vzeroupper +$L$oop_grande_avx2:: + mov DWORD PTR[552+rsp],edx + xor edx,edx + lea rbx,QWORD PTR[512+rsp] + mov r12,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r12,rbp + mov r13,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r13,rbp + mov r14,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r14,rbp + mov r15,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r15,rbp + mov r8,QWORD PTR[64+rsi] + mov ecx,DWORD PTR[72+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[16+rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[80+rsi] + mov ecx,DWORD PTR[88+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[20+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[96+rsi] + mov ecx,DWORD PTR[104+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[24+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[112+rsi] + mov ecx,DWORD PTR[120+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[28+rbx],ecx + cmovle r11,rbp + vmovdqu ymm0,YMMWORD PTR[rdi] + lea rax,QWORD PTR[128+rsp] + vmovdqu ymm1,YMMWORD PTR[32+rdi] + lea rbx,QWORD PTR[((256+128))+rsp] + vmovdqu ymm2,YMMWORD PTR[64+rdi] + vmovdqu ymm3,YMMWORD PTR[96+rdi] + vmovdqu ymm4,YMMWORD PTR[128+rdi] + vmovdqu ymm9,YMMWORD PTR[96+rbp] + jmp $L$oop_avx2 + +ALIGN 32 +$L$oop_avx2:: + vmovdqa ymm15,YMMWORD PTR[((-32))+rbp] + vmovd xmm10,DWORD PTR[r12] + lea r12,QWORD PTR[64+r12] + vmovd xmm12,DWORD PTR[r8] + lea r8,QWORD PTR[64+r8] + vmovd xmm7,DWORD PTR[r13] + lea r13,QWORD PTR[64+r13] + vmovd xmm6,DWORD PTR[r9] + lea r9,QWORD PTR[64+r9] + vpinsrd xmm10,xmm10,DWORD PTR[r14],1 + lea r14,QWORD PTR[64+r14] + vpinsrd xmm12,xmm12,DWORD PTR[r10],1 + lea r10,QWORD PTR[64+r10] + vpinsrd xmm7,xmm7,DWORD PTR[r15],1 + lea r15,QWORD PTR[64+r15] + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[r11],1 + lea r11,QWORD PTR[64+r11] + vpunpckldq ymm12,ymm12,ymm6 + vmovd xmm11,DWORD PTR[((-60))+r12] + vinserti128 ymm10,ymm10,xmm12,1 + vmovd xmm8,DWORD PTR[((-60))+r8] + vpshufb ymm10,ymm10,ymm9 + vmovd xmm7,DWORD PTR[((-60))+r13] + vmovd xmm6,DWORD PTR[((-60))+r9] + vpinsrd xmm11,xmm11,DWORD PTR[((-60))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-60))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-60))+r15],1 + vpunpckldq ymm11,ymm11,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-60))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + 
vpandn ymm6,ymm1,ymm3 + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD PTR[(0-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vinserti128 ymm11,ymm11,xmm8,1 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm12,DWORD PTR[((-56))+r12] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-56))+r8] + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpshufb ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vmovd xmm7,DWORD PTR[((-56))+r13] + vmovd xmm6,DWORD PTR[((-56))+r9] + vpinsrd xmm12,xmm12,DWORD PTR[((-56))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-56))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-56))+r15],1 + vpunpckldq ymm12,ymm12,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-56))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD PTR[(32-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vinserti128 ymm12,ymm12,xmm8,1 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm13,DWORD PTR[((-52))+r12] + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-52))+r8] + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpshufb ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vmovd xmm7,DWORD PTR[((-52))+r13] + vmovd xmm6,DWORD PTR[((-52))+r9] + vpinsrd xmm13,xmm13,DWORD PTR[((-52))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-52))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-52))+r15],1 + vpunpckldq ymm13,ymm13,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-52))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD PTR[(64-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vinserti128 ymm13,ymm13,xmm8,1 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm14,DWORD PTR[((-48))+r12] + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-48))+r8] + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpshufb ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vmovd xmm7,DWORD PTR[((-48))+r13] + vmovd xmm6,DWORD PTR[((-48))+r9] + vpinsrd xmm14,xmm14,DWORD PTR[((-48))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-48))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-48))+r15],1 + vpunpckldq ymm14,ymm14,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-48))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD PTR[(96-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vinserti128 ymm14,ymm14,xmm8,1 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm10,DWORD PTR[((-44))+r12] + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-44))+r8] + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpshufb ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vmovd xmm7,DWORD PTR[((-44))+r13] + vmovd xmm6,DWORD PTR[((-44))+r9] + vpinsrd xmm10,xmm10,DWORD PTR[((-44))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-44))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-44))+r15],1 + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-44))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD PTR[(128-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vinserti128 ymm10,ymm10,xmm8,1 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm11,DWORD PTR[((-40))+r12] + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-40))+r8] + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpshufb 
ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovd xmm7,DWORD PTR[((-40))+r13] + vmovd xmm6,DWORD PTR[((-40))+r9] + vpinsrd xmm11,xmm11,DWORD PTR[((-40))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-40))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-40))+r15],1 + vpunpckldq ymm11,ymm11,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-40))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD PTR[(160-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vinserti128 ymm11,ymm11,xmm8,1 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm12,DWORD PTR[((-36))+r12] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-36))+r8] + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpshufb ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vmovd xmm7,DWORD PTR[((-36))+r13] + vmovd xmm6,DWORD PTR[((-36))+r9] + vpinsrd xmm12,xmm12,DWORD PTR[((-36))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-36))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-36))+r15],1 + vpunpckldq ymm12,ymm12,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-36))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD PTR[(192-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vinserti128 ymm12,ymm12,xmm8,1 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm13,DWORD PTR[((-32))+r12] + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-32))+r8] + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpshufb ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vmovd xmm7,DWORD PTR[((-32))+r13] + vmovd xmm6,DWORD PTR[((-32))+r9] + vpinsrd xmm13,xmm13,DWORD PTR[((-32))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-32))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-32))+r15],1 + vpunpckldq ymm13,ymm13,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-32))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD PTR[(224-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vinserti128 ymm13,ymm13,xmm8,1 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm14,DWORD PTR[((-28))+r12] + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-28))+r8] + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpshufb ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vmovd xmm7,DWORD PTR[((-28))+r13] + vmovd xmm6,DWORD PTR[((-28))+r9] + vpinsrd xmm14,xmm14,DWORD PTR[((-28))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-28))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-28))+r15],1 + vpunpckldq ymm14,ymm14,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-28))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD PTR[(256-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vinserti128 ymm14,ymm14,xmm8,1 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm10,DWORD PTR[((-24))+r12] + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-24))+r8] + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpshufb ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vmovd xmm7,DWORD PTR[((-24))+r13] + vmovd xmm6,DWORD PTR[((-24))+r9] + vpinsrd xmm10,xmm10,DWORD PTR[((-24))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-24))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-24))+r15],1 + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-24))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm0,ymm0,ymm15 + vpslld 
ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD PTR[(288-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vinserti128 ymm10,ymm10,xmm8,1 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm11,DWORD PTR[((-20))+r12] + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-20))+r8] + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpshufb ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovd xmm7,DWORD PTR[((-20))+r13] + vmovd xmm6,DWORD PTR[((-20))+r9] + vpinsrd xmm11,xmm11,DWORD PTR[((-20))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-20))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-20))+r15],1 + vpunpckldq ymm11,ymm11,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-20))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD PTR[(320-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vinserti128 ymm11,ymm11,xmm8,1 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm12,DWORD PTR[((-16))+r12] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-16))+r8] + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpshufb ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vmovd xmm7,DWORD PTR[((-16))+r13] + vmovd xmm6,DWORD PTR[((-16))+r9] + vpinsrd xmm12,xmm12,DWORD PTR[((-16))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-16))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-16))+r15],1 + vpunpckldq ymm12,ymm12,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-16))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD PTR[(352-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vinserti128 ymm12,ymm12,xmm8,1 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm13,DWORD PTR[((-12))+r12] + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-12))+r8] + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpshufb ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vmovd xmm7,DWORD PTR[((-12))+r13] + vmovd xmm6,DWORD PTR[((-12))+r9] + vpinsrd xmm13,xmm13,DWORD PTR[((-12))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-12))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-12))+r15],1 + vpunpckldq ymm13,ymm13,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-12))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD PTR[(384-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vinserti128 ymm13,ymm13,xmm8,1 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm14,DWORD PTR[((-8))+r12] + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-8))+r8] + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpshufb ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vmovd xmm7,DWORD PTR[((-8))+r13] + vmovd xmm6,DWORD PTR[((-8))+r9] + vpinsrd xmm14,xmm14,DWORD PTR[((-8))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-8))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-8))+r15],1 + vpunpckldq ymm14,ymm14,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-8))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD PTR[(416-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vinserti128 ymm14,ymm14,xmm8,1 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vmovd xmm10,DWORD PTR[((-4))+r12] + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vmovd xmm8,DWORD PTR[((-4))+r8] + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd 
ymm1,ymm1,ymm7 + vpshufb ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vmovdqa ymm11,YMMWORD PTR[((0-128))+rax] + vmovd xmm7,DWORD PTR[((-4))+r13] + vmovd xmm6,DWORD PTR[((-4))+r9] + vpinsrd xmm10,xmm10,DWORD PTR[((-4))+r14],1 + vpinsrd xmm8,xmm8,DWORD PTR[((-4))+r10],1 + vpinsrd xmm7,xmm7,DWORD PTR[((-4))+r15],1 + vpunpckldq ymm10,ymm10,ymm7 + vpinsrd xmm6,xmm6,DWORD PTR[((-4))+r11],1 + vpunpckldq ymm8,ymm8,ymm6 + vpaddd ymm0,ymm0,ymm15 + prefetcht0 [63+r12] + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD PTR[(448-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vinserti128 ymm10,ymm10,xmm8,1 + vpsrld ymm8,ymm1,27 + prefetcht0 [63+r13] + vpxor ymm5,ymm5,ymm6 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + prefetcht0 [63+r14] + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + prefetcht0 [63+r15] + vpshufb ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm12,YMMWORD PTR[((32-128))+rax] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((64-128))+rax] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpandn ymm6,ymm1,ymm3 + prefetcht0 [63+r8] + vpand ymm5,ymm1,ymm2 + + vmovdqa YMMWORD PTR[(480-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((256-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm11,ymm11,ymm13 + prefetcht0 [63+r9] + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + prefetcht0 [63+r10] + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + prefetcht0 [63+r11] + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((96-128))+rax] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpandn ymm6,ymm0,ymm2 + + vpand ymm5,ymm0,ymm1 + + vmovdqa YMMWORD PTR[(0-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((288-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm12,ymm12,ymm14 + + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((128-128))+rax] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpandn ymm6,ymm4,ymm1 + + vpand ymm5,ymm4,ymm0 + + vmovdqa YMMWORD PTR[(32-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((320-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm13,ymm13,ymm10 + + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((160-128))+rax] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpandn ymm6,ymm3,ymm0 + + vpand ymm5,ymm3,ymm4 + + vmovdqa YMMWORD PTR[(64-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((352-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm14,ymm14,ymm11 + + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((192-128))+rax] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpandn ymm6,ymm2,ymm4 + + vpand ymm5,ymm2,ymm3 + + vmovdqa YMMWORD 
PTR[(96-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((384-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm6 + vpxor ymm10,ymm10,ymm12 + + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm15,YMMWORD PTR[rbp] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((224-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(128-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((416-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((256-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(160-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((448-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((288-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD PTR[(192-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((480-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((320-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD PTR[(224-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((0-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((352-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD PTR[(256-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((32-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((384-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(288-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((64-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + 
vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((416-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(320-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((96-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((448-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD PTR[(352-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((128-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((480-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD PTR[(384-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((160-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((0-128))+rax] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD PTR[(416-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((192-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((32-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(448-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((224-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((64-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(480-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((256-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((96-128))+rax] + + vpslld ymm7,ymm3,5 + 
vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD PTR[(0-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((288-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((128-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD PTR[(32-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((320-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((160-128))+rax] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD PTR[(64-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((352-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((192-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(96-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((384-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((224-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(128-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((416-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((256-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD PTR[(160-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((448-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((288-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD PTR[(192-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((480-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor 
ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((320-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD PTR[(224-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((0-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm15,YMMWORD PTR[32+rbp] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((352-256-128))+rbx] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD PTR[((32-128))+rax] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((384-256-128))+rbx] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD PTR[((64-128))+rax] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((416-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD PTR[((96-128))+rax] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((448-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD PTR[((128-128))+rax] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((480-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD PTR[((160-128))+rax] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor 
ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((0-128))+rax] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD PTR[((192-128))+rax] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((32-128))+rax] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD PTR[((224-128))+rax] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((64-128))+rax] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD PTR[((256-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((96-128))+rax] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD PTR[((288-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD PTR[(0-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((128-128))+rax] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD PTR[((320-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD PTR[(32-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((160-128))+rax] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD PTR[((352-256-128))+rbx] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD PTR[(64-128)+rax],ymm10 + 
vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((192-128))+rax] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD PTR[((384-256-128))+rbx] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + vmovdqu YMMWORD PTR[(96-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((224-128))+rax] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD PTR[((416-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD PTR[(128-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((256-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD PTR[((448-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD PTR[(160-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((288-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD PTR[((480-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD PTR[(192-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((320-256-128))+rbx] + + vpaddd ymm4,ymm4,ymm15 + vpslld ymm7,ymm0,5 + vpand ymm6,ymm3,ymm2 + vpxor ymm11,ymm11,YMMWORD PTR[((0-128))+rax] + + vpaddd ymm4,ymm4,ymm6 + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm3,ymm2 + vpxor ymm11,ymm11,ymm13 + + vmovdqu YMMWORD PTR[(224-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm11,31 + vpand ymm5,ymm5,ymm1 + vpaddd ymm11,ymm11,ymm11 + + vpslld ymm6,ymm1,30 + vpaddd ymm4,ymm4,ymm5 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((352-256-128))+rbx] + + vpaddd ymm3,ymm3,ymm15 + vpslld ymm7,ymm4,5 + vpand ymm6,ymm2,ymm1 + vpxor ymm12,ymm12,YMMWORD PTR[((32-128))+rax] + + vpaddd ymm3,ymm3,ymm6 + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm2,ymm1 + vpxor ymm12,ymm12,ymm14 + + 
vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm12,31 + vpand ymm5,ymm5,ymm0 + vpaddd ymm12,ymm12,ymm12 + + vpslld ymm6,ymm0,30 + vpaddd ymm3,ymm3,ymm5 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((384-256-128))+rbx] + + vpaddd ymm2,ymm2,ymm15 + vpslld ymm7,ymm3,5 + vpand ymm6,ymm1,ymm0 + vpxor ymm13,ymm13,YMMWORD PTR[((64-128))+rax] + + vpaddd ymm2,ymm2,ymm6 + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm1,ymm0 + vpxor ymm13,ymm13,ymm10 + + vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm13,31 + vpand ymm5,ymm5,ymm4 + vpaddd ymm13,ymm13,ymm13 + + vpslld ymm6,ymm4,30 + vpaddd ymm2,ymm2,ymm5 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((416-256-128))+rbx] + + vpaddd ymm1,ymm1,ymm15 + vpslld ymm7,ymm2,5 + vpand ymm6,ymm0,ymm4 + vpxor ymm14,ymm14,YMMWORD PTR[((96-128))+rax] + + vpaddd ymm1,ymm1,ymm6 + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm0,ymm4 + vpxor ymm14,ymm14,ymm11 + + vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm14,31 + vpand ymm5,ymm5,ymm3 + vpaddd ymm14,ymm14,ymm14 + + vpslld ymm6,ymm3,30 + vpaddd ymm1,ymm1,ymm5 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((448-256-128))+rbx] + + vpaddd ymm0,ymm0,ymm15 + vpslld ymm7,ymm1,5 + vpand ymm6,ymm4,ymm3 + vpxor ymm10,ymm10,YMMWORD PTR[((128-128))+rax] + + vpaddd ymm0,ymm0,ymm6 + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm4,ymm3 + vpxor ymm10,ymm10,ymm12 + + vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpor ymm7,ymm7,ymm8 + vpsrld ymm9,ymm10,31 + vpand ymm5,ymm5,ymm2 + vpaddd ymm10,ymm10,ymm10 + + vpslld ymm6,ymm2,30 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vmovdqa ymm15,YMMWORD PTR[64+rbp] + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((480-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(384-256-128)+rbx],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((160-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((0-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(416-256-128)+rbx],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((192-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((32-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD PTR[(448-256-128)+rbx],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((224-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + 
vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((64-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD PTR[(480-256-128)+rbx],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((256-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((96-128))+rax] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD PTR[(0-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((288-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((128-128))+rax] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(32-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((320-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((160-128))+rax] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(64-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((352-256-128))+rbx] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((192-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vmovdqa YMMWORD PTR[(96-128)+rax],ymm12 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((384-256-128))+rbx] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((224-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vmovdqa YMMWORD PTR[(128-128)+rax],ymm13 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((416-256-128))+rbx] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa 
ymm12,YMMWORD PTR[((256-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vmovdqa YMMWORD PTR[(160-128)+rax],ymm14 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((448-256-128))+rbx] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((288-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vmovdqa YMMWORD PTR[(192-128)+rax],ymm10 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((480-256-128))+rbx] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 + vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((320-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vmovdqa YMMWORD PTR[(224-128)+rax],ymm11 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((0-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((352-256-128))+rbx] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((32-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((384-256-128))+rbx] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((64-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpxor ymm10,ymm10,ymm12 + vmovdqa ymm12,YMMWORD PTR[((416-256-128))+rbx] + + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm10,ymm10,YMMWORD PTR[((96-128))+rax] + vpsrld ymm8,ymm1,27 + vpxor ymm5,ymm5,ymm3 + vpxor ymm10,ymm10,ymm12 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + vpsrld ymm9,ymm10,31 + vpaddd ymm10,ymm10,ymm10 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm10,ymm10,ymm9 + vpor ymm2,ymm2,ymm6 + vpxor ymm11,ymm11,ymm13 + vmovdqa ymm13,YMMWORD PTR[((448-256-128))+rbx] + + vpslld ymm7,ymm0,5 + vpaddd ymm4,ymm4,ymm15 + vpxor ymm5,ymm3,ymm1 + vpaddd ymm4,ymm4,ymm10 + vpxor ymm11,ymm11,YMMWORD PTR[((128-128))+rax] + vpsrld ymm8,ymm0,27 + vpxor ymm5,ymm5,ymm2 + vpxor ymm11,ymm11,ymm13 + + vpslld ymm6,ymm1,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm4,ymm4,ymm5 + vpsrld ymm9,ymm11,31 + vpaddd ymm11,ymm11,ymm11 + + vpsrld ymm1,ymm1,2 
+ vpaddd ymm4,ymm4,ymm7 + vpor ymm11,ymm11,ymm9 + vpor ymm1,ymm1,ymm6 + vpxor ymm12,ymm12,ymm14 + vmovdqa ymm14,YMMWORD PTR[((480-256-128))+rbx] + + vpslld ymm7,ymm4,5 + vpaddd ymm3,ymm3,ymm15 + vpxor ymm5,ymm2,ymm0 + vpaddd ymm3,ymm3,ymm11 + vpxor ymm12,ymm12,YMMWORD PTR[((160-128))+rax] + vpsrld ymm8,ymm4,27 + vpxor ymm5,ymm5,ymm1 + vpxor ymm12,ymm12,ymm14 + + vpslld ymm6,ymm0,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm3,ymm3,ymm5 + vpsrld ymm9,ymm12,31 + vpaddd ymm12,ymm12,ymm12 + + vpsrld ymm0,ymm0,2 + vpaddd ymm3,ymm3,ymm7 + vpor ymm12,ymm12,ymm9 + vpor ymm0,ymm0,ymm6 + vpxor ymm13,ymm13,ymm10 + vmovdqa ymm10,YMMWORD PTR[((0-128))+rax] + + vpslld ymm7,ymm3,5 + vpaddd ymm2,ymm2,ymm15 + vpxor ymm5,ymm1,ymm4 + vpaddd ymm2,ymm2,ymm12 + vpxor ymm13,ymm13,YMMWORD PTR[((192-128))+rax] + vpsrld ymm8,ymm3,27 + vpxor ymm5,ymm5,ymm0 + vpxor ymm13,ymm13,ymm10 + + vpslld ymm6,ymm4,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm2,ymm2,ymm5 + vpsrld ymm9,ymm13,31 + vpaddd ymm13,ymm13,ymm13 + + vpsrld ymm4,ymm4,2 + vpaddd ymm2,ymm2,ymm7 + vpor ymm13,ymm13,ymm9 + vpor ymm4,ymm4,ymm6 + vpxor ymm14,ymm14,ymm11 + vmovdqa ymm11,YMMWORD PTR[((32-128))+rax] + + vpslld ymm7,ymm2,5 + vpaddd ymm1,ymm1,ymm15 + vpxor ymm5,ymm0,ymm3 + vpaddd ymm1,ymm1,ymm13 + vpxor ymm14,ymm14,YMMWORD PTR[((224-128))+rax] + vpsrld ymm8,ymm2,27 + vpxor ymm5,ymm5,ymm4 + vpxor ymm14,ymm14,ymm11 + + vpslld ymm6,ymm3,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm1,ymm1,ymm5 + vpsrld ymm9,ymm14,31 + vpaddd ymm14,ymm14,ymm14 + + vpsrld ymm3,ymm3,2 + vpaddd ymm1,ymm1,ymm7 + vpor ymm14,ymm14,ymm9 + vpor ymm3,ymm3,ymm6 + vpslld ymm7,ymm1,5 + vpaddd ymm0,ymm0,ymm15 + vpxor ymm5,ymm4,ymm2 + + vpsrld ymm8,ymm1,27 + vpaddd ymm0,ymm0,ymm14 + vpxor ymm5,ymm5,ymm3 + + vpslld ymm6,ymm2,30 + vpor ymm7,ymm7,ymm8 + vpaddd ymm0,ymm0,ymm5 + + vpsrld ymm2,ymm2,2 + vpaddd ymm0,ymm0,ymm7 + vpor ymm2,ymm2,ymm6 + mov ecx,1 + lea rbx,QWORD PTR[512+rsp] + cmp ecx,DWORD PTR[rbx] + cmovge r12,rbp + cmp ecx,DWORD PTR[4+rbx] + cmovge r13,rbp + cmp ecx,DWORD PTR[8+rbx] + cmovge r14,rbp + cmp ecx,DWORD PTR[12+rbx] + cmovge r15,rbp + cmp ecx,DWORD PTR[16+rbx] + cmovge r8,rbp + cmp ecx,DWORD PTR[20+rbx] + cmovge r9,rbp + cmp ecx,DWORD PTR[24+rbx] + cmovge r10,rbp + cmp ecx,DWORD PTR[28+rbx] + cmovge r11,rbp + vmovdqu ymm5,YMMWORD PTR[rbx] + vpxor ymm7,ymm7,ymm7 + vmovdqa ymm6,ymm5 + vpcmpgtd ymm6,ymm6,ymm7 + vpaddd ymm5,ymm5,ymm6 + + vpand ymm0,ymm0,ymm6 + vpand ymm1,ymm1,ymm6 + vpaddd ymm0,ymm0,YMMWORD PTR[rdi] + vpand ymm2,ymm2,ymm6 + vpaddd ymm1,ymm1,YMMWORD PTR[32+rdi] + vpand ymm3,ymm3,ymm6 + vpaddd ymm2,ymm2,YMMWORD PTR[64+rdi] + vpand ymm4,ymm4,ymm6 + vpaddd ymm3,ymm3,YMMWORD PTR[96+rdi] + vpaddd ymm4,ymm4,YMMWORD PTR[128+rdi] + vmovdqu YMMWORD PTR[rdi],ymm0 + vmovdqu YMMWORD PTR[32+rdi],ymm1 + vmovdqu YMMWORD PTR[64+rdi],ymm2 + vmovdqu YMMWORD PTR[96+rdi],ymm3 + vmovdqu YMMWORD PTR[128+rdi],ymm4 + + vmovdqu YMMWORD PTR[rbx],ymm5 + lea rbx,QWORD PTR[((256+128))+rsp] + vmovdqu ymm9,YMMWORD PTR[96+rbp] + dec edx + jnz $L$oop_avx2 + + + + + + + +$L$done_avx2:: + mov rax,QWORD PTR[544+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD 
PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_avx2:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_multi_block_avx2:: +sha1_multi_block_avx2 ENDP + +ALIGN 256 + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 05a827999h,05a827999h,05a827999h,05a827999h +K_XX_XX:: + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h +DB 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107 +DB 32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120 +DB 56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77 +DB 83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110 +DB 115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[272+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + + lea rsi,QWORD PTR[((-24-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +ALIGN 16 +avx2_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[544+r8] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea 
rsi,QWORD PTR[((-56-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + jmp $L$in_prologue +avx2_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha1_multi_block + DD imagerel $L$SEH_end_sha1_multi_block + DD imagerel $L$SEH_info_sha1_multi_block + DD imagerel $L$SEH_begin_sha1_multi_block_shaext + DD imagerel $L$SEH_end_sha1_multi_block_shaext + DD imagerel $L$SEH_info_sha1_multi_block_shaext + DD imagerel $L$SEH_begin_sha1_multi_block_avx + DD imagerel $L$SEH_end_sha1_multi_block_avx + DD imagerel $L$SEH_info_sha1_multi_block_avx + DD imagerel $L$SEH_begin_sha1_multi_block_avx2 + DD imagerel $L$SEH_end_sha1_multi_block_avx2 + DD imagerel $L$SEH_info_sha1_multi_block_avx2 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha1_multi_block:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body,imagerel $L$epilogue +$L$SEH_info_sha1_multi_block_shaext:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext +$L$SEH_info_sha1_multi_block_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body_avx,imagerel $L$epilogue_avx +$L$SEH_info_sha1_multi_block_avx2:: +DB 9,0,0,0 + DD imagerel avx2_handler + DD imagerel $L$body_avx2,imagerel $L$epilogue_avx2 + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm index 9589f7fa08184a..295a2c06ba012e 100644 --- a/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/sha/sha1-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC sha1_block_data_order @@ -17,23 +17,35 @@ $L$SEH_begin_sha1_block_data_order:: mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))] mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+8))] test r8d,512 jz $L$ialu + test r10d,536870912 + jnz _shaext_shortcut + and r10d,296 + cmp r10d,296 + je _avx2_shortcut + and r8d,268435456 + and r9d,1073741824 + or r8d,r9d + cmp r8d,1342177280 + je _avx_shortcut jmp _ssse3_shortcut ALIGN 16 $L$ialu:: + mov rax,rsp push rbx push rbp push r12 push r13 - mov r11,rsp + push r14 mov r8,rdi sub rsp,72 mov r9,rsi and rsp,-64 mov r10,rdx - mov QWORD PTR[64+rsp],r11 + mov QWORD PTR[64+rsp],rax $L$prologue:: mov esi,DWORD PTR[r8] @@ -47,1230 +59,1168 @@ ALIGN 16 $L$loop:: mov edx,DWORD PTR[r9] bswap edx - mov DWORD PTR[rsp],edx - mov eax,r11d mov ebp,DWORD PTR[4+r9] + mov eax,r12d + mov DWORD PTR[rsp],edx mov ecx,esi - xor eax,r12d bswap ebp + xor eax,r11d rol ecx,5 - lea r13d,DWORD PTR[1518500249+r13*1+rdx] and eax,edi - mov DWORD PTR[4+rsp],ebp + lea r13d,DWORD PTR[1518500249+r13*1+rdx] add r13d,ecx xor eax,r12d rol edi,30 add r13d,eax - mov eax,edi - mov edx,DWORD PTR[8+r9] + mov r14d,DWORD PTR[8+r9] + mov eax,r11d + mov DWORD PTR[4+rsp],ebp mov ecx,r13d - xor eax,r11d - bswap edx + bswap r14d + xor eax,edi rol ecx,5 - lea r12d,DWORD PTR[1518500249+r12*1+rbp] and eax,esi - mov DWORD PTR[8+rsp],edx + lea r12d,DWORD PTR[1518500249+r12*1+rbp] add r12d,ecx xor eax,r11d rol esi,30 add r12d,eax - mov eax,esi - mov ebp,DWORD PTR[12+r9] + mov edx,DWORD PTR[12+r9] + mov eax,edi + mov DWORD PTR[8+rsp],r14d mov ecx,r12d - xor eax,edi - bswap ebp + bswap edx + xor eax,esi rol ecx,5 - lea r11d,DWORD PTR[1518500249+r11*1+rdx] and eax,r13d - mov DWORD PTR[12+rsp],ebp + lea r11d,DWORD PTR[1518500249+r11*1+r14] add r11d,ecx xor eax,edi rol r13d,30 add 
r11d,eax - mov eax,r13d - mov edx,DWORD PTR[16+r9] + mov ebp,DWORD PTR[16+r9] + mov eax,esi + mov DWORD PTR[12+rsp],edx mov ecx,r11d - xor eax,esi - bswap edx + bswap ebp + xor eax,r13d rol ecx,5 - lea edi,DWORD PTR[1518500249+rdi*1+rbp] and eax,r12d - mov DWORD PTR[16+rsp],edx + lea edi,DWORD PTR[1518500249+rdi*1+rdx] add edi,ecx xor eax,esi rol r12d,30 add edi,eax - mov eax,r12d - mov ebp,DWORD PTR[20+r9] + mov r14d,DWORD PTR[20+r9] + mov eax,r13d + mov DWORD PTR[16+rsp],ebp mov ecx,edi - xor eax,r13d - bswap ebp + bswap r14d + xor eax,r12d rol ecx,5 - lea esi,DWORD PTR[1518500249+rsi*1+rdx] and eax,r11d - mov DWORD PTR[20+rsp],ebp + lea esi,DWORD PTR[1518500249+rsi*1+rbp] add esi,ecx xor eax,r13d rol r11d,30 add esi,eax - mov eax,r11d mov edx,DWORD PTR[24+r9] + mov eax,r12d + mov DWORD PTR[20+rsp],r14d mov ecx,esi - xor eax,r12d bswap edx + xor eax,r11d rol ecx,5 - lea r13d,DWORD PTR[1518500249+r13*1+rbp] and eax,edi - mov DWORD PTR[24+rsp],edx + lea r13d,DWORD PTR[1518500249+r13*1+r14] add r13d,ecx xor eax,r12d rol edi,30 add r13d,eax - mov eax,edi mov ebp,DWORD PTR[28+r9] + mov eax,r11d + mov DWORD PTR[24+rsp],edx mov ecx,r13d - xor eax,r11d bswap ebp + xor eax,edi rol ecx,5 - lea r12d,DWORD PTR[1518500249+r12*1+rdx] and eax,esi - mov DWORD PTR[28+rsp],ebp + lea r12d,DWORD PTR[1518500249+r12*1+rdx] add r12d,ecx xor eax,r11d rol esi,30 add r12d,eax - mov eax,esi - mov edx,DWORD PTR[32+r9] + mov r14d,DWORD PTR[32+r9] + mov eax,edi + mov DWORD PTR[28+rsp],ebp mov ecx,r12d - xor eax,edi - bswap edx + bswap r14d + xor eax,esi rol ecx,5 - lea r11d,DWORD PTR[1518500249+r11*1+rbp] and eax,r13d - mov DWORD PTR[32+rsp],edx + lea r11d,DWORD PTR[1518500249+r11*1+rbp] add r11d,ecx xor eax,edi rol r13d,30 add r11d,eax - mov eax,r13d - mov ebp,DWORD PTR[36+r9] + mov edx,DWORD PTR[36+r9] + mov eax,esi + mov DWORD PTR[32+rsp],r14d mov ecx,r11d - xor eax,esi - bswap ebp + bswap edx + xor eax,r13d rol ecx,5 - lea edi,DWORD PTR[1518500249+rdi*1+rdx] and eax,r12d - mov DWORD PTR[36+rsp],ebp + lea edi,DWORD PTR[1518500249+rdi*1+r14] add edi,ecx xor eax,esi rol r12d,30 add edi,eax - mov eax,r12d - mov edx,DWORD PTR[40+r9] + mov ebp,DWORD PTR[40+r9] + mov eax,r13d + mov DWORD PTR[36+rsp],edx mov ecx,edi - xor eax,r13d - bswap edx + bswap ebp + xor eax,r12d rol ecx,5 - lea esi,DWORD PTR[1518500249+rsi*1+rbp] and eax,r11d - mov DWORD PTR[40+rsp],edx + lea esi,DWORD PTR[1518500249+rsi*1+rdx] add esi,ecx xor eax,r13d rol r11d,30 add esi,eax - mov eax,r11d - mov ebp,DWORD PTR[44+r9] + mov r14d,DWORD PTR[44+r9] + mov eax,r12d + mov DWORD PTR[40+rsp],ebp mov ecx,esi - xor eax,r12d - bswap ebp + bswap r14d + xor eax,r11d rol ecx,5 - lea r13d,DWORD PTR[1518500249+r13*1+rdx] and eax,edi - mov DWORD PTR[44+rsp],ebp + lea r13d,DWORD PTR[1518500249+r13*1+rbp] add r13d,ecx xor eax,r12d rol edi,30 add r13d,eax - mov eax,edi mov edx,DWORD PTR[48+r9] + mov eax,r11d + mov DWORD PTR[44+rsp],r14d mov ecx,r13d - xor eax,r11d bswap edx + xor eax,edi rol ecx,5 - lea r12d,DWORD PTR[1518500249+r12*1+rbp] and eax,esi - mov DWORD PTR[48+rsp],edx + lea r12d,DWORD PTR[1518500249+r12*1+r14] add r12d,ecx xor eax,r11d rol esi,30 add r12d,eax - mov eax,esi mov ebp,DWORD PTR[52+r9] + mov eax,edi + mov DWORD PTR[48+rsp],edx mov ecx,r12d - xor eax,edi bswap ebp + xor eax,esi rol ecx,5 - lea r11d,DWORD PTR[1518500249+r11*1+rdx] and eax,r13d - mov DWORD PTR[52+rsp],ebp + lea r11d,DWORD PTR[1518500249+r11*1+rdx] add r11d,ecx xor eax,edi rol r13d,30 add r11d,eax - mov eax,r13d - mov edx,DWORD PTR[56+r9] + mov r14d,DWORD PTR[56+r9] + mov eax,esi + 
mov DWORD PTR[52+rsp],ebp mov ecx,r11d - xor eax,esi - bswap edx + bswap r14d + xor eax,r13d rol ecx,5 - lea edi,DWORD PTR[1518500249+rdi*1+rbp] and eax,r12d - mov DWORD PTR[56+rsp],edx + lea edi,DWORD PTR[1518500249+rdi*1+rbp] add edi,ecx xor eax,esi rol r12d,30 add edi,eax - mov eax,r12d - mov ebp,DWORD PTR[60+r9] + mov edx,DWORD PTR[60+r9] + mov eax,r13d + mov DWORD PTR[56+rsp],r14d mov ecx,edi - xor eax,r13d - bswap ebp + bswap edx + xor eax,r12d rol ecx,5 - lea esi,DWORD PTR[1518500249+rsi*1+rdx] and eax,r11d - mov DWORD PTR[60+rsp],ebp + lea esi,DWORD PTR[1518500249+rsi*1+r14] add esi,ecx xor eax,r13d rol r11d,30 add esi,eax - mov edx,DWORD PTR[rsp] - mov eax,r11d + xor ebp,DWORD PTR[rsp] + mov eax,r12d + mov DWORD PTR[60+rsp],edx mov ecx,esi - xor edx,DWORD PTR[8+rsp] - xor eax,r12d + xor ebp,DWORD PTR[8+rsp] + xor eax,r11d rol ecx,5 - xor edx,DWORD PTR[32+rsp] + xor ebp,DWORD PTR[32+rsp] and eax,edi - lea r13d,DWORD PTR[1518500249+r13*1+rbp] - xor edx,DWORD PTR[52+rsp] + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + rol edi,30 xor eax,r12d - rol edx,1 add r13d,ecx - rol edi,30 - mov DWORD PTR[rsp],edx + rol ebp,1 add r13d,eax - mov ebp,DWORD PTR[4+rsp] - mov eax,edi + xor r14d,DWORD PTR[4+rsp] + mov eax,r11d + mov DWORD PTR[rsp],ebp mov ecx,r13d - xor ebp,DWORD PTR[12+rsp] - xor eax,r11d + xor r14d,DWORD PTR[12+rsp] + xor eax,edi rol ecx,5 - xor ebp,DWORD PTR[36+rsp] + xor r14d,DWORD PTR[36+rsp] and eax,esi - lea r12d,DWORD PTR[1518500249+r12*1+rdx] - xor ebp,DWORD PTR[56+rsp] + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + rol esi,30 xor eax,r11d - rol ebp,1 add r12d,ecx - rol esi,30 - mov DWORD PTR[4+rsp],ebp + rol r14d,1 add r12d,eax - mov edx,DWORD PTR[8+rsp] - mov eax,esi + xor edx,DWORD PTR[8+rsp] + mov eax,edi + mov DWORD PTR[4+rsp],r14d mov ecx,r12d xor edx,DWORD PTR[16+rsp] - xor eax,edi + xor eax,esi rol ecx,5 xor edx,DWORD PTR[40+rsp] and eax,r13d - lea r11d,DWORD PTR[1518500249+r11*1+rbp] - xor edx,DWORD PTR[60+rsp] + lea r11d,DWORD PTR[1518500249+r11*1+r14] + rol r13d,30 xor eax,edi - rol edx,1 add r11d,ecx - rol r13d,30 - mov DWORD PTR[8+rsp],edx + rol edx,1 add r11d,eax - mov ebp,DWORD PTR[12+rsp] - mov eax,r13d + xor ebp,DWORD PTR[12+rsp] + mov eax,esi + mov DWORD PTR[8+rsp],edx mov ecx,r11d xor ebp,DWORD PTR[20+rsp] - xor eax,esi + xor eax,r13d rol ecx,5 xor ebp,DWORD PTR[44+rsp] and eax,r12d lea edi,DWORD PTR[1518500249+rdi*1+rdx] - xor ebp,DWORD PTR[rsp] + rol r12d,30 xor eax,esi - rol ebp,1 add edi,ecx - rol r12d,30 - mov DWORD PTR[12+rsp],ebp + rol ebp,1 add edi,eax - mov edx,DWORD PTR[16+rsp] - mov eax,r12d + xor r14d,DWORD PTR[16+rsp] + mov eax,r13d + mov DWORD PTR[12+rsp],ebp mov ecx,edi - xor edx,DWORD PTR[24+rsp] - xor eax,r13d + xor r14d,DWORD PTR[24+rsp] + xor eax,r12d rol ecx,5 - xor edx,DWORD PTR[48+rsp] + xor r14d,DWORD PTR[48+rsp] and eax,r11d lea esi,DWORD PTR[1518500249+rsi*1+rbp] - xor edx,DWORD PTR[4+rsp] + rol r11d,30 xor eax,r13d - rol edx,1 add esi,ecx - rol r11d,30 - mov DWORD PTR[16+rsp],edx + rol r14d,1 add esi,eax - mov ebp,DWORD PTR[20+rsp] - mov eax,r11d + xor edx,DWORD PTR[20+rsp] + mov eax,edi + mov DWORD PTR[16+rsp],r14d mov ecx,esi - xor ebp,DWORD PTR[28+rsp] - xor eax,edi - rol ecx,5 - lea r13d,DWORD PTR[1859775393+r13*1+rdx] - xor ebp,DWORD PTR[52+rsp] + xor edx,DWORD PTR[28+rsp] xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+r14] + xor eax,r11d add r13d,ecx - xor ebp,DWORD PTR[8+rsp] rol edi,30 add r13d,eax - rol ebp,1 - mov DWORD PTR[20+rsp],ebp - mov edx,DWORD PTR[24+rsp] - mov eax,edi + rol 
edx,1 + xor ebp,DWORD PTR[24+rsp] + mov eax,esi + mov DWORD PTR[20+rsp],edx mov ecx,r13d - xor edx,DWORD PTR[32+rsp] - xor eax,esi - rol ecx,5 - lea r12d,DWORD PTR[1859775393+r12*1+rbp] - xor edx,DWORD PTR[56+rsp] + xor ebp,DWORD PTR[32+rsp] xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor eax,edi add r12d,ecx - xor edx,DWORD PTR[12+rsp] rol esi,30 add r12d,eax - rol edx,1 - mov DWORD PTR[24+rsp],edx - mov ebp,DWORD PTR[28+rsp] - mov eax,esi + rol ebp,1 + xor r14d,DWORD PTR[28+rsp] + mov eax,r13d + mov DWORD PTR[24+rsp],ebp mov ecx,r12d - xor ebp,DWORD PTR[36+rsp] - xor eax,r13d - rol ecx,5 - lea r11d,DWORD PTR[1859775393+r11*1+rdx] - xor ebp,DWORD PTR[60+rsp] + xor r14d,DWORD PTR[36+rsp] xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[60+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor eax,esi add r11d,ecx - xor ebp,DWORD PTR[16+rsp] rol r13d,30 add r11d,eax - rol ebp,1 - mov DWORD PTR[28+rsp],ebp - mov edx,DWORD PTR[32+rsp] - mov eax,r13d + rol r14d,1 + xor edx,DWORD PTR[32+rsp] + mov eax,r12d + mov DWORD PTR[28+rsp],r14d mov ecx,r11d xor edx,DWORD PTR[40+rsp] - xor eax,r12d + xor eax,esi rol ecx,5 - lea edi,DWORD PTR[1859775393+rdi*1+rbp] xor edx,DWORD PTR[rsp] - xor eax,esi + lea edi,DWORD PTR[1859775393+rdi*1+r14] + xor eax,r13d add edi,ecx - xor edx,DWORD PTR[20+rsp] rol r12d,30 add edi,eax rol edx,1 + xor ebp,DWORD PTR[36+rsp] + mov eax,r11d mov DWORD PTR[32+rsp],edx - mov ebp,DWORD PTR[36+rsp] - mov eax,r12d mov ecx,edi xor ebp,DWORD PTR[44+rsp] - xor eax,r11d + xor eax,r13d rol ecx,5 - lea esi,DWORD PTR[1859775393+rsi*1+rdx] xor ebp,DWORD PTR[4+rsp] - xor eax,r13d + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor eax,r12d add esi,ecx - xor ebp,DWORD PTR[24+rsp] rol r11d,30 add esi,eax rol ebp,1 + xor r14d,DWORD PTR[40+rsp] + mov eax,edi mov DWORD PTR[36+rsp],ebp - mov edx,DWORD PTR[40+rsp] - mov eax,r11d mov ecx,esi - xor edx,DWORD PTR[48+rsp] - xor eax,edi + xor r14d,DWORD PTR[48+rsp] + xor eax,r12d rol ecx,5 + xor r14d,DWORD PTR[8+rsp] lea r13d,DWORD PTR[1859775393+r13*1+rbp] - xor edx,DWORD PTR[8+rsp] - xor eax,r12d + xor eax,r11d add r13d,ecx - xor edx,DWORD PTR[28+rsp] rol edi,30 add r13d,eax - rol edx,1 - mov DWORD PTR[40+rsp],edx - mov ebp,DWORD PTR[44+rsp] - mov eax,edi + rol r14d,1 + xor edx,DWORD PTR[44+rsp] + mov eax,esi + mov DWORD PTR[40+rsp],r14d mov ecx,r13d - xor ebp,DWORD PTR[52+rsp] - xor eax,esi - rol ecx,5 - lea r12d,DWORD PTR[1859775393+r12*1+rdx] - xor ebp,DWORD PTR[12+rsp] + xor edx,DWORD PTR[52+rsp] xor eax,r11d + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+r14] + xor eax,edi add r12d,ecx - xor ebp,DWORD PTR[32+rsp] rol esi,30 add r12d,eax - rol ebp,1 - mov DWORD PTR[44+rsp],ebp - mov edx,DWORD PTR[48+rsp] - mov eax,esi + rol edx,1 + xor ebp,DWORD PTR[48+rsp] + mov eax,r13d + mov DWORD PTR[44+rsp],edx mov ecx,r12d - xor edx,DWORD PTR[56+rsp] - xor eax,r13d - rol ecx,5 - lea r11d,DWORD PTR[1859775393+r11*1+rbp] - xor edx,DWORD PTR[16+rsp] + xor ebp,DWORD PTR[56+rsp] xor eax,edi + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor eax,esi add r11d,ecx - xor edx,DWORD PTR[36+rsp] rol r13d,30 add r11d,eax - rol edx,1 - mov DWORD PTR[48+rsp],edx - mov ebp,DWORD PTR[52+rsp] - mov eax,r13d + rol ebp,1 + xor r14d,DWORD PTR[52+rsp] + mov eax,r12d + mov DWORD PTR[48+rsp],ebp mov ecx,r11d - xor ebp,DWORD PTR[60+rsp] - xor eax,r12d - rol ecx,5 - lea edi,DWORD PTR[1859775393+rdi*1+rdx] - xor ebp,DWORD PTR[20+rsp] + xor r14d,DWORD PTR[60+rsp] xor 
eax,esi + rol ecx,5 + xor r14d,DWORD PTR[20+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor eax,r13d add edi,ecx - xor ebp,DWORD PTR[40+rsp] rol r12d,30 add edi,eax - rol ebp,1 - mov DWORD PTR[52+rsp],ebp - mov edx,DWORD PTR[56+rsp] - mov eax,r12d + rol r14d,1 + xor edx,DWORD PTR[56+rsp] + mov eax,r11d + mov DWORD PTR[52+rsp],r14d mov ecx,edi xor edx,DWORD PTR[rsp] - xor eax,r11d + xor eax,r13d rol ecx,5 - lea esi,DWORD PTR[1859775393+rsi*1+rbp] xor edx,DWORD PTR[24+rsp] - xor eax,r13d + lea esi,DWORD PTR[1859775393+rsi*1+r14] + xor eax,r12d add esi,ecx - xor edx,DWORD PTR[44+rsp] rol r11d,30 add esi,eax rol edx,1 + xor ebp,DWORD PTR[60+rsp] + mov eax,edi mov DWORD PTR[56+rsp],edx - mov ebp,DWORD PTR[60+rsp] - mov eax,r11d mov ecx,esi xor ebp,DWORD PTR[4+rsp] - xor eax,edi + xor eax,r12d rol ecx,5 - lea r13d,DWORD PTR[1859775393+r13*1+rdx] xor ebp,DWORD PTR[28+rsp] - xor eax,r12d + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor eax,r11d add r13d,ecx - xor ebp,DWORD PTR[48+rsp] rol edi,30 add r13d,eax rol ebp,1 + xor r14d,DWORD PTR[rsp] + mov eax,esi mov DWORD PTR[60+rsp],ebp - mov edx,DWORD PTR[rsp] - mov eax,edi mov ecx,r13d - xor edx,DWORD PTR[8+rsp] - xor eax,esi + xor r14d,DWORD PTR[8+rsp] + xor eax,r11d rol ecx,5 + xor r14d,DWORD PTR[32+rsp] lea r12d,DWORD PTR[1859775393+r12*1+rbp] - xor edx,DWORD PTR[32+rsp] - xor eax,r11d + xor eax,edi add r12d,ecx - xor edx,DWORD PTR[52+rsp] rol esi,30 add r12d,eax - rol edx,1 - mov DWORD PTR[rsp],edx - mov ebp,DWORD PTR[4+rsp] - mov eax,esi + rol r14d,1 + xor edx,DWORD PTR[4+rsp] + mov eax,r13d + mov DWORD PTR[rsp],r14d mov ecx,r12d - xor ebp,DWORD PTR[12+rsp] - xor eax,r13d - rol ecx,5 - lea r11d,DWORD PTR[1859775393+r11*1+rdx] - xor ebp,DWORD PTR[36+rsp] + xor edx,DWORD PTR[12+rsp] xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+r14] + xor eax,esi add r11d,ecx - xor ebp,DWORD PTR[56+rsp] rol r13d,30 add r11d,eax - rol ebp,1 - mov DWORD PTR[4+rsp],ebp - mov edx,DWORD PTR[8+rsp] - mov eax,r13d + rol edx,1 + xor ebp,DWORD PTR[8+rsp] + mov eax,r12d + mov DWORD PTR[4+rsp],edx mov ecx,r11d - xor edx,DWORD PTR[16+rsp] - xor eax,r12d - rol ecx,5 - lea edi,DWORD PTR[1859775393+rdi*1+rbp] - xor edx,DWORD PTR[40+rsp] + xor ebp,DWORD PTR[16+rsp] xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[40+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+rdx] + xor eax,r13d add edi,ecx - xor edx,DWORD PTR[60+rsp] rol r12d,30 add edi,eax - rol edx,1 - mov DWORD PTR[8+rsp],edx - mov ebp,DWORD PTR[12+rsp] - mov eax,r12d + rol ebp,1 + xor r14d,DWORD PTR[12+rsp] + mov eax,r11d + mov DWORD PTR[8+rsp],ebp mov ecx,edi - xor ebp,DWORD PTR[20+rsp] - xor eax,r11d - rol ecx,5 - lea esi,DWORD PTR[1859775393+rsi*1+rdx] - xor ebp,DWORD PTR[44+rsp] + xor r14d,DWORD PTR[20+rsp] xor eax,r13d + rol ecx,5 + xor r14d,DWORD PTR[44+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor eax,r12d add esi,ecx - xor ebp,DWORD PTR[rsp] rol r11d,30 add esi,eax - rol ebp,1 - mov DWORD PTR[12+rsp],ebp - mov edx,DWORD PTR[16+rsp] - mov eax,r11d + rol r14d,1 + xor edx,DWORD PTR[16+rsp] + mov eax,edi + mov DWORD PTR[12+rsp],r14d mov ecx,esi xor edx,DWORD PTR[24+rsp] - xor eax,edi + xor eax,r12d rol ecx,5 - lea r13d,DWORD PTR[1859775393+r13*1+rbp] xor edx,DWORD PTR[48+rsp] - xor eax,r12d + lea r13d,DWORD PTR[1859775393+r13*1+r14] + xor eax,r11d add r13d,ecx - xor edx,DWORD PTR[4+rsp] rol edi,30 add r13d,eax rol edx,1 + xor ebp,DWORD PTR[20+rsp] + mov eax,esi mov DWORD PTR[16+rsp],edx - mov ebp,DWORD PTR[20+rsp] - mov eax,edi mov ecx,r13d xor ebp,DWORD PTR[28+rsp] - xor 
eax,esi + xor eax,r11d rol ecx,5 - lea r12d,DWORD PTR[1859775393+r12*1+rdx] xor ebp,DWORD PTR[52+rsp] - xor eax,r11d + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor eax,edi add r12d,ecx - xor ebp,DWORD PTR[8+rsp] rol esi,30 add r12d,eax rol ebp,1 + xor r14d,DWORD PTR[24+rsp] + mov eax,r13d mov DWORD PTR[20+rsp],ebp - mov edx,DWORD PTR[24+rsp] - mov eax,esi mov ecx,r12d - xor edx,DWORD PTR[32+rsp] - xor eax,r13d + xor r14d,DWORD PTR[32+rsp] + xor eax,edi rol ecx,5 + xor r14d,DWORD PTR[56+rsp] lea r11d,DWORD PTR[1859775393+r11*1+rbp] - xor edx,DWORD PTR[56+rsp] - xor eax,edi + xor eax,esi add r11d,ecx - xor edx,DWORD PTR[12+rsp] rol r13d,30 add r11d,eax - rol edx,1 - mov DWORD PTR[24+rsp],edx - mov ebp,DWORD PTR[28+rsp] - mov eax,r13d + rol r14d,1 + xor edx,DWORD PTR[28+rsp] + mov eax,r12d + mov DWORD PTR[24+rsp],r14d mov ecx,r11d - xor ebp,DWORD PTR[36+rsp] - xor eax,r12d - rol ecx,5 - lea edi,DWORD PTR[1859775393+rdi*1+rdx] - xor ebp,DWORD PTR[60+rsp] + xor edx,DWORD PTR[36+rsp] xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[60+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+r14] + xor eax,r13d add edi,ecx - xor ebp,DWORD PTR[16+rsp] rol r12d,30 add edi,eax - rol ebp,1 - mov DWORD PTR[28+rsp],ebp - mov edx,DWORD PTR[32+rsp] - mov eax,r12d + rol edx,1 + xor ebp,DWORD PTR[32+rsp] + mov eax,r11d + mov DWORD PTR[28+rsp],edx mov ecx,edi - xor edx,DWORD PTR[40+rsp] - xor eax,r11d - rol ecx,5 - lea esi,DWORD PTR[1859775393+rsi*1+rbp] - xor edx,DWORD PTR[rsp] + xor ebp,DWORD PTR[40+rsp] xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor eax,r12d add esi,ecx - xor edx,DWORD PTR[20+rsp] rol r11d,30 add esi,eax - rol edx,1 - mov DWORD PTR[32+rsp],edx - mov ebp,DWORD PTR[36+rsp] - mov eax,r11d - mov ebx,r11d - xor ebp,DWORD PTR[44+rsp] - and eax,r12d + rol ebp,1 + xor r14d,DWORD PTR[36+rsp] + mov eax,r12d + mov DWORD PTR[32+rsp],ebp + mov ebx,r12d + xor r14d,DWORD PTR[44+rsp] + and eax,r11d mov ecx,esi - xor ebp,DWORD PTR[4+rsp] - xor ebx,r12d - lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + xor r14d,DWORD PTR[4+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebx,r11d rol ecx,5 - xor ebp,DWORD PTR[24+rsp] add r13d,eax + rol r14d,1 and ebx,edi - rol ebp,1 - add r13d,ebx - rol edi,30 - mov DWORD PTR[36+rsp],ebp add r13d,ecx - mov edx,DWORD PTR[40+rsp] - mov eax,edi - mov ebx,edi + rol edi,30 + add r13d,ebx + xor edx,DWORD PTR[40+rsp] + mov eax,r11d + mov DWORD PTR[36+rsp],r14d + mov ebx,r11d xor edx,DWORD PTR[48+rsp] - and eax,r11d + and eax,edi mov ecx,r13d xor edx,DWORD PTR[8+rsp] - xor ebx,r11d - lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+r14] + xor ebx,edi rol ecx,5 - xor edx,DWORD PTR[28+rsp] add r12d,eax - and ebx,esi rol edx,1 - add r12d,ebx + and ebx,esi + add r12d,ecx rol esi,30 + add r12d,ebx + xor ebp,DWORD PTR[44+rsp] + mov eax,edi mov DWORD PTR[40+rsp],edx - add r12d,ecx - mov ebp,DWORD PTR[44+rsp] - mov eax,esi - mov ebx,esi + mov ebx,edi xor ebp,DWORD PTR[52+rsp] - and eax,edi + and eax,esi mov ecx,r12d xor ebp,DWORD PTR[12+rsp] - xor ebx,edi lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor ebx,esi rol ecx,5 - xor ebp,DWORD PTR[32+rsp] add r11d,eax - and ebx,r13d rol ebp,1 - add r11d,ebx + and ebx,r13d + add r11d,ecx rol r13d,30 + add r11d,ebx + xor r14d,DWORD PTR[48+rsp] + mov eax,esi mov DWORD PTR[44+rsp],ebp - add r11d,ecx - mov edx,DWORD PTR[48+rsp] - mov eax,r13d - mov ebx,r13d - xor edx,DWORD PTR[56+rsp] - and eax,esi + mov ebx,esi + xor r14d,DWORD PTR[56+rsp] + and eax,r13d mov 
ecx,r11d - xor edx,DWORD PTR[16+rsp] - xor ebx,esi + xor r14d,DWORD PTR[16+rsp] lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + xor ebx,r13d rol ecx,5 - xor edx,DWORD PTR[36+rsp] add edi,eax + rol r14d,1 and ebx,r12d - rol edx,1 - add edi,ebx - rol r12d,30 - mov DWORD PTR[48+rsp],edx add edi,ecx - mov ebp,DWORD PTR[52+rsp] - mov eax,r12d - mov ebx,r12d - xor ebp,DWORD PTR[60+rsp] - and eax,r13d + rol r12d,30 + add edi,ebx + xor edx,DWORD PTR[52+rsp] + mov eax,r13d + mov DWORD PTR[48+rsp],r14d + mov ebx,r13d + xor edx,DWORD PTR[60+rsp] + and eax,r12d mov ecx,edi - xor ebp,DWORD PTR[20+rsp] - xor ebx,r13d - lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + xor edx,DWORD PTR[20+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+r14] + xor ebx,r12d rol ecx,5 - xor ebp,DWORD PTR[40+rsp] add esi,eax + rol edx,1 and ebx,r11d - rol ebp,1 - add esi,ebx - rol r11d,30 - mov DWORD PTR[52+rsp],ebp add esi,ecx - mov edx,DWORD PTR[56+rsp] - mov eax,r11d - mov ebx,r11d - xor edx,DWORD PTR[rsp] - and eax,r12d + rol r11d,30 + add esi,ebx + xor ebp,DWORD PTR[56+rsp] + mov eax,r12d + mov DWORD PTR[52+rsp],edx + mov ebx,r12d + xor ebp,DWORD PTR[rsp] + and eax,r11d mov ecx,esi - xor edx,DWORD PTR[24+rsp] - xor ebx,r12d - lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebp,DWORD PTR[24+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + xor ebx,r11d rol ecx,5 - xor edx,DWORD PTR[44+rsp] add r13d,eax + rol ebp,1 and ebx,edi - rol edx,1 - add r13d,ebx - rol edi,30 - mov DWORD PTR[56+rsp],edx add r13d,ecx - mov ebp,DWORD PTR[60+rsp] - mov eax,edi - mov ebx,edi - xor ebp,DWORD PTR[4+rsp] - and eax,r11d + rol edi,30 + add r13d,ebx + xor r14d,DWORD PTR[60+rsp] + mov eax,r11d + mov DWORD PTR[56+rsp],ebp + mov ebx,r11d + xor r14d,DWORD PTR[4+rsp] + and eax,edi mov ecx,r13d - xor ebp,DWORD PTR[28+rsp] - xor ebx,r11d - lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + xor r14d,DWORD PTR[28+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + xor ebx,edi rol ecx,5 - xor ebp,DWORD PTR[48+rsp] add r12d,eax + rol r14d,1 and ebx,esi - rol ebp,1 - add r12d,ebx - rol esi,30 - mov DWORD PTR[60+rsp],ebp add r12d,ecx - mov edx,DWORD PTR[rsp] - mov eax,esi - mov ebx,esi + rol esi,30 + add r12d,ebx + xor edx,DWORD PTR[rsp] + mov eax,edi + mov DWORD PTR[60+rsp],r14d + mov ebx,edi xor edx,DWORD PTR[8+rsp] - and eax,edi + and eax,esi mov ecx,r12d xor edx,DWORD PTR[32+rsp] - xor ebx,edi - lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+r14] + xor ebx,esi rol ecx,5 - xor edx,DWORD PTR[52+rsp] add r11d,eax - and ebx,r13d rol edx,1 - add r11d,ebx + and ebx,r13d + add r11d,ecx rol r13d,30 + add r11d,ebx + xor ebp,DWORD PTR[4+rsp] + mov eax,esi mov DWORD PTR[rsp],edx - add r11d,ecx - mov ebp,DWORD PTR[4+rsp] - mov eax,r13d - mov ebx,r13d + mov ebx,esi xor ebp,DWORD PTR[12+rsp] - and eax,esi + and eax,r13d mov ecx,r11d xor ebp,DWORD PTR[36+rsp] - xor ebx,esi lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + xor ebx,r13d rol ecx,5 - xor ebp,DWORD PTR[56+rsp] add edi,eax - and ebx,r12d rol ebp,1 - add edi,ebx + and ebx,r12d + add edi,ecx rol r12d,30 + add edi,ebx + xor r14d,DWORD PTR[8+rsp] + mov eax,r13d mov DWORD PTR[4+rsp],ebp - add edi,ecx - mov edx,DWORD PTR[8+rsp] - mov eax,r12d - mov ebx,r12d - xor edx,DWORD PTR[16+rsp] - and eax,r13d + mov ebx,r13d + xor r14d,DWORD PTR[16+rsp] + and eax,r12d mov ecx,edi - xor edx,DWORD PTR[40+rsp] - xor ebx,r13d + xor r14d,DWORD PTR[40+rsp] lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + xor ebx,r12d rol ecx,5 - xor edx,DWORD PTR[60+rsp] add esi,eax + rol r14d,1 and 
ebx,r11d - rol edx,1 - add esi,ebx - rol r11d,30 - mov DWORD PTR[8+rsp],edx add esi,ecx - mov ebp,DWORD PTR[12+rsp] - mov eax,r11d - mov ebx,r11d - xor ebp,DWORD PTR[20+rsp] - and eax,r12d + rol r11d,30 + add esi,ebx + xor edx,DWORD PTR[12+rsp] + mov eax,r12d + mov DWORD PTR[8+rsp],r14d + mov ebx,r12d + xor edx,DWORD PTR[20+rsp] + and eax,r11d mov ecx,esi - xor ebp,DWORD PTR[44+rsp] - xor ebx,r12d - lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + xor edx,DWORD PTR[44+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+r14] + xor ebx,r11d rol ecx,5 - xor ebp,DWORD PTR[rsp] add r13d,eax + rol edx,1 and ebx,edi - rol ebp,1 - add r13d,ebx - rol edi,30 - mov DWORD PTR[12+rsp],ebp add r13d,ecx - mov edx,DWORD PTR[16+rsp] - mov eax,edi - mov ebx,edi - xor edx,DWORD PTR[24+rsp] - and eax,r11d + rol edi,30 + add r13d,ebx + xor ebp,DWORD PTR[16+rsp] + mov eax,r11d + mov DWORD PTR[12+rsp],edx + mov ebx,r11d + xor ebp,DWORD PTR[24+rsp] + and eax,edi mov ecx,r13d - xor edx,DWORD PTR[48+rsp] - xor ebx,r11d - lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + xor ebp,DWORD PTR[48+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + xor ebx,edi rol ecx,5 - xor edx,DWORD PTR[4+rsp] add r12d,eax + rol ebp,1 and ebx,esi - rol edx,1 - add r12d,ebx - rol esi,30 - mov DWORD PTR[16+rsp],edx add r12d,ecx - mov ebp,DWORD PTR[20+rsp] - mov eax,esi - mov ebx,esi - xor ebp,DWORD PTR[28+rsp] - and eax,edi + rol esi,30 + add r12d,ebx + xor r14d,DWORD PTR[20+rsp] + mov eax,edi + mov DWORD PTR[16+rsp],ebp + mov ebx,edi + xor r14d,DWORD PTR[28+rsp] + and eax,esi mov ecx,r12d - xor ebp,DWORD PTR[52+rsp] - xor ebx,edi - lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor r14d,DWORD PTR[52+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + xor ebx,esi rol ecx,5 - xor ebp,DWORD PTR[8+rsp] add r11d,eax + rol r14d,1 and ebx,r13d - rol ebp,1 - add r11d,ebx - rol r13d,30 - mov DWORD PTR[20+rsp],ebp add r11d,ecx - mov edx,DWORD PTR[24+rsp] - mov eax,r13d - mov ebx,r13d + rol r13d,30 + add r11d,ebx + xor edx,DWORD PTR[24+rsp] + mov eax,esi + mov DWORD PTR[20+rsp],r14d + mov ebx,esi xor edx,DWORD PTR[32+rsp] - and eax,esi + and eax,r13d mov ecx,r11d xor edx,DWORD PTR[56+rsp] - xor ebx,esi - lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+r14] + xor ebx,r13d rol ecx,5 - xor edx,DWORD PTR[12+rsp] add edi,eax - and ebx,r12d rol edx,1 - add edi,ebx + and ebx,r12d + add edi,ecx rol r12d,30 + add edi,ebx + xor ebp,DWORD PTR[28+rsp] + mov eax,r13d mov DWORD PTR[24+rsp],edx - add edi,ecx - mov ebp,DWORD PTR[28+rsp] - mov eax,r12d - mov ebx,r12d + mov ebx,r13d xor ebp,DWORD PTR[36+rsp] - and eax,r13d + and eax,r12d mov ecx,edi xor ebp,DWORD PTR[60+rsp] - xor ebx,r13d lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + xor ebx,r12d rol ecx,5 - xor ebp,DWORD PTR[16+rsp] add esi,eax - and ebx,r11d rol ebp,1 - add esi,ebx + and ebx,r11d + add esi,ecx rol r11d,30 + add esi,ebx + xor r14d,DWORD PTR[32+rsp] + mov eax,r12d mov DWORD PTR[28+rsp],ebp - add esi,ecx - mov edx,DWORD PTR[32+rsp] - mov eax,r11d - mov ebx,r11d - xor edx,DWORD PTR[40+rsp] - and eax,r12d + mov ebx,r12d + xor r14d,DWORD PTR[40+rsp] + and eax,r11d mov ecx,esi - xor edx,DWORD PTR[rsp] - xor ebx,r12d + xor r14d,DWORD PTR[rsp] lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebx,r11d rol ecx,5 - xor edx,DWORD PTR[20+rsp] add r13d,eax + rol r14d,1 and ebx,edi - rol edx,1 - add r13d,ebx - rol edi,30 - mov DWORD PTR[32+rsp],edx add r13d,ecx - mov ebp,DWORD PTR[36+rsp] - mov eax,edi - mov ebx,edi - xor ebp,DWORD PTR[44+rsp] - and eax,r11d + rol 
edi,30 + add r13d,ebx + xor edx,DWORD PTR[36+rsp] + mov eax,r11d + mov DWORD PTR[32+rsp],r14d + mov ebx,r11d + xor edx,DWORD PTR[44+rsp] + and eax,edi mov ecx,r13d - xor ebp,DWORD PTR[4+rsp] - xor ebx,r11d - lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + xor edx,DWORD PTR[4+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+r14] + xor ebx,edi rol ecx,5 - xor ebp,DWORD PTR[24+rsp] add r12d,eax + rol edx,1 and ebx,esi - rol ebp,1 - add r12d,ebx - rol esi,30 - mov DWORD PTR[36+rsp],ebp add r12d,ecx - mov edx,DWORD PTR[40+rsp] - mov eax,esi - mov ebx,esi - xor edx,DWORD PTR[48+rsp] - and eax,edi + rol esi,30 + add r12d,ebx + xor ebp,DWORD PTR[40+rsp] + mov eax,edi + mov DWORD PTR[36+rsp],edx + mov ebx,edi + xor ebp,DWORD PTR[48+rsp] + and eax,esi mov ecx,r12d - xor edx,DWORD PTR[8+rsp] - xor ebx,edi - lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + xor ebp,DWORD PTR[8+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor ebx,esi rol ecx,5 - xor edx,DWORD PTR[28+rsp] add r11d,eax + rol ebp,1 and ebx,r13d - rol edx,1 - add r11d,ebx - rol r13d,30 - mov DWORD PTR[40+rsp],edx add r11d,ecx - mov ebp,DWORD PTR[44+rsp] - mov eax,r13d - mov ebx,r13d - xor ebp,DWORD PTR[52+rsp] - and eax,esi + rol r13d,30 + add r11d,ebx + xor r14d,DWORD PTR[44+rsp] + mov eax,esi + mov DWORD PTR[40+rsp],ebp + mov ebx,esi + xor r14d,DWORD PTR[52+rsp] + and eax,r13d mov ecx,r11d - xor ebp,DWORD PTR[12+rsp] - xor ebx,esi - lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + xor r14d,DWORD PTR[12+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + xor ebx,r13d rol ecx,5 - xor ebp,DWORD PTR[32+rsp] add edi,eax + rol r14d,1 and ebx,r12d - rol ebp,1 - add edi,ebx - rol r12d,30 - mov DWORD PTR[44+rsp],ebp add edi,ecx - mov edx,DWORD PTR[48+rsp] - mov eax,r12d - mov ebx,r12d + rol r12d,30 + add edi,ebx + xor edx,DWORD PTR[48+rsp] + mov eax,r13d + mov DWORD PTR[44+rsp],r14d + mov ebx,r13d xor edx,DWORD PTR[56+rsp] - and eax,r13d + and eax,r12d mov ecx,edi xor edx,DWORD PTR[16+rsp] - xor ebx,r13d - lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+r14] + xor ebx,r12d rol ecx,5 - xor edx,DWORD PTR[36+rsp] add esi,eax - and ebx,r11d rol edx,1 - add esi,ebx + and ebx,r11d + add esi,ecx rol r11d,30 + add esi,ebx + xor ebp,DWORD PTR[52+rsp] + mov eax,edi mov DWORD PTR[48+rsp],edx - add esi,ecx - mov ebp,DWORD PTR[52+rsp] - mov eax,r11d mov ecx,esi xor ebp,DWORD PTR[60+rsp] - xor eax,edi + xor eax,r12d rol ecx,5 - lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] xor ebp,DWORD PTR[20+rsp] - xor eax,r12d + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor eax,r11d add r13d,ecx - xor ebp,DWORD PTR[40+rsp] rol edi,30 add r13d,eax rol ebp,1 + xor r14d,DWORD PTR[56+rsp] + mov eax,esi mov DWORD PTR[52+rsp],ebp - mov edx,DWORD PTR[56+rsp] - mov eax,edi mov ecx,r13d - xor edx,DWORD PTR[rsp] - xor eax,esi + xor r14d,DWORD PTR[rsp] + xor eax,r11d rol ecx,5 + xor r14d,DWORD PTR[24+rsp] lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] - xor edx,DWORD PTR[24+rsp] - xor eax,r11d + xor eax,edi add r12d,ecx - xor edx,DWORD PTR[44+rsp] rol esi,30 add r12d,eax - rol edx,1 - mov DWORD PTR[56+rsp],edx - mov ebp,DWORD PTR[60+rsp] - mov eax,esi + rol r14d,1 + xor edx,DWORD PTR[60+rsp] + mov eax,r13d + mov DWORD PTR[56+rsp],r14d mov ecx,r12d - xor ebp,DWORD PTR[4+rsp] - xor eax,r13d - rol ecx,5 - lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] - xor ebp,DWORD PTR[28+rsp] + xor edx,DWORD PTR[4+rsp] xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+r14] + xor eax,esi add r11d,ecx - xor 
ebp,DWORD PTR[48+rsp] rol r13d,30 add r11d,eax - rol ebp,1 - mov DWORD PTR[60+rsp],ebp - mov edx,DWORD PTR[rsp] - mov eax,r13d + rol edx,1 + xor ebp,DWORD PTR[rsp] + mov eax,r12d + mov DWORD PTR[60+rsp],edx mov ecx,r11d - xor edx,DWORD PTR[8+rsp] - xor eax,r12d - rol ecx,5 - lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] - xor edx,DWORD PTR[32+rsp] + xor ebp,DWORD PTR[8+rsp] xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor eax,r13d add edi,ecx - xor edx,DWORD PTR[52+rsp] rol r12d,30 add edi,eax - rol edx,1 - mov DWORD PTR[rsp],edx - mov ebp,DWORD PTR[4+rsp] - mov eax,r12d + rol ebp,1 + xor r14d,DWORD PTR[4+rsp] + mov eax,r11d + mov DWORD PTR[rsp],ebp mov ecx,edi - xor ebp,DWORD PTR[12+rsp] - xor eax,r11d - rol ecx,5 - lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] - xor ebp,DWORD PTR[36+rsp] + xor r14d,DWORD PTR[12+rsp] xor eax,r13d + rol ecx,5 + xor r14d,DWORD PTR[36+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + xor eax,r12d add esi,ecx - xor ebp,DWORD PTR[56+rsp] rol r11d,30 add esi,eax - rol ebp,1 - mov DWORD PTR[4+rsp],ebp - mov edx,DWORD PTR[8+rsp] - mov eax,r11d + rol r14d,1 + xor edx,DWORD PTR[8+rsp] + mov eax,edi + mov DWORD PTR[4+rsp],r14d mov ecx,esi xor edx,DWORD PTR[16+rsp] - xor eax,edi + xor eax,r12d rol ecx,5 - lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] xor edx,DWORD PTR[40+rsp] - xor eax,r12d + lea r13d,DWORD PTR[((-899497514))+r13*1+r14] + xor eax,r11d add r13d,ecx - xor edx,DWORD PTR[60+rsp] rol edi,30 add r13d,eax rol edx,1 + xor ebp,DWORD PTR[12+rsp] + mov eax,esi mov DWORD PTR[8+rsp],edx - mov ebp,DWORD PTR[12+rsp] - mov eax,edi mov ecx,r13d xor ebp,DWORD PTR[20+rsp] - xor eax,esi + xor eax,r11d rol ecx,5 - lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] xor ebp,DWORD PTR[44+rsp] - xor eax,r11d + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor eax,edi add r12d,ecx - xor ebp,DWORD PTR[rsp] rol esi,30 add r12d,eax rol ebp,1 + xor r14d,DWORD PTR[16+rsp] + mov eax,r13d mov DWORD PTR[12+rsp],ebp - mov edx,DWORD PTR[16+rsp] - mov eax,esi mov ecx,r12d - xor edx,DWORD PTR[24+rsp] - xor eax,r13d + xor r14d,DWORD PTR[24+rsp] + xor eax,edi rol ecx,5 + xor r14d,DWORD PTR[48+rsp] lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] - xor edx,DWORD PTR[48+rsp] - xor eax,edi + xor eax,esi add r11d,ecx - xor edx,DWORD PTR[4+rsp] rol r13d,30 add r11d,eax - rol edx,1 - mov DWORD PTR[16+rsp],edx - mov ebp,DWORD PTR[20+rsp] - mov eax,r13d + rol r14d,1 + xor edx,DWORD PTR[20+rsp] + mov eax,r12d + mov DWORD PTR[16+rsp],r14d mov ecx,r11d - xor ebp,DWORD PTR[28+rsp] - xor eax,r12d - rol ecx,5 - lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] - xor ebp,DWORD PTR[52+rsp] + xor edx,DWORD PTR[28+rsp] xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+r14] + xor eax,r13d add edi,ecx - xor ebp,DWORD PTR[8+rsp] rol r12d,30 add edi,eax - rol ebp,1 - mov DWORD PTR[20+rsp],ebp - mov edx,DWORD PTR[24+rsp] - mov eax,r12d + rol edx,1 + xor ebp,DWORD PTR[24+rsp] + mov eax,r11d + mov DWORD PTR[20+rsp],edx mov ecx,edi - xor edx,DWORD PTR[32+rsp] - xor eax,r11d - rol ecx,5 - lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] - xor edx,DWORD PTR[56+rsp] + xor ebp,DWORD PTR[32+rsp] xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor eax,r12d add esi,ecx - xor edx,DWORD PTR[12+rsp] rol r11d,30 add esi,eax - rol edx,1 - mov DWORD PTR[24+rsp],edx - mov ebp,DWORD PTR[28+rsp] - mov eax,r11d + rol ebp,1 + xor r14d,DWORD PTR[28+rsp] + mov eax,edi + mov DWORD PTR[24+rsp],ebp mov ecx,esi 
- xor ebp,DWORD PTR[36+rsp] - xor eax,edi - rol ecx,5 - lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] - xor ebp,DWORD PTR[60+rsp] + xor r14d,DWORD PTR[36+rsp] xor eax,r12d + rol ecx,5 + xor r14d,DWORD PTR[60+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor eax,r11d add r13d,ecx - xor ebp,DWORD PTR[16+rsp] rol edi,30 add r13d,eax - rol ebp,1 - mov DWORD PTR[28+rsp],ebp - mov edx,DWORD PTR[32+rsp] - mov eax,edi + rol r14d,1 + xor edx,DWORD PTR[32+rsp] + mov eax,esi + mov DWORD PTR[28+rsp],r14d mov ecx,r13d xor edx,DWORD PTR[40+rsp] - xor eax,esi + xor eax,r11d rol ecx,5 - lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] xor edx,DWORD PTR[rsp] - xor eax,r11d + lea r12d,DWORD PTR[((-899497514))+r12*1+r14] + xor eax,edi add r12d,ecx - xor edx,DWORD PTR[20+rsp] rol esi,30 add r12d,eax rol edx,1 - mov DWORD PTR[32+rsp],edx - mov ebp,DWORD PTR[36+rsp] - mov eax,esi + xor ebp,DWORD PTR[36+rsp] + mov eax,r13d + mov ecx,r12d xor ebp,DWORD PTR[44+rsp] - xor eax,r13d + xor eax,edi rol ecx,5 - lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] xor ebp,DWORD PTR[4+rsp] - xor eax,edi + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor eax,esi add r11d,ecx - xor ebp,DWORD PTR[24+rsp] rol r13d,30 add r11d,eax rol ebp,1 - mov DWORD PTR[36+rsp],ebp - mov edx,DWORD PTR[40+rsp] - mov eax,r13d + xor r14d,DWORD PTR[40+rsp] + mov eax,r12d + mov ecx,r11d - xor edx,DWORD PTR[48+rsp] - xor eax,r12d + xor r14d,DWORD PTR[48+rsp] + xor eax,esi rol ecx,5 + xor r14d,DWORD PTR[8+rsp] lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] - xor edx,DWORD PTR[8+rsp] - xor eax,esi + xor eax,r13d add edi,ecx - xor edx,DWORD PTR[28+rsp] rol r12d,30 add edi,eax - rol edx,1 - mov DWORD PTR[40+rsp],edx - mov ebp,DWORD PTR[44+rsp] - mov eax,r12d + rol r14d,1 + xor edx,DWORD PTR[44+rsp] + mov eax,r11d + mov ecx,edi - xor ebp,DWORD PTR[52+rsp] - xor eax,r11d - rol ecx,5 - lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] - xor ebp,DWORD PTR[12+rsp] + xor edx,DWORD PTR[52+rsp] xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+r14] + xor eax,r12d add esi,ecx - xor ebp,DWORD PTR[32+rsp] rol r11d,30 add esi,eax - rol ebp,1 - mov DWORD PTR[44+rsp],ebp - mov edx,DWORD PTR[48+rsp] - mov eax,r11d - mov ecx,esi - xor edx,DWORD PTR[56+rsp] - xor eax,edi - rol ecx,5 - lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] - xor edx,DWORD PTR[16+rsp] + rol edx,1 + xor ebp,DWORD PTR[48+rsp] + mov eax,edi + + mov ecx,esi + xor ebp,DWORD PTR[56+rsp] xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor eax,r11d add r13d,ecx - xor edx,DWORD PTR[36+rsp] rol edi,30 add r13d,eax - rol edx,1 - mov DWORD PTR[48+rsp],edx - mov ebp,DWORD PTR[52+rsp] - mov eax,edi + rol ebp,1 + xor r14d,DWORD PTR[52+rsp] + mov eax,esi + mov ecx,r13d - xor ebp,DWORD PTR[60+rsp] - xor eax,esi - rol ecx,5 - lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] - xor ebp,DWORD PTR[20+rsp] + xor r14d,DWORD PTR[60+rsp] xor eax,r11d + rol ecx,5 + xor r14d,DWORD PTR[20+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor eax,edi add r12d,ecx - xor ebp,DWORD PTR[40+rsp] rol esi,30 add r12d,eax - rol ebp,1 - mov edx,DWORD PTR[56+rsp] - mov eax,esi + rol r14d,1 + xor edx,DWORD PTR[56+rsp] + mov eax,r13d + mov ecx,r12d xor edx,DWORD PTR[rsp] - xor eax,r13d + xor eax,edi rol ecx,5 - lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] xor edx,DWORD PTR[24+rsp] - xor eax,edi + lea r11d,DWORD PTR[((-899497514))+r11*1+r14] + xor eax,esi add r11d,ecx - xor edx,DWORD PTR[44+rsp] rol r13d,30 add r11d,eax rol edx,1 - mov ebp,DWORD 
PTR[60+rsp] - mov eax,r13d + xor ebp,DWORD PTR[60+rsp] + mov eax,r12d + mov ecx,r11d xor ebp,DWORD PTR[4+rsp] - xor eax,r12d + xor eax,esi rol ecx,5 - lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] xor ebp,DWORD PTR[28+rsp] - xor eax,esi + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor eax,r13d add edi,ecx - xor ebp,DWORD PTR[48+rsp] rol r12d,30 add edi,eax rol ebp,1 - mov eax,r12d + mov eax,r11d mov ecx,edi - xor eax,r11d + xor eax,r13d lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] rol ecx,5 - xor eax,r13d + xor eax,r12d add esi,ecx rol r11d,30 add esi,eax @@ -1290,11 +1240,12 @@ $L$loop:: jnz $L$loop mov rsi,QWORD PTR[64+rsp] - mov r13,QWORD PTR[rsi] - mov r12,QWORD PTR[8+rsi] - mov rbp,QWORD PTR[16+rsi] - mov rbx,QWORD PTR[24+rsi] - lea rsp,QWORD PTR[32+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] $L$epilogue:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] @@ -1302,6 +1253,197 @@ $L$epilogue:: $L$SEH_end_sha1_block_data_order:: sha1_block_data_order ENDP +ALIGN 32 +sha1_block_data_order_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + lea rsp,QWORD PTR[((-72))+rsp] + movaps XMMWORD PTR[(-8-64)+rax],xmm6 + movaps XMMWORD PTR[(-8-48)+rax],xmm7 + movaps XMMWORD PTR[(-8-32)+rax],xmm8 + movaps XMMWORD PTR[(-8-16)+rax],xmm9 +$L$prologue_shaext:: + movdqu xmm0,XMMWORD PTR[rdi] + movd xmm1,DWORD PTR[16+rdi] + movdqa xmm3,XMMWORD PTR[((K_XX_XX+160))] + + movdqu xmm4,XMMWORD PTR[rsi] + pshufd xmm0,xmm0,27 + movdqu xmm5,XMMWORD PTR[16+rsi] + pshufd xmm1,xmm1,27 + movdqu xmm6,XMMWORD PTR[32+rsi] +DB 102,15,56,0,227 + movdqu xmm7,XMMWORD PTR[48+rsi] +DB 102,15,56,0,235 +DB 102,15,56,0,243 + movdqa xmm9,xmm1 +DB 102,15,56,0,251 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + dec rdx + lea rax,QWORD PTR[64+rsi] + paddd xmm1,xmm4 + cmovne rsi,rax + movdqa xmm8,xmm0 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 
15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 + movdqu xmm4,XMMWORD PTR[rsi] + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,213 + movdqu xmm5,XMMWORD PTR[16+rsi] +DB 102,15,56,0,227 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,206 + movdqu xmm6,XMMWORD PTR[32+rsi] +DB 102,15,56,0,235 + + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,215 + movdqu xmm7,XMMWORD PTR[48+rsi] +DB 102,15,56,0,243 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 65,15,56,200,201 +DB 102,15,56,0,251 + + paddd xmm0,xmm8 + movdqa xmm9,xmm1 + + jnz $L$oop_shaext + + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu XMMWORD PTR[rdi],xmm0 + movd DWORD PTR[16+rdi],xmm1 + movaps xmm6,XMMWORD PTR[((-8-64))+rax] + movaps xmm7,XMMWORD PTR[((-8-48))+rax] + movaps xmm8,XMMWORD PTR[((-8-32))+rax] + movaps xmm9,XMMWORD PTR[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_shaext:: +sha1_block_data_order_shaext ENDP + ALIGN 16 sha1_block_data_order_ssse3 PROC PRIVATE mov QWORD PTR[8+rsp],rdi ;WIN64 prologue @@ -1314,23 +1456,29 @@ $L$SEH_begin_sha1_block_data_order_ssse3:: _ssse3_shortcut:: + mov rax,rsp push rbx push rbp push r12 - lea rsp,QWORD PTR[((-144))+rsp] - movaps XMMWORD PTR[(64+0)+rsp],xmm6 - movaps XMMWORD PTR[(64+16)+rsp],xmm7 - movaps XMMWORD PTR[(64+32)+rsp],xmm8 - movaps XMMWORD PTR[(64+48)+rsp],xmm9 - movaps XMMWORD PTR[(64+64)+rsp],xmm10 + push r13 + push r14 + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[(-40-96)+rax],xmm6 + movaps XMMWORD PTR[(-40-80)+rax],xmm7 + movaps XMMWORD PTR[(-40-64)+rax],xmm8 + movaps XMMWORD PTR[(-40-48)+rax],xmm9 + movaps XMMWORD PTR[(-40-32)+rax],xmm10 + movaps XMMWORD PTR[(-40-16)+rax],xmm11 $L$prologue_ssse3:: + mov r14,rax + and rsp,-64 mov r8,rdi mov r9,rsi mov r10,rdx shl r10,6 add r10,r9 - lea r11,QWORD PTR[K_XX_XX] + lea r11,QWORD PTR[((K_XX_XX+64))] mov eax,DWORD PTR[r8] mov ebx,DWORD PTR[4+r8] @@ -1338,19 +1486,22 @@ $L$prologue_ssse3:: mov edx,DWORD PTR[12+r8] mov esi,ebx mov ebp,DWORD PTR[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi movdqa xmm6,XMMWORD PTR[64+r11] - movdqa xmm9,XMMWORD PTR[r11] + movdqa xmm9,XMMWORD PTR[((-64))+r11] movdqu xmm0,XMMWORD PTR[r9] movdqu xmm1,XMMWORD PTR[16+r9] movdqu xmm2,XMMWORD PTR[32+r9] movdqu xmm3,XMMWORD PTR[48+r9] DB 102,15,56,0,198 - add r9,64 DB 102,15,56,0,206 DB 102,15,56,0,214 -DB 102,15,56,0,222 + add r9,64 paddd xmm0,xmm9 +DB 102,15,56,0,222 paddd xmm1,xmm9 paddd xmm2,xmm9 movdqa XMMWORD PTR[rsp],xmm0 @@ -1362,904 +1513,882 @@ DB 102,15,56,0,222 jmp $L$oop_ssse3 ALIGN 16 $L$oop_ssse3:: - movdqa xmm4,xmm1 - add ebp,DWORD PTR[rsp] - xor ecx,edx + ror ebx,2 + pshufd xmm4,xmm0,238 + xor esi,edx movdqa xmm8,xmm3 -DB 102,15,58,15,224,8 + paddd xmm9,xmm3 mov edi,eax + add ebp,DWORD PTR[rsp] + punpcklqdq xmm4,xmm1 + xor ebx,ecx rol eax,5 - paddd xmm9,xmm3 - and esi,ecx - xor ecx,edx + add ebp,esi psrldq xmm8,4 - xor esi,edx - add ebp,eax + and edi,ebx + xor ebx,ecx pxor xmm4,xmm0 - ror ebx,2 - add ebp,esi + add ebp,eax + ror eax,7 pxor xmm8,xmm2 - add edx,DWORD PTR[4+rsp] - xor ebx,ecx + xor edi,ecx mov esi,ebp - rol ebp,5 + add edx,DWORD PTR[4+rsp] pxor xmm4,xmm8 - and edi,ebx - xor 
ebx,ecx + xor eax,ebx + rol ebp,5 movdqa XMMWORD PTR[48+rsp],xmm9 - xor edi,ecx - add edx,ebp - movdqa xmm10,xmm4 - movdqa xmm8,xmm4 - ror eax,7 add edx,edi - add ecx,DWORD PTR[8+rsp] + and esi,eax + movdqa xmm10,xmm4 xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm8,xmm4 + xor esi,ebx pslldq xmm10,12 paddd xmm4,xmm4 mov edi,edx - rol edx,5 - and esi,eax - xor eax,ebx + add ecx,DWORD PTR[8+rsp] psrld xmm8,31 - xor esi,ebx - add ecx,edx - movdqa xmm9,xmm10 - ror ebp,7 + xor ebp,eax + rol edx,5 add ecx,esi + movdqa xmm9,xmm10 + and edi,ebp + xor ebp,eax psrld xmm10,30 + add ecx,edx + ror edx,7 por xmm4,xmm8 - add ebx,DWORD PTR[12+rsp] - xor ebp,eax + xor edi,eax mov esi,ecx - rol ecx,5 + add ebx,DWORD PTR[12+rsp] pslld xmm9,2 pxor xmm4,xmm10 - and edi,ebp - xor ebp,eax - movdqa xmm10,XMMWORD PTR[r11] - xor edi,eax - add ebx,ecx - pxor xmm4,xmm9 - ror edx,7 + xor edx,ebp + movdqa xmm10,XMMWORD PTR[((-64))+r11] + rol ecx,5 add ebx,edi - movdqa xmm5,xmm2 - add eax,DWORD PTR[16+rsp] + and esi,edx + pxor xmm4,xmm9 xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm5,xmm1,238 + xor esi,ebp movdqa xmm9,xmm4 -DB 102,15,58,15,233,8 + paddd xmm10,xmm4 mov edi,ebx + add eax,DWORD PTR[16+rsp] + punpcklqdq xmm5,xmm2 + xor ecx,edx rol ebx,5 - paddd xmm10,xmm4 - and esi,edx - xor edx,ebp + add eax,esi psrldq xmm9,4 - xor esi,ebp - add eax,ebx + and edi,ecx + xor ecx,edx pxor xmm5,xmm1 - ror ecx,7 - add eax,esi + add eax,ebx + ror ebx,7 pxor xmm9,xmm3 - add ebp,DWORD PTR[20+rsp] - xor ecx,edx + xor edi,edx mov esi,eax - rol eax,5 + add ebp,DWORD PTR[20+rsp] pxor xmm5,xmm9 - and edi,ecx - xor ecx,edx + xor ebx,ecx + rol eax,5 movdqa XMMWORD PTR[rsp],xmm10 - xor edi,edx - add ebp,eax - movdqa xmm8,xmm5 - movdqa xmm9,xmm5 - ror ebx,7 add ebp,edi - add edx,DWORD PTR[24+rsp] + and esi,ebx + movdqa xmm8,xmm5 xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm9,xmm5 + xor esi,ecx pslldq xmm8,12 paddd xmm5,xmm5 mov edi,ebp - rol ebp,5 - and esi,ebx - xor ebx,ecx + add edx,DWORD PTR[24+rsp] psrld xmm9,31 - xor esi,ecx - add edx,ebp - movdqa xmm10,xmm8 - ror eax,7 + xor eax,ebx + rol ebp,5 add edx,esi + movdqa xmm10,xmm8 + and edi,eax + xor eax,ebx psrld xmm8,30 + add edx,ebp + ror ebp,7 por xmm5,xmm9 - add ecx,DWORD PTR[28+rsp] - xor eax,ebx + xor edi,ebx mov esi,edx - rol edx,5 + add ecx,DWORD PTR[28+rsp] pslld xmm10,2 pxor xmm5,xmm8 - and edi,eax - xor eax,ebx - movdqa xmm8,XMMWORD PTR[16+r11] - xor edi,ebx - add ecx,edx - pxor xmm5,xmm10 - ror ebp,7 + xor ebp,eax + movdqa xmm8,XMMWORD PTR[((-32))+r11] + rol edx,5 add ecx,edi - movdqa xmm6,xmm3 - add ebx,DWORD PTR[32+rsp] + and esi,ebp + pxor xmm5,xmm10 xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm6,xmm2,238 + xor esi,eax movdqa xmm10,xmm5 -DB 102,15,58,15,242,8 + paddd xmm8,xmm5 mov edi,ecx + add ebx,DWORD PTR[32+rsp] + punpcklqdq xmm6,xmm3 + xor edx,ebp rol ecx,5 - paddd xmm8,xmm5 - and esi,ebp - xor ebp,eax + add ebx,esi psrldq xmm10,4 - xor esi,eax - add ebx,ecx - pxor xmm6,xmm2 - ror edx,7 - add ebx,esi - pxor xmm10,xmm4 - add eax,DWORD PTR[36+rsp] + and edi,edx xor edx,ebp + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm10,xmm4 + xor edi,ebp mov esi,ebx - rol ebx,5 + add eax,DWORD PTR[36+rsp] pxor xmm6,xmm10 - and edi,edx - xor edx,ebp + xor ecx,edx + rol ebx,5 movdqa XMMWORD PTR[16+rsp],xmm8 - xor edi,ebp - add eax,ebx - movdqa xmm9,xmm6 - movdqa xmm10,xmm6 - ror ecx,7 add eax,edi - add ebp,DWORD PTR[40+rsp] + and esi,ecx + movdqa xmm9,xmm6 xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm10,xmm6 + xor esi,edx pslldq xmm9,12 paddd xmm6,xmm6 mov 
edi,eax - rol eax,5 - and esi,ecx - xor ecx,edx + add ebp,DWORD PTR[40+rsp] psrld xmm10,31 - xor esi,edx - add ebp,eax - movdqa xmm8,xmm9 - ror ebx,7 + xor ebx,ecx + rol eax,5 add ebp,esi + movdqa xmm8,xmm9 + and edi,ebx + xor ebx,ecx psrld xmm9,30 + add ebp,eax + ror eax,7 por xmm6,xmm10 - add edx,DWORD PTR[44+rsp] - xor ebx,ecx + xor edi,ecx mov esi,ebp - rol ebp,5 + add edx,DWORD PTR[44+rsp] pslld xmm8,2 pxor xmm6,xmm9 - and edi,ebx - xor ebx,ecx - movdqa xmm9,XMMWORD PTR[16+r11] - xor edi,ecx - add edx,ebp - pxor xmm6,xmm8 - ror eax,7 + xor eax,ebx + movdqa xmm9,XMMWORD PTR[((-32))+r11] + rol ebp,5 add edx,edi - movdqa xmm7,xmm4 - add ecx,DWORD PTR[48+rsp] + and esi,eax + pxor xmm6,xmm8 xor eax,ebx + add edx,ebp + ror ebp,7 + pshufd xmm7,xmm3,238 + xor esi,ebx movdqa xmm8,xmm6 -DB 102,15,58,15,251,8 + paddd xmm9,xmm6 mov edi,edx + add ecx,DWORD PTR[48+rsp] + punpcklqdq xmm7,xmm4 + xor ebp,eax rol edx,5 - paddd xmm9,xmm6 - and esi,eax - xor eax,ebx + add ecx,esi psrldq xmm8,4 - xor esi,ebx - add ecx,edx + and edi,ebp + xor ebp,eax pxor xmm7,xmm3 - ror ebp,7 - add ecx,esi + add ecx,edx + ror edx,7 pxor xmm8,xmm5 - add ebx,DWORD PTR[52+rsp] - xor ebp,eax + xor edi,eax mov esi,ecx - rol ecx,5 + add ebx,DWORD PTR[52+rsp] pxor xmm7,xmm8 - and edi,ebp - xor ebp,eax + xor edx,ebp + rol ecx,5 movdqa XMMWORD PTR[32+rsp],xmm9 - xor edi,eax - add ebx,ecx - movdqa xmm10,xmm7 - movdqa xmm8,xmm7 - ror edx,7 add ebx,edi - add eax,DWORD PTR[56+rsp] + and esi,edx + movdqa xmm10,xmm7 xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm8,xmm7 + xor esi,ebp pslldq xmm10,12 paddd xmm7,xmm7 mov edi,ebx - rol ebx,5 - and esi,edx - xor edx,ebp + add eax,DWORD PTR[56+rsp] psrld xmm8,31 - xor esi,ebp - add eax,ebx - movdqa xmm9,xmm10 - ror ecx,7 + xor ecx,edx + rol ebx,5 add eax,esi + movdqa xmm9,xmm10 + and edi,ecx + xor ecx,edx psrld xmm10,30 + add eax,ebx + ror ebx,7 por xmm7,xmm8 - add ebp,DWORD PTR[60+rsp] - xor ecx,edx + xor edi,edx mov esi,eax - rol eax,5 + add ebp,DWORD PTR[60+rsp] pslld xmm9,2 pxor xmm7,xmm10 - and edi,ecx - xor ecx,edx - movdqa xmm10,XMMWORD PTR[16+r11] - xor edi,edx - add ebp,eax - pxor xmm7,xmm9 - ror ebx,7 + xor ebx,ecx + movdqa xmm10,XMMWORD PTR[((-32))+r11] + rol eax,5 add ebp,edi - movdqa xmm9,xmm7 - add edx,DWORD PTR[rsp] - pxor xmm0,xmm4 -DB 102,68,15,58,15,206,8 + and esi,ebx + pxor xmm7,xmm9 + pshufd xmm9,xmm6,238 xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm0,xmm4 + xor esi,ecx mov edi,ebp + add edx,DWORD PTR[rsp] + punpcklqdq xmm9,xmm7 + xor eax,ebx rol ebp,5 pxor xmm0,xmm1 - and esi,ebx - xor ebx,ecx + add edx,esi + and edi,eax movdqa xmm8,xmm10 + xor eax,ebx paddd xmm10,xmm7 - xor esi,ecx add edx,ebp pxor xmm0,xmm9 - ror eax,7 - add edx,esi + ror ebp,7 + xor edi,ebx + mov esi,edx add ecx,DWORD PTR[4+rsp] - xor eax,ebx movdqa xmm9,xmm0 - movdqa XMMWORD PTR[48+rsp],xmm10 - mov esi,edx + xor ebp,eax rol edx,5 - and edi,eax - xor eax,ebx + movdqa XMMWORD PTR[48+rsp],xmm10 + add ecx,edi + and esi,ebp + xor ebp,eax pslld xmm0,2 - xor edi,ebx add ecx,edx + ror edx,7 psrld xmm9,30 - ror ebp,7 - add ecx,edi - add ebx,DWORD PTR[8+rsp] - xor ebp,eax + xor esi,eax mov edi,ecx - rol ecx,5 + add ebx,DWORD PTR[8+rsp] por xmm0,xmm9 - and esi,ebp - xor ebp,eax - movdqa xmm10,xmm0 - xor esi,eax - add ebx,ecx - ror edx,7 - add ebx,esi - add eax,DWORD PTR[12+rsp] xor edx,ebp - mov esi,ebx - rol ebx,5 + rol ecx,5 + pshufd xmm10,xmm7,238 + add ebx,esi and edi,edx xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] xor edi,ebp - add eax,ebx - ror ecx,7 + mov esi,ebx + rol ebx,5 add eax,edi - 
add ebp,DWORD PTR[16+rsp] - pxor xmm1,xmm5 -DB 102,68,15,58,15,215,8 xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm1,xmm5 + add ebp,DWORD PTR[16+rsp] + xor esi,ecx + punpcklqdq xmm10,xmm0 mov edi,eax rol eax,5 pxor xmm1,xmm2 - xor esi,ecx - add ebp,eax + add ebp,esi + xor edi,ecx movdqa xmm9,xmm8 - paddd xmm8,xmm0 ror ebx,7 - add ebp,esi + paddd xmm8,xmm0 + add ebp,eax pxor xmm1,xmm10 add edx,DWORD PTR[20+rsp] - xor edi,ecx + xor edi,ebx mov esi,ebp rol ebp,5 movdqa xmm10,xmm1 + add edx,edi + xor esi,ebx movdqa XMMWORD PTR[rsp],xmm8 - xor edi,ebx - add edx,ebp ror eax,7 - add edx,edi - pslld xmm1,2 + add edx,ebp add ecx,DWORD PTR[24+rsp] - xor esi,ebx - psrld xmm10,30 + pslld xmm1,2 + xor esi,eax mov edi,edx + psrld xmm10,30 rol edx,5 - xor esi,eax - add ecx,edx - ror ebp,7 add ecx,esi + xor edi,eax + ror ebp,7 por xmm1,xmm10 + add ecx,edx add ebx,DWORD PTR[28+rsp] - xor edi,eax - movdqa xmm8,xmm1 + pshufd xmm8,xmm0,238 + xor edi,ebp mov esi,ecx rol ecx,5 - xor edi,ebp - add ebx,ecx - ror edx,7 add ebx,edi - add eax,DWORD PTR[32+rsp] - pxor xmm2,xmm6 -DB 102,68,15,58,15,192,8 xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm2,xmm6 + add eax,DWORD PTR[32+rsp] + xor esi,edx + punpcklqdq xmm8,xmm1 mov edi,ebx rol ebx,5 pxor xmm2,xmm3 - xor esi,edx - add eax,ebx - movdqa xmm10,XMMWORD PTR[32+r11] - paddd xmm9,xmm1 - ror ecx,7 add eax,esi + xor edi,edx + movdqa xmm10,XMMWORD PTR[r11] + ror ecx,7 + paddd xmm9,xmm1 + add eax,ebx pxor xmm2,xmm8 add ebp,DWORD PTR[36+rsp] - xor edi,edx + xor edi,ecx mov esi,eax rol eax,5 movdqa xmm8,xmm2 + add ebp,edi + xor esi,ecx movdqa XMMWORD PTR[16+rsp],xmm9 - xor edi,ecx - add ebp,eax ror ebx,7 - add ebp,edi - pslld xmm2,2 + add ebp,eax add edx,DWORD PTR[40+rsp] - xor esi,ecx - psrld xmm8,30 + pslld xmm2,2 + xor esi,ebx mov edi,ebp + psrld xmm8,30 rol ebp,5 - xor esi,ebx - add edx,ebp - ror eax,7 add edx,esi + xor edi,ebx + ror eax,7 por xmm2,xmm8 + add edx,ebp add ecx,DWORD PTR[44+rsp] - xor edi,ebx - movdqa xmm9,xmm2 + pshufd xmm9,xmm1,238 + xor edi,eax mov esi,edx rol edx,5 - xor edi,eax - add ecx,edx - ror ebp,7 add ecx,edi - add ebx,DWORD PTR[48+rsp] - pxor xmm3,xmm7 -DB 102,68,15,58,15,201,8 xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm3,xmm7 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + punpcklqdq xmm9,xmm2 mov edi,ecx rol ecx,5 pxor xmm3,xmm4 - xor esi,ebp - add ebx,ecx + add ebx,esi + xor edi,ebp movdqa xmm8,xmm10 - paddd xmm10,xmm2 ror edx,7 - add ebx,esi + paddd xmm10,xmm2 + add ebx,ecx pxor xmm3,xmm9 add eax,DWORD PTR[52+rsp] - xor edi,ebp + xor edi,edx mov esi,ebx rol ebx,5 movdqa xmm9,xmm3 + add eax,edi + xor esi,edx movdqa XMMWORD PTR[32+rsp],xmm10 - xor edi,edx - add eax,ebx ror ecx,7 - add eax,edi - pslld xmm3,2 + add eax,ebx add ebp,DWORD PTR[56+rsp] - xor esi,edx - psrld xmm9,30 + pslld xmm3,2 + xor esi,ecx mov edi,eax + psrld xmm9,30 rol eax,5 - xor esi,ecx - add ebp,eax - ror ebx,7 add ebp,esi + xor edi,ecx + ror ebx,7 por xmm3,xmm9 + add ebp,eax add edx,DWORD PTR[60+rsp] - xor edi,ecx - movdqa xmm10,xmm3 + pshufd xmm10,xmm2,238 + xor edi,ebx mov esi,ebp rol ebp,5 - xor edi,ebx - add edx,ebp - ror eax,7 add edx,edi - add ecx,DWORD PTR[rsp] - pxor xmm4,xmm0 -DB 102,68,15,58,15,210,8 xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm4,xmm0 + add ecx,DWORD PTR[rsp] + xor esi,eax + punpcklqdq xmm10,xmm3 mov edi,edx rol edx,5 pxor xmm4,xmm5 - xor esi,eax - add ecx,edx + add ecx,esi + xor edi,eax movdqa xmm9,xmm8 - paddd xmm8,xmm3 ror ebp,7 - add ecx,esi + paddd xmm8,xmm3 + add ecx,edx pxor xmm4,xmm10 add ebx,DWORD PTR[4+rsp] - xor edi,eax + xor 
edi,ebp mov esi,ecx rol ecx,5 movdqa xmm10,xmm4 + add ebx,edi + xor esi,ebp movdqa XMMWORD PTR[48+rsp],xmm8 - xor edi,ebp - add ebx,ecx ror edx,7 - add ebx,edi - pslld xmm4,2 + add ebx,ecx add eax,DWORD PTR[8+rsp] - xor esi,ebp - psrld xmm10,30 + pslld xmm4,2 + xor esi,edx mov edi,ebx + psrld xmm10,30 rol ebx,5 - xor esi,edx - add eax,ebx - ror ecx,7 add eax,esi + xor edi,edx + ror ecx,7 por xmm4,xmm10 + add eax,ebx add ebp,DWORD PTR[12+rsp] - xor edi,edx - movdqa xmm8,xmm4 + pshufd xmm8,xmm3,238 + xor edi,ecx mov esi,eax rol eax,5 - xor edi,ecx - add ebp,eax - ror ebx,7 add ebp,edi - add edx,DWORD PTR[16+rsp] - pxor xmm5,xmm1 -DB 102,68,15,58,15,195,8 xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm5,xmm1 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + punpcklqdq xmm8,xmm4 mov edi,ebp rol ebp,5 pxor xmm5,xmm6 - xor esi,ebx - add edx,ebp + add edx,esi + xor edi,ebx movdqa xmm10,xmm9 - paddd xmm9,xmm4 ror eax,7 - add edx,esi + paddd xmm9,xmm4 + add edx,ebp pxor xmm5,xmm8 add ecx,DWORD PTR[20+rsp] - xor edi,ebx + xor edi,eax mov esi,edx rol edx,5 movdqa xmm8,xmm5 + add ecx,edi + xor esi,eax movdqa XMMWORD PTR[rsp],xmm9 - xor edi,eax - add ecx,edx ror ebp,7 - add ecx,edi - pslld xmm5,2 + add ecx,edx add ebx,DWORD PTR[24+rsp] - xor esi,eax - psrld xmm8,30 + pslld xmm5,2 + xor esi,ebp mov edi,ecx + psrld xmm8,30 rol ecx,5 - xor esi,ebp - add ebx,ecx - ror edx,7 add ebx,esi + xor edi,ebp + ror edx,7 por xmm5,xmm8 + add ebx,ecx add eax,DWORD PTR[28+rsp] - xor edi,ebp - movdqa xmm9,xmm5 + pshufd xmm9,xmm4,238 + ror ecx,7 mov esi,ebx - rol ebx,5 xor edi,edx - add eax,ebx - ror ecx,7 + rol ebx,5 add eax,edi - mov edi,ecx - pxor xmm6,xmm2 -DB 102,68,15,58,15,204,8 + xor esi,ecx xor ecx,edx + add eax,ebx + pxor xmm6,xmm2 add ebp,DWORD PTR[32+rsp] - and edi,edx - pxor xmm6,xmm7 and esi,ecx + xor ecx,edx ror ebx,7 - movdqa xmm8,xmm10 - paddd xmm10,xmm5 - add ebp,edi + punpcklqdq xmm9,xmm5 mov edi,eax - pxor xmm6,xmm9 + xor esi,ecx + pxor xmm6,xmm7 rol eax,5 add ebp,esi - xor ecx,edx - add ebp,eax - movdqa xmm9,xmm6 - movdqa XMMWORD PTR[16+rsp],xmm10 - mov esi,ebx + movdqa xmm8,xmm10 + xor edi,ebx + paddd xmm10,xmm5 xor ebx,ecx + pxor xmm6,xmm9 + add ebp,eax add edx,DWORD PTR[36+rsp] - and esi,ecx - pslld xmm6,2 and edi,ebx + xor ebx,ecx ror eax,7 - psrld xmm9,30 - add edx,esi + movdqa xmm9,xmm6 mov esi,ebp + xor edi,ebx + movdqa XMMWORD PTR[16+rsp],xmm10 rol ebp,5 add edx,edi - xor ebx,ecx - add edx,ebp - por xmm6,xmm9 - mov edi,eax + xor esi,eax + pslld xmm6,2 xor eax,ebx - movdqa xmm10,xmm6 + add edx,ebp + psrld xmm9,30 add ecx,DWORD PTR[40+rsp] - and edi,ebx and esi,eax + xor eax,ebx + por xmm6,xmm9 ror ebp,7 - add ecx,edi mov edi,edx + xor esi,eax rol edx,5 + pshufd xmm10,xmm5,238 add ecx,esi - xor eax,ebx - add ecx,edx - mov esi,ebp + xor edi,ebp xor ebp,eax + add ecx,edx add ebx,DWORD PTR[44+rsp] - and esi,eax and edi,ebp + xor ebp,eax ror edx,7 - add ebx,esi mov esi,ecx + xor edi,ebp rol ecx,5 add ebx,edi - xor ebp,eax + xor esi,edx + xor edx,ebp add ebx,ecx - mov edi,edx pxor xmm7,xmm3 -DB 102,68,15,58,15,213,8 - xor edx,ebp add eax,DWORD PTR[48+rsp] - and edi,ebp - pxor xmm7,xmm0 and esi,edx + xor edx,ebp ror ecx,7 - movdqa xmm9,XMMWORD PTR[48+r11] - paddd xmm8,xmm6 - add eax,edi + punpcklqdq xmm10,xmm6 mov edi,ebx - pxor xmm7,xmm10 + xor esi,edx + pxor xmm7,xmm0 rol ebx,5 add eax,esi - xor edx,ebp - add eax,ebx - movdqa xmm10,xmm7 - movdqa XMMWORD PTR[32+rsp],xmm8 - mov esi,ecx + movdqa xmm9,XMMWORD PTR[32+r11] + xor edi,ecx + paddd xmm8,xmm6 xor ecx,edx + pxor xmm7,xmm10 + add eax,ebx add 
ebp,DWORD PTR[52+rsp] - and esi,edx - pslld xmm7,2 and edi,ecx + xor ecx,edx ror ebx,7 - psrld xmm10,30 - add ebp,esi + movdqa xmm10,xmm7 mov esi,eax + xor edi,ecx + movdqa XMMWORD PTR[32+rsp],xmm8 rol eax,5 add ebp,edi - xor ecx,edx - add ebp,eax - por xmm7,xmm10 - mov edi,ebx + xor esi,ebx + pslld xmm7,2 xor ebx,ecx - movdqa xmm8,xmm7 + add ebp,eax + psrld xmm10,30 add edx,DWORD PTR[56+rsp] - and edi,ecx and esi,ebx + xor ebx,ecx + por xmm7,xmm10 ror eax,7 - add edx,edi mov edi,ebp + xor esi,ebx rol ebp,5 + pshufd xmm8,xmm6,238 add edx,esi - xor ebx,ecx - add edx,ebp - mov esi,eax + xor edi,eax xor eax,ebx + add edx,ebp add ecx,DWORD PTR[60+rsp] - and esi,ebx and edi,eax + xor eax,ebx ror ebp,7 - add ecx,esi mov esi,edx + xor edi,eax rol edx,5 add ecx,edi - xor eax,ebx + xor esi,ebp + xor ebp,eax add ecx,edx - mov edi,ebp pxor xmm0,xmm4 -DB 102,68,15,58,15,198,8 - xor ebp,eax add ebx,DWORD PTR[rsp] - and edi,eax - pxor xmm0,xmm1 and esi,ebp + xor ebp,eax ror edx,7 - movdqa xmm10,xmm9 - paddd xmm9,xmm7 - add ebx,edi + punpcklqdq xmm8,xmm7 mov edi,ecx - pxor xmm0,xmm8 + xor esi,ebp + pxor xmm0,xmm1 rol ecx,5 add ebx,esi - xor ebp,eax - add ebx,ecx - movdqa xmm8,xmm0 - movdqa XMMWORD PTR[48+rsp],xmm9 - mov esi,edx + movdqa xmm10,xmm9 + xor edi,edx + paddd xmm9,xmm7 xor edx,ebp + pxor xmm0,xmm8 + add ebx,ecx add eax,DWORD PTR[4+rsp] - and esi,ebp - pslld xmm0,2 and edi,edx + xor edx,ebp ror ecx,7 - psrld xmm8,30 - add eax,esi + movdqa xmm8,xmm0 mov esi,ebx + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm9 rol ebx,5 add eax,edi - xor edx,ebp - add eax,ebx - por xmm0,xmm8 - mov edi,ecx + xor esi,ecx + pslld xmm0,2 xor ecx,edx - movdqa xmm9,xmm0 + add eax,ebx + psrld xmm8,30 add ebp,DWORD PTR[8+rsp] - and edi,edx and esi,ecx + xor ecx,edx + por xmm0,xmm8 ror ebx,7 - add ebp,edi mov edi,eax + xor esi,ecx rol eax,5 + pshufd xmm9,xmm7,238 add ebp,esi - xor ecx,edx - add ebp,eax - mov esi,ebx + xor edi,ebx xor ebx,ecx + add ebp,eax add edx,DWORD PTR[12+rsp] - and esi,ecx and edi,ebx + xor ebx,ecx ror eax,7 - add edx,esi mov esi,ebp + xor edi,ebx rol ebp,5 add edx,edi - xor ebx,ecx + xor esi,eax + xor eax,ebx add edx,ebp - mov edi,eax pxor xmm1,xmm5 -DB 102,68,15,58,15,207,8 - xor eax,ebx add ecx,DWORD PTR[16+rsp] - and edi,ebx - pxor xmm1,xmm2 and esi,eax + xor eax,ebx ror ebp,7 - movdqa xmm8,xmm10 - paddd xmm10,xmm0 - add ecx,edi + punpcklqdq xmm9,xmm0 mov edi,edx - pxor xmm1,xmm9 + xor esi,eax + pxor xmm1,xmm2 rol edx,5 add ecx,esi - xor eax,ebx - add ecx,edx - movdqa xmm9,xmm1 - movdqa XMMWORD PTR[rsp],xmm10 - mov esi,ebp + movdqa xmm8,xmm10 + xor edi,ebp + paddd xmm10,xmm0 xor ebp,eax + pxor xmm1,xmm9 + add ecx,edx add ebx,DWORD PTR[20+rsp] - and esi,eax - pslld xmm1,2 and edi,ebp + xor ebp,eax ror edx,7 - psrld xmm9,30 - add ebx,esi + movdqa xmm9,xmm1 mov esi,ecx + xor edi,ebp + movdqa XMMWORD PTR[rsp],xmm10 rol ecx,5 add ebx,edi - xor ebp,eax - add ebx,ecx - por xmm1,xmm9 - mov edi,edx + xor esi,edx + pslld xmm1,2 xor edx,ebp - movdqa xmm10,xmm1 + add ebx,ecx + psrld xmm9,30 add eax,DWORD PTR[24+rsp] - and edi,ebp and esi,edx + xor edx,ebp + por xmm1,xmm9 ror ecx,7 - add eax,edi mov edi,ebx + xor esi,edx rol ebx,5 + pshufd xmm10,xmm0,238 add eax,esi - xor edx,ebp - add eax,ebx - mov esi,ecx + xor edi,ecx xor ecx,edx + add eax,ebx add ebp,DWORD PTR[28+rsp] - and esi,edx and edi,ecx + xor ecx,edx ror ebx,7 - add ebp,esi mov esi,eax + xor edi,ecx rol eax,5 add ebp,edi - xor ecx,edx + xor esi,ebx + xor ebx,ecx add ebp,eax - mov edi,ebx pxor xmm2,xmm6 -DB 102,68,15,58,15,208,8 - xor ebx,ecx add 
edx,DWORD PTR[32+rsp] - and edi,ecx - pxor xmm2,xmm3 and esi,ebx + xor ebx,ecx ror eax,7 - movdqa xmm9,xmm8 - paddd xmm8,xmm1 - add edx,edi + punpcklqdq xmm10,xmm1 mov edi,ebp - pxor xmm2,xmm10 + xor esi,ebx + pxor xmm2,xmm3 rol ebp,5 add edx,esi - xor ebx,ecx - add edx,ebp - movdqa xmm10,xmm2 - movdqa XMMWORD PTR[16+rsp],xmm8 - mov esi,eax + movdqa xmm9,xmm8 + xor edi,eax + paddd xmm8,xmm1 xor eax,ebx + pxor xmm2,xmm10 + add edx,ebp add ecx,DWORD PTR[36+rsp] - and esi,ebx - pslld xmm2,2 and edi,eax + xor eax,ebx ror ebp,7 - psrld xmm10,30 - add ecx,esi + movdqa xmm10,xmm2 mov esi,edx + xor edi,eax + movdqa XMMWORD PTR[16+rsp],xmm8 rol edx,5 add ecx,edi - xor eax,ebx - add ecx,edx - por xmm2,xmm10 - mov edi,ebp + xor esi,ebp + pslld xmm2,2 xor ebp,eax - movdqa xmm8,xmm2 + add ecx,edx + psrld xmm10,30 add ebx,DWORD PTR[40+rsp] - and edi,eax and esi,ebp + xor ebp,eax + por xmm2,xmm10 ror edx,7 - add ebx,edi mov edi,ecx + xor esi,ebp rol ecx,5 + pshufd xmm8,xmm1,238 add ebx,esi - xor ebp,eax - add ebx,ecx - mov esi,edx + xor edi,edx xor edx,ebp + add ebx,ecx add eax,DWORD PTR[44+rsp] - and esi,ebp and edi,edx + xor edx,ebp ror ecx,7 - add eax,esi mov esi,ebx + xor edi,edx rol ebx,5 add eax,edi - xor edx,ebp + xor esi,edx add eax,ebx - add ebp,DWORD PTR[48+rsp] pxor xmm3,xmm7 -DB 102,68,15,58,15,193,8 - xor esi,edx + add ebp,DWORD PTR[48+rsp] + xor esi,ecx + punpcklqdq xmm8,xmm2 mov edi,eax rol eax,5 pxor xmm3,xmm4 - xor esi,ecx - add ebp,eax + add ebp,esi + xor edi,ecx movdqa xmm10,xmm9 - paddd xmm9,xmm2 ror ebx,7 - add ebp,esi + paddd xmm9,xmm2 + add ebp,eax pxor xmm3,xmm8 add edx,DWORD PTR[52+rsp] - xor edi,ecx + xor edi,ebx mov esi,ebp rol ebp,5 movdqa xmm8,xmm3 + add edx,edi + xor esi,ebx movdqa XMMWORD PTR[32+rsp],xmm9 - xor edi,ebx - add edx,ebp ror eax,7 - add edx,edi - pslld xmm3,2 + add edx,ebp add ecx,DWORD PTR[56+rsp] - xor esi,ebx - psrld xmm8,30 + pslld xmm3,2 + xor esi,eax mov edi,edx + psrld xmm8,30 rol edx,5 - xor esi,eax - add ecx,edx - ror ebp,7 add ecx,esi + xor edi,eax + ror ebp,7 por xmm3,xmm8 + add ecx,edx add ebx,DWORD PTR[60+rsp] - xor edi,eax + xor edi,ebp mov esi,ecx rol ecx,5 - xor edi,ebp - add ebx,ecx - ror edx,7 add ebx,edi - add eax,DWORD PTR[rsp] - paddd xmm10,xmm3 xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] + xor esi,edx mov edi,ebx rol ebx,5 - xor esi,edx + paddd xmm10,xmm3 + add eax,esi + xor edi,edx movdqa XMMWORD PTR[48+rsp],xmm10 - add eax,ebx ror ecx,7 - add eax,esi + add eax,ebx add ebp,DWORD PTR[4+rsp] - xor edi,edx + xor edi,ecx mov esi,eax rol eax,5 - xor edi,ecx - add ebp,eax - ror ebx,7 add ebp,edi - add edx,DWORD PTR[8+rsp] xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx mov edi,ebp rol ebp,5 - xor esi,ebx - add edx,ebp - ror eax,7 add edx,esi - add ecx,DWORD PTR[12+rsp] xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[12+rsp] + xor edi,eax mov esi,edx rol edx,5 - xor edi,eax - add ecx,edx - ror ebp,7 add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx cmp r9,r10 je $L$done_ssse3 movdqa xmm6,XMMWORD PTR[64+r11] - movdqa xmm9,XMMWORD PTR[r11] + movdqa xmm9,XMMWORD PTR[((-64))+r11] movdqu xmm0,XMMWORD PTR[r9] movdqu xmm1,XMMWORD PTR[16+r9] movdqu xmm2,XMMWORD PTR[32+r9] @@ -2267,262 +2396,3115 @@ DB 102,68,15,58,15,193,8 DB 102,15,56,0,198 add r9,64 add ebx,DWORD PTR[16+rsp] - xor esi,eax -DB 102,15,56,0,206 + xor esi,ebp mov edi,ecx +DB 102,15,56,0,206 rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 paddd xmm0,xmm9 - xor esi,ebp add ebx,ecx - ror edx,7 - add ebx,esi - movdqa XMMWORD 
PTR[rsp],xmm0 add eax,DWORD PTR[20+rsp] - xor edi,ebp - psubd xmm0,xmm9 + xor edi,edx mov esi,ebx + movdqa XMMWORD PTR[rsp],xmm0 rol ebx,5 - xor edi,edx - add eax,ebx - ror ecx,7 add eax,edi - add ebp,DWORD PTR[24+rsp] xor esi,edx + ror ecx,7 + psubd xmm0,xmm9 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx mov edi,eax rol eax,5 - xor esi,ecx - add ebp,eax - ror ebx,7 add ebp,esi - add edx,DWORD PTR[28+rsp] xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx mov esi,ebp rol ebp,5 - xor edi,ebx - add edx,ebp - ror eax,7 add edx,edi - add ecx,DWORD PTR[32+rsp] xor esi,ebx -DB 102,15,56,0,214 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax mov edi,edx +DB 102,15,56,0,214 rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 paddd xmm1,xmm9 - xor esi,eax add ecx,edx - ror ebp,7 - add ecx,esi - movdqa XMMWORD PTR[16+rsp],xmm1 add ebx,DWORD PTR[36+rsp] - xor edi,eax - psubd xmm1,xmm9 + xor edi,ebp mov esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm1 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm1,xmm9 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,222 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm2,xmm9 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD PTR[32+rsp],xmm2 + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + psubd xmm2,xmm9 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx rol ecx,5 + add ebx,esi xor edi,ebp + ror edx,7 add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + add edx,DWORD PTR[12+r8] + mov DWORD PTR[r8],eax + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[4+r8],esi + mov ebx,esi + mov DWORD PTR[8+r8],ecx + mov edi,ecx + mov DWORD PTR[12+r8],edx + xor edi,edx + mov DWORD PTR[16+r8],ebp + and esi,edi + jmp $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp ror edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 add ebx,edi - add eax,DWORD PTR[40+rsp] xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx mov edi,ebx rol ebx,5 - xor esi,edx - add eax,ebx - ror ecx,7 add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] 
+ xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + mov DWORD PTR[r8],eax + add edx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + movaps xmm6,XMMWORD PTR[((-40-96))+r14] + movaps xmm7,XMMWORD PTR[((-40-80))+r14] + movaps xmm8,XMMWORD PTR[((-40-64))+r14] + movaps xmm9,XMMWORD PTR[((-40-48))+r14] + movaps xmm10,XMMWORD PTR[((-40-32))+r14] + movaps xmm11,XMMWORD PTR[((-40-16))+r14] + lea rsi,QWORD PTR[r14] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_ssse3:: +sha1_block_data_order_ssse3 ENDP + +ALIGN 16 +sha1_block_data_order_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + lea rsp,QWORD PTR[((-160))+rsp] + vzeroupper + vmovaps XMMWORD PTR[(-40-96)+rax],xmm6 + vmovaps XMMWORD PTR[(-40-80)+rax],xmm7 + vmovaps XMMWORD PTR[(-40-64)+rax],xmm8 + vmovaps XMMWORD PTR[(-40-48)+rax],xmm9 + vmovaps XMMWORD PTR[(-40-32)+rax],xmm10 + vmovaps XMMWORD PTR[(-40-16)+rax],xmm11 +$L$prologue_avx:: + mov r14,rax + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r11,QWORD PTR[((K_XX_XX+64))] + + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov esi,ebx + mov ebp,DWORD PTR[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + vmovdqa xmm6,XMMWORD PTR[64+r11] + vmovdqa xmm11,XMMWORD PTR[((-64))+r11] + vmovdqu xmm0,XMMWORD PTR[r9] + vmovdqu xmm1,XMMWORD PTR[16+r9] + vmovdqu xmm2,XMMWORD PTR[32+r9] + vmovdqu xmm3,XMMWORD PTR[48+r9] + vpshufb xmm0,xmm0,xmm6 + add r9,64 + vpshufb xmm1,xmm1,xmm6 + vpshufb xmm2,xmm2,xmm6 + vpshufb xmm3,xmm3,xmm6 + vpaddd xmm4,xmm0,xmm11 + vpaddd xmm5,xmm1,xmm11 + vpaddd xmm6,xmm2,xmm11 + vmovdqa XMMWORD PTR[rsp],xmm4 + vmovdqa XMMWORD PTR[16+rsp],xmm5 + vmovdqa XMMWORD PTR[32+rsp],xmm6 + jmp $L$oop_avx +ALIGN 16 +$L$oop_avx:: + shrd ebx,ebx,2 + xor esi,edx + vpalignr xmm4,xmm1,xmm0,8 + mov edi,eax + add ebp,DWORD PTR[rsp] + vpaddd xmm9,xmm11,xmm3 + xor ebx,ecx + shld eax,eax,5 + vpsrldq xmm8,xmm3,4 + add ebp,esi + and edi,ebx + vpxor xmm4,xmm4,xmm0 + xor ebx,ecx + add ebp,eax + vpxor xmm8,xmm8,xmm2 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[4+rsp] + vpxor xmm4,xmm4,xmm8 + xor eax,ebx + shld ebp,ebp,5 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + add edx,edi + and esi,eax + vpsrld xmm8,xmm4,31 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpslldq xmm10,xmm4,12 + vpaddd xmm4,xmm4,xmm4 + mov edi,edx + add ecx,DWORD PTR[8+rsp] + xor ebp,eax + shld edx,edx,5 + vpsrld xmm9,xmm10,30 + vpor xmm4,xmm4,xmm8 + add ecx,esi + and edi,ebp + xor ebp,eax + add ecx,edx + vpslld xmm10,xmm10,2 + vpxor xmm4,xmm4,xmm9 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[12+rsp] + vpxor xmm4,xmm4,xmm10 + xor edx,ebp + shld ecx,ecx,5 + add 
ebx,edi + and esi,edx + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpalignr xmm5,xmm2,xmm1,8 + mov edi,ebx + add eax,DWORD PTR[16+rsp] + vpaddd xmm9,xmm11,xmm4 + xor ecx,edx + shld ebx,ebx,5 + vpsrldq xmm8,xmm4,4 + add eax,esi + and edi,ecx + vpxor xmm5,xmm5,xmm1 + xor ecx,edx + add eax,ebx + vpxor xmm8,xmm8,xmm3 + shrd ebx,ebx,7 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[20+rsp] + vpxor xmm5,xmm5,xmm8 + xor ebx,ecx + shld eax,eax,5 + vmovdqa XMMWORD PTR[rsp],xmm9 + add ebp,edi + and esi,ebx + vpsrld xmm8,xmm5,31 + xor ebx,ecx + add ebp,eax + shrd eax,eax,7 + xor esi,ecx + vpslldq xmm10,xmm5,12 + vpaddd xmm5,xmm5,xmm5 + mov edi,ebp + add edx,DWORD PTR[24+rsp] + xor eax,ebx + shld ebp,ebp,5 + vpsrld xmm9,xmm10,30 + vpor xmm5,xmm5,xmm8 + add edx,esi + and edi,eax + xor eax,ebx + add edx,ebp + vpslld xmm10,xmm10,2 + vpxor xmm5,xmm5,xmm9 + shrd ebp,ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[28+rsp] + vpxor xmm5,xmm5,xmm10 + xor ebp,eax + shld edx,edx,5 + vmovdqa xmm11,XMMWORD PTR[((-32))+r11] + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + vpalignr xmm6,xmm3,xmm2,8 + mov edi,ecx + add ebx,DWORD PTR[32+rsp] + vpaddd xmm9,xmm11,xmm5 + xor edx,ebp + shld ecx,ecx,5 + vpsrldq xmm8,xmm5,4 + add ebx,esi + and edi,edx + vpxor xmm6,xmm6,xmm2 + xor edx,ebp + add ebx,ecx + vpxor xmm8,xmm8,xmm4 + shrd ecx,ecx,7 + xor edi,ebp + mov esi,ebx + add eax,DWORD PTR[36+rsp] + vpxor xmm6,xmm6,xmm8 + xor ecx,edx + shld ebx,ebx,5 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + add eax,edi + and esi,ecx + vpsrld xmm8,xmm6,31 + xor ecx,edx + add eax,ebx + shrd ebx,ebx,7 + xor esi,edx + vpslldq xmm10,xmm6,12 + vpaddd xmm6,xmm6,xmm6 + mov edi,eax + add ebp,DWORD PTR[40+rsp] + xor ebx,ecx + shld eax,eax,5 + vpsrld xmm9,xmm10,30 + vpor xmm6,xmm6,xmm8 + add ebp,esi + and edi,ebx + xor ebx,ecx + add ebp,eax + vpslld xmm10,xmm10,2 + vpxor xmm6,xmm6,xmm9 + shrd eax,eax,7 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[44+rsp] + vpxor xmm6,xmm6,xmm10 + xor eax,ebx + shld ebp,ebp,5 + add edx,edi + and esi,eax + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor esi,ebx + vpalignr xmm7,xmm4,xmm3,8 + mov edi,edx + add ecx,DWORD PTR[48+rsp] + vpaddd xmm9,xmm11,xmm6 + xor ebp,eax + shld edx,edx,5 + vpsrldq xmm8,xmm6,4 + add ecx,esi + and edi,ebp + vpxor xmm7,xmm7,xmm3 + xor ebp,eax + add ecx,edx + vpxor xmm8,xmm8,xmm5 + shrd edx,edx,7 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[52+rsp] + vpxor xmm7,xmm7,xmm8 + xor edx,ebp + shld ecx,ecx,5 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + add ebx,edi + and esi,edx + vpsrld xmm8,xmm7,31 + xor edx,ebp + add ebx,ecx + shrd ecx,ecx,7 + xor esi,ebp + vpslldq xmm10,xmm7,12 + vpaddd xmm7,xmm7,xmm7 + mov edi,ebx + add eax,DWORD PTR[56+rsp] + xor ecx,edx + shld ebx,ebx,5 + vpsrld xmm9,xmm10,30 + vpor xmm7,xmm7,xmm8 + add eax,esi + and edi,ecx + xor ecx,edx + add eax,ebx + vpslld xmm10,xmm10,2 + vpxor xmm7,xmm7,xmm9 + shrd ebx,ebx,7 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[60+rsp] + vpxor xmm7,xmm7,xmm10 + xor ebx,ecx + shld eax,eax,5 + add ebp,edi + and esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + shrd eax,eax,7 + xor esi,ecx + mov edi,ebp + add edx,DWORD PTR[rsp] + vpxor xmm0,xmm0,xmm1 + xor eax,ebx + shld ebp,ebp,5 + vpaddd xmm9,xmm11,xmm7 + add edx,esi + and edi,eax + vpxor xmm0,xmm0,xmm8 + xor eax,ebx + add edx,ebp + shrd ebp,ebp,7 + xor edi,ebx + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + mov esi,edx + add ecx,DWORD PTR[4+rsp] + xor ebp,eax + shld edx,edx,5 + vpslld 
xmm0,xmm0,2 + add ecx,edi + and esi,ebp + xor ebp,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + mov edi,ecx + add ebx,DWORD PTR[8+rsp] + vpor xmm0,xmm0,xmm8 + xor edx,ebp + shld ecx,ecx,5 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] + xor edi,ebp + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ebp,DWORD PTR[16+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm1,xmm1,xmm2 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm11,xmm0 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm1,xmm1,xmm8 + add edx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD PTR[rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm1,xmm1,2 + add ecx,DWORD PTR[24+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm1,xmm1,xmm8 + add ebx,DWORD PTR[28+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add eax,DWORD PTR[32+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + vpxor xmm2,xmm2,xmm3 + add eax,esi + xor edi,edx + vpaddd xmm9,xmm11,xmm1 + vmovdqa xmm11,XMMWORD PTR[r11] + shrd ecx,ecx,7 + add eax,ebx + vpxor xmm2,xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpslld xmm2,xmm2,2 + add edx,DWORD PTR[40+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vpor xmm2,xmm2,xmm8 + add ecx,DWORD PTR[44+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + vpxor xmm3,xmm3,xmm4 + add ebx,esi + xor edi,ebp + vpaddd xmm9,xmm11,xmm2 + shrd edx,edx,7 + add ebx,ecx + vpxor xmm3,xmm3,xmm8 + add eax,DWORD PTR[52+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpslld xmm3,xmm3,2 + add ebp,DWORD PTR[56+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpor xmm3,xmm3,xmm8 + add edx,DWORD PTR[60+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpalignr xmm8,xmm3,xmm2,8 + vpxor xmm4,xmm4,xmm0 + add ecx,DWORD PTR[rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + vpxor xmm4,xmm4,xmm5 + add ecx,esi + xor edi,eax + vpaddd xmm9,xmm11,xmm3 + shrd ebp,ebp,7 + add ecx,edx + vpxor xmm4,xmm4,xmm8 + add ebx,DWORD PTR[4+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + vpsrld xmm8,xmm4,30 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + vpslld xmm4,xmm4,2 + add eax,DWORD PTR[8+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + vpor xmm4,xmm4,xmm8 + add ebp,DWORD PTR[12+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + vpalignr xmm8,xmm4,xmm3,8 + vpxor xmm5,xmm5,xmm1 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + vpxor xmm5,xmm5,xmm6 + 
add edx,esi + xor edi,ebx + vpaddd xmm9,xmm11,xmm4 + shrd eax,eax,7 + add edx,ebp + vpxor xmm5,xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + vpsrld xmm8,xmm5,30 + vmovdqa XMMWORD PTR[rsp],xmm9 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + vpslld xmm5,xmm5,2 + add ebx,DWORD PTR[24+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vpor xmm5,xmm5,xmm8 + add eax,DWORD PTR[28+rsp] + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + vpalignr xmm8,xmm5,xmm4,8 + vpxor xmm6,xmm6,xmm2 + add ebp,DWORD PTR[32+rsp] + and esi,ecx + xor ecx,edx + shrd ebx,ebx,7 + vpxor xmm6,xmm6,xmm7 + mov edi,eax + xor esi,ecx + vpaddd xmm9,xmm11,xmm5 + shld eax,eax,5 + add ebp,esi + vpxor xmm6,xmm6,xmm8 + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[36+rsp] + vpsrld xmm8,xmm6,30 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + vpslld xmm6,xmm6,2 + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[40+rsp] + and esi,eax + vpor xmm6,xmm6,xmm8 + xor eax,ebx + shrd ebp,ebp,7 + mov edi,edx + xor esi,eax + shld edx,edx,5 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[44+rsp] + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + mov esi,ecx + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + vpalignr xmm8,xmm6,xmm5,8 + vpxor xmm7,xmm7,xmm3 + add eax,DWORD PTR[48+rsp] + and esi,edx + xor edx,ebp + shrd ecx,ecx,7 + vpxor xmm7,xmm7,xmm0 + mov edi,ebx + xor esi,edx + vpaddd xmm9,xmm11,xmm6 + vmovdqa xmm11,XMMWORD PTR[32+r11] + shld ebx,ebx,5 + add eax,esi + vpxor xmm7,xmm7,xmm8 + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[52+rsp] + vpsrld xmm8,xmm7,30 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + vpslld xmm7,xmm7,2 + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[56+rsp] + and esi,ebx + vpor xmm7,xmm7,xmm8 + xor ebx,ecx + shrd eax,eax,7 + mov edi,ebp + xor esi,ebx + shld ebp,ebp,5 + add edx,esi + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[60+rsp] + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + vpalignr xmm8,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + add ebx,DWORD PTR[rsp] + and esi,ebp + xor ebp,eax + shrd edx,edx,7 + vpxor xmm0,xmm0,xmm1 + mov edi,ecx + xor esi,ebp + vpaddd xmm9,xmm11,xmm7 + shld ecx,ecx,5 + add ebx,esi + vpxor xmm0,xmm0,xmm8 + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[4+rsp] + vpsrld xmm8,xmm0,30 + vmovdqa XMMWORD PTR[48+rsp],xmm9 + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + vpslld xmm0,xmm0,2 + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[8+rsp] + and esi,ecx + vpor xmm0,xmm0,xmm8 + xor ecx,edx + shrd ebx,ebx,7 + mov edi,eax + xor esi,ecx + shld eax,eax,5 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[12+rsp] + and edi,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,ebp + xor edi,ebx + shld ebp,ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + vpalignr xmm8,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ecx,DWORD PTR[16+rsp] + and esi,eax + xor eax,ebx + shrd ebp,ebp,7 + vpxor xmm1,xmm1,xmm2 + mov 
edi,edx + xor esi,eax + vpaddd xmm9,xmm11,xmm0 + shld edx,edx,5 + add ecx,esi + vpxor xmm1,xmm1,xmm8 + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[20+rsp] + vpsrld xmm8,xmm1,30 + vmovdqa XMMWORD PTR[rsp],xmm9 + and edi,ebp + xor ebp,eax + shrd edx,edx,7 + mov esi,ecx + vpslld xmm1,xmm1,2 + xor edi,ebp + shld ecx,ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[24+rsp] + and esi,edx + vpor xmm1,xmm1,xmm8 + xor edx,ebp + shrd ecx,ecx,7 + mov edi,ebx + xor esi,edx + shld ebx,ebx,5 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[28+rsp] + and edi,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + xor edi,ecx + shld eax,eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + vpalignr xmm8,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add edx,DWORD PTR[32+rsp] + and esi,ebx + xor ebx,ecx + shrd eax,eax,7 + vpxor xmm2,xmm2,xmm3 + mov edi,ebp + xor esi,ebx + vpaddd xmm9,xmm11,xmm1 + shld ebp,ebp,5 + add edx,esi + vpxor xmm2,xmm2,xmm8 + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[36+rsp] + vpsrld xmm8,xmm2,30 + vmovdqa XMMWORD PTR[16+rsp],xmm9 + and edi,eax + xor eax,ebx + shrd ebp,ebp,7 + mov esi,edx + vpslld xmm2,xmm2,2 + xor edi,eax + shld edx,edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[40+rsp] + and esi,ebp + vpor xmm2,xmm2,xmm8 + xor ebp,eax + shrd edx,edx,7 + mov edi,ecx + xor esi,ebp + shld ecx,ecx,5 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[44+rsp] + and edi,edx + xor edx,ebp + shrd ecx,ecx,7 + mov esi,ebx + xor edi,edx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + vpalignr xmm8,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebp,DWORD PTR[48+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + vpxor xmm3,xmm3,xmm4 + add ebp,esi + xor edi,ecx + vpaddd xmm9,xmm11,xmm2 + shrd ebx,ebx,7 + add ebp,eax + vpxor xmm3,xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + vpsrld xmm8,xmm3,30 + vmovdqa XMMWORD PTR[32+rsp],xmm9 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + vpslld xmm3,xmm3,2 + add ecx,DWORD PTR[56+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vpor xmm3,xmm3,xmm8 + add ebx,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] + vpaddd xmm9,xmm11,xmm3 + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + vmovdqa XMMWORD PTR[48+rsp],xmm9 + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[4+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[12+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + cmp r9,r10 + je $L$done_avx + vmovdqa xmm6,XMMWORD PTR[64+r11] + vmovdqa xmm11,XMMWORD PTR[((-64))+r11] + vmovdqu xmm0,XMMWORD PTR[r9] + vmovdqu xmm1,XMMWORD PTR[16+r9] + vmovdqu xmm2,XMMWORD PTR[32+r9] + vmovdqu xmm3,XMMWORD PTR[48+r9] + vpshufb xmm0,xmm0,xmm6 + add r9,64 + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + vpshufb xmm1,xmm1,xmm6 + mov edi,ecx + shld ecx,ecx,5 + vpaddd xmm4,xmm0,xmm11 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + vmovdqa XMMWORD PTR[rsp],xmm4 + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + shld 
ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + vpshufb xmm2,xmm2,xmm6 + mov edi,edx + shld edx,edx,5 + vpaddd xmm5,xmm1,xmm11 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + vmovdqa XMMWORD PTR[16+rsp],xmm5 + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + vpshufb xmm3,xmm3,xmm6 + mov edi,ebp + shld ebp,ebp,5 + vpaddd xmm6,xmm2,xmm11 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + vmovdqa XMMWORD PTR[32+rsp],xmm6 + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + shrd ecx,ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + add edx,DWORD PTR[12+r8] + mov DWORD PTR[r8],eax + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[4+r8],esi + mov ebx,esi + mov DWORD PTR[8+r8],ecx + mov edi,ecx + mov DWORD PTR[12+r8],edx + xor edi,edx + mov DWORD PTR[16+r8],ebp + and esi,edi + jmp $L$oop_avx + +ALIGN 16 +$L$done_avx:: + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + shld eax,eax,5 + add ebp,esi + xor edi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + shld ebp,ebp,5 + add edx,edi + xor esi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx + shld edx,edx,5 + add ecx,esi + xor edi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + shld ecx,ecx,5 + add ebx,edi + xor esi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + shld ebx,ebx,5 + add eax,esi + xor edi,edx + shrd ecx,ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + shld eax,eax,5 + add ebp,edi + xor esi,ecx + shrd ebx,ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp + shld ebp,ebp,5 + add edx,esi + xor edi,ebx + shrd eax,eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + shld edx,edx,5 + add ecx,edi + xor esi,eax + shrd ebp,ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + shld ecx,ecx,5 + add ebx,esi + xor edi,ebp + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,edi + shrd ecx,ecx,7 + add eax,ebx + vzeroupper + + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add 
ecx,DWORD PTR[8+r8] + mov DWORD PTR[r8],eax + add edx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + movaps xmm6,XMMWORD PTR[((-40-96))+r14] + movaps xmm7,XMMWORD PTR[((-40-80))+r14] + movaps xmm8,XMMWORD PTR[((-40-64))+r14] + movaps xmm9,XMMWORD PTR[((-40-48))+r14] + movaps xmm10,XMMWORD PTR[((-40-32))+r14] + movaps xmm11,XMMWORD PTR[((-40-16))+r14] + lea rsi,QWORD PTR[r14] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue_avx:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_avx:: +sha1_block_data_order_avx ENDP + +ALIGN 16 +sha1_block_data_order_avx2 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx2_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + vzeroupper + lea rsp,QWORD PTR[((-96))+rsp] + vmovaps XMMWORD PTR[(-40-96)+rax],xmm6 + vmovaps XMMWORD PTR[(-40-80)+rax],xmm7 + vmovaps XMMWORD PTR[(-40-64)+rax],xmm8 + vmovaps XMMWORD PTR[(-40-48)+rax],xmm9 + vmovaps XMMWORD PTR[(-40-32)+rax],xmm10 + vmovaps XMMWORD PTR[(-40-16)+rax],xmm11 +$L$prologue_avx2:: + mov r14,rax + mov r8,rdi + mov r9,rsi + mov r10,rdx + + lea rsp,QWORD PTR[((-640))+rsp] + shl r10,6 + lea r13,QWORD PTR[64+r9] + and rsp,-128 + add r10,r9 + lea r11,QWORD PTR[((K_XX_XX+64))] + + mov eax,DWORD PTR[r8] + cmp r13,r10 + cmovae r13,r9 + mov ebp,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov esi,DWORD PTR[16+r8] + vmovdqu ymm6,YMMWORD PTR[64+r11] + + vmovdqu xmm0,XMMWORD PTR[r9] + vmovdqu xmm1,XMMWORD PTR[16+r9] + vmovdqu xmm2,XMMWORD PTR[32+r9] + vmovdqu xmm3,XMMWORD PTR[48+r9] + lea r9,QWORD PTR[64+r9] + vinserti128 ymm0,ymm0,XMMWORD PTR[r13],1 + vinserti128 ymm1,ymm1,XMMWORD PTR[16+r13],1 + vpshufb ymm0,ymm0,ymm6 + vinserti128 ymm2,ymm2,XMMWORD PTR[32+r13],1 + vpshufb ymm1,ymm1,ymm6 + vinserti128 ymm3,ymm3,XMMWORD PTR[48+r13],1 + vpshufb ymm2,ymm2,ymm6 + vmovdqu ymm11,YMMWORD PTR[((-64))+r11] + vpshufb ymm3,ymm3,ymm6 + + vpaddd ymm4,ymm0,ymm11 + vpaddd ymm5,ymm1,ymm11 + vmovdqu YMMWORD PTR[rsp],ymm4 + vpaddd ymm6,ymm2,ymm11 + vmovdqu YMMWORD PTR[32+rsp],ymm5 + vpaddd ymm7,ymm3,ymm11 + vmovdqu YMMWORD PTR[64+rsp],ymm6 + vmovdqu YMMWORD PTR[96+rsp],ymm7 + vpalignr ymm4,ymm1,ymm0,8 + vpsrldq ymm8,ymm3,4 + vpxor ymm4,ymm4,ymm0 + vpxor ymm8,ymm8,ymm2 + vpxor ymm4,ymm4,ymm8 + vpsrld ymm8,ymm4,31 + vpslldq ymm10,ymm4,12 + vpaddd ymm4,ymm4,ymm4 + vpsrld ymm9,ymm10,30 + vpor ymm4,ymm4,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm4,ymm4,ymm9 + vpxor ymm4,ymm4,ymm10 + vpaddd ymm9,ymm4,ymm11 + vmovdqu YMMWORD PTR[128+rsp],ymm9 + vpalignr ymm5,ymm2,ymm1,8 + vpsrldq ymm8,ymm4,4 + vpxor ymm5,ymm5,ymm1 + vpxor ymm8,ymm8,ymm3 + vpxor ymm5,ymm5,ymm8 + vpsrld ymm8,ymm5,31 + vmovdqu ymm11,YMMWORD PTR[((-32))+r11] + vpslldq ymm10,ymm5,12 + vpaddd ymm5,ymm5,ymm5 + vpsrld ymm9,ymm10,30 + vpor ymm5,ymm5,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm5,ymm5,ymm9 + vpxor ymm5,ymm5,ymm10 + vpaddd ymm9,ymm5,ymm11 + vmovdqu YMMWORD PTR[160+rsp],ymm9 + vpalignr ymm6,ymm3,ymm2,8 + vpsrldq ymm8,ymm5,4 + vpxor ymm6,ymm6,ymm2 + vpxor ymm8,ymm8,ymm4 + vpxor ymm6,ymm6,ymm8 + vpsrld ymm8,ymm6,31 + vpslldq ymm10,ymm6,12 + vpaddd 
ymm6,ymm6,ymm6 + vpsrld ymm9,ymm10,30 + vpor ymm6,ymm6,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm6,ymm6,ymm9 + vpxor ymm6,ymm6,ymm10 + vpaddd ymm9,ymm6,ymm11 + vmovdqu YMMWORD PTR[192+rsp],ymm9 + vpalignr ymm7,ymm4,ymm3,8 + vpsrldq ymm8,ymm6,4 + vpxor ymm7,ymm7,ymm3 + vpxor ymm8,ymm8,ymm5 + vpxor ymm7,ymm7,ymm8 + vpsrld ymm8,ymm7,31 + vpslldq ymm10,ymm7,12 + vpaddd ymm7,ymm7,ymm7 + vpsrld ymm9,ymm10,30 + vpor ymm7,ymm7,ymm8 + vpslld ymm10,ymm10,2 + vpxor ymm7,ymm7,ymm9 + vpxor ymm7,ymm7,ymm10 + vpaddd ymm9,ymm7,ymm11 + vmovdqu YMMWORD PTR[224+rsp],ymm9 + lea r13,QWORD PTR[128+rsp] + jmp $L$oop_avx2 +ALIGN 32 +$L$oop_avx2:: + rorx ebx,ebp,2 + andn edi,ebp,edx + and ebp,ecx + xor ebp,edi + jmp $L$align32_1 +ALIGN 32 +$L$align32_1:: + vpalignr ymm8,ymm7,ymm6,8 + vpxor ymm0,ymm0,ymm4 + add esi,DWORD PTR[((-128))+r13] + andn edi,eax,ecx + vpxor ymm0,ymm0,ymm1 + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + vpxor ymm0,ymm0,ymm8 + and eax,ebx + add esi,r12d + xor eax,edi + vpsrld ymm8,ymm0,30 + vpslld ymm0,ymm0,2 + add edx,DWORD PTR[((-124))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + vpor ymm0,ymm0,ymm8 + add edx,r12d + xor esi,edi + add ecx,DWORD PTR[((-120))+r13] + andn edi,edx,ebp + vpaddd ymm9,ymm0,ymm11 + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + vmovdqu YMMWORD PTR[256+rsp],ymm9 + add ecx,r12d + xor edx,edi + add ebx,DWORD PTR[((-116))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD PTR[((-96))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + vpalignr ymm8,ymm0,ymm7,8 + vpxor ymm1,ymm1,ymm5 + add eax,DWORD PTR[((-92))+r13] + andn edi,ebp,edx + vpxor ymm1,ymm1,ymm2 + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + vpxor ymm1,ymm1,ymm8 + and ebp,ecx + add eax,r12d + xor ebp,edi + vpsrld ymm8,ymm1,30 + vpslld ymm1,ymm1,2 + add esi,DWORD PTR[((-88))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + vpor ymm1,ymm1,ymm8 + add esi,r12d + xor eax,edi + add edx,DWORD PTR[((-84))+r13] + andn edi,esi,ebx + vpaddd ymm9,ymm1,ymm11 + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + vmovdqu YMMWORD PTR[288+rsp],ymm9 + add edx,r12d + xor esi,edi + add ecx,DWORD PTR[((-64))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD PTR[((-60))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + vpalignr ymm8,ymm1,ymm0,8 + vpxor ymm2,ymm2,ymm6 + add ebp,DWORD PTR[((-56))+r13] + andn edi,ebx,esi + vpxor ymm2,ymm2,ymm3 + vmovdqu ymm11,YMMWORD PTR[r11] + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpxor ymm2,ymm2,ymm8 + and ebx,edx + add ebp,r12d + xor ebx,edi + vpsrld ymm8,ymm2,30 + vpslld ymm2,ymm2,2 + add eax,DWORD PTR[((-52))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + vpor ymm2,ymm2,ymm8 + add eax,r12d + xor ebp,edi + add esi,DWORD PTR[((-32))+r13] + andn edi,eax,ecx + vpaddd ymm9,ymm2,ymm11 + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + vmovdqu YMMWORD PTR[320+rsp],ymm9 + add esi,r12d + xor eax,edi + add edx,DWORD PTR[((-28))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD PTR[((-24))+r13] + andn edi,edx,ebp + add ecx,esi + rorx 
r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + vpalignr ymm8,ymm2,ymm1,8 + vpxor ymm3,ymm3,ymm7 + add ebx,DWORD PTR[((-20))+r13] + andn edi,ecx,eax + vpxor ymm3,ymm3,ymm4 + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpxor ymm3,ymm3,ymm8 + and ecx,esi + add ebx,r12d + xor ecx,edi + vpsrld ymm8,ymm3,30 + vpslld ymm3,ymm3,2 + add ebp,DWORD PTR[r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + vpor ymm3,ymm3,ymm8 + add ebp,r12d + xor ebx,edi + add eax,DWORD PTR[4+r13] + andn edi,ebp,edx + vpaddd ymm9,ymm3,ymm11 + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + vmovdqu YMMWORD PTR[352+rsp],ymm9 + add eax,r12d + xor ebp,edi + add esi,DWORD PTR[8+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD PTR[12+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + vpalignr ymm8,ymm3,ymm2,8 + vpxor ymm4,ymm4,ymm0 + add ecx,DWORD PTR[32+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + vpxor ymm4,ymm4,ymm5 + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpxor ymm4,ymm4,ymm8 + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[36+r13] + vpsrld ymm8,ymm4,30 + vpslld ymm4,ymm4,2 + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vpor ymm4,ymm4,ymm8 + add ebp,DWORD PTR[40+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpaddd ymm9,ymm4,ymm11 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[44+r13] + vmovdqu YMMWORD PTR[384+rsp],ymm9 + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[64+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpalignr ymm8,ymm4,ymm3,8 + vpxor ymm5,ymm5,ymm1 + add edx,DWORD PTR[68+r13] + lea edx,DWORD PTR[rax*1+rdx] + vpxor ymm5,ymm5,ymm6 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + vpxor ymm5,ymm5,ymm8 + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[72+r13] + vpsrld ymm8,ymm5,30 + vpslld ymm5,ymm5,2 + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + vpor ymm5,ymm5,ymm8 + add ebx,DWORD PTR[76+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpaddd ymm9,ymm5,ymm11 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[96+r13] + vmovdqu YMMWORD PTR[416+rsp],ymm9 + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[100+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpalignr ymm8,ymm5,ymm4,8 + vpxor ymm6,ymm6,ymm2 + add esi,DWORD PTR[104+r13] + lea esi,DWORD PTR[rbp*1+rsi] + vpxor ymm6,ymm6,ymm7 + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + vpxor ymm6,ymm6,ymm8 + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[108+r13] + lea r13,QWORD PTR[256+r13] + vpsrld ymm8,ymm6,30 + vpslld ymm6,ymm6,2 + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + vpor ymm6,ymm6,ymm8 + add ecx,DWORD PTR[((-128))+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + vpaddd ymm9,ymm6,ymm11 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[((-124))+r13] + vmovdqu YMMWORD PTR[448+rsp],ymm9 + lea ebx,DWORD 
PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[((-120))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpalignr ymm8,ymm6,ymm5,8 + vpxor ymm7,ymm7,ymm3 + add eax,DWORD PTR[((-116))+r13] + lea eax,DWORD PTR[rbx*1+rax] + vpxor ymm7,ymm7,ymm0 + vmovdqu ymm11,YMMWORD PTR[32+r11] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + vpxor ymm7,ymm7,ymm8 + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[((-96))+r13] + vpsrld ymm8,ymm7,30 + vpslld ymm7,ymm7,2 + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpor ymm7,ymm7,ymm8 + add edx,DWORD PTR[((-92))+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpaddd ymm9,ymm7,ymm11 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[((-88))+r13] + vmovdqu YMMWORD PTR[480+rsp],ymm9 + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[((-84))+r13] + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + jmp $L$align32_2 +ALIGN 32 +$L$align32_2:: + vpalignr ymm8,ymm7,ymm6,8 + vpxor ymm0,ymm0,ymm4 + add ebp,DWORD PTR[((-64))+r13] + xor ecx,esi + vpxor ymm0,ymm0,ymm1 + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + vpxor ymm0,ymm0,ymm8 + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + vpsrld ymm8,ymm0,30 + vpslld ymm0,ymm0,2 + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[((-60))+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + vpor ymm0,ymm0,ymm8 + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + vpaddd ymm9,ymm0,ymm11 + add eax,r12d + and ebp,edi + add esi,DWORD PTR[((-56))+r13] + xor ebp,ecx + vmovdqu YMMWORD PTR[512+rsp],ymm9 + mov edi,ebx + xor edi,ecx + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD PTR[((-52))+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + and esi,edi + add ecx,DWORD PTR[((-32))+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + vpalignr ymm8,ymm0,ymm7,8 + vpxor ymm1,ymm1,ymm5 + add ebx,DWORD PTR[((-28))+r13] + xor edx,eax + vpxor ymm1,ymm1,ymm2 + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + vpxor ymm1,ymm1,ymm8 + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vpsrld ymm8,ymm1,30 + vpslld ymm1,ymm1,2 + add ebx,r12d + and ecx,edi + add ebp,DWORD PTR[((-24))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + vpor ymm1,ymm1,ymm8 + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + vpaddd ymm9,ymm1,ymm11 + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[((-20))+r13] + xor ebx,edx + vmovdqu YMMWORD PTR[544+rsp],ymm9 + mov edi,ecx + xor edi,edx + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD PTR[r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD PTR[4+r13] + xor eax,ebx + mov edi,ebp + xor edi,ebx + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d 
+ and esi,edi + vpalignr ymm8,ymm1,ymm0,8 + vpxor ymm2,ymm2,ymm6 + add ecx,DWORD PTR[8+r13] + xor esi,ebp + vpxor ymm2,ymm2,ymm3 + mov edi,eax + xor edi,ebp + lea ecx,DWORD PTR[rsi*1+rcx] + vpxor ymm2,ymm2,ymm8 + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpsrld ymm8,ymm2,30 + vpslld ymm2,ymm2,2 + add ecx,r12d + and edx,edi + add ebx,DWORD PTR[12+r13] + xor edx,eax + mov edi,esi + xor edi,eax + vpor ymm2,ymm2,ymm8 + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vpaddd ymm9,ymm2,ymm11 + add ebx,r12d + and ecx,edi + add ebp,DWORD PTR[32+r13] + xor ecx,esi + vmovdqu YMMWORD PTR[576+rsp],ymm9 + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[36+r13] + xor ebx,edx + mov edi,ecx + xor edi,edx + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD PTR[40+r13] + xor ebp,ecx + mov edi,ebx + xor edi,ecx + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + vpalignr ymm8,ymm2,ymm1,8 + vpxor ymm3,ymm3,ymm7 + add edx,DWORD PTR[44+r13] + xor eax,ebx + vpxor ymm3,ymm3,ymm4 + mov edi,ebp + xor edi,ebx + lea edx,DWORD PTR[rax*1+rdx] + vpxor ymm3,ymm3,ymm8 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + vpsrld ymm8,ymm3,30 + vpslld ymm3,ymm3,2 + add edx,r12d + and esi,edi + add ecx,DWORD PTR[64+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + vpor ymm3,ymm3,ymm8 + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + vpaddd ymm9,ymm3,ymm11 + add ecx,r12d + and edx,edi + add ebx,DWORD PTR[68+r13] + xor edx,eax + vmovdqu YMMWORD PTR[608+rsp],ymm9 + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD PTR[72+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[76+r13] + xor ebx,edx + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[96+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[100+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[104+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[108+r13] + lea r13,QWORD PTR[256+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[((-128))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[((-124))+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[((-120))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[((-116))+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[((-96))+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD 
PTR[((-92))+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[((-88))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[((-84))+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[((-64))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[((-60))+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[((-56))+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[((-52))+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[((-32))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[((-28))+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[((-24))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[((-20))+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + add edx,r12d + lea r13,QWORD PTR[128+r9] + lea rdi,QWORD PTR[128+r9] + cmp r13,r10 + cmovae r13,r9 + + + add edx,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ebp,DWORD PTR[8+r8] + mov DWORD PTR[r8],edx + add ebx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + mov eax,edx + add ecx,DWORD PTR[16+r8] + mov r12d,ebp + mov DWORD PTR[8+r8],ebp + mov edx,ebx + + mov DWORD PTR[12+r8],ebx + mov ebp,esi + mov DWORD PTR[16+r8],ecx + + mov esi,ecx + mov ecx,r12d + + + cmp r9,r10 + je $L$done_avx2 + vmovdqu ymm6,YMMWORD PTR[64+r11] + cmp rdi,r10 + ja $L$ast_avx2 + + vmovdqu xmm0,XMMWORD PTR[((-64))+rdi] + vmovdqu xmm1,XMMWORD PTR[((-48))+rdi] + vmovdqu xmm2,XMMWORD PTR[((-32))+rdi] + vmovdqu xmm3,XMMWORD PTR[((-16))+rdi] + vinserti128 ymm0,ymm0,XMMWORD PTR[r13],1 + vinserti128 ymm1,ymm1,XMMWORD PTR[16+r13],1 + vinserti128 ymm2,ymm2,XMMWORD PTR[32+r13],1 + vinserti128 ymm3,ymm3,XMMWORD PTR[48+r13],1 + jmp $L$ast_avx2 + +ALIGN 32 +$L$ast_avx2:: + lea r13,QWORD PTR[((128+16))+rsp] + rorx ebx,ebp,2 + andn edi,ebp,edx + and ebp,ecx + xor ebp,edi + sub r9,-128 + add esi,DWORD PTR[((-128))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD PTR[((-124))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD PTR[((-120))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD PTR[((-116))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD PTR[((-96))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD PTR[((-92))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD PTR[((-88))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add 
esi,r12d + xor eax,edi + add edx,DWORD PTR[((-84))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD PTR[((-64))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD PTR[((-60))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD PTR[((-56))+r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD PTR[((-52))+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD PTR[((-32))+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD PTR[((-28))+r13] + andn edi,esi,ebx + add edx,eax + rorx r12d,esi,27 + rorx eax,esi,2 + and esi,ebp + add edx,r12d + xor esi,edi + add ecx,DWORD PTR[((-24))+r13] + andn edi,edx,ebp + add ecx,esi + rorx r12d,edx,27 + rorx esi,edx,2 + and edx,eax + add ecx,r12d + xor edx,edi + add ebx,DWORD PTR[((-20))+r13] + andn edi,ecx,eax + add ebx,edx + rorx r12d,ecx,27 + rorx edx,ecx,2 + and ecx,esi + add ebx,r12d + xor ecx,edi + add ebp,DWORD PTR[r13] + andn edi,ebx,esi + add ebp,ecx + rorx r12d,ebx,27 + rorx ecx,ebx,2 + and ebx,edx + add ebp,r12d + xor ebx,edi + add eax,DWORD PTR[4+r13] + andn edi,ebp,edx + add eax,ebx + rorx r12d,ebp,27 + rorx ebx,ebp,2 + and ebp,ecx + add eax,r12d + xor ebp,edi + add esi,DWORD PTR[8+r13] + andn edi,eax,ecx + add esi,ebp + rorx r12d,eax,27 + rorx ebp,eax,2 + and eax,ebx + add esi,r12d + xor eax,edi + add edx,DWORD PTR[12+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[32+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[36+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[40+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[44+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[64+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + vmovdqu ymm11,YMMWORD PTR[((-64))+r11] + vpshufb ymm0,ymm0,ymm6 + add edx,DWORD PTR[68+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[72+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[76+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[96+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[100+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpshufb ymm1,ymm1,ymm6 + vpaddd ymm8,ymm0,ymm11 + add esi,DWORD PTR[104+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[108+r13] + lea 
r13,QWORD PTR[256+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[((-128))+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[((-124))+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[((-120))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vmovdqu YMMWORD PTR[rsp],ymm8 + vpshufb ymm2,ymm2,ymm6 + vpaddd ymm9,ymm1,ymm11 + add eax,DWORD PTR[((-116))+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[((-96))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[((-92))+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + xor esi,ebx + add ecx,DWORD PTR[((-88))+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[((-84))+r13] + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + vmovdqu YMMWORD PTR[32+rsp],ymm9 + vpshufb ymm3,ymm3,ymm6 + vpaddd ymm6,ymm2,ymm11 + add ebp,DWORD PTR[((-64))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[((-60))+r13] + xor ebx,edx + mov edi,ecx xor edi,edx - mov esi,eax - rol eax,5 + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD PTR[((-56))+r13] + xor ebp,ecx + mov edi,ebx xor edi,ecx - add ebp,eax - ror ebx,7 - add ebp,edi - add edx,DWORD PTR[48+rsp] - xor esi,ecx -DB 102,15,56,0,222 + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD PTR[((-52))+r13] + xor eax,ebx mov edi,ebp - rol ebp,5 - paddd xmm2,xmm9 - xor esi,ebx - add edx,ebp - ror eax,7 - add edx,esi - movdqa XMMWORD PTR[32+rsp],xmm2 - add ecx,DWORD PTR[52+rsp] xor edi,ebx - psubd xmm2,xmm9 - mov esi,edx - rol edx,5 - xor edi,eax - add ecx,edx - ror ebp,7 - add ecx,edi - add ebx,DWORD PTR[56+rsp] - xor esi,eax - mov edi,ecx - rol ecx,5 + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 xor esi,ebp - add ebx,ecx - ror edx,7 - add ebx,esi - add eax,DWORD PTR[60+rsp] - xor edi,ebp - mov esi,ebx - rol ebx,5 - xor edi,edx - add eax,ebx - ror ecx,7 - add eax,edi - add eax,DWORD PTR[r8] - add esi,DWORD PTR[4+r8] - add ecx,DWORD PTR[8+r8] - add edx,DWORD PTR[12+r8] - mov DWORD PTR[r8],eax - add ebp,DWORD PTR[16+r8] - mov DWORD PTR[4+r8],esi - mov ebx,esi - mov DWORD PTR[8+r8],ecx - mov DWORD PTR[12+r8],edx - mov DWORD PTR[16+r8],ebp - jmp $L$oop_ssse3 - -ALIGN 16 -$L$done_ssse3:: - add ebx,DWORD PTR[16+rsp] - xor esi,eax - mov edi,ecx - rol ecx,5 + add edx,r12d + and esi,edi + add ecx,DWORD PTR[((-32))+r13] xor esi,ebp - add ebx,ecx - ror edx,7 - add ebx,esi - add eax,DWORD PTR[20+rsp] + mov edi,eax xor edi,ebp - mov esi,ebx - rol ebx,5 + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + jmp $L$align32_3 +ALIGN 32 +$L$align32_3:: + vmovdqu YMMWORD PTR[64+rsp],ymm6 + 
vpaddd ymm7,ymm3,ymm11 + add ebx,DWORD PTR[((-28))+r13] + xor edx,eax + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD PTR[((-24))+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[((-20))+r13] + xor ebx,edx + mov edi,ecx xor edi,edx - add eax,ebx - ror ecx,7 - add eax,edi - add ebp,DWORD PTR[24+rsp] - xor esi,edx - mov edi,eax - rol eax,5 - xor esi,ecx - add ebp,eax - ror ebx,7 - add ebp,esi - add edx,DWORD PTR[28+rsp] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD PTR[r13] + xor ebp,ecx + mov edi,ebx xor edi,ecx - mov esi,ebp - rol ebp,5 + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + add edx,DWORD PTR[4+r13] + xor eax,ebx + mov edi,ebp xor edi,ebx - add edx,ebp - ror eax,7 - add edx,edi - add ecx,DWORD PTR[32+rsp] - xor esi,ebx - mov edi,edx - rol edx,5 - xor esi,eax - add ecx,edx - ror ebp,7 - add ecx,esi - add ebx,DWORD PTR[36+rsp] - xor edi,eax - mov esi,ecx - rol ecx,5 - xor edi,ebp - add ebx,ecx - ror edx,7 - add ebx,edi - add eax,DWORD PTR[40+rsp] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 xor esi,ebp - mov edi,ebx - rol ebx,5 - xor esi,edx - add eax,ebx - ror ecx,7 - add eax,esi - add ebp,DWORD PTR[44+rsp] + add edx,r12d + and esi,edi + vmovdqu YMMWORD PTR[96+rsp],ymm7 + add ecx,DWORD PTR[8+r13] + xor esi,ebp + mov edi,eax + xor edi,ebp + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + and edx,edi + add ebx,DWORD PTR[12+r13] + xor edx,eax + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + and ecx,edi + add ebp,DWORD PTR[32+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[36+r13] + xor ebx,edx + mov edi,ecx xor edi,edx - mov esi,eax - rol eax,5 + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + and ebp,edi + add esi,DWORD PTR[40+r13] + xor ebp,ecx + mov edi,ebx xor edi,ecx - add ebp,eax - ror ebx,7 - add ebp,edi - add edx,DWORD PTR[48+rsp] - xor esi,ecx + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + and eax,edi + vpalignr ymm4,ymm1,ymm0,8 + add edx,DWORD PTR[44+r13] + xor eax,ebx mov edi,ebp - rol ebp,5 - xor esi,ebx - add edx,ebp - ror eax,7 - add edx,esi - add ecx,DWORD PTR[52+rsp] xor edi,ebx - mov esi,edx - rol edx,5 - xor edi,eax - add ecx,edx - ror ebp,7 - add ecx,edi - add ebx,DWORD PTR[56+rsp] - xor esi,eax - mov edi,ecx - rol ecx,5 + vpsrldq ymm8,ymm3,4 + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpxor ymm4,ymm4,ymm0 + vpxor ymm8,ymm8,ymm2 xor esi,ebp - add ebx,ecx - ror edx,7 - add ebx,esi - add eax,DWORD PTR[60+rsp] + add edx,r12d + vpxor ymm4,ymm4,ymm8 + and esi,edi + add ecx,DWORD PTR[64+r13] + xor esi,ebp + mov edi,eax + vpsrld ymm8,ymm4,31 xor edi,ebp - mov esi,ebx - rol ebx,5 - xor edi,edx - add eax,ebx - ror ecx,7 - add eax,edi - add eax,DWORD PTR[r8] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + vpslldq ymm10,ymm4,12 + vpaddd ymm4,ymm4,ymm4 + rorx esi,edx,2 + xor edx,eax + vpsrld ymm9,ymm10,30 + 
vpor ymm4,ymm4,ymm8 + add ecx,r12d + and edx,edi + vpslld ymm10,ymm10,2 + vpxor ymm4,ymm4,ymm9 + add ebx,DWORD PTR[68+r13] + xor edx,eax + vpxor ymm4,ymm4,ymm10 + mov edi,esi + xor edi,eax + lea ebx,DWORD PTR[rdx*1+rbx] + vpaddd ymm9,ymm4,ymm11 + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + vmovdqu YMMWORD PTR[128+rsp],ymm9 + add ebx,r12d + and ecx,edi + add ebp,DWORD PTR[72+r13] + xor ecx,esi + mov edi,edx + xor edi,esi + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + and ebx,edi + add eax,DWORD PTR[76+r13] + xor ebx,edx + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpalignr ymm5,ymm2,ymm1,8 + add esi,DWORD PTR[96+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + vpsrldq ymm8,ymm4,4 + xor eax,ebx + add esi,r12d + xor eax,ecx + vpxor ymm5,ymm5,ymm1 + vpxor ymm8,ymm8,ymm3 + add edx,DWORD PTR[100+r13] + lea edx,DWORD PTR[rax*1+rdx] + vpxor ymm5,ymm5,ymm8 + rorx r12d,esi,27 + rorx eax,esi,2 + xor esi,ebp + add edx,r12d + vpsrld ymm8,ymm5,31 + vmovdqu ymm11,YMMWORD PTR[((-32))+r11] + xor esi,ebx + add ecx,DWORD PTR[104+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + vpslldq ymm10,ymm5,12 + vpaddd ymm5,ymm5,ymm5 + rorx r12d,edx,27 + rorx esi,edx,2 + vpsrld ymm9,ymm10,30 + vpor ymm5,ymm5,ymm8 + xor edx,eax + add ecx,r12d + vpslld ymm10,ymm10,2 + vpxor ymm5,ymm5,ymm9 + xor edx,ebp + add ebx,DWORD PTR[108+r13] + lea r13,QWORD PTR[256+r13] + vpxor ymm5,ymm5,ymm10 + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + vpaddd ymm9,ymm5,ymm11 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vmovdqu YMMWORD PTR[160+rsp],ymm9 + add ebp,DWORD PTR[((-128))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpalignr ymm6,ymm3,ymm2,8 + add eax,DWORD PTR[((-124))+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + vpsrldq ymm8,ymm5,4 + xor ebp,ecx + add eax,r12d + xor ebp,edx + vpxor ymm6,ymm6,ymm2 + vpxor ymm8,ymm8,ymm4 + add esi,DWORD PTR[((-120))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + vpxor ymm6,ymm6,ymm8 + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + vpsrld ymm8,ymm6,31 + xor eax,ecx + add edx,DWORD PTR[((-116))+r13] + lea edx,DWORD PTR[rax*1+rdx] + vpslldq ymm10,ymm6,12 + vpaddd ymm6,ymm6,ymm6 + rorx r12d,esi,27 + rorx eax,esi,2 + vpsrld ymm9,ymm10,30 + vpor ymm6,ymm6,ymm8 + xor esi,ebp + add edx,r12d + vpslld ymm10,ymm10,2 + vpxor ymm6,ymm6,ymm9 + xor esi,ebx + add ecx,DWORD PTR[((-96))+r13] + vpxor ymm6,ymm6,ymm10 + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + vpaddd ymm9,ymm6,ymm11 + xor edx,eax + add ecx,r12d + xor edx,ebp + vmovdqu YMMWORD PTR[192+rsp],ymm9 + add ebx,DWORD PTR[((-92))+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + vpalignr ymm7,ymm4,ymm3,8 + add ebp,DWORD PTR[((-88))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + vpsrldq ymm8,ymm6,4 + xor ebx,edx + add ebp,r12d + xor ebx,esi + vpxor ymm7,ymm7,ymm3 + vpxor ymm8,ymm8,ymm5 + add eax,DWORD PTR[((-84))+r13] + lea eax,DWORD PTR[rbx*1+rax] + vpxor ymm7,ymm7,ymm8 + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + vpsrld ymm8,ymm7,31 + xor ebp,edx + add esi,DWORD PTR[((-64))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + vpslldq ymm10,ymm7,12 + vpaddd ymm7,ymm7,ymm7 + rorx r12d,eax,27 + rorx ebp,eax,2 + vpsrld ymm9,ymm10,30 + vpor ymm7,ymm7,ymm8 + xor eax,ebx + add 
esi,r12d + vpslld ymm10,ymm10,2 + vpxor ymm7,ymm7,ymm9 + xor eax,ecx + add edx,DWORD PTR[((-60))+r13] + vpxor ymm7,ymm7,ymm10 + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + rorx eax,esi,2 + vpaddd ymm9,ymm7,ymm11 + xor esi,ebp + add edx,r12d + xor esi,ebx + vmovdqu YMMWORD PTR[224+rsp],ymm9 + add ecx,DWORD PTR[((-56))+r13] + lea ecx,DWORD PTR[rsi*1+rcx] + rorx r12d,edx,27 + rorx esi,edx,2 + xor edx,eax + add ecx,r12d + xor edx,ebp + add ebx,DWORD PTR[((-52))+r13] + lea ebx,DWORD PTR[rdx*1+rbx] + rorx r12d,ecx,27 + rorx edx,ecx,2 + xor ecx,esi + add ebx,r12d + xor ecx,eax + add ebp,DWORD PTR[((-32))+r13] + lea ebp,DWORD PTR[rbp*1+rcx] + rorx r12d,ebx,27 + rorx ecx,ebx,2 + xor ebx,edx + add ebp,r12d + xor ebx,esi + add eax,DWORD PTR[((-28))+r13] + lea eax,DWORD PTR[rbx*1+rax] + rorx r12d,ebp,27 + rorx ebx,ebp,2 + xor ebp,ecx + add eax,r12d + xor ebp,edx + add esi,DWORD PTR[((-24))+r13] + lea esi,DWORD PTR[rbp*1+rsi] + rorx r12d,eax,27 + rorx ebp,eax,2 + xor eax,ebx + add esi,r12d + xor eax,ecx + add edx,DWORD PTR[((-20))+r13] + lea edx,DWORD PTR[rax*1+rdx] + rorx r12d,esi,27 + add edx,r12d + lea r13,QWORD PTR[128+rsp] + + + add edx,DWORD PTR[r8] add esi,DWORD PTR[4+r8] - add ecx,DWORD PTR[8+r8] - mov DWORD PTR[r8],eax - add edx,DWORD PTR[12+r8] + add ebp,DWORD PTR[8+r8] + mov DWORD PTR[r8],edx + add ebx,DWORD PTR[12+r8] mov DWORD PTR[4+r8],esi - add ebp,DWORD PTR[16+r8] - mov DWORD PTR[8+r8],ecx - mov DWORD PTR[12+r8],edx - mov DWORD PTR[16+r8],ebp - movaps xmm6,XMMWORD PTR[((64+0))+rsp] - movaps xmm7,XMMWORD PTR[((64+16))+rsp] - movaps xmm8,XMMWORD PTR[((64+32))+rsp] - movaps xmm9,XMMWORD PTR[((64+48))+rsp] - movaps xmm10,XMMWORD PTR[((64+64))+rsp] - lea rsi,QWORD PTR[144+rsp] - mov r12,QWORD PTR[rsi] - mov rbp,QWORD PTR[8+rsi] - mov rbx,QWORD PTR[16+rsi] - lea rsp,QWORD PTR[24+rsi] -$L$epilogue_ssse3:: + mov eax,edx + add ecx,DWORD PTR[16+r8] + mov r12d,ebp + mov DWORD PTR[8+r8],ebp + mov edx,ebx + + mov DWORD PTR[12+r8],ebx + mov ebp,esi + mov DWORD PTR[16+r8],ecx + + mov esi,ecx + mov ecx,r12d + + + cmp r9,r10 + jbe $L$oop_avx2 + +$L$done_avx2:: + vzeroupper + movaps xmm6,XMMWORD PTR[((-40-96))+r14] + movaps xmm7,XMMWORD PTR[((-40-80))+r14] + movaps xmm8,XMMWORD PTR[((-40-64))+r14] + movaps xmm9,XMMWORD PTR[((-40-48))+r14] + movaps xmm10,XMMWORD PTR[((-40-32))+r14] + movaps xmm11,XMMWORD PTR[((-40-16))+r14] + lea rsi,QWORD PTR[r14] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue_avx2:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha1_block_data_order_ssse3:: -sha1_block_data_order_ssse3 ENDP +$L$SEH_end_sha1_block_data_order_avx2:: +sha1_block_data_order_avx2 ENDP ALIGN 64 K_XX_XX:: DD 05a827999h,05a827999h,05a827999h,05a827999h - + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h - DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch - + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h - DD 000010203h,004050607h,008090a0bh,00c0d0e0fh - + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 DB 
32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 @@ -2558,20 +5540,52 @@ se_handler PROC PRIVATE jae $L$common_seh_tail mov rax,QWORD PTR[64+rax] - lea rax,QWORD PTR[32+rax] mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] mov QWORD PTR[144+r8],rbx mov QWORD PTR[160+r8],rbp mov QWORD PTR[216+r8],r12 mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 jmp $L$common_seh_tail se_handler ENDP +ALIGN 16 +shaext_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue_shaext] + cmp rbx,r10 + jb $L$common_seh_tail + + lea r10,QWORD PTR[$L$epilogue_shaext] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[((-8-64))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + + jmp $L$common_seh_tail +shaext_handler ENDP ALIGN 16 ssse3_handler PROC PRIVATE @@ -2604,19 +5618,23 @@ ssse3_handler PROC PRIVATE cmp rbx,r10 jae $L$common_seh_tail - lea rsi,QWORD PTR[64+rax] + mov rax,QWORD PTR[232+r8] + + lea rsi,QWORD PTR[((-40-96))+rax] lea rdi,QWORD PTR[512+r8] - mov ecx,10 + mov ecx,12 DD 0a548f3fch - lea rax,QWORD PTR[168+rax] - mov rbx,QWORD PTR[((-8))+rax] mov rbp,QWORD PTR[((-16))+rax] mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] mov QWORD PTR[144+r8],rbx mov QWORD PTR[160+r8],rbp mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 $L$common_seh_tail:: mov rdi,QWORD PTR[8+rax] @@ -2630,7 +5648,6 @@ $L$common_seh_tail:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -2665,20 +5682,39 @@ ALIGN 4 DD imagerel $L$SEH_begin_sha1_block_data_order DD imagerel $L$SEH_end_sha1_block_data_order DD imagerel $L$SEH_info_sha1_block_data_order + DD imagerel $L$SEH_begin_sha1_block_data_order_shaext + DD imagerel $L$SEH_end_sha1_block_data_order_shaext + DD imagerel $L$SEH_info_sha1_block_data_order_shaext DD imagerel $L$SEH_begin_sha1_block_data_order_ssse3 DD imagerel $L$SEH_end_sha1_block_data_order_ssse3 DD imagerel $L$SEH_info_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_begin_sha1_block_data_order_avx + DD imagerel $L$SEH_end_sha1_block_data_order_avx + DD imagerel $L$SEH_info_sha1_block_data_order_avx + DD imagerel $L$SEH_begin_sha1_block_data_order_avx2 + DD imagerel $L$SEH_end_sha1_block_data_order_avx2 + DD imagerel $L$SEH_info_sha1_block_data_order_avx2 .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 $L$SEH_info_sha1_block_data_order:: DB 9,0,0,0 DD imagerel se_handler +$L$SEH_info_sha1_block_data_order_shaext:: +DB 9,0,0,0 + DD imagerel shaext_handler $L$SEH_info_sha1_block_data_order_ssse3:: DB 9,0,0,0 DD imagerel ssse3_handler DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 - +$L$SEH_info_sha1_block_data_order_avx:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx +$L$SEH_info_sha1_block_data_order_avx2:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha256-mb-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha256-mb-x86_64.asm new file mode 100644 index 00000000000000..057853d286f1d9 --- /dev/null +++ b/deps/openssl/asm/x64-win32-masm/sha/sha256-mb-x86_64.asm @@ -0,0 +1,8214 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 
'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC sha256_multi_block + +ALIGN 32 +sha256_multi_block PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))] + bt rcx,61 + jc _shaext_shortcut + test ecx,268435456 + jnz _avx_shortcut + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD PTR[272+rsp],rax +$L$body:: + lea rbp,QWORD PTR[((K256+128))] + lea rbx,QWORD PTR[256+rsp] + lea rdi,QWORD PTR[128+rdi] + +$L$oop_grande:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz $L$done + + movdqu xmm8,XMMWORD PTR[((0-128))+rdi] + lea rax,QWORD PTR[128+rsp] + movdqu xmm9,XMMWORD PTR[((32-128))+rdi] + movdqu xmm10,XMMWORD PTR[((64-128))+rdi] + movdqu xmm11,XMMWORD PTR[((96-128))+rdi] + movdqu xmm12,XMMWORD PTR[((128-128))+rdi] + movdqu xmm13,XMMWORD PTR[((160-128))+rdi] + movdqu xmm14,XMMWORD PTR[((192-128))+rdi] + movdqu xmm15,XMMWORD PTR[((224-128))+rdi] + movdqu xmm6,XMMWORD PTR[$L$pbswap] + jmp $L$oop + +ALIGN 32 +$L$oop:: + movdqa xmm4,xmm10 + pxor xmm4,xmm9 + movd xmm5,DWORD PTR[r8] + movd xmm0,DWORD PTR[r9] + movd xmm1,DWORD PTR[r10] + movd xmm2,DWORD PTR[r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm12 +DB 102,15,56,0,238 + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(0-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movd xmm5,DWORD PTR[4+r8] + movd xmm0,DWORD PTR[4+r9] + movd xmm1,DWORD PTR[4+r10] + movd xmm2,DWORD PTR[4+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(16-128)+rax],xmm5 + 
paddd xmm5,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm5 + pxor xmm7,xmm2 + + paddd xmm14,xmm5 + paddd xmm14,xmm7 + movd xmm5,DWORD PTR[8+r8] + movd xmm0,DWORD PTR[8+r9] + movd xmm1,DWORD PTR[8+r10] + movd xmm2,DWORD PTR[8+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm10 +DB 102,15,56,0,238 + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(32-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movd xmm5,DWORD PTR[12+r8] + movd xmm0,DWORD PTR[12+r9] + movd xmm1,DWORD PTR[12+r10] + movd xmm2,DWORD PTR[12+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(48-128)+rax],xmm5 + paddd xmm5,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm5 + pxor xmm7,xmm2 + + paddd xmm12,xmm5 + paddd xmm12,xmm7 + movd xmm5,DWORD PTR[16+r8] + movd xmm0,DWORD PTR[16+r9] + movd xmm1,DWORD PTR[16+r10] + movd xmm2,DWORD PTR[16+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm8 +DB 102,15,56,0,238 + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(64-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + 
+ movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movd xmm5,DWORD PTR[20+r8] + movd xmm0,DWORD PTR[20+r9] + movd xmm1,DWORD PTR[20+r10] + movd xmm2,DWORD PTR[20+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(80-128)+rax],xmm5 + paddd xmm5,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm5 + pxor xmm7,xmm2 + + paddd xmm10,xmm5 + paddd xmm10,xmm7 + movd xmm5,DWORD PTR[24+r8] + movd xmm0,DWORD PTR[24+r9] + movd xmm1,DWORD PTR[24+r10] + movd xmm2,DWORD PTR[24+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm14 +DB 102,15,56,0,238 + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(96-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movd xmm5,DWORD PTR[28+r8] + movd xmm0,DWORD PTR[28+r9] + movd xmm1,DWORD PTR[28+r10] + movd xmm2,DWORD PTR[28+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(112-128)+rax],xmm5 + paddd xmm5,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor 
xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm5 + pxor xmm7,xmm2 + + paddd xmm8,xmm5 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + movd xmm5,DWORD PTR[32+r8] + movd xmm0,DWORD PTR[32+r9] + movd xmm1,DWORD PTR[32+r10] + movd xmm2,DWORD PTR[32+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm12 +DB 102,15,56,0,238 + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(128-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movd xmm5,DWORD PTR[36+r8] + movd xmm0,DWORD PTR[36+r9] + movd xmm1,DWORD PTR[36+r10] + movd xmm2,DWORD PTR[36+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(144-128)+rax],xmm5 + paddd xmm5,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm5 + pxor xmm7,xmm2 + + paddd xmm14,xmm5 + paddd xmm14,xmm7 + movd xmm5,DWORD PTR[40+r8] + movd xmm0,DWORD PTR[40+r9] + movd xmm1,DWORD PTR[40+r10] + movd xmm2,DWORD PTR[40+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm10 +DB 102,15,56,0,238 + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(160-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movd xmm5,DWORD PTR[44+r8] + 
movd xmm0,DWORD PTR[44+r9] + movd xmm1,DWORD PTR[44+r10] + movd xmm2,DWORD PTR[44+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(176-128)+rax],xmm5 + paddd xmm5,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm5 + pxor xmm7,xmm2 + + paddd xmm12,xmm5 + paddd xmm12,xmm7 + movd xmm5,DWORD PTR[48+r8] + movd xmm0,DWORD PTR[48+r9] + movd xmm1,DWORD PTR[48+r10] + movd xmm2,DWORD PTR[48+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm8 +DB 102,15,56,0,238 + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(192-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movd xmm5,DWORD PTR[52+r8] + movd xmm0,DWORD PTR[52+r9] + movd xmm1,DWORD PTR[52+r10] + movd xmm2,DWORD PTR[52+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(208-128)+rax],xmm5 + paddd xmm5,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm5 + pxor xmm7,xmm2 + + paddd xmm10,xmm5 + paddd xmm10,xmm7 + movd xmm5,DWORD PTR[56+r8] + movd xmm0,DWORD PTR[56+r9] + movd xmm1,DWORD PTR[56+r10] + movd xmm2,DWORD PTR[56+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm14 +DB 102,15,56,0,238 + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa 
XMMWORD PTR[(224-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movd xmm5,DWORD PTR[60+r8] + lea r8,QWORD PTR[64+r8] + movd xmm0,DWORD PTR[60+r9] + lea r9,QWORD PTR[64+r9] + movd xmm1,DWORD PTR[60+r10] + lea r10,QWORD PTR[64+r10] + movd xmm2,DWORD PTR[60+r11] + lea r11,QWORD PTR[64+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(240-128)+rax],xmm5 + paddd xmm5,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + prefetcht0 [63+r8] + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + prefetcht0 [63+r9] + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + prefetcht0 [63+r10] + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + prefetcht0 [63+r11] + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm5 + pxor xmm7,xmm2 + + paddd xmm8,xmm5 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + movdqu xmm5,XMMWORD PTR[((0-128))+rax] + mov ecx,3 + jmp $L$oop_16_xx +ALIGN 32 +$L$oop_16_xx:: + movdqa xmm6,XMMWORD PTR[((16-128))+rax] + paddd xmm5,XMMWORD PTR[((144-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((224-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm12 + + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(0-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + 
pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movdqa xmm5,XMMWORD PTR[((32-128))+rax] + paddd xmm6,XMMWORD PTR[((160-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((240-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 + + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(16-128)+rax],xmm6 + paddd xmm6,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm6 + pxor xmm7,xmm2 + + paddd xmm14,xmm6 + paddd xmm14,xmm7 + movdqa xmm6,XMMWORD PTR[((48-128))+rax] + paddd xmm5,XMMWORD PTR[((176-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((0-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm10 + + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(32-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movdqa xmm5,XMMWORD PTR[((64-128))+rax] + paddd xmm6,XMMWORD PTR[((192-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((16-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 
+ pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 + + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(48-128)+rax],xmm6 + paddd xmm6,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm6 + pxor xmm7,xmm2 + + paddd xmm12,xmm6 + paddd xmm12,xmm7 + movdqa xmm6,XMMWORD PTR[((80-128))+rax] + paddd xmm5,XMMWORD PTR[((208-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((32-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm8 + + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(64-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movdqa xmm5,XMMWORD PTR[((96-128))+rax] + paddd xmm6,XMMWORD PTR[((224-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((48-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 + + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(80-128)+rax],xmm6 + paddd xmm6,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld 
xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm6 + pxor xmm7,xmm2 + + paddd xmm10,xmm6 + paddd xmm10,xmm7 + movdqa xmm6,XMMWORD PTR[((112-128))+rax] + paddd xmm5,XMMWORD PTR[((240-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((64-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm14 + + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(96-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movdqa xmm5,XMMWORD PTR[((128-128))+rax] + paddd xmm6,XMMWORD PTR[((0-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((80-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 + + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(112-128)+rax],xmm6 + paddd xmm6,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm6 + pxor xmm7,xmm2 + + paddd xmm8,xmm6 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + movdqa xmm6,XMMWORD PTR[((144-128))+rax] + paddd xmm5,XMMWORD PTR[((16-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((96-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor 
xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm12 + + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(128-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movdqa xmm5,XMMWORD PTR[((160-128))+rax] + paddd xmm6,XMMWORD PTR[((32-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((112-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 + + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(144-128)+rax],xmm6 + paddd xmm6,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm6 + pxor xmm7,xmm2 + + paddd xmm14,xmm6 + paddd xmm14,xmm7 + movdqa xmm6,XMMWORD PTR[((176-128))+rax] + paddd xmm5,XMMWORD PTR[((48-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((128-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm10 + + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(160-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand 
xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movdqa xmm5,XMMWORD PTR[((192-128))+rax] + paddd xmm6,XMMWORD PTR[((64-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((144-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 + + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(176-128)+rax],xmm6 + paddd xmm6,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm6 + pxor xmm7,xmm2 + + paddd xmm12,xmm6 + paddd xmm12,xmm7 + movdqa xmm6,XMMWORD PTR[((208-128))+rax] + paddd xmm5,XMMWORD PTR[((80-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((160-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm8 + + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(192-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movdqa xmm5,XMMWORD PTR[((224-128))+rax] + paddd xmm6,XMMWORD PTR[((96-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + 
psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((176-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 + + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(208-128)+rax],xmm6 + paddd xmm6,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm6 + pxor xmm7,xmm2 + + paddd xmm10,xmm6 + paddd xmm10,xmm7 + movdqa xmm6,XMMWORD PTR[((240-128))+rax] + paddd xmm5,XMMWORD PTR[((112-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((192-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm14 + + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(224-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movdqa xmm5,XMMWORD PTR[((0-128))+rax] + paddd xmm6,XMMWORD PTR[((128-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((208-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 + + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(240-128)+rax],xmm6 + paddd xmm6,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + 
pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm6 + pxor xmm7,xmm2 + + paddd xmm8,xmm6 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + dec ecx + jnz $L$oop_16_xx + + mov ecx,1 + lea rbp,QWORD PTR[((K256+128))] + + movdqa xmm7,XMMWORD PTR[rbx] + cmp ecx,DWORD PTR[rbx] + pxor xmm0,xmm0 + cmovge r8,rbp + cmp ecx,DWORD PTR[4+rbx] + movdqa xmm6,xmm7 + cmovge r9,rbp + cmp ecx,DWORD PTR[8+rbx] + pcmpgtd xmm6,xmm0 + cmovge r10,rbp + cmp ecx,DWORD PTR[12+rbx] + paddd xmm7,xmm6 + cmovge r11,rbp + + movdqu xmm0,XMMWORD PTR[((0-128))+rdi] + pand xmm8,xmm6 + movdqu xmm1,XMMWORD PTR[((32-128))+rdi] + pand xmm9,xmm6 + movdqu xmm2,XMMWORD PTR[((64-128))+rdi] + pand xmm10,xmm6 + movdqu xmm5,XMMWORD PTR[((96-128))+rdi] + pand xmm11,xmm6 + paddd xmm8,xmm0 + movdqu xmm0,XMMWORD PTR[((128-128))+rdi] + pand xmm12,xmm6 + paddd xmm9,xmm1 + movdqu xmm1,XMMWORD PTR[((160-128))+rdi] + pand xmm13,xmm6 + paddd xmm10,xmm2 + movdqu xmm2,XMMWORD PTR[((192-128))+rdi] + pand xmm14,xmm6 + paddd xmm11,xmm5 + movdqu xmm5,XMMWORD PTR[((224-128))+rdi] + pand xmm15,xmm6 + paddd xmm12,xmm0 + paddd xmm13,xmm1 + movdqu XMMWORD PTR[(0-128)+rdi],xmm8 + paddd xmm14,xmm2 + movdqu XMMWORD PTR[(32-128)+rdi],xmm9 + paddd xmm15,xmm5 + movdqu XMMWORD PTR[(64-128)+rdi],xmm10 + movdqu XMMWORD PTR[(96-128)+rdi],xmm11 + movdqu XMMWORD PTR[(128-128)+rdi],xmm12 + movdqu XMMWORD PTR[(160-128)+rdi],xmm13 + movdqu XMMWORD PTR[(192-128)+rdi],xmm14 + movdqu XMMWORD PTR[(224-128)+rdi],xmm15 + + movdqa XMMWORD PTR[rbx],xmm7 + movdqa xmm6,XMMWORD PTR[$L$pbswap] + dec edx + jnz $L$oop + + mov edx,DWORD PTR[280+rsp] + lea rdi,QWORD PTR[16+rdi] + lea rsi,QWORD PTR[64+rsi] + dec edx + jnz $L$oop_grande + +$L$done:: + mov rax,QWORD PTR[272+rsp] + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_multi_block:: +sha256_multi_block ENDP + +ALIGN 32 +sha256_multi_block_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 
+ movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + shl edx,1 + and rsp,-256 + lea rdi,QWORD PTR[128+rdi] + mov QWORD PTR[272+rsp],rax +$L$body_shaext:: + lea rbx,QWORD PTR[256+rsp] + lea rbp,QWORD PTR[((K256_shaext+128))] + +$L$oop_grande_shaext:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rsp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rsp + test edx,edx + jz $L$done_shaext + + movq xmm12,QWORD PTR[((0-128))+rdi] + movq xmm4,QWORD PTR[((32-128))+rdi] + movq xmm13,QWORD PTR[((64-128))+rdi] + movq xmm5,QWORD PTR[((96-128))+rdi] + movq xmm8,QWORD PTR[((128-128))+rdi] + movq xmm9,QWORD PTR[((160-128))+rdi] + movq xmm10,QWORD PTR[((192-128))+rdi] + movq xmm11,QWORD PTR[((224-128))+rdi] + + punpckldq xmm12,xmm4 + punpckldq xmm13,xmm5 + punpckldq xmm8,xmm9 + punpckldq xmm10,xmm11 + movdqa xmm3,XMMWORD PTR[((K256_shaext-16))] + + movdqa xmm14,xmm12 + movdqa xmm15,xmm13 + punpcklqdq xmm12,xmm8 + punpcklqdq xmm13,xmm10 + punpckhqdq xmm14,xmm8 + punpckhqdq xmm15,xmm10 + + pshufd xmm12,xmm12,27 + pshufd xmm13,xmm13,27 + pshufd xmm14,xmm14,27 + pshufd xmm15,xmm15,27 + jmp $L$oop_shaext + +ALIGN 32 +$L$oop_shaext:: + movdqu xmm4,XMMWORD PTR[r8] + movdqu xmm8,XMMWORD PTR[r9] + movdqu xmm5,XMMWORD PTR[16+r8] + movdqu xmm9,XMMWORD PTR[16+r9] + movdqu xmm6,XMMWORD PTR[32+r8] +DB 102,15,56,0,227 + movdqu xmm10,XMMWORD PTR[32+r9] +DB 102,68,15,56,0,195 + movdqu xmm7,XMMWORD PTR[48+r8] + lea r8,QWORD PTR[64+r8] + movdqu xmm11,XMMWORD PTR[48+r9] + lea r9,QWORD PTR[64+r9] + + movdqa xmm0,XMMWORD PTR[((0-128))+rbp] +DB 102,15,56,0,235 + paddd xmm0,xmm4 + pxor xmm4,xmm12 + movdqa xmm1,xmm0 + movdqa xmm2,XMMWORD PTR[((0-128))+rbp] +DB 102,68,15,56,0,203 + paddd xmm2,xmm8 + movdqa XMMWORD PTR[80+rsp],xmm13 +DB 69,15,56,203,236 + pxor xmm8,xmm14 + movdqa xmm0,xmm2 + movdqa XMMWORD PTR[112+rsp],xmm15 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh + pxor xmm4,xmm12 + movdqa XMMWORD PTR[64+rsp],xmm12 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + pxor xmm8,xmm14 + movdqa XMMWORD PTR[96+rsp],xmm14 + movdqa xmm1,XMMWORD PTR[((16-128))+rbp] + paddd xmm1,xmm5 +DB 102,15,56,0,243 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((16-128))+rbp] + paddd xmm2,xmm9 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + prefetcht0 [127+r8] +DB 102,15,56,0,251 +DB 102,68,15,56,0,211 + prefetcht0 [127+r9] +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh +DB 102,68,15,56,0,219 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((32-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((32-128))+rbp] + paddd xmm2,xmm10 +DB 69,15,56,203,236 +DB 69,15,56,204,193 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh +DB 102,15,58,15,222,4 + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((48-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((48-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,00eh + paddd xmm5,xmm3 + movdqa xmm3,xmm8 
+DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((64-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((64-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,00eh + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((80-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((80-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,00eh + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((96-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 +DB 69,15,56,204,193 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((96-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 102,15,58,15,222,4 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,00eh + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((112-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((112-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,00eh + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((128-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((128-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,00eh + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((144-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((144-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,00eh + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((160-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 +DB 69,15,56,204,193 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((160-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 102,15,58,15,222,4 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,00eh + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa 
xmm1,XMMWORD PTR[((176-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((176-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,00eh + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((192-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((192-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,00eh + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((208-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((208-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,00eh + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 + nop +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((224-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((224-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 + nop +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + mov ecx,1 + pxor xmm6,xmm6 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,00eh + movdqa xmm1,XMMWORD PTR[((240-128))+rbp] + paddd xmm1,xmm7 + movq xmm7,QWORD PTR[rbx] + nop +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm2,XMMWORD PTR[((240-128))+rbp] + paddd xmm2,xmm11 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + cmp ecx,DWORD PTR[rbx] + cmovge r8,rsp + cmp ecx,DWORD PTR[4+rbx] + cmovge r9,rsp + pshufd xmm9,xmm7,000h +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + pshufd xmm10,xmm7,055h + movdqa xmm11,xmm7 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh + pcmpgtd xmm9,xmm6 + pcmpgtd xmm10,xmm6 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + pcmpgtd xmm11,xmm6 + movdqa xmm3,XMMWORD PTR[((K256_shaext-16))] +DB 69,15,56,203,247 + + pand xmm13,xmm9 + pand xmm15,xmm10 + pand xmm12,xmm9 + pand xmm14,xmm10 + paddd xmm11,xmm7 + + paddd xmm13,XMMWORD PTR[80+rsp] + paddd xmm15,XMMWORD PTR[112+rsp] + paddd xmm12,XMMWORD PTR[64+rsp] + paddd xmm14,XMMWORD PTR[96+rsp] + + movq QWORD PTR[rbx],xmm11 + dec edx + jnz $L$oop_shaext + + mov edx,DWORD PTR[280+rsp] + + pshufd xmm12,xmm12,27 + pshufd xmm13,xmm13,27 + pshufd xmm14,xmm14,27 + pshufd xmm15,xmm15,27 + + movdqa xmm5,xmm12 + movdqa xmm6,xmm13 + punpckldq xmm12,xmm14 + punpckhdq xmm5,xmm14 + punpckldq xmm13,xmm15 + punpckhdq xmm6,xmm15 + + movq QWORD PTR[(0-128)+rdi],xmm12 + psrldq xmm12,8 + movq QWORD PTR[(128-128)+rdi],xmm5 + psrldq xmm5,8 + movq QWORD PTR[(32-128)+rdi],xmm12 + movq QWORD PTR[(160-128)+rdi],xmm5 + + movq QWORD PTR[(64-128)+rdi],xmm13 + psrldq xmm13,8 + movq QWORD PTR[(192-128)+rdi],xmm6 + psrldq xmm6,8 + movq QWORD PTR[(96-128)+rdi],xmm13 + movq QWORD PTR[(224-128)+rdi],xmm6 + + lea rdi,QWORD PTR[8+rdi] + lea rsi,QWORD PTR[32+rsi] + dec edx + jnz $L$oop_grande_shaext + +$L$done_shaext:: + + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps 
xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_multi_block_shaext:: +sha256_multi_block_shaext ENDP + +ALIGN 32 +sha256_multi_block_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx_shortcut:: + shr rcx,32 + cmp edx,2 + jb $L$avx + test ecx,32 + jnz _avx2_shortcut + jmp $L$avx +ALIGN 32 +$L$avx:: + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD PTR[272+rsp],rax +$L$body_avx:: + lea rbp,QWORD PTR[((K256+128))] + lea rbx,QWORD PTR[256+rsp] + lea rdi,QWORD PTR[128+rdi] + +$L$oop_grande_avx:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz $L$done_avx + + vmovdqu xmm8,XMMWORD PTR[((0-128))+rdi] + lea rax,QWORD PTR[128+rsp] + vmovdqu xmm9,XMMWORD PTR[((32-128))+rdi] + vmovdqu xmm10,XMMWORD PTR[((64-128))+rdi] + vmovdqu xmm11,XMMWORD PTR[((96-128))+rdi] + vmovdqu xmm12,XMMWORD PTR[((128-128))+rdi] + vmovdqu xmm13,XMMWORD PTR[((160-128))+rdi] + vmovdqu xmm14,XMMWORD PTR[((192-128))+rdi] + vmovdqu xmm15,XMMWORD PTR[((224-128))+rdi] + vmovdqu xmm6,XMMWORD PTR[$L$pbswap] + jmp $L$oop_avx + +ALIGN 32 +$L$oop_avx:: + vpxor xmm4,xmm10,xmm9 + vmovd xmm5,DWORD PTR[r8] + vmovd xmm0,DWORD PTR[r9] + vpinsrd xmm5,xmm5,DWORD PTR[r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD PTR[(0-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand 
xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovd xmm5,DWORD PTR[4+r8] + vmovd xmm0,DWORD PTR[4+r9] + vpinsrd xmm5,xmm5,DWORD PTR[4+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[4+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD PTR[(16-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm5 + vpaddd xmm14,xmm14,xmm7 + vmovd xmm5,DWORD PTR[8+r8] + vmovd xmm0,DWORD PTR[8+r9] + vpinsrd xmm5,xmm5,DWORD PTR[8+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[8+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD PTR[(32-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovd xmm5,DWORD PTR[12+r8] + vmovd xmm0,DWORD PTR[12+r9] + vpinsrd xmm5,xmm5,DWORD PTR[12+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[12+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD PTR[(48-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm5 + 
vpaddd xmm12,xmm12,xmm7 + vmovd xmm5,DWORD PTR[16+r8] + vmovd xmm0,DWORD PTR[16+r9] + vpinsrd xmm5,xmm5,DWORD PTR[16+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[16+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD PTR[(64-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD PTR[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovd xmm5,DWORD PTR[20+r8] + vmovd xmm0,DWORD PTR[20+r9] + vpinsrd xmm5,xmm5,DWORD PTR[20+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[20+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD PTR[(80-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm5 + vpaddd xmm10,xmm10,xmm7 + vmovd xmm5,DWORD PTR[24+r8] + vmovd xmm0,DWORD PTR[24+r9] + vpinsrd xmm5,xmm5,DWORD PTR[24+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[24+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD PTR[(96-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovd xmm5,DWORD PTR[28+r8] + vmovd xmm0,DWORD PTR[28+r9] + vpinsrd xmm5,xmm5,DWORD PTR[28+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[28+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 
+ vmovdqu XMMWORD PTR[(112-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + + vpxor xmm8,xmm8,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm5 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + vmovd xmm5,DWORD PTR[32+r8] + vmovd xmm0,DWORD PTR[32+r9] + vpinsrd xmm5,xmm5,DWORD PTR[32+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[32+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD PTR[(128-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovd xmm5,DWORD PTR[36+r8] + vmovd xmm0,DWORD PTR[36+r9] + vpinsrd xmm5,xmm5,DWORD PTR[36+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[36+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD PTR[(144-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm5 + vpaddd xmm14,xmm14,xmm7 + vmovd xmm5,DWORD PTR[40+r8] + vmovd xmm0,DWORD PTR[40+r9] + vpinsrd xmm5,xmm5,DWORD PTR[40+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[40+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD PTR[(160-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + 
vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovd xmm5,DWORD PTR[44+r8] + vmovd xmm0,DWORD PTR[44+r9] + vpinsrd xmm5,xmm5,DWORD PTR[44+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[44+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD PTR[(176-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm5 + vpaddd xmm12,xmm12,xmm7 + vmovd xmm5,DWORD PTR[48+r8] + vmovd xmm0,DWORD PTR[48+r9] + vpinsrd xmm5,xmm5,DWORD PTR[48+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[48+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD PTR[(192-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD PTR[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovd xmm5,DWORD PTR[52+r8] + vmovd xmm0,DWORD PTR[52+r9] + vpinsrd xmm5,xmm5,DWORD PTR[52+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[52+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD PTR[(208-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm5,xmm5,XMMWORD PTR[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor 
xmm10,xmm10,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm5 + vpaddd xmm10,xmm10,xmm7 + vmovd xmm5,DWORD PTR[56+r8] + vmovd xmm0,DWORD PTR[56+r9] + vpinsrd xmm5,xmm5,DWORD PTR[56+r10],1 + vpinsrd xmm0,xmm0,DWORD PTR[56+r11],1 + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD PTR[(224-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovd xmm5,DWORD PTR[60+r8] + lea r8,QWORD PTR[64+r8] + vmovd xmm0,DWORD PTR[60+r9] + lea r9,QWORD PTR[64+r9] + vpinsrd xmm5,xmm5,DWORD PTR[60+r10],1 + lea r10,QWORD PTR[64+r10] + vpinsrd xmm0,xmm0,DWORD PTR[60+r11],1 + lea r11,QWORD PTR[64+r11] + vpunpckldq xmm5,xmm5,xmm0 + vpshufb xmm5,xmm5,xmm6 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD PTR[(240-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm5,xmm5,XMMWORD PTR[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + prefetcht0 [63+r8] + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + prefetcht0 [63+r9] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + prefetcht0 [63+r10] + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + prefetcht0 [63+r11] + vpxor xmm8,xmm8,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm5 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + vmovdqu xmm5,XMMWORD PTR[((0-128))+rax] + mov ecx,3 + jmp $L$oop_16_xx_avx +ALIGN 32 +$L$oop_16_xx_avx:: + vmovdqu xmm6,XMMWORD PTR[((16-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((144-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((224-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD 
PTR[(0-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovdqu xmm5,XMMWORD PTR[((32-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((160-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((240-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD PTR[(16-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm6 + vpaddd xmm14,xmm14,xmm7 + vmovdqu xmm6,XMMWORD PTR[((48-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((176-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((0-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD PTR[(32-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld 
xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovdqu xmm5,XMMWORD PTR[((64-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((192-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((16-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD PTR[(48-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm6 + vpaddd xmm12,xmm12,xmm7 + vmovdqu xmm6,XMMWORD PTR[((80-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((208-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((32-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD PTR[(64-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD PTR[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovdqu xmm5,XMMWORD PTR[((96-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((224-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu 
xmm0,XMMWORD PTR[((48-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD PTR[(80-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm6 + vpaddd xmm10,xmm10,xmm7 + vmovdqu xmm6,XMMWORD PTR[((112-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((240-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((64-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD PTR[(96-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovdqu xmm5,XMMWORD PTR[((128-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((0-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((80-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD PTR[(112-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp] + vpxor 
xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + + vpxor xmm8,xmm8,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm6 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + vmovdqu xmm6,XMMWORD PTR[((144-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((16-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((96-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm12,6 + vpslld xmm2,xmm12,26 + vmovdqu XMMWORD PTR[(128-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm15 + + vpsrld xmm1,xmm12,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm12,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-128))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm12,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,7 + vpandn xmm0,xmm12,xmm14 + vpand xmm3,xmm12,xmm13 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm15,xmm8,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm8,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm9,xmm8 + + vpxor xmm15,xmm15,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm8,13 + + vpslld xmm2,xmm8,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm15,xmm1 + + vpsrld xmm1,xmm8,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,10 + vpxor xmm15,xmm9,xmm4 + vpaddd xmm11,xmm11,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm15,xmm15,xmm5 + vpaddd xmm15,xmm15,xmm7 + vmovdqu xmm5,XMMWORD PTR[((160-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((32-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((112-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm11,6 + vpslld xmm2,xmm11,26 + vmovdqu XMMWORD PTR[(144-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm14 + + vpsrld xmm1,xmm11,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm11,21 + vpaddd xmm6,xmm6,XMMWORD PTR[((-96))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm11,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,7 + vpandn xmm0,xmm11,xmm13 + vpand xmm4,xmm11,xmm12 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm14,xmm15,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm15,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm8,xmm15 + + vpxor xmm14,xmm14,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm15,13 + + vpslld xmm2,xmm15,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm14,xmm1 + + vpsrld xmm1,xmm15,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,10 + vpxor 
xmm14,xmm8,xmm3 + vpaddd xmm10,xmm10,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm14,xmm14,xmm6 + vpaddd xmm14,xmm14,xmm7 + vmovdqu xmm6,XMMWORD PTR[((176-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((48-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((128-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm10,6 + vpslld xmm2,xmm10,26 + vmovdqu XMMWORD PTR[(160-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm13 + + vpsrld xmm1,xmm10,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm10,21 + vpaddd xmm5,xmm5,XMMWORD PTR[((-64))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm10,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,7 + vpandn xmm0,xmm10,xmm12 + vpand xmm3,xmm10,xmm11 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm13,xmm14,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm14,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm15,xmm14 + + vpxor xmm13,xmm13,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm14,13 + + vpslld xmm2,xmm14,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm13,xmm1 + + vpsrld xmm1,xmm14,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,10 + vpxor xmm13,xmm15,xmm4 + vpaddd xmm9,xmm9,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm13,xmm13,xmm5 + vpaddd xmm13,xmm13,xmm7 + vmovdqu xmm5,XMMWORD PTR[((192-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((64-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((144-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm9,6 + vpslld xmm2,xmm9,26 + vmovdqu XMMWORD PTR[(176-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm12 + + vpsrld xmm1,xmm9,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm9,21 + vpaddd xmm6,xmm6,XMMWORD PTR[((-32))+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm9,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,7 + vpandn xmm0,xmm9,xmm11 + vpand xmm4,xmm9,xmm10 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm12,xmm13,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm13,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm14,xmm13 + + vpxor xmm12,xmm12,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm13,13 + + vpslld xmm2,xmm13,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm12,xmm1 + + vpsrld xmm1,xmm13,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,10 + vpxor xmm12,xmm14,xmm3 + vpaddd xmm8,xmm8,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm12,xmm12,xmm6 + vpaddd xmm12,xmm12,xmm7 + vmovdqu xmm6,XMMWORD PTR[((208-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((80-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((160-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + 
vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm8,6 + vpslld xmm2,xmm8,26 + vmovdqu XMMWORD PTR[(192-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm11 + + vpsrld xmm1,xmm8,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm8,21 + vpaddd xmm5,xmm5,XMMWORD PTR[rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm8,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm8,7 + vpandn xmm0,xmm8,xmm10 + vpand xmm3,xmm8,xmm9 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm11,xmm12,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm12,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm13,xmm12 + + vpxor xmm11,xmm11,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm12,13 + + vpslld xmm2,xmm12,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm11,xmm1 + + vpsrld xmm1,xmm12,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm12,10 + vpxor xmm11,xmm13,xmm4 + vpaddd xmm15,xmm15,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm11,xmm11,xmm5 + vpaddd xmm11,xmm11,xmm7 + vmovdqu xmm5,XMMWORD PTR[((224-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((96-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((176-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm15,6 + vpslld xmm2,xmm15,26 + vmovdqu XMMWORD PTR[(208-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm10 + + vpsrld xmm1,xmm15,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm15,21 + vpaddd xmm6,xmm6,XMMWORD PTR[32+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm15,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm15,7 + vpandn xmm0,xmm15,xmm9 + vpand xmm4,xmm15,xmm8 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm10,xmm11,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm11,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm12,xmm11 + + vpxor xmm10,xmm10,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm11,13 + + vpslld xmm2,xmm11,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm10,xmm1 + + vpsrld xmm1,xmm11,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm11,10 + vpxor xmm10,xmm12,xmm3 + vpaddd xmm14,xmm14,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm10,xmm10,xmm6 + vpaddd xmm10,xmm10,xmm7 + vmovdqu xmm6,XMMWORD PTR[((240-128))+rax] + vpaddd xmm5,xmm5,XMMWORD PTR[((112-128))+rax] + + vpsrld xmm7,xmm6,3 + vpsrld xmm1,xmm6,7 + vpslld xmm2,xmm6,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm6,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm6,14 + vmovdqu xmm0,XMMWORD PTR[((192-128))+rax] + vpsrld xmm3,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm5,xmm5,xmm7 + vpxor xmm7,xmm3,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm5,xmm5,xmm7 + vpsrld xmm7,xmm14,6 + vpslld xmm2,xmm14,26 + vmovdqu XMMWORD PTR[(224-128)+rax],xmm5 + vpaddd xmm5,xmm5,xmm9 + + vpsrld xmm1,xmm14,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm14,21 + vpaddd xmm5,xmm5,XMMWORD PTR[64+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm14,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm14,7 + vpandn xmm0,xmm14,xmm8 + vpand xmm3,xmm14,xmm15 + + vpxor 
xmm7,xmm7,xmm1 + + vpsrld xmm9,xmm10,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm10,30 + vpxor xmm0,xmm0,xmm3 + vpxor xmm3,xmm11,xmm10 + + vpxor xmm9,xmm9,xmm1 + vpaddd xmm5,xmm5,xmm7 + + vpsrld xmm1,xmm10,13 + + vpslld xmm2,xmm10,19 + vpaddd xmm5,xmm5,xmm0 + vpand xmm4,xmm4,xmm3 + + vpxor xmm7,xmm9,xmm1 + + vpsrld xmm1,xmm10,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm10,10 + vpxor xmm9,xmm11,xmm4 + vpaddd xmm13,xmm13,xmm5 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm9,xmm9,xmm5 + vpaddd xmm9,xmm9,xmm7 + vmovdqu xmm5,XMMWORD PTR[((0-128))+rax] + vpaddd xmm6,xmm6,XMMWORD PTR[((128-128))+rax] + + vpsrld xmm7,xmm5,3 + vpsrld xmm1,xmm5,7 + vpslld xmm2,xmm5,25 + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm5,18 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm5,14 + vmovdqu xmm0,XMMWORD PTR[((208-128))+rax] + vpsrld xmm4,xmm0,10 + + vpxor xmm7,xmm7,xmm1 + vpsrld xmm1,xmm0,17 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,15 + vpaddd xmm6,xmm6,xmm7 + vpxor xmm7,xmm4,xmm1 + vpsrld xmm1,xmm0,19 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm0,13 + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + vpaddd xmm6,xmm6,xmm7 + vpsrld xmm7,xmm13,6 + vpslld xmm2,xmm13,26 + vmovdqu XMMWORD PTR[(240-128)+rax],xmm6 + vpaddd xmm6,xmm6,xmm8 + + vpsrld xmm1,xmm13,11 + vpxor xmm7,xmm7,xmm2 + vpslld xmm2,xmm13,21 + vpaddd xmm6,xmm6,XMMWORD PTR[96+rbp] + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm1,xmm13,25 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm13,7 + vpandn xmm0,xmm13,xmm15 + vpand xmm4,xmm13,xmm14 + + vpxor xmm7,xmm7,xmm1 + + vpsrld xmm8,xmm9,2 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm1,xmm9,30 + vpxor xmm0,xmm0,xmm4 + vpxor xmm4,xmm10,xmm9 + + vpxor xmm8,xmm8,xmm1 + vpaddd xmm6,xmm6,xmm7 + + vpsrld xmm1,xmm9,13 + + vpslld xmm2,xmm9,19 + vpaddd xmm6,xmm6,xmm0 + vpand xmm3,xmm3,xmm4 + + vpxor xmm7,xmm8,xmm1 + + vpsrld xmm1,xmm9,22 + vpxor xmm7,xmm7,xmm2 + + vpslld xmm2,xmm9,10 + vpxor xmm8,xmm10,xmm3 + vpaddd xmm12,xmm12,xmm6 + + vpxor xmm7,xmm7,xmm1 + vpxor xmm7,xmm7,xmm2 + + vpaddd xmm8,xmm8,xmm6 + vpaddd xmm8,xmm8,xmm7 + add rbp,256 + dec ecx + jnz $L$oop_16_xx_avx + + mov ecx,1 + lea rbp,QWORD PTR[((K256+128))] + cmp ecx,DWORD PTR[rbx] + cmovge r8,rbp + cmp ecx,DWORD PTR[4+rbx] + cmovge r9,rbp + cmp ecx,DWORD PTR[8+rbx] + cmovge r10,rbp + cmp ecx,DWORD PTR[12+rbx] + cmovge r11,rbp + vmovdqa xmm7,XMMWORD PTR[rbx] + vpxor xmm0,xmm0,xmm0 + vmovdqa xmm6,xmm7 + vpcmpgtd xmm6,xmm6,xmm0 + vpaddd xmm7,xmm7,xmm6 + + vmovdqu xmm0,XMMWORD PTR[((0-128))+rdi] + vpand xmm8,xmm8,xmm6 + vmovdqu xmm1,XMMWORD PTR[((32-128))+rdi] + vpand xmm9,xmm9,xmm6 + vmovdqu xmm2,XMMWORD PTR[((64-128))+rdi] + vpand xmm10,xmm10,xmm6 + vmovdqu xmm5,XMMWORD PTR[((96-128))+rdi] + vpand xmm11,xmm11,xmm6 + vpaddd xmm8,xmm8,xmm0 + vmovdqu xmm0,XMMWORD PTR[((128-128))+rdi] + vpand xmm12,xmm12,xmm6 + vpaddd xmm9,xmm9,xmm1 + vmovdqu xmm1,XMMWORD PTR[((160-128))+rdi] + vpand xmm13,xmm13,xmm6 + vpaddd xmm10,xmm10,xmm2 + vmovdqu xmm2,XMMWORD PTR[((192-128))+rdi] + vpand xmm14,xmm14,xmm6 + vpaddd xmm11,xmm11,xmm5 + vmovdqu xmm5,XMMWORD PTR[((224-128))+rdi] + vpand xmm15,xmm15,xmm6 + vpaddd xmm12,xmm12,xmm0 + vpaddd xmm13,xmm13,xmm1 + vmovdqu XMMWORD PTR[(0-128)+rdi],xmm8 + vpaddd xmm14,xmm14,xmm2 + vmovdqu XMMWORD PTR[(32-128)+rdi],xmm9 + vpaddd xmm15,xmm15,xmm5 + vmovdqu XMMWORD PTR[(64-128)+rdi],xmm10 + vmovdqu XMMWORD PTR[(96-128)+rdi],xmm11 + vmovdqu XMMWORD PTR[(128-128)+rdi],xmm12 + vmovdqu XMMWORD PTR[(160-128)+rdi],xmm13 + vmovdqu XMMWORD PTR[(192-128)+rdi],xmm14 + vmovdqu XMMWORD PTR[(224-128)+rdi],xmm15 + + vmovdqu XMMWORD PTR[rbx],xmm7 + vmovdqu xmm6,XMMWORD 
PTR[$L$pbswap] + dec edx + jnz $L$oop_avx + + mov edx,DWORD PTR[280+rsp] + lea rdi,QWORD PTR[16+rdi] + lea rsi,QWORD PTR[64+rsi] + dec edx + jnz $L$oop_grande_avx + +$L$done_avx:: + mov rax,QWORD PTR[272+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_avx:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_multi_block_avx:: +sha256_multi_block_avx ENDP + +ALIGN 32 +sha256_multi_block_avx2 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_avx2_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[(-120)+rax],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + sub rsp,576 + and rsp,-256 + mov QWORD PTR[544+rsp],rax +$L$body_avx2:: + lea rbp,QWORD PTR[((K256+128))] + lea rdi,QWORD PTR[128+rdi] + +$L$oop_grande_avx2:: + mov DWORD PTR[552+rsp],edx + xor edx,edx + lea rbx,QWORD PTR[512+rsp] + mov r12,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r12,rbp + mov r13,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r13,rbp + mov r14,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r14,rbp + mov r15,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r15,rbp + mov r8,QWORD PTR[64+rsi] + mov ecx,DWORD PTR[72+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[16+rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[80+rsi] + mov ecx,DWORD PTR[88+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[20+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[96+rsi] + mov ecx,DWORD PTR[104+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[24+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[112+rsi] + mov ecx,DWORD PTR[120+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[28+rbx],ecx + cmovle r11,rbp + vmovdqu ymm8,YMMWORD PTR[((0-128))+rdi] + lea rax,QWORD PTR[128+rsp] + vmovdqu ymm9,YMMWORD PTR[((32-128))+rdi] + lea rbx,QWORD PTR[((256+128))+rsp] + vmovdqu ymm10,YMMWORD PTR[((64-128))+rdi] + vmovdqu ymm11,YMMWORD PTR[((96-128))+rdi] + vmovdqu ymm12,YMMWORD PTR[((128-128))+rdi] + vmovdqu ymm13,YMMWORD PTR[((160-128))+rdi] + vmovdqu ymm14,YMMWORD PTR[((192-128))+rdi] + vmovdqu ymm15,YMMWORD PTR[((224-128))+rdi] + vmovdqu ymm6,YMMWORD PTR[$L$pbswap] + jmp $L$oop_avx2 + +ALIGN 32 +$L$oop_avx2:: + vpxor ymm4,ymm10,ymm9 + vmovd xmm5,DWORD PTR[r12] + vmovd 
xmm0,DWORD PTR[r8] + vmovd xmm1,DWORD PTR[r13] + vmovd xmm2,DWORD PTR[r9] + vpinsrd xmm5,xmm5,DWORD PTR[r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD PTR[(0-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovd xmm5,DWORD PTR[4+r12] + vmovd xmm0,DWORD PTR[4+r8] + vmovd xmm1,DWORD PTR[4+r13] + vmovd xmm2,DWORD PTR[4+r9] + vpinsrd xmm5,xmm5,DWORD PTR[4+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[4+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[4+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[4+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD PTR[(32-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm5 + vpaddd ymm14,ymm14,ymm7 + vmovd xmm5,DWORD PTR[8+r12] + vmovd xmm0,DWORD PTR[8+r8] + vmovd xmm1,DWORD PTR[8+r13] + vmovd xmm2,DWORD PTR[8+r9] + vpinsrd xmm5,xmm5,DWORD PTR[8+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[8+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[8+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[8+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD PTR[(64-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + 
vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovd xmm5,DWORD PTR[12+r12] + vmovd xmm0,DWORD PTR[12+r8] + vmovd xmm1,DWORD PTR[12+r13] + vmovd xmm2,DWORD PTR[12+r9] + vpinsrd xmm5,xmm5,DWORD PTR[12+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[12+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[12+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[12+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD PTR[(96-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm5 + vpaddd ymm12,ymm12,ymm7 + vmovd xmm5,DWORD PTR[16+r12] + vmovd xmm0,DWORD PTR[16+r8] + vmovd xmm1,DWORD PTR[16+r13] + vmovd xmm2,DWORD PTR[16+r9] + vpinsrd xmm5,xmm5,DWORD PTR[16+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[16+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[16+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[16+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD PTR[(128-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD PTR[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovd xmm5,DWORD PTR[20+r12] + vmovd xmm0,DWORD PTR[20+r8] + vmovd xmm1,DWORD PTR[20+r13] + vmovd xmm2,DWORD PTR[20+r9] + vpinsrd xmm5,xmm5,DWORD PTR[20+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[20+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[20+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[20+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD PTR[(160-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld 
ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm5 + vpaddd ymm10,ymm10,ymm7 + vmovd xmm5,DWORD PTR[24+r12] + vmovd xmm0,DWORD PTR[24+r8] + vmovd xmm1,DWORD PTR[24+r13] + vmovd xmm2,DWORD PTR[24+r9] + vpinsrd xmm5,xmm5,DWORD PTR[24+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[24+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[24+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[24+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD PTR[(192-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovd xmm5,DWORD PTR[28+r12] + vmovd xmm0,DWORD PTR[28+r8] + vmovd xmm1,DWORD PTR[28+r13] + vmovd xmm2,DWORD PTR[28+r9] + vpinsrd xmm5,xmm5,DWORD PTR[28+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[28+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[28+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[28+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD PTR[(224-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + + vpxor ymm8,ymm8,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm9,13 + + vpslld ymm2,ymm9,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm5 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + vmovd xmm5,DWORD PTR[32+r12] + vmovd xmm0,DWORD PTR[32+r8] + vmovd xmm1,DWORD PTR[32+r13] + vmovd xmm2,DWORD PTR[32+r9] + vpinsrd xmm5,xmm5,DWORD PTR[32+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[32+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[32+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD 
PTR[32+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovd xmm5,DWORD PTR[36+r12] + vmovd xmm0,DWORD PTR[36+r8] + vmovd xmm1,DWORD PTR[36+r13] + vmovd xmm2,DWORD PTR[36+r9] + vpinsrd xmm5,xmm5,DWORD PTR[36+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[36+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[36+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[36+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD PTR[(288-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm5 + vpaddd ymm14,ymm14,ymm7 + vmovd xmm5,DWORD PTR[40+r12] + vmovd xmm0,DWORD PTR[40+r8] + vmovd xmm1,DWORD PTR[40+r13] + vmovd xmm2,DWORD PTR[40+r9] + vpinsrd xmm5,xmm5,DWORD PTR[40+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[40+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[40+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[40+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor 
ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovd xmm5,DWORD PTR[44+r12] + vmovd xmm0,DWORD PTR[44+r8] + vmovd xmm1,DWORD PTR[44+r13] + vmovd xmm2,DWORD PTR[44+r9] + vpinsrd xmm5,xmm5,DWORD PTR[44+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[44+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[44+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[44+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm5 + vpaddd ymm12,ymm12,ymm7 + vmovd xmm5,DWORD PTR[48+r12] + vmovd xmm0,DWORD PTR[48+r8] + vmovd xmm1,DWORD PTR[48+r13] + vmovd xmm2,DWORD PTR[48+r9] + vpinsrd xmm5,xmm5,DWORD PTR[48+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[48+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[48+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[48+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD PTR[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovd xmm5,DWORD PTR[52+r12] + vmovd xmm0,DWORD PTR[52+r8] + vmovd xmm1,DWORD PTR[52+r13] + vmovd xmm2,DWORD PTR[52+r9] + vpinsrd xmm5,xmm5,DWORD PTR[52+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[52+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[52+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[52+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm5,ymm5,YMMWORD PTR[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld 
ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm5 + vpaddd ymm10,ymm10,ymm7 + vmovd xmm5,DWORD PTR[56+r12] + vmovd xmm0,DWORD PTR[56+r8] + vmovd xmm1,DWORD PTR[56+r13] + vmovd xmm2,DWORD PTR[56+r9] + vpinsrd xmm5,xmm5,DWORD PTR[56+r14],1 + vpinsrd xmm0,xmm0,DWORD PTR[56+r10],1 + vpinsrd xmm1,xmm1,DWORD PTR[56+r15],1 + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[56+r11],1 + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovd xmm5,DWORD PTR[60+r12] + lea r12,QWORD PTR[64+r12] + vmovd xmm0,DWORD PTR[60+r8] + lea r8,QWORD PTR[64+r8] + vmovd xmm1,DWORD PTR[60+r13] + lea r13,QWORD PTR[64+r13] + vmovd xmm2,DWORD PTR[60+r9] + lea r9,QWORD PTR[64+r9] + vpinsrd xmm5,xmm5,DWORD PTR[60+r14],1 + lea r14,QWORD PTR[64+r14] + vpinsrd xmm0,xmm0,DWORD PTR[60+r10],1 + lea r10,QWORD PTR[64+r10] + vpinsrd xmm1,xmm1,DWORD PTR[60+r15],1 + lea r15,QWORD PTR[64+r15] + vpunpckldq ymm5,ymm5,ymm1 + vpinsrd xmm2,xmm2,DWORD PTR[60+r11],1 + lea r11,QWORD PTR[64+r11] + vpunpckldq ymm0,ymm0,ymm2 + vinserti128 ymm5,ymm5,xmm0,1 + vpshufb ymm5,ymm5,ymm6 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm5,ymm5,YMMWORD PTR[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + prefetcht0 [63+r12] + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + prefetcht0 [63+r13] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + prefetcht0 [63+r14] + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + prefetcht0 [63+r15] + vpxor ymm8,ymm8,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm9,13 + prefetcht0 [63+r8] + vpslld ymm2,ymm9,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm3,ymm3,ymm4 + prefetcht0 [63+r9] + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + prefetcht0 [63+r10] + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm5 + prefetcht0 [63+r11] + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm5 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + vmovdqu ymm5,YMMWORD PTR[((0-128))+rax] + mov ecx,3 + jmp $L$oop_16_xx_avx2 +ALIGN 32 
+$L$oop_16_xx_avx2:: + vmovdqu ymm6,YMMWORD PTR[((32-128))+rax] + vpaddd ymm5,ymm5,YMMWORD PTR[((288-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((448-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD PTR[(0-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovdqu ymm5,YMMWORD PTR[((64-128))+rax] + vpaddd ymm6,ymm6,YMMWORD PTR[((320-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((480-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu YMMWORD PTR[(32-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm6 + vpaddd ymm14,ymm14,ymm7 + vmovdqu ymm6,YMMWORD PTR[((96-128))+rax] + vpaddd ymm5,ymm5,YMMWORD PTR[((352-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((0-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld 
ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD PTR[(64-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovdqu ymm5,YMMWORD PTR[((128-128))+rax] + vpaddd ymm6,ymm6,YMMWORD PTR[((384-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((32-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD PTR[(96-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm6 + vpaddd ymm12,ymm12,ymm7 + vmovdqu ymm6,YMMWORD PTR[((160-128))+rax] + vpaddd ymm5,ymm5,YMMWORD PTR[((416-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((64-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD PTR[(128-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD PTR[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor 
ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovdqu ymm5,YMMWORD PTR[((192-128))+rax] + vpaddd ymm6,ymm6,YMMWORD PTR[((448-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((96-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD PTR[(160-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm6 + vpaddd ymm10,ymm10,ymm7 + vmovdqu ymm6,YMMWORD PTR[((224-128))+rax] + vpaddd ymm5,ymm5,YMMWORD PTR[((480-256-128))+rbx] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((128-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD PTR[(192-128)+rax],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovdqu ymm5,YMMWORD PTR[((256-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD PTR[((0-128))+rax] + + vpsrld 
ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((160-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD PTR[(224-128)+rax],ymm6 + vpaddd ymm6,ymm6,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + + vpxor ymm8,ymm8,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm9,13 + + vpslld ymm2,ymm9,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm6 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + vmovdqu ymm6,YMMWORD PTR[((288-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD PTR[((32-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((192-128))+rax] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm12,6 + vpslld ymm2,ymm12,26 + vmovdqu YMMWORD PTR[(256-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm15 + + vpsrld ymm1,ymm12,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm12,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-128))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm12,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,7 + vpandn ymm0,ymm12,ymm14 + vpand ymm3,ymm12,ymm13 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm15,ymm8,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm8,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm9,ymm8 + + vpxor ymm15,ymm15,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm8,13 + + vpslld ymm2,ymm8,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm15,ymm1 + + vpsrld ymm1,ymm8,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,10 + vpxor ymm15,ymm9,ymm4 + vpaddd ymm11,ymm11,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm15,ymm15,ymm5 + vpaddd ymm15,ymm15,ymm7 + vmovdqu ymm5,YMMWORD PTR[((320-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD PTR[((64-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((224-128))+rax] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm11,6 + vpslld ymm2,ymm11,26 + vmovdqu 
YMMWORD PTR[(288-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm14 + + vpsrld ymm1,ymm11,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm11,21 + vpaddd ymm6,ymm6,YMMWORD PTR[((-96))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm11,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,7 + vpandn ymm0,ymm11,ymm13 + vpand ymm4,ymm11,ymm12 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm14,ymm15,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm15,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm8,ymm15 + + vpxor ymm14,ymm14,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm15,13 + + vpslld ymm2,ymm15,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm14,ymm1 + + vpsrld ymm1,ymm15,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,10 + vpxor ymm14,ymm8,ymm3 + vpaddd ymm10,ymm10,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm14,ymm14,ymm6 + vpaddd ymm14,ymm14,ymm7 + vmovdqu ymm6,YMMWORD PTR[((352-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD PTR[((96-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((256-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm10,6 + vpslld ymm2,ymm10,26 + vmovdqu YMMWORD PTR[(320-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm13 + + vpsrld ymm1,ymm10,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm10,21 + vpaddd ymm5,ymm5,YMMWORD PTR[((-64))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm10,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,7 + vpandn ymm0,ymm10,ymm12 + vpand ymm3,ymm10,ymm11 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm13,ymm14,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm14,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm15,ymm14 + + vpxor ymm13,ymm13,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm14,13 + + vpslld ymm2,ymm14,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm13,ymm1 + + vpsrld ymm1,ymm14,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,10 + vpxor ymm13,ymm15,ymm4 + vpaddd ymm9,ymm9,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm13,ymm13,ymm5 + vpaddd ymm13,ymm13,ymm7 + vmovdqu ymm5,YMMWORD PTR[((384-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD PTR[((128-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((288-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm9,6 + vpslld ymm2,ymm9,26 + vmovdqu YMMWORD PTR[(352-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm12 + + vpsrld ymm1,ymm9,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm9,21 + vpaddd ymm6,ymm6,YMMWORD PTR[((-32))+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm9,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,7 + vpandn ymm0,ymm9,ymm11 + vpand ymm4,ymm9,ymm10 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm12,ymm13,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm13,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm14,ymm13 + + vpxor ymm12,ymm12,ymm1 + vpaddd ymm6,ymm6,ymm7 + + 
vpsrld ymm1,ymm13,13 + + vpslld ymm2,ymm13,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm12,ymm1 + + vpsrld ymm1,ymm13,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,10 + vpxor ymm12,ymm14,ymm3 + vpaddd ymm8,ymm8,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm12,ymm12,ymm6 + vpaddd ymm12,ymm12,ymm7 + vmovdqu ymm6,YMMWORD PTR[((416-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD PTR[((160-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((320-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm8,6 + vpslld ymm2,ymm8,26 + vmovdqu YMMWORD PTR[(384-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm11 + + vpsrld ymm1,ymm8,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm8,21 + vpaddd ymm5,ymm5,YMMWORD PTR[rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm8,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm8,7 + vpandn ymm0,ymm8,ymm10 + vpand ymm3,ymm8,ymm9 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm11,ymm12,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm12,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm13,ymm12 + + vpxor ymm11,ymm11,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm12,13 + + vpslld ymm2,ymm12,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm11,ymm1 + + vpsrld ymm1,ymm12,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm12,10 + vpxor ymm11,ymm13,ymm4 + vpaddd ymm15,ymm15,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm11,ymm11,ymm5 + vpaddd ymm11,ymm11,ymm7 + vmovdqu ymm5,YMMWORD PTR[((448-256-128))+rbx] + vpaddd ymm6,ymm6,YMMWORD PTR[((192-128))+rax] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((352-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm15,6 + vpslld ymm2,ymm15,26 + vmovdqu YMMWORD PTR[(416-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm10 + + vpsrld ymm1,ymm15,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm15,21 + vpaddd ymm6,ymm6,YMMWORD PTR[32+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm15,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm15,7 + vpandn ymm0,ymm15,ymm9 + vpand ymm4,ymm15,ymm8 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm10,ymm11,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm11,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm12,ymm11 + + vpxor ymm10,ymm10,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm11,13 + + vpslld ymm2,ymm11,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm10,ymm1 + + vpsrld ymm1,ymm11,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm11,10 + vpxor ymm10,ymm12,ymm3 + vpaddd ymm14,ymm14,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm10,ymm10,ymm6 + vpaddd ymm10,ymm10,ymm7 + vmovdqu ymm6,YMMWORD PTR[((480-256-128))+rbx] + vpaddd ymm5,ymm5,YMMWORD PTR[((224-128))+rax] + + vpsrld ymm7,ymm6,3 + vpsrld ymm1,ymm6,7 + vpslld ymm2,ymm6,25 + vpxor ymm7,ymm7,ymm1 + vpsrld 
ymm1,ymm6,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm6,14 + vmovdqu ymm0,YMMWORD PTR[((384-256-128))+rbx] + vpsrld ymm3,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm5,ymm5,ymm7 + vpxor ymm7,ymm3,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm5,ymm5,ymm7 + vpsrld ymm7,ymm14,6 + vpslld ymm2,ymm14,26 + vmovdqu YMMWORD PTR[(448-256-128)+rbx],ymm5 + vpaddd ymm5,ymm5,ymm9 + + vpsrld ymm1,ymm14,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm14,21 + vpaddd ymm5,ymm5,YMMWORD PTR[64+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm14,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm14,7 + vpandn ymm0,ymm14,ymm8 + vpand ymm3,ymm14,ymm15 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm9,ymm10,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm10,30 + vpxor ymm0,ymm0,ymm3 + vpxor ymm3,ymm11,ymm10 + + vpxor ymm9,ymm9,ymm1 + vpaddd ymm5,ymm5,ymm7 + + vpsrld ymm1,ymm10,13 + + vpslld ymm2,ymm10,19 + vpaddd ymm5,ymm5,ymm0 + vpand ymm4,ymm4,ymm3 + + vpxor ymm7,ymm9,ymm1 + + vpsrld ymm1,ymm10,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm10,10 + vpxor ymm9,ymm11,ymm4 + vpaddd ymm13,ymm13,ymm5 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm9,ymm9,ymm5 + vpaddd ymm9,ymm9,ymm7 + vmovdqu ymm5,YMMWORD PTR[((0-128))+rax] + vpaddd ymm6,ymm6,YMMWORD PTR[((256-256-128))+rbx] + + vpsrld ymm7,ymm5,3 + vpsrld ymm1,ymm5,7 + vpslld ymm2,ymm5,25 + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm5,18 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm5,14 + vmovdqu ymm0,YMMWORD PTR[((416-256-128))+rbx] + vpsrld ymm4,ymm0,10 + + vpxor ymm7,ymm7,ymm1 + vpsrld ymm1,ymm0,17 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,15 + vpaddd ymm6,ymm6,ymm7 + vpxor ymm7,ymm4,ymm1 + vpsrld ymm1,ymm0,19 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm0,13 + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + vpaddd ymm6,ymm6,ymm7 + vpsrld ymm7,ymm13,6 + vpslld ymm2,ymm13,26 + vmovdqu YMMWORD PTR[(480-256-128)+rbx],ymm6 + vpaddd ymm6,ymm6,ymm8 + + vpsrld ymm1,ymm13,11 + vpxor ymm7,ymm7,ymm2 + vpslld ymm2,ymm13,21 + vpaddd ymm6,ymm6,YMMWORD PTR[96+rbp] + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm1,ymm13,25 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm13,7 + vpandn ymm0,ymm13,ymm15 + vpand ymm4,ymm13,ymm14 + + vpxor ymm7,ymm7,ymm1 + + vpsrld ymm8,ymm9,2 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm1,ymm9,30 + vpxor ymm0,ymm0,ymm4 + vpxor ymm4,ymm10,ymm9 + + vpxor ymm8,ymm8,ymm1 + vpaddd ymm6,ymm6,ymm7 + + vpsrld ymm1,ymm9,13 + + vpslld ymm2,ymm9,19 + vpaddd ymm6,ymm6,ymm0 + vpand ymm3,ymm3,ymm4 + + vpxor ymm7,ymm8,ymm1 + + vpsrld ymm1,ymm9,22 + vpxor ymm7,ymm7,ymm2 + + vpslld ymm2,ymm9,10 + vpxor ymm8,ymm10,ymm3 + vpaddd ymm12,ymm12,ymm6 + + vpxor ymm7,ymm7,ymm1 + vpxor ymm7,ymm7,ymm2 + + vpaddd ymm8,ymm8,ymm6 + vpaddd ymm8,ymm8,ymm7 + add rbp,256 + dec ecx + jnz $L$oop_16_xx_avx2 + + mov ecx,1 + lea rbx,QWORD PTR[512+rsp] + lea rbp,QWORD PTR[((K256+128))] + cmp ecx,DWORD PTR[rbx] + cmovge r12,rbp + cmp ecx,DWORD PTR[4+rbx] + cmovge r13,rbp + cmp ecx,DWORD PTR[8+rbx] + cmovge r14,rbp + cmp ecx,DWORD PTR[12+rbx] + cmovge r15,rbp + cmp ecx,DWORD PTR[16+rbx] + cmovge r8,rbp + cmp ecx,DWORD PTR[20+rbx] + cmovge r9,rbp + cmp ecx,DWORD PTR[24+rbx] + cmovge r10,rbp + cmp ecx,DWORD PTR[28+rbx] + cmovge r11,rbp + vmovdqa ymm7,YMMWORD PTR[rbx] + vpxor ymm0,ymm0,ymm0 + vmovdqa ymm6,ymm7 + vpcmpgtd ymm6,ymm6,ymm0 + vpaddd ymm7,ymm7,ymm6 + + vmovdqu ymm0,YMMWORD PTR[((0-128))+rdi] + vpand ymm8,ymm8,ymm6 + vmovdqu ymm1,YMMWORD PTR[((32-128))+rdi] + vpand ymm9,ymm9,ymm6 + vmovdqu 
ymm2,YMMWORD PTR[((64-128))+rdi] + vpand ymm10,ymm10,ymm6 + vmovdqu ymm5,YMMWORD PTR[((96-128))+rdi] + vpand ymm11,ymm11,ymm6 + vpaddd ymm8,ymm8,ymm0 + vmovdqu ymm0,YMMWORD PTR[((128-128))+rdi] + vpand ymm12,ymm12,ymm6 + vpaddd ymm9,ymm9,ymm1 + vmovdqu ymm1,YMMWORD PTR[((160-128))+rdi] + vpand ymm13,ymm13,ymm6 + vpaddd ymm10,ymm10,ymm2 + vmovdqu ymm2,YMMWORD PTR[((192-128))+rdi] + vpand ymm14,ymm14,ymm6 + vpaddd ymm11,ymm11,ymm5 + vmovdqu ymm5,YMMWORD PTR[((224-128))+rdi] + vpand ymm15,ymm15,ymm6 + vpaddd ymm12,ymm12,ymm0 + vpaddd ymm13,ymm13,ymm1 + vmovdqu YMMWORD PTR[(0-128)+rdi],ymm8 + vpaddd ymm14,ymm14,ymm2 + vmovdqu YMMWORD PTR[(32-128)+rdi],ymm9 + vpaddd ymm15,ymm15,ymm5 + vmovdqu YMMWORD PTR[(64-128)+rdi],ymm10 + vmovdqu YMMWORD PTR[(96-128)+rdi],ymm11 + vmovdqu YMMWORD PTR[(128-128)+rdi],ymm12 + vmovdqu YMMWORD PTR[(160-128)+rdi],ymm13 + vmovdqu YMMWORD PTR[(192-128)+rdi],ymm14 + vmovdqu YMMWORD PTR[(224-128)+rdi],ymm15 + + vmovdqu YMMWORD PTR[rbx],ymm7 + lea rbx,QWORD PTR[((256+128))+rsp] + vmovdqu ymm6,YMMWORD PTR[$L$pbswap] + dec edx + jnz $L$oop_avx2 + + + + + + + +$L$done_avx2:: + mov rax,QWORD PTR[544+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + movaps xmm13,XMMWORD PTR[((-104))+rax] + movaps xmm14,XMMWORD PTR[((-88))+rax] + movaps xmm15,XMMWORD PTR[((-72))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_avx2:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_multi_block_avx2:: +sha256_multi_block_avx2 ENDP +ALIGN 256 +K256:: + DD 1116352408,1116352408,1116352408,1116352408 + DD 1116352408,1116352408,1116352408,1116352408 + DD 1899447441,1899447441,1899447441,1899447441 + DD 1899447441,1899447441,1899447441,1899447441 + DD 3049323471,3049323471,3049323471,3049323471 + DD 3049323471,3049323471,3049323471,3049323471 + DD 3921009573,3921009573,3921009573,3921009573 + DD 3921009573,3921009573,3921009573,3921009573 + DD 961987163,961987163,961987163,961987163 + DD 961987163,961987163,961987163,961987163 + DD 1508970993,1508970993,1508970993,1508970993 + DD 1508970993,1508970993,1508970993,1508970993 + DD 2453635748,2453635748,2453635748,2453635748 + DD 2453635748,2453635748,2453635748,2453635748 + DD 2870763221,2870763221,2870763221,2870763221 + DD 2870763221,2870763221,2870763221,2870763221 + DD 3624381080,3624381080,3624381080,3624381080 + DD 3624381080,3624381080,3624381080,3624381080 + DD 310598401,310598401,310598401,310598401 + DD 310598401,310598401,310598401,310598401 + DD 607225278,607225278,607225278,607225278 + DD 607225278,607225278,607225278,607225278 + DD 1426881987,1426881987,1426881987,1426881987 + DD 1426881987,1426881987,1426881987,1426881987 + DD 1925078388,1925078388,1925078388,1925078388 + DD 1925078388,1925078388,1925078388,1925078388 + DD 2162078206,2162078206,2162078206,2162078206 + DD 2162078206,2162078206,2162078206,2162078206 + DD 2614888103,2614888103,2614888103,2614888103 + DD 2614888103,2614888103,2614888103,2614888103 + DD 3248222580,3248222580,3248222580,3248222580 + DD 3248222580,3248222580,3248222580,3248222580 + DD 
3835390401,3835390401,3835390401,3835390401 + DD 3835390401,3835390401,3835390401,3835390401 + DD 4022224774,4022224774,4022224774,4022224774 + DD 4022224774,4022224774,4022224774,4022224774 + DD 264347078,264347078,264347078,264347078 + DD 264347078,264347078,264347078,264347078 + DD 604807628,604807628,604807628,604807628 + DD 604807628,604807628,604807628,604807628 + DD 770255983,770255983,770255983,770255983 + DD 770255983,770255983,770255983,770255983 + DD 1249150122,1249150122,1249150122,1249150122 + DD 1249150122,1249150122,1249150122,1249150122 + DD 1555081692,1555081692,1555081692,1555081692 + DD 1555081692,1555081692,1555081692,1555081692 + DD 1996064986,1996064986,1996064986,1996064986 + DD 1996064986,1996064986,1996064986,1996064986 + DD 2554220882,2554220882,2554220882,2554220882 + DD 2554220882,2554220882,2554220882,2554220882 + DD 2821834349,2821834349,2821834349,2821834349 + DD 2821834349,2821834349,2821834349,2821834349 + DD 2952996808,2952996808,2952996808,2952996808 + DD 2952996808,2952996808,2952996808,2952996808 + DD 3210313671,3210313671,3210313671,3210313671 + DD 3210313671,3210313671,3210313671,3210313671 + DD 3336571891,3336571891,3336571891,3336571891 + DD 3336571891,3336571891,3336571891,3336571891 + DD 3584528711,3584528711,3584528711,3584528711 + DD 3584528711,3584528711,3584528711,3584528711 + DD 113926993,113926993,113926993,113926993 + DD 113926993,113926993,113926993,113926993 + DD 338241895,338241895,338241895,338241895 + DD 338241895,338241895,338241895,338241895 + DD 666307205,666307205,666307205,666307205 + DD 666307205,666307205,666307205,666307205 + DD 773529912,773529912,773529912,773529912 + DD 773529912,773529912,773529912,773529912 + DD 1294757372,1294757372,1294757372,1294757372 + DD 1294757372,1294757372,1294757372,1294757372 + DD 1396182291,1396182291,1396182291,1396182291 + DD 1396182291,1396182291,1396182291,1396182291 + DD 1695183700,1695183700,1695183700,1695183700 + DD 1695183700,1695183700,1695183700,1695183700 + DD 1986661051,1986661051,1986661051,1986661051 + DD 1986661051,1986661051,1986661051,1986661051 + DD 2177026350,2177026350,2177026350,2177026350 + DD 2177026350,2177026350,2177026350,2177026350 + DD 2456956037,2456956037,2456956037,2456956037 + DD 2456956037,2456956037,2456956037,2456956037 + DD 2730485921,2730485921,2730485921,2730485921 + DD 2730485921,2730485921,2730485921,2730485921 + DD 2820302411,2820302411,2820302411,2820302411 + DD 2820302411,2820302411,2820302411,2820302411 + DD 3259730800,3259730800,3259730800,3259730800 + DD 3259730800,3259730800,3259730800,3259730800 + DD 3345764771,3345764771,3345764771,3345764771 + DD 3345764771,3345764771,3345764771,3345764771 + DD 3516065817,3516065817,3516065817,3516065817 + DD 3516065817,3516065817,3516065817,3516065817 + DD 3600352804,3600352804,3600352804,3600352804 + DD 3600352804,3600352804,3600352804,3600352804 + DD 4094571909,4094571909,4094571909,4094571909 + DD 4094571909,4094571909,4094571909,4094571909 + DD 275423344,275423344,275423344,275423344 + DD 275423344,275423344,275423344,275423344 + DD 430227734,430227734,430227734,430227734 + DD 430227734,430227734,430227734,430227734 + DD 506948616,506948616,506948616,506948616 + DD 506948616,506948616,506948616,506948616 + DD 659060556,659060556,659060556,659060556 + DD 659060556,659060556,659060556,659060556 + DD 883997877,883997877,883997877,883997877 + DD 883997877,883997877,883997877,883997877 + DD 958139571,958139571,958139571,958139571 + DD 958139571,958139571,958139571,958139571 + DD 
1322822218,1322822218,1322822218,1322822218 + DD 1322822218,1322822218,1322822218,1322822218 + DD 1537002063,1537002063,1537002063,1537002063 + DD 1537002063,1537002063,1537002063,1537002063 + DD 1747873779,1747873779,1747873779,1747873779 + DD 1747873779,1747873779,1747873779,1747873779 + DD 1955562222,1955562222,1955562222,1955562222 + DD 1955562222,1955562222,1955562222,1955562222 + DD 2024104815,2024104815,2024104815,2024104815 + DD 2024104815,2024104815,2024104815,2024104815 + DD 2227730452,2227730452,2227730452,2227730452 + DD 2227730452,2227730452,2227730452,2227730452 + DD 2361852424,2361852424,2361852424,2361852424 + DD 2361852424,2361852424,2361852424,2361852424 + DD 2428436474,2428436474,2428436474,2428436474 + DD 2428436474,2428436474,2428436474,2428436474 + DD 2756734187,2756734187,2756734187,2756734187 + DD 2756734187,2756734187,2756734187,2756734187 + DD 3204031479,3204031479,3204031479,3204031479 + DD 3204031479,3204031479,3204031479,3204031479 + DD 3329325298,3329325298,3329325298,3329325298 + DD 3329325298,3329325298,3329325298,3329325298 +$L$pbswap:: + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +K256_shaext:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h +DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111 +DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114 +DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71 +DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112 +DB 101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[272+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + + lea rsi,QWORD PTR[((-24-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD 
PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +ALIGN 16 +avx2_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[544+r8] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea rsi,QWORD PTR[((-56-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + jmp $L$in_prologue +avx2_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha256_multi_block + DD imagerel $L$SEH_end_sha256_multi_block + DD imagerel $L$SEH_info_sha256_multi_block + DD imagerel $L$SEH_begin_sha256_multi_block_shaext + DD imagerel $L$SEH_end_sha256_multi_block_shaext + DD imagerel $L$SEH_info_sha256_multi_block_shaext + DD imagerel $L$SEH_begin_sha256_multi_block_avx + DD imagerel $L$SEH_end_sha256_multi_block_avx + DD imagerel $L$SEH_info_sha256_multi_block_avx + DD imagerel $L$SEH_begin_sha256_multi_block_avx2 + DD imagerel $L$SEH_end_sha256_multi_block_avx2 + DD imagerel $L$SEH_info_sha256_multi_block_avx2 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha256_multi_block:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body,imagerel $L$epilogue +$L$SEH_info_sha256_multi_block_shaext:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext +$L$SEH_info_sha256_multi_block_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body_avx,imagerel $L$epilogue_avx +$L$SEH_info_sha256_multi_block_avx2:: +DB 9,0,0,0 + DD imagerel avx2_handler + DD imagerel $L$body_avx2,imagerel $L$epilogue_avx2 + +.xdata ENDS +END diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha256-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha256-x86_64.asm index f685c2fdfc4aa0..ddcaa2c5ec5270 100644 --- a/deps/openssl/asm/x64-win32-masm/sha/sha256-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/sha/sha256-x86_64.asm @@ -1,6 +1,7 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC sha256_block_data_order ALIGN 16 @@ -12,9 +13,24 @@ $L$SEH_begin_sha256_block_data_order:: mov rdi,rcx mov rsi,rdx mov rdx,r8 - mov rcx,r9 + lea r11,QWORD PTR[OPENSSL_ia32cap_P] + mov r9d,DWORD PTR[r11] + mov r10d,DWORD PTR[4+r11] + mov r11d,DWORD PTR[8+r11] + test r11d,536870912 + jnz _shaext_shortcut + and r11d,296 + cmp r11d,296 + je $L$avx2_shortcut + and r9d,1073741824 + and r10d,268435968 + or r10d,r9d + cmp r10d,1342177792 + je $L$avx_shortcut + test r10d,512 + jnz $L$ssse3_shortcut push rbx push rbp push r12 @@ -32,8 +48,6 @@ $L$SEH_begin_sha256_block_data_order:: mov QWORD PTR[((64+24))+rsp],r11 $L$prologue:: - lea rbp,QWORD PTR[K256] - mov eax,DWORD PTR[rdi] 
mov ebx,DWORD PTR[4+rdi] mov ecx,DWORD PTR[8+rdi] @@ -46,1695 +60,5358 @@ $L$prologue:: ALIGN 16 $L$loop:: - xor rdi,rdi + mov edi,ebx + lea rbp,QWORD PTR[K256] + xor edi,ecx mov r12d,DWORD PTR[rsi] mov r13d,r8d mov r14d,eax bswap r12d ror r13d,14 mov r15d,r9d - mov DWORD PTR[rsp],r12d - ror r14d,9 xor r13d,r8d + ror r14d,9 xor r15d,r10d - ror r13d,5 - add r12d,r11d + mov DWORD PTR[rsp],r12d xor r14d,eax - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov r11d,ebx + + ror r13d,5 + add r12d,r11d + xor r15d,r10d ror r14d,11 xor r13d,r8d - xor r15d,r10d + add r12d,r15d - xor r11d,ecx + mov r15d,eax + add r12d,DWORD PTR[rbp] xor r14d,eax - add r12d,r15d - mov r15d,ebx + xor r15d,ebx ror r13d,6 - and r11d,eax - and r15d,ecx + mov r11d,ebx + and edi,r15d ror r14d,2 add r12d,r13d - add r11d,r15d + xor r11d,edi add edx,r12d add r11d,r12d - lea rdi,QWORD PTR[1+rdi] - add r11d,r14d + lea rbp,QWORD PTR[4+rbp] + add r11d,r14d mov r12d,DWORD PTR[4+rsi] mov r13d,edx mov r14d,r11d bswap r12d ror r13d,14 - mov r15d,r8d - mov DWORD PTR[4+rsp],r12d + mov edi,r8d - ror r14d,9 xor r13d,edx - xor r15d,r9d + ror r14d,9 + xor edi,r9d - ror r13d,5 - add r12d,r10d + mov DWORD PTR[4+rsp],r12d xor r14d,r11d + and edi,edx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,edx - mov r10d,eax + ror r13d,5 + add r12d,r10d + xor edi,r9d ror r14d,11 xor r13d,edx - xor r15d,r9d + add r12d,edi - xor r10d,ebx + mov edi,r11d + add r12d,DWORD PTR[rbp] xor r14d,r11d - add r12d,r15d - mov r15d,eax + xor edi,eax ror r13d,6 - and r10d,r11d - and r15d,ebx + mov r10d,eax + and r15d,edi ror r14d,2 add r12d,r13d - add r10d,r15d + xor r10d,r15d add ecx,r12d add r10d,r12d - lea rdi,QWORD PTR[1+rdi] - add r10d,r14d + lea rbp,QWORD PTR[4+rbp] + add r10d,r14d mov r12d,DWORD PTR[8+rsi] mov r13d,ecx mov r14d,r10d bswap r12d ror r13d,14 mov r15d,edx - mov DWORD PTR[8+rsp],r12d - ror r14d,9 xor r13d,ecx + ror r14d,9 xor r15d,r8d - ror r13d,5 - add r12d,r9d + mov DWORD PTR[8+rsp],r12d xor r14d,r10d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov r9d,r11d + + ror r13d,5 + add r12d,r9d + xor r15d,r8d ror r14d,11 xor r13d,ecx - xor r15d,r8d + add r12d,r15d - xor r9d,eax + mov r15d,r10d + add r12d,DWORD PTR[rbp] xor r14d,r10d - add r12d,r15d - mov r15d,r11d + xor r15d,r11d ror r13d,6 - and r9d,r10d - and r15d,eax + mov r9d,r11d + and edi,r15d ror r14d,2 add r12d,r13d - add r9d,r15d + xor r9d,edi add ebx,r12d add r9d,r12d - lea rdi,QWORD PTR[1+rdi] - add r9d,r14d + lea rbp,QWORD PTR[4+rbp] + add r9d,r14d mov r12d,DWORD PTR[12+rsi] mov r13d,ebx mov r14d,r9d bswap r12d ror r13d,14 - mov r15d,ecx - mov DWORD PTR[12+rsp],r12d + mov edi,ecx - ror r14d,9 xor r13d,ebx - xor r15d,edx + ror r14d,9 + xor edi,edx - ror r13d,5 - add r12d,r8d + mov DWORD PTR[12+rsp],r12d xor r14d,r9d + and edi,ebx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,ebx - mov r8d,r10d + ror r13d,5 + add r12d,r8d + xor edi,edx ror r14d,11 xor r13d,ebx - xor r15d,edx + add r12d,edi - xor r8d,r11d + mov edi,r9d + add r12d,DWORD PTR[rbp] xor r14d,r9d - add r12d,r15d - mov r15d,r10d + xor edi,r10d ror r13d,6 - and r8d,r9d - and r15d,r11d + mov r8d,r10d + and r15d,edi ror r14d,2 add r12d,r13d - add r8d,r15d + xor r8d,r15d add eax,r12d add r8d,r12d - lea rdi,QWORD PTR[1+rdi] - add r8d,r14d + lea rbp,QWORD PTR[20+rbp] + add r8d,r14d mov r12d,DWORD PTR[16+rsi] mov r13d,eax mov r14d,r8d bswap r12d ror r13d,14 mov r15d,ebx - mov DWORD PTR[16+rsp],r12d - ror r14d,9 xor r13d,eax + ror r14d,9 xor r15d,ecx - ror r13d,5 - add r12d,edx + mov DWORD PTR[16+rsp],r12d xor r14d,r8d - - add r12d,DWORD 
PTR[rdi*4+rbp] and r15d,eax - mov edx,r9d + + ror r13d,5 + add r12d,edx + xor r15d,ecx ror r14d,11 xor r13d,eax - xor r15d,ecx + add r12d,r15d - xor edx,r10d + mov r15d,r8d + add r12d,DWORD PTR[rbp] xor r14d,r8d - add r12d,r15d - mov r15d,r9d + xor r15d,r9d ror r13d,6 - and edx,r8d - and r15d,r10d + mov edx,r9d + and edi,r15d ror r14d,2 add r12d,r13d - add edx,r15d + xor edx,edi add r11d,r12d add edx,r12d - lea rdi,QWORD PTR[1+rdi] - add edx,r14d + lea rbp,QWORD PTR[4+rbp] + add edx,r14d mov r12d,DWORD PTR[20+rsi] mov r13d,r11d mov r14d,edx bswap r12d ror r13d,14 - mov r15d,eax - mov DWORD PTR[20+rsp],r12d + mov edi,eax - ror r14d,9 xor r13d,r11d - xor r15d,ebx + ror r14d,9 + xor edi,ebx - ror r13d,5 - add r12d,ecx + mov DWORD PTR[20+rsp],r12d xor r14d,edx + and edi,r11d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r11d - mov ecx,r8d + ror r13d,5 + add r12d,ecx + xor edi,ebx ror r14d,11 xor r13d,r11d - xor r15d,ebx + add r12d,edi - xor ecx,r9d + mov edi,edx + add r12d,DWORD PTR[rbp] xor r14d,edx - add r12d,r15d - mov r15d,r8d + xor edi,r8d ror r13d,6 - and ecx,edx - and r15d,r9d + mov ecx,r8d + and r15d,edi ror r14d,2 add r12d,r13d - add ecx,r15d + xor ecx,r15d add r10d,r12d add ecx,r12d - lea rdi,QWORD PTR[1+rdi] - add ecx,r14d + lea rbp,QWORD PTR[4+rbp] + add ecx,r14d mov r12d,DWORD PTR[24+rsi] mov r13d,r10d mov r14d,ecx bswap r12d ror r13d,14 mov r15d,r11d - mov DWORD PTR[24+rsp],r12d - ror r14d,9 xor r13d,r10d + ror r14d,9 xor r15d,eax - ror r13d,5 - add r12d,ebx + mov DWORD PTR[24+rsp],r12d xor r14d,ecx - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov ebx,edx + + ror r13d,5 + add r12d,ebx + xor r15d,eax ror r14d,11 xor r13d,r10d - xor r15d,eax + add r12d,r15d - xor ebx,r8d + mov r15d,ecx + add r12d,DWORD PTR[rbp] xor r14d,ecx - add r12d,r15d - mov r15d,edx + xor r15d,edx ror r13d,6 - and ebx,ecx - and r15d,r8d + mov ebx,edx + and edi,r15d ror r14d,2 add r12d,r13d - add ebx,r15d + xor ebx,edi add r9d,r12d add ebx,r12d - lea rdi,QWORD PTR[1+rdi] - add ebx,r14d + lea rbp,QWORD PTR[4+rbp] + add ebx,r14d mov r12d,DWORD PTR[28+rsi] mov r13d,r9d mov r14d,ebx bswap r12d ror r13d,14 - mov r15d,r10d - mov DWORD PTR[28+rsp],r12d + mov edi,r10d - ror r14d,9 xor r13d,r9d - xor r15d,r11d + ror r14d,9 + xor edi,r11d - ror r13d,5 - add r12d,eax + mov DWORD PTR[28+rsp],r12d xor r14d,ebx + and edi,r9d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r9d - mov eax,ecx + ror r13d,5 + add r12d,eax + xor edi,r11d ror r14d,11 xor r13d,r9d - xor r15d,r11d + add r12d,edi - xor eax,edx + mov edi,ebx + add r12d,DWORD PTR[rbp] xor r14d,ebx - add r12d,r15d - mov r15d,ecx + xor edi,ecx ror r13d,6 - and eax,ebx - and r15d,edx + mov eax,ecx + and r15d,edi ror r14d,2 add r12d,r13d - add eax,r15d + xor eax,r15d add r8d,r12d add eax,r12d - lea rdi,QWORD PTR[1+rdi] - add eax,r14d + lea rbp,QWORD PTR[20+rbp] + add eax,r14d mov r12d,DWORD PTR[32+rsi] mov r13d,r8d mov r14d,eax bswap r12d ror r13d,14 mov r15d,r9d - mov DWORD PTR[32+rsp],r12d - ror r14d,9 xor r13d,r8d + ror r14d,9 xor r15d,r10d - ror r13d,5 - add r12d,r11d + mov DWORD PTR[32+rsp],r12d xor r14d,eax - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov r11d,ebx + + ror r13d,5 + add r12d,r11d + xor r15d,r10d ror r14d,11 xor r13d,r8d - xor r15d,r10d + add r12d,r15d - xor r11d,ecx + mov r15d,eax + add r12d,DWORD PTR[rbp] xor r14d,eax - add r12d,r15d - mov r15d,ebx + xor r15d,ebx ror r13d,6 - and r11d,eax - and r15d,ecx + mov r11d,ebx + and edi,r15d ror r14d,2 add r12d,r13d - add r11d,r15d + xor r11d,edi add edx,r12d add r11d,r12d - lea rdi,QWORD PTR[1+rdi] - add 
r11d,r14d + lea rbp,QWORD PTR[4+rbp] + add r11d,r14d mov r12d,DWORD PTR[36+rsi] mov r13d,edx mov r14d,r11d bswap r12d ror r13d,14 - mov r15d,r8d - mov DWORD PTR[36+rsp],r12d + mov edi,r8d - ror r14d,9 xor r13d,edx - xor r15d,r9d + ror r14d,9 + xor edi,r9d - ror r13d,5 - add r12d,r10d + mov DWORD PTR[36+rsp],r12d xor r14d,r11d + and edi,edx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,edx - mov r10d,eax + ror r13d,5 + add r12d,r10d + xor edi,r9d ror r14d,11 xor r13d,edx - xor r15d,r9d + add r12d,edi - xor r10d,ebx + mov edi,r11d + add r12d,DWORD PTR[rbp] xor r14d,r11d - add r12d,r15d - mov r15d,eax + xor edi,eax ror r13d,6 - and r10d,r11d - and r15d,ebx + mov r10d,eax + and r15d,edi ror r14d,2 add r12d,r13d - add r10d,r15d + xor r10d,r15d add ecx,r12d add r10d,r12d - lea rdi,QWORD PTR[1+rdi] - add r10d,r14d + lea rbp,QWORD PTR[4+rbp] + add r10d,r14d mov r12d,DWORD PTR[40+rsi] mov r13d,ecx mov r14d,r10d bswap r12d ror r13d,14 mov r15d,edx - mov DWORD PTR[40+rsp],r12d - ror r14d,9 xor r13d,ecx + ror r14d,9 xor r15d,r8d - ror r13d,5 - add r12d,r9d + mov DWORD PTR[40+rsp],r12d xor r14d,r10d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov r9d,r11d + + ror r13d,5 + add r12d,r9d + xor r15d,r8d ror r14d,11 xor r13d,ecx - xor r15d,r8d + add r12d,r15d - xor r9d,eax + mov r15d,r10d + add r12d,DWORD PTR[rbp] xor r14d,r10d - add r12d,r15d - mov r15d,r11d + xor r15d,r11d ror r13d,6 - and r9d,r10d - and r15d,eax + mov r9d,r11d + and edi,r15d ror r14d,2 add r12d,r13d - add r9d,r15d + xor r9d,edi add ebx,r12d add r9d,r12d - lea rdi,QWORD PTR[1+rdi] - add r9d,r14d + lea rbp,QWORD PTR[4+rbp] + add r9d,r14d mov r12d,DWORD PTR[44+rsi] mov r13d,ebx mov r14d,r9d bswap r12d ror r13d,14 - mov r15d,ecx - mov DWORD PTR[44+rsp],r12d + mov edi,ecx - ror r14d,9 xor r13d,ebx - xor r15d,edx + ror r14d,9 + xor edi,edx - ror r13d,5 - add r12d,r8d + mov DWORD PTR[44+rsp],r12d xor r14d,r9d + and edi,ebx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,ebx - mov r8d,r10d + ror r13d,5 + add r12d,r8d + xor edi,edx ror r14d,11 xor r13d,ebx - xor r15d,edx + add r12d,edi - xor r8d,r11d + mov edi,r9d + add r12d,DWORD PTR[rbp] xor r14d,r9d - add r12d,r15d - mov r15d,r10d + xor edi,r10d ror r13d,6 - and r8d,r9d - and r15d,r11d + mov r8d,r10d + and r15d,edi ror r14d,2 add r12d,r13d - add r8d,r15d + xor r8d,r15d add eax,r12d add r8d,r12d - lea rdi,QWORD PTR[1+rdi] - add r8d,r14d + lea rbp,QWORD PTR[20+rbp] + add r8d,r14d mov r12d,DWORD PTR[48+rsi] mov r13d,eax mov r14d,r8d bswap r12d ror r13d,14 mov r15d,ebx - mov DWORD PTR[48+rsp],r12d - ror r14d,9 xor r13d,eax + ror r14d,9 xor r15d,ecx - ror r13d,5 - add r12d,edx + mov DWORD PTR[48+rsp],r12d xor r14d,r8d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov edx,r9d - ror r14d,11 - xor r13d,eax + ror r13d,5 + add r12d,edx xor r15d,ecx - xor edx,r10d - xor r14d,r8d + ror r14d,11 + xor r13d,eax add r12d,r15d - mov r15d,r9d + mov r15d,r8d + add r12d,DWORD PTR[rbp] + xor r14d,r8d + + xor r15d,r9d ror r13d,6 - and edx,r8d - and r15d,r10d + mov edx,r9d + and edi,r15d ror r14d,2 add r12d,r13d - add edx,r15d + xor edx,edi add r11d,r12d add edx,r12d - lea rdi,QWORD PTR[1+rdi] - add edx,r14d + lea rbp,QWORD PTR[4+rbp] + add edx,r14d mov r12d,DWORD PTR[52+rsi] mov r13d,r11d mov r14d,edx bswap r12d ror r13d,14 - mov r15d,eax - mov DWORD PTR[52+rsp],r12d + mov edi,eax - ror r14d,9 xor r13d,r11d - xor r15d,ebx + ror r14d,9 + xor edi,ebx - ror r13d,5 - add r12d,ecx + mov DWORD PTR[52+rsp],r12d xor r14d,edx + and edi,r11d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r11d - mov ecx,r8d + ror r13d,5 + add 
r12d,ecx + xor edi,ebx ror r14d,11 xor r13d,r11d - xor r15d,ebx + add r12d,edi - xor ecx,r9d + mov edi,edx + add r12d,DWORD PTR[rbp] xor r14d,edx - add r12d,r15d - mov r15d,r8d + xor edi,r8d ror r13d,6 - and ecx,edx - and r15d,r9d + mov ecx,r8d + and r15d,edi ror r14d,2 add r12d,r13d - add ecx,r15d + xor ecx,r15d add r10d,r12d add ecx,r12d - lea rdi,QWORD PTR[1+rdi] - add ecx,r14d + lea rbp,QWORD PTR[4+rbp] + add ecx,r14d mov r12d,DWORD PTR[56+rsi] mov r13d,r10d mov r14d,ecx bswap r12d ror r13d,14 mov r15d,r11d - mov DWORD PTR[56+rsp],r12d - ror r14d,9 xor r13d,r10d + ror r14d,9 xor r15d,eax - ror r13d,5 - add r12d,ebx + mov DWORD PTR[56+rsp],r12d xor r14d,ecx - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov ebx,edx + + ror r13d,5 + add r12d,ebx + xor r15d,eax ror r14d,11 xor r13d,r10d - xor r15d,eax + add r12d,r15d - xor ebx,r8d + mov r15d,ecx + add r12d,DWORD PTR[rbp] xor r14d,ecx - add r12d,r15d - mov r15d,edx + xor r15d,edx ror r13d,6 - and ebx,ecx - and r15d,r8d + mov ebx,edx + and edi,r15d ror r14d,2 add r12d,r13d - add ebx,r15d + xor ebx,edi add r9d,r12d add ebx,r12d - lea rdi,QWORD PTR[1+rdi] - add ebx,r14d + lea rbp,QWORD PTR[4+rbp] + add ebx,r14d mov r12d,DWORD PTR[60+rsi] mov r13d,r9d mov r14d,ebx bswap r12d ror r13d,14 - mov r15d,r10d - mov DWORD PTR[60+rsp],r12d + mov edi,r10d - ror r14d,9 xor r13d,r9d - xor r15d,r11d + ror r14d,9 + xor edi,r11d - ror r13d,5 - add r12d,eax + mov DWORD PTR[60+rsp],r12d xor r14d,ebx + and edi,r9d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r9d - mov eax,ecx + ror r13d,5 + add r12d,eax + xor edi,r11d ror r14d,11 xor r13d,r9d - xor r15d,r11d + add r12d,edi - xor eax,edx + mov edi,ebx + add r12d,DWORD PTR[rbp] xor r14d,ebx - add r12d,r15d - mov r15d,ecx + xor edi,ecx ror r13d,6 - and eax,ebx - and r15d,edx + mov eax,ecx + and r15d,edi ror r14d,2 add r12d,r13d - add eax,r15d + xor eax,r15d add r8d,r12d add eax,r12d - lea rdi,QWORD PTR[1+rdi] - add eax,r14d + lea rbp,QWORD PTR[20+rbp] jmp $L$rounds_16_xx ALIGN 16 $L$rounds_16_xx:: mov r13d,DWORD PTR[4+rsp] - mov r14d,DWORD PTR[56+rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[56+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[36+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[36+rsp] add r12d,DWORD PTR[rsp] mov r13d,r8d - add r12d,r14d + add r12d,r15d mov r14d,eax ror r13d,14 mov r15d,r9d - mov DWORD PTR[rsp],r12d - ror r14d,9 xor r13d,r8d + ror r14d,9 xor r15d,r10d - ror r13d,5 - add r12d,r11d + mov DWORD PTR[rsp],r12d xor r14d,eax - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov r11d,ebx + + ror r13d,5 + add r12d,r11d + xor r15d,r10d ror r14d,11 xor r13d,r8d - xor r15d,r10d + add r12d,r15d - xor r11d,ecx + mov r15d,eax + add r12d,DWORD PTR[rbp] xor r14d,eax - add r12d,r15d - mov r15d,ebx + xor r15d,ebx ror r13d,6 - and r11d,eax - and r15d,ecx + mov r11d,ebx + and edi,r15d ror r14d,2 add r12d,r13d - add r11d,r15d + xor r11d,edi add edx,r12d add r11d,r12d - lea rdi,QWORD PTR[1+rdi] - add r11d,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[8+rsp] - mov r14d,DWORD PTR[60+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[60+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[40+rsp] - - ror r15d,2 - xor r15d,r14d + shr 
r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[40+rsp] add r12d,DWORD PTR[4+rsp] mov r13d,edx - add r12d,r14d + add r12d,edi mov r14d,r11d ror r13d,14 - mov r15d,r8d - mov DWORD PTR[4+rsp],r12d + mov edi,r8d - ror r14d,9 xor r13d,edx - xor r15d,r9d + ror r14d,9 + xor edi,r9d - ror r13d,5 - add r12d,r10d + mov DWORD PTR[4+rsp],r12d xor r14d,r11d + and edi,edx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,edx - mov r10d,eax + ror r13d,5 + add r12d,r10d + xor edi,r9d ror r14d,11 xor r13d,edx - xor r15d,r9d + add r12d,edi - xor r10d,ebx + mov edi,r11d + add r12d,DWORD PTR[rbp] xor r14d,r11d - add r12d,r15d - mov r15d,eax + xor edi,eax ror r13d,6 - and r10d,r11d - and r15d,ebx + mov r10d,eax + and r15d,edi ror r14d,2 add r12d,r13d - add r10d,r15d + xor r10d,r15d add ecx,r12d add r10d,r12d - lea rdi,QWORD PTR[1+rdi] - add r10d,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[12+rsp] - mov r14d,DWORD PTR[rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[44+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[44+rsp] add r12d,DWORD PTR[8+rsp] mov r13d,ecx - add r12d,r14d + add r12d,r15d mov r14d,r10d ror r13d,14 mov r15d,edx - mov DWORD PTR[8+rsp],r12d - ror r14d,9 xor r13d,ecx + ror r14d,9 xor r15d,r8d - ror r13d,5 - add r12d,r9d + mov DWORD PTR[8+rsp],r12d xor r14d,r10d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov r9d,r11d + + ror r13d,5 + add r12d,r9d + xor r15d,r8d ror r14d,11 xor r13d,ecx - xor r15d,r8d + add r12d,r15d - xor r9d,eax + mov r15d,r10d + add r12d,DWORD PTR[rbp] xor r14d,r10d - add r12d,r15d - mov r15d,r11d + xor r15d,r11d ror r13d,6 - and r9d,r10d - and r15d,eax + mov r9d,r11d + and edi,r15d ror r14d,2 add r12d,r13d - add r9d,r15d + xor r9d,edi add ebx,r12d add r9d,r12d - lea rdi,QWORD PTR[1+rdi] - add r9d,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[16+rsp] - mov r14d,DWORD PTR[4+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[4+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[48+rsp] - - ror r15d,2 - xor r15d,r14d + shr r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[48+rsp] add r12d,DWORD PTR[12+rsp] mov r13d,ebx - add r12d,r14d + add r12d,edi mov r14d,r9d ror r13d,14 - mov r15d,ecx - mov DWORD PTR[12+rsp],r12d + mov edi,ecx - ror r14d,9 xor r13d,ebx - xor r15d,edx + ror r14d,9 + xor edi,edx - ror r13d,5 - add r12d,r8d + mov DWORD PTR[12+rsp],r12d xor r14d,r9d + and edi,ebx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,ebx - mov r8d,r10d + ror r13d,5 + add r12d,r8d + xor edi,edx ror r14d,11 xor r13d,ebx - xor r15d,edx + add r12d,edi - xor r8d,r11d + mov edi,r9d + add r12d,DWORD PTR[rbp] xor r14d,r9d - add r12d,r15d - mov r15d,r10d + xor edi,r10d ror r13d,6 - and r8d,r9d - and r15d,r11d + mov r8d,r10d + and r15d,edi ror r14d,2 add r12d,r13d - add r8d,r15d + xor r8d,r15d add eax,r12d add r8d,r12d - lea rdi,QWORD PTR[1+rdi] - add r8d,r14d + lea rbp,QWORD PTR[20+rbp] mov r13d,DWORD PTR[20+rsp] - mov r14d,DWORD PTR[8+rsp] - mov r12d,r13d - 
mov r15d,r14d + mov r15d,DWORD PTR[8+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[52+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[52+rsp] add r12d,DWORD PTR[16+rsp] mov r13d,eax - add r12d,r14d + add r12d,r15d mov r14d,r8d ror r13d,14 mov r15d,ebx - mov DWORD PTR[16+rsp],r12d - ror r14d,9 xor r13d,eax + ror r14d,9 xor r15d,ecx - ror r13d,5 - add r12d,edx + mov DWORD PTR[16+rsp],r12d xor r14d,r8d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov edx,r9d + + ror r13d,5 + add r12d,edx + xor r15d,ecx ror r14d,11 xor r13d,eax - xor r15d,ecx + add r12d,r15d - xor edx,r10d + mov r15d,r8d + add r12d,DWORD PTR[rbp] xor r14d,r8d - add r12d,r15d - mov r15d,r9d + xor r15d,r9d ror r13d,6 - and edx,r8d - and r15d,r10d + mov edx,r9d + and edi,r15d ror r14d,2 add r12d,r13d - add edx,r15d + xor edx,edi add r11d,r12d add edx,r12d - lea rdi,QWORD PTR[1+rdi] - add edx,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[24+rsp] - mov r14d,DWORD PTR[12+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[12+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[56+rsp] - - ror r15d,2 - xor r15d,r14d + shr r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[56+rsp] add r12d,DWORD PTR[20+rsp] mov r13d,r11d - add r12d,r14d + add r12d,edi mov r14d,edx ror r13d,14 - mov r15d,eax - mov DWORD PTR[20+rsp],r12d + mov edi,eax - ror r14d,9 xor r13d,r11d - xor r15d,ebx + ror r14d,9 + xor edi,ebx - ror r13d,5 - add r12d,ecx + mov DWORD PTR[20+rsp],r12d xor r14d,edx + and edi,r11d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r11d - mov ecx,r8d + ror r13d,5 + add r12d,ecx + xor edi,ebx ror r14d,11 xor r13d,r11d - xor r15d,ebx + add r12d,edi - xor ecx,r9d + mov edi,edx + add r12d,DWORD PTR[rbp] xor r14d,edx - add r12d,r15d - mov r15d,r8d + xor edi,r8d ror r13d,6 - and ecx,edx - and r15d,r9d + mov ecx,r8d + and r15d,edi ror r14d,2 add r12d,r13d - add ecx,r15d + xor ecx,r15d add r10d,r12d add ecx,r12d - lea rdi,QWORD PTR[1+rdi] - add ecx,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[28+rsp] - mov r14d,DWORD PTR[16+rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[16+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[60+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[60+rsp] add r12d,DWORD PTR[24+rsp] mov r13d,r10d - add r12d,r14d + add r12d,r15d mov r14d,ecx ror r13d,14 mov r15d,r11d - mov DWORD PTR[24+rsp],r12d - ror r14d,9 xor r13d,r10d + ror r14d,9 xor r15d,eax - ror r13d,5 - add r12d,ebx + mov DWORD PTR[24+rsp],r12d xor r14d,ecx - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov ebx,edx + + ror r13d,5 + add r12d,ebx + xor r15d,eax ror r14d,11 xor r13d,r10d - xor r15d,eax + add r12d,r15d - xor ebx,r8d + mov r15d,ecx + add r12d,DWORD PTR[rbp] xor r14d,ecx - add r12d,r15d - mov r15d,edx + xor r15d,edx ror r13d,6 - and ebx,ecx - and r15d,r8d + mov ebx,edx + and edi,r15d ror r14d,2 add r12d,r13d - add ebx,r15d + xor 
ebx,edi add r9d,r12d add ebx,r12d - lea rdi,QWORD PTR[1+rdi] - add ebx,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[32+rsp] - mov r14d,DWORD PTR[20+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[20+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[rsp] - - ror r15d,2 - xor r15d,r14d + shr r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[rsp] add r12d,DWORD PTR[28+rsp] mov r13d,r9d - add r12d,r14d + add r12d,edi mov r14d,ebx ror r13d,14 - mov r15d,r10d - mov DWORD PTR[28+rsp],r12d + mov edi,r10d - ror r14d,9 xor r13d,r9d - xor r15d,r11d + ror r14d,9 + xor edi,r11d - ror r13d,5 - add r12d,eax + mov DWORD PTR[28+rsp],r12d xor r14d,ebx + and edi,r9d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r9d - mov eax,ecx + ror r13d,5 + add r12d,eax + xor edi,r11d ror r14d,11 xor r13d,r9d - xor r15d,r11d + add r12d,edi - xor eax,edx + mov edi,ebx + add r12d,DWORD PTR[rbp] xor r14d,ebx - add r12d,r15d - mov r15d,ecx + xor edi,ecx ror r13d,6 - and eax,ebx - and r15d,edx + mov eax,ecx + and r15d,edi ror r14d,2 add r12d,r13d - add eax,r15d + xor eax,r15d add r8d,r12d add eax,r12d - lea rdi,QWORD PTR[1+rdi] - add eax,r14d + lea rbp,QWORD PTR[20+rbp] mov r13d,DWORD PTR[36+rsp] - mov r14d,DWORD PTR[24+rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[24+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[4+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[4+rsp] add r12d,DWORD PTR[32+rsp] mov r13d,r8d - add r12d,r14d + add r12d,r15d mov r14d,eax ror r13d,14 mov r15d,r9d - mov DWORD PTR[32+rsp],r12d - ror r14d,9 xor r13d,r8d + ror r14d,9 xor r15d,r10d - ror r13d,5 - add r12d,r11d + mov DWORD PTR[32+rsp],r12d xor r14d,eax - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r8d - mov r11d,ebx + + ror r13d,5 + add r12d,r11d + xor r15d,r10d ror r14d,11 xor r13d,r8d - xor r15d,r10d + add r12d,r15d - xor r11d,ecx + mov r15d,eax + add r12d,DWORD PTR[rbp] xor r14d,eax - add r12d,r15d - mov r15d,ebx + xor r15d,ebx ror r13d,6 - and r11d,eax - and r15d,ecx + mov r11d,ebx + and edi,r15d ror r14d,2 add r12d,r13d - add r11d,r15d + xor r11d,edi add edx,r12d add r11d,r12d - lea rdi,QWORD PTR[1+rdi] - add r11d,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[40+rsp] - mov r14d,DWORD PTR[28+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[28+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[8+rsp] - - ror r15d,2 - xor r15d,r14d + shr r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[8+rsp] add r12d,DWORD PTR[36+rsp] mov r13d,edx - add r12d,r14d + add r12d,edi mov r14d,r11d ror r13d,14 - mov r15d,r8d - mov DWORD PTR[36+rsp],r12d + mov edi,r8d - ror r14d,9 xor r13d,edx - xor r15d,r9d + ror r14d,9 + xor edi,r9d - ror r13d,5 - add r12d,r10d + mov DWORD PTR[36+rsp],r12d xor r14d,r11d + and edi,edx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,edx - mov r10d,eax + ror r13d,5 + add r12d,r10d + xor edi,r9d ror r14d,11 xor r13d,edx - xor 
r15d,r9d + add r12d,edi - xor r10d,ebx + mov edi,r11d + add r12d,DWORD PTR[rbp] xor r14d,r11d - add r12d,r15d - mov r15d,eax + xor edi,eax ror r13d,6 - and r10d,r11d - and r15d,ebx + mov r10d,eax + and r15d,edi ror r14d,2 add r12d,r13d - add r10d,r15d + xor r10d,r15d add ecx,r12d add r10d,r12d - lea rdi,QWORD PTR[1+rdi] - add r10d,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[44+rsp] - mov r14d,DWORD PTR[32+rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[32+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[12+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[12+rsp] add r12d,DWORD PTR[40+rsp] mov r13d,ecx - add r12d,r14d + add r12d,r15d mov r14d,r10d ror r13d,14 mov r15d,edx - mov DWORD PTR[40+rsp],r12d - ror r14d,9 xor r13d,ecx + ror r14d,9 xor r15d,r8d - ror r13d,5 - add r12d,r9d + mov DWORD PTR[40+rsp],r12d xor r14d,r10d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,ecx - mov r9d,r11d + + ror r13d,5 + add r12d,r9d + xor r15d,r8d ror r14d,11 xor r13d,ecx - xor r15d,r8d + add r12d,r15d - xor r9d,eax + mov r15d,r10d + add r12d,DWORD PTR[rbp] xor r14d,r10d - add r12d,r15d - mov r15d,r11d + xor r15d,r11d ror r13d,6 - and r9d,r10d - and r15d,eax + mov r9d,r11d + and edi,r15d ror r14d,2 add r12d,r13d - add r9d,r15d + xor r9d,edi add ebx,r12d add r9d,r12d - lea rdi,QWORD PTR[1+rdi] - add r9d,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[48+rsp] - mov r14d,DWORD PTR[36+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[36+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[16+rsp] - - ror r15d,2 - xor r15d,r14d + shr r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[16+rsp] add r12d,DWORD PTR[44+rsp] mov r13d,ebx - add r12d,r14d + add r12d,edi mov r14d,r9d ror r13d,14 - mov r15d,ecx - mov DWORD PTR[44+rsp],r12d + mov edi,ecx - ror r14d,9 xor r13d,ebx - xor r15d,edx + ror r14d,9 + xor edi,edx - ror r13d,5 - add r12d,r8d + mov DWORD PTR[44+rsp],r12d xor r14d,r9d + and edi,ebx - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,ebx - mov r8d,r10d + ror r13d,5 + add r12d,r8d + xor edi,edx ror r14d,11 xor r13d,ebx - xor r15d,edx + add r12d,edi - xor r8d,r11d + mov edi,r9d + add r12d,DWORD PTR[rbp] xor r14d,r9d - add r12d,r15d - mov r15d,r10d + xor edi,r10d ror r13d,6 - and r8d,r9d - and r15d,r11d + mov r8d,r10d + and r15d,edi ror r14d,2 add r12d,r13d - add r8d,r15d + xor r8d,r15d add eax,r12d add r8d,r12d - lea rdi,QWORD PTR[1+rdi] - add r8d,r14d + lea rbp,QWORD PTR[20+rbp] mov r13d,DWORD PTR[52+rsp] - mov r14d,DWORD PTR[40+rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[40+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[20+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[20+rsp] add r12d,DWORD PTR[48+rsp] mov r13d,eax - add r12d,r14d + add r12d,r15d mov r14d,r8d ror r13d,14 mov r15d,ebx - mov DWORD PTR[48+rsp],r12d - ror r14d,9 xor r13d,eax + ror r14d,9 xor r15d,ecx - ror r13d,5 - 
add r12d,edx + mov DWORD PTR[48+rsp],r12d xor r14d,r8d - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,eax - mov edx,r9d + + ror r13d,5 + add r12d,edx + xor r15d,ecx ror r14d,11 xor r13d,eax - xor r15d,ecx + add r12d,r15d - xor edx,r10d + mov r15d,r8d + add r12d,DWORD PTR[rbp] xor r14d,r8d - add r12d,r15d - mov r15d,r9d + xor r15d,r9d ror r13d,6 - and edx,r8d - and r15d,r10d + mov edx,r9d + and edi,r15d ror r14d,2 add r12d,r13d - add edx,r15d + xor edx,edi add r11d,r12d add edx,r12d - lea rdi,QWORD PTR[1+rdi] - add edx,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[56+rsp] - mov r14d,DWORD PTR[44+rsp] - mov r12d,r13d - mov r15d,r14d + mov edi,DWORD PTR[44+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[24+rsp] - - ror r15d,2 - xor r15d,r14d + shr r12d,3 + ror r13d,7 + xor edi,r14d shr r14d,10 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[24+rsp] add r12d,DWORD PTR[52+rsp] mov r13d,r11d - add r12d,r14d + add r12d,edi mov r14d,edx ror r13d,14 - mov r15d,eax - mov DWORD PTR[52+rsp],r12d + mov edi,eax - ror r14d,9 xor r13d,r11d - xor r15d,ebx + ror r14d,9 + xor edi,ebx - ror r13d,5 - add r12d,ecx + mov DWORD PTR[52+rsp],r12d xor r14d,edx + and edi,r11d - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r11d - mov ecx,r8d + ror r13d,5 + add r12d,ecx + xor edi,ebx ror r14d,11 xor r13d,r11d - xor r15d,ebx + add r12d,edi - xor ecx,r9d + mov edi,edx + add r12d,DWORD PTR[rbp] xor r14d,edx - add r12d,r15d - mov r15d,r8d + xor edi,r8d ror r13d,6 - and ecx,edx - and r15d,r9d + mov ecx,r8d + and r15d,edi ror r14d,2 add r12d,r13d - add ecx,r15d + xor ecx,r15d add r10d,r12d add ecx,r12d - lea rdi,QWORD PTR[1+rdi] - add ecx,r14d + lea rbp,QWORD PTR[4+rbp] mov r13d,DWORD PTR[60+rsp] - mov r14d,DWORD PTR[48+rsp] - mov r12d,r13d - mov r15d,r14d + mov r15d,DWORD PTR[48+rsp] - ror r12d,11 - xor r12d,r13d - shr r13d,3 + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 - ror r12d,7 xor r13d,r12d - mov r12d,DWORD PTR[28+rsp] - - ror r15d,2 + shr r12d,3 + ror r13d,7 xor r15d,r14d shr r14d,10 ror r15d,17 - add r12d,r13d - xor r14d,r15d + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[28+rsp] add r12d,DWORD PTR[56+rsp] mov r13d,r10d - add r12d,r14d + add r12d,r15d mov r14d,ecx ror r13d,14 mov r15d,r11d - mov DWORD PTR[56+rsp],r12d - ror r14d,9 xor r13d,r10d + ror r14d,9 xor r15d,eax - ror r13d,5 - add r12d,ebx + mov DWORD PTR[56+rsp],r12d xor r14d,ecx - - add r12d,DWORD PTR[rdi*4+rbp] and r15d,r10d - mov ebx,edx + + ror r13d,5 + add r12d,ebx + xor r15d,eax ror r14d,11 xor r13d,r10d - xor r15d,eax + add r12d,r15d - xor ebx,r8d + mov r15d,ecx + add r12d,DWORD PTR[rbp] xor r14d,ecx - add r12d,r15d - mov r15d,edx - ror r13d,6 - and ebx,ecx - and r15d,r8d + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[rsp] + mov edi,DWORD PTR[52+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[32+rsp] + + add r12d,DWORD PTR[60+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD PTR[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + 
xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD PTR[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,QWORD PTR[20+rbp] + cmp BYTE PTR[3+rbp],0 + jnz $L$rounds_16_xx + + mov rdi,QWORD PTR[((64+0))+rsp] + add eax,r14d + lea rsi,QWORD PTR[64+rsi] + + add eax,DWORD PTR[rdi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop + + mov rsi,QWORD PTR[((64+24))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order:: +sha256_block_data_order ENDP +ALIGN 64 + +K256:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha256_block_data_order_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov 
QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[(-8-80)+rax],xmm6 + movaps XMMWORD PTR[(-8-64)+rax],xmm7 + movaps XMMWORD PTR[(-8-48)+rax],xmm8 + movaps XMMWORD PTR[(-8-32)+rax],xmm9 + movaps XMMWORD PTR[(-8-16)+rax],xmm10 +$L$prologue_shaext:: + lea rcx,QWORD PTR[((K256+128))] + movdqu xmm1,XMMWORD PTR[rdi] + movdqu xmm2,XMMWORD PTR[16+rdi] + movdqa xmm7,XMMWORD PTR[((512-128))+rcx] + + pshufd xmm0,xmm1,01bh + pshufd xmm1,xmm1,1h + pshufd xmm2,xmm2,01bh + movdqa xmm8,xmm7 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + movdqu xmm3,XMMWORD PTR[rsi] + movdqu xmm4,XMMWORD PTR[16+rsi] + movdqu xmm5,XMMWORD PTR[32+rsi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD PTR[48+rsi] + + movdqa xmm0,XMMWORD PTR[((0-128))+rcx] + paddd xmm0,xmm3 +DB 102,15,56,0,231 + movdqa xmm10,xmm2 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + nop + movdqa xmm9,xmm1 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((32-128))+rcx] + paddd xmm0,xmm4 +DB 102,15,56,0,239 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + lea rsi,QWORD PTR[64+rsi] +DB 15,56,204,220 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((64-128))+rcx] + paddd xmm0,xmm5 +DB 102,15,56,0,247 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((96-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((128-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((160-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((192-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((224-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((256-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((288-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((320-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((352-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((384-128))+rcx] + paddd xmm0,xmm3 +DB 
15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((416-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 +DB 15,56,203,202 + paddd xmm6,xmm7 + + movdqa xmm0,XMMWORD PTR[((448-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh +DB 15,56,205,245 + movdqa xmm7,xmm8 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((480-128))+rcx] + paddd xmm0,xmm6 + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + dec rdx + nop +DB 15,56,203,202 + + paddd xmm2,xmm10 + paddd xmm1,xmm9 + jnz $L$oop_shaext + + pshufd xmm2,xmm2,1h + pshufd xmm7,xmm1,01bh + pshufd xmm1,xmm1,1h + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + + movdqu XMMWORD PTR[rdi],xmm1 + movdqu XMMWORD PTR[16+rdi],xmm2 + movaps xmm6,XMMWORD PTR[((-8-80))+rax] + movaps xmm7,XMMWORD PTR[((-8-64))+rax] + movaps xmm8,XMMWORD PTR[((-8-48))+rax] + movaps xmm9,XMMWORD PTR[((-8-32))+rax] + movaps xmm10,XMMWORD PTR[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_shaext:: +sha256_block_data_order_shaext ENDP + +ALIGN 64 +sha256_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$ssse3_shortcut:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,160 + lea rdx,QWORD PTR[rdx*4+rsi] + and rsp,-64 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 + movaps XMMWORD PTR[(64+32)+rsp],xmm6 + movaps XMMWORD PTR[(64+48)+rsp],xmm7 + movaps XMMWORD PTR[(64+64)+rsp],xmm8 + movaps XMMWORD PTR[(64+80)+rsp],xmm9 +$L$prologue_ssse3:: + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + + + jmp $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3:: + movdqa xmm7,XMMWORD PTR[((K256+512))] + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD PTR[48+rsi] + lea rbp,QWORD PTR[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD PTR[rbp] + movdqa xmm5,XMMWORD PTR[32+rbp] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD PTR[64+rbp] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD PTR[96+rbp] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD PTR[32+rsp],xmm6 + xor edi,ecx + movdqa XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47:: + sub rbp,-128 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add 
edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,251,4 + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror 
r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[32+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[64+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[32+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov 
edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[96+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[48+rsp],xmm6 + cmp BYTE PTR[131+rbp],0 + jne $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor 
r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor 
edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD PTR[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD PTR[rdi] + lea rsi,QWORD PTR[64+rsi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop_ssse3 + + mov rsi,QWORD PTR[((64+24))+rsp] + movaps xmm6,XMMWORD PTR[((64+32))+rsp] + movaps xmm7,XMMWORD PTR[((64+48))+rsp] + movaps xmm8,XMMWORD PTR[((64+64))+rsp] + movaps xmm9,XMMWORD PTR[((64+80))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_ssse3:: +sha256_block_data_order_ssse3 ENDP + +ALIGN 64 +sha256_block_data_order_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$avx_shortcut:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,160 + lea rdx,QWORD PTR[rdx*4+rsi] + and rsp,-64 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 + movaps XMMWORD PTR[(64+32)+rsp],xmm6 + movaps XMMWORD PTR[(64+48)+rsp],xmm7 + movaps XMMWORD PTR[(64+64)+rsp],xmm8 + movaps XMMWORD PTR[(64+80)+rsp],xmm9 +$L$prologue_avx:: + + vzeroupper + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + vmovdqa xmm8,XMMWORD PTR[((K256+512+32))] + vmovdqa xmm9,XMMWORD PTR[((K256+512+64))] + jmp $L$loop_avx +ALIGN 16 +$L$loop_avx:: + vmovdqa xmm7,XMMWORD PTR[((K256+512))] + vmovdqu xmm0,XMMWORD PTR[rsi] + vmovdqu xmm1,XMMWORD PTR[16+rsi] + vmovdqu xmm2,XMMWORD PTR[32+rsi] + vmovdqu xmm3,XMMWORD PTR[48+rsi] + vpshufb xmm0,xmm0,xmm7 + lea rbp,QWORD PTR[K256] + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD PTR[rbp] + vpshufb xmm3,xmm3,xmm7 + vpaddd 
xmm5,xmm1,XMMWORD PTR[32+rbp] + vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] + vpaddd xmm7,xmm3,XMMWORD PTR[96+rbp] + vmovdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + vmovdqa XMMWORD PTR[16+rsp],xmm5 + mov edi,ebx + vmovdqa XMMWORD PTR[32+rsp],xmm6 + xor edi,ecx + vmovdqa XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47:: + sub rbp,-128 + vpalignr xmm4,xmm1,xmm0,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm3,xmm2,4 + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm0,xmm0,xmm7 + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + vpslld xmm5,xmm4,14 + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor edi,ebx + vpshufd xmm7,xmm3,250 + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + xor r12d,r9d + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + vpaddd xmm0,xmm0,xmm4 + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + vpshufb xmm6,xmm6,xmm8 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpaddd xmm0,xmm0,xmm6 + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + vpshufd xmm7,xmm0,80 + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,r10d + add r9d,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + vpsrlq xmm7,xmm7,2 + add r9d,edi + mov r13d,ebx + add r14d,r9d + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + vpaddd xmm0,xmm0,xmm6 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpaddd xmm6,xmm0,XMMWORD PTR[rbp] + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD PTR[rsp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm0,xmm3,4 + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm1,xmm1,xmm7 + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + vpslld xmm5,xmm4,14 + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor edi,r9d + vpshufd xmm7,xmm0,250 + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + shrd 
r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD PTR[20+rsp] + mov edi,edx + xor r12d,ebx + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + vpaddd xmm1,xmm1,xmm4 + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + vpshufb xmm6,xmm6,xmm8 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpaddd xmm1,xmm1,xmm6 + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + vpshufd xmm7,xmm1,80 + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,ecx + add ebx,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + vpsrlq xmm7,xmm7,2 + add ebx,edi + mov r13d,r9d + add r14d,ebx + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + vpaddd xmm1,xmm1,xmm6 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpaddd xmm6,xmm1,XMMWORD PTR[32+rbp] + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD PTR[16+rsp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + vpalignr xmm7,xmm1,xmm0,4 + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + vpaddd xmm2,xmm2,xmm7 + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + vpsrld xmm7,xmm4,3 + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + vpslld xmm5,xmm4,14 + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,eax + add r11d,r13d + xor edi,ebx + vpshufd xmm7,xmm1,250 + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + vpsrld xmm6,xmm6,11 + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,r11d + and r12d,edx + xor r13d,edx + vpsrld xmm6,xmm7,10 + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + xor r12d,r9d + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + vpaddd xmm2,xmm2,xmm4 + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + vpsrlq xmm7,xmm7,2 + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + vpshufb xmm6,xmm6,xmm8 + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + vpaddd xmm2,xmm2,xmm6 + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + vpshufd xmm7,xmm2,80 + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,r10d + add r9d,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,r11d + shrd 
r14d,r14d,2 + add ebx,r9d + vpsrlq xmm7,xmm7,2 + add r9d,edi + mov r13d,ebx + add r14d,r9d + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + vpaddd xmm2,xmm2,xmm6 + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + vpaddd xmm6,xmm2,XMMWORD PTR[64+rbp] + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + vmovdqa XMMWORD PTR[32+rsp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + vpalignr xmm7,xmm2,xmm1,4 + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + vpsrld xmm6,xmm4,7 + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + vpaddd xmm3,xmm3,xmm7 + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + vpsrld xmm7,xmm4,3 + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + vpslld xmm5,xmm4,14 + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + vpxor xmm4,xmm7,xmm6 + xor r14d,r8d + add edx,r13d + xor edi,r9d + vpshufd xmm7,xmm2,250 + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + vpsrld xmm6,xmm6,11 + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + vpxor xmm4,xmm4,xmm5 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + vpslld xmm5,xmm5,11 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + vpxor xmm4,xmm4,xmm6 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + vpsrld xmm6,xmm7,10 + add ecx,DWORD PTR[52+rsp] + mov edi,edx + xor r12d,ebx + vpxor xmm4,xmm4,xmm5 + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + vpsrlq xmm7,xmm7,17 + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + vpaddd xmm3,xmm3,xmm4 + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + vpxor xmm6,xmm6,xmm7 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + vpsrlq xmm7,xmm7,2 + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + vpxor xmm6,xmm6,xmm7 + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + vpshufb xmm6,xmm6,xmm8 + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + vpaddd xmm3,xmm3,xmm6 + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + vpshufd xmm7,xmm3,80 + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + vpsrld xmm6,xmm7,10 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + vpsrlq xmm7,xmm7,17 + and edi,r15d + xor r14d,ecx + add ebx,r13d + vpxor xmm6,xmm6,xmm7 + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + vpsrlq xmm7,xmm7,2 + add ebx,edi + mov r13d,r9d + add r14d,ebx + vpxor xmm6,xmm6,xmm7 + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + vpshufb xmm6,xmm6,xmm9 + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + vpaddd xmm3,xmm3,xmm6 + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + vpaddd xmm6,xmm3,XMMWORD PTR[96+rbp] + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + vmovdqa XMMWORD PTR[48+rsp],xmm6 + cmp BYTE PTR[131+rbp],0 + jne $L$avx_00_47 + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + shrd r14d,r14d,2 + add 
edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + shrd r14d,r14d,9 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + xor r12d,r9d + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov edi,edx + xor r12d,ebx + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + shrd r13d,r13d,14 + mov eax,r14d + mov r12d,r9d + shrd r14d,r14d,9 + xor r13d,r8d + xor r12d,r10d + shrd r13d,r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + shrd r14d,r14d,11 + xor r15d,ebx + add r11d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + shrd r14d,r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + shrd r13d,r13d,14 + mov r11d,r14d + mov r12d,r8d + 
shrd r14d,r14d,9 + xor r13d,edx + xor r12d,r9d + shrd r13d,r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + xor r12d,r9d + shrd r14d,r14d,11 + xor edi,eax + add r10d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + shrd r14d,r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + shrd r13d,r13d,14 + mov r10d,r14d + mov r12d,edx + shrd r14d,r14d,9 + xor r13d,ecx + xor r12d,r8d + shrd r13d,r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + xor r12d,r8d + shrd r14d,r14d,11 + xor r15d,r11d + add r9d,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + shrd r14d,r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + shrd r13d,r13d,14 + mov r9d,r14d + mov r12d,ecx + shrd r14d,r14d,9 + xor r13d,ebx + xor r12d,edx + shrd r13d,r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov edi,r9d + xor r12d,edx + shrd r14d,r14d,11 + xor edi,r10d + add r8d,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + shrd r14d,r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + shrd r13d,r13d,14 + mov r8d,r14d + mov r12d,ebx + shrd r14d,r14d,9 + xor r13d,eax + xor r12d,ecx + shrd r13d,r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + shrd r14d,r14d,11 + xor r15d,r9d + add edx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + shrd r14d,r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + shrd r13d,r13d,14 + mov edx,r14d + mov r12d,eax + shrd r14d,r14d,9 + xor r13d,r11d + xor r12d,ebx + shrd r13d,r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov edi,edx + xor r12d,ebx + shrd r14d,r14d,11 + xor edi,r8d + add ecx,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + shrd r14d,r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + shrd r13d,r13d,14 + mov ecx,r14d + mov r12d,r11d + shrd r14d,r14d,9 + xor r13d,r10d + xor r12d,eax + shrd r13d,r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + xor r12d,eax + shrd r14d,r14d,11 + xor r15d,edx + add ebx,r12d + shrd r13d,r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + shrd r14d,r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + shrd r13d,r13d,14 + mov ebx,r14d + mov r12d,r10d + shrd r14d,r14d,9 + xor r13d,r9d + xor r12d,r11d + shrd r13d,r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov edi,ebx + xor r12d,r11d + shrd r14d,r14d,11 + xor edi,ecx + add eax,r12d + shrd r13d,r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + shrd r14d,r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD PTR[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD PTR[rdi] + lea rsi,QWORD PTR[64+rsi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] - ror r14d,2 - add r12d,r13d - add ebx,r15d + cmp rsi,QWORD PTR[((64+16))+rsp] - add r9d,r12d - add ebx,r12d - lea rdi,QWORD PTR[1+rdi] - add ebx,r14d + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov 
DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop_avx - mov r13d,DWORD PTR[rsp] - mov r14d,DWORD PTR[52+rsp] - mov r12d,r13d - mov r15d,r14d + mov rsi,QWORD PTR[((64+24))+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((64+32))+rsp] + movaps xmm7,XMMWORD PTR[((64+48))+rsp] + movaps xmm8,XMMWORD PTR[((64+64))+rsp] + movaps xmm9,XMMWORD PTR[((64+80))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_avx:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_avx:: +sha256_block_data_order_avx ENDP - ror r12d,11 - xor r12d,r13d - shr r13d,3 +ALIGN 64 +sha256_block_data_order_avx2 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 - ror r12d,7 - xor r13d,r12d - mov r12d,DWORD PTR[32+rsp] - ror r15d,2 - xor r15d,r14d - shr r14d,10 +$L$avx2_shortcut:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + sub rsp,608 + shl rdx,4 + and rsp,-256*4 + lea rdx,QWORD PTR[rdx*4+rsi] + add rsp,448 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 + movaps XMMWORD PTR[(64+32)+rsp],xmm6 + movaps XMMWORD PTR[(64+48)+rsp],xmm7 + movaps XMMWORD PTR[(64+64)+rsp],xmm8 + movaps XMMWORD PTR[(64+80)+rsp],xmm9 +$L$prologue_avx2:: + + vzeroupper + sub rsi,-16*4 + mov eax,DWORD PTR[rdi] + mov r12,rsi + mov ebx,DWORD PTR[4+rdi] + cmp rsi,rdx + mov ecx,DWORD PTR[8+rdi] + cmove r12,rsp + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + vmovdqa ymm8,YMMWORD PTR[((K256+512+32))] + vmovdqa ymm9,YMMWORD PTR[((K256+512+64))] + jmp $L$oop_avx2 +ALIGN 16 +$L$oop_avx2:: + vmovdqa ymm7,YMMWORD PTR[((K256+512))] + vmovdqu xmm0,XMMWORD PTR[((-64+0))+rsi] + vmovdqu xmm1,XMMWORD PTR[((-64+16))+rsi] + vmovdqu xmm2,XMMWORD PTR[((-64+32))+rsi] + vmovdqu xmm3,XMMWORD PTR[((-64+48))+rsi] + + vinserti128 ymm0,ymm0,XMMWORD PTR[r12],1 + vinserti128 ymm1,ymm1,XMMWORD PTR[16+r12],1 + vpshufb ymm0,ymm0,ymm7 + vinserti128 ymm2,ymm2,XMMWORD PTR[32+r12],1 + vpshufb ymm1,ymm1,ymm7 + vinserti128 ymm3,ymm3,XMMWORD PTR[48+r12],1 - ror r15d,17 - add r12d,r13d - xor r14d,r15d + lea rbp,QWORD PTR[K256] + vpshufb ymm2,ymm2,ymm7 + vpaddd ymm4,ymm0,YMMWORD PTR[rbp] + vpshufb ymm3,ymm3,ymm7 + vpaddd ymm5,ymm1,YMMWORD PTR[32+rbp] + vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp] + vpaddd ymm7,ymm3,YMMWORD PTR[96+rbp] + vmovdqa YMMWORD PTR[rsp],ymm4 + xor r14d,r14d + vmovdqa YMMWORD PTR[32+rsp],ymm5 + lea rsp,QWORD PTR[((-64))+rsp] + mov edi,ebx + vmovdqa YMMWORD PTR[rsp],ymm6 + xor edi,ecx + vmovdqa YMMWORD PTR[32+rsp],ymm7 + mov r12d,r9d + sub rbp,-16*2*4 + jmp $L$avx2_00_47 - add r12d,DWORD PTR[60+rsp] - mov r13d,r9d - add r12d,r14d - mov r14d,ebx - ror r13d,14 +ALIGN 16 +$L$avx2_00_47:: + lea rsp,QWORD PTR[((-64))+rsp] + vpalignr ymm4,ymm1,ymm0,4 + add r11d,DWORD PTR[((0+128))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + vpalignr ymm7,ymm3,ymm2,4 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + vpsrld ymm6,ymm4,7 + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + vpaddd ymm0,ymm0,ymm7 + lea r11d,DWORD PTR[r12*1+r11] + xor 
r13d,r14d + mov r15d,eax + vpsrld ymm7,ymm4,3 + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + vpslld ymm5,ymm4,14 + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,ebx + vpshufd ymm7,ymm3,250 + xor r14d,r13d + lea r11d,DWORD PTR[rdi*1+r11] + mov r12d,r8d + vpsrld ymm6,ymm6,11 + add r10d,DWORD PTR[((4+128))+rsp] + and r12d,edx + rorx r13d,edx,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + vpslld ymm5,ymm5,11 + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + vpxor ymm4,ymm4,ymm6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov edi,r11d + vpsrld ymm6,ymm7,10 + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor edi,eax + vpxor ymm4,ymm4,ymm5 + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,eax + vpaddd ymm0,ymm0,ymm4 + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + vpxor ymm6,ymm6,ymm7 + add r9d,DWORD PTR[((8+128))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + vpxor ymm6,ymm6,ymm7 + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + vpshufb ymm6,ymm6,ymm8 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d mov r15d,r10d - mov DWORD PTR[60+rsp],r12d - - ror r14d,9 - xor r13d,r9d + vpaddd ymm0,ymm0,ymm6 + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d - - ror r13d,5 - add r12d,eax - xor r14d,ebx - - add r12d,DWORD PTR[rdi*4+rbp] - and r15d,r9d - mov eax,ecx - - ror r14d,11 - xor r13d,r9d + vpshufd ymm7,ymm0,80 + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,r11d + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea r9d,DWORD PTR[rdi*1+r9] + mov r12d,ecx + vpxor ymm6,ymm6,ymm7 + add r8d,DWORD PTR[((12+128))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + vpsrlq ymm7,ymm7,2 + rorx edi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + vpxor ymm6,ymm6,ymm7 + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + vpshufb ymm6,ymm6,ymm9 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov edi,r9d + vpaddd ymm0,ymm0,ymm6 + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor edi,r10d + vpaddd ymm6,ymm0,YMMWORD PTR[rbp] + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + vmovdqa YMMWORD PTR[rsp],ymm6 + vpalignr ymm4,ymm2,ymm1,4 + add edx,DWORD PTR[((32+128))+rsp] + and r12d,eax + rorx r13d,eax,25 + vpalignr ymm7,ymm0,ymm3,4 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + vpsrld ymm6,ymm4,7 + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + vpaddd ymm1,ymm1,ymm7 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + vpsrld ymm7,ymm4,3 + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + vpslld ymm5,ymm4,14 + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,r9d + vpshufd ymm7,ymm0,250 + xor r14d,r13d + lea edx,DWORD PTR[rdi*1+rdx] + mov r12d,eax + vpsrld ymm6,ymm6,11 + add ecx,DWORD PTR[((36+128))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + vpslld ymm5,ymm5,11 + andn 
r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + vpxor ymm4,ymm4,ymm6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov edi,edx + vpsrld ymm6,ymm7,10 + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor edi,r8d + vpxor ymm4,ymm4,ymm5 + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,r8d + vpaddd ymm1,ymm1,ymm4 + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + vpxor ymm6,ymm6,ymm7 + add ebx,DWORD PTR[((40+128))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + vpxor ymm6,ymm6,ymm7 + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + vpshufb ymm6,ymm6,ymm8 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + vpaddd ymm1,ymm1,ymm6 + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + vpshufd ymm7,ymm1,80 + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,edx + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea ebx,DWORD PTR[rdi*1+rbx] + mov r12d,r10d + vpxor ymm6,ymm6,ymm7 + add eax,DWORD PTR[((44+128))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + vpsrlq ymm7,ymm7,2 + rorx edi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + vpxor ymm6,ymm6,ymm7 + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + vpshufb ymm6,ymm6,ymm9 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov edi,ebx + vpaddd ymm1,ymm1,ymm6 + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor edi,ecx + vpaddd ymm6,ymm1,YMMWORD PTR[32+rbp] + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + vmovdqa YMMWORD PTR[32+rsp],ymm6 + lea rsp,QWORD PTR[((-64))+rsp] + vpalignr ymm4,ymm3,ymm2,4 + add r11d,DWORD PTR[((0+128))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + vpalignr ymm7,ymm1,ymm0,4 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + vpsrld ymm6,ymm4,7 + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + vpaddd ymm2,ymm2,ymm7 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + vpsrld ymm7,ymm4,3 + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + vpslld ymm5,ymm4,14 + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,ebx + vpshufd ymm7,ymm1,250 + xor r14d,r13d + lea r11d,DWORD PTR[rdi*1+r11] + mov r12d,r8d + vpsrld ymm6,ymm6,11 + add r10d,DWORD PTR[((4+128))+rsp] + and r12d,edx + rorx r13d,edx,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + vpslld ymm5,ymm5,11 + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + vpxor ymm4,ymm4,ymm6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov edi,r11d + vpsrld ymm6,ymm7,10 + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor edi,eax + vpxor ymm4,ymm4,ymm5 + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,eax + vpaddd ymm2,ymm2,ymm4 + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + vpxor ymm6,ymm6,ymm7 + add r9d,DWORD PTR[((8+128))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + vpxor ymm6,ymm6,ymm7 + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 
+ vpshufb ymm6,ymm6,ymm8 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + vpaddd ymm2,ymm2,ymm6 + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] xor r15d,r11d - - xor eax,edx - xor r14d,ebx - add r12d,r15d + vpshufd ymm7,ymm2,80 + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,r11d + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea r9d,DWORD PTR[rdi*1+r9] + mov r12d,ecx + vpxor ymm6,ymm6,ymm7 + add r8d,DWORD PTR[((12+128))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + vpsrlq ymm7,ymm7,2 + rorx edi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + vpxor ymm6,ymm6,ymm7 + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + vpshufb ymm6,ymm6,ymm9 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov edi,r9d + vpaddd ymm2,ymm2,ymm6 + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor edi,r10d + vpaddd ymm6,ymm2,YMMWORD PTR[64+rbp] + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + vmovdqa YMMWORD PTR[rsp],ymm6 + vpalignr ymm4,ymm0,ymm3,4 + add edx,DWORD PTR[((32+128))+rsp] + and r12d,eax + rorx r13d,eax,25 + vpalignr ymm7,ymm2,ymm1,4 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + vpsrld ymm6,ymm4,7 + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + vpaddd ymm3,ymm3,ymm7 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + vpsrld ymm7,ymm4,3 + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + vpslld ymm5,ymm4,14 + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + vpxor ymm4,ymm7,ymm6 + and edi,r15d + xor r14d,r12d + xor edi,r9d + vpshufd ymm7,ymm2,250 + xor r14d,r13d + lea edx,DWORD PTR[rdi*1+rdx] + mov r12d,eax + vpsrld ymm6,ymm6,11 + add ecx,DWORD PTR[((36+128))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + vpxor ymm4,ymm4,ymm5 + rorx edi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + vpslld ymm5,ymm5,11 + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + vpxor ymm4,ymm4,ymm6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov edi,edx + vpsrld ymm6,ymm7,10 + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor edi,r8d + vpxor ymm4,ymm4,ymm5 + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + vpsrlq ymm7,ymm7,17 + and r15d,edi + xor r14d,r12d + xor r15d,r8d + vpaddd ymm3,ymm3,ymm4 + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + vpxor ymm6,ymm6,ymm7 + add ebx,DWORD PTR[((40+128))+rsp] + and r12d,r10d + rorx r13d,r10d,25 + vpsrlq ymm7,ymm7,2 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + vpxor ymm6,ymm6,ymm7 + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + vpshufb ymm6,ymm6,ymm8 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + vpaddd ymm3,ymm3,ymm6 + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + vpshufd ymm7,ymm3,80 + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + vpsrld ymm6,ymm7,10 + and edi,r15d + xor r14d,r12d + xor edi,edx + vpsrlq ymm7,ymm7,17 + xor r14d,r13d + lea ebx,DWORD PTR[rdi*1+rbx] + mov r12d,r10d + vpxor ymm6,ymm6,ymm7 + add eax,DWORD PTR[((44+128))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + vpsrlq ymm7,ymm7,2 + rorx edi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + vpxor ymm6,ymm6,ymm7 + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + vpshufb ymm6,ymm6,ymm9 + lea eax,DWORD 
PTR[r12*1+rax] + xor r13d,r14d + mov edi,ebx + vpaddd ymm3,ymm3,ymm6 + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor edi,ecx + vpaddd ymm6,ymm3,YMMWORD PTR[96+rbp] + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + vmovdqa YMMWORD PTR[32+rsp],ymm6 + lea rbp,QWORD PTR[128+rbp] + cmp BYTE PTR[3+rbp],0 + jne $L$avx2_00_47 + add r11d,DWORD PTR[((0+64))+rsp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and edi,r15d + xor r14d,r12d + xor edi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rdi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[((4+64))+rsp] + and r12d,edx + rorx r13d,edx,25 + rorx edi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov edi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor edi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,edi + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[((8+64))+rsp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and edi,r15d + xor r14d,r12d + xor edi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rdi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[((12+64))+rsp] + and r12d,ebx + rorx r13d,ebx,25 + rorx edi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov edi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor edi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[((32+64))+rsp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and edi,r15d + xor r14d,r12d + xor edi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rdi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[((36+64))+rsp] + and r12d,r11d + rorx r13d,r11d,25 + rorx edi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov edi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor edi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,edi + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[((40+64))+rsp] + and r12d,r10d + 
rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and edi,r15d + xor r14d,r12d + xor edi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rdi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[((44+64))+rsp] + and r12d,r9d + rorx r13d,r9d,25 + rorx edi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov edi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor edi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + add r11d,DWORD PTR[rsp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and edi,r15d + xor r14d,r12d + xor edi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rdi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[4+rsp] + and r12d,edx + rorx r13d,edx,25 + rorx edi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov edi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor edi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,edi + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[8+rsp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and edi,r15d + xor r14d,r12d + xor edi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rdi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[12+rsp] + and r12d,ebx + rorx r13d,ebx,25 + rorx edi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov edi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor edi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[32+rsp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and edi,r15d + xor r14d,r12d + xor edi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rdi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[36+rsp] + and r12d,r11d + rorx r13d,r11d,25 + rorx edi,r11d,11 + lea 
edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov edi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor edi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,edi + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[40+rsp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and edi,r15d + xor r14d,r12d + xor edi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rdi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[44+rsp] + and r12d,r9d + rorx r13d,r9d,25 + rorx edi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov edi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor edi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + mov rdi,QWORD PTR[512+rsp] + add eax,r14d - ror r13d,6 - and eax,ebx - and r15d,edx + lea rbp,QWORD PTR[448+rsp] - ror r14d,2 - add r12d,r13d - add eax,r15d + add eax,DWORD PTR[rdi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] - add r8d,r12d - add eax,r12d - lea rdi,QWORD PTR[1+rdi] - add eax,r14d + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d - cmp rdi,64 - jb $L$rounds_16_xx + cmp rsi,QWORD PTR[80+rbp] + je $L$done_avx2 - mov rdi,QWORD PTR[((64+0))+rsp] - lea rsi,QWORD PTR[64+rsi] + xor r14d,r14d + mov edi,ebx + xor edi,ecx + mov r12d,r9d + jmp $L$ower_avx2 +ALIGN 16 +$L$ower_avx2:: + add r11d,DWORD PTR[((0+16))+rbp] + and r12d,r8d + rorx r13d,r8d,25 + rorx r15d,r8d,11 + lea eax,DWORD PTR[r14*1+rax] + lea r11d,DWORD PTR[r12*1+r11] + andn r12d,r8d,r10d + xor r13d,r15d + rorx r14d,r8d,6 + lea r11d,DWORD PTR[r12*1+r11] + xor r13d,r14d + mov r15d,eax + rorx r12d,eax,22 + lea r11d,DWORD PTR[r13*1+r11] + xor r15d,ebx + rorx r14d,eax,13 + rorx r13d,eax,2 + lea edx,DWORD PTR[r11*1+rdx] + and edi,r15d + xor r14d,r12d + xor edi,ebx + xor r14d,r13d + lea r11d,DWORD PTR[rdi*1+r11] + mov r12d,r8d + add r10d,DWORD PTR[((4+16))+rbp] + and r12d,edx + rorx r13d,edx,25 + rorx edi,edx,11 + lea r11d,DWORD PTR[r14*1+r11] + lea r10d,DWORD PTR[r12*1+r10] + andn r12d,edx,r9d + xor r13d,edi + rorx r14d,edx,6 + lea r10d,DWORD PTR[r12*1+r10] + xor r13d,r14d + mov edi,r11d + rorx r12d,r11d,22 + lea r10d,DWORD PTR[r13*1+r10] + xor edi,eax + rorx r14d,r11d,13 + rorx r13d,r11d,2 + lea ecx,DWORD PTR[r10*1+rcx] + and r15d,edi + xor r14d,r12d + xor r15d,eax + xor r14d,r13d + lea r10d,DWORD PTR[r15*1+r10] + mov r12d,edx + add r9d,DWORD PTR[((8+16))+rbp] + and r12d,ecx + rorx r13d,ecx,25 + rorx r15d,ecx,11 + lea r10d,DWORD PTR[r14*1+r10] + lea r9d,DWORD PTR[r12*1+r9] + andn r12d,ecx,r8d + xor r13d,r15d + rorx 
r14d,ecx,6 + lea r9d,DWORD PTR[r12*1+r9] + xor r13d,r14d + mov r15d,r10d + rorx r12d,r10d,22 + lea r9d,DWORD PTR[r13*1+r9] + xor r15d,r11d + rorx r14d,r10d,13 + rorx r13d,r10d,2 + lea ebx,DWORD PTR[r9*1+rbx] + and edi,r15d + xor r14d,r12d + xor edi,r11d + xor r14d,r13d + lea r9d,DWORD PTR[rdi*1+r9] + mov r12d,ecx + add r8d,DWORD PTR[((12+16))+rbp] + and r12d,ebx + rorx r13d,ebx,25 + rorx edi,ebx,11 + lea r9d,DWORD PTR[r14*1+r9] + lea r8d,DWORD PTR[r12*1+r8] + andn r12d,ebx,edx + xor r13d,edi + rorx r14d,ebx,6 + lea r8d,DWORD PTR[r12*1+r8] + xor r13d,r14d + mov edi,r9d + rorx r12d,r9d,22 + lea r8d,DWORD PTR[r13*1+r8] + xor edi,r10d + rorx r14d,r9d,13 + rorx r13d,r9d,2 + lea eax,DWORD PTR[r8*1+rax] + and r15d,edi + xor r14d,r12d + xor r15d,r10d + xor r14d,r13d + lea r8d,DWORD PTR[r15*1+r8] + mov r12d,ebx + add edx,DWORD PTR[((32+16))+rbp] + and r12d,eax + rorx r13d,eax,25 + rorx r15d,eax,11 + lea r8d,DWORD PTR[r14*1+r8] + lea edx,DWORD PTR[r12*1+rdx] + andn r12d,eax,ecx + xor r13d,r15d + rorx r14d,eax,6 + lea edx,DWORD PTR[r12*1+rdx] + xor r13d,r14d + mov r15d,r8d + rorx r12d,r8d,22 + lea edx,DWORD PTR[r13*1+rdx] + xor r15d,r9d + rorx r14d,r8d,13 + rorx r13d,r8d,2 + lea r11d,DWORD PTR[rdx*1+r11] + and edi,r15d + xor r14d,r12d + xor edi,r9d + xor r14d,r13d + lea edx,DWORD PTR[rdi*1+rdx] + mov r12d,eax + add ecx,DWORD PTR[((36+16))+rbp] + and r12d,r11d + rorx r13d,r11d,25 + rorx edi,r11d,11 + lea edx,DWORD PTR[r14*1+rdx] + lea ecx,DWORD PTR[r12*1+rcx] + andn r12d,r11d,ebx + xor r13d,edi + rorx r14d,r11d,6 + lea ecx,DWORD PTR[r12*1+rcx] + xor r13d,r14d + mov edi,edx + rorx r12d,edx,22 + lea ecx,DWORD PTR[r13*1+rcx] + xor edi,r8d + rorx r14d,edx,13 + rorx r13d,edx,2 + lea r10d,DWORD PTR[rcx*1+r10] + and r15d,edi + xor r14d,r12d + xor r15d,r8d + xor r14d,r13d + lea ecx,DWORD PTR[r15*1+rcx] + mov r12d,r11d + add ebx,DWORD PTR[((40+16))+rbp] + and r12d,r10d + rorx r13d,r10d,25 + rorx r15d,r10d,11 + lea ecx,DWORD PTR[r14*1+rcx] + lea ebx,DWORD PTR[r12*1+rbx] + andn r12d,r10d,eax + xor r13d,r15d + rorx r14d,r10d,6 + lea ebx,DWORD PTR[r12*1+rbx] + xor r13d,r14d + mov r15d,ecx + rorx r12d,ecx,22 + lea ebx,DWORD PTR[r13*1+rbx] + xor r15d,edx + rorx r14d,ecx,13 + rorx r13d,ecx,2 + lea r9d,DWORD PTR[rbx*1+r9] + and edi,r15d + xor r14d,r12d + xor edi,edx + xor r14d,r13d + lea ebx,DWORD PTR[rdi*1+rbx] + mov r12d,r10d + add eax,DWORD PTR[((44+16))+rbp] + and r12d,r9d + rorx r13d,r9d,25 + rorx edi,r9d,11 + lea ebx,DWORD PTR[r14*1+rbx] + lea eax,DWORD PTR[r12*1+rax] + andn r12d,r9d,r11d + xor r13d,edi + rorx r14d,r9d,6 + lea eax,DWORD PTR[r12*1+rax] + xor r13d,r14d + mov edi,ebx + rorx r12d,ebx,22 + lea eax,DWORD PTR[r13*1+rax] + xor edi,ecx + rorx r14d,ebx,13 + rorx r13d,ebx,2 + lea r8d,DWORD PTR[rax*1+r8] + and r15d,edi + xor r14d,r12d + xor r15d,ecx + xor r14d,r13d + lea eax,DWORD PTR[r15*1+rax] + mov r12d,r9d + lea rbp,QWORD PTR[((-64))+rbp] + cmp rbp,rsp + jae $L$ower_avx2 + + mov rdi,QWORD PTR[512+rsp] + add eax,r14d + + lea rsp,QWORD PTR[448+rsp] add eax,DWORD PTR[rdi] add ebx,DWORD PTR[4+rdi] @@ -1742,12 +5419,14 @@ $L$rounds_16_xx:: add edx,DWORD PTR[12+rdi] add r8d,DWORD PTR[16+rdi] add r9d,DWORD PTR[20+rdi] + lea rsi,QWORD PTR[128+rsi] add r10d,DWORD PTR[24+rdi] + mov r12,rsi add r11d,DWORD PTR[28+rdi] - cmp rsi,QWORD PTR[((64+16))+rsp] mov DWORD PTR[rdi],eax + cmove r12,rsp mov DWORD PTR[4+rdi],ebx mov DWORD PTR[8+rdi],ecx mov DWORD PTR[12+rdi],edx @@ -1755,9 +5434,18 @@ $L$rounds_16_xx:: mov DWORD PTR[20+rdi],r9d mov DWORD PTR[24+rdi],r10d mov DWORD PTR[28+rdi],r11d - jb $L$loop + jbe $L$oop_avx2 
+ lea rbp,QWORD PTR[rsp] + +$L$done_avx2:: + lea rsp,QWORD PTR[rbp] mov rsi,QWORD PTR[((64+24))+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((64+32))+rsp] + movaps xmm7,XMMWORD PTR[((64+48))+rsp] + movaps xmm8,XMMWORD PTR[((64+64))+rsp] + movaps xmm9,XMMWORD PTR[((64+80))+rsp] mov r15,QWORD PTR[rsi] mov r14,QWORD PTR[8+rsi] mov r13,QWORD PTR[16+rsi] @@ -1765,31 +5453,12 @@ $L$rounds_16_xx:: mov rbp,QWORD PTR[32+rsi] mov rbx,QWORD PTR[40+rsi] lea rsp,QWORD PTR[48+rsi] -$L$epilogue:: +$L$epilogue_avx2:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha256_block_data_order:: -sha256_block_data_order ENDP -ALIGN 64 - -K256:: - DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h - DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h - DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h - DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h - DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch - DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah - DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h - DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h - DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h - DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h - DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h - DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h - DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h - DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h - DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h - DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h +$L$SEH_end_sha256_block_data_order_avx2:: +sha256_block_data_order_avx2 ENDP EXTERN __imp_RtlVirtualUnwind:NEAR ALIGN 16 @@ -1808,16 +5477,28 @@ se_handler PROC PRIVATE mov rax,QWORD PTR[120+r8] mov rbx,QWORD PTR[248+r8] - lea r10,QWORD PTR[$L$prologue] + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jb $L$in_prologue mov rax,QWORD PTR[152+r8] - lea r10,QWORD PTR[$L$epilogue] + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] cmp rbx,r10 jae $L$in_prologue + lea r10,QWORD PTR[$L$avx2_shortcut] + cmp rbx,r10 + jb $L$not_in_avx2 + and rax,-256*4 + add rax,448 +$L$not_in_avx2:: + mov rsi,rax mov rax,QWORD PTR[((64+24))+rax] lea rax,QWORD PTR[48+rax] @@ -1834,6 +5515,15 @@ se_handler PROC PRIVATE mov QWORD PTR[232+r8],r14 mov QWORD PTR[240+r8],r15 + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jb $L$in_prologue + + lea rsi,QWORD PTR[((64+32))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + $L$in_prologue:: mov rdi,QWORD PTR[8+rax] mov rsi,QWORD PTR[16+rax] @@ -1846,7 +5536,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] @@ -1875,19 +5564,77 @@ $L$in_prologue:: DB 0F3h,0C3h ;repret se_handler ENDP +ALIGN 16 +shaext_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue_shaext] + cmp rbx,r10 + jb $L$in_prologue + + lea r10,QWORD PTR[$L$epilogue_shaext] + cmp rbx,r10 + jae $L$in_prologue + + lea rsi,QWORD PTR[((-8-80))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,10 + DD 0a548f3fch + + jmp $L$in_prologue +shaext_handler ENDP .text$ ENDS .pdata SEGMENT READONLY ALIGN(4) ALIGN 4 DD imagerel $L$SEH_begin_sha256_block_data_order DD imagerel $L$SEH_end_sha256_block_data_order DD imagerel $L$SEH_info_sha256_block_data_order - + DD imagerel $L$SEH_begin_sha256_block_data_order_shaext + DD imagerel $L$SEH_end_sha256_block_data_order_shaext + DD imagerel 
$L$SEH_info_sha256_block_data_order_shaext + DD imagerel $L$SEH_begin_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_end_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_info_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_begin_sha256_block_data_order_avx + DD imagerel $L$SEH_end_sha256_block_data_order_avx + DD imagerel $L$SEH_info_sha256_block_data_order_avx + DD imagerel $L$SEH_begin_sha256_block_data_order_avx2 + DD imagerel $L$SEH_end_sha256_block_data_order_avx2 + DD imagerel $L$SEH_info_sha256_block_data_order_avx2 .pdata ENDS .xdata SEGMENT READONLY ALIGN(8) ALIGN 8 $L$SEH_info_sha256_block_data_order:: DB 9,0,0,0 DD imagerel se_handler + DD imagerel $L$prologue,imagerel $L$epilogue +$L$SEH_info_sha256_block_data_order_shaext:: +DB 9,0,0,0 + DD imagerel shaext_handler +$L$SEH_info_sha256_block_data_order_ssse3:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 +$L$SEH_info_sha256_block_data_order_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx +$L$SEH_info_sha256_block_data_order_avx2:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm b/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm index 52f998e28b89c9..a3d68c675a73bf 100644 --- a/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/sha/sha512-x86_64.asm @@ -1,6 +1,7 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR PUBLIC sha512_block_data_order ALIGN 16 @@ -12,9 +13,22 @@ $L$SEH_begin_sha512_block_data_order:: mov rdi,rcx mov rsi,rdx mov rdx,r8 - mov rcx,r9 + lea r11,QWORD PTR[OPENSSL_ia32cap_P] + mov r9d,DWORD PTR[r11] + mov r10d,DWORD PTR[4+r11] + mov r11d,DWORD PTR[8+r11] + test r10d,2048 + jnz $L$xop_shortcut + and r11d,296 + cmp r11d,296 + je $L$avx2_shortcut + and r9d,1073741824 + and r10d,268435968 + or r10d,r9d + cmp r10d,1342177792 + je $L$avx_shortcut push rbx push rbp push r12 @@ -32,8 +46,6 @@ $L$SEH_begin_sha512_block_data_order:: mov QWORD PTR[((128+24))+rsp],r11 $L$prologue:: - lea rbp,QWORD PTR[K512] - mov rax,QWORD PTR[rdi] mov rbx,QWORD PTR[8+rdi] mov rcx,QWORD PTR[16+rdi] @@ -46,1694 +58,1632 @@ $L$prologue:: ALIGN 16 $L$loop:: - xor rdi,rdi + mov rdi,rbx + lea rbp,QWORD PTR[K512] + xor rdi,rcx mov r12,QWORD PTR[rsi] mov r13,r8 mov r14,rax bswap r12 ror r13,23 mov r15,r9 - mov QWORD PTR[rsp],r12 - ror r14,5 xor r13,r8 + ror r14,5 xor r15,r10 - ror r13,4 - add r12,r11 + mov QWORD PTR[rsp],r12 xor r14,rax - - add r12,QWORD PTR[rdi*8+rbp] and r15,r8 - mov r11,rbx + + ror r13,4 + add r12,r11 + xor r15,r10 ror r14,6 xor r13,r8 - xor r15,r10 + add r12,r15 - xor r11,rcx + mov r15,rax + add r12,QWORD PTR[rbp] xor r14,rax - add r12,r15 - mov r15,rbx + xor r15,rbx ror r13,14 - and r11,rax - and r15,rcx + mov r11,rbx + and rdi,r15 ror r14,28 add r12,r13 - add r11,r15 + xor r11,rdi add rdx,r12 add r11,r12 - lea rdi,QWORD PTR[1+rdi] - add r11,r14 + lea rbp,QWORD PTR[8+rbp] + add r11,r14 mov r12,QWORD PTR[8+rsi] mov r13,rdx mov r14,r11 bswap r12 ror r13,23 - mov r15,r8 - mov QWORD PTR[8+rsp],r12 + mov rdi,r8 - ror r14,5 xor r13,rdx - xor r15,r9 + ror r14,5 + xor rdi,r9 - ror r13,4 - add r12,r10 + mov QWORD PTR[8+rsp],r12 xor r14,r11 + and rdi,rdx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rdx - mov r10,rax + ror r13,4 + add r12,r10 + xor rdi,r9 ror r14,6 xor r13,rdx - xor r15,r9 + add 
r12,rdi - xor r10,rbx + mov rdi,r11 + add r12,QWORD PTR[rbp] xor r14,r11 - add r12,r15 - mov r15,rax + xor rdi,rax ror r13,14 - and r10,r11 - and r15,rbx + mov r10,rax + and r15,rdi ror r14,28 add r12,r13 - add r10,r15 + xor r10,r15 add rcx,r12 add r10,r12 - lea rdi,QWORD PTR[1+rdi] - add r10,r14 + lea rbp,QWORD PTR[24+rbp] + add r10,r14 mov r12,QWORD PTR[16+rsi] mov r13,rcx mov r14,r10 bswap r12 ror r13,23 mov r15,rdx - mov QWORD PTR[16+rsp],r12 - ror r14,5 xor r13,rcx + ror r14,5 xor r15,r8 - ror r13,4 - add r12,r9 + mov QWORD PTR[16+rsp],r12 xor r14,r10 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rcx - mov r9,r11 + + ror r13,4 + add r12,r9 + xor r15,r8 ror r14,6 xor r13,rcx - xor r15,r8 + add r12,r15 - xor r9,rax + mov r15,r10 + add r12,QWORD PTR[rbp] xor r14,r10 - add r12,r15 - mov r15,r11 + xor r15,r11 ror r13,14 - and r9,r10 - and r15,rax + mov r9,r11 + and rdi,r15 ror r14,28 add r12,r13 - add r9,r15 + xor r9,rdi add rbx,r12 add r9,r12 - lea rdi,QWORD PTR[1+rdi] - add r9,r14 + lea rbp,QWORD PTR[8+rbp] + add r9,r14 mov r12,QWORD PTR[24+rsi] mov r13,rbx mov r14,r9 bswap r12 ror r13,23 - mov r15,rcx - mov QWORD PTR[24+rsp],r12 + mov rdi,rcx - ror r14,5 xor r13,rbx - xor r15,rdx + ror r14,5 + xor rdi,rdx - ror r13,4 - add r12,r8 + mov QWORD PTR[24+rsp],r12 xor r14,r9 + and rdi,rbx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rbx - mov r8,r10 + ror r13,4 + add r12,r8 + xor rdi,rdx ror r14,6 xor r13,rbx - xor r15,rdx + add r12,rdi - xor r8,r11 + mov rdi,r9 + add r12,QWORD PTR[rbp] xor r14,r9 - add r12,r15 - mov r15,r10 + xor rdi,r10 ror r13,14 - and r8,r9 - and r15,r11 + mov r8,r10 + and r15,rdi ror r14,28 add r12,r13 - add r8,r15 + xor r8,r15 add rax,r12 add r8,r12 - lea rdi,QWORD PTR[1+rdi] - add r8,r14 + lea rbp,QWORD PTR[24+rbp] + add r8,r14 mov r12,QWORD PTR[32+rsi] mov r13,rax mov r14,r8 bswap r12 ror r13,23 mov r15,rbx - mov QWORD PTR[32+rsp],r12 - ror r14,5 xor r13,rax + ror r14,5 xor r15,rcx - ror r13,4 - add r12,rdx + mov QWORD PTR[32+rsp],r12 xor r14,r8 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rax - mov rdx,r9 + + ror r13,4 + add r12,rdx + xor r15,rcx ror r14,6 xor r13,rax - xor r15,rcx + add r12,r15 - xor rdx,r10 + mov r15,r8 + add r12,QWORD PTR[rbp] xor r14,r8 - add r12,r15 - mov r15,r9 + xor r15,r9 ror r13,14 - and rdx,r8 - and r15,r10 + mov rdx,r9 + and rdi,r15 ror r14,28 add r12,r13 - add rdx,r15 + xor rdx,rdi add r11,r12 add rdx,r12 - lea rdi,QWORD PTR[1+rdi] - add rdx,r14 + lea rbp,QWORD PTR[8+rbp] + add rdx,r14 mov r12,QWORD PTR[40+rsi] mov r13,r11 mov r14,rdx bswap r12 ror r13,23 - mov r15,rax - mov QWORD PTR[40+rsp],r12 + mov rdi,rax - ror r14,5 xor r13,r11 - xor r15,rbx + ror r14,5 + xor rdi,rbx - ror r13,4 - add r12,rcx + mov QWORD PTR[40+rsp],r12 xor r14,rdx + and rdi,r11 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r11 - mov rcx,r8 + ror r13,4 + add r12,rcx + xor rdi,rbx ror r14,6 xor r13,r11 - xor r15,rbx + add r12,rdi - xor rcx,r9 + mov rdi,rdx + add r12,QWORD PTR[rbp] xor r14,rdx - add r12,r15 - mov r15,r8 + xor rdi,r8 ror r13,14 - and rcx,rdx - and r15,r9 + mov rcx,r8 + and r15,rdi ror r14,28 add r12,r13 - add rcx,r15 + xor rcx,r15 add r10,r12 add rcx,r12 - lea rdi,QWORD PTR[1+rdi] - add rcx,r14 + lea rbp,QWORD PTR[24+rbp] + add rcx,r14 mov r12,QWORD PTR[48+rsi] mov r13,r10 mov r14,rcx bswap r12 ror r13,23 mov r15,r11 - mov QWORD PTR[48+rsp],r12 - ror r14,5 xor r13,r10 + ror r14,5 xor r15,rax - ror r13,4 - add r12,rbx + mov QWORD PTR[48+rsp],r12 xor r14,rcx - - add r12,QWORD PTR[rdi*8+rbp] and r15,r10 - mov rbx,rdx + + ror r13,4 + add r12,rbx + xor r15,rax ror r14,6 xor 
r13,r10 - xor r15,rax + add r12,r15 - xor rbx,r8 + mov r15,rcx + add r12,QWORD PTR[rbp] xor r14,rcx - add r12,r15 - mov r15,rdx + xor r15,rdx ror r13,14 - and rbx,rcx - and r15,r8 + mov rbx,rdx + and rdi,r15 ror r14,28 add r12,r13 - add rbx,r15 + xor rbx,rdi add r9,r12 add rbx,r12 - lea rdi,QWORD PTR[1+rdi] - add rbx,r14 + lea rbp,QWORD PTR[8+rbp] + add rbx,r14 mov r12,QWORD PTR[56+rsi] mov r13,r9 mov r14,rbx bswap r12 ror r13,23 - mov r15,r10 - mov QWORD PTR[56+rsp],r12 + mov rdi,r10 - ror r14,5 xor r13,r9 - xor r15,r11 + ror r14,5 + xor rdi,r11 - ror r13,4 - add r12,rax + mov QWORD PTR[56+rsp],r12 xor r14,rbx + and rdi,r9 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r9 - mov rax,rcx + ror r13,4 + add r12,rax + xor rdi,r11 ror r14,6 xor r13,r9 - xor r15,r11 + add r12,rdi - xor rax,rdx + mov rdi,rbx + add r12,QWORD PTR[rbp] xor r14,rbx - add r12,r15 - mov r15,rcx + xor rdi,rcx ror r13,14 - and rax,rbx - and r15,rdx + mov rax,rcx + and r15,rdi ror r14,28 add r12,r13 - add rax,r15 + xor rax,r15 add r8,r12 add rax,r12 - lea rdi,QWORD PTR[1+rdi] - add rax,r14 + lea rbp,QWORD PTR[24+rbp] + add rax,r14 mov r12,QWORD PTR[64+rsi] mov r13,r8 mov r14,rax bswap r12 ror r13,23 mov r15,r9 - mov QWORD PTR[64+rsp],r12 - ror r14,5 xor r13,r8 + ror r14,5 xor r15,r10 - ror r13,4 - add r12,r11 + mov QWORD PTR[64+rsp],r12 xor r14,rax - - add r12,QWORD PTR[rdi*8+rbp] and r15,r8 - mov r11,rbx + + ror r13,4 + add r12,r11 + xor r15,r10 ror r14,6 xor r13,r8 - xor r15,r10 + add r12,r15 - xor r11,rcx + mov r15,rax + add r12,QWORD PTR[rbp] xor r14,rax - add r12,r15 - mov r15,rbx + xor r15,rbx ror r13,14 - and r11,rax - and r15,rcx + mov r11,rbx + and rdi,r15 ror r14,28 add r12,r13 - add r11,r15 + xor r11,rdi add rdx,r12 add r11,r12 - lea rdi,QWORD PTR[1+rdi] - add r11,r14 + lea rbp,QWORD PTR[8+rbp] + add r11,r14 mov r12,QWORD PTR[72+rsi] mov r13,rdx mov r14,r11 bswap r12 ror r13,23 - mov r15,r8 - mov QWORD PTR[72+rsp],r12 + mov rdi,r8 - ror r14,5 xor r13,rdx - xor r15,r9 + ror r14,5 + xor rdi,r9 - ror r13,4 - add r12,r10 + mov QWORD PTR[72+rsp],r12 xor r14,r11 + and rdi,rdx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rdx - mov r10,rax + ror r13,4 + add r12,r10 + xor rdi,r9 ror r14,6 xor r13,rdx - xor r15,r9 + add r12,rdi - xor r10,rbx + mov rdi,r11 + add r12,QWORD PTR[rbp] xor r14,r11 - add r12,r15 - mov r15,rax + xor rdi,rax ror r13,14 - and r10,r11 - and r15,rbx + mov r10,rax + and r15,rdi ror r14,28 add r12,r13 - add r10,r15 + xor r10,r15 add rcx,r12 add r10,r12 - lea rdi,QWORD PTR[1+rdi] - add r10,r14 + lea rbp,QWORD PTR[24+rbp] + add r10,r14 mov r12,QWORD PTR[80+rsi] mov r13,rcx mov r14,r10 bswap r12 ror r13,23 mov r15,rdx - mov QWORD PTR[80+rsp],r12 - ror r14,5 xor r13,rcx + ror r14,5 xor r15,r8 - ror r13,4 - add r12,r9 + mov QWORD PTR[80+rsp],r12 xor r14,r10 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rcx - mov r9,r11 + + ror r13,4 + add r12,r9 + xor r15,r8 ror r14,6 xor r13,rcx - xor r15,r8 + add r12,r15 - xor r9,rax + mov r15,r10 + add r12,QWORD PTR[rbp] xor r14,r10 - add r12,r15 - mov r15,r11 + xor r15,r11 ror r13,14 - and r9,r10 - and r15,rax + mov r9,r11 + and rdi,r15 ror r14,28 add r12,r13 - add r9,r15 + xor r9,rdi add rbx,r12 add r9,r12 - lea rdi,QWORD PTR[1+rdi] - add r9,r14 + lea rbp,QWORD PTR[8+rbp] + add r9,r14 mov r12,QWORD PTR[88+rsi] mov r13,rbx mov r14,r9 bswap r12 ror r13,23 - mov r15,rcx - mov QWORD PTR[88+rsp],r12 + mov rdi,rcx - ror r14,5 xor r13,rbx - xor r15,rdx + ror r14,5 + xor rdi,rdx - ror r13,4 - add r12,r8 + mov QWORD PTR[88+rsp],r12 xor r14,r9 + and rdi,rbx - add r12,QWORD PTR[rdi*8+rbp] - 
and r15,rbx - mov r8,r10 + ror r13,4 + add r12,r8 + xor rdi,rdx ror r14,6 xor r13,rbx - xor r15,rdx + add r12,rdi - xor r8,r11 + mov rdi,r9 + add r12,QWORD PTR[rbp] xor r14,r9 - add r12,r15 - mov r15,r10 + xor rdi,r10 ror r13,14 - and r8,r9 - and r15,r11 + mov r8,r10 + and r15,rdi ror r14,28 add r12,r13 - add r8,r15 + xor r8,r15 add rax,r12 add r8,r12 - lea rdi,QWORD PTR[1+rdi] - add r8,r14 + lea rbp,QWORD PTR[24+rbp] + add r8,r14 mov r12,QWORD PTR[96+rsi] mov r13,rax mov r14,r8 bswap r12 ror r13,23 mov r15,rbx - mov QWORD PTR[96+rsp],r12 - ror r14,5 xor r13,rax + ror r14,5 xor r15,rcx - ror r13,4 - add r12,rdx + mov QWORD PTR[96+rsp],r12 xor r14,r8 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rax - mov rdx,r9 - ror r14,6 - xor r13,rax - xor r15,rcx + ror r13,4 + add r12,rdx + xor r15,rcx - xor rdx,r10 - xor r14,r8 + ror r14,6 + xor r13,rax add r12,r15 - mov r15,r9 + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 ror r13,14 - and rdx,r8 - and r15,r10 + mov rdx,r9 + and rdi,r15 ror r14,28 add r12,r13 - add rdx,r15 + xor rdx,rdi add r11,r12 add rdx,r12 - lea rdi,QWORD PTR[1+rdi] - add rdx,r14 + lea rbp,QWORD PTR[8+rbp] + add rdx,r14 mov r12,QWORD PTR[104+rsi] mov r13,r11 mov r14,rdx bswap r12 ror r13,23 - mov r15,rax - mov QWORD PTR[104+rsp],r12 + mov rdi,rax - ror r14,5 xor r13,r11 - xor r15,rbx + ror r14,5 + xor rdi,rbx - ror r13,4 - add r12,rcx + mov QWORD PTR[104+rsp],r12 xor r14,rdx + and rdi,r11 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r11 - mov rcx,r8 + ror r13,4 + add r12,rcx + xor rdi,rbx ror r14,6 xor r13,r11 - xor r15,rbx + add r12,rdi - xor rcx,r9 + mov rdi,rdx + add r12,QWORD PTR[rbp] xor r14,rdx - add r12,r15 - mov r15,r8 + xor rdi,r8 ror r13,14 - and rcx,rdx - and r15,r9 + mov rcx,r8 + and r15,rdi ror r14,28 add r12,r13 - add rcx,r15 + xor rcx,r15 add r10,r12 add rcx,r12 - lea rdi,QWORD PTR[1+rdi] - add rcx,r14 + lea rbp,QWORD PTR[24+rbp] + add rcx,r14 mov r12,QWORD PTR[112+rsi] mov r13,r10 mov r14,rcx bswap r12 ror r13,23 mov r15,r11 - mov QWORD PTR[112+rsp],r12 - ror r14,5 xor r13,r10 + ror r14,5 xor r15,rax - ror r13,4 - add r12,rbx + mov QWORD PTR[112+rsp],r12 xor r14,rcx - - add r12,QWORD PTR[rdi*8+rbp] and r15,r10 - mov rbx,rdx + + ror r13,4 + add r12,rbx + xor r15,rax ror r14,6 xor r13,r10 - xor r15,rax + add r12,r15 - xor rbx,r8 + mov r15,rcx + add r12,QWORD PTR[rbp] xor r14,rcx - add r12,r15 - mov r15,rdx + xor r15,rdx ror r13,14 - and rbx,rcx - and r15,r8 + mov rbx,rdx + and rdi,r15 ror r14,28 add r12,r13 - add rbx,r15 + xor rbx,rdi add r9,r12 add rbx,r12 - lea rdi,QWORD PTR[1+rdi] - add rbx,r14 + lea rbp,QWORD PTR[8+rbp] + add rbx,r14 mov r12,QWORD PTR[120+rsi] mov r13,r9 mov r14,rbx bswap r12 ror r13,23 - mov r15,r10 - mov QWORD PTR[120+rsp],r12 + mov rdi,r10 - ror r14,5 xor r13,r9 - xor r15,r11 + ror r14,5 + xor rdi,r11 - ror r13,4 - add r12,rax + mov QWORD PTR[120+rsp],r12 xor r14,rbx + and rdi,r9 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r9 - mov rax,rcx + ror r13,4 + add r12,rax + xor rdi,r11 ror r14,6 xor r13,r9 - xor r15,r11 + add r12,rdi - xor rax,rdx + mov rdi,rbx + add r12,QWORD PTR[rbp] xor r14,rbx - add r12,r15 - mov r15,rcx + xor rdi,rcx ror r13,14 - and rax,rbx - and r15,rdx + mov rax,rcx + and r15,rdi ror r14,28 add r12,r13 - add rax,r15 + xor rax,r15 add r8,r12 add rax,r12 - lea rdi,QWORD PTR[1+rdi] - add rax,r14 + lea rbp,QWORD PTR[24+rbp] jmp $L$rounds_16_xx ALIGN 16 $L$rounds_16_xx:: mov r13,QWORD PTR[8+rsp] - mov r14,QWORD PTR[112+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[112+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov 
r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[72+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[72+rsp] add r12,QWORD PTR[rsp] mov r13,r8 - add r12,r14 + add r12,r15 mov r14,rax ror r13,23 mov r15,r9 - mov QWORD PTR[rsp],r12 - ror r14,5 xor r13,r8 + ror r14,5 xor r15,r10 - ror r13,4 - add r12,r11 + mov QWORD PTR[rsp],r12 xor r14,rax - - add r12,QWORD PTR[rdi*8+rbp] and r15,r8 - mov r11,rbx + + ror r13,4 + add r12,r11 + xor r15,r10 ror r14,6 xor r13,r8 - xor r15,r10 + add r12,r15 - xor r11,rcx + mov r15,rax + add r12,QWORD PTR[rbp] xor r14,rax - add r12,r15 - mov r15,rbx + xor r15,rbx ror r13,14 - and r11,rax - and r15,rcx + mov r11,rbx + and rdi,r15 ror r14,28 add r12,r13 - add r11,r15 + xor r11,rdi add rdx,r12 add r11,r12 - lea rdi,QWORD PTR[1+rdi] - add r11,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[16+rsp] - mov r14,QWORD PTR[120+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[120+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[80+rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[80+rsp] add r12,QWORD PTR[8+rsp] mov r13,rdx - add r12,r14 + add r12,rdi mov r14,r11 ror r13,23 - mov r15,r8 - mov QWORD PTR[8+rsp],r12 + mov rdi,r8 - ror r14,5 xor r13,rdx - xor r15,r9 + ror r14,5 + xor rdi,r9 - ror r13,4 - add r12,r10 + mov QWORD PTR[8+rsp],r12 xor r14,r11 + and rdi,rdx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rdx - mov r10,rax + ror r13,4 + add r12,r10 + xor rdi,r9 ror r14,6 xor r13,rdx - xor r15,r9 + add r12,rdi - xor r10,rbx + mov rdi,r11 + add r12,QWORD PTR[rbp] xor r14,r11 - add r12,r15 - mov r15,rax + xor rdi,rax ror r13,14 - and r10,r11 - and r15,rbx + mov r10,rax + and r15,rdi ror r14,28 add r12,r13 - add r10,r15 + xor r10,r15 add rcx,r12 add r10,r12 - lea rdi,QWORD PTR[1+rdi] - add r10,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[24+rsp] - mov r14,QWORD PTR[rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[88+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[88+rsp] add r12,QWORD PTR[16+rsp] mov r13,rcx - add r12,r14 + add r12,r15 mov r14,r10 ror r13,23 mov r15,rdx - mov QWORD PTR[16+rsp],r12 - ror r14,5 xor r13,rcx + ror r14,5 xor r15,r8 - ror r13,4 - add r12,r9 + mov QWORD PTR[16+rsp],r12 xor r14,r10 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rcx - mov r9,r11 + + ror r13,4 + add r12,r9 + xor r15,r8 ror r14,6 xor r13,rcx - xor r15,r8 + add r12,r15 - xor r9,rax + mov r15,r10 + add r12,QWORD PTR[rbp] xor r14,r10 - add r12,r15 - mov r15,r11 + xor r15,r11 ror r13,14 - and r9,r10 - and r15,rax + mov r9,r11 + and rdi,r15 ror r14,28 add r12,r13 - add r9,r15 + xor r9,rdi add rbx,r12 add r9,r12 - lea rdi,QWORD PTR[1+rdi] - add r9,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[32+rsp] - mov r14,QWORD PTR[8+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[8+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[96+rsp] - - 
ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[96+rsp] add r12,QWORD PTR[24+rsp] mov r13,rbx - add r12,r14 + add r12,rdi mov r14,r9 ror r13,23 - mov r15,rcx - mov QWORD PTR[24+rsp],r12 + mov rdi,rcx - ror r14,5 xor r13,rbx - xor r15,rdx + ror r14,5 + xor rdi,rdx - ror r13,4 - add r12,r8 + mov QWORD PTR[24+rsp],r12 xor r14,r9 + and rdi,rbx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rbx - mov r8,r10 + ror r13,4 + add r12,r8 + xor rdi,rdx ror r14,6 xor r13,rbx - xor r15,rdx + add r12,rdi - xor r8,r11 + mov rdi,r9 + add r12,QWORD PTR[rbp] xor r14,r9 - add r12,r15 - mov r15,r10 + xor rdi,r10 ror r13,14 - and r8,r9 - and r15,r11 + mov r8,r10 + and r15,rdi ror r14,28 add r12,r13 - add r8,r15 + xor r8,r15 add rax,r12 add r8,r12 - lea rdi,QWORD PTR[1+rdi] - add r8,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[40+rsp] - mov r14,QWORD PTR[16+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[16+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[104+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[104+rsp] add r12,QWORD PTR[32+rsp] mov r13,rax - add r12,r14 + add r12,r15 mov r14,r8 ror r13,23 mov r15,rbx - mov QWORD PTR[32+rsp],r12 - ror r14,5 xor r13,rax + ror r14,5 xor r15,rcx - ror r13,4 - add r12,rdx + mov QWORD PTR[32+rsp],r12 xor r14,r8 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rax - mov rdx,r9 + + ror r13,4 + add r12,rdx + xor r15,rcx ror r14,6 xor r13,rax - xor r15,rcx + add r12,r15 - xor rdx,r10 + mov r15,r8 + add r12,QWORD PTR[rbp] xor r14,r8 - add r12,r15 - mov r15,r9 + xor r15,r9 ror r13,14 - and rdx,r8 - and r15,r10 + mov rdx,r9 + and rdi,r15 ror r14,28 add r12,r13 - add rdx,r15 + xor rdx,rdi add r11,r12 add rdx,r12 - lea rdi,QWORD PTR[1+rdi] - add rdx,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[48+rsp] - mov r14,QWORD PTR[24+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[24+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[112+rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[112+rsp] add r12,QWORD PTR[40+rsp] mov r13,r11 - add r12,r14 + add r12,rdi mov r14,rdx ror r13,23 - mov r15,rax - mov QWORD PTR[40+rsp],r12 + mov rdi,rax - ror r14,5 xor r13,r11 - xor r15,rbx + ror r14,5 + xor rdi,rbx - ror r13,4 - add r12,rcx + mov QWORD PTR[40+rsp],r12 xor r14,rdx + and rdi,r11 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r11 - mov rcx,r8 + ror r13,4 + add r12,rcx + xor rdi,rbx ror r14,6 xor r13,r11 - xor r15,rbx + add r12,rdi - xor rcx,r9 + mov rdi,rdx + add r12,QWORD PTR[rbp] xor r14,rdx - add r12,r15 - mov r15,r8 + xor rdi,r8 ror r13,14 - and rcx,rdx - and r15,r9 + mov rcx,r8 + and r15,rdi ror r14,28 add r12,r13 - add rcx,r15 + xor rcx,r15 add r10,r12 add rcx,r12 - lea rdi,QWORD PTR[1+rdi] - add rcx,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[56+rsp] - mov r14,QWORD PTR[32+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[32+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[120+rsp] - - ror r15,42 + shr r12,7 + 
ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[120+rsp] add r12,QWORD PTR[48+rsp] mov r13,r10 - add r12,r14 + add r12,r15 mov r14,rcx ror r13,23 mov r15,r11 - mov QWORD PTR[48+rsp],r12 - ror r14,5 xor r13,r10 + ror r14,5 xor r15,rax - ror r13,4 - add r12,rbx + mov QWORD PTR[48+rsp],r12 xor r14,rcx - - add r12,QWORD PTR[rdi*8+rbp] and r15,r10 - mov rbx,rdx + + ror r13,4 + add r12,rbx + xor r15,rax ror r14,6 xor r13,r10 - xor r15,rax + add r12,r15 - xor rbx,r8 + mov r15,rcx + add r12,QWORD PTR[rbp] xor r14,rcx - add r12,r15 - mov r15,rdx + xor r15,rdx ror r13,14 - and rbx,rcx - and r15,r8 + mov rbx,rdx + and rdi,r15 ror r14,28 add r12,r13 - add rbx,r15 + xor rbx,rdi add r9,r12 add rbx,r12 - lea rdi,QWORD PTR[1+rdi] - add rbx,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[64+rsp] - mov r14,QWORD PTR[40+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[40+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[rsp] add r12,QWORD PTR[56+rsp] mov r13,r9 - add r12,r14 + add r12,rdi mov r14,rbx ror r13,23 - mov r15,r10 - mov QWORD PTR[56+rsp],r12 + mov rdi,r10 - ror r14,5 xor r13,r9 - xor r15,r11 + ror r14,5 + xor rdi,r11 - ror r13,4 - add r12,rax + mov QWORD PTR[56+rsp],r12 xor r14,rbx + and rdi,r9 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r9 - mov rax,rcx + ror r13,4 + add r12,rax + xor rdi,r11 ror r14,6 xor r13,r9 - xor r15,r11 + add r12,rdi - xor rax,rdx + mov rdi,rbx + add r12,QWORD PTR[rbp] xor r14,rbx - add r12,r15 - mov r15,rcx + xor rdi,rcx ror r13,14 - and rax,rbx - and r15,rdx + mov rax,rcx + and r15,rdi ror r14,28 add r12,r13 - add rax,r15 + xor rax,r15 add r8,r12 add rax,r12 - lea rdi,QWORD PTR[1+rdi] - add rax,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[72+rsp] - mov r14,QWORD PTR[48+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[48+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[8+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[8+rsp] add r12,QWORD PTR[64+rsp] mov r13,r8 - add r12,r14 + add r12,r15 mov r14,rax ror r13,23 mov r15,r9 - mov QWORD PTR[64+rsp],r12 - ror r14,5 xor r13,r8 + ror r14,5 xor r15,r10 - ror r13,4 - add r12,r11 + mov QWORD PTR[64+rsp],r12 xor r14,rax - - add r12,QWORD PTR[rdi*8+rbp] and r15,r8 - mov r11,rbx + + ror r13,4 + add r12,r11 + xor r15,r10 ror r14,6 xor r13,r8 - xor r15,r10 + add r12,r15 - xor r11,rcx + mov r15,rax + add r12,QWORD PTR[rbp] xor r14,rax - add r12,r15 - mov r15,rbx + xor r15,rbx ror r13,14 - and r11,rax - and r15,rcx + mov r11,rbx + and rdi,r15 ror r14,28 add r12,r13 - add r11,r15 + xor r11,rdi add rdx,r12 add r11,r12 - lea rdi,QWORD PTR[1+rdi] - add r11,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[80+rsp] - mov r14,QWORD PTR[56+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[56+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[16+rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror 
rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[16+rsp] add r12,QWORD PTR[72+rsp] mov r13,rdx - add r12,r14 + add r12,rdi mov r14,r11 ror r13,23 - mov r15,r8 - mov QWORD PTR[72+rsp],r12 + mov rdi,r8 - ror r14,5 xor r13,rdx - xor r15,r9 + ror r14,5 + xor rdi,r9 - ror r13,4 - add r12,r10 + mov QWORD PTR[72+rsp],r12 xor r14,r11 + and rdi,rdx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rdx - mov r10,rax + ror r13,4 + add r12,r10 + xor rdi,r9 ror r14,6 xor r13,rdx - xor r15,r9 + add r12,rdi - xor r10,rbx + mov rdi,r11 + add r12,QWORD PTR[rbp] xor r14,r11 - add r12,r15 - mov r15,rax + xor rdi,rax ror r13,14 - and r10,r11 - and r15,rbx + mov r10,rax + and r15,rdi ror r14,28 add r12,r13 - add r10,r15 + xor r10,r15 add rcx,r12 add r10,r12 - lea rdi,QWORD PTR[1+rdi] - add r10,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[88+rsp] - mov r14,QWORD PTR[64+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[64+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[24+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[24+rsp] add r12,QWORD PTR[80+rsp] mov r13,rcx - add r12,r14 + add r12,r15 mov r14,r10 ror r13,23 mov r15,rdx - mov QWORD PTR[80+rsp],r12 - ror r14,5 xor r13,rcx + ror r14,5 xor r15,r8 - ror r13,4 - add r12,r9 + mov QWORD PTR[80+rsp],r12 xor r14,r10 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rcx - mov r9,r11 + + ror r13,4 + add r12,r9 + xor r15,r8 ror r14,6 xor r13,rcx - xor r15,r8 + add r12,r15 - xor r9,rax + mov r15,r10 + add r12,QWORD PTR[rbp] xor r14,r10 - add r12,r15 - mov r15,r11 + xor r15,r11 ror r13,14 - and r9,r10 - and r15,rax + mov r9,r11 + and rdi,r15 ror r14,28 add r12,r13 - add r9,r15 + xor r9,rdi add rbx,r12 add r9,r12 - lea rdi,QWORD PTR[1+rdi] - add r9,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[96+rsp] - mov r14,QWORD PTR[72+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[72+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[32+rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[32+rsp] add r12,QWORD PTR[88+rsp] mov r13,rbx - add r12,r14 + add r12,rdi mov r14,r9 ror r13,23 - mov r15,rcx - mov QWORD PTR[88+rsp],r12 + mov rdi,rcx - ror r14,5 xor r13,rbx - xor r15,rdx + ror r14,5 + xor rdi,rdx - ror r13,4 - add r12,r8 + mov QWORD PTR[88+rsp],r12 xor r14,r9 + and rdi,rbx - add r12,QWORD PTR[rdi*8+rbp] - and r15,rbx - mov r8,r10 + ror r13,4 + add r12,r8 + xor rdi,rdx ror r14,6 xor r13,rbx - xor r15,rdx + add r12,rdi - xor r8,r11 + mov rdi,r9 + add r12,QWORD PTR[rbp] xor r14,r9 - add r12,r15 - mov r15,r10 + xor rdi,r10 ror r13,14 - and r8,r9 - and r15,r11 + mov r8,r10 + and r15,rdi ror r14,28 add r12,r13 - add r8,r15 + xor r8,r15 add rax,r12 add r8,r12 - lea rdi,QWORD PTR[1+rdi] - add r8,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[104+rsp] - mov r14,QWORD PTR[80+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[80+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[40+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[40+rsp] add 
r12,QWORD PTR[96+rsp] mov r13,rax - add r12,r14 + add r12,r15 mov r14,r8 ror r13,23 mov r15,rbx - mov QWORD PTR[96+rsp],r12 - ror r14,5 xor r13,rax + ror r14,5 xor r15,rcx - ror r13,4 - add r12,rdx + mov QWORD PTR[96+rsp],r12 xor r14,r8 - - add r12,QWORD PTR[rdi*8+rbp] and r15,rax - mov rdx,r9 + + ror r13,4 + add r12,rdx + xor r15,rcx ror r14,6 xor r13,rax - xor r15,rcx + add r12,r15 - xor rdx,r10 + mov r15,r8 + add r12,QWORD PTR[rbp] xor r14,r8 - add r12,r15 - mov r15,r9 + xor r15,r9 ror r13,14 - and rdx,r8 - and r15,r10 + mov rdx,r9 + and rdi,r15 ror r14,28 add r12,r13 - add rdx,r15 + xor rdx,rdi add r11,r12 add rdx,r12 - lea rdi,QWORD PTR[1+rdi] - add rdx,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[112+rsp] - mov r14,QWORD PTR[88+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[88+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[48+rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[48+rsp] add r12,QWORD PTR[104+rsp] mov r13,r11 - add r12,r14 + add r12,rdi mov r14,rdx ror r13,23 - mov r15,rax - mov QWORD PTR[104+rsp],r12 + mov rdi,rax - ror r14,5 xor r13,r11 - xor r15,rbx + ror r14,5 + xor rdi,rbx - ror r13,4 - add r12,rcx + mov QWORD PTR[104+rsp],r12 xor r14,rdx + and rdi,r11 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r11 - mov rcx,r8 + ror r13,4 + add r12,rcx + xor rdi,rbx ror r14,6 xor r13,r11 - xor r15,rbx + add r12,rdi - xor rcx,r9 + mov rdi,rdx + add r12,QWORD PTR[rbp] xor r14,rdx - add r12,r15 - mov r15,r8 + xor rdi,r8 ror r13,14 - and rcx,rdx - and r15,r9 + mov rcx,r8 + and r15,rdi ror r14,28 add r12,r13 - add rcx,r15 + xor rcx,r15 add r10,r12 add rcx,r12 - lea rdi,QWORD PTR[1+rdi] - add rcx,r14 + lea rbp,QWORD PTR[24+rbp] mov r13,QWORD PTR[120+rsp] - mov r14,QWORD PTR[96+rsp] - mov r12,r13 - mov r15,r14 + mov r15,QWORD PTR[96+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[56+rsp] - - ror r15,42 + shr r12,7 + ror r13,1 xor r15,r14 shr r14,6 ror r15,19 - add r12,r13 - xor r14,r15 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[56+rsp] add r12,QWORD PTR[112+rsp] mov r13,r10 - add r12,r14 + add r12,r15 mov r14,rcx ror r13,23 mov r15,r11 - mov QWORD PTR[112+rsp],r12 - ror r14,5 xor r13,r10 + ror r14,5 xor r15,rax - ror r13,4 - add r12,rbx + mov QWORD PTR[112+rsp],r12 xor r14,rcx - - add r12,QWORD PTR[rdi*8+rbp] and r15,r10 - mov rbx,rdx + + ror r13,4 + add r12,rbx + xor r15,rax ror r14,6 xor r13,r10 - xor r15,rax + add r12,r15 - xor rbx,r8 + mov r15,rcx + add r12,QWORD PTR[rbp] xor r14,rcx - add r12,r15 - mov r15,rdx + xor r15,rdx ror r13,14 - and rbx,rcx - and r15,r8 + mov rbx,rdx + and rdi,r15 ror r14,28 add r12,r13 - add rbx,r15 + xor rbx,rdi add r9,r12 add rbx,r12 - lea rdi,QWORD PTR[1+rdi] - add rbx,r14 + lea rbp,QWORD PTR[8+rbp] mov r13,QWORD PTR[rsp] - mov r14,QWORD PTR[104+rsp] - mov r12,r13 - mov r15,r14 + mov rdi,QWORD PTR[104+rsp] - ror r12,7 - xor r12,r13 - shr r13,7 + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 - ror r12,1 xor r13,r12 - mov r12,QWORD PTR[64+rsp] - - ror r15,42 - xor r15,r14 + shr r12,7 + ror r13,1 + xor rdi,r14 shr r14,6 - ror r15,19 - add r12,r13 - xor r14,r15 + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[64+rsp] add r12,QWORD PTR[120+rsp] mov r13,r9 - add r12,r14 + add 
r12,rdi mov r14,rbx ror r13,23 - mov r15,r10 - mov QWORD PTR[120+rsp],r12 + mov rdi,r10 - ror r14,5 xor r13,r9 - xor r15,r11 + ror r14,5 + xor rdi,r11 - ror r13,4 - add r12,rax + mov QWORD PTR[120+rsp],r12 xor r14,rbx + and rdi,r9 - add r12,QWORD PTR[rdi*8+rbp] - and r15,r9 - mov rax,rcx + ror r13,4 + add r12,rax + xor rdi,r11 ror r14,6 xor r13,r9 - xor r15,r11 + add r12,rdi - xor rax,rdx + mov rdi,rbx + add r12,QWORD PTR[rbp] xor r14,rbx - add r12,r15 - mov r15,rcx + xor rdi,rcx ror r13,14 - and rax,rbx - and r15,rdx + mov rax,rcx + and r15,rdi ror r14,28 add r12,r13 - add rax,r15 + xor rax,r15 add r8,r12 add rax,r12 - lea rdi,QWORD PTR[1+rdi] - add rax,r14 - cmp rdi,80 - jb $L$rounds_16_xx + lea rbp,QWORD PTR[24+rbp] + cmp BYTE PTR[7+rbp],0 + jnz $L$rounds_16_xx mov rdi,QWORD PTR[((128+0))+rsp] + add rax,r14 lea rsi,QWORD PTR[128+rsi] add rax,QWORD PTR[rdi] @@ -1774,144 +1724,3872 @@ sha512_block_data_order ENDP ALIGN 64 K512:: + DQ 0428a2f98d728ae22h,07137449123ef65cdh DQ 0428a2f98d728ae22h,07137449123ef65cdh DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 03956c25bf348b538h,059f111f1b605d019h DQ 03956c25bf348b538h,059f111f1b605d019h DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0d807aa98a3030242h,012835b0145706fbeh DQ 0d807aa98a3030242h,012835b0145706fbeh DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 072be5d74f27b896fh,080deb1fe3b1696b1h DQ 072be5d74f27b896fh,080deb1fe3b1696b1h DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 02de92c6f592b0275h,04a7484aa6ea6e483h DQ 02de92c6f592b0275h,04a7484aa6ea6e483h DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 0983e5152ee66dfabh,0a831c66d2db43210h DQ 0983e5152ee66dfabh,0a831c66d2db43210h DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 027b70a8546d22ffch,02e1b21385c26c926h DQ 027b70a8546d22ffch,02e1b21385c26c926h DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 0650a73548baf63deh,0766a0abb3c77b2a8h DQ 0650a73548baf63deh,0766a0abb3c77b2a8h DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0d192e819d6ef5218h,0d69906245565a910h DQ 0d192e819d6ef5218h,0d69906245565a910h DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 019a4c116b8d2d0c8h,01e376c085141ab53h DQ 019a4c116b8d2d0c8h,01e376c085141ab53h DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 0748f82ee5defb2fch,078a5636f43172f60h DQ 0748f82ee5defb2fch,078a5636f43172f60h DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 090befffa23631e28h,0a4506cebde82bde9h DQ 090befffa23631e28h,0a4506cebde82bde9h DQ 
0bef9a3f7b2c67915h,0c67178f2e372532bh + DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh + DQ 0ca273eceea26619ch,0d186b8c721c0c207h DQ 0ca273eceea26619ch,0d186b8c721c0c207h DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h + DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h + DQ 006f067aa72176fbah,00a637dc5a2c898a6h DQ 006f067aa72176fbah,00a637dc5a2c898a6h DQ 0113f9804bef90daeh,01b710b35131c471bh + DQ 0113f9804bef90daeh,01b710b35131c471bh + DQ 028db77f523047d84h,032caab7b40c72493h DQ 028db77f523047d84h,032caab7b40c72493h DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch + DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch + DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah DQ 05fcb6fab3ad6faech,06c44198c4a475817h -EXTERN __imp_RtlVirtualUnwind:NEAR + DQ 05fcb6fab3ad6faech,06c44198c4a475817h -ALIGN 16 -se_handler PROC PRIVATE - push rsi - push rdi + DQ 00001020304050607h,008090a0b0c0d0e0fh + DQ 00001020304050607h,008090a0b0c0d0e0fh +DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha512_block_data_order_xop PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_xop:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$xop_shortcut:: push rbx push rbp push r12 push r13 push r14 push r15 - pushfq - sub rsp,64 - - mov rax,QWORD PTR[120+r8] - mov rbx,QWORD PTR[248+r8] - - lea r10,QWORD PTR[$L$prologue] - cmp rbx,r10 - jb $L$in_prologue - - mov rax,QWORD PTR[152+r8] - - lea r10,QWORD PTR[$L$epilogue] - cmp rbx,r10 - jae $L$in_prologue - - mov rax,QWORD PTR[((128+24))+rax] - lea rax,QWORD PTR[48+rax] - - mov rbx,QWORD PTR[((-8))+rax] - mov rbp,QWORD PTR[((-16))+rax] - mov r12,QWORD PTR[((-24))+rax] - mov r13,QWORD PTR[((-32))+rax] - mov r14,QWORD PTR[((-40))+rax] - mov r15,QWORD PTR[((-48))+rax] - mov QWORD PTR[144+r8],rbx - mov QWORD PTR[160+r8],rbp - mov QWORD PTR[216+r8],r12 - mov QWORD PTR[224+r8],r13 - mov QWORD PTR[232+r8],r14 - mov QWORD PTR[240+r8],r15 - -$L$in_prologue:: - mov rdi,QWORD PTR[8+rax] - mov rsi,QWORD PTR[16+rax] - mov QWORD PTR[152+r8],rax - mov QWORD PTR[168+r8],rsi - mov QWORD PTR[176+r8],rdi - - mov rdi,QWORD PTR[40+r9] - mov rsi,r8 - mov ecx,154 - DD 0a548f3fch - - - mov rsi,r9 - xor rcx,rcx - mov rdx,QWORD PTR[8+rsi] - mov r8,QWORD PTR[rsi] - mov r9,QWORD PTR[16+rsi] - mov r10,QWORD PTR[40+rsi] - lea r11,QWORD PTR[56+rsi] - lea r12,QWORD PTR[24+rsi] - mov QWORD PTR[32+rsp],r10 - mov QWORD PTR[40+rsp],r11 - mov QWORD PTR[48+rsp],r12 - mov QWORD PTR[56+rsp],rcx - call QWORD PTR[__imp_RtlVirtualUnwind] - - mov eax,1 - add rsp,64 - popfq - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - pop rdi - pop rsi - DB 0F3h,0C3h ;repret -se_handler ENDP - -.text$ ENDS -.pdata SEGMENT READONLY ALIGN(4) -ALIGN 4 - DD imagerel $L$SEH_begin_sha512_block_data_order - DD imagerel $L$SEH_end_sha512_block_data_order - DD imagerel $L$SEH_info_sha512_block_data_order + mov r11,rsp + shl rdx,4 + sub rsp,256 + lea rdx,QWORD PTR[rdx*8+rsi] + and rsp,-64 + mov QWORD PTR[((128+0))+rsp],rdi + mov QWORD PTR[((128+8))+rsp],rsi + mov QWORD PTR[((128+16))+rsp],rdx + mov QWORD PTR[((128+24))+rsp],r11 + movaps XMMWORD PTR[(128+32)+rsp],xmm6 + movaps XMMWORD PTR[(128+48)+rsp],xmm7 + movaps XMMWORD PTR[(128+64)+rsp],xmm8 + movaps XMMWORD PTR[(128+80)+rsp],xmm9 + movaps XMMWORD PTR[(128+96)+rsp],xmm10 + movaps 
XMMWORD PTR[(128+112)+rsp],xmm11 +$L$prologue_xop:: + + vzeroupper + mov rax,QWORD PTR[rdi] + mov rbx,QWORD PTR[8+rdi] + mov rcx,QWORD PTR[16+rdi] + mov rdx,QWORD PTR[24+rdi] + mov r8,QWORD PTR[32+rdi] + mov r9,QWORD PTR[40+rdi] + mov r10,QWORD PTR[48+rdi] + mov r11,QWORD PTR[56+rdi] + jmp $L$loop_xop +ALIGN 16 +$L$loop_xop:: + vmovdqa xmm11,XMMWORD PTR[((K512+1280))] + vmovdqu xmm0,XMMWORD PTR[rsi] + lea rbp,QWORD PTR[((K512+128))] + vmovdqu xmm1,XMMWORD PTR[16+rsi] + vmovdqu xmm2,XMMWORD PTR[32+rsi] + vpshufb xmm0,xmm0,xmm11 + vmovdqu xmm3,XMMWORD PTR[48+rsi] + vpshufb xmm1,xmm1,xmm11 + vmovdqu xmm4,XMMWORD PTR[64+rsi] + vpshufb xmm2,xmm2,xmm11 + vmovdqu xmm5,XMMWORD PTR[80+rsi] + vpshufb xmm3,xmm3,xmm11 + vmovdqu xmm6,XMMWORD PTR[96+rsi] + vpshufb xmm4,xmm4,xmm11 + vmovdqu xmm7,XMMWORD PTR[112+rsi] + vpshufb xmm5,xmm5,xmm11 + vpaddq xmm8,xmm0,XMMWORD PTR[((-128))+rbp] + vpshufb xmm6,xmm6,xmm11 + vpaddq xmm9,xmm1,XMMWORD PTR[((-96))+rbp] + vpshufb xmm7,xmm7,xmm11 + vpaddq xmm10,xmm2,XMMWORD PTR[((-64))+rbp] + vpaddq xmm11,xmm3,XMMWORD PTR[((-32))+rbp] + vmovdqa XMMWORD PTR[rsp],xmm8 + vpaddq xmm8,xmm4,XMMWORD PTR[rbp] + vmovdqa XMMWORD PTR[16+rsp],xmm9 + vpaddq xmm9,xmm5,XMMWORD PTR[32+rbp] + vmovdqa XMMWORD PTR[32+rsp],xmm10 + vpaddq xmm10,xmm6,XMMWORD PTR[64+rbp] + vmovdqa XMMWORD PTR[48+rsp],xmm11 + vpaddq xmm11,xmm7,XMMWORD PTR[96+rbp] + vmovdqa XMMWORD PTR[64+rsp],xmm8 + mov r14,rax + vmovdqa XMMWORD PTR[80+rsp],xmm9 + mov rdi,rbx + vmovdqa XMMWORD PTR[96+rsp],xmm10 + xor rdi,rcx + vmovdqa XMMWORD PTR[112+rsp],xmm11 + mov r13,r8 + jmp $L$xop_00_47 -.pdata ENDS -.xdata SEGMENT READONLY ALIGN(8) -ALIGN 8 -$L$SEH_info_sha512_block_data_order:: +ALIGN 16 +$L$xop_00_47:: + add rbp,256 + vpalignr xmm8,xmm1,xmm0,8 + ror r13,23 + mov rax,r14 + vpalignr xmm11,xmm5,xmm4,8 + mov r12,r9 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r8 + xor r12,r10 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rax + vpaddq xmm0,xmm0,xmm11 + and r12,r8 + xor r13,r8 + add r11,QWORD PTR[rsp] + mov r15,rax +DB 143,72,120,195,209,7 + xor r12,r10 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,223,3 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + ror r14,28 + vpsrlq xmm10,xmm7,6 + add rdx,r11 + add r11,rdi + vpaddq xmm0,xmm0,xmm8 + mov r13,rdx + add r14,r11 +DB 143,72,120,195,203,42 + ror r13,23 + mov r11,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + vpaddq xmm0,xmm0,xmm11 + add r10,QWORD PTR[8+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + vpaddq xmm10,xmm0,XMMWORD PTR[((-128))+rbp] + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD PTR[rsp],xmm10 + vpalignr xmm8,xmm2,xmm1,8 + ror r13,23 + mov r10,r14 + vpalignr xmm11,xmm6,xmm5,8 + mov r12,rdx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rcx + xor r12,r8 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r10 + vpaddq xmm1,xmm1,xmm11 + and r12,rcx + xor r13,rcx + add r9,QWORD PTR[16+rsp] + mov r15,r10 +DB 143,72,120,195,209,7 + xor r12,r8 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,216,3 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + ror r14,28 + vpsrlq xmm10,xmm0,6 + add rbx,r9 + add r9,rdi + vpaddq xmm1,xmm1,xmm8 + mov r13,rbx + add r14,r9 +DB 
143,72,120,195,203,42 + ror r13,23 + mov r9,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + vpaddq xmm1,xmm1,xmm11 + add r8,QWORD PTR[24+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + vpaddq xmm10,xmm1,XMMWORD PTR[((-96))+rbp] + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD PTR[16+rsp],xmm10 + vpalignr xmm8,xmm3,xmm2,8 + ror r13,23 + mov r8,r14 + vpalignr xmm11,xmm7,xmm6,8 + mov r12,rbx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rax + xor r12,rcx + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r8 + vpaddq xmm2,xmm2,xmm11 + and r12,rax + xor r13,rax + add rdx,QWORD PTR[32+rsp] + mov r15,r8 +DB 143,72,120,195,209,7 + xor r12,rcx + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,217,3 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + ror r14,28 + vpsrlq xmm10,xmm1,6 + add r11,rdx + add rdx,rdi + vpaddq xmm2,xmm2,xmm8 + mov r13,r11 + add r14,rdx +DB 143,72,120,195,203,42 + ror r13,23 + mov rdx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + vpaddq xmm2,xmm2,xmm11 + add rcx,QWORD PTR[40+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + vpaddq xmm10,xmm2,XMMWORD PTR[((-64))+rbp] + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD PTR[32+rsp],xmm10 + vpalignr xmm8,xmm4,xmm3,8 + ror r13,23 + mov rcx,r14 + vpalignr xmm11,xmm0,xmm7,8 + mov r12,r11 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r10 + xor r12,rax + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rcx + vpaddq xmm3,xmm3,xmm11 + and r12,r10 + xor r13,r10 + add rbx,QWORD PTR[48+rsp] + mov r15,rcx +DB 143,72,120,195,209,7 + xor r12,rax + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,218,3 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + ror r14,28 + vpsrlq xmm10,xmm2,6 + add r9,rbx + add rbx,rdi + vpaddq xmm3,xmm3,xmm8 + mov r13,r9 + add r14,rbx +DB 143,72,120,195,203,42 + ror r13,23 + mov rbx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + vpaddq xmm3,xmm3,xmm11 + add rax,QWORD PTR[56+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + vpaddq xmm10,xmm3,XMMWORD PTR[((-32))+rbp] + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD PTR[48+rsp],xmm10 + vpalignr xmm8,xmm5,xmm4,8 + ror r13,23 + mov rax,r14 + vpalignr xmm11,xmm1,xmm0,8 + mov r12,r9 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r8 + xor r12,r10 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rax + vpaddq xmm4,xmm4,xmm11 + and r12,r8 + xor r13,r8 + add r11,QWORD PTR[64+rsp] + mov r15,rax +DB 143,72,120,195,209,7 + xor r12,r10 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,219,3 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + ror r14,28 + vpsrlq xmm10,xmm3,6 + add rdx,r11 + add r11,rdi + vpaddq xmm4,xmm4,xmm8 + mov r13,rdx + add r14,r11 +DB 
143,72,120,195,203,42 + ror r13,23 + mov r11,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + vpaddq xmm4,xmm4,xmm11 + add r10,QWORD PTR[72+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + vpaddq xmm10,xmm4,XMMWORD PTR[rbp] + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD PTR[64+rsp],xmm10 + vpalignr xmm8,xmm6,xmm5,8 + ror r13,23 + mov r10,r14 + vpalignr xmm11,xmm2,xmm1,8 + mov r12,rdx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rcx + xor r12,r8 + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r10 + vpaddq xmm5,xmm5,xmm11 + and r12,rcx + xor r13,rcx + add r9,QWORD PTR[80+rsp] + mov r15,r10 +DB 143,72,120,195,209,7 + xor r12,r8 + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,220,3 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + ror r14,28 + vpsrlq xmm10,xmm4,6 + add rbx,r9 + add r9,rdi + vpaddq xmm5,xmm5,xmm8 + mov r13,rbx + add r14,r9 +DB 143,72,120,195,203,42 + ror r13,23 + mov r9,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + vpaddq xmm5,xmm5,xmm11 + add r8,QWORD PTR[88+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + vpaddq xmm10,xmm5,XMMWORD PTR[32+rbp] + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD PTR[80+rsp],xmm10 + vpalignr xmm8,xmm7,xmm6,8 + ror r13,23 + mov r8,r14 + vpalignr xmm11,xmm3,xmm2,8 + mov r12,rbx + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,rax + xor r12,rcx + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,r8 + vpaddq xmm6,xmm6,xmm11 + and r12,rax + xor r13,rax + add rdx,QWORD PTR[96+rsp] + mov r15,r8 +DB 143,72,120,195,209,7 + xor r12,rcx + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,221,3 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + ror r14,28 + vpsrlq xmm10,xmm5,6 + add r11,rdx + add rdx,rdi + vpaddq xmm6,xmm6,xmm8 + mov r13,r11 + add r14,rdx +DB 143,72,120,195,203,42 + ror r13,23 + mov rdx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + vpaddq xmm6,xmm6,xmm11 + add rcx,QWORD PTR[104+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + vpaddq xmm10,xmm6,XMMWORD PTR[64+rbp] + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD PTR[96+rsp],xmm10 + vpalignr xmm8,xmm0,xmm7,8 + ror r13,23 + mov rcx,r14 + vpalignr xmm11,xmm4,xmm3,8 + mov r12,r11 + ror r14,5 +DB 143,72,120,195,200,56 + xor r13,r10 + xor r12,rax + vpsrlq xmm8,xmm8,7 + ror r13,4 + xor r14,rcx + vpaddq xmm7,xmm7,xmm11 + and r12,r10 + xor r13,r10 + add rbx,QWORD PTR[112+rsp] + mov r15,rcx +DB 143,72,120,195,209,7 + xor r12,rax + ror r14,6 + vpxor xmm8,xmm8,xmm9 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 +DB 143,104,120,195,222,3 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + ror r14,28 + vpsrlq xmm10,xmm6,6 + add r9,rbx + add rbx,rdi + vpaddq xmm7,xmm7,xmm8 + mov r13,r9 + add r14,rbx +DB 
143,72,120,195,203,42 + ror r13,23 + mov rbx,r14 + vpxor xmm11,xmm11,xmm10 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm9 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + vpaddq xmm7,xmm7,xmm11 + add rax,QWORD PTR[120+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + vpaddq xmm10,xmm7,XMMWORD PTR[96+rbp] + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD PTR[112+rsp],xmm10 + cmp BYTE PTR[135+rbp],0 + jne $L$xop_00_47 + ror r13,23 + mov rax,r14 + mov r12,r9 + ror r14,5 + xor r13,r8 + xor r12,r10 + ror r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD PTR[rsp] + mov r15,rax + xor r12,r10 + ror r14,6 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + ror r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + ror r13,23 + mov r11,r14 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD PTR[8+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + ror r13,23 + mov r10,r14 + mov r12,rdx + ror r14,5 + xor r13,rcx + xor r12,r8 + ror r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD PTR[16+rsp] + mov r15,r10 + xor r12,r8 + ror r14,6 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + ror r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + ror r13,23 + mov r9,r14 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD PTR[24+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + ror r13,23 + mov r8,r14 + mov r12,rbx + ror r14,5 + xor r13,rax + xor r12,rcx + ror r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD PTR[32+rsp] + mov r15,r8 + xor r12,rcx + ror r14,6 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + ror r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + ror r13,23 + mov rdx,r14 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD PTR[40+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + ror r13,23 + mov rcx,r14 + mov r12,r11 + ror r14,5 + xor r13,r10 + xor r12,rax + ror r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD PTR[48+rsp] + mov r15,rcx + xor r12,rax + ror r14,6 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + ror r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + ror r13,23 + mov rbx,r14 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD PTR[56+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + ror r13,23 + mov rax,r14 + mov r12,r9 + ror r14,5 + xor 
r13,r8 + xor r12,r10 + ror r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD PTR[64+rsp] + mov r15,rax + xor r12,r10 + ror r14,6 + xor r15,rbx + add r11,r12 + ror r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + ror r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + ror r13,23 + mov r11,r14 + mov r12,r8 + ror r14,5 + xor r13,rdx + xor r12,r9 + ror r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD PTR[72+rsp] + mov rdi,r11 + xor r12,r9 + ror r14,6 + xor rdi,rax + add r10,r12 + ror r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + ror r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + ror r13,23 + mov r10,r14 + mov r12,rdx + ror r14,5 + xor r13,rcx + xor r12,r8 + ror r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD PTR[80+rsp] + mov r15,r10 + xor r12,r8 + ror r14,6 + xor r15,r11 + add r9,r12 + ror r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + ror r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + ror r13,23 + mov r9,r14 + mov r12,rcx + ror r14,5 + xor r13,rbx + xor r12,rdx + ror r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD PTR[88+rsp] + mov rdi,r9 + xor r12,rdx + ror r14,6 + xor rdi,r10 + add r8,r12 + ror r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + ror r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + ror r13,23 + mov r8,r14 + mov r12,rbx + ror r14,5 + xor r13,rax + xor r12,rcx + ror r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD PTR[96+rsp] + mov r15,r8 + xor r12,rcx + ror r14,6 + xor r15,r9 + add rdx,r12 + ror r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + ror r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + ror r13,23 + mov rdx,r14 + mov r12,rax + ror r14,5 + xor r13,r11 + xor r12,rbx + ror r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD PTR[104+rsp] + mov rdi,rdx + xor r12,rbx + ror r14,6 + xor rdi,r8 + add rcx,r12 + ror r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + ror r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + ror r13,23 + mov rcx,r14 + mov r12,r11 + ror r14,5 + xor r13,r10 + xor r12,rax + ror r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD PTR[112+rsp] + mov r15,rcx + xor r12,rax + ror r14,6 + xor r15,rdx + add rbx,r12 + ror r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + ror r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + ror r13,23 + mov rbx,r14 + mov r12,r10 + ror r14,5 + xor r13,r9 + xor r12,r11 + ror r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD PTR[120+rsp] + mov rdi,rbx + xor r12,r11 + ror r14,6 + xor rdi,rcx + add rax,r12 + ror r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + ror r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + mov rdi,QWORD PTR[((128+0))+rsp] + mov rax,r14 + + add rax,QWORD PTR[rdi] + lea rsi,QWORD PTR[128+rsi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + add r10,QWORD PTR[48+rdi] + add r11,QWORD PTR[56+rdi] + + cmp rsi,QWORD PTR[((128+16))+rsp] + + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + jb $L$loop_xop + + mov rsi,QWORD PTR[((128+24))+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((128+32))+rsp] + movaps xmm7,XMMWORD 
PTR[((128+48))+rsp] + movaps xmm8,XMMWORD PTR[((128+64))+rsp] + movaps xmm9,XMMWORD PTR[((128+80))+rsp] + movaps xmm10,XMMWORD PTR[((128+96))+rsp] + movaps xmm11,XMMWORD PTR[((128+112))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_xop:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha512_block_data_order_xop:: +sha512_block_data_order_xop ENDP + +ALIGN 64 +sha512_block_data_order_avx PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_avx:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$avx_shortcut:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,256 + lea rdx,QWORD PTR[rdx*8+rsi] + and rsp,-64 + mov QWORD PTR[((128+0))+rsp],rdi + mov QWORD PTR[((128+8))+rsp],rsi + mov QWORD PTR[((128+16))+rsp],rdx + mov QWORD PTR[((128+24))+rsp],r11 + movaps XMMWORD PTR[(128+32)+rsp],xmm6 + movaps XMMWORD PTR[(128+48)+rsp],xmm7 + movaps XMMWORD PTR[(128+64)+rsp],xmm8 + movaps XMMWORD PTR[(128+80)+rsp],xmm9 + movaps XMMWORD PTR[(128+96)+rsp],xmm10 + movaps XMMWORD PTR[(128+112)+rsp],xmm11 +$L$prologue_avx:: + + vzeroupper + mov rax,QWORD PTR[rdi] + mov rbx,QWORD PTR[8+rdi] + mov rcx,QWORD PTR[16+rdi] + mov rdx,QWORD PTR[24+rdi] + mov r8,QWORD PTR[32+rdi] + mov r9,QWORD PTR[40+rdi] + mov r10,QWORD PTR[48+rdi] + mov r11,QWORD PTR[56+rdi] + jmp $L$loop_avx +ALIGN 16 +$L$loop_avx:: + vmovdqa xmm11,XMMWORD PTR[((K512+1280))] + vmovdqu xmm0,XMMWORD PTR[rsi] + lea rbp,QWORD PTR[((K512+128))] + vmovdqu xmm1,XMMWORD PTR[16+rsi] + vmovdqu xmm2,XMMWORD PTR[32+rsi] + vpshufb xmm0,xmm0,xmm11 + vmovdqu xmm3,XMMWORD PTR[48+rsi] + vpshufb xmm1,xmm1,xmm11 + vmovdqu xmm4,XMMWORD PTR[64+rsi] + vpshufb xmm2,xmm2,xmm11 + vmovdqu xmm5,XMMWORD PTR[80+rsi] + vpshufb xmm3,xmm3,xmm11 + vmovdqu xmm6,XMMWORD PTR[96+rsi] + vpshufb xmm4,xmm4,xmm11 + vmovdqu xmm7,XMMWORD PTR[112+rsi] + vpshufb xmm5,xmm5,xmm11 + vpaddq xmm8,xmm0,XMMWORD PTR[((-128))+rbp] + vpshufb xmm6,xmm6,xmm11 + vpaddq xmm9,xmm1,XMMWORD PTR[((-96))+rbp] + vpshufb xmm7,xmm7,xmm11 + vpaddq xmm10,xmm2,XMMWORD PTR[((-64))+rbp] + vpaddq xmm11,xmm3,XMMWORD PTR[((-32))+rbp] + vmovdqa XMMWORD PTR[rsp],xmm8 + vpaddq xmm8,xmm4,XMMWORD PTR[rbp] + vmovdqa XMMWORD PTR[16+rsp],xmm9 + vpaddq xmm9,xmm5,XMMWORD PTR[32+rbp] + vmovdqa XMMWORD PTR[32+rsp],xmm10 + vpaddq xmm10,xmm6,XMMWORD PTR[64+rbp] + vmovdqa XMMWORD PTR[48+rsp],xmm11 + vpaddq xmm11,xmm7,XMMWORD PTR[96+rbp] + vmovdqa XMMWORD PTR[64+rsp],xmm8 + mov r14,rax + vmovdqa XMMWORD PTR[80+rsp],xmm9 + mov rdi,rbx + vmovdqa XMMWORD PTR[96+rsp],xmm10 + xor rdi,rcx + vmovdqa XMMWORD PTR[112+rsp],xmm11 + mov r13,r8 + jmp $L$avx_00_47 + +ALIGN 16 +$L$avx_00_47:: + add rbp,256 + vpalignr xmm8,xmm1,xmm0,8 + shrd r13,r13,23 + mov rax,r14 + vpalignr xmm11,xmm5,xmm4,8 + mov r12,r9 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r8 + xor r12,r10 + vpaddq xmm0,xmm0,xmm11 + shrd r13,r13,4 + xor r14,rax + vpsrlq xmm11,xmm8,7 + and r12,r8 + xor r13,r8 + vpsllq xmm9,xmm8,56 + add r11,QWORD PTR[rsp] + mov r15,rax + vpxor xmm8,xmm11,xmm10 + xor r12,r10 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rbx + add r11,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + shrd r14,r14,28 + vpsrlq 
xmm11,xmm7,6 + add rdx,r11 + add r11,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rdx + add r14,r11 + vpsllq xmm10,xmm7,3 + shrd r13,r13,23 + mov r11,r14 + vpaddq xmm0,xmm0,xmm8 + mov r12,r8 + shrd r14,r14,5 + vpsrlq xmm9,xmm7,19 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r11 + vpsllq xmm10,xmm10,42 + and r12,rdx + xor r13,rdx + vpxor xmm11,xmm11,xmm9 + add r10,QWORD PTR[8+rsp] + mov rdi,r11 + vpsrlq xmm9,xmm9,42 + xor r12,r9 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rax + add r10,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm0,xmm0,xmm11 + xor r14,r11 + add r10,r13 + vpaddq xmm10,xmm0,XMMWORD PTR[((-128))+rbp] + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD PTR[rsp],xmm10 + vpalignr xmm8,xmm2,xmm1,8 + shrd r13,r13,23 + mov r10,r14 + vpalignr xmm11,xmm6,xmm5,8 + mov r12,rdx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rcx + xor r12,r8 + vpaddq xmm1,xmm1,xmm11 + shrd r13,r13,4 + xor r14,r10 + vpsrlq xmm11,xmm8,7 + and r12,rcx + xor r13,rcx + vpsllq xmm9,xmm8,56 + add r9,QWORD PTR[16+rsp] + mov r15,r10 + vpxor xmm8,xmm11,xmm10 + xor r12,r8 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r11 + add r9,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + shrd r14,r14,28 + vpsrlq xmm11,xmm0,6 + add rbx,r9 + add r9,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rbx + add r14,r9 + vpsllq xmm10,xmm0,3 + shrd r13,r13,23 + mov r9,r14 + vpaddq xmm1,xmm1,xmm8 + mov r12,rcx + shrd r14,r14,5 + vpsrlq xmm9,xmm0,19 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r9 + vpsllq xmm10,xmm10,42 + and r12,rbx + xor r13,rbx + vpxor xmm11,xmm11,xmm9 + add r8,QWORD PTR[24+rsp] + mov rdi,r9 + vpsrlq xmm9,xmm9,42 + xor r12,rdx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r10 + add r8,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm1,xmm1,xmm11 + xor r14,r9 + add r8,r13 + vpaddq xmm10,xmm1,XMMWORD PTR[((-96))+rbp] + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD PTR[16+rsp],xmm10 + vpalignr xmm8,xmm3,xmm2,8 + shrd r13,r13,23 + mov r8,r14 + vpalignr xmm11,xmm7,xmm6,8 + mov r12,rbx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rax + xor r12,rcx + vpaddq xmm2,xmm2,xmm11 + shrd r13,r13,4 + xor r14,r8 + vpsrlq xmm11,xmm8,7 + and r12,rax + xor r13,rax + vpsllq xmm9,xmm8,56 + add rdx,QWORD PTR[32+rsp] + mov r15,r8 + vpxor xmm8,xmm11,xmm10 + xor r12,rcx + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r9 + add rdx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + shrd r14,r14,28 + vpsrlq xmm11,xmm1,6 + add r11,rdx + add rdx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r11 + add r14,rdx + vpsllq xmm10,xmm1,3 + shrd r13,r13,23 + mov rdx,r14 + vpaddq xmm2,xmm2,xmm8 + mov r12,rax + shrd r14,r14,5 + vpsrlq xmm9,xmm1,19 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rdx + vpsllq xmm10,xmm10,42 + and r12,r11 + xor r13,r11 + vpxor xmm11,xmm11,xmm9 + add rcx,QWORD PTR[40+rsp] + mov rdi,rdx + vpsrlq xmm9,xmm9,42 + xor r12,rbx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r8 + add rcx,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm2,xmm2,xmm11 + xor r14,rdx + add rcx,r13 + vpaddq xmm10,xmm2,XMMWORD PTR[((-64))+rbp] + xor r15,r8 + shrd r14,r14,28 + add 
r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD PTR[32+rsp],xmm10 + vpalignr xmm8,xmm4,xmm3,8 + shrd r13,r13,23 + mov rcx,r14 + vpalignr xmm11,xmm0,xmm7,8 + mov r12,r11 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r10 + xor r12,rax + vpaddq xmm3,xmm3,xmm11 + shrd r13,r13,4 + xor r14,rcx + vpsrlq xmm11,xmm8,7 + and r12,r10 + xor r13,r10 + vpsllq xmm9,xmm8,56 + add rbx,QWORD PTR[48+rsp] + mov r15,rcx + vpxor xmm8,xmm11,xmm10 + xor r12,rax + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rdx + add rbx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + shrd r14,r14,28 + vpsrlq xmm11,xmm2,6 + add r9,rbx + add rbx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r9 + add r14,rbx + vpsllq xmm10,xmm2,3 + shrd r13,r13,23 + mov rbx,r14 + vpaddq xmm3,xmm3,xmm8 + mov r12,r10 + shrd r14,r14,5 + vpsrlq xmm9,xmm2,19 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rbx + vpsllq xmm10,xmm10,42 + and r12,r9 + xor r13,r9 + vpxor xmm11,xmm11,xmm9 + add rax,QWORD PTR[56+rsp] + mov rdi,rbx + vpsrlq xmm9,xmm9,42 + xor r12,r11 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rcx + add rax,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm3,xmm3,xmm11 + xor r14,rbx + add rax,r13 + vpaddq xmm10,xmm3,XMMWORD PTR[((-32))+rbp] + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + vmovdqa XMMWORD PTR[48+rsp],xmm10 + vpalignr xmm8,xmm5,xmm4,8 + shrd r13,r13,23 + mov rax,r14 + vpalignr xmm11,xmm1,xmm0,8 + mov r12,r9 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r8 + xor r12,r10 + vpaddq xmm4,xmm4,xmm11 + shrd r13,r13,4 + xor r14,rax + vpsrlq xmm11,xmm8,7 + and r12,r8 + xor r13,r8 + vpsllq xmm9,xmm8,56 + add r11,QWORD PTR[64+rsp] + mov r15,rax + vpxor xmm8,xmm11,xmm10 + xor r12,r10 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rbx + add r11,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rax + add r11,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rbx + shrd r14,r14,28 + vpsrlq xmm11,xmm3,6 + add rdx,r11 + add r11,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rdx + add r14,r11 + vpsllq xmm10,xmm3,3 + shrd r13,r13,23 + mov r11,r14 + vpaddq xmm4,xmm4,xmm8 + mov r12,r8 + shrd r14,r14,5 + vpsrlq xmm9,xmm3,19 + xor r13,rdx + xor r12,r9 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r11 + vpsllq xmm10,xmm10,42 + and r12,rdx + xor r13,rdx + vpxor xmm11,xmm11,xmm9 + add r10,QWORD PTR[72+rsp] + mov rdi,r11 + vpsrlq xmm9,xmm9,42 + xor r12,r9 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rax + add r10,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm4,xmm4,xmm11 + xor r14,r11 + add r10,r13 + vpaddq xmm10,xmm4,XMMWORD PTR[rbp] + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + vmovdqa XMMWORD PTR[64+rsp],xmm10 + vpalignr xmm8,xmm6,xmm5,8 + shrd r13,r13,23 + mov r10,r14 + vpalignr xmm11,xmm2,xmm1,8 + mov r12,rdx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rcx + xor r12,r8 + vpaddq xmm5,xmm5,xmm11 + shrd r13,r13,4 + xor r14,r10 + vpsrlq xmm11,xmm8,7 + and r12,rcx + xor r13,rcx + vpsllq xmm9,xmm8,56 + add r9,QWORD PTR[80+rsp] + mov r15,r10 + vpxor xmm8,xmm11,xmm10 + xor r12,r8 + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r11 + add r9,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r10 + add r9,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r11 + shrd r14,r14,28 + vpsrlq xmm11,xmm4,6 + add 
rbx,r9 + add r9,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,rbx + add r14,r9 + vpsllq xmm10,xmm4,3 + shrd r13,r13,23 + mov r9,r14 + vpaddq xmm5,xmm5,xmm8 + mov r12,rcx + shrd r14,r14,5 + vpsrlq xmm9,xmm4,19 + xor r13,rbx + xor r12,rdx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,r9 + vpsllq xmm10,xmm10,42 + and r12,rbx + xor r13,rbx + vpxor xmm11,xmm11,xmm9 + add r8,QWORD PTR[88+rsp] + mov rdi,r9 + vpsrlq xmm9,xmm9,42 + xor r12,rdx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r10 + add r8,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm5,xmm5,xmm11 + xor r14,r9 + add r8,r13 + vpaddq xmm10,xmm5,XMMWORD PTR[32+rbp] + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + vmovdqa XMMWORD PTR[80+rsp],xmm10 + vpalignr xmm8,xmm7,xmm6,8 + shrd r13,r13,23 + mov r8,r14 + vpalignr xmm11,xmm3,xmm2,8 + mov r12,rbx + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,rax + xor r12,rcx + vpaddq xmm6,xmm6,xmm11 + shrd r13,r13,4 + xor r14,r8 + vpsrlq xmm11,xmm8,7 + and r12,rax + xor r13,rax + vpsllq xmm9,xmm8,56 + add rdx,QWORD PTR[96+rsp] + mov r15,r8 + vpxor xmm8,xmm11,xmm10 + xor r12,rcx + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,r9 + add rdx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,r8 + add rdx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,r9 + shrd r14,r14,28 + vpsrlq xmm11,xmm5,6 + add r11,rdx + add rdx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r11 + add r14,rdx + vpsllq xmm10,xmm5,3 + shrd r13,r13,23 + mov rdx,r14 + vpaddq xmm6,xmm6,xmm8 + mov r12,rax + shrd r14,r14,5 + vpsrlq xmm9,xmm5,19 + xor r13,r11 + xor r12,rbx + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rdx + vpsllq xmm10,xmm10,42 + and r12,r11 + xor r13,r11 + vpxor xmm11,xmm11,xmm9 + add rcx,QWORD PTR[104+rsp] + mov rdi,rdx + vpsrlq xmm9,xmm9,42 + xor r12,rbx + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,r8 + add rcx,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm6,xmm6,xmm11 + xor r14,rdx + add rcx,r13 + vpaddq xmm10,xmm6,XMMWORD PTR[64+rbp] + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + vmovdqa XMMWORD PTR[96+rsp],xmm10 + vpalignr xmm8,xmm0,xmm7,8 + shrd r13,r13,23 + mov rcx,r14 + vpalignr xmm11,xmm4,xmm3,8 + mov r12,r11 + shrd r14,r14,5 + vpsrlq xmm10,xmm8,1 + xor r13,r10 + xor r12,rax + vpaddq xmm7,xmm7,xmm11 + shrd r13,r13,4 + xor r14,rcx + vpsrlq xmm11,xmm8,7 + and r12,r10 + xor r13,r10 + vpsllq xmm9,xmm8,56 + add rbx,QWORD PTR[112+rsp] + mov r15,rcx + vpxor xmm8,xmm11,xmm10 + xor r12,rax + shrd r14,r14,6 + vpsrlq xmm10,xmm10,7 + xor r15,rdx + add rbx,r12 + vpxor xmm8,xmm8,xmm9 + shrd r13,r13,14 + and rdi,r15 + vpsllq xmm9,xmm9,7 + xor r14,rcx + add rbx,r13 + vpxor xmm8,xmm8,xmm10 + xor rdi,rdx + shrd r14,r14,28 + vpsrlq xmm11,xmm6,6 + add r9,rbx + add rbx,rdi + vpxor xmm8,xmm8,xmm9 + mov r13,r9 + add r14,rbx + vpsllq xmm10,xmm6,3 + shrd r13,r13,23 + mov rbx,r14 + vpaddq xmm7,xmm7,xmm8 + mov r12,r10 + shrd r14,r14,5 + vpsrlq xmm9,xmm6,19 + xor r13,r9 + xor r12,r11 + vpxor xmm11,xmm11,xmm10 + shrd r13,r13,4 + xor r14,rbx + vpsllq xmm10,xmm10,42 + and r12,r9 + xor r13,r9 + vpxor xmm11,xmm11,xmm9 + add rax,QWORD PTR[120+rsp] + mov rdi,rbx + vpsrlq xmm9,xmm9,42 + xor r12,r11 + shrd r14,r14,6 + vpxor xmm11,xmm11,xmm10 + xor rdi,rcx + add rax,r12 + vpxor xmm11,xmm11,xmm9 + shrd r13,r13,14 + and r15,rdi + vpaddq xmm7,xmm7,xmm11 + xor r14,rbx + add rax,r13 + vpaddq xmm10,xmm7,XMMWORD PTR[96+rbp] + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov 
r13,r8 + add r14,rax + vmovdqa XMMWORD PTR[112+rsp],xmm10 + cmp BYTE PTR[135+rbp],0 + jne $L$avx_00_47 + shrd r13,r13,23 + mov rax,r14 + mov r12,r9 + shrd r14,r14,5 + xor r13,r8 + xor r12,r10 + shrd r13,r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD PTR[rsp] + mov r15,rax + xor r12,r10 + shrd r14,r14,6 + xor r15,rbx + add r11,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rax + add r11,r13 + xor rdi,rbx + shrd r14,r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + shrd r13,r13,23 + mov r11,r14 + mov r12,r8 + shrd r14,r14,5 + xor r13,rdx + xor r12,r9 + shrd r13,r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD PTR[8+rsp] + mov rdi,r11 + xor r12,r9 + shrd r14,r14,6 + xor rdi,rax + add r10,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + shrd r13,r13,23 + mov r10,r14 + mov r12,rdx + shrd r14,r14,5 + xor r13,rcx + xor r12,r8 + shrd r13,r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD PTR[16+rsp] + mov r15,r10 + xor r12,r8 + shrd r14,r14,6 + xor r15,r11 + add r9,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + shrd r14,r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + shrd r13,r13,23 + mov r9,r14 + mov r12,rcx + shrd r14,r14,5 + xor r13,rbx + xor r12,rdx + shrd r13,r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD PTR[24+rsp] + mov rdi,r9 + xor r12,rdx + shrd r14,r14,6 + xor rdi,r10 + add r8,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + shrd r13,r13,23 + mov r8,r14 + mov r12,rbx + shrd r14,r14,5 + xor r13,rax + xor r12,rcx + shrd r13,r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD PTR[32+rsp] + mov r15,r8 + xor r12,rcx + shrd r14,r14,6 + xor r15,r9 + add rdx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + shrd r14,r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + shrd r13,r13,23 + mov rdx,r14 + mov r12,rax + shrd r14,r14,5 + xor r13,r11 + xor r12,rbx + shrd r13,r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD PTR[40+rsp] + mov rdi,rdx + xor r12,rbx + shrd r14,r14,6 + xor rdi,r8 + add rcx,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + shrd r13,r13,23 + mov rcx,r14 + mov r12,r11 + shrd r14,r14,5 + xor r13,r10 + xor r12,rax + shrd r13,r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD PTR[48+rsp] + mov r15,rcx + xor r12,rax + shrd r14,r14,6 + xor r15,rdx + add rbx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + shrd r14,r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + shrd r13,r13,23 + mov rbx,r14 + mov r12,r10 + shrd r14,r14,5 + xor r13,r9 + xor r12,r11 + shrd r13,r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD PTR[56+rsp] + mov rdi,rbx + xor r12,r11 + shrd r14,r14,6 + xor rdi,rcx + add rax,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + shrd r13,r13,23 + mov rax,r14 + mov r12,r9 + shrd r14,r14,5 + xor r13,r8 + xor r12,r10 + shrd r13,r13,4 + xor r14,rax + and r12,r8 + xor r13,r8 + add r11,QWORD PTR[64+rsp] + mov r15,rax + xor r12,r10 + shrd r14,r14,6 + xor r15,rbx + add r11,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rax + add 
r11,r13 + xor rdi,rbx + shrd r14,r14,28 + add rdx,r11 + add r11,rdi + mov r13,rdx + add r14,r11 + shrd r13,r13,23 + mov r11,r14 + mov r12,r8 + shrd r14,r14,5 + xor r13,rdx + xor r12,r9 + shrd r13,r13,4 + xor r14,r11 + and r12,rdx + xor r13,rdx + add r10,QWORD PTR[72+rsp] + mov rdi,r11 + xor r12,r9 + shrd r14,r14,6 + xor rdi,rax + add r10,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r11 + add r10,r13 + xor r15,rax + shrd r14,r14,28 + add rcx,r10 + add r10,r15 + mov r13,rcx + add r14,r10 + shrd r13,r13,23 + mov r10,r14 + mov r12,rdx + shrd r14,r14,5 + xor r13,rcx + xor r12,r8 + shrd r13,r13,4 + xor r14,r10 + and r12,rcx + xor r13,rcx + add r9,QWORD PTR[80+rsp] + mov r15,r10 + xor r12,r8 + shrd r14,r14,6 + xor r15,r11 + add r9,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r10 + add r9,r13 + xor rdi,r11 + shrd r14,r14,28 + add rbx,r9 + add r9,rdi + mov r13,rbx + add r14,r9 + shrd r13,r13,23 + mov r9,r14 + mov r12,rcx + shrd r14,r14,5 + xor r13,rbx + xor r12,rdx + shrd r13,r13,4 + xor r14,r9 + and r12,rbx + xor r13,rbx + add r8,QWORD PTR[88+rsp] + mov rdi,r9 + xor r12,rdx + shrd r14,r14,6 + xor rdi,r10 + add r8,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,r9 + add r8,r13 + xor r15,r10 + shrd r14,r14,28 + add rax,r8 + add r8,r15 + mov r13,rax + add r14,r8 + shrd r13,r13,23 + mov r8,r14 + mov r12,rbx + shrd r14,r14,5 + xor r13,rax + xor r12,rcx + shrd r13,r13,4 + xor r14,r8 + and r12,rax + xor r13,rax + add rdx,QWORD PTR[96+rsp] + mov r15,r8 + xor r12,rcx + shrd r14,r14,6 + xor r15,r9 + add rdx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,r8 + add rdx,r13 + xor rdi,r9 + shrd r14,r14,28 + add r11,rdx + add rdx,rdi + mov r13,r11 + add r14,rdx + shrd r13,r13,23 + mov rdx,r14 + mov r12,rax + shrd r14,r14,5 + xor r13,r11 + xor r12,rbx + shrd r13,r13,4 + xor r14,rdx + and r12,r11 + xor r13,r11 + add rcx,QWORD PTR[104+rsp] + mov rdi,rdx + xor r12,rbx + shrd r14,r14,6 + xor rdi,r8 + add rcx,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rdx + add rcx,r13 + xor r15,r8 + shrd r14,r14,28 + add r10,rcx + add rcx,r15 + mov r13,r10 + add r14,rcx + shrd r13,r13,23 + mov rcx,r14 + mov r12,r11 + shrd r14,r14,5 + xor r13,r10 + xor r12,rax + shrd r13,r13,4 + xor r14,rcx + and r12,r10 + xor r13,r10 + add rbx,QWORD PTR[112+rsp] + mov r15,rcx + xor r12,rax + shrd r14,r14,6 + xor r15,rdx + add rbx,r12 + shrd r13,r13,14 + and rdi,r15 + xor r14,rcx + add rbx,r13 + xor rdi,rdx + shrd r14,r14,28 + add r9,rbx + add rbx,rdi + mov r13,r9 + add r14,rbx + shrd r13,r13,23 + mov rbx,r14 + mov r12,r10 + shrd r14,r14,5 + xor r13,r9 + xor r12,r11 + shrd r13,r13,4 + xor r14,rbx + and r12,r9 + xor r13,r9 + add rax,QWORD PTR[120+rsp] + mov rdi,rbx + xor r12,r11 + shrd r14,r14,6 + xor rdi,rcx + add rax,r12 + shrd r13,r13,14 + and r15,rdi + xor r14,rbx + add rax,r13 + xor r15,rcx + shrd r14,r14,28 + add r8,rax + add rax,r15 + mov r13,r8 + add r14,rax + mov rdi,QWORD PTR[((128+0))+rsp] + mov rax,r14 + + add rax,QWORD PTR[rdi] + lea rsi,QWORD PTR[128+rsi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + add r10,QWORD PTR[48+rdi] + add r11,QWORD PTR[56+rdi] + + cmp rsi,QWORD PTR[((128+16))+rsp] + + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + jb $L$loop_avx + + mov rsi,QWORD PTR[((128+24))+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((128+32))+rsp] + movaps xmm7,XMMWORD 
PTR[((128+48))+rsp] + movaps xmm8,XMMWORD PTR[((128+64))+rsp] + movaps xmm9,XMMWORD PTR[((128+80))+rsp] + movaps xmm10,XMMWORD PTR[((128+96))+rsp] + movaps xmm11,XMMWORD PTR[((128+112))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_avx:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha512_block_data_order_avx:: +sha512_block_data_order_avx ENDP + +ALIGN 64 +sha512_block_data_order_avx2 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order_avx2:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$avx2_shortcut:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + sub rsp,1408 + shl rdx,4 + and rsp,-256*8 + lea rdx,QWORD PTR[rdx*8+rsi] + add rsp,1152 + mov QWORD PTR[((128+0))+rsp],rdi + mov QWORD PTR[((128+8))+rsp],rsi + mov QWORD PTR[((128+16))+rsp],rdx + mov QWORD PTR[((128+24))+rsp],r11 + movaps XMMWORD PTR[(128+32)+rsp],xmm6 + movaps XMMWORD PTR[(128+48)+rsp],xmm7 + movaps XMMWORD PTR[(128+64)+rsp],xmm8 + movaps XMMWORD PTR[(128+80)+rsp],xmm9 + movaps XMMWORD PTR[(128+96)+rsp],xmm10 + movaps XMMWORD PTR[(128+112)+rsp],xmm11 +$L$prologue_avx2:: + + vzeroupper + sub rsi,-16*8 + mov rax,QWORD PTR[rdi] + mov r12,rsi + mov rbx,QWORD PTR[8+rdi] + cmp rsi,rdx + mov rcx,QWORD PTR[16+rdi] + cmove r12,rsp + mov rdx,QWORD PTR[24+rdi] + mov r8,QWORD PTR[32+rdi] + mov r9,QWORD PTR[40+rdi] + mov r10,QWORD PTR[48+rdi] + mov r11,QWORD PTR[56+rdi] + jmp $L$oop_avx2 +ALIGN 16 +$L$oop_avx2:: + vmovdqu xmm0,XMMWORD PTR[((-128))+rsi] + vmovdqu xmm1,XMMWORD PTR[((-128+16))+rsi] + vmovdqu xmm2,XMMWORD PTR[((-128+32))+rsi] + lea rbp,QWORD PTR[((K512+128))] + vmovdqu xmm3,XMMWORD PTR[((-128+48))+rsi] + vmovdqu xmm4,XMMWORD PTR[((-128+64))+rsi] + vmovdqu xmm5,XMMWORD PTR[((-128+80))+rsi] + vmovdqu xmm6,XMMWORD PTR[((-128+96))+rsi] + vmovdqu xmm7,XMMWORD PTR[((-128+112))+rsi] + + vmovdqa ymm10,YMMWORD PTR[1152+rbp] + vinserti128 ymm0,ymm0,XMMWORD PTR[r12],1 + vinserti128 ymm1,ymm1,XMMWORD PTR[16+r12],1 + vpshufb ymm0,ymm0,ymm10 + vinserti128 ymm2,ymm2,XMMWORD PTR[32+r12],1 + vpshufb ymm1,ymm1,ymm10 + vinserti128 ymm3,ymm3,XMMWORD PTR[48+r12],1 + vpshufb ymm2,ymm2,ymm10 + vinserti128 ymm4,ymm4,XMMWORD PTR[64+r12],1 + vpshufb ymm3,ymm3,ymm10 + vinserti128 ymm5,ymm5,XMMWORD PTR[80+r12],1 + vpshufb ymm4,ymm4,ymm10 + vinserti128 ymm6,ymm6,XMMWORD PTR[96+r12],1 + vpshufb ymm5,ymm5,ymm10 + vinserti128 ymm7,ymm7,XMMWORD PTR[112+r12],1 + + vpaddq ymm8,ymm0,YMMWORD PTR[((-128))+rbp] + vpshufb ymm6,ymm6,ymm10 + vpaddq ymm9,ymm1,YMMWORD PTR[((-96))+rbp] + vpshufb ymm7,ymm7,ymm10 + vpaddq ymm10,ymm2,YMMWORD PTR[((-64))+rbp] + vpaddq ymm11,ymm3,YMMWORD PTR[((-32))+rbp] + vmovdqa YMMWORD PTR[rsp],ymm8 + vpaddq ymm8,ymm4,YMMWORD PTR[rbp] + vmovdqa YMMWORD PTR[32+rsp],ymm9 + vpaddq ymm9,ymm5,YMMWORD PTR[32+rbp] + vmovdqa YMMWORD PTR[64+rsp],ymm10 + vpaddq ymm10,ymm6,YMMWORD PTR[64+rbp] + vmovdqa YMMWORD PTR[96+rsp],ymm11 + lea rsp,QWORD PTR[((-128))+rsp] + vpaddq ymm11,ymm7,YMMWORD PTR[96+rbp] + vmovdqa YMMWORD PTR[rsp],ymm8 + xor r14,r14 + vmovdqa YMMWORD PTR[32+rsp],ymm9 + mov rdi,rbx + vmovdqa YMMWORD PTR[64+rsp],ymm10 + xor rdi,rcx + vmovdqa YMMWORD PTR[96+rsp],ymm11 + mov r12,r9 + add rbp,16*2*8 + jmp $L$avx2_00_47 + +ALIGN 16 +$L$avx2_00_47:: + lea rsp,QWORD PTR[((-128))+rsp] + vpalignr 
ymm8,ymm1,ymm0,8 + add r11,QWORD PTR[((0+256))+rsp] + and r12,r8 + rorx r13,r8,41 + vpalignr ymm11,ymm5,ymm4,8 + rorx r15,r8,18 + lea rax,QWORD PTR[r14*1+rax] + lea r11,QWORD PTR[r12*1+r11] + vpsrlq ymm10,ymm8,1 + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + vpaddq ymm0,ymm0,ymm11 + vpsrlq ymm11,ymm8,7 + lea r11,QWORD PTR[r12*1+r11] + xor r13,r14 + mov r15,rax + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rax,39 + lea r11,QWORD PTR[r13*1+r11] + xor r15,rbx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,QWORD PTR[r11*1+rdx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rbx + vpsrlq ymm11,ymm7,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r11,QWORD PTR[rdi*1+r11] + mov r12,r8 + vpsllq ymm10,ymm7,3 + vpaddq ymm0,ymm0,ymm8 + add r10,QWORD PTR[((8+256))+rsp] + and r12,rdx + rorx r13,rdx,41 + vpsrlq ymm9,ymm7,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rdx,18 + lea r11,QWORD PTR[r14*1+r11] + lea r10,QWORD PTR[r12*1+r10] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r10,QWORD PTR[r12*1+r10] + xor r13,r14 + mov rdi,r11 + vpxor ymm11,ymm11,ymm9 + rorx r12,r11,39 + lea r10,QWORD PTR[r13*1+r10] + xor rdi,rax + vpaddq ymm0,ymm0,ymm11 + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,QWORD PTR[r10*1+rcx] + vpaddq ymm10,ymm0,YMMWORD PTR[((-128))+rbp] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,QWORD PTR[r15*1+r10] + mov r12,rdx + vmovdqa YMMWORD PTR[rsp],ymm10 + vpalignr ymm8,ymm2,ymm1,8 + add r9,QWORD PTR[((32+256))+rsp] + and r12,rcx + rorx r13,rcx,41 + vpalignr ymm11,ymm6,ymm5,8 + rorx r15,rcx,18 + lea r10,QWORD PTR[r14*1+r10] + lea r9,QWORD PTR[r12*1+r9] + vpsrlq ymm10,ymm8,1 + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + vpaddq ymm1,ymm1,ymm11 + vpsrlq ymm11,ymm8,7 + lea r9,QWORD PTR[r12*1+r9] + xor r13,r14 + mov r15,r10 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r10,39 + lea r9,QWORD PTR[r13*1+r9] + xor r15,r11 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,QWORD PTR[r9*1+rbx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r11 + vpsrlq ymm11,ymm0,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r9,QWORD PTR[rdi*1+r9] + mov r12,rcx + vpsllq ymm10,ymm0,3 + vpaddq ymm1,ymm1,ymm8 + add r8,QWORD PTR[((40+256))+rsp] + and r12,rbx + rorx r13,rbx,41 + vpsrlq ymm9,ymm0,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rbx,18 + lea r9,QWORD PTR[r14*1+r9] + lea r8,QWORD PTR[r12*1+r8] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r8,QWORD PTR[r12*1+r8] + xor r13,r14 + mov rdi,r9 + vpxor ymm11,ymm11,ymm9 + rorx r12,r9,39 + lea r8,QWORD PTR[r13*1+r8] + xor rdi,r10 + vpaddq ymm1,ymm1,ymm11 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,QWORD PTR[r8*1+rax] + vpaddq ymm10,ymm1,YMMWORD PTR[((-96))+rbp] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,QWORD PTR[r15*1+r8] + mov r12,rbx + vmovdqa YMMWORD PTR[32+rsp],ymm10 + vpalignr ymm8,ymm3,ymm2,8 + add rdx,QWORD PTR[((64+256))+rsp] + and r12,rax + rorx r13,rax,41 + vpalignr ymm11,ymm7,ymm6,8 + rorx r15,rax,18 + lea r8,QWORD PTR[r14*1+r8] + lea rdx,QWORD PTR[r12*1+rdx] + vpsrlq ymm10,ymm8,1 + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + vpaddq ymm2,ymm2,ymm11 + vpsrlq ymm11,ymm8,7 + lea rdx,QWORD PTR[r12*1+rdx] + xor r13,r14 + mov r15,r8 + 
vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r8,39 + lea rdx,QWORD PTR[r13*1+rdx] + xor r15,r9 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,QWORD PTR[rdx*1+r11] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r9 + vpsrlq ymm11,ymm1,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rdx,QWORD PTR[rdi*1+rdx] + mov r12,rax + vpsllq ymm10,ymm1,3 + vpaddq ymm2,ymm2,ymm8 + add rcx,QWORD PTR[((72+256))+rsp] + and r12,r11 + rorx r13,r11,41 + vpsrlq ymm9,ymm1,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r11,18 + lea rdx,QWORD PTR[r14*1+rdx] + lea rcx,QWORD PTR[r12*1+rcx] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rcx,QWORD PTR[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + vpxor ymm11,ymm11,ymm9 + rorx r12,rdx,39 + lea rcx,QWORD PTR[r13*1+rcx] + xor rdi,r8 + vpaddq ymm2,ymm2,ymm11 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,QWORD PTR[rcx*1+r10] + vpaddq ymm10,ymm2,YMMWORD PTR[((-64))+rbp] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,QWORD PTR[r15*1+rcx] + mov r12,r11 + vmovdqa YMMWORD PTR[64+rsp],ymm10 + vpalignr ymm8,ymm4,ymm3,8 + add rbx,QWORD PTR[((96+256))+rsp] + and r12,r10 + rorx r13,r10,41 + vpalignr ymm11,ymm0,ymm7,8 + rorx r15,r10,18 + lea rcx,QWORD PTR[r14*1+rcx] + lea rbx,QWORD PTR[r12*1+rbx] + vpsrlq ymm10,ymm8,1 + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + vpaddq ymm3,ymm3,ymm11 + vpsrlq ymm11,ymm8,7 + lea rbx,QWORD PTR[r12*1+rbx] + xor r13,r14 + mov r15,rcx + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rcx,39 + lea rbx,QWORD PTR[r13*1+rbx] + xor r15,rdx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,QWORD PTR[rbx*1+r9] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rdx + vpsrlq ymm11,ymm2,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rbx,QWORD PTR[rdi*1+rbx] + mov r12,r10 + vpsllq ymm10,ymm2,3 + vpaddq ymm3,ymm3,ymm8 + add rax,QWORD PTR[((104+256))+rsp] + and r12,r9 + rorx r13,r9,41 + vpsrlq ymm9,ymm2,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r9,18 + lea rbx,QWORD PTR[r14*1+rbx] + lea rax,QWORD PTR[r12*1+rax] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rax,QWORD PTR[r12*1+rax] + xor r13,r14 + mov rdi,rbx + vpxor ymm11,ymm11,ymm9 + rorx r12,rbx,39 + lea rax,QWORD PTR[r13*1+rax] + xor rdi,rcx + vpaddq ymm3,ymm3,ymm11 + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,QWORD PTR[rax*1+r8] + vpaddq ymm10,ymm3,YMMWORD PTR[((-32))+rbp] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,QWORD PTR[r15*1+rax] + mov r12,r9 + vmovdqa YMMWORD PTR[96+rsp],ymm10 + lea rsp,QWORD PTR[((-128))+rsp] + vpalignr ymm8,ymm5,ymm4,8 + add r11,QWORD PTR[((0+256))+rsp] + and r12,r8 + rorx r13,r8,41 + vpalignr ymm11,ymm1,ymm0,8 + rorx r15,r8,18 + lea rax,QWORD PTR[r14*1+rax] + lea r11,QWORD PTR[r12*1+r11] + vpsrlq ymm10,ymm8,1 + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + vpaddq ymm4,ymm4,ymm11 + vpsrlq ymm11,ymm8,7 + lea r11,QWORD PTR[r12*1+r11] + xor r13,r14 + mov r15,rax + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rax,39 + lea r11,QWORD PTR[r13*1+r11] + xor r15,rbx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,QWORD PTR[r11*1+rdx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rbx + vpsrlq 
ymm11,ymm3,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r11,QWORD PTR[rdi*1+r11] + mov r12,r8 + vpsllq ymm10,ymm3,3 + vpaddq ymm4,ymm4,ymm8 + add r10,QWORD PTR[((8+256))+rsp] + and r12,rdx + rorx r13,rdx,41 + vpsrlq ymm9,ymm3,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rdx,18 + lea r11,QWORD PTR[r14*1+r11] + lea r10,QWORD PTR[r12*1+r10] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r10,QWORD PTR[r12*1+r10] + xor r13,r14 + mov rdi,r11 + vpxor ymm11,ymm11,ymm9 + rorx r12,r11,39 + lea r10,QWORD PTR[r13*1+r10] + xor rdi,rax + vpaddq ymm4,ymm4,ymm11 + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,QWORD PTR[r10*1+rcx] + vpaddq ymm10,ymm4,YMMWORD PTR[rbp] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,QWORD PTR[r15*1+r10] + mov r12,rdx + vmovdqa YMMWORD PTR[rsp],ymm10 + vpalignr ymm8,ymm6,ymm5,8 + add r9,QWORD PTR[((32+256))+rsp] + and r12,rcx + rorx r13,rcx,41 + vpalignr ymm11,ymm2,ymm1,8 + rorx r15,rcx,18 + lea r10,QWORD PTR[r14*1+r10] + lea r9,QWORD PTR[r12*1+r9] + vpsrlq ymm10,ymm8,1 + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + vpaddq ymm5,ymm5,ymm11 + vpsrlq ymm11,ymm8,7 + lea r9,QWORD PTR[r12*1+r9] + xor r13,r14 + mov r15,r10 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r10,39 + lea r9,QWORD PTR[r13*1+r9] + xor r15,r11 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,QWORD PTR[r9*1+rbx] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r11 + vpsrlq ymm11,ymm4,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea r9,QWORD PTR[rdi*1+r9] + mov r12,rcx + vpsllq ymm10,ymm4,3 + vpaddq ymm5,ymm5,ymm8 + add r8,QWORD PTR[((40+256))+rsp] + and r12,rbx + rorx r13,rbx,41 + vpsrlq ymm9,ymm4,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,rbx,18 + lea r9,QWORD PTR[r14*1+r9] + lea r8,QWORD PTR[r12*1+r8] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea r8,QWORD PTR[r12*1+r8] + xor r13,r14 + mov rdi,r9 + vpxor ymm11,ymm11,ymm9 + rorx r12,r9,39 + lea r8,QWORD PTR[r13*1+r8] + xor rdi,r10 + vpaddq ymm5,ymm5,ymm11 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,QWORD PTR[r8*1+rax] + vpaddq ymm10,ymm5,YMMWORD PTR[32+rbp] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,QWORD PTR[r15*1+r8] + mov r12,rbx + vmovdqa YMMWORD PTR[32+rsp],ymm10 + vpalignr ymm8,ymm7,ymm6,8 + add rdx,QWORD PTR[((64+256))+rsp] + and r12,rax + rorx r13,rax,41 + vpalignr ymm11,ymm3,ymm2,8 + rorx r15,rax,18 + lea r8,QWORD PTR[r14*1+r8] + lea rdx,QWORD PTR[r12*1+rdx] + vpsrlq ymm10,ymm8,1 + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + vpaddq ymm6,ymm6,ymm11 + vpsrlq ymm11,ymm8,7 + lea rdx,QWORD PTR[r12*1+rdx] + xor r13,r14 + mov r15,r8 + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,r8,39 + lea rdx,QWORD PTR[r13*1+rdx] + xor r15,r9 + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,QWORD PTR[rdx*1+r11] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,r9 + vpsrlq ymm11,ymm5,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rdx,QWORD PTR[rdi*1+rdx] + mov r12,rax + vpsllq ymm10,ymm5,3 + vpaddq ymm6,ymm6,ymm8 + add rcx,QWORD PTR[((72+256))+rsp] + and r12,r11 + rorx r13,r11,41 + vpsrlq ymm9,ymm5,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r11,18 + lea rdx,QWORD PTR[r14*1+rdx] + lea rcx,QWORD PTR[r12*1+rcx] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + 
andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rcx,QWORD PTR[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + vpxor ymm11,ymm11,ymm9 + rorx r12,rdx,39 + lea rcx,QWORD PTR[r13*1+rcx] + xor rdi,r8 + vpaddq ymm6,ymm6,ymm11 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,QWORD PTR[rcx*1+r10] + vpaddq ymm10,ymm6,YMMWORD PTR[64+rbp] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,QWORD PTR[r15*1+rcx] + mov r12,r11 + vmovdqa YMMWORD PTR[64+rsp],ymm10 + vpalignr ymm8,ymm0,ymm7,8 + add rbx,QWORD PTR[((96+256))+rsp] + and r12,r10 + rorx r13,r10,41 + vpalignr ymm11,ymm4,ymm3,8 + rorx r15,r10,18 + lea rcx,QWORD PTR[r14*1+rcx] + lea rbx,QWORD PTR[r12*1+rbx] + vpsrlq ymm10,ymm8,1 + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + vpaddq ymm7,ymm7,ymm11 + vpsrlq ymm11,ymm8,7 + lea rbx,QWORD PTR[r12*1+rbx] + xor r13,r14 + mov r15,rcx + vpsllq ymm9,ymm8,56 + vpxor ymm8,ymm11,ymm10 + rorx r12,rcx,39 + lea rbx,QWORD PTR[r13*1+rbx] + xor r15,rdx + vpsrlq ymm10,ymm10,7 + vpxor ymm8,ymm8,ymm9 + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,QWORD PTR[rbx*1+r9] + vpsllq ymm9,ymm9,7 + vpxor ymm8,ymm8,ymm10 + and rdi,r15 + xor r14,r12 + xor rdi,rdx + vpsrlq ymm11,ymm6,6 + vpxor ymm8,ymm8,ymm9 + xor r14,r13 + lea rbx,QWORD PTR[rdi*1+rbx] + mov r12,r10 + vpsllq ymm10,ymm6,3 + vpaddq ymm7,ymm7,ymm8 + add rax,QWORD PTR[((104+256))+rsp] + and r12,r9 + rorx r13,r9,41 + vpsrlq ymm9,ymm6,19 + vpxor ymm11,ymm11,ymm10 + rorx rdi,r9,18 + lea rbx,QWORD PTR[r14*1+rbx] + lea rax,QWORD PTR[r12*1+rax] + vpsllq ymm10,ymm10,42 + vpxor ymm11,ymm11,ymm9 + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + vpsrlq ymm9,ymm9,42 + vpxor ymm11,ymm11,ymm10 + lea rax,QWORD PTR[r12*1+rax] + xor r13,r14 + mov rdi,rbx + vpxor ymm11,ymm11,ymm9 + rorx r12,rbx,39 + lea rax,QWORD PTR[r13*1+rax] + xor rdi,rcx + vpaddq ymm7,ymm7,ymm11 + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,QWORD PTR[rax*1+r8] + vpaddq ymm10,ymm7,YMMWORD PTR[96+rbp] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,QWORD PTR[r15*1+rax] + mov r12,r9 + vmovdqa YMMWORD PTR[96+rsp],ymm10 + lea rbp,QWORD PTR[256+rbp] + cmp BYTE PTR[((-121))+rbp],0 + jne $L$avx2_00_47 + add r11,QWORD PTR[((0+128))+rsp] + and r12,r8 + rorx r13,r8,41 + rorx r15,r8,18 + lea rax,QWORD PTR[r14*1+rax] + lea r11,QWORD PTR[r12*1+r11] + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + lea r11,QWORD PTR[r12*1+r11] + xor r13,r14 + mov r15,rax + rorx r12,rax,39 + lea r11,QWORD PTR[r13*1+r11] + xor r15,rbx + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,QWORD PTR[r11*1+rdx] + and rdi,r15 + xor r14,r12 + xor rdi,rbx + xor r14,r13 + lea r11,QWORD PTR[rdi*1+r11] + mov r12,r8 + add r10,QWORD PTR[((8+128))+rsp] + and r12,rdx + rorx r13,rdx,41 + rorx rdi,rdx,18 + lea r11,QWORD PTR[r14*1+r11] + lea r10,QWORD PTR[r12*1+r10] + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + lea r10,QWORD PTR[r12*1+r10] + xor r13,r14 + mov rdi,r11 + rorx r12,r11,39 + lea r10,QWORD PTR[r13*1+r10] + xor rdi,rax + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,QWORD PTR[r10*1+rcx] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,QWORD PTR[r15*1+r10] + mov r12,rdx + add r9,QWORD PTR[((32+128))+rsp] + and r12,rcx + rorx r13,rcx,41 + rorx r15,rcx,18 + lea r10,QWORD PTR[r14*1+r10] + lea r9,QWORD PTR[r12*1+r9] + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + lea r9,QWORD PTR[r12*1+r9] + xor r13,r14 + mov r15,r10 + rorx r12,r10,39 + lea r9,QWORD PTR[r13*1+r9] + xor r15,r11 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,QWORD PTR[r9*1+rbx] + and rdi,r15 + xor 
r14,r12 + xor rdi,r11 + xor r14,r13 + lea r9,QWORD PTR[rdi*1+r9] + mov r12,rcx + add r8,QWORD PTR[((40+128))+rsp] + and r12,rbx + rorx r13,rbx,41 + rorx rdi,rbx,18 + lea r9,QWORD PTR[r14*1+r9] + lea r8,QWORD PTR[r12*1+r8] + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + lea r8,QWORD PTR[r12*1+r8] + xor r13,r14 + mov rdi,r9 + rorx r12,r9,39 + lea r8,QWORD PTR[r13*1+r8] + xor rdi,r10 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,QWORD PTR[r8*1+rax] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,QWORD PTR[r15*1+r8] + mov r12,rbx + add rdx,QWORD PTR[((64+128))+rsp] + and r12,rax + rorx r13,rax,41 + rorx r15,rax,18 + lea r8,QWORD PTR[r14*1+r8] + lea rdx,QWORD PTR[r12*1+rdx] + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + lea rdx,QWORD PTR[r12*1+rdx] + xor r13,r14 + mov r15,r8 + rorx r12,r8,39 + lea rdx,QWORD PTR[r13*1+rdx] + xor r15,r9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,QWORD PTR[rdx*1+r11] + and rdi,r15 + xor r14,r12 + xor rdi,r9 + xor r14,r13 + lea rdx,QWORD PTR[rdi*1+rdx] + mov r12,rax + add rcx,QWORD PTR[((72+128))+rsp] + and r12,r11 + rorx r13,r11,41 + rorx rdi,r11,18 + lea rdx,QWORD PTR[r14*1+rdx] + lea rcx,QWORD PTR[r12*1+rcx] + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + lea rcx,QWORD PTR[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + rorx r12,rdx,39 + lea rcx,QWORD PTR[r13*1+rcx] + xor rdi,r8 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,QWORD PTR[rcx*1+r10] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,QWORD PTR[r15*1+rcx] + mov r12,r11 + add rbx,QWORD PTR[((96+128))+rsp] + and r12,r10 + rorx r13,r10,41 + rorx r15,r10,18 + lea rcx,QWORD PTR[r14*1+rcx] + lea rbx,QWORD PTR[r12*1+rbx] + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + lea rbx,QWORD PTR[r12*1+rbx] + xor r13,r14 + mov r15,rcx + rorx r12,rcx,39 + lea rbx,QWORD PTR[r13*1+rbx] + xor r15,rdx + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,QWORD PTR[rbx*1+r9] + and rdi,r15 + xor r14,r12 + xor rdi,rdx + xor r14,r13 + lea rbx,QWORD PTR[rdi*1+rbx] + mov r12,r10 + add rax,QWORD PTR[((104+128))+rsp] + and r12,r9 + rorx r13,r9,41 + rorx rdi,r9,18 + lea rbx,QWORD PTR[r14*1+rbx] + lea rax,QWORD PTR[r12*1+rax] + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + lea rax,QWORD PTR[r12*1+rax] + xor r13,r14 + mov rdi,rbx + rorx r12,rbx,39 + lea rax,QWORD PTR[r13*1+rax] + xor rdi,rcx + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,QWORD PTR[rax*1+r8] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,QWORD PTR[r15*1+rax] + mov r12,r9 + add r11,QWORD PTR[rsp] + and r12,r8 + rorx r13,r8,41 + rorx r15,r8,18 + lea rax,QWORD PTR[r14*1+rax] + lea r11,QWORD PTR[r12*1+r11] + andn r12,r8,r10 + xor r13,r15 + rorx r14,r8,14 + lea r11,QWORD PTR[r12*1+r11] + xor r13,r14 + mov r15,rax + rorx r12,rax,39 + lea r11,QWORD PTR[r13*1+r11] + xor r15,rbx + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,QWORD PTR[r11*1+rdx] + and rdi,r15 + xor r14,r12 + xor rdi,rbx + xor r14,r13 + lea r11,QWORD PTR[rdi*1+r11] + mov r12,r8 + add r10,QWORD PTR[8+rsp] + and r12,rdx + rorx r13,rdx,41 + rorx rdi,rdx,18 + lea r11,QWORD PTR[r14*1+r11] + lea r10,QWORD PTR[r12*1+r10] + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + lea r10,QWORD PTR[r12*1+r10] + xor r13,r14 + mov rdi,r11 + rorx r12,r11,39 + lea r10,QWORD PTR[r13*1+r10] + xor rdi,rax + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,QWORD PTR[r10*1+rcx] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,QWORD PTR[r15*1+r10] + mov r12,rdx + add r9,QWORD PTR[32+rsp] + and r12,rcx + rorx r13,rcx,41 + rorx r15,rcx,18 + lea r10,QWORD PTR[r14*1+r10] + lea 
r9,QWORD PTR[r12*1+r9] + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + lea r9,QWORD PTR[r12*1+r9] + xor r13,r14 + mov r15,r10 + rorx r12,r10,39 + lea r9,QWORD PTR[r13*1+r9] + xor r15,r11 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,QWORD PTR[r9*1+rbx] + and rdi,r15 + xor r14,r12 + xor rdi,r11 + xor r14,r13 + lea r9,QWORD PTR[rdi*1+r9] + mov r12,rcx + add r8,QWORD PTR[40+rsp] + and r12,rbx + rorx r13,rbx,41 + rorx rdi,rbx,18 + lea r9,QWORD PTR[r14*1+r9] + lea r8,QWORD PTR[r12*1+r8] + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + lea r8,QWORD PTR[r12*1+r8] + xor r13,r14 + mov rdi,r9 + rorx r12,r9,39 + lea r8,QWORD PTR[r13*1+r8] + xor rdi,r10 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,QWORD PTR[r8*1+rax] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,QWORD PTR[r15*1+r8] + mov r12,rbx + add rdx,QWORD PTR[64+rsp] + and r12,rax + rorx r13,rax,41 + rorx r15,rax,18 + lea r8,QWORD PTR[r14*1+r8] + lea rdx,QWORD PTR[r12*1+rdx] + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + lea rdx,QWORD PTR[r12*1+rdx] + xor r13,r14 + mov r15,r8 + rorx r12,r8,39 + lea rdx,QWORD PTR[r13*1+rdx] + xor r15,r9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,QWORD PTR[rdx*1+r11] + and rdi,r15 + xor r14,r12 + xor rdi,r9 + xor r14,r13 + lea rdx,QWORD PTR[rdi*1+rdx] + mov r12,rax + add rcx,QWORD PTR[72+rsp] + and r12,r11 + rorx r13,r11,41 + rorx rdi,r11,18 + lea rdx,QWORD PTR[r14*1+rdx] + lea rcx,QWORD PTR[r12*1+rcx] + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + lea rcx,QWORD PTR[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + rorx r12,rdx,39 + lea rcx,QWORD PTR[r13*1+rcx] + xor rdi,r8 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,QWORD PTR[rcx*1+r10] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,QWORD PTR[r15*1+rcx] + mov r12,r11 + add rbx,QWORD PTR[96+rsp] + and r12,r10 + rorx r13,r10,41 + rorx r15,r10,18 + lea rcx,QWORD PTR[r14*1+rcx] + lea rbx,QWORD PTR[r12*1+rbx] + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + lea rbx,QWORD PTR[r12*1+rbx] + xor r13,r14 + mov r15,rcx + rorx r12,rcx,39 + lea rbx,QWORD PTR[r13*1+rbx] + xor r15,rdx + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,QWORD PTR[rbx*1+r9] + and rdi,r15 + xor r14,r12 + xor rdi,rdx + xor r14,r13 + lea rbx,QWORD PTR[rdi*1+rbx] + mov r12,r10 + add rax,QWORD PTR[104+rsp] + and r12,r9 + rorx r13,r9,41 + rorx rdi,r9,18 + lea rbx,QWORD PTR[r14*1+rbx] + lea rax,QWORD PTR[r12*1+rax] + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + lea rax,QWORD PTR[r12*1+rax] + xor r13,r14 + mov rdi,rbx + rorx r12,rbx,39 + lea rax,QWORD PTR[r13*1+rax] + xor rdi,rcx + rorx r14,rbx,34 + rorx r13,rbx,28 + lea r8,QWORD PTR[rax*1+r8] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,QWORD PTR[r15*1+rax] + mov r12,r9 + mov rdi,QWORD PTR[1280+rsp] + add rax,r14 + + lea rbp,QWORD PTR[1152+rsp] + + add rax,QWORD PTR[rdi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + add r10,QWORD PTR[48+rdi] + add r11,QWORD PTR[56+rdi] + + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + + cmp rsi,QWORD PTR[144+rbp] + je $L$done_avx2 + + xor r14,r14 + mov rdi,rbx + xor rdi,rcx + mov r12,r9 + jmp $L$ower_avx2 +ALIGN 16 +$L$ower_avx2:: + add r11,QWORD PTR[((0+16))+rbp] + and r12,r8 + rorx r13,r8,41 + rorx r15,r8,18 + lea rax,QWORD PTR[r14*1+rax] + lea r11,QWORD PTR[r12*1+r11] + andn r12,r8,r10 
+ xor r13,r15 + rorx r14,r8,14 + lea r11,QWORD PTR[r12*1+r11] + xor r13,r14 + mov r15,rax + rorx r12,rax,39 + lea r11,QWORD PTR[r13*1+r11] + xor r15,rbx + rorx r14,rax,34 + rorx r13,rax,28 + lea rdx,QWORD PTR[r11*1+rdx] + and rdi,r15 + xor r14,r12 + xor rdi,rbx + xor r14,r13 + lea r11,QWORD PTR[rdi*1+r11] + mov r12,r8 + add r10,QWORD PTR[((8+16))+rbp] + and r12,rdx + rorx r13,rdx,41 + rorx rdi,rdx,18 + lea r11,QWORD PTR[r14*1+r11] + lea r10,QWORD PTR[r12*1+r10] + andn r12,rdx,r9 + xor r13,rdi + rorx r14,rdx,14 + lea r10,QWORD PTR[r12*1+r10] + xor r13,r14 + mov rdi,r11 + rorx r12,r11,39 + lea r10,QWORD PTR[r13*1+r10] + xor rdi,rax + rorx r14,r11,34 + rorx r13,r11,28 + lea rcx,QWORD PTR[r10*1+rcx] + and r15,rdi + xor r14,r12 + xor r15,rax + xor r14,r13 + lea r10,QWORD PTR[r15*1+r10] + mov r12,rdx + add r9,QWORD PTR[((32+16))+rbp] + and r12,rcx + rorx r13,rcx,41 + rorx r15,rcx,18 + lea r10,QWORD PTR[r14*1+r10] + lea r9,QWORD PTR[r12*1+r9] + andn r12,rcx,r8 + xor r13,r15 + rorx r14,rcx,14 + lea r9,QWORD PTR[r12*1+r9] + xor r13,r14 + mov r15,r10 + rorx r12,r10,39 + lea r9,QWORD PTR[r13*1+r9] + xor r15,r11 + rorx r14,r10,34 + rorx r13,r10,28 + lea rbx,QWORD PTR[r9*1+rbx] + and rdi,r15 + xor r14,r12 + xor rdi,r11 + xor r14,r13 + lea r9,QWORD PTR[rdi*1+r9] + mov r12,rcx + add r8,QWORD PTR[((40+16))+rbp] + and r12,rbx + rorx r13,rbx,41 + rorx rdi,rbx,18 + lea r9,QWORD PTR[r14*1+r9] + lea r8,QWORD PTR[r12*1+r8] + andn r12,rbx,rdx + xor r13,rdi + rorx r14,rbx,14 + lea r8,QWORD PTR[r12*1+r8] + xor r13,r14 + mov rdi,r9 + rorx r12,r9,39 + lea r8,QWORD PTR[r13*1+r8] + xor rdi,r10 + rorx r14,r9,34 + rorx r13,r9,28 + lea rax,QWORD PTR[r8*1+rax] + and r15,rdi + xor r14,r12 + xor r15,r10 + xor r14,r13 + lea r8,QWORD PTR[r15*1+r8] + mov r12,rbx + add rdx,QWORD PTR[((64+16))+rbp] + and r12,rax + rorx r13,rax,41 + rorx r15,rax,18 + lea r8,QWORD PTR[r14*1+r8] + lea rdx,QWORD PTR[r12*1+rdx] + andn r12,rax,rcx + xor r13,r15 + rorx r14,rax,14 + lea rdx,QWORD PTR[r12*1+rdx] + xor r13,r14 + mov r15,r8 + rorx r12,r8,39 + lea rdx,QWORD PTR[r13*1+rdx] + xor r15,r9 + rorx r14,r8,34 + rorx r13,r8,28 + lea r11,QWORD PTR[rdx*1+r11] + and rdi,r15 + xor r14,r12 + xor rdi,r9 + xor r14,r13 + lea rdx,QWORD PTR[rdi*1+rdx] + mov r12,rax + add rcx,QWORD PTR[((72+16))+rbp] + and r12,r11 + rorx r13,r11,41 + rorx rdi,r11,18 + lea rdx,QWORD PTR[r14*1+rdx] + lea rcx,QWORD PTR[r12*1+rcx] + andn r12,r11,rbx + xor r13,rdi + rorx r14,r11,14 + lea rcx,QWORD PTR[r12*1+rcx] + xor r13,r14 + mov rdi,rdx + rorx r12,rdx,39 + lea rcx,QWORD PTR[r13*1+rcx] + xor rdi,r8 + rorx r14,rdx,34 + rorx r13,rdx,28 + lea r10,QWORD PTR[rcx*1+r10] + and r15,rdi + xor r14,r12 + xor r15,r8 + xor r14,r13 + lea rcx,QWORD PTR[r15*1+rcx] + mov r12,r11 + add rbx,QWORD PTR[((96+16))+rbp] + and r12,r10 + rorx r13,r10,41 + rorx r15,r10,18 + lea rcx,QWORD PTR[r14*1+rcx] + lea rbx,QWORD PTR[r12*1+rbx] + andn r12,r10,rax + xor r13,r15 + rorx r14,r10,14 + lea rbx,QWORD PTR[r12*1+rbx] + xor r13,r14 + mov r15,rcx + rorx r12,rcx,39 + lea rbx,QWORD PTR[r13*1+rbx] + xor r15,rdx + rorx r14,rcx,34 + rorx r13,rcx,28 + lea r9,QWORD PTR[rbx*1+r9] + and rdi,r15 + xor r14,r12 + xor rdi,rdx + xor r14,r13 + lea rbx,QWORD PTR[rdi*1+rbx] + mov r12,r10 + add rax,QWORD PTR[((104+16))+rbp] + and r12,r9 + rorx r13,r9,41 + rorx rdi,r9,18 + lea rbx,QWORD PTR[r14*1+rbx] + lea rax,QWORD PTR[r12*1+rax] + andn r12,r9,r11 + xor r13,rdi + rorx r14,r9,14 + lea rax,QWORD PTR[r12*1+rax] + xor r13,r14 + mov rdi,rbx + rorx r12,rbx,39 + lea rax,QWORD PTR[r13*1+rax] + xor rdi,rcx + rorx r14,rbx,34 + rorx 
r13,rbx,28 + lea r8,QWORD PTR[rax*1+r8] + and r15,rdi + xor r14,r12 + xor r15,rcx + xor r14,r13 + lea rax,QWORD PTR[r15*1+rax] + mov r12,r9 + lea rbp,QWORD PTR[((-128))+rbp] + cmp rbp,rsp + jae $L$ower_avx2 + + mov rdi,QWORD PTR[1280+rsp] + add rax,r14 + + lea rsp,QWORD PTR[1152+rsp] + + add rax,QWORD PTR[rdi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + lea rsi,QWORD PTR[256+rsi] + add r10,QWORD PTR[48+rdi] + mov r12,rsi + add r11,QWORD PTR[56+rdi] + cmp rsi,QWORD PTR[((128+16))+rsp] + + mov QWORD PTR[rdi],rax + cmove r12,rsp + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + + jbe $L$oop_avx2 + lea rbp,QWORD PTR[rsp] + +$L$done_avx2:: + lea rsp,QWORD PTR[rbp] + mov rsi,QWORD PTR[((128+24))+rsp] + vzeroupper + movaps xmm6,XMMWORD PTR[((128+32))+rsp] + movaps xmm7,XMMWORD PTR[((128+48))+rsp] + movaps xmm8,XMMWORD PTR[((128+64))+rsp] + movaps xmm9,XMMWORD PTR[((128+80))+rsp] + movaps xmm10,XMMWORD PTR[((128+96))+rsp] + movaps xmm11,XMMWORD PTR[((128+112))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_avx2:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha512_block_data_order_avx2:: +sha512_block_data_order_avx2 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + lea r10,QWORD PTR[$L$avx2_shortcut] + cmp rbx,r10 + jb $L$not_in_avx2 + + and rax,-256*8 + add rax,1152 +$L$not_in_avx2:: + mov rsi,rax + mov rax,QWORD PTR[((128+24))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jb $L$in_prologue + + lea rsi,QWORD PTR[((128+32))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,12 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h 
;repret +se_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha512_block_data_order + DD imagerel $L$SEH_end_sha512_block_data_order + DD imagerel $L$SEH_info_sha512_block_data_order + DD imagerel $L$SEH_begin_sha512_block_data_order_xop + DD imagerel $L$SEH_end_sha512_block_data_order_xop + DD imagerel $L$SEH_info_sha512_block_data_order_xop + DD imagerel $L$SEH_begin_sha512_block_data_order_avx + DD imagerel $L$SEH_end_sha512_block_data_order_avx + DD imagerel $L$SEH_info_sha512_block_data_order_avx + DD imagerel $L$SEH_begin_sha512_block_data_order_avx2 + DD imagerel $L$SEH_end_sha512_block_data_order_avx2 + DD imagerel $L$SEH_info_sha512_block_data_order_avx2 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha512_block_data_order:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue,imagerel $L$epilogue +$L$SEH_info_sha512_block_data_order_xop:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_xop,imagerel $L$epilogue_xop +$L$SEH_info_sha512_block_data_order_avx:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_avx,imagerel $L$epilogue_avx +$L$SEH_info_sha512_block_data_order_avx2:: DB 9,0,0,0 DD imagerel se_handler + DD imagerel $L$prologue_avx2,imagerel $L$epilogue_avx2 .xdata ENDS END diff --git a/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm b/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm index 42b524dc8f936e..fa5bfbc7b20cd6 100644 --- a/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/whrlpool/wp-x86_64.asm @@ -1,5 +1,5 @@ OPTION DOTNAME -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' PUBLIC whirlpool_block @@ -72,233 +72,236 @@ $L$outerloop:: mov QWORD PTR[((64+56))+rsp],r15 xor rsi,rsi mov QWORD PTR[24+rbx],rsi + jmp $L$round ALIGN 16 $L$round:: mov r8,QWORD PTR[4096+rsi*8+rbp] mov eax,DWORD PTR[rsp] mov ebx,DWORD PTR[4+rsp] - mov cl,al - mov dl,ah + movzx ecx,al + movzx edx,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r8,QWORD PTR[rsi*8+rbp] mov r9,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((0+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh mov r10,QWORD PTR[6+rsi*8+rbp] mov r11,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh mov r12,QWORD PTR[4+rsi*8+rbp] mov r13,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((0+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah mov r14,QWORD PTR[2+rsi*8+rbp] mov r15,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r9,QWORD PTR[rsi*8+rbp] xor r10,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((8+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r11,QWORD PTR[6+rsi*8+rbp] xor r12,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r13,QWORD PTR[4+rsi*8+rbp] xor r14,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((8+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r15,QWORD 
PTR[2+rsi*8+rbp] xor r8,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r10,QWORD PTR[rsi*8+rbp] xor r11,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((16+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r12,QWORD PTR[6+rsi*8+rbp] xor r13,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r14,QWORD PTR[4+rsi*8+rbp] xor r15,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((16+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r8,QWORD PTR[2+rsi*8+rbp] xor r9,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r11,QWORD PTR[rsi*8+rbp] xor r12,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((24+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r13,QWORD PTR[6+rsi*8+rbp] xor r14,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r15,QWORD PTR[4+rsi*8+rbp] xor r8,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((24+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r9,QWORD PTR[2+rsi*8+rbp] xor r10,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r12,QWORD PTR[rsi*8+rbp] xor r13,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((32+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r14,QWORD PTR[6+rsi*8+rbp] xor r15,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r8,QWORD PTR[4+rsi*8+rbp] xor r9,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((32+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r10,QWORD PTR[2+rsi*8+rbp] xor r11,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r13,QWORD PTR[rsi*8+rbp] xor r14,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((40+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r15,QWORD PTR[6+rsi*8+rbp] xor r8,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r9,QWORD PTR[4+rsi*8+rbp] xor r10,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((40+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r11,QWORD PTR[2+rsi*8+rbp] xor r12,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r14,QWORD PTR[rsi*8+rbp] xor r15,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((48+8))+rsp] lea rsi,QWORD 
PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r8,QWORD PTR[6+rsi*8+rbp] xor r9,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r10,QWORD PTR[4+rsi*8+rbp] xor r11,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((48+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r12,QWORD PTR[2+rsi*8+rbp] xor r13,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r15,QWORD PTR[rsi*8+rbp] xor r8,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((56+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r9,QWORD PTR[6+rsi*8+rbp] xor r10,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r11,QWORD PTR[4+rsi*8+rbp] xor r12,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((56+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r13,QWORD PTR[2+rsi*8+rbp] xor r14,QWORD PTR[1+rdi*8+rbp] mov QWORD PTR[rsp],r8 @@ -309,228 +312,228 @@ $L$round:: mov QWORD PTR[40+rsp],r13 mov QWORD PTR[48+rsp],r14 mov QWORD PTR[56+rsp],r15 - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r8,QWORD PTR[rsi*8+rbp] xor r9,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+0+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r10,QWORD PTR[6+rsi*8+rbp] xor r11,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r12,QWORD PTR[4+rsi*8+rbp] xor r13,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+0+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r14,QWORD PTR[2+rsi*8+rbp] xor r15,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r9,QWORD PTR[rsi*8+rbp] xor r10,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+8+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r11,QWORD PTR[6+rsi*8+rbp] xor r12,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r13,QWORD PTR[4+rsi*8+rbp] xor r14,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+8+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r15,QWORD PTR[2+rsi*8+rbp] xor r8,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r10,QWORD PTR[rsi*8+rbp] xor r11,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+16+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r12,QWORD PTR[6+rsi*8+rbp] xor r13,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr 
ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r14,QWORD PTR[4+rsi*8+rbp] xor r15,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+16+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r8,QWORD PTR[2+rsi*8+rbp] xor r9,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r11,QWORD PTR[rsi*8+rbp] xor r12,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+24+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r13,QWORD PTR[6+rsi*8+rbp] xor r14,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r15,QWORD PTR[4+rsi*8+rbp] xor r8,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+24+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r9,QWORD PTR[2+rsi*8+rbp] xor r10,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r12,QWORD PTR[rsi*8+rbp] xor r13,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+32+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r14,QWORD PTR[6+rsi*8+rbp] xor r15,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r8,QWORD PTR[4+rsi*8+rbp] xor r9,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+32+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r10,QWORD PTR[2+rsi*8+rbp] xor r11,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r13,QWORD PTR[rsi*8+rbp] xor r14,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+40+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r15,QWORD PTR[6+rsi*8+rbp] xor r8,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r9,QWORD PTR[4+rsi*8+rbp] xor r10,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+40+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r11,QWORD PTR[2+rsi*8+rbp] xor r12,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r14,QWORD PTR[rsi*8+rbp] xor r15,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah mov eax,DWORD PTR[((64+48+8))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r8,QWORD PTR[6+rsi*8+rbp] xor r9,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r10,QWORD PTR[4+rsi*8+rbp] xor r11,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR[((64+48+8+4))+rsp] lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD 
PTR[rdx*1+rdx] + movzx edx,ah xor r12,QWORD PTR[2+rsi*8+rbp] xor r13,QWORD PTR[1+rdi*8+rbp] - mov cl,al - mov dl,ah + shr eax,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] - shr eax,16 + movzx edx,ah xor r15,QWORD PTR[rsi*8+rbp] xor r8,QWORD PTR[7+rdi*8+rbp] - mov cl,al - mov dl,ah lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh xor r9,QWORD PTR[6+rsi*8+rbp] xor r10,QWORD PTR[5+rdi*8+rbp] - mov cl,bl - mov dl,bh + shr ebx,16 lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl lea rdi,QWORD PTR[rdx*1+rdx] - shr ebx,16 + movzx edx,bh xor r11,QWORD PTR[4+rsi*8+rbp] xor r12,QWORD PTR[3+rdi*8+rbp] - mov cl,bl - mov dl,bh lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah xor r13,QWORD PTR[2+rsi*8+rbp] xor r14,QWORD PTR[1+rdi*8+rbp] lea rbx,QWORD PTR[128+rsp] @@ -925,7 +928,6 @@ $L$in_prologue:: mov ecx,154 DD 0a548f3fch - mov rsi,r9 xor rcx,rcx mov rdx,QWORD PTR[8+rsi] diff --git a/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm b/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm index 497160cbcf3ad4..c767b91f9a7cd1 100644 --- a/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm +++ b/deps/openssl/asm/x64-win32-masm/x86_64cpuid.asm @@ -7,10 +7,10 @@ EXTERN OPENSSL_cpuid_setup:NEAR .CRT$XCU ENDS _DATA SEGMENT -COMM OPENSSL_ia32cap_P:DWORD:2 +COMM OPENSSL_ia32cap_P:DWORD:4 _DATA ENDS -.text$ SEGMENT ALIGN(64) 'CODE' +.text$ SEGMENT ALIGN(256) 'CODE' PUBLIC OPENSSL_atomic_add @@ -19,12 +19,10 @@ OPENSSL_atomic_add PROC PUBLIC mov eax,DWORD PTR[rcx] $L$spin:: lea r8,QWORD PTR[rax*1+rdx] DB 0f0h - cmpxchg DWORD PTR[rcx],r8d jne $L$spin mov eax,r8d DB 048h,098h - DB 0F3h,0C3h ;repret OPENSSL_atomic_add ENDP @@ -42,9 +40,17 @@ PUBLIC OPENSSL_ia32_cpuid ALIGN 16 OPENSSL_ia32_cpuid PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_OPENSSL_ia32_cpuid:: + mov rdi,rcx + + mov r8,rbx xor eax,eax + mov DWORD PTR[8+rdi],eax cpuid mov r11d,eax @@ -112,6 +118,14 @@ $L$intel:: shr r10d,14 and r10d,0fffh + cmp r11d,7 + jb $L$nocacheinfo + + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD PTR[8+rdi],ebx + $L$nocacheinfo:: mov eax,1 cpuid @@ -145,19 +159,22 @@ $L$generic:: jnc $L$clear_avx xor ecx,ecx DB 00fh,001h,0d0h - and eax,6 cmp eax,6 je $L$done $L$clear_avx:: mov eax,0efffe7ffh and r9d,eax + and DWORD PTR[8+rdi],0ffffffdfh $L$done:: shl r9,32 mov eax,r10d mov rbx,r8 or rax,r9 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] DB 0F3h,0C3h ;repret +$L$SEH_end_OPENSSL_ia32_cpuid:: OPENSSL_ia32_cpuid ENDP PUBLIC OPENSSL_cleanse @@ -228,5 +245,20 @@ $L$break_rdrand:: DB 0F3h,0C3h ;repret OPENSSL_ia32_rdrand ENDP +PUBLIC OPENSSL_ia32_rdseed + +ALIGN 16 +OPENSSL_ia32_rdseed PROC PUBLIC + mov ecx,8 +$L$oop_rdseed:: +DB 72,15,199,248 + jc $L$break_rdseed + loop $L$oop_rdseed +$L$break_rdseed:: + cmp rax,0 + cmove rax,rcx + DB 0F3h,0C3h ;repret +OPENSSL_ia32_rdseed ENDP + .text$ ENDS END diff --git a/deps/openssl/asm/x86-elf-gas/aes/aes-586.s b/deps/openssl/asm/x86-elf-gas/aes/aes-586.s index f69b7d543dd425..2d7c1fd09df5a9 100644 --- a/deps/openssl/asm/x86-elf-gas/aes/aes-586.s +++ b/deps/openssl/asm/x86-elf-gas/aes/aes-586.s @@ -100,74 +100,78 @@ _x86_AES_encrypt_compact: xorl %ecx,%edx movl %esi,%ecx - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi 
movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %ecx,%edi xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%ecx - rorl $16,%ebp - xorl %ebp,%ecx - rorl $8,%ebp - xorl %ebp,%ecx - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %edx,%edi xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%edx - rorl $16,%ebp - xorl %ebp,%edx - rorl $8,%ebp - xorl %ebp,%edx - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %eax,%edi xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%eax - rorl $16,%ebp - xorl %ebp,%eax - rorl $8,%ebp - xorl %ebp,%eax - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %ebx,%edi xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi roll $24,%ebx + xorl %edi,%esi xorl %esi,%ebx - rorl $16,%ebp - xorl %ebp,%ebx - rorl $8,%ebp - xorl %ebp,%ebx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi @@ -289,74 +293,76 @@ _sse_AES_encrypt_compact: pshufw $13,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx + movl %edi,20(%esp) movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx - pshufw $13,%mm0,%mm2 movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx - shll $8,%edx shrl $16,%eax - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $16,%esi - orl %esi,%ecx pshufw $8,%mm4,%mm6 - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $24,%esi - orl %esi,%edx shrl $16,%ebx - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $8,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shll $24,%esi orl %esi,%ecx - movd %ecx,%mm0 - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi movd %mm2,%eax - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi - shll $16,%esi - orl %esi,%ecx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx movd %mm6,%ebx - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm1 - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%ecx shrl $16,%ebx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi - shll $16,%esi orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shrl $16,%eax + movd %ecx,%mm1 + movzbl 
-128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx punpckldq %mm1,%mm0 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi - orl %esi,%ecx - andl $255,%eax + andl $255,%ebx movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi orl %eax,%edx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm4 - andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 movd %edx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi @@ -1129,28 +1135,28 @@ _x86_AES_decrypt_compact: movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%eax subl %edi,%esi andl $4278124286,%eax andl $454761243,%esi - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %ecx,%eax - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ebp subl %edi,%esi @@ -1161,39 +1167,39 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %eax,%ecx xorl %ebp,%eax - roll $24,%eax xorl %ebx,%ecx xorl %ebp,%ebx - roll $16,%ebx + roll $24,%eax xorl %ebp,%ecx - roll $8,%ebp + roll $16,%ebx xorl %eax,%ecx + roll $8,%ebp xorl %ebx,%ecx movl 4(%esp),%eax xorl %ebp,%ecx movl %ecx,12(%esp) - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %edx,%ebx - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%ebp subl %edi,%esi @@ -1204,39 +1210,39 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %ebx,%edx xorl %ebp,%ebx - roll $24,%ebx xorl %ecx,%edx xorl %ebp,%ecx - roll $16,%ecx + roll $24,%ebx xorl %ebp,%edx - roll $8,%ebp + roll $16,%ecx xorl %ebx,%edx + roll $8,%ebp xorl %ecx,%edx movl 8(%esp),%ebx xorl %ebp,%edx movl %edx,16(%esp) - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %eax,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi @@ -1247,37 +1253,37 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %ecx,%eax xorl %ebp,%ecx - roll $24,%ecx xorl %edx,%eax xorl %ebp,%edx - roll 
$16,%edx + roll $24,%ecx xorl %ebp,%eax - roll $8,%ebp + roll $16,%edx xorl %ecx,%eax + roll $8,%ebp xorl %edx,%eax xorl %ebp,%eax - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi @@ -1288,13 +1294,13 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %ecx,%ebx xorl %ebp,%ecx - roll $24,%ecx xorl %edx,%ebx xorl %ebp,%edx - roll $16,%edx + roll $24,%ecx xorl %ebp,%ebx - roll $8,%ebp + roll $16,%edx xorl %ecx,%ebx + roll $8,%ebp xorl %edx,%ebx movl 12(%esp),%ecx xorl %ebp,%ebx @@ -1413,77 +1419,79 @@ _sse_AES_decrypt_compact: .align 16 .L007loop: pshufw $12,%mm0,%mm1 - movd %mm1,%eax pshufw $9,%mm4,%mm5 - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx + movd %mm1,%eax movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax shll $8,%edx - pshufw $6,%mm0,%mm2 - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $16,%esi + pshufw $3,%mm4,%mm6 orl %esi,%ecx - shrl $16,%eax - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $24,%esi - orl %esi,%edx shrl $16,%ebx - pshufw $3,%mm4,%mm6 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm0 - movzbl %al,%esi movd %mm2,%eax - movzbl -128(%ebp,%esi,1),%ecx - shll $16,%ecx - movzbl %bl,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi movd %mm6,%ebx - movzbl -128(%ebp,%esi,1),%esi + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi orl %esi,%ecx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi orl %esi,%edx - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $16,%esi - orl %esi,%edx - movd %edx,%mm1 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%edx - shll $8,%edx - movzbl %bh,%esi shrl $16,%eax - movzbl -128(%ebp,%esi,1),%esi - shll $24,%esi orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shrl $16,%ebx - punpckldq %mm1,%mm0 - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi shll $8,%esi - orl %esi,%ecx + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi orl %ebx,%edx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi shll $16,%esi - orl %esi,%edx - movd %edx,%mm4 - movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx shll $24,%eax orl %eax,%ecx + movl 
20(%esp),%edi + movd %edx,%mm4 movd %ecx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi @@ -3045,30 +3053,30 @@ private_AES_set_decrypt_key: .align 4 .L056permute: addl $16,%edi - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %eax,%ebx - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp xorl %eax,%ecx subl %ebp,%esi andl $4278124286,%edx @@ -3089,30 +3097,30 @@ private_AES_set_decrypt_key: movl %ebp,%ebx xorl %edx,%eax movl %eax,(%edi) - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp xorl %ebx,%edx subl %ebp,%esi andl $4278124286,%eax @@ -3133,30 +3141,30 @@ private_AES_set_decrypt_key: movl %ebp,%ecx xorl %eax,%ebx movl %ebx,4(%edi) - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %ecx,%edx - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp xorl %ecx,%eax subl %ebp,%esi andl $4278124286,%ebx @@ -3177,30 +3185,30 @@ private_AES_set_decrypt_key: movl %ebp,%edx xorl %ebx,%ecx movl %ecx,8(%edi) - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %edx,%eax - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp xorl %edx,%ebx subl %ebp,%esi andl $4278124286,%ecx @@ -3233,4 +3241,4 @@ 
private_AES_set_decrypt_key: .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s b/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s index d9f26885581bac..a68f7cdbe9cbe6 100644 --- a/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s +++ b/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s @@ -48,29 +48,84 @@ aesni_decrypt: movups %xmm2,(%eax) ret .size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 - movups (%edx),%xmm0 -.L002enc3_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004enc3_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 - movups (%edx),%xmm0 - jnz .L002enc3_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004enc3_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -83,25 +138,26 @@ _aesni_encrypt3: .align 16 _aesni_decrypt3: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 - movups (%edx),%xmm0 -.L003dec3_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005dec3_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 - movups (%edx),%xmm0 - jnz .L003dec3_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005dec3_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -115,27 +171,29 @@ _aesni_decrypt3: _aesni_encrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 - shrl $1,%ecx - leal 32(%edx),%edx + shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor 
%xmm0,%xmm5 - movups (%edx),%xmm0 -.L004enc4_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L006enc4_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 .byte 102,15,56,220,232 - movups (%edx),%xmm0 - jnz .L004enc4_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006enc4_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -151,27 +209,29 @@ _aesni_encrypt4: _aesni_decrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 - shrl $1,%ecx - leal 32(%edx),%edx + shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 - movups (%edx),%xmm0 -.L005dec4_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007dec4_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 .byte 102,15,56,222,233 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 .byte 102,15,56,222,232 - movups (%edx),%xmm0 - jnz .L005dec4_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007dec4_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -186,45 +246,44 @@ _aesni_decrypt4: .align 16 _aesni_encrypt6: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 +.byte 102,15,56,220,209 pxor %xmm0,%xmm5 - decl %ecx -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 + movups -16(%edx,%ecx,1),%xmm0 jmp .L_aesni_encrypt6_enter .align 16 -.L006enc6_loop: +.L008enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.align 16 .L_aesni_encrypt6_enter: - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%edx),%xmm0 - jnz .L006enc6_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L008enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -243,45 +302,44 @@ _aesni_encrypt6: .align 16 _aesni_decrypt6: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 -.byte 102,15,56,222,217 +.byte 102,15,56,222,209 pxor %xmm0,%xmm5 - decl %ecx -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,222,233 .byte 102,15,56,222,241 - movups (%edx),%xmm0 .byte 102,15,56,222,249 + movups -16(%edx,%ecx,1),%xmm0 jmp .L_aesni_decrypt6_enter .align 16 -.L007dec6_loop: +.L009dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 .byte 
102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -.align 16 .L_aesni_decrypt6_enter: - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%edx),%xmm0 - jnz .L007dec6_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L009dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -311,14 +369,14 @@ aesni_ecb_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz .L008ecb_ret + jz .L010ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz .L009ecb_decrypt + jz .L011ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L010ecb_enc_tail + jb .L012ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -327,9 +385,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L011ecb_enc_loop6_enter + jmp .L013ecb_enc_loop6_enter .align 16 -.L012ecb_enc_loop6: +.L014ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -344,12 +402,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L011ecb_enc_loop6_enter: +.L013ecb_enc_loop6_enter: call _aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L012ecb_enc_loop6 + jnc .L014ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -358,18 +416,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L008ecb_ret -.L010ecb_enc_tail: + jz .L010ecb_ret +.L012ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L013ecb_enc_one + jb .L015ecb_enc_one movups 16(%esi),%xmm3 - je .L014ecb_enc_two + je .L016ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L015ecb_enc_three + jb .L017ecb_enc_three movups 48(%esi),%xmm5 - je .L016ecb_enc_four + je .L018ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_encrypt6 @@ -378,50 +436,49 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L013ecb_enc_one: +.L015ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L017enc1_loop_3: +.L019enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L017enc1_loop_3 + jnz .L019enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L014ecb_enc_two: - xorps %xmm4,%xmm4 - call _aesni_encrypt3 +.L016ecb_enc_two: + call _aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L015ecb_enc_three: +.L017ecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L016ecb_enc_four: +.L018ecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L009ecb_decrypt: +.L011ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L018ecb_dec_tail + jb .L020ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -430,9 +487,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L019ecb_dec_loop6_enter + jmp .L021ecb_dec_loop6_enter .align 16 -.L020ecb_dec_loop6: +.L022ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ 
-447,12 +504,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L019ecb_dec_loop6_enter: +.L021ecb_dec_loop6_enter: call _aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L020ecb_dec_loop6 + jnc .L022ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -461,18 +518,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L008ecb_ret -.L018ecb_dec_tail: + jz .L010ecb_ret +.L020ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L021ecb_dec_one + jb .L023ecb_dec_one movups 16(%esi),%xmm3 - je .L022ecb_dec_two + je .L024ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L023ecb_dec_three + jb .L025ecb_dec_three movups 48(%esi),%xmm5 - je .L024ecb_dec_four + je .L026ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_decrypt6 @@ -481,44 +538,43 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L021ecb_dec_one: +.L023ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L025dec1_loop_4: +.L027dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L025dec1_loop_4 + jnz .L027dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L022ecb_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 +.L024ecb_dec_two: + call _aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L023ecb_dec_three: +.L025ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L008ecb_ret + jmp .L010ecb_ret .align 16 -.L024ecb_dec_four: +.L026ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L008ecb_ret: +.L010ecb_ret: popl %edi popl %esi popl %ebx @@ -557,45 +613,45 @@ aesni_ccm64_encrypt_blocks: movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) - shrl $1,%ecx + shll $4,%ecx + movl $16,%ebx leal (%edx),%ebp movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 - movl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx .byte 102,15,56,0,253 -.L026ccm64_enc_outer: +.L028ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 xorps %xmm0,%xmm2 movups 16(%ebp),%xmm1 xorps %xmm6,%xmm0 - leal 32(%ebp),%edx xorps %xmm0,%xmm3 - movups (%edx),%xmm0 -.L027ccm64_enc2_loop: + movups 32(%ebp),%xmm0 +.L029ccm64_enc2_loop: .byte 102,15,56,220,209 - decl %ecx .byte 102,15,56,220,217 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 - leal 32(%edx),%edx .byte 102,15,56,220,216 - movups (%edx),%xmm0 - jnz .L027ccm64_enc2_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L029ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 + decl %eax .byte 102,15,56,221,208 .byte 102,15,56,221,216 - decl %eax leal 16(%esi),%esi xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) - leal 16(%edi),%edi .byte 102,15,56,0,213 - jnz .L026ccm64_enc_outer + leal 16(%edi),%edi + jnz .L028ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) @@ -646,67 +702,70 @@ aesni_ccm64_decrypt_blocks: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L028enc1_loop_5: +.L030enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L028enc1_loop_5 + jnz .L030enc1_loop_5 .byte 102,15,56,221,209 + shll $4,%ebx + movl 
$16,%ecx movups (%esi),%xmm6 paddq 16(%esp),%xmm7 leal 16(%esi),%esi - jmp .L029ccm64_dec_outer + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L031ccm64_dec_outer .align 16 -.L029ccm64_dec_outer: +.L031ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 - movl %ebx,%ecx movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz .L030ccm64_dec_break + jz .L032ccm64_dec_break movups (%ebp),%xmm0 - shrl $1,%ecx + movl %ebx,%ecx movups 16(%ebp),%xmm1 xorps %xmm0,%xmm6 - leal 32(%ebp),%edx xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 - movups (%edx),%xmm0 -.L031ccm64_dec2_loop: + movups 32(%ebp),%xmm0 +.L033ccm64_dec2_loop: .byte 102,15,56,220,209 - decl %ecx .byte 102,15,56,220,217 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 - leal 32(%edx),%edx .byte 102,15,56,220,216 - movups (%edx),%xmm0 - jnz .L031ccm64_dec2_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L033ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 .byte 102,15,56,220,217 - leal 16(%esi),%esi .byte 102,15,56,221,208 .byte 102,15,56,221,216 - jmp .L029ccm64_dec_outer + leal 16(%esi),%esi + jmp .L031ccm64_dec_outer .align 16 -.L030ccm64_dec_break: +.L032ccm64_dec_break: + movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -.L032enc1_loop_6: +.L034enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L032enc1_loop_6 + jnz .L034enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi @@ -736,7 +795,7 @@ aesni_ctr32_encrypt_blocks: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je .L033ctr32_one_shortcut + je .L035ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -752,63 +811,59 @@ aesni_ctr32_encrypt_blocks: .byte 102,15,58,34,253,3 movl 240(%edx),%ecx bswap %ebx - pxor %xmm1,%xmm1 pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movdqa (%esp),%xmm2 -.byte 102,15,58,34,203,0 +.byte 102,15,58,34,195,0 leal 3(%ebx),%ebp -.byte 102,15,58,34,197,0 +.byte 102,15,58,34,205,0 incl %ebx -.byte 102,15,58,34,203,1 +.byte 102,15,58,34,195,1 incl %ebp -.byte 102,15,58,34,197,1 +.byte 102,15,58,34,205,1 incl %ebx -.byte 102,15,58,34,203,2 +.byte 102,15,58,34,195,2 incl %ebp -.byte 102,15,58,34,197,2 - movdqa %xmm1,48(%esp) -.byte 102,15,56,0,202 - movdqa %xmm0,64(%esp) +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 - pshufd $192,%xmm1,%xmm2 - pshufd $128,%xmm1,%xmm3 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb .L034ctr32_tail + jb .L036ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx movdqa %xmm7,32(%esp) - shrl $1,%ecx movl %edx,%ebp - movl %ecx,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp .L035ctr32_loop6 -.align 16 -.L035ctr32_loop6: - pshufd $64,%xmm1,%xmm4 - movdqa 32(%esp),%xmm1 - pshufd $192,%xmm0,%xmm5 - por %xmm1,%xmm2 - pshufd $128,%xmm0,%xmm6 - por %xmm1,%xmm3 - pshufd $64,%xmm0,%xmm7 - por %xmm1,%xmm4 - por %xmm1,%xmm5 - por %xmm1,%xmm6 - por %xmm1,%xmm7 - movups (%ebp),%xmm0 - movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx - decl %ecx + jmp .L037ctr32_loop6 +.align 16 +.L037ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 pxor %xmm0,%xmm4 -.byte 
102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 +.byte 102,15,56,220,209 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movups (%esi),%xmm1 @@ -819,51 +874,51 @@ aesni_ctr32_encrypt_blocks: movups %xmm2,(%edi) movdqa 16(%esp),%xmm0 xorps %xmm1,%xmm4 - movdqa 48(%esp),%xmm1 + movdqa 64(%esp),%xmm1 movups %xmm3,16(%edi) movups %xmm4,32(%edi) paddd %xmm0,%xmm1 - paddd 64(%esp),%xmm0 + paddd 48(%esp),%xmm0 movdqa (%esp),%xmm2 movups 48(%esi),%xmm3 movups 64(%esi),%xmm4 xorps %xmm3,%xmm5 movups 80(%esi),%xmm3 leal 96(%esi),%esi - movdqa %xmm1,48(%esp) -.byte 102,15,56,0,202 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 xorps %xmm4,%xmm6 movups %xmm5,48(%edi) xorps %xmm3,%xmm7 - movdqa %xmm0,64(%esp) -.byte 102,15,56,0,194 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 movups %xmm6,64(%edi) - pshufd $192,%xmm1,%xmm2 + pshufd $192,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi - movl %ebx,%ecx - pshufd $128,%xmm1,%xmm3 + pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc .L035ctr32_loop6 + jnc .L037ctr32_loop6 addl $6,%eax - jz .L036ctr32_ret + jz .L038ctr32_ret + movdqu (%ebp),%xmm7 movl %ebp,%edx - leal 1(,%ecx,2),%ecx - movdqa 32(%esp),%xmm7 -.L034ctr32_tail: + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L036ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb .L037ctr32_one - pshufd $64,%xmm1,%xmm4 + jb .L039ctr32_one + pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je .L038ctr32_two - pshufd $192,%xmm0,%xmm5 + je .L040ctr32_two + pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb .L039ctr32_three - pshufd $128,%xmm0,%xmm6 + jb .L041ctr32_three + pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je .L040ctr32_four + je .L042ctr32_four por %xmm7,%xmm6 call _aesni_encrypt6 movups (%esi),%xmm1 @@ -881,39 +936,39 @@ aesni_ctr32_encrypt_blocks: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L036ctr32_ret + jmp .L038ctr32_ret .align 16 -.L033ctr32_one_shortcut: +.L035ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -.L037ctr32_one: +.L039ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L041enc1_loop_7: +.L043enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L041enc1_loop_7 + jnz .L043enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp .L036ctr32_ret + jmp .L038ctr32_ret .align 16 -.L038ctr32_two: - call _aesni_encrypt3 +.L040ctr32_two: + call _aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L036ctr32_ret + jmp .L038ctr32_ret .align 16 -.L039ctr32_three: +.L041ctr32_three: call _aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -924,9 +979,9 @@ aesni_ctr32_encrypt_blocks: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L036ctr32_ret + jmp .L038ctr32_ret .align 16 -.L040ctr32_four: +.L042ctr32_four: call _aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -940,7 +995,7 @@ aesni_ctr32_encrypt_blocks: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L036ctr32_ret: +.L038ctr32_ret: movl 80(%esp),%esp popl %edi popl %esi @@ -965,12 +1020,12 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L042enc1_loop_8: +.L044enc1_loop_8: .byte 
102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L042enc1_loop_8 + jnz .L044enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -994,12 +1049,14 @@ aesni_xts_encrypt: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc .L043xts_enc_short - shrl $1,%ecx - movl %ecx,%ebx - jmp .L044xts_enc_loop6 + jc .L045xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L046xts_enc_loop6 .align 16 -.L044xts_enc_loop6: +.L046xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1035,6 +1092,7 @@ aesni_xts_encrypt: pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 + movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 @@ -1050,19 +1108,17 @@ aesni_xts_encrypt: movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx pxor 16(%esp),%xmm3 -.byte 102,15,56,220,209 pxor 32(%esp),%xmm4 -.byte 102,15,56,220,217 +.byte 102,15,56,220,209 pxor 48(%esp),%xmm5 - decl %ecx -.byte 102,15,56,220,225 pxor 64(%esp),%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,217 pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movdqa 80(%esp),%xmm1 @@ -1087,26 +1143,25 @@ aesni_xts_encrypt: paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 - movl %ebx,%ecx pxor %xmm2,%xmm1 subl $96,%eax - jnc .L044xts_enc_loop6 - leal 1(,%ecx,2),%ecx + jnc .L046xts_enc_loop6 + movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L043xts_enc_short: +.L045xts_enc_short: addl $96,%eax - jz .L045xts_enc_done6x + jz .L047xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L046xts_enc_one + jb .L048xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L047xts_enc_two + je .L049xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1115,7 +1170,7 @@ aesni_xts_encrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L048xts_enc_three + jb .L050xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1125,7 +1180,7 @@ aesni_xts_encrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L049xts_enc_four + je .L051xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1157,9 +1212,9 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L050xts_enc_done + jmp .L052xts_enc_done .align 16 -.L046xts_enc_one: +.L048xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1167,37 +1222,36 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L051enc1_loop_9: +.L053enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L051enc1_loop_9 + jnz .L053enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L050xts_enc_done + jmp .L052xts_enc_done .align 16 -.L047xts_enc_two: +.L049xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 - xorps %xmm4,%xmm4 - call _aesni_encrypt3 + call _aesni_encrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L050xts_enc_done + jmp .L052xts_enc_done .align 16 -.L048xts_enc_three: +.L050xts_enc_three: movaps %xmm1,%xmm7 movups 
(%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1215,9 +1269,9 @@ aesni_xts_encrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L050xts_enc_done + jmp .L052xts_enc_done .align 16 -.L049xts_enc_four: +.L051xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1239,28 +1293,28 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L050xts_enc_done + jmp .L052xts_enc_done .align 16 -.L045xts_enc_done6x: +.L047xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L052xts_enc_ret + jz .L054xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp .L053xts_enc_steal + jmp .L055xts_enc_steal .align 16 -.L050xts_enc_done: +.L052xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L052xts_enc_ret + jz .L054xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -.L053xts_enc_steal: +.L055xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1268,7 +1322,7 @@ aesni_xts_encrypt: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L053xts_enc_steal + jnz .L055xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1278,16 +1332,16 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L054enc1_loop_10: +.L056enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L054enc1_loop_10 + jnz .L056enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -.L052xts_enc_ret: +.L054xts_enc_ret: movl 116(%esp),%esp popl %edi popl %esi @@ -1312,12 +1366,12 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L055enc1_loop_11: +.L057enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L055enc1_loop_11 + jnz .L057enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1346,12 +1400,14 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc .L056xts_dec_short - shrl $1,%ecx - movl %ecx,%ebx - jmp .L057xts_dec_loop6 + jc .L058xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L059xts_dec_loop6 .align 16 -.L057xts_dec_loop6: +.L059xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1387,6 +1443,7 @@ aesni_xts_decrypt: pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 + movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 @@ -1402,19 +1459,17 @@ aesni_xts_decrypt: movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx pxor 16(%esp),%xmm3 -.byte 102,15,56,222,209 pxor 32(%esp),%xmm4 -.byte 102,15,56,222,217 +.byte 102,15,56,222,209 pxor 48(%esp),%xmm5 - decl %ecx -.byte 102,15,56,222,225 pxor 64(%esp),%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,217 pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 .byte 102,15,56,222,241 - movups (%edx),%xmm0 .byte 102,15,56,222,249 call .L_aesni_decrypt6_enter movdqa 80(%esp),%xmm1 @@ -1439,26 +1494,25 @@ aesni_xts_decrypt: paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 - movl %ebx,%ecx pxor %xmm2,%xmm1 subl $96,%eax - jnc .L057xts_dec_loop6 - leal 1(,%ecx,2),%ecx + jnc .L059xts_dec_loop6 + movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L056xts_dec_short: +.L058xts_dec_short: addl $96,%eax - jz .L058xts_dec_done6x + jz .L060xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L059xts_dec_one + jb 
.L061xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L060xts_dec_two + je .L062xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1467,7 +1521,7 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L061xts_dec_three + jb .L063xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1477,7 +1531,7 @@ aesni_xts_decrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L062xts_dec_four + je .L064xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1509,9 +1563,9 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L063xts_dec_done + jmp .L065xts_dec_done .align 16 -.L059xts_dec_one: +.L061xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1519,36 +1573,36 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L064dec1_loop_12: +.L066dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L064dec1_loop_12 + jnz .L066dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L063xts_dec_done + jmp .L065xts_dec_done .align 16 -.L060xts_dec_two: +.L062xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 - call _aesni_decrypt3 + call _aesni_decrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L063xts_dec_done + jmp .L065xts_dec_done .align 16 -.L061xts_dec_three: +.L063xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1566,9 +1620,9 @@ aesni_xts_decrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L063xts_dec_done + jmp .L065xts_dec_done .align 16 -.L062xts_dec_four: +.L064xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1590,20 +1644,20 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L063xts_dec_done + jmp .L065xts_dec_done .align 16 -.L058xts_dec_done6x: +.L060xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L065xts_dec_ret + jz .L067xts_dec_ret movl %eax,112(%esp) - jmp .L066xts_dec_only_one_more + jmp .L068xts_dec_only_one_more .align 16 -.L063xts_dec_done: +.L065xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L065xts_dec_ret + jz .L067xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1613,7 +1667,7 @@ aesni_xts_decrypt: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -.L066xts_dec_only_one_more: +.L068xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1627,16 +1681,16 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L067dec1_loop_13: +.L069dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L067dec1_loop_13 + jnz .L069dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -.L068xts_dec_steal: +.L070xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1644,7 +1698,7 @@ aesni_xts_decrypt: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L068xts_dec_steal + jnz .L070xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1654,16 +1708,16 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps 
%xmm0,%xmm2 -.L069dec1_loop_14: +.L071dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L069dec1_loop_14 + jnz .L071dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -.L065xts_dec_ret: +.L067xts_dec_ret: movl 116(%esp),%esp popl %edi popl %esi @@ -1689,7 +1743,7 @@ aesni_cbc_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz .L070cbc_abort + jz .L072cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1697,14 +1751,14 @@ aesni_cbc_encrypt: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je .L071cbc_decrypt + je .L073cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb .L072cbc_enc_tail + jb .L074cbc_enc_tail subl $16,%eax - jmp .L073cbc_enc_loop + jmp .L075cbc_enc_loop .align 16 -.L073cbc_enc_loop: +.L075cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1712,24 +1766,24 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -.L074enc1_loop_15: +.L076enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L074enc1_loop_15 + jnz .L076enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc .L073cbc_enc_loop + jnc .L075cbc_enc_loop addl $16,%eax - jnz .L072cbc_enc_tail + jnz .L074cbc_enc_tail movaps %xmm2,%xmm7 - jmp .L075cbc_ret -.L072cbc_enc_tail: + jmp .L077cbc_ret +.L074cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1740,20 +1794,20 @@ aesni_cbc_encrypt: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp .L073cbc_enc_loop + jmp .L075cbc_enc_loop .align 16 -.L071cbc_decrypt: +.L073cbc_decrypt: cmpl $80,%eax - jbe .L076cbc_dec_tail + jbe .L078cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp .L077cbc_dec_loop6_enter + jmp .L079cbc_dec_loop6_enter .align 16 -.L078cbc_dec_loop6: +.L080cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -.L077cbc_dec_loop6_enter: +.L079cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1783,28 +1837,28 @@ aesni_cbc_encrypt: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja .L078cbc_dec_loop6 + ja .L080cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle .L079cbc_dec_tail_collected + jle .L081cbc_dec_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -.L076cbc_dec_tail: +.L078cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe .L080cbc_dec_one + jbe .L082cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe .L081cbc_dec_two + jbe .L083cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe .L082cbc_dec_three + jbe .L084cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe .L083cbc_dec_four + jbe .L085cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1827,28 +1881,27 @@ aesni_cbc_encrypt: leal 64(%edi),%edi movaps %xmm6,%xmm2 subl $80,%eax - jmp .L079cbc_dec_tail_collected + jmp .L081cbc_dec_tail_collected .align 16 -.L080cbc_dec_one: +.L082cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L084dec1_loop_16: +.L086dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L084dec1_loop_16 + jnz .L086dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp .L079cbc_dec_tail_collected + jmp .L081cbc_dec_tail_collected .align 16 -.L081cbc_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 +.L083cbc_dec_two: + call 
_aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) @@ -1856,9 +1909,9 @@ aesni_cbc_encrypt: leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp .L079cbc_dec_tail_collected + jmp .L081cbc_dec_tail_collected .align 16 -.L082cbc_dec_three: +.L084cbc_dec_three: call _aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 @@ -1869,9 +1922,9 @@ aesni_cbc_encrypt: leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp .L079cbc_dec_tail_collected + jmp .L081cbc_dec_tail_collected .align 16 -.L083cbc_dec_four: +.L085cbc_dec_four: call _aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1886,23 +1939,23 @@ aesni_cbc_encrypt: leal 48(%edi),%edi movaps %xmm5,%xmm2 subl $64,%eax -.L079cbc_dec_tail_collected: +.L081cbc_dec_tail_collected: andl $15,%eax - jnz .L085cbc_dec_tail_partial + jnz .L087cbc_dec_tail_partial movups %xmm2,(%edi) - jmp .L075cbc_ret + jmp .L077cbc_ret .align 16 -.L085cbc_dec_tail_partial: +.L087cbc_dec_tail_partial: movaps %xmm2,(%esp) movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -.L075cbc_ret: +.L077cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp movups %xmm7,(%ebp) -.L070cbc_abort: +.L072cbc_abort: popl %edi popl %esi popl %ebx @@ -1913,51 +1966,51 @@ aesni_cbc_encrypt: .align 16 _aesni_set_encrypt_key: testl %eax,%eax - jz .L086bad_pointer + jz .L088bad_pointer testl %edx,%edx - jz .L086bad_pointer + jz .L088bad_pointer movups (%eax),%xmm0 xorps %xmm4,%xmm4 leal 16(%edx),%edx cmpl $256,%ecx - je .L08714rounds + je .L08914rounds cmpl $192,%ecx - je .L08812rounds + je .L09012rounds cmpl $128,%ecx - jne .L089bad_keybits + jne .L091bad_keybits .align 16 -.L09010rounds: +.L09210rounds: movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call .L091key_128_cold + call .L093key_128_cold .byte 102,15,58,223,200,2 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,4 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,8 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,16 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,32 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,64 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,128 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,27 - call .L092key_128 + call .L094key_128 .byte 102,15,58,223,200,54 - call .L092key_128 + call .L094key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) xorl %eax,%eax ret .align 16 -.L092key_128: +.L094key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -.L091key_128_cold: +.L093key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -1966,38 +2019,38 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L08812rounds: +.L09012rounds: movq 16(%eax),%xmm2 movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call .L093key_192a_cold + call .L095key_192a_cold .byte 102,15,58,223,202,2 - call .L094key_192b + call .L096key_192b .byte 102,15,58,223,202,4 - call .L095key_192a + call .L097key_192a .byte 102,15,58,223,202,8 - call .L094key_192b + call .L096key_192b .byte 102,15,58,223,202,16 - call .L095key_192a + call .L097key_192a .byte 102,15,58,223,202,32 - call .L094key_192b + call .L096key_192b .byte 102,15,58,223,202,64 - call .L095key_192a + call .L097key_192a .byte 102,15,58,223,202,128 - call .L094key_192b + call .L096key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) xorl %eax,%eax ret .align 16 -.L095key_192a: +.L097key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 16 -.L093key_192a_cold: +.L095key_192a_cold: movaps 
%xmm2,%xmm5 -.L096key_192b_warm: +.L098key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -2011,56 +2064,56 @@ _aesni_set_encrypt_key: pxor %xmm3,%xmm2 ret .align 16 -.L094key_192b: +.L096key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp .L096key_192b_warm + jmp .L098key_192b_warm .align 16 -.L08714rounds: +.L08914rounds: movups 16(%eax),%xmm2 movl $13,%ecx leal 16(%edx),%edx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call .L097key_256a_cold + call .L099key_256a_cold .byte 102,15,58,223,200,1 - call .L098key_256b + call .L100key_256b .byte 102,15,58,223,202,2 - call .L099key_256a + call .L101key_256a .byte 102,15,58,223,200,2 - call .L098key_256b + call .L100key_256b .byte 102,15,58,223,202,4 - call .L099key_256a + call .L101key_256a .byte 102,15,58,223,200,4 - call .L098key_256b + call .L100key_256b .byte 102,15,58,223,202,8 - call .L099key_256a + call .L101key_256a .byte 102,15,58,223,200,8 - call .L098key_256b + call .L100key_256b .byte 102,15,58,223,202,16 - call .L099key_256a + call .L101key_256a .byte 102,15,58,223,200,16 - call .L098key_256b + call .L100key_256b .byte 102,15,58,223,202,32 - call .L099key_256a + call .L101key_256a .byte 102,15,58,223,200,32 - call .L098key_256b + call .L100key_256b .byte 102,15,58,223,202,64 - call .L099key_256a + call .L101key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax ret .align 16 -.L099key_256a: +.L101key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -.L097key_256a_cold: +.L099key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2069,7 +2122,7 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L098key_256b: +.L100key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2080,11 +2133,11 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm2 ret .align 4 -.L086bad_pointer: +.L088bad_pointer: movl $-1,%eax ret .align 4 -.L089bad_keybits: +.L091bad_keybits: movl $-2,%eax ret .size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key @@ -2111,7 +2164,7 @@ aesni_set_decrypt_key: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz .L100dec_key_ret + jnz .L102dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2119,7 +2172,7 @@ aesni_set_decrypt_key: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -.L101dec_key_inverse: +.L103dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2129,12 +2182,12 @@ aesni_set_decrypt_key: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja .L101dec_key_inverse + ja .L103dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) xorl %eax,%eax -.L100dec_key_ret: +.L102dec_key_ret: ret .size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 diff --git a/deps/openssl/asm/x86-elf-gas/aes/vpaes-x86.s b/deps/openssl/asm/x86-elf-gas/aes/vpaes-x86.s index c53a5074a66311..5dc8d19633a8ae 100644 --- a/deps/openssl/asm/x86-elf-gas/aes/vpaes-x86.s +++ b/deps/openssl/asm/x86-elf-gas/aes/vpaes-x86.s @@ -73,33 +73,33 @@ _vpaes_encrypt_core: movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 - movdqu (%edx),%xmm5 - psrld $4,%xmm1 pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 -.byte 102,15,56,0,193 pxor %xmm5,%xmm2 - pxor %xmm2,%xmm0 + psrld $4,%xmm1 addl $16,%edx +.byte 102,15,56,0,193 leal 192(%ebp),%ebx + pxor 
%xmm2,%xmm0 jmp .L000enc_entry .align 16 .L001enc_loop: movdqa 32(%ebp),%xmm4 -.byte 102,15,56,0,226 - pxor %xmm5,%xmm4 movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 .byte 102,15,56,0,195 - pxor %xmm4,%xmm0 + pxor %xmm5,%xmm4 movdqa 64(%ebp),%xmm5 -.byte 102,15,56,0,234 + pxor %xmm4,%xmm0 movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 movdqa 80(%ebp),%xmm2 -.byte 102,15,56,0,211 - pxor %xmm5,%xmm2 movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 @@ -108,28 +108,28 @@ _vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx - pxor %xmm3,%xmm0 subl $1,%eax + pxor %xmm3,%xmm0 .L000enc_entry: movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 - movdqa -32(%ebp),%xmm5 .byte 102,15,56,0,232 - pxor %xmm1,%xmm0 movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm5,%xmm3 movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 .byte 102,15,56,0,224 - pxor %xmm5,%xmm4 movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm7,%xmm3 - movdqu (%edx),%xmm5 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 + movdqu (%edx),%xmm5 pxor %xmm1,%xmm3 jnz .L001enc_loop movdqa 96(%ebp),%xmm4 @@ -145,8 +145,8 @@ _vpaes_encrypt_core: .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: - movl 240(%edx),%eax leal 608(%ebp),%ebx + movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 @@ -169,56 +169,56 @@ _vpaes_decrypt_core: .align 16 .L003dec_loop: movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa -16(%ebx),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - addl $16,%edx -.byte 102,15,56,0,197 movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 16(%ebx),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 - subl $1,%eax .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 48(%ebx),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 80(%ebx),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 + addl $16,%edx .byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax .L002dec_entry: movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 - psrld $4,%xmm1 pand %xmm6,%xmm0 - movdqa -32(%ebp),%xmm2 + psrld $4,%xmm1 .byte 102,15,56,0,208 - pxor %xmm1,%xmm0 movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm2,%xmm3 movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 - pxor %xmm1,%xmm3 movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 jnz .L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 @@ -327,12 +327,12 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm0 - pxor %xmm0,%xmm6 + pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 - pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 ret .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear diff 
--git a/deps/openssl/asm/x86-elf-gas/bf/bf-586.s b/deps/openssl/asm/x86-elf-gas/bf/bf-586.s new file mode 100644 index 00000000000000..aa718d40cd5c2a --- /dev/null +++ b/deps/openssl/asm/x86-elf-gas/bf/bf-586.s @@ -0,0 +1,896 @@ +.file "bf-586.s" +.text +.globl BF_encrypt +.type BF_encrypt,@function +.align 16 +BF_encrypt: +.L_BF_encrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + xorl %eax,%eax + movl (%ebp),%ebx + xorl %ecx,%ecx + xorl %ebx,%edi + + + movl 4(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 8(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 12(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 16(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 20(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 24(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 28(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 32(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 36(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb 
%dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 40(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 44(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 48(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 52(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 56(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 60(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 64(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + + movl 20(%esp),%eax + xorl %ebx,%edi + movl 68(%ebp),%edx + xorl %edx,%esi + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size BF_encrypt,.-.L_BF_encrypt_begin +.globl BF_decrypt +.type BF_decrypt,@function +.align 16 +BF_decrypt: +.L_BF_decrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + xorl %eax,%eax + movl 68(%ebp),%ebx + xorl %ecx,%ecx + xorl %ebx,%edi + + + movl 64(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl 
%eax,%eax + xorl %ebx,%esi + + + movl 60(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 56(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 52(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 48(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 44(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 40(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 36(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 32(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 28(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 24(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 
3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 20(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 16(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 12(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 8(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 4(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + + movl 20(%esp),%eax + xorl %ebx,%edi + movl (%ebp),%edx + xorl %edx,%esi + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size BF_decrypt,.-.L_BF_decrypt_begin +.globl BF_cbc_encrypt +.type BF_cbc_encrypt,@function +.align 16 +BF_cbc_encrypt: +.L_BF_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 56(%esp),%ecx + + movl 48(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L000decrypt + andl $4294967288,%ebp + movl 8(%esp),%eax + movl 12(%esp),%ebx + jz .L001encrypt_finish +.L002encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L002encrypt_loop +.L001encrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L003finish + call .L004PIC_point +.L004PIC_point: + popl %edx + leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L006ej7: + movb 6(%esi),%dh + shll $8,%edx +.L007ej6: + movb 5(%esi),%dh +.L008ej5: + movb 4(%esi),%dl +.L009ej4: + movl (%esi),%ecx + jmp .L010ejend +.L011ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L012ej2: + movb 1(%esi),%ch +.L013ej1: + movb (%esi),%cl +.L010ejend: + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap 
%ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L003finish +.L000decrypt: + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz .L014decrypt_finish +.L015decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,16(%esp) + movl %ebx,20(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L015decrypt_loop +.L014decrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L003finish + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L016dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L017dj6: + movb %dh,5(%edi) +.L018dj5: + movb %dl,4(%edi) +.L019dj4: + movl %ecx,(%edi) + jmp .L020djend +.L021dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L022dj2: + movb %ch,1(%esi) +.L023dj1: + movb %cl,(%esi) +.L020djend: + jmp .L003finish +.L003finish: + movl 60(%esp),%ecx + addl $24,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L005cbc_enc_jmp_table: +.long 0 +.long .L013ej1-.L004PIC_point +.long .L012ej2-.L004PIC_point +.long .L011ej3-.L004PIC_point +.long .L009ej4-.L004PIC_point +.long .L008ej5-.L004PIC_point +.long .L007ej6-.L004PIC_point +.long .L006ej7-.L004PIC_point +.align 64 +.size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin diff --git a/deps/openssl/asm/x86-elf-gas/bn/bn-586.s b/deps/openssl/asm/x86-elf-gas/bn/bn-586.s new file mode 100644 index 00000000000000..3e20e9df09cc2a --- /dev/null +++ b/deps/openssl/asm/x86-elf-gas/bn/bn-586.s @@ -0,0 +1,1521 @@ +.file "../openssl/crypto/bn/asm/bn-586.s" +.text +.globl bn_mul_add_words +.type bn_mul_add_words,@function +.align 16 +bn_mul_add_words: +.L_bn_mul_add_words_begin: + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L000maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp .L001maw_sse2_entry +.align 16 +.L002maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd 
%mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz .L003maw_sse2_exit +.L001maw_sse2_entry: + testl $4294967288,%ecx + jnz .L002maw_sse2_unrolled +.align 4 +.L004maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L004maw_sse2_loop +.L003maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 16 +.L000maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz .L005maw_finish +.align 16 +.L006maw_loop: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz .L006maw_loop +.L005maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz .L007maw_finish2 + jmp .L008maw_end +.L007maw_finish2: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L008maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_add_words,.-.L_bn_mul_add_words_begin +.globl bn_mul_words +.type bn_mul_words,@function +.align 16 +bn_mul_words: 
+.L_bn_mul_words_begin: + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L009mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 16 +.L010mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L010mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 16 +.L009mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz .L011mw_finish +.L012mw_loop: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz .L011mw_finish + jmp .L012mw_loop +.L011mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz .L013mw_finish2 + jmp .L014mw_end +.L013mw_finish2: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L014mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_words,.-.L_bn_mul_words_begin +.globl bn_sqr_words +.type bn_sqr_words,@function +.align 16 +bn_sqr_words: +.L_bn_sqr_words_begin: + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L015sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 16 +.L016sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz .L016sqr_sse2_loop + emms + ret +.align 16 +.L015sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz .L017sw_finish +.L018sw_loop: + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + + movl 8(%edi),%eax + mull %eax + movl 
%eax,16(%esi) + movl %edx,20(%esi) + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz .L018sw_loop +.L017sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz .L019sw_end + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz .L019sw_end + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz .L019sw_end + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz .L019sw_end + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz .L019sw_end + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz .L019sw_end + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz .L019sw_end + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +.L019sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sqr_words,.-.L_bn_sqr_words_begin +.globl bn_div_words +.type bn_div_words,@function +.align 16 +bn_div_words: +.L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.size bn_div_words,.-.L_bn_div_words_begin +.globl bn_add_words +.type bn_add_words,@function +.align 16 +bn_add_words: +.L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L020aw_finish +.L021aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L021aw_loop +.L020aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L022aw_end + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L022aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz 
.L022aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L022aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L022aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L022aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L022aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L022aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_add_words,.-.L_bn_add_words_begin +.globl bn_sub_words +.type bn_sub_words,@function +.align 16 +bn_sub_words: +.L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L023aw_finish +.L024aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L024aw_loop +.L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L025aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L025aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L025aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L025aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L025aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L025aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl 
%edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L025aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_words,.-.L_bn_sub_words_begin +.globl bn_sub_part_words +.type bn_sub_part_words,@function +.align 16 +bn_sub_part_words: +.L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L026aw_finish +.L027aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L027aw_loop +.L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +.L028aw_end: + cmpl $0,36(%esp) + je .L029pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je .L029pw_end + 
jge .L030pw_pos + + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz .L031pw_neg_finish +.L032pw_neg_loop: + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L032pw_neg_loop +.L031pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz .L029pw_end + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp .L029pw_end +.L030pw_pos: + andl $4294967288,%ebp + jz .L033pw_pos_finish +.L034pw_pos_loop: + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L035pw_nc0 + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L036pw_nc1 + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L037pw_nc2 + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L038pw_nc3 + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L039pw_nc4 + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L040pw_nc5 + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L041pw_nc6 + + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc .L042pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L034pw_pos_loop +.L033pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz .L029pw_end + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L043pw_tail_nc0 + decl %ebp + jz .L029pw_end + + 
movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L044pw_tail_nc1 + decl %ebp + jz .L029pw_end + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L045pw_tail_nc2 + decl %ebp + jz .L029pw_end + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L046pw_tail_nc3 + decl %ebp + jz .L029pw_end + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L047pw_tail_nc4 + decl %ebp + jz .L029pw_end + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L048pw_tail_nc5 + decl %ebp + jz .L029pw_end + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L049pw_tail_nc6 + movl $1,%eax + jmp .L029pw_end +.L050pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +.L035pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L036pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L037pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L038pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L039pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L040pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L041pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +.L042pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L050pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz .L051pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +.L043pw_tail_nc0: + decl %ebp + jz .L051pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L044pw_tail_nc1: + decl %ebp + jz .L051pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L045pw_tail_nc2: + decl %ebp + jz .L051pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L046pw_tail_nc3: + decl %ebp + jz .L051pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L047pw_tail_nc4: + decl %ebp + jz .L051pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L048pw_tail_nc5: + decl %ebp + jz .L051pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L049pw_tail_nc6: +.L051pw_nc_end: + movl $0,%eax +.L029pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_part_words,.-.L_bn_sub_part_words_begin +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/bn/x86.s b/deps/openssl/asm/x86-elf-gas/bn/co-586.s similarity index 56% rename from deps/openssl/asm/x86-elf-gas/bn/x86.s rename to deps/openssl/asm/x86-elf-gas/bn/co-586.s index c41c8e917e6174..1128e3e959a3a6 100644 --- a/deps/openssl/asm/x86-elf-gas/bn/x86.s +++ b/deps/openssl/asm/x86-elf-gas/bn/co-586.s @@ -1,865 +1,5 @@ -.file "../openssl/crypto/bn/asm/x86.s" +.file "../openssl/crypto/bn/asm/co-586.s" .text -.globl bn_mul_add_words -.type bn_mul_add_words,@function -.align 16 -bn_mul_add_words: -.L_bn_mul_add_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - xorl %esi,%esi - movl 20(%esp),%edi - movl 28(%esp),%ecx - movl 24(%esp),%ebx - andl $4294967288,%ecx - movl 32(%esp),%ebp - pushl %ecx - jz .L000maw_finish -.L001maw_loop: - movl %ecx,(%esp) - - movl (%ebx),%eax - mull %ebp - addl %esi,%eax - movl (%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,(%edi) - movl %edx,%esi - - movl 4(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 4(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,4(%edi) - movl %edx,%esi - - movl 8(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 8(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,8(%edi) - movl %edx,%esi - - movl 12(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 12(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,12(%edi) - movl %edx,%esi - - movl 16(%ebx),%eax - mull %ebp - addl %esi,%eax - 
movl 16(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,16(%edi) - movl %edx,%esi - - movl 20(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 20(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,20(%edi) - movl %edx,%esi - - movl 24(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 24(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi - - movl 28(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 28(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,28(%edi) - movl %edx,%esi - - movl (%esp),%ecx - addl $32,%ebx - addl $32,%edi - subl $8,%ecx - jnz .L001maw_loop -.L000maw_finish: - movl 32(%esp),%ecx - andl $7,%ecx - jnz .L002maw_finish2 - jmp .L003maw_end -.L002maw_finish2: - - movl (%ebx),%eax - mull %ebp - addl %esi,%eax - movl (%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,(%edi) - movl %edx,%esi - jz .L003maw_end - - movl 4(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 4(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,4(%edi) - movl %edx,%esi - jz .L003maw_end - - movl 8(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 8(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,8(%edi) - movl %edx,%esi - jz .L003maw_end - - movl 12(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 12(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,12(%edi) - movl %edx,%esi - jz .L003maw_end - - movl 16(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 16(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,16(%edi) - movl %edx,%esi - jz .L003maw_end - - movl 20(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 20(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,20(%edi) - movl %edx,%esi - jz .L003maw_end - - movl 24(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 24(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi -.L003maw_end: - movl %esi,%eax - popl %ecx - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size bn_mul_add_words,.-.L_bn_mul_add_words_begin -.globl bn_mul_words -.type bn_mul_words,@function -.align 16 -bn_mul_words: -.L_bn_mul_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - xorl %esi,%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - movl 28(%esp),%ebp - movl 32(%esp),%ecx - andl $4294967288,%ebp - jz .L004mw_finish -.L005mw_loop: - - movl (%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,(%edi) - movl %edx,%esi - - movl 4(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,4(%edi) - movl %edx,%esi - - movl 8(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,8(%edi) - movl %edx,%esi - - movl 12(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,12(%edi) - movl %edx,%esi - - movl 16(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,16(%edi) - movl %edx,%esi - - movl 20(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,20(%edi) - movl %edx,%esi - - movl 24(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi - - movl 28(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,28(%edi) - movl %edx,%esi - - addl $32,%ebx - addl $32,%edi - subl $8,%ebp - jz .L004mw_finish - jmp .L005mw_loop -.L004mw_finish: - movl 28(%esp),%ebp - andl $7,%ebp - jnz .L006mw_finish2 - jmp .L007mw_end -.L006mw_finish2: - - 
movl (%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,(%edi) - movl %edx,%esi - decl %ebp - jz .L007mw_end - - movl 4(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,4(%edi) - movl %edx,%esi - decl %ebp - jz .L007mw_end - - movl 8(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,8(%edi) - movl %edx,%esi - decl %ebp - jz .L007mw_end - - movl 12(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,12(%edi) - movl %edx,%esi - decl %ebp - jz .L007mw_end - - movl 16(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,16(%edi) - movl %edx,%esi - decl %ebp - jz .L007mw_end - - movl 20(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,20(%edi) - movl %edx,%esi - decl %ebp - jz .L007mw_end - - movl 24(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi -.L007mw_end: - movl %esi,%eax - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size bn_mul_words,.-.L_bn_mul_words_begin -.globl bn_sqr_words -.type bn_sqr_words,@function -.align 16 -bn_sqr_words: -.L_bn_sqr_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - movl 20(%esp),%esi - movl 24(%esp),%edi - movl 28(%esp),%ebx - andl $4294967288,%ebx - jz .L008sw_finish -.L009sw_loop: - - movl (%edi),%eax - mull %eax - movl %eax,(%esi) - movl %edx,4(%esi) - - movl 4(%edi),%eax - mull %eax - movl %eax,8(%esi) - movl %edx,12(%esi) - - movl 8(%edi),%eax - mull %eax - movl %eax,16(%esi) - movl %edx,20(%esi) - - movl 12(%edi),%eax - mull %eax - movl %eax,24(%esi) - movl %edx,28(%esi) - - movl 16(%edi),%eax - mull %eax - movl %eax,32(%esi) - movl %edx,36(%esi) - - movl 20(%edi),%eax - mull %eax - movl %eax,40(%esi) - movl %edx,44(%esi) - - movl 24(%edi),%eax - mull %eax - movl %eax,48(%esi) - movl %edx,52(%esi) - - movl 28(%edi),%eax - mull %eax - movl %eax,56(%esi) - movl %edx,60(%esi) - - addl $32,%edi - addl $64,%esi - subl $8,%ebx - jnz .L009sw_loop -.L008sw_finish: - movl 28(%esp),%ebx - andl $7,%ebx - jz .L010sw_end - - movl (%edi),%eax - mull %eax - movl %eax,(%esi) - decl %ebx - movl %edx,4(%esi) - jz .L010sw_end - - movl 4(%edi),%eax - mull %eax - movl %eax,8(%esi) - decl %ebx - movl %edx,12(%esi) - jz .L010sw_end - - movl 8(%edi),%eax - mull %eax - movl %eax,16(%esi) - decl %ebx - movl %edx,20(%esi) - jz .L010sw_end - - movl 12(%edi),%eax - mull %eax - movl %eax,24(%esi) - decl %ebx - movl %edx,28(%esi) - jz .L010sw_end - - movl 16(%edi),%eax - mull %eax - movl %eax,32(%esi) - decl %ebx - movl %edx,36(%esi) - jz .L010sw_end - - movl 20(%edi),%eax - mull %eax - movl %eax,40(%esi) - decl %ebx - movl %edx,44(%esi) - jz .L010sw_end - - movl 24(%edi),%eax - mull %eax - movl %eax,48(%esi) - movl %edx,52(%esi) -.L010sw_end: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size bn_sqr_words,.-.L_bn_sqr_words_begin -.globl bn_div_words -.type bn_div_words,@function -.align 16 -bn_div_words: -.L_bn_div_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 20(%esp),%edx - movl 24(%esp),%eax - movl 28(%esp),%ebx - divl %ebx - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size bn_div_words,.-.L_bn_div_words_begin -.globl bn_add_words -.type bn_add_words,@function -.align 16 -bn_add_words: -.L_bn_add_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - movl 20(%esp),%ebx - movl 24(%esp),%esi - movl 28(%esp),%edi - movl 32(%esp),%ebp - xorl %eax,%eax - andl $4294967288,%ebp - jz .L011aw_finish -.L012aw_loop: - - movl (%esi),%ecx - movl (%edi),%edx - addl 
%eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,(%ebx) - - movl 4(%esi),%ecx - movl 4(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,4(%ebx) - - movl 8(%esi),%ecx - movl 8(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,8(%ebx) - - movl 12(%esi),%ecx - movl 12(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,12(%ebx) - - movl 16(%esi),%ecx - movl 16(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,16(%ebx) - - movl 20(%esi),%ecx - movl 20(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,20(%ebx) - - movl 24(%esi),%ecx - movl 24(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) - - movl 28(%esi),%ecx - movl 28(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,28(%ebx) - - addl $32,%esi - addl $32,%edi - addl $32,%ebx - subl $8,%ebp - jnz .L012aw_loop -.L011aw_finish: - movl 32(%esp),%ebp - andl $7,%ebp - jz .L013aw_end - - movl (%esi),%ecx - movl (%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,(%ebx) - jz .L013aw_end - - movl 4(%esi),%ecx - movl 4(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,4(%ebx) - jz .L013aw_end - - movl 8(%esi),%ecx - movl 8(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,8(%ebx) - jz .L013aw_end - - movl 12(%esi),%ecx - movl 12(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,12(%ebx) - jz .L013aw_end - - movl 16(%esi),%ecx - movl 16(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,16(%ebx) - jz .L013aw_end - - movl 20(%esi),%ecx - movl 20(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,20(%ebx) - jz .L013aw_end - - movl 24(%esi),%ecx - movl 24(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) -.L013aw_end: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size bn_add_words,.-.L_bn_add_words_begin -.globl bn_sub_words -.type bn_sub_words,@function -.align 16 -bn_sub_words: -.L_bn_sub_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - movl 20(%esp),%ebx - movl 24(%esp),%esi - movl 28(%esp),%edi - movl 32(%esp),%ebp - xorl %eax,%eax - andl $4294967288,%ebp - jz .L014aw_finish -.L015aw_loop: - - movl (%esi),%ecx - movl (%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,(%ebx) - - movl 4(%esi),%ecx - movl 4(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,4(%ebx) - - movl 8(%esi),%ecx - movl 8(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,8(%ebx) - - movl 12(%esi),%ecx - movl 12(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,12(%ebx) - - movl 16(%esi),%ecx - movl 16(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl 
%edx,%ecx - adcl $0,%eax - movl %ecx,16(%ebx) - - movl 20(%esi),%ecx - movl 20(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,20(%ebx) - - movl 24(%esi),%ecx - movl 24(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) - - movl 28(%esi),%ecx - movl 28(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,28(%ebx) - - addl $32,%esi - addl $32,%edi - addl $32,%ebx - subl $8,%ebp - jnz .L015aw_loop -.L014aw_finish: - movl 32(%esp),%ebp - andl $7,%ebp - jz .L016aw_end - - movl (%esi),%ecx - movl (%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,(%ebx) - jz .L016aw_end - - movl 4(%esi),%ecx - movl 4(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,4(%ebx) - jz .L016aw_end - - movl 8(%esi),%ecx - movl 8(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,8(%ebx) - jz .L016aw_end - - movl 12(%esi),%ecx - movl 12(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,12(%ebx) - jz .L016aw_end - - movl 16(%esi),%ecx - movl 16(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,16(%ebx) - jz .L016aw_end - - movl 20(%esi),%ecx - movl 20(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,20(%ebx) - jz .L016aw_end - - movl 24(%esi),%ecx - movl 24(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) -.L016aw_end: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size bn_sub_words,.-.L_bn_sub_words_begin .globl bn_mul_comba8 .type bn_mul_comba8,@function .align 16 diff --git a/deps/openssl/asm/x86-elf-gas/bn/x86-gf2m.s b/deps/openssl/asm/x86-elf-gas/bn/x86-gf2m.s new file mode 100644 index 00000000000000..d8fff7c102714d --- /dev/null +++ b/deps/openssl/asm/x86-elf-gas/bn/x86-gf2m.s @@ -0,0 +1,343 @@ +.file "../openssl/crypto/bn/asm/x86-gf2m.s" +.text +.type _mul_1x1_mmx,@function +.align 16 +_mul_1x1_mmx: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + andl $1073741823,%ecx + leal (%edx,%edx,1),%ebp + movl $0,(%esp) + andl $2147483647,%edx + movd %eax,%mm2 + movd %ebx,%mm3 + movl %ecx,4(%esp) + xorl %edx,%ecx + pxor %mm5,%mm5 + pxor %mm4,%mm4 + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + pcmpgtd %mm2,%mm5 + paddd %mm2,%mm2 + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + pand %mm3,%mm5 + pcmpgtd %mm2,%mm4 + movl %ecx,20(%esp) + xorl %ecx,%ebp + psllq $31,%mm5 + pand %mm3,%mm4 + movl %edx,24(%esp) + movl $7,%esi + movl %ebp,28(%esp) + movl %esi,%ebp + andl %ebx,%esi + shrl $3,%ebx + movl %ebp,%edi + psllq $30,%mm4 + andl %ebx,%edi + shrl $3,%ebx + movd (%esp,%esi,4),%mm0 + movl %ebp,%esi + andl %ebx,%esi + shrl $3,%ebx + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $3,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $6,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $9,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $12,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd 
(%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $15,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $18,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $21,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $24,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + pxor %mm4,%mm0 + psllq $27,%mm2 + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + pxor %mm5,%mm0 + psllq $30,%mm1 + addl $36,%esp + pxor %mm1,%mm0 + ret +.size _mul_1x1_mmx,.-_mul_1x1_mmx +.type _mul_1x1_ialu,@function +.align 16 +_mul_1x1_ialu: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + leal (,%eax,4),%ebp + andl $1073741823,%ecx + leal (%eax,%eax,1),%edi + sarl $31,%eax + movl $0,(%esp) + andl $2147483647,%edx + movl %ecx,4(%esp) + xorl %edx,%ecx + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + movl %ecx,20(%esp) + xorl %ecx,%ebp + sarl $31,%edi + andl %ebx,%eax + movl %edx,24(%esp) + andl %ebx,%edi + movl %ebp,28(%esp) + movl %eax,%edx + shll $31,%eax + movl %edi,%ecx + shrl $1,%edx + movl $7,%esi + shll $30,%edi + andl %ebx,%esi + shrl $2,%ecx + xorl %edi,%eax + shrl $3,%ebx + movl $7,%edi + andl %ebx,%edi + shrl $3,%ebx + xorl %ecx,%edx + xorl (%esp,%esi,4),%eax + movl $7,%esi + andl %ebx,%esi + shrl $3,%ebx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $3,%ebp + andl %ebx,%edi + shrl $29,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $6,%ecx + andl %ebx,%esi + shrl $26,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $9,%ebp + andl %ebx,%edi + shrl $23,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $12,%ecx + andl %ebx,%esi + shrl $20,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $15,%ebp + andl %ebx,%edi + shrl $17,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $18,%ecx + andl %ebx,%esi + shrl $14,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $21,%ebp + andl %ebx,%edi + shrl $11,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $24,%ecx + andl %ebx,%esi + shrl $8,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl %ebp,%ecx + shll $27,%ebp + movl (%esp,%esi,4),%edi + shrl $5,%ecx + movl %edi,%esi + xorl %ebp,%eax + shll $30,%edi + xorl %ecx,%edx + shrl $2,%esi + xorl %edi,%eax + xorl %esi,%edx + addl $36,%esp + ret +.size _mul_1x1_ialu,.-_mul_1x1_ialu +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: +.L_bn_GF2m_mul_2x2_begin: + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%eax + movl 4(%edx),%edx + testl $8388608,%eax + jz .L000ialu + testl $16777216,%eax + jz .L001mmx + testl $2,%edx + jz .L001mmx + movups 8(%esp),%xmm0 + shufps $177,%xmm0,%xmm0 +.byte 102,15,58,68,192,1 + movl 4(%esp),%eax + movups %xmm0,(%eax) + ret +.align 16 +.L001mmx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%eax + movl 32(%esp),%ebx + call _mul_1x1_mmx + movq %mm0,%mm7 + movl 
28(%esp),%eax + movl 36(%esp),%ebx + call _mul_1x1_mmx + movq %mm0,%mm6 + movl 24(%esp),%eax + movl 32(%esp),%ebx + xorl 28(%esp),%eax + xorl 36(%esp),%ebx + call _mul_1x1_mmx + pxor %mm7,%mm0 + movl 20(%esp),%eax + pxor %mm6,%mm0 + movq %mm0,%mm2 + psllq $32,%mm0 + popl %edi + psrlq $32,%mm2 + popl %esi + pxor %mm6,%mm0 + popl %ebx + pxor %mm7,%mm2 + movq %mm0,(%eax) + popl %ebp + movq %mm2,8(%eax) + emms + ret +.align 16 +.L000ialu: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $20,%esp + movl 44(%esp),%eax + movl 52(%esp),%ebx + call _mul_1x1_ialu + movl %eax,8(%esp) + movl %edx,12(%esp) + movl 48(%esp),%eax + movl 56(%esp),%ebx + call _mul_1x1_ialu + movl %eax,(%esp) + movl %edx,4(%esp) + movl 44(%esp),%eax + movl 52(%esp),%ebx + xorl 48(%esp),%eax + xorl 56(%esp),%ebx + call _mul_1x1_ialu + movl 40(%esp),%ebp + movl (%esp),%ebx + movl 4(%esp),%ecx + movl 8(%esp),%edi + movl 12(%esp),%esi + xorl %edx,%eax + xorl %ecx,%edx + xorl %ebx,%eax + movl %ebx,(%ebp) + xorl %edi,%edx + movl %esi,12(%ebp) + xorl %esi,%eax + addl $20,%esp + xorl %esi,%edx + popl %edi + xorl %edx,%eax + popl %esi + movl %edx,8(%ebp) + popl %ebx + movl %eax,4(%ebp) + popl %ebp + ret +.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s index d71cc6441c422e..1d815a0472f562 100644 --- a/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s +++ b/deps/openssl/asm/x86-elf-gas/bn/x86-mont.s @@ -42,6 +42,123 @@ bn_mul_mont: movl %esi,20(%esp) leal -3(%edi),%ebx movl %ebp,24(%esp) + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L001non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 16 +.L0021st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl .L0021st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +.L003outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +.L004inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd 
%mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz .L004inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle .L003outer + emms + jmp .L005common_tail +.align 16 +.L001non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi @@ -52,12 +169,12 @@ bn_mul_mont: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz .L001bn_sqr_mont + jz .L006bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 16 -.L002mull: +.L007mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -66,7 +183,7 @@ bn_mul_mont: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L002mull + jl .L007mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -84,9 +201,9 @@ bn_mul_mont: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp .L0032ndmadd + jmp .L0082ndmadd .align 16 -.L0041stmadd: +.L0091stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -97,7 +214,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl .L0041stmadd + jl .L0091stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -120,7 +237,7 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx .align 16 -.L0032ndmadd: +.L0082ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -131,7 +248,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0032ndmadd + jl .L0082ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -154,9 +271,9 @@ bn_mul_mont: xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp .L0041stmadd + jmp .L0091stmadd .align 16 -.L001bn_sqr_mont: +.L006bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -167,7 +284,7 @@ bn_mul_mont: andl $1,%ebx incl %ecx .align 16 -.L006sqr: +.L010sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -179,7 +296,7 @@ bn_mul_mont: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl .L006sqr + jl .L010sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -203,7 +320,7 @@ bn_mul_mont: movl 4(%esi),%eax movl $1,%ecx .align 16 -.L0073rdmadd: +.L0113rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -222,7 +339,7 @@ bn_mul_mont: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl .L0073rdmadd + jl .L0113rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -250,12 +367,12 @@ bn_mul_mont: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je .L008sqrlast + je .L012sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 16 -.L009sqradd: +.L013sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -271,13 +388,13 @@ bn_mul_mont: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle .L009sqradd + jle .L013sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -.L008sqrlast: +.L012sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -292,7 +409,7 @@ bn_mul_mont: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp .L0073rdmadd + jmp .L0113rdmadd .align 16 .L005common_tail: movl 16(%esp),%ebp @@ -302,13 +419,13 @@ bn_mul_mont: movl %ebx,%ecx xorl %edx,%edx .align 16 -.L010sub: +.L014sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge .L010sub + jge .L014sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -316,12 +433,12 @@ bn_mul_mont: andl 
%eax,%ebp orl %ebp,%esi .align 16 -.L011copy: +.L015copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge .L011copy + jge .L015copy movl 24(%esp),%esp movl $1,%eax .L000just_leave: @@ -336,3 +453,4 @@ bn_mul_mont: .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/des/des-586.s b/deps/openssl/asm/x86-elf-gas/des/des-586.s index 2fbd340dae2905..054e2b0d597efa 100644 --- a/deps/openssl/asm/x86-elf-gas/des/des-586.s +++ b/deps/openssl/asm/x86-elf-gas/des/des-586.s @@ -1000,7 +1000,7 @@ DES_encrypt1: call .L000pic_point .L000pic_point: popl %ebp - leal DES_SPtrans-.L000pic_point(%ebp),%ebp + leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L001decrypt @@ -1077,7 +1077,7 @@ DES_encrypt2: call .L003pic_point .L003pic_point: popl %ebp - leal DES_SPtrans-.L003pic_point(%ebp),%ebp + leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L004decrypt @@ -1707,6 +1707,7 @@ DES_ede3_cbc_encrypt: .size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin .align 64 DES_SPtrans: +.Ldes_sptrans: .long 34080768,524288,33554434,34080770 .long 33554432,526338,524290,33554434 .long 526338,34080768,34078720,2050 diff --git a/deps/openssl/asm/x86-elf-gas/modes/ghash-x86.s b/deps/openssl/asm/x86-elf-gas/modes/ghash-x86.s index cb9ae20dcdda5b..70e9493d1b3737 100644 --- a/deps/openssl/asm/x86-elf-gas/modes/ghash-x86.s +++ b/deps/openssl/asm/x86-elf-gas/modes/ghash-x86.s @@ -203,418 +203,94 @@ gcm_ghash_4bit_x86: popl %ebp ret .size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin -.type _mmx_gmult_4bit_inner,@function +.globl gcm_gmult_4bit_mmx +.type gcm_gmult_4bit_mmx,@function .align 16 -_mmx_gmult_4bit_inner: +gcm_gmult_4bit_mmx: +.L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call .L005pic_point +.L005pic_point: + popl %eax + leal .Lrem_4bit-.L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx xorl %ecx,%ecx movl %ebx,%edx movb %dl,%cl + movl $14,%ebp shlb $4,%cl andl $240,%edx movq 8(%esi,%ecx,1),%mm0 movq (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 14(%edi),%cl - psllq $60,%mm2 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl + jmp .L006mmx_loop +.align 16 +.L006mmx_loop: psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 - movb 13(%edi),%cl + movb (%edi,%ebp,1),%cl psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx + decl %ebp movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 12(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp pxor (%esi,%edx,1),%mm1 movl %ecx,%edx - movd %mm0,%ebx pxor %mm2,%mm0 + js .L007mmx_break shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq 
$4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 11(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 10(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 9(%edi),%cl - psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 8(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx pxor %mm2,%mm0 + jmp .L006mmx_loop +.align 16 +.L007mmx_break: shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 7(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 6(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 5(%edi),%cl - psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp pxor %mm2,%mm0 psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 4(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - 
movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 3(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 2(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 1(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 - movb (%edi),%cl psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp pxor (%esi,%edx,1),%mm1 - movd %mm0,%ebx pxor %mm2,%mm0 - movl 4(%eax,%ebp,8),%edi psrlq $32,%mm0 movd %mm1,%edx psrlq $32,%mm1 movd %mm0,%ecx movd %mm1,%ebp - shll $4,%edi bswap %ebx bswap %edx bswap %ecx - xorl %edi,%ebp bswap %ebp - ret -.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner -.globl gcm_gmult_4bit_mmx -.type gcm_gmult_4bit_mmx,@function -.align 16 -gcm_gmult_4bit_mmx: -.L_gcm_gmult_4bit_mmx_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 20(%esp),%edi - movl 24(%esp),%esi - call .L005pic_point -.L005pic_point: - popl %eax - leal .Lrem_4bit-.L005pic_point(%eax),%eax - movzbl 15(%edi),%ebx - call _mmx_gmult_4bit_inner - movl 20(%esp),%edi emms movl %ebx,12(%edi) movl %edx,4(%edi) @@ -635,61 +311,916 @@ gcm_ghash_4bit_mmx: pushl %ebx pushl %esi pushl %edi - movl 20(%esp),%ebp - movl 24(%esp),%esi - movl 28(%esp),%edi - movl 32(%esp),%ecx - call .L006pic_point -.L006pic_point: - popl %eax - leal .Lrem_4bit-.L006pic_point(%eax),%eax - addl %edi,%ecx - movl %ecx,32(%esp) - subl $20,%esp - movl 12(%ebp),%ebx - movl 4(%ebp),%edx - movl 8(%ebp),%ecx - movl (%ebp),%ebp - jmp .L007mmx_outer_loop + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call .L008pic_point +.L008pic_point: + popl %esi + leal 
.Lrem_8bit-.L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll $4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + movq 80(%ebx),%mm5 + por 
%mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx .align 16 -.L007mmx_outer_loop: - xorl 12(%edi),%ebx - xorl 4(%edi),%edx - xorl 8(%edi),%ecx - xorl (%edi),%ebp - movl %edi,48(%esp) - movl %ebx,12(%esp) - movl %edx,4(%esp) - movl %ecx,8(%esp) - movl %ebp,(%esp) - movl %esp,%edi - shrl $24,%ebx - call _mmx_gmult_4bit_inner - movl 48(%esp),%edi - leal 16(%edi),%edi - cmpl 52(%esp),%edi - jb .L007mmx_outer_loop - movl 40(%esp),%edi +.L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 
144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 
400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne .L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp emms - movl %ebx,12(%edi) - movl %edx,4(%edi) - movl %ecx,8(%edi) - movl %ebp,(%edi) - addl $20,%esp popl %edi popl %esi popl %ebx popl %ebp ret .size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call .L010pic +.L010pic: + popl %ecx + leal .Lbswap-.L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.size gcm_init_clmul,.-.L_gcm_init_clmul_begin +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call .L011pic +.L011pic: + popl %ecx + leal .Lbswap-.L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 
+ psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: +.L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call .L012pic +.L012pic: + popl %ecx + leal .Lbswap-.L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz .L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe .L014even_tail + jmp .L015mod_loop +.align 32 +.L015mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja .L015mod_loop +.L014even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz .L016done + movups (%edx),%xmm2 +.L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq 
$5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin .align 64 -.Lrem_4bit: -.long 0,0,0,29491200,0,58982400,0,38141952 -.long 0,117964800,0,113901568,0,76283904,0,88997888 -.long 0,235929600,0,265420800,0,227803136,0,206962688 -.long 0,152567808,0,148504576,0,177995776,0,190709760 +.Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 .align 64 -.L008rem_8bit: +.Lrem_8bit: .value 0,450,900,582,1800,1738,1164,1358 .value 3600,4050,3476,3158,2328,2266,2716,2910 .value 7200,7650,8100,7782,6952,6890,6316,6510 @@ -722,6 +1253,12 @@ gcm_ghash_4bit_mmx: .value 42960,42514,42068,42390,41176,41242,41820,41630 .value 46560,46114,46692,47014,45800,45866,45420,45230 .value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 64 +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 diff --git a/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s b/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s index 513ce6a58bbd25..fcb4adc14633c9 100644 --- a/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s +++ b/deps/openssl/asm/x86-elf-gas/rc4/rc4-586.s @@ -29,8 +29,8 @@ RC4: movl (%edi,%eax,4),%ecx andl $-4,%edx jz .L002loop1 - testl $-8,%edx movl %ebp,32(%esp) + testl $-8,%edx jz .L003go4loop4 leal OPENSSL_ia32cap_P,%ebp btl $26,(%ebp) @@ -369,4 +369,4 @@ RC4_options: .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-.L_RC4_options_begin -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/rc5/rc5-586.s b/deps/openssl/asm/x86-elf-gas/rc5/rc5-586.s deleted file mode 100644 index ff8a4929ab7412..00000000000000 --- a/deps/openssl/asm/x86-elf-gas/rc5/rc5-586.s +++ /dev/null @@ -1,564 +0,0 @@ -.file "rc5-586.s" -.text -.globl RC5_32_encrypt -.type RC5_32_encrypt,@function -.align 16 -RC5_32_encrypt: -.L_RC5_32_encrypt_begin: - - pushl %ebp - pushl %esi - pushl %edi - movl 16(%esp),%edx - movl 20(%esp),%ebp - - movl (%edx),%edi - movl 4(%edx),%esi - pushl %ebx - movl (%ebp),%ebx - addl 4(%ebp),%edi - addl 8(%ebp),%esi - xorl %esi,%edi - movl 12(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 16(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 20(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 24(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 28(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 32(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 36(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 40(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - 
movl 44(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 48(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 52(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 56(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 60(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 64(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 68(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 72(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - cmpl $8,%ebx - je .L000rc5_exit - xorl %esi,%edi - movl 76(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 80(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 84(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 88(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 92(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 96(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 100(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 104(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - cmpl $12,%ebx - je .L000rc5_exit - xorl %esi,%edi - movl 108(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 112(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 116(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 120(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 124(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 128(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 132(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 136(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi -.L000rc5_exit: - movl %edi,(%edx) - movl %esi,4(%edx) - popl %ebx - popl %edi - popl %esi - popl %ebp - ret -.size RC5_32_encrypt,.-.L_RC5_32_encrypt_begin -.globl RC5_32_decrypt -.type RC5_32_decrypt,@function -.align 16 -RC5_32_decrypt: -.L_RC5_32_decrypt_begin: - - pushl %ebp - pushl %esi - pushl %edi - movl 16(%esp),%edx - movl 20(%esp),%ebp - - movl (%edx),%edi - movl 4(%edx),%esi - pushl %ebx - movl (%ebp),%ebx - cmpl $12,%ebx - je .L001rc5_dec_12 - cmpl $8,%ebx - je .L002rc5_dec_8 - movl 136(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 132(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 128(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 124(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 120(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 116(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 112(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 108(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi -.L001rc5_dec_12: - movl 104(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 100(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl 
%cl,%edi - xorl %esi,%edi - movl 96(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 92(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 88(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 84(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 80(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 76(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi -.L002rc5_dec_8: - movl 72(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 68(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 64(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 60(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 56(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 52(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 48(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 44(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 40(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 36(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 32(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 28(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 24(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 20(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 16(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 12(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - subl 8(%ebp),%esi - subl 4(%ebp),%edi -.L003rc5_exit: - movl %edi,(%edx) - movl %esi,4(%edx) - popl %ebx - popl %edi - popl %esi - popl %ebp - ret -.size RC5_32_decrypt,.-.L_RC5_32_decrypt_begin -.globl RC5_32_cbc_encrypt -.type RC5_32_cbc_encrypt,@function -.align 16 -RC5_32_cbc_encrypt: -.L_RC5_32_cbc_encrypt_begin: - - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 28(%esp),%ebp - - movl 36(%esp),%ebx - movl (%ebx),%esi - movl 4(%ebx),%edi - pushl %edi - pushl %esi - pushl %edi - pushl %esi - movl %esp,%ebx - movl 36(%esp),%esi - movl 40(%esp),%edi - - movl 56(%esp),%ecx - - movl 48(%esp),%eax - pushl %eax - pushl %ebx - cmpl $0,%ecx - jz .L004decrypt - andl $4294967288,%ebp - movl 8(%esp),%eax - movl 12(%esp),%ebx - jz .L005encrypt_finish -.L006encrypt_loop: - movl (%esi),%ecx - movl 4(%esi),%edx - xorl %ecx,%eax - xorl %edx,%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_RC5_32_encrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - addl $8,%esi - addl $8,%edi - subl $8,%ebp - jnz .L006encrypt_loop -.L005encrypt_finish: - movl 52(%esp),%ebp - andl $7,%ebp - jz .L007finish - call .L008PIC_point -.L008PIC_point: - popl %edx - leal .L009cbc_enc_jmp_table-.L008PIC_point(%edx),%ecx - movl (%ecx,%ebp,4),%ebp - addl %edx,%ebp - xorl %ecx,%ecx - xorl %edx,%edx - jmp *%ebp -.L010ej7: - movb 6(%esi),%dh - shll $8,%edx -.L011ej6: - movb 5(%esi),%dh -.L012ej5: - movb 4(%esi),%dl -.L013ej4: - movl (%esi),%ecx - jmp .L014ejend -.L015ej3: - movb 2(%esi),%ch - shll $8,%ecx -.L016ej2: - movb 
1(%esi),%ch -.L017ej1: - movb (%esi),%cl -.L014ejend: - xorl %ecx,%eax - xorl %edx,%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_RC5_32_encrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - jmp .L007finish -.L004decrypt: - andl $4294967288,%ebp - movl 16(%esp),%eax - movl 20(%esp),%ebx - jz .L018decrypt_finish -.L019decrypt_loop: - movl (%esi),%eax - movl 4(%esi),%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_RC5_32_decrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl 16(%esp),%ecx - movl 20(%esp),%edx - xorl %eax,%ecx - xorl %ebx,%edx - movl (%esi),%eax - movl 4(%esi),%ebx - movl %ecx,(%edi) - movl %edx,4(%edi) - movl %eax,16(%esp) - movl %ebx,20(%esp) - addl $8,%esi - addl $8,%edi - subl $8,%ebp - jnz .L019decrypt_loop -.L018decrypt_finish: - movl 52(%esp),%ebp - andl $7,%ebp - jz .L007finish - movl (%esi),%eax - movl 4(%esi),%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_RC5_32_decrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl 16(%esp),%ecx - movl 20(%esp),%edx - xorl %eax,%ecx - xorl %ebx,%edx - movl (%esi),%eax - movl 4(%esi),%ebx -.L020dj7: - rorl $16,%edx - movb %dl,6(%edi) - shrl $16,%edx -.L021dj6: - movb %dh,5(%edi) -.L022dj5: - movb %dl,4(%edi) -.L023dj4: - movl %ecx,(%edi) - jmp .L024djend -.L025dj3: - rorl $16,%ecx - movb %cl,2(%edi) - shll $16,%ecx -.L026dj2: - movb %ch,1(%esi) -.L027dj1: - movb %cl,(%esi) -.L024djend: - jmp .L007finish -.L007finish: - movl 60(%esp),%ecx - addl $24,%esp - movl %eax,(%ecx) - movl %ebx,4(%ecx) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 64 -.L009cbc_enc_jmp_table: -.long 0 -.long .L017ej1-.L008PIC_point -.long .L016ej2-.L008PIC_point -.long .L015ej3-.L008PIC_point -.long .L013ej4-.L008PIC_point -.long .L012ej5-.L008PIC_point -.long .L011ej6-.L008PIC_point -.long .L010ej7-.L008PIC_point -.align 64 -.size RC5_32_cbc_encrypt,.-.L_RC5_32_cbc_encrypt_begin diff --git a/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s b/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s index e77f65412fe66e..8a9ef772b73414 100644 --- a/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s +++ b/deps/openssl/asm/x86-elf-gas/sha/sha1-586.s @@ -9,6 +9,28 @@ sha1_block_data_order: pushl %ebx pushl %esi pushl %edi + call .L000pic_point +.L000pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%esi + leal .LK_XX_XX-.L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%edx + testl $512,%edx + jz .L001x86 + movl 8(%esi),%ecx + testl $16777216,%eax + jz .L001x86 + testl $536870912,%ecx + jnz .Lshaext_shortcut + andl $268435456,%edx + andl $1073741824,%eax + orl %edx,%eax + cmpl $1342177280,%eax + je .Lavx_shortcut + jmp .Lssse3_shortcut +.align 16 +.L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax @@ -17,9 +39,9 @@ sha1_block_data_order: addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi - jmp .L000loop + jmp .L002loop .align 16 -.L000loop: +.L002loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -1366,7 +1388,7 @@ sha1_block_data_order: movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) - jb .L000loop + jb .L002loop addl $76,%esp popl %edi popl %esi @@ -1374,7 +1396,2575 @@ sha1_block_data_order: popl %ebp ret .size sha1_block_data_order,.-.L_sha1_block_data_order_begin +.type _sha1_block_data_order_shaext,@function +.align 16 +_sha1_block_data_order_shaext: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L003pic_point +.L003pic_point: + popl %ebp + leal .LK_XX_XX-.L003pic_point(%ebp),%ebp +.Lshaext_shortcut: + movl 20(%esp),%edi + movl 
%esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 102,15,56,0,251 + jmp .L004loop_shaext +.align 16 +.L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz .L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext +.type _sha1_block_data_order_ssse3,@function +.align 16 +_sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L005pic_point +.L005pic_point: + popl %ebp + leal 
.LK_XX_XX-.L005pic_point(%ebp),%ebp +.Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp .L006loop +.align 16 +.L006loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 
32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl 
%ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + 
rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp 
+ xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L007done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) 
+ addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp .L006loop +.align 16 +.L007done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl 
%ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 +.type _sha1_block_data_order_avx,@function +.align 16 +_sha1_block_data_order_avx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L008pic_point +.L008pic_point: + popl %ebp + leal .LK_XX_XX-.L008pic_point(%ebp),%ebp +.Lavx_shortcut: + vzeroall + vmovdqa (%ebp),%xmm7 + vmovdqa 16(%ebp),%xmm0 + vmovdqa 32(%ebp),%xmm1 + vmovdqa 48(%ebp),%xmm2 + vmovdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + vmovdqa %xmm0,112(%esp) + vmovdqa %xmm1,128(%esp) + vmovdqa %xmm2,144(%esp) + shll $6,%edx + vmovdqa %xmm7,160(%esp) + addl %ebp,%edx + vmovdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + vmovdqu -64(%ebp),%xmm0 + vmovdqu -48(%ebp),%xmm1 + vmovdqu -32(%ebp),%xmm2 + vmovdqu -16(%ebp),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vmovdqa %xmm7,96(%esp) + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm7,%xmm0,%xmm4 + vpaddd %xmm7,%xmm1,%xmm5 + vpaddd %xmm7,%xmm2,%xmm6 + vmovdqa %xmm4,(%esp) + movl %ecx,%ebp + vmovdqa %xmm5,16(%esp) + xorl %edx,%ebp + vmovdqa %xmm6,32(%esp) + andl %ebp,%esi + jmp .L009loop +.align 16 +.L009loop: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%ebp + addl (%esp),%edi + vpaddd %xmm3,%xmm7,%xmm7 + vmovdqa %xmm0,64(%esp) + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%edi + vpxor %xmm2,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vmovdqa %xmm7,48(%esp) + movl %edi,%esi + addl 4(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%edi,%edi + addl %ebp,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm6 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm0 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrld $30,%xmm0,%xmm7 + vpor %xmm6,%xmm4,%xmm4 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + vpslld $2,%xmm0,%xmm0 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vpxor %xmm7,%xmm4,%xmm4 + movl %ecx,%esi + addl 12(%esp),%ebx + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpxor %xmm0,%xmm4,%xmm4 + addl %ebp,%ebx + andl %edx,%esi + vmovdqa 96(%esp),%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%ebp + addl 16(%esp),%eax + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqa %xmm1,80(%esp) + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm7,%xmm7 + 
shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vmovdqa %xmm0,(%esp) + movl %eax,%esi + addl 20(%esp),%edi + vpxor %xmm7,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %ebp,%edi + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm7 + xorl %ecx,%ebx + addl %eax,%edi + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm1 + vpaddd %xmm5,%xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm0 + vpor %xmm7,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpxor %xmm0,%xmm5,%xmm5 + movl %edx,%esi + addl 28(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpxor %xmm1,%xmm5,%xmm5 + addl %ebp,%ecx + andl %edi,%esi + vmovdqa 112(%esp),%xmm1 + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%ebp + addl 32(%esp),%ebx + vpaddd %xmm5,%xmm1,%xmm1 + vmovdqa %xmm2,96(%esp) + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + vpxor %xmm2,%xmm6,%xmm6 + xorl %edi,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%ecx,%ecx + xorl %edi,%ebp + vmovdqa %xmm1,16(%esp) + movl %ebx,%esi + addl 36(%esp),%eax + vpxor %xmm0,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + addl %ebp,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm2 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm1 + vpor %xmm0,%xmm6,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + vmovdqa 64(%esp),%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vpxor %xmm1,%xmm6,%xmm6 + movl %edi,%esi + addl 44(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpxor %xmm2,%xmm6,%xmm6 + addl %ebp,%edx + andl %eax,%esi + vmovdqa 112(%esp),%xmm2 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%ebp + addl 48(%esp),%ecx + vpaddd %xmm6,%xmm2,%xmm2 + vmovdqa %xmm3,64(%esp) + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm1 + addl %esi,%ecx + andl %edi,%ebp + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%edi + addl %edx,%ecx + vpxor %xmm5,%xmm1,%xmm1 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vmovdqa %xmm2,32(%esp) + movl %ecx,%esi + addl 52(%esp),%ebx + vpxor %xmm1,%xmm7,%xmm7 + xorl %edi,%edx + shldl $5,%ecx,%ecx + addl %ebp,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm1 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpslldq $12,%xmm7,%xmm3 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm2 + vpor %xmm1,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + vmovdqa 80(%esp),%xmm1 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vpxor %xmm2,%xmm7,%xmm7 + movl %eax,%esi + addl 60(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpxor %xmm3,%xmm7,%xmm7 + addl %ebp,%edi + andl %ebx,%esi + vmovdqa 112(%esp),%xmm3 + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,80(%esp) + xorl %ebx,%eax + shldl $5,%edi,%edi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + addl %esi,%edx + andl %eax,%ebp + vpxor %xmm2,%xmm0,%xmm0 + xorl 
%ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + vpor %xmm2,%xmm0,%xmm0 + xorl %edi,%edx + shldl $5,%ecx,%ecx + vmovdqa 96(%esp),%xmm2 + addl %esi,%ebx + andl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm3,%xmm1,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm3,%xmm1,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + vmovdqa 64(%esp),%xmm3 + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + vmovdqa 128(%esp),%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm4,%xmm2,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vpor %xmm4,%xmm2,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + vmovdqa 80(%esp),%xmm4 + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + vmovdqa 96(%esp),%xmm5 + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpalignr $8,%xmm2,%xmm3,%xmm6 + vpxor %xmm0,%xmm4,%xmm4 + addl (%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + vmovdqa %xmm0,96(%esp) + addl 
%esi,%ecx + xorl %eax,%ebp + vmovdqa %xmm7,%xmm0 + vpaddd %xmm3,%xmm7,%xmm7 + shrdl $7,%edi,%edi + addl %edx,%ecx + vpxor %xmm6,%xmm4,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm6 + vmovdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm6,%xmm4,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + vmovdqa 64(%esp),%xmm6 + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpalignr $8,%xmm3,%xmm4,%xmm7 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + vpxor %xmm6,%xmm5,%xmm5 + vmovdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + vmovdqa %xmm0,%xmm1 + vpaddd %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + addl %edi,%edx + vpxor %xmm7,%xmm5,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm7 + vmovdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm7,%xmm5,%xmm5 + addl 28(%esp),%eax + vmovdqa 80(%esp),%xmm7 + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + vmovdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + vmovdqa %xmm1,%xmm2 + vpaddd %xmm5,%xmm1,%xmm1 + shldl $5,%eax,%eax + addl %esi,%edi + vpxor %xmm0,%xmm6,%xmm6 + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + vpsrld $30,%xmm6,%xmm0 + vmovdqa %xmm1,16(%esp) + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + vpor %xmm0,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%edi,%edi + vmovdqa 96(%esp),%xmm0 + movl %edx,%ebp + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm1 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + vmovdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + vmovdqa 144(%esp),%xmm3 + vpaddd %xmm6,%xmm2,%xmm2 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + vpsrld $30,%xmm7,%xmm1 + vmovdqa %xmm2,32(%esp) + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + vpor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vmovdqa 64(%esp),%xmm1 + movl %edi,%ebp + xorl %ebx,%esi + shldl 
$5,%edi,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + addl (%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm2,%xmm0,%xmm0 + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + vpor %xmm2,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vmovdqa 80(%esp),%xmm2 + movl %eax,%ebp + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%edi,%edi + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm3,%xmm1,%xmm1 + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + vpor %xmm3,%xmm1,%xmm1 + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vmovdqa 96(%esp),%xmm3 + movl %ebx,%ebp + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + vmovdqa %xmm5,%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shldl $5,%edi,%edi + addl %esi,%edx + vpxor %xmm4,%xmm2,%xmm2 + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + vpor %xmm4,%xmm2,%xmm2 + xorl %eax,%edi + shrdl $7,%edx,%edx + vmovdqa 64(%esp),%xmm4 + movl %ecx,%ebp + xorl %edi,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + 
addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl (%esp),%eax + vpaddd %xmm3,%xmm7,%xmm7 + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm7,48(%esp) + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L010done + vmovdqa 160(%esp),%xmm7 + vmovdqa 176(%esp),%xmm6 + vmovdqu (%ebp),%xmm0 + vmovdqu 16(%ebp),%xmm1 + vmovdqu 32(%ebp),%xmm2 + vmovdqu 48(%ebp),%xmm3 + addl $64,%ebp + vpshufb %xmm6,%xmm0,%xmm0 + movl %ebp,196(%esp) + vmovdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpaddd %xmm7,%xmm0,%xmm4 + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,(%esp) + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%ebp + shldl $5,%edx,%edx + vpaddd %xmm7,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vmovdqa %xmm5,16(%esp) + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %edi,%ebp + shldl $5,%edi,%edi + vpaddd %xmm7,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vmovdqa %xmm6,32(%esp) + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 
56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,%ebx + movl %ecx,8(%ebp) + xorl %edx,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movl %esi,%ebp + andl %ebx,%esi + movl %ebp,%ebx + jmp .L009loop +.align 16 +.L010done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroall + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx +.align 64 +.LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s b/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s index 77a89514f1af2a..b434e42babe16e 100644 --- a/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s +++ 
b/deps/openssl/asm/x86-elf-gas/sha/sha256-586.s @@ -25,195 +25,212 @@ sha256_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz .L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz .L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz .L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + je .L005AVX + testl $512,%ebx + jnz .L006SSSE3 +.L003no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae .L007unrolled + jmp .L002loop .align 16 .L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx - movl 12(%edi),%edx bswap %eax + movl 12(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx - movl 28(%edi),%edx bswap %eax + movl 28(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx - movl 44(%edi),%edx bswap %eax + movl 44(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx - movl 60(%edi),%edx bswap %eax + movl 60(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx addl $64,%edi - subl $32,%esp - movl %edi,100(%esp) + leal -36(%esp),%esp + movl %edi,104(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi - movl %ebx,4(%esp) - movl %ecx,8(%esp) - movl %edi,12(%esp) + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi - movl %ebx,20(%esp) - movl %ecx,24(%esp) - movl %edi,28(%esp) + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) .align 16 -.L00300_15: - movl 92(%esp),%ebx +.L00800_15: movl %edx,%ecx + movl 24(%esp),%esi rorl $14,%ecx - movl 20(%esp),%esi - xorl %edx,%ecx - rorl $5,%ecx + movl 28(%esp),%edi xorl %edx,%ecx - rorl $6,%ecx - movl 24(%esp),%edi - addl %ecx,%ebx xorl %edi,%esi - movl %edx,16(%esp) - movl %eax,%ecx + movl 96(%esp),%ebx + rorl $5,%ecx andl %edx,%esi - movl 12(%esp),%edx + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx xorl %edi,%esi - movl %eax,%edi + rorl $6,%edx + movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx - addl 28(%esp),%ebx + addl %edx,%ebx + movl 8(%esp),%edi xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp rorl $11,%ecx - movl 4(%esp),%esi + movl (%ebp),%esi xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) addl %ebx,%edx - movl 8(%esp),%edi + andl 4(%esp),%eax addl %ecx,%ebx - movl %eax,(%esp) - movl %eax,%ecx - subl $4,%esp - orl %esi,%eax - andl %esi,%ecx - andl %edi,%eax - movl (%ebp),%esi - orl %ecx,%eax + xorl %edi,%eax addl $4,%ebp addl %ebx,%eax - addl %esi,%edx - addl %esi,%eax cmpl $3248222580,%esi - jne .L00300_15 - movl 152(%esp),%ebx + jne .L00800_15 + movl 156(%esp),%ecx + jmp .L00916_63 .align 16 -.L00416_63: - movl %ebx,%esi - movl 100(%esp),%ecx - rorl $11,%esi - movl %ecx,%edi - xorl %ebx,%esi - rorl $7,%esi +.L00916_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx shrl $3,%ebx - rorl $2,%edi - xorl %esi,%ebx - xorl %ecx,%edi - rorl $17,%edi - shrl $10,%ecx - 
addl 156(%esp),%ebx - xorl %ecx,%edi - addl 120(%esp),%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx movl %edx,%ecx - addl %edi,%ebx + xorl %esi,%edi + movl 24(%esp),%esi rorl $14,%ecx - movl 20(%esp),%esi - xorl %edx,%ecx - rorl $5,%ecx - movl %ebx,92(%esp) + addl %edi,%ebx + movl 28(%esp),%edi xorl %edx,%ecx - rorl $6,%ecx - movl 24(%esp),%edi - addl %ecx,%ebx xorl %edi,%esi - movl %edx,16(%esp) - movl %eax,%ecx + movl %ebx,96(%esp) + rorl $5,%ecx andl %edx,%esi - movl 12(%esp),%edx + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx xorl %edi,%esi - movl %eax,%edi + rorl $6,%edx + movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx - addl 28(%esp),%ebx + addl %edx,%ebx + movl 8(%esp),%edi xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp rorl $11,%ecx - movl 4(%esp),%esi + movl (%ebp),%esi xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) addl %ebx,%edx - movl 8(%esp),%edi + andl 4(%esp),%eax addl %ecx,%ebx - movl %eax,(%esp) - movl %eax,%ecx - subl $4,%esp - orl %esi,%eax - andl %esi,%ecx - andl %edi,%eax - movl (%ebp),%esi - orl %ecx,%eax + xorl %edi,%eax + movl 156(%esp),%ecx addl $4,%ebp addl %ebx,%eax - movl 152(%esp),%ebx - addl %esi,%edx - addl %esi,%eax cmpl $3329325298,%esi - jne .L00416_63 - movl 352(%esp),%esi - movl 4(%esp),%ebx - movl 8(%esp),%ecx - movl 12(%esp),%edi + jne .L00916_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebx - addl 8(%esi),%ecx - addl 12(%esi),%edi + addl 8(%esi),%edi + addl 12(%esi),%ecx movl %eax,(%esi) movl %ebx,4(%esi) - movl %ecx,8(%esi) - movl %edi,12(%esi) - movl 20(%esp),%eax - movl 24(%esp),%ebx - movl 28(%esp),%ecx - movl 356(%esp),%edi + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx @@ -222,7 +239,7 @@ sha256_block_data_order: movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) - addl $352,%esp + leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi jb .L002loop @@ -234,25 +251,6533 @@ sha256_block_data_order: ret .align 64 .L001K256: -.long 1116352408,1899447441,3049323471,3921009573 -.long 961987163,1508970993,2453635748,2870763221 -.long 3624381080,310598401,607225278,1426881987 -.long 1925078388,2162078206,2614888103,3248222580 -.long 3835390401,4022224774,264347078,604807628 -.long 770255983,1249150122,1555081692,1996064986 -.long 2554220882,2821834349,2952996808,3210313671 -.long 3336571891,3584528711,113926993,338241895 -.long 666307205,773529912,1294757372,1396182291 -.long 1695183700,1986661051,2177026350,2456956037 -.long 2730485921,2820302411,3259730800,3345764771 -.long 3516065817,3600352804,4094571909,275423344 -.long 430227734,506948616,659060556,883997877 -.long 958139571,1322822218,1537002063,1747873779 -.long 1955562222,2024104815,2227730452,2361852424 -.long 2428436474,2756734187,3204031479,3329325298 -.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +.long 
1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 +.align 16 +.L007unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L010grand_loop +.align 16 +.L010grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + 
xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl 
%edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 
2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl 
%edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl 
  [... generated assembly elided: this span of the diff is the remainder of the 32-bit SHA-256 module (sha256-586.s, ELF/GAS flavour) whose line breaks were lost in extraction — the rest of the unrolled scalar compression rounds of .L010grand_loop, the SHA-NI path (.L004shaext / .L011loop_shaext), the SSSE3 path (.L006SSSE3 / .L012grand_ssse3 / .L013ssse3_00_47), and the opening of the AVX path (.L005AVX / .L015grand_avx / .L016avx_00_47); the AVX listing continues below ...]
movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L016avx_00_47 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl 
%ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + 
movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L015grand_avx + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L014AVX_BMI: + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + 
movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp .L017grand_avx_bmi +.align 32 +.L017grand_avx_bmi: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp .L018avx_bmi_00_47 +.align 16 +.L018avx_bmi_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + vpaddd %xmm4,%xmm0,%xmm0 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm0,%xmm0 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm0,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + addl 16(%esp),%edx + andl 
%ebx,%eax + addl 44(%esp),%edx + vpaddd (%ebp),%xmm0,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + vpaddd %xmm4,%xmm1,%xmm1 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm1,%xmm1 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm1,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm0,%xmm1,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl 
%esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + vpaddd %xmm4,%xmm2,%xmm2 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm2,%xmm2 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm2,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor 
%xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + vpaddd %xmm4,%xmm3,%xmm3 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm3,%xmm3 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm3,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L018avx_bmi_00_47 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + 
rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + 
xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L017grand_avx_bmi + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size 
sha256_block_data_order,.-.L_sha256_block_data_order_begin +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/sha/sha512-586.s b/deps/openssl/asm/x86-elf-gas/sha/sha512-586.s index 4b806f352e299a..83329f439e8b79 100644 --- a/deps/openssl/asm/x86-elf-gas/sha/sha512-586.s +++ b/deps/openssl/asm/x86-elf-gas/sha/sha512-586.s @@ -25,6 +25,2269 @@ sha512_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz .L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je .L003SSSE3 + subl $80,%esp + jmp .L004loop_sse2 +.align 16 +.L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp .L00500_14_sse2 +.align 16 +.L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz .L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp .L00616_79_sse2 +.align 16 +.L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 
+ psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz .L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb .L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + 
paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 32 +.L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp .L00800_47_ssse3 +.align 32 +.L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 
+.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + 
psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq 
%xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor 
%mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + 
psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz .L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq 
%mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + 
paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq 
%mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq 
%mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb .L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret .align 16 .L002loop_x86: movl (%edi),%eax @@ -130,7 +2393,7 @@ sha512_block_data_order: movl $16,%ecx .long 2784229001 .align 16 -.L00300_15_x86: +.L00900_15_x86: movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi @@ -237,9 +2500,9 @@ sha512_block_data_order: subl $8,%esp leal 8(%ebp),%ebp cmpb $148,%dl - jne .L00300_15_x86 + jne .L00900_15_x86 .align 16 -.L00416_79_x86: +.L01016_79_x86: movl 312(%esp),%ecx movl 316(%esp),%edx movl %ecx,%esi @@ -412,7 +2675,7 @@ sha512_block_data_order: subl $8,%esp leal 8(%ebp),%ebp cmpb $23,%dl - jne .L00416_79_x86 + jne .L01016_79_x86 movl 840(%esp),%esi movl 844(%esp),%edi movl (%esi),%eax @@ -555,9 +2818,12 @@ sha512_block_data_order: .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 .size sha512_block_data_order,.-.L_sha512_block_data_order_begin .byte 
83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-elf-gas/whrlpool/wp-mmx.s b/deps/openssl/asm/x86-elf-gas/whrlpool/wp-mmx.s index c03f3f6cb7d068..37f50898ae6b53 100644 --- a/deps/openssl/asm/x86-elf-gas/whrlpool/wp-mmx.s +++ b/deps/openssl/asm/x86-elf-gas/whrlpool/wp-mmx.s @@ -66,228 +66,230 @@ whirlpool_block_mmx: movq 4096(%ebp,%esi,8),%mm0 movl (%esp),%eax movl 4(%esp),%ebx - movb %al,%cl - movb %ah,%dl + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 movq 7(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl movl 8(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx movq 6(%ebp,%esi,8),%mm2 movq 5(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx movq 4(%ebp,%esi,8),%mm4 movq 3(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl movl 12(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx movq 2(%ebp,%esi,8),%mm6 movq 1(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl movl 16(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl movl 20(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl movl 24(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl movl 28(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl movl 32(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb 
%bh,%dl movl 36(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl movl 40(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl movl 44(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 - movb %al,%cl - movb %ah,%dl movl 48(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl movl 52(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl movl 56(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl movl 60(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl movl 64(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl movl 68(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 movq %mm0,(%esp) @@ -298,226 +300,226 @@ whirlpool_block_mmx: movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 pxor 7(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl movl 72(%esp),%eax 
leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm2 pxor 5(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm4 pxor 3(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl movl 76(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm6 pxor 1(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl movl 80(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl movl 84(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl movl 88(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl movl 92(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl movl 96(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl movl 100(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl movl 104(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl movl 108(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 - movb 
%al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 - movb %al,%cl - movb %ah,%dl movl 112(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl movl 116(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl movl 120(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl movl 124(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 leal 128(%esp),%ebx diff --git a/deps/openssl/asm/x86-elf-gas/x86cpuid.s b/deps/openssl/asm/x86-elf-gas/x86cpuid.s index 145355e2c1c458..80a4d1a5183adf 100644 --- a/deps/openssl/asm/x86-elf-gas/x86cpuid.s +++ b/deps/openssl/asm/x86-elf-gas/x86cpuid.s @@ -22,6 +22,8 @@ OPENSSL_ia32_cpuid: xorl %eax,%eax btl $21,%ecx jnc .L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax @@ -72,28 +74,36 @@ OPENSSL_ia32_cpuid: andl $4026531839,%edx jmp .L002generic .L001intel: + cmpl $7,%edi + jb .L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +.L003cacheinfo: cmpl $4,%edi movl $-1,%edi - jb .L003nocacheinfo + jb .L004nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi -.L003nocacheinfo: +.L004nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp - jne .L004notintel + jne .L005notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - jne .L004notintel + jne .L005notintel orl $1048576,%edx -.L004notintel: +.L005notintel: btl $28,%edx jnc .L002generic andl $4026531839,%edx @@ -110,20 +120,22 @@ OPENSSL_ia32_cpuid: movl %edx,%esi orl %ecx,%ebp btl $27,%ecx - jnc .L005clear_avx + jnc .L006clear_avx xorl %ecx,%ecx .byte 15,1,208 andl 
$6,%eax cmpl $6,%eax - je .L006done + je .L007done cmpl $2,%eax - je .L005clear_avx -.L007clear_xmm: + je .L006clear_avx +.L008clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi -.L005clear_avx: +.L006clear_avx: andl $4026525695,%ebp -.L006done: + movl 20(%esp),%edi + andl $4294967263,8(%edi) +.L007done: movl %esi,%eax movl %ebp,%edx .L000nocpuid: @@ -142,9 +154,9 @@ OPENSSL_rdtsc: xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) - jnc .L008notsc + jnc .L009notsc .byte 0x0f,0x31 -.L008notsc: +.L009notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt @@ -154,14 +166,14 @@ OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) - jnc .L009nohalt + jnc .L010nohalt .long 2421723150 andl $3,%eax - jnz .L009nohalt + jnz .L010nohalt pushfl popl %eax btl $9,%eax - jnc .L009nohalt + jnc .L010nohalt .byte 0x0f,0x31 pushl %edx pushl %eax @@ -171,7 +183,7 @@ OPENSSL_instrument_halt: sbbl 4(%esp),%edx addl $8,%esp ret -.L009nohalt: +.L010nohalt: xorl %eax,%eax xorl %edx,%edx ret @@ -184,21 +196,21 @@ OPENSSL_far_spin: pushfl popl %eax btl $9,%eax - jnc .L010nospin + jnc .L011nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx - jmp .L011spin + jmp .L012spin .align 16 -.L011spin: +.L012spin: incl %eax cmpl (%ecx),%edx - je .L011spin + je .L012spin .long 529567888 ret -.L010nospin: +.L011nospin: xorl %eax,%eax xorl %edx,%edx ret @@ -213,9 +225,21 @@ OPENSSL_wipe_cpu: leal OPENSSL_ia32cap_P,%ecx movl (%ecx),%ecx btl $1,(%ecx) - jnc .L012no_x87 + jnc .L013no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne .L014no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +.L014no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 -.L012no_x87: +.L013no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin @@ -229,11 +253,11 @@ OPENSSL_atomic_add: pushl %ebx nop movl (%edx),%eax -.L013spin: +.L015spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 - jne .L013spin + jne .L015spin movl %ebx,%eax popl %ebx ret @@ -274,32 +298,32 @@ OPENSSL_cleanse: movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx - jae .L014lot + jae .L016lot cmpl $0,%ecx - je .L015ret -.L016little: + je .L017ret +.L018little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx - jnz .L016little -.L015ret: + jnz .L018little +.L017ret: ret .align 16 -.L014lot: +.L016lot: testl $3,%edx - jz .L017aligned + jz .L019aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx - jmp .L014lot -.L017aligned: + jmp .L016lot +.L019aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx - jnz .L017aligned + jnz .L019aligned cmpl $0,%ecx - jne .L016little + jne .L018little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin .globl OPENSSL_ia32_rdrand @@ -308,15 +332,32 @@ OPENSSL_cleanse: OPENSSL_ia32_rdrand: .L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx -.L018loop: +.L020loop: .byte 15,199,240 - jc .L019break - loop .L018loop -.L019break: + jc .L021break + loop .L020loop +.L021break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin -.comm OPENSSL_ia32cap_P,8,4 +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,@function +.align 16 +OPENSSL_ia32_rdseed: +.L_OPENSSL_ia32_rdseed_begin: + movl $8,%ecx +.L022loop: +.byte 15,199,248 + jc .L023break + loop .L022loop +.L023break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size 
OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin +.hidden OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,16,4 .section .init call OPENSSL_cpuid_setup diff --git a/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s b/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s index 15d5d7b36d557e..12e9100222d438 100644 --- a/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s +++ b/deps/openssl/asm/x86-macosx-gas/aes/aes-586.s @@ -95,74 +95,78 @@ L000loop: shll $24,%ecx xorl %ecx,%edx movl %esi,%ecx - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %ecx,%edi xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%ecx - rorl $16,%ebp - xorl %ebp,%ecx - rorl $8,%ebp - xorl %ebp,%ecx - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %edx,%edi xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%edx - rorl $16,%ebp - xorl %ebp,%edx - rorl $8,%ebp - xorl %ebp,%edx - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %eax,%edi xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%eax - rorl $16,%ebp - xorl %ebp,%eax - rorl $8,%ebp - xorl %ebp,%eax - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %ebx,%edi xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi roll $24,%ebx + xorl %edi,%esi xorl %esi,%ebx - rorl $16,%ebp - xorl %ebp,%ebx - rorl $8,%ebp - xorl %ebp,%ebx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi @@ -278,74 +282,76 @@ L001loop: pshufw $13,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx + movl %edi,20(%esp) movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx - pshufw $13,%mm0,%mm2 movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx - shll $8,%edx shrl $16,%eax - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $16,%esi - orl %esi,%ecx pshufw $8,%mm4,%mm6 - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $24,%esi - orl %esi,%edx shrl $16,%ebx - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $8,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shll $24,%esi orl %esi,%ecx - movd %ecx,%mm0 - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx + movzbl -128(%ebp,%edi,1),%esi + 
movzbl %bl,%edi movd %mm2,%eax - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi - shll $16,%esi - orl %esi,%ecx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx movd %mm6,%ebx - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm1 - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%ecx shrl $16,%ebx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi - shll $16,%esi orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx punpckldq %mm1,%mm0 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi - orl %esi,%ecx - andl $255,%eax + andl $255,%ebx movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi orl %eax,%edx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm4 - andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 movd %edx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi @@ -1108,28 +1114,28 @@ L006loop: movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%eax subl %edi,%esi andl $4278124286,%eax andl $454761243,%esi - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %ecx,%eax - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ebp subl %edi,%esi @@ -1140,39 +1146,39 @@ L006loop: xorl %esi,%ebp xorl %eax,%ecx xorl %ebp,%eax - roll $24,%eax xorl %ebx,%ecx xorl %ebp,%ebx - roll $16,%ebx + roll $24,%eax xorl %ebp,%ecx - roll $8,%ebp + roll $16,%ebx xorl %eax,%ecx + roll $8,%ebp xorl %ebx,%ecx movl 4(%esp),%eax xorl %ebp,%ecx movl %ecx,12(%esp) - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %edx,%ebx - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%ebp subl %edi,%esi @@ -1183,39 +1189,39 @@ L006loop: xorl %esi,%ebp xorl %ebx,%edx xorl %ebp,%ebx - roll $24,%ebx xorl %ecx,%edx xorl %ebp,%ecx - roll $16,%ecx + roll $24,%ebx xorl %ebp,%edx - roll $8,%ebp + roll $16,%ecx xorl %ebx,%edx + roll $8,%ebp xorl %ecx,%edx movl 8(%esp),%ebx xorl %ebp,%edx movl %edx,16(%esp) - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi shrl $7,%edi leal 
(%eax,%eax,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %eax,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi @@ -1226,37 +1232,37 @@ L006loop: xorl %esi,%ebp xorl %ecx,%eax xorl %ebp,%ecx - roll $24,%ecx xorl %edx,%eax xorl %ebp,%edx - roll $16,%edx + roll $24,%ecx xorl %ebp,%eax - roll $8,%ebp + roll $16,%edx xorl %ecx,%eax + roll $8,%ebp xorl %edx,%eax xorl %ebp,%eax - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi @@ -1267,13 +1273,13 @@ L006loop: xorl %esi,%ebp xorl %ecx,%ebx xorl %ebp,%ecx - roll $24,%ecx xorl %edx,%ebx xorl %ebp,%edx - roll $16,%edx + roll $24,%ecx xorl %ebp,%ebx - roll $8,%ebp + roll $16,%edx xorl %ecx,%ebx + roll $8,%ebp xorl %edx,%ebx movl 12(%esp),%ecx xorl %ebp,%ebx @@ -1390,77 +1396,79 @@ __sse_AES_decrypt_compact: .align 4,0x90 L007loop: pshufw $12,%mm0,%mm1 - movd %mm1,%eax pshufw $9,%mm4,%mm5 - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx + movd %mm1,%eax movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax shll $8,%edx - pshufw $6,%mm0,%mm2 - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $16,%esi + pshufw $3,%mm4,%mm6 orl %esi,%ecx - shrl $16,%eax - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $24,%esi - orl %esi,%edx shrl $16,%ebx - pshufw $3,%mm4,%mm6 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm0 - movzbl %al,%esi movd %mm2,%eax - movzbl -128(%ebp,%esi,1),%ecx - shll $16,%ecx - movzbl %bl,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi movd %mm6,%ebx - movzbl -128(%ebp,%esi,1),%esi + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi orl %esi,%ecx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi orl %esi,%edx - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $16,%esi - orl %esi,%edx - movd %edx,%mm1 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%edx - shll $8,%edx - movzbl %bh,%esi shrl $16,%eax - movzbl -128(%ebp,%esi,1),%esi - shll $24,%esi orl %esi,%edx + movzbl 
-128(%ebp,%edi,1),%esi + movzbl %bh,%edi shrl $16,%ebx - punpckldq %mm1,%mm0 - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi shll $8,%esi - orl %esi,%ecx + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi orl %ebx,%edx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi shll $16,%esi - orl %esi,%edx - movd %edx,%mm4 - movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx shll $24,%eax orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 movd %ecx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi @@ -3006,30 +3014,30 @@ L055invert: .align 2,0x90 L056permute: addl $16,%edi - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %eax,%ebx - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp xorl %eax,%ecx subl %ebp,%esi andl $4278124286,%edx @@ -3050,30 +3058,30 @@ L056permute: movl %ebp,%ebx xorl %edx,%eax movl %eax,(%edi) - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp xorl %ebx,%edx subl %ebp,%esi andl $4278124286,%eax @@ -3094,30 +3102,30 @@ L056permute: movl %ebp,%ecx xorl %eax,%ebx movl %ebx,4(%edi) - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %ecx,%edx - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp xorl %ecx,%eax subl %ebp,%esi andl $4278124286,%ebx @@ -3138,30 +3146,30 @@ L056permute: movl %ebp,%edx xorl %ebx,%ecx movl %ecx,8(%edi) - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl 
$4278124286,%eax andl $454761243,%esi - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %edx,%eax - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp xorl %edx,%ebx subl %ebp,%esi andl $4278124286,%ecx @@ -3197,4 +3205,4 @@ L056permute: L_OPENSSL_ia32cap_P$non_lazy_ptr: .indirect_symbol _OPENSSL_ia32cap_P .long 0 -.comm _OPENSSL_ia32cap_P,8,2 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s b/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s index 183ecad2995944..cecd5f83f71e6d 100644 --- a/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s +++ b/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s @@ -45,27 +45,78 @@ L001dec1_loop_2: movups %xmm2,(%eax) ret .align 4 +__aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.align 4 +__aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.align 4 __aesni_encrypt3: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 - movups (%edx),%xmm0 -L002enc3_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L004enc3_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 - movups (%edx),%xmm0 - jnz L002enc3_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L004enc3_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -76,25 +127,26 @@ L002enc3_loop: .align 4 __aesni_decrypt3: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 - movups (%edx),%xmm0 -L003dec3_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L005dec3_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 - movups (%edx),%xmm0 - jnz L003dec3_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L005dec3_loop .byte 102,15,56,222,209 .byte 
102,15,56,222,217 .byte 102,15,56,222,225 @@ -106,27 +158,29 @@ L003dec3_loop: __aesni_encrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 - shrl $1,%ecx - leal 32(%edx),%edx + shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 - movups (%edx),%xmm0 -L004enc4_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L006enc4_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 .byte 102,15,56,220,232 - movups (%edx),%xmm0 - jnz L004enc4_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L006enc4_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -140,27 +194,29 @@ L004enc4_loop: __aesni_decrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 - shrl $1,%ecx - leal 32(%edx),%edx + shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 - movups (%edx),%xmm0 -L005dec4_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L007dec4_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 .byte 102,15,56,222,233 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 .byte 102,15,56,222,232 - movups (%edx),%xmm0 - jnz L005dec4_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L007dec4_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -173,45 +229,44 @@ L005dec4_loop: .align 4 __aesni_encrypt6: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 +.byte 102,15,56,220,209 pxor %xmm0,%xmm5 - decl %ecx -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 + movups -16(%edx,%ecx,1),%xmm0 jmp L_aesni_encrypt6_enter .align 4,0x90 -L006enc6_loop: +L008enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.align 4,0x90 L_aesni_encrypt6_enter: - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%edx),%xmm0 - jnz L006enc6_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L008enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -228,45 +283,44 @@ L_aesni_encrypt6_enter: .align 4 __aesni_decrypt6: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 -.byte 102,15,56,222,217 +.byte 102,15,56,222,209 pxor %xmm0,%xmm5 - decl %ecx -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,222,233 .byte 
102,15,56,222,241 - movups (%edx),%xmm0 .byte 102,15,56,222,249 + movups -16(%edx,%ecx,1),%xmm0 jmp L_aesni_decrypt6_enter .align 4,0x90 -L007dec6_loop: +L009dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -.align 4,0x90 L_aesni_decrypt6_enter: - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%edx),%xmm0 - jnz L007dec6_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L009dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -294,14 +348,14 @@ L_aesni_ecb_encrypt_begin: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz L008ecb_ret + jz L010ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz L009ecb_decrypt + jz L011ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb L010ecb_enc_tail + jb L012ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -310,9 +364,9 @@ L_aesni_ecb_encrypt_begin: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp L011ecb_enc_loop6_enter + jmp L013ecb_enc_loop6_enter .align 4,0x90 -L012ecb_enc_loop6: +L014ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -327,12 +381,12 @@ L012ecb_enc_loop6: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -L011ecb_enc_loop6_enter: +L013ecb_enc_loop6_enter: call __aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc L012ecb_enc_loop6 + jnc L014ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -341,18 +395,18 @@ L011ecb_enc_loop6_enter: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz L008ecb_ret -L010ecb_enc_tail: + jz L010ecb_ret +L012ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb L013ecb_enc_one + jb L015ecb_enc_one movups 16(%esi),%xmm3 - je L014ecb_enc_two + je L016ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb L015ecb_enc_three + jb L017ecb_enc_three movups 48(%esi),%xmm5 - je L016ecb_enc_four + je L018ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call __aesni_encrypt6 @@ -361,50 +415,49 @@ L010ecb_enc_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L013ecb_enc_one: +L015ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L017enc1_loop_3: +L019enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L017enc1_loop_3 + jnz L019enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L014ecb_enc_two: - xorps %xmm4,%xmm4 - call __aesni_encrypt3 +L016ecb_enc_two: + call __aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L015ecb_enc_three: +L017ecb_enc_three: call __aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L016ecb_enc_four: +L018ecb_enc_four: call __aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L009ecb_decrypt: +L011ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb L018ecb_dec_tail + jb L020ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -413,9 +466,9 
@@ L009ecb_decrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp L019ecb_dec_loop6_enter + jmp L021ecb_dec_loop6_enter .align 4,0x90 -L020ecb_dec_loop6: +L022ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -430,12 +483,12 @@ L020ecb_dec_loop6: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -L019ecb_dec_loop6_enter: +L021ecb_dec_loop6_enter: call __aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc L020ecb_dec_loop6 + jnc L022ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -444,18 +497,18 @@ L019ecb_dec_loop6_enter: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz L008ecb_ret -L018ecb_dec_tail: + jz L010ecb_ret +L020ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb L021ecb_dec_one + jb L023ecb_dec_one movups 16(%esi),%xmm3 - je L022ecb_dec_two + je L024ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb L023ecb_dec_three + jb L025ecb_dec_three movups 48(%esi),%xmm5 - je L024ecb_dec_four + je L026ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call __aesni_decrypt6 @@ -464,44 +517,43 @@ L018ecb_dec_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L021ecb_dec_one: +L023ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L025dec1_loop_4: +L027dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L025dec1_loop_4 + jnz L027dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L022ecb_dec_two: - xorps %xmm4,%xmm4 - call __aesni_decrypt3 +L024ecb_dec_two: + call __aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L023ecb_dec_three: +L025ecb_dec_three: call __aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L008ecb_ret + jmp L010ecb_ret .align 4,0x90 -L024ecb_dec_four: +L026ecb_dec_four: call __aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -L008ecb_ret: +L010ecb_ret: popl %edi popl %esi popl %ebx @@ -538,45 +590,45 @@ L_aesni_ccm64_encrypt_blocks_begin: movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) - shrl $1,%ecx + shll $4,%ecx + movl $16,%ebx leal (%edx),%ebp movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 - movl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx .byte 102,15,56,0,253 -L026ccm64_enc_outer: +L028ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 xorps %xmm0,%xmm2 movups 16(%ebp),%xmm1 xorps %xmm6,%xmm0 - leal 32(%ebp),%edx xorps %xmm0,%xmm3 - movups (%edx),%xmm0 -L027ccm64_enc2_loop: + movups 32(%ebp),%xmm0 +L029ccm64_enc2_loop: .byte 102,15,56,220,209 - decl %ecx .byte 102,15,56,220,217 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 - leal 32(%edx),%edx .byte 102,15,56,220,216 - movups (%edx),%xmm0 - jnz L027ccm64_enc2_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L029ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 + decl %eax .byte 102,15,56,221,208 .byte 102,15,56,221,216 - decl %eax leal 16(%esi),%esi xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) - leal 16(%edi),%edi .byte 102,15,56,0,213 - jnz L026ccm64_enc_outer + leal 16(%edi),%edi + jnz L028ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) @@ -625,67 +677,70 @@ L_aesni_ccm64_decrypt_blocks_begin: movups 
16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L028enc1_loop_5: +L030enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L028enc1_loop_5 + jnz L030enc1_loop_5 .byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx movups (%esi),%xmm6 paddq 16(%esp),%xmm7 leal 16(%esi),%esi - jmp L029ccm64_dec_outer + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp L031ccm64_dec_outer .align 4,0x90 -L029ccm64_dec_outer: +L031ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 - movl %ebx,%ecx movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz L030ccm64_dec_break + jz L032ccm64_dec_break movups (%ebp),%xmm0 - shrl $1,%ecx + movl %ebx,%ecx movups 16(%ebp),%xmm1 xorps %xmm0,%xmm6 - leal 32(%ebp),%edx xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 - movups (%edx),%xmm0 -L031ccm64_dec2_loop: + movups 32(%ebp),%xmm0 +L033ccm64_dec2_loop: .byte 102,15,56,220,209 - decl %ecx .byte 102,15,56,220,217 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 - leal 32(%edx),%edx .byte 102,15,56,220,216 - movups (%edx),%xmm0 - jnz L031ccm64_dec2_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz L033ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 .byte 102,15,56,220,217 - leal 16(%esi),%esi .byte 102,15,56,221,208 .byte 102,15,56,221,216 - jmp L029ccm64_dec_outer + leal 16(%esi),%esi + jmp L031ccm64_dec_outer .align 4,0x90 -L030ccm64_dec_break: +L032ccm64_dec_break: + movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -L032enc1_loop_6: +L034enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L032enc1_loop_6 + jnz L034enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi @@ -713,7 +768,7 @@ L_aesni_ctr32_encrypt_blocks_begin: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je L033ctr32_one_shortcut + je L035ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -729,63 +784,59 @@ L_aesni_ctr32_encrypt_blocks_begin: .byte 102,15,58,34,253,3 movl 240(%edx),%ecx bswap %ebx - pxor %xmm1,%xmm1 pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movdqa (%esp),%xmm2 -.byte 102,15,58,34,203,0 +.byte 102,15,58,34,195,0 leal 3(%ebx),%ebp -.byte 102,15,58,34,197,0 +.byte 102,15,58,34,205,0 incl %ebx -.byte 102,15,58,34,203,1 +.byte 102,15,58,34,195,1 incl %ebp -.byte 102,15,58,34,197,1 +.byte 102,15,58,34,205,1 incl %ebx -.byte 102,15,58,34,203,2 +.byte 102,15,58,34,195,2 incl %ebp -.byte 102,15,58,34,197,2 - movdqa %xmm1,48(%esp) -.byte 102,15,56,0,202 - movdqa %xmm0,64(%esp) +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 - pshufd $192,%xmm1,%xmm2 - pshufd $128,%xmm1,%xmm3 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb L034ctr32_tail + jb L036ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx movdqa %xmm7,32(%esp) - shrl $1,%ecx movl %edx,%ebp - movl %ecx,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp L035ctr32_loop6 + jmp L037ctr32_loop6 .align 4,0x90 -L035ctr32_loop6: - pshufd $64,%xmm1,%xmm4 - movdqa 32(%esp),%xmm1 - pshufd $192,%xmm0,%xmm5 - por %xmm1,%xmm2 - pshufd $128,%xmm0,%xmm6 - por %xmm1,%xmm3 - pshufd $64,%xmm0,%xmm7 - por %xmm1,%xmm4 - por %xmm1,%xmm5 - por %xmm1,%xmm6 - por %xmm1,%xmm7 - movups (%ebp),%xmm0 - movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx - decl %ecx 
+L037ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 +.byte 102,15,56,220,209 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 call L_aesni_encrypt6_enter movups (%esi),%xmm1 @@ -796,51 +847,51 @@ L035ctr32_loop6: movups %xmm2,(%edi) movdqa 16(%esp),%xmm0 xorps %xmm1,%xmm4 - movdqa 48(%esp),%xmm1 + movdqa 64(%esp),%xmm1 movups %xmm3,16(%edi) movups %xmm4,32(%edi) paddd %xmm0,%xmm1 - paddd 64(%esp),%xmm0 + paddd 48(%esp),%xmm0 movdqa (%esp),%xmm2 movups 48(%esi),%xmm3 movups 64(%esi),%xmm4 xorps %xmm3,%xmm5 movups 80(%esi),%xmm3 leal 96(%esi),%esi - movdqa %xmm1,48(%esp) -.byte 102,15,56,0,202 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 xorps %xmm4,%xmm6 movups %xmm5,48(%edi) xorps %xmm3,%xmm7 - movdqa %xmm0,64(%esp) -.byte 102,15,56,0,194 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 movups %xmm6,64(%edi) - pshufd $192,%xmm1,%xmm2 + pshufd $192,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi - movl %ebx,%ecx - pshufd $128,%xmm1,%xmm3 + pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc L035ctr32_loop6 + jnc L037ctr32_loop6 addl $6,%eax - jz L036ctr32_ret + jz L038ctr32_ret + movdqu (%ebp),%xmm7 movl %ebp,%edx - leal 1(,%ecx,2),%ecx - movdqa 32(%esp),%xmm7 -L034ctr32_tail: + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +L036ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb L037ctr32_one - pshufd $64,%xmm1,%xmm4 + jb L039ctr32_one + pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je L038ctr32_two - pshufd $192,%xmm0,%xmm5 + je L040ctr32_two + pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb L039ctr32_three - pshufd $128,%xmm0,%xmm6 + jb L041ctr32_three + pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je L040ctr32_four + je L042ctr32_four por %xmm7,%xmm6 call __aesni_encrypt6 movups (%esi),%xmm1 @@ -858,39 +909,39 @@ L034ctr32_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L036ctr32_ret + jmp L038ctr32_ret .align 4,0x90 -L033ctr32_one_shortcut: +L035ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -L037ctr32_one: +L039ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L041enc1_loop_7: +L043enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L041enc1_loop_7 + jnz L043enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp L036ctr32_ret + jmp L038ctr32_ret .align 4,0x90 -L038ctr32_two: - call __aesni_encrypt3 +L040ctr32_two: + call __aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L036ctr32_ret + jmp L038ctr32_ret .align 4,0x90 -L039ctr32_three: +L041ctr32_three: call __aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -901,9 +952,9 @@ L039ctr32_three: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L036ctr32_ret + jmp L038ctr32_ret .align 4,0x90 -L040ctr32_four: +L042ctr32_four: call __aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -917,7 +968,7 @@ L040ctr32_four: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -L036ctr32_ret: +L038ctr32_ret: movl 
80(%esp),%esp popl %edi popl %esi @@ -940,12 +991,12 @@ L_aesni_xts_encrypt_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L042enc1_loop_8: +L044enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L042enc1_loop_8 + jnz L044enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -969,12 +1020,14 @@ L042enc1_loop_8: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc L043xts_enc_short - shrl $1,%ecx - movl %ecx,%ebx - jmp L044xts_enc_loop6 + jc L045xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L046xts_enc_loop6 .align 4,0x90 -L044xts_enc_loop6: +L046xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1010,6 +1063,7 @@ L044xts_enc_loop6: pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 + movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 @@ -1025,19 +1079,17 @@ L044xts_enc_loop6: movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx pxor 16(%esp),%xmm3 -.byte 102,15,56,220,209 pxor 32(%esp),%xmm4 -.byte 102,15,56,220,217 +.byte 102,15,56,220,209 pxor 48(%esp),%xmm5 - decl %ecx -.byte 102,15,56,220,225 pxor 64(%esp),%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,217 pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 call L_aesni_encrypt6_enter movdqa 80(%esp),%xmm1 @@ -1062,26 +1114,25 @@ L044xts_enc_loop6: paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 - movl %ebx,%ecx pxor %xmm2,%xmm1 subl $96,%eax - jnc L044xts_enc_loop6 - leal 1(,%ecx,2),%ecx + jnc L046xts_enc_loop6 + movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -L043xts_enc_short: +L045xts_enc_short: addl $96,%eax - jz L045xts_enc_done6x + jz L047xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb L046xts_enc_one + jb L048xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je L047xts_enc_two + je L049xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1090,7 +1141,7 @@ L043xts_enc_short: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb L048xts_enc_three + jb L050xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1100,7 +1151,7 @@ L043xts_enc_short: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je L049xts_enc_four + je L051xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1132,9 +1183,9 @@ L043xts_enc_short: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp L050xts_enc_done + jmp L052xts_enc_done .align 4,0x90 -L046xts_enc_one: +L048xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1142,37 +1193,36 @@ L046xts_enc_one: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L051enc1_loop_9: +L053enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L051enc1_loop_9 + jnz L053enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp L050xts_enc_done + jmp L052xts_enc_done .align 4,0x90 -L047xts_enc_two: +L049xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 - xorps %xmm4,%xmm4 - call __aesni_encrypt3 + call __aesni_encrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 
32(%edi),%edi movdqa %xmm6,%xmm1 - jmp L050xts_enc_done + jmp L052xts_enc_done .align 4,0x90 -L048xts_enc_three: +L050xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1190,9 +1240,9 @@ L048xts_enc_three: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp L050xts_enc_done + jmp L052xts_enc_done .align 4,0x90 -L049xts_enc_four: +L051xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1214,28 +1264,28 @@ L049xts_enc_four: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp L050xts_enc_done + jmp L052xts_enc_done .align 4,0x90 -L045xts_enc_done6x: +L047xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz L052xts_enc_ret + jz L054xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp L053xts_enc_steal + jmp L055xts_enc_steal .align 4,0x90 -L050xts_enc_done: +L052xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz L052xts_enc_ret + jz L054xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -L053xts_enc_steal: +L055xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1243,7 +1293,7 @@ L053xts_enc_steal: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz L053xts_enc_steal + jnz L055xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1253,16 +1303,16 @@ L053xts_enc_steal: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L054enc1_loop_10: +L056enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L054enc1_loop_10 + jnz L056enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -L052xts_enc_ret: +L054xts_enc_ret: movl 116(%esp),%esp popl %edi popl %esi @@ -1285,12 +1335,12 @@ L_aesni_xts_decrypt_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L055enc1_loop_11: +L057enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L055enc1_loop_11 + jnz L057enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1319,12 +1369,14 @@ L055enc1_loop_11: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc L056xts_dec_short - shrl $1,%ecx - movl %ecx,%ebx - jmp L057xts_dec_loop6 + jc L058xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L059xts_dec_loop6 .align 4,0x90 -L057xts_dec_loop6: +L059xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1360,6 +1412,7 @@ L057xts_dec_loop6: pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 + movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 @@ -1375,19 +1428,17 @@ L057xts_dec_loop6: movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx pxor 16(%esp),%xmm3 -.byte 102,15,56,222,209 pxor 32(%esp),%xmm4 -.byte 102,15,56,222,217 +.byte 102,15,56,222,209 pxor 48(%esp),%xmm5 - decl %ecx -.byte 102,15,56,222,225 pxor 64(%esp),%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,217 pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 .byte 102,15,56,222,241 - movups (%edx),%xmm0 .byte 102,15,56,222,249 call L_aesni_decrypt6_enter movdqa 80(%esp),%xmm1 @@ -1412,26 +1463,25 @@ L057xts_dec_loop6: paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 - movl %ebx,%ecx pxor %xmm2,%xmm1 subl $96,%eax - jnc L057xts_dec_loop6 - leal 1(,%ecx,2),%ecx + jnc L059xts_dec_loop6 + movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -L056xts_dec_short: 
+L058xts_dec_short: addl $96,%eax - jz L058xts_dec_done6x + jz L060xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb L059xts_dec_one + jb L061xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je L060xts_dec_two + je L062xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1440,7 +1490,7 @@ L056xts_dec_short: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb L061xts_dec_three + jb L063xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1450,7 +1500,7 @@ L056xts_dec_short: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je L062xts_dec_four + je L064xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1482,9 +1532,9 @@ L056xts_dec_short: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp L063xts_dec_done + jmp L065xts_dec_done .align 4,0x90 -L059xts_dec_one: +L061xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1492,36 +1542,36 @@ L059xts_dec_one: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L064dec1_loop_12: +L066dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L064dec1_loop_12 + jnz L066dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp L063xts_dec_done + jmp L065xts_dec_done .align 4,0x90 -L060xts_dec_two: +L062xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 - call __aesni_decrypt3 + call __aesni_decrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp L063xts_dec_done + jmp L065xts_dec_done .align 4,0x90 -L061xts_dec_three: +L063xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1539,9 +1589,9 @@ L061xts_dec_three: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp L063xts_dec_done + jmp L065xts_dec_done .align 4,0x90 -L062xts_dec_four: +L064xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1563,20 +1613,20 @@ L062xts_dec_four: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp L063xts_dec_done + jmp L065xts_dec_done .align 4,0x90 -L058xts_dec_done6x: +L060xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz L065xts_dec_ret + jz L067xts_dec_ret movl %eax,112(%esp) - jmp L066xts_dec_only_one_more + jmp L068xts_dec_only_one_more .align 4,0x90 -L063xts_dec_done: +L065xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz L065xts_dec_ret + jz L067xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1586,7 +1636,7 @@ L063xts_dec_done: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -L066xts_dec_only_one_more: +L068xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1600,16 +1650,16 @@ L066xts_dec_only_one_more: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L067dec1_loop_13: +L069dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L067dec1_loop_13 + jnz L069dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -L068xts_dec_steal: +L070xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1617,7 +1667,7 @@ L068xts_dec_steal: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz L068xts_dec_steal + jnz L070xts_dec_steal subl 
112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1627,16 +1677,16 @@ L068xts_dec_steal: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L069dec1_loop_14: +L071dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L069dec1_loop_14 + jnz L071dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -L065xts_dec_ret: +L067xts_dec_ret: movl 116(%esp),%esp popl %edi popl %esi @@ -1660,7 +1710,7 @@ L_aesni_cbc_encrypt_begin: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz L070cbc_abort + jz L072cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1668,14 +1718,14 @@ L_aesni_cbc_encrypt_begin: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je L071cbc_decrypt + je L073cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb L072cbc_enc_tail + jb L074cbc_enc_tail subl $16,%eax - jmp L073cbc_enc_loop + jmp L075cbc_enc_loop .align 4,0x90 -L073cbc_enc_loop: +L075cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1683,24 +1733,24 @@ L073cbc_enc_loop: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -L074enc1_loop_15: +L076enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L074enc1_loop_15 + jnz L076enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc L073cbc_enc_loop + jnc L075cbc_enc_loop addl $16,%eax - jnz L072cbc_enc_tail + jnz L074cbc_enc_tail movaps %xmm2,%xmm7 - jmp L075cbc_ret -L072cbc_enc_tail: + jmp L077cbc_ret +L074cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1711,20 +1761,20 @@ L072cbc_enc_tail: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp L073cbc_enc_loop + jmp L075cbc_enc_loop .align 4,0x90 -L071cbc_decrypt: +L073cbc_decrypt: cmpl $80,%eax - jbe L076cbc_dec_tail + jbe L078cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp L077cbc_dec_loop6_enter + jmp L079cbc_dec_loop6_enter .align 4,0x90 -L078cbc_dec_loop6: +L080cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -L077cbc_dec_loop6_enter: +L079cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1754,28 +1804,28 @@ L077cbc_dec_loop6_enter: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja L078cbc_dec_loop6 + ja L080cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle L079cbc_dec_tail_collected + jle L081cbc_dec_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -L076cbc_dec_tail: +L078cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe L080cbc_dec_one + jbe L082cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe L081cbc_dec_two + jbe L083cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe L082cbc_dec_three + jbe L084cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe L083cbc_dec_four + jbe L085cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1798,28 +1848,27 @@ L076cbc_dec_tail: leal 64(%edi),%edi movaps %xmm6,%xmm2 subl $80,%eax - jmp L079cbc_dec_tail_collected + jmp L081cbc_dec_tail_collected .align 4,0x90 -L080cbc_dec_one: +L082cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L084dec1_loop_16: +L086dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L084dec1_loop_16 + jnz L086dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp L079cbc_dec_tail_collected + jmp L081cbc_dec_tail_collected 
.align 4,0x90 -L081cbc_dec_two: - xorps %xmm4,%xmm4 - call __aesni_decrypt3 +L083cbc_dec_two: + call __aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) @@ -1827,9 +1876,9 @@ L081cbc_dec_two: leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp L079cbc_dec_tail_collected + jmp L081cbc_dec_tail_collected .align 4,0x90 -L082cbc_dec_three: +L084cbc_dec_three: call __aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 @@ -1840,9 +1889,9 @@ L082cbc_dec_three: leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp L079cbc_dec_tail_collected + jmp L081cbc_dec_tail_collected .align 4,0x90 -L083cbc_dec_four: +L085cbc_dec_four: call __aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1857,23 +1906,23 @@ L083cbc_dec_four: leal 48(%edi),%edi movaps %xmm5,%xmm2 subl $64,%eax -L079cbc_dec_tail_collected: +L081cbc_dec_tail_collected: andl $15,%eax - jnz L085cbc_dec_tail_partial + jnz L087cbc_dec_tail_partial movups %xmm2,(%edi) - jmp L075cbc_ret + jmp L077cbc_ret .align 4,0x90 -L085cbc_dec_tail_partial: +L087cbc_dec_tail_partial: movaps %xmm2,(%esp) movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -L075cbc_ret: +L077cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp movups %xmm7,(%ebp) -L070cbc_abort: +L072cbc_abort: popl %edi popl %esi popl %ebx @@ -1882,51 +1931,51 @@ L070cbc_abort: .align 4 __aesni_set_encrypt_key: testl %eax,%eax - jz L086bad_pointer + jz L088bad_pointer testl %edx,%edx - jz L086bad_pointer + jz L088bad_pointer movups (%eax),%xmm0 xorps %xmm4,%xmm4 leal 16(%edx),%edx cmpl $256,%ecx - je L08714rounds + je L08914rounds cmpl $192,%ecx - je L08812rounds + je L09012rounds cmpl $128,%ecx - jne L089bad_keybits + jne L091bad_keybits .align 4,0x90 -L09010rounds: +L09210rounds: movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call L091key_128_cold + call L093key_128_cold .byte 102,15,58,223,200,2 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,4 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,8 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,16 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,32 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,64 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,128 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,27 - call L092key_128 + call L094key_128 .byte 102,15,58,223,200,54 - call L092key_128 + call L094key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) xorl %eax,%eax ret .align 4,0x90 -L092key_128: +L094key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -L091key_128_cold: +L093key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -1935,38 +1984,38 @@ L091key_128_cold: xorps %xmm1,%xmm0 ret .align 4,0x90 -L08812rounds: +L09012rounds: movq 16(%eax),%xmm2 movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call L093key_192a_cold + call L095key_192a_cold .byte 102,15,58,223,202,2 - call L094key_192b + call L096key_192b .byte 102,15,58,223,202,4 - call L095key_192a + call L097key_192a .byte 102,15,58,223,202,8 - call L094key_192b + call L096key_192b .byte 102,15,58,223,202,16 - call L095key_192a + call L097key_192a .byte 102,15,58,223,202,32 - call L094key_192b + call L096key_192b .byte 102,15,58,223,202,64 - call L095key_192a + call L097key_192a .byte 102,15,58,223,202,128 - call L094key_192b + call L096key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) xorl %eax,%eax ret .align 4,0x90 -L095key_192a: +L097key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 4,0x90 
-L093key_192a_cold: +L095key_192a_cold: movaps %xmm2,%xmm5 -L096key_192b_warm: +L098key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -1980,56 +2029,56 @@ L096key_192b_warm: pxor %xmm3,%xmm2 ret .align 4,0x90 -L094key_192b: +L096key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp L096key_192b_warm + jmp L098key_192b_warm .align 4,0x90 -L08714rounds: +L08914rounds: movups 16(%eax),%xmm2 movl $13,%ecx leal 16(%edx),%edx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call L097key_256a_cold + call L099key_256a_cold .byte 102,15,58,223,200,1 - call L098key_256b + call L100key_256b .byte 102,15,58,223,202,2 - call L099key_256a + call L101key_256a .byte 102,15,58,223,200,2 - call L098key_256b + call L100key_256b .byte 102,15,58,223,202,4 - call L099key_256a + call L101key_256a .byte 102,15,58,223,200,4 - call L098key_256b + call L100key_256b .byte 102,15,58,223,202,8 - call L099key_256a + call L101key_256a .byte 102,15,58,223,200,8 - call L098key_256b + call L100key_256b .byte 102,15,58,223,202,16 - call L099key_256a + call L101key_256a .byte 102,15,58,223,200,16 - call L098key_256b + call L100key_256b .byte 102,15,58,223,202,32 - call L099key_256a + call L101key_256a .byte 102,15,58,223,200,32 - call L098key_256b + call L100key_256b .byte 102,15,58,223,202,64 - call L099key_256a + call L101key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax ret .align 4,0x90 -L099key_256a: +L101key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -L097key_256a_cold: +L099key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2038,7 +2087,7 @@ L097key_256a_cold: xorps %xmm1,%xmm0 ret .align 4,0x90 -L098key_256b: +L100key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2049,11 +2098,11 @@ L098key_256b: xorps %xmm1,%xmm2 ret .align 2,0x90 -L086bad_pointer: +L088bad_pointer: movl $-1,%eax ret .align 2,0x90 -L089bad_keybits: +L091bad_keybits: movl $-2,%eax ret .globl _aesni_set_encrypt_key @@ -2076,7 +2125,7 @@ L_aesni_set_decrypt_key_begin: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz L100dec_key_ret + jnz L102dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2084,7 +2133,7 @@ L_aesni_set_decrypt_key_begin: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -L101dec_key_inverse: +L103dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2094,12 +2143,12 @@ L101dec_key_inverse: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja L101dec_key_inverse + ja L103dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) xorl %eax,%eax -L100dec_key_ret: +L102dec_key_ret: ret .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 diff --git a/deps/openssl/asm/x86-macosx-gas/aes/vpaes-x86.s b/deps/openssl/asm/x86-macosx-gas/aes/vpaes-x86.s index f6d164f66fca36..81f7af82388417 100644 --- a/deps/openssl/asm/x86-macosx-gas/aes/vpaes-x86.s +++ b/deps/openssl/asm/x86-macosx-gas/aes/vpaes-x86.s @@ -70,33 +70,33 @@ __vpaes_encrypt_core: movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 - movdqu (%edx),%xmm5 - psrld $4,%xmm1 pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 -.byte 102,15,56,0,193 pxor %xmm5,%xmm2 - pxor %xmm2,%xmm0 + psrld $4,%xmm1 addl $16,%edx +.byte 102,15,56,0,193 leal 192(%ebp),%ebx + pxor 
%xmm2,%xmm0 jmp L000enc_entry .align 4,0x90 L001enc_loop: movdqa 32(%ebp),%xmm4 -.byte 102,15,56,0,226 - pxor %xmm5,%xmm4 movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 .byte 102,15,56,0,195 - pxor %xmm4,%xmm0 + pxor %xmm5,%xmm4 movdqa 64(%ebp),%xmm5 -.byte 102,15,56,0,234 + pxor %xmm4,%xmm0 movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 movdqa 80(%ebp),%xmm2 -.byte 102,15,56,0,211 - pxor %xmm5,%xmm2 movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 @@ -105,28 +105,28 @@ L001enc_loop: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx - pxor %xmm3,%xmm0 subl $1,%eax + pxor %xmm3,%xmm0 L000enc_entry: movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 - movdqa -32(%ebp),%xmm5 .byte 102,15,56,0,232 - pxor %xmm1,%xmm0 movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm5,%xmm3 movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 .byte 102,15,56,0,224 - pxor %xmm5,%xmm4 movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm7,%xmm3 - movdqu (%edx),%xmm5 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 + movdqu (%edx),%xmm5 pxor %xmm1,%xmm3 jnz L001enc_loop movdqa 96(%ebp),%xmm4 @@ -140,8 +140,8 @@ L000enc_entry: ret .align 4 __vpaes_decrypt_core: - movl 240(%edx),%eax leal 608(%ebp),%ebx + movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 @@ -164,56 +164,56 @@ __vpaes_decrypt_core: .align 4,0x90 L003dec_loop: movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa -16(%ebx),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - addl $16,%edx -.byte 102,15,56,0,197 movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 16(%ebx),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 - subl $1,%eax .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 48(%ebx),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 80(%ebx),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 + addl $16,%edx .byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax L002dec_entry: movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 - psrld $4,%xmm1 pand %xmm6,%xmm0 - movdqa -32(%ebp),%xmm2 + psrld $4,%xmm1 .byte 102,15,56,0,208 - pxor %xmm1,%xmm0 movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm2,%xmm3 movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 - pxor %xmm1,%xmm3 movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 jnz L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 @@ -318,12 +318,12 @@ L013schedule_mangle_last_dec: ret .align 4 __vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm0 - pxor %xmm0,%xmm6 + pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 - pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 ret .align 4 diff --git a/deps/openssl/asm/x86-macosx-gas/bf/bf-686.s b/deps/openssl/asm/x86-macosx-gas/bf/bf-586.s similarity index 99% rename 
from deps/openssl/asm/x86-macosx-gas/bf/bf-686.s rename to deps/openssl/asm/x86-macosx-gas/bf/bf-586.s index 013d2dec8e4633..2b75536e78e668 100644 --- a/deps/openssl/asm/x86-macosx-gas/bf/bf-686.s +++ b/deps/openssl/asm/x86-macosx-gas/bf/bf-586.s @@ -10,20 +10,17 @@ L_BF_encrypt_begin: pushl %edi # Load the 2 words - movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx # P pointer, s and enc flag - movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl (%edi),%ecx # Round 0 - rorl $16,%ecx movl 4(%edi),%esi movb %ch,%al @@ -43,7 +40,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 1 - rorl $16,%edx movl 8(%edi),%esi movb %dh,%al @@ -63,7 +59,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 2 - rorl $16,%ecx movl 12(%edi),%esi movb %ch,%al @@ -83,7 +78,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 3 - rorl $16,%edx movl 16(%edi),%esi movb %dh,%al @@ -103,7 +97,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 4 - rorl $16,%ecx movl 20(%edi),%esi movb %ch,%al @@ -123,7 +116,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 5 - rorl $16,%edx movl 24(%edi),%esi movb %dh,%al @@ -143,7 +135,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 6 - rorl $16,%ecx movl 28(%edi),%esi movb %ch,%al @@ -163,7 +154,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 7 - rorl $16,%edx movl 32(%edi),%esi movb %dh,%al @@ -183,7 +173,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 8 - rorl $16,%ecx movl 36(%edi),%esi movb %ch,%al @@ -203,7 +192,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 9 - rorl $16,%edx movl 40(%edi),%esi movb %dh,%al @@ -223,7 +211,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 10 - rorl $16,%ecx movl 44(%edi),%esi movb %ch,%al @@ -243,7 +230,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 11 - rorl $16,%edx movl 48(%edi),%esi movb %dh,%al @@ -263,7 +249,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 12 - rorl $16,%ecx movl 52(%edi),%esi movb %ch,%al @@ -283,7 +268,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 13 - rorl $16,%edx movl 56(%edi),%esi movb %dh,%al @@ -303,7 +287,6 @@ L_BF_encrypt_begin: xorl %esi,%ecx # Round 14 - rorl $16,%ecx movl 60(%edi),%esi movb %ch,%al @@ -323,7 +306,6 @@ L_BF_encrypt_begin: xorl %esi,%edx # Round 15 - rorl $16,%edx movl 64(%edi),%esi movb %dh,%al @@ -360,20 +342,17 @@ L_BF_decrypt_begin: pushl %edi # Load the 2 words - movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx # P pointer, s and enc flag - movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl 68(%edi),%ecx # Round 16 - rorl $16,%ecx movl 64(%edi),%esi movb %ch,%al @@ -393,7 +372,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 15 - rorl $16,%edx movl 60(%edi),%esi movb %dh,%al @@ -413,7 +391,6 @@ L_BF_decrypt_begin: xorl %esi,%ecx # Round 14 - rorl $16,%ecx movl 56(%edi),%esi movb %ch,%al @@ -433,7 +410,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 13 - rorl $16,%edx movl 52(%edi),%esi movb %dh,%al @@ -453,7 +429,6 @@ L_BF_decrypt_begin: xorl %esi,%ecx # Round 12 - rorl $16,%ecx movl 48(%edi),%esi movb %ch,%al @@ -473,7 +448,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 11 - rorl $16,%edx movl 44(%edi),%esi movb %dh,%al @@ -493,7 +467,6 @@ L_BF_decrypt_begin: xorl %esi,%ecx # Round 10 - rorl $16,%ecx movl 40(%edi),%esi movb %ch,%al @@ -513,7 +486,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 9 - rorl $16,%edx movl 36(%edi),%esi movb %dh,%al @@ -533,7 +505,6 @@ L_BF_decrypt_begin: xorl %esi,%ecx # Round 8 - rorl $16,%ecx movl 32(%edi),%esi movb %ch,%al @@ -553,7 +524,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 7 - rorl $16,%edx movl 28(%edi),%esi movb %dh,%al @@ -573,7 +543,6 @@ 
L_BF_decrypt_begin: xorl %esi,%ecx # Round 6 - rorl $16,%ecx movl 24(%edi),%esi movb %ch,%al @@ -593,7 +562,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 5 - rorl $16,%edx movl 20(%edi),%esi movb %dh,%al @@ -613,7 +581,6 @@ L_BF_decrypt_begin: xorl %esi,%ecx # Round 4 - rorl $16,%ecx movl 16(%edi),%esi movb %ch,%al @@ -633,7 +600,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 3 - rorl $16,%edx movl 12(%edi),%esi movb %dh,%al @@ -653,7 +619,6 @@ L_BF_decrypt_begin: xorl %esi,%ecx # Round 2 - rorl $16,%ecx movl 8(%edi),%esi movb %ch,%al @@ -673,7 +638,6 @@ L_BF_decrypt_begin: xorl %esi,%edx # Round 1 - rorl $16,%edx movl 4(%edi),%esi movb %dh,%al @@ -711,7 +675,6 @@ L_BF_cbc_encrypt_begin: pushl %edi movl 28(%esp),%ebp # getting iv ptr from parameter 4 - movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi @@ -723,10 +686,8 @@ L_BF_cbc_encrypt_begin: movl 36(%esp),%esi movl 40(%esp),%edi # getting encrypt flag from parameter 5 - movl 56(%esp),%ecx # get and push parameter 3 - movl 48(%esp),%eax pushl %eax pushl %ebx diff --git a/deps/openssl/asm/x86-macosx-gas/bn/bn-586.s b/deps/openssl/asm/x86-macosx-gas/bn/bn-586.s new file mode 100644 index 00000000000000..777121eae88f3f --- /dev/null +++ b/deps/openssl/asm/x86-macosx-gas/bn/bn-586.s @@ -0,0 +1,1520 @@ +.file "../openssl/crypto/bn/asm/bn-586.s" +.text +.globl _bn_mul_add_words +.align 4 +_bn_mul_add_words: +L_bn_mul_add_words_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp L002maw_sse2_entry +.align 4,0x90 +L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz L004maw_sse2_exit +L002maw_sse2_entry: + testl $4294967288,%ecx + jnz L003maw_sse2_unrolled +.align 2,0x90 +L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L005maw_sse2_loop +L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 4,0x90 +L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz L006maw_finish +.align 4,0x90 +L007maw_loop: + # Round 0 + movl (%ebx),%eax 
+ mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz L007maw_loop +L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz L008maw_finish2 + jmp L009maw_end +L008maw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_mul_words +.align 4 +_bn_mul_words: +L_bn_mul_words_begin: + call L010PIC_me_up +L010PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 4,0x90 +L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 4,0x90 +L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz L013mw_finish +L014mw_loop: + # Round 0 + movl (%ebx),%eax + mull 
%ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz L013mw_finish + jmp L014mw_loop +L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz L015mw_finish2 + jmp L016mw_end +L015mw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sqr_words +.align 4 +_bn_sqr_words: +L_bn_sqr_words_begin: + call L017PIC_me_up +L017PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 4,0x90 +L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz L019sqr_sse2_loop + emms + ret +.align 4,0x90 +L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz L020sw_finish +L021sw_loop: + # Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + # Round 4 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + # Round 8 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + # Round 12 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + # Round 16 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + # Round 20 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + # Round 24 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + # Round 28 + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl 
$64,%esi + subl $8,%ebx + jnz L021sw_loop +L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz L022sw_end + # Tail Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz L022sw_end + # Tail Round 1 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz L022sw_end + # Tail Round 2 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz L022sw_end + # Tail Round 3 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz L022sw_end + # Tail Round 4 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz L022sw_end + # Tail Round 5 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz L022sw_end + # Tail Round 6 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_div_words +.align 4 +_bn_div_words: +L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.globl _bn_add_words +.align 4 +_bn_add_words: +L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L023aw_finish +L024aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L024aw_loop +L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L025aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L025aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L025aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L025aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L025aw_end + # Tail 
Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L025aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L025aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_words +.align 4 +_bn_sub_words: +L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L026aw_finish +L027aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L027aw_loop +L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L028aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L028aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L028aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L028aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L028aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L028aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L028aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) 
+L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_part_words +.align 4 +_bn_sub_part_words: +L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L029aw_finish +L030aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L030aw_loop +L029aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L031aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 1 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 2 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 3 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 4 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 5 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 6 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +L031aw_end: + cmpl $0,36(%esp) + je L032pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je L032pw_end + jge L033pw_pos + # pw_neg + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz L034pw_neg_finish 
+L035pw_neg_loop: + # dl<0 Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # dl<0 Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # dl<0 Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # dl<0 Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # dl<0 Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # dl<0 Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # dl<0 Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # dl<0 Round 7 + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L035pw_neg_loop +L034pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz L032pw_end + # dl<0 Tail Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L032pw_end + # dl<0 Tail Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L032pw_end + # dl<0 Tail Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L032pw_end + # dl<0 Tail Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L032pw_end + # dl<0 Tail Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L032pw_end + # dl<0 Tail Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L032pw_end + # dl<0 Tail Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp L032pw_end +L033pw_pos: + andl $4294967288,%ebp + jz L036pw_pos_finish +L037pw_pos_loop: + # dl>0 Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L038pw_nc0 + # dl>0 Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L039pw_nc1 + # dl>0 Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L040pw_nc2 + # dl>0 Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L041pw_nc3 + # dl>0 Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L042pw_nc4 + # dl>0 Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L043pw_nc5 + # dl>0 Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L044pw_nc6 + # dl>0 Round 7 + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc L045pw_nc7 + + addl $32,%esi 
+ addl $32,%ebx + subl $8,%ebp + jnz L037pw_pos_loop +L036pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz L032pw_end + # dl>0 Tail Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L046pw_tail_nc0 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L047pw_tail_nc1 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L048pw_tail_nc2 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L049pw_tail_nc3 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L050pw_tail_nc4 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L051pw_tail_nc5 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L052pw_tail_nc6 + movl $1,%eax + jmp L032pw_end +L053pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +L038pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L039pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L040pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L041pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L042pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L043pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L044pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +L045pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L053pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz L054pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +L046pw_tail_nc0: + decl %ebp + jz L054pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L047pw_tail_nc1: + decl %ebp + jz L054pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L048pw_tail_nc2: + decl %ebp + jz L054pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L049pw_tail_nc3: + decl %ebp + jz L054pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L050pw_tail_nc4: + decl %ebp + jz L054pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L051pw_tail_nc5: + decl %ebp + jz L054pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L052pw_tail_nc6: +L054pw_nc_end: + movl $0,%eax +L032pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86.s b/deps/openssl/asm/x86-macosx-gas/bn/co-586.s similarity index 60% rename from deps/openssl/asm/x86-macosx-gas/bn/x86.s rename to deps/openssl/asm/x86-macosx-gas/bn/co-586.s index eb975d247bab90..6174dea38f0243 100644 --- a/deps/openssl/asm/x86-macosx-gas/bn/x86.s +++ b/deps/openssl/asm/x86-macosx-gas/bn/co-586.s @@ -1,928 +1,5 @@ -.file "../openssl/crypto/bn/asm/x86.s" +.file "../openssl/crypto/bn/asm/co-586.s" .text -.globl _bn_mul_add_words -.align 4 -_bn_mul_add_words: -L_bn_mul_add_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - xorl %esi,%esi - movl 20(%esp),%edi - movl 28(%esp),%ecx - movl 24(%esp),%ebx - andl $4294967288,%ecx - movl 32(%esp),%ebp - pushl %ecx - jz L000maw_finish -L001maw_loop: - movl %ecx,(%esp) - # Round 0 - - movl (%ebx),%eax - mull %ebp - addl %esi,%eax - movl (%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,(%edi) - movl %edx,%esi - # Round 4 - - movl 4(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 4(%edi),%esi - adcl $0,%edx - addl 
%esi,%eax - adcl $0,%edx - movl %eax,4(%edi) - movl %edx,%esi - # Round 8 - - movl 8(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 8(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,8(%edi) - movl %edx,%esi - # Round 12 - - movl 12(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 12(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,12(%edi) - movl %edx,%esi - # Round 16 - - movl 16(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 16(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,16(%edi) - movl %edx,%esi - # Round 20 - - movl 20(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 20(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,20(%edi) - movl %edx,%esi - # Round 24 - - movl 24(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 24(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi - # Round 28 - - movl 28(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 28(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,28(%edi) - movl %edx,%esi - - movl (%esp),%ecx - addl $32,%ebx - addl $32,%edi - subl $8,%ecx - jnz L001maw_loop -L000maw_finish: - movl 32(%esp),%ecx - andl $7,%ecx - jnz L002maw_finish2 - jmp L003maw_end -L002maw_finish2: - # Tail Round 0 - - movl (%ebx),%eax - mull %ebp - addl %esi,%eax - movl (%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,(%edi) - movl %edx,%esi - jz L003maw_end - # Tail Round 1 - - movl 4(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 4(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,4(%edi) - movl %edx,%esi - jz L003maw_end - # Tail Round 2 - - movl 8(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 8(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,8(%edi) - movl %edx,%esi - jz L003maw_end - # Tail Round 3 - - movl 12(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 12(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,12(%edi) - movl %edx,%esi - jz L003maw_end - # Tail Round 4 - - movl 16(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 16(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,16(%edi) - movl %edx,%esi - jz L003maw_end - # Tail Round 5 - - movl 20(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 20(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - decl %ecx - movl %eax,20(%edi) - movl %edx,%esi - jz L003maw_end - # Tail Round 6 - - movl 24(%ebx),%eax - mull %ebp - addl %esi,%eax - movl 24(%edi),%esi - adcl $0,%edx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi -L003maw_end: - movl %esi,%eax - popl %ecx - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.globl _bn_mul_words -.align 4 -_bn_mul_words: -L_bn_mul_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - xorl %esi,%esi - movl 20(%esp),%edi - movl 24(%esp),%ebx - movl 28(%esp),%ebp - movl 32(%esp),%ecx - andl $4294967288,%ebp - jz L004mw_finish -L005mw_loop: - # Round 0 - - movl (%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,(%edi) - movl %edx,%esi - # Round 4 - - movl 4(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,4(%edi) - movl %edx,%esi - # Round 8 - - movl 8(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,8(%edi) - movl %edx,%esi - # Round 12 - - movl 12(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,12(%edi) - movl %edx,%esi - # Round 16 - - movl 16(%ebx),%eax - mull 
%ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,16(%edi) - movl %edx,%esi - # Round 20 - - movl 20(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,20(%edi) - movl %edx,%esi - # Round 24 - - movl 24(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi - # Round 28 - - movl 28(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,28(%edi) - movl %edx,%esi - - addl $32,%ebx - addl $32,%edi - subl $8,%ebp - jz L004mw_finish - jmp L005mw_loop -L004mw_finish: - movl 28(%esp),%ebp - andl $7,%ebp - jnz L006mw_finish2 - jmp L007mw_end -L006mw_finish2: - # Tail Round 0 - - movl (%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,(%edi) - movl %edx,%esi - decl %ebp - jz L007mw_end - # Tail Round 1 - - movl 4(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,4(%edi) - movl %edx,%esi - decl %ebp - jz L007mw_end - # Tail Round 2 - - movl 8(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,8(%edi) - movl %edx,%esi - decl %ebp - jz L007mw_end - # Tail Round 3 - - movl 12(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,12(%edi) - movl %edx,%esi - decl %ebp - jz L007mw_end - # Tail Round 4 - - movl 16(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,16(%edi) - movl %edx,%esi - decl %ebp - jz L007mw_end - # Tail Round 5 - - movl 20(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,20(%edi) - movl %edx,%esi - decl %ebp - jz L007mw_end - # Tail Round 6 - - movl 24(%ebx),%eax - mull %ecx - addl %esi,%eax - adcl $0,%edx - movl %eax,24(%edi) - movl %edx,%esi -L007mw_end: - movl %esi,%eax - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.globl _bn_sqr_words -.align 4 -_bn_sqr_words: -L_bn_sqr_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - movl 20(%esp),%esi - movl 24(%esp),%edi - movl 28(%esp),%ebx - andl $4294967288,%ebx - jz L008sw_finish -L009sw_loop: - # Round 0 - - movl (%edi),%eax - mull %eax - movl %eax,(%esi) - movl %edx,4(%esi) - # Round 4 - - movl 4(%edi),%eax - mull %eax - movl %eax,8(%esi) - movl %edx,12(%esi) - # Round 8 - - movl 8(%edi),%eax - mull %eax - movl %eax,16(%esi) - movl %edx,20(%esi) - # Round 12 - - movl 12(%edi),%eax - mull %eax - movl %eax,24(%esi) - movl %edx,28(%esi) - # Round 16 - - movl 16(%edi),%eax - mull %eax - movl %eax,32(%esi) - movl %edx,36(%esi) - # Round 20 - - movl 20(%edi),%eax - mull %eax - movl %eax,40(%esi) - movl %edx,44(%esi) - # Round 24 - - movl 24(%edi),%eax - mull %eax - movl %eax,48(%esi) - movl %edx,52(%esi) - # Round 28 - - movl 28(%edi),%eax - mull %eax - movl %eax,56(%esi) - movl %edx,60(%esi) - - addl $32,%edi - addl $64,%esi - subl $8,%ebx - jnz L009sw_loop -L008sw_finish: - movl 28(%esp),%ebx - andl $7,%ebx - jz L010sw_end - # Tail Round 0 - - movl (%edi),%eax - mull %eax - movl %eax,(%esi) - decl %ebx - movl %edx,4(%esi) - jz L010sw_end - # Tail Round 1 - - movl 4(%edi),%eax - mull %eax - movl %eax,8(%esi) - decl %ebx - movl %edx,12(%esi) - jz L010sw_end - # Tail Round 2 - - movl 8(%edi),%eax - mull %eax - movl %eax,16(%esi) - decl %ebx - movl %edx,20(%esi) - jz L010sw_end - # Tail Round 3 - - movl 12(%edi),%eax - mull %eax - movl %eax,24(%esi) - decl %ebx - movl %edx,28(%esi) - jz L010sw_end - # Tail Round 4 - - movl 16(%edi),%eax - mull %eax - movl %eax,32(%esi) - decl %ebx - movl %edx,36(%esi) - jz L010sw_end - # Tail Round 5 - - movl 20(%edi),%eax - mull %eax - movl %eax,40(%esi) - decl %ebx - movl %edx,44(%esi) - jz L010sw_end - # Tail Round 6 - - 
movl 24(%edi),%eax - mull %eax - movl %eax,48(%esi) - movl %edx,52(%esi) -L010sw_end: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.globl _bn_div_words -.align 4 -_bn_div_words: -L_bn_div_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 20(%esp),%edx - movl 24(%esp),%eax - movl 28(%esp),%ebx - divl %ebx - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.globl _bn_add_words -.align 4 -_bn_add_words: -L_bn_add_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - - movl 20(%esp),%ebx - movl 24(%esp),%esi - movl 28(%esp),%edi - movl 32(%esp),%ebp - xorl %eax,%eax - andl $4294967288,%ebp - jz L011aw_finish -L012aw_loop: - # Round 0 - - movl (%esi),%ecx - movl (%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,(%ebx) - # Round 1 - - movl 4(%esi),%ecx - movl 4(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,4(%ebx) - # Round 2 - - movl 8(%esi),%ecx - movl 8(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,8(%ebx) - # Round 3 - - movl 12(%esi),%ecx - movl 12(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,12(%ebx) - # Round 4 - - movl 16(%esi),%ecx - movl 16(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,16(%ebx) - # Round 5 - - movl 20(%esi),%ecx - movl 20(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,20(%ebx) - # Round 6 - - movl 24(%esi),%ecx - movl 24(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) - # Round 7 - - movl 28(%esi),%ecx - movl 28(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,28(%ebx) - - addl $32,%esi - addl $32,%edi - addl $32,%ebx - subl $8,%ebp - jnz L012aw_loop -L011aw_finish: - movl 32(%esp),%ebp - andl $7,%ebp - jz L013aw_end - # Tail Round 0 - - movl (%esi),%ecx - movl (%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,(%ebx) - jz L013aw_end - # Tail Round 1 - - movl 4(%esi),%ecx - movl 4(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,4(%ebx) - jz L013aw_end - # Tail Round 2 - - movl 8(%esi),%ecx - movl 8(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,8(%ebx) - jz L013aw_end - # Tail Round 3 - - movl 12(%esi),%ecx - movl 12(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,12(%ebx) - jz L013aw_end - # Tail Round 4 - - movl 16(%esi),%ecx - movl 16(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,16(%ebx) - jz L013aw_end - # Tail Round 5 - - movl 20(%esi),%ecx - movl 20(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,20(%ebx) - jz L013aw_end - # Tail Round 6 - - movl 24(%esi),%ecx - movl 24(%edi),%edx - addl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - addl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) -L013aw_end: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.globl _bn_sub_words -.align 4 -_bn_sub_words: -L_bn_sub_words_begin: - pushl %ebp - pushl %ebx - pushl %esi - 
pushl %edi - - movl 20(%esp),%ebx - movl 24(%esp),%esi - movl 28(%esp),%edi - movl 32(%esp),%ebp - xorl %eax,%eax - andl $4294967288,%ebp - jz L014aw_finish -L015aw_loop: - # Round 0 - - movl (%esi),%ecx - movl (%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,(%ebx) - # Round 1 - - movl 4(%esi),%ecx - movl 4(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,4(%ebx) - # Round 2 - - movl 8(%esi),%ecx - movl 8(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,8(%ebx) - # Round 3 - - movl 12(%esi),%ecx - movl 12(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,12(%ebx) - # Round 4 - - movl 16(%esi),%ecx - movl 16(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,16(%ebx) - # Round 5 - - movl 20(%esi),%ecx - movl 20(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,20(%ebx) - # Round 6 - - movl 24(%esi),%ecx - movl 24(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) - # Round 7 - - movl 28(%esi),%ecx - movl 28(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,28(%ebx) - - addl $32,%esi - addl $32,%edi - addl $32,%ebx - subl $8,%ebp - jnz L015aw_loop -L014aw_finish: - movl 32(%esp),%ebp - andl $7,%ebp - jz L016aw_end - # Tail Round 0 - - movl (%esi),%ecx - movl (%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,(%ebx) - jz L016aw_end - # Tail Round 1 - - movl 4(%esi),%ecx - movl 4(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,4(%ebx) - jz L016aw_end - # Tail Round 2 - - movl 8(%esi),%ecx - movl 8(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,8(%ebx) - jz L016aw_end - # Tail Round 3 - - movl 12(%esi),%ecx - movl 12(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,12(%ebx) - jz L016aw_end - # Tail Round 4 - - movl 16(%esi),%ecx - movl 16(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,16(%ebx) - jz L016aw_end - # Tail Round 5 - - movl 20(%esi),%ecx - movl 20(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - decl %ebp - movl %ecx,20(%ebx) - jz L016aw_end - # Tail Round 6 - - movl 24(%esi),%ecx - movl 24(%edi),%edx - subl %eax,%ecx - movl $0,%eax - adcl %eax,%eax - subl %edx,%ecx - adcl $0,%eax - movl %ecx,24(%ebx) -L016aw_end: - popl %edi - popl %esi - popl %ebx - popl %ebp - ret .globl _bn_mul_comba8 .align 4 _bn_mul_comba8: @@ -938,10 +15,8 @@ L_bn_mul_comba8_begin: xorl %ecx,%ecx movl (%edi),%edx # ################## Calculate word 0 - xorl %ebp,%ebp # mul a[0]*b[0] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -951,12 +26,9 @@ L_bn_mul_comba8_begin: movl %ebx,(%eax) movl 4(%esi),%eax # saved r[0] - # ################## Calculate word 1 - xorl %ebx,%ebx # mul a[1]*b[0] - mull %edx addl %eax,%ecx movl (%esi),%eax @@ -964,7 +36,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ebx # mul a[0]*b[1] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -974,12 +45,9 @@ 
L_bn_mul_comba8_begin: movl %ecx,4(%eax) movl 8(%esi),%eax # saved r[1] - # ################## Calculate word 2 - xorl %ecx,%ecx # mul a[2]*b[0] - mull %edx addl %eax,%ebp movl 4(%esi),%eax @@ -987,7 +55,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ecx # mul a[1]*b[1] - mull %edx addl %eax,%ebp movl (%esi),%eax @@ -995,7 +62,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ecx # mul a[0]*b[2] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1005,12 +71,9 @@ L_bn_mul_comba8_begin: movl %ebp,8(%eax) movl 12(%esi),%eax # saved r[2] - # ################## Calculate word 3 - xorl %ebp,%ebp # mul a[3]*b[0] - mull %edx addl %eax,%ebx movl 8(%esi),%eax @@ -1018,7 +81,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ebp # mul a[2]*b[1] - mull %edx addl %eax,%ebx movl 4(%esi),%eax @@ -1026,7 +88,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ebp # mul a[1]*b[2] - mull %edx addl %eax,%ebx movl (%esi),%eax @@ -1034,7 +95,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ebp # mul a[0]*b[3] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1044,12 +104,9 @@ L_bn_mul_comba8_begin: movl %ebx,12(%eax) movl 16(%esi),%eax # saved r[3] - # ################## Calculate word 4 - xorl %ebx,%ebx # mul a[4]*b[0] - mull %edx addl %eax,%ecx movl 12(%esi),%eax @@ -1057,7 +114,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ebx # mul a[3]*b[1] - mull %edx addl %eax,%ecx movl 8(%esi),%eax @@ -1065,7 +121,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ebx # mul a[2]*b[2] - mull %edx addl %eax,%ecx movl 4(%esi),%eax @@ -1073,7 +128,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ebx # mul a[1]*b[3] - mull %edx addl %eax,%ecx movl (%esi),%eax @@ -1081,7 +135,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ebx # mul a[0]*b[4] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -1091,12 +144,9 @@ L_bn_mul_comba8_begin: movl %ecx,16(%eax) movl 20(%esi),%eax # saved r[4] - # ################## Calculate word 5 - xorl %ecx,%ecx # mul a[5]*b[0] - mull %edx addl %eax,%ebp movl 16(%esi),%eax @@ -1104,7 +154,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ecx # mul a[4]*b[1] - mull %edx addl %eax,%ebp movl 12(%esi),%eax @@ -1112,7 +161,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ecx # mul a[3]*b[2] - mull %edx addl %eax,%ebp movl 8(%esi),%eax @@ -1120,7 +168,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ecx # mul a[2]*b[3] - mull %edx addl %eax,%ebp movl 4(%esi),%eax @@ -1128,7 +175,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ecx # mul a[1]*b[4] - mull %edx addl %eax,%ebp movl (%esi),%eax @@ -1136,7 +182,6 @@ L_bn_mul_comba8_begin: movl 20(%edi),%edx adcl $0,%ecx # mul a[0]*b[5] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1146,12 +191,9 @@ L_bn_mul_comba8_begin: movl %ebp,20(%eax) movl 24(%esi),%eax # saved r[5] - # ################## Calculate word 6 - xorl %ebp,%ebp # mul a[6]*b[0] - mull %edx addl %eax,%ebx movl 20(%esi),%eax @@ -1159,7 +201,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ebp # mul a[5]*b[1] - mull %edx addl %eax,%ebx movl 16(%esi),%eax @@ -1167,7 +208,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ebp # mul a[4]*b[2] - mull %edx addl %eax,%ebx movl 12(%esi),%eax @@ -1175,7 +215,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ebp # mul a[3]*b[3] - mull %edx addl %eax,%ebx movl 8(%esi),%eax @@ -1183,7 +222,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ebp # mul a[2]*b[4] - mull %edx addl %eax,%ebx movl 4(%esi),%eax @@ -1191,7 +229,6 @@ L_bn_mul_comba8_begin: movl 
20(%edi),%edx adcl $0,%ebp # mul a[1]*b[5] - mull %edx addl %eax,%ebx movl (%esi),%eax @@ -1199,7 +236,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ebp # mul a[0]*b[6] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1209,12 +245,9 @@ L_bn_mul_comba8_begin: movl %ebx,24(%eax) movl 28(%esi),%eax # saved r[6] - # ################## Calculate word 7 - xorl %ebx,%ebx # mul a[7]*b[0] - mull %edx addl %eax,%ecx movl 24(%esi),%eax @@ -1222,7 +255,6 @@ L_bn_mul_comba8_begin: movl 4(%edi),%edx adcl $0,%ebx # mul a[6]*b[1] - mull %edx addl %eax,%ecx movl 20(%esi),%eax @@ -1230,7 +262,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ebx # mul a[5]*b[2] - mull %edx addl %eax,%ecx movl 16(%esi),%eax @@ -1238,7 +269,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ebx # mul a[4]*b[3] - mull %edx addl %eax,%ecx movl 12(%esi),%eax @@ -1246,7 +276,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ebx # mul a[3]*b[4] - mull %edx addl %eax,%ecx movl 8(%esi),%eax @@ -1254,7 +283,6 @@ L_bn_mul_comba8_begin: movl 20(%edi),%edx adcl $0,%ebx # mul a[2]*b[5] - mull %edx addl %eax,%ecx movl 4(%esi),%eax @@ -1262,7 +290,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ebx # mul a[1]*b[6] - mull %edx addl %eax,%ecx movl (%esi),%eax @@ -1270,7 +297,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ebx # mul a[0]*b[7] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -1280,12 +306,9 @@ L_bn_mul_comba8_begin: movl %ecx,28(%eax) movl 28(%esi),%eax # saved r[7] - # ################## Calculate word 8 - xorl %ecx,%ecx # mul a[7]*b[1] - mull %edx addl %eax,%ebp movl 24(%esi),%eax @@ -1293,7 +316,6 @@ L_bn_mul_comba8_begin: movl 8(%edi),%edx adcl $0,%ecx # mul a[6]*b[2] - mull %edx addl %eax,%ebp movl 20(%esi),%eax @@ -1301,7 +323,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ecx # mul a[5]*b[3] - mull %edx addl %eax,%ebp movl 16(%esi),%eax @@ -1309,7 +330,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ecx # mul a[4]*b[4] - mull %edx addl %eax,%ebp movl 12(%esi),%eax @@ -1317,7 +337,6 @@ L_bn_mul_comba8_begin: movl 20(%edi),%edx adcl $0,%ecx # mul a[3]*b[5] - mull %edx addl %eax,%ebp movl 8(%esi),%eax @@ -1325,7 +344,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ecx # mul a[2]*b[6] - mull %edx addl %eax,%ebp movl 4(%esi),%eax @@ -1333,7 +351,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ecx # mul a[1]*b[7] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1343,12 +360,9 @@ L_bn_mul_comba8_begin: movl %ebp,32(%eax) movl 28(%esi),%eax # saved r[8] - # ################## Calculate word 9 - xorl %ebp,%ebp # mul a[7]*b[2] - mull %edx addl %eax,%ebx movl 24(%esi),%eax @@ -1356,7 +370,6 @@ L_bn_mul_comba8_begin: movl 12(%edi),%edx adcl $0,%ebp # mul a[6]*b[3] - mull %edx addl %eax,%ebx movl 20(%esi),%eax @@ -1364,7 +377,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ebp # mul a[5]*b[4] - mull %edx addl %eax,%ebx movl 16(%esi),%eax @@ -1372,7 +384,6 @@ L_bn_mul_comba8_begin: movl 20(%edi),%edx adcl $0,%ebp # mul a[4]*b[5] - mull %edx addl %eax,%ebx movl 12(%esi),%eax @@ -1380,7 +391,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ebp # mul a[3]*b[6] - mull %edx addl %eax,%ebx movl 8(%esi),%eax @@ -1388,7 +398,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ebp # mul a[2]*b[7] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1398,12 +407,9 @@ L_bn_mul_comba8_begin: movl %ebx,36(%eax) movl 28(%esi),%eax # saved r[9] - # ################## Calculate word 10 - xorl %ebx,%ebx # mul a[7]*b[3] - mull %edx addl %eax,%ecx movl 
24(%esi),%eax @@ -1411,7 +417,6 @@ L_bn_mul_comba8_begin: movl 16(%edi),%edx adcl $0,%ebx # mul a[6]*b[4] - mull %edx addl %eax,%ecx movl 20(%esi),%eax @@ -1419,7 +424,6 @@ L_bn_mul_comba8_begin: movl 20(%edi),%edx adcl $0,%ebx # mul a[5]*b[5] - mull %edx addl %eax,%ecx movl 16(%esi),%eax @@ -1427,7 +431,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ebx # mul a[4]*b[6] - mull %edx addl %eax,%ecx movl 12(%esi),%eax @@ -1435,7 +438,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ebx # mul a[3]*b[7] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -1445,12 +447,9 @@ L_bn_mul_comba8_begin: movl %ecx,40(%eax) movl 28(%esi),%eax # saved r[10] - # ################## Calculate word 11 - xorl %ecx,%ecx # mul a[7]*b[4] - mull %edx addl %eax,%ebp movl 24(%esi),%eax @@ -1458,7 +457,6 @@ L_bn_mul_comba8_begin: movl 20(%edi),%edx adcl $0,%ecx # mul a[6]*b[5] - mull %edx addl %eax,%ebp movl 20(%esi),%eax @@ -1466,7 +464,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ecx # mul a[5]*b[6] - mull %edx addl %eax,%ebp movl 16(%esi),%eax @@ -1474,7 +471,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ecx # mul a[4]*b[7] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1484,12 +480,9 @@ L_bn_mul_comba8_begin: movl %ebp,44(%eax) movl 28(%esi),%eax # saved r[11] - # ################## Calculate word 12 - xorl %ebp,%ebp # mul a[7]*b[5] - mull %edx addl %eax,%ebx movl 24(%esi),%eax @@ -1497,7 +490,6 @@ L_bn_mul_comba8_begin: movl 24(%edi),%edx adcl $0,%ebp # mul a[6]*b[6] - mull %edx addl %eax,%ebx movl 20(%esi),%eax @@ -1505,7 +497,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ebp # mul a[5]*b[7] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1515,12 +506,9 @@ L_bn_mul_comba8_begin: movl %ebx,48(%eax) movl 28(%esi),%eax # saved r[12] - # ################## Calculate word 13 - xorl %ebx,%ebx # mul a[7]*b[6] - mull %edx addl %eax,%ecx movl 24(%esi),%eax @@ -1528,7 +516,6 @@ L_bn_mul_comba8_begin: movl 28(%edi),%edx adcl $0,%ebx # mul a[6]*b[7] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -1538,12 +525,9 @@ L_bn_mul_comba8_begin: movl %ecx,52(%eax) movl 28(%esi),%eax # saved r[13] - # ################## Calculate word 14 - xorl %ecx,%ecx # mul a[7]*b[7] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1551,9 +535,7 @@ L_bn_mul_comba8_begin: adcl $0,%ecx movl %ebp,56(%eax) # saved r[14] - # save r[15] - movl %ebx,60(%eax) popl %ebx popl %ebp @@ -1575,10 +557,8 @@ L_bn_mul_comba4_begin: xorl %ecx,%ecx movl (%edi),%edx # ################## Calculate word 0 - xorl %ebp,%ebp # mul a[0]*b[0] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1588,12 +568,9 @@ L_bn_mul_comba4_begin: movl %ebx,(%eax) movl 4(%esi),%eax # saved r[0] - # ################## Calculate word 1 - xorl %ebx,%ebx # mul a[1]*b[0] - mull %edx addl %eax,%ecx movl (%esi),%eax @@ -1601,7 +578,6 @@ L_bn_mul_comba4_begin: movl 4(%edi),%edx adcl $0,%ebx # mul a[0]*b[1] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -1611,12 +587,9 @@ L_bn_mul_comba4_begin: movl %ecx,4(%eax) movl 8(%esi),%eax # saved r[1] - # ################## Calculate word 2 - xorl %ecx,%ecx # mul a[2]*b[0] - mull %edx addl %eax,%ebp movl 4(%esi),%eax @@ -1624,7 +597,6 @@ L_bn_mul_comba4_begin: movl 4(%edi),%edx adcl $0,%ecx # mul a[1]*b[1] - mull %edx addl %eax,%ebp movl (%esi),%eax @@ -1632,7 +604,6 @@ L_bn_mul_comba4_begin: movl 8(%edi),%edx adcl $0,%ecx # mul a[0]*b[2] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1642,12 +613,9 @@ L_bn_mul_comba4_begin: movl %ebp,8(%eax) movl 12(%esi),%eax # saved r[2] - # ################## 
Calculate word 3 - xorl %ebp,%ebp # mul a[3]*b[0] - mull %edx addl %eax,%ebx movl 8(%esi),%eax @@ -1655,7 +623,6 @@ L_bn_mul_comba4_begin: movl 4(%edi),%edx adcl $0,%ebp # mul a[2]*b[1] - mull %edx addl %eax,%ebx movl 4(%esi),%eax @@ -1663,7 +630,6 @@ L_bn_mul_comba4_begin: movl 8(%edi),%edx adcl $0,%ebp # mul a[1]*b[2] - mull %edx addl %eax,%ebx movl (%esi),%eax @@ -1671,7 +637,6 @@ L_bn_mul_comba4_begin: movl 12(%edi),%edx adcl $0,%ebp # mul a[0]*b[3] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1681,12 +646,9 @@ L_bn_mul_comba4_begin: movl %ebx,12(%eax) movl 12(%esi),%eax # saved r[3] - # ################## Calculate word 4 - xorl %ebx,%ebx # mul a[3]*b[1] - mull %edx addl %eax,%ecx movl 8(%esi),%eax @@ -1694,7 +656,6 @@ L_bn_mul_comba4_begin: movl 8(%edi),%edx adcl $0,%ebx # mul a[2]*b[2] - mull %edx addl %eax,%ecx movl 4(%esi),%eax @@ -1702,7 +663,6 @@ L_bn_mul_comba4_begin: movl 12(%edi),%edx adcl $0,%ebx # mul a[1]*b[3] - mull %edx addl %eax,%ecx movl 20(%esp),%eax @@ -1712,12 +672,9 @@ L_bn_mul_comba4_begin: movl %ecx,16(%eax) movl 12(%esi),%eax # saved r[4] - # ################## Calculate word 5 - xorl %ecx,%ecx # mul a[3]*b[2] - mull %edx addl %eax,%ebp movl 8(%esi),%eax @@ -1725,7 +682,6 @@ L_bn_mul_comba4_begin: movl 12(%edi),%edx adcl $0,%ecx # mul a[2]*b[3] - mull %edx addl %eax,%ebp movl 20(%esp),%eax @@ -1735,12 +691,9 @@ L_bn_mul_comba4_begin: movl %ebp,20(%eax) movl 12(%esi),%eax # saved r[5] - # ################## Calculate word 6 - xorl %ebp,%ebp # mul a[3]*b[3] - mull %edx addl %eax,%ebx movl 20(%esp),%eax @@ -1748,9 +701,7 @@ L_bn_mul_comba4_begin: adcl $0,%ebp movl %ebx,24(%eax) # saved r[6] - # save r[7] - movl %ecx,28(%eax) popl %ebx popl %ebp @@ -1771,10 +722,8 @@ L_bn_sqr_comba8_begin: xorl %ecx,%ecx movl (%esi),%eax # ############### Calculate word 0 - xorl %ebp,%ebp # sqr a[0]*a[0] - mull %eax addl %eax,%ebx adcl %edx,%ecx @@ -1783,12 +732,9 @@ L_bn_sqr_comba8_begin: movl %ebx,(%edi) movl 4(%esi),%eax # saved r[0] - # ############### Calculate word 1 - xorl %ebx,%ebx # sqr a[1]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1800,12 +746,9 @@ L_bn_sqr_comba8_begin: movl %ecx,4(%edi) movl (%esi),%edx # saved r[1] - # ############### Calculate word 2 - xorl %ecx,%ecx # sqr a[2]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1815,7 +758,6 @@ L_bn_sqr_comba8_begin: movl 4(%esi),%eax adcl $0,%ecx # sqr a[1]*a[1] - mull %eax addl %eax,%ebp adcl %edx,%ebx @@ -1824,12 +766,9 @@ L_bn_sqr_comba8_begin: movl %ebp,8(%edi) movl 12(%esi),%eax # saved r[2] - # ############### Calculate word 3 - xorl %ebp,%ebp # sqr a[3]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1840,7 +779,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebp movl 4(%esi),%edx # sqr a[2]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1852,12 +790,9 @@ L_bn_sqr_comba8_begin: movl %ebx,12(%edi) movl (%esi),%edx # saved r[3] - # ############### Calculate word 4 - xorl %ebx,%ebx # sqr a[4]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1868,7 +803,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebx movl 4(%esi),%edx # sqr a[3]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1878,7 +812,6 @@ L_bn_sqr_comba8_begin: movl 8(%esi),%eax adcl $0,%ebx # sqr a[2]*a[2] - mull %eax addl %eax,%ecx adcl %edx,%ebp @@ -1887,12 +820,9 @@ L_bn_sqr_comba8_begin: movl %ecx,16(%edi) movl 20(%esi),%eax # saved r[4] - # ############### Calculate word 5 - xorl %ecx,%ecx # sqr a[5]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1903,7 +833,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ecx movl 4(%esi),%edx # sqr a[4]*a[1] - mull 
%edx addl %eax,%eax adcl %edx,%edx @@ -1914,7 +843,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ecx movl 8(%esi),%edx # sqr a[3]*a[2] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1926,12 +854,9 @@ L_bn_sqr_comba8_begin: movl %ebp,20(%edi) movl (%esi),%edx # saved r[5] - # ############### Calculate word 6 - xorl %ebp,%ebp # sqr a[6]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1942,7 +867,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebp movl 4(%esi),%edx # sqr a[5]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1953,7 +877,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebp movl 8(%esi),%edx # sqr a[4]*a[2] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1963,7 +886,6 @@ L_bn_sqr_comba8_begin: movl 12(%esi),%eax adcl $0,%ebp # sqr a[3]*a[3] - mull %eax addl %eax,%ebx adcl %edx,%ecx @@ -1972,12 +894,9 @@ L_bn_sqr_comba8_begin: movl %ebx,24(%edi) movl 28(%esi),%eax # saved r[6] - # ############### Calculate word 7 - xorl %ebx,%ebx # sqr a[7]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1988,7 +907,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebx movl 4(%esi),%edx # sqr a[6]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -1999,7 +917,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebx movl 8(%esi),%edx # sqr a[5]*a[2] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2010,7 +927,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebx movl 12(%esi),%edx # sqr a[4]*a[3] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2022,12 +938,9 @@ L_bn_sqr_comba8_begin: movl %ecx,28(%edi) movl 4(%esi),%edx # saved r[7] - # ############### Calculate word 8 - xorl %ecx,%ecx # sqr a[7]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2038,7 +951,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ecx movl 8(%esi),%edx # sqr a[6]*a[2] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2049,7 +961,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ecx movl 12(%esi),%edx # sqr a[5]*a[3] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2059,7 +970,6 @@ L_bn_sqr_comba8_begin: movl 16(%esi),%eax adcl $0,%ecx # sqr a[4]*a[4] - mull %eax addl %eax,%ebp adcl %edx,%ebx @@ -2068,12 +978,9 @@ L_bn_sqr_comba8_begin: movl %ebp,32(%edi) movl 28(%esi),%eax # saved r[8] - # ############### Calculate word 9 - xorl %ebp,%ebp # sqr a[7]*a[2] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2084,7 +991,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebp movl 12(%esi),%edx # sqr a[6]*a[3] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2095,7 +1001,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebp movl 16(%esi),%edx # sqr a[5]*a[4] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2107,12 +1012,9 @@ L_bn_sqr_comba8_begin: movl %ebx,36(%edi) movl 12(%esi),%edx # saved r[9] - # ############### Calculate word 10 - xorl %ebx,%ebx # sqr a[7]*a[3] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2123,7 +1025,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ebx movl 16(%esi),%edx # sqr a[6]*a[4] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2133,7 +1034,6 @@ L_bn_sqr_comba8_begin: movl 20(%esi),%eax adcl $0,%ebx # sqr a[5]*a[5] - mull %eax addl %eax,%ecx adcl %edx,%ebp @@ -2142,12 +1042,9 @@ L_bn_sqr_comba8_begin: movl %ecx,40(%edi) movl 28(%esi),%eax # saved r[10] - # ############### Calculate word 11 - xorl %ecx,%ecx # sqr a[7]*a[4] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2158,7 +1055,6 @@ L_bn_sqr_comba8_begin: adcl $0,%ecx movl 20(%esi),%edx # sqr a[6]*a[5] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2170,12 +1066,9 @@ L_bn_sqr_comba8_begin: movl %ebp,44(%edi) movl 20(%esi),%edx # saved r[11] - # ############### Calculate word 12 - xorl %ebp,%ebp # sqr a[7]*a[5] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2185,7 +1078,6 @@ L_bn_sqr_comba8_begin: movl 
24(%esi),%eax adcl $0,%ebp # sqr a[6]*a[6] - mull %eax addl %eax,%ebx adcl %edx,%ecx @@ -2194,12 +1086,9 @@ L_bn_sqr_comba8_begin: movl %ebx,48(%edi) movl 28(%esi),%eax # saved r[12] - # ############### Calculate word 13 - xorl %ebx,%ebx # sqr a[7]*a[6] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2210,19 +1099,15 @@ L_bn_sqr_comba8_begin: adcl $0,%ebx movl %ecx,52(%edi) # saved r[13] - # ############### Calculate word 14 - xorl %ecx,%ecx # sqr a[7]*a[7] - mull %eax addl %eax,%ebp adcl %edx,%ebx adcl $0,%ecx movl %ebp,56(%edi) # saved r[14] - movl %ebx,60(%edi) popl %ebx popl %ebp @@ -2243,10 +1128,8 @@ L_bn_sqr_comba4_begin: xorl %ecx,%ecx movl (%esi),%eax # ############### Calculate word 0 - xorl %ebp,%ebp # sqr a[0]*a[0] - mull %eax addl %eax,%ebx adcl %edx,%ecx @@ -2255,12 +1138,9 @@ L_bn_sqr_comba4_begin: movl %ebx,(%edi) movl 4(%esi),%eax # saved r[0] - # ############### Calculate word 1 - xorl %ebx,%ebx # sqr a[1]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2272,12 +1152,9 @@ L_bn_sqr_comba4_begin: movl %ecx,4(%edi) movl (%esi),%edx # saved r[1] - # ############### Calculate word 2 - xorl %ecx,%ecx # sqr a[2]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2287,7 +1164,6 @@ L_bn_sqr_comba4_begin: movl 4(%esi),%eax adcl $0,%ecx # sqr a[1]*a[1] - mull %eax addl %eax,%ebp adcl %edx,%ebx @@ -2296,12 +1172,9 @@ L_bn_sqr_comba4_begin: movl %ebp,8(%edi) movl 12(%esi),%eax # saved r[2] - # ############### Calculate word 3 - xorl %ebp,%ebp # sqr a[3]*a[0] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2312,7 +1185,6 @@ L_bn_sqr_comba4_begin: adcl $0,%ebp movl 4(%esi),%edx # sqr a[2]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2324,12 +1196,9 @@ L_bn_sqr_comba4_begin: movl %ebx,12(%edi) movl 4(%esi),%edx # saved r[3] - # ############### Calculate word 4 - xorl %ebx,%ebx # sqr a[3]*a[1] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2339,7 +1208,6 @@ L_bn_sqr_comba4_begin: movl 8(%esi),%eax adcl $0,%ebx # sqr a[2]*a[2] - mull %eax addl %eax,%ecx adcl %edx,%ebp @@ -2348,12 +1216,9 @@ L_bn_sqr_comba4_begin: movl %ecx,16(%edi) movl 12(%esi),%eax # saved r[4] - # ############### Calculate word 5 - xorl %ecx,%ecx # sqr a[3]*a[2] - mull %edx addl %eax,%eax adcl %edx,%edx @@ -2364,19 +1229,15 @@ L_bn_sqr_comba4_begin: adcl $0,%ecx movl %ebp,20(%edi) # saved r[5] - # ############### Calculate word 6 - xorl %ebp,%ebp # sqr a[3]*a[3] - mull %eax addl %eax,%ebx adcl %edx,%ecx adcl $0,%ebp movl %ebx,24(%edi) # saved r[6] - movl %ecx,28(%edi) popl %ebx popl %ebp diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86-gf2m.s b/deps/openssl/asm/x86-macosx-gas/bn/x86-gf2m.s new file mode 100644 index 00000000000000..6aa546e3389c37 --- /dev/null +++ b/deps/openssl/asm/x86-macosx-gas/bn/x86-gf2m.s @@ -0,0 +1,344 @@ +.file "../openssl/crypto/bn/asm/x86-gf2m.s" +.text +.align 4 +__mul_1x1_mmx: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + andl $1073741823,%ecx + leal (%edx,%edx,1),%ebp + movl $0,(%esp) + andl $2147483647,%edx + movd %eax,%mm2 + movd %ebx,%mm3 + movl %ecx,4(%esp) + xorl %edx,%ecx + pxor %mm5,%mm5 + pxor %mm4,%mm4 + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + pcmpgtd %mm2,%mm5 + paddd %mm2,%mm2 + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + pand %mm3,%mm5 + pcmpgtd %mm2,%mm4 + movl %ecx,20(%esp) + xorl %ecx,%ebp + psllq $31,%mm5 + pand %mm3,%mm4 + movl %edx,24(%esp) + movl $7,%esi + movl %ebp,28(%esp) + movl %esi,%ebp + andl %ebx,%esi + shrl $3,%ebx + movl %ebp,%edi + psllq $30,%mm4 + andl %ebx,%edi + shrl $3,%ebx + movd (%esp,%esi,4),%mm0 + movl 
%ebp,%esi + andl %ebx,%esi + shrl $3,%ebx + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $3,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $6,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $9,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $12,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $15,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $18,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $21,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $24,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + pxor %mm4,%mm0 + psllq $27,%mm2 + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + pxor %mm5,%mm0 + psllq $30,%mm1 + addl $36,%esp + pxor %mm1,%mm0 + ret +.align 4 +__mul_1x1_ialu: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + leal (,%eax,4),%ebp + andl $1073741823,%ecx + leal (%eax,%eax,1),%edi + sarl $31,%eax + movl $0,(%esp) + andl $2147483647,%edx + movl %ecx,4(%esp) + xorl %edx,%ecx + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + movl %ecx,20(%esp) + xorl %ecx,%ebp + sarl $31,%edi + andl %ebx,%eax + movl %edx,24(%esp) + andl %ebx,%edi + movl %ebp,28(%esp) + movl %eax,%edx + shll $31,%eax + movl %edi,%ecx + shrl $1,%edx + movl $7,%esi + shll $30,%edi + andl %ebx,%esi + shrl $2,%ecx + xorl %edi,%eax + shrl $3,%ebx + movl $7,%edi + andl %ebx,%edi + shrl $3,%ebx + xorl %ecx,%edx + xorl (%esp,%esi,4),%eax + movl $7,%esi + andl %ebx,%esi + shrl $3,%ebx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $3,%ebp + andl %ebx,%edi + shrl $29,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $6,%ecx + andl %ebx,%esi + shrl $26,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $9,%ebp + andl %ebx,%edi + shrl $23,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $12,%ecx + andl %ebx,%esi + shrl $20,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $15,%ebp + andl %ebx,%edi + shrl $17,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $18,%ecx + andl %ebx,%esi + shrl $14,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $21,%ebp + andl %ebx,%edi + shrl $11,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $24,%ecx + andl %ebx,%esi + shrl $8,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl %ebp,%ecx + shll $27,%ebp + movl (%esp,%esi,4),%edi + shrl $5,%ecx + movl %edi,%esi + xorl %ebp,%eax + shll $30,%edi + xorl %ecx,%edx + shrl $2,%esi + xorl %edi,%eax + xorl %esi,%edx + addl $36,%esp + ret +.globl _bn_GF2m_mul_2x2 +.align 4 +_bn_GF2m_mul_2x2: +L_bn_GF2m_mul_2x2_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%edx),%edx 
+ movl (%edx),%eax + movl 4(%edx),%edx + testl $8388608,%eax + jz L001ialu + testl $16777216,%eax + jz L002mmx + testl $2,%edx + jz L002mmx + movups 8(%esp),%xmm0 + shufps $177,%xmm0,%xmm0 +.byte 102,15,58,68,192,1 + movl 4(%esp),%eax + movups %xmm0,(%eax) + ret +.align 4,0x90 +L002mmx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%eax + movl 32(%esp),%ebx + call __mul_1x1_mmx + movq %mm0,%mm7 + movl 28(%esp),%eax + movl 36(%esp),%ebx + call __mul_1x1_mmx + movq %mm0,%mm6 + movl 24(%esp),%eax + movl 32(%esp),%ebx + xorl 28(%esp),%eax + xorl 36(%esp),%ebx + call __mul_1x1_mmx + pxor %mm7,%mm0 + movl 20(%esp),%eax + pxor %mm6,%mm0 + movq %mm0,%mm2 + psllq $32,%mm0 + popl %edi + psrlq $32,%mm2 + popl %esi + pxor %mm6,%mm0 + popl %ebx + pxor %mm7,%mm2 + movq %mm0,(%eax) + popl %ebp + movq %mm2,8(%eax) + emms + ret +.align 4,0x90 +L001ialu: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $20,%esp + movl 44(%esp),%eax + movl 52(%esp),%ebx + call __mul_1x1_ialu + movl %eax,8(%esp) + movl %edx,12(%esp) + movl 48(%esp),%eax + movl 56(%esp),%ebx + call __mul_1x1_ialu + movl %eax,(%esp) + movl %edx,4(%esp) + movl 44(%esp),%eax + movl 52(%esp),%ebx + xorl 48(%esp),%eax + xorl 56(%esp),%ebx + call __mul_1x1_ialu + movl 40(%esp),%ebp + movl (%esp),%ebx + movl 4(%esp),%ecx + movl 8(%esp),%edi + movl 12(%esp),%esi + xorl %edx,%eax + xorl %ecx,%edx + xorl %ebx,%eax + movl %ebx,(%ebp) + xorl %edi,%edx + movl %esi,12(%ebp) + xorl %esi,%eax + addl $20,%esp + xorl %esi,%edx + popl %edi + xorl %edx,%eax + popl %esi + movl %edx,8(%ebp) + popl %ebx + movl %eax,4(%ebp) + popl %ebp + ret +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s index 48598cc62dd5d5..b544a46c6758af 100644 --- a/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s +++ b/deps/openssl/asm/x86-macosx-gas/bn/x86-mont.s @@ -41,6 +41,126 @@ L_bn_mul_mont_begin: movl %esi,20(%esp) leal -3(%edi),%ebx movl %ebp,24(%esp) + call L001PIC_me_up +L001PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L002non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 4,0x90 +L0031st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl L0031st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +L004outer: + xorl 
%ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +L005inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz L005inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle L004outer + emms + jmp L006common_tail +.align 4,0x90 +L002non_sse2: movl 8(%esp),%esi leal 1(%ebx),%ebp movl 12(%esp),%edi @@ -51,12 +171,12 @@ L_bn_mul_mont_begin: leal 4(%edi,%ebx,4),%eax orl %edx,%ebp movl (%edi),%edi - jz L001bn_sqr_mont + jz L007bn_sqr_mont movl %eax,28(%esp) movl (%esi),%eax xorl %edx,%edx .align 4,0x90 -L002mull: +L008mull: movl %edx,%ebp mull %edi addl %eax,%ebp @@ -65,7 +185,7 @@ L002mull: movl (%esi,%ecx,4),%eax cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L002mull + jl L008mull movl %edx,%ebp mull %edi movl 20(%esp),%edi @@ -83,9 +203,9 @@ L002mull: movl 4(%esi),%eax adcl $0,%edx incl %ecx - jmp L0032ndmadd + jmp L0092ndmadd .align 4,0x90 -L0041stmadd: +L0101stmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -96,7 +216,7 @@ L0041stmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,28(%esp,%ecx,4) - jl L0041stmadd + jl L0101stmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%eax @@ -119,7 +239,7 @@ L0041stmadd: adcl $0,%edx movl $1,%ecx .align 4,0x90 -L0032ndmadd: +L0092ndmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -130,7 +250,7 @@ L0032ndmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0032ndmadd + jl L0092ndmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -146,16 +266,16 @@ L0032ndmadd: movl %edx,32(%esp,%ebx,4) cmpl 28(%esp),%ecx movl %eax,36(%esp,%ebx,4) - je L005common_tail + je L006common_tail movl (%ecx),%edi movl 8(%esp),%esi movl %ecx,12(%esp) xorl %ecx,%ecx xorl %edx,%edx movl (%esi),%eax - jmp L0041stmadd + jmp L0101stmadd .align 4,0x90 -L001bn_sqr_mont: +L007bn_sqr_mont: movl %ebx,(%esp) movl %ecx,12(%esp) movl %edi,%eax @@ -166,7 +286,7 @@ L001bn_sqr_mont: andl $1,%ebx incl %ecx .align 4,0x90 -L006sqr: +L011sqr: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -178,7 +298,7 @@ L006sqr: cmpl (%esp),%ecx movl %eax,%ebx movl %ebp,28(%esp,%ecx,4) - jl L006sqr + jl L011sqr movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -202,7 +322,7 @@ L006sqr: movl 4(%esi),%eax movl $1,%ecx .align 4,0x90 -L0073rdmadd: +L0123rdmadd: movl %edx,%ebp mull %edi addl 32(%esp,%ecx,4),%ebp @@ -221,7 +341,7 @@ L0073rdmadd: adcl $0,%edx cmpl %ebx,%ecx movl %ebp,24(%esp,%ecx,4) - jl L0073rdmadd + jl L0123rdmadd movl %edx,%ebp mull %edi addl 32(%esp,%ebx,4),%ebp @@ -237,7 +357,7 @@ L0073rdmadd: movl %edx,32(%esp,%ebx,4) cmpl %ebx,%ecx movl %eax,36(%esp,%ebx,4) - je L005common_tail + je L006common_tail movl 4(%esi,%ecx,4),%edi leal 1(%ecx),%ecx movl %edi,%eax @@ -249,12 
+369,12 @@ L0073rdmadd: xorl %ebp,%ebp cmpl %ebx,%ecx leal 1(%ecx),%ecx - je L008sqrlast + je L013sqrlast movl %edx,%ebx shrl $1,%edx andl $1,%ebx .align 4,0x90 -L009sqradd: +L014sqradd: movl (%esi,%ecx,4),%eax movl %edx,%ebp mull %edi @@ -270,13 +390,13 @@ L009sqradd: cmpl (%esp),%ecx movl %ebp,28(%esp,%ecx,4) movl %eax,%ebx - jle L009sqradd + jle L014sqradd movl %edx,%ebp addl %edx,%edx shrl $31,%ebp addl %ebx,%edx adcl $0,%ebp -L008sqrlast: +L013sqrlast: movl 20(%esp),%edi movl 16(%esp),%esi imull 32(%esp),%edi @@ -291,9 +411,9 @@ L008sqrlast: adcl $0,%edx movl $1,%ecx movl 4(%esi),%eax - jmp L0073rdmadd + jmp L0123rdmadd .align 4,0x90 -L005common_tail: +L006common_tail: movl 16(%esp),%ebp movl 4(%esp),%edi leal 32(%esp),%esi @@ -301,13 +421,13 @@ L005common_tail: movl %ebx,%ecx xorl %edx,%edx .align 4,0x90 -L010sub: +L015sub: sbbl (%ebp,%edx,4),%eax movl %eax,(%edi,%edx,4) decl %ecx movl 4(%esi,%edx,4),%eax leal 1(%edx),%edx - jge L010sub + jge L015sub sbbl $0,%eax andl %eax,%esi notl %eax @@ -315,12 +435,12 @@ L010sub: andl %eax,%ebp orl %ebp,%esi .align 4,0x90 -L011copy: +L016copy: movl (%esi,%ebx,4),%eax movl %eax,(%edi,%ebx,4) movl %ecx,32(%esp,%ebx,4) decl %ebx - jge L011copy + jge L016copy movl 24(%esp),%esp movl $1,%eax L000just_leave: @@ -334,3 +454,8 @@ L000just_leave: .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/cast/cast-586.s b/deps/openssl/asm/x86-macosx-gas/cast/cast-586.s index 9314dff21d423e..3e797b6145f527 100644 --- a/deps/openssl/asm/x86-macosx-gas/cast/cast-586.s +++ b/deps/openssl/asm/x86-macosx-gas/cast/cast-586.s @@ -12,16 +12,13 @@ L_CAST_encrypt_begin: pushl %esi pushl %edi # Load the 2 words - movl (%ebx),%edi movl 4(%ebx),%esi # Get short key flag - movl 128(%ebp),%eax pushl %eax xorl %eax,%eax # round 0 - movl (%ebp),%edx movl 4(%ebp),%ecx addl %esi,%edx @@ -43,7 +40,6 @@ L_CAST_encrypt_begin: addl %ebx,%ecx xorl %ecx,%edi # round 1 - movl 8(%ebp),%edx movl 12(%ebp),%ecx xorl %edi,%edx @@ -65,7 +61,6 @@ L_CAST_encrypt_begin: xorl %ebx,%ecx xorl %ecx,%esi # round 2 - movl 16(%ebp),%edx movl 20(%ebp),%ecx subl %esi,%edx @@ -87,7 +82,6 @@ L_CAST_encrypt_begin: subl %ebx,%ecx xorl %ecx,%edi # round 3 - movl 24(%ebp),%edx movl 28(%ebp),%ecx addl %edi,%edx @@ -109,7 +103,6 @@ L_CAST_encrypt_begin: addl %ebx,%ecx xorl %ecx,%esi # round 4 - movl 32(%ebp),%edx movl 36(%ebp),%ecx xorl %esi,%edx @@ -131,7 +124,6 @@ L_CAST_encrypt_begin: xorl %ebx,%ecx xorl %ecx,%edi # round 5 - movl 40(%ebp),%edx movl 44(%ebp),%ecx subl %edi,%edx @@ -153,7 +145,6 @@ L_CAST_encrypt_begin: subl %ebx,%ecx xorl %ecx,%esi # round 6 - movl 48(%ebp),%edx movl 52(%ebp),%ecx addl %esi,%edx @@ -175,7 +166,6 @@ L_CAST_encrypt_begin: addl %ebx,%ecx xorl %ecx,%edi # round 7 - movl 56(%ebp),%edx movl 60(%ebp),%ecx xorl %edi,%edx @@ -197,7 +187,6 @@ L_CAST_encrypt_begin: xorl %ebx,%ecx xorl %ecx,%esi # round 8 - movl 64(%ebp),%edx movl 68(%ebp),%ecx subl %esi,%edx @@ -219,7 +208,6 @@ L_CAST_encrypt_begin: subl %ebx,%ecx xorl %ecx,%edi # round 9 - movl 72(%ebp),%edx movl 76(%ebp),%ecx addl %edi,%edx @@ -241,7 +229,6 @@ L_CAST_encrypt_begin: addl %ebx,%ecx xorl %ecx,%esi # round 10 - movl 80(%ebp),%edx movl 84(%ebp),%ecx xorl %esi,%edx @@ -263,7 +250,6 @@ L_CAST_encrypt_begin: xorl %ebx,%ecx xorl 
%ecx,%edi # round 11 - movl 88(%ebp),%edx movl 92(%ebp),%ecx subl %edi,%edx @@ -285,12 +271,10 @@ L_CAST_encrypt_begin: subl %ebx,%ecx xorl %ecx,%esi # test short key flag - popl %edx orl %edx,%edx jnz L000cast_enc_done # round 12 - movl 96(%ebp),%edx movl 100(%ebp),%ecx addl %esi,%edx @@ -312,7 +296,6 @@ L_CAST_encrypt_begin: addl %ebx,%ecx xorl %ecx,%edi # round 13 - movl 104(%ebp),%edx movl 108(%ebp),%ecx xorl %edi,%edx @@ -334,7 +317,6 @@ L_CAST_encrypt_begin: xorl %ebx,%ecx xorl %ecx,%esi # round 14 - movl 112(%ebp),%edx movl 116(%ebp),%ecx subl %esi,%edx @@ -356,7 +338,6 @@ L_CAST_encrypt_begin: subl %ebx,%ecx xorl %ecx,%edi # round 15 - movl 120(%ebp),%edx movl 124(%ebp),%ecx addl %edi,%edx @@ -399,17 +380,14 @@ L_CAST_decrypt_begin: pushl %esi pushl %edi # Load the 2 words - movl (%ebx),%edi movl 4(%ebx),%esi # Get short key flag - movl 128(%ebp),%eax orl %eax,%eax jnz L001cast_dec_skip xorl %eax,%eax # round 15 - movl 120(%ebp),%edx movl 124(%ebp),%ecx addl %esi,%edx @@ -431,7 +409,6 @@ L_CAST_decrypt_begin: addl %ebx,%ecx xorl %ecx,%edi # round 14 - movl 112(%ebp),%edx movl 116(%ebp),%ecx subl %edi,%edx @@ -453,7 +430,6 @@ L_CAST_decrypt_begin: subl %ebx,%ecx xorl %ecx,%esi # round 13 - movl 104(%ebp),%edx movl 108(%ebp),%ecx xorl %esi,%edx @@ -475,7 +451,6 @@ L_CAST_decrypt_begin: xorl %ebx,%ecx xorl %ecx,%edi # round 12 - movl 96(%ebp),%edx movl 100(%ebp),%ecx addl %edi,%edx @@ -498,7 +473,6 @@ L_CAST_decrypt_begin: xorl %ecx,%esi L001cast_dec_skip: # round 11 - movl 88(%ebp),%edx movl 92(%ebp),%ecx subl %esi,%edx @@ -520,7 +494,6 @@ L001cast_dec_skip: subl %ebx,%ecx xorl %ecx,%edi # round 10 - movl 80(%ebp),%edx movl 84(%ebp),%ecx xorl %edi,%edx @@ -542,7 +515,6 @@ L001cast_dec_skip: xorl %ebx,%ecx xorl %ecx,%esi # round 9 - movl 72(%ebp),%edx movl 76(%ebp),%ecx addl %esi,%edx @@ -564,7 +536,6 @@ L001cast_dec_skip: addl %ebx,%ecx xorl %ecx,%edi # round 8 - movl 64(%ebp),%edx movl 68(%ebp),%ecx subl %edi,%edx @@ -586,7 +557,6 @@ L001cast_dec_skip: subl %ebx,%ecx xorl %ecx,%esi # round 7 - movl 56(%ebp),%edx movl 60(%ebp),%ecx xorl %esi,%edx @@ -608,7 +578,6 @@ L001cast_dec_skip: xorl %ebx,%ecx xorl %ecx,%edi # round 6 - movl 48(%ebp),%edx movl 52(%ebp),%ecx addl %edi,%edx @@ -630,7 +599,6 @@ L001cast_dec_skip: addl %ebx,%ecx xorl %ecx,%esi # round 5 - movl 40(%ebp),%edx movl 44(%ebp),%ecx subl %esi,%edx @@ -652,7 +620,6 @@ L001cast_dec_skip: subl %ebx,%ecx xorl %ecx,%edi # round 4 - movl 32(%ebp),%edx movl 36(%ebp),%ecx xorl %edi,%edx @@ -674,7 +641,6 @@ L001cast_dec_skip: xorl %ebx,%ecx xorl %ecx,%esi # round 3 - movl 24(%ebp),%edx movl 28(%ebp),%ecx addl %esi,%edx @@ -696,7 +662,6 @@ L001cast_dec_skip: addl %ebx,%ecx xorl %ecx,%edi # round 2 - movl 16(%ebp),%edx movl 20(%ebp),%ecx subl %edi,%edx @@ -718,7 +683,6 @@ L001cast_dec_skip: subl %ebx,%ecx xorl %ecx,%esi # round 1 - movl 8(%ebp),%edx movl 12(%ebp),%ecx xorl %esi,%edx @@ -740,7 +704,6 @@ L001cast_dec_skip: xorl %ebx,%ecx xorl %ecx,%edi # round 0 - movl (%ebp),%edx movl 4(%ebp),%ecx addl %edi,%edx @@ -781,7 +744,6 @@ L_CAST_cbc_encrypt_begin: pushl %edi movl 28(%esp),%ebp # getting iv ptr from parameter 4 - movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi @@ -793,10 +755,8 @@ L_CAST_cbc_encrypt_begin: movl 36(%esp),%esi movl 40(%esp),%edi # getting encrypt flag from parameter 5 - movl 56(%esp),%ecx # get and push parameter 3 - movl 48(%esp),%eax pushl %eax pushl %ebx diff --git a/deps/openssl/asm/x86-macosx-gas/des/crypt586.s b/deps/openssl/asm/x86-macosx-gas/des/crypt586.s index 7d0074ec2a9337..1731c53faac06e 
100644 --- a/deps/openssl/asm/x86-macosx-gas/des/crypt586.s +++ b/deps/openssl/asm/x86-macosx-gas/des/crypt586.s @@ -10,7 +10,6 @@ L_fcrypt_body_begin: pushl %edi # Load the 2 words - xorl %edi,%edi xorl %esi,%esi call L000PIC_me_up @@ -23,7 +22,6 @@ L000PIC_me_up: L001start: # Round 0 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -74,7 +72,6 @@ L001start: movl 32(%esp),%ebp # Round 1 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -125,7 +122,6 @@ L001start: movl 32(%esp),%ebp # Round 2 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -176,7 +172,6 @@ L001start: movl 32(%esp),%ebp # Round 3 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -227,7 +222,6 @@ L001start: movl 32(%esp),%ebp # Round 4 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -278,7 +272,6 @@ L001start: movl 32(%esp),%ebp # Round 5 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -329,7 +322,6 @@ L001start: movl 32(%esp),%ebp # Round 6 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -380,7 +372,6 @@ L001start: movl 32(%esp),%ebp # Round 7 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -431,7 +422,6 @@ L001start: movl 32(%esp),%ebp # Round 8 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -482,7 +472,6 @@ L001start: movl 32(%esp),%ebp # Round 9 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -533,7 +522,6 @@ L001start: movl 32(%esp),%ebp # Round 10 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -584,7 +572,6 @@ L001start: movl 32(%esp),%ebp # Round 11 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -635,7 +622,6 @@ L001start: movl 32(%esp),%ebp # Round 12 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -686,7 +672,6 @@ L001start: movl 32(%esp),%ebp # Round 13 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -737,7 +722,6 @@ L001start: movl 32(%esp),%ebp # Round 14 - movl 36(%esp),%eax movl %esi,%edx shrl $16,%edx @@ -788,7 +772,6 @@ L001start: movl 32(%esp),%ebp # Round 15 - movl 36(%esp),%eax movl %edi,%edx shrl $16,%edx @@ -846,7 +829,6 @@ L001start: jnz L001start # FP - movl 28(%esp),%edx rorl $1,%edi movl %esi,%eax diff --git a/deps/openssl/asm/x86-macosx-gas/des/des-586.s b/deps/openssl/asm/x86-macosx-gas/des/des-586.s index f9e0ad333762bf..43354871fcd4fa 100644 --- a/deps/openssl/asm/x86-macosx-gas/des/des-586.s +++ b/deps/openssl/asm/x86-macosx-gas/des/des-586.s @@ -5,7 +5,6 @@ __x86_DES_encrypt: pushl %ecx # Round 0 - movl (%ecx),%eax xorl %ebx,%ebx movl 4(%ecx),%edx @@ -35,7 +34,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 1 - movl 8(%ecx),%eax xorl %ebx,%ebx movl 12(%ecx),%edx @@ -65,7 +63,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 2 - movl 16(%ecx),%eax xorl %ebx,%ebx movl 20(%ecx),%edx @@ -95,7 +92,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 3 - movl 24(%ecx),%eax xorl %ebx,%ebx movl 28(%ecx),%edx @@ -125,7 +121,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 4 - movl 32(%ecx),%eax xorl %ebx,%ebx movl 36(%ecx),%edx @@ -155,7 +150,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 5 - movl 40(%ecx),%eax xorl %ebx,%ebx movl 44(%ecx),%edx @@ -185,7 +179,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 6 - movl 48(%ecx),%eax xorl %ebx,%ebx movl 52(%ecx),%edx @@ -215,7 +208,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 7 - movl 56(%ecx),%eax xorl 
%ebx,%ebx movl 60(%ecx),%edx @@ -245,7 +237,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 8 - movl 64(%ecx),%eax xorl %ebx,%ebx movl 68(%ecx),%edx @@ -275,7 +266,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 9 - movl 72(%ecx),%eax xorl %ebx,%ebx movl 76(%ecx),%edx @@ -305,7 +295,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 10 - movl 80(%ecx),%eax xorl %ebx,%ebx movl 84(%ecx),%edx @@ -335,7 +324,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 11 - movl 88(%ecx),%eax xorl %ebx,%ebx movl 92(%ecx),%edx @@ -365,7 +353,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 12 - movl 96(%ecx),%eax xorl %ebx,%ebx movl 100(%ecx),%edx @@ -395,7 +382,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 13 - movl 104(%ecx),%eax xorl %ebx,%ebx movl 108(%ecx),%edx @@ -425,7 +411,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 14 - movl 112(%ecx),%eax xorl %ebx,%ebx movl 116(%ecx),%edx @@ -455,7 +440,6 @@ __x86_DES_encrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 15 - movl 120(%ecx),%eax xorl %ebx,%ebx movl 124(%ecx),%edx @@ -490,7 +474,6 @@ __x86_DES_encrypt: __x86_DES_decrypt: pushl %ecx # Round 15 - movl 120(%ecx),%eax xorl %ebx,%ebx movl 124(%ecx),%edx @@ -520,7 +503,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 14 - movl 112(%ecx),%eax xorl %ebx,%ebx movl 116(%ecx),%edx @@ -550,7 +532,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 13 - movl 104(%ecx),%eax xorl %ebx,%ebx movl 108(%ecx),%edx @@ -580,7 +561,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 12 - movl 96(%ecx),%eax xorl %ebx,%ebx movl 100(%ecx),%edx @@ -610,7 +590,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 11 - movl 88(%ecx),%eax xorl %ebx,%ebx movl 92(%ecx),%edx @@ -640,7 +619,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 10 - movl 80(%ecx),%eax xorl %ebx,%ebx movl 84(%ecx),%edx @@ -670,7 +648,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 9 - movl 72(%ecx),%eax xorl %ebx,%ebx movl 76(%ecx),%edx @@ -700,7 +677,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 8 - movl 64(%ecx),%eax xorl %ebx,%ebx movl 68(%ecx),%edx @@ -730,7 +706,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 7 - movl 56(%ecx),%eax xorl %ebx,%ebx movl 60(%ecx),%edx @@ -760,7 +735,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 6 - movl 48(%ecx),%eax xorl %ebx,%ebx movl 52(%ecx),%edx @@ -790,7 +764,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 5 - movl 40(%ecx),%eax xorl %ebx,%ebx movl 44(%ecx),%edx @@ -820,7 +793,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 4 - movl 32(%ecx),%eax xorl %ebx,%ebx movl 36(%ecx),%edx @@ -850,7 +822,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 3 - movl 24(%ecx),%eax xorl %ebx,%ebx movl 28(%ecx),%edx @@ -880,7 +851,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 2 - movl 
16(%ecx),%eax xorl %ebx,%ebx movl 20(%ecx),%edx @@ -910,7 +880,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%esi xorl 0x500(%ebp,%edx,1),%esi # Round 1 - movl 8(%ecx),%eax xorl %ebx,%ebx movl 12(%ecx),%edx @@ -940,7 +909,6 @@ __x86_DES_decrypt: xorl 0x400(%ebp,%eax,1),%edi xorl 0x500(%ebp,%edx,1),%edi # Round 0 - movl (%ecx),%eax xorl %ebx,%ebx movl 4(%ecx),%edx @@ -979,7 +947,6 @@ L_DES_encrypt1_begin: pushl %edi # Load the 2 words - movl 12(%esp),%esi xorl %ecx,%ecx pushl %ebx @@ -989,7 +956,6 @@ L_DES_encrypt1_begin: movl 4(%esi),%edi # IP - roll $4,%eax movl %eax,%esi xorl %edi,%eax @@ -1029,7 +995,7 @@ L_DES_encrypt1_begin: call L000pic_point L000pic_point: popl %ebp - leal _DES_SPtrans-L000pic_point(%ebp),%ebp + leal Ldes_sptrans-L000pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je L001decrypt @@ -1040,7 +1006,6 @@ L001decrypt: L002done: # FP - movl 20(%esp),%edx rorl $1,%esi movl %edi,%eax @@ -1093,7 +1058,6 @@ L_DES_encrypt2_begin: pushl %edi # Load the 2 words - movl 12(%esp),%eax xorl %ecx,%ecx pushl %ebx @@ -1106,7 +1070,7 @@ L_DES_encrypt2_begin: call L003pic_point L003pic_point: popl %ebp - leal _DES_SPtrans-L003pic_point(%ebp),%ebp + leal Ldes_sptrans-L003pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je L004decrypt @@ -1117,7 +1081,6 @@ L004decrypt: L005done: # Fixup - rorl $3,%edi movl 20(%esp),%eax rorl $3,%esi @@ -1139,13 +1102,11 @@ L_DES_encrypt3_begin: pushl %edi # Load the data words - movl (%ebx),%edi movl 4(%ebx),%esi subl $12,%esp # IP - roll $4,%edi movl %edi,%edx xorl %esi,%edi @@ -1205,7 +1166,6 @@ L_DES_encrypt3_begin: movl 4(%ebx),%esi # FP - roll $2,%esi roll $3,%edi movl %edi,%eax @@ -1261,13 +1221,11 @@ L_DES_decrypt3_begin: pushl %edi # Load the data words - movl (%ebx),%edi movl 4(%ebx),%esi subl $12,%esp # IP - roll $4,%edi movl %edi,%edx xorl %esi,%edi @@ -1327,7 +1285,6 @@ L_DES_decrypt3_begin: movl 4(%ebx),%esi # FP - roll $2,%esi roll $3,%edi movl %edi,%eax @@ -1383,7 +1340,6 @@ L_DES_ncbc_encrypt_begin: pushl %edi movl 28(%esp),%ebp # getting iv ptr from parameter 4 - movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi @@ -1395,13 +1351,10 @@ L_DES_ncbc_encrypt_begin: movl 36(%esp),%esi movl 40(%esp),%edi # getting encrypt flag from parameter 5 - movl 56(%esp),%ecx # get and push parameter 5 - pushl %ecx # get and push parameter 3 - movl 52(%esp),%eax pushl %eax pushl %ebx @@ -1565,7 +1518,6 @@ L_DES_ede3_cbc_encrypt_begin: pushl %edi movl 28(%esp),%ebp # getting iv ptr from parameter 6 - movl 44(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi @@ -1577,18 +1529,14 @@ L_DES_ede3_cbc_encrypt_begin: movl 36(%esp),%esi movl 40(%esp),%edi # getting encrypt flag from parameter 7 - movl 64(%esp),%ecx # get and push parameter 5 - movl 56(%esp),%eax pushl %eax # get and push parameter 4 - movl 56(%esp),%eax pushl %eax # get and push parameter 3 - movl 56(%esp),%eax pushl %eax pushl %ebx @@ -1743,6 +1691,7 @@ L035cbc_enc_jmp_table: .align 6,0x90 .align 6,0x90 _DES_SPtrans: +Ldes_sptrans: .long 34080768,524288,33554434,34080770 .long 33554432,526338,524290,33554434 .long 526338,34080768,34078720,2050 diff --git a/deps/openssl/asm/x86-macosx-gas/md5/md5-586.s b/deps/openssl/asm/x86-macosx-gas/md5/md5-586.s index 5336574c86f657..cd5dd459ca0337 100644 --- a/deps/openssl/asm/x86-macosx-gas/md5/md5-586.s +++ b/deps/openssl/asm/x86-macosx-gas/md5/md5-586.s @@ -22,11 +22,9 @@ L_md5_block_asm_data_order_begin: L000start: # R0 section - movl %ecx,%edi movl (%esi),%ebp # R0 0 - xorl %edx,%edi andl %ebx,%edi leal 3614090360(%eax,%ebp,1),%eax @@ -37,7 
+35,6 @@ L000start: movl 4(%esi),%ebp addl %ebx,%eax # R0 1 - xorl %ecx,%edi andl %eax,%edi leal 3905402710(%edx,%ebp,1),%edx @@ -48,7 +45,6 @@ L000start: movl 8(%esi),%ebp addl %eax,%edx # R0 2 - xorl %ebx,%edi andl %edx,%edi leal 606105819(%ecx,%ebp,1),%ecx @@ -59,7 +55,6 @@ L000start: movl 12(%esi),%ebp addl %edx,%ecx # R0 3 - xorl %eax,%edi andl %ecx,%edi leal 3250441966(%ebx,%ebp,1),%ebx @@ -70,7 +65,6 @@ L000start: movl 16(%esi),%ebp addl %ecx,%ebx # R0 4 - xorl %edx,%edi andl %ebx,%edi leal 4118548399(%eax,%ebp,1),%eax @@ -81,7 +75,6 @@ L000start: movl 20(%esi),%ebp addl %ebx,%eax # R0 5 - xorl %ecx,%edi andl %eax,%edi leal 1200080426(%edx,%ebp,1),%edx @@ -92,7 +85,6 @@ L000start: movl 24(%esi),%ebp addl %eax,%edx # R0 6 - xorl %ebx,%edi andl %edx,%edi leal 2821735955(%ecx,%ebp,1),%ecx @@ -103,7 +95,6 @@ L000start: movl 28(%esi),%ebp addl %edx,%ecx # R0 7 - xorl %eax,%edi andl %ecx,%edi leal 4249261313(%ebx,%ebp,1),%ebx @@ -114,7 +105,6 @@ L000start: movl 32(%esi),%ebp addl %ecx,%ebx # R0 8 - xorl %edx,%edi andl %ebx,%edi leal 1770035416(%eax,%ebp,1),%eax @@ -125,7 +115,6 @@ L000start: movl 36(%esi),%ebp addl %ebx,%eax # R0 9 - xorl %ecx,%edi andl %eax,%edi leal 2336552879(%edx,%ebp,1),%edx @@ -136,7 +125,6 @@ L000start: movl 40(%esi),%ebp addl %eax,%edx # R0 10 - xorl %ebx,%edi andl %edx,%edi leal 4294925233(%ecx,%ebp,1),%ecx @@ -147,7 +135,6 @@ L000start: movl 44(%esi),%ebp addl %edx,%ecx # R0 11 - xorl %eax,%edi andl %ecx,%edi leal 2304563134(%ebx,%ebp,1),%ebx @@ -158,7 +145,6 @@ L000start: movl 48(%esi),%ebp addl %ecx,%ebx # R0 12 - xorl %edx,%edi andl %ebx,%edi leal 1804603682(%eax,%ebp,1),%eax @@ -169,7 +155,6 @@ L000start: movl 52(%esi),%ebp addl %ebx,%eax # R0 13 - xorl %ecx,%edi andl %eax,%edi leal 4254626195(%edx,%ebp,1),%edx @@ -180,7 +165,6 @@ L000start: movl 56(%esi),%ebp addl %eax,%edx # R0 14 - xorl %ebx,%edi andl %edx,%edi leal 2792965006(%ecx,%ebp,1),%ecx @@ -191,7 +175,6 @@ L000start: movl 60(%esi),%ebp addl %edx,%ecx # R0 15 - xorl %eax,%edi andl %ecx,%edi leal 1236535329(%ebx,%ebp,1),%ebx @@ -203,9 +186,7 @@ L000start: addl %ecx,%ebx # R1 section - # R1 16 - leal 4129170786(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi @@ -216,7 +197,6 @@ L000start: roll $5,%eax addl %ebx,%eax # R1 17 - leal 3225465664(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi @@ -227,7 +207,6 @@ L000start: roll $9,%edx addl %eax,%edx # R1 18 - leal 643717713(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi @@ -238,7 +217,6 @@ L000start: roll $14,%ecx addl %edx,%ecx # R1 19 - leal 3921069994(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi @@ -249,7 +227,6 @@ L000start: roll $20,%ebx addl %ecx,%ebx # R1 20 - leal 3593408605(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi @@ -260,7 +237,6 @@ L000start: roll $5,%eax addl %ebx,%eax # R1 21 - leal 38016083(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi @@ -271,7 +247,6 @@ L000start: roll $9,%edx addl %eax,%edx # R1 22 - leal 3634488961(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi @@ -282,7 +257,6 @@ L000start: roll $14,%ecx addl %edx,%ecx # R1 23 - leal 3889429448(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi @@ -293,7 +267,6 @@ L000start: roll $20,%ebx addl %ecx,%ebx # R1 24 - leal 568446438(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi @@ -304,7 +277,6 @@ L000start: roll $5,%eax addl %ebx,%eax # R1 25 - leal 3275163606(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi @@ -315,7 +287,6 @@ L000start: roll $9,%edx addl %eax,%edx # R1 26 - leal 4107603335(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi @@ -326,7 +297,6 @@ L000start: roll 
$14,%ecx addl %edx,%ecx # R1 27 - leal 1163531501(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi @@ -337,7 +307,6 @@ L000start: roll $20,%ebx addl %ecx,%ebx # R1 28 - leal 2850285829(%eax,%ebp,1),%eax xorl %ebx,%edi andl %edx,%edi @@ -348,7 +317,6 @@ L000start: roll $5,%eax addl %ebx,%eax # R1 29 - leal 4243563512(%edx,%ebp,1),%edx xorl %eax,%edi andl %ecx,%edi @@ -359,7 +327,6 @@ L000start: roll $9,%edx addl %eax,%edx # R1 30 - leal 1735328473(%ecx,%ebp,1),%ecx xorl %edx,%edi andl %ebx,%edi @@ -370,7 +337,6 @@ L000start: roll $14,%ecx addl %edx,%ecx # R1 31 - leal 2368359562(%ebx,%ebp,1),%ebx xorl %ecx,%edi andl %eax,%edi @@ -382,9 +348,7 @@ L000start: addl %ecx,%ebx # R2 section - # R2 32 - xorl %edx,%edi xorl %ebx,%edi leal 4294588738(%eax,%ebp,1),%eax @@ -393,7 +357,6 @@ L000start: movl 32(%esi),%ebp movl %ebx,%edi # R2 33 - leal 2272392833(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi @@ -404,7 +367,6 @@ L000start: roll $11,%edx addl %eax,%edx # R2 34 - xorl %ebx,%edi xorl %edx,%edi leal 1839030562(%ecx,%ebp,1),%ecx @@ -413,7 +375,6 @@ L000start: movl 56(%esi),%ebp movl %edx,%edi # R2 35 - leal 4259657740(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi @@ -424,7 +385,6 @@ L000start: roll $23,%ebx addl %ecx,%ebx # R2 36 - xorl %edx,%edi xorl %ebx,%edi leal 2763975236(%eax,%ebp,1),%eax @@ -433,7 +393,6 @@ L000start: movl 16(%esi),%ebp movl %ebx,%edi # R2 37 - leal 1272893353(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi @@ -444,7 +403,6 @@ L000start: roll $11,%edx addl %eax,%edx # R2 38 - xorl %ebx,%edi xorl %edx,%edi leal 4139469664(%ecx,%ebp,1),%ecx @@ -453,7 +411,6 @@ L000start: movl 40(%esi),%ebp movl %edx,%edi # R2 39 - leal 3200236656(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi @@ -464,7 +421,6 @@ L000start: roll $23,%ebx addl %ecx,%ebx # R2 40 - xorl %edx,%edi xorl %ebx,%edi leal 681279174(%eax,%ebp,1),%eax @@ -473,7 +429,6 @@ L000start: movl (%esi),%ebp movl %ebx,%edi # R2 41 - leal 3936430074(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi @@ -484,7 +439,6 @@ L000start: roll $11,%edx addl %eax,%edx # R2 42 - xorl %ebx,%edi xorl %edx,%edi leal 3572445317(%ecx,%ebp,1),%ecx @@ -493,7 +447,6 @@ L000start: movl 24(%esi),%ebp movl %edx,%edi # R2 43 - leal 76029189(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi @@ -504,7 +457,6 @@ L000start: roll $23,%ebx addl %ecx,%ebx # R2 44 - xorl %edx,%edi xorl %ebx,%edi leal 3654602809(%eax,%ebp,1),%eax @@ -513,7 +465,6 @@ L000start: movl 48(%esi),%ebp movl %ebx,%edi # R2 45 - leal 3873151461(%edx,%ebp,1),%edx addl %ebx,%eax xorl %ecx,%edi @@ -524,7 +475,6 @@ L000start: roll $11,%edx addl %eax,%edx # R2 46 - xorl %ebx,%edi xorl %edx,%edi leal 530742520(%ecx,%ebp,1),%ecx @@ -533,7 +483,6 @@ L000start: movl 8(%esi),%ebp movl %edx,%edi # R2 47 - leal 3299628645(%ebx,%ebp,1),%ebx addl %edx,%ecx xorl %eax,%edi @@ -545,9 +494,7 @@ L000start: addl %ecx,%ebx # R3 section - # R3 48 - xorl %edx,%edi orl %ebx,%edi leal 4096336452(%eax,%ebp,1),%eax @@ -559,7 +506,6 @@ L000start: xorl %ecx,%edi addl %ebx,%eax # R3 49 - orl %eax,%edi leal 1126891415(%edx,%ebp,1),%edx xorl %ebx,%edi @@ -570,7 +516,6 @@ L000start: xorl %ebx,%edi addl %eax,%edx # R3 50 - orl %edx,%edi leal 2878612391(%ecx,%ebp,1),%ecx xorl %eax,%edi @@ -581,7 +526,6 @@ L000start: xorl %eax,%edi addl %edx,%ecx # R3 51 - orl %ecx,%edi leal 4237533241(%ebx,%ebp,1),%ebx xorl %edx,%edi @@ -592,7 +536,6 @@ L000start: xorl %edx,%edi addl %ecx,%ebx # R3 52 - orl %ebx,%edi leal 1700485571(%eax,%ebp,1),%eax xorl %ecx,%edi @@ -603,7 +546,6 @@ L000start: xorl %ecx,%edi addl %ebx,%eax # R3 53 - orl 
%eax,%edi leal 2399980690(%edx,%ebp,1),%edx xorl %ebx,%edi @@ -614,7 +556,6 @@ L000start: xorl %ebx,%edi addl %eax,%edx # R3 54 - orl %edx,%edi leal 4293915773(%ecx,%ebp,1),%ecx xorl %eax,%edi @@ -625,7 +566,6 @@ L000start: xorl %eax,%edi addl %edx,%ecx # R3 55 - orl %ecx,%edi leal 2240044497(%ebx,%ebp,1),%ebx xorl %edx,%edi @@ -636,7 +576,6 @@ L000start: xorl %edx,%edi addl %ecx,%ebx # R3 56 - orl %ebx,%edi leal 1873313359(%eax,%ebp,1),%eax xorl %ecx,%edi @@ -647,7 +586,6 @@ L000start: xorl %ecx,%edi addl %ebx,%eax # R3 57 - orl %eax,%edi leal 4264355552(%edx,%ebp,1),%edx xorl %ebx,%edi @@ -658,7 +596,6 @@ L000start: xorl %ebx,%edi addl %eax,%edx # R3 58 - orl %edx,%edi leal 2734768916(%ecx,%ebp,1),%ecx xorl %eax,%edi @@ -669,7 +606,6 @@ L000start: xorl %eax,%edi addl %edx,%ecx # R3 59 - orl %ecx,%edi leal 1309151649(%ebx,%ebp,1),%ebx xorl %edx,%edi @@ -680,7 +616,6 @@ L000start: xorl %edx,%edi addl %ecx,%ebx # R3 60 - orl %ebx,%edi leal 4149444226(%eax,%ebp,1),%eax xorl %ecx,%edi @@ -691,7 +626,6 @@ L000start: xorl %ecx,%edi addl %ebx,%eax # R3 61 - orl %eax,%edi leal 3174756917(%edx,%ebp,1),%edx xorl %ebx,%edi @@ -702,7 +636,6 @@ L000start: xorl %ebx,%edi addl %eax,%edx # R3 62 - orl %edx,%edi leal 718787259(%ecx,%ebp,1),%ecx xorl %eax,%edi @@ -713,7 +646,6 @@ L000start: xorl %eax,%edi addl %edx,%ecx # R3 63 - orl %ecx,%edi leal 3951481745(%ebx,%ebp,1),%ebx xorl %edx,%edi diff --git a/deps/openssl/asm/x86-macosx-gas/modes/ghash-x86.s b/deps/openssl/asm/x86-macosx-gas/modes/ghash-x86.s index dc6ba14e6956cb..c68edef5be97d8 100644 --- a/deps/openssl/asm/x86-macosx-gas/modes/ghash-x86.s +++ b/deps/openssl/asm/x86-macosx-gas/modes/ghash-x86.s @@ -199,415 +199,93 @@ L004x86_break: popl %ebx popl %ebp ret +.globl _gcm_gmult_4bit_mmx .align 4 -__mmx_gmult_4bit_inner: +_gcm_gmult_4bit_mmx: +L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call L005pic_point +L005pic_point: + popl %eax + leal Lrem_4bit-L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx xorl %ecx,%ecx movl %ebx,%edx movb %dl,%cl + movl $14,%ebp shlb $4,%cl andl $240,%edx movq 8(%esi,%ecx,1),%mm0 movq (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 14(%edi),%cl - psllq $60,%mm2 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl + jmp L006mmx_loop +.align 4,0x90 +L006mmx_loop: psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 - movb 13(%edi),%cl + movb (%edi,%ebp,1),%cl psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx + decl %ebp movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 12(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp pxor (%esi,%edx,1),%mm1 movl %ecx,%edx - movd %mm0,%ebx pxor %mm2,%mm0 + js L007mmx_break shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 
andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 11(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 10(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 9(%edi),%cl - psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 8(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx pxor %mm2,%mm0 + jmp L006mmx_loop +.align 4,0x90 +L007mmx_break: shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 7(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 6(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%ecx,1),%mm0 psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 5(%edi),%cl - psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp pxor %mm2,%mm0 psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 4(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq 
%mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 3(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb 2(%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 movq %mm1,%mm2 psrlq $4,%mm1 pxor 8(%esi,%edx,1),%mm0 - movb 1(%edi),%cl psllq $60,%mm2 pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx movd %mm0,%ebx - pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - movb (%edi),%cl - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp pxor (%esi,%edx,1),%mm1 - movl %ecx,%edx - movd %mm0,%ebx pxor %mm2,%mm0 - shlb $4,%cl - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%ecx,1),%mm0 - psllq $60,%mm2 - andl $240,%edx - pxor (%eax,%ebp,8),%mm1 - andl $15,%ebx - pxor (%esi,%ecx,1),%mm1 - movd %mm0,%ebp - pxor %mm2,%mm0 - psrlq $4,%mm0 - movq %mm1,%mm2 - psrlq $4,%mm1 - pxor 8(%esi,%edx,1),%mm0 - psllq $60,%mm2 - pxor (%eax,%ebx,8),%mm1 - andl $15,%ebp - pxor (%esi,%edx,1),%mm1 - movd %mm0,%ebx - pxor %mm2,%mm0 - movl 4(%eax,%ebp,8),%edi psrlq $32,%mm0 movd %mm1,%edx psrlq $32,%mm1 movd %mm0,%ecx movd %mm1,%ebp - shll $4,%edi bswap %ebx bswap %edx bswap %ecx - xorl %edi,%ebp bswap %ebp - ret -.globl _gcm_gmult_4bit_mmx -.align 4 -_gcm_gmult_4bit_mmx: -L_gcm_gmult_4bit_mmx_begin: - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 20(%esp),%edi - movl 24(%esp),%esi - call L005pic_point -L005pic_point: - popl %eax - leal Lrem_4bit-L005pic_point(%eax),%eax - movzbl 15(%edi),%ebx - call __mmx_gmult_4bit_inner - movl 20(%esp),%edi emms movl %ebx,12(%edi) movl %edx,4(%edi) @@ -626,60 +304,909 @@ L_gcm_ghash_4bit_mmx_begin: pushl %ebx pushl %esi pushl %edi - movl 20(%esp),%ebp - movl 24(%esp),%esi - movl 28(%esp),%edi - movl 32(%esp),%ecx - call L006pic_point -L006pic_point: - popl %eax - leal Lrem_4bit-L006pic_point(%eax),%eax - addl %edi,%ecx - movl %ecx,32(%esp) - subl $20,%esp - movl 12(%ebp),%ebx - movl 4(%ebp),%edx - movl 8(%ebp),%ecx - movl (%ebp),%ebp - jmp L007mmx_outer_loop + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call L008pic_point +L008pic_point: + popl %esi + leal Lrem_8bit-L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + 
leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll $4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + movq 80(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 
104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx .align 4,0x90 -L007mmx_outer_loop: - xorl 12(%edi),%ebx - xorl 4(%edi),%edx - xorl 8(%edi),%ecx - xorl (%edi),%ebp - movl %edi,48(%esp) - movl %ebx,12(%esp) - movl %edx,4(%esp) - movl %ecx,8(%esp) - movl %ebp,(%esp) - movl %esp,%edi - shrl $24,%ebx - call __mmx_gmult_4bit_inner - movl 48(%esp),%edi - leal 16(%edi),%edi - cmpl 52(%esp),%edi - jb L007mmx_outer_loop - movl 40(%esp),%edi +L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + 
pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al 
+ psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp emms - movl %ebx,12(%edi) - movl %edx,4(%edi) - movl %ecx,8(%edi) - movl %ebp,(%edi) - addl $20,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_init_clmul +.align 4 +_gcm_init_clmul: +L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call L010pic +L010pic: + popl %ecx + leal Lbswap-L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.globl _gcm_gmult_clmul +.align 4 +_gcm_gmult_clmul: +L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call L011pic +L011pic: + popl %ecx + leal Lbswap-L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.globl _gcm_ghash_clmul +.align 4 +_gcm_ghash_clmul: +L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 
20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call L012pic +L012pic: + popl %ecx + leal Lbswap-L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe L014even_tail + jmp L015mod_loop +.align 5,0x90 +L015mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja L015mod_loop +L014even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz L016done + movups (%edx),%xmm2 +L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) popl %edi popl %esi popl %ebx popl %ebp ret .align 6,0x90 -Lrem_4bit: -.long 
0,0,0,29491200,0,58982400,0,38141952 -.long 0,117964800,0,113901568,0,76283904,0,88997888 -.long 0,235929600,0,265420800,0,227803136,0,206962688 -.long 0,152567808,0,148504576,0,177995776,0,190709760 +Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 .align 6,0x90 -L008rem_8bit: +Lrem_8bit: .value 0,450,900,582,1800,1738,1164,1358 .value 3600,4050,3476,3158,2328,2266,2716,2910 .value 7200,7650,8100,7782,6952,6890,6316,6510 @@ -712,6 +1239,12 @@ L008rem_8bit: .value 42960,42514,42068,42390,41176,41242,41820,41630 .value 46560,46114,46692,47014,45800,45866,45420,45230 .value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 6,0x90 +Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 diff --git a/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s b/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s index 882a02d74cd77f..ac82e76498b3b7 100644 --- a/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s +++ b/deps/openssl/asm/x86-macosx-gas/rc4/rc4-586.s @@ -28,8 +28,8 @@ L_RC4_begin: movl (%edi,%eax,4),%ecx andl $-4,%edx jz L002loop1 - testl $-8,%edx movl %ebp,32(%esp) + testl $-8,%edx jz L003go4loop4 call L004PIC_me_up L004PIC_me_up: @@ -376,4 +376,4 @@ L019opts: L_OPENSSL_ia32cap_P$non_lazy_ptr: .indirect_symbol _OPENSSL_ia32cap_P .long 0 -.comm _OPENSSL_ia32cap_P,8,2 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/rc5/rc5-586.s b/deps/openssl/asm/x86-macosx-gas/rc5/rc5-586.s deleted file mode 100644 index ed7f7dc76253f0..00000000000000 --- a/deps/openssl/asm/x86-macosx-gas/rc5/rc5-586.s +++ /dev/null @@ -1,563 +0,0 @@ -.file "rc5-586.s" -.text -.globl _RC5_32_encrypt -.align 4 -_RC5_32_encrypt: -L_RC5_32_encrypt_begin: - - pushl %ebp - pushl %esi - pushl %edi - movl 16(%esp),%edx - movl 20(%esp),%ebp - # Load the 2 words - - movl (%edx),%edi - movl 4(%edx),%esi - pushl %ebx - movl (%ebp),%ebx - addl 4(%ebp),%edi - addl 8(%ebp),%esi - xorl %esi,%edi - movl 12(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 16(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 20(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 24(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 28(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 32(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 36(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 40(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 44(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 48(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 52(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 56(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 60(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 64(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 
68(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 72(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - cmpl $8,%ebx - je L000rc5_exit - xorl %esi,%edi - movl 76(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 80(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 84(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 88(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 92(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 96(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 100(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 104(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - cmpl $12,%ebx - je L000rc5_exit - xorl %esi,%edi - movl 108(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 112(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 116(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 120(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 124(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 128(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi - xorl %esi,%edi - movl 132(%ebp),%eax - movl %esi,%ecx - roll %cl,%edi - addl %eax,%edi - xorl %edi,%esi - movl 136(%ebp),%eax - movl %edi,%ecx - roll %cl,%esi - addl %eax,%esi -L000rc5_exit: - movl %edi,(%edx) - movl %esi,4(%edx) - popl %ebx - popl %edi - popl %esi - popl %ebp - ret -.globl _RC5_32_decrypt -.align 4 -_RC5_32_decrypt: -L_RC5_32_decrypt_begin: - - pushl %ebp - pushl %esi - pushl %edi - movl 16(%esp),%edx - movl 20(%esp),%ebp - # Load the 2 words - - movl (%edx),%edi - movl 4(%edx),%esi - pushl %ebx - movl (%ebp),%ebx - cmpl $12,%ebx - je L001rc5_dec_12 - cmpl $8,%ebx - je L002rc5_dec_8 - movl 136(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 132(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 128(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 124(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 120(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 116(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 112(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 108(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi -L001rc5_dec_12: - movl 104(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 100(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 96(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 92(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 88(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 84(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 80(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 76(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi -L002rc5_dec_8: - movl 72(%ebp),%eax - subl 
%eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 68(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 64(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 60(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 56(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 52(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 48(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 44(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 40(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 36(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 32(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 28(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 24(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 20(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - movl 16(%ebp),%eax - subl %eax,%esi - movl %edi,%ecx - rorl %cl,%esi - xorl %edi,%esi - movl 12(%ebp),%eax - subl %eax,%edi - movl %esi,%ecx - rorl %cl,%edi - xorl %esi,%edi - subl 8(%ebp),%esi - subl 4(%ebp),%edi -L003rc5_exit: - movl %edi,(%edx) - movl %esi,4(%edx) - popl %ebx - popl %edi - popl %esi - popl %ebp - ret -.globl _RC5_32_cbc_encrypt -.align 4 -_RC5_32_cbc_encrypt: -L_RC5_32_cbc_encrypt_begin: - - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 28(%esp),%ebp - # getting iv ptr from parameter 4 - - movl 36(%esp),%ebx - movl (%ebx),%esi - movl 4(%ebx),%edi - pushl %edi - pushl %esi - pushl %edi - pushl %esi - movl %esp,%ebx - movl 36(%esp),%esi - movl 40(%esp),%edi - # getting encrypt flag from parameter 5 - - movl 56(%esp),%ecx - # get and push parameter 3 - - movl 48(%esp),%eax - pushl %eax - pushl %ebx - cmpl $0,%ecx - jz L004decrypt - andl $4294967288,%ebp - movl 8(%esp),%eax - movl 12(%esp),%ebx - jz L005encrypt_finish -L006encrypt_loop: - movl (%esi),%ecx - movl 4(%esi),%edx - xorl %ecx,%eax - xorl %edx,%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call L_RC5_32_encrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - addl $8,%esi - addl $8,%edi - subl $8,%ebp - jnz L006encrypt_loop -L005encrypt_finish: - movl 52(%esp),%ebp - andl $7,%ebp - jz L007finish - call L008PIC_point -L008PIC_point: - popl %edx - leal L009cbc_enc_jmp_table-L008PIC_point(%edx),%ecx - movl (%ecx,%ebp,4),%ebp - addl %edx,%ebp - xorl %ecx,%ecx - xorl %edx,%edx - jmp *%ebp -L010ej7: - movb 6(%esi),%dh - shll $8,%edx -L011ej6: - movb 5(%esi),%dh -L012ej5: - movb 4(%esi),%dl -L013ej4: - movl (%esi),%ecx - jmp L014ejend -L015ej3: - movb 2(%esi),%ch - shll $8,%ecx -L016ej2: - movb 1(%esi),%ch -L017ej1: - movb (%esi),%cl -L014ejend: - xorl %ecx,%eax - xorl %edx,%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call L_RC5_32_encrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - jmp L007finish -L004decrypt: - andl $4294967288,%ebp - movl 16(%esp),%eax - movl 20(%esp),%ebx - jz L018decrypt_finish -L019decrypt_loop: - movl (%esi),%eax - movl 4(%esi),%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call L_RC5_32_decrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl 16(%esp),%ecx - movl 20(%esp),%edx - xorl %eax,%ecx - xorl %ebx,%edx - 
movl (%esi),%eax - movl 4(%esi),%ebx - movl %ecx,(%edi) - movl %edx,4(%edi) - movl %eax,16(%esp) - movl %ebx,20(%esp) - addl $8,%esi - addl $8,%edi - subl $8,%ebp - jnz L019decrypt_loop -L018decrypt_finish: - movl 52(%esp),%ebp - andl $7,%ebp - jz L007finish - movl (%esi),%eax - movl 4(%esi),%ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call L_RC5_32_decrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - movl 16(%esp),%ecx - movl 20(%esp),%edx - xorl %eax,%ecx - xorl %ebx,%edx - movl (%esi),%eax - movl 4(%esi),%ebx -L020dj7: - rorl $16,%edx - movb %dl,6(%edi) - shrl $16,%edx -L021dj6: - movb %dh,5(%edi) -L022dj5: - movb %dl,4(%edi) -L023dj4: - movl %ecx,(%edi) - jmp L024djend -L025dj3: - rorl $16,%ecx - movb %cl,2(%edi) - shll $16,%ecx -L026dj2: - movb %ch,1(%esi) -L027dj1: - movb %cl,(%esi) -L024djend: - jmp L007finish -L007finish: - movl 60(%esp),%ecx - addl $24,%esp - movl %eax,(%ecx) - movl %ebx,4(%ecx) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 6,0x90 -L009cbc_enc_jmp_table: -.long 0 -.long L017ej1-L008PIC_point -.long L016ej2-L008PIC_point -.long L015ej3-L008PIC_point -.long L013ej4-L008PIC_point -.long L012ej5-L008PIC_point -.long L011ej6-L008PIC_point -.long L010ej7-L008PIC_point -.align 6,0x90 diff --git a/deps/openssl/asm/x86-macosx-gas/ripemd/rmd-586.s b/deps/openssl/asm/x86-macosx-gas/ripemd/rmd-586.s index 7d9d0e4d68baa6..7323b2de7305a3 100644 --- a/deps/openssl/asm/x86-macosx-gas/ripemd/rmd-586.s +++ b/deps/openssl/asm/x86-macosx-gas/ripemd/rmd-586.s @@ -52,7 +52,6 @@ L000start: movl 12(%edx),%ebx movl 16(%edx),%ebp # 0 - xorl %ebx,%eax movl (%esp),%edx xorl %esi,%eax @@ -63,7 +62,6 @@ L000start: roll $11,%ecx addl %ebp,%ecx # 1 - xorl %edi,%eax movl 4(%esp),%edx xorl %ecx,%eax @@ -75,7 +73,6 @@ L000start: roll $14,%ebp addl %ebx,%ebp # 2 - movl 8(%esp),%edx xorl %ebp,%eax addl %edx,%ebx @@ -85,7 +82,6 @@ L000start: roll $15,%ebx addl %edi,%ebx # 3 - xorl %ecx,%eax movl 12(%esp),%edx xorl %ebx,%eax @@ -97,7 +93,6 @@ L000start: roll $12,%edi addl %esi,%edi # 4 - movl 16(%esp),%edx xorl %edi,%eax addl %edx,%esi @@ -107,7 +102,6 @@ L000start: roll $5,%esi addl %ecx,%esi # 5 - xorl %ebx,%eax movl 20(%esp),%edx xorl %esi,%eax @@ -119,7 +113,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 6 - movl 24(%esp),%edx xorl %ecx,%eax addl %edx,%ebp @@ -129,7 +122,6 @@ L000start: roll $7,%ebp addl %ebx,%ebp # 7 - xorl %esi,%eax movl 28(%esp),%edx xorl %ebp,%eax @@ -141,7 +133,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 8 - movl 32(%esp),%edx xorl %ebx,%eax addl %edx,%edi @@ -151,7 +142,6 @@ L000start: roll $11,%edi addl %esi,%edi # 9 - xorl %ebp,%eax movl 36(%esp),%edx xorl %edi,%eax @@ -163,7 +153,6 @@ L000start: roll $13,%esi addl %ecx,%esi # 10 - movl 40(%esp),%edx xorl %esi,%eax addl %edx,%ecx @@ -173,7 +162,6 @@ L000start: roll $14,%ecx addl %ebp,%ecx # 11 - xorl %edi,%eax movl 44(%esp),%edx xorl %ecx,%eax @@ -185,7 +173,6 @@ L000start: roll $15,%ebp addl %ebx,%ebp # 12 - movl 48(%esp),%edx xorl %ebp,%eax addl %edx,%ebx @@ -195,7 +182,6 @@ L000start: roll $6,%ebx addl %edi,%ebx # 13 - xorl %ecx,%eax movl 52(%esp),%edx xorl %ebx,%eax @@ -207,7 +193,6 @@ L000start: roll $7,%edi addl %esi,%edi # 14 - movl 56(%esp),%edx xorl %edi,%eax addl %edx,%esi @@ -217,7 +202,6 @@ L000start: roll $9,%esi addl %ecx,%esi # 15 - xorl %ebx,%eax movl 60(%esp),%edx xorl %esi,%eax @@ -229,7 +213,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 16 - addl %edx,%ebp movl %esi,%edx subl %ecx,%eax @@ -243,7 +226,6 @@ L000start: roll $7,%ebp addl %ebx,%ebp # 17 - addl %eax,%ebx movl %ecx,%eax 
subl %ebp,%edx @@ -257,7 +239,6 @@ L000start: roll $6,%ebx addl %edi,%ebx # 18 - addl %edx,%edi movl %ebp,%edx subl %ebx,%eax @@ -271,7 +252,6 @@ L000start: roll $8,%edi addl %esi,%edi # 19 - addl %eax,%esi movl %ebx,%eax subl %edi,%edx @@ -285,7 +265,6 @@ L000start: roll $13,%esi addl %ecx,%esi # 20 - addl %edx,%ecx movl %edi,%edx subl %esi,%eax @@ -299,7 +278,6 @@ L000start: roll $11,%ecx addl %ebp,%ecx # 21 - addl %eax,%ebp movl %esi,%eax subl %ecx,%edx @@ -313,7 +291,6 @@ L000start: roll $9,%ebp addl %ebx,%ebp # 22 - addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax @@ -327,7 +304,6 @@ L000start: roll $7,%ebx addl %edi,%ebx # 23 - addl %eax,%edi movl %ebp,%eax subl %ebx,%edx @@ -341,7 +317,6 @@ L000start: roll $15,%edi addl %esi,%edi # 24 - addl %edx,%esi movl %ebx,%edx subl %edi,%eax @@ -355,7 +330,6 @@ L000start: roll $7,%esi addl %ecx,%esi # 25 - addl %eax,%ecx movl %edi,%eax subl %esi,%edx @@ -369,7 +343,6 @@ L000start: roll $12,%ecx addl %ebp,%ecx # 26 - addl %edx,%ebp movl %esi,%edx subl %ecx,%eax @@ -383,7 +356,6 @@ L000start: roll $15,%ebp addl %ebx,%ebp # 27 - addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx @@ -397,7 +369,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 28 - addl %edx,%edi movl %ebp,%edx subl %ebx,%eax @@ -411,7 +382,6 @@ L000start: roll $11,%edi addl %esi,%edi # 29 - addl %eax,%esi movl %ebx,%eax subl %edi,%edx @@ -425,7 +395,6 @@ L000start: roll $7,%esi addl %ecx,%esi # 30 - addl %edx,%ecx movl %edi,%edx subl %esi,%eax @@ -439,7 +408,6 @@ L000start: roll $13,%ecx addl %ebp,%ecx # 31 - addl %eax,%ebp movl %esi,%eax subl %ecx,%edx @@ -453,7 +421,6 @@ L000start: roll $12,%ebp addl %ebx,%ebp # 32 - movl 12(%esp),%eax orl %ebp,%edx addl %eax,%ebx @@ -465,7 +432,6 @@ L000start: roll $11,%ebx addl %edi,%ebx # 33 - movl 40(%esp),%edx orl %ebx,%eax addl %edx,%edi @@ -477,7 +443,6 @@ L000start: roll $13,%edi addl %esi,%edi # 34 - movl 56(%esp),%eax orl %edi,%edx addl %eax,%esi @@ -489,7 +454,6 @@ L000start: roll $6,%esi addl %ecx,%esi # 35 - movl 16(%esp),%edx orl %esi,%eax addl %edx,%ecx @@ -501,7 +465,6 @@ L000start: roll $7,%ecx addl %ebp,%ecx # 36 - movl 36(%esp),%eax orl %ecx,%edx addl %eax,%ebp @@ -513,7 +476,6 @@ L000start: roll $14,%ebp addl %ebx,%ebp # 37 - movl 60(%esp),%edx orl %ebp,%eax addl %edx,%ebx @@ -525,7 +487,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 38 - movl 32(%esp),%eax orl %ebx,%edx addl %eax,%edi @@ -537,7 +498,6 @@ L000start: roll $13,%edi addl %esi,%edi # 39 - movl 4(%esp),%edx orl %edi,%eax addl %edx,%esi @@ -549,7 +509,6 @@ L000start: roll $15,%esi addl %ecx,%esi # 40 - movl 8(%esp),%eax orl %esi,%edx addl %eax,%ecx @@ -561,7 +520,6 @@ L000start: roll $14,%ecx addl %ebp,%ecx # 41 - movl 28(%esp),%edx orl %ecx,%eax addl %edx,%ebp @@ -573,7 +531,6 @@ L000start: roll $8,%ebp addl %ebx,%ebp # 42 - movl (%esp),%eax orl %ebp,%edx addl %eax,%ebx @@ -585,7 +542,6 @@ L000start: roll $13,%ebx addl %edi,%ebx # 43 - movl 24(%esp),%edx orl %ebx,%eax addl %edx,%edi @@ -597,7 +553,6 @@ L000start: roll $6,%edi addl %esi,%edi # 44 - movl 52(%esp),%eax orl %edi,%edx addl %eax,%esi @@ -609,7 +564,6 @@ L000start: roll $5,%esi addl %ecx,%esi # 45 - movl 44(%esp),%edx orl %esi,%eax addl %edx,%ecx @@ -621,7 +575,6 @@ L000start: roll $12,%ecx addl %ebp,%ecx # 46 - movl 20(%esp),%eax orl %ecx,%edx addl %eax,%ebp @@ -633,7 +586,6 @@ L000start: roll $7,%ebp addl %ebx,%ebp # 47 - movl 48(%esp),%edx orl %ebp,%eax addl %edx,%ebx @@ -645,7 +597,6 @@ L000start: roll $5,%ebx addl %edi,%ebx # 48 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -659,7 +610,6 @@ L000start: roll 
$11,%edi addl %esi,%edi # 49 - subl %ebp,%edx andl %edi,%eax andl %ebx,%edx @@ -673,7 +623,6 @@ L000start: roll $12,%esi addl %ecx,%esi # 50 - subl %ebx,%edx andl %esi,%eax andl %edi,%edx @@ -687,7 +636,6 @@ L000start: roll $14,%ecx addl %ebp,%ecx # 51 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -701,7 +649,6 @@ L000start: roll $15,%ebp addl %ebx,%ebp # 52 - subl %esi,%edx andl %ebp,%eax andl %ecx,%edx @@ -715,7 +662,6 @@ L000start: roll $14,%ebx addl %edi,%ebx # 53 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -729,7 +675,6 @@ L000start: roll $15,%edi addl %esi,%edi # 54 - subl %ebp,%edx andl %edi,%eax andl %ebx,%edx @@ -743,7 +688,6 @@ L000start: roll $9,%esi addl %ecx,%esi # 55 - subl %ebx,%edx andl %esi,%eax andl %edi,%edx @@ -757,7 +701,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 56 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -771,7 +714,6 @@ L000start: roll $9,%ebp addl %ebx,%ebp # 57 - subl %esi,%edx andl %ebp,%eax andl %ecx,%edx @@ -785,7 +727,6 @@ L000start: roll $14,%ebx addl %edi,%ebx # 58 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -799,7 +740,6 @@ L000start: roll $5,%edi addl %esi,%edi # 59 - subl %ebp,%edx andl %edi,%eax andl %ebx,%edx @@ -813,7 +753,6 @@ L000start: roll $6,%esi addl %ecx,%esi # 60 - subl %ebx,%edx andl %esi,%eax andl %edi,%edx @@ -827,7 +766,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 61 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -841,7 +779,6 @@ L000start: roll $6,%ebp addl %ebx,%ebp # 62 - subl %esi,%edx andl %ebp,%eax andl %ecx,%edx @@ -855,7 +792,6 @@ L000start: roll $5,%ebx addl %edi,%ebx # 63 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -869,7 +805,6 @@ L000start: roll $12,%edi addl %esi,%edi # 64 - movl 16(%esp),%eax orl %ebx,%edx addl %eax,%esi @@ -881,7 +816,6 @@ L000start: roll $9,%esi addl %ecx,%esi # 65 - movl (%esp),%edx orl %edi,%eax addl %edx,%ecx @@ -893,7 +827,6 @@ L000start: roll $15,%ecx addl %ebp,%ecx # 66 - movl 20(%esp),%eax orl %esi,%edx addl %eax,%ebp @@ -905,7 +838,6 @@ L000start: roll $5,%ebp addl %ebx,%ebp # 67 - movl 36(%esp),%edx orl %ecx,%eax addl %edx,%ebx @@ -917,7 +849,6 @@ L000start: roll $11,%ebx addl %edi,%ebx # 68 - movl 28(%esp),%eax orl %ebp,%edx addl %eax,%edi @@ -929,7 +860,6 @@ L000start: roll $6,%edi addl %esi,%edi # 69 - movl 48(%esp),%edx orl %ebx,%eax addl %edx,%esi @@ -941,7 +871,6 @@ L000start: roll $8,%esi addl %ecx,%esi # 70 - movl 8(%esp),%eax orl %edi,%edx addl %eax,%ecx @@ -953,7 +882,6 @@ L000start: roll $13,%ecx addl %ebp,%ecx # 71 - movl 40(%esp),%edx orl %esi,%eax addl %edx,%ebp @@ -965,7 +893,6 @@ L000start: roll $12,%ebp addl %ebx,%ebp # 72 - movl 56(%esp),%eax orl %ecx,%edx addl %eax,%ebx @@ -977,7 +904,6 @@ L000start: roll $5,%ebx addl %edi,%ebx # 73 - movl 4(%esp),%edx orl %ebp,%eax addl %edx,%edi @@ -989,7 +915,6 @@ L000start: roll $12,%edi addl %esi,%edi # 74 - movl 12(%esp),%eax orl %ebx,%edx addl %eax,%esi @@ -1001,7 +926,6 @@ L000start: roll $13,%esi addl %ecx,%esi # 75 - movl 32(%esp),%edx orl %edi,%eax addl %edx,%ecx @@ -1013,7 +937,6 @@ L000start: roll $14,%ecx addl %ebp,%ecx # 76 - movl 44(%esp),%eax orl %esi,%edx addl %eax,%ebp @@ -1025,7 +948,6 @@ L000start: roll $11,%ebp addl %ebx,%ebp # 77 - movl 24(%esp),%edx orl %ecx,%eax addl %edx,%ebx @@ -1037,7 +959,6 @@ L000start: roll $8,%ebx addl %edi,%ebx # 78 - movl 60(%esp),%eax orl %ebp,%edx addl %eax,%edi @@ -1049,7 +970,6 @@ L000start: roll $5,%edi addl %esi,%edi # 79 - movl 52(%esp),%edx orl %ebx,%eax addl %edx,%esi @@ -1070,7 +990,6 @@ L000start: movl 12(%edx),%ebx movl 16(%edx),%ebp # 80 - movl 
$-1,%edx subl %ebx,%edx movl 20(%esp),%eax @@ -1084,7 +1003,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 81 - movl 56(%esp),%edx orl %esi,%eax addl %edx,%ebp @@ -1096,7 +1014,6 @@ L000start: roll $9,%ebp addl %ebx,%ebp # 82 - movl 28(%esp),%eax orl %ecx,%edx addl %eax,%ebx @@ -1108,7 +1025,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 83 - movl (%esp),%edx orl %ebp,%eax addl %edx,%edi @@ -1120,7 +1036,6 @@ L000start: roll $11,%edi addl %esi,%edi # 84 - movl 36(%esp),%eax orl %ebx,%edx addl %eax,%esi @@ -1132,7 +1047,6 @@ L000start: roll $13,%esi addl %ecx,%esi # 85 - movl 8(%esp),%edx orl %edi,%eax addl %edx,%ecx @@ -1144,7 +1058,6 @@ L000start: roll $15,%ecx addl %ebp,%ecx # 86 - movl 44(%esp),%eax orl %esi,%edx addl %eax,%ebp @@ -1156,7 +1069,6 @@ L000start: roll $15,%ebp addl %ebx,%ebp # 87 - movl 16(%esp),%edx orl %ecx,%eax addl %edx,%ebx @@ -1168,7 +1080,6 @@ L000start: roll $5,%ebx addl %edi,%ebx # 88 - movl 52(%esp),%eax orl %ebp,%edx addl %eax,%edi @@ -1180,7 +1091,6 @@ L000start: roll $7,%edi addl %esi,%edi # 89 - movl 24(%esp),%edx orl %ebx,%eax addl %edx,%esi @@ -1192,7 +1102,6 @@ L000start: roll $7,%esi addl %ecx,%esi # 90 - movl 60(%esp),%eax orl %edi,%edx addl %eax,%ecx @@ -1204,7 +1113,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 91 - movl 32(%esp),%edx orl %esi,%eax addl %edx,%ebp @@ -1216,7 +1124,6 @@ L000start: roll $11,%ebp addl %ebx,%ebp # 92 - movl 4(%esp),%eax orl %ecx,%edx addl %eax,%ebx @@ -1228,7 +1135,6 @@ L000start: roll $14,%ebx addl %edi,%ebx # 93 - movl 40(%esp),%edx orl %ebp,%eax addl %edx,%edi @@ -1240,7 +1146,6 @@ L000start: roll $14,%edi addl %esi,%edi # 94 - movl 12(%esp),%eax orl %ebx,%edx addl %eax,%esi @@ -1252,7 +1157,6 @@ L000start: roll $12,%esi addl %ecx,%esi # 95 - movl 48(%esp),%edx orl %edi,%eax addl %edx,%ecx @@ -1264,7 +1168,6 @@ L000start: roll $6,%ecx addl %ebp,%ecx # 96 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -1278,7 +1181,6 @@ L000start: roll $9,%ebp addl %ebx,%ebp # 97 - subl %esi,%edx andl %ebp,%eax andl %ecx,%edx @@ -1292,7 +1194,6 @@ L000start: roll $13,%ebx addl %edi,%ebx # 98 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -1306,7 +1207,6 @@ L000start: roll $15,%edi addl %esi,%edi # 99 - subl %ebp,%edx andl %edi,%eax andl %ebx,%edx @@ -1320,7 +1220,6 @@ L000start: roll $7,%esi addl %ecx,%esi # 100 - subl %ebx,%edx andl %esi,%eax andl %edi,%edx @@ -1334,7 +1233,6 @@ L000start: roll $12,%ecx addl %ebp,%ecx # 101 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -1348,7 +1246,6 @@ L000start: roll $8,%ebp addl %ebx,%ebp # 102 - subl %esi,%edx andl %ebp,%eax andl %ecx,%edx @@ -1362,7 +1259,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 103 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -1376,7 +1272,6 @@ L000start: roll $11,%edi addl %esi,%edi # 104 - subl %ebp,%edx andl %edi,%eax andl %ebx,%edx @@ -1390,7 +1285,6 @@ L000start: roll $7,%esi addl %ecx,%esi # 105 - subl %ebx,%edx andl %esi,%eax andl %edi,%edx @@ -1404,7 +1298,6 @@ L000start: roll $7,%ecx addl %ebp,%ecx # 106 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -1418,7 +1311,6 @@ L000start: roll $12,%ebp addl %ebx,%ebp # 107 - subl %esi,%edx andl %ebp,%eax andl %ecx,%edx @@ -1432,7 +1324,6 @@ L000start: roll $7,%ebx addl %edi,%ebx # 108 - subl %ecx,%edx andl %ebx,%eax andl %ebp,%edx @@ -1446,7 +1337,6 @@ L000start: roll $6,%edi addl %esi,%edi # 109 - subl %ebp,%edx andl %edi,%eax andl %ebx,%edx @@ -1460,7 +1350,6 @@ L000start: roll $15,%esi addl %ecx,%esi # 110 - subl %ebx,%edx andl %esi,%eax andl %edi,%edx @@ -1474,7 +1363,6 @@ L000start: roll $13,%ecx addl 
%ebp,%ecx # 111 - subl %edi,%edx andl %ecx,%eax andl %esi,%edx @@ -1488,7 +1376,6 @@ L000start: roll $11,%ebp addl %ebx,%ebp # 112 - movl 60(%esp),%eax orl %ebp,%edx addl %eax,%ebx @@ -1500,7 +1387,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 113 - movl 20(%esp),%edx orl %ebx,%eax addl %edx,%edi @@ -1512,7 +1398,6 @@ L000start: roll $7,%edi addl %esi,%edi # 114 - movl 4(%esp),%eax orl %edi,%edx addl %eax,%esi @@ -1524,7 +1409,6 @@ L000start: roll $15,%esi addl %ecx,%esi # 115 - movl 12(%esp),%edx orl %esi,%eax addl %edx,%ecx @@ -1536,7 +1420,6 @@ L000start: roll $11,%ecx addl %ebp,%ecx # 116 - movl 28(%esp),%eax orl %ecx,%edx addl %eax,%ebp @@ -1548,7 +1431,6 @@ L000start: roll $8,%ebp addl %ebx,%ebp # 117 - movl 56(%esp),%edx orl %ebp,%eax addl %edx,%ebx @@ -1560,7 +1442,6 @@ L000start: roll $6,%ebx addl %edi,%ebx # 118 - movl 24(%esp),%eax orl %ebx,%edx addl %eax,%edi @@ -1572,7 +1453,6 @@ L000start: roll $6,%edi addl %esi,%edi # 119 - movl 36(%esp),%edx orl %edi,%eax addl %edx,%esi @@ -1584,7 +1464,6 @@ L000start: roll $14,%esi addl %ecx,%esi # 120 - movl 44(%esp),%eax orl %esi,%edx addl %eax,%ecx @@ -1596,7 +1475,6 @@ L000start: roll $12,%ecx addl %ebp,%ecx # 121 - movl 32(%esp),%edx orl %ecx,%eax addl %edx,%ebp @@ -1608,7 +1486,6 @@ L000start: roll $13,%ebp addl %ebx,%ebp # 122 - movl 48(%esp),%eax orl %ebp,%edx addl %eax,%ebx @@ -1620,7 +1497,6 @@ L000start: roll $5,%ebx addl %edi,%ebx # 123 - movl 8(%esp),%edx orl %ebx,%eax addl %edx,%edi @@ -1632,7 +1508,6 @@ L000start: roll $14,%edi addl %esi,%edi # 124 - movl 40(%esp),%eax orl %edi,%edx addl %eax,%esi @@ -1644,7 +1519,6 @@ L000start: roll $13,%esi addl %ecx,%esi # 125 - movl (%esp),%edx orl %esi,%eax addl %edx,%ecx @@ -1656,7 +1530,6 @@ L000start: roll $13,%ecx addl %ebp,%ecx # 126 - movl 16(%esp),%eax orl %ecx,%edx addl %eax,%ebp @@ -1668,7 +1541,6 @@ L000start: roll $7,%ebp addl %ebx,%ebp # 127 - movl 52(%esp),%edx orl %ebp,%eax addl %edx,%ebx @@ -1680,7 +1552,6 @@ L000start: roll $5,%ebx addl %edi,%ebx # 128 - addl %edx,%edi movl %ebp,%edx subl %ebx,%eax @@ -1694,7 +1565,6 @@ L000start: roll $15,%edi addl %esi,%edi # 129 - addl %eax,%esi movl %ebx,%eax subl %edi,%edx @@ -1708,7 +1578,6 @@ L000start: roll $5,%esi addl %ecx,%esi # 130 - addl %edx,%ecx movl %edi,%edx subl %esi,%eax @@ -1722,7 +1591,6 @@ L000start: roll $8,%ecx addl %ebp,%ecx # 131 - addl %eax,%ebp movl %esi,%eax subl %ecx,%edx @@ -1736,7 +1604,6 @@ L000start: roll $11,%ebp addl %ebx,%ebp # 132 - addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax @@ -1750,7 +1617,6 @@ L000start: roll $14,%ebx addl %edi,%ebx # 133 - addl %eax,%edi movl %ebp,%eax subl %ebx,%edx @@ -1764,7 +1630,6 @@ L000start: roll $14,%edi addl %esi,%edi # 134 - addl %edx,%esi movl %ebx,%edx subl %edi,%eax @@ -1778,7 +1643,6 @@ L000start: roll $6,%esi addl %ecx,%esi # 135 - addl %eax,%ecx movl %edi,%eax subl %esi,%edx @@ -1792,7 +1656,6 @@ L000start: roll $14,%ecx addl %ebp,%ecx # 136 - addl %edx,%ebp movl %esi,%edx subl %ecx,%eax @@ -1806,7 +1669,6 @@ L000start: roll $6,%ebp addl %ebx,%ebp # 137 - addl %eax,%ebx movl %ecx,%eax subl %ebp,%edx @@ -1820,7 +1682,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 138 - addl %edx,%edi movl %ebp,%edx subl %ebx,%eax @@ -1834,7 +1695,6 @@ L000start: roll $12,%edi addl %esi,%edi # 139 - addl %eax,%esi movl %ebx,%eax subl %edi,%edx @@ -1848,7 +1708,6 @@ L000start: roll $9,%esi addl %ecx,%esi # 140 - addl %edx,%ecx movl %edi,%edx subl %esi,%eax @@ -1862,7 +1721,6 @@ L000start: roll $12,%ecx addl %ebp,%ecx # 141 - addl %eax,%ebp movl %esi,%eax subl %ecx,%edx @@ -1876,7 
+1734,6 @@ L000start: roll $5,%ebp addl %ebx,%ebp # 142 - addl %edx,%ebx movl %ecx,%edx subl %ebp,%eax @@ -1890,7 +1747,6 @@ L000start: roll $15,%ebx addl %edi,%ebx # 143 - addl %eax,%edi movl %ebp,%eax subl %ebx,%edx @@ -1904,7 +1760,6 @@ L000start: roll $8,%edi addl %esi,%edi # 144 - movl 48(%esp),%edx xorl %edi,%eax addl %edx,%esi @@ -1914,7 +1769,6 @@ L000start: roll $8,%esi addl %ecx,%esi # 145 - xorl %ebx,%eax movl 60(%esp),%edx xorl %esi,%eax @@ -1926,7 +1780,6 @@ L000start: roll $5,%ecx addl %ebp,%ecx # 146 - movl 40(%esp),%edx xorl %ecx,%eax addl %edx,%ebp @@ -1936,7 +1789,6 @@ L000start: roll $12,%ebp addl %ebx,%ebp # 147 - xorl %esi,%eax movl 16(%esp),%edx xorl %ebp,%eax @@ -1948,7 +1800,6 @@ L000start: roll $9,%ebx addl %edi,%ebx # 148 - movl 4(%esp),%edx xorl %ebx,%eax addl %edx,%edi @@ -1958,7 +1809,6 @@ L000start: roll $12,%edi addl %esi,%edi # 149 - xorl %ebp,%eax movl 20(%esp),%edx xorl %edi,%eax @@ -1970,7 +1820,6 @@ L000start: roll $5,%esi addl %ecx,%esi # 150 - movl 32(%esp),%edx xorl %esi,%eax addl %edx,%ecx @@ -1980,7 +1829,6 @@ L000start: roll $14,%ecx addl %ebp,%ecx # 151 - xorl %edi,%eax movl 28(%esp),%edx xorl %ecx,%eax @@ -1992,7 +1840,6 @@ L000start: roll $6,%ebp addl %ebx,%ebp # 152 - movl 24(%esp),%edx xorl %ebp,%eax addl %edx,%ebx @@ -2002,7 +1849,6 @@ L000start: roll $8,%ebx addl %edi,%ebx # 153 - xorl %ecx,%eax movl 8(%esp),%edx xorl %ebx,%eax @@ -2014,7 +1860,6 @@ L000start: roll $13,%edi addl %esi,%edi # 154 - movl 52(%esp),%edx xorl %edi,%eax addl %edx,%esi @@ -2024,7 +1869,6 @@ L000start: roll $6,%esi addl %ecx,%esi # 155 - xorl %ebx,%eax movl 56(%esp),%edx xorl %esi,%eax @@ -2036,7 +1880,6 @@ L000start: roll $5,%ecx addl %ebp,%ecx # 156 - movl (%esp),%edx xorl %ecx,%eax addl %edx,%ebp @@ -2046,7 +1889,6 @@ L000start: roll $15,%ebp addl %ebx,%ebp # 157 - xorl %esi,%eax movl 12(%esp),%edx xorl %ebp,%eax @@ -2058,7 +1900,6 @@ L000start: roll $13,%ebx addl %edi,%ebx # 158 - movl 36(%esp),%edx xorl %ebx,%eax addl %edx,%edi @@ -2068,7 +1909,6 @@ L000start: roll $11,%edi addl %esi,%edi # 159 - xorl %ebp,%eax movl 44(%esp),%edx xorl %edi,%eax diff --git a/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s b/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s index 28d95721b8fd47..d75e61693d5de1 100644 --- a/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s +++ b/deps/openssl/asm/x86-macosx-gas/sha/sha1-586.s @@ -8,6 +8,28 @@ L_sha1_block_data_order_begin: pushl %ebx pushl %esi pushl %edi + call L000pic_point +L000pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi + leal LK_XX_XX-L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%edx + testl $512,%edx + jz L001x86 + movl 8(%esi),%ecx + testl $16777216,%eax + jz L001x86 + testl $536870912,%ecx + jnz Lshaext_shortcut + andl $268435456,%edx + andl $1073741824,%eax + orl %edx,%eax + cmpl $1342177280,%eax + je Lavx_shortcut + jmp Lssse3_shortcut +.align 4,0x90 +L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax @@ -16,9 +38,9 @@ L_sha1_block_data_order_begin: addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi - jmp L000loop + jmp L002loop .align 4,0x90 -L000loop: +L002loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -73,7 +95,6 @@ L000loop: movl 8(%ebp),%ecx movl 12(%ebp),%edx # 00_15 0 - movl %ecx,%esi movl %eax,%ebp roll $5,%ebp @@ -86,7 +107,6 @@ L000loop: leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp # 00_15 1 - movl %ebx,%edi movl %ebp,%esi roll $5,%ebp @@ -99,7 +119,6 @@ L000loop: leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp # 00_15 2 - 
movl %eax,%edx movl %ebp,%edi roll $5,%ebp @@ -112,7 +131,6 @@ L000loop: leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp # 00_15 3 - movl %esi,%ecx movl %ebp,%edx roll $5,%ebp @@ -125,7 +143,6 @@ L000loop: leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp # 00_15 4 - movl %edi,%ebx movl %ebp,%ecx roll $5,%ebp @@ -138,7 +155,6 @@ L000loop: leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp # 00_15 5 - movl %edx,%eax movl %ebp,%ebx roll $5,%ebp @@ -151,7 +167,6 @@ L000loop: leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp # 00_15 6 - movl %ecx,%esi movl %ebp,%eax roll $5,%ebp @@ -164,7 +179,6 @@ L000loop: leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp # 00_15 7 - movl %ebx,%edi movl %ebp,%esi roll $5,%ebp @@ -177,7 +191,6 @@ L000loop: leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp # 00_15 8 - movl %eax,%edx movl %ebp,%edi roll $5,%ebp @@ -190,7 +203,6 @@ L000loop: leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp # 00_15 9 - movl %esi,%ecx movl %ebp,%edx roll $5,%ebp @@ -203,7 +215,6 @@ L000loop: leal 1518500249(%ebp,%ebx,1),%ebp addl %ecx,%ebp # 00_15 10 - movl %edi,%ebx movl %ebp,%ecx roll $5,%ebp @@ -216,7 +227,6 @@ L000loop: leal 1518500249(%ebp,%eax,1),%ebp addl %ebx,%ebp # 00_15 11 - movl %edx,%eax movl %ebp,%ebx roll $5,%ebp @@ -229,7 +239,6 @@ L000loop: leal 1518500249(%ebp,%esi,1),%ebp addl %eax,%ebp # 00_15 12 - movl %ecx,%esi movl %ebp,%eax roll $5,%ebp @@ -242,7 +251,6 @@ L000loop: leal 1518500249(%ebp,%edi,1),%ebp addl %esi,%ebp # 00_15 13 - movl %ebx,%edi movl %ebp,%esi roll $5,%ebp @@ -255,7 +263,6 @@ L000loop: leal 1518500249(%ebp,%edx,1),%ebp addl %edi,%ebp # 00_15 14 - movl %eax,%edx movl %ebp,%edi roll $5,%ebp @@ -268,7 +275,6 @@ L000loop: leal 1518500249(%ebp,%ecx,1),%ebp addl %edx,%ebp # 00_15 15 - movl %esi,%ecx movl %ebp,%edx roll $5,%ebp @@ -282,7 +288,6 @@ L000loop: movl (%esp),%ebx addl %ebp,%ecx # 16_19 16 - movl %edi,%ebp xorl 8(%esp),%ebx xorl %esi,%ebp @@ -300,7 +305,6 @@ L000loop: movl 4(%esp),%eax addl %ebp,%ebx # 16_19 17 - movl %edx,%ebp xorl 12(%esp),%eax xorl %edi,%ebp @@ -318,7 +322,6 @@ L000loop: movl 8(%esp),%esi addl %ebp,%eax # 16_19 18 - movl %ecx,%ebp xorl 16(%esp),%esi xorl %edx,%ebp @@ -336,7 +339,6 @@ L000loop: movl 12(%esp),%edi addl %ebp,%esi # 16_19 19 - movl %ebx,%ebp xorl 20(%esp),%edi xorl %ecx,%ebp @@ -354,7 +356,6 @@ L000loop: movl 16(%esp),%edx addl %ebp,%edi # 20_39 20 - movl %esi,%ebp xorl 24(%esp),%edx xorl %eax,%ebp @@ -371,7 +372,6 @@ L000loop: movl 20(%esp),%ecx addl %ebp,%edx # 20_39 21 - movl %edi,%ebp xorl 28(%esp),%ecx xorl %esi,%ebp @@ -388,7 +388,6 @@ L000loop: movl 24(%esp),%ebx addl %ebp,%ecx # 20_39 22 - movl %edx,%ebp xorl 32(%esp),%ebx xorl %edi,%ebp @@ -405,7 +404,6 @@ L000loop: movl 28(%esp),%eax addl %ebp,%ebx # 20_39 23 - movl %ecx,%ebp xorl 36(%esp),%eax xorl %edx,%ebp @@ -422,7 +420,6 @@ L000loop: movl 32(%esp),%esi addl %ebp,%eax # 20_39 24 - movl %ebx,%ebp xorl 40(%esp),%esi xorl %ecx,%ebp @@ -439,7 +436,6 @@ L000loop: movl 36(%esp),%edi addl %ebp,%esi # 20_39 25 - movl %eax,%ebp xorl 44(%esp),%edi xorl %ebx,%ebp @@ -456,7 +452,6 @@ L000loop: movl 40(%esp),%edx addl %ebp,%edi # 20_39 26 - movl %esi,%ebp xorl 48(%esp),%edx xorl %eax,%ebp @@ -473,7 +468,6 @@ L000loop: movl 44(%esp),%ecx addl %ebp,%edx # 20_39 27 - movl %edi,%ebp xorl 52(%esp),%ecx xorl %esi,%ebp @@ -490,7 +484,6 @@ L000loop: movl 48(%esp),%ebx addl %ebp,%ecx # 20_39 28 - movl %edx,%ebp xorl 56(%esp),%ebx xorl %edi,%ebp @@ -507,7 +500,6 @@ L000loop: movl 52(%esp),%eax addl %ebp,%ebx # 20_39 29 - movl %ecx,%ebp xorl 60(%esp),%eax xorl %edx,%ebp @@ 
-524,7 +516,6 @@ L000loop: movl 56(%esp),%esi addl %ebp,%eax # 20_39 30 - movl %ebx,%ebp xorl (%esp),%esi xorl %ecx,%ebp @@ -541,7 +532,6 @@ L000loop: movl 60(%esp),%edi addl %ebp,%esi # 20_39 31 - movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp @@ -558,7 +548,6 @@ L000loop: movl (%esp),%edx addl %ebp,%edi # 20_39 32 - movl %esi,%ebp xorl 8(%esp),%edx xorl %eax,%ebp @@ -575,7 +564,6 @@ L000loop: movl 4(%esp),%ecx addl %ebp,%edx # 20_39 33 - movl %edi,%ebp xorl 12(%esp),%ecx xorl %esi,%ebp @@ -592,7 +580,6 @@ L000loop: movl 8(%esp),%ebx addl %ebp,%ecx # 20_39 34 - movl %edx,%ebp xorl 16(%esp),%ebx xorl %edi,%ebp @@ -609,7 +596,6 @@ L000loop: movl 12(%esp),%eax addl %ebp,%ebx # 20_39 35 - movl %ecx,%ebp xorl 20(%esp),%eax xorl %edx,%ebp @@ -626,7 +612,6 @@ L000loop: movl 16(%esp),%esi addl %ebp,%eax # 20_39 36 - movl %ebx,%ebp xorl 24(%esp),%esi xorl %ecx,%ebp @@ -643,7 +628,6 @@ L000loop: movl 20(%esp),%edi addl %ebp,%esi # 20_39 37 - movl %eax,%ebp xorl 28(%esp),%edi xorl %ebx,%ebp @@ -660,7 +644,6 @@ L000loop: movl 24(%esp),%edx addl %ebp,%edi # 20_39 38 - movl %esi,%ebp xorl 32(%esp),%edx xorl %eax,%ebp @@ -677,7 +660,6 @@ L000loop: movl 28(%esp),%ecx addl %ebp,%edx # 20_39 39 - movl %edi,%ebp xorl 36(%esp),%ecx xorl %esi,%ebp @@ -694,7 +676,6 @@ L000loop: movl 32(%esp),%ebx addl %ebp,%ecx # 40_59 40 - movl %edi,%ebp xorl 40(%esp),%ebx xorl %esi,%ebp @@ -714,7 +695,6 @@ L000loop: movl 36(%esp),%eax addl %ebp,%ebx # 40_59 41 - movl %edx,%ebp xorl 44(%esp),%eax xorl %edi,%ebp @@ -734,7 +714,6 @@ L000loop: movl 40(%esp),%esi addl %ebp,%eax # 40_59 42 - movl %ecx,%ebp xorl 48(%esp),%esi xorl %edx,%ebp @@ -754,7 +733,6 @@ L000loop: movl 44(%esp),%edi addl %ebp,%esi # 40_59 43 - movl %ebx,%ebp xorl 52(%esp),%edi xorl %ecx,%ebp @@ -774,7 +752,6 @@ L000loop: movl 48(%esp),%edx addl %ebp,%edi # 40_59 44 - movl %eax,%ebp xorl 56(%esp),%edx xorl %ebx,%ebp @@ -794,7 +771,6 @@ L000loop: movl 52(%esp),%ecx addl %ebp,%edx # 40_59 45 - movl %esi,%ebp xorl 60(%esp),%ecx xorl %eax,%ebp @@ -814,7 +790,6 @@ L000loop: movl 56(%esp),%ebx addl %ebp,%ecx # 40_59 46 - movl %edi,%ebp xorl (%esp),%ebx xorl %esi,%ebp @@ -834,7 +809,6 @@ L000loop: movl 60(%esp),%eax addl %ebp,%ebx # 40_59 47 - movl %edx,%ebp xorl 4(%esp),%eax xorl %edi,%ebp @@ -854,7 +828,6 @@ L000loop: movl (%esp),%esi addl %ebp,%eax # 40_59 48 - movl %ecx,%ebp xorl 8(%esp),%esi xorl %edx,%ebp @@ -874,7 +847,6 @@ L000loop: movl 4(%esp),%edi addl %ebp,%esi # 40_59 49 - movl %ebx,%ebp xorl 12(%esp),%edi xorl %ecx,%ebp @@ -894,7 +866,6 @@ L000loop: movl 8(%esp),%edx addl %ebp,%edi # 40_59 50 - movl %eax,%ebp xorl 16(%esp),%edx xorl %ebx,%ebp @@ -914,7 +885,6 @@ L000loop: movl 12(%esp),%ecx addl %ebp,%edx # 40_59 51 - movl %esi,%ebp xorl 20(%esp),%ecx xorl %eax,%ebp @@ -934,7 +904,6 @@ L000loop: movl 16(%esp),%ebx addl %ebp,%ecx # 40_59 52 - movl %edi,%ebp xorl 24(%esp),%ebx xorl %esi,%ebp @@ -954,7 +923,6 @@ L000loop: movl 20(%esp),%eax addl %ebp,%ebx # 40_59 53 - movl %edx,%ebp xorl 28(%esp),%eax xorl %edi,%ebp @@ -974,7 +942,6 @@ L000loop: movl 24(%esp),%esi addl %ebp,%eax # 40_59 54 - movl %ecx,%ebp xorl 32(%esp),%esi xorl %edx,%ebp @@ -994,7 +961,6 @@ L000loop: movl 28(%esp),%edi addl %ebp,%esi # 40_59 55 - movl %ebx,%ebp xorl 36(%esp),%edi xorl %ecx,%ebp @@ -1014,7 +980,6 @@ L000loop: movl 32(%esp),%edx addl %ebp,%edi # 40_59 56 - movl %eax,%ebp xorl 40(%esp),%edx xorl %ebx,%ebp @@ -1034,7 +999,6 @@ L000loop: movl 36(%esp),%ecx addl %ebp,%edx # 40_59 57 - movl %esi,%ebp xorl 44(%esp),%ecx xorl %eax,%ebp @@ -1054,7 +1018,6 @@ L000loop: movl 
40(%esp),%ebx addl %ebp,%ecx # 40_59 58 - movl %edi,%ebp xorl 48(%esp),%ebx xorl %esi,%ebp @@ -1074,7 +1037,6 @@ L000loop: movl 44(%esp),%eax addl %ebp,%ebx # 40_59 59 - movl %edx,%ebp xorl 52(%esp),%eax xorl %edi,%ebp @@ -1094,7 +1056,6 @@ L000loop: movl 48(%esp),%esi addl %ebp,%eax # 20_39 60 - movl %ebx,%ebp xorl 56(%esp),%esi xorl %ecx,%ebp @@ -1111,7 +1072,6 @@ L000loop: movl 52(%esp),%edi addl %ebp,%esi # 20_39 61 - movl %eax,%ebp xorl 60(%esp),%edi xorl %ebx,%ebp @@ -1128,7 +1088,6 @@ L000loop: movl 56(%esp),%edx addl %ebp,%edi # 20_39 62 - movl %esi,%ebp xorl (%esp),%edx xorl %eax,%ebp @@ -1145,7 +1104,6 @@ L000loop: movl 60(%esp),%ecx addl %ebp,%edx # 20_39 63 - movl %edi,%ebp xorl 4(%esp),%ecx xorl %esi,%ebp @@ -1162,7 +1120,6 @@ L000loop: movl (%esp),%ebx addl %ebp,%ecx # 20_39 64 - movl %edx,%ebp xorl 8(%esp),%ebx xorl %edi,%ebp @@ -1179,7 +1136,6 @@ L000loop: movl 4(%esp),%eax addl %ebp,%ebx # 20_39 65 - movl %ecx,%ebp xorl 12(%esp),%eax xorl %edx,%ebp @@ -1196,7 +1152,6 @@ L000loop: movl 8(%esp),%esi addl %ebp,%eax # 20_39 66 - movl %ebx,%ebp xorl 16(%esp),%esi xorl %ecx,%ebp @@ -1213,7 +1168,6 @@ L000loop: movl 12(%esp),%edi addl %ebp,%esi # 20_39 67 - movl %eax,%ebp xorl 20(%esp),%edi xorl %ebx,%ebp @@ -1230,7 +1184,6 @@ L000loop: movl 16(%esp),%edx addl %ebp,%edi # 20_39 68 - movl %esi,%ebp xorl 24(%esp),%edx xorl %eax,%ebp @@ -1247,7 +1200,6 @@ L000loop: movl 20(%esp),%ecx addl %ebp,%edx # 20_39 69 - movl %edi,%ebp xorl 28(%esp),%ecx xorl %esi,%ebp @@ -1264,7 +1216,6 @@ L000loop: movl 24(%esp),%ebx addl %ebp,%ecx # 20_39 70 - movl %edx,%ebp xorl 32(%esp),%ebx xorl %edi,%ebp @@ -1281,7 +1232,6 @@ L000loop: movl 28(%esp),%eax addl %ebp,%ebx # 20_39 71 - movl %ecx,%ebp xorl 36(%esp),%eax xorl %edx,%ebp @@ -1298,7 +1248,6 @@ L000loop: movl 32(%esp),%esi addl %ebp,%eax # 20_39 72 - movl %ebx,%ebp xorl 40(%esp),%esi xorl %ecx,%ebp @@ -1315,7 +1264,6 @@ L000loop: movl 36(%esp),%edi addl %ebp,%esi # 20_39 73 - movl %eax,%ebp xorl 44(%esp),%edi xorl %ebx,%ebp @@ -1332,7 +1280,6 @@ L000loop: movl 40(%esp),%edx addl %ebp,%edi # 20_39 74 - movl %esi,%ebp xorl 48(%esp),%edx xorl %eax,%ebp @@ -1349,7 +1296,6 @@ L000loop: movl 44(%esp),%ecx addl %ebp,%edx # 20_39 75 - movl %edi,%ebp xorl 52(%esp),%ecx xorl %esi,%ebp @@ -1366,7 +1312,6 @@ L000loop: movl 48(%esp),%ebx addl %ebp,%ecx # 20_39 76 - movl %edx,%ebp xorl 56(%esp),%ebx xorl %edi,%ebp @@ -1383,7 +1328,6 @@ L000loop: movl 52(%esp),%eax addl %ebp,%ebx # 20_39 77 - movl %ecx,%ebp xorl 60(%esp),%eax xorl %edx,%ebp @@ -1399,7 +1343,6 @@ L000loop: movl 56(%esp),%esi addl %ebp,%eax # 20_39 78 - movl %ebx,%ebp xorl (%esp),%esi xorl %ecx,%ebp @@ -1415,7 +1358,6 @@ L000loop: movl 60(%esp),%edi addl %ebp,%esi # 20_39 79 - movl %eax,%ebp xorl 4(%esp),%edi xorl %ebx,%ebp @@ -1445,14 +1387,2580 @@ L000loop: movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) - jb L000loop + jb L002loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret +.align 4 +__sha1_block_data_order_shaext: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L003pic_point +L003pic_point: + popl %ebp + leal LK_XX_XX-L003pic_point(%ebp),%ebp +Lshaext_shortcut: + movl 20(%esp),%edi + movl %esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 
102,15,56,0,251 + jmp L004loop_shaext +.align 4,0x90 +L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L005pic_point +L005pic_point: + popl %ebp + leal LK_XX_XX-L005pic_point(%ebp),%ebp +Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl 
%edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp L006loop +.align 4,0x90 +L006loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq 
$12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi 
+ movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + 
xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + 
movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je L007done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + 
xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp L006loop +.align 4,0x90 +L007done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + 
movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__sha1_block_data_order_avx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L008pic_point +L008pic_point: + popl %ebp + leal LK_XX_XX-L008pic_point(%ebp),%ebp +Lavx_shortcut: + vzeroall + vmovdqa (%ebp),%xmm7 + vmovdqa 16(%ebp),%xmm0 + vmovdqa 32(%ebp),%xmm1 + vmovdqa 48(%ebp),%xmm2 + vmovdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + vmovdqa %xmm0,112(%esp) + vmovdqa %xmm1,128(%esp) + vmovdqa %xmm2,144(%esp) + shll $6,%edx + vmovdqa %xmm7,160(%esp) + addl %ebp,%edx + vmovdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + vmovdqu -64(%ebp),%xmm0 + vmovdqu -48(%ebp),%xmm1 + vmovdqu -32(%ebp),%xmm2 + vmovdqu -16(%ebp),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vmovdqa %xmm7,96(%esp) + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm7,%xmm0,%xmm4 + vpaddd %xmm7,%xmm1,%xmm5 + vpaddd %xmm7,%xmm2,%xmm6 + vmovdqa %xmm4,(%esp) + movl %ecx,%ebp + vmovdqa %xmm5,16(%esp) + xorl %edx,%ebp + vmovdqa %xmm6,32(%esp) + andl %ebp,%esi + jmp L009loop +.align 4,0x90 +L009loop: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%ebp + addl (%esp),%edi + vpaddd %xmm3,%xmm7,%xmm7 + vmovdqa %xmm0,64(%esp) + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%edi + vpxor %xmm2,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vmovdqa %xmm7,48(%esp) + movl %edi,%esi + addl 4(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%edi,%edi + addl %ebp,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm6 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm0 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrld $30,%xmm0,%xmm7 + vpor %xmm6,%xmm4,%xmm4 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + vpslld $2,%xmm0,%xmm0 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vpxor %xmm7,%xmm4,%xmm4 + movl %ecx,%esi + addl 12(%esp),%ebx + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpxor %xmm0,%xmm4,%xmm4 + addl %ebp,%ebx + andl %edx,%esi + vmovdqa 96(%esp),%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%ebp + addl 16(%esp),%eax + vpaddd %xmm4,%xmm0,%xmm0 + vmovdqa %xmm1,80(%esp) + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vmovdqa %xmm0,(%esp) + movl %eax,%esi + addl 20(%esp),%edi + vpxor %xmm7,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %ebp,%edi + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm7 + xorl %ecx,%ebx + addl %eax,%edi + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm1 + vpaddd %xmm5,%xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm0 + vpor %xmm7,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpxor %xmm0,%xmm5,%xmm5 + movl 
%edx,%esi + addl 28(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpxor %xmm1,%xmm5,%xmm5 + addl %ebp,%ecx + andl %edi,%esi + vmovdqa 112(%esp),%xmm1 + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl %ecx,%ebp + addl 32(%esp),%ebx + vpaddd %xmm5,%xmm1,%xmm1 + vmovdqa %xmm2,96(%esp) + xorl %edi,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + vpxor %xmm2,%xmm6,%xmm6 + xorl %edi,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%ecx,%ecx + xorl %edi,%ebp + vmovdqa %xmm1,16(%esp) + movl %ebx,%esi + addl 36(%esp),%eax + vpxor %xmm0,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + addl %ebp,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm2 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm1 + vpor %xmm0,%xmm6,%xmm6 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + vmovdqa 64(%esp),%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%ebp + vpxor %xmm1,%xmm6,%xmm6 + movl %edi,%esi + addl 44(%esp),%edx + xorl %ebx,%eax + shldl $5,%edi,%edi + vpxor %xmm2,%xmm6,%xmm6 + addl %ebp,%edx + andl %eax,%esi + vmovdqa 112(%esp),%xmm2 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%ebp + addl 48(%esp),%ecx + vpaddd %xmm6,%xmm2,%xmm2 + vmovdqa %xmm3,64(%esp) + xorl %eax,%edi + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm1 + addl %esi,%ecx + andl %edi,%ebp + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%edi + addl %edx,%ecx + vpxor %xmm5,%xmm1,%xmm1 + shrdl $7,%edx,%edx + xorl %eax,%ebp + vmovdqa %xmm2,32(%esp) + movl %ecx,%esi + addl 52(%esp),%ebx + vpxor %xmm1,%xmm7,%xmm7 + xorl %edi,%edx + shldl $5,%ecx,%ecx + addl %ebp,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm1 + xorl %edi,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + vpslldq $12,%xmm7,%xmm3 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm2 + vpor %xmm1,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + vmovdqa 80(%esp),%xmm1 + shrdl $7,%ebx,%ebx + xorl %edx,%ebp + vpxor %xmm2,%xmm7,%xmm7 + movl %eax,%esi + addl 60(%esp),%edi + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpxor %xmm3,%xmm7,%xmm7 + addl %ebp,%edi + andl %ebx,%esi + vmovdqa 112(%esp),%xmm3 + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,80(%esp) + xorl %ebx,%eax + shldl $5,%edi,%edi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + addl %esi,%edx + andl %eax,%ebp + vpxor %xmm2,%xmm0,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + shrdl $7,%edi,%edi + xorl %ebx,%ebp + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + vpor %xmm2,%xmm0,%xmm0 + xorl %edi,%edx + shldl $5,%ecx,%ecx + vmovdqa 96(%esp),%xmm2 + addl %esi,%ebx + andl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl 
$7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm3,%xmm1,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm3,%xmm1,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + vmovdqa 64(%esp),%xmm3 + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + vmovdqa 128(%esp),%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm4,%xmm2,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpslld $2,%xmm2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vpor %xmm4,%xmm2,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + vmovdqa 80(%esp),%xmm4 + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + vmovdqa 96(%esp),%xmm5 + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpalignr $8,%xmm2,%xmm3,%xmm6 + vpxor %xmm0,%xmm4,%xmm4 + addl (%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + vmovdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + vmovdqa %xmm7,%xmm0 + vpaddd %xmm3,%xmm7,%xmm7 + shrdl $7,%edi,%edi + addl %edx,%ecx + vpxor %xmm6,%xmm4,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm6 + vmovdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm6,%xmm4,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + vmovdqa 64(%esp),%xmm6 + movl %eax,%esi + shldl 
$5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpalignr $8,%xmm3,%xmm4,%xmm7 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + vpxor %xmm6,%xmm5,%xmm5 + vmovdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + vmovdqa %xmm0,%xmm1 + vpaddd %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + addl %edi,%edx + vpxor %xmm7,%xmm5,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm7 + vmovdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor %xmm7,%xmm5,%xmm5 + addl 28(%esp),%eax + vmovdqa 80(%esp),%xmm7 + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + vmovdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + vmovdqa %xmm1,%xmm2 + vpaddd %xmm5,%xmm1,%xmm1 + shldl $5,%eax,%eax + addl %esi,%edi + vpxor %xmm0,%xmm6,%xmm6 + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + vpsrld $30,%xmm6,%xmm0 + vmovdqa %xmm1,16(%esp) + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + vpor %xmm0,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%edi,%edi + vmovdqa 96(%esp),%xmm0 + movl %edx,%ebp + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm1 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + vmovdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + vmovdqa 144(%esp),%xmm3 + vpaddd %xmm6,%xmm2,%xmm2 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + vpsrld $30,%xmm7,%xmm1 + vmovdqa %xmm2,32(%esp) + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + vpor %xmm1,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vmovdqa 64(%esp),%xmm1 + movl %edi,%ebp + xorl %ebx,%esi + shldl $5,%edi,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm2 + vpxor %xmm4,%xmm0,%xmm0 + addl (%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + vmovdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + vmovdqa %xmm3,%xmm4 + vpaddd %xmm7,%xmm3,%xmm3 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm2,%xmm0,%xmm0 + xorl 
%edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + vpsrld $30,%xmm0,%xmm2 + vmovdqa %xmm3,48(%esp) + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + vpor %xmm2,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vmovdqa 80(%esp),%xmm2 + movl %eax,%ebp + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %edi,%esi + xorl %ebx,%ebp + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + vpalignr $8,%xmm7,%xmm0,%xmm3 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%edi,%edi + vpxor %xmm2,%xmm1,%xmm1 + vmovdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + vmovdqa %xmm4,%xmm5 + vpaddd %xmm0,%xmm4,%xmm4 + shldl $5,%edx,%edx + addl %esi,%ecx + vpxor %xmm3,%xmm1,%xmm1 + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + vpsrld $30,%xmm1,%xmm3 + vmovdqa %xmm4,(%esp) + andl %edi,%ebp + xorl %eax,%edi + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %edi,%ebp + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + vpor %xmm3,%xmm1,%xmm1 + xorl %edi,%edx + shrdl $7,%ecx,%ecx + vmovdqa 96(%esp),%xmm3 + movl %ebx,%ebp + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%ebp + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + vpalignr $8,%xmm0,%xmm1,%xmm4 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + vmovdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + vmovdqa %xmm5,%xmm6 + vpaddd %xmm1,%xmm5,%xmm5 + shldl $5,%edi,%edi + addl %esi,%edx + vpxor %xmm4,%xmm2,%xmm2 + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + vpsrld $30,%xmm2,%xmm4 + vmovdqa %xmm5,16(%esp) + andl %eax,%ebp + xorl %ebx,%eax + shrdl $7,%edi,%edi + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%ebp + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + vpor %xmm4,%xmm2,%xmm2 + xorl %eax,%edi + shrdl $7,%edx,%edx + vmovdqa 64(%esp),%xmm4 + movl %ecx,%ebp + xorl %edi,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%ebp + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm5 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + vmovdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + vmovdqa %xmm6,%xmm7 + vpaddd %xmm2,%xmm6,%xmm6 + shrdl $7,%ebx,%ebx + addl %eax,%edi + vpxor %xmm5,%xmm3,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + vpsrld $30,%xmm3,%xmm5 + vmovdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + movl 
%edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vpor %xmm5,%xmm3,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl (%esp),%eax + vpaddd %xmm3,%xmm7,%xmm7 + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm7,48(%esp) + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je L010done + vmovdqa 160(%esp),%xmm7 + vmovdqa 176(%esp),%xmm6 + vmovdqu (%ebp),%xmm0 + vmovdqu 16(%ebp),%xmm1 + vmovdqu 32(%ebp),%xmm2 + vmovdqu 48(%ebp),%xmm3 + addl $64,%ebp + vpshufb %xmm6,%xmm0,%xmm0 + movl %ebp,196(%esp) + vmovdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + vpshufb %xmm6,%xmm1,%xmm1 + movl %ecx,%ebp + shldl $5,%ecx,%ecx + vpaddd %xmm7,%xmm0,%xmm4 + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,(%esp) + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%ebp + shldl $5,%edx,%edx + vpaddd %xmm7,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + vmovdqa %xmm5,16(%esp) + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %edi,%ebp + shldl $5,%edi,%edi + vpaddd %xmm7,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + vmovdqa %xmm6,32(%esp) + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,%ebx + movl %ecx,8(%ebp) + xorl %edx,%ebx + movl %edx,12(%ebp) + movl %edi,16(%ebp) + movl %esi,%ebp + andl %ebx,%esi + movl %ebp,%ebx + jmp L009loop +.align 4,0x90 
+L010done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + shldl $5,%eax,%eax + addl %esi,%edi + xorl %ecx,%ebp + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + shldl $5,%edi,%edi + addl %ebp,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%ebp + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %ebp,%ebx + xorl %edi,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%ebp + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + shldl $5,%eax,%eax + addl %ebp,%edi + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + shldl $5,%edi,%edi + addl %esi,%edx + xorl %ebx,%ebp + shrdl $7,%eax,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + shldl $5,%edx,%edx + addl %ebp,%ecx + xorl %eax,%esi + shrdl $7,%edi,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edi,%ebp + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %ebp,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroall + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s b/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s index 67c7a96bc02a5f..d30c582726c2be 100644 --- a/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s +++ b/deps/openssl/asm/x86-macosx-gas/sha/sha256-586.s @@ -24,195 +24,212 @@ L000pic_point: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + je L005AVX + testl $512,%ebx + jnz 
L006SSSE3 +L003no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae L007unrolled + jmp L002loop .align 4,0x90 L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx - movl 12(%edi),%edx bswap %eax + movl 12(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx - movl 28(%edi),%edx bswap %eax + movl 28(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx - movl 44(%edi),%edx bswap %eax + movl 44(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx - movl 60(%edi),%edx bswap %eax + movl 60(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx addl $64,%edi - subl $32,%esp - movl %edi,100(%esp) + leal -36(%esp),%esp + movl %edi,104(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi - movl %ebx,4(%esp) - movl %ecx,8(%esp) - movl %edi,12(%esp) + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi - movl %ebx,20(%esp) - movl %ecx,24(%esp) - movl %edi,28(%esp) + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) .align 4,0x90 -L00300_15: - movl 92(%esp),%ebx +L00800_15: movl %edx,%ecx + movl 24(%esp),%esi rorl $14,%ecx - movl 20(%esp),%esi - xorl %edx,%ecx - rorl $5,%ecx + movl 28(%esp),%edi xorl %edx,%ecx - rorl $6,%ecx - movl 24(%esp),%edi - addl %ecx,%ebx xorl %edi,%esi - movl %edx,16(%esp) - movl %eax,%ecx + movl 96(%esp),%ebx + rorl $5,%ecx andl %edx,%esi - movl 12(%esp),%edx + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx xorl %edi,%esi - movl %eax,%edi + rorl $6,%edx + movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx - addl 28(%esp),%ebx + addl %edx,%ebx + movl 8(%esp),%edi xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp rorl $11,%ecx - movl 4(%esp),%esi + movl (%ebp),%esi xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) addl %ebx,%edx - movl 8(%esp),%edi + andl 4(%esp),%eax addl %ecx,%ebx - movl %eax,(%esp) - movl %eax,%ecx - subl $4,%esp - orl %esi,%eax - andl %esi,%ecx - andl %edi,%eax - movl (%ebp),%esi - orl %ecx,%eax + xorl %edi,%eax addl $4,%ebp addl %ebx,%eax - addl %esi,%edx - addl %esi,%eax cmpl $3248222580,%esi - jne L00300_15 - movl 152(%esp),%ebx + jne L00800_15 + movl 156(%esp),%ecx + jmp L00916_63 .align 4,0x90 -L00416_63: - movl %ebx,%esi - movl 100(%esp),%ecx - rorl $11,%esi - movl %ecx,%edi - xorl %ebx,%esi - rorl $7,%esi +L00916_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx shrl $3,%ebx - rorl $2,%edi - xorl %esi,%ebx - xorl %ecx,%edi - rorl $17,%edi - shrl $10,%ecx - addl 156(%esp),%ebx - xorl %ecx,%edi - addl 120(%esp),%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx movl %edx,%ecx - addl %edi,%ebx + xorl %esi,%edi + movl 24(%esp),%esi rorl $14,%ecx - movl 20(%esp),%esi - xorl %edx,%ecx - rorl $5,%ecx - movl %ebx,92(%esp) + addl %edi,%ebx + movl 28(%esp),%edi xorl %edx,%ecx - rorl $6,%ecx - movl 24(%esp),%edi - addl %ecx,%ebx xorl %edi,%esi - movl %edx,16(%esp) - movl %eax,%ecx + movl 
%ebx,96(%esp) + rorl $5,%ecx andl %edx,%esi - movl 12(%esp),%edx + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx xorl %edi,%esi - movl %eax,%edi + rorl $6,%edx + movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx - addl 28(%esp),%ebx + addl %edx,%ebx + movl 8(%esp),%edi xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp rorl $11,%ecx - movl 4(%esp),%esi + movl (%ebp),%esi xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) addl %ebx,%edx - movl 8(%esp),%edi + andl 4(%esp),%eax addl %ecx,%ebx - movl %eax,(%esp) - movl %eax,%ecx - subl $4,%esp - orl %esi,%eax - andl %esi,%ecx - andl %edi,%eax - movl (%ebp),%esi - orl %ecx,%eax + xorl %edi,%eax + movl 156(%esp),%ecx addl $4,%ebp addl %ebx,%eax - movl 152(%esp),%ebx - addl %esi,%edx - addl %esi,%eax cmpl $3329325298,%esi - jne L00416_63 - movl 352(%esp),%esi - movl 4(%esp),%ebx - movl 8(%esp),%ecx - movl 12(%esp),%edi + jne L00916_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx addl (%esi),%eax addl 4(%esi),%ebx - addl 8(%esi),%ecx - addl 12(%esi),%edi + addl 8(%esi),%edi + addl 12(%esi),%ecx movl %eax,(%esi) movl %ebx,4(%esi) - movl %ecx,8(%esi) - movl %edi,12(%esi) - movl 20(%esp),%eax - movl 24(%esp),%ebx - movl 28(%esp),%ecx - movl 356(%esp),%edi + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi addl 16(%esi),%edx addl 20(%esi),%eax addl 24(%esi),%ebx @@ -221,7 +238,7 @@ L00416_63: movl %eax,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) - addl $352,%esp + leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi jb L002loop @@ -233,24 +250,6536 @@ L00416_63: ret .align 6,0x90 L001K256: -.long 1116352408,1899447441,3049323471,3921009573 -.long 961987163,1508970993,2453635748,2870763221 -.long 3624381080,310598401,607225278,1426881987 -.long 1925078388,2162078206,2614888103,3248222580 -.long 3835390401,4022224774,264347078,604807628 -.long 770255983,1249150122,1555081692,1996064986 -.long 2554220882,2821834349,2952996808,3210313671 -.long 3336571891,3584528711,113926993,338241895 -.long 666307205,773529912,1294757372,1396182291 -.long 1695183700,1986661051,2177026350,2456956037 -.long 2730485921,2820302411,3259730800,3345764771 -.long 3516065817,3600352804,4094571909,275423344 -.long 430227734,506948616,659060556,883997877 -.long 958139571,1322822218,1537002063,1747873779 -.long 1955562222,2024104815,2227730452,2361852424 -.long 2428436474,2756734187,3204031479,3329325298 +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 +.align 4,0x90 +L007unrolled: + leal 
-96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp L010grand_loop +.align 4,0x90 +L010grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 
%edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl 
%ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + 
addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl 
%ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl 
%eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl 
%eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx 
+ xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + 
movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl 
$3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + 
addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 
20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl 
%esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl 
%edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl 
%edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb L010grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L004shaext: + subl $32,%esp + movdqu (%esi),%xmm1 + leal 128(%ebp),%ebp + movdqu 16(%esi),%xmm2 + movdqa 128(%ebp),%xmm7 + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L011loop_shaext +.align 4,0x90 +L011loop_shaext: + movdqu (%edi),%xmm3 + movdqu 16(%edi),%xmm4 + movdqu 32(%edi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%edi),%xmm6 + movdqa %xmm2,16(%esp) + movdqa -128(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,(%esp) +.byte 15,56,203,202 + movdqa -112(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leal 64(%edi),%edi +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 
+ nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -80(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa -64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa -48(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -16(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa (%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 16(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 48(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + movdqa 96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa 128(%ebp),%xmm7 +.byte 15,56,203,202 + movdqa 112(%ebp),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + cmpl %edi,%eax + nop +.byte 15,56,203,202 + paddd 16(%esp),%xmm2 + paddd (%esp),%xmm1 + jnz L011loop_shaext + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + movl 44(%esp),%esp + movdqu %xmm1,(%esi) + movdqu %xmm2,16(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L006SSSE3: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp 
L012grand_ssse3 +.align 4,0x90 +L012grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp L013ssse3_00_47 +.align 4,0x90 +L013ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx 
+ movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 
64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq 
$17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L013ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl 
%ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + 
andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + 
addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L012grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L005AVX: + andl $264,%edx + cmpl $264,%edx + je L014AVX_BMI + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp L015grand_avx +.align 5,0x90 +L015grand_avx: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp L016avx_00_47 +.align 4,0x90 +L016avx_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm2,%xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm0,%xmm0 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm0,%xmm0 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm0,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor 
%xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd (%ebp),%xmm0,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm3,%xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm1,%xmm1 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm1,%xmm1 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm1,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + 
andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + vpalignr $4,%xmm0,%xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + vpaddd %xmm4,%xmm2,%xmm2 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + vpaddd %xmm7,%xmm2,%xmm2 + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm2,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + 
vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + vpalignr $4,%xmm1,%xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrld $3,%xmm4,%xmm7 + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + vpslld $14,%xmm4,%xmm5 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpsrld $11,%xmm6,%xmm6 + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpxor %xmm5,%xmm4,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + vpslld $11,%xmm5,%xmm5 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + vpxor %xmm6,%xmm4,%xmm4 + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + vpsrld $10,%xmm7,%xmm6 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + vpxor %xmm5,%xmm4,%xmm4 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + vpaddd %xmm4,%xmm3,%xmm3 + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + vpxor %xmm5,%xmm6,%xmm6 + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + vpsrlq $19,%xmm7,%xmm7 + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + vpshufd $132,%xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + vpsrldq $8,%xmm7,%xmm7 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + vpaddd %xmm7,%xmm3,%xmm3 + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + vpshufd $80,%xmm3,%xmm7 + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + vpsrld $10,%xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + vpsrlq $17,%xmm7,%xmm5 + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + vpxor %xmm5,%xmm6,%xmm6 + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + vpsrlq $19,%xmm7,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + vpshufd $232,%xmm6,%xmm7 + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + vpslldq $8,%xmm7,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L016avx_00_47 + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl 
%edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + 
addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + shrdl 
$5,%edx,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + shrdl $9,%ecx,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + shrdl $11,%ecx,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + shrdl $2,%ecx,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + shrdl $14,%edx,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + shrdl $5,%edx,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + shrdl $6,%edx,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + shrdl $9,%ecx,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + shrdl $11,%ecx,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L015grand_avx + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L014AVX_BMI: + leal -96(%esp),%esp + vzeroall + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + vmovdqa 256(%ebp),%xmm7 + jmp L017grand_avx_bmi +.align 5,0x90 +L017grand_avx_bmi: + vmovdqu (%edi),%xmm0 + vmovdqu 16(%edi),%xmm1 + vmovdqu 32(%edi),%xmm2 + vmovdqu 48(%edi),%xmm3 + addl $64,%edi + vpshufb %xmm7,%xmm0,%xmm0 + movl %edi,100(%esp) + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd (%ebp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 16(%ebp),%xmm1,%xmm5 + vpaddd 32(%ebp),%xmm2,%xmm6 + vpaddd 48(%ebp),%xmm3,%xmm7 + vmovdqa %xmm4,32(%esp) + vmovdqa %xmm5,48(%esp) + vmovdqa %xmm6,64(%esp) + vmovdqa %xmm7,80(%esp) + jmp L018avx_bmi_00_47 +.align 4,0x90 +L018avx_bmi_00_47: + addl $64,%ebp + vpalignr $4,%xmm0,%xmm1,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 
24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm0,%xmm0 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + vpshufd $250,%xmm3,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + vpaddd %xmm4,%xmm0,%xmm0 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm0,%xmm0 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm0,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm0,%xmm0 + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + vpaddd (%ebp),%xmm0,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,32(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm1,%xmm1 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + vpshufd $250,%xmm0,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl 
$25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + vpaddd %xmm4,%xmm1,%xmm1 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm1,%xmm1 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm1,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm1,%xmm1 + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + vpaddd 16(%ebp),%xmm1,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,48(%esp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + vpalignr $4,%xmm0,%xmm1,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + vpaddd %xmm7,%xmm2,%xmm2 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + vpshufd $250,%xmm1,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + vpaddd %xmm4,%xmm2,%xmm2 + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq 
$19,%xmm7,%xmm7 + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm2,%xmm2 + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm2,%xmm7 + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm2,%xmm2 + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + vpaddd 32(%ebp),%xmm2,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,64(%esp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + vpalignr $4,%xmm1,%xmm2,%xmm7 + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + vpsrld $7,%xmm4,%xmm6 + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + vpaddd %xmm7,%xmm3,%xmm3 + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrld $3,%xmm4,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpslld $14,%xmm4,%xmm5 + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpxor %xmm6,%xmm7,%xmm4 + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + vpshufd $250,%xmm2,%xmm7 + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + vpsrld $11,%xmm6,%xmm6 + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpslld $11,%xmm5,%xmm5 + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + vpxor %xmm6,%xmm4,%xmm4 + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpsrld $10,%xmm7,%xmm6 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpxor %xmm5,%xmm4,%xmm4 + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpsrlq $17,%xmm7,%xmm5 + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + vpaddd %xmm4,%xmm3,%xmm3 + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + vpxor %xmm5,%xmm6,%xmm6 + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + vpsrlq $19,%xmm7,%xmm7 + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpxor %xmm7,%xmm6,%xmm6 + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + vpshufd $132,%xmm6,%xmm7 + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + vpsrldq $8,%xmm7,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + vpaddd %xmm7,%xmm3,%xmm3 + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + vpshufd $80,%xmm3,%xmm7 + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + vpsrld $10,%xmm7,%xmm6 + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + vpsrlq $17,%xmm7,%xmm5 + leal (%ebx,%ecx,1),%ebx + rorxl 
$6,%edx,%ecx + rorxl $11,%edx,%esi + vpxor %xmm5,%xmm6,%xmm6 + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + vpsrlq $19,%xmm7,%xmm7 + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + vpxor %xmm7,%xmm6,%xmm6 + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + vpshufd $232,%xmm6,%xmm7 + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + vpslldq $8,%xmm7,%xmm7 + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + vpaddd %xmm7,%xmm3,%xmm3 + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + vpaddd 48(%ebp),%xmm3,%xmm6 + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + vmovdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L018avx_bmi_00_47 + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 32(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 36(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 40(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 44(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 48(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + 
rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 52(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 56(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 60(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,16(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 24(%esp),%edx,%esi + xorl %edi,%ecx + andl 20(%esp),%edx + movl %eax,(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 4(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + andl %eax,%ebx + addl 64(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 12(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,12(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 20(%esp),%edx,%esi + xorl %edi,%ecx + andl 16(%esp),%edx + movl %ebx,28(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl (%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + andl %ebx,%eax + addl 68(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 8(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,8(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 16(%esp),%edx,%esi + xorl %edi,%ecx + andl 12(%esp),%edx + movl %eax,24(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 28(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + andl %eax,%ebx + addl 72(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 4(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,4(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 12(%esp),%edx,%esi + xorl %edi,%ecx + andl 8(%esp),%edx + movl %ebx,20(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 24(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + andl %ebx,%eax + addl 76(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl (%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 8(%esp),%edx,%esi + xorl %edi,%ecx + andl 4(%esp),%edx + movl %eax,16(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + 
xorl %edi,%esi + movl 20(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + andl %eax,%ebx + addl 80(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 28(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,28(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 4(%esp),%edx,%esi + xorl %edi,%ecx + andl (%esp),%edx + movl %ebx,12(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 16(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + andl %ebx,%eax + addl 84(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 24(%esp),%edx + leal (%eax,%ecx,1),%eax + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,24(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl (%esp),%edx,%esi + xorl %edi,%ecx + andl 28(%esp),%edx + movl %eax,8(%esp) + orl %esi,%edx + rorxl $2,%eax,%edi + rorxl $13,%eax,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%eax,%ecx + xorl %edi,%esi + movl 12(%esp),%edi + xorl %esi,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + andl %eax,%ebx + addl 88(%esp),%edx + xorl %edi,%ebx + addl %edx,%ecx + addl 20(%esp),%edx + leal (%ebx,%ecx,1),%ebx + rorxl $6,%edx,%ecx + rorxl $11,%edx,%esi + movl %edx,20(%esp) + rorxl $25,%edx,%edi + xorl %esi,%ecx + andnl 28(%esp),%edx,%esi + xorl %edi,%ecx + andl 24(%esp),%edx + movl %ebx,4(%esp) + orl %esi,%edx + rorxl $2,%ebx,%edi + rorxl $13,%ebx,%esi + leal (%edx,%ecx,1),%edx + rorxl $22,%ebx,%ecx + xorl %edi,%esi + movl 8(%esp),%edi + xorl %esi,%ecx + xorl %edi,%ebx + addl (%esp),%edx + andl %ebx,%eax + addl 92(%esp),%edx + xorl %edi,%eax + addl %edx,%ecx + addl 16(%esp),%edx + leal (%eax,%ecx,1),%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + vmovdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L017grand_avx_bmi + movl 108(%esp),%esp + vzeroall + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/sha/sha512-586.s b/deps/openssl/asm/x86-macosx-gas/sha/sha512-586.s index 2c9975305a3e83..d539b1d168116f 100644 --- a/deps/openssl/asm/x86-macosx-gas/sha/sha512-586.s +++ b/deps/openssl/asm/x86-macosx-gas/sha/sha512-586.s @@ -24,6 +24,2269 @@ L000pic_point: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K512(%ebp),%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je L003SSSE3 + subl $80,%esp + jmp L004loop_sse2 +.align 4,0x90 +L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq 
%mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp L00500_14_sse2 +.align 4,0x90 +L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp L00616_79_sse2 +.align 4,0x90 +L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 
+ psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 5,0x90 +L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + 
movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp L00800_47_ssse3 +.align 5,0x90 +L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 
+ pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 
+ pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + 
paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa 
%xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + 
movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + 
paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq 
%mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq 
%mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + 
paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret .align 4,0x90 L002loop_x86: movl (%edi),%eax @@ -129,7 +2392,7 @@ L002loop_x86: movl $16,%ecx .long 2784229001 .align 4,0x90 -L00300_15_x86: +L00900_15_x86: movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi @@ -236,9 +2499,9 @@ L00300_15_x86: subl $8,%esp leal 8(%ebp),%ebp cmpb $148,%dl - jne L00300_15_x86 + jne L00900_15_x86 .align 4,0x90 -L00416_79_x86: +L01016_79_x86: movl 312(%esp),%ecx movl 316(%esp),%edx movl %ecx,%esi @@ -411,7 +2674,7 @@ L00416_79_x86: subl $8,%esp leal 8(%ebp),%ebp cmpb $23,%dl - jne L00416_79_x86 + jne L01016_79_x86 movl 840(%esp),%esi movl 844(%esp),%edi movl (%esi),%eax @@ -554,8 +2817,15 @@ L001K512: .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-macosx-gas/whrlpool/wp-mmx.s b/deps/openssl/asm/x86-macosx-gas/whrlpool/wp-mmx.s index 5d612e0f7537e6..379e585263db4d 100644 --- a/deps/openssl/asm/x86-macosx-gas/whrlpool/wp-mmx.s +++ b/deps/openssl/asm/x86-macosx-gas/whrlpool/wp-mmx.s @@ -65,228 +65,230 @@ L003round: movq 4096(%ebp,%esi,8),%mm0 movl (%esp),%eax movl 4(%esp),%ebx - movb %al,%cl - movb %ah,%dl + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 movq 7(%ebp,%edi,8),%mm1 - movb %al,%cl - movb 
%ah,%dl movl 8(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx movq 6(%ebp,%esi,8),%mm2 movq 5(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx movq 4(%ebp,%esi,8),%mm4 movq 3(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl movl 12(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx movq 2(%ebp,%esi,8),%mm6 movq 1(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl movl 16(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl movl 20(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl movl 24(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl movl 28(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl movl 32(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl movl 36(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl movl 40(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl movl 44(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 
1(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 - movb %al,%cl - movb %ah,%dl movl 48(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl movl 52(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl movl 56(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl movl 60(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl movl 64(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl movl 68(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 movq %mm0,(%esp) @@ -297,226 +299,226 @@ L003round: movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 pxor 7(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl movl 72(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm2 pxor 5(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm4 pxor 3(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl movl 76(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm6 pxor 1(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl movl 80(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 - movb %bl,%cl - 
movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl movl 84(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl movl 88(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl movl 92(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl movl 96(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl movl 100(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl movl 104(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl movl 108(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 - movb %al,%cl - movb %ah,%dl movl 112(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl movl 116(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor 
(%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl movl 120(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl movl 124(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 leal 128(%esp),%ebx diff --git a/deps/openssl/asm/x86-macosx-gas/x86cpuid.s b/deps/openssl/asm/x86-macosx-gas/x86cpuid.s index 35fcafa6a4fd21..3db70b61139e72 100644 --- a/deps/openssl/asm/x86-macosx-gas/x86cpuid.s +++ b/deps/openssl/asm/x86-macosx-gas/x86cpuid.s @@ -21,6 +21,8 @@ L_OPENSSL_ia32_cpuid_begin: xorl %eax,%eax btl $21,%ecx jnc L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax @@ -71,28 +73,36 @@ L_OPENSSL_ia32_cpuid_begin: andl $4026531839,%edx jmp L002generic L001intel: + cmpl $7,%edi + jb L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +L003cacheinfo: cmpl $4,%edi movl $-1,%edi - jb L003nocacheinfo + jb L004nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi -L003nocacheinfo: +L004nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp - jne L004notintel + jne L005notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - jne L004notintel + jne L005notintel orl $1048576,%edx -L004notintel: +L005notintel: btl $28,%edx jnc L002generic andl $4026531839,%edx @@ -109,20 +119,22 @@ L002generic: movl %edx,%esi orl %ecx,%ebp btl $27,%ecx - jnc L005clear_avx + jnc L006clear_avx xorl %ecx,%ecx .byte 15,1,208 andl $6,%eax cmpl $6,%eax - je L006done + je L007done cmpl $2,%eax - je L005clear_avx -L007clear_xmm: + je L006clear_avx +L008clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi -L005clear_avx: +L006clear_avx: andl $4026525695,%ebp -L006done: + movl 20(%esp),%edi + andl $4294967263,8(%edi) +L007done: movl %esi,%eax movl %ebp,%edx L000nocpuid: @@ -137,32 +149,32 @@ _OPENSSL_rdtsc: L_OPENSSL_rdtsc_begin: xorl %eax,%eax xorl %edx,%edx - call L008PIC_me_up -L008PIC_me_up: + call L009PIC_me_up +L009PIC_me_up: popl %ecx - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L008PIC_me_up(%ecx),%ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L009PIC_me_up(%ecx),%ecx btl $4,(%ecx) - jnc L009notsc + jnc L010notsc .byte 0x0f,0x31 -L009notsc: +L010notsc: ret .globl _OPENSSL_instrument_halt .align 4 _OPENSSL_instrument_halt: L_OPENSSL_instrument_halt_begin: - call L010PIC_me_up -L010PIC_me_up: + call L011PIC_me_up 
+L011PIC_me_up: popl %ecx - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%ecx),%ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L011PIC_me_up(%ecx),%ecx btl $4,(%ecx) - jnc L011nohalt + jnc L012nohalt .long 2421723150 andl $3,%eax - jnz L011nohalt + jnz L012nohalt pushfl popl %eax btl $9,%eax - jnc L011nohalt + jnc L012nohalt .byte 0x0f,0x31 pushl %edx pushl %eax @@ -172,7 +184,7 @@ L010PIC_me_up: sbbl 4(%esp),%edx addl $8,%esp ret -L011nohalt: +L012nohalt: xorl %eax,%eax xorl %edx,%edx ret @@ -183,21 +195,21 @@ L_OPENSSL_far_spin_begin: pushfl popl %eax btl $9,%eax - jnc L012nospin + jnc L013nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx - jmp L013spin + jmp L014spin .align 4,0x90 -L013spin: +L014spin: incl %eax cmpl (%ecx),%edx - je L013spin + je L014spin .long 529567888 ret -L012nospin: +L013nospin: xorl %eax,%eax xorl %edx,%edx ret @@ -207,15 +219,27 @@ _OPENSSL_wipe_cpu: L_OPENSSL_wipe_cpu_begin: xorl %eax,%eax xorl %edx,%edx - call L014PIC_me_up -L014PIC_me_up: + call L015PIC_me_up +L015PIC_me_up: popl %ecx - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L014PIC_me_up(%ecx),%ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L015PIC_me_up(%ecx),%ecx movl (%ecx),%ecx btl $1,(%ecx) - jnc L015no_x87 + jnc L016no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne L017no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +L017no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 -L015no_x87: +L016no_x87: leal 4(%esp),%eax ret .globl _OPENSSL_atomic_add @@ -227,11 +251,11 @@ L_OPENSSL_atomic_add_begin: pushl %ebx nop movl (%edx),%eax -L016spin: +L018spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 - jne L016spin + jne L018spin movl %ebx,%eax popl %ebx ret @@ -268,43 +292,56 @@ L_OPENSSL_cleanse_begin: movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx - jae L017lot + jae L019lot cmpl $0,%ecx - je L018ret -L019little: + je L020ret +L021little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx - jnz L019little -L018ret: + jnz L021little +L020ret: ret .align 4,0x90 -L017lot: +L019lot: testl $3,%edx - jz L020aligned + jz L022aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx - jmp L017lot -L020aligned: + jmp L019lot +L022aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx - jnz L020aligned + jnz L022aligned cmpl $0,%ecx - jne L019little + jne L021little ret .globl _OPENSSL_ia32_rdrand .align 4 _OPENSSL_ia32_rdrand: L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx -L021loop: +L023loop: .byte 15,199,240 - jc L022break - loop L021loop -L022break: + jc L024break + loop L023loop +L024break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.globl _OPENSSL_ia32_rdseed +.align 4 +_OPENSSL_ia32_rdseed: +L_OPENSSL_ia32_rdseed_begin: + movl $8,%ecx +L025loop: +.byte 15,199,248 + jc L026break + loop L025loop +L026break: cmpl $0,%eax cmovel %ecx,%eax ret @@ -312,7 +349,7 @@ L022break: L_OPENSSL_ia32cap_P$non_lazy_ptr: .indirect_symbol _OPENSSL_ia32cap_P .long 0 -.comm _OPENSSL_ia32cap_P,8,2 +.comm _OPENSSL_ia32cap_P,16,2 .mod_init_func .align 2 .long _OPENSSL_cpuid_setup diff --git a/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm b/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm index 5e53b1c2f3779d..ea853704c7458e 100644 --- a/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm +++ b/deps/openssl/asm/x86-win32-masm/aes/aes-586.asm @@ -112,74 +112,78 @@ $L000loop: shl ecx,24 xor edx,ecx mov ecx,esi - mov esi,ecx - and esi,2155905152 - mov 
ebp,esi - shr ebp,7 + mov ebp,2155905152 + and ebp,ecx lea edi,DWORD PTR [ecx*1+ecx] - sub esi,ebp + mov esi,ebp + shr ebp,7 and edi,4278124286 - and esi,454761243 + sub esi,ebp mov ebp,ecx + and esi,454761243 + ror ebp,16 xor esi,edi + mov edi,ecx xor ecx,esi + ror edi,24 + xor esi,ebp rol ecx,24 + xor esi,edi + mov ebp,2155905152 xor ecx,esi - ror ebp,16 - xor ecx,ebp - ror ebp,8 - xor ecx,ebp - mov esi,edx - and esi,2155905152 - mov ebp,esi - shr ebp,7 + and ebp,edx lea edi,DWORD PTR [edx*1+edx] - sub esi,ebp + mov esi,ebp + shr ebp,7 and edi,4278124286 - and esi,454761243 + sub esi,ebp mov ebp,edx + and esi,454761243 + ror ebp,16 xor esi,edi + mov edi,edx xor edx,esi + ror edi,24 + xor esi,ebp rol edx,24 + xor esi,edi + mov ebp,2155905152 xor edx,esi - ror ebp,16 - xor edx,ebp - ror ebp,8 - xor edx,ebp - mov esi,eax - and esi,2155905152 - mov ebp,esi - shr ebp,7 + and ebp,eax lea edi,DWORD PTR [eax*1+eax] - sub esi,ebp + mov esi,ebp + shr ebp,7 and edi,4278124286 - and esi,454761243 + sub esi,ebp mov ebp,eax + and esi,454761243 + ror ebp,16 xor esi,edi + mov edi,eax xor eax,esi + ror edi,24 + xor esi,ebp rol eax,24 + xor esi,edi + mov ebp,2155905152 xor eax,esi - ror ebp,16 - xor eax,ebp - ror ebp,8 - xor eax,ebp - mov esi,ebx - and esi,2155905152 - mov ebp,esi - shr ebp,7 + and ebp,ebx lea edi,DWORD PTR [ebx*1+ebx] - sub esi,ebp + mov esi,ebp + shr ebp,7 and edi,4278124286 - and esi,454761243 + sub esi,ebp mov ebp,ebx + and esi,454761243 + ror ebp,16 xor esi,edi + mov edi,ebx xor ebx,esi + ror edi,24 + xor esi,ebp rol ebx,24 + xor esi,edi xor ebx,esi - ror ebp,16 - xor ebx,ebp - ror ebp,8 - xor ebx,ebp mov edi,DWORD PTR 20[esp] mov ebp,DWORD PTR 28[esp] add edi,16 @@ -296,74 +300,76 @@ $L001loop: pshufw mm5,mm4,13 movd eax,mm1 movd ebx,mm5 + mov DWORD PTR 20[esp],edi movzx esi,al - movzx ecx,BYTE PTR [esi*1+ebp-128] - pshufw mm2,mm0,13 movzx edx,ah + pshufw mm2,mm0,13 + movzx ecx,BYTE PTR [esi*1+ebp-128] + movzx edi,bl movzx edx,BYTE PTR [edx*1+ebp-128] - shl edx,8 shr eax,16 - movzx esi,bl - movzx esi,BYTE PTR [esi*1+ebp-128] + shl edx,8 + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shl esi,16 - or ecx,esi pshufw mm6,mm4,8 - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,ah shl esi,24 - or edx,esi shr ebx,16 - movzx esi,ah - movzx esi,BYTE PTR [esi*1+ebp-128] + or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shl esi,8 or ecx,esi - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al shl esi,24 or ecx,esi - movd mm0,ecx - movzx esi,al - movzx ecx,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl movd eax,mm2 - movzx esi,bl - movzx esi,BYTE PTR [esi*1+ebp-128] - shl esi,16 - or ecx,esi + movd mm0,ecx + movzx ecx,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl ecx,16 movd ebx,mm6 - movzx esi,ah - movzx esi,BYTE PTR [esi*1+ebp-128] + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shl esi,24 or ecx,esi - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl shl esi,8 - or ecx,esi - movd mm1,ecx - movzx esi,bl - movzx ecx,BYTE PTR [esi*1+ebp-128] shr ebx,16 - movzx esi,al - movzx esi,BYTE PTR [esi*1+ebp-128] - shl esi,16 or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al shr eax,16 + movd mm1,ecx + movzx ecx,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl ecx,16 + and eax,255 + or ecx,esi punpckldq mm0,mm1 - movzx esi,ah - movzx esi,BYTE PTR [esi*1+ebp-128] + 
movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shl esi,24 - or ecx,esi - and eax,255 + and ebx,255 movzx eax,BYTE PTR [eax*1+ebp-128] + or ecx,esi shl eax,16 + movzx esi,BYTE PTR [edi*1+ebp-128] or edx,eax - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] shl esi,8 - or ecx,esi - movd mm4,ecx - and ebx,255 movzx ebx,BYTE PTR [ebx*1+ebp-128] + or ecx,esi or edx,ebx + mov edi,DWORD PTR 20[esp] + movd mm4,ecx movd mm5,edx punpckldq mm4,mm5 add edi,16 @@ -1128,28 +1134,28 @@ $L006loop: movzx eax,BYTE PTR [eax*1+ebp-128] shl eax,24 xor edx,eax - mov esi,ecx - and esi,2155905152 - mov edi,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi shr edi,7 lea eax,DWORD PTR [ecx*1+ecx] sub esi,edi and eax,4278124286 and esi,454761243 - xor esi,eax - mov eax,esi - and esi,2155905152 - mov edi,esi + xor eax,esi + mov edi,2155905152 + and edi,eax + mov esi,edi shr edi,7 lea ebx,DWORD PTR [eax*1+eax] sub esi,edi and ebx,4278124286 and esi,454761243 xor eax,ecx - xor esi,ebx - mov ebx,esi - and esi,2155905152 - mov edi,esi + xor ebx,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi shr edi,7 lea ebp,DWORD PTR [ebx*1+ebx] sub esi,edi @@ -1160,39 +1166,39 @@ $L006loop: xor ebp,esi xor ecx,eax xor eax,ebp - rol eax,24 xor ecx,ebx xor ebx,ebp - rol ebx,16 + rol eax,24 xor ecx,ebp - rol ebp,8 + rol ebx,16 xor ecx,eax + rol ebp,8 xor ecx,ebx mov eax,DWORD PTR 4[esp] xor ecx,ebp mov DWORD PTR 12[esp],ecx - mov esi,edx - and esi,2155905152 - mov edi,esi + mov edi,2155905152 + and edi,edx + mov esi,edi shr edi,7 lea ebx,DWORD PTR [edx*1+edx] sub esi,edi and ebx,4278124286 and esi,454761243 - xor esi,ebx - mov ebx,esi - and esi,2155905152 - mov edi,esi + xor ebx,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi shr edi,7 lea ecx,DWORD PTR [ebx*1+ebx] sub esi,edi and ecx,4278124286 and esi,454761243 xor ebx,edx - xor esi,ecx - mov ecx,esi - and esi,2155905152 - mov edi,esi + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi shr edi,7 lea ebp,DWORD PTR [ecx*1+ecx] sub esi,edi @@ -1203,39 +1209,39 @@ $L006loop: xor ebp,esi xor edx,ebx xor ebx,ebp - rol ebx,24 xor edx,ecx xor ecx,ebp - rol ecx,16 + rol ebx,24 xor edx,ebp - rol ebp,8 + rol ecx,16 xor edx,ebx + rol ebp,8 xor edx,ecx mov ebx,DWORD PTR 8[esp] xor edx,ebp mov DWORD PTR 16[esp],edx - mov esi,eax - and esi,2155905152 - mov edi,esi + mov edi,2155905152 + and edi,eax + mov esi,edi shr edi,7 lea ecx,DWORD PTR [eax*1+eax] sub esi,edi and ecx,4278124286 and esi,454761243 - xor esi,ecx - mov ecx,esi - and esi,2155905152 - mov edi,esi + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi shr edi,7 lea edx,DWORD PTR [ecx*1+ecx] sub esi,edi and edx,4278124286 and esi,454761243 xor ecx,eax - xor esi,edx - mov edx,esi - and esi,2155905152 - mov edi,esi + xor edx,esi + mov edi,2155905152 + and edi,edx + mov esi,edi shr edi,7 lea ebp,DWORD PTR [edx*1+edx] sub esi,edi @@ -1246,37 +1252,37 @@ $L006loop: xor ebp,esi xor eax,ecx xor ecx,ebp - rol ecx,24 xor eax,edx xor edx,ebp - rol edx,16 + rol ecx,24 xor eax,ebp - rol ebp,8 + rol edx,16 xor eax,ecx + rol ebp,8 xor eax,edx xor eax,ebp - mov esi,ebx - and esi,2155905152 - mov edi,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi shr edi,7 lea ecx,DWORD PTR [ebx*1+ebx] sub esi,edi and ecx,4278124286 and esi,454761243 - xor esi,ecx - mov ecx,esi - and esi,2155905152 - mov edi,esi + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi shr edi,7 lea edx,DWORD PTR [ecx*1+ecx] sub esi,edi and edx,4278124286 and esi,454761243 xor ecx,ebx - xor esi,edx - mov edx,esi - and esi,2155905152 - mov 
edi,esi + xor edx,esi + mov edi,2155905152 + and edi,edx + mov esi,edi shr edi,7 lea ebp,DWORD PTR [edx*1+edx] sub esi,edi @@ -1287,13 +1293,13 @@ $L006loop: xor ebp,esi xor ebx,ecx xor ecx,ebp - rol ecx,24 xor ebx,edx xor edx,ebp - rol edx,16 + rol ecx,24 xor ebx,ebp - rol ebp,8 + rol edx,16 xor ebx,ecx + rol ebp,8 xor ebx,edx mov ecx,DWORD PTR 12[esp] xor ebx,ebp @@ -1411,77 +1417,79 @@ __sse_AES_decrypt_compact PROC PRIVATE ALIGN 16 $L007loop: pshufw mm1,mm0,12 - movd eax,mm1 pshufw mm5,mm4,9 - movzx esi,al - movzx ecx,BYTE PTR [esi*1+ebp-128] + movd eax,mm1 movd ebx,mm5 + mov DWORD PTR 20[esp],edi + movzx esi,al movzx edx,ah + pshufw mm2,mm0,6 + movzx ecx,BYTE PTR [esi*1+ebp-128] + movzx edi,bl movzx edx,BYTE PTR [edx*1+ebp-128] + shr eax,16 shl edx,8 - pshufw mm2,mm0,6 - movzx esi,bl - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shl esi,16 + pshufw mm6,mm4,3 or ecx,esi - shr eax,16 - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,ah shl esi,24 - or edx,esi shr ebx,16 - pshufw mm6,mm4,3 - movzx esi,ah - movzx esi,BYTE PTR [esi*1+ebp-128] + or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shl esi,24 or ecx,esi - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al shl esi,8 - or ecx,esi - movd mm0,ecx - movzx esi,al movd eax,mm2 - movzx ecx,BYTE PTR [esi*1+ebp-128] - shl ecx,16 - movzx esi,bl + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl + shl esi,16 movd ebx,mm6 - movzx esi,BYTE PTR [esi*1+ebp-128] + movd mm0,ecx + movzx ecx,BYTE PTR [edi*1+ebp-128] + movzx edi,al or ecx,esi - movzx esi,al - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl or edx,esi - movzx esi,bl - movzx esi,BYTE PTR [esi*1+ebp-128] + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,ah shl esi,16 - or edx,esi - movd mm1,edx - movzx esi,ah - movzx edx,BYTE PTR [esi*1+ebp-128] - shl edx,8 - movzx esi,bh shr eax,16 - movzx esi,BYTE PTR [esi*1+ebp-128] - shl esi,24 or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh shr ebx,16 - punpckldq mm0,mm1 - movzx esi,bh - movzx esi,BYTE PTR [esi*1+ebp-128] shl esi,8 - or ecx,esi + movd mm1,edx + movzx edx,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl edx,24 and ebx,255 + or edx,esi + punpckldq mm0,mm1 + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al + shl esi,8 + movzx eax,ah movzx ebx,BYTE PTR [ebx*1+ebp-128] + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] or edx,ebx - movzx esi,al - movzx esi,BYTE PTR [esi*1+ebp-128] shl esi,16 - or edx,esi - movd mm4,edx - movzx eax,ah movzx eax,BYTE PTR [eax*1+ebp-128] + or edx,esi shl eax,24 or ecx,eax + mov edi,DWORD PTR 20[esp] + movd mm4,edx movd mm5,ecx punpckldq mm4,mm5 add edi,16 @@ -3029,30 +3037,30 @@ $L055invert: ALIGN 4 $L056permute: add edi,16 - mov esi,eax - and esi,2155905152 - mov ebp,esi - shr ebp,7 + mov ebp,2155905152 + and ebp,eax lea ebx,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 sub esi,ebp and ebx,4278124286 and esi,454761243 - xor esi,ebx - mov ebx,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor ebx,esi + mov ebp,2155905152 + and ebp,ebx lea ecx,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 sub esi,ebp and ecx,4278124286 and esi,454761243 xor ebx,eax - xor esi,ecx - mov ecx,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor ecx,esi + mov ebp,2155905152 + and ebp,ecx lea edx,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 xor ecx,eax sub esi,ebp and edx,4278124286 @@ 
-3073,30 +3081,30 @@ $L056permute: mov ebx,ebp xor eax,edx mov DWORD PTR [edi],eax - mov esi,ebx - and esi,2155905152 - mov ebp,esi - shr ebp,7 + mov ebp,2155905152 + and ebp,ebx lea ecx,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 sub esi,ebp and ecx,4278124286 and esi,454761243 - xor esi,ecx - mov ecx,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor ecx,esi + mov ebp,2155905152 + and ebp,ecx lea edx,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 sub esi,ebp and edx,4278124286 and esi,454761243 xor ecx,ebx - xor esi,edx - mov edx,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor edx,esi + mov ebp,2155905152 + and ebp,edx lea eax,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 xor edx,ebx sub esi,ebp and eax,4278124286 @@ -3117,30 +3125,30 @@ $L056permute: mov ecx,ebp xor ebx,eax mov DWORD PTR 4[edi],ebx - mov esi,ecx - and esi,2155905152 - mov ebp,esi - shr ebp,7 + mov ebp,2155905152 + and ebp,ecx lea edx,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 sub esi,ebp and edx,4278124286 and esi,454761243 - xor esi,edx - mov edx,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor edx,esi + mov ebp,2155905152 + and ebp,edx lea eax,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 sub esi,ebp and eax,4278124286 and esi,454761243 xor edx,ecx - xor esi,eax - mov eax,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor eax,esi + mov ebp,2155905152 + and ebp,eax lea ebx,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 xor eax,ecx sub esi,ebp and ebx,4278124286 @@ -3161,30 +3169,30 @@ $L056permute: mov edx,ebp xor ecx,ebx mov DWORD PTR 8[edi],ecx - mov esi,edx - and esi,2155905152 - mov ebp,esi - shr ebp,7 + mov ebp,2155905152 + and ebp,edx lea eax,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 sub esi,ebp and eax,4278124286 and esi,454761243 - xor esi,eax - mov eax,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor eax,esi + mov ebp,2155905152 + and ebp,eax lea ebx,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 sub esi,ebp and ebx,4278124286 and esi,454761243 xor eax,edx - xor esi,ebx - mov ebx,esi - and esi,2155905152 - mov ebp,esi - shr ebp,7 + xor ebx,esi + mov ebp,2155905152 + and ebp,ebx lea ecx,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 xor ebx,edx sub esi,ebp and ecx,4278124286 @@ -3219,6 +3227,6 @@ DB 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 DB 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text$ ENDS .bss SEGMENT 'BSS' -COMM _OPENSSL_ia32cap_P:QWORD +COMM _OPENSSL_ia32cap_P:DWORD:4 .bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm b/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm index a1602cc692f7ae..43fdb5a0345e93 100644 --- a/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm +++ b/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm @@ -62,27 +62,80 @@ DB 102,15,56,223,209 ret _aesni_decrypt ENDP ALIGN 16 +__aesni_encrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shl ecx,4 + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx + add ecx,16 +$L002enc2_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L002enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + ret +__aesni_encrypt2 ENDP +ALIGN 16 +__aesni_decrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shl ecx,4 + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm2,xmm0 + pxor 
xmm3,xmm0 + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx + add ecx,16 +$L003dec2_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L003dec2_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,223,208 +DB 102,15,56,223,216 + ret +__aesni_decrypt2 ENDP +ALIGN 16 __aesni_encrypt3 PROC PRIVATE movups xmm0,XMMWORD PTR [edx] - shr ecx,1 + shl ecx,4 movups xmm1,XMMWORD PTR 16[edx] - lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 pxor xmm3,xmm0 pxor xmm4,xmm0 - movups xmm0,XMMWORD PTR [edx] -$L002enc3_loop: + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx + add ecx,16 +$L004enc3_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec ecx DB 102,15,56,220,225 - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea edx,DWORD PTR 32[edx] DB 102,15,56,220,224 - movups xmm0,XMMWORD PTR [edx] - jnz $L002enc3_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L004enc3_loop DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 @@ -94,25 +147,26 @@ __aesni_encrypt3 ENDP ALIGN 16 __aesni_decrypt3 PROC PRIVATE movups xmm0,XMMWORD PTR [edx] - shr ecx,1 + shl ecx,4 movups xmm1,XMMWORD PTR 16[edx] - lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 pxor xmm3,xmm0 pxor xmm4,xmm0 - movups xmm0,XMMWORD PTR [edx] -$L003dec3_loop: + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx + add ecx,16 +$L005dec3_loop: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec ecx DB 102,15,56,222,225 - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - lea edx,DWORD PTR 32[edx] DB 102,15,56,222,224 - movups xmm0,XMMWORD PTR [edx] - jnz $L003dec3_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L005dec3_loop DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 @@ -125,27 +179,29 @@ ALIGN 16 __aesni_encrypt4 PROC PRIVATE movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] - shr ecx,1 - lea edx,DWORD PTR 32[edx] + shl ecx,4 xorps xmm2,xmm0 pxor xmm3,xmm0 pxor xmm4,xmm0 pxor xmm5,xmm0 - movups xmm0,XMMWORD PTR [edx] -$L004enc4_loop: + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 15,31,64,0 + add ecx,16 +$L006enc4_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec ecx DB 102,15,56,220,225 DB 102,15,56,220,233 - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea edx,DWORD PTR 32[edx] DB 102,15,56,220,224 DB 102,15,56,220,232 - movups xmm0,XMMWORD PTR [edx] - jnz $L004enc4_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L006enc4_loop DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 @@ -160,27 +216,29 @@ ALIGN 16 __aesni_decrypt4 PROC PRIVATE movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] - shr ecx,1 - lea edx,DWORD PTR 32[edx] + shl ecx,4 xorps xmm2,xmm0 pxor xmm3,xmm0 pxor xmm4,xmm0 pxor xmm5,xmm0 - movups xmm0,XMMWORD PTR [edx] -$L005dec4_loop: + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 15,31,64,0 + add ecx,16 +$L007dec4_loop: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec ecx DB 102,15,56,222,225 DB 102,15,56,222,233 - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - 
lea edx,DWORD PTR 32[edx] DB 102,15,56,222,224 DB 102,15,56,222,232 - movups xmm0,XMMWORD PTR [edx] - jnz $L005dec4_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L007dec4_loop DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 @@ -194,45 +252,44 @@ __aesni_decrypt4 ENDP ALIGN 16 __aesni_encrypt6 PROC PRIVATE movups xmm0,XMMWORD PTR [edx] - shr ecx,1 + shl ecx,4 movups xmm1,XMMWORD PTR 16[edx] - lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 pxor xmm3,xmm0 -DB 102,15,56,220,209 pxor xmm4,xmm0 -DB 102,15,56,220,217 +DB 102,15,56,220,209 pxor xmm5,xmm0 - dec ecx -DB 102,15,56,220,225 pxor xmm6,xmm0 -DB 102,15,56,220,233 +DB 102,15,56,220,217 + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 102,15,56,220,225 pxor xmm7,xmm0 + add ecx,16 +DB 102,15,56,220,233 DB 102,15,56,220,241 - movups xmm0,XMMWORD PTR [edx] DB 102,15,56,220,249 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] jmp $L_aesni_encrypt6_enter ALIGN 16 -$L006enc6_loop: +$L008enc6_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 - dec ecx DB 102,15,56,220,225 DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 -ALIGN 16 $L_aesni_encrypt6_enter:: - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,220,208 DB 102,15,56,220,216 - lea edx,DWORD PTR 32[edx] DB 102,15,56,220,224 DB 102,15,56,220,232 DB 102,15,56,220,240 DB 102,15,56,220,248 - movups xmm0,XMMWORD PTR [edx] - jnz $L006enc6_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L008enc6_loop DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 @@ -250,45 +307,44 @@ __aesni_encrypt6 ENDP ALIGN 16 __aesni_decrypt6 PROC PRIVATE movups xmm0,XMMWORD PTR [edx] - shr ecx,1 + shl ecx,4 movups xmm1,XMMWORD PTR 16[edx] - lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 pxor xmm3,xmm0 -DB 102,15,56,222,209 pxor xmm4,xmm0 -DB 102,15,56,222,217 +DB 102,15,56,222,209 pxor xmm5,xmm0 - dec ecx -DB 102,15,56,222,225 pxor xmm6,xmm0 -DB 102,15,56,222,233 +DB 102,15,56,222,217 + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 102,15,56,222,225 pxor xmm7,xmm0 + add ecx,16 +DB 102,15,56,222,233 DB 102,15,56,222,241 - movups xmm0,XMMWORD PTR [edx] DB 102,15,56,222,249 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] jmp $L_aesni_decrypt6_enter ALIGN 16 -$L007dec6_loop: +$L009dec6_loop: DB 102,15,56,222,209 DB 102,15,56,222,217 - dec ecx DB 102,15,56,222,225 DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 -ALIGN 16 $L_aesni_decrypt6_enter:: - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,222,208 DB 102,15,56,222,216 - lea edx,DWORD PTR 32[edx] DB 102,15,56,222,224 DB 102,15,56,222,232 DB 102,15,56,222,240 DB 102,15,56,222,248 - movups xmm0,XMMWORD PTR [edx] - jnz $L007dec6_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L009dec6_loop DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 @@ -316,14 +372,14 @@ $L_aesni_ecb_encrypt_begin:: mov edx,DWORD PTR 32[esp] mov ebx,DWORD PTR 36[esp] and eax,-16 - jz $L008ecb_ret + jz $L010ecb_ret mov ecx,DWORD PTR 240[edx] test ebx,ebx - jz $L009ecb_decrypt + jz $L011ecb_decrypt mov ebp,edx mov ebx,ecx cmp eax,96 - jb $L010ecb_enc_tail + jb $L012ecb_enc_tail movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -332,9 +388,9 @@ $L_aesni_ecb_encrypt_begin:: movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] sub eax,96 - jmp $L011ecb_enc_loop6_enter + jmp $L013ecb_enc_loop6_enter ALIGN 16 -$L012ecb_enc_loop6: +$L014ecb_enc_loop6: movups XMMWORD PTR [edi],xmm2 movdqu xmm2,XMMWORD PTR 
[esi] movups XMMWORD PTR 16[edi],xmm3 @@ -349,12 +405,12 @@ $L012ecb_enc_loop6: lea edi,DWORD PTR 96[edi] movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] -$L011ecb_enc_loop6_enter: +$L013ecb_enc_loop6_enter: call __aesni_encrypt6 mov edx,ebp mov ecx,ebx sub eax,96 - jnc $L012ecb_enc_loop6 + jnc $L014ecb_enc_loop6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 @@ -363,18 +419,18 @@ $L011ecb_enc_loop6_enter: movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] add eax,96 - jz $L008ecb_ret -$L010ecb_enc_tail: + jz $L010ecb_ret +$L012ecb_enc_tail: movups xmm2,XMMWORD PTR [esi] cmp eax,32 - jb $L013ecb_enc_one + jb $L015ecb_enc_one movups xmm3,XMMWORD PTR 16[esi] - je $L014ecb_enc_two + je $L016ecb_enc_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,64 - jb $L015ecb_enc_three + jb $L017ecb_enc_three movups xmm5,XMMWORD PTR 48[esi] - je $L016ecb_enc_four + je $L018ecb_enc_four movups xmm6,XMMWORD PTR 64[esi] xorps xmm7,xmm7 call __aesni_encrypt6 @@ -383,50 +439,49 @@ $L010ecb_enc_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L013ecb_enc_one: +$L015ecb_enc_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L017enc1_loop_3: +$L019enc1_loop_3: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L017enc1_loop_3 + jnz $L019enc1_loop_3 DB 102,15,56,221,209 movups XMMWORD PTR [edi],xmm2 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L014ecb_enc_two: - xorps xmm4,xmm4 - call __aesni_encrypt3 +$L016ecb_enc_two: + call __aesni_encrypt2 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L015ecb_enc_three: +$L017ecb_enc_three: call __aesni_encrypt3 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L016ecb_enc_four: +$L018ecb_enc_four: call __aesni_encrypt4 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L009ecb_decrypt: +$L011ecb_decrypt: mov ebp,edx mov ebx,ecx cmp eax,96 - jb $L018ecb_dec_tail + jb $L020ecb_dec_tail movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -435,9 +490,9 @@ $L009ecb_decrypt: movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] sub eax,96 - jmp $L019ecb_dec_loop6_enter + jmp $L021ecb_dec_loop6_enter ALIGN 16 -$L020ecb_dec_loop6: +$L022ecb_dec_loop6: movups XMMWORD PTR [edi],xmm2 movdqu xmm2,XMMWORD PTR [esi] movups XMMWORD PTR 16[edi],xmm3 @@ -452,12 +507,12 @@ $L020ecb_dec_loop6: lea edi,DWORD PTR 96[edi] movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] -$L019ecb_dec_loop6_enter: +$L021ecb_dec_loop6_enter: call __aesni_decrypt6 mov edx,ebp mov ecx,ebx sub eax,96 - jnc $L020ecb_dec_loop6 + jnc $L022ecb_dec_loop6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 @@ -466,18 +521,18 @@ $L019ecb_dec_loop6_enter: movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] add eax,96 - jz $L008ecb_ret -$L018ecb_dec_tail: + jz $L010ecb_ret +$L020ecb_dec_tail: movups xmm2,XMMWORD PTR [esi] cmp eax,32 - jb $L021ecb_dec_one + jb $L023ecb_dec_one movups xmm3,XMMWORD PTR 16[esi] - je $L022ecb_dec_two + je $L024ecb_dec_two movups xmm4,XMMWORD PTR 32[esi] cmp 
eax,64 - jb $L023ecb_dec_three + jb $L025ecb_dec_three movups xmm5,XMMWORD PTR 48[esi] - je $L024ecb_dec_four + je $L026ecb_dec_four movups xmm6,XMMWORD PTR 64[esi] xorps xmm7,xmm7 call __aesni_decrypt6 @@ -486,44 +541,43 @@ $L018ecb_dec_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L021ecb_dec_one: +$L023ecb_dec_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L025dec1_loop_4: +$L027dec1_loop_4: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L025dec1_loop_4 + jnz $L027dec1_loop_4 DB 102,15,56,223,209 movups XMMWORD PTR [edi],xmm2 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L022ecb_dec_two: - xorps xmm4,xmm4 - call __aesni_decrypt3 +$L024ecb_dec_two: + call __aesni_decrypt2 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L023ecb_dec_three: +$L025ecb_dec_three: call __aesni_decrypt3 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L008ecb_ret + jmp $L010ecb_ret ALIGN 16 -$L024ecb_dec_four: +$L026ecb_dec_four: call __aesni_decrypt4 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 -$L008ecb_ret: +$L010ecb_ret: pop edi pop esi pop ebx @@ -560,45 +614,45 @@ $L_aesni_ccm64_encrypt_blocks_begin:: mov DWORD PTR 20[esp],ebp mov DWORD PTR 24[esp],ebp mov DWORD PTR 28[esp],ebp - shr ecx,1 + shl ecx,4 + mov ebx,16 lea ebp,DWORD PTR [edx] movdqa xmm5,XMMWORD PTR [esp] movdqa xmm2,xmm7 - mov ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] + sub ebx,ecx DB 102,15,56,0,253 -$L026ccm64_enc_outer: +$L028ccm64_enc_outer: movups xmm0,XMMWORD PTR [ebp] mov ecx,ebx movups xmm6,XMMWORD PTR [esi] xorps xmm2,xmm0 movups xmm1,XMMWORD PTR 16[ebp] xorps xmm0,xmm6 - lea edx,DWORD PTR 32[ebp] xorps xmm3,xmm0 - movups xmm0,XMMWORD PTR [edx] -$L027ccm64_enc2_loop: + movups xmm0,XMMWORD PTR 32[ebp] +$L029ccm64_enc2_loop: DB 102,15,56,220,209 - dec ecx DB 102,15,56,220,217 - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,220,208 - lea edx,DWORD PTR 32[edx] DB 102,15,56,220,216 - movups xmm0,XMMWORD PTR [edx] - jnz $L027ccm64_enc2_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L029ccm64_enc2_loop DB 102,15,56,220,209 DB 102,15,56,220,217 paddq xmm7,XMMWORD PTR 16[esp] + dec eax DB 102,15,56,221,208 DB 102,15,56,221,216 - dec eax lea esi,DWORD PTR 16[esi] xorps xmm6,xmm2 movdqa xmm2,xmm7 movups XMMWORD PTR [edi],xmm6 - lea edi,DWORD PTR 16[edi] DB 102,15,56,0,213 - jnz $L026ccm64_enc_outer + lea edi,DWORD PTR 16[edi] + jnz $L028ccm64_enc_outer mov esp,DWORD PTR 48[esp] mov edi,DWORD PTR 40[esp] movups XMMWORD PTR [edi],xmm3 @@ -647,67 +701,70 @@ DB 102,15,56,0,253 movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L028enc1_loop_5: +$L030enc1_loop_5: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L028enc1_loop_5 + jnz $L030enc1_loop_5 DB 102,15,56,221,209 + shl ebx,4 + mov ecx,16 movups xmm6,XMMWORD PTR [esi] paddq xmm7,XMMWORD PTR 16[esp] lea esi,QWORD PTR 16[esi] - jmp $L029ccm64_dec_outer + sub ecx,ebx + lea edx,DWORD PTR 32[ebx*1+ebp] + mov ebx,ecx + jmp $L031ccm64_dec_outer ALIGN 16 -$L029ccm64_dec_outer: +$L031ccm64_dec_outer: xorps xmm6,xmm2 movdqa xmm2,xmm7 - mov ecx,ebx movups XMMWORD PTR [edi],xmm6 
lea edi,DWORD PTR 16[edi] DB 102,15,56,0,213 sub eax,1 - jz $L030ccm64_dec_break + jz $L032ccm64_dec_break movups xmm0,XMMWORD PTR [ebp] - shr ecx,1 + mov ecx,ebx movups xmm1,XMMWORD PTR 16[ebp] xorps xmm6,xmm0 - lea edx,DWORD PTR 32[ebp] xorps xmm2,xmm0 xorps xmm3,xmm6 - movups xmm0,XMMWORD PTR [edx] -$L031ccm64_dec2_loop: + movups xmm0,XMMWORD PTR 32[ebp] +$L033ccm64_dec2_loop: DB 102,15,56,220,209 - dec ecx DB 102,15,56,220,217 - movups xmm1,XMMWORD PTR 16[edx] + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 DB 102,15,56,220,208 - lea edx,DWORD PTR 32[edx] DB 102,15,56,220,216 - movups xmm0,XMMWORD PTR [edx] - jnz $L031ccm64_dec2_loop + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L033ccm64_dec2_loop movups xmm6,XMMWORD PTR [esi] paddq xmm7,XMMWORD PTR 16[esp] DB 102,15,56,220,209 DB 102,15,56,220,217 - lea esi,QWORD PTR 16[esi] DB 102,15,56,221,208 DB 102,15,56,221,216 - jmp $L029ccm64_dec_outer + lea esi,QWORD PTR 16[esi] + jmp $L031ccm64_dec_outer ALIGN 16 -$L030ccm64_dec_break: +$L032ccm64_dec_break: + mov ecx,DWORD PTR 240[ebp] mov edx,ebp movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] xorps xmm6,xmm0 lea edx,DWORD PTR 32[edx] xorps xmm3,xmm6 -$L032enc1_loop_6: +$L034enc1_loop_6: DB 102,15,56,220,217 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L032enc1_loop_6 + jnz $L034enc1_loop_6 DB 102,15,56,221,217 mov esp,DWORD PTR 48[esp] mov edi,DWORD PTR 40[esp] @@ -735,7 +792,7 @@ $L_aesni_ctr32_encrypt_blocks_begin:: and esp,-16 mov DWORD PTR 80[esp],ebp cmp eax,1 - je $L033ctr32_one_shortcut + je $L035ctr32_one_shortcut movdqu xmm7,XMMWORD PTR [ebx] mov DWORD PTR [esp],202182159 mov DWORD PTR 4[esp],134810123 @@ -751,63 +808,59 @@ DB 102,15,58,22,251,3 DB 102,15,58,34,253,3 mov ecx,DWORD PTR 240[edx] bswap ebx - pxor xmm1,xmm1 pxor xmm0,xmm0 + pxor xmm1,xmm1 movdqa xmm2,XMMWORD PTR [esp] -DB 102,15,58,34,203,0 +DB 102,15,58,34,195,0 lea ebp,DWORD PTR 3[ebx] -DB 102,15,58,34,197,0 +DB 102,15,58,34,205,0 inc ebx -DB 102,15,58,34,203,1 +DB 102,15,58,34,195,1 inc ebp -DB 102,15,58,34,197,1 +DB 102,15,58,34,205,1 inc ebx -DB 102,15,58,34,203,2 +DB 102,15,58,34,195,2 inc ebp -DB 102,15,58,34,197,2 - movdqa XMMWORD PTR 48[esp],xmm1 -DB 102,15,56,0,202 - movdqa XMMWORD PTR 64[esp],xmm0 +DB 102,15,58,34,205,2 + movdqa XMMWORD PTR 48[esp],xmm0 DB 102,15,56,0,194 - pshufd xmm2,xmm1,192 - pshufd xmm3,xmm1,128 + movdqu xmm6,XMMWORD PTR [edx] + movdqa XMMWORD PTR 64[esp],xmm1 +DB 102,15,56,0,202 + pshufd xmm2,xmm0,192 + pshufd xmm3,xmm0,128 cmp eax,6 - jb $L034ctr32_tail + jb $L036ctr32_tail + pxor xmm7,xmm6 + shl ecx,4 + mov ebx,16 movdqa XMMWORD PTR 32[esp],xmm7 - shr ecx,1 mov ebp,edx - mov ebx,ecx + sub ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] sub eax,6 - jmp $L035ctr32_loop6 -ALIGN 16 -$L035ctr32_loop6: - pshufd xmm4,xmm1,64 - movdqa xmm1,XMMWORD PTR 32[esp] - pshufd xmm5,xmm0,192 - por xmm2,xmm1 - pshufd xmm6,xmm0,128 - por xmm3,xmm1 - pshufd xmm7,xmm0,64 - por xmm4,xmm1 - por xmm5,xmm1 - por xmm6,xmm1 - por xmm7,xmm1 - movups xmm0,XMMWORD PTR [ebp] - movups xmm1,XMMWORD PTR 16[ebp] - lea edx,DWORD PTR 32[ebp] - dec ecx + jmp $L037ctr32_loop6 +ALIGN 16 +$L037ctr32_loop6: + pshufd xmm4,xmm0,64 + movdqa xmm0,XMMWORD PTR 32[esp] + pshufd xmm5,xmm1,192 pxor xmm2,xmm0 + pshufd xmm6,xmm1,128 pxor xmm3,xmm0 -DB 102,15,56,220,209 + pshufd xmm7,xmm1,64 + movups xmm1,XMMWORD PTR 16[ebp] pxor xmm4,xmm0 -DB 102,15,56,220,217 pxor xmm5,xmm0 -DB 102,15,56,220,225 +DB 102,15,56,220,209 pxor xmm6,xmm0 -DB 102,15,56,220,233 pxor xmm7,xmm0 +DB 102,15,56,220,217 + 
movups xmm0,XMMWORD PTR 32[ebp] + mov ecx,ebx +DB 102,15,56,220,225 +DB 102,15,56,220,233 DB 102,15,56,220,241 - movups xmm0,XMMWORD PTR [edx] DB 102,15,56,220,249 call $L_aesni_encrypt6_enter movups xmm1,XMMWORD PTR [esi] @@ -818,51 +871,51 @@ DB 102,15,56,220,249 movups XMMWORD PTR [edi],xmm2 movdqa xmm0,XMMWORD PTR 16[esp] xorps xmm4,xmm1 - movdqa xmm1,XMMWORD PTR 48[esp] + movdqa xmm1,XMMWORD PTR 64[esp] movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 paddd xmm1,xmm0 - paddd xmm0,XMMWORD PTR 64[esp] + paddd xmm0,XMMWORD PTR 48[esp] movdqa xmm2,XMMWORD PTR [esp] movups xmm3,XMMWORD PTR 48[esi] movups xmm4,XMMWORD PTR 64[esi] xorps xmm5,xmm3 movups xmm3,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] - movdqa XMMWORD PTR 48[esp],xmm1 -DB 102,15,56,0,202 + movdqa XMMWORD PTR 48[esp],xmm0 +DB 102,15,56,0,194 xorps xmm6,xmm4 movups XMMWORD PTR 48[edi],xmm5 xorps xmm7,xmm3 - movdqa XMMWORD PTR 64[esp],xmm0 -DB 102,15,56,0,194 + movdqa XMMWORD PTR 64[esp],xmm1 +DB 102,15,56,0,202 movups XMMWORD PTR 64[edi],xmm6 - pshufd xmm2,xmm1,192 + pshufd xmm2,xmm0,192 movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] - mov ecx,ebx - pshufd xmm3,xmm1,128 + pshufd xmm3,xmm0,128 sub eax,6 - jnc $L035ctr32_loop6 + jnc $L037ctr32_loop6 add eax,6 - jz $L036ctr32_ret + jz $L038ctr32_ret + movdqu xmm7,XMMWORD PTR [ebp] mov edx,ebp - lea ecx,DWORD PTR 1[ecx*2] - movdqa xmm7,XMMWORD PTR 32[esp] -$L034ctr32_tail: + pxor xmm7,XMMWORD PTR 32[esp] + mov ecx,DWORD PTR 240[ebp] +$L036ctr32_tail: por xmm2,xmm7 cmp eax,2 - jb $L037ctr32_one - pshufd xmm4,xmm1,64 + jb $L039ctr32_one + pshufd xmm4,xmm0,64 por xmm3,xmm7 - je $L038ctr32_two - pshufd xmm5,xmm0,192 + je $L040ctr32_two + pshufd xmm5,xmm1,192 por xmm4,xmm7 cmp eax,4 - jb $L039ctr32_three - pshufd xmm6,xmm0,128 + jb $L041ctr32_three + pshufd xmm6,xmm1,128 por xmm5,xmm7 - je $L040ctr32_four + je $L042ctr32_four por xmm6,xmm7 call __aesni_encrypt6 movups xmm1,XMMWORD PTR [esi] @@ -880,39 +933,39 @@ $L034ctr32_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L036ctr32_ret + jmp $L038ctr32_ret ALIGN 16 -$L033ctr32_one_shortcut: +$L035ctr32_one_shortcut: movups xmm2,XMMWORD PTR [ebx] mov ecx,DWORD PTR 240[edx] -$L037ctr32_one: +$L039ctr32_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L041enc1_loop_7: +$L043enc1_loop_7: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L041enc1_loop_7 + jnz $L043enc1_loop_7 DB 102,15,56,221,209 movups xmm6,XMMWORD PTR [esi] xorps xmm6,xmm2 movups XMMWORD PTR [edi],xmm6 - jmp $L036ctr32_ret + jmp $L038ctr32_ret ALIGN 16 -$L038ctr32_two: - call __aesni_encrypt3 +$L040ctr32_two: + call __aesni_encrypt2 movups xmm5,XMMWORD PTR [esi] movups xmm6,XMMWORD PTR 16[esi] xorps xmm2,xmm5 xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L036ctr32_ret + jmp $L038ctr32_ret ALIGN 16 -$L039ctr32_three: +$L041ctr32_three: call __aesni_encrypt3 movups xmm5,XMMWORD PTR [esi] movups xmm6,XMMWORD PTR 16[esi] @@ -923,9 +976,9 @@ $L039ctr32_three: xorps xmm4,xmm7 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L036ctr32_ret + jmp $L038ctr32_ret ALIGN 16 -$L040ctr32_four: +$L042ctr32_four: call __aesni_encrypt4 movups xmm6,XMMWORD PTR [esi] movups xmm7,XMMWORD PTR 16[esi] @@ -939,7 +992,7 @@ $L040ctr32_four: xorps xmm5,xmm0 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 -$L036ctr32_ret: 
+$L038ctr32_ret: mov esp,DWORD PTR 80[esp] pop edi pop esi @@ -962,12 +1015,12 @@ $L_aesni_xts_encrypt_begin:: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L042enc1_loop_8: +$L044enc1_loop_8: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L042enc1_loop_8 + jnz $L044enc1_loop_8 DB 102,15,56,221,209 mov esi,DWORD PTR 20[esp] mov edi,DWORD PTR 24[esp] @@ -991,12 +1044,14 @@ DB 102,15,56,221,209 mov ebp,edx mov ebx,ecx sub eax,96 - jc $L043xts_enc_short - shr ecx,1 - mov ebx,ecx - jmp $L044xts_enc_loop6 + jc $L045xts_enc_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] + jmp $L046xts_enc_loop6 ALIGN 16 -$L044xts_enc_loop6: +$L046xts_enc_loop6: pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa XMMWORD PTR [esp],xmm1 @@ -1032,6 +1087,7 @@ $L044xts_enc_loop6: pand xmm7,xmm3 movups xmm2,XMMWORD PTR [esi] pxor xmm7,xmm1 + mov ecx,ebx movdqu xmm3,XMMWORD PTR 16[esi] xorps xmm2,xmm0 movdqu xmm4,XMMWORD PTR 32[esi] @@ -1047,19 +1103,17 @@ $L044xts_enc_loop6: movdqa XMMWORD PTR 80[esp],xmm7 pxor xmm7,xmm1 movups xmm1,XMMWORD PTR 16[ebp] - lea edx,DWORD PTR 32[ebp] pxor xmm3,XMMWORD PTR 16[esp] -DB 102,15,56,220,209 pxor xmm4,XMMWORD PTR 32[esp] -DB 102,15,56,220,217 +DB 102,15,56,220,209 pxor xmm5,XMMWORD PTR 48[esp] - dec ecx -DB 102,15,56,220,225 pxor xmm6,XMMWORD PTR 64[esp] -DB 102,15,56,220,233 +DB 102,15,56,220,217 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR 32[ebp] +DB 102,15,56,220,225 +DB 102,15,56,220,233 DB 102,15,56,220,241 - movups xmm0,XMMWORD PTR [edx] DB 102,15,56,220,249 call $L_aesni_encrypt6_enter movdqa xmm1,XMMWORD PTR 80[esp] @@ -1084,26 +1138,25 @@ DB 102,15,56,220,249 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 - mov ecx,ebx pxor xmm1,xmm2 sub eax,96 - jnc $L044xts_enc_loop6 - lea ecx,DWORD PTR 1[ecx*2] + jnc $L046xts_enc_loop6 + mov ecx,DWORD PTR 240[ebp] mov edx,ebp mov ebx,ecx -$L043xts_enc_short: +$L045xts_enc_short: add eax,96 - jz $L045xts_enc_done6x + jz $L047xts_enc_done6x movdqa xmm5,xmm1 cmp eax,32 - jb $L046xts_enc_one + jb $L048xts_enc_one pshufd xmm2,xmm0,19 pxor xmm0,xmm0 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 - je $L047xts_enc_two + je $L049xts_enc_two pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm6,xmm1 @@ -1112,7 +1165,7 @@ $L043xts_enc_short: pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 cmp eax,64 - jb $L048xts_enc_three + jb $L050xts_enc_three pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm7,xmm1 @@ -1122,7 +1175,7 @@ $L043xts_enc_short: pxor xmm1,xmm2 movdqa XMMWORD PTR [esp],xmm5 movdqa XMMWORD PTR 16[esp],xmm6 - je $L049xts_enc_four + je $L051xts_enc_four movdqa XMMWORD PTR 32[esp],xmm7 pshufd xmm7,xmm0,19 movdqa XMMWORD PTR 48[esp],xmm1 @@ -1154,9 +1207,9 @@ $L043xts_enc_short: movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] - jmp $L050xts_enc_done + jmp $L052xts_enc_done ALIGN 16 -$L046xts_enc_one: +$L048xts_enc_one: movups xmm2,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] xorps xmm2,xmm5 @@ -1164,37 +1217,36 @@ $L046xts_enc_one: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L051enc1_loop_9: +$L053enc1_loop_9: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L051enc1_loop_9 + jnz $L053enc1_loop_9 DB 102,15,56,221,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] movdqa xmm1,xmm5 - jmp $L050xts_enc_done + jmp $L052xts_enc_done ALIGN 16 -$L047xts_enc_two: +$L049xts_enc_two: movaps xmm6,xmm1 movups xmm2,XMMWORD 
PTR [esi] movups xmm3,XMMWORD PTR 16[esi] lea esi,DWORD PTR 32[esi] xorps xmm2,xmm5 xorps xmm3,xmm6 - xorps xmm4,xmm4 - call __aesni_encrypt3 + call __aesni_encrypt2 xorps xmm2,xmm5 xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 lea edi,DWORD PTR 32[edi] movdqa xmm1,xmm6 - jmp $L050xts_enc_done + jmp $L052xts_enc_done ALIGN 16 -$L048xts_enc_three: +$L050xts_enc_three: movaps xmm7,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1212,9 +1264,9 @@ $L048xts_enc_three: movups XMMWORD PTR 32[edi],xmm4 lea edi,DWORD PTR 48[edi] movdqa xmm1,xmm7 - jmp $L050xts_enc_done + jmp $L052xts_enc_done ALIGN 16 -$L049xts_enc_four: +$L051xts_enc_four: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1236,28 +1288,28 @@ $L049xts_enc_four: movups XMMWORD PTR 48[edi],xmm5 lea edi,DWORD PTR 64[edi] movdqa xmm1,xmm6 - jmp $L050xts_enc_done + jmp $L052xts_enc_done ALIGN 16 -$L045xts_enc_done6x: +$L047xts_enc_done6x: mov eax,DWORD PTR 112[esp] and eax,15 - jz $L052xts_enc_ret + jz $L054xts_enc_ret movdqa xmm5,xmm1 mov DWORD PTR 112[esp],eax - jmp $L053xts_enc_steal + jmp $L055xts_enc_steal ALIGN 16 -$L050xts_enc_done: +$L052xts_enc_done: mov eax,DWORD PTR 112[esp] pxor xmm0,xmm0 and eax,15 - jz $L052xts_enc_ret + jz $L054xts_enc_ret pcmpgtd xmm0,xmm1 mov DWORD PTR 112[esp],eax pshufd xmm5,xmm0,19 paddq xmm1,xmm1 pand xmm5,XMMWORD PTR 96[esp] pxor xmm5,xmm1 -$L053xts_enc_steal: +$L055xts_enc_steal: movzx ecx,BYTE PTR [esi] movzx edx,BYTE PTR [edi-16] lea esi,DWORD PTR 1[esi] @@ -1265,7 +1317,7 @@ $L053xts_enc_steal: mov BYTE PTR [edi],dl lea edi,DWORD PTR 1[edi] sub eax,1 - jnz $L053xts_enc_steal + jnz $L055xts_enc_steal sub edi,DWORD PTR 112[esp] mov edx,ebp mov ecx,ebx @@ -1275,16 +1327,16 @@ $L053xts_enc_steal: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L054enc1_loop_10: +$L056enc1_loop_10: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L054enc1_loop_10 + jnz $L056enc1_loop_10 DB 102,15,56,221,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi-16],xmm2 -$L052xts_enc_ret: +$L054xts_enc_ret: mov esp,DWORD PTR 116[esp] pop edi pop esi @@ -1307,12 +1359,12 @@ $L_aesni_xts_decrypt_begin:: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L055enc1_loop_11: +$L057enc1_loop_11: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L055enc1_loop_11 + jnz $L057enc1_loop_11 DB 102,15,56,221,209 mov esi,DWORD PTR 20[esp] mov edi,DWORD PTR 24[esp] @@ -1341,12 +1393,14 @@ DB 102,15,56,221,209 pcmpgtd xmm0,xmm1 and eax,-16 sub eax,96 - jc $L056xts_dec_short - shr ecx,1 - mov ebx,ecx - jmp $L057xts_dec_loop6 + jc $L058xts_dec_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] + jmp $L059xts_dec_loop6 ALIGN 16 -$L057xts_dec_loop6: +$L059xts_dec_loop6: pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa XMMWORD PTR [esp],xmm1 @@ -1382,6 +1436,7 @@ $L057xts_dec_loop6: pand xmm7,xmm3 movups xmm2,XMMWORD PTR [esi] pxor xmm7,xmm1 + mov ecx,ebx movdqu xmm3,XMMWORD PTR 16[esi] xorps xmm2,xmm0 movdqu xmm4,XMMWORD PTR 32[esi] @@ -1397,19 +1452,17 @@ $L057xts_dec_loop6: movdqa XMMWORD PTR 80[esp],xmm7 pxor xmm7,xmm1 movups xmm1,XMMWORD PTR 16[ebp] - lea edx,DWORD PTR 32[ebp] pxor xmm3,XMMWORD PTR 16[esp] -DB 102,15,56,222,209 pxor xmm4,XMMWORD PTR 32[esp] -DB 102,15,56,222,217 +DB 102,15,56,222,209 pxor xmm5,XMMWORD PTR 48[esp] - dec ecx -DB 102,15,56,222,225 pxor xmm6,XMMWORD PTR 64[esp] -DB 
102,15,56,222,233 +DB 102,15,56,222,217 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR 32[ebp] +DB 102,15,56,222,225 +DB 102,15,56,222,233 DB 102,15,56,222,241 - movups xmm0,XMMWORD PTR [edx] DB 102,15,56,222,249 call $L_aesni_decrypt6_enter movdqa xmm1,XMMWORD PTR 80[esp] @@ -1434,26 +1487,25 @@ DB 102,15,56,222,249 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 - mov ecx,ebx pxor xmm1,xmm2 sub eax,96 - jnc $L057xts_dec_loop6 - lea ecx,DWORD PTR 1[ecx*2] + jnc $L059xts_dec_loop6 + mov ecx,DWORD PTR 240[ebp] mov edx,ebp mov ebx,ecx -$L056xts_dec_short: +$L058xts_dec_short: add eax,96 - jz $L058xts_dec_done6x + jz $L060xts_dec_done6x movdqa xmm5,xmm1 cmp eax,32 - jb $L059xts_dec_one + jb $L061xts_dec_one pshufd xmm2,xmm0,19 pxor xmm0,xmm0 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 - je $L060xts_dec_two + je $L062xts_dec_two pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm6,xmm1 @@ -1462,7 +1514,7 @@ $L056xts_dec_short: pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 cmp eax,64 - jb $L061xts_dec_three + jb $L063xts_dec_three pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm7,xmm1 @@ -1472,7 +1524,7 @@ $L056xts_dec_short: pxor xmm1,xmm2 movdqa XMMWORD PTR [esp],xmm5 movdqa XMMWORD PTR 16[esp],xmm6 - je $L062xts_dec_four + je $L064xts_dec_four movdqa XMMWORD PTR 32[esp],xmm7 pshufd xmm7,xmm0,19 movdqa XMMWORD PTR 48[esp],xmm1 @@ -1504,9 +1556,9 @@ $L056xts_dec_short: movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] - jmp $L063xts_dec_done + jmp $L065xts_dec_done ALIGN 16 -$L059xts_dec_one: +$L061xts_dec_one: movups xmm2,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] xorps xmm2,xmm5 @@ -1514,36 +1566,36 @@ $L059xts_dec_one: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L064dec1_loop_12: +$L066dec1_loop_12: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L064dec1_loop_12 + jnz $L066dec1_loop_12 DB 102,15,56,223,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] movdqa xmm1,xmm5 - jmp $L063xts_dec_done + jmp $L065xts_dec_done ALIGN 16 -$L060xts_dec_two: +$L062xts_dec_two: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] lea esi,DWORD PTR 32[esi] xorps xmm2,xmm5 xorps xmm3,xmm6 - call __aesni_decrypt3 + call __aesni_decrypt2 xorps xmm2,xmm5 xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 lea edi,DWORD PTR 32[edi] movdqa xmm1,xmm6 - jmp $L063xts_dec_done + jmp $L065xts_dec_done ALIGN 16 -$L061xts_dec_three: +$L063xts_dec_three: movaps xmm7,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1561,9 +1613,9 @@ $L061xts_dec_three: movups XMMWORD PTR 32[edi],xmm4 lea edi,DWORD PTR 48[edi] movdqa xmm1,xmm7 - jmp $L063xts_dec_done + jmp $L065xts_dec_done ALIGN 16 -$L062xts_dec_four: +$L064xts_dec_four: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1585,20 +1637,20 @@ $L062xts_dec_four: movups XMMWORD PTR 48[edi],xmm5 lea edi,DWORD PTR 64[edi] movdqa xmm1,xmm6 - jmp $L063xts_dec_done + jmp $L065xts_dec_done ALIGN 16 -$L058xts_dec_done6x: +$L060xts_dec_done6x: mov eax,DWORD PTR 112[esp] and eax,15 - jz $L065xts_dec_ret + jz $L067xts_dec_ret mov DWORD PTR 112[esp],eax - jmp $L066xts_dec_only_one_more + jmp $L068xts_dec_only_one_more ALIGN 16 -$L063xts_dec_done: +$L065xts_dec_done: mov eax,DWORD PTR 112[esp] pxor xmm0,xmm0 and eax,15 - jz $L065xts_dec_ret + jz $L067xts_dec_ret pcmpgtd xmm0,xmm1 mov DWORD PTR 112[esp],eax pshufd xmm2,xmm0,19 @@ -1608,7 +1660,7 @@ 
$L063xts_dec_done: pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 -$L066xts_dec_only_one_more: +$L068xts_dec_only_one_more: pshufd xmm5,xmm0,19 movdqa xmm6,xmm1 paddq xmm1,xmm1 @@ -1622,16 +1674,16 @@ $L066xts_dec_only_one_more: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L067dec1_loop_13: +$L069dec1_loop_13: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L067dec1_loop_13 + jnz $L069dec1_loop_13 DB 102,15,56,223,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 -$L068xts_dec_steal: +$L070xts_dec_steal: movzx ecx,BYTE PTR 16[esi] movzx edx,BYTE PTR [edi] lea esi,DWORD PTR 1[esi] @@ -1639,7 +1691,7 @@ $L068xts_dec_steal: mov BYTE PTR 16[edi],dl lea edi,DWORD PTR 1[edi] sub eax,1 - jnz $L068xts_dec_steal + jnz $L070xts_dec_steal sub edi,DWORD PTR 112[esp] mov edx,ebp mov ecx,ebx @@ -1649,16 +1701,16 @@ $L068xts_dec_steal: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L069dec1_loop_14: +$L071dec1_loop_14: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L069dec1_loop_14 + jnz $L071dec1_loop_14 DB 102,15,56,223,209 xorps xmm2,xmm6 movups XMMWORD PTR [edi],xmm2 -$L065xts_dec_ret: +$L067xts_dec_ret: mov esp,DWORD PTR 116[esp] pop edi pop esi @@ -1682,7 +1734,7 @@ $L_aesni_cbc_encrypt_begin:: mov edx,DWORD PTR 32[esp] mov ebp,DWORD PTR 36[esp] test eax,eax - jz $L070cbc_abort + jz $L072cbc_abort cmp DWORD PTR 40[esp],0 xchg ebx,esp movups xmm7,XMMWORD PTR [ebp] @@ -1690,14 +1742,14 @@ $L_aesni_cbc_encrypt_begin:: mov ebp,edx mov DWORD PTR 16[esp],ebx mov ebx,ecx - je $L071cbc_decrypt + je $L073cbc_decrypt movaps xmm2,xmm7 cmp eax,16 - jb $L072cbc_enc_tail + jb $L074cbc_enc_tail sub eax,16 - jmp $L073cbc_enc_loop + jmp $L075cbc_enc_loop ALIGN 16 -$L073cbc_enc_loop: +$L075cbc_enc_loop: movups xmm7,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] movups xmm0,XMMWORD PTR [edx] @@ -1705,24 +1757,24 @@ $L073cbc_enc_loop: xorps xmm7,xmm0 lea edx,DWORD PTR 32[edx] xorps xmm2,xmm7 -$L074enc1_loop_15: +$L076enc1_loop_15: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L074enc1_loop_15 + jnz $L076enc1_loop_15 DB 102,15,56,221,209 mov ecx,ebx mov edx,ebp movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] sub eax,16 - jnc $L073cbc_enc_loop + jnc $L075cbc_enc_loop add eax,16 - jnz $L072cbc_enc_tail + jnz $L074cbc_enc_tail movaps xmm7,xmm2 - jmp $L075cbc_ret -$L072cbc_enc_tail: + jmp $L077cbc_ret +$L074cbc_enc_tail: mov ecx,eax DD 2767451785 mov ecx,16 @@ -1733,20 +1785,20 @@ DD 2868115081 mov ecx,ebx mov esi,edi mov edx,ebp - jmp $L073cbc_enc_loop + jmp $L075cbc_enc_loop ALIGN 16 -$L071cbc_decrypt: +$L073cbc_decrypt: cmp eax,80 - jbe $L076cbc_dec_tail + jbe $L078cbc_dec_tail movaps XMMWORD PTR [esp],xmm7 sub eax,80 - jmp $L077cbc_dec_loop6_enter + jmp $L079cbc_dec_loop6_enter ALIGN 16 -$L078cbc_dec_loop6: +$L080cbc_dec_loop6: movaps XMMWORD PTR [esp],xmm0 movups XMMWORD PTR [edi],xmm7 lea edi,DWORD PTR 16[edi] -$L077cbc_dec_loop6_enter: +$L079cbc_dec_loop6_enter: movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -1776,28 +1828,28 @@ $L077cbc_dec_loop6_enter: movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] sub eax,96 - ja $L078cbc_dec_loop6 + ja $L080cbc_dec_loop6 movaps xmm2,xmm7 movaps xmm7,xmm0 add eax,80 - jle $L079cbc_dec_tail_collected + jle $L081cbc_dec_tail_collected movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] -$L076cbc_dec_tail: 
+$L078cbc_dec_tail: movups xmm2,XMMWORD PTR [esi] movaps xmm6,xmm2 cmp eax,16 - jbe $L080cbc_dec_one + jbe $L082cbc_dec_one movups xmm3,XMMWORD PTR 16[esi] movaps xmm5,xmm3 cmp eax,32 - jbe $L081cbc_dec_two + jbe $L083cbc_dec_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,48 - jbe $L082cbc_dec_three + jbe $L084cbc_dec_three movups xmm5,XMMWORD PTR 48[esi] cmp eax,64 - jbe $L083cbc_dec_four + jbe $L085cbc_dec_four movups xmm6,XMMWORD PTR 64[esi] movaps XMMWORD PTR [esp],xmm7 movups xmm2,XMMWORD PTR [esi] @@ -1820,28 +1872,27 @@ $L076cbc_dec_tail: lea edi,DWORD PTR 64[edi] movaps xmm2,xmm6 sub eax,80 - jmp $L079cbc_dec_tail_collected + jmp $L081cbc_dec_tail_collected ALIGN 16 -$L080cbc_dec_one: +$L082cbc_dec_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L084dec1_loop_16: +$L086dec1_loop_16: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L084dec1_loop_16 + jnz $L086dec1_loop_16 DB 102,15,56,223,209 xorps xmm2,xmm7 movaps xmm7,xmm6 sub eax,16 - jmp $L079cbc_dec_tail_collected + jmp $L081cbc_dec_tail_collected ALIGN 16 -$L081cbc_dec_two: - xorps xmm4,xmm4 - call __aesni_decrypt3 +$L083cbc_dec_two: + call __aesni_decrypt2 xorps xmm2,xmm7 xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 @@ -1849,9 +1900,9 @@ $L081cbc_dec_two: lea edi,DWORD PTR 16[edi] movaps xmm7,xmm5 sub eax,32 - jmp $L079cbc_dec_tail_collected + jmp $L081cbc_dec_tail_collected ALIGN 16 -$L082cbc_dec_three: +$L084cbc_dec_three: call __aesni_decrypt3 xorps xmm2,xmm7 xorps xmm3,xmm6 @@ -1862,9 +1913,9 @@ $L082cbc_dec_three: lea edi,DWORD PTR 32[edi] movups xmm7,XMMWORD PTR 32[esi] sub eax,48 - jmp $L079cbc_dec_tail_collected + jmp $L081cbc_dec_tail_collected ALIGN 16 -$L083cbc_dec_four: +$L085cbc_dec_four: call __aesni_decrypt4 movups xmm1,XMMWORD PTR 16[esi] movups xmm0,XMMWORD PTR 32[esi] @@ -1879,23 +1930,23 @@ $L083cbc_dec_four: lea edi,DWORD PTR 48[edi] movaps xmm2,xmm5 sub eax,64 -$L079cbc_dec_tail_collected: +$L081cbc_dec_tail_collected: and eax,15 - jnz $L085cbc_dec_tail_partial + jnz $L087cbc_dec_tail_partial movups XMMWORD PTR [edi],xmm2 - jmp $L075cbc_ret + jmp $L077cbc_ret ALIGN 16 -$L085cbc_dec_tail_partial: +$L087cbc_dec_tail_partial: movaps XMMWORD PTR [esp],xmm2 mov ecx,16 mov esi,esp sub ecx,eax DD 2767451785 -$L075cbc_ret: +$L077cbc_ret: mov esp,DWORD PTR 16[esp] mov ebp,DWORD PTR 36[esp] movups XMMWORD PTR [ebp],xmm7 -$L070cbc_abort: +$L072cbc_abort: pop edi pop esi pop ebx @@ -1905,51 +1956,51 @@ _aesni_cbc_encrypt ENDP ALIGN 16 __aesni_set_encrypt_key PROC PRIVATE test eax,eax - jz $L086bad_pointer + jz $L088bad_pointer test edx,edx - jz $L086bad_pointer + jz $L088bad_pointer movups xmm0,XMMWORD PTR [eax] xorps xmm4,xmm4 lea edx,DWORD PTR 16[edx] cmp ecx,256 - je $L08714rounds + je $L08914rounds cmp ecx,192 - je $L08812rounds + je $L09012rounds cmp ecx,128 - jne $L089bad_keybits + jne $L091bad_keybits ALIGN 16 -$L09010rounds: +$L09210rounds: mov ecx,9 movups XMMWORD PTR [edx-16],xmm0 DB 102,15,58,223,200,1 - call $L091key_128_cold + call $L093key_128_cold DB 102,15,58,223,200,2 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,4 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,8 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,16 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,32 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,64 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,128 - call $L092key_128 + call $L094key_128 DB 
102,15,58,223,200,27 - call $L092key_128 + call $L094key_128 DB 102,15,58,223,200,54 - call $L092key_128 + call $L094key_128 movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 80[edx],ecx xor eax,eax ret ALIGN 16 -$L092key_128: +$L094key_128: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] -$L091key_128_cold: +$L093key_128_cold: shufps xmm4,xmm0,16 xorps xmm0,xmm4 shufps xmm4,xmm0,140 @@ -1958,38 +2009,38 @@ $L091key_128_cold: xorps xmm0,xmm1 ret ALIGN 16 -$L08812rounds: +$L09012rounds: movq xmm2,QWORD PTR 16[eax] mov ecx,11 movups XMMWORD PTR [edx-16],xmm0 DB 102,15,58,223,202,1 - call $L093key_192a_cold + call $L095key_192a_cold DB 102,15,58,223,202,2 - call $L094key_192b + call $L096key_192b DB 102,15,58,223,202,4 - call $L095key_192a + call $L097key_192a DB 102,15,58,223,202,8 - call $L094key_192b + call $L096key_192b DB 102,15,58,223,202,16 - call $L095key_192a + call $L097key_192a DB 102,15,58,223,202,32 - call $L094key_192b + call $L096key_192b DB 102,15,58,223,202,64 - call $L095key_192a + call $L097key_192a DB 102,15,58,223,202,128 - call $L094key_192b + call $L096key_192b movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 48[edx],ecx xor eax,eax ret ALIGN 16 -$L095key_192a: +$L097key_192a: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] ALIGN 16 -$L093key_192a_cold: +$L095key_192a_cold: movaps xmm5,xmm2 -$L096key_192b_warm: +$L098key_192b_warm: shufps xmm4,xmm0,16 movdqa xmm3,xmm2 xorps xmm0,xmm4 @@ -2003,56 +2054,56 @@ $L096key_192b_warm: pxor xmm2,xmm3 ret ALIGN 16 -$L094key_192b: +$L096key_192b: movaps xmm3,xmm0 shufps xmm5,xmm0,68 movups XMMWORD PTR [edx],xmm5 shufps xmm3,xmm2,78 movups XMMWORD PTR 16[edx],xmm3 lea edx,DWORD PTR 32[edx] - jmp $L096key_192b_warm + jmp $L098key_192b_warm ALIGN 16 -$L08714rounds: +$L08914rounds: movups xmm2,XMMWORD PTR 16[eax] mov ecx,13 lea edx,DWORD PTR 16[edx] movups XMMWORD PTR [edx-32],xmm0 movups XMMWORD PTR [edx-16],xmm2 DB 102,15,58,223,202,1 - call $L097key_256a_cold + call $L099key_256a_cold DB 102,15,58,223,200,1 - call $L098key_256b + call $L100key_256b DB 102,15,58,223,202,2 - call $L099key_256a + call $L101key_256a DB 102,15,58,223,200,2 - call $L098key_256b + call $L100key_256b DB 102,15,58,223,202,4 - call $L099key_256a + call $L101key_256a DB 102,15,58,223,200,4 - call $L098key_256b + call $L100key_256b DB 102,15,58,223,202,8 - call $L099key_256a + call $L101key_256a DB 102,15,58,223,200,8 - call $L098key_256b + call $L100key_256b DB 102,15,58,223,202,16 - call $L099key_256a + call $L101key_256a DB 102,15,58,223,200,16 - call $L098key_256b + call $L100key_256b DB 102,15,58,223,202,32 - call $L099key_256a + call $L101key_256a DB 102,15,58,223,200,32 - call $L098key_256b + call $L100key_256b DB 102,15,58,223,202,64 - call $L099key_256a + call $L101key_256a movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 16[edx],ecx xor eax,eax ret ALIGN 16 -$L099key_256a: +$L101key_256a: movups XMMWORD PTR [edx],xmm2 lea edx,DWORD PTR 16[edx] -$L097key_256a_cold: +$L099key_256a_cold: shufps xmm4,xmm0,16 xorps xmm0,xmm4 shufps xmm4,xmm0,140 @@ -2061,7 +2112,7 @@ $L097key_256a_cold: xorps xmm0,xmm1 ret ALIGN 16 -$L098key_256b: +$L100key_256b: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] shufps xmm4,xmm2,16 @@ -2072,11 +2123,11 @@ $L098key_256b: xorps xmm2,xmm1 ret ALIGN 4 -$L086bad_pointer: +$L088bad_pointer: mov eax,-1 ret ALIGN 4 -$L089bad_keybits: +$L091bad_keybits: mov eax,-2 ret __aesni_set_encrypt_key ENDP @@ -2099,7 +2150,7 @@ $L_aesni_set_decrypt_key_begin:: mov edx,DWORD PTR 12[esp] shl ecx,4 test eax,eax - jnz $L100dec_key_ret + 
jnz $L102dec_key_ret lea eax,DWORD PTR 16[ecx*1+edx] movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR [eax] @@ -2107,7 +2158,7 @@ $L_aesni_set_decrypt_key_begin:: movups XMMWORD PTR [edx],xmm1 lea edx,DWORD PTR 16[edx] lea eax,DWORD PTR [eax-16] -$L101dec_key_inverse: +$L103dec_key_inverse: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR [eax] DB 102,15,56,219,192 @@ -2117,12 +2168,12 @@ DB 102,15,56,219,201 movups XMMWORD PTR 16[eax],xmm0 movups XMMWORD PTR [edx-16],xmm1 cmp eax,edx - ja $L101dec_key_inverse + ja $L103dec_key_inverse movups xmm0,XMMWORD PTR [edx] DB 102,15,56,219,192 movups XMMWORD PTR [edx],xmm0 xor eax,eax -$L100dec_key_ret: +$L102dec_key_ret: ret _aesni_set_decrypt_key ENDP DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 diff --git a/deps/openssl/asm/x86-win32-masm/aes/vpaes-x86.asm b/deps/openssl/asm/x86-win32-masm/aes/vpaes-x86.asm index 621f58f3a7a777..57afc202bb7f31 100644 --- a/deps/openssl/asm/x86-win32-masm/aes/vpaes-x86.asm +++ b/deps/openssl/asm/x86-win32-masm/aes/vpaes-x86.asm @@ -88,33 +88,33 @@ __vpaes_encrypt_core PROC PRIVATE movdqa xmm1,xmm6 movdqa xmm2,XMMWORD PTR [ebp] pandn xmm1,xmm0 - movdqu xmm5,XMMWORD PTR [edx] - psrld xmm1,4 pand xmm0,xmm6 + movdqu xmm5,XMMWORD PTR [edx] DB 102,15,56,0,208 movdqa xmm0,XMMWORD PTR 16[ebp] -DB 102,15,56,0,193 pxor xmm2,xmm5 - pxor xmm0,xmm2 + psrld xmm1,4 add edx,16 +DB 102,15,56,0,193 lea ebx,DWORD PTR 192[ebp] + pxor xmm0,xmm2 jmp $L000enc_entry ALIGN 16 $L001enc_loop: movdqa xmm4,XMMWORD PTR 32[ebp] -DB 102,15,56,0,226 - pxor xmm4,xmm5 movdqa xmm0,XMMWORD PTR 48[ebp] +DB 102,15,56,0,226 DB 102,15,56,0,195 - pxor xmm0,xmm4 + pxor xmm4,xmm5 movdqa xmm5,XMMWORD PTR 64[ebp] -DB 102,15,56,0,234 + pxor xmm0,xmm4 movdqa xmm1,XMMWORD PTR [ecx*1+ebx-64] +DB 102,15,56,0,234 movdqa xmm2,XMMWORD PTR 80[ebp] -DB 102,15,56,0,211 - pxor xmm2,xmm5 movdqa xmm4,XMMWORD PTR [ecx*1+ebx] +DB 102,15,56,0,211 movdqa xmm3,xmm0 + pxor xmm2,xmm5 DB 102,15,56,0,193 add edx,16 pxor xmm0,xmm2 @@ -123,28 +123,28 @@ DB 102,15,56,0,220 pxor xmm3,xmm0 DB 102,15,56,0,193 and ecx,48 - pxor xmm0,xmm3 sub eax,1 + pxor xmm0,xmm3 $L000enc_entry: movdqa xmm1,xmm6 + movdqa xmm5,XMMWORD PTR [ebp-32] pandn xmm1,xmm0 psrld xmm1,4 pand xmm0,xmm6 - movdqa xmm5,XMMWORD PTR [ebp-32] DB 102,15,56,0,232 - pxor xmm0,xmm1 movdqa xmm3,xmm7 + pxor xmm0,xmm1 DB 102,15,56,0,217 - pxor xmm3,xmm5 movdqa xmm4,xmm7 + pxor xmm3,xmm5 DB 102,15,56,0,224 - pxor xmm4,xmm5 movdqa xmm2,xmm7 + pxor xmm4,xmm5 DB 102,15,56,0,211 - pxor xmm2,xmm0 movdqa xmm3,xmm7 - movdqu xmm5,XMMWORD PTR [edx] + pxor xmm2,xmm0 DB 102,15,56,0,220 + movdqu xmm5,XMMWORD PTR [edx] pxor xmm3,xmm1 jnz $L001enc_loop movdqa xmm4,XMMWORD PTR 96[ebp] @@ -159,8 +159,8 @@ DB 102,15,56,0,193 __vpaes_encrypt_core ENDP ALIGN 16 __vpaes_decrypt_core PROC PRIVATE - mov eax,DWORD PTR 240[edx] lea ebx,DWORD PTR 608[ebp] + mov eax,DWORD PTR 240[edx] movdqa xmm1,xmm6 movdqa xmm2,XMMWORD PTR [ebx-64] pandn xmm1,xmm0 @@ -183,56 +183,56 @@ DB 102,15,56,0,193 ALIGN 16 $L003dec_loop: movdqa xmm4,XMMWORD PTR [ebx-32] + movdqa xmm1,XMMWORD PTR [ebx-16] DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR [ebx-16] -DB 102,15,56,0,195 +DB 102,15,56,0,203 pxor xmm0,xmm4 - add edx,16 -DB 102,15,56,0,197 movdqa xmm4,XMMWORD PTR [ebx] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR 16[ebx] DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR 16[ebx] -DB 102,15,56,0,195 - pxor xmm0,xmm4 - sub eax,1 DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 movdqa xmm4,XMMWORD PTR 32[ebx] + pxor 
xmm0,xmm1 + movdqa xmm1,XMMWORD PTR 48[ebx] DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR 48[ebx] -DB 102,15,56,0,195 - pxor xmm0,xmm4 DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 movdqa xmm4,XMMWORD PTR 64[ebx] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR 80[ebx] DB 102,15,56,0,226 - pxor xmm4,xmm0 - movdqa xmm0,XMMWORD PTR 80[ebx] -DB 102,15,56,0,195 +DB 102,15,56,0,197 +DB 102,15,56,0,203 pxor xmm0,xmm4 + add edx,16 DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub eax,1 $L002dec_entry: movdqa xmm1,xmm6 + movdqa xmm2,XMMWORD PTR [ebp-32] pandn xmm1,xmm0 - psrld xmm1,4 pand xmm0,xmm6 - movdqa xmm2,XMMWORD PTR [ebp-32] + psrld xmm1,4 DB 102,15,56,0,208 - pxor xmm0,xmm1 movdqa xmm3,xmm7 + pxor xmm0,xmm1 DB 102,15,56,0,217 - pxor xmm3,xmm2 movdqa xmm4,xmm7 + pxor xmm3,xmm2 DB 102,15,56,0,224 pxor xmm4,xmm2 movdqa xmm2,xmm7 DB 102,15,56,0,211 - pxor xmm2,xmm0 movdqa xmm3,xmm7 + pxor xmm2,xmm0 DB 102,15,56,0,220 - pxor xmm3,xmm1 movdqu xmm0,XMMWORD PTR [edx] + pxor xmm3,xmm1 jnz $L003dec_loop movdqa xmm4,XMMWORD PTR 96[ebx] DB 102,15,56,0,226 @@ -339,12 +339,12 @@ $L013schedule_mangle_last_dec: __vpaes_schedule_core ENDP ALIGN 16 __vpaes_schedule_192_smear PROC PRIVATE - pshufd xmm0,xmm6,128 - pxor xmm6,xmm0 + pshufd xmm1,xmm6,128 pshufd xmm0,xmm7,254 + pxor xmm6,xmm1 + pxor xmm1,xmm1 pxor xmm6,xmm0 movdqa xmm0,xmm6 - pxor xmm1,xmm1 movhlps xmm6,xmm1 ret __vpaes_schedule_192_smear ENDP diff --git a/deps/openssl/asm/x86-win32-masm/bf/bf-586.asm b/deps/openssl/asm/x86-win32-masm/bf/bf-586.asm new file mode 100644 index 00000000000000..218e8f5c7d445d --- /dev/null +++ b/deps/openssl/asm/x86-win32-masm/bf/bf-586.asm @@ -0,0 +1,902 @@ +TITLE bf-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_BF_encrypt PROC PUBLIC +$L_BF_encrypt_begin:: + ; + push ebp + push ebx + mov ebx,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + push esi + push edi + ; Load the 2 words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + xor eax,eax + mov ebx,DWORD PTR [ebp] + xor ecx,ecx + xor edi,ebx + ; + ; Round 0 + mov edx,DWORD PTR 4[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 1 + mov edx,DWORD PTR 8[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 2 + mov edx,DWORD PTR 12[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 3 + mov edx,DWORD PTR 16[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 
2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 4 + mov edx,DWORD PTR 20[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 5 + mov edx,DWORD PTR 24[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 6 + mov edx,DWORD PTR 28[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 7 + mov edx,DWORD PTR 32[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 8 + mov edx,DWORD PTR 36[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 9 + mov edx,DWORD PTR 40[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 10 + mov edx,DWORD PTR 44[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 11 + mov edx,DWORD PTR 48[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 12 + mov edx,DWORD PTR 52[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 13 + mov edx,DWORD PTR 56[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 
72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 14 + mov edx,DWORD PTR 60[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 15 + mov edx,DWORD PTR 64[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + ; Load parameter 0 (16) enc=1 + mov eax,DWORD PTR 20[esp] + xor edi,ebx + mov edx,DWORD PTR 68[ebp] + xor esi,edx + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_BF_encrypt ENDP +ALIGN 16 +_BF_decrypt PROC PUBLIC +$L_BF_decrypt_begin:: + ; + push ebp + push ebx + mov ebx,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + push esi + push edi + ; Load the 2 words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + xor eax,eax + mov ebx,DWORD PTR 68[ebp] + xor ecx,ecx + xor edi,ebx + ; + ; Round 16 + mov edx,DWORD PTR 64[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 15 + mov edx,DWORD PTR 60[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 14 + mov edx,DWORD PTR 56[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 13 + mov edx,DWORD PTR 52[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 12 + mov edx,DWORD PTR 48[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 11 + mov edx,DWORD PTR 44[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD 
PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 10 + mov edx,DWORD PTR 40[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 9 + mov edx,DWORD PTR 36[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 8 + mov edx,DWORD PTR 32[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 7 + mov edx,DWORD PTR 28[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 6 + mov edx,DWORD PTR 24[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 5 + mov edx,DWORD PTR 20[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 4 + mov edx,DWORD PTR 16[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 3 + mov edx,DWORD PTR 12[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 2 + mov edx,DWORD PTR 8[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 1 + mov edx,DWORD PTR 4[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add 
ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + ; Load parameter 0 (1) enc=0 + mov eax,DWORD PTR 20[esp] + xor edi,ebx + mov edx,DWORD PTR [ebp] + xor esi,edx + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_BF_decrypt ENDP +ALIGN 16 +_BF_cbc_encrypt PROC PUBLIC +$L_BF_cbc_encrypt_begin:: + ; + push ebp + push ebx + push esi + push edi + mov ebp,DWORD PTR 28[esp] + ; getting iv ptr from parameter 4 + mov ebx,DWORD PTR 36[esp] + mov esi,DWORD PTR [ebx] + mov edi,DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx,esp + mov esi,DWORD PTR 36[esp] + mov edi,DWORD PTR 40[esp] + ; getting encrypt flag from parameter 5 + mov ecx,DWORD PTR 56[esp] + ; get and push parameter 3 + mov eax,DWORD PTR 48[esp] + push eax + push ebx + cmp ecx,0 + jz $L000decrypt + and ebp,4294967288 + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + jz $L001encrypt_finish +$L002encrypt_loop: + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + xor eax,ecx + xor ebx,edx + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_BF_encrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L002encrypt_loop +$L001encrypt_finish: + mov ebp,DWORD PTR 52[esp] + and ebp,7 + jz $L003finish + call $L004PIC_point +$L004PIC_point: + pop edx + lea ecx,DWORD PTR ($L005cbc_enc_jmp_table-$L004PIC_point)[edx] + mov ebp,DWORD PTR [ebp*4+ecx] + add ebp,edx + xor ecx,ecx + xor edx,edx + jmp ebp +$L006ej7: + mov dh,BYTE PTR 6[esi] + shl edx,8 +$L007ej6: + mov dh,BYTE PTR 5[esi] +$L008ej5: + mov dl,BYTE PTR 4[esi] +$L009ej4: + mov ecx,DWORD PTR [esi] + jmp $L010ejend +$L011ej3: + mov ch,BYTE PTR 2[esi] + shl ecx,8 +$L012ej2: + mov ch,BYTE PTR 1[esi] +$L013ej1: + mov cl,BYTE PTR [esi] +$L010ejend: + xor eax,ecx + xor ebx,edx + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_BF_encrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + jmp $L003finish +$L000decrypt: + and ebp,4294967288 + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + jz $L014decrypt_finish +$L015decrypt_loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_BF_decrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov ecx,DWORD PTR 16[esp] + mov edx,DWORD PTR 20[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR [edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L015decrypt_loop +$L014decrypt_finish: + mov ebp,DWORD PTR 52[esp] + and ebp,7 + jz $L003finish + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_BF_decrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov ecx,DWORD PTR 16[esp] + mov edx,DWORD PTR 20[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] +$L016dj7: + ror edx,16 + mov BYTE PTR 6[edi],dl + shr edx,16 +$L017dj6: + mov BYTE PTR 5[edi],dh +$L018dj5: + mov BYTE PTR 4[edi],dl +$L019dj4: + mov DWORD PTR 
[edi],ecx + jmp $L020djend +$L021dj3: + ror ecx,16 + mov BYTE PTR 2[edi],cl + shl ecx,16 +$L022dj2: + mov BYTE PTR 1[esi],ch +$L023dj1: + mov BYTE PTR [esi],cl +$L020djend: + jmp $L003finish +$L003finish: + mov ecx,DWORD PTR 60[esp] + add esp,24 + mov DWORD PTR [ecx],eax + mov DWORD PTR 4[ecx],ebx + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L005cbc_enc_jmp_table: +DD 0 +DD $L013ej1-$L004PIC_point +DD $L012ej2-$L004PIC_point +DD $L011ej3-$L004PIC_point +DD $L009ej4-$L004PIC_point +DD $L008ej5-$L004PIC_point +DD $L007ej6-$L004PIC_point +DD $L006ej7-$L004PIC_point +ALIGN 64 +_BF_cbc_encrypt ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm b/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm deleted file mode 100644 index 28831796742746..00000000000000 --- a/deps/openssl/asm/x86-win32-masm/bf/bf-686.asm +++ /dev/null @@ -1,907 +0,0 @@ -TITLE bf-686.asm -IF @Version LT 800 -ECHO MASM version 8.00 or later is strongly recommended. -ENDIF -.686 -.MODEL FLAT -OPTION DOTNAME -IF @Version LT 800 -.text$ SEGMENT PAGE 'CODE' -ELSE -.text$ SEGMENT ALIGN(64) 'CODE' -ENDIF -ALIGN 16 -_BF_encrypt PROC PUBLIC -$L_BF_encrypt_begin:: - push ebp - push ebx - push esi - push edi - ; - - ; Load the 2 words - mov eax,DWORD PTR 20[esp] - mov ecx,DWORD PTR [eax] - mov edx,DWORD PTR 4[eax] - ; - - ; P pointer, s and enc flag - mov edi,DWORD PTR 24[esp] - xor eax,eax - xor ebx,ebx - xor ecx,DWORD PTR [edi] - ; - - ; Round 0 - ror ecx,16 - mov esi,DWORD PTR 4[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 1 - ror edx,16 - mov esi,DWORD PTR 8[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 2 - ror ecx,16 - mov esi,DWORD PTR 12[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 3 - ror edx,16 - mov esi,DWORD PTR 16[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 4 - ror ecx,16 - mov esi,DWORD PTR 20[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 5 - ror edx,16 - mov esi,DWORD PTR 24[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 6 - ror ecx,16 - 
mov esi,DWORD PTR 28[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 7 - ror edx,16 - mov esi,DWORD PTR 32[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 8 - ror ecx,16 - mov esi,DWORD PTR 36[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 9 - ror edx,16 - mov esi,DWORD PTR 40[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 10 - ror ecx,16 - mov esi,DWORD PTR 44[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 11 - ror edx,16 - mov esi,DWORD PTR 48[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 12 - ror ecx,16 - mov esi,DWORD PTR 52[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 13 - ror edx,16 - mov esi,DWORD PTR 56[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 14 - ror ecx,16 - mov esi,DWORD PTR 60[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 15 - ror edx,16 - mov esi,DWORD PTR 64[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - xor edx,DWORD PTR 68[edi] - mov eax,DWORD PTR 20[esp] - mov DWORD PTR [eax],edx - mov DWORD PTR 4[eax],ecx - pop edi - 
pop esi - pop ebx - pop ebp - ret -_BF_encrypt ENDP -ALIGN 16 -_BF_decrypt PROC PUBLIC -$L_BF_decrypt_begin:: - push ebp - push ebx - push esi - push edi - ; - - ; Load the 2 words - mov eax,DWORD PTR 20[esp] - mov ecx,DWORD PTR [eax] - mov edx,DWORD PTR 4[eax] - ; - - ; P pointer, s and enc flag - mov edi,DWORD PTR 24[esp] - xor eax,eax - xor ebx,ebx - xor ecx,DWORD PTR 68[edi] - ; - - ; Round 16 - ror ecx,16 - mov esi,DWORD PTR 64[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 15 - ror edx,16 - mov esi,DWORD PTR 60[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 14 - ror ecx,16 - mov esi,DWORD PTR 56[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 13 - ror edx,16 - mov esi,DWORD PTR 52[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 12 - ror ecx,16 - mov esi,DWORD PTR 48[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 11 - ror edx,16 - mov esi,DWORD PTR 44[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 10 - ror ecx,16 - mov esi,DWORD PTR 40[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 9 - ror edx,16 - mov esi,DWORD PTR 36[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 8 - ror ecx,16 - mov esi,DWORD PTR 32[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 7 - ror edx,16 - mov esi,DWORD PTR 
28[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 6 - ror ecx,16 - mov esi,DWORD PTR 24[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 5 - ror edx,16 - mov esi,DWORD PTR 20[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 4 - ror ecx,16 - mov esi,DWORD PTR 16[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 3 - ror edx,16 - mov esi,DWORD PTR 12[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - ; - - ; Round 2 - ror ecx,16 - mov esi,DWORD PTR 8[edi] - mov al,ch - mov bl,cl - ror ecx,16 - xor edx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,ch - mov bl,cl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor edx,esi - ; - - ; Round 1 - ror edx,16 - mov esi,DWORD PTR 4[edi] - mov al,dh - mov bl,dl - ror edx,16 - xor ecx,esi - mov esi,DWORD PTR 72[eax*4+edi] - mov ebp,DWORD PTR 1096[ebx*4+edi] - mov al,dh - mov bl,dl - add esi,ebp - mov eax,DWORD PTR 2120[eax*4+edi] - xor esi,eax - mov ebp,DWORD PTR 3144[ebx*4+edi] - add esi,ebp - xor eax,eax - xor ecx,esi - xor edx,DWORD PTR [edi] - mov eax,DWORD PTR 20[esp] - mov DWORD PTR [eax],edx - mov DWORD PTR 4[eax],ecx - pop edi - pop esi - pop ebx - pop ebp - ret -_BF_decrypt ENDP -ALIGN 16 -_BF_cbc_encrypt PROC PUBLIC -$L_BF_cbc_encrypt_begin:: - ; - - push ebp - push ebx - push esi - push edi - mov ebp,DWORD PTR 28[esp] - ; getting iv ptr from parameter 4 - mov ebx,DWORD PTR 36[esp] - mov esi,DWORD PTR [ebx] - mov edi,DWORD PTR 4[ebx] - push edi - push esi - push edi - push esi - mov ebx,esp - mov esi,DWORD PTR 36[esp] - mov edi,DWORD PTR 40[esp] - ; getting encrypt flag from parameter 5 - mov ecx,DWORD PTR 56[esp] - ; get and push parameter 3 - mov eax,DWORD PTR 48[esp] - push eax - push ebx - cmp ecx,0 - jz $L000decrypt - and ebp,4294967288 - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - jz $L001encrypt_finish -$L002encrypt_loop: - mov ecx,DWORD PTR [esi] - mov edx,DWORD PTR 4[esi] - xor eax,ecx - xor ebx,edx - bswap eax - bswap ebx - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_BF_encrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - bswap eax - bswap ebx - mov DWORD PTR [edi],eax - mov DWORD PTR 4[edi],ebx - add esi,8 - add 
edi,8 - sub ebp,8 - jnz $L002encrypt_loop -$L001encrypt_finish: - mov ebp,DWORD PTR 52[esp] - and ebp,7 - jz $L003finish - call $L004PIC_point -$L004PIC_point: - pop edx - lea ecx,DWORD PTR ($L005cbc_enc_jmp_table-$L004PIC_point)[edx] - mov ebp,DWORD PTR [ebp*4+ecx] - add ebp,edx - xor ecx,ecx - xor edx,edx - jmp ebp -$L006ej7: - mov dh,BYTE PTR 6[esi] - shl edx,8 -$L007ej6: - mov dh,BYTE PTR 5[esi] -$L008ej5: - mov dl,BYTE PTR 4[esi] -$L009ej4: - mov ecx,DWORD PTR [esi] - jmp $L010ejend -$L011ej3: - mov ch,BYTE PTR 2[esi] - shl ecx,8 -$L012ej2: - mov ch,BYTE PTR 1[esi] -$L013ej1: - mov cl,BYTE PTR [esi] -$L010ejend: - xor eax,ecx - xor ebx,edx - bswap eax - bswap ebx - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_BF_encrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - bswap eax - bswap ebx - mov DWORD PTR [edi],eax - mov DWORD PTR 4[edi],ebx - jmp $L003finish -$L000decrypt: - and ebp,4294967288 - mov eax,DWORD PTR 16[esp] - mov ebx,DWORD PTR 20[esp] - jz $L014decrypt_finish -$L015decrypt_loop: - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] - bswap eax - bswap ebx - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_BF_decrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - bswap eax - bswap ebx - mov ecx,DWORD PTR 16[esp] - mov edx,DWORD PTR 20[esp] - xor ecx,eax - xor edx,ebx - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] - mov DWORD PTR [edi],ecx - mov DWORD PTR 4[edi],edx - mov DWORD PTR 16[esp],eax - mov DWORD PTR 20[esp],ebx - add esi,8 - add edi,8 - sub ebp,8 - jnz $L015decrypt_loop -$L014decrypt_finish: - mov ebp,DWORD PTR 52[esp] - and ebp,7 - jz $L003finish - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] - bswap eax - bswap ebx - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_BF_decrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - bswap eax - bswap ebx - mov ecx,DWORD PTR 16[esp] - mov edx,DWORD PTR 20[esp] - xor ecx,eax - xor edx,ebx - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] -$L016dj7: - ror edx,16 - mov BYTE PTR 6[edi],dl - shr edx,16 -$L017dj6: - mov BYTE PTR 5[edi],dh -$L018dj5: - mov BYTE PTR 4[edi],dl -$L019dj4: - mov DWORD PTR [edi],ecx - jmp $L020djend -$L021dj3: - ror ecx,16 - mov BYTE PTR 2[edi],cl - shl ecx,16 -$L022dj2: - mov BYTE PTR 1[esi],ch -$L023dj1: - mov BYTE PTR [esi],cl -$L020djend: - jmp $L003finish -$L003finish: - mov ecx,DWORD PTR 60[esp] - add esp,24 - mov DWORD PTR [ecx],eax - mov DWORD PTR 4[ecx],ebx - pop edi - pop esi - pop ebx - pop ebp - ret -ALIGN 64 -$L005cbc_enc_jmp_table: -DD 0 -DD $L013ej1-$L004PIC_point -DD $L012ej2-$L004PIC_point -DD $L011ej3-$L004PIC_point -DD $L009ej4-$L004PIC_point -DD $L008ej5-$L004PIC_point -DD $L007ej6-$L004PIC_point -DD $L006ej7-$L004PIC_point -ALIGN 64 -_BF_cbc_encrypt ENDP -.text$ ENDS -END diff --git a/deps/openssl/asm/x86-win32-masm/bn/bn-586.asm b/deps/openssl/asm/x86-win32-masm/bn/bn-586.asm new file mode 100644 index 00000000000000..916ed888615f8b --- /dev/null +++ b/deps/openssl/asm/x86-win32-masm/bn/bn-586.asm @@ -0,0 +1,1529 @@ +TITLE ../openssl/crypto/bn/asm/bn-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_bn_mul_add_words PROC PUBLIC +$L_bn_mul_add_words_begin:: + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L000maw_non_sse2 + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] + movd mm0,DWORD PTR 16[esp] + pxor mm1,mm1 + jmp $L001maw_sse2_entry +ALIGN 16 +$L002maw_sse2_unrolled: + movd mm3,DWORD PTR [eax] + paddq mm1,mm3 + movd mm2,DWORD PTR [edx] + pmuludq mm2,mm0 + movd mm4,DWORD PTR 4[edx] + pmuludq mm4,mm0 + movd mm6,DWORD PTR 8[edx] + pmuludq mm6,mm0 + movd mm7,DWORD PTR 12[edx] + pmuludq mm7,mm0 + paddq mm1,mm2 + movd mm3,DWORD PTR 4[eax] + paddq mm3,mm4 + movd mm5,DWORD PTR 8[eax] + paddq mm5,mm6 + movd mm4,DWORD PTR 12[eax] + paddq mm7,mm4 + movd DWORD PTR [eax],mm1 + movd mm2,DWORD PTR 16[edx] + pmuludq mm2,mm0 + psrlq mm1,32 + movd mm4,DWORD PTR 20[edx] + pmuludq mm4,mm0 + paddq mm1,mm3 + movd mm6,DWORD PTR 24[edx] + pmuludq mm6,mm0 + movd DWORD PTR 4[eax],mm1 + psrlq mm1,32 + movd mm3,DWORD PTR 28[edx] + add edx,32 + pmuludq mm3,mm0 + paddq mm1,mm5 + movd mm5,DWORD PTR 16[eax] + paddq mm2,mm5 + movd DWORD PTR 8[eax],mm1 + psrlq mm1,32 + paddq mm1,mm7 + movd mm5,DWORD PTR 20[eax] + paddq mm4,mm5 + movd DWORD PTR 12[eax],mm1 + psrlq mm1,32 + paddq mm1,mm2 + movd mm5,DWORD PTR 24[eax] + paddq mm6,mm5 + movd DWORD PTR 16[eax],mm1 + psrlq mm1,32 + paddq mm1,mm4 + movd mm5,DWORD PTR 28[eax] + paddq mm3,mm5 + movd DWORD PTR 20[eax],mm1 + psrlq mm1,32 + paddq mm1,mm6 + movd DWORD PTR 24[eax],mm1 + psrlq mm1,32 + paddq mm1,mm3 + movd DWORD PTR 28[eax],mm1 + lea eax,DWORD PTR 32[eax] + psrlq mm1,32 + sub ecx,8 + jz $L003maw_sse2_exit +$L001maw_sse2_entry: + test ecx,4294967288 + jnz $L002maw_sse2_unrolled +ALIGN 4 +$L004maw_sse2_loop: + movd mm2,DWORD PTR [edx] + movd mm3,DWORD PTR [eax] + pmuludq mm2,mm0 + lea edx,DWORD PTR 4[edx] + paddq mm1,mm3 + paddq mm1,mm2 + movd DWORD PTR [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,DWORD PTR 4[eax] + jnz $L004maw_sse2_loop +$L003maw_sse2_exit: + movd eax,mm1 + emms + ret +ALIGN 16 +$L000maw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD PTR 20[esp] + mov ecx,DWORD PTR 28[esp] + mov ebx,DWORD PTR 24[esp] + and ecx,4294967288 + mov ebp,DWORD PTR 32[esp] + push ecx + jz $L005maw_finish +ALIGN 16 +$L006maw_loop: + ; Round 0 + mov eax,DWORD PTR [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR [edi] + adc edx,0 + mov DWORD PTR [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD PTR 4[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 4[edi] + adc edx,0 + mov DWORD PTR 4[edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD PTR 8[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 8[edi] + adc edx,0 + mov DWORD PTR 8[edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD PTR 12[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 12[edi] + adc edx,0 + mov DWORD PTR 12[edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD PTR 16[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 16[edi] + adc edx,0 + mov DWORD PTR 16[edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD PTR 20[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 20[edi] + adc edx,0 + mov DWORD PTR 20[edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD PTR 24[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 24[edi] + adc edx,0 + mov DWORD PTR 
24[edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD PTR 28[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 28[edi] + adc edx,0 + mov DWORD PTR 28[edi],eax + mov esi,edx + ; + sub ecx,8 + lea ebx,DWORD PTR 32[ebx] + lea edi,DWORD PTR 32[edi] + jnz $L006maw_loop +$L005maw_finish: + mov ecx,DWORD PTR 32[esp] + and ecx,7 + jnz $L007maw_finish2 + jmp $L008maw_end +$L007maw_finish2: + ; Tail Round 0 + mov eax,DWORD PTR [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR [edi] + adc edx,0 + dec ecx + mov DWORD PTR [edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 1 + mov eax,DWORD PTR 4[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 4[edi] + adc edx,0 + dec ecx + mov DWORD PTR 4[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 2 + mov eax,DWORD PTR 8[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 8[edi] + adc edx,0 + dec ecx + mov DWORD PTR 8[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 3 + mov eax,DWORD PTR 12[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 12[edi] + adc edx,0 + dec ecx + mov DWORD PTR 12[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 4 + mov eax,DWORD PTR 16[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 16[edi] + adc edx,0 + dec ecx + mov DWORD PTR 16[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 5 + mov eax,DWORD PTR 20[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 20[edi] + adc edx,0 + dec ecx + mov DWORD PTR 20[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 6 + mov eax,DWORD PTR 24[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 24[edi] + adc edx,0 + mov DWORD PTR 24[edi],eax + mov esi,edx +$L008maw_end: + mov eax,esi + pop ecx + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_add_words ENDP +ALIGN 16 +_bn_mul_words PROC PUBLIC +$L_bn_mul_words_begin:: + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L009mw_non_sse2 + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] + movd mm0,DWORD PTR 16[esp] + pxor mm1,mm1 +ALIGN 16 +$L010mw_sse2_loop: + movd mm2,DWORD PTR [edx] + pmuludq mm2,mm0 + lea edx,DWORD PTR 4[edx] + paddq mm1,mm2 + movd DWORD PTR [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,DWORD PTR 4[eax] + jnz $L010mw_sse2_loop + movd eax,mm1 + emms + ret +ALIGN 16 +$L009mw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + mov ebp,DWORD PTR 28[esp] + mov ecx,DWORD PTR 32[esp] + and ebp,4294967288 + jz $L011mw_finish +$L012mw_loop: + ; Round 0 + mov eax,DWORD PTR [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD PTR 4[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 4[edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD PTR 8[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 8[edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD PTR 12[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 12[edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD PTR 16[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 16[edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD PTR 20[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 20[edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD PTR 24[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 24[edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD PTR 28[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 28[edi],eax + mov esi,edx + ; + add ebx,32 + add 
edi,32 + sub ebp,8 + jz $L011mw_finish + jmp $L012mw_loop +$L011mw_finish: + mov ebp,DWORD PTR 28[esp] + and ebp,7 + jnz $L013mw_finish2 + jmp $L014mw_end +$L013mw_finish2: + ; Tail Round 0 + mov eax,DWORD PTR [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR [edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 1 + mov eax,DWORD PTR 4[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 4[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 2 + mov eax,DWORD PTR 8[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 8[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 3 + mov eax,DWORD PTR 12[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 12[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 4 + mov eax,DWORD PTR 16[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 16[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 5 + mov eax,DWORD PTR 20[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 20[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 6 + mov eax,DWORD PTR 24[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 24[edi],eax + mov esi,edx +$L014mw_end: + mov eax,esi + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_words ENDP +ALIGN 16 +_bn_sqr_words PROC PUBLIC +$L_bn_sqr_words_begin:: + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L015sqr_non_sse2 + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] +ALIGN 16 +$L016sqr_sse2_loop: + movd mm0,DWORD PTR [edx] + pmuludq mm0,mm0 + lea edx,DWORD PTR 4[edx] + movq QWORD PTR [eax],mm0 + sub ecx,1 + lea eax,DWORD PTR 8[eax] + jnz $L016sqr_sse2_loop + emms + ret +ALIGN 16 +$L015sqr_non_sse2: + push ebp + push ebx + push esi + push edi + ; + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov ebx,DWORD PTR 28[esp] + and ebx,4294967288 + jz $L017sw_finish +$L018sw_loop: + ; Round 0 + mov eax,DWORD PTR [edi] + mul eax + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],edx + ; Round 4 + mov eax,DWORD PTR 4[edi] + mul eax + mov DWORD PTR 8[esi],eax + mov DWORD PTR 12[esi],edx + ; Round 8 + mov eax,DWORD PTR 8[edi] + mul eax + mov DWORD PTR 16[esi],eax + mov DWORD PTR 20[esi],edx + ; Round 12 + mov eax,DWORD PTR 12[edi] + mul eax + mov DWORD PTR 24[esi],eax + mov DWORD PTR 28[esi],edx + ; Round 16 + mov eax,DWORD PTR 16[edi] + mul eax + mov DWORD PTR 32[esi],eax + mov DWORD PTR 36[esi],edx + ; Round 20 + mov eax,DWORD PTR 20[edi] + mul eax + mov DWORD PTR 40[esi],eax + mov DWORD PTR 44[esi],edx + ; Round 24 + mov eax,DWORD PTR 24[edi] + mul eax + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],edx + ; Round 28 + mov eax,DWORD PTR 28[edi] + mul eax + mov DWORD PTR 56[esi],eax + mov DWORD PTR 60[esi],edx + ; + add edi,32 + add esi,64 + sub ebx,8 + jnz $L018sw_loop +$L017sw_finish: + mov ebx,DWORD PTR 28[esp] + and ebx,7 + jz $L019sw_end + ; Tail Round 0 + mov eax,DWORD PTR [edi] + mul eax + mov DWORD PTR [esi],eax + dec ebx + mov DWORD PTR 4[esi],edx + jz $L019sw_end + ; Tail Round 1 + mov eax,DWORD PTR 4[edi] + mul eax + mov DWORD PTR 8[esi],eax + dec ebx + mov DWORD PTR 12[esi],edx + jz $L019sw_end + ; Tail Round 2 + mov eax,DWORD PTR 8[edi] + mul eax + mov DWORD PTR 16[esi],eax + dec ebx + mov DWORD PTR 20[esi],edx + jz $L019sw_end + ; Tail Round 3 + mov eax,DWORD PTR 12[edi] + mul eax + mov DWORD PTR 24[esi],eax + dec ebx + mov DWORD PTR 28[esi],edx + jz $L019sw_end + ; Tail Round 4 + mov eax,DWORD PTR 16[edi] + mul eax + mov DWORD PTR 32[esi],eax + dec ebx 
+ mov DWORD PTR 36[esi],edx + jz $L019sw_end + ; Tail Round 5 + mov eax,DWORD PTR 20[edi] + mul eax + mov DWORD PTR 40[esi],eax + dec ebx + mov DWORD PTR 44[esi],edx + jz $L019sw_end + ; Tail Round 6 + mov eax,DWORD PTR 24[edi] + mul eax + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],edx +$L019sw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sqr_words ENDP +ALIGN 16 +_bn_div_words PROC PUBLIC +$L_bn_div_words_begin:: + mov edx,DWORD PTR 4[esp] + mov eax,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] + div ecx + ret +_bn_div_words ENDP +ALIGN 16 +_bn_add_words PROC PUBLIC +$L_bn_add_words_begin:: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + and ebp,4294967288 + jz $L020aw_finish +$L021aw_loop: + ; Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx,DWORD PTR 28[esi] + mov edx,DWORD PTR 28[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L021aw_loop +$L020aw_finish: + mov ebp,DWORD PTR 32[esp] + and ebp,7 + jz $L022aw_end + ; Tail Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L022aw_end + ; Tail Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L022aw_end + ; Tail Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L022aw_end + ; Tail Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L022aw_end + ; Tail Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L022aw_end + ; Tail Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L022aw_end + ; Tail Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add 
ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx +$L022aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_add_words ENDP +ALIGN 16 +_bn_sub_words PROC PUBLIC +$L_bn_sub_words_begin:: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + and ebp,4294967288 + jz $L023aw_finish +$L024aw_loop: + ; Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx,DWORD PTR 28[esi] + mov edx,DWORD PTR 28[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L024aw_loop +$L023aw_finish: + mov ebp,DWORD PTR 32[esp] + and ebp,7 + jz $L025aw_end + ; Tail Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L025aw_end + ; Tail Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L025aw_end + ; Tail Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L025aw_end + ; Tail Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L025aw_end + ; Tail Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L025aw_end + ; Tail Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L025aw_end + ; Tail Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx +$L025aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sub_words ENDP +ALIGN 16 +_bn_sub_part_words PROC PUBLIC +$L_bn_sub_part_words_begin:: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + and ebp,4294967288 + jz $L026aw_finish +$L027aw_loop: + 
; Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx,DWORD PTR 28[esi] + mov edx,DWORD PTR 28[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L027aw_loop +$L026aw_finish: + mov ebp,DWORD PTR 32[esp] + and ebp,7 + jz $L028aw_end + ; Tail Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 1 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 2 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 3 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 4 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 5 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 6 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 +$L028aw_end: + cmp DWORD PTR 36[esp],0 + je $L029pw_end + mov ebp,DWORD PTR 36[esp] + cmp ebp,0 + je $L029pw_end + jge $L030pw_pos + ; pw_neg + mov edx,0 + sub edx,ebp + mov ebp,edx + and ebp,4294967288 + jz $L031pw_neg_finish +$L032pw_neg_loop: + ; dl<0 Round 0 + mov ecx,0 + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; dl<0 Round 1 + mov ecx,0 + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; dl<0 Round 2 + mov ecx,0 + mov 
edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; dl<0 Round 3 + mov ecx,0 + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; dl<0 Round 4 + mov ecx,0 + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; dl<0 Round 5 + mov ecx,0 + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; dl<0 Round 6 + mov ecx,0 + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; dl<0 Round 7 + mov ecx,0 + mov edx,DWORD PTR 28[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L032pw_neg_loop +$L031pw_neg_finish: + mov edx,DWORD PTR 36[esp] + mov ebp,0 + sub ebp,edx + and ebp,7 + jz $L029pw_end + ; dl<0 Tail Round 0 + mov ecx,0 + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 1 + mov ecx,0 + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 2 + mov ecx,0 + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 3 + mov ecx,0 + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 4 + mov ecx,0 + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 5 + mov ecx,0 + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 6 + mov ecx,0 + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + jmp $L029pw_end +$L030pw_pos: + and ebp,4294967288 + jz $L033pw_pos_finish +$L034pw_pos_loop: + ; dl>0 Round 0 + mov ecx,DWORD PTR [esi] + sub ecx,eax + mov DWORD PTR [ebx],ecx + jnc $L035pw_nc0 + ; dl>0 Round 1 + mov ecx,DWORD PTR 4[esi] + sub ecx,eax + mov DWORD PTR 4[ebx],ecx + jnc $L036pw_nc1 + ; dl>0 Round 2 + mov ecx,DWORD PTR 8[esi] + sub ecx,eax + mov DWORD PTR 8[ebx],ecx + jnc $L037pw_nc2 + ; dl>0 Round 3 + mov ecx,DWORD PTR 12[esi] + sub ecx,eax + mov DWORD PTR 12[ebx],ecx + jnc $L038pw_nc3 + ; dl>0 Round 4 + mov ecx,DWORD PTR 16[esi] + sub ecx,eax + mov DWORD PTR 16[ebx],ecx + jnc $L039pw_nc4 + ; dl>0 Round 5 + mov ecx,DWORD PTR 20[esi] + sub ecx,eax + mov DWORD PTR 20[ebx],ecx + jnc $L040pw_nc5 + ; dl>0 Round 6 + mov ecx,DWORD PTR 24[esi] + sub ecx,eax + mov DWORD PTR 24[ebx],ecx + jnc $L041pw_nc6 + ; dl>0 Round 7 + mov ecx,DWORD PTR 28[esi] + sub ecx,eax + mov DWORD PTR 28[ebx],ecx + jnc $L042pw_nc7 + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz $L034pw_pos_loop +$L033pw_pos_finish: + mov ebp,DWORD PTR 36[esp] + and ebp,7 + jz $L029pw_end + ; dl>0 Tail Round 0 + mov ecx,DWORD PTR [esi] + sub ecx,eax + mov DWORD PTR [ebx],ecx + jnc $L043pw_tail_nc0 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 1 + mov ecx,DWORD PTR 4[esi] + sub 
ecx,eax + mov DWORD PTR 4[ebx],ecx + jnc $L044pw_tail_nc1 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 2 + mov ecx,DWORD PTR 8[esi] + sub ecx,eax + mov DWORD PTR 8[ebx],ecx + jnc $L045pw_tail_nc2 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 3 + mov ecx,DWORD PTR 12[esi] + sub ecx,eax + mov DWORD PTR 12[ebx],ecx + jnc $L046pw_tail_nc3 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 4 + mov ecx,DWORD PTR 16[esi] + sub ecx,eax + mov DWORD PTR 16[ebx],ecx + jnc $L047pw_tail_nc4 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 5 + mov ecx,DWORD PTR 20[esi] + sub ecx,eax + mov DWORD PTR 20[ebx],ecx + jnc $L048pw_tail_nc5 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 6 + mov ecx,DWORD PTR 24[esi] + sub ecx,eax + mov DWORD PTR 24[ebx],ecx + jnc $L049pw_tail_nc6 + mov eax,1 + jmp $L029pw_end +$L050pw_nc_loop: + mov ecx,DWORD PTR [esi] + mov DWORD PTR [ebx],ecx +$L035pw_nc0: + mov ecx,DWORD PTR 4[esi] + mov DWORD PTR 4[ebx],ecx +$L036pw_nc1: + mov ecx,DWORD PTR 8[esi] + mov DWORD PTR 8[ebx],ecx +$L037pw_nc2: + mov ecx,DWORD PTR 12[esi] + mov DWORD PTR 12[ebx],ecx +$L038pw_nc3: + mov ecx,DWORD PTR 16[esi] + mov DWORD PTR 16[ebx],ecx +$L039pw_nc4: + mov ecx,DWORD PTR 20[esi] + mov DWORD PTR 20[ebx],ecx +$L040pw_nc5: + mov ecx,DWORD PTR 24[esi] + mov DWORD PTR 24[ebx],ecx +$L041pw_nc6: + mov ecx,DWORD PTR 28[esi] + mov DWORD PTR 28[ebx],ecx +$L042pw_nc7: + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz $L050pw_nc_loop + mov ebp,DWORD PTR 36[esp] + and ebp,7 + jz $L051pw_nc_end + mov ecx,DWORD PTR [esi] + mov DWORD PTR [ebx],ecx +$L043pw_tail_nc0: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 4[esi] + mov DWORD PTR 4[ebx],ecx +$L044pw_tail_nc1: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 8[esi] + mov DWORD PTR 8[ebx],ecx +$L045pw_tail_nc2: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 12[esi] + mov DWORD PTR 12[ebx],ecx +$L046pw_tail_nc3: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 16[esi] + mov DWORD PTR 16[ebx],ecx +$L047pw_tail_nc4: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 20[esi] + mov DWORD PTR 20[ebx],ecx +$L048pw_tail_nc5: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 24[esi] + mov DWORD PTR 24[ebx],ecx +$L049pw_tail_nc6: +$L051pw_nc_end: + mov eax,0 +$L029pw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sub_part_words ENDP +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86.asm b/deps/openssl/asm/x86-win32-masm/bn/co-586.asm similarity index 60% rename from deps/openssl/asm/x86-win32-masm/bn/x86.asm rename to deps/openssl/asm/x86-win32-masm/bn/co-586.asm index 2e7a0d4aafd837..a44b2b1b957268 100644 --- a/deps/openssl/asm/x86-win32-masm/bn/x86.asm +++ b/deps/openssl/asm/x86-win32-masm/bn/co-586.asm @@ -1,4 +1,4 @@ -TITLE ../openssl/crypto/bn/asm/x86.asm +TITLE ../openssl/crypto/bn/asm/co-586.asm IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. 
ENDIF @@ -11,864 +11,6 @@ ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF ALIGN 16 -_bn_mul_add_words PROC PUBLIC -$L_bn_mul_add_words_begin:: - push ebp - push ebx - push esi - push edi - ; - - xor esi,esi - mov edi,DWORD PTR 20[esp] - mov ecx,DWORD PTR 28[esp] - mov ebx,DWORD PTR 24[esp] - and ecx,4294967288 - mov ebp,DWORD PTR 32[esp] - push ecx - jz $L000maw_finish -$L001maw_loop: - mov DWORD PTR [esp],ecx - ; Round 0 - mov eax,DWORD PTR [ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR [edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR [edi],eax - mov esi,edx - ; Round 4 - mov eax,DWORD PTR 4[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 4[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 4[edi],eax - mov esi,edx - ; Round 8 - mov eax,DWORD PTR 8[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 8[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 8[edi],eax - mov esi,edx - ; Round 12 - mov eax,DWORD PTR 12[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 12[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 12[edi],eax - mov esi,edx - ; Round 16 - mov eax,DWORD PTR 16[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 16[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 16[edi],eax - mov esi,edx - ; Round 20 - mov eax,DWORD PTR 20[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 20[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 20[edi],eax - mov esi,edx - ; Round 24 - mov eax,DWORD PTR 24[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 24[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 24[edi],eax - mov esi,edx - ; Round 28 - mov eax,DWORD PTR 28[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 28[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 28[edi],eax - mov esi,edx - ; - - mov ecx,DWORD PTR [esp] - add ebx,32 - add edi,32 - sub ecx,8 - jnz $L001maw_loop -$L000maw_finish: - mov ecx,DWORD PTR 32[esp] - and ecx,7 - jnz $L002maw_finish2 - jmp $L003maw_end -$L002maw_finish2: - ; Tail Round 0 - mov eax,DWORD PTR [ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR [edi] - adc edx,0 - add eax,esi - adc edx,0 - dec ecx - mov DWORD PTR [edi],eax - mov esi,edx - jz $L003maw_end - ; Tail Round 1 - mov eax,DWORD PTR 4[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 4[edi] - adc edx,0 - add eax,esi - adc edx,0 - dec ecx - mov DWORD PTR 4[edi],eax - mov esi,edx - jz $L003maw_end - ; Tail Round 2 - mov eax,DWORD PTR 8[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 8[edi] - adc edx,0 - add eax,esi - adc edx,0 - dec ecx - mov DWORD PTR 8[edi],eax - mov esi,edx - jz $L003maw_end - ; Tail Round 3 - mov eax,DWORD PTR 12[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 12[edi] - adc edx,0 - add eax,esi - adc edx,0 - dec ecx - mov DWORD PTR 12[edi],eax - mov esi,edx - jz $L003maw_end - ; Tail Round 4 - mov eax,DWORD PTR 16[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 16[edi] - adc edx,0 - add eax,esi - adc edx,0 - dec ecx - mov DWORD PTR 16[edi],eax - mov esi,edx - jz $L003maw_end - ; Tail Round 5 - mov eax,DWORD PTR 20[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 20[edi] - adc edx,0 - add eax,esi - adc edx,0 - dec ecx - mov DWORD PTR 20[edi],eax - mov esi,edx - jz $L003maw_end - ; Tail Round 6 - mov eax,DWORD PTR 24[ebx] - mul ebp - add eax,esi - mov esi,DWORD PTR 24[edi] - adc edx,0 - add eax,esi - adc edx,0 - mov DWORD PTR 24[edi],eax - mov esi,edx -$L003maw_end: - mov eax,esi - pop ecx - pop edi - pop esi - pop ebx - pop ebp - ret -_bn_mul_add_words ENDP -ALIGN 16 -_bn_mul_words PROC PUBLIC -$L_bn_mul_words_begin:: - 
push ebp - push ebx - push esi - push edi - ; - - xor esi,esi - mov edi,DWORD PTR 20[esp] - mov ebx,DWORD PTR 24[esp] - mov ebp,DWORD PTR 28[esp] - mov ecx,DWORD PTR 32[esp] - and ebp,4294967288 - jz $L004mw_finish -$L005mw_loop: - ; Round 0 - mov eax,DWORD PTR [ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR [edi],eax - mov esi,edx - ; Round 4 - mov eax,DWORD PTR 4[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 4[edi],eax - mov esi,edx - ; Round 8 - mov eax,DWORD PTR 8[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 8[edi],eax - mov esi,edx - ; Round 12 - mov eax,DWORD PTR 12[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 12[edi],eax - mov esi,edx - ; Round 16 - mov eax,DWORD PTR 16[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 16[edi],eax - mov esi,edx - ; Round 20 - mov eax,DWORD PTR 20[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 20[edi],eax - mov esi,edx - ; Round 24 - mov eax,DWORD PTR 24[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 24[edi],eax - mov esi,edx - ; Round 28 - mov eax,DWORD PTR 28[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 28[edi],eax - mov esi,edx - ; - - add ebx,32 - add edi,32 - sub ebp,8 - jz $L004mw_finish - jmp $L005mw_loop -$L004mw_finish: - mov ebp,DWORD PTR 28[esp] - and ebp,7 - jnz $L006mw_finish2 - jmp $L007mw_end -$L006mw_finish2: - ; Tail Round 0 - mov eax,DWORD PTR [ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR [edi],eax - mov esi,edx - dec ebp - jz $L007mw_end - ; Tail Round 1 - mov eax,DWORD PTR 4[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 4[edi],eax - mov esi,edx - dec ebp - jz $L007mw_end - ; Tail Round 2 - mov eax,DWORD PTR 8[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 8[edi],eax - mov esi,edx - dec ebp - jz $L007mw_end - ; Tail Round 3 - mov eax,DWORD PTR 12[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 12[edi],eax - mov esi,edx - dec ebp - jz $L007mw_end - ; Tail Round 4 - mov eax,DWORD PTR 16[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 16[edi],eax - mov esi,edx - dec ebp - jz $L007mw_end - ; Tail Round 5 - mov eax,DWORD PTR 20[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 20[edi],eax - mov esi,edx - dec ebp - jz $L007mw_end - ; Tail Round 6 - mov eax,DWORD PTR 24[ebx] - mul ecx - add eax,esi - adc edx,0 - mov DWORD PTR 24[edi],eax - mov esi,edx -$L007mw_end: - mov eax,esi - pop edi - pop esi - pop ebx - pop ebp - ret -_bn_mul_words ENDP -ALIGN 16 -_bn_sqr_words PROC PUBLIC -$L_bn_sqr_words_begin:: - push ebp - push ebx - push esi - push edi - ; - - mov esi,DWORD PTR 20[esp] - mov edi,DWORD PTR 24[esp] - mov ebx,DWORD PTR 28[esp] - and ebx,4294967288 - jz $L008sw_finish -$L009sw_loop: - ; Round 0 - mov eax,DWORD PTR [edi] - mul eax - mov DWORD PTR [esi],eax - mov DWORD PTR 4[esi],edx - ; Round 4 - mov eax,DWORD PTR 4[edi] - mul eax - mov DWORD PTR 8[esi],eax - mov DWORD PTR 12[esi],edx - ; Round 8 - mov eax,DWORD PTR 8[edi] - mul eax - mov DWORD PTR 16[esi],eax - mov DWORD PTR 20[esi],edx - ; Round 12 - mov eax,DWORD PTR 12[edi] - mul eax - mov DWORD PTR 24[esi],eax - mov DWORD PTR 28[esi],edx - ; Round 16 - mov eax,DWORD PTR 16[edi] - mul eax - mov DWORD PTR 32[esi],eax - mov DWORD PTR 36[esi],edx - ; Round 20 - mov eax,DWORD PTR 20[edi] - mul eax - mov DWORD PTR 40[esi],eax - mov DWORD PTR 44[esi],edx - ; Round 24 - mov eax,DWORD PTR 24[edi] - mul eax - mov DWORD PTR 48[esi],eax - mov DWORD PTR 52[esi],edx - ; Round 28 - mov eax,DWORD PTR 28[edi] - mul eax - mov DWORD PTR 56[esi],eax - mov 
DWORD PTR 60[esi],edx - ; - - add edi,32 - add esi,64 - sub ebx,8 - jnz $L009sw_loop -$L008sw_finish: - mov ebx,DWORD PTR 28[esp] - and ebx,7 - jz $L010sw_end - ; Tail Round 0 - mov eax,DWORD PTR [edi] - mul eax - mov DWORD PTR [esi],eax - dec ebx - mov DWORD PTR 4[esi],edx - jz $L010sw_end - ; Tail Round 1 - mov eax,DWORD PTR 4[edi] - mul eax - mov DWORD PTR 8[esi],eax - dec ebx - mov DWORD PTR 12[esi],edx - jz $L010sw_end - ; Tail Round 2 - mov eax,DWORD PTR 8[edi] - mul eax - mov DWORD PTR 16[esi],eax - dec ebx - mov DWORD PTR 20[esi],edx - jz $L010sw_end - ; Tail Round 3 - mov eax,DWORD PTR 12[edi] - mul eax - mov DWORD PTR 24[esi],eax - dec ebx - mov DWORD PTR 28[esi],edx - jz $L010sw_end - ; Tail Round 4 - mov eax,DWORD PTR 16[edi] - mul eax - mov DWORD PTR 32[esi],eax - dec ebx - mov DWORD PTR 36[esi],edx - jz $L010sw_end - ; Tail Round 5 - mov eax,DWORD PTR 20[edi] - mul eax - mov DWORD PTR 40[esi],eax - dec ebx - mov DWORD PTR 44[esi],edx - jz $L010sw_end - ; Tail Round 6 - mov eax,DWORD PTR 24[edi] - mul eax - mov DWORD PTR 48[esi],eax - mov DWORD PTR 52[esi],edx -$L010sw_end: - pop edi - pop esi - pop ebx - pop ebp - ret -_bn_sqr_words ENDP -ALIGN 16 -_bn_div_words PROC PUBLIC -$L_bn_div_words_begin:: - push ebp - push ebx - push esi - push edi - mov edx,DWORD PTR 20[esp] - mov eax,DWORD PTR 24[esp] - mov ebx,DWORD PTR 28[esp] - div ebx - pop edi - pop esi - pop ebx - pop ebp - ret -_bn_div_words ENDP -ALIGN 16 -_bn_add_words PROC PUBLIC -$L_bn_add_words_begin:: - push ebp - push ebx - push esi - push edi - ; - - mov ebx,DWORD PTR 20[esp] - mov esi,DWORD PTR 24[esp] - mov edi,DWORD PTR 28[esp] - mov ebp,DWORD PTR 32[esp] - xor eax,eax - and ebp,4294967288 - jz $L011aw_finish -$L012aw_loop: - ; Round 0 - mov ecx,DWORD PTR [esi] - mov edx,DWORD PTR [edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR [ebx],ecx - ; Round 1 - mov ecx,DWORD PTR 4[esi] - mov edx,DWORD PTR 4[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 4[ebx],ecx - ; Round 2 - mov ecx,DWORD PTR 8[esi] - mov edx,DWORD PTR 8[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 8[ebx],ecx - ; Round 3 - mov ecx,DWORD PTR 12[esi] - mov edx,DWORD PTR 12[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 12[ebx],ecx - ; Round 4 - mov ecx,DWORD PTR 16[esi] - mov edx,DWORD PTR 16[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 16[ebx],ecx - ; Round 5 - mov ecx,DWORD PTR 20[esi] - mov edx,DWORD PTR 20[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 20[ebx],ecx - ; Round 6 - mov ecx,DWORD PTR 24[esi] - mov edx,DWORD PTR 24[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 24[ebx],ecx - ; Round 7 - mov ecx,DWORD PTR 28[esi] - mov edx,DWORD PTR 28[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 28[ebx],ecx - ; - - add esi,32 - add edi,32 - add ebx,32 - sub ebp,8 - jnz $L012aw_loop -$L011aw_finish: - mov ebp,DWORD PTR 32[esp] - and ebp,7 - jz $L013aw_end - ; Tail Round 0 - mov ecx,DWORD PTR [esi] - mov edx,DWORD PTR [edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR [ebx],ecx - jz $L013aw_end - ; Tail Round 1 - mov ecx,DWORD PTR 4[esi] - mov edx,DWORD PTR 4[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 4[ebx],ecx - jz $L013aw_end - ; Tail 
Round 2 - mov ecx,DWORD PTR 8[esi] - mov edx,DWORD PTR 8[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 8[ebx],ecx - jz $L013aw_end - ; Tail Round 3 - mov ecx,DWORD PTR 12[esi] - mov edx,DWORD PTR 12[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 12[ebx],ecx - jz $L013aw_end - ; Tail Round 4 - mov ecx,DWORD PTR 16[esi] - mov edx,DWORD PTR 16[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 16[ebx],ecx - jz $L013aw_end - ; Tail Round 5 - mov ecx,DWORD PTR 20[esi] - mov edx,DWORD PTR 20[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 20[ebx],ecx - jz $L013aw_end - ; Tail Round 6 - mov ecx,DWORD PTR 24[esi] - mov edx,DWORD PTR 24[edi] - add ecx,eax - mov eax,0 - adc eax,eax - add ecx,edx - adc eax,0 - mov DWORD PTR 24[ebx],ecx -$L013aw_end: - pop edi - pop esi - pop ebx - pop ebp - ret -_bn_add_words ENDP -ALIGN 16 -_bn_sub_words PROC PUBLIC -$L_bn_sub_words_begin:: - push ebp - push ebx - push esi - push edi - ; - - mov ebx,DWORD PTR 20[esp] - mov esi,DWORD PTR 24[esp] - mov edi,DWORD PTR 28[esp] - mov ebp,DWORD PTR 32[esp] - xor eax,eax - and ebp,4294967288 - jz $L014aw_finish -$L015aw_loop: - ; Round 0 - mov ecx,DWORD PTR [esi] - mov edx,DWORD PTR [edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR [ebx],ecx - ; Round 1 - mov ecx,DWORD PTR 4[esi] - mov edx,DWORD PTR 4[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 4[ebx],ecx - ; Round 2 - mov ecx,DWORD PTR 8[esi] - mov edx,DWORD PTR 8[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 8[ebx],ecx - ; Round 3 - mov ecx,DWORD PTR 12[esi] - mov edx,DWORD PTR 12[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 12[ebx],ecx - ; Round 4 - mov ecx,DWORD PTR 16[esi] - mov edx,DWORD PTR 16[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 16[ebx],ecx - ; Round 5 - mov ecx,DWORD PTR 20[esi] - mov edx,DWORD PTR 20[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 20[ebx],ecx - ; Round 6 - mov ecx,DWORD PTR 24[esi] - mov edx,DWORD PTR 24[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 24[ebx],ecx - ; Round 7 - mov ecx,DWORD PTR 28[esi] - mov edx,DWORD PTR 28[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 28[ebx],ecx - ; - - add esi,32 - add edi,32 - add ebx,32 - sub ebp,8 - jnz $L015aw_loop -$L014aw_finish: - mov ebp,DWORD PTR 32[esp] - and ebp,7 - jz $L016aw_end - ; Tail Round 0 - mov ecx,DWORD PTR [esi] - mov edx,DWORD PTR [edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR [ebx],ecx - jz $L016aw_end - ; Tail Round 1 - mov ecx,DWORD PTR 4[esi] - mov edx,DWORD PTR 4[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 4[ebx],ecx - jz $L016aw_end - ; Tail Round 2 - mov ecx,DWORD PTR 8[esi] - mov edx,DWORD PTR 8[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 8[ebx],ecx - jz $L016aw_end - ; Tail Round 3 - mov ecx,DWORD PTR 12[esi] - mov edx,DWORD PTR 12[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 12[ebx],ecx - jz $L016aw_end - ; Tail Round 4 - mov ecx,DWORD PTR 16[esi] - mov 
edx,DWORD PTR 16[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 16[ebx],ecx - jz $L016aw_end - ; Tail Round 5 - mov ecx,DWORD PTR 20[esi] - mov edx,DWORD PTR 20[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - dec ebp - mov DWORD PTR 20[ebx],ecx - jz $L016aw_end - ; Tail Round 6 - mov ecx,DWORD PTR 24[esi] - mov edx,DWORD PTR 24[edi] - sub ecx,eax - mov eax,0 - adc eax,eax - sub ecx,edx - adc eax,0 - mov DWORD PTR 24[ebx],ecx -$L016aw_end: - pop edi - pop esi - pop ebx - pop ebp - ret -_bn_sub_words ENDP -ALIGN 16 _bn_mul_comba8 PROC PUBLIC $L_bn_mul_comba8_begin:: push esi diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86-gf2m.asm b/deps/openssl/asm/x86-win32-masm/bn/x86-gf2m.asm new file mode 100644 index 00000000000000..57adf3ace49e1f --- /dev/null +++ b/deps/openssl/asm/x86-win32-masm/bn/x86-gf2m.asm @@ -0,0 +1,361 @@ +TITLE ../openssl/crypto/bn/asm/x86-gf2m.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +__mul_1x1_mmx PROC PRIVATE + sub esp,36 + mov ecx,eax + lea edx,DWORD PTR [eax*1+eax] + and ecx,1073741823 + lea ebp,DWORD PTR [edx*1+edx] + mov DWORD PTR [esp],0 + and edx,2147483647 + movd mm2,eax + movd mm3,ebx + mov DWORD PTR 4[esp],ecx + xor ecx,edx + pxor mm5,mm5 + pxor mm4,mm4 + mov DWORD PTR 8[esp],edx + xor edx,ebp + mov DWORD PTR 12[esp],ecx + pcmpgtd mm5,mm2 + paddd mm2,mm2 + xor ecx,edx + mov DWORD PTR 16[esp],ebp + xor ebp,edx + pand mm5,mm3 + pcmpgtd mm4,mm2 + mov DWORD PTR 20[esp],ecx + xor ebp,ecx + psllq mm5,31 + pand mm4,mm3 + mov DWORD PTR 24[esp],edx + mov esi,7 + mov DWORD PTR 28[esp],ebp + mov ebp,esi + and esi,ebx + shr ebx,3 + mov edi,ebp + psllq mm4,30 + and edi,ebx + shr ebx,3 + movd mm0,DWORD PTR [esi*4+esp] + mov esi,ebp + and esi,ebx + shr ebx,3 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,3 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,6 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,9 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,12 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,15 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,18 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,21 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,24 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + pxor mm0,mm4 + psllq mm2,27 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + pxor mm0,mm5 + psllq mm1,30 + add esp,36 + pxor mm0,mm1 + ret +__mul_1x1_mmx ENDP +ALIGN 16 +__mul_1x1_ialu PROC PRIVATE + sub esp,36 + mov ecx,eax + lea edx,DWORD PTR [eax*1+eax] + lea ebp,DWORD PTR [eax*4] + and ecx,1073741823 + lea edi,DWORD PTR [eax*1+eax] + sar eax,31 + mov DWORD PTR [esp],0 + and edx,2147483647 + mov DWORD PTR 4[esp],ecx + xor ecx,edx + mov DWORD PTR 8[esp],edx + xor edx,ebp + mov DWORD PTR 12[esp],ecx + xor ecx,edx + mov DWORD PTR 16[esp],ebp + xor ebp,edx + mov DWORD PTR 20[esp],ecx + xor ebp,ecx + sar edi,31 
+ and eax,ebx + mov DWORD PTR 24[esp],edx + and edi,ebx + mov DWORD PTR 28[esp],ebp + mov edx,eax + shl eax,31 + mov ecx,edi + shr edx,1 + mov esi,7 + shl edi,30 + and esi,ebx + shr ecx,2 + xor eax,edi + shr ebx,3 + mov edi,7 + and edi,ebx + shr ebx,3 + xor edx,ecx + xor eax,DWORD PTR [esi*4+esp] + mov esi,7 + and esi,ebx + shr ebx,3 + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,3 + and edi,ebx + shr ecx,29 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,6 + and esi,ebx + shr ebp,26 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,9 + and edi,ebx + shr ecx,23 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,12 + and esi,ebx + shr ebp,20 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,15 + and edi,ebx + shr ecx,17 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,18 + and esi,ebx + shr ebp,14 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,21 + and edi,ebx + shr ecx,11 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,24 + and esi,ebx + shr ebp,8 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov ecx,ebp + shl ebp,27 + mov edi,DWORD PTR [esi*4+esp] + shr ecx,5 + mov esi,edi + xor eax,ebp + shl edi,30 + xor edx,ecx + shr esi,2 + xor eax,edi + xor edx,esi + add esp,36 + ret +__mul_1x1_ialu ENDP +ALIGN 16 +_bn_GF2m_mul_2x2 PROC PUBLIC +$L_bn_GF2m_mul_2x2_begin:: + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov eax,DWORD PTR [edx] + mov edx,DWORD PTR 4[edx] + test eax,8388608 + jz $L000ialu + test eax,16777216 + jz $L001mmx + test edx,2 + jz $L001mmx + movups xmm0,XMMWORD PTR 8[esp] + shufps xmm0,xmm0,177 +DB 102,15,58,68,192,1 + mov eax,DWORD PTR 4[esp] + movups XMMWORD PTR [eax],xmm0 + ret +ALIGN 16 +$L001mmx: + push ebp + push ebx + push esi + push edi + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 32[esp] + call __mul_1x1_mmx + movq mm7,mm0 + mov eax,DWORD PTR 28[esp] + mov ebx,DWORD PTR 36[esp] + call __mul_1x1_mmx + movq mm6,mm0 + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 32[esp] + xor eax,DWORD PTR 28[esp] + xor ebx,DWORD PTR 36[esp] + call __mul_1x1_mmx + pxor mm0,mm7 + mov eax,DWORD PTR 20[esp] + pxor mm0,mm6 + movq mm2,mm0 + psllq mm0,32 + pop edi + psrlq mm2,32 + pop esi + pxor mm0,mm6 + pop ebx + pxor mm2,mm7 + movq QWORD PTR [eax],mm0 + pop ebp + movq QWORD PTR 8[eax],mm2 + emms + ret +ALIGN 16 +$L000ialu: + push ebp + push ebx + push esi + push edi + sub esp,20 + mov eax,DWORD PTR 44[esp] + mov ebx,DWORD PTR 52[esp] + call __mul_1x1_ialu + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],edx + mov eax,DWORD PTR 48[esp] + mov ebx,DWORD PTR 56[esp] + call __mul_1x1_ialu + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],edx + mov eax,DWORD PTR 44[esp] + mov ebx,DWORD PTR 52[esp] + xor eax,DWORD PTR 48[esp] + xor ebx,DWORD PTR 56[esp] + call __mul_1x1_ialu + mov ebp,DWORD PTR 40[esp] + mov ebx,DWORD PTR [esp] + mov ecx,DWORD PTR 4[esp] + mov edi,DWORD PTR 8[esp] + mov esi,DWORD PTR 12[esp] + xor eax,edx + xor edx,ecx + xor eax,ebx + mov DWORD PTR [ebp],ebx + xor edx,edi + mov DWORD PTR 12[ebp],esi + xor eax,esi + add esp,20 + xor edx,esi + pop edi + xor eax,edx + pop esi + mov DWORD PTR 8[ebp],edx + pop ebx + mov DWORD PTR 
4[ebp],eax + pop ebp + ret +_bn_GF2m_mul_2x2 ENDP +DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm index 031be4e7ea51df..9bfa4dc8eb1c88 100644 --- a/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm +++ b/deps/openssl/asm/x86-win32-masm/bn/x86-mont.asm @@ -3,6 +3,13 @@ IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF .686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -10,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _bn_mul_mont PROC PUBLIC $L_bn_mul_mont_begin:: @@ -50,6 +58,123 @@ $L_bn_mul_mont_begin:: mov DWORD PTR 20[esp],esi lea ebx,DWORD PTR [edi-3] mov DWORD PTR 24[esp],ebp + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L001non_sse2 + mov eax,-1 + movd mm7,eax + mov esi,DWORD PTR 8[esp] + mov edi,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + xor edx,edx + xor ecx,ecx + movd mm4,DWORD PTR [edi] + movd mm5,DWORD PTR [esi] + movd mm3,DWORD PTR [ebp] + pmuludq mm5,mm4 + movq mm2,mm5 + movq mm0,mm5 + pand mm0,mm7 + pmuludq mm5,QWORD PTR 20[esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm1,DWORD PTR 4[ebp] + movd mm0,DWORD PTR 4[esi] + psrlq mm2,32 + psrlq mm3,32 + inc ecx +ALIGN 16 +$L0021st: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + movd mm1,DWORD PTR 4[ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD PTR 4[ecx*4+esi] + psrlq mm2,32 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm3,32 + lea ecx,DWORD PTR 1[ecx] + cmp ecx,ebx + jl $L0021st + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + paddq mm3,mm2 + movq QWORD PTR 32[ebx*4+esp],mm3 + inc edx +$L003outer: + xor ecx,ecx + movd mm4,DWORD PTR [edx*4+edi] + movd mm5,DWORD PTR [esi] + movd mm6,DWORD PTR 32[esp] + movd mm3,DWORD PTR [ebp] + pmuludq mm5,mm4 + paddq mm5,mm6 + movq mm0,mm5 + movq mm2,mm5 + pand mm0,mm7 + pmuludq mm5,QWORD PTR 20[esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm6,DWORD PTR 36[esp] + movd mm1,DWORD PTR 4[ebp] + movd mm0,DWORD PTR 4[esi] + psrlq mm2,32 + psrlq mm3,32 + paddq mm2,mm6 + inc ecx + dec ebx +$L004inner: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + movd mm6,DWORD PTR 36[ecx*4+esp] + pand mm0,mm7 + movd mm1,DWORD PTR 4[ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD PTR 4[ecx*4+esi] + psrlq mm2,32 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm3,32 + paddq mm2,mm6 + dec ebx + lea ecx,DWORD PTR 1[ecx] + jnz $L004inner + mov ebx,ecx + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + movd mm6,DWORD PTR 36[ebx*4+esp] + paddq mm3,mm2 + paddq mm3,mm6 + movq QWORD PTR 32[ebx*4+esp],mm3 + lea edx,DWORD PTR 1[edx] + cmp edx,ebx + jle $L003outer + emms + jmp $L005common_tail +ALIGN 16 +$L001non_sse2: mov esi,DWORD PTR 8[esp] lea ebp,DWORD PTR 1[ebx] mov edi,DWORD PTR 12[esp] @@ 
-60,12 +185,12 @@ $L_bn_mul_mont_begin:: lea eax,DWORD PTR 4[ebx*4+edi] or ebp,edx mov edi,DWORD PTR [edi] - jz $L001bn_sqr_mont + jz $L006bn_sqr_mont mov DWORD PTR 28[esp],eax mov eax,DWORD PTR [esi] xor edx,edx ALIGN 16 -$L002mull: +$L007mull: mov ebp,edx mul edi add ebp,eax @@ -74,7 +199,7 @@ $L002mull: mov eax,DWORD PTR [ecx*4+esi] cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L002mull + jl $L007mull mov ebp,edx mul edi mov edi,DWORD PTR 20[esp] @@ -92,9 +217,9 @@ $L002mull: mov eax,DWORD PTR 4[esi] adc edx,0 inc ecx - jmp $L0032ndmadd + jmp $L0082ndmadd ALIGN 16 -$L0041stmadd: +$L0091stmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -105,7 +230,7 @@ $L0041stmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 28[ecx*4+esp],ebp - jl $L0041stmadd + jl $L0091stmadd mov ebp,edx mul edi add eax,DWORD PTR 32[ebx*4+esp] @@ -128,7 +253,7 @@ $L0041stmadd: adc edx,0 mov ecx,1 ALIGN 16 -$L0032ndmadd: +$L0082ndmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -139,7 +264,7 @@ $L0032ndmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0032ndmadd + jl $L0082ndmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -162,9 +287,9 @@ $L0032ndmadd: xor ecx,ecx xor edx,edx mov eax,DWORD PTR [esi] - jmp $L0041stmadd + jmp $L0091stmadd ALIGN 16 -$L001bn_sqr_mont: +$L006bn_sqr_mont: mov DWORD PTR [esp],ebx mov DWORD PTR 12[esp],ecx mov eax,edi @@ -175,7 +300,7 @@ $L001bn_sqr_mont: and ebx,1 inc ecx ALIGN 16 -$L006sqr: +$L010sqr: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -187,7 +312,7 @@ $L006sqr: cmp ecx,DWORD PTR [esp] mov ebx,eax mov DWORD PTR 28[ecx*4+esp],ebp - jl $L006sqr + jl $L010sqr mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -211,7 +336,7 @@ $L006sqr: mov eax,DWORD PTR 4[esi] mov ecx,1 ALIGN 16 -$L0073rdmadd: +$L0113rdmadd: mov ebp,edx mul edi add ebp,DWORD PTR 32[ecx*4+esp] @@ -230,7 +355,7 @@ $L0073rdmadd: adc edx,0 cmp ecx,ebx mov DWORD PTR 24[ecx*4+esp],ebp - jl $L0073rdmadd + jl $L0113rdmadd mov ebp,edx mul edi add ebp,DWORD PTR 32[ebx*4+esp] @@ -258,12 +383,12 @@ $L0073rdmadd: xor ebp,ebp cmp ecx,ebx lea ecx,DWORD PTR 1[ecx] - je $L008sqrlast + je $L012sqrlast mov ebx,edx shr edx,1 and ebx,1 ALIGN 16 -$L009sqradd: +$L013sqradd: mov eax,DWORD PTR [ecx*4+esi] mov ebp,edx mul edi @@ -279,13 +404,13 @@ $L009sqradd: cmp ecx,DWORD PTR [esp] mov DWORD PTR 28[ecx*4+esp],ebp mov ebx,eax - jle $L009sqradd + jle $L013sqradd mov ebp,edx add edx,edx shr ebp,31 add edx,ebx adc ebp,0 -$L008sqrlast: +$L012sqrlast: mov edi,DWORD PTR 20[esp] mov esi,DWORD PTR 16[esp] imul edi,DWORD PTR 32[esp] @@ -300,7 +425,7 @@ $L008sqrlast: adc edx,0 mov ecx,1 mov eax,DWORD PTR 4[esi] - jmp $L0073rdmadd + jmp $L0113rdmadd ALIGN 16 $L005common_tail: mov ebp,DWORD PTR 16[esp] @@ -310,13 +435,13 @@ $L005common_tail: mov ecx,ebx xor edx,edx ALIGN 16 -$L010sub: +$L014sub: sbb eax,DWORD PTR [edx*4+ebp] mov DWORD PTR [edx*4+edi],eax dec ecx mov eax,DWORD PTR 4[edx*4+esi] lea edx,DWORD PTR 1[edx] - jge $L010sub + jge $L014sub sbb eax,0 and esi,eax not eax @@ -324,12 +449,12 @@ $L010sub: and ebp,eax or esi,ebp ALIGN 16 -$L011copy: +$L015copy: mov eax,DWORD PTR [ebx*4+esi] mov DWORD PTR [ebx*4+edi],eax mov DWORD PTR 32[ebx*4+esp],ecx dec ebx - jge $L011copy + jge $L015copy mov esp,DWORD PTR 24[esp] mov eax,1 $L000just_leave: @@ -345,4 +470,7 @@ DB 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 DB 111,114,103,62,0 .text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS END diff --git 
a/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm b/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm index e32d28135bbfe8..6aac94e4906752 100644 --- a/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm +++ b/deps/openssl/asm/x86-win32-masm/camellia/cmll-x86.asm @@ -1566,7 +1566,10 @@ $L014done: _private_Camellia_set_key ENDP ALIGN 64 $LCamellia_SIGMA:: -DD 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 +DD 2694735487,1003262091,3061508184,1286239154 +DD 3337565999,3914302142,1426019237,4057165596 +DD 283453434,3731369245,2958461122,3018244605 +DD 0,0,0,0 ALIGN 64 $LCamellia_SBOX:: DD 1886416896,1886388336 diff --git a/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm b/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm index 6f85c34d28854f..0801d204e20353 100644 --- a/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm +++ b/deps/openssl/asm/x86-win32-masm/cast/cast-586.asm @@ -18,7 +18,6 @@ ALIGN 16 _CAST_encrypt PROC PUBLIC $L_CAST_encrypt_begin:: ; - push ebp push ebx mov ebx,DWORD PTR 12[esp] @@ -391,7 +390,6 @@ ALIGN 16 _CAST_decrypt PROC PUBLIC $L_CAST_decrypt_begin:: ; - push ebp push ebx mov ebx,DWORD PTR 12[esp] @@ -757,7 +755,6 @@ ALIGN 16 _CAST_cbc_encrypt PROC PUBLIC $L_CAST_cbc_encrypt_begin:: ; - push ebp push ebx push esi diff --git a/deps/openssl/asm/x86-win32-masm/des/crypt586.asm b/deps/openssl/asm/x86-win32-masm/des/crypt586.asm index 4c82c7a265b6f1..6ca04c3afbdb1b 100644 --- a/deps/openssl/asm/x86-win32-masm/des/crypt586.asm +++ b/deps/openssl/asm/x86-win32-masm/des/crypt586.asm @@ -19,7 +19,6 @@ $L_fcrypt_body_begin:: push esi push edi ; - ; Load the 2 words xor edi,edi xor esi,esi @@ -29,7 +28,6 @@ $L_fcrypt_body_begin:: push 25 $L000start: ; - ; Round 0 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -80,7 +78,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 1 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -131,7 +128,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 2 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -182,7 +178,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 3 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -233,7 +228,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 4 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -284,7 +278,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 5 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -335,7 +328,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 6 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -386,7 +378,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 7 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -437,7 +428,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 8 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -488,7 +478,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 9 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -539,7 +528,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 10 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -590,7 +578,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 11 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -641,7 +628,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 12 mov eax,DWORD PTR 36[esp] mov edx,esi @@ -692,7 +678,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 13 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -743,7 +728,6 @@ $L000start: xor esi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 14 mov eax,DWORD PTR 36[esp] mov 
edx,esi @@ -794,7 +778,6 @@ $L000start: xor edi,ebx mov ebp,DWORD PTR 32[esp] ; - ; Round 15 mov eax,DWORD PTR 36[esp] mov edx,edi @@ -852,7 +835,6 @@ $L000start: mov DWORD PTR [esp],ebx jnz $L000start ; - ; FP mov edx,DWORD PTR 28[esp] ror edi,1 @@ -862,7 +844,6 @@ $L000start: xor eax,esi xor edi,esi ; - rol eax,23 mov esi,eax xor eax,edi @@ -870,7 +851,6 @@ $L000start: xor esi,eax xor edi,eax ; - rol esi,10 mov eax,esi xor esi,edi @@ -878,7 +858,6 @@ $L000start: xor eax,esi xor edi,esi ; - rol edi,18 mov esi,edi xor edi,eax @@ -886,7 +865,6 @@ $L000start: xor esi,edi xor eax,edi ; - rol esi,12 mov edi,esi xor esi,eax @@ -894,7 +872,6 @@ $L000start: xor edi,esi xor eax,esi ; - ror eax,4 mov DWORD PTR [edx],eax mov DWORD PTR 4[edx],edi diff --git a/deps/openssl/asm/x86-win32-masm/des/des-586.asm b/deps/openssl/asm/x86-win32-masm/des/des-586.asm index 24f19a6603c69d..ecae90ec7cbe5b 100644 --- a/deps/openssl/asm/x86-win32-masm/des/des-586.asm +++ b/deps/openssl/asm/x86-win32-masm/des/des-586.asm @@ -957,7 +957,6 @@ $L_DES_encrypt1_begin:: push esi push edi ; - ; Load the 2 words mov esi,DWORD PTR 12[esp] xor ecx,ecx @@ -967,7 +966,6 @@ $L_DES_encrypt1_begin:: mov ebx,DWORD PTR 28[esp] mov edi,DWORD PTR 4[esi] ; - ; IP rol eax,4 mov esi,eax @@ -976,7 +974,6 @@ $L_DES_encrypt1_begin:: xor esi,eax xor edi,eax ; - rol edi,20 mov eax,edi xor edi,esi @@ -984,7 +981,6 @@ $L_DES_encrypt1_begin:: xor eax,edi xor esi,edi ; - rol eax,14 mov edi,eax xor eax,esi @@ -992,7 +988,6 @@ $L_DES_encrypt1_begin:: xor edi,eax xor esi,eax ; - rol esi,22 mov eax,esi xor esi,edi @@ -1000,7 +995,6 @@ $L_DES_encrypt1_begin:: xor eax,esi xor edi,esi ; - rol eax,9 mov esi,eax xor eax,edi @@ -1008,12 +1002,11 @@ $L_DES_encrypt1_begin:: xor esi,eax xor edi,eax ; - rol edi,1 call $L000pic_point $L000pic_point: pop ebp - lea ebp,DWORD PTR (_DES_SPtrans-$L000pic_point)[ebp] + lea ebp,DWORD PTR ($Ldes_sptrans-$L000pic_point)[ebp] mov ecx,DWORD PTR 24[esp] cmp ebx,0 je $L001decrypt @@ -1023,7 +1016,6 @@ $L001decrypt: call __x86_DES_decrypt $L002done: ; - ; FP mov edx,DWORD PTR 20[esp] ror esi,1 @@ -1033,7 +1025,6 @@ $L002done: xor eax,edi xor esi,edi ; - rol eax,23 mov edi,eax xor eax,esi @@ -1041,7 +1032,6 @@ $L002done: xor edi,eax xor esi,eax ; - rol edi,10 mov eax,edi xor edi,esi @@ -1049,7 +1039,6 @@ $L002done: xor eax,edi xor esi,edi ; - rol esi,18 mov edi,esi xor esi,eax @@ -1057,7 +1046,6 @@ $L002done: xor edi,esi xor eax,esi ; - rol edi,12 mov esi,edi xor edi,eax @@ -1065,7 +1053,6 @@ $L002done: xor esi,edi xor eax,edi ; - ror eax,4 mov DWORD PTR [edx],eax mov DWORD PTR 4[edx],esi @@ -1081,7 +1068,6 @@ $L_DES_encrypt2_begin:: push esi push edi ; - ; Load the 2 words mov eax,DWORD PTR 12[esp] xor ecx,ecx @@ -1095,7 +1081,7 @@ $L_DES_encrypt2_begin:: call $L003pic_point $L003pic_point: pop ebp - lea ebp,DWORD PTR (_DES_SPtrans-$L003pic_point)[ebp] + lea ebp,DWORD PTR ($Ldes_sptrans-$L003pic_point)[ebp] mov ecx,DWORD PTR 24[esp] cmp ebx,0 je $L004decrypt @@ -1105,7 +1091,6 @@ $L004decrypt: call __x86_DES_decrypt $L005done: ; - ; Fixup ror edi,3 mov eax,DWORD PTR 20[esp] @@ -1127,13 +1112,11 @@ $L_DES_encrypt3_begin:: push esi push edi ; - ; Load the data words mov edi,DWORD PTR [ebx] mov esi,DWORD PTR 4[ebx] sub esp,12 ; - ; IP rol edi,4 mov edx,edi @@ -1142,7 +1125,6 @@ $L_DES_encrypt3_begin:: xor edx,edi xor esi,edi ; - rol esi,20 mov edi,esi xor esi,edx @@ -1150,7 +1132,6 @@ $L_DES_encrypt3_begin:: xor edi,esi xor edx,esi ; - rol edi,14 mov esi,edi xor edi,edx @@ -1158,7 +1139,6 @@ $L_DES_encrypt3_begin:: xor esi,edi 
xor edx,edi ; - rol edx,22 mov edi,edx xor edx,esi @@ -1166,7 +1146,6 @@ $L_DES_encrypt3_begin:: xor edi,edx xor esi,edx ; - rol edi,9 mov edx,edi xor edi,esi @@ -1174,7 +1153,6 @@ $L_DES_encrypt3_begin:: xor edx,edi xor esi,edi ; - ror edx,3 ror esi,2 mov DWORD PTR 4[ebx],esi @@ -1198,7 +1176,6 @@ $L_DES_encrypt3_begin:: mov edi,DWORD PTR [ebx] mov esi,DWORD PTR 4[ebx] ; - ; FP rol esi,2 rol edi,3 @@ -1208,7 +1185,6 @@ $L_DES_encrypt3_begin:: xor eax,edi xor esi,edi ; - rol eax,23 mov edi,eax xor eax,esi @@ -1216,7 +1192,6 @@ $L_DES_encrypt3_begin:: xor edi,eax xor esi,eax ; - rol edi,10 mov eax,edi xor edi,esi @@ -1224,7 +1199,6 @@ $L_DES_encrypt3_begin:: xor eax,edi xor esi,edi ; - rol esi,18 mov edi,esi xor esi,eax @@ -1232,7 +1206,6 @@ $L_DES_encrypt3_begin:: xor edi,esi xor eax,esi ; - rol edi,12 mov esi,edi xor edi,eax @@ -1240,7 +1213,6 @@ $L_DES_encrypt3_begin:: xor esi,edi xor eax,edi ; - ror eax,4 mov DWORD PTR [ebx],eax mov DWORD PTR 4[ebx],esi @@ -1259,13 +1231,11 @@ $L_DES_decrypt3_begin:: push esi push edi ; - ; Load the data words mov edi,DWORD PTR [ebx] mov esi,DWORD PTR 4[ebx] sub esp,12 ; - ; IP rol edi,4 mov edx,edi @@ -1274,7 +1244,6 @@ $L_DES_decrypt3_begin:: xor edx,edi xor esi,edi ; - rol esi,20 mov edi,esi xor esi,edx @@ -1282,7 +1251,6 @@ $L_DES_decrypt3_begin:: xor edi,esi xor edx,esi ; - rol edi,14 mov esi,edi xor edi,edx @@ -1290,7 +1258,6 @@ $L_DES_decrypt3_begin:: xor esi,edi xor edx,edi ; - rol edx,22 mov edi,edx xor edx,esi @@ -1298,7 +1265,6 @@ $L_DES_decrypt3_begin:: xor edi,edx xor esi,edx ; - rol edi,9 mov edx,edi xor edi,esi @@ -1306,7 +1272,6 @@ $L_DES_decrypt3_begin:: xor edx,edi xor esi,edi ; - ror edx,3 ror esi,2 mov DWORD PTR 4[ebx],esi @@ -1330,7 +1295,6 @@ $L_DES_decrypt3_begin:: mov edi,DWORD PTR [ebx] mov esi,DWORD PTR 4[ebx] ; - ; FP rol esi,2 rol edi,3 @@ -1340,7 +1304,6 @@ $L_DES_decrypt3_begin:: xor eax,edi xor esi,edi ; - rol eax,23 mov edi,eax xor eax,esi @@ -1348,7 +1311,6 @@ $L_DES_decrypt3_begin:: xor edi,eax xor esi,eax ; - rol edi,10 mov eax,edi xor edi,esi @@ -1356,7 +1318,6 @@ $L_DES_decrypt3_begin:: xor eax,edi xor esi,edi ; - rol esi,18 mov edi,esi xor esi,eax @@ -1364,7 +1325,6 @@ $L_DES_decrypt3_begin:: xor edi,esi xor eax,esi ; - rol edi,12 mov esi,edi xor edi,eax @@ -1372,7 +1332,6 @@ $L_DES_decrypt3_begin:: xor esi,edi xor eax,edi ; - ror eax,4 mov DWORD PTR [ebx],eax mov DWORD PTR 4[ebx],esi @@ -1386,7 +1345,6 @@ ALIGN 16 _DES_ncbc_encrypt PROC PUBLIC $L_DES_ncbc_encrypt_begin:: ; - push ebp push ebx push esi @@ -1565,7 +1523,6 @@ ALIGN 16 _DES_ede3_cbc_encrypt PROC PUBLIC $L_DES_ede3_cbc_encrypt_begin:: ; - push ebp push ebx push esi @@ -1746,6 +1703,7 @@ ALIGN 64 _DES_ede3_cbc_encrypt ENDP ALIGN 64 _DES_SPtrans:: +$Ldes_sptrans:: DD 34080768,524288,33554434,34080770 DD 33554432,526338,524290,33554434 DD 526338,34080768,34078720,2050 diff --git a/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm b/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm index 8e263de0fddb60..ae47efcb7a9af4 100644 --- a/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm +++ b/deps/openssl/asm/x86-win32-masm/md5/md5-586.asm @@ -30,7 +30,6 @@ $L_md5_block_asm_data_order_begin:: mov edx,DWORD PTR 12[edi] $L000start: ; - ; R0 section mov edi,ecx mov ebp,DWORD PTR [esi] @@ -195,7 +194,6 @@ $L000start: mov ebp,DWORD PTR 4[esi] add ebx,ecx ; - ; R1 section ; R1 16 lea eax,DWORD PTR 4129170786[ebp*1+eax] @@ -358,7 +356,6 @@ $L000start: rol ebx,20 add ebx,ecx ; - ; R2 section ; R2 32 xor edi,edx @@ -505,7 +502,6 @@ $L000start: rol ebx,23 add ebx,ecx ; - ; R3 
section ; R3 48 xor edi,edx diff --git a/deps/openssl/asm/x86-win32-masm/modes/ghash-x86.asm b/deps/openssl/asm/x86-win32-masm/modes/ghash-x86.asm index d5041d22092c6a..d18bb128c591d7 100644 --- a/deps/openssl/asm/x86-win32-masm/modes/ghash-x86.asm +++ b/deps/openssl/asm/x86-win32-masm/modes/ghash-x86.asm @@ -217,414 +217,91 @@ $L004x86_break: ret _gcm_ghash_4bit_x86 ENDP ALIGN 16 -__mmx_gmult_4bit_inner PROC PRIVATE +_gcm_gmult_4bit_mmx PROC PUBLIC +$L_gcm_gmult_4bit_mmx_begin:: + push ebp + push ebx + push esi + push edi + mov edi,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + call $L005pic_point +$L005pic_point: + pop eax + lea eax,DWORD PTR ($Lrem_4bit-$L005pic_point)[eax] + movzx ebx,BYTE PTR 15[edi] xor ecx,ecx mov edx,ebx mov cl,dl + mov ebp,14 shl cl,4 and edx,240 movq mm0,QWORD PTR 8[ecx*1+esi] movq mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 14[edi] - psllq mm2,60 - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 + jmp $L006mmx_loop +ALIGN 16 +$L006mmx_loop: psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 movq mm2,mm1 psrlq mm1,4 pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 13[edi] + mov cl,BYTE PTR [ebp*1+edi] psllq mm2,60 pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx + dec ebp movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 12[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 pxor mm1,QWORD PTR [edx*1+esi] mov edx,ecx - movd ebx,mm0 pxor mm0,mm2 + js $L007mmx_break shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 11[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 10[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 movq mm2,mm1 psrlq mm1,4 pxor mm0,QWORD PTR 8[ecx*1+esi] psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 9[edi] - psllq mm2,60 pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] 
- psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 8[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 pxor mm0,mm2 + jmp $L006mmx_loop +ALIGN 16 +$L007mmx_break: shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 7[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 6[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 movq mm2,mm1 psrlq mm1,4 pxor mm0,QWORD PTR 8[ecx*1+esi] psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 5[edi] - psllq mm2,60 pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 pxor mm0,mm2 psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 4[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 3[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 movq mm2,mm1 psrlq mm1,4 pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 2[edi] psllq mm2,60 pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR 1[edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - 
and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - mov cl,BYTE PTR [edi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 - pxor mm1,QWORD PTR [edx*1+esi] - mov edx,ecx - movd ebx,mm0 - pxor mm0,mm2 - shl cl,4 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[ecx*1+esi] - psllq mm2,60 - and edx,240 - pxor mm1,QWORD PTR [ebp*8+eax] - and ebx,15 - pxor mm1,QWORD PTR [ecx*1+esi] - movd ebp,mm0 - pxor mm0,mm2 - psrlq mm0,4 - movq mm2,mm1 - psrlq mm1,4 - pxor mm0,QWORD PTR 8[edx*1+esi] - psllq mm2,60 - pxor mm1,QWORD PTR [ebx*8+eax] - and ebp,15 pxor mm1,QWORD PTR [edx*1+esi] - movd ebx,mm0 pxor mm0,mm2 - mov edi,DWORD PTR 4[ebp*8+eax] psrlq mm0,32 movd edx,mm1 psrlq mm1,32 movd ecx,mm0 movd ebp,mm1 - shl edi,4 bswap ebx bswap edx bswap ecx - xor ebp,edi bswap ebp - ret -__mmx_gmult_4bit_inner ENDP -ALIGN 16 -_gcm_gmult_4bit_mmx PROC PUBLIC -$L_gcm_gmult_4bit_mmx_begin:: - push ebp - push ebx - push esi - push edi - mov edi,DWORD PTR 20[esp] - mov esi,DWORD PTR 24[esp] - call $L005pic_point -$L005pic_point: - pop eax - lea eax,DWORD PTR ($Lrem_4bit-$L005pic_point)[eax] - movzx ebx,BYTE PTR 15[edi] - call __mmx_gmult_4bit_inner - mov edi,DWORD PTR 20[esp] emms mov DWORD PTR 12[edi],ebx mov DWORD PTR 4[edi],edx @@ -643,61 +320,910 @@ $L_gcm_ghash_4bit_mmx_begin:: push ebx push esi push edi - mov ebp,DWORD PTR 20[esp] - mov esi,DWORD PTR 24[esp] - mov edi,DWORD PTR 28[esp] - mov ecx,DWORD PTR 32[esp] - call $L006pic_point -$L006pic_point: - pop eax - lea eax,DWORD PTR ($Lrem_4bit-$L006pic_point)[eax] - add ecx,edi - mov DWORD PTR 32[esp],ecx - sub esp,20 - mov ebx,DWORD PTR 12[ebp] - mov edx,DWORD PTR 4[ebp] - mov ecx,DWORD PTR 8[ebp] - mov ebp,DWORD PTR [ebp] - jmp $L007mmx_outer_loop + mov eax,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + mov ecx,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebp,esp + call $L008pic_point +$L008pic_point: + pop esi + lea esi,DWORD PTR ($Lrem_8bit-$L008pic_point)[esi] + sub esp,544 + and esp,-64 + sub esp,16 + add edx,ecx + mov DWORD PTR 544[esp],eax + mov DWORD PTR 552[esp],edx + mov DWORD PTR 556[esp],ebp + add ebx,128 + lea edi,DWORD PTR 144[esp] + lea ebp,DWORD PTR 400[esp] + mov edx,DWORD PTR [ebx-120] + movq mm0,QWORD PTR [ebx-120] + movq mm3,QWORD PTR [ebx-128] + shl edx,4 + mov BYTE PTR [esp],dl + mov edx,DWORD PTR [ebx-104] + movq mm2,QWORD PTR [ebx-104] + movq mm5,QWORD PTR [ebx-112] + movq QWORD PTR [edi-128],mm0 + psrlq mm0,4 + movq QWORD PTR [edi],mm3 + movq mm7,mm3 + psrlq mm3,4 + shl edx,4 + mov BYTE PTR 1[esp],dl + mov edx,DWORD PTR [ebx-88] + movq mm1,QWORD PTR [ebx-88] + psllq mm7,60 + movq mm4,QWORD PTR [ebx-96] + por mm0,mm7 + movq QWORD PTR [edi-120],mm2 + psrlq mm2,4 + movq QWORD PTR 8[edi],mm5 + movq mm6,mm5 + movq QWORD PTR [ebp-128],mm0 + psrlq mm5,4 + movq QWORD PTR [ebp],mm3 + shl edx,4 + mov BYTE PTR 2[esp],dl + mov edx,DWORD PTR [ebx-72] + movq mm0,QWORD PTR [ebx-72] + psllq mm6,60 + movq mm3,QWORD PTR [ebx-80] + por mm2,mm6 + movq QWORD PTR [edi-112],mm1 + psrlq mm1,4 + movq QWORD PTR 16[edi],mm4 + movq mm7,mm4 + movq QWORD PTR [ebp-120],mm2 + psrlq mm4,4 + movq QWORD PTR 8[ebp],mm5 + shl edx,4 + mov BYTE PTR 3[esp],dl + mov edx,DWORD PTR [ebx-56] + movq mm2,QWORD 
PTR [ebx-56] + psllq mm7,60 + movq mm5,QWORD PTR [ebx-64] + por mm1,mm7 + movq QWORD PTR [edi-104],mm0 + psrlq mm0,4 + movq QWORD PTR 24[edi],mm3 + movq mm6,mm3 + movq QWORD PTR [ebp-112],mm1 + psrlq mm3,4 + movq QWORD PTR 16[ebp],mm4 + shl edx,4 + mov BYTE PTR 4[esp],dl + mov edx,DWORD PTR [ebx-40] + movq mm1,QWORD PTR [ebx-40] + psllq mm6,60 + movq mm4,QWORD PTR [ebx-48] + por mm0,mm6 + movq QWORD PTR [edi-96],mm2 + psrlq mm2,4 + movq QWORD PTR 32[edi],mm5 + movq mm7,mm5 + movq QWORD PTR [ebp-104],mm0 + psrlq mm5,4 + movq QWORD PTR 24[ebp],mm3 + shl edx,4 + mov BYTE PTR 5[esp],dl + mov edx,DWORD PTR [ebx-24] + movq mm0,QWORD PTR [ebx-24] + psllq mm7,60 + movq mm3,QWORD PTR [ebx-32] + por mm2,mm7 + movq QWORD PTR [edi-88],mm1 + psrlq mm1,4 + movq QWORD PTR 40[edi],mm4 + movq mm6,mm4 + movq QWORD PTR [ebp-96],mm2 + psrlq mm4,4 + movq QWORD PTR 32[ebp],mm5 + shl edx,4 + mov BYTE PTR 6[esp],dl + mov edx,DWORD PTR [ebx-8] + movq mm2,QWORD PTR [ebx-8] + psllq mm6,60 + movq mm5,QWORD PTR [ebx-16] + por mm1,mm6 + movq QWORD PTR [edi-80],mm0 + psrlq mm0,4 + movq QWORD PTR 48[edi],mm3 + movq mm7,mm3 + movq QWORD PTR [ebp-88],mm1 + psrlq mm3,4 + movq QWORD PTR 40[ebp],mm4 + shl edx,4 + mov BYTE PTR 7[esp],dl + mov edx,DWORD PTR 8[ebx] + movq mm1,QWORD PTR 8[ebx] + psllq mm7,60 + movq mm4,QWORD PTR [ebx] + por mm0,mm7 + movq QWORD PTR [edi-72],mm2 + psrlq mm2,4 + movq QWORD PTR 56[edi],mm5 + movq mm6,mm5 + movq QWORD PTR [ebp-80],mm0 + psrlq mm5,4 + movq QWORD PTR 48[ebp],mm3 + shl edx,4 + mov BYTE PTR 8[esp],dl + mov edx,DWORD PTR 24[ebx] + movq mm0,QWORD PTR 24[ebx] + psllq mm6,60 + movq mm3,QWORD PTR 16[ebx] + por mm2,mm6 + movq QWORD PTR [edi-64],mm1 + psrlq mm1,4 + movq QWORD PTR 64[edi],mm4 + movq mm7,mm4 + movq QWORD PTR [ebp-72],mm2 + psrlq mm4,4 + movq QWORD PTR 56[ebp],mm5 + shl edx,4 + mov BYTE PTR 9[esp],dl + mov edx,DWORD PTR 40[ebx] + movq mm2,QWORD PTR 40[ebx] + psllq mm7,60 + movq mm5,QWORD PTR 32[ebx] + por mm1,mm7 + movq QWORD PTR [edi-56],mm0 + psrlq mm0,4 + movq QWORD PTR 72[edi],mm3 + movq mm6,mm3 + movq QWORD PTR [ebp-64],mm1 + psrlq mm3,4 + movq QWORD PTR 64[ebp],mm4 + shl edx,4 + mov BYTE PTR 10[esp],dl + mov edx,DWORD PTR 56[ebx] + movq mm1,QWORD PTR 56[ebx] + psllq mm6,60 + movq mm4,QWORD PTR 48[ebx] + por mm0,mm6 + movq QWORD PTR [edi-48],mm2 + psrlq mm2,4 + movq QWORD PTR 80[edi],mm5 + movq mm7,mm5 + movq QWORD PTR [ebp-56],mm0 + psrlq mm5,4 + movq QWORD PTR 72[ebp],mm3 + shl edx,4 + mov BYTE PTR 11[esp],dl + mov edx,DWORD PTR 72[ebx] + movq mm0,QWORD PTR 72[ebx] + psllq mm7,60 + movq mm3,QWORD PTR 64[ebx] + por mm2,mm7 + movq QWORD PTR [edi-40],mm1 + psrlq mm1,4 + movq QWORD PTR 88[edi],mm4 + movq mm6,mm4 + movq QWORD PTR [ebp-48],mm2 + psrlq mm4,4 + movq QWORD PTR 80[ebp],mm5 + shl edx,4 + mov BYTE PTR 12[esp],dl + mov edx,DWORD PTR 88[ebx] + movq mm2,QWORD PTR 88[ebx] + psllq mm6,60 + movq mm5,QWORD PTR 80[ebx] + por mm1,mm6 + movq QWORD PTR [edi-32],mm0 + psrlq mm0,4 + movq QWORD PTR 96[edi],mm3 + movq mm7,mm3 + movq QWORD PTR [ebp-40],mm1 + psrlq mm3,4 + movq QWORD PTR 88[ebp],mm4 + shl edx,4 + mov BYTE PTR 13[esp],dl + mov edx,DWORD PTR 104[ebx] + movq mm1,QWORD PTR 104[ebx] + psllq mm7,60 + movq mm4,QWORD PTR 96[ebx] + por mm0,mm7 + movq QWORD PTR [edi-24],mm2 + psrlq mm2,4 + movq QWORD PTR 104[edi],mm5 + movq mm6,mm5 + movq QWORD PTR [ebp-32],mm0 + psrlq mm5,4 + movq QWORD PTR 96[ebp],mm3 + shl edx,4 + mov BYTE PTR 14[esp],dl + mov edx,DWORD PTR 120[ebx] + movq mm0,QWORD PTR 120[ebx] + psllq mm6,60 + movq mm3,QWORD PTR 112[ebx] + por mm2,mm6 + movq QWORD PTR 
[edi-16],mm1 + psrlq mm1,4 + movq QWORD PTR 112[edi],mm4 + movq mm7,mm4 + movq QWORD PTR [ebp-24],mm2 + psrlq mm4,4 + movq QWORD PTR 104[ebp],mm5 + shl edx,4 + mov BYTE PTR 15[esp],dl + psllq mm7,60 + por mm1,mm7 + movq QWORD PTR [edi-8],mm0 + psrlq mm0,4 + movq QWORD PTR 120[edi],mm3 + movq mm6,mm3 + movq QWORD PTR [ebp-16],mm1 + psrlq mm3,4 + movq QWORD PTR 112[ebp],mm4 + psllq mm6,60 + por mm0,mm6 + movq QWORD PTR [ebp-8],mm0 + movq QWORD PTR 120[ebp],mm3 + movq mm6,QWORD PTR [eax] + mov ebx,DWORD PTR 8[eax] + mov edx,DWORD PTR 12[eax] ALIGN 16 -$L007mmx_outer_loop: - xor ebx,DWORD PTR 12[edi] - xor edx,DWORD PTR 4[edi] - xor ecx,DWORD PTR 8[edi] - xor ebp,DWORD PTR [edi] - mov DWORD PTR 48[esp],edi - mov DWORD PTR 12[esp],ebx - mov DWORD PTR 4[esp],edx - mov DWORD PTR 8[esp],ecx - mov DWORD PTR [esp],ebp - mov edi,esp - shr ebx,24 - call __mmx_gmult_4bit_inner - mov edi,DWORD PTR 48[esp] - lea edi,DWORD PTR 16[edi] - cmp edi,DWORD PTR 52[esp] - jb $L007mmx_outer_loop - mov edi,DWORD PTR 40[esp] +$L009outer: + xor edx,DWORD PTR 12[ecx] + xor ebx,DWORD PTR 8[ecx] + pxor mm6,QWORD PTR [ecx] + lea ecx,DWORD PTR 16[ecx] + mov DWORD PTR 536[esp],ebx + movq QWORD PTR 528[esp],mm6 + mov DWORD PTR 548[esp],ecx + xor eax,eax + rol edx,8 + mov al,dl + mov ebp,eax + and al,15 + shr ebp,4 + pxor mm0,mm0 + rol edx,8 + pxor mm1,mm1 + pxor mm2,mm2 + movq mm7,QWORD PTR 16[eax*8+esp] + movq mm6,QWORD PTR 144[eax*8+esp] + mov al,dl + movd ebx,mm7 + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + shr edi,4 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + shr ebp,4 + pinsrw mm2,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 536[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr ebp,4 + pinsrw mm0,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr edi,4 + pinsrw mm2,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr ebp,4 + pinsrw mm1,WORD PTR [ebx*2+esi],2 + pxor 
mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 532[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr edi,4 + pinsrw mm0,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr ebp,4 + pinsrw mm2,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr ebp,4 + pinsrw mm0,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 528[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr edi,4 + pinsrw mm2,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr ebp,4 + pinsrw mm1,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr edi,4 + pinsrw mm0,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr ebp,4 + pinsrw mm2,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 524[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw 
mm1,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + pxor mm6,QWORD PTR 144[eax*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + movzx ebx,bl + pxor mm2,mm2 + psllq mm1,4 + movd ecx,mm7 + psrlq mm7,4 + movq mm3,mm6 + psrlq mm6,4 + shl ecx,4 + pxor mm7,QWORD PTR 16[edi*8+esp] + psllq mm3,60 + movzx ecx,cl + pxor mm7,mm3 + pxor mm6,QWORD PTR 144[edi*8+esp] + pinsrw mm0,WORD PTR [ebx*2+esi],2 + pxor mm6,mm1 + movd edx,mm7 + pinsrw mm2,WORD PTR [ecx*2+esi],3 + psllq mm0,12 + pxor mm6,mm0 + psrlq mm7,32 + pxor mm6,mm2 + mov ecx,DWORD PTR 548[esp] + movd ebx,mm7 + movq mm3,mm6 + psllw mm6,8 + psrlw mm3,8 + por mm6,mm3 + bswap edx + pshufw mm6,mm6,27 + bswap ebx + cmp ecx,DWORD PTR 552[esp] + jne $L009outer + mov eax,DWORD PTR 544[esp] + mov DWORD PTR 12[eax],edx + mov DWORD PTR 8[eax],ebx + movq QWORD PTR [eax],mm6 + mov esp,DWORD PTR 556[esp] emms - mov DWORD PTR 12[edi],ebx - mov DWORD PTR 4[edi],edx - mov DWORD PTR 8[edi],ecx - mov DWORD PTR [edi],ebp - add esp,20 pop edi pop esi pop ebx pop ebp ret _gcm_ghash_4bit_mmx ENDP +ALIGN 16 +_gcm_init_clmul PROC PUBLIC +$L_gcm_init_clmul_begin:: + mov edx,DWORD PTR 4[esp] + mov eax,DWORD PTR 8[esp] + call $L010pic +$L010pic: + pop ecx + lea ecx,DWORD PTR ($Lbswap-$L010pic)[ecx] + movdqu xmm2,XMMWORD PTR [eax] + pshufd xmm2,xmm2,78 + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + pand xmm5,XMMWORD PTR 16[ecx] + pxor xmm2,xmm5 + movdqa xmm0,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu XMMWORD PTR [edx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD PTR 16[edx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR 32[edx],xmm4 + ret +_gcm_init_clmul ENDP +ALIGN 16 +_gcm_gmult_clmul PROC PUBLIC +$L_gcm_gmult_clmul_begin:: + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + call $L011pic +$L011pic: + pop ecx + lea ecx,DWORD PTR ($Lbswap-$L011pic)[ecx] + movdqu xmm0,XMMWORD PTR [eax] + movdqa xmm5,XMMWORD PTR [ecx] + movups xmm2,XMMWORD PTR [edx] +DB 102,15,56,0,197 + movups xmm4,XMMWORD PTR 32[edx] + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,56,0,197 + movdqu XMMWORD PTR [eax],xmm0 + ret +_gcm_gmult_clmul ENDP +ALIGN 16 +_gcm_ghash_clmul PROC PUBLIC +$L_gcm_ghash_clmul_begin:: + push ebp + push ebx + push esi + push edi + mov eax,DWORD 
PTR 20[esp] + mov edx,DWORD PTR 24[esp] + mov esi,DWORD PTR 28[esp] + mov ebx,DWORD PTR 32[esp] + call $L012pic +$L012pic: + pop ecx + lea ecx,DWORD PTR ($Lbswap-$L012pic)[ecx] + movdqu xmm0,XMMWORD PTR [eax] + movdqa xmm5,XMMWORD PTR [ecx] + movdqu xmm2,XMMWORD PTR [edx] +DB 102,15,56,0,197 + sub ebx,16 + jz $L013odd_tail + movdqu xmm3,XMMWORD PTR [esi] + movdqu xmm6,XMMWORD PTR 16[esi] +DB 102,15,56,0,221 +DB 102,15,56,0,245 + movdqu xmm5,XMMWORD PTR 32[edx] + pxor xmm0,xmm3 + pshufd xmm3,xmm6,78 + movdqa xmm7,xmm6 + pxor xmm3,xmm6 + lea esi,DWORD PTR 32[esi] +DB 102,15,58,68,242,0 +DB 102,15,58,68,250,17 +DB 102,15,58,68,221,0 + movups xmm2,XMMWORD PTR 16[edx] + nop + sub ebx,32 + jbe $L014even_tail + jmp $L015mod_loop +ALIGN 32 +$L015mod_loop: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 + nop +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,229,16 + movups xmm2,XMMWORD PTR [edx] + xorps xmm0,xmm6 + movdqa xmm5,XMMWORD PTR [ecx] + xorps xmm1,xmm7 + movdqu xmm7,XMMWORD PTR [esi] + pxor xmm3,xmm0 + movdqu xmm6,XMMWORD PTR 16[esi] + pxor xmm3,xmm1 +DB 102,15,56,0,253 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 +DB 102,15,56,0,245 + pxor xmm1,xmm7 + movdqa xmm7,xmm6 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 +DB 102,15,58,68,242,0 + movups xmm5,XMMWORD PTR 32[edx] + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + pshufd xmm3,xmm7,78 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm3,xmm7 + pxor xmm1,xmm4 +DB 102,15,58,68,250,17 + movups xmm2,XMMWORD PTR 16[edx] + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,58,68,221,0 + lea esi,DWORD PTR 32[esi] + sub ebx,32 + ja $L015mod_loop +$L014even_tail: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,229,16 + movdqa xmm5,XMMWORD PTR [ecx] + xorps xmm0,xmm6 + xorps xmm1,xmm7 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test ebx,ebx + jnz $L016done + movups xmm2,XMMWORD PTR [edx] +$L013odd_tail: + movdqu xmm3,XMMWORD PTR [esi] +DB 102,15,56,0,221 + pxor xmm0,xmm3 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +$L016done: +DB 102,15,56,0,197 + movdqu XMMWORD PTR [eax],xmm0 + pop edi + pop esi + pop ebx + pop ebp + ret +_gcm_ghash_clmul ENDP ALIGN 64 -$Lrem_4bit:: -DD 0,0,0,29491200,0,58982400,0,38141952 -DD 0,117964800,0,113901568,0,76283904,0,88997888 -DD 
0,235929600,0,265420800,0,227803136,0,206962688 -DD 0,152567808,0,148504576,0,177995776,0,190709760 +$Lbswap:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 ALIGN 64 -$L008rem_8bit: +$Lrem_8bit:: DW 0,450,900,582,1800,1738,1164,1358 DW 3600,4050,3476,3158,2328,2266,2716,2910 DW 7200,7650,8100,7782,6952,6890,6316,6510 @@ -730,6 +1256,16 @@ DW 43456,43010,43588,43910,44744,44810,44364,44174 DW 42960,42514,42068,42390,41176,41242,41820,41630 DW 46560,46114,46692,47014,45800,45866,45420,45230 DW 48112,47666,47220,47542,48376,48442,49020,48830 +ALIGN 64 +$Lrem_4bit:: +DD 0,0,0,471859200 +DD 0,943718400,0,610271232 +DD 0,1887436800,0,1822425088 +DD 0,1220542464,0,1423966208 +DD 0,3774873600,0,4246732800 +DD 0,3644850176,0,3311403008 +DD 0,2441084928,0,2376073216 +DD 0,2847932416,0,3051356160 DB 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 DB 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 DB 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 diff --git a/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm b/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm index d179090911ede4..90ab38ab8a1f0a 100644 --- a/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm +++ b/deps/openssl/asm/x86-win32-masm/rc4/rc4-586.asm @@ -45,8 +45,8 @@ $L_RC4_begin:: mov ecx,DWORD PTR [eax*4+edi] and edx,-4 jz $L002loop1 - test edx,-8 mov DWORD PTR 32[esp],ebp + test edx,-8 jz $L003go4loop4 lea ebp,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [ebp],26 @@ -383,6 +383,6 @@ ALIGN 64 _RC4_options ENDP .text$ ENDS .bss SEGMENT 'BSS' -COMM _OPENSSL_ia32cap_P:QWORD +COMM _OPENSSL_ia32cap_P:DWORD:4 .bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm b/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm deleted file mode 100644 index 7ce74110e26ec6..00000000000000 --- a/deps/openssl/asm/x86-win32-masm/rc5/rc5-586.asm +++ /dev/null @@ -1,573 +0,0 @@ -TITLE rc5-586.asm -IF @Version LT 800 -ECHO MASM version 8.00 or later is strongly recommended. 
-ENDIF -.686 -.MODEL FLAT -OPTION DOTNAME -IF @Version LT 800 -.text$ SEGMENT PAGE 'CODE' -ELSE -.text$ SEGMENT ALIGN(64) 'CODE' -ENDIF -ALIGN 16 -_RC5_32_encrypt PROC PUBLIC -$L_RC5_32_encrypt_begin:: - ; - - push ebp - push esi - push edi - mov edx,DWORD PTR 16[esp] - mov ebp,DWORD PTR 20[esp] - ; Load the 2 words - mov edi,DWORD PTR [edx] - mov esi,DWORD PTR 4[edx] - push ebx - mov ebx,DWORD PTR [ebp] - add edi,DWORD PTR 4[ebp] - add esi,DWORD PTR 8[ebp] - xor edi,esi - mov eax,DWORD PTR 12[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 16[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 20[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 24[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 28[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 32[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 36[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 40[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 44[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 48[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 52[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 56[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 60[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 64[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 68[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 72[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - cmp ebx,8 - je $L000rc5_exit - xor edi,esi - mov eax,DWORD PTR 76[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 80[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 84[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 88[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 92[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 96[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 100[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 104[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - cmp ebx,12 - je $L000rc5_exit - xor edi,esi - mov eax,DWORD PTR 108[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 112[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 116[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 120[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 124[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 128[ebp] - mov ecx,edi - rol esi,cl - add esi,eax - xor edi,esi - mov eax,DWORD PTR 132[ebp] - mov ecx,esi - rol edi,cl - add edi,eax - xor esi,edi - mov eax,DWORD PTR 136[ebp] - mov ecx,edi - rol esi,cl - add esi,eax -$L000rc5_exit: - mov DWORD PTR [edx],edi - mov DWORD PTR 4[edx],esi - pop ebx - pop edi - pop esi - pop ebp - ret -_RC5_32_encrypt ENDP -ALIGN 16 -_RC5_32_decrypt PROC PUBLIC -$L_RC5_32_decrypt_begin:: - ; - - push ebp - push esi - push edi - mov edx,DWORD PTR 16[esp] - mov ebp,DWORD PTR 20[esp] - ; Load the 2 words - mov edi,DWORD PTR [edx] - mov esi,DWORD 
PTR 4[edx] - push ebx - mov ebx,DWORD PTR [ebp] - cmp ebx,12 - je $L001rc5_dec_12 - cmp ebx,8 - je $L002rc5_dec_8 - mov eax,DWORD PTR 136[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 132[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 128[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 124[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 120[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 116[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 112[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 108[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi -$L001rc5_dec_12: - mov eax,DWORD PTR 104[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 100[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 96[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 92[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 88[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 84[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 80[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 76[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi -$L002rc5_dec_8: - mov eax,DWORD PTR 72[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 68[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 64[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 60[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 56[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 52[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 48[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 44[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 40[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 36[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 32[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 28[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 24[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 20[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - mov eax,DWORD PTR 16[ebp] - sub esi,eax - mov ecx,edi - ror esi,cl - xor esi,edi - mov eax,DWORD PTR 12[ebp] - sub edi,eax - mov ecx,esi - ror edi,cl - xor edi,esi - sub esi,DWORD PTR 8[ebp] - sub edi,DWORD PTR 4[ebp] -$L003rc5_exit: - mov DWORD PTR [edx],edi - mov DWORD PTR 4[edx],esi - pop ebx - pop edi - pop esi - pop ebp - ret -_RC5_32_decrypt ENDP -ALIGN 16 -_RC5_32_cbc_encrypt PROC PUBLIC -$L_RC5_32_cbc_encrypt_begin:: - ; - - push ebp - push ebx - push esi - push edi - mov ebp,DWORD PTR 28[esp] - ; getting iv ptr from parameter 4 - mov ebx,DWORD PTR 36[esp] - mov esi,DWORD PTR [ebx] - mov edi,DWORD PTR 4[ebx] - push edi - push esi - push edi - push esi - mov ebx,esp - mov esi,DWORD PTR 36[esp] - mov edi,DWORD PTR 40[esp] - ; getting encrypt flag from parameter 5 - mov ecx,DWORD PTR 56[esp] - ; get and push parameter 3 - mov eax,DWORD PTR 48[esp] - push eax - push ebx - cmp ecx,0 - jz 
$L004decrypt - and ebp,4294967288 - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - jz $L005encrypt_finish -$L006encrypt_loop: - mov ecx,DWORD PTR [esi] - mov edx,DWORD PTR 4[esi] - xor eax,ecx - xor ebx,edx - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_RC5_32_encrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - mov DWORD PTR [edi],eax - mov DWORD PTR 4[edi],ebx - add esi,8 - add edi,8 - sub ebp,8 - jnz $L006encrypt_loop -$L005encrypt_finish: - mov ebp,DWORD PTR 52[esp] - and ebp,7 - jz $L007finish - call $L008PIC_point -$L008PIC_point: - pop edx - lea ecx,DWORD PTR ($L009cbc_enc_jmp_table-$L008PIC_point)[edx] - mov ebp,DWORD PTR [ebp*4+ecx] - add ebp,edx - xor ecx,ecx - xor edx,edx - jmp ebp -$L010ej7: - mov dh,BYTE PTR 6[esi] - shl edx,8 -$L011ej6: - mov dh,BYTE PTR 5[esi] -$L012ej5: - mov dl,BYTE PTR 4[esi] -$L013ej4: - mov ecx,DWORD PTR [esi] - jmp $L014ejend -$L015ej3: - mov ch,BYTE PTR 2[esi] - shl ecx,8 -$L016ej2: - mov ch,BYTE PTR 1[esi] -$L017ej1: - mov cl,BYTE PTR [esi] -$L014ejend: - xor eax,ecx - xor ebx,edx - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_RC5_32_encrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - mov DWORD PTR [edi],eax - mov DWORD PTR 4[edi],ebx - jmp $L007finish -$L004decrypt: - and ebp,4294967288 - mov eax,DWORD PTR 16[esp] - mov ebx,DWORD PTR 20[esp] - jz $L018decrypt_finish -$L019decrypt_loop: - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_RC5_32_decrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - mov ecx,DWORD PTR 16[esp] - mov edx,DWORD PTR 20[esp] - xor ecx,eax - xor edx,ebx - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] - mov DWORD PTR [edi],ecx - mov DWORD PTR 4[edi],edx - mov DWORD PTR 16[esp],eax - mov DWORD PTR 20[esp],ebx - add esi,8 - add edi,8 - sub ebp,8 - jnz $L019decrypt_loop -$L018decrypt_finish: - mov ebp,DWORD PTR 52[esp] - and ebp,7 - jz $L007finish - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] - mov DWORD PTR 8[esp],eax - mov DWORD PTR 12[esp],ebx - call $L_RC5_32_decrypt_begin - mov eax,DWORD PTR 8[esp] - mov ebx,DWORD PTR 12[esp] - mov ecx,DWORD PTR 16[esp] - mov edx,DWORD PTR 20[esp] - xor ecx,eax - xor edx,ebx - mov eax,DWORD PTR [esi] - mov ebx,DWORD PTR 4[esi] -$L020dj7: - ror edx,16 - mov BYTE PTR 6[edi],dl - shr edx,16 -$L021dj6: - mov BYTE PTR 5[edi],dh -$L022dj5: - mov BYTE PTR 4[edi],dl -$L023dj4: - mov DWORD PTR [edi],ecx - jmp $L024djend -$L025dj3: - ror ecx,16 - mov BYTE PTR 2[edi],cl - shl ecx,16 -$L026dj2: - mov BYTE PTR 1[esi],ch -$L027dj1: - mov BYTE PTR [esi],cl -$L024djend: - jmp $L007finish -$L007finish: - mov ecx,DWORD PTR 60[esp] - add esp,24 - mov DWORD PTR [ecx],eax - mov DWORD PTR 4[ecx],ebx - pop edi - pop esi - pop ebx - pop ebp - ret -ALIGN 64 -$L009cbc_enc_jmp_table: -DD 0 -DD $L017ej1-$L008PIC_point -DD $L016ej2-$L008PIC_point -DD $L015ej3-$L008PIC_point -DD $L013ej4-$L008PIC_point -DD $L012ej5-$L008PIC_point -DD $L011ej6-$L008PIC_point -DD $L010ej7-$L008PIC_point -ALIGN 64 -_RC5_32_cbc_encrypt ENDP -.text$ ENDS -END diff --git a/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm b/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm index 7f6458cefd0051..9bcd60a0b9120c 100644 --- a/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm +++ b/deps/openssl/asm/x86-win32-masm/ripemd/rmd-586.asm @@ -25,7 +25,6 @@ $L_ripemd160_block_asm_data_order_begin:: sub esp,108 $L000start: ; - mov ebx,DWORD PTR [eax] mov ebp,DWORD PTR 4[eax] mov 
DWORD PTR [esp],ebx diff --git a/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm b/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm index 878b1d3b994c28..4607eda762a75a 100644 --- a/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm +++ b/deps/openssl/asm/x86-win32-masm/sha/sha1-586.asm @@ -3,6 +3,13 @@ IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF .686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -10,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _sha1_block_data_order PROC PUBLIC $L_sha1_block_data_order_begin:: @@ -17,6 +25,28 @@ $L_sha1_block_data_order_begin:: push ebx push esi push edi + call $L000pic_point +$L000pic_point: + pop ebp + lea esi,DWORD PTR _OPENSSL_ia32cap_P + lea ebp,DWORD PTR ($LK_XX_XX-$L000pic_point)[ebp] + mov eax,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + test edx,512 + jz $L001x86 + mov ecx,DWORD PTR 8[esi] + test eax,16777216 + jz $L001x86 + test ecx,536870912 + jnz $Lshaext_shortcut + and edx,268435456 + and eax,1073741824 + or eax,edx + cmp eax,1342177280 + je $Lavx_shortcut + jmp $Lssse3_shortcut +ALIGN 16 +$L001x86: mov ebp,DWORD PTR 20[esp] mov esi,DWORD PTR 24[esp] mov eax,DWORD PTR 28[esp] @@ -25,9 +55,9 @@ $L_sha1_block_data_order_begin:: add eax,esi mov DWORD PTR 104[esp],eax mov edi,DWORD PTR 16[ebp] - jmp $L000loop + jmp $L002loop ALIGN 16 -$L000loop: +$L002loop: mov eax,DWORD PTR [esi] mov ebx,DWORD PTR 4[esi] mov ecx,DWORD PTR 8[esi] @@ -1374,7 +1404,7 @@ $L000loop: mov DWORD PTR 12[ebp],ebx mov esi,edx mov DWORD PTR 16[ebp],ecx - jb $L000loop + jb $L002loop add esp,76 pop edi pop esi @@ -1382,9 +1412,2576 @@ $L000loop: pop ebp ret _sha1_block_data_order ENDP +ALIGN 16 +__sha1_block_data_order_shaext PROC PRIVATE + push ebp + push ebx + push esi + push edi + call $L003pic_point +$L003pic_point: + pop ebp + lea ebp,DWORD PTR ($LK_XX_XX-$L003pic_point)[ebp] +$Lshaext_shortcut:: + mov edi,DWORD PTR 20[esp] + mov ebx,esp + mov esi,DWORD PTR 24[esp] + mov ecx,DWORD PTR 28[esp] + sub esp,32 + movdqu xmm0,XMMWORD PTR [edi] + movd xmm1,DWORD PTR 16[edi] + and esp,-32 + movdqa xmm3,XMMWORD PTR 80[ebp] + movdqu xmm4,XMMWORD PTR [esi] + pshufd xmm0,xmm0,27 + movdqu xmm5,XMMWORD PTR 16[esi] + pshufd xmm1,xmm1,27 + movdqu xmm6,XMMWORD PTR 32[esi] +DB 102,15,56,0,227 + movdqu xmm7,XMMWORD PTR 48[esi] +DB 102,15,56,0,235 +DB 102,15,56,0,243 +DB 102,15,56,0,251 + jmp $L004loop_shaext +ALIGN 16 +$L004loop_shaext: + dec ecx + lea eax,DWORD PTR 64[esi] + movdqa XMMWORD PTR [esp],xmm1 + paddd xmm1,xmm4 + cmovne esi,eax + movdqa XMMWORD PTR 16[esp],xmm0 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,204 
+ pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 + movdqu xmm4,XMMWORD PTR [esi] + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,213 + movdqu xmm5,XMMWORD PTR 16[esi] +DB 102,15,56,0,227 + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,206 + movdqu xmm6,XMMWORD PTR 32[esi] +DB 102,15,56,0,235 + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,215 + movdqu xmm7,XMMWORD PTR 48[esi] +DB 102,15,56,0,243 + movdqa xmm1,xmm0 +DB 15,58,204,194,3 + movdqa xmm2,XMMWORD PTR [esp] +DB 102,15,56,0,251 +DB 15,56,200,202 + paddd xmm0,XMMWORD PTR 16[esp] + jnz $L004loop_shaext + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu XMMWORD PTR [edi],xmm0 + movd DWORD PTR 16[edi],xmm1 + mov esp,ebx + pop edi + pop esi + pop ebx + pop ebp + ret +__sha1_block_data_order_shaext ENDP +ALIGN 16 +__sha1_block_data_order_ssse3 PROC PRIVATE + push ebp + push ebx + push esi + push edi + call $L005pic_point +$L005pic_point: + pop ebp + lea ebp,DWORD PTR ($LK_XX_XX-$L005pic_point)[ebp] +$Lssse3_shortcut:: + movdqa xmm7,XMMWORD PTR [ebp] + movdqa xmm0,XMMWORD PTR 16[ebp] + movdqa xmm1,XMMWORD PTR 32[ebp] + movdqa xmm2,XMMWORD PTR 48[ebp] + movdqa xmm6,XMMWORD PTR 64[ebp] + mov edi,DWORD PTR 20[esp] + mov ebp,DWORD PTR 24[esp] + mov edx,DWORD PTR 28[esp] + mov esi,esp + sub esp,208 + and esp,-64 + movdqa XMMWORD PTR 112[esp],xmm0 + movdqa XMMWORD PTR 128[esp],xmm1 + movdqa XMMWORD PTR 144[esp],xmm2 + shl edx,6 + movdqa XMMWORD PTR 160[esp],xmm7 + add edx,ebp + movdqa XMMWORD PTR 176[esp],xmm6 + add ebp,64 + mov DWORD PTR 192[esp],edi + mov DWORD PTR 196[esp],ebp + mov DWORD PTR 200[esp],edx + mov DWORD PTR 204[esp],esi + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov edi,DWORD PTR 16[edi] + mov esi,ebx + movdqu xmm0,XMMWORD PTR [ebp-64] + movdqu xmm1,XMMWORD PTR [ebp-48] + movdqu xmm2,XMMWORD PTR [ebp-32] + movdqu xmm3,XMMWORD PTR [ebp-16] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + movdqa XMMWORD PTR 96[esp],xmm7 +DB 102,15,56,0,222 + paddd xmm0,xmm7 + paddd xmm1,xmm7 + paddd xmm2,xmm7 + movdqa XMMWORD PTR [esp],xmm0 + psubd xmm0,xmm7 + movdqa XMMWORD PTR 16[esp],xmm1 + psubd xmm1,xmm7 + movdqa XMMWORD PTR 32[esp],xmm2 + mov ebp,ecx + psubd xmm2,xmm7 + xor ebp,edx + pshufd xmm4,xmm0,238 + and esi,ebp + jmp $L006loop +ALIGN 16 +$L006loop: + ror ebx,2 + xor esi,edx + mov ebp,eax + punpcklqdq xmm4,xmm1 + movdqa xmm6,xmm3 + add edi,DWORD PTR [esp] + xor ebx,ecx + paddd xmm7,xmm3 + movdqa XMMWORD PTR 64[esp],xmm0 + rol eax,5 + add edi,esi + psrldq xmm6,4 + and ebp,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add edi,eax + ror eax,7 + pxor xmm6,xmm2 + xor ebp,ecx + mov esi,edi + add edx,DWORD 
PTR 4[esp] + pxor xmm4,xmm6 + xor eax,ebx + rol edi,5 + movdqa XMMWORD PTR 48[esp],xmm7 + add edx,ebp + and esi,eax + movdqa xmm0,xmm4 + xor eax,ebx + add edx,edi + ror edi,7 + movdqa xmm6,xmm4 + xor esi,ebx + pslldq xmm0,12 + paddd xmm4,xmm4 + mov ebp,edx + add ecx,DWORD PTR 8[esp] + psrld xmm6,31 + xor edi,eax + rol edx,5 + movdqa xmm7,xmm0 + add ecx,esi + and ebp,edi + xor edi,eax + psrld xmm0,30 + add ecx,edx + ror edx,7 + por xmm4,xmm6 + xor ebp,eax + mov esi,ecx + add ebx,DWORD PTR 12[esp] + pslld xmm7,2 + xor edx,edi + rol ecx,5 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR 96[esp] + add ebx,ebp + and esi,edx + pxor xmm4,xmm7 + pshufd xmm5,xmm1,238 + xor edx,edi + add ebx,ecx + ror ecx,7 + xor esi,edi + mov ebp,ebx + punpcklqdq xmm5,xmm2 + movdqa xmm7,xmm4 + add eax,DWORD PTR 16[esp] + xor ecx,edx + paddd xmm0,xmm4 + movdqa XMMWORD PTR 80[esp],xmm1 + rol ebx,5 + add eax,esi + psrldq xmm7,4 + and ebp,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm7,xmm3 + xor ebp,edx + mov esi,eax + add edi,DWORD PTR 20[esp] + pxor xmm5,xmm7 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD PTR [esp],xmm0 + add edi,ebp + and esi,ebx + movdqa xmm1,xmm5 + xor ebx,ecx + add edi,eax + ror eax,7 + movdqa xmm7,xmm5 + xor esi,ecx + pslldq xmm1,12 + paddd xmm5,xmm5 + mov ebp,edi + add edx,DWORD PTR 24[esp] + psrld xmm7,31 + xor eax,ebx + rol edi,5 + movdqa xmm0,xmm1 + add edx,esi + and ebp,eax + xor eax,ebx + psrld xmm1,30 + add edx,edi + ror edi,7 + por xmm5,xmm7 + xor ebp,ebx + mov esi,edx + add ecx,DWORD PTR 28[esp] + pslld xmm0,2 + xor edi,eax + rol edx,5 + pxor xmm5,xmm1 + movdqa xmm1,XMMWORD PTR 112[esp] + add ecx,ebp + and esi,edi + pxor xmm5,xmm0 + pshufd xmm6,xmm2,238 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + punpcklqdq xmm6,xmm3 + movdqa xmm0,xmm5 + add ebx,DWORD PTR 32[esp] + xor edx,edi + paddd xmm1,xmm5 + movdqa XMMWORD PTR 96[esp],xmm2 + rol ecx,5 + add ebx,esi + psrldq xmm0,4 + and ebp,edx + xor edx,edi + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm0,xmm4 + xor ebp,edi + mov esi,ebx + add eax,DWORD PTR 36[esp] + pxor xmm6,xmm0 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD PTR 16[esp],xmm1 + add eax,ebp + and esi,ecx + movdqa xmm2,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm0,xmm6 + xor esi,edx + pslldq xmm2,12 + paddd xmm6,xmm6 + mov ebp,eax + add edi,DWORD PTR 40[esp] + psrld xmm0,31 + xor ebx,ecx + rol eax,5 + movdqa xmm1,xmm2 + add edi,esi + and ebp,ebx + xor ebx,ecx + psrld xmm2,30 + add edi,eax + ror eax,7 + por xmm6,xmm0 + xor ebp,ecx + movdqa xmm0,XMMWORD PTR 64[esp] + mov esi,edi + add edx,DWORD PTR 44[esp] + pslld xmm1,2 + xor eax,ebx + rol edi,5 + pxor xmm6,xmm2 + movdqa xmm2,XMMWORD PTR 112[esp] + add edx,ebp + and esi,eax + pxor xmm6,xmm1 + pshufd xmm7,xmm3,238 + xor eax,ebx + add edx,edi + ror edi,7 + xor esi,ebx + mov ebp,edx + punpcklqdq xmm7,xmm4 + movdqa xmm1,xmm6 + add ecx,DWORD PTR 48[esp] + xor edi,eax + paddd xmm2,xmm6 + movdqa XMMWORD PTR 64[esp],xmm3 + rol edx,5 + add ecx,esi + psrldq xmm1,4 + and ebp,edi + xor edi,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm1,xmm5 + xor ebp,eax + mov esi,ecx + add ebx,DWORD PTR 52[esp] + pxor xmm7,xmm1 + xor edx,edi + rol ecx,5 + movdqa XMMWORD PTR 32[esp],xmm2 + add ebx,ebp + and esi,edx + movdqa xmm3,xmm7 + xor edx,edi + add ebx,ecx + ror ecx,7 + movdqa xmm1,xmm7 + xor esi,edi + pslldq xmm3,12 + paddd xmm7,xmm7 + mov ebp,ebx + add eax,DWORD PTR 56[esp] + psrld xmm1,31 + xor ecx,edx + rol ebx,5 + movdqa xmm2,xmm3 + add eax,esi + and ebp,ecx + xor ecx,edx + psrld 
xmm3,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm1 + xor ebp,edx + movdqa xmm1,XMMWORD PTR 80[esp] + mov esi,eax + add edi,DWORD PTR 60[esp] + pslld xmm2,2 + xor ebx,ecx + rol eax,5 + pxor xmm7,xmm3 + movdqa xmm3,XMMWORD PTR 112[esp] + add edi,ebp + and esi,ebx + pxor xmm7,xmm2 + pshufd xmm2,xmm6,238 + xor ebx,ecx + add edi,eax + ror eax,7 + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + xor esi,ecx + mov ebp,edi + add edx,DWORD PTR [esp] + pxor xmm0,xmm1 + movdqa XMMWORD PTR 80[esp],xmm4 + xor eax,ebx + rol edi,5 + movdqa xmm4,xmm3 + add edx,esi + paddd xmm3,xmm7 + and ebp,eax + pxor xmm0,xmm2 + xor eax,ebx + add edx,edi + ror edi,7 + xor ebp,ebx + movdqa xmm2,xmm0 + movdqa XMMWORD PTR 48[esp],xmm3 + mov esi,edx + add ecx,DWORD PTR 4[esp] + xor edi,eax + rol edx,5 + pslld xmm0,2 + add ecx,ebp + and esi,edi + psrld xmm2,30 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + add ebx,DWORD PTR 8[esp] + xor edx,edi + rol ecx,5 + por xmm0,xmm2 + add ebx,esi + and ebp,edx + movdqa xmm2,XMMWORD PTR 96[esp] + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 12[esp] + xor ebp,edi + mov esi,ebx + pshufd xmm3,xmm7,238 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 16[esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm1,xmm2 + movdqa XMMWORD PTR 96[esp],xmm5 + add edi,esi + xor ebp,ecx + movdqa xmm5,xmm4 + ror ebx,7 + paddd xmm4,xmm0 + add edi,eax + pxor xmm1,xmm3 + add edx,DWORD PTR 20[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm3,xmm1 + movdqa XMMWORD PTR [esp],xmm4 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD PTR 24[esp] + xor esi,eax + psrld xmm3,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm1,xmm3 + add ebx,DWORD PTR 28[esp] + xor ebp,edi + movdqa xmm3,XMMWORD PTR 64[esp] + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + pshufd xmm4,xmm0,238 + add ebx,ecx + add eax,DWORD PTR 32[esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + xor esi,edx + mov ebp,ebx + rol ebx,5 + pxor xmm2,xmm3 + movdqa XMMWORD PTR 64[esp],xmm6 + add eax,esi + xor ebp,edx + movdqa xmm6,XMMWORD PTR 128[esp] + ror ecx,7 + paddd xmm5,xmm1 + add eax,ebx + pxor xmm2,xmm4 + add edi,DWORD PTR 36[esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + movdqa xmm4,xmm2 + movdqa XMMWORD PTR 16[esp],xmm5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + pslld xmm2,2 + add edx,DWORD PTR 40[esp] + xor esi,ebx + psrld xmm4,30 + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + por xmm2,xmm4 + add ecx,DWORD PTR 44[esp] + xor ebp,eax + movdqa xmm4,XMMWORD PTR 80[esp] + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + pshufd xmm5,xmm1,238 + add ecx,edx + add ebx,DWORD PTR 48[esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,edi + mov ebp,ecx + rol ecx,5 + pxor xmm3,xmm4 + movdqa XMMWORD PTR 80[esp],xmm7 + add ebx,esi + xor ebp,edi + movdqa xmm7,xmm6 + ror edx,7 + paddd xmm6,xmm2 + add ebx,ecx + pxor xmm3,xmm5 + add eax,DWORD PTR 52[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + movdqa xmm5,xmm3 + movdqa XMMWORD PTR 32[esp],xmm6 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + pslld xmm3,2 + add edi,DWORD PTR 56[esp] + xor esi,ecx + psrld xmm5,30 + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + por xmm3,xmm5 + add edx,DWORD PTR 60[esp] + xor ebp,ebx + movdqa xmm5,XMMWORD PTR 96[esp] + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + pshufd 
xmm6,xmm2,238 + add edx,edi + add ecx,DWORD PTR [esp] + pxor xmm4,xmm0 + punpcklqdq xmm6,xmm3 + xor esi,eax + mov ebp,edx + rol edx,5 + pxor xmm4,xmm5 + movdqa XMMWORD PTR 96[esp],xmm0 + add ecx,esi + xor ebp,eax + movdqa xmm0,xmm7 + ror edi,7 + paddd xmm7,xmm3 + add ecx,edx + pxor xmm4,xmm6 + add ebx,DWORD PTR 4[esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + movdqa xmm6,xmm4 + movdqa XMMWORD PTR 48[esp],xmm7 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + pslld xmm4,2 + add eax,DWORD PTR 8[esp] + xor esi,edx + psrld xmm6,30 + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + por xmm4,xmm6 + add edi,DWORD PTR 12[esp] + xor ebp,ecx + movdqa xmm6,XMMWORD PTR 64[esp] + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + pshufd xmm7,xmm3,238 + add edi,eax + add edx,DWORD PTR 16[esp] + pxor xmm5,xmm1 + punpcklqdq xmm7,xmm4 + xor esi,ebx + mov ebp,edi + rol edi,5 + pxor xmm5,xmm6 + movdqa XMMWORD PTR 64[esp],xmm1 + add edx,esi + xor ebp,ebx + movdqa xmm1,xmm0 + ror eax,7 + paddd xmm0,xmm4 + add edx,edi + pxor xmm5,xmm7 + add ecx,DWORD PTR 20[esp] + xor ebp,eax + mov esi,edx + rol edx,5 + movdqa xmm7,xmm5 + movdqa XMMWORD PTR [esp],xmm0 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + pslld xmm5,2 + add ebx,DWORD PTR 24[esp] + xor esi,edi + psrld xmm7,30 + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + por xmm5,xmm7 + add eax,DWORD PTR 28[esp] + movdqa xmm7,XMMWORD PTR 80[esp] + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pshufd xmm0,xmm4,238 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 32[esp] + pxor xmm6,xmm2 + punpcklqdq xmm0,xmm5 + and esi,ecx + xor ecx,edx + ror ebx,7 + pxor xmm6,xmm7 + movdqa XMMWORD PTR 80[esp],xmm2 + mov ebp,eax + xor esi,ecx + rol eax,5 + movdqa xmm2,xmm1 + add edi,esi + paddd xmm1,xmm5 + xor ebp,ebx + pxor xmm6,xmm0 + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 36[esp] + and ebp,ebx + movdqa xmm0,xmm6 + movdqa XMMWORD PTR 16[esp],xmm1 + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + pslld xmm6,2 + add edx,ebp + xor esi,eax + psrld xmm0,30 + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 40[esp] + and esi,eax + xor eax,ebx + ror edi,7 + por xmm6,xmm0 + mov ebp,edx + xor esi,eax + movdqa xmm0,XMMWORD PTR 96[esp] + rol edx,5 + add ecx,esi + xor ebp,edi + xor edi,eax + add ecx,edx + pshufd xmm1,xmm5,238 + add ebx,DWORD PTR 44[esp] + and ebp,edi + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 48[esp] + pxor xmm7,xmm3 + punpcklqdq xmm1,xmm6 + and esi,edx + xor edx,edi + ror ecx,7 + pxor xmm7,xmm0 + movdqa XMMWORD PTR 96[esp],xmm3 + mov ebp,ebx + xor esi,edx + rol ebx,5 + movdqa xmm3,XMMWORD PTR 144[esp] + add eax,esi + paddd xmm2,xmm6 + xor ebp,ecx + pxor xmm7,xmm1 + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 52[esp] + and ebp,ecx + movdqa xmm1,xmm7 + movdqa XMMWORD PTR 32[esp],xmm2 + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + pslld xmm7,2 + add edi,ebp + xor esi,ebx + psrld xmm1,30 + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 56[esp] + and esi,ebx + xor ebx,ecx + ror eax,7 + por xmm7,xmm1 + mov ebp,edi + xor esi,ebx + movdqa xmm1,XMMWORD PTR 64[esp] + rol edi,5 + add edx,esi + xor ebp,eax + xor eax,ebx + add edx,edi + pshufd xmm2,xmm6,238 + add ecx,DWORD PTR 60[esp] + and ebp,eax + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + add 
ebx,DWORD PTR [esp] + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + and esi,edi + xor edi,eax + ror edx,7 + pxor xmm0,xmm1 + movdqa XMMWORD PTR 64[esp],xmm4 + mov ebp,ecx + xor esi,edi + rol ecx,5 + movdqa xmm4,xmm3 + add ebx,esi + paddd xmm3,xmm7 + xor ebp,edx + pxor xmm0,xmm2 + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 4[esp] + and ebp,edx + movdqa xmm2,xmm0 + movdqa XMMWORD PTR 48[esp],xmm3 + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pslld xmm0,2 + add eax,ebp + xor esi,ecx + psrld xmm2,30 + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 8[esp] + and esi,ecx + xor ecx,edx + ror ebx,7 + por xmm0,xmm2 + mov ebp,eax + xor esi,ecx + movdqa xmm2,XMMWORD PTR 80[esp] + rol eax,5 + add edi,esi + xor ebp,ebx + xor ebx,ecx + add edi,eax + pshufd xmm3,xmm7,238 + add edx,DWORD PTR 12[esp] + and ebp,ebx + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 16[esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + and esi,eax + xor eax,ebx + ror edi,7 + pxor xmm1,xmm2 + movdqa XMMWORD PTR 80[esp],xmm5 + mov ebp,edx + xor esi,eax + rol edx,5 + movdqa xmm5,xmm4 + add ecx,esi + paddd xmm4,xmm0 + xor ebp,edi + pxor xmm1,xmm3 + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 20[esp] + and ebp,edi + movdqa xmm3,xmm1 + movdqa XMMWORD PTR [esp],xmm4 + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + pslld xmm1,2 + add ebx,ebp + xor esi,edx + psrld xmm3,30 + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 24[esp] + and esi,edx + xor edx,edi + ror ecx,7 + por xmm1,xmm3 + mov ebp,ebx + xor esi,edx + movdqa xmm3,XMMWORD PTR 96[esp] + rol ebx,5 + add eax,esi + xor ebp,ecx + xor ecx,edx + add eax,ebx + pshufd xmm4,xmm0,238 + add edi,DWORD PTR 28[esp] + and ebp,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 32[esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + and esi,ebx + xor ebx,ecx + ror eax,7 + pxor xmm2,xmm3 + movdqa XMMWORD PTR 96[esp],xmm6 + mov ebp,edi + xor esi,ebx + rol edi,5 + movdqa xmm6,xmm5 + add edx,esi + paddd xmm5,xmm1 + xor ebp,eax + pxor xmm2,xmm4 + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 36[esp] + and ebp,eax + movdqa xmm4,xmm2 + movdqa XMMWORD PTR 16[esp],xmm5 + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + pslld xmm2,2 + add ecx,ebp + xor esi,edi + psrld xmm4,30 + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 40[esp] + and esi,edi + xor edi,eax + ror edx,7 + por xmm2,xmm4 + mov ebp,ecx + xor esi,edi + movdqa xmm4,XMMWORD PTR 64[esp] + rol ecx,5 + add ebx,esi + xor ebp,edx + xor edx,edi + add ebx,ecx + pshufd xmm5,xmm1,238 + add eax,DWORD PTR 44[esp] + and ebp,edx + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + add eax,ebp + xor esi,edx + add eax,ebx + add edi,DWORD PTR 48[esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm3,xmm4 + movdqa XMMWORD PTR 64[esp],xmm7 + add edi,esi + xor ebp,ecx + movdqa xmm7,xmm6 + ror ebx,7 + paddd xmm6,xmm2 + add edi,eax + pxor xmm3,xmm5 + add edx,DWORD PTR 52[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm5,xmm3 + movdqa XMMWORD PTR 32[esp],xmm6 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD PTR 56[esp] + xor esi,eax + psrld xmm5,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm3,xmm5 + add ebx,DWORD PTR 60[esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi 
+ ror edx,7 + add ebx,ecx + add eax,DWORD PTR [esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + paddd xmm7,xmm3 + add eax,ebx + add edi,DWORD PTR 4[esp] + xor ebp,ecx + mov esi,eax + movdqa XMMWORD PTR 48[esp],xmm7 + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 8[esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 12[esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + mov ebp,DWORD PTR 196[esp] + cmp ebp,DWORD PTR 200[esp] + je $L007done + movdqa xmm7,XMMWORD PTR 160[esp] + movdqa xmm6,XMMWORD PTR 176[esp] + movdqu xmm0,XMMWORD PTR [ebp] + movdqu xmm1,XMMWORD PTR 16[ebp] + movdqu xmm2,XMMWORD PTR 32[ebp] + movdqu xmm3,XMMWORD PTR 48[ebp] + add ebp,64 +DB 102,15,56,0,198 + mov DWORD PTR 196[esp],ebp + movdqa XMMWORD PTR 96[esp],xmm7 + add ebx,DWORD PTR 16[esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 +DB 102,15,56,0,206 + add ebx,ecx + add eax,DWORD PTR 20[esp] + xor ebp,edx + mov esi,ebx + paddd xmm0,xmm7 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + movdqa XMMWORD PTR [esp],xmm0 + add eax,ebx + add edi,DWORD PTR 24[esp] + xor esi,ecx + mov ebp,eax + psubd xmm0,xmm7 + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 28[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 32[esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 +DB 102,15,56,0,214 + add ecx,edx + add ebx,DWORD PTR 36[esp] + xor ebp,edi + mov esi,ecx + paddd xmm1,xmm7 + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + movdqa XMMWORD PTR 16[esp],xmm1 + add ebx,ecx + add eax,DWORD PTR 40[esp] + xor esi,edx + mov ebp,ebx + psubd xmm1,xmm7 + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 44[esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 48[esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 +DB 102,15,56,0,222 + add edx,edi + add ecx,DWORD PTR 52[esp] + xor ebp,eax + mov esi,edx + paddd xmm2,xmm7 + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + movdqa XMMWORD PTR 32[esp],xmm2 + add ecx,edx + add ebx,DWORD PTR 56[esp] + xor esi,edi + mov ebp,ecx + psubd xmm2,xmm7 + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 60[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD PTR 192[esp] + add eax,DWORD PTR [ebp] + add esi,DWORD PTR 4[ebp] + add ecx,DWORD PTR 8[ebp] + mov DWORD PTR [ebp],eax + add edx,DWORD PTR 12[ebp] + mov DWORD PTR 4[ebp],esi + add edi,DWORD PTR 16[ebp] + mov DWORD PTR 8[ebp],ecx + mov ebx,ecx + mov DWORD PTR 12[ebp],edx + xor ebx,edx + mov DWORD PTR 16[ebp],edi + mov ebp,esi + pshufd xmm4,xmm0,238 + and esi,ebx + mov ebx,ebp + jmp $L006loop +ALIGN 16 +$L007done: + add ebx,DWORD PTR 16[esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 20[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 24[esp] + xor esi,ecx + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 28[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror 
eax,7 + add edx,edi + add ecx,DWORD PTR 32[esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + add ebx,DWORD PTR 36[esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 40[esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 44[esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 48[esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 52[esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + add ebx,DWORD PTR 56[esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 60[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD PTR 192[esp] + add eax,DWORD PTR [ebp] + mov esp,DWORD PTR 204[esp] + add esi,DWORD PTR 4[ebp] + add ecx,DWORD PTR 8[ebp] + mov DWORD PTR [ebp],eax + add edx,DWORD PTR 12[ebp] + mov DWORD PTR 4[ebp],esi + add edi,DWORD PTR 16[ebp] + mov DWORD PTR 8[ebp],ecx + mov DWORD PTR 12[ebp],edx + mov DWORD PTR 16[ebp],edi + pop edi + pop esi + pop ebx + pop ebp + ret +__sha1_block_data_order_ssse3 ENDP +ALIGN 16 +__sha1_block_data_order_avx PROC PRIVATE + push ebp + push ebx + push esi + push edi + call $L008pic_point +$L008pic_point: + pop ebp + lea ebp,DWORD PTR ($LK_XX_XX-$L008pic_point)[ebp] +$Lavx_shortcut:: + vzeroall + vmovdqa xmm7,XMMWORD PTR [ebp] + vmovdqa xmm0,XMMWORD PTR 16[ebp] + vmovdqa xmm1,XMMWORD PTR 32[ebp] + vmovdqa xmm2,XMMWORD PTR 48[ebp] + vmovdqa xmm6,XMMWORD PTR 64[ebp] + mov edi,DWORD PTR 20[esp] + mov ebp,DWORD PTR 24[esp] + mov edx,DWORD PTR 28[esp] + mov esi,esp + sub esp,208 + and esp,-64 + vmovdqa XMMWORD PTR 112[esp],xmm0 + vmovdqa XMMWORD PTR 128[esp],xmm1 + vmovdqa XMMWORD PTR 144[esp],xmm2 + shl edx,6 + vmovdqa XMMWORD PTR 160[esp],xmm7 + add edx,ebp + vmovdqa XMMWORD PTR 176[esp],xmm6 + add ebp,64 + mov DWORD PTR 192[esp],edi + mov DWORD PTR 196[esp],ebp + mov DWORD PTR 200[esp],edx + mov DWORD PTR 204[esp],esi + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov edi,DWORD PTR 16[edi] + mov esi,ebx + vmovdqu xmm0,XMMWORD PTR [ebp-64] + vmovdqu xmm1,XMMWORD PTR [ebp-48] + vmovdqu xmm2,XMMWORD PTR [ebp-32] + vmovdqu xmm3,XMMWORD PTR [ebp-16] + vpshufb xmm0,xmm0,xmm6 + vpshufb xmm1,xmm1,xmm6 + vpshufb xmm2,xmm2,xmm6 + vmovdqa XMMWORD PTR 96[esp],xmm7 + vpshufb xmm3,xmm3,xmm6 + vpaddd xmm4,xmm0,xmm7 + vpaddd xmm5,xmm1,xmm7 + vpaddd xmm6,xmm2,xmm7 + vmovdqa XMMWORD PTR [esp],xmm4 + mov ebp,ecx + vmovdqa XMMWORD PTR 16[esp],xmm5 + xor ebp,edx + vmovdqa XMMWORD PTR 32[esp],xmm6 + and esi,ebp + jmp $L009loop +ALIGN 16 +$L009loop: + shrd ebx,ebx,2 + xor esi,edx + vpalignr xmm4,xmm1,xmm0,8 + mov ebp,eax + add edi,DWORD PTR [esp] + vpaddd xmm7,xmm7,xmm3 + vmovdqa XMMWORD PTR 64[esp],xmm0 + xor ebx,ecx + shld eax,eax,5 + vpsrldq xmm6,xmm3,4 + add edi,esi + and ebp,ebx + vpxor xmm4,xmm4,xmm0 + xor ebx,ecx + add edi,eax + vpxor xmm6,xmm6,xmm2 + shrd eax,eax,7 + xor ebp,ecx + vmovdqa XMMWORD PTR 48[esp],xmm7 + mov esi,edi + add edx,DWORD PTR 4[esp] + vpxor xmm4,xmm4,xmm6 + xor eax,ebx + shld edi,edi,5 + add edx,ebp + and esi,eax + vpsrld xmm6,xmm4,31 + xor eax,ebx + add edx,edi + shrd edi,edi,7 + xor esi,ebx + vpslldq xmm0,xmm4,12 + vpaddd 
xmm4,xmm4,xmm4 + mov ebp,edx + add ecx,DWORD PTR 8[esp] + xor edi,eax + shld edx,edx,5 + vpsrld xmm7,xmm0,30 + vpor xmm4,xmm4,xmm6 + add ecx,esi + and ebp,edi + xor edi,eax + add ecx,edx + vpslld xmm0,xmm0,2 + shrd edx,edx,7 + xor ebp,eax + vpxor xmm4,xmm4,xmm7 + mov esi,ecx + add ebx,DWORD PTR 12[esp] + xor edx,edi + shld ecx,ecx,5 + vpxor xmm4,xmm4,xmm0 + add ebx,ebp + and esi,edx + vmovdqa xmm0,XMMWORD PTR 96[esp] + xor edx,edi + add ebx,ecx + shrd ecx,ecx,7 + xor esi,edi + vpalignr xmm5,xmm2,xmm1,8 + mov ebp,ebx + add eax,DWORD PTR 16[esp] + vpaddd xmm0,xmm0,xmm4 + vmovdqa XMMWORD PTR 80[esp],xmm1 + xor ecx,edx + shld ebx,ebx,5 + vpsrldq xmm7,xmm4,4 + add eax,esi + and ebp,ecx + vpxor xmm5,xmm5,xmm1 + xor ecx,edx + add eax,ebx + vpxor xmm7,xmm7,xmm3 + shrd ebx,ebx,7 + xor ebp,edx + vmovdqa XMMWORD PTR [esp],xmm0 + mov esi,eax + add edi,DWORD PTR 20[esp] + vpxor xmm5,xmm5,xmm7 + xor ebx,ecx + shld eax,eax,5 + add edi,ebp + and esi,ebx + vpsrld xmm7,xmm5,31 + xor ebx,ecx + add edi,eax + shrd eax,eax,7 + xor esi,ecx + vpslldq xmm1,xmm5,12 + vpaddd xmm5,xmm5,xmm5 + mov ebp,edi + add edx,DWORD PTR 24[esp] + xor eax,ebx + shld edi,edi,5 + vpsrld xmm0,xmm1,30 + vpor xmm5,xmm5,xmm7 + add edx,esi + and ebp,eax + xor eax,ebx + add edx,edi + vpslld xmm1,xmm1,2 + shrd edi,edi,7 + xor ebp,ebx + vpxor xmm5,xmm5,xmm0 + mov esi,edx + add ecx,DWORD PTR 28[esp] + xor edi,eax + shld edx,edx,5 + vpxor xmm5,xmm5,xmm1 + add ecx,ebp + and esi,edi + vmovdqa xmm1,XMMWORD PTR 112[esp] + xor edi,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + vpalignr xmm6,xmm3,xmm2,8 + mov ebp,ecx + add ebx,DWORD PTR 32[esp] + vpaddd xmm1,xmm1,xmm5 + vmovdqa XMMWORD PTR 96[esp],xmm2 + xor edx,edi + shld ecx,ecx,5 + vpsrldq xmm0,xmm5,4 + add ebx,esi + and ebp,edx + vpxor xmm6,xmm6,xmm2 + xor edx,edi + add ebx,ecx + vpxor xmm0,xmm0,xmm4 + shrd ecx,ecx,7 + xor ebp,edi + vmovdqa XMMWORD PTR 16[esp],xmm1 + mov esi,ebx + add eax,DWORD PTR 36[esp] + vpxor xmm6,xmm6,xmm0 + xor ecx,edx + shld ebx,ebx,5 + add eax,ebp + and esi,ecx + vpsrld xmm0,xmm6,31 + xor ecx,edx + add eax,ebx + shrd ebx,ebx,7 + xor esi,edx + vpslldq xmm2,xmm6,12 + vpaddd xmm6,xmm6,xmm6 + mov ebp,eax + add edi,DWORD PTR 40[esp] + xor ebx,ecx + shld eax,eax,5 + vpsrld xmm1,xmm2,30 + vpor xmm6,xmm6,xmm0 + add edi,esi + and ebp,ebx + xor ebx,ecx + add edi,eax + vpslld xmm2,xmm2,2 + vmovdqa xmm0,XMMWORD PTR 64[esp] + shrd eax,eax,7 + xor ebp,ecx + vpxor xmm6,xmm6,xmm1 + mov esi,edi + add edx,DWORD PTR 44[esp] + xor eax,ebx + shld edi,edi,5 + vpxor xmm6,xmm6,xmm2 + add edx,ebp + and esi,eax + vmovdqa xmm2,XMMWORD PTR 112[esp] + xor eax,ebx + add edx,edi + shrd edi,edi,7 + xor esi,ebx + vpalignr xmm7,xmm4,xmm3,8 + mov ebp,edx + add ecx,DWORD PTR 48[esp] + vpaddd xmm2,xmm2,xmm6 + vmovdqa XMMWORD PTR 64[esp],xmm3 + xor edi,eax + shld edx,edx,5 + vpsrldq xmm1,xmm6,4 + add ecx,esi + and ebp,edi + vpxor xmm7,xmm7,xmm3 + xor edi,eax + add ecx,edx + vpxor xmm1,xmm1,xmm5 + shrd edx,edx,7 + xor ebp,eax + vmovdqa XMMWORD PTR 32[esp],xmm2 + mov esi,ecx + add ebx,DWORD PTR 52[esp] + vpxor xmm7,xmm7,xmm1 + xor edx,edi + shld ecx,ecx,5 + add ebx,ebp + and esi,edx + vpsrld xmm1,xmm7,31 + xor edx,edi + add ebx,ecx + shrd ecx,ecx,7 + xor esi,edi + vpslldq xmm3,xmm7,12 + vpaddd xmm7,xmm7,xmm7 + mov ebp,ebx + add eax,DWORD PTR 56[esp] + xor ecx,edx + shld ebx,ebx,5 + vpsrld xmm2,xmm3,30 + vpor xmm7,xmm7,xmm1 + add eax,esi + and ebp,ecx + xor ecx,edx + add eax,ebx + vpslld xmm3,xmm3,2 + vmovdqa xmm1,XMMWORD PTR 80[esp] + shrd ebx,ebx,7 + xor ebp,edx + vpxor xmm7,xmm7,xmm2 + mov esi,eax + 
add edi,DWORD PTR 60[esp] + xor ebx,ecx + shld eax,eax,5 + vpxor xmm7,xmm7,xmm3 + add edi,ebp + and esi,ebx + vmovdqa xmm3,XMMWORD PTR 112[esp] + xor ebx,ecx + add edi,eax + vpalignr xmm2,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + shrd eax,eax,7 + xor esi,ecx + mov ebp,edi + add edx,DWORD PTR [esp] + vpxor xmm0,xmm0,xmm1 + vmovdqa XMMWORD PTR 80[esp],xmm4 + xor eax,ebx + shld edi,edi,5 + vmovdqa xmm4,xmm3 + vpaddd xmm3,xmm3,xmm7 + add edx,esi + and ebp,eax + vpxor xmm0,xmm0,xmm2 + xor eax,ebx + add edx,edi + shrd edi,edi,7 + xor ebp,ebx + vpsrld xmm2,xmm0,30 + vmovdqa XMMWORD PTR 48[esp],xmm3 + mov esi,edx + add ecx,DWORD PTR 4[esp] + xor edi,eax + shld edx,edx,5 + vpslld xmm0,xmm0,2 + add ecx,ebp + and esi,edi + xor edi,eax + add ecx,edx + shrd edx,edx,7 + xor esi,eax + mov ebp,ecx + add ebx,DWORD PTR 8[esp] + vpor xmm0,xmm0,xmm2 + xor edx,edi + shld ecx,ecx,5 + vmovdqa xmm2,XMMWORD PTR 96[esp] + add ebx,esi + and ebp,edx + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 12[esp] + xor ebp,edi + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpalignr xmm3,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add edi,DWORD PTR 16[esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD PTR 96[esp],xmm5 + add edi,esi + xor ebp,ecx + vmovdqa xmm5,xmm4 + vpaddd xmm4,xmm4,xmm0 + shrd ebx,ebx,7 + add edi,eax + vpxor xmm1,xmm1,xmm3 + add edx,DWORD PTR 20[esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + vpsrld xmm3,xmm1,30 + vmovdqa XMMWORD PTR [esp],xmm4 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + vpslld xmm1,xmm1,2 + add ecx,DWORD PTR 24[esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + vpor xmm1,xmm1,xmm3 + add ebx,DWORD PTR 28[esp] + xor ebp,edi + vmovdqa xmm3,XMMWORD PTR 64[esp] + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + vpalignr xmm4,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add eax,DWORD PTR 32[esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + vpxor xmm2,xmm2,xmm3 + vmovdqa XMMWORD PTR 64[esp],xmm6 + add eax,esi + xor ebp,edx + vmovdqa xmm6,XMMWORD PTR 128[esp] + vpaddd xmm5,xmm5,xmm1 + shrd ecx,ecx,7 + add eax,ebx + vpxor xmm2,xmm2,xmm4 + add edi,DWORD PTR 36[esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + vpsrld xmm4,xmm2,30 + vmovdqa XMMWORD PTR 16[esp],xmm5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + vpslld xmm2,xmm2,2 + add edx,DWORD PTR 40[esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + vpor xmm2,xmm2,xmm4 + add ecx,DWORD PTR 44[esp] + xor ebp,eax + vmovdqa xmm4,XMMWORD PTR 80[esp] + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + vpalignr xmm5,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add ebx,DWORD PTR 48[esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + vpxor xmm3,xmm3,xmm4 + vmovdqa XMMWORD PTR 80[esp],xmm7 + add ebx,esi + xor ebp,edi + vmovdqa xmm7,xmm6 + vpaddd xmm6,xmm6,xmm2 + shrd edx,edx,7 + add ebx,ecx + vpxor xmm3,xmm3,xmm5 + add eax,DWORD PTR 52[esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + vpsrld xmm5,xmm3,30 + vmovdqa XMMWORD PTR 32[esp],xmm6 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + vpslld xmm3,xmm3,2 + add edi,DWORD PTR 56[esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + add edi,esi + xor ebp,ecx + shrd ebx,ebx,7 + add edi,eax + vpor xmm3,xmm3,xmm5 + add edx,DWORD PTR 60[esp] + xor ebp,ebx + vmovdqa xmm5,XMMWORD PTR 96[esp] + mov esi,edi + shld 
edi,edi,5 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + vpalignr xmm6,xmm3,xmm2,8 + vpxor xmm4,xmm4,xmm0 + add ecx,DWORD PTR [esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + vpxor xmm4,xmm4,xmm5 + vmovdqa XMMWORD PTR 96[esp],xmm0 + add ecx,esi + xor ebp,eax + vmovdqa xmm0,xmm7 + vpaddd xmm7,xmm7,xmm3 + shrd edi,edi,7 + add ecx,edx + vpxor xmm4,xmm4,xmm6 + add ebx,DWORD PTR 4[esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + vpsrld xmm6,xmm4,30 + vmovdqa XMMWORD PTR 48[esp],xmm7 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + vpslld xmm4,xmm4,2 + add eax,DWORD PTR 8[esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + vpor xmm4,xmm4,xmm6 + add edi,DWORD PTR 12[esp] + xor ebp,ecx + vmovdqa xmm6,XMMWORD PTR 64[esp] + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + vpalignr xmm7,xmm4,xmm3,8 + vpxor xmm5,xmm5,xmm1 + add edx,DWORD PTR 16[esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + vpxor xmm5,xmm5,xmm6 + vmovdqa XMMWORD PTR 64[esp],xmm1 + add edx,esi + xor ebp,ebx + vmovdqa xmm1,xmm0 + vpaddd xmm0,xmm0,xmm4 + shrd eax,eax,7 + add edx,edi + vpxor xmm5,xmm5,xmm7 + add ecx,DWORD PTR 20[esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + vpsrld xmm7,xmm5,30 + vmovdqa XMMWORD PTR [esp],xmm0 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + vpslld xmm5,xmm5,2 + add ebx,DWORD PTR 24[esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + vpor xmm5,xmm5,xmm7 + add eax,DWORD PTR 28[esp] + vmovdqa xmm7,XMMWORD PTR 80[esp] + shrd ecx,ecx,7 + mov esi,ebx + xor ebp,edx + shld ebx,ebx,5 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + vpalignr xmm0,xmm5,xmm4,8 + vpxor xmm6,xmm6,xmm2 + add edi,DWORD PTR 32[esp] + and esi,ecx + xor ecx,edx + shrd ebx,ebx,7 + vpxor xmm6,xmm6,xmm7 + vmovdqa XMMWORD PTR 80[esp],xmm2 + mov ebp,eax + xor esi,ecx + vmovdqa xmm2,xmm1 + vpaddd xmm1,xmm1,xmm5 + shld eax,eax,5 + add edi,esi + vpxor xmm6,xmm6,xmm0 + xor ebp,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 36[esp] + vpsrld xmm0,xmm6,30 + vmovdqa XMMWORD PTR 16[esp],xmm1 + and ebp,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,edi + vpslld xmm6,xmm6,2 + xor ebp,ebx + shld edi,edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 40[esp] + and esi,eax + vpor xmm6,xmm6,xmm0 + xor eax,ebx + shrd edi,edi,7 + vmovdqa xmm0,XMMWORD PTR 96[esp] + mov ebp,edx + xor esi,eax + shld edx,edx,5 + add ecx,esi + xor ebp,edi + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 44[esp] + and ebp,edi + xor edi,eax + shrd edx,edx,7 + mov esi,ecx + xor ebp,edi + shld ecx,ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + vpalignr xmm1,xmm6,xmm5,8 + vpxor xmm7,xmm7,xmm3 + add eax,DWORD PTR 48[esp] + and esi,edx + xor edx,edi + shrd ecx,ecx,7 + vpxor xmm7,xmm7,xmm0 + vmovdqa XMMWORD PTR 96[esp],xmm3 + mov ebp,ebx + xor esi,edx + vmovdqa xmm3,XMMWORD PTR 144[esp] + vpaddd xmm2,xmm2,xmm6 + shld ebx,ebx,5 + add eax,esi + vpxor xmm7,xmm7,xmm1 + xor ebp,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 52[esp] + vpsrld xmm1,xmm7,30 + vmovdqa XMMWORD PTR 32[esp],xmm2 + and ebp,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + vpslld xmm7,xmm7,2 + xor ebp,ecx + shld eax,eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 56[esp] + and esi,ebx + vpor xmm7,xmm7,xmm1 + xor ebx,ecx + shrd eax,eax,7 + vmovdqa xmm1,XMMWORD PTR 64[esp] + mov ebp,edi + xor esi,ebx + shld edi,edi,5 
+ add edx,esi + xor ebp,eax + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 60[esp] + and ebp,eax + xor eax,ebx + shrd edi,edi,7 + mov esi,edx + xor ebp,eax + shld edx,edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + vpalignr xmm2,xmm7,xmm6,8 + vpxor xmm0,xmm0,xmm4 + add ebx,DWORD PTR [esp] + and esi,edi + xor edi,eax + shrd edx,edx,7 + vpxor xmm0,xmm0,xmm1 + vmovdqa XMMWORD PTR 64[esp],xmm4 + mov ebp,ecx + xor esi,edi + vmovdqa xmm4,xmm3 + vpaddd xmm3,xmm3,xmm7 + shld ecx,ecx,5 + add ebx,esi + vpxor xmm0,xmm0,xmm2 + xor ebp,edx + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 4[esp] + vpsrld xmm2,xmm0,30 + vmovdqa XMMWORD PTR 48[esp],xmm3 + and ebp,edx + xor edx,edi + shrd ecx,ecx,7 + mov esi,ebx + vpslld xmm0,xmm0,2 + xor ebp,edx + shld ebx,ebx,5 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 8[esp] + and esi,ecx + vpor xmm0,xmm0,xmm2 + xor ecx,edx + shrd ebx,ebx,7 + vmovdqa xmm2,XMMWORD PTR 80[esp] + mov ebp,eax + xor esi,ecx + shld eax,eax,5 + add edi,esi + xor ebp,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 12[esp] + and ebp,ebx + xor ebx,ecx + shrd eax,eax,7 + mov esi,edi + xor ebp,ebx + shld edi,edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + vpalignr xmm3,xmm0,xmm7,8 + vpxor xmm1,xmm1,xmm5 + add ecx,DWORD PTR 16[esp] + and esi,eax + xor eax,ebx + shrd edi,edi,7 + vpxor xmm1,xmm1,xmm2 + vmovdqa XMMWORD PTR 80[esp],xmm5 + mov ebp,edx + xor esi,eax + vmovdqa xmm5,xmm4 + vpaddd xmm4,xmm4,xmm0 + shld edx,edx,5 + add ecx,esi + vpxor xmm1,xmm1,xmm3 + xor ebp,edi + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 20[esp] + vpsrld xmm3,xmm1,30 + vmovdqa XMMWORD PTR [esp],xmm4 + and ebp,edi + xor edi,eax + shrd edx,edx,7 + mov esi,ecx + vpslld xmm1,xmm1,2 + xor ebp,edi + shld ecx,ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 24[esp] + and esi,edx + vpor xmm1,xmm1,xmm3 + xor edx,edi + shrd ecx,ecx,7 + vmovdqa xmm3,XMMWORD PTR 96[esp] + mov ebp,ebx + xor esi,edx + shld ebx,ebx,5 + add eax,esi + xor ebp,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 28[esp] + and ebp,ecx + xor ecx,edx + shrd ebx,ebx,7 + mov esi,eax + xor ebp,ecx + shld eax,eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + vpalignr xmm4,xmm1,xmm0,8 + vpxor xmm2,xmm2,xmm6 + add edx,DWORD PTR 32[esp] + and esi,ebx + xor ebx,ecx + shrd eax,eax,7 + vpxor xmm2,xmm2,xmm3 + vmovdqa XMMWORD PTR 96[esp],xmm6 + mov ebp,edi + xor esi,ebx + vmovdqa xmm6,xmm5 + vpaddd xmm5,xmm5,xmm1 + shld edi,edi,5 + add edx,esi + vpxor xmm2,xmm2,xmm4 + xor ebp,eax + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 36[esp] + vpsrld xmm4,xmm2,30 + vmovdqa XMMWORD PTR 16[esp],xmm5 + and ebp,eax + xor eax,ebx + shrd edi,edi,7 + mov esi,edx + vpslld xmm2,xmm2,2 + xor ebp,eax + shld edx,edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 40[esp] + and esi,edi + vpor xmm2,xmm2,xmm4 + xor edi,eax + shrd edx,edx,7 + vmovdqa xmm4,XMMWORD PTR 64[esp] + mov ebp,ecx + xor esi,edi + shld ecx,ecx,5 + add ebx,esi + xor ebp,edx + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 44[esp] + and ebp,edx + xor edx,edi + shrd ecx,ecx,7 + mov esi,ebx + xor ebp,edx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + add eax,ebx + vpalignr xmm5,xmm2,xmm1,8 + vpxor xmm3,xmm3,xmm7 + add edi,DWORD PTR 48[esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + vpxor xmm3,xmm3,xmm4 + vmovdqa XMMWORD PTR 64[esp],xmm7 + add edi,esi + xor ebp,ecx + vmovdqa xmm7,xmm6 + vpaddd xmm6,xmm6,xmm2 + shrd ebx,ebx,7 + add edi,eax + vpxor xmm3,xmm3,xmm5 + add edx,DWORD 
PTR 52[esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + vpsrld xmm5,xmm3,30 + vmovdqa XMMWORD PTR 32[esp],xmm6 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + vpslld xmm3,xmm3,2 + add ecx,DWORD PTR 56[esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + vpor xmm3,xmm3,xmm5 + add ebx,DWORD PTR 60[esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR [esp] + vpaddd xmm7,xmm7,xmm3 + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + vmovdqa XMMWORD PTR 48[esp],xmm7 + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD PTR 4[esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD PTR 8[esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD PTR 12[esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + mov ebp,DWORD PTR 196[esp] + cmp ebp,DWORD PTR 200[esp] + je $L010done + vmovdqa xmm7,XMMWORD PTR 160[esp] + vmovdqa xmm6,XMMWORD PTR 176[esp] + vmovdqu xmm0,XMMWORD PTR [ebp] + vmovdqu xmm1,XMMWORD PTR 16[ebp] + vmovdqu xmm2,XMMWORD PTR 32[ebp] + vmovdqu xmm3,XMMWORD PTR 48[ebp] + add ebp,64 + vpshufb xmm0,xmm0,xmm6 + mov DWORD PTR 196[esp],ebp + vmovdqa XMMWORD PTR 96[esp],xmm7 + add ebx,DWORD PTR 16[esp] + xor esi,edi + vpshufb xmm1,xmm1,xmm6 + mov ebp,ecx + shld ecx,ecx,5 + vpaddd xmm4,xmm0,xmm7 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + vmovdqa XMMWORD PTR [esp],xmm4 + add eax,DWORD PTR 20[esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD PTR 24[esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + add edi,esi + xor ebp,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD PTR 28[esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD PTR 32[esp] + xor esi,eax + vpshufb xmm2,xmm2,xmm6 + mov ebp,edx + shld edx,edx,5 + vpaddd xmm5,xmm1,xmm7 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + vmovdqa XMMWORD PTR 16[esp],xmm5 + add ebx,DWORD PTR 36[esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR 40[esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD PTR 44[esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD PTR 48[esp] + xor esi,ebx + vpshufb xmm3,xmm3,xmm6 + mov ebp,edi + shld edi,edi,5 + vpaddd xmm6,xmm2,xmm7 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + vmovdqa XMMWORD PTR 32[esp],xmm6 + add ecx,DWORD PTR 52[esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + add ebx,DWORD PTR 56[esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR 60[esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + shrd ecx,ecx,7 + add eax,ebx + mov ebp,DWORD PTR 192[esp] + add eax,DWORD PTR [ebp] + add esi,DWORD PTR 4[ebp] + add ecx,DWORD PTR 8[ebp] + mov DWORD PTR [ebp],eax + add edx,DWORD PTR 12[ebp] + mov DWORD PTR 4[ebp],esi + add edi,DWORD PTR 16[ebp] + mov ebx,ecx + mov DWORD PTR 8[ebp],ecx + xor ebx,edx + mov DWORD 
PTR 12[ebp],edx + mov DWORD PTR 16[ebp],edi + mov ebp,esi + and esi,ebx + mov ebx,ebp + jmp $L009loop +ALIGN 16 +$L010done: + add ebx,DWORD PTR 16[esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR 20[esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + xor esi,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD PTR 24[esp] + xor esi,ecx + mov ebp,eax + shld eax,eax,5 + add edi,esi + xor ebp,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD PTR 28[esp] + xor ebp,ebx + mov esi,edi + shld edi,edi,5 + add edx,ebp + xor esi,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD PTR 32[esp] + xor esi,eax + mov ebp,edx + shld edx,edx,5 + add ecx,esi + xor ebp,eax + shrd edi,edi,7 + add ecx,edx + add ebx,DWORD PTR 36[esp] + xor ebp,edi + mov esi,ecx + shld ecx,ecx,5 + add ebx,ebp + xor esi,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR 40[esp] + xor esi,edx + mov ebp,ebx + shld ebx,ebx,5 + add eax,esi + xor ebp,edx + shrd ecx,ecx,7 + add eax,ebx + add edi,DWORD PTR 44[esp] + xor ebp,ecx + mov esi,eax + shld eax,eax,5 + add edi,ebp + xor esi,ecx + shrd ebx,ebx,7 + add edi,eax + add edx,DWORD PTR 48[esp] + xor esi,ebx + mov ebp,edi + shld edi,edi,5 + add edx,esi + xor ebp,ebx + shrd eax,eax,7 + add edx,edi + add ecx,DWORD PTR 52[esp] + xor ebp,eax + mov esi,edx + shld edx,edx,5 + add ecx,ebp + xor esi,eax + shrd edi,edi,7 + add ecx,edx + add ebx,DWORD PTR 56[esp] + xor esi,edi + mov ebp,ecx + shld ecx,ecx,5 + add ebx,esi + xor ebp,edi + shrd edx,edx,7 + add ebx,ecx + add eax,DWORD PTR 60[esp] + xor ebp,edx + mov esi,ebx + shld ebx,ebx,5 + add eax,ebp + shrd ecx,ecx,7 + add eax,ebx + vzeroall + mov ebp,DWORD PTR 192[esp] + add eax,DWORD PTR [ebp] + mov esp,DWORD PTR 204[esp] + add esi,DWORD PTR 4[ebp] + add ecx,DWORD PTR 8[ebp] + mov DWORD PTR [ebp],eax + add edx,DWORD PTR 12[ebp] + mov DWORD PTR 4[ebp],esi + add edi,DWORD PTR 16[ebp] + mov DWORD PTR 8[ebp],ecx + mov DWORD PTR 12[ebp],edx + mov DWORD PTR 16[ebp],edi + pop edi + pop esi + pop ebx + pop ebp + ret +__sha1_block_data_order_avx ENDP +ALIGN 64 +$LK_XX_XX:: +DD 1518500249,1518500249,1518500249,1518500249 +DD 1859775393,1859775393,1859775393,1859775393 +DD 2400959708,2400959708,2400959708,2400959708 +DD 3395469782,3395469782,3395469782,3395469782 +DD 66051,67438087,134810123,202182159 +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 DB 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm b/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm index 577c38ffab3564..d184877bb717a2 100644 --- a/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm +++ b/deps/openssl/asm/x86-win32-masm/sha/sha256-586.asm @@ -3,6 +3,13 @@ IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF .686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -10,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _sha256_block_data_order PROC PUBLIC $L_sha256_block_data_order_begin:: @@ -33,195 +41,212 @@ $L000pic_point: mov DWORD PTR 4[esp],edi mov DWORD PTR 8[esp],eax mov DWORD PTR 12[esp],ebx + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov ecx,DWORD PTR [edx] + mov ebx,DWORD PTR 4[edx] + test ecx,1048576 + jnz $L002loop + mov edx,DWORD PTR 8[edx] + test ecx,16777216 + jz $L003no_xmm + and ecx,1073741824 + and ebx,268435968 + test edx,536870912 + jnz $L004shaext + or ecx,ebx + and ecx,1342177280 + cmp ecx,1342177280 + je $L005AVX + test ebx,512 + jnz $L006SSSE3 +$L003no_xmm: + sub eax,edi + cmp eax,256 + jae $L007unrolled + jmp $L002loop ALIGN 16 $L002loop: mov eax,DWORD PTR [edi] mov ebx,DWORD PTR 4[edi] mov ecx,DWORD PTR 8[edi] - mov edx,DWORD PTR 12[edi] bswap eax + mov edx,DWORD PTR 12[edi] bswap ebx - bswap ecx - bswap edx push eax + bswap ecx push ebx + bswap edx push ecx push edx mov eax,DWORD PTR 16[edi] mov ebx,DWORD PTR 20[edi] mov ecx,DWORD PTR 24[edi] - mov edx,DWORD PTR 28[edi] bswap eax + mov edx,DWORD PTR 28[edi] bswap ebx - bswap ecx - bswap edx push eax + bswap ecx push ebx + bswap edx push ecx push edx mov eax,DWORD PTR 32[edi] mov ebx,DWORD PTR 36[edi] mov ecx,DWORD PTR 40[edi] - mov edx,DWORD PTR 44[edi] bswap eax + mov edx,DWORD PTR 44[edi] bswap ebx - bswap ecx - bswap edx push eax + bswap ecx push ebx + bswap edx push ecx push edx mov eax,DWORD PTR 48[edi] mov ebx,DWORD PTR 52[edi] mov ecx,DWORD PTR 56[edi] - mov edx,DWORD PTR 60[edi] bswap eax + mov edx,DWORD PTR 60[edi] bswap ebx - bswap ecx - bswap edx push eax + bswap ecx push ebx + bswap edx push ecx push edx add edi,64 - sub esp,32 - mov DWORD PTR 100[esp],edi + lea esp,DWORD PTR [esp-36] + mov DWORD PTR 104[esp],edi mov eax,DWORD PTR [esi] mov ebx,DWORD PTR 4[esi] mov ecx,DWORD PTR 8[esi] mov edi,DWORD PTR 12[esi] - mov DWORD PTR 4[esp],ebx - mov DWORD PTR 8[esp],ecx - mov DWORD PTR 12[esp],edi + mov DWORD PTR 8[esp],ebx + xor ebx,ecx + mov DWORD PTR 12[esp],ecx + mov DWORD PTR 16[esp],edi + mov DWORD PTR [esp],ebx mov edx,DWORD PTR 16[esi] mov ebx,DWORD PTR 20[esi] mov ecx,DWORD PTR 24[esi] mov edi,DWORD PTR 28[esi] - mov DWORD PTR 20[esp],ebx - mov DWORD PTR 24[esp],ecx - mov DWORD PTR 28[esp],edi + mov DWORD PTR 24[esp],ebx + mov DWORD PTR 28[esp],ecx + mov DWORD PTR 32[esp],edi ALIGN 16 -$L00300_15: - mov ebx,DWORD PTR 92[esp] +$L00800_15: mov ecx,edx + mov esi,DWORD PTR 24[esp] ror ecx,14 - mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 28[esp] xor ecx,edx - ror ecx,5 - xor ecx,edx - ror ecx,6 - mov edi,DWORD PTR 24[esp] - add ebx,ecx xor esi,edi - mov DWORD PTR 16[esp],edx - mov ecx,eax + mov ebx,DWORD PTR 96[esp] + ror ecx,5 and esi,edx - mov edx,DWORD PTR 12[esp] + mov DWORD PTR 20[esp],edx + xor edx,ecx + add ebx,DWORD PTR 32[esp] xor esi,edi - mov edi,eax + ror edx,6 + mov ecx,eax add ebx,esi ror ecx,9 - add ebx,DWORD PTR 28[esp] + add ebx,edx + mov edi,DWORD PTR 8[esp] xor ecx,eax + mov DWORD PTR 4[esp],eax + lea esp,DWORD PTR [esp-4] ror ecx,11 - mov esi,DWORD PTR 4[esp] + mov esi,DWORD PTR [ebp] xor ecx,eax + mov edx,DWORD PTR 20[esp] + xor eax,edi ror ecx,2 + add ebx,esi + mov DWORD PTR [esp],eax add edx,ebx - mov edi,DWORD PTR 8[esp] + and eax,DWORD PTR 4[esp] add ebx,ecx - mov DWORD PTR [esp],eax - mov ecx,eax - sub esp,4 - or eax,esi - and ecx,esi - and eax,edi - mov esi,DWORD PTR [ebp] - or eax,ecx + xor eax,edi add 
ebp,4 add eax,ebx - add edx,esi - add eax,esi cmp esi,3248222580 - jne $L00300_15 - mov ebx,DWORD PTR 152[esp] + jne $L00800_15 + mov ecx,DWORD PTR 156[esp] + jmp $L00916_63 ALIGN 16 -$L00416_63: - mov esi,ebx - mov ecx,DWORD PTR 100[esp] - ror esi,11 - mov edi,ecx - xor esi,ebx - ror esi,7 +$L00916_63: + mov ebx,ecx + mov esi,DWORD PTR 104[esp] + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx shr ebx,3 - ror edi,2 - xor ebx,esi - xor edi,ecx - ror edi,17 - shr ecx,10 - add ebx,DWORD PTR 156[esp] - xor edi,ecx - add ebx,DWORD PTR 120[esp] + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 160[esp] + shr edi,10 + add ebx,DWORD PTR 124[esp] mov ecx,edx - add ebx,edi + xor edi,esi + mov esi,DWORD PTR 24[esp] ror ecx,14 - mov esi,DWORD PTR 20[esp] - xor ecx,edx - ror ecx,5 - mov DWORD PTR 92[esp],ebx + add ebx,edi + mov edi,DWORD PTR 28[esp] xor ecx,edx - ror ecx,6 - mov edi,DWORD PTR 24[esp] - add ebx,ecx xor esi,edi - mov DWORD PTR 16[esp],edx - mov ecx,eax + mov DWORD PTR 96[esp],ebx + ror ecx,5 and esi,edx - mov edx,DWORD PTR 12[esp] + mov DWORD PTR 20[esp],edx + xor edx,ecx + add ebx,DWORD PTR 32[esp] xor esi,edi - mov edi,eax + ror edx,6 + mov ecx,eax add ebx,esi ror ecx,9 - add ebx,DWORD PTR 28[esp] + add ebx,edx + mov edi,DWORD PTR 8[esp] xor ecx,eax + mov DWORD PTR 4[esp],eax + lea esp,DWORD PTR [esp-4] ror ecx,11 - mov esi,DWORD PTR 4[esp] + mov esi,DWORD PTR [ebp] xor ecx,eax + mov edx,DWORD PTR 20[esp] + xor eax,edi ror ecx,2 + add ebx,esi + mov DWORD PTR [esp],eax add edx,ebx - mov edi,DWORD PTR 8[esp] + and eax,DWORD PTR 4[esp] add ebx,ecx - mov DWORD PTR [esp],eax - mov ecx,eax - sub esp,4 - or eax,esi - and ecx,esi - and eax,edi - mov esi,DWORD PTR [ebp] - or eax,ecx + xor eax,edi + mov ecx,DWORD PTR 156[esp] add ebp,4 add eax,ebx - mov ebx,DWORD PTR 152[esp] - add edx,esi - add eax,esi cmp esi,3329325298 - jne $L00416_63 - mov esi,DWORD PTR 352[esp] - mov ebx,DWORD PTR 4[esp] - mov ecx,DWORD PTR 8[esp] - mov edi,DWORD PTR 12[esp] + jne $L00916_63 + mov esi,DWORD PTR 356[esp] + mov ebx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 16[esp] add eax,DWORD PTR [esi] add ebx,DWORD PTR 4[esi] - add ecx,DWORD PTR 8[esi] - add edi,DWORD PTR 12[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] mov DWORD PTR [esi],eax mov DWORD PTR 4[esi],ebx - mov DWORD PTR 8[esi],ecx - mov DWORD PTR 12[esi],edi - mov eax,DWORD PTR 20[esp] - mov ebx,DWORD PTR 24[esp] - mov ecx,DWORD PTR 28[esp] - mov edi,DWORD PTR 356[esp] + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 28[esp] + mov ecx,DWORD PTR 32[esp] + mov edi,DWORD PTR 360[esp] add edx,DWORD PTR 16[esi] add eax,DWORD PTR 20[esi] add ebx,DWORD PTR 24[esi] @@ -230,7 +255,7 @@ $L00416_63: mov DWORD PTR 20[esi],eax mov DWORD PTR 24[esi],ebx mov DWORD PTR 28[esi],ecx - add esp,352 + lea esp,DWORD PTR 356[esp] sub ebp,256 cmp edi,DWORD PTR 8[esp] jb $L002loop @@ -258,11 +283,6536 @@ DD 430227734,506948616,659060556,883997877 DD 958139571,1322822218,1537002063,1747873779 DD 1955562222,2024104815,2227730452,2361852424 DD 2428436474,2756734187,3204031479,3329325298 -_sha256_block_data_order ENDP +DD 66051,67438087,134810123,202182159 DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 DB 62,0 +ALIGN 16 +$L007unrolled: + lea esp,DWORD PTR [esp-96] + mov eax,DWORD PTR [esi] + mov ebp,DWORD PTR 
4[esi] + mov ecx,DWORD PTR 8[esi] + mov ebx,DWORD PTR 12[esi] + mov DWORD PTR 4[esp],ebp + xor ebp,ecx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],ebx + mov edx,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov esi,DWORD PTR 28[esi] + mov DWORD PTR 20[esp],ebx + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],esi + jmp $L010grand_loop +ALIGN 16 +$L010grand_loop: + mov ebx,DWORD PTR [edi] + mov ecx,DWORD PTR 4[edi] + bswap ebx + mov esi,DWORD PTR 8[edi] + bswap ecx + mov DWORD PTR 32[esp],ebx + bswap esi + mov DWORD PTR 36[esp],ecx + mov DWORD PTR 40[esp],esi + mov ebx,DWORD PTR 12[edi] + mov ecx,DWORD PTR 16[edi] + bswap ebx + mov esi,DWORD PTR 20[edi] + bswap ecx + mov DWORD PTR 44[esp],ebx + bswap esi + mov DWORD PTR 48[esp],ecx + mov DWORD PTR 52[esp],esi + mov ebx,DWORD PTR 24[edi] + mov ecx,DWORD PTR 28[edi] + bswap ebx + mov esi,DWORD PTR 32[edi] + bswap ecx + mov DWORD PTR 56[esp],ebx + bswap esi + mov DWORD PTR 60[esp],ecx + mov DWORD PTR 64[esp],esi + mov ebx,DWORD PTR 36[edi] + mov ecx,DWORD PTR 40[edi] + bswap ebx + mov esi,DWORD PTR 44[edi] + bswap ecx + mov DWORD PTR 68[esp],ebx + bswap esi + mov DWORD PTR 72[esp],ecx + mov DWORD PTR 76[esp],esi + mov ebx,DWORD PTR 48[edi] + mov ecx,DWORD PTR 52[edi] + bswap ebx + mov esi,DWORD PTR 56[edi] + bswap ecx + mov DWORD PTR 80[esp],ebx + bswap esi + mov DWORD PTR 84[esp],ecx + mov DWORD PTR 88[esp],esi + mov ebx,DWORD PTR 60[edi] + add edi,64 + bswap ebx + mov DWORD PTR 100[esp],edi + mov DWORD PTR 92[esp],ebx + mov ecx,edx + mov esi,DWORD PTR 20[esp] + ror edx,14 + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov ebx,DWORD PTR 32[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1116352408[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov ebx,DWORD PTR 36[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1899447441[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 12[esp] + ror edx,14 + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov ebx,DWORD PTR 40[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3049323471[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov ebx,DWORD PTR 44[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] 
+ xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3921009573[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 4[esp] + ror edx,14 + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov ebx,DWORD PTR 48[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 961987163[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR [esp] + ror edx,14 + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov ebx,DWORD PTR 52[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1508970993[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 28[esp] + ror edx,14 + mov edi,DWORD PTR [esp] + xor edx,ecx + mov ebx,DWORD PTR 56[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2453635748[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov ebx,DWORD PTR 60[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2870763221[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 20[esp] + ror edx,14 + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov ebx,DWORD PTR 64[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3624381080[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov ebx,DWORD PTR 68[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 310598401[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD 
PTR 8[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 12[esp] + ror edx,14 + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov ebx,DWORD PTR 72[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 607225278[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov ebx,DWORD PTR 76[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1426881987[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 4[esp] + ror edx,14 + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov ebx,DWORD PTR 80[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1925078388[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR [esp] + ror edx,14 + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov ebx,DWORD PTR 84[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2162078206[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 28[esp] + ror edx,14 + mov edi,DWORD PTR [esp] + xor edx,ecx + mov ebx,DWORD PTR 88[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2614888103[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov ebx,DWORD PTR 92[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3248222580[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 36[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 88[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor 
ebx,ecx + ror esi,17 + add ebx,DWORD PTR 32[esp] + shr edi,10 + add ebx,DWORD PTR 68[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 32[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3835390401[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 40[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 92[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 36[esp] + shr edi,10 + add ebx,DWORD PTR 72[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 36[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 4022224774[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 44[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 32[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 40[esp] + shr edi,10 + add ebx,DWORD PTR 76[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 40[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 264347078[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 48[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 36[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 44[esp] + shr edi,10 + add ebx,DWORD PTR 80[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 44[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 604807628[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 52[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 40[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 48[esp] + shr edi,10 + add ebx,DWORD PTR 84[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi 
+ mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 48[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 770255983[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 56[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 44[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 52[esp] + shr edi,10 + add ebx,DWORD PTR 88[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 52[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1249150122[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 60[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 48[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 56[esp] + shr edi,10 + add ebx,DWORD PTR 92[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 56[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1555081692[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 64[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 52[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 60[esp] + shr edi,10 + add ebx,DWORD PTR 32[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 60[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1996064986[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 68[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 56[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 64[esp] + shr edi,10 + add ebx,DWORD PTR 36[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 64[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 
28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2554220882[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 72[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 60[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 68[esp] + shr edi,10 + add ebx,DWORD PTR 40[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 68[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2821834349[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 76[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 64[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 72[esp] + shr edi,10 + add ebx,DWORD PTR 44[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 72[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2952996808[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 80[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 68[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 76[esp] + shr edi,10 + add ebx,DWORD PTR 48[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 76[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3210313671[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 84[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 72[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 80[esp] + shr edi,10 + add ebx,DWORD PTR 52[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 80[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + 
ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3336571891[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 88[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 76[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 84[esp] + shr edi,10 + add ebx,DWORD PTR 56[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 84[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3584528711[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 92[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 80[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 88[esp] + shr edi,10 + add ebx,DWORD PTR 60[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 88[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 113926993[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 32[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 84[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 92[esp] + shr edi,10 + add ebx,DWORD PTR 64[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 92[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 338241895[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 36[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 88[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 32[esp] + shr edi,10 + add ebx,DWORD PTR 68[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 32[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 666307205[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 40[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] 
+ add ebp,ecx + mov ecx,DWORD PTR 92[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 36[esp] + shr edi,10 + add ebx,DWORD PTR 72[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 36[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 773529912[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 44[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 32[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 40[esp] + shr edi,10 + add ebx,DWORD PTR 76[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 40[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1294757372[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 48[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 36[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 44[esp] + shr edi,10 + add ebx,DWORD PTR 80[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 44[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1396182291[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 52[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 40[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 48[esp] + shr edi,10 + add ebx,DWORD PTR 84[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 48[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1695183700[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 56[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 44[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror 
ecx,17 + add ebx,DWORD PTR 52[esp] + shr edi,10 + add ebx,DWORD PTR 88[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 52[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1986661051[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 60[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 48[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 56[esp] + shr edi,10 + add ebx,DWORD PTR 92[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 56[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2177026350[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 64[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 52[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 60[esp] + shr edi,10 + add ebx,DWORD PTR 32[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 60[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2456956037[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 68[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 56[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 64[esp] + shr edi,10 + add ebx,DWORD PTR 36[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 64[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2730485921[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 72[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 60[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 68[esp] + shr edi,10 + add ebx,DWORD PTR 40[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov 
edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 68[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2820302411[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 76[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 64[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 72[esp] + shr edi,10 + add ebx,DWORD PTR 44[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 72[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3259730800[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 80[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 68[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 76[esp] + shr edi,10 + add ebx,DWORD PTR 48[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 76[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3345764771[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 84[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 72[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 80[esp] + shr edi,10 + add ebx,DWORD PTR 52[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 80[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3516065817[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 88[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 76[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 84[esp] + shr edi,10 + add ebx,DWORD PTR 56[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 84[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + 
xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3600352804[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 92[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 80[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 88[esp] + shr edi,10 + add ebx,DWORD PTR 60[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 88[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 4094571909[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 32[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 84[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 92[esp] + shr edi,10 + add ebx,DWORD PTR 64[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 92[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 275423344[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 36[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 88[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 32[esp] + shr edi,10 + add ebx,DWORD PTR 68[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 32[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 430227734[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 40[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 92[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 36[esp] + shr edi,10 + add ebx,DWORD PTR 72[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 36[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror 
esi,11 + and eax,ebp + lea edx,DWORD PTR 506948616[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 44[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 32[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 40[esp] + shr edi,10 + add ebx,DWORD PTR 76[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 40[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 659060556[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 48[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 36[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 44[esp] + shr edi,10 + add ebx,DWORD PTR 80[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 44[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 883997877[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 52[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 40[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 48[esp] + shr edi,10 + add ebx,DWORD PTR 84[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 48[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 958139571[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 56[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 44[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 52[esp] + shr edi,10 + add ebx,DWORD PTR 88[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 52[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1322822218[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 60[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + 
add eax,esi + mov esi,DWORD PTR 48[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 56[esp] + shr edi,10 + add ebx,DWORD PTR 92[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 56[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1537002063[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 64[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 52[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 60[esp] + shr edi,10 + add ebx,DWORD PTR 32[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 60[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1747873779[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 68[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 56[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 64[esp] + shr edi,10 + add ebx,DWORD PTR 36[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 64[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1955562222[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 72[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 60[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 68[esp] + shr edi,10 + add ebx,DWORD PTR 40[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 68[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2024104815[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 76[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 64[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 
+ add ebx,DWORD PTR 72[esp] + shr edi,10 + add ebx,DWORD PTR 44[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 72[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2227730452[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 80[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 68[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 76[esp] + shr edi,10 + add ebx,DWORD PTR 48[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 76[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2361852424[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 84[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 72[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 80[esp] + shr edi,10 + add ebx,DWORD PTR 52[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 80[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2428436474[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 88[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 76[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 84[esp] + shr edi,10 + add ebx,DWORD PTR 56[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 84[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2756734187[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 92[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 80[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 88[esp] + shr edi,10 + add ebx,DWORD PTR 60[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 
[esp] + xor edx,ecx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3204031479[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 32[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 84[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 92[esp] + shr edi,10 + add ebx,DWORD PTR 64[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3329325298[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 96[esp] + xor ebp,edi + mov ecx,DWORD PTR 12[esp] + add eax,DWORD PTR [esi] + add ebp,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebp + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov DWORD PTR 4[esp],ebp + xor ebp,edi + mov DWORD PTR 8[esp],edi + mov DWORD PTR 12[esp],ecx + mov edi,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + mov ecx,DWORD PTR 28[esp] + add edx,DWORD PTR 16[esi] + add edi,DWORD PTR 20[esi] + add ebx,DWORD PTR 24[esi] + add ecx,DWORD PTR 28[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],edi + mov DWORD PTR 24[esi],ebx + mov DWORD PTR 28[esi],ecx + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 100[esp] + mov DWORD PTR 24[esp],ebx + mov DWORD PTR 28[esp],ecx + cmp edi,DWORD PTR 104[esp] + jb $L010grand_loop + mov esp,DWORD PTR 108[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L004shaext: + sub esp,32 + movdqu xmm1,XMMWORD PTR [esi] + lea ebp,DWORD PTR 128[ebp] + movdqu xmm2,XMMWORD PTR 16[esi] + movdqa xmm7,XMMWORD PTR 128[ebp] + pshufd xmm0,xmm1,27 + pshufd xmm1,xmm1,177 + pshufd xmm2,xmm2,27 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp $L011loop_shaext +ALIGN 16 +$L011loop_shaext: + movdqu xmm3,XMMWORD PTR [edi] + movdqu xmm4,XMMWORD PTR 16[edi] + movdqu xmm5,XMMWORD PTR 32[edi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD PTR 48[edi] + movdqa XMMWORD PTR 16[esp],xmm2 + movdqa xmm0,XMMWORD PTR [ebp-128] + paddd xmm0,xmm3 +DB 102,15,56,0,231 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + nop + movdqa XMMWORD PTR [esp],xmm1 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-112] + paddd xmm0,xmm4 +DB 102,15,56,0,239 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + lea edi,DWORD PTR 64[edi] +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-96] + paddd xmm0,xmm5 +DB 102,15,56,0,247 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-80] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-64] + paddd xmm0,xmm3 +DB 
15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-48] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-32] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-16] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 16[ebp] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 32[ebp] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 48[ebp] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 64[ebp] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 80[ebp] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 +DB 15,56,203,202 + paddd xmm6,xmm7 + movdqa xmm0,XMMWORD PTR 96[ebp] + paddd xmm0,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 +DB 15,56,205,245 + movdqa xmm7,XMMWORD PTR 128[ebp] +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 112[ebp] + paddd xmm0,xmm6 + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + cmp eax,edi + nop +DB 15,56,203,202 + paddd xmm2,XMMWORD PTR 16[esp] + paddd xmm1,XMMWORD PTR [esp] + jnz $L011loop_shaext + pshufd xmm2,xmm2,177 + pshufd xmm7,xmm1,27 + pshufd xmm1,xmm1,177 + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + mov esp,DWORD PTR 44[esp] + movdqu XMMWORD PTR [esi],xmm1 + movdqu XMMWORD PTR 16[esi],xmm2 + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L006SSSE3: + lea esp,DWORD PTR [esp-96] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edi,DWORD PTR 12[esi] + mov DWORD PTR 4[esp],ebx + xor ebx,ecx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edi + mov edx,DWORD PTR 16[esi] + mov edi,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov esi,DWORD PTR 28[esi] + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 100[esp] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],esi + movdqa xmm7,XMMWORD PTR 256[ebp] + jmp $L012grand_ssse3 +ALIGN 16 +$L012grand_ssse3: + movdqu xmm0,XMMWORD PTR [edi] + movdqu xmm1,XMMWORD PTR 16[edi] + movdqu xmm2,XMMWORD PTR 32[edi] + movdqu xmm3,XMMWORD PTR 48[edi] + add edi,64 +DB 102,15,56,0,199 + mov DWORD PTR 100[esp],edi +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD PTR [ebp] +DB 102,15,56,0,215 + movdqa xmm5,XMMWORD PTR 16[ebp] + 
paddd xmm4,xmm0 +DB 102,15,56,0,223 + movdqa xmm6,XMMWORD PTR 32[ebp] + paddd xmm5,xmm1 + movdqa xmm7,XMMWORD PTR 48[ebp] + movdqa XMMWORD PTR 32[esp],xmm4 + paddd xmm6,xmm2 + movdqa XMMWORD PTR 48[esp],xmm5 + paddd xmm7,xmm3 + movdqa XMMWORD PTR 64[esp],xmm6 + movdqa XMMWORD PTR 80[esp],xmm7 + jmp $L013ssse3_00_47 +ALIGN 16 +$L013ssse3_00_47: + add ebp,64 + mov ecx,edx + movdqa xmm4,xmm1 + ror edx,14 + mov esi,DWORD PTR 20[esp] + movdqa xmm7,xmm3 + xor edx,ecx + mov edi,DWORD PTR 24[esp] +DB 102,15,58,15,224,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,250,4 + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 4[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD PTR [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm3,250 + xor ecx,esi + add edx,DWORD PTR 32[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 12[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 12[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm4 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 36[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 8[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm0,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + pshufd xmm7,xmm0,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 40[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 4[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR [ebp] + and esi,ecx + mov DWORD PTR 4[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + paddd xmm6,xmm0 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 44[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + movdqa XMMWORD PTR 32[esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm2 + ror edx,14 + mov esi,DWORD PTR 4[esp] + movdqa xmm7,xmm0 + xor edx,ecx + mov edi,DWORD PTR 8[esp] +DB 102,15,58,15,225,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,251,4 + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 
20[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD PTR 16[esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm0,250 + xor ecx,esi + add edx,DWORD PTR 48[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 28[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 28[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm4 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 52[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 24[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm1,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + pshufd xmm7,xmm1,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 56[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 20[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR 16[ebp] + and esi,ecx + mov DWORD PTR 20[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + paddd xmm6,xmm1 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 60[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + movdqa XMMWORD PTR 48[esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm3 + ror edx,14 + mov esi,DWORD PTR 20[esp] + movdqa xmm7,xmm1 + xor edx,ecx + mov edi,DWORD PTR 24[esp] +DB 102,15,58,15,226,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,248,4 + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 4[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD PTR [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm1,250 + xor ecx,esi + add edx,DWORD PTR 64[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 12[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 12[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov 
edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm4 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 68[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 8[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm2,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + pshufd xmm7,xmm2,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 72[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 4[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR 32[ebp] + and esi,ecx + mov DWORD PTR 4[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + paddd xmm6,xmm2 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 76[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + movdqa XMMWORD PTR 64[esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm0 + ror edx,14 + mov esi,DWORD PTR 4[esp] + movdqa xmm7,xmm2 + xor edx,ecx + mov edi,DWORD PTR 8[esp] +DB 102,15,58,15,227,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,249,4 + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 20[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD PTR 16[esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm2,250 + xor ecx,esi + add edx,DWORD PTR 80[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 28[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 28[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm4 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 84[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 24[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm3,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + 
pshufd xmm7,xmm3,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 88[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 20[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR 48[ebp] + and esi,ecx + mov DWORD PTR 20[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + paddd xmm6,xmm3 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 92[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + movdqa XMMWORD PTR 80[esp],xmm6 + cmp DWORD PTR 64[ebp],66051 + jne $L013ssse3_00_47 + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 20[esp] + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 32[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 36[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 40[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 44[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 4[esp] + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 16[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 48[esp] + 
xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR [esp] + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 52[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 56[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 60[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 20[esp] + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 64[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 68[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 72[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 
16[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 76[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 4[esp] + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 16[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 80[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR [esp] + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 84[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 88[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 92[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + mov esi,DWORD PTR 96[esp] + xor ebx,edi + mov ecx,DWORD PTR 12[esp] + add eax,DWORD PTR [esi] + add ebx,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov DWORD PTR 4[esp],ebx + xor ebx,edi + mov DWORD PTR 8[esp],edi + mov DWORD PTR 12[esp],ecx + mov edi,DWORD PTR 20[esp] + mov ecx,DWORD PTR 24[esp] + add edx,DWORD PTR 16[esi] + add edi,DWORD PTR 20[esi] + add ecx,DWORD PTR 24[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],edi + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 28[esp] + mov DWORD PTR 24[esi],ecx + add edi,DWORD PTR 28[esi] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esi],edi + mov DWORD PTR 28[esp],edi + mov edi,DWORD PTR 100[esp] + movdqa xmm7,XMMWORD PTR 64[ebp] + sub ebp,192 + cmp edi,DWORD PTR 104[esp] + jb $L012grand_ssse3 + mov esp,DWORD PTR 108[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L005AVX: + and edx,264 + cmp edx,264 + je $L014AVX_BMI + lea esp,DWORD PTR [esp-96] + vzeroall + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edi,DWORD PTR 12[esi] + mov DWORD PTR 4[esp],ebx + xor ebx,ecx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edi + mov edx,DWORD PTR 16[esi] + mov 
edi,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov esi,DWORD PTR 28[esi] + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 100[esp] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],esi + vmovdqa xmm7,XMMWORD PTR 256[ebp] + jmp $L015grand_avx +ALIGN 32 +$L015grand_avx: + vmovdqu xmm0,XMMWORD PTR [edi] + vmovdqu xmm1,XMMWORD PTR 16[edi] + vmovdqu xmm2,XMMWORD PTR 32[edi] + vmovdqu xmm3,XMMWORD PTR 48[edi] + add edi,64 + vpshufb xmm0,xmm0,xmm7 + mov DWORD PTR 100[esp],edi + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD PTR [ebp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,XMMWORD PTR 16[ebp] + vpaddd xmm6,xmm2,XMMWORD PTR 32[ebp] + vpaddd xmm7,xmm3,XMMWORD PTR 48[ebp] + vmovdqa XMMWORD PTR 32[esp],xmm4 + vmovdqa XMMWORD PTR 48[esp],xmm5 + vmovdqa XMMWORD PTR 64[esp],xmm6 + vmovdqa XMMWORD PTR 80[esp],xmm7 + jmp $L016avx_00_47 +ALIGN 16 +$L016avx_00_47: + add ebp,64 + vpalignr xmm4,xmm1,xmm0,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 20[esp] + vpalignr xmm7,xmm3,xmm2,4 + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + vpaddd xmm0,xmm0,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR [esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + vpshufd xmm7,xmm3,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD PTR 32[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 16[esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + vpaddd xmm0,xmm0,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 28[esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD PTR 36[esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 12[esp] + vpaddd xmm0,xmm0,xmm7 + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + vpshufd xmm7,xmm0,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 24[esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD PTR 40[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + vpaddd xmm0,xmm0,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 8[esp] + vpaddd xmm6,xmm0,XMMWORD PTR [ebp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add 
edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 44[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + vmovdqa XMMWORD PTR 32[esp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 4[esp] + vpalignr xmm7,xmm0,xmm3,4 + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + vpaddd xmm1,xmm1,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 16[esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + vpshufd xmm7,xmm0,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD PTR 48[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR [esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + vpaddd xmm1,xmm1,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 12[esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD PTR 52[esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 28[esp] + vpaddd xmm1,xmm1,xmm7 + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + vpshufd xmm7,xmm1,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 8[esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD PTR 56[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + vpaddd xmm1,xmm1,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 24[esp] + vpaddd xmm6,xmm1,XMMWORD PTR 16[ebp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 60[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + vmovdqa XMMWORD PTR 48[esp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 20[esp] + vpalignr xmm7,xmm1,xmm0,4 + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + 
mov DWORD PTR 16[esp],ecx + vpaddd xmm2,xmm2,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR [esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + vpshufd xmm7,xmm1,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD PTR 64[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + vpslld xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 16[esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + vpaddd xmm2,xmm2,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 28[esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD PTR 68[esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 12[esp] + vpaddd xmm2,xmm2,xmm7 + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + vpshufd xmm7,xmm2,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 24[esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD PTR 72[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + vpaddd xmm2,xmm2,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 8[esp] + vpaddd xmm6,xmm2,XMMWORD PTR 32[ebp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 76[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + vmovdqa XMMWORD PTR 64[esp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 4[esp] + vpalignr xmm7,xmm2,xmm1,4 + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + vpsrld xmm6,xmm4,7 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + vpaddd xmm3,xmm3,xmm7 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrld xmm7,xmm4,3 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + vpslld xmm5,xmm4,14 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 16[esp],eax + vpxor xmm4,xmm7,xmm6 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + vpshufd xmm7,xmm2,250 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpsrld xmm6,xmm6,11 + add edx,DWORD PTR 80[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpxor xmm4,xmm4,xmm5 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + vpslld 
xmm5,xmm5,11 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR [esp] + vpxor xmm4,xmm4,xmm6 + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + vpsrld xmm6,xmm7,10 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + vpxor xmm4,xmm4,xmm5 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + vpaddd xmm3,xmm3,xmm4 + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 12[esp],ebx + vpxor xmm6,xmm6,xmm5 + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + vpsrlq xmm7,xmm7,19 + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + add edx,DWORD PTR 84[esp] + xor eax,edi + shrd ecx,ecx,2 + vpshufd xmm7,xmm6,132 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + vpsrldq xmm7,xmm7,8 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 28[esp] + vpaddd xmm3,xmm3,xmm7 + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + vpshufd xmm7,xmm3,80 + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + vpsrld xmm6,xmm7,10 + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + vpsrlq xmm5,xmm7,17 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + vpxor xmm6,xmm6,xmm5 + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 8[esp],eax + vpsrlq xmm7,xmm7,19 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + vpxor xmm6,xmm6,xmm7 + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + vpshufd xmm7,xmm6,232 + add edx,DWORD PTR 88[esp] + xor ebx,edi + shrd ecx,ecx,2 + vpslldq xmm7,xmm7,8 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + vpaddd xmm3,xmm3,xmm7 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 24[esp] + vpaddd xmm6,xmm3,XMMWORD PTR 48[ebp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 92[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + vmovdqa XMMWORD PTR 80[esp],xmm6 + cmp DWORD PTR 64[ebp],66051 + jne $L016avx_00_47 + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 20[esp] + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 32[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 36[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + shrd 
edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 24[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 40[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 44[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 4[esp] + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 16[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 48[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR [esp] + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 52[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 8[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 56[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 60[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 20[esp] + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 64[esp] + xor 
ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 68[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 24[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 72[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 76[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 4[esp] + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 16[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 80[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR [esp] + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 84[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + shrd ecx,ecx,9 + mov DWORD PTR 8[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + shrd ecx,ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 88[esp] + xor ebx,edi + shrd ecx,ecx,2 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + mov ecx,edx + shrd edx,edx,14 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + shrd edx,edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor 
edx,ecx + xor edi,esi + shrd edx,edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + shrd ecx,ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + shrd ecx,ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 92[esp] + xor eax,edi + shrd ecx,ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + mov esi,DWORD PTR 96[esp] + xor ebx,edi + mov ecx,DWORD PTR 12[esp] + add eax,DWORD PTR [esi] + add ebx,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov DWORD PTR 4[esp],ebx + xor ebx,edi + mov DWORD PTR 8[esp],edi + mov DWORD PTR 12[esp],ecx + mov edi,DWORD PTR 20[esp] + mov ecx,DWORD PTR 24[esp] + add edx,DWORD PTR 16[esi] + add edi,DWORD PTR 20[esi] + add ecx,DWORD PTR 24[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],edi + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 28[esp] + mov DWORD PTR 24[esi],ecx + add edi,DWORD PTR 28[esi] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esi],edi + mov DWORD PTR 28[esp],edi + mov edi,DWORD PTR 100[esp] + vmovdqa xmm7,XMMWORD PTR 64[ebp] + sub ebp,192 + cmp edi,DWORD PTR 104[esp] + jb $L015grand_avx + mov esp,DWORD PTR 108[esp] + vzeroall + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L014AVX_BMI: + lea esp,DWORD PTR [esp-96] + vzeroall + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edi,DWORD PTR 12[esi] + mov DWORD PTR 4[esp],ebx + xor ebx,ecx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edi + mov edx,DWORD PTR 16[esi] + mov edi,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov esi,DWORD PTR 28[esi] + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 100[esp] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],esi + vmovdqa xmm7,XMMWORD PTR 256[ebp] + jmp $L017grand_avx_bmi +ALIGN 32 +$L017grand_avx_bmi: + vmovdqu xmm0,XMMWORD PTR [edi] + vmovdqu xmm1,XMMWORD PTR 16[edi] + vmovdqu xmm2,XMMWORD PTR 32[edi] + vmovdqu xmm3,XMMWORD PTR 48[edi] + add edi,64 + vpshufb xmm0,xmm0,xmm7 + mov DWORD PTR 100[esp],edi + vpshufb xmm1,xmm1,xmm7 + vpshufb xmm2,xmm2,xmm7 + vpaddd xmm4,xmm0,XMMWORD PTR [ebp] + vpshufb xmm3,xmm3,xmm7 + vpaddd xmm5,xmm1,XMMWORD PTR 16[ebp] + vpaddd xmm6,xmm2,XMMWORD PTR 32[ebp] + vpaddd xmm7,xmm3,XMMWORD PTR 48[ebp] + vmovdqa XMMWORD PTR 32[esp],xmm4 + vmovdqa XMMWORD PTR 48[esp],xmm5 + vmovdqa XMMWORD PTR 64[esp],xmm6 + vmovdqa XMMWORD PTR 80[esp],xmm7 + jmp $L018avx_bmi_00_47 +ALIGN 16 +$L018avx_bmi_00_47: + add ebp,64 + vpalignr xmm4,xmm1,xmm0,4 + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 16[esp],edx + vpalignr xmm7,xmm3,xmm2,4 + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 24[esp] + vpsrld xmm6,xmm4,7 + xor ecx,edi + and edx,DWORD PTR 20[esp] + mov DWORD PTR [esp],eax + vpaddd xmm0,xmm0,xmm7 + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrld xmm7,xmm4,3 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpslld xmm5,xmm4,14 + mov edi,DWORD PTR 4[esp] + xor ecx,esi + xor eax,edi + vpxor xmm4,xmm7,xmm6 + add edx,DWORD PTR 28[esp] + and ebx,eax + add edx,DWORD PTR 32[esp] + vpshufd xmm7,xmm3,250 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 12[esp] + vpsrld xmm6,xmm6,11 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm4,xmm4,xmm5 + mov DWORD PTR 12[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpslld xmm5,xmm5,11 + andn esi,edx,DWORD PTR 20[esp] + xor ecx,edi + and edx,DWORD PTR 16[esp] + vpxor xmm4,xmm4,xmm6 
+ mov DWORD PTR 28[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpsrld xmm6,xmm7,10 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpxor xmm4,xmm4,xmm5 + mov edi,DWORD PTR [esp] + xor ecx,esi + xor ebx,edi + vpsrlq xmm5,xmm7,17 + add edx,DWORD PTR 24[esp] + and eax,ebx + add edx,DWORD PTR 36[esp] + vpaddd xmm0,xmm0,xmm4 + xor eax,edi + add ecx,edx + add edx,DWORD PTR 8[esp] + vpxor xmm6,xmm6,xmm5 + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + vpsrlq xmm7,xmm7,19 + mov DWORD PTR 8[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + andn esi,edx,DWORD PTR 16[esp] + xor ecx,edi + and edx,DWORD PTR 12[esp] + vpshufd xmm7,xmm6,132 + mov DWORD PTR 24[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrldq xmm7,xmm7,8 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpaddd xmm0,xmm0,xmm7 + mov edi,DWORD PTR 28[esp] + xor ecx,esi + xor eax,edi + vpshufd xmm7,xmm0,80 + add edx,DWORD PTR 20[esp] + and ebx,eax + add edx,DWORD PTR 40[esp] + vpsrld xmm6,xmm7,10 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 4[esp] + vpsrlq xmm5,xmm7,17 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm6,xmm6,xmm5 + mov DWORD PTR 4[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpsrlq xmm7,xmm7,19 + andn esi,edx,DWORD PTR 12[esp] + xor ecx,edi + and edx,DWORD PTR 8[esp] + vpxor xmm6,xmm6,xmm7 + mov DWORD PTR 20[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpshufd xmm7,xmm6,232 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpslldq xmm7,xmm7,8 + mov edi,DWORD PTR 24[esp] + xor ecx,esi + xor ebx,edi + vpaddd xmm0,xmm0,xmm7 + add edx,DWORD PTR 16[esp] + and eax,ebx + add edx,DWORD PTR 44[esp] + vpaddd xmm6,xmm0,XMMWORD PTR [ebp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR [esp] + lea eax,DWORD PTR [ecx*1+eax] + vmovdqa XMMWORD PTR 32[esp],xmm6 + vpalignr xmm4,xmm2,xmm1,4 + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR [esp],edx + vpalignr xmm7,xmm0,xmm3,4 + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 8[esp] + vpsrld xmm6,xmm4,7 + xor ecx,edi + and edx,DWORD PTR 4[esp] + mov DWORD PTR 16[esp],eax + vpaddd xmm1,xmm1,xmm7 + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrld xmm7,xmm4,3 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpslld xmm5,xmm4,14 + mov edi,DWORD PTR 20[esp] + xor ecx,esi + xor eax,edi + vpxor xmm4,xmm7,xmm6 + add edx,DWORD PTR 12[esp] + and ebx,eax + add edx,DWORD PTR 48[esp] + vpshufd xmm7,xmm0,250 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 28[esp] + vpsrld xmm6,xmm6,11 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm4,xmm4,xmm5 + mov DWORD PTR 28[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpslld xmm5,xmm5,11 + andn esi,edx,DWORD PTR 4[esp] + xor ecx,edi + and edx,DWORD PTR [esp] + vpxor xmm4,xmm4,xmm6 + mov DWORD PTR 12[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpsrld xmm6,xmm7,10 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpxor xmm4,xmm4,xmm5 + mov edi,DWORD PTR 16[esp] + xor ecx,esi + xor ebx,edi + vpsrlq xmm5,xmm7,17 + add edx,DWORD PTR 8[esp] + and eax,ebx + add edx,DWORD PTR 52[esp] + vpaddd xmm1,xmm1,xmm4 + xor eax,edi + add ecx,edx + add edx,DWORD PTR 24[esp] + vpxor xmm6,xmm6,xmm5 + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + vpsrlq xmm7,xmm7,19 + mov DWORD PTR 24[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + andn esi,edx,DWORD PTR [esp] + xor ecx,edi + and edx,DWORD PTR 28[esp] + 
vpshufd xmm7,xmm6,132 + mov DWORD PTR 8[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrldq xmm7,xmm7,8 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpaddd xmm1,xmm1,xmm7 + mov edi,DWORD PTR 12[esp] + xor ecx,esi + xor eax,edi + vpshufd xmm7,xmm1,80 + add edx,DWORD PTR 4[esp] + and ebx,eax + add edx,DWORD PTR 56[esp] + vpsrld xmm6,xmm7,10 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 20[esp] + vpsrlq xmm5,xmm7,17 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm6,xmm6,xmm5 + mov DWORD PTR 20[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpsrlq xmm7,xmm7,19 + andn esi,edx,DWORD PTR 28[esp] + xor ecx,edi + and edx,DWORD PTR 24[esp] + vpxor xmm6,xmm6,xmm7 + mov DWORD PTR 4[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpshufd xmm7,xmm6,232 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpslldq xmm7,xmm7,8 + mov edi,DWORD PTR 8[esp] + xor ecx,esi + xor ebx,edi + vpaddd xmm1,xmm1,xmm7 + add edx,DWORD PTR [esp] + and eax,ebx + add edx,DWORD PTR 60[esp] + vpaddd xmm6,xmm1,XMMWORD PTR 16[ebp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 16[esp] + lea eax,DWORD PTR [ecx*1+eax] + vmovdqa XMMWORD PTR 48[esp],xmm6 + vpalignr xmm4,xmm3,xmm2,4 + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 16[esp],edx + vpalignr xmm7,xmm1,xmm0,4 + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 24[esp] + vpsrld xmm6,xmm4,7 + xor ecx,edi + and edx,DWORD PTR 20[esp] + mov DWORD PTR [esp],eax + vpaddd xmm2,xmm2,xmm7 + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrld xmm7,xmm4,3 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpslld xmm5,xmm4,14 + mov edi,DWORD PTR 4[esp] + xor ecx,esi + xor eax,edi + vpxor xmm4,xmm7,xmm6 + add edx,DWORD PTR 28[esp] + and ebx,eax + add edx,DWORD PTR 64[esp] + vpshufd xmm7,xmm1,250 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 12[esp] + vpsrld xmm6,xmm6,11 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm4,xmm4,xmm5 + mov DWORD PTR 12[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpslld xmm5,xmm5,11 + andn esi,edx,DWORD PTR 20[esp] + xor ecx,edi + and edx,DWORD PTR 16[esp] + vpxor xmm4,xmm4,xmm6 + mov DWORD PTR 28[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpsrld xmm6,xmm7,10 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpxor xmm4,xmm4,xmm5 + mov edi,DWORD PTR [esp] + xor ecx,esi + xor ebx,edi + vpsrlq xmm5,xmm7,17 + add edx,DWORD PTR 24[esp] + and eax,ebx + add edx,DWORD PTR 68[esp] + vpaddd xmm2,xmm2,xmm4 + xor eax,edi + add ecx,edx + add edx,DWORD PTR 8[esp] + vpxor xmm6,xmm6,xmm5 + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + vpsrlq xmm7,xmm7,19 + mov DWORD PTR 8[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + andn esi,edx,DWORD PTR 16[esp] + xor ecx,edi + and edx,DWORD PTR 12[esp] + vpshufd xmm7,xmm6,132 + mov DWORD PTR 24[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrldq xmm7,xmm7,8 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpaddd xmm2,xmm2,xmm7 + mov edi,DWORD PTR 28[esp] + xor ecx,esi + xor eax,edi + vpshufd xmm7,xmm2,80 + add edx,DWORD PTR 20[esp] + and ebx,eax + add edx,DWORD PTR 72[esp] + vpsrld xmm6,xmm7,10 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 4[esp] + vpsrlq xmm5,xmm7,17 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm6,xmm6,xmm5 + mov DWORD PTR 4[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpsrlq xmm7,xmm7,19 + andn esi,edx,DWORD PTR 12[esp] + xor ecx,edi + and 
edx,DWORD PTR 8[esp] + vpxor xmm6,xmm6,xmm7 + mov DWORD PTR 20[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpshufd xmm7,xmm6,232 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpslldq xmm7,xmm7,8 + mov edi,DWORD PTR 24[esp] + xor ecx,esi + xor ebx,edi + vpaddd xmm2,xmm2,xmm7 + add edx,DWORD PTR 16[esp] + and eax,ebx + add edx,DWORD PTR 76[esp] + vpaddd xmm6,xmm2,XMMWORD PTR 32[ebp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR [esp] + lea eax,DWORD PTR [ecx*1+eax] + vmovdqa XMMWORD PTR 64[esp],xmm6 + vpalignr xmm4,xmm0,xmm3,4 + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR [esp],edx + vpalignr xmm7,xmm2,xmm1,4 + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 8[esp] + vpsrld xmm6,xmm4,7 + xor ecx,edi + and edx,DWORD PTR 4[esp] + mov DWORD PTR 16[esp],eax + vpaddd xmm3,xmm3,xmm7 + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrld xmm7,xmm4,3 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpslld xmm5,xmm4,14 + mov edi,DWORD PTR 20[esp] + xor ecx,esi + xor eax,edi + vpxor xmm4,xmm7,xmm6 + add edx,DWORD PTR 12[esp] + and ebx,eax + add edx,DWORD PTR 80[esp] + vpshufd xmm7,xmm2,250 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 28[esp] + vpsrld xmm6,xmm6,11 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm4,xmm4,xmm5 + mov DWORD PTR 28[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpslld xmm5,xmm5,11 + andn esi,edx,DWORD PTR 4[esp] + xor ecx,edi + and edx,DWORD PTR [esp] + vpxor xmm4,xmm4,xmm6 + mov DWORD PTR 12[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpsrld xmm6,xmm7,10 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpxor xmm4,xmm4,xmm5 + mov edi,DWORD PTR 16[esp] + xor ecx,esi + xor ebx,edi + vpsrlq xmm5,xmm7,17 + add edx,DWORD PTR 8[esp] + and eax,ebx + add edx,DWORD PTR 84[esp] + vpaddd xmm3,xmm3,xmm4 + xor eax,edi + add ecx,edx + add edx,DWORD PTR 24[esp] + vpxor xmm6,xmm6,xmm5 + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + vpsrlq xmm7,xmm7,19 + mov DWORD PTR 24[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpxor xmm6,xmm6,xmm7 + andn esi,edx,DWORD PTR [esp] + xor ecx,edi + and edx,DWORD PTR 28[esp] + vpshufd xmm7,xmm6,132 + mov DWORD PTR 8[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + vpsrldq xmm7,xmm7,8 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + vpaddd xmm3,xmm3,xmm7 + mov edi,DWORD PTR 12[esp] + xor ecx,esi + xor eax,edi + vpshufd xmm7,xmm3,80 + add edx,DWORD PTR 4[esp] + and ebx,eax + add edx,DWORD PTR 88[esp] + vpsrld xmm6,xmm7,10 + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 20[esp] + vpsrlq xmm5,xmm7,17 + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + vpxor xmm6,xmm6,xmm5 + mov DWORD PTR 20[esp],edx + rorx edi,edx,25 + xor ecx,esi + vpsrlq xmm7,xmm7,19 + andn esi,edx,DWORD PTR 28[esp] + xor ecx,edi + and edx,DWORD PTR 24[esp] + vpxor xmm6,xmm6,xmm7 + mov DWORD PTR 4[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + vpshufd xmm7,xmm6,232 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + vpslldq xmm7,xmm7,8 + mov edi,DWORD PTR 8[esp] + xor ecx,esi + xor ebx,edi + vpaddd xmm3,xmm3,xmm7 + add edx,DWORD PTR [esp] + and eax,ebx + add edx,DWORD PTR 92[esp] + vpaddd xmm6,xmm3,XMMWORD PTR 48[ebp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 16[esp] + lea eax,DWORD PTR [ecx*1+eax] + vmovdqa XMMWORD PTR 80[esp],xmm6 + cmp DWORD PTR 64[ebp],66051 + jne $L018avx_bmi_00_47 + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 16[esp],edx + rorx edi,edx,25 + xor 
ecx,esi + andn esi,edx,DWORD PTR 24[esp] + xor ecx,edi + and edx,DWORD PTR 20[esp] + mov DWORD PTR [esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 4[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 28[esp] + and ebx,eax + add edx,DWORD PTR 32[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 12[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 12[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 20[esp] + xor ecx,edi + and edx,DWORD PTR 16[esp] + mov DWORD PTR 28[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR [esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR 24[esp] + and eax,ebx + add edx,DWORD PTR 36[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 8[esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 8[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 16[esp] + xor ecx,edi + and edx,DWORD PTR 12[esp] + mov DWORD PTR 24[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 28[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 20[esp] + and ebx,eax + add edx,DWORD PTR 40[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 4[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 4[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 12[esp] + xor ecx,edi + and edx,DWORD PTR 8[esp] + mov DWORD PTR 20[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR 24[esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR 16[esp] + and eax,ebx + add edx,DWORD PTR 44[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR [esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR [esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 8[esp] + xor ecx,edi + and edx,DWORD PTR 4[esp] + mov DWORD PTR 16[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 20[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 12[esp] + and ebx,eax + add edx,DWORD PTR 48[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 28[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 28[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 4[esp] + xor ecx,edi + and edx,DWORD PTR [esp] + mov DWORD PTR 12[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR 16[esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR 8[esp] + and eax,ebx + add edx,DWORD PTR 52[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 24[esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 24[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR [esp] + xor ecx,edi + and edx,DWORD PTR 28[esp] + mov DWORD PTR 8[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 12[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 4[esp] + and ebx,eax + add edx,DWORD PTR 56[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 20[esp] + lea ebx,DWORD PTR 
[ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 20[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 28[esp] + xor ecx,edi + and edx,DWORD PTR 24[esp] + mov DWORD PTR 4[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR 8[esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR [esp] + and eax,ebx + add edx,DWORD PTR 60[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 16[esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 16[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 24[esp] + xor ecx,edi + and edx,DWORD PTR 20[esp] + mov DWORD PTR [esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 4[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 28[esp] + and ebx,eax + add edx,DWORD PTR 64[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 12[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 12[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 20[esp] + xor ecx,edi + and edx,DWORD PTR 16[esp] + mov DWORD PTR 28[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR [esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR 24[esp] + and eax,ebx + add edx,DWORD PTR 68[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 8[esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 8[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 16[esp] + xor ecx,edi + and edx,DWORD PTR 12[esp] + mov DWORD PTR 24[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 28[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 20[esp] + and ebx,eax + add edx,DWORD PTR 72[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 4[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 4[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 12[esp] + xor ecx,edi + and edx,DWORD PTR 8[esp] + mov DWORD PTR 20[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR 24[esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR 16[esp] + and eax,ebx + add edx,DWORD PTR 76[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR [esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR [esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 8[esp] + xor ecx,edi + and edx,DWORD PTR 4[esp] + mov DWORD PTR 16[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 20[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 12[esp] + and ebx,eax + add edx,DWORD PTR 80[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 28[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 28[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 4[esp] + xor ecx,edi + and edx,DWORD PTR [esp] + mov DWORD PTR 12[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR 16[esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR 8[esp] + and eax,ebx + add 
edx,DWORD PTR 84[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 24[esp] + lea eax,DWORD PTR [ecx*1+eax] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 24[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR [esp] + xor ecx,edi + and edx,DWORD PTR 28[esp] + mov DWORD PTR 8[esp],eax + or edx,esi + rorx edi,eax,2 + rorx esi,eax,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,eax,22 + xor esi,edi + mov edi,DWORD PTR 12[esp] + xor ecx,esi + xor eax,edi + add edx,DWORD PTR 4[esp] + and ebx,eax + add edx,DWORD PTR 88[esp] + xor ebx,edi + add ecx,edx + add edx,DWORD PTR 20[esp] + lea ebx,DWORD PTR [ecx*1+ebx] + rorx ecx,edx,6 + rorx esi,edx,11 + mov DWORD PTR 20[esp],edx + rorx edi,edx,25 + xor ecx,esi + andn esi,edx,DWORD PTR 28[esp] + xor ecx,edi + and edx,DWORD PTR 24[esp] + mov DWORD PTR 4[esp],ebx + or edx,esi + rorx edi,ebx,2 + rorx esi,ebx,13 + lea edx,DWORD PTR [ecx*1+edx] + rorx ecx,ebx,22 + xor esi,edi + mov edi,DWORD PTR 8[esp] + xor ecx,esi + xor ebx,edi + add edx,DWORD PTR [esp] + and eax,ebx + add edx,DWORD PTR 92[esp] + xor eax,edi + add ecx,edx + add edx,DWORD PTR 16[esp] + lea eax,DWORD PTR [ecx*1+eax] + mov esi,DWORD PTR 96[esp] + xor ebx,edi + mov ecx,DWORD PTR 12[esp] + add eax,DWORD PTR [esi] + add ebx,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov DWORD PTR 4[esp],ebx + xor ebx,edi + mov DWORD PTR 8[esp],edi + mov DWORD PTR 12[esp],ecx + mov edi,DWORD PTR 20[esp] + mov ecx,DWORD PTR 24[esp] + add edx,DWORD PTR 16[esi] + add edi,DWORD PTR 20[esi] + add ecx,DWORD PTR 24[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],edi + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 28[esp] + mov DWORD PTR 24[esi],ecx + add edi,DWORD PTR 28[esi] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esi],edi + mov DWORD PTR 28[esp],edi + mov edi,DWORD PTR 100[esp] + vmovdqa xmm7,XMMWORD PTR 64[ebp] + sub ebp,192 + cmp edi,DWORD PTR 104[esp] + jb $L017grand_avx_bmi + mov esp,DWORD PTR 108[esp] + vzeroall + pop edi + pop esi + pop ebx + pop ebp + ret +_sha256_block_data_order ENDP .text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm b/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm index 98c1c070d7fcd3..9a57e5af84be78 100644 --- a/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm +++ b/deps/openssl/asm/x86-win32-masm/sha/sha512-586.asm @@ -3,6 +3,13 @@ IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF .686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -10,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _sha512_block_data_order PROC PUBLIC $L_sha512_block_data_order_begin:: @@ -33,6 +41,2269 @@ $L000pic_point: mov DWORD PTR 4[esp],edi mov DWORD PTR 8[esp],eax mov DWORD PTR 12[esp],ebx + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov ecx,DWORD PTR [edx] + test ecx,67108864 + jz $L002loop_x86 + mov edx,DWORD PTR 4[edx] + movq mm0,QWORD PTR [esi] + and ecx,16777216 + movq mm1,QWORD PTR 8[esi] + and edx,512 + movq mm2,QWORD PTR 16[esi] + or ecx,edx + movq mm3,QWORD PTR 24[esi] + movq mm4,QWORD PTR 32[esi] + movq mm5,QWORD PTR 40[esi] + movq mm6,QWORD PTR 48[esi] + movq mm7,QWORD PTR 56[esi] + cmp ecx,16777728 + je $L003SSSE3 + sub esp,80 + jmp $L004loop_sse2 +ALIGN 16 +$L004loop_sse2: + movq QWORD PTR 8[esp],mm1 + movq QWORD PTR 16[esp],mm2 + movq QWORD PTR 24[esp],mm3 + movq QWORD PTR 40[esp],mm5 + movq QWORD PTR 48[esp],mm6 + pxor mm2,mm1 + movq QWORD PTR 56[esp],mm7 + movq mm3,mm0 + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + add edi,8 + mov edx,15 + bswap eax + bswap ebx + jmp $L00500_14_sse2 +ALIGN 16 +$L00500_14_sse2: + movd mm1,eax + mov eax,DWORD PTR [edi] + movd mm7,ebx + mov ebx,DWORD PTR 4[edi] + add edi,8 + bswap eax + bswap ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + movq mm6,QWORD PTR 48[esp] + dec edx + jnz $L00500_14_sse2 + movd mm1,eax + movd mm7,ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,QWORD PTR 192[esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + pxor mm0,mm0 + mov edx,32 + jmp $L00616_79_sse2 +ALIGN 16 +$L00616_79_sse2: + movq mm5,QWORD PTR 88[esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm0,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor 
mm1,mm5 + paddq mm7,QWORD PTR 200[esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,QWORD PTR 128[esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,QWORD PTR 40[esp] + pxor mm1,mm6 + movq mm6,QWORD PTR 48[esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,QWORD PTR 192[esp] + paddq mm2,mm6 + add ebp,8 + movq mm5,QWORD PTR 88[esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm2,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,QWORD PTR 200[esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,QWORD PTR 128[esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,QWORD PTR 40[esp] + pxor mm1,mm6 + movq mm6,QWORD PTR 48[esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm7,QWORD PTR 192[esp] + paddq mm0,mm6 + add ebp,8 + dec edx + jnz $L00616_79_sse2 + paddq mm0,mm3 + movq mm1,QWORD PTR 8[esp] + movq mm3,QWORD PTR 24[esp] + movq mm5,QWORD PTR 40[esp] + movq mm6,QWORD PTR 48[esp] + movq mm7,QWORD PTR 56[esp] + pxor mm2,mm1 + paddq mm0,QWORD PTR [esi] + paddq mm1,QWORD PTR 8[esi] + paddq mm2,QWORD PTR 16[esi] + paddq mm3,QWORD PTR 24[esi] + paddq mm4,QWORD PTR 32[esi] + paddq mm5,QWORD PTR 40[esi] + paddq mm6,QWORD PTR 48[esi] + paddq mm7,QWORD PTR 56[esi] + mov eax,640 + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm1 + movq QWORD PTR 16[esi],mm2 + movq QWORD PTR 24[esi],mm3 + movq QWORD PTR 32[esi],mm4 + movq QWORD PTR 40[esi],mm5 + movq QWORD PTR 48[esi],mm6 + movq QWORD PTR 56[esi],mm7 + lea esp,DWORD PTR [eax*1+esp] + sub ebp,eax + cmp edi,DWORD PTR 88[esp] + jb $L004loop_sse2 + mov esp,DWORD PTR 92[esp] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L003SSSE3: + lea edx,DWORD PTR [esp-64] + sub esp,256 + movdqa xmm1,XMMWORD PTR 640[ebp] + movdqu xmm0,XMMWORD PTR [edi] +DB 102,15,56,0,193 + movdqa xmm3,XMMWORD PTR [ebp] + movdqa xmm2,xmm1 + movdqu xmm1,XMMWORD PTR 16[edi] + paddq xmm3,xmm0 +DB 102,15,56,0,202 + movdqa XMMWORD PTR [edx-128],xmm3 + movdqa xmm4,XMMWORD PTR 16[ebp] + movdqa 
xmm3,xmm2 + movdqu xmm2,XMMWORD PTR 32[edi] + paddq xmm4,xmm1 +DB 102,15,56,0,211 + movdqa XMMWORD PTR [edx-112],xmm4 + movdqa xmm5,XMMWORD PTR 32[ebp] + movdqa xmm4,xmm3 + movdqu xmm3,XMMWORD PTR 48[edi] + paddq xmm5,xmm2 +DB 102,15,56,0,220 + movdqa XMMWORD PTR [edx-96],xmm5 + movdqa xmm6,XMMWORD PTR 48[ebp] + movdqa xmm5,xmm4 + movdqu xmm4,XMMWORD PTR 64[edi] + paddq xmm6,xmm3 +DB 102,15,56,0,229 + movdqa XMMWORD PTR [edx-80],xmm6 + movdqa xmm7,XMMWORD PTR 64[ebp] + movdqa xmm6,xmm5 + movdqu xmm5,XMMWORD PTR 80[edi] + paddq xmm7,xmm4 +DB 102,15,56,0,238 + movdqa XMMWORD PTR [edx-64],xmm7 + movdqa XMMWORD PTR [edx],xmm0 + movdqa xmm0,XMMWORD PTR 80[ebp] + movdqa xmm7,xmm6 + movdqu xmm6,XMMWORD PTR 96[edi] + paddq xmm0,xmm5 +DB 102,15,56,0,247 + movdqa XMMWORD PTR [edx-48],xmm0 + movdqa XMMWORD PTR 16[edx],xmm1 + movdqa xmm1,XMMWORD PTR 96[ebp] + movdqa xmm0,xmm7 + movdqu xmm7,XMMWORD PTR 112[edi] + paddq xmm1,xmm6 +DB 102,15,56,0,248 + movdqa XMMWORD PTR [edx-32],xmm1 + movdqa XMMWORD PTR 32[edx],xmm2 + movdqa xmm2,XMMWORD PTR 112[ebp] + movdqa xmm0,XMMWORD PTR [edx] + paddq xmm2,xmm7 + movdqa XMMWORD PTR [edx-16],xmm2 + nop +ALIGN 32 +$L007loop_ssse3: + movdqa xmm2,XMMWORD PTR 16[edx] + movdqa XMMWORD PTR 48[edx],xmm3 + lea ebp,DWORD PTR 128[ebp] + movq QWORD PTR 8[esp],mm1 + mov ebx,edi + movq QWORD PTR 16[esp],mm2 + lea edi,DWORD PTR 128[edi] + movq QWORD PTR 24[esp],mm3 + cmp edi,eax + movq QWORD PTR 40[esp],mm5 + cmovb ebx,edi + movq QWORD PTR 48[esp],mm6 + mov ecx,4 + pxor mm2,mm1 + movq QWORD PTR 56[esp],mm7 + pxor mm3,mm3 + jmp $L00800_47_ssse3 +ALIGN 32 +$L00800_47_ssse3: + movdqa xmm3,xmm5 + movdqa xmm1,xmm2 +DB 102,15,58,15,208,8 + movdqa XMMWORD PTR [edx],xmm4 +DB 102,15,58,15,220,8 + movdqa xmm4,xmm2 + psrlq xmm2,7 + paddq xmm0,xmm3 + movdqa xmm3,xmm4 + psrlq xmm4,1 + psllq xmm3,56 + pxor xmm2,xmm4 + psrlq xmm4,7 + pxor xmm2,xmm3 + psllq xmm3,7 + pxor xmm2,xmm4 + movdqa xmm4,xmm7 + pxor xmm2,xmm3 + movdqa xmm3,xmm7 + psrlq xmm4,6 + paddq xmm0,xmm2 + movdqa xmm2,xmm7 + psrlq xmm3,19 + psllq xmm2,3 + pxor xmm4,xmm3 + psrlq xmm3,42 + pxor xmm4,xmm2 + psllq xmm2,42 + pxor xmm4,xmm3 + movdqa xmm3,XMMWORD PTR 32[edx] + pxor xmm4,xmm2 + movdqa xmm2,XMMWORD PTR [ebp] + movq mm1,mm4 + paddq xmm0,xmm4 + movq mm7,QWORD PTR [edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + paddq xmm2,xmm0 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq 
mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-128],xmm2 + movdqa xmm4,xmm6 + movdqa xmm2,xmm3 +DB 102,15,58,15,217,8 + movdqa XMMWORD PTR 16[edx],xmm5 +DB 102,15,58,15,229,8 + movdqa xmm5,xmm3 + psrlq xmm3,7 + paddq xmm1,xmm4 + movdqa xmm4,xmm5 + psrlq xmm5,1 + psllq xmm4,56 + pxor xmm3,xmm5 + psrlq xmm5,7 + pxor xmm3,xmm4 + psllq xmm4,7 + pxor xmm3,xmm5 + movdqa xmm5,xmm0 + pxor xmm3,xmm4 + movdqa xmm4,xmm0 + psrlq xmm5,6 + paddq xmm1,xmm3 + movdqa xmm3,xmm0 + psrlq xmm4,19 + psllq xmm3,3 + pxor xmm5,xmm4 + psrlq xmm4,42 + pxor xmm5,xmm3 + psllq xmm3,42 + pxor xmm5,xmm4 + movdqa xmm4,XMMWORD PTR 48[edx] + pxor xmm5,xmm3 + movdqa xmm3,XMMWORD PTR 16[ebp] + movq mm1,mm4 + paddq xmm1,xmm5 + movq mm7,QWORD PTR [edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + paddq xmm3,xmm1 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-112],xmm3 + movdqa xmm5,xmm7 + movdqa xmm3,xmm4 +DB 102,15,58,15,226,8 + movdqa XMMWORD PTR 32[edx],xmm6 +DB 102,15,58,15,238,8 + movdqa xmm6,xmm4 + psrlq xmm4,7 + paddq xmm2,xmm5 + movdqa xmm5,xmm6 + psrlq xmm6,1 + psllq xmm5,56 + pxor xmm4,xmm6 + psrlq xmm6,7 + pxor xmm4,xmm5 + psllq xmm5,7 + pxor xmm4,xmm6 + movdqa xmm6,xmm1 + pxor xmm4,xmm5 + movdqa xmm5,xmm1 + psrlq xmm6,6 + paddq xmm2,xmm4 + movdqa xmm4,xmm1 + psrlq xmm5,19 + psllq xmm4,3 + pxor xmm6,xmm5 + psrlq xmm5,42 + pxor xmm6,xmm4 + psllq xmm4,42 + pxor xmm6,xmm5 + movdqa xmm5,XMMWORD PTR [edx] + pxor xmm6,xmm4 + movdqa xmm4,XMMWORD PTR 32[ebp] + movq mm1,mm4 + paddq xmm2,xmm6 + movq mm7,QWORD PTR [edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + paddq xmm4,xmm2 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor 
mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-96],xmm4 + movdqa xmm6,xmm0 + movdqa xmm4,xmm5 +DB 102,15,58,15,235,8 + movdqa XMMWORD PTR 48[edx],xmm7 +DB 102,15,58,15,247,8 + movdqa xmm7,xmm5 + psrlq xmm5,7 + paddq xmm3,xmm6 + movdqa xmm6,xmm7 + psrlq xmm7,1 + psllq xmm6,56 + pxor xmm5,xmm7 + psrlq xmm7,7 + pxor xmm5,xmm6 + psllq xmm6,7 + pxor xmm5,xmm7 + movdqa xmm7,xmm2 + pxor xmm5,xmm6 + movdqa xmm6,xmm2 + psrlq xmm7,6 + paddq xmm3,xmm5 + movdqa xmm5,xmm2 + psrlq xmm6,19 + psllq xmm5,3 + pxor xmm7,xmm6 + psrlq xmm6,42 + pxor xmm7,xmm5 + psllq xmm5,42 + pxor xmm7,xmm6 + movdqa xmm6,XMMWORD PTR 16[edx] + pxor xmm7,xmm5 + movdqa xmm5,XMMWORD PTR 48[ebp] + movq mm1,mm4 + paddq xmm3,xmm7 + movq mm7,QWORD PTR [edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + paddq xmm5,xmm3 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq 
mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-80],xmm5 + movdqa xmm7,xmm1 + movdqa xmm5,xmm6 +DB 102,15,58,15,244,8 + movdqa XMMWORD PTR [edx],xmm0 +DB 102,15,58,15,248,8 + movdqa xmm0,xmm6 + psrlq xmm6,7 + paddq xmm4,xmm7 + movdqa xmm7,xmm0 + psrlq xmm0,1 + psllq xmm7,56 + pxor xmm6,xmm0 + psrlq xmm0,7 + pxor xmm6,xmm7 + psllq xmm7,7 + pxor xmm6,xmm0 + movdqa xmm0,xmm3 + pxor xmm6,xmm7 + movdqa xmm7,xmm3 + psrlq xmm0,6 + paddq xmm4,xmm6 + movdqa xmm6,xmm3 + psrlq xmm7,19 + psllq xmm6,3 + pxor xmm0,xmm7 + psrlq xmm7,42 + pxor xmm0,xmm6 + psllq xmm6,42 + pxor xmm0,xmm7 + movdqa xmm7,XMMWORD PTR 32[edx] + pxor xmm0,xmm6 + movdqa xmm6,XMMWORD PTR 64[ebp] + movq mm1,mm4 + paddq xmm4,xmm0 + movq mm7,QWORD PTR [edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + paddq xmm6,xmm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-64],xmm6 + movdqa xmm0,xmm2 + movdqa xmm6,xmm7 +DB 102,15,58,15,253,8 + movdqa XMMWORD PTR 16[edx],xmm1 +DB 102,15,58,15,193,8 + movdqa xmm1,xmm7 + psrlq xmm7,7 + paddq xmm5,xmm0 + movdqa xmm0,xmm1 + psrlq xmm1,1 + psllq xmm0,56 + pxor xmm7,xmm1 + psrlq xmm1,7 + pxor xmm7,xmm0 + psllq xmm0,7 + pxor xmm7,xmm1 + movdqa xmm1,xmm4 + pxor xmm7,xmm0 + movdqa xmm0,xmm4 + psrlq xmm1,6 + paddq xmm5,xmm7 + movdqa xmm7,xmm4 + psrlq xmm0,19 + psllq xmm7,3 + pxor xmm1,xmm0 + psrlq xmm0,42 + pxor xmm1,xmm7 + psllq xmm7,42 + pxor xmm1,xmm0 + movdqa xmm0,XMMWORD PTR 48[edx] + pxor xmm1,xmm7 + movdqa xmm7,XMMWORD PTR 80[ebp] + movq mm1,mm4 + paddq xmm5,xmm1 + movq mm7,QWORD PTR [edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + paddq xmm7,xmm5 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq 
mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-48],xmm7 + movdqa xmm1,xmm3 + movdqa xmm7,xmm0 +DB 102,15,58,15,198,8 + movdqa XMMWORD PTR 32[edx],xmm2 +DB 102,15,58,15,202,8 + movdqa xmm2,xmm0 + psrlq xmm0,7 + paddq xmm6,xmm1 + movdqa xmm1,xmm2 + psrlq xmm2,1 + psllq xmm1,56 + pxor xmm0,xmm2 + psrlq xmm2,7 + pxor xmm0,xmm1 + psllq xmm1,7 + pxor xmm0,xmm2 + movdqa xmm2,xmm5 + pxor xmm0,xmm1 + movdqa xmm1,xmm5 + psrlq xmm2,6 + paddq xmm6,xmm0 + movdqa xmm0,xmm5 + psrlq xmm1,19 + psllq xmm0,3 + pxor xmm2,xmm1 + psrlq xmm1,42 + pxor xmm2,xmm0 + psllq xmm0,42 + pxor xmm2,xmm1 + movdqa xmm1,XMMWORD PTR [edx] + pxor xmm2,xmm0 + movdqa xmm0,XMMWORD PTR 96[ebp] + movq mm1,mm4 + paddq xmm6,xmm2 + movq mm7,QWORD PTR [edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + paddq xmm0,xmm6 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-32],xmm0 + movdqa xmm2,xmm4 + movdqa xmm0,xmm1 +DB 102,15,58,15,207,8 + movdqa XMMWORD PTR 48[edx],xmm3 +DB 102,15,58,15,211,8 + movdqa xmm3,xmm1 + psrlq xmm1,7 + paddq xmm7,xmm2 + movdqa xmm2,xmm3 + psrlq xmm3,1 
+ psllq xmm2,56 + pxor xmm1,xmm3 + psrlq xmm3,7 + pxor xmm1,xmm2 + psllq xmm2,7 + pxor xmm1,xmm3 + movdqa xmm3,xmm6 + pxor xmm1,xmm2 + movdqa xmm2,xmm6 + psrlq xmm3,6 + paddq xmm7,xmm1 + movdqa xmm1,xmm6 + psrlq xmm2,19 + psllq xmm1,3 + pxor xmm3,xmm2 + psrlq xmm2,42 + pxor xmm3,xmm1 + psllq xmm1,42 + pxor xmm3,xmm2 + movdqa xmm2,XMMWORD PTR 16[edx] + pxor xmm3,xmm1 + movdqa xmm1,XMMWORD PTR 112[ebp] + movq mm1,mm4 + paddq xmm7,xmm3 + movq mm7,QWORD PTR [edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + paddq xmm1,xmm7 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-16],xmm1 + lea ebp,DWORD PTR 128[ebp] + dec ecx + jnz $L00800_47_ssse3 + movdqa xmm1,XMMWORD PTR [ebp] + lea ebp,DWORD PTR [ebp-640] + movdqu xmm0,XMMWORD PTR [ebx] +DB 102,15,56,0,193 + movdqa xmm3,XMMWORD PTR [ebp] + movdqa xmm2,xmm1 + movdqu xmm1,XMMWORD PTR 16[ebx] + paddq xmm3,xmm0 +DB 102,15,56,0,202 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + 
pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-128],xmm3 + movdqa xmm4,XMMWORD PTR 16[ebp] + movdqa xmm3,xmm2 + movdqu xmm2,XMMWORD PTR 32[ebx] + paddq xmm4,xmm1 +DB 102,15,56,0,211 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-112],xmm4 + movdqa xmm5,XMMWORD PTR 32[ebp] + movdqa xmm4,xmm3 + movdqu xmm3,XMMWORD PTR 48[ebx] + paddq xmm5,xmm2 +DB 102,15,56,0,220 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD 
PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-96],xmm5 + movdqa xmm6,XMMWORD PTR 48[ebp] + movdqa xmm5,xmm4 + movdqu xmm4,XMMWORD PTR 64[ebx] + paddq xmm6,xmm3 +DB 102,15,56,0,229 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-80],xmm6 + movdqa xmm7,XMMWORD PTR 64[ebp] + movdqa xmm6,xmm5 + movdqu xmm5,XMMWORD PTR 80[ebx] + paddq xmm7,xmm4 +DB 102,15,56,0,238 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + 
movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-64],xmm7 + movdqa XMMWORD PTR [edx],xmm0 + movdqa xmm0,XMMWORD PTR 80[ebp] + movdqa xmm7,xmm6 + movdqu xmm6,XMMWORD PTR 96[ebx] + paddq xmm0,xmm5 +DB 102,15,56,0,247 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-48],xmm0 + movdqa XMMWORD PTR 16[edx],xmm1 + movdqa xmm1,XMMWORD PTR 96[ebp] + movdqa xmm0,xmm7 + movdqu xmm7,XMMWORD PTR 112[ebx] + paddq xmm1,xmm6 +DB 102,15,56,0,248 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq 
mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-32],xmm1 + movdqa XMMWORD PTR 32[edx],xmm2 + movdqa xmm2,XMMWORD PTR 112[ebp] + movdqa xmm0,XMMWORD PTR [edx] + paddq xmm2,xmm7 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-16],xmm2 + movq mm1,QWORD PTR 8[esp] + paddq mm0,mm3 + movq mm3,QWORD PTR 24[esp] + movq mm7,QWORD PTR 56[esp] + pxor mm2,mm1 + paddq mm0,QWORD PTR [esi] + paddq mm1,QWORD PTR 8[esi] + paddq mm2,QWORD PTR 16[esi] + paddq mm3,QWORD PTR 24[esi] + paddq mm4,QWORD PTR 32[esi] + paddq mm5,QWORD PTR 40[esi] + paddq mm6,QWORD PTR 48[esi] + paddq mm7,QWORD PTR 56[esi] + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm1 + movq QWORD PTR 16[esi],mm2 + movq QWORD PTR 24[esi],mm3 + movq QWORD PTR 32[esi],mm4 + movq QWORD PTR 40[esi],mm5 + movq QWORD PTR 48[esi],mm6 + movq QWORD PTR 56[esi],mm7 + cmp edi,eax + jb $L007loop_ssse3 + mov esp,DWORD PTR 76[edx] + emms + pop edi + pop esi + pop ebx + pop ebp + ret ALIGN 16 $L002loop_x86: mov eax,DWORD PTR [edi] @@ -138,7 +2409,7 @@ $L002loop_x86: mov ecx,16 DD 2784229001 ALIGN 16 -$L00300_15_x86: +$L00900_15_x86: mov ecx,DWORD PTR 40[esp] mov edx,DWORD PTR 44[esp] mov esi,ecx @@ -245,9 +2516,9 @@ $L00300_15_x86: sub esp,8 lea ebp,DWORD PTR 8[ebp] cmp dl,148 - jne $L00300_15_x86 + jne $L00900_15_x86 ALIGN 16 -$L00416_79_x86: +$L01016_79_x86: mov ecx,DWORD PTR 312[esp] mov edx,DWORD PTR 316[esp] mov esi,ecx @@ -420,7 +2691,7 @@ $L00416_79_x86: sub esp,8 lea ebp,DWORD PTR 8[ebp] cmp dl,23 - jne $L00416_79_x86 + jne $L01016_79_x86 mov esi,DWORD PTR 840[esp] mov edi,DWORD PTR 844[esp] mov eax,DWORD PTR [esi] @@ 
-563,6 +2834,8 @@ DD 3409855158,1288033470 DD 4234509866,1501505948 DD 987167468,1607167915 DD 1246189591,1816402316 +DD 67438087,66051 +DD 202182159,134810123 _sha512_block_data_order ENDP DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 @@ -570,4 +2843,7 @@ DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 DB 62,0 .text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS END diff --git a/deps/openssl/asm/x86-win32-masm/whrlpool/wp-mmx.asm b/deps/openssl/asm/x86-win32-masm/whrlpool/wp-mmx.asm index 9fa36662ce9a02..22a17d3510940c 100644 --- a/deps/openssl/asm/x86-win32-masm/whrlpool/wp-mmx.asm +++ b/deps/openssl/asm/x86-win32-masm/whrlpool/wp-mmx.asm @@ -81,228 +81,230 @@ $L003round: movq mm0,QWORD PTR 4096[esi*8+ebp] mov eax,DWORD PTR [esp] mov ebx,DWORD PTR 4[esp] - mov cl,al - mov dl,ah + movzx ecx,al + movzx edx,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm0,QWORD PTR [esi*8+ebp] movq mm1,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 8[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh movq mm2,QWORD PTR 6[esi*8+ebp] movq mm3,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh movq mm4,QWORD PTR 4[esi*8+ebp] movq mm5,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 12[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah movq mm6,QWORD PTR 2[esi*8+ebp] movq mm7,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm1,QWORD PTR [esi*8+ebp] pxor mm2,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 16[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm3,QWORD PTR 6[esi*8+ebp] pxor mm4,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm5,QWORD PTR 4[esi*8+ebp] pxor mm6,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 20[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm7,QWORD PTR 2[esi*8+ebp] pxor mm0,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm2,QWORD PTR [esi*8+ebp] pxor mm3,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 24[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm4,QWORD PTR 6[esi*8+ebp] pxor mm5,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm6,QWORD PTR 4[esi*8+ebp] pxor mm7,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 28[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm0,QWORD PTR 2[esi*8+ebp] pxor mm1,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor 
mm3,QWORD PTR [esi*8+ebp] pxor mm4,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 32[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm5,QWORD PTR 6[esi*8+ebp] pxor mm6,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm7,QWORD PTR 4[esi*8+ebp] pxor mm0,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 36[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm1,QWORD PTR 2[esi*8+ebp] pxor mm2,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm4,QWORD PTR [esi*8+ebp] pxor mm5,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 40[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm6,QWORD PTR 6[esi*8+ebp] pxor mm7,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm0,QWORD PTR 4[esi*8+ebp] pxor mm1,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 44[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm2,QWORD PTR 2[esi*8+ebp] pxor mm3,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm5,QWORD PTR [esi*8+ebp] pxor mm6,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 48[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm7,QWORD PTR 6[esi*8+ebp] pxor mm0,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm1,QWORD PTR 4[esi*8+ebp] pxor mm2,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 52[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm3,QWORD PTR 2[esi*8+ebp] pxor mm4,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm6,QWORD PTR [esi*8+ebp] pxor mm7,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 56[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm0,QWORD PTR 6[esi*8+ebp] pxor mm1,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm2,QWORD PTR 4[esi*8+ebp] pxor mm3,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 60[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm4,QWORD PTR 2[esi*8+ebp] pxor mm5,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm7,QWORD PTR [esi*8+ebp] pxor mm0,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 64[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm1,QWORD PTR 6[esi*8+ebp] pxor mm2,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr 
ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm3,QWORD PTR 4[esi*8+ebp] pxor mm4,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 68[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm5,QWORD PTR 2[esi*8+ebp] pxor mm6,QWORD PTR 1[edi*8+ebp] movq QWORD PTR [esp],mm0 @@ -313,226 +315,226 @@ $L003round: movq QWORD PTR 40[esp],mm5 movq QWORD PTR 48[esp],mm6 movq QWORD PTR 56[esp],mm7 - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm0,QWORD PTR [esi*8+ebp] pxor mm1,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 72[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm2,QWORD PTR 6[esi*8+ebp] pxor mm3,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm4,QWORD PTR 4[esi*8+ebp] pxor mm5,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 76[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm6,QWORD PTR 2[esi*8+ebp] pxor mm7,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm1,QWORD PTR [esi*8+ebp] pxor mm2,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 80[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm3,QWORD PTR 6[esi*8+ebp] pxor mm4,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm5,QWORD PTR 4[esi*8+ebp] pxor mm6,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 84[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm7,QWORD PTR 2[esi*8+ebp] pxor mm0,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm2,QWORD PTR [esi*8+ebp] pxor mm3,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 88[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm4,QWORD PTR 6[esi*8+ebp] pxor mm5,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm6,QWORD PTR 4[esi*8+ebp] pxor mm7,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 92[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm0,QWORD PTR 2[esi*8+ebp] pxor mm1,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm3,QWORD PTR [esi*8+ebp] pxor mm4,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 96[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm5,QWORD PTR 6[esi*8+ebp] pxor mm6,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm7,QWORD PTR 4[esi*8+ebp] pxor 
mm0,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 100[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm1,QWORD PTR 2[esi*8+ebp] pxor mm2,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm4,QWORD PTR [esi*8+ebp] pxor mm5,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 104[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm6,QWORD PTR 6[esi*8+ebp] pxor mm7,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm0,QWORD PTR 4[esi*8+ebp] pxor mm1,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 108[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm2,QWORD PTR 2[esi*8+ebp] pxor mm3,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm5,QWORD PTR [esi*8+ebp] pxor mm6,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 112[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm7,QWORD PTR 6[esi*8+ebp] pxor mm0,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm1,QWORD PTR 4[esi*8+ebp] pxor mm2,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 116[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm3,QWORD PTR 2[esi*8+ebp] pxor mm4,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm6,QWORD PTR [esi*8+ebp] pxor mm7,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah mov eax,DWORD PTR 120[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm0,QWORD PTR 6[esi*8+ebp] pxor mm1,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm2,QWORD PTR 4[esi*8+ebp] pxor mm3,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh mov ebx,DWORD PTR 124[esp] lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm4,QWORD PTR 2[esi*8+ebp] pxor mm5,QWORD PTR 1[edi*8+ebp] - mov cl,al - mov dl,ah + shr eax,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] - shr eax,16 + movzx edx,ah pxor mm7,QWORD PTR [esi*8+ebp] pxor mm0,QWORD PTR 7[edi*8+ebp] - mov cl,al - mov dl,ah lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh pxor mm1,QWORD PTR 6[esi*8+ebp] pxor mm2,QWORD PTR 5[edi*8+ebp] - mov cl,bl - mov dl,bh + shr ebx,16 lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl lea edi,DWORD PTR [edx*1+edx] - shr ebx,16 + movzx edx,bh pxor mm3,QWORD PTR 4[esi*8+ebp] pxor mm4,QWORD PTR 3[edi*8+ebp] - mov cl,bl - mov dl,bh lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah pxor mm5,QWORD PTR 2[esi*8+ebp] pxor mm6,QWORD PTR 1[edi*8+ebp] lea ebx,DWORD PTR 128[esp] diff --git a/deps/openssl/asm/x86-win32-masm/x86cpuid.asm 
b/deps/openssl/asm/x86-win32-masm/x86cpuid.asm index c361e17cc301d9..b4462fc2aa72bd 100644 --- a/deps/openssl/asm/x86-win32-masm/x86cpuid.asm +++ b/deps/openssl/asm/x86-win32-masm/x86cpuid.asm @@ -3,6 +3,13 @@ IF @Version LT 800 ECHO MASM version 8.00 or later is strongly recommended. ENDIF .686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + .MODEL FLAT OPTION DOTNAME IF @Version LT 800 @@ -30,6 +37,8 @@ $L_OPENSSL_ia32_cpuid_begin:: xor eax,eax bt ecx,21 jnc $L000nocpuid + mov esi,DWORD PTR 20[esp] + mov DWORD PTR 8[esi],eax cpuid mov edi,eax xor eax,eax @@ -80,28 +89,36 @@ $L_OPENSSL_ia32_cpuid_begin:: and edx,4026531839 jmp $L002generic $L001intel: + cmp edi,7 + jb $L003cacheinfo + mov esi,DWORD PTR 20[esp] + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD PTR 8[esi],ebx +$L003cacheinfo: cmp edi,4 mov edi,-1 - jb $L003nocacheinfo + jb $L004nocacheinfo mov eax,4 mov ecx,0 cpuid mov edi,eax shr edi,14 and edi,4095 -$L003nocacheinfo: +$L004nocacheinfo: mov eax,1 xor ecx,ecx cpuid and edx,3220176895 cmp ebp,0 - jne $L004notintel + jne $L005notintel or edx,1073741824 and ah,15 cmp ah,15 - jne $L004notintel + jne $L005notintel or edx,1048576 -$L004notintel: +$L005notintel: bt edx,28 jnc $L002generic and edx,4026531839 @@ -118,20 +135,22 @@ $L002generic: mov esi,edx or ebp,ecx bt ecx,27 - jnc $L005clear_avx + jnc $L006clear_avx xor ecx,ecx DB 15,1,208 and eax,6 cmp eax,6 - je $L006done + je $L007done cmp eax,2 - je $L005clear_avx -$L007clear_xmm: + je $L006clear_avx +$L008clear_xmm: and ebp,4261412861 and esi,4278190079 -$L005clear_avx: +$L006clear_avx: and ebp,4026525695 -$L006done: + mov edi,DWORD PTR 20[esp] + and DWORD PTR 8[edi],4294967263 +$L007done: mov eax,esi mov edx,ebp $L000nocpuid: @@ -149,9 +168,9 @@ $L_OPENSSL_rdtsc_begin:: xor edx,edx lea ecx,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [ecx],4 - jnc $L008notsc + jnc $L009notsc rdtsc -$L008notsc: +$L009notsc: ret _OPENSSL_rdtsc ENDP ALIGN 16 @@ -159,14 +178,14 @@ _OPENSSL_instrument_halt PROC PUBLIC $L_OPENSSL_instrument_halt_begin:: lea ecx,DWORD PTR _OPENSSL_ia32cap_P bt DWORD PTR [ecx],4 - jnc $L009nohalt + jnc $L010nohalt DD 2421723150 and eax,3 - jnz $L009nohalt + jnz $L010nohalt pushfd pop eax bt eax,9 - jnc $L009nohalt + jnc $L010nohalt rdtsc push edx push eax @@ -176,7 +195,7 @@ DD 2421723150 sbb edx,DWORD PTR 4[esp] add esp,8 ret -$L009nohalt: +$L010nohalt: xor eax,eax xor edx,edx ret @@ -187,21 +206,21 @@ $L_OPENSSL_far_spin_begin:: pushfd pop eax bt eax,9 - jnc $L010nospin + jnc $L011nospin mov eax,DWORD PTR 4[esp] mov ecx,DWORD PTR 8[esp] DD 2430111262 xor eax,eax mov edx,DWORD PTR [ecx] - jmp $L011spin + jmp $L012spin ALIGN 16 -$L011spin: +$L012spin: inc eax cmp edx,DWORD PTR [ecx] - je $L011spin + je $L012spin DD 529567888 ret -$L010nospin: +$L011nospin: xor eax,eax xor edx,edx ret @@ -214,9 +233,22 @@ $L_OPENSSL_wipe_cpu_begin:: lea ecx,DWORD PTR _OPENSSL_ia32cap_P mov ecx,DWORD PTR [ecx] bt DWORD PTR [ecx],1 - jnc $L012no_x87 -DD 4007259865,4007259865,4007259865,4007259865,2430851995 -$L012no_x87: + jnc $L013no_x87 + and ecx,83886080 + cmp ecx,83886080 + jne $L014no_sse2 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 +$L014no_sse2: +DD 4007259865,4007259865,4007259865,4007259865 +DD 2430851995 +$L013no_x87: lea eax,DWORD PTR 4[esp] ret _OPENSSL_wipe_cpu ENDP @@ -228,11 +260,11 @@ $L_OPENSSL_atomic_add_begin:: push ebx nop mov eax,DWORD PTR [edx] -$L013spin: +$L015spin: lea ebx,DWORD PTR [ecx*1+eax] 
 nop
DD 447811568
- jne $L013spin
+ jne $L015spin
 mov eax,ebx
 pop ebx
 ret
@@ -269,50 +301,63 @@ $L_OPENSSL_cleanse_begin::
 mov ecx,DWORD PTR 8[esp]
 xor eax,eax
 cmp ecx,7
- jae $L014lot
 cmp ecx,0
- je $L015ret
-$L016little:
+ jae $L016lot
+ cmp ecx,0
+ je $L017ret
+$L018little:
 mov BYTE PTR [edx],al
 sub ecx,1
 lea edx,DWORD PTR 1[edx]
- jnz $L016little
-$L015ret:
+ jnz $L018little
+$L017ret:
 ret
ALIGN 16
-$L014lot:
+$L016lot:
 test edx,3
- jz $L017aligned
+ jz $L019aligned
 mov BYTE PTR [edx],al
 lea ecx,DWORD PTR [ecx-1]
 lea edx,DWORD PTR 1[edx]
- jmp $L014lot
-$L017aligned:
+ jmp $L016lot
+$L019aligned:
 mov DWORD PTR [edx],eax
 lea ecx,DWORD PTR [ecx-4]
 test ecx,-4
 lea edx,DWORD PTR 4[edx]
- jnz $L017aligned
+ jnz $L019aligned
 cmp ecx,0
- jne $L016little
+ jne $L018little
 ret
_OPENSSL_cleanse ENDP
ALIGN 16
_OPENSSL_ia32_rdrand PROC PUBLIC
$L_OPENSSL_ia32_rdrand_begin::
 mov ecx,8
-$L018loop:
+$L020loop:
DB 15,199,240
- jc $L019break
- loop $L018loop
-$L019break:
+ jc $L021break
+ loop $L020loop
+$L021break:
 cmp eax,0
 cmove eax,ecx
 ret
_OPENSSL_ia32_rdrand ENDP
+ALIGN 16
+_OPENSSL_ia32_rdseed PROC PUBLIC
+$L_OPENSSL_ia32_rdseed_begin::
+ mov ecx,8
+$L022loop:
+DB 15,199,248
+ jc $L023break
+ loop $L022loop
+$L023break:
+ cmp eax,0
+ cmove eax,ecx
+ ret
+_OPENSSL_ia32_rdseed ENDP
.text$ ENDS
.bss SEGMENT 'BSS'
-COMM _OPENSSL_ia32cap_P:QWORD
+COMM _OPENSSL_ia32cap_P:DWORD:4
.bss ENDS
.CRT$XCU SEGMENT DWORD PUBLIC 'DATA'
EXTERN _OPENSSL_cpuid_setup:NEAR
diff --git a/deps/openssl/asm_obsolete/Makefile b/deps/openssl/asm_obsolete/Makefile
new file mode 100644
index 00000000000000..c3ccfa43ce40aa
--- /dev/null
+++ b/deps/openssl/asm_obsolete/Makefile
@@ -0,0 +1,376 @@
+PERL ?= perl
+PERL += -I../openssl/crypto/perlasm -I../openssl/crypto/bn/asm
+
+# OPENSSL_IA32_SSE2 flag is needed for checking the sse2 feature on ia32
+# see https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-586.pl#L56
+SSE2 = -DOPENSSL_IA32_SSE2
+
+# CC and ASM environments are not needed for generating obsolete asm files
+CC = ''
+ASM = ''
+
+OUTPUTS = \
+ x86-elf-gas/aes/aes-586.s \
+ x86-elf-gas/aes/aesni-x86.s \
+ x86-elf-gas/aes/vpaes-x86.s \
+ x86-elf-gas/bf/bf-586.s \
+ x86-elf-gas/bn/bn-586.s \
+ x86-elf-gas/bn/co-586.s \
+ x86-elf-gas/bn/x86-mont.s \
+ x86-elf-gas/bn/x86-gf2m.s \
+ x86-elf-gas/camellia/cmll-x86.s \
+ x86-elf-gas/cast/cast-586.s \
+ x86-elf-gas/des/crypt586.s \
+ x86-elf-gas/des/des-586.s \
+ x86-elf-gas/md5/md5-586.s \
+ x86-elf-gas/rc4/rc4-586.s \
+ x86-elf-gas/ripemd/rmd-586.s \
+ x86-elf-gas/sha/sha1-586.s \
+ x86-elf-gas/sha/sha256-586.s \
+ x86-elf-gas/sha/sha512-586.s \
+ x86-elf-gas/whrlpool/wp-mmx.s \
+ x86-elf-gas/modes/ghash-x86.s \
+ x86-elf-gas/x86cpuid.s \
+ x64-elf-gas/aes/aes-x86_64.s \
+ x64-elf-gas/aes/aesni-mb-x86_64.s \
+ x64-elf-gas/aes/aesni-sha256-x86_64.s \
+ x64-elf-gas/aes/aesni-x86_64.s \
+ x64-elf-gas/aes/vpaes-x86_64.s \
+ x64-elf-gas/aes/bsaes-x86_64.s \
+ x64-elf-gas/aes/aesni-sha1-x86_64.s \
+ x64-elf-gas/bn/rsaz-avx2.s \
+ x64-elf-gas/bn/rsaz-x86_64.s \
+ x64-elf-gas/bn/x86_64-mont.s \
+ x64-elf-gas/bn/x86_64-mont5.s \
+ x64-elf-gas/bn/x86_64-gf2m.s \
+ x64-elf-gas/camellia/cmll-x86_64.s \
+ x64-elf-gas/ec/ecp_nistz256-x86_64.s \
+ x64-elf-gas/md5/md5-x86_64.s \
+ x64-elf-gas/rc4/rc4-x86_64.s \
+ x64-elf-gas/rc4/rc4-md5-x86_64.s \
+ x64-elf-gas/sha/sha1-mb-x86_64.s \
+ x64-elf-gas/sha/sha1-x86_64.s \
+ x64-elf-gas/sha/sha256-mb-x86_64.s \
+ x64-elf-gas/sha/sha256-x86_64.s \
+ x64-elf-gas/sha/sha512-x86_64.s \
+ x64-elf-gas/whrlpool/wp-x86_64.s \
x64-elf-gas/modes/aesni-gcm-x86_64.s \ + x64-elf-gas/modes/ghash-x86_64.s \ + x64-elf-gas/x86_64cpuid.s \ + arm-void-gas/aes/aes-armv4.S \ + arm-void-gas/aes/bsaes-armv7.S \ + arm-void-gas/aes/aesv8-armx.S \ + arm-void-gas/bn/armv4-mont.S \ + arm-void-gas/bn/armv4-gf2m.S \ + arm-void-gas/sha/sha1-armv4-large.S \ + arm-void-gas/sha/sha256-armv4.S \ + arm-void-gas/sha/sha512-armv4.S \ + arm-void-gas/modes/ghash-armv4.S \ + arm-void-gas/modes/ghashv8-armx.S \ + arm64-linux64-gas/aes/aesv8-armx.S \ + arm64-linux64-gas/modes/ghashv8-armx.S \ + arm64-linux64-gas/sha/sha1-armv8.S \ + arm64-linux64-gas/sha/sha256-armv8.S \ + arm64-linux64-gas/sha/sha512-armv8.S \ + x86-macosx-gas/aes/aes-586.s \ + x86-macosx-gas/aes/aesni-x86.s \ + x86-macosx-gas/aes/vpaes-x86.s \ + x86-macosx-gas/bf/bf-586.s \ + x86-macosx-gas/bn/bn-586.s \ + x86-macosx-gas/bn/co-586.s \ + x86-macosx-gas/bn/x86-mont.s \ + x86-macosx-gas/bn/x86-gf2m.s \ + x86-macosx-gas/camellia/cmll-x86.s \ + x86-macosx-gas/cast/cast-586.s \ + x86-macosx-gas/des/crypt586.s \ + x86-macosx-gas/des/des-586.s \ + x86-macosx-gas/md5/md5-586.s \ + x86-macosx-gas/rc4/rc4-586.s \ + x86-macosx-gas/ripemd/rmd-586.s \ + x86-macosx-gas/sha/sha1-586.s \ + x86-macosx-gas/sha/sha256-586.s \ + x86-macosx-gas/sha/sha512-586.s \ + x86-macosx-gas/whrlpool/wp-mmx.s \ + x86-macosx-gas/modes/ghash-x86.s \ + x86-macosx-gas/x86cpuid.s \ + x64-macosx-gas/aes/aes-x86_64.s \ + x64-macosx-gas/aes/aesni-x86_64.s \ + x64-macosx-gas/aes/vpaes-x86_64.s \ + x64-macosx-gas/aes/aesni-mb-x86_64.s \ + x64-macosx-gas/aes/aesni-sha256-x86_64.s \ + x64-macosx-gas/aes/bsaes-x86_64.s \ + x64-macosx-gas/aes/aesni-sha1-x86_64.s \ + x64-macosx-gas/bn/rsaz-avx2.s \ + x64-macosx-gas/bn/rsaz-x86_64.s \ + x64-macosx-gas/bn/x86_64-mont.s \ + x64-macosx-gas/bn/x86_64-mont5.s \ + x64-macosx-gas/bn/x86_64-gf2m.s \ + x64-macosx-gas/camellia/cmll-x86_64.s \ + x64-macosx-gas/ec/ecp_nistz256-x86_64.s \ + x64-macosx-gas/md5/md5-x86_64.s \ + x64-macosx-gas/sha/sha1-mb-x86_64.s \ + x64-macosx-gas/sha/sha1-x86_64.s \ + x64-macosx-gas/sha/sha256-mb-x86_64.s \ + x64-macosx-gas/sha/sha256-x86_64.s \ + x64-macosx-gas/sha/sha512-x86_64.s \ + x64-macosx-gas/whrlpool/wp-x86_64.s \ + x64-macosx-gas/modes/aesni-gcm-x86_64.s \ + x64-macosx-gas/modes/ghash-x86_64.s \ + x64-macosx-gas/x86_64cpuid.s \ + x86-win32-masm/aes/aes-586.asm \ + x86-win32-masm/aes/aesni-x86.asm \ + x86-win32-masm/aes/vpaes-x86.asm \ + x86-win32-masm/bf/bf-586.asm \ + x86-win32-masm/bn/bn-586.asm \ + x86-win32-masm/bn/co-586.asm \ + x86-win32-masm/bn/x86-mont.asm \ + x86-win32-masm/bn/x86-gf2m.asm \ + x86-win32-masm/camellia/cmll-x86.asm \ + x86-win32-masm/cast/cast-586.asm \ + x86-win32-masm/des/crypt586.asm \ + x86-win32-masm/des/des-586.asm \ + x86-win32-masm/md5/md5-586.asm \ + x86-win32-masm/rc4/rc4-586.asm \ + x86-win32-masm/ripemd/rmd-586.asm \ + x86-win32-masm/sha/sha1-586.asm \ + x86-win32-masm/sha/sha256-586.asm \ + x86-win32-masm/sha/sha512-586.asm \ + x86-win32-masm/whrlpool/wp-mmx.asm \ + x86-win32-masm/modes/ghash-x86.asm \ + x86-win32-masm/x86cpuid.asm \ + x64-win32-masm/aes/aes-x86_64.asm \ + x64-win32-masm/aes/aesni-mb-x86_64.asm \ + x64-win32-masm/aes/aesni-sha256-x86_64.asm \ + x64-win32-masm/aes/aesni-x86_64.asm \ + x64-win32-masm/aes/vpaes-x86_64.asm \ + x64-win32-masm/aes/bsaes-x86_64.asm \ + x64-win32-masm/aes/aesni-sha1-x86_64.asm \ + x64-win32-masm/bn/rsaz-avx2.asm \ + x64-win32-masm/bn/rsaz-x86_64.asm \ + x64-win32-masm/bn/x86_64-mont.asm \ + x64-win32-masm/bn/x86_64-mont5.asm \ + x64-win32-masm/bn/x86_64-gf2m.asm \ + 
x64-win32-masm/camellia/cmll-x86_64.asm \ + x64-win32-masm/ec/ecp_nistz256-x86_64.asm \ + x64-win32-masm/md5/md5-x86_64.asm \ + x64-win32-masm/rc4/rc4-x86_64.asm \ + x64-win32-masm/rc4/rc4-md5-x86_64.asm \ + x64-win32-masm/sha/sha1-mb-x86_64.asm \ + x64-win32-masm/sha/sha1-x86_64.asm \ + x64-win32-masm/sha/sha256-mb-x86_64.asm \ + x64-win32-masm/sha/sha256-x86_64.asm \ + x64-win32-masm/sha/sha512-x86_64.asm \ + x64-win32-masm/whrlpool/wp-x86_64.asm \ + x64-win32-masm/modes/aesni-gcm-x86_64.asm \ + x64-win32-masm/modes/ghash-x86_64.asm \ + x64-win32-masm/x86_64cpuid.asm \ + +# sha512 asm files for x86_64 need 512 in the filenames for outputs +# so that we add new rules to generate sha512 asm files with +# specifying its filename in the second argument. See +# https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-x86_64.pl#L137-L149 + +x64-elf-gas/sha/sha512-%.s: + $(PERL) $< elf $@ + +x64-elf-gas/%.s: + $(PERL) $< elf > $@ + +arm-void-gas/%.S: + $(PERL) $< void > $@ + +arm64-linux64-gas/sha/sha512-%.S: + $(PERL) $< linux64 $@ + +arm64-linux64-gas/%.S: + $(PERL) $< linux64 > $@ + +x64-macosx-gas/sha/sha512-%.s: + $(PERL) $< macosx $@ + +x64-macosx-gas/%.s: + $(PERL) $< macosx > $@ + +x64-win32-masm/sha/sha512-%.asm: + $(PERL) $< masm $@ + +x64-win32-masm/%.asm: + $(PERL) $< masm > $@ + +x86-elf-gas/%.s: + $(PERL) $< elf $(SSE2) > $@ + +x86-macosx-gas/%.s: + $(PERL) $< macosx $(SSE2) > $@ + +x86-win32-masm/%.asm: + $(PERL) $< win32 $(SSE2) > $@ + +.PHONY: all clean + +all: $(OUTPUTS) + # strip trailing whitespace and final blank newline + $(PERL) -pi -e 's/\s+$$/\n/; s/^\n$$// if eof' $^ + +clean: + find . -iname '*.asm' -exec rm "{}" \; + find . -iname '*.s' -exec rm "{}" \; + find . -iname '*.S' -exec rm "{}" \; + + +x64-elf-gas/aes/aes-x86_64.s: ../openssl/crypto/aes/asm/aes-x86_64.pl +x64-elf-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl +x64-elf-gas/aes/aesni-mb-x86_64.s: ../openssl/crypto/aes/asm/aesni-mb-x86_64.pl +x64-elf-gas/aes/aesni-sha256-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha256-x86_64.pl +x64-elf-gas/aes/vpaes-x86_64.s: ../openssl/crypto/aes/asm/vpaes-x86_64.pl +x64-elf-gas/aes/bsaes-x86_64.s: ../openssl/crypto/aes/asm/bsaes-x86_64.pl +x64-elf-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl +x64-elf-gas/bn/rsaz-avx2.s: ../openssl/crypto/bn/asm/rsaz-avx2.pl +x64-elf-gas/bn/rsaz-x86_64.s: ../openssl/crypto/bn/asm/rsaz-x86_64.pl +x64-elf-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl +x64-elf-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl +x64-elf-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl +x64-elf-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl +x64-elf-gas/ec/ecp_nistz256-x86_64.s: ../openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl +x64-elf-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl +x64-elf-gas/rc4/rc4-x86_64.s: ../openssl/crypto/rc4/asm/rc4-x86_64.pl +x64-elf-gas/rc4/rc4-md5-x86_64.s: ../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl +x64-elf-gas/sha/sha1-mb-x86_64.s: ../openssl/crypto/sha/asm/sha1-mb-x86_64.pl +x64-elf-gas/sha/sha1-x86_64.s: ../openssl/crypto/sha/asm/sha1-x86_64.pl +x64-elf-gas/sha/sha512-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl +x64-elf-gas/sha/sha256-mb-x86_64.s: ../openssl/crypto/sha/asm/sha256-mb-x86_64.pl +x64-elf-gas/sha/sha256-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl +x64-elf-gas/whrlpool/wp-x86_64.s: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl 
+x64-elf-gas/modes/aesni-gcm-x86_64.s: ../openssl/crypto/modes/asm/aesni-gcm-x86_64.pl +x64-elf-gas/modes/ghash-x86_64.s: ../openssl/crypto/modes/asm/ghash-x86_64.pl +x64-elf-gas/x86_64cpuid.s: ../openssl/crypto/x86_64cpuid.pl +x64-macosx-gas/aes/aes-x86_64.s: ../openssl/crypto/aes/asm/aes-x86_64.pl +x64-macosx-gas/aes/aesni-x86_64.s: ../openssl/crypto/aes/asm/aesni-x86_64.pl +x64-macosx-gas/aes/vpaes-x86_64.s: ../openssl/crypto/aes/asm/vpaes-x86_64.pl +x64-macosx-gas/aes/aesni-mb-x86_64.s: ../openssl/crypto/aes/asm/aesni-mb-x86_64.pl +x64-macosx-gas/aes/aesni-sha256-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha256-x86_64.pl +x64-macosx-gas/aes/bsaes-x86_64.s: ../openssl/crypto/aes/asm/bsaes-x86_64.pl +x64-macosx-gas/aes/aesni-sha1-x86_64.s: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl +x64-macosx-gas/bn/rsaz-avx2.s: ../openssl/crypto/bn/asm/rsaz-avx2.pl +x64-macosx-gas/bn/rsaz-x86_64.s: ../openssl/crypto/bn/asm/rsaz-x86_64.pl +x64-macosx-gas/bn/x86_64-mont.s: ../openssl/crypto/bn/asm/x86_64-mont.pl +x64-macosx-gas/bn/x86_64-mont5.s: ../openssl/crypto/bn/asm/x86_64-mont5.pl +x64-macosx-gas/bn/x86_64-gf2m.s: ../openssl/crypto/bn/asm/x86_64-gf2m.pl +x64-macosx-gas/camellia/cmll-x86_64.s: ../openssl/crypto/camellia/asm/cmll-x86_64.pl +x64-macosx-gas/ec/ecp_nistz256-x86_64.s: ../openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl +x64-macosx-gas/md5/md5-x86_64.s: ../openssl/crypto/md5/asm/md5-x86_64.pl +x64-macosx-gas/sha/sha1-x86_64.s: ../openssl/crypto/sha/asm/sha1-x86_64.pl +x64-macosx-gas/sha/sha1-mb-x86_64.s: ../openssl/crypto/sha/asm/sha1-mb-x86_64.pl +x64-macosx-gas/sha/sha256-mb-x86_64.s: ../openssl/crypto/sha/asm/sha256-mb-x86_64.pl +x64-macosx-gas/sha/sha256-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl +x64-macosx-gas/sha/sha512-x86_64.s: ../openssl/crypto/sha/asm/sha512-x86_64.pl +x64-macosx-gas/whrlpool/wp-x86_64.s: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl +x64-macosx-gas/modes/aesni-gcm-x86_64.s: ../openssl/crypto/modes/asm/aesni-gcm-x86_64.pl +x64-macosx-gas/modes/ghash-x86_64.s: ../openssl/crypto/modes/asm/ghash-x86_64.pl +x64-macosx-gas/x86_64cpuid.s: ../openssl/crypto/x86_64cpuid.pl +x64-win32-masm/aes/aes-x86_64.asm: ../openssl/crypto/aes/asm/aes-x86_64.pl +x64-win32-masm/aes/aesni-x86_64.asm: ../openssl/crypto/aes/asm/aesni-x86_64.pl +x64-win32-masm/aes/aesni-mb-x86_64.asm: ../openssl/crypto/aes/asm/aesni-mb-x86_64.pl +x64-win32-masm/aes/aesni-sha256-x86_64.asm: ../openssl/crypto/aes/asm/aesni-sha256-x86_64.pl +x64-win32-masm/aes/vpaes-x86_64.asm: ../openssl/crypto/aes/asm/vpaes-x86_64.pl +x64-win32-masm/aes/bsaes-x86_64.asm: ../openssl/crypto/aes/asm/bsaes-x86_64.pl +x64-win32-masm/aes/aesni-sha1-x86_64.asm: ../openssl/crypto/aes/asm/aesni-sha1-x86_64.pl +x64-win32-masm/bn/rsaz-avx2.asm: ../openssl/crypto/bn/asm/rsaz-avx2.pl +x64-win32-masm/bn/rsaz-x86_64.asm: ../openssl/crypto/bn/asm/rsaz-x86_64.pl +x64-win32-masm/bn/x86_64-mont.asm: ../openssl/crypto/bn/asm/x86_64-mont.pl +x64-win32-masm/bn/x86_64-mont5.asm: ../openssl/crypto/bn/asm/x86_64-mont5.pl +x64-win32-masm/bn/x86_64-gf2m.asm: ../openssl/crypto/bn/asm/x86_64-gf2m.pl +x64-win32-masm/camellia/cmll-x86_64.asm: ../openssl/crypto/camellia/asm/cmll-x86_64.pl +x64-win32-masm/ec/ecp_nistz256-x86_64.asm: ../openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl +x64-win32-masm/md5/md5-x86_64.asm: ../openssl/crypto/md5/asm/md5-x86_64.pl +x64-win32-masm/rc4/rc4-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-x86_64.pl +x64-win32-masm/rc4/rc4-md5-x86_64.asm: ../openssl/crypto/rc4/asm/rc4-md5-x86_64.pl 
+x64-win32-masm/sha/sha1-mb-x86_64.asm: ../openssl/crypto/sha/asm/sha1-mb-x86_64.pl +x64-win32-masm/sha/sha1-x86_64.asm: ../openssl/crypto/sha/asm/sha1-x86_64.pl +x64-win32-masm/sha/sha256-mb-x86_64.asm: ../openssl/crypto/sha/asm/sha256-mb-x86_64.pl +x64-win32-masm/sha/sha256-x86_64.asm: ../openssl/crypto/sha/asm/sha512-x86_64.pl +x64-win32-masm/sha/sha512-x86_64.asm: ../openssl/crypto/sha/asm/sha512-x86_64.pl +x64-win32-masm/whrlpool/wp-x86_64.asm: ../openssl/crypto/whrlpool/asm/wp-x86_64.pl +x64-win32-masm/modes/aesni-gcm-x86_64.asm: ../openssl/crypto/modes/asm/aesni-gcm-x86_64.pl +x64-win32-masm/modes/ghash-x86_64.asm: ../openssl/crypto/modes/asm/ghash-x86_64.pl +x64-win32-masm/x86_64cpuid.asm: ../openssl/crypto/x86_64cpuid.pl +x86-elf-gas/aes/aes-586.s: ../openssl/crypto/aes/asm/aes-586.pl +x86-elf-gas/aes/aesni-x86.s: ../openssl/crypto/aes/asm/aesni-x86.pl +x86-elf-gas/aes/vpaes-x86.s: ../openssl/crypto/aes/asm/vpaes-x86.pl +x86-elf-gas/bf/bf-586.s: ../openssl/crypto/bf/asm/bf-586.pl +x86-elf-gas/bn/bn-586.s: ../openssl/crypto/bn/asm/bn-586.pl +x86-elf-gas/bn/co-586.s: ../openssl/crypto/bn/asm/co-586.pl +x86-elf-gas/bn/x86-mont.s: ../openssl/crypto/bn/asm/x86-mont.pl +x86-elf-gas/bn/x86-gf2m.s: ../openssl/crypto/bn/asm/x86-gf2m.pl +x86-elf-gas/camellia/cmll-x86.s: ../openssl/crypto/camellia/asm/cmll-x86.pl +x86-elf-gas/cast/cast-586.s: ../openssl/crypto/cast/asm/cast-586.pl +x86-elf-gas/des/crypt586.s: ../openssl/crypto/des/asm/crypt586.pl +x86-elf-gas/des/des-586.s: ../openssl/crypto/des/asm/des-586.pl +x86-elf-gas/md5/md5-586.s: ../openssl/crypto/md5/asm/md5-586.pl +x86-elf-gas/rc4/rc4-586.s: ../openssl/crypto/rc4/asm/rc4-586.pl +x86-elf-gas/rc5/rc5-586.s: ../openssl/crypto/rc5/asm/rc5-586.pl +x86-elf-gas/ripemd/rmd-586.s: ../openssl/crypto/ripemd/asm/rmd-586.pl +x86-elf-gas/sha/sha1-586.s: ../openssl/crypto/sha/asm/sha1-586.pl +x86-elf-gas/sha/sha256-586.s: ../openssl/crypto/sha/asm/sha256-586.pl +x86-elf-gas/sha/sha512-586.s: ../openssl/crypto/sha/asm/sha512-586.pl +x86-elf-gas/whrlpool/wp-mmx.s: ../openssl/crypto/whrlpool/asm/wp-mmx.pl +x86-elf-gas/modes/ghash-x86.s: ../openssl/crypto/modes/asm/ghash-x86.pl +x86-elf-gas/x86cpuid.s: ../openssl/crypto/x86cpuid.pl +x86-macosx-gas/aes/aes-586.s: ../openssl/crypto/aes/asm/aes-586.pl +x86-macosx-gas/aes/aesni-x86.s: ../openssl/crypto/aes/asm/aesni-x86.pl +x86-macosx-gas/aes/vpaes-x86.s: ../openssl/crypto/aes/asm/vpaes-x86.pl +x86-macosx-gas/bf/bf-586.s: ../openssl/crypto/bf/asm/bf-686.pl +x86-macosx-gas/bn/bn-586.s: ../openssl/crypto/bn/asm/bn-586.pl +x86-macosx-gas/bn/co-586.s: ../openssl/crypto/bn/asm/co-586.pl +x86-macosx-gas/bn/x86-mont.s: ../openssl/crypto/bn/asm/x86-mont.pl +x86-macosx-gas/bn/x86-gf2m.s: ../openssl/crypto/bn/asm/x86-gf2m.pl +x86-macosx-gas/camellia/cmll-x86.s: ../openssl/crypto/camellia/asm/cmll-x86.pl +x86-macosx-gas/cast/cast-586.s: ../openssl/crypto/cast/asm/cast-586.pl +x86-macosx-gas/des/crypt586.s: ../openssl/crypto/des/asm/crypt586.pl +x86-macosx-gas/des/des-586.s: ../openssl/crypto/des/asm/des-586.pl +x86-macosx-gas/md5/md5-586.s: ../openssl/crypto/md5/asm/md5-586.pl +x86-macosx-gas/rc4/rc4-586.s: ../openssl/crypto/rc4/asm/rc4-586.pl +x86-macosx-gas/rc5/rc5-586.s: ../openssl/crypto/rc5/asm/rc5-586.pl +x86-macosx-gas/ripemd/rmd-586.s: ../openssl/crypto/ripemd/asm/rmd-586.pl +x86-macosx-gas/sha/sha1-586.s: ../openssl/crypto/sha/asm/sha1-586.pl +x86-macosx-gas/sha/sha256-586.s: ../openssl/crypto/sha/asm/sha256-586.pl +x86-macosx-gas/sha/sha512-586.s: ../openssl/crypto/sha/asm/sha512-586.pl 
+x86-macosx-gas/whrlpool/wp-mmx.s: ../openssl/crypto/whrlpool/asm/wp-mmx.pl +x86-macosx-gas/modes/ghash-x86.s: ../openssl/crypto/modes/asm/ghash-x86.pl +x86-macosx-gas/x86cpuid.s: ../openssl/crypto/x86cpuid.pl +x86-win32-masm/aes/aes-586.asm: ../openssl/crypto/aes/asm/aes-586.pl +x86-win32-masm/aes/aesni-x86.asm: ../openssl/crypto/aes/asm/aesni-x86.pl +x86-win32-masm/aes/vpaes-x86.asm: ../openssl/crypto/aes/asm/vpaes-x86.pl +x86-win32-masm/bf/bf-586.asm: ../openssl/crypto/bf/asm/bf-586.pl +x86-win32-masm/bn/bn-586.asm: ../openssl/crypto/bn/asm/bn-586.pl +x86-win32-masm/bn/co-586.asm: ../openssl/crypto/bn/asm/co-586.pl +x86-win32-masm/bn/x86-gf2m.asm: ../openssl/crypto/bn/asm/x86-gf2m.pl +x86-win32-masm/bn/x86-mont.asm: ../openssl/crypto/bn/asm/x86-mont.pl +x86-win32-masm/camellia/cmll-x86.asm: ../openssl/crypto/camellia/asm/cmll-x86.pl +x86-win32-masm/cast/cast-586.asm: ../openssl/crypto/cast/asm/cast-586.pl +x86-win32-masm/des/crypt586.asm: ../openssl/crypto/des/asm/crypt586.pl +x86-win32-masm/des/des-586.asm: ../openssl/crypto/des/asm/des-586.pl +x86-win32-masm/md5/md5-586.asm: ../openssl/crypto/md5/asm/md5-586.pl +x86-win32-masm/rc4/rc4-586.asm: ../openssl/crypto/rc4/asm/rc4-586.pl +x86-win32-masm/ripemd/rmd-586.asm: ../openssl/crypto/ripemd/asm/rmd-586.pl +x86-win32-masm/sha/sha1-586.asm: ../openssl/crypto/sha/asm/sha1-586.pl +x86-win32-masm/sha/sha256-586.asm: ../openssl/crypto/sha/asm/sha256-586.pl +x86-win32-masm/sha/sha512-586.asm: ../openssl/crypto/sha/asm/sha512-586.pl +x86-win32-masm/whrlpool/wp-mmx.asm: ../openssl/crypto/whrlpool/asm/wp-mmx.pl +x86-win32-masm/modes/ghash-x86.asm: ../openssl/crypto/modes/asm/ghash-x86.pl +x86-win32-masm/x86cpuid.asm: ../openssl/crypto/x86cpuid.pl +arm-void-gas/aes/aes-armv4.S: ../openssl/crypto/aes/asm/aes-armv4.pl +arm-void-gas/aes/bsaes-armv7.S: ../openssl/crypto/aes/asm/bsaes-armv7.pl +arm-void-gas/aes/aesv8-armx.S: ../openssl/crypto/aes/asm/aesv8-armx.pl +arm-void-gas/bn/armv4-mont.S: ../openssl/crypto/bn/asm/armv4-mont.pl +arm-void-gas/bn/armv4-gf2m.S: ../openssl/crypto/bn/asm/armv4-gf2m.pl +arm-void-gas/sha/sha1-armv4-large.S: ../openssl/crypto/sha/asm/sha1-armv4-large.pl +arm-void-gas/sha/sha512-armv4.S: ../openssl/crypto/sha/asm/sha512-armv4.pl +arm-void-gas/sha/sha256-armv4.S: ../openssl/crypto/sha/asm/sha256-armv4.pl +arm-void-gas/modes/ghash-armv4.S: ../openssl/crypto/modes/asm/ghash-armv4.pl +arm-void-gas/modes/ghashv8-armx.S: ../openssl/crypto/modes/asm/ghashv8-armx.pl +arm64-linux64-gas/aes/aesv8-armx.S: ../openssl/crypto/aes/asm/aesv8-armx.pl +arm64-linux64-gas/modes/ghashv8-armx.S: ../openssl/crypto/modes/asm/ghashv8-armx.pl +arm64-linux64-gas/sha/sha1-armv8.S: ../openssl/crypto/sha/asm/sha1-armv8.pl +arm64-linux64-gas/sha/sha256-armv8.S: ../openssl/crypto/sha/asm/sha512-armv8.pl +arm64-linux64-gas/sha/sha512-armv8.S: ../openssl/crypto/sha/asm/sha512-armv8.pl diff --git a/deps/openssl/asm_obsolete/arm-void-gas/aes/aes-armv4.S b/deps/openssl/asm_obsolete/arm-void-gas/aes/aes-armv4.S new file mode 100644 index 00000000000000..333a522730ba6c --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/aes/aes-armv4.S @@ -0,0 +1,1177 @@ + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. 
+@ ==================================================================== + +@ AES for ARMv4 + +@ January 2007. +@ +@ Code uses single 1K S-box and is >2 times faster than code generated +@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which +@ allows to merge logical or arithmetic operation with shift or rotate +@ in one instruction and emit combined result every cycle. The module +@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit +@ key [on single-issue Xscale PXA250 core]. + +@ May 2007. +@ +@ AES_set_[en|de]crypt_key is added. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 12% improvement on +@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~21.5 cycles per byte. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else +.code 32 +# endif +#endif + +.type AES_Te,%object +.align 5 +AES_Te: +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b +.word 0xd5e7e732, 
0x8bc8c843, 0x6e373759, 0xda6d6db7 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +@ Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +@ rcon[] +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 +.size AES_Te,.-AES_Te + +@ void AES_encrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_encrypt +.type AES_encrypt,%function +.align 5 +AES_encrypt: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ AES_encrypt +#else + adr r3,AES_encrypt +#endif + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_encrypt-AES_Te @ Te +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... 
+ ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_encrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_encrypt,.-AES_encrypt + +.type _armv4_AES_encrypt,%function +.align 2 +_armv4_AES_encrypt: + str lr,[sp,#-4]! 
@ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0 + and r8,lr,r0,lsr#8 + and r9,lr,r0,lsr#16 + mov r0,r0,lsr#24 +.Lenc_loop: + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] + and r8,lr,r1 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] + eor r0,r0,r7,ror#8 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,ror#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r6,r9,ror#8 + and r9,lr,r2 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,ror#8 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r6,r9,ror#16 + and r9,lr,r3,lsr#16 @ i2 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] + eor r0,r0,r7,ror#24 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] + eor r2,r2,r9,ror#8 + ldr r4,[r11,#-12] + eor r3,r3,r6,ror#8 + + ldr r5,[r11,#-8] + eor r0,r0,r7 + ldr r6,[r11,#-4] + and r7,lr,r0 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0,lsr#16 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Lenc_loop + + add r10,r10,#2 + + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] + and r8,lr,r1 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] + and r9,lr,r1,lsr#8 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] + mov r1,r1,lsr#24 + + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] + eor r0,r7,r0,lsl#8 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,lsl#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] + mov r2,r2,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] + eor r0,r7,r0,lsl#8 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r3,lsr#16 @ i2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] + mov r3,r3,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] + eor r0,r7,r0,lsl#8 + ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r2,r9,lsl#16 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#2 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_encrypt,.-_armv4_AES_encrypt + +.global private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,%function +.align 5 +private_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ AES_set_encrypt_key +#else + adr r3,private_AES_set_encrypt_key +#endif + teq r0,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + moveq r0,#-1 + beq .Labrt + teq r2,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + moveq r0,#-1 + beq .Labrt + + teq r1,#128 + beq .Lok + teq r1,#192 + beq .Lok 
+ teq r1,#256 +#if __ARM_ARCH__>=7 + itt ne @ Thumb2 thing, sanity check in ARM +#endif + movne r0,#-1 + bne .Labrt + +.Lok: stmdb sp!,{r4-r12,lr} + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 + + mov r12,r0 @ inp + mov lr,r1 @ bits + mov r11,r2 @ key + +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + str r0,[r11],#16 + orr r3,r3,r5,lsl#16 + str r1,[r11,#-12] + orr r3,r3,r6,lsl#24 + str r2,[r11,#-8] + str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif + + teq lr,#128 + bne .Lnot128 + mov r12,#10 + str r12,[r11,#240-16] + add r6,r10,#256 @ rcon + mov lr,#255 + +.L128_loop: + and r5,lr,r3,lsr#24 + and r7,lr,r3,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r3 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r5,r5,r4 + eor r0,r0,r5 @ rk[4]=rk[0]^... + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] + str r0,[r11],#16 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] + str r1,[r11,#-12] + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] + str r2,[r11,#-8] + subs r12,r12,#1 + str r3,[r11,#-4] + bne .L128_loop + sub r2,r11,#176 + b .Ldone + +.Lnot128: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#19] + ldrb r4,[r12,#18] + ldrb r5,[r12,#17] + ldrb r6,[r12,#16] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#23] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#22] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#21] + ldrb r6,[r12,#20] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + teq lr,#192 + bne .Lnot192 + mov r12,#12 + str r12,[r11,#240-24] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#8 + +.L192_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[6]=rk[0]^... 
+ eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] + str r0,[r11],#24 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] + str r1,[r11,#-20] + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] + str r2,[r11,#-16] + subs r12,r12,#1 + str r3,[r11,#-12] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + subeq r2,r11,#216 + beq .Ldone + + ldr r7,[r11,#-32] + ldr r8,[r11,#-28] + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] + str r7,[r11,#-8] + str r9,[r11,#-4] + b .L192_loop + +.Lnot192: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#27] + ldrb r4,[r12,#26] + ldrb r5,[r12,#25] + ldrb r6,[r12,#24] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#31] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#30] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#29] + ldrb r6,[r12,#28] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + mov r12,#14 + str r12,[r11,#240-32] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#7 + +.L256_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[8]=rk[0]^... + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] + str r0,[r11],#32 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] + str r1,[r11,#-28] + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] + str r2,[r11,#-24] + subs r12,r12,#1 + str r3,[r11,#-20] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif + subeq r2,r11,#256 + beq .Ldone + + and r5,lr,r3 + and r7,lr,r3,lsr#8 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#16 + ldrb r7,[r10,r7] + and r9,lr,r3,lsr#24 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#8 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r11,#-48] + orr r5,r5,r9,lsl#24 + + ldr r7,[r11,#-44] + ldr r8,[r11,#-40] + eor r4,r4,r5 @ rk[12]=rk[4]^... + ldr r9,[r11,#-36] + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] + str r4,[r11,#-16] + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] + str r7,[r11,#-12] + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] + str r8,[r11,#-8] + str r9,[r11,#-4] + b .L256_loop + +.align 2 +.Ldone: mov r0,#0 + ldmia sp!,{r4-r12,lr} +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key + +.global private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,%function +.align 5 +private_AES_set_decrypt_key: + str lr,[sp,#-4]! 
@ push lr + bl _armv4_AES_set_encrypt_key + teq r0,#0 + ldr lr,[sp],#4 @ pop lr + bne .Labrt + + mov r0,r2 @ AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key + +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.global AES_set_enc2dec_key +.type AES_set_enc2dec_key,%function +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4-r12,lr} + + ldr r12,[r0,#240] + mov r7,r0 @ input + add r8,r0,r12,lsl#4 + mov r11,r1 @ ouput + add r10,r1,r12,lsl#4 + str r12,[r1,#240] + +.Linv: ldr r0,[r7],#16 + ldr r1,[r7,#-12] + ldr r2,[r7,#-8] + ldr r3,[r7,#-4] + ldr r4,[r8],#-16 + ldr r5,[r8,#16+4] + ldr r6,[r8,#16+8] + ldr r9,[r8,#16+12] + str r0,[r10],#-16 + str r1,[r10,#16+4] + str r2,[r10,#16+8] + str r3,[r10,#16+12] + str r4,[r11],#16 + str r5,[r11,#-12] + str r6,[r11,#-8] + str r9,[r11,#-4] + teq r7,r8 + bne .Linv + + ldr r0,[r7] + ldr r1,[r7,#4] + ldr r2,[r7,#8] + ldr r3,[r7,#12] + str r0,[r11] + str r1,[r11,#4] + str r2,[r11,#8] + str r3,[r11,#12] + sub r11,r11,r12,lsl#3 + ldr r0,[r11,#16]! @ prefetch tp1 + mov r7,#0x80 + mov r8,#0x1b + orr r7,r7,#0x8000 + orr r8,r8,#0x1b00 + orr r7,r7,r7,lsl#16 + orr r8,r8,r8,lsl#16 + sub r12,r12,#1 + mvn r9,r7 + mov r12,r12,lsl#2 @ (rounds-1)*4 + +.Lmix: and r4,r0,r7 + and r1,r0,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r1,r4,r1,lsl#1 @ tp2 + + and r4,r1,r7 + and r2,r1,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r2,r4,r2,lsl#1 @ tp4 + + and r4,r2,r7 + and r3,r2,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r3,r4,r3,lsl#1 @ tp8 + + eor r4,r1,r2 + eor r5,r0,r3 @ tp9 + eor r4,r4,r3 @ tpe + eor r4,r4,r1,ror#24 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) + eor r4,r4,r2,ror#16 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) + + ldr r0,[r11,#4] @ prefetch tp1 + str r4,[r11],#4 + subs r12,r12,#1 + bne .Lmix + + mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_set_enc2dec_key,.-AES_set_enc2dec_key + +.type AES_Td,%object +.align 5 +AES_Td: +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 
0x79c8eedb +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +@ Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 
0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +.size AES_Td,.-AES_Td + +@ void AES_decrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.global AES_decrypt +.type AES_decrypt,%function +.align 5 +AES_decrypt: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ AES_decrypt +#else + adr r3,AES_decrypt +#endif + stmdb sp!,{r1,r4-r12,lr} + mov r12,r0 @ inp + mov r11,r2 + sub r10,r3,#AES_decrypt-AES_Td @ Td +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_decrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size AES_decrypt,.-AES_decrypt + +.type _armv4_AES_decrypt,%function +.align 2 +_armv4_AES_decrypt: + str lr,[sp,#-4]! 
@ push lr + ldmia r11!,{r4-r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0,lsr#16 + and r8,lr,r0,lsr#8 + and r9,lr,r0 + mov r0,r0,lsr#24 +.Ldec_loop: + ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] + and r7,lr,r1 @ i0 + ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] + and r8,lr,r1,lsr#16 + ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] + ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] + ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] + eor r0,r0,r7,ror#24 + ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,ror#8 + and r8,lr,r2 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r2,lsr#16 + ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] + eor r1,r1,r4,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] + and r7,lr,r3,lsr#16 @ i0 + eor r1,r1,r8,ror#24 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r3 @ i2 + ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] + eor r2,r2,r5,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] + eor r0,r0,r7,ror#8 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] + eor r2,r2,r9,ror#24 + + ldr r4,[r11,#-12] + eor r0,r0,r7 + ldr r5,[r11,#-8] + eor r3,r3,r6,ror#8 + ldr r6,[r11,#-4] + and r7,lr,r0,lsr#16 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne .Ldec_loop + + add r10,r10,#1024 + + ldr r5,[r10,#0] @ prefetch Td4 + ldr r6,[r10,#32] + ldr r4,[r10,#64] + ldr r5,[r10,#96] + ldr r6,[r10,#128] + ldr r4,[r10,#160] + ldr r5,[r10,#192] + ldr r6,[r10,#224] + + ldrb r0,[r10,r0] @ Td4[s0>>24] + ldrb r4,[r10,r7] @ Td4[s0>>16] + and r7,lr,r1 @ i0 + ldrb r5,[r10,r8] @ Td4[s0>>8] + and r8,lr,r1,lsr#16 + ldrb r6,[r10,r9] @ Td4[s0>>0] + and r9,lr,r1,lsr#8 + + add r1,r10,r1,lsr#24 + ldrb r7,[r10,r7] @ Td4[s1>>0] + ldrb r1,[r1] @ Td4[s1>>24] + ldrb r8,[r10,r8] @ Td4[s1>>16] + eor r0,r7,r0,lsl#24 + ldrb r9,[r10,r9] @ Td4[s1>>8] + eor r1,r4,r1,lsl#8 + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,lsl#8 + and r8,lr,r2 @ i1 + ldrb r7,[r10,r7] @ Td4[s2>>8] + eor r6,r6,r9,lsl#8 + ldrb r8,[r10,r8] @ Td4[s2>>0] + and r9,lr,r2,lsr#16 + + add r2,r10,r2,lsr#24 + ldrb r2,[r2] @ Td4[s2>>24] + eor r0,r0,r7,lsl#8 + ldrb r9,[r10,r9] @ Td4[s2>>16] + eor r1,r8,r1,lsl#16 + and r7,lr,r3,lsr#16 @ i0 + eor r2,r5,r2,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + ldrb r7,[r10,r7] @ Td4[s3>>16] + eor r6,r6,r9,lsl#16 + ldrb r8,[r10,r8] @ Td4[s3>>8] + and r9,lr,r3 @ i2 + + add r3,r10,r3,lsr#24 + ldrb r9,[r10,r9] @ Td4[s3>>0] + ldrb r3,[r3] @ Td4[s3>>24] + eor r0,r0,r7,lsl#16 + ldr r7,[r11,#0] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r9,r2,lsl#8 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#1024 + ldr pc,[sp],#4 @ pop and return +.size _armv4_AES_decrypt,.-_armv4_AES_decrypt +.asciz "AES for ARMv4, CRYPTOGAMS by " +.align 2 diff --git a/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S b/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S new file mode 100644 index 00000000000000..732ba3d9c88b94 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S @@ -0,0 +1,732 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv7-a +.fpu neon +.code 32 +.align 5 +rcon: +.long 
0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + mov r3,#-1 + cmp r0,#0 + beq .Lenc_key_abort + cmp r2,#0 + beq .Lenc_key_abort + mov r3,#-2 + cmp r1,#128 + blt .Lenc_key_abort + cmp r1,#256 + bgt .Lenc_key_abort + tst r1,#0x3f + bne .Lenc_key_abort + + adr r3,rcon + cmp r1,#192 + + veor q0,q0,q0 + vld1.8 {q3},[r0]! + mov r1,#8 @ reuse r1 + vld1.32 {q1,q2},[r3]! + + blt .Loop128 + beq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + bne .Loop128 + + vld1.32 {q1},[r3] + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + + vtbl.8 d20,{q3},d4 + vtbl.8 d21,{q3},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q3},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + veor q3,q3,q10 + vst1.32 {q3},[r2] + add r2,r2,#0x50 + + mov r12,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {d16},[r0]! + vmov.i8 q10,#8 @ borrow q10 + vst1.32 {q3},[r2]! + vsub.i8 q2,q2,q10 @ adjust the mask + +.Loop192: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {d16},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + + vdup.32 q9,d7[1] + veor q9,q9,q8 + veor q10,q10,q1 + vext.8 q8,q0,q8,#12 + vshl.u8 q1,q1,#1 + veor q8,q8,q9 + veor q3,q3,q10 + veor q8,q8,q10 + vst1.32 {q3},[r2]! + bne .Loop192 + + mov r12,#12 + add r2,r2,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {q8},[r0] + mov r1,#7 + mov r12,#14 + vst1.32 {q3},[r2]! + +.Loop256: + vtbl.8 d20,{q8},d4 + vtbl.8 d21,{q8},d5 + vext.8 q9,q0,q3,#12 + vst1.32 {q8},[r2]! + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + subs r1,r1,#1 + + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q3,q3,q9 + vext.8 q9,q0,q9,#12 + veor q10,q10,q1 + veor q3,q3,q9 + vshl.u8 q1,q1,#1 + veor q3,q3,q10 + vst1.32 {q3},[r2]! + beq .Ldone + + vdup.32 q10,d7[1] + vext.8 q9,q0,q8,#12 + .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 + + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + vext.8 q9,q0,q9,#12 + veor q8,q8,q9 + + veor q8,q8,q10 + b .Loop256 + +.Ldone: + str r12,[r2] + mov r3,#0 + +.Lenc_key_abort: + mov r0,r3 @ return value + + bx lr +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stmdb sp!,{r4,lr} + bl .Lenc_key + + cmp r0,#0 + bne .Ldec_key_abort + + sub r2,r2,#240 @ restore original r2 + mov r4,#-16 + add r0,r2,r12,lsl#4 @ end of key schedule + + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! + +.Loop_imc: + vld1.32 {q0},[r2] + vld1.32 {q1},[r0] + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + vst1.32 {q0},[r0],r4 + vst1.32 {q1},[r2]! 
+ cmp r0,r2 + bhi .Loop_imc + + vld1.32 {q0},[r2] + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + vst1.32 {q0},[r0] + + eor r0,r0,r0 @ return value +.Ldec_key_abort: + ldmia sp!,{r4,pc} +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_enc: + .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 + vld1.32 {q0},[r2]! + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + subs r3,r3,#2 + .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + vld1.32 {q1},[r2]! + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + bgt .Loop_enc + + .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 + vld1.32 {q0},[r2] + .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr r3,[r2,#240] + vld1.32 {q0},[r2]! + vld1.8 {q2},[r0] + sub r3,r3,#2 + vld1.32 {q1},[r2]! + +.Loop_dec: + .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 + vld1.32 {q0},[r2]! + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + subs r3,r3,#2 + .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + vld1.32 {q1},[r2]! + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + bgt .Loop_dec + + .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 + vld1.32 {q0},[r2] + .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 + veor q2,q2,q0 + + vst1.8 {q2},[r1] + bx lr +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + mov ip,sp + stmdb sp!,{r4-r8,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load remaining args + subs r2,r2,#16 + mov r8,#16 + blo .Lcbc_abort + moveq r8,#0 + + cmp r5,#0 @ en- or decrypting? + ldr r5,[r3,#240] + and r2,r2,#-16 + vld1.8 {q6},[r4] + vld1.8 {q0},[r0],r8 + + vld1.32 {q8-q9},[r3] @ load key schedule... + sub r5,r5,#6 + add r7,r3,r5,lsl#4 @ pointer to last 7 round keys + sub r5,r5,#2 + vld1.32 {q10-q11},[r7]! + vld1.32 {q12-q13},[r7]! + vld1.32 {q14-q15},[r7]! + vld1.32 {q7},[r7] + + add r7,r3,#32 + mov r6,r5 + beq .Lcbc_dec + + cmp r5,#2 + veor q0,q0,q6 + veor q5,q8,q7 + beq .Lcbc_enc128 + +.Loop_cbc_enc: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + bgt .Loop_cbc_enc + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r7,r3,#16 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + + mov r6,r5 + veor q6,q0,q7 + vst1.8 {q6},[r1]! 
+ bhs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {q2-q3},[r7] + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc128: + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + subs r2,r2,#16 + .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + moveq r8,#0 + .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.8 {q8},[r0],r8 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + veor q8,q8,q5 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + veor q6,q0,q7 + bhs .Loop_cbc_enc128 + + vst1.8 {q6},[r1]! + b .Lcbc_done +.align 5 +.Lcbc_dec: + vld1.8 {q10},[r0]! + subs r2,r2,#32 @ bias + add r6,r5,#2 + vorr q3,q0,q0 + vorr q1,q0,q0 + vorr q11,q10,q10 + blo .Lcbc_dec_tail + + vorr q1,q10,q10 + vld1.8 {q10},[r0]! + vorr q2,q0,q0 + vorr q3,q1,q1 + vorr q11,q10,q10 + +.Loop3x_cbc_dec: + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + vld1.32 {q8},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + subs r6,r6,#2 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + vld1.32 {q9},[r7]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + bgt .Loop3x_cbc_dec + + .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + veor q4,q6,q7 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q5,q2,q7 + .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + veor q9,q3,q7 + subs r2,r2,#0x30 + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vorr q6,q11,q11 + movlo r6,r2 @ r6, r6, is zero at this point + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 + add r0,r0,r6 @ r0 is adjusted in such way that + @ at exit from the loop q1-q10 + @ are loaded with last "words" + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + mov r7,r3 + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 + vld1.8 {q2},[r0]! + .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.8 {q3},[r0]! + .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 + vld1.8 {q11},[r0]! 
+ .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 + .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 + .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + + add r6,r5,#2 + veor q4,q4,q0 + veor q5,q5,q1 + veor q10,q10,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vorr q0,q2,q2 + vst1.8 {q4},[r1]! + vorr q1,q3,q3 + vst1.8 {q5},[r1]! + vst1.8 {q10},[r1]! + vorr q10,q11,q11 + bhs .Loop3x_cbc_dec + + cmn r2,#0x30 + beq .Lcbc_done + nop + +.Lcbc_dec_tail: + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + vld1.32 {q8},[r7]! + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + subs r6,r6,#2 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + vld1.32 {q9},[r7]! + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + bgt .Lcbc_dec_tail + + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + cmn r2,#0x20 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q5,q6,q7 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 + .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q9,q3,q7 + .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 + .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 + beq .Lcbc_dec_one + veor q5,q5,q1 + veor q9,q9,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + vst1.8 {q9},[r1]! + b .Lcbc_done + +.Lcbc_dec_one: + veor q5,q5,q10 + vorr q6,q11,q11 + vst1.8 {q5},[r1]! + +.Lcbc_done: + vst1.8 {q6},[r4] +.Lcbc_abort: + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r8,pc} +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + mov ip,sp + stmdb sp!,{r4-r10,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg + ldr r5,[r3,#240] + + ldr r8, [r4, #12] + vld1.32 {q0},[r4] + + vld1.32 {q8-q9},[r3] @ load key schedule... + sub r5,r5,#4 + mov r12,#16 + cmp r2,#2 + add r7,r3,r5,lsl#4 @ pointer to last 5 round keys + sub r5,r5,#2 + vld1.32 {q12-q13},[r7]! + vld1.32 {q14-q15},[r7]! + vld1.32 {q7},[r7] + add r7,r3,#32 + mov r6,r5 + movlo r12,#0 +#ifndef __ARMEB__ + rev r8, r8 +#endif + vorr q1,q0,q0 + add r10, r8, #1 + vorr q10,q0,q0 + add r8, r8, #2 + vorr q6,q0,q0 + rev r10, r10 + vmov.32 d3[1],r10 + bls .Lctr32_tail + rev r12, r8 + sub r2,r2,#3 @ bias + vmov.32 d21[1],r12 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 + vld1.32 {q8},[r7]! 
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 + vld1.32 {q9},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + bgt .Loop3x_ctr32 + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 + mov r7,r3 + .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 + vld1.8 {q2},[r0]! + .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vorr q0,q6,q6 + .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 + vld1.8 {q3},[r0]! + .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 + vorr q1,q6,q6 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vld1.8 {q11},[r0]! + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 + vorr q10,q6,q6 + add r9,r8,#1 + .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 + .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 + .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 + veor q2,q2,q7 + add r10,r8,#2 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + veor q3,q3,q7 + add r8,r8,#3 + .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 + .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 + .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 + veor q11,q11,q7 + rev r9,r9 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vmov.32 d1[1], r9 + rev r10,r10 + .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 + .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 + .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 + vmov.32 d3[1], r10 + rev r12,r8 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 + vmov.32 d21[1], r12 + subs r2,r2,#3 + .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 + .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 + .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 + + mov r6,r5 + veor q2,q2,q4 + veor q3,q3,q5 + veor q11,q11,q9 + vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vst1.8 {q2},[r1]! + vst1.8 {q3},[r1]! + vst1.8 {q11},[r1]! + bhs .Loop3x_ctr32 + + adds r2,r2,#3 + beq .Lctr32_done + cmp r2,#1 + mov r12,#16 + moveq r12,#0 + +.Lctr32_tail: + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + vld1.32 {q8},[r7]! + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + subs r6,r6,#2 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + vld1.32 {q9},[r7]! 
+ .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + bgt .Lctr32_tail + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q2},[r0],r12 + .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 + .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 + vld1.8 {q3},[r0] + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 + .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 + .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 + veor q2,q2,q7 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + veor q3,q3,q7 + .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 + .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 + + cmp r2,#1 + veor q2,q2,q0 + veor q3,q3,q1 + vst1.8 {q2},[r1]! + beq .Lctr32_done + vst1.8 {q3},[r1] + +.Lctr32_done: + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r10,pc} +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif diff --git a/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S b/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S new file mode 100644 index 00000000000000..9738ed50235b3e --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/aes/bsaes-armv7.S @@ -0,0 +1,2546 @@ + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel +@ . Permission to use under GPL terms is +@ granted. +@ ==================================================================== + +@ Bit-sliced AES for ARM NEON +@ +@ February 2012. +@ +@ This implementation is direct adaptation of bsaes-x86_64 module for +@ ARM NEON. Except that this module is endian-neutral [in sense that +@ it can be compiled for either endianness] by courtesy of vld1.8's +@ neutrality. Initial version doesn't implement interface to OpenSSL, +@ only low-level primitives and unsupported entry points, just enough +@ to collect performance results, which for Cortex-A8 core are: +@ +@ encrypt 19.5 cycles per byte processed with 128-bit key +@ decrypt 22.1 cycles per byte processed with 128-bit key +@ key conv. 440 cycles per 128-bit key/0.18 of 8x block +@ +@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +@ which is [much] worse than anticipated (for further details see +@ http://www.openssl.org/~appro/Snapdragon-S4.html). +@ +@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +@ manages in 20.0 cycles]. +@ +@ When comparing to x86_64 results keep in mind that NEON unit is +@ [mostly] single-issue and thus can't [fully] benefit from +@ instruction-level parallelism. And when comparing to aes-armv4 +@ results keep in mind key schedule conversion overhead (see +@ bsaes-x86_64.pl for further details)... +@ +@ + +@ April-August 2013 +@ +@ Add CBC, CTR and XTS subroutines, adapt for kernel use. 
+@ +@ + +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr r6,_bsaes_decrypt8 + vldmia r4!, {q9} @ round 0 key + add r6,r6,#.LM0ISR-_bsaes_decrypt8 + + vldmia r6!, {q8} @ .LM0ISR + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, 
d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Ldec_sbox: + veor q1, q1, q4 + veor q3, q3, q4 + + veor q4, q4, q7 + veor q1, q1, q6 + veor q2, q2, q7 + veor q6, q6, q4 + + veor q0, q0, q1 + veor q2, q2, q5 + veor q7, q7, q6 + veor q3, q3, q0 + veor q5, q5, q0 + veor q1, q1, q3 + veor q11, q3, q0 + veor q10, q7, q4 + veor q9, q1, q6 + veor q13, q4, q0 + vmov q8, q10 + veor q12, q5, q2 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q6, q2 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q3, q7 + veor q12, q1, q5 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q4, q6 + veor q9, q9, q14 + vand q13, q0, q2 + vand q14, q7, q1 + vorr q15, q3, q5 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q5, q2 + veor q8, q1, q6 + veor q10, q15, q14 + vand q10, q10, q5 + veor q5, q5, q1 + vand q11, q1, q15 + vand q5, q5, q14 + veor q1, q11, q10 + veor q5, q5, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q2 + veor q12, q12, q8 + veor q2, q2, q6 + vand q8, q8, q15 + vand q6, q6, q13 + vand q12, q12, q14 + vand q2, q2, q9 + veor q8, q8, q12 + veor q2, q2, q6 + veor q12, q12, q11 + veor q6, q6, q10 + veor q5, q5, q12 + veor q2, q2, q12 + veor q1, q1, q8 + veor q6, q6, q8 + + veor q12, q3, q0 + veor q8, q7, q4 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q4 + vand q8, q8, q15 + vand q4, q4, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q4 + veor q12, q12, q11 + veor q4, q4, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q3 + veor q3, q3, q7 + vand q11, q7, q15 + vand q3, q3, q14 + veor q7, q11, q10 + veor q3, q3, q11 + veor q3, q3, q12 + veor q0, q0, q12 + veor q7, q7, q8 + veor q4, q4, q8 + veor q1, q1, q7 + veor q6, q6, q5 + + veor q4, q4, q1 + veor q2, q2, q7 + veor q5, q5, q7 + veor q4, q4, q2 + veor q7, q7, q0 + veor q4, q4, q5 + veor q3, q3, q6 + veor q6, q6, q1 + veor q3, q3, q4 + + veor q4, q4, q0 + veor q7, q7, q3 + subs r5,r5,#1 + bcc .Ldec_done + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 q8, q0, q0, #8 + vext.8 q14, q3, q3, #8 + vext.8 q15, q5, q5, #8 + veor q8, q8, q0 + vext.8 q9, q1, q1, #8 + veor q14, q14, q3 + vext.8 q10, q6, q6, #8 + veor q15, q15, q5 + vext.8 q11, q4, q4, #8 + veor q9, q9, q1 + vext.8 q12, q2, q2, #8 + veor q10, q10, q6 + vext.8 q13, q7, q7, #8 + veor q11, q11, q4 + veor q12, q12, q2 + veor q13, q13, q7 + + veor q0, q0, q14 + veor q1, q1, q14 + veor q6, q6, q8 + veor q2, q2, q10 + veor q4, q4, q9 + veor q1, q1, q15 + veor q6, q6, q15 + veor q2, q2, q14 + veor q7, q7, q11 + veor q4, q4, q14 + veor q3, q3, q12 + veor q2, q2, q15 + veor q7, q7, q15 + veor q5, q5, q13 + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + 
vext.8 q10, q6, q6, #12 + veor q1, q1, q9 + vext.8 q11, q4, q4, #12 + veor q6, q6, q10 + vext.8 q12, q2, q2, #12 + veor q4, q4, q11 + vext.8 q13, q7, q7, #12 + veor q2, q2, q12 + vext.8 q14, q3, q3, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q3, q3, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q2 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q2, q2, #8 + veor q12, q12, q4 + vext.8 q9, q7, q7, #8 + veor q15, q15, q3 + vext.8 q2, q4, q4, #8 + veor q11, q11, q6 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q4, q3, q3, #8 + veor q11, q11, q5 + vext.8 q3, q6, q6, #8 + veor q5, q9, q13 + veor q11, q11, q2 + veor q7, q7, q15 + veor q6, q4, q14 + veor q4, q8, q12 + veor q2, q3, q10 + vmov q3, q11 + @ vmov q5, q9 + vldmia r6, {q12} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq r6,r6,#0x10 + bne .Ldec_loop + vldmia r6, {q12} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q3, #1 + vshr.u64 q11, q2, #1 + veor q10, q10, q5 + veor q11, q11, q7 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q4 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q4, q4, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q2, #2 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q3, q3, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q4 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q4, q4, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q4, #4 + vshr.u64 q11, q6, #4 + veor q10, q10, q5 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q4, q4, q10 + veor q6, q6, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q6, q6, q8 + veor q4, q4, q8 + veor q2, q2, q8 + veor q7, q7, q8 + veor q3, q3, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size 
_bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr r6,_bsaes_encrypt8 + vldmia r4!, {q9} @ round 0 key + sub r6,r6,#_bsaes_encrypt8-.LM0SR + + vldmia r6!, {q8} @ .LM0SR +_bsaes_encrypt8_alt: + veor q10, q0, q9 @ xor with round0 key + veor q11, q1, q9 + vtbl.8 d0, {q10}, d16 + vtbl.8 d1, {q10}, d17 + veor q12, q2, q9 + vtbl.8 d2, {q11}, d16 + vtbl.8 d3, {q11}, d17 + veor q13, q3, q9 + vtbl.8 d4, {q12}, d16 + vtbl.8 d5, {q12}, d17 + veor q14, q4, q9 + vtbl.8 d6, {q13}, d16 + vtbl.8 d7, {q13}, d17 + veor q15, q5, q9 + vtbl.8 d8, {q14}, d16 + vtbl.8 d9, {q14}, d17 + veor q10, q6, q9 + vtbl.8 d10, {q15}, d16 + vtbl.8 d11, {q15}, d17 + veor q11, q7, q9 + vtbl.8 d12, {q10}, d16 + vtbl.8 d13, {q10}, d17 + vtbl.8 d14, {q11}, d16 + vtbl.8 d15, {q11}, d17 +_bsaes_encrypt8_bitslice: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q6, #1 + vshr.u64 q11, q4, #1 + veor q10, q10, q7 + veor q11, q11, q5 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #1 + veor q5, q5, q11 + vshl.u64 q11, q11, #1 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q3 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q3, q3, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q5, #2 + vshr.u64 q11, q4, #2 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q9 + vand q11, q11, q9 + veor q7, q7, q10 + vshl.u64 q10, q10, #2 + veor q6, q6, q11 + vshl.u64 q11, q11, #2 + veor q5, q5, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q3 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q3, q3, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q3, #4 + vshr.u64 q11, q2, #4 + veor q10, q10, q7 + veor q11, q11, q6 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q6, q6, q11 + vshl.u64 q11, q11, #4 + veor q3, q3, q10 + veor q2, q2, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q5 + veor q11, q11, q4 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q4, q4, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + sub r5,r5,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: + vldmia r4!, {q8-q11} + veor q8, q8, q0 + veor q9, q9, q1 + vtbl.8 d0, {q8}, d24 + vtbl.8 d1, {q8}, d25 + vldmia r4!, {q8} + veor q10, q10, q2 + vtbl.8 d2, {q9}, d24 + vtbl.8 d3, {q9}, d25 + vldmia r4!, {q9} + veor q11, q11, q3 + vtbl.8 d4, {q10}, d24 + vtbl.8 d5, {q10}, d25 + vldmia r4!, {q10} + vtbl.8 d6, {q11}, d24 + vtbl.8 d7, {q11}, d25 + vldmia r4!, {q11} + veor q8, q8, q4 + veor q9, q9, q5 + vtbl.8 d8, {q8}, d24 + vtbl.8 d9, {q8}, d25 + veor q10, q10, q6 + vtbl.8 d10, {q9}, d24 + vtbl.8 d11, {q9}, d25 + veor q11, q11, q7 + vtbl.8 d12, {q10}, d24 + vtbl.8 d13, {q10}, d25 + vtbl.8 d14, {q11}, d24 + vtbl.8 d15, {q11}, d25 +.Lenc_sbox: + veor q2, q2, q1 + veor q5, q5, q6 + veor q3, q3, q0 + veor q6, q6, q2 + veor q5, q5, q0 + + veor q6, q6, q3 + veor q3, q3, q7 + veor q7, q7, q5 + veor q3, q3, q4 + veor q4, q4, q5 + + veor q2, q2, q7 + veor q3, q3, q1 + veor q1, q1, q5 + veor q11, q7, q4 + veor q10, q1, q2 + veor q9, q5, q3 + veor q13, q2, q4 + vmov q8, q10 + veor q12, q6, q0 + + vorr q10, q10, q9 + veor q15, q11, q8 + vand q14, q11, q12 + vorr 
q11, q11, q12 + veor q12, q12, q9 + vand q8, q8, q9 + veor q9, q3, q0 + vand q15, q15, q12 + vand q13, q13, q9 + veor q9, q7, q1 + veor q12, q5, q6 + veor q11, q11, q13 + veor q10, q10, q13 + vand q13, q9, q12 + vorr q9, q9, q12 + veor q11, q11, q15 + veor q8, q8, q13 + veor q10, q10, q14 + veor q9, q9, q15 + veor q8, q8, q14 + vand q12, q2, q3 + veor q9, q9, q14 + vand q13, q4, q0 + vand q14, q1, q5 + vorr q15, q7, q6 + veor q11, q11, q12 + veor q9, q9, q14 + veor q8, q8, q15 + veor q10, q10, q13 + + @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 + + @ new smaller inversion + + vand q14, q11, q9 + vmov q12, q8 + + veor q13, q10, q14 + veor q15, q8, q14 + veor q14, q8, q14 @ q14=q15 + + vbsl q13, q9, q8 + vbsl q15, q11, q10 + veor q11, q11, q10 + + vbsl q12, q13, q14 + vbsl q8, q14, q13 + + vand q14, q12, q15 + veor q9, q9, q8 + + veor q14, q14, q11 + veor q12, q6, q0 + veor q8, q5, q3 + veor q10, q15, q14 + vand q10, q10, q6 + veor q6, q6, q5 + vand q11, q5, q15 + vand q6, q6, q14 + veor q5, q11, q10 + veor q6, q6, q11 + veor q15, q15, q13 + veor q14, q14, q9 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q0 + veor q12, q12, q8 + veor q0, q0, q3 + vand q8, q8, q15 + vand q3, q3, q13 + vand q12, q12, q14 + vand q0, q0, q9 + veor q8, q8, q12 + veor q0, q0, q3 + veor q12, q12, q11 + veor q3, q3, q10 + veor q6, q6, q12 + veor q0, q0, q12 + veor q5, q5, q8 + veor q3, q3, q8 + + veor q12, q7, q4 + veor q8, q1, q2 + veor q11, q15, q14 + veor q10, q13, q9 + vand q11, q11, q12 + vand q10, q10, q4 + veor q12, q12, q8 + veor q4, q4, q2 + vand q8, q8, q15 + vand q2, q2, q13 + vand q12, q12, q14 + vand q4, q4, q9 + veor q8, q8, q12 + veor q4, q4, q2 + veor q12, q12, q11 + veor q2, q2, q10 + veor q15, q15, q13 + veor q14, q14, q9 + veor q10, q15, q14 + vand q10, q10, q7 + veor q7, q7, q1 + vand q11, q1, q15 + vand q7, q7, q14 + veor q1, q11, q10 + veor q7, q7, q11 + veor q7, q7, q12 + veor q4, q4, q12 + veor q1, q1, q8 + veor q2, q2, q8 + veor q7, q7, q0 + veor q1, q1, q6 + veor q6, q6, q0 + veor q4, q4, q7 + veor q0, q0, q1 + + veor q1, q1, q5 + veor q5, q5, q2 + veor q2, q2, q3 + veor q3, q3, q5 + veor q4, q4, q5 + + veor q6, q6, q3 + subs r5,r5,#1 + bcc .Lenc_done + vext.8 q8, q0, q0, #12 @ x0 <<< 32 + vext.8 q9, q1, q1, #12 + veor q0, q0, q8 @ x0 ^ (x0 <<< 32) + vext.8 q10, q4, q4, #12 + veor q1, q1, q9 + vext.8 q11, q6, q6, #12 + veor q4, q4, q10 + vext.8 q12, q3, q3, #12 + veor q6, q6, q11 + vext.8 q13, q7, q7, #12 + veor q3, q3, q12 + vext.8 q14, q2, q2, #12 + veor q7, q7, q13 + vext.8 q15, q5, q5, #12 + veor q2, q2, q14 + + veor q9, q9, q0 + veor q5, q5, q15 + vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor q10, q10, q1 + veor q8, q8, q5 + veor q9, q9, q5 + vext.8 q1, q1, q1, #8 + veor q13, q13, q3 + veor q0, q0, q8 + veor q14, q14, q7 + veor q1, q1, q9 + vext.8 q8, q3, q3, #8 + veor q12, q12, q6 + vext.8 q9, q7, q7, #8 + veor q15, q15, q2 + vext.8 q3, q6, q6, #8 + veor q11, q11, q4 + vext.8 q7, q5, q5, #8 + veor q12, q12, q5 + vext.8 q6, q2, q2, #8 + veor q11, q11, q5 + vext.8 q2, q4, q4, #8 + veor q5, q9, q13 + veor q4, q8, q12 + veor q3, q3, q11 + veor q7, q7, q15 + veor q6, q6, q14 + @ vmov q4, q8 + veor q2, q2, q10 + @ vmov q5, q9 + vldmia r6, {q12} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq r6,r6,#0x10 + bne .Lenc_loop + vldmia r6, {q12} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: + vmov.i8 q8,#0x55 @ compose .LBS0 + vmov.i8 q9,#0x33 @ compose .LBS1 + vshr.u64 q10, q2, #1 + vshr.u64 q11, q3, #1 + veor q10, q10, q5 + veor q11, q11, q7 + 
vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #1 + veor q7, q7, q11 + vshl.u64 q11, q11, #1 + veor q2, q2, q10 + veor q3, q3, q11 + vshr.u64 q10, q4, #1 + vshr.u64 q11, q0, #1 + veor q10, q10, q6 + veor q11, q11, q1 + vand q10, q10, q8 + vand q11, q11, q8 + veor q6, q6, q10 + vshl.u64 q10, q10, #1 + veor q1, q1, q11 + vshl.u64 q11, q11, #1 + veor q4, q4, q10 + veor q0, q0, q11 + vmov.i8 q8,#0x0f @ compose .LBS2 + vshr.u64 q10, q7, #2 + vshr.u64 q11, q3, #2 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q9 + vand q11, q11, q9 + veor q5, q5, q10 + vshl.u64 q10, q10, #2 + veor q2, q2, q11 + vshl.u64 q11, q11, #2 + veor q7, q7, q10 + veor q3, q3, q11 + vshr.u64 q10, q1, #2 + vshr.u64 q11, q0, #2 + veor q10, q10, q6 + veor q11, q11, q4 + vand q10, q10, q9 + vand q11, q11, q9 + veor q6, q6, q10 + vshl.u64 q10, q10, #2 + veor q4, q4, q11 + vshl.u64 q11, q11, #2 + veor q1, q1, q10 + veor q0, q0, q11 + vshr.u64 q10, q6, #4 + vshr.u64 q11, q4, #4 + veor q10, q10, q5 + veor q11, q11, q2 + vand q10, q10, q8 + vand q11, q11, q8 + veor q5, q5, q10 + vshl.u64 q10, q10, #4 + veor q2, q2, q11 + vshl.u64 q11, q11, #4 + veor q6, q6, q10 + veor q4, q4, q11 + vshr.u64 q10, q1, #4 + vshr.u64 q11, q0, #4 + veor q10, q10, q7 + veor q11, q11, q3 + vand q10, q10, q8 + vand q11, q11, q8 + veor q7, q7, q10 + vshl.u64 q10, q10, #4 + veor q3, q3, q11 + vshl.u64 q11, q11, #4 + veor q1, q1, q10 + veor q0, q0, q11 + vldmia r4, {q8} @ last round key + veor q4, q4, q8 + veor q6, q6, q8 + veor q3, q3, q8 + veor q7, q7, q8 + veor q2, q2, q8 + veor q5, q5, q8 + veor q0, q0, q8 + veor q1, q1, q8 + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr r6,_bsaes_key_convert + vld1.8 {q7}, [r4]! @ load round 0 key + sub r6,r6,#_bsaes_key_convert-.LM0 + vld1.8 {q15}, [r4]! @ load round 1 key + + vmov.i8 q8, #0x01 @ bit masks + vmov.i8 q9, #0x02 + vmov.i8 q10, #0x04 + vmov.i8 q11, #0x08 + vmov.i8 q12, #0x10 + vmov.i8 q13, #0x20 + vldmia r6, {q14} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 q7, q7 + vrev32.8 q15, q15 +#endif + sub r5,r5,#1 + vstmia r12!, {q7} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 d14,{q15},d28 + vtbl.8 d15,{q15},d29 + vmov.i8 q6, #0x40 + vmov.i8 q15, #0x80 + + vtst.8 q0, q7, q8 + vtst.8 q1, q7, q9 + vtst.8 q2, q7, q10 + vtst.8 q3, q7, q11 + vtst.8 q4, q7, q12 + vtst.8 q5, q7, q13 + vtst.8 q6, q7, q6 + vtst.8 q7, q7, q15 + vld1.8 {q15}, [r4]! 
@ load next round key + vmvn q0, q0 @ "pnot" + vmvn q1, q1 + vmvn q5, q5 + vmvn q6, q6 +#ifdef __ARMEL__ + vrev32.8 q15, q15 +#endif + subs r5,r5,#1 + vstmia r12!,{q0-q7} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 q7,#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp r2, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ IV is 1st arg on the stack + mov r2, r2, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + vldmia sp, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia sp, {q7} +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r3, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: +#endif + + vld1.8 {q15}, [r8] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs r2, r2, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {q0-q1}, [r0]! @ load input + vld1.8 {q2-q3}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + vld1.8 {q4-q5}, [r0]! + mov r5, r10 + vld1.8 {q6-q7}, [r0] + sub r0, r0, #0x60 + vstmia r9, {q15} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q14-q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + veor q5, q5, q14 + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + vst1.8 {q5}, [r1]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds r2, r2, #8 + beq .Lcbc_dec_done + + vld1.8 {q0}, [r0]! @ load input + cmp r2, #2 + blo .Lcbc_dec_one + vld1.8 {q1}, [r0]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, sp @ pass the key +#else + add r4, r3, #248 +#endif + mov r5, r10 + vstmia r9, {q15} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {q2}, [r0]! + cmp r2, #4 + blo .Lcbc_dec_three + vld1.8 {q3}, [r0]! + beq .Lcbc_dec_four + vld1.8 {q4}, [r0]! + cmp r2, #6 + blo .Lcbc_dec_five + vld1.8 {q5}, [r0]! + beq .Lcbc_dec_six + vld1.8 {q6}, [r0]! + sub r0, r0, #0x70 + + bl _bsaes_decrypt8 + + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! 
@ write output + veor q3, q3, q13 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + vst1.8 {q3}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub r0, r0, #0x60 + bl _bsaes_decrypt8 + vldmia r9,{q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q12}, [r0]! + veor q4, q4, q10 + veor q2, q2, q11 + vld1.8 {q15}, [r0]! + veor q7, q7, q12 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + vst1.8 {q7}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub r0, r0, #0x50 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10-q11}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + veor q2, q2, q11 + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + vst1.8 {q2}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub r0, r0, #0x40 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q10}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vld1.8 {q15}, [r0]! + veor q4, q4, q10 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + vst1.8 {q4}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub r0, r0, #0x30 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8-q9}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! + veor q1, q1, q8 + veor q6, q6, q9 + vst1.8 {q0-q1}, [r1]! @ write output + vst1.8 {q6}, [r1]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub r0, r0, #0x20 + bl _bsaes_decrypt8 + vldmia r9, {q14} @ reload IV + vld1.8 {q8}, [r0]! @ reload input + veor q0, q0, q14 @ ^= IV + vld1.8 {q15}, [r0]! @ reload input + veor q1, q1, q8 + vst1.8 {q0-q1}, [r1]! 
@ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub r0, r0, #0x10 + mov r10, r1 @ save original out pointer + mov r1, r9 @ use the iv scratch space as out buffer + mov r2, r3 + vmov q4,q15 @ just in case ensure that IV + vmov q5,q0 @ and input are preserved + bl AES_decrypt + vld1.8 {q0}, [r9,:64] @ load result + veor q0, q0, q4 @ ^= IV + vmov q15, q5 @ q5 holds input + vst1.8 {q0}, [r10] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lcbc_dec_bzero +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + vst1.8 {q15}, [r8] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp r2, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr r8, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov r9, sp @ save sp + + ldr r10, [r3, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key + add r12, #96 @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + mov sp, r12 @ sp is sp + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + + vld1.8 {q0}, [r8] @ load counter + add r8, r6, #.LREVM0SR-.LM0 @ borrow r8 + vldmia sp, {q4} @ load round0 key +#else + ldr r12, [r3, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [r3, #244] + mov r4, r3 @ pass key + mov r5, r10 @ pass # of rounds + add r12, r3, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7,q7,q15 @ fix up last round key + vstmia r12, {q7} @ save last round key + +.align 2 +0: add r12, r3, #248 + vld1.8 {q0}, [r8] @ load counter + adrl r8, .LREVM0SR @ borrow r8 + vldmia r12, {q4} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 q8,#1 @ compose 1<<96 + veor q9,q9,q9 + vrev32.8 q0,q0 + vext.8 q8,q9,q8,#4 + vrev32.8 q4,q4 + vadd.u32 q9,q8,q8 @ compose 2<<96 + vstmia sp, {q4} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 q10, q8, q9 @ compose 3<<96 + vadd.u32 q1, q0, q8 @ +1 + vadd.u32 q2, q0, q9 @ +2 + vadd.u32 q3, q0, q10 @ +3 + vadd.u32 q4, q1, q10 + vadd.u32 q5, q2, q10 + vadd.u32 q6, q3, q10 + vadd.u32 q7, q4, q10 + vadd.u32 q10, q5, q10 @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia sp, {q9} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x10 @ pass next round key +#else + add r4, r3, #264 +#endif + vldmia r8, {q8} @ .LREVM0SR + mov r5, r10 @ pass rounds + vstmia r9, {q10} @ save next counter + sub r6, r8, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs r2, r2, #8 + blo .Lctr_enc_loop_done + + vld1.8 {q8-q9}, [r0]! @ load input + vld1.8 {q10-q11}, [r0]! + veor q0, q8 + veor q1, q9 + vld1.8 {q12-q13}, [r0]! + veor q4, q10 + veor q6, q11 + vld1.8 {q14-q15}, [r0]! + veor q3, q12 + vst1.8 {q0-q1}, [r1]! @ write output + veor q7, q13 + veor q2, q14 + vst1.8 {q4}, [r1]! + veor q5, q15 + vst1.8 {q6}, [r1]! 
+ vmov.i32 q8, #1 @ compose 1<<96 + vst1.8 {q3}, [r1]! + veor q9, q9, q9 + vst1.8 {q7}, [r1]! + vext.8 q8, q9, q8, #4 + vst1.8 {q2}, [r1]! + vadd.u32 q9,q8,q8 @ compose 2<<96 + vst1.8 {q5}, [r1]! + vldmia r9, {q0} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add r2, r2, #8 + vld1.8 {q8}, [r0]! @ load input + veor q0, q8 + vst1.8 {q0}, [r1]! @ write output + cmp r2, #2 + blo .Lctr_enc_done + vld1.8 {q9}, [r0]! + veor q1, q9 + vst1.8 {q1}, [r1]! + beq .Lctr_enc_done + vld1.8 {q10}, [r0]! + veor q4, q10 + vst1.8 {q4}, [r1]! + cmp r2, #4 + blo .Lctr_enc_done + vld1.8 {q11}, [r0]! + veor q6, q11 + vst1.8 {q6}, [r1]! + beq .Lctr_enc_done + vld1.8 {q12}, [r0]! + veor q3, q12 + vst1.8 {q3}, [r1]! + cmp r2, #6 + blo .Lctr_enc_done + vld1.8 {q13}, [r0]! + veor q7, q13 + vst1.8 {q7}, [r1]! + beq .Lctr_enc_done + vld1.8 {q14}, [r0] + veor q2, q14 + vst1.8 {q2}, [r1]! + +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r9 + bne .Lctr_enc_bzero +#else + vstmia sp, {q0-q1} +#endif + + mov sp, r9 + add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, r0 @ copy arguments + mov r5, r1 + mov r6, r2 + mov r7, r3 + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {q1}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {q1}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {q0}, [r4]! @ load input + vld1.8 {q1}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor q0,q0,q1 + vst1.8 {q0}, [r5]! 
@ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} @ save last round key +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + veor q7, q7, q15 @ fix up last round key + vstmia r12, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + subs r9, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! 
+ veor q9, q6, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds r9, #0x70 + bmi .Lxts_enc_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_enc_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q2, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! 
+ + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + veor q10, q3, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q6, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q4, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! 
+ mov r3, r4 + + vmov q8, q9 @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_enc_ret + sub r6, r8, #0x10 + +.Lxts_enc_steal: + ldrb r0, [r7], #1 + ldrb r1, [r8, #-0x10] + strb r0, [r8, #-0x10] + strb r1, [r8], #1 + + subs r9, #1 + bhi .Lxts_enc_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_encrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_enc_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future r3 + + mov r7, r0 + mov r8, r1 + mov r9, r2 + mov r10, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr r1, [r10, #240] @ get # of rounds + mov r3, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, r1, lsl#7 @ 128 bytes per inner round key + @ add r12, #96 @ size of bit-sliced key schedule + sub r12, #48 @ place for tweak[9] + + @ populate the key schedule + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} +#else + ldr r12, [r10, #244] + eors r12, #1 + beq 0f + + str r12, [r10, #244] + mov r4, r10 @ pass key + mov r5, r1 @ pass # of rounds + add r12, r10, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, r10, #248 + vldmia r4, {q6} + vstmia r12, {q15} @ save last round key + veor q7, q7, q6 @ fix up round 0 key + vstmia r4, {q7} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 {q8}, [r0] @ initial tweak + adr r2, .Lxts_magic + + tst r9, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne r9, #0x10 @ subtract another 16 bytes + subs r9, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia r2, {q5} @ load XTS magic + vshr.s64 q6, q8, #63 + mov r0, sp + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q9, #63 + veor q9, q9, q6 + vand q7, q7, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q10, #63 + veor q10, q10, q7 + vand q6, q6, q5 + vld1.8 {q0}, [r7]! + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q11, #63 + veor q11, q11, q6 + vand q7, q7, q5 + vld1.8 {q1}, [r7]! + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q12, #63 + veor q12, q12, q7 + vand q6, q6, q5 + vld1.8 {q2}, [r7]! + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! 
+ vswp d13,d12 + vshr.s64 q7, q13, #63 + veor q13, q13, q6 + vand q7, q7, q5 + vld1.8 {q3}, [r7]! + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q14, #63 + veor q14, q14, q7 + vand q6, q6, q5 + vld1.8 {q4}, [r7]! + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q15, #63 + veor q15, q15, q6 + vand q7, q7, q5 + vld1.8 {q5}, [r7]! + veor q4, q4, q12 + vadd.u64 q8, q15, q15 + vst1.64 {q15}, [r0,:128]! + vswp d15,d14 + veor q8, q8, q7 + vst1.64 {q8}, [r0,:128] @ next round tweak + + vld1.8 {q6-q7}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + veor q7, q7, q15 + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14-q15}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + veor q13, q5, q15 + vst1.8 {q12-q13}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + + subs r9, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds r9, #0x70 + bmi .Lxts_dec_done + + vldmia r2, {q5} @ load XTS magic + vshr.s64 q7, q8, #63 + mov r0, sp + vand q7, q7, q5 + vadd.u64 q9, q8, q8 + vst1.64 {q8}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q9, #63 + veor q9, q9, q7 + vand q6, q6, q5 + vadd.u64 q10, q9, q9 + vst1.64 {q9}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q10, #63 + veor q10, q10, q6 + vand q7, q7, q5 + vld1.8 {q0}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_1 + vadd.u64 q11, q10, q10 + vst1.64 {q10}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q11, #63 + veor q11, q11, q7 + vand q6, q6, q5 + vld1.8 {q1}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_2 + veor q0, q0, q8 + vadd.u64 q12, q11, q11 + vst1.64 {q11}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q12, #63 + veor q12, q12, q6 + vand q7, q7, q5 + vld1.8 {q2}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_3 + veor q1, q1, q9 + vadd.u64 q13, q12, q12 + vst1.64 {q12}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q13, #63 + veor q13, q13, q7 + vand q6, q6, q5 + vld1.8 {q3}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_4 + veor q2, q2, q10 + vadd.u64 q14, q13, q13 + vst1.64 {q13}, [r0,:128]! + vswp d13,d12 + vshr.s64 q7, q14, #63 + veor q14, q14, q6 + vand q7, q7, q5 + vld1.8 {q4}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_5 + veor q3, q3, q11 + vadd.u64 q15, q14, q14 + vst1.64 {q14}, [r0,:128]! + vswp d15,d14 + vshr.s64 q6, q15, #63 + veor q15, q15, q7 + vand q6, q6, q5 + vld1.8 {q5}, [r7]! + subs r9, #0x10 + bmi .Lxts_dec_6 + veor q4, q4, q12 + sub r9, #0x10 + vst1.64 {q15}, [r0,:128] @ next round tweak + + vld1.8 {q6}, [r7]! + veor q5, q5, q13 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q6, q6, q14 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vld1.64 {q14}, [r0,:128]! + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + veor q12, q3, q14 + vst1.8 {q10-q11}, [r8]! + vst1.8 {q12}, [r8]! 
+ + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {q14}, [r0,:128] @ next round tweak + + veor q4, q4, q12 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q5, q5, q13 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12-q13}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + veor q11, q7, q13 + vst1.8 {q10-q11}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {q13}, [r0,:128] @ next round tweak + + veor q3, q3, q11 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q4, q4, q12 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + vld1.64 {q12}, [r0,:128]! + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + veor q10, q2, q12 + vst1.8 {q8-q9}, [r8]! + vst1.8 {q10}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {q12}, [r0,:128] @ next round tweak + + veor q2, q2, q10 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q3, q3, q11 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10-q11}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + veor q9, q4, q11 + vst1.8 {q8-q9}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {q11}, [r0,:128] @ next round tweak + + veor q1, q1, q9 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q2, q2, q10 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + vld1.64 {q10}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + veor q8, q6, q10 + vst1.8 {q0-q1}, [r8]! + vst1.8 {q8}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {q10}, [r0,:128] @ next round tweak + + veor q0, q0, q8 +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, r10, #248 @ pass key schedule +#endif + veor q1, q1, q9 + mov r5, r1 @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {q8-q9}, [r0,:128]! + veor q0, q0, q8 + veor q1, q1, q9 + vst1.8 {q0-q1}, [r8]! + + vld1.64 {q8}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + mov r5, r2 @ preserve magic + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r8]! + mov r3, r4 + mov r2, r5 + + vmov q8, q9 @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds r9, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia r2, {q5} + vshr.s64 q6, q8, #63 + vand q6, q6, q5 + vadd.u64 q9, q8, q8 + vswp d13,d12 + veor q9, q9, q6 + + @ perform the final decryption with the last tweak value + vld1.8 {q0}, [r7]! 
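The tail handling this code is entering (including the .Lxts_dec_steal loop that follows) is standard XTS ciphertext stealing on the decrypt side: the last full ciphertext block is decrypted under the extra "next" tweak, the short tail is completed with the stolen bytes, and the reassembled block is decrypted under the previous tweak. A hedged Python sketch of that flow; dec_block and all names here are illustrative stand-ins, not functions from this patch:

    def xts_decrypt_steal(dec_block, c_last_full: bytes, c_tail: bytes,
                          t_prev: bytes, t_extra: bytes):
        """Decrypt-side ciphertext stealing for a trailing partial block (r = len(c_tail) < 16).
        dec_block(block16, tweak16) -> 16 plaintext bytes (single-block XTS decrypt)."""
        r = len(c_tail)
        pp = dec_block(c_last_full, t_extra)   # last full block, decrypted with the extra tweak
        p_tail = pp[:r]                        # trailing plaintext bytes
        cc = c_tail + pp[r:]                   # reassemble the stolen block
        p_prev = dec_block(cc, t_prev)         # decrypt it under the previous tweak
        return p_prev, p_tail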
+ mov r0, sp + veor q0, q0, q9 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + mov r4, r3 @ preserve fp + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q9 + vst1.8 {q0}, [r8] + + mov r6, r8 +.Lxts_dec_steal: + ldrb r1, [r8] + ldrb r0, [r7], #1 + strb r1, [r8, #0x10] + strb r0, [r8], #1 + + subs r9, #1 + bhi .Lxts_dec_steal + + vld1.8 {q0}, [r6] + mov r0, sp + veor q0, q8 + mov r1, sp + vst1.8 {q0}, [sp,:128] + mov r2, r10 + + bl AES_decrypt + + vld1.8 {q0}, [sp,:128] + veor q0, q0, q8 + vst1.8 {q0}, [r6] + mov r3, r4 +#endif + +.Lxts_dec_ret: + bic r0, r3, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [r3, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, r3 +#ifdef XTS_CHAIN_TWEAK + vst1.8 {q8}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +#endif diff --git a/deps/openssl/asm_obsolete/arm-void-gas/bn/armv4-gf2m.S b/deps/openssl/asm_obsolete/arm-void-gas/bn/armv4-gf2m.S new file mode 100644 index 00000000000000..32610558ac5853 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/bn/armv4-gf2m.S @@ -0,0 +1,203 @@ +#include "arm_arch.h" + +.text +.code 32 +.type mul_1x1_ialu,%function +.align 5 +mul_1x1_ialu: + mov r4,#0 + bic r5,r1,#3<<30 @ a1=a&0x3fffffff + str r4,[sp,#0] @ tab[0]=0 + add r6,r5,r5 @ a2=a1<<1 + str r5,[sp,#4] @ tab[1]=a1 + eor r7,r5,r6 @ a1^a2 + str r6,[sp,#8] @ tab[2]=a2 + mov r8,r5,lsl#2 @ a4=a1<<2 + str r7,[sp,#12] @ tab[3]=a1^a2 + eor r9,r5,r8 @ a1^a4 + str r8,[sp,#16] @ tab[4]=a4 + eor r4,r6,r8 @ a2^a4 + str r9,[sp,#20] @ tab[5]=a1^a4 + eor r7,r7,r8 @ a1^a2^a4 + str r4,[sp,#24] @ tab[6]=a2^a4 + and r8,r12,r0,lsl#2 + str r7,[sp,#28] @ tab[7]=a1^a2^a4 + + and r9,r12,r0,lsr#1 + ldr r5,[sp,r8] @ tab[b & 0x7] + and r8,r12,r0,lsr#4 + ldr r7,[sp,r9] @ tab[b >> 3 & 0x7] + and r9,r12,r0,lsr#7 + ldr r6,[sp,r8] @ tab[b >> 6 & 0x7] + eor r5,r5,r7,lsl#3 @ stall + mov r4,r7,lsr#29 + ldr r7,[sp,r9] @ tab[b >> 9 & 0x7] + + and r8,r12,r0,lsr#10 + eor r5,r5,r6,lsl#6 + eor r4,r4,r6,lsr#26 + ldr r6,[sp,r8] @ tab[b >> 12 & 0x7] + + and r9,r12,r0,lsr#13 + eor r5,r5,r7,lsl#9 + eor r4,r4,r7,lsr#23 + ldr r7,[sp,r9] @ tab[b >> 15 & 0x7] + + and r8,r12,r0,lsr#16 + eor r5,r5,r6,lsl#12 + eor r4,r4,r6,lsr#20 + ldr r6,[sp,r8] @ tab[b >> 18 & 0x7] + + and r9,r12,r0,lsr#19 + eor r5,r5,r7,lsl#15 + eor r4,r4,r7,lsr#17 + ldr r7,[sp,r9] @ tab[b >> 21 & 0x7] + + and r8,r12,r0,lsr#22 + eor r5,r5,r6,lsl#18 + eor r4,r4,r6,lsr#14 + ldr r6,[sp,r8] @ tab[b >> 24 & 0x7] + + and r9,r12,r0,lsr#25 + eor r5,r5,r7,lsl#21 + eor r4,r4,r7,lsr#11 + ldr r7,[sp,r9] @ tab[b >> 27 & 0x7] + + tst r1,#1<<30 + and r8,r12,r0,lsr#28 + eor r5,r5,r6,lsl#24 + eor r4,r4,r6,lsr#8 + ldr r6,[sp,r8] @ tab[b >> 30 ] + + eorne r5,r5,r0,lsl#30 + eorne r4,r4,r0,lsr#2 + tst r1,#1<<31 + eor r5,r5,r7,lsl#27 + eor r4,r4,r7,lsr#5 + eorne r5,r5,r0,lsl#31 + eorne r4,r4,r0,lsr#1 + eor r5,r5,r6,lsl#30 + eor r4,r4,r6,lsr#2 + + mov pc,lr +.size mul_1x1_ialu,.-mul_1x1_ialu +.global bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,%function +.align 5 +bn_GF2m_mul_2x2: +#if __ARM_MAX_ARCH__>=7 + ldr r12,.LOPENSSL_armcap +.Lpic: ldr r12,[pc,r12] + tst r12,#1 + bne .LNEON +#endif + stmdb sp!,{r4-r10,lr} + mov r10,r0 @ reassign 1st argument + mov r0,r3 @ r0=b1 + ldr r3,[sp,#32] @ load b0 + mov r12,#7<<2 + sub sp,sp,#32 @ allocate tab[8] + + bl mul_1x1_ialu @ a1·b1 + str r5,[r10,#8] + str r4,[r10,#12] + + eor r0,r0,r3 @ flip b0 and b1 + eor 
r1,r1,r2 @ flip a0 and a1 + eor r3,r3,r0 + eor r2,r2,r1 + eor r0,r0,r3 + eor r1,r1,r2 + bl mul_1x1_ialu @ a0·b0 + str r5,[r10] + str r4,[r10,#4] + + eor r1,r1,r2 + eor r0,r0,r3 + bl mul_1x1_ialu @ (a1+a0)·(b1+b0) + ldmia r10,{r6-r9} + eor r5,r5,r4 + eor r4,r4,r7 + eor r5,r5,r6 + eor r4,r4,r8 + eor r5,r5,r9 + eor r4,r4,r9 + str r4,[r10,#8] + eor r5,r5,r4 + add sp,sp,#32 @ destroy tab[8] + str r5,[r10,#4] + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r10,pc} +#else + ldmia sp!,{r4-r10,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.align 5 +.LNEON: + ldr r12, [sp] @ 5th argument + vmov.32 d26, r2, r1 + vmov.32 d27, r12, r3 + vmov.i64 d28, #0x0000ffffffffffff + vmov.i64 d29, #0x00000000ffffffff + vmov.i64 d30, #0x000000000000ffff + + vext.8 d2, d26, d26, #1 @ A1 + vmull.p8 q1, d2, d27 @ F = A1*B + vext.8 d0, d27, d27, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d4, d26, d26, #2 @ A2 + vmull.p8 q2, d4, d27 @ H = A2*B + vext.8 d16, d27, d27, #2 @ B2 + vmull.p8 q8, d26, d16 @ G = A*B2 + vext.8 d6, d26, d26, #3 @ A3 + veor q1, q1, q0 @ L = E + F + vmull.p8 q3, d6, d27 @ J = A3*B + vext.8 d0, d27, d27, #3 @ B3 + veor q2, q2, q8 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8 + vand d3, d3, d28 + vext.8 d16, d27, d27, #4 @ B4 + veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16 + vand d5, d5, d29 + vmull.p8 q8, d26, d16 @ K = A*B4 + veor q3, q3, q0 @ N = I + J + veor d2, d2, d3 + veor d4, d4, d5 + veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24 + vand d7, d7, d30 + vext.8 q1, q1, q1, #15 + veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d17, #0 + vext.8 q2, q2, q2, #14 + veor d6, d6, d7 + vmull.p8 q0, d26, d27 @ D = A*B + vext.8 q8, q8, q8, #12 + vext.8 q3, q3, q3, #13 + veor q1, q1, q2 + veor q3, q3, q8 + veor q0, q0, q1 + veor q0, q0, q3 + + vst1.32 {q0}, [r0] + bx lr @ bx lr +#endif +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +#if __ARM_MAX_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-(.Lpic+8) +#endif +.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " +.align 5 + +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm_obsolete/arm-void-gas/bn/armv4-mont.S b/deps/openssl/asm_obsolete/arm-void-gas/bn/armv4-mont.S new file mode 100644 index 00000000000000..71fc296fcab181 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/bn/armv4-mont.S @@ -0,0 +1,580 @@ +#include "arm_arch.h" + +.text +.code 32 + +#if __ARM_MAX_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-bn_mul_mont +#endif + +.global bn_mul_mont +.type bn_mul_mont,%function + +.align 5 +bn_mul_mont: + ldr ip,[sp,#4] @ load num + stmdb sp!,{r0,r2} @ sp points at argument block +#if __ARM_MAX_ARCH__>=7 + tst ip,#7 + bne .Lialu + adr r0,bn_mul_mont + ldr r2,.LOPENSSL_armcap + ldr r0,[r0,r2] + tst r0,#1 @ NEON available? 
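The bn_GF2m_mul_2x2 routine just above is a Karatsuba combination of three 1x1 carry-less multiplies (a1·b1, a0·b0, (a1+a0)·(b1+b0)), as its comments note. A small Python reference of the same arithmetic, with hypothetical names and no claim to match the register allocation above:

    def clmul(a: int, b: int) -> int:
        """Carry-less multiply: treat a and b as GF(2) polynomials."""
        r = 0
        while b:
            if b & 1:
                r ^= a
            a <<= 1
            b >>= 1
        return r

    def gf2m_mul_2x2(a1: int, a0: int, b1: int, b0: int):
        """(a1*x^32 + a0) * (b1*x^32 + b0) over GF(2): 32-bit halves in, 128-bit product out."""
        hi = clmul(a1, b1)
        lo = clmul(a0, b0)
        mid = clmul(a1 ^ a0, b1 ^ b0) ^ hi ^ lo   # Karatsuba middle term
        r = (hi << 64) ^ (mid << 32) ^ lo
        return r >> 64, r & ((1 << 64) - 1)       # high and low 64-bit halves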
+ ldmia sp, {r0,r2} + beq .Lialu + add sp,sp,#8 + b bn_mul8x_mont_neon +.align 4 +.Lialu: +#endif + cmp ip,#2 + mov r0,ip @ load num + movlt r0,#0 + addlt sp,sp,#2*4 + blt .Labrt + + stmdb sp!,{r4-r12,lr} @ save 10 registers + + mov r0,r0,lsl#2 @ rescale r0 for byte count + sub sp,sp,r0 @ alloca(4*num) + sub sp,sp,#4 @ +extra dword + sub r0,r0,#4 @ "num=num-1" + add r4,r2,r0 @ &bp[num-1] + + add r0,sp,r0 @ r0 to point at &tp[num-1] + ldr r8,[r0,#14*4] @ &n0 + ldr r2,[r2] @ bp[0] + ldr r5,[r1],#4 @ ap[0],ap++ + ldr r6,[r3],#4 @ np[0],np++ + ldr r8,[r8] @ *n0 + str r4,[r0,#15*4] @ save &bp[num] + + umull r10,r11,r5,r2 @ ap[0]*bp[0] + str r8,[r0,#14*4] @ save n0 value + mul r8,r10,r8 @ "tp[0]"*n0 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" + mov r4,sp + +.L1st: + ldr r5,[r1],#4 @ ap[j],ap++ + mov r10,r11 + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[0] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .L1st + + adds r12,r12,r11 + ldr r4,[r0,#13*4] @ restore bp + mov r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + +.Louter: + sub r7,r0,sp @ "original" r0-1 value + sub r1,r1,r7 @ "rewind" ap to &ap[1] + ldr r2,[r4,#4]! @ *(++bp) + sub r3,r3,r7 @ "rewind" np to &np[1] + ldr r5,[r1,#-4] @ ap[0] + ldr r10,[sp] @ tp[0] + ldr r6,[r3,#-4] @ np[0] + ldr r7,[sp,#4] @ tp[1] + + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] + str r4,[r0,#13*4] @ save bp + mul r8,r10,r8 + mov r12,#0 + umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" + mov r4,sp + +.Linner: + ldr r5,[r1],#4 @ ap[j],ap++ + adds r10,r11,r7 @ +=tp[j] + ldr r6,[r3],#4 @ np[j],np++ + mov r11,#0 + umlal r10,r11,r5,r2 @ ap[j]*bp[i] + mov r14,#0 + umlal r12,r14,r6,r8 @ np[j]*n0 + adc r11,r11,#0 + ldr r7,[r4,#8] @ tp[j+1] + adds r12,r12,r10 + str r12,[r4],#4 @ tp[j-1]=,tp++ + adc r12,r14,#0 + cmp r4,r0 + bne .Linner + + adds r12,r12,r11 + mov r14,#0 + ldr r4,[r0,#13*4] @ restore bp + adc r14,r14,#0 + ldr r8,[r0,#14*4] @ restore n0 + adds r12,r12,r7 + ldr r7,[r0,#15*4] @ restore &bp[num] + adc r14,r14,#0 + str r12,[r0] @ tp[num-1]= + str r14,[r0,#4] @ tp[num]= + + cmp r4,r7 + bne .Louter + + ldr r2,[r0,#12*4] @ pull rp + add r0,r0,#4 @ r0 to point at &tp[num] + sub r5,r0,sp @ "original" num value + mov r4,sp @ "rewind" r4 + mov r1,r4 @ "borrow" r1 + sub r3,r3,r5 @ "rewind" r3 to &np[0] + + subs r7,r7,r7 @ "clear" carry flag +.Lsub: ldr r7,[r4],#4 + ldr r6,[r3],#4 + sbcs r7,r7,r6 @ tp[j]-np[j] + str r7,[r2],#4 @ rp[j]= + teq r4,r0 @ preserve carry + bne .Lsub + sbcs r14,r14,#0 @ upmost carry + mov r4,sp @ "rewind" r4 + sub r2,r2,r5 @ "rewind" r2 + + and r1,r4,r14 + bic r3,r2,r14 + orr r1,r1,r3 @ ap=borrow?tp:rp + +.Lcopy: ldr r7,[r1],#4 @ copy or in-place refresh + str sp,[r4],#4 @ zap tp + str r7,[r2],#4 + cmp r4,r0 + bne .Lcopy + + add sp,r0,#4 @ skip over tp[num+1] + ldmia sp!,{r4-r12,lr} @ restore registers + add sp,sp,#2*4 @ skip over {r0,r2} + mov r0,#1 +.Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size bn_mul_mont,.-bn_mul_mont +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4-r11} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load rest of parameter block + + sub r7,sp,#16 + vld1.32 {d28[0]}, 
[r2,:32]! + sub r7,r7,r5,lsl#4 + vld1.32 {d0-d3}, [r1]! @ can't specify :32 :-( + and r7,r7,#-64 + vld1.32 {d30[0]}, [r4,:32] + mov sp,r7 @ alloca + veor d8,d8,d8 + subs r8,r5,#8 + vzip.16 d28,d8 + + vmull.u32 q6,d28,d0[0] + vmull.u32 q7,d28,d0[1] + vmull.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmull.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + vmul.u32 d29,d10,d30 + + vmull.u32 q10,d28,d2[0] + vld1.32 {d4-d7}, [r3]! + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + @ special case for num=8, everything is in register bank... + + vmlal.u32 q6,d29,d4[0] + sub r9,r5,#1 + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {d28[0]}, [r2,:32]! + veor d8,d8,d8 + vzip.16 d28,d8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vshl.i64 d10,d13,#16 + vmlal.u32 q9,d28,d1[1] + + vadd.u64 d10,d10,d12 + veor d8,d8,d8 + subs r9,r9,#1 + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + + vmlal.u32 q6,d29,d4[0] + vmlal.u32 q7,d29,d4[1] + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vmov q5,q6 + vmlal.u32 q11,d29,d6[1] + vmov q6,q7 + vmlal.u32 q12,d29,d7[0] + vmov q7,q8 + vmlal.u32 q13,d29,d7[1] + vmov q8,q9 + vmov q9,q10 + vshr.u64 d10,d10,#16 + vmov q10,q11 + vmov q11,q12 + vadd.u64 d10,d10,d11 + vmov q12,q13 + veor q13,q13 + vshr.u64 d10,d10,#16 + + bne .LNEON_outer8 + + vadd.u64 d12,d12,d10 + mov r7,sp + vshr.u64 d10,d12,#16 + mov r8,r5 + vadd.u64 d13,d13,d10 + add r6,sp,#16 + vshr.u64 d10,d13,#16 + vzip.16 d12,d13 + + b .LNEON_tail2 + +.align 4 +.LNEON_1st: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0-d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + + vmlal.u32 q10,d29,d6[0] + vld1.32 {d4-d5}, [r3]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vmlal.u32 q13,d29,d7[1] + vst1.64 {q8-q9}, [r7,:256]! + + vmull.u32 q6,d28,d0[0] + vld1.32 {d6-d7}, [r3]! + vmull.u32 q7,d28,d0[1] + vst1.64 {q10-q11}, [r7,:256]! + vmull.u32 q8,d28,d1[0] + vmull.u32 q9,d28,d1[1] + vst1.64 {q12-q13}, [r7,:256]! + + vmull.u32 q10,d28,d2[0] + vmull.u32 q11,d28,d2[1] + vmull.u32 q12,d28,d3[0] + vmull.u32 q13,d28,d3[1] + + bne .LNEON_1st + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + sub r9,r5,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vshr.u64 d10,d10,#16 + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q12,d29,d7[0] + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10-q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + veor q4,q4,q4 + vst1.64 {q12-q13}, [r7,:256]! + vld1.64 {q7-q8}, [r6, :256]! + vst1.64 {q4}, [r7,:128] + vshr.u64 d10,d10,#16 + + b .LNEON_outer + +.align 4 +.LNEON_outer: + vld1.32 {d28[0]}, [r2,:32]! + sub r3,r3,r5,lsl#2 @ rewind r3 + vld1.32 {d0-d3}, [r1]! 
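Both the scalar bn_mul_mont loops earlier in this file and the bn_mul8x_mont_neon path here compute a word-serial Montgomery product. The whole operation reduces to the textbook REDC step below; this Python sketch (illustrative names, whole-number form rather than the word-by-word carries of .L1st/.Linner) is only a cross-check of the result, not a model of the code layout:

    def mont_mul(a: int, b: int, n: int, num_words: int, word_bits: int = 32) -> int:
        """Return a*b*R^-1 mod n with R = 2^(word_bits*num_words); n must be odd."""
        R = 1 << (word_bits * num_words)
        n0 = pow(-n, -1, R)          # -n^-1 mod R; the asm keeps only its low word ("*n0")
        t = a * b
        m = (t * n0) % R             # multiple of n that clears the low words of t
        u = (t + m * n) >> (word_bits * num_words)
        return u - n if u >= n else u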
+ veor d8,d8,d8 + mov r7,sp + vzip.16 d28,d8 + sub r8,r5,#8 + vadd.u64 d12,d12,d10 + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9-q10},[r6,:256]! + vmlal.u32 q7,d28,d0[1] + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11-q12},[r6,:256]! + vmlal.u32 q9,d28,d1[1] + + vshl.i64 d10,d13,#16 + veor d8,d8,d8 + vadd.u64 d10,d10,d12 + vld1.64 {q13},[r6,:128]! + vmul.u32 d29,d10,d30 + + vmlal.u32 q10,d28,d2[0] + vld1.32 {d4-d7}, [r3]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vzip.16 d29,d8 + vmlal.u32 q13,d28,d3[1] + +.LNEON_inner: + vmlal.u32 q6,d29,d4[0] + vld1.32 {d0-d3}, [r1]! + vmlal.u32 q7,d29,d4[1] + subs r8,r8,#8 + vmlal.u32 q8,d29,d5[0] + vmlal.u32 q9,d29,d5[1] + vst1.64 {q6-q7}, [r7,:256]! + + vmlal.u32 q10,d29,d6[0] + vld1.64 {q6}, [r6, :128]! + vmlal.u32 q11,d29,d6[1] + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7-q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + vst1.64 {q10-q11}, [r7,:256]! + + vmlal.u32 q6,d28,d0[0] + vld1.64 {q9-q10}, [r6, :256]! + vmlal.u32 q7,d28,d0[1] + vst1.64 {q12-q13}, [r7,:256]! + vmlal.u32 q8,d28,d1[0] + vld1.64 {q11-q12}, [r6, :256]! + vmlal.u32 q9,d28,d1[1] + vld1.32 {d4-d7}, [r3]! + + vmlal.u32 q10,d28,d2[0] + vld1.64 {q13}, [r6, :128]! + vmlal.u32 q11,d28,d2[1] + vmlal.u32 q12,d28,d3[0] + vmlal.u32 q13,d28,d3[1] + + bne .LNEON_inner + + vmlal.u32 q6,d29,d4[0] + add r6,sp,#16 + vmlal.u32 q7,d29,d4[1] + sub r1,r1,r5,lsl#2 @ rewind r1 + vmlal.u32 q8,d29,d5[0] + vld1.64 {q5}, [sp,:128] + vmlal.u32 q9,d29,d5[1] + subs r9,r9,#1 + + vmlal.u32 q10,d29,d6[0] + vst1.64 {q6-q7}, [r7,:256]! + vmlal.u32 q11,d29,d6[1] + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d10,#16 + vst1.64 {q8-q9}, [r7,:256]! + vmlal.u32 q12,d29,d7[0] + vld1.64 {q7-q8}, [r6, :256]! + vmlal.u32 q13,d29,d7[1] + + vst1.64 {q10-q11}, [r7,:256]! + vadd.u64 d10,d10,d11 + vst1.64 {q12-q13}, [r7,:256]! + vshr.u64 d10,d10,#16 + + bne .LNEON_outer + + mov r7,sp + mov r8,r5 + +.LNEON_tail: + vadd.u64 d12,d12,d10 + vld1.64 {q9-q10}, [r6, :256]! + vshr.u64 d10,d12,#16 + vadd.u64 d13,d13,d10 + vld1.64 {q11-q12}, [r6, :256]! + vshr.u64 d10,d13,#16 + vld1.64 {q13}, [r6, :128]! + vzip.16 d12,d13 + +.LNEON_tail2: + vadd.u64 d14,d14,d10 + vst1.32 {d12[0]}, [r7, :32]! + vshr.u64 d10,d14,#16 + vadd.u64 d15,d15,d10 + vshr.u64 d10,d15,#16 + vzip.16 d14,d15 + + vadd.u64 d16,d16,d10 + vst1.32 {d14[0]}, [r7, :32]! + vshr.u64 d10,d16,#16 + vadd.u64 d17,d17,d10 + vshr.u64 d10,d17,#16 + vzip.16 d16,d17 + + vadd.u64 d18,d18,d10 + vst1.32 {d16[0]}, [r7, :32]! + vshr.u64 d10,d18,#16 + vadd.u64 d19,d19,d10 + vshr.u64 d10,d19,#16 + vzip.16 d18,d19 + + vadd.u64 d20,d20,d10 + vst1.32 {d18[0]}, [r7, :32]! + vshr.u64 d10,d20,#16 + vadd.u64 d21,d21,d10 + vshr.u64 d10,d21,#16 + vzip.16 d20,d21 + + vadd.u64 d22,d22,d10 + vst1.32 {d20[0]}, [r7, :32]! + vshr.u64 d10,d22,#16 + vadd.u64 d23,d23,d10 + vshr.u64 d10,d23,#16 + vzip.16 d22,d23 + + vadd.u64 d24,d24,d10 + vst1.32 {d22[0]}, [r7, :32]! + vshr.u64 d10,d24,#16 + vadd.u64 d25,d25,d10 + vld1.64 {q6}, [r6, :128]! + vshr.u64 d10,d25,#16 + vzip.16 d24,d25 + + vadd.u64 d26,d26,d10 + vst1.32 {d24[0]}, [r7, :32]! + vshr.u64 d10,d26,#16 + vadd.u64 d27,d27,d10 + vld1.64 {q7-q8}, [r6, :256]! + vshr.u64 d10,d27,#16 + vzip.16 d26,d27 + subs r8,r8,#8 + vst1.32 {d26[0]}, [r7, :32]! 
+ + bne .LNEON_tail + + vst1.32 {d10[0]}, [r7, :32] @ top-most bit + sub r3,r3,r5,lsl#2 @ rewind r3 + subs r1,sp,#0 @ clear carry flag + add r2,sp,r5,lsl#2 + +.LNEON_sub: + ldmia r1!, {r4-r7} + ldmia r3!, {r8-r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq r1,r2 @ preserves carry + stmia r0!, {r8-r11} + bne .LNEON_sub + + ldr r10, [r1] @ load top-most bit + veor q0,q0,q0 + sub r11,r2,sp @ this is num*4 + veor q1,q1,q1 + mov r1,sp + sub r0,r0,r11 @ rewind r0 + mov r3,r2 @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia r1!, {r4-r7} + ldmia r0, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r11,r7 + ldmia r1, {r4-r7} + stmia r0!, {r8-r11} + sub r1,r1,#16 + ldmia r0, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [r1,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [r3,:256]! @ wipe + movcc r11,r7 + teq r1,r2 @ preserves carry + stmia r0!, {r8-r11} + bne .LNEON_copy_n_zap + + sub sp,ip,#96 + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r11} + bx lr @ .word 0xe12fff1e +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " +.align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S new file mode 100644 index 00000000000000..c54f5149974c6f --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S @@ -0,0 +1,523 @@ +#include "arm_arch.h" + +.text +.code 32 + +.type rem_4bit,%object +.align 5 +rem_4bit: +.short 0x0000,0x1C20,0x3840,0x2460 +.short 0x7080,0x6CA0,0x48C0,0x54E0 +.short 0xE100,0xFD20,0xD940,0xC560 +.short 0x9180,0x8DA0,0xA9C0,0xB5E0 +.size rem_4bit,.-rem_4bit + +.type rem_4bit_get,%function +rem_4bit_get: + sub r2,pc,#8 + sub r2,r2,#32 @ &rem_4bit + b .Lrem_4bit_got + nop +.size rem_4bit_get,.-rem_4bit_get + +.global gcm_ghash_4bit +.type gcm_ghash_4bit,%function +gcm_ghash_4bit: + sub r12,pc,#8 + add r3,r2,r3 @ r3 to point at the end + stmdb sp!,{r3-r11,lr} @ save r3/end too + sub r12,r12,#48 @ &rem_4bit + + ldmia r12,{r4-r11} @ copy rem_4bit ... + stmdb sp!,{r4-r11} @ ... 
to stack + + ldrb r12,[r2,#15] + ldrb r14,[r0,#15] +.Louter: + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4-r7} @ load Htbl[nlo] + add r11,r1,r14 + ldrb r12,[r2,#14] + + and r14,r4,#0xf @ rem + ldmia r11,{r8-r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[sp,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + ldrb r14,[r0,#14] + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + eor r12,r12,r14 + and r14,r12,#0xf0 + and r12,r12,#0x0f + eor r7,r7,r8,lsl#16 + +.Linner: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add r12,r12,r12 + ldmia r11,{r8-r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[sp,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrplb r12,[r2,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8-r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + ldrplb r8,[r0,r3] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r9,[sp,r14] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eorpl r12,r12,r8 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem] + bpl .Linner + + ldr r3,[sp,#32] @ re-load r3/end + add r2,r2,#16 + mov r14,r4 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + cmp r2,r3 +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + ldrneb r12,[r2,#15] +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + + bne .Louter + + add sp,sp,#36 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_ghash_4bit,.-gcm_ghash_4bit + +.global gcm_gmult_4bit +.type gcm_gmult_4bit,%function +gcm_gmult_4bit: + stmdb sp!,{r4-r11,lr} + ldrb r12,[r0,#15] + b rem_4bit_get +.Lrem_4bit_got: + and r14,r12,#0xf0 + and r12,r12,#0x0f + mov r3,#14 + + add r7,r1,r12,lsl#4 + ldmia r7,{r4-r7} @ load Htbl[nlo] + ldrb r12,[r0,#14] + + add r11,r1,r14 + and r14,r4,#0xf @ rem + ldmia r11,{r8-r11} @ load Htbl[nhi] + add r14,r14,r14 + eor r4,r8,r4,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + and r14,r12,#0xf0 + eor r7,r7,r8,lsl#16 + and r12,r12,#0x0f + +.Loop: + add r11,r1,r12,lsl#4 + and r12,r4,#0xf @ rem + subs r3,r3,#1 + add 
r12,r12,r12 + ldmia r11,{r8-r11} @ load Htbl[nlo] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + eor r5,r5,r6,lsl#28 + ldrh r8,[r2,r12] @ rem_4bit[rem] + eor r6,r10,r6,lsr#4 + ldrplb r12,[r0,r3] + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + + add r11,r1,r14 + and r14,r4,#0xf @ rem + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + add r14,r14,r14 + ldmia r11,{r8-r11} @ load Htbl[nhi] + eor r4,r8,r4,lsr#4 + eor r4,r4,r5,lsl#28 + eor r5,r9,r5,lsr#4 + ldrh r8,[r2,r14] @ rem_4bit[rem] + eor r5,r5,r6,lsl#28 + eor r6,r10,r6,lsr#4 + eor r6,r6,r7,lsl#28 + eor r7,r11,r7,lsr#4 + andpl r14,r12,#0xf0 + andpl r12,r12,#0x0f + eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem] + bpl .Loop +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r4,r4 + str r4,[r0,#12] +#elif defined(__ARMEB__) + str r4,[r0,#12] +#else + mov r9,r4,lsr#8 + strb r4,[r0,#12+3] + mov r10,r4,lsr#16 + strb r9,[r0,#12+2] + mov r11,r4,lsr#24 + strb r10,[r0,#12+1] + strb r11,[r0,#12] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r5,r5 + str r5,[r0,#8] +#elif defined(__ARMEB__) + str r5,[r0,#8] +#else + mov r9,r5,lsr#8 + strb r5,[r0,#8+3] + mov r10,r5,lsr#16 + strb r9,[r0,#8+2] + mov r11,r5,lsr#24 + strb r10,[r0,#8+1] + strb r11,[r0,#8] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r6,r6 + str r6,[r0,#4] +#elif defined(__ARMEB__) + str r6,[r0,#4] +#else + mov r9,r6,lsr#8 + strb r6,[r0,#4+3] + mov r10,r6,lsr#16 + strb r9,[r0,#4+2] + mov r11,r6,lsr#24 + strb r10,[r0,#4+1] + strb r11,[r0,#4] +#endif + +#if __ARM_ARCH__>=7 && defined(__ARMEL__) + rev r7,r7 + str r7,[r0,#0] +#elif defined(__ARMEB__) + str r7,[r0,#0] +#else + mov r9,r7,lsr#8 + strb r7,[r0,#0+3] + mov r10,r7,lsr#16 + strb r9,[r0,#0+2] + mov r11,r7,lsr#24 + strb r10,[r0,#0+1] + strb r11,[r0,#0] +#endif + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size gcm_gmult_4bit,.-gcm_gmult_4bit +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 d7,[r1,:64]! @ load H + vmov.i8 q8,#0xe1 + vld1.64 d6,[r1,:64] + vshl.i64 d17,#57 + vshr.u64 d16,#63 @ t0=0xc2....01 + vdup.8 q9,d7[7] + vshr.u64 d26,d6,#63 + vshr.s8 q9,#7 @ broadcast carry bit + vshl.i64 q3,q3,#1 + vand q8,q8,q9 + vorr d7,d26 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vstmia r0,{q3} + + bx lr @ bx lr +.size gcm_init_neon,.-gcm_init_neon + +.global gcm_gmult_neon +.type gcm_gmult_neon,%function +.align 4 +gcm_gmult_neon: + vld1.64 d7,[r0,:64]! @ load Xi + vld1.64 d6,[r0,:64]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26-d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff +#ifdef __ARMEL__ + vrev64.8 q3,q3 +#endif + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + mov r3,#16 + b .Lgmult_neon +.size gcm_gmult_neon,.-gcm_gmult_neon + +.global gcm_ghash_neon +.type gcm_ghash_neon,%function +.align 4 +gcm_ghash_neon: + vld1.64 d1,[r0,:64]! @ load Xi + vld1.64 d0,[r0,:64]! + vmov.i64 d29,#0x0000ffffffffffff + vldmia r1,{d26-d27} @ load twisted H + vmov.i64 d30,#0x00000000ffffffff +#ifdef __ARMEL__ + vrev64.8 q0,q0 +#endif + vmov.i64 d31,#0x000000000000ffff + veor d28,d26,d27 @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 d7,[r2]! @ load inp + vld1.64 d6,[r2]! 
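gcm_gmult_4bit/gcm_ghash_4bit walk the input a nibble at a time through the Htbl and rem_4bit tables, and gcm_ghash_neon reaches the same result with polynomial multiplies; both compute the GHASH product in GF(2^128). The bit-serial reference below (illustrative code, not part of the patch) shows the underlying operation, including the 0xe1 reduction constant from which the rem_4bit entries and the vmov.i8 q8,#0xe1 loads are derived:

    def gf128_mul_ghash(x: int, y: int) -> int:
        """GF(2^128) multiply in GHASH's bit-reflected convention.
        x and y are 16-byte blocks interpreted as big-endian integers."""
        R = 0xE1000000000000000000000000000000   # x^128 + x^7 + x^2 + x + 1, reflected
        z, v = 0, x
        for i in range(128):
            if (y >> (127 - i)) & 1:             # take y's bits MSB-first
                z ^= v
            v = (v >> 1) ^ R if v & 1 else v >> 1
        return z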
+#ifdef __ARMEL__ + vrev64.8 q3,q3 +#endif + veor q3,q0 @ inp^=Xi +.Lgmult_neon: + vext.8 d16, d26, d26, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d0, d6, d6, #1 @ B1 + vmull.p8 q0, d26, d0 @ E = A*B1 + vext.8 d18, d26, d26, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d26, d22 @ G = A*B2 + vext.8 d20, d26, d26, #3 @ A3 + veor q8, q8, q0 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d0, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q0, d26, d0 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d26, d22 @ K = A*B4 + veor q10, q10, q0 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q0, d26, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q0, q0, q8 + veor q0, q0, q10 + veor d6,d6,d7 @ Karatsuba pre-processing + vext.8 d16, d28, d28, #1 @ A1 + vmull.p8 q8, d16, d6 @ F = A1*B + vext.8 d2, d6, d6, #1 @ B1 + vmull.p8 q1, d28, d2 @ E = A*B1 + vext.8 d18, d28, d28, #2 @ A2 + vmull.p8 q9, d18, d6 @ H = A2*B + vext.8 d22, d6, d6, #2 @ B2 + vmull.p8 q11, d28, d22 @ G = A*B2 + vext.8 d20, d28, d28, #3 @ A3 + veor q8, q8, q1 @ L = E + F + vmull.p8 q10, d20, d6 @ J = A3*B + vext.8 d2, d6, d6, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q1, d28, d2 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d6, d6, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d28, d22 @ K = A*B4 + veor q10, q10, q1 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q1, d28, d6 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q1, q1, q8 + veor q1, q1, q10 + vext.8 d16, d27, d27, #1 @ A1 + vmull.p8 q8, d16, d7 @ F = A1*B + vext.8 d4, d7, d7, #1 @ B1 + vmull.p8 q2, d27, d4 @ E = A*B1 + vext.8 d18, d27, d27, #2 @ A2 + vmull.p8 q9, d18, d7 @ H = A2*B + vext.8 d22, d7, d7, #2 @ B2 + vmull.p8 q11, d27, d22 @ G = A*B2 + vext.8 d20, d27, d27, #3 @ A3 + veor q8, q8, q2 @ L = E + F + vmull.p8 q10, d20, d7 @ J = A3*B + vext.8 d4, d7, d7, #3 @ B3 + veor q9, q9, q11 @ M = G + H + vmull.p8 q2, d27, d4 @ I = A*B3 + veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 + vand d17, d17, d29 + vext.8 d22, d7, d7, #4 @ B4 + veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 + vand d19, d19, d30 + vmull.p8 q11, d27, d22 @ K = A*B4 + veor q10, q10, q2 @ N = I + J + veor d16, d16, d17 + veor d18, d18, d19 + veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 + vand d21, d21, d31 + vext.8 q8, q8, q8, #15 + veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 + vmov.i64 d23, #0 + vext.8 q9, q9, q9, #14 + veor d20, d20, d21 + vmull.p8 q2, d27, d7 @ D = A*B + vext.8 q11, q11, q11, #12 + vext.8 q10, q10, q10, #13 + veor q8, q8, q9 + veor q10, q10, q11 + veor q2, q2, q8 + veor q2, q2, q10 + veor q1,q1,q0 @ Karatsuba post-processing + veor q1,q1,q2 + veor d1,d1,d2 + veor d4,d4,d3 @ Xh|Xl - 256-bit result + + @ 
equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 q9,q0,#57 @ 1st phase + vshl.i64 q10,q0,#62 + veor q10,q10,q9 @ + vshl.i64 q9,q0,#63 + veor q10, q10, q9 @ + veor d1,d1,d20 @ + veor d4,d4,d21 + + vshr.u64 q10,q0,#1 @ 2nd phase + veor q2,q2,q0 + veor q0,q0,q10 @ + vshr.u64 q10,q10,#6 + vshr.u64 q0,q0,#1 @ + veor q0,q0,q2 @ + veor q0,q0,q10 @ + + subs r3,#16 + bne .Loop_neon + +#ifdef __ARMEL__ + vrev64.8 q0,q0 +#endif + sub r0,#16 + vst1.64 d1,[r0,:64]! @ write out Xi + vst1.64 d0,[r0,:64] + + bx lr @ bx lr +.size gcm_ghash_neon,.-gcm_ghash_neon +#endif +.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by " +.align 2 diff --git a/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S new file mode 100644 index 00000000000000..570d9175c47605 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S @@ -0,0 +1,116 @@ +#include "arm_arch.h" + +.text +.fpu neon +.code 32 +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {q9},[r1] @ load H + vmov.i8 q8,#0xe1 + vext.8 q3,q9,q9,#8 + vshl.i64 q8,q8,#57 + vshr.u64 q10,q8,#63 + vext.8 q8,q10,q8,#8 @ t0=0xc2....01 + vdup.32 q9,d18[1] + vshr.u64 q11,q3,#63 + vshr.s32 q9,q9,#31 @ broadcast carry bit + vand q11,q11,q8 + vshl.i64 q3,q3,#1 + vext.8 q11,q11,q11,#8 + vand q8,q8,q9 + vorr q3,q3,q11 @ H<<<=1 + veor q3,q3,q8 @ twisted H + vst1.64 {q3},[r0] + + bx lr +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {q9},[r0] @ load Xi + vmov.i8 q11,#0xe1 + vld1.64 {q12},[r1] @ load twisted H + vshl.u64 q11,q11,#57 +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vext.8 q13,q12,q12,#8 + mov r3,#0 + vext.8 q3,q9,q9,#8 + mov r12,#0 + veor q13,q13,q12 @ Karatsuba pre-processing + mov r2,r0 + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + vld1.64 {q0},[r0] @ load [rotated] Xi + subs r3,r3,#16 + vmov.i8 q11,#0xe1 + mov r12,#16 + vld1.64 {q12},[r1] @ load twisted H + moveq r12,#0 + vext.8 q0,q0,q0,#8 + vshl.u64 q11,q11,#57 + vld1.64 {q9},[r2],r12 @ load [rotated] inp + vext.8 q13,q12,q12,#8 +#ifndef __ARMEB__ + vrev64.8 q0,q0 + vrev64.8 q9,q9 +#endif + veor q13,q13,q12 @ Karatsuba pre-processing + vext.8 q3,q9,q9,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + vext.8 q10,q0,q0,#8 + veor q3,q3,q0 @ inp^=Xi + veor q9,q9,q10 @ q9 is rotated inp^Xi + +.Lgmult_v8: + .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi + subs r3,r3,#16 + .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + moveq r12,#0 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] inp + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + veor q0,q1,q10 + vext.8 q3,q9,q9,#8 + + vext.8 q10,q0,q0,#8 @ 2nd phase + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + bhs .Loop_v8 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_ghash_v8,.-gcm_ghash_v8 +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 diff --git a/deps/openssl/asm_obsolete/arm-void-gas/sha/sha1-armv4-large.S 
b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha1-armv4-large.S new file mode 100644 index 00000000000000..b0893359cac772 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha1-armv4-large.S @@ -0,0 +1,1455 @@ +#include "arm_arch.h" + +.text +.code 32 + +.global sha1_block_data_order +.type sha1_block_data_order,%function + +.align 5 +sha1_block_data_order: +#if __ARM_MAX_ARCH__>=7 + sub r3,pc,#8 @ sha1_block_data_order + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA1 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + ldmia r0,{r3,r4,r5,r6,r7} +.Lloop: + ldr r8,.LK_00_19 + mov r14,sp + sub sp,sp,#15*4 + mov r5,r5,ror#30 + mov r6,r6,ror#30 + mov r7,r7,ror#30 @ [6] +.L_00_15: +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r6,r8,r6,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r4,r5 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r6,r8,r6,ror#2 @ E+=K_00_19 + eor r10,r4,r5 @ F_xx_xx + add r6,r6,r7,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r3,r10,ror#2 + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r6,r6,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r5,r8,r5,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r3,r4 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r5,r8,r5,ror#2 @ E+=K_00_19 + eor r10,r3,r4 @ F_xx_xx + add r5,r5,r6,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r7,r10,ror#2 + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r5,r5,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r4,r8,r4,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r7,r3 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r4,r8,r4,ror#2 @ E+=K_00_19 + eor r10,r7,r3 @ F_xx_xx + add r4,r4,r5,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r6,r10,ror#2 + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! 
+ add r4,r4,r10 @ E+=F_00_19(B,C,D) +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r3,r8,r3,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r6,r7 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r3,r8,r3,ror#2 @ E+=K_00_19 + eor r10,r6,r7 @ F_xx_xx + add r3,r3,r4,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r5,r10,ror#2 + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r3,r3,r10 @ E+=F_00_19(B,C,D) + teq r14,sp + bne .L_00_15 @ [((11+4)*5+2)*3] + sub sp,sp,#25*4 +#if __ARM_ARCH__<7 + ldrb r10,[r1,#2] + ldrb r9,[r1,#3] + ldrb r11,[r1,#1] + add r7,r8,r7,ror#2 @ E+=K_00_19 + ldrb r12,[r1],#4 + orr r9,r9,r10,lsl#8 + eor r10,r5,r6 @ F_xx_xx + orr r9,r9,r11,lsl#16 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + orr r9,r9,r12,lsl#24 +#else + ldr r9,[r1],#4 @ handles unaligned + add r7,r8,r7,ror#2 @ E+=K_00_19 + eor r10,r5,r6 @ F_xx_xx + add r7,r7,r3,ror#27 @ E+=ROR(A,27) +#ifdef __ARMEL__ + rev r9,r9 @ byte swap +#endif +#endif + and r10,r4,r10,ror#2 + add r7,r7,r9 @ E+=X[i] + eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) + str r9,[r14,#-4]! + add r7,r7,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) + add r6,r6,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) + add r5,r5,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) + add r4,r4,r10 @ E+=F_00_19(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) + add r3,r3,r10 @ E+=F_00_19(B,C,D) + + ldr r8,.LK_20_39 @ [+15+16*4] + cmn sp,#0 @ [+3], clear carry to denote 20_39 +.L_20_39_or_60_79: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ eor r10,r4,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r3,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r7,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r6,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_20_39(B,C,D) + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + eor r10,r5,r10,ror#2 @ F_xx_xx + @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_20_39(B,C,D) + teq r14,sp @ preserve carry + bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] + bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes + + ldr r8,.LK_40_59 + sub sp,sp,#20*4 @ [+2] +.L_40_59: + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r7,r8,r7,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r5,r6 @ F_xx_xx + mov r9,r9,ror#31 + add r7,r7,r3,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r4,r10,ror#2 @ F_xx_xx + and r11,r5,r6 @ F_xx_xx + add r7,r7,r9 @ E+=X[i] + add r7,r7,r10 @ E+=F_40_59(B,C,D) + add r7,r7,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r6,r8,r6,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r4,r5 @ F_xx_xx + mov r9,r9,ror#31 + add r6,r6,r7,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r3,r10,ror#2 @ F_xx_xx + and r11,r4,r5 @ F_xx_xx + add r6,r6,r9 @ E+=X[i] + add r6,r6,r10 @ E+=F_40_59(B,C,D) + add r6,r6,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r5,r8,r5,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r3,r4 @ F_xx_xx + mov r9,r9,ror#31 + add r5,r5,r6,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r7,r10,ror#2 @ F_xx_xx + and r11,r3,r4 @ F_xx_xx + add r5,r5,r9 @ E+=X[i] + add r5,r5,r10 @ E+=F_40_59(B,C,D) + add r5,r5,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r4,r8,r4,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r7,r3 @ F_xx_xx + mov r9,r9,ror#31 + add r4,r4,r5,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! 
+ and r10,r6,r10,ror#2 @ F_xx_xx + and r11,r7,r3 @ F_xx_xx + add r4,r4,r9 @ E+=X[i] + add r4,r4,r10 @ E+=F_40_59(B,C,D) + add r4,r4,r11,ror#2 + ldr r9,[r14,#15*4] + ldr r10,[r14,#13*4] + ldr r11,[r14,#7*4] + add r3,r8,r3,ror#2 @ E+=K_xx_xx + ldr r12,[r14,#2*4] + eor r9,r9,r10 + eor r11,r11,r12 @ 1 cycle stall + eor r10,r6,r7 @ F_xx_xx + mov r9,r9,ror#31 + add r3,r3,r4,ror#27 @ E+=ROR(A,27) + eor r9,r9,r11,ror#31 + str r9,[r14,#-4]! + and r10,r5,r10,ror#2 @ F_xx_xx + and r11,r6,r7 @ F_xx_xx + add r3,r3,r9 @ E+=X[i] + add r3,r3,r10 @ E+=F_40_59(B,C,D) + add r3,r3,r11,ror#2 + teq r14,sp + bne .L_40_59 @ [+((12+5)*5+2)*4] + + ldr r8,.LK_60_79 + sub sp,sp,#20*4 + cmp sp,#0 @ set carry to denote 60_79 + b .L_20_39_or_60_79 @ [+4], spare 300 bytes +.L_done: + add sp,sp,#80*4 @ "deallocate" stack frame + ldmia r0,{r8,r9,r10,r11,r12} + add r3,r8,r3 + add r4,r9,r4 + add r5,r10,r5,ror#2 + add r6,r11,r6,ror#2 + add r7,r12,r7,ror#2 + stmia r0,{r3,r4,r5,r6,r7} + teq r1,r2 + bne .Lloop @ [+18], total 1307 + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha1_block_data_order,.-sha1_block_data_order + +.align 5 +.LK_00_19: .word 0x5a827999 +.LK_20_39: .word 0x6ed9eba1 +.LK_40_59: .word 0x8f1bbcdc +.LK_60_79: .word 0xca62c1d6 +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha1_block_data_order +#endif +.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 5 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov r14,sp + sub sp,sp,#64 @ alloca + adr r8,.LK_00_19 + bic sp,sp,#15 @ align for 128-bit stores + + ldmia r0,{r3,r4,r5,r6,r7} @ load context + mov r12,sp + + vld1.8 {q0-q1},[r1]! @ handles unaligned + veor q15,q15,q15 + vld1.8 {q2-q3},[r1]! + vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 + vrev32.8 q0,q0 @ yes, even on + vrev32.8 q1,q1 @ big-endian... + vrev32.8 q2,q2 + vadd.i32 q8,q0,q14 + vrev32.8 q3,q3 + vadd.i32 q9,q1,q14 + vst1.32 {q8},[r12,:128]! + vadd.i32 q10,q2,q14 + vst1.32 {q9},[r12,:128]! + vst1.32 {q10},[r12,:128]! + ldr r9,[sp] @ big RAW stall + +.Loop_neon: + vext.8 q8,q0,q1,#8 + bic r10,r6,r4 + add r7,r7,r9 + and r11,r5,r4 + vadd.i32 q13,q3,q14 + ldr r9,[sp,#4] + add r7,r7,r3,ror#27 + vext.8 q12,q3,q15,#4 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q8,q8,q0 + bic r10,r5,r3 + add r6,r6,r9 + veor q12,q12,q2 + and r11,r4,r3 + ldr r9,[sp,#8] + veor q12,q12,q8 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q13,q15,q12,#4 + bic r10,r4,r7 + add r5,r5,r9 + vadd.i32 q8,q12,q12 + and r11,r3,r7 + ldr r9,[sp,#12] + vsri.32 q8,q12,#31 + add r5,r5,r6,ror#27 + eor r11,r11,r10 + mov r7,r7,ror#2 + vshr.u32 q12,q13,#30 + add r5,r5,r11 + bic r10,r3,r6 + vshl.u32 q13,q13,#2 + add r4,r4,r9 + and r11,r7,r6 + veor q8,q8,q12 + ldr r9,[sp,#16] + add r4,r4,r5,ror#27 + veor q8,q8,q13 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q9,q1,q2,#8 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + vadd.i32 q13,q8,q14 + ldr r9,[sp,#20] + vld1.32 {d28[],d29[]},[r8,:32]! 
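The unrolled rounds above and the NEON message schedule that follows are the standard SHA-1 compression function: the .LK_00_19/.LK_20_39/.LK_40_59/.LK_60_79 words are the round constants, and the F_00_19/F_20_39/F_40_59 comments mark the three boolean round functions. A compact Python reference of one compression (illustrative only, not derived from this file) for checking the per-round updates:

    import struct

    def _rol(x, n):
        return ((x << n) | (x >> (32 - n))) & 0xFFFFFFFF

    def sha1_compress(state, block):
        """One 64-byte SHA-1 block; state is five 32-bit words (a, b, c, d, e)."""
        w = list(struct.unpack(">16I", block))
        for t in range(16, 80):
            w.append(_rol(w[t - 3] ^ w[t - 8] ^ w[t - 14] ^ w[t - 16], 1))
        a, b, c, d, e = state
        for t in range(80):
            if t < 20:
                f, k = (b & c) | (~b & d), 0x5A827999            # F_00_19
            elif t < 40:
                f, k = b ^ c ^ d, 0x6ED9EBA1                     # F_20_39
            elif t < 60:
                f, k = (b & c) | (b & d) | (c & d), 0x8F1BBCDC   # F_40_59
            else:
                f, k = b ^ c ^ d, 0xCA62C1D6                     # F_60_79
            a, b, c, d, e = (_rol(a, 5) + f + e + k + w[t]) & 0xFFFFFFFF, a, _rol(b, 30), c, d
        return tuple((s + v) & 0xFFFFFFFF for s, v in zip(state, (a, b, c, d, e)))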
+ add r3,r3,r4,ror#27 + vext.8 q12,q8,q15,#4 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + veor q9,q9,q1 + bic r10,r6,r4 + add r7,r7,r9 + veor q12,q12,q3 + and r11,r5,r4 + ldr r9,[sp,#24] + veor q12,q12,q9 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q13,q15,q12,#4 + bic r10,r5,r3 + add r6,r6,r9 + vadd.i32 q9,q12,q12 + and r11,r4,r3 + ldr r9,[sp,#28] + vsri.32 q9,q12,#31 + add r6,r6,r7,ror#27 + eor r11,r11,r10 + mov r3,r3,ror#2 + vshr.u32 q12,q13,#30 + add r6,r6,r11 + bic r10,r4,r7 + vshl.u32 q13,q13,#2 + add r5,r5,r9 + and r11,r3,r7 + veor q9,q9,q12 + ldr r9,[sp,#32] + add r5,r5,r6,ror#27 + veor q9,q9,q13 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q10,q2,q3,#8 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + vadd.i32 q13,q9,q14 + ldr r9,[sp,#36] + add r4,r4,r5,ror#27 + vext.8 q12,q9,q15,#4 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q10,q10,q2 + bic r10,r7,r5 + add r3,r3,r9 + veor q12,q12,q8 + and r11,r6,r5 + ldr r9,[sp,#40] + veor q12,q12,q10 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q13,q15,q12,#4 + bic r10,r6,r4 + add r7,r7,r9 + vadd.i32 q10,q12,q12 + and r11,r5,r4 + ldr r9,[sp,#44] + vsri.32 q10,q12,#31 + add r7,r7,r3,ror#27 + eor r11,r11,r10 + mov r4,r4,ror#2 + vshr.u32 q12,q13,#30 + add r7,r7,r11 + bic r10,r5,r3 + vshl.u32 q13,q13,#2 + add r6,r6,r9 + and r11,r4,r3 + veor q10,q10,q12 + ldr r9,[sp,#48] + add r6,r6,r7,ror#27 + veor q10,q10,q13 + eor r11,r11,r10 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q11,q3,q8,#8 + bic r10,r4,r7 + add r5,r5,r9 + and r11,r3,r7 + vadd.i32 q13,q10,q14 + ldr r9,[sp,#52] + add r5,r5,r6,ror#27 + vext.8 q12,q10,q15,#4 + eor r11,r11,r10 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q11,q11,q3 + bic r10,r3,r6 + add r4,r4,r9 + veor q12,q12,q9 + and r11,r7,r6 + ldr r9,[sp,#56] + veor q12,q12,q11 + add r4,r4,r5,ror#27 + eor r11,r11,r10 + vst1.32 {q13},[r12,:128]! + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q13,q15,q12,#4 + bic r10,r7,r5 + add r3,r3,r9 + vadd.i32 q11,q12,q12 + and r11,r6,r5 + ldr r9,[sp,#60] + vsri.32 q11,q12,#31 + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + vshr.u32 q12,q13,#30 + add r3,r3,r11 + bic r10,r6,r4 + vshl.u32 q13,q13,#2 + add r7,r7,r9 + and r11,r5,r4 + veor q11,q11,q12 + ldr r9,[sp,#0] + add r7,r7,r3,ror#27 + veor q11,q11,q13 + eor r11,r11,r10 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q10,q11,#8 + bic r10,r5,r3 + add r6,r6,r9 + and r11,r4,r3 + veor q0,q0,q8 + ldr r9,[sp,#4] + add r6,r6,r7,ror#27 + veor q0,q0,q1 + eor r11,r11,r10 + mov r3,r3,ror#2 + vadd.i32 q13,q11,q14 + add r6,r6,r11 + bic r10,r4,r7 + veor q12,q12,q0 + add r5,r5,r9 + and r11,r3,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! 
+ sub r12,r12,#64 + eor r11,r11,r10 + mov r7,r7,ror#2 + vsli.32 q0,q12,#2 + add r5,r5,r11 + bic r10,r3,r6 + add r4,r4,r9 + and r11,r7,r6 + ldr r9,[sp,#12] + add r4,r4,r5,ror#27 + eor r11,r11,r10 + mov r6,r6,ror#2 + add r4,r4,r11 + bic r10,r7,r5 + add r3,r3,r9 + and r11,r6,r5 + ldr r9,[sp,#16] + add r3,r3,r4,ror#27 + eor r11,r11,r10 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q11,q0,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#20] + veor q1,q1,q9 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q1,q1,q2 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q0,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q1 + ldr r9,[sp,#24] + eor r11,r10,r4 + vshr.u32 q1,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q1,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#28] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#32] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q0,q1,#8 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#36] + veor q2,q2,q10 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + veor q2,q2,q3 + mov r5,r5,ror#2 + add r3,r3,r11 + vadd.i32 q13,q1,q14 + eor r10,r4,r6 + vld1.32 {d28[],d29[]},[r8,:32]! + add r7,r7,r9 + veor q12,q12,q2 + ldr r9,[sp,#40] + eor r11,r10,r5 + vshr.u32 q2,q12,#30 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + vst1.32 {q13},[r12,:128]! + add r7,r7,r11 + eor r10,r3,r5 + vsli.32 q2,q12,#2 + add r6,r6,r9 + ldr r9,[sp,#44] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#48] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q1,q2,#8 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r7 + add r4,r4,r5,ror#27 + veor q3,q3,q8 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q2,q14 + eor r10,r5,r7 + add r3,r3,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r6 + vshr.u32 q3,q12,#30 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vst1.32 {q13},[r12,:128]! + add r3,r3,r11 + eor r10,r4,r6 + vsli.32 q3,q12,#2 + add r7,r7,r9 + ldr r9,[sp,#60] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#0] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q2,q3,#8 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#4] + veor q8,q8,q0 + eor r11,r10,r3 + add r5,r5,r6,ror#27 + veor q8,q8,q9 + mov r7,r7,ror#2 + add r5,r5,r11 + vadd.i32 q13,q3,q14 + eor r10,r6,r3 + add r4,r4,r9 + veor q12,q12,q8 + ldr r9,[sp,#8] + eor r11,r10,r7 + vshr.u32 q8,q12,#30 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + add r4,r4,r11 + eor r10,r5,r7 + vsli.32 q8,q12,#2 + add r3,r3,r9 + ldr r9,[sp,#12] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#16] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q3,q8,#8 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#20] + veor q9,q9,q1 + eor r11,r10,r4 + add r6,r6,r7,ror#27 + veor q9,q9,q10 + mov r3,r3,ror#2 + add r6,r6,r11 + vadd.i32 q13,q8,q14 + eor r10,r7,r4 + add r5,r5,r9 + veor q12,q12,q9 + ldr r9,[sp,#24] + eor r11,r10,r3 + vshr.u32 q9,q12,#30 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + vst1.32 {q13},[r12,:128]! 
+ add r5,r5,r11 + eor r10,r6,r3 + vsli.32 q9,q12,#2 + add r4,r4,r9 + ldr r9,[sp,#28] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#32] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q8,q9,#8 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#36] + veor q10,q10,q2 + add r7,r7,r3,ror#27 + eor r11,r5,r6 + veor q10,q10,q11 + add r7,r7,r10 + and r11,r11,r4 + vadd.i32 q13,q9,q14 + mov r4,r4,ror#2 + add r7,r7,r11 + veor q12,q12,q10 + add r6,r6,r9 + and r10,r4,r5 + vshr.u32 q10,q12,#30 + ldr r9,[sp,#40] + add r6,r6,r7,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r4,r5 + add r6,r6,r10 + vsli.32 q10,q12,#2 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#44] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#48] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + vext.8 q12,q9,q10,#8 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#52] + veor q11,q11,q3 + add r3,r3,r4,ror#27 + eor r11,r6,r7 + veor q11,q11,q0 + add r3,r3,r10 + and r11,r11,r5 + vadd.i32 q13,q10,q14 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + veor q12,q12,q11 + add r7,r7,r9 + and r10,r5,r6 + vshr.u32 q11,q12,#30 + ldr r9,[sp,#56] + add r7,r7,r3,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r5,r6 + add r7,r7,r10 + vsli.32 q11,q12,#2 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#60] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#0] + add r5,r5,r6,ror#27 + eor r11,r3,r4 + add r5,r5,r10 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + vext.8 q12,q10,q11,#8 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#4] + veor q0,q0,q8 + add r4,r4,r5,ror#27 + eor r11,r7,r3 + veor q0,q0,q1 + add r4,r4,r10 + and r11,r11,r6 + vadd.i32 q13,q11,q14 + mov r6,r6,ror#2 + add r4,r4,r11 + veor q12,q12,q0 + add r3,r3,r9 + and r10,r6,r7 + vshr.u32 q0,q12,#30 + ldr r9,[sp,#8] + add r3,r3,r4,ror#27 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + eor r11,r6,r7 + add r3,r3,r10 + vsli.32 q0,q12,#2 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#12] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#16] + add r6,r6,r7,ror#27 + eor r11,r4,r5 + add r6,r6,r10 + and r11,r11,r3 + mov r3,r3,ror#2 + add r6,r6,r11 + vext.8 q12,q11,q0,#8 + add r5,r5,r9 + and r10,r3,r4 + ldr r9,[sp,#20] + veor q1,q1,q9 + add r5,r5,r6,ror#27 + eor r11,r3,r4 + veor q1,q1,q2 + add r5,r5,r10 + and r11,r11,r7 + vadd.i32 q13,q0,q14 + mov r7,r7,ror#2 + add r5,r5,r11 + veor q12,q12,q1 + add r4,r4,r9 + and r10,r7,r3 + vshr.u32 q1,q12,#30 + ldr r9,[sp,#24] + add r4,r4,r5,ror#27 + vst1.32 {q13},[r12,:128]! 
+ eor r11,r7,r3 + add r4,r4,r10 + vsli.32 q1,q12,#2 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#28] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + add r7,r7,r9 + and r10,r5,r6 + ldr r9,[sp,#32] + add r7,r7,r3,ror#27 + eor r11,r5,r6 + add r7,r7,r10 + and r11,r11,r4 + mov r4,r4,ror#2 + add r7,r7,r11 + vext.8 q12,q0,q1,#8 + add r6,r6,r9 + and r10,r4,r5 + ldr r9,[sp,#36] + veor q2,q2,q10 + add r6,r6,r7,ror#27 + eor r11,r4,r5 + veor q2,q2,q3 + add r6,r6,r10 + and r11,r11,r3 + vadd.i32 q13,q1,q14 + mov r3,r3,ror#2 + add r6,r6,r11 + veor q12,q12,q2 + add r5,r5,r9 + and r10,r3,r4 + vshr.u32 q2,q12,#30 + ldr r9,[sp,#40] + add r5,r5,r6,ror#27 + vst1.32 {q13},[r12,:128]! + eor r11,r3,r4 + add r5,r5,r10 + vsli.32 q2,q12,#2 + and r11,r11,r7 + mov r7,r7,ror#2 + add r5,r5,r11 + add r4,r4,r9 + and r10,r7,r3 + ldr r9,[sp,#44] + add r4,r4,r5,ror#27 + eor r11,r7,r3 + add r4,r4,r10 + and r11,r11,r6 + mov r6,r6,ror#2 + add r4,r4,r11 + add r3,r3,r9 + and r10,r6,r7 + ldr r9,[sp,#48] + add r3,r3,r4,ror#27 + eor r11,r6,r7 + add r3,r3,r10 + and r11,r11,r5 + mov r5,r5,ror#2 + add r3,r3,r11 + vext.8 q12,q1,q2,#8 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#52] + veor q3,q3,q11 + eor r11,r10,r5 + add r7,r7,r3,ror#27 + veor q3,q3,q8 + mov r4,r4,ror#2 + add r7,r7,r11 + vadd.i32 q13,q2,q14 + eor r10,r3,r5 + add r6,r6,r9 + veor q12,q12,q3 + ldr r9,[sp,#56] + eor r11,r10,r4 + vshr.u32 q3,q12,#30 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + vst1.32 {q13},[r12,:128]! + add r6,r6,r11 + eor r10,r7,r4 + vsli.32 q3,q12,#2 + add r5,r5,r9 + ldr r9,[sp,#60] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#0] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + vadd.i32 q13,q3,q14 + eor r10,r5,r7 + add r3,r3,r9 + vst1.32 {q13},[r12,:128]! + sub r12,r12,#64 + teq r1,r2 + sub r8,r8,#16 + subeq r1,r1,#64 + vld1.8 {q0-q1},[r1]! + ldr r9,[sp,#4] + eor r11,r10,r6 + vld1.8 {q2-q3},[r1]! + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + vld1.32 {d28[],d29[]},[r8,:32]! + add r3,r3,r11 + eor r10,r4,r6 + vrev32.8 q0,q0 + add r7,r7,r9 + ldr r9,[sp,#8] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#12] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#16] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + vrev32.8 q1,q1 + eor r10,r6,r3 + add r4,r4,r9 + vadd.i32 q8,q0,q14 + ldr r9,[sp,#20] + eor r11,r10,r7 + vst1.32 {q8},[r12,:128]! + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#24] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#28] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + eor r10,r3,r5 + add r6,r6,r9 + ldr r9,[sp,#32] + eor r11,r10,r4 + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + vrev32.8 q2,q2 + eor r10,r7,r4 + add r5,r5,r9 + vadd.i32 q9,q1,q14 + ldr r9,[sp,#36] + eor r11,r10,r3 + vst1.32 {q9},[r12,:128]! 
+ add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#40] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + ldr r9,[sp,#44] + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + eor r10,r4,r6 + add r7,r7,r9 + ldr r9,[sp,#48] + eor r11,r10,r5 + add r7,r7,r3,ror#27 + mov r4,r4,ror#2 + add r7,r7,r11 + vrev32.8 q3,q3 + eor r10,r3,r5 + add r6,r6,r9 + vadd.i32 q10,q2,q14 + ldr r9,[sp,#52] + eor r11,r10,r4 + vst1.32 {q10},[r12,:128]! + add r6,r6,r7,ror#27 + mov r3,r3,ror#2 + add r6,r6,r11 + eor r10,r7,r4 + add r5,r5,r9 + ldr r9,[sp,#56] + eor r11,r10,r3 + add r5,r5,r6,ror#27 + mov r7,r7,ror#2 + add r5,r5,r11 + eor r10,r6,r3 + add r4,r4,r9 + ldr r9,[sp,#60] + eor r11,r10,r7 + add r4,r4,r5,ror#27 + mov r6,r6,ror#2 + add r4,r4,r11 + eor r10,r5,r7 + add r3,r3,r9 + eor r11,r10,r6 + add r3,r3,r4,ror#27 + mov r5,r5,ror#2 + add r3,r3,r11 + ldmia r0,{r9,r10,r11,r12} @ accumulate context + add r3,r3,r9 + ldr r9,[r0,#16] + add r4,r4,r10 + add r5,r5,r11 + add r6,r6,r12 + moveq sp,r14 + add r7,r7,r9 + ldrne r9,[sp] + stmia r0,{r3,r4,r5,r6,r7} + addne r12,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4-r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 +.type sha1_block_data_order_armv8,%function +.align 5 +sha1_block_data_order_armv8: +.LARMv8: + vstmdb sp!,{d8-d15} @ ABI specification says so + + veor q1,q1,q1 + adr r3,.LK_00_19 + vld1.32 {q0},[r0]! + vld1.32 {d2[0]},[r0] + sub r0,r0,#16 + vld1.32 {d16[],d17[]},[r3,:32]! + vld1.32 {d18[],d19[]},[r3,:32]! + vld1.32 {d20[],d21[]},[r3,:32]! + vld1.32 {d22[],d23[]},[r3,:32] + +.Loop_v8: + vld1.8 {q4-q5},[r1]! + vld1.8 {q6-q7},[r1]! 
+ vrev32.8 q4,q4 + vrev32.8 q5,q5 + + vadd.i32 q12,q8,q4 + vrev32.8 q6,q6 + vmov q14,q0 @ offload + subs r2,r2,#1 + + vadd.i32 q13,q8,q5 + vrev32.8 q7,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 0 + .byte 0x68,0x0c,0x02,0xf2 @ sha1c q0,q1,q12 + vadd.i32 q12,q8,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 1 + .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q8,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 2 + .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q8,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 3 + .byte 0x6a,0x0c,0x06,0xf2 @ sha1c q0,q3,q13 + vadd.i32 q13,q9,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 4 + .byte 0x68,0x0c,0x04,0xf2 @ sha1c q0,q2,q12 + vadd.i32 q12,q9,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 5 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 6 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q9,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 7 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q9,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 8 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q10,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 9 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q10,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 10 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 11 + .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q10,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 12 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q10,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 0x4c,0x8c,0x3a,0xf2 @ sha1su0 q4,q5,q6 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 13 + .byte 0x6a,0x0c,0x26,0xf2 @ sha1m q0,q3,q13 + vadd.i32 q13,q11,q7 + .byte 0x8e,0x83,0xba,0xf3 @ sha1su1 q4,q7 + .byte 0x4e,0xac,0x3c,0xf2 @ sha1su0 q5,q6,q7 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 14 + .byte 0x68,0x0c,0x24,0xf2 @ sha1m q0,q2,q12 + vadd.i32 q12,q11,q4 + .byte 0x88,0xa3,0xba,0xf3 @ sha1su1 q5,q4 + .byte 0x48,0xcc,0x3e,0xf2 @ sha1su0 q6,q7,q4 + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 15 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q5 + .byte 0x8a,0xc3,0xba,0xf3 @ sha1su1 q6,q5 + .byte 0x4a,0xec,0x38,0xf2 @ sha1su0 q7,q4,q5 + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 16 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + vadd.i32 q12,q11,q6 + .byte 0x8c,0xe3,0xba,0xf3 @ sha1su1 q7,q6 + .byte 
0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 17 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + vadd.i32 q13,q11,q7 + + .byte 0xc0,0x62,0xb9,0xf3 @ sha1h q3,q0 @ 18 + .byte 0x68,0x0c,0x14,0xf2 @ sha1p q0,q2,q12 + + .byte 0xc0,0x42,0xb9,0xf3 @ sha1h q2,q0 @ 19 + .byte 0x6a,0x0c,0x16,0xf2 @ sha1p q0,q3,q13 + + vadd.i32 q1,q1,q2 + vadd.i32 q0,q0,q14 + bne .Loop_v8 + + vst1.32 {q0},[r0]! + vst1.32 {d2[0]},[r0] + + vldmia sp!,{d8-d15} + bx lr @ bx lr +.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 +#endif +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S new file mode 100644 index 00000000000000..bf1ce4f997e7b7 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S @@ -0,0 +1,2695 @@ +#include "arm_arch.h" + +.text +.code 32 + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: + sub r3,pc,#8 @ sha256_block_data_order + add r2,r1,r2,lsl#6 @ len to point at the end of inp +#if __ARM_MAX_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + mov r12,sp + sub sp,sp,#16*4+16 @ alloca + sub r14,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! 
+ + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! 
+ add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! 
+ add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! 
+ add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! 
+ and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] + sub r3,r3,#sha256_block_data_order-K256 + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! 
+ vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! 
+ vadd.i32 q13,q13,q9 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm_obsolete/arm-void-gas/sha/sha512-armv4.S b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha512-armv4.S new file mode 100644 index 00000000000000..1889fc701ac094 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha512-armv4.S @@ -0,0 +1,1775 @@ +#include "arm_arch.h" +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +.code 32 +.type K512,%object +.align 5 +K512: +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 
+.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 +#else +.skip 32 +#endif + +.global sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: + sub r3,pc,#8 @ sha512_block_data_order + add r2,r1,r2,lsl#7 @ len to point at the end of inp +#if __ARM_MAX_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif + stmdb sp!,{r4-r12,lr} + sub r14,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, [sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH__<7 + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 
@ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc 
r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.align 4 +.LNEON: + dmb @ errata #451034 on early Cortex A8 + vstmdb sp!,{d8-d15} @ ABI specification says so + sub r3,r3,#672 @ K512 + vldmia r0,{d16-d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + bne .L16_79_neon + + vadd.i64 d16,d30 @ h+=Maj from the past + vldmia r0,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16-d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + vldmia sp!,{d8-d15} @ epilogue + bx lr @ .word 0xe12fff1e +#endif +.size sha512_block_data_order,.-sha512_block_data_order +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " +.align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S new file mode 100644 index 00000000000000..0a4b1ac4c40082 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S @@ -0,0 +1,725 @@ +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +.arch armv8-a+crypto +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq .Lenc_key_abort + cmp x2,#0 + b.eq .Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt .Lenc_key_abort + cmp w1,#256 + b.gt .Lenc_key_abort + tst w1,#0x3f + b.ne .Lenc_key_abort + + adr x3,rcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + mov x3,#0 + +.Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +.Ldec_key_abort: + ldp x29,x30,[sp],#16 + ret +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + ld1 {v0.4s},[x2],#16 + aesmc v2.16b,v2.16b + subs w3,w3,#2 + aese v2.16b,v1.16b + ld1 {v1.4s},[x2],#16 + aesmc v2.16b,v2.16b + b.gt .Loop_enc + + aese v2.16b,v0.16b + ld1 {v0.4s},[x2] + aesmc v2.16b,v2.16b + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + ld1 {v0.4s},[x2],#16 + aesimc v2.16b,v2.16b + subs w3,w3,#2 + aesd v2.16b,v1.16b + ld1 {v1.4s},[x2],#16 + aesimc v2.16b,v2.16b + b.gt .Loop_dec + + aesd v2.16b,v0.16b + ld1 {v0.4s},[x2] + aesimc v2.16b,v2.16b + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s-v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s-v19.4s},[x7],#32 + ld1 {v20.4s-v21.4s},[x7],#32 + ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + +.Loop_cbc_enc: + aese v0.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + b.gt .Loop_cbc_enc + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + add x7,x3,#16 + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + + mov w6,w5 + eor v6.16b,v0.16b,v7.16b + st1 {v6.16b},[x1],#16 + b.hs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s-v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done +.align 5 +.Lcbc_dec: + ld1 {v18.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v19.16b,v18.16b,v18.16b + b.lo .Lcbc_dec_tail + + orr v1.16b,v18.16b,v18.16b + ld1 {v18.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v19.16b,v18.16b,v18.16b + +.Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + b.gt .Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + eor v4.16b,v6.16b,v7.16b + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + eor v5.16b,v2.16b,v7.16b + aesd v0.16b,v17.16b + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + eor v17.16b,v3.16b,v7.16b + subs x2,x2,#0x30 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v20.16b + aesd v1.16b,v20.16b + aesd v18.16b,v20.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v18.16b + // are loaded with last "words" + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + mov x7,x3 + aesd v0.16b,v21.16b + aesd v1.16b,v21.16b + aesd v18.16b,v21.16b + ld1 {v2.16b},[x0],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + 
aesd v1.16b,v22.16b + aesd v18.16b,v22.16b + ld1 {v19.16b},[x0],#16 + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v18.16b,v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + orr v0.16b,v2.16b,v2.16b + st1 {v4.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v5.16b},[x1],#16 + st1 {v18.16b},[x1],#16 + orr v18.16b,v19.16b,v19.16b + b.hs .Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + b.gt .Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesd v18.16b,v16.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v17.16b + aesd v18.16b,v17.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + aesd v1.16b,v20.16b + aesd v18.16b,v20.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesd v18.16b,v21.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesd v18.16b,v22.16b + aesimc v1.16b,v1.16b + aesimc v18.16b,v18.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v18.16b,v23.16b + b.eq .Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lcbc_done + +.Lcbc_dec_one: + eor v5.16b,v5.16b,v18.16b + orr v6.16b,v19.16b,v19.16b + st1 {v5.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s-v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s-v21.4s},[x7],#32 + ld1 {v22.4s-v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo +#ifndef __ARMEB__ + rev w8, w8 +#endif + orr v1.16b,v0.16b,v0.16b + add w10, w8, #1 + orr v18.16b,v0.16b,v0.16b + add w8, w8, #2 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v1.s[3],w10 + b.ls .Lctr32_tail + rev w12, w8 + sub x2,x2,#3 // bias + mov v18.s[3],w12 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aese v18.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aesmc v18.16b,v18.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + aese v1.16b,v17.16b + aese v18.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aesmc v18.16b,v18.16b + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aese v18.16b,v16.16b + mov x7,x3 + aesmc v4.16b,v0.16b + ld1 {v2.16b},[x0],#16 + aesmc v5.16b,v1.16b + aesmc v18.16b,v18.16b + orr v0.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + ld1 {v3.16b},[x0],#16 + aese v5.16b,v17.16b + aese v18.16b,v17.16b + orr v1.16b,v6.16b,v6.16b + aesmc v4.16b,v4.16b + ld1 {v19.16b},[x0],#16 + aesmc v5.16b,v5.16b + aesmc v17.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + add w9,w8,#1 + aese v4.16b,v20.16b + aese v5.16b,v20.16b + aese v17.16b,v20.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aese v5.16b,v21.16b + aese v17.16b,v21.16b + eor v19.16b,v19.16b,v7.16b + rev w9,w9 + aesmc v4.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + aesmc v5.16b,v5.16b + aesmc v17.16b,v17.16b + mov v0.s[3], w9 + rev w10,w10 + aese v4.16b,v22.16b + aese v5.16b,v22.16b + aese v17.16b,v22.16b + mov v1.s[3], w10 + rev w12,w8 + aesmc v4.16b,v4.16b + aesmc v5.16b,v5.16b + aesmc v17.16b,v17.16b + mov v18.s[3], w12 + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + mov w6,w5 + eor v2.16b,v2.16b,v4.16b + eor v3.16b,v3.16b,v5.16b + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v2.16b},[x1],#16 + st1 {v3.16b},[x1],#16 + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +.Lctr32_tail: + aese v0.16b,v16.16b + aese v1.16b,v16.16b + ld1 {v16.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + subs w6,w6,#2 + aese v0.16b,v17.16b + aese v1.16b,v17.16b + ld1 {v17.4s},[x7],#16 + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aese v1.16b,v16.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aese v1.16b,v17.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aese v1.16b,v20.16b + ld1 {v3.16b},[x0] + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v21.16b + aese v1.16b,v21.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + aese v0.16b,v22.16b + aese v1.16b,v22.16b + eor v2.16b,v2.16b,v7.16b + aesmc v0.16b,v0.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks +#endif diff --git 
a/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S new file mode 100644 index 00000000000000..1bfb26340a6e9e --- /dev/null +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S @@ -0,0 +1,115 @@ +#include "arm_arch.h" + +.text +.arch armv8-a+crypto +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + ld1 {v17.2d},[x1] //load H + movi v16.16b,#0xe1 + ext v3.16b,v17.16b,v17.16b,#8 + shl v16.2d,v16.2d,#57 + ushr v18.2d,v16.2d,#63 + ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01 + dup v17.4s,v17.s[1] + ushr v19.2d,v3.2d,#63 + sshr v17.4s,v17.4s,#31 //broadcast carry bit + and v19.16b,v19.16b,v16.16b + shl v3.2d,v3.2d,#1 + ext v19.16b,v19.16b,v19.16b,#8 + and v16.16b,v16.16b,v17.16b + orr v3.16b,v3.16b,v19.16b //H<<<=1 + eor v3.16b,v3.16b,v16.16b //twisted H + st1 {v3.2d},[x0] + + ret +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + ld1 {v17.2d},[x0] //load Xi + movi v19.16b,#0xe1 + ld1 {v20.2d},[x1] //load twisted H + shl v19.2d,v19.2d,#57 +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ext v21.16b,v20.16b,v20.16b,#8 + mov x3,#0 + ext v3.16b,v17.16b,v17.16b,#8 + mov x12,#0 + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing + mov x2,x0 + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + ld1 {v0.2d},[x0] //load [rotated] Xi + subs x3,x3,#16 + movi v19.16b,#0xe1 + mov x12,#16 + ld1 {v20.2d},[x1] //load twisted H + csel x12,xzr,x12,eq + ext v0.16b,v0.16b,v0.16b,#8 + shl v19.2d,v19.2d,#57 + ld1 {v17.2d},[x2],x12 //load [rotated] inp + ext v21.16b,v20.16b,v20.16b,#8 +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b + rev64 v17.16b,v17.16b +#endif + eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing + ext v3.16b,v17.16b,v17.16b,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + ext v18.16b,v0.16b,v0.16b,#8 + eor v3.16b,v3.16b,v0.16b //inp^=Xi + eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi + +.Lgmult_v8: + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + subs x3,x3,#16 + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + csel x12,xzr,x12,eq + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] inp + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + eor v0.16b,v1.16b,v18.16b + ext v3.16b,v17.16b,v17.16b,#8 + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + b.hs .Loop_v8 + +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 diff --git a/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha1-armv8.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha1-armv8.S new file mode 100644 index 00000000000000..f9d126252ef2cd --- /dev/null +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha1-armv8.S @@ -0,0 +1,1211 @@ +#include "arm_arch.h" + +.text + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr 
x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! + add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp w20,w21,[x0] + ldp w22,w23,[x0,#8] + ldr w24,[x0,#16] + +.Loop: + ldr x3,[x1],#64 + movz w28,#0x7999 + sub x2,x2,#1 + movk w28,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror x3,x3,#32 +#else + rev32 x3,x3 +#endif + add w24,w24,w28 // warm it up + add w24,w24,w3 + lsr x4,x3,#32 + ldr x5,[x1,#-56] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w4 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x5,x5,#32 +#else + rev32 x5,x5 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w5 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x6,x5,#32 + ldr x7,[x1,#-48] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w6 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x7,x7,#32 +#else + rev32 x7,x7 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w7 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x8,x7,#32 + ldr x9,[x1,#-40] + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w8 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x9,x9,#32 +#else + rev32 x9,x9 +#endif + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w9 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + lsr x10,x9,#32 + ldr x11,[x1,#-32] + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w10 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x11,x11,#32 +#else + rev32 x11,x11 +#endif + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w11 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + lsr x12,x11,#32 + ldr x13,[x1,#-24] + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w12 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x13,x13,#32 +#else + rev32 x13,x13 +#endif + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + add w24,w24,w13 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + lsr x14,x13,#32 + ldr x15,[x1,#-16] + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + add w23,w23,w14 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x15,x15,#32 
+#else + rev32 x15,x15 +#endif + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + add w22,w22,w15 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + lsr x16,x15,#32 + ldr x17,[x1,#-8] + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + add w21,w21,w16 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) +#ifdef __ARMEB__ + ror x17,x17,#32 +#else + rev32 x17,x17 +#endif + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w17 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + lsr x19,x17,#32 + eor w3,w3,w5 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w3,w3,w11 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w3,w3,w16 + ror w22,w22,#2 + add w24,w24,w19 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + bic w25,w23,w21 + and w26,w22,w21 + ror w27,w20,#27 + eor w4,w4,w12 + add w23,w23,w28 // future e+=K + orr w25,w25,w26 + add w24,w24,w27 // e+=rot(a,5) + eor w4,w4,w17 + ror w21,w21,#2 + add w23,w23,w3 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + bic w25,w22,w20 + and w26,w21,w20 + ror w27,w24,#27 + eor w5,w5,w13 + add w22,w22,w28 // future e+=K + orr w25,w25,w26 + add w23,w23,w27 // e+=rot(a,5) + eor w5,w5,w19 + ror w20,w20,#2 + add w22,w22,w4 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + bic w25,w21,w24 + and w26,w20,w24 + ror w27,w23,#27 + eor w6,w6,w14 + add w21,w21,w28 // future e+=K + orr w25,w25,w26 + add w22,w22,w27 // e+=rot(a,5) + eor w6,w6,w3 + ror w24,w24,#2 + add w21,w21,w5 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + bic w25,w20,w23 + and w26,w24,w23 + ror w27,w22,#27 + eor w7,w7,w15 + add w20,w20,w28 // future e+=K + orr w25,w25,w26 + add w21,w21,w27 // e+=rot(a,5) + eor w7,w7,w4 + ror w23,w23,#2 + add w20,w20,w6 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w7,w7,#31 + movz w28,#0xeba1 + movk w28,#0x6ed9,lsl#16 + eor w8,w8,w10 + bic w25,w24,w22 + and w26,w23,w22 + ror w27,w21,#27 + eor w8,w8,w16 + add w24,w24,w28 // future e+=K + orr w25,w25,w26 + add w20,w20,w27 // e+=rot(a,5) + eor w8,w8,w5 + ror w22,w22,#2 + add w24,w24,w7 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w9,w9,w6 + add w23,w23,w8 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w10,w10,w7 + add w22,w22,w9 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w11,w11,w8 + add w21,w21,w10 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w24 + add w21,w21,w27 // 
e+=rot(a,5) + ror w23,w23,#2 + eor w12,w12,w9 + add w20,w20,w11 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w13,w13,w10 + add w24,w24,w12 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w14,w14,w11 + add w23,w23,w13 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w15,w15,w12 + add w22,w22,w14 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w16,w16,w13 + add w21,w21,w15 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w17,w17,w14 + add w20,w20,w16 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w19,w19,w15 + add w24,w24,w17 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w3,w3,w16 + add w23,w23,w19 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w4,w4,w17 + add w22,w22,w3 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w5,w5,w19 + add w21,w21,w4 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w6,w6,w3 + add w20,w20,w5 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w7,w7,w4 + add w24,w24,w6 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w8,w8,w5 + add w23,w23,w7 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + 
ror w20,w20,#2 + eor w9,w9,w6 + add w22,w22,w8 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w10,w10,w7 + add w21,w21,w9 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w11,w11,w8 + add w20,w20,w10 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w11,w11,#31 + movz w28,#0xbcdc + movk w28,#0x8f1b,lsl#16 + eor w12,w12,w14 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w12,w12,w9 + add w24,w24,w11 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w13,w13,w15 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w13,w13,w5 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w13,w13,w10 + add w23,w23,w12 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w14,w14,w16 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w14,w14,w6 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w14,w14,w11 + add w22,w22,w13 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w15,w15,w17 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w15,w15,w7 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w15,w15,w12 + add w21,w21,w14 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w15,w15,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w16,w16,w19 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w16,w16,w8 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w16,w16,w13 + add w20,w20,w15 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w16,w16,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w17,w17,w3 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w17,w17,w9 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w17,w17,w14 + add w24,w24,w16 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w17,w17,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w19,w19,w4 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w19,w19,w10 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w19,w19,w15 + add w23,w23,w17 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w19,w19,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w3,w3,w5 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w3,w3,w11 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w3,w3,w16 + add w22,w22,w19 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w3,w3,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w4,w4,w6 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w4,w4,w12 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w4,w4,w17 + add w21,w21,w3 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w4,w4,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w5,w5,w7 + ror w27,w22,#27 + and 
w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w5,w5,w13 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w5,w5,w19 + add w20,w20,w4 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w5,w5,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w6,w6,w8 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w6,w6,w14 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w6,w6,w3 + add w24,w24,w5 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w6,w6,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w7,w7,w9 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w7,w7,w15 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w7,w7,w4 + add w23,w23,w6 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w7,w7,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w8,w8,w10 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w8,w8,w16 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w8,w8,w5 + add w22,w22,w7 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w8,w8,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w9,w9,w11 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w9,w9,w17 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w9,w9,w6 + add w21,w21,w8 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w9,w9,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w10,w10,w12 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w10,w10,w19 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w10,w10,w7 + add w20,w20,w9 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w10,w10,#31 + orr w25,w22,w23 + and w26,w22,w23 + eor w11,w11,w13 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w11,w11,w3 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w22,w22,#2 + eor w11,w11,w8 + add w24,w24,w10 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w11,w11,#31 + orr w25,w21,w22 + and w26,w21,w22 + eor w12,w12,w14 + ror w27,w20,#27 + and w25,w25,w23 + add w23,w23,w28 // future e+=K + eor w12,w12,w4 + add w24,w24,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w21,w21,#2 + eor w12,w12,w9 + add w23,w23,w11 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w12,w12,#31 + orr w25,w20,w21 + and w26,w20,w21 + eor w13,w13,w15 + ror w27,w24,#27 + and w25,w25,w22 + add w22,w22,w28 // future e+=K + eor w13,w13,w5 + add w23,w23,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w20,w20,#2 + eor w13,w13,w10 + add w22,w22,w12 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w13,w13,#31 + orr w25,w24,w20 + and w26,w24,w20 + eor w14,w14,w16 + ror w27,w23,#27 + and w25,w25,w21 + add w21,w21,w28 // future e+=K + eor w14,w14,w6 + add w22,w22,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w24,w24,#2 + eor w14,w14,w11 + add w21,w21,w13 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w14,w14,#31 + orr w25,w23,w24 + and w26,w23,w24 + eor w15,w15,w17 + ror w27,w22,#27 + and w25,w25,w20 + add w20,w20,w28 // future e+=K + eor w15,w15,w7 + add w21,w21,w27 // e+=rot(a,5) + orr w25,w25,w26 + ror w23,w23,#2 + eor w15,w15,w12 + add w20,w20,w14 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w15,w15,#31 + movz w28,#0xc1d6 + movk w28,#0xca62,lsl#16 + orr w25,w22,w23 + and w26,w22,w23 + eor w16,w16,w19 + ror w27,w21,#27 + and w25,w25,w24 + add w24,w24,w28 // future e+=K + eor w16,w16,w8 + add w20,w20,w27 // e+=rot(a,5) + orr w25,w25,w26 
+ ror w22,w22,#2 + eor w16,w16,w13 + add w24,w24,w15 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w17,w17,w14 + add w23,w23,w16 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w19,w19,w15 + add w22,w22,w17 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w19,w19,#31 + eor w3,w3,w5 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w3,w3,w11 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w3,w3,w16 + add w21,w21,w19 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w3,w3,#31 + eor w4,w4,w6 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w4,w4,w12 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w4,w4,w17 + add w20,w20,w3 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w4,w4,#31 + eor w5,w5,w7 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w5,w5,w13 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w5,w5,w19 + add w24,w24,w4 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w5,w5,#31 + eor w6,w6,w8 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w6,w6,w14 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w6,w6,w3 + add w23,w23,w5 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w6,w6,#31 + eor w7,w7,w9 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w7,w7,w15 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w7,w7,w4 + add w22,w22,w6 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w7,w7,#31 + eor w8,w8,w10 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w8,w8,w16 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w8,w8,w5 + add w21,w21,w7 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w8,w8,#31 + eor w9,w9,w11 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w9,w9,w17 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w9,w9,w6 + add w20,w20,w8 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w9,w9,#31 + eor w10,w10,w12 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w10,w10,w19 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w10,w10,w7 + add w24,w24,w9 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w10,w10,#31 + eor w11,w11,w13 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w11,w11,w3 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w11,w11,w8 + add w23,w23,w10 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w11,w11,#31 + eor w12,w12,w14 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w12,w12,w4 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w12,w12,w9 + add w22,w22,w11 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w12,w12,#31 + eor w13,w13,w15 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w13,w13,w5 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + 
eor w13,w13,w10 + add w21,w21,w12 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w13,w13,#31 + eor w14,w14,w16 + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w14,w14,w6 + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + eor w14,w14,w11 + add w20,w20,w13 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ror w14,w14,#31 + eor w15,w15,w17 + eor w25,w24,w22 + ror w27,w21,#27 + add w24,w24,w28 // future e+=K + eor w15,w15,w7 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + eor w15,w15,w12 + add w24,w24,w14 // future e+=X[i] + add w20,w20,w25 // e+=F(b,c,d) + ror w15,w15,#31 + eor w16,w16,w19 + eor w25,w23,w21 + ror w27,w20,#27 + add w23,w23,w28 // future e+=K + eor w16,w16,w8 + eor w25,w25,w22 + add w24,w24,w27 // e+=rot(a,5) + ror w21,w21,#2 + eor w16,w16,w13 + add w23,w23,w15 // future e+=X[i] + add w24,w24,w25 // e+=F(b,c,d) + ror w16,w16,#31 + eor w17,w17,w3 + eor w25,w22,w20 + ror w27,w24,#27 + add w22,w22,w28 // future e+=K + eor w17,w17,w9 + eor w25,w25,w21 + add w23,w23,w27 // e+=rot(a,5) + ror w20,w20,#2 + eor w17,w17,w14 + add w22,w22,w16 // future e+=X[i] + add w23,w23,w25 // e+=F(b,c,d) + ror w17,w17,#31 + eor w19,w19,w4 + eor w25,w21,w24 + ror w27,w23,#27 + add w21,w21,w28 // future e+=K + eor w19,w19,w10 + eor w25,w25,w20 + add w22,w22,w27 // e+=rot(a,5) + ror w24,w24,#2 + eor w19,w19,w15 + add w21,w21,w17 // future e+=X[i] + add w22,w22,w25 // e+=F(b,c,d) + ror w19,w19,#31 + ldp w4,w5,[x0] + eor w25,w20,w23 + ror w27,w22,#27 + add w20,w20,w28 // future e+=K + eor w25,w25,w24 + add w21,w21,w27 // e+=rot(a,5) + ror w23,w23,#2 + add w20,w20,w19 // future e+=X[i] + add w21,w21,w25 // e+=F(b,c,d) + ldp w6,w7,[x0,#8] + eor w25,w24,w22 + ror w27,w21,#27 + eor w25,w25,w23 + add w20,w20,w27 // e+=rot(a,5) + ror w22,w22,#2 + ldr w8,[x0,#16] + add w20,w20,w25 // e+=F(b,c,d) + add w21,w21,w5 + add w22,w22,w6 + add w20,w20,w4 + add w23,w23,w7 + add w24,w24,w8 + stp w20,w21,[x0] + stp w22,w23,[x0,#8] + str w24,[x0,#16] + cbnz x2,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + adr x4,.Lconst + eor v1.16b,v1.16b,v1.16b + ld1 {v0.4s},[x0],#16 + ld1 {v1.s}[0],[x0] + sub x0,x0,#16 + ld1 {v16.4s-v19.4s},[x4] + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + + add v20.4s,v16.4s,v4.4s + rev32 v6.16b,v6.16b + orr v22.16b,v0.16b,v0.16b // offload + + add v21.4s,v16.4s,v5.4s + rev32 v7.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b + .inst 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 + add v20.4s,v16.4s,v6.4s + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 1 + .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v16.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 2 + .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v16.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 3 + .inst 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 4 + .inst 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 5 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 6 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v17.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 7 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v17.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 8 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 9 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 10 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 11 + .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v18.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 12 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v18.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 13 + .inst 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + .inst 0x5e2818e4 //sha1su1 v4.16b,v7.16b + .inst 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 14 + .inst 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v4.4s + .inst 0x5e281885 //sha1su1 v5.16b,v4.16b + .inst 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b + 
.inst 0x5e280802 //sha1h v2.16b,v0.16b // 15 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v5.4s + .inst 0x5e2818a6 //sha1su1 v6.16b,v5.16b + .inst 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 16 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + add v20.4s,v19.4s,v6.4s + .inst 0x5e2818c7 //sha1su1 v7.16b,v6.16b + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 17 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + add v21.4s,v19.4s,v7.4s + + .inst 0x5e280803 //sha1h v3.16b,v0.16b // 18 + .inst 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s + + .inst 0x5e280802 //sha1h v2.16b,v0.16b // 19 + .inst 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s + + add v1.4s,v1.4s,v2.4s + add v0.4s,v0.4s,v22.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s},[x0],#16 + st1 {v1.s}[0],[x0] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. +.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha256-armv8.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha256-armv8.S new file mode 100644 index 00000000000000..bd43b1fe76dd34 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha256-armv8.S @@ -0,0 +1,1141 @@ +#include "arm_arch.h" + +.text + +.globl sha256_block_data_order +.type sha256_block_data_order,%function +.align 6 +sha256_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adr x30,K256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor 
w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and 
w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // 
(b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // 
d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 
+ ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add 
w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor 
w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha256_block_data_order,.-sha256_block_data_order + +.align 6 +.type K256,%object +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +.size K256,.-K256 +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,K256 + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 
0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +.comm OPENSSL_armcap_P,4,4 diff --git a/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha512-armv8.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha512-armv8.S new file mode 100644 index 00000000000000..6b0d1940c6bef1 --- /dev/null +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/sha/sha512-armv8.S @@ -0,0 +1,1021 @@ +#include "arm_arch.h" + +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,%function +.align 6 +sha512_block_data_order: + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adr x30,K512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __ARMEB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) 
+ ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor 
x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // 
h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __ARMEB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + 
ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor 
x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add 
x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // 
Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha512_block_data_order,.-sha512_block_data_order + +.align 6 +.type K512,%object +K512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 
0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +.size K512,.-K512 +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aes-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aes-x86_64.s new file mode 100644 index 00000000000000..0bdfe91fc530bc --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aes-x86_64.s @@ -0,0 +1,2534 @@ +.text +.type _x86_64_AES_encrypt,@function +.align 16 +_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Lenc_loop +.align 16 +.Lenc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz .Lenc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d 
+ xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt +.type _x86_64_AES_encrypt_compact,@function +.align 16 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Lenc_loop_compact +.align 16 +.Lenc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ah,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shll $8,%r9d + shrl $16,%edx + shll $8,%r13d + xorl %r9d,%r10d + shrl $16,%eax + movzbl %dl,%r9d + shrl $16,%ebx + xorl %r13d,%r11d + shll $8,%ebp + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + + shll $8,%esi + movzbl %bl,%ebp + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d + xorl %edi,%r10d + + shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d + shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + + shll $16,%ebp + xorl %r13d,%r12d + shll $24,%esi + xorl %ebp,%r8d + shll $24,%edi + xorl %esi,%r10d + shll $24,%edx + xorl %edi,%r11d + shll $24,%ecx + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Lenc_compact_done + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl $2155905152,%r12d + roll $24,%eax + movl $2155905152,%ebp + roll $24,%ebx + andl %ecx,%r12d + andl %edx,%ebp + xorl %r8d,%eax + xorl %r9d,%ebx + movl %r12d,%esi + rorl $16,%r10d + movl %ebp,%edi + rorl $16,%r11d + leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d + xorl %r10d,%eax + shrl $7,%ebp + xorl %r11d,%ebx + rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + rorl $16,%r12d + xorl 
%r8d,%ecx + rorl $16,%ebp + xorl %r9d,%edx + roll $24,%ecx + movl 0(%r14),%esi + roll $24,%edx + xorl %r8d,%ecx + movl 64(%r14),%edi + xorl %r9d,%edx + movl 128(%r14),%r8d + xorl %r12d,%ecx + rorl $8,%r12d + xorl %ebp,%edx + rorl $8,%ebp + xorl %r12d,%ecx + movl 192(%r14),%r9d + xorl %ebp,%edx + jmp .Lenc_loop_compact +.align 16 +.Lenc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +.globl asm_AES_encrypt +.hidden asm_AES_encrypt +asm_AES_encrypt: +AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Lenc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lenc_epilogue: + .byte 0xf3,0xc3 +.size AES_encrypt,.-AES_encrypt +.type _x86_64_AES_decrypt,@function +.align 16 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp .Ldec_loop +.align 16 +.Ldec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz .Ldec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl 
(%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt +.type _x86_64_AES_decrypt_compact,@function +.align 16 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp .Ldec_loop_compact + +.align 16 +.Ldec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ch,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shrl $16,%ecx + shll $8,%r13d + shll $8,%r9d + movzbl %cl,%edi + shrl $16,%eax + xorl %r9d,%r10d + shrl $16,%ebx + movzbl %dl,%r9d + + shll $8,%ebp + xorl %r13d,%r11d + shll $8,%esi + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + movzbl %bl,%ebp + + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi + movzbl (%r14,%rbp,1),%ebp + xorl %edi,%r10d + movzbl (%r14,%r13,1),%r13d + movzbl %ch,%edi + + shll $16,%ebp + shll $16,%r9d + shll $16,%r13d + xorl %ebp,%r8d + movzbl %dh,%ebp + xorl %r9d,%r11d + shrl $8,%eax + xorl %r13d,%r12d + + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx + movzbl (%r14,%rax,1),%edx + + movl %r10d,%eax + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%eax + shll $24,%edx + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je .Ldec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + 
subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + movq 0(%r14),%rsi + roll $16,%r9d + movq 64(%r14),%rdi + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + movq 192(%r14),%r10 + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp .Ldec_loop_compact +.align 16 +.Ldec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 +.size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +.globl asm_AES_decrypt +.hidden asm_AES_decrypt +asm_AES_decrypt: +AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +.Ldec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq .LAES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Ldec_epilogue: + .byte 0xf3,0xc3 +.size AES_decrypt,.-AES_decrypt +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function +.align 16 +private_AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +.Lenc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Lenc_key_epilogue: + .byte 0xf3,0xc3 +.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key + +.type _x86_64_AES_set_encrypt_key,@function +.align 16 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz .Lbadpointer + testq $-1,%rdi + jz .Lbadpointer + + leaq .LAES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je .L10rounds + cmpl $192,%ecx + je .L12rounds + cmpl $256,%ecx + je .L14rounds + movq $-2,%rax + jmp .Lexit + +.L10rounds: + movq 
0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L10shortcut +.align 4 +.L10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +.L10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl .L10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L12shortcut +.align 4 +.L12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +.L12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je .L12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp .L12loop +.L12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp .Lexit + +.L14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp .L14shortcut +.align 4 +.L14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +.L14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je .L14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp .L14loop +.L14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp .Lexit + +.Lbadpointer: + movq $-1,%rax +.Lexit: +.byte 0xf3,0xc3 +.size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key +.globl private_AES_set_decrypt_key +.type 
private_AES_set_decrypt_key,@function +.align 16 +private_AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +.Ldec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne .Labort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.align 4 +.Linvert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne .Linvert + + leaq .LAES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.align 4 +.Lpermute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + + roll $16,%r9d + + roll $16,%r12d + + roll $16,%r8d + + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz .Lpermute + + xorq %rax,%rax +.Labort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +.Ldec_key_epilogue: + .byte 0xf3,0xc3 +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 + +.globl asm_AES_cbc_encrypt +.hidden asm_AES_cbc_encrypt +asm_AES_cbc_encrypt: +AES_cbc_encrypt: + cmpq $0,%rdx + je .Lcbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.Lcbc_prologue: + + cld + movl %r9d,%r9d + + leaq .LAES_Te(%rip),%r14 + cmpq $0,%r9 + jne .Lcbc_picked_te + leaq .LAES_Td(%rip),%r14 +.Lcbc_picked_te: + + movl 
OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb .Lcbc_slow_prologue + testq $15,%rdx + jnz .Lcbc_slow_prologue + btl $28,%r10d + jc .Lcbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb .Lcbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp .Lcbc_te_ok +.Lcbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.align 4 +.Lcbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +.Lcbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb .Lcbc_do_ecopy + cmpq $4096-248,%r10 + jb .Lcbc_skip_ecopy +.align 4 +.Lcbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +.Lcbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.align 4 +.Lcbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz .Lcbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je .LFAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.align 4 +.Lcbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz .Lcbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_fast_cleanup + + +.align 16 +.LFAST_DECRYPT: + cmpq %r8,%r9 + je .Lcbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.align 4 +.Lcbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz .Lcbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp .Lcbc_fast_cleanup + +.align 16 +.Lcbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.align 4 +.Lcbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz .Lcbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp .Lcbc_fast_dec_in_place_loop 
+.Lcbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.align 4 +.Lcbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je .Lcbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp .Lcbc_exit + + +.align 16 +.Lcbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +.Lcbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je .LSLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz .Lcbc_slow_enc_tail + +.align 4 +.Lcbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz .Lcbc_slow_enc_loop + testq $15,%r10 + jnz .Lcbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp .Lcbc_slow_enc_loop + +.align 16 +.LSLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.align 4 +.Lcbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc .Lcbc_slow_dec_partial + jz .Lcbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp .Lcbc_slow_dec_loop +.Lcbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp .Lcbc_exit + +.align 4 +.Lcbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp .Lcbc_exit + +.align 16 +.Lcbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lcbc_popfq: + popfq +.Lcbc_epilogue: + .byte 0xf3,0xc3 
+.size AES_cbc_encrypt,.-AES_cbc_encrypt +.align 64 +.LAES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 
0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 
0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 
0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.align 64 +.LAES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 
0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 +.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 
0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 
0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 
0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-mb-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-mb-x86_64.s new file mode 100644 index 00000000000000..b0467f2f9f2995 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-mb-x86_64.s @@ -0,0 +1,506 @@ +.text + + + +.globl aesni_multi_cbc_encrypt +.type aesni_multi_cbc_encrypt,@function +.align 32 +aesni_multi_cbc_encrypt: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +.Lenc4x_body: + movdqu 
(%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Lenc4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Lenc4x_done + + movups 16-120(%rsi),%xmm1 + pxor %xmm12,%xmm2 + movups 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm3 + movl 240-120(%rsi),%eax + pxor %xmm12,%xmm4 + movdqu (%r8),%xmm6 + pxor %xmm12,%xmm5 + movdqu (%r9),%xmm7 + pxor %xmm6,%xmm2 + movdqu (%r10),%xmm8 + pxor %xmm7,%xmm3 + movdqu (%r11),%xmm9 + pxor %xmm8,%xmm4 + pxor %xmm9,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_enc4x + +.align 32 +.Loop_enc4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,220,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,220,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r10,%rbx,1) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,220,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,220,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,220,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,220,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,220,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,220,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,220,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,220,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 160-120(%rsi),%xmm0 + + jb .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 192-120(%rsi),%xmm0 + + je .Lenc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + 
movups 224-120(%rsi),%xmm0 + jmp .Lenc4x_tail + +.align 32 +.Lenc4x_tail: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqu (%r8,%rbx,1),%xmm6 + movdqu 16-120(%rsi),%xmm1 + +.byte 102,15,56,221,208 + movdqu (%r9,%rbx,1),%xmm7 + pxor %xmm12,%xmm6 +.byte 102,15,56,221,216 + movdqu (%r10,%rbx,1),%xmm8 + pxor %xmm12,%xmm7 +.byte 102,15,56,221,224 + movdqu (%r11,%rbx,1),%xmm9 + pxor %xmm12,%xmm8 +.byte 102,15,56,221,232 + movdqu 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + pxor %xmm6,%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + pxor %xmm7,%xmm3 + movups %xmm4,-16(%r14,%rbx,1) + pxor %xmm8,%xmm4 + movups %xmm5,-16(%r15,%rbx,1) + pxor %xmm9,%xmm5 + + decl %edx + jnz .Loop_enc4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + + + + + + + + + + leaq 160(%rdi),%rdi + decl %edx + jnz .Lenc4x_loop_grande + +.Lenc4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lenc4x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt + +.globl aesni_multi_cbc_decrypt +.type aesni_multi_cbc_decrypt,@function +.align 32 +aesni_multi_cbc_decrypt: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +.Ldec4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +.Ldec4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm6 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm7 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm8 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm9 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz .Ldec4x_done + + movups 16-120(%rsi),%xmm1 + movups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + movdqu (%r8),%xmm2 + movdqu (%r9),%xmm3 + pxor %xmm12,%xmm2 + movdqu (%r10),%xmm4 + pxor %xmm12,%xmm3 + movdqu (%r11),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp .Loop_dec4x + +.align 32 +.Loop_dec4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,222,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,222,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r11,%rbx,1) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,222,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,222,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,222,232 + movups -24(%rsi),%xmm0 + cmpl 
44(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,222,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,222,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,222,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,222,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,222,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 160-120(%rsi),%xmm0 + + jb .Ldec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 192-120(%rsi),%xmm0 + + je .Ldec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 224-120(%rsi),%xmm0 + jmp .Ldec4x_tail + +.align 32 +.Ldec4x_tail: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,233 + movdqu 16-120(%rsi),%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 + movdqu 32-120(%rsi),%xmm0 + +.byte 102,15,56,223,214 +.byte 102,15,56,223,223 + movdqu -16(%r8,%rbx,1),%xmm6 + movdqu -16(%r9,%rbx,1),%xmm7 +.byte 102,65,15,56,223,224 +.byte 102,65,15,56,223,233 + movdqu -16(%r10,%rbx,1),%xmm8 + movdqu -16(%r11,%rbx,1),%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + movdqu (%r8,%rbx,1),%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + movdqu (%r9,%rbx,1),%xmm3 + pxor %xmm12,%xmm2 + movups %xmm4,-16(%r14,%rbx,1) + movdqu (%r10,%rbx,1),%xmm4 + pxor %xmm12,%xmm3 + movups %xmm5,-16(%r15,%rbx,1) + movdqu (%r11,%rbx,1),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + + decl %edx + jnz .Loop_dec4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + leaq 160(%rdi),%rdi + decl %edx + jnz .Ldec4x_loop_grande + +.Ldec4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Ldec4x_epilogue: + .byte 0xf3,0xc3 +.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha1-x86_64.s new file mode 100644 index 00000000000000..d4ed2047c6db9c --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha1-x86_64.s @@ -0,0 +1,1681 @@ +.text + + +.globl aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc,@function +.align 32 +aesni_cbc_sha1_enc: + + movl OPENSSL_ia32cap_P+0(%rip),%r10d + movq OPENSSL_ia32cap_P+4(%rip),%r11 + btq $61,%r11 + jc aesni_cbc_sha1_enc_shaext + jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc +.type aesni_cbc_sha1_enc_ssse3,@function +.align 32 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + 
movdqu (%r8),%xmm2 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + addq $64,%r10 + paddd %xmm13,%xmm4 +.byte 102,15,56,0,251 + paddd %xmm13,%xmm5 + paddd %xmm13,%xmm6 + movdqa %xmm4,0(%rsp) + psubd %xmm13,%xmm4 + movdqa %xmm5,16(%rsp) + psubd %xmm13,%xmm5 + movdqa %xmm6,32(%rsp) + psubd %xmm13,%xmm6 + movups -112(%r15),%xmm15 + movups 16-112(%r15),%xmm0 + jmp .Loop_ssse3 +.align 32 +.Loop_ssse3: + rorl $2,%ebx + movups 0(%r12),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm4,%xmm8 + xorl %edx,%esi + movdqa %xmm7,%xmm12 + paddd %xmm7,%xmm13 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm5,%xmm8 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm12 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm4,%xmm8 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm6,%xmm12 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm12,%xmm8 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm13,48(%rsp) + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %eax,%esi + movdqa %xmm8,%xmm3 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm8,%xmm12 + xorl %ebx,%esi + pslldq $12,%xmm3 + paddd %xmm8,%xmm8 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm12 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm13 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm3 + addl %edx,%ecx + rorl $7,%edx + por %xmm12,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm13 + pxor %xmm3,%xmm8 + xorl %ebp,%edx + movdqa 0(%r11),%xmm3 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm13,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm5,%xmm9 + xorl %ebp,%esi + movdqa %xmm8,%xmm13 + paddd %xmm8,%xmm3 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm6,%xmm9 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm5,%xmm9 + addl %ebx,%eax + rorl $7,%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + pxor %xmm7,%xmm13 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm13,%xmm9 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm3,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm9,%xmm12 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm9,%xmm13 + xorl %ecx,%esi + pslldq $12,%xmm12 + paddd %xmm9,%xmm9 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm13 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm3 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm12 + addl %ebp,%edx + rorl $7,%ebp + por %xmm13,%xmm9 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm3 + pxor %xmm12,%xmm9 + xorl %eax,%ebp + movdqa 16(%r11),%xmm12 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm3,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm6,%xmm10 + xorl %eax,%esi + movdqa %xmm9,%xmm3 + paddd %xmm9,%xmm12 + movl %ecx,%edi + addl 32(%rsp),%ebx + movups 
0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm7,%xmm10 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm3 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm6,%xmm10 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm8,%xmm3 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm3,%xmm10 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm12,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm10,%xmm13 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm10,%xmm3 + xorl %edx,%esi + pslldq $12,%xmm13 + paddd %xmm10,%xmm10 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm3 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm13,%xmm12 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm13 + addl %eax,%ebp + rorl $7,%eax + por %xmm3,%xmm10 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm12 + pxor %xmm13,%xmm10 + xorl %ebx,%eax + movdqa 16(%r11),%xmm13 + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %eax,%esi + pxor %xmm12,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm7,%xmm11 + xorl %ebx,%esi + movdqa %xmm10,%xmm12 + paddd %xmm10,%xmm13 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm8,%xmm11 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm12 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm7,%xmm11 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm9,%xmm12 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm11 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm13,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm11,%xmm3 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm11,%xmm12 + xorl %ebp,%esi + pslldq $12,%xmm3 + paddd %xmm11,%xmm11 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm12 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm3,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + cmpl $11,%r8d + jb .Laesenclast1 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast1 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast1: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + por %xmm12,%xmm11 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm13 + pxor %xmm3,%xmm11 + xorl %ecx,%ebx + movdqa 16(%r11),%xmm3 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm13,%xmm11 + pshufd $238,%xmm10,%xmm13 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm8,%xmm4 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm11,%xmm13 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm5,%xmm4 + addl %esi,%edx + movups 16(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%r12,%r13,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %eax,%edi + movdqa %xmm3,%xmm12 + xorl %ebx,%eax + paddd %xmm11,%xmm3 + addl %ebp,%edx + pxor %xmm13,%xmm4 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm4,%xmm13 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm3,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm4 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm13 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + por %xmm13,%xmm4 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm11,%xmm3 + addl 
%esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm9,%xmm5 + addl 16(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm4,%xmm3 + movl %eax,%edi + roll $5,%eax + pxor %xmm6,%xmm5 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebx + paddd %xmm4,%xmm12 + addl %eax,%ebp + pxor %xmm3,%xmm5 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm5,%xmm3 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm12,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm5 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm3 + roll $5,%edx + addl %esi,%ecx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + por %xmm3,%xmm5 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm4,%xmm12 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm10,%xmm6 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm5,%xmm12 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm7,%xmm6 + addl %esi,%eax + xorl %edx,%edi + movdqa 32(%r11),%xmm3 + rorl $7,%ecx + paddd %xmm5,%xmm13 + addl %ebx,%eax + pxor %xmm12,%xmm6 + addl 36(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm6,%xmm12 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm13,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm6 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm12 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm12,%xmm6 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm5,%xmm13 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm6,%xmm13 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm8,%xmm7 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm3,%xmm12 + rorl $7,%edx + paddd %xmm6,%xmm3 + addl %ecx,%ebx + pxor %xmm13,%xmm7 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm7,%xmm13 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm3,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm7 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm13 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm13,%xmm7 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm6,%xmm3 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm4,%xmm8 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm7,%xmm3 + movl %edx,%edi + roll $5,%edx + pxor %xmm9,%xmm8 + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebp + paddd %xmm7,%xmm12 + addl %edx,%ecx + pxor %xmm3,%xmm8 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm8,%xmm3 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm12,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm8 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm3,%xmm8 + addl %ebx,%eax + addl 12(%rsp),%ebp + movups 
48(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm7,%xmm12 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm5,%xmm9 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm8,%xmm12 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm10,%xmm9 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%eax + paddd %xmm8,%xmm13 + addl %ebp,%edx + pxor %xmm12,%xmm9 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm9,%xmm12 + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast2 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast2 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast2: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + movdqa %xmm13,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm9 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm12 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm12,%xmm9 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm8,%xmm13 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm6,%xmm10 + addl 32(%rsp),%ebp + movups 32(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm9,%xmm13 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm11,%xmm10 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm3,%xmm12 + xorl %ebx,%edi + paddd %xmm9,%xmm3 + xorl %ecx,%ebx + pxor %xmm13,%xmm10 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm10,%xmm13 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm3,16(%rsp) + roll $5,%ebp + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + pslld $2,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm13 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm13,%xmm10 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm9,%xmm3 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm7,%xmm11 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm10,%xmm3 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm4,%xmm11 + roll $5,%ebx + addl %esi,%eax + movdqa 48(%r11),%xmm13 + xorl %ecx,%edi + paddd %xmm10,%xmm12 + xorl %edx,%ecx + pxor %xmm3,%xmm11 + addl %ebx,%eax + addl 52(%rsp),%ebp + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm11,%xmm3 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm12,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm11 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm3 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm3,%xmm11 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm10,%xmm12 + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax 
+ rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm8,%xmm4 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm11,%xmm12 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm5,%xmm4 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm13,%xmm3 + xorl %edx,%edi + paddd %xmm11,%xmm13 + xorl %ebp,%edx + pxor %xmm12,%xmm4 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm4,%xmm12 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm13,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm4 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm12 + addl 8(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %ecx,%esi + xorl %edx,%ecx + por %xmm12,%xmm4 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm11,%xmm13 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm9,%xmm5 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm4,%xmm13 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm6,%xmm5 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm12 + xorl %ebp,%edi + paddd %xmm4,%xmm3 + xorl %eax,%ebp + pxor %xmm13,%xmm5 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm5,%xmm13 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm3,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm5 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm13 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm13,%xmm5 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm4,%xmm3 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb .Laesenclast3 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast3 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast3: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm6 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm5,%xmm3 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm7,%xmm6 + roll $5,%ebp + addl %esi,%edx + movups 48(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm13 + xorl %eax,%edi + paddd %xmm5,%xmm12 + xorl %ebx,%eax + pxor %xmm3,%xmm6 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm6,%xmm3 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm12,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm6 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm3 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm3,%xmm6 + rorl $7,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + movl %ecx,%edi + xorl %ebp,%esi 
+ roll $5,%ecx + pshufd $238,%xmm5,%xmm12 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm6,%xmm12 + movl %eax,%edi + roll $5,%eax + pxor %xmm8,%xmm7 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%ebx + paddd %xmm6,%xmm13 + addl %eax,%ebp + pxor %xmm12,%xmm7 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm7,%xmm12 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm13,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm7 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm12 + roll $5,%edx + addl %esi,%ecx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + por %xmm12,%xmm7 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm7,%xmm3 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm3,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,235 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm13,%xmm4 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm4,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm13,%xmm4 + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,243 + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm13,%xmm5 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm5,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm13,%xmm5 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,251 + roll 
$5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm13,%xmm6 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm6,32(%rsp) + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast4 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast4 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast4: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm13,%xmm6 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp .Loop_ssse3 + +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb .Laesenclast5 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast5 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast5: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm2,(%r8) + 
leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.type aesni_cbc_sha1_enc_shaext,@function +.align 32 +aesni_cbc_sha1_enc_shaext: + movq 8(%rsp),%r10 + movdqu (%r9),%xmm8 + movd 16(%r9),%xmm9 + movdqa K_XX_XX+80(%rip),%xmm7 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm0 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqu (%r10),%xmm3 + movdqa %xmm9,%xmm12 +.byte 102,15,56,0,223 + movdqu 16(%r10),%xmm4 + movdqa %xmm8,%xmm11 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,0,231 + + paddd %xmm3,%xmm9 + movdqu 32(%r10),%xmm5 + leaq 64(%r10),%r10 + pxor %xmm12,%xmm3 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm3 + movdqa %xmm8,%xmm10 +.byte 102,15,56,0,239 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 15,56,201,220 + movdqu -16(%r10),%xmm6 + movdqa %xmm8,%xmm9 +.byte 102,15,56,0,247 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,205 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,203 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + cmpl $11,%r11d + jb .Laesenclast6 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast6 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast6: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,214 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 
15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,203 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,212 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + cmpl $11,%r11d + jb .Laesenclast7 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast7 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast7: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,203 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,212 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,205 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + cmpl $11,%r11d + jb .Laesenclast8 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast8 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast8: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,203 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,212 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 +.byte 15,56,202,245 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm12,%xmm5 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 + cmpl 
$11,%r11d + jb .Laesenclast9 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je .Laesenclast9 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +.Laesenclast9: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + decq %rdx + + paddd %xmm11,%xmm8 + movups %xmm2,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz .Loop_shaext + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + movups %xmm2,(%r8) + movdqu %xmm8,(%r9) + movd %xmm9,16(%r9) + .byte 0xf3,0xc3 +.size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha256-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha256-x86_64.s new file mode 100644 index 00000000000000..2c85f62495fe5e --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-sha256-x86_64.s @@ -0,0 +1,57 @@ +.text + + +.globl aesni_cbc_sha256_enc +.type aesni_cbc_sha256_enc,@function +.align 16 +aesni_cbc_sha256_enc: + xorl %eax,%eax + cmpq $0,%rdi + je .Lprobe + ud2 +.Lprobe: + .byte 0xf3,0xc3 +.size aesni_cbc_sha256_enc,.-aesni_cbc_sha256_enc + +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 +.long 0,0,0,0, 0,0,0,0 +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s new file mode 100644 index 00000000000000..84708afbbb352f --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s @@ -0,0 +1,3165 @@ +.text + +.globl 
aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_enc1_1 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 +.size aesni_encrypt,.-aesni_encrypt + +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +.Loop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz .Loop_dec1_2 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 +.size aesni_decrypt, .-aesni_decrypt +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 +.size _aesni_decrypt2,.-_aesni_decrypt2 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Lenc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +.Ldec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 +.size _aesni_decrypt3,.-_aesni_decrypt3 
+.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Lenc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +.Ldec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + addq $16,%rax +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -16(%rcx,%rax,1),%xmm0 + jmp .Lenc_loop6_enter +.align 16 +.Lenc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.Lenc_loop6_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + addq $16,%rax +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -16(%rcx,%rax,1),%xmm0 + jmp .Ldec_loop6_enter 
+.align 16 +.Ldec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.Ldec_loop6_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Ldec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 +.size _aesni_decrypt6,.-_aesni_decrypt6 +.type _aesni_encrypt8,@function +.align 16 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + addq $16,%rax + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups -16(%rcx,%rax,1),%xmm0 + jmp .Lenc_loop8_enter +.align 16 +.Lenc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.Lenc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lenc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 +.size _aesni_encrypt8,.-_aesni_encrypt8 +.type _aesni_decrypt8,@function +.align 16 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + addq $16,%rax + pxor %xmm0,%xmm7 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups -16(%rcx,%rax,1),%xmm0 + jmp .Ldec_loop8_enter +.align 16 +.Ldec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.Ldec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + 
jnz .Ldec_loop8 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 +.size _aesni_decrypt8,.-_aesni_decrypt8 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: + andq $-16,%rdx + jz .Lecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz .Lecb_decrypt + + cmpq $128,%rdx + jb .Lecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_enc_loop8_enter +.align 16 +.Lecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc .Lecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_enc_one + movups 16(%rdi),%xmm3 + je .Lecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_enc_three + movups 48(%rdi),%xmm5 + je .Lecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_enc_five + movups 80(%rdi),%xmm7 + je .Lecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp 
.Lecb_ret + +.align 16 +.Lecb_decrypt: + cmpq $128,%rdx + jb .Lecb_dec_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp .Lecb_dec_loop8_enter +.align 16 +.Lecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +.Lecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc .Lecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz .Lecb_ret + +.Lecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb .Lecb_dec_one + movups 16(%rdi),%xmm3 + je .Lecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb .Lecb_dec_three + movups 48(%rdi),%xmm5 + je .Lecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb .Lecb_dec_five + movups 80(%rdi),%xmm7 + je .Lecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp .Lecb_ret +.align 16 +.Lecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +.Lecb_ret: + .byte 0xf3,0xc3 +.size aesni_ecb_encrypt,.-aesni_ecb_encrypt +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm6 + movdqa .Lincrement64(%rip),%xmm9 + movdqa .Lbswap_mask(%rip),%xmm7 + + shll $4,%eax + movl $16,%r10d + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 + jmp .Lccm64_enc_outer +.align 16 +.Lccm64_enc_outer: + movups (%r11),%xmm0 + movq %r10,%rax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + 
xorps %xmm0,%xmm3 + movups 32(%r11),%xmm0 + +.Lccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm9,%xmm6 + decq %rdx +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) +.byte 102,15,56,0,215 + leaq 16(%rsi),%rsi + jnz .Lccm64_enc_outer + + movups %xmm3,(%r9) + .byte 0xf3,0xc3 +.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm6 + movdqu (%r9),%xmm3 + movdqa .Lincrement64(%rip),%xmm9 + movdqa .Lbswap_mask(%rip),%xmm7 + + movaps %xmm6,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,15,56,0,247 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_5 +.byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 + leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 + jmp .Lccm64_dec_outer +.align 16 +.Lccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz .Lccm64_dec_break + + movups (%r11),%xmm0 + movq %r10,%rax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups 32(%r11),%xmm0 + jmp .Lccm64_dec2_loop +.align 16 +.Lccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz .Lccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leaq 16(%rdi),%rdi + jmp .Lccm64_dec_outer + +.align 16 +.Lccm64_dec_break: + + movl 240(%r11),%eax + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +.Loop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz .Loop_enc1_6 +.byte 102,15,56,221,217 + movups %xmm3,(%r9) + .byte 0xf3,0xc3 +.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: + leaq (%rsp),%rax + pushq %rbp + subq $128,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + + cmpq $1,%rdx + je .Lctr32_one_shortcut + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%r11d + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %r11d,%eax + xorl %r11d,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %r11d,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %r11d,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl 
%r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %r11d,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %r11d,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %r11d,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb .Lctr32_tail + + subq $6,%rdx + cmpl $4194304,%r10d + je .Lctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp .Lctr32_loop8 + +.align 16 +.Lctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %r11d + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 + jmp .Lctr32_loop6 + +.align 16 +.Lctr32_loop6: + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %r11d,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %r11d,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 102,15,56,220,224 + leal 5(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + + call .Lenc_loop6 + + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + + subq $6,%rdx + jnc .Lctr32_loop6 + + addq $6,%rdx + jz .Lctr32_done + + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp .Lctr32_tail + +.align 32 +.Lctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %r11d,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + 
xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %r11d,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je .Lctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp .Lctr32_enc_done + +.align 16 +.Lctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 
16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + + subq $8,%rdx + jnc .Lctr32_loop8 + + addq $8,%rdx + jz .Lctr32_done + leaq -128(%rcx),%rcx + +.Lctr32_tail: + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb .Lctr32_loop3 + je .Lctr32_loop4 + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call .Lenc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb .Lctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je .Lctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz .Lctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz .Lctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb .Lctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je .Lctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm10 + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + jmp .Lctr32_done + +.align 16 +.Lctr32_done: + leaq (%rbp),%rsp + popq %rbp +.Lctr32_epilogue: + .byte 0xf3,0xc3 +.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_8: +.byte 102,15,56,220,209 + decl %eax + 
movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_8 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa .Lxts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc .Lxts_enc_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 + jmp .Lxts_enc_grandloop + +.align 32 +.Lxts_enc_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,220,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp .Lxts_enc_loop6 +.align 32 +.Lxts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz .Lxts_enc_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,220,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,220,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,248 + movups -48(%rcx),%xmm0 
+ + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,220,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc .Lxts_enc_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +.Lxts_enc_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 + addq $96,%rdx + jz .Lxts_enc_done + + pxor %xmm0,%xmm11 + cmpq $32,%rdx + jb .Lxts_enc_one + pxor %xmm0,%xmm12 + je .Lxts_enc_two + + pxor %xmm0,%xmm13 + cmpq $64,%rdx + jb .Lxts_enc_three + pxor %xmm0,%xmm14 + je .Lxts_enc_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_9 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + 
leaq 48(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_enc_done + +.align 16 +.Lxts_enc_done: + andq $15,%r9 + jz .Lxts_enc_ret + movq %r9,%rdx + +.Lxts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_10 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +.Lxts_enc_ret: + leaq (%rbp),%rsp + popq %rbp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_encrypt,.-aesni_xts_encrypt +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +.Loop_enc1_11: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz .Loop_enc1_11 +.byte 102,15,56,221,209 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa .Lxts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc .Lxts_dec_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq .Lxts_magic(%rip),%r8 + jmp .Lxts_dec_grandloop + +.align 32 +.Lxts_dec_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,222,233 + 
movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp .Lxts_dec_loop6 +.align 32 +.Lxts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz .Lxts_dec_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,222,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,222,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc .Lxts_dec_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +.Lxts_dec_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 + addq $96,%rdx + jz .Lxts_dec_done + + pxor %xmm0,%xmm12 + cmpq $32,%rdx + jb .Lxts_dec_one + pxor 
%xmm0,%xmm13 + je .Lxts_dec_two + + pxor %xmm0,%xmm14 + cmpq $64,%rdx + jb .Lxts_dec_three + je .Lxts_dec_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz .Lxts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp .Lxts_dec_done2 + +.align 16 +.Lxts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_12 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm14,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp .Lxts_dec_done + +.align 16 +.Lxts_dec_done: + andq $15,%r9 + jz .Lxts_dec_ret +.Lxts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_13 +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz .Lxts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_14 +.byte 102,15,56,223,209 + xorps 
%xmm10,%xmm2 + movups %xmm2,(%rsi) + +.Lxts_dec_ret: + leaq (%rbp),%rsp + popq %rbp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size aesni_xts_decrypt,.-aesni_xts_decrypt +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: + testq %rdx,%rdx + jz .Lcbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz .Lcbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb .Lcbc_enc_tail + subq $16,%rdx + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +.Loop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc .Lcbc_enc_loop + addq $16,%rdx + jnz .Lcbc_enc_tail + movups %xmm2,(%r8) + jmp .Lcbc_ret + +.Lcbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp .Lcbc_enc_loop + +.align 16 +.Lcbc_decrypt: + leaq (%rsp),%rax + pushq %rbp + subq $16,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $80,%rdx + jbe .Lcbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $112,%rdx + jbe .Lcbc_dec_six_or_seven + + andl $71303168,%r9d + subq $80,%rdx + cmpl $4194304,%r9d + je .Lcbc_dec_loop6_enter + subq $32,%rdx + leaq 112(%rcx),%rcx + jmp .Lcbc_dec_loop8_enter +.align 16 +.Lcbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +.Lcbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + xorq %r11,%r11 + cmpq $112,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + setnc %r11b + shlq $7,%r11 +.byte 102,68,15,56,222,201 + addq %rdi,%r11 + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + 
movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je .Lcbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%r11),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%r11),%xmm12 + movdqu 32(%r11),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%r11),%xmm14 + movdqu 64(%r11),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%r11),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $128,%rdx + ja .Lcbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $112,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $80,%rdx + jbe .Lcbc_dec_tail + + movaps %xmm11,%xmm2 +.Lcbc_dec_six_or_seven: + cmpq $96,%rdx + ja .Lcbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu 
%xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + movdqa %xmm8,%xmm2 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +.Lcbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %r11,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $96,%rdx + ja .Lcbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $80,%rdx + jle .Lcbc_dec_tail_collected + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + +.Lcbc_dec_tail: + movups (%rdi),%xmm2 + subq $16,%rdx + jbe .Lcbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $16,%rdx + jbe .Lcbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $16,%rdx + jbe .Lcbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $16,%rdx + jbe .Lcbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + subq $16,%rdx + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + leaq 16(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + movdqa %xmm4,%xmm2 + leaq 32(%rsi),%rsi + jmp .Lcbc_dec_tail_collected +.align 16 +.Lcbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + movdqa %xmm5,%xmm2 + leaq 48(%rsi),%rsi + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_tail_collected: + movups %xmm10,(%r8) + andq $15,%rdx + jnz 
.Lcbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp .Lcbc_dec_ret +.align 16 +.Lcbc_dec_tail_partial: + movaps %xmm2,(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + +.Lcbc_dec_ret: + leaq (%rbp),%rsp + popq %rbp +.Lcbc_ret: + .byte 0xf3,0xc3 +.size aesni_cbc_encrypt,.-aesni_cbc_encrypt +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz .Ldec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + +.Ldec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja .Ldec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%rdi) +.Ldec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_decrypt_key: +.size aesni_set_decrypt_key,.-aesni_set_decrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + movq $-1,%rax + testq %rdi,%rdi + jz .Lenc_key_ret + testq %rdx,%rdx + jz .Lenc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%rdx),%rax + cmpl $256,%esi + je .L14rounds + cmpl $192,%esi + je .L12rounds + cmpl $128,%esi + jne .Lbad_keybits + +.L10rounds: + movl $9,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call .Lkey_expansion_128_cold +.byte 102,15,58,223,200,2 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,4 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,8 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,16 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,32 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,64 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,128 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,27 + call .Lkey_expansion_128 +.byte 102,15,58,223,200,54 + call .Lkey_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + +.align 16 +.L12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_192a_cold +.byte 102,15,58,223,202,2 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,8 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,32 + call .Lkey_expansion_192b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_192a +.byte 102,15,58,223,202,128 + call .Lkey_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.L14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call .Lkey_expansion_256a_cold +.byte 102,15,58,223,200,1 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,2 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,2 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,4 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,4 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,8 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,8 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,16 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,16 + call 
.Lkey_expansion_256b +.byte 102,15,58,223,202,32 + call .Lkey_expansion_256a +.byte 102,15,58,223,200,32 + call .Lkey_expansion_256b +.byte 102,15,58,223,202,64 + call .Lkey_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp .Lenc_key_ret + +.align 16 +.Lbad_keybits: + movq $-2,%rax +.Lenc_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +.LSEH_end_set_encrypt_key: + +.align 16 +.Lkey_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_192a_cold: + movaps %xmm2,%xmm5 +.Lkey_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp .Lkey_expansion_192b_warm + +.align 16 +.Lkey_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +.Lkey_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.align 16 +.Lkey_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 +.size aesni_set_encrypt_key,.-aesni_set_encrypt_key +.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lincrement32: +.long 6,6,6,0 +.Lincrement64: +.long 1,0,0,0 +.Lxts_magic: +.long 0x87,0,1,0 +.Lincrement1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/bsaes-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/bsaes-x86_64.s new file mode 100644 index 00000000000000..5b363a5eef9020 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/bsaes-x86_64.s @@ -0,0 +1,2498 @@ +.text + + + + +.type _bsaes_encrypt8,@function +.align 64 +_bsaes_encrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 
+ pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Lenc_sbox +.align 16 +.Lenc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +.Lenc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand 
%xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand %xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl .Lenc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz .Lenc_loop + movdqa 64(%r11),%xmm7 + jmp .Lenc_loop +.align 16 +.Lenc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa 
%xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 +.size _bsaes_encrypt8,.-_bsaes_encrypt8 + +.type _bsaes_decrypt8,@function +.align 64 +_bsaes_decrypt8: + leaq .LBS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp .Ldec_sbox +.align 16 +.Ldec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 
102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +.Ldec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl .Ldec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + 
pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz .Ldec_loop + movdqa -32(%r11),%xmm7 + jmp .Ldec_loop +.align 16 +.Ldec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor 
%xmm7,%xmm0 + .byte 0xf3,0xc3 +.size _bsaes_decrypt8,.-_bsaes_decrypt8 +.type _bsaes_key_convert,@function +.align 16 +_bsaes_key_convert: + leaq .Lmasks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp .Lkey_loop +.align 16 +.Lkey_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz .Lkey_loop + + movdqa 80(%r11),%xmm7 + + .byte 0xf3,0xc3 +.size _bsaes_key_convert,.-_bsaes_key_convert + +.globl bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,@function +.align 16 +bsaes_cbc_encrypt: + cmpl $0,%r9d + jne asm_AES_cbc_encrypt + cmpq $128,%rdx + jb asm_AES_cbc_encrypt + + movq %rsp,%rax +.Lcbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +.Lcbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc .Lcbc_dec_loop + + addq $8,%r14 + jz .Lcbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb .Lcbc_dec_one + movdqu 16(%r12),%xmm0 + je .Lcbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb .Lcbc_dec_three + movdqu 48(%r12),%xmm2 + je .Lcbc_dec_four + movdqu 
64(%r12),%xmm3 + cmpq $6,%r14 + jb .Lcbc_dec_five + movdqu 80(%r12),%xmm4 + je .Lcbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp .Lcbc_dec_done +.align 16 +.Lcbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +.Lcbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lcbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lcbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lcbc_dec_epilogue: + .byte 0xf3,0xc3 +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt + +.globl bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,@function +.align 16 +bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +.Lctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq 
%rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb .Lctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq .LADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp .Lctr_enc_loop +.align 16 +.Lctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq .LBS0(%rip),%r11 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc .Lctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq .LADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz .Lctr_enc_loop + + jmp .Lctr_enc_done +.align 16 +.Lctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb .Lctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je .Lctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb .Lctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je .Lctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb .Lctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je .Lctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp .Lctr_enc_done + +.align 16 +.Lctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz .Lctr_enc_short + +.Lctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lctr_enc_epilogue: + .byte 0xf3,0xc3 +.size 
bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,@function +.align 16 +bsaes_xts_encrypt: + movq %rsp,%rax +.Lxts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_enc_short + jmp .Lxts_enc_loop + +.align 16 +.Lxts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_enc_loop + +.Lxts_enc_short: + addq $128,%r14 + jz .Lxts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + 
pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + 
pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_enc_done +.align 16 +.Lxts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_enc_done: + andl $15,%ebx + jz .Lxts_enc_ret + movq %r13,%rdx + +.Lxts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +.Lxts_enc_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja .Lxts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_enc_epilogue: + .byte 0xf3,0xc3 +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,@function +.align 16 +bsaes_xts_decrypt: + movq %rsp,%rax +.Lxts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc .Lxts_dec_short + jmp .Lxts_dec_loop + +.align 16 +.Lxts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + 
movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc .Lxts_dec_loop + +.Lxts_dec_short: + addq $128,%r14 + jz .Lxts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je .Lxts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je .Lxts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je .Lxts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je .Lxts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je .Lxts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je .Lxts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu 
%xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp .Lxts_dec_done +.align 16 +.Lxts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +.Lxts_dec_done: + andl $15,%ebx + jz .Lxts_dec_ret + + pxor %xmm14,%xmm14 + movdqa .Lxts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +.Lxts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz .Lxts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +.Lxts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +.Lxts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja 
.Lxts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +.Lxts_dec_epilogue: + .byte 0xf3,0xc3 +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +.type _bsaes_const,@object +.align 64 +_bsaes_const: +.LM0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +.LISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LBS0: +.quad 0x5555555555555555, 0x5555555555555555 +.LBS1: +.quad 0x3333333333333333, 0x3333333333333333 +.LBS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.LSR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +.LSWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +.LADD1: +.quad 0x0000000000000000, 0x0000000100000000 +.LADD2: +.quad 0x0000000000000000, 0x0000000200000000 +.LADD3: +.quad 0x0000000000000000, 0x0000000300000000 +.LADD4: +.quad 0x0000000000000000, 0x0000000400000000 +.LADD5: +.quad 0x0000000000000000, 0x0000000500000000 +.LADD6: +.quad 0x0000000000000000, 0x0000000600000000 +.LADD7: +.quad 0x0000000000000000, 0x0000000700000000 +.LADD8: +.quad 0x0000000000000000, 0x0000000800000000 +.Lxts_magic: +.long 0x87,0,1,0 +.Lmasks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +.LM0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.align 64 +.size _bsaes_const,.-_bsaes_const diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/vpaes-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/vpaes-x86_64.s new file mode 100644 index 00000000000000..b9d6df5134ec60 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/vpaes-x86_64.s @@ -0,0 +1,827 @@ +.text + + + + + + + + + + + + + + + + +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq .Lk_mc_backward(%rip),%r10 + jmp .Lenc_entry + +.align 16 +.Lenc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +.Lenc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa 
%xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz .Lenc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + .byte 0xf3,0xc3 +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core + + + + + + +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa .Lk_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa .Lk_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq .Lk_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa .Lk_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp .Ldec_entry + +.align 16 +.Ldec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +.Ldec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz .Ldec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + .byte 0xf3,0xc3 +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core + + + + + + +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa .Lk_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq .Lk_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq .Lk_sr(%rip),%r10 + testq %rcx,%rcx + jnz .Lschedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp .Lschedule_go + +.Lschedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +.Lschedule_go: + cmpl $192,%esi + ja .Lschedule_256 + je .Lschedule_192 + + + + + + + + + + +.Lschedule_128: + movl $10,%esi + +.Loop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + jmp .Loop_schedule_128 + + + + + + + + + + + + + + + + +.align 16 +.Lschedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +.Loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call 
_vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .Loop_schedule_192 + + + + + + + + + + + +.align 16 +.Lschedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +.Loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz .Lschedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp .Loop_schedule_256 + + + + + + + + + + + + +.align 16 +.Lschedule_mangle_last: + + leaq .Lk_deskew(%rip),%r11 + testq %rcx,%rcx + jnz .Lschedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq .Lk_opt(%rip),%r11 + addq $32,%rdx + +.Lschedule_mangle_last_dec: + addq $-16,%rdx + pxor .Lk_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 +.size _vpaes_schedule_core,.-_vpaes_schedule_core + + + + + + + + + + + + + + + +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + .byte 0xf3,0xc3 +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor .Lk_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + .byte 0xf3,0xc3 +.size _vpaes_schedule_round,.-_vpaes_schedule_round + + + + + + + + + + +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + .byte 0xf3,0xc3 +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform + + + + + + + + + + + + + + + + + + + + + + + + +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa .Lk_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz .Lschedule_mangle_dec + + + addq $16,%rdx + pxor .Lk_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp .Lschedule_mangle_both +.align 16 +.Lschedule_mangle_dec: + + leaq .Lk_dksd(%rip),%r11 + movdqa 
%xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +.Lschedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + .byte 0xf3,0xc3 +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle + + + + +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key + +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 +.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key + +.globl vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.size vpaes_encrypt,.-vpaes_encrypt + +.globl vpaes_decrypt +.type vpaes_decrypt,@function +.align 16 +vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 +.size vpaes_decrypt,.-vpaes_decrypt +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc .Lcbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je .Lcbc_dec_loop + jmp .Lcbc_enc_loop +.align 16 +.Lcbc_enc_loop: + movdqu (%rdi),%xmm0 + pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_enc_loop + jmp .Lcbc_done +.align 16 +.Lcbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc .Lcbc_dec_loop +.Lcbc_done: + movdqu %xmm6,(%r8) +.Lcbc_abort: + .byte 0xf3,0xc3 +.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt + + + + + + +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + leaq .Lk_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + .byte 0xf3,0xc3 +.size _vpaes_preheat,.-_vpaes_preheat + + + + + +.type _vpaes_consts,@object +.align 64 +_vpaes_consts: +.Lk_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +.Lk_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +.Lk_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 
0xCD80B1FCB0FDCC81 + +.Lk_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +.Lk_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +.Lk_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +.Lk_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +.Lk_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +.Lk_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +.Lk_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +.Lk_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +.Lk_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +.Lk_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +.Lk_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +.Lk_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +.Lk_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +.Lk_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +.Lk_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +.Lk_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +.Lk_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +.Lk_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +.Lk_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +.Lk_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.align 64 +.size _vpaes_consts,.-_vpaes_consts diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/rsaz-avx2.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/rsaz-avx2.s new file mode 100644 index 00000000000000..d8b8bd8de5a4b3 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/rsaz-avx2.s @@ -0,0 +1,25 @@ +.text + +.globl rsaz_avx2_eligible +.type rsaz_avx2_eligible,@function +rsaz_avx2_eligible: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.globl rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.globl rsaz_1024_norm2red_avx2 +.globl rsaz_1024_red2norm_avx2 +.globl rsaz_1024_scatter5_avx2 +.globl rsaz_1024_gather5_avx2 +.type rsaz_1024_sqr_avx2,@function +rsaz_1024_sqr_avx2: +rsaz_1024_mul_avx2: +rsaz_1024_norm2red_avx2: +rsaz_1024_red2norm_avx2: +rsaz_1024_scatter5_avx2: +rsaz_1024_gather5_avx2: +.byte 0x0f,0x0b + .byte 0xf3,0xc3 +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 diff --git 
a/deps/openssl/asm_obsolete/x64-elf-gas/bn/rsaz-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/rsaz-x86_64.s new file mode 100644 index 00000000000000..b43eb278e2cc06 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/rsaz-x86_64.s @@ -0,0 +1,1117 @@ +.text + + + +.globl rsaz_512_sqr +.type rsaz_512_sqr,@function +.align 32 +rsaz_512_sqr: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lsqr_body: + movq %rdx,%rbp + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + jmp .Loop_sqr + +.align 32 +.Loop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + movq %rdx,%r15 + adcq $0,%r15 + + addq %r8,%r8 + movq %r9,%rcx + adcq %r9,%r9 + + mulq %rax + movq %rax,(%rsp) + addq %rdx,%r8 + adcq $0,%r9 + + movq %r8,8(%rsp) + shrq $63,%rcx + + + movq 8(%rsi),%r8 + movq 16(%rsi),%rax + mulq %r8 + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r15 + movq %r8,%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%r8 + movq %r10,%rdx + adcq $0,%r8 + + addq %rdx,%rdx + leaq (%rcx,%r10,2),%r10 + movq %r11,%rbx + adcq %r11,%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + shrq $63,%rbx + + + movq 16(%rsi),%r9 + movq 24(%rsi),%rax + mulq %r9 + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r14 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + movq %r12,%r10 + leaq (%rbx,%r12,2),%r12 + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + shrq $63,%r10 + addq %rax,%r8 + movq %r9,%rax + adcq $0,%rdx + addq %rcx,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + movq %r13,%rcx + leaq (%r10,%r13,2),%r13 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + shrq $63,%rcx + + + movq 24(%rsi),%r10 + movq 32(%rsi),%rax + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + movq %r14,%r12 + leaq (%rcx,%r14,2),%r14 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + shrq $63,%r12 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + 
movq %rdx,%r10 + adcq $0,%r10 + + movq %r15,%rbx + leaq (%r12,%r15,2),%r15 + + mulq %rax + addq %rax,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + shrq $63,%rbx + + + movq 32(%rsi),%r11 + movq 40(%rsi),%rax + mulq %r11 + addq %rax,%r8 + movq 48(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + movq %r8,%r12 + leaq (%rbx,%r8,2),%r8 + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + shrq $63,%r12 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + movq %r9,%rcx + leaq (%r12,%r9,2),%r9 + + mulq %rax + addq %rax,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + shrq $63,%rcx + + + movq 40(%rsi),%r12 + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + movq 56(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + movq %r10,%r15 + leaq (%rcx,%r10,2),%r10 + adcq $0,%rdx + shrq $63,%r15 + addq %rbx,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + movq %r11,%rbx + leaq (%r15,%r11,2),%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + movq 48(%rsi),%r13 + movq 56(%rsi),%rax + mulq %r13 + addq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + adcq $0,%r13 + + xorq %r14,%r14 + shlq $1,%rbx + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + movq 56(%rsi),%rax + mulq %rax + addq %rax,%r13 + adcq $0,%rdx + + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz .Loop_sqr + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lsqr_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_sqr,.-rsaz_512_sqr +.globl rsaz_512_mul +.type rsaz_512_mul,@function +.align 32 +rsaz_512_mul: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lmul_body: +.byte 102,72,15,110,199 +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul,.-rsaz_512_mul +.globl rsaz_512_mul_gather4 +.type rsaz_512_mul_gather4,@function +.align 32 
+rsaz_512_mul_gather4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +.Lmul_gather4_body: + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + movl (%rdx,%r9,4),%ebx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rbx + movq (%rsi),%rax + movq 8(%rsi),%rcx + leaq 128(%rdx,%r9,4),%rbp + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + leaq 128(%rbp),%rbp + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp .Loop_mul_gather + +.align 32 +.Loop_mul_gather: + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 128(%rbp),%rbp + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_gather4_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 +.globl rsaz_512_mul_scatter4 +.type rsaz_512_mul_scatter4,@function +.align 32 +rsaz_512_mul_scatter4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +.Lmul_scatter4_body: + leaq (%r8,%r9,4),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 
102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movl %r8d,0(%rsi) + shrq $32,%r8 + movl %r9d,128(%rsi) + shrq $32,%r9 + movl %r10d,256(%rsi) + shrq $32,%r10 + movl %r11d,384(%rsi) + shrq $32,%r11 + movl %r12d,512(%rsi) + shrq $32,%r12 + movl %r13d,640(%rsi) + shrq $32,%r13 + movl %r14d,768(%rsi) + shrq $32,%r14 + movl %r15d,896(%rsi) + shrq $32,%r15 + movl %r8d,64(%rsi) + movl %r9d,192(%rsi) + movl %r10d,320(%rsi) + movl %r11d,448(%rsi) + movl %r12d,576(%rsi) + movl %r13d,704(%rsi) + movl %r14d,832(%rsi) + movl %r15d,960(%rsi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_scatter4_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 +.globl rsaz_512_mul_by_one +.type rsaz_512_mul_by_one,@function +.align 32 +rsaz_512_mul_by_one: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +.Lmul_by_one_body: + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + call __rsaz_512_reduce + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lmul_by_one_epilogue: + .byte 0xf3,0xc3 +.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one +.type __rsaz_512_reduce,@function +.align 32 +__rsaz_512_reduce: + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .Lreduction_loop + +.align 32 +.Lreduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 
+ + decl %ecx + jne .Lreduction_loop + + .byte 0xf3,0xc3 +.size __rsaz_512_reduce,.-__rsaz_512_reduce +.type __rsaz_512_subtract,@function +.align 32 +__rsaz_512_subtract: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.size __rsaz_512_subtract,.-__rsaz_512_subtract +.type __rsaz_512_mul,@function +.align 32 +__rsaz_512_mul: + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp .Loop_mul + +.align 32 +.Loop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz .Loop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 +.size __rsaz_512_mul,.-__rsaz_512_mul +.globl rsaz_512_scatter4 +.type rsaz_512_scatter4,@function +.align 16 +rsaz_512_scatter4: + leaq (%rdi,%rdx,4),%rdi + movl $8,%r9d + jmp .Loop_scatter +.align 16 +.Loop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movl %eax,(%rdi) + shrq $32,%rax + movl %eax,64(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz .Loop_scatter + .byte 0xf3,0xc3 +.size 
rsaz_512_scatter4,.-rsaz_512_scatter4 + +.globl rsaz_512_gather4 +.type rsaz_512_gather4,@function +.align 16 +rsaz_512_gather4: + leaq (%rsi,%rdx,4),%rsi + movl $8,%r9d + jmp .Loop_gather +.align 16 +.Loop_gather: + movl (%rsi),%eax + movl 64(%rsi),%r8d + leaq 128(%rsi),%rsi + shlq $32,%r8 + orq %r8,%rax + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz .Loop_gather + .byte 0xf3,0xc3 +.size rsaz_512_gather4,.-rsaz_512_gather4 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-gf2m.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-gf2m.s new file mode 100644 index 00000000000000..eed057ad6a1a8a --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-gf2m.s @@ -0,0 +1,291 @@ +.text + +.type _mul_1x1,@function +.align 16 +_mul_1x1: + subq $128+8,%rsp + movq $-1,%r9 + leaq (%rax,%rax,1),%rsi + shrq $3,%r9 + leaq (,%rax,4),%rdi + andq %rax,%r9 + leaq (,%rax,8),%r12 + sarq $63,%rax + leaq (%r9,%r9,1),%r10 + sarq $63,%rsi + leaq (,%r9,4),%r11 + andq %rbp,%rax + sarq $63,%rdi + movq %rax,%rdx + shlq $63,%rax + andq %rbp,%rsi + shrq $1,%rdx + movq %rsi,%rcx + shlq $62,%rsi + andq %rbp,%rdi + shrq $2,%rcx + xorq %rsi,%rax + movq %rdi,%rbx + shlq $61,%rdi + xorq %rcx,%rdx + shrq $3,%rbx + xorq %rdi,%rax + xorq %rbx,%rdx + + movq %r9,%r13 + movq $0,0(%rsp) + xorq %r10,%r13 + movq %r9,8(%rsp) + movq %r11,%r14 + movq %r10,16(%rsp) + xorq %r12,%r14 + movq %r13,24(%rsp) + + xorq %r11,%r9 + movq %r11,32(%rsp) + xorq %r11,%r10 + movq %r9,40(%rsp) + xorq %r11,%r13 + movq %r10,48(%rsp) + xorq %r14,%r9 + movq %r13,56(%rsp) + xorq %r14,%r10 + + movq %r12,64(%rsp) + xorq %r14,%r13 + movq %r9,72(%rsp) + xorq %r11,%r9 + movq %r10,80(%rsp) + xorq %r11,%r10 + movq %r13,88(%rsp) + + xorq %r11,%r13 + movq %r14,96(%rsp) + movq %r8,%rsi + movq %r9,104(%rsp) + andq %rbp,%rsi + movq %r10,112(%rsp) + shrq $4,%rbp + movq %r13,120(%rsp) + movq %r8,%rdi + andq %rbp,%rdi + shrq $4,%rbp + + movq (%rsp,%rsi,8),%xmm0 + movq %r8,%rsi + andq %rbp,%rsi + shrq $4,%rbp + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $4,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $60,%rbx + xorq %rcx,%rax + pslldq $1,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $12,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $52,%rbx + xorq %rcx,%rax + pslldq $2,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $20,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $44,%rbx + xorq %rcx,%rax + pslldq $3,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $28,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $36,%rbx + xorq %rcx,%rax + pslldq $4,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $36,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $28,%rbx + xorq %rcx,%rax + pslldq $5,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $44,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $20,%rbx + xorq %rcx,%rax + pslldq $6,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + 
andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $52,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $12,%rbx + xorq %rcx,%rax + pslldq $7,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %rcx,%rbx + shlq $60,%rcx +.byte 102,72,15,126,198 + shrq $4,%rbx + xorq %rcx,%rax + psrldq $8,%xmm0 + xorq %rbx,%rdx +.byte 102,72,15,126,199 + xorq %rsi,%rax + xorq %rdi,%rdx + + addq $128+8,%rsp + .byte 0xf3,0xc3 +.Lend_mul_1x1: +.size _mul_1x1,.-_mul_1x1 + +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: + movq OPENSSL_ia32cap_P(%rip),%rax + btq $33,%rax + jnc .Lvanilla_mul_2x2 + +.byte 102,72,15,110,198 +.byte 102,72,15,110,201 +.byte 102,72,15,110,210 +.byte 102,73,15,110,216 + movdqa %xmm0,%xmm4 + movdqa %xmm1,%xmm5 +.byte 102,15,58,68,193,0 + pxor %xmm2,%xmm4 + pxor %xmm3,%xmm5 +.byte 102,15,58,68,211,0 +.byte 102,15,58,68,229,0 + xorps %xmm0,%xmm4 + xorps %xmm2,%xmm4 + movdqa %xmm4,%xmm5 + pslldq $8,%xmm4 + psrldq $8,%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm0 + movdqu %xmm2,0(%rdi) + movdqu %xmm0,16(%rdi) + .byte 0xf3,0xc3 + +.align 16 +.Lvanilla_mul_2x2: + leaq -136(%rsp),%rsp + movq %r14,80(%rsp) + movq %r13,88(%rsp) + movq %r12,96(%rsp) + movq %rbp,104(%rsp) + movq %rbx,112(%rsp) +.Lbody_mul_2x2: + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + movq %rdx,48(%rsp) + movq %rcx,56(%rsp) + movq %r8,64(%rsp) + + movq $15,%r8 + movq %rsi,%rax + movq %rcx,%rbp + call _mul_1x1 + movq %rax,16(%rsp) + movq %rdx,24(%rsp) + + movq 48(%rsp),%rax + movq 64(%rsp),%rbp + call _mul_1x1 + movq %rax,0(%rsp) + movq %rdx,8(%rsp) + + movq 40(%rsp),%rax + movq 56(%rsp),%rbp + xorq 48(%rsp),%rax + xorq 64(%rsp),%rbp + call _mul_1x1 + movq 0(%rsp),%rbx + movq 8(%rsp),%rcx + movq 16(%rsp),%rdi + movq 24(%rsp),%rsi + movq 32(%rsp),%rbp + + xorq %rdx,%rax + xorq %rcx,%rdx + xorq %rbx,%rax + movq %rbx,0(%rbp) + xorq %rdi,%rdx + movq %rsi,24(%rbp) + xorq %rsi,%rax + xorq %rsi,%rdx + xorq %rdx,%rax + movq %rdx,16(%rbp) + movq %rax,8(%rbp) + + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbp + movq 112(%rsp),%rbx + leaq 136(%rsp),%rsp + .byte 0xf3,0xc3 +.Lend_mul_2x2: +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s new file mode 100644 index 00000000000000..b0981692130394 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont.s @@ -0,0 +1,724 @@ +.text + + + +.globl bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: + testl $3,%r9d + jnz .Lmul_enter + cmpl $8,%r9d + jb .Lmul_enter + cmpq %rsi,%rdx + jne .Lmul4x_enter + testl $7,%r9d + jz .Lsqr8x_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 2(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + 
imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + movq %r9,%r15 + orq %rcx,%rsi +.align 16 +.Lcopy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont,.-bn_mul_mont +.type bn_mul4x_mont,@function +.align 16 +bn_mul4x_mont: +.Lmul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +.Lmul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .L1st4x +.align 16 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq 
-16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.align 4 +.Louter4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp .Linner4x +.align 16 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 
-8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb .Louter4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + pxor %xmm0,%xmm0 + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp .Lsub4x +.align 16 +.Lsub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz .Lsub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + leaq -1(%r9),%r15 + orq %rcx,%rsi + + movdqu (%rsi),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,(%rdi) + jmp .Lcopy4x +.align 16 +.Lcopy4x: + movdqu 16(%rsi,%r14,1),%xmm2 + movdqu 32(%rsi,%r14,1),%xmm1 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movdqa %xmm0,32(%rsp,%r14,1) + movdqu %xmm1,32(%rdi,%r14,1) + leaq 32(%r14),%r14 + decq %r15 + jnz .Lcopy4x + + shlq $2,%r9 + movdqu 16(%rsi,%r14,1),%xmm2 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont,.-bn_mul4x_mont + + +.type bn_sqr8x_mont,@function +.align 32 +bn_sqr8x_mont: +.Lsqr8x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,4),%r11 + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lsqr8x_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,4),%rsp + jmp .Lsqr8x_sp_done + +.align 32 +.Lsqr8x_sp_alt: + leaq 4096-64(,%r9,4),%r10 + leaq -64(%rsp,%r9,4),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lsqr8x_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + leaq 64(%rsp,%r9,2),%r11 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lsqr8x_body: + + movq %r9,%rbp +.byte 102,73,15,110,211 + shrq $3+2,%rbp + movl OPENSSL_ia32cap_P+8(%rip),%eax + jmp .Lsqr8x_copy_n + +.align 32 +.Lsqr8x_copy_n: + movq 0(%rcx),%xmm0 + movq 8(%rcx),%xmm1 + movq 16(%rcx),%xmm3 + movq 24(%rcx),%xmm4 + leaq 32(%rcx),%rcx + movdqa %xmm0,0(%r11) + movdqa %xmm1,16(%r11) + movdqa %xmm3,32(%r11) + movdqa %xmm4,48(%r11) + leaq 64(%r11),%r11 + decq %rbp + jnz .Lsqr8x_copy_n + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 102,73,15,110,218 + call bn_sqr8x_internal + + pxor %xmm0,%xmm0 + leaq 
48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp .Lsqr8x_zero + +.align 32 +.Lsqr8x_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + movdqa %xmm0,0(%rdx) + movdqa %xmm0,16(%rdx) + movdqa %xmm0,32(%rdx) + movdqa %xmm0,48(%rdx) + leaq 64(%rdx),%rdx + decq %r9 + jnz .Lsqr8x_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lsqr8x_epilogue: + .byte 0xf3,0xc3 +.size bn_sqr8x_mont,.-bn_sqr8x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 16 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s new file mode 100644 index 00000000000000..1bf368c7eb29bf --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s @@ -0,0 +1,1829 @@ +.text + + + +.globl bn_mul_mont_gather5 +.type bn_mul_mont_gather5,@function +.align 64 +bn_mul_mont_gather5: + testl $7,%r9d + jnz .Lmul_enter + jmp .Lmul4x_enter + +.align 16 +.Lmul_enter: + movl %r9d,%r9d + movq %rsp,%rax + movl 8(%rsp),%r10d + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq 2(%r9),%r11 + negq %r11 + leaq (%rsp,%r11,8),%rsp + andq $-1024,%rsp + + movq %rax,8(%rsp,%r9,8) +.Lmul_body: + movq %rdx,%r12 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%r12,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .L1st_enter + +.align 16 +.L1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.L1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne .L1st + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp .Louter +.align 16 +.Louter: + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + mulq %rbx 
+ addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp .Linner_enter + +.align 16 +.Linner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +.Linner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne .Linner + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb .Louter + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp .Lsub +.align 16 +.Lsub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz .Lsub + + sbbq $0,%rax + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + movq %r9,%r15 + orq %rcx,%rsi +.align 16 +.Lcopy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz .Lcopy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmul_epilogue: + .byte 0xf3,0xc3 +.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 +.type bn_mul4x_mont_gather5,@function +.align 32 +bn_mul4x_mont_gather5: +.Lmul4x_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lmul4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lmul4xsp_done + +.align 32 +.Lmul4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lmul4xsp_done: + andq $-64,%rsp + negq %r9 + + movq %rax,40(%rsp) +.Lmul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lmul4x_epilogue: + .byte 0xf3,0xc3 +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 + +.type mul4x_internal,@function +.align 32 +mul4x_internal: + shlq $5,%r9 + movl 8(%rax),%r10d + leaq 256(%rdx,%r9,1),%r13 + shrq $5,%r9 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq .Lmagic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%rdx,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 + + movq -96(%r12),%xmm0 + leaq 256(%r12),%r14 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 +.byte 0x67 + por %xmm1,%xmm0 + movq -96(%r14),%xmm1 +.byte 0x67 + 
pand %xmm7,%xmm3 +.byte 0x67 + por %xmm2,%xmm0 + movq -32(%r14),%xmm2 +.byte 0x67 + pand %xmm4,%xmm1 +.byte 0x67 + por %xmm3,%xmm0 + movq 32(%r14),%xmm3 + +.byte 102,72,15,126,195 + movq 96(%r14),%xmm0 + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + pand %xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 + + imulq %r10,%rbp + + + + + + + + leaq 64+8(%rsp,%r11,8),%r14 + movq %rdx,%r11 + + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + leaq 512(%r12),%r12 + por %xmm1,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp .L1st4x + +.align 32 +.L1st4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .L1st4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp .Louter4x + +.align 32 +.Louter4x: + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + + imulq %r10,%rbp +.byte 0x67 + movq %rdx,%r11 + movq %rdi,(%r14) + + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq (%r14,%r9,1),%r14 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp .Linner4x + +.align 32 +.Linner4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 
16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz .Linner4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -16(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + movq %rdi,-16(%r14) + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb .Louter4x + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + xorq $1,%rdi + leaq (%r14,%r9,1),%rbx + leaq (%rcx,%rdi,8),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + jmp .Lsqr4x_sub +.size mul4x_internal,.-mul4x_internal +.globl bn_power5 +.type bn_power5,@function +.align 32 +bn_power5: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lpwr_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lpwr_sp_done + +.align 32 +.Lpwr_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lpwr_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lpower5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lpower5_epilogue: + .byte 0xf3,0xc3 +.size bn_power5,.-bn_power5 + +.globl bn_sqr8x_internal +.hidden bn_sqr8x_internal +.type bn_sqr8x_internal,@function +.align 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp .Lsqr4x_1st + +.align 32 +.Lsqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne .Lsqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp .Lsqr4x_outer + +.align 32 +.Lsqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp .Lsqr4x_inner + +.align 32 +.Lsqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + 
+ cmpq $0,%rcx + jne .Lsqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz .Lsqr4x_outer + + + movq -32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp .Lsqr4x_shift_n_add + +.align 32 +.Lsqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz .Lsqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq 
%r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 +sqr8x_reduction: + xorq %rax,%rax + leaq (%rbp,%r9,2),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp .L8x_reduction_loop + +.align 32 +.L8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp .L8x_reduce + +.align 32 +.L8x_reduce: + mulq %rbx + movq 16(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_reduce + + leaq 128(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp .L8x_tail + +.align 32 +.L8x_tail: + mulq %rbx + addq %rax,%r8 + movq 16(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz .L8x_tail + + leaq 128(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae .L8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq 
%rsi,%rsi + + movl $8,%ecx + jmp .L8x_tail + +.align 32 +.L8x_tail_done: + addq (%rdx),%r8 + xorq %rax,%rax + + negq %rsi +.L8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -16(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb .L8x_reduction_loop + + subq %r15,%rcx + leaq (%rdi,%r9,1),%rbx + adcq %rsi,%rsi + movq %r9,%rcx + orq %rsi,%rax +.byte 102,72,15,126,207 + xorq $1,%rax +.byte 102,72,15,126,206 + leaq (%rbp,%rax,8),%rbp + sarq $3+2,%rcx + jmp .Lsqr4x_sub + +.align 32 +.Lsqr4x_sub: +.byte 0x66 + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rbx),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 32(%rbp),%r14 + movq %r12,0(%rdi) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz .Lsqr4x_sub + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 +.size bn_sqr8x_internal,.-bn_sqr8x_internal +.globl bn_from_montgomery +.type bn_from_montgomery,@function +.align 32 +bn_from_montgomery: + testl $7,%r9d + jz bn_from_mont8x + xorl %eax,%eax + .byte 0xf3,0xc3 +.size bn_from_montgomery,.-bn_from_montgomery + +.type bn_from_mont8x,@function +.align 32 +bn_from_mont8x: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb .Lfrom_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp .Lfrom_sp_done + +.align 32 +.Lfrom_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +.Lfrom_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +.Lfrom_body: + movq %r9,%r11 + leaq 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp .Lmul_by_1 + +.align 32 +.Lmul_by_1: + movdqu (%rsi),%xmm1 + movdqu 16(%rsi),%xmm2 + movdqu 32(%rsi),%xmm3 + movdqa %xmm0,(%rax,%r9,1) + movdqu 48(%rsi),%xmm4 + movdqa %xmm0,16(%rax,%r9,1) +.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,%r9,1) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,%r9,1) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + leaq 64(%rax),%rax + subq $64,%r11 + jnz .Lmul_by_1 + +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 0x67 + movq %rcx,%rbp +.byte 102,73,15,110,218 + call sqr8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + subq $32,%r9 + jnz .Lfrom_mont_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lfrom_epilogue: + .byte 0xf3,0xc3 +.size bn_from_mont8x,.-bn_from_mont8x +.globl bn_get_bits5 +.type bn_get_bits5,@function +.align 16 +bn_get_bits5: + movq %rdi,%r10 + movl %esi,%ecx + shrl $3,%esi + movzwl 
(%r10,%rsi,1),%eax + andl $7,%ecx + shrl %cl,%eax + andl $31,%eax + .byte 0xf3,0xc3 +.size bn_get_bits5,.-bn_get_bits5 + +.globl bn_scatter5 +.type bn_scatter5,@function +.align 16 +bn_scatter5: + cmpl $0,%esi + jz .Lscatter_epilogue + leaq (%rdx,%rcx,8),%rdx +.Lscatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz .Lscatter +.Lscatter_epilogue: + .byte 0xf3,0xc3 +.size bn_scatter5,.-bn_scatter5 + +.globl bn_gather5 +.type bn_gather5,@function +.align 16 +bn_gather5: + movl %ecx,%r11d + shrl $3,%ecx + andq $7,%r11 + notl %ecx + leaq .Lmagic_masks(%rip),%rax + andl $3,%ecx + leaq 128(%rdx,%r11,8),%rdx + movq 0(%rax,%rcx,8),%xmm4 + movq 8(%rax,%rcx,8),%xmm5 + movq 16(%rax,%rcx,8),%xmm6 + movq 24(%rax,%rcx,8),%xmm7 + jmp .Lgather +.align 16 +.Lgather: + movq -128(%rdx),%xmm0 + movq -64(%rdx),%xmm1 + pand %xmm4,%xmm0 + movq 0(%rdx),%xmm2 + pand %xmm5,%xmm1 + movq 64(%rdx),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 +.byte 0x67,0x67 + por %xmm2,%xmm0 + leaq 256(%rdx),%rdx + por %xmm3,%xmm0 + + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz .Lgather + .byte 0xf3,0xc3 +.LSEH_end_bn_gather5: +.size bn_gather5,.-bn_gather5 +.align 64 +.Lmagic_masks: +.long 0,0, 0,0, 0,0, -1,-1 +.long 0,0, 0,0, 0,0, 0,0 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/camellia/cmll-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/camellia/cmll-x86_64.s new file mode 100644 index 00000000000000..ac7da4dfc2d19e --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/camellia/cmll-x86_64.s @@ -0,0 +1,1838 @@ +.text + + +.globl Camellia_EncryptBlock +.type Camellia_EncryptBlock,@function +.align 16 +Camellia_EncryptBlock: + movl $128,%eax + subl %edi,%eax + movl $3,%edi + adcl $0,%edi + jmp .Lenc_rounds +.size Camellia_EncryptBlock,.-Camellia_EncryptBlock + +.globl Camellia_EncryptBlock_Rounds +.type Camellia_EncryptBlock_Rounds,@function +.align 16 +.Lenc_rounds: +Camellia_EncryptBlock_Rounds: + pushq %rbx + pushq %rbp + pushq %r13 + pushq %r14 + pushq %r15 +.Lenc_prologue: + + + movq %rcx,%r13 + movq %rdx,%r14 + + shll $6,%edi + leaq .LCamellia_SBOX(%rip),%rbp + leaq (%r14,%rdi,1),%r15 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + bswapl %r8d + movl 12(%rsi),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_encrypt + + bswapl %r8d + bswapl %r9d + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq 0(%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r13 + movq 24(%rsp),%rbp + movq 32(%rsp),%rbx + leaq 40(%rsp),%rsp +.Lenc_epilogue: + .byte 0xf3,0xc3 +.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds + +.type _x86_64_Camellia_encrypt,@function +.align 16 +_x86_64_Camellia_encrypt: + xorl 0(%r14),%r9d + xorl 4(%r14),%r8d + xorl 8(%r14),%r11d + xorl 12(%r14),%r10d +.align 16 +.Leloop: + movl 16(%r14),%ebx + movl 20(%r14),%eax + + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl 
%ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 24(%r14),%ebx + movl 28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 32(%r14),%ebx + movl 36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 40(%r14),%ebx + movl 44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 48(%r14),%ebx + movl 52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 56(%r14),%ebx + movl 60(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 64(%r14),%ebx + movl 68(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + leaq 64(%r14),%r14 + cmpq %r15,%r14 + movl 8(%r14),%edx + movl 12(%r14),%ecx + je .Ledone + + andl %r8d,%eax + orl %r11d,%edx + roll $1,%eax + xorl %edx,%r10d + xorl %eax,%r9d + andl %r10d,%ecx + orl %r9d,%ebx + roll $1,%ecx + xorl %ebx,%r8d + xorl %ecx,%r11d + jmp .Leloop + +.align 16 +.Ledone: + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r8d,%ecx + xorl %r9d,%edx + + movl %eax,%r8d + movl %ebx,%r9d + movl 
%ecx,%r10d + movl %edx,%r11d + +.byte 0xf3,0xc3 +.size _x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt + + +.globl Camellia_DecryptBlock +.type Camellia_DecryptBlock,@function +.align 16 +Camellia_DecryptBlock: + movl $128,%eax + subl %edi,%eax + movl $3,%edi + adcl $0,%edi + jmp .Ldec_rounds +.size Camellia_DecryptBlock,.-Camellia_DecryptBlock + +.globl Camellia_DecryptBlock_Rounds +.type Camellia_DecryptBlock_Rounds,@function +.align 16 +.Ldec_rounds: +Camellia_DecryptBlock_Rounds: + pushq %rbx + pushq %rbp + pushq %r13 + pushq %r14 + pushq %r15 +.Ldec_prologue: + + + movq %rcx,%r13 + movq %rdx,%r15 + + shll $6,%edi + leaq .LCamellia_SBOX(%rip),%rbp + leaq (%r15,%rdi,1),%r14 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + bswapl %r8d + movl 12(%rsi),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_decrypt + + bswapl %r8d + bswapl %r9d + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq 0(%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r13 + movq 24(%rsp),%rbp + movq 32(%rsp),%rbx + leaq 40(%rsp),%rsp +.Ldec_epilogue: + .byte 0xf3,0xc3 +.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds + +.type _x86_64_Camellia_decrypt,@function +.align 16 +_x86_64_Camellia_decrypt: + xorl 0(%r14),%r9d + xorl 4(%r14),%r8d + xorl 8(%r14),%r11d + xorl 12(%r14),%r10d +.align 16 +.Ldloop: + movl -8(%r14),%ebx + movl -4(%r14),%eax + + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -16(%r14),%ebx + movl -12(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -24(%r14),%ebx + movl -20(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -32(%r14),%ebx + movl -28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + 
xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -40(%r14),%ebx + movl -36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -48(%r14),%ebx + movl -44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -56(%r14),%ebx + movl -52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + leaq -64(%r14),%r14 + cmpq %r15,%r14 + movl 0(%r14),%edx + movl 4(%r14),%ecx + je .Lddone + + andl %r8d,%eax + orl %r11d,%edx + roll $1,%eax + xorl %edx,%r10d + xorl %eax,%r9d + andl %r10d,%ecx + orl %r9d,%ebx + roll $1,%ecx + xorl %ebx,%r8d + xorl %ecx,%r11d + + jmp .Ldloop + +.align 16 +.Lddone: + xorl %r10d,%ecx + xorl %r11d,%edx + xorl %r8d,%eax + xorl %r9d,%ebx + + movl %ecx,%r8d + movl %edx,%r9d + movl %eax,%r10d + movl %ebx,%r11d + +.byte 0xf3,0xc3 +.size _x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt +.globl Camellia_Ekeygen +.type Camellia_Ekeygen,@function +.align 16 +Camellia_Ekeygen: + pushq %rbx + pushq %rbp + pushq %r13 + pushq %r14 + pushq %r15 +.Lkey_prologue: + + movl %edi,%r15d + movq %rdx,%r13 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + movl 12(%rsi),%r11d + + bswapl %r8d + bswapl %r9d + bswapl %r10d + bswapl %r11d + movl %r9d,0(%r13) + movl %r8d,4(%r13) + movl %r11d,8(%r13) + movl %r10d,12(%r13) + cmpq $128,%r15 + je .L1st128 + + movl 16(%rsi),%r8d + movl 20(%rsi),%r9d + cmpq $192,%r15 + je .L1st192 + movl 24(%rsi),%r10d + movl 28(%rsi),%r11d + jmp .L1st256 +.L1st192: + movl %r8d,%r10d + movl %r9d,%r11d + notl %r10d + notl %r11d +.L1st256: + bswapl %r8d + bswapl %r9d + bswapl %r10d + bswapl %r11d + movl %r9d,32(%r13) + movl %r8d,36(%r13) + movl %r11d,40(%r13) + movl %r10d,44(%r13) + xorl 0(%r13),%r9d + xorl 4(%r13),%r8d + xorl 8(%r13),%r11d + xorl 12(%r13),%r10d + +.L1st128: + leaq .LCamellia_SIGMA(%rip),%r14 + leaq .LCamellia_SBOX(%rip),%rbp + + movl 0(%r14),%ebx + movl 4(%r14),%eax + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 8(%r14),%ebx + movl 12(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 
0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 16(%r14),%ebx + movl 20(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl 0(%r13),%r9d + xorl 4(%r13),%r8d + xorl 8(%r13),%r11d + xorl 12(%r13),%r10d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 24(%r14),%ebx + movl 28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 32(%r14),%ebx + movl 36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + cmpq $128,%r15 + jne .L2nd256 + + leaq 128(%r13),%r13 + shlq $32,%r8 + shlq $32,%r10 + orq %r9,%r8 + orq %r11,%r10 + movq -128(%r13),%rax + movq -120(%r13),%rbx + movq %r8,-112(%r13) + movq %r10,-104(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rax,-96(%r13) + movq %rbx,-88(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-80(%r13) + movq %r10,-72(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-64(%r13) + movq %r10,-56(%r13) + movq %rax,%r11 + shlq $30,%rax + movq %rbx,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%rax + shlq $30,%rbx + orq %r11,%rbx + movq %rax,-48(%r13) + movq %rbx,-40(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-32(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rbx,-24(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-16(%r13) + movq %r10,-8(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,0(%r13) + movq %rbx,8(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,16(%r13) + movq %rbx,24(%r13) + movq %r8,%r11 + shlq $34,%r8 + movq %r10,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%r8 + shlq $34,%r10 + orq %r11,%r10 + movq %r8,32(%r13) + movq %r10,40(%r13) + movq %rax,%r11 + shlq $17,%rax + movq 
%rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,48(%r13) + movq %rbx,56(%r13) + movq %r8,%r11 + shlq $17,%r8 + movq %r10,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%r8 + shlq $17,%r10 + orq %r11,%r10 + movq %r8,64(%r13) + movq %r10,72(%r13) + movl $3,%eax + jmp .Ldone +.align 16 +.L2nd256: + movl %r9d,48(%r13) + movl %r8d,52(%r13) + movl %r11d,56(%r13) + movl %r10d,60(%r13) + xorl 32(%r13),%r9d + xorl 36(%r13),%r8d + xorl 40(%r13),%r11d + xorl 44(%r13),%r10d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 40(%r14),%ebx + movl 44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 48(%r14),%ebx + movl 52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + movq 0(%r13),%rax + movq 8(%r13),%rbx + movq 32(%r13),%rcx + movq 40(%r13),%rdx + movq 48(%r13),%r14 + movq 56(%r13),%r15 + leaq 128(%r13),%r13 + shlq $32,%r8 + shlq $32,%r10 + orq %r9,%r8 + orq %r11,%r10 + movq %r8,-112(%r13) + movq %r10,-104(%r13) + movq %rcx,%r11 + shlq $15,%rcx + movq %rdx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rcx + shlq $15,%rdx + orq %r11,%rdx + movq %rcx,-96(%r13) + movq %rdx,-88(%r13) + movq %r14,%r11 + shlq $15,%r14 + movq %r15,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r14 + shlq $15,%r15 + orq %r11,%r15 + movq %r14,-80(%r13) + movq %r15,-72(%r13) + movq %rcx,%r11 + shlq $15,%rcx + movq %rdx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rcx + shlq $15,%rdx + orq %r11,%rdx + movq %rcx,-64(%r13) + movq %rdx,-56(%r13) + movq %r8,%r11 + shlq $30,%r8 + movq %r10,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r8 + shlq $30,%r10 + orq %r11,%r10 + movq %r8,-48(%r13) + movq %r10,-40(%r13) + movq %rax,%r11 + shlq $45,%rax + movq %rbx,%r9 + shrq $19,%r9 + shrq $19,%r11 + orq %r9,%rax + shlq $45,%rbx + orq %r11,%rbx + movq %rax,-32(%r13) + movq %rbx,-24(%r13) + movq %r14,%r11 + shlq $30,%r14 + movq %r15,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r14 + shlq $30,%r15 + orq %r11,%r15 + movq %r14,-16(%r13) + movq %r15,-8(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rax,0(%r13) + movq %rbx,8(%r13) + movq %rcx,%r11 + shlq $30,%rcx + movq %rdx,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%rcx + shlq $30,%rdx + orq %r11,%rdx + movq %rcx,16(%r13) + movq %rdx,24(%r13) + movq %r8,%r11 + shlq $30,%r8 + movq %r10,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r8 + shlq $30,%r10 + orq %r11,%r10 + movq %r8,32(%r13) + movq %r10,40(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq 
%rax,48(%r13) + movq %rbx,56(%r13) + movq %r14,%r11 + shlq $32,%r14 + movq %r15,%r9 + shrq $32,%r9 + shrq $32,%r11 + orq %r9,%r14 + shlq $32,%r15 + orq %r11,%r15 + movq %r14,64(%r13) + movq %r15,72(%r13) + movq %rcx,%r11 + shlq $34,%rcx + movq %rdx,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%rcx + shlq $34,%rdx + orq %r11,%rdx + movq %rcx,80(%r13) + movq %rdx,88(%r13) + movq %r14,%r11 + shlq $17,%r14 + movq %r15,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%r14 + shlq $17,%r15 + orq %r11,%r15 + movq %r14,96(%r13) + movq %r15,104(%r13) + movq %rax,%r11 + shlq $34,%rax + movq %rbx,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%rax + shlq $34,%rbx + orq %r11,%rbx + movq %rax,112(%r13) + movq %rbx,120(%r13) + movq %r8,%r11 + shlq $51,%r8 + movq %r10,%r9 + shrq $13,%r9 + shrq $13,%r11 + orq %r9,%r8 + shlq $51,%r10 + orq %r11,%r10 + movq %r8,128(%r13) + movq %r10,136(%r13) + movl $4,%eax +.Ldone: + movq 0(%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r13 + movq 24(%rsp),%rbp + movq 32(%rsp),%rbx + leaq 40(%rsp),%rsp +.Lkey_epilogue: + .byte 0xf3,0xc3 +.size Camellia_Ekeygen,.-Camellia_Ekeygen +.align 64 +.LCamellia_SIGMA: +.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 +.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 +.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 +.long 0, 0, 0, 0 +.LCamellia_SBOX: +.long 0x70707000,0x70700070 +.long 0x82828200,0x2c2c002c +.long 0x2c2c2c00,0xb3b300b3 +.long 0xececec00,0xc0c000c0 +.long 0xb3b3b300,0xe4e400e4 +.long 0x27272700,0x57570057 +.long 0xc0c0c000,0xeaea00ea +.long 0xe5e5e500,0xaeae00ae +.long 0xe4e4e400,0x23230023 +.long 0x85858500,0x6b6b006b +.long 0x57575700,0x45450045 +.long 0x35353500,0xa5a500a5 +.long 0xeaeaea00,0xeded00ed +.long 0x0c0c0c00,0x4f4f004f +.long 0xaeaeae00,0x1d1d001d +.long 0x41414100,0x92920092 +.long 0x23232300,0x86860086 +.long 0xefefef00,0xafaf00af +.long 0x6b6b6b00,0x7c7c007c +.long 0x93939300,0x1f1f001f +.long 0x45454500,0x3e3e003e +.long 0x19191900,0xdcdc00dc +.long 0xa5a5a500,0x5e5e005e +.long 0x21212100,0x0b0b000b +.long 0xededed00,0xa6a600a6 +.long 0x0e0e0e00,0x39390039 +.long 0x4f4f4f00,0xd5d500d5 +.long 0x4e4e4e00,0x5d5d005d +.long 0x1d1d1d00,0xd9d900d9 +.long 0x65656500,0x5a5a005a +.long 0x92929200,0x51510051 +.long 0xbdbdbd00,0x6c6c006c +.long 0x86868600,0x8b8b008b +.long 0xb8b8b800,0x9a9a009a +.long 0xafafaf00,0xfbfb00fb +.long 0x8f8f8f00,0xb0b000b0 +.long 0x7c7c7c00,0x74740074 +.long 0xebebeb00,0x2b2b002b +.long 0x1f1f1f00,0xf0f000f0 +.long 0xcecece00,0x84840084 +.long 0x3e3e3e00,0xdfdf00df +.long 0x30303000,0xcbcb00cb +.long 0xdcdcdc00,0x34340034 +.long 0x5f5f5f00,0x76760076 +.long 0x5e5e5e00,0x6d6d006d +.long 0xc5c5c500,0xa9a900a9 +.long 0x0b0b0b00,0xd1d100d1 +.long 0x1a1a1a00,0x04040004 +.long 0xa6a6a600,0x14140014 +.long 0xe1e1e100,0x3a3a003a +.long 0x39393900,0xdede00de +.long 0xcacaca00,0x11110011 +.long 0xd5d5d500,0x32320032 +.long 0x47474700,0x9c9c009c +.long 0x5d5d5d00,0x53530053 +.long 0x3d3d3d00,0xf2f200f2 +.long 0xd9d9d900,0xfefe00fe +.long 0x01010100,0xcfcf00cf +.long 0x5a5a5a00,0xc3c300c3 +.long 0xd6d6d600,0x7a7a007a +.long 0x51515100,0x24240024 +.long 0x56565600,0xe8e800e8 +.long 0x6c6c6c00,0x60600060 +.long 0x4d4d4d00,0x69690069 +.long 0x8b8b8b00,0xaaaa00aa +.long 0x0d0d0d00,0xa0a000a0 +.long 0x9a9a9a00,0xa1a100a1 +.long 0x66666600,0x62620062 +.long 0xfbfbfb00,0x54540054 +.long 0xcccccc00,0x1e1e001e +.long 0xb0b0b000,0xe0e000e0 +.long 0x2d2d2d00,0x64640064 +.long 0x74747400,0x10100010 +.long 0x12121200,0x00000000 +.long 0x2b2b2b00,0xa3a300a3 +.long 0x20202000,0x75750075 +.long 
0xf0f0f000,0x8a8a008a +.long 0xb1b1b100,0xe6e600e6 +.long 0x84848400,0x09090009 +.long 0x99999900,0xdddd00dd +.long 0xdfdfdf00,0x87870087 +.long 0x4c4c4c00,0x83830083 +.long 0xcbcbcb00,0xcdcd00cd +.long 0xc2c2c200,0x90900090 +.long 0x34343400,0x73730073 +.long 0x7e7e7e00,0xf6f600f6 +.long 0x76767600,0x9d9d009d +.long 0x05050500,0xbfbf00bf +.long 0x6d6d6d00,0x52520052 +.long 0xb7b7b700,0xd8d800d8 +.long 0xa9a9a900,0xc8c800c8 +.long 0x31313100,0xc6c600c6 +.long 0xd1d1d100,0x81810081 +.long 0x17171700,0x6f6f006f +.long 0x04040400,0x13130013 +.long 0xd7d7d700,0x63630063 +.long 0x14141400,0xe9e900e9 +.long 0x58585800,0xa7a700a7 +.long 0x3a3a3a00,0x9f9f009f +.long 0x61616100,0xbcbc00bc +.long 0xdedede00,0x29290029 +.long 0x1b1b1b00,0xf9f900f9 +.long 0x11111100,0x2f2f002f +.long 0x1c1c1c00,0xb4b400b4 +.long 0x32323200,0x78780078 +.long 0x0f0f0f00,0x06060006 +.long 0x9c9c9c00,0xe7e700e7 +.long 0x16161600,0x71710071 +.long 0x53535300,0xd4d400d4 +.long 0x18181800,0xabab00ab +.long 0xf2f2f200,0x88880088 +.long 0x22222200,0x8d8d008d +.long 0xfefefe00,0x72720072 +.long 0x44444400,0xb9b900b9 +.long 0xcfcfcf00,0xf8f800f8 +.long 0xb2b2b200,0xacac00ac +.long 0xc3c3c300,0x36360036 +.long 0xb5b5b500,0x2a2a002a +.long 0x7a7a7a00,0x3c3c003c +.long 0x91919100,0xf1f100f1 +.long 0x24242400,0x40400040 +.long 0x08080800,0xd3d300d3 +.long 0xe8e8e800,0xbbbb00bb +.long 0xa8a8a800,0x43430043 +.long 0x60606000,0x15150015 +.long 0xfcfcfc00,0xadad00ad +.long 0x69696900,0x77770077 +.long 0x50505000,0x80800080 +.long 0xaaaaaa00,0x82820082 +.long 0xd0d0d000,0xecec00ec +.long 0xa0a0a000,0x27270027 +.long 0x7d7d7d00,0xe5e500e5 +.long 0xa1a1a100,0x85850085 +.long 0x89898900,0x35350035 +.long 0x62626200,0x0c0c000c +.long 0x97979700,0x41410041 +.long 0x54545400,0xefef00ef +.long 0x5b5b5b00,0x93930093 +.long 0x1e1e1e00,0x19190019 +.long 0x95959500,0x21210021 +.long 0xe0e0e000,0x0e0e000e +.long 0xffffff00,0x4e4e004e +.long 0x64646400,0x65650065 +.long 0xd2d2d200,0xbdbd00bd +.long 0x10101000,0xb8b800b8 +.long 0xc4c4c400,0x8f8f008f +.long 0x00000000,0xebeb00eb +.long 0x48484800,0xcece00ce +.long 0xa3a3a300,0x30300030 +.long 0xf7f7f700,0x5f5f005f +.long 0x75757500,0xc5c500c5 +.long 0xdbdbdb00,0x1a1a001a +.long 0x8a8a8a00,0xe1e100e1 +.long 0x03030300,0xcaca00ca +.long 0xe6e6e600,0x47470047 +.long 0xdadada00,0x3d3d003d +.long 0x09090900,0x01010001 +.long 0x3f3f3f00,0xd6d600d6 +.long 0xdddddd00,0x56560056 +.long 0x94949400,0x4d4d004d +.long 0x87878700,0x0d0d000d +.long 0x5c5c5c00,0x66660066 +.long 0x83838300,0xcccc00cc +.long 0x02020200,0x2d2d002d +.long 0xcdcdcd00,0x12120012 +.long 0x4a4a4a00,0x20200020 +.long 0x90909000,0xb1b100b1 +.long 0x33333300,0x99990099 +.long 0x73737300,0x4c4c004c +.long 0x67676700,0xc2c200c2 +.long 0xf6f6f600,0x7e7e007e +.long 0xf3f3f300,0x05050005 +.long 0x9d9d9d00,0xb7b700b7 +.long 0x7f7f7f00,0x31310031 +.long 0xbfbfbf00,0x17170017 +.long 0xe2e2e200,0xd7d700d7 +.long 0x52525200,0x58580058 +.long 0x9b9b9b00,0x61610061 +.long 0xd8d8d800,0x1b1b001b +.long 0x26262600,0x1c1c001c +.long 0xc8c8c800,0x0f0f000f +.long 0x37373700,0x16160016 +.long 0xc6c6c600,0x18180018 +.long 0x3b3b3b00,0x22220022 +.long 0x81818100,0x44440044 +.long 0x96969600,0xb2b200b2 +.long 0x6f6f6f00,0xb5b500b5 +.long 0x4b4b4b00,0x91910091 +.long 0x13131300,0x08080008 +.long 0xbebebe00,0xa8a800a8 +.long 0x63636300,0xfcfc00fc +.long 0x2e2e2e00,0x50500050 +.long 0xe9e9e900,0xd0d000d0 +.long 0x79797900,0x7d7d007d +.long 0xa7a7a700,0x89890089 +.long 0x8c8c8c00,0x97970097 +.long 0x9f9f9f00,0x5b5b005b +.long 0x6e6e6e00,0x95950095 +.long 
0xbcbcbc00,0xffff00ff +.long 0x8e8e8e00,0xd2d200d2 +.long 0x29292900,0xc4c400c4 +.long 0xf5f5f500,0x48480048 +.long 0xf9f9f900,0xf7f700f7 +.long 0xb6b6b600,0xdbdb00db +.long 0x2f2f2f00,0x03030003 +.long 0xfdfdfd00,0xdada00da +.long 0xb4b4b400,0x3f3f003f +.long 0x59595900,0x94940094 +.long 0x78787800,0x5c5c005c +.long 0x98989800,0x02020002 +.long 0x06060600,0x4a4a004a +.long 0x6a6a6a00,0x33330033 +.long 0xe7e7e700,0x67670067 +.long 0x46464600,0xf3f300f3 +.long 0x71717100,0x7f7f007f +.long 0xbababa00,0xe2e200e2 +.long 0xd4d4d400,0x9b9b009b +.long 0x25252500,0x26260026 +.long 0xababab00,0x37370037 +.long 0x42424200,0x3b3b003b +.long 0x88888800,0x96960096 +.long 0xa2a2a200,0x4b4b004b +.long 0x8d8d8d00,0xbebe00be +.long 0xfafafa00,0x2e2e002e +.long 0x72727200,0x79790079 +.long 0x07070700,0x8c8c008c +.long 0xb9b9b900,0x6e6e006e +.long 0x55555500,0x8e8e008e +.long 0xf8f8f800,0xf5f500f5 +.long 0xeeeeee00,0xb6b600b6 +.long 0xacacac00,0xfdfd00fd +.long 0x0a0a0a00,0x59590059 +.long 0x36363600,0x98980098 +.long 0x49494900,0x6a6a006a +.long 0x2a2a2a00,0x46460046 +.long 0x68686800,0xbaba00ba +.long 0x3c3c3c00,0x25250025 +.long 0x38383800,0x42420042 +.long 0xf1f1f100,0xa2a200a2 +.long 0xa4a4a400,0xfafa00fa +.long 0x40404000,0x07070007 +.long 0x28282800,0x55550055 +.long 0xd3d3d300,0xeeee00ee +.long 0x7b7b7b00,0x0a0a000a +.long 0xbbbbbb00,0x49490049 +.long 0xc9c9c900,0x68680068 +.long 0x43434300,0x38380038 +.long 0xc1c1c100,0xa4a400a4 +.long 0x15151500,0x28280028 +.long 0xe3e3e300,0x7b7b007b +.long 0xadadad00,0xc9c900c9 +.long 0xf4f4f400,0xc1c100c1 +.long 0x77777700,0xe3e300e3 +.long 0xc7c7c700,0xf4f400f4 +.long 0x80808000,0xc7c700c7 +.long 0x9e9e9e00,0x9e9e009e +.long 0x00e0e0e0,0x38003838 +.long 0x00050505,0x41004141 +.long 0x00585858,0x16001616 +.long 0x00d9d9d9,0x76007676 +.long 0x00676767,0xd900d9d9 +.long 0x004e4e4e,0x93009393 +.long 0x00818181,0x60006060 +.long 0x00cbcbcb,0xf200f2f2 +.long 0x00c9c9c9,0x72007272 +.long 0x000b0b0b,0xc200c2c2 +.long 0x00aeaeae,0xab00abab +.long 0x006a6a6a,0x9a009a9a +.long 0x00d5d5d5,0x75007575 +.long 0x00181818,0x06000606 +.long 0x005d5d5d,0x57005757 +.long 0x00828282,0xa000a0a0 +.long 0x00464646,0x91009191 +.long 0x00dfdfdf,0xf700f7f7 +.long 0x00d6d6d6,0xb500b5b5 +.long 0x00272727,0xc900c9c9 +.long 0x008a8a8a,0xa200a2a2 +.long 0x00323232,0x8c008c8c +.long 0x004b4b4b,0xd200d2d2 +.long 0x00424242,0x90009090 +.long 0x00dbdbdb,0xf600f6f6 +.long 0x001c1c1c,0x07000707 +.long 0x009e9e9e,0xa700a7a7 +.long 0x009c9c9c,0x27002727 +.long 0x003a3a3a,0x8e008e8e +.long 0x00cacaca,0xb200b2b2 +.long 0x00252525,0x49004949 +.long 0x007b7b7b,0xde00dede +.long 0x000d0d0d,0x43004343 +.long 0x00717171,0x5c005c5c +.long 0x005f5f5f,0xd700d7d7 +.long 0x001f1f1f,0xc700c7c7 +.long 0x00f8f8f8,0x3e003e3e +.long 0x00d7d7d7,0xf500f5f5 +.long 0x003e3e3e,0x8f008f8f +.long 0x009d9d9d,0x67006767 +.long 0x007c7c7c,0x1f001f1f +.long 0x00606060,0x18001818 +.long 0x00b9b9b9,0x6e006e6e +.long 0x00bebebe,0xaf00afaf +.long 0x00bcbcbc,0x2f002f2f +.long 0x008b8b8b,0xe200e2e2 +.long 0x00161616,0x85008585 +.long 0x00343434,0x0d000d0d +.long 0x004d4d4d,0x53005353 +.long 0x00c3c3c3,0xf000f0f0 +.long 0x00727272,0x9c009c9c +.long 0x00959595,0x65006565 +.long 0x00ababab,0xea00eaea +.long 0x008e8e8e,0xa300a3a3 +.long 0x00bababa,0xae00aeae +.long 0x007a7a7a,0x9e009e9e +.long 0x00b3b3b3,0xec00ecec +.long 0x00020202,0x80008080 +.long 0x00b4b4b4,0x2d002d2d +.long 0x00adadad,0x6b006b6b +.long 0x00a2a2a2,0xa800a8a8 +.long 0x00acacac,0x2b002b2b +.long 0x00d8d8d8,0x36003636 +.long 0x009a9a9a,0xa600a6a6 +.long 
0x00171717,0xc500c5c5 +.long 0x001a1a1a,0x86008686 +.long 0x00353535,0x4d004d4d +.long 0x00cccccc,0x33003333 +.long 0x00f7f7f7,0xfd00fdfd +.long 0x00999999,0x66006666 +.long 0x00616161,0x58005858 +.long 0x005a5a5a,0x96009696 +.long 0x00e8e8e8,0x3a003a3a +.long 0x00242424,0x09000909 +.long 0x00565656,0x95009595 +.long 0x00404040,0x10001010 +.long 0x00e1e1e1,0x78007878 +.long 0x00636363,0xd800d8d8 +.long 0x00090909,0x42004242 +.long 0x00333333,0xcc00cccc +.long 0x00bfbfbf,0xef00efef +.long 0x00989898,0x26002626 +.long 0x00979797,0xe500e5e5 +.long 0x00858585,0x61006161 +.long 0x00686868,0x1a001a1a +.long 0x00fcfcfc,0x3f003f3f +.long 0x00ececec,0x3b003b3b +.long 0x000a0a0a,0x82008282 +.long 0x00dadada,0xb600b6b6 +.long 0x006f6f6f,0xdb00dbdb +.long 0x00535353,0xd400d4d4 +.long 0x00626262,0x98009898 +.long 0x00a3a3a3,0xe800e8e8 +.long 0x002e2e2e,0x8b008b8b +.long 0x00080808,0x02000202 +.long 0x00afafaf,0xeb00ebeb +.long 0x00282828,0x0a000a0a +.long 0x00b0b0b0,0x2c002c2c +.long 0x00747474,0x1d001d1d +.long 0x00c2c2c2,0xb000b0b0 +.long 0x00bdbdbd,0x6f006f6f +.long 0x00363636,0x8d008d8d +.long 0x00222222,0x88008888 +.long 0x00383838,0x0e000e0e +.long 0x00646464,0x19001919 +.long 0x001e1e1e,0x87008787 +.long 0x00393939,0x4e004e4e +.long 0x002c2c2c,0x0b000b0b +.long 0x00a6a6a6,0xa900a9a9 +.long 0x00303030,0x0c000c0c +.long 0x00e5e5e5,0x79007979 +.long 0x00444444,0x11001111 +.long 0x00fdfdfd,0x7f007f7f +.long 0x00888888,0x22002222 +.long 0x009f9f9f,0xe700e7e7 +.long 0x00656565,0x59005959 +.long 0x00878787,0xe100e1e1 +.long 0x006b6b6b,0xda00dada +.long 0x00f4f4f4,0x3d003d3d +.long 0x00232323,0xc800c8c8 +.long 0x00484848,0x12001212 +.long 0x00101010,0x04000404 +.long 0x00d1d1d1,0x74007474 +.long 0x00515151,0x54005454 +.long 0x00c0c0c0,0x30003030 +.long 0x00f9f9f9,0x7e007e7e +.long 0x00d2d2d2,0xb400b4b4 +.long 0x00a0a0a0,0x28002828 +.long 0x00555555,0x55005555 +.long 0x00a1a1a1,0x68006868 +.long 0x00414141,0x50005050 +.long 0x00fafafa,0xbe00bebe +.long 0x00434343,0xd000d0d0 +.long 0x00131313,0xc400c4c4 +.long 0x00c4c4c4,0x31003131 +.long 0x002f2f2f,0xcb00cbcb +.long 0x00a8a8a8,0x2a002a2a +.long 0x00b6b6b6,0xad00adad +.long 0x003c3c3c,0x0f000f0f +.long 0x002b2b2b,0xca00caca +.long 0x00c1c1c1,0x70007070 +.long 0x00ffffff,0xff00ffff +.long 0x00c8c8c8,0x32003232 +.long 0x00a5a5a5,0x69006969 +.long 0x00202020,0x08000808 +.long 0x00898989,0x62006262 +.long 0x00000000,0x00000000 +.long 0x00909090,0x24002424 +.long 0x00474747,0xd100d1d1 +.long 0x00efefef,0xfb00fbfb +.long 0x00eaeaea,0xba00baba +.long 0x00b7b7b7,0xed00eded +.long 0x00151515,0x45004545 +.long 0x00060606,0x81008181 +.long 0x00cdcdcd,0x73007373 +.long 0x00b5b5b5,0x6d006d6d +.long 0x00121212,0x84008484 +.long 0x007e7e7e,0x9f009f9f +.long 0x00bbbbbb,0xee00eeee +.long 0x00292929,0x4a004a4a +.long 0x000f0f0f,0xc300c3c3 +.long 0x00b8b8b8,0x2e002e2e +.long 0x00070707,0xc100c1c1 +.long 0x00040404,0x01000101 +.long 0x009b9b9b,0xe600e6e6 +.long 0x00949494,0x25002525 +.long 0x00212121,0x48004848 +.long 0x00666666,0x99009999 +.long 0x00e6e6e6,0xb900b9b9 +.long 0x00cecece,0xb300b3b3 +.long 0x00ededed,0x7b007b7b +.long 0x00e7e7e7,0xf900f9f9 +.long 0x003b3b3b,0xce00cece +.long 0x00fefefe,0xbf00bfbf +.long 0x007f7f7f,0xdf00dfdf +.long 0x00c5c5c5,0x71007171 +.long 0x00a4a4a4,0x29002929 +.long 0x00373737,0xcd00cdcd +.long 0x00b1b1b1,0x6c006c6c +.long 0x004c4c4c,0x13001313 +.long 0x00919191,0x64006464 +.long 0x006e6e6e,0x9b009b9b +.long 0x008d8d8d,0x63006363 +.long 0x00767676,0x9d009d9d +.long 0x00030303,0xc000c0c0 +.long 0x002d2d2d,0x4b004b4b +.long 
0x00dedede,0xb700b7b7 +.long 0x00969696,0xa500a5a5 +.long 0x00262626,0x89008989 +.long 0x007d7d7d,0x5f005f5f +.long 0x00c6c6c6,0xb100b1b1 +.long 0x005c5c5c,0x17001717 +.long 0x00d3d3d3,0xf400f4f4 +.long 0x00f2f2f2,0xbc00bcbc +.long 0x004f4f4f,0xd300d3d3 +.long 0x00191919,0x46004646 +.long 0x003f3f3f,0xcf00cfcf +.long 0x00dcdcdc,0x37003737 +.long 0x00797979,0x5e005e5e +.long 0x001d1d1d,0x47004747 +.long 0x00525252,0x94009494 +.long 0x00ebebeb,0xfa00fafa +.long 0x00f3f3f3,0xfc00fcfc +.long 0x006d6d6d,0x5b005b5b +.long 0x005e5e5e,0x97009797 +.long 0x00fbfbfb,0xfe00fefe +.long 0x00696969,0x5a005a5a +.long 0x00b2b2b2,0xac00acac +.long 0x00f0f0f0,0x3c003c3c +.long 0x00313131,0x4c004c4c +.long 0x000c0c0c,0x03000303 +.long 0x00d4d4d4,0x35003535 +.long 0x00cfcfcf,0xf300f3f3 +.long 0x008c8c8c,0x23002323 +.long 0x00e2e2e2,0xb800b8b8 +.long 0x00757575,0x5d005d5d +.long 0x00a9a9a9,0x6a006a6a +.long 0x004a4a4a,0x92009292 +.long 0x00575757,0xd500d5d5 +.long 0x00848484,0x21002121 +.long 0x00111111,0x44004444 +.long 0x00454545,0x51005151 +.long 0x001b1b1b,0xc600c6c6 +.long 0x00f5f5f5,0x7d007d7d +.long 0x00e4e4e4,0x39003939 +.long 0x000e0e0e,0x83008383 +.long 0x00737373,0xdc00dcdc +.long 0x00aaaaaa,0xaa00aaaa +.long 0x00f1f1f1,0x7c007c7c +.long 0x00dddddd,0x77007777 +.long 0x00595959,0x56005656 +.long 0x00141414,0x05000505 +.long 0x006c6c6c,0x1b001b1b +.long 0x00929292,0xa400a4a4 +.long 0x00545454,0x15001515 +.long 0x00d0d0d0,0x34003434 +.long 0x00787878,0x1e001e1e +.long 0x00707070,0x1c001c1c +.long 0x00e3e3e3,0xf800f8f8 +.long 0x00494949,0x52005252 +.long 0x00808080,0x20002020 +.long 0x00505050,0x14001414 +.long 0x00a7a7a7,0xe900e9e9 +.long 0x00f6f6f6,0xbd00bdbd +.long 0x00777777,0xdd00dddd +.long 0x00939393,0xe400e4e4 +.long 0x00868686,0xa100a1a1 +.long 0x00838383,0xe000e0e0 +.long 0x002a2a2a,0x8a008a8a +.long 0x00c7c7c7,0xf100f1f1 +.long 0x005b5b5b,0xd600d6d6 +.long 0x00e9e9e9,0x7a007a7a +.long 0x00eeeeee,0xbb00bbbb +.long 0x008f8f8f,0xe300e3e3 +.long 0x00010101,0x40004040 +.long 0x003d3d3d,0x4f004f4f +.globl Camellia_cbc_encrypt +.type Camellia_cbc_encrypt,@function +.align 16 +Camellia_cbc_encrypt: + cmpq $0,%rdx + je .Lcbc_abort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.Lcbc_prologue: + + movq %rsp,%rbp + subq $64,%rsp + andq $-64,%rsp + + + + leaq -64-63(%rcx),%r10 + subq %rsp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %r8,%rbx + movq %rcx,%r14 + movl 272(%rcx),%r15d + + movq %r8,40(%rsp) + movq %rbp,48(%rsp) + +.Lcbc_body: + leaq .LCamellia_SBOX(%rip),%rbp + + movl $32,%ecx +.align 4 +.Lcbc_prefetch_sbox: + movq 0(%rbp),%rax + movq 32(%rbp),%rsi + movq 64(%rbp),%rdi + movq 96(%rbp),%r11 + leaq 128(%rbp),%rbp + loop .Lcbc_prefetch_sbox + subq $4096,%rbp + shlq $6,%r15 + movq %rdx,%rcx + leaq (%r14,%r15,1),%r15 + + cmpl $0,%r9d + je .LCBC_DECRYPT + + andq $-16,%rdx + andq $15,%rcx + leaq (%r12,%rdx,1),%rdx + movq %r14,0(%rsp) + movq %rdx,8(%rsp) + movq %rcx,16(%rsp) + + cmpq %r12,%rdx + movl 0(%rbx),%r8d + movl 4(%rbx),%r9d + movl 8(%rbx),%r10d + movl 12(%rbx),%r11d + je .Lcbc_enc_tail + jmp .Lcbc_eloop + +.align 16 +.Lcbc_eloop: + xorl 0(%r12),%r8d + xorl 4(%r12),%r9d + xorl 8(%r12),%r10d + bswapl %r8d + xorl 12(%r12),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_encrypt + + movq 0(%rsp),%r14 + bswapl %r8d + movq 8(%rsp),%rdx + bswapl %r9d + movq 16(%rsp),%rcx + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + leaq 16(%r12),%r12 + movl 
%r11d,12(%r13) + cmpq %rdx,%r12 + leaq 16(%r13),%r13 + jne .Lcbc_eloop + + cmpq $0,%rcx + jne .Lcbc_enc_tail + + movq 40(%rsp),%r13 + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + jmp .Lcbc_done + +.align 16 +.Lcbc_enc_tail: + xorq %rax,%rax + movq %rax,0+24(%rsp) + movq %rax,8+24(%rsp) + movq %rax,16(%rsp) + +.Lcbc_enc_pushf: + pushfq + cld + movq %r12,%rsi + leaq 8+24(%rsp),%rdi +.long 0x9066A4F3 + popfq +.Lcbc_enc_popf: + + leaq 24(%rsp),%r12 + leaq 16+24(%rsp),%rax + movq %rax,8(%rsp) + jmp .Lcbc_eloop + +.align 16 +.LCBC_DECRYPT: + xchgq %r14,%r15 + addq $15,%rdx + andq $15,%rcx + andq $-16,%rdx + movq %r14,0(%rsp) + leaq (%r12,%rdx,1),%rdx + movq %rdx,8(%rsp) + movq %rcx,16(%rsp) + + movq (%rbx),%rax + movq 8(%rbx),%rbx + jmp .Lcbc_dloop +.align 16 +.Lcbc_dloop: + movl 0(%r12),%r8d + movl 4(%r12),%r9d + movl 8(%r12),%r10d + bswapl %r8d + movl 12(%r12),%r11d + bswapl %r9d + movq %rax,0+24(%rsp) + bswapl %r10d + movq %rbx,8+24(%rsp) + bswapl %r11d + + call _x86_64_Camellia_decrypt + + movq 0(%rsp),%r14 + movq 8(%rsp),%rdx + movq 16(%rsp),%rcx + + bswapl %r8d + movq (%r12),%rax + bswapl %r9d + movq 8(%r12),%rbx + bswapl %r10d + xorl 0+24(%rsp),%r8d + bswapl %r11d + xorl 4+24(%rsp),%r9d + xorl 8+24(%rsp),%r10d + leaq 16(%r12),%r12 + xorl 12+24(%rsp),%r11d + cmpq %rdx,%r12 + je .Lcbc_ddone + + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + leaq 16(%r13),%r13 + jmp .Lcbc_dloop + +.align 16 +.Lcbc_ddone: + movq 40(%rsp),%rdx + cmpq $0,%rcx + jne .Lcbc_dec_tail + + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq %rax,(%rdx) + movq %rbx,8(%rdx) + jmp .Lcbc_done +.align 16 +.Lcbc_dec_tail: + movl %r8d,0+24(%rsp) + movl %r9d,4+24(%rsp) + movl %r10d,8+24(%rsp) + movl %r11d,12+24(%rsp) + +.Lcbc_dec_pushf: + pushfq + cld + leaq 8+24(%rsp),%rsi + leaq (%r13),%rdi +.long 0x9066A4F3 + popfq +.Lcbc_dec_popf: + + movq %rax,(%rdx) + movq %rbx,8(%rdx) + jmp .Lcbc_done + +.align 16 +.Lcbc_done: + movq 48(%rsp),%rcx + movq 0(%rcx),%r15 + movq 8(%rcx),%r14 + movq 16(%rcx),%r13 + movq 24(%rcx),%r12 + movq 32(%rcx),%rbp + movq 40(%rcx),%rbx + leaq 48(%rcx),%rsp +.Lcbc_abort: + .byte 0xf3,0xc3 +.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt + +.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s new file mode 100644 index 00000000000000..393782329e0b91 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s @@ -0,0 +1,2004 @@ +.text + + + +.align 64 +.Lpoly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + + +.LRR: +.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd + +.LOne: +.long 1,1,1,1,1,1,1,1 +.LTwo: +.long 2,2,2,2,2,2,2,2 +.LThree: +.long 3,3,3,3,3,3,3,3 +.LONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +.globl ecp_nistz256_mul_by_2 +.type ecp_nistz256_mul_by_2,@function +.align 64 +ecp_nistz256_mul_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + sbbq %r13,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 
8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 + + + +.globl ecp_nistz256_div_by_2 +.type ecp_nistz256_div_by_2,@function +.align 32 +ecp_nistz256_div_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r8,%rax + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + movq %r9,%rdx + xorq %r13,%r13 + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + adcq $0,%r13 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + cmovzq %rsi,%r13 + + movq %r9,%rax + shrq $1,%r8 + shlq $63,%rax + movq %r10,%rdx + shrq $1,%r9 + orq %rax,%r8 + shlq $63,%rdx + movq %r11,%rcx + shrq $1,%r10 + orq %rdx,%r9 + shlq $63,%rcx + shrq $1,%r11 + shlq $63,%r13 + orq %rcx,%r10 + orq %r13,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 + + + +.globl ecp_nistz256_mul_by_3 +.type ecp_nistz256_mul_by_3,@function +.align 32 +ecp_nistz256_mul_by_3: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq .Lpoly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq .Lpoly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + + xorq %r13,%r13 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + movq %r8,%rax + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq .Lpoly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq .Lpoly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 + + + +.globl ecp_nistz256_add +.type ecp_nistz256_add,@function +.align 32 +ecp_nistz256_add: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_add,.-ecp_nistz256_add + + + +.globl ecp_nistz256_sub +.type ecp_nistz256_sub,@function +.align 32 +ecp_nistz256_sub: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + movq %r8,%rax + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + 
movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_sub,.-ecp_nistz256_sub + + + +.globl ecp_nistz256_neg +.type ecp_nistz256_neg,@function +.align 32 +ecp_nistz256_neg: + pushq %r12 + pushq %r13 + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq .Lpoly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_neg,.-ecp_nistz256_neg + + + + +.globl ecp_nistz256_to_mont +.type ecp_nistz256_to_mont,@function +.align 32 +ecp_nistz256_to_mont: + leaq .LRR(%rip),%rdx + jmp .Lmul_mont +.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont + + + + + + + +.globl ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,@function +.align 32 +ecp_nistz256_mul_mont: +.Lmul_mont: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq +.Lmul_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +.type __ecp_nistz256_mul_montq,@function +.align 32 +__ecp_nistz256_mul_montq: + + + movq %rax,%rbp + mulq %r9 + movq .Lpoly+8(%rip),%r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq .Lpoly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp + shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq 
$0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq + + + + + + + + +.globl ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,@function +.align 32 +ecp_nistz256_sqr_mont: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq +.Lsqr_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +.type __ecp_nistz256_sqr_montq,@function +.align 32 +__ecp_nistz256_sqr_montq: + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq %rax,%r12 + movq %r8,%rax + adcq $0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq %rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq .Lpoly+8(%rip),%rsi + movq .Lpoly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq 
$-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq + + + + + + +.globl ecp_nistz256_from_mont +.type ecp_nistz256_from_mont,@function +.align 32 +ecp_nistz256_from_mont: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%rax + movq .Lpoly+24(%rip),%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %rax,%r8 + movq .Lpoly+8(%rip),%r12 + + + + movq %rax,%rcx + shlq $32,%r8 + mulq %r13 + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %r13 + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %r13 + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %r13 + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + movq %r8,%rcx + adcq %rax,%r10 + movq %r9,%rsi + adcq $0,%rdx + + + + subq $-1,%r8 + movq %r10,%rax + sbbq %r12,%r9 + sbbq $0,%r10 + movq %rdx,%r11 + sbbq %r13,%rdx + sbbq %r13,%r13 + + cmovnzq %rcx,%r8 + cmovnzq %rsi,%r9 + movq %r8,0(%rdi) + cmovnzq %rax,%r10 + movq %r9,8(%rdi) + cmovzq %rdx,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 +.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont + + +.globl ecp_nistz256_select_w5 +.type ecp_nistz256_select_w5,@function +.align 32 +ecp_nistz256_select_w5: + movdqa .LOne(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +.Lselect_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz .Lselect_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + .byte 0xf3,0xc3 +.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 + + + +.globl ecp_nistz256_select_w7 +.type ecp_nistz256_select_w7,@function +.align 32 +ecp_nistz256_select_w7: + movdqa .LOne(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +.Lselect_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz .Lselect_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + 
movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + .byte 0xf3,0xc3 +.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 +.globl ecp_nistz256_avx2_select_w7 +.type ecp_nistz256_avx2_select_w7,@function +.align 32 +ecp_nistz256_avx2_select_w7: +.byte 0x0f,0x0b + .byte 0xf3,0xc3 +.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +.type __ecp_nistz256_add_toq,@function +.align 32 +__ecp_nistz256_add_toq: + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq + +.type __ecp_nistz256_sub_fromq,@function +.align 32 +__ecp_nistz256_sub_fromq: + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq + +.type __ecp_nistz256_subq,@function +.align 32 +__ecp_nistz256_subq: + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + .byte 0xf3,0xc3 +.size __ecp_nistz256_subq,.-__ecp_nistz256_subq + +.type __ecp_nistz256_mul_by_2q,@function +.align 32 +__ecp_nistz256_mul_by_2q: + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 +.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q +.globl ecp_nistz256_point_double +.type ecp_nistz256_point_double,@function +.align 32 +ecp_nistz256_point_double: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq .Lpoly+8(%rip),%r14 + movq .Lpoly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa %xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 
64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_double,.-ecp_nistz256_point_double +.globl ecp_nistz256_point_add +.type ecp_nistz256_point_add,@function +.align 32 +ecp_nistz256_point_add: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 
32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz .Ladd_proceedq +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz .Ladd_proceedq + testq %r9,%r9 + jz .Ladd_proceedq + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp .Ladd_doneq + +.align 32 +.Ladd_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + 
movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +.Ladd_doneq: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_add,.-ecp_nistz256_point_add +.globl 
ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,@function +.align 32 +ecp_nistz256_point_add_affine: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 
160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand .LONE_mont(%rip),%xmm2 + pand .LONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 +.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/md5/md5-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/md5/md5-x86_64.s new file mode 100644 index 00000000000000..53f44ff5f180bc --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/md5/md5-x86_64.s @@ -0,0 +1,668 @@ +.text +.align 16 + +.globl md5_block_asm_data_order +.type md5_block_asm_data_order,@function +md5_block_asm_data_order: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r14 + pushq %r15 +.Lprologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je .Lend + + +.Lloop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl %edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl 
%r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl 
%ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + 
xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl 
%eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb .Lloop + + +.Lend: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r12 + movq 24(%rsp),%rbx + movq 32(%rsp),%rbp + addq $40,%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size md5_block_asm_data_order,.-md5_block_asm_data_order diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/modes/aesni-gcm-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/modes/aesni-gcm-x86_64.s new file mode 100644 index 00000000000000..35ebd9b4e09076 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/modes/aesni-gcm-x86_64.s @@ -0,0 +1,15 @@ +.text + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,@function +aesni_gcm_encrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt + +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,@function +aesni_gcm_decrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/modes/ghash-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/modes/ghash-x86_64.s new file mode 100644 index 00000000000000..462ef7fe739f2f --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/modes/ghash-x86_64.s @@ -0,0 +1,1318 @@ +.text + + +.globl gcm_gmult_4bit +.type gcm_gmult_4bit,@function +.align 16 +gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +.Lgmult_prologue: + + movzbq 15(%rdi),%r8 + leaq .Lrem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp .Loop1 + +.align 16 +.Loop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js .Lbreak1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp .Loop1 + +.align 16 +.Lbreak1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq 
$15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +.Lgmult_epilogue: + .byte 0xf3,0xc3 +.size gcm_gmult_4bit,.-gcm_gmult_4bit +.globl gcm_ghash_4bit +.type gcm_ghash_4bit,@function +.align 16 +gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +.Lghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) + orq %r10,%rax + movb 
%bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq .Lrem_8bit(%rip),%r11 + jmp .Louter_loop +.align 16 +.Louter_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + 
movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq %r10,%r8 + shlq $48,%r13 
+ bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb .Louter_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lghash_epilogue: + .byte 0xf3,0xc3 +.size gcm_ghash_4bit,.-gcm_ghash_4bit +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand .L0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + .byte 0xf3,0xc3 +.size gcm_init_clmul,.-gcm_init_clmul +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa .Lbswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 
102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.size gcm_gmult_clmul,.-gcm_gmult_clmul +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 32 +gcm_ghash_clmul: +.L_ghash_clmul: + movdqa .Lbswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $16,%rcx + jz .Lodd_tail + + movdqu 16(%rsi),%xmm6 + movl OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $48,%rcx + jb .Lskip4x + + andl $71303168,%eax + cmpl $4194304,%eax + je .Lskip4x + + subq $48,%rcx + movq $11547335547999543296,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jc .Ltail4x + + jmp .Lmod4_loop +.align 32 +.Lmod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa .L7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 
102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jnc .Lmod4_loop + +.Ltail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $64,%rcx + jz .Ldone + movdqu 32(%rsi),%xmm7 + subq $16,%rcx + jz .Lodd_tail +.Lskip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $32,%rcx + jbe .Leven_tail + nop + jmp .Lmod_loop + +.align 32 +.Lmod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $32,%rcx + ja .Lmod_loop + +.Leven_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz .Ldone + +.Lodd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,223,0 
+ pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.Ldone: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 +.size gcm_ghash_clmul,.-gcm_ghash_clmul +.globl gcm_init_avx +.type gcm_init_avx,@function +.align 32 +gcm_init_avx: + jmp .L_init_clmul +.size gcm_init_avx,.-gcm_init_avx +.globl gcm_gmult_avx +.type gcm_gmult_avx,@function +.align 32 +gcm_gmult_avx: + jmp .L_gmult_clmul +.size gcm_gmult_avx,.-gcm_gmult_avx +.globl gcm_ghash_avx +.type gcm_ghash_avx,@function +.align 32 +gcm_ghash_avx: + jmp .L_ghash_clmul +.size gcm_ghash_avx,.-gcm_ghash_avx +.align 64 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.L0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: +.long 7,0,7,0 +.L7_mask_poly: +.long 7,0,450,0 +.align 64 +.type .Lrem_4bit,@object +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.type .Lrem_8bit,@object +.Lrem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 
0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/rc4/rc4-md5-x86_64.s similarity index 99% rename from deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s rename to deps/openssl/asm_obsolete/x64-elf-gas/rc4/rc4-md5-x86_64.s index 85f9905a840ed4..9c7110f4ef09c3 100644 --- a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/rc4/rc4-md5-x86_64.s @@ -1,12 +1,11 @@ .text +.align 16 -.p2align 4 - -.globl _rc4_md5_enc - -_rc4_md5_enc: +.globl rc4_md5_enc +.type rc4_md5_enc,@function +rc4_md5_enc: cmpq $0,%r9 - je L$abort + je .Labort pushq %rbx pushq %rbp pushq %r12 @@ -14,7 +13,7 @@ _rc4_md5_enc: pushq %r14 pushq %r15 subq $40,%rsp -L$body: +.Lbody: movq %rcx,%r11 movq %r9,%r12 movq %rsi,%r13 @@ -41,10 +40,10 @@ L$body: movl 4(%r11),%r9d movl 8(%r11),%r10d movl 12(%r11),%r11d - jmp L$oop + jmp .Loop -.p2align 4 -L$oop: +.align 16 +.Loop: movl %r8d,0(%rsp) movl %r9d,4(%rsp) movl %r10d,8(%rsp) @@ -1235,7 +1234,7 @@ L$oop: leaq 64(%r15),%r15 leaq 64(%r13),%r13 cmpq 16(%rsp),%r15 - jb L$oop + jb .Loop movq 24(%rsp),%r12 subb %al,%cl @@ -1254,6 +1253,7 @@ L$oop: movq 72(%rsp),%rbp movq 80(%rsp),%rbx leaq 88(%rsp),%rsp -L$epilogue: -L$abort: +.Lepilogue: +.Labort: .byte 0xf3,0xc3 +.size rc4_md5_enc,.-rc4_md5_enc diff --git a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/rc4/rc4-x86_64.s similarity index 84% rename from deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s rename to deps/openssl/asm_obsolete/x64-elf-gas/rc4/rc4-x86_64.s index 8c4f29ecbb03cd..d52224afcfa806 100644 --- a/deps/openssl/asm/x64-macosx-gas/rc4/rc4-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/rc4/rc4-x86_64.s @@ -1,18 +1,17 @@ .text - -.globl _RC4 - -.p2align 4 -_RC4: orq %rsi,%rsi - jne L$entry +.globl RC4 +.type RC4,@function +.align 16 +RC4: orq %rsi,%rsi + jne .Lentry .byte 0xf3,0xc3 -L$entry: +.Lentry: pushq %rbx pushq %r12 pushq %r13 -L$prologue: +.Lprologue: movq %rsi,%r11 movq %rdx,%r12 movq %rcx,%r13 @@ -23,22 +22,22 @@ L$prologue: movb -8(%rdi),%r10b movb -4(%rdi),%cl cmpl $-1,256(%rdi) - je L$RC4_CHAR - movl _OPENSSL_ia32cap_P(%rip),%r8d + je .LRC4_CHAR + movl OPENSSL_ia32cap_P(%rip),%r8d xorq %rbx,%rbx incb %r10b subq %r10,%rbx subq %r12,%r13 movl (%rdi,%r10,4),%eax testq $-16,%r11 - jz L$loop1 + jz .Lloop1 btl $30,%r8d - jc L$intel + jc .Lintel andq $7,%rbx leaq 1(%r10),%rsi - jz L$oop8 + jz .Loop8 subq %rbx,%r11 -L$oop8_warmup: +.Loop8_warmup: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) @@ -48,15 +47,15 @@ L$oop8_warmup: movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl - movb %dl,(%r13,%r12,1) + movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %rbx - jnz L$oop8_warmup + jnz .Loop8_warmup leaq 1(%r10),%rsi - jmp L$oop8 -.p2align 4 -L$oop8: + jmp .Loop8 +.align 16 +.Loop8: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) @@ -127,23 +126,23 @@ L$oop8: subq $8,%r11 xorq (%r12),%r8 - movq %r8,(%r13,%r12,1) + movq %r8,(%r12,%r13,1) leaq 8(%r12),%r12 testq $-8,%r11 - jnz L$oop8 + jnz .Loop8 cmpq $0,%r11 - jne L$loop1 - jmp L$exit + jne .Lloop1 + jmp .Lexit -.p2align 4 -L$intel: +.align 16 +.Lintel: testq $-32,%r11 - jz L$loop1 + jz .Lloop1 andq $15,%rbx - jz L$oop16_is_hot + jz 
.Loop16_is_hot subq %rbx,%r11 -L$oop16_warmup: +.Loop16_warmup: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) @@ -153,16 +152,16 @@ L$oop16_warmup: movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl - movb %dl,(%r13,%r12,1) + movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %rbx - jnz L$oop16_warmup + jnz .Loop16_warmup movq %rcx,%rbx xorq %rcx,%rcx movb %bl,%cl -L$oop16_is_hot: +.Loop16_is_hot: leaq (%rdi,%r10,4),%rsi addb %al,%cl movl (%rdi,%rcx,4),%edx @@ -174,9 +173,9 @@ L$oop16_is_hot: movl %edx,0(%rsi) addb %bl,%cl pinsrw $0,(%rdi,%rax,4),%xmm0 - jmp L$oop16_enter -.p2align 4 -L$oop16: + jmp .Loop16_enter +.align 16 +.Loop16: addb %al,%cl movl (%rdi,%rcx,4),%edx pxor %xmm0,%xmm2 @@ -190,9 +189,9 @@ L$oop16: pxor %xmm1,%xmm2 addb %bl,%cl pinsrw $0,(%rdi,%rax,4),%xmm0 - movdqu %xmm2,(%r13,%r12,1) + movdqu %xmm2,(%r12,%r13,1) leaq 16(%r12),%r12 -L$oop16_enter: +.Loop16_enter: movl (%rdi,%rcx,4),%edx pxor %xmm1,%xmm1 movl %ebx,(%rdi,%rcx,4) @@ -321,20 +320,20 @@ L$oop16_enter: subq $16,%r11 movb %bl,%cl testq $-16,%r11 - jnz L$oop16 + jnz .Loop16 psllq $8,%xmm1 pxor %xmm0,%xmm2 pxor %xmm1,%xmm2 - movdqu %xmm2,(%r13,%r12,1) + movdqu %xmm2,(%r12,%r13,1) leaq 16(%r12),%r12 cmpq $0,%r11 - jne L$loop1 - jmp L$exit + jne .Lloop1 + jmp .Lexit -.p2align 4 -L$loop1: +.align 16 +.Lloop1: addb %al,%cl movl (%rdi,%rcx,4),%edx movl %eax,(%rdi,%rcx,4) @@ -344,21 +343,21 @@ L$loop1: movl (%rdi,%rax,4),%edx movl (%rdi,%r10,4),%eax xorb (%r12),%dl - movb %dl,(%r13,%r12,1) + movb %dl,(%r12,%r13,1) leaq 1(%r12),%r12 decq %r11 - jnz L$loop1 - jmp L$exit + jnz .Lloop1 + jmp .Lexit -.p2align 4 -L$RC4_CHAR: +.align 16 +.LRC4_CHAR: addb $1,%r10b movzbl (%rdi,%r10,1),%eax testq $-8,%r11 - jz L$cloop1 - jmp L$cloop8 -.p2align 4 -L$cloop8: + jz .Lcloop1 + jmp .Lcloop8 +.align 16 +.Lcloop8: movl (%r12),%r8d movl 4(%r12),%r9d addb %al,%cl @@ -369,10 +368,9 @@ L$cloop8: movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) - jne L$cmov0 - + jne .Lcmov0 movq %rax,%rbx -L$cmov0: +.Lcmov0: addb %al,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d @@ -384,10 +382,9 @@ L$cmov0: movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) - jne L$cmov1 - + jne .Lcmov1 movq %rbx,%rax -L$cmov1: +.Lcmov1: addb %bl,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d @@ -399,10 +396,9 @@ L$cmov1: movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) - jne L$cmov2 - + jne .Lcmov2 movq %rax,%rbx -L$cmov2: +.Lcmov2: addb %al,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d @@ -414,10 +410,9 @@ L$cmov2: movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) - jne L$cmov3 - + jne .Lcmov3 movq %rbx,%rax -L$cmov3: +.Lcmov3: addb %bl,%dl xorb (%rdi,%rdx,1),%r8b rorl $8,%r8d @@ -429,10 +424,9 @@ L$cmov3: movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) - jne L$cmov4 - + jne .Lcmov4 movq %rax,%rbx -L$cmov4: +.Lcmov4: addb %al,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d @@ -444,10 +438,9 @@ L$cmov4: movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) - jne L$cmov5 - + jne .Lcmov5 movq %rbx,%rax -L$cmov5: +.Lcmov5: addb %bl,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d @@ -459,10 +452,9 @@ L$cmov5: movb %al,(%rdi,%rcx,1) cmpq %rsi,%rcx movb %dl,(%rdi,%r10,1) - jne L$cmov6 - + jne .Lcmov6 movq %rax,%rbx -L$cmov6: +.Lcmov6: addb %al,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d @@ -474,10 +466,9 @@ L$cmov6: movb %bl,(%rdi,%rcx,1) cmpq %r10,%rcx movb %dl,(%rdi,%rsi,1) - jne L$cmov7 - + jne .Lcmov7 movq %rbx,%rax -L$cmov7: +.Lcmov7: addb %bl,%dl xorb (%rdi,%rdx,1),%r9b rorl $8,%r9d @@ -488,12 +479,12 @@ L$cmov7: leaq 
8(%r13),%r13 testq $-8,%r11 - jnz L$cloop8 + jnz .Lcloop8 cmpq $0,%r11 - jne L$cloop1 - jmp L$exit -.p2align 4 -L$cloop1: + jne .Lcloop1 + jmp .Lexit +.align 16 +.Lcloop1: addb %al,%cl movzbl %cl,%ecx movzbl (%rdi,%rcx,1),%edx @@ -510,11 +501,11 @@ L$cloop1: movb %dl,(%r13) leaq 1(%r13),%r13 subq $1,%r11 - jnz L$cloop1 - jmp L$exit + jnz .Lcloop1 + jmp .Lexit -.p2align 4 -L$exit: +.align 16 +.Lexit: subb $1,%r10b movl %r10d,-8(%rdi) movl %ecx,-4(%rdi) @@ -523,13 +514,13 @@ L$exit: movq 8(%rsp),%r12 movq 16(%rsp),%rbx addq $24,%rsp -L$epilogue: +.Lepilogue: .byte 0xf3,0xc3 - -.globl _private_RC4_set_key - -.p2align 4 -_private_RC4_set_key: +.size RC4,.-RC4 +.globl private_RC4_set_key +.type private_RC4_set_key,@function +.align 16 +private_RC4_set_key: leaq 8(%rdi),%rdi leaq (%rdx,%rsi,1),%rdx negq %rsi @@ -539,21 +530,21 @@ _private_RC4_set_key: xorq %r10,%r10 xorq %r11,%r11 - movl _OPENSSL_ia32cap_P(%rip),%r8d + movl OPENSSL_ia32cap_P(%rip),%r8d btl $20,%r8d - jc L$c1stloop - jmp L$w1stloop + jc .Lc1stloop + jmp .Lw1stloop -.p2align 4 -L$w1stloop: +.align 16 +.Lw1stloop: movl %eax,(%rdi,%rax,4) addb $1,%al - jnc L$w1stloop + jnc .Lw1stloop xorq %r9,%r9 xorq %r8,%r8 -.p2align 4 -L$w2ndloop: +.align 16 +.Lw2ndloop: movl (%rdi,%r9,4),%r10d addb (%rdx,%rsi,1),%r8b addb %r10b,%r8b @@ -563,61 +554,62 @@ L$w2ndloop: movl %r10d,(%rdi,%r8,4) movl %r11d,(%rdi,%r9,4) addb $1,%r9b - jnc L$w2ndloop - jmp L$exit_key + jnc .Lw2ndloop + jmp .Lexit_key -.p2align 4 -L$c1stloop: +.align 16 +.Lc1stloop: movb %al,(%rdi,%rax,1) addb $1,%al - jnc L$c1stloop + jnc .Lc1stloop xorq %r9,%r9 xorq %r8,%r8 -.p2align 4 -L$c2ndloop: +.align 16 +.Lc2ndloop: movb (%rdi,%r9,1),%r10b addb (%rdx,%rsi,1),%r8b addb %r10b,%r8b addq $1,%rsi movb (%rdi,%r8,1),%r11b - jnz L$cnowrap + jnz .Lcnowrap movq %rcx,%rsi -L$cnowrap: +.Lcnowrap: movb %r10b,(%rdi,%r8,1) movb %r11b,(%rdi,%r9,1) addb $1,%r9b - jnc L$c2ndloop + jnc .Lc2ndloop movl $-1,256(%rdi) -.p2align 4 -L$exit_key: +.align 16 +.Lexit_key: xorl %eax,%eax movl %eax,-8(%rdi) movl %eax,-4(%rdi) .byte 0xf3,0xc3 - - -.globl _RC4_options - -.p2align 4 -_RC4_options: - leaq L$opts(%rip),%rax - movl _OPENSSL_ia32cap_P(%rip),%edx +.size private_RC4_set_key,.-private_RC4_set_key + +.globl RC4_options +.type RC4_options,@function +.align 16 +RC4_options: + leaq .Lopts(%rip),%rax + movl OPENSSL_ia32cap_P(%rip),%edx btl $20,%edx - jc L$8xchar + jc .L8xchar btl $30,%edx - jnc L$done + jnc .Ldone addq $25,%rax .byte 0xf3,0xc3 -L$8xchar: +.L8xchar: addq $12,%rax -L$done: +.Ldone: .byte 0xf3,0xc3 -.p2align 6 -L$opts: +.align 64 +.Lopts: .byte 114,99,52,40,56,120,44,105,110,116,41,0 .byte 114,99,52,40,56,120,44,99,104,97,114,41,0 .byte 114,99,52,40,49,54,120,44,105,110,116,41,0 .byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.p2align 6 +.align 64 +.size RC4_options,.-RC4_options diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-mb-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-mb-x86_64.s new file mode 100644 index 00000000000000..8a1e5e7b595379 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-mb-x86_64.s @@ -0,0 +1,2934 @@ +.text + + + +.globl sha1_multi_block +.type sha1_multi_block,@function +.align 32 +sha1_multi_block: + movq OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody: + leaq K_XX_XX(%rip),%rbp 
+ leaq 256(%rsp),%rbx + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + movdqu 32(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 96(%rdi),%xmm13 + movdqu 128(%rdi),%xmm14 + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + jmp .Loop + +.align 32 +.Loop: + movd (%r8),%xmm0 + leaq 64(%r8),%r8 + movd (%r9),%xmm2 + leaq 64(%r9),%r9 + movd (%r10),%xmm3 + leaq 64(%r10),%r10 + movd (%r11),%xmm4 + leaq 64(%r11),%r11 + punpckldq %xmm3,%xmm0 + movd -60(%r8),%xmm1 + punpckldq %xmm4,%xmm2 + movd -60(%r9),%xmm9 + punpckldq %xmm2,%xmm0 + movd -60(%r10),%xmm8 +.byte 102,15,56,0,197 + movd -60(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,0-128(%rax) + paddd %xmm0,%xmm14 + movd -56(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -56(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -56(%r10),%xmm8 + por %xmm7,%xmm11 + movd -56(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,16-128(%rax) + paddd %xmm1,%xmm13 + movd -52(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -52(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -52(%r10),%xmm8 + por %xmm7,%xmm10 + movd -52(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,32-128(%rax) + paddd %xmm2,%xmm12 + movd -48(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -48(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -48(%r10),%xmm8 + por %xmm7,%xmm14 + movd -48(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,48-128(%rax) + paddd %xmm3,%xmm11 + movd -44(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -44(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -44(%r10),%xmm8 + por %xmm7,%xmm13 + movd -44(%r11),%xmm7 + 
punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,64-128(%rax) + paddd %xmm4,%xmm10 + movd -40(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -40(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -40(%r10),%xmm8 + por %xmm7,%xmm12 + movd -40(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,80-128(%rax) + paddd %xmm0,%xmm14 + movd -36(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -36(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -36(%r10),%xmm8 + por %xmm7,%xmm11 + movd -36(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,96-128(%rax) + paddd %xmm1,%xmm13 + movd -32(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -32(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -32(%r10),%xmm8 + por %xmm7,%xmm10 + movd -32(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,112-128(%rax) + paddd %xmm2,%xmm12 + movd -28(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -28(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -28(%r10),%xmm8 + por %xmm7,%xmm14 + movd -28(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,128-128(%rax) + paddd %xmm3,%xmm11 + movd -24(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -24(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -24(%r10),%xmm8 + por %xmm7,%xmm13 + movd -24(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,144-128(%rax) + paddd %xmm4,%xmm10 + movd -20(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -20(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -20(%r10),%xmm8 + por %xmm7,%xmm12 + movd -20(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd 
%xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,160-128(%rax) + paddd %xmm0,%xmm14 + movd -16(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -16(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -16(%r10),%xmm8 + por %xmm7,%xmm11 + movd -16(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,176-128(%rax) + paddd %xmm1,%xmm13 + movd -12(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -12(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -12(%r10),%xmm8 + por %xmm7,%xmm10 + movd -12(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,192-128(%rax) + paddd %xmm2,%xmm12 + movd -8(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -8(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -8(%r10),%xmm8 + por %xmm7,%xmm14 + movd -8(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,208-128(%rax) + paddd %xmm3,%xmm11 + movd -4(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -4(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -4(%r10),%xmm8 + por %xmm7,%xmm13 + movdqa 0-128(%rax),%xmm1 + movd -4(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + prefetcht0 63(%r8) + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,224-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + prefetcht0 63(%r9) + + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + prefetcht0 63(%r10) + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + prefetcht0 63(%r11) + por %xmm7,%xmm12 + movdqa 16-128(%rax),%xmm2 + pxor %xmm3,%xmm1 + movdqa 32-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + pxor 128-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + movdqa %xmm11,%xmm7 + pslld $5,%xmm8 + pxor %xmm3,%xmm1 + movdqa %xmm11,%xmm6 + pandn %xmm13,%xmm7 + movdqa %xmm1,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm10,%xmm9 + psrld $31,%xmm5 + paddd %xmm1,%xmm1 + + movdqa %xmm0,240-128(%rax) + paddd %xmm0,%xmm14 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm11,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 48-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + pxor 
144-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + movdqa %xmm10,%xmm7 + pslld $5,%xmm8 + pxor %xmm4,%xmm2 + movdqa %xmm10,%xmm6 + pandn %xmm12,%xmm7 + movdqa %xmm2,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm14,%xmm9 + psrld $31,%xmm5 + paddd %xmm2,%xmm2 + + movdqa %xmm1,0-128(%rax) + paddd %xmm1,%xmm13 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm10,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 64-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + pxor 160-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + movdqa %xmm14,%xmm7 + pslld $5,%xmm8 + pxor %xmm0,%xmm3 + movdqa %xmm14,%xmm6 + pandn %xmm11,%xmm7 + movdqa %xmm3,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm13,%xmm9 + psrld $31,%xmm5 + paddd %xmm3,%xmm3 + + movdqa %xmm2,16-128(%rax) + paddd %xmm2,%xmm12 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm14,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 80-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + pxor 176-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + movdqa %xmm13,%xmm7 + pslld $5,%xmm8 + pxor %xmm1,%xmm4 + movdqa %xmm13,%xmm6 + pandn %xmm10,%xmm7 + movdqa %xmm4,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm12,%xmm9 + psrld $31,%xmm5 + paddd %xmm4,%xmm4 + + movdqa %xmm3,32-128(%rax) + paddd %xmm3,%xmm11 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm13,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 96-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + pxor 192-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + movdqa %xmm12,%xmm7 + pslld $5,%xmm8 + pxor %xmm2,%xmm0 + movdqa %xmm12,%xmm6 + pandn %xmm14,%xmm7 + movdqa %xmm0,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm11,%xmm9 + psrld $31,%xmm5 + paddd %xmm0,%xmm0 + + movdqa %xmm4,48-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm12,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 0(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 112-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 208-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,64-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 128-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 224-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,80-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 144-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 240-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,96-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + 
psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 160-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 0-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,112-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 176-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 16-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,128-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 192-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 32-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,144-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 208-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 48-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,160-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 224-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 64-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,176-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 240-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 80-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,192-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 0-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 96-128(%rax),%xmm0 + paddd 
%xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,208-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 16-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 112-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,224-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 32-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 128-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,240-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 48-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 144-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,0-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 64-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 160-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,16-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 80-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 176-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,32-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 96-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 192-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,48-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por 
%xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 112-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 208-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,64-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 128-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 224-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,80-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 144-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 240-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,96-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 160-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 0-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,112-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 32(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 176-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 16-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,128-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 192-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 32-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,144-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 208-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 48-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 
+ + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,160-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 224-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 64-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,176-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 240-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 80-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,192-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 0-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 96-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,208-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 16-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 112-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,224-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 32-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 128-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,240-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 48-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 144-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand 
%xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,0-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 64-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 160-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,16-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 80-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 176-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,32-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 96-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 192-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,48-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 112-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 208-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,64-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 128-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 224-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,80-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 144-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 240-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand 
%xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,96-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 160-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 0-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,112-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 176-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 16-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,128-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 192-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 32-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,144-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 208-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 48-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,160-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 224-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 64-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,176-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 64(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 240-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 80-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + 
+ movdqa %xmm10,%xmm9 + movdqa %xmm0,192-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 0-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 96-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,208-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 16-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 112-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,224-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 32-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 128-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,240-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 48-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 144-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,0-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 64-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 160-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,16-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 80-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 176-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,32-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 
96-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 192-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,48-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 112-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 208-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,64-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 128-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 224-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,80-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 144-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 240-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,96-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 160-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 0-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,112-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 176-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 16-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 192-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 32-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + 
paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 208-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 48-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 224-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 64-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 240-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 80-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 0-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 96-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 16-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 112-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + movdqa %xmm12,%xmm7 + pxor %xmm13,%xmm6 + + pslld $30,%xmm7 + por %xmm9,%xmm8 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm7,%xmm12 + movdqa (%rbx),%xmm0 + movl $1,%ecx + cmpl 0(%rbx),%ecx + pxor %xmm8,%xmm8 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm0,%xmm1 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm8,%xmm1 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm1,%xmm0 + cmovgeq %rbp,%r11 + + movdqu 0(%rdi),%xmm6 + pand %xmm1,%xmm10 + movdqu 32(%rdi),%xmm7 + pand %xmm1,%xmm11 + paddd %xmm6,%xmm10 + movdqu 64(%rdi),%xmm8 + pand %xmm1,%xmm12 + paddd %xmm7,%xmm11 + movdqu 96(%rdi),%xmm9 + pand %xmm1,%xmm13 + paddd %xmm8,%xmm12 + movdqu 128(%rdi),%xmm5 + pand %xmm1,%xmm14 + movdqu %xmm10,0(%rdi) + paddd %xmm9,%xmm13 + movdqu %xmm11,32(%rdi) + paddd %xmm5,%xmm14 + movdqu %xmm12,64(%rdi) 
+ movdqu %xmm13,96(%rdi) + movdqu %xmm14,128(%rdi) + + movdqa %xmm0,(%rbx) + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_multi_block,.-sha1_multi_block +.type sha1_multi_block_shaext,@function +.align 32 +sha1_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 64(%rdi),%rdi + movq %rax,272(%rsp) +.Lbody_shaext: + leaq 256(%rsp),%rbx + movdqa K_XX_XX+128(%rip),%xmm3 + +.Loop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz .Ldone_shaext + + movq 0-64(%rdi),%xmm0 + movq 32-64(%rdi),%xmm4 + movq 64-64(%rdi),%xmm5 + movq 96-64(%rdi),%xmm6 + movq 128-64(%rdi),%xmm7 + + punpckldq %xmm4,%xmm0 + punpckldq %xmm6,%xmm5 + + movdqa %xmm0,%xmm8 + punpcklqdq %xmm5,%xmm0 + punpckhqdq %xmm5,%xmm8 + + pshufd $63,%xmm7,%xmm1 + pshufd $127,%xmm7,%xmm9 + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm11 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm12 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm13 +.byte 102,68,15,56,0,219 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 +.byte 102,15,56,0,235 + movdqu 48(%r9),%xmm14 + leaq 64(%r9),%r9 +.byte 102,68,15,56,0,227 + + movdqa %xmm1,80(%rsp) + paddd %xmm4,%xmm1 + movdqa %xmm9,112(%rsp) + paddd %xmm11,%xmm9 + movdqa %xmm0,64(%rsp) + movdqa %xmm0,%xmm2 + movdqa %xmm8,96(%rsp) + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 102,15,56,0,243 + prefetcht0 127(%r8) +.byte 15,56,201,229 +.byte 102,68,15,56,0,235 + prefetcht0 127(%r9) +.byte 69,15,56,201,220 + +.byte 102,15,56,0,251 + movdqa %xmm0,%xmm1 +.byte 102,68,15,56,0,243 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,206 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,205 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,215 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,204 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 
69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,215 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,204 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,213 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,204 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,213 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,206 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,213 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + + movl $1,%ecx + pxor %xmm4,%xmm4 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,206 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + movq (%rbx),%xmm6 + + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,215 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,214 + + pshufd $0,%xmm6,%xmm11 + pshufd $85,%xmm6,%xmm12 + movdqa %xmm6,%xmm7 + pcmpgtd %xmm4,%xmm11 + pcmpgtd %xmm4,%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 
+.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,204 + + pcmpgtd %xmm4,%xmm7 + pand %xmm11,%xmm0 + pand %xmm11,%xmm1 + pand %xmm12,%xmm8 + pand %xmm12,%xmm9 + paddd %xmm7,%xmm6 + + paddd 64(%rsp),%xmm0 + paddd 80(%rsp),%xmm1 + paddd 96(%rsp),%xmm8 + paddd 112(%rsp),%xmm9 + + movq %xmm6,(%rbx) + decl %edx + jnz .Loop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + + movdqa %xmm0,%xmm6 + punpckldq %xmm8,%xmm0 + punpckhdq %xmm8,%xmm6 + punpckhdq %xmm9,%xmm1 + movq %xmm0,0-64(%rdi) + psrldq $8,%xmm0 + movq %xmm6,64-64(%rdi) + psrldq $8,%xmm6 + movq %xmm0,32-64(%rdi) + psrldq $8,%xmm1 + movq %xmm6,96-64(%rdi) + movq %xmm1,128-64(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz .Loop_grande_shaext + +.Ldone_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_shaext: + .byte 0xf3,0xc3 +.size sha1_multi_block_shaext,.-sha1_multi_block_shaext + +.align 256 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +K_XX_XX: +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s new file mode 100644 index 00000000000000..38b7df19704ba2 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s @@ -0,0 +1,2591 @@ +.text + + +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + movl OPENSSL_ia32cap_P+8(%rip),%r10d + testl $512,%r8d + jz .Lialu + testl $536870912,%r10d + jnz _shaext_shortcut + jmp _ssse3_shortcut + +.align 16 +.Lialu: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d 
+ movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + 
xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl 
%ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl 
%esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl 
%r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl 
%ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl 
%r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_shaext,@function +.align 32 +sha1_block_data_order_shaext: +_shaext_shortcut: + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + decq %rdx + leaq 64(%rsi),%rax + paddd %xmm4,%xmm1 + cmovneq %rax,%rsi + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 
15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz .Loop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + .byte 0xf3,0xc3 +.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r11),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld 
$30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r11),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r11),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r11),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll 
$5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl 
%ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r11),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl 
%ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + 
movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl 
%ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-mb-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-mb-x86_64.s new file mode 100644 index 00000000000000..7f8e35a92e0a15 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-mb-x86_64.s @@ -0,0 +1,3258 @@ +.text + + + +.globl sha256_multi_block +.type sha256_multi_block,@function +.align 32 +sha256_multi_block: + movq OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +.Lbody: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu .Lpbswap(%rip),%xmm6 + jmp .Loop + +.align 32 +.Loop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + 
movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa 
%xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 
+ pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor 
%xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 
52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + prefetcht0 63(%r8) + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + prefetcht0 63(%r9) + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + prefetcht0 63(%r10) + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + prefetcht0 63(%r11) + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 
0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx +.align 32 +.Loop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld 
$13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + 
movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa 
%xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor 
%xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + 
paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld 
$19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz .Loop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq 
%rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_multi_block,.-sha256_multi_block +.type sha256_multi_block_shaext,@function +.align 32 +sha256_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 128(%rdi),%rdi + movq %rax,272(%rsp) +.Lbody_shaext: + leaq 256(%rsp),%rbx + leaq K256_shaext+128(%rip),%rbp + +.Loop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz .Ldone_shaext + + movq 0-128(%rdi),%xmm12 + movq 32-128(%rdi),%xmm4 + movq 64-128(%rdi),%xmm13 + movq 96-128(%rdi),%xmm5 + movq 128-128(%rdi),%xmm8 + movq 160-128(%rdi),%xmm9 + movq 192-128(%rdi),%xmm10 + movq 224-128(%rdi),%xmm11 + + punpckldq %xmm4,%xmm12 + punpckldq %xmm5,%xmm13 + punpckldq %xmm9,%xmm8 + punpckldq %xmm11,%xmm10 + movdqa K256_shaext-16(%rip),%xmm3 + + movdqa %xmm12,%xmm14 + movdqa %xmm13,%xmm15 + punpcklqdq %xmm8,%xmm12 + punpcklqdq %xmm10,%xmm13 + punpckhqdq %xmm8,%xmm14 + punpckhqdq %xmm10,%xmm15 + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm8 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm9 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm10 +.byte 102,68,15,56,0,195 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 + movdqu 48(%r9),%xmm11 + leaq 64(%r9),%r9 + + movdqa 0-128(%rbp),%xmm0 +.byte 102,15,56,0,235 + paddd %xmm4,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm0,%xmm1 + movdqa 0-128(%rbp),%xmm2 +.byte 102,68,15,56,0,203 + paddd %xmm8,%xmm2 + movdqa %xmm13,80(%rsp) +.byte 69,15,56,203,236 + pxor %xmm14,%xmm8 + movdqa %xmm2,%xmm0 + movdqa %xmm15,112(%rsp) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm12,64(%rsp) +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pxor %xmm14,%xmm8 + movdqa %xmm14,96(%rsp) + movdqa 16-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 102,15,56,0,243 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 16-128(%rbp),%xmm2 + paddd %xmm9,%xmm2 +.byte 
69,15,56,203,236 + movdqa %xmm2,%xmm0 + prefetcht0 127(%r8) +.byte 102,15,56,0,251 +.byte 102,68,15,56,0,211 + prefetcht0 127(%r9) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,68,15,56,0,219 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 32-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 32-128(%rbp),%xmm2 + paddd %xmm10,%xmm2 +.byte 69,15,56,203,236 +.byte 69,15,56,204,193 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,15,58,15,222,4 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 48-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + + movdqa %xmm1,%xmm0 + movdqa 48-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 64-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 64-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 80-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 80-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 96-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 96-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 112-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 112-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 128-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 128-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa 
%xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 144-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 144-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 160-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 160-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 176-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 176-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 192-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 192-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 208-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 208-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 224-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 224-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 + nop +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movl $1,%ecx + pxor %xmm6,%xmm6 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + movdqa 240-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 + movq (%rbx),%xmm7 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 240-128(%rbp),%xmm2 + paddd %xmm11,%xmm2 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + pshufd $0,%xmm7,%xmm9 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + pshufd $85,%xmm7,%xmm10 + 
movdqa %xmm7,%xmm11 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pcmpgtd %xmm6,%xmm9 + pcmpgtd %xmm6,%xmm10 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pcmpgtd %xmm6,%xmm11 + movdqa K256_shaext-16(%rip),%xmm3 +.byte 69,15,56,203,247 + + pand %xmm9,%xmm13 + pand %xmm10,%xmm15 + pand %xmm9,%xmm12 + pand %xmm10,%xmm14 + paddd %xmm7,%xmm11 + + paddd 80(%rsp),%xmm13 + paddd 112(%rsp),%xmm15 + paddd 64(%rsp),%xmm12 + paddd 96(%rsp),%xmm14 + + movq %xmm11,(%rbx) + decl %edx + jnz .Loop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + + movdqa %xmm12,%xmm5 + movdqa %xmm13,%xmm6 + punpckldq %xmm14,%xmm12 + punpckhdq %xmm14,%xmm5 + punpckldq %xmm15,%xmm13 + punpckhdq %xmm15,%xmm6 + + movq %xmm12,0-128(%rdi) + psrldq $8,%xmm12 + movq %xmm5,128-128(%rdi) + psrldq $8,%xmm5 + movq %xmm12,32-128(%rdi) + movq %xmm5,160-128(%rdi) + + movq %xmm13,64-128(%rdi) + psrldq $8,%xmm13 + movq %xmm6,192-128(%rdi) + psrldq $8,%xmm6 + movq %xmm13,96-128(%rdi) + movq %xmm6,224-128(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz .Loop_grande_shaext + +.Ldone_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +.Lepilogue_shaext: + .byte 0xf3,0xc3 +.size sha256_multi_block_shaext,.-sha256_multi_block_shaext +.align 256 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 
1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3600352804,3600352804,3600352804,3600352804 +.long 3600352804,3600352804,3600352804,3600352804 +.long 4094571909,4094571909,4094571909,4094571909 +.long 4094571909,4094571909,4094571909,4094571909 +.long 275423344,275423344,275423344,275423344 +.long 275423344,275423344,275423344,275423344 +.long 430227734,430227734,430227734,430227734 +.long 430227734,430227734,430227734,430227734 +.long 506948616,506948616,506948616,506948616 +.long 506948616,506948616,506948616,506948616 +.long 659060556,659060556,659060556,659060556 +.long 659060556,659060556,659060556,659060556 +.long 883997877,883997877,883997877,883997877 +.long 883997877,883997877,883997877,883997877 +.long 958139571,958139571,958139571,958139571 +.long 958139571,958139571,958139571,958139571 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1955562222,1955562222,1955562222,1955562222 +.long 1955562222,1955562222,1955562222,1955562222 +.long 
2024104815,2024104815,2024104815,2024104815 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2756734187,2756734187,2756734187,2756734187 +.long 2756734187,2756734187,2756734187,2756734187 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3329325298,3329325298,3329325298,3329325298 +.long 3329325298,3329325298,3329325298,3329325298 +.Lpbswap: +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +K256_shaext: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-x86_64.s new file mode 100644 index 00000000000000..d2951d8ea3be3a --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha256-x86_64.s @@ -0,0 +1,3049 @@ +.text + + +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: + leaq OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $536870912,%r11d + jnz _shaext_shortcut + testl $512,%r10d + jnz .Lssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + 
xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl 
%r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl 
$2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl 
%eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + 
rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl 
%r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl 
%r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_block_data_order,.-sha256_block_data_order +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 
0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.type sha256_block_data_order_shaext,@function +.align 64 +sha256_block_data_order_shaext: +_shaext_shortcut: + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 
256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shaext + + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + .byte 0xf3,0xc3 +.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext +.type sha256_block_data_order_ssse3,@function +.align 64 +sha256_block_data_order_ssse3: +.Lssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl 
%r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl 
%r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl 
$9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + 
rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + 
andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl 
%eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha512-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha512-x86_64.s new file mode 100644 index 00000000000000..f6638db30e9fad --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/sha/sha512-x86_64.s @@ -0,0 +1,1783 @@ +.text + + +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq 
%rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 
+ movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + 
movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 
+ addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq 
$4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + 
+ xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha512_block_data_order,.-sha512_block_data_order +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 
0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 
83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/whrlpool/wp-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/whrlpool/wp-x86_64.s new file mode 100644 index 00000000000000..f83130ea68634b --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/whrlpool/wp-x86_64.s @@ -0,0 +1,861 @@ +.text + +.globl whirlpool_block +.type whirlpool_block,@function +.align 16 +whirlpool_block: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movq %rsp,%r11 + subq $128+40,%rsp + andq $-64,%rsp + + leaq 128(%rsp),%r10 + movq %rdi,0(%r10) + movq %rsi,8(%r10) + movq %rdx,16(%r10) + movq %r11,32(%r10) +.Lprologue: + + movq %r10,%rbx + leaq .Ltable(%rip),%rbp + + xorq %rcx,%rcx + xorq %rdx,%rdx + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 +.Louterloop: + movq %r8,0(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + xorq 0(%rsi),%r8 + xorq 8(%rsi),%r9 + xorq 16(%rsi),%r10 + xorq 24(%rsi),%r11 + xorq 32(%rsi),%r12 + xorq 40(%rsi),%r13 + xorq 48(%rsi),%r14 + xorq 56(%rsi),%r15 + movq %r8,64+0(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + xorq %rsi,%rsi + movq %rsi,24(%rbx) + jmp .Lround +.align 16 +.Lround: + movq 4096(%rbp,%rsi,8),%r8 + movl 0(%rsp),%eax + movl 4(%rsp),%ebx + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r8 + movq 7(%rbp,%rdi,8),%r9 + movl 0+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + movq 6(%rbp,%rsi,8),%r10 + movq 5(%rbp,%rdi,8),%r11 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + movq 4(%rbp,%rsi,8),%r12 + movq 3(%rbp,%rdi,8),%r13 + movl 0+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + movq 2(%rbp,%rsi,8),%r14 + movq 1(%rbp,%rdi,8),%r15 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r9 + xorq 7(%rbp,%rdi,8),%r10 + movl 8+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r11 + xorq 5(%rbp,%rdi,8),%r12 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r13 + xorq 3(%rbp,%rdi,8),%r14 + movl 8+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r15 + xorq 1(%rbp,%rdi,8),%r8 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r10 + xorq 7(%rbp,%rdi,8),%r11 + movl 16+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r12 + xorq 5(%rbp,%rdi,8),%r13 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 
4(%rbp,%rsi,8),%r14 + xorq 3(%rbp,%rdi,8),%r15 + movl 16+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r8 + xorq 1(%rbp,%rdi,8),%r9 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r11 + xorq 7(%rbp,%rdi,8),%r12 + movl 24+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r13 + xorq 5(%rbp,%rdi,8),%r14 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r15 + xorq 3(%rbp,%rdi,8),%r8 + movl 24+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r9 + xorq 1(%rbp,%rdi,8),%r10 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r12 + xorq 7(%rbp,%rdi,8),%r13 + movl 32+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r14 + xorq 5(%rbp,%rdi,8),%r15 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r8 + xorq 3(%rbp,%rdi,8),%r9 + movl 32+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r10 + xorq 1(%rbp,%rdi,8),%r11 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r13 + xorq 7(%rbp,%rdi,8),%r14 + movl 40+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r15 + xorq 5(%rbp,%rdi,8),%r8 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r9 + xorq 3(%rbp,%rdi,8),%r10 + movl 40+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r11 + xorq 1(%rbp,%rdi,8),%r12 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r14 + xorq 7(%rbp,%rdi,8),%r15 + movl 48+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r8 + xorq 5(%rbp,%rdi,8),%r9 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r10 + xorq 3(%rbp,%rdi,8),%r11 + movl 48+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r12 + xorq 1(%rbp,%rdi,8),%r13 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r15 + xorq 7(%rbp,%rdi,8),%r8 + movl 56+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r9 + xorq 5(%rbp,%rdi,8),%r10 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r11 + xorq 3(%rbp,%rdi,8),%r12 + movl 56+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r13 + xorq 1(%rbp,%rdi,8),%r14 + movq %r8,0(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + movq %r12,32(%rsp) + movq 
%r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r8 + xorq 7(%rbp,%rdi,8),%r9 + movl 64+0+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r10 + xorq 5(%rbp,%rdi,8),%r11 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r12 + xorq 3(%rbp,%rdi,8),%r13 + movl 64+0+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r14 + xorq 1(%rbp,%rdi,8),%r15 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r9 + xorq 7(%rbp,%rdi,8),%r10 + movl 64+8+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r11 + xorq 5(%rbp,%rdi,8),%r12 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r13 + xorq 3(%rbp,%rdi,8),%r14 + movl 64+8+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r15 + xorq 1(%rbp,%rdi,8),%r8 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r10 + xorq 7(%rbp,%rdi,8),%r11 + movl 64+16+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r12 + xorq 5(%rbp,%rdi,8),%r13 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r14 + xorq 3(%rbp,%rdi,8),%r15 + movl 64+16+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r8 + xorq 1(%rbp,%rdi,8),%r9 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r11 + xorq 7(%rbp,%rdi,8),%r12 + movl 64+24+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r13 + xorq 5(%rbp,%rdi,8),%r14 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r15 + xorq 3(%rbp,%rdi,8),%r8 + movl 64+24+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r9 + xorq 1(%rbp,%rdi,8),%r10 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r12 + xorq 7(%rbp,%rdi,8),%r13 + movl 64+32+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r14 + xorq 5(%rbp,%rdi,8),%r15 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r8 + xorq 3(%rbp,%rdi,8),%r9 + movl 64+32+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r10 + xorq 1(%rbp,%rdi,8),%r11 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r13 + xorq 7(%rbp,%rdi,8),%r14 + movl 64+40+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq 
(%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r15 + xorq 5(%rbp,%rdi,8),%r8 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r9 + xorq 3(%rbp,%rdi,8),%r10 + movl 64+40+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r11 + xorq 1(%rbp,%rdi,8),%r12 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r14 + xorq 7(%rbp,%rdi,8),%r15 + movl 64+48+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r8 + xorq 5(%rbp,%rdi,8),%r9 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r10 + xorq 3(%rbp,%rdi,8),%r11 + movl 64+48+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r12 + xorq 1(%rbp,%rdi,8),%r13 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r15 + xorq 7(%rbp,%rdi,8),%r8 + + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r9 + xorq 5(%rbp,%rdi,8),%r10 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r11 + xorq 3(%rbp,%rdi,8),%r12 + + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r13 + xorq 1(%rbp,%rdi,8),%r14 + leaq 128(%rsp),%rbx + movq 24(%rbx),%rsi + addq $1,%rsi + cmpq $10,%rsi + je .Lroundsdone + + movq %rsi,24(%rbx) + movq %r8,64+0(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + jmp .Lround +.align 16 +.Lroundsdone: + movq 0(%rbx),%rdi + movq 8(%rbx),%rsi + movq 16(%rbx),%rax + xorq 0(%rsi),%r8 + xorq 8(%rsi),%r9 + xorq 16(%rsi),%r10 + xorq 24(%rsi),%r11 + xorq 32(%rsi),%r12 + xorq 40(%rsi),%r13 + xorq 48(%rsi),%r14 + xorq 56(%rsi),%r15 + xorq 0(%rdi),%r8 + xorq 8(%rdi),%r9 + xorq 16(%rdi),%r10 + xorq 24(%rdi),%r11 + xorq 32(%rdi),%r12 + xorq 40(%rdi),%r13 + xorq 48(%rdi),%r14 + xorq 56(%rdi),%r15 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rsi),%rsi + subq $1,%rax + jz .Lalldone + movq %rsi,8(%rbx) + movq %rax,16(%rbx) + jmp .Louterloop +.Lalldone: + movq 32(%rbx),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size whirlpool_block,.-whirlpool_block + +.align 64 +.type .Ltable,@object +.Ltable: +.byte 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216 +.byte 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38 +.byte 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184 +.byte 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251 +.byte 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203 +.byte 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17 +.byte 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9 +.byte 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13 +.byte 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155 +.byte 
166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255 +.byte 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12 +.byte 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14 +.byte 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150 +.byte 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48 +.byte 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109 +.byte 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248 +.byte 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71 +.byte 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53 +.byte 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55 +.byte 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138 +.byte 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210 +.byte 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108 +.byte 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132 +.byte 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128 +.byte 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245 +.byte 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179 +.byte 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33 +.byte 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156 +.byte 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67 +.byte 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41 +.byte 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93 +.byte 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213 +.byte 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189 +.byte 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232 +.byte 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146 +.byte 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158 +.byte 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19 +.byte 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35 +.byte 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32 +.byte 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68 +.byte 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162 +.byte 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207 +.byte 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124 +.byte 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90 +.byte 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80 +.byte 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201 +.byte 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20 +.byte 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217 +.byte 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60 +.byte 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143 +.byte 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144 +.byte 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7 +.byte 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221 +.byte 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211 +.byte 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45 +.byte 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120 +.byte 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151 +.byte 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2 +.byte 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115 +.byte 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167 +.byte 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246 +.byte 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178 +.byte 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73 +.byte 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86 +.byte 
251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112 +.byte 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205 +.byte 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187 +.byte 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113 +.byte 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123 +.byte 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175 +.byte 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69 +.byte 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26 +.byte 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212 +.byte 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88 +.byte 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46 +.byte 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63 +.byte 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172 +.byte 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176 +.byte 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239 +.byte 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182 +.byte 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92 +.byte 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18 +.byte 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147 +.byte 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222 +.byte 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198 +.byte 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209 +.byte 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59 +.byte 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95 +.byte 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49 +.byte 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168 +.byte 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185 +.byte 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188 +.byte 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62 +.byte 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11 +.byte 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191 +.byte 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89 +.byte 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242 +.byte 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119 +.byte 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51 +.byte 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244 +.byte 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39 +.byte 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235 +.byte 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137 +.byte 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50 +.byte 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84 +.byte 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141 +.byte 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100 +.byte 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157 +.byte 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61 +.byte 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15 +.byte 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202 +.byte 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183 +.byte 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125 +.byte 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206 +.byte 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127 +.byte 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47 +.byte 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99 +.byte 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42 +.byte 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204 +.byte 
18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130 +.byte 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122 +.byte 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72 +.byte 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149 +.byte 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223 +.byte 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77 +.byte 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192 +.byte 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145 +.byte 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200 +.byte 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91 +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249 +.byte 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110 +.byte 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225 +.byte 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230 +.byte 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40 +.byte 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195 +.byte 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116 +.byte 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190 +.byte 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29 +.byte 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234 +.byte 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87 +.byte 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56 +.byte 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173 +.byte 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196 +.byte 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218 +.byte 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199 +.byte 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219 +.byte 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233 +.byte 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106 +.byte 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3 +.byte 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74 +.byte 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142 +.byte 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96 +.byte 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252 +.byte 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70 +.byte 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31 +.byte 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118 +.byte 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250 +.byte 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54 +.byte 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174 +.byte 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75 +.byte 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133 +.byte 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126 +.byte 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231 +.byte 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85 +.byte 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58 +.byte 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129 +.byte 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82 +.byte 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98 +.byte 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163 +.byte 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16 +.byte 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171 +.byte 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208 +.byte 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197 +.byte 
57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236 +.byte 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22 +.byte 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148 +.byte 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159 +.byte 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229 +.byte 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152 +.byte 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23 +.byte 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228 +.byte 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161 +.byte 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78 +.byte 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66 +.byte 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52 +.byte 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8 +.byte 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238 +.byte 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97 +.byte 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177 +.byte 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79 +.byte 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36 +.byte 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227 +.byte 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37 +.byte 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34 +.byte 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101 +.byte 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121 +.byte 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105 +.byte 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169 +.byte 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25 +.byte 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254 +.byte 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154 +.byte 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240 +.byte 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153 +.byte 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131 +.byte 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4 +.byte 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102 +.byte 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224 +.byte 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193 +.byte 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253 +.byte 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64 +.byte 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28 +.byte 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24 +.byte 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139 +.byte 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81 +.byte 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5 +.byte 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140 +.byte 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57 +.byte 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170 +.byte 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27 +.byte 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220 +.byte 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94 +.byte 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160 +.byte 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136 +.byte 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103 +.byte 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10 +.byte 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135 +.byte 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241 +.byte 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114 +.byte 
11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83 +.byte 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1 +.byte 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43 +.byte 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164 +.byte 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243 +.byte 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21 +.byte 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76 +.byte 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165 +.byte 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181 +.byte 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180 +.byte 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186 +.byte 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166 +.byte 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247 +.byte 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6 +.byte 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65 +.byte 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215 +.byte 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111 +.byte 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30 +.byte 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214 +.byte 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226 +.byte 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104 +.byte 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44 +.byte 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237 +.byte 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117 +.byte 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134 +.byte 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107 +.byte 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194 +.byte 24,35,198,232,135,184,1,79 +.byte 54,166,210,245,121,111,145,82 +.byte 96,188,155,142,163,12,123,53 +.byte 29,224,215,194,46,75,254,87 +.byte 21,119,55,229,159,240,74,218 +.byte 88,201,41,10,177,160,107,133 +.byte 189,93,16,244,203,62,5,103 +.byte 228,39,65,139,167,125,149,216 +.byte 251,238,124,102,221,23,71,158 +.byte 202,45,191,7,173,90,131,51 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/x86_64cpuid.s b/deps/openssl/asm_obsolete/x64-elf-gas/x86_64cpuid.s new file mode 100644 index 00000000000000..656a5ce85576e0 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-elf-gas/x86_64cpuid.s @@ -0,0 +1,259 @@ + +.hidden OPENSSL_cpuid_setup +.section .init + call OPENSSL_cpuid_setup + +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,16,4 + +.text + +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: + movl (%rdi),%eax +.Lspin: leaq (%rsi,%rax,1),%r8 +.byte 0xf0 + cmpxchgl %r8d,(%rdi) + jne .Lspin + movl %r8d,%eax +.byte 0x48,0x98 + .byte 0xf3,0xc3 +.size OPENSSL_atomic_add,.-OPENSSL_atomic_add + +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: + rdtsc + shlq $32,%rdx + orq %rdx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_rdtsc,.-OPENSSL_rdtsc + +.globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: + movq %rbx,%r8 + + xorl %eax,%eax + movl %eax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz .Lintel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz .Lintel + + + movl $2147483648,%eax + cpuid + cmpl 
$2147483649,%eax + jb .Lintel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb .Lintel + + movl $2147483656,%eax + cpuid + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + btl $28,%edx + jnc .Lgeneric + shrl $16,%ebx + cmpb %r10b,%bl + ja .Lgeneric + andl $4026531839,%edx + jmp .Lgeneric + +.Lintel: + cmpl $4,%r11d + movl $-1,%r10d + jb .Lnocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $4095,%r10d + + cmpl $7,%r11d + jb .Lnocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + +.Lnocacheinfo: + movl $1,%eax + cpuid + andl $3220176895,%edx + cmpl $0,%r9d + jne .Lnotintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne .Lnotintel + orl $1048576,%edx +.Lnotintel: + btl $28,%edx + jnc .Lgeneric + andl $4026531839,%edx + cmpl $0,%r10d + je .Lgeneric + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .Lgeneric + andl $4026531839,%edx +.Lgeneric: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc .Lclear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je .Ldone +.Lclear_avx: + movl $4026525695,%eax + andl %eax,%r9d + andl $4294967263,8(%rdi) +.Ldone: + shlq $32,%r9 + movl %r10d,%eax + movq %r8,%rbx + orq %r9,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid + +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: + xorq %rax,%rax + cmpq $15,%rsi + jae .Lot + cmpq $0,%rsi + je .Lret +.Little: + movb %al,(%rdi) + subq $1,%rsi + leaq 1(%rdi),%rdi + jnz .Little +.Lret: + .byte 0xf3,0xc3 +.align 16 +.Lot: + testq $7,%rdi + jz .Laligned + movb %al,(%rdi) + leaq -1(%rsi),%rsi + leaq 1(%rdi),%rdi + jmp .Lot +.Laligned: + movq %rax,(%rdi) + leaq -8(%rsi),%rsi + testq $-8,%rsi + leaq 8(%rdi),%rdi + jnz .Laligned + cmpq $0,%rsi + jne .Little + .byte 0xf3,0xc3 +.size OPENSSL_cleanse,.-OPENSSL_cleanse +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor %xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %rsi,%rsi + xorq %rdi,%rdi + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax + .byte 0xf3,0xc3 +.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: + movl $8,%ecx +.Loop_rdrand: +.byte 72,15,199,240 + jc .Lbreak_rdrand + loop .Loop_rdrand +.Lbreak_rdrand: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand + +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,@function +.align 16 +OPENSSL_ia32_rdseed: + movl $8,%ecx +.Loop_rdseed: +.byte 72,15,199,248 + jc .Lbreak_rdseed + loop .Loop_rdseed +.Lbreak_rdseed: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 +.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aes-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aes-x86_64.s new file mode 100644 index 00000000000000..a50170a9a201f5 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aes-x86_64.s @@ -0,0 +1,2534 @@ +.text + +.p2align 4 
+_x86_64_AES_encrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp L$enc_loop +.p2align 4 +L$enc_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + shrl $16,%ecx + movzbl %ah,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movl 12(%r15),%edx + movzbl %bh,%edi + movzbl %ch,%ebp + movl 0(%r15),%eax + xorl 1(%r14,%rdi,8),%r12d + xorl 1(%r14,%rbp,8),%r8d + + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + subl $1,%r13d + jnz L$enc_loop + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl 2(%r14,%rsi,8),%r10d + movzbl 2(%r14,%rdi,8),%r11d + movzbl 2(%r14,%rbp,8),%r12d + + movzbl %dl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl 2(%r14,%rsi,8),%r8d + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $65280,%edi + andl $65280,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%ecx + + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + + andl $65280,%esi + andl $65280,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%eax + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movl 0(%r14,%rsi,8),%esi + movl 0(%r14,%rdi,8),%edi + movl 0(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $16711680,%edi + andl $16711680,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movl 0(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 2(%r14,%rbp,8),%ebp + + andl $16711680,%esi + andl $4278190080,%edi + andl $4278190080,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %bh,%esi + movzbl %ch,%edi + movl 16+12(%r15),%edx + movl 2(%r14,%rsi,8),%esi + movl 2(%r14,%rdi,8),%edi + movl 16+0(%r15),%eax + + andl $4278190080,%esi + andl $4278190080,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 + + +.p2align 4 +_x86_64_AES_encrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp L$enc_loop_compact +.p2align 4 +L$enc_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%ecx + movzbl %dh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ah,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl %cl,%edi + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shll $8,%r9d + shrl $16,%edx + 
shll $8,%r13d + xorl %r9d,%r10d + shrl $16,%eax + movzbl %dl,%r9d + shrl $16,%ebx + xorl %r13d,%r11d + shll $8,%ebp + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + + shll $8,%esi + movzbl %bl,%ebp + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %dh,%esi + movzbl (%r14,%r13,1),%r13d + xorl %edi,%r10d + + shrl $8,%ecx + movzbl %ah,%edi + shll $16,%r9d + shrl $8,%ebx + shll $16,%r13d + xorl %r9d,%r11d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rcx,1),%edx + movzbl (%r14,%rbx,1),%ecx + + shll $16,%ebp + xorl %r13d,%r12d + shll $24,%esi + xorl %ebp,%r8d + shll $24,%edi + xorl %esi,%r10d + shll $24,%edx + xorl %edi,%r11d + shll $24,%ecx + movl %r10d,%eax + movl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je L$enc_compact_done + movl $2155905152,%r10d + movl $2155905152,%r11d + andl %eax,%r10d + andl %ebx,%r11d + movl %r10d,%esi + movl %r11d,%edi + shrl $7,%r10d + leal (%rax,%rax,1),%r8d + shrl $7,%r11d + leal (%rbx,%rbx,1),%r9d + subl %r10d,%esi + subl %r11d,%edi + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %eax,%r10d + movl %ebx,%r11d + xorl %esi,%r8d + xorl %edi,%r9d + + xorl %r8d,%eax + xorl %r9d,%ebx + movl $2155905152,%r12d + roll $24,%eax + movl $2155905152,%ebp + roll $24,%ebx + andl %ecx,%r12d + andl %edx,%ebp + xorl %r8d,%eax + xorl %r9d,%ebx + movl %r12d,%esi + rorl $16,%r10d + movl %ebp,%edi + rorl $16,%r11d + leal (%rcx,%rcx,1),%r8d + shrl $7,%r12d + xorl %r10d,%eax + shrl $7,%ebp + xorl %r11d,%ebx + rorl $8,%r10d + leal (%rdx,%rdx,1),%r9d + rorl $8,%r11d + subl %r12d,%esi + subl %ebp,%edi + xorl %r10d,%eax + xorl %r11d,%ebx + + andl $4278124286,%r8d + andl $4278124286,%r9d + andl $454761243,%esi + andl $454761243,%edi + movl %ecx,%r12d + movl %edx,%ebp + xorl %esi,%r8d + xorl %edi,%r9d + + rorl $16,%r12d + xorl %r8d,%ecx + rorl $16,%ebp + xorl %r9d,%edx + roll $24,%ecx + movl 0(%r14),%esi + roll $24,%edx + xorl %r8d,%ecx + movl 64(%r14),%edi + xorl %r9d,%edx + movl 128(%r14),%r8d + xorl %r12d,%ecx + rorl $8,%r12d + xorl %ebp,%edx + rorl $8,%ebp + xorl %r12d,%ecx + movl 192(%r14),%r9d + xorl %ebp,%edx + jmp L$enc_loop_compact +.p2align 4 +L$enc_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 + +.globl _AES_encrypt + +.p2align 4 +.globl _asm_AES_encrypt +.private_extern _asm_AES_encrypt +_asm_AES_encrypt: +_AES_encrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +L$enc_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq %r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq L$AES_Te+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + + call _x86_64_AES_encrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$enc_epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +_x86_64_AES_decrypt: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 
12(%r15),%edx + + movl 240(%r15),%r13d + subl $1,%r13d + jmp L$dec_loop +.p2align 4 +L$dec_loop: + + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movl 0(%r14,%rsi,8),%r10d + movl 0(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r12d + + movzbl %dh,%esi + movzbl %ah,%edi + movzbl %dl,%ebp + xorl 3(%r14,%rsi,8),%r10d + xorl 3(%r14,%rdi,8),%r11d + movl 0(%r14,%rbp,8),%r8d + + movzbl %bh,%esi + shrl $16,%eax + movzbl %ch,%ebp + xorl 3(%r14,%rsi,8),%r12d + shrl $16,%edx + xorl 3(%r14,%rbp,8),%r8d + + shrl $16,%ebx + leaq 16(%r15),%r15 + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + xorl 2(%r14,%rsi,8),%r10d + xorl 2(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r12d + + movzbl %bh,%esi + movzbl %ch,%edi + movzbl %bl,%ebp + xorl 1(%r14,%rsi,8),%r10d + xorl 1(%r14,%rdi,8),%r11d + xorl 2(%r14,%rbp,8),%r8d + + movzbl %dh,%esi + movl 12(%r15),%edx + movzbl %ah,%ebp + xorl 1(%r14,%rsi,8),%r12d + movl 0(%r15),%eax + xorl 1(%r14,%rbp,8),%r8d + + xorl %r10d,%eax + movl 4(%r15),%ebx + movl 8(%r15),%ecx + xorl %r12d,%ecx + xorl %r11d,%ebx + xorl %r8d,%edx + subl $1,%r13d + jnz L$dec_loop + leaq 2048(%r14),%r14 + movzbl %al,%esi + movzbl %bl,%edi + movzbl %cl,%ebp + movzbl (%r14,%rsi,1),%r10d + movzbl (%r14,%rdi,1),%r11d + movzbl (%r14,%rbp,1),%r12d + + movzbl %dl,%esi + movzbl %dh,%edi + movzbl %ah,%ebp + movzbl (%r14,%rsi,1),%r8d + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $8,%edi + shll $8,%ebp + + xorl %edi,%r10d + xorl %ebp,%r11d + shrl $16,%edx + + movzbl %bh,%esi + movzbl %ch,%edi + shrl $16,%eax + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + + shll $8,%esi + shll $8,%edi + shrl $16,%ebx + xorl %esi,%r12d + xorl %edi,%r8d + shrl $16,%ecx + + movzbl %cl,%esi + movzbl %dl,%edi + movzbl %al,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $16,%edi + shll $16,%ebp + + xorl %esi,%r10d + xorl %edi,%r11d + xorl %ebp,%r12d + + movzbl %bl,%esi + movzbl %bh,%edi + movzbl %ch,%ebp + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movzbl (%r14,%rbp,1),%ebp + + shll $16,%esi + shll $24,%edi + shll $24,%ebp + + xorl %esi,%r8d + xorl %edi,%r10d + xorl %ebp,%r11d + + movzbl %dh,%esi + movzbl %ah,%edi + movl 16+12(%r15),%edx + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%edi + movl 16+0(%r15),%eax + + shll $24,%esi + shll $24,%edi + + xorl %esi,%r12d + xorl %edi,%r8d + + movl 16+4(%r15),%ebx + movl 16+8(%r15),%ecx + leaq -2048(%r14),%r14 + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx +.byte 0xf3,0xc3 + + +.p2align 4 +_x86_64_AES_decrypt_compact: + leaq 128(%r14),%r8 + movl 0-128(%r8),%edi + movl 32-128(%r8),%ebp + movl 64-128(%r8),%r10d + movl 96-128(%r8),%r11d + movl 128-128(%r8),%edi + movl 160-128(%r8),%ebp + movl 192-128(%r8),%r10d + movl 224-128(%r8),%r11d + jmp L$dec_loop_compact + +.p2align 4 +L$dec_loop_compact: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx + leaq 16(%r15),%r15 + movzbl %al,%r10d + movzbl %bl,%r11d + movzbl %cl,%r12d + movzbl %dl,%r8d + movzbl %dh,%esi + movzbl %ah,%edi + shrl $16,%edx + movzbl %bh,%ebp + movzbl (%r14,%r10,1),%r10d + movzbl (%r14,%r11,1),%r11d + movzbl (%r14,%r12,1),%r12d + movzbl (%r14,%r8,1),%r8d + + movzbl (%r14,%rsi,1),%r9d + movzbl %ch,%esi + movzbl (%r14,%rdi,1),%r13d + movzbl (%r14,%rbp,1),%ebp + movzbl (%r14,%rsi,1),%esi + + shrl $16,%ecx + shll $8,%r13d + shll $8,%r9d + movzbl %cl,%edi + shrl $16,%eax + xorl %r9d,%r10d + shrl $16,%ebx + movzbl %dl,%r9d + + shll $8,%ebp + 
xorl %r13d,%r11d + shll $8,%esi + movzbl %al,%r13d + movzbl (%r14,%rdi,1),%edi + xorl %ebp,%r12d + movzbl %bl,%ebp + + shll $16,%edi + xorl %esi,%r8d + movzbl (%r14,%r9,1),%r9d + movzbl %bh,%esi + movzbl (%r14,%rbp,1),%ebp + xorl %edi,%r10d + movzbl (%r14,%r13,1),%r13d + movzbl %ch,%edi + + shll $16,%ebp + shll $16,%r9d + shll $16,%r13d + xorl %ebp,%r8d + movzbl %dh,%ebp + xorl %r9d,%r11d + shrl $8,%eax + xorl %r13d,%r12d + + movzbl (%r14,%rsi,1),%esi + movzbl (%r14,%rdi,1),%ebx + movzbl (%r14,%rbp,1),%ecx + movzbl (%r14,%rax,1),%edx + + movl %r10d,%eax + shll $24,%esi + shll $24,%ebx + shll $24,%ecx + xorl %esi,%eax + shll $24,%edx + xorl %r11d,%ebx + xorl %r12d,%ecx + xorl %r8d,%edx + cmpq 16(%rsp),%r15 + je L$dec_compact_done + + movq 256+0(%r14),%rsi + shlq $32,%rbx + shlq $32,%rdx + movq 256+8(%r14),%rdi + orq %rbx,%rax + orq %rdx,%rcx + movq 256+16(%r14),%rbp + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq $32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + movq 0(%r14),%rsi + roll $16,%r9d + movq 64(%r14),%rdi + roll $16,%r12d + movq 128(%r14),%rbp + roll $16,%r8d + movq 192(%r14),%r10 + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + movq 256(%r14),%r13 + xorl %r8d,%ebx + xorl %r11d,%edx + jmp L$dec_loop_compact +.p2align 4 +L$dec_compact_done: + xorl 0(%r15),%eax + xorl 4(%r15),%ebx + xorl 8(%r15),%ecx + xorl 12(%r15),%edx +.byte 0xf3,0xc3 + +.globl _AES_decrypt + +.p2align 4 +.globl _asm_AES_decrypt +.private_extern _asm_AES_decrypt +_asm_AES_decrypt: +_AES_decrypt: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + movq %rsp,%r10 + leaq -63(%rdx),%rcx + andq $-64,%rsp + subq %rsp,%rcx + negq %rcx + andq $960,%rcx + subq %rcx,%rsp + subq $32,%rsp + + movq %rsi,16(%rsp) + movq %r10,24(%rsp) +L$dec_prologue: + + movq %rdx,%r15 + movl 240(%r15),%r13d + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + + shll $4,%r13d + leaq (%r15,%r13,1),%rbp + movq 
%r15,(%rsp) + movq %rbp,8(%rsp) + + + leaq L$AES_Td+2048(%rip),%r14 + leaq 768(%rsp),%rbp + subq %r14,%rbp + andq $768,%rbp + leaq (%r14,%rbp,1),%r14 + shrq $3,%rbp + addq %rbp,%r14 + + call _x86_64_AES_decrypt_compact + + movq 16(%rsp),%r9 + movq 24(%rsp),%rsi + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$dec_epilogue: + .byte 0xf3,0xc3 + +.globl _private_AES_set_encrypt_key + +.p2align 4 +_private_AES_set_encrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $8,%rsp +L$enc_key_prologue: + + call _x86_64_AES_set_encrypt_key + + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +L$enc_key_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 4 +_x86_64_AES_set_encrypt_key: + movl %esi,%ecx + movq %rdi,%rsi + movq %rdx,%rdi + + testq $-1,%rsi + jz L$badpointer + testq $-1,%rdi + jz L$badpointer + + leaq L$AES_Te(%rip),%rbp + leaq 2048+128(%rbp),%rbp + + + movl 0-128(%rbp),%eax + movl 32-128(%rbp),%ebx + movl 64-128(%rbp),%r8d + movl 96-128(%rbp),%edx + movl 128-128(%rbp),%eax + movl 160-128(%rbp),%ebx + movl 192-128(%rbp),%r8d + movl 224-128(%rbp),%edx + + cmpl $128,%ecx + je L$10rounds + cmpl $192,%ecx + je L$12rounds + cmpl $256,%ecx + je L$14rounds + movq $-2,%rax + jmp L$exit + +L$10rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rdx + movq %rax,0(%rdi) + movq %rdx,8(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$10shortcut +.p2align 2 +L$10loop: + movl 0(%rdi),%eax + movl 12(%rdi),%edx +L$10shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,16(%rdi) + xorl 4(%rdi),%eax + movl %eax,20(%rdi) + xorl 8(%rdi),%eax + movl %eax,24(%rdi) + xorl 12(%rdi),%eax + movl %eax,28(%rdi) + addl $1,%ecx + leaq 16(%rdi),%rdi + cmpl $10,%ecx + jl L$10loop + + movl $10,80(%rdi) + xorq %rax,%rax + jmp L$exit + +L$12rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rdx,16(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$12shortcut +.p2align 2 +L$12loop: + movl 0(%rdi),%eax + movl 20(%rdi),%edx +L$12shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,24(%rdi) + xorl 4(%rdi),%eax + movl %eax,28(%rdi) + xorl 8(%rdi),%eax + movl %eax,32(%rdi) + xorl 12(%rdi),%eax + movl %eax,36(%rdi) + + cmpl $7,%ecx + je L$12break + addl $1,%ecx + + xorl 16(%rdi),%eax + movl %eax,40(%rdi) + xorl 20(%rdi),%eax + movl %eax,44(%rdi) + + leaq 24(%rdi),%rdi + jmp L$12loop +L$12break: + movl $12,72(%rdi) + xorq %rax,%rax + jmp L$exit + +L$14rounds: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + + shrq $32,%rdx + xorl %ecx,%ecx + jmp L$14shortcut +.p2align 2 
+L$14loop: + movl 0(%rdi),%eax + movl 28(%rdi),%edx +L$14shortcut: + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + + xorl 1024-128(%rbp,%rcx,4),%eax + movl %eax,32(%rdi) + xorl 4(%rdi),%eax + movl %eax,36(%rdi) + xorl 8(%rdi),%eax + movl %eax,40(%rdi) + xorl 12(%rdi),%eax + movl %eax,44(%rdi) + + cmpl $6,%ecx + je L$14break + addl $1,%ecx + + movl %eax,%edx + movl 16(%rdi),%eax + movzbl %dl,%esi + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + + movzbl -128(%rbp,%rsi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + + movl %eax,48(%rdi) + xorl 20(%rdi),%eax + movl %eax,52(%rdi) + xorl 24(%rdi),%eax + movl %eax,56(%rdi) + xorl 28(%rdi),%eax + movl %eax,60(%rdi) + + leaq 32(%rdi),%rdi + jmp L$14loop +L$14break: + movl $14,48(%rdi) + xorq %rax,%rax + jmp L$exit + +L$badpointer: + movq $-1,%rax +L$exit: +.byte 0xf3,0xc3 + +.globl _private_AES_set_decrypt_key + +.p2align 4 +_private_AES_set_decrypt_key: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushq %rdx +L$dec_key_prologue: + + call _x86_64_AES_set_encrypt_key + movq (%rsp),%r8 + cmpl $0,%eax + jne L$abort + + movl 240(%r8),%r14d + xorq %rdi,%rdi + leaq (%rdi,%r14,4),%rcx + movq %r8,%rsi + leaq (%r8,%rcx,4),%rdi +.p2align 2 +L$invert: + movq 0(%rsi),%rax + movq 8(%rsi),%rbx + movq 0(%rdi),%rcx + movq 8(%rdi),%rdx + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,0(%rsi) + movq %rdx,8(%rsi) + leaq 16(%rsi),%rsi + leaq -16(%rdi),%rdi + cmpq %rsi,%rdi + jne L$invert + + leaq L$AES_Te+2048+1024(%rip),%rax + + movq 40(%rax),%rsi + movq 48(%rax),%rdi + movq 56(%rax),%rbp + + movq %r8,%r15 + subl $1,%r14d +.p2align 2 +L$permute: + leaq 16(%r15),%r15 + movq 0(%r15),%rax + movq 8(%r15),%rcx + movq %rsi,%r9 + movq %rsi,%r12 + andq %rax,%r9 + andq %rcx,%r12 + movq %r9,%rbx + movq %r12,%rdx + shrq $7,%r9 + leaq (%rax,%rax,1),%r8 + shrq $7,%r12 + leaq (%rcx,%rcx,1),%r11 + subq %r9,%rbx + subq %r12,%rdx + andq %rdi,%r8 + andq %rdi,%r11 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r8 + xorq %rdx,%r11 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r8,%r10 + andq %r11,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + leaq (%r8,%r8,1),%r9 + shrq $7,%r13 + leaq (%r11,%r11,1),%r12 + subq %r10,%rbx + subq %r13,%rdx + andq %rdi,%r9 + andq %rdi,%r12 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r9 + xorq %rdx,%r12 + movq %rsi,%r10 + movq %rsi,%r13 + + andq %r9,%r10 + andq %r12,%r13 + movq %r10,%rbx + movq %r13,%rdx + shrq $7,%r10 + xorq %rax,%r8 + shrq $7,%r13 + xorq %rcx,%r11 + subq %r10,%rbx + subq %r13,%rdx + leaq (%r9,%r9,1),%r10 + leaq (%r12,%r12,1),%r13 + xorq %rax,%r9 + xorq %rcx,%r12 + andq %rdi,%r10 + andq %rdi,%r13 + andq %rbp,%rbx + andq %rbp,%rdx + xorq %rbx,%r10 + xorq %rdx,%r13 + + xorq %r10,%rax + xorq %r13,%rcx + xorq %r10,%r8 + xorq %r13,%r11 + movq %rax,%rbx + movq %rcx,%rdx + xorq %r10,%r9 + shrq $32,%rbx + xorq %r13,%r12 + shrq $32,%rdx + xorq %r8,%r10 + roll $8,%eax + xorq %r11,%r13 + roll $8,%ecx + xorq %r9,%r10 + roll $8,%ebx + xorq %r12,%r13 + + roll $8,%edx + xorl %r10d,%eax + shrq $32,%r10 + xorl %r13d,%ecx + shrq 
$32,%r13 + xorl %r10d,%ebx + xorl %r13d,%edx + + movq %r8,%r10 + roll $24,%r8d + movq %r11,%r13 + roll $24,%r11d + shrq $32,%r10 + xorl %r8d,%eax + shrq $32,%r13 + xorl %r11d,%ecx + roll $24,%r10d + movq %r9,%r8 + roll $24,%r13d + movq %r12,%r11 + shrq $32,%r8 + xorl %r10d,%ebx + shrq $32,%r11 + xorl %r13d,%edx + + + roll $16,%r9d + + roll $16,%r12d + + roll $16,%r8d + + xorl %r9d,%eax + roll $16,%r11d + xorl %r12d,%ecx + + xorl %r8d,%ebx + xorl %r11d,%edx + movl %eax,0(%r15) + movl %ebx,4(%r15) + movl %ecx,8(%r15) + movl %edx,12(%r15) + subl $1,%r14d + jnz L$permute + + xorq %rax,%rax +L$abort: + movq 8(%rsp),%r15 + movq 16(%rsp),%r14 + movq 24(%rsp),%r13 + movq 32(%rsp),%r12 + movq 40(%rsp),%rbp + movq 48(%rsp),%rbx + addq $56,%rsp +L$dec_key_epilogue: + .byte 0xf3,0xc3 + +.globl _AES_cbc_encrypt + +.p2align 4 + +.globl _asm_AES_cbc_encrypt +.private_extern _asm_AES_cbc_encrypt +_asm_AES_cbc_encrypt: +_AES_cbc_encrypt: + cmpq $0,%rdx + je L$cbc_epilogue + pushfq + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +L$cbc_prologue: + + cld + movl %r9d,%r9d + + leaq L$AES_Te(%rip),%r14 + cmpq $0,%r9 + jne L$cbc_picked_te + leaq L$AES_Td(%rip),%r14 +L$cbc_picked_te: + + movl _OPENSSL_ia32cap_P(%rip),%r10d + cmpq $512,%rdx + jb L$cbc_slow_prologue + testq $15,%rdx + jnz L$cbc_slow_prologue + btl $28,%r10d + jc L$cbc_slow_prologue + + + leaq -88-248(%rsp),%r15 + andq $-64,%r15 + + + movq %r14,%r10 + leaq 2304(%r14),%r11 + movq %r15,%r12 + andq $4095,%r10 + andq $4095,%r11 + andq $4095,%r12 + + cmpq %r11,%r12 + jb L$cbc_te_break_out + subq %r11,%r12 + subq %r12,%r15 + jmp L$cbc_te_ok +L$cbc_te_break_out: + subq %r10,%r12 + andq $4095,%r12 + addq $320,%r12 + subq %r12,%r15 +.p2align 2 +L$cbc_te_ok: + + xchgq %rsp,%r15 + + movq %r15,16(%rsp) +L$cbc_fast_body: + movq %rdi,24(%rsp) + movq %rsi,32(%rsp) + movq %rdx,40(%rsp) + movq %rcx,48(%rsp) + movq %r8,56(%rsp) + movl $0,80+240(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + + movl 240(%r15),%eax + + movq %r15,%r10 + subq %r14,%r10 + andq $4095,%r10 + cmpq $2304,%r10 + jb L$cbc_do_ecopy + cmpq $4096-248,%r10 + jb L$cbc_skip_ecopy +.p2align 2 +L$cbc_do_ecopy: + movq %r15,%rsi + leaq 80(%rsp),%rdi + leaq 80(%rsp),%r15 + movl $30,%ecx +.long 0x90A548F3 + movl %eax,(%rdi) +L$cbc_skip_ecopy: + movq %r15,0(%rsp) + + movl $18,%ecx +.p2align 2 +L$cbc_prefetch_te: + movq 0(%r14),%r10 + movq 32(%r14),%r11 + movq 64(%r14),%r12 + movq 96(%r14),%r13 + leaq 128(%r14),%r14 + subl $1,%ecx + jnz L$cbc_prefetch_te + leaq -2304(%r14),%r14 + + cmpq $0,%rbx + je L$FAST_DECRYPT + + + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + +.p2align 2 +L$cbc_fast_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_encrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + movq %r10,40(%rsp) + jnz L$cbc_fast_enc_loop + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp L$cbc_fast_cleanup + + +.p2align 4 +L$FAST_DECRYPT: + cmpq %r8,%r9 + je L$cbc_fast_dec_in_place + + movq %rbp,64(%rsp) +.p2align 2 +L$cbc_fast_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 64(%rsp),%rbp + 
movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0(%rbp),%eax + xorl 4(%rbp),%ebx + xorl 8(%rbp),%ecx + xorl 12(%rbp),%edx + movq %r8,%rbp + + subq $16,%r10 + movq %r10,40(%rsp) + movq %rbp,64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jnz L$cbc_fast_dec_loop + movq 56(%rsp),%r12 + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0(%r12) + movq %r11,8(%r12) + jmp L$cbc_fast_cleanup + +.p2align 4 +L$cbc_fast_dec_in_place: + movq 0(%rbp),%r10 + movq 8(%rbp),%r11 + movq %r10,0+64(%rsp) + movq %r11,8+64(%rsp) +.p2align 2 +L$cbc_fast_dec_in_place_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + + call _x86_64_AES_decrypt + + movq 24(%rsp),%r8 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jz L$cbc_fast_dec_in_place_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + movq %r10,40(%rsp) + jmp L$cbc_fast_dec_in_place_loop +L$cbc_fast_dec_in_place_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + +.p2align 2 +L$cbc_fast_cleanup: + cmpl $0,80+240(%rsp) + leaq 80(%rsp),%rdi + je L$cbc_exit + movl $30,%ecx + xorq %rax,%rax +.long 0x90AB48F3 + + jmp L$cbc_exit + + +.p2align 4 +L$cbc_slow_prologue: + + leaq -88(%rsp),%rbp + andq $-64,%rbp + + leaq -88-63(%rcx),%r10 + subq %rbp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rbp + + xchgq %rsp,%rbp + + movq %rbp,16(%rsp) +L$cbc_slow_body: + + + + + movq %r8,56(%rsp) + movq %r8,%rbp + movq %r9,%rbx + movq %rsi,%r9 + movq %rdi,%r8 + movq %rcx,%r15 + movq %rdx,%r10 + + movl 240(%r15),%eax + movq %r15,0(%rsp) + shll $4,%eax + leaq (%r15,%rax,1),%rax + movq %rax,8(%rsp) + + + leaq 2048(%r14),%r14 + leaq 768-8(%rsp),%rax + subq %r14,%rax + andq $768,%rax + leaq (%r14,%rax,1),%r14 + + cmpq $0,%rbx + je L$SLOW_DECRYPT + + + testq $-16,%r10 + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + jz L$cbc_slow_enc_tail + +.p2align 2 +L$cbc_slow_enc_loop: + xorl 0(%r8),%eax + xorl 4(%r8),%ebx + xorl 8(%r8),%ecx + xorl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq %r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_encrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + subq $16,%r10 + testq $-16,%r10 + jnz L$cbc_slow_enc_loop + testq $15,%r10 + jnz L$cbc_slow_enc_tail + movq 56(%rsp),%rbp + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + jmp L$cbc_exit + +.p2align 2 +L$cbc_slow_enc_tail: + movq %rax,%r11 + movq %rcx,%r12 + movq %r10,%rcx + movq %r8,%rsi + movq %r9,%rdi +.long 0x9066A4F3 + movq $16,%rcx + subq %r10,%rcx + xorq %rax,%rax +.long 0x9066AAF3 + movq %r9,%r8 + movq $16,%r10 + movq %r11,%rax + movq %r12,%rcx + jmp L$cbc_slow_enc_loop + +.p2align 4 +L$SLOW_DECRYPT: + shrq $3,%rax + addq %rax,%r14 + + movq 0(%rbp),%r11 + movq 8(%rbp),%r12 + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + +.p2align 2 +L$cbc_slow_dec_loop: + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movq 0(%rsp),%r15 + movq %r8,24(%rsp) + movq 
%r9,32(%rsp) + movq %r10,40(%rsp) + + call _x86_64_AES_decrypt_compact + + movq 24(%rsp),%r8 + movq 32(%rsp),%r9 + movq 40(%rsp),%r10 + xorl 0+64(%rsp),%eax + xorl 4+64(%rsp),%ebx + xorl 8+64(%rsp),%ecx + xorl 12+64(%rsp),%edx + + movq 0(%r8),%r11 + movq 8(%r8),%r12 + subq $16,%r10 + jc L$cbc_slow_dec_partial + jz L$cbc_slow_dec_done + + movq %r11,0+64(%rsp) + movq %r12,8+64(%rsp) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + leaq 16(%r8),%r8 + leaq 16(%r9),%r9 + jmp L$cbc_slow_dec_loop +L$cbc_slow_dec_done: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0(%r9) + movl %ebx,4(%r9) + movl %ecx,8(%r9) + movl %edx,12(%r9) + + jmp L$cbc_exit + +.p2align 2 +L$cbc_slow_dec_partial: + movq 56(%rsp),%rdi + movq %r11,0(%rdi) + movq %r12,8(%rdi) + + movl %eax,0+64(%rsp) + movl %ebx,4+64(%rsp) + movl %ecx,8+64(%rsp) + movl %edx,12+64(%rsp) + + movq %r9,%rdi + leaq 64(%rsp),%rsi + leaq 16(%r10),%rcx +.long 0x9066A4F3 + jmp L$cbc_exit + +.p2align 4 +L$cbc_exit: + movq 16(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$cbc_popfq: + popfq +L$cbc_epilogue: + .byte 0xf3,0xc3 + +.p2align 6 +L$AES_Te: +.long 0xa56363c6,0xa56363c6 +.long 0x847c7cf8,0x847c7cf8 +.long 0x997777ee,0x997777ee +.long 0x8d7b7bf6,0x8d7b7bf6 +.long 0x0df2f2ff,0x0df2f2ff +.long 0xbd6b6bd6,0xbd6b6bd6 +.long 0xb16f6fde,0xb16f6fde +.long 0x54c5c591,0x54c5c591 +.long 0x50303060,0x50303060 +.long 0x03010102,0x03010102 +.long 0xa96767ce,0xa96767ce +.long 0x7d2b2b56,0x7d2b2b56 +.long 0x19fefee7,0x19fefee7 +.long 0x62d7d7b5,0x62d7d7b5 +.long 0xe6abab4d,0xe6abab4d +.long 0x9a7676ec,0x9a7676ec +.long 0x45caca8f,0x45caca8f +.long 0x9d82821f,0x9d82821f +.long 0x40c9c989,0x40c9c989 +.long 0x877d7dfa,0x877d7dfa +.long 0x15fafaef,0x15fafaef +.long 0xeb5959b2,0xeb5959b2 +.long 0xc947478e,0xc947478e +.long 0x0bf0f0fb,0x0bf0f0fb +.long 0xecadad41,0xecadad41 +.long 0x67d4d4b3,0x67d4d4b3 +.long 0xfda2a25f,0xfda2a25f +.long 0xeaafaf45,0xeaafaf45 +.long 0xbf9c9c23,0xbf9c9c23 +.long 0xf7a4a453,0xf7a4a453 +.long 0x967272e4,0x967272e4 +.long 0x5bc0c09b,0x5bc0c09b +.long 0xc2b7b775,0xc2b7b775 +.long 0x1cfdfde1,0x1cfdfde1 +.long 0xae93933d,0xae93933d +.long 0x6a26264c,0x6a26264c +.long 0x5a36366c,0x5a36366c +.long 0x413f3f7e,0x413f3f7e +.long 0x02f7f7f5,0x02f7f7f5 +.long 0x4fcccc83,0x4fcccc83 +.long 0x5c343468,0x5c343468 +.long 0xf4a5a551,0xf4a5a551 +.long 0x34e5e5d1,0x34e5e5d1 +.long 0x08f1f1f9,0x08f1f1f9 +.long 0x937171e2,0x937171e2 +.long 0x73d8d8ab,0x73d8d8ab +.long 0x53313162,0x53313162 +.long 0x3f15152a,0x3f15152a +.long 0x0c040408,0x0c040408 +.long 0x52c7c795,0x52c7c795 +.long 0x65232346,0x65232346 +.long 0x5ec3c39d,0x5ec3c39d +.long 0x28181830,0x28181830 +.long 0xa1969637,0xa1969637 +.long 0x0f05050a,0x0f05050a +.long 0xb59a9a2f,0xb59a9a2f +.long 0x0907070e,0x0907070e +.long 0x36121224,0x36121224 +.long 0x9b80801b,0x9b80801b +.long 0x3de2e2df,0x3de2e2df +.long 0x26ebebcd,0x26ebebcd +.long 0x6927274e,0x6927274e +.long 0xcdb2b27f,0xcdb2b27f +.long 0x9f7575ea,0x9f7575ea +.long 0x1b090912,0x1b090912 +.long 0x9e83831d,0x9e83831d +.long 0x742c2c58,0x742c2c58 +.long 0x2e1a1a34,0x2e1a1a34 +.long 0x2d1b1b36,0x2d1b1b36 +.long 0xb26e6edc,0xb26e6edc +.long 0xee5a5ab4,0xee5a5ab4 +.long 0xfba0a05b,0xfba0a05b +.long 0xf65252a4,0xf65252a4 +.long 0x4d3b3b76,0x4d3b3b76 +.long 0x61d6d6b7,0x61d6d6b7 +.long 0xceb3b37d,0xceb3b37d +.long 0x7b292952,0x7b292952 +.long 0x3ee3e3dd,0x3ee3e3dd +.long 
0x712f2f5e,0x712f2f5e +.long 0x97848413,0x97848413 +.long 0xf55353a6,0xf55353a6 +.long 0x68d1d1b9,0x68d1d1b9 +.long 0x00000000,0x00000000 +.long 0x2cededc1,0x2cededc1 +.long 0x60202040,0x60202040 +.long 0x1ffcfce3,0x1ffcfce3 +.long 0xc8b1b179,0xc8b1b179 +.long 0xed5b5bb6,0xed5b5bb6 +.long 0xbe6a6ad4,0xbe6a6ad4 +.long 0x46cbcb8d,0x46cbcb8d +.long 0xd9bebe67,0xd9bebe67 +.long 0x4b393972,0x4b393972 +.long 0xde4a4a94,0xde4a4a94 +.long 0xd44c4c98,0xd44c4c98 +.long 0xe85858b0,0xe85858b0 +.long 0x4acfcf85,0x4acfcf85 +.long 0x6bd0d0bb,0x6bd0d0bb +.long 0x2aefefc5,0x2aefefc5 +.long 0xe5aaaa4f,0xe5aaaa4f +.long 0x16fbfbed,0x16fbfbed +.long 0xc5434386,0xc5434386 +.long 0xd74d4d9a,0xd74d4d9a +.long 0x55333366,0x55333366 +.long 0x94858511,0x94858511 +.long 0xcf45458a,0xcf45458a +.long 0x10f9f9e9,0x10f9f9e9 +.long 0x06020204,0x06020204 +.long 0x817f7ffe,0x817f7ffe +.long 0xf05050a0,0xf05050a0 +.long 0x443c3c78,0x443c3c78 +.long 0xba9f9f25,0xba9f9f25 +.long 0xe3a8a84b,0xe3a8a84b +.long 0xf35151a2,0xf35151a2 +.long 0xfea3a35d,0xfea3a35d +.long 0xc0404080,0xc0404080 +.long 0x8a8f8f05,0x8a8f8f05 +.long 0xad92923f,0xad92923f +.long 0xbc9d9d21,0xbc9d9d21 +.long 0x48383870,0x48383870 +.long 0x04f5f5f1,0x04f5f5f1 +.long 0xdfbcbc63,0xdfbcbc63 +.long 0xc1b6b677,0xc1b6b677 +.long 0x75dadaaf,0x75dadaaf +.long 0x63212142,0x63212142 +.long 0x30101020,0x30101020 +.long 0x1affffe5,0x1affffe5 +.long 0x0ef3f3fd,0x0ef3f3fd +.long 0x6dd2d2bf,0x6dd2d2bf +.long 0x4ccdcd81,0x4ccdcd81 +.long 0x140c0c18,0x140c0c18 +.long 0x35131326,0x35131326 +.long 0x2fececc3,0x2fececc3 +.long 0xe15f5fbe,0xe15f5fbe +.long 0xa2979735,0xa2979735 +.long 0xcc444488,0xcc444488 +.long 0x3917172e,0x3917172e +.long 0x57c4c493,0x57c4c493 +.long 0xf2a7a755,0xf2a7a755 +.long 0x827e7efc,0x827e7efc +.long 0x473d3d7a,0x473d3d7a +.long 0xac6464c8,0xac6464c8 +.long 0xe75d5dba,0xe75d5dba +.long 0x2b191932,0x2b191932 +.long 0x957373e6,0x957373e6 +.long 0xa06060c0,0xa06060c0 +.long 0x98818119,0x98818119 +.long 0xd14f4f9e,0xd14f4f9e +.long 0x7fdcdca3,0x7fdcdca3 +.long 0x66222244,0x66222244 +.long 0x7e2a2a54,0x7e2a2a54 +.long 0xab90903b,0xab90903b +.long 0x8388880b,0x8388880b +.long 0xca46468c,0xca46468c +.long 0x29eeeec7,0x29eeeec7 +.long 0xd3b8b86b,0xd3b8b86b +.long 0x3c141428,0x3c141428 +.long 0x79dedea7,0x79dedea7 +.long 0xe25e5ebc,0xe25e5ebc +.long 0x1d0b0b16,0x1d0b0b16 +.long 0x76dbdbad,0x76dbdbad +.long 0x3be0e0db,0x3be0e0db +.long 0x56323264,0x56323264 +.long 0x4e3a3a74,0x4e3a3a74 +.long 0x1e0a0a14,0x1e0a0a14 +.long 0xdb494992,0xdb494992 +.long 0x0a06060c,0x0a06060c +.long 0x6c242448,0x6c242448 +.long 0xe45c5cb8,0xe45c5cb8 +.long 0x5dc2c29f,0x5dc2c29f +.long 0x6ed3d3bd,0x6ed3d3bd +.long 0xefacac43,0xefacac43 +.long 0xa66262c4,0xa66262c4 +.long 0xa8919139,0xa8919139 +.long 0xa4959531,0xa4959531 +.long 0x37e4e4d3,0x37e4e4d3 +.long 0x8b7979f2,0x8b7979f2 +.long 0x32e7e7d5,0x32e7e7d5 +.long 0x43c8c88b,0x43c8c88b +.long 0x5937376e,0x5937376e +.long 0xb76d6dda,0xb76d6dda +.long 0x8c8d8d01,0x8c8d8d01 +.long 0x64d5d5b1,0x64d5d5b1 +.long 0xd24e4e9c,0xd24e4e9c +.long 0xe0a9a949,0xe0a9a949 +.long 0xb46c6cd8,0xb46c6cd8 +.long 0xfa5656ac,0xfa5656ac +.long 0x07f4f4f3,0x07f4f4f3 +.long 0x25eaeacf,0x25eaeacf +.long 0xaf6565ca,0xaf6565ca +.long 0x8e7a7af4,0x8e7a7af4 +.long 0xe9aeae47,0xe9aeae47 +.long 0x18080810,0x18080810 +.long 0xd5baba6f,0xd5baba6f +.long 0x887878f0,0x887878f0 +.long 0x6f25254a,0x6f25254a +.long 0x722e2e5c,0x722e2e5c +.long 0x241c1c38,0x241c1c38 +.long 0xf1a6a657,0xf1a6a657 +.long 0xc7b4b473,0xc7b4b473 +.long 0x51c6c697,0x51c6c697 +.long 
0x23e8e8cb,0x23e8e8cb +.long 0x7cdddda1,0x7cdddda1 +.long 0x9c7474e8,0x9c7474e8 +.long 0x211f1f3e,0x211f1f3e +.long 0xdd4b4b96,0xdd4b4b96 +.long 0xdcbdbd61,0xdcbdbd61 +.long 0x868b8b0d,0x868b8b0d +.long 0x858a8a0f,0x858a8a0f +.long 0x907070e0,0x907070e0 +.long 0x423e3e7c,0x423e3e7c +.long 0xc4b5b571,0xc4b5b571 +.long 0xaa6666cc,0xaa6666cc +.long 0xd8484890,0xd8484890 +.long 0x05030306,0x05030306 +.long 0x01f6f6f7,0x01f6f6f7 +.long 0x120e0e1c,0x120e0e1c +.long 0xa36161c2,0xa36161c2 +.long 0x5f35356a,0x5f35356a +.long 0xf95757ae,0xf95757ae +.long 0xd0b9b969,0xd0b9b969 +.long 0x91868617,0x91868617 +.long 0x58c1c199,0x58c1c199 +.long 0x271d1d3a,0x271d1d3a +.long 0xb99e9e27,0xb99e9e27 +.long 0x38e1e1d9,0x38e1e1d9 +.long 0x13f8f8eb,0x13f8f8eb +.long 0xb398982b,0xb398982b +.long 0x33111122,0x33111122 +.long 0xbb6969d2,0xbb6969d2 +.long 0x70d9d9a9,0x70d9d9a9 +.long 0x898e8e07,0x898e8e07 +.long 0xa7949433,0xa7949433 +.long 0xb69b9b2d,0xb69b9b2d +.long 0x221e1e3c,0x221e1e3c +.long 0x92878715,0x92878715 +.long 0x20e9e9c9,0x20e9e9c9 +.long 0x49cece87,0x49cece87 +.long 0xff5555aa,0xff5555aa +.long 0x78282850,0x78282850 +.long 0x7adfdfa5,0x7adfdfa5 +.long 0x8f8c8c03,0x8f8c8c03 +.long 0xf8a1a159,0xf8a1a159 +.long 0x80898909,0x80898909 +.long 0x170d0d1a,0x170d0d1a +.long 0xdabfbf65,0xdabfbf65 +.long 0x31e6e6d7,0x31e6e6d7 +.long 0xc6424284,0xc6424284 +.long 0xb86868d0,0xb86868d0 +.long 0xc3414182,0xc3414182 +.long 0xb0999929,0xb0999929 +.long 0x772d2d5a,0x772d2d5a +.long 0x110f0f1e,0x110f0f1e +.long 0xcbb0b07b,0xcbb0b07b +.long 0xfc5454a8,0xfc5454a8 +.long 0xd6bbbb6d,0xd6bbbb6d +.long 0x3a16162c,0x3a16162c +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 
0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.byte 0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5 +.byte 0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76 +.byte 0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0 +.byte 0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0 +.byte 0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc +.byte 0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15 +.byte 0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a +.byte 0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75 +.byte 0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0 +.byte 0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84 +.byte 0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b +.byte 0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf +.byte 0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85 +.byte 0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8 +.byte 0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5 +.byte 0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2 +.byte 0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17 +.byte 0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73 +.byte 0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88 +.byte 0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb +.byte 
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c +.byte 0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79 +.byte 0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9 +.byte 0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08 +.byte 0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6 +.byte 0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a +.byte 0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e +.byte 0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e +.byte 0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94 +.byte 0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf +.byte 0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68 +.byte 0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16 +.long 0x00000001, 0x00000002, 0x00000004, 0x00000008 +.long 0x00000010, 0x00000020, 0x00000040, 0x00000080 +.long 0x0000001b, 0x00000036, 0x80808080, 0x80808080 +.long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b +.p2align 6 +L$AES_Td: +.long 0x50a7f451,0x50a7f451 +.long 0x5365417e,0x5365417e +.long 0xc3a4171a,0xc3a4171a +.long 0x965e273a,0x965e273a +.long 0xcb6bab3b,0xcb6bab3b +.long 0xf1459d1f,0xf1459d1f +.long 0xab58faac,0xab58faac +.long 0x9303e34b,0x9303e34b +.long 0x55fa3020,0x55fa3020 +.long 0xf66d76ad,0xf66d76ad +.long 0x9176cc88,0x9176cc88 +.long 0x254c02f5,0x254c02f5 +.long 0xfcd7e54f,0xfcd7e54f +.long 0xd7cb2ac5,0xd7cb2ac5 +.long 0x80443526,0x80443526 +.long 0x8fa362b5,0x8fa362b5 +.long 0x495ab1de,0x495ab1de +.long 0x671bba25,0x671bba25 +.long 0x980eea45,0x980eea45 +.long 0xe1c0fe5d,0xe1c0fe5d +.long 0x02752fc3,0x02752fc3 +.long 0x12f04c81,0x12f04c81 +.long 0xa397468d,0xa397468d +.long 0xc6f9d36b,0xc6f9d36b +.long 0xe75f8f03,0xe75f8f03 +.long 0x959c9215,0x959c9215 +.long 0xeb7a6dbf,0xeb7a6dbf +.long 0xda595295,0xda595295 +.long 0x2d83bed4,0x2d83bed4 +.long 0xd3217458,0xd3217458 +.long 0x2969e049,0x2969e049 +.long 0x44c8c98e,0x44c8c98e +.long 0x6a89c275,0x6a89c275 +.long 0x78798ef4,0x78798ef4 +.long 0x6b3e5899,0x6b3e5899 +.long 0xdd71b927,0xdd71b927 +.long 0xb64fe1be,0xb64fe1be +.long 0x17ad88f0,0x17ad88f0 +.long 0x66ac20c9,0x66ac20c9 +.long 0xb43ace7d,0xb43ace7d +.long 0x184adf63,0x184adf63 +.long 0x82311ae5,0x82311ae5 +.long 0x60335197,0x60335197 +.long 0x457f5362,0x457f5362 +.long 0xe07764b1,0xe07764b1 +.long 0x84ae6bbb,0x84ae6bbb +.long 0x1ca081fe,0x1ca081fe +.long 0x942b08f9,0x942b08f9 +.long 0x58684870,0x58684870 +.long 0x19fd458f,0x19fd458f +.long 0x876cde94,0x876cde94 +.long 0xb7f87b52,0xb7f87b52 +.long 0x23d373ab,0x23d373ab +.long 0xe2024b72,0xe2024b72 +.long 0x578f1fe3,0x578f1fe3 +.long 0x2aab5566,0x2aab5566 +.long 0x0728ebb2,0x0728ebb2 +.long 0x03c2b52f,0x03c2b52f +.long 0x9a7bc586,0x9a7bc586 +.long 0xa50837d3,0xa50837d3 +.long 0xf2872830,0xf2872830 +.long 0xb2a5bf23,0xb2a5bf23 +.long 0xba6a0302,0xba6a0302 +.long 0x5c8216ed,0x5c8216ed +.long 0x2b1ccf8a,0x2b1ccf8a +.long 0x92b479a7,0x92b479a7 +.long 0xf0f207f3,0xf0f207f3 +.long 0xa1e2694e,0xa1e2694e +.long 0xcdf4da65,0xcdf4da65 +.long 0xd5be0506,0xd5be0506 +.long 0x1f6234d1,0x1f6234d1 +.long 0x8afea6c4,0x8afea6c4 +.long 0x9d532e34,0x9d532e34 +.long 0xa055f3a2,0xa055f3a2 +.long 0x32e18a05,0x32e18a05 +.long 0x75ebf6a4,0x75ebf6a4 +.long 0x39ec830b,0x39ec830b +.long 0xaaef6040,0xaaef6040 +.long 0x069f715e,0x069f715e +.long 0x51106ebd,0x51106ebd +.long 0xf98a213e,0xf98a213e +.long 0x3d06dd96,0x3d06dd96 +.long 0xae053edd,0xae053edd +.long 0x46bde64d,0x46bde64d +.long 0xb58d5491,0xb58d5491 +.long 0x055dc471,0x055dc471 +.long 0x6fd40604,0x6fd40604 +.long 0xff155060,0xff155060 +.long 0x24fb9819,0x24fb9819 +.long 0x97e9bdd6,0x97e9bdd6 +.long 0xcc434089,0xcc434089 +.long 0x779ed967,0x779ed967 +.long 0xbd42e8b0,0xbd42e8b0 +.long 0x888b8907,0x888b8907 +.long 0x385b19e7,0x385b19e7 
+.long 0xdbeec879,0xdbeec879 +.long 0x470a7ca1,0x470a7ca1 +.long 0xe90f427c,0xe90f427c +.long 0xc91e84f8,0xc91e84f8 +.long 0x00000000,0x00000000 +.long 0x83868009,0x83868009 +.long 0x48ed2b32,0x48ed2b32 +.long 0xac70111e,0xac70111e +.long 0x4e725a6c,0x4e725a6c +.long 0xfbff0efd,0xfbff0efd +.long 0x5638850f,0x5638850f +.long 0x1ed5ae3d,0x1ed5ae3d +.long 0x27392d36,0x27392d36 +.long 0x64d90f0a,0x64d90f0a +.long 0x21a65c68,0x21a65c68 +.long 0xd1545b9b,0xd1545b9b +.long 0x3a2e3624,0x3a2e3624 +.long 0xb1670a0c,0xb1670a0c +.long 0x0fe75793,0x0fe75793 +.long 0xd296eeb4,0xd296eeb4 +.long 0x9e919b1b,0x9e919b1b +.long 0x4fc5c080,0x4fc5c080 +.long 0xa220dc61,0xa220dc61 +.long 0x694b775a,0x694b775a +.long 0x161a121c,0x161a121c +.long 0x0aba93e2,0x0aba93e2 +.long 0xe52aa0c0,0xe52aa0c0 +.long 0x43e0223c,0x43e0223c +.long 0x1d171b12,0x1d171b12 +.long 0x0b0d090e,0x0b0d090e +.long 0xadc78bf2,0xadc78bf2 +.long 0xb9a8b62d,0xb9a8b62d +.long 0xc8a91e14,0xc8a91e14 +.long 0x8519f157,0x8519f157 +.long 0x4c0775af,0x4c0775af +.long 0xbbdd99ee,0xbbdd99ee +.long 0xfd607fa3,0xfd607fa3 +.long 0x9f2601f7,0x9f2601f7 +.long 0xbcf5725c,0xbcf5725c +.long 0xc53b6644,0xc53b6644 +.long 0x347efb5b,0x347efb5b +.long 0x7629438b,0x7629438b +.long 0xdcc623cb,0xdcc623cb +.long 0x68fcedb6,0x68fcedb6 +.long 0x63f1e4b8,0x63f1e4b8 +.long 0xcadc31d7,0xcadc31d7 +.long 0x10856342,0x10856342 +.long 0x40229713,0x40229713 +.long 0x2011c684,0x2011c684 +.long 0x7d244a85,0x7d244a85 +.long 0xf83dbbd2,0xf83dbbd2 +.long 0x1132f9ae,0x1132f9ae +.long 0x6da129c7,0x6da129c7 +.long 0x4b2f9e1d,0x4b2f9e1d +.long 0xf330b2dc,0xf330b2dc +.long 0xec52860d,0xec52860d +.long 0xd0e3c177,0xd0e3c177 +.long 0x6c16b32b,0x6c16b32b +.long 0x99b970a9,0x99b970a9 +.long 0xfa489411,0xfa489411 +.long 0x2264e947,0x2264e947 +.long 0xc48cfca8,0xc48cfca8 +.long 0x1a3ff0a0,0x1a3ff0a0 +.long 0xd82c7d56,0xd82c7d56 +.long 0xef903322,0xef903322 +.long 0xc74e4987,0xc74e4987 +.long 0xc1d138d9,0xc1d138d9 +.long 0xfea2ca8c,0xfea2ca8c +.long 0x360bd498,0x360bd498 +.long 0xcf81f5a6,0xcf81f5a6 +.long 0x28de7aa5,0x28de7aa5 +.long 0x268eb7da,0x268eb7da +.long 0xa4bfad3f,0xa4bfad3f +.long 0xe49d3a2c,0xe49d3a2c +.long 0x0d927850,0x0d927850 +.long 0x9bcc5f6a,0x9bcc5f6a +.long 0x62467e54,0x62467e54 +.long 0xc2138df6,0xc2138df6 +.long 0xe8b8d890,0xe8b8d890 +.long 0x5ef7392e,0x5ef7392e +.long 0xf5afc382,0xf5afc382 +.long 0xbe805d9f,0xbe805d9f +.long 0x7c93d069,0x7c93d069 +.long 0xa92dd56f,0xa92dd56f +.long 0xb31225cf,0xb31225cf +.long 0x3b99acc8,0x3b99acc8 +.long 0xa77d1810,0xa77d1810 +.long 0x6e639ce8,0x6e639ce8 +.long 0x7bbb3bdb,0x7bbb3bdb +.long 0x097826cd,0x097826cd +.long 0xf418596e,0xf418596e +.long 0x01b79aec,0x01b79aec +.long 0xa89a4f83,0xa89a4f83 +.long 0x656e95e6,0x656e95e6 +.long 0x7ee6ffaa,0x7ee6ffaa +.long 0x08cfbc21,0x08cfbc21 +.long 0xe6e815ef,0xe6e815ef +.long 0xd99be7ba,0xd99be7ba +.long 0xce366f4a,0xce366f4a +.long 0xd4099fea,0xd4099fea +.long 0xd67cb029,0xd67cb029 +.long 0xafb2a431,0xafb2a431 +.long 0x31233f2a,0x31233f2a +.long 0x3094a5c6,0x3094a5c6 +.long 0xc066a235,0xc066a235 +.long 0x37bc4e74,0x37bc4e74 +.long 0xa6ca82fc,0xa6ca82fc +.long 0xb0d090e0,0xb0d090e0 +.long 0x15d8a733,0x15d8a733 +.long 0x4a9804f1,0x4a9804f1 +.long 0xf7daec41,0xf7daec41 +.long 0x0e50cd7f,0x0e50cd7f +.long 0x2ff69117,0x2ff69117 +.long 0x8dd64d76,0x8dd64d76 +.long 0x4db0ef43,0x4db0ef43 +.long 0x544daacc,0x544daacc +.long 0xdf0496e4,0xdf0496e4 +.long 0xe3b5d19e,0xe3b5d19e +.long 0x1b886a4c,0x1b886a4c +.long 0xb81f2cc1,0xb81f2cc1 +.long 0x7f516546,0x7f516546 +.long 0x04ea5e9d,0x04ea5e9d +.long 
0x5d358c01,0x5d358c01 +.long 0x737487fa,0x737487fa +.long 0x2e410bfb,0x2e410bfb +.long 0x5a1d67b3,0x5a1d67b3 +.long 0x52d2db92,0x52d2db92 +.long 0x335610e9,0x335610e9 +.long 0x1347d66d,0x1347d66d +.long 0x8c61d79a,0x8c61d79a +.long 0x7a0ca137,0x7a0ca137 +.long 0x8e14f859,0x8e14f859 +.long 0x893c13eb,0x893c13eb +.long 0xee27a9ce,0xee27a9ce +.long 0x35c961b7,0x35c961b7 +.long 0xede51ce1,0xede51ce1 +.long 0x3cb1477a,0x3cb1477a +.long 0x59dfd29c,0x59dfd29c +.long 0x3f73f255,0x3f73f255 +.long 0x79ce1418,0x79ce1418 +.long 0xbf37c773,0xbf37c773 +.long 0xeacdf753,0xeacdf753 +.long 0x5baafd5f,0x5baafd5f +.long 0x146f3ddf,0x146f3ddf +.long 0x86db4478,0x86db4478 +.long 0x81f3afca,0x81f3afca +.long 0x3ec468b9,0x3ec468b9 +.long 0x2c342438,0x2c342438 +.long 0x5f40a3c2,0x5f40a3c2 +.long 0x72c31d16,0x72c31d16 +.long 0x0c25e2bc,0x0c25e2bc +.long 0x8b493c28,0x8b493c28 +.long 0x41950dff,0x41950dff +.long 0x7101a839,0x7101a839 +.long 0xdeb30c08,0xdeb30c08 +.long 0x9ce4b4d8,0x9ce4b4d8 +.long 0x90c15664,0x90c15664 +.long 0x6184cb7b,0x6184cb7b +.long 0x70b632d5,0x70b632d5 +.long 0x745c6c48,0x745c6c48 +.long 0x4257b8d0,0x4257b8d0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 
0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38 +.byte 0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb +.byte 0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87 +.byte 0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb +.byte 0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d +.byte 0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e +.byte 0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2 +.byte 0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25 +.byte 0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16 +.byte 0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92 +.byte 0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda +.byte 0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84 +.byte 0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a +.byte 0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06 +.byte 0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02 +.byte 0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b +.byte 0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea +.byte 0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73 +.byte 0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85 +.byte 0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e +.byte 0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89 +.byte 0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b +.byte 0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20 +.byte 0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4 +.byte 
0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31 +.byte 0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f +.byte 0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d +.byte 0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef +.byte 0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0 +.byte 0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61 +.byte 0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26 +.byte 0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d +.long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe +.long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0 +.byte 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-mb-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-mb-x86_64.s new file mode 100644 index 00000000000000..e45c622a525e82 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-mb-x86_64.s @@ -0,0 +1,505 @@ +.text + + + +.globl _aesni_multi_cbc_encrypt + +.p2align 5 +_aesni_multi_cbc_encrypt: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +L$enc4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +L$enc4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm2 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm3 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm4 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm5 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz L$enc4x_done + + movups 16-120(%rsi),%xmm1 + pxor %xmm12,%xmm2 + movups 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm3 + movl 240-120(%rsi),%eax + pxor %xmm12,%xmm4 + movdqu (%r8),%xmm6 + pxor %xmm12,%xmm5 + movdqu (%r9),%xmm7 + pxor %xmm6,%xmm2 + movdqu (%r10),%xmm8 + pxor %xmm7,%xmm3 + movdqu (%r11),%xmm9 + pxor %xmm8,%xmm4 + pxor %xmm9,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp L$oop_enc4x + +.p2align 5 +L$oop_enc4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,220,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,220,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r10,%rbx,1) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,220,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,220,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,220,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,220,233 + movups 
-8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,220,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,220,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,220,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,220,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 160-120(%rsi),%xmm0 + + jb L$enc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 192-120(%rsi),%xmm0 + + je L$enc4x_tail + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups 224-120(%rsi),%xmm0 + jmp L$enc4x_tail + +.p2align 5 +L$enc4x_tail: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqu (%r8,%rbx,1),%xmm6 + movdqu 16-120(%rsi),%xmm1 + +.byte 102,15,56,221,208 + movdqu (%r9,%rbx,1),%xmm7 + pxor %xmm12,%xmm6 +.byte 102,15,56,221,216 + movdqu (%r10,%rbx,1),%xmm8 + pxor %xmm12,%xmm7 +.byte 102,15,56,221,224 + movdqu (%r11,%rbx,1),%xmm9 + pxor %xmm12,%xmm8 +.byte 102,15,56,221,232 + movdqu 32-120(%rsi),%xmm0 + pxor %xmm12,%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + pxor %xmm6,%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + pxor %xmm7,%xmm3 + movups %xmm4,-16(%r14,%rbx,1) + pxor %xmm8,%xmm4 + movups %xmm5,-16(%r15,%rbx,1) + pxor %xmm9,%xmm5 + + decl %edx + jnz L$oop_enc4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + + + + + + + + + + leaq 160(%rdi),%rdi + decl %edx + jnz L$enc4x_loop_grande + +L$enc4x_done: + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$enc4x_epilogue: + .byte 0xf3,0xc3 + + +.globl _aesni_multi_cbc_decrypt + +.p2align 5 +_aesni_multi_cbc_decrypt: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + + + + + + subq $48,%rsp + andq $-64,%rsp + movq %rax,16(%rsp) + +L$dec4x_body: + movdqu (%rsi),%xmm12 + leaq 120(%rsi),%rsi + leaq 80(%rdi),%rdi + +L$dec4x_loop_grande: + movl %edx,24(%rsp) + xorl %edx,%edx + movl -64(%rdi),%ecx + movq -80(%rdi),%r8 + cmpl %edx,%ecx + movq -72(%rdi),%r12 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -56(%rdi),%xmm6 + movl %ecx,32(%rsp) + cmovleq %rsp,%r8 + movl -24(%rdi),%ecx + movq -40(%rdi),%r9 + cmpl %edx,%ecx + movq -32(%rdi),%r13 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu -16(%rdi),%xmm7 + movl %ecx,36(%rsp) + cmovleq %rsp,%r9 + movl 16(%rdi),%ecx + movq 0(%rdi),%r10 + cmpl %edx,%ecx + movq 8(%rdi),%r14 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 24(%rdi),%xmm8 + movl %ecx,40(%rsp) + cmovleq %rsp,%r10 + movl 56(%rdi),%ecx + movq 40(%rdi),%r11 + cmpl %edx,%ecx + movq 48(%rdi),%r15 + cmovgl %ecx,%edx + testl %ecx,%ecx + movdqu 64(%rdi),%xmm9 + movl %ecx,44(%rsp) + cmovleq %rsp,%r11 + testl %edx,%edx + jz L$dec4x_done + + movups 16-120(%rsi),%xmm1 + movups 32-120(%rsi),%xmm0 + movl 240-120(%rsi),%eax + movdqu 
(%r8),%xmm2 + movdqu (%r9),%xmm3 + pxor %xmm12,%xmm2 + movdqu (%r10),%xmm4 + pxor %xmm12,%xmm3 + movdqu (%r11),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + movdqa 32(%rsp),%xmm10 + xorq %rbx,%rbx + jmp L$oop_dec4x + +.p2align 5 +L$oop_dec4x: + addq $16,%rbx + leaq 16(%rsp),%rbp + movl $1,%ecx + subq %rbx,%rbp + +.byte 102,15,56,222,209 + prefetcht0 31(%r8,%rbx,1) + prefetcht0 31(%r9,%rbx,1) +.byte 102,15,56,222,217 + prefetcht0 31(%r10,%rbx,1) + prefetcht0 31(%r11,%rbx,1) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 48-120(%rsi),%xmm1 + cmpl 32(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r8 + cmovgq %rbp,%r12 +.byte 102,15,56,222,232 + movups -56(%rsi),%xmm0 + cmpl 36(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r9 + cmovgq %rbp,%r13 +.byte 102,15,56,222,233 + movups -40(%rsi),%xmm1 + cmpl 40(%rsp),%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + cmovgeq %rbp,%r10 + cmovgq %rbp,%r14 +.byte 102,15,56,222,232 + movups -24(%rsi),%xmm0 + cmpl 44(%rsp),%ecx +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + cmovgeq %rbp,%r11 + cmovgq %rbp,%r15 +.byte 102,15,56,222,233 + movups -8(%rsi),%xmm1 + movdqa %xmm10,%xmm11 +.byte 102,15,56,222,208 + prefetcht0 15(%r12,%rbx,1) + prefetcht0 15(%r13,%rbx,1) +.byte 102,15,56,222,216 + prefetcht0 15(%r14,%rbx,1) + prefetcht0 15(%r15,%rbx,1) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 128-120(%rsi),%xmm0 + pxor %xmm12,%xmm12 + +.byte 102,15,56,222,209 + pcmpgtd %xmm12,%xmm11 + movdqu -120(%rsi),%xmm12 +.byte 102,15,56,222,217 + paddd %xmm11,%xmm10 + movdqa %xmm10,32(%rsp) +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 144-120(%rsi),%xmm1 + + cmpl $11,%eax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 160-120(%rsi),%xmm0 + + jb L$dec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 176-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 192-120(%rsi),%xmm0 + + je L$dec4x_tail + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups 208-120(%rsi),%xmm1 + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups 224-120(%rsi),%xmm0 + jmp L$dec4x_tail + +.p2align 5 +L$dec4x_tail: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,222,233 + movdqu 16-120(%rsi),%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 + movdqu 32-120(%rsi),%xmm0 + +.byte 102,15,56,223,214 +.byte 102,15,56,223,223 + movdqu -16(%r8,%rbx,1),%xmm6 + movdqu -16(%r9,%rbx,1),%xmm7 +.byte 102,65,15,56,223,224 +.byte 102,65,15,56,223,233 + movdqu -16(%r10,%rbx,1),%xmm8 + movdqu -16(%r11,%rbx,1),%xmm9 + + movups %xmm2,-16(%r12,%rbx,1) + movdqu (%r8,%rbx,1),%xmm2 + movups %xmm3,-16(%r13,%rbx,1) + movdqu (%r9,%rbx,1),%xmm3 + pxor %xmm12,%xmm2 + movups %xmm4,-16(%r14,%rbx,1) + movdqu (%r10,%rbx,1),%xmm4 + pxor %xmm12,%xmm3 + movups %xmm5,-16(%r15,%rbx,1) + movdqu (%r11,%rbx,1),%xmm5 + pxor %xmm12,%xmm4 + pxor %xmm12,%xmm5 + + decl %edx + jnz L$oop_dec4x + + movq 16(%rsp),%rax + movl 24(%rsp),%edx + + leaq 160(%rdi),%rdi + decl %edx + jnz L$dec4x_loop_grande + +L$dec4x_done: + movq -48(%rax),%r15 + movq 
-40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$dec4x_epilogue: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha1-x86_64.s new file mode 100644 index 00000000000000..015db5faa7facd --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha1-x86_64.s @@ -0,0 +1,1680 @@ +.text + + +.globl _aesni_cbc_sha1_enc + +.p2align 5 +_aesni_cbc_sha1_enc: + + movl _OPENSSL_ia32cap_P+0(%rip),%r10d + movq _OPENSSL_ia32cap_P+4(%rip),%r11 + btq $61,%r11 + jc aesni_cbc_sha1_enc_shaext + jmp aesni_cbc_sha1_enc_ssse3 + .byte 0xf3,0xc3 + + +.p2align 5 +aesni_cbc_sha1_enc_ssse3: + movq 8(%rsp),%r10 + + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -104(%rsp),%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + leaq 112(%rcx),%r15 + movdqu (%r8),%xmm2 + movq %r8,88(%rsp) + shlq $6,%r14 + subq %r12,%r13 + movl 240-112(%r15),%r8d + addq %r10,%r14 + + leaq K_XX_XX(%rip),%r11 + movl 0(%r9),%eax + movl 4(%r9),%ebx + movl 8(%r9),%ecx + movl 12(%r9),%edx + movl %ebx,%esi + movl 16(%r9),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + addq $64,%r10 + paddd %xmm13,%xmm4 +.byte 102,15,56,0,251 + paddd %xmm13,%xmm5 + paddd %xmm13,%xmm6 + movdqa %xmm4,0(%rsp) + psubd %xmm13,%xmm4 + movdqa %xmm5,16(%rsp) + psubd %xmm13,%xmm5 + movdqa %xmm6,32(%rsp) + psubd %xmm13,%xmm6 + movups -112(%r15),%xmm15 + movups 16-112(%r15),%xmm0 + jmp L$oop_ssse3 +.p2align 5 +L$oop_ssse3: + rorl $2,%ebx + movups 0(%r12),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm4,%xmm8 + xorl %edx,%esi + movdqa %xmm7,%xmm12 + paddd %xmm7,%xmm13 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm5,%xmm8 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm12 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm4,%xmm8 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm6,%xmm12 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm12,%xmm8 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm13,48(%rsp) + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %eax,%esi + movdqa %xmm8,%xmm3 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm8,%xmm12 + xorl %ebx,%esi + pslldq $12,%xmm3 + paddd %xmm8,%xmm8 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm12 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm13 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm3 + addl %edx,%ecx + rorl $7,%edx + por %xmm12,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm13 + pxor %xmm3,%xmm8 + xorl %ebp,%edx + movdqa 0(%r11),%xmm3 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm13,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm5,%xmm9 + xorl %ebp,%esi + movdqa %xmm8,%xmm13 + paddd %xmm8,%xmm3 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm6,%xmm9 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm5,%xmm9 + addl %ebx,%eax + rorl $7,%ebx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + pxor %xmm7,%xmm13 + xorl %edx,%edi + 
movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm13,%xmm9 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm3,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm9,%xmm12 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm9,%xmm13 + xorl %ecx,%esi + pslldq $12,%xmm12 + paddd %xmm9,%xmm9 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm13 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm3 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm12 + addl %ebp,%edx + rorl $7,%ebp + por %xmm13,%xmm9 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm3 + pxor %xmm12,%xmm9 + xorl %eax,%ebp + movdqa 16(%r11),%xmm12 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm3,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm6,%xmm10 + xorl %eax,%esi + movdqa %xmm9,%xmm3 + paddd %xmm9,%xmm12 + movl %ecx,%edi + addl 32(%rsp),%ebx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm7,%xmm10 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm3 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm6,%xmm10 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm8,%xmm3 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm3,%xmm10 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm12,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm10,%xmm13 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm10,%xmm3 + xorl %edx,%esi + pslldq $12,%xmm13 + paddd %xmm10,%xmm10 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm3 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm13,%xmm12 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm13 + addl %eax,%ebp + rorl $7,%eax + por %xmm3,%xmm10 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm12 + pxor %xmm13,%xmm10 + xorl %ebx,%eax + movdqa 16(%r11),%xmm13 + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %eax,%esi + pxor %xmm12,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm7,%xmm11 + xorl %ebx,%esi + movdqa %xmm10,%xmm12 + paddd %xmm10,%xmm13 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm8,%xmm11 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm12 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm7,%xmm11 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm9,%xmm12 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm11 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm13,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm11,%xmm3 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm11,%xmm12 + xorl %ebp,%esi + pslldq $12,%xmm3 + paddd %xmm11,%xmm11 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm12 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm3,%xmm13 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + cmpl $11,%r8d + jb L$aesenclast1 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast1 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast1: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + por %xmm12,%xmm11 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm13 + pxor %xmm3,%xmm11 + xorl %ecx,%ebx + movdqa 16(%r11),%xmm3 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm13,%xmm11 + 
pshufd $238,%xmm10,%xmm13 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm8,%xmm4 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm11,%xmm13 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm5,%xmm4 + addl %esi,%edx + movups 16(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%r12,%r13,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %eax,%edi + movdqa %xmm3,%xmm12 + xorl %ebx,%eax + paddd %xmm11,%xmm3 + addl %ebp,%edx + pxor %xmm13,%xmm4 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm4,%xmm13 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm3,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm4 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm13 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + por %xmm13,%xmm4 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm11,%xmm3 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm9,%xmm5 + addl 16(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm4,%xmm3 + movl %eax,%edi + roll $5,%eax + pxor %xmm6,%xmm5 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebx + paddd %xmm4,%xmm12 + addl %eax,%ebp + pxor %xmm3,%xmm5 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm5,%xmm3 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm12,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm5 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm3 + roll $5,%edx + addl %esi,%ecx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + por %xmm3,%xmm5 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm4,%xmm12 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm10,%xmm6 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm5,%xmm12 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm7,%xmm6 + addl %esi,%eax + xorl %edx,%edi + movdqa 32(%r11),%xmm3 + rorl $7,%ecx + paddd %xmm5,%xmm13 + addl %ebx,%eax + pxor %xmm12,%xmm6 + addl 36(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm6,%xmm12 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm13,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm6 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm12 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm12,%xmm6 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm5,%xmm13 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm6,%xmm13 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm8,%xmm7 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm3,%xmm12 + rorl $7,%edx + paddd %xmm6,%xmm3 + addl %ecx,%ebx + pxor %xmm13,%xmm7 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm7,%xmm13 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm3,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + pslld $2,%xmm7 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm13 + roll $5,%eax + 
addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm13,%xmm7 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm6,%xmm3 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm4,%xmm8 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm7,%xmm3 + movl %edx,%edi + roll $5,%edx + pxor %xmm9,%xmm8 + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + movdqa %xmm12,%xmm13 + rorl $7,%ebp + paddd %xmm7,%xmm12 + addl %edx,%ecx + pxor %xmm3,%xmm8 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm8,%xmm3 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm12,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm8 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm3,%xmm8 + addl %ebx,%eax + addl 12(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + pshufd $238,%xmm7,%xmm12 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm5,%xmm9 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm8,%xmm12 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm10,%xmm9 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%eax + paddd %xmm8,%xmm13 + addl %ebp,%edx + pxor %xmm12,%xmm9 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm9,%xmm12 + addl %edi,%ecx + cmpl $11,%r8d + jb L$aesenclast2 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast2 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast2: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + movdqa %xmm13,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm9 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm12 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm12,%xmm9 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm8,%xmm13 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm6,%xmm10 + addl 32(%rsp),%ebp + movups 32(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm9,%xmm13 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm11,%xmm10 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm3,%xmm12 + xorl %ebx,%edi + paddd %xmm9,%xmm3 + xorl %ecx,%ebx + pxor %xmm13,%xmm10 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm10,%xmm13 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm3,16(%rsp) + roll $5,%ebp + addl %edi,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + pslld $2,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm13 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm13,%xmm10 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm9,%xmm3 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm7,%xmm11 + 
addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm10,%xmm3 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm4,%xmm11 + roll $5,%ebx + addl %esi,%eax + movdqa 48(%r11),%xmm13 + xorl %ecx,%edi + paddd %xmm10,%xmm12 + xorl %edx,%ecx + pxor %xmm3,%xmm11 + addl %ebx,%eax + addl 52(%rsp),%ebp + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm11,%xmm3 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm12,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm11 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm3 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm3,%xmm11 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm10,%xmm12 + addl %esi,%edx + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm8,%xmm4 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + punpcklqdq %xmm11,%xmm12 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm5,%xmm4 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm13,%xmm3 + xorl %edx,%edi + paddd %xmm11,%xmm13 + xorl %ebp,%edx + pxor %xmm12,%xmm4 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm4,%xmm12 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm13,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm4 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm12 + addl 8(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + andl %ecx,%esi + xorl %edx,%ecx + por %xmm12,%xmm4 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm11,%xmm13 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm9,%xmm5 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm4,%xmm13 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm6,%xmm5 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm3,%xmm12 + xorl %ebp,%edi + paddd %xmm4,%xmm3 + xorl %eax,%ebp + pxor %xmm13,%xmm5 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm5,%xmm13 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm3,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm5 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm13 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm13,%xmm5 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm4,%xmm3 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + cmpl $11,%r8d + jb L$aesenclast3 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast3 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast3: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl 
%edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm6 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm5,%xmm3 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm7,%xmm6 + roll $5,%ebp + addl %esi,%edx + movups 48(%r12),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%r13,%r12,1) + xorps %xmm14,%xmm2 + movups -80(%r15),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm12,%xmm13 + xorl %eax,%edi + paddd %xmm5,%xmm12 + xorl %ebx,%eax + pxor %xmm3,%xmm6 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm6,%xmm3 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm12,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm6 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm3 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm3,%xmm6 + rorl $7,%edx + movups -64(%r15),%xmm0 +.byte 102,15,56,220,209 + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm5,%xmm12 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm11,%xmm7 + addl 48(%rsp),%ebp + movups -48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + punpcklqdq %xmm6,%xmm12 + movl %eax,%edi + roll $5,%eax + pxor %xmm8,%xmm7 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm13,%xmm3 + rorl $7,%ebx + paddd %xmm6,%xmm13 + addl %eax,%ebp + pxor %xmm12,%xmm7 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm7,%xmm12 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm13,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm7 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm12 + roll $5,%edx + addl %esi,%ecx + movups -32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + por %xmm12,%xmm7 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm7,%xmm3 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm3,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + movups -16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + movups 0(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r14,%r10 + je L$done_ssse3 + movdqa 64(%r11),%xmm3 + movdqa 0(%r11),%xmm13 + movdqu 0(%r10),%xmm4 + movdqu 16(%r10),%xmm5 + movdqu 32(%r10),%xmm6 + movdqu 48(%r10),%xmm7 +.byte 102,15,56,0,227 + addq $64,%r10 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,235 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm13,%xmm4 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm4,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm13,%xmm4 + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl 
%eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,243 + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm13,%xmm5 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm5,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm13,%xmm5 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,251 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm13,%xmm6 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm6,32(%rsp) + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb L$aesenclast4 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast4 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast4: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm13,%xmm6 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + leaq 64(%r12),%r12 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + addl 12(%r9),%edx + movl %eax,0(%r9) + addl 16(%r9),%ebp + movl %esi,4(%r9) + movl %esi,%ebx + movl %ecx,8(%r9) + movl %ecx,%edi + movl %edx,12(%r9) + xorl %edx,%edi + movl %ebp,16(%r9) + andl %edi,%esi + jmp L$oop_ssse3 + +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + movups 16(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + movups 32(%r15),%xmm0 +.byte 102,15,56,220,209 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + movups 48(%r15),%xmm1 +.byte 102,15,56,220,208 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 
52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + cmpl $11,%r8d + jb L$aesenclast5 + movups 64(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 80(%r15),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast5 + movups 96(%r15),%xmm0 +.byte 102,15,56,220,209 + movups 112(%r15),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast5: +.byte 102,15,56,221,209 + movups 16-112(%r15),%xmm0 + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + movups %xmm2,48(%r13,%r12,1) + movq 88(%rsp),%r8 + + addl 0(%r9),%eax + addl 4(%r9),%esi + addl 8(%r9),%ecx + movl %eax,0(%r9) + addl 12(%r9),%edx + movl %esi,4(%r9) + addl 16(%r9),%ebp + movl %ecx,8(%r9) + movl %edx,12(%r9) + movl %ebp,16(%r9) + movups %xmm2,(%r8) + leaq 104(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 + +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.p2align 5 +aesni_cbc_sha1_enc_shaext: + movq 8(%rsp),%r10 + movdqu (%r9),%xmm8 + movd 16(%r9),%xmm9 + movdqa K_XX_XX+80(%rip),%xmm7 + + movl 240(%rcx),%r11d + subq %rdi,%rsi + movups (%rcx),%xmm15 + movups 16(%rcx),%xmm0 + leaq 112(%rcx),%rcx + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movups 0(%rdi),%xmm14 + xorps %xmm15,%xmm14 + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqu (%r10),%xmm3 + movdqa %xmm9,%xmm12 +.byte 102,15,56,0,223 + movdqu 16(%r10),%xmm4 + movdqa %xmm8,%xmm11 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,0,231 + + paddd %xmm3,%xmm9 + movdqu 32(%r10),%xmm5 + leaq 64(%r10),%r10 + pxor %xmm12,%xmm3 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 + pxor %xmm12,%xmm3 + movdqa %xmm8,%xmm10 +.byte 102,15,56,0,239 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 15,56,201,220 + movdqu -16(%r10),%xmm6 + movdqa %xmm8,%xmm9 +.byte 102,15,56,0,247 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,205 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,0 +.byte 68,15,56,200,203 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + cmpl $11,%r11d + jb L$aesenclast6 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je 
L$aesenclast6 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast6: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,0 +.byte 68,15,56,200,212 + movups 16(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,0(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,214 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,203 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,1 +.byte 68,15,56,200,212 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + cmpl $11,%r11d + jb L$aesenclast7 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast7 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast7: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,1 +.byte 68,15,56,200,205 + movups 32(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,16(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + movups -64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,203 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,212 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 +.byte 15,56,201,220 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,2 +.byte 68,15,56,200,205 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,245 + pxor %xmm5,%xmm3 +.byte 15,56,201,229 + cmpl $11,%r11d + jb L$aesenclast8 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast8 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast8: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,2 +.byte 68,15,56,200,214 + movups 48(%rdi),%xmm14 + xorps %xmm15,%xmm14 + movups %xmm2,32(%rsi,%rdi,1) + xorps %xmm14,%xmm2 + movups -80(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,222 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + movups -64(%rcx),%xmm0 +.byte 
102,15,56,220,209 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,203 + movups -48(%rcx),%xmm1 +.byte 102,15,56,220,208 +.byte 15,56,202,227 + pxor %xmm3,%xmm5 +.byte 15,56,201,243 + movups -32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,212 +.byte 15,56,202,236 + pxor %xmm4,%xmm6 + movups -16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 +.byte 15,56,202,245 + movups 0(%rcx),%xmm0 +.byte 102,15,56,220,209 + movdqa %xmm12,%xmm5 + movdqa %xmm8,%xmm10 +.byte 69,15,58,204,193,3 +.byte 68,15,56,200,214 + movups 16(%rcx),%xmm1 +.byte 102,15,56,220,208 + movdqa %xmm8,%xmm9 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,205 + movups 32(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 48(%rcx),%xmm1 +.byte 102,15,56,220,208 + cmpl $11,%r11d + jb L$aesenclast9 + movups 64(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 80(%rcx),%xmm1 +.byte 102,15,56,220,208 + je L$aesenclast9 + movups 96(%rcx),%xmm0 +.byte 102,15,56,220,209 + movups 112(%rcx),%xmm1 +.byte 102,15,56,220,208 +L$aesenclast9: +.byte 102,15,56,221,209 + movups 16-112(%rcx),%xmm0 + decq %rdx + + paddd %xmm11,%xmm8 + movups %xmm2,48(%rsi,%rdi,1) + leaq 64(%rdi),%rdi + jnz L$oop_shaext + + pshufd $27,%xmm8,%xmm8 + pshufd $27,%xmm9,%xmm9 + movups %xmm2,(%r8) + movdqu %xmm8,(%r9) + movd %xmm9,16(%r9) + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha256-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha256-x86_64.s new file mode 100644 index 00000000000000..53307da35815e1 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-sha256-x86_64.s @@ -0,0 +1,57 @@ +.text + + +.globl _aesni_cbc_sha256_enc + +.p2align 4 +_aesni_cbc_sha256_enc: + xorl %eax,%eax + cmpq $0,%rdi + je L$probe + ud2 +L$probe: + .byte 0xf3,0xc3 + + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + 
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 +.long 0,0,0,0, 0,0,0,0 +.byte 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54,32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s new file mode 100644 index 00000000000000..57509ae7196c08 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s @@ -0,0 +1,3165 @@ +.text + +.globl _aesni_encrypt + +.p2align 4 +_aesni_encrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_enc1_1: +.byte 102,15,56,220,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_enc1_1 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 + + +.globl _aesni_decrypt + +.p2align 4 +_aesni_decrypt: + movups (%rdi),%xmm2 + movl 240(%rdx),%eax + movups (%rdx),%xmm0 + movups 16(%rdx),%xmm1 + leaq 32(%rdx),%rdx + xorps %xmm0,%xmm2 +L$oop_dec1_2: +.byte 102,15,56,222,209 + decl %eax + movups (%rdx),%xmm1 + leaq 16(%rdx),%rdx + jnz L$oop_dec1_2 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop2: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop2 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt2: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop2: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop2 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$enc_loop3: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop3 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt3: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax + addq $16,%rax + +L$dec_loop3: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups 
(%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop3 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +L$enc_loop4: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop4 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt4: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + xorps %xmm0,%xmm4 + xorps %xmm0,%xmm5 + movups 32(%rcx),%xmm0 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 0x0f,0x1f,0x00 + addq $16,%rax + +L$dec_loop4: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop4 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + addq $16,%rax +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -16(%rcx,%rax,1),%xmm0 + jmp L$enc_loop6_enter +.p2align 4 +L$enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L$enc_loop6_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop6 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt6: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,217 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + addq $16,%rax +.byte 102,15,56,222,233 +.byte 
102,15,56,222,241 +.byte 102,15,56,222,249 + movups -16(%rcx,%rax,1),%xmm0 + jmp L$dec_loop6_enter +.p2align 4 +L$dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +L$dec_loop6_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop6 + +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_encrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,209 + addq $16,%rax + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups -16(%rcx,%rax,1),%xmm0 + jmp L$enc_loop8_enter +.p2align 4 +L$enc_loop8: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +L$enc_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$enc_loop8 + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 +.byte 102,68,15,56,221,192 +.byte 102,68,15,56,221,200 + .byte 0xf3,0xc3 + + +.p2align 4 +_aesni_decrypt8: + movups (%rcx),%xmm0 + shll $4,%eax + movups 16(%rcx),%xmm1 + xorps %xmm0,%xmm2 + xorps %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + leaq 32(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,222,209 + addq $16,%rax + pxor %xmm0,%xmm7 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm8 + pxor %xmm0,%xmm9 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups -16(%rcx,%rax,1),%xmm0 + jmp L$dec_loop8_enter +.p2align 4 +L$dec_loop8: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +L$dec_loop8_enter: + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$dec_loop8 + +.byte 
102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 +.byte 102,68,15,56,223,192 +.byte 102,68,15,56,223,200 + .byte 0xf3,0xc3 + +.globl _aesni_ecb_encrypt + +.p2align 4 +_aesni_ecb_encrypt: + andq $-16,%rdx + jz L$ecb_ret + + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %eax,%r10d + testl %r8d,%r8d + jz L$ecb_decrypt + + cmpq $128,%rdx + jb L$ecb_enc_tail + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_enc_loop8_enter +.p2align 4 +L$ecb_enc_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_enc_loop8_enter: + + call _aesni_encrypt8 + + subq $128,%rdx + jnc L$ecb_enc_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_enc_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_enc_one + movups 16(%rdi),%xmm3 + je L$ecb_enc_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_enc_three + movups 48(%rdi),%xmm5 + je L$ecb_enc_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_enc_five + movups 80(%rdi),%xmm7 + je L$ecb_enc_six + movdqu 96(%rdi),%xmm8 + call _aesni_encrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_3: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_3 +.byte 102,15,56,221,209 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_five: + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_enc_six: + call _aesni_encrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + jmp L$ecb_ret + +.p2align 4 +L$ecb_decrypt: + cmpq $128,%rdx + jb L$ecb_dec_tail + + movdqu 
(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + movdqu 48(%rdi),%xmm5 + movdqu 64(%rdi),%xmm6 + movdqu 80(%rdi),%xmm7 + movdqu 96(%rdi),%xmm8 + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi + subq $128,%rdx + jmp L$ecb_dec_loop8_enter +.p2align 4 +L$ecb_dec_loop8: + movups %xmm2,(%rsi) + movq %r11,%rcx + movdqu (%rdi),%xmm2 + movl %r10d,%eax + movups %xmm3,16(%rsi) + movdqu 16(%rdi),%xmm3 + movups %xmm4,32(%rsi) + movdqu 32(%rdi),%xmm4 + movups %xmm5,48(%rsi) + movdqu 48(%rdi),%xmm5 + movups %xmm6,64(%rsi) + movdqu 64(%rdi),%xmm6 + movups %xmm7,80(%rsi) + movdqu 80(%rdi),%xmm7 + movups %xmm8,96(%rsi) + movdqu 96(%rdi),%xmm8 + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + movdqu 112(%rdi),%xmm9 + leaq 128(%rdi),%rdi +L$ecb_dec_loop8_enter: + + call _aesni_decrypt8 + + movups (%r11),%xmm0 + subq $128,%rdx + jnc L$ecb_dec_loop8 + + movups %xmm2,(%rsi) + movq %r11,%rcx + movups %xmm3,16(%rsi) + movl %r10d,%eax + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 128(%rsi),%rsi + addq $128,%rdx + jz L$ecb_ret + +L$ecb_dec_tail: + movups (%rdi),%xmm2 + cmpq $32,%rdx + jb L$ecb_dec_one + movups 16(%rdi),%xmm3 + je L$ecb_dec_two + movups 32(%rdi),%xmm4 + cmpq $64,%rdx + jb L$ecb_dec_three + movups 48(%rdi),%xmm5 + je L$ecb_dec_four + movups 64(%rdi),%xmm6 + cmpq $96,%rdx + jb L$ecb_dec_five + movups 80(%rdi),%xmm7 + je L$ecb_dec_six + movups 96(%rdi),%xmm8 + movups (%rcx),%xmm0 + call _aesni_decrypt8 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + movups %xmm8,96(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_one: + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_4: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_4 +.byte 102,15,56,223,209 + movups %xmm2,(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_five: + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + jmp L$ecb_ret +.p2align 4 +L$ecb_dec_six: + call _aesni_decrypt6 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + movups %xmm5,48(%rsi) + movups %xmm6,64(%rsi) + movups %xmm7,80(%rsi) + +L$ecb_ret: + .byte 0xf3,0xc3 + +.globl _aesni_ccm64_encrypt_blocks + +.p2align 4 +_aesni_ccm64_encrypt_blocks: + movl 240(%rcx),%eax + movdqu (%r8),%xmm6 + movdqa L$increment64(%rip),%xmm9 + movdqa L$bswap_mask(%rip),%xmm7 + + shll $4,%eax + movl $16,%r10d + leaq 0(%rcx),%r11 + movdqu (%r9),%xmm3 + movdqa %xmm6,%xmm2 + leaq 32(%rcx,%rax,1),%rcx +.byte 102,15,56,0,247 + subq %rax,%r10 + jmp L$ccm64_enc_outer +.p2align 4 +L$ccm64_enc_outer: + movups (%r11),%xmm0 + movq %r10,%rax + movups (%rdi),%xmm8 + + xorps %xmm0,%xmm2 + movups 16(%r11),%xmm1 + xorps %xmm8,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%r11),%xmm0 + +L$ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax 
+.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq %xmm9,%xmm6 + decq %rdx +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + + leaq 16(%rdi),%rdi + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) +.byte 102,15,56,0,215 + leaq 16(%rsi),%rsi + jnz L$ccm64_enc_outer + + movups %xmm3,(%r9) + .byte 0xf3,0xc3 + +.globl _aesni_ccm64_decrypt_blocks + +.p2align 4 +_aesni_ccm64_decrypt_blocks: + movl 240(%rcx),%eax + movups (%r8),%xmm6 + movdqu (%r9),%xmm3 + movdqa L$increment64(%rip),%xmm9 + movdqa L$bswap_mask(%rip),%xmm7 + + movaps %xmm6,%xmm2 + movl %eax,%r10d + movq %rcx,%r11 +.byte 102,15,56,0,247 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_5: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_5 +.byte 102,15,56,221,209 + shll $4,%r10d + movl $16,%eax + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 + leaq 16(%rdi),%rdi + subq %r10,%rax + leaq 32(%r11,%r10,1),%rcx + movq %rax,%r10 + jmp L$ccm64_dec_outer +.p2align 4 +L$ccm64_dec_outer: + xorps %xmm2,%xmm8 + movdqa %xmm6,%xmm2 + movups %xmm8,(%rsi) + leaq 16(%rsi),%rsi +.byte 102,15,56,0,215 + + subq $1,%rdx + jz L$ccm64_dec_break + + movups (%r11),%xmm0 + movq %r10,%rax + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + xorps %xmm0,%xmm2 + xorps %xmm8,%xmm3 + movups 32(%r11),%xmm0 + jmp L$ccm64_dec2_loop +.p2align 4 +L$ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%rcx,%rax,1),%xmm1 + addq $32,%rax +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%rcx,%rax,1),%xmm0 + jnz L$ccm64_dec2_loop + movups (%rdi),%xmm8 + paddq %xmm9,%xmm6 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leaq 16(%rdi),%rdi + jmp L$ccm64_dec_outer + +.p2align 4 +L$ccm64_dec_break: + + movl 240(%r11),%eax + movups (%r11),%xmm0 + movups 16(%r11),%xmm1 + xorps %xmm0,%xmm8 + leaq 32(%r11),%r11 + xorps %xmm8,%xmm3 +L$oop_enc1_6: +.byte 102,15,56,220,217 + decl %eax + movups (%r11),%xmm1 + leaq 16(%r11),%r11 + jnz L$oop_enc1_6 +.byte 102,15,56,221,217 + movups %xmm3,(%r9) + .byte 0xf3,0xc3 + +.globl _aesni_ctr32_encrypt_blocks + +.p2align 4 +_aesni_ctr32_encrypt_blocks: + leaq (%rsp),%rax + pushq %rbp + subq $128,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + + cmpq $1,%rdx + je L$ctr32_one_shortcut + + movdqu (%r8),%xmm2 + movdqu (%rcx),%xmm0 + movl 12(%r8),%r8d + pxor %xmm0,%xmm2 + movl 12(%rcx),%r11d + movdqa %xmm2,0(%rsp) + bswapl %r8d + movdqa %xmm2,%xmm3 + movdqa %xmm2,%xmm4 + movdqa %xmm2,%xmm5 + movdqa %xmm2,64(%rsp) + movdqa %xmm2,80(%rsp) + movdqa %xmm2,96(%rsp) + movq %rdx,%r10 + movdqa %xmm2,112(%rsp) + + leaq 1(%r8),%rax + leaq 2(%r8),%rdx + bswapl %eax + bswapl %edx + xorl %r11d,%eax + xorl %r11d,%edx +.byte 102,15,58,34,216,3 + leaq 3(%r8),%rax + movdqa %xmm3,16(%rsp) +.byte 102,15,58,34,226,3 + bswapl %eax + movq %r10,%rdx + leaq 4(%r8),%r10 + movdqa %xmm4,32(%rsp) + xorl %r11d,%eax + bswapl %r10d +.byte 102,15,58,34,232,3 + xorl %r11d,%r10d + movdqa %xmm5,48(%rsp) + leaq 5(%r8),%r9 + movl %r10d,64+12(%rsp) + bswapl %r9d + leaq 6(%r8),%r10 + movl 240(%rcx),%eax + xorl %r11d,%r9d + bswapl %r10d + movl %r9d,80+12(%rsp) + xorl %r11d,%r10d + leaq 7(%r8),%r9 + movl %r10d,96+12(%rsp) + bswapl %r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r10d + xorl %r11d,%r9d + andl $71303168,%r10d + movl %r9d,112+12(%rsp) + + movups 16(%rcx),%xmm1 + + movdqa 64(%rsp),%xmm6 + 
movdqa 80(%rsp),%xmm7 + + cmpq $8,%rdx + jb L$ctr32_tail + + subq $6,%rdx + cmpl $4194304,%r10d + je L$ctr32_6x + + leaq 128(%rcx),%rcx + subq $2,%rdx + jmp L$ctr32_loop8 + +.p2align 4 +L$ctr32_6x: + shll $4,%eax + movl $48,%r10d + bswapl %r11d + leaq 32(%rcx,%rax,1),%rcx + subq %rax,%r10 + jmp L$ctr32_loop6 + +.p2align 4 +L$ctr32_loop6: + addl $6,%r8d + movups -48(%rcx,%r10,1),%xmm0 +.byte 102,15,56,220,209 + movl %r8d,%eax + xorl %r11d,%eax +.byte 102,15,56,220,217 +.byte 0x0f,0x38,0xf1,0x44,0x24,12 + leal 1(%r8),%eax +.byte 102,15,56,220,225 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,28 +.byte 102,15,56,220,233 + leal 2(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,241 +.byte 0x0f,0x38,0xf1,0x44,0x24,44 + leal 3(%r8),%eax +.byte 102,15,56,220,249 + movups -32(%rcx,%r10,1),%xmm1 + xorl %r11d,%eax + +.byte 102,15,56,220,208 +.byte 0x0f,0x38,0xf1,0x44,0x24,60 + leal 4(%r8),%eax +.byte 102,15,56,220,216 + xorl %r11d,%eax +.byte 0x0f,0x38,0xf1,0x44,0x24,76 +.byte 102,15,56,220,224 + leal 5(%r8),%eax + xorl %r11d,%eax +.byte 102,15,56,220,232 +.byte 0x0f,0x38,0xf1,0x44,0x24,92 + movq %r10,%rax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%rcx,%r10,1),%xmm0 + + call L$enc_loop6 + + movdqu (%rdi),%xmm8 + movdqu 16(%rdi),%xmm9 + movdqu 32(%rdi),%xmm10 + movdqu 48(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 80(%rdi),%xmm13 + leaq 96(%rdi),%rdi + movups -64(%rcx,%r10,1),%xmm1 + pxor %xmm2,%xmm8 + movaps 0(%rsp),%xmm2 + pxor %xmm3,%xmm9 + movaps 16(%rsp),%xmm3 + pxor %xmm4,%xmm10 + movaps 32(%rsp),%xmm4 + pxor %xmm5,%xmm11 + movaps 48(%rsp),%xmm5 + pxor %xmm6,%xmm12 + movaps 64(%rsp),%xmm6 + pxor %xmm7,%xmm13 + movaps 80(%rsp),%xmm7 + movdqu %xmm8,(%rsi) + movdqu %xmm9,16(%rsi) + movdqu %xmm10,32(%rsi) + movdqu %xmm11,48(%rsi) + movdqu %xmm12,64(%rsi) + movdqu %xmm13,80(%rsi) + leaq 96(%rsi),%rsi + + subq $6,%rdx + jnc L$ctr32_loop6 + + addq $6,%rdx + jz L$ctr32_done + + leal -48(%r10),%eax + leaq -80(%rcx,%r10,1),%rcx + negl %eax + shrl $4,%eax + jmp L$ctr32_tail + +.p2align 5 +L$ctr32_loop8: + addl $8,%r8d + movdqa 96(%rsp),%xmm8 +.byte 102,15,56,220,209 + movl %r8d,%r9d + movdqa 112(%rsp),%xmm9 +.byte 102,15,56,220,217 + bswapl %r9d + movups 32-128(%rcx),%xmm0 +.byte 102,15,56,220,225 + xorl %r11d,%r9d + nop +.byte 102,15,56,220,233 + movl %r9d,0+12(%rsp) + leaq 1(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 48-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,16+12(%rsp) + leaq 2(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 64-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,32+12(%rsp) + leaq 3(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 80-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,48+12(%rsp) + leaq 4(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 96-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 
102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,64+12(%rsp) + leaq 5(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 112-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movl %r9d,80+12(%rsp) + leaq 6(%r8),%r9 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 128-128(%rcx),%xmm0 + bswapl %r9d +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + xorl %r11d,%r9d +.byte 0x66,0x90 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movl %r9d,96+12(%rsp) + leaq 7(%r8),%r9 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 144-128(%rcx),%xmm1 + bswapl %r9d +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + xorl %r11d,%r9d + movdqu 0(%rdi),%xmm10 +.byte 102,15,56,220,232 + movl %r9d,112+12(%rsp) + cmpl $11,%eax +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 160-128(%rcx),%xmm0 + + jb L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 176-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 192-128(%rcx),%xmm0 + je L$ctr32_enc_done + +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movups 208-128(%rcx),%xmm1 + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 +.byte 102,68,15,56,220,192 +.byte 102,68,15,56,220,200 + movups 224-128(%rcx),%xmm0 + jmp L$ctr32_enc_done + +.p2align 4 +L$ctr32_enc_done: + movdqu 16(%rdi),%xmm11 + pxor %xmm0,%xmm10 + movdqu 32(%rdi),%xmm12 + pxor %xmm0,%xmm11 + movdqu 48(%rdi),%xmm13 + pxor %xmm0,%xmm12 + movdqu 64(%rdi),%xmm14 + pxor %xmm0,%xmm13 + movdqu 80(%rdi),%xmm15 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 +.byte 102,68,15,56,220,201 + movdqu 96(%rdi),%xmm1 + leaq 128(%rdi),%rdi + +.byte 102,65,15,56,221,210 + pxor %xmm0,%xmm1 + movdqu 112-128(%rdi),%xmm10 +.byte 102,65,15,56,221,219 + pxor %xmm0,%xmm10 + movdqa 0(%rsp),%xmm11 +.byte 102,65,15,56,221,228 +.byte 102,65,15,56,221,237 + movdqa 16(%rsp),%xmm12 + movdqa 32(%rsp),%xmm13 +.byte 102,65,15,56,221,246 +.byte 102,65,15,56,221,255 + movdqa 48(%rsp),%xmm14 + movdqa 64(%rsp),%xmm15 +.byte 102,68,15,56,221,193 + movdqa 80(%rsp),%xmm0 + movups 16-128(%rcx),%xmm1 +.byte 102,69,15,56,221,202 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm0,%xmm7 + movups %xmm8,96(%rsi) + movups %xmm9,112(%rsi) + leaq 
128(%rsi),%rsi + + subq $8,%rdx + jnc L$ctr32_loop8 + + addq $8,%rdx + jz L$ctr32_done + leaq -128(%rcx),%rcx + +L$ctr32_tail: + leaq 16(%rcx),%rcx + cmpq $4,%rdx + jb L$ctr32_loop3 + je L$ctr32_loop4 + + shll $4,%eax + movdqa 96(%rsp),%xmm8 + pxor %xmm9,%xmm9 + + movups 16(%rcx),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + leaq 32-16(%rcx,%rax,1),%rcx + negq %rax +.byte 102,15,56,220,225 + addq $16,%rax + movups (%rdi),%xmm10 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + movups 16(%rdi),%xmm11 + movups 32(%rdi),%xmm12 +.byte 102,15,56,220,249 +.byte 102,68,15,56,220,193 + + call L$enc_loop8_enter + + movdqu 48(%rdi),%xmm13 + pxor %xmm10,%xmm2 + movdqu 64(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm10,%xmm6 + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + cmpq $6,%rdx + jb L$ctr32_done + + movups 80(%rdi),%xmm11 + xorps %xmm11,%xmm7 + movups %xmm7,80(%rsi) + je L$ctr32_done + + movups 96(%rdi),%xmm12 + xorps %xmm12,%xmm8 + movups %xmm8,96(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop4: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%rcx),%xmm1 + jnz L$ctr32_loop4 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 + movups (%rdi),%xmm10 + movups 16(%rdi),%xmm11 +.byte 102,15,56,221,225 +.byte 102,15,56,221,233 + movups 32(%rdi),%xmm12 + movups 48(%rdi),%xmm13 + + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm4,32(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm5,48(%rsi) + jmp L$ctr32_done + +.p2align 5 +L$ctr32_loop3: +.byte 102,15,56,220,209 + leaq 16(%rcx),%rcx + decl %eax +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%rcx),%xmm1 + jnz L$ctr32_loop3 +.byte 102,15,56,221,209 +.byte 102,15,56,221,217 +.byte 102,15,56,221,225 + + movups (%rdi),%xmm10 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + cmpq $2,%rdx + jb L$ctr32_done + + movups 16(%rdi),%xmm11 + xorps %xmm11,%xmm3 + movups %xmm3,16(%rsi) + je L$ctr32_done + + movups 32(%rdi),%xmm12 + xorps %xmm12,%xmm4 + movups %xmm4,32(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_one_shortcut: + movups (%r8),%xmm2 + movups (%rdi),%xmm10 + movl 240(%rcx),%eax + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_7: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_7 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + jmp L$ctr32_done + +.p2align 4 +L$ctr32_done: + leaq (%rbp),%rsp + popq %rbp +L$ctr32_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_xts_encrypt + +.p2align 4 +_aesni_xts_encrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +L$oop_enc1_8: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_8 +.byte 102,15,56,221,209 + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa L$xts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 
+ pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc L$xts_enc_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq L$xts_magic(%rip),%r8 + jmp L$xts_enc_grandloop + +.p2align 5 +L$xts_enc_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,220,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,220,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,220,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,220,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,220,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,220,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,220,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,220,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups 64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp L$xts_enc_loop6 +.p2align 5 +L$xts_enc_loop6: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz L$xts_enc_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,220,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,220,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,220,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,220,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,220,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 
102,15,56,220,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,220,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,220,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,220,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,221,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,221,92,36,16 +.byte 102,15,56,221,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,221,108,36,48 +.byte 102,15,56,221,116,36,64 +.byte 102,15,56,221,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc L$xts_enc_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +L$xts_enc_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 + addq $96,%rdx + jz L$xts_enc_done + + pxor %xmm0,%xmm11 + cmpq $32,%rdx + jb L$xts_enc_one + pxor %xmm0,%xmm12 + je L$xts_enc_two + + pxor %xmm0,%xmm13 + cmpq $64,%rdx + jb L$xts_enc_three + pxor %xmm0,%xmm14 + je L$xts_enc_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_encrypt6 + + xorps %xmm10,%xmm2 + movdqa %xmm15,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps %xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_9: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_9 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + leaq 16(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_encrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_encrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_encrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu 
%xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_enc_done + +.p2align 4 +L$xts_enc_done: + andq $15,%r9 + jz L$xts_enc_ret + movq %r9,%rdx + +L$xts_enc_steal: + movzbl (%rdi),%eax + movzbl -16(%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,-16(%rsi) + movb %cl,0(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_enc_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups -16(%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_10: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_10 +.byte 102,15,56,221,209 + xorps %xmm10,%xmm2 + movups %xmm2,-16(%rsi) + +L$xts_enc_ret: + leaq (%rbp),%rsp + popq %rbp +L$xts_enc_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_xts_decrypt + +.p2align 4 +_aesni_xts_decrypt: + leaq (%rsp),%rax + pushq %rbp + subq $112,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r9),%xmm2 + movl 240(%r8),%eax + movl 240(%rcx),%r10d + movups (%r8),%xmm0 + movups 16(%r8),%xmm1 + leaq 32(%r8),%r8 + xorps %xmm0,%xmm2 +L$oop_enc1_11: +.byte 102,15,56,220,209 + decl %eax + movups (%r8),%xmm1 + leaq 16(%r8),%r8 + jnz L$oop_enc1_11 +.byte 102,15,56,221,209 + xorl %eax,%eax + testq $15,%rdx + setnz %al + shlq $4,%rax + subq %rax,%rdx + + movups (%rcx),%xmm0 + movq %rcx,%r11 + movl %r10d,%eax + shll $4,%r10d + movq %rdx,%r9 + andq $-16,%rdx + + movups 16(%rcx,%r10,1),%xmm1 + + movdqa L$xts_magic(%rip),%xmm8 + movdqa %xmm2,%xmm15 + pshufd $95,%xmm2,%xmm9 + pxor %xmm0,%xmm1 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm10 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm10 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm11 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm11 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm12 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm12 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 + movdqa %xmm15,%xmm13 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 + pxor %xmm0,%xmm13 + pxor %xmm14,%xmm15 + movdqa %xmm15,%xmm14 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm9 + pxor %xmm0,%xmm14 + pxor %xmm9,%xmm15 + movaps %xmm1,96(%rsp) + + subq $96,%rdx + jc L$xts_dec_short + + movl $16+96,%eax + leaq 32(%r11,%r10,1),%rcx + subq %r10,%rax + movups 16(%r11),%xmm1 + movq %rax,%r10 + leaq L$xts_magic(%rip),%r8 + jmp L$xts_dec_grandloop + +.p2align 5 +L$xts_dec_grandloop: + movdqu 0(%rdi),%xmm2 + movdqa %xmm0,%xmm8 + movdqu 16(%rdi),%xmm3 + pxor %xmm10,%xmm2 + movdqu 32(%rdi),%xmm4 + pxor %xmm11,%xmm3 +.byte 102,15,56,222,209 + movdqu 48(%rdi),%xmm5 + pxor %xmm12,%xmm4 +.byte 102,15,56,222,217 + movdqu 64(%rdi),%xmm6 + pxor %xmm13,%xmm5 +.byte 102,15,56,222,225 + movdqu 80(%rdi),%xmm7 + pxor %xmm15,%xmm8 + movdqa 96(%rsp),%xmm9 + pxor %xmm14,%xmm6 +.byte 102,15,56,222,233 + movups 32(%r11),%xmm0 + leaq 96(%rdi),%rdi + pxor %xmm8,%xmm7 + + pxor %xmm9,%xmm10 +.byte 102,15,56,222,241 + pxor %xmm9,%xmm11 + movdqa %xmm10,0(%rsp) +.byte 102,15,56,222,249 + movups 48(%r11),%xmm1 + pxor %xmm9,%xmm12 + +.byte 102,15,56,222,208 + pxor %xmm9,%xmm13 + movdqa %xmm11,16(%rsp) +.byte 102,15,56,222,216 + pxor %xmm9,%xmm14 + movdqa %xmm12,32(%rsp) +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pxor %xmm9,%xmm8 + movdqa %xmm14,64(%rsp) +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups 
64(%r11),%xmm0 + movdqa %xmm8,80(%rsp) + pshufd $95,%xmm15,%xmm9 + jmp L$xts_dec_loop6 +.p2align 5 +L$xts_dec_loop6: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -64(%rcx,%rax,1),%xmm1 + addq $32,%rax + +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -80(%rcx,%rax,1),%xmm0 + jnz L$xts_dec_loop6 + + movdqa (%r8),%xmm8 + movdqa %xmm9,%xmm14 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + paddq %xmm15,%xmm15 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + pand %xmm8,%xmm14 + movups (%r11),%xmm10 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 + pxor %xmm14,%xmm15 + movaps %xmm10,%xmm11 +.byte 102,15,56,222,249 + movups -64(%rcx),%xmm1 + + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,208 + paddd %xmm9,%xmm9 + pxor %xmm15,%xmm10 +.byte 102,15,56,222,216 + psrad $31,%xmm14 + paddq %xmm15,%xmm15 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + pand %xmm8,%xmm14 + movaps %xmm11,%xmm12 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,248 + movups -48(%rcx),%xmm0 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm11 + psrad $31,%xmm14 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movdqa %xmm13,48(%rsp) + pxor %xmm14,%xmm15 +.byte 102,15,56,222,241 + movaps %xmm12,%xmm13 + movdqa %xmm9,%xmm14 +.byte 102,15,56,222,249 + movups -32(%rcx),%xmm1 + + paddd %xmm9,%xmm9 +.byte 102,15,56,222,208 + pxor %xmm15,%xmm12 + psrad $31,%xmm14 +.byte 102,15,56,222,216 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm14 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 + pxor %xmm14,%xmm15 + movaps %xmm13,%xmm14 +.byte 102,15,56,222,248 + + movdqa %xmm9,%xmm0 + paddd %xmm9,%xmm9 +.byte 102,15,56,222,209 + pxor %xmm15,%xmm13 + psrad $31,%xmm0 +.byte 102,15,56,222,217 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm15 + movups (%r11),%xmm0 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups 16(%r11),%xmm1 + + pxor %xmm15,%xmm14 +.byte 102,15,56,223,84,36,0 + psrad $31,%xmm9 + paddq %xmm15,%xmm15 +.byte 102,15,56,223,92,36,16 +.byte 102,15,56,223,100,36,32 + pand %xmm8,%xmm9 + movq %r10,%rax +.byte 102,15,56,223,108,36,48 +.byte 102,15,56,223,116,36,64 +.byte 102,15,56,223,124,36,80 + pxor %xmm9,%xmm15 + + leaq 96(%rsi),%rsi + movups %xmm2,-96(%rsi) + movups %xmm3,-80(%rsi) + movups %xmm4,-64(%rsi) + movups %xmm5,-48(%rsi) + movups %xmm6,-32(%rsi) + movups %xmm7,-16(%rsi) + subq $96,%rdx + jnc L$xts_dec_grandloop + + movl $16+96,%eax + subl %r10d,%eax + movq %r11,%rcx + shrl $4,%eax + +L$xts_dec_short: + movl %eax,%r10d + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 + addq $96,%rdx + jz L$xts_dec_done + + pxor %xmm0,%xmm12 + cmpq $32,%rdx + jb L$xts_dec_one + pxor %xmm0,%xmm13 + je L$xts_dec_two + + pxor %xmm0,%xmm14 + cmpq $64,%rdx + jb L$xts_dec_three + je L$xts_dec_four + + movdqu (%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqu 32(%rdi),%xmm4 + pxor %xmm10,%xmm2 + movdqu 48(%rdi),%xmm5 + pxor %xmm11,%xmm3 + movdqu 64(%rdi),%xmm6 + leaq 80(%rdi),%rdi + pxor %xmm12,%xmm4 + pxor %xmm13,%xmm5 + pxor %xmm14,%xmm6 + + call _aesni_decrypt6 + + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + xorps %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + xorps 
%xmm14,%xmm6 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm14 + movdqu %xmm5,48(%rsi) + pcmpgtd %xmm15,%xmm14 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + pshufd $19,%xmm14,%xmm11 + andq $15,%r9 + jz L$xts_dec_ret + + movdqa %xmm15,%xmm10 + paddq %xmm15,%xmm15 + pand %xmm8,%xmm11 + pxor %xmm15,%xmm11 + jmp L$xts_dec_done2 + +.p2align 4 +L$xts_dec_one: + movups (%rdi),%xmm2 + leaq 16(%rdi),%rdi + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_12: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_12 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movdqa %xmm11,%xmm10 + movups %xmm2,(%rsi) + movdqa %xmm12,%xmm11 + leaq 16(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_two: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + leaq 32(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + + call _aesni_decrypt2 + + xorps %xmm10,%xmm2 + movdqa %xmm12,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm13,%xmm11 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + leaq 32(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_three: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + leaq 48(%rdi),%rdi + xorps %xmm10,%xmm2 + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + + call _aesni_decrypt3 + + xorps %xmm10,%xmm2 + movdqa %xmm13,%xmm10 + xorps %xmm11,%xmm3 + movdqa %xmm14,%xmm11 + xorps %xmm12,%xmm4 + movups %xmm2,(%rsi) + movups %xmm3,16(%rsi) + movups %xmm4,32(%rsi) + leaq 48(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_four: + movups (%rdi),%xmm2 + movups 16(%rdi),%xmm3 + movups 32(%rdi),%xmm4 + xorps %xmm10,%xmm2 + movups 48(%rdi),%xmm5 + leaq 64(%rdi),%rdi + xorps %xmm11,%xmm3 + xorps %xmm12,%xmm4 + xorps %xmm13,%xmm5 + + call _aesni_decrypt4 + + pxor %xmm10,%xmm2 + movdqa %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqa %xmm15,%xmm11 + pxor %xmm12,%xmm4 + movdqu %xmm2,(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm3,16(%rsi) + movdqu %xmm4,32(%rsi) + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + jmp L$xts_dec_done + +.p2align 4 +L$xts_dec_done: + andq $15,%r9 + jz L$xts_dec_ret +L$xts_dec_done2: + movq %r9,%rdx + movq %r11,%rcx + movl %r10d,%eax + + movups (%rdi),%xmm2 + xorps %xmm11,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_13: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_13 +.byte 102,15,56,223,209 + xorps %xmm11,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_steal: + movzbl 16(%rdi),%eax + movzbl (%rsi),%ecx + leaq 1(%rdi),%rdi + movb %al,(%rsi) + movb %cl,16(%rsi) + leaq 1(%rsi),%rsi + subq $1,%rdx + jnz L$xts_dec_steal + + subq %r9,%rsi + movq %r11,%rcx + movl %r10d,%eax + + movups (%rsi),%xmm2 + xorps %xmm10,%xmm2 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_14: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_14 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movups %xmm2,(%rsi) + +L$xts_dec_ret: + leaq (%rbp),%rsp + popq %rbp +L$xts_dec_epilogue: + .byte 0xf3,0xc3 + +.globl _aesni_cbc_encrypt + +.p2align 4 +_aesni_cbc_encrypt: + testq %rdx,%rdx + jz L$cbc_ret + + movl 240(%rcx),%r10d + movq %rcx,%r11 + testl %r9d,%r9d + jz L$cbc_decrypt + + movups (%r8),%xmm2 + movl %r10d,%eax + cmpq $16,%rdx + jb L$cbc_enc_tail + subq $16,%rdx + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movups (%rdi),%xmm3 + leaq 16(%rdi),%rdi + + movups (%rcx),%xmm0 + movups 
16(%rcx),%xmm1 + xorps %xmm0,%xmm3 + leaq 32(%rcx),%rcx + xorps %xmm3,%xmm2 +L$oop_enc1_15: +.byte 102,15,56,220,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_15 +.byte 102,15,56,221,209 + movl %r10d,%eax + movq %r11,%rcx + movups %xmm2,0(%rsi) + leaq 16(%rsi),%rsi + subq $16,%rdx + jnc L$cbc_enc_loop + addq $16,%rdx + jnz L$cbc_enc_tail + movups %xmm2,(%r8) + jmp L$cbc_ret + +L$cbc_enc_tail: + movq %rdx,%rcx + xchgq %rdi,%rsi +.long 0x9066A4F3 + movl $16,%ecx + subq %rdx,%rcx + xorl %eax,%eax +.long 0x9066AAF3 + leaq -16(%rdi),%rdi + movl %r10d,%eax + movq %rdi,%rsi + movq %r11,%rcx + xorq %rdx,%rdx + jmp L$cbc_enc_loop + +.p2align 4 +L$cbc_decrypt: + leaq (%rsp),%rax + pushq %rbp + subq $16,%rsp + andq $-16,%rsp + leaq -8(%rax),%rbp + movups (%r8),%xmm10 + movl %r10d,%eax + cmpq $80,%rdx + jbe L$cbc_dec_tail + + movups (%rcx),%xmm0 + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 + movl _OPENSSL_ia32cap_P+4(%rip),%r9d + cmpq $112,%rdx + jbe L$cbc_dec_six_or_seven + + andl $71303168,%r9d + subq $80,%rdx + cmpl $4194304,%r9d + je L$cbc_dec_loop6_enter + subq $32,%rdx + leaq 112(%rcx),%rcx + jmp L$cbc_dec_loop8_enter +.p2align 4 +L$cbc_dec_loop8: + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi +L$cbc_dec_loop8_enter: + movdqu 96(%rdi),%xmm8 + pxor %xmm0,%xmm2 + movdqu 112(%rdi),%xmm9 + pxor %xmm0,%xmm3 + movups 16-112(%rcx),%xmm1 + pxor %xmm0,%xmm4 + xorq %r11,%r11 + cmpq $112,%rdx + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm0,%xmm8 + +.byte 102,15,56,222,209 + pxor %xmm0,%xmm9 + movups 32-112(%rcx),%xmm0 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 + setnc %r11b + shlq $7,%r11 +.byte 102,68,15,56,222,201 + addq %rdi,%r11 + movups 48-112(%rcx),%xmm1 +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 64-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 80-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 96-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 112-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 128-112(%rcx),%xmm0 + nop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 144-112(%rcx),%xmm1 + cmpl $11,%eax +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 
102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 160-112(%rcx),%xmm0 + jb L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 176-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 192-112(%rcx),%xmm0 + je L$cbc_dec_done +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movups 208-112(%rcx),%xmm1 + nop +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 +.byte 102,68,15,56,222,192 +.byte 102,68,15,56,222,200 + movups 224-112(%rcx),%xmm0 + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_done: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm10 + pxor %xmm0,%xmm11 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + pxor %xmm0,%xmm12 + pxor %xmm0,%xmm13 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + pxor %xmm0,%xmm14 + pxor %xmm0,%xmm15 +.byte 102,68,15,56,222,193 +.byte 102,68,15,56,222,201 + movdqu 80(%rdi),%xmm1 + +.byte 102,65,15,56,223,210 + movdqu 96(%rdi),%xmm10 + pxor %xmm0,%xmm1 +.byte 102,65,15,56,223,219 + pxor %xmm0,%xmm10 + movdqu 112(%rdi),%xmm0 +.byte 102,65,15,56,223,228 + leaq 128(%rdi),%rdi + movdqu 0(%r11),%xmm11 +.byte 102,65,15,56,223,237 +.byte 102,65,15,56,223,246 + movdqu 16(%r11),%xmm12 + movdqu 32(%r11),%xmm13 +.byte 102,65,15,56,223,255 +.byte 102,68,15,56,223,193 + movdqu 48(%r11),%xmm14 + movdqu 64(%r11),%xmm15 +.byte 102,69,15,56,223,202 + movdqa %xmm0,%xmm10 + movdqu 80(%r11),%xmm1 + movups -112(%rcx),%xmm0 + + movups %xmm2,(%rsi) + movdqa %xmm11,%xmm2 + movups %xmm3,16(%rsi) + movdqa %xmm12,%xmm3 + movups %xmm4,32(%rsi) + movdqa %xmm13,%xmm4 + movups %xmm5,48(%rsi) + movdqa %xmm14,%xmm5 + movups %xmm6,64(%rsi) + movdqa %xmm15,%xmm6 + movups %xmm7,80(%rsi) + movdqa %xmm1,%xmm7 + movups %xmm8,96(%rsi) + leaq 112(%rsi),%rsi + + subq $128,%rdx + ja L$cbc_dec_loop8 + + movaps %xmm9,%xmm2 + leaq -112(%rcx),%rcx + addq $112,%rdx + jle L$cbc_dec_tail_collected + movups %xmm9,(%rsi) + leaq 16(%rsi),%rsi + cmpq $80,%rdx + jbe L$cbc_dec_tail + + movaps %xmm11,%xmm2 +L$cbc_dec_six_or_seven: + cmpq $96,%rdx + ja L$cbc_dec_seven + + movaps %xmm7,%xmm8 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + movdqa %xmm7,%xmm2 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_seven: + movups 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 + call _aesni_decrypt8 + movups 80(%rdi),%xmm9 + pxor %xmm10,%xmm2 + movups 96(%rdi),%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movdqu %xmm6,64(%rsi) + pxor %xmm9,%xmm8 + movdqu %xmm7,80(%rsi) + leaq 96(%rsi),%rsi + 
movdqa %xmm8,%xmm2 + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_loop6: + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + movdqu 0(%rdi),%xmm2 + movdqu 16(%rdi),%xmm3 + movdqa %xmm2,%xmm11 + movdqu 32(%rdi),%xmm4 + movdqa %xmm3,%xmm12 + movdqu 48(%rdi),%xmm5 + movdqa %xmm4,%xmm13 + movdqu 64(%rdi),%xmm6 + movdqa %xmm5,%xmm14 + movdqu 80(%rdi),%xmm7 + movdqa %xmm6,%xmm15 +L$cbc_dec_loop6_enter: + leaq 96(%rdi),%rdi + movdqa %xmm7,%xmm8 + + call _aesni_decrypt6 + + pxor %xmm10,%xmm2 + movdqa %xmm8,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movq %r11,%rcx + movdqu %xmm5,48(%rsi) + pxor %xmm15,%xmm7 + movl %r10d,%eax + movdqu %xmm6,64(%rsi) + leaq 80(%rsi),%rsi + subq $96,%rdx + ja L$cbc_dec_loop6 + + movdqa %xmm7,%xmm2 + addq $80,%rdx + jle L$cbc_dec_tail_collected + movups %xmm7,(%rsi) + leaq 16(%rsi),%rsi + +L$cbc_dec_tail: + movups (%rdi),%xmm2 + subq $16,%rdx + jbe L$cbc_dec_one + + movups 16(%rdi),%xmm3 + movaps %xmm2,%xmm11 + subq $16,%rdx + jbe L$cbc_dec_two + + movups 32(%rdi),%xmm4 + movaps %xmm3,%xmm12 + subq $16,%rdx + jbe L$cbc_dec_three + + movups 48(%rdi),%xmm5 + movaps %xmm4,%xmm13 + subq $16,%rdx + jbe L$cbc_dec_four + + movups 64(%rdi),%xmm6 + movaps %xmm5,%xmm14 + movaps %xmm6,%xmm15 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + pxor %xmm10,%xmm2 + movaps %xmm15,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + pxor %xmm14,%xmm6 + movdqu %xmm5,48(%rsi) + leaq 64(%rsi),%rsi + movdqa %xmm6,%xmm2 + subq $16,%rdx + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_one: + movaps %xmm2,%xmm11 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_16: +.byte 102,15,56,222,209 + decl %eax + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_16 +.byte 102,15,56,223,209 + xorps %xmm10,%xmm2 + movaps %xmm11,%xmm10 + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_two: + movaps %xmm3,%xmm12 + call _aesni_decrypt2 + pxor %xmm10,%xmm2 + movaps %xmm12,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + movdqa %xmm3,%xmm2 + leaq 16(%rsi),%rsi + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_three: + movaps %xmm4,%xmm13 + call _aesni_decrypt3 + pxor %xmm10,%xmm2 + movaps %xmm13,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + movdqa %xmm4,%xmm2 + leaq 32(%rsi),%rsi + jmp L$cbc_dec_tail_collected +.p2align 4 +L$cbc_dec_four: + movaps %xmm5,%xmm14 + call _aesni_decrypt4 + pxor %xmm10,%xmm2 + movaps %xmm14,%xmm10 + pxor %xmm11,%xmm3 + movdqu %xmm2,(%rsi) + pxor %xmm12,%xmm4 + movdqu %xmm3,16(%rsi) + pxor %xmm13,%xmm5 + movdqu %xmm4,32(%rsi) + movdqa %xmm5,%xmm2 + leaq 48(%rsi),%rsi + jmp L$cbc_dec_tail_collected + +.p2align 4 +L$cbc_dec_tail_collected: + movups %xmm10,(%r8) + andq $15,%rdx + jnz L$cbc_dec_tail_partial + movups %xmm2,(%rsi) + jmp L$cbc_dec_ret +.p2align 4 +L$cbc_dec_tail_partial: + movaps %xmm2,(%rsp) + movq $16,%rcx + movq %rsi,%rdi + subq %rdx,%rcx + leaq (%rsp),%rsi +.long 0x9066A4F3 + +L$cbc_dec_ret: + leaq (%rbp),%rsp + popq %rbp +L$cbc_ret: + .byte 0xf3,0xc3 + +.globl _aesni_set_decrypt_key + +.p2align 4 +_aesni_set_decrypt_key: +.byte 0x48,0x83,0xEC,0x08 + call __aesni_set_encrypt_key + shll $4,%esi + testl %eax,%eax + jnz L$dec_key_ret + leaq 16(%rdx,%rsi,1),%rdi + + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 + movups %xmm0,(%rdi) + movups %xmm1,(%rdx) + leaq 
16(%rdx),%rdx + leaq -16(%rdi),%rdi + +L$dec_key_inverse: + movups (%rdx),%xmm0 + movups (%rdi),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leaq 16(%rdx),%rdx + leaq -16(%rdi),%rdi + movups %xmm0,16(%rdi) + movups %xmm1,-16(%rdx) + cmpq %rdx,%rdi + ja L$dec_key_inverse + + movups (%rdx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%rdi) +L$dec_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +L$SEH_end_set_decrypt_key: + +.globl _aesni_set_encrypt_key + +.p2align 4 +_aesni_set_encrypt_key: +__aesni_set_encrypt_key: +.byte 0x48,0x83,0xEC,0x08 + movq $-1,%rax + testq %rdi,%rdi + jz L$enc_key_ret + testq %rdx,%rdx + jz L$enc_key_ret + + movups (%rdi),%xmm0 + xorps %xmm4,%xmm4 + leaq 16(%rdx),%rax + cmpl $256,%esi + je L$14rounds + cmpl $192,%esi + je L$12rounds + cmpl $128,%esi + jne L$bad_keybits + +L$10rounds: + movl $9,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,200,1 + call L$key_expansion_128_cold +.byte 102,15,58,223,200,2 + call L$key_expansion_128 +.byte 102,15,58,223,200,4 + call L$key_expansion_128 +.byte 102,15,58,223,200,8 + call L$key_expansion_128 +.byte 102,15,58,223,200,16 + call L$key_expansion_128 +.byte 102,15,58,223,200,32 + call L$key_expansion_128 +.byte 102,15,58,223,200,64 + call L$key_expansion_128 +.byte 102,15,58,223,200,128 + call L$key_expansion_128 +.byte 102,15,58,223,200,27 + call L$key_expansion_128 +.byte 102,15,58,223,200,54 + call L$key_expansion_128 + movups %xmm0,(%rax) + movl %esi,80(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + +.p2align 4 +L$12rounds: + movq 16(%rdi),%xmm2 + movl $11,%esi + movups %xmm0,(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_192a_cold +.byte 102,15,58,223,202,2 + call L$key_expansion_192b +.byte 102,15,58,223,202,4 + call L$key_expansion_192a +.byte 102,15,58,223,202,8 + call L$key_expansion_192b +.byte 102,15,58,223,202,16 + call L$key_expansion_192a +.byte 102,15,58,223,202,32 + call L$key_expansion_192b +.byte 102,15,58,223,202,64 + call L$key_expansion_192a +.byte 102,15,58,223,202,128 + call L$key_expansion_192b + movups %xmm0,(%rax) + movl %esi,48(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$14rounds: + movups 16(%rdi),%xmm2 + movl $13,%esi + leaq 16(%rax),%rax + movups %xmm0,(%rdx) + movups %xmm2,16(%rdx) +.byte 102,15,58,223,202,1 + call L$key_expansion_256a_cold +.byte 102,15,58,223,200,1 + call L$key_expansion_256b +.byte 102,15,58,223,202,2 + call L$key_expansion_256a +.byte 102,15,58,223,200,2 + call L$key_expansion_256b +.byte 102,15,58,223,202,4 + call L$key_expansion_256a +.byte 102,15,58,223,200,4 + call L$key_expansion_256b +.byte 102,15,58,223,202,8 + call L$key_expansion_256a +.byte 102,15,58,223,200,8 + call L$key_expansion_256b +.byte 102,15,58,223,202,16 + call L$key_expansion_256a +.byte 102,15,58,223,200,16 + call L$key_expansion_256b +.byte 102,15,58,223,202,32 + call L$key_expansion_256a +.byte 102,15,58,223,200,32 + call L$key_expansion_256b +.byte 102,15,58,223,202,64 + call L$key_expansion_256a + movups %xmm0,(%rax) + movl %esi,16(%rax) + xorq %rax,%rax + jmp L$enc_key_ret + +.p2align 4 +L$bad_keybits: + movq $-2,%rax +L$enc_key_ret: + addq $8,%rsp + .byte 0xf3,0xc3 +L$SEH_end_set_encrypt_key: + +.p2align 4 +L$key_expansion_128: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_192a: + movups %xmm0,(%rax) + leaq 16(%rax),%rax +L$key_expansion_192a_cold: + 
movaps %xmm2,%xmm5 +L$key_expansion_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%rax) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%rax) + leaq 32(%rax),%rax + jmp L$key_expansion_192b_warm + +.p2align 4 +L$key_expansion_256a: + movups %xmm2,(%rax) + leaq 16(%rax),%rax +L$key_expansion_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + .byte 0xf3,0xc3 + +.p2align 4 +L$key_expansion_256b: + movups %xmm0,(%rax) + leaq 16(%rax),%rax + + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + .byte 0xf3,0xc3 + + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$increment32: +.long 6,6,6,0 +L$increment64: +.long 1,0,0,0 +L$xts_magic: +.long 0x87,0,1,0 +L$increment1: +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 + +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/bsaes-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/bsaes-x86_64.s new file mode 100644 index 00000000000000..2af36a90b05f91 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/bsaes-x86_64.s @@ -0,0 +1,2497 @@ +.text + + + + + +.p2align 6 +_bsaes_encrypt8: + leaq L$BS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa 80(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 +_bsaes_encrypt8_bitslice: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor 
%xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp L$enc_sbox +.p2align 4 +L$enc_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +L$enc_sbox: + pxor %xmm5,%xmm4 + pxor %xmm0,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm1,%xmm5 + pxor %xmm15,%xmm4 + + pxor %xmm2,%xmm5 + pxor %xmm6,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm3,%xmm2 + pxor %xmm4,%xmm3 + pxor %xmm0,%xmm2 + + pxor %xmm6,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm6,%xmm10 + movdqa %xmm0,%xmm9 + movdqa %xmm4,%xmm8 + movdqa %xmm1,%xmm12 + movdqa %xmm5,%xmm11 + + pxor %xmm3,%xmm10 + pxor %xmm1,%xmm9 + pxor %xmm2,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm3,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm15,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm2,%xmm11 + pxor %xmm15,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm6,%xmm12 + movdqa %xmm4,%xmm11 + pxor %xmm0,%xmm12 + pxor %xmm5,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por %xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm1,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm3,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm0,%xmm13 + pand %xmm2,%xmm11 + movdqa %xmm6,%xmm14 + pand %xmm15,%xmm12 + pand %xmm4,%xmm13 + por %xmm5,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm5,%xmm11 + movdqa %xmm4,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm5,%xmm9 + pxor %xmm4,%xmm5 + pand %xmm14,%xmm4 + pand %xmm13,%xmm5 + pxor %xmm4,%xmm5 + pxor %xmm9,%xmm4 + pxor %xmm15,%xmm11 + pxor %xmm2,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm2,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm2 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm2,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm2 + pxor %xmm11,%xmm5 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm2 + + movdqa %xmm6,%xmm11 + movdqa %xmm0,%xmm7 + pxor %xmm3,%xmm11 + pxor %xmm1,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm3,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm1,%xmm3 + pand %xmm14,%xmm7 + pand 
%xmm12,%xmm1 + pand %xmm13,%xmm11 + pand %xmm8,%xmm3 + pxor %xmm11,%xmm7 + pxor %xmm1,%xmm3 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm1 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm6,%xmm10 + pxor %xmm0,%xmm6 + pand %xmm14,%xmm0 + pand %xmm13,%xmm6 + pxor %xmm0,%xmm6 + pxor %xmm10,%xmm0 + pxor %xmm11,%xmm6 + pxor %xmm11,%xmm3 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm1 + pxor %xmm15,%xmm6 + pxor %xmm5,%xmm0 + pxor %xmm6,%xmm3 + pxor %xmm15,%xmm5 + pxor %xmm0,%xmm15 + + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm4 + pxor %xmm2,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm4,%xmm3 + + pxor %xmm2,%xmm5 + decl %r10d + jl L$enc_done + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm3,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm5,%xmm10 + pxor %xmm9,%xmm3 + pshufd $147,%xmm2,%xmm11 + pxor %xmm10,%xmm5 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm2 + pshufd $147,%xmm1,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm1 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor %xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm2,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm5,%xmm11 + pshufd $78,%xmm2,%xmm7 + pxor %xmm1,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm3,%xmm10 + pshufd $78,%xmm5,%xmm2 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm1,%xmm5 + pxor %xmm11,%xmm7 + pshufd $78,%xmm3,%xmm1 + pxor %xmm12,%xmm8 + pxor %xmm10,%xmm2 + pxor %xmm14,%xmm6 + pxor %xmm13,%xmm5 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm1 + movdqa %xmm8,%xmm4 + movdqa 48(%r11),%xmm7 + jnz L$enc_loop + movdqa 64(%r11),%xmm7 + jmp L$enc_loop +.p2align 4 +L$enc_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm2,%xmm10 + psrlq $1,%xmm2 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm2 + pand %xmm7,%xmm1 + pand %xmm7,%xmm2 + pxor %xmm1,%xmm4 + psllq $1,%xmm1 + pxor %xmm2,%xmm6 + psllq $1,%xmm2 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm2 + movdqa %xmm3,%xmm9 + psrlq $1,%xmm3 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm5,%xmm3 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm3 + pand %xmm7,%xmm15 + pxor %xmm3,%xmm5 + psllq $1,%xmm3 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm2,%xmm10 + psrlq $2,%xmm2 + pxor %xmm4,%xmm6 + pxor %xmm1,%xmm2 + pand %xmm8,%xmm6 + pand %xmm8,%xmm2 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm2,%xmm1 + psllq $2,%xmm2 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm2 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm5,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm5 + psllq $2,%xmm0 + pxor %xmm15,%xmm3 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm5,%xmm9 + psrlq $4,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $4,%xmm3 + pxor %xmm4,%xmm5 + pxor %xmm1,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm4 + psllq $4,%xmm5 + pxor %xmm3,%xmm1 + psllq $4,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm2,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm2 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm1 + 
pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 + + + +.p2align 6 +_bsaes_decrypt8: + leaq L$BS0(%rip),%r11 + + movdqa (%rax),%xmm8 + leaq 16(%rax),%rax + movdqa -48(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm3,%xmm10 + psrlq $1,%xmm3 + pxor %xmm6,%xmm5 + pxor %xmm4,%xmm3 + pand %xmm7,%xmm5 + pand %xmm7,%xmm3 + pxor %xmm5,%xmm6 + psllq $1,%xmm5 + pxor %xmm3,%xmm4 + psllq $1,%xmm3 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm3 + movdqa %xmm1,%xmm9 + psrlq $1,%xmm1 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm1 + pand %xmm7,%xmm15 + pxor %xmm1,%xmm2 + psllq $1,%xmm1 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm1 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm4,%xmm9 + psrlq $2,%xmm4 + movdqa %xmm3,%xmm10 + psrlq $2,%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pand %xmm8,%xmm4 + pand %xmm8,%xmm3 + pxor %xmm4,%xmm6 + psllq $2,%xmm4 + pxor %xmm3,%xmm5 + psllq $2,%xmm3 + pxor %xmm9,%xmm4 + pxor %xmm10,%xmm3 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm2,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm2 + psllq $2,%xmm0 + pxor %xmm15,%xmm1 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm2,%xmm9 + psrlq $4,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $4,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm5,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm6 + psllq $4,%xmm2 + pxor %xmm1,%xmm5 + psllq $4,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm4 + psllq $4,%xmm0 + pxor %xmm15,%xmm3 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + decl %r10d + jmp L$dec_sbox +.p2align 4 +L$dec_loop: + pxor 0(%rax),%xmm15 + pxor 16(%rax),%xmm0 + pxor 32(%rax),%xmm1 + pxor 48(%rax),%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor 64(%rax),%xmm3 + pxor 80(%rax),%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor 96(%rax),%xmm5 + pxor 112(%rax),%xmm6 +.byte 102,15,56,0,223 +.byte 102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq 128(%rax),%rax +L$dec_sbox: + pxor %xmm3,%xmm2 + + pxor %xmm6,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm3,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm0 + + pxor %xmm0,%xmm15 + pxor %xmm4,%xmm1 + pxor %xmm15,%xmm2 + pxor %xmm15,%xmm4 + pxor %xmm2,%xmm0 + movdqa %xmm2,%xmm10 + movdqa %xmm6,%xmm9 + movdqa %xmm0,%xmm8 + movdqa %xmm3,%xmm12 + movdqa %xmm4,%xmm11 + + pxor %xmm15,%xmm10 + pxor %xmm3,%xmm9 + pxor %xmm5,%xmm8 + movdqa %xmm10,%xmm13 + pxor %xmm15,%xmm12 + movdqa %xmm9,%xmm7 + pxor %xmm1,%xmm11 + movdqa %xmm10,%xmm14 + + por %xmm8,%xmm9 + por %xmm11,%xmm10 + pxor %xmm7,%xmm14 + pand %xmm11,%xmm13 + pxor %xmm8,%xmm11 + pand %xmm8,%xmm7 + pand %xmm11,%xmm14 + movdqa %xmm5,%xmm11 + pxor %xmm1,%xmm11 + pand %xmm11,%xmm12 + pxor %xmm12,%xmm10 + pxor %xmm12,%xmm9 + movdqa %xmm2,%xmm12 + movdqa %xmm0,%xmm11 + pxor %xmm6,%xmm12 + pxor %xmm4,%xmm11 + movdqa %xmm12,%xmm8 + pand %xmm11,%xmm12 + por 
%xmm11,%xmm8 + pxor %xmm12,%xmm7 + pxor %xmm14,%xmm10 + pxor %xmm13,%xmm9 + pxor %xmm14,%xmm8 + movdqa %xmm3,%xmm11 + pxor %xmm13,%xmm7 + movdqa %xmm15,%xmm12 + pxor %xmm13,%xmm8 + movdqa %xmm6,%xmm13 + pand %xmm5,%xmm11 + movdqa %xmm2,%xmm14 + pand %xmm1,%xmm12 + pand %xmm0,%xmm13 + por %xmm4,%xmm14 + pxor %xmm11,%xmm10 + pxor %xmm12,%xmm9 + pxor %xmm13,%xmm8 + pxor %xmm14,%xmm7 + + + + + + movdqa %xmm10,%xmm11 + pand %xmm8,%xmm10 + pxor %xmm9,%xmm11 + + movdqa %xmm7,%xmm13 + movdqa %xmm11,%xmm14 + pxor %xmm10,%xmm13 + pand %xmm13,%xmm14 + + movdqa %xmm8,%xmm12 + pxor %xmm9,%xmm14 + pxor %xmm7,%xmm12 + + pxor %xmm9,%xmm10 + + pand %xmm10,%xmm12 + + movdqa %xmm13,%xmm9 + pxor %xmm7,%xmm12 + + pxor %xmm12,%xmm9 + pxor %xmm12,%xmm8 + + pand %xmm7,%xmm9 + + pxor %xmm9,%xmm13 + pxor %xmm9,%xmm8 + + pand %xmm14,%xmm13 + + pxor %xmm11,%xmm13 + movdqa %xmm4,%xmm11 + movdqa %xmm0,%xmm7 + movdqa %xmm14,%xmm9 + pxor %xmm13,%xmm9 + pand %xmm4,%xmm9 + pxor %xmm0,%xmm4 + pand %xmm14,%xmm0 + pand %xmm13,%xmm4 + pxor %xmm0,%xmm4 + pxor %xmm9,%xmm0 + pxor %xmm1,%xmm11 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm1,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm5,%xmm1 + pand %xmm14,%xmm7 + pand %xmm12,%xmm5 + pand %xmm13,%xmm11 + pand %xmm8,%xmm1 + pxor %xmm11,%xmm7 + pxor %xmm5,%xmm1 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm5 + pxor %xmm11,%xmm4 + pxor %xmm11,%xmm1 + pxor %xmm7,%xmm0 + pxor %xmm7,%xmm5 + + movdqa %xmm2,%xmm11 + movdqa %xmm6,%xmm7 + pxor %xmm15,%xmm11 + pxor %xmm3,%xmm7 + movdqa %xmm14,%xmm10 + movdqa %xmm12,%xmm9 + pxor %xmm13,%xmm10 + pxor %xmm8,%xmm9 + pand %xmm11,%xmm10 + pand %xmm15,%xmm9 + pxor %xmm7,%xmm11 + pxor %xmm3,%xmm15 + pand %xmm14,%xmm7 + pand %xmm12,%xmm3 + pand %xmm13,%xmm11 + pand %xmm8,%xmm15 + pxor %xmm11,%xmm7 + pxor %xmm3,%xmm15 + pxor %xmm10,%xmm11 + pxor %xmm9,%xmm3 + pxor %xmm12,%xmm14 + pxor %xmm8,%xmm13 + movdqa %xmm14,%xmm10 + pxor %xmm13,%xmm10 + pand %xmm2,%xmm10 + pxor %xmm6,%xmm2 + pand %xmm14,%xmm6 + pand %xmm13,%xmm2 + pxor %xmm6,%xmm2 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm11,%xmm15 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm3 + pxor %xmm6,%xmm0 + pxor %xmm4,%xmm5 + + pxor %xmm0,%xmm3 + pxor %xmm6,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm1,%xmm3 + pxor %xmm15,%xmm6 + pxor %xmm4,%xmm3 + pxor %xmm5,%xmm2 + pxor %xmm0,%xmm5 + pxor %xmm3,%xmm2 + + pxor %xmm15,%xmm3 + pxor %xmm2,%xmm6 + decl %r10d + jl L$dec_done + + pshufd $78,%xmm15,%xmm7 + pshufd $78,%xmm2,%xmm13 + pxor %xmm15,%xmm7 + pshufd $78,%xmm4,%xmm14 + pxor %xmm2,%xmm13 + pshufd $78,%xmm0,%xmm8 + pxor %xmm4,%xmm14 + pshufd $78,%xmm5,%xmm9 + pxor %xmm0,%xmm8 + pshufd $78,%xmm3,%xmm10 + pxor %xmm5,%xmm9 + pxor %xmm13,%xmm15 + pxor %xmm13,%xmm0 + pshufd $78,%xmm1,%xmm11 + pxor %xmm3,%xmm10 + pxor %xmm7,%xmm5 + pxor %xmm8,%xmm3 + pshufd $78,%xmm6,%xmm12 + pxor %xmm1,%xmm11 + pxor %xmm14,%xmm0 + pxor %xmm9,%xmm1 + pxor %xmm6,%xmm12 + + pxor %xmm14,%xmm5 + pxor %xmm13,%xmm3 + pxor %xmm13,%xmm1 + pxor %xmm10,%xmm6 + pxor %xmm11,%xmm2 + pxor %xmm14,%xmm1 + pxor %xmm14,%xmm6 + pxor %xmm12,%xmm4 + pshufd $147,%xmm15,%xmm7 + pshufd $147,%xmm0,%xmm8 + pxor %xmm7,%xmm15 + pshufd $147,%xmm5,%xmm9 + pxor %xmm8,%xmm0 + pshufd $147,%xmm3,%xmm10 + pxor %xmm9,%xmm5 + pshufd $147,%xmm1,%xmm11 + pxor %xmm10,%xmm3 + pshufd $147,%xmm6,%xmm12 + pxor %xmm11,%xmm1 + pshufd $147,%xmm2,%xmm13 + pxor %xmm12,%xmm6 + pshufd $147,%xmm4,%xmm14 + pxor %xmm13,%xmm2 + pxor %xmm14,%xmm4 + + pxor %xmm15,%xmm8 + pxor 
%xmm4,%xmm7 + pxor %xmm4,%xmm8 + pshufd $78,%xmm15,%xmm15 + pxor %xmm0,%xmm9 + pshufd $78,%xmm0,%xmm0 + pxor %xmm1,%xmm12 + pxor %xmm7,%xmm15 + pxor %xmm6,%xmm13 + pxor %xmm8,%xmm0 + pxor %xmm3,%xmm11 + pshufd $78,%xmm1,%xmm7 + pxor %xmm2,%xmm14 + pshufd $78,%xmm6,%xmm8 + pxor %xmm5,%xmm10 + pshufd $78,%xmm3,%xmm1 + pxor %xmm4,%xmm10 + pshufd $78,%xmm4,%xmm6 + pxor %xmm4,%xmm11 + pshufd $78,%xmm2,%xmm3 + pxor %xmm11,%xmm7 + pshufd $78,%xmm5,%xmm2 + pxor %xmm12,%xmm8 + pxor %xmm1,%xmm10 + pxor %xmm14,%xmm6 + pxor %xmm3,%xmm13 + movdqa %xmm7,%xmm3 + pxor %xmm9,%xmm2 + movdqa %xmm13,%xmm5 + movdqa %xmm8,%xmm4 + movdqa %xmm2,%xmm1 + movdqa %xmm10,%xmm2 + movdqa -16(%r11),%xmm7 + jnz L$dec_loop + movdqa -32(%r11),%xmm7 + jmp L$dec_loop +.p2align 4 +L$dec_done: + movdqa 0(%r11),%xmm7 + movdqa 16(%r11),%xmm8 + movdqa %xmm2,%xmm9 + psrlq $1,%xmm2 + movdqa %xmm1,%xmm10 + psrlq $1,%xmm1 + pxor %xmm4,%xmm2 + pxor %xmm6,%xmm1 + pand %xmm7,%xmm2 + pand %xmm7,%xmm1 + pxor %xmm2,%xmm4 + psllq $1,%xmm2 + pxor %xmm1,%xmm6 + psllq $1,%xmm1 + pxor %xmm9,%xmm2 + pxor %xmm10,%xmm1 + movdqa %xmm5,%xmm9 + psrlq $1,%xmm5 + movdqa %xmm15,%xmm10 + psrlq $1,%xmm15 + pxor %xmm3,%xmm5 + pxor %xmm0,%xmm15 + pand %xmm7,%xmm5 + pand %xmm7,%xmm15 + pxor %xmm5,%xmm3 + psllq $1,%xmm5 + pxor %xmm15,%xmm0 + psllq $1,%xmm15 + pxor %xmm9,%xmm5 + pxor %xmm10,%xmm15 + movdqa 32(%r11),%xmm7 + movdqa %xmm6,%xmm9 + psrlq $2,%xmm6 + movdqa %xmm1,%xmm10 + psrlq $2,%xmm1 + pxor %xmm4,%xmm6 + pxor %xmm2,%xmm1 + pand %xmm8,%xmm6 + pand %xmm8,%xmm1 + pxor %xmm6,%xmm4 + psllq $2,%xmm6 + pxor %xmm1,%xmm2 + psllq $2,%xmm1 + pxor %xmm9,%xmm6 + pxor %xmm10,%xmm1 + movdqa %xmm0,%xmm9 + psrlq $2,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $2,%xmm15 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm15 + pand %xmm8,%xmm0 + pand %xmm8,%xmm15 + pxor %xmm0,%xmm3 + psllq $2,%xmm0 + pxor %xmm15,%xmm5 + psllq $2,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa %xmm3,%xmm9 + psrlq $4,%xmm3 + movdqa %xmm5,%xmm10 + psrlq $4,%xmm5 + pxor %xmm4,%xmm3 + pxor %xmm2,%xmm5 + pand %xmm7,%xmm3 + pand %xmm7,%xmm5 + pxor %xmm3,%xmm4 + psllq $4,%xmm3 + pxor %xmm5,%xmm2 + psllq $4,%xmm5 + pxor %xmm9,%xmm3 + pxor %xmm10,%xmm5 + movdqa %xmm0,%xmm9 + psrlq $4,%xmm0 + movdqa %xmm15,%xmm10 + psrlq $4,%xmm15 + pxor %xmm6,%xmm0 + pxor %xmm1,%xmm15 + pand %xmm7,%xmm0 + pand %xmm7,%xmm15 + pxor %xmm0,%xmm6 + psllq $4,%xmm0 + pxor %xmm15,%xmm1 + psllq $4,%xmm15 + pxor %xmm9,%xmm0 + pxor %xmm10,%xmm15 + movdqa (%rax),%xmm7 + pxor %xmm7,%xmm5 + pxor %xmm7,%xmm3 + pxor %xmm7,%xmm1 + pxor %xmm7,%xmm6 + pxor %xmm7,%xmm2 + pxor %xmm7,%xmm4 + pxor %xmm7,%xmm15 + pxor %xmm7,%xmm0 + .byte 0xf3,0xc3 + + +.p2align 4 +_bsaes_key_convert: + leaq L$masks(%rip),%r11 + movdqu (%rcx),%xmm7 + leaq 16(%rcx),%rcx + movdqa 0(%r11),%xmm0 + movdqa 16(%r11),%xmm1 + movdqa 32(%r11),%xmm2 + movdqa 48(%r11),%xmm3 + movdqa 64(%r11),%xmm4 + pcmpeqd %xmm5,%xmm5 + + movdqu (%rcx),%xmm6 + movdqa %xmm7,(%rax) + leaq 16(%rax),%rax + decl %r10d + jmp L$key_loop +.p2align 4 +L$key_loop: +.byte 102,15,56,0,244 + + movdqa %xmm0,%xmm8 + movdqa %xmm1,%xmm9 + + pand %xmm6,%xmm8 + pand %xmm6,%xmm9 + movdqa %xmm2,%xmm10 + pcmpeqb %xmm0,%xmm8 + psllq $4,%xmm0 + movdqa %xmm3,%xmm11 + pcmpeqb %xmm1,%xmm9 + psllq $4,%xmm1 + + pand %xmm6,%xmm10 + pand %xmm6,%xmm11 + movdqa %xmm0,%xmm12 + pcmpeqb %xmm2,%xmm10 + psllq $4,%xmm2 + movdqa %xmm1,%xmm13 + pcmpeqb %xmm3,%xmm11 + psllq $4,%xmm3 + + movdqa %xmm2,%xmm14 + movdqa %xmm3,%xmm15 + pxor %xmm5,%xmm8 + pxor %xmm5,%xmm9 + + pand %xmm6,%xmm12 + pand %xmm6,%xmm13 + movdqa %xmm8,0(%rax) + 
pcmpeqb %xmm0,%xmm12 + psrlq $4,%xmm0 + movdqa %xmm9,16(%rax) + pcmpeqb %xmm1,%xmm13 + psrlq $4,%xmm1 + leaq 16(%rcx),%rcx + + pand %xmm6,%xmm14 + pand %xmm6,%xmm15 + movdqa %xmm10,32(%rax) + pcmpeqb %xmm2,%xmm14 + psrlq $4,%xmm2 + movdqa %xmm11,48(%rax) + pcmpeqb %xmm3,%xmm15 + psrlq $4,%xmm3 + movdqu (%rcx),%xmm6 + + pxor %xmm5,%xmm13 + pxor %xmm5,%xmm14 + movdqa %xmm12,64(%rax) + movdqa %xmm13,80(%rax) + movdqa %xmm14,96(%rax) + movdqa %xmm15,112(%rax) + leaq 128(%rax),%rax + decl %r10d + jnz L$key_loop + + movdqa 80(%r11),%xmm7 + + .byte 0xf3,0xc3 + + +.globl _bsaes_cbc_encrypt + +.p2align 4 +_bsaes_cbc_encrypt: + cmpl $0,%r9d + jne _asm_AES_cbc_encrypt + cmpq $128,%rdx + jb _asm_AES_cbc_encrypt + + movq %rsp,%rax +L$cbc_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movq %r8,%rbx + shrq $4,%r14 + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + movdqu (%rbx),%xmm14 + subq $8,%r14 +L$cbc_dec_loop: + movdqu 0(%r12),%xmm15 + movdqu 16(%r12),%xmm0 + movdqu 32(%r12),%xmm1 + movdqu 48(%r12),%xmm2 + movdqu 64(%r12),%xmm3 + movdqu 80(%r12),%xmm4 + movq %rsp,%rax + movdqu 96(%r12),%xmm5 + movl %edx,%r10d + movdqu 112(%r12),%xmm6 + movdqa %xmm14,32(%rbp) + + call _bsaes_decrypt8 + + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm2 + movdqu 112(%r12),%xmm14 + pxor %xmm13,%xmm4 + movdqu %xmm15,0(%r13) + leaq 128(%r12),%r12 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + subq $8,%r14 + jnc L$cbc_dec_loop + + addq $8,%r14 + jz L$cbc_dec_done + + movdqu 0(%r12),%xmm15 + movq %rsp,%rax + movl %edx,%r10d + cmpq $2,%r14 + jb L$cbc_dec_one + movdqu 16(%r12),%xmm0 + je L$cbc_dec_two + movdqu 32(%r12),%xmm1 + cmpq $4,%r14 + jb L$cbc_dec_three + movdqu 48(%r12),%xmm2 + je L$cbc_dec_four + movdqu 64(%r12),%xmm3 + cmpq $6,%r14 + jb L$cbc_dec_five + movdqu 80(%r12),%xmm4 + je L$cbc_dec_six + movdqu 96(%r12),%xmm5 + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm6 + movdqu 96(%r12),%xmm14 + pxor %xmm12,%xmm2 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_six: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm1 + movdqu 80(%r12),%xmm14 + pxor %xmm11,%xmm6 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu 
%xmm3,48(%r13) + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_five: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm3 + movdqu 64(%r12),%xmm14 + pxor %xmm10,%xmm1 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_four: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm5 + movdqu 48(%r12),%xmm14 + pxor %xmm9,%xmm3 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_three: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm0 + movdqu 32(%r12),%xmm14 + pxor %xmm8,%xmm5 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_two: + movdqa %xmm14,32(%rbp) + call _bsaes_decrypt8 + pxor 32(%rbp),%xmm15 + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm14 + pxor %xmm7,%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + jmp L$cbc_dec_done +.p2align 4 +L$cbc_dec_one: + leaq (%r12),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm14 + movdqu %xmm14,(%r13) + movdqa %xmm15,%xmm14 + +L$cbc_dec_done: + movdqu %xmm14,(%rbx) + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$cbc_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$cbc_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$cbc_dec_epilogue: + .byte 0xf3,0xc3 + + +.globl _bsaes_ctr32_encrypt_blocks + +.p2align 4 +_bsaes_ctr32_encrypt_blocks: + movq %rsp,%rax +L$ctr_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movdqu (%r8),%xmm0 + movl 240(%rcx),%eax + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + movdqa %xmm0,32(%rbp) + cmpq $8,%rdx + jb L$ctr_enc_short + + movl %eax,%ebx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %ebx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + movdqa (%rsp),%xmm8 + leaq L$ADD1(%rip),%r11 + movdqa 32(%rbp),%xmm15 + movdqa -32(%r11),%xmm7 +.byte 102,68,15,56,0,199 +.byte 102,68,15,56,0,255 + movdqa %xmm8,(%rsp) + jmp L$ctr_enc_loop +.p2align 4 +L$ctr_enc_loop: + movdqa %xmm15,32(%rbp) + movdqa %xmm15,%xmm0 + movdqa %xmm15,%xmm1 + paddd 0(%r11),%xmm0 + movdqa %xmm15,%xmm2 + paddd 16(%r11),%xmm1 + movdqa %xmm15,%xmm3 + paddd 32(%r11),%xmm2 + movdqa %xmm15,%xmm4 + paddd 48(%r11),%xmm3 + movdqa %xmm15,%xmm5 + paddd 64(%r11),%xmm4 + movdqa %xmm15,%xmm6 + paddd 80(%r11),%xmm5 + paddd 96(%r11),%xmm6 + + + + movdqa (%rsp),%xmm8 + leaq 16(%rsp),%rax + movdqa -16(%r11),%xmm7 + pxor %xmm8,%xmm15 + pxor %xmm8,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm8,%xmm2 +.byte 102,68,15,56,0,255 +.byte 102,15,56,0,199 + pxor %xmm8,%xmm3 + pxor %xmm8,%xmm4 +.byte 102,15,56,0,207 +.byte 102,15,56,0,215 + pxor %xmm8,%xmm5 + pxor %xmm8,%xmm6 +.byte 102,15,56,0,223 +.byte 
102,15,56,0,231 +.byte 102,15,56,0,239 +.byte 102,15,56,0,247 + leaq L$BS0(%rip),%r11 + movl %ebx,%r10d + + call _bsaes_encrypt8_bitslice + + subq $8,%r14 + jc L$ctr_enc_loop_done + + movdqu 0(%r12),%xmm7 + movdqu 16(%r12),%xmm8 + movdqu 32(%r12),%xmm9 + movdqu 48(%r12),%xmm10 + movdqu 64(%r12),%xmm11 + movdqu 80(%r12),%xmm12 + movdqu 96(%r12),%xmm13 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + pxor %xmm15,%xmm7 + movdqa 32(%rbp),%xmm15 + pxor %xmm8,%xmm0 + movdqu %xmm7,0(%r13) + pxor %xmm9,%xmm3 + movdqu %xmm0,16(%r13) + pxor %xmm10,%xmm5 + movdqu %xmm3,32(%r13) + pxor %xmm11,%xmm2 + movdqu %xmm5,48(%r13) + pxor %xmm12,%xmm6 + movdqu %xmm2,64(%r13) + pxor %xmm13,%xmm1 + movdqu %xmm6,80(%r13) + pxor %xmm14,%xmm4 + movdqu %xmm1,96(%r13) + leaq L$ADD1(%rip),%r11 + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + paddd 112(%r11),%xmm15 + jnz L$ctr_enc_loop + + jmp L$ctr_enc_done +.p2align 4 +L$ctr_enc_loop_done: + addq $8,%r14 + movdqu 0(%r12),%xmm7 + pxor %xmm7,%xmm15 + movdqu %xmm15,0(%r13) + cmpq $2,%r14 + jb L$ctr_enc_done + movdqu 16(%r12),%xmm8 + pxor %xmm8,%xmm0 + movdqu %xmm0,16(%r13) + je L$ctr_enc_done + movdqu 32(%r12),%xmm9 + pxor %xmm9,%xmm3 + movdqu %xmm3,32(%r13) + cmpq $4,%r14 + jb L$ctr_enc_done + movdqu 48(%r12),%xmm10 + pxor %xmm10,%xmm5 + movdqu %xmm5,48(%r13) + je L$ctr_enc_done + movdqu 64(%r12),%xmm11 + pxor %xmm11,%xmm2 + movdqu %xmm2,64(%r13) + cmpq $6,%r14 + jb L$ctr_enc_done + movdqu 80(%r12),%xmm12 + pxor %xmm12,%xmm6 + movdqu %xmm6,80(%r13) + je L$ctr_enc_done + movdqu 96(%r12),%xmm13 + pxor %xmm13,%xmm1 + movdqu %xmm1,96(%r13) + jmp L$ctr_enc_done + +.p2align 4 +L$ctr_enc_short: + leaq 32(%rbp),%rdi + leaq 48(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_encrypt + movdqu (%r12),%xmm0 + leaq 16(%r12),%r12 + movl 44(%rbp),%eax + bswapl %eax + pxor 48(%rbp),%xmm0 + incl %eax + movdqu %xmm0,(%r13) + bswapl %eax + leaq 16(%r13),%r13 + movl %eax,44(%rsp) + decq %r14 + jnz L$ctr_enc_short + +L$ctr_enc_done: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$ctr_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$ctr_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$ctr_enc_epilogue: + .byte 0xf3,0xc3 + +.globl _bsaes_xts_encrypt + +.p2align 4 +_bsaes_xts_encrypt: + movq %rsp,%rax +L$xts_enc_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call _asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor %xmm6,%xmm7 + movdqa %xmm7,(%rax) + + andq $-16,%r14 + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc L$xts_enc_short + jmp L$xts_enc_loop + +.p2align 4 +L$xts_enc_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 
0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm1,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc L$xts_enc_loop + +L$xts_enc_short: + addq $128,%r14 + jz L$xts_enc_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je L$xts_enc_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je L$xts_enc_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je L$xts_enc_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je L$xts_enc_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je L$xts_enc_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa 
%xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je L$xts_enc_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + pxor 96(%rsp),%xmm1 + movdqu %xmm6,80(%r13) + movdqu %xmm1,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm2,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + pxor 64(%rsp),%xmm2 + movdqu %xmm5,48(%r13) + movdqu %xmm2,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm5 + movdqu %xmm3,32(%r13) + movdqu %xmm5,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm3 + movdqu %xmm0,16(%r13) + movdqu %xmm3,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_encrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp L$xts_enc_done +.p2align 4 +L$xts_enc_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_encrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +L$xts_enc_done: + andl $15,%ebx + jz L$xts_enc_ret + movq %r13,%rdx + +L$xts_enc_steal: + movzbl (%r12),%eax + movzbl -16(%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,-16(%rdx) + movb %cl,0(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz L$xts_enc_steal + + movdqu -16(%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_encrypt + pxor 32(%rbp),%xmm6 + movdqu %xmm6,-16(%r13) + +L$xts_enc_ret: + 
leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$xts_enc_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$xts_enc_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$xts_enc_epilogue: + .byte 0xf3,0xc3 + + +.globl _bsaes_xts_decrypt + +.p2align 4 +_bsaes_xts_decrypt: + movq %rsp,%rax +L$xts_dec_prologue: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq -72(%rsp),%rsp + movq %rsp,%rbp + movq %rdi,%r12 + movq %rsi,%r13 + movq %rdx,%r14 + movq %rcx,%r15 + + leaq (%r9),%rdi + leaq 32(%rbp),%rsi + leaq (%r8),%rdx + call _asm_AES_encrypt + + movl 240(%r15),%eax + movq %r14,%rbx + + movl %eax,%edx + shlq $7,%rax + subq $96,%rax + subq %rax,%rsp + + movq %rsp,%rax + movq %r15,%rcx + movl %edx,%r10d + call _bsaes_key_convert + pxor (%rsp),%xmm7 + movdqa %xmm6,(%rax) + movdqa %xmm7,(%rsp) + + xorl %eax,%eax + andq $-16,%r14 + testl $15,%ebx + setnz %al + shlq $4,%rax + subq %rax,%r14 + + subq $128,%rsp + movdqa 32(%rbp),%xmm6 + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + + subq $128,%r14 + jc L$xts_dec_short + jmp L$xts_dec_loop + +.p2align 4 +L$xts_dec_loop: + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqu 112(%r12),%xmm14 + leaq 128(%r12),%r12 + movdqa %xmm6,112(%rsp) + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + pxor %xmm14,%xmm6 + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + pxor 112(%rsp),%xmm4 + movdqu %xmm2,96(%r13) + movdqu %xmm4,112(%r13) + leaq 128(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 
+ pxor %xmm14,%xmm14 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + + subq $128,%r14 + jnc L$xts_dec_loop + +L$xts_dec_short: + addq $128,%r14 + jz L$xts_dec_done + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm15 + movdqa %xmm6,0(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm0 + movdqa %xmm6,16(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 0(%r12),%xmm7 + cmpq $16,%r14 + je L$xts_dec_1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm1 + movdqa %xmm6,32(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 16(%r12),%xmm8 + cmpq $32,%r14 + je L$xts_dec_2 + pxor %xmm7,%xmm15 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm2 + movdqa %xmm6,48(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 32(%r12),%xmm9 + cmpq $48,%r14 + je L$xts_dec_3 + pxor %xmm8,%xmm0 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm3 + movdqa %xmm6,64(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 48(%r12),%xmm10 + cmpq $64,%r14 + je L$xts_dec_4 + pxor %xmm9,%xmm1 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm4 + movdqa %xmm6,80(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 64(%r12),%xmm11 + cmpq $80,%r14 + je L$xts_dec_5 + pxor %xmm10,%xmm2 + pshufd $19,%xmm14,%xmm13 + pxor %xmm14,%xmm14 + movdqa %xmm6,%xmm5 + movdqa %xmm6,96(%rsp) + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + pcmpgtd %xmm6,%xmm14 + pxor %xmm13,%xmm6 + movdqu 80(%r12),%xmm12 + cmpq $96,%r14 + je L$xts_dec_6 + pxor %xmm11,%xmm3 + movdqu 96(%r12),%xmm13 + pxor %xmm12,%xmm4 + movdqa %xmm6,112(%rsp) + leaq 112(%r12),%r12 + pxor %xmm13,%xmm5 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + pxor 96(%rsp),%xmm2 + movdqu %xmm6,80(%r13) + movdqu %xmm2,96(%r13) + leaq 112(%r13),%r13 + + movdqa 112(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_6: + pxor %xmm11,%xmm3 + leaq 96(%r12),%r12 + pxor %xmm12,%xmm4 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + pxor 80(%rsp),%xmm6 + movdqu %xmm1,64(%r13) + movdqu %xmm6,80(%r13) + leaq 96(%r13),%r13 + + movdqa 96(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_5: + pxor %xmm10,%xmm2 + leaq 80(%r12),%r12 + pxor %xmm11,%xmm3 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + pxor 64(%rsp),%xmm1 + movdqu %xmm3,48(%r13) + movdqu %xmm1,64(%r13) + leaq 80(%r13),%r13 + + movdqa 80(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_4: + pxor %xmm9,%xmm1 + leaq 64(%r12),%r12 + pxor %xmm10,%xmm2 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 
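Between batches, both the encrypt and the decrypt loop advance the tweak with the pshufd/pcmpgtd/paddq/pand/pxor sequence driven by the L$xts_magic constant (0x87 in the low half, 1 in the high half for the cross-lane carry). In scalar terms that is multiplication of the 128-bit tweak by x in GF(2^128); a small C sketch with two little-endian 64-bit halves (names are illustrative):

    #include <stdint.h>

    /* Multiply an XTS tweak by x in GF(2^128), reducing by x^128 + x^7 + x^2 + x + 1.
     * t[0] holds the low 64 bits, t[1] the high 64 bits. */
    static void xts_double_tweak(uint64_t t[2])
    {
        uint64_t carry_out = t[1] >> 63;                 /* bit shifted out of the tweak */
        uint64_t carry_mid = t[0] >> 63;                 /* bit moving from low to high  */
        t[1] = (t[1] << 1) | carry_mid;
        t[0] = (t[0] << 1) ^ (carry_out ? 0x87 : 0);     /* fold the reduction back in   */
    }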
+ + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + pxor 48(%rsp),%xmm3 + movdqu %xmm5,32(%r13) + movdqu %xmm3,48(%r13) + leaq 64(%r13),%r13 + + movdqa 64(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_3: + pxor %xmm8,%xmm0 + leaq 48(%r12),%r12 + pxor %xmm9,%xmm1 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + pxor 32(%rsp),%xmm5 + movdqu %xmm0,16(%r13) + movdqu %xmm5,32(%r13) + leaq 48(%r13),%r13 + + movdqa 48(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_2: + pxor %xmm7,%xmm15 + leaq 32(%r12),%r12 + pxor %xmm8,%xmm0 + leaq 128(%rsp),%rax + movl %edx,%r10d + + call _bsaes_decrypt8 + + pxor 0(%rsp),%xmm15 + pxor 16(%rsp),%xmm0 + movdqu %xmm15,0(%r13) + movdqu %xmm0,16(%r13) + leaq 32(%r13),%r13 + + movdqa 32(%rsp),%xmm6 + jmp L$xts_dec_done +.p2align 4 +L$xts_dec_1: + pxor %xmm15,%xmm7 + leaq 16(%r12),%r12 + movdqa %xmm7,32(%rbp) + leaq 32(%rbp),%rdi + leaq 32(%rbp),%rsi + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm15 + + + + + + movdqu %xmm15,0(%r13) + leaq 16(%r13),%r13 + + movdqa 16(%rsp),%xmm6 + +L$xts_dec_done: + andl $15,%ebx + jz L$xts_dec_ret + + pxor %xmm14,%xmm14 + movdqa L$xts_magic(%rip),%xmm12 + pcmpgtd %xmm6,%xmm14 + pshufd $19,%xmm14,%xmm13 + movdqa %xmm6,%xmm5 + paddq %xmm6,%xmm6 + pand %xmm12,%xmm13 + movdqu (%r12),%xmm15 + pxor %xmm13,%xmm6 + + leaq 32(%rbp),%rdi + pxor %xmm6,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm6 + movq %r13,%rdx + movdqu %xmm6,(%r13) + +L$xts_dec_steal: + movzbl 16(%r12),%eax + movzbl (%rdx),%ecx + leaq 1(%r12),%r12 + movb %al,(%rdx) + movb %cl,16(%rdx) + leaq 1(%rdx),%rdx + subl $1,%ebx + jnz L$xts_dec_steal + + movdqu (%r13),%xmm15 + leaq 32(%rbp),%rdi + pxor %xmm5,%xmm15 + leaq 32(%rbp),%rsi + movdqa %xmm15,32(%rbp) + leaq (%r15),%rdx + call _asm_AES_decrypt + pxor 32(%rbp),%xmm5 + movdqu %xmm5,(%r13) + +L$xts_dec_ret: + leaq (%rsp),%rax + pxor %xmm0,%xmm0 +L$xts_dec_bzero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + leaq 32(%rax),%rax + cmpq %rax,%rbp + ja L$xts_dec_bzero + + leaq (%rbp),%rsp + movq 72(%rsp),%r15 + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbx + movq 112(%rsp),%rax + leaq 120(%rsp),%rsp + movq %rax,%rbp +L$xts_dec_epilogue: + .byte 0xf3,0xc3 + + +.p2align 6 +_bsaes_const: +L$M0ISR: +.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 +L$ISRM0: +.quad 0x01040b0e0205080f, 0x0306090c00070a0d +L$ISR: +.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +L$BS0: +.quad 0x5555555555555555, 0x5555555555555555 +L$BS1: +.quad 0x3333333333333333, 0x3333333333333333 +L$BS2: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +L$SR: +.quad 0x0504070600030201, 0x0f0e0d0c0a09080b +L$SRM0: +.quad 0x0304090e00050a0f, 0x01060b0c0207080d +L$M0SR: +.quad 0x0a0e02060f03070b, 0x0004080c05090d01 +L$SWPUP: +.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +L$SWPUPM0SR: +.quad 0x0a0d02060c03070b, 0x0004080f05090e01 +L$ADD1: +.quad 0x0000000000000000, 0x0000000100000000 +L$ADD2: +.quad 0x0000000000000000, 0x0000000200000000 +L$ADD3: +.quad 0x0000000000000000, 0x0000000300000000 +L$ADD4: +.quad 0x0000000000000000, 0x0000000400000000 +L$ADD5: +.quad 0x0000000000000000, 0x0000000500000000 +L$ADD6: +.quad 0x0000000000000000, 0x0000000600000000 +L$ADD7: +.quad 0x0000000000000000, 0x0000000700000000 +L$ADD8: +.quad 0x0000000000000000, 0x0000000800000000 +L$xts_magic: +.long 
0x87,0,1,0 +L$masks: +.quad 0x0101010101010101, 0x0101010101010101 +.quad 0x0202020202020202, 0x0202020202020202 +.quad 0x0404040404040404, 0x0404040404040404 +.quad 0x0808080808080808, 0x0808080808080808 +L$M0: +.quad 0x02060a0e03070b0f, 0x0004080c0105090d +L$63: +.quad 0x6363636363636363, 0x6363636363636363 +.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44,32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32,65,110,100,121,32,80,111,108,121,97,107,111,118,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/vpaes-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/vpaes-x86_64.s new file mode 100644 index 00000000000000..c724170ce99e1b --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/vpaes-x86_64.s @@ -0,0 +1,826 @@ +.text + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_encrypt_core: + movq %rdx,%r9 + movq $16,%r11 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_ipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movdqu (%r9),%xmm5 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_ipt+16(%rip),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm5,%xmm2 + addq $16,%r9 + pxor %xmm2,%xmm0 + leaq L$k_mc_backward(%rip),%r10 + jmp L$enc_entry + +.p2align 4 +L$enc_loop: + + movdqa %xmm13,%xmm4 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa %xmm15,%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%r11,%r10,1),%xmm1 +.byte 102,15,56,0,234 + movdqa (%r11,%r10,1),%xmm4 + movdqa %xmm14,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addq $16,%r9 + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addq $16,%r11 + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andq $48,%r11 + subq $1,%rax + pxor %xmm3,%xmm0 + +L$enc_entry: + + movdqa %xmm9,%xmm1 + movdqa %xmm11,%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm10,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm5 + pxor %xmm1,%xmm3 + jnz L$enc_loop + + + movdqa -96(%r10),%xmm4 + movdqa -80(%r10),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%r11,%r10,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + .byte 0xf3,0xc3 + + + + + + + + +.p2align 4 +_vpaes_decrypt_core: + movq %rdx,%r9 + movl 240(%rdx),%eax + movdqa %xmm9,%xmm1 + movdqa L$k_dipt(%rip),%xmm2 + pandn %xmm0,%xmm1 + movq %rax,%r11 + psrld $4,%xmm1 + movdqu (%r9),%xmm5 + shlq $4,%r11 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa L$k_dipt+16(%rip),%xmm0 + xorq $48,%r11 + leaq L$k_dsbd(%rip),%r10 +.byte 102,15,56,0,193 + andq $48,%r11 + pxor %xmm5,%xmm2 + movdqa L$k_mc_forward+48(%rip),%xmm5 + pxor %xmm2,%xmm0 + addq $16,%r9 + addq %r10,%r11 + jmp L$dec_entry + +.p2align 4 +L$dec_loop: + + + + movdqa -32(%r10),%xmm4 + movdqa -16(%r10),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 0(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%r10),%xmm4 + pxor %xmm1,%xmm0 + movdqa 
80(%r10),%xmm1 + +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addq $16,%r9 +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subq $1,%rax + +L$dec_entry: + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + movdqa %xmm11,%xmm2 + psrld $4,%xmm1 + pand %xmm9,%xmm0 +.byte 102,15,56,0,208 + movdqa %xmm10,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm10,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm10,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%r9),%xmm0 + pxor %xmm1,%xmm3 + jnz L$dec_loop + + + movdqa 96(%r10),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%r10),%xmm0 + movdqa -352(%r11),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + .byte 0xf3,0xc3 + + + + + + + + +.p2align 4 +_vpaes_schedule_core: + + + + + + call _vpaes_preheat + movdqa L$k_rcon(%rip),%xmm8 + movdqu (%rdi),%xmm0 + + + movdqa %xmm0,%xmm3 + leaq L$k_ipt(%rip),%r11 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + + leaq L$k_sr(%rip),%r10 + testq %rcx,%rcx + jnz L$schedule_am_decrypting + + + movdqu %xmm0,(%rdx) + jmp L$schedule_go + +L$schedule_am_decrypting: + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%rdx) + xorq $48,%r8 + +L$schedule_go: + cmpl $192,%esi + ja L$schedule_256 + je L$schedule_192 + + + + + + + + + + +L$schedule_128: + movl $10,%esi + +L$oop_schedule_128: + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp L$oop_schedule_128 + + + + + + + + + + + + + + + + +.p2align 4 +L$schedule_192: + movdqu 8(%rdi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%esi + +L$oop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp L$oop_schedule_192 + + + + + + + + + + + +.p2align 4 +L$schedule_256: + movdqu 16(%rdi),%xmm0 + call _vpaes_schedule_transform + movl $7,%esi + +L$oop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + + + call _vpaes_schedule_round + decq %rsi + jz L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,%xmm5 + movdqa %xmm6,%xmm7 + call _vpaes_schedule_low_round + movdqa %xmm5,%xmm7 + + jmp L$oop_schedule_256 + + + + + + + + + + + + +.p2align 4 +L$schedule_mangle_last: + + leaq L$k_deskew(%rip),%r11 + testq %rcx,%rcx + jnz L$schedule_mangle_last_dec + + + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,193 + leaq L$k_opt(%rip),%r11 + addq $32,%rdx + +L$schedule_mangle_last_dec: + addq $-16,%rdx + pxor L$k_s63(%rip),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%rdx) + + + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + .byte 0xf3,0xc3 + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + .byte 0xf3,0xc3 + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_round: + + pxor %xmm1,%xmm1 +.byte 102,65,15,58,15,200,15 +.byte 102,69,15,58,15,192,15 + pxor %xmm1,%xmm7 + + + pshufd 
$255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round: + + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor L$k_s63(%rip),%xmm7 + + + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm10,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm10,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm10,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa %xmm13,%xmm4 +.byte 102,15,56,0,226 + movdqa %xmm12,%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + + + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + .byte 0xf3,0xc3 + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_transform: + movdqa %xmm9,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm0 + movdqa (%r11),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%r11),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + .byte 0xf3,0xc3 + + + + + + + + + + + + + + + + + + + + + + + + + + +.p2align 4 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa L$k_mc_forward(%rip),%xmm5 + testq %rcx,%rcx + jnz L$schedule_mangle_dec + + + addq $16,%rdx + pxor L$k_s63(%rip),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + + jmp L$schedule_mangle_both +.p2align 4 +L$schedule_mangle_dec: + + leaq L$k_dksd(%rip),%r11 + movdqa %xmm9,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm9,%xmm4 + + movdqa 0(%r11),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 32(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 64(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + + movdqa 96(%r11),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%r11),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + + addq $-16,%rdx + +L$schedule_mangle_both: + movdqa (%r8,%r10,1),%xmm1 +.byte 102,15,56,0,217 + addq $-16,%r8 + andq $48,%r8 + movdqu %xmm3,(%rdx) + .byte 0xf3,0xc3 + + + + + +.globl _vpaes_set_encrypt_key + +.p2align 4 +_vpaes_set_encrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + + movl $0,%ecx + movl $48,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _vpaes_set_decrypt_key + +.p2align 4 +_vpaes_set_decrypt_key: + movl %esi,%eax + shrl $5,%eax + addl $5,%eax + movl %eax,240(%rdx) + shll $4,%eax + leaq 16(%rdx,%rax,1),%rdx + + movl $1,%ecx + movl %esi,%r8d + shrl $1,%r8d + andl $32,%r8d + xorl $32,%r8d + call _vpaes_schedule_core + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _vpaes_encrypt + +.p2align 4 +_vpaes_encrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 + + +.globl _vpaes_decrypt + +.p2align 4 +_vpaes_decrypt: + movdqu (%rdi),%xmm0 + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu %xmm0,(%rsi) + .byte 0xf3,0xc3 + +.globl _vpaes_cbc_encrypt + +.p2align 4 +_vpaes_cbc_encrypt: + xchgq %rcx,%rdx + subq $16,%rcx + jc L$cbc_abort + movdqu (%r8),%xmm6 + subq %rdi,%rsi + call _vpaes_preheat + cmpl $0,%r9d + je L$cbc_dec_loop + jmp L$cbc_enc_loop +.p2align 4 +L$cbc_enc_loop: + movdqu (%rdi),%xmm0 
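_vpaes_cbc_encrypt keeps the running IV in %xmm6 and subtracts the input pointer from the output pointer so a single index walks both buffers; the loop beginning here is plain CBC chaining. In C terms, a sketch only (block_enc_fn is a stand-in for the vpaes encrypt core, names are illustrative):

    #include <stddef.h>
    #include <stdint.h>

    typedef void (*block_enc_fn)(uint8_t out[16], const uint8_t in[16]);

    /* CBC encryption as in L$cbc_enc_loop: C_i = E(P_i ^ C_{i-1}), IV = C_i. */
    static void cbc_encrypt(uint8_t *out, const uint8_t *in, size_t nblocks,
                            uint8_t iv[16], block_enc_fn enc)
    {
        for (size_t b = 0; b < nblocks; b++) {
            uint8_t x[16];
            for (int i = 0; i < 16; i++)
                x[i] = in[16 * b + i] ^ iv[i];
            enc(x, x);
            for (int i = 0; i < 16; i++)
                out[16 * b + i] = iv[i] = x[i];
        }
    }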
+ pxor %xmm6,%xmm0 + call _vpaes_encrypt_core + movdqa %xmm0,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_enc_loop + jmp L$cbc_done +.p2align 4 +L$cbc_dec_loop: + movdqu (%rdi),%xmm0 + movdqa %xmm0,%xmm7 + call _vpaes_decrypt_core + pxor %xmm6,%xmm0 + movdqa %xmm7,%xmm6 + movdqu %xmm0,(%rsi,%rdi,1) + leaq 16(%rdi),%rdi + subq $16,%rcx + jnc L$cbc_dec_loop +L$cbc_done: + movdqu %xmm6,(%r8) +L$cbc_abort: + .byte 0xf3,0xc3 + + + + + + + + +.p2align 4 +_vpaes_preheat: + leaq L$k_s0F(%rip),%r10 + movdqa -32(%r10),%xmm10 + movdqa -16(%r10),%xmm11 + movdqa 0(%r10),%xmm9 + movdqa 48(%r10),%xmm13 + movdqa 64(%r10),%xmm12 + movdqa 80(%r10),%xmm15 + movdqa 96(%r10),%xmm14 + .byte 0xf3,0xc3 + + + + + + + +.p2align 6 +_vpaes_consts: +L$k_inv: +.quad 0x0E05060F0D080180, 0x040703090A0B0C02 +.quad 0x01040A060F0B0780, 0x030D0E0C02050809 + +L$k_s0F: +.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F + +L$k_ipt: +.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 + +L$k_sb1: +.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +L$k_sb2: +.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +L$k_sbo: +.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA + +L$k_mc_forward: +.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +.quad 0x080B0A0904070605, 0x000302010C0F0E0D +.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +.quad 0x000302010C0F0E0D, 0x080B0A0904070605 + +L$k_mc_backward: +.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +.quad 0x0A09080B06050407, 0x020100030E0D0C0F + +L$k_sr: +.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +.quad 0x0F060D040B020900, 0x070E050C030A0108 +.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 + +L$k_rcon: +.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 + +L$k_s63: +.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B + +L$k_opt: +.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 + +L$k_deskew: +.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 + + + + + +L$k_dksd: +.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +L$k_dksb: +.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +L$k_dkse: +.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +L$k_dks9: +.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE + + + + + +L$k_dipt: +.quad 0x0F505B040B545F00, 0x154A411E114E451A +.quad 0x86E383E660056500, 0x12771772F491F194 + +L$k_dsb9: +.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +L$k_dsbd: +.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +L$k_dsbb: +.quad 0xD022649296B44200, 0x602646F6B0F2D404 +.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +L$k_dsbe: +.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +L$k_dsbo: +.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +.byte 
86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/rsaz-avx2.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/rsaz-avx2.s new file mode 100644 index 00000000000000..02f7f562ea827d --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/rsaz-avx2.s @@ -0,0 +1,24 @@ +.text + +.globl _rsaz_avx2_eligible + +_rsaz_avx2_eligible: + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _rsaz_1024_sqr_avx2 +.globl _rsaz_1024_mul_avx2 +.globl _rsaz_1024_norm2red_avx2 +.globl _rsaz_1024_red2norm_avx2 +.globl _rsaz_1024_scatter5_avx2 +.globl _rsaz_1024_gather5_avx2 + +_rsaz_1024_sqr_avx2: +_rsaz_1024_mul_avx2: +_rsaz_1024_norm2red_avx2: +_rsaz_1024_red2norm_avx2: +_rsaz_1024_scatter5_avx2: +_rsaz_1024_gather5_avx2: +.byte 0x0f,0x0b + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/rsaz-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/rsaz-x86_64.s new file mode 100644 index 00000000000000..4e70deabbd3f6a --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/rsaz-x86_64.s @@ -0,0 +1,1116 @@ +.text + + + +.globl _rsaz_512_sqr + +.p2align 5 +_rsaz_512_sqr: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$sqr_body: + movq %rdx,%rbp + movq (%rsi),%rdx + movq 8(%rsi),%rax + movq %rcx,128(%rsp) + jmp L$oop_sqr + +.p2align 5 +L$oop_sqr: + movl %r8d,128+8(%rsp) + + movq %rdx,%rbx + mulq %rdx + movq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq %rbx,%rax + movq %rdx,%r15 + adcq $0,%r15 + + addq %r8,%r8 + movq %r9,%rcx + adcq %r9,%r9 + + mulq %rax + movq %rax,(%rsp) + addq %rdx,%r8 + adcq $0,%r9 + + movq %r8,8(%rsp) + shrq $63,%rcx + + + movq 8(%rsi),%r8 + movq 16(%rsi),%rax + mulq %r8 + addq %rax,%r10 + movq 24(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r11 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r12 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r13 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r14 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r8 + addq %rax,%r15 + movq %r8,%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%r8 + movq %r10,%rdx + adcq $0,%r8 + + addq %rdx,%rdx + leaq (%rcx,%r10,2),%r10 + movq %r11,%rbx + adcq %r11,%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,16(%rsp) + movq %r10,24(%rsp) + shrq $63,%rbx + + + movq 16(%rsi),%r9 + movq 24(%rsi),%rax + mulq %r9 + addq %rax,%r12 + movq 32(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r13 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r13 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + addq %rax,%r14 + 
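The rsaz-avx2.s file added above for the asm_obsolete set is deliberately a stub: _rsaz_avx2_eligible reports 0 and the six AVX2 entry points are a bare ud2 (the 0x0f,0x0b bytes), so BN falls back to the generic RSAZ/Montgomery code. Roughly equivalent C, for orientation only (the trap uses a gcc/clang builtin):

    int  rsaz_avx2_eligible(void) { return 0; }             /* xorl %eax,%eax ; ret   */
    void rsaz_1024_sqr_avx2(void) { __builtin_trap(); }     /* .byte 0x0f,0x0b == ud2 */
    /* ...and likewise for the other five rsaz_1024_*_avx2 entry points. */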
movq 48(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r14 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + movq %r12,%r10 + leaq (%rbx,%r12,2),%r12 + addq %rax,%r15 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rcx,%r15 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r9 + shrq $63,%r10 + addq %rax,%r8 + movq %r9,%rax + adcq $0,%rdx + addq %rcx,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + movq %r13,%rcx + leaq (%r10,%r13,2),%r13 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,32(%rsp) + movq %r12,40(%rsp) + shrq $63,%rcx + + + movq 24(%rsi),%r10 + movq 32(%rsi),%rax + mulq %r10 + addq %rax,%r14 + movq 40(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + addq %rax,%r15 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r15 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + movq %r14,%r12 + leaq (%rcx,%r14,2),%r14 + addq %rax,%r8 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %rbx,%r8 + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r10 + shrq $63,%r12 + addq %rax,%r9 + movq %r10,%rax + adcq $0,%rdx + addq %rbx,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + movq %r15,%rbx + leaq (%r12,%r15,2),%r15 + + mulq %rax + addq %rax,%r13 + adcq %rdx,%r14 + adcq $0,%r15 + + movq %r13,48(%rsp) + movq %r14,56(%rsp) + shrq $63,%rbx + + + movq 32(%rsi),%r11 + movq 40(%rsi),%rax + mulq %r11 + addq %rax,%r8 + movq 48(%rsi),%rax + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + addq %rax,%r9 + movq 56(%rsi),%rax + adcq $0,%rdx + movq %r8,%r12 + leaq (%rbx,%r8,2),%r8 + addq %rcx,%r9 + movq %rdx,%rcx + adcq $0,%rcx + + mulq %r11 + shrq $63,%r12 + addq %rax,%r10 + movq %r11,%rax + adcq $0,%rdx + addq %rcx,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + movq %r9,%rcx + leaq (%r12,%r9,2),%r9 + + mulq %rax + addq %rax,%r15 + adcq %rdx,%r8 + adcq $0,%r9 + + movq %r15,64(%rsp) + movq %r8,72(%rsp) + shrq $63,%rcx + + + movq 40(%rsi),%r12 + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + movq 56(%rsi),%rax + movq %rdx,%rbx + adcq $0,%rbx + + mulq %r12 + addq %rax,%r11 + movq %r12,%rax + movq %r10,%r15 + leaq (%rcx,%r10,2),%r10 + adcq $0,%rdx + shrq $63,%r15 + addq %rbx,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + movq %r11,%rbx + leaq (%r15,%r11,2),%r11 + + mulq %rax + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r11 + + movq %r9,80(%rsp) + movq %r10,88(%rsp) + + + movq 48(%rsi),%r13 + movq 56(%rsi),%rax + mulq %r13 + addq %rax,%r12 + movq %r13,%rax + movq %rdx,%r13 + adcq $0,%r13 + + xorq %r14,%r14 + shlq $1,%rbx + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + + mulq %rax + addq %rax,%r11 + adcq %rdx,%r12 + adcq $0,%r13 + + movq %r11,96(%rsp) + movq %r12,104(%rsp) + + + movq 56(%rsi),%rax + mulq %rax + addq %rax,%r13 + adcq $0,%rdx + + addq %rdx,%r14 + + movq %r13,112(%rsp) + movq %r14,120(%rsp) + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movq %r8,%rdx + movq %r9,%rax + movl 128+8(%rsp),%r8d + movq %rdi,%rsi + + decl %r8d + jnz L$oop_sqr + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$sqr_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul + +.p2align 5 +_rsaz_512_mul: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq 
%r15 + + subq $128+24,%rsp +L$mul_body: +.byte 102,72,15,110,199 +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + movq (%rdx),%rbx + movq %rdx,%rbp + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_gather4 + +.p2align 5 +_rsaz_512_mul_gather4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +L$mul_gather4_body: + movl 64(%rdx,%r9,4),%eax +.byte 102,72,15,110,199 + movl (%rdx,%r9,4),%ebx +.byte 102,72,15,110,201 + movq %r8,128(%rsp) + + shlq $32,%rax + orq %rax,%rbx + movq (%rsi),%rax + movq 8(%rsi),%rcx + leaq 128(%rdx,%r9,4),%rbp + mulq %rbx + movq %rax,(%rsp) + movq %rcx,%rax + movq %rdx,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + leaq 128(%rbp),%rbp + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rsp),%rdi + movl $7,%ecx + jmp L$oop_mul_gather + +.p2align 5 +L$oop_mul_gather: + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + movd (%rbp),%xmm4 + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + movd 64(%rbp),%xmm5 + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + pslldq $4,%xmm5 + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + por %xmm5,%xmm4 + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx +.byte 102,72,15,126,227 + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 128(%rbp),%rbp + leaq 8(%rdi),%rdi + + decl %ecx + jnz L$oop_mul_gather + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 
40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_gather4_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_scatter4 + +.p2align 5 +_rsaz_512_mul_scatter4: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + subq $128+24,%rsp +L$mul_scatter4_body: + leaq (%r8,%r9,4),%r8 +.byte 102,72,15,110,199 +.byte 102,72,15,110,202 +.byte 102,73,15,110,208 + movq %rcx,128(%rsp) + + movq %rdi,%rbp + movq (%rdi),%rbx + call __rsaz_512_mul + +.byte 102,72,15,126,199 +.byte 102,72,15,126,205 + + movq (%rsp),%r8 + movq 8(%rsp),%r9 + movq 16(%rsp),%r10 + movq 24(%rsp),%r11 + movq 32(%rsp),%r12 + movq 40(%rsp),%r13 + movq 48(%rsp),%r14 + movq 56(%rsp),%r15 + + call __rsaz_512_reduce + addq 64(%rsp),%r8 + adcq 72(%rsp),%r9 + adcq 80(%rsp),%r10 + adcq 88(%rsp),%r11 + adcq 96(%rsp),%r12 + adcq 104(%rsp),%r13 + adcq 112(%rsp),%r14 + adcq 120(%rsp),%r15 +.byte 102,72,15,126,214 + sbbq %rcx,%rcx + + call __rsaz_512_subtract + + movl %r8d,0(%rsi) + shrq $32,%r8 + movl %r9d,128(%rsi) + shrq $32,%r9 + movl %r10d,256(%rsi) + shrq $32,%r10 + movl %r11d,384(%rsi) + shrq $32,%r11 + movl %r12d,512(%rsi) + shrq $32,%r12 + movl %r13d,640(%rsi) + shrq $32,%r13 + movl %r14d,768(%rsi) + shrq $32,%r14 + movl %r15d,896(%rsi) + shrq $32,%r15 + movl %r8d,64(%rsi) + movl %r9d,192(%rsi) + movl %r10d,320(%rsi) + movl %r11d,448(%rsi) + movl %r12d,576(%rsi) + movl %r13d,704(%rsi) + movl %r14d,832(%rsi) + movl %r15d,960(%rsi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_scatter4_epilogue: + .byte 0xf3,0xc3 + +.globl _rsaz_512_mul_by_one + +.p2align 5 +_rsaz_512_mul_by_one: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $128+24,%rsp +L$mul_by_one_body: + movq %rdx,%rbp + movq %rcx,128(%rsp) + + movq (%rsi),%r8 + pxor %xmm0,%xmm0 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + + movdqa %xmm0,(%rsp) + movdqa %xmm0,16(%rsp) + movdqa %xmm0,32(%rsp) + movdqa %xmm0,48(%rsp) + movdqa %xmm0,64(%rsp) + movdqa %xmm0,80(%rsp) + movdqa %xmm0,96(%rsp) + call __rsaz_512_reduce + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + leaq 128+24+48(%rsp),%rax + movq -48(%rax),%r15 + movq -40(%rax),%r14 + movq -32(%rax),%r13 + movq -24(%rax),%r12 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$mul_by_one_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_reduce: + movq %r8,%rbx + imulq 128+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$reduction_loop + +.p2align 5 +L$reduction_loop: + mulq %rbx + movq 8(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + 
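__rsaz_512_reduce, whose inner loop appears here, is word-serial Montgomery reduction of the 1024-bit product on the stack: each of the eight passes computes m = (low limb) * n0 mod 2^64 with imulq, folds m*N in through the mulq/adcq chain, and slides the window up one limb; the caller then adds the upper half and performs the conditional subtraction. A compact C reference, illustrative only (the assembly keeps the eight working limbs in registers; n0 is -N^-1 mod 2^64):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Word-serial Montgomery reduction of a 16-limb value r against an
     * 8-limb modulus n; returns the carry out of the top limb so the
     * caller can fold it into the final conditional subtraction. */
    static uint64_t mont_reduce_512(uint64_t r[16], const uint64_t n[8], uint64_t n0)
    {
        uint64_t top = 0;
        for (int i = 0; i < 8; i++) {
            uint64_t m = r[i] * n0;                       /* makes limb i vanish       */
            uint64_t carry = 0;
            for (int j = 0; j < 8; j++) {                 /* r += m * n, aligned at i  */
                u128 t = (u128)m * n[j] + r[i + j] + carry;
                r[i + j] = (uint64_t)t;
                carry    = (uint64_t)(t >> 64);
            }
            for (int k = i + 8; carry && k < 16; k++) {   /* propagate the carry       */
                u128 t = (u128)r[k] + carry;
                r[k]  = (uint64_t)t;
                carry = (uint64_t)(t >> 64);
            }
            top += carry;
        }
        return top;   /* result limbs are r[8..15] plus this carry */
    }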
adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq 128+8(%rsp),%rsi + + + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jne L$reduction_loop + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_subtract: + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + movq 0(%rbp),%r8 + movq 8(%rbp),%r9 + negq %r8 + notq %r9 + andq %rcx,%r8 + movq 16(%rbp),%r10 + andq %rcx,%r9 + notq %r10 + movq 24(%rbp),%r11 + andq %rcx,%r10 + notq %r11 + movq 32(%rbp),%r12 + andq %rcx,%r11 + notq %r12 + movq 40(%rbp),%r13 + andq %rcx,%r12 + notq %r13 + movq 48(%rbp),%r14 + andq %rcx,%r13 + notq %r14 + movq 56(%rbp),%r15 + andq %rcx,%r14 + notq %r15 + andq %rcx,%r15 + + addq (%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + + +.p2align 5 +__rsaz_512_mul: + leaq 8(%rsp),%rdi + + movq (%rsi),%rax + mulq %rbx + movq %rax,(%rdi) + movq 8(%rsi),%rax + movq %rdx,%r8 + + mulq %rbx + addq %rax,%r8 + movq 16(%rsi),%rax + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r9 + movq 24(%rsi),%rax + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r10 + movq 32(%rsi),%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r11 + movq 40(%rsi),%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r12 + movq 48(%rsi),%rax + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r13 + movq 56(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + addq %rax,%r14 + movq (%rsi),%rax + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rbp),%rbp + leaq 8(%rdi),%rdi + + movl $7,%ecx + jmp L$oop_mul + +.p2align 5 +L$oop_mul: + movq (%rbp),%rbx + mulq %rbx + addq %rax,%r8 + movq 8(%rsi),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 24(%rsi),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 32(%rsi),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 40(%rsi),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 48(%rsi),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 56(%rsi),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + leaq 8(%rbp),%rbp + adcq $0,%r14 + + mulq %rbx + addq %rax,%r15 + movq (%rsi),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + leaq 8(%rdi),%rdi + + decl %ecx + jnz L$oop_mul + + movq %r8,(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + 
movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + + .byte 0xf3,0xc3 + +.globl _rsaz_512_scatter4 + +.p2align 4 +_rsaz_512_scatter4: + leaq (%rdi,%rdx,4),%rdi + movl $8,%r9d + jmp L$oop_scatter +.p2align 4 +L$oop_scatter: + movq (%rsi),%rax + leaq 8(%rsi),%rsi + movl %eax,(%rdi) + shrq $32,%rax + movl %eax,64(%rdi) + leaq 128(%rdi),%rdi + decl %r9d + jnz L$oop_scatter + .byte 0xf3,0xc3 + + +.globl _rsaz_512_gather4 + +.p2align 4 +_rsaz_512_gather4: + leaq (%rsi,%rdx,4),%rsi + movl $8,%r9d + jmp L$oop_gather +.p2align 4 +L$oop_gather: + movl (%rsi),%eax + movl 64(%rsi),%r8d + leaq 128(%rsi),%rsi + shlq $32,%r8 + orq %r8,%rax + movq %rax,(%rdi) + leaq 8(%rdi),%rdi + decl %r9d + jnz L$oop_gather + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-gf2m.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-gf2m.s new file mode 100644 index 00000000000000..040c324c49a753 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-gf2m.s @@ -0,0 +1,291 @@ +.text + + +.p2align 4 +_mul_1x1: + subq $128+8,%rsp + movq $-1,%r9 + leaq (%rax,%rax,1),%rsi + shrq $3,%r9 + leaq (,%rax,4),%rdi + andq %rax,%r9 + leaq (,%rax,8),%r12 + sarq $63,%rax + leaq (%r9,%r9,1),%r10 + sarq $63,%rsi + leaq (,%r9,4),%r11 + andq %rbp,%rax + sarq $63,%rdi + movq %rax,%rdx + shlq $63,%rax + andq %rbp,%rsi + shrq $1,%rdx + movq %rsi,%rcx + shlq $62,%rsi + andq %rbp,%rdi + shrq $2,%rcx + xorq %rsi,%rax + movq %rdi,%rbx + shlq $61,%rdi + xorq %rcx,%rdx + shrq $3,%rbx + xorq %rdi,%rax + xorq %rbx,%rdx + + movq %r9,%r13 + movq $0,0(%rsp) + xorq %r10,%r13 + movq %r9,8(%rsp) + movq %r11,%r14 + movq %r10,16(%rsp) + xorq %r12,%r14 + movq %r13,24(%rsp) + + xorq %r11,%r9 + movq %r11,32(%rsp) + xorq %r11,%r10 + movq %r9,40(%rsp) + xorq %r11,%r13 + movq %r10,48(%rsp) + xorq %r14,%r9 + movq %r13,56(%rsp) + xorq %r14,%r10 + + movq %r12,64(%rsp) + xorq %r14,%r13 + movq %r9,72(%rsp) + xorq %r11,%r9 + movq %r10,80(%rsp) + xorq %r11,%r10 + movq %r13,88(%rsp) + + xorq %r11,%r13 + movq %r14,96(%rsp) + movq %r8,%rsi + movq %r9,104(%rsp) + andq %rbp,%rsi + movq %r10,112(%rsp) + shrq $4,%rbp + movq %r13,120(%rsp) + movq %r8,%rdi + andq %rbp,%rdi + shrq $4,%rbp + + movq (%rsp,%rsi,8),%xmm0 + movq %r8,%rsi + andq %rbp,%rsi + shrq $4,%rbp + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $4,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $60,%rbx + xorq %rcx,%rax + pslldq $1,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $12,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $52,%rbx + xorq %rcx,%rax + pslldq $2,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $20,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $44,%rbx + xorq %rcx,%rax + pslldq $3,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $28,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $36,%rbx + xorq %rcx,%rax + pslldq $4,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $36,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $28,%rbx + xorq %rcx,%rax + pslldq $5,%xmm1 + movq %r8,%rsi + 
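_mul_1x1 above computes a 64x64-bit carry-less (GF(2)[x]) product: it precomputes the sixteen multiples 0..15 of one operand on the stack, consumes the other operand four bits at a time, and patches up the top bits the table cannot represent; bn_GF2m_mul_2x2 skips all of this and uses PCLMULQDQ when bit 33 of OPENSSL_ia32cap_P is set. The operation itself, in a plain bit-serial C sketch (illustrative only):

    #include <stdint.h>

    /* Carry-less 64x64 -> 128-bit multiply: polynomial multiplication over
     * GF(2), i.e. what PCLMULQDQ does in one instruction.  out[0] is the
     * low half, out[1] the high half. */
    static void clmul64(uint64_t a, uint64_t b, uint64_t out[2])
    {
        uint64_t lo = 0, hi = 0;
        for (int i = 0; i < 64; i++) {
            if ((b >> i) & 1) {
                lo ^= a << i;
                if (i)
                    hi ^= a >> (64 - i);
            }
        }
        out[0] = lo;
        out[1] = hi;
    }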
shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $44,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $20,%rbx + xorq %rcx,%rax + pslldq $6,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %r8,%rdi + movq %rcx,%rbx + shlq $52,%rcx + andq %rbp,%rdi + movq (%rsp,%rsi,8),%xmm1 + shrq $12,%rbx + xorq %rcx,%rax + pslldq $7,%xmm1 + movq %r8,%rsi + shrq $4,%rbp + xorq %rbx,%rdx + andq %rbp,%rsi + shrq $4,%rbp + pxor %xmm1,%xmm0 + movq (%rsp,%rdi,8),%rcx + movq %rcx,%rbx + shlq $60,%rcx +.byte 102,72,15,126,198 + shrq $4,%rbx + xorq %rcx,%rax + psrldq $8,%xmm0 + xorq %rbx,%rdx +.byte 102,72,15,126,199 + xorq %rsi,%rax + xorq %rdi,%rdx + + addq $128+8,%rsp + .byte 0xf3,0xc3 +L$end_mul_1x1: + + +.globl _bn_GF2m_mul_2x2 + +.p2align 4 +_bn_GF2m_mul_2x2: + movq _OPENSSL_ia32cap_P(%rip),%rax + btq $33,%rax + jnc L$vanilla_mul_2x2 + +.byte 102,72,15,110,198 +.byte 102,72,15,110,201 +.byte 102,72,15,110,210 +.byte 102,73,15,110,216 + movdqa %xmm0,%xmm4 + movdqa %xmm1,%xmm5 +.byte 102,15,58,68,193,0 + pxor %xmm2,%xmm4 + pxor %xmm3,%xmm5 +.byte 102,15,58,68,211,0 +.byte 102,15,58,68,229,0 + xorps %xmm0,%xmm4 + xorps %xmm2,%xmm4 + movdqa %xmm4,%xmm5 + pslldq $8,%xmm4 + psrldq $8,%xmm5 + pxor %xmm4,%xmm2 + pxor %xmm5,%xmm0 + movdqu %xmm2,0(%rdi) + movdqu %xmm0,16(%rdi) + .byte 0xf3,0xc3 + +.p2align 4 +L$vanilla_mul_2x2: + leaq -136(%rsp),%rsp + movq %r14,80(%rsp) + movq %r13,88(%rsp) + movq %r12,96(%rsp) + movq %rbp,104(%rsp) + movq %rbx,112(%rsp) +L$body_mul_2x2: + movq %rdi,32(%rsp) + movq %rsi,40(%rsp) + movq %rdx,48(%rsp) + movq %rcx,56(%rsp) + movq %r8,64(%rsp) + + movq $15,%r8 + movq %rsi,%rax + movq %rcx,%rbp + call _mul_1x1 + movq %rax,16(%rsp) + movq %rdx,24(%rsp) + + movq 48(%rsp),%rax + movq 64(%rsp),%rbp + call _mul_1x1 + movq %rax,0(%rsp) + movq %rdx,8(%rsp) + + movq 40(%rsp),%rax + movq 56(%rsp),%rbp + xorq 48(%rsp),%rax + xorq 64(%rsp),%rbp + call _mul_1x1 + movq 0(%rsp),%rbx + movq 8(%rsp),%rcx + movq 16(%rsp),%rdi + movq 24(%rsp),%rsi + movq 32(%rsp),%rbp + + xorq %rdx,%rax + xorq %rcx,%rdx + xorq %rbx,%rax + movq %rbx,0(%rbp) + xorq %rdi,%rdx + movq %rsi,24(%rbp) + xorq %rsi,%rax + xorq %rsi,%rdx + xorq %rdx,%rax + movq %rdx,16(%rbp) + movq %rax,8(%rbp) + + movq 80(%rsp),%r14 + movq 88(%rsp),%r13 + movq 96(%rsp),%r12 + movq 104(%rsp),%rbp + movq 112(%rsp),%rbx + leaq 136(%rsp),%rsp + .byte 0xf3,0xc3 +L$end_mul_2x2: + +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 4 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s new file mode 100644 index 00000000000000..2ed1c0ff4233de --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s @@ -0,0 +1,724 @@ +.text + + + +.globl _bn_mul_mont + +.p2align 4 +_bn_mul_mont: + testl $3,%r9d + jnz L$mul_enter + cmpl $8,%r9d + jb L$mul_enter + cmpq %rsi,%rdx + jne L$mul4x_enter + testl $7,%r9d + jz L$sqr8x_enter + jmp L$mul4x_enter + +.p2align 4 +L$mul_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 2(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq 
%r11,8(%rsp,%r9,8) +L$mul_body: + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 +L$1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne L$1st + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp L$outer +.p2align 4 +L$outer: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$inner_enter + +.p2align 4 +L$inner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne L$inner + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb L$outer + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp L$sub +.p2align 4 +L$sub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz L$sub + + sbbq $0,%rax + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + movq %r9,%r15 + orq %rcx,%rsi +.p2align 4 +L$copy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +bn_mul4x_mont: +L$mul4x_enter: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r9d + leaq 4(%r9),%r10 + movq %rsp,%r11 + negq %r10 + leaq (%rsp,%r10,8),%rsp + andq $-1024,%rsp + + movq %r11,8(%rsp,%r9,8) +L$mul4x_body: + movq %rdi,16(%rsp,%r9,8) + movq %rdx,%r12 + movq (%r8),%r8 + movq (%r12),%rbx + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq 
%r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$1st4x +.p2align 4 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + leaq 1(%r14),%r14 +.p2align 2 +L$outer4x: + movq (%r12,%r14,8),%rbx + xorq %r15,%r15 + movq (%rsp),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + imulq %r10,%rbp + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx),%rax + adcq $0,%rdx + addq 8(%rsp),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 4(%r15),%r15 + adcq $0,%rdx + movq %rdi,(%rsp) + movq %rdx,%r13 + jmp L$inner4x +.p2align 4 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq 8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 4(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq -16(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-32(%rsp,%r15,8) + movq %rdx,%r13 + cmpq %r9,%r15 + jb L$inner4x + + mulq %rbx + addq %rax,%r10 + 
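bn_mul_mont and the bn_mul4x_mont variant above interleave the multiplication by each word of b with the reduction step (the word-serial, CIOS-style pattern): for every b[i] they accumulate a*b[i], derive m = t[0]*n0 mod 2^64, fold in m*n so the low limb cancels, and shift down one limb; the L$sub/L$copy tail is the final conditional subtraction. A compact C reference, illustrative only (the generated code processes four limbs per step and keeps the running window in registers):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* r = a * b * R^-1 mod n with R = 2^(64*num); n0 = -n[0]^-1 mod 2^64. */
    static void mont_mul(uint64_t *r, const uint64_t *a, const uint64_t *b,
                         const uint64_t *n, uint64_t n0, int num)
    {
        uint64_t t[num + 2];
        for (int j = 0; j < num + 2; j++) t[j] = 0;

        for (int i = 0; i < num; i++) {
            uint64_t carry = 0;
            for (int j = 0; j < num; j++) {                   /* t += a * b[i] */
                u128 v = (u128)a[j] * b[i] + t[j] + carry;
                t[j] = (uint64_t)v; carry = (uint64_t)(v >> 64);
            }
            u128 v = (u128)t[num] + carry;
            t[num] = (uint64_t)v; t[num + 1] = (uint64_t)(v >> 64);

            uint64_t m = t[0] * n0;                           /* cancels t[0]  */
            carry = 0;
            for (int j = 0; j < num; j++) {                   /* t += m * n    */
                u128 w = (u128)m * n[j] + t[j] + carry;
                t[j] = (uint64_t)w; carry = (uint64_t)(w >> 64);
            }
            v = (u128)t[num] + carry;
            t[num] = (uint64_t)v; t[num + 1] += (uint64_t)(v >> 64);

            for (int j = 0; j <= num; j++) t[j] = t[j + 1];   /* drop low limb */
            t[num + 1] = 0;
        }

        uint64_t borrow = 0;                                  /* conditional t - n */
        for (int j = 0; j < num; j++) {
            u128 d = (u128)t[j] - n[j] - borrow;
            r[j] = (uint64_t)d;
            borrow = (uint64_t)(d >> 64) & 1;
        }
        if (t[num] == 0 && borrow)                            /* t < n: keep t */
            for (int j = 0; j < num; j++) r[j] = t[j];
    }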
movq -16(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -16(%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%rsp,%r15,8) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -8(%rcx,%r15,8),%rax + adcq $0,%rdx + addq -8(%rsp,%r15,8),%r11 + adcq $0,%rdx + leaq 1(%r14),%r14 + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%rsp,%r9,8),%r13 + adcq $0,%rdi + movq %r13,-8(%rsp,%r15,8) + movq %rdi,(%rsp,%r15,8) + + cmpq %r9,%r14 + jb L$outer4x + movq 16(%rsp,%r9,8),%rdi + movq 0(%rsp),%rax + pxor %xmm0,%xmm0 + movq 8(%rsp),%rdx + shrq $2,%r9 + leaq (%rsp),%rsi + xorq %r14,%r14 + + subq 0(%rcx),%rax + movq 16(%rsi),%rbx + movq 24(%rsi),%rbp + sbbq 8(%rcx),%rdx + leaq -1(%r9),%r15 + jmp L$sub4x +.p2align 4 +L$sub4x: + movq %rax,0(%rdi,%r14,8) + movq %rdx,8(%rdi,%r14,8) + sbbq 16(%rcx,%r14,8),%rbx + movq 32(%rsi,%r14,8),%rax + movq 40(%rsi,%r14,8),%rdx + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + movq %rbp,24(%rdi,%r14,8) + sbbq 32(%rcx,%r14,8),%rax + movq 48(%rsi,%r14,8),%rbx + movq 56(%rsi,%r14,8),%rbp + sbbq 40(%rcx,%r14,8),%rdx + leaq 4(%r14),%r14 + decq %r15 + jnz L$sub4x + + movq %rax,0(%rdi,%r14,8) + movq 32(%rsi,%r14,8),%rax + sbbq 16(%rcx,%r14,8),%rbx + movq %rdx,8(%rdi,%r14,8) + sbbq 24(%rcx,%r14,8),%rbp + movq %rbx,16(%rdi,%r14,8) + + sbbq $0,%rax + movq %rbp,24(%rdi,%r14,8) + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + leaq -1(%r9),%r15 + orq %rcx,%rsi + + movdqu (%rsi),%xmm1 + movdqa %xmm0,(%rsp) + movdqu %xmm1,(%rdi) + jmp L$copy4x +.p2align 4 +L$copy4x: + movdqu 16(%rsi,%r14,1),%xmm2 + movdqu 32(%rsi,%r14,1),%xmm1 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movdqa %xmm0,32(%rsp,%r14,1) + movdqu %xmm1,32(%rdi,%r14,1) + leaq 32(%r14),%r14 + decq %r15 + jnz L$copy4x + + shlq $2,%r9 + movdqu 16(%rsi,%r14,1),%xmm2 + movdqa %xmm0,16(%rsp,%r14,1) + movdqu %xmm2,16(%rdi,%r14,1) + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$mul4x_epilogue: + .byte 0xf3,0xc3 + + + + +.p2align 5 +bn_sqr8x_mont: +L$sqr8x_enter: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movl %r9d,%r10d + shll $3,%r9d + shlq $3+2,%r10 + negq %r9 + + + + + + + leaq -64(%rsp,%r9,4),%r11 + movq (%r8),%r8 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$sqr8x_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,4),%rsp + jmp L$sqr8x_sp_done + +.p2align 5 +L$sqr8x_sp_alt: + leaq 4096-64(,%r9,4),%r10 + leaq -64(%rsp,%r9,4),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$sqr8x_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + leaq 64(%rsp,%r9,2),%r11 + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$sqr8x_body: + + movq %r9,%rbp +.byte 102,73,15,110,211 + shrq $3+2,%rbp + movl _OPENSSL_ia32cap_P+8(%rip),%eax + jmp L$sqr8x_copy_n + +.p2align 5 +L$sqr8x_copy_n: + movq 0(%rcx),%xmm0 + movq 8(%rcx),%xmm1 + movq 16(%rcx),%xmm3 + movq 24(%rcx),%xmm4 + leaq 32(%rcx),%rcx + movdqa %xmm0,0(%r11) + movdqa %xmm1,16(%r11) + movdqa %xmm3,32(%r11) + movdqa %xmm4,48(%r11) + leaq 64(%r11),%r11 + decq %rbp + jnz L$sqr8x_copy_n + + pxor %xmm0,%xmm0 +.byte 102,72,15,110,207 +.byte 
102,73,15,110,218 + call _bn_sqr8x_internal + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + leaq 64(%rsp,%r9,2),%rdx + shrq $3+2,%r9 + movq 40(%rsp),%rsi + jmp L$sqr8x_zero + +.p2align 5 +L$sqr8x_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + movdqa %xmm0,0(%rdx) + movdqa %xmm0,16(%rdx) + movdqa %xmm0,32(%rdx) + movdqa %xmm0,48(%rdx) + leaq 64(%rdx),%rdx + decq %r9 + jnz L$sqr8x_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$sqr8x_epilogue: + .byte 0xf3,0xc3 + +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 4 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s new file mode 100644 index 00000000000000..ba4d62157cc5f4 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s @@ -0,0 +1,1829 @@ +.text + + + +.globl _bn_mul_mont_gather5 + +.p2align 6 +_bn_mul_mont_gather5: + testl $7,%r9d + jnz L$mul_enter + jmp L$mul4x_enter + +.p2align 4 +L$mul_enter: + movl %r9d,%r9d + movq %rsp,%rax + movl 8(%rsp),%r10d + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + leaq 2(%r9),%r11 + negq %r11 + leaq (%rsp,%r11,8),%rsp + andq $-1024,%rsp + + movq %rax,8(%rsp,%r9,8) +L$mul_body: + movq %rdx,%r12 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq L$magic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%r12,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + +.byte 102,72,15,126,195 + + movq (%r8),%r8 + movq (%rsi),%rax + + xorq %r14,%r14 + xorq %r15,%r15 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$1st_enter + +.p2align 4 +L$1st: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r13 + movq %r10,%r11 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$1st_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + leaq 1(%r15),%r15 + movq %rdx,%r10 + + mulq %rbp + cmpq %r9,%r15 + jne L$1st + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + movq %r10,%r11 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + jmp L$outer +.p2align 4 +L$outer: + xorq %r15,%r15 + movq %r8,%rbp + movq (%rsp),%r10 + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand 
%xmm5,%xmm1 + + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + + imulq %r10,%rbp + movq %rdx,%r11 + + por %xmm2,%xmm0 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi),%rax + adcq $0,%rdx + movq 8(%rsp),%r10 + movq %rdx,%r13 + + leaq 1(%r15),%r15 + jmp L$inner_enter + +.p2align 4 +L$inner: + addq %rax,%r13 + movq (%rsi,%r15,8),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + +L$inner_enter: + mulq %rbx + addq %rax,%r11 + movq (%rcx,%r15,8),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + leaq 1(%r15),%r15 + + mulq %rbp + cmpq %r9,%r15 + jne L$inner + +.byte 102,72,15,126,195 + + addq %rax,%r13 + movq (%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + movq (%rsp,%r15,8),%r10 + adcq $0,%rdx + movq %r13,-16(%rsp,%r15,8) + movq %rdx,%r13 + + xorq %rdx,%rdx + addq %r11,%r13 + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%rsp,%r9,8) + movq %rdx,(%rsp,%r9,8) + + leaq 1(%r14),%r14 + cmpq %r9,%r14 + jb L$outer + + xorq %r14,%r14 + movq (%rsp),%rax + leaq (%rsp),%rsi + movq %r9,%r15 + jmp L$sub +.p2align 4 +L$sub: sbbq (%rcx,%r14,8),%rax + movq %rax,(%rdi,%r14,8) + movq 8(%rsi,%r14,8),%rax + leaq 1(%r14),%r14 + decq %r15 + jnz L$sub + + sbbq $0,%rax + xorq %r14,%r14 + andq %rax,%rsi + notq %rax + movq %rdi,%rcx + andq %rax,%rcx + movq %r9,%r15 + orq %rcx,%rsi +.p2align 4 +L$copy: + movq (%rsi,%r14,8),%rax + movq %r14,(%rsp,%r14,8) + movq %rax,(%rdi,%r14,8) + leaq 1(%r14),%r14 + subq $1,%r15 + jnz L$copy + + movq 8(%rsp,%r9,8),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mul_epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +bn_mul4x_mont_gather5: +L$mul4x_enter: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$mul4xsp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$mul4xsp_done + +.p2align 5 +L$mul4xsp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$mul4xsp_done: + andq $-64,%rsp + negq %r9 + + movq %rax,40(%rsp) +L$mul4x_body: + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$mul4x_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 5 +mul4x_internal: + shlq $5,%r9 + movl 8(%rax),%r10d + leaq 256(%rdx,%r9,1),%r13 + shrq $5,%r9 + movq %r10,%r11 + shrq $3,%r10 + andq $7,%r11 + notq %r10 + leaq L$magic_masks(%rip),%rax + andq $3,%r10 + leaq 96(%rdx,%r11,8),%r12 + movq 0(%rax,%r10,8),%xmm4 + movq 8(%rax,%r10,8),%xmm5 + addq $7,%r11 + movq 16(%rax,%r10,8),%xmm6 + movq 24(%rax,%r10,8),%xmm7 + andq $7,%r11 + + movq -96(%r12),%xmm0 + leaq 256(%r12),%r14 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + pand %xmm6,%xmm2 +.byte 0x67 + por %xmm1,%xmm0 + movq -96(%r14),%xmm1 +.byte 0x67 + pand %xmm7,%xmm3 +.byte 0x67 + por %xmm2,%xmm0 + movq -32(%r14),%xmm2 +.byte 0x67 + pand %xmm4,%xmm1 +.byte 0x67 + por %xmm3,%xmm0 + 
movq 32(%r14),%xmm3 + +.byte 102,72,15,126,195 + movq 96(%r14),%xmm0 + movq %r13,16+8(%rsp) + movq %rdi,56+8(%rsp) + + movq (%r8),%r8 + movq (%rsi),%rax + leaq (%rsi,%r9,1),%rsi + negq %r9 + + movq %r8,%rbp + mulq %rbx + movq %rax,%r10 + movq (%rcx),%rax + + pand %xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 + + imulq %r10,%rbp + + + + + + + + leaq 64+8(%rsp,%r11,8),%r14 + movq %rdx,%r11 + + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + leaq 512(%r12),%r12 + por %xmm1,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + jmp L$1st4x + +.p2align 5 +L$1st4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdi,(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz L$1st4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + jmp L$outer4x + +.p2align 5 +L$outer4x: + movq (%r14,%r9,1),%r10 + movq %r8,%rbp + mulq %rbx + addq %rax,%r10 + movq (%rcx),%rax + adcq $0,%rdx + + movq -96(%r12),%xmm0 + movq -32(%r12),%xmm1 + pand %xmm4,%xmm0 + movq 32(%r12),%xmm2 + pand %xmm5,%xmm1 + movq 96(%r12),%xmm3 + + imulq %r10,%rbp +.byte 0x67 + movq %rdx,%r11 + movq %rdi,(%r14) + + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + leaq (%r14,%r9,1),%r14 + leaq 256(%r12),%r12 + por %xmm3,%xmm0 + + mulq %rbp + addq %rax,%r10 + movq 8(%rsi,%r9,1),%rax + adcq $0,%rdx + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 32(%r9),%r15 + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %rdx,%r13 + jmp L$inner4x + +.p2align 5 +L$inner4x: + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi,%r15,1),%rax + adcq 
$0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq -16(%rcx),%rax + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + + mulq %rbx + addq %rax,%r10 + movq 0(%rcx),%rax + adcq $0,%rdx + addq (%r14),%r10 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq 8(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-16(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq 16(%rcx),%rax + adcq $0,%rdx + addq 8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq 16(%rsi,%r15,1),%rax + adcq $0,%rdx + addq %r11,%rdi + leaq 64(%rcx),%rcx + adcq $0,%rdx + movq %r13,-8(%r14) + movq %rdx,%r13 + + addq $32,%r15 + jnz L$inner4x + + mulq %rbx + addq %rax,%r10 + movq -32(%rcx),%rax + adcq $0,%rdx + addq 16(%r14),%r10 + leaq 32(%r14),%r14 + adcq $0,%rdx + movq %rdx,%r11 + + mulq %rbp + addq %rax,%r13 + movq -8(%rsi),%rax + adcq $0,%rdx + addq %r10,%r13 + adcq $0,%rdx + movq %rdi,-32(%r14) + movq %rdx,%rdi + + mulq %rbx + addq %rax,%r11 + movq %rbp,%rax + movq -16(%rcx),%rbp + adcq $0,%rdx + addq -8(%r14),%r11 + adcq $0,%rdx + movq %rdx,%r10 + + mulq %rbp + addq %rax,%rdi + movq (%rsi,%r9,1),%rax + adcq $0,%rdx + addq %r11,%rdi + adcq $0,%rdx + movq %r13,-24(%r14) + movq %rdx,%r13 + +.byte 102,72,15,126,195 + movq %rdi,-16(%r14) + leaq (%rcx,%r9,2),%rcx + + xorq %rdi,%rdi + addq %r10,%r13 + adcq $0,%rdi + addq (%r14),%r13 + adcq $0,%rdi + movq %r13,-8(%r14) + + cmpq 16+8(%rsp),%r12 + jb L$outer4x + subq %r13,%rbp + adcq %r15,%r15 + orq %r15,%rdi + xorq $1,%rdi + leaq (%r14,%r9,1),%rbx + leaq (%rcx,%rdi,8),%rbp + movq %r9,%rcx + sarq $3+2,%rcx + movq 56+8(%rsp),%rdi + jmp L$sqr4x_sub + +.globl _bn_power5 + +.p2align 5 +_bn_power5: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$pwr_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$pwr_sp_done + +.p2align 5 +L$pwr_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$pwr_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$power5_body: +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 102,73,15,110,218 +.byte 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +.byte 102,72,15,126,209 +.byte 102,72,15,126,226 + movq %rsi,%rdi + movq 40(%rsp),%rax + leaq 32(%rsp),%r8 + + call mul4x_internal + + movq 40(%rsp),%rsi + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$power5_epilogue: + .byte 0xf3,0xc3 + + +.globl _bn_sqr8x_internal +.private_extern _bn_sqr8x_internal + +.p2align 5 +_bn_sqr8x_internal: +__bn_sqr8x_internal: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + leaq 32(%r10),%rbp + leaq (%rsi,%r9,1),%rsi + + movq %r9,%rcx + + + movq -32(%rsi,%rbp,1),%r14 + leaq 
48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + movq %r10,-24(%rdi,%rbp,1) + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + movq %r11,-16(%rdi,%rbp,1) + movq %rdx,%r10 + + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + movq %rax,%r12 + movq %rbx,%rax + movq %rdx,%r13 + + leaq (%rbp),%rcx + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + jmp L$sqr4x_1st + +.p2align 5 +L$sqr4x_1st: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq 16(%rsi,%rcx,1),%rbx + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %r10,8(%rdi,%rcx,1) + movq %rdx,%r12 + adcq $0,%r12 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 24(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,16(%rdi,%rcx,1) + movq %rdx,%r13 + adcq $0,%r13 + leaq 32(%rcx),%rcx + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_1st + + mulq %r15 + addq %rax,%r13 + leaq 16(%rbp),%rbp + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + jmp L$sqr4x_outer + +.p2align 5 +L$sqr4x_outer: + movq -32(%rsi,%rbp,1),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi,%rbp,1),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi,%rbp,1),%rbx + movq %rax,%r15 + + mulq %r14 + movq -24(%rdi,%rbp,1),%r10 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + movq %r10,-24(%rdi,%rbp,1) + movq %rdx,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + adcq $0,%rdx + addq -16(%rdi,%rbp,1),%r11 + movq %rdx,%r10 + adcq $0,%r10 + movq %r11,-16(%rdi,%rbp,1) + + xorq %r12,%r12 + + movq -8(%rsi,%rbp,1),%rbx + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + adcq $0,%rdx + addq -8(%rdi,%rbp,1),%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rbp,1) + + leaq (%rbp),%rcx + jmp L$sqr4x_inner + +.p2align 5 +L$sqr4x_inner: + movq (%rsi,%rcx,1),%rbx + mulq %r15 + addq %rax,%r13 + movq %rbx,%rax + movq %rdx,%r12 + adcq $0,%r12 + addq (%rdi,%rcx,1),%r13 + adcq $0,%r12 + +.byte 0x67 + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq 8(%rsi,%rcx,1),%rbx + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %r11,(%rdi,%rcx,1) + movq %rbx,%rax + movq %rdx,%r13 + adcq $0,%r13 + addq 8(%rdi,%rcx,1),%r12 + leaq 16(%rcx),%rcx + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + adcq $0,%rdx + addq %r12,%r10 + movq %rdx,%r11 + adcq $0,%r11 + movq %r10,-8(%rdi,%rcx,1) + + cmpq $0,%rcx + jne L$sqr4x_inner + +.byte 0x67 + mulq %r15 + addq %rax,%r13 + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + addq $16,%rbp + jnz L$sqr4x_outer + + + movq 
-32(%rsi),%r14 + leaq 48+8(%rsp,%r9,2),%rdi + movq -24(%rsi),%rax + leaq -32(%rdi,%rbp,1),%rdi + movq -16(%rsi),%rbx + movq %rax,%r15 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + + mulq %r14 + addq %rax,%r11 + movq %rbx,%rax + movq %r10,-24(%rdi) + movq %rdx,%r10 + adcq $0,%r10 + addq %r13,%r11 + movq -8(%rsi),%rbx + adcq $0,%r10 + + mulq %r15 + addq %rax,%r12 + movq %rbx,%rax + movq %r11,-16(%rdi) + movq %rdx,%r13 + adcq $0,%r13 + + mulq %r14 + addq %rax,%r10 + movq %rbx,%rax + movq %rdx,%r11 + adcq $0,%r11 + addq %r12,%r10 + adcq $0,%r11 + movq %r10,-8(%rdi) + + mulq %r15 + addq %rax,%r13 + movq -16(%rsi),%rax + adcq $0,%rdx + addq %r11,%r13 + adcq $0,%rdx + + movq %r13,(%rdi) + movq %rdx,%r12 + movq %rdx,8(%rdi) + + mulq %rbx + addq $16,%rbp + xorq %r14,%r14 + subq %r9,%rbp + xorq %r15,%r15 + + addq %r12,%rax + adcq $0,%rdx + movq %rax,8(%rdi) + movq %rdx,16(%rdi) + movq %r15,24(%rdi) + + movq -16(%rsi,%rbp,1),%rax + leaq 48+8(%rsp),%rdi + xorq %r10,%r10 + movq 8(%rdi),%r11 + + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + leaq 16(%rbp),%rbp + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + jmp L$sqr4x_shift_n_add + +.p2align 5 +L$sqr4x_shift_n_add: + leaq (%r14,%r10,2),%r12 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi,%rbp,1),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 0(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 8(%rdi),%r11 + adcq %rax,%rbx + movq 0(%rsi,%rbp,1),%rax + movq %rbx,-16(%rdi) + adcq %rdx,%r8 + + leaq (%r14,%r10,2),%r12 + movq %r8,-8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq 16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 24(%rdi),%r11 + adcq %rax,%r12 + movq 8(%rsi,%rbp,1),%rax + movq %r12,0(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,8(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + movq 32(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq 40(%rdi),%r11 + adcq %rax,%rbx + movq 16(%rsi,%rbp,1),%rax + movq %rbx,16(%rdi) + adcq %rdx,%r8 + movq %r8,24(%rdi) + sbbq %r15,%r15 + leaq 64(%rdi),%rdi + addq $32,%rbp + jnz L$sqr4x_shift_n_add + + leaq (%r14,%r10,2),%r12 +.byte 0x67 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r13 + shrq $63,%r11 + orq %r10,%r13 + movq -16(%rdi),%r10 + movq %r11,%r14 + mulq %rax + negq %r15 + movq -8(%rdi),%r11 + adcq %rax,%r12 + movq -8(%rsi),%rax + movq %r12,-32(%rdi) + adcq %rdx,%r13 + + leaq (%r14,%r10,2),%rbx + movq %r13,-24(%rdi) + sbbq %r15,%r15 + shrq $63,%r10 + leaq (%rcx,%r11,2),%r8 + shrq $63,%r11 + orq %r10,%r8 + mulq %rax + negq %r15 + adcq %rax,%rbx + adcq %rdx,%r8 + movq %rbx,-16(%rdi) + movq %r8,-8(%rdi) +.byte 102,72,15,126,213 
+sqr8x_reduction: + xorq %rax,%rax + leaq (%rbp,%r9,2),%rcx + leaq 48+8(%rsp,%r9,2),%rdx + movq %rcx,0+8(%rsp) + leaq 48+8(%rsp,%r9,1),%rdi + movq %rdx,8+8(%rsp) + negq %r9 + jmp L$8x_reduction_loop + +.p2align 5 +L$8x_reduction_loop: + leaq (%rdi,%r9,1),%rdi +.byte 0x66 + movq 0(%rdi),%rbx + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + movq %rax,(%rdx) + leaq 64(%rdi),%rdi + +.byte 0x67 + movq %rbx,%r8 + imulq 32+8(%rsp),%rbx + movq 0(%rbp),%rax + movl $8,%ecx + jmp L$8x_reduce + +.p2align 5 +L$8x_reduce: + mulq %rbx + movq 16(%rbp),%rax + negq %r8 + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + movq %rbx,48-8+8(%rsp,%rcx,8) + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq 32+8(%rsp),%rsi + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + imulq %r8,%rsi + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq %rsi,%rbx + addq %rax,%r15 + movq 0(%rbp),%rax + adcq $0,%rdx + addq %r15,%r14 + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_reduce + + leaq 128(%rbp),%rbp + xorq %rax,%rax + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_no_tail + +.byte 0x66 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movq 48+56+8(%rsp),%rbx + movl $8,%ecx + movq 0(%rbp),%rax + jmp L$8x_tail + +.p2align 5 +L$8x_tail: + mulq %rbx + addq %rax,%r8 + movq 16(%rbp),%rax + movq %r8,(%rdi) + movq %rdx,%r8 + adcq $0,%r8 + + mulq %rbx + addq %rax,%r9 + movq 32(%rbp),%rax + adcq $0,%rdx + addq %r9,%r8 + leaq 8(%rdi),%rdi + movq %rdx,%r9 + adcq $0,%r9 + + mulq %rbx + addq %rax,%r10 + movq 48(%rbp),%rax + adcq $0,%rdx + addq %r10,%r9 + movq %rdx,%r10 + adcq $0,%r10 + + mulq %rbx + addq %rax,%r11 + movq 64(%rbp),%rax + adcq $0,%rdx + addq %r11,%r10 + movq %rdx,%r11 + adcq $0,%r11 + + mulq %rbx + addq %rax,%r12 + movq 80(%rbp),%rax + adcq $0,%rdx + addq %r12,%r11 + movq %rdx,%r12 + adcq $0,%r12 + + mulq %rbx + addq %rax,%r13 + movq 96(%rbp),%rax + adcq $0,%rdx + addq %r13,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + mulq %rbx + addq %rax,%r14 + movq 112(%rbp),%rax + adcq $0,%rdx + addq %r14,%r13 + movq %rdx,%r14 + adcq $0,%r14 + + mulq %rbx + movq 48-16+8(%rsp,%rcx,8),%rbx + addq %rax,%r15 + adcq $0,%rdx + addq %r15,%r14 + movq 0(%rbp),%rax + movq %rdx,%r15 + adcq $0,%r15 + + decl %ecx + jnz L$8x_tail + + leaq 128(%rbp),%rbp + movq 8+8(%rsp),%rdx + cmpq 0+8(%rsp),%rbp + jae L$8x_tail_done + + movq 48+56+8(%rsp),%rbx + negq %rsi + movq 0(%rbp),%rax + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + sbbq %rsi,%rsi + + movl $8,%ecx + jmp L$8x_tail + +.p2align 5 +L$8x_tail_done: + addq (%rdx),%r8 + xorq %rax,%rax + + negq %rsi +L$8x_no_tail: + adcq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + adcq 
32(%rdi),%r12 + adcq 40(%rdi),%r13 + adcq 48(%rdi),%r14 + adcq 56(%rdi),%r15 + adcq $0,%rax + movq -16(%rbp),%rcx + xorq %rsi,%rsi + +.byte 102,72,15,126,213 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) +.byte 102,73,15,126,217 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rdi),%rdi + + cmpq %rdx,%rdi + jb L$8x_reduction_loop + + subq %r15,%rcx + leaq (%rdi,%r9,1),%rbx + adcq %rsi,%rsi + movq %r9,%rcx + orq %rsi,%rax +.byte 102,72,15,126,207 + xorq $1,%rax +.byte 102,72,15,126,206 + leaq (%rbp,%rax,8),%rbp + sarq $3+2,%rcx + jmp L$sqr4x_sub + +.p2align 5 +L$sqr4x_sub: +.byte 0x66 + movq 0(%rbx),%r12 + movq 8(%rbx),%r13 + sbbq 0(%rbp),%r12 + movq 16(%rbx),%r14 + sbbq 16(%rbp),%r13 + movq 24(%rbx),%r15 + leaq 32(%rbx),%rbx + sbbq 32(%rbp),%r14 + movq %r12,0(%rdi) + sbbq 48(%rbp),%r15 + leaq 64(%rbp),%rbp + movq %r13,8(%rdi) + movq %r14,16(%rdi) + movq %r15,24(%rdi) + leaq 32(%rdi),%rdi + + incq %rcx + jnz L$sqr4x_sub + movq %r9,%r10 + negq %r9 + .byte 0xf3,0xc3 + +.globl _bn_from_montgomery + +.p2align 5 +_bn_from_montgomery: + testl $7,%r9d + jz bn_from_mont8x + xorl %eax,%eax + .byte 0xf3,0xc3 + + + +.p2align 5 +bn_from_mont8x: +.byte 0x67 + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +.byte 0x67 + movl %r9d,%r10d + shll $3,%r9d + shll $3+2,%r10d + negq %r9 + movq (%r8),%r8 + + + + + + + + leaq -64(%rsp,%r9,2),%r11 + subq %rsi,%r11 + andq $4095,%r11 + cmpq %r11,%r10 + jb L$from_sp_alt + subq %r11,%rsp + leaq -64(%rsp,%r9,2),%rsp + jmp L$from_sp_done + +.p2align 5 +L$from_sp_alt: + leaq 4096-64(,%r9,2),%r10 + leaq -64(%rsp,%r9,2),%rsp + subq %r10,%r11 + movq $0,%r10 + cmovcq %r10,%r11 + subq %r11,%rsp +L$from_sp_done: + andq $-64,%rsp + movq %r9,%r10 + negq %r9 + + + + + + + + + + + movq %r8,32(%rsp) + movq %rax,40(%rsp) +L$from_body: + movq %r9,%r11 + leaq 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp L$mul_by_1 + +.p2align 5 +L$mul_by_1: + movdqu (%rsi),%xmm1 + movdqu 16(%rsi),%xmm2 + movdqu 32(%rsi),%xmm3 + movdqa %xmm0,(%rax,%r9,1) + movdqu 48(%rsi),%xmm4 + movdqa %xmm0,16(%rax,%r9,1) +.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,%r9,1) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,%r9,1) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + leaq 64(%rax),%rax + subq $64,%r11 + jnz L$mul_by_1 + +.byte 102,72,15,110,207 +.byte 102,72,15,110,209 +.byte 0x67 + movq %rcx,%rbp +.byte 102,73,15,110,218 + call sqr8x_reduction + + pxor %xmm0,%xmm0 + leaq 48(%rsp),%rax + movq 40(%rsp),%rsi + jmp L$from_mont_zero + +.p2align 5 +L$from_mont_zero: + movdqa %xmm0,0(%rax) + movdqa %xmm0,16(%rax) + movdqa %xmm0,32(%rax) + movdqa %xmm0,48(%rax) + leaq 64(%rax),%rax + subq $32,%r9 + jnz L$from_mont_zero + + movq $1,%rax + movq -48(%rsi),%r15 + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$from_epilogue: + .byte 0xf3,0xc3 + +.globl _bn_get_bits5 + +.p2align 4 +_bn_get_bits5: + movq %rdi,%r10 + movl %esi,%ecx + shrl $3,%esi + movzwl (%r10,%rsi,1),%eax + andl $7,%ecx + shrl %cl,%eax + andl $31,%eax + .byte 0xf3,0xc3 + + +.globl _bn_scatter5 + +.p2align 4 +_bn_scatter5: + cmpl $0,%esi + jz L$scatter_epilogue + leaq (%rdx,%rcx,8),%rdx +L$scatter: + movq (%rdi),%rax + leaq 8(%rdi),%rdi + movq %rax,(%rdx) + leaq 256(%rdx),%rdx + subl $1,%esi + jnz L$scatter +L$scatter_epilogue: + .byte 0xf3,0xc3 + + +.globl _bn_gather5 + +.p2align 4 +_bn_gather5: + movl %ecx,%r11d + 
shrl $3,%ecx + andq $7,%r11 + notl %ecx + leaq L$magic_masks(%rip),%rax + andl $3,%ecx + leaq 128(%rdx,%r11,8),%rdx + movq 0(%rax,%rcx,8),%xmm4 + movq 8(%rax,%rcx,8),%xmm5 + movq 16(%rax,%rcx,8),%xmm6 + movq 24(%rax,%rcx,8),%xmm7 + jmp L$gather +.p2align 4 +L$gather: + movq -128(%rdx),%xmm0 + movq -64(%rdx),%xmm1 + pand %xmm4,%xmm0 + movq 0(%rdx),%xmm2 + pand %xmm5,%xmm1 + movq 64(%rdx),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 +.byte 0x67,0x67 + por %xmm2,%xmm0 + leaq 256(%rdx),%rdx + por %xmm3,%xmm0 + + movq %xmm0,(%rdi) + leaq 8(%rdi),%rdi + subl $1,%esi + jnz L$gather + .byte 0xf3,0xc3 +L$SEH_end_bn_gather5: + +.p2align 6 +L$magic_masks: +.long 0,0, 0,0, 0,0, -1,-1 +.long 0,0, 0,0, 0,0, 0,0 +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/camellia/cmll-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/camellia/cmll-x86_64.s new file mode 100644 index 00000000000000..0a3145ad4b8969 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/camellia/cmll-x86_64.s @@ -0,0 +1,1838 @@ +.text + + +.globl _Camellia_EncryptBlock + +.p2align 4 +_Camellia_EncryptBlock: + movl $128,%eax + subl %edi,%eax + movl $3,%edi + adcl $0,%edi + jmp L$enc_rounds + + +.globl _Camellia_EncryptBlock_Rounds + +.p2align 4 +L$enc_rounds: +_Camellia_EncryptBlock_Rounds: + pushq %rbx + pushq %rbp + pushq %r13 + pushq %r14 + pushq %r15 +L$enc_prologue: + + + movq %rcx,%r13 + movq %rdx,%r14 + + shll $6,%edi + leaq L$Camellia_SBOX(%rip),%rbp + leaq (%r14,%rdi,1),%r15 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + bswapl %r8d + movl 12(%rsi),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_encrypt + + bswapl %r8d + bswapl %r9d + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq 0(%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r13 + movq 24(%rsp),%rbp + movq 32(%rsp),%rbx + leaq 40(%rsp),%rsp +L$enc_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 4 +_x86_64_Camellia_encrypt: + xorl 0(%r14),%r9d + xorl 4(%r14),%r8d + xorl 8(%r14),%r11d + xorl 12(%r14),%r10d +.p2align 4 +L$eloop: + movl 16(%r14),%ebx + movl 20(%r14),%eax + + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 24(%r14),%ebx + movl 28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 32(%r14),%ebx + movl 36(%r14),%eax + 
xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 40(%r14),%ebx + movl 44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 48(%r14),%ebx + movl 52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 56(%r14),%ebx + movl 60(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 64(%r14),%ebx + movl 68(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + leaq 64(%r14),%r14 + cmpq %r15,%r14 + movl 8(%r14),%edx + movl 12(%r14),%ecx + je L$edone + + andl %r8d,%eax + orl %r11d,%edx + roll $1,%eax + xorl %edx,%r10d + xorl %eax,%r9d + andl %r10d,%ecx + orl %r9d,%ebx + roll $1,%ecx + xorl %ebx,%r8d + xorl %ecx,%r11d + jmp L$eloop + +.p2align 4 +L$edone: + xorl %r10d,%eax + xorl %r11d,%ebx + xorl %r8d,%ecx + xorl %r9d,%edx + + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r10d + movl %edx,%r11d + +.byte 0xf3,0xc3 + + + +.globl _Camellia_DecryptBlock + +.p2align 4 +_Camellia_DecryptBlock: + movl $128,%eax + subl %edi,%eax + movl $3,%edi + adcl $0,%edi + jmp L$dec_rounds + + +.globl _Camellia_DecryptBlock_Rounds + +.p2align 4 +L$dec_rounds: +_Camellia_DecryptBlock_Rounds: + pushq %rbx + pushq %rbp + pushq %r13 + pushq %r14 + pushq %r15 +L$dec_prologue: + + + movq %rcx,%r13 + movq %rdx,%r15 + + shll $6,%edi + leaq L$Camellia_SBOX(%rip),%rbp + leaq (%r15,%rdi,1),%r14 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + bswapl %r8d + movl 12(%rsi),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_decrypt + + bswapl %r8d + bswapl %r9d + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl 
%r10d,8(%r13) + movl %r11d,12(%r13) + + movq 0(%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r13 + movq 24(%rsp),%rbp + movq 32(%rsp),%rbx + leaq 40(%rsp),%rsp +L$dec_epilogue: + .byte 0xf3,0xc3 + + + +.p2align 4 +_x86_64_Camellia_decrypt: + xorl 0(%r14),%r9d + xorl 4(%r14),%r8d + xorl 8(%r14),%r11d + xorl 12(%r14),%r10d +.p2align 4 +L$dloop: + movl -8(%r14),%ebx + movl -4(%r14),%eax + + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -16(%r14),%ebx + movl -12(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -24(%r14),%ebx + movl -20(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -32(%r14),%ebx + movl -28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -40(%r14),%ebx + movl -36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -48(%r14),%ebx + movl -44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 
2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl -56(%r14),%ebx + movl -52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + leaq -64(%r14),%r14 + cmpq %r15,%r14 + movl 0(%r14),%edx + movl 4(%r14),%ecx + je L$ddone + + andl %r8d,%eax + orl %r11d,%edx + roll $1,%eax + xorl %edx,%r10d + xorl %eax,%r9d + andl %r10d,%ecx + orl %r9d,%ebx + roll $1,%ecx + xorl %ebx,%r8d + xorl %ecx,%r11d + + jmp L$dloop + +.p2align 4 +L$ddone: + xorl %r10d,%ecx + xorl %r11d,%edx + xorl %r8d,%eax + xorl %r9d,%ebx + + movl %ecx,%r8d + movl %edx,%r9d + movl %eax,%r10d + movl %ebx,%r11d + +.byte 0xf3,0xc3 + +.globl _Camellia_Ekeygen + +.p2align 4 +_Camellia_Ekeygen: + pushq %rbx + pushq %rbp + pushq %r13 + pushq %r14 + pushq %r15 +L$key_prologue: + + movl %edi,%r15d + movq %rdx,%r13 + + movl 0(%rsi),%r8d + movl 4(%rsi),%r9d + movl 8(%rsi),%r10d + movl 12(%rsi),%r11d + + bswapl %r8d + bswapl %r9d + bswapl %r10d + bswapl %r11d + movl %r9d,0(%r13) + movl %r8d,4(%r13) + movl %r11d,8(%r13) + movl %r10d,12(%r13) + cmpq $128,%r15 + je L$1st128 + + movl 16(%rsi),%r8d + movl 20(%rsi),%r9d + cmpq $192,%r15 + je L$1st192 + movl 24(%rsi),%r10d + movl 28(%rsi),%r11d + jmp L$1st256 +L$1st192: + movl %r8d,%r10d + movl %r9d,%r11d + notl %r10d + notl %r11d +L$1st256: + bswapl %r8d + bswapl %r9d + bswapl %r10d + bswapl %r11d + movl %r9d,32(%r13) + movl %r8d,36(%r13) + movl %r11d,40(%r13) + movl %r10d,44(%r13) + xorl 0(%r13),%r9d + xorl 4(%r13),%r8d + xorl 8(%r13),%r11d + xorl 12(%r13),%r10d + +L$1st128: + leaq L$Camellia_SIGMA(%rip),%r14 + leaq L$Camellia_SBOX(%rip),%rbp + + movl 0(%r14),%ebx + movl 4(%r14),%eax + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 8(%r14),%ebx + movl 12(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 16(%r14),%ebx + movl 20(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + xorl 0(%r13),%r9d + xorl 4(%r13),%r8d + xorl 8(%r13),%r11d + xorl 12(%r13),%r10d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 24(%r14),%ebx + movl 28(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + 
movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 32(%r14),%ebx + movl 36(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + cmpq $128,%r15 + jne L$2nd256 + + leaq 128(%r13),%r13 + shlq $32,%r8 + shlq $32,%r10 + orq %r9,%r8 + orq %r11,%r10 + movq -128(%r13),%rax + movq -120(%r13),%rbx + movq %r8,-112(%r13) + movq %r10,-104(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rax,-96(%r13) + movq %rbx,-88(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-80(%r13) + movq %r10,-72(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-64(%r13) + movq %r10,-56(%r13) + movq %rax,%r11 + shlq $30,%rax + movq %rbx,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%rax + shlq $30,%rbx + orq %r11,%rbx + movq %rax,-48(%r13) + movq %rbx,-40(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-32(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rbx,-24(%r13) + movq %r8,%r11 + shlq $15,%r8 + movq %r10,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r8 + shlq $15,%r10 + orq %r11,%r10 + movq %r8,-16(%r13) + movq %r10,-8(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,0(%r13) + movq %rbx,8(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,16(%r13) + movq %rbx,24(%r13) + movq %r8,%r11 + shlq $34,%r8 + movq %r10,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%r8 + shlq $34,%r10 + orq %r11,%r10 + movq %r8,32(%r13) + movq %r10,40(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,48(%r13) + movq %rbx,56(%r13) + movq %r8,%r11 + shlq $17,%r8 + movq %r10,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%r8 + shlq $17,%r10 + orq %r11,%r10 + movq %r8,64(%r13) + movq %r10,72(%r13) + movl $3,%eax + jmp L$done +.p2align 4 +L$2nd256: + movl %r9d,48(%r13) + movl %r8d,52(%r13) + movl %r11d,56(%r13) + movl %r10d,60(%r13) + xorl 32(%r13),%r9d + xorl 36(%r13),%r8d + xorl 40(%r13),%r11d + xorl 44(%r13),%r10d + xorl %r8d,%eax + xorl %r9d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 40(%r14),%ebx + movl 44(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r10d + xorl %ecx,%r11d + xorl %edx,%r11d + xorl %r10d,%eax + xorl %r11d,%ebx + movzbl %ah,%esi + movzbl %bl,%edi + 
movl 2052(%rbp,%rsi,8),%edx + movl 0(%rbp,%rdi,8),%ecx + movzbl %al,%esi + shrl $16,%eax + movzbl %bh,%edi + xorl 4(%rbp,%rsi,8),%edx + shrl $16,%ebx + xorl 4(%rbp,%rdi,8),%ecx + movzbl %ah,%esi + movzbl %bl,%edi + xorl 0(%rbp,%rsi,8),%edx + xorl 2052(%rbp,%rdi,8),%ecx + movzbl %al,%esi + movzbl %bh,%edi + xorl 2048(%rbp,%rsi,8),%edx + xorl 2048(%rbp,%rdi,8),%ecx + movl 48(%r14),%ebx + movl 52(%r14),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl %ecx,%r8d + xorl %ecx,%r9d + xorl %edx,%r9d + movq 0(%r13),%rax + movq 8(%r13),%rbx + movq 32(%r13),%rcx + movq 40(%r13),%rdx + movq 48(%r13),%r14 + movq 56(%r13),%r15 + leaq 128(%r13),%r13 + shlq $32,%r8 + shlq $32,%r10 + orq %r9,%r8 + orq %r11,%r10 + movq %r8,-112(%r13) + movq %r10,-104(%r13) + movq %rcx,%r11 + shlq $15,%rcx + movq %rdx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rcx + shlq $15,%rdx + orq %r11,%rdx + movq %rcx,-96(%r13) + movq %rdx,-88(%r13) + movq %r14,%r11 + shlq $15,%r14 + movq %r15,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%r14 + shlq $15,%r15 + orq %r11,%r15 + movq %r14,-80(%r13) + movq %r15,-72(%r13) + movq %rcx,%r11 + shlq $15,%rcx + movq %rdx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rcx + shlq $15,%rdx + orq %r11,%rdx + movq %rcx,-64(%r13) + movq %rdx,-56(%r13) + movq %r8,%r11 + shlq $30,%r8 + movq %r10,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r8 + shlq $30,%r10 + orq %r11,%r10 + movq %r8,-48(%r13) + movq %r10,-40(%r13) + movq %rax,%r11 + shlq $45,%rax + movq %rbx,%r9 + shrq $19,%r9 + shrq $19,%r11 + orq %r9,%rax + shlq $45,%rbx + orq %r11,%rbx + movq %rax,-32(%r13) + movq %rbx,-24(%r13) + movq %r14,%r11 + shlq $30,%r14 + movq %r15,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r14 + shlq $30,%r15 + orq %r11,%r15 + movq %r14,-16(%r13) + movq %r15,-8(%r13) + movq %rax,%r11 + shlq $15,%rax + movq %rbx,%r9 + shrq $49,%r9 + shrq $49,%r11 + orq %r9,%rax + shlq $15,%rbx + orq %r11,%rbx + movq %rax,0(%r13) + movq %rbx,8(%r13) + movq %rcx,%r11 + shlq $30,%rcx + movq %rdx,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%rcx + shlq $30,%rdx + orq %r11,%rdx + movq %rcx,16(%r13) + movq %rdx,24(%r13) + movq %r8,%r11 + shlq $30,%r8 + movq %r10,%r9 + shrq $34,%r9 + shrq $34,%r11 + orq %r9,%r8 + shlq $30,%r10 + orq %r11,%r10 + movq %r8,32(%r13) + movq %r10,40(%r13) + movq %rax,%r11 + shlq $17,%rax + movq %rbx,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%rax + shlq $17,%rbx + orq %r11,%rbx + movq %rax,48(%r13) + movq %rbx,56(%r13) + movq %r14,%r11 + shlq $32,%r14 + movq %r15,%r9 + shrq $32,%r9 + shrq $32,%r11 + orq %r9,%r14 + shlq $32,%r15 + orq %r11,%r15 + movq %r14,64(%r13) + movq %r15,72(%r13) + movq %rcx,%r11 + shlq $34,%rcx + movq %rdx,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%rcx + shlq $34,%rdx + orq %r11,%rdx + movq %rcx,80(%r13) + movq %rdx,88(%r13) + movq %r14,%r11 + shlq $17,%r14 + movq %r15,%r9 + shrq $47,%r9 + shrq $47,%r11 + orq %r9,%r14 + shlq $17,%r15 + orq %r11,%r15 + movq %r14,96(%r13) + movq %r15,104(%r13) + movq %rax,%r11 + shlq $34,%rax + movq %rbx,%r9 + shrq $30,%r9 + shrq $30,%r11 + orq %r9,%rax + shlq $34,%rbx + orq %r11,%rbx + movq %rax,112(%r13) + movq %rbx,120(%r13) + movq %r8,%r11 + shlq $51,%r8 + movq %r10,%r9 + shrq $13,%r9 + shrq $13,%r11 + orq %r9,%r8 + shlq $51,%r10 + orq %r11,%r10 + movq %r8,128(%r13) + movq %r10,136(%r13) + movl $4,%eax +L$done: + movq 0(%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r13 + movq 24(%rsp),%rbp + movq 32(%rsp),%rbx + leaq 40(%rsp),%rsp +L$key_epilogue: + .byte 0xf3,0xc3 + +.p2align 6 +L$Camellia_SIGMA: +.long 0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858 
+.long 0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5 +.long 0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2 +.long 0, 0, 0, 0 +L$Camellia_SBOX: +.long 0x70707000,0x70700070 +.long 0x82828200,0x2c2c002c +.long 0x2c2c2c00,0xb3b300b3 +.long 0xececec00,0xc0c000c0 +.long 0xb3b3b300,0xe4e400e4 +.long 0x27272700,0x57570057 +.long 0xc0c0c000,0xeaea00ea +.long 0xe5e5e500,0xaeae00ae +.long 0xe4e4e400,0x23230023 +.long 0x85858500,0x6b6b006b +.long 0x57575700,0x45450045 +.long 0x35353500,0xa5a500a5 +.long 0xeaeaea00,0xeded00ed +.long 0x0c0c0c00,0x4f4f004f +.long 0xaeaeae00,0x1d1d001d +.long 0x41414100,0x92920092 +.long 0x23232300,0x86860086 +.long 0xefefef00,0xafaf00af +.long 0x6b6b6b00,0x7c7c007c +.long 0x93939300,0x1f1f001f +.long 0x45454500,0x3e3e003e +.long 0x19191900,0xdcdc00dc +.long 0xa5a5a500,0x5e5e005e +.long 0x21212100,0x0b0b000b +.long 0xededed00,0xa6a600a6 +.long 0x0e0e0e00,0x39390039 +.long 0x4f4f4f00,0xd5d500d5 +.long 0x4e4e4e00,0x5d5d005d +.long 0x1d1d1d00,0xd9d900d9 +.long 0x65656500,0x5a5a005a +.long 0x92929200,0x51510051 +.long 0xbdbdbd00,0x6c6c006c +.long 0x86868600,0x8b8b008b +.long 0xb8b8b800,0x9a9a009a +.long 0xafafaf00,0xfbfb00fb +.long 0x8f8f8f00,0xb0b000b0 +.long 0x7c7c7c00,0x74740074 +.long 0xebebeb00,0x2b2b002b +.long 0x1f1f1f00,0xf0f000f0 +.long 0xcecece00,0x84840084 +.long 0x3e3e3e00,0xdfdf00df +.long 0x30303000,0xcbcb00cb +.long 0xdcdcdc00,0x34340034 +.long 0x5f5f5f00,0x76760076 +.long 0x5e5e5e00,0x6d6d006d +.long 0xc5c5c500,0xa9a900a9 +.long 0x0b0b0b00,0xd1d100d1 +.long 0x1a1a1a00,0x04040004 +.long 0xa6a6a600,0x14140014 +.long 0xe1e1e100,0x3a3a003a +.long 0x39393900,0xdede00de +.long 0xcacaca00,0x11110011 +.long 0xd5d5d500,0x32320032 +.long 0x47474700,0x9c9c009c +.long 0x5d5d5d00,0x53530053 +.long 0x3d3d3d00,0xf2f200f2 +.long 0xd9d9d900,0xfefe00fe +.long 0x01010100,0xcfcf00cf +.long 0x5a5a5a00,0xc3c300c3 +.long 0xd6d6d600,0x7a7a007a +.long 0x51515100,0x24240024 +.long 0x56565600,0xe8e800e8 +.long 0x6c6c6c00,0x60600060 +.long 0x4d4d4d00,0x69690069 +.long 0x8b8b8b00,0xaaaa00aa +.long 0x0d0d0d00,0xa0a000a0 +.long 0x9a9a9a00,0xa1a100a1 +.long 0x66666600,0x62620062 +.long 0xfbfbfb00,0x54540054 +.long 0xcccccc00,0x1e1e001e +.long 0xb0b0b000,0xe0e000e0 +.long 0x2d2d2d00,0x64640064 +.long 0x74747400,0x10100010 +.long 0x12121200,0x00000000 +.long 0x2b2b2b00,0xa3a300a3 +.long 0x20202000,0x75750075 +.long 0xf0f0f000,0x8a8a008a +.long 0xb1b1b100,0xe6e600e6 +.long 0x84848400,0x09090009 +.long 0x99999900,0xdddd00dd +.long 0xdfdfdf00,0x87870087 +.long 0x4c4c4c00,0x83830083 +.long 0xcbcbcb00,0xcdcd00cd +.long 0xc2c2c200,0x90900090 +.long 0x34343400,0x73730073 +.long 0x7e7e7e00,0xf6f600f6 +.long 0x76767600,0x9d9d009d +.long 0x05050500,0xbfbf00bf +.long 0x6d6d6d00,0x52520052 +.long 0xb7b7b700,0xd8d800d8 +.long 0xa9a9a900,0xc8c800c8 +.long 0x31313100,0xc6c600c6 +.long 0xd1d1d100,0x81810081 +.long 0x17171700,0x6f6f006f +.long 0x04040400,0x13130013 +.long 0xd7d7d700,0x63630063 +.long 0x14141400,0xe9e900e9 +.long 0x58585800,0xa7a700a7 +.long 0x3a3a3a00,0x9f9f009f +.long 0x61616100,0xbcbc00bc +.long 0xdedede00,0x29290029 +.long 0x1b1b1b00,0xf9f900f9 +.long 0x11111100,0x2f2f002f +.long 0x1c1c1c00,0xb4b400b4 +.long 0x32323200,0x78780078 +.long 0x0f0f0f00,0x06060006 +.long 0x9c9c9c00,0xe7e700e7 +.long 0x16161600,0x71710071 +.long 0x53535300,0xd4d400d4 +.long 0x18181800,0xabab00ab +.long 0xf2f2f200,0x88880088 +.long 0x22222200,0x8d8d008d +.long 0xfefefe00,0x72720072 +.long 0x44444400,0xb9b900b9 +.long 0xcfcfcf00,0xf8f800f8 +.long 0xb2b2b200,0xacac00ac +.long 0xc3c3c300,0x36360036 +.long 
0xb5b5b500,0x2a2a002a +.long 0x7a7a7a00,0x3c3c003c +.long 0x91919100,0xf1f100f1 +.long 0x24242400,0x40400040 +.long 0x08080800,0xd3d300d3 +.long 0xe8e8e800,0xbbbb00bb +.long 0xa8a8a800,0x43430043 +.long 0x60606000,0x15150015 +.long 0xfcfcfc00,0xadad00ad +.long 0x69696900,0x77770077 +.long 0x50505000,0x80800080 +.long 0xaaaaaa00,0x82820082 +.long 0xd0d0d000,0xecec00ec +.long 0xa0a0a000,0x27270027 +.long 0x7d7d7d00,0xe5e500e5 +.long 0xa1a1a100,0x85850085 +.long 0x89898900,0x35350035 +.long 0x62626200,0x0c0c000c +.long 0x97979700,0x41410041 +.long 0x54545400,0xefef00ef +.long 0x5b5b5b00,0x93930093 +.long 0x1e1e1e00,0x19190019 +.long 0x95959500,0x21210021 +.long 0xe0e0e000,0x0e0e000e +.long 0xffffff00,0x4e4e004e +.long 0x64646400,0x65650065 +.long 0xd2d2d200,0xbdbd00bd +.long 0x10101000,0xb8b800b8 +.long 0xc4c4c400,0x8f8f008f +.long 0x00000000,0xebeb00eb +.long 0x48484800,0xcece00ce +.long 0xa3a3a300,0x30300030 +.long 0xf7f7f700,0x5f5f005f +.long 0x75757500,0xc5c500c5 +.long 0xdbdbdb00,0x1a1a001a +.long 0x8a8a8a00,0xe1e100e1 +.long 0x03030300,0xcaca00ca +.long 0xe6e6e600,0x47470047 +.long 0xdadada00,0x3d3d003d +.long 0x09090900,0x01010001 +.long 0x3f3f3f00,0xd6d600d6 +.long 0xdddddd00,0x56560056 +.long 0x94949400,0x4d4d004d +.long 0x87878700,0x0d0d000d +.long 0x5c5c5c00,0x66660066 +.long 0x83838300,0xcccc00cc +.long 0x02020200,0x2d2d002d +.long 0xcdcdcd00,0x12120012 +.long 0x4a4a4a00,0x20200020 +.long 0x90909000,0xb1b100b1 +.long 0x33333300,0x99990099 +.long 0x73737300,0x4c4c004c +.long 0x67676700,0xc2c200c2 +.long 0xf6f6f600,0x7e7e007e +.long 0xf3f3f300,0x05050005 +.long 0x9d9d9d00,0xb7b700b7 +.long 0x7f7f7f00,0x31310031 +.long 0xbfbfbf00,0x17170017 +.long 0xe2e2e200,0xd7d700d7 +.long 0x52525200,0x58580058 +.long 0x9b9b9b00,0x61610061 +.long 0xd8d8d800,0x1b1b001b +.long 0x26262600,0x1c1c001c +.long 0xc8c8c800,0x0f0f000f +.long 0x37373700,0x16160016 +.long 0xc6c6c600,0x18180018 +.long 0x3b3b3b00,0x22220022 +.long 0x81818100,0x44440044 +.long 0x96969600,0xb2b200b2 +.long 0x6f6f6f00,0xb5b500b5 +.long 0x4b4b4b00,0x91910091 +.long 0x13131300,0x08080008 +.long 0xbebebe00,0xa8a800a8 +.long 0x63636300,0xfcfc00fc +.long 0x2e2e2e00,0x50500050 +.long 0xe9e9e900,0xd0d000d0 +.long 0x79797900,0x7d7d007d +.long 0xa7a7a700,0x89890089 +.long 0x8c8c8c00,0x97970097 +.long 0x9f9f9f00,0x5b5b005b +.long 0x6e6e6e00,0x95950095 +.long 0xbcbcbc00,0xffff00ff +.long 0x8e8e8e00,0xd2d200d2 +.long 0x29292900,0xc4c400c4 +.long 0xf5f5f500,0x48480048 +.long 0xf9f9f900,0xf7f700f7 +.long 0xb6b6b600,0xdbdb00db +.long 0x2f2f2f00,0x03030003 +.long 0xfdfdfd00,0xdada00da +.long 0xb4b4b400,0x3f3f003f +.long 0x59595900,0x94940094 +.long 0x78787800,0x5c5c005c +.long 0x98989800,0x02020002 +.long 0x06060600,0x4a4a004a +.long 0x6a6a6a00,0x33330033 +.long 0xe7e7e700,0x67670067 +.long 0x46464600,0xf3f300f3 +.long 0x71717100,0x7f7f007f +.long 0xbababa00,0xe2e200e2 +.long 0xd4d4d400,0x9b9b009b +.long 0x25252500,0x26260026 +.long 0xababab00,0x37370037 +.long 0x42424200,0x3b3b003b +.long 0x88888800,0x96960096 +.long 0xa2a2a200,0x4b4b004b +.long 0x8d8d8d00,0xbebe00be +.long 0xfafafa00,0x2e2e002e +.long 0x72727200,0x79790079 +.long 0x07070700,0x8c8c008c +.long 0xb9b9b900,0x6e6e006e +.long 0x55555500,0x8e8e008e +.long 0xf8f8f800,0xf5f500f5 +.long 0xeeeeee00,0xb6b600b6 +.long 0xacacac00,0xfdfd00fd +.long 0x0a0a0a00,0x59590059 +.long 0x36363600,0x98980098 +.long 0x49494900,0x6a6a006a +.long 0x2a2a2a00,0x46460046 +.long 0x68686800,0xbaba00ba +.long 0x3c3c3c00,0x25250025 +.long 0x38383800,0x42420042 +.long 0xf1f1f100,0xa2a200a2 +.long 
0xa4a4a400,0xfafa00fa +.long 0x40404000,0x07070007 +.long 0x28282800,0x55550055 +.long 0xd3d3d300,0xeeee00ee +.long 0x7b7b7b00,0x0a0a000a +.long 0xbbbbbb00,0x49490049 +.long 0xc9c9c900,0x68680068 +.long 0x43434300,0x38380038 +.long 0xc1c1c100,0xa4a400a4 +.long 0x15151500,0x28280028 +.long 0xe3e3e300,0x7b7b007b +.long 0xadadad00,0xc9c900c9 +.long 0xf4f4f400,0xc1c100c1 +.long 0x77777700,0xe3e300e3 +.long 0xc7c7c700,0xf4f400f4 +.long 0x80808000,0xc7c700c7 +.long 0x9e9e9e00,0x9e9e009e +.long 0x00e0e0e0,0x38003838 +.long 0x00050505,0x41004141 +.long 0x00585858,0x16001616 +.long 0x00d9d9d9,0x76007676 +.long 0x00676767,0xd900d9d9 +.long 0x004e4e4e,0x93009393 +.long 0x00818181,0x60006060 +.long 0x00cbcbcb,0xf200f2f2 +.long 0x00c9c9c9,0x72007272 +.long 0x000b0b0b,0xc200c2c2 +.long 0x00aeaeae,0xab00abab +.long 0x006a6a6a,0x9a009a9a +.long 0x00d5d5d5,0x75007575 +.long 0x00181818,0x06000606 +.long 0x005d5d5d,0x57005757 +.long 0x00828282,0xa000a0a0 +.long 0x00464646,0x91009191 +.long 0x00dfdfdf,0xf700f7f7 +.long 0x00d6d6d6,0xb500b5b5 +.long 0x00272727,0xc900c9c9 +.long 0x008a8a8a,0xa200a2a2 +.long 0x00323232,0x8c008c8c +.long 0x004b4b4b,0xd200d2d2 +.long 0x00424242,0x90009090 +.long 0x00dbdbdb,0xf600f6f6 +.long 0x001c1c1c,0x07000707 +.long 0x009e9e9e,0xa700a7a7 +.long 0x009c9c9c,0x27002727 +.long 0x003a3a3a,0x8e008e8e +.long 0x00cacaca,0xb200b2b2 +.long 0x00252525,0x49004949 +.long 0x007b7b7b,0xde00dede +.long 0x000d0d0d,0x43004343 +.long 0x00717171,0x5c005c5c +.long 0x005f5f5f,0xd700d7d7 +.long 0x001f1f1f,0xc700c7c7 +.long 0x00f8f8f8,0x3e003e3e +.long 0x00d7d7d7,0xf500f5f5 +.long 0x003e3e3e,0x8f008f8f +.long 0x009d9d9d,0x67006767 +.long 0x007c7c7c,0x1f001f1f +.long 0x00606060,0x18001818 +.long 0x00b9b9b9,0x6e006e6e +.long 0x00bebebe,0xaf00afaf +.long 0x00bcbcbc,0x2f002f2f +.long 0x008b8b8b,0xe200e2e2 +.long 0x00161616,0x85008585 +.long 0x00343434,0x0d000d0d +.long 0x004d4d4d,0x53005353 +.long 0x00c3c3c3,0xf000f0f0 +.long 0x00727272,0x9c009c9c +.long 0x00959595,0x65006565 +.long 0x00ababab,0xea00eaea +.long 0x008e8e8e,0xa300a3a3 +.long 0x00bababa,0xae00aeae +.long 0x007a7a7a,0x9e009e9e +.long 0x00b3b3b3,0xec00ecec +.long 0x00020202,0x80008080 +.long 0x00b4b4b4,0x2d002d2d +.long 0x00adadad,0x6b006b6b +.long 0x00a2a2a2,0xa800a8a8 +.long 0x00acacac,0x2b002b2b +.long 0x00d8d8d8,0x36003636 +.long 0x009a9a9a,0xa600a6a6 +.long 0x00171717,0xc500c5c5 +.long 0x001a1a1a,0x86008686 +.long 0x00353535,0x4d004d4d +.long 0x00cccccc,0x33003333 +.long 0x00f7f7f7,0xfd00fdfd +.long 0x00999999,0x66006666 +.long 0x00616161,0x58005858 +.long 0x005a5a5a,0x96009696 +.long 0x00e8e8e8,0x3a003a3a +.long 0x00242424,0x09000909 +.long 0x00565656,0x95009595 +.long 0x00404040,0x10001010 +.long 0x00e1e1e1,0x78007878 +.long 0x00636363,0xd800d8d8 +.long 0x00090909,0x42004242 +.long 0x00333333,0xcc00cccc +.long 0x00bfbfbf,0xef00efef +.long 0x00989898,0x26002626 +.long 0x00979797,0xe500e5e5 +.long 0x00858585,0x61006161 +.long 0x00686868,0x1a001a1a +.long 0x00fcfcfc,0x3f003f3f +.long 0x00ececec,0x3b003b3b +.long 0x000a0a0a,0x82008282 +.long 0x00dadada,0xb600b6b6 +.long 0x006f6f6f,0xdb00dbdb +.long 0x00535353,0xd400d4d4 +.long 0x00626262,0x98009898 +.long 0x00a3a3a3,0xe800e8e8 +.long 0x002e2e2e,0x8b008b8b +.long 0x00080808,0x02000202 +.long 0x00afafaf,0xeb00ebeb +.long 0x00282828,0x0a000a0a +.long 0x00b0b0b0,0x2c002c2c +.long 0x00747474,0x1d001d1d +.long 0x00c2c2c2,0xb000b0b0 +.long 0x00bdbdbd,0x6f006f6f +.long 0x00363636,0x8d008d8d +.long 0x00222222,0x88008888 +.long 0x00383838,0x0e000e0e +.long 0x00646464,0x19001919 +.long 
0x001e1e1e,0x87008787 +.long 0x00393939,0x4e004e4e +.long 0x002c2c2c,0x0b000b0b +.long 0x00a6a6a6,0xa900a9a9 +.long 0x00303030,0x0c000c0c +.long 0x00e5e5e5,0x79007979 +.long 0x00444444,0x11001111 +.long 0x00fdfdfd,0x7f007f7f +.long 0x00888888,0x22002222 +.long 0x009f9f9f,0xe700e7e7 +.long 0x00656565,0x59005959 +.long 0x00878787,0xe100e1e1 +.long 0x006b6b6b,0xda00dada +.long 0x00f4f4f4,0x3d003d3d +.long 0x00232323,0xc800c8c8 +.long 0x00484848,0x12001212 +.long 0x00101010,0x04000404 +.long 0x00d1d1d1,0x74007474 +.long 0x00515151,0x54005454 +.long 0x00c0c0c0,0x30003030 +.long 0x00f9f9f9,0x7e007e7e +.long 0x00d2d2d2,0xb400b4b4 +.long 0x00a0a0a0,0x28002828 +.long 0x00555555,0x55005555 +.long 0x00a1a1a1,0x68006868 +.long 0x00414141,0x50005050 +.long 0x00fafafa,0xbe00bebe +.long 0x00434343,0xd000d0d0 +.long 0x00131313,0xc400c4c4 +.long 0x00c4c4c4,0x31003131 +.long 0x002f2f2f,0xcb00cbcb +.long 0x00a8a8a8,0x2a002a2a +.long 0x00b6b6b6,0xad00adad +.long 0x003c3c3c,0x0f000f0f +.long 0x002b2b2b,0xca00caca +.long 0x00c1c1c1,0x70007070 +.long 0x00ffffff,0xff00ffff +.long 0x00c8c8c8,0x32003232 +.long 0x00a5a5a5,0x69006969 +.long 0x00202020,0x08000808 +.long 0x00898989,0x62006262 +.long 0x00000000,0x00000000 +.long 0x00909090,0x24002424 +.long 0x00474747,0xd100d1d1 +.long 0x00efefef,0xfb00fbfb +.long 0x00eaeaea,0xba00baba +.long 0x00b7b7b7,0xed00eded +.long 0x00151515,0x45004545 +.long 0x00060606,0x81008181 +.long 0x00cdcdcd,0x73007373 +.long 0x00b5b5b5,0x6d006d6d +.long 0x00121212,0x84008484 +.long 0x007e7e7e,0x9f009f9f +.long 0x00bbbbbb,0xee00eeee +.long 0x00292929,0x4a004a4a +.long 0x000f0f0f,0xc300c3c3 +.long 0x00b8b8b8,0x2e002e2e +.long 0x00070707,0xc100c1c1 +.long 0x00040404,0x01000101 +.long 0x009b9b9b,0xe600e6e6 +.long 0x00949494,0x25002525 +.long 0x00212121,0x48004848 +.long 0x00666666,0x99009999 +.long 0x00e6e6e6,0xb900b9b9 +.long 0x00cecece,0xb300b3b3 +.long 0x00ededed,0x7b007b7b +.long 0x00e7e7e7,0xf900f9f9 +.long 0x003b3b3b,0xce00cece +.long 0x00fefefe,0xbf00bfbf +.long 0x007f7f7f,0xdf00dfdf +.long 0x00c5c5c5,0x71007171 +.long 0x00a4a4a4,0x29002929 +.long 0x00373737,0xcd00cdcd +.long 0x00b1b1b1,0x6c006c6c +.long 0x004c4c4c,0x13001313 +.long 0x00919191,0x64006464 +.long 0x006e6e6e,0x9b009b9b +.long 0x008d8d8d,0x63006363 +.long 0x00767676,0x9d009d9d +.long 0x00030303,0xc000c0c0 +.long 0x002d2d2d,0x4b004b4b +.long 0x00dedede,0xb700b7b7 +.long 0x00969696,0xa500a5a5 +.long 0x00262626,0x89008989 +.long 0x007d7d7d,0x5f005f5f +.long 0x00c6c6c6,0xb100b1b1 +.long 0x005c5c5c,0x17001717 +.long 0x00d3d3d3,0xf400f4f4 +.long 0x00f2f2f2,0xbc00bcbc +.long 0x004f4f4f,0xd300d3d3 +.long 0x00191919,0x46004646 +.long 0x003f3f3f,0xcf00cfcf +.long 0x00dcdcdc,0x37003737 +.long 0x00797979,0x5e005e5e +.long 0x001d1d1d,0x47004747 +.long 0x00525252,0x94009494 +.long 0x00ebebeb,0xfa00fafa +.long 0x00f3f3f3,0xfc00fcfc +.long 0x006d6d6d,0x5b005b5b +.long 0x005e5e5e,0x97009797 +.long 0x00fbfbfb,0xfe00fefe +.long 0x00696969,0x5a005a5a +.long 0x00b2b2b2,0xac00acac +.long 0x00f0f0f0,0x3c003c3c +.long 0x00313131,0x4c004c4c +.long 0x000c0c0c,0x03000303 +.long 0x00d4d4d4,0x35003535 +.long 0x00cfcfcf,0xf300f3f3 +.long 0x008c8c8c,0x23002323 +.long 0x00e2e2e2,0xb800b8b8 +.long 0x00757575,0x5d005d5d +.long 0x00a9a9a9,0x6a006a6a +.long 0x004a4a4a,0x92009292 +.long 0x00575757,0xd500d5d5 +.long 0x00848484,0x21002121 +.long 0x00111111,0x44004444 +.long 0x00454545,0x51005151 +.long 0x001b1b1b,0xc600c6c6 +.long 0x00f5f5f5,0x7d007d7d +.long 0x00e4e4e4,0x39003939 +.long 0x000e0e0e,0x83008383 +.long 0x00737373,0xdc00dcdc +.long 
0x00aaaaaa,0xaa00aaaa +.long 0x00f1f1f1,0x7c007c7c +.long 0x00dddddd,0x77007777 +.long 0x00595959,0x56005656 +.long 0x00141414,0x05000505 +.long 0x006c6c6c,0x1b001b1b +.long 0x00929292,0xa400a4a4 +.long 0x00545454,0x15001515 +.long 0x00d0d0d0,0x34003434 +.long 0x00787878,0x1e001e1e +.long 0x00707070,0x1c001c1c +.long 0x00e3e3e3,0xf800f8f8 +.long 0x00494949,0x52005252 +.long 0x00808080,0x20002020 +.long 0x00505050,0x14001414 +.long 0x00a7a7a7,0xe900e9e9 +.long 0x00f6f6f6,0xbd00bdbd +.long 0x00777777,0xdd00dddd +.long 0x00939393,0xe400e4e4 +.long 0x00868686,0xa100a1a1 +.long 0x00838383,0xe000e0e0 +.long 0x002a2a2a,0x8a008a8a +.long 0x00c7c7c7,0xf100f1f1 +.long 0x005b5b5b,0xd600d6d6 +.long 0x00e9e9e9,0x7a007a7a +.long 0x00eeeeee,0xbb00bbbb +.long 0x008f8f8f,0xe300e3e3 +.long 0x00010101,0x40004040 +.long 0x003d3d3d,0x4f004f4f +.globl _Camellia_cbc_encrypt + +.p2align 4 +_Camellia_cbc_encrypt: + cmpq $0,%rdx + je L$cbc_abort + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 +L$cbc_prologue: + + movq %rsp,%rbp + subq $64,%rsp + andq $-64,%rsp + + + + leaq -64-63(%rcx),%r10 + subq %rsp,%r10 + negq %r10 + andq $960,%r10 + subq %r10,%rsp + + + movq %rdi,%r12 + movq %rsi,%r13 + movq %r8,%rbx + movq %rcx,%r14 + movl 272(%rcx),%r15d + + movq %r8,40(%rsp) + movq %rbp,48(%rsp) + +L$cbc_body: + leaq L$Camellia_SBOX(%rip),%rbp + + movl $32,%ecx +.p2align 2 +L$cbc_prefetch_sbox: + movq 0(%rbp),%rax + movq 32(%rbp),%rsi + movq 64(%rbp),%rdi + movq 96(%rbp),%r11 + leaq 128(%rbp),%rbp + loop L$cbc_prefetch_sbox + subq $4096,%rbp + shlq $6,%r15 + movq %rdx,%rcx + leaq (%r14,%r15,1),%r15 + + cmpl $0,%r9d + je L$CBC_DECRYPT + + andq $-16,%rdx + andq $15,%rcx + leaq (%r12,%rdx,1),%rdx + movq %r14,0(%rsp) + movq %rdx,8(%rsp) + movq %rcx,16(%rsp) + + cmpq %r12,%rdx + movl 0(%rbx),%r8d + movl 4(%rbx),%r9d + movl 8(%rbx),%r10d + movl 12(%rbx),%r11d + je L$cbc_enc_tail + jmp L$cbc_eloop + +.p2align 4 +L$cbc_eloop: + xorl 0(%r12),%r8d + xorl 4(%r12),%r9d + xorl 8(%r12),%r10d + bswapl %r8d + xorl 12(%r12),%r11d + bswapl %r9d + bswapl %r10d + bswapl %r11d + + call _x86_64_Camellia_encrypt + + movq 0(%rsp),%r14 + bswapl %r8d + movq 8(%rsp),%rdx + bswapl %r9d + movq 16(%rsp),%rcx + bswapl %r10d + movl %r8d,0(%r13) + bswapl %r11d + movl %r9d,4(%r13) + movl %r10d,8(%r13) + leaq 16(%r12),%r12 + movl %r11d,12(%r13) + cmpq %rdx,%r12 + leaq 16(%r13),%r13 + jne L$cbc_eloop + + cmpq $0,%rcx + jne L$cbc_enc_tail + + movq 40(%rsp),%r13 + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + jmp L$cbc_done + +.p2align 4 +L$cbc_enc_tail: + xorq %rax,%rax + movq %rax,0+24(%rsp) + movq %rax,8+24(%rsp) + movq %rax,16(%rsp) + +L$cbc_enc_pushf: + pushfq + cld + movq %r12,%rsi + leaq 8+24(%rsp),%rdi +.long 0x9066A4F3 + popfq +L$cbc_enc_popf: + + leaq 24(%rsp),%r12 + leaq 16+24(%rsp),%rax + movq %rax,8(%rsp) + jmp L$cbc_eloop + +.p2align 4 +L$CBC_DECRYPT: + xchgq %r14,%r15 + addq $15,%rdx + andq $15,%rcx + andq $-16,%rdx + movq %r14,0(%rsp) + leaq (%r12,%rdx,1),%rdx + movq %rdx,8(%rsp) + movq %rcx,16(%rsp) + + movq (%rbx),%rax + movq 8(%rbx),%rbx + jmp L$cbc_dloop +.p2align 4 +L$cbc_dloop: + movl 0(%r12),%r8d + movl 4(%r12),%r9d + movl 8(%r12),%r10d + bswapl %r8d + movl 12(%r12),%r11d + bswapl %r9d + movq %rax,0+24(%rsp) + bswapl %r10d + movq %rbx,8+24(%rsp) + bswapl %r11d + + call _x86_64_Camellia_decrypt + + movq 0(%rsp),%r14 + movq 8(%rsp),%rdx + movq 16(%rsp),%rcx + + bswapl %r8d + movq (%r12),%rax + bswapl %r9d + movq 8(%r12),%rbx + bswapl %r10d + xorl 0+24(%rsp),%r8d + bswapl 
%r11d + xorl 4+24(%rsp),%r9d + xorl 8+24(%rsp),%r10d + leaq 16(%r12),%r12 + xorl 12+24(%rsp),%r11d + cmpq %rdx,%r12 + je L$cbc_ddone + + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + leaq 16(%r13),%r13 + jmp L$cbc_dloop + +.p2align 4 +L$cbc_ddone: + movq 40(%rsp),%rdx + cmpq $0,%rcx + jne L$cbc_dec_tail + + movl %r8d,0(%r13) + movl %r9d,4(%r13) + movl %r10d,8(%r13) + movl %r11d,12(%r13) + + movq %rax,(%rdx) + movq %rbx,8(%rdx) + jmp L$cbc_done +.p2align 4 +L$cbc_dec_tail: + movl %r8d,0+24(%rsp) + movl %r9d,4+24(%rsp) + movl %r10d,8+24(%rsp) + movl %r11d,12+24(%rsp) + +L$cbc_dec_pushf: + pushfq + cld + leaq 8+24(%rsp),%rsi + leaq (%r13),%rdi +.long 0x9066A4F3 + popfq +L$cbc_dec_popf: + + movq %rax,(%rdx) + movq %rbx,8(%rdx) + jmp L$cbc_done + +.p2align 4 +L$cbc_done: + movq 48(%rsp),%rcx + movq 0(%rcx),%r15 + movq 8(%rcx),%r14 + movq 16(%rcx),%r13 + movq 24(%rcx),%r12 + movq 32(%rcx),%rbp + movq 40(%rcx),%rbx + leaq 48(%rcx),%rsp +L$cbc_abort: + .byte 0xf3,0xc3 + + +.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54,95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s new file mode 100644 index 00000000000000..a63b602b9baa97 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s @@ -0,0 +1,2003 @@ +.text + + + +.p2align 6 +L$poly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + + +L$RR: +.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd + +L$One: +.long 1,1,1,1,1,1,1,1 +L$Two: +.long 2,2,2,2,2,2,2,2 +L$Three: +.long 3,3,3,3,3,3,3,3 +L$ONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe + +.globl _ecp_nistz256_mul_by_2 + +.p2align 6 +_ecp_nistz256_mul_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + sbbq %r13,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_div_by_2 + +.p2align 5 +_ecp_nistz256_div_by_2: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq %r8,%rax + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + + movq %r9,%rdx + xorq %r13,%r13 + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + adcq $0,%r13 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + cmovzq %rsi,%r13 + + movq %r9,%rax + shrq $1,%r8 + shlq $63,%rax + movq %r10,%rdx + shrq $1,%r9 + orq %rax,%r8 + shlq $63,%rdx + movq %r11,%rcx + shrq $1,%r10 + orq %rdx,%r9 + shlq $63,%rcx + shrq $1,%r11 + shlq $63,%r13 + orq %rcx,%r10 + orq %r13,%r11 + + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_mul_by_3 + +.p2align 5 +_ecp_nistz256_mul_by_3: + pushq %r12 + pushq %r13 + + movq 
0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + addq %r8,%r8 + movq 16(%rsi),%r10 + adcq %r9,%r9 + movq 24(%rsi),%r11 + movq %r8,%rax + adcq %r10,%r10 + adcq %r11,%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq L$poly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq L$poly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + cmovzq %rcx,%r10 + cmovzq %r12,%r11 + + xorq %r13,%r13 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + movq %r8,%rax + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq $-1,%r8 + movq %r10,%rcx + sbbq L$poly+8(%rip),%r9 + sbbq $0,%r10 + movq %r11,%r12 + sbbq L$poly+24(%rip),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_add + +.p2align 5 +_ecp_nistz256_add: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + + addq 0(%rdx),%r8 + adcq 8(%rdx),%r9 + movq %r8,%rax + adcq 16(%rdx),%r10 + adcq 24(%rdx),%r11 + movq %r9,%rdx + adcq $0,%r13 + + subq 0(%rsi),%r8 + movq %r10,%rcx + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r11,%r12 + sbbq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_sub + +.p2align 5 +_ecp_nistz256_sub: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%r8 + xorq %r13,%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + + subq 0(%rdx),%r8 + sbbq 8(%rdx),%r9 + movq %r8,%rax + sbbq 16(%rdx),%r10 + sbbq 24(%rdx),%r11 + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_neg + +.p2align 5 +_ecp_nistz256_neg: + pushq %r12 + pushq %r13 + + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + xorq %r13,%r13 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + movq %r8,%rax + sbbq 24(%rsi),%r11 + leaq L$poly(%rip),%rsi + movq %r9,%rdx + sbbq $0,%r13 + + addq 0(%rsi),%r8 + movq %r10,%rcx + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + movq %r11,%r12 + adcq 24(%rsi),%r11 + testq %r13,%r13 + + cmovzq %rax,%r8 + cmovzq %rdx,%r9 + movq %r8,0(%rdi) + cmovzq %rcx,%r10 + movq %r9,8(%rdi) + cmovzq %r12,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + + + +.globl _ecp_nistz256_to_mont + +.p2align 5 +_ecp_nistz256_to_mont: + leaq L$RR(%rip),%rdx + jmp L$mul_mont + + + + + + + + +.globl _ecp_nistz256_mul_mont + +.p2align 5 +_ecp_nistz256_mul_mont: +L$mul_mont: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rdx,%rbx + movq 0(%rdx),%rax + movq 0(%rsi),%r9 + movq 8(%rsi),%r10 + movq 16(%rsi),%r11 + movq 24(%rsi),%r12 + + call __ecp_nistz256_mul_montq +L$mul_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_mul_montq: + + + movq %rax,%rbp + mulq 
%r9 + movq L$poly+8(%rip),%r14 + movq %rax,%r8 + movq %rbp,%rax + movq %rdx,%r9 + + mulq %r10 + movq L$poly+24(%rip),%r15 + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r10 + + mulq %r11 + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r12 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + xorq %r13,%r13 + movq %rdx,%r12 + + + + + + + + + + + movq %r8,%rbp + shlq $32,%r8 + mulq %r15 + shrq $32,%rbp + addq %r8,%r9 + adcq %rbp,%r10 + adcq %rax,%r11 + movq 8(%rbx),%rax + adcq %rdx,%r12 + adcq $0,%r13 + xorq %r8,%r8 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r9 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r10 + adcq $0,%rdx + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %r9,%rax + adcq %rdx,%r13 + adcq $0,%r8 + + + + movq %r9,%rbp + shlq $32,%r9 + mulq %r15 + shrq $32,%rbp + addq %r9,%r10 + adcq %rbp,%r11 + adcq %rax,%r12 + movq 16(%rbx),%rax + adcq %rdx,%r13 + adcq $0,%r8 + xorq %r9,%r9 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r10 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r11 + adcq $0,%rdx + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %r10,%rax + adcq %rdx,%r8 + adcq $0,%r9 + + + + movq %r10,%rbp + shlq $32,%r10 + mulq %r15 + shrq $32,%rbp + addq %r10,%r11 + adcq %rbp,%r12 + adcq %rax,%r13 + movq 24(%rbx),%rax + adcq %rdx,%r8 + adcq $0,%r9 + xorq %r10,%r10 + + + + movq %rax,%rbp + mulq 0(%rsi) + addq %rax,%r11 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 8(%rsi) + addq %rcx,%r12 + adcq $0,%rdx + addq %rax,%r12 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 16(%rsi) + addq %rcx,%r13 + adcq $0,%rdx + addq %rax,%r13 + movq %rbp,%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq 24(%rsi) + addq %rcx,%r8 + adcq $0,%rdx + addq %rax,%r8 + movq %r11,%rax + adcq %rdx,%r9 + adcq $0,%r10 + + + + movq %r11,%rbp + shlq $32,%r11 + mulq %r15 + shrq $32,%rbp + addq %r11,%r12 + adcq %rbp,%r13 + movq %r12,%rcx + adcq %rax,%r8 + adcq %rdx,%r9 + movq %r13,%rbp + adcq $0,%r10 + + + + subq $-1,%r12 + movq %r8,%rbx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%rdx + sbbq %r15,%r9 + sbbq $0,%r10 + + cmovcq %rcx,%r12 + cmovcq %rbp,%r13 + movq %r12,0(%rdi) + cmovcq %rbx,%r8 + movq %r13,8(%rdi) + cmovcq %rdx,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + + + + + + + +.globl _ecp_nistz256_sqr_mont + +.p2align 5 +_ecp_nistz256_sqr_mont: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq 0(%rsi),%rax + movq 8(%rsi),%r14 + movq 16(%rsi),%r15 + movq 24(%rsi),%r8 + + call __ecp_nistz256_sqr_montq +L$sqr_mont_done: + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_sqr_montq: + movq %rax,%r13 + mulq %r14 + movq %rax,%r9 + movq %r15,%rax + movq %rdx,%r10 + + mulq %r13 + addq %rax,%r10 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%r11 + + mulq %r13 + addq %rax,%r11 + movq %r15,%rax + adcq $0,%rdx + movq %rdx,%r12 + + + mulq %r14 + addq %rax,%r11 + movq %r8,%rax + adcq $0,%rdx + movq %rdx,%rbp + + mulq %r14 + addq %rax,%r12 + movq %r8,%rax + adcq 
$0,%rdx + addq %rbp,%r12 + movq %rdx,%r13 + adcq $0,%r13 + + + mulq %r15 + xorq %r15,%r15 + addq %rax,%r13 + movq 0(%rsi),%rax + movq %rdx,%r14 + adcq $0,%r14 + + addq %r9,%r9 + adcq %r10,%r10 + adcq %r11,%r11 + adcq %r12,%r12 + adcq %r13,%r13 + adcq %r14,%r14 + adcq $0,%r15 + + mulq %rax + movq %rax,%r8 + movq 8(%rsi),%rax + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r9 + adcq %rax,%r10 + movq 16(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r11 + adcq %rax,%r12 + movq 24(%rsi),%rax + adcq $0,%rdx + movq %rdx,%rcx + + mulq %rax + addq %rcx,%r13 + adcq %rax,%r14 + movq %r8,%rax + adcq %rdx,%r15 + + movq L$poly+8(%rip),%rsi + movq L$poly+24(%rip),%rbp + + + + + movq %r8,%rcx + shlq $32,%r8 + mulq %rbp + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %rbp + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %rbp + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %rbp + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + adcq %rax,%r10 + adcq $0,%rdx + xorq %r11,%r11 + + + + addq %r8,%r12 + adcq %r9,%r13 + movq %r12,%r8 + adcq %r10,%r14 + adcq %rdx,%r15 + movq %r13,%r9 + adcq $0,%r11 + + subq $-1,%r12 + movq %r14,%r10 + sbbq %rsi,%r13 + sbbq $0,%r14 + movq %r15,%rcx + sbbq %rbp,%r15 + sbbq $0,%r11 + + cmovcq %r8,%r12 + cmovcq %r9,%r13 + movq %r12,0(%rdi) + cmovcq %r10,%r14 + movq %r13,8(%rdi) + cmovcq %rcx,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + + .byte 0xf3,0xc3 + + + + + + + +.globl _ecp_nistz256_from_mont + +.p2align 5 +_ecp_nistz256_from_mont: + pushq %r12 + pushq %r13 + + movq 0(%rsi),%rax + movq L$poly+24(%rip),%r13 + movq 8(%rsi),%r9 + movq 16(%rsi),%r10 + movq 24(%rsi),%r11 + movq %rax,%r8 + movq L$poly+8(%rip),%r12 + + + + movq %rax,%rcx + shlq $32,%r8 + mulq %r13 + shrq $32,%rcx + addq %r8,%r9 + adcq %rcx,%r10 + adcq %rax,%r11 + movq %r9,%rax + adcq $0,%rdx + + + + movq %r9,%rcx + shlq $32,%r9 + movq %rdx,%r8 + mulq %r13 + shrq $32,%rcx + addq %r9,%r10 + adcq %rcx,%r11 + adcq %rax,%r8 + movq %r10,%rax + adcq $0,%rdx + + + + movq %r10,%rcx + shlq $32,%r10 + movq %rdx,%r9 + mulq %r13 + shrq $32,%rcx + addq %r10,%r11 + adcq %rcx,%r8 + adcq %rax,%r9 + movq %r11,%rax + adcq $0,%rdx + + + + movq %r11,%rcx + shlq $32,%r11 + movq %rdx,%r10 + mulq %r13 + shrq $32,%rcx + addq %r11,%r8 + adcq %rcx,%r9 + movq %r8,%rcx + adcq %rax,%r10 + movq %r9,%rsi + adcq $0,%rdx + + + + subq $-1,%r8 + movq %r10,%rax + sbbq %r12,%r9 + sbbq $0,%r10 + movq %rdx,%r11 + sbbq %r13,%rdx + sbbq %r13,%r13 + + cmovnzq %rcx,%r8 + cmovnzq %rsi,%r9 + movq %r8,0(%rdi) + cmovnzq %rax,%r10 + movq %r9,8(%rdi) + cmovzq %rdx,%r11 + movq %r10,16(%rdi) + movq %r11,24(%rdi) + + popq %r13 + popq %r12 + .byte 0xf3,0xc3 + + + +.globl _ecp_nistz256_select_w5 + +.p2align 5 +_ecp_nistz256_select_w5: + movdqa L$One(%rip),%xmm0 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + + movdqa %xmm0,%xmm8 + pshufd $0,%xmm1,%xmm1 + + movq $16,%rax +L$select_loop_sse_w5: + + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + pcmpeqd %xmm1,%xmm15 + + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + movdqa 64(%rsi),%xmm13 + movdqa 80(%rsi),%xmm14 + leaq 96(%rsi),%rsi + + pand 
%xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + pand %xmm15,%xmm13 + por %xmm12,%xmm5 + pand %xmm15,%xmm14 + por %xmm13,%xmm6 + por %xmm14,%xmm7 + + decq %rax + jnz L$select_loop_sse_w5 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + movdqu %xmm6,64(%rdi) + movdqu %xmm7,80(%rdi) + .byte 0xf3,0xc3 + + + + +.globl _ecp_nistz256_select_w7 + +.p2align 5 +_ecp_nistz256_select_w7: + movdqa L$One(%rip),%xmm8 + movd %edx,%xmm1 + + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + + movdqa %xmm8,%xmm0 + pshufd $0,%xmm1,%xmm1 + movq $64,%rax + +L$select_loop_sse_w7: + movdqa %xmm8,%xmm15 + paddd %xmm0,%xmm8 + movdqa 0(%rsi),%xmm9 + movdqa 16(%rsi),%xmm10 + pcmpeqd %xmm1,%xmm15 + movdqa 32(%rsi),%xmm11 + movdqa 48(%rsi),%xmm12 + leaq 64(%rsi),%rsi + + pand %xmm15,%xmm9 + pand %xmm15,%xmm10 + por %xmm9,%xmm2 + pand %xmm15,%xmm11 + por %xmm10,%xmm3 + pand %xmm15,%xmm12 + por %xmm11,%xmm4 + prefetcht0 255(%rsi) + por %xmm12,%xmm5 + + decq %rax + jnz L$select_loop_sse_w7 + + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + movdqu %xmm4,32(%rdi) + movdqu %xmm5,48(%rdi) + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_avx2_select_w7 + +.p2align 5 +_ecp_nistz256_avx2_select_w7: +.byte 0x0f,0x0b + .byte 0xf3,0xc3 + + +.p2align 5 +__ecp_nistz256_add_toq: + addq 0(%rbx),%r12 + adcq 8(%rbx),%r13 + movq %r12,%rax + adcq 16(%rbx),%r8 + adcq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_sub_fromq: + subq 0(%rbx),%r12 + sbbq 8(%rbx),%r13 + movq %r12,%rax + sbbq 16(%rbx),%r8 + sbbq 24(%rbx),%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + addq $-1,%r12 + movq %r8,%rcx + adcq %r14,%r13 + adcq $0,%r8 + movq %r9,%r10 + adcq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_subq: + subq %r12,%rax + sbbq %r13,%rbp + movq %rax,%r12 + sbbq %r8,%rcx + sbbq %r9,%r10 + movq %rbp,%r13 + sbbq %r11,%r11 + + addq $-1,%rax + movq %rcx,%r8 + adcq %r14,%rbp + adcq $0,%rcx + movq %r10,%r9 + adcq %r15,%r10 + testq %r11,%r11 + + cmovnzq %rax,%r12 + cmovnzq %rbp,%r13 + cmovnzq %rcx,%r8 + cmovnzq %r10,%r9 + + .byte 0xf3,0xc3 + + + +.p2align 5 +__ecp_nistz256_mul_by_2q: + addq %r12,%r12 + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + cmovzq %rbp,%r13 + movq %r12,0(%rdi) + cmovzq %rcx,%r8 + movq %r13,8(%rdi) + cmovzq %r10,%r9 + movq %r8,16(%rdi) + movq %r9,24(%rdi) + + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_point_double + +.p2align 5 +_ecp_nistz256_point_double: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $160+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rsi,%rbx + movdqu 16(%rsi),%xmm1 + movq 32+0(%rsi),%r12 + movq 32+8(%rsi),%r13 + movq 32+16(%rsi),%r8 + movq 32+24(%rsi),%r9 + movq L$poly+8(%rip),%r14 + movq L$poly+24(%rip),%r15 + movdqa %xmm0,96(%rsp) + movdqa 
%xmm1,96+16(%rsp) + leaq 32(%rdi),%r10 + leaq 64(%rdi),%r11 +.byte 102,72,15,110,199 +.byte 102,73,15,110,202 +.byte 102,73,15,110,211 + + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + leaq 64-0(%rsi),%rsi + leaq 64(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 0(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 32(%rbx),%rax + movq 64+0(%rbx),%r9 + movq 64+8(%rbx),%r10 + movq 64+16(%rbx),%r11 + movq 64+24(%rbx),%r12 + leaq 64-0(%rbx),%rsi + leaq 32(%rbx),%rbx +.byte 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96+0(%rsp),%r12 + movq 96+8(%rsp),%r13 + leaq 64(%rsp),%rbx + movq 96+16(%rsp),%r8 + movq 96+24(%rsp),%r9 + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 +.byte 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xorq %r9,%r9 + movq %r12,%rax + addq $-1,%r12 + movq %r13,%r10 + adcq %rsi,%r13 + movq %r14,%rcx + adcq $0,%r14 + movq %r15,%r8 + adcq %rbp,%r15 + adcq $0,%r9 + xorq %rsi,%rsi + testq $1,%rax + + cmovzq %rax,%r12 + cmovzq %r10,%r13 + cmovzq %rcx,%r14 + cmovzq %r8,%r15 + cmovzq %rsi,%r9 + + movq %r13,%rax + shrq $1,%r12 + shlq $63,%rax + movq %r14,%r10 + shrq $1,%r13 + orq %rax,%r12 + shlq $63,%r10 + movq %r15,%rcx + shrq $1,%r14 + orq %r10,%r13 + shlq $63,%rcx + movq %r12,0(%rdi) + shrq $1,%r15 + movq %r13,8(%rdi) + shlq $63,%r9 + orq %rcx,%r14 + orq %r9,%r15 + movq %r14,16(%rdi) + movq %r15,24(%rdi) + movq 64(%rsp),%rax + leaq 64(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + leaq 32(%rsp),%rbx + leaq 32(%rsp),%rdi + call __ecp_nistz256_add_toq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_by_2q + + movq 0+32(%rsp),%rax + movq 8+32(%rsp),%r14 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r15 + movq 24+32(%rsp),%r8 +.byte 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + leaq 128(%rsp),%rbx + movq %r14,%r8 + movq %r15,%r9 + movq %rsi,%r14 + movq %rbp,%r15 + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 0(%rsp),%rdi + call __ecp_nistz256_subq + + movq 32(%rsp),%rax + leaq 32(%rsp),%rbx + movq %r12,%r14 + xorl %ecx,%ecx + movq %r12,0+0(%rsp) + movq %r13,%r10 + movq %r13,0+8(%rsp) + cmovzq %r8,%r11 + movq %r8,0+16(%rsp) + leaq 0-0(%rsp),%rsi + cmovzq %r9,%r12 + movq %r9,0+24(%rsp) + movq %r14,%r9 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + +.byte 102,72,15,126,203 +.byte 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + addq $160+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_point_add + +.p2align 5 +_ecp_nistz256_point_add: + pushq %rbp + pushq %rbx + pushq %r12 + pushq 
%r13 + pushq %r14 + pushq %r15 + subq $576+8,%rsp + + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq %rsi,%rbx + movq %rdx,%rsi + movdqa %xmm0,384(%rsp) + movdqa %xmm1,384+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,416(%rsp) + movdqa %xmm3,416+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,448(%rsp) + movdqa %xmm5,448+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rsi),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rsi),%xmm3 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,480(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,480+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,512(%rsp) + movdqa %xmm3,512+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + movq %rax,544+0(%rsp) + movq %r14,544+8(%rsp) + movq %r15,544+16(%rsp) + movq %r8,544+24(%rsp) + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + movq 64+0(%rbx),%rax + movq 64+8(%rbx),%r14 + movq 64+16(%rbx),%r15 + movq 64+24(%rbx),%r8 + + leaq 64-0(%rbx),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 416(%rsp),%rax + leaq 416(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 224(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 512(%rsp),%rax + leaq 512(%rsp),%rbx + movq 0+256(%rsp),%r9 + movq 8+256(%rsp),%r10 + leaq 0+256(%rsp),%rsi + movq 16+256(%rsp),%r11 + movq 24+256(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 224(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + movdqa %xmm4,%xmm2 + orq %r8,%r12 + orq %r9,%r12 + por %xmm5,%xmm2 +.byte 102,73,15,110,220 + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+96(%rsp),%r9 + movq 8+96(%rsp),%r10 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r11 + movq 24+96(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 480(%rsp),%rax + leaq 480(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 160(%rsp),%rbx + leaq 0(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + orq %r13,%r12 + orq %r8,%r12 + orq %r9,%r12 + +.byte 0x3e + jnz L$add_proceedq +.byte 102,73,15,126,208 +.byte 102,73,15,126,217 + testq %r8,%r8 + jnz L$add_proceedq + testq %r9,%r9 + jz L$add_proceedq + +.byte 102,72,15,126,199 + pxor %xmm0,%xmm0 + movdqu %xmm0,0(%rdi) + movdqu %xmm0,16(%rdi) + movdqu %xmm0,32(%rdi) + movdqu %xmm0,48(%rdi) + movdqu %xmm0,64(%rdi) + movdqu %xmm0,80(%rdi) + jmp L$add_doneq + +.p2align 5 +L$add_proceedq: + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + 
movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 96(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+0(%rsp),%r9 + movq 8+0(%rsp),%r10 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r11 + movq 24+0(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0+0(%rsp),%rax + movq 8+0(%rsp),%r14 + leaq 0+0(%rsp),%rsi + movq 16+0(%rsp),%r15 + movq 24+0(%rsp),%r8 + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 544(%rsp),%rax + leaq 544(%rsp),%rbx + movq 0+352(%rsp),%r9 + movq 8+352(%rsp),%r10 + leaq 0+352(%rsp),%rsi + movq 16+352(%rsp),%r11 + movq 24+352(%rsp),%r12 + leaq 352(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 0(%rsp),%rax + leaq 0(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 128(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 160(%rsp),%rax + leaq 160(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 192(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 96(%rsp),%rsi + adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 128(%rsp),%rbx + leaq 288(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 192+0(%rsp),%rax + movq 192+8(%rsp),%rbp + movq 192+16(%rsp),%rcx + movq 192+24(%rsp),%r10 + leaq 320(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+224(%rsp),%r9 + movq 8+224(%rsp),%r10 + leaq 0+224(%rsp),%rsi + movq 16+224(%rsp),%r11 + movq 24+224(%rsp),%r12 + leaq 256(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 320(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 256(%rsp),%rbx + leaq 320(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 352(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 352+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 544(%rsp),%xmm2 + pand 544+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 480(%rsp),%xmm2 + pand 480+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 320(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 320+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 512(%rsp),%xmm2 + pand 
512+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + +L$add_doneq: + addq $576+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 + +.globl _ecp_nistz256_point_add_affine + +.p2align 5 +_ecp_nistz256_point_add_affine: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $480+8,%rsp + + movdqu 0(%rsi),%xmm0 + movq %rdx,%rbx + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 + movdqu 64(%rsi),%xmm4 + movdqu 80(%rsi),%xmm5 + movq 64+0(%rsi),%rax + movq 64+8(%rsi),%r14 + movq 64+16(%rsi),%r15 + movq 64+24(%rsi),%r8 + movdqa %xmm0,320(%rsp) + movdqa %xmm1,320+16(%rsp) + por %xmm0,%xmm1 + movdqa %xmm2,352(%rsp) + movdqa %xmm3,352+16(%rsp) + por %xmm2,%xmm3 + movdqa %xmm4,384(%rsp) + movdqa %xmm5,384+16(%rsp) + por %xmm1,%xmm3 + + movdqu 0(%rbx),%xmm0 + pshufd $177,%xmm3,%xmm5 + movdqu 16(%rbx),%xmm1 + movdqu 32(%rbx),%xmm2 + por %xmm3,%xmm5 + movdqu 48(%rbx),%xmm3 + movdqa %xmm0,416(%rsp) + pshufd $30,%xmm5,%xmm4 + movdqa %xmm1,416+16(%rsp) + por %xmm0,%xmm1 +.byte 102,72,15,110,199 + movdqa %xmm2,448(%rsp) + movdqa %xmm3,448+16(%rsp) + por %xmm2,%xmm3 + por %xmm4,%xmm5 + pxor %xmm4,%xmm4 + por %xmm1,%xmm3 + + leaq 64-0(%rsi),%rsi + leaq 32(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + pcmpeqd %xmm4,%xmm5 + pshufd $177,%xmm3,%xmm4 + movq 0(%rbx),%rax + + movq %r12,%r9 + por %xmm3,%xmm4 + pshufd $0,%xmm5,%xmm5 + pshufd $30,%xmm4,%xmm3 + movq %r13,%r10 + por %xmm3,%xmm4 + pxor %xmm3,%xmm3 + movq %r14,%r11 + pcmpeqd %xmm3,%xmm4 + pshufd $0,%xmm4,%xmm4 + + leaq 32-0(%rsp),%rsi + movq %r15,%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 320(%rsp),%rbx + leaq 64(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 384(%rsp),%rax + leaq 384(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 288(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 448(%rsp),%rax + leaq 448(%rsp),%rbx + movq 0+32(%rsp),%r9 + movq 8+32(%rsp),%r10 + leaq 0+32(%rsp),%rsi + movq 16+32(%rsp),%r11 + movq 24+32(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 352(%rsp),%rbx + leaq 96(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+64(%rsp),%rax + movq 8+64(%rsp),%r14 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r15 + movq 24+64(%rsp),%r8 + leaq 128(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 0+96(%rsp),%rax + movq 8+96(%rsp),%r14 + leaq 0+96(%rsp),%rsi + movq 16+96(%rsp),%r15 + movq 24+96(%rsp),%r8 + leaq 192(%rsp),%rdi + call __ecp_nistz256_sqr_montq + + movq 128(%rsp),%rax + leaq 128(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 160(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 320(%rsp),%rax + leaq 320(%rsp),%rbx + movq 0+128(%rsp),%r9 + movq 8+128(%rsp),%r10 + leaq 0+128(%rsp),%rsi + movq 16+128(%rsp),%r11 + movq 24+128(%rsp),%r12 + leaq 0(%rsp),%rdi + call __ecp_nistz256_mul_montq + + + + + addq %r12,%r12 + leaq 192(%rsp),%rsi + 
adcq %r13,%r13 + movq %r12,%rax + adcq %r8,%r8 + adcq %r9,%r9 + movq %r13,%rbp + sbbq %r11,%r11 + + subq $-1,%r12 + movq %r8,%rcx + sbbq %r14,%r13 + sbbq $0,%r8 + movq %r9,%r10 + sbbq %r15,%r9 + testq %r11,%r11 + + cmovzq %rax,%r12 + movq 0(%rsi),%rax + cmovzq %rbp,%r13 + movq 8(%rsi),%rbp + cmovzq %rcx,%r8 + movq 16(%rsi),%rcx + cmovzq %r10,%r9 + movq 24(%rsi),%r10 + + call __ecp_nistz256_subq + + leaq 160(%rsp),%rbx + leaq 224(%rsp),%rdi + call __ecp_nistz256_sub_fromq + + movq 0+0(%rsp),%rax + movq 0+8(%rsp),%rbp + movq 0+16(%rsp),%rcx + movq 0+24(%rsp),%r10 + leaq 64(%rsp),%rdi + + call __ecp_nistz256_subq + + movq %r12,0(%rdi) + movq %r13,8(%rdi) + movq %r8,16(%rdi) + movq %r9,24(%rdi) + movq 352(%rsp),%rax + leaq 352(%rsp),%rbx + movq 0+160(%rsp),%r9 + movq 8+160(%rsp),%r10 + leaq 0+160(%rsp),%rsi + movq 16+160(%rsp),%r11 + movq 24+160(%rsp),%r12 + leaq 32(%rsp),%rdi + call __ecp_nistz256_mul_montq + + movq 96(%rsp),%rax + leaq 96(%rsp),%rbx + movq 0+64(%rsp),%r9 + movq 8+64(%rsp),%r10 + leaq 0+64(%rsp),%rsi + movq 16+64(%rsp),%r11 + movq 24+64(%rsp),%r12 + leaq 64(%rsp),%rdi + call __ecp_nistz256_mul_montq + + leaq 32(%rsp),%rbx + leaq 256(%rsp),%rdi + call __ecp_nistz256_sub_fromq + +.byte 102,72,15,126,199 + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 288(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 288+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand L$ONE_mont(%rip),%xmm2 + pand L$ONE_mont+16(%rip),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 384(%rsp),%xmm2 + pand 384+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,64(%rdi) + movdqu %xmm3,80(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 224(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 224+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 416(%rsp),%xmm2 + pand 416+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 320(%rsp),%xmm2 + pand 320+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,0(%rdi) + movdqu %xmm3,16(%rdi) + + movdqa %xmm5,%xmm0 + movdqa %xmm5,%xmm1 + pandn 256(%rsp),%xmm0 + movdqa %xmm5,%xmm2 + pandn 256+16(%rsp),%xmm1 + movdqa %xmm5,%xmm3 + pand 448(%rsp),%xmm2 + pand 448+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + + movdqa %xmm4,%xmm0 + movdqa %xmm4,%xmm1 + pandn %xmm2,%xmm0 + movdqa %xmm4,%xmm2 + pandn %xmm3,%xmm1 + movdqa %xmm4,%xmm3 + pand 352(%rsp),%xmm2 + pand 352+16(%rsp),%xmm3 + por %xmm0,%xmm2 + por %xmm1,%xmm3 + movdqu %xmm2,32(%rdi) + movdqu %xmm3,48(%rdi) + + addq $480+8,%rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/md5/md5-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/md5/md5-x86_64.s new file mode 100644 index 00000000000000..712a871d343760 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/md5/md5-x86_64.s @@ -0,0 +1,667 @@ +.text +.p2align 4 + +.globl _md5_block_asm_data_order + +_md5_block_asm_data_order: + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r14 + pushq %r15 +L$prologue: + + + + + movq %rdi,%rbp + shlq $6,%rdx + leaq (%rsi,%rdx,1),%rdi + movl 0(%rbp),%eax + movl 4(%rbp),%ebx + movl 8(%rbp),%ecx + movl 12(%rbp),%edx + + + + + + + + cmpq %rdi,%rsi + je L$end + + +L$loop: + movl %eax,%r8d + movl %ebx,%r9d + movl %ecx,%r14d + movl %edx,%r15d + movl 0(%rsi),%r10d + movl 
%edx,%r11d + xorl %ecx,%r11d + leal -680876936(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 4(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -389564586(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 8(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal 606105819(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 12(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1044525330(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 16(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal -176418897(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 20(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal 1200080426(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 24(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1473231341(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 28(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -45705983(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 32(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1770035416(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 36(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -1958414417(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 40(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -42063(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 44(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal -1990404162(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 48(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + xorl %ecx,%r11d + leal 1804603682(%rax,%r10,1),%eax + andl %ebx,%r11d + xorl %edx,%r11d + movl 52(%rsi),%r10d + addl %r11d,%eax + roll $7,%eax + movl %ecx,%r11d + addl %ebx,%eax + xorl %ebx,%r11d + leal -40341101(%rdx,%r10,1),%edx + andl %eax,%r11d + xorl %ecx,%r11d + movl 56(%rsi),%r10d + addl %r11d,%edx + roll $12,%edx + movl %ebx,%r11d + addl %eax,%edx + xorl %eax,%r11d + leal -1502002290(%rcx,%r10,1),%ecx + andl %edx,%r11d + xorl %ebx,%r11d + movl 60(%rsi),%r10d + addl %r11d,%ecx + roll $17,%ecx + movl %eax,%r11d + addl %edx,%ecx + xorl %edx,%r11d + leal 1236535329(%rbx,%r10,1),%ebx + andl %ecx,%r11d + xorl %eax,%r11d + movl 0(%rsi),%r10d + addl %r11d,%ebx + roll $22,%ebx + movl %edx,%r11d + addl %ecx,%ebx + movl 4(%rsi),%r10d + movl %edx,%r11d + movl %edx,%r12d + notl %r11d + leal -165796510(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 24(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1069501632(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 44(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 643717713(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d 
+ roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -373897302(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 20(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -701558691(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 40(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal 38016083(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 60(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -660478335(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 16(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -405537848(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 36(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal 568446438(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 56(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -1019803690(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 12(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal -187363961(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 32(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal 1163531501(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 52(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + notl %r11d + leal -1444681467(%rax,%r10,1),%eax + andl %ebx,%r12d + andl %ecx,%r11d + movl 8(%rsi),%r10d + orl %r11d,%r12d + movl %ecx,%r11d + addl %r12d,%eax + movl %ecx,%r12d + roll $5,%eax + addl %ebx,%eax + notl %r11d + leal -51403784(%rdx,%r10,1),%edx + andl %eax,%r12d + andl %ebx,%r11d + movl 28(%rsi),%r10d + orl %r11d,%r12d + movl %ebx,%r11d + addl %r12d,%edx + movl %ebx,%r12d + roll $9,%edx + addl %eax,%edx + notl %r11d + leal 1735328473(%rcx,%r10,1),%ecx + andl %edx,%r12d + andl %eax,%r11d + movl 48(%rsi),%r10d + orl %r11d,%r12d + movl %eax,%r11d + addl %r12d,%ecx + movl %eax,%r12d + roll $14,%ecx + addl %edx,%ecx + notl %r11d + leal -1926607734(%rbx,%r10,1),%ebx + andl %ecx,%r12d + andl %edx,%r11d + movl 0(%rsi),%r10d + orl %r11d,%r12d + movl %edx,%r11d + addl %r12d,%ebx + movl %edx,%r12d + roll $20,%ebx + addl %ecx,%ebx + movl 20(%rsi),%r10d + movl %ecx,%r11d + leal -378558(%rax,%r10,1),%eax + movl 32(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -2022574463(%rdx,%r10,1),%edx + movl 44(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 1839030562(%rcx,%r10,1),%ecx + movl 56(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -35309556(%rbx,%r10,1),%ebx + movl 4(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -1530992060(%rax,%r10,1),%eax + movl 16(%rsi),%r10d + xorl %edx,%r11d + xorl 
%ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal 1272893353(%rdx,%r10,1),%edx + movl 28(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -155497632(%rcx,%r10,1),%ecx + movl 40(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -1094730640(%rbx,%r10,1),%ebx + movl 52(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal 681279174(%rax,%r10,1),%eax + movl 0(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -358537222(%rdx,%r10,1),%edx + movl 12(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal -722521979(%rcx,%r10,1),%ecx + movl 24(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal 76029189(%rbx,%r10,1),%ebx + movl 36(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + leal -640364487(%rax,%r10,1),%eax + movl 48(%rsi),%r10d + xorl %edx,%r11d + xorl %ebx,%r11d + addl %r11d,%eax + roll $4,%eax + movl %ebx,%r11d + addl %ebx,%eax + leal -421815835(%rdx,%r10,1),%edx + movl 60(%rsi),%r10d + xorl %ecx,%r11d + xorl %eax,%r11d + addl %r11d,%edx + roll $11,%edx + movl %eax,%r11d + addl %eax,%edx + leal 530742520(%rcx,%r10,1),%ecx + movl 8(%rsi),%r10d + xorl %ebx,%r11d + xorl %edx,%r11d + addl %r11d,%ecx + roll $16,%ecx + movl %edx,%r11d + addl %edx,%ecx + leal -995338651(%rbx,%r10,1),%ebx + movl 0(%rsi),%r10d + xorl %eax,%r11d + xorl %ecx,%r11d + addl %r11d,%ebx + roll $23,%ebx + movl %ecx,%r11d + addl %ecx,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + xorl %edx,%r11d + leal -198630844(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 28(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal 1126891415(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 56(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1416354905(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 20(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -57434055(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 48(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1700485571(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 12(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1894986606(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 40(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1051523(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 4(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -2054922799(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 32(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal 1873313359(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 60(%rsi),%r10d + movl 
$4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -30611744(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 24(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal -1560198380(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 52(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal 1309151649(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 16(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + leal -145523070(%rax,%r10,1),%eax + orl %ebx,%r11d + xorl %ecx,%r11d + addl %r11d,%eax + movl 44(%rsi),%r10d + movl $4294967295,%r11d + roll $6,%eax + xorl %ecx,%r11d + addl %ebx,%eax + leal -1120210379(%rdx,%r10,1),%edx + orl %eax,%r11d + xorl %ebx,%r11d + addl %r11d,%edx + movl 8(%rsi),%r10d + movl $4294967295,%r11d + roll $10,%edx + xorl %ebx,%r11d + addl %eax,%edx + leal 718787259(%rcx,%r10,1),%ecx + orl %edx,%r11d + xorl %eax,%r11d + addl %r11d,%ecx + movl 36(%rsi),%r10d + movl $4294967295,%r11d + roll $15,%ecx + xorl %eax,%r11d + addl %edx,%ecx + leal -343485551(%rbx,%r10,1),%ebx + orl %ecx,%r11d + xorl %edx,%r11d + addl %r11d,%ebx + movl 0(%rsi),%r10d + movl $4294967295,%r11d + roll $21,%ebx + xorl %edx,%r11d + addl %ecx,%ebx + + addl %r8d,%eax + addl %r9d,%ebx + addl %r14d,%ecx + addl %r15d,%edx + + + addq $64,%rsi + cmpq %rdi,%rsi + jb L$loop + + +L$end: + movl %eax,0(%rbp) + movl %ebx,4(%rbp) + movl %ecx,8(%rbp) + movl %edx,12(%rbp) + + movq (%rsp),%r15 + movq 8(%rsp),%r14 + movq 16(%rsp),%r12 + movq 24(%rsp),%rbx + movq 32(%rsp),%rbp + addq $40,%rsp +L$epilogue: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/modes/aesni-gcm-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/modes/aesni-gcm-x86_64.s new file mode 100644 index 00000000000000..e2bf1bb53a19be --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/modes/aesni-gcm-x86_64.s @@ -0,0 +1,14 @@ +.text + +.globl _aesni_gcm_encrypt + +_aesni_gcm_encrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 + + +.globl _aesni_gcm_decrypt + +_aesni_gcm_decrypt: + xorl %eax,%eax + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/modes/ghash-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/modes/ghash-x86_64.s new file mode 100644 index 00000000000000..f21b3013c55f51 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/modes/ghash-x86_64.s @@ -0,0 +1,1318 @@ +.text + + +.globl _gcm_gmult_4bit + +.p2align 4 +_gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 +L$gmult_prologue: + + movzbq 15(%rdi),%r8 + leaq L$rem_4bit(%rip),%r11 + xorq %rax,%rax + xorq %rbx,%rbx + movb %r8b,%al + movb %r8b,%bl + shlb $4,%al + movq $14,%rcx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + andb $240,%bl + movq %r8,%rdx + jmp L$oop1 + +.p2align 4 +L$oop1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + movb (%rdi,%rcx,1),%al + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + movb %al,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + shlb $4,%al + xorq %r10,%r8 + decq %rcx + js L$break1 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + jmp L$oop1 + +.p2align 4 +L$break1: + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rax,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rax,1),%r9 + 
andb $240,%bl + xorq (%r11,%rdx,8),%r9 + movq %r8,%rdx + xorq %r10,%r8 + + shrq $4,%r8 + andq $15,%rdx + movq %r9,%r10 + shrq $4,%r9 + xorq 8(%rsi,%rbx,1),%r8 + shlq $60,%r10 + xorq (%rsi,%rbx,1),%r9 + xorq %r10,%r8 + xorq (%r11,%rdx,8),%r9 + + bswapq %r8 + bswapq %r9 + movq %r8,8(%rdi) + movq %r9,(%rdi) + + movq 16(%rsp),%rbx + leaq 24(%rsp),%rsp +L$gmult_epilogue: + .byte 0xf3,0xc3 + +.globl _gcm_ghash_4bit + +.p2align 4 +_gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp +L$ghash_prologue: + movq %rdx,%r14 + movq %rcx,%r15 + subq $-128,%rsi + leaq 16+128(%rsp),%rbp + xorl %edx,%edx + movq 0+0-128(%rsi),%r8 + movq 0+8-128(%rsi),%rax + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq 16+0-128(%rsi),%r9 + shlb $4,%dl + movq 16+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,0(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,0(%rbp) + movq 32+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,0-128(%rbp) + movq 32+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,1(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,8(%rbp) + movq 48+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,8-128(%rbp) + movq 48+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,2(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,16(%rbp) + movq 64+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,16-128(%rbp) + movq 64+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,3(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,24(%rbp) + movq 80+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,24-128(%rbp) + movq 80+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,4(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,32(%rbp) + movq 96+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,32-128(%rbp) + movq 96+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,5(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,40(%rbp) + movq 112+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,40-128(%rbp) + movq 112+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,6(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,48(%rbp) + movq 128+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,48-128(%rbp) + movq 128+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,7(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,56(%rbp) + movq 144+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,56-128(%rbp) + movq 144+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,8(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,64(%rbp) + movq 160+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,64-128(%rbp) + movq 160+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,9(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,72(%rbp) + movq 176+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,72-128(%rbp) + movq 176+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,10(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,80(%rbp) + movq 192+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,80-128(%rbp) + movq 192+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,11(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,88(%rbp) + movq 208+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,88-128(%rbp) + movq 208+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,12(%rsp) 
+ orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,96(%rbp) + movq 224+0-128(%rsi),%r8 + shlb $4,%dl + movq %rax,96-128(%rbp) + movq 224+8-128(%rsi),%rax + shlq $60,%r10 + movb %dl,13(%rsp) + orq %r10,%rbx + movb %al,%dl + shrq $4,%rax + movq %r8,%r10 + shrq $4,%r8 + movq %r9,104(%rbp) + movq 240+0-128(%rsi),%r9 + shlb $4,%dl + movq %rbx,104-128(%rbp) + movq 240+8-128(%rsi),%rbx + shlq $60,%r10 + movb %dl,14(%rsp) + orq %r10,%rax + movb %bl,%dl + shrq $4,%rbx + movq %r9,%r10 + shrq $4,%r9 + movq %r8,112(%rbp) + shlb $4,%dl + movq %rax,112-128(%rbp) + shlq $60,%r10 + movb %dl,15(%rsp) + orq %r10,%rbx + movq %r9,120(%rbp) + movq %rbx,120-128(%rbp) + addq $-128,%rsi + movq 8(%rdi),%r8 + movq 0(%rdi),%r9 + addq %r14,%r15 + leaq L$rem_8bit(%rip),%r11 + jmp L$outer_loop +.p2align 4 +L$outer_loop: + xorq (%r14),%r9 + movq 8(%r14),%rdx + leaq 16(%r14),%r14 + xorq %r8,%rdx + movq %r9,(%rdi) + movq %rdx,8(%rdi) + shrq $32,%rdx + xorq %rax,%rax + roll $8,%edx + movb %dl,%al + movzbl %dl,%ebx + shlb $4,%al + shrl $4,%ebx + roll $8,%edx + movq 8(%rsi,%rax,1),%r8 + movq (%rsi,%rax,1),%r9 + movb %dl,%al + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + xorq %r8,%r12 + movq %r9,%r10 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 8(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq 
%r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl 0(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + shrl $4,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r12,2),%r12 + movzbl %dl,%ebx + shlb $4,%al + movzbq (%rsp,%rcx,1),%r13 + shrl $4,%ebx + shlq $48,%r12 + xorq %r8,%r13 + movq %r9,%r10 + xorq %r12,%r9 + shrq $8,%r8 + movzbq %r13b,%r13 + shrq $8,%r9 + xorq -128(%rbp,%rcx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rcx,8),%r9 + roll $8,%edx + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + movb %dl,%al + xorq %r10,%r8 + movzwq (%r11,%r13,2),%r13 + movzbl %dl,%ecx + shlb $4,%al + movzbq (%rsp,%rbx,1),%r12 + andl $240,%ecx + shlq $48,%r13 + xorq %r8,%r12 + movq %r9,%r10 + xorq %r13,%r9 + shrq $8,%r8 + movzbq %r12b,%r12 + movl -4(%rdi),%edx + shrq $8,%r9 + xorq -128(%rbp,%rbx,8),%r8 + shlq $56,%r10 + xorq (%rbp,%rbx,8),%r9 + movzwq (%r11,%r12,2),%r12 + xorq 8(%rsi,%rax,1),%r8 + xorq (%rsi,%rax,1),%r9 + shlq $48,%r12 + xorq %r10,%r8 + xorq %r12,%r9 + movzbq %r8b,%r13 + shrq $4,%r8 + movq %r9,%r10 + shlb $4,%r13b + shrq $4,%r9 + xorq 8(%rsi,%rcx,1),%r8 + movzwq (%r11,%r13,2),%r13 + shlq $60,%r10 + xorq (%rsi,%rcx,1),%r9 + xorq 
%r10,%r8 + shlq $48,%r13 + bswapq %r8 + xorq %r13,%r9 + bswapq %r9 + cmpq %r15,%r14 + jb L$outer_loop + movq %r8,8(%rdi) + movq %r9,(%rdi) + + leaq 280(%rsp),%rsi + movq 0(%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$ghash_epilogue: + .byte 0xf3,0xc3 + +.globl _gcm_init_clmul + +.p2align 4 +_gcm_init_clmul: +L$_init_clmul: + movdqu (%rsi),%xmm2 + pshufd $78,%xmm2,%xmm2 + + + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + + + pand L$0x1c2_polynomial(%rip),%xmm5 + pxor %xmm5,%xmm2 + + + pshufd $78,%xmm2,%xmm6 + movdqa %xmm2,%xmm0 + pxor %xmm2,%xmm6 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,0(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%rdi) + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm5 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,222,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm5,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm5,%xmm3 + movdqu %xmm5,48(%rdi) + pxor %xmm0,%xmm4 + movdqu %xmm0,64(%rdi) +.byte 102,15,58,15,227,8 + movdqu %xmm4,80(%rdi) + .byte 0xf3,0xc3 + +.globl _gcm_gmult_clmul + +.p2align 4 +_gcm_gmult_clmul: +L$_gmult_clmul: + movdqu (%rdi),%xmm0 + movdqa L$bswap_mask(%rip),%xmm5 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm4 +.byte 102,15,56,0,197 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa 
%xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 + +.globl _gcm_ghash_clmul + +.p2align 5 +_gcm_ghash_clmul: +L$_ghash_clmul: + movdqa L$bswap_mask(%rip),%xmm10 + + movdqu (%rdi),%xmm0 + movdqu (%rsi),%xmm2 + movdqu 32(%rsi),%xmm7 +.byte 102,65,15,56,0,194 + + subq $16,%rcx + jz L$odd_tail + + movdqu 16(%rsi),%xmm6 + movl _OPENSSL_ia32cap_P+4(%rip),%eax + cmpq $48,%rcx + jb L$skip4x + + andl $71303168,%eax + cmpl $4194304,%eax + je L$skip4x + + subq $48,%rcx + movq $11547335547999543296,%rax + movdqu 48(%rsi),%xmm14 + movdqu 64(%rsi),%xmm15 + + + + + movdqu 48(%rdx),%xmm3 + movdqu 32(%rdx),%xmm11 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm11,%xmm12 +.byte 102,68,15,58,68,222,0 +.byte 102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 + xorps %xmm12,%xmm4 + + movdqu 16(%rdx),%xmm11 + movdqu 0(%rdx),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 + movdqa %xmm11,%xmm13 + pshufd $78,%xmm11,%xmm12 + pxor %xmm8,%xmm0 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 +.byte 102,69,15,58,68,238,17 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jc L$tail4x + + jmp L$mod4_loop +.p2align 5 +L$mod4_loop: +.byte 102,65,15,58,68,199,0 + xorps %xmm12,%xmm4 + movdqu 48(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,65,15,58,68,207,17 + xorps %xmm3,%xmm0 + movdqu 32(%rdx),%xmm3 + movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 + pshufd $78,%xmm11,%xmm12 + xorps %xmm5,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,65,15,56,0,218 + movups 32(%rsi),%xmm7 + xorps %xmm4,%xmm8 +.byte 102,68,15,58,68,218,0 + pshufd $78,%xmm3,%xmm4 + + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 +.byte 102,68,15,58,68,234,17 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa L$7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%rdx),%xmm8 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 + movdqu 16(%rdx),%xmm11 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rsi),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + + movdqa %xmm11,%xmm13 + pxor %xmm12,%xmm4 + pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + pxor %xmm11,%xmm12 +.byte 102,69,15,58,68,222,0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + movdqa %xmm0,%xmm1 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor 
%xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 + + leaq 64(%rdx),%rdx + subq $64,%rcx + jnc L$mod4_loop + +L$tail4x: +.byte 102,65,15,58,68,199,0 +.byte 102,65,15,58,68,207,17 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 + pxor %xmm0,%xmm1 + pxor %xmm4,%xmm8 + + pxor %xmm1,%xmm8 + pxor %xmm0,%xmm1 + + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + addq $64,%rcx + jz L$done + movdqu 32(%rsi),%xmm7 + subq $16,%rcx + jz L$odd_tail +L$skip4x: + + + + + + movdqu (%rdx),%xmm8 + movdqu 16(%rdx),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 + + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 + + leaq 32(%rdx),%rdx + nop + subq $32,%rcx + jbe L$even_tail + nop + jmp L$mod_loop + +.p2align 5 +L$mod_loop: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%rdx),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%rdx),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm3,%xmm5 + + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 + psllq $5,%xmm0 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 + psllq $1,%xmm0 + pxor %xmm8,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm8 + pslldq $8,%xmm0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 + + movdqa %xmm0,%xmm9 + psrlq $1,%xmm0 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 + psrlq $5,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%rdx),%rdx + psrlq $1,%xmm0 +.byte 102,15,58,68,231,0 + pxor %xmm1,%xmm0 + + subq $32,%rcx + ja L$mod_loop + +L$even_tail: + movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 + pshufd $78,%xmm0,%xmm4 + pxor %xmm0,%xmm4 + +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 + + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 + pslldq $8,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testq %rcx,%rcx + jnz L$done + +L$odd_tail: + movdqu (%rdx),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,223,0 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + + movdqa %xmm0,%xmm4 + 
movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + + + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L$done: +.byte 102,65,15,56,0,194 + movdqu %xmm0,(%rdi) + .byte 0xf3,0xc3 + +.globl _gcm_init_avx + +.p2align 5 +_gcm_init_avx: + jmp L$_init_clmul + +.globl _gcm_gmult_avx + +.p2align 5 +_gcm_gmult_avx: + jmp L$_gmult_clmul + +.globl _gcm_ghash_avx + +.p2align 5 +_gcm_ghash_avx: + jmp L$_ghash_clmul + +.p2align 6 +L$bswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +L$0x1c2_polynomial: +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +L$7_mask: +.long 7,0,7,0 +L$7_mask_poly: +.long 7,0,450,0 +.p2align 6 + +L$rem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +L$rem_8bit: +.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E +.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E +.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E +.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E +.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E +.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E +.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E +.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E +.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE +.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE +.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE +.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE +.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E +.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E +.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE +.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE +.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E +.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E +.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E +.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E +.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E +.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E +.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E +.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E +.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE +.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE +.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE +.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE +.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E +.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E +.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE +.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE + +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-mb-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-mb-x86_64.s new file mode 100644 index 
00000000000000..010924530be391 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-mb-x86_64.s @@ -0,0 +1,2934 @@ +.text + + + +.globl _sha1_multi_block + +.p2align 5 +_sha1_multi_block: + movq _OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +L$body: + leaq K_XX_XX(%rip),%rbp + leaq 256(%rsp),%rbx + +L$oop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done + + movdqu 0(%rdi),%xmm10 + leaq 128(%rsp),%rax + movdqu 32(%rdi),%xmm11 + movdqu 64(%rdi),%xmm12 + movdqu 96(%rdi),%xmm13 + movdqu 128(%rdi),%xmm14 + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + jmp L$oop + +.p2align 5 +L$oop: + movd (%r8),%xmm0 + leaq 64(%r8),%r8 + movd (%r9),%xmm2 + leaq 64(%r9),%r9 + movd (%r10),%xmm3 + leaq 64(%r10),%r10 + movd (%r11),%xmm4 + leaq 64(%r11),%r11 + punpckldq %xmm3,%xmm0 + movd -60(%r8),%xmm1 + punpckldq %xmm4,%xmm2 + movd -60(%r9),%xmm9 + punpckldq %xmm2,%xmm0 + movd -60(%r10),%xmm8 +.byte 102,15,56,0,197 + movd -60(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,0-128(%rax) + paddd %xmm0,%xmm14 + movd -56(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -56(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -56(%r10),%xmm8 + por %xmm7,%xmm11 + movd -56(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,16-128(%rax) + paddd %xmm1,%xmm13 + movd -52(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -52(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -52(%r10),%xmm8 + por %xmm7,%xmm10 + movd -52(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,32-128(%rax) + paddd %xmm2,%xmm12 + movd -48(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -48(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -48(%r10),%xmm8 + por %xmm7,%xmm14 + movd -48(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn 
%xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,48-128(%rax) + paddd %xmm3,%xmm11 + movd -44(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -44(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -44(%r10),%xmm8 + por %xmm7,%xmm13 + movd -44(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,64-128(%rax) + paddd %xmm4,%xmm10 + movd -40(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -40(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -40(%r10),%xmm8 + por %xmm7,%xmm12 + movd -40(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,80-128(%rax) + paddd %xmm0,%xmm14 + movd -36(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -36(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -36(%r10),%xmm8 + por %xmm7,%xmm11 + movd -36(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,96-128(%rax) + paddd %xmm1,%xmm13 + movd -32(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -32(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -32(%r10),%xmm8 + por %xmm7,%xmm10 + movd -32(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,112-128(%rax) + paddd %xmm2,%xmm12 + movd -28(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -28(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -28(%r10),%xmm8 + por %xmm7,%xmm14 + movd -28(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,128-128(%rax) + paddd %xmm3,%xmm11 + movd -24(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -24(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -24(%r10),%xmm8 + por %xmm7,%xmm13 + movd -24(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + 
movdqa %xmm11,%xmm9 + + movdqa %xmm4,144-128(%rax) + paddd %xmm4,%xmm10 + movd -20(%r8),%xmm1 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + + por %xmm9,%xmm8 + movd -20(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + movd -20(%r10),%xmm8 + por %xmm7,%xmm12 + movd -20(%r11),%xmm7 + punpckldq %xmm8,%xmm1 + movdqa %xmm10,%xmm8 + paddd %xmm15,%xmm14 + punpckldq %xmm7,%xmm9 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm6 + pslld $5,%xmm8 + pandn %xmm13,%xmm7 + pand %xmm12,%xmm6 + punpckldq %xmm9,%xmm1 + movdqa %xmm10,%xmm9 + + movdqa %xmm0,160-128(%rax) + paddd %xmm0,%xmm14 + movd -16(%r8),%xmm2 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm11,%xmm7 + + por %xmm9,%xmm8 + movd -16(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 +.byte 102,15,56,0,205 + movd -16(%r10),%xmm8 + por %xmm7,%xmm11 + movd -16(%r11),%xmm7 + punpckldq %xmm8,%xmm2 + movdqa %xmm14,%xmm8 + paddd %xmm15,%xmm13 + punpckldq %xmm7,%xmm9 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm6 + pslld $5,%xmm8 + pandn %xmm12,%xmm7 + pand %xmm11,%xmm6 + punpckldq %xmm9,%xmm2 + movdqa %xmm14,%xmm9 + + movdqa %xmm1,176-128(%rax) + paddd %xmm1,%xmm13 + movd -12(%r8),%xmm3 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm10,%xmm7 + + por %xmm9,%xmm8 + movd -12(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 +.byte 102,15,56,0,213 + movd -12(%r10),%xmm8 + por %xmm7,%xmm10 + movd -12(%r11),%xmm7 + punpckldq %xmm8,%xmm3 + movdqa %xmm13,%xmm8 + paddd %xmm15,%xmm12 + punpckldq %xmm7,%xmm9 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pandn %xmm11,%xmm7 + pand %xmm10,%xmm6 + punpckldq %xmm9,%xmm3 + movdqa %xmm13,%xmm9 + + movdqa %xmm2,192-128(%rax) + paddd %xmm2,%xmm12 + movd -8(%r8),%xmm4 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm14,%xmm7 + + por %xmm9,%xmm8 + movd -8(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 +.byte 102,15,56,0,221 + movd -8(%r10),%xmm8 + por %xmm7,%xmm14 + movd -8(%r11),%xmm7 + punpckldq %xmm8,%xmm4 + movdqa %xmm12,%xmm8 + paddd %xmm15,%xmm11 + punpckldq %xmm7,%xmm9 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm6 + pslld $5,%xmm8 + pandn %xmm10,%xmm7 + pand %xmm14,%xmm6 + punpckldq %xmm9,%xmm4 + movdqa %xmm12,%xmm9 + + movdqa %xmm3,208-128(%rax) + paddd %xmm3,%xmm11 + movd -4(%r8),%xmm0 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm13,%xmm7 + + por %xmm9,%xmm8 + movd -4(%r9),%xmm9 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 +.byte 102,15,56,0,229 + movd -4(%r10),%xmm8 + por %xmm7,%xmm13 + movdqa 0-128(%rax),%xmm1 + movd -4(%r11),%xmm7 + punpckldq %xmm8,%xmm0 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + punpckldq %xmm7,%xmm9 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm6 + pslld $5,%xmm8 + prefetcht0 63(%r8) + pandn %xmm14,%xmm7 + pand %xmm13,%xmm6 + punpckldq %xmm9,%xmm0 + movdqa %xmm11,%xmm9 + + movdqa %xmm4,224-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + movdqa %xmm12,%xmm7 + prefetcht0 63(%r9) + + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + prefetcht0 63(%r10) + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 +.byte 102,15,56,0,197 + prefetcht0 63(%r11) + por %xmm7,%xmm12 + movdqa 16-128(%rax),%xmm2 + pxor %xmm3,%xmm1 + movdqa 32-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + pxor 128-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + movdqa %xmm11,%xmm7 + pslld $5,%xmm8 + pxor %xmm3,%xmm1 + movdqa %xmm11,%xmm6 + pandn %xmm13,%xmm7 + movdqa 
%xmm1,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm10,%xmm9 + psrld $31,%xmm5 + paddd %xmm1,%xmm1 + + movdqa %xmm0,240-128(%rax) + paddd %xmm0,%xmm14 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm11,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 48-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + pxor 144-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + movdqa %xmm10,%xmm7 + pslld $5,%xmm8 + pxor %xmm4,%xmm2 + movdqa %xmm10,%xmm6 + pandn %xmm12,%xmm7 + movdqa %xmm2,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm14,%xmm9 + psrld $31,%xmm5 + paddd %xmm2,%xmm2 + + movdqa %xmm1,0-128(%rax) + paddd %xmm1,%xmm13 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm10,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 64-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + pxor 160-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + movdqa %xmm14,%xmm7 + pslld $5,%xmm8 + pxor %xmm0,%xmm3 + movdqa %xmm14,%xmm6 + pandn %xmm11,%xmm7 + movdqa %xmm3,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm13,%xmm9 + psrld $31,%xmm5 + paddd %xmm3,%xmm3 + + movdqa %xmm2,16-128(%rax) + paddd %xmm2,%xmm12 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm14,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 80-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + pxor 176-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + movdqa %xmm13,%xmm7 + pslld $5,%xmm8 + pxor %xmm1,%xmm4 + movdqa %xmm13,%xmm6 + pandn %xmm10,%xmm7 + movdqa %xmm4,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm12,%xmm9 + psrld $31,%xmm5 + paddd %xmm4,%xmm4 + + movdqa %xmm3,32-128(%rax) + paddd %xmm3,%xmm11 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm13,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 96-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + pxor 192-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + movdqa %xmm12,%xmm7 + pslld $5,%xmm8 + pxor %xmm2,%xmm0 + movdqa %xmm12,%xmm6 + pandn %xmm14,%xmm7 + movdqa %xmm0,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm11,%xmm9 + psrld $31,%xmm5 + paddd %xmm0,%xmm0 + + movdqa %xmm4,48-128(%rax) + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + pxor %xmm7,%xmm6 + + movdqa %xmm12,%xmm7 + por %xmm9,%xmm8 + pslld $30,%xmm7 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 0(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 112-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 208-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,64-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 128-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 224-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,80-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por 
%xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 144-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 240-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,96-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 160-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 0-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,112-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 176-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 16-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,128-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 192-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 32-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,144-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 208-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 48-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,160-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 224-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 64-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,176-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 240-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 80-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,192-128(%rax) + 
paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 0-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 96-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,208-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 16-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 112-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,224-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 32-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 128-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,240-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 48-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 144-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,0-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 64-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 160-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,16-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 80-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 176-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,32-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 96-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + 
pxor 192-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,48-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 112-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 208-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,64-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 128-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 224-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,80-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 144-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 240-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,96-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 160-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 0-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,112-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 32(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 176-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 16-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,128-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 192-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 32-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,144-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 
+ psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 208-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 48-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,160-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 224-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 64-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,176-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 240-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 80-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,192-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 0-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 96-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,208-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 16-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 112-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,224-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 32-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 128-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,240-128(%rax) + paddd %xmm2,%xmm12 + por 
%xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 48-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 144-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,0-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 64-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 160-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,16-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 80-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 176-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,32-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 96-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 192-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,48-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 112-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 208-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,64-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 128-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 224-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,80-128(%rax) + paddd %xmm3,%xmm11 + por 
%xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 144-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 240-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,96-128(%rax) + paddd %xmm4,%xmm10 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 160-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm7 + pxor 0-128(%rax),%xmm1 + pxor %xmm3,%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + movdqa %xmm10,%xmm9 + pand %xmm12,%xmm7 + + movdqa %xmm13,%xmm6 + movdqa %xmm1,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm14 + pxor %xmm12,%xmm6 + + movdqa %xmm0,112-128(%rax) + paddd %xmm0,%xmm14 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm11,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + paddd %xmm1,%xmm1 + paddd %xmm6,%xmm14 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 176-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm7 + pxor 16-128(%rax),%xmm2 + pxor %xmm4,%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + movdqa %xmm14,%xmm9 + pand %xmm11,%xmm7 + + movdqa %xmm12,%xmm6 + movdqa %xmm2,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm13 + pxor %xmm11,%xmm6 + + movdqa %xmm1,128-128(%rax) + paddd %xmm1,%xmm13 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm10,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + paddd %xmm2,%xmm2 + paddd %xmm6,%xmm13 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 192-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm7 + pxor 32-128(%rax),%xmm3 + pxor %xmm0,%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + movdqa %xmm13,%xmm9 + pand %xmm10,%xmm7 + + movdqa %xmm11,%xmm6 + movdqa %xmm3,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm12 + pxor %xmm10,%xmm6 + + movdqa %xmm2,144-128(%rax) + paddd %xmm2,%xmm12 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm14,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + paddd %xmm3,%xmm3 + paddd %xmm6,%xmm12 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 208-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm7 + pxor 48-128(%rax),%xmm4 + pxor %xmm1,%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + movdqa %xmm12,%xmm9 + pand %xmm14,%xmm7 + + movdqa %xmm10,%xmm6 + movdqa %xmm4,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm11 + pxor %xmm14,%xmm6 + + movdqa %xmm3,160-128(%rax) + paddd %xmm3,%xmm11 + por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm13,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + paddd %xmm4,%xmm4 + paddd %xmm6,%xmm11 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 224-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm7 + pxor 64-128(%rax),%xmm0 + pxor %xmm2,%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + movdqa %xmm11,%xmm9 + pand %xmm13,%xmm7 + + movdqa %xmm14,%xmm6 + movdqa %xmm0,%xmm5 + psrld $27,%xmm9 + paddd %xmm7,%xmm10 + pxor %xmm13,%xmm6 + + movdqa %xmm4,176-128(%rax) + paddd %xmm4,%xmm10 + 
por %xmm9,%xmm8 + psrld $31,%xmm5 + pand %xmm12,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + paddd %xmm0,%xmm0 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + movdqa 64(%rbp),%xmm15 + pxor %xmm3,%xmm1 + movdqa 240-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 80-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,192-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 0-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 96-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,208-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 16-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 112-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,224-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 32-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 128-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,240-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 48-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 144-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,0-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 64-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 160-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,16-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 80-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 176-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld 
$5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,32-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 96-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 192-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + movdqa %xmm2,48-128(%rax) + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 112-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 208-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + movdqa %xmm3,64-128(%rax) + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 128-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 224-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + movdqa %xmm4,80-128(%rax) + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 144-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 240-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + movdqa %xmm0,96-128(%rax) + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 160-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 0-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + movdqa %xmm1,112-128(%rax) + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 176-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 16-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 
192-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 32-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + pxor %xmm2,%xmm0 + movdqa 208-128(%rax),%xmm2 + + movdqa %xmm11,%xmm8 + movdqa %xmm14,%xmm6 + pxor 48-128(%rax),%xmm0 + paddd %xmm15,%xmm10 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + pxor %xmm2,%xmm0 + psrld $27,%xmm9 + pxor %xmm13,%xmm6 + movdqa %xmm12,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm0,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm10 + paddd %xmm0,%xmm0 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm5,%xmm0 + por %xmm7,%xmm12 + pxor %xmm3,%xmm1 + movdqa 224-128(%rax),%xmm3 + + movdqa %xmm10,%xmm8 + movdqa %xmm13,%xmm6 + pxor 64-128(%rax),%xmm1 + paddd %xmm15,%xmm14 + pslld $5,%xmm8 + pxor %xmm11,%xmm6 + + movdqa %xmm10,%xmm9 + paddd %xmm0,%xmm14 + pxor %xmm3,%xmm1 + psrld $27,%xmm9 + pxor %xmm12,%xmm6 + movdqa %xmm11,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm1,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm14 + paddd %xmm1,%xmm1 + + psrld $2,%xmm11 + paddd %xmm8,%xmm14 + por %xmm5,%xmm1 + por %xmm7,%xmm11 + pxor %xmm4,%xmm2 + movdqa 240-128(%rax),%xmm4 + + movdqa %xmm14,%xmm8 + movdqa %xmm12,%xmm6 + pxor 80-128(%rax),%xmm2 + paddd %xmm15,%xmm13 + pslld $5,%xmm8 + pxor %xmm10,%xmm6 + + movdqa %xmm14,%xmm9 + paddd %xmm1,%xmm13 + pxor %xmm4,%xmm2 + psrld $27,%xmm9 + pxor %xmm11,%xmm6 + movdqa %xmm10,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm2,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm13 + paddd %xmm2,%xmm2 + + psrld $2,%xmm10 + paddd %xmm8,%xmm13 + por %xmm5,%xmm2 + por %xmm7,%xmm10 + pxor %xmm0,%xmm3 + movdqa 0-128(%rax),%xmm0 + + movdqa %xmm13,%xmm8 + movdqa %xmm11,%xmm6 + pxor 96-128(%rax),%xmm3 + paddd %xmm15,%xmm12 + pslld $5,%xmm8 + pxor %xmm14,%xmm6 + + movdqa %xmm13,%xmm9 + paddd %xmm2,%xmm12 + pxor %xmm0,%xmm3 + psrld $27,%xmm9 + pxor %xmm10,%xmm6 + movdqa %xmm14,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm3,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm12 + paddd %xmm3,%xmm3 + + psrld $2,%xmm14 + paddd %xmm8,%xmm12 + por %xmm5,%xmm3 + por %xmm7,%xmm14 + pxor %xmm1,%xmm4 + movdqa 16-128(%rax),%xmm1 + + movdqa %xmm12,%xmm8 + movdqa %xmm10,%xmm6 + pxor 112-128(%rax),%xmm4 + paddd %xmm15,%xmm11 + pslld $5,%xmm8 + pxor %xmm13,%xmm6 + + movdqa %xmm12,%xmm9 + paddd %xmm3,%xmm11 + pxor %xmm1,%xmm4 + psrld $27,%xmm9 + pxor %xmm14,%xmm6 + movdqa %xmm13,%xmm7 + + pslld $30,%xmm7 + movdqa %xmm4,%xmm5 + por %xmm9,%xmm8 + psrld $31,%xmm5 + paddd %xmm6,%xmm11 + paddd %xmm4,%xmm4 + + psrld $2,%xmm13 + paddd %xmm8,%xmm11 + por %xmm5,%xmm4 + por %xmm7,%xmm13 + movdqa %xmm11,%xmm8 + paddd %xmm15,%xmm10 + movdqa %xmm14,%xmm6 + pslld $5,%xmm8 + pxor %xmm12,%xmm6 + + movdqa %xmm11,%xmm9 + paddd %xmm4,%xmm10 + psrld $27,%xmm9 + movdqa %xmm12,%xmm7 + pxor %xmm13,%xmm6 + + pslld $30,%xmm7 + por %xmm9,%xmm8 + paddd %xmm6,%xmm10 + + psrld $2,%xmm12 + paddd %xmm8,%xmm10 + por %xmm7,%xmm12 + movdqa (%rbx),%xmm0 + movl $1,%ecx + cmpl 0(%rbx),%ecx + pxor %xmm8,%xmm8 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm0,%xmm1 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm8,%xmm1 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm1,%xmm0 + 
cmovgeq %rbp,%r11 + + movdqu 0(%rdi),%xmm6 + pand %xmm1,%xmm10 + movdqu 32(%rdi),%xmm7 + pand %xmm1,%xmm11 + paddd %xmm6,%xmm10 + movdqu 64(%rdi),%xmm8 + pand %xmm1,%xmm12 + paddd %xmm7,%xmm11 + movdqu 96(%rdi),%xmm9 + pand %xmm1,%xmm13 + paddd %xmm8,%xmm12 + movdqu 128(%rdi),%xmm5 + pand %xmm1,%xmm14 + movdqu %xmm10,0(%rdi) + paddd %xmm9,%xmm13 + movdqu %xmm11,32(%rdi) + paddd %xmm5,%xmm14 + movdqu %xmm12,64(%rdi) + movdqu %xmm13,96(%rdi) + movdqu %xmm14,128(%rdi) + + movdqa %xmm0,(%rbx) + movdqa 96(%rbp),%xmm5 + movdqa -32(%rbp),%xmm15 + decl %edx + jnz L$oop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande + +L$done: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +sha1_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 64(%rdi),%rdi + movq %rax,272(%rsp) +L$body_shaext: + leaq 256(%rsp),%rbx + movdqa K_XX_XX+128(%rip),%xmm3 + +L$oop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz L$done_shaext + + movq 0-64(%rdi),%xmm0 + movq 32-64(%rdi),%xmm4 + movq 64-64(%rdi),%xmm5 + movq 96-64(%rdi),%xmm6 + movq 128-64(%rdi),%xmm7 + + punpckldq %xmm4,%xmm0 + punpckldq %xmm6,%xmm5 + + movdqa %xmm0,%xmm8 + punpcklqdq %xmm5,%xmm0 + punpckhqdq %xmm5,%xmm8 + + pshufd $63,%xmm7,%xmm1 + pshufd $127,%xmm7,%xmm9 + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + jmp L$oop_shaext + +.p2align 5 +L$oop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm11 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm12 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm13 +.byte 102,68,15,56,0,219 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 +.byte 102,15,56,0,235 + movdqu 48(%r9),%xmm14 + leaq 64(%r9),%r9 +.byte 102,68,15,56,0,227 + + movdqa %xmm1,80(%rsp) + paddd %xmm4,%xmm1 + movdqa %xmm9,112(%rsp) + paddd %xmm11,%xmm9 + movdqa %xmm0,64(%rsp) + movdqa %xmm0,%xmm2 + movdqa %xmm8,96(%rsp) + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 102,15,56,0,243 + prefetcht0 127(%r8) +.byte 15,56,201,229 +.byte 102,68,15,56,0,235 + prefetcht0 127(%r9) +.byte 69,15,56,201,220 + +.byte 102,15,56,0,251 + movdqa %xmm0,%xmm1 +.byte 102,68,15,56,0,243 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,206 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,205 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,215 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,0 +.byte 15,56,200,204 +.byte 69,15,58,204,194,0 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,0 +.byte 15,56,200,213 +.byte 69,15,58,204,193,0 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + 
pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,215 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,204 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,1 +.byte 15,56,200,213 +.byte 69,15,58,204,193,1 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,1 +.byte 15,56,200,206 +.byte 69,15,58,204,194,1 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,204 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,213 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 +.byte 15,56,201,229 + pxor %xmm12,%xmm14 +.byte 69,15,56,201,220 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,2 +.byte 15,56,200,206 +.byte 69,15,58,204,194,2 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 + pxor %xmm13,%xmm11 +.byte 69,15,56,201,229 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,2 +.byte 15,56,200,215 +.byte 69,15,58,204,193,2 +.byte 69,15,56,200,214 +.byte 15,56,202,231 +.byte 69,15,56,202,222 + pxor %xmm7,%xmm5 +.byte 15,56,201,247 + pxor %xmm14,%xmm12 +.byte 69,15,56,201,238 + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,203 +.byte 15,56,202,236 +.byte 69,15,56,202,227 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 + pxor %xmm11,%xmm13 +.byte 69,15,56,201,243 + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,213 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,212 +.byte 15,56,202,245 +.byte 69,15,56,202,236 + pxor %xmm5,%xmm7 + pxor %xmm12,%xmm14 + + movl $1,%ecx + pxor %xmm4,%xmm4 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,206 +.byte 69,15,58,204,194,3 +.byte 69,15,56,200,205 +.byte 15,56,202,254 +.byte 69,15,56,202,245 + + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + movq (%rbx),%xmm6 
+ + movdqa %xmm0,%xmm2 + movdqa %xmm8,%xmm10 +.byte 15,58,204,193,3 +.byte 15,56,200,215 +.byte 69,15,58,204,193,3 +.byte 69,15,56,200,214 + + pshufd $0,%xmm6,%xmm11 + pshufd $85,%xmm6,%xmm12 + movdqa %xmm6,%xmm7 + pcmpgtd %xmm4,%xmm11 + pcmpgtd %xmm4,%xmm12 + + movdqa %xmm0,%xmm1 + movdqa %xmm8,%xmm9 +.byte 15,58,204,194,3 +.byte 15,56,200,204 +.byte 69,15,58,204,194,3 +.byte 68,15,56,200,204 + + pcmpgtd %xmm4,%xmm7 + pand %xmm11,%xmm0 + pand %xmm11,%xmm1 + pand %xmm12,%xmm8 + pand %xmm12,%xmm9 + paddd %xmm7,%xmm6 + + paddd 64(%rsp),%xmm0 + paddd 80(%rsp),%xmm1 + paddd 96(%rsp),%xmm8 + paddd 112(%rsp),%xmm9 + + movq %xmm6,(%rbx) + decl %edx + jnz L$oop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm8,%xmm8 + + movdqa %xmm0,%xmm6 + punpckldq %xmm8,%xmm0 + punpckhdq %xmm8,%xmm6 + punpckhdq %xmm9,%xmm1 + movq %xmm0,0-64(%rdi) + psrldq $8,%xmm0 + movq %xmm6,64-64(%rdi) + psrldq $8,%xmm6 + movq %xmm0,32-64(%rdi) + psrldq $8,%xmm1 + movq %xmm6,96-64(%rdi) + movq %xmm1,128-64(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz L$oop_grande_shaext + +L$done_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_shaext: + .byte 0xf3,0xc3 + + +.p2align 8 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +K_XX_XX: +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s new file mode 100644 index 00000000000000..671034cdafa3c4 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s @@ -0,0 +1,2591 @@ +.text + + +.globl _sha1_block_data_order + +.p2align 4 +_sha1_block_data_order: + movl _OPENSSL_ia32cap_P+0(%rip),%r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r8d + movl _OPENSSL_ia32cap_P+8(%rip),%r10d + testl $512,%r8d + jz L$ialu + testl $536870912,%r10d + jnz _shaext_shortcut + jmp _ssse3_shortcut + +.p2align 4 +L$ialu: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) +L$prologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 +L$loop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl 
%eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + 
movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll 
$30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal 
-1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal -1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + 
movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + 
roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + 
+ movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +sha1_block_data_order_shaext: +_shaext_shortcut: + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + decq %rdx + leaq 64(%rsi),%rax + paddd %xmm4,%xmm1 + cmovneq %rax,%rsi + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa 
%xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz L$oop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + .byte 0xf3,0xc3 + + +.p2align 4 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + leaq -64(%rsp),%rsp + movq %rax,%r14 + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r11),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + 
movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r11),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r11),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r11),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl 
%ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + 
addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r11),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll 
$5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + 
addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi 
+ roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq (%r14),%rsi + movq -40(%rsi),%r14 + movq -32(%rsi),%r13 + movq -24(%rsi),%r12 + movq -16(%rsi),%rbp + movq -8(%rsi),%rbx + leaq (%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-mb-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-mb-x86_64.s new file mode 100644 index 00000000000000..5ad4c7bb100a2f --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-mb-x86_64.s @@ -0,0 +1,3258 @@ +.text + + + +.globl _sha256_multi_block + +.p2align 5 +_sha256_multi_block: + movq _OPENSSL_ia32cap_P+4(%rip),%rcx + btq $61,%rcx + jc _shaext_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) +L$body: + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +L$oop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu L$pbswap(%rip),%xmm6 + jmp L$oop + +.p2align 5 +L$oop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor 
%xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor 
%xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld 
$22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm12,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd 
%xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm10,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm8,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 
52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm14,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 +.byte 102,15,56,0,238 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + prefetcht0 63(%r8) + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + prefetcht0 63(%r9) + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + prefetcht0 63(%r10) + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + prefetcht0 63(%r11) + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + 
jmp L$oop_16_xx +.p2align 5 +L$oop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor 
%xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + 
pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 
+ pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + + movdqa %xmm12,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld 
$17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + + movdqa %xmm11,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + + movdqa %xmm10,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + + movdqa %xmm9,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 
+ movdqa %xmm9,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + + movdqa %xmm8,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + + movdqa %xmm15,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld 
$22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + + movdqa %xmm14,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + + movdqa %xmm13,%xmm2 + + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz L$oop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq 
%rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa L$pbswap(%rip),%xmm6 + decl %edx + jnz L$oop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande + +L$done: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 5 +sha256_multi_block_shaext: +_shaext_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + shll $1,%edx + andq $-256,%rsp + leaq 128(%rdi),%rdi + movq %rax,272(%rsp) +L$body_shaext: + leaq 256(%rsp),%rbx + leaq K256_shaext+128(%rip),%rbp + +L$oop_grande_shaext: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rsp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rsp,%r9 + testl %edx,%edx + jz L$done_shaext + + movq 0-128(%rdi),%xmm12 + movq 32-128(%rdi),%xmm4 + movq 64-128(%rdi),%xmm13 + movq 96-128(%rdi),%xmm5 + movq 128-128(%rdi),%xmm8 + movq 160-128(%rdi),%xmm9 + movq 192-128(%rdi),%xmm10 + movq 224-128(%rdi),%xmm11 + + punpckldq %xmm4,%xmm12 + punpckldq %xmm5,%xmm13 + punpckldq %xmm9,%xmm8 + punpckldq %xmm11,%xmm10 + movdqa K256_shaext-16(%rip),%xmm3 + + movdqa %xmm12,%xmm14 + movdqa %xmm13,%xmm15 + punpcklqdq %xmm8,%xmm12 + punpcklqdq %xmm10,%xmm13 + punpckhqdq %xmm8,%xmm14 + punpckhqdq %xmm10,%xmm15 + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + jmp L$oop_shaext + +.p2align 5 +L$oop_shaext: + movdqu 0(%r8),%xmm4 + movdqu 0(%r9),%xmm8 + movdqu 16(%r8),%xmm5 + movdqu 16(%r9),%xmm9 + movdqu 32(%r8),%xmm6 +.byte 102,15,56,0,227 + movdqu 32(%r9),%xmm10 +.byte 102,68,15,56,0,195 + movdqu 48(%r8),%xmm7 + leaq 64(%r8),%r8 + movdqu 48(%r9),%xmm11 + leaq 64(%r9),%r9 + + movdqa 0-128(%rbp),%xmm0 +.byte 102,15,56,0,235 + paddd %xmm4,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm0,%xmm1 + movdqa 0-128(%rbp),%xmm2 +.byte 102,68,15,56,0,203 + paddd %xmm8,%xmm2 + movdqa %xmm13,80(%rsp) +.byte 69,15,56,203,236 + pxor %xmm14,%xmm8 + movdqa %xmm2,%xmm0 + movdqa %xmm15,112(%rsp) +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pxor %xmm12,%xmm4 + movdqa %xmm12,64(%rsp) +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + pxor %xmm14,%xmm8 + movdqa %xmm14,96(%rsp) + movdqa 16-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 102,15,56,0,243 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 16-128(%rbp),%xmm2 + paddd %xmm9,%xmm2 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + prefetcht0 127(%r8) +.byte 102,15,56,0,251 +.byte 102,68,15,56,0,211 + prefetcht0 127(%r9) +.byte 69,15,56,203,254 + 
pshufd $14,%xmm1,%xmm0 +.byte 102,68,15,56,0,219 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 32-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 32-128(%rbp),%xmm2 + paddd %xmm10,%xmm2 +.byte 69,15,56,203,236 +.byte 69,15,56,204,193 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 +.byte 102,15,58,15,222,4 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 48-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + + movdqa %xmm1,%xmm0 + movdqa 48-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 64-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 64-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 80-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 80-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 96-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 96-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 112-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 112-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 128-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 128-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 
+.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 144-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 144-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 +.byte 15,56,204,229 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 160-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,193 + movdqa %xmm1,%xmm0 + movdqa 160-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm3 +.byte 102,15,58,15,222,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm4 + movdqa %xmm11,%xmm3 +.byte 102,65,15,58,15,218,4 +.byte 15,56,204,238 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 176-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,202 + movdqa %xmm1,%xmm0 + movdqa 176-128(%rbp),%xmm2 + paddd %xmm3,%xmm8 + paddd %xmm11,%xmm2 +.byte 15,56,205,231 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,223,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,195 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm5 + movdqa %xmm8,%xmm3 +.byte 102,65,15,58,15,219,4 +.byte 15,56,204,247 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 192-128(%rbp),%xmm1 + paddd %xmm4,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,211 + movdqa %xmm1,%xmm0 + movdqa 192-128(%rbp),%xmm2 + paddd %xmm3,%xmm9 + paddd %xmm8,%xmm2 +.byte 15,56,205,236 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,58,15,220,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,200 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm6 + movdqa %xmm9,%xmm3 +.byte 102,65,15,58,15,216,4 +.byte 15,56,204,252 +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 208-128(%rbp),%xmm1 + paddd %xmm5,%xmm1 +.byte 69,15,56,203,247 +.byte 69,15,56,204,216 + movdqa %xmm1,%xmm0 + movdqa 208-128(%rbp),%xmm2 + paddd %xmm3,%xmm10 + paddd %xmm9,%xmm2 +.byte 15,56,205,245 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movdqa %xmm6,%xmm3 +.byte 102,15,58,15,221,4 +.byte 69,15,56,203,254 +.byte 69,15,56,205,209 + pshufd $14,%xmm1,%xmm0 + paddd %xmm3,%xmm7 + movdqa %xmm10,%xmm3 +.byte 102,65,15,58,15,217,4 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 224-128(%rbp),%xmm1 + paddd %xmm6,%xmm1 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + movdqa 224-128(%rbp),%xmm2 + paddd %xmm3,%xmm11 + paddd %xmm10,%xmm2 +.byte 15,56,205,254 + nop +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + movl $1,%ecx + pxor %xmm6,%xmm6 +.byte 69,15,56,203,254 +.byte 69,15,56,205,218 + pshufd $14,%xmm1,%xmm0 + movdqa 240-128(%rbp),%xmm1 + paddd %xmm7,%xmm1 + movq (%rbx),%xmm7 + nop +.byte 69,15,56,203,229 + pshufd $14,%xmm2,%xmm0 + movdqa 240-128(%rbp),%xmm2 + paddd %xmm11,%xmm2 +.byte 69,15,56,203,247 + + movdqa %xmm1,%xmm0 + cmpl 0(%rbx),%ecx + cmovgeq %rsp,%r8 + cmpl 4(%rbx),%ecx + cmovgeq %rsp,%r9 + pshufd $0,%xmm7,%xmm9 +.byte 69,15,56,203,236 + movdqa %xmm2,%xmm0 + pshufd $85,%xmm7,%xmm10 + movdqa %xmm7,%xmm11 +.byte 69,15,56,203,254 + pshufd $14,%xmm1,%xmm0 + pcmpgtd %xmm6,%xmm9 + pcmpgtd %xmm6,%xmm10 +.byte 69,15,56,203,229 + pshufd 
$14,%xmm2,%xmm0 + pcmpgtd %xmm6,%xmm11 + movdqa K256_shaext-16(%rip),%xmm3 +.byte 69,15,56,203,247 + + pand %xmm9,%xmm13 + pand %xmm10,%xmm15 + pand %xmm9,%xmm12 + pand %xmm10,%xmm14 + paddd %xmm7,%xmm11 + + paddd 80(%rsp),%xmm13 + paddd 112(%rsp),%xmm15 + paddd 64(%rsp),%xmm12 + paddd 96(%rsp),%xmm14 + + movq %xmm11,(%rbx) + decl %edx + jnz L$oop_shaext + + movl 280(%rsp),%edx + + pshufd $27,%xmm12,%xmm12 + pshufd $27,%xmm13,%xmm13 + pshufd $27,%xmm14,%xmm14 + pshufd $27,%xmm15,%xmm15 + + movdqa %xmm12,%xmm5 + movdqa %xmm13,%xmm6 + punpckldq %xmm14,%xmm12 + punpckhdq %xmm14,%xmm5 + punpckldq %xmm15,%xmm13 + punpckhdq %xmm15,%xmm6 + + movq %xmm12,0-128(%rdi) + psrldq $8,%xmm12 + movq %xmm5,128-128(%rdi) + psrldq $8,%xmm5 + movq %xmm12,32-128(%rdi) + movq %xmm5,160-128(%rdi) + + movq %xmm13,64-128(%rdi) + psrldq $8,%xmm13 + movq %xmm6,192-128(%rdi) + psrldq $8,%xmm6 + movq %xmm13,96-128(%rdi) + movq %xmm6,224-128(%rdi) + + leaq 8(%rdi),%rdi + leaq 32(%rsi),%rsi + decl %edx + jnz L$oop_grande_shaext + +L$done_shaext: + + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp +L$epilogue_shaext: + .byte 0xf3,0xc3 + +.p2align 8 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 
+.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3600352804,3600352804,3600352804,3600352804 +.long 3600352804,3600352804,3600352804,3600352804 +.long 4094571909,4094571909,4094571909,4094571909 +.long 4094571909,4094571909,4094571909,4094571909 +.long 275423344,275423344,275423344,275423344 +.long 275423344,275423344,275423344,275423344 +.long 430227734,430227734,430227734,430227734 +.long 430227734,430227734,430227734,430227734 +.long 506948616,506948616,506948616,506948616 +.long 506948616,506948616,506948616,506948616 +.long 659060556,659060556,659060556,659060556 +.long 659060556,659060556,659060556,659060556 +.long 883997877,883997877,883997877,883997877 +.long 883997877,883997877,883997877,883997877 +.long 958139571,958139571,958139571,958139571 +.long 958139571,958139571,958139571,958139571 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1955562222,1955562222,1955562222,1955562222 +.long 1955562222,1955562222,1955562222,1955562222 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2361852424,2361852424,2361852424,2361852424 +.long 
2361852424,2361852424,2361852424,2361852424 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2756734187,2756734187,2756734187,2756734187 +.long 2756734187,2756734187,2756734187,2756734187 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3329325298,3329325298,3329325298,3329325298 +.long 3329325298,3329325298,3329325298,3329325298 +L$pbswap: +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +K256_shaext: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.byte 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-x86_64.s new file mode 100644 index 00000000000000..aa507cada6bb4f --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha256-x86_64.s @@ -0,0 +1,3048 @@ +.text + + +.globl _sha256_block_data_order + +.p2align 4 +_sha256_block_data_order: + leaq _OPENSSL_ia32cap_P(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $536870912,%r11d + jnz _shaext_shortcut + testl $512,%r10d + jnz L$ssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + 
xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl 
%r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl 
%ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl 
$2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + 
shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl 
$7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + 
shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz L$rounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.p2align 6 +sha256_block_data_order_shaext: +_shaext_shortcut: + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L$oop_shaext + +.p2align 4 +L$oop_shaext: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd 
%xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz L$oop_shaext + + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + .byte 0xf3,0xc3 + + +.p2align 6 +sha256_block_data_order_ssse3: +L$ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp L$loop_ssse3 +.p2align 4 +L$loop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$ssse3_00_47 + +.p2align 4 +L$ssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + 
rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + 
addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl 
%eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$ssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + 
addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 
36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 
8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha512-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha512-x86_64.s new file mode 100644 index 00000000000000..1cae4e11fb33db --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/sha/sha512-x86_64.s @@ -0,0 +1,1783 @@ +.text + + +.globl _sha512_block_data_order + +.p2align 4 +_sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue: + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop + +.p2align 4 +L$loop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq 
%rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq 
$28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq 
%rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 
24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 
16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + 
addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz L$rounds_16_xx + + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 
0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/whrlpool/wp-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/whrlpool/wp-x86_64.s new file mode 100644 index 00000000000000..ad43b5a1b35149 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/whrlpool/wp-x86_64.s @@ -0,0 +1,861 @@ +.text + +.globl _whirlpool_block + +.p2align 4 +_whirlpool_block: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movq %rsp,%r11 + subq $128+40,%rsp 
+ andq $-64,%rsp + + leaq 128(%rsp),%r10 + movq %rdi,0(%r10) + movq %rsi,8(%r10) + movq %rdx,16(%r10) + movq %r11,32(%r10) +L$prologue: + + movq %r10,%rbx + leaq L$table(%rip),%rbp + + xorq %rcx,%rcx + xorq %rdx,%rdx + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 +L$outerloop: + movq %r8,0(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + xorq 0(%rsi),%r8 + xorq 8(%rsi),%r9 + xorq 16(%rsi),%r10 + xorq 24(%rsi),%r11 + xorq 32(%rsi),%r12 + xorq 40(%rsi),%r13 + xorq 48(%rsi),%r14 + xorq 56(%rsi),%r15 + movq %r8,64+0(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + xorq %rsi,%rsi + movq %rsi,24(%rbx) + jmp L$round +.p2align 4 +L$round: + movq 4096(%rbp,%rsi,8),%r8 + movl 0(%rsp),%eax + movl 4(%rsp),%ebx + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r8 + movq 7(%rbp,%rdi,8),%r9 + movl 0+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + movq 6(%rbp,%rsi,8),%r10 + movq 5(%rbp,%rdi,8),%r11 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + movq 4(%rbp,%rsi,8),%r12 + movq 3(%rbp,%rdi,8),%r13 + movl 0+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + movq 2(%rbp,%rsi,8),%r14 + movq 1(%rbp,%rdi,8),%r15 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r9 + xorq 7(%rbp,%rdi,8),%r10 + movl 8+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r11 + xorq 5(%rbp,%rdi,8),%r12 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r13 + xorq 3(%rbp,%rdi,8),%r14 + movl 8+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r15 + xorq 1(%rbp,%rdi,8),%r8 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r10 + xorq 7(%rbp,%rdi,8),%r11 + movl 16+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r12 + xorq 5(%rbp,%rdi,8),%r13 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r14 + xorq 3(%rbp,%rdi,8),%r15 + movl 16+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r8 + xorq 1(%rbp,%rdi,8),%r9 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r11 + xorq 7(%rbp,%rdi,8),%r12 + movl 24+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r13 + xorq 5(%rbp,%rdi,8),%r14 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r15 + xorq 3(%rbp,%rdi,8),%r8 + movl 24+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi 
+ movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r9 + xorq 1(%rbp,%rdi,8),%r10 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r12 + xorq 7(%rbp,%rdi,8),%r13 + movl 32+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r14 + xorq 5(%rbp,%rdi,8),%r15 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r8 + xorq 3(%rbp,%rdi,8),%r9 + movl 32+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r10 + xorq 1(%rbp,%rdi,8),%r11 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r13 + xorq 7(%rbp,%rdi,8),%r14 + movl 40+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r15 + xorq 5(%rbp,%rdi,8),%r8 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r9 + xorq 3(%rbp,%rdi,8),%r10 + movl 40+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r11 + xorq 1(%rbp,%rdi,8),%r12 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r14 + xorq 7(%rbp,%rdi,8),%r15 + movl 48+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r8 + xorq 5(%rbp,%rdi,8),%r9 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r10 + xorq 3(%rbp,%rdi,8),%r11 + movl 48+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r12 + xorq 1(%rbp,%rdi,8),%r13 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r15 + xorq 7(%rbp,%rdi,8),%r8 + movl 56+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r9 + xorq 5(%rbp,%rdi,8),%r10 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r11 + xorq 3(%rbp,%rdi,8),%r12 + movl 56+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r13 + xorq 1(%rbp,%rdi,8),%r14 + movq %r8,0(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r8 + xorq 7(%rbp,%rdi,8),%r9 + movl 64+0+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r10 + xorq 5(%rbp,%rdi,8),%r11 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r12 + xorq 3(%rbp,%rdi,8),%r13 + movl 64+0+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r14 + xorq 1(%rbp,%rdi,8),%r15 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi 
+ movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r9 + xorq 7(%rbp,%rdi,8),%r10 + movl 64+8+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r11 + xorq 5(%rbp,%rdi,8),%r12 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r13 + xorq 3(%rbp,%rdi,8),%r14 + movl 64+8+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r15 + xorq 1(%rbp,%rdi,8),%r8 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r10 + xorq 7(%rbp,%rdi,8),%r11 + movl 64+16+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r12 + xorq 5(%rbp,%rdi,8),%r13 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r14 + xorq 3(%rbp,%rdi,8),%r15 + movl 64+16+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r8 + xorq 1(%rbp,%rdi,8),%r9 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r11 + xorq 7(%rbp,%rdi,8),%r12 + movl 64+24+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r13 + xorq 5(%rbp,%rdi,8),%r14 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r15 + xorq 3(%rbp,%rdi,8),%r8 + movl 64+24+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r9 + xorq 1(%rbp,%rdi,8),%r10 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r12 + xorq 7(%rbp,%rdi,8),%r13 + movl 64+32+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r14 + xorq 5(%rbp,%rdi,8),%r15 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r8 + xorq 3(%rbp,%rdi,8),%r9 + movl 64+32+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r10 + xorq 1(%rbp,%rdi,8),%r11 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r13 + xorq 7(%rbp,%rdi,8),%r14 + movl 64+40+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r15 + xorq 5(%rbp,%rdi,8),%r8 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r9 + xorq 3(%rbp,%rdi,8),%r10 + movl 64+40+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r11 + xorq 1(%rbp,%rdi,8),%r12 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r14 + xorq 7(%rbp,%rdi,8),%r15 + movl 64+48+8(%rsp),%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r8 + xorq 5(%rbp,%rdi,8),%r9 + shrl 
$16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r10 + xorq 3(%rbp,%rdi,8),%r11 + movl 64+48+8+4(%rsp),%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r12 + xorq 1(%rbp,%rdi,8),%r13 + shrl $16,%eax + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 0(%rbp,%rsi,8),%r15 + xorq 7(%rbp,%rdi,8),%r8 + + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 6(%rbp,%rsi,8),%r9 + xorq 5(%rbp,%rdi,8),%r10 + shrl $16,%ebx + leaq (%rcx,%rcx,1),%rsi + movzbl %bl,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %bh,%edx + xorq 4(%rbp,%rsi,8),%r11 + xorq 3(%rbp,%rdi,8),%r12 + + leaq (%rcx,%rcx,1),%rsi + movzbl %al,%ecx + leaq (%rdx,%rdx,1),%rdi + movzbl %ah,%edx + xorq 2(%rbp,%rsi,8),%r13 + xorq 1(%rbp,%rdi,8),%r14 + leaq 128(%rsp),%rbx + movq 24(%rbx),%rsi + addq $1,%rsi + cmpq $10,%rsi + je L$roundsdone + + movq %rsi,24(%rbx) + movq %r8,64+0(%rsp) + movq %r9,64+8(%rsp) + movq %r10,64+16(%rsp) + movq %r11,64+24(%rsp) + movq %r12,64+32(%rsp) + movq %r13,64+40(%rsp) + movq %r14,64+48(%rsp) + movq %r15,64+56(%rsp) + jmp L$round +.p2align 4 +L$roundsdone: + movq 0(%rbx),%rdi + movq 8(%rbx),%rsi + movq 16(%rbx),%rax + xorq 0(%rsi),%r8 + xorq 8(%rsi),%r9 + xorq 16(%rsi),%r10 + xorq 24(%rsi),%r11 + xorq 32(%rsi),%r12 + xorq 40(%rsi),%r13 + xorq 48(%rsi),%r14 + xorq 56(%rsi),%r15 + xorq 0(%rdi),%r8 + xorq 8(%rdi),%r9 + xorq 16(%rdi),%r10 + xorq 24(%rdi),%r11 + xorq 32(%rdi),%r12 + xorq 40(%rdi),%r13 + xorq 48(%rdi),%r14 + xorq 56(%rdi),%r15 + movq %r8,0(%rdi) + movq %r9,8(%rdi) + movq %r10,16(%rdi) + movq %r11,24(%rdi) + movq %r12,32(%rdi) + movq %r13,40(%rdi) + movq %r14,48(%rdi) + movq %r15,56(%rdi) + leaq 64(%rsi),%rsi + subq $1,%rax + jz L$alldone + movq %rsi,8(%rbx) + movq %rax,16(%rbx) + jmp L$outerloop +L$alldone: + movq 32(%rbx),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 6 + +L$table: +.byte 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216 +.byte 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38 +.byte 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184 +.byte 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251 +.byte 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203 +.byte 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17 +.byte 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9 +.byte 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13 +.byte 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155 +.byte 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255 +.byte 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12 +.byte 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14 +.byte 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150 +.byte 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48 +.byte 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109 +.byte 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248 +.byte 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71 +.byte 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53 +.byte 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55 +.byte 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138 +.byte 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210 +.byte 
12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108 +.byte 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132 +.byte 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128 +.byte 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245 +.byte 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179 +.byte 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33 +.byte 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156 +.byte 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67 +.byte 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41 +.byte 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93 +.byte 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213 +.byte 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189 +.byte 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232 +.byte 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146 +.byte 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158 +.byte 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19 +.byte 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35 +.byte 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32 +.byte 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68 +.byte 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162 +.byte 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207 +.byte 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124 +.byte 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90 +.byte 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80 +.byte 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201 +.byte 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20 +.byte 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217 +.byte 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60 +.byte 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143 +.byte 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144 +.byte 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7 +.byte 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221 +.byte 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211 +.byte 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45 +.byte 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120 +.byte 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151 +.byte 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2 +.byte 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115 +.byte 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167 +.byte 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246 +.byte 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178 +.byte 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73 +.byte 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86 +.byte 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112 +.byte 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205 +.byte 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187 +.byte 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113 +.byte 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123 +.byte 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175 +.byte 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69 +.byte 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26 +.byte 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212 +.byte 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88 +.byte 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46 +.byte 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63 +.byte 
173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172 +.byte 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176 +.byte 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239 +.byte 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182 +.byte 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92 +.byte 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18 +.byte 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147 +.byte 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222 +.byte 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198 +.byte 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209 +.byte 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59 +.byte 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95 +.byte 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49 +.byte 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168 +.byte 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185 +.byte 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188 +.byte 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62 +.byte 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11 +.byte 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191 +.byte 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89 +.byte 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242 +.byte 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119 +.byte 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51 +.byte 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244 +.byte 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39 +.byte 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235 +.byte 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137 +.byte 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50 +.byte 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84 +.byte 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141 +.byte 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100 +.byte 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157 +.byte 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61 +.byte 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15 +.byte 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202 +.byte 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183 +.byte 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125 +.byte 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206 +.byte 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127 +.byte 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47 +.byte 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99 +.byte 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42 +.byte 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204 +.byte 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130 +.byte 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122 +.byte 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72 +.byte 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149 +.byte 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223 +.byte 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77 +.byte 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192 +.byte 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145 +.byte 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200 +.byte 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91 +.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.byte 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249 +.byte 
43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110 +.byte 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225 +.byte 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230 +.byte 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40 +.byte 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195 +.byte 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116 +.byte 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190 +.byte 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29 +.byte 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234 +.byte 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87 +.byte 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56 +.byte 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173 +.byte 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196 +.byte 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218 +.byte 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199 +.byte 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219 +.byte 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233 +.byte 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106 +.byte 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3 +.byte 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74 +.byte 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142 +.byte 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96 +.byte 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252 +.byte 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70 +.byte 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31 +.byte 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118 +.byte 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250 +.byte 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54 +.byte 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174 +.byte 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75 +.byte 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133 +.byte 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126 +.byte 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231 +.byte 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85 +.byte 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58 +.byte 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129 +.byte 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82 +.byte 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98 +.byte 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163 +.byte 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16 +.byte 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171 +.byte 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208 +.byte 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197 +.byte 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236 +.byte 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22 +.byte 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148 +.byte 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159 +.byte 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229 +.byte 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152 +.byte 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23 +.byte 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228 +.byte 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161 +.byte 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78 +.byte 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66 +.byte 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52 
+.byte 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8 +.byte 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238 +.byte 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97 +.byte 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177 +.byte 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79 +.byte 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36 +.byte 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227 +.byte 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37 +.byte 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34 +.byte 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101 +.byte 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121 +.byte 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105 +.byte 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169 +.byte 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25 +.byte 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254 +.byte 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154 +.byte 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240 +.byte 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153 +.byte 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131 +.byte 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4 +.byte 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102 +.byte 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224 +.byte 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193 +.byte 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253 +.byte 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64 +.byte 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28 +.byte 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24 +.byte 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139 +.byte 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81 +.byte 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5 +.byte 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140 +.byte 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57 +.byte 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170 +.byte 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27 +.byte 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220 +.byte 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94 +.byte 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160 +.byte 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136 +.byte 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103 +.byte 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10 +.byte 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135 +.byte 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241 +.byte 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114 +.byte 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83 +.byte 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1 +.byte 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43 +.byte 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164 +.byte 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243 +.byte 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21 +.byte 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76 +.byte 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165 +.byte 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181 +.byte 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180 +.byte 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186 +.byte 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166 +.byte 
58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247 +.byte 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6 +.byte 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65 +.byte 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215 +.byte 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111 +.byte 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30 +.byte 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214 +.byte 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226 +.byte 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104 +.byte 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44 +.byte 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237 +.byte 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117 +.byte 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134 +.byte 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107 +.byte 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194 +.byte 24,35,198,232,135,184,1,79 +.byte 54,166,210,245,121,111,145,82 +.byte 96,188,155,142,163,12,123,53 +.byte 29,224,215,194,46,75,254,87 +.byte 21,119,55,229,159,240,74,218 +.byte 88,201,41,10,177,160,107,133 +.byte 189,93,16,244,203,62,5,103 +.byte 228,39,65,139,167,125,149,216 +.byte 251,238,124,102,221,23,71,158 +.byte 202,45,191,7,173,90,131,51 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/x86_64cpuid.s b/deps/openssl/asm_obsolete/x64-macosx-gas/x86_64cpuid.s new file mode 100644 index 00000000000000..5d69baad8f4ab7 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/x86_64cpuid.s @@ -0,0 +1,259 @@ + +.private_extern _OPENSSL_cpuid_setup +.mod_init_func + .p2align 3 + .quad _OPENSSL_cpuid_setup + +.private_extern _OPENSSL_ia32cap_P +.comm _OPENSSL_ia32cap_P,16,2 + +.text + +.globl _OPENSSL_atomic_add + +.p2align 4 +_OPENSSL_atomic_add: + movl (%rdi),%eax +L$spin: leaq (%rsi,%rax,1),%r8 +.byte 0xf0 + cmpxchgl %r8d,(%rdi) + jne L$spin + movl %r8d,%eax +.byte 0x48,0x98 + .byte 0xf3,0xc3 + + +.globl _OPENSSL_rdtsc + +.p2align 4 +_OPENSSL_rdtsc: + rdtsc + shlq $32,%rdx + orq %rdx,%rax + .byte 0xf3,0xc3 + + +.globl _OPENSSL_ia32_cpuid + +.p2align 4 +_OPENSSL_ia32_cpuid: + movq %rbx,%r8 + + xorl %eax,%eax + movl %eax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz L$intel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz L$intel + + + movl $2147483648,%eax + cpuid + cmpl $2147483649,%eax + jb L$intel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb L$intel + + movl $2147483656,%eax + cpuid + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + btl $28,%edx + jnc L$generic + shrl $16,%ebx + cmpb %r10b,%bl + ja L$generic + andl $4026531839,%edx + jmp L$generic + +L$intel: + cmpl $4,%r11d + movl $-1,%r10d + jb L$nocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $4095,%r10d + + cmpl $7,%r11d + jb L$nocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + +L$nocacheinfo: + movl $1,%eax + cpuid + andl $3220176895,%edx + cmpl $0,%r9d + jne L$notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne L$notintel + orl $1048576,%edx +L$notintel: + btl $28,%edx + jnc L$generic + andl $4026531839,%edx + cmpl $0,%r10d 
+ je L$generic + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja L$generic + andl $4026531839,%edx +L$generic: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc L$clear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je L$done +L$clear_avx: + movl $4026525695,%eax + andl %eax,%r9d + andl $4294967263,8(%rdi) +L$done: + shlq $32,%r9 + movl %r10d,%eax + movq %r8,%rbx + orq %r9,%rax + .byte 0xf3,0xc3 + + +.globl _OPENSSL_cleanse + +.p2align 4 +_OPENSSL_cleanse: + xorq %rax,%rax + cmpq $15,%rsi + jae L$ot + cmpq $0,%rsi + je L$ret +L$ittle: + movb %al,(%rdi) + subq $1,%rsi + leaq 1(%rdi),%rdi + jnz L$ittle +L$ret: + .byte 0xf3,0xc3 +.p2align 4 +L$ot: + testq $7,%rdi + jz L$aligned + movb %al,(%rdi) + leaq -1(%rsi),%rsi + leaq 1(%rdi),%rdi + jmp L$ot +L$aligned: + movq %rax,(%rdi) + leaq -8(%rsi),%rsi + testq $-8,%rsi + leaq 8(%rdi),%rdi + jnz L$aligned + cmpq $0,%rsi + jne L$ittle + .byte 0xf3,0xc3 + +.globl _OPENSSL_wipe_cpu + +.p2align 4 +_OPENSSL_wipe_cpu: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor %xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %rsi,%rsi + xorq %rdi,%rdi + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax + .byte 0xf3,0xc3 + +.globl _OPENSSL_ia32_rdrand + +.p2align 4 +_OPENSSL_ia32_rdrand: + movl $8,%ecx +L$oop_rdrand: +.byte 72,15,199,240 + jc L$break_rdrand + loop L$oop_rdrand +L$break_rdrand: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 + + +.globl _OPENSSL_ia32_rdseed + +.p2align 4 +_OPENSSL_ia32_rdseed: + movl $8,%ecx +L$oop_rdseed: +.byte 72,15,199,248 + jc L$break_rdseed + loop L$oop_rdseed +L$break_rdseed: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aes-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aes-x86_64.asm new file mode 100644 index 00000000000000..64f07ad589a53d --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aes-x86_64.asm @@ -0,0 +1,2870 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +ALIGN 16 +_x86_64_AES_encrypt PROC PRIVATE + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + + mov r13d,DWORD PTR[240+r15] + sub r13d,1 + jmp $L$enc_loop +ALIGN 16 +$L$enc_loop:: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD PTR[rsi*8+r14] + mov r11d,DWORD PTR[rdi*8+r14] + mov r12d,DWORD PTR[rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,dl + xor r10d,DWORD PTR[3+rsi*8+r14] + xor r11d,DWORD PTR[3+rdi*8+r14] + mov r8d,DWORD PTR[rbp*8+r14] + + movzx esi,dh + shr ecx,16 + movzx ebp,ah + xor r12d,DWORD PTR[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD PTR[3+rbp*8+r14] + + shr ebx,16 + lea r15,QWORD PTR[16+r15] + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD PTR[2+rsi*8+r14] + xor r11d,DWORD PTR[2+rdi*8+r14] + xor r12d,DWORD PTR[2+rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,bl + xor r10d,DWORD PTR[1+rsi*8+r14] + xor r11d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[2+rbp*8+r14] + + mov edx,DWORD PTR[12+r15] + movzx edi,bh + movzx ebp,ch + mov eax,DWORD PTR[r15] + xor r12d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[1+rbp*8+r14] + + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD 
PTR[8+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + sub r13d,1 + jnz $L$enc_loop + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE PTR[2+rsi*8+r14] + movzx r11d,BYTE PTR[2+rdi*8+r14] + movzx r12d,BYTE PTR[2+rbp*8+r14] + + movzx esi,dl + movzx edi,bh + movzx ebp,ch + movzx r8d,BYTE PTR[2+rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + mov ebp,DWORD PTR[rbp*8+r14] + + and edi,00000ff00h + and ebp,00000ff00h + + xor r10d,edi + xor r11d,ebp + shr ecx,16 + + movzx esi,dh + movzx edi,ah + shr edx,16 + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + + and esi,00000ff00h + and edi,00000ff00h + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr eax,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[rdi*8+r14] + mov ebp,DWORD PTR[rbp*8+r14] + + and esi,000ff0000h + and edi,000ff0000h + and ebp,000ff0000h + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,dh + movzx ebp,ah + mov esi,DWORD PTR[rsi*8+r14] + mov edi,DWORD PTR[2+rdi*8+r14] + mov ebp,DWORD PTR[2+rbp*8+r14] + + and esi,000ff0000h + and edi,0ff000000h + and ebp,0ff000000h + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,bh + movzx edi,ch + mov edx,DWORD PTR[((16+12))+r15] + mov esi,DWORD PTR[2+rsi*8+r14] + mov edi,DWORD PTR[2+rdi*8+r14] + mov eax,DWORD PTR[((16+0))+r15] + + and esi,0ff000000h + and edi,0ff000000h + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD PTR[((16+4))+r15] + mov ecx,DWORD PTR[((16+8))+r15] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d +DB 0f3h,0c3h +_x86_64_AES_encrypt ENDP + +ALIGN 16 +_x86_64_AES_encrypt_compact PROC PRIVATE + lea r8,QWORD PTR[128+r14] + mov edi,DWORD PTR[((0-128))+r8] + mov ebp,DWORD PTR[((32-128))+r8] + mov r10d,DWORD PTR[((64-128))+r8] + mov r11d,DWORD PTR[((96-128))+r8] + mov edi,DWORD PTR[((128-128))+r8] + mov ebp,DWORD PTR[((160-128))+r8] + mov r10d,DWORD PTR[((192-128))+r8] + mov r11d,DWORD PTR[((224-128))+r8] + jmp $L$enc_loop_compact +ALIGN 16 +$L$enc_loop_compact:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + lea r15,QWORD PTR[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r8d,dl + movzx esi,bh + movzx edi,ch + shr ecx,16 + movzx ebp,dh + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] + movzx r8d,BYTE PTR[r8*1+r14] + + movzx r9d,BYTE PTR[rsi*1+r14] + movzx esi,ah + movzx r13d,BYTE PTR[rdi*1+r14] + movzx edi,cl + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + + shl r9d,8 + shr edx,16 + shl r13d,8 + xor r10d,r9d + shr eax,16 + movzx r9d,dl + shr ebx,16 + xor r11d,r13d + shl ebp,8 + movzx r13d,al + movzx edi,BYTE PTR[rdi*1+r14] + xor r12d,ebp + + shl esi,8 + movzx ebp,bl + shl edi,16 + xor r8d,esi + movzx r9d,BYTE PTR[r9*1+r14] + movzx esi,dh + movzx r13d,BYTE PTR[r13*1+r14] + xor r10d,edi + + shr ecx,8 + movzx edi,ah + shl r9d,16 + shr ebx,8 + shl r13d,16 + xor r11d,r9d + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx edx,BYTE PTR[rcx*1+r14] + movzx ecx,BYTE PTR[rbx*1+r14] + + shl ebp,16 + xor r12d,r13d + shl esi,24 + xor r8d,ebp + shl edi,24 + xor r10d,esi + shl edx,24 + xor r11d,edi + shl ecx,24 + mov eax,r10d + mov ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD PTR[16+rsp] + je $L$enc_compact_done + mov r10d,080808080h + mov r11d,080808080h + and r10d,eax + and r11d,ebx + mov esi,r10d + mov edi,r11d + shr r10d,7 + lea r8d,DWORD PTR[rax*1+rax] + shr 
r11d,7 + lea r9d,DWORD PTR[rbx*1+rbx] + sub esi,r10d + sub edi,r11d + and r8d,0fefefefeh + and r9d,0fefefefeh + and esi,01b1b1b1bh + and edi,01b1b1b1bh + mov r10d,eax + mov r11d,ebx + xor r8d,esi + xor r9d,edi + + xor eax,r8d + xor ebx,r9d + mov r12d,080808080h + rol eax,24 + mov ebp,080808080h + rol ebx,24 + and r12d,ecx + and ebp,edx + xor eax,r8d + xor ebx,r9d + mov esi,r12d + ror r10d,16 + mov edi,ebp + ror r11d,16 + lea r8d,DWORD PTR[rcx*1+rcx] + shr r12d,7 + xor eax,r10d + shr ebp,7 + xor ebx,r11d + ror r10d,8 + lea r9d,DWORD PTR[rdx*1+rdx] + ror r11d,8 + sub esi,r12d + sub edi,ebp + xor eax,r10d + xor ebx,r11d + + and r8d,0fefefefeh + and r9d,0fefefefeh + and esi,01b1b1b1bh + and edi,01b1b1b1bh + mov r12d,ecx + mov ebp,edx + xor r8d,esi + xor r9d,edi + + ror r12d,16 + xor ecx,r8d + ror ebp,16 + xor edx,r9d + rol ecx,24 + mov esi,DWORD PTR[r14] + rol edx,24 + xor ecx,r8d + mov edi,DWORD PTR[64+r14] + xor edx,r9d + mov r8d,DWORD PTR[128+r14] + xor ecx,r12d + ror r12d,8 + xor edx,ebp + ror ebp,8 + xor ecx,r12d + mov r9d,DWORD PTR[192+r14] + xor edx,ebp + jmp $L$enc_loop_compact +ALIGN 16 +$L$enc_compact_done:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] +DB 0f3h,0c3h +_x86_64_AES_encrypt_compact ENDP +PUBLIC AES_encrypt + +ALIGN 16 +PUBLIC asm_AES_encrypt + +asm_AES_encrypt:: +AES_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,QWORD PTR[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,03c0h + sub rsp,rcx + sub rsp,32 + + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],r10 +$L$enc_prologue:: + + mov r15,rdx + mov r13d,DWORD PTR[240+r15] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + + shl r13d,4 + lea rbp,QWORD PTR[r13*1+r15] + mov QWORD PTR[rsp],r15 + mov QWORD PTR[8+rsp],rbp + + + lea r14,QWORD PTR[(($L$AES_Te+2048))] + lea rbp,QWORD PTR[768+rsp] + sub rbp,r14 + and rbp,0300h + lea r14,QWORD PTR[rbp*1+r14] + + call _x86_64_AES_encrypt_compact + + mov r9,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_encrypt:: +AES_encrypt ENDP + +ALIGN 16 +_x86_64_AES_decrypt PROC PRIVATE + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + + mov r13d,DWORD PTR[240+r15] + sub r13d,1 + jmp $L$dec_loop +ALIGN 16 +$L$dec_loop:: + + movzx esi,al + movzx edi,bl + movzx ebp,cl + mov r10d,DWORD PTR[rsi*8+r14] + mov r11d,DWORD PTR[rdi*8+r14] + mov r12d,DWORD PTR[rbp*8+r14] + + movzx esi,dh + movzx edi,ah + movzx ebp,dl + xor r10d,DWORD PTR[3+rsi*8+r14] + xor r11d,DWORD PTR[3+rdi*8+r14] + mov r8d,DWORD PTR[rbp*8+r14] + + movzx esi,bh + shr eax,16 + movzx ebp,ch + xor r12d,DWORD PTR[3+rsi*8+r14] + shr edx,16 + xor r8d,DWORD PTR[3+rbp*8+r14] + + shr ebx,16 + lea r15,QWORD PTR[16+r15] + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + xor r10d,DWORD PTR[2+rsi*8+r14] + xor 
r11d,DWORD PTR[2+rdi*8+r14] + xor r12d,DWORD PTR[2+rbp*8+r14] + + movzx esi,bh + movzx edi,ch + movzx ebp,bl + xor r10d,DWORD PTR[1+rsi*8+r14] + xor r11d,DWORD PTR[1+rdi*8+r14] + xor r8d,DWORD PTR[2+rbp*8+r14] + + movzx esi,dh + mov edx,DWORD PTR[12+r15] + movzx ebp,ah + xor r12d,DWORD PTR[1+rsi*8+r14] + mov eax,DWORD PTR[r15] + xor r8d,DWORD PTR[1+rbp*8+r14] + + xor eax,r10d + mov ebx,DWORD PTR[4+r15] + mov ecx,DWORD PTR[8+r15] + xor ecx,r12d + xor ebx,r11d + xor edx,r8d + sub r13d,1 + jnz $L$dec_loop + lea r14,QWORD PTR[2048+r14] + movzx esi,al + movzx edi,bl + movzx ebp,cl + movzx r10d,BYTE PTR[rsi*1+r14] + movzx r11d,BYTE PTR[rdi*1+r14] + movzx r12d,BYTE PTR[rbp*1+r14] + + movzx esi,dl + movzx edi,dh + movzx ebp,ah + movzx r8d,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl edi,8 + shl ebp,8 + + xor r10d,edi + xor r11d,ebp + shr edx,16 + + movzx esi,bh + movzx edi,ch + shr eax,16 + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + + shl esi,8 + shl edi,8 + shr ebx,16 + xor r12d,esi + xor r8d,edi + shr ecx,16 + + movzx esi,cl + movzx edi,dl + movzx ebp,al + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl esi,16 + shl edi,16 + shl ebp,16 + + xor r10d,esi + xor r11d,edi + xor r12d,ebp + + movzx esi,bl + movzx edi,bh + movzx ebp,ch + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + + shl esi,16 + shl edi,24 + shl ebp,24 + + xor r8d,esi + xor r10d,edi + xor r11d,ebp + + movzx esi,dh + movzx edi,ah + mov edx,DWORD PTR[((16+12))+r15] + movzx esi,BYTE PTR[rsi*1+r14] + movzx edi,BYTE PTR[rdi*1+r14] + mov eax,DWORD PTR[((16+0))+r15] + + shl esi,24 + shl edi,24 + + xor r12d,esi + xor r8d,edi + + mov ebx,DWORD PTR[((16+4))+r15] + mov ecx,DWORD PTR[((16+8))+r15] + lea r14,QWORD PTR[((-2048))+r14] + xor eax,r10d + xor ebx,r11d + xor ecx,r12d + xor edx,r8d +DB 0f3h,0c3h +_x86_64_AES_decrypt ENDP + +ALIGN 16 +_x86_64_AES_decrypt_compact PROC PRIVATE + lea r8,QWORD PTR[128+r14] + mov edi,DWORD PTR[((0-128))+r8] + mov ebp,DWORD PTR[((32-128))+r8] + mov r10d,DWORD PTR[((64-128))+r8] + mov r11d,DWORD PTR[((96-128))+r8] + mov edi,DWORD PTR[((128-128))+r8] + mov ebp,DWORD PTR[((160-128))+r8] + mov r10d,DWORD PTR[((192-128))+r8] + mov r11d,DWORD PTR[((224-128))+r8] + jmp $L$dec_loop_compact + +ALIGN 16 +$L$dec_loop_compact:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] + lea r15,QWORD PTR[16+r15] + movzx r10d,al + movzx r11d,bl + movzx r12d,cl + movzx r8d,dl + movzx esi,dh + movzx edi,ah + shr edx,16 + movzx ebp,bh + movzx r10d,BYTE PTR[r10*1+r14] + movzx r11d,BYTE PTR[r11*1+r14] + movzx r12d,BYTE PTR[r12*1+r14] + movzx r8d,BYTE PTR[r8*1+r14] + + movzx r9d,BYTE PTR[rsi*1+r14] + movzx esi,ch + movzx r13d,BYTE PTR[rdi*1+r14] + movzx ebp,BYTE PTR[rbp*1+r14] + movzx esi,BYTE PTR[rsi*1+r14] + + shr ecx,16 + shl r13d,8 + shl r9d,8 + movzx edi,cl + shr eax,16 + xor r10d,r9d + shr ebx,16 + movzx r9d,dl + + shl ebp,8 + xor r11d,r13d + shl esi,8 + movzx r13d,al + movzx edi,BYTE PTR[rdi*1+r14] + xor r12d,ebp + movzx ebp,bl + + shl edi,16 + xor r8d,esi + movzx r9d,BYTE PTR[r9*1+r14] + movzx esi,bh + movzx ebp,BYTE PTR[rbp*1+r14] + xor r10d,edi + movzx r13d,BYTE PTR[r13*1+r14] + movzx edi,ch + + shl ebp,16 + shl r9d,16 + shl r13d,16 + xor r8d,ebp + movzx ebp,dh + xor r11d,r9d + shr eax,8 + xor r12d,r13d + + movzx esi,BYTE PTR[rsi*1+r14] + movzx ebx,BYTE PTR[rdi*1+r14] + movzx ecx,BYTE PTR[rbp*1+r14] + 
movzx edx,BYTE PTR[rax*1+r14] + + mov eax,r10d + shl esi,24 + shl ebx,24 + shl ecx,24 + xor eax,esi + shl edx,24 + xor ebx,r11d + xor ecx,r12d + xor edx,r8d + cmp r15,QWORD PTR[16+rsp] + je $L$dec_compact_done + + mov rsi,QWORD PTR[((256+0))+r14] + shl rbx,32 + shl rdx,32 + mov rdi,QWORD PTR[((256+8))+r14] + or rax,rbx + or rcx,rdx + mov rbp,QWORD PTR[((256+16))+r14] + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 + shr r9,7 + lea r8,QWORD PTR[rax*1+rax] + shr r12,7 + lea r11,QWORD PTR[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + lea r9,QWORD PTR[r8*1+r8] + shr r13,7 + lea r12,QWORD PTR[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and r12,rdi + and rbx,rbp + and rdx,rbp + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,QWORD PTR[r9*1+r9] + lea r13,QWORD PTR[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + shr rbx,32 + xor r12,r13 + shr rdx,32 + xor r10,r8 + rol eax,8 + xor r13,r11 + rol ecx,8 + xor r10,r9 + rol ebx,8 + xor r13,r12 + + rol edx,8 + xor eax,r10d + shr r10,32 + xor ecx,r13d + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + rol r8d,24 + mov r13,r11 + rol r11d,24 + shr r10,32 + xor eax,r8d + shr r13,32 + xor ecx,r11d + rol r10d,24 + mov r8,r9 + rol r13d,24 + mov r11,r12 + shr r8,32 + xor ebx,r10d + shr r11,32 + xor edx,r13d + + mov rsi,QWORD PTR[r14] + rol r9d,16 + mov rdi,QWORD PTR[64+r14] + rol r12d,16 + mov rbp,QWORD PTR[128+r14] + rol r8d,16 + mov r10,QWORD PTR[192+r14] + xor eax,r9d + rol r11d,16 + xor ecx,r12d + mov r13,QWORD PTR[256+r14] + xor ebx,r8d + xor edx,r11d + jmp $L$dec_loop_compact +ALIGN 16 +$L$dec_compact_done:: + xor eax,DWORD PTR[r15] + xor ebx,DWORD PTR[4+r15] + xor ecx,DWORD PTR[8+r15] + xor edx,DWORD PTR[12+r15] +DB 0f3h,0c3h +_x86_64_AES_decrypt_compact ENDP +PUBLIC AES_decrypt + +ALIGN 16 +PUBLIC asm_AES_decrypt + +asm_AES_decrypt:: +AES_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + + mov r10,rsp + lea rcx,QWORD PTR[((-63))+rdx] + and rsp,-64 + sub rcx,rsp + neg rcx + and rcx,03c0h + sub rsp,rcx + sub rsp,32 + + mov QWORD PTR[16+rsp],rsi + mov QWORD PTR[24+rsp],r10 +$L$dec_prologue:: + + mov r15,rdx + mov r13d,DWORD PTR[240+r15] + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + + shl r13d,4 + lea rbp,QWORD PTR[r13*1+r15] + mov QWORD PTR[rsp],r15 + mov QWORD PTR[8+rsp],rbp + + + lea r14,QWORD PTR[(($L$AES_Td+2048))] + lea rbp,QWORD PTR[768+rsp] + sub rbp,r14 + and rbp,0300h + lea r14,QWORD PTR[rbp*1+r14] + shr rbp,3 + add r14,rbp + + call _x86_64_AES_decrypt_compact + + mov r9,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD 
PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_decrypt:: +AES_decrypt ENDP +PUBLIC private_AES_set_encrypt_key + +ALIGN 16 +private_AES_set_encrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_private_AES_set_encrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,8 +$L$enc_key_prologue:: + + call _x86_64_AES_set_encrypt_key + + mov rbp,QWORD PTR[40+rsp] + mov rbx,QWORD PTR[48+rsp] + add rsp,56 +$L$enc_key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_private_AES_set_encrypt_key:: +private_AES_set_encrypt_key ENDP + + +ALIGN 16 +_x86_64_AES_set_encrypt_key PROC PRIVATE + mov ecx,esi + mov rsi,rdi + mov rdi,rdx + + test rsi,-1 + jz $L$badpointer + test rdi,-1 + jz $L$badpointer + + lea rbp,QWORD PTR[$L$AES_Te] + lea rbp,QWORD PTR[((2048+128))+rbp] + + + mov eax,DWORD PTR[((0-128))+rbp] + mov ebx,DWORD PTR[((32-128))+rbp] + mov r8d,DWORD PTR[((64-128))+rbp] + mov edx,DWORD PTR[((96-128))+rbp] + mov eax,DWORD PTR[((128-128))+rbp] + mov ebx,DWORD PTR[((160-128))+rbp] + mov r8d,DWORD PTR[((192-128))+rbp] + mov edx,DWORD PTR[((224-128))+rbp] + + cmp ecx,128 + je $L$10rounds + cmp ecx,192 + je $L$12rounds + cmp ecx,256 + je $L$14rounds + mov rax,-2 + jmp $L$exit + +$L$10rounds:: + mov rax,QWORD PTR[rsi] + mov rdx,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$10shortcut +ALIGN 4 +$L$10loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[12+rdi] +$L$10shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[16+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[20+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[24+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[28+rdi],eax + add ecx,1 + lea rdi,QWORD PTR[16+rdi] + cmp ecx,10 + jl $L$10loop + + mov DWORD PTR[80+rdi],10 + xor rax,rax + jmp $L$exit + +$L$12rounds:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rdx,QWORD PTR[16+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$12shortcut +ALIGN 4 +$L$12loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[20+rdi] +$L$12shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[24+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[28+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[32+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[36+rdi],eax + + cmp ecx,7 + je $L$12break + add ecx,1 + + xor eax,DWORD PTR[16+rdi] + mov DWORD PTR[40+rdi],eax + xor eax,DWORD PTR[20+rdi] + mov DWORD 
PTR[44+rdi],eax + + lea rdi,QWORD PTR[24+rdi] + jmp $L$12loop +$L$12break:: + mov DWORD PTR[72+rdi],12 + xor rax,rax + jmp $L$exit + +$L$14rounds:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[16+rsi] + mov rdx,QWORD PTR[24+rsi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + + shr rdx,32 + xor ecx,ecx + jmp $L$14shortcut +ALIGN 4 +$L$14loop:: + mov eax,DWORD PTR[rdi] + mov edx,DWORD PTR[28+rdi] +$L$14shortcut:: + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,24 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,8 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,16 + xor eax,ebx + + xor eax,DWORD PTR[((1024-128))+rcx*4+rbp] + mov DWORD PTR[32+rdi],eax + xor eax,DWORD PTR[4+rdi] + mov DWORD PTR[36+rdi],eax + xor eax,DWORD PTR[8+rdi] + mov DWORD PTR[40+rdi],eax + xor eax,DWORD PTR[12+rdi] + mov DWORD PTR[44+rdi],eax + + cmp ecx,6 + je $L$14break + add ecx,1 + + mov edx,eax + mov eax,DWORD PTR[16+rdi] + movzx esi,dl + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shr edx,16 + shl ebx,8 + movzx esi,dl + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + movzx esi,dh + shl ebx,16 + xor eax,ebx + + movzx ebx,BYTE PTR[((-128))+rsi*1+rbp] + shl ebx,24 + xor eax,ebx + + mov DWORD PTR[48+rdi],eax + xor eax,DWORD PTR[20+rdi] + mov DWORD PTR[52+rdi],eax + xor eax,DWORD PTR[24+rdi] + mov DWORD PTR[56+rdi],eax + xor eax,DWORD PTR[28+rdi] + mov DWORD PTR[60+rdi],eax + + lea rdi,QWORD PTR[32+rdi] + jmp $L$14loop +$L$14break:: + mov DWORD PTR[48+rdi],14 + xor rax,rax + jmp $L$exit + +$L$badpointer:: + mov rax,-1 +$L$exit:: +DB 0f3h,0c3h +_x86_64_AES_set_encrypt_key ENDP +PUBLIC private_AES_set_decrypt_key + +ALIGN 16 +private_AES_set_decrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_private_AES_set_decrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + push rdx +$L$dec_key_prologue:: + + call _x86_64_AES_set_encrypt_key + mov r8,QWORD PTR[rsp] + cmp eax,0 + jne $L$abort + + mov r14d,DWORD PTR[240+r8] + xor rdi,rdi + lea rcx,QWORD PTR[r14*4+rdi] + mov rsi,r8 + lea rdi,QWORD PTR[rcx*4+r8] +ALIGN 4 +$L$invert:: + mov rax,QWORD PTR[rsi] + mov rbx,QWORD PTR[8+rsi] + mov rcx,QWORD PTR[rdi] + mov rdx,QWORD PTR[8+rdi] + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[rsi],rcx + mov QWORD PTR[8+rsi],rdx + lea rsi,QWORD PTR[16+rsi] + lea rdi,QWORD PTR[((-16))+rdi] + cmp rdi,rsi + jne $L$invert + + lea rax,QWORD PTR[(($L$AES_Te+2048+1024))] + + mov rsi,QWORD PTR[40+rax] + mov rdi,QWORD PTR[48+rax] + mov rbp,QWORD PTR[56+rax] + + mov r15,r8 + sub r14d,1 +ALIGN 4 +$L$permute:: + lea r15,QWORD PTR[16+r15] + mov rax,QWORD PTR[r15] + mov rcx,QWORD PTR[8+r15] + mov r9,rsi + mov r12,rsi + and r9,rax + and r12,rcx + mov rbx,r9 + mov rdx,r12 + shr r9,7 + lea r8,QWORD PTR[rax*1+rax] + shr r12,7 + lea r11,QWORD PTR[rcx*1+rcx] + sub rbx,r9 + sub rdx,r12 + and r8,rdi + and r11,rdi + and rbx,rbp + and rdx,rbp + xor r8,rbx + xor r11,rdx + mov r10,rsi + mov r13,rsi + + and r10,r8 + and r13,r11 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + lea r9,QWORD PTR[r8*1+r8] + shr r13,7 + lea r12,QWORD PTR[r11*1+r11] + sub rbx,r10 + sub rdx,r13 + and r9,rdi + and 
r12,rdi + and rbx,rbp + and rdx,rbp + xor r9,rbx + xor r12,rdx + mov r10,rsi + mov r13,rsi + + and r10,r9 + and r13,r12 + mov rbx,r10 + mov rdx,r13 + shr r10,7 + xor r8,rax + shr r13,7 + xor r11,rcx + sub rbx,r10 + sub rdx,r13 + lea r10,QWORD PTR[r9*1+r9] + lea r13,QWORD PTR[r12*1+r12] + xor r9,rax + xor r12,rcx + and r10,rdi + and r13,rdi + and rbx,rbp + and rdx,rbp + xor r10,rbx + xor r13,rdx + + xor rax,r10 + xor rcx,r13 + xor r8,r10 + xor r11,r13 + mov rbx,rax + mov rdx,rcx + xor r9,r10 + shr rbx,32 + xor r12,r13 + shr rdx,32 + xor r10,r8 + rol eax,8 + xor r13,r11 + rol ecx,8 + xor r10,r9 + rol ebx,8 + xor r13,r12 + + rol edx,8 + xor eax,r10d + shr r10,32 + xor ecx,r13d + shr r13,32 + xor ebx,r10d + xor edx,r13d + + mov r10,r8 + rol r8d,24 + mov r13,r11 + rol r11d,24 + shr r10,32 + xor eax,r8d + shr r13,32 + xor ecx,r11d + rol r10d,24 + mov r8,r9 + rol r13d,24 + mov r11,r12 + shr r8,32 + xor ebx,r10d + shr r11,32 + xor edx,r13d + + + rol r9d,16 + + rol r12d,16 + + rol r8d,16 + + xor eax,r9d + rol r11d,16 + xor ecx,r12d + + xor ebx,r8d + xor edx,r11d + mov DWORD PTR[r15],eax + mov DWORD PTR[4+r15],ebx + mov DWORD PTR[8+r15],ecx + mov DWORD PTR[12+r15],edx + sub r14d,1 + jnz $L$permute + + xor rax,rax +$L$abort:: + mov r15,QWORD PTR[8+rsp] + mov r14,QWORD PTR[16+rsp] + mov r13,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov rbp,QWORD PTR[40+rsp] + mov rbx,QWORD PTR[48+rsp] + add rsp,56 +$L$dec_key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_private_AES_set_decrypt_key:: +private_AES_set_decrypt_key ENDP +PUBLIC AES_cbc_encrypt + +ALIGN 16 +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC asm_AES_cbc_encrypt + +asm_AES_cbc_encrypt:: +AES_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_AES_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp rdx,0 + je $L$cbc_epilogue + pushfq + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 +$L$cbc_prologue:: + + cld + mov r9d,r9d + + lea r14,QWORD PTR[$L$AES_Te] + cmp r9,0 + jne $L$cbc_picked_te + lea r14,QWORD PTR[$L$AES_Td] +$L$cbc_picked_te:: + + mov r10d,DWORD PTR[OPENSSL_ia32cap_P] + cmp rdx,512 + jb $L$cbc_slow_prologue + test rdx,15 + jnz $L$cbc_slow_prologue + bt r10d,28 + jc $L$cbc_slow_prologue + + + lea r15,QWORD PTR[((-88-248))+rsp] + and r15,-64 + + + mov r10,r14 + lea r11,QWORD PTR[2304+r14] + mov r12,r15 + and r10,0FFFh + and r11,0FFFh + and r12,0FFFh + + cmp r12,r11 + jb $L$cbc_te_break_out + sub r12,r11 + sub r15,r12 + jmp $L$cbc_te_ok +$L$cbc_te_break_out:: + sub r12,r10 + and r12,0FFFh + add r12,320 + sub r15,r12 +ALIGN 4 +$L$cbc_te_ok:: + + xchg r15,rsp + + mov QWORD PTR[16+rsp],r15 +$L$cbc_fast_body:: + mov QWORD PTR[24+rsp],rdi + mov QWORD PTR[32+rsp],rsi + mov QWORD PTR[40+rsp],rdx + mov QWORD PTR[48+rsp],rcx + mov QWORD PTR[56+rsp],r8 + mov DWORD PTR[((80+240))+rsp],0 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + + mov eax,DWORD PTR[240+r15] + + mov r10,r15 + sub r10,r14 + and r10,0fffh + cmp r10,2304 + jb $L$cbc_do_ecopy + cmp r10,4096-248 + jb $L$cbc_skip_ecopy +ALIGN 4 +$L$cbc_do_ecopy:: + mov rsi,r15 + lea rdi,QWORD PTR[80+rsp] + lea r15,QWORD PTR[80+rsp] + mov ecx,240/8 + DD 090A548F3h + mov DWORD PTR[rdi],eax +$L$cbc_skip_ecopy:: + mov QWORD PTR[rsp],r15 + + mov ecx,18 +ALIGN 4 +$L$cbc_prefetch_te:: + mov r10,QWORD PTR[r14] + mov r11,QWORD PTR[32+r14] + mov r12,QWORD 
PTR[64+r14] + mov r13,QWORD PTR[96+r14] + lea r14,QWORD PTR[128+r14] + sub ecx,1 + jnz $L$cbc_prefetch_te + lea r14,QWORD PTR[((-2304))+r14] + + cmp rbx,0 + je $L$FAST_DECRYPT + + + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + +ALIGN 4 +$L$cbc_fast_enc_loop:: + xor eax,DWORD PTR[r8] + xor ebx,DWORD PTR[4+r8] + xor ecx,DWORD PTR[8+r8] + xor edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_encrypt + + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + sub r10,16 + test r10,-16 + mov QWORD PTR[40+rsp],r10 + jnz $L$cbc_fast_enc_loop + mov rbp,QWORD PTR[56+rsp] + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + jmp $L$cbc_fast_cleanup + + +ALIGN 16 +$L$FAST_DECRYPT:: + cmp r9,r8 + je $L$cbc_fast_dec_in_place + + mov QWORD PTR[64+rsp],rbp +ALIGN 4 +$L$cbc_fast_dec_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov rbp,QWORD PTR[64+rsp] + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[rbp] + xor ebx,DWORD PTR[4+rbp] + xor ecx,DWORD PTR[8+rbp] + xor edx,DWORD PTR[12+rbp] + mov rbp,r8 + + sub r10,16 + mov QWORD PTR[40+rsp],r10 + mov QWORD PTR[64+rsp],rbp + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + jnz $L$cbc_fast_dec_loop + mov r12,QWORD PTR[56+rsp] + mov r10,QWORD PTR[rbp] + mov r11,QWORD PTR[8+rbp] + mov QWORD PTR[r12],r10 + mov QWORD PTR[8+r12],r11 + jmp $L$cbc_fast_cleanup + +ALIGN 16 +$L$cbc_fast_dec_in_place:: + mov r10,QWORD PTR[rbp] + mov r11,QWORD PTR[8+rbp] + mov QWORD PTR[((0+64))+rsp],r10 + mov QWORD PTR[((8+64))+rsp],r11 +ALIGN 4 +$L$cbc_fast_dec_in_place_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + + call _x86_64_AES_decrypt + + mov r8,QWORD PTR[24+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[((0+64))+rsp] + xor ebx,DWORD PTR[((4+64))+rsp] + xor ecx,DWORD PTR[((8+64))+rsp] + xor edx,DWORD PTR[((12+64))+rsp] + + mov r11,QWORD PTR[r8] + mov r12,QWORD PTR[8+r8] + sub r10,16 + jz $L$cbc_fast_dec_in_place_done + + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + mov QWORD PTR[40+rsp],r10 + jmp $L$cbc_fast_dec_in_place_loop +$L$cbc_fast_dec_in_place_done:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + +ALIGN 4 +$L$cbc_fast_cleanup:: + cmp DWORD PTR[((80+240))+rsp],0 + lea rdi,QWORD PTR[80+rsp] + je $L$cbc_exit + mov ecx,240/8 + xor rax,rax + DD 090AB48F3h + + jmp $L$cbc_exit + + +ALIGN 16 +$L$cbc_slow_prologue:: + + lea rbp,QWORD PTR[((-88))+rsp] + and rbp,-64 + + lea r10,QWORD PTR[((-88-63))+rcx] + sub r10,rbp + neg r10 + and r10,03c0h + sub rbp,r10 + + xchg rbp,rsp + + mov QWORD PTR[16+rsp],rbp +$L$cbc_slow_body:: + + + + + mov QWORD 
PTR[56+rsp],r8 + mov rbp,r8 + mov rbx,r9 + mov r9,rsi + mov r8,rdi + mov r15,rcx + mov r10,rdx + + mov eax,DWORD PTR[240+r15] + mov QWORD PTR[rsp],r15 + shl eax,4 + lea rax,QWORD PTR[rax*1+r15] + mov QWORD PTR[8+rsp],rax + + + lea r14,QWORD PTR[2048+r14] + lea rax,QWORD PTR[((768-8))+rsp] + sub rax,r14 + and rax,0300h + lea r14,QWORD PTR[rax*1+r14] + + cmp rbx,0 + je $L$SLOW_DECRYPT + + + test r10,-16 + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + jz $L$cbc_slow_enc_tail + +ALIGN 4 +$L$cbc_slow_enc_loop:: + xor eax,DWORD PTR[r8] + xor ebx,DWORD PTR[4+r8] + xor ecx,DWORD PTR[8+r8] + xor edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],r9 + mov QWORD PTR[40+rsp],r10 + + call _x86_64_AES_encrypt_compact + + mov r8,QWORD PTR[24+rsp] + mov r9,QWORD PTR[32+rsp] + mov r10,QWORD PTR[40+rsp] + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + sub r10,16 + test r10,-16 + jnz $L$cbc_slow_enc_loop + test r10,15 + jnz $L$cbc_slow_enc_tail + mov rbp,QWORD PTR[56+rsp] + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + jmp $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_enc_tail:: + mov r11,rax + mov r12,rcx + mov rcx,r10 + mov rsi,r8 + mov rdi,r9 + DD 09066A4F3h + mov rcx,16 + sub rcx,r10 + xor rax,rax + DD 09066AAF3h + mov r8,r9 + mov r10,16 + mov rax,r11 + mov rcx,r12 + jmp $L$cbc_slow_enc_loop + +ALIGN 16 +$L$SLOW_DECRYPT:: + shr rax,3 + add r14,rax + + mov r11,QWORD PTR[rbp] + mov r12,QWORD PTR[8+rbp] + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + +ALIGN 4 +$L$cbc_slow_dec_loop:: + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov r15,QWORD PTR[rsp] + mov QWORD PTR[24+rsp],r8 + mov QWORD PTR[32+rsp],r9 + mov QWORD PTR[40+rsp],r10 + + call _x86_64_AES_decrypt_compact + + mov r8,QWORD PTR[24+rsp] + mov r9,QWORD PTR[32+rsp] + mov r10,QWORD PTR[40+rsp] + xor eax,DWORD PTR[((0+64))+rsp] + xor ebx,DWORD PTR[((4+64))+rsp] + xor ecx,DWORD PTR[((8+64))+rsp] + xor edx,DWORD PTR[((12+64))+rsp] + + mov r11,QWORD PTR[r8] + mov r12,QWORD PTR[8+r8] + sub r10,16 + jc $L$cbc_slow_dec_partial + jz $L$cbc_slow_dec_done + + mov QWORD PTR[((0+64))+rsp],r11 + mov QWORD PTR[((8+64))+rsp],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + lea r8,QWORD PTR[16+r8] + lea r9,QWORD PTR[16+r9] + jmp $L$cbc_slow_dec_loop +$L$cbc_slow_dec_done:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[r9],eax + mov DWORD PTR[4+r9],ebx + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + + jmp $L$cbc_exit + +ALIGN 4 +$L$cbc_slow_dec_partial:: + mov rdi,QWORD PTR[56+rsp] + mov QWORD PTR[rdi],r11 + mov QWORD PTR[8+rdi],r12 + + mov DWORD PTR[((0+64))+rsp],eax + mov DWORD PTR[((4+64))+rsp],ebx + mov DWORD PTR[((8+64))+rsp],ecx + mov DWORD PTR[((12+64))+rsp],edx + + mov rdi,r9 + lea rsi,QWORD PTR[64+rsp] + lea rcx,QWORD PTR[16+r10] + DD 09066A4F3h + jmp $L$cbc_exit + +ALIGN 16 +$L$cbc_exit:: + mov rsi,QWORD PTR[16+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$cbc_popfq:: + popfq +$L$cbc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + 
mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_AES_cbc_encrypt:: +AES_cbc_encrypt ENDP +ALIGN 64 +$L$AES_Te:: + DD 0a56363c6h,0a56363c6h + DD 0847c7cf8h,0847c7cf8h + DD 0997777eeh,0997777eeh + DD 08d7b7bf6h,08d7b7bf6h + DD 00df2f2ffh,00df2f2ffh + DD 0bd6b6bd6h,0bd6b6bd6h + DD 0b16f6fdeh,0b16f6fdeh + DD 054c5c591h,054c5c591h + DD 050303060h,050303060h + DD 003010102h,003010102h + DD 0a96767ceh,0a96767ceh + DD 07d2b2b56h,07d2b2b56h + DD 019fefee7h,019fefee7h + DD 062d7d7b5h,062d7d7b5h + DD 0e6abab4dh,0e6abab4dh + DD 09a7676ech,09a7676ech + DD 045caca8fh,045caca8fh + DD 09d82821fh,09d82821fh + DD 040c9c989h,040c9c989h + DD 0877d7dfah,0877d7dfah + DD 015fafaefh,015fafaefh + DD 0eb5959b2h,0eb5959b2h + DD 0c947478eh,0c947478eh + DD 00bf0f0fbh,00bf0f0fbh + DD 0ecadad41h,0ecadad41h + DD 067d4d4b3h,067d4d4b3h + DD 0fda2a25fh,0fda2a25fh + DD 0eaafaf45h,0eaafaf45h + DD 0bf9c9c23h,0bf9c9c23h + DD 0f7a4a453h,0f7a4a453h + DD 0967272e4h,0967272e4h + DD 05bc0c09bh,05bc0c09bh + DD 0c2b7b775h,0c2b7b775h + DD 01cfdfde1h,01cfdfde1h + DD 0ae93933dh,0ae93933dh + DD 06a26264ch,06a26264ch + DD 05a36366ch,05a36366ch + DD 0413f3f7eh,0413f3f7eh + DD 002f7f7f5h,002f7f7f5h + DD 04fcccc83h,04fcccc83h + DD 05c343468h,05c343468h + DD 0f4a5a551h,0f4a5a551h + DD 034e5e5d1h,034e5e5d1h + DD 008f1f1f9h,008f1f1f9h + DD 0937171e2h,0937171e2h + DD 073d8d8abh,073d8d8abh + DD 053313162h,053313162h + DD 03f15152ah,03f15152ah + DD 00c040408h,00c040408h + DD 052c7c795h,052c7c795h + DD 065232346h,065232346h + DD 05ec3c39dh,05ec3c39dh + DD 028181830h,028181830h + DD 0a1969637h,0a1969637h + DD 00f05050ah,00f05050ah + DD 0b59a9a2fh,0b59a9a2fh + DD 00907070eh,00907070eh + DD 036121224h,036121224h + DD 09b80801bh,09b80801bh + DD 03de2e2dfh,03de2e2dfh + DD 026ebebcdh,026ebebcdh + DD 06927274eh,06927274eh + DD 0cdb2b27fh,0cdb2b27fh + DD 09f7575eah,09f7575eah + DD 01b090912h,01b090912h + DD 09e83831dh,09e83831dh + DD 0742c2c58h,0742c2c58h + DD 02e1a1a34h,02e1a1a34h + DD 02d1b1b36h,02d1b1b36h + DD 0b26e6edch,0b26e6edch + DD 0ee5a5ab4h,0ee5a5ab4h + DD 0fba0a05bh,0fba0a05bh + DD 0f65252a4h,0f65252a4h + DD 04d3b3b76h,04d3b3b76h + DD 061d6d6b7h,061d6d6b7h + DD 0ceb3b37dh,0ceb3b37dh + DD 07b292952h,07b292952h + DD 03ee3e3ddh,03ee3e3ddh + DD 0712f2f5eh,0712f2f5eh + DD 097848413h,097848413h + DD 0f55353a6h,0f55353a6h + DD 068d1d1b9h,068d1d1b9h + DD 000000000h,000000000h + DD 02cededc1h,02cededc1h + DD 060202040h,060202040h + DD 01ffcfce3h,01ffcfce3h + DD 0c8b1b179h,0c8b1b179h + DD 0ed5b5bb6h,0ed5b5bb6h + DD 0be6a6ad4h,0be6a6ad4h + DD 046cbcb8dh,046cbcb8dh + DD 0d9bebe67h,0d9bebe67h + DD 04b393972h,04b393972h + DD 0de4a4a94h,0de4a4a94h + DD 0d44c4c98h,0d44c4c98h + DD 0e85858b0h,0e85858b0h + DD 04acfcf85h,04acfcf85h + DD 06bd0d0bbh,06bd0d0bbh + DD 02aefefc5h,02aefefc5h + DD 0e5aaaa4fh,0e5aaaa4fh + DD 016fbfbedh,016fbfbedh + DD 0c5434386h,0c5434386h + DD 0d74d4d9ah,0d74d4d9ah + DD 055333366h,055333366h + DD 094858511h,094858511h + DD 0cf45458ah,0cf45458ah + DD 010f9f9e9h,010f9f9e9h + DD 006020204h,006020204h + DD 0817f7ffeh,0817f7ffeh + DD 0f05050a0h,0f05050a0h + DD 0443c3c78h,0443c3c78h + DD 0ba9f9f25h,0ba9f9f25h + DD 0e3a8a84bh,0e3a8a84bh + DD 0f35151a2h,0f35151a2h + DD 0fea3a35dh,0fea3a35dh + DD 0c0404080h,0c0404080h + DD 08a8f8f05h,08a8f8f05h + DD 0ad92923fh,0ad92923fh + DD 0bc9d9d21h,0bc9d9d21h + DD 048383870h,048383870h + DD 004f5f5f1h,004f5f5f1h + DD 0dfbcbc63h,0dfbcbc63h + DD 0c1b6b677h,0c1b6b677h + DD 075dadaafh,075dadaafh + DD 063212142h,063212142h + DD 030101020h,030101020h + DD 01affffe5h,01affffe5h + DD 00ef3f3fdh,00ef3f3fdh + 
DD 06dd2d2bfh,06dd2d2bfh + DD 04ccdcd81h,04ccdcd81h + DD 0140c0c18h,0140c0c18h + DD 035131326h,035131326h + DD 02fececc3h,02fececc3h + DD 0e15f5fbeh,0e15f5fbeh + DD 0a2979735h,0a2979735h + DD 0cc444488h,0cc444488h + DD 03917172eh,03917172eh + DD 057c4c493h,057c4c493h + DD 0f2a7a755h,0f2a7a755h + DD 0827e7efch,0827e7efch + DD 0473d3d7ah,0473d3d7ah + DD 0ac6464c8h,0ac6464c8h + DD 0e75d5dbah,0e75d5dbah + DD 02b191932h,02b191932h + DD 0957373e6h,0957373e6h + DD 0a06060c0h,0a06060c0h + DD 098818119h,098818119h + DD 0d14f4f9eh,0d14f4f9eh + DD 07fdcdca3h,07fdcdca3h + DD 066222244h,066222244h + DD 07e2a2a54h,07e2a2a54h + DD 0ab90903bh,0ab90903bh + DD 08388880bh,08388880bh + DD 0ca46468ch,0ca46468ch + DD 029eeeec7h,029eeeec7h + DD 0d3b8b86bh,0d3b8b86bh + DD 03c141428h,03c141428h + DD 079dedea7h,079dedea7h + DD 0e25e5ebch,0e25e5ebch + DD 01d0b0b16h,01d0b0b16h + DD 076dbdbadh,076dbdbadh + DD 03be0e0dbh,03be0e0dbh + DD 056323264h,056323264h + DD 04e3a3a74h,04e3a3a74h + DD 01e0a0a14h,01e0a0a14h + DD 0db494992h,0db494992h + DD 00a06060ch,00a06060ch + DD 06c242448h,06c242448h + DD 0e45c5cb8h,0e45c5cb8h + DD 05dc2c29fh,05dc2c29fh + DD 06ed3d3bdh,06ed3d3bdh + DD 0efacac43h,0efacac43h + DD 0a66262c4h,0a66262c4h + DD 0a8919139h,0a8919139h + DD 0a4959531h,0a4959531h + DD 037e4e4d3h,037e4e4d3h + DD 08b7979f2h,08b7979f2h + DD 032e7e7d5h,032e7e7d5h + DD 043c8c88bh,043c8c88bh + DD 05937376eh,05937376eh + DD 0b76d6ddah,0b76d6ddah + DD 08c8d8d01h,08c8d8d01h + DD 064d5d5b1h,064d5d5b1h + DD 0d24e4e9ch,0d24e4e9ch + DD 0e0a9a949h,0e0a9a949h + DD 0b46c6cd8h,0b46c6cd8h + DD 0fa5656ach,0fa5656ach + DD 007f4f4f3h,007f4f4f3h + DD 025eaeacfh,025eaeacfh + DD 0af6565cah,0af6565cah + DD 08e7a7af4h,08e7a7af4h + DD 0e9aeae47h,0e9aeae47h + DD 018080810h,018080810h + DD 0d5baba6fh,0d5baba6fh + DD 0887878f0h,0887878f0h + DD 06f25254ah,06f25254ah + DD 0722e2e5ch,0722e2e5ch + DD 0241c1c38h,0241c1c38h + DD 0f1a6a657h,0f1a6a657h + DD 0c7b4b473h,0c7b4b473h + DD 051c6c697h,051c6c697h + DD 023e8e8cbh,023e8e8cbh + DD 07cdddda1h,07cdddda1h + DD 09c7474e8h,09c7474e8h + DD 0211f1f3eh,0211f1f3eh + DD 0dd4b4b96h,0dd4b4b96h + DD 0dcbdbd61h,0dcbdbd61h + DD 0868b8b0dh,0868b8b0dh + DD 0858a8a0fh,0858a8a0fh + DD 0907070e0h,0907070e0h + DD 0423e3e7ch,0423e3e7ch + DD 0c4b5b571h,0c4b5b571h + DD 0aa6666cch,0aa6666cch + DD 0d8484890h,0d8484890h + DD 005030306h,005030306h + DD 001f6f6f7h,001f6f6f7h + DD 0120e0e1ch,0120e0e1ch + DD 0a36161c2h,0a36161c2h + DD 05f35356ah,05f35356ah + DD 0f95757aeh,0f95757aeh + DD 0d0b9b969h,0d0b9b969h + DD 091868617h,091868617h + DD 058c1c199h,058c1c199h + DD 0271d1d3ah,0271d1d3ah + DD 0b99e9e27h,0b99e9e27h + DD 038e1e1d9h,038e1e1d9h + DD 013f8f8ebh,013f8f8ebh + DD 0b398982bh,0b398982bh + DD 033111122h,033111122h + DD 0bb6969d2h,0bb6969d2h + DD 070d9d9a9h,070d9d9a9h + DD 0898e8e07h,0898e8e07h + DD 0a7949433h,0a7949433h + DD 0b69b9b2dh,0b69b9b2dh + DD 0221e1e3ch,0221e1e3ch + DD 092878715h,092878715h + DD 020e9e9c9h,020e9e9c9h + DD 049cece87h,049cece87h + DD 0ff5555aah,0ff5555aah + DD 078282850h,078282850h + DD 07adfdfa5h,07adfdfa5h + DD 08f8c8c03h,08f8c8c03h + DD 0f8a1a159h,0f8a1a159h + DD 080898909h,080898909h + DD 0170d0d1ah,0170d0d1ah + DD 0dabfbf65h,0dabfbf65h + DD 031e6e6d7h,031e6e6d7h + DD 0c6424284h,0c6424284h + DD 0b86868d0h,0b86868d0h + DD 0c3414182h,0c3414182h + DD 0b0999929h,0b0999929h + DD 0772d2d5ah,0772d2d5ah + DD 0110f0f1eh,0110f0f1eh + DD 0cbb0b07bh,0cbb0b07bh + DD 0fc5454a8h,0fc5454a8h + DD 0d6bbbb6dh,0d6bbbb6dh + DD 03a16162ch,03a16162ch +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 
030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 
0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h +DB 063h,07ch,077h,07bh,0f2h,06bh,06fh,0c5h +DB 030h,001h,067h,02bh,0feh,0d7h,0abh,076h +DB 0cah,082h,0c9h,07dh,0fah,059h,047h,0f0h +DB 0adh,0d4h,0a2h,0afh,09ch,0a4h,072h,0c0h +DB 0b7h,0fdh,093h,026h,036h,03fh,0f7h,0cch +DB 034h,0a5h,0e5h,0f1h,071h,0d8h,031h,015h +DB 004h,0c7h,023h,0c3h,018h,096h,005h,09ah +DB 007h,012h,080h,0e2h,0ebh,027h,0b2h,075h +DB 009h,083h,02ch,01ah,01bh,06eh,05ah,0a0h +DB 052h,03bh,0d6h,0b3h,029h,0e3h,02fh,084h +DB 053h,0d1h,000h,0edh,020h,0fch,0b1h,05bh +DB 06ah,0cbh,0beh,039h,04ah,04ch,058h,0cfh +DB 0d0h,0efh,0aah,0fbh,043h,04dh,033h,085h +DB 045h,0f9h,002h,07fh,050h,03ch,09fh,0a8h +DB 051h,0a3h,040h,08fh,092h,09dh,038h,0f5h +DB 0bch,0b6h,0dah,021h,010h,0ffh,0f3h,0d2h +DB 0cdh,00ch,013h,0ech,05fh,097h,044h,017h +DB 0c4h,0a7h,07eh,03dh,064h,05dh,019h,073h +DB 060h,081h,04fh,0dch,022h,02ah,090h,088h +DB 046h,0eeh,0b8h,014h,0deh,05eh,00bh,0dbh +DB 0e0h,032h,03ah,00ah,049h,006h,024h,05ch +DB 0c2h,0d3h,0ach,062h,091h,095h,0e4h,079h +DB 0e7h,0c8h,037h,06dh,08dh,0d5h,04eh,0a9h +DB 06ch,056h,0f4h,0eah,065h,07ah,0aeh,008h +DB 0bah,078h,025h,02eh,01ch,0a6h,0b4h,0c6h +DB 0e8h,0ddh,074h,01fh,04bh,0bdh,08bh,08ah +DB 070h,03eh,0b5h,066h,048h,003h,0f6h,00eh +DB 061h,035h,057h,0b9h,086h,0c1h,01dh,09eh +DB 0e1h,0f8h,098h,011h,069h,0d9h,08eh,094h +DB 09bh,01eh,087h,0e9h,0ceh,055h,028h,0dfh +DB 08ch,0a1h,089h,00dh,0bfh,0e6h,042h,068h +DB 041h,099h,02dh,00fh,0b0h,054h,0bbh,016h + DD 000000001h,000000002h,000000004h,000000008h + DD 000000010h,000000020h,000000040h,000000080h + DD 00000001bh,000000036h,080808080h,080808080h + DD 0fefefefeh,0fefefefeh,01b1b1b1bh,01b1b1b1bh +ALIGN 64 +$L$AES_Td:: + DD 050a7f451h,050a7f451h + DD 05365417eh,05365417eh + DD 0c3a4171ah,0c3a4171ah + DD 0965e273ah,0965e273ah + DD 0cb6bab3bh,0cb6bab3bh + DD 0f1459d1fh,0f1459d1fh + DD 0ab58faach,0ab58faach + DD 09303e34bh,09303e34bh + DD 055fa3020h,055fa3020h + DD 0f66d76adh,0f66d76adh + DD 09176cc88h,09176cc88h + DD 0254c02f5h,0254c02f5h + DD 0fcd7e54fh,0fcd7e54fh + DD 0d7cb2ac5h,0d7cb2ac5h + DD 080443526h,080443526h + DD 08fa362b5h,08fa362b5h + DD 0495ab1deh,0495ab1deh + DD 0671bba25h,0671bba25h + DD 0980eea45h,0980eea45h + DD 0e1c0fe5dh,0e1c0fe5dh + DD 002752fc3h,002752fc3h + DD 012f04c81h,012f04c81h + DD 0a397468dh,0a397468dh + DD 0c6f9d36bh,0c6f9d36bh + DD 0e75f8f03h,0e75f8f03h + DD 0959c9215h,0959c9215h + DD 0eb7a6dbfh,0eb7a6dbfh + DD 0da595295h,0da595295h + DD 02d83bed4h,02d83bed4h + DD 0d3217458h,0d3217458h + DD 02969e049h,02969e049h + DD 044c8c98eh,044c8c98eh + DD 06a89c275h,06a89c275h + DD 078798ef4h,078798ef4h + DD 06b3e5899h,06b3e5899h + DD 0dd71b927h,0dd71b927h + DD 0b64fe1beh,0b64fe1beh + DD 017ad88f0h,017ad88f0h + DD 066ac20c9h,066ac20c9h + DD 0b43ace7dh,0b43ace7dh + DD 0184adf63h,0184adf63h + DD 082311ae5h,082311ae5h + DD 060335197h,060335197h + DD 0457f5362h,0457f5362h + DD 0e07764b1h,0e07764b1h + DD 084ae6bbbh,084ae6bbbh + DD 01ca081feh,01ca081feh + 
DD 0942b08f9h,0942b08f9h + DD 058684870h,058684870h + DD 019fd458fh,019fd458fh + DD 0876cde94h,0876cde94h + DD 0b7f87b52h,0b7f87b52h + DD 023d373abh,023d373abh + DD 0e2024b72h,0e2024b72h + DD 0578f1fe3h,0578f1fe3h + DD 02aab5566h,02aab5566h + DD 00728ebb2h,00728ebb2h + DD 003c2b52fh,003c2b52fh + DD 09a7bc586h,09a7bc586h + DD 0a50837d3h,0a50837d3h + DD 0f2872830h,0f2872830h + DD 0b2a5bf23h,0b2a5bf23h + DD 0ba6a0302h,0ba6a0302h + DD 05c8216edh,05c8216edh + DD 02b1ccf8ah,02b1ccf8ah + DD 092b479a7h,092b479a7h + DD 0f0f207f3h,0f0f207f3h + DD 0a1e2694eh,0a1e2694eh + DD 0cdf4da65h,0cdf4da65h + DD 0d5be0506h,0d5be0506h + DD 01f6234d1h,01f6234d1h + DD 08afea6c4h,08afea6c4h + DD 09d532e34h,09d532e34h + DD 0a055f3a2h,0a055f3a2h + DD 032e18a05h,032e18a05h + DD 075ebf6a4h,075ebf6a4h + DD 039ec830bh,039ec830bh + DD 0aaef6040h,0aaef6040h + DD 0069f715eh,0069f715eh + DD 051106ebdh,051106ebdh + DD 0f98a213eh,0f98a213eh + DD 03d06dd96h,03d06dd96h + DD 0ae053eddh,0ae053eddh + DD 046bde64dh,046bde64dh + DD 0b58d5491h,0b58d5491h + DD 0055dc471h,0055dc471h + DD 06fd40604h,06fd40604h + DD 0ff155060h,0ff155060h + DD 024fb9819h,024fb9819h + DD 097e9bdd6h,097e9bdd6h + DD 0cc434089h,0cc434089h + DD 0779ed967h,0779ed967h + DD 0bd42e8b0h,0bd42e8b0h + DD 0888b8907h,0888b8907h + DD 0385b19e7h,0385b19e7h + DD 0dbeec879h,0dbeec879h + DD 0470a7ca1h,0470a7ca1h + DD 0e90f427ch,0e90f427ch + DD 0c91e84f8h,0c91e84f8h + DD 000000000h,000000000h + DD 083868009h,083868009h + DD 048ed2b32h,048ed2b32h + DD 0ac70111eh,0ac70111eh + DD 04e725a6ch,04e725a6ch + DD 0fbff0efdh,0fbff0efdh + DD 05638850fh,05638850fh + DD 01ed5ae3dh,01ed5ae3dh + DD 027392d36h,027392d36h + DD 064d90f0ah,064d90f0ah + DD 021a65c68h,021a65c68h + DD 0d1545b9bh,0d1545b9bh + DD 03a2e3624h,03a2e3624h + DD 0b1670a0ch,0b1670a0ch + DD 00fe75793h,00fe75793h + DD 0d296eeb4h,0d296eeb4h + DD 09e919b1bh,09e919b1bh + DD 04fc5c080h,04fc5c080h + DD 0a220dc61h,0a220dc61h + DD 0694b775ah,0694b775ah + DD 0161a121ch,0161a121ch + DD 00aba93e2h,00aba93e2h + DD 0e52aa0c0h,0e52aa0c0h + DD 043e0223ch,043e0223ch + DD 01d171b12h,01d171b12h + DD 00b0d090eh,00b0d090eh + DD 0adc78bf2h,0adc78bf2h + DD 0b9a8b62dh,0b9a8b62dh + DD 0c8a91e14h,0c8a91e14h + DD 08519f157h,08519f157h + DD 04c0775afh,04c0775afh + DD 0bbdd99eeh,0bbdd99eeh + DD 0fd607fa3h,0fd607fa3h + DD 09f2601f7h,09f2601f7h + DD 0bcf5725ch,0bcf5725ch + DD 0c53b6644h,0c53b6644h + DD 0347efb5bh,0347efb5bh + DD 07629438bh,07629438bh + DD 0dcc623cbh,0dcc623cbh + DD 068fcedb6h,068fcedb6h + DD 063f1e4b8h,063f1e4b8h + DD 0cadc31d7h,0cadc31d7h + DD 010856342h,010856342h + DD 040229713h,040229713h + DD 02011c684h,02011c684h + DD 07d244a85h,07d244a85h + DD 0f83dbbd2h,0f83dbbd2h + DD 01132f9aeh,01132f9aeh + DD 06da129c7h,06da129c7h + DD 04b2f9e1dh,04b2f9e1dh + DD 0f330b2dch,0f330b2dch + DD 0ec52860dh,0ec52860dh + DD 0d0e3c177h,0d0e3c177h + DD 06c16b32bh,06c16b32bh + DD 099b970a9h,099b970a9h + DD 0fa489411h,0fa489411h + DD 02264e947h,02264e947h + DD 0c48cfca8h,0c48cfca8h + DD 01a3ff0a0h,01a3ff0a0h + DD 0d82c7d56h,0d82c7d56h + DD 0ef903322h,0ef903322h + DD 0c74e4987h,0c74e4987h + DD 0c1d138d9h,0c1d138d9h + DD 0fea2ca8ch,0fea2ca8ch + DD 0360bd498h,0360bd498h + DD 0cf81f5a6h,0cf81f5a6h + DD 028de7aa5h,028de7aa5h + DD 0268eb7dah,0268eb7dah + DD 0a4bfad3fh,0a4bfad3fh + DD 0e49d3a2ch,0e49d3a2ch + DD 00d927850h,00d927850h + DD 09bcc5f6ah,09bcc5f6ah + DD 062467e54h,062467e54h + DD 0c2138df6h,0c2138df6h + DD 0e8b8d890h,0e8b8d890h + DD 05ef7392eh,05ef7392eh + DD 0f5afc382h,0f5afc382h + DD 0be805d9fh,0be805d9fh + DD 07c93d069h,07c93d069h + DD 
0a92dd56fh,0a92dd56fh + DD 0b31225cfh,0b31225cfh + DD 03b99acc8h,03b99acc8h + DD 0a77d1810h,0a77d1810h + DD 06e639ce8h,06e639ce8h + DD 07bbb3bdbh,07bbb3bdbh + DD 0097826cdh,0097826cdh + DD 0f418596eh,0f418596eh + DD 001b79aech,001b79aech + DD 0a89a4f83h,0a89a4f83h + DD 0656e95e6h,0656e95e6h + DD 07ee6ffaah,07ee6ffaah + DD 008cfbc21h,008cfbc21h + DD 0e6e815efh,0e6e815efh + DD 0d99be7bah,0d99be7bah + DD 0ce366f4ah,0ce366f4ah + DD 0d4099feah,0d4099feah + DD 0d67cb029h,0d67cb029h + DD 0afb2a431h,0afb2a431h + DD 031233f2ah,031233f2ah + DD 03094a5c6h,03094a5c6h + DD 0c066a235h,0c066a235h + DD 037bc4e74h,037bc4e74h + DD 0a6ca82fch,0a6ca82fch + DD 0b0d090e0h,0b0d090e0h + DD 015d8a733h,015d8a733h + DD 04a9804f1h,04a9804f1h + DD 0f7daec41h,0f7daec41h + DD 00e50cd7fh,00e50cd7fh + DD 02ff69117h,02ff69117h + DD 08dd64d76h,08dd64d76h + DD 04db0ef43h,04db0ef43h + DD 0544daacch,0544daacch + DD 0df0496e4h,0df0496e4h + DD 0e3b5d19eh,0e3b5d19eh + DD 01b886a4ch,01b886a4ch + DD 0b81f2cc1h,0b81f2cc1h + DD 07f516546h,07f516546h + DD 004ea5e9dh,004ea5e9dh + DD 05d358c01h,05d358c01h + DD 0737487fah,0737487fah + DD 02e410bfbh,02e410bfbh + DD 05a1d67b3h,05a1d67b3h + DD 052d2db92h,052d2db92h + DD 0335610e9h,0335610e9h + DD 01347d66dh,01347d66dh + DD 08c61d79ah,08c61d79ah + DD 07a0ca137h,07a0ca137h + DD 08e14f859h,08e14f859h + DD 0893c13ebh,0893c13ebh + DD 0ee27a9ceh,0ee27a9ceh + DD 035c961b7h,035c961b7h + DD 0ede51ce1h,0ede51ce1h + DD 03cb1477ah,03cb1477ah + DD 059dfd29ch,059dfd29ch + DD 03f73f255h,03f73f255h + DD 079ce1418h,079ce1418h + DD 0bf37c773h,0bf37c773h + DD 0eacdf753h,0eacdf753h + DD 05baafd5fh,05baafd5fh + DD 0146f3ddfh,0146f3ddfh + DD 086db4478h,086db4478h + DD 081f3afcah,081f3afcah + DD 03ec468b9h,03ec468b9h + DD 02c342438h,02c342438h + DD 05f40a3c2h,05f40a3c2h + DD 072c31d16h,072c31d16h + DD 00c25e2bch,00c25e2bch + DD 08b493c28h,08b493c28h + DD 041950dffh,041950dffh + DD 07101a839h,07101a839h + DD 0deb30c08h,0deb30c08h + DD 09ce4b4d8h,09ce4b4d8h + DD 090c15664h,090c15664h + DD 06184cb7bh,06184cb7bh + DD 070b632d5h,070b632d5h + DD 0745c6c48h,0745c6c48h + DD 04257b8d0h,04257b8d0h +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 
080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 052h,009h,06ah,0d5h,030h,036h,0a5h,038h +DB 0bfh,040h,0a3h,09eh,081h,0f3h,0d7h,0fbh +DB 07ch,0e3h,039h,082h,09bh,02fh,0ffh,087h +DB 034h,08eh,043h,044h,0c4h,0deh,0e9h,0cbh +DB 054h,07bh,094h,032h,0a6h,0c2h,023h,03dh +DB 0eeh,04ch,095h,00bh,042h,0fah,0c3h,04eh +DB 008h,02eh,0a1h,066h,028h,0d9h,024h,0b2h +DB 076h,05bh,0a2h,049h,06dh,08bh,0d1h,025h +DB 072h,0f8h,0f6h,064h,086h,068h,098h,016h +DB 0d4h,0a4h,05ch,0cch,05dh,065h,0b6h,092h +DB 06ch,070h,048h,050h,0fdh,0edh,0b9h,0dah +DB 
05eh,015h,046h,057h,0a7h,08dh,09dh,084h +DB 090h,0d8h,0abh,000h,08ch,0bch,0d3h,00ah +DB 0f7h,0e4h,058h,005h,0b8h,0b3h,045h,006h +DB 0d0h,02ch,01eh,08fh,0cah,03fh,00fh,002h +DB 0c1h,0afh,0bdh,003h,001h,013h,08ah,06bh +DB 03ah,091h,011h,041h,04fh,067h,0dch,0eah +DB 097h,0f2h,0cfh,0ceh,0f0h,0b4h,0e6h,073h +DB 096h,0ach,074h,022h,0e7h,0adh,035h,085h +DB 0e2h,0f9h,037h,0e8h,01ch,075h,0dfh,06eh +DB 047h,0f1h,01ah,071h,01dh,029h,0c5h,089h +DB 06fh,0b7h,062h,00eh,0aah,018h,0beh,01bh +DB 0fch,056h,03eh,04bh,0c6h,0d2h,079h,020h +DB 09ah,0dbh,0c0h,0feh,078h,0cdh,05ah,0f4h +DB 01fh,0ddh,0a8h,033h,088h,007h,0c7h,031h +DB 0b1h,012h,010h,059h,027h,080h,0ech,05fh +DB 060h,051h,07fh,0a9h,019h,0b5h,04ah,00dh +DB 02dh,0e5h,07ah,09fh,093h,0c9h,09ch,0efh +DB 0a0h,0e0h,03bh,04dh,0aeh,02ah,0f5h,0b0h +DB 0c8h,0ebh,0bbh,03ch,083h,053h,099h,061h +DB 017h,02bh,004h,07eh,0bah,077h,0d6h,026h +DB 0e1h,069h,014h,063h,055h,021h,00ch,07dh + DD 080808080h,080808080h,0fefefefeh,0fefefefeh + DD 01b1b1b1bh,01b1b1b1bh,0,0 +DB 65,69,83,32,102,111,114,32,120,56,54,95,54,52,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +block_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_block_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_block_prologue + + mov rax,QWORD PTR[24+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_block_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +block_se_handler ENDP + + +ALIGN 16 +key_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_key_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_key_prologue + + lea rax,QWORD PTR[56+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_key_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +key_se_handler ENDP + + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp 
+ push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_prologue] + cmp rbx,r10 + jb $L$in_cbc_prologue + + lea r10,QWORD PTR[$L$cbc_fast_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + + lea r10,QWORD PTR[$L$cbc_slow_prologue] + cmp rbx,r10 + jb $L$in_cbc_body + + lea r10,QWORD PTR[$L$cbc_slow_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + +$L$in_cbc_body:: + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$cbc_epilogue] + cmp rbx,r10 + jae $L$in_cbc_prologue + + lea rax,QWORD PTR[8+rax] + + lea r10,QWORD PTR[$L$cbc_popfq] + cmp rbx,r10 + jae $L$in_cbc_prologue + + mov rax,QWORD PTR[8+rax] + lea rax,QWORD PTR[56+rax] + +$L$in_cbc_frame_setup:: + mov rbx,QWORD PTR[((-16))+rax] + mov rbp,QWORD PTR[((-24))+rax] + mov r12,QWORD PTR[((-32))+rax] + mov r13,QWORD PTR[((-40))+rax] + mov r14,QWORD PTR[((-48))+rax] + mov r15,QWORD PTR[((-56))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_cbc_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + +$L$common_seh_exit:: + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_AES_encrypt + DD imagerel $L$SEH_end_AES_encrypt + DD imagerel $L$SEH_info_AES_encrypt + + DD imagerel $L$SEH_begin_AES_decrypt + DD imagerel $L$SEH_end_AES_decrypt + DD imagerel $L$SEH_info_AES_decrypt + + DD imagerel $L$SEH_begin_private_AES_set_encrypt_key + DD imagerel $L$SEH_end_private_AES_set_encrypt_key + DD imagerel $L$SEH_info_private_AES_set_encrypt_key + + DD imagerel $L$SEH_begin_private_AES_set_decrypt_key + DD imagerel $L$SEH_end_private_AES_set_decrypt_key + DD imagerel $L$SEH_info_private_AES_set_decrypt_key + + DD imagerel $L$SEH_begin_AES_cbc_encrypt + DD imagerel $L$SEH_end_AES_cbc_encrypt + DD imagerel $L$SEH_info_AES_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_AES_encrypt:: +DB 9,0,0,0 + DD imagerel block_se_handler + DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue +$L$SEH_info_AES_decrypt:: +DB 9,0,0,0 + DD imagerel block_se_handler + DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue +$L$SEH_info_private_AES_set_encrypt_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + DD imagerel $L$enc_key_prologue,imagerel $L$enc_key_epilogue +$L$SEH_info_private_AES_set_decrypt_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + DD imagerel $L$dec_key_prologue,imagerel $L$dec_key_epilogue +$L$SEH_info_AES_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel cbc_se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-mb-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-mb-x86_64.asm new file mode 100644 index 00000000000000..be408eec687ddb --- /dev/null +++ 
b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-mb-x86_64.asm @@ -0,0 +1,689 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC aesni_multi_cbc_encrypt + +ALIGN 32 +aesni_multi_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_multi_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[96+rsp],xmm12 + movaps XMMWORD PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + + + + + + + sub rsp,48 + and rsp,-64 + mov QWORD PTR[16+rsp],rax + +$L$enc4x_body:: + movdqu xmm12,XMMWORD PTR[rsi] + lea rsi,QWORD PTR[120+rsi] + lea rdi,QWORD PTR[80+rdi] + +$L$enc4x_loop_grande:: + mov DWORD PTR[24+rsp],edx + xor edx,edx + mov ecx,DWORD PTR[((-64))+rdi] + mov r8,QWORD PTR[((-80))+rdi] + cmp ecx,edx + mov r12,QWORD PTR[((-72))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm2,XMMWORD PTR[((-56))+rdi] + mov DWORD PTR[32+rsp],ecx + cmovle r8,rsp + mov ecx,DWORD PTR[((-24))+rdi] + mov r9,QWORD PTR[((-40))+rdi] + cmp ecx,edx + mov r13,QWORD PTR[((-32))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm3,XMMWORD PTR[((-16))+rdi] + mov DWORD PTR[36+rsp],ecx + cmovle r9,rsp + mov ecx,DWORD PTR[16+rdi] + mov r10,QWORD PTR[rdi] + cmp ecx,edx + mov r14,QWORD PTR[8+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm4,XMMWORD PTR[24+rdi] + mov DWORD PTR[40+rsp],ecx + cmovle r10,rsp + mov ecx,DWORD PTR[56+rdi] + mov r11,QWORD PTR[40+rdi] + cmp ecx,edx + mov r15,QWORD PTR[48+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm5,XMMWORD PTR[64+rdi] + mov DWORD PTR[44+rsp],ecx + cmovle r11,rsp + test edx,edx + jz $L$enc4x_done + + movups xmm1,XMMWORD PTR[((16-120))+rsi] + pxor xmm2,xmm12 + movups xmm0,XMMWORD PTR[((32-120))+rsi] + pxor xmm3,xmm12 + mov eax,DWORD PTR[((240-120))+rsi] + pxor xmm4,xmm12 + movdqu xmm6,XMMWORD PTR[r8] + pxor xmm5,xmm12 + movdqu xmm7,XMMWORD PTR[r9] + pxor xmm2,xmm6 + movdqu xmm8,XMMWORD PTR[r10] + pxor xmm3,xmm7 + movdqu xmm9,XMMWORD PTR[r11] + pxor xmm4,xmm8 + pxor xmm5,xmm9 + movdqa xmm10,XMMWORD PTR[32+rsp] + xor rbx,rbx + jmp $L$oop_enc4x + +ALIGN 32 +$L$oop_enc4x:: + add rbx,16 + lea rbp,QWORD PTR[16+rsp] + mov ecx,1 + sub rbp,rbx + +DB 102,15,56,220,209 + prefetcht0 [31+rbx*1+r8] + prefetcht0 [31+rbx*1+r9] +DB 102,15,56,220,217 + prefetcht0 [31+rbx*1+r10] + prefetcht0 [31+rbx*1+r10] +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((48-120))+rsi] + cmp ecx,DWORD PTR[32+rsp] +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + cmovge r8,rbp + cmovg r12,rbp +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((-56))+rsi] + cmp ecx,DWORD PTR[36+rsp] +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + cmovge r9,rbp + cmovg r13,rbp +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((-40))+rsi] + cmp ecx,DWORD PTR[40+rsp] +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + cmovge r10,rbp + cmovg r14,rbp +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((-24))+rsi] + cmp ecx,DWORD PTR[44+rsp] +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + cmovge r11,rbp + cmovg r15,rbp +DB 102,15,56,220,233 + movups xmm1,XMMWORD 
PTR[((-8))+rsi] + movdqa xmm11,xmm10 +DB 102,15,56,220,208 + prefetcht0 [15+rbx*1+r12] + prefetcht0 [15+rbx*1+r13] +DB 102,15,56,220,216 + prefetcht0 [15+rbx*1+r14] + prefetcht0 [15+rbx*1+r15] +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((128-120))+rsi] + pxor xmm12,xmm12 + +DB 102,15,56,220,209 + pcmpgtd xmm11,xmm12 + movdqu xmm12,XMMWORD PTR[((-120))+rsi] +DB 102,15,56,220,217 + paddd xmm10,xmm11 + movdqa XMMWORD PTR[32+rsp],xmm10 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((144-120))+rsi] + + cmp eax,11 + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((160-120))+rsi] + + jb $L$enc4x_tail + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((176-120))+rsi] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((192-120))+rsi] + + je $L$enc4x_tail + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[((208-120))+rsi] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((224-120))+rsi] + jmp $L$enc4x_tail + +ALIGN 32 +$L$enc4x_tail:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movdqu xmm6,XMMWORD PTR[rbx*1+r8] + movdqu xmm1,XMMWORD PTR[((16-120))+rsi] + +DB 102,15,56,221,208 + movdqu xmm7,XMMWORD PTR[rbx*1+r9] + pxor xmm6,xmm12 +DB 102,15,56,221,216 + movdqu xmm8,XMMWORD PTR[rbx*1+r10] + pxor xmm7,xmm12 +DB 102,15,56,221,224 + movdqu xmm9,XMMWORD PTR[rbx*1+r11] + pxor xmm8,xmm12 +DB 102,15,56,221,232 + movdqu xmm0,XMMWORD PTR[((32-120))+rsi] + pxor xmm9,xmm12 + + movups XMMWORD PTR[(-16)+rbx*1+r12],xmm2 + pxor xmm2,xmm6 + movups XMMWORD PTR[(-16)+rbx*1+r13],xmm3 + pxor xmm3,xmm7 + movups XMMWORD PTR[(-16)+rbx*1+r14],xmm4 + pxor xmm4,xmm8 + movups XMMWORD PTR[(-16)+rbx*1+r15],xmm5 + pxor xmm5,xmm9 + + dec edx + jnz $L$oop_enc4x + + mov rax,QWORD PTR[16+rsp] + mov edx,DWORD PTR[24+rsp] + + + + + + + + + + + lea rdi,QWORD PTR[160+rdi] + dec edx + jnz $L$enc4x_loop_grande + +$L$enc4x_done:: + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + + + + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$enc4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_multi_cbc_encrypt:: +aesni_multi_cbc_encrypt ENDP + +PUBLIC aesni_multi_cbc_decrypt + +ALIGN 32 +aesni_multi_cbc_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_multi_cbc_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[64+rsp],xmm10 + movaps XMMWORD PTR[80+rsp],xmm11 + movaps XMMWORD PTR[96+rsp],xmm12 + movaps XMMWORD 
PTR[(-104)+rax],xmm13 + movaps XMMWORD PTR[(-88)+rax],xmm14 + movaps XMMWORD PTR[(-72)+rax],xmm15 + + + + + + + sub rsp,48 + and rsp,-64 + mov QWORD PTR[16+rsp],rax + +$L$dec4x_body:: + movdqu xmm12,XMMWORD PTR[rsi] + lea rsi,QWORD PTR[120+rsi] + lea rdi,QWORD PTR[80+rdi] + +$L$dec4x_loop_grande:: + mov DWORD PTR[24+rsp],edx + xor edx,edx + mov ecx,DWORD PTR[((-64))+rdi] + mov r8,QWORD PTR[((-80))+rdi] + cmp ecx,edx + mov r12,QWORD PTR[((-72))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm6,XMMWORD PTR[((-56))+rdi] + mov DWORD PTR[32+rsp],ecx + cmovle r8,rsp + mov ecx,DWORD PTR[((-24))+rdi] + mov r9,QWORD PTR[((-40))+rdi] + cmp ecx,edx + mov r13,QWORD PTR[((-32))+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm7,XMMWORD PTR[((-16))+rdi] + mov DWORD PTR[36+rsp],ecx + cmovle r9,rsp + mov ecx,DWORD PTR[16+rdi] + mov r10,QWORD PTR[rdi] + cmp ecx,edx + mov r14,QWORD PTR[8+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm8,XMMWORD PTR[24+rdi] + mov DWORD PTR[40+rsp],ecx + cmovle r10,rsp + mov ecx,DWORD PTR[56+rdi] + mov r11,QWORD PTR[40+rdi] + cmp ecx,edx + mov r15,QWORD PTR[48+rdi] + cmovg edx,ecx + test ecx,ecx + movdqu xmm9,XMMWORD PTR[64+rdi] + mov DWORD PTR[44+rsp],ecx + cmovle r11,rsp + test edx,edx + jz $L$dec4x_done + + movups xmm1,XMMWORD PTR[((16-120))+rsi] + movups xmm0,XMMWORD PTR[((32-120))+rsi] + mov eax,DWORD PTR[((240-120))+rsi] + movdqu xmm2,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[r9] + pxor xmm2,xmm12 + movdqu xmm4,XMMWORD PTR[r10] + pxor xmm3,xmm12 + movdqu xmm5,XMMWORD PTR[r11] + pxor xmm4,xmm12 + pxor xmm5,xmm12 + movdqa xmm10,XMMWORD PTR[32+rsp] + xor rbx,rbx + jmp $L$oop_dec4x + +ALIGN 32 +$L$oop_dec4x:: + add rbx,16 + lea rbp,QWORD PTR[16+rsp] + mov ecx,1 + sub rbp,rbx + +DB 102,15,56,222,209 + prefetcht0 [31+rbx*1+r8] + prefetcht0 [31+rbx*1+r9] +DB 102,15,56,222,217 + prefetcht0 [31+rbx*1+r10] + prefetcht0 [31+rbx*1+r11] +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((48-120))+rsi] + cmp ecx,DWORD PTR[32+rsp] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + cmovge r8,rbp + cmovg r12,rbp +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((-56))+rsi] + cmp ecx,DWORD PTR[36+rsp] +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + cmovge r9,rbp + cmovg r13,rbp +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((-40))+rsi] + cmp ecx,DWORD PTR[40+rsp] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + cmovge r10,rbp + cmovg r14,rbp +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((-24))+rsi] + cmp ecx,DWORD PTR[44+rsp] +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + cmovge r11,rbp + cmovg r15,rbp +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((-8))+rsi] + movdqa xmm11,xmm10 +DB 102,15,56,222,208 + prefetcht0 [15+rbx*1+r12] + prefetcht0 [15+rbx*1+r13] +DB 102,15,56,222,216 + prefetcht0 [15+rbx*1+r14] + prefetcht0 [15+rbx*1+r15] +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((128-120))+rsi] + pxor xmm12,xmm12 + +DB 102,15,56,222,209 + pcmpgtd xmm11,xmm12 + movdqu xmm12,XMMWORD PTR[((-120))+rsi] +DB 102,15,56,222,217 + paddd xmm10,xmm11 + movdqa XMMWORD PTR[32+rsp],xmm10 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((144-120))+rsi] + + cmp eax,11 + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((160-120))+rsi] + + jb $L$dec4x_tail + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((176-120))+rsi] + +DB 
102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((192-120))+rsi] + + je $L$dec4x_tail + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[((208-120))+rsi] + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((224-120))+rsi] + jmp $L$dec4x_tail + +ALIGN 32 +$L$dec4x_tail:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + pxor xmm6,xmm0 + pxor xmm7,xmm0 +DB 102,15,56,222,233 + movdqu xmm1,XMMWORD PTR[((16-120))+rsi] + pxor xmm8,xmm0 + pxor xmm9,xmm0 + movdqu xmm0,XMMWORD PTR[((32-120))+rsi] + +DB 102,15,56,223,214 +DB 102,15,56,223,223 + movdqu xmm6,XMMWORD PTR[((-16))+rbx*1+r8] + movdqu xmm7,XMMWORD PTR[((-16))+rbx*1+r9] +DB 102,65,15,56,223,224 +DB 102,65,15,56,223,233 + movdqu xmm8,XMMWORD PTR[((-16))+rbx*1+r10] + movdqu xmm9,XMMWORD PTR[((-16))+rbx*1+r11] + + movups XMMWORD PTR[(-16)+rbx*1+r12],xmm2 + movdqu xmm2,XMMWORD PTR[rbx*1+r8] + movups XMMWORD PTR[(-16)+rbx*1+r13],xmm3 + movdqu xmm3,XMMWORD PTR[rbx*1+r9] + pxor xmm2,xmm12 + movups XMMWORD PTR[(-16)+rbx*1+r14],xmm4 + movdqu xmm4,XMMWORD PTR[rbx*1+r10] + pxor xmm3,xmm12 + movups XMMWORD PTR[(-16)+rbx*1+r15],xmm5 + movdqu xmm5,XMMWORD PTR[rbx*1+r11] + pxor xmm4,xmm12 + pxor xmm5,xmm12 + + dec edx + jnz $L$oop_dec4x + + mov rax,QWORD PTR[16+rsp] + mov edx,DWORD PTR[24+rsp] + + lea rdi,QWORD PTR[160+rdi] + dec edx + jnz $L$dec4x_loop_grande + +$L$dec4x_done:: + movaps xmm6,XMMWORD PTR[((-216))+rax] + movaps xmm7,XMMWORD PTR[((-200))+rax] + movaps xmm8,XMMWORD PTR[((-184))+rax] + movaps xmm9,XMMWORD PTR[((-168))+rax] + movaps xmm10,XMMWORD PTR[((-152))+rax] + movaps xmm11,XMMWORD PTR[((-136))+rax] + movaps xmm12,XMMWORD PTR[((-120))+rax] + + + + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$dec4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_multi_cbc_decrypt:: +aesni_multi_cbc_decrypt ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[16+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea rsi,QWORD PTR[((-56-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD 
PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_multi_cbc_encrypt + DD imagerel $L$SEH_end_aesni_multi_cbc_encrypt + DD imagerel $L$SEH_info_aesni_multi_cbc_encrypt + DD imagerel $L$SEH_begin_aesni_multi_cbc_decrypt + DD imagerel $L$SEH_end_aesni_multi_cbc_decrypt + DD imagerel $L$SEH_info_aesni_multi_cbc_decrypt +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_aesni_multi_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc4x_body,imagerel $L$enc4x_epilogue +$L$SEH_info_aesni_multi_cbc_decrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec4x_body,imagerel $L$dec4x_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha1-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha1-x86_64.asm new file mode 100644 index 00000000000000..bfc5fa17b4aa70 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha1-x86_64.asm @@ -0,0 +1,1887 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC aesni_cbc_sha1_enc + +ALIGN 32 +aesni_cbc_sha1_enc PROC PUBLIC + + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r11,QWORD PTR[((OPENSSL_ia32cap_P+4))] + bt r11,61 + jc aesni_cbc_sha1_enc_shaext + jmp aesni_cbc_sha1_enc_ssse3 + DB 0F3h,0C3h ;repret +aesni_cbc_sha1_enc ENDP + +ALIGN 32 +aesni_cbc_sha1_enc_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha1_enc_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-264))+rsp] + + + movaps XMMWORD PTR[(96+0)+rsp],xmm6 + movaps XMMWORD PTR[(96+16)+rsp],xmm7 + movaps XMMWORD PTR[(96+32)+rsp],xmm8 + movaps XMMWORD PTR[(96+48)+rsp],xmm9 + movaps XMMWORD PTR[(96+64)+rsp],xmm10 + movaps XMMWORD PTR[(96+80)+rsp],xmm11 + movaps XMMWORD PTR[(96+96)+rsp],xmm12 + movaps XMMWORD PTR[(96+112)+rsp],xmm13 + movaps XMMWORD PTR[(96+128)+rsp],xmm14 + movaps XMMWORD PTR[(96+144)+rsp],xmm15 +$L$prologue_ssse3:: + mov r12,rdi + mov r13,rsi + mov r14,rdx + lea r15,QWORD PTR[112+rcx] + movdqu xmm2,XMMWORD PTR[r8] + mov QWORD PTR[88+rsp],r8 + shl r14,6 + sub r13,r12 + mov r8d,DWORD PTR[((240-112))+r15] + add r14,r10 + + lea r11,QWORD PTR[K_XX_XX] + mov eax,DWORD PTR[r9] + mov ebx,DWORD PTR[4+r9] + mov ecx,DWORD PTR[8+r9] + mov edx,DWORD PTR[12+r9] + mov esi,ebx + mov ebp,DWORD PTR[16+r9] + mov edi,ecx + xor edi,edx + and esi,edi + + movdqa xmm3,XMMWORD PTR[64+r11] + movdqa xmm13,XMMWORD PTR[r11] + movdqu xmm4,XMMWORD PTR[r10] + movdqu xmm5,XMMWORD PTR[16+r10] + movdqu xmm6,XMMWORD PTR[32+r10] + movdqu xmm7,XMMWORD PTR[48+r10] +DB 102,15,56,0,227 +DB 102,15,56,0,235 +DB 102,15,56,0,243 + add r10,64 + paddd xmm4,xmm13 +DB 102,15,56,0,251 + paddd xmm5,xmm13 + paddd xmm6,xmm13 + movdqa XMMWORD PTR[rsp],xmm4 + psubd xmm4,xmm13 + movdqa XMMWORD PTR[16+rsp],xmm5 + psubd xmm5,xmm13 + movdqa XMMWORD 
PTR[32+rsp],xmm6 + psubd xmm6,xmm13 + movups xmm15,XMMWORD PTR[((-112))+r15] + movups xmm0,XMMWORD PTR[((16-112))+r15] + jmp $L$oop_ssse3 +ALIGN 32 +$L$oop_ssse3:: + ror ebx,2 + movups xmm14,XMMWORD PTR[r12] + xorps xmm14,xmm15 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 + pshufd xmm8,xmm4,238 + xor esi,edx + movdqa xmm12,xmm7 + paddd xmm13,xmm7 + mov edi,eax + add ebp,DWORD PTR[rsp] + punpcklqdq xmm8,xmm5 + xor ebx,ecx + rol eax,5 + add ebp,esi + psrldq xmm12,4 + and edi,ebx + xor ebx,ecx + pxor xmm8,xmm4 + add ebp,eax + ror eax,7 + pxor xmm12,xmm6 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[4+rsp] + pxor xmm8,xmm12 + xor eax,ebx + rol ebp,5 + movdqa XMMWORD PTR[48+rsp],xmm13 + add edx,edi + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 + and esi,eax + movdqa xmm3,xmm8 + xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm12,xmm8 + xor esi,ebx + pslldq xmm3,12 + paddd xmm8,xmm8 + mov edi,edx + add ecx,DWORD PTR[8+rsp] + psrld xmm12,31 + xor ebp,eax + rol edx,5 + add ecx,esi + movdqa xmm13,xmm3 + and edi,ebp + xor ebp,eax + psrld xmm3,30 + add ecx,edx + ror edx,7 + por xmm8,xmm12 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[12+rsp] + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + pslld xmm13,2 + pxor xmm8,xmm3 + xor edx,ebp + movdqa xmm3,XMMWORD PTR[r11] + rol ecx,5 + add ebx,edi + and esi,edx + pxor xmm8,xmm13 + xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm9,xmm5,238 + xor esi,ebp + movdqa xmm13,xmm8 + paddd xmm3,xmm8 + mov edi,ebx + add eax,DWORD PTR[16+rsp] + punpcklqdq xmm9,xmm6 + xor ecx,edx + rol ebx,5 + add eax,esi + psrldq xmm13,4 + and edi,ecx + xor ecx,edx + pxor xmm9,xmm5 + add eax,ebx + ror ebx,7 + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 + pxor xmm13,xmm7 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[20+rsp] + pxor xmm9,xmm13 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD PTR[rsp],xmm3 + add ebp,edi + and esi,ebx + movdqa xmm12,xmm9 + xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm13,xmm9 + xor esi,ecx + pslldq xmm12,12 + paddd xmm9,xmm9 + mov edi,ebp + add edx,DWORD PTR[24+rsp] + psrld xmm13,31 + xor eax,ebx + rol ebp,5 + add edx,esi + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + movdqa xmm3,xmm12 + and edi,eax + xor eax,ebx + psrld xmm12,30 + add edx,ebp + ror ebp,7 + por xmm9,xmm13 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[28+rsp] + pslld xmm3,2 + pxor xmm9,xmm12 + xor ebp,eax + movdqa xmm12,XMMWORD PTR[16+r11] + rol edx,5 + add ecx,edi + and esi,ebp + pxor xmm9,xmm3 + xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm10,xmm6,238 + xor esi,eax + movdqa xmm3,xmm9 + paddd xmm12,xmm9 + mov edi,ecx + add ebx,DWORD PTR[32+rsp] + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + punpcklqdq xmm10,xmm7 + xor edx,ebp + rol ecx,5 + add ebx,esi + psrldq xmm3,4 + and edi,edx + xor edx,ebp + pxor xmm10,xmm6 + add ebx,ecx + ror ecx,7 + pxor xmm3,xmm8 + xor edi,ebp + mov esi,ebx + add eax,DWORD PTR[36+rsp] + pxor xmm10,xmm3 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD PTR[16+rsp],xmm12 + add eax,edi + and esi,ecx + movdqa xmm13,xmm10 + xor ecx,edx + add eax,ebx + ror ebx,7 + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + movdqa xmm3,xmm10 + xor esi,edx + pslldq xmm13,12 + paddd xmm10,xmm10 + mov edi,eax + add ebp,DWORD PTR[40+rsp] + psrld xmm3,31 + xor ebx,ecx + rol eax,5 + add ebp,esi + movdqa xmm12,xmm13 + and edi,ebx + xor ebx,ecx + psrld xmm13,30 + add ebp,eax + ror eax,7 + por xmm10,xmm3 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[44+rsp] + pslld xmm12,2 + 
pxor xmm10,xmm13 + xor eax,ebx + movdqa xmm13,XMMWORD PTR[16+r11] + rol ebp,5 + add edx,edi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + and esi,eax + pxor xmm10,xmm12 + xor eax,ebx + add edx,ebp + ror ebp,7 + pshufd xmm11,xmm7,238 + xor esi,ebx + movdqa xmm12,xmm10 + paddd xmm13,xmm10 + mov edi,edx + add ecx,DWORD PTR[48+rsp] + punpcklqdq xmm11,xmm8 + xor ebp,eax + rol edx,5 + add ecx,esi + psrldq xmm12,4 + and edi,ebp + xor ebp,eax + pxor xmm11,xmm7 + add ecx,edx + ror edx,7 + pxor xmm12,xmm9 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[52+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + pxor xmm11,xmm12 + xor edx,ebp + rol ecx,5 + movdqa XMMWORD PTR[32+rsp],xmm13 + add ebx,edi + and esi,edx + movdqa xmm3,xmm11 + xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm12,xmm11 + xor esi,ebp + pslldq xmm3,12 + paddd xmm11,xmm11 + mov edi,ebx + add eax,DWORD PTR[56+rsp] + psrld xmm12,31 + xor ecx,edx + rol ebx,5 + add eax,esi + movdqa xmm13,xmm3 + and edi,ecx + xor ecx,edx + psrld xmm3,30 + add eax,ebx + ror ebx,7 + cmp r8d,11 + jb $L$aesenclast1 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast1 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast1:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + por xmm11,xmm12 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[60+rsp] + pslld xmm13,2 + pxor xmm11,xmm3 + xor ebx,ecx + movdqa xmm3,XMMWORD PTR[16+r11] + rol eax,5 + add ebp,edi + and esi,ebx + pxor xmm11,xmm13 + pshufd xmm13,xmm10,238 + xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm4,xmm8 + xor esi,ecx + mov edi,ebp + add edx,DWORD PTR[rsp] + punpcklqdq xmm13,xmm11 + xor eax,ebx + rol ebp,5 + pxor xmm4,xmm5 + add edx,esi + movups xmm14,XMMWORD PTR[16+r12] + xorps xmm14,xmm15 + movups XMMWORD PTR[r13*1+r12],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 + and edi,eax + movdqa xmm12,xmm3 + xor eax,ebx + paddd xmm3,xmm11 + add edx,ebp + pxor xmm4,xmm13 + ror ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[4+rsp] + movdqa xmm13,xmm4 + xor ebp,eax + rol edx,5 + movdqa XMMWORD PTR[48+rsp],xmm3 + add ecx,edi + and esi,ebp + xor ebp,eax + pslld xmm4,2 + add ecx,edx + ror edx,7 + psrld xmm13,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD PTR[8+rsp] + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 + por xmm4,xmm13 + xor edx,ebp + rol ecx,5 + pshufd xmm3,xmm11,238 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm5,xmm9 + add ebp,DWORD PTR[16+rsp] + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + xor esi,ecx + punpcklqdq xmm3,xmm4 + mov edi,eax + rol eax,5 + pxor xmm5,xmm6 + add ebp,esi + xor edi,ecx + movdqa xmm13,xmm12 + ror ebx,7 + paddd xmm12,xmm4 + add ebp,eax + pxor xmm5,xmm3 + add edx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm3,xmm5 + add edx,edi + xor esi,ebx + movdqa XMMWORD PTR[rsp],xmm12 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[24+rsp] + pslld xmm5,2 + xor esi,eax + mov edi,edx + psrld xmm3,30 + rol edx,5 + add ecx,esi + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 + xor edi,eax + ror ebp,7 + por xmm5,xmm3 + add ecx,edx + add ebx,DWORD PTR[28+rsp] + pshufd xmm12,xmm4,238 + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror 
edx,7 + add ebx,ecx + pxor xmm6,xmm10 + add eax,DWORD PTR[32+rsp] + xor esi,edx + punpcklqdq xmm12,xmm5 + mov edi,ebx + rol ebx,5 + pxor xmm6,xmm7 + add eax,esi + xor edi,edx + movdqa xmm3,XMMWORD PTR[32+r11] + ror ecx,7 + paddd xmm13,xmm5 + add eax,ebx + pxor xmm6,xmm12 + add ebp,DWORD PTR[36+rsp] + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + xor edi,ecx + mov esi,eax + rol eax,5 + movdqa xmm12,xmm6 + add ebp,edi + xor esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm13 + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[40+rsp] + pslld xmm6,2 + xor esi,ebx + mov edi,ebp + psrld xmm12,30 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + por xmm6,xmm12 + add edx,ebp + add ecx,DWORD PTR[44+rsp] + pshufd xmm13,xmm5,238 + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm7,xmm11 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + punpcklqdq xmm13,xmm6 + mov edi,ecx + rol ecx,5 + pxor xmm7,xmm8 + add ebx,esi + xor edi,ebp + movdqa xmm12,xmm3 + ror edx,7 + paddd xmm3,xmm6 + add ebx,ecx + pxor xmm7,xmm13 + add eax,DWORD PTR[52+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + movdqa xmm13,xmm7 + add eax,edi + xor esi,edx + movdqa XMMWORD PTR[32+rsp],xmm3 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[56+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + pslld xmm7,2 + xor esi,ecx + mov edi,eax + psrld xmm13,30 + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + por xmm7,xmm13 + add ebp,eax + add edx,DWORD PTR[60+rsp] + pshufd xmm3,xmm6,238 + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm8,xmm4 + add ecx,DWORD PTR[rsp] + xor esi,eax + punpcklqdq xmm3,xmm7 + mov edi,edx + rol edx,5 + pxor xmm8,xmm9 + add ecx,esi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor edi,eax + movdqa xmm13,xmm12 + ror ebp,7 + paddd xmm12,xmm7 + add ecx,edx + pxor xmm8,xmm3 + add ebx,DWORD PTR[4+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + movdqa xmm3,xmm8 + add ebx,edi + xor esi,ebp + movdqa XMMWORD PTR[48+rsp],xmm12 + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[8+rsp] + pslld xmm8,2 + xor esi,edx + mov edi,ebx + psrld xmm3,30 + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + por xmm8,xmm3 + add eax,ebx + add ebp,DWORD PTR[12+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + pshufd xmm12,xmm7,238 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm9,xmm5 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + punpcklqdq xmm12,xmm8 + mov edi,ebp + rol ebp,5 + pxor xmm9,xmm10 + add edx,esi + xor edi,ebx + movdqa xmm3,xmm13 + ror eax,7 + paddd xmm13,xmm8 + add edx,ebp + pxor xmm9,xmm12 + add ecx,DWORD PTR[20+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + movdqa xmm12,xmm9 + add ecx,edi + cmp r8d,11 + jb $L$aesenclast2 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast2 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast2:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + xor esi,eax + movdqa XMMWORD PTR[rsp],xmm13 + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[24+rsp] + pslld xmm9,2 + xor esi,ebp + mov edi,ecx + psrld xmm12,30 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + por xmm9,xmm12 + add ebx,ecx + add eax,DWORD PTR[28+rsp] + pshufd xmm13,xmm8,238 + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add 
eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + pxor xmm10,xmm6 + add ebp,DWORD PTR[32+rsp] + movups xmm14,XMMWORD PTR[32+r12] + xorps xmm14,xmm15 + movups XMMWORD PTR[16+r12*1+r13],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 + and esi,ecx + xor ecx,edx + ror ebx,7 + punpcklqdq xmm13,xmm9 + mov edi,eax + xor esi,ecx + pxor xmm10,xmm11 + rol eax,5 + add ebp,esi + movdqa xmm12,xmm3 + xor edi,ebx + paddd xmm3,xmm9 + xor ebx,ecx + pxor xmm10,xmm13 + add ebp,eax + add edx,DWORD PTR[36+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + movdqa xmm13,xmm10 + mov esi,ebp + xor edi,ebx + movdqa XMMWORD PTR[16+rsp],xmm3 + rol ebp,5 + add edx,edi + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 + xor esi,eax + pslld xmm10,2 + xor eax,ebx + add edx,ebp + psrld xmm13,30 + add ecx,DWORD PTR[40+rsp] + and esi,eax + xor eax,ebx + por xmm10,xmm13 + ror ebp,7 + mov edi,edx + xor esi,eax + rol edx,5 + pshufd xmm3,xmm9,238 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[44+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + mov esi,ecx + xor edi,ebp + rol ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + pxor xmm11,xmm7 + add eax,DWORD PTR[48+rsp] + and esi,edx + xor edx,ebp + ror ecx,7 + punpcklqdq xmm3,xmm10 + mov edi,ebx + xor esi,edx + pxor xmm11,xmm4 + rol ebx,5 + add eax,esi + movdqa xmm13,XMMWORD PTR[48+r11] + xor edi,ecx + paddd xmm12,xmm10 + xor ecx,edx + pxor xmm11,xmm3 + add eax,ebx + add ebp,DWORD PTR[52+rsp] + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 + and edi,ecx + xor ecx,edx + ror ebx,7 + movdqa xmm3,xmm11 + mov esi,eax + xor edi,ecx + movdqa XMMWORD PTR[32+rsp],xmm12 + rol eax,5 + add ebp,edi + xor esi,ebx + pslld xmm11,2 + xor ebx,ecx + add ebp,eax + psrld xmm3,30 + add edx,DWORD PTR[56+rsp] + and esi,ebx + xor ebx,ecx + por xmm11,xmm3 + ror eax,7 + mov edi,ebp + xor esi,ebx + rol ebp,5 + pshufd xmm12,xmm10,238 + add edx,esi + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + xor edi,eax + xor eax,ebx + add edx,ebp + add ecx,DWORD PTR[60+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + mov esi,edx + xor edi,eax + rol edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + pxor xmm4,xmm8 + add ebx,DWORD PTR[rsp] + and esi,ebp + xor ebp,eax + ror edx,7 + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + punpcklqdq xmm12,xmm11 + mov edi,ecx + xor esi,ebp + pxor xmm4,xmm5 + rol ecx,5 + add ebx,esi + movdqa xmm3,xmm13 + xor edi,edx + paddd xmm13,xmm11 + xor edx,ebp + pxor xmm4,xmm12 + add ebx,ecx + add eax,DWORD PTR[4+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + movdqa xmm12,xmm4 + mov esi,ebx + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm13 + rol ebx,5 + add eax,edi + xor esi,ecx + pslld xmm4,2 + xor ecx,edx + add eax,ebx + psrld xmm12,30 + add ebp,DWORD PTR[8+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + and esi,ecx + xor ecx,edx + por xmm4,xmm12 + ror ebx,7 + mov edi,eax + xor esi,ecx + rol eax,5 + pshufd xmm13,xmm11,238 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[12+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + mov esi,ebp + xor edi,ebx + rol ebp,5 + add edx,edi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor esi,eax + xor eax,ebx + add edx,ebp + pxor xmm5,xmm9 + add ecx,DWORD PTR[16+rsp] + and esi,eax + xor eax,ebx + ror ebp,7 + punpcklqdq xmm13,xmm4 + mov edi,edx + xor esi,eax + pxor xmm5,xmm6 + rol edx,5 + add ecx,esi + movdqa xmm12,xmm3 + 
xor edi,ebp + paddd xmm3,xmm4 + xor ebp,eax + pxor xmm5,xmm13 + add ecx,edx + add ebx,DWORD PTR[20+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + movdqa xmm13,xmm5 + mov esi,ecx + xor edi,ebp + movdqa XMMWORD PTR[rsp],xmm3 + rol ecx,5 + add ebx,edi + xor esi,edx + pslld xmm5,2 + xor edx,ebp + add ebx,ecx + psrld xmm13,30 + add eax,DWORD PTR[24+rsp] + and esi,edx + xor edx,ebp + por xmm5,xmm13 + ror ecx,7 + mov edi,ebx + xor esi,edx + rol ebx,5 + pshufd xmm3,xmm4,238 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[28+rsp] + cmp r8d,11 + jb $L$aesenclast3 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast3 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast3:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + and edi,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor edi,ecx + rol eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + pxor xmm6,xmm10 + add edx,DWORD PTR[32+rsp] + and esi,ebx + xor ebx,ecx + ror eax,7 + punpcklqdq xmm3,xmm5 + mov edi,ebp + xor esi,ebx + pxor xmm6,xmm7 + rol ebp,5 + add edx,esi + movups xmm14,XMMWORD PTR[48+r12] + xorps xmm14,xmm15 + movups XMMWORD PTR[32+r12*1+r13],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+r15] +DB 102,15,56,220,208 + movdqa xmm13,xmm12 + xor edi,eax + paddd xmm12,xmm5 + xor eax,ebx + pxor xmm6,xmm3 + add edx,ebp + add ecx,DWORD PTR[36+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + movdqa xmm3,xmm6 + mov esi,edx + xor edi,eax + movdqa XMMWORD PTR[16+rsp],xmm12 + rol edx,5 + add ecx,edi + xor esi,ebp + pslld xmm6,2 + xor ebp,eax + add ecx,edx + psrld xmm3,30 + add ebx,DWORD PTR[40+rsp] + and esi,ebp + xor ebp,eax + por xmm6,xmm3 + ror edx,7 + movups xmm0,XMMWORD PTR[((-64))+r15] +DB 102,15,56,220,209 + mov edi,ecx + xor esi,ebp + rol ecx,5 + pshufd xmm12,xmm5,238 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[44+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + pxor xmm7,xmm11 + add ebp,DWORD PTR[48+rsp] + movups xmm1,XMMWORD PTR[((-48))+r15] +DB 102,15,56,220,208 + xor esi,ecx + punpcklqdq xmm12,xmm6 + mov edi,eax + rol eax,5 + pxor xmm7,xmm8 + add ebp,esi + xor edi,ecx + movdqa xmm3,xmm13 + ror ebx,7 + paddd xmm13,xmm6 + add ebp,eax + pxor xmm7,xmm12 + add edx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm12,xmm7 + add edx,edi + xor esi,ebx + movdqa XMMWORD PTR[32+rsp],xmm13 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[56+rsp] + pslld xmm7,2 + xor esi,eax + mov edi,edx + psrld xmm12,30 + rol edx,5 + add ecx,esi + movups xmm0,XMMWORD PTR[((-32))+r15] +DB 102,15,56,220,209 + xor edi,eax + ror ebp,7 + por xmm7,xmm12 + add ecx,edx + add ebx,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + paddd xmm3,xmm7 + add eax,esi + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm3 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[4+rsp] + movups xmm1,XMMWORD PTR[((-16))+r15] +DB 102,15,56,220,208 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD 
PTR[12+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + movups xmm0,XMMWORD PTR[r15] +DB 102,15,56,220,209 + xor esi,eax + ror ebp,7 + add ecx,edx + cmp r10,r14 + je $L$done_ssse3 + movdqa xmm3,XMMWORD PTR[64+r11] + movdqa xmm13,XMMWORD PTR[r11] + movdqu xmm4,XMMWORD PTR[r10] + movdqu xmm5,XMMWORD PTR[16+r10] + movdqu xmm6,XMMWORD PTR[32+r10] + movdqu xmm7,XMMWORD PTR[48+r10] +DB 102,15,56,0,227 + add r10,64 + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx +DB 102,15,56,0,235 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm4,xmm13 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD PTR[rsp],xmm4 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm4,xmm13 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,243 + rol edx,5 + add ecx,esi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor edi,eax + ror ebp,7 + paddd xmm5,xmm13 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm5 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm5,xmm13 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,251 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm6,xmm13 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD PTR[32+rsp],xmm6 + rol edx,5 + add ecx,edi + cmp r8d,11 + jb $L$aesenclast4 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast4 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast4:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + xor esi,eax + ror ebp,7 + psubd xmm6,xmm13 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + movups XMMWORD PTR[48+r12*1+r13],xmm2 + lea r12,QWORD PTR[64+r12] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + add edx,DWORD PTR[12+r9] + mov DWORD PTR[r9],eax + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[4+r9],esi + mov ebx,esi + mov DWORD PTR[8+r9],ecx + mov edi,ecx + mov DWORD PTR[12+r9],edx + xor edi,edx + mov DWORD PTR[16+r9],ebp + and esi,edi + jmp $L$oop_ssse3 + +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + movups xmm1,XMMWORD PTR[16+r15] +DB 102,15,56,220,208 + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor 
edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + movups xmm0,XMMWORD PTR[32+r15] +DB 102,15,56,220,209 + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + movups xmm1,XMMWORD PTR[48+r15] +DB 102,15,56,220,208 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + cmp r8d,11 + jb $L$aesenclast5 + movups xmm0,XMMWORD PTR[64+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+r15] +DB 102,15,56,220,208 + je $L$aesenclast5 + movups xmm0,XMMWORD PTR[96+r15] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+r15] +DB 102,15,56,220,208 +$L$aesenclast5:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+r15] + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + movups XMMWORD PTR[48+r12*1+r13],xmm2 + mov r8,QWORD PTR[88+rsp] + + add eax,DWORD PTR[r9] + add esi,DWORD PTR[4+r9] + add ecx,DWORD PTR[8+r9] + mov DWORD PTR[r9],eax + add edx,DWORD PTR[12+r9] + mov DWORD PTR[4+r9],esi + add ebp,DWORD PTR[16+r9] + mov DWORD PTR[8+r9],ecx + mov DWORD PTR[12+r9],edx + mov DWORD PTR[16+r9],ebp + movups XMMWORD PTR[r8],xmm2 + movaps xmm6,XMMWORD PTR[((96+0))+rsp] + movaps xmm7,XMMWORD PTR[((96+16))+rsp] + movaps xmm8,XMMWORD PTR[((96+32))+rsp] + movaps xmm9,XMMWORD PTR[((96+48))+rsp] + movaps xmm10,XMMWORD PTR[((96+64))+rsp] + movaps xmm11,XMMWORD PTR[((96+80))+rsp] + movaps xmm12,XMMWORD PTR[((96+96))+rsp] + movaps xmm13,XMMWORD PTR[((96+112))+rsp] + movaps xmm14,XMMWORD PTR[((96+128))+rsp] + movaps xmm15,XMMWORD PTR[((96+144))+rsp] + lea rsi,QWORD PTR[264+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha1_enc_ssse3:: +aesni_cbc_sha1_enc_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h + +DB 65,69,83,78,73,45,67,66,67,43,83,72,65,49,32,115 +DB 116,105,116,99,104,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 + +ALIGN 32 +aesni_cbc_sha1_enc_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_sha1_enc_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD 
PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov r10,QWORD PTR[56+rsp] + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[(-8-160)+rax],xmm6 + movaps XMMWORD PTR[(-8-144)+rax],xmm7 + movaps XMMWORD PTR[(-8-128)+rax],xmm8 + movaps XMMWORD PTR[(-8-112)+rax],xmm9 + movaps XMMWORD PTR[(-8-96)+rax],xmm10 + movaps XMMWORD PTR[(-8-80)+rax],xmm11 + movaps XMMWORD PTR[(-8-64)+rax],xmm12 + movaps XMMWORD PTR[(-8-48)+rax],xmm13 + movaps XMMWORD PTR[(-8-32)+rax],xmm14 + movaps XMMWORD PTR[(-8-16)+rax],xmm15 +$L$prologue_shaext:: + movdqu xmm8,XMMWORD PTR[r9] + movd xmm9,DWORD PTR[16+r9] + movdqa xmm7,XMMWORD PTR[((K_XX_XX+80))] + + mov r11d,DWORD PTR[240+rcx] + sub rsi,rdi + movups xmm15,XMMWORD PTR[rcx] + movups xmm0,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[112+rcx] + + pshufd xmm8,xmm8,27 + pshufd xmm9,xmm9,27 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + movups xmm14,XMMWORD PTR[rdi] + xorps xmm14,xmm15 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 + movdqu xmm3,XMMWORD PTR[r10] + movdqa xmm12,xmm9 +DB 102,15,56,0,223 + movdqu xmm4,XMMWORD PTR[16+r10] + movdqa xmm11,xmm8 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 +DB 102,15,56,0,231 + + paddd xmm9,xmm3 + movdqu xmm5,XMMWORD PTR[32+r10] + lea r10,QWORD PTR[64+r10] + pxor xmm3,xmm12 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 + pxor xmm3,xmm12 + movdqa xmm10,xmm8 +DB 102,15,56,0,239 +DB 69,15,58,204,193,0 +DB 68,15,56,200,212 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 +DB 15,56,201,220 + movdqu xmm6,XMMWORD PTR[((-16))+r10] + movdqa xmm9,xmm8 +DB 102,15,56,0,247 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 +DB 69,15,58,204,194,0 +DB 68,15,56,200,205 + pxor xmm3,xmm5 +DB 15,56,201,229 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,0 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,0 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + cmp r11d,11 + jb $L$aesenclast6 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast6 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast6:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + movdqa xmm10,xmm8 +DB 69,15,58,204,193,0 +DB 68,15,56,200,212 + movups xmm14,XMMWORD PTR[16+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[rdi*1+rsi],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,236 + pxor xmm6,xmm4 +DB 15,56,201,220 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,1 +DB 68,15,56,200,205 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,245 + pxor xmm3,xmm5 +DB 15,56,201,229 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,1 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,1 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 
15,56,201,243 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,1 +DB 68,15,56,200,212 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 +DB 15,56,202,236 + pxor xmm6,xmm4 +DB 15,56,201,220 + cmp r11d,11 + jb $L$aesenclast7 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast7 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast7:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + movdqa xmm9,xmm8 +DB 69,15,58,204,194,1 +DB 68,15,56,200,205 + movups xmm14,XMMWORD PTR[32+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[16+rdi*1+rsi],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,245 + pxor xmm3,xmm5 +DB 15,56,201,229 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,2 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,2 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,2 +DB 68,15,56,200,212 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 +DB 15,56,202,236 + pxor xmm6,xmm4 +DB 15,56,201,220 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,2 +DB 68,15,56,200,205 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 +DB 15,56,202,245 + pxor xmm3,xmm5 +DB 15,56,201,229 + cmp r11d,11 + jb $L$aesenclast8 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast8 + movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast8:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + movdqa xmm10,xmm8 +DB 69,15,58,204,193,2 +DB 68,15,56,200,214 + movups xmm14,XMMWORD PTR[48+rdi] + xorps xmm14,xmm15 + movups XMMWORD PTR[32+rdi*1+rsi],xmm2 + xorps xmm2,xmm14 + movups xmm1,XMMWORD PTR[((-80))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,222 + pxor xmm4,xmm6 +DB 15,56,201,238 + movups xmm0,XMMWORD PTR[((-64))+rcx] +DB 102,15,56,220,209 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,3 +DB 68,15,56,200,203 + movups xmm1,XMMWORD PTR[((-48))+rcx] +DB 102,15,56,220,208 +DB 15,56,202,227 + pxor xmm5,xmm3 +DB 15,56,201,243 + movups xmm0,XMMWORD PTR[((-32))+rcx] +DB 102,15,56,220,209 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,3 +DB 68,15,56,200,212 +DB 15,56,202,236 + pxor xmm6,xmm4 + movups xmm1,XMMWORD PTR[((-16))+rcx] +DB 102,15,56,220,208 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,3 +DB 68,15,56,200,205 +DB 15,56,202,245 + movups xmm0,XMMWORD PTR[rcx] +DB 102,15,56,220,209 + movdqa xmm5,xmm12 + movdqa xmm10,xmm8 +DB 69,15,58,204,193,3 +DB 68,15,56,200,214 + movups xmm1,XMMWORD PTR[16+rcx] +DB 102,15,56,220,208 + movdqa xmm9,xmm8 +DB 69,15,58,204,194,3 +DB 68,15,56,200,205 + movups xmm0,XMMWORD PTR[32+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[48+rcx] +DB 102,15,56,220,208 + cmp r11d,11 + jb $L$aesenclast9 + movups xmm0,XMMWORD PTR[64+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[80+rcx] +DB 102,15,56,220,208 + je $L$aesenclast9 + 
movups xmm0,XMMWORD PTR[96+rcx] +DB 102,15,56,220,209 + movups xmm1,XMMWORD PTR[112+rcx] +DB 102,15,56,220,208 +$L$aesenclast9:: +DB 102,15,56,221,209 + movups xmm0,XMMWORD PTR[((16-112))+rcx] + dec rdx + + paddd xmm8,xmm11 + movups XMMWORD PTR[48+rdi*1+rsi],xmm2 + lea rdi,QWORD PTR[64+rdi] + jnz $L$oop_shaext + + pshufd xmm8,xmm8,27 + pshufd xmm9,xmm9,27 + movups XMMWORD PTR[r8],xmm2 + movdqu XMMWORD PTR[r9],xmm8 + movd DWORD PTR[16+r9],xmm9 + movaps xmm6,XMMWORD PTR[((-8-160))+rax] + movaps xmm7,XMMWORD PTR[((-8-144))+rax] + movaps xmm8,XMMWORD PTR[((-8-128))+rax] + movaps xmm9,XMMWORD PTR[((-8-112))+rax] + movaps xmm10,XMMWORD PTR[((-8-96))+rax] + movaps xmm11,XMMWORD PTR[((-8-80))+rax] + movaps xmm12,XMMWORD PTR[((-8-64))+rax] + movaps xmm13,XMMWORD PTR[((-8-48))+rax] + movaps xmm14,XMMWORD PTR[((-8-32))+rax] + movaps xmm15,XMMWORD PTR[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_sha1_enc_shaext:: +aesni_cbc_sha1_enc_shaext ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + lea r10,QWORD PTR[aesni_cbc_sha1_enc_shaext] + cmp rbx,r10 + jb $L$seh_no_shaext + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[168+rax] + jmp $L$common_seh_tail +$L$seh_no_shaext:: + lea rsi,QWORD PTR[96+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[264+rax] + + mov r15,QWORD PTR[rax] + mov r14,QWORD PTR[8+rax] + mov r13,QWORD PTR[16+rax] + mov r12,QWORD PTR[24+rax] + mov rbp,QWORD PTR[32+rax] + mov rbx,QWORD PTR[40+rax] + lea rax,QWORD PTR[48+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +ssse3_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_ssse3 + DD imagerel $L$SEH_begin_aesni_cbc_sha1_enc_shaext + DD imagerel $L$SEH_end_aesni_cbc_sha1_enc_shaext + DD imagerel $L$SEH_info_aesni_cbc_sha1_enc_shaext +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_aesni_cbc_sha1_enc_ssse3:: +DB 9,0,0,0 + DD imagerel ssse3_handler + 
DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 +$L$SEH_info_aesni_cbc_sha1_enc_shaext:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_shaext,imagerel $L$epilogue_shaext + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm new file mode 100644 index 00000000000000..94733526388550 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm @@ -0,0 +1,136 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC aesni_cbc_sha256_enc + +ALIGN 16 +aesni_cbc_sha256_enc PROC PUBLIC + xor eax,eax + cmp rcx,0 + je $L$probe + ud2 +$L$probe:: + DB 0F3h,0C3h ;repret +aesni_cbc_sha256_enc ENDP + +ALIGN 64 + +K256:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1 + DD 0,0,0,0,0,0,0,0 +DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54 +DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95 +DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98 +DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108 +DB 46,111,114,103,62,0 +ALIGN 64 + mov rsi,rax + mov rax,QWORD PTR[((64+56))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea rsi,QWORD PTR[((64+64))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov 
ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret + + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) + DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_xop + DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_xop + DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_xop + + DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx + DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx + DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx + +.pdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm new file mode 100644 index 00000000000000..53d8afc950f174 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm @@ -0,0 +1,3631 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC aesni_encrypt + +ALIGN 16 +aesni_encrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_1:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_1 +DB 102,15,56,221,209 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_encrypt ENDP + +PUBLIC aesni_decrypt + +ALIGN 16 +aesni_decrypt PROC PUBLIC + movups xmm2,XMMWORD PTR[rcx] + mov eax,DWORD PTR[240+r8] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_dec1_2:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_dec1_2 +DB 102,15,56,223,209 + movups XMMWORD PTR[rdx],xmm2 + DB 0F3h,0C3h ;repret +aesni_decrypt ENDP + +ALIGN 16 +_aesni_encrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop2:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop2 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + DB 0F3h,0C3h ;repret +_aesni_encrypt2 ENDP + +ALIGN 16 +_aesni_decrypt2 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop2:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop2 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,223,208 +DB 102,15,56,223,216 + DB 0F3h,0C3h ;repret +_aesni_decrypt2 ENDP + +ALIGN 16 +_aesni_encrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups 
xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$enc_loop3:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop3 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 + DB 0F3h,0C3h ;repret +_aesni_encrypt3 ENDP + +ALIGN 16 +_aesni_decrypt3 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax + add rax,16 + +$L$dec_loop3:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop3 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 + DB 0F3h,0C3h ;repret +_aesni_decrypt3 ENDP + +ALIGN 16 +_aesni_encrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 00fh,01fh,000h + add rax,16 + +$L$enc_loop4:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop4 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 + DB 0F3h,0C3h ;repret +_aesni_encrypt4 ENDP + +ALIGN 16 +_aesni_decrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + xorps xmm4,xmm0 + xorps xmm5,xmm0 + movups xmm0,XMMWORD PTR[32+rcx] + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 00fh,01fh,000h + add rax,16 + +$L$dec_loop4:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop4 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 + DB 0F3h,0C3h ;repret +_aesni_decrypt4 ENDP + +ALIGN 16 +_aesni_encrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,220,209 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,220,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,220,225 + pxor xmm7,xmm0 + add rax,16 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$enc_loop6_enter +ALIGN 16 +$L$enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 
+$L$enc_loop6_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop6 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + DB 0F3h,0C3h ;repret +_aesni_encrypt6 ENDP + +ALIGN 16 +_aesni_decrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,222,209 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,222,217 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,222,225 + pxor xmm7,xmm0 + add rax,16 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$dec_loop6_enter +ALIGN 16 +$L$dec_loop6:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +$L$dec_loop6_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop6 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + DB 0F3h,0C3h ;repret +_aesni_decrypt6 ENDP + +ALIGN 16 +_aesni_encrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 102,15,56,220,209 + add rax,16 + pxor xmm7,xmm0 +DB 102,15,56,220,217 + pxor xmm8,xmm0 + pxor xmm9,xmm0 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$enc_loop8_enter +ALIGN 16 +$L$enc_loop8:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +$L$enc_loop8_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$enc_loop8 + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 +DB 102,68,15,56,221,192 +DB 102,68,15,56,221,200 + DB 0F3h,0C3h ;repret +_aesni_encrypt8 ENDP + +ALIGN 16 +_aesni_decrypt8 PROC PRIVATE + movups xmm0,XMMWORD PTR[rcx] + shl eax,4 + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm2,xmm0 + xorps xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + pxor xmm6,xmm0 + lea rcx,QWORD PTR[32+rax*1+rcx] + neg rax +DB 
102,15,56,222,209 + add rax,16 + pxor xmm7,xmm0 +DB 102,15,56,222,217 + pxor xmm8,xmm0 + pxor xmm9,xmm0 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jmp $L$dec_loop8_enter +ALIGN 16 +$L$dec_loop8:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +$L$dec_loop8_enter:: + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$dec_loop8 + +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 +DB 102,68,15,56,223,192 +DB 102,68,15,56,223,200 + DB 0F3h,0C3h ;repret +_aesni_decrypt8 ENDP +PUBLIC aesni_ecb_encrypt + +ALIGN 16 +aesni_ecb_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ecb_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ecb_enc_body:: + and rdx,-16 + jz $L$ecb_ret + + mov eax,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov r10d,eax + test r8d,r8d + jz $L$ecb_decrypt + + cmp rdx,080h + jb $L$ecb_enc_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_enc_loop8_enter +ALIGN 16 +$L$ecb_enc_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_enc_loop8_enter:: + + call _aesni_encrypt8 + + sub rdx,080h + jnc $L$ecb_enc_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_enc_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_enc_one + movups xmm3,XMMWORD PTR[16+rdi] + je $L$ecb_enc_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb 
$L$ecb_enc_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_enc_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_enc_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_enc_six + movdqu xmm8,XMMWORD PTR[96+rdi] + call _aesni_encrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_3:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_3 +DB 102,15,56,221,209 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_two:: + call _aesni_encrypt2 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_three:: + call _aesni_encrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_four:: + call _aesni_encrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_five:: + xorps xmm7,xmm7 + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_enc_six:: + call _aesni_encrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + jmp $L$ecb_ret + +ALIGN 16 +$L$ecb_decrypt:: + cmp rdx,080h + jb $L$ecb_dec_tail + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqu xmm8,XMMWORD PTR[96+rdi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] + sub rdx,080h + jmp $L$ecb_dec_loop8_enter +ALIGN 16 +$L$ecb_dec_loop8:: + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movdqu xmm2,XMMWORD PTR[rdi] + mov eax,r10d + movups XMMWORD PTR[16+rsi],xmm3 + movdqu xmm3,XMMWORD PTR[16+rdi] + movups XMMWORD PTR[32+rsi],xmm4 + movdqu xmm4,XMMWORD PTR[32+rdi] + movups XMMWORD PTR[48+rsi],xmm5 + movdqu xmm5,XMMWORD PTR[48+rdi] + movups XMMWORD PTR[64+rsi],xmm6 + movdqu xmm6,XMMWORD PTR[64+rdi] + movups XMMWORD PTR[80+rsi],xmm7 + movdqu xmm7,XMMWORD PTR[80+rdi] + movups XMMWORD PTR[96+rsi],xmm8 + movdqu xmm8,XMMWORD PTR[96+rdi] + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + movdqu xmm9,XMMWORD PTR[112+rdi] + lea rdi,QWORD PTR[128+rdi] +$L$ecb_dec_loop8_enter:: + + call _aesni_decrypt8 + + movups xmm0,XMMWORD PTR[r11] + sub rdx,080h + jnc $L$ecb_dec_loop8 + + movups XMMWORD PTR[rsi],xmm2 + mov rcx,r11 + movups XMMWORD PTR[16+rsi],xmm3 + mov eax,r10d + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + add rdx,080h + jz $L$ecb_ret + +$L$ecb_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + cmp rdx,020h + jb $L$ecb_dec_one + movups xmm3,XMMWORD PTR[16+rdi] + je 
$L$ecb_dec_two + movups xmm4,XMMWORD PTR[32+rdi] + cmp rdx,040h + jb $L$ecb_dec_three + movups xmm5,XMMWORD PTR[48+rdi] + je $L$ecb_dec_four + movups xmm6,XMMWORD PTR[64+rdi] + cmp rdx,060h + jb $L$ecb_dec_five + movups xmm7,XMMWORD PTR[80+rdi] + je $L$ecb_dec_six + movups xmm8,XMMWORD PTR[96+rdi] + movups xmm0,XMMWORD PTR[rcx] + call _aesni_decrypt8 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_one:: + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_4:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_4 +DB 102,15,56,223,209 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_two:: + call _aesni_decrypt2 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_three:: + call _aesni_decrypt3 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_four:: + call _aesni_decrypt4 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_five:: + xorps xmm7,xmm7 + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + jmp $L$ecb_ret +ALIGN 16 +$L$ecb_dec_six:: + call _aesni_decrypt6 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + movups XMMWORD PTR[48+rsi],xmm5 + movups XMMWORD PTR[64+rsi],xmm6 + movups XMMWORD PTR[80+rsi],xmm7 + +$L$ecb_ret:: + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ecb_enc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ecb_encrypt:: +aesni_ecb_encrypt ENDP +PUBLIC aesni_ccm64_encrypt_blocks + +ALIGN 16 +aesni_ccm64_encrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_enc_body:: + mov eax,DWORD PTR[240+rcx] + movdqu xmm6,XMMWORD PTR[r8] + movdqa xmm9,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + shl eax,4 + mov r10d,16 + lea r11,QWORD PTR[rcx] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm2,xmm6 + lea rcx,QWORD PTR[32+rax*1+rcx] +DB 102,15,56,0,247 + sub r10,rax + jmp $L$ccm64_enc_outer +ALIGN 16 +$L$ccm64_enc_outer:: + movups xmm0,XMMWORD PTR[r11] + mov rax,r10 + movups xmm8,XMMWORD PTR[rdi] + + xorps xmm2,xmm0 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm0,xmm8 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR[32+r11] + +$L$ccm64_enc2_loop:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups 
xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$ccm64_enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 + paddq xmm6,xmm9 + dec rdx +DB 102,15,56,221,208 +DB 102,15,56,221,216 + + lea rdi,QWORD PTR[16+rdi] + xorps xmm8,xmm2 + movdqa xmm2,xmm6 + movups XMMWORD PTR[rsi],xmm8 +DB 102,15,56,0,215 + lea rsi,QWORD PTR[16+rsi] + jnz $L$ccm64_enc_outer + + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_enc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_encrypt_blocks:: +aesni_ccm64_encrypt_blocks ENDP +PUBLIC aesni_ccm64_decrypt_blocks + +ALIGN 16 +aesni_ccm64_decrypt_blocks PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ccm64_decrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 +$L$ccm64_dec_body:: + mov eax,DWORD PTR[240+rcx] + movups xmm6,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[r9] + movdqa xmm9,XMMWORD PTR[$L$increment64] + movdqa xmm7,XMMWORD PTR[$L$bswap_mask] + + movaps xmm2,xmm6 + mov r10d,eax + mov r11,rcx +DB 102,15,56,0,247 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_5:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_5 +DB 102,15,56,221,209 + shl r10d,4 + mov eax,16 + movups xmm8,XMMWORD PTR[rdi] + paddq xmm6,xmm9 + lea rdi,QWORD PTR[16+rdi] + sub rax,r10 + lea rcx,QWORD PTR[32+r10*1+r11] + mov r10,rax + jmp $L$ccm64_dec_outer +ALIGN 16 +$L$ccm64_dec_outer:: + xorps xmm8,xmm2 + movdqa xmm2,xmm6 + movups XMMWORD PTR[rsi],xmm8 + lea rsi,QWORD PTR[16+rsi] +DB 102,15,56,0,215 + + sub rdx,1 + jz $L$ccm64_dec_break + + movups xmm0,XMMWORD PTR[r11] + mov rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + xorps xmm2,xmm0 + xorps xmm3,xmm8 + movups xmm0,XMMWORD PTR[32+r11] + jmp $L$ccm64_dec2_loop +ALIGN 16 +$L$ccm64_dec2_loop:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR[rax*1+rcx] + add rax,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] + jnz $L$ccm64_dec2_loop + movups xmm8,XMMWORD PTR[rdi] + paddq xmm6,xmm9 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + lea rdi,QWORD PTR[16+rdi] + jmp $L$ccm64_dec_outer + +ALIGN 16 +$L$ccm64_dec_break:: + + mov eax,DWORD PTR[240+r11] + movups xmm0,XMMWORD PTR[r11] + movups xmm1,XMMWORD PTR[16+r11] + xorps xmm8,xmm0 + lea r11,QWORD PTR[32+r11] + xorps xmm3,xmm8 +$L$oop_enc1_6:: +DB 102,15,56,220,217 + dec eax + movups xmm1,XMMWORD PTR[r11] + lea r11,QWORD PTR[16+r11] + jnz $L$oop_enc1_6 +DB 102,15,56,221,217 + movups XMMWORD PTR[r9],xmm3 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$ccm64_dec_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ccm64_decrypt_blocks:: +aesni_ccm64_decrypt_blocks ENDP +PUBLIC aesni_ctr32_encrypt_blocks + +ALIGN 16 +aesni_ctr32_encrypt_blocks PROC 
PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_ctr32_encrypt_blocks:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,288 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 +$L$ctr32_body:: + lea rbp,QWORD PTR[((-8))+rax] + + cmp rdx,1 + je $L$ctr32_one_shortcut + + movdqu xmm2,XMMWORD PTR[r8] + movdqu xmm0,XMMWORD PTR[rcx] + mov r8d,DWORD PTR[12+r8] + pxor xmm2,xmm0 + mov r11d,DWORD PTR[12+rcx] + movdqa XMMWORD PTR[rsp],xmm2 + bswap r8d + movdqa xmm3,xmm2 + movdqa xmm4,xmm2 + movdqa xmm5,xmm2 + movdqa XMMWORD PTR[64+rsp],xmm2 + movdqa XMMWORD PTR[80+rsp],xmm2 + movdqa XMMWORD PTR[96+rsp],xmm2 + mov r10,rdx + movdqa XMMWORD PTR[112+rsp],xmm2 + + lea rax,QWORD PTR[1+r8] + lea rdx,QWORD PTR[2+r8] + bswap eax + bswap edx + xor eax,r11d + xor edx,r11d +DB 102,15,58,34,216,3 + lea rax,QWORD PTR[3+r8] + movdqa XMMWORD PTR[16+rsp],xmm3 +DB 102,15,58,34,226,3 + bswap eax + mov rdx,r10 + lea r10,QWORD PTR[4+r8] + movdqa XMMWORD PTR[32+rsp],xmm4 + xor eax,r11d + bswap r10d +DB 102,15,58,34,232,3 + xor r10d,r11d + movdqa XMMWORD PTR[48+rsp],xmm5 + lea r9,QWORD PTR[5+r8] + mov DWORD PTR[((64+12))+rsp],r10d + bswap r9d + lea r10,QWORD PTR[6+r8] + mov eax,DWORD PTR[240+rcx] + xor r9d,r11d + bswap r10d + mov DWORD PTR[((80+12))+rsp],r9d + xor r10d,r11d + lea r9,QWORD PTR[7+r8] + mov DWORD PTR[((96+12))+rsp],r10d + bswap r9d + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + xor r9d,r11d + and r10d,71303168 + mov DWORD PTR[((112+12))+rsp],r9d + + movups xmm1,XMMWORD PTR[16+rcx] + + movdqa xmm6,XMMWORD PTR[64+rsp] + movdqa xmm7,XMMWORD PTR[80+rsp] + + cmp rdx,8 + jb $L$ctr32_tail + + sub rdx,6 + cmp r10d,4194304 + je $L$ctr32_6x + + lea rcx,QWORD PTR[128+rcx] + sub rdx,2 + jmp $L$ctr32_loop8 + +ALIGN 16 +$L$ctr32_6x:: + shl eax,4 + mov r10d,48 + bswap r11d + lea rcx,QWORD PTR[32+rax*1+rcx] + sub r10,rax + jmp $L$ctr32_loop6 + +ALIGN 16 +$L$ctr32_loop6:: + add r8d,6 + movups xmm0,XMMWORD PTR[((-48))+r10*1+rcx] +DB 102,15,56,220,209 + mov eax,r8d + xor eax,r11d +DB 102,15,56,220,217 +DB 00fh,038h,0f1h,044h,024h,12 + lea eax,DWORD PTR[1+r8] +DB 102,15,56,220,225 + xor eax,r11d +DB 00fh,038h,0f1h,044h,024h,28 +DB 102,15,56,220,233 + lea eax,DWORD PTR[2+r8] + xor eax,r11d +DB 102,15,56,220,241 +DB 00fh,038h,0f1h,044h,024h,44 + lea eax,DWORD PTR[3+r8] +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-32))+r10*1+rcx] + xor eax,r11d + +DB 102,15,56,220,208 +DB 00fh,038h,0f1h,044h,024h,60 + lea eax,DWORD PTR[4+r8] +DB 102,15,56,220,216 + xor eax,r11d +DB 00fh,038h,0f1h,044h,024h,76 +DB 102,15,56,220,224 + lea eax,DWORD PTR[5+r8] + xor eax,r11d +DB 102,15,56,220,232 +DB 00fh,038h,0f1h,044h,024h,92 + mov rax,r10 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-16))+r10*1+rcx] + + call $L$enc_loop6 + + movdqu xmm8,XMMWORD PTR[rdi] + movdqu xmm9,XMMWORD PTR[16+rdi] + movdqu xmm10,XMMWORD PTR[32+rdi] + movdqu xmm11,XMMWORD PTR[48+rdi] + movdqu xmm12,XMMWORD PTR[64+rdi] + movdqu xmm13,XMMWORD PTR[80+rdi] + lea rdi,QWORD PTR[96+rdi] + movups xmm1,XMMWORD PTR[((-64))+r10*1+rcx] + pxor xmm8,xmm2 + movaps xmm2,XMMWORD 
PTR[rsp] + pxor xmm9,xmm3 + movaps xmm3,XMMWORD PTR[16+rsp] + pxor xmm10,xmm4 + movaps xmm4,XMMWORD PTR[32+rsp] + pxor xmm11,xmm5 + movaps xmm5,XMMWORD PTR[48+rsp] + pxor xmm12,xmm6 + movaps xmm6,XMMWORD PTR[64+rsp] + pxor xmm13,xmm7 + movaps xmm7,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[rsi],xmm8 + movdqu XMMWORD PTR[16+rsi],xmm9 + movdqu XMMWORD PTR[32+rsi],xmm10 + movdqu XMMWORD PTR[48+rsi],xmm11 + movdqu XMMWORD PTR[64+rsi],xmm12 + movdqu XMMWORD PTR[80+rsi],xmm13 + lea rsi,QWORD PTR[96+rsi] + + sub rdx,6 + jnc $L$ctr32_loop6 + + add rdx,6 + jz $L$ctr32_done + + lea eax,DWORD PTR[((-48))+r10] + lea rcx,QWORD PTR[((-80))+r10*1+rcx] + neg eax + shr eax,4 + jmp $L$ctr32_tail + +ALIGN 32 +$L$ctr32_loop8:: + add r8d,8 + movdqa xmm8,XMMWORD PTR[96+rsp] +DB 102,15,56,220,209 + mov r9d,r8d + movdqa xmm9,XMMWORD PTR[112+rsp] +DB 102,15,56,220,217 + bswap r9d + movups xmm0,XMMWORD PTR[((32-128))+rcx] +DB 102,15,56,220,225 + xor r9d,r11d + nop +DB 102,15,56,220,233 + mov DWORD PTR[((0+12))+rsp],r9d + lea r9,QWORD PTR[1+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((48-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((16+12))+rsp],r9d + lea r9,QWORD PTR[2+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((64-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((32+12))+rsp],r9d + lea r9,QWORD PTR[3+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((80-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((48+12))+rsp],r9d + lea r9,QWORD PTR[4+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((96-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((64+12))+rsp],r9d + lea r9,QWORD PTR[5+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((112-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,224 +DB 102,15,56,220,232 + mov DWORD PTR[((80+12))+rsp],r9d + lea r9,QWORD PTR[6+r8] +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((128-128))+rcx] + bswap r9d +DB 102,15,56,220,209 +DB 102,15,56,220,217 + xor r9d,r11d +DB 066h,090h +DB 102,15,56,220,225 +DB 102,15,56,220,233 + mov DWORD PTR[((96+12))+rsp],r9d + lea r9,QWORD PTR[7+r8] +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((144-128))+rcx] + bswap r9d +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 + xor r9d,r11d + movdqu xmm10,XMMWORD PTR[rdi] +DB 102,15,56,220,232 + mov DWORD PTR[((112+12))+rsp],r9d + cmp eax,11 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((160-128))+rcx] + + jb $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 
102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((176-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((192-128))+rcx] + je $L$ctr32_enc_done + +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movups xmm1,XMMWORD PTR[((208-128))+rcx] + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 +DB 102,68,15,56,220,192 +DB 102,68,15,56,220,200 + movups xmm0,XMMWORD PTR[((224-128))+rcx] + jmp $L$ctr32_enc_done + +ALIGN 16 +$L$ctr32_enc_done:: + movdqu xmm11,XMMWORD PTR[16+rdi] + pxor xmm10,xmm0 + movdqu xmm12,XMMWORD PTR[32+rdi] + pxor xmm11,xmm0 + movdqu xmm13,XMMWORD PTR[48+rdi] + pxor xmm12,xmm0 + movdqu xmm14,XMMWORD PTR[64+rdi] + pxor xmm13,xmm0 + movdqu xmm15,XMMWORD PTR[80+rdi] + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 +DB 102,68,15,56,220,201 + movdqu xmm1,XMMWORD PTR[96+rdi] + lea rdi,QWORD PTR[128+rdi] + +DB 102,65,15,56,221,210 + pxor xmm1,xmm0 + movdqu xmm10,XMMWORD PTR[((112-128))+rdi] +DB 102,65,15,56,221,219 + pxor xmm10,xmm0 + movdqa xmm11,XMMWORD PTR[rsp] +DB 102,65,15,56,221,228 +DB 102,65,15,56,221,237 + movdqa xmm12,XMMWORD PTR[16+rsp] + movdqa xmm13,XMMWORD PTR[32+rsp] +DB 102,65,15,56,221,246 +DB 102,65,15,56,221,255 + movdqa xmm14,XMMWORD PTR[48+rsp] + movdqa xmm15,XMMWORD PTR[64+rsp] +DB 102,68,15,56,221,193 + movdqa xmm0,XMMWORD PTR[80+rsp] + movups xmm1,XMMWORD PTR[((16-128))+rcx] +DB 102,69,15,56,221,202 + + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD PTR[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD PTR[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD PTR[80+rsi],xmm7 + movdqa xmm7,xmm0 + movups XMMWORD PTR[96+rsi],xmm8 + movups XMMWORD PTR[112+rsi],xmm9 + lea rsi,QWORD PTR[128+rsi] + + sub rdx,8 + jnc $L$ctr32_loop8 + + add rdx,8 + jz $L$ctr32_done + lea rcx,QWORD PTR[((-128))+rcx] + +$L$ctr32_tail:: + lea rcx,QWORD PTR[16+rcx] + cmp rdx,4 + jb $L$ctr32_loop3 + je $L$ctr32_loop4 + + shl eax,4 + movdqa xmm8,XMMWORD PTR[96+rsp] + pxor xmm9,xmm9 + + movups xmm0,XMMWORD PTR[16+rcx] +DB 102,15,56,220,209 +DB 102,15,56,220,217 + lea rcx,QWORD PTR[((32-16))+rax*1+rcx] + neg rax +DB 102,15,56,220,225 + add rax,16 + movups xmm10,XMMWORD PTR[rdi] +DB 102,15,56,220,233 +DB 102,15,56,220,241 + movups xmm11,XMMWORD PTR[16+rdi] + movups xmm12,XMMWORD PTR[32+rdi] +DB 102,15,56,220,249 +DB 102,68,15,56,220,193 + + call $L$enc_loop8_enter + + movdqu xmm13,XMMWORD PTR[48+rdi] + pxor xmm2,xmm10 + movdqu xmm10,XMMWORD PTR[64+rdi] + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm10 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + cmp rdx,6 + jb $L$ctr32_done + + movups xmm11,XMMWORD PTR[80+rdi] + xorps xmm7,xmm11 + movups XMMWORD PTR[80+rsi],xmm7 + je $L$ctr32_done + + movups xmm12,XMMWORD PTR[96+rdi] + 
xorps xmm8,xmm12 + movups XMMWORD PTR[96+rsi],xmm8 + jmp $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop4:: +DB 102,15,56,220,209 + lea rcx,QWORD PTR[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR[rcx] + jnz $L$ctr32_loop4 +DB 102,15,56,221,209 +DB 102,15,56,221,217 + movups xmm10,XMMWORD PTR[rdi] + movups xmm11,XMMWORD PTR[16+rdi] +DB 102,15,56,221,225 +DB 102,15,56,221,233 + movups xmm12,XMMWORD PTR[32+rdi] + movups xmm13,XMMWORD PTR[48+rdi] + + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm3,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[48+rsi],xmm5 + jmp $L$ctr32_done + +ALIGN 32 +$L$ctr32_loop3:: +DB 102,15,56,220,209 + lea rcx,QWORD PTR[16+rcx] + dec eax +DB 102,15,56,220,217 +DB 102,15,56,220,225 + movups xmm1,XMMWORD PTR[rcx] + jnz $L$ctr32_loop3 +DB 102,15,56,221,209 +DB 102,15,56,221,217 +DB 102,15,56,221,225 + + movups xmm10,XMMWORD PTR[rdi] + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + cmp rdx,2 + jb $L$ctr32_done + + movups xmm11,XMMWORD PTR[16+rdi] + xorps xmm3,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + je $L$ctr32_done + + movups xmm12,XMMWORD PTR[32+rdi] + xorps xmm4,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_one_shortcut:: + movups xmm2,XMMWORD PTR[r8] + movups xmm10,XMMWORD PTR[rdi] + mov eax,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_7 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + jmp $L$ctr32_done + +ALIGN 16 +$L$ctr32_done:: + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$ctr32_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_ctr32_encrypt_blocks:: +aesni_ctr32_encrypt_blocks ENDP +PUBLIC aesni_xts_encrypt + +ALIGN 16 +aesni_xts_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 +$L$xts_enc_body:: + lea rbp,QWORD PTR[((-8))+rax] + movups xmm2,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_8:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_8 +DB 
102,15,56,221,209 + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov eax,r10d + shl r10d,4 + mov r9,rdx + and rdx,-16 + + movups xmm1,XMMWORD PTR[16+r10*1+rcx] + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + movdqa xmm15,xmm2 + pshufd xmm9,xmm2,05fh + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm10,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm11,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm12,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pxor xmm14,xmm0 + pxor xmm15,xmm9 + movaps XMMWORD PTR[96+rsp],xmm1 + + sub rdx,16*6 + jc $L$xts_enc_short + + mov eax,16+96 + lea rcx,QWORD PTR[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + mov r10,rax + lea r8,QWORD PTR[$L$xts_magic] + jmp $L$xts_enc_grandloop + +ALIGN 32 +$L$xts_enc_grandloop:: + movdqu xmm2,XMMWORD PTR[rdi] + movdqa xmm8,xmm0 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm2,xmm10 + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm3,xmm11 +DB 102,15,56,220,209 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm4,xmm12 +DB 102,15,56,220,217 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm5,xmm13 +DB 102,15,56,220,225 + movdqu xmm7,XMMWORD PTR[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD PTR[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,220,233 + movups xmm0,XMMWORD PTR[32+r11] + lea rdi,QWORD PTR[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 +DB 102,15,56,220,241 + pxor xmm11,xmm9 + movdqa XMMWORD PTR[rsp],xmm10 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,220,208 + pxor xmm13,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,220,216 + pxor xmm14,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pxor xmm8,xmm9 + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[64+r11] + movdqa XMMWORD PTR[80+rsp],xmm8 + pshufd xmm9,xmm15,05fh + jmp $L$xts_enc_loop6 +ALIGN 32 +$L$xts_enc_loop6:: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx] + add rax,32 + +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx] + jnz $L$xts_enc_loop6 + + movdqa xmm8,XMMWORD PTR[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,220,209 + paddq xmm15,xmm15 + psrad xmm14,31 +DB 102,15,56,220,217 + pand xmm14,xmm8 + movups xmm10,XMMWORD PTR[r11] +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-64))+rcx] + + movdqa xmm14,xmm9 +DB 102,15,56,220,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 +DB 102,15,56,220,216 + psrad xmm14,31 + paddq xmm15,xmm15 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 +DB 102,15,56,220,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR[((-48))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,220,209 + pxor 
xmm11,xmm15 + psrad xmm14,31 +DB 102,15,56,220,217 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movdqa XMMWORD PTR[48+rsp],xmm13 + pxor xmm15,xmm14 +DB 102,15,56,220,241 + movaps xmm13,xmm12 + movdqa xmm14,xmm9 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[((-32))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,220,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,220,216 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,220,248 + + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,220,209 + pxor xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,220,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD PTR[r11] +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm1,XMMWORD PTR[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,221,84,36,0 + psrad xmm9,31 + paddq xmm15,xmm15 +DB 102,15,56,221,92,36,16 +DB 102,15,56,221,100,36,32 + pand xmm9,xmm8 + mov rax,r10 +DB 102,15,56,221,108,36,48 +DB 102,15,56,221,116,36,64 +DB 102,15,56,221,124,36,80 + pxor xmm15,xmm9 + + lea rsi,QWORD PTR[96+rsi] + movups XMMWORD PTR[(-96)+rsi],xmm2 + movups XMMWORD PTR[(-80)+rsi],xmm3 + movups XMMWORD PTR[(-64)+rsi],xmm4 + movups XMMWORD PTR[(-48)+rsi],xmm5 + movups XMMWORD PTR[(-32)+rsi],xmm6 + movups XMMWORD PTR[(-16)+rsi],xmm7 + sub rdx,16*6 + jnc $L$xts_enc_grandloop + + mov eax,16+96 + sub eax,r10d + mov rcx,r11 + shr eax,4 + +$L$xts_enc_short:: + mov r10d,eax + pxor xmm10,xmm0 + add rdx,16*6 + jz $L$xts_enc_done + + pxor xmm11,xmm0 + cmp rdx,020h + jb $L$xts_enc_one + pxor xmm12,xmm0 + je $L$xts_enc_two + + pxor xmm13,xmm0 + cmp rdx,040h + jb $L$xts_enc_three + pxor xmm14,xmm0 + je $L$xts_enc_four + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_encrypt6 + + xorps xmm2,xmm10 + movdqa xmm10,xmm15 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_one:: + movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_9:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_9 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_encrypt2 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_encrypt3 + + 
xorps xmm2,xmm10 + movdqa xmm10,xmm13 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_four:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_encrypt4 + + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_enc_done + +ALIGN 16 +$L$xts_enc_done:: + and r9,15 + jz $L$xts_enc_ret + mov rdx,r9 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[rdi] + movzx ecx,BYTE PTR[((-16))+rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[((-16))+rsi],al + mov BYTE PTR[rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_enc_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[((-16))+rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_10:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_10 +DB 102,15,56,221,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[(-16)+rsi],xmm2 + +$L$xts_enc_ret:: + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$xts_enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_encrypt:: +aesni_xts_encrypt ENDP +PUBLIC aesni_xts_decrypt + +ALIGN 16 +aesni_xts_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_xts_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,272 + and rsp,-16 + movaps XMMWORD PTR[(-168)+rax],xmm6 + movaps XMMWORD PTR[(-152)+rax],xmm7 + movaps XMMWORD PTR[(-136)+rax],xmm8 + movaps XMMWORD PTR[(-120)+rax],xmm9 + movaps XMMWORD PTR[(-104)+rax],xmm10 + movaps XMMWORD PTR[(-88)+rax],xmm11 + movaps XMMWORD PTR[(-72)+rax],xmm12 + movaps XMMWORD PTR[(-56)+rax],xmm13 + movaps XMMWORD PTR[(-40)+rax],xmm14 + movaps XMMWORD PTR[(-24)+rax],xmm15 +$L$xts_dec_body:: + lea rbp,QWORD PTR[((-8))+rax] + movups xmm2,XMMWORD PTR[r9] + mov eax,DWORD PTR[240+r8] + mov r10d,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[16+r8] + lea r8,QWORD PTR[32+r8] + xorps xmm2,xmm0 +$L$oop_enc1_11:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[r8] + lea r8,QWORD PTR[16+r8] + jnz $L$oop_enc1_11 +DB 102,15,56,221,209 + xor eax,eax + test rdx,15 + setnz al + shl rax,4 + sub rdx,rax + + movups xmm0,XMMWORD PTR[rcx] + mov r11,rcx + mov eax,r10d + shl r10d,4 + mov r9,rdx + and rdx,-16 + + movups xmm1,XMMWORD PTR[16+r10*1+rcx] + + movdqa xmm8,XMMWORD PTR[$L$xts_magic] + movdqa 
xmm15,xmm2 + pshufd xmm9,xmm2,05fh + pxor xmm1,xmm0 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm10,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm10,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm11,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm11,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm12,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm12,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 + paddd xmm9,xmm9 + movdqa xmm13,xmm15 + psrad xmm14,31 + paddq xmm15,xmm15 + pand xmm14,xmm8 + pxor xmm13,xmm0 + pxor xmm15,xmm14 + movdqa xmm14,xmm15 + psrad xmm9,31 + paddq xmm15,xmm15 + pand xmm9,xmm8 + pxor xmm14,xmm0 + pxor xmm15,xmm9 + movaps XMMWORD PTR[96+rsp],xmm1 + + sub rdx,16*6 + jc $L$xts_dec_short + + mov eax,16+96 + lea rcx,QWORD PTR[32+r10*1+r11] + sub rax,r10 + movups xmm1,XMMWORD PTR[16+r11] + mov r10,rax + lea r8,QWORD PTR[$L$xts_magic] + jmp $L$xts_dec_grandloop + +ALIGN 32 +$L$xts_dec_grandloop:: + movdqu xmm2,XMMWORD PTR[rdi] + movdqa xmm8,xmm0 + movdqu xmm3,XMMWORD PTR[16+rdi] + pxor xmm2,xmm10 + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm3,xmm11 +DB 102,15,56,222,209 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm4,xmm12 +DB 102,15,56,222,217 + movdqu xmm6,XMMWORD PTR[64+rdi] + pxor xmm5,xmm13 +DB 102,15,56,222,225 + movdqu xmm7,XMMWORD PTR[80+rdi] + pxor xmm8,xmm15 + movdqa xmm9,XMMWORD PTR[96+rsp] + pxor xmm6,xmm14 +DB 102,15,56,222,233 + movups xmm0,XMMWORD PTR[32+r11] + lea rdi,QWORD PTR[96+rdi] + pxor xmm7,xmm8 + + pxor xmm10,xmm9 +DB 102,15,56,222,241 + pxor xmm11,xmm9 + movdqa XMMWORD PTR[rsp],xmm10 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[48+r11] + pxor xmm12,xmm9 + +DB 102,15,56,222,208 + pxor xmm13,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm11 +DB 102,15,56,222,216 + pxor xmm14,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm12 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pxor xmm8,xmm9 + movdqa XMMWORD PTR[64+rsp],xmm14 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[64+r11] + movdqa XMMWORD PTR[80+rsp],xmm8 + pshufd xmm9,xmm15,05fh + jmp $L$xts_dec_loop6 +ALIGN 32 +$L$xts_dec_loop6:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-64))+rax*1+rcx] + add rax,32 + +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[((-80))+rax*1+rcx] + jnz $L$xts_dec_loop6 + + movdqa xmm8,XMMWORD PTR[r8] + movdqa xmm14,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + paddq xmm15,xmm15 + psrad xmm14,31 +DB 102,15,56,222,217 + pand xmm14,xmm8 + movups xmm10,XMMWORD PTR[r11] +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 + pxor xmm15,xmm14 + movaps xmm11,xmm10 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-64))+rcx] + + movdqa xmm14,xmm9 +DB 102,15,56,222,208 + paddd xmm9,xmm9 + pxor xmm10,xmm15 +DB 102,15,56,222,216 + psrad xmm14,31 + paddq xmm15,xmm15 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + pand xmm14,xmm8 + movaps xmm12,xmm11 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movdqa xmm14,xmm9 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR[((-48))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor xmm11,xmm15 + psrad xmm14,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movdqa XMMWORD PTR[48+rsp],xmm13 + pxor xmm15,xmm14 +DB 102,15,56,222,241 + movaps 
xmm13,xmm12 + movdqa xmm14,xmm9 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[((-32))+rcx] + + paddd xmm9,xmm9 +DB 102,15,56,222,208 + pxor xmm12,xmm15 + psrad xmm14,31 +DB 102,15,56,222,216 + paddq xmm15,xmm15 + pand xmm14,xmm8 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 + pxor xmm15,xmm14 + movaps xmm14,xmm13 +DB 102,15,56,222,248 + + movdqa xmm0,xmm9 + paddd xmm9,xmm9 +DB 102,15,56,222,209 + pxor xmm13,xmm15 + psrad xmm0,31 +DB 102,15,56,222,217 + paddq xmm15,xmm15 + pand xmm0,xmm8 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm15,xmm0 + movups xmm0,XMMWORD PTR[r11] +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm1,XMMWORD PTR[16+r11] + + pxor xmm14,xmm15 +DB 102,15,56,223,84,36,0 + psrad xmm9,31 + paddq xmm15,xmm15 +DB 102,15,56,223,92,36,16 +DB 102,15,56,223,100,36,32 + pand xmm9,xmm8 + mov rax,r10 +DB 102,15,56,223,108,36,48 +DB 102,15,56,223,116,36,64 +DB 102,15,56,223,124,36,80 + pxor xmm15,xmm9 + + lea rsi,QWORD PTR[96+rsi] + movups XMMWORD PTR[(-96)+rsi],xmm2 + movups XMMWORD PTR[(-80)+rsi],xmm3 + movups XMMWORD PTR[(-64)+rsi],xmm4 + movups XMMWORD PTR[(-48)+rsi],xmm5 + movups XMMWORD PTR[(-32)+rsi],xmm6 + movups XMMWORD PTR[(-16)+rsi],xmm7 + sub rdx,16*6 + jnc $L$xts_dec_grandloop + + mov eax,16+96 + sub eax,r10d + mov rcx,r11 + shr eax,4 + +$L$xts_dec_short:: + mov r10d,eax + pxor xmm10,xmm0 + pxor xmm11,xmm0 + add rdx,16*6 + jz $L$xts_dec_done + + pxor xmm12,xmm0 + cmp rdx,020h + jb $L$xts_dec_one + pxor xmm13,xmm0 + je $L$xts_dec_two + + pxor xmm14,xmm0 + cmp rdx,040h + jb $L$xts_dec_three + je $L$xts_dec_four + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqu xmm4,XMMWORD PTR[32+rdi] + pxor xmm2,xmm10 + movdqu xmm5,XMMWORD PTR[48+rdi] + pxor xmm3,xmm11 + movdqu xmm6,XMMWORD PTR[64+rdi] + lea rdi,QWORD PTR[80+rdi] + pxor xmm4,xmm12 + pxor xmm5,xmm13 + pxor xmm6,xmm14 + + call _aesni_decrypt6 + + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + xorps xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + xorps xmm6,xmm14 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm14,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pcmpgtd xmm14,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + pshufd xmm11,xmm14,013h + and r9,15 + jz $L$xts_dec_ret + + movdqa xmm10,xmm15 + paddq xmm15,xmm15 + pand xmm11,xmm8 + pxor xmm11,xmm15 + jmp $L$xts_dec_done2 + +ALIGN 16 +$L$xts_dec_one:: + movups xmm2,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_12:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_12 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movdqa xmm10,xmm11 + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm11,xmm12 + lea rsi,QWORD PTR[16+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_two:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + lea rdi,QWORD PTR[32+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + + call _aesni_decrypt2 + + xorps xmm2,xmm10 + movdqa xmm10,xmm12 + xorps xmm3,xmm11 + movdqa xmm11,xmm13 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + lea rsi,QWORD PTR[32+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_three:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + lea rdi,QWORD PTR[48+rdi] + xorps xmm2,xmm10 + xorps xmm3,xmm11 + xorps xmm4,xmm12 + + call _aesni_decrypt3 + + xorps xmm2,xmm10 + 
movdqa xmm10,xmm13 + xorps xmm3,xmm11 + movdqa xmm11,xmm14 + xorps xmm4,xmm12 + movups XMMWORD PTR[rsi],xmm2 + movups XMMWORD PTR[16+rsi],xmm3 + movups XMMWORD PTR[32+rsi],xmm4 + lea rsi,QWORD PTR[48+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_four:: + movups xmm2,XMMWORD PTR[rdi] + movups xmm3,XMMWORD PTR[16+rdi] + movups xmm4,XMMWORD PTR[32+rdi] + xorps xmm2,xmm10 + movups xmm5,XMMWORD PTR[48+rdi] + lea rdi,QWORD PTR[64+rdi] + xorps xmm3,xmm11 + xorps xmm4,xmm12 + xorps xmm5,xmm13 + + call _aesni_decrypt4 + + pxor xmm2,xmm10 + movdqa xmm10,xmm14 + pxor xmm3,xmm11 + movdqa xmm11,xmm15 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + jmp $L$xts_dec_done + +ALIGN 16 +$L$xts_dec_done:: + and r9,15 + jz $L$xts_dec_ret +$L$xts_dec_done2:: + mov rdx,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rdi] + xorps xmm2,xmm11 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_13:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_13 +DB 102,15,56,223,209 + xorps xmm2,xmm11 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+rdi] + movzx ecx,BYTE PTR[rsi] + lea rdi,QWORD PTR[1+rdi] + mov BYTE PTR[rsi],al + mov BYTE PTR[16+rsi],cl + lea rsi,QWORD PTR[1+rsi] + sub rdx,1 + jnz $L$xts_dec_steal + + sub rsi,r9 + mov rcx,r11 + mov eax,r10d + + movups xmm2,XMMWORD PTR[rsi] + xorps xmm2,xmm10 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_14:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_14 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movups XMMWORD PTR[rsi],xmm2 + +$L$xts_dec_ret:: + movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps xmm15,XMMWORD PTR[((-16))+rbp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$xts_dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_xts_decrypt:: +aesni_xts_decrypt ENDP +PUBLIC aesni_cbc_encrypt + +ALIGN 16 +aesni_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_aesni_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test rdx,rdx + jz $L$cbc_ret + + mov r10d,DWORD PTR[240+rcx] + mov r11,rcx + test r9d,r9d + jz $L$cbc_decrypt + + movups xmm2,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,16 + jb $L$cbc_enc_tail + sub rdx,16 + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movups xmm3,XMMWORD PTR[rdi] + lea rdi,QWORD PTR[16+rdi] + + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + xorps xmm3,xmm0 + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm3 +$L$oop_enc1_15:: +DB 102,15,56,220,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_15 +DB 102,15,56,221,209 + mov eax,r10d + mov rcx,r11 + movups XMMWORD PTR[rsi],xmm2 + lea rsi,QWORD PTR[16+rsi] + sub 
rdx,16 + jnc $L$cbc_enc_loop + add rdx,16 + jnz $L$cbc_enc_tail + movups XMMWORD PTR[r8],xmm2 + jmp $L$cbc_ret + +$L$cbc_enc_tail:: + mov rcx,rdx + xchg rsi,rdi + DD 09066A4F3h + mov ecx,16 + sub rcx,rdx + xor eax,eax + DD 09066AAF3h + lea rdi,QWORD PTR[((-16))+rdi] + mov eax,r10d + mov rsi,rdi + mov rcx,r11 + xor rdx,rdx + jmp $L$cbc_enc_loop + +ALIGN 16 +$L$cbc_decrypt:: + lea rax,QWORD PTR[rsp] + push rbp + sub rsp,176 + and rsp,-16 + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$cbc_decrypt_body:: + lea rbp,QWORD PTR[((-8))+rax] + movups xmm10,XMMWORD PTR[r8] + mov eax,r10d + cmp rdx,050h + jbe $L$cbc_dec_tail + + movups xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqa xmm15,xmm6 + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + cmp rdx,070h + jbe $L$cbc_dec_six_or_seven + + and r9d,71303168 + sub rdx,050h + cmp r9d,4194304 + je $L$cbc_dec_loop6_enter + sub rdx,020h + lea rcx,QWORD PTR[112+rcx] + jmp $L$cbc_dec_loop8_enter +ALIGN 16 +$L$cbc_dec_loop8:: + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] +$L$cbc_dec_loop8_enter:: + movdqu xmm8,XMMWORD PTR[96+rdi] + pxor xmm2,xmm0 + movdqu xmm9,XMMWORD PTR[112+rdi] + pxor xmm3,xmm0 + movups xmm1,XMMWORD PTR[((16-112))+rcx] + pxor xmm4,xmm0 + xor r11,r11 + cmp rdx,070h + pxor xmm5,xmm0 + pxor xmm6,xmm0 + pxor xmm7,xmm0 + pxor xmm8,xmm0 + +DB 102,15,56,222,209 + pxor xmm9,xmm0 + movups xmm0,XMMWORD PTR[((32-112))+rcx] +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 + setnc r11b + shl r11,7 +DB 102,68,15,56,222,201 + add r11,rdi + movups xmm1,XMMWORD PTR[((48-112))+rcx] +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((64-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((80-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((96-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((112-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((128-112))+rcx] + nop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups 
xmm1,XMMWORD PTR[((144-112))+rcx] + cmp eax,11 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((160-112))+rcx] + jb $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((176-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((192-112))+rcx] + je $L$cbc_dec_done +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movups xmm1,XMMWORD PTR[((208-112))+rcx] + nop +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 +DB 102,68,15,56,222,192 +DB 102,68,15,56,222,200 + movups xmm0,XMMWORD PTR[((224-112))+rcx] + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_done:: +DB 102,15,56,222,209 +DB 102,15,56,222,217 + pxor xmm10,xmm0 + pxor xmm11,xmm0 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + pxor xmm12,xmm0 + pxor xmm13,xmm0 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + pxor xmm14,xmm0 + pxor xmm15,xmm0 +DB 102,68,15,56,222,193 +DB 102,68,15,56,222,201 + movdqu xmm1,XMMWORD PTR[80+rdi] + +DB 102,65,15,56,223,210 + movdqu xmm10,XMMWORD PTR[96+rdi] + pxor xmm1,xmm0 +DB 102,65,15,56,223,219 + pxor xmm10,xmm0 + movdqu xmm0,XMMWORD PTR[112+rdi] +DB 102,65,15,56,223,228 + lea rdi,QWORD PTR[128+rdi] + movdqu xmm11,XMMWORD PTR[r11] +DB 102,65,15,56,223,237 +DB 102,65,15,56,223,246 + movdqu xmm12,XMMWORD PTR[16+r11] + movdqu xmm13,XMMWORD PTR[32+r11] +DB 102,65,15,56,223,255 +DB 102,68,15,56,223,193 + movdqu xmm14,XMMWORD PTR[48+r11] + movdqu xmm15,XMMWORD PTR[64+r11] +DB 102,69,15,56,223,202 + movdqa xmm10,xmm0 + movdqu xmm1,XMMWORD PTR[80+r11] + movups xmm0,XMMWORD PTR[((-112))+rcx] + + movups XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm11 + movups XMMWORD PTR[16+rsi],xmm3 + movdqa xmm3,xmm12 + movups XMMWORD PTR[32+rsi],xmm4 + movdqa xmm4,xmm13 + movups XMMWORD PTR[48+rsi],xmm5 + movdqa xmm5,xmm14 + movups XMMWORD PTR[64+rsi],xmm6 + movdqa xmm6,xmm15 + movups XMMWORD PTR[80+rsi],xmm7 + movdqa xmm7,xmm1 + movups XMMWORD PTR[96+rsi],xmm8 + lea rsi,QWORD PTR[112+rsi] + + sub rdx,080h + ja $L$cbc_dec_loop8 + + movaps xmm2,xmm9 + lea rcx,QWORD PTR[((-112))+rcx] + add rdx,070h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm9 + lea rsi,QWORD PTR[16+rsi] + cmp rdx,050h + jbe $L$cbc_dec_tail + + movaps xmm2,xmm11 +$L$cbc_dec_six_or_seven:: + cmp rdx,060h + ja $L$cbc_dec_seven + + movaps xmm8,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + movdqa xmm2,xmm7 + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_seven:: + movups xmm8,XMMWORD PTR[96+rdi] + xorps xmm9,xmm9 + call _aesni_decrypt8 + movups xmm9,XMMWORD PTR[80+rdi] + pxor xmm2,xmm10 + movups xmm10,XMMWORD PTR[96+rdi] + pxor xmm3,xmm11 + movdqu 
XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + movdqu XMMWORD PTR[64+rsi],xmm6 + pxor xmm8,xmm9 + movdqu XMMWORD PTR[80+rsi],xmm7 + lea rsi,QWORD PTR[96+rsi] + movdqa xmm2,xmm8 + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_loop6:: + movups XMMWORD PTR[rsi],xmm7 + lea rsi,QWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[16+rdi] + movdqa xmm11,xmm2 + movdqu xmm4,XMMWORD PTR[32+rdi] + movdqa xmm12,xmm3 + movdqu xmm5,XMMWORD PTR[48+rdi] + movdqa xmm13,xmm4 + movdqu xmm6,XMMWORD PTR[64+rdi] + movdqa xmm14,xmm5 + movdqu xmm7,XMMWORD PTR[80+rdi] + movdqa xmm15,xmm6 +$L$cbc_dec_loop6_enter:: + lea rdi,QWORD PTR[96+rdi] + movdqa xmm8,xmm7 + + call _aesni_decrypt6 + + pxor xmm2,xmm10 + movdqa xmm10,xmm8 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + mov rcx,r11 + movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm7,xmm15 + mov eax,r10d + movdqu XMMWORD PTR[64+rsi],xmm6 + lea rsi,QWORD PTR[80+rsi] + sub rdx,060h + ja $L$cbc_dec_loop6 + + movdqa xmm2,xmm7 + add rdx,050h + jle $L$cbc_dec_tail_collected + movups XMMWORD PTR[rsi],xmm7 + lea rsi,QWORD PTR[16+rsi] + +$L$cbc_dec_tail:: + movups xmm2,XMMWORD PTR[rdi] + sub rdx,010h + jbe $L$cbc_dec_one + + movups xmm3,XMMWORD PTR[16+rdi] + movaps xmm11,xmm2 + sub rdx,010h + jbe $L$cbc_dec_two + + movups xmm4,XMMWORD PTR[32+rdi] + movaps xmm12,xmm3 + sub rdx,010h + jbe $L$cbc_dec_three + + movups xmm5,XMMWORD PTR[48+rdi] + movaps xmm13,xmm4 + sub rdx,010h + jbe $L$cbc_dec_four + + movups xmm6,XMMWORD PTR[64+rdi] + movaps xmm14,xmm5 + movaps xmm15,xmm6 + xorps xmm7,xmm7 + call _aesni_decrypt6 + pxor xmm2,xmm10 + movaps xmm10,xmm15 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm6,xmm14 + movdqu XMMWORD PTR[48+rsi],xmm5 + lea rsi,QWORD PTR[64+rsi] + movdqa xmm2,xmm6 + sub rdx,010h + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_one:: + movaps xmm11,xmm2 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16:: +DB 102,15,56,222,209 + dec eax + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_16 +DB 102,15,56,223,209 + xorps xmm2,xmm10 + movaps xmm10,xmm11 + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_two:: + movaps xmm12,xmm3 + call _aesni_decrypt2 + pxor xmm2,xmm10 + movaps xmm10,xmm12 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + movdqa xmm2,xmm3 + lea rsi,QWORD PTR[16+rsi] + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_three:: + movaps xmm13,xmm4 + call _aesni_decrypt3 + pxor xmm2,xmm10 + movaps xmm10,xmm13 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + movdqa xmm2,xmm4 + lea rsi,QWORD PTR[32+rsi] + jmp $L$cbc_dec_tail_collected +ALIGN 16 +$L$cbc_dec_four:: + movaps xmm14,xmm5 + call _aesni_decrypt4 + pxor xmm2,xmm10 + movaps xmm10,xmm14 + pxor xmm3,xmm11 + movdqu XMMWORD PTR[rsi],xmm2 + pxor xmm4,xmm12 + movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm5,xmm13 + movdqu XMMWORD PTR[32+rsi],xmm4 + movdqa xmm2,xmm5 + lea rsi,QWORD PTR[48+rsi] + jmp $L$cbc_dec_tail_collected + +ALIGN 16 +$L$cbc_dec_tail_collected:: + movups XMMWORD PTR[r8],xmm10 + and rdx,15 + jnz 
$L$cbc_dec_tail_partial + movups XMMWORD PTR[rsi],xmm2 + jmp $L$cbc_dec_ret +ALIGN 16 +$L$cbc_dec_tail_partial:: + movaps XMMWORD PTR[rsp],xmm2 + mov rcx,16 + mov rdi,rsi + sub rcx,rdx + lea rsi,QWORD PTR[rsp] + DD 09066A4F3h + +$L$cbc_dec_ret:: + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[rbp] + pop rbp +$L$cbc_ret:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_aesni_cbc_encrypt:: +aesni_cbc_encrypt ENDP +PUBLIC aesni_set_decrypt_key + +ALIGN 16 +aesni_set_decrypt_key PROC PUBLIC +DB 048h,083h,0ECh,008h + call __aesni_set_encrypt_key + shl edx,4 + test eax,eax + jnz $L$dec_key_ret + lea rcx,QWORD PTR[16+rdx*1+r8] + + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] + movups XMMWORD PTR[rcx],xmm0 + movups XMMWORD PTR[r8],xmm1 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + +$L$dec_key_inverse:: + movups xmm0,XMMWORD PTR[r8] + movups xmm1,XMMWORD PTR[rcx] +DB 102,15,56,219,192 +DB 102,15,56,219,201 + lea r8,QWORD PTR[16+r8] + lea rcx,QWORD PTR[((-16))+rcx] + movups XMMWORD PTR[16+rcx],xmm0 + movups XMMWORD PTR[(-16)+r8],xmm1 + cmp rcx,r8 + ja $L$dec_key_inverse + + movups xmm0,XMMWORD PTR[r8] +DB 102,15,56,219,192 + movups XMMWORD PTR[rcx],xmm0 +$L$dec_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_decrypt_key:: +aesni_set_decrypt_key ENDP +PUBLIC aesni_set_encrypt_key + +ALIGN 16 +aesni_set_encrypt_key PROC PUBLIC +__aesni_set_encrypt_key:: +DB 048h,083h,0ECh,008h + mov rax,-1 + test rcx,rcx + jz $L$enc_key_ret + test r8,r8 + jz $L$enc_key_ret + + movups xmm0,XMMWORD PTR[rcx] + xorps xmm4,xmm4 + lea rax,QWORD PTR[16+r8] + cmp edx,256 + je $L$14rounds + cmp edx,192 + je $L$12rounds + cmp edx,128 + jne $L$bad_keybits + +$L$10rounds:: + mov edx,9 + movups XMMWORD PTR[r8],xmm0 +DB 102,15,58,223,200,1 + call $L$key_expansion_128_cold +DB 102,15,58,223,200,2 + call $L$key_expansion_128 +DB 102,15,58,223,200,4 + call $L$key_expansion_128 +DB 102,15,58,223,200,8 + call $L$key_expansion_128 +DB 102,15,58,223,200,16 + call $L$key_expansion_128 +DB 102,15,58,223,200,32 + call $L$key_expansion_128 +DB 102,15,58,223,200,64 + call $L$key_expansion_128 +DB 102,15,58,223,200,128 + call $L$key_expansion_128 +DB 102,15,58,223,200,27 + call $L$key_expansion_128 +DB 102,15,58,223,200,54 + call $L$key_expansion_128 + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[80+rax],edx + xor eax,eax + jmp $L$enc_key_ret + +ALIGN 16 +$L$12rounds:: + movq xmm2,QWORD PTR[16+rcx] + mov edx,11 + movups XMMWORD PTR[r8],xmm0 +DB 102,15,58,223,202,1 + call $L$key_expansion_192a_cold +DB 102,15,58,223,202,2 + call $L$key_expansion_192b +DB 102,15,58,223,202,4 + call $L$key_expansion_192a +DB 102,15,58,223,202,8 + call $L$key_expansion_192b +DB 102,15,58,223,202,16 + call $L$key_expansion_192a +DB 102,15,58,223,202,32 + call $L$key_expansion_192b +DB 102,15,58,223,202,64 + call $L$key_expansion_192a +DB 102,15,58,223,202,128 + call $L$key_expansion_192b + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[48+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$14rounds:: + movups xmm2,XMMWORD PTR[16+rcx] + mov edx,13 + lea rax,QWORD PTR[16+rax] + movups XMMWORD PTR[r8],xmm0 + movups XMMWORD PTR[16+r8],xmm2 +DB 
102,15,58,223,202,1 + call $L$key_expansion_256a_cold +DB 102,15,58,223,200,1 + call $L$key_expansion_256b +DB 102,15,58,223,202,2 + call $L$key_expansion_256a +DB 102,15,58,223,200,2 + call $L$key_expansion_256b +DB 102,15,58,223,202,4 + call $L$key_expansion_256a +DB 102,15,58,223,200,4 + call $L$key_expansion_256b +DB 102,15,58,223,202,8 + call $L$key_expansion_256a +DB 102,15,58,223,200,8 + call $L$key_expansion_256b +DB 102,15,58,223,202,16 + call $L$key_expansion_256a +DB 102,15,58,223,200,16 + call $L$key_expansion_256b +DB 102,15,58,223,202,32 + call $L$key_expansion_256a +DB 102,15,58,223,200,32 + call $L$key_expansion_256b +DB 102,15,58,223,202,64 + call $L$key_expansion_256a + movups XMMWORD PTR[rax],xmm0 + mov DWORD PTR[16+rax],edx + xor rax,rax + jmp $L$enc_key_ret + +ALIGN 16 +$L$bad_keybits:: + mov rax,-2 +$L$enc_key_ret:: + add rsp,8 + DB 0F3h,0C3h ;repret +$L$SEH_end_set_encrypt_key:: + +ALIGN 16 +$L$key_expansion_128:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_128_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192a:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_192a_cold:: + movaps xmm5,xmm2 +$L$key_expansion_192b_warm:: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_192b:: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD PTR[rax],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD PTR[16+rax],xmm3 + lea rax,QWORD PTR[32+rax] + jmp $L$key_expansion_192b_warm + +ALIGN 16 +$L$key_expansion_256a:: + movups XMMWORD PTR[rax],xmm2 + lea rax,QWORD PTR[16+rax] +$L$key_expansion_256a_cold:: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$key_expansion_256b:: + movups XMMWORD PTR[rax],xmm0 + lea rax,QWORD PTR[16+rax] + + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + DB 0F3h,0C3h ;repret +aesni_set_encrypt_key ENDP + +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$increment32:: + DD 6,6,6,0 +$L$increment64:: + DD 1,0,0,0 +$L$xts_magic:: + DD 087h,0,1,0 +$L$increment1:: +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 + +DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +ecb_ccm64_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + lea rax,QWORD PTR[88+rax] + + jmp $L$common_seh_tail +ecb_ccm64_se_handler ENDP + + +ALIGN 16 +ctr_xts_se_handler PROC PRIVATE + push rsi + 
push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[160+r8] + lea rsi,QWORD PTR[((-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + + jmp $L$common_rbp_tail +ctr_xts_se_handler ENDP + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_decrypt] + cmp rbx,r10 + jb $L$common_seh_tail + + lea r10,QWORD PTR[$L$cbc_decrypt_body] + cmp rbx,r10 + jb $L$restore_cbc_rax + + lea r10,QWORD PTR[$L$cbc_ret] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[16+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$common_rbp_tail:: + mov rax,QWORD PTR[160+r8] + mov rbp,QWORD PTR[rax] + lea rax,QWORD PTR[8+rax] + mov QWORD PTR[160+r8],rbp + jmp $L$common_seh_tail + +$L$restore_cbc_rax:: + mov rax,QWORD PTR[120+r8] + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_aesni_ecb_encrypt + DD imagerel $L$SEH_end_aesni_ecb_encrypt + DD imagerel $L$SEH_info_ecb + + DD imagerel $L$SEH_begin_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_encrypt_blocks + DD imagerel $L$SEH_info_ccm64_enc + + DD imagerel $L$SEH_begin_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_end_aesni_ccm64_decrypt_blocks + DD imagerel $L$SEH_info_ccm64_dec + + DD imagerel $L$SEH_begin_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_end_aesni_ctr32_encrypt_blocks + DD imagerel $L$SEH_info_ctr32 + + DD imagerel $L$SEH_begin_aesni_xts_encrypt + DD imagerel $L$SEH_end_aesni_xts_encrypt + DD imagerel $L$SEH_info_xts_enc + + DD imagerel $L$SEH_begin_aesni_xts_decrypt + DD imagerel $L$SEH_end_aesni_xts_decrypt + DD imagerel $L$SEH_info_xts_dec + DD imagerel $L$SEH_begin_aesni_cbc_encrypt + DD imagerel $L$SEH_end_aesni_cbc_encrypt + DD imagerel $L$SEH_info_cbc + + DD imagerel aesni_set_decrypt_key + DD imagerel $L$SEH_end_set_decrypt_key + DD imagerel $L$SEH_info_key + + DD imagerel aesni_set_encrypt_key + DD imagerel $L$SEH_end_set_encrypt_key + DD imagerel $L$SEH_info_key +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_ecb:: +DB 9,0,0,0 + DD imagerel ecb_ccm64_se_handler + DD imagerel $L$ecb_enc_body,imagerel $L$ecb_enc_ret +$L$SEH_info_ccm64_enc:: +DB 9,0,0,0 + DD imagerel ecb_ccm64_se_handler + DD imagerel 
$L$ccm64_enc_body,imagerel $L$ccm64_enc_ret +$L$SEH_info_ccm64_dec:: +DB 9,0,0,0 + DD imagerel ecb_ccm64_se_handler + DD imagerel $L$ccm64_dec_body,imagerel $L$ccm64_dec_ret +$L$SEH_info_ctr32:: +DB 9,0,0,0 + DD imagerel ctr_xts_se_handler + DD imagerel $L$ctr32_body,imagerel $L$ctr32_epilogue +$L$SEH_info_xts_enc:: +DB 9,0,0,0 + DD imagerel ctr_xts_se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue +$L$SEH_info_xts_dec:: +DB 9,0,0,0 + DD imagerel ctr_xts_se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue +$L$SEH_info_cbc:: +DB 9,0,0,0 + DD imagerel cbc_se_handler +$L$SEH_info_key:: +DB 001h,004h,001h,000h +DB 004h,002h,000h,000h + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/bsaes-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/bsaes-x86_64.asm new file mode 100644 index 00000000000000..3346a7e25b4657 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/bsaes-x86_64.asm @@ -0,0 +1,2734 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN asm_AES_encrypt:NEAR +EXTERN asm_AES_decrypt:NEAR + + +ALIGN 64 +_bsaes_encrypt8 PROC PRIVATE + lea r11,QWORD PTR[$L$BS0] + + movdqa xmm8,XMMWORD PTR[rax] + lea rax,QWORD PTR[16+rax] + movdqa xmm7,XMMWORD PTR[80+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 +_bsaes_encrypt8_bitslice:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp $L$enc_sbox +ALIGN 16 +$L$enc_loop:: + pxor xmm15,XMMWORD PTR[rax] + pxor xmm0,XMMWORD PTR[16+rax] + pxor xmm1,XMMWORD PTR[32+rax] + pxor xmm2,XMMWORD PTR[48+rax] +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,XMMWORD PTR[64+rax] + pxor xmm4,XMMWORD PTR[80+rax] +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,XMMWORD PTR[96+rax] + pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,223 +DB 
102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea rax,QWORD PTR[128+rax] +$L$enc_sbox:: + pxor xmm4,xmm5 + pxor xmm1,xmm0 + pxor xmm2,xmm15 + pxor xmm5,xmm1 + pxor xmm4,xmm15 + + pxor xmm5,xmm2 + pxor xmm2,xmm6 + pxor xmm6,xmm4 + pxor xmm2,xmm3 + pxor xmm3,xmm4 + pxor xmm2,xmm0 + + pxor xmm1,xmm6 + pxor xmm0,xmm4 + movdqa xmm10,xmm6 + movdqa xmm9,xmm0 + movdqa xmm8,xmm4 + movdqa xmm12,xmm1 + movdqa xmm11,xmm5 + + pxor xmm10,xmm3 + pxor xmm9,xmm1 + pxor xmm8,xmm2 + movdqa xmm13,xmm10 + pxor xmm12,xmm3 + movdqa xmm7,xmm9 + pxor xmm11,xmm15 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm2 + pxor xmm11,xmm15 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm6 + movdqa xmm11,xmm4 + pxor xmm12,xmm0 + pxor xmm11,xmm5 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm1 + pxor xmm7,xmm13 + movdqa xmm12,xmm3 + pxor xmm8,xmm13 + movdqa xmm13,xmm0 + pand xmm11,xmm2 + movdqa xmm14,xmm6 + pand xmm12,xmm15 + pand xmm13,xmm4 + por xmm14,xmm5 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm5 + movdqa xmm7,xmm4 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm5 + pxor xmm5,xmm4 + pand xmm4,xmm14 + pand xmm5,xmm13 + pxor xmm5,xmm4 + pxor xmm4,xmm9 + pxor xmm11,xmm15 + pxor xmm7,xmm2 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm2 + pand xmm7,xmm14 + pand xmm2,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm2 + pxor xmm11,xmm10 + pxor xmm2,xmm9 + pxor xmm5,xmm11 + pxor xmm15,xmm11 + pxor xmm4,xmm7 + pxor xmm2,xmm7 + + movdqa xmm11,xmm6 + movdqa xmm7,xmm0 + pxor xmm11,xmm3 + pxor xmm7,xmm1 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm3 + pxor xmm11,xmm7 + pxor xmm3,xmm1 + pand xmm7,xmm14 + pand xmm1,xmm12 + pand xmm11,xmm13 + pand xmm3,xmm8 + pxor xmm7,xmm11 + pxor xmm3,xmm1 + pxor xmm11,xmm10 + pxor xmm1,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm6 + pxor xmm6,xmm0 + pand xmm0,xmm14 + pand xmm6,xmm13 + pxor xmm6,xmm0 + pxor xmm0,xmm10 + pxor xmm6,xmm11 + pxor xmm3,xmm11 + pxor xmm0,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm15 + pxor xmm0,xmm5 + pxor xmm3,xmm6 + pxor xmm5,xmm15 + pxor xmm15,xmm0 + + pxor xmm0,xmm4 + pxor xmm4,xmm1 + pxor xmm1,xmm2 + pxor xmm2,xmm4 + pxor xmm3,xmm4 + + pxor xmm5,xmm2 + dec r10d + jl $L$enc_done + pshufd xmm7,xmm15,093h + pshufd xmm8,xmm0,093h + pxor xmm15,xmm7 + pshufd xmm9,xmm3,093h + pxor xmm0,xmm8 + pshufd xmm10,xmm5,093h + pxor xmm3,xmm9 + pshufd xmm11,xmm2,093h + pxor xmm5,xmm10 + pshufd xmm12,xmm6,093h + pxor xmm2,xmm11 + pshufd xmm13,xmm1,093h + pxor xmm6,xmm12 + pshufd xmm14,xmm4,093h + pxor xmm1,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd 
xmm15,xmm15,04Eh + pxor xmm9,xmm0 + pshufd xmm0,xmm0,04Eh + pxor xmm12,xmm2 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm5 + pshufd xmm7,xmm2,04Eh + pxor xmm14,xmm1 + pshufd xmm8,xmm6,04Eh + pxor xmm10,xmm3 + pshufd xmm2,xmm5,04Eh + pxor xmm10,xmm4 + pshufd xmm6,xmm4,04Eh + pxor xmm11,xmm4 + pshufd xmm5,xmm1,04Eh + pxor xmm7,xmm11 + pshufd xmm1,xmm3,04Eh + pxor xmm8,xmm12 + pxor xmm2,xmm10 + pxor xmm6,xmm14 + pxor xmm5,xmm13 + movdqa xmm3,xmm7 + pxor xmm1,xmm9 + movdqa xmm4,xmm8 + movdqa xmm7,XMMWORD PTR[48+r11] + jnz $L$enc_loop + movdqa xmm7,XMMWORD PTR[64+r11] + jmp $L$enc_loop +ALIGN 16 +$L$enc_done:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm2 + psrlq xmm2,1 + pxor xmm1,xmm4 + pxor xmm2,xmm6 + pand xmm1,xmm7 + pand xmm2,xmm7 + pxor xmm4,xmm1 + psllq xmm1,1 + pxor xmm6,xmm2 + psllq xmm2,1 + pxor xmm1,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm3,xmm5 + pxor xmm15,xmm0 + pand xmm3,xmm7 + pand xmm15,xmm7 + pxor xmm5,xmm3 + psllq xmm3,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm3,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm2 + psrlq xmm2,2 + pxor xmm6,xmm4 + pxor xmm2,xmm1 + pand xmm6,xmm8 + pand xmm2,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm1,xmm2 + psllq xmm2,2 + pxor xmm6,xmm9 + pxor xmm2,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm5 + pxor xmm15,xmm3 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm5,xmm0 + psllq xmm0,2 + pxor xmm3,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,4 + movdqa xmm10,xmm3 + psrlq xmm3,4 + pxor xmm5,xmm4 + pxor xmm3,xmm1 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm4,xmm5 + psllq xmm5,4 + pxor xmm1,xmm3 + psllq xmm3,4 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm2 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm2,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[rax] + pxor xmm3,xmm7 + pxor xmm5,xmm7 + pxor xmm2,xmm7 + pxor xmm6,xmm7 + pxor xmm1,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret +_bsaes_encrypt8 ENDP + + +ALIGN 64 +_bsaes_decrypt8 PROC PRIVATE + lea r11,QWORD PTR[$L$BS0] + + movdqa xmm8,XMMWORD PTR[rax] + lea rax,QWORD PTR[16+rax] + movdqa xmm7,XMMWORD PTR[((-48))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm3 + psrlq xmm3,1 + pxor xmm5,xmm6 + pxor xmm3,xmm4 + pand xmm5,xmm7 + pand xmm3,xmm7 + pxor xmm6,xmm5 + psllq xmm5,1 + pxor xmm4,xmm3 + psllq xmm3,1 + pxor xmm5,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm1 + psrlq xmm1,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm1,xmm2 + pxor xmm15,xmm0 + pand xmm1,xmm7 + pand xmm15,xmm7 + pxor xmm2,xmm1 + psllq xmm1,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm1,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm4 + psrlq xmm4,2 + movdqa xmm10,xmm3 + psrlq xmm3,2 + pxor xmm4,xmm6 + pxor xmm3,xmm5 + pand 
xmm4,xmm8 + pand xmm3,xmm8 + pxor xmm6,xmm4 + psllq xmm4,2 + pxor xmm5,xmm3 + psllq xmm3,2 + pxor xmm4,xmm9 + pxor xmm3,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm2 + pxor xmm15,xmm1 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm2,xmm0 + psllq xmm0,2 + pxor xmm1,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm2 + psrlq xmm2,4 + movdqa xmm10,xmm1 + psrlq xmm1,4 + pxor xmm2,xmm6 + pxor xmm1,xmm5 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm6,xmm2 + psllq xmm2,4 + pxor xmm5,xmm1 + psllq xmm1,4 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm4 + pxor xmm15,xmm3 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm4,xmm0 + psllq xmm0,4 + pxor xmm3,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + dec r10d + jmp $L$dec_sbox +ALIGN 16 +$L$dec_loop:: + pxor xmm15,XMMWORD PTR[rax] + pxor xmm0,XMMWORD PTR[16+rax] + pxor xmm1,XMMWORD PTR[32+rax] + pxor xmm2,XMMWORD PTR[48+rax] +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,XMMWORD PTR[64+rax] + pxor xmm4,XMMWORD PTR[80+rax] +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,XMMWORD PTR[96+rax] + pxor xmm6,XMMWORD PTR[112+rax] +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea rax,QWORD PTR[128+rax] +$L$dec_sbox:: + pxor xmm2,xmm3 + + pxor xmm3,xmm6 + pxor xmm1,xmm6 + pxor xmm5,xmm3 + pxor xmm6,xmm5 + pxor xmm0,xmm6 + + pxor xmm15,xmm0 + pxor xmm1,xmm4 + pxor xmm2,xmm15 + pxor xmm4,xmm15 + pxor xmm0,xmm2 + movdqa xmm10,xmm2 + movdqa xmm9,xmm6 + movdqa xmm8,xmm0 + movdqa xmm12,xmm3 + movdqa xmm11,xmm4 + + pxor xmm10,xmm15 + pxor xmm9,xmm3 + pxor xmm8,xmm5 + movdqa xmm13,xmm10 + pxor xmm12,xmm15 + movdqa xmm7,xmm9 + pxor xmm11,xmm1 + movdqa xmm14,xmm10 + + por xmm9,xmm8 + por xmm10,xmm11 + pxor xmm14,xmm7 + pand xmm13,xmm11 + pxor xmm11,xmm8 + pand xmm7,xmm8 + pand xmm14,xmm11 + movdqa xmm11,xmm5 + pxor xmm11,xmm1 + pand xmm12,xmm11 + pxor xmm10,xmm12 + pxor xmm9,xmm12 + movdqa xmm12,xmm2 + movdqa xmm11,xmm0 + pxor xmm12,xmm6 + pxor xmm11,xmm4 + movdqa xmm8,xmm12 + pand xmm12,xmm11 + por xmm8,xmm11 + pxor xmm7,xmm12 + pxor xmm10,xmm14 + pxor xmm9,xmm13 + pxor xmm8,xmm14 + movdqa xmm11,xmm3 + pxor xmm7,xmm13 + movdqa xmm12,xmm15 + pxor xmm8,xmm13 + movdqa xmm13,xmm6 + pand xmm11,xmm5 + movdqa xmm14,xmm2 + pand xmm12,xmm1 + pand xmm13,xmm0 + por xmm14,xmm4 + pxor xmm10,xmm11 + pxor xmm9,xmm12 + pxor xmm8,xmm13 + pxor xmm7,xmm14 + + + + + + movdqa xmm11,xmm10 + pand xmm10,xmm8 + pxor xmm11,xmm9 + + movdqa xmm13,xmm7 + movdqa xmm14,xmm11 + pxor xmm13,xmm10 + pand xmm14,xmm13 + + movdqa xmm12,xmm8 + pxor xmm14,xmm9 + pxor xmm12,xmm7 + + pxor xmm10,xmm9 + + pand xmm12,xmm10 + + movdqa xmm9,xmm13 + pxor xmm12,xmm7 + + pxor xmm9,xmm12 + pxor xmm8,xmm12 + + pand xmm9,xmm7 + + pxor xmm13,xmm9 + pxor xmm8,xmm9 + + pand xmm13,xmm14 + + pxor xmm13,xmm11 + movdqa xmm11,xmm4 + movdqa xmm7,xmm0 + movdqa xmm9,xmm14 + pxor xmm9,xmm13 + pand xmm9,xmm4 + pxor xmm4,xmm0 + pand xmm0,xmm14 + pand xmm4,xmm13 + pxor xmm4,xmm0 + pxor xmm0,xmm9 + pxor xmm11,xmm1 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm1 + pxor xmm11,xmm7 + pxor xmm1,xmm5 + pand xmm7,xmm14 + pand xmm5,xmm12 + pand xmm11,xmm13 + pand xmm1,xmm8 + pxor xmm7,xmm11 + pxor xmm1,xmm5 + pxor xmm11,xmm10 + pxor xmm5,xmm9 + pxor xmm4,xmm11 + pxor xmm1,xmm11 + pxor xmm0,xmm7 + pxor xmm5,xmm7 + + movdqa xmm11,xmm2 
+ movdqa xmm7,xmm6 + pxor xmm11,xmm15 + pxor xmm7,xmm3 + movdqa xmm10,xmm14 + movdqa xmm9,xmm12 + pxor xmm10,xmm13 + pxor xmm9,xmm8 + pand xmm10,xmm11 + pand xmm9,xmm15 + pxor xmm11,xmm7 + pxor xmm15,xmm3 + pand xmm7,xmm14 + pand xmm3,xmm12 + pand xmm11,xmm13 + pand xmm15,xmm8 + pxor xmm7,xmm11 + pxor xmm15,xmm3 + pxor xmm11,xmm10 + pxor xmm3,xmm9 + pxor xmm14,xmm12 + pxor xmm13,xmm8 + movdqa xmm10,xmm14 + pxor xmm10,xmm13 + pand xmm10,xmm2 + pxor xmm2,xmm6 + pand xmm6,xmm14 + pand xmm2,xmm13 + pxor xmm2,xmm6 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm15,xmm11 + pxor xmm6,xmm7 + pxor xmm3,xmm7 + pxor xmm0,xmm6 + pxor xmm5,xmm4 + + pxor xmm3,xmm0 + pxor xmm1,xmm6 + pxor xmm4,xmm6 + pxor xmm3,xmm1 + pxor xmm6,xmm15 + pxor xmm3,xmm4 + pxor xmm2,xmm5 + pxor xmm5,xmm0 + pxor xmm2,xmm3 + + pxor xmm3,xmm15 + pxor xmm6,xmm2 + dec r10d + jl $L$dec_done + + pshufd xmm7,xmm15,04Eh + pshufd xmm13,xmm2,04Eh + pxor xmm7,xmm15 + pshufd xmm14,xmm4,04Eh + pxor xmm13,xmm2 + pshufd xmm8,xmm0,04Eh + pxor xmm14,xmm4 + pshufd xmm9,xmm5,04Eh + pxor xmm8,xmm0 + pshufd xmm10,xmm3,04Eh + pxor xmm9,xmm5 + pxor xmm15,xmm13 + pxor xmm0,xmm13 + pshufd xmm11,xmm1,04Eh + pxor xmm10,xmm3 + pxor xmm5,xmm7 + pxor xmm3,xmm8 + pshufd xmm12,xmm6,04Eh + pxor xmm11,xmm1 + pxor xmm0,xmm14 + pxor xmm1,xmm9 + pxor xmm12,xmm6 + + pxor xmm5,xmm14 + pxor xmm3,xmm13 + pxor xmm1,xmm13 + pxor xmm6,xmm10 + pxor xmm2,xmm11 + pxor xmm1,xmm14 + pxor xmm6,xmm14 + pxor xmm4,xmm12 + pshufd xmm7,xmm15,093h + pshufd xmm8,xmm0,093h + pxor xmm15,xmm7 + pshufd xmm9,xmm5,093h + pxor xmm0,xmm8 + pshufd xmm10,xmm3,093h + pxor xmm5,xmm9 + pshufd xmm11,xmm1,093h + pxor xmm3,xmm10 + pshufd xmm12,xmm6,093h + pxor xmm1,xmm11 + pshufd xmm13,xmm2,093h + pxor xmm6,xmm12 + pshufd xmm14,xmm4,093h + pxor xmm2,xmm13 + pxor xmm4,xmm14 + + pxor xmm8,xmm15 + pxor xmm7,xmm4 + pxor xmm8,xmm4 + pshufd xmm15,xmm15,04Eh + pxor xmm9,xmm0 + pshufd xmm0,xmm0,04Eh + pxor xmm12,xmm1 + pxor xmm15,xmm7 + pxor xmm13,xmm6 + pxor xmm0,xmm8 + pxor xmm11,xmm3 + pshufd xmm7,xmm1,04Eh + pxor xmm14,xmm2 + pshufd xmm8,xmm6,04Eh + pxor xmm10,xmm5 + pshufd xmm1,xmm3,04Eh + pxor xmm10,xmm4 + pshufd xmm6,xmm4,04Eh + pxor xmm11,xmm4 + pshufd xmm3,xmm2,04Eh + pxor xmm7,xmm11 + pshufd xmm2,xmm5,04Eh + pxor xmm8,xmm12 + pxor xmm10,xmm1 + pxor xmm6,xmm14 + pxor xmm13,xmm3 + movdqa xmm3,xmm7 + pxor xmm2,xmm9 + movdqa xmm5,xmm13 + movdqa xmm4,xmm8 + movdqa xmm1,xmm2 + movdqa xmm2,xmm10 + movdqa xmm7,XMMWORD PTR[((-16))+r11] + jnz $L$dec_loop + movdqa xmm7,XMMWORD PTR[((-32))+r11] + jmp $L$dec_loop +ALIGN 16 +$L$dec_done:: + movdqa xmm7,XMMWORD PTR[r11] + movdqa xmm8,XMMWORD PTR[16+r11] + movdqa xmm9,xmm2 + psrlq xmm2,1 + movdqa xmm10,xmm1 + psrlq xmm1,1 + pxor xmm2,xmm4 + pxor xmm1,xmm6 + pand xmm2,xmm7 + pand xmm1,xmm7 + pxor xmm4,xmm2 + psllq xmm2,1 + pxor xmm6,xmm1 + psllq xmm1,1 + pxor xmm2,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm5 + psrlq xmm5,1 + movdqa xmm10,xmm15 + psrlq xmm15,1 + pxor xmm5,xmm3 + pxor xmm15,xmm0 + pand xmm5,xmm7 + pand xmm15,xmm7 + pxor xmm3,xmm5 + psllq xmm5,1 + pxor xmm0,xmm15 + psllq xmm15,1 + pxor xmm5,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[32+r11] + movdqa xmm9,xmm6 + psrlq xmm6,2 + movdqa xmm10,xmm1 + psrlq xmm1,2 + pxor xmm6,xmm4 + pxor xmm1,xmm2 + pand xmm6,xmm8 + pand xmm1,xmm8 + pxor xmm4,xmm6 + psllq xmm6,2 + pxor xmm2,xmm1 + psllq xmm1,2 + pxor xmm6,xmm9 + pxor xmm1,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,2 + movdqa xmm10,xmm15 + psrlq xmm15,2 + pxor xmm0,xmm3 + pxor xmm15,xmm5 + pand xmm0,xmm8 + pand xmm15,xmm8 + pxor xmm3,xmm0 + psllq xmm0,2 + 
pxor xmm5,xmm15 + psllq xmm15,2 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm9,xmm3 + psrlq xmm3,4 + movdqa xmm10,xmm5 + psrlq xmm5,4 + pxor xmm3,xmm4 + pxor xmm5,xmm2 + pand xmm3,xmm7 + pand xmm5,xmm7 + pxor xmm4,xmm3 + psllq xmm3,4 + pxor xmm2,xmm5 + psllq xmm5,4 + pxor xmm3,xmm9 + pxor xmm5,xmm10 + movdqa xmm9,xmm0 + psrlq xmm0,4 + movdqa xmm10,xmm15 + psrlq xmm15,4 + pxor xmm0,xmm6 + pxor xmm15,xmm1 + pand xmm0,xmm7 + pand xmm15,xmm7 + pxor xmm6,xmm0 + psllq xmm0,4 + pxor xmm1,xmm15 + psllq xmm15,4 + pxor xmm0,xmm9 + pxor xmm15,xmm10 + movdqa xmm7,XMMWORD PTR[rax] + pxor xmm5,xmm7 + pxor xmm3,xmm7 + pxor xmm1,xmm7 + pxor xmm6,xmm7 + pxor xmm2,xmm7 + pxor xmm4,xmm7 + pxor xmm15,xmm7 + pxor xmm0,xmm7 + DB 0F3h,0C3h ;repret +_bsaes_decrypt8 ENDP + +ALIGN 16 +_bsaes_key_convert PROC PRIVATE + lea r11,QWORD PTR[$L$masks] + movdqu xmm7,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + movdqa xmm0,XMMWORD PTR[r11] + movdqa xmm1,XMMWORD PTR[16+r11] + movdqa xmm2,XMMWORD PTR[32+r11] + movdqa xmm3,XMMWORD PTR[48+r11] + movdqa xmm4,XMMWORD PTR[64+r11] + pcmpeqd xmm5,xmm5 + + movdqu xmm6,XMMWORD PTR[rcx] + movdqa XMMWORD PTR[rax],xmm7 + lea rax,QWORD PTR[16+rax] + dec r10d + jmp $L$key_loop +ALIGN 16 +$L$key_loop:: +DB 102,15,56,0,244 + + movdqa xmm8,xmm0 + movdqa xmm9,xmm1 + + pand xmm8,xmm6 + pand xmm9,xmm6 + movdqa xmm10,xmm2 + pcmpeqb xmm8,xmm0 + psllq xmm0,4 + movdqa xmm11,xmm3 + pcmpeqb xmm9,xmm1 + psllq xmm1,4 + + pand xmm10,xmm6 + pand xmm11,xmm6 + movdqa xmm12,xmm0 + pcmpeqb xmm10,xmm2 + psllq xmm2,4 + movdqa xmm13,xmm1 + pcmpeqb xmm11,xmm3 + psllq xmm3,4 + + movdqa xmm14,xmm2 + movdqa xmm15,xmm3 + pxor xmm8,xmm5 + pxor xmm9,xmm5 + + pand xmm12,xmm6 + pand xmm13,xmm6 + movdqa XMMWORD PTR[rax],xmm8 + pcmpeqb xmm12,xmm0 + psrlq xmm0,4 + movdqa XMMWORD PTR[16+rax],xmm9 + pcmpeqb xmm13,xmm1 + psrlq xmm1,4 + lea rcx,QWORD PTR[16+rcx] + + pand xmm14,xmm6 + pand xmm15,xmm6 + movdqa XMMWORD PTR[32+rax],xmm10 + pcmpeqb xmm14,xmm2 + psrlq xmm2,4 + movdqa XMMWORD PTR[48+rax],xmm11 + pcmpeqb xmm15,xmm3 + psrlq xmm3,4 + movdqu xmm6,XMMWORD PTR[rcx] + + pxor xmm13,xmm5 + pxor xmm14,xmm5 + movdqa XMMWORD PTR[64+rax],xmm12 + movdqa XMMWORD PTR[80+rax],xmm13 + movdqa XMMWORD PTR[96+rax],xmm14 + movdqa XMMWORD PTR[112+rax],xmm15 + lea rax,QWORD PTR[128+rax] + dec r10d + jnz $L$key_loop + + movdqa xmm7,XMMWORD PTR[80+r11] + + DB 0F3h,0C3h ;repret +_bsaes_key_convert ENDP +EXTERN asm_AES_cbc_encrypt:NEAR +PUBLIC bsaes_cbc_encrypt + +ALIGN 16 +bsaes_cbc_encrypt PROC PUBLIC + mov r11d,DWORD PTR[48+rsp] + cmp r11d,0 + jne asm_AES_cbc_encrypt + cmp r8,128 + jb asm_AES_cbc_encrypt + + mov rax,rsp +$L$cbc_dec_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$cbc_dec_body:: + mov rbp,rsp + mov eax,DWORD PTR[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + mov rbx,r10 + shr r14,4 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD PTR[rsp] + movdqa XMMWORD PTR[rax],xmm6 + movdqa XMMWORD PTR[rsp],xmm7 + + movdqu xmm14,XMMWORD PTR[rbx] + 
sub r14,8 +$L$cbc_dec_loop:: + movdqu xmm15,XMMWORD PTR[r12] + movdqu xmm0,XMMWORD PTR[16+r12] + movdqu xmm1,XMMWORD PTR[32+r12] + movdqu xmm2,XMMWORD PTR[48+r12] + movdqu xmm3,XMMWORD PTR[64+r12] + movdqu xmm4,XMMWORD PTR[80+r12] + mov rax,rsp + movdqu xmm5,XMMWORD PTR[96+r12] + mov r10d,edx + movdqu xmm6,XMMWORD PTR[112+r12] + movdqa XMMWORD PTR[32+rbp],xmm14 + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm2,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + pxor xmm4,xmm13 + movdqu XMMWORD PTR[r13],xmm15 + lea r12,QWORD PTR[128+r12] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + sub r14,8 + jnc $L$cbc_dec_loop + + add r14,8 + jz $L$cbc_dec_done + + movdqu xmm15,XMMWORD PTR[r12] + mov rax,rsp + mov r10d,edx + cmp r14,2 + jb $L$cbc_dec_one + movdqu xmm0,XMMWORD PTR[16+r12] + je $L$cbc_dec_two + movdqu xmm1,XMMWORD PTR[32+r12] + cmp r14,4 + jb $L$cbc_dec_three + movdqu xmm2,XMMWORD PTR[48+r12] + je $L$cbc_dec_four + movdqu xmm3,XMMWORD PTR[64+r12] + cmp r14,6 + jb $L$cbc_dec_five + movdqu xmm4,XMMWORD PTR[80+r12] + je $L$cbc_dec_six + movdqu xmm5,XMMWORD PTR[96+r12] + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu xmm14,XMMWORD PTR[96+r12] + pxor xmm2,xmm12 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_six:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu xmm14,XMMWORD PTR[80+r12] + pxor xmm6,xmm11 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_five:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu xmm14,XMMWORD PTR[64+r12] + pxor xmm1,xmm10 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_four:: + 
movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu xmm14,XMMWORD PTR[48+r12] + pxor xmm3,xmm9 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_three:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu xmm14,XMMWORD PTR[32+r12] + pxor xmm5,xmm8 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_two:: + movdqa XMMWORD PTR[32+rbp],xmm14 + call _bsaes_decrypt8 + pxor xmm15,XMMWORD PTR[32+rbp] + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm14,XMMWORD PTR[16+r12] + pxor xmm0,xmm7 + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + jmp $L$cbc_dec_done +ALIGN 16 +$L$cbc_dec_one:: + lea rcx,QWORD PTR[r12] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm14,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[r13],xmm14 + movdqa xmm14,xmm15 + +$L$cbc_dec_done:: + movdqu XMMWORD PTR[rbx],xmm14 + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$cbc_dec_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$cbc_dec_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$cbc_dec_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_cbc_encrypt ENDP + +PUBLIC bsaes_ctr32_encrypt_blocks + +ALIGN 16 +bsaes_ctr32_encrypt_blocks PROC PUBLIC + mov rax,rsp +$L$ctr_enc_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$ctr_enc_body:: + mov rbp,rsp + movdqu xmm0,XMMWORD PTR[r10] + mov eax,DWORD PTR[240+r9] + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + movdqa XMMWORD PTR[32+rbp],xmm0 + cmp r8,8 + jb $L$ctr_enc_short + + mov ebx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,ebx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD PTR[rax],xmm7 + + movdqa xmm8,XMMWORD PTR[rsp] + lea r11,QWORD PTR[$L$ADD1] + movdqa xmm15,XMMWORD PTR[32+rbp] + movdqa xmm7,XMMWORD PTR[((-32))+r11] +DB 102,68,15,56,0,199 +DB 102,68,15,56,0,255 + movdqa XMMWORD PTR[rsp],xmm8 + jmp $L$ctr_enc_loop +ALIGN 16 +$L$ctr_enc_loop:: + movdqa 
XMMWORD PTR[32+rbp],xmm15 + movdqa xmm0,xmm15 + movdqa xmm1,xmm15 + paddd xmm0,XMMWORD PTR[r11] + movdqa xmm2,xmm15 + paddd xmm1,XMMWORD PTR[16+r11] + movdqa xmm3,xmm15 + paddd xmm2,XMMWORD PTR[32+r11] + movdqa xmm4,xmm15 + paddd xmm3,XMMWORD PTR[48+r11] + movdqa xmm5,xmm15 + paddd xmm4,XMMWORD PTR[64+r11] + movdqa xmm6,xmm15 + paddd xmm5,XMMWORD PTR[80+r11] + paddd xmm6,XMMWORD PTR[96+r11] + + + + movdqa xmm8,XMMWORD PTR[rsp] + lea rax,QWORD PTR[16+rsp] + movdqa xmm7,XMMWORD PTR[((-16))+r11] + pxor xmm15,xmm8 + pxor xmm0,xmm8 + pxor xmm1,xmm8 + pxor xmm2,xmm8 +DB 102,68,15,56,0,255 +DB 102,15,56,0,199 + pxor xmm3,xmm8 + pxor xmm4,xmm8 +DB 102,15,56,0,207 +DB 102,15,56,0,215 + pxor xmm5,xmm8 + pxor xmm6,xmm8 +DB 102,15,56,0,223 +DB 102,15,56,0,231 +DB 102,15,56,0,239 +DB 102,15,56,0,247 + lea r11,QWORD PTR[$L$BS0] + mov r10d,ebx + + call _bsaes_encrypt8_bitslice + + sub r14,8 + jc $L$ctr_enc_loop_done + + movdqu xmm7,XMMWORD PTR[r12] + movdqu xmm8,XMMWORD PTR[16+r12] + movdqu xmm9,XMMWORD PTR[32+r12] + movdqu xmm10,XMMWORD PTR[48+r12] + movdqu xmm11,XMMWORD PTR[64+r12] + movdqu xmm12,XMMWORD PTR[80+r12] + movdqu xmm13,XMMWORD PTR[96+r12] + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + pxor xmm7,xmm15 + movdqa xmm15,XMMWORD PTR[32+rbp] + pxor xmm0,xmm8 + movdqu XMMWORD PTR[r13],xmm7 + pxor xmm3,xmm9 + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,xmm10 + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,xmm11 + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,xmm12 + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,xmm13 + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,xmm14 + movdqu XMMWORD PTR[96+r13],xmm1 + lea r11,QWORD PTR[$L$ADD1] + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + paddd xmm15,XMMWORD PTR[112+r11] + jnz $L$ctr_enc_loop + + jmp $L$ctr_enc_done +ALIGN 16 +$L$ctr_enc_loop_done:: + add r14,8 + movdqu xmm7,XMMWORD PTR[r12] + pxor xmm15,xmm7 + movdqu XMMWORD PTR[r13],xmm15 + cmp r14,2 + jb $L$ctr_enc_done + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm0,xmm8 + movdqu XMMWORD PTR[16+r13],xmm0 + je $L$ctr_enc_done + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm3,xmm9 + movdqu XMMWORD PTR[32+r13],xmm3 + cmp r14,4 + jb $L$ctr_enc_done + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm5,xmm10 + movdqu XMMWORD PTR[48+r13],xmm5 + je $L$ctr_enc_done + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm11 + movdqu XMMWORD PTR[64+r13],xmm2 + cmp r14,6 + jb $L$ctr_enc_done + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm6,xmm12 + movdqu XMMWORD PTR[80+r13],xmm6 + je $L$ctr_enc_done + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm1,xmm13 + movdqu XMMWORD PTR[96+r13],xmm1 + jmp $L$ctr_enc_done + +ALIGN 16 +$L$ctr_enc_short:: + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[48+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + movdqu xmm0,XMMWORD PTR[r12] + lea r12,QWORD PTR[16+r12] + mov eax,DWORD PTR[44+rbp] + bswap eax + pxor xmm0,XMMWORD PTR[48+rbp] + inc eax + movdqu XMMWORD PTR[r13],xmm0 + bswap eax + lea r13,QWORD PTR[16+r13] + mov DWORD PTR[44+rsp],eax + dec r14 + jnz $L$ctr_enc_short + +$L$ctr_enc_done:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$ctr_enc_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$ctr_enc_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD 
PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$ctr_enc_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_ctr32_encrypt_blocks ENDP +PUBLIC bsaes_xts_encrypt + +ALIGN 16 +bsaes_xts_encrypt PROC PUBLIC + mov rax,rsp +$L$xts_enc_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + mov r11,QWORD PTR[168+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$xts_enc_body:: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,QWORD PTR[r11] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r10] + call asm_AES_encrypt + + mov eax,DWORD PTR[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,xmm6 + movdqa XMMWORD PTR[rax],xmm7 + + and r14,-16 + sub rsp,080h + movdqa xmm6,XMMWORD PTR[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,080h + jc $L$xts_enc_short + jmp $L$xts_enc_loop + +ALIGN 16 +$L$xts_enc_loop:: + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + movdqa XMMWORD PTR[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD 
PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,XMMWORD PTR[112+rsp] + movdqu XMMWORD PTR[96+r13],xmm1 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,080h + jnc $L$xts_enc_loop + +$L$xts_enc_short:: + add r14,080h + jz $L$xts_enc_done + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + cmp r14,16 + je $L$xts_enc_1 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + cmp r14,32 + je $L$xts_enc_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + cmp r14,48 + je $L$xts_enc_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + cmp r14,64 + je $L$xts_enc_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + cmp r14,80 + je $L$xts_enc_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + cmp r14,96 + je $L$xts_enc_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD PTR[112+rsp],xmm6 + lea r12,QWORD PTR[112+r12] + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + pxor xmm1,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm1 + lea r13,QWORD PTR[112+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_6:: + pxor xmm3,xmm11 + lea r12,QWORD PTR[96+r12] + pxor xmm4,xmm12 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu 
XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm2 + movdqu XMMWORD PTR[80+r13],xmm6 + lea r13,QWORD PTR[96+r13] + + movdqa xmm6,XMMWORD PTR[96+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_5:: + pxor xmm2,xmm10 + lea r12,QWORD PTR[80+r12] + pxor xmm3,xmm11 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + pxor xmm2,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm5 + movdqu XMMWORD PTR[64+r13],xmm2 + lea r13,QWORD PTR[80+r13] + + movdqa xmm6,XMMWORD PTR[80+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_4:: + pxor xmm1,xmm9 + lea r12,QWORD PTR[64+r12] + pxor xmm2,xmm10 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm5,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm3 + movdqu XMMWORD PTR[48+r13],xmm5 + lea r13,QWORD PTR[64+r13] + + movdqa xmm6,XMMWORD PTR[64+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_3:: + pxor xmm0,xmm8 + lea r12,QWORD PTR[48+r12] + pxor xmm1,xmm9 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm3,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm3 + lea r13,QWORD PTR[48+r13] + + movdqa xmm6,XMMWORD PTR[48+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_2:: + pxor xmm15,xmm7 + lea r12,QWORD PTR[32+r12] + pxor xmm0,xmm8 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_encrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + lea r13,QWORD PTR[32+r13] + + movdqa xmm6,XMMWORD PTR[32+rsp] + jmp $L$xts_enc_done +ALIGN 16 +$L$xts_enc_1:: + pxor xmm7,xmm15 + lea r12,QWORD PTR[16+r12] + movdqa XMMWORD PTR[32+rbp],xmm7 + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + pxor xmm15,XMMWORD PTR[32+rbp] + + + + + + movdqu XMMWORD PTR[r13],xmm15 + lea r13,QWORD PTR[16+r13] + + movdqa xmm6,XMMWORD PTR[16+rsp] + +$L$xts_enc_done:: + and ebx,15 + jz $L$xts_enc_ret + mov rdx,r13 + +$L$xts_enc_steal:: + movzx eax,BYTE PTR[r12] + movzx ecx,BYTE PTR[((-16))+rdx] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[((-16))+rdx],al + mov BYTE PTR[rdx],cl + lea rdx,QWORD PTR[1+rdx] + sub ebx,1 + jnz $L$xts_enc_steal + + movdqu xmm15,XMMWORD PTR[((-16))+r13] + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm6 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_encrypt + pxor xmm6,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[(-16)+r13],xmm6 + +$L$xts_enc_ret:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$xts_enc_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$xts_enc_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] 
+ movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$xts_enc_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_xts_encrypt ENDP + +PUBLIC bsaes_xts_decrypt + +ALIGN 16 +bsaes_xts_decrypt PROC PUBLIC + mov rax,rsp +$L$xts_dec_prologue:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-72))+rsp] + mov r10,QWORD PTR[160+rsp] + mov r11,QWORD PTR[168+rsp] + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[64+rsp],xmm6 + movaps XMMWORD PTR[80+rsp],xmm7 + movaps XMMWORD PTR[96+rsp],xmm8 + movaps XMMWORD PTR[112+rsp],xmm9 + movaps XMMWORD PTR[128+rsp],xmm10 + movaps XMMWORD PTR[144+rsp],xmm11 + movaps XMMWORD PTR[160+rsp],xmm12 + movaps XMMWORD PTR[176+rsp],xmm13 + movaps XMMWORD PTR[192+rsp],xmm14 + movaps XMMWORD PTR[208+rsp],xmm15 +$L$xts_dec_body:: + mov rbp,rsp + mov r12,rcx + mov r13,rdx + mov r14,r8 + mov r15,r9 + + lea rcx,QWORD PTR[r11] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r10] + call asm_AES_encrypt + + mov eax,DWORD PTR[240+r15] + mov rbx,r14 + + mov edx,eax + shl rax,7 + sub rax,96 + sub rsp,rax + + mov rax,rsp + mov rcx,r15 + mov r10d,edx + call _bsaes_key_convert + pxor xmm7,XMMWORD PTR[rsp] + movdqa XMMWORD PTR[rax],xmm6 + movdqa XMMWORD PTR[rsp],xmm7 + + xor eax,eax + and r14,-16 + test ebx,15 + setnz al + shl rax,4 + sub r14,rax + + sub rsp,080h + movdqa xmm6,XMMWORD PTR[32+rbp] + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + + sub r14,080h + jc $L$xts_dec_short + jmp $L$xts_dec_loop + +ALIGN 16 +$L$xts_dec_loop:: + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqu xmm14,XMMWORD PTR[112+r12] + lea r12,QWORD PTR[128+r12] + movdqa XMMWORD 
PTR[112+rsp],xmm6 + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + pxor xmm6,xmm14 + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + pxor xmm2,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + pxor xmm4,XMMWORD PTR[112+rsp] + movdqu XMMWORD PTR[96+r13],xmm2 + movdqu XMMWORD PTR[112+r13],xmm4 + lea r13,QWORD PTR[128+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + + sub r14,080h + jnc $L$xts_dec_loop + +$L$xts_dec_short:: + add r14,080h + jz $L$xts_dec_done + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm15,xmm6 + movdqa XMMWORD PTR[rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm0,xmm6 + movdqa XMMWORD PTR[16+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm7,XMMWORD PTR[r12] + cmp r14,16 + je $L$xts_dec_1 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm1,xmm6 + movdqa XMMWORD PTR[32+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm8,XMMWORD PTR[16+r12] + cmp r14,32 + je $L$xts_dec_2 + pxor xmm15,xmm7 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm2,xmm6 + movdqa XMMWORD PTR[48+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm9,XMMWORD PTR[32+r12] + cmp r14,48 + je $L$xts_dec_3 + pxor xmm0,xmm8 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm3,xmm6 + movdqa XMMWORD PTR[64+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm10,XMMWORD PTR[48+r12] + cmp r14,64 + je $L$xts_dec_4 + pxor xmm1,xmm9 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm4,xmm6 + movdqa XMMWORD PTR[80+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm11,XMMWORD PTR[64+r12] + cmp r14,80 + je $L$xts_dec_5 + pxor xmm2,xmm10 + pshufd xmm13,xmm14,013h + pxor xmm14,xmm14 + movdqa xmm5,xmm6 + movdqa XMMWORD PTR[96+rsp],xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + pcmpgtd xmm14,xmm6 + pxor xmm6,xmm13 + movdqu xmm12,XMMWORD PTR[80+r12] + cmp r14,96 + je $L$xts_dec_6 + pxor xmm3,xmm11 + movdqu xmm13,XMMWORD PTR[96+r12] + pxor xmm4,xmm12 + movdqa XMMWORD PTR[112+rsp],xmm6 + lea r12,QWORD PTR[112+r12] + pxor xmm5,xmm13 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + pxor xmm2,XMMWORD PTR[96+rsp] + movdqu XMMWORD PTR[80+r13],xmm6 + movdqu XMMWORD PTR[96+r13],xmm2 + lea r13,QWORD PTR[112+r13] + + movdqa xmm6,XMMWORD PTR[112+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_6:: + pxor xmm3,xmm11 + lea r12,QWORD PTR[96+r12] + pxor xmm4,xmm12 + 
lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + pxor xmm6,XMMWORD PTR[80+rsp] + movdqu XMMWORD PTR[64+r13],xmm1 + movdqu XMMWORD PTR[80+r13],xmm6 + lea r13,QWORD PTR[96+r13] + + movdqa xmm6,XMMWORD PTR[96+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_5:: + pxor xmm2,xmm10 + lea r12,QWORD PTR[80+r12] + pxor xmm3,xmm11 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + pxor xmm1,XMMWORD PTR[64+rsp] + movdqu XMMWORD PTR[48+r13],xmm3 + movdqu XMMWORD PTR[64+r13],xmm1 + lea r13,QWORD PTR[80+r13] + + movdqa xmm6,XMMWORD PTR[80+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_4:: + pxor xmm1,xmm9 + lea r12,QWORD PTR[64+r12] + pxor xmm2,xmm10 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + pxor xmm3,XMMWORD PTR[48+rsp] + movdqu XMMWORD PTR[32+r13],xmm5 + movdqu XMMWORD PTR[48+r13],xmm3 + lea r13,QWORD PTR[64+r13] + + movdqa xmm6,XMMWORD PTR[64+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_3:: + pxor xmm0,xmm8 + lea r12,QWORD PTR[48+r12] + pxor xmm1,xmm9 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + pxor xmm5,XMMWORD PTR[32+rsp] + movdqu XMMWORD PTR[16+r13],xmm0 + movdqu XMMWORD PTR[32+r13],xmm5 + lea r13,QWORD PTR[48+r13] + + movdqa xmm6,XMMWORD PTR[48+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_2:: + pxor xmm15,xmm7 + lea r12,QWORD PTR[32+r12] + pxor xmm0,xmm8 + lea rax,QWORD PTR[128+rsp] + mov r10d,edx + + call _bsaes_decrypt8 + + pxor xmm15,XMMWORD PTR[rsp] + pxor xmm0,XMMWORD PTR[16+rsp] + movdqu XMMWORD PTR[r13],xmm15 + movdqu XMMWORD PTR[16+r13],xmm0 + lea r13,QWORD PTR[32+r13] + + movdqa xmm6,XMMWORD PTR[32+rsp] + jmp $L$xts_dec_done +ALIGN 16 +$L$xts_dec_1:: + pxor xmm7,xmm15 + lea r12,QWORD PTR[16+r12] + movdqa XMMWORD PTR[32+rbp],xmm7 + lea rcx,QWORD PTR[32+rbp] + lea rdx,QWORD PTR[32+rbp] + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm15,XMMWORD PTR[32+rbp] + + + + + + movdqu XMMWORD PTR[r13],xmm15 + lea r13,QWORD PTR[16+r13] + + movdqa xmm6,XMMWORD PTR[16+rsp] + +$L$xts_dec_done:: + and ebx,15 + jz $L$xts_dec_ret + + pxor xmm14,xmm14 + movdqa xmm12,XMMWORD PTR[$L$xts_magic] + pcmpgtd xmm14,xmm6 + pshufd xmm13,xmm14,013h + movdqa xmm5,xmm6 + paddq xmm6,xmm6 + pand xmm13,xmm12 + movdqu xmm15,XMMWORD PTR[r12] + pxor xmm6,xmm13 + + lea rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm6 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm6,XMMWORD PTR[32+rbp] + mov rdx,r13 + movdqu XMMWORD PTR[r13],xmm6 + +$L$xts_dec_steal:: + movzx eax,BYTE PTR[16+r12] + movzx ecx,BYTE PTR[rdx] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[rdx],al + mov BYTE PTR[16+rdx],cl + lea rdx,QWORD PTR[1+rdx] + sub ebx,1 + jnz $L$xts_dec_steal + + movdqu xmm15,XMMWORD PTR[r13] + lea 
rcx,QWORD PTR[32+rbp] + pxor xmm15,xmm5 + lea rdx,QWORD PTR[32+rbp] + movdqa XMMWORD PTR[32+rbp],xmm15 + lea r8,QWORD PTR[r15] + call asm_AES_decrypt + pxor xmm5,XMMWORD PTR[32+rbp] + movdqu XMMWORD PTR[r13],xmm5 + +$L$xts_dec_ret:: + lea rax,QWORD PTR[rsp] + pxor xmm0,xmm0 +$L$xts_dec_bzero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + lea rax,QWORD PTR[32+rax] + cmp rbp,rax + ja $L$xts_dec_bzero + + lea rsp,QWORD PTR[rbp] + movaps xmm6,XMMWORD PTR[64+rbp] + movaps xmm7,XMMWORD PTR[80+rbp] + movaps xmm8,XMMWORD PTR[96+rbp] + movaps xmm9,XMMWORD PTR[112+rbp] + movaps xmm10,XMMWORD PTR[128+rbp] + movaps xmm11,XMMWORD PTR[144+rbp] + movaps xmm12,XMMWORD PTR[160+rbp] + movaps xmm13,XMMWORD PTR[176+rbp] + movaps xmm14,XMMWORD PTR[192+rbp] + movaps xmm15,XMMWORD PTR[208+rbp] + lea rsp,QWORD PTR[160+rbp] + mov r15,QWORD PTR[72+rsp] + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbx,QWORD PTR[104+rsp] + mov rax,QWORD PTR[112+rsp] + lea rsp,QWORD PTR[120+rsp] + mov rbp,rax +$L$xts_dec_epilogue:: + DB 0F3h,0C3h ;repret +bsaes_xts_decrypt ENDP + +ALIGN 64 +_bsaes_const:: +$L$M0ISR:: + DQ 00a0e0206070b0f03h,00004080c0d010509h +$L$ISRM0:: + DQ 001040b0e0205080fh,00306090c00070a0dh +$L$ISR:: + DQ 00504070602010003h,00f0e0d0c080b0a09h +$L$BS0:: + DQ 05555555555555555h,05555555555555555h +$L$BS1:: + DQ 03333333333333333h,03333333333333333h +$L$BS2:: + DQ 00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh +$L$SR:: + DQ 00504070600030201h,00f0e0d0c0a09080bh +$L$SRM0:: + DQ 00304090e00050a0fh,001060b0c0207080dh +$L$M0SR:: + DQ 00a0e02060f03070bh,00004080c05090d01h +$L$SWPUP:: + DQ 00706050403020100h,00c0d0e0f0b0a0908h +$L$SWPUPM0SR:: + DQ 00a0d02060c03070bh,00004080f05090e01h +$L$ADD1:: + DQ 00000000000000000h,00000000100000000h +$L$ADD2:: + DQ 00000000000000000h,00000000200000000h +$L$ADD3:: + DQ 00000000000000000h,00000000300000000h +$L$ADD4:: + DQ 00000000000000000h,00000000400000000h +$L$ADD5:: + DQ 00000000000000000h,00000000500000000h +$L$ADD6:: + DQ 00000000000000000h,00000000600000000h +$L$ADD7:: + DQ 00000000000000000h,00000000700000000h +$L$ADD8:: + DQ 00000000000000000h,00000000800000000h +$L$xts_magic:: + DD 087h,0,1,0 +$L$masks:: + DQ 00101010101010101h,00101010101010101h + DQ 00202020202020202h,00202020202020202h + DQ 00404040404040404h,00404040404040404h + DQ 00808080808080808h,00808080808080808h +$L$M0:: + DQ 002060a0e03070b0fh,00004080c0105090dh +$L$63:: + DQ 06363636363636363h,06363636363636363h +DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102 +DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44 +DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44 +DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32 +DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[160+r8] + + lea rsi,QWORD PTR[64+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[160+rax] + + mov rbp,QWORD PTR[112+rax] + mov rbx,QWORD PTR[104+rax] + mov r12,QWORD PTR[96+rax] + mov 
r13,QWORD PTR[88+rax] + mov r14,QWORD PTR[80+rax] + mov r15,QWORD PTR[72+rax] + lea rax,QWORD PTR[120+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov QWORD PTR[152+r8],rax + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$cbc_dec_prologue + DD imagerel $L$cbc_dec_epilogue + DD imagerel $L$cbc_dec_info + + DD imagerel $L$ctr_enc_prologue + DD imagerel $L$ctr_enc_epilogue + DD imagerel $L$ctr_enc_info + + DD imagerel $L$xts_enc_prologue + DD imagerel $L$xts_enc_epilogue + DD imagerel $L$xts_enc_info + + DD imagerel $L$xts_dec_prologue + DD imagerel $L$xts_dec_epilogue + DD imagerel $L$xts_dec_info + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$cbc_dec_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue +$L$ctr_enc_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue +$L$xts_enc_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue +$L$xts_dec_info:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/vpaes-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/vpaes-x86_64.asm new file mode 100644 index 00000000000000..292f64d58debb1 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/vpaes-x86_64.asm @@ -0,0 +1,1143 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_encrypt_core PROC PRIVATE + mov r9,rdx + mov r11,16 + mov eax,DWORD PTR[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD PTR[$L$k_ipt] + pandn xmm1,xmm0 + movdqu xmm5,XMMWORD PTR[r9] + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[(($L$k_ipt+16))] +DB 102,15,56,0,193 + pxor xmm2,xmm5 + add r9,16 + pxor xmm0,xmm2 + lea r10,QWORD PTR[$L$k_mc_backward] + jmp $L$enc_entry + +ALIGN 16 +$L$enc_loop:: + + movdqa xmm4,xmm13 + movdqa xmm0,xmm12 +DB 102,15,56,0,226 +DB 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,xmm15 + pxor xmm0,xmm4 + movdqa xmm1,XMMWORD PTR[((-64))+r10*1+r11] +DB 102,15,56,0,234 + movdqa xmm4,XMMWORD PTR[r10*1+r11] + movdqa xmm2,xmm14 +DB 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +DB 102,15,56,0,193 + add r9,16 + pxor xmm0,xmm2 +DB 102,15,56,0,220 + add r11,16 + pxor xmm3,xmm0 +DB 102,15,56,0,193 + and r11,030h + sub rax,1 + pxor xmm0,xmm3 + +$L$enc_entry:: + + movdqa xmm1,xmm9 + movdqa xmm5,xmm11 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,232 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm5 +DB 102,15,56,0,224 + movdqa xmm2,xmm10 + pxor xmm4,xmm5 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + 
movdqu xmm5,XMMWORD PTR[r9] + pxor xmm3,xmm1 + jnz $L$enc_loop + + + movdqa xmm4,XMMWORD PTR[((-96))+r10] + movdqa xmm0,XMMWORD PTR[((-80))+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm5 +DB 102,15,56,0,195 + movdqa xmm1,XMMWORD PTR[64+r10*1+r11] + pxor xmm0,xmm4 +DB 102,15,56,0,193 + DB 0F3h,0C3h ;repret +_vpaes_encrypt_core ENDP + + + + + + + +ALIGN 16 +_vpaes_decrypt_core PROC PRIVATE + mov r9,rdx + mov eax,DWORD PTR[240+rdx] + movdqa xmm1,xmm9 + movdqa xmm2,XMMWORD PTR[$L$k_dipt] + pandn xmm1,xmm0 + mov r11,rax + psrld xmm1,4 + movdqu xmm5,XMMWORD PTR[r9] + shl r11,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[(($L$k_dipt+16))] + xor r11,030h + lea r10,QWORD PTR[$L$k_dsbd] +DB 102,15,56,0,193 + and r11,030h + pxor xmm2,xmm5 + movdqa xmm5,XMMWORD PTR[(($L$k_mc_forward+48))] + pxor xmm0,xmm2 + add r9,16 + add r11,r10 + jmp $L$dec_entry + +ALIGN 16 +$L$dec_loop:: + + + + movdqa xmm4,XMMWORD PTR[((-32))+r10] + movdqa xmm1,XMMWORD PTR[((-16))+r10] +DB 102,15,56,0,226 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[16+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[32+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[48+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR[64+r10] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR[80+r10] + +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + add r9,16 +DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub rax,1 + +$L$dec_entry:: + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + movdqa xmm2,xmm11 + psrld xmm1,4 + pand xmm0,xmm9 +DB 102,15,56,0,208 + movdqa xmm3,xmm10 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm10 + pxor xmm3,xmm2 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + movdqa xmm3,xmm10 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm0,XMMWORD PTR[r9] + pxor xmm3,xmm1 + jnz $L$dec_loop + + + movdqa xmm4,XMMWORD PTR[96+r10] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR[112+r10] + movdqa xmm2,XMMWORD PTR[((-352))+r11] +DB 102,15,56,0,195 + pxor xmm0,xmm4 +DB 102,15,56,0,194 + DB 0F3h,0C3h ;repret +_vpaes_decrypt_core ENDP + + + + + + + +ALIGN 16 +_vpaes_schedule_core PROC PRIVATE + + + + + + call _vpaes_preheat + movdqa xmm8,XMMWORD PTR[$L$k_rcon] + movdqu xmm0,XMMWORD PTR[rdi] + + + movdqa xmm3,xmm0 + lea r11,QWORD PTR[$L$k_ipt] + call _vpaes_schedule_transform + movdqa xmm7,xmm0 + + lea r10,QWORD PTR[$L$k_sr] + test rcx,rcx + jnz $L$schedule_am_decrypting + + + movdqu XMMWORD PTR[rdx],xmm0 + jmp $L$schedule_go + +$L$schedule_am_decrypting:: + + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,217 + movdqu XMMWORD PTR[rdx],xmm3 + xor r8,030h + +$L$schedule_go:: + cmp esi,192 + ja $L$schedule_256 + je $L$schedule_192 + + + + + + + + + + +$L$schedule_128:: + mov esi,10 + +$L$oop_schedule_128:: + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + jmp $L$oop_schedule_128 + + + + + + + + + + + + + + + + +ALIGN 16 +$L$schedule_192:: + movdqu xmm0,XMMWORD PTR[8+rdi] + call _vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov esi,4 + +$L$oop_schedule_192:: + call _vpaes_schedule_round +DB 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + call 
_vpaes_schedule_192_smear + jmp $L$oop_schedule_192 + + + + + + + + + + + +ALIGN 16 +$L$schedule_256:: + movdqu xmm0,XMMWORD PTR[16+rdi] + call _vpaes_schedule_transform + mov esi,7 + +$L$oop_schedule_256:: + call _vpaes_schedule_mangle + movdqa xmm6,xmm0 + + + call _vpaes_schedule_round + dec rsi + jz $L$schedule_mangle_last + call _vpaes_schedule_mangle + + + pshufd xmm0,xmm0,0FFh + movdqa xmm5,xmm7 + movdqa xmm7,xmm6 + call _vpaes_schedule_low_round + movdqa xmm7,xmm5 + + jmp $L$oop_schedule_256 + + + + + + + + + + + + +ALIGN 16 +$L$schedule_mangle_last:: + + lea r11,QWORD PTR[$L$k_deskew] + test rcx,rcx + jnz $L$schedule_mangle_last_dec + + + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,193 + lea r11,QWORD PTR[$L$k_opt] + add rdx,32 + +$L$schedule_mangle_last_dec:: + add rdx,-16 + pxor xmm0,XMMWORD PTR[$L$k_s63] + call _vpaes_schedule_transform + movdqu XMMWORD PTR[rdx],xmm0 + + + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + DB 0F3h,0C3h ;repret +_vpaes_schedule_core ENDP + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_192_smear PROC PRIVATE + pshufd xmm1,xmm6,080h + pshufd xmm0,xmm7,0FEh + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + DB 0F3h,0C3h ;repret +_vpaes_schedule_192_smear ENDP + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_round PROC PRIVATE + + pxor xmm1,xmm1 +DB 102,65,15,58,15,200,15 +DB 102,69,15,58,15,192,15 + pxor xmm7,xmm1 + + + pshufd xmm0,xmm0,0FFh +DB 102,15,58,15,192,1 + + + + +_vpaes_schedule_low_round:: + + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,XMMWORD PTR[$L$k_s63] + + + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,xmm11 +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm10 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm10 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm10 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm10 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,xmm13 +DB 102,15,56,0,226 + movdqa xmm0,xmm12 +DB 102,15,56,0,195 + pxor xmm0,xmm4 + + + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + DB 0F3h,0C3h ;repret +_vpaes_schedule_round ENDP + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_transform PROC PRIVATE + movdqa xmm1,xmm9 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm9 + movdqa xmm2,XMMWORD PTR[r11] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR[16+r11] +DB 102,15,56,0,193 + pxor xmm0,xmm2 + DB 0F3h,0C3h ;repret +_vpaes_schedule_transform ENDP + + + + + + + + + + + + + + + + + + + + + + + + + +ALIGN 16 +_vpaes_schedule_mangle PROC PRIVATE + movdqa xmm4,xmm0 + movdqa xmm5,XMMWORD PTR[$L$k_mc_forward] + test rcx,rcx + jnz $L$schedule_mangle_dec + + + add rdx,16 + pxor xmm4,XMMWORD PTR[$L$k_s63] +DB 102,15,56,0,229 + movdqa xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 + + jmp $L$schedule_mangle_both +ALIGN 16 +$L$schedule_mangle_dec:: + + lea r11,QWORD PTR[$L$k_dksd] + movdqa xmm1,xmm9 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm9 + + movdqa xmm2,XMMWORD PTR[r11] +DB 102,15,56,0,212 + movdqa xmm3,XMMWORD PTR[16+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[32+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[48+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[64+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + 
movdqa xmm3,XMMWORD PTR[80+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + + movdqa xmm2,XMMWORD PTR[96+r11] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR[112+r11] +DB 102,15,56,0,217 + pxor xmm3,xmm2 + + add rdx,-16 + +$L$schedule_mangle_both:: + movdqa xmm1,XMMWORD PTR[r10*1+r8] +DB 102,15,56,0,217 + add r8,-16 + and r8,030h + movdqu XMMWORD PTR[rdx],xmm3 + DB 0F3h,0C3h ;repret +_vpaes_schedule_mangle ENDP + + + + +PUBLIC vpaes_set_encrypt_key + +ALIGN 16 +vpaes_set_encrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_encrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$enc_key_body:: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD PTR[240+rdx],eax + + mov ecx,0 + mov r8d,030h + call _vpaes_schedule_core + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$enc_key_epilogue:: + xor eax,eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_encrypt_key:: +vpaes_set_encrypt_key ENDP + +PUBLIC vpaes_set_decrypt_key + +ALIGN 16 +vpaes_set_decrypt_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_set_decrypt_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$dec_key_body:: + mov eax,esi + shr eax,5 + add eax,5 + mov DWORD PTR[240+rdx],eax + shl eax,4 + lea rdx,QWORD PTR[16+rax*1+rdx] + + mov ecx,1 + mov r8d,esi + shr r8d,1 + and r8d,32 + xor r8d,32 + call _vpaes_schedule_core + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$dec_key_epilogue:: + xor eax,eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_set_decrypt_key:: +vpaes_set_decrypt_key ENDP + +PUBLIC vpaes_encrypt + +ALIGN 16 +vpaes_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 
+ movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$enc_body:: + movdqu xmm0,XMMWORD PTR[rdi] + call _vpaes_preheat + call _vpaes_encrypt_core + movdqu XMMWORD PTR[rsi],xmm0 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_encrypt:: +vpaes_encrypt ENDP + +PUBLIC vpaes_decrypt + +ALIGN 16 +vpaes_decrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_decrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$dec_body:: + movdqu xmm0,XMMWORD PTR[rdi] + call _vpaes_preheat + call _vpaes_decrypt_core + movdqu XMMWORD PTR[rsi],xmm0 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_decrypt:: +vpaes_decrypt ENDP +PUBLIC vpaes_cbc_encrypt + +ALIGN 16 +vpaes_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_vpaes_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + xchg rdx,rcx + sub rcx,16 + jc $L$cbc_abort + lea rsp,QWORD PTR[((-184))+rsp] + movaps XMMWORD PTR[16+rsp],xmm6 + movaps XMMWORD PTR[32+rsp],xmm7 + movaps XMMWORD PTR[48+rsp],xmm8 + movaps XMMWORD PTR[64+rsp],xmm9 + movaps XMMWORD PTR[80+rsp],xmm10 + movaps XMMWORD PTR[96+rsp],xmm11 + movaps XMMWORD PTR[112+rsp],xmm12 + movaps XMMWORD PTR[128+rsp],xmm13 + movaps XMMWORD PTR[144+rsp],xmm14 + movaps XMMWORD PTR[160+rsp],xmm15 +$L$cbc_body:: + movdqu xmm6,XMMWORD PTR[r8] + sub rsi,rdi + call _vpaes_preheat + cmp r9d,0 + je $L$cbc_dec_loop + jmp $L$cbc_enc_loop +ALIGN 16 +$L$cbc_enc_loop:: + movdqu xmm0,XMMWORD PTR[rdi] + pxor xmm0,xmm6 + call _vpaes_encrypt_core + movdqa xmm6,xmm0 + movdqu XMMWORD PTR[rdi*1+rsi],xmm0 + lea rdi,QWORD PTR[16+rdi] + sub rcx,16 + jnc $L$cbc_enc_loop + jmp $L$cbc_done +ALIGN 16 +$L$cbc_dec_loop:: + movdqu xmm0,XMMWORD PTR[rdi] + movdqa xmm7,xmm0 + call _vpaes_decrypt_core + pxor xmm0,xmm6 + movdqa xmm6,xmm7 + movdqu XMMWORD PTR[rdi*1+rsi],xmm0 + lea rdi,QWORD PTR[16+rdi] + sub rcx,16 + jnc 
$L$cbc_dec_loop +$L$cbc_done:: + movdqu XMMWORD PTR[r8],xmm6 + movaps xmm6,XMMWORD PTR[16+rsp] + movaps xmm7,XMMWORD PTR[32+rsp] + movaps xmm8,XMMWORD PTR[48+rsp] + movaps xmm9,XMMWORD PTR[64+rsp] + movaps xmm10,XMMWORD PTR[80+rsp] + movaps xmm11,XMMWORD PTR[96+rsp] + movaps xmm12,XMMWORD PTR[112+rsp] + movaps xmm13,XMMWORD PTR[128+rsp] + movaps xmm14,XMMWORD PTR[144+rsp] + movaps xmm15,XMMWORD PTR[160+rsp] + lea rsp,QWORD PTR[184+rsp] +$L$cbc_epilogue:: +$L$cbc_abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_vpaes_cbc_encrypt:: +vpaes_cbc_encrypt ENDP + + + + + + + +ALIGN 16 +_vpaes_preheat PROC PRIVATE + lea r10,QWORD PTR[$L$k_s0F] + movdqa xmm10,XMMWORD PTR[((-32))+r10] + movdqa xmm11,XMMWORD PTR[((-16))+r10] + movdqa xmm9,XMMWORD PTR[r10] + movdqa xmm13,XMMWORD PTR[48+r10] + movdqa xmm12,XMMWORD PTR[64+r10] + movdqa xmm15,XMMWORD PTR[80+r10] + movdqa xmm14,XMMWORD PTR[96+r10] + DB 0F3h,0C3h ;repret +_vpaes_preheat ENDP + + + + + + +ALIGN 64 +_vpaes_consts:: +$L$k_inv:: + DQ 00E05060F0D080180h,0040703090A0B0C02h + DQ 001040A060F0B0780h,0030D0E0C02050809h + +$L$k_s0F:: + DQ 00F0F0F0F0F0F0F0Fh,00F0F0F0F0F0F0F0Fh + +$L$k_ipt:: + DQ 0C2B2E8985A2A7000h,0CABAE09052227808h + DQ 04C01307D317C4D00h,0CD80B1FCB0FDCC81h + +$L$k_sb1:: + DQ 0B19BE18FCB503E00h,0A5DF7A6E142AF544h + DQ 03618D415FAE22300h,03BF7CCC10D2ED9EFh +$L$k_sb2:: + DQ 0E27A93C60B712400h,05EB7E955BC982FCDh + DQ 069EB88400AE12900h,0C2A163C8AB82234Ah +$L$k_sbo:: + DQ 0D0D26D176FBDC700h,015AABF7AC502A878h + DQ 0CFE474A55FBB6A00h,08E1E90D1412B35FAh + +$L$k_mc_forward:: + DQ 00407060500030201h,00C0F0E0D080B0A09h + DQ 0080B0A0904070605h,0000302010C0F0E0Dh + DQ 00C0F0E0D080B0A09h,00407060500030201h + DQ 0000302010C0F0E0Dh,0080B0A0904070605h + +$L$k_mc_backward:: + DQ 00605040702010003h,00E0D0C0F0A09080Bh + DQ 0020100030E0D0C0Fh,00A09080B06050407h + DQ 00E0D0C0F0A09080Bh,00605040702010003h + DQ 00A09080B06050407h,0020100030E0D0C0Fh + +$L$k_sr:: + DQ 00706050403020100h,00F0E0D0C0B0A0908h + DQ 0030E09040F0A0500h,00B06010C07020D08h + DQ 00F060D040B020900h,0070E050C030A0108h + DQ 00B0E0104070A0D00h,00306090C0F020508h + +$L$k_rcon:: + DQ 01F8391B9AF9DEEB6h,0702A98084D7C7D81h + +$L$k_s63:: + DQ 05B5B5B5B5B5B5B5Bh,05B5B5B5B5B5B5B5Bh + +$L$k_opt:: + DQ 0FF9F4929D6B66000h,0F7974121DEBE6808h + DQ 001EDBD5150BCEC00h,0E10D5DB1B05C0CE0h + +$L$k_deskew:: + DQ 007E4A34047A4E300h,01DFEB95A5DBEF91Ah + DQ 05F36B5DC83EA6900h,02841C2ABF49D1E77h + + + + + +$L$k_dksd:: + DQ 0FEB91A5DA3E44700h,00740E3A45A1DBEF9h + DQ 041C277F4B5368300h,05FDC69EAAB289D1Eh +$L$k_dksb:: + DQ 09A4FCA1F8550D500h,003D653861CC94C99h + DQ 0115BEDA7B6FC4A00h,0D993256F7E3482C8h +$L$k_dkse:: + DQ 0D5031CCA1FC9D600h,053859A4C994F5086h + DQ 0A23196054FDC7BE8h,0CD5EF96A20B31487h +$L$k_dks9:: + DQ 0B6116FC87ED9A700h,04AED933482255BFCh + DQ 04576516227143300h,08BB89FACE9DAFDCEh + + + + + +$L$k_dipt:: + DQ 00F505B040B545F00h,0154A411E114E451Ah + DQ 086E383E660056500h,012771772F491F194h + +$L$k_dsb9:: + DQ 0851C03539A86D600h,0CAD51F504F994CC9h + DQ 0C03B1789ECD74900h,0725E2C9EB2FBA565h +$L$k_dsbd:: + DQ 07D57CCDFE6B1A200h,0F56E9B13882A4439h + DQ 03CE2FAF724C6CB00h,02931180D15DEEFD3h +$L$k_dsbb:: + DQ 0D022649296B44200h,0602646F6B0F2D404h + DQ 0C19498A6CD596700h,0F3FF0C3E3255AA6Bh +$L$k_dsbe:: + DQ 046F2929626D4D000h,02242600464B4F6B0h + DQ 00C55A6CDFFAAC100h,09467F36B98593E32h +$L$k_dsbo:: + DQ 01387EA537EF94000h,0C7AA6DB9D4943E2Dh + DQ 012D7560F93441D00h,0CA4B8159D8C58E9Ch +DB 
86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54 +DB 52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97 +DB 109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32 +DB 85,110,105,118,101,114,115,105,116,121,41,0 +ALIGN 64 + +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rsi,QWORD PTR[16+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + lea rax,QWORD PTR[184+rax] + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_vpaes_set_encrypt_key + DD imagerel $L$SEH_end_vpaes_set_encrypt_key + DD imagerel $L$SEH_info_vpaes_set_encrypt_key + + DD imagerel $L$SEH_begin_vpaes_set_decrypt_key + DD imagerel $L$SEH_end_vpaes_set_decrypt_key + DD imagerel $L$SEH_info_vpaes_set_decrypt_key + + DD imagerel $L$SEH_begin_vpaes_encrypt + DD imagerel $L$SEH_end_vpaes_encrypt + DD imagerel $L$SEH_info_vpaes_encrypt + + DD imagerel $L$SEH_begin_vpaes_decrypt + DD imagerel $L$SEH_end_vpaes_decrypt + DD imagerel $L$SEH_info_vpaes_decrypt + + DD imagerel $L$SEH_begin_vpaes_cbc_encrypt + DD imagerel $L$SEH_end_vpaes_cbc_encrypt + DD imagerel $L$SEH_info_vpaes_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_vpaes_set_encrypt_key:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc_key_body,imagerel $L$enc_key_epilogue +$L$SEH_info_vpaes_set_decrypt_key:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec_key_body,imagerel $L$dec_key_epilogue +$L$SEH_info_vpaes_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$enc_body,imagerel $L$enc_epilogue +$L$SEH_info_vpaes_decrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$dec_body,imagerel $L$dec_epilogue +$L$SEH_info_vpaes_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$cbc_body,imagerel $L$cbc_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/rsaz-avx2.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/rsaz-avx2.asm new file mode 100644 index 00000000000000..f9188f510e722a --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/rsaz-avx2.asm @@ -0,0 +1,29 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC rsaz_avx2_eligible + +rsaz_avx2_eligible PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +rsaz_avx2_eligible ENDP + +PUBLIC 
rsaz_1024_sqr_avx2 +PUBLIC rsaz_1024_mul_avx2 +PUBLIC rsaz_1024_norm2red_avx2 +PUBLIC rsaz_1024_red2norm_avx2 +PUBLIC rsaz_1024_scatter5_avx2 +PUBLIC rsaz_1024_gather5_avx2 + +rsaz_1024_sqr_avx2 PROC PUBLIC +rsaz_1024_mul_avx2:: +rsaz_1024_norm2red_avx2:: +rsaz_1024_red2norm_avx2:: +rsaz_1024_scatter5_avx2:: +rsaz_1024_gather5_avx2:: +DB 00fh,00bh + DB 0F3h,0C3h ;repret +rsaz_1024_sqr_avx2 ENDP + +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/rsaz-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/rsaz-x86_64.asm new file mode 100644 index 00000000000000..86e828d3dc651b --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/rsaz-x86_64.asm @@ -0,0 +1,1326 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC rsaz_512_sqr + +ALIGN 32 +rsaz_512_sqr PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_sqr:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$sqr_body:: + mov rbp,rdx + mov rdx,QWORD PTR[rsi] + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[128+rsp],rcx + jmp $L$oop_sqr + +ALIGN 32 +$L$oop_sqr:: + mov DWORD PTR[((128+8))+rsp],r8d + + mov rbx,rdx + mul rdx + mov r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,rbx + mov r15,rdx + adc r15,0 + + add r8,r8 + mov rcx,r9 + adc r9,r9 + + mul rax + mov QWORD PTR[rsp],rax + add r8,rdx + adc r9,0 + + mov QWORD PTR[8+rsp],r8 + shr rcx,63 + + + mov r8,QWORD PTR[8+rsi] + mov rax,QWORD PTR[16+rsi] + mul r8 + add r10,rax + mov rax,QWORD PTR[24+rsi] + mov rbx,rdx + adc rbx,0 + + mul r8 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r11,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r12,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r13,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r14,rbx + mov rbx,rdx + adc rbx,0 + + mul r8 + add r15,rax + mov rax,r8 + adc rdx,0 + add r15,rbx + mov r8,rdx + mov rdx,r10 + adc r8,0 + + add rdx,rdx + lea r10,QWORD PTR[r10*2+rcx] + mov rbx,r11 + adc r11,r11 + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[16+rsp],r9 + mov QWORD PTR[24+rsp],r10 + shr rbx,63 + + + mov r9,QWORD PTR[16+rsi] + mov rax,QWORD PTR[24+rsi] + mul r9 + add r12,rax + mov rax,QWORD PTR[32+rsi] + mov rcx,rdx + adc rcx,0 + + mul r9 + add r13,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r13,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + add r14,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r14,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + mov r10,r12 + lea r12,QWORD PTR[r12*2+rbx] + add r15,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r15,rcx + mov rcx,rdx + adc rcx,0 + + mul r9 + shr r10,63 + add r8,rax + mov rax,r9 + adc rdx,0 + add r8,rcx + mov r9,rdx + adc r9,0 + + mov rcx,r13 + lea r13,QWORD PTR[r13*2+r10] + + mul rax + add r11,rax + adc r12,rdx + adc 
r13,0 + + mov QWORD PTR[32+rsp],r11 + mov QWORD PTR[40+rsp],r12 + shr rcx,63 + + + mov r10,QWORD PTR[24+rsi] + mov rax,QWORD PTR[32+rsi] + mul r10 + add r14,rax + mov rax,QWORD PTR[40+rsi] + mov rbx,rdx + adc rbx,0 + + mul r10 + add r15,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r15,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + mov r12,r14 + lea r14,QWORD PTR[r14*2+rcx] + add r8,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r8,rbx + mov rbx,rdx + adc rbx,0 + + mul r10 + shr r12,63 + add r9,rax + mov rax,r10 + adc rdx,0 + add r9,rbx + mov r10,rdx + adc r10,0 + + mov rbx,r15 + lea r15,QWORD PTR[r15*2+r12] + + mul rax + add r13,rax + adc r14,rdx + adc r15,0 + + mov QWORD PTR[48+rsp],r13 + mov QWORD PTR[56+rsp],r14 + shr rbx,63 + + + mov r11,QWORD PTR[32+rsi] + mov rax,QWORD PTR[40+rsi] + mul r11 + add r8,rax + mov rax,QWORD PTR[48+rsi] + mov rcx,rdx + adc rcx,0 + + mul r11 + add r9,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + mov r12,r8 + lea r8,QWORD PTR[r8*2+rbx] + add r9,rcx + mov rcx,rdx + adc rcx,0 + + mul r11 + shr r12,63 + add r10,rax + mov rax,r11 + adc rdx,0 + add r10,rcx + mov r11,rdx + adc r11,0 + + mov rcx,r9 + lea r9,QWORD PTR[r9*2+r12] + + mul rax + add r15,rax + adc r8,rdx + adc r9,0 + + mov QWORD PTR[64+rsp],r15 + mov QWORD PTR[72+rsp],r8 + shr rcx,63 + + + mov r12,QWORD PTR[40+rsi] + mov rax,QWORD PTR[48+rsi] + mul r12 + add r10,rax + mov rax,QWORD PTR[56+rsi] + mov rbx,rdx + adc rbx,0 + + mul r12 + add r11,rax + mov rax,r12 + mov r15,r10 + lea r10,QWORD PTR[r10*2+rcx] + adc rdx,0 + shr r15,63 + add r11,rbx + mov r12,rdx + adc r12,0 + + mov rbx,r11 + lea r11,QWORD PTR[r11*2+r15] + + mul rax + add r9,rax + adc r10,rdx + adc r11,0 + + mov QWORD PTR[80+rsp],r9 + mov QWORD PTR[88+rsp],r10 + + + mov r13,QWORD PTR[48+rsi] + mov rax,QWORD PTR[56+rsi] + mul r13 + add r12,rax + mov rax,r13 + mov r13,rdx + adc r13,0 + + xor r14,r14 + shl rbx,1 + adc r12,r12 + adc r13,r13 + adc r14,r14 + + mul rax + add r11,rax + adc r12,rdx + adc r13,0 + + mov QWORD PTR[96+rsp],r11 + mov QWORD PTR[104+rsp],r12 + + + mov rax,QWORD PTR[56+rsi] + mul rax + add r13,rax + adc rdx,0 + + add r14,rdx + + mov QWORD PTR[112+rsp],r13 + mov QWORD PTR[120+rsp],r14 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + mov rdx,r8 + mov rax,r9 + mov r8d,DWORD PTR[((128+8))+rsp] + mov rsi,rdi + + dec r8d + jnz $L$oop_sqr + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$sqr_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_sqr:: +rsaz_512_sqr ENDP +PUBLIC rsaz_512_mul + +ALIGN 32 +rsaz_512_mul PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push 
r15 + + sub rsp,128+24 +$L$mul_body:: +DB 102,72,15,110,199 +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + mov rbx,QWORD PTR[rdx] + mov rbp,rdx + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul:: +rsaz_512_mul ENDP +PUBLIC rsaz_512_mul_gather4 + +ALIGN 32 +rsaz_512_mul_gather4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_gather4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_gather4_body:: + mov eax,DWORD PTR[64+r9*4+rdx] +DB 102,72,15,110,199 + mov ebx,DWORD PTR[r9*4+rdx] +DB 102,72,15,110,201 + mov QWORD PTR[128+rsp],r8 + + shl rax,32 + or rbx,rax + mov rax,QWORD PTR[rsi] + mov rcx,QWORD PTR[8+rsi] + lea rbp,QWORD PTR[128+r9*4+rdx] + mul rbx + mov QWORD PTR[rsp],rax + mov rax,rcx + mov r8,rdx + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + lea rbp,QWORD PTR[128+rbp] + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rsp] + mov ecx,7 + jmp $L$oop_mul_gather + +ALIGN 32 +$L$oop_mul_gather:: + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + movd xmm4,DWORD PTR[rbp] + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + movd xmm5,DWORD PTR[64+rbp] + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + pslldq xmm5,4 + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + por xmm4,xmm5 + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx +DB 102,72,15,126,227 + add r15,rax + 
mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[128+rbp] + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul_gather + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] + sbb rcx,rcx + + call __rsaz_512_subtract + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_gather4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_gather4:: +rsaz_512_mul_gather4 ENDP +PUBLIC rsaz_512_mul_scatter4 + +ALIGN 32 +rsaz_512_mul_scatter4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_scatter4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + sub rsp,128+24 +$L$mul_scatter4_body:: + lea r8,QWORD PTR[r9*4+r8] +DB 102,72,15,110,199 +DB 102,72,15,110,202 +DB 102,73,15,110,208 + mov QWORD PTR[128+rsp],rcx + + mov rbp,rdi + mov rbx,QWORD PTR[rdi] + call __rsaz_512_mul + +DB 102,72,15,126,199 +DB 102,72,15,126,205 + + mov r8,QWORD PTR[rsp] + mov r9,QWORD PTR[8+rsp] + mov r10,QWORD PTR[16+rsp] + mov r11,QWORD PTR[24+rsp] + mov r12,QWORD PTR[32+rsp] + mov r13,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r15,QWORD PTR[56+rsp] + + call __rsaz_512_reduce + add r8,QWORD PTR[64+rsp] + adc r9,QWORD PTR[72+rsp] + adc r10,QWORD PTR[80+rsp] + adc r11,QWORD PTR[88+rsp] + adc r12,QWORD PTR[96+rsp] + adc r13,QWORD PTR[104+rsp] + adc r14,QWORD PTR[112+rsp] + adc r15,QWORD PTR[120+rsp] +DB 102,72,15,126,214 + sbb rcx,rcx + + call __rsaz_512_subtract + + mov DWORD PTR[rsi],r8d + shr r8,32 + mov DWORD PTR[128+rsi],r9d + shr r9,32 + mov DWORD PTR[256+rsi],r10d + shr r10,32 + mov DWORD PTR[384+rsi],r11d + shr r11,32 + mov DWORD PTR[512+rsi],r12d + shr r12,32 + mov DWORD PTR[640+rsi],r13d + shr r13,32 + mov DWORD PTR[768+rsi],r14d + shr r14,32 + mov DWORD PTR[896+rsi],r15d + shr r15,32 + mov DWORD PTR[64+rsi],r8d + mov DWORD PTR[192+rsi],r9d + mov DWORD PTR[320+rsi],r10d + mov DWORD PTR[448+rsi],r11d + mov DWORD PTR[576+rsi],r12d + mov DWORD PTR[704+rsi],r13d + mov DWORD PTR[832+rsi],r14d + mov DWORD PTR[960+rsi],r15d + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_scatter4_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h 
;repret +$L$SEH_end_rsaz_512_mul_scatter4:: +rsaz_512_mul_scatter4 ENDP +PUBLIC rsaz_512_mul_by_one + +ALIGN 32 +rsaz_512_mul_by_one PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rsaz_512_mul_by_one:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + sub rsp,128+24 +$L$mul_by_one_body:: + mov rbp,rdx + mov QWORD PTR[128+rsp],rcx + + mov r8,QWORD PTR[rsi] + pxor xmm0,xmm0 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r12,QWORD PTR[32+rsi] + mov r13,QWORD PTR[40+rsi] + mov r14,QWORD PTR[48+rsi] + mov r15,QWORD PTR[56+rsi] + + movdqa XMMWORD PTR[rsp],xmm0 + movdqa XMMWORD PTR[16+rsp],xmm0 + movdqa XMMWORD PTR[32+rsp],xmm0 + movdqa XMMWORD PTR[48+rsp],xmm0 + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa XMMWORD PTR[80+rsp],xmm0 + movdqa XMMWORD PTR[96+rsp],xmm0 + call __rsaz_512_reduce + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + lea rax,QWORD PTR[((128+24+48))+rsp] + mov r15,QWORD PTR[((-48))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$mul_by_one_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rsaz_512_mul_by_one:: +rsaz_512_mul_by_one ENDP + +ALIGN 32 +__rsaz_512_reduce PROC PRIVATE + mov rbx,r8 + imul rbx,QWORD PTR[((128+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$reduction_loop + +ALIGN 32 +$L$reduction_loop:: + mul rbx + mov rax,QWORD PTR[8+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rbp] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r10,r11 + mov rsi,QWORD PTR[((128+8))+rsp] + + + adc rdx,0 + mov r11,rdx + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rbp] + adc rdx,0 + imul rsi,r8 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jne $L$reduction_loop + + DB 0F3h,0C3h ;repret +__rsaz_512_reduce ENDP + +ALIGN 32 +__rsaz_512_subtract PROC PRIVATE + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + mov r8,QWORD PTR[rbp] + mov r9,QWORD PTR[8+rbp] + neg r8 + not r9 + and r8,rcx + mov r10,QWORD PTR[16+rbp] + and r9,rcx + not r10 + mov r11,QWORD PTR[24+rbp] + and r10,rcx + not r11 + mov r12,QWORD PTR[32+rbp] + and r11,rcx + not r12 + mov r13,QWORD PTR[40+rbp] + and r12,rcx + not r13 + mov r14,QWORD PTR[48+rbp] + and r13,rcx + not r14 + mov r15,QWORD PTR[56+rbp] + and r14,rcx + not r15 + and r15,rcx + + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD 
PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_subtract ENDP + +ALIGN 32 +__rsaz_512_mul PROC PRIVATE + lea rdi,QWORD PTR[8+rsp] + + mov rax,QWORD PTR[rsi] + mul rbx + mov QWORD PTR[rdi],rax + mov rax,QWORD PTR[8+rsi] + mov r8,rdx + + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rsi] + mov r9,rdx + adc r9,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[24+rsi] + mov r10,rdx + adc r10,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[32+rsi] + mov r11,rdx + adc r11,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[40+rsi] + mov r12,rdx + adc r12,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[48+rsi] + mov r13,rdx + adc r13,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[56+rsi] + mov r14,rdx + adc r14,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[rsi] + mov r15,rdx + adc r15,0 + + lea rbp,QWORD PTR[8+rbp] + lea rdi,QWORD PTR[8+rdi] + + mov ecx,7 + jmp $L$oop_mul + +ALIGN 32 +$L$oop_mul:: + mov rbx,QWORD PTR[rbp] + mul rbx + add r8,rax + mov rax,QWORD PTR[8+rsi] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add r8,r9 + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[32+rsi] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[40+rsi] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[48+rsi] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[56+rsi] + adc rdx,0 + add r13,r14 + mov r14,rdx + lea rbp,QWORD PTR[8+rbp] + adc r14,0 + + mul rbx + add r15,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + lea rdi,QWORD PTR[8+rdi] + + dec ecx + jnz $L$oop_mul + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + + DB 0F3h,0C3h ;repret +__rsaz_512_mul ENDP +PUBLIC rsaz_512_scatter4 + +ALIGN 16 +rsaz_512_scatter4 PROC PUBLIC + lea rcx,QWORD PTR[r8*4+rcx] + mov r9d,8 + jmp $L$oop_scatter +ALIGN 16 +$L$oop_scatter:: + mov rax,QWORD PTR[rdx] + lea rdx,QWORD PTR[8+rdx] + mov DWORD PTR[rcx],eax + shr rax,32 + mov DWORD PTR[64+rcx],eax + lea rcx,QWORD PTR[128+rcx] + dec r9d + jnz $L$oop_scatter + DB 0F3h,0C3h ;repret +rsaz_512_scatter4 ENDP + +PUBLIC rsaz_512_gather4 + +ALIGN 16 +rsaz_512_gather4 PROC PUBLIC + lea rdx,QWORD PTR[r8*4+rdx] + mov r9d,8 + jmp $L$oop_gather +ALIGN 16 +$L$oop_gather:: + mov eax,DWORD PTR[rdx] + mov r8d,DWORD PTR[64+rdx] + lea rdx,QWORD PTR[128+rdx] + shl r8,32 + or rax,r8 + mov QWORD PTR[rcx],rax + lea rcx,QWORD PTR[8+rcx] + dec r9d + jnz $L$oop_gather + DB 0F3h,0C3h ;repret +rsaz_512_gather4 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD 
PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rax,QWORD PTR[((128+24+48))+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rsaz_512_sqr + DD imagerel $L$SEH_end_rsaz_512_sqr + DD imagerel $L$SEH_info_rsaz_512_sqr + + DD imagerel $L$SEH_begin_rsaz_512_mul + DD imagerel $L$SEH_end_rsaz_512_mul + DD imagerel $L$SEH_info_rsaz_512_mul + + DD imagerel $L$SEH_begin_rsaz_512_mul_gather4 + DD imagerel $L$SEH_end_rsaz_512_mul_gather4 + DD imagerel $L$SEH_info_rsaz_512_mul_gather4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_end_rsaz_512_mul_scatter4 + DD imagerel $L$SEH_info_rsaz_512_mul_scatter4 + + DD imagerel $L$SEH_begin_rsaz_512_mul_by_one + DD imagerel $L$SEH_end_rsaz_512_mul_by_one + DD imagerel $L$SEH_info_rsaz_512_mul_by_one + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rsaz_512_sqr:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$sqr_body,imagerel $L$sqr_epilogue +$L$SEH_info_rsaz_512_mul:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_rsaz_512_mul_gather4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_gather4_body,imagerel $L$mul_gather4_epilogue +$L$SEH_info_rsaz_512_mul_scatter4:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_scatter4_body,imagerel $L$mul_scatter4_epilogue +$L$SEH_info_rsaz_512_mul_by_one:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$mul_by_one_body,imagerel $L$mul_by_one_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-gf2m.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-gf2m.asm new file mode 100644 index 00000000000000..d2e9c6600bfc47 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-gf2m.asm @@ -0,0 +1,399 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + +ALIGN 16 +_mul_1x1 PROC PRIVATE + sub rsp,128+8 + mov r9,-1 + lea rsi,QWORD PTR[rax*1+rax] + shr r9,3 + lea rdi,QWORD PTR[rax*4] + and r9,rax + lea r12,QWORD PTR[rax*8] + sar rax,63 + lea r10,QWORD PTR[r9*1+r9] + sar rsi,63 + lea r11,QWORD PTR[r9*4] + and rax,rbp + sar rdi,63 + mov rdx,rax + shl rax,63 + and rsi,rbp + shr rdx,1 + mov rcx,rsi + shl rsi,62 + and rdi,rbp + shr rcx,2 + xor rax,rsi + mov rbx,rdi + shl rdi,61 + xor 
rdx,rcx + shr rbx,3 + xor rax,rdi + xor rdx,rbx + + mov r13,r9 + mov QWORD PTR[rsp],0 + xor r13,r10 + mov QWORD PTR[8+rsp],r9 + mov r14,r11 + mov QWORD PTR[16+rsp],r10 + xor r14,r12 + mov QWORD PTR[24+rsp],r13 + + xor r9,r11 + mov QWORD PTR[32+rsp],r11 + xor r10,r11 + mov QWORD PTR[40+rsp],r9 + xor r13,r11 + mov QWORD PTR[48+rsp],r10 + xor r9,r14 + mov QWORD PTR[56+rsp],r13 + xor r10,r14 + + mov QWORD PTR[64+rsp],r12 + xor r13,r14 + mov QWORD PTR[72+rsp],r9 + xor r9,r11 + mov QWORD PTR[80+rsp],r10 + xor r10,r11 + mov QWORD PTR[88+rsp],r13 + + xor r13,r11 + mov QWORD PTR[96+rsp],r14 + mov rsi,r8 + mov QWORD PTR[104+rsp],r9 + and rsi,rbp + mov QWORD PTR[112+rsp],r10 + shr rbp,4 + mov QWORD PTR[120+rsp],r13 + mov rdi,r8 + and rdi,rbp + shr rbp,4 + + movq xmm0,QWORD PTR[rsi*8+rsp] + mov rsi,r8 + and rsi,rbp + shr rbp,4 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,4 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,60 + xor rax,rcx + pslldq xmm1,1 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,12 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,52 + xor rax,rcx + pslldq xmm1,2 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,20 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,44 + xor rax,rcx + pslldq xmm1,3 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,28 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,36 + xor rax,rcx + pslldq xmm1,4 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,36 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,28 + xor rax,rcx + pslldq xmm1,5 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,44 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,20 + xor rax,rcx + pslldq xmm1,6 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rdi,r8 + mov rbx,rcx + shl rcx,52 + and rdi,rbp + movq xmm1,QWORD PTR[rsi*8+rsp] + shr rbx,12 + xor rax,rcx + pslldq xmm1,7 + mov rsi,r8 + shr rbp,4 + xor rdx,rbx + and rsi,rbp + shr rbp,4 + pxor xmm0,xmm1 + mov rcx,QWORD PTR[rdi*8+rsp] + mov rbx,rcx + shl rcx,60 +DB 102,72,15,126,198 + shr rbx,4 + xor rax,rcx + psrldq xmm0,8 + xor rdx,rbx +DB 102,72,15,126,199 + xor rax,rsi + xor rdx,rdi + + add rsp,128+8 + DB 0F3h,0C3h ;repret +$L$end_mul_1x1:: +_mul_1x1 ENDP +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC bn_GF2m_mul_2x2 + +ALIGN 16 +bn_GF2m_mul_2x2 PROC PUBLIC + mov rax,QWORD PTR[OPENSSL_ia32cap_P] + bt rax,33 + jnc $L$vanilla_mul_2x2 + +DB 102,72,15,110,194 +DB 102,73,15,110,201 +DB 102,73,15,110,208 + movq xmm3,QWORD PTR[40+rsp] + movdqa xmm4,xmm0 + movdqa xmm5,xmm1 +DB 102,15,58,68,193,0 + pxor xmm4,xmm2 + pxor xmm5,xmm3 +DB 102,15,58,68,211,0 +DB 102,15,58,68,229,0 + xorps xmm4,xmm0 + xorps xmm4,xmm2 + movdqa xmm5,xmm4 + pslldq xmm4,8 + psrldq xmm5,8 + pxor xmm2,xmm4 + pxor xmm0,xmm5 + movdqu XMMWORD PTR[rcx],xmm2 + movdqu XMMWORD PTR[16+rcx],xmm0 + DB 0F3h,0C3h ;repret + +ALIGN 16 +$L$vanilla_mul_2x2:: + lea rsp,QWORD PTR[((-136))+rsp] + mov r10,QWORD PTR[176+rsp] + mov QWORD PTR[120+rsp],rdi + mov QWORD 
PTR[128+rsp],rsi + mov QWORD PTR[80+rsp],r14 + mov QWORD PTR[88+rsp],r13 + mov QWORD PTR[96+rsp],r12 + mov QWORD PTR[104+rsp],rbp + mov QWORD PTR[112+rsp],rbx +$L$body_mul_2x2:: + mov QWORD PTR[32+rsp],rcx + mov QWORD PTR[40+rsp],rdx + mov QWORD PTR[48+rsp],r8 + mov QWORD PTR[56+rsp],r9 + mov QWORD PTR[64+rsp],r10 + + mov r8,0fh + mov rax,rdx + mov rbp,r9 + call _mul_1x1 + mov QWORD PTR[16+rsp],rax + mov QWORD PTR[24+rsp],rdx + + mov rax,QWORD PTR[48+rsp] + mov rbp,QWORD PTR[64+rsp] + call _mul_1x1 + mov QWORD PTR[rsp],rax + mov QWORD PTR[8+rsp],rdx + + mov rax,QWORD PTR[40+rsp] + mov rbp,QWORD PTR[56+rsp] + xor rax,QWORD PTR[48+rsp] + xor rbp,QWORD PTR[64+rsp] + call _mul_1x1 + mov rbx,QWORD PTR[rsp] + mov rcx,QWORD PTR[8+rsp] + mov rdi,QWORD PTR[16+rsp] + mov rsi,QWORD PTR[24+rsp] + mov rbp,QWORD PTR[32+rsp] + + xor rax,rdx + xor rdx,rcx + xor rax,rbx + mov QWORD PTR[rbp],rbx + xor rdx,rdi + mov QWORD PTR[24+rbp],rsi + xor rax,rsi + xor rdx,rsi + xor rax,rdx + mov QWORD PTR[16+rbp],rdx + mov QWORD PTR[8+rbp],rax + + mov r14,QWORD PTR[80+rsp] + mov r13,QWORD PTR[88+rsp] + mov r12,QWORD PTR[96+rsp] + mov rbp,QWORD PTR[104+rsp] + mov rbx,QWORD PTR[112+rsp] + mov rdi,QWORD PTR[120+rsp] + mov rsi,QWORD PTR[128+rsp] + lea rsp,QWORD PTR[136+rsp] + DB 0F3h,0C3h ;repret +$L$end_mul_2x2:: +bn_GF2m_mul_2x2 ENDP +DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 +ALIGN 16 +EXTERN __imp_RtlVirtualUnwind:NEAR + + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body_mul_2x2] + cmp rbx,r10 + jb $L$in_prologue + + mov r14,QWORD PTR[80+rax] + mov r13,QWORD PTR[88+rax] + mov r12,QWORD PTR[96+rax] + mov rbp,QWORD PTR[104+rax] + mov rbx,QWORD PTR[112+rax] + mov rdi,QWORD PTR[120+rax] + mov rsi,QWORD PTR[128+rax] + + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + +$L$in_prologue:: + lea rax,QWORD PTR[136+rax] + mov QWORD PTR[152+r8],rax + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel _mul_1x1 + DD imagerel $L$end_mul_1x1 + DD imagerel $L$SEH_info_1x1 + + DD imagerel $L$vanilla_mul_2x2 + DD imagerel $L$end_mul_2x2 + DD imagerel $L$SEH_info_2x2 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_1x1:: +DB 001h,007h,002h,000h +DB 007h,001h,011h,000h +$L$SEH_info_2x2:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm new file mode 100644 index 
00000000000000..afec83bd17596c --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm @@ -0,0 +1,946 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC bn_mul_mont + +ALIGN 16 +bn_mul_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test r9d,3 + jnz $L$mul_enter + cmp r9d,8 + jb $L$mul_enter + cmp rdx,rsi + jne $L$mul4x_enter + test r9d,7 + jz $L$sqr8x_enter + jmp $L$mul4x_enter + +ALIGN 16 +$L$mul_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[2+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul_body:: + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jb $L$outer + + xor r14,r14 + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 + jnz $L$sub + + sbb rax,0 + xor r14,r14 + and rsi,rax + not rax + mov rcx,rdi + and rcx,rax + mov r15,r9 + or rsi,rcx +ALIGN 16 +$L$copy:: + mov rax,QWORD PTR[r14*8+rsi] + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rax + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov 
r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont:: +bn_mul_mont ENDP + +ALIGN 16 +bn_mul4x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mul4x_enter:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r9d,r9d + lea r10,QWORD PTR[4+r9] + mov r11,rsp + neg r10 + lea rsp,QWORD PTR[r10*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],r11 +$L$mul4x_body:: + mov QWORD PTR[16+r9*8+rsp],rdi + mov r12,rdx + mov r8,QWORD PTR[r8] + mov rbx,QWORD PTR[r12] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$1st4x +ALIGN 16 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + lea r14,QWORD PTR[1+r14] +ALIGN 4 +$L$outer4x:: + mov rbx,QWORD PTR[r14*8+r12] + xor r15,r15 + mov r10,QWORD PTR[rsp] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + imul rbp,r10 + mov r11,rdx + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + add 
rdi,r11 + lea r15,QWORD PTR[4+r15] + adc rdx,0 + mov QWORD PTR[rsp],rdi + mov r13,rdx + jmp $L$inner4x +ALIGN 16 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[8+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r15*8+rsp] + adc rdx,0 + lea r15,QWORD PTR[4+r15] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[((-16))+r15*8+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-32))+r15*8+rsp],rdi + mov r13,rdx + cmp r15,r9 + jb $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-16))+r15*8+rcx] + adc rdx,0 + add r10,QWORD PTR[((-16))+r15*8+rsp] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*8+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r15*8+rsp],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-8))+r15*8+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r15*8+rsp] + adc rdx,0 + lea r14,QWORD PTR[1+r14] + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],rdi + mov r13,rdx + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r9*8+rsp] + adc rdi,0 + mov QWORD PTR[((-8))+r15*8+rsp],r13 + mov QWORD PTR[r15*8+rsp],rdi + + cmp r14,r9 + jb $L$outer4x + mov rdi,QWORD PTR[16+r9*8+rsp] + mov rax,QWORD PTR[rsp] + pxor xmm0,xmm0 + mov rdx,QWORD PTR[8+rsp] + shr r9,2 + lea rsi,QWORD PTR[rsp] + xor r14,r14 + + sub rax,QWORD PTR[rcx] + mov rbx,QWORD PTR[16+rsi] + mov rbp,QWORD PTR[24+rsi] + sbb rdx,QWORD PTR[8+rcx] + lea r15,QWORD PTR[((-1))+r9] + jmp $L$sub4x +ALIGN 16 +$L$sub4x:: + mov QWORD PTR[r14*8+rdi],rax + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov rax,QWORD PTR[32+r14*8+rsi] + mov rdx,QWORD PTR[40+r14*8+rsi] + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + mov QWORD PTR[24+r14*8+rdi],rbp + sbb rax,QWORD PTR[32+r14*8+rcx] + mov rbx,QWORD PTR[48+r14*8+rsi] + mov rbp,QWORD PTR[56+r14*8+rsi] + sbb rdx,QWORD PTR[40+r14*8+rcx] + lea r14,QWORD PTR[4+r14] + dec r15 + jnz $L$sub4x + + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[32+r14*8+rsi] + sbb rbx,QWORD PTR[16+r14*8+rcx] + mov QWORD PTR[8+r14*8+rdi],rdx + sbb rbp,QWORD PTR[24+r14*8+rcx] + mov QWORD PTR[16+r14*8+rdi],rbx + + sbb rax,0 + mov QWORD PTR[24+r14*8+rdi],rbp + xor r14,r14 + and rsi,rax + not rax + mov rcx,rdi + and rcx,rax + lea r15,QWORD PTR[((-1))+r9] + or rsi,rcx + + movdqu xmm1,XMMWORD PTR[rsi] + movdqa XMMWORD PTR[rsp],xmm0 + movdqu XMMWORD PTR[rdi],xmm1 + jmp $L$copy4x +ALIGN 16 +$L$copy4x:: + movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] + movdqu xmm1,XMMWORD PTR[32+r14*1+rsi] + movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 + movdqu XMMWORD 
PTR[16+r14*1+rdi],xmm2 + movdqa XMMWORD PTR[32+r14*1+rsp],xmm0 + movdqu XMMWORD PTR[32+r14*1+rdi],xmm1 + lea r14,QWORD PTR[32+r14] + dec r15 + jnz $L$copy4x + + shl r9,2 + movdqu xmm2,XMMWORD PTR[16+r14*1+rsi] + movdqa XMMWORD PTR[16+r14*1+rsp],xmm0 + movdqu XMMWORD PTR[16+r14*1+rdi],xmm2 + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont:: +bn_mul4x_mont ENDP +EXTERN bn_sqr8x_internal:NEAR + + +ALIGN 32 +bn_sqr8x_mont PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_sqr8x_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$sqr8x_enter:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r10d,r9d + shl r9d,3 + shl r10,3+2 + neg r9 + + + + + + + lea r11,QWORD PTR[((-64))+r9*4+rsp] + mov r8,QWORD PTR[r8] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$sqr8x_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + jmp $L$sqr8x_sp_done + +ALIGN 32 +$L$sqr8x_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*4] + lea rsp,QWORD PTR[((-64))+r9*4+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$sqr8x_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + lea r11,QWORD PTR[64+r9*2+rsp] + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$sqr8x_body:: + + mov rbp,r9 +DB 102,73,15,110,211 + shr rbp,3+2 + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+8))] + jmp $L$sqr8x_copy_n + +ALIGN 32 +$L$sqr8x_copy_n:: + movq xmm0,QWORD PTR[rcx] + movq xmm1,QWORD PTR[8+rcx] + movq xmm3,QWORD PTR[16+rcx] + movq xmm4,QWORD PTR[24+rcx] + lea rcx,QWORD PTR[32+rcx] + movdqa XMMWORD PTR[r11],xmm0 + movdqa XMMWORD PTR[16+r11],xmm1 + movdqa XMMWORD PTR[32+r11],xmm3 + movdqa XMMWORD PTR[48+r11],xmm4 + lea r11,QWORD PTR[64+r11] + dec rbp + jnz $L$sqr8x_copy_n + + pxor xmm0,xmm0 +DB 102,72,15,110,207 +DB 102,73,15,110,218 + call bn_sqr8x_internal + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + lea rdx,QWORD PTR[64+r9*2+rsp] + shr r9,3+2 + mov rsi,QWORD PTR[40+rsp] + jmp $L$sqr8x_zero + +ALIGN 32 +$L$sqr8x_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + movdqa XMMWORD PTR[rdx],xmm0 + movdqa XMMWORD PTR[16+rdx],xmm0 + movdqa XMMWORD PTR[32+rdx],xmm0 + movdqa XMMWORD PTR[48+rdx],xmm0 + lea rdx,QWORD PTR[64+rdx] + dec r9 + jnz $L$sqr8x_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$sqr8x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_sqr8x_mont:: +bn_sqr8x_mont ENDP +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +ALIGN 16 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE + push rsi + push rdi + 
push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov r10,QWORD PTR[192+r8] + mov rax,QWORD PTR[8+r10*8+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + jmp $L$common_seh_tail +mul_handler ENDP + + +ALIGN 16 +sqr_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[40+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +sqr_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_bn_mul_mont + DD imagerel $L$SEH_end_bn_mul_mont + DD imagerel $L$SEH_info_bn_mul_mont + + DD imagerel $L$SEH_begin_bn_mul4x_mont + DD imagerel $L$SEH_end_bn_mul4x_mont + DD imagerel $L$SEH_info_bn_mul4x_mont + + DD imagerel $L$SEH_begin_bn_sqr8x_mont + DD imagerel $L$SEH_end_bn_sqr8x_mont + DD imagerel $L$SEH_info_bn_sqr8x_mont +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_bn_mul_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +$L$SEH_info_bn_mul4x_mont:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +$L$SEH_info_bn_sqr8x_mont:: +DB 9,0,0,0 + DD imagerel sqr_handler + DD imagerel $L$sqr8x_body,imagerel $L$sqr8x_epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm new file mode 
100644 index 00000000000000..c47130f44c90a1 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm @@ -0,0 +1,2076 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC bn_mul_mont_gather5 + +ALIGN 64 +bn_mul_mont_gather5 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + test r9d,7 + jnz $L$mul_enter + jmp $L$mul4x_enter + +ALIGN 16 +$L$mul_enter:: + mov r9d,r9d + mov rax,rsp + mov r10d,DWORD PTR[56+rsp] + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + lea r11,QWORD PTR[2+r9] + neg r11 + lea rsp,QWORD PTR[r11*8+rsp] + and rsp,-1024 + + mov QWORD PTR[8+r9*8+rsp],rax +$L$mul_body:: + mov r12,rdx + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea r12,QWORD PTR[96+r11*8+r12] + movq xmm4,QWORD PTR[r10*8+rax] + movq xmm5,QWORD PTR[8+r10*8+rax] + movq xmm6,QWORD PTR[16+r10*8+rax] + movq xmm7,QWORD PTR[24+r10*8+rax] + + movq xmm0,QWORD PTR[((-96))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + +DB 102,72,15,126,195 + + mov r8,QWORD PTR[r8] + mov rax,QWORD PTR[rsi] + + xor r14,r14 + xor r15,r15 + + movq xmm0,QWORD PTR[((-96))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$1st_enter + +ALIGN 16 +$L$1st:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r11 + mov r11,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$1st_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + lea r15,QWORD PTR[1+r15] + mov r10,rdx + + mul rbp + cmp r15,r9 + jne $L$1st + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + mov r11,r10 + + xor rdx,rdx + add r13,r11 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + jmp $L$outer +ALIGN 16 +$L$outer:: + xor r15,r15 + mov rbp,r8 + mov r10,QWORD PTR[rsp] + + movq xmm0,QWORD PTR[((-96))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + + imul rbp,r10 + mov r11,rdx + + por xmm0,xmm2 + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+rsi] + adc rdx,0 + mov r10,QWORD PTR[8+rsp] + mov r13,rdx + + lea r15,QWORD PTR[1+r15] + jmp $L$inner_enter + +ALIGN 16 +$L$inner:: + add r13,rax + mov rax,QWORD PTR[r15*8+rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD 
PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + +$L$inner_enter:: + mul rbx + add r11,rax + mov rax,QWORD PTR[r15*8+rcx] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + lea r15,QWORD PTR[1+r15] + + mul rbp + cmp r15,r9 + jne $L$inner + +DB 102,72,15,126,195 + + add r13,rax + mov rax,QWORD PTR[rsi] + adc rdx,0 + add r13,r10 + mov r10,QWORD PTR[r15*8+rsp] + adc rdx,0 + mov QWORD PTR[((-16))+r15*8+rsp],r13 + mov r13,rdx + + xor rdx,rdx + add r13,r11 + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r9*8+rsp],r13 + mov QWORD PTR[r9*8+rsp],rdx + + lea r14,QWORD PTR[1+r14] + cmp r14,r9 + jb $L$outer + + xor r14,r14 + mov rax,QWORD PTR[rsp] + lea rsi,QWORD PTR[rsp] + mov r15,r9 + jmp $L$sub +ALIGN 16 +$L$sub:: sbb rax,QWORD PTR[r14*8+rcx] + mov QWORD PTR[r14*8+rdi],rax + mov rax,QWORD PTR[8+r14*8+rsi] + lea r14,QWORD PTR[1+r14] + dec r15 + jnz $L$sub + + sbb rax,0 + xor r14,r14 + and rsi,rax + not rax + mov rcx,rdi + and rcx,rax + mov r15,r9 + or rsi,rcx +ALIGN 16 +$L$copy:: + mov rax,QWORD PTR[r14*8+rsi] + mov QWORD PTR[r14*8+rsp],r14 + mov QWORD PTR[r14*8+rdi],rax + lea r14,QWORD PTR[1+r14] + sub r15,1 + jnz $L$copy + + mov rsi,QWORD PTR[8+r9*8+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul_mont_gather5:: +bn_mul_mont_gather5 ENDP + +ALIGN 32 +bn_mul4x_mont_gather5 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_mul4x_mont_gather5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +$L$mul4x_enter:: +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$mul4xsp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$mul4xsp_done + +ALIGN 32 +$L$mul4xsp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$mul4xsp_done:: + and rsp,-64 + neg r9 + + mov QWORD PTR[40+rsp],rax +$L$mul4x_body:: + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + movaps xmm6,XMMWORD PTR[((-88))+rsi] + movaps xmm7,XMMWORD PTR[((-72))+rsi] + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$mul4x_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_mul4x_mont_gather5:: +bn_mul4x_mont_gather5 ENDP + + +ALIGN 32 +mul4x_internal PROC PRIVATE + shl r9,5 + mov r10d,DWORD PTR[56+rax] + lea r13,QWORD PTR[256+r9*1+rdx] + shr r9,5 + mov r11,r10 + shr r10,3 + and r11,7 + not r10 + lea rax,QWORD PTR[$L$magic_masks] + and r10,3 + lea r12,QWORD PTR[96+r11*8+rdx] + movq xmm4,QWORD PTR[r10*8+rax] + movq xmm5,QWORD PTR[8+r10*8+rax] + add r11,7 + movq xmm6,QWORD 
PTR[16+r10*8+rax] + movq xmm7,QWORD PTR[24+r10*8+rax] + and r11,7 + + movq xmm0,QWORD PTR[((-96))+r12] + lea r14,QWORD PTR[256+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + pand xmm2,xmm6 +DB 067h + por xmm0,xmm1 + movq xmm1,QWORD PTR[((-96))+r14] +DB 067h + pand xmm3,xmm7 +DB 067h + por xmm0,xmm2 + movq xmm2,QWORD PTR[((-32))+r14] +DB 067h + pand xmm1,xmm4 +DB 067h + por xmm0,xmm3 + movq xmm3,QWORD PTR[32+r14] + +DB 102,72,15,126,195 + movq xmm0,QWORD PTR[96+r14] + mov QWORD PTR[((16+8))+rsp],r13 + mov QWORD PTR[((56+8))+rsp],rdi + + mov r8,QWORD PTR[r8] + mov rax,QWORD PTR[rsi] + lea rsi,QWORD PTR[r9*1+rsi] + neg r9 + + mov rbp,r8 + mul rbx + mov r10,rax + mov rax,QWORD PTR[rcx] + + pand xmm2,xmm5 + pand xmm3,xmm6 + por xmm1,xmm2 + + imul rbp,r10 + + + + + + + + lea r14,QWORD PTR[((64+8))+r11*8+rsp] + mov r11,rdx + + pand xmm0,xmm7 + por xmm1,xmm3 + lea r12,QWORD PTR[512+r12] + por xmm0,xmm1 + + mul rbp + add r10,rax + mov rax,QWORD PTR[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[r14],rdi + mov r13,rdx + jmp $L$1st4x + +ALIGN 32 +$L$1st4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-8))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[r14],rdi + mov r13,rdx + + add r15,32 + jnz $L$1st4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov r13,rdx + +DB 102,72,15,126,195 + lea rcx,QWORD PTR[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + mov QWORD PTR[((-8))+r14],r13 + + jmp $L$outer4x + +ALIGN 32 +$L$outer4x:: + mov r10,QWORD PTR[r9*1+r14] + mov rbp,r8 + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + + movq xmm0,QWORD PTR[((-96))+r12] + movq xmm1,QWORD PTR[((-32))+r12] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[32+r12] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[96+r12] + + imul rbp,r10 +DB 067h + mov r11,rdx + mov QWORD PTR[r14],rdi + + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 + por xmm0,xmm2 + lea r14,QWORD PTR[r9*1+r14] + lea r12,QWORD PTR[256+r12] + por xmm0,xmm3 + + mul rbp + add r10,rax + mov rax,QWORD 
PTR[8+r9*1+rsi] + adc rdx,0 + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r9*1+rsi] + adc rdx,0 + add rdi,r11 + lea r15,QWORD PTR[32+r9] + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov r13,rdx + jmp $L$inner4x + +ALIGN 32 +$L$inner4x:: + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + adc rdx,0 + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[((-16))+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r15*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov r13,rdx + + mul rbx + add r10,rax + mov rax,QWORD PTR[rcx] + adc rdx,0 + add r10,QWORD PTR[r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[8+r15*1+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-16))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,QWORD PTR[16+rcx] + adc rdx,0 + add r11,QWORD PTR[8+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[16+r15*1+rsi] + adc rdx,0 + add rdi,r11 + lea rcx,QWORD PTR[64+rcx] + adc rdx,0 + mov QWORD PTR[((-8))+r14],r13 + mov r13,rdx + + add r15,32 + jnz $L$inner4x + + mul rbx + add r10,rax + mov rax,QWORD PTR[((-32))+rcx] + adc rdx,0 + add r10,QWORD PTR[16+r14] + lea r14,QWORD PTR[32+r14] + adc rdx,0 + mov r11,rdx + + mul rbp + add r13,rax + mov rax,QWORD PTR[((-8))+rsi] + adc rdx,0 + add r13,r10 + adc rdx,0 + mov QWORD PTR[((-32))+r14],rdi + mov rdi,rdx + + mul rbx + add r11,rax + mov rax,rbp + mov rbp,QWORD PTR[((-16))+rcx] + adc rdx,0 + add r11,QWORD PTR[((-8))+r14] + adc rdx,0 + mov r10,rdx + + mul rbp + add rdi,rax + mov rax,QWORD PTR[r9*1+rsi] + adc rdx,0 + add rdi,r11 + adc rdx,0 + mov QWORD PTR[((-24))+r14],r13 + mov r13,rdx + +DB 102,72,15,126,195 + mov QWORD PTR[((-16))+r14],rdi + lea rcx,QWORD PTR[r9*2+rcx] + + xor rdi,rdi + add r13,r10 + adc rdi,0 + add r13,QWORD PTR[r14] + adc rdi,0 + mov QWORD PTR[((-8))+r14],r13 + + cmp r12,QWORD PTR[((16+8))+rsp] + jb $L$outer4x + sub rbp,r13 + adc r15,r15 + or rdi,r15 + xor rdi,1 + lea rbx,QWORD PTR[r9*1+r14] + lea rbp,QWORD PTR[rdi*8+rcx] + mov rcx,r9 + sar rcx,3+2 + mov rdi,QWORD PTR[((56+8))+rsp] + jmp $L$sqr4x_sub +mul4x_internal ENDP +PUBLIC bn_power5 + +ALIGN 32 +bn_power5 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_power5:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + mov r10d,r9d + shl r9d,3 + shl r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$pwr_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$pwr_sp_done + +ALIGN 32 +$L$pwr_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$pwr_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 + mov 
QWORD PTR[40+rsp],rax +$L$power5_body:: +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 102,73,15,110,218 +DB 102,72,15,110,226 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + +DB 102,72,15,126,209 +DB 102,72,15,126,226 + mov rdi,rsi + mov rax,QWORD PTR[40+rsp] + lea r8,QWORD PTR[32+rsp] + + call mul4x_internal + + mov rsi,QWORD PTR[40+rsp] + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$power5_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_power5:: +bn_power5 ENDP + +PUBLIC bn_sqr8x_internal + + +ALIGN 32 +bn_sqr8x_internal PROC PUBLIC +__bn_sqr8x_internal:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + lea rbp,QWORD PTR[32+r10] + lea rsi,QWORD PTR[r9*1+rsi] + + mov rcx,r9 + + + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,rax + mov rax,rbx + mov r11,rdx + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + mov r10,rdx + + + mov rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + mov r12,rax + mov rax,rbx + mov r13,rdx + + lea rcx,QWORD PTR[rbp] + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + jmp $L$sqr4x_1st + +ALIGN 32 +$L$sqr4x_1st:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov rbx,QWORD PTR[16+rcx*1+rsi] + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + + mul r15 + add r13,rax + mov rax,rbx + mov QWORD PTR[8+rcx*1+rdi],r10 + mov r12,rdx + adc r12,0 + + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[24+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[16+rcx*1+rdi],r11 + mov r13,rdx + adc r13,0 + lea rcx,QWORD PTR[32+rcx] + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_1st + + mul r15 + add r13,rax + lea rbp,QWORD PTR[16+rbp] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + jmp $L$sqr4x_outer + +ALIGN 32 +$L$sqr4x_outer:: + mov r14,QWORD PTR[((-32))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rbp*1+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rbp*1+rsi] + mov r15,rax + + mul r14 + mov r10,QWORD PTR[((-24))+rbp*1+rdi] + add r10,rax + mov rax,rbx + adc rdx,0 + mov QWORD PTR[((-24))+rbp*1+rdi],r10 + mov r11,rdx + + mul r14 + add r11,rax + mov rax,rbx + adc rdx,0 + add r11,QWORD PTR[((-16))+rbp*1+rdi] + mov r10,rdx + adc r10,0 + mov QWORD PTR[((-16))+rbp*1+rdi],r11 + + xor r12,r12 + + mov 
rbx,QWORD PTR[((-8))+rbp*1+rsi] + mul r15 + add r12,rax + mov rax,rbx + adc rdx,0 + add r12,QWORD PTR[((-8))+rbp*1+rdi] + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rbp*1+rdi],r10 + + lea rcx,QWORD PTR[rbp] + jmp $L$sqr4x_inner + +ALIGN 32 +$L$sqr4x_inner:: + mov rbx,QWORD PTR[rcx*1+rsi] + mul r15 + add r13,rax + mov rax,rbx + mov r12,rdx + adc r12,0 + add r13,QWORD PTR[rcx*1+rdi] + adc r12,0 + +DB 067h + mul r14 + add r11,rax + mov rax,rbx + mov rbx,QWORD PTR[8+rcx*1+rsi] + mov r10,rdx + adc r10,0 + add r11,r13 + adc r10,0 + + mul r15 + add r12,rax + mov QWORD PTR[rcx*1+rdi],r11 + mov rax,rbx + mov r13,rdx + adc r13,0 + add r12,QWORD PTR[8+rcx*1+rdi] + lea rcx,QWORD PTR[16+rcx] + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + adc rdx,0 + add r10,r12 + mov r11,rdx + adc r11,0 + mov QWORD PTR[((-8))+rcx*1+rdi],r10 + + cmp rcx,0 + jne $L$sqr4x_inner + +DB 067h + mul r15 + add r13,rax + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + add rbp,16 + jnz $L$sqr4x_outer + + + mov r14,QWORD PTR[((-32))+rsi] + lea rdi,QWORD PTR[((48+8))+r9*2+rsp] + mov rax,QWORD PTR[((-24))+rsi] + lea rdi,QWORD PTR[((-32))+rbp*1+rdi] + mov rbx,QWORD PTR[((-16))+rsi] + mov r15,rax + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + + mul r14 + add r11,rax + mov rax,rbx + mov QWORD PTR[((-24))+rdi],r10 + mov r10,rdx + adc r10,0 + add r11,r13 + mov rbx,QWORD PTR[((-8))+rsi] + adc r10,0 + + mul r15 + add r12,rax + mov rax,rbx + mov QWORD PTR[((-16))+rdi],r11 + mov r13,rdx + adc r13,0 + + mul r14 + add r10,rax + mov rax,rbx + mov r11,rdx + adc r11,0 + add r10,r12 + adc r11,0 + mov QWORD PTR[((-8))+rdi],r10 + + mul r15 + add r13,rax + mov rax,QWORD PTR[((-16))+rsi] + adc rdx,0 + add r13,r11 + adc rdx,0 + + mov QWORD PTR[rdi],r13 + mov r12,rdx + mov QWORD PTR[8+rdi],rdx + + mul rbx + add rbp,16 + xor r14,r14 + sub rbp,r9 + xor r15,r15 + + add rax,r12 + adc rdx,0 + mov QWORD PTR[8+rdi],rax + mov QWORD PTR[16+rdi],rdx + mov QWORD PTR[24+rdi],r15 + + mov rax,QWORD PTR[((-16))+rbp*1+rsi] + lea rdi,QWORD PTR[((48+8))+rsp] + xor r10,r10 + mov r11,QWORD PTR[8+rdi] + + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + lea rbp,QWORD PTR[16+rbp] + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + jmp $L$sqr4x_shift_n_add + +ALIGN 32 +$L$sqr4x_shift_n_add:: + lea r12,QWORD PTR[r10*2+r14] + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rbp*1+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[8+rdi] + adc rbx,rax + mov 
rax,QWORD PTR[rbp*1+rsi] + mov QWORD PTR[((-16))+rdi],rbx + adc r8,rdx + + lea r12,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-8))+rdi],r8 + sbb r15,r15 + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[16+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[24+rdi] + adc r12,rax + mov rax,QWORD PTR[8+rbp*1+rsi] + mov QWORD PTR[rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[8+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mov r10,QWORD PTR[32+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[40+rdi] + adc rbx,rax + mov rax,QWORD PTR[16+rbp*1+rsi] + mov QWORD PTR[16+rdi],rbx + adc r8,rdx + mov QWORD PTR[24+rdi],r8 + sbb r15,r15 + lea rdi,QWORD PTR[64+rdi] + add rbp,32 + jnz $L$sqr4x_shift_n_add + + lea r12,QWORD PTR[r10*2+r14] +DB 067h + shr r10,63 + lea r13,QWORD PTR[r11*2+rcx] + shr r11,63 + or r13,r10 + mov r10,QWORD PTR[((-16))+rdi] + mov r14,r11 + mul rax + neg r15 + mov r11,QWORD PTR[((-8))+rdi] + adc r12,rax + mov rax,QWORD PTR[((-8))+rsi] + mov QWORD PTR[((-32))+rdi],r12 + adc r13,rdx + + lea rbx,QWORD PTR[r10*2+r14] + mov QWORD PTR[((-24))+rdi],r13 + sbb r15,r15 + shr r10,63 + lea r8,QWORD PTR[r11*2+rcx] + shr r11,63 + or r8,r10 + mul rax + neg r15 + adc rbx,rax + adc r8,rdx + mov QWORD PTR[((-16))+rdi],rbx + mov QWORD PTR[((-8))+rdi],r8 +DB 102,72,15,126,213 +sqr8x_reduction:: + xor rax,rax + lea rcx,QWORD PTR[r9*2+rbp] + lea rdx,QWORD PTR[((48+8))+r9*2+rsp] + mov QWORD PTR[((0+8))+rsp],rcx + lea rdi,QWORD PTR[((48+8))+r9*1+rsp] + mov QWORD PTR[((8+8))+rsp],rdx + neg r9 + jmp $L$8x_reduction_loop + +ALIGN 32 +$L$8x_reduction_loop:: + lea rdi,QWORD PTR[r9*1+rdi] +DB 066h + mov rbx,QWORD PTR[rdi] + mov r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] + mov QWORD PTR[rdx],rax + lea rdi,QWORD PTR[64+rdi] + +DB 067h + mov r8,rbx + imul rbx,QWORD PTR[((32+8))+rsp] + mov rax,QWORD PTR[rbp] + mov ecx,8 + jmp $L$8x_reduce + +ALIGN 32 +$L$8x_reduce:: + mul rbx + mov rax,QWORD PTR[16+rbp] + neg r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + mov QWORD PTR[((48-8+8))+rcx*8+rsp],rbx + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov rsi,QWORD PTR[((32+8))+rsp] + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + imul rsi,r8 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,rsi + add r15,rax + mov rax,QWORD PTR[rbp] + adc rdx,0 + add r14,r15 + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_reduce + + lea rbp,QWORD PTR[128+rbp] + xor rax,rax + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_no_tail + +DB 066h + add r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov rbx,QWORD PTR[((48+56+8))+rsp] + mov ecx,8 + mov rax,QWORD PTR[rbp] + jmp $L$8x_tail + 
+ALIGN 32 +$L$8x_tail:: + mul rbx + add r8,rax + mov rax,QWORD PTR[16+rbp] + mov QWORD PTR[rdi],r8 + mov r8,rdx + adc r8,0 + + mul rbx + add r9,rax + mov rax,QWORD PTR[32+rbp] + adc rdx,0 + add r8,r9 + lea rdi,QWORD PTR[8+rdi] + mov r9,rdx + adc r9,0 + + mul rbx + add r10,rax + mov rax,QWORD PTR[48+rbp] + adc rdx,0 + add r9,r10 + mov r10,rdx + adc r10,0 + + mul rbx + add r11,rax + mov rax,QWORD PTR[64+rbp] + adc rdx,0 + add r10,r11 + mov r11,rdx + adc r11,0 + + mul rbx + add r12,rax + mov rax,QWORD PTR[80+rbp] + adc rdx,0 + add r11,r12 + mov r12,rdx + adc r12,0 + + mul rbx + add r13,rax + mov rax,QWORD PTR[96+rbp] + adc rdx,0 + add r12,r13 + mov r13,rdx + adc r13,0 + + mul rbx + add r14,rax + mov rax,QWORD PTR[112+rbp] + adc rdx,0 + add r13,r14 + mov r14,rdx + adc r14,0 + + mul rbx + mov rbx,QWORD PTR[((48-16+8))+rcx*8+rsp] + add r15,rax + adc rdx,0 + add r14,r15 + mov rax,QWORD PTR[rbp] + mov r15,rdx + adc r15,0 + + dec ecx + jnz $L$8x_tail + + lea rbp,QWORD PTR[128+rbp] + mov rdx,QWORD PTR[((8+8))+rsp] + cmp rbp,QWORD PTR[((0+8))+rsp] + jae $L$8x_tail_done + + mov rbx,QWORD PTR[((48+56+8))+rsp] + neg rsi + mov rax,QWORD PTR[rbp] + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + sbb rsi,rsi + + mov ecx,8 + jmp $L$8x_tail + +ALIGN 32 +$L$8x_tail_done:: + add r8,QWORD PTR[rdx] + xor rax,rax + + neg rsi +$L$8x_no_tail:: + adc r8,QWORD PTR[rdi] + adc r9,QWORD PTR[8+rdi] + adc r10,QWORD PTR[16+rdi] + adc r11,QWORD PTR[24+rdi] + adc r12,QWORD PTR[32+rdi] + adc r13,QWORD PTR[40+rdi] + adc r14,QWORD PTR[48+rdi] + adc r15,QWORD PTR[56+rdi] + adc rax,0 + mov rcx,QWORD PTR[((-16))+rbp] + xor rsi,rsi + +DB 102,72,15,126,213 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 +DB 102,73,15,126,217 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + lea rdi,QWORD PTR[64+rdi] + + cmp rdi,rdx + jb $L$8x_reduction_loop + + sub rcx,r15 + lea rbx,QWORD PTR[r9*1+rdi] + adc rsi,rsi + mov rcx,r9 + or rax,rsi +DB 102,72,15,126,207 + xor rax,1 +DB 102,72,15,126,206 + lea rbp,QWORD PTR[rax*8+rbp] + sar rcx,3+2 + jmp $L$sqr4x_sub + +ALIGN 32 +$L$sqr4x_sub:: +DB 066h + mov r12,QWORD PTR[rbx] + mov r13,QWORD PTR[8+rbx] + sbb r12,QWORD PTR[rbp] + mov r14,QWORD PTR[16+rbx] + sbb r13,QWORD PTR[16+rbp] + mov r15,QWORD PTR[24+rbx] + lea rbx,QWORD PTR[32+rbx] + sbb r14,QWORD PTR[32+rbp] + mov QWORD PTR[rdi],r12 + sbb r15,QWORD PTR[48+rbp] + lea rbp,QWORD PTR[64+rbp] + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + lea rdi,QWORD PTR[32+rdi] + + inc rcx + jnz $L$sqr4x_sub + mov r10,r9 + neg r9 + DB 0F3h,0C3h ;repret +bn_sqr8x_internal ENDP +PUBLIC bn_from_montgomery + +ALIGN 32 +bn_from_montgomery PROC PUBLIC + test DWORD PTR[48+rsp],7 + jz bn_from_mont8x + xor eax,eax + DB 0F3h,0C3h ;repret +bn_from_montgomery ENDP + + +ALIGN 32 +bn_from_mont8x PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_bn_from_mont8x:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + +DB 067h + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + lea rsp,QWORD PTR[((-40))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 +DB 067h + mov r10d,r9d + shl r9d,3 + shl 
r10d,3+2 + neg r9 + mov r8,QWORD PTR[r8] + + + + + + + + lea r11,QWORD PTR[((-64))+r9*2+rsp] + sub r11,rsi + and r11,4095 + cmp r10,r11 + jb $L$from_sp_alt + sub rsp,r11 + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + jmp $L$from_sp_done + +ALIGN 32 +$L$from_sp_alt:: + lea r10,QWORD PTR[((4096-64))+r9*2] + lea rsp,QWORD PTR[((-64))+r9*2+rsp] + sub r11,r10 + mov r10,0 + cmovc r11,r10 + sub rsp,r11 +$L$from_sp_done:: + and rsp,-64 + mov r10,r9 + neg r9 + + + + + + + + + + + mov QWORD PTR[32+rsp],r8 + mov QWORD PTR[40+rsp],rax +$L$from_body:: + mov r11,r9 + lea rax,QWORD PTR[48+rsp] + pxor xmm0,xmm0 + jmp $L$mul_by_1 + +ALIGN 32 +$L$mul_by_1:: + movdqu xmm1,XMMWORD PTR[rsi] + movdqu xmm2,XMMWORD PTR[16+rsi] + movdqu xmm3,XMMWORD PTR[32+rsi] + movdqa XMMWORD PTR[r9*1+rax],xmm0 + movdqu xmm4,XMMWORD PTR[48+rsi] + movdqa XMMWORD PTR[16+r9*1+rax],xmm0 +DB 048h,08dh,0b6h,040h,000h,000h,000h + movdqa XMMWORD PTR[rax],xmm1 + movdqa XMMWORD PTR[32+r9*1+rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm2 + movdqa XMMWORD PTR[48+r9*1+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm3 + movdqa XMMWORD PTR[48+rax],xmm4 + lea rax,QWORD PTR[64+rax] + sub r11,64 + jnz $L$mul_by_1 + +DB 102,72,15,110,207 +DB 102,72,15,110,209 +DB 067h + mov rbp,rcx +DB 102,73,15,110,218 + call sqr8x_reduction + + pxor xmm0,xmm0 + lea rax,QWORD PTR[48+rsp] + mov rsi,QWORD PTR[40+rsp] + jmp $L$from_mont_zero + +ALIGN 32 +$L$from_mont_zero:: + movdqa XMMWORD PTR[rax],xmm0 + movdqa XMMWORD PTR[16+rax],xmm0 + movdqa XMMWORD PTR[32+rax],xmm0 + movdqa XMMWORD PTR[48+rax],xmm0 + lea rax,QWORD PTR[64+rax] + sub r9,32 + jnz $L$from_mont_zero + + mov rax,1 + mov r15,QWORD PTR[((-48))+rsi] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$from_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_from_mont8x:: +bn_from_mont8x ENDP +PUBLIC bn_get_bits5 + +ALIGN 16 +bn_get_bits5 PROC PUBLIC + mov r10,rcx + mov ecx,edx + shr edx,3 + movzx eax,WORD PTR[rdx*1+r10] + and ecx,7 + shr eax,cl + and eax,31 + DB 0F3h,0C3h ;repret +bn_get_bits5 ENDP + +PUBLIC bn_scatter5 + +ALIGN 16 +bn_scatter5 PROC PUBLIC + cmp edx,0 + jz $L$scatter_epilogue + lea r8,QWORD PTR[r9*8+r8] +$L$scatter:: + mov rax,QWORD PTR[rcx] + lea rcx,QWORD PTR[8+rcx] + mov QWORD PTR[r8],rax + lea r8,QWORD PTR[256+r8] + sub edx,1 + jnz $L$scatter +$L$scatter_epilogue:: + DB 0F3h,0C3h ;repret +bn_scatter5 ENDP + +PUBLIC bn_gather5 + +ALIGN 16 +bn_gather5 PROC PUBLIC +$L$SEH_begin_bn_gather5:: + +DB 048h,083h,0ech,028h +DB 00fh,029h,034h,024h +DB 00fh,029h,07ch,024h,010h + mov r11d,r9d + shr r9d,3 + and r11,7 + not r9d + lea rax,QWORD PTR[$L$magic_masks] + and r9d,3 + lea r8,QWORD PTR[128+r11*8+r8] + movq xmm4,QWORD PTR[r9*8+rax] + movq xmm5,QWORD PTR[8+r9*8+rax] + movq xmm6,QWORD PTR[16+r9*8+rax] + movq xmm7,QWORD PTR[24+r9*8+rax] + jmp $L$gather +ALIGN 16 +$L$gather:: + movq xmm0,QWORD PTR[((-128))+r8] + movq xmm1,QWORD PTR[((-64))+r8] + pand xmm0,xmm4 + movq xmm2,QWORD PTR[r8] + pand xmm1,xmm5 + movq xmm3,QWORD PTR[64+r8] + pand xmm2,xmm6 + por xmm0,xmm1 + pand xmm3,xmm7 +DB 067h,067h + por xmm0,xmm2 + lea r8,QWORD PTR[256+r8] + por xmm0,xmm3 + + movq QWORD PTR[rcx],xmm0 + lea rcx,QWORD PTR[8+rcx] + sub edx,1 + jnz $L$gather + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + lea rsp,QWORD PTR[40+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_bn_gather5:: +bn_gather5 ENDP +ALIGN 64 
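For orientation: bn_scatter5 above stores each 64-bit limb of a window entry at a 256-byte stride (the window index, 0..31, scaled by 8 inside the stride), and bn_gather5 reads an entry back by loading several candidate qwords per limb and selecting one with the $L$magic_masks constants, so that only part of the window index ever contributes to a load address. A minimal C sketch of that masked-select idea follows; the table layout here is a deliberately simplified, hypothetical one, not the exact stride used by the assembly.

#include <stdint.h>
#include <stddef.h>

/* Masked table lookup in the spirit of bn_gather5: read a small group of
 * candidate words and keep one of them arithmetically, instead of letting
 * the whole index feed the load address.  Layout is illustrative. */
static void gather_masked(uint64_t *out, const uint64_t *table,
                          size_t nwords, unsigned idx /* 0..31 */)
{
    unsigned lo = idx & 7;        /* still used as an address component */
    unsigned hi = idx >> 3;       /* 0..3, resolved purely by masking   */
    for (size_t w = 0; w < nwords; w++) {
        const uint64_t *group = table + w * 32 + lo * 4;   /* 4 candidates */
        uint64_t r = 0;
        for (unsigned k = 0; k < 4; k++) {
            uint64_t mask = (uint64_t)0 - (uint64_t)(k == hi);
            r |= group[k] & mask; /* touch every candidate, keep exactly one */
        }
        out[w] = r;
    }
}

The point of the design is that the candidates covering the high bits of the index are always all read, which limits how much of the exponent window leaks through the data-cache access pattern.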
+$L$magic_masks:: + DD 0,0,0,0,0,0,-1,-1 + DD 0,0,0,0,0,0,0,0 +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115 +DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111 +DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79 +DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111 +DB 112,101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +mul_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + lea r10,QWORD PTR[$L$mul_epilogue] + cmp rbx,r10 + jb $L$body_40 + + mov r10,QWORD PTR[192+r8] + mov rax,QWORD PTR[8+r10*8+rax] + jmp $L$body_proceed + +$L$body_40:: + mov rax,QWORD PTR[40+rax] +$L$body_proceed:: + + movaps xmm0,XMMWORD PTR[((-88))+rax] + movaps xmm1,XMMWORD PTR[((-72))+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + movups XMMWORD PTR[512+r8],xmm0 + movups XMMWORD PTR[528+r8],xmm1 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +mul_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_bn_mul_mont_gather5 + DD imagerel $L$SEH_end_bn_mul_mont_gather5 + DD imagerel $L$SEH_info_bn_mul_mont_gather5 + + DD imagerel $L$SEH_begin_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_end_bn_mul4x_mont_gather5 + DD imagerel $L$SEH_info_bn_mul4x_mont_gather5 + + DD imagerel $L$SEH_begin_bn_power5 + DD imagerel $L$SEH_end_bn_power5 + DD imagerel $L$SEH_info_bn_power5 + + DD imagerel $L$SEH_begin_bn_from_mont8x + DD imagerel $L$SEH_end_bn_from_mont8x + DD imagerel $L$SEH_info_bn_from_mont8x + DD imagerel $L$SEH_begin_bn_gather5 + DD imagerel $L$SEH_end_bn_gather5 + DD imagerel $L$SEH_info_bn_gather5 + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_bn_mul_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul_body,imagerel $L$mul_epilogue +ALIGN 8 +$L$SEH_info_bn_mul4x_mont_gather5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$mul4x_body,imagerel $L$mul4x_epilogue +ALIGN 8 +$L$SEH_info_bn_power5:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$power5_body,imagerel $L$power5_epilogue +ALIGN 8 
+$L$SEH_info_bn_from_mont8x:: +DB 9,0,0,0 + DD imagerel mul_handler + DD imagerel $L$from_body,imagerel $L$from_epilogue +ALIGN 8 +$L$SEH_info_bn_gather5:: +DB 001h,00dh,005h,000h +DB 00dh,078h,001h,000h +DB 008h,068h,000h,000h +DB 004h,042h,000h,000h +ALIGN 8 + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/camellia/cmll-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/camellia/cmll-x86_64.asm new file mode 100644 index 00000000000000..5e6d45157645dc --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/camellia/cmll-x86_64.asm @@ -0,0 +1,2099 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + + +PUBLIC Camellia_EncryptBlock + +ALIGN 16 +Camellia_EncryptBlock PROC PUBLIC + mov eax,128 + sub eax,ecx + mov ecx,3 + adc ecx,0 + jmp $L$enc_rounds +Camellia_EncryptBlock ENDP + +PUBLIC Camellia_EncryptBlock_Rounds + +ALIGN 16 +$L$enc_rounds:: +Camellia_EncryptBlock_Rounds PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_Camellia_EncryptBlock_Rounds:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r13 + push r14 + push r15 +$L$enc_prologue:: + + + mov r13,rcx + mov r14,rdx + + shl edi,6 + lea rbp,QWORD PTR[$L$Camellia_SBOX] + lea r15,QWORD PTR[rdi*1+r14] + + mov r8d,DWORD PTR[rsi] + mov r9d,DWORD PTR[4+rsi] + mov r10d,DWORD PTR[8+rsi] + bswap r8d + mov r11d,DWORD PTR[12+rsi] + bswap r9d + bswap r10d + bswap r11d + + call _x86_64_Camellia_encrypt + + bswap r8d + bswap r9d + bswap r10d + mov DWORD PTR[r13],r8d + bswap r11d + mov DWORD PTR[4+r13],r9d + mov DWORD PTR[8+r13],r10d + mov DWORD PTR[12+r13],r11d + + mov r15,QWORD PTR[rsp] + mov r14,QWORD PTR[8+rsp] + mov r13,QWORD PTR[16+rsp] + mov rbp,QWORD PTR[24+rsp] + mov rbx,QWORD PTR[32+rsp] + lea rsp,QWORD PTR[40+rsp] +$L$enc_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_Camellia_EncryptBlock_Rounds:: +Camellia_EncryptBlock_Rounds ENDP + + +ALIGN 16 +_x86_64_Camellia_encrypt PROC PRIVATE + xor r9d,DWORD PTR[r14] + xor r8d,DWORD PTR[4+r14] + xor r11d,DWORD PTR[8+r14] + xor r10d,DWORD PTR[12+r14] +ALIGN 16 +$L$eloop:: + mov ebx,DWORD PTR[16+r14] + mov eax,DWORD PTR[20+r14] + + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[24+r14] + mov eax,DWORD PTR[28+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[32+r14] + mov eax,DWORD PTR[36+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr 
eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[40+r14] + mov eax,DWORD PTR[44+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[48+r14] + mov eax,DWORD PTR[52+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[56+r14] + mov eax,DWORD PTR[60+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[64+r14] + mov eax,DWORD PTR[68+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + lea r14,QWORD PTR[64+r14] + cmp r14,r15 + mov edx,DWORD PTR[8+r14] + mov ecx,DWORD PTR[12+r14] + je $L$edone + + and eax,r8d + or edx,r11d + rol eax,1 + xor r10d,edx + xor r9d,eax + and ecx,r10d + or ebx,r9d + rol ecx,1 + xor r8d,ebx + xor r11d,ecx + jmp $L$eloop + +ALIGN 16 +$L$edone:: + xor eax,r10d + xor ebx,r11d + xor ecx,r8d + xor edx,r9d + + mov r8d,eax + mov r9d,ebx + mov r10d,ecx + mov r11d,edx + +DB 0f3h,0c3h +_x86_64_Camellia_encrypt ENDP + + +PUBLIC Camellia_DecryptBlock + +ALIGN 16 +Camellia_DecryptBlock PROC PUBLIC + mov eax,128 + sub eax,ecx + mov ecx,3 + adc ecx,0 + jmp $L$dec_rounds +Camellia_DecryptBlock ENDP + +PUBLIC Camellia_DecryptBlock_Rounds + +ALIGN 16 +$L$dec_rounds:: +Camellia_DecryptBlock_Rounds PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_Camellia_DecryptBlock_Rounds:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r13 + push r14 + push r15 +$L$dec_prologue:: + + + mov r13,rcx + mov r15,rdx + + shl edi,6 + lea rbp,QWORD PTR[$L$Camellia_SBOX] + lea r14,QWORD PTR[rdi*1+r15] + + mov r8d,DWORD PTR[rsi] + mov r9d,DWORD PTR[4+rsi] + mov r10d,DWORD PTR[8+rsi] + bswap r8d + mov r11d,DWORD PTR[12+rsi] + bswap r9d + bswap r10d + bswap r11d + + call _x86_64_Camellia_decrypt + + bswap r8d + bswap r9d + bswap r10d + mov DWORD PTR[r13],r8d + bswap r11d + mov DWORD 
PTR[4+r13],r9d + mov DWORD PTR[8+r13],r10d + mov DWORD PTR[12+r13],r11d + + mov r15,QWORD PTR[rsp] + mov r14,QWORD PTR[8+rsp] + mov r13,QWORD PTR[16+rsp] + mov rbp,QWORD PTR[24+rsp] + mov rbx,QWORD PTR[32+rsp] + lea rsp,QWORD PTR[40+rsp] +$L$dec_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_Camellia_DecryptBlock_Rounds:: +Camellia_DecryptBlock_Rounds ENDP + + +ALIGN 16 +_x86_64_Camellia_decrypt PROC PRIVATE + xor r9d,DWORD PTR[r14] + xor r8d,DWORD PTR[4+r14] + xor r11d,DWORD PTR[8+r14] + xor r10d,DWORD PTR[12+r14] +ALIGN 16 +$L$dloop:: + mov ebx,DWORD PTR[((-8))+r14] + mov eax,DWORD PTR[((-4))+r14] + + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[((-16))+r14] + mov eax,DWORD PTR[((-12))+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[((-24))+r14] + mov eax,DWORD PTR[((-20))+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[((-32))+r14] + mov eax,DWORD PTR[((-28))+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[((-40))+r14] + mov eax,DWORD PTR[((-36))+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[((-48))+r14] + mov eax,DWORD PTR[((-44))+r14] + xor ecx,edx + ror edx,8 + xor 
r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[((-56))+r14] + mov eax,DWORD PTR[((-52))+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + lea r14,QWORD PTR[((-64))+r14] + cmp r14,r15 + mov edx,DWORD PTR[r14] + mov ecx,DWORD PTR[4+r14] + je $L$ddone + + and eax,r8d + or edx,r11d + rol eax,1 + xor r10d,edx + xor r9d,eax + and ecx,r10d + or ebx,r9d + rol ecx,1 + xor r8d,ebx + xor r11d,ecx + + jmp $L$dloop + +ALIGN 16 +$L$ddone:: + xor ecx,r10d + xor edx,r11d + xor eax,r8d + xor ebx,r9d + + mov r8d,ecx + mov r9d,edx + mov r10d,eax + mov r11d,ebx + +DB 0f3h,0c3h +_x86_64_Camellia_decrypt ENDP +PUBLIC Camellia_Ekeygen + +ALIGN 16 +Camellia_Ekeygen PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_Camellia_Ekeygen:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r13 + push r14 + push r15 +$L$key_prologue:: + + mov r15d,edi + mov r13,rdx + + mov r8d,DWORD PTR[rsi] + mov r9d,DWORD PTR[4+rsi] + mov r10d,DWORD PTR[8+rsi] + mov r11d,DWORD PTR[12+rsi] + + bswap r8d + bswap r9d + bswap r10d + bswap r11d + mov DWORD PTR[r13],r9d + mov DWORD PTR[4+r13],r8d + mov DWORD PTR[8+r13],r11d + mov DWORD PTR[12+r13],r10d + cmp r15,128 + je $L$1st128 + + mov r8d,DWORD PTR[16+rsi] + mov r9d,DWORD PTR[20+rsi] + cmp r15,192 + je $L$1st192 + mov r10d,DWORD PTR[24+rsi] + mov r11d,DWORD PTR[28+rsi] + jmp $L$1st256 +$L$1st192:: + mov r10d,r8d + mov r11d,r9d + not r10d + not r11d +$L$1st256:: + bswap r8d + bswap r9d + bswap r10d + bswap r11d + mov DWORD PTR[32+r13],r9d + mov DWORD PTR[36+r13],r8d + mov DWORD PTR[40+r13],r11d + mov DWORD PTR[44+r13],r10d + xor r9d,DWORD PTR[r13] + xor r8d,DWORD PTR[4+r13] + xor r11d,DWORD PTR[8+r13] + xor r10d,DWORD PTR[12+r13] + +$L$1st128:: + lea r14,QWORD PTR[$L$Camellia_SIGMA] + lea rbp,QWORD PTR[$L$Camellia_SBOX] + + mov ebx,DWORD PTR[r14] + mov eax,DWORD PTR[4+r14] + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[8+r14] + mov eax,DWORD PTR[12+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[16+r14] + mov eax,DWORD PTR[20+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + xor r9d,DWORD PTR[r13] + xor 
r8d,DWORD PTR[4+r13] + xor r11d,DWORD PTR[8+r13] + xor r10d,DWORD PTR[12+r13] + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[24+r14] + mov eax,DWORD PTR[28+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[32+r14] + mov eax,DWORD PTR[36+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + cmp r15,128 + jne $L$2nd256 + + lea r13,QWORD PTR[128+r13] + shl r8,32 + shl r10,32 + or r8,r9 + or r10,r11 + mov rax,QWORD PTR[((-128))+r13] + mov rbx,QWORD PTR[((-120))+r13] + mov QWORD PTR[((-112))+r13],r8 + mov QWORD PTR[((-104))+r13],r10 + mov r11,rax + shl rax,15 + mov r9,rbx + shr r9,49 + shr r11,49 + or rax,r9 + shl rbx,15 + or rbx,r11 + mov QWORD PTR[((-96))+r13],rax + mov QWORD PTR[((-88))+r13],rbx + mov r11,r8 + shl r8,15 + mov r9,r10 + shr r9,49 + shr r11,49 + or r8,r9 + shl r10,15 + or r10,r11 + mov QWORD PTR[((-80))+r13],r8 + mov QWORD PTR[((-72))+r13],r10 + mov r11,r8 + shl r8,15 + mov r9,r10 + shr r9,49 + shr r11,49 + or r8,r9 + shl r10,15 + or r10,r11 + mov QWORD PTR[((-64))+r13],r8 + mov QWORD PTR[((-56))+r13],r10 + mov r11,rax + shl rax,30 + mov r9,rbx + shr r9,34 + shr r11,34 + or rax,r9 + shl rbx,30 + or rbx,r11 + mov QWORD PTR[((-48))+r13],rax + mov QWORD PTR[((-40))+r13],rbx + mov r11,r8 + shl r8,15 + mov r9,r10 + shr r9,49 + shr r11,49 + or r8,r9 + shl r10,15 + or r10,r11 + mov QWORD PTR[((-32))+r13],r8 + mov r11,rax + shl rax,15 + mov r9,rbx + shr r9,49 + shr r11,49 + or rax,r9 + shl rbx,15 + or rbx,r11 + mov QWORD PTR[((-24))+r13],rbx + mov r11,r8 + shl r8,15 + mov r9,r10 + shr r9,49 + shr r11,49 + or r8,r9 + shl r10,15 + or r10,r11 + mov QWORD PTR[((-16))+r13],r8 + mov QWORD PTR[((-8))+r13],r10 + mov r11,rax + shl rax,17 + mov r9,rbx + shr r9,47 + shr r11,47 + or rax,r9 + shl rbx,17 + or rbx,r11 + mov QWORD PTR[r13],rax + mov QWORD PTR[8+r13],rbx + mov r11,rax + shl rax,17 + mov r9,rbx + shr r9,47 + shr r11,47 + or rax,r9 + shl rbx,17 + or rbx,r11 + mov QWORD PTR[16+r13],rax + mov QWORD PTR[24+r13],rbx + mov r11,r8 + shl r8,34 + mov r9,r10 + shr r9,30 + shr r11,30 + or r8,r9 + shl r10,34 + or r10,r11 + mov QWORD PTR[32+r13],r8 + mov QWORD PTR[40+r13],r10 + mov r11,rax + shl rax,17 + mov r9,rbx + shr r9,47 + shr r11,47 + or rax,r9 + shl rbx,17 + or rbx,r11 + mov QWORD PTR[48+r13],rax + mov QWORD PTR[56+r13],rbx + mov r11,r8 + shl r8,17 + mov r9,r10 + shr r9,47 + shr r11,47 + or r8,r9 + shl r10,17 + or r10,r11 + mov QWORD PTR[64+r13],r8 + mov QWORD PTR[72+r13],r10 + mov eax,3 + jmp $L$done +ALIGN 16 +$L$2nd256:: + mov DWORD PTR[48+r13],r9d + mov DWORD PTR[52+r13],r8d + mov DWORD PTR[56+r13],r11d + mov DWORD PTR[60+r13],r10d + xor r9d,DWORD PTR[32+r13] + xor 
r8d,DWORD PTR[36+r13] + xor r11d,DWORD PTR[40+r13] + xor r10d,DWORD PTR[44+r13] + xor eax,r8d + xor ebx,r9d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[40+r14] + mov eax,DWORD PTR[44+r14] + xor ecx,edx + ror edx,8 + xor r10d,ecx + xor r11d,ecx + xor r11d,edx + xor eax,r10d + xor ebx,r11d + movzx esi,ah + movzx edi,bl + mov edx,DWORD PTR[2052+rsi*8+rbp] + mov ecx,DWORD PTR[rdi*8+rbp] + movzx esi,al + shr eax,16 + movzx edi,bh + xor edx,DWORD PTR[4+rsi*8+rbp] + shr ebx,16 + xor ecx,DWORD PTR[4+rdi*8+rbp] + movzx esi,ah + movzx edi,bl + xor edx,DWORD PTR[rsi*8+rbp] + xor ecx,DWORD PTR[2052+rdi*8+rbp] + movzx esi,al + movzx edi,bh + xor edx,DWORD PTR[2048+rsi*8+rbp] + xor ecx,DWORD PTR[2048+rdi*8+rbp] + mov ebx,DWORD PTR[48+r14] + mov eax,DWORD PTR[52+r14] + xor ecx,edx + ror edx,8 + xor r8d,ecx + xor r9d,ecx + xor r9d,edx + mov rax,QWORD PTR[r13] + mov rbx,QWORD PTR[8+r13] + mov rcx,QWORD PTR[32+r13] + mov rdx,QWORD PTR[40+r13] + mov r14,QWORD PTR[48+r13] + mov r15,QWORD PTR[56+r13] + lea r13,QWORD PTR[128+r13] + shl r8,32 + shl r10,32 + or r8,r9 + or r10,r11 + mov QWORD PTR[((-112))+r13],r8 + mov QWORD PTR[((-104))+r13],r10 + mov r11,rcx + shl rcx,15 + mov r9,rdx + shr r9,49 + shr r11,49 + or rcx,r9 + shl rdx,15 + or rdx,r11 + mov QWORD PTR[((-96))+r13],rcx + mov QWORD PTR[((-88))+r13],rdx + mov r11,r14 + shl r14,15 + mov r9,r15 + shr r9,49 + shr r11,49 + or r14,r9 + shl r15,15 + or r15,r11 + mov QWORD PTR[((-80))+r13],r14 + mov QWORD PTR[((-72))+r13],r15 + mov r11,rcx + shl rcx,15 + mov r9,rdx + shr r9,49 + shr r11,49 + or rcx,r9 + shl rdx,15 + or rdx,r11 + mov QWORD PTR[((-64))+r13],rcx + mov QWORD PTR[((-56))+r13],rdx + mov r11,r8 + shl r8,30 + mov r9,r10 + shr r9,34 + shr r11,34 + or r8,r9 + shl r10,30 + or r10,r11 + mov QWORD PTR[((-48))+r13],r8 + mov QWORD PTR[((-40))+r13],r10 + mov r11,rax + shl rax,45 + mov r9,rbx + shr r9,19 + shr r11,19 + or rax,r9 + shl rbx,45 + or rbx,r11 + mov QWORD PTR[((-32))+r13],rax + mov QWORD PTR[((-24))+r13],rbx + mov r11,r14 + shl r14,30 + mov r9,r15 + shr r9,34 + shr r11,34 + or r14,r9 + shl r15,30 + or r15,r11 + mov QWORD PTR[((-16))+r13],r14 + mov QWORD PTR[((-8))+r13],r15 + mov r11,rax + shl rax,15 + mov r9,rbx + shr r9,49 + shr r11,49 + or rax,r9 + shl rbx,15 + or rbx,r11 + mov QWORD PTR[r13],rax + mov QWORD PTR[8+r13],rbx + mov r11,rcx + shl rcx,30 + mov r9,rdx + shr r9,34 + shr r11,34 + or rcx,r9 + shl rdx,30 + or rdx,r11 + mov QWORD PTR[16+r13],rcx + mov QWORD PTR[24+r13],rdx + mov r11,r8 + shl r8,30 + mov r9,r10 + shr r9,34 + shr r11,34 + or r8,r9 + shl r10,30 + or r10,r11 + mov QWORD PTR[32+r13],r8 + mov QWORD PTR[40+r13],r10 + mov r11,rax + shl rax,17 + mov r9,rbx + shr r9,47 + shr r11,47 + or rax,r9 + shl rbx,17 + or rbx,r11 + mov QWORD PTR[48+r13],rax + mov QWORD PTR[56+r13],rbx + mov r11,r14 + shl r14,32 + mov r9,r15 + shr r9,32 + shr r11,32 + or r14,r9 + shl r15,32 + or r15,r11 + mov QWORD PTR[64+r13],r14 + mov QWORD PTR[72+r13],r15 + mov r11,rcx + shl rcx,34 + mov r9,rdx + shr r9,30 + shr r11,30 + or rcx,r9 + shl rdx,34 + or rdx,r11 + mov QWORD PTR[80+r13],rcx + mov QWORD PTR[88+r13],rdx + mov r11,r14 + shl r14,17 + mov r9,r15 + shr r9,47 + shr r11,47 
+ or r14,r9 + shl r15,17 + or r15,r11 + mov QWORD PTR[96+r13],r14 + mov QWORD PTR[104+r13],r15 + mov r11,rax + shl rax,34 + mov r9,rbx + shr r9,30 + shr r11,30 + or rax,r9 + shl rbx,34 + or rbx,r11 + mov QWORD PTR[112+r13],rax + mov QWORD PTR[120+r13],rbx + mov r11,r8 + shl r8,51 + mov r9,r10 + shr r9,13 + shr r11,13 + or r8,r9 + shl r10,51 + or r10,r11 + mov QWORD PTR[128+r13],r8 + mov QWORD PTR[136+r13],r10 + mov eax,4 +$L$done:: + mov r15,QWORD PTR[rsp] + mov r14,QWORD PTR[8+rsp] + mov r13,QWORD PTR[16+rsp] + mov rbp,QWORD PTR[24+rsp] + mov rbx,QWORD PTR[32+rsp] + lea rsp,QWORD PTR[40+rsp] +$L$key_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_Camellia_Ekeygen:: +Camellia_Ekeygen ENDP +ALIGN 64 +$L$Camellia_SIGMA:: + DD 03bcc908bh,0a09e667fh,04caa73b2h,0b67ae858h + DD 0e94f82beh,0c6ef372fh,0f1d36f1ch,054ff53a5h + DD 0de682d1dh,010e527fah,0b3e6c1fdh,0b05688c2h + DD 0,0,0,0 +$L$Camellia_SBOX:: + DD 070707000h,070700070h + DD 082828200h,02c2c002ch + DD 02c2c2c00h,0b3b300b3h + DD 0ececec00h,0c0c000c0h + DD 0b3b3b300h,0e4e400e4h + DD 027272700h,057570057h + DD 0c0c0c000h,0eaea00eah + DD 0e5e5e500h,0aeae00aeh + DD 0e4e4e400h,023230023h + DD 085858500h,06b6b006bh + DD 057575700h,045450045h + DD 035353500h,0a5a500a5h + DD 0eaeaea00h,0eded00edh + DD 00c0c0c00h,04f4f004fh + DD 0aeaeae00h,01d1d001dh + DD 041414100h,092920092h + DD 023232300h,086860086h + DD 0efefef00h,0afaf00afh + DD 06b6b6b00h,07c7c007ch + DD 093939300h,01f1f001fh + DD 045454500h,03e3e003eh + DD 019191900h,0dcdc00dch + DD 0a5a5a500h,05e5e005eh + DD 021212100h,00b0b000bh + DD 0ededed00h,0a6a600a6h + DD 00e0e0e00h,039390039h + DD 04f4f4f00h,0d5d500d5h + DD 04e4e4e00h,05d5d005dh + DD 01d1d1d00h,0d9d900d9h + DD 065656500h,05a5a005ah + DD 092929200h,051510051h + DD 0bdbdbd00h,06c6c006ch + DD 086868600h,08b8b008bh + DD 0b8b8b800h,09a9a009ah + DD 0afafaf00h,0fbfb00fbh + DD 08f8f8f00h,0b0b000b0h + DD 07c7c7c00h,074740074h + DD 0ebebeb00h,02b2b002bh + DD 01f1f1f00h,0f0f000f0h + DD 0cecece00h,084840084h + DD 03e3e3e00h,0dfdf00dfh + DD 030303000h,0cbcb00cbh + DD 0dcdcdc00h,034340034h + DD 05f5f5f00h,076760076h + DD 05e5e5e00h,06d6d006dh + DD 0c5c5c500h,0a9a900a9h + DD 00b0b0b00h,0d1d100d1h + DD 01a1a1a00h,004040004h + DD 0a6a6a600h,014140014h + DD 0e1e1e100h,03a3a003ah + DD 039393900h,0dede00deh + DD 0cacaca00h,011110011h + DD 0d5d5d500h,032320032h + DD 047474700h,09c9c009ch + DD 05d5d5d00h,053530053h + DD 03d3d3d00h,0f2f200f2h + DD 0d9d9d900h,0fefe00feh + DD 001010100h,0cfcf00cfh + DD 05a5a5a00h,0c3c300c3h + DD 0d6d6d600h,07a7a007ah + DD 051515100h,024240024h + DD 056565600h,0e8e800e8h + DD 06c6c6c00h,060600060h + DD 04d4d4d00h,069690069h + DD 08b8b8b00h,0aaaa00aah + DD 00d0d0d00h,0a0a000a0h + DD 09a9a9a00h,0a1a100a1h + DD 066666600h,062620062h + DD 0fbfbfb00h,054540054h + DD 0cccccc00h,01e1e001eh + DD 0b0b0b000h,0e0e000e0h + DD 02d2d2d00h,064640064h + DD 074747400h,010100010h + DD 012121200h,000000000h + DD 02b2b2b00h,0a3a300a3h + DD 020202000h,075750075h + DD 0f0f0f000h,08a8a008ah + DD 0b1b1b100h,0e6e600e6h + DD 084848400h,009090009h + DD 099999900h,0dddd00ddh + DD 0dfdfdf00h,087870087h + DD 04c4c4c00h,083830083h + DD 0cbcbcb00h,0cdcd00cdh + DD 0c2c2c200h,090900090h + DD 034343400h,073730073h + DD 07e7e7e00h,0f6f600f6h + DD 076767600h,09d9d009dh + DD 005050500h,0bfbf00bfh + DD 06d6d6d00h,052520052h + DD 0b7b7b700h,0d8d800d8h + DD 0a9a9a900h,0c8c800c8h + DD 031313100h,0c6c600c6h + DD 0d1d1d100h,081810081h + DD 017171700h,06f6f006fh + DD 004040400h,013130013h + DD 
0d7d7d700h,063630063h + DD 014141400h,0e9e900e9h + DD 058585800h,0a7a700a7h + DD 03a3a3a00h,09f9f009fh + DD 061616100h,0bcbc00bch + DD 0dedede00h,029290029h + DD 01b1b1b00h,0f9f900f9h + DD 011111100h,02f2f002fh + DD 01c1c1c00h,0b4b400b4h + DD 032323200h,078780078h + DD 00f0f0f00h,006060006h + DD 09c9c9c00h,0e7e700e7h + DD 016161600h,071710071h + DD 053535300h,0d4d400d4h + DD 018181800h,0abab00abh + DD 0f2f2f200h,088880088h + DD 022222200h,08d8d008dh + DD 0fefefe00h,072720072h + DD 044444400h,0b9b900b9h + DD 0cfcfcf00h,0f8f800f8h + DD 0b2b2b200h,0acac00ach + DD 0c3c3c300h,036360036h + DD 0b5b5b500h,02a2a002ah + DD 07a7a7a00h,03c3c003ch + DD 091919100h,0f1f100f1h + DD 024242400h,040400040h + DD 008080800h,0d3d300d3h + DD 0e8e8e800h,0bbbb00bbh + DD 0a8a8a800h,043430043h + DD 060606000h,015150015h + DD 0fcfcfc00h,0adad00adh + DD 069696900h,077770077h + DD 050505000h,080800080h + DD 0aaaaaa00h,082820082h + DD 0d0d0d000h,0ecec00ech + DD 0a0a0a000h,027270027h + DD 07d7d7d00h,0e5e500e5h + DD 0a1a1a100h,085850085h + DD 089898900h,035350035h + DD 062626200h,00c0c000ch + DD 097979700h,041410041h + DD 054545400h,0efef00efh + DD 05b5b5b00h,093930093h + DD 01e1e1e00h,019190019h + DD 095959500h,021210021h + DD 0e0e0e000h,00e0e000eh + DD 0ffffff00h,04e4e004eh + DD 064646400h,065650065h + DD 0d2d2d200h,0bdbd00bdh + DD 010101000h,0b8b800b8h + DD 0c4c4c400h,08f8f008fh + DD 000000000h,0ebeb00ebh + DD 048484800h,0cece00ceh + DD 0a3a3a300h,030300030h + DD 0f7f7f700h,05f5f005fh + DD 075757500h,0c5c500c5h + DD 0dbdbdb00h,01a1a001ah + DD 08a8a8a00h,0e1e100e1h + DD 003030300h,0caca00cah + DD 0e6e6e600h,047470047h + DD 0dadada00h,03d3d003dh + DD 009090900h,001010001h + DD 03f3f3f00h,0d6d600d6h + DD 0dddddd00h,056560056h + DD 094949400h,04d4d004dh + DD 087878700h,00d0d000dh + DD 05c5c5c00h,066660066h + DD 083838300h,0cccc00cch + DD 002020200h,02d2d002dh + DD 0cdcdcd00h,012120012h + DD 04a4a4a00h,020200020h + DD 090909000h,0b1b100b1h + DD 033333300h,099990099h + DD 073737300h,04c4c004ch + DD 067676700h,0c2c200c2h + DD 0f6f6f600h,07e7e007eh + DD 0f3f3f300h,005050005h + DD 09d9d9d00h,0b7b700b7h + DD 07f7f7f00h,031310031h + DD 0bfbfbf00h,017170017h + DD 0e2e2e200h,0d7d700d7h + DD 052525200h,058580058h + DD 09b9b9b00h,061610061h + DD 0d8d8d800h,01b1b001bh + DD 026262600h,01c1c001ch + DD 0c8c8c800h,00f0f000fh + DD 037373700h,016160016h + DD 0c6c6c600h,018180018h + DD 03b3b3b00h,022220022h + DD 081818100h,044440044h + DD 096969600h,0b2b200b2h + DD 06f6f6f00h,0b5b500b5h + DD 04b4b4b00h,091910091h + DD 013131300h,008080008h + DD 0bebebe00h,0a8a800a8h + DD 063636300h,0fcfc00fch + DD 02e2e2e00h,050500050h + DD 0e9e9e900h,0d0d000d0h + DD 079797900h,07d7d007dh + DD 0a7a7a700h,089890089h + DD 08c8c8c00h,097970097h + DD 09f9f9f00h,05b5b005bh + DD 06e6e6e00h,095950095h + DD 0bcbcbc00h,0ffff00ffh + DD 08e8e8e00h,0d2d200d2h + DD 029292900h,0c4c400c4h + DD 0f5f5f500h,048480048h + DD 0f9f9f900h,0f7f700f7h + DD 0b6b6b600h,0dbdb00dbh + DD 02f2f2f00h,003030003h + DD 0fdfdfd00h,0dada00dah + DD 0b4b4b400h,03f3f003fh + DD 059595900h,094940094h + DD 078787800h,05c5c005ch + DD 098989800h,002020002h + DD 006060600h,04a4a004ah + DD 06a6a6a00h,033330033h + DD 0e7e7e700h,067670067h + DD 046464600h,0f3f300f3h + DD 071717100h,07f7f007fh + DD 0bababa00h,0e2e200e2h + DD 0d4d4d400h,09b9b009bh + DD 025252500h,026260026h + DD 0ababab00h,037370037h + DD 042424200h,03b3b003bh + DD 088888800h,096960096h + DD 0a2a2a200h,04b4b004bh + DD 08d8d8d00h,0bebe00beh + DD 0fafafa00h,02e2e002eh + DD 072727200h,079790079h + DD 007070700h,08c8c008ch + DD 
0b9b9b900h,06e6e006eh + DD 055555500h,08e8e008eh + DD 0f8f8f800h,0f5f500f5h + DD 0eeeeee00h,0b6b600b6h + DD 0acacac00h,0fdfd00fdh + DD 00a0a0a00h,059590059h + DD 036363600h,098980098h + DD 049494900h,06a6a006ah + DD 02a2a2a00h,046460046h + DD 068686800h,0baba00bah + DD 03c3c3c00h,025250025h + DD 038383800h,042420042h + DD 0f1f1f100h,0a2a200a2h + DD 0a4a4a400h,0fafa00fah + DD 040404000h,007070007h + DD 028282800h,055550055h + DD 0d3d3d300h,0eeee00eeh + DD 07b7b7b00h,00a0a000ah + DD 0bbbbbb00h,049490049h + DD 0c9c9c900h,068680068h + DD 043434300h,038380038h + DD 0c1c1c100h,0a4a400a4h + DD 015151500h,028280028h + DD 0e3e3e300h,07b7b007bh + DD 0adadad00h,0c9c900c9h + DD 0f4f4f400h,0c1c100c1h + DD 077777700h,0e3e300e3h + DD 0c7c7c700h,0f4f400f4h + DD 080808000h,0c7c700c7h + DD 09e9e9e00h,09e9e009eh + DD 000e0e0e0h,038003838h + DD 000050505h,041004141h + DD 000585858h,016001616h + DD 000d9d9d9h,076007676h + DD 000676767h,0d900d9d9h + DD 0004e4e4eh,093009393h + DD 000818181h,060006060h + DD 000cbcbcbh,0f200f2f2h + DD 000c9c9c9h,072007272h + DD 0000b0b0bh,0c200c2c2h + DD 000aeaeaeh,0ab00ababh + DD 0006a6a6ah,09a009a9ah + DD 000d5d5d5h,075007575h + DD 000181818h,006000606h + DD 0005d5d5dh,057005757h + DD 000828282h,0a000a0a0h + DD 000464646h,091009191h + DD 000dfdfdfh,0f700f7f7h + DD 000d6d6d6h,0b500b5b5h + DD 000272727h,0c900c9c9h + DD 0008a8a8ah,0a200a2a2h + DD 000323232h,08c008c8ch + DD 0004b4b4bh,0d200d2d2h + DD 000424242h,090009090h + DD 000dbdbdbh,0f600f6f6h + DD 0001c1c1ch,007000707h + DD 0009e9e9eh,0a700a7a7h + DD 0009c9c9ch,027002727h + DD 0003a3a3ah,08e008e8eh + DD 000cacacah,0b200b2b2h + DD 000252525h,049004949h + DD 0007b7b7bh,0de00dedeh + DD 0000d0d0dh,043004343h + DD 000717171h,05c005c5ch + DD 0005f5f5fh,0d700d7d7h + DD 0001f1f1fh,0c700c7c7h + DD 000f8f8f8h,03e003e3eh + DD 000d7d7d7h,0f500f5f5h + DD 0003e3e3eh,08f008f8fh + DD 0009d9d9dh,067006767h + DD 0007c7c7ch,01f001f1fh + DD 000606060h,018001818h + DD 000b9b9b9h,06e006e6eh + DD 000bebebeh,0af00afafh + DD 000bcbcbch,02f002f2fh + DD 0008b8b8bh,0e200e2e2h + DD 000161616h,085008585h + DD 000343434h,00d000d0dh + DD 0004d4d4dh,053005353h + DD 000c3c3c3h,0f000f0f0h + DD 000727272h,09c009c9ch + DD 000959595h,065006565h + DD 000abababh,0ea00eaeah + DD 0008e8e8eh,0a300a3a3h + DD 000bababah,0ae00aeaeh + DD 0007a7a7ah,09e009e9eh + DD 000b3b3b3h,0ec00ecech + DD 000020202h,080008080h + DD 000b4b4b4h,02d002d2dh + DD 000adadadh,06b006b6bh + DD 000a2a2a2h,0a800a8a8h + DD 000acacach,02b002b2bh + DD 000d8d8d8h,036003636h + DD 0009a9a9ah,0a600a6a6h + DD 000171717h,0c500c5c5h + DD 0001a1a1ah,086008686h + DD 000353535h,04d004d4dh + DD 000cccccch,033003333h + DD 000f7f7f7h,0fd00fdfdh + DD 000999999h,066006666h + DD 000616161h,058005858h + DD 0005a5a5ah,096009696h + DD 000e8e8e8h,03a003a3ah + DD 000242424h,009000909h + DD 000565656h,095009595h + DD 000404040h,010001010h + DD 000e1e1e1h,078007878h + DD 000636363h,0d800d8d8h + DD 000090909h,042004242h + DD 000333333h,0cc00cccch + DD 000bfbfbfh,0ef00efefh + DD 000989898h,026002626h + DD 000979797h,0e500e5e5h + DD 000858585h,061006161h + DD 000686868h,01a001a1ah + DD 000fcfcfch,03f003f3fh + DD 000ececech,03b003b3bh + DD 0000a0a0ah,082008282h + DD 000dadadah,0b600b6b6h + DD 0006f6f6fh,0db00dbdbh + DD 000535353h,0d400d4d4h + DD 000626262h,098009898h + DD 000a3a3a3h,0e800e8e8h + DD 0002e2e2eh,08b008b8bh + DD 000080808h,002000202h + DD 000afafafh,0eb00ebebh + DD 000282828h,00a000a0ah + DD 000b0b0b0h,02c002c2ch + DD 000747474h,01d001d1dh + DD 000c2c2c2h,0b000b0b0h + DD 000bdbdbdh,06f006f6fh + DD 
000363636h,08d008d8dh + DD 000222222h,088008888h + DD 000383838h,00e000e0eh + DD 000646464h,019001919h + DD 0001e1e1eh,087008787h + DD 000393939h,04e004e4eh + DD 0002c2c2ch,00b000b0bh + DD 000a6a6a6h,0a900a9a9h + DD 000303030h,00c000c0ch + DD 000e5e5e5h,079007979h + DD 000444444h,011001111h + DD 000fdfdfdh,07f007f7fh + DD 000888888h,022002222h + DD 0009f9f9fh,0e700e7e7h + DD 000656565h,059005959h + DD 000878787h,0e100e1e1h + DD 0006b6b6bh,0da00dadah + DD 000f4f4f4h,03d003d3dh + DD 000232323h,0c800c8c8h + DD 000484848h,012001212h + DD 000101010h,004000404h + DD 000d1d1d1h,074007474h + DD 000515151h,054005454h + DD 000c0c0c0h,030003030h + DD 000f9f9f9h,07e007e7eh + DD 000d2d2d2h,0b400b4b4h + DD 000a0a0a0h,028002828h + DD 000555555h,055005555h + DD 000a1a1a1h,068006868h + DD 000414141h,050005050h + DD 000fafafah,0be00bebeh + DD 000434343h,0d000d0d0h + DD 000131313h,0c400c4c4h + DD 000c4c4c4h,031003131h + DD 0002f2f2fh,0cb00cbcbh + DD 000a8a8a8h,02a002a2ah + DD 000b6b6b6h,0ad00adadh + DD 0003c3c3ch,00f000f0fh + DD 0002b2b2bh,0ca00cacah + DD 000c1c1c1h,070007070h + DD 000ffffffh,0ff00ffffh + DD 000c8c8c8h,032003232h + DD 000a5a5a5h,069006969h + DD 000202020h,008000808h + DD 000898989h,062006262h + DD 000000000h,000000000h + DD 000909090h,024002424h + DD 000474747h,0d100d1d1h + DD 000efefefh,0fb00fbfbh + DD 000eaeaeah,0ba00babah + DD 000b7b7b7h,0ed00ededh + DD 000151515h,045004545h + DD 000060606h,081008181h + DD 000cdcdcdh,073007373h + DD 000b5b5b5h,06d006d6dh + DD 000121212h,084008484h + DD 0007e7e7eh,09f009f9fh + DD 000bbbbbbh,0ee00eeeeh + DD 000292929h,04a004a4ah + DD 0000f0f0fh,0c300c3c3h + DD 000b8b8b8h,02e002e2eh + DD 000070707h,0c100c1c1h + DD 000040404h,001000101h + DD 0009b9b9bh,0e600e6e6h + DD 000949494h,025002525h + DD 000212121h,048004848h + DD 000666666h,099009999h + DD 000e6e6e6h,0b900b9b9h + DD 000cececeh,0b300b3b3h + DD 000edededh,07b007b7bh + DD 000e7e7e7h,0f900f9f9h + DD 0003b3b3bh,0ce00ceceh + DD 000fefefeh,0bf00bfbfh + DD 0007f7f7fh,0df00dfdfh + DD 000c5c5c5h,071007171h + DD 000a4a4a4h,029002929h + DD 000373737h,0cd00cdcdh + DD 000b1b1b1h,06c006c6ch + DD 0004c4c4ch,013001313h + DD 000919191h,064006464h + DD 0006e6e6eh,09b009b9bh + DD 0008d8d8dh,063006363h + DD 000767676h,09d009d9dh + DD 000030303h,0c000c0c0h + DD 0002d2d2dh,04b004b4bh + DD 000dededeh,0b700b7b7h + DD 000969696h,0a500a5a5h + DD 000262626h,089008989h + DD 0007d7d7dh,05f005f5fh + DD 000c6c6c6h,0b100b1b1h + DD 0005c5c5ch,017001717h + DD 000d3d3d3h,0f400f4f4h + DD 000f2f2f2h,0bc00bcbch + DD 0004f4f4fh,0d300d3d3h + DD 000191919h,046004646h + DD 0003f3f3fh,0cf00cfcfh + DD 000dcdcdch,037003737h + DD 000797979h,05e005e5eh + DD 0001d1d1dh,047004747h + DD 000525252h,094009494h + DD 000ebebebh,0fa00fafah + DD 000f3f3f3h,0fc00fcfch + DD 0006d6d6dh,05b005b5bh + DD 0005e5e5eh,097009797h + DD 000fbfbfbh,0fe00fefeh + DD 000696969h,05a005a5ah + DD 000b2b2b2h,0ac00acach + DD 000f0f0f0h,03c003c3ch + DD 000313131h,04c004c4ch + DD 0000c0c0ch,003000303h + DD 000d4d4d4h,035003535h + DD 000cfcfcfh,0f300f3f3h + DD 0008c8c8ch,023002323h + DD 000e2e2e2h,0b800b8b8h + DD 000757575h,05d005d5dh + DD 000a9a9a9h,06a006a6ah + DD 0004a4a4ah,092009292h + DD 000575757h,0d500d5d5h + DD 000848484h,021002121h + DD 000111111h,044004444h + DD 000454545h,051005151h + DD 0001b1b1bh,0c600c6c6h + DD 000f5f5f5h,07d007d7dh + DD 000e4e4e4h,039003939h + DD 0000e0e0eh,083008383h + DD 000737373h,0dc00dcdch + DD 000aaaaaah,0aa00aaaah + DD 000f1f1f1h,07c007c7ch + DD 000ddddddh,077007777h + DD 000595959h,056005656h + DD 000141414h,005000505h + DD 
0006c6c6ch,01b001b1bh + DD 000929292h,0a400a4a4h + DD 000545454h,015001515h + DD 000d0d0d0h,034003434h + DD 000787878h,01e001e1eh + DD 000707070h,01c001c1ch + DD 000e3e3e3h,0f800f8f8h + DD 000494949h,052005252h + DD 000808080h,020002020h + DD 000505050h,014001414h + DD 000a7a7a7h,0e900e9e9h + DD 000f6f6f6h,0bd00bdbdh + DD 000777777h,0dd00ddddh + DD 000939393h,0e400e4e4h + DD 000868686h,0a100a1a1h + DD 000838383h,0e000e0e0h + DD 0002a2a2ah,08a008a8ah + DD 000c7c7c7h,0f100f1f1h + DD 0005b5b5bh,0d600d6d6h + DD 000e9e9e9h,07a007a7ah + DD 000eeeeeeh,0bb00bbbbh + DD 0008f8f8fh,0e300e3e3h + DD 000010101h,040004040h + DD 0003d3d3dh,04f004f4fh +PUBLIC Camellia_cbc_encrypt + +ALIGN 16 +Camellia_cbc_encrypt PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_Camellia_cbc_encrypt:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp rdx,0 + je $L$cbc_abort + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 +$L$cbc_prologue:: + + mov rbp,rsp + sub rsp,64 + and rsp,-64 + + + + lea r10,QWORD PTR[((-64-63))+rcx] + sub r10,rsp + neg r10 + and r10,03C0h + sub rsp,r10 + + + mov r12,rdi + mov r13,rsi + mov rbx,r8 + mov r14,rcx + mov r15d,DWORD PTR[272+rcx] + + mov QWORD PTR[40+rsp],r8 + mov QWORD PTR[48+rsp],rbp + +$L$cbc_body:: + lea rbp,QWORD PTR[$L$Camellia_SBOX] + + mov ecx,32 +ALIGN 4 +$L$cbc_prefetch_sbox:: + mov rax,QWORD PTR[rbp] + mov rsi,QWORD PTR[32+rbp] + mov rdi,QWORD PTR[64+rbp] + mov r11,QWORD PTR[96+rbp] + lea rbp,QWORD PTR[128+rbp] + loop $L$cbc_prefetch_sbox + sub rbp,4096 + shl r15,6 + mov rcx,rdx + lea r15,QWORD PTR[r15*1+r14] + + cmp r9d,0 + je $L$CBC_DECRYPT + + and rdx,-16 + and rcx,15 + lea rdx,QWORD PTR[rdx*1+r12] + mov QWORD PTR[rsp],r14 + mov QWORD PTR[8+rsp],rdx + mov QWORD PTR[16+rsp],rcx + + cmp rdx,r12 + mov r8d,DWORD PTR[rbx] + mov r9d,DWORD PTR[4+rbx] + mov r10d,DWORD PTR[8+rbx] + mov r11d,DWORD PTR[12+rbx] + je $L$cbc_enc_tail + jmp $L$cbc_eloop + +ALIGN 16 +$L$cbc_eloop:: + xor r8d,DWORD PTR[r12] + xor r9d,DWORD PTR[4+r12] + xor r10d,DWORD PTR[8+r12] + bswap r8d + xor r11d,DWORD PTR[12+r12] + bswap r9d + bswap r10d + bswap r11d + + call _x86_64_Camellia_encrypt + + mov r14,QWORD PTR[rsp] + bswap r8d + mov rdx,QWORD PTR[8+rsp] + bswap r9d + mov rcx,QWORD PTR[16+rsp] + bswap r10d + mov DWORD PTR[r13],r8d + bswap r11d + mov DWORD PTR[4+r13],r9d + mov DWORD PTR[8+r13],r10d + lea r12,QWORD PTR[16+r12] + mov DWORD PTR[12+r13],r11d + cmp r12,rdx + lea r13,QWORD PTR[16+r13] + jne $L$cbc_eloop + + cmp rcx,0 + jne $L$cbc_enc_tail + + mov r13,QWORD PTR[40+rsp] + mov DWORD PTR[r13],r8d + mov DWORD PTR[4+r13],r9d + mov DWORD PTR[8+r13],r10d + mov DWORD PTR[12+r13],r11d + jmp $L$cbc_done + +ALIGN 16 +$L$cbc_enc_tail:: + xor rax,rax + mov QWORD PTR[((0+24))+rsp],rax + mov QWORD PTR[((8+24))+rsp],rax + mov QWORD PTR[16+rsp],rax + +$L$cbc_enc_pushf:: + pushfq + cld + mov rsi,r12 + lea rdi,QWORD PTR[((8+24))+rsp] + DD 09066A4F3h + popfq +$L$cbc_enc_popf:: + + lea r12,QWORD PTR[24+rsp] + lea rax,QWORD PTR[((16+24))+rsp] + mov QWORD PTR[8+rsp],rax + jmp $L$cbc_eloop + +ALIGN 16 +$L$CBC_DECRYPT:: + xchg r15,r14 + add rdx,15 + and rcx,15 + and rdx,-16 + mov QWORD PTR[rsp],r14 + lea rdx,QWORD PTR[rdx*1+r12] + mov QWORD PTR[8+rsp],rdx + mov QWORD PTR[16+rsp],rcx + + mov rax,QWORD PTR[rbx] + mov rbx,QWORD PTR[8+rbx] + jmp $L$cbc_dloop +ALIGN 16 +$L$cbc_dloop:: + mov r8d,DWORD PTR[r12] + mov r9d,DWORD PTR[4+r12] + mov r10d,DWORD PTR[8+r12] + bswap r8d + mov 
r11d,DWORD PTR[12+r12] + bswap r9d + mov QWORD PTR[((0+24))+rsp],rax + bswap r10d + mov QWORD PTR[((8+24))+rsp],rbx + bswap r11d + + call _x86_64_Camellia_decrypt + + mov r14,QWORD PTR[rsp] + mov rdx,QWORD PTR[8+rsp] + mov rcx,QWORD PTR[16+rsp] + + bswap r8d + mov rax,QWORD PTR[r12] + bswap r9d + mov rbx,QWORD PTR[8+r12] + bswap r10d + xor r8d,DWORD PTR[((0+24))+rsp] + bswap r11d + xor r9d,DWORD PTR[((4+24))+rsp] + xor r10d,DWORD PTR[((8+24))+rsp] + lea r12,QWORD PTR[16+r12] + xor r11d,DWORD PTR[((12+24))+rsp] + cmp r12,rdx + je $L$cbc_ddone + + mov DWORD PTR[r13],r8d + mov DWORD PTR[4+r13],r9d + mov DWORD PTR[8+r13],r10d + mov DWORD PTR[12+r13],r11d + + lea r13,QWORD PTR[16+r13] + jmp $L$cbc_dloop + +ALIGN 16 +$L$cbc_ddone:: + mov rdx,QWORD PTR[40+rsp] + cmp rcx,0 + jne $L$cbc_dec_tail + + mov DWORD PTR[r13],r8d + mov DWORD PTR[4+r13],r9d + mov DWORD PTR[8+r13],r10d + mov DWORD PTR[12+r13],r11d + + mov QWORD PTR[rdx],rax + mov QWORD PTR[8+rdx],rbx + jmp $L$cbc_done +ALIGN 16 +$L$cbc_dec_tail:: + mov DWORD PTR[((0+24))+rsp],r8d + mov DWORD PTR[((4+24))+rsp],r9d + mov DWORD PTR[((8+24))+rsp],r10d + mov DWORD PTR[((12+24))+rsp],r11d + +$L$cbc_dec_pushf:: + pushfq + cld + lea rsi,QWORD PTR[((8+24))+rsp] + lea rdi,QWORD PTR[r13] + DD 09066A4F3h + popfq +$L$cbc_dec_popf:: + + mov QWORD PTR[rdx],rax + mov QWORD PTR[8+rdx],rbx + jmp $L$cbc_done + +ALIGN 16 +$L$cbc_done:: + mov rcx,QWORD PTR[48+rsp] + mov r15,QWORD PTR[rcx] + mov r14,QWORD PTR[8+rcx] + mov r13,QWORD PTR[16+rcx] + mov r12,QWORD PTR[24+rcx] + mov rbp,QWORD PTR[32+rcx] + mov rbx,QWORD PTR[40+rcx] + lea rsp,QWORD PTR[48+rcx] +$L$cbc_abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_Camellia_cbc_encrypt:: +Camellia_cbc_encrypt ENDP + +DB 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54 +DB 95,54,52,32,98,121,32,60,97,112,112,114,111,64,111,112 +DB 101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +common_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + lea rsp,QWORD PTR[((-64))+rsp] + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[40+rax] + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r13,QWORD PTR[((-24))+rax] + mov r14,QWORD PTR[((-32))+rax] + mov r15,QWORD PTR[((-40))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +common_se_handler ENDP + + +ALIGN 16 +cbc_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + lea rsp,QWORD PTR[((-64))+rsp] + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$cbc_prologue] + cmp rbx,r10 + jb $L$in_cbc_prologue + + lea r10,QWORD PTR[$L$cbc_body] + cmp rbx,r10 + jb $L$in_cbc_frame_setup + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$cbc_abort] + cmp rbx,r10 + jae $L$in_cbc_prologue + + + lea r10,QWORD 
PTR[$L$cbc_enc_pushf] + cmp rbx,r10 + jbe $L$in_cbc_no_flag + lea rax,QWORD PTR[8+rax] + lea r10,QWORD PTR[$L$cbc_enc_popf] + cmp rbx,r10 + jb $L$in_cbc_no_flag + lea rax,QWORD PTR[((-8))+rax] + lea r10,QWORD PTR[$L$cbc_dec_pushf] + cmp rbx,r10 + jbe $L$in_cbc_no_flag + lea rax,QWORD PTR[8+rax] + lea r10,QWORD PTR[$L$cbc_dec_popf] + cmp rbx,r10 + jb $L$in_cbc_no_flag + lea rax,QWORD PTR[((-8))+rax] + +$L$in_cbc_no_flag:: + mov rax,QWORD PTR[48+rax] + lea rax,QWORD PTR[48+rax] + +$L$in_cbc_frame_setup:: + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_cbc_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + +ALIGN 4 +$L$common_seh_exit:: + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + lea rsp,QWORD PTR[64+rsp] + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +cbc_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_Camellia_EncryptBlock_Rounds + DD imagerel $L$SEH_end_Camellia_EncryptBlock_Rounds + DD imagerel $L$SEH_info_Camellia_EncryptBlock_Rounds + + DD imagerel $L$SEH_begin_Camellia_DecryptBlock_Rounds + DD imagerel $L$SEH_end_Camellia_DecryptBlock_Rounds + DD imagerel $L$SEH_info_Camellia_DecryptBlock_Rounds + + DD imagerel $L$SEH_begin_Camellia_Ekeygen + DD imagerel $L$SEH_end_Camellia_Ekeygen + DD imagerel $L$SEH_info_Camellia_Ekeygen + + DD imagerel $L$SEH_begin_Camellia_cbc_encrypt + DD imagerel $L$SEH_end_Camellia_cbc_encrypt + DD imagerel $L$SEH_info_Camellia_cbc_encrypt + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_Camellia_EncryptBlock_Rounds:: +DB 9,0,0,0 + DD imagerel common_se_handler + DD imagerel $L$enc_prologue,imagerel $L$enc_epilogue +$L$SEH_info_Camellia_DecryptBlock_Rounds:: +DB 9,0,0,0 + DD imagerel common_se_handler + DD imagerel $L$dec_prologue,imagerel $L$dec_epilogue +$L$SEH_info_Camellia_Ekeygen:: +DB 9,0,0,0 + DD imagerel common_se_handler + DD imagerel $L$key_prologue,imagerel $L$key_epilogue +$L$SEH_info_Camellia_cbc_encrypt:: +DB 9,0,0,0 + DD imagerel cbc_se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm new file mode 100644 index 00000000000000..ef9b22fbfddc11 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm @@ -0,0 +1,2216 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + + +ALIGN 64 +$L$poly:: + DQ 0ffffffffffffffffh,000000000ffffffffh,00000000000000000h,0ffffffff00000001h + + +$L$RR:: + DQ 00000000000000003h,0fffffffbffffffffh,0fffffffffffffffeh,000000004fffffffdh + +$L$One:: + DD 1,1,1,1,1,1,1,1 +$L$Two:: + DD 
2,2,2,2,2,2,2,2 +$L$Three:: + DD 3,3,3,3,3,3,3,3 +$L$ONE_mont:: + DQ 00000000000000001h,0ffffffff00000000h,0ffffffffffffffffh,000000000fffffffeh + +PUBLIC ecp_nistz256_mul_by_2 + +ALIGN 64 +ecp_nistz256_mul_by_2 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_by_2:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + add r8,r8 + mov r10,QWORD PTR[16+rsi] + adc r9,r9 + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + mov rax,r8 + adc r10,r10 + adc r11,r11 + mov rdx,r9 + sbb r13,r13 + + sub r8,QWORD PTR[rsi] + mov rcx,r10 + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + mov r12,r11 + sbb r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_mul_by_2:: +ecp_nistz256_mul_by_2 ENDP + + + +PUBLIC ecp_nistz256_div_by_2 + +ALIGN 32 +ecp_nistz256_div_by_2 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_div_by_2:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov rax,r8 + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + + mov rdx,r9 + xor r13,r13 + add r8,QWORD PTR[rsi] + mov rcx,r10 + adc r9,QWORD PTR[8+rsi] + adc r10,QWORD PTR[16+rsi] + mov r12,r11 + adc r11,QWORD PTR[24+rsi] + adc r13,0 + xor rsi,rsi + test rax,1 + + cmovz r8,rax + cmovz r9,rdx + cmovz r10,rcx + cmovz r11,r12 + cmovz r13,rsi + + mov rax,r9 + shr r8,1 + shl rax,63 + mov rdx,r10 + shr r9,1 + or r8,rax + shl rdx,63 + mov rcx,r11 + shr r10,1 + or r9,rdx + shl rcx,63 + shr r11,1 + shl r13,63 + or r10,rcx + or r11,r13 + + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_div_by_2:: +ecp_nistz256_div_by_2 ENDP + + + +PUBLIC ecp_nistz256_mul_by_3 + +ALIGN 32 +ecp_nistz256_mul_by_3 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_by_3:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + xor r13,r13 + mov r9,QWORD PTR[8+rsi] + add r8,r8 + mov r10,QWORD PTR[16+rsi] + adc r9,r9 + mov r11,QWORD PTR[24+rsi] + mov rax,r8 + adc r10,r10 + adc r11,r11 + mov rdx,r9 + adc r13,0 + + sub r8,-1 + mov rcx,r10 + sbb r9,QWORD PTR[(($L$poly+8))] + sbb r10,0 + mov r12,r11 + sbb r11,QWORD PTR[(($L$poly+24))] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + cmovz r10,rcx + cmovz r11,r12 + + xor r13,r13 + add r8,QWORD PTR[rsi] + adc r9,QWORD PTR[8+rsi] + mov rax,r8 + adc r10,QWORD PTR[16+rsi] + adc r11,QWORD PTR[24+rsi] + mov rdx,r9 + adc r13,0 + + sub r8,-1 + mov rcx,r10 + sbb r9,QWORD PTR[(($L$poly+8))] + sbb r10,0 + mov r12,r11 + sbb r11,QWORD PTR[(($L$poly+24))] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h 
;repret +$L$SEH_end_ecp_nistz256_mul_by_3:: +ecp_nistz256_mul_by_3 ENDP + + + +PUBLIC ecp_nistz256_add + +ALIGN 32 +ecp_nistz256_add PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_add:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + xor r13,r13 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + + add r8,QWORD PTR[rdx] + adc r9,QWORD PTR[8+rdx] + mov rax,r8 + adc r10,QWORD PTR[16+rdx] + adc r11,QWORD PTR[24+rdx] + mov rdx,r9 + adc r13,0 + + sub r8,QWORD PTR[rsi] + mov rcx,r10 + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + mov r12,r11 + sbb r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_add:: +ecp_nistz256_add ENDP + + + +PUBLIC ecp_nistz256_sub + +ALIGN 32 +ecp_nistz256_sub PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_sub:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push r12 + push r13 + + mov r8,QWORD PTR[rsi] + xor r13,r13 + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + + sub r8,QWORD PTR[rdx] + sbb r9,QWORD PTR[8+rdx] + mov rax,r8 + sbb r10,QWORD PTR[16+rdx] + sbb r11,QWORD PTR[24+rdx] + mov rdx,r9 + sbb r13,0 + + add r8,QWORD PTR[rsi] + mov rcx,r10 + adc r9,QWORD PTR[8+rsi] + adc r10,QWORD PTR[16+rsi] + mov r12,r11 + adc r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_sub:: +ecp_nistz256_sub ENDP + + + +PUBLIC ecp_nistz256_neg + +ALIGN 32 +ecp_nistz256_neg PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_neg:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + xor r8,r8 + xor r9,r9 + xor r10,r10 + xor r11,r11 + xor r13,r13 + + sub r8,QWORD PTR[rsi] + sbb r9,QWORD PTR[8+rsi] + sbb r10,QWORD PTR[16+rsi] + mov rax,r8 + sbb r11,QWORD PTR[24+rsi] + lea rsi,QWORD PTR[$L$poly] + mov rdx,r9 + sbb r13,0 + + add r8,QWORD PTR[rsi] + mov rcx,r10 + adc r9,QWORD PTR[8+rsi] + adc r10,QWORD PTR[16+rsi] + mov r12,r11 + adc r11,QWORD PTR[24+rsi] + test r13,r13 + + cmovz r8,rax + cmovz r9,rdx + mov QWORD PTR[rdi],r8 + cmovz r10,rcx + mov QWORD PTR[8+rdi],r9 + cmovz r11,r12 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_neg:: +ecp_nistz256_neg ENDP + + + + +PUBLIC ecp_nistz256_to_mont + +ALIGN 32 +ecp_nistz256_to_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_to_mont:: + mov rdi,rcx + mov rsi,rdx + + + lea rdx,QWORD PTR[$L$RR] + jmp $L$mul_mont +$L$SEH_end_ecp_nistz256_to_mont:: +ecp_nistz256_to_mont ENDP + + + + + + + +PUBLIC ecp_nistz256_mul_mont + +ALIGN 32 
+ecp_nistz256_mul_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_mul_mont:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$mul_mont:: + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + mov rbx,rdx + mov rax,QWORD PTR[rdx] + mov r9,QWORD PTR[rsi] + mov r10,QWORD PTR[8+rsi] + mov r11,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + + call __ecp_nistz256_mul_montq +$L$mul_mont_done:: + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_mul_mont:: +ecp_nistz256_mul_mont ENDP + + +ALIGN 32 +__ecp_nistz256_mul_montq PROC PRIVATE + + + mov rbp,rax + mul r9 + mov r14,QWORD PTR[(($L$poly+8))] + mov r8,rax + mov rax,rbp + mov r9,rdx + + mul r10 + mov r15,QWORD PTR[(($L$poly+24))] + add r9,rax + mov rax,rbp + adc rdx,0 + mov r10,rdx + + mul r11 + add r10,rax + mov rax,rbp + adc rdx,0 + mov r11,rdx + + mul r12 + add r11,rax + mov rax,r8 + adc rdx,0 + xor r13,r13 + mov r12,rdx + + + + + + + + + + + mov rbp,r8 + shl r8,32 + mul r15 + shr rbp,32 + add r9,r8 + adc r10,rbp + adc r11,rax + mov rax,QWORD PTR[8+rbx] + adc r12,rdx + adc r13,0 + xor r8,r8 + + + + mov rbp,rax + mul QWORD PTR[rsi] + add r9,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r10,rcx + adc rdx,0 + add r10,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[16+rsi] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[24+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,r9 + adc r13,rdx + adc r8,0 + + + + mov rbp,r9 + shl r9,32 + mul r15 + shr rbp,32 + add r10,r9 + adc r11,rbp + adc r12,rax + mov rax,QWORD PTR[16+rbx] + adc r13,rdx + adc r8,0 + xor r9,r9 + + + + mov rbp,rax + mul QWORD PTR[rsi] + add r10,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r11,rcx + adc rdx,0 + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[16+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[24+rsi] + add r13,rcx + adc rdx,0 + add r13,rax + mov rax,r10 + adc r8,rdx + adc r9,0 + + + + mov rbp,r10 + shl r10,32 + mul r15 + shr rbp,32 + add r11,r10 + adc r12,rbp + adc r13,rax + mov rax,QWORD PTR[24+rbx] + adc r8,rdx + adc r9,0 + xor r10,r10 + + + + mov rbp,rax + mul QWORD PTR[rsi] + add r11,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[8+rsi] + add r12,rcx + adc rdx,0 + add r12,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[16+rsi] + add r13,rcx + adc rdx,0 + add r13,rax + mov rax,rbp + adc rdx,0 + mov rcx,rdx + + mul QWORD PTR[24+rsi] + add r8,rcx + adc rdx,0 + add r8,rax + mov rax,r11 + adc r9,rdx + adc r10,0 + + + + mov rbp,r11 + shl r11,32 + mul r15 + shr rbp,32 + add r12,r11 + adc r13,rbp + mov rcx,r12 + adc r8,rax + adc r9,rdx + mov rbp,r13 + adc r10,0 + + + + sub r12,-1 + mov rbx,r8 + sbb r13,r14 + sbb r8,0 + mov rdx,r9 + sbb r9,r15 + sbb r10,0 + + cmovc r12,rcx + cmovc r13,rbp + mov QWORD PTR[rdi],r12 + cmovc r8,rbx + mov QWORD PTR[8+rdi],r13 + cmovc r9,rdx + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_mul_montq ENDP + + + + + + + + +PUBLIC ecp_nistz256_sqr_mont + +ALIGN 32 +ecp_nistz256_sqr_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_sqr_mont:: + mov rdi,rcx + mov rsi,rdx + + + push 
rbp + push rbx + push r12 + push r13 + push r14 + push r15 + mov rax,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r15,QWORD PTR[16+rsi] + mov r8,QWORD PTR[24+rsi] + + call __ecp_nistz256_sqr_montq +$L$sqr_mont_done:: + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_sqr_mont:: +ecp_nistz256_sqr_mont ENDP + + +ALIGN 32 +__ecp_nistz256_sqr_montq PROC PRIVATE + mov r13,rax + mul r14 + mov r9,rax + mov rax,r15 + mov r10,rdx + + mul r13 + add r10,rax + mov rax,r8 + adc rdx,0 + mov r11,rdx + + mul r13 + add r11,rax + mov rax,r15 + adc rdx,0 + mov r12,rdx + + + mul r14 + add r11,rax + mov rax,r8 + adc rdx,0 + mov rbp,rdx + + mul r14 + add r12,rax + mov rax,r8 + adc rdx,0 + add r12,rbp + mov r13,rdx + adc r13,0 + + + mul r15 + xor r15,r15 + add r13,rax + mov rax,QWORD PTR[rsi] + mov r14,rdx + adc r14,0 + + add r9,r9 + adc r10,r10 + adc r11,r11 + adc r12,r12 + adc r13,r13 + adc r14,r14 + adc r15,0 + + mul rax + mov r8,rax + mov rax,QWORD PTR[8+rsi] + mov rcx,rdx + + mul rax + add r9,rcx + adc r10,rax + mov rax,QWORD PTR[16+rsi] + adc rdx,0 + mov rcx,rdx + + mul rax + add r11,rcx + adc r12,rax + mov rax,QWORD PTR[24+rsi] + adc rdx,0 + mov rcx,rdx + + mul rax + add r13,rcx + adc r14,rax + mov rax,r8 + adc r15,rdx + + mov rsi,QWORD PTR[(($L$poly+8))] + mov rbp,QWORD PTR[(($L$poly+24))] + + + + + mov rcx,r8 + shl r8,32 + mul rbp + shr rcx,32 + add r9,r8 + adc r10,rcx + adc r11,rax + mov rax,r9 + adc rdx,0 + + + + mov rcx,r9 + shl r9,32 + mov r8,rdx + mul rbp + shr rcx,32 + add r10,r9 + adc r11,rcx + adc r8,rax + mov rax,r10 + adc rdx,0 + + + + mov rcx,r10 + shl r10,32 + mov r9,rdx + mul rbp + shr rcx,32 + add r11,r10 + adc r8,rcx + adc r9,rax + mov rax,r11 + adc rdx,0 + + + + mov rcx,r11 + shl r11,32 + mov r10,rdx + mul rbp + shr rcx,32 + add r8,r11 + adc r9,rcx + adc r10,rax + adc rdx,0 + xor r11,r11 + + + + add r12,r8 + adc r13,r9 + mov r8,r12 + adc r14,r10 + adc r15,rdx + mov r9,r13 + adc r11,0 + + sub r12,-1 + mov r10,r14 + sbb r13,rsi + sbb r14,0 + mov rcx,r15 + sbb r15,rbp + sbb r11,0 + + cmovc r12,r8 + cmovc r13,r9 + mov QWORD PTR[rdi],r12 + cmovc r14,r10 + mov QWORD PTR[8+rdi],r13 + cmovc r15,rcx + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_sqr_montq ENDP + + + + + + +PUBLIC ecp_nistz256_from_mont + +ALIGN 32 +ecp_nistz256_from_mont PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_from_mont:: + mov rdi,rcx + mov rsi,rdx + + + push r12 + push r13 + + mov rax,QWORD PTR[rsi] + mov r13,QWORD PTR[(($L$poly+24))] + mov r9,QWORD PTR[8+rsi] + mov r10,QWORD PTR[16+rsi] + mov r11,QWORD PTR[24+rsi] + mov r8,rax + mov r12,QWORD PTR[(($L$poly+8))] + + + + mov rcx,rax + shl r8,32 + mul r13 + shr rcx,32 + add r9,r8 + adc r10,rcx + adc r11,rax + mov rax,r9 + adc rdx,0 + + + + mov rcx,r9 + shl r9,32 + mov r8,rdx + mul r13 + shr rcx,32 + add r10,r9 + adc r11,rcx + adc r8,rax + mov rax,r10 + adc rdx,0 + + + + mov rcx,r10 + shl r10,32 + mov r9,rdx + mul r13 + shr rcx,32 + add r11,r10 + adc r8,rcx + adc r9,rax + mov rax,r11 + adc rdx,0 + + + + mov rcx,r11 + shl r11,32 + mov r10,rdx + mul r13 + shr rcx,32 + add r8,r11 + adc r9,rcx + mov rcx,r8 + adc r10,rax + mov rsi,r9 + adc rdx,0 + + + + sub r8,-1 + mov rax,r10 + sbb r9,r12 + sbb r10,0 + mov r11,rdx + sbb rdx,r13 + sbb r13,r13 + + cmovnz r8,rcx + cmovnz r9,rsi + mov QWORD PTR[rdi],r8 + cmovnz r10,rax + mov 
QWORD PTR[8+rdi],r9 + cmovz r11,rdx + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + + pop r13 + pop r12 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_from_mont:: +ecp_nistz256_from_mont ENDP + + +PUBLIC ecp_nistz256_select_w5 + +ALIGN 32 +ecp_nistz256_select_w5 PROC PUBLIC + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_select_w5:: +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm0,XMMWORD PTR[$L$One] + movd xmm1,r8d + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + + movdqa xmm8,xmm0 + pshufd xmm1,xmm1,0 + + mov rax,16 +$L$select_loop_sse_w5:: + + movdqa xmm15,xmm8 + paddd xmm8,xmm0 + pcmpeqd xmm15,xmm1 + + movdqa xmm9,XMMWORD PTR[rdx] + movdqa xmm10,XMMWORD PTR[16+rdx] + movdqa xmm11,XMMWORD PTR[32+rdx] + movdqa xmm12,XMMWORD PTR[48+rdx] + movdqa xmm13,XMMWORD PTR[64+rdx] + movdqa xmm14,XMMWORD PTR[80+rdx] + lea rdx,QWORD PTR[96+rdx] + + pand xmm9,xmm15 + pand xmm10,xmm15 + por xmm2,xmm9 + pand xmm11,xmm15 + por xmm3,xmm10 + pand xmm12,xmm15 + por xmm4,xmm11 + pand xmm13,xmm15 + por xmm5,xmm12 + pand xmm14,xmm15 + por xmm6,xmm13 + por xmm7,xmm14 + + dec rax + jnz $L$select_loop_sse_w5 + + movdqu XMMWORD PTR[rcx],xmm2 + movdqu XMMWORD PTR[16+rcx],xmm3 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqu XMMWORD PTR[48+rcx],xmm5 + movdqu XMMWORD PTR[64+rcx],xmm6 + movdqu XMMWORD PTR[80+rcx],xmm7 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_ecp_nistz256_select_w5:: + DB 0F3h,0C3h ;repret +ecp_nistz256_select_w5 ENDP + + + +PUBLIC ecp_nistz256_select_w7 + +ALIGN 32 +ecp_nistz256_select_w7 PROC PUBLIC + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_ecp_nistz256_select_w7:: +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm8,XMMWORD PTR[$L$One] + movd xmm1,r8d + + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + + movdqa xmm0,xmm8 + pshufd xmm1,xmm1,0 + mov rax,64 + +$L$select_loop_sse_w7:: + movdqa xmm15,xmm8 + paddd xmm8,xmm0 + movdqa xmm9,XMMWORD PTR[rdx] + movdqa xmm10,XMMWORD PTR[16+rdx] + pcmpeqd xmm15,xmm1 + movdqa xmm11,XMMWORD PTR[32+rdx] + movdqa xmm12,XMMWORD PTR[48+rdx] + lea rdx,QWORD PTR[64+rdx] + + pand xmm9,xmm15 + pand xmm10,xmm15 + por xmm2,xmm9 + pand xmm11,xmm15 + por xmm3,xmm10 + pand xmm12,xmm15 + por xmm4,xmm11 + prefetcht0 [255+rdx] + por xmm5,xmm12 + + dec rax + jnz $L$select_loop_sse_w7 + + movdqu XMMWORD PTR[rcx],xmm2 + movdqu XMMWORD PTR[16+rcx],xmm3 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqu XMMWORD PTR[48+rcx],xmm5 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps 
xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_ecp_nistz256_select_w7:: + DB 0F3h,0C3h ;repret +ecp_nistz256_select_w7 ENDP +PUBLIC ecp_nistz256_avx2_select_w7 + +ALIGN 32 +ecp_nistz256_avx2_select_w7 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_avx2_select_w7:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +DB 00fh,00bh + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_avx2_select_w7:: +ecp_nistz256_avx2_select_w7 ENDP + +ALIGN 32 +__ecp_nistz256_add_toq PROC PRIVATE + add r12,QWORD PTR[rbx] + adc r13,QWORD PTR[8+rbx] + mov rax,r12 + adc r8,QWORD PTR[16+rbx] + adc r9,QWORD PTR[24+rbx] + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD PTR[rdi],r12 + cmovz r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovz r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_add_toq ENDP + + +ALIGN 32 +__ecp_nistz256_sub_fromq PROC PRIVATE + sub r12,QWORD PTR[rbx] + sbb r13,QWORD PTR[8+rbx] + mov rax,r12 + sbb r8,QWORD PTR[16+rbx] + sbb r9,QWORD PTR[24+rbx] + mov rbp,r13 + sbb r11,r11 + + add r12,-1 + mov rcx,r8 + adc r13,r14 + adc r8,0 + mov r10,r9 + adc r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD PTR[rdi],r12 + cmovz r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovz r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_sub_fromq ENDP + + +ALIGN 32 +__ecp_nistz256_subq PROC PRIVATE + sub rax,r12 + sbb rbp,r13 + mov r12,rax + sbb rcx,r8 + sbb r10,r9 + mov r13,rbp + sbb r11,r11 + + add rax,-1 + mov r8,rcx + adc rbp,r14 + adc rcx,0 + mov r9,r10 + adc r10,r15 + test r11,r11 + + cmovnz r12,rax + cmovnz r13,rbp + cmovnz r8,rcx + cmovnz r9,r10 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_subq ENDP + + +ALIGN 32 +__ecp_nistz256_mul_by_2q PROC PRIVATE + add r12,r12 + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + cmovz r13,rbp + mov QWORD PTR[rdi],r12 + cmovz r8,rcx + mov QWORD PTR[8+rdi],r13 + cmovz r9,r10 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + + DB 0F3h,0C3h ;repret +__ecp_nistz256_mul_by_2q ENDP +PUBLIC ecp_nistz256_point_double + +ALIGN 32 +ecp_nistz256_point_double PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_double:: + mov rdi,rcx + mov rsi,rdx + + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*5+8 + + movdqu xmm0,XMMWORD PTR[rsi] + mov rbx,rsi + movdqu xmm1,XMMWORD PTR[16+rsi] + mov r12,QWORD PTR[((32+0))+rsi] + mov r13,QWORD PTR[((32+8))+rsi] + mov r8,QWORD PTR[((32+16))+rsi] + mov r9,QWORD PTR[((32+24))+rsi] + mov r14,QWORD PTR[(($L$poly+8))] + mov r15,QWORD PTR[(($L$poly+24))] + movdqa XMMWORD PTR[96+rsp],xmm0 + movdqa XMMWORD PTR[(96+16)+rsp],xmm1 + lea r10,QWORD PTR[32+rdi] + lea r11,QWORD PTR[64+rdi] +DB 102,72,15,110,199 +DB 102,73,15,110,202 +DB 102,73,15,110,211 + + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_by_2q + + mov rax,QWORD PTR[((64+0))+rsi] + mov r14,QWORD 
PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + lea rsi,QWORD PTR[((64-0))+rsi] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[32+rbx] + mov r9,QWORD PTR[((64+0))+rbx] + mov r10,QWORD PTR[((64+8))+rbx] + mov r11,QWORD PTR[((64+16))+rbx] + mov r12,QWORD PTR[((64+24))+rbx] + lea rsi,QWORD PTR[((64-0))+rbx] + lea rbx,QWORD PTR[32+rbx] +DB 102,72,15,126,215 + call __ecp_nistz256_mul_montq + call __ecp_nistz256_mul_by_2q + + mov r12,QWORD PTR[((96+0))+rsp] + mov r13,QWORD PTR[((96+8))+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r8,QWORD PTR[((96+16))+rsp] + mov r9,QWORD PTR[((96+24))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_add_toq + + mov r12,QWORD PTR[((96+0))+rsp] + mov r13,QWORD PTR[((96+8))+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r8,QWORD PTR[((96+16))+rsp] + mov r9,QWORD PTR[((96+24))+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] +DB 102,72,15,126,207 + call __ecp_nistz256_sqr_montq + xor r9,r9 + mov rax,r12 + add r12,-1 + mov r10,r13 + adc r13,rsi + mov rcx,r14 + adc r14,0 + mov r8,r15 + adc r15,rbp + adc r9,0 + xor rsi,rsi + test rax,1 + + cmovz r12,rax + cmovz r13,r10 + cmovz r14,rcx + cmovz r15,r8 + cmovz r9,rsi + + mov rax,r13 + shr r12,1 + shl rax,63 + mov r10,r14 + shr r13,1 + or r12,rax + shl r10,63 + mov rcx,r15 + shr r14,1 + or r13,r10 + shl rcx,63 + mov QWORD PTR[rdi],r12 + shr r15,1 + mov QWORD PTR[8+rdi],r13 + shl r9,63 + or r14,rcx + or r15,r9 + mov QWORD PTR[16+rdi],r14 + mov QWORD PTR[24+rdi],r15 + mov rax,QWORD PTR[64+rsp] + lea rbx,QWORD PTR[64+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_by_2q + + lea rbx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_add_toq + + mov rax,QWORD PTR[96+rsp] + lea rbx,QWORD PTR[96+rsp] + mov r9,QWORD PTR[((0+0))+rsp] + mov r10,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r11,QWORD PTR[((16+0))+rsp] + mov r12,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_by_2q + + mov rax,QWORD PTR[((0+32))+rsp] + mov r14,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r15,QWORD PTR[((16+32))+rsp] + mov r8,QWORD PTR[((24+32))+rsp] +DB 102,72,15,126,199 + call __ecp_nistz256_sqr_montq + + lea rbx,QWORD PTR[128+rsp] + mov r8,r14 + mov r9,r15 + mov r14,rsi + mov r15,rbp + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+0))+rsp] + mov rbp,QWORD PTR[((0+8))+rsp] + mov rcx,QWORD PTR[((0+16))+rsp] + mov r10,QWORD PTR[((0+24))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_subq + + mov rax,QWORD PTR[32+rsp] + lea rbx,QWORD PTR[32+rsp] + mov r14,r12 + xor ecx,ecx + mov QWORD PTR[((0+0))+rsp],r12 + mov r10,r13 + mov QWORD PTR[((0+8))+rsp],r13 + cmovz r11,r8 + mov QWORD PTR[((0+16))+rsp],r8 + lea rsi,QWORD PTR[((0-0))+rsp] + cmovz r12,r9 + mov QWORD PTR[((0+24))+rsp],r9 + mov r9,r14 
+ lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + +DB 102,72,15,126,203 +DB 102,72,15,126,207 + call __ecp_nistz256_sub_fromq + + add rsp,32*5+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_double:: +ecp_nistz256_point_double ENDP +PUBLIC ecp_nistz256_point_add + +ALIGN 32 +ecp_nistz256_point_add PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*18+8 + + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqu xmm4,XMMWORD PTR[64+rsi] + movdqu xmm5,XMMWORD PTR[80+rsi] + mov rbx,rsi + mov rsi,rdx + movdqa XMMWORD PTR[384+rsp],xmm0 + movdqa XMMWORD PTR[(384+16)+rsp],xmm1 + por xmm1,xmm0 + movdqa XMMWORD PTR[416+rsp],xmm2 + movdqa XMMWORD PTR[(416+16)+rsp],xmm3 + por xmm3,xmm2 + movdqa XMMWORD PTR[448+rsp],xmm4 + movdqa XMMWORD PTR[(448+16)+rsp],xmm5 + por xmm3,xmm1 + + movdqu xmm0,XMMWORD PTR[rsi] + pshufd xmm5,xmm3,1h + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + por xmm5,xmm3 + movdqu xmm3,XMMWORD PTR[48+rsi] + mov rax,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + movdqa XMMWORD PTR[480+rsp],xmm0 + pshufd xmm4,xmm5,01eh + movdqa XMMWORD PTR[(480+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD PTR[512+rsp],xmm2 + movdqa XMMWORD PTR[(512+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,QWORD PTR[((64-0))+rsi] + mov QWORD PTR[((544+0))+rsp],rax + mov QWORD PTR[((544+8))+rsp],r14 + mov QWORD PTR[((544+16))+rsp],r15 + mov QWORD PTR[((544+24))+rsp],r8 + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sqr_montq + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,1h + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,01eh + por xmm4,xmm3 + pxor xmm3,xmm3 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + mov rax,QWORD PTR[((64+0))+rbx] + mov r14,QWORD PTR[((64+8))+rbx] + mov r15,QWORD PTR[((64+16))+rbx] + mov r8,QWORD PTR[((64+24))+rbx] + + lea rsi,QWORD PTR[((64-0))+rbx] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[544+rsp] + lea rbx,QWORD PTR[544+rsp] + mov r9,QWORD PTR[((0+96))+rsp] + mov r10,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((0+96))+rsp] + mov r11,QWORD PTR[((16+96))+rsp] + mov r12,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[416+rsp] + lea rbx,QWORD PTR[416+rsp] + mov r9,QWORD PTR[((0+224))+rsp] + mov r10,QWORD PTR[((8+224))+rsp] + lea rsi,QWORD PTR[((0+224))+rsp] + mov r11,QWORD PTR[((16+224))+rsp] + mov r12,QWORD PTR[((24+224))+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[512+rsp] + lea rbx,QWORD PTR[512+rsp] + mov r9,QWORD PTR[((0+256))+rsp] + mov r10,QWORD PTR[((8+256))+rsp] + lea rsi,QWORD PTR[((0+256))+rsp] + mov r11,QWORD PTR[((16+256))+rsp] + mov r12,QWORD 
PTR[((24+256))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[224+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromq + + or r12,r13 + movdqa xmm2,xmm4 + or r12,r8 + or r12,r9 + por xmm2,xmm5 +DB 102,73,15,110,220 + + mov rax,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+96))+rsp] + mov r10,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((0+96))+rsp] + mov r11,QWORD PTR[((16+96))+rsp] + mov r12,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[160+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[480+rsp] + lea rbx,QWORD PTR[480+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[160+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_sub_fromq + + or r12,r13 + or r12,r8 + or r12,r9 + +DB 03eh + jnz $L$add_proceedq +DB 102,73,15,126,208 +DB 102,73,15,126,217 + test r8,r8 + jnz $L$add_proceedq + test r9,r9 + jz $L$add_proceedq + +DB 102,72,15,126,199 + pxor xmm0,xmm0 + movdqu XMMWORD PTR[rdi],xmm0 + movdqu XMMWORD PTR[16+rdi],xmm0 + movdqu XMMWORD PTR[32+rdi],xmm0 + movdqu XMMWORD PTR[48+rdi],xmm0 + movdqu XMMWORD PTR[64+rdi],xmm0 + movdqu XMMWORD PTR[80+rdi],xmm0 + jmp $L$add_doneq + +ALIGN 32 +$L$add_proceedq:: + mov rax,QWORD PTR[((0+64))+rsp] + mov r14,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r15,QWORD PTR[((16+64))+rsp] + mov r8,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+0))+rsp] + mov r10,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r11,QWORD PTR[((16+0))+rsp] + mov r12,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[352+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[((0+0))+rsp] + mov r14,QWORD PTR[((8+0))+rsp] + lea rsi,QWORD PTR[((0+0))+rsp] + mov r15,QWORD PTR[((16+0))+rsp] + mov r8,QWORD PTR[((24+0))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[544+rsp] + lea rbx,QWORD PTR[544+rsp] + mov r9,QWORD PTR[((0+352))+rsp] + mov r10,QWORD PTR[((8+352))+rsp] + lea rsi,QWORD PTR[((0+352))+rsp] + mov r11,QWORD PTR[((16+352))+rsp] + mov r12,QWORD PTR[((24+352))+rsp] + lea rdi,QWORD PTR[352+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[rsp] + lea rbx,QWORD PTR[rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[160+rsp] + lea rbx,QWORD PTR[160+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_mul_montq + + + + + add r12,r12 + lea rsi,QWORD PTR[96+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test r11,r11 + + cmovz r12,rax + mov rax,QWORD PTR[rsi] + cmovz r13,rbp + mov rbp,QWORD PTR[8+rsi] + cmovz r8,rcx + mov rcx,QWORD PTR[16+rsi] + cmovz r9,r10 + mov r10,QWORD PTR[24+rsi] + + call __ecp_nistz256_subq + + lea rbx,QWORD PTR[128+rsp] + lea rdi,QWORD PTR[288+rsp] + call __ecp_nistz256_sub_fromq + 
+ mov rax,QWORD PTR[((192+0))+rsp] + mov rbp,QWORD PTR[((192+8))+rsp] + mov rcx,QWORD PTR[((192+16))+rsp] + mov r10,QWORD PTR[((192+24))+rsp] + lea rdi,QWORD PTR[320+rsp] + + call __ecp_nistz256_subq + + mov QWORD PTR[rdi],r12 + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov rax,QWORD PTR[128+rsp] + lea rbx,QWORD PTR[128+rsp] + mov r9,QWORD PTR[((0+224))+rsp] + mov r10,QWORD PTR[((8+224))+rsp] + lea rsi,QWORD PTR[((0+224))+rsp] + mov r11,QWORD PTR[((16+224))+rsp] + mov r12,QWORD PTR[((24+224))+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[320+rsp] + lea rbx,QWORD PTR[320+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[320+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[256+rsp] + lea rdi,QWORD PTR[320+rsp] + call __ecp_nistz256_sub_fromq + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[352+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((352+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[544+rsp] + pand xmm3,XMMWORD PTR[((544+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[448+rsp] + pand xmm3,XMMWORD PTR[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[64+rdi],xmm2 + movdqu XMMWORD PTR[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[480+rsp] + pand xmm3,XMMWORD PTR[((480+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[384+rsp] + pand xmm3,XMMWORD PTR[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[rdi],xmm2 + movdqu XMMWORD PTR[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[320+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((320+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[512+rsp] + pand xmm3,XMMWORD PTR[((512+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[416+rsp] + pand xmm3,XMMWORD PTR[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[32+rdi],xmm2 + movdqu XMMWORD PTR[48+rdi],xmm3 + +$L$add_doneq:: + add rsp,32*18+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_add:: +ecp_nistz256_point_add ENDP +PUBLIC ecp_nistz256_point_add_affine + +ALIGN 32 +ecp_nistz256_point_add_affine PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_ecp_nistz256_point_add_affine:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbp + push rbx + push r12 + push r13 + push r14 + push r15 + sub rsp,32*15+8 + + movdqu xmm0,XMMWORD PTR[rsi] + mov rbx,rdx + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] + movdqu xmm3,XMMWORD PTR[48+rsi] + movdqu xmm4,XMMWORD PTR[64+rsi] + movdqu xmm5,XMMWORD PTR[80+rsi] + mov rax,QWORD PTR[((64+0))+rsi] + mov r14,QWORD PTR[((64+8))+rsi] + mov r15,QWORD 
PTR[((64+16))+rsi] + mov r8,QWORD PTR[((64+24))+rsi] + movdqa XMMWORD PTR[320+rsp],xmm0 + movdqa XMMWORD PTR[(320+16)+rsp],xmm1 + por xmm1,xmm0 + movdqa XMMWORD PTR[352+rsp],xmm2 + movdqa XMMWORD PTR[(352+16)+rsp],xmm3 + por xmm3,xmm2 + movdqa XMMWORD PTR[384+rsp],xmm4 + movdqa XMMWORD PTR[(384+16)+rsp],xmm5 + por xmm3,xmm1 + + movdqu xmm0,XMMWORD PTR[rbx] + pshufd xmm5,xmm3,1h + movdqu xmm1,XMMWORD PTR[16+rbx] + movdqu xmm2,XMMWORD PTR[32+rbx] + por xmm5,xmm3 + movdqu xmm3,XMMWORD PTR[48+rbx] + movdqa XMMWORD PTR[416+rsp],xmm0 + pshufd xmm4,xmm5,01eh + movdqa XMMWORD PTR[(416+16)+rsp],xmm1 + por xmm1,xmm0 +DB 102,72,15,110,199 + movdqa XMMWORD PTR[448+rsp],xmm2 + movdqa XMMWORD PTR[(448+16)+rsp],xmm3 + por xmm3,xmm2 + por xmm5,xmm4 + pxor xmm4,xmm4 + por xmm3,xmm1 + + lea rsi,QWORD PTR[((64-0))+rsi] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_sqr_montq + + pcmpeqd xmm5,xmm4 + pshufd xmm4,xmm3,1h + mov rax,QWORD PTR[rbx] + + mov r9,r12 + por xmm4,xmm3 + pshufd xmm5,xmm5,0 + pshufd xmm3,xmm4,01eh + mov r10,r13 + por xmm4,xmm3 + pxor xmm3,xmm3 + mov r11,r14 + pcmpeqd xmm4,xmm3 + pshufd xmm4,xmm4,0 + + lea rsi,QWORD PTR[((32-0))+rsp] + mov r12,r15 + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[320+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[384+rsp] + lea rbx,QWORD PTR[384+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[288+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[448+rsp] + lea rbx,QWORD PTR[448+rsp] + mov r9,QWORD PTR[((0+32))+rsp] + mov r10,QWORD PTR[((8+32))+rsp] + lea rsi,QWORD PTR[((0+32))+rsp] + mov r11,QWORD PTR[((16+32))+rsp] + mov r12,QWORD PTR[((24+32))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[352+rsp] + lea rdi,QWORD PTR[96+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+64))+rsp] + mov r14,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r15,QWORD PTR[((16+64))+rsp] + mov r8,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[128+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[((0+96))+rsp] + mov r14,QWORD PTR[((8+96))+rsp] + lea rsi,QWORD PTR[((0+96))+rsp] + mov r15,QWORD PTR[((16+96))+rsp] + mov r8,QWORD PTR[((24+96))+rsp] + lea rdi,QWORD PTR[192+rsp] + call __ecp_nistz256_sqr_montq + + mov rax,QWORD PTR[128+rsp] + lea rbx,QWORD PTR[128+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[160+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[320+rsp] + lea rbx,QWORD PTR[320+rsp] + mov r9,QWORD PTR[((0+128))+rsp] + mov r10,QWORD PTR[((8+128))+rsp] + lea rsi,QWORD PTR[((0+128))+rsp] + mov r11,QWORD PTR[((16+128))+rsp] + mov r12,QWORD PTR[((24+128))+rsp] + lea rdi,QWORD PTR[rsp] + call __ecp_nistz256_mul_montq + + + + + add r12,r12 + lea rsi,QWORD PTR[192+rsp] + adc r13,r13 + mov rax,r12 + adc r8,r8 + adc r9,r9 + mov rbp,r13 + sbb r11,r11 + + sub r12,-1 + mov rcx,r8 + sbb r13,r14 + sbb r8,0 + mov r10,r9 + sbb r9,r15 + test 
r11,r11 + + cmovz r12,rax + mov rax,QWORD PTR[rsi] + cmovz r13,rbp + mov rbp,QWORD PTR[8+rsi] + cmovz r8,rcx + mov rcx,QWORD PTR[16+rsi] + cmovz r9,r10 + mov r10,QWORD PTR[24+rsi] + + call __ecp_nistz256_subq + + lea rbx,QWORD PTR[160+rsp] + lea rdi,QWORD PTR[224+rsp] + call __ecp_nistz256_sub_fromq + + mov rax,QWORD PTR[((0+0))+rsp] + mov rbp,QWORD PTR[((0+8))+rsp] + mov rcx,QWORD PTR[((0+16))+rsp] + mov r10,QWORD PTR[((0+24))+rsp] + lea rdi,QWORD PTR[64+rsp] + + call __ecp_nistz256_subq + + mov QWORD PTR[rdi],r12 + mov QWORD PTR[8+rdi],r13 + mov QWORD PTR[16+rdi],r8 + mov QWORD PTR[24+rdi],r9 + mov rax,QWORD PTR[352+rsp] + lea rbx,QWORD PTR[352+rsp] + mov r9,QWORD PTR[((0+160))+rsp] + mov r10,QWORD PTR[((8+160))+rsp] + lea rsi,QWORD PTR[((0+160))+rsp] + mov r11,QWORD PTR[((16+160))+rsp] + mov r12,QWORD PTR[((24+160))+rsp] + lea rdi,QWORD PTR[32+rsp] + call __ecp_nistz256_mul_montq + + mov rax,QWORD PTR[96+rsp] + lea rbx,QWORD PTR[96+rsp] + mov r9,QWORD PTR[((0+64))+rsp] + mov r10,QWORD PTR[((8+64))+rsp] + lea rsi,QWORD PTR[((0+64))+rsp] + mov r11,QWORD PTR[((16+64))+rsp] + mov r12,QWORD PTR[((24+64))+rsp] + lea rdi,QWORD PTR[64+rsp] + call __ecp_nistz256_mul_montq + + lea rbx,QWORD PTR[32+rsp] + lea rdi,QWORD PTR[256+rsp] + call __ecp_nistz256_sub_fromq + +DB 102,72,15,126,199 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[288+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((288+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[$L$ONE_mont] + pand xmm3,XMMWORD PTR[(($L$ONE_mont+16))] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[384+rsp] + pand xmm3,XMMWORD PTR[((384+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[64+rdi],xmm2 + movdqu XMMWORD PTR[80+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[224+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((224+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[416+rsp] + pand xmm3,XMMWORD PTR[((416+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[320+rsp] + pand xmm3,XMMWORD PTR[((320+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[rdi],xmm2 + movdqu XMMWORD PTR[16+rdi],xmm3 + + movdqa xmm0,xmm5 + movdqa xmm1,xmm5 + pandn xmm0,XMMWORD PTR[256+rsp] + movdqa xmm2,xmm5 + pandn xmm1,XMMWORD PTR[((256+16))+rsp] + movdqa xmm3,xmm5 + pand xmm2,XMMWORD PTR[448+rsp] + pand xmm3,XMMWORD PTR[((448+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + + movdqa xmm0,xmm4 + movdqa xmm1,xmm4 + pandn xmm0,xmm2 + movdqa xmm2,xmm4 + pandn xmm1,xmm3 + movdqa xmm3,xmm4 + pand xmm2,XMMWORD PTR[352+rsp] + pand xmm3,XMMWORD PTR[((352+16))+rsp] + por xmm2,xmm0 + por xmm3,xmm1 + movdqu XMMWORD PTR[32+rdi],xmm2 + movdqu XMMWORD PTR[48+rdi],xmm3 + + add rsp,32*15+8 + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx + pop rbp + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_ecp_nistz256_point_add_affine:: +ecp_nistz256_point_add_affine ENDP + +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/md5/md5-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/md5/md5-x86_64.asm new file mode 100644 index 00000000000000..d2faa88e8a5f9d --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/md5/md5-x86_64.asm @@ -0,0 +1,778 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' 
+ALIGN 16 + +PUBLIC md5_block_asm_data_order + +md5_block_asm_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_md5_block_asm_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbp + push rbx + push r12 + push r14 + push r15 +$L$prologue:: + + + + + mov rbp,rdi + shl rdx,6 + lea rdi,QWORD PTR[rdx*1+rsi] + mov eax,DWORD PTR[rbp] + mov ebx,DWORD PTR[4+rbp] + mov ecx,DWORD PTR[8+rbp] + mov edx,DWORD PTR[12+rbp] + + + + + + + + cmp rsi,rdi + je $L$end + + +$L$loop:: + mov r8d,eax + mov r9d,ebx + mov r14d,ecx + mov r15d,edx + mov r10d,DWORD PTR[rsi] + mov r11d,edx + xor r11d,ecx + lea eax,DWORD PTR[((-680876936))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[4+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[((-389564586))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[8+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[606105819+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[12+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[((-1044525330))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[16+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,DWORD PTR[((-176418897))+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[20+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[1200080426+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[24+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[((-1473231341))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[28+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[((-45705983))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[32+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,DWORD PTR[1770035416+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[36+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[((-1958414417))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[40+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[((-42063))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[44+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[((-1990404162))+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[48+rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + xor r11d,ecx + lea eax,DWORD PTR[1804603682+r10*1+rax] + and r11d,ebx + xor r11d,edx + mov r10d,DWORD PTR[52+rsi] + add eax,r11d + rol eax,7 + mov r11d,ecx + add eax,ebx + xor r11d,ebx + lea edx,DWORD PTR[((-40341101))+r10*1+rdx] + and r11d,eax + xor r11d,ecx + mov r10d,DWORD PTR[56+rsi] + add edx,r11d + rol edx,12 + mov r11d,ebx + add edx,eax + xor r11d,eax + lea ecx,DWORD PTR[((-1502002290))+r10*1+rcx] + and r11d,edx + xor r11d,ebx + mov r10d,DWORD PTR[60+rsi] + add ecx,r11d + rol ecx,17 + mov r11d,eax + add ecx,edx + xor r11d,edx + lea ebx,DWORD PTR[1236535329+r10*1+rbx] + and r11d,ecx + xor r11d,eax + mov r10d,DWORD PTR[rsi] + add ebx,r11d + rol ebx,22 + mov r11d,edx + add ebx,ecx + mov r10d,DWORD PTR[4+rsi] + mov r11d,edx + mov r12d,edx + not r11d + lea eax,DWORD 
PTR[((-165796510))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[24+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[((-1069501632))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[44+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[643717713+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[((-373897302))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[20+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,DWORD PTR[((-701558691))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[40+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[38016083+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[60+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[((-660478335))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[16+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[((-405537848))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[36+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,DWORD PTR[568446438+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[56+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[((-1019803690))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[12+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[((-187363961))+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[32+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[1163531501+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[52+rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + not r11d + lea eax,DWORD PTR[((-1444681467))+r10*1+rax] + and r12d,ebx + and r11d,ecx + mov r10d,DWORD PTR[8+rsi] + or r12d,r11d + mov r11d,ecx + add eax,r12d + mov r12d,ecx + rol eax,5 + add eax,ebx + not r11d + lea edx,DWORD PTR[((-51403784))+r10*1+rdx] + and r12d,eax + and r11d,ebx + mov r10d,DWORD PTR[28+rsi] + or r12d,r11d + mov r11d,ebx + add edx,r12d + mov r12d,ebx + rol edx,9 + add edx,eax + not r11d + lea ecx,DWORD PTR[1735328473+r10*1+rcx] + and r12d,edx + and r11d,eax + mov r10d,DWORD PTR[48+rsi] + or r12d,r11d + mov r11d,eax + add ecx,r12d + mov r12d,eax + rol ecx,14 + add ecx,edx + not r11d + lea ebx,DWORD PTR[((-1926607734))+r10*1+rbx] + and r12d,ecx + and r11d,edx + mov r10d,DWORD PTR[rsi] + or r12d,r11d + mov r11d,edx + add ebx,r12d + mov r12d,edx + rol ebx,20 + add ebx,ecx + mov r10d,DWORD PTR[20+rsi] + mov r11d,ecx + lea eax,DWORD PTR[((-378558))+r10*1+rax] + mov r10d,DWORD PTR[32+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[((-2022574463))+r10*1+rdx] + mov r10d,DWORD PTR[44+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + 
lea ecx,DWORD PTR[1839030562+r10*1+rcx] + mov r10d,DWORD PTR[56+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[((-35309556))+r10*1+rbx] + mov r10d,DWORD PTR[4+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,DWORD PTR[((-1530992060))+r10*1+rax] + mov r10d,DWORD PTR[16+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[1272893353+r10*1+rdx] + mov r10d,DWORD PTR[28+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[((-155497632))+r10*1+rcx] + mov r10d,DWORD PTR[40+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[((-1094730640))+r10*1+rbx] + mov r10d,DWORD PTR[52+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,DWORD PTR[681279174+r10*1+rax] + mov r10d,DWORD PTR[rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[((-358537222))+r10*1+rdx] + mov r10d,DWORD PTR[12+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[((-722521979))+r10*1+rcx] + mov r10d,DWORD PTR[24+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[76029189+r10*1+rbx] + mov r10d,DWORD PTR[36+rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + lea eax,DWORD PTR[((-640364487))+r10*1+rax] + mov r10d,DWORD PTR[48+rsi] + xor r11d,edx + xor r11d,ebx + add eax,r11d + rol eax,4 + mov r11d,ebx + add eax,ebx + lea edx,DWORD PTR[((-421815835))+r10*1+rdx] + mov r10d,DWORD PTR[60+rsi] + xor r11d,ecx + xor r11d,eax + add edx,r11d + rol edx,11 + mov r11d,eax + add edx,eax + lea ecx,DWORD PTR[530742520+r10*1+rcx] + mov r10d,DWORD PTR[8+rsi] + xor r11d,ebx + xor r11d,edx + add ecx,r11d + rol ecx,16 + mov r11d,edx + add ecx,edx + lea ebx,DWORD PTR[((-995338651))+r10*1+rbx] + mov r10d,DWORD PTR[rsi] + xor r11d,eax + xor r11d,ecx + add ebx,r11d + rol ebx,23 + mov r11d,ecx + add ebx,ecx + mov r10d,DWORD PTR[rsi] + mov r11d,0ffffffffh + xor r11d,edx + lea eax,DWORD PTR[((-198630844))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[28+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[1126891415+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[56+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[((-1416354905))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[20+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[((-57434055))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[48+rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,DWORD PTR[1700485571+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[12+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[((-1894986606))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[40+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[((-1051523))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[4+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + 
add ecx,edx + lea ebx,DWORD PTR[((-2054922799))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[32+rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,DWORD PTR[1873313359+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[60+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[((-30611744))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[24+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[((-1560198380))+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[52+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[1309151649+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[16+rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + lea eax,DWORD PTR[((-145523070))+r10*1+rax] + or r11d,ebx + xor r11d,ecx + add eax,r11d + mov r10d,DWORD PTR[44+rsi] + mov r11d,0ffffffffh + rol eax,6 + xor r11d,ecx + add eax,ebx + lea edx,DWORD PTR[((-1120210379))+r10*1+rdx] + or r11d,eax + xor r11d,ebx + add edx,r11d + mov r10d,DWORD PTR[8+rsi] + mov r11d,0ffffffffh + rol edx,10 + xor r11d,ebx + add edx,eax + lea ecx,DWORD PTR[718787259+r10*1+rcx] + or r11d,edx + xor r11d,eax + add ecx,r11d + mov r10d,DWORD PTR[36+rsi] + mov r11d,0ffffffffh + rol ecx,15 + xor r11d,eax + add ecx,edx + lea ebx,DWORD PTR[((-343485551))+r10*1+rbx] + or r11d,ecx + xor r11d,edx + add ebx,r11d + mov r10d,DWORD PTR[rsi] + mov r11d,0ffffffffh + rol ebx,21 + xor r11d,edx + add ebx,ecx + + add eax,r8d + add ebx,r9d + add ecx,r14d + add edx,r15d + + + add rsi,64 + cmp rsi,rdi + jb $L$loop + + +$L$end:: + mov DWORD PTR[rbp],eax + mov DWORD PTR[4+rbp],ebx + mov DWORD PTR[8+rbp],ecx + mov DWORD PTR[12+rbp],edx + + mov r15,QWORD PTR[rsp] + mov r14,QWORD PTR[8+rsp] + mov r12,QWORD PTR[16+rsp] + mov rbx,QWORD PTR[24+rsp] + mov rbp,QWORD PTR[32+rsp] + add rsp,40 +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_md5_block_asm_data_order:: +md5_block_asm_data_order ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[40+rax] + + mov rbp,QWORD PTR[((-8))+rax] + mov rbx,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r14,QWORD PTR[((-32))+rax] + mov r15,QWORD PTR[((-40))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + 
pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_md5_block_asm_data_order + DD imagerel $L$SEH_end_md5_block_asm_data_order + DD imagerel $L$SEH_info_md5_block_asm_data_order + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_md5_block_asm_data_order:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/modes/aesni-gcm-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/modes/aesni-gcm-x86_64.asm new file mode 100644 index 00000000000000..828be8d124e34e --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/modes/aesni-gcm-x86_64.asm @@ -0,0 +1,19 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC aesni_gcm_encrypt + +aesni_gcm_encrypt PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +aesni_gcm_encrypt ENDP + +PUBLIC aesni_gcm_decrypt + +aesni_gcm_decrypt PROC PUBLIC + xor eax,eax + DB 0F3h,0C3h ;repret +aesni_gcm_decrypt ENDP + +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/modes/ghash-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/modes/ghash-x86_64.asm new file mode 100644 index 00000000000000..9993d75939bc58 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/modes/ghash-x86_64.asm @@ -0,0 +1,1510 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC gcm_gmult_4bit + +ALIGN 16 +gcm_gmult_4bit PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_gmult_4bit:: + mov rdi,rcx + mov rsi,rdx + + + push rbx + push rbp + push r12 +$L$gmult_prologue:: + + movzx r8,BYTE PTR[15+rdi] + lea r11,QWORD PTR[$L$rem_4bit] + xor rax,rax + xor rbx,rbx + mov al,r8b + mov bl,r8b + shl al,4 + mov rcx,14 + mov r8,QWORD PTR[8+rax*1+rsi] + mov r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + mov rdx,r8 + jmp $L$oop1 + +ALIGN 16 +$L$oop1:: + shr r8,4 + and rdx,0fh + mov r10,r9 + mov al,BYTE PTR[rcx*1+rdi] + shr r9,4 + xor r8,QWORD PTR[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rbx*1+rsi] + mov bl,al + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + shl al,4 + xor r8,r10 + dec rcx + js $L$break1 + + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + jmp $L$oop1 + +ALIGN 16 +$L$break1:: + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rax*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rax*1+rsi] + and bl,0f0h + xor r9,QWORD PTR[rdx*8+r11] + mov rdx,r8 + xor r8,r10 + + shr r8,4 + and rdx,0fh + mov r10,r9 + shr r9,4 + xor r8,QWORD PTR[8+rbx*1+rsi] + shl r10,60 + xor r9,QWORD PTR[rbx*1+rsi] + xor r8,r10 + xor r9,QWORD PTR[rdx*8+r11] + + bswap r8 + bswap r9 + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[rdi],r9 + + mov rbx,QWORD PTR[16+rsp] + lea rsp,QWORD PTR[24+rsp] +$L$gmult_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_gmult_4bit:: +gcm_gmult_4bit ENDP +PUBLIC gcm_ghash_4bit + +ALIGN 16 +gcm_ghash_4bit PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_gcm_ghash_4bit:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,280 +$L$ghash_prologue:: + mov r14,rdx + mov r15,rcx + sub 
rsi,-128 + lea rbp,QWORD PTR[((16+128))+rsp] + xor edx,edx + mov r8,QWORD PTR[((0+0-128))+rsi] + mov rax,QWORD PTR[((0+8-128))+rsi] + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov r9,QWORD PTR[((16+0-128))+rsi] + shl dl,4 + mov rbx,QWORD PTR[((16+8-128))+rsi] + shl r10,60 + mov BYTE PTR[rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[rbp],r8 + mov r8,QWORD PTR[((32+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((0-128))+rbp],rax + mov rax,QWORD PTR[((32+8-128))+rsi] + shl r10,60 + mov BYTE PTR[1+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[8+rbp],r9 + mov r9,QWORD PTR[((48+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((8-128))+rbp],rbx + mov rbx,QWORD PTR[((48+8-128))+rsi] + shl r10,60 + mov BYTE PTR[2+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[16+rbp],r8 + mov r8,QWORD PTR[((64+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((16-128))+rbp],rax + mov rax,QWORD PTR[((64+8-128))+rsi] + shl r10,60 + mov BYTE PTR[3+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[24+rbp],r9 + mov r9,QWORD PTR[((80+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((24-128))+rbp],rbx + mov rbx,QWORD PTR[((80+8-128))+rsi] + shl r10,60 + mov BYTE PTR[4+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[32+rbp],r8 + mov r8,QWORD PTR[((96+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((32-128))+rbp],rax + mov rax,QWORD PTR[((96+8-128))+rsi] + shl r10,60 + mov BYTE PTR[5+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[40+rbp],r9 + mov r9,QWORD PTR[((112+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((40-128))+rbp],rbx + mov rbx,QWORD PTR[((112+8-128))+rsi] + shl r10,60 + mov BYTE PTR[6+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[48+rbp],r8 + mov r8,QWORD PTR[((128+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((48-128))+rbp],rax + mov rax,QWORD PTR[((128+8-128))+rsi] + shl r10,60 + mov BYTE PTR[7+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[56+rbp],r9 + mov r9,QWORD PTR[((144+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((56-128))+rbp],rbx + mov rbx,QWORD PTR[((144+8-128))+rsi] + shl r10,60 + mov BYTE PTR[8+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[64+rbp],r8 + mov r8,QWORD PTR[((160+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((64-128))+rbp],rax + mov rax,QWORD PTR[((160+8-128))+rsi] + shl r10,60 + mov BYTE PTR[9+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[72+rbp],r9 + mov r9,QWORD PTR[((176+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((72-128))+rbp],rbx + mov rbx,QWORD PTR[((176+8-128))+rsi] + shl r10,60 + mov BYTE PTR[10+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[80+rbp],r8 + mov r8,QWORD PTR[((192+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((80-128))+rbp],rax + mov rax,QWORD PTR[((192+8-128))+rsi] + shl r10,60 + mov BYTE PTR[11+rsp],dl + or rbx,r10 + mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[88+rbp],r9 + mov r9,QWORD PTR[((208+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((88-128))+rbp],rbx + mov rbx,QWORD PTR[((208+8-128))+rsi] + shl r10,60 + mov BYTE PTR[12+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[96+rbp],r8 + mov r8,QWORD PTR[((224+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((96-128))+rbp],rax + mov rax,QWORD PTR[((224+8-128))+rsi] + shl r10,60 + mov BYTE PTR[13+rsp],dl + or rbx,r10 + 
mov dl,al + shr rax,4 + mov r10,r8 + shr r8,4 + mov QWORD PTR[104+rbp],r9 + mov r9,QWORD PTR[((240+0-128))+rsi] + shl dl,4 + mov QWORD PTR[((104-128))+rbp],rbx + mov rbx,QWORD PTR[((240+8-128))+rsi] + shl r10,60 + mov BYTE PTR[14+rsp],dl + or rax,r10 + mov dl,bl + shr rbx,4 + mov r10,r9 + shr r9,4 + mov QWORD PTR[112+rbp],r8 + shl dl,4 + mov QWORD PTR[((112-128))+rbp],rax + shl r10,60 + mov BYTE PTR[15+rsp],dl + or rbx,r10 + mov QWORD PTR[120+rbp],r9 + mov QWORD PTR[((120-128))+rbp],rbx + add rsi,-128 + mov r8,QWORD PTR[8+rdi] + mov r9,QWORD PTR[rdi] + add r15,r14 + lea r11,QWORD PTR[$L$rem_8bit] + jmp $L$outer_loop +ALIGN 16 +$L$outer_loop:: + xor r9,QWORD PTR[r14] + mov rdx,QWORD PTR[8+r14] + lea r14,QWORD PTR[16+r14] + xor rdx,r8 + mov QWORD PTR[rdi],r9 + mov QWORD PTR[8+rdi],rdx + shr rdx,32 + xor rax,rax + rol edx,8 + mov al,dl + movzx ebx,dl + shl al,4 + shr ebx,4 + rol edx,8 + mov r8,QWORD PTR[8+rax*1+rsi] + mov r9,QWORD PTR[rax*1+rsi] + mov al,dl + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + xor r12,r8 + mov r10,r9 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[8+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[4+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD 
PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + shr ecx,4 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r12,WORD PTR[r12*2+r11] + movzx ebx,dl + shl al,4 + movzx r13,BYTE PTR[rcx*1+rsp] + shr ebx,4 + shl r12,48 + xor r13,r8 + mov r10,r9 + xor r9,r12 + shr r8,8 + movzx r13,r13b + shr r9,8 + xor r8,QWORD PTR[((-128))+rcx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rcx*8+rbp] + rol edx,8 + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + mov al,dl + xor r8,r10 + movzx r13,WORD PTR[r13*2+r11] + movzx ecx,dl + shl al,4 + movzx r12,BYTE PTR[rbx*1+rsp] + and ecx,240 + shl r13,48 + xor r12,r8 + mov r10,r9 + xor r9,r13 + shr r8,8 + movzx r12,r12b + mov edx,DWORD PTR[((-4))+rdi] + shr r9,8 + xor r8,QWORD PTR[((-128))+rbx*8+rbp] + shl r10,56 + xor r9,QWORD PTR[rbx*8+rbp] + movzx r12,WORD PTR[r12*2+r11] + xor r8,QWORD PTR[8+rax*1+rsi] + xor r9,QWORD PTR[rax*1+rsi] + shl r12,48 + xor r8,r10 + xor r9,r12 + movzx r13,r8b + shr r8,4 + mov r10,r9 + shl r13b,4 + shr r9,4 + xor r8,QWORD PTR[8+rcx*1+rsi] + movzx r13,WORD PTR[r13*2+r11] + shl r10,60 + xor r9,QWORD PTR[rcx*1+rsi] + xor r8,r10 + shl r13,48 + bswap r8 + xor r9,r13 + bswap r9 + cmp r14,r15 + jb $L$outer_loop + mov QWORD PTR[8+rdi],r8 + mov QWORD PTR[rdi],r9 + + lea rsi,QWORD PTR[280+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + 
mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$ghash_epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_gcm_ghash_4bit:: +gcm_ghash_4bit ENDP +PUBLIC gcm_init_clmul + +ALIGN 16 +gcm_init_clmul PROC PUBLIC +$L$_init_clmul:: +$L$SEH_begin_gcm_init_clmul:: + +DB 048h,083h,0ech,018h +DB 00fh,029h,034h,024h + movdqu xmm2,XMMWORD PTR[rdx] + pshufd xmm2,xmm2,78 + + + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + + + pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial] + pxor xmm2,xmm5 + + + pshufd xmm6,xmm2,78 + movdqa xmm0,xmm2 + pxor xmm6,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu XMMWORD PTR[rcx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD PTR[16+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR[32+rcx],xmm4 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm5,xmm0 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,222,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm5,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm5 + movdqu XMMWORD PTR[48+rcx],xmm5 + pxor xmm4,xmm0 + movdqu XMMWORD PTR[64+rcx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR[80+rcx],xmm4 + movaps xmm6,XMMWORD PTR[rsp] + lea rsp,QWORD PTR[24+rsp] +$L$SEH_end_gcm_init_clmul:: + DB 0F3h,0C3h ;repret +gcm_init_clmul ENDP +PUBLIC gcm_gmult_clmul + +ALIGN 16 +gcm_gmult_clmul PROC PUBLIC +$L$_gmult_clmul:: + movdqu xmm0,XMMWORD PTR[rcx] + movdqa xmm5,XMMWORD PTR[$L$bswap_mask] + movdqu xmm2,XMMWORD PTR[rdx] + movdqu xmm4,XMMWORD PTR[32+rdx] +DB 102,15,56,0,197 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor 
xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,56,0,197 + movdqu XMMWORD PTR[rcx],xmm0 + DB 0F3h,0C3h ;repret +gcm_gmult_clmul ENDP +PUBLIC gcm_ghash_clmul + +ALIGN 32 +gcm_ghash_clmul PROC PUBLIC +$L$_ghash_clmul:: + lea rax,QWORD PTR[((-136))+rsp] +$L$SEH_begin_gcm_ghash_clmul:: + +DB 048h,08dh,060h,0e0h +DB 00fh,029h,070h,0e0h +DB 00fh,029h,078h,0f0h +DB 044h,00fh,029h,000h +DB 044h,00fh,029h,048h,010h +DB 044h,00fh,029h,050h,020h +DB 044h,00fh,029h,058h,030h +DB 044h,00fh,029h,060h,040h +DB 044h,00fh,029h,068h,050h +DB 044h,00fh,029h,070h,060h +DB 044h,00fh,029h,078h,070h + movdqa xmm10,XMMWORD PTR[$L$bswap_mask] + + movdqu xmm0,XMMWORD PTR[rcx] + movdqu xmm2,XMMWORD PTR[rdx] + movdqu xmm7,XMMWORD PTR[32+rdx] +DB 102,65,15,56,0,194 + + sub r9,010h + jz $L$odd_tail + + movdqu xmm6,XMMWORD PTR[16+rdx] + mov eax,DWORD PTR[((OPENSSL_ia32cap_P+4))] + cmp r9,030h + jb $L$skip4x + + and eax,71303168 + cmp eax,4194304 + je $L$skip4x + + sub r9,030h + mov rax,0A040608020C0E000h + movdqu xmm14,XMMWORD PTR[48+rdx] + movdqu xmm15,XMMWORD PTR[64+rdx] + + + + + movdqu xmm3,XMMWORD PTR[48+r8] + movdqu xmm11,XMMWORD PTR[32+r8] +DB 102,65,15,56,0,218 +DB 102,69,15,56,0,218 + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm12,xmm11 +DB 102,68,15,58,68,222,0 +DB 102,68,15,58,68,238,17 +DB 102,68,15,58,68,231,16 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + movups xmm7,XMMWORD PTR[80+rdx] + xorps xmm4,xmm12 + + movdqu xmm11,XMMWORD PTR[16+r8] + movdqu xmm8,XMMWORD PTR[r8] +DB 102,69,15,56,0,218 +DB 102,69,15,56,0,194 + movdqa xmm13,xmm11 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + movdqa xmm1,xmm0 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 +DB 102,69,15,58,68,238,17 +DB 102,68,15,58,68,231,0 + xorps xmm3,xmm11 + xorps xmm5,xmm13 + + lea r8,QWORD PTR[64+r8] + sub r9,040h + jc $L$tail4x + + jmp $L$mod4_loop +ALIGN 32 +$L$mod4_loop:: +DB 102,65,15,58,68,199,0 + xorps xmm4,xmm12 + movdqu xmm11,XMMWORD PTR[48+r8] +DB 102,69,15,56,0,218 +DB 102,65,15,58,68,207,17 + xorps xmm0,xmm3 + movdqu xmm3,XMMWORD PTR[32+r8] + movdqa xmm13,xmm11 +DB 102,68,15,58,68,199,16 + pshufd xmm12,xmm11,78 + xorps xmm1,xmm5 + pxor xmm12,xmm11 +DB 102,65,15,56,0,218 + movups xmm7,XMMWORD PTR[32+rdx] + xorps xmm8,xmm4 +DB 102,68,15,58,68,218,0 + pshufd xmm4,xmm3,78 + + pxor xmm8,xmm0 + movdqa xmm5,xmm3 + pxor xmm8,xmm1 + pxor xmm4,xmm3 + movdqa xmm9,xmm8 +DB 102,68,15,58,68,234,17 + pslldq xmm8,8 + psrldq xmm9,8 + pxor xmm0,xmm8 + movdqa xmm8,XMMWORD PTR[$L$7_mask] + pxor xmm1,xmm9 +DB 102,76,15,110,200 + + pand xmm8,xmm0 +DB 102,69,15,56,0,200 + pxor xmm9,xmm0 +DB 102,68,15,58,68,231,0 + psllq xmm9,57 + movdqa xmm8,xmm9 + pslldq xmm9,8 +DB 102,15,58,68,222,0 + psrldq xmm8,8 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + movdqu xmm8,XMMWORD PTR[r8] + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,238,17 + xorps xmm3,xmm11 + movdqu xmm11,XMMWORD PTR[16+r8] +DB 102,69,15,56,0,218 +DB 102,15,58,68,231,16 + xorps xmm5,xmm13 + movups xmm7,XMMWORD PTR[80+rdx] +DB 102,69,15,56,0,194 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + + movdqa xmm13,xmm11 + 
pxor xmm4,xmm12 + pshufd xmm12,xmm11,78 + pxor xmm0,xmm9 + pxor xmm1,xmm8 + pxor xmm12,xmm11 +DB 102,69,15,58,68,222,0 + psrlq xmm0,1 + pxor xmm0,xmm1 + movdqa xmm1,xmm0 +DB 102,69,15,58,68,238,17 + xorps xmm3,xmm11 + pshufd xmm8,xmm0,78 + pxor xmm8,xmm0 + +DB 102,68,15,58,68,231,0 + xorps xmm5,xmm13 + + lea r8,QWORD PTR[64+r8] + sub r9,040h + jnc $L$mod4_loop + +$L$tail4x:: +DB 102,65,15,58,68,199,0 +DB 102,65,15,58,68,207,17 +DB 102,68,15,58,68,199,16 + xorps xmm4,xmm12 + xorps xmm0,xmm3 + xorps xmm1,xmm5 + pxor xmm1,xmm0 + pxor xmm8,xmm4 + + pxor xmm8,xmm1 + pxor xmm1,xmm0 + + movdqa xmm9,xmm8 + psrldq xmm8,8 + pslldq xmm9,8 + pxor xmm1,xmm8 + pxor xmm0,xmm9 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + add r9,040h + jz $L$done + movdqu xmm7,XMMWORD PTR[32+rdx] + sub r9,010h + jz $L$odd_tail +$L$skip4x:: + + + + + + movdqu xmm8,XMMWORD PTR[r8] + movdqu xmm3,XMMWORD PTR[16+r8] +DB 102,69,15,56,0,194 +DB 102,65,15,56,0,218 + pxor xmm0,xmm8 + + movdqa xmm5,xmm3 + pshufd xmm4,xmm3,78 + pxor xmm4,xmm3 +DB 102,15,58,68,218,0 +DB 102,15,58,68,234,17 +DB 102,15,58,68,231,0 + + lea r8,QWORD PTR[32+r8] + nop + sub r9,020h + jbe $L$even_tail + nop + jmp $L$mod_loop + +ALIGN 32 +$L$mod_loop:: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + movdqu xmm9,XMMWORD PTR[r8] + pxor xmm8,xmm0 +DB 102,69,15,56,0,202 + movdqu xmm3,XMMWORD PTR[16+r8] + + pxor xmm8,xmm1 + pxor xmm1,xmm9 + pxor xmm4,xmm8 +DB 102,65,15,56,0,218 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm5,xmm3 + + movdqa xmm9,xmm0 + movdqa xmm8,xmm0 + psllq xmm0,5 + pxor xmm8,xmm0 +DB 102,15,58,68,218,0 + psllq xmm0,1 + pxor xmm0,xmm8 + psllq xmm0,57 + movdqa xmm8,xmm0 + pslldq xmm0,8 + psrldq xmm8,8 + pxor xmm0,xmm9 + pshufd xmm4,xmm5,78 + pxor xmm1,xmm8 + pxor xmm4,xmm5 + + movdqa xmm9,xmm0 + psrlq xmm0,1 +DB 102,15,58,68,234,17 + pxor xmm1,xmm9 + pxor xmm9,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm9 + lea r8,QWORD PTR[32+r8] + psrlq xmm0,1 +DB 102,15,58,68,231,0 + pxor xmm0,xmm1 + + sub r9,020h + ja $L$mod_loop + +$L$even_tail:: + movdqa xmm1,xmm0 + movdqa xmm8,xmm4 + pshufd xmm4,xmm0,78 + pxor xmm4,xmm0 + +DB 102,15,58,68,198,0 +DB 102,15,58,68,206,17 +DB 102,15,58,68,231,16 + + pxor xmm0,xmm3 + pxor xmm1,xmm5 + pxor xmm8,xmm0 + pxor xmm8,xmm1 + pxor xmm4,xmm8 + movdqa xmm8,xmm4 + psrldq xmm8,8 + pslldq xmm4,8 + pxor xmm1,xmm8 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test r9,r9 + jnz $L$done + +$L$odd_tail:: + movdqu xmm8,XMMWORD PTR[r8] +DB 102,69,15,56,0,194 + pxor xmm0,xmm8 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,223,0 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + + movdqa xmm4,xmm0 + movdqa 
xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + + + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +$L$done:: +DB 102,65,15,56,0,194 + movdqu XMMWORD PTR[rcx],xmm0 + movaps xmm6,XMMWORD PTR[rsp] + movaps xmm7,XMMWORD PTR[16+rsp] + movaps xmm8,XMMWORD PTR[32+rsp] + movaps xmm9,XMMWORD PTR[48+rsp] + movaps xmm10,XMMWORD PTR[64+rsp] + movaps xmm11,XMMWORD PTR[80+rsp] + movaps xmm12,XMMWORD PTR[96+rsp] + movaps xmm13,XMMWORD PTR[112+rsp] + movaps xmm14,XMMWORD PTR[128+rsp] + movaps xmm15,XMMWORD PTR[144+rsp] + lea rsp,QWORD PTR[168+rsp] +$L$SEH_end_gcm_ghash_clmul:: + DB 0F3h,0C3h ;repret +gcm_ghash_clmul ENDP +PUBLIC gcm_init_avx + +ALIGN 32 +gcm_init_avx PROC PUBLIC + jmp $L$_init_clmul +gcm_init_avx ENDP +PUBLIC gcm_gmult_avx + +ALIGN 32 +gcm_gmult_avx PROC PUBLIC + jmp $L$_gmult_clmul +gcm_gmult_avx ENDP +PUBLIC gcm_ghash_avx + +ALIGN 32 +gcm_ghash_avx PROC PUBLIC + jmp $L$_ghash_clmul +gcm_ghash_avx ENDP +ALIGN 64 +$L$bswap_mask:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +$L$0x1c2_polynomial:: +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h +$L$7_mask:: + DD 7,0,7,0 +$L$7_mask_poly:: + DD 7,0,450,0 +ALIGN 64 + +$L$rem_4bit:: + DD 0,0,0,471859200,0,943718400,0,610271232 + DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 + DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 + DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 + +$L$rem_8bit:: + DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh + DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh + DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh + DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh + DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh + DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh + DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh + DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh + DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh + DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh + DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh + DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh + DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh + DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh + DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh + DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh + DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh + DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh + DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh + DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh + DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh + DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh + DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh + DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh + DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh + DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh + DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh + DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh + DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh + DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh + DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh + DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh + +DB 
71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 +DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +DB 114,103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[24+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_gcm_gmult_4bit + DD imagerel $L$SEH_end_gcm_gmult_4bit + DD imagerel $L$SEH_info_gcm_gmult_4bit + + DD imagerel $L$SEH_begin_gcm_ghash_4bit + DD imagerel $L$SEH_end_gcm_ghash_4bit + DD imagerel $L$SEH_info_gcm_ghash_4bit + + DD imagerel $L$SEH_begin_gcm_init_clmul + DD imagerel $L$SEH_end_gcm_init_clmul + DD imagerel $L$SEH_info_gcm_init_clmul + + DD imagerel $L$SEH_begin_gcm_ghash_clmul + DD imagerel $L$SEH_end_gcm_ghash_clmul + DD imagerel $L$SEH_info_gcm_ghash_clmul +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_gcm_gmult_4bit:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue +$L$SEH_info_gcm_ghash_4bit:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue +$L$SEH_info_gcm_init_clmul:: +DB 001h,008h,003h,000h +DB 008h,068h,000h,000h +DB 004h,022h,000h,000h +$L$SEH_info_gcm_ghash_clmul:: +DB 001h,033h,016h,000h +DB 033h,0f8h,009h,000h +DB 02eh,0e8h,008h,000h +DB 029h,0d8h,007h,000h +DB 024h,0c8h,006h,000h +DB 01fh,0b8h,005h,000h +DB 01ah,0a8h,004h,000h +DB 015h,098h,003h,000h +DB 010h,088h,002h,000h +DB 00ch,078h,001h,000h +DB 008h,068h,000h,000h +DB 004h,001h,015h,000h + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/rc4/rc4-md5-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/rc4/rc4-md5-x86_64.asm new file mode 100644 index 00000000000000..9d823aed573007 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/rc4/rc4-md5-x86_64.asm @@ -0,0 +1,1374 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +ALIGN 16 + +PUBLIC rc4_md5_enc + +rc4_md5_enc PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_rc4_md5_enc:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + mov 
r8,QWORD PTR[40+rsp] + mov r9,QWORD PTR[48+rsp] + + + cmp r9,0 + je $L$abort + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + sub rsp,40 +$L$body:: + mov r11,rcx + mov r12,r9 + mov r13,rsi + mov r14,rdx + mov r15,r8 + xor rbp,rbp + xor rcx,rcx + + lea rdi,QWORD PTR[8+rdi] + mov bpl,BYTE PTR[((-8))+rdi] + mov cl,BYTE PTR[((-4))+rdi] + + inc bpl + sub r14,r13 + mov eax,DWORD PTR[rbp*4+rdi] + add cl,al + lea rsi,QWORD PTR[rbp*4+rdi] + shl r12,6 + add r12,r15 + mov QWORD PTR[16+rsp],r12 + + mov QWORD PTR[24+rsp],r11 + mov r8d,DWORD PTR[r11] + mov r9d,DWORD PTR[4+r11] + mov r10d,DWORD PTR[8+r11] + mov r11d,DWORD PTR[12+r11] + jmp $L$oop + +ALIGN 16 +$L$oop:: + mov DWORD PTR[rsp],r8d + mov DWORD PTR[4+rsp],r9d + mov DWORD PTR[8+rsp],r10d + mov r12d,r11d + mov DWORD PTR[12+rsp],r11d + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[r15] + add al,dl + mov ebx,DWORD PTR[4+rsi] + add r8d,3614090360 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[4+r15] + add bl,dl + mov eax,DWORD PTR[8+rsi] + add r11d,3905402710 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[4+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[8+r15] + add al,dl + mov ebx,DWORD PTR[12+rsi] + add r10d,606105819 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[8+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[12+r15] + add bl,dl + mov eax,DWORD PTR[16+rsi] + add r9d,3250441966 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[12+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[16+r15] + add al,dl + mov ebx,DWORD PTR[20+rsi] + add r8d,4118548399 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[16+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[20+r15] + add bl,dl + mov eax,DWORD PTR[24+rsi] + add r11d,1200080426 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[20+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[24+r15] + add al,dl + mov ebx,DWORD PTR[28+rsi] + add r10d,2821735955 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[24+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[28+r15] + add bl,dl + mov eax,DWORD PTR[32+rsi] + add r9d,4249261313 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[28+rsi],edx + add r9d,r12d + add cl,al + rol 
r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[32+r15] + add al,dl + mov ebx,DWORD PTR[36+rsi] + add r8d,1770035416 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[32+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[36+r15] + add bl,dl + mov eax,DWORD PTR[40+rsi] + add r11d,2336552879 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[36+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[40+r15] + add al,dl + mov ebx,DWORD PTR[44+rsi] + add r10d,4294925233 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[40+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[44+r15] + add bl,dl + mov eax,DWORD PTR[48+rsi] + add r9d,2304563134 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[44+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r8d,DWORD PTR[48+r15] + add al,dl + mov ebx,DWORD PTR[52+rsi] + add r8d,1804603682 + xor r12d,r11d + movzx eax,al + mov DWORD PTR[48+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,7 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r11d,DWORD PTR[52+r15] + add bl,dl + mov eax,DWORD PTR[56+rsi] + add r11d,4254626195 + xor r12d,r10d + movzx ebx,bl + mov DWORD PTR[52+rsi],edx + add r11d,r12d + add cl,al + rol r11d,12 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r10d,DWORD PTR[56+r15] + add al,dl + mov ebx,DWORD PTR[60+rsi] + add r10d,2792965006 + xor r12d,r9d + movzx eax,al + mov DWORD PTR[56+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,17 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm2,XMMWORD PTR[r13] + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r9d,DWORD PTR[60+r15] + add bl,dl + mov eax,DWORD PTR[64+rsi] + add r9d,1236535329 + xor r12d,r8d + movzx ebx,bl + mov DWORD PTR[60+rsi],edx + add r9d,r12d + add cl,al + rol r9d,22 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[4+r15] + add al,dl + mov ebx,DWORD PTR[68+rsi] + add r8d,4129170786 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[24+r15] + add bl,dl + mov eax,DWORD PTR[72+rsi] + add r11d,3225465664 + xor r12d,r9d + movzx ebx,bl + 
mov DWORD PTR[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[44+r15] + add al,dl + mov ebx,DWORD PTR[76+rsi] + add r10d,643717713 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[r15] + add bl,dl + mov eax,DWORD PTR[80+rsi] + add r9d,3921069994 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[20+r15] + add al,dl + mov ebx,DWORD PTR[84+rsi] + add r8d,3593408605 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[40+r15] + add bl,dl + mov eax,DWORD PTR[88+rsi] + add r11d,38016083 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[60+r15] + add al,dl + mov ebx,DWORD PTR[92+rsi] + add r10d,3634488961 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[16+r15] + add bl,dl + mov eax,DWORD PTR[96+rsi] + add r9d,3889429448 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[36+r15] + add al,dl + mov ebx,DWORD PTR[100+rsi] + add r8d,568446438 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[56+r15] + add bl,dl + mov eax,DWORD PTR[104+rsi] + add r11d,3275163606 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[12+r15] + add al,dl + mov ebx,DWORD PTR[108+rsi] + add r10d,4107603335 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[32+r15] + add bl,dl + mov eax,DWORD PTR[112+rsi] + add r9d,1163531501 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[108+rsi],edx + add r9d,r12d + add 
cl,al + rol r9d,20 + mov r12d,r10d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r11d + add r8d,DWORD PTR[52+r15] + add al,dl + mov ebx,DWORD PTR[116+rsi] + add r8d,2850285829 + xor r12d,r10d + movzx eax,al + mov DWORD PTR[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,5 + mov r12d,r9d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r10d + add r11d,DWORD PTR[8+r15] + add bl,dl + mov eax,DWORD PTR[120+rsi] + add r11d,4243563512 + xor r12d,r9d + movzx ebx,bl + mov DWORD PTR[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,9 + mov r12d,r8d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + and r12d,r9d + add r10d,DWORD PTR[28+r15] + add al,dl + mov ebx,DWORD PTR[124+rsi] + add r10d,1735328473 + xor r12d,r8d + movzx eax,al + mov DWORD PTR[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,14 + mov r12d,r11d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm3,XMMWORD PTR[16+r13] + add bpl,32 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + and r12d,r8d + add r9d,DWORD PTR[48+r15] + add bl,dl + mov eax,DWORD PTR[rbp*4+rdi] + add r9d,2368359562 + xor r12d,r11d + movzx ebx,bl + mov DWORD PTR[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,20 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,QWORD PTR[rbp*4+rdi] + psllq xmm1,8 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[20+r15] + add al,dl + mov ebx,DWORD PTR[4+rsi] + add r8d,4294588738 + movzx eax,al + add r8d,r12d + mov DWORD PTR[rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[32+r15] + add bl,dl + mov eax,DWORD PTR[8+rsi] + add r11d,2272392833 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[4+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[44+r15] + add al,dl + mov ebx,DWORD PTR[12+rsi] + add r10d,1839030562 + movzx eax,al + add r10d,r12d + mov DWORD PTR[8+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[56+r15] + add bl,dl + mov eax,DWORD PTR[16+rsi] + add r9d,4259657740 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[12+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[4+r15] + add al,dl + mov ebx,DWORD PTR[20+rsi] + add r8d,2763975236 + movzx eax,al + add r8d,r12d + mov DWORD PTR[16+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[16+r15] + add bl,dl + mov eax,DWORD PTR[24+rsi] + add r11d,1272893353 + movzx 
ebx,bl + add r11d,r12d + mov DWORD PTR[20+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[28+r15] + add al,dl + mov ebx,DWORD PTR[28+rsi] + add r10d,4139469664 + movzx eax,al + add r10d,r12d + mov DWORD PTR[24+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[40+r15] + add bl,dl + mov eax,DWORD PTR[32+rsi] + add r9d,3200236656 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[28+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[52+r15] + add al,dl + mov ebx,DWORD PTR[36+rsi] + add r8d,681279174 + movzx eax,al + add r8d,r12d + mov DWORD PTR[32+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[r15] + add bl,dl + mov eax,DWORD PTR[40+rsi] + add r11d,3936430074 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[36+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[12+r15] + add al,dl + mov ebx,DWORD PTR[44+rsi] + add r10d,3572445317 + movzx eax,al + add r10d,r12d + mov DWORD PTR[40+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[24+r15] + add bl,dl + mov eax,DWORD PTR[48+rsi] + add r9d,76029189 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[44+rsi],edx + add cl,al + rol r9d,23 + mov r12d,r11d + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r9d + add r8d,DWORD PTR[36+r15] + add al,dl + mov ebx,DWORD PTR[52+rsi] + add r8d,3654602809 + movzx eax,al + add r8d,r12d + mov DWORD PTR[48+rsi],edx + add cl,bl + rol r8d,4 + mov r12d,r10d + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r8d + add r11d,DWORD PTR[48+r15] + add bl,dl + mov eax,DWORD PTR[56+rsi] + add r11d,3873151461 + movzx ebx,bl + add r11d,r12d + mov DWORD PTR[52+rsi],edx + add cl,al + rol r11d,11 + mov r12d,r9d + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],eax + xor r12d,r11d + add r10d,DWORD PTR[60+r15] + add al,dl + mov ebx,DWORD PTR[60+rsi] + add r10d,530742520 + movzx eax,al + add r10d,r12d + mov DWORD PTR[56+rsi],edx + add cl,bl + rol r10d,16 + mov r12d,r8d + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm4,XMMWORD PTR[32+r13] + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],ebx + xor r12d,r10d + add r9d,DWORD PTR[8+r15] + add bl,dl + mov eax,DWORD PTR[64+rsi] + add r9d,3299628645 + movzx ebx,bl + add r9d,r12d + mov DWORD PTR[60+rsi],edx + add cl,al + rol r9d,23 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + psllq xmm1,8 + pxor 
xmm4,xmm0 + pxor xmm4,xmm1 + pxor xmm0,xmm0 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[r15] + add al,dl + mov ebx,DWORD PTR[68+rsi] + add r8d,4096336452 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[64+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + movd xmm0,DWORD PTR[rax*4+rdi] + + add r8d,r9d + pxor xmm1,xmm1 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[28+r15] + add bl,dl + mov eax,DWORD PTR[72+rsi] + add r11d,1126891415 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[68+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + movd xmm1,DWORD PTR[rbx*4+rdi] + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[56+r15] + add al,dl + mov ebx,DWORD PTR[76+rsi] + add r10d,2878612391 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[72+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[20+r15] + add bl,dl + mov eax,DWORD PTR[80+rsi] + add r9d,4237533241 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[76+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[48+r15] + add al,dl + mov ebx,DWORD PTR[84+rsi] + add r8d,1700485571 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[80+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[12+r15] + add bl,dl + mov eax,DWORD PTR[88+rsi] + add r11d,2399980690 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[84+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[40+r15] + add al,dl + mov ebx,DWORD PTR[92+rsi] + add r10d,4293915773 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[88+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[4+r15] + add bl,dl + mov eax,DWORD PTR[96+rsi] + add r9d,2240044497 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[92+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[32+r15] + add al,dl + mov ebx,DWORD PTR[100+rsi] + add r8d,1873313359 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[96+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[60+r15] + add bl,dl + mov eax,DWORD PTR[104+rsi] + add r11d,4264355552 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[100+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + + add r11d,r8d + mov edx,DWORD 
PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[24+r15] + add al,dl + mov ebx,DWORD PTR[108+rsi] + add r10d,2734768916 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[104+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + + add r10d,r11d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[52+r15] + add bl,dl + mov eax,DWORD PTR[112+rsi] + add r9d,1309151649 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[108+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + + add r9d,r10d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r11d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r9d + add r8d,DWORD PTR[16+r15] + add al,dl + mov ebx,DWORD PTR[116+rsi] + add r8d,4149444226 + movzx eax,al + xor r12d,r10d + mov DWORD PTR[112+rsi],edx + add r8d,r12d + add cl,bl + rol r8d,6 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + + add r8d,r9d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r10d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r8d + add r11d,DWORD PTR[44+r15] + add bl,dl + mov eax,DWORD PTR[120+rsi] + add r11d,3174756917 + movzx ebx,bl + xor r12d,r9d + mov DWORD PTR[116+rsi],edx + add r11d,r12d + add cl,al + rol r11d,10 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + + add r11d,r8d + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r9d + mov DWORD PTR[rcx*4+rdi],eax + or r12d,r11d + add r10d,DWORD PTR[8+r15] + add al,dl + mov ebx,DWORD PTR[124+rsi] + add r10d,718787259 + movzx eax,al + xor r12d,r8d + mov DWORD PTR[120+rsi],edx + add r10d,r12d + add cl,bl + rol r10d,15 + mov r12d,-1 + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + + add r10d,r11d + movdqu xmm5,XMMWORD PTR[48+r13] + add bpl,32 + mov edx,DWORD PTR[rcx*4+rdi] + xor r12d,r8d + mov DWORD PTR[rcx*4+rdi],ebx + or r12d,r10d + add r9d,DWORD PTR[36+r15] + add bl,dl + mov eax,DWORD PTR[rbp*4+rdi] + add r9d,3951481745 + movzx ebx,bl + xor r12d,r11d + mov DWORD PTR[124+rsi],edx + add r9d,r12d + add cl,al + rol r9d,21 + mov r12d,-1 + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + + add r9d,r10d + mov rsi,rbp + xor rbp,rbp + mov bpl,sil + mov rsi,rcx + xor rcx,rcx + mov cl,sil + lea rsi,QWORD PTR[rbp*4+rdi] + psllq xmm1,8 + pxor xmm5,xmm0 + pxor xmm5,xmm1 + add r8d,DWORD PTR[rsp] + add r9d,DWORD PTR[4+rsp] + add r10d,DWORD PTR[8+rsp] + add r11d,DWORD PTR[12+rsp] + + movdqu XMMWORD PTR[r13*1+r14],xmm2 + movdqu XMMWORD PTR[16+r13*1+r14],xmm3 + movdqu XMMWORD PTR[32+r13*1+r14],xmm4 + movdqu XMMWORD PTR[48+r13*1+r14],xmm5 + lea r15,QWORD PTR[64+r15] + lea r13,QWORD PTR[64+r13] + cmp r15,QWORD PTR[16+rsp] + jb $L$oop + + mov r12,QWORD PTR[24+rsp] + sub cl,al + mov DWORD PTR[r12],r8d + mov DWORD PTR[4+r12],r9d + mov DWORD PTR[8+r12],r10d + mov DWORD PTR[12+r12],r11d + sub bpl,1 + mov DWORD PTR[((-8))+rdi],ebp + mov DWORD PTR[((-4))+rdi],ecx + + mov r15,QWORD PTR[40+rsp] + mov r14,QWORD PTR[48+rsp] + mov r13,QWORD PTR[56+rsp] + mov r12,QWORD PTR[64+rsp] + mov rbp,QWORD PTR[72+rsp] + mov rbx,QWORD PTR[80+rsp] + lea rsp,QWORD PTR[88+rsp] +$L$epilogue:: +$L$abort:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_rc4_md5_enc:: +rc4_md5_enc ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$body] + cmp rbx,r10 + jb 
$L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov r15,QWORD PTR[40+rax] + mov r14,QWORD PTR[48+rax] + mov r13,QWORD PTR[56+rax] + mov r12,QWORD PTR[64+rax] + mov rbp,QWORD PTR[72+rax] + mov rbx,QWORD PTR[80+rax] + lea rax,QWORD PTR[88+rax] + + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_rc4_md5_enc + DD imagerel $L$SEH_end_rc4_md5_enc + DD imagerel $L$SEH_info_rc4_md5_enc + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_rc4_md5_enc:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/rc4/rc4-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/rc4/rc4-x86_64.asm new file mode 100644 index 00000000000000..a0e5553247b032 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/rc4/rc4-x86_64.asm @@ -0,0 +1,771 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC RC4 + +ALIGN 16 +RC4 PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_RC4:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + mov rcx,r9 + + or rsi,rsi + jne $L$entry + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$entry:: + push rbx + push r12 + push r13 +$L$prologue:: + mov r11,rsi + mov r12,rdx + mov r13,rcx + xor r10,r10 + xor rcx,rcx + + lea rdi,QWORD PTR[8+rdi] + mov r10b,BYTE PTR[((-8))+rdi] + mov cl,BYTE PTR[((-4))+rdi] + cmp DWORD PTR[256+rdi],-1 + je $L$RC4_CHAR + mov r8d,DWORD PTR[OPENSSL_ia32cap_P] + xor rbx,rbx + inc r10b + sub rbx,r10 + sub r13,r12 + mov eax,DWORD PTR[r10*4+rdi] + test r11,-16 + jz $L$loop1 + bt r8d,30 + jc $L$intel + and rbx,7 + lea rsi,QWORD PTR[1+r10] + jz $L$oop8 + sub r11,rbx +$L$oop8_warmup:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r13*1+r12],dl + lea r12,QWORD PTR[1+r12] + dec rbx + jnz $L$oop8_warmup + + lea rsi,QWORD PTR[1+r10] + jmp $L$oop8 +ALIGN 16 +$L$oop8:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[rsi*4+rdi] + ror r8,8 + mov DWORD PTR[r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[4+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[4+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD 
PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[8+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[8+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[12+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[12+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[16+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[16+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[20+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[20+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov ebx,DWORD PTR[24+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[24+r10*4+rdi],edx + add dl,al + mov r8b,BYTE PTR[rdx*4+rdi] + add sil,8 + add cl,bl + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + mov eax,DWORD PTR[((-4))+rsi*4+rdi] + ror r8,8 + mov DWORD PTR[28+r10*4+rdi],edx + add dl,bl + mov r8b,BYTE PTR[rdx*4+rdi] + add r10b,8 + ror r8,8 + sub r11,8 + + xor r8,QWORD PTR[r12] + mov QWORD PTR[r13*1+r12],r8 + lea r12,QWORD PTR[8+r12] + + test r11,-8 + jnz $L$oop8 + cmp r11,0 + jne $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$intel:: + test r11,-32 + jz $L$loop1 + and rbx,15 + jz $L$oop16_is_hot + sub r11,rbx +$L$oop16_warmup:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r13*1+r12],dl + lea r12,QWORD PTR[1+r12] + dec rbx + jnz $L$oop16_warmup + + mov rbx,rcx + xor rcx,rcx + mov cl,bl + +$L$oop16_is_hot:: + lea rsi,QWORD PTR[r10*4+rdi] + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm0,xmm0 + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[4+rsi] + movzx eax,al + mov DWORD PTR[rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],0 + jmp $L$oop16_enter +ALIGN 16 +$L$oop16:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm2,xmm0 + psllq xmm1,8 + pxor xmm0,xmm0 + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[4+rsi] + movzx eax,al + mov DWORD PTR[rsi],edx + pxor xmm2,xmm1 + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],0 + movdqu XMMWORD PTR[r13*1+r12],xmm2 + lea r12,QWORD PTR[16+r12] +$L$oop16_enter:: + mov edx,DWORD PTR[rcx*4+rdi] + pxor xmm1,xmm1 + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[8+rsi] + movzx ebx,bl + mov DWORD PTR[4+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],0 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[12+rsi] + movzx eax,al + mov DWORD PTR[8+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],1 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[16+rsi] + movzx ebx,bl + mov DWORD PTR[12+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],1 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[20+rsi] + movzx eax,al + mov DWORD PTR[16+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],2 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[24+rsi] + movzx ebx,bl + mov DWORD PTR[20+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],2 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[28+rsi] + movzx eax,al + mov DWORD 
PTR[24+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],3 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[32+rsi] + movzx ebx,bl + mov DWORD PTR[28+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],3 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[36+rsi] + movzx eax,al + mov DWORD PTR[32+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],4 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[40+rsi] + movzx ebx,bl + mov DWORD PTR[36+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],4 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[44+rsi] + movzx eax,al + mov DWORD PTR[40+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],5 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[48+rsi] + movzx ebx,bl + mov DWORD PTR[44+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],5 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[52+rsi] + movzx eax,al + mov DWORD PTR[48+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],6 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + mov eax,DWORD PTR[56+rsi] + movzx ebx,bl + mov DWORD PTR[52+rsi],edx + add cl,al + pinsrw xmm1,WORD PTR[rbx*4+rdi],6 + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + add al,dl + mov ebx,DWORD PTR[60+rsi] + movzx eax,al + mov DWORD PTR[56+rsi],edx + add cl,bl + pinsrw xmm0,WORD PTR[rax*4+rdi],7 + add r10b,16 + movdqu xmm2,XMMWORD PTR[r12] + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],ebx + add bl,dl + movzx ebx,bl + mov DWORD PTR[60+rsi],edx + lea rsi,QWORD PTR[r10*4+rdi] + pinsrw xmm1,WORD PTR[rbx*4+rdi],7 + mov eax,DWORD PTR[rsi] + mov rbx,rcx + xor rcx,rcx + sub r11,16 + mov cl,bl + test r11,-16 + jnz $L$oop16 + + psllq xmm1,8 + pxor xmm2,xmm0 + pxor xmm2,xmm1 + movdqu XMMWORD PTR[r13*1+r12],xmm2 + lea r12,QWORD PTR[16+r12] + + cmp r11,0 + jne $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$loop1:: + add cl,al + mov edx,DWORD PTR[rcx*4+rdi] + mov DWORD PTR[rcx*4+rdi],eax + mov DWORD PTR[r10*4+rdi],edx + add al,dl + inc r10b + mov edx,DWORD PTR[rax*4+rdi] + mov eax,DWORD PTR[r10*4+rdi] + xor dl,BYTE PTR[r12] + mov BYTE PTR[r13*1+r12],dl + lea r12,QWORD PTR[1+r12] + dec r11 + jnz $L$loop1 + jmp $L$exit + +ALIGN 16 +$L$RC4_CHAR:: + add r10b,1 + movzx eax,BYTE PTR[r10*1+rdi] + test r11,-8 + jz $L$cloop1 + jmp $L$cloop8 +ALIGN 16 +$L$cloop8:: + mov r8d,DWORD PTR[r12] + mov r9d,DWORD PTR[4+r12] + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov0 + mov rbx,rax +$L$cmov0:: + add dl,al + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov1 + mov rax,rbx +$L$cmov1:: + add dl,bl + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov2 + mov rbx,rax +$L$cmov2:: + add dl,al + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE 
PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov3 + mov rax,rbx +$L$cmov3:: + add dl,bl + xor r8b,BYTE PTR[rdx*1+rdi] + ror r8d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov4 + mov rbx,rax +$L$cmov4:: + add dl,al + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov5 + mov rax,rbx +$L$cmov5:: + add dl,bl + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,al + lea rsi,QWORD PTR[1+r10] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx esi,sil + movzx ebx,BYTE PTR[rsi*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + cmp rcx,rsi + mov BYTE PTR[r10*1+rdi],dl + jne $L$cmov6 + mov rbx,rax +$L$cmov6:: + add dl,al + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + add cl,bl + lea r10,QWORD PTR[1+rsi] + movzx edx,BYTE PTR[rcx*1+rdi] + movzx r10d,r10b + movzx eax,BYTE PTR[r10*1+rdi] + mov BYTE PTR[rcx*1+rdi],bl + cmp rcx,r10 + mov BYTE PTR[rsi*1+rdi],dl + jne $L$cmov7 + mov rax,rbx +$L$cmov7:: + add dl,bl + xor r9b,BYTE PTR[rdx*1+rdi] + ror r9d,8 + lea r11,QWORD PTR[((-8))+r11] + mov DWORD PTR[r13],r8d + lea r12,QWORD PTR[8+r12] + mov DWORD PTR[4+r13],r9d + lea r13,QWORD PTR[8+r13] + + test r11,-8 + jnz $L$cloop8 + cmp r11,0 + jne $L$cloop1 + jmp $L$exit +ALIGN 16 +$L$cloop1:: + add cl,al + movzx ecx,cl + movzx edx,BYTE PTR[rcx*1+rdi] + mov BYTE PTR[rcx*1+rdi],al + mov BYTE PTR[r10*1+rdi],dl + add dl,al + add r10b,1 + movzx edx,dl + movzx r10d,r10b + movzx edx,BYTE PTR[rdx*1+rdi] + movzx eax,BYTE PTR[r10*1+rdi] + xor dl,BYTE PTR[r12] + lea r12,QWORD PTR[1+r12] + mov BYTE PTR[r13],dl + lea r13,QWORD PTR[1+r13] + sub r11,1 + jnz $L$cloop1 + jmp $L$exit + +ALIGN 16 +$L$exit:: + sub r10b,1 + mov DWORD PTR[((-8))+rdi],r10d + mov DWORD PTR[((-4))+rdi],ecx + + mov r13,QWORD PTR[rsp] + mov r12,QWORD PTR[8+rsp] + mov rbx,QWORD PTR[16+rsp] + add rsp,24 +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_RC4:: +RC4 ENDP +PUBLIC private_RC4_set_key + +ALIGN 16 +private_RC4_set_key PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_private_RC4_set_key:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea rdi,QWORD PTR[8+rdi] + lea rdx,QWORD PTR[rsi*1+rdx] + neg rsi + mov rcx,rsi + xor eax,eax + xor r9,r9 + xor r10,r10 + xor r11,r11 + + mov r8d,DWORD PTR[OPENSSL_ia32cap_P] + bt r8d,20 + jc $L$c1stloop + jmp $L$w1stloop + +ALIGN 16 +$L$w1stloop:: + mov DWORD PTR[rax*4+rdi],eax + add al,1 + jnc $L$w1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$w2ndloop:: + mov r10d,DWORD PTR[r9*4+rdi] + add r8b,BYTE PTR[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11d,DWORD PTR[r8*4+rdi] + cmovz rsi,rcx + mov DWORD PTR[r8*4+rdi],r10d + mov DWORD PTR[r9*4+rdi],r11d + add r9b,1 + jnc $L$w2ndloop + jmp $L$exit_key + +ALIGN 16 +$L$c1stloop:: + mov BYTE PTR[rax*1+rdi],al + add al,1 + jnc $L$c1stloop + + xor r9,r9 + xor r8,r8 +ALIGN 16 +$L$c2ndloop:: + mov r10b,BYTE PTR[r9*1+rdi] + add r8b,BYTE PTR[rsi*1+rdx] + add r8b,r10b + add rsi,1 + mov r11b,BYTE PTR[r8*1+rdi] + jnz $L$cnowrap + mov rsi,rcx +$L$cnowrap:: + mov BYTE PTR[r8*1+rdi],r10b + mov BYTE PTR[r9*1+rdi],r11b + add r9b,1 + jnc 
$L$c2ndloop + mov DWORD PTR[256+rdi],-1 + +ALIGN 16 +$L$exit_key:: + xor eax,eax + mov DWORD PTR[((-8))+rdi],eax + mov DWORD PTR[((-4))+rdi],eax + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_private_RC4_set_key:: +private_RC4_set_key ENDP + +PUBLIC RC4_options + +ALIGN 16 +RC4_options PROC PUBLIC + lea rax,QWORD PTR[$L$opts] + mov edx,DWORD PTR[OPENSSL_ia32cap_P] + bt edx,20 + jc $L$8xchar + bt edx,30 + jnc $L$done + add rax,25 + DB 0F3h,0C3h ;repret +$L$8xchar:: + add rax,12 +$L$done:: + DB 0F3h,0C3h ;repret +ALIGN 64 +$L$opts:: +DB 114,99,52,40,56,120,44,105,110,116,41,0 +DB 114,99,52,40,56,120,44,99,104,97,114,41,0 +DB 114,99,52,40,49,54,120,44,105,110,116,41,0 +DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 64 +RC4_options ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +stream_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + lea rax,QWORD PTR[24+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov r12,QWORD PTR[((-16))+rax] + mov r13,QWORD PTR[((-24))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + jmp $L$common_seh_exit +stream_se_handler ENDP + + +ALIGN 16 +key_se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[152+r8] + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + +$L$common_seh_exit:: + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +key_se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_RC4 + DD imagerel $L$SEH_end_RC4 + DD imagerel $L$SEH_info_RC4 + + DD imagerel $L$SEH_begin_private_RC4_set_key + DD imagerel $L$SEH_end_private_RC4_set_key + DD imagerel $L$SEH_info_private_RC4_set_key + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_RC4:: +DB 9,0,0,0 + DD imagerel stream_se_handler +$L$SEH_info_private_RC4_set_key:: +DB 9,0,0,0 + DD imagerel key_se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-mb-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-mb-x86_64.asm new file mode 100644 index 00000000000000..32919ea737333d --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-mb-x86_64.asm @@ -0,0 +1,3112 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN 
OPENSSL_ia32cap_P:NEAR + +PUBLIC sha1_multi_block + +ALIGN 32 +sha1_multi_block PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))] + bt rcx,61 + jc _shaext_shortcut + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD PTR[272+rsp],rax +$L$body:: + lea rbp,QWORD PTR[K_XX_XX] + lea rbx,QWORD PTR[256+rsp] + +$L$oop_grande:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz $L$done + + movdqu xmm10,XMMWORD PTR[rdi] + lea rax,QWORD PTR[128+rsp] + movdqu xmm11,XMMWORD PTR[32+rdi] + movdqu xmm12,XMMWORD PTR[64+rdi] + movdqu xmm13,XMMWORD PTR[96+rdi] + movdqu xmm14,XMMWORD PTR[128+rdi] + movdqa xmm5,XMMWORD PTR[96+rbp] + movdqa xmm15,XMMWORD PTR[((-32))+rbp] + jmp $L$oop + +ALIGN 32 +$L$oop:: + movd xmm0,DWORD PTR[r8] + lea r8,QWORD PTR[64+r8] + movd xmm2,DWORD PTR[r9] + lea r9,QWORD PTR[64+r9] + movd xmm3,DWORD PTR[r10] + lea r10,QWORD PTR[64+r10] + movd xmm4,DWORD PTR[r11] + lea r11,QWORD PTR[64+r11] + punpckldq xmm0,xmm3 + movd xmm1,DWORD PTR[((-60))+r8] + punpckldq xmm2,xmm4 + movd xmm9,DWORD PTR[((-60))+r9] + punpckldq xmm0,xmm2 + movd xmm8,DWORD PTR[((-60))+r10] +DB 102,15,56,0,197 + movd xmm7,DWORD PTR[((-60))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD PTR[(0-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD PTR[((-56))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-56))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD PTR[((-56))+r10] + por xmm11,xmm7 + movd xmm7,DWORD PTR[((-56))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD PTR[(16-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD PTR[((-52))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-52))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD PTR[((-52))+r10] + por xmm10,xmm7 + movd xmm7,DWORD PTR[((-52))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd 
xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD PTR[(32-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD PTR[((-48))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-48))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD PTR[((-48))+r10] + por xmm14,xmm7 + movd xmm7,DWORD PTR[((-48))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD PTR[(48-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD PTR[((-44))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-44))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD PTR[((-44))+r10] + por xmm13,xmm7 + movd xmm7,DWORD PTR[((-44))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD PTR[(64-128)+rax],xmm4 + paddd xmm10,xmm4 + movd xmm1,DWORD PTR[((-40))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-40))+r9] + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + movd xmm8,DWORD PTR[((-40))+r10] + por xmm12,xmm7 + movd xmm7,DWORD PTR[((-40))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD PTR[(80-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD PTR[((-36))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-36))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD PTR[((-36))+r10] + por xmm11,xmm7 + movd xmm7,DWORD PTR[((-36))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD PTR[(96-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD PTR[((-32))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-32))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD PTR[((-32))+r10] + por xmm10,xmm7 + movd xmm7,DWORD PTR[((-32))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD PTR[(112-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD PTR[((-28))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-28))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD PTR[((-28))+r10] + por xmm14,xmm7 + movd xmm7,DWORD PTR[((-28))+r11] + punpckldq xmm4,xmm8 
+ movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD PTR[(128-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD PTR[((-24))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-24))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD PTR[((-24))+r10] + por xmm13,xmm7 + movd xmm7,DWORD PTR[((-24))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD PTR[(144-128)+rax],xmm4 + paddd xmm10,xmm4 + movd xmm1,DWORD PTR[((-20))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-20))+r9] + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + movd xmm8,DWORD PTR[((-20))+r10] + por xmm12,xmm7 + movd xmm7,DWORD PTR[((-20))+r11] + punpckldq xmm1,xmm8 + movdqa xmm8,xmm10 + paddd xmm14,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm11 + movdqa xmm6,xmm11 + pslld xmm8,5 + pandn xmm7,xmm13 + pand xmm6,xmm12 + punpckldq xmm1,xmm9 + movdqa xmm9,xmm10 + + movdqa XMMWORD PTR[(160-128)+rax],xmm0 + paddd xmm14,xmm0 + movd xmm2,DWORD PTR[((-16))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm11 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-16))+r9] + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 +DB 102,15,56,0,205 + movd xmm8,DWORD PTR[((-16))+r10] + por xmm11,xmm7 + movd xmm7,DWORD PTR[((-16))+r11] + punpckldq xmm2,xmm8 + movdqa xmm8,xmm14 + paddd xmm13,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm10 + movdqa xmm6,xmm10 + pslld xmm8,5 + pandn xmm7,xmm12 + pand xmm6,xmm11 + punpckldq xmm2,xmm9 + movdqa xmm9,xmm14 + + movdqa XMMWORD PTR[(176-128)+rax],xmm1 + paddd xmm13,xmm1 + movd xmm3,DWORD PTR[((-12))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm10 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-12))+r9] + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 +DB 102,15,56,0,213 + movd xmm8,DWORD PTR[((-12))+r10] + por xmm10,xmm7 + movd xmm7,DWORD PTR[((-12))+r11] + punpckldq xmm3,xmm8 + movdqa xmm8,xmm13 + paddd xmm12,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm14 + movdqa xmm6,xmm14 + pslld xmm8,5 + pandn xmm7,xmm11 + pand xmm6,xmm10 + punpckldq xmm3,xmm9 + movdqa xmm9,xmm13 + + movdqa XMMWORD PTR[(192-128)+rax],xmm2 + paddd xmm12,xmm2 + movd xmm4,DWORD PTR[((-8))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm14 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-8))+r9] + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 +DB 102,15,56,0,221 + movd xmm8,DWORD PTR[((-8))+r10] + por xmm14,xmm7 + movd xmm7,DWORD PTR[((-8))+r11] + punpckldq xmm4,xmm8 + movdqa xmm8,xmm12 + paddd xmm11,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm13 + movdqa xmm6,xmm13 + pslld xmm8,5 + pandn xmm7,xmm10 + pand xmm6,xmm14 + punpckldq xmm4,xmm9 + movdqa xmm9,xmm12 + + movdqa XMMWORD PTR[(208-128)+rax],xmm3 + paddd xmm11,xmm3 + movd xmm0,DWORD PTR[((-4))+r8] + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm13 + + por xmm8,xmm9 + movd xmm9,DWORD PTR[((-4))+r9] + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 +DB 102,15,56,0,229 + movd xmm8,DWORD PTR[((-4))+r10] + por xmm13,xmm7 + movdqa xmm1,XMMWORD 
PTR[((0-128))+rax] + movd xmm7,DWORD PTR[((-4))+r11] + punpckldq xmm0,xmm8 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + punpckldq xmm9,xmm7 + movdqa xmm7,xmm12 + movdqa xmm6,xmm12 + pslld xmm8,5 + prefetcht0 [63+r8] + pandn xmm7,xmm14 + pand xmm6,xmm13 + punpckldq xmm0,xmm9 + movdqa xmm9,xmm11 + + movdqa XMMWORD PTR[(224-128)+rax],xmm4 + paddd xmm10,xmm4 + psrld xmm9,27 + pxor xmm6,xmm7 + movdqa xmm7,xmm12 + prefetcht0 [63+r9] + + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm10,xmm6 + prefetcht0 [63+r10] + + psrld xmm12,2 + paddd xmm10,xmm8 +DB 102,15,56,0,197 + prefetcht0 [63+r11] + por xmm12,xmm7 + movdqa xmm2,XMMWORD PTR[((16-128))+rax] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm10 + pxor xmm1,XMMWORD PTR[((128-128))+rax] + paddd xmm14,xmm15 + movdqa xmm7,xmm11 + pslld xmm8,5 + pxor xmm1,xmm3 + movdqa xmm6,xmm11 + pandn xmm7,xmm13 + movdqa xmm5,xmm1 + pand xmm6,xmm12 + movdqa xmm9,xmm10 + psrld xmm5,31 + paddd xmm1,xmm1 + + movdqa XMMWORD PTR[(240-128)+rax],xmm0 + paddd xmm14,xmm0 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm11 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm14 + pxor xmm2,XMMWORD PTR[((144-128))+rax] + paddd xmm13,xmm15 + movdqa xmm7,xmm10 + pslld xmm8,5 + pxor xmm2,xmm4 + movdqa xmm6,xmm10 + pandn xmm7,xmm12 + movdqa xmm5,xmm2 + pand xmm6,xmm11 + movdqa xmm9,xmm14 + psrld xmm5,31 + paddd xmm2,xmm2 + + movdqa XMMWORD PTR[(0-128)+rax],xmm1 + paddd xmm13,xmm1 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm10 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm13 + pxor xmm3,XMMWORD PTR[((160-128))+rax] + paddd xmm12,xmm15 + movdqa xmm7,xmm14 + pslld xmm8,5 + pxor xmm3,xmm0 + movdqa xmm6,xmm14 + pandn xmm7,xmm11 + movdqa xmm5,xmm3 + pand xmm6,xmm10 + movdqa xmm9,xmm13 + psrld xmm5,31 + paddd xmm3,xmm3 + + movdqa XMMWORD PTR[(16-128)+rax],xmm2 + paddd xmm12,xmm2 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm14 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm12 + pxor xmm4,XMMWORD PTR[((176-128))+rax] + paddd xmm11,xmm15 + movdqa xmm7,xmm13 + pslld xmm8,5 + pxor xmm4,xmm1 + movdqa xmm6,xmm13 + pandn xmm7,xmm10 + movdqa xmm5,xmm4 + pand xmm6,xmm14 + movdqa xmm9,xmm12 + psrld xmm5,31 + paddd xmm4,xmm4 + + movdqa XMMWORD PTR[(32-128)+rax],xmm3 + paddd xmm11,xmm3 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm13 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm11 + pxor xmm0,XMMWORD PTR[((192-128))+rax] + paddd xmm10,xmm15 + movdqa xmm7,xmm12 + pslld xmm8,5 + pxor xmm0,xmm2 + movdqa xmm6,xmm12 + pandn xmm7,xmm14 + movdqa xmm5,xmm0 + pand xmm6,xmm13 + movdqa xmm9,xmm11 + psrld xmm5,31 + paddd xmm0,xmm0 + + movdqa XMMWORD PTR[(48-128)+rax],xmm4 + paddd xmm10,xmm4 + psrld xmm9,27 + pxor xmm6,xmm7 + + movdqa xmm7,xmm12 + por xmm8,xmm9 + pslld xmm7,30 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD PTR[rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((112-128))+rax] + + movdqa 
xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((208-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(64-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((224-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(80-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((240-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(96-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((0-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(112-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((16-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(128-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((32-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(144-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((48-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(160-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd 
xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((64-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(176-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((80-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(192-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((96-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(208-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((112-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(224-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((128-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(240-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((144-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(0-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((160-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(16-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + 
movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((176-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(32-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((192-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(48-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((208-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(64-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((224-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(80-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((240-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(96-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((0-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(112-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD PTR[32+rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((16-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand 
xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(128-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((32-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(144-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((48-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(160-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((64-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(176-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((80-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(192-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((96-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(208-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((112-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + 
psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(224-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((128-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(240-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((144-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(0-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((160-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(16-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((176-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(32-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((192-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(48-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((208-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + 
movdqa XMMWORD PTR[(64-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((224-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(80-128)+rax],xmm3 + paddd xmm11,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((240-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(96-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm7,xmm13 + pxor xmm1,XMMWORD PTR[((0-128))+rax] + pxor xmm1,xmm3 + paddd xmm14,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm10 + pand xmm7,xmm12 + + movdqa xmm6,xmm13 + movdqa xmm5,xmm1 + psrld xmm9,27 + paddd xmm14,xmm7 + pxor xmm6,xmm12 + + movdqa XMMWORD PTR[(112-128)+rax],xmm0 + paddd xmm14,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm11 + movdqa xmm7,xmm11 + + pslld xmm7,30 + paddd xmm1,xmm1 + paddd xmm14,xmm6 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm7,xmm12 + pxor xmm2,XMMWORD PTR[((16-128))+rax] + pxor xmm2,xmm4 + paddd xmm13,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm14 + pand xmm7,xmm11 + + movdqa xmm6,xmm12 + movdqa xmm5,xmm2 + psrld xmm9,27 + paddd xmm13,xmm7 + pxor xmm6,xmm11 + + movdqa XMMWORD PTR[(128-128)+rax],xmm1 + paddd xmm13,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm10 + movdqa xmm7,xmm10 + + pslld xmm7,30 + paddd xmm2,xmm2 + paddd xmm13,xmm6 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm7,xmm11 + pxor xmm3,XMMWORD PTR[((32-128))+rax] + pxor xmm3,xmm0 + paddd xmm12,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm13 + pand xmm7,xmm10 + + movdqa xmm6,xmm11 + movdqa xmm5,xmm3 + psrld xmm9,27 + paddd xmm12,xmm7 + pxor xmm6,xmm10 + + movdqa XMMWORD PTR[(144-128)+rax],xmm2 + paddd xmm12,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm14 + movdqa xmm7,xmm14 + + pslld xmm7,30 + paddd xmm3,xmm3 + paddd xmm12,xmm6 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm7,xmm10 + pxor xmm4,XMMWORD PTR[((48-128))+rax] + pxor xmm4,xmm1 + paddd xmm11,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm12 + pand xmm7,xmm14 + + movdqa xmm6,xmm10 + movdqa xmm5,xmm4 + psrld xmm9,27 + paddd xmm11,xmm7 + pxor xmm6,xmm14 + + movdqa XMMWORD PTR[(160-128)+rax],xmm3 + paddd xmm11,xmm3 
+ por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm13 + movdqa xmm7,xmm13 + + pslld xmm7,30 + paddd xmm4,xmm4 + paddd xmm11,xmm6 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm7,xmm14 + pxor xmm0,XMMWORD PTR[((64-128))+rax] + pxor xmm0,xmm2 + paddd xmm10,xmm15 + pslld xmm8,5 + movdqa xmm9,xmm11 + pand xmm7,xmm13 + + movdqa xmm6,xmm14 + movdqa xmm5,xmm0 + psrld xmm9,27 + paddd xmm10,xmm7 + pxor xmm6,xmm13 + + movdqa XMMWORD PTR[(176-128)+rax],xmm4 + paddd xmm10,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + pand xmm6,xmm12 + movdqa xmm7,xmm12 + + pslld xmm7,30 + paddd xmm0,xmm0 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + movdqa xmm15,XMMWORD PTR[64+rbp] + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((80-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(192-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((96-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(208-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((112-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(224-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((32-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((128-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(240-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((48-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((144-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(0-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((64-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD 
PTR[((160-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(16-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((80-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((176-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(32-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((96-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((192-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + movdqa XMMWORD PTR[(48-128)+rax],xmm2 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((112-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((208-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + movdqa XMMWORD PTR[(64-128)+rax],xmm3 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((128-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((224-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + movdqa XMMWORD PTR[(80-128)+rax],xmm4 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((144-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((240-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + movdqa XMMWORD PTR[(96-128)+rax],xmm0 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((160-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((0-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + movdqa XMMWORD PTR[(112-128)+rax],xmm1 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor 
xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((176-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((16-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((192-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((32-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + pxor xmm0,xmm2 + movdqa xmm2,XMMWORD PTR[((208-128))+rax] + + movdqa xmm8,xmm11 + movdqa xmm6,xmm14 + pxor xmm0,XMMWORD PTR[((48-128))+rax] + paddd xmm10,xmm15 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + paddd xmm10,xmm4 + pxor xmm0,xmm2 + psrld xmm9,27 + pxor xmm6,xmm13 + movdqa xmm7,xmm12 + + pslld xmm7,30 + movdqa xmm5,xmm0 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm10,xmm6 + paddd xmm0,xmm0 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm0,xmm5 + por xmm12,xmm7 + pxor xmm1,xmm3 + movdqa xmm3,XMMWORD PTR[((224-128))+rax] + + movdqa xmm8,xmm10 + movdqa xmm6,xmm13 + pxor xmm1,XMMWORD PTR[((64-128))+rax] + paddd xmm14,xmm15 + pslld xmm8,5 + pxor xmm6,xmm11 + + movdqa xmm9,xmm10 + paddd xmm14,xmm0 + pxor xmm1,xmm3 + psrld xmm9,27 + pxor xmm6,xmm12 + movdqa xmm7,xmm11 + + pslld xmm7,30 + movdqa xmm5,xmm1 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm14,xmm6 + paddd xmm1,xmm1 + + psrld xmm11,2 + paddd xmm14,xmm8 + por xmm1,xmm5 + por xmm11,xmm7 + pxor xmm2,xmm4 + movdqa xmm4,XMMWORD PTR[((240-128))+rax] + + movdqa xmm8,xmm14 + movdqa xmm6,xmm12 + pxor xmm2,XMMWORD PTR[((80-128))+rax] + paddd xmm13,xmm15 + pslld xmm8,5 + pxor xmm6,xmm10 + + movdqa xmm9,xmm14 + paddd xmm13,xmm1 + pxor xmm2,xmm4 + psrld xmm9,27 + pxor xmm6,xmm11 + movdqa xmm7,xmm10 + + pslld xmm7,30 + movdqa xmm5,xmm2 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm13,xmm6 + paddd xmm2,xmm2 + + psrld xmm10,2 + paddd xmm13,xmm8 + por xmm2,xmm5 + por xmm10,xmm7 + pxor xmm3,xmm0 + movdqa xmm0,XMMWORD PTR[((0-128))+rax] + + movdqa xmm8,xmm13 + movdqa xmm6,xmm11 + pxor xmm3,XMMWORD PTR[((96-128))+rax] + paddd xmm12,xmm15 + pslld xmm8,5 + pxor xmm6,xmm14 + + movdqa xmm9,xmm13 + paddd xmm12,xmm2 + pxor xmm3,xmm0 + psrld xmm9,27 + pxor xmm6,xmm10 + movdqa xmm7,xmm14 + + pslld xmm7,30 + movdqa xmm5,xmm3 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm12,xmm6 + paddd xmm3,xmm3 + + psrld xmm14,2 + paddd xmm12,xmm8 + por xmm3,xmm5 + por xmm14,xmm7 + pxor xmm4,xmm1 + movdqa xmm1,XMMWORD PTR[((16-128))+rax] + + movdqa xmm8,xmm12 + movdqa xmm6,xmm10 + pxor xmm4,XMMWORD PTR[((112-128))+rax] + paddd xmm11,xmm15 + pslld xmm8,5 + pxor xmm6,xmm13 + + movdqa xmm9,xmm12 + paddd xmm11,xmm3 + pxor xmm4,xmm1 + psrld xmm9,27 + pxor xmm6,xmm14 + movdqa xmm7,xmm13 + + pslld xmm7,30 + movdqa xmm5,xmm4 + por xmm8,xmm9 + psrld xmm5,31 + paddd xmm11,xmm6 + paddd xmm4,xmm4 + + psrld xmm13,2 + paddd xmm11,xmm8 + por xmm4,xmm5 + por xmm13,xmm7 + movdqa xmm8,xmm11 + paddd xmm10,xmm15 + movdqa xmm6,xmm14 + pslld xmm8,5 + pxor xmm6,xmm12 + + movdqa xmm9,xmm11 + paddd xmm10,xmm4 + psrld xmm9,27 + movdqa xmm7,xmm12 + pxor 
xmm6,xmm13 + + pslld xmm7,30 + por xmm8,xmm9 + paddd xmm10,xmm6 + + psrld xmm12,2 + paddd xmm10,xmm8 + por xmm12,xmm7 + movdqa xmm0,XMMWORD PTR[rbx] + mov ecx,1 + cmp ecx,DWORD PTR[rbx] + pxor xmm8,xmm8 + cmovge r8,rbp + cmp ecx,DWORD PTR[4+rbx] + movdqa xmm1,xmm0 + cmovge r9,rbp + cmp ecx,DWORD PTR[8+rbx] + pcmpgtd xmm1,xmm8 + cmovge r10,rbp + cmp ecx,DWORD PTR[12+rbx] + paddd xmm0,xmm1 + cmovge r11,rbp + + movdqu xmm6,XMMWORD PTR[rdi] + pand xmm10,xmm1 + movdqu xmm7,XMMWORD PTR[32+rdi] + pand xmm11,xmm1 + paddd xmm10,xmm6 + movdqu xmm8,XMMWORD PTR[64+rdi] + pand xmm12,xmm1 + paddd xmm11,xmm7 + movdqu xmm9,XMMWORD PTR[96+rdi] + pand xmm13,xmm1 + paddd xmm12,xmm8 + movdqu xmm5,XMMWORD PTR[128+rdi] + pand xmm14,xmm1 + movdqu XMMWORD PTR[rdi],xmm10 + paddd xmm13,xmm9 + movdqu XMMWORD PTR[32+rdi],xmm11 + paddd xmm14,xmm5 + movdqu XMMWORD PTR[64+rdi],xmm12 + movdqu XMMWORD PTR[96+rdi],xmm13 + movdqu XMMWORD PTR[128+rdi],xmm14 + + movdqa XMMWORD PTR[rbx],xmm0 + movdqa xmm5,XMMWORD PTR[96+rbp] + movdqa xmm15,XMMWORD PTR[((-32))+rbp] + dec edx + jnz $L$oop + + mov edx,DWORD PTR[280+rsp] + lea rdi,QWORD PTR[16+rdi] + lea rsi,QWORD PTR[64+rsi] + dec edx + jnz $L$oop_grande + +$L$done:: + mov rax,QWORD PTR[272+rsp] + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_multi_block:: +sha1_multi_block ENDP + +ALIGN 32 +sha1_multi_block_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_multi_block_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + shl edx,1 + and rsp,-256 + lea rdi,QWORD PTR[64+rdi] + mov QWORD PTR[272+rsp],rax +$L$body_shaext:: + lea rbx,QWORD PTR[256+rsp] + movdqa xmm3,XMMWORD PTR[((K_XX_XX+128))] + +$L$oop_grande_shaext:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rsp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rsp + test edx,edx + jz $L$done_shaext + + movq xmm0,QWORD PTR[((0-64))+rdi] + movq xmm4,QWORD PTR[((32-64))+rdi] + movq xmm5,QWORD PTR[((64-64))+rdi] + movq xmm6,QWORD PTR[((96-64))+rdi] + movq xmm7,QWORD PTR[((128-64))+rdi] + + punpckldq xmm0,xmm4 + punpckldq xmm5,xmm6 + + movdqa xmm8,xmm0 + punpcklqdq xmm0,xmm5 + punpckhqdq xmm8,xmm5 + + pshufd xmm1,xmm7,63 + pshufd xmm9,xmm7,127 + pshufd xmm0,xmm0,27 + pshufd xmm8,xmm8,27 + jmp $L$oop_shaext + +ALIGN 32 
+$L$oop_shaext:: + movdqu xmm4,XMMWORD PTR[r8] + movdqu xmm11,XMMWORD PTR[r9] + movdqu xmm5,XMMWORD PTR[16+r8] + movdqu xmm12,XMMWORD PTR[16+r9] + movdqu xmm6,XMMWORD PTR[32+r8] +DB 102,15,56,0,227 + movdqu xmm13,XMMWORD PTR[32+r9] +DB 102,68,15,56,0,219 + movdqu xmm7,XMMWORD PTR[48+r8] + lea r8,QWORD PTR[64+r8] +DB 102,15,56,0,235 + movdqu xmm14,XMMWORD PTR[48+r9] + lea r9,QWORD PTR[64+r9] +DB 102,68,15,56,0,227 + + movdqa XMMWORD PTR[80+rsp],xmm1 + paddd xmm1,xmm4 + movdqa XMMWORD PTR[112+rsp],xmm9 + paddd xmm9,xmm11 + movdqa XMMWORD PTR[64+rsp],xmm0 + movdqa xmm2,xmm0 + movdqa XMMWORD PTR[96+rsp],xmm8 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,213 +DB 69,15,58,204,193,0 +DB 69,15,56,200,212 +DB 102,15,56,0,243 + prefetcht0 [127+r8] +DB 15,56,201,229 +DB 102,68,15,56,0,235 + prefetcht0 [127+r9] +DB 69,15,56,201,220 + +DB 102,15,56,0,251 + movdqa xmm1,xmm0 +DB 102,68,15,56,0,243 + movdqa xmm9,xmm8 +DB 15,58,204,194,0 +DB 15,56,200,206 +DB 69,15,58,204,194,0 +DB 69,15,56,200,205 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,215 +DB 69,15,58,204,193,0 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,0 +DB 15,56,200,204 +DB 69,15,58,204,194,0 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,0 +DB 15,56,200,213 +DB 69,15,58,204,193,0 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,206 +DB 69,15,58,204,194,1 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,1 +DB 15,56,200,215 +DB 69,15,58,204,193,1 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,204 +DB 69,15,58,204,194,1 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,1 +DB 15,56,200,213 +DB 69,15,58,204,193,1 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,1 +DB 15,56,200,206 +DB 69,15,58,204,194,1 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,215 +DB 69,15,58,204,193,2 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,2 +DB 15,56,200,204 +DB 69,15,58,204,194,2 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,213 +DB 69,15,58,204,193,2 +DB 69,15,56,200,212 +DB 
15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 +DB 15,56,201,229 + pxor xmm14,xmm12 +DB 69,15,56,201,220 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,2 +DB 15,56,200,206 +DB 69,15,58,204,194,2 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + pxor xmm4,xmm6 +DB 15,56,201,238 + pxor xmm11,xmm13 +DB 69,15,56,201,229 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,2 +DB 15,56,200,215 +DB 69,15,58,204,193,2 +DB 69,15,56,200,214 +DB 15,56,202,231 +DB 69,15,56,202,222 + pxor xmm5,xmm7 +DB 15,56,201,247 + pxor xmm12,xmm14 +DB 69,15,56,201,238 + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,204 +DB 69,15,58,204,194,3 +DB 69,15,56,200,203 +DB 15,56,202,236 +DB 69,15,56,202,227 + pxor xmm6,xmm4 +DB 15,56,201,252 + pxor xmm13,xmm11 +DB 69,15,56,201,243 + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,3 +DB 15,56,200,213 +DB 69,15,58,204,193,3 +DB 69,15,56,200,212 +DB 15,56,202,245 +DB 69,15,56,202,236 + pxor xmm7,xmm5 + pxor xmm14,xmm12 + + mov ecx,1 + pxor xmm4,xmm4 + cmp ecx,DWORD PTR[rbx] + cmovge r8,rsp + + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,206 +DB 69,15,58,204,194,3 +DB 69,15,56,200,205 +DB 15,56,202,254 +DB 69,15,56,202,245 + + cmp ecx,DWORD PTR[4+rbx] + cmovge r9,rsp + movq xmm6,QWORD PTR[rbx] + + movdqa xmm2,xmm0 + movdqa xmm10,xmm8 +DB 15,58,204,193,3 +DB 15,56,200,215 +DB 69,15,58,204,193,3 +DB 69,15,56,200,214 + + pshufd xmm11,xmm6,000h + pshufd xmm12,xmm6,055h + movdqa xmm7,xmm6 + pcmpgtd xmm11,xmm4 + pcmpgtd xmm12,xmm4 + + movdqa xmm1,xmm0 + movdqa xmm9,xmm8 +DB 15,58,204,194,3 +DB 15,56,200,204 +DB 69,15,58,204,194,3 +DB 68,15,56,200,204 + + pcmpgtd xmm7,xmm4 + pand xmm0,xmm11 + pand xmm1,xmm11 + pand xmm8,xmm12 + pand xmm9,xmm12 + paddd xmm6,xmm7 + + paddd xmm0,XMMWORD PTR[64+rsp] + paddd xmm1,XMMWORD PTR[80+rsp] + paddd xmm8,XMMWORD PTR[96+rsp] + paddd xmm9,XMMWORD PTR[112+rsp] + + movq QWORD PTR[rbx],xmm6 + dec edx + jnz $L$oop_shaext + + mov edx,DWORD PTR[280+rsp] + + pshufd xmm0,xmm0,27 + pshufd xmm8,xmm8,27 + + movdqa xmm6,xmm0 + punpckldq xmm0,xmm8 + punpckhdq xmm6,xmm8 + punpckhdq xmm1,xmm9 + movq QWORD PTR[(0-64)+rdi],xmm0 + psrldq xmm0,8 + movq QWORD PTR[(64-64)+rdi],xmm6 + psrldq xmm6,8 + movq QWORD PTR[(32-64)+rdi],xmm0 + psrldq xmm1,8 + movq QWORD PTR[(96-64)+rdi],xmm6 + movq QWORD PTR[(128-64)+rdi],xmm1 + + lea rdi,QWORD PTR[8+rdi] + lea rsi,QWORD PTR[32+rsi] + dec edx + jnz $L$oop_grande_shaext + +$L$done_shaext:: + + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_multi_block_shaext:: +sha1_multi_block_shaext ENDP + +ALIGN 256 + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 05a827999h,05a827999h,05a827999h,05a827999h +K_XX_XX:: + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 
0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h +DB 83,72,65,49,32,109,117,108,116,105,45,98,108,111,99,107 +DB 32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120 +DB 56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77 +DB 83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110 +DB 115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[272+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + + lea rsi,QWORD PTR[((-24-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha1_multi_block + DD imagerel $L$SEH_end_sha1_multi_block + DD imagerel $L$SEH_info_sha1_multi_block + DD imagerel $L$SEH_begin_sha1_multi_block_shaext + DD imagerel $L$SEH_end_sha1_multi_block_shaext + DD imagerel $L$SEH_info_sha1_multi_block_shaext +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha1_multi_block:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body,imagerel $L$epilogue +$L$SEH_info_sha1_multi_block_shaext:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm new file mode 100644 index 00000000000000..07b7882a96c0c6 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm @@ -0,0 +1,2850 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC sha1_block_data_order + +ALIGN 16 +sha1_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov r9d,DWORD PTR[((OPENSSL_ia32cap_P+0))] + mov r8d,DWORD PTR[((OPENSSL_ia32cap_P+4))] + mov r10d,DWORD PTR[((OPENSSL_ia32cap_P+8))] + test r8d,512 + jz $L$ialu + test r10d,536870912 + jnz _shaext_shortcut + jmp _ssse3_shortcut + +ALIGN 16 +$L$ialu:: + mov rax,rsp + push rbx + push rbp + push 
r12 + push r13 + push r14 + mov r8,rdi + sub rsp,72 + mov r9,rsi + and rsp,-64 + mov r10,rdx + mov QWORD PTR[64+rsp],rax +$L$prologue:: + + mov esi,DWORD PTR[r8] + mov edi,DWORD PTR[4+r8] + mov r11d,DWORD PTR[8+r8] + mov r12d,DWORD PTR[12+r8] + mov r13d,DWORD PTR[16+r8] + jmp $L$loop + +ALIGN 16 +$L$loop:: + mov edx,DWORD PTR[r9] + bswap edx + mov ebp,DWORD PTR[4+r9] + mov eax,r12d + mov DWORD PTR[rsp],edx + mov ecx,esi + bswap ebp + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov r14d,DWORD PTR[8+r9] + mov eax,r11d + mov DWORD PTR[4+rsp],ebp + mov ecx,r13d + bswap r14d + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov edx,DWORD PTR[12+r9] + mov eax,edi + mov DWORD PTR[8+rsp],r14d + mov ecx,r12d + bswap edx + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+r14] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov ebp,DWORD PTR[16+r9] + mov eax,esi + mov DWORD PTR[12+rsp],edx + mov ecx,r11d + bswap ebp + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov r14d,DWORD PTR[20+r9] + mov eax,r13d + mov DWORD PTR[16+rsp],ebp + mov ecx,edi + bswap r14d + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov edx,DWORD PTR[24+r9] + mov eax,r12d + mov DWORD PTR[20+rsp],r14d + mov ecx,esi + bswap edx + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+r14] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov ebp,DWORD PTR[28+r9] + mov eax,r11d + mov DWORD PTR[24+rsp],edx + mov ecx,r13d + bswap ebp + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rdx] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov r14d,DWORD PTR[32+r9] + mov eax,edi + mov DWORD PTR[28+rsp],ebp + mov ecx,r12d + bswap r14d + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+rbp] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov edx,DWORD PTR[36+r9] + mov eax,esi + mov DWORD PTR[32+rsp],r14d + mov ecx,r11d + bswap edx + xor eax,r13d + rol ecx,5 + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+r14] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov ebp,DWORD PTR[40+r9] + mov eax,r13d + mov DWORD PTR[36+rsp],edx + mov ecx,edi + bswap ebp + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rdx] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + mov r14d,DWORD PTR[44+r9] + mov eax,r12d + mov DWORD PTR[40+rsp],ebp + mov ecx,esi + bswap r14d + xor eax,r11d + rol ecx,5 + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rbp] + add r13d,ecx + xor eax,r12d + rol edi,30 + add r13d,eax + mov edx,DWORD PTR[48+r9] + mov eax,r11d + mov DWORD PTR[44+rsp],r14d + mov ecx,r13d + bswap edx + xor eax,edi + rol ecx,5 + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+r14] + add r12d,ecx + xor eax,r11d + rol esi,30 + add r12d,eax + mov ebp,DWORD PTR[52+r9] + mov eax,edi + mov DWORD PTR[48+rsp],edx + mov ecx,r12d + bswap ebp + xor eax,esi + rol ecx,5 + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+rdx] + add r11d,ecx + xor eax,edi + rol r13d,30 + add r11d,eax + mov r14d,DWORD PTR[56+r9] + mov eax,esi + mov DWORD PTR[52+rsp],ebp + mov ecx,r11d + bswap r14d + xor eax,r13d + 
rol ecx,5 + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rbp] + add edi,ecx + xor eax,esi + rol r12d,30 + add edi,eax + mov edx,DWORD PTR[60+r9] + mov eax,r13d + mov DWORD PTR[56+rsp],r14d + mov ecx,edi + bswap edx + xor eax,r12d + rol ecx,5 + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+r14] + add esi,ecx + xor eax,r13d + rol r11d,30 + add esi,eax + xor ebp,DWORD PTR[rsp] + mov eax,r12d + mov DWORD PTR[60+rsp],edx + mov ecx,esi + xor ebp,DWORD PTR[8+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + and eax,edi + lea r13d,DWORD PTR[1518500249+r13*1+rdx] + rol edi,30 + xor eax,r12d + add r13d,ecx + rol ebp,1 + add r13d,eax + xor r14d,DWORD PTR[4+rsp] + mov eax,r11d + mov DWORD PTR[rsp],ebp + mov ecx,r13d + xor r14d,DWORD PTR[12+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[36+rsp] + and eax,esi + lea r12d,DWORD PTR[1518500249+r12*1+rbp] + rol esi,30 + xor eax,r11d + add r12d,ecx + rol r14d,1 + add r12d,eax + xor edx,DWORD PTR[8+rsp] + mov eax,edi + mov DWORD PTR[4+rsp],r14d + mov ecx,r12d + xor edx,DWORD PTR[16+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[40+rsp] + and eax,r13d + lea r11d,DWORD PTR[1518500249+r11*1+r14] + rol r13d,30 + xor eax,edi + add r11d,ecx + rol edx,1 + add r11d,eax + xor ebp,DWORD PTR[12+rsp] + mov eax,esi + mov DWORD PTR[8+rsp],edx + mov ecx,r11d + xor ebp,DWORD PTR[20+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[44+rsp] + and eax,r12d + lea edi,DWORD PTR[1518500249+rdi*1+rdx] + rol r12d,30 + xor eax,esi + add edi,ecx + rol ebp,1 + add edi,eax + xor r14d,DWORD PTR[16+rsp] + mov eax,r13d + mov DWORD PTR[12+rsp],ebp + mov ecx,edi + xor r14d,DWORD PTR[24+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD PTR[48+rsp] + and eax,r11d + lea esi,DWORD PTR[1518500249+rsi*1+rbp] + rol r11d,30 + xor eax,r13d + add esi,ecx + rol r14d,1 + add esi,eax + xor edx,DWORD PTR[20+rsp] + mov eax,edi + mov DWORD PTR[16+rsp],r14d + mov ecx,esi + xor edx,DWORD PTR[28+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD PTR[24+rsp] + mov eax,esi + mov DWORD PTR[20+rsp],edx + mov ecx,r13d + xor ebp,DWORD PTR[32+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD PTR[28+rsp] + mov eax,r13d + mov DWORD PTR[24+rsp],ebp + mov ecx,r12d + xor r14d,DWORD PTR[36+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[60+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD PTR[32+rsp] + mov eax,r12d + mov DWORD PTR[28+rsp],r14d + mov ecx,r11d + xor edx,DWORD PTR[40+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[rsp] + lea edi,DWORD PTR[1859775393+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD PTR[36+rsp] + mov eax,r11d + mov DWORD PTR[32+rsp],edx + mov ecx,edi + xor ebp,DWORD PTR[44+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[4+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD PTR[40+rsp] + mov eax,edi + mov DWORD PTR[36+rsp],ebp + mov ecx,esi + xor r14d,DWORD PTR[48+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD PTR[8+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD PTR[44+rsp] + mov 
eax,esi + mov DWORD PTR[40+rsp],r14d + mov ecx,r13d + xor edx,DWORD PTR[52+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD PTR[48+rsp] + mov eax,r13d + mov DWORD PTR[44+rsp],edx + mov ecx,r12d + xor ebp,DWORD PTR[56+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD PTR[52+rsp] + mov eax,r12d + mov DWORD PTR[48+rsp],ebp + mov ecx,r11d + xor r14d,DWORD PTR[60+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD PTR[20+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD PTR[56+rsp] + mov eax,r11d + mov DWORD PTR[52+rsp],r14d + mov ecx,edi + xor edx,DWORD PTR[rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[24+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD PTR[60+rsp] + mov eax,edi + mov DWORD PTR[56+rsp],edx + mov ecx,esi + xor ebp,DWORD PTR[4+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[28+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD PTR[rsp] + mov eax,esi + mov DWORD PTR[60+rsp],ebp + mov ecx,r13d + xor r14d,DWORD PTR[8+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD PTR[32+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD PTR[4+rsp] + mov eax,r13d + mov DWORD PTR[rsp],r14d + mov ecx,r12d + xor edx,DWORD PTR[12+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[36+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD PTR[8+rsp] + mov eax,r12d + mov DWORD PTR[4+rsp],edx + mov ecx,r11d + xor ebp,DWORD PTR[16+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[40+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD PTR[12+rsp] + mov eax,r11d + mov DWORD PTR[8+rsp],ebp + mov ecx,edi + xor r14d,DWORD PTR[20+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD PTR[44+rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD PTR[16+rsp] + mov eax,edi + mov DWORD PTR[12+rsp],r14d + mov ecx,esi + xor edx,DWORD PTR[24+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[48+rsp] + lea r13d,DWORD PTR[1859775393+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD PTR[20+rsp] + mov eax,esi + mov DWORD PTR[16+rsp],edx + mov ecx,r13d + xor ebp,DWORD PTR[28+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[52+rsp] + lea r12d,DWORD PTR[1859775393+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD PTR[24+rsp] + mov eax,r13d + mov DWORD PTR[20+rsp],ebp + mov ecx,r12d + xor r14d,DWORD PTR[32+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[56+rsp] + lea r11d,DWORD PTR[1859775393+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD PTR[28+rsp] + mov eax,r12d + mov DWORD PTR[24+rsp],r14d + mov ecx,r11d + xor edx,DWORD PTR[36+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[60+rsp] + lea edi,DWORD PTR[1859775393+rdi*1+r14] + xor 
eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD PTR[32+rsp] + mov eax,r11d + mov DWORD PTR[28+rsp],edx + mov ecx,edi + xor ebp,DWORD PTR[40+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[rsp] + lea esi,DWORD PTR[1859775393+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD PTR[36+rsp] + mov eax,r12d + mov DWORD PTR[32+rsp],ebp + mov ebx,r12d + xor r14d,DWORD PTR[44+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD PTR[4+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD PTR[40+rsp] + mov eax,r11d + mov DWORD PTR[36+rsp],r14d + mov ebx,r11d + xor edx,DWORD PTR[48+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD PTR[8+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD PTR[44+rsp] + mov eax,edi + mov DWORD PTR[40+rsp],edx + mov ebx,edi + xor ebp,DWORD PTR[52+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[12+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD PTR[48+rsp] + mov eax,esi + mov DWORD PTR[44+rsp],ebp + mov ebx,esi + xor r14d,DWORD PTR[56+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD PTR[16+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD PTR[52+rsp] + mov eax,r13d + mov DWORD PTR[48+rsp],r14d + mov ebx,r13d + xor edx,DWORD PTR[60+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD PTR[20+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD PTR[56+rsp] + mov eax,r12d + mov DWORD PTR[52+rsp],edx + mov ebx,r12d + xor ebp,DWORD PTR[rsp] + and eax,r11d + mov ecx,esi + xor ebp,DWORD PTR[24+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rdx] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol ebp,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor r14d,DWORD PTR[60+rsp] + mov eax,r11d + mov DWORD PTR[56+rsp],ebp + mov ebx,r11d + xor r14d,DWORD PTR[4+rsp] + and eax,edi + mov ecx,r13d + xor r14d,DWORD PTR[28+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+rbp] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol r14d,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor edx,DWORD PTR[rsp] + mov eax,edi + mov DWORD PTR[60+rsp],r14d + mov ebx,edi + xor edx,DWORD PTR[8+rsp] + and eax,esi + mov ecx,r12d + xor edx,DWORD PTR[32+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+r14] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol edx,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor ebp,DWORD PTR[4+rsp] + mov eax,esi + mov DWORD PTR[rsp],edx + mov ebx,esi + xor ebp,DWORD PTR[12+rsp] + and eax,r13d + mov ecx,r11d + xor ebp,DWORD PTR[36+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rdx] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol ebp,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor r14d,DWORD PTR[8+rsp] + mov eax,r13d + mov DWORD PTR[4+rsp],ebp + mov ebx,r13d + xor r14d,DWORD PTR[16+rsp] + and eax,r12d + mov ecx,edi + xor r14d,DWORD PTR[40+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+rbp] + xor ebx,r12d + rol ecx,5 + add 
esi,eax + rol r14d,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor edx,DWORD PTR[12+rsp] + mov eax,r12d + mov DWORD PTR[8+rsp],r14d + mov ebx,r12d + xor edx,DWORD PTR[20+rsp] + and eax,r11d + mov ecx,esi + xor edx,DWORD PTR[44+rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+r14] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol edx,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor ebp,DWORD PTR[16+rsp] + mov eax,r11d + mov DWORD PTR[12+rsp],edx + mov ebx,r11d + xor ebp,DWORD PTR[24+rsp] + and eax,edi + mov ecx,r13d + xor ebp,DWORD PTR[48+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+rdx] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol ebp,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor r14d,DWORD PTR[20+rsp] + mov eax,edi + mov DWORD PTR[16+rsp],ebp + mov ebx,edi + xor r14d,DWORD PTR[28+rsp] + and eax,esi + mov ecx,r12d + xor r14d,DWORD PTR[52+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rbp] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol r14d,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor edx,DWORD PTR[24+rsp] + mov eax,esi + mov DWORD PTR[20+rsp],r14d + mov ebx,esi + xor edx,DWORD PTR[32+rsp] + and eax,r13d + mov ecx,r11d + xor edx,DWORD PTR[56+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+r14] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol edx,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor ebp,DWORD PTR[28+rsp] + mov eax,r13d + mov DWORD PTR[24+rsp],edx + mov ebx,r13d + xor ebp,DWORD PTR[36+rsp] + and eax,r12d + mov ecx,edi + xor ebp,DWORD PTR[60+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+rdx] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol ebp,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor r14d,DWORD PTR[32+rsp] + mov eax,r12d + mov DWORD PTR[28+rsp],ebp + mov ebx,r12d + xor r14d,DWORD PTR[40+rsp] + and eax,r11d + mov ecx,esi + xor r14d,DWORD PTR[rsp] + lea r13d,DWORD PTR[((-1894007588))+r13*1+rbp] + xor ebx,r11d + rol ecx,5 + add r13d,eax + rol r14d,1 + and ebx,edi + add r13d,ecx + rol edi,30 + add r13d,ebx + xor edx,DWORD PTR[36+rsp] + mov eax,r11d + mov DWORD PTR[32+rsp],r14d + mov ebx,r11d + xor edx,DWORD PTR[44+rsp] + and eax,edi + mov ecx,r13d + xor edx,DWORD PTR[4+rsp] + lea r12d,DWORD PTR[((-1894007588))+r12*1+r14] + xor ebx,edi + rol ecx,5 + add r12d,eax + rol edx,1 + and ebx,esi + add r12d,ecx + rol esi,30 + add r12d,ebx + xor ebp,DWORD PTR[40+rsp] + mov eax,edi + mov DWORD PTR[36+rsp],edx + mov ebx,edi + xor ebp,DWORD PTR[48+rsp] + and eax,esi + mov ecx,r12d + xor ebp,DWORD PTR[8+rsp] + lea r11d,DWORD PTR[((-1894007588))+r11*1+rdx] + xor ebx,esi + rol ecx,5 + add r11d,eax + rol ebp,1 + and ebx,r13d + add r11d,ecx + rol r13d,30 + add r11d,ebx + xor r14d,DWORD PTR[44+rsp] + mov eax,esi + mov DWORD PTR[40+rsp],ebp + mov ebx,esi + xor r14d,DWORD PTR[52+rsp] + and eax,r13d + mov ecx,r11d + xor r14d,DWORD PTR[12+rsp] + lea edi,DWORD PTR[((-1894007588))+rdi*1+rbp] + xor ebx,r13d + rol ecx,5 + add edi,eax + rol r14d,1 + and ebx,r12d + add edi,ecx + rol r12d,30 + add edi,ebx + xor edx,DWORD PTR[48+rsp] + mov eax,r13d + mov DWORD PTR[44+rsp],r14d + mov ebx,r13d + xor edx,DWORD PTR[56+rsp] + and eax,r12d + mov ecx,edi + xor edx,DWORD PTR[16+rsp] + lea esi,DWORD PTR[((-1894007588))+rsi*1+r14] + xor ebx,r12d + rol ecx,5 + add esi,eax + rol edx,1 + and ebx,r11d + add esi,ecx + rol r11d,30 + add esi,ebx + xor ebp,DWORD PTR[52+rsp] + mov eax,edi + mov DWORD PTR[48+rsp],edx + mov ecx,esi + xor ebp,DWORD PTR[60+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[20+rsp] + lea 
r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD PTR[56+rsp] + mov eax,esi + mov DWORD PTR[52+rsp],ebp + mov ecx,r13d + xor r14d,DWORD PTR[rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD PTR[24+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD PTR[60+rsp] + mov eax,r13d + mov DWORD PTR[56+rsp],r14d + mov ecx,r12d + xor edx,DWORD PTR[4+rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[28+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD PTR[rsp] + mov eax,r12d + mov DWORD PTR[60+rsp],edx + mov ecx,r11d + xor ebp,DWORD PTR[8+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[32+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + xor r14d,DWORD PTR[4+rsp] + mov eax,r11d + mov DWORD PTR[rsp],ebp + mov ecx,edi + xor r14d,DWORD PTR[12+rsp] + xor eax,r13d + rol ecx,5 + xor r14d,DWORD PTR[36+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol r14d,1 + xor edx,DWORD PTR[8+rsp] + mov eax,edi + mov DWORD PTR[4+rsp],r14d + mov ecx,esi + xor edx,DWORD PTR[16+rsp] + xor eax,r12d + rol ecx,5 + xor edx,DWORD PTR[40+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+r14] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol edx,1 + xor ebp,DWORD PTR[12+rsp] + mov eax,esi + mov DWORD PTR[8+rsp],edx + mov ecx,r13d + xor ebp,DWORD PTR[20+rsp] + xor eax,r11d + rol ecx,5 + xor ebp,DWORD PTR[44+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rdx] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol ebp,1 + xor r14d,DWORD PTR[16+rsp] + mov eax,r13d + mov DWORD PTR[12+rsp],ebp + mov ecx,r12d + xor r14d,DWORD PTR[24+rsp] + xor eax,edi + rol ecx,5 + xor r14d,DWORD PTR[48+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+rbp] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol r14d,1 + xor edx,DWORD PTR[20+rsp] + mov eax,r12d + mov DWORD PTR[16+rsp],r14d + mov ecx,r11d + xor edx,DWORD PTR[28+rsp] + xor eax,esi + rol ecx,5 + xor edx,DWORD PTR[52+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+r14] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol edx,1 + xor ebp,DWORD PTR[24+rsp] + mov eax,r11d + mov DWORD PTR[20+rsp],edx + mov ecx,edi + xor ebp,DWORD PTR[32+rsp] + xor eax,r13d + rol ecx,5 + xor ebp,DWORD PTR[56+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+rdx] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol ebp,1 + xor r14d,DWORD PTR[28+rsp] + mov eax,edi + mov DWORD PTR[24+rsp],ebp + mov ecx,esi + xor r14d,DWORD PTR[36+rsp] + xor eax,r12d + rol ecx,5 + xor r14d,DWORD PTR[60+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rbp] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol r14d,1 + xor edx,DWORD PTR[32+rsp] + mov eax,esi + mov DWORD PTR[28+rsp],r14d + mov ecx,r13d + xor edx,DWORD PTR[40+rsp] + xor eax,r11d + rol ecx,5 + xor edx,DWORD PTR[rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+r14] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol edx,1 + xor ebp,DWORD PTR[36+rsp] + mov eax,r13d + + mov ecx,r12d + xor ebp,DWORD PTR[44+rsp] + xor eax,edi + rol ecx,5 + xor ebp,DWORD PTR[4+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+rdx] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol ebp,1 + xor r14d,DWORD PTR[40+rsp] + mov eax,r12d + + mov ecx,r11d 
+ xor r14d,DWORD PTR[48+rsp] + xor eax,esi + rol ecx,5 + xor r14d,DWORD PTR[8+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rbp] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol r14d,1 + xor edx,DWORD PTR[44+rsp] + mov eax,r11d + + mov ecx,edi + xor edx,DWORD PTR[52+rsp] + xor eax,r13d + rol ecx,5 + xor edx,DWORD PTR[12+rsp] + lea esi,DWORD PTR[((-899497514))+rsi*1+r14] + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + rol edx,1 + xor ebp,DWORD PTR[48+rsp] + mov eax,edi + + mov ecx,esi + xor ebp,DWORD PTR[56+rsp] + xor eax,r12d + rol ecx,5 + xor ebp,DWORD PTR[16+rsp] + lea r13d,DWORD PTR[((-899497514))+r13*1+rdx] + xor eax,r11d + add r13d,ecx + rol edi,30 + add r13d,eax + rol ebp,1 + xor r14d,DWORD PTR[52+rsp] + mov eax,esi + + mov ecx,r13d + xor r14d,DWORD PTR[60+rsp] + xor eax,r11d + rol ecx,5 + xor r14d,DWORD PTR[20+rsp] + lea r12d,DWORD PTR[((-899497514))+r12*1+rbp] + xor eax,edi + add r12d,ecx + rol esi,30 + add r12d,eax + rol r14d,1 + xor edx,DWORD PTR[56+rsp] + mov eax,r13d + + mov ecx,r12d + xor edx,DWORD PTR[rsp] + xor eax,edi + rol ecx,5 + xor edx,DWORD PTR[24+rsp] + lea r11d,DWORD PTR[((-899497514))+r11*1+r14] + xor eax,esi + add r11d,ecx + rol r13d,30 + add r11d,eax + rol edx,1 + xor ebp,DWORD PTR[60+rsp] + mov eax,r12d + + mov ecx,r11d + xor ebp,DWORD PTR[4+rsp] + xor eax,esi + rol ecx,5 + xor ebp,DWORD PTR[28+rsp] + lea edi,DWORD PTR[((-899497514))+rdi*1+rdx] + xor eax,r13d + add edi,ecx + rol r12d,30 + add edi,eax + rol ebp,1 + mov eax,r11d + mov ecx,edi + xor eax,r13d + lea esi,DWORD PTR[((-899497514))+rsi*1+rbp] + rol ecx,5 + xor eax,r12d + add esi,ecx + rol r11d,30 + add esi,eax + add esi,DWORD PTR[r8] + add edi,DWORD PTR[4+r8] + add r11d,DWORD PTR[8+r8] + add r12d,DWORD PTR[12+r8] + add r13d,DWORD PTR[16+r8] + mov DWORD PTR[r8],esi + mov DWORD PTR[4+r8],edi + mov DWORD PTR[8+r8],r11d + mov DWORD PTR[12+r8],r12d + mov DWORD PTR[16+r8],r13d + + sub r10,1 + lea r9,QWORD PTR[64+r9] + jnz $L$loop + + mov rsi,QWORD PTR[64+rsp] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order:: +sha1_block_data_order ENDP + +ALIGN 32 +sha1_block_data_order_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + lea rsp,QWORD PTR[((-72))+rsp] + movaps XMMWORD PTR[(-8-64)+rax],xmm6 + movaps XMMWORD PTR[(-8-48)+rax],xmm7 + movaps XMMWORD PTR[(-8-32)+rax],xmm8 + movaps XMMWORD PTR[(-8-16)+rax],xmm9 +$L$prologue_shaext:: + movdqu xmm0,XMMWORD PTR[rdi] + movd xmm1,DWORD PTR[16+rdi] + movdqa xmm3,XMMWORD PTR[((K_XX_XX+160))] + + movdqu xmm4,XMMWORD PTR[rsi] + pshufd xmm0,xmm0,27 + movdqu xmm5,XMMWORD PTR[16+rsi] + pshufd xmm1,xmm1,27 + movdqu xmm6,XMMWORD PTR[32+rsi] +DB 102,15,56,0,227 + movdqu xmm7,XMMWORD PTR[48+rsi] +DB 102,15,56,0,235 +DB 102,15,56,0,243 + movdqa xmm9,xmm1 +DB 102,15,56,0,251 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + dec rdx + lea rax,QWORD PTR[64+rsi] + paddd xmm1,xmm4 + cmovne rsi,rax + movdqa xmm8,xmm0 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 
15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 + movdqu xmm4,XMMWORD PTR[rsi] + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,213 + movdqu xmm5,XMMWORD PTR[16+rsi] +DB 102,15,56,0,227 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,206 + movdqu xmm6,XMMWORD PTR[32+rsi] +DB 102,15,56,0,235 + + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,215 + movdqu xmm7,XMMWORD PTR[48+rsi] +DB 102,15,56,0,243 + + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 65,15,56,200,201 +DB 102,15,56,0,251 + + paddd xmm0,xmm8 + movdqa xmm9,xmm1 + + jnz $L$oop_shaext + + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu XMMWORD PTR[rdi],xmm0 + movd DWORD PTR[16+rdi],xmm1 + movaps xmm6,XMMWORD PTR[((-8-64))+rax] + movaps xmm7,XMMWORD PTR[((-8-48))+rax] + movaps xmm8,XMMWORD PTR[((-8-32))+rax] + movaps xmm9,XMMWORD PTR[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_shaext:: +sha1_block_data_order_shaext ENDP + +ALIGN 16 +sha1_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha1_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_ssse3_shortcut:: + mov rax,rsp + push rbx + push rbp + push r12 + push r13 + push r14 + lea rsp,QWORD PTR[((-160))+rsp] + movaps XMMWORD PTR[(-40-96)+rax],xmm6 + movaps XMMWORD PTR[(-40-80)+rax],xmm7 + movaps XMMWORD PTR[(-40-64)+rax],xmm8 + movaps XMMWORD PTR[(-40-48)+rax],xmm9 + movaps XMMWORD PTR[(-40-32)+rax],xmm10 + movaps XMMWORD PTR[(-40-16)+rax],xmm11 +$L$prologue_ssse3:: + mov r14,rax + and rsp,-64 + mov r8,rdi + mov r9,rsi + mov r10,rdx + + shl r10,6 + add r10,r9 + lea r11,QWORD PTR[((K_XX_XX+64))] + + mov eax,DWORD PTR[r8] + mov ebx,DWORD PTR[4+r8] + mov ecx,DWORD PTR[8+r8] + mov edx,DWORD PTR[12+r8] + mov esi,ebx + mov ebp,DWORD PTR[16+r8] + mov edi,ecx + xor edi,edx + and esi,edi + + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD 
PTR[((-64))+r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + add r9,64 + paddd xmm0,xmm9 +DB 102,15,56,0,222 + paddd xmm1,xmm9 + paddd xmm2,xmm9 + movdqa XMMWORD PTR[rsp],xmm0 + psubd xmm0,xmm9 + movdqa XMMWORD PTR[16+rsp],xmm1 + psubd xmm1,xmm9 + movdqa XMMWORD PTR[32+rsp],xmm2 + psubd xmm2,xmm9 + jmp $L$oop_ssse3 +ALIGN 16 +$L$oop_ssse3:: + ror ebx,2 + pshufd xmm4,xmm0,238 + xor esi,edx + movdqa xmm8,xmm3 + paddd xmm9,xmm3 + mov edi,eax + add ebp,DWORD PTR[rsp] + punpcklqdq xmm4,xmm1 + xor ebx,ecx + rol eax,5 + add ebp,esi + psrldq xmm8,4 + and edi,ebx + xor ebx,ecx + pxor xmm4,xmm0 + add ebp,eax + ror eax,7 + pxor xmm8,xmm2 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[4+rsp] + pxor xmm4,xmm8 + xor eax,ebx + rol ebp,5 + movdqa XMMWORD PTR[48+rsp],xmm9 + add edx,edi + and esi,eax + movdqa xmm10,xmm4 + xor eax,ebx + add edx,ebp + ror ebp,7 + movdqa xmm8,xmm4 + xor esi,ebx + pslldq xmm10,12 + paddd xmm4,xmm4 + mov edi,edx + add ecx,DWORD PTR[8+rsp] + psrld xmm8,31 + xor ebp,eax + rol edx,5 + add ecx,esi + movdqa xmm9,xmm10 + and edi,ebp + xor ebp,eax + psrld xmm10,30 + add ecx,edx + ror edx,7 + por xmm4,xmm8 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[12+rsp] + pslld xmm9,2 + pxor xmm4,xmm10 + xor edx,ebp + movdqa xmm10,XMMWORD PTR[((-64))+r11] + rol ecx,5 + add ebx,edi + and esi,edx + pxor xmm4,xmm9 + xor edx,ebp + add ebx,ecx + ror ecx,7 + pshufd xmm5,xmm1,238 + xor esi,ebp + movdqa xmm9,xmm4 + paddd xmm10,xmm4 + mov edi,ebx + add eax,DWORD PTR[16+rsp] + punpcklqdq xmm5,xmm2 + xor ecx,edx + rol ebx,5 + add eax,esi + psrldq xmm9,4 + and edi,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm9,xmm3 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[20+rsp] + pxor xmm5,xmm9 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD PTR[rsp],xmm10 + add ebp,edi + and esi,ebx + movdqa xmm8,xmm5 + xor ebx,ecx + add ebp,eax + ror eax,7 + movdqa xmm9,xmm5 + xor esi,ecx + pslldq xmm8,12 + paddd xmm5,xmm5 + mov edi,ebp + add edx,DWORD PTR[24+rsp] + psrld xmm9,31 + xor eax,ebx + rol ebp,5 + add edx,esi + movdqa xmm10,xmm8 + and edi,eax + xor eax,ebx + psrld xmm8,30 + add edx,ebp + ror ebp,7 + por xmm5,xmm9 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[28+rsp] + pslld xmm10,2 + pxor xmm5,xmm8 + xor ebp,eax + movdqa xmm8,XMMWORD PTR[((-32))+r11] + rol edx,5 + add ecx,edi + and esi,ebp + pxor xmm5,xmm10 + xor ebp,eax + add ecx,edx + ror edx,7 + pshufd xmm6,xmm2,238 + xor esi,eax + movdqa xmm10,xmm5 + paddd xmm8,xmm5 + mov edi,ecx + add ebx,DWORD PTR[32+rsp] + punpcklqdq xmm6,xmm3 + xor edx,ebp + rol ecx,5 + add ebx,esi + psrldq xmm10,4 + and edi,edx + xor edx,ebp + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm10,xmm4 + xor edi,ebp + mov esi,ebx + add eax,DWORD PTR[36+rsp] + pxor xmm6,xmm10 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD PTR[16+rsp],xmm8 + add eax,edi + and esi,ecx + movdqa xmm9,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm10,xmm6 + xor esi,edx + pslldq xmm9,12 + paddd xmm6,xmm6 + mov edi,eax + add ebp,DWORD PTR[40+rsp] + psrld xmm10,31 + xor ebx,ecx + rol eax,5 + add ebp,esi + movdqa xmm8,xmm9 + and edi,ebx + xor ebx,ecx + psrld xmm9,30 + add ebp,eax + ror eax,7 + por xmm6,xmm10 + xor edi,ecx + mov esi,ebp + add edx,DWORD PTR[44+rsp] + pslld xmm8,2 + pxor xmm6,xmm9 + xor eax,ebx + movdqa xmm9,XMMWORD PTR[((-32))+r11] + rol ebp,5 + add edx,edi + and esi,eax + pxor xmm6,xmm8 + xor eax,ebx + add edx,ebp + ror ebp,7 + 
pshufd xmm7,xmm3,238 + xor esi,ebx + movdqa xmm8,xmm6 + paddd xmm9,xmm6 + mov edi,edx + add ecx,DWORD PTR[48+rsp] + punpcklqdq xmm7,xmm4 + xor ebp,eax + rol edx,5 + add ecx,esi + psrldq xmm8,4 + and edi,ebp + xor ebp,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm8,xmm5 + xor edi,eax + mov esi,ecx + add ebx,DWORD PTR[52+rsp] + pxor xmm7,xmm8 + xor edx,ebp + rol ecx,5 + movdqa XMMWORD PTR[32+rsp],xmm9 + add ebx,edi + and esi,edx + movdqa xmm10,xmm7 + xor edx,ebp + add ebx,ecx + ror ecx,7 + movdqa xmm8,xmm7 + xor esi,ebp + pslldq xmm10,12 + paddd xmm7,xmm7 + mov edi,ebx + add eax,DWORD PTR[56+rsp] + psrld xmm8,31 + xor ecx,edx + rol ebx,5 + add eax,esi + movdqa xmm9,xmm10 + and edi,ecx + xor ecx,edx + psrld xmm10,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm8 + xor edi,edx + mov esi,eax + add ebp,DWORD PTR[60+rsp] + pslld xmm9,2 + pxor xmm7,xmm10 + xor ebx,ecx + movdqa xmm10,XMMWORD PTR[((-32))+r11] + rol eax,5 + add ebp,edi + and esi,ebx + pxor xmm7,xmm9 + pshufd xmm9,xmm6,238 + xor ebx,ecx + add ebp,eax + ror eax,7 + pxor xmm0,xmm4 + xor esi,ecx + mov edi,ebp + add edx,DWORD PTR[rsp] + punpcklqdq xmm9,xmm7 + xor eax,ebx + rol ebp,5 + pxor xmm0,xmm1 + add edx,esi + and edi,eax + movdqa xmm8,xmm10 + xor eax,ebx + paddd xmm10,xmm7 + add edx,ebp + pxor xmm0,xmm9 + ror ebp,7 + xor edi,ebx + mov esi,edx + add ecx,DWORD PTR[4+rsp] + movdqa xmm9,xmm0 + xor ebp,eax + rol edx,5 + movdqa XMMWORD PTR[48+rsp],xmm10 + add ecx,edi + and esi,ebp + xor ebp,eax + pslld xmm0,2 + add ecx,edx + ror edx,7 + psrld xmm9,30 + xor esi,eax + mov edi,ecx + add ebx,DWORD PTR[8+rsp] + por xmm0,xmm9 + xor edx,ebp + rol ecx,5 + pshufd xmm10,xmm7,238 + add ebx,esi + and edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[12+rsp] + xor edi,ebp + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + pxor xmm1,xmm5 + add ebp,DWORD PTR[16+rsp] + xor esi,ecx + punpcklqdq xmm10,xmm0 + mov edi,eax + rol eax,5 + pxor xmm1,xmm2 + add ebp,esi + xor edi,ecx + movdqa xmm9,xmm8 + ror ebx,7 + paddd xmm8,xmm0 + add ebp,eax + pxor xmm1,xmm10 + add edx,DWORD PTR[20+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm10,xmm1 + add edx,edi + xor esi,ebx + movdqa XMMWORD PTR[rsp],xmm8 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[24+rsp] + pslld xmm1,2 + xor esi,eax + mov edi,edx + psrld xmm10,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm1,xmm10 + add ecx,edx + add ebx,DWORD PTR[28+rsp] + pshufd xmm8,xmm0,238 + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + pxor xmm2,xmm6 + add eax,DWORD PTR[32+rsp] + xor esi,edx + punpcklqdq xmm8,xmm1 + mov edi,ebx + rol ebx,5 + pxor xmm2,xmm3 + add eax,esi + xor edi,edx + movdqa xmm10,XMMWORD PTR[r11] + ror ecx,7 + paddd xmm9,xmm1 + add eax,ebx + pxor xmm2,xmm8 + add ebp,DWORD PTR[36+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + movdqa xmm8,xmm2 + add ebp,edi + xor esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm9 + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[40+rsp] + pslld xmm2,2 + xor esi,ebx + mov edi,ebp + psrld xmm8,30 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + por xmm2,xmm8 + add edx,ebp + add ecx,DWORD PTR[44+rsp] + pshufd xmm9,xmm1,238 + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + pxor xmm3,xmm7 + add ebx,DWORD PTR[48+rsp] + xor esi,ebp + punpcklqdq xmm9,xmm2 + mov edi,ecx + rol ecx,5 + pxor xmm3,xmm4 + add ebx,esi + xor edi,ebp + movdqa xmm8,xmm10 + ror edx,7 + paddd xmm10,xmm2 + add ebx,ecx + pxor xmm3,xmm9 + add eax,DWORD PTR[52+rsp] 
+ xor edi,edx + mov esi,ebx + rol ebx,5 + movdqa xmm9,xmm3 + add eax,edi + xor esi,edx + movdqa XMMWORD PTR[32+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[56+rsp] + pslld xmm3,2 + xor esi,ecx + mov edi,eax + psrld xmm9,30 + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + por xmm3,xmm9 + add ebp,eax + add edx,DWORD PTR[60+rsp] + pshufd xmm10,xmm2,238 + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + pxor xmm4,xmm0 + add ecx,DWORD PTR[rsp] + xor esi,eax + punpcklqdq xmm10,xmm3 + mov edi,edx + rol edx,5 + pxor xmm4,xmm5 + add ecx,esi + xor edi,eax + movdqa xmm9,xmm8 + ror ebp,7 + paddd xmm8,xmm3 + add ecx,edx + pxor xmm4,xmm10 + add ebx,DWORD PTR[4+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + movdqa xmm10,xmm4 + add ebx,edi + xor esi,ebp + movdqa XMMWORD PTR[48+rsp],xmm8 + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[8+rsp] + pslld xmm4,2 + xor esi,edx + mov edi,ebx + psrld xmm10,30 + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + por xmm4,xmm10 + add eax,ebx + add ebp,DWORD PTR[12+rsp] + pshufd xmm8,xmm3,238 + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + pxor xmm5,xmm1 + add edx,DWORD PTR[16+rsp] + xor esi,ebx + punpcklqdq xmm8,xmm4 + mov edi,ebp + rol ebp,5 + pxor xmm5,xmm6 + add edx,esi + xor edi,ebx + movdqa xmm10,xmm9 + ror eax,7 + paddd xmm9,xmm4 + add edx,ebp + pxor xmm5,xmm8 + add ecx,DWORD PTR[20+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + movdqa xmm8,xmm5 + add ecx,edi + xor esi,eax + movdqa XMMWORD PTR[rsp],xmm9 + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[24+rsp] + pslld xmm5,2 + xor esi,ebp + mov edi,ecx + psrld xmm8,30 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + por xmm5,xmm8 + add ebx,ecx + add eax,DWORD PTR[28+rsp] + pshufd xmm9,xmm4,238 + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,ecx + xor ecx,edx + add eax,ebx + pxor xmm6,xmm2 + add ebp,DWORD PTR[32+rsp] + and esi,ecx + xor ecx,edx + ror ebx,7 + punpcklqdq xmm9,xmm5 + mov edi,eax + xor esi,ecx + pxor xmm6,xmm7 + rol eax,5 + add ebp,esi + movdqa xmm8,xmm10 + xor edi,ebx + paddd xmm10,xmm5 + xor ebx,ecx + pxor xmm6,xmm9 + add ebp,eax + add edx,DWORD PTR[36+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + movdqa xmm9,xmm6 + mov esi,ebp + xor edi,ebx + movdqa XMMWORD PTR[16+rsp],xmm10 + rol ebp,5 + add edx,edi + xor esi,eax + pslld xmm6,2 + xor eax,ebx + add edx,ebp + psrld xmm9,30 + add ecx,DWORD PTR[40+rsp] + and esi,eax + xor eax,ebx + por xmm6,xmm9 + ror ebp,7 + mov edi,edx + xor esi,eax + rol edx,5 + pshufd xmm10,xmm5,238 + add ecx,esi + xor edi,ebp + xor ebp,eax + add ecx,edx + add ebx,DWORD PTR[44+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + mov esi,ecx + xor edi,ebp + rol ecx,5 + add ebx,edi + xor esi,edx + xor edx,ebp + add ebx,ecx + pxor xmm7,xmm3 + add eax,DWORD PTR[48+rsp] + and esi,edx + xor edx,ebp + ror ecx,7 + punpcklqdq xmm10,xmm6 + mov edi,ebx + xor esi,edx + pxor xmm7,xmm0 + rol ebx,5 + add eax,esi + movdqa xmm9,XMMWORD PTR[32+r11] + xor edi,ecx + paddd xmm8,xmm6 + xor ecx,edx + pxor xmm7,xmm10 + add eax,ebx + add ebp,DWORD PTR[52+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + movdqa xmm10,xmm7 + mov esi,eax + xor edi,ecx + movdqa XMMWORD PTR[32+rsp],xmm8 + rol eax,5 + add ebp,edi + xor esi,ebx + pslld xmm7,2 + xor ebx,ecx + add ebp,eax + psrld xmm10,30 + add edx,DWORD PTR[56+rsp] + and esi,ebx + xor ebx,ecx + por xmm7,xmm10 + ror eax,7 + mov edi,ebp + xor esi,ebx + rol ebp,5 + pshufd xmm8,xmm6,238 + add edx,esi + xor edi,eax + xor eax,ebx + add 
edx,ebp + add ecx,DWORD PTR[60+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + mov esi,edx + xor edi,eax + rol edx,5 + add ecx,edi + xor esi,ebp + xor ebp,eax + add ecx,edx + pxor xmm0,xmm4 + add ebx,DWORD PTR[rsp] + and esi,ebp + xor ebp,eax + ror edx,7 + punpcklqdq xmm8,xmm7 + mov edi,ecx + xor esi,ebp + pxor xmm0,xmm1 + rol ecx,5 + add ebx,esi + movdqa xmm10,xmm9 + xor edi,edx + paddd xmm9,xmm7 + xor edx,ebp + pxor xmm0,xmm8 + add ebx,ecx + add eax,DWORD PTR[4+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + movdqa xmm8,xmm0 + mov esi,ebx + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm9 + rol ebx,5 + add eax,edi + xor esi,ecx + pslld xmm0,2 + xor ecx,edx + add eax,ebx + psrld xmm8,30 + add ebp,DWORD PTR[8+rsp] + and esi,ecx + xor ecx,edx + por xmm0,xmm8 + ror ebx,7 + mov edi,eax + xor esi,ecx + rol eax,5 + pshufd xmm9,xmm7,238 + add ebp,esi + xor edi,ebx + xor ebx,ecx + add ebp,eax + add edx,DWORD PTR[12+rsp] + and edi,ebx + xor ebx,ecx + ror eax,7 + mov esi,ebp + xor edi,ebx + rol ebp,5 + add edx,edi + xor esi,eax + xor eax,ebx + add edx,ebp + pxor xmm1,xmm5 + add ecx,DWORD PTR[16+rsp] + and esi,eax + xor eax,ebx + ror ebp,7 + punpcklqdq xmm9,xmm0 + mov edi,edx + xor esi,eax + pxor xmm1,xmm2 + rol edx,5 + add ecx,esi + movdqa xmm8,xmm10 + xor edi,ebp + paddd xmm10,xmm0 + xor ebp,eax + pxor xmm1,xmm9 + add ecx,edx + add ebx,DWORD PTR[20+rsp] + and edi,ebp + xor ebp,eax + ror edx,7 + movdqa xmm9,xmm1 + mov esi,ecx + xor edi,ebp + movdqa XMMWORD PTR[rsp],xmm10 + rol ecx,5 + add ebx,edi + xor esi,edx + pslld xmm1,2 + xor edx,ebp + add ebx,ecx + psrld xmm9,30 + add eax,DWORD PTR[24+rsp] + and esi,edx + xor edx,ebp + por xmm1,xmm9 + ror ecx,7 + mov edi,ebx + xor esi,edx + rol ebx,5 + pshufd xmm10,xmm0,238 + add eax,esi + xor edi,ecx + xor ecx,edx + add eax,ebx + add ebp,DWORD PTR[28+rsp] + and edi,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor edi,ecx + rol eax,5 + add ebp,edi + xor esi,ebx + xor ebx,ecx + add ebp,eax + pxor xmm2,xmm6 + add edx,DWORD PTR[32+rsp] + and esi,ebx + xor ebx,ecx + ror eax,7 + punpcklqdq xmm10,xmm1 + mov edi,ebp + xor esi,ebx + pxor xmm2,xmm3 + rol ebp,5 + add edx,esi + movdqa xmm9,xmm8 + xor edi,eax + paddd xmm8,xmm1 + xor eax,ebx + pxor xmm2,xmm10 + add edx,ebp + add ecx,DWORD PTR[36+rsp] + and edi,eax + xor eax,ebx + ror ebp,7 + movdqa xmm10,xmm2 + mov esi,edx + xor edi,eax + movdqa XMMWORD PTR[16+rsp],xmm8 + rol edx,5 + add ecx,edi + xor esi,ebp + pslld xmm2,2 + xor ebp,eax + add ecx,edx + psrld xmm10,30 + add ebx,DWORD PTR[40+rsp] + and esi,ebp + xor ebp,eax + por xmm2,xmm10 + ror edx,7 + mov edi,ecx + xor esi,ebp + rol ecx,5 + pshufd xmm8,xmm1,238 + add ebx,esi + xor edi,edx + xor edx,ebp + add ebx,ecx + add eax,DWORD PTR[44+rsp] + and edi,edx + xor edx,ebp + ror ecx,7 + mov esi,ebx + xor edi,edx + rol ebx,5 + add eax,edi + xor esi,edx + add eax,ebx + pxor xmm3,xmm7 + add ebp,DWORD PTR[48+rsp] + xor esi,ecx + punpcklqdq xmm8,xmm2 + mov edi,eax + rol eax,5 + pxor xmm3,xmm4 + add ebp,esi + xor edi,ecx + movdqa xmm10,xmm9 + ror ebx,7 + paddd xmm9,xmm2 + add ebp,eax + pxor xmm3,xmm8 + add edx,DWORD PTR[52+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + movdqa xmm8,xmm3 + add edx,edi + xor esi,ebx + movdqa XMMWORD PTR[32+rsp],xmm9 + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[56+rsp] + pslld xmm3,2 + xor esi,eax + mov edi,edx + psrld xmm8,30 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + por xmm3,xmm8 + add ecx,edx + add ebx,DWORD PTR[60+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[rsp] 
+ xor esi,edx + mov edi,ebx + rol ebx,5 + paddd xmm10,xmm3 + add eax,esi + xor edi,edx + movdqa XMMWORD PTR[48+rsp],xmm10 + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[4+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[8+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[12+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + cmp r9,r10 + je $L$done_ssse3 + movdqa xmm6,XMMWORD PTR[64+r11] + movdqa xmm9,XMMWORD PTR[((-64))+r11] + movdqu xmm0,XMMWORD PTR[r9] + movdqu xmm1,XMMWORD PTR[16+r9] + movdqu xmm2,XMMWORD PTR[32+r9] + movdqu xmm3,XMMWORD PTR[48+r9] +DB 102,15,56,0,198 + add r9,64 + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx +DB 102,15,56,0,206 + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + paddd xmm0,xmm9 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + movdqa XMMWORD PTR[rsp],xmm0 + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + psubd xmm0,xmm9 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx +DB 102,15,56,0,214 + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + paddd xmm1,xmm9 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + movdqa XMMWORD PTR[16+rsp],xmm1 + rol ecx,5 + add ebx,edi + xor esi,ebp + ror edx,7 + psubd xmm1,xmm9 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp +DB 102,15,56,0,222 + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + paddd xmm2,xmm9 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + movdqa XMMWORD PTR[32+rsp],xmm2 + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + psubd xmm2,xmm9 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + add edx,DWORD PTR[12+r8] + mov DWORD PTR[r8],eax + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[4+r8],esi + mov ebx,esi + mov DWORD PTR[8+r8],ecx + mov edi,ecx + mov DWORD PTR[12+r8],edx + xor edi,edx + mov DWORD PTR[16+r8],ebp + and esi,edi + jmp $L$oop_ssse3 + +ALIGN 16 +$L$done_ssse3:: + add ebx,DWORD PTR[16+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[20+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + xor esi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[24+rsp] + xor esi,ecx + mov edi,eax + rol eax,5 + add ebp,esi + xor edi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[28+rsp] + xor edi,ebx + mov esi,ebp + rol ebp,5 + add edx,edi + xor esi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[32+rsp] + xor esi,eax + mov edi,edx + rol edx,5 + add ecx,esi + xor edi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[36+rsp] + xor edi,ebp + mov esi,ecx + rol ecx,5 + add ebx,edi + 
xor esi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[40+rsp] + xor esi,edx + mov edi,ebx + rol ebx,5 + add eax,esi + xor edi,edx + ror ecx,7 + add eax,ebx + add ebp,DWORD PTR[44+rsp] + xor edi,ecx + mov esi,eax + rol eax,5 + add ebp,edi + xor esi,ecx + ror ebx,7 + add ebp,eax + add edx,DWORD PTR[48+rsp] + xor esi,ebx + mov edi,ebp + rol ebp,5 + add edx,esi + xor edi,ebx + ror eax,7 + add edx,ebp + add ecx,DWORD PTR[52+rsp] + xor edi,eax + mov esi,edx + rol edx,5 + add ecx,edi + xor esi,eax + ror ebp,7 + add ecx,edx + add ebx,DWORD PTR[56+rsp] + xor esi,ebp + mov edi,ecx + rol ecx,5 + add ebx,esi + xor edi,ebp + ror edx,7 + add ebx,ecx + add eax,DWORD PTR[60+rsp] + xor edi,edx + mov esi,ebx + rol ebx,5 + add eax,edi + ror ecx,7 + add eax,ebx + add eax,DWORD PTR[r8] + add esi,DWORD PTR[4+r8] + add ecx,DWORD PTR[8+r8] + mov DWORD PTR[r8],eax + add edx,DWORD PTR[12+r8] + mov DWORD PTR[4+r8],esi + add ebp,DWORD PTR[16+r8] + mov DWORD PTR[8+r8],ecx + mov DWORD PTR[12+r8],edx + mov DWORD PTR[16+r8],ebp + movaps xmm6,XMMWORD PTR[((-40-96))+r14] + movaps xmm7,XMMWORD PTR[((-40-80))+r14] + movaps xmm8,XMMWORD PTR[((-40-64))+r14] + movaps xmm9,XMMWORD PTR[((-40-48))+r14] + movaps xmm10,XMMWORD PTR[((-40-32))+r14] + movaps xmm11,XMMWORD PTR[((-40-16))+r14] + lea rsi,QWORD PTR[r14] + mov r14,QWORD PTR[((-40))+rsi] + mov r13,QWORD PTR[((-32))+rsi] + mov r12,QWORD PTR[((-24))+rsi] + mov rbp,QWORD PTR[((-16))+rsi] + mov rbx,QWORD PTR[((-8))+rsi] + lea rsp,QWORD PTR[rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha1_block_data_order_ssse3:: +sha1_block_data_order_ssse3 ENDP +ALIGN 64 +K_XX_XX:: + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 05a827999h,05a827999h,05a827999h,05a827999h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 06ed9eba1h,06ed9eba1h,06ed9eba1h,06ed9eba1h + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 08f1bbcdch,08f1bbcdch,08f1bbcdch,08f1bbcdch + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 0ca62c1d6h,0ca62c1d6h,0ca62c1d6h,0ca62c1d6h + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +DB 0fh,0eh,0dh,0ch,0bh,0ah,09h,08h,07h,06h,05h,04h,03h,02h,01h,00h +DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +DB 102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44 +DB 32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60 +DB 97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114 +DB 103,62,0 +ALIGN 64 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[64+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + + jmp $L$common_seh_tail +se_handler ENDP + +ALIGN 16 +shaext_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD 
PTR[$L$prologue_shaext] + cmp rbx,r10 + jb $L$common_seh_tail + + lea r10,QWORD PTR[$L$epilogue_shaext] + cmp rbx,r10 + jae $L$common_seh_tail + + lea rsi,QWORD PTR[((-8-64))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + + jmp $L$common_seh_tail +shaext_handler ENDP + +ALIGN 16 +ssse3_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$common_seh_tail + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$common_seh_tail + + mov rax,QWORD PTR[232+r8] + + lea rsi,QWORD PTR[((-40-96))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,12 + DD 0a548f3fch + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + +$L$common_seh_tail:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +ssse3_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha1_block_data_order + DD imagerel $L$SEH_end_sha1_block_data_order + DD imagerel $L$SEH_info_sha1_block_data_order + DD imagerel $L$SEH_begin_sha1_block_data_order_shaext + DD imagerel $L$SEH_end_sha1_block_data_order_shaext + DD imagerel $L$SEH_info_sha1_block_data_order_shaext + DD imagerel $L$SEH_begin_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_end_sha1_block_data_order_ssse3 + DD imagerel $L$SEH_info_sha1_block_data_order_ssse3 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha1_block_data_order:: +DB 9,0,0,0 + DD imagerel se_handler +$L$SEH_info_sha1_block_data_order_shaext:: +DB 9,0,0,0 + DD imagerel shaext_handler +$L$SEH_info_sha1_block_data_order_ssse3:: +DB 9,0,0,0 + DD imagerel ssse3_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha256-mb-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha256-mb-x86_64.asm new file mode 100644 index 00000000000000..6e5f865bbed68b --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha256-mb-x86_64.asm @@ -0,0 +1,3436 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR + +PUBLIC sha256_multi_block + +ALIGN 32 +sha256_multi_block PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + mov rcx,QWORD PTR[((OPENSSL_ia32cap_P+4))] + bt rcx,61 + jc 
_shaext_shortcut + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + and rsp,-256 + mov QWORD PTR[272+rsp],rax +$L$body:: + lea rbp,QWORD PTR[((K256+128))] + lea rbx,QWORD PTR[256+rsp] + lea rdi,QWORD PTR[128+rdi] + +$L$oop_grande:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rbp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rbp + mov r10,QWORD PTR[32+rsi] + mov ecx,DWORD PTR[40+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[8+rbx],ecx + cmovle r10,rbp + mov r11,QWORD PTR[48+rsi] + mov ecx,DWORD PTR[56+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[12+rbx],ecx + cmovle r11,rbp + test edx,edx + jz $L$done + + movdqu xmm8,XMMWORD PTR[((0-128))+rdi] + lea rax,QWORD PTR[128+rsp] + movdqu xmm9,XMMWORD PTR[((32-128))+rdi] + movdqu xmm10,XMMWORD PTR[((64-128))+rdi] + movdqu xmm11,XMMWORD PTR[((96-128))+rdi] + movdqu xmm12,XMMWORD PTR[((128-128))+rdi] + movdqu xmm13,XMMWORD PTR[((160-128))+rdi] + movdqu xmm14,XMMWORD PTR[((192-128))+rdi] + movdqu xmm15,XMMWORD PTR[((224-128))+rdi] + movdqu xmm6,XMMWORD PTR[$L$pbswap] + jmp $L$oop + +ALIGN 32 +$L$oop:: + movdqa xmm4,xmm10 + pxor xmm4,xmm9 + movd xmm5,DWORD PTR[r8] + movd xmm0,DWORD PTR[r9] + movd xmm1,DWORD PTR[r10] + movd xmm2,DWORD PTR[r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm12 +DB 102,15,56,0,238 + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(0-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movd xmm5,DWORD PTR[4+r8] + movd xmm0,DWORD PTR[4+r9] + movd xmm1,DWORD PTR[4+r10] + movd xmm2,DWORD PTR[4+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(16-128)+rax],xmm5 + paddd xmm5,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor 
xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm5 + pxor xmm7,xmm2 + + paddd xmm14,xmm5 + paddd xmm14,xmm7 + movd xmm5,DWORD PTR[8+r8] + movd xmm0,DWORD PTR[8+r9] + movd xmm1,DWORD PTR[8+r10] + movd xmm2,DWORD PTR[8+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm10 +DB 102,15,56,0,238 + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(32-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movd xmm5,DWORD PTR[12+r8] + movd xmm0,DWORD PTR[12+r9] + movd xmm1,DWORD PTR[12+r10] + movd xmm2,DWORD PTR[12+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(48-128)+rax],xmm5 + paddd xmm5,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm5 + pxor xmm7,xmm2 + + paddd xmm12,xmm5 + paddd xmm12,xmm7 + movd xmm5,DWORD PTR[16+r8] + movd xmm0,DWORD PTR[16+r9] + movd xmm1,DWORD PTR[16+r10] + movd xmm2,DWORD PTR[16+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm8 +DB 102,15,56,0,238 + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(64-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor 
xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movd xmm5,DWORD PTR[20+r8] + movd xmm0,DWORD PTR[20+r9] + movd xmm1,DWORD PTR[20+r10] + movd xmm2,DWORD PTR[20+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(80-128)+rax],xmm5 + paddd xmm5,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm5 + pxor xmm7,xmm2 + + paddd xmm10,xmm5 + paddd xmm10,xmm7 + movd xmm5,DWORD PTR[24+r8] + movd xmm0,DWORD PTR[24+r9] + movd xmm1,DWORD PTR[24+r10] + movd xmm2,DWORD PTR[24+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm14 +DB 102,15,56,0,238 + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(96-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movd xmm5,DWORD PTR[28+r8] + movd xmm0,DWORD PTR[28+r9] + movd xmm1,DWORD PTR[28+r10] + movd xmm2,DWORD PTR[28+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(112-128)+rax],xmm5 + paddd xmm5,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm5 + pxor xmm7,xmm2 + + paddd xmm8,xmm5 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + movd xmm5,DWORD PTR[32+r8] + movd xmm0,DWORD PTR[32+r9] + movd xmm1,DWORD PTR[32+r10] + movd xmm2,DWORD PTR[32+r11] + punpckldq xmm5,xmm1 + punpckldq 
xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm12 +DB 102,15,56,0,238 + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(128-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movd xmm5,DWORD PTR[36+r8] + movd xmm0,DWORD PTR[36+r9] + movd xmm1,DWORD PTR[36+r10] + movd xmm2,DWORD PTR[36+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(144-128)+rax],xmm5 + paddd xmm5,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm5 + pxor xmm7,xmm2 + + paddd xmm14,xmm5 + paddd xmm14,xmm7 + movd xmm5,DWORD PTR[40+r8] + movd xmm0,DWORD PTR[40+r9] + movd xmm1,DWORD PTR[40+r10] + movd xmm2,DWORD PTR[40+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm10 +DB 102,15,56,0,238 + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(160-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movd xmm5,DWORD PTR[44+r8] + movd xmm0,DWORD PTR[44+r9] + movd xmm1,DWORD PTR[44+r10] + movd xmm2,DWORD PTR[44+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(176-128)+rax],xmm5 + paddd xmm5,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + 
paddd xmm5,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm5 + pxor xmm7,xmm2 + + paddd xmm12,xmm5 + paddd xmm12,xmm7 + movd xmm5,DWORD PTR[48+r8] + movd xmm0,DWORD PTR[48+r9] + movd xmm1,DWORD PTR[48+r10] + movd xmm2,DWORD PTR[48+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm8 +DB 102,15,56,0,238 + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(192-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movd xmm5,DWORD PTR[52+r8] + movd xmm0,DWORD PTR[52+r9] + movd xmm1,DWORD PTR[52+r10] + movd xmm2,DWORD PTR[52+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(208-128)+rax],xmm5 + paddd xmm5,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm5 + pxor xmm7,xmm2 + + paddd xmm10,xmm5 + paddd xmm10,xmm7 + movd xmm5,DWORD PTR[56+r8] + movd xmm0,DWORD PTR[56+r9] + movd xmm1,DWORD PTR[56+r10] + movd xmm2,DWORD PTR[56+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm14 +DB 102,15,56,0,238 + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(224-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 
+ paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movd xmm5,DWORD PTR[60+r8] + lea r8,QWORD PTR[64+r8] + movd xmm0,DWORD PTR[60+r9] + lea r9,QWORD PTR[64+r9] + movd xmm1,DWORD PTR[60+r10] + lea r10,QWORD PTR[64+r10] + movd xmm2,DWORD PTR[60+r11] + lea r11,QWORD PTR[64+r11] + punpckldq xmm5,xmm1 + punpckldq xmm0,xmm2 + punpckldq xmm5,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 +DB 102,15,56,0,238 + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(240-128)+rax],xmm5 + paddd xmm5,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + prefetcht0 [63+r8] + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + prefetcht0 [63+r9] + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + prefetcht0 [63+r10] + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + prefetcht0 [63+r11] + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm5 + pxor xmm7,xmm2 + + paddd xmm8,xmm5 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + movdqu xmm5,XMMWORD PTR[((0-128))+rax] + mov ecx,3 + jmp $L$oop_16_xx +ALIGN 32 +$L$oop_16_xx:: + movdqa xmm6,XMMWORD PTR[((16-128))+rax] + paddd xmm5,XMMWORD PTR[((144-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((224-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm12 + + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(0-128)+rax],xmm5 + paddd xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movdqa xmm5,XMMWORD PTR[((32-128))+rax] + paddd xmm6,XMMWORD PTR[((160-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((240-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + 
pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 + + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(16-128)+rax],xmm6 + paddd xmm6,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm6 + pxor xmm7,xmm2 + + paddd xmm14,xmm6 + paddd xmm14,xmm7 + movdqa xmm6,XMMWORD PTR[((48-128))+rax] + paddd xmm5,XMMWORD PTR[((176-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((0-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm10 + + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(32-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movdqa xmm5,XMMWORD PTR[((64-128))+rax] + paddd xmm6,XMMWORD PTR[((192-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((16-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 + + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(48-128)+rax],xmm6 + paddd xmm6,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld 
xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm6 + pxor xmm7,xmm2 + + paddd xmm12,xmm6 + paddd xmm12,xmm7 + movdqa xmm6,XMMWORD PTR[((80-128))+rax] + paddd xmm5,XMMWORD PTR[((208-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((32-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm8 + + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(64-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movdqa xmm5,XMMWORD PTR[((96-128))+rax] + paddd xmm6,XMMWORD PTR[((224-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((48-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 + + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(80-128)+rax],xmm6 + paddd xmm6,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm6 + pxor xmm7,xmm2 + + paddd xmm10,xmm6 + paddd xmm10,xmm7 + movdqa xmm6,XMMWORD PTR[((112-128))+rax] + paddd xmm5,XMMWORD PTR[((240-128))+rax] + + movdqa 
xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((64-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm14 + + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(96-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movdqa xmm5,XMMWORD PTR[((128-128))+rax] + paddd xmm6,XMMWORD PTR[((0-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((80-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 + + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(112-128)+rax],xmm6 + paddd xmm6,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 + pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm6 + pxor xmm7,xmm2 + + paddd xmm8,xmm6 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + movdqa xmm6,XMMWORD PTR[((144-128))+rax] + paddd xmm5,XMMWORD PTR[((16-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((96-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm12 + + movdqa xmm2,xmm12 + + psrld xmm7,6 + movdqa xmm1,xmm12 + pslld xmm2,7 + movdqa XMMWORD PTR[(128-128)+rax],xmm5 + paddd 
xmm5,xmm15 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-128))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm12 + + pxor xmm7,xmm2 + movdqa xmm3,xmm12 + pslld xmm2,26-21 + pandn xmm0,xmm14 + pand xmm3,xmm13 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm8 + pxor xmm7,xmm2 + movdqa xmm2,xmm8 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm9 + movdqa xmm7,xmm8 + pslld xmm2,10 + pxor xmm3,xmm8 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm15,xmm9 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm15,xmm4 + paddd xmm11,xmm5 + pxor xmm7,xmm2 + + paddd xmm15,xmm5 + paddd xmm15,xmm7 + movdqa xmm5,XMMWORD PTR[((160-128))+rax] + paddd xmm6,XMMWORD PTR[((32-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((112-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm11 + + movdqa xmm2,xmm11 + + psrld xmm7,6 + movdqa xmm1,xmm11 + pslld xmm2,7 + movdqa XMMWORD PTR[(144-128)+rax],xmm6 + paddd xmm6,xmm14 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-96))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm11 + + pxor xmm7,xmm2 + movdqa xmm4,xmm11 + pslld xmm2,26-21 + pandn xmm0,xmm13 + pand xmm4,xmm12 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm15 + pxor xmm7,xmm2 + movdqa xmm2,xmm15 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm8 + movdqa xmm7,xmm15 + pslld xmm2,10 + pxor xmm4,xmm15 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm14,xmm8 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm14,xmm3 + paddd xmm10,xmm6 + pxor xmm7,xmm2 + + paddd xmm14,xmm6 + paddd xmm14,xmm7 + movdqa xmm6,XMMWORD PTR[((176-128))+rax] + paddd xmm5,XMMWORD PTR[((48-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((128-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm10 + + movdqa xmm2,xmm10 + + psrld xmm7,6 + movdqa xmm1,xmm10 + pslld xmm2,7 + movdqa XMMWORD PTR[(160-128)+rax],xmm5 + paddd xmm5,xmm13 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[((-64))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm10 + + pxor xmm7,xmm2 + movdqa xmm3,xmm10 + pslld xmm2,26-21 + pandn xmm0,xmm12 + pand xmm3,xmm11 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm14 + pxor xmm7,xmm2 + movdqa xmm2,xmm14 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm15 + movdqa xmm7,xmm14 + pslld xmm2,10 + pxor xmm3,xmm14 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm13,xmm15 
+ pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm13,xmm4 + paddd xmm9,xmm5 + pxor xmm7,xmm2 + + paddd xmm13,xmm5 + paddd xmm13,xmm7 + movdqa xmm5,XMMWORD PTR[((192-128))+rax] + paddd xmm6,XMMWORD PTR[((64-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((144-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm9 + + movdqa xmm2,xmm9 + + psrld xmm7,6 + movdqa xmm1,xmm9 + pslld xmm2,7 + movdqa XMMWORD PTR[(176-128)+rax],xmm6 + paddd xmm6,xmm12 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[((-32))+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm9 + + pxor xmm7,xmm2 + movdqa xmm4,xmm9 + pslld xmm2,26-21 + pandn xmm0,xmm11 + pand xmm4,xmm10 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm13 + pxor xmm7,xmm2 + movdqa xmm2,xmm13 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm14 + movdqa xmm7,xmm13 + pslld xmm2,10 + pxor xmm4,xmm13 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm12,xmm14 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm12,xmm3 + paddd xmm8,xmm6 + pxor xmm7,xmm2 + + paddd xmm12,xmm6 + paddd xmm12,xmm7 + movdqa xmm6,XMMWORD PTR[((208-128))+rax] + paddd xmm5,XMMWORD PTR[((80-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((160-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm8 + + movdqa xmm2,xmm8 + + psrld xmm7,6 + movdqa xmm1,xmm8 + pslld xmm2,7 + movdqa XMMWORD PTR[(192-128)+rax],xmm5 + paddd xmm5,xmm11 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm8 + + pxor xmm7,xmm2 + movdqa xmm3,xmm8 + pslld xmm2,26-21 + pandn xmm0,xmm10 + pand xmm3,xmm9 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm12 + pxor xmm7,xmm2 + movdqa xmm2,xmm12 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm13 + movdqa xmm7,xmm12 + pslld xmm2,10 + pxor xmm3,xmm12 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm11,xmm13 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm11,xmm4 + paddd xmm15,xmm5 + pxor xmm7,xmm2 + + paddd xmm11,xmm5 + paddd xmm11,xmm7 + movdqa xmm5,XMMWORD PTR[((224-128))+rax] + paddd xmm6,XMMWORD PTR[((96-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((176-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld 
xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm15 + + movdqa xmm2,xmm15 + + psrld xmm7,6 + movdqa xmm1,xmm15 + pslld xmm2,7 + movdqa XMMWORD PTR[(208-128)+rax],xmm6 + paddd xmm6,xmm10 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[32+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm15 + + pxor xmm7,xmm2 + movdqa xmm4,xmm15 + pslld xmm2,26-21 + pandn xmm0,xmm9 + pand xmm4,xmm8 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm11 + pxor xmm7,xmm2 + movdqa xmm2,xmm11 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm12 + movdqa xmm7,xmm11 + pslld xmm2,10 + pxor xmm4,xmm11 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm10,xmm12 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm10,xmm3 + paddd xmm14,xmm6 + pxor xmm7,xmm2 + + paddd xmm10,xmm6 + paddd xmm10,xmm7 + movdqa xmm6,XMMWORD PTR[((240-128))+rax] + paddd xmm5,XMMWORD PTR[((112-128))+rax] + + movdqa xmm7,xmm6 + movdqa xmm1,xmm6 + psrld xmm7,3 + movdqa xmm2,xmm6 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((192-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm3,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm3 + + psrld xmm3,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + psrld xmm3,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm3 + pxor xmm0,xmm1 + paddd xmm5,xmm0 + movdqa xmm7,xmm14 + + movdqa xmm2,xmm14 + + psrld xmm7,6 + movdqa xmm1,xmm14 + pslld xmm2,7 + movdqa XMMWORD PTR[(224-128)+rax],xmm5 + paddd xmm5,xmm9 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm5,XMMWORD PTR[64+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm14 + + pxor xmm7,xmm2 + movdqa xmm3,xmm14 + pslld xmm2,26-21 + pandn xmm0,xmm8 + pand xmm3,xmm15 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm10 + pxor xmm7,xmm2 + movdqa xmm2,xmm10 + psrld xmm1,2 + paddd xmm5,xmm7 + pxor xmm0,xmm3 + movdqa xmm3,xmm11 + movdqa xmm7,xmm10 + pslld xmm2,10 + pxor xmm3,xmm10 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm5,xmm0 + pslld xmm2,19-10 + pand xmm4,xmm3 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm9,xmm11 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm9,xmm4 + paddd xmm13,xmm5 + pxor xmm7,xmm2 + + paddd xmm9,xmm5 + paddd xmm9,xmm7 + movdqa xmm5,XMMWORD PTR[((0-128))+rax] + paddd xmm6,XMMWORD PTR[((128-128))+rax] + + movdqa xmm7,xmm5 + movdqa xmm1,xmm5 + psrld xmm7,3 + movdqa xmm2,xmm5 + + psrld xmm1,7 + movdqa xmm0,XMMWORD PTR[((208-128))+rax] + pslld xmm2,14 + pxor xmm7,xmm1 + psrld xmm1,18-7 + movdqa xmm4,xmm0 + pxor xmm7,xmm2 + pslld xmm2,25-14 + pxor xmm7,xmm1 + psrld xmm0,10 + movdqa xmm1,xmm4 + + psrld xmm4,17 + pxor xmm7,xmm2 + pslld xmm1,13 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + psrld xmm4,19-17 + pxor xmm0,xmm1 + pslld xmm1,15-13 + pxor xmm0,xmm4 + pxor xmm0,xmm1 + paddd xmm6,xmm0 + movdqa xmm7,xmm13 + + movdqa xmm2,xmm13 + + psrld xmm7,6 + movdqa xmm1,xmm13 + pslld xmm2,7 + movdqa XMMWORD PTR[(240-128)+rax],xmm6 + paddd xmm6,xmm8 + + psrld xmm1,11 + pxor xmm7,xmm2 + pslld xmm2,21-7 + paddd xmm6,XMMWORD PTR[96+rbp] + pxor xmm7,xmm1 + + psrld xmm1,25-11 + movdqa xmm0,xmm13 + + pxor xmm7,xmm2 + movdqa xmm4,xmm13 + pslld xmm2,26-21 + pandn xmm0,xmm15 + pand xmm4,xmm14 + pxor xmm7,xmm1 + + + movdqa xmm1,xmm9 + pxor xmm7,xmm2 + movdqa xmm2,xmm9 + psrld xmm1,2 + paddd xmm6,xmm7 + pxor xmm0,xmm4 + movdqa xmm4,xmm10 + movdqa xmm7,xmm9 + pslld xmm2,10 
+ pxor xmm4,xmm9 + + + psrld xmm7,13 + pxor xmm1,xmm2 + paddd xmm6,xmm0 + pslld xmm2,19-10 + pand xmm3,xmm4 + pxor xmm1,xmm7 + + + psrld xmm7,22-13 + pxor xmm1,xmm2 + movdqa xmm8,xmm10 + pslld xmm2,30-19 + pxor xmm7,xmm1 + pxor xmm8,xmm3 + paddd xmm12,xmm6 + pxor xmm7,xmm2 + + paddd xmm8,xmm6 + paddd xmm8,xmm7 + lea rbp,QWORD PTR[256+rbp] + dec ecx + jnz $L$oop_16_xx + + mov ecx,1 + lea rbp,QWORD PTR[((K256+128))] + + movdqa xmm7,XMMWORD PTR[rbx] + cmp ecx,DWORD PTR[rbx] + pxor xmm0,xmm0 + cmovge r8,rbp + cmp ecx,DWORD PTR[4+rbx] + movdqa xmm6,xmm7 + cmovge r9,rbp + cmp ecx,DWORD PTR[8+rbx] + pcmpgtd xmm6,xmm0 + cmovge r10,rbp + cmp ecx,DWORD PTR[12+rbx] + paddd xmm7,xmm6 + cmovge r11,rbp + + movdqu xmm0,XMMWORD PTR[((0-128))+rdi] + pand xmm8,xmm6 + movdqu xmm1,XMMWORD PTR[((32-128))+rdi] + pand xmm9,xmm6 + movdqu xmm2,XMMWORD PTR[((64-128))+rdi] + pand xmm10,xmm6 + movdqu xmm5,XMMWORD PTR[((96-128))+rdi] + pand xmm11,xmm6 + paddd xmm8,xmm0 + movdqu xmm0,XMMWORD PTR[((128-128))+rdi] + pand xmm12,xmm6 + paddd xmm9,xmm1 + movdqu xmm1,XMMWORD PTR[((160-128))+rdi] + pand xmm13,xmm6 + paddd xmm10,xmm2 + movdqu xmm2,XMMWORD PTR[((192-128))+rdi] + pand xmm14,xmm6 + paddd xmm11,xmm5 + movdqu xmm5,XMMWORD PTR[((224-128))+rdi] + pand xmm15,xmm6 + paddd xmm12,xmm0 + paddd xmm13,xmm1 + movdqu XMMWORD PTR[(0-128)+rdi],xmm8 + paddd xmm14,xmm2 + movdqu XMMWORD PTR[(32-128)+rdi],xmm9 + paddd xmm15,xmm5 + movdqu XMMWORD PTR[(64-128)+rdi],xmm10 + movdqu XMMWORD PTR[(96-128)+rdi],xmm11 + movdqu XMMWORD PTR[(128-128)+rdi],xmm12 + movdqu XMMWORD PTR[(160-128)+rdi],xmm13 + movdqu XMMWORD PTR[(192-128)+rdi],xmm14 + movdqu XMMWORD PTR[(224-128)+rdi],xmm15 + + movdqa XMMWORD PTR[rbx],xmm7 + movdqa xmm6,XMMWORD PTR[$L$pbswap] + dec edx + jnz $L$oop + + mov edx,DWORD PTR[280+rsp] + lea rdi,QWORD PTR[16+rdi] + lea rsi,QWORD PTR[64+rsi] + dec edx + jnz $L$oop_grande + +$L$done:: + mov rax,QWORD PTR[272+rsp] + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_multi_block:: +sha256_multi_block ENDP + +ALIGN 32 +sha256_multi_block_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_multi_block_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + mov rax,rsp + push rbx + push rbp + lea rsp,QWORD PTR[((-168))+rsp] + movaps XMMWORD PTR[rsp],xmm6 + movaps XMMWORD PTR[16+rsp],xmm7 + movaps XMMWORD PTR[32+rsp],xmm8 + movaps XMMWORD PTR[48+rsp],xmm9 + movaps XMMWORD PTR[(-120)+rax],xmm10 + movaps XMMWORD PTR[(-104)+rax],xmm11 + movaps XMMWORD PTR[(-88)+rax],xmm12 + movaps XMMWORD PTR[(-72)+rax],xmm13 + movaps XMMWORD PTR[(-56)+rax],xmm14 + movaps XMMWORD PTR[(-40)+rax],xmm15 + sub rsp,288 + shl edx,1 + and rsp,-256 + lea rdi,QWORD PTR[128+rdi] + mov QWORD PTR[272+rsp],rax +$L$body_shaext:: + lea rbx,QWORD PTR[256+rsp] + lea rbp,QWORD PTR[((K256_shaext+128))] + +$L$oop_grande_shaext:: + mov DWORD PTR[280+rsp],edx + xor edx,edx + mov r8,QWORD PTR[rsi] + mov ecx,DWORD PTR[8+rsi] 
+ cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[rbx],ecx + cmovle r8,rsp + mov r9,QWORD PTR[16+rsi] + mov ecx,DWORD PTR[24+rsi] + cmp ecx,edx + cmovg edx,ecx + test ecx,ecx + mov DWORD PTR[4+rbx],ecx + cmovle r9,rsp + test edx,edx + jz $L$done_shaext + + movq xmm12,QWORD PTR[((0-128))+rdi] + movq xmm4,QWORD PTR[((32-128))+rdi] + movq xmm13,QWORD PTR[((64-128))+rdi] + movq xmm5,QWORD PTR[((96-128))+rdi] + movq xmm8,QWORD PTR[((128-128))+rdi] + movq xmm9,QWORD PTR[((160-128))+rdi] + movq xmm10,QWORD PTR[((192-128))+rdi] + movq xmm11,QWORD PTR[((224-128))+rdi] + + punpckldq xmm12,xmm4 + punpckldq xmm13,xmm5 + punpckldq xmm8,xmm9 + punpckldq xmm10,xmm11 + movdqa xmm3,XMMWORD PTR[((K256_shaext-16))] + + movdqa xmm14,xmm12 + movdqa xmm15,xmm13 + punpcklqdq xmm12,xmm8 + punpcklqdq xmm13,xmm10 + punpckhqdq xmm14,xmm8 + punpckhqdq xmm15,xmm10 + + pshufd xmm12,xmm12,27 + pshufd xmm13,xmm13,27 + pshufd xmm14,xmm14,27 + pshufd xmm15,xmm15,27 + jmp $L$oop_shaext + +ALIGN 32 +$L$oop_shaext:: + movdqu xmm4,XMMWORD PTR[r8] + movdqu xmm8,XMMWORD PTR[r9] + movdqu xmm5,XMMWORD PTR[16+r8] + movdqu xmm9,XMMWORD PTR[16+r9] + movdqu xmm6,XMMWORD PTR[32+r8] +DB 102,15,56,0,227 + movdqu xmm10,XMMWORD PTR[32+r9] +DB 102,68,15,56,0,195 + movdqu xmm7,XMMWORD PTR[48+r8] + lea r8,QWORD PTR[64+r8] + movdqu xmm11,XMMWORD PTR[48+r9] + lea r9,QWORD PTR[64+r9] + + movdqa xmm0,XMMWORD PTR[((0-128))+rbp] +DB 102,15,56,0,235 + paddd xmm0,xmm4 + pxor xmm4,xmm12 + movdqa xmm1,xmm0 + movdqa xmm2,XMMWORD PTR[((0-128))+rbp] +DB 102,68,15,56,0,203 + paddd xmm2,xmm8 + movdqa XMMWORD PTR[80+rsp],xmm13 +DB 69,15,56,203,236 + pxor xmm8,xmm14 + movdqa xmm0,xmm2 + movdqa XMMWORD PTR[112+rsp],xmm15 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh + pxor xmm4,xmm12 + movdqa XMMWORD PTR[64+rsp],xmm12 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + pxor xmm8,xmm14 + movdqa XMMWORD PTR[96+rsp],xmm14 + movdqa xmm1,XMMWORD PTR[((16-128))+rbp] + paddd xmm1,xmm5 +DB 102,15,56,0,243 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((16-128))+rbp] + paddd xmm2,xmm9 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + prefetcht0 [127+r8] +DB 102,15,56,0,251 +DB 102,68,15,56,0,211 + prefetcht0 [127+r9] +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh +DB 102,68,15,56,0,219 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((32-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((32-128))+rbp] + paddd xmm2,xmm10 +DB 69,15,56,203,236 +DB 69,15,56,204,193 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh +DB 102,15,58,15,222,4 + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((48-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((48-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,00eh + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((64-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((64-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 
102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,00eh + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((80-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((80-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,00eh + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((96-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 +DB 69,15,56,204,193 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((96-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 102,15,58,15,222,4 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,00eh + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((112-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((112-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,00eh + paddd xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((128-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((128-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,00eh + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((144-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((144-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,00eh + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 +DB 15,56,204,229 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((160-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 +DB 69,15,56,204,193 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((160-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm7 +DB 102,15,58,15,222,4 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,00eh + paddd xmm4,xmm3 + movdqa xmm3,xmm11 +DB 102,65,15,58,15,218,4 +DB 15,56,204,238 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((176-128))+rbp] + paddd xmm1,xmm7 +DB 69,15,56,203,247 +DB 69,15,56,204,202 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((176-128))+rbp] + paddd xmm8,xmm3 + paddd xmm2,xmm11 +DB 15,56,205,231 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm4 +DB 102,15,58,15,223,4 +DB 69,15,56,203,254 +DB 69,15,56,205,195 + pshufd xmm0,xmm1,00eh + paddd 
xmm5,xmm3 + movdqa xmm3,xmm8 +DB 102,65,15,58,15,219,4 +DB 15,56,204,247 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((192-128))+rbp] + paddd xmm1,xmm4 +DB 69,15,56,203,247 +DB 69,15,56,204,211 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((192-128))+rbp] + paddd xmm9,xmm3 + paddd xmm2,xmm8 +DB 15,56,205,236 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm5 +DB 102,15,58,15,220,4 +DB 69,15,56,203,254 +DB 69,15,56,205,200 + pshufd xmm0,xmm1,00eh + paddd xmm6,xmm3 + movdqa xmm3,xmm9 +DB 102,65,15,58,15,216,4 +DB 15,56,204,252 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((208-128))+rbp] + paddd xmm1,xmm5 +DB 69,15,56,203,247 +DB 69,15,56,204,216 + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((208-128))+rbp] + paddd xmm10,xmm3 + paddd xmm2,xmm9 +DB 15,56,205,245 +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + movdqa xmm3,xmm6 +DB 102,15,58,15,221,4 +DB 69,15,56,203,254 +DB 69,15,56,205,209 + pshufd xmm0,xmm1,00eh + paddd xmm7,xmm3 + movdqa xmm3,xmm10 +DB 102,65,15,58,15,217,4 + nop +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm1,XMMWORD PTR[((224-128))+rbp] + paddd xmm1,xmm6 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + movdqa xmm2,XMMWORD PTR[((224-128))+rbp] + paddd xmm11,xmm3 + paddd xmm2,xmm10 +DB 15,56,205,254 + nop +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + mov ecx,1 + pxor xmm6,xmm6 +DB 69,15,56,203,254 +DB 69,15,56,205,218 + pshufd xmm0,xmm1,00eh + movdqa xmm1,XMMWORD PTR[((240-128))+rbp] + paddd xmm1,xmm7 + movq xmm7,QWORD PTR[rbx] + nop +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + movdqa xmm2,XMMWORD PTR[((240-128))+rbp] + paddd xmm2,xmm11 +DB 69,15,56,203,247 + + movdqa xmm0,xmm1 + cmp ecx,DWORD PTR[rbx] + cmovge r8,rsp + cmp ecx,DWORD PTR[4+rbx] + cmovge r9,rsp + pshufd xmm9,xmm7,000h +DB 69,15,56,203,236 + movdqa xmm0,xmm2 + pshufd xmm10,xmm7,055h + movdqa xmm11,xmm7 +DB 69,15,56,203,254 + pshufd xmm0,xmm1,00eh + pcmpgtd xmm9,xmm6 + pcmpgtd xmm10,xmm6 +DB 69,15,56,203,229 + pshufd xmm0,xmm2,00eh + pcmpgtd xmm11,xmm6 + movdqa xmm3,XMMWORD PTR[((K256_shaext-16))] +DB 69,15,56,203,247 + + pand xmm13,xmm9 + pand xmm15,xmm10 + pand xmm12,xmm9 + pand xmm14,xmm10 + paddd xmm11,xmm7 + + paddd xmm13,XMMWORD PTR[80+rsp] + paddd xmm15,XMMWORD PTR[112+rsp] + paddd xmm12,XMMWORD PTR[64+rsp] + paddd xmm14,XMMWORD PTR[96+rsp] + + movq QWORD PTR[rbx],xmm11 + dec edx + jnz $L$oop_shaext + + mov edx,DWORD PTR[280+rsp] + + pshufd xmm12,xmm12,27 + pshufd xmm13,xmm13,27 + pshufd xmm14,xmm14,27 + pshufd xmm15,xmm15,27 + + movdqa xmm5,xmm12 + movdqa xmm6,xmm13 + punpckldq xmm12,xmm14 + punpckhdq xmm5,xmm14 + punpckldq xmm13,xmm15 + punpckhdq xmm6,xmm15 + + movq QWORD PTR[(0-128)+rdi],xmm12 + psrldq xmm12,8 + movq QWORD PTR[(128-128)+rdi],xmm5 + psrldq xmm5,8 + movq QWORD PTR[(32-128)+rdi],xmm12 + movq QWORD PTR[(160-128)+rdi],xmm5 + + movq QWORD PTR[(64-128)+rdi],xmm13 + psrldq xmm13,8 + movq QWORD PTR[(192-128)+rdi],xmm6 + psrldq xmm6,8 + movq QWORD PTR[(96-128)+rdi],xmm13 + movq QWORD PTR[(224-128)+rdi],xmm6 + + lea rdi,QWORD PTR[8+rdi] + lea rsi,QWORD PTR[32+rsi] + dec edx + jnz $L$oop_grande_shaext + +$L$done_shaext:: + + movaps xmm6,XMMWORD PTR[((-184))+rax] + movaps xmm7,XMMWORD PTR[((-168))+rax] + movaps xmm8,XMMWORD PTR[((-152))+rax] + movaps xmm9,XMMWORD PTR[((-136))+rax] + movaps xmm10,XMMWORD PTR[((-120))+rax] + movaps xmm11,XMMWORD PTR[((-104))+rax] + movaps xmm12,XMMWORD PTR[((-88))+rax] + movaps xmm13,XMMWORD PTR[((-72))+rax] + movaps xmm14,XMMWORD PTR[((-56))+rax] + movaps xmm15,XMMWORD PTR[((-40))+rax] + mov rbp,QWORD 
PTR[((-16))+rax] + mov rbx,QWORD PTR[((-8))+rax] + lea rsp,QWORD PTR[rax] +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_multi_block_shaext:: +sha256_multi_block_shaext ENDP +ALIGN 256 +K256:: + DD 1116352408,1116352408,1116352408,1116352408 + DD 1116352408,1116352408,1116352408,1116352408 + DD 1899447441,1899447441,1899447441,1899447441 + DD 1899447441,1899447441,1899447441,1899447441 + DD 3049323471,3049323471,3049323471,3049323471 + DD 3049323471,3049323471,3049323471,3049323471 + DD 3921009573,3921009573,3921009573,3921009573 + DD 3921009573,3921009573,3921009573,3921009573 + DD 961987163,961987163,961987163,961987163 + DD 961987163,961987163,961987163,961987163 + DD 1508970993,1508970993,1508970993,1508970993 + DD 1508970993,1508970993,1508970993,1508970993 + DD 2453635748,2453635748,2453635748,2453635748 + DD 2453635748,2453635748,2453635748,2453635748 + DD 2870763221,2870763221,2870763221,2870763221 + DD 2870763221,2870763221,2870763221,2870763221 + DD 3624381080,3624381080,3624381080,3624381080 + DD 3624381080,3624381080,3624381080,3624381080 + DD 310598401,310598401,310598401,310598401 + DD 310598401,310598401,310598401,310598401 + DD 607225278,607225278,607225278,607225278 + DD 607225278,607225278,607225278,607225278 + DD 1426881987,1426881987,1426881987,1426881987 + DD 1426881987,1426881987,1426881987,1426881987 + DD 1925078388,1925078388,1925078388,1925078388 + DD 1925078388,1925078388,1925078388,1925078388 + DD 2162078206,2162078206,2162078206,2162078206 + DD 2162078206,2162078206,2162078206,2162078206 + DD 2614888103,2614888103,2614888103,2614888103 + DD 2614888103,2614888103,2614888103,2614888103 + DD 3248222580,3248222580,3248222580,3248222580 + DD 3248222580,3248222580,3248222580,3248222580 + DD 3835390401,3835390401,3835390401,3835390401 + DD 3835390401,3835390401,3835390401,3835390401 + DD 4022224774,4022224774,4022224774,4022224774 + DD 4022224774,4022224774,4022224774,4022224774 + DD 264347078,264347078,264347078,264347078 + DD 264347078,264347078,264347078,264347078 + DD 604807628,604807628,604807628,604807628 + DD 604807628,604807628,604807628,604807628 + DD 770255983,770255983,770255983,770255983 + DD 770255983,770255983,770255983,770255983 + DD 1249150122,1249150122,1249150122,1249150122 + DD 1249150122,1249150122,1249150122,1249150122 + DD 1555081692,1555081692,1555081692,1555081692 + DD 1555081692,1555081692,1555081692,1555081692 + DD 1996064986,1996064986,1996064986,1996064986 + DD 1996064986,1996064986,1996064986,1996064986 + DD 2554220882,2554220882,2554220882,2554220882 + DD 2554220882,2554220882,2554220882,2554220882 + DD 2821834349,2821834349,2821834349,2821834349 + DD 2821834349,2821834349,2821834349,2821834349 + DD 2952996808,2952996808,2952996808,2952996808 + DD 2952996808,2952996808,2952996808,2952996808 + DD 3210313671,3210313671,3210313671,3210313671 + DD 3210313671,3210313671,3210313671,3210313671 + DD 3336571891,3336571891,3336571891,3336571891 + DD 3336571891,3336571891,3336571891,3336571891 + DD 3584528711,3584528711,3584528711,3584528711 + DD 3584528711,3584528711,3584528711,3584528711 + DD 113926993,113926993,113926993,113926993 + DD 113926993,113926993,113926993,113926993 + DD 338241895,338241895,338241895,338241895 + DD 338241895,338241895,338241895,338241895 + DD 666307205,666307205,666307205,666307205 + DD 666307205,666307205,666307205,666307205 + DD 773529912,773529912,773529912,773529912 + DD 773529912,773529912,773529912,773529912 + DD 
1294757372,1294757372,1294757372,1294757372 + DD 1294757372,1294757372,1294757372,1294757372 + DD 1396182291,1396182291,1396182291,1396182291 + DD 1396182291,1396182291,1396182291,1396182291 + DD 1695183700,1695183700,1695183700,1695183700 + DD 1695183700,1695183700,1695183700,1695183700 + DD 1986661051,1986661051,1986661051,1986661051 + DD 1986661051,1986661051,1986661051,1986661051 + DD 2177026350,2177026350,2177026350,2177026350 + DD 2177026350,2177026350,2177026350,2177026350 + DD 2456956037,2456956037,2456956037,2456956037 + DD 2456956037,2456956037,2456956037,2456956037 + DD 2730485921,2730485921,2730485921,2730485921 + DD 2730485921,2730485921,2730485921,2730485921 + DD 2820302411,2820302411,2820302411,2820302411 + DD 2820302411,2820302411,2820302411,2820302411 + DD 3259730800,3259730800,3259730800,3259730800 + DD 3259730800,3259730800,3259730800,3259730800 + DD 3345764771,3345764771,3345764771,3345764771 + DD 3345764771,3345764771,3345764771,3345764771 + DD 3516065817,3516065817,3516065817,3516065817 + DD 3516065817,3516065817,3516065817,3516065817 + DD 3600352804,3600352804,3600352804,3600352804 + DD 3600352804,3600352804,3600352804,3600352804 + DD 4094571909,4094571909,4094571909,4094571909 + DD 4094571909,4094571909,4094571909,4094571909 + DD 275423344,275423344,275423344,275423344 + DD 275423344,275423344,275423344,275423344 + DD 430227734,430227734,430227734,430227734 + DD 430227734,430227734,430227734,430227734 + DD 506948616,506948616,506948616,506948616 + DD 506948616,506948616,506948616,506948616 + DD 659060556,659060556,659060556,659060556 + DD 659060556,659060556,659060556,659060556 + DD 883997877,883997877,883997877,883997877 + DD 883997877,883997877,883997877,883997877 + DD 958139571,958139571,958139571,958139571 + DD 958139571,958139571,958139571,958139571 + DD 1322822218,1322822218,1322822218,1322822218 + DD 1322822218,1322822218,1322822218,1322822218 + DD 1537002063,1537002063,1537002063,1537002063 + DD 1537002063,1537002063,1537002063,1537002063 + DD 1747873779,1747873779,1747873779,1747873779 + DD 1747873779,1747873779,1747873779,1747873779 + DD 1955562222,1955562222,1955562222,1955562222 + DD 1955562222,1955562222,1955562222,1955562222 + DD 2024104815,2024104815,2024104815,2024104815 + DD 2024104815,2024104815,2024104815,2024104815 + DD 2227730452,2227730452,2227730452,2227730452 + DD 2227730452,2227730452,2227730452,2227730452 + DD 2361852424,2361852424,2361852424,2361852424 + DD 2361852424,2361852424,2361852424,2361852424 + DD 2428436474,2428436474,2428436474,2428436474 + DD 2428436474,2428436474,2428436474,2428436474 + DD 2756734187,2756734187,2756734187,2756734187 + DD 2756734187,2756734187,2756734187,2756734187 + DD 3204031479,3204031479,3204031479,3204031479 + DD 3204031479,3204031479,3204031479,3204031479 + DD 3329325298,3329325298,3329325298,3329325298 + DD 3329325298,3329325298,3329325298,3329325298 +$L$pbswap:: + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh +K256_shaext:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + 
DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h +DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111 +DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114 +DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71 +DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112 +DB 101,110,115,115,108,46,111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[272+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + + lea rsi,QWORD PTR[((-24-160))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,20 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha256_multi_block + DD imagerel $L$SEH_end_sha256_multi_block + DD imagerel $L$SEH_info_sha256_multi_block + DD imagerel $L$SEH_begin_sha256_multi_block_shaext + DD imagerel $L$SEH_end_sha256_multi_block_shaext + DD imagerel $L$SEH_info_sha256_multi_block_shaext +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha256_multi_block:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body,imagerel $L$epilogue +$L$SEH_info_sha256_multi_block_shaext:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$body_shaext,imagerel $L$epilogue_shaext + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha256-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha256-x86_64.asm new file mode 100644 index 00000000000000..3b1559934f6c27 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha256-x86_64.asm @@ -0,0 +1,3270 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC sha256_block_data_order + +ALIGN 16 +sha256_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + lea r11,QWORD PTR[OPENSSL_ia32cap_P] + mov r9d,DWORD PTR[r11] + mov r10d,DWORD PTR[4+r11] + mov r11d,DWORD PTR[8+r11] + test r11d,536870912 + jnz _shaext_shortcut + test r10d,512 + jnz $L$ssse3_shortcut + push rbx + push rbp + 
push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*4+4*8 + lea rdx,QWORD PTR[rdx*4+rsi] + and rsp,-64 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 +$L$prologue:: + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + jmp $L$loop + +ALIGN 16 +$L$loop:: + mov edi,ebx + lea rbp,QWORD PTR[K256] + xor edi,ecx + mov r12d,DWORD PTR[rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD PTR[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD PTR[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,QWORD PTR[4+rbp] + add r11d,r14d + mov r12d,DWORD PTR[4+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD PTR[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD PTR[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,QWORD PTR[4+rbp] + add r10d,r14d + mov r12d,DWORD PTR[8+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD PTR[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD PTR[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,QWORD PTR[4+rbp] + add r9d,r14d + mov r12d,DWORD PTR[12+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD PTR[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD PTR[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,QWORD PTR[20+rbp] + add r8d,r14d + mov r12d,DWORD PTR[16+rsi] + mov r13d,eax + mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD PTR[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD PTR[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,QWORD PTR[4+rbp] + add edx,r14d + mov r12d,DWORD PTR[20+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD PTR[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 
+ xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD PTR[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,QWORD PTR[4+rbp] + add ecx,r14d + mov r12d,DWORD PTR[24+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD PTR[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD PTR[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,QWORD PTR[4+rbp] + add ebx,r14d + mov r12d,DWORD PTR[28+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD PTR[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD PTR[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,QWORD PTR[20+rbp] + add eax,r14d + mov r12d,DWORD PTR[32+rsi] + mov r13d,r8d + mov r14d,eax + bswap r12d + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD PTR[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD PTR[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,QWORD PTR[4+rbp] + add r11d,r14d + mov r12d,DWORD PTR[36+rsi] + mov r13d,edx + mov r14d,r11d + bswap r12d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD PTR[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD PTR[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,QWORD PTR[4+rbp] + add r10d,r14d + mov r12d,DWORD PTR[40+rsi] + mov r13d,ecx + mov r14d,r10d + bswap r12d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD PTR[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD PTR[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,QWORD PTR[4+rbp] + add r9d,r14d + mov r12d,DWORD PTR[44+rsi] + mov r13d,ebx + mov r14d,r9d + bswap r12d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD PTR[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD PTR[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,QWORD PTR[20+rbp] + add r8d,r14d + mov r12d,DWORD PTR[48+rsi] + mov r13d,eax + 
mov r14d,r8d + bswap r12d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD PTR[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD PTR[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,QWORD PTR[4+rbp] + add edx,r14d + mov r12d,DWORD PTR[52+rsi] + mov r13d,r11d + mov r14d,edx + bswap r12d + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD PTR[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD PTR[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,QWORD PTR[4+rbp] + add ecx,r14d + mov r12d,DWORD PTR[56+rsi] + mov r13d,r10d + mov r14d,ecx + bswap r12d + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD PTR[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD PTR[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,QWORD PTR[4+rbp] + add ebx,r14d + mov r12d,DWORD PTR[60+rsi] + mov r13d,r9d + mov r14d,ebx + bswap r12d + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD PTR[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD PTR[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,QWORD PTR[20+rbp] + jmp $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx:: + mov r13d,DWORD PTR[4+rsp] + mov r15d,DWORD PTR[56+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[36+rsp] + + add r12d,DWORD PTR[rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD PTR[rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD PTR[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[8+rsp] + mov edi,DWORD PTR[60+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[40+rsp] + + add r12d,DWORD PTR[4+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD PTR[4+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add 
r12d,DWORD PTR[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[12+rsp] + mov r15d,DWORD PTR[rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[44+rsp] + + add r12d,DWORD PTR[8+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + xor r15d,r8d + + mov DWORD PTR[8+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD PTR[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[16+rsp] + mov edi,DWORD PTR[4+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[48+rsp] + + add r12d,DWORD PTR[12+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD PTR[12+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD PTR[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,QWORD PTR[20+rbp] + mov r13d,DWORD PTR[20+rsp] + mov r15d,DWORD PTR[8+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[52+rsp] + + add r12d,DWORD PTR[16+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD PTR[16+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD PTR[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[24+rsp] + mov edi,DWORD PTR[12+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[56+rsp] + + add r12d,DWORD PTR[20+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD PTR[20+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD PTR[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[28+rsp] + mov r15d,DWORD PTR[16+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov 
r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[60+rsp] + + add r12d,DWORD PTR[24+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD PTR[24+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD PTR[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[32+rsp] + mov edi,DWORD PTR[20+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[rsp] + + add r12d,DWORD PTR[28+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD PTR[28+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD PTR[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,QWORD PTR[20+rbp] + mov r13d,DWORD PTR[36+rsp] + mov r15d,DWORD PTR[24+rsp] + + mov r12d,r13d + ror r13d,11 + add eax,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[4+rsp] + + add r12d,DWORD PTR[32+rsp] + mov r13d,r8d + add r12d,r15d + mov r14d,eax + ror r13d,14 + mov r15d,r9d + + xor r13d,r8d + ror r14d,9 + xor r15d,r10d + + mov DWORD PTR[32+rsp],r12d + xor r14d,eax + and r15d,r8d + + ror r13d,5 + add r12d,r11d + xor r15d,r10d + + ror r14d,11 + xor r13d,r8d + add r12d,r15d + + mov r15d,eax + add r12d,DWORD PTR[rbp] + xor r14d,eax + + xor r15d,ebx + ror r13d,6 + mov r11d,ebx + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r11d,edi + add edx,r12d + add r11d,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[40+rsp] + mov edi,DWORD PTR[28+rsp] + + mov r12d,r13d + ror r13d,11 + add r11d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[8+rsp] + + add r12d,DWORD PTR[36+rsp] + mov r13d,edx + add r12d,edi + mov r14d,r11d + ror r13d,14 + mov edi,r8d + + xor r13d,edx + ror r14d,9 + xor edi,r9d + + mov DWORD PTR[36+rsp],r12d + xor r14d,r11d + and edi,edx + + ror r13d,5 + add r12d,r10d + xor edi,r9d + + ror r14d,11 + xor r13d,edx + add r12d,edi + + mov edi,r11d + add r12d,DWORD PTR[rbp] + xor r14d,r11d + + xor edi,eax + ror r13d,6 + mov r10d,eax + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r10d,r15d + add ecx,r12d + add r10d,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[44+rsp] + mov r15d,DWORD PTR[32+rsp] + + mov r12d,r13d + ror r13d,11 + add r10d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[12+rsp] + + add r12d,DWORD PTR[40+rsp] + mov r13d,ecx + add r12d,r15d + mov r14d,r10d + ror r13d,14 + mov r15d,edx + + xor r13d,ecx + ror r14d,9 + 
xor r15d,r8d + + mov DWORD PTR[40+rsp],r12d + xor r14d,r10d + and r15d,ecx + + ror r13d,5 + add r12d,r9d + xor r15d,r8d + + ror r14d,11 + xor r13d,ecx + add r12d,r15d + + mov r15d,r10d + add r12d,DWORD PTR[rbp] + xor r14d,r10d + + xor r15d,r11d + ror r13d,6 + mov r9d,r11d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor r9d,edi + add ebx,r12d + add r9d,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[48+rsp] + mov edi,DWORD PTR[36+rsp] + + mov r12d,r13d + ror r13d,11 + add r9d,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[16+rsp] + + add r12d,DWORD PTR[44+rsp] + mov r13d,ebx + add r12d,edi + mov r14d,r9d + ror r13d,14 + mov edi,ecx + + xor r13d,ebx + ror r14d,9 + xor edi,edx + + mov DWORD PTR[44+rsp],r12d + xor r14d,r9d + and edi,ebx + + ror r13d,5 + add r12d,r8d + xor edi,edx + + ror r14d,11 + xor r13d,ebx + add r12d,edi + + mov edi,r9d + add r12d,DWORD PTR[rbp] + xor r14d,r9d + + xor edi,r10d + ror r13d,6 + mov r8d,r10d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor r8d,r15d + add eax,r12d + add r8d,r12d + + lea rbp,QWORD PTR[20+rbp] + mov r13d,DWORD PTR[52+rsp] + mov r15d,DWORD PTR[40+rsp] + + mov r12d,r13d + ror r13d,11 + add r8d,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[20+rsp] + + add r12d,DWORD PTR[48+rsp] + mov r13d,eax + add r12d,r15d + mov r14d,r8d + ror r13d,14 + mov r15d,ebx + + xor r13d,eax + ror r14d,9 + xor r15d,ecx + + mov DWORD PTR[48+rsp],r12d + xor r14d,r8d + and r15d,eax + + ror r13d,5 + add r12d,edx + xor r15d,ecx + + ror r14d,11 + xor r13d,eax + add r12d,r15d + + mov r15d,r8d + add r12d,DWORD PTR[rbp] + xor r14d,r8d + + xor r15d,r9d + ror r13d,6 + mov edx,r9d + + and edi,r15d + ror r14d,2 + add r12d,r13d + + xor edx,edi + add r11d,r12d + add edx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[56+rsp] + mov edi,DWORD PTR[44+rsp] + + mov r12d,r13d + ror r13d,11 + add edx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[24+rsp] + + add r12d,DWORD PTR[52+rsp] + mov r13d,r11d + add r12d,edi + mov r14d,edx + ror r13d,14 + mov edi,eax + + xor r13d,r11d + ror r14d,9 + xor edi,ebx + + mov DWORD PTR[52+rsp],r12d + xor r14d,edx + and edi,r11d + + ror r13d,5 + add r12d,ecx + xor edi,ebx + + ror r14d,11 + xor r13d,r11d + add r12d,edi + + mov edi,edx + add r12d,DWORD PTR[rbp] + xor r14d,edx + + xor edi,r8d + ror r13d,6 + mov ecx,r8d + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor ecx,r15d + add r10d,r12d + add ecx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[60+rsp] + mov r15d,DWORD PTR[48+rsp] + + mov r12d,r13d + ror r13d,11 + add ecx,r14d + mov r14d,r15d + ror r15d,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor r15d,r14d + shr r14d,10 + + ror r15d,17 + xor r12d,r13d + xor r15d,r14d + add r12d,DWORD PTR[28+rsp] + + add r12d,DWORD PTR[56+rsp] + mov r13d,r10d + add r12d,r15d + mov r14d,ecx + ror r13d,14 + mov r15d,r11d + + xor r13d,r10d + ror r14d,9 + xor r15d,eax + + mov DWORD PTR[56+rsp],r12d + xor r14d,ecx + and r15d,r10d + + ror r13d,5 + add r12d,ebx + xor r15d,eax + + ror r14d,11 + xor r13d,r10d + add r12d,r15d + + mov r15d,ecx + add r12d,DWORD PTR[rbp] + xor r14d,ecx + + xor r15d,edx + ror r13d,6 + mov ebx,edx + + and edi,r15d + ror r14d,2 + add r12d,r13d 
+ + xor ebx,edi + add r9d,r12d + add ebx,r12d + + lea rbp,QWORD PTR[4+rbp] + mov r13d,DWORD PTR[rsp] + mov edi,DWORD PTR[52+rsp] + + mov r12d,r13d + ror r13d,11 + add ebx,r14d + mov r14d,edi + ror edi,2 + + xor r13d,r12d + shr r12d,3 + ror r13d,7 + xor edi,r14d + shr r14d,10 + + ror edi,17 + xor r12d,r13d + xor edi,r14d + add r12d,DWORD PTR[32+rsp] + + add r12d,DWORD PTR[60+rsp] + mov r13d,r9d + add r12d,edi + mov r14d,ebx + ror r13d,14 + mov edi,r10d + + xor r13d,r9d + ror r14d,9 + xor edi,r11d + + mov DWORD PTR[60+rsp],r12d + xor r14d,ebx + and edi,r9d + + ror r13d,5 + add r12d,eax + xor edi,r11d + + ror r14d,11 + xor r13d,r9d + add r12d,edi + + mov edi,ebx + add r12d,DWORD PTR[rbp] + xor r14d,ebx + + xor edi,ecx + ror r13d,6 + mov eax,ecx + + and r15d,edi + ror r14d,2 + add r12d,r13d + + xor eax,r15d + add r8d,r12d + add eax,r12d + + lea rbp,QWORD PTR[20+rbp] + cmp BYTE PTR[3+rbp],0 + jnz $L$rounds_16_xx + + mov rdi,QWORD PTR[((64+0))+rsp] + add eax,r14d + lea rsi,QWORD PTR[64+rsi] + + add eax,DWORD PTR[rdi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop + + mov rsi,QWORD PTR[((64+24))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order:: +sha256_block_data_order ENDP +ALIGN 64 + +K256:: + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 0d807aa98h,012835b01h,0243185beh,0550c7dc3h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 02de92c6fh,04a7484aah,05cb0a9dch,076f988dah + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 0c6e00bf3h,0d5a79147h,006ca6351h,014292967h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 027b70a85h,02e1b2138h,04d2c6dfch,053380d13h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0650a7354h,0766a0abbh,081c2c92eh,092722c85h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 0d192e819h,0d6990624h,0f40e3585h,0106aa070h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 019a4c116h,01e376c08h,02748774ch,034b0bcb5h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 0748f82eeh,078a5636fh,084c87814h,08cc70208h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + DD 090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h + + DD 000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 
000010203h,004050607h,008090a0bh,00c0d0e0fh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 003020100h,00b0a0908h,0ffffffffh,0ffffffffh + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h + DD 0ffffffffh,0ffffffffh,003020100h,00b0a0908h +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 + +ALIGN 64 +sha256_block_data_order_shaext PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_shaext:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +_shaext_shortcut:: + lea rsp,QWORD PTR[((-88))+rsp] + movaps XMMWORD PTR[(-8-80)+rax],xmm6 + movaps XMMWORD PTR[(-8-64)+rax],xmm7 + movaps XMMWORD PTR[(-8-48)+rax],xmm8 + movaps XMMWORD PTR[(-8-32)+rax],xmm9 + movaps XMMWORD PTR[(-8-16)+rax],xmm10 +$L$prologue_shaext:: + lea rcx,QWORD PTR[((K256+128))] + movdqu xmm1,XMMWORD PTR[rdi] + movdqu xmm2,XMMWORD PTR[16+rdi] + movdqa xmm7,XMMWORD PTR[((512-128))+rcx] + + pshufd xmm0,xmm1,01bh + pshufd xmm1,xmm1,1h + pshufd xmm2,xmm2,01bh + movdqa xmm8,xmm7 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp $L$oop_shaext + +ALIGN 16 +$L$oop_shaext:: + movdqu xmm3,XMMWORD PTR[rsi] + movdqu xmm4,XMMWORD PTR[16+rsi] + movdqu xmm5,XMMWORD PTR[32+rsi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD PTR[48+rsi] + + movdqa xmm0,XMMWORD PTR[((0-128))+rcx] + paddd xmm0,xmm3 +DB 102,15,56,0,231 + movdqa xmm10,xmm2 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + nop + movdqa xmm9,xmm1 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((32-128))+rcx] + paddd xmm0,xmm4 +DB 102,15,56,0,239 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + lea rsi,QWORD PTR[64+rsi] +DB 15,56,204,220 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((64-128))+rcx] + paddd xmm0,xmm5 +DB 102,15,56,0,247 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((96-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((128-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((160-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((192-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((224-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((256-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((288-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa 
xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((320-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((352-128))+rcx] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((384-128))+rcx] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR[((416-128))+rcx] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 +DB 15,56,203,202 + paddd xmm6,xmm7 + + movdqa xmm0,XMMWORD PTR[((448-128))+rcx] + paddd xmm0,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh +DB 15,56,205,245 + movdqa xmm7,xmm8 +DB 15,56,203,202 + + movdqa xmm0,XMMWORD PTR[((480-128))+rcx] + paddd xmm0,xmm6 + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,00eh + dec rdx + nop +DB 15,56,203,202 + + paddd xmm2,xmm10 + paddd xmm1,xmm9 + jnz $L$oop_shaext + + pshufd xmm2,xmm2,1h + pshufd xmm7,xmm1,01bh + pshufd xmm1,xmm1,1h + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + + movdqu XMMWORD PTR[rdi],xmm1 + movdqu XMMWORD PTR[16+rdi],xmm2 + movaps xmm6,XMMWORD PTR[((-8-80))+rax] + movaps xmm7,XMMWORD PTR[((-8-64))+rax] + movaps xmm8,XMMWORD PTR[((-8-48))+rax] + movaps xmm9,XMMWORD PTR[((-8-32))+rax] + movaps xmm10,XMMWORD PTR[((-8-16))+rax] + mov rsp,rax +$L$epilogue_shaext:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_shaext:: +sha256_block_data_order_shaext ENDP + +ALIGN 64 +sha256_block_data_order_ssse3 PROC PRIVATE + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha256_block_data_order_ssse3:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + +$L$ssse3_shortcut:: + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,160 + lea rdx,QWORD PTR[rdx*4+rsi] + and rsp,-64 + mov QWORD PTR[((64+0))+rsp],rdi + mov QWORD PTR[((64+8))+rsp],rsi + mov QWORD PTR[((64+16))+rsp],rdx + mov QWORD PTR[((64+24))+rsp],r11 + movaps XMMWORD PTR[(64+32)+rsp],xmm6 + movaps XMMWORD PTR[(64+48)+rsp],xmm7 + movaps XMMWORD PTR[(64+64)+rsp],xmm8 + movaps XMMWORD PTR[(64+80)+rsp],xmm9 +$L$prologue_ssse3:: + + mov eax,DWORD PTR[rdi] + mov ebx,DWORD PTR[4+rdi] + mov ecx,DWORD PTR[8+rdi] + mov edx,DWORD PTR[12+rdi] + mov r8d,DWORD PTR[16+rdi] + mov r9d,DWORD PTR[20+rdi] + mov r10d,DWORD PTR[24+rdi] + mov r11d,DWORD PTR[28+rdi] + + + jmp $L$loop_ssse3 +ALIGN 16 +$L$loop_ssse3:: + movdqa xmm7,XMMWORD PTR[((K256+512))] + movdqu xmm0,XMMWORD PTR[rsi] + movdqu xmm1,XMMWORD PTR[16+rsi] + movdqu xmm2,XMMWORD PTR[32+rsi] +DB 102,15,56,0,199 + movdqu xmm3,XMMWORD PTR[48+rsi] + lea rbp,QWORD PTR[K256] +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD PTR[rbp] + movdqa xmm5,XMMWORD PTR[32+rbp] +DB 102,15,56,0,215 + paddd xmm4,xmm0 + movdqa xmm6,XMMWORD PTR[64+rbp] +DB 102,15,56,0,223 + movdqa xmm7,XMMWORD PTR[96+rbp] + paddd xmm5,xmm1 + paddd xmm6,xmm2 + paddd xmm7,xmm3 + movdqa XMMWORD PTR[rsp],xmm4 + mov r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm5 + mov edi,ebx + movdqa XMMWORD PTR[32+rsp],xmm6 + xor edi,ecx + movdqa 
XMMWORD PTR[48+rsp],xmm7 + mov r13d,r8d + jmp $L$ssse3_00_47 + +ALIGN 16 +$L$ssse3_00_47:: + sub rbp,-128 + ror r13d,14 + movdqa xmm4,xmm1 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm3 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,224,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,250,4 + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm0,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm3,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm0,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm0,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm0,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm0,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm0 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm2 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm0 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,225,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,251,4 + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm1,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm0,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[20+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm1,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add 
ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm1,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm1,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[32+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm1,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm1 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[16+rsp],xmm6 + ror r13d,14 + movdqa xmm4,xmm3 + mov eax,r14d + mov r12d,r9d + movdqa xmm7,xmm1 + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax +DB 102,15,58,15,226,4 + and r12d,r8d + xor r13d,r8d +DB 102,15,58,15,248,4 + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,ebx + add r11d,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,eax + add r11d,r13d + xor edi,ebx + paddd xmm2,xmm7 + ror r14d,2 + add edx,r11d + psrld xmm6,7 + add r11d,edi + mov r13d,edx + pshufd xmm7,xmm1,250 + add r14d,r11d + ror r13d,14 + pslld xmm5,14 + mov r11d,r14d + mov r12d,r8d + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + psrld xmm6,11 + xor r14d,r11d + pxor xmm4,xmm5 + and r12d,edx + xor r13d,edx + pslld xmm5,11 + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + pxor xmm4,xmm6 + xor r12d,r9d + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,eax + add r10d,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,r11d + psrld xmm7,10 + add r10d,r13d + xor r15d,eax + paddd xmm2,xmm4 + ror r14d,2 + add ecx,r10d + psrlq xmm6,17 + add r10d,r15d + mov r13d,ecx + add r14d,r10d + pxor xmm7,xmm6 + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + psrlq xmm6,2 + xor r13d,ecx + xor r12d,r8d + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,r10d + and r12d,ecx + pshufd xmm7,xmm7,128 + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + psrldq xmm7,8 + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + paddd xmm2,xmm7 + and edi,r15d + xor r14d,r10d + add r9d,r13d + pshufd xmm7,xmm2,80 + xor edi,r11d + ror r14d,2 + add ebx,r9d + movdqa xmm6,xmm7 + add r9d,edi + mov r13d,ebx + psrld xmm7,10 + add r14d,r9d + ror r13d,14 + psrlq xmm6,17 + mov r9d,r14d + mov r12d,ecx + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + psrlq xmm6,2 + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + pxor xmm7,xmm6 + mov edi,r9d + xor r12d,edx + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,r10d + add r8d,r12d + movdqa xmm6,XMMWORD PTR[64+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + paddd xmm2,xmm7 + ror r14d,2 + add eax,r8d + add r8d,r15d + paddd xmm6,xmm2 + mov r13d,eax + add r14d,r8d + movdqa XMMWORD PTR[32+rsp],xmm6 + ror r13d,14 + movdqa 
xmm4,xmm0 + mov r8d,r14d + mov r12d,ebx + movdqa xmm7,xmm2 + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d +DB 102,15,58,15,227,4 + and r12d,eax + xor r13d,eax +DB 102,15,58,15,249,4 + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + movdqa xmm5,xmm4 + xor r15d,r9d + add edx,r12d + movdqa xmm6,xmm4 + ror r13d,6 + and edi,r15d + psrld xmm4,3 + xor r14d,r8d + add edx,r13d + xor edi,r9d + paddd xmm3,xmm7 + ror r14d,2 + add r11d,edx + psrld xmm6,7 + add edx,edi + mov r13d,r11d + pshufd xmm7,xmm2,250 + add r14d,edx + ror r13d,14 + pslld xmm5,14 + mov edx,r14d + mov r12d,eax + pxor xmm4,xmm6 + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + psrld xmm6,11 + xor r14d,edx + pxor xmm4,xmm5 + and r12d,r11d + xor r13d,r11d + pslld xmm5,11 + add ecx,DWORD PTR[52+rsp] + mov edi,edx + pxor xmm4,xmm6 + xor r12d,ebx + ror r14d,11 + movdqa xmm6,xmm7 + xor edi,r8d + add ecx,r12d + pxor xmm4,xmm5 + ror r13d,6 + and r15d,edi + xor r14d,edx + psrld xmm7,10 + add ecx,r13d + xor r15d,r8d + paddd xmm3,xmm4 + ror r14d,2 + add r10d,ecx + psrlq xmm6,17 + add ecx,r15d + mov r13d,r10d + add r14d,ecx + pxor xmm7,xmm6 + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + psrlq xmm6,2 + xor r13d,r10d + xor r12d,eax + pxor xmm7,xmm6 + ror r13d,5 + xor r14d,ecx + and r12d,r10d + pshufd xmm7,xmm7,128 + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + psrldq xmm7,8 + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + paddd xmm3,xmm7 + and edi,r15d + xor r14d,ecx + add ebx,r13d + pshufd xmm7,xmm3,80 + xor edi,edx + ror r14d,2 + add r9d,ebx + movdqa xmm6,xmm7 + add ebx,edi + mov r13d,r9d + psrld xmm7,10 + add r14d,ebx + ror r13d,14 + psrlq xmm6,17 + mov ebx,r14d + mov r12d,r10d + pxor xmm7,xmm6 + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + psrlq xmm6,2 + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + pxor xmm7,xmm6 + mov edi,ebx + xor r12d,r11d + ror r14d,11 + pshufd xmm7,xmm7,8 + xor edi,ecx + add eax,r12d + movdqa xmm6,XMMWORD PTR[96+rbp] + ror r13d,6 + and r15d,edi + pslldq xmm7,8 + xor r14d,ebx + add eax,r13d + xor r15d,ecx + paddd xmm3,xmm7 + ror r14d,2 + add r8d,eax + add eax,r15d + paddd xmm6,xmm3 + mov r13d,r8d + add r14d,eax + movdqa XMMWORD PTR[48+rsp],xmm6 + cmp BYTE PTR[131+rbp],0 + jne $L$ssse3_00_47 + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[4+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[8+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov 
r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[12+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[16+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[20+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[24+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[28+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + ror r13d,14 + mov eax,r14d + mov r12d,r9d + ror r14d,9 + xor r13d,r8d + xor r12d,r10d + ror r13d,5 + xor r14d,eax + and r12d,r8d + xor r13d,r8d + add r11d,DWORD PTR[32+rsp] + mov r15d,eax + xor r12d,r10d + ror r14d,11 + xor r15d,ebx + add r11d,r12d + ror r13d,6 + and edi,r15d + xor r14d,eax + add r11d,r13d + xor edi,ebx + ror r14d,2 + add edx,r11d + add r11d,edi + mov r13d,edx + add r14d,r11d + ror r13d,14 + mov r11d,r14d + mov r12d,r8d + ror r14d,9 + xor r13d,edx + xor r12d,r9d + ror r13d,5 + xor r14d,r11d + and r12d,edx + xor r13d,edx + add r10d,DWORD PTR[36+rsp] + mov edi,r11d + xor r12d,r9d + ror r14d,11 + xor edi,eax + add r10d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r11d + add r10d,r13d + xor r15d,eax + ror r14d,2 + add ecx,r10d + add r10d,r15d + mov r13d,ecx + add r14d,r10d + ror r13d,14 + mov r10d,r14d + mov r12d,edx + ror r14d,9 + xor r13d,ecx + xor r12d,r8d + ror r13d,5 + xor r14d,r10d + and r12d,ecx + xor r13d,ecx + add r9d,DWORD PTR[40+rsp] + mov r15d,r10d + xor r12d,r8d + ror r14d,11 + xor r15d,r11d + add r9d,r12d + ror r13d,6 + and edi,r15d + xor r14d,r10d + add r9d,r13d + xor edi,r11d + ror r14d,2 + add ebx,r9d + add r9d,edi + mov r13d,ebx + add r14d,r9d + ror r13d,14 + mov r9d,r14d + mov r12d,ecx + ror r14d,9 + xor r13d,ebx + xor r12d,edx + ror r13d,5 + xor r14d,r9d + and r12d,ebx + xor r13d,ebx + add r8d,DWORD PTR[44+rsp] + mov edi,r9d + xor r12d,edx + ror r14d,11 + xor edi,r10d + add r8d,r12d + ror r13d,6 + and r15d,edi + xor r14d,r9d + add r8d,r13d + xor r15d,r10d + ror r14d,2 + add eax,r8d + add r8d,r15d + mov r13d,eax + add r14d,r8d + ror r13d,14 + mov 
r8d,r14d + mov r12d,ebx + ror r14d,9 + xor r13d,eax + xor r12d,ecx + ror r13d,5 + xor r14d,r8d + and r12d,eax + xor r13d,eax + add edx,DWORD PTR[48+rsp] + mov r15d,r8d + xor r12d,ecx + ror r14d,11 + xor r15d,r9d + add edx,r12d + ror r13d,6 + and edi,r15d + xor r14d,r8d + add edx,r13d + xor edi,r9d + ror r14d,2 + add r11d,edx + add edx,edi + mov r13d,r11d + add r14d,edx + ror r13d,14 + mov edx,r14d + mov r12d,eax + ror r14d,9 + xor r13d,r11d + xor r12d,ebx + ror r13d,5 + xor r14d,edx + and r12d,r11d + xor r13d,r11d + add ecx,DWORD PTR[52+rsp] + mov edi,edx + xor r12d,ebx + ror r14d,11 + xor edi,r8d + add ecx,r12d + ror r13d,6 + and r15d,edi + xor r14d,edx + add ecx,r13d + xor r15d,r8d + ror r14d,2 + add r10d,ecx + add ecx,r15d + mov r13d,r10d + add r14d,ecx + ror r13d,14 + mov ecx,r14d + mov r12d,r11d + ror r14d,9 + xor r13d,r10d + xor r12d,eax + ror r13d,5 + xor r14d,ecx + and r12d,r10d + xor r13d,r10d + add ebx,DWORD PTR[56+rsp] + mov r15d,ecx + xor r12d,eax + ror r14d,11 + xor r15d,edx + add ebx,r12d + ror r13d,6 + and edi,r15d + xor r14d,ecx + add ebx,r13d + xor edi,edx + ror r14d,2 + add r9d,ebx + add ebx,edi + mov r13d,r9d + add r14d,ebx + ror r13d,14 + mov ebx,r14d + mov r12d,r10d + ror r14d,9 + xor r13d,r9d + xor r12d,r11d + ror r13d,5 + xor r14d,ebx + and r12d,r9d + xor r13d,r9d + add eax,DWORD PTR[60+rsp] + mov edi,ebx + xor r12d,r11d + ror r14d,11 + xor edi,ecx + add eax,r12d + ror r13d,6 + and r15d,edi + xor r14d,ebx + add eax,r13d + xor r15d,ecx + ror r14d,2 + add r8d,eax + add eax,r15d + mov r13d,r8d + add r14d,eax + mov rdi,QWORD PTR[((64+0))+rsp] + mov eax,r14d + + add eax,DWORD PTR[rdi] + lea rsi,QWORD PTR[64+rsi] + add ebx,DWORD PTR[4+rdi] + add ecx,DWORD PTR[8+rdi] + add edx,DWORD PTR[12+rdi] + add r8d,DWORD PTR[16+rdi] + add r9d,DWORD PTR[20+rdi] + add r10d,DWORD PTR[24+rdi] + add r11d,DWORD PTR[28+rdi] + + cmp rsi,QWORD PTR[((64+16))+rsp] + + mov DWORD PTR[rdi],eax + mov DWORD PTR[4+rdi],ebx + mov DWORD PTR[8+rdi],ecx + mov DWORD PTR[12+rdi],edx + mov DWORD PTR[16+rdi],r8d + mov DWORD PTR[20+rdi],r9d + mov DWORD PTR[24+rdi],r10d + mov DWORD PTR[28+rdi],r11d + jb $L$loop_ssse3 + + mov rsi,QWORD PTR[((64+24))+rsp] + movaps xmm6,XMMWORD PTR[((64+32))+rsp] + movaps xmm7,XMMWORD PTR[((64+48))+rsp] + movaps xmm8,XMMWORD PTR[((64+64))+rsp] + movaps xmm9,XMMWORD PTR[((64+80))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue_ssse3:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_sha256_block_data_order_ssse3:: +sha256_block_data_order_ssse3 ENDP +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + mov rsi,rax + mov rax,QWORD PTR[((64+24))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD 
PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jb $L$in_prologue + + lea rsi,QWORD PTR[((64+32))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,8 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +ALIGN 16 +shaext_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue_shaext] + cmp rbx,r10 + jb $L$in_prologue + + lea r10,QWORD PTR[$L$epilogue_shaext] + cmp rbx,r10 + jae $L$in_prologue + + lea rsi,QWORD PTR[((-8-80))+rax] + lea rdi,QWORD PTR[512+r8] + mov ecx,10 + DD 0a548f3fch + + jmp $L$in_prologue +shaext_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha256_block_data_order + DD imagerel $L$SEH_end_sha256_block_data_order + DD imagerel $L$SEH_info_sha256_block_data_order + DD imagerel $L$SEH_begin_sha256_block_data_order_shaext + DD imagerel $L$SEH_end_sha256_block_data_order_shaext + DD imagerel $L$SEH_info_sha256_block_data_order_shaext + DD imagerel $L$SEH_begin_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_end_sha256_block_data_order_ssse3 + DD imagerel $L$SEH_info_sha256_block_data_order_ssse3 +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha256_block_data_order:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue,imagerel $L$epilogue +$L$SEH_info_sha256_block_data_order_shaext:: +DB 9,0,0,0 + DD imagerel shaext_handler +$L$SEH_info_sha256_block_data_order_ssse3:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue_ssse3,imagerel $L$epilogue_ssse3 + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha512-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha512-x86_64.asm new file mode 100644 index 00000000000000..e993c3c35f336e --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/sha/sha512-x86_64.asm @@ -0,0 +1,1913 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +EXTERN OPENSSL_ia32cap_P:NEAR +PUBLIC sha512_block_data_order + +ALIGN 16 +sha512_block_data_order PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_sha512_block_data_order:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + mov r11,rsp + shl rdx,4 + sub rsp,16*8+4*8 + lea rdx,QWORD PTR[rdx*8+rsi] + and rsp,-64 + mov QWORD PTR[((128+0))+rsp],rdi + mov QWORD PTR[((128+8))+rsp],rsi + mov QWORD PTR[((128+16))+rsp],rdx + mov QWORD PTR[((128+24))+rsp],r11 +$L$prologue:: + + mov rax,QWORD PTR[rdi] + mov rbx,QWORD PTR[8+rdi] + mov rcx,QWORD PTR[16+rdi] + mov rdx,QWORD PTR[24+rdi] + 
mov r8,QWORD PTR[32+rdi] + mov r9,QWORD PTR[40+rdi] + mov r10,QWORD PTR[48+rdi] + mov r11,QWORD PTR[56+rdi] + jmp $L$loop + +ALIGN 16 +$L$loop:: + mov rdi,rbx + lea rbp,QWORD PTR[K512] + xor rdi,rcx + mov r12,QWORD PTR[rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + add r11,r14 + mov r12,QWORD PTR[8+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + add r10,r14 + mov r12,QWORD PTR[16+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + add r9,r14 + mov r12,QWORD PTR[24+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + add r8,r14 + mov r12,QWORD PTR[32+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rdx,r14 + mov r12,QWORD PTR[40+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + add rcx,r14 + mov r12,QWORD PTR[48+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx 
+ ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rbx,r14 + mov r12,QWORD PTR[56+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + add rax,r14 + mov r12,QWORD PTR[64+rsi] + mov r13,r8 + mov r14,rax + bswap r12 + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + add r11,r14 + mov r12,QWORD PTR[72+rsi] + mov r13,rdx + mov r14,r11 + bswap r12 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + add r10,r14 + mov r12,QWORD PTR[80+rsi] + mov r13,rcx + mov r14,r10 + bswap r12 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + add r9,r14 + mov r12,QWORD PTR[88+rsi] + mov r13,rbx + mov r14,r9 + bswap r12 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + add r8,r14 + mov r12,QWORD PTR[96+rsi] + mov r13,rax + mov r14,r8 + bswap r12 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[96+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rdx,r14 + mov r12,QWORD PTR[104+rsi] + mov r13,r11 + mov r14,rdx + bswap r12 + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and 
r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + add rcx,r14 + mov r12,QWORD PTR[112+rsi] + mov r13,r10 + mov r14,rcx + bswap r12 + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + add rbx,r14 + mov r12,QWORD PTR[120+rsi] + mov r13,r9 + mov r14,rbx + bswap r12 + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + jmp $L$rounds_16_xx +ALIGN 16 +$L$rounds_16_xx:: + mov r13,QWORD PTR[8+rsp] + mov r15,QWORD PTR[112+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[72+rsp] + + add r12,QWORD PTR[rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[16+rsp] + mov rdi,QWORD PTR[120+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[80+rsp] + + add r12,QWORD PTR[8+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[8+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[24+rsp] + mov r15,QWORD PTR[rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[88+rsp] + + add r12,QWORD PTR[16+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[16+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[32+rsp] + mov rdi,QWORD PTR[8+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + 
xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[96+rsp] + + add r12,QWORD PTR[24+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[24+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[40+rsp] + mov r15,QWORD PTR[16+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[104+rsp] + + add r12,QWORD PTR[32+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[32+rsp],r12 + xor r14,r8 + and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[48+rsp] + mov rdi,QWORD PTR[24+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[112+rsp] + + add r12,QWORD PTR[40+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[40+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[56+rsp] + mov r15,QWORD PTR[32+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[120+rsp] + + add r12,QWORD PTR[48+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[48+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[64+rsp] + mov rdi,QWORD PTR[40+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[rsp] + + add r12,QWORD PTR[56+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[56+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror 
r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[72+rsp] + mov r15,QWORD PTR[48+rsp] + + mov r12,r13 + ror r13,7 + add rax,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[8+rsp] + + add r12,QWORD PTR[64+rsp] + mov r13,r8 + add r12,r15 + mov r14,rax + ror r13,23 + mov r15,r9 + + xor r13,r8 + ror r14,5 + xor r15,r10 + + mov QWORD PTR[64+rsp],r12 + xor r14,rax + and r15,r8 + + ror r13,4 + add r12,r11 + xor r15,r10 + + ror r14,6 + xor r13,r8 + add r12,r15 + + mov r15,rax + add r12,QWORD PTR[rbp] + xor r14,rax + + xor r15,rbx + ror r13,14 + mov r11,rbx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r11,rdi + add rdx,r12 + add r11,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[80+rsp] + mov rdi,QWORD PTR[56+rsp] + + mov r12,r13 + ror r13,7 + add r11,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[16+rsp] + + add r12,QWORD PTR[72+rsp] + mov r13,rdx + add r12,rdi + mov r14,r11 + ror r13,23 + mov rdi,r8 + + xor r13,rdx + ror r14,5 + xor rdi,r9 + + mov QWORD PTR[72+rsp],r12 + xor r14,r11 + and rdi,rdx + + ror r13,4 + add r12,r10 + xor rdi,r9 + + ror r14,6 + xor r13,rdx + add r12,rdi + + mov rdi,r11 + add r12,QWORD PTR[rbp] + xor r14,r11 + + xor rdi,rax + ror r13,14 + mov r10,rax + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r10,r15 + add rcx,r12 + add r10,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[88+rsp] + mov r15,QWORD PTR[64+rsp] + + mov r12,r13 + ror r13,7 + add r10,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[24+rsp] + + add r12,QWORD PTR[80+rsp] + mov r13,rcx + add r12,r15 + mov r14,r10 + ror r13,23 + mov r15,rdx + + xor r13,rcx + ror r14,5 + xor r15,r8 + + mov QWORD PTR[80+rsp],r12 + xor r14,r10 + and r15,rcx + + ror r13,4 + add r12,r9 + xor r15,r8 + + ror r14,6 + xor r13,rcx + add r12,r15 + + mov r15,r10 + add r12,QWORD PTR[rbp] + xor r14,r10 + + xor r15,r11 + ror r13,14 + mov r9,r11 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor r9,rdi + add rbx,r12 + add r9,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[96+rsp] + mov rdi,QWORD PTR[72+rsp] + + mov r12,r13 + ror r13,7 + add r9,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[32+rsp] + + add r12,QWORD PTR[88+rsp] + mov r13,rbx + add r12,rdi + mov r14,r9 + ror r13,23 + mov rdi,rcx + + xor r13,rbx + ror r14,5 + xor rdi,rdx + + mov QWORD PTR[88+rsp],r12 + xor r14,r9 + and rdi,rbx + + ror r13,4 + add r12,r8 + xor rdi,rdx + + ror r14,6 + xor r13,rbx + add r12,rdi + + mov rdi,r9 + add r12,QWORD PTR[rbp] + xor r14,r9 + + xor rdi,r10 + ror r13,14 + mov r8,r10 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor r8,r15 + add rax,r12 + add r8,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[104+rsp] + mov r15,QWORD PTR[80+rsp] + + mov r12,r13 + ror r13,7 + add r8,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[40+rsp] + + add r12,QWORD PTR[96+rsp] + mov r13,rax + add r12,r15 + mov r14,r8 + ror r13,23 + mov r15,rbx + + xor r13,rax + ror r14,5 + xor r15,rcx + + mov QWORD PTR[96+rsp],r12 + xor r14,r8 + 
and r15,rax + + ror r13,4 + add r12,rdx + xor r15,rcx + + ror r14,6 + xor r13,rax + add r12,r15 + + mov r15,r8 + add r12,QWORD PTR[rbp] + xor r14,r8 + + xor r15,r9 + ror r13,14 + mov rdx,r9 + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rdx,rdi + add r11,r12 + add rdx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[112+rsp] + mov rdi,QWORD PTR[88+rsp] + + mov r12,r13 + ror r13,7 + add rdx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[48+rsp] + + add r12,QWORD PTR[104+rsp] + mov r13,r11 + add r12,rdi + mov r14,rdx + ror r13,23 + mov rdi,rax + + xor r13,r11 + ror r14,5 + xor rdi,rbx + + mov QWORD PTR[104+rsp],r12 + xor r14,rdx + and rdi,r11 + + ror r13,4 + add r12,rcx + xor rdi,rbx + + ror r14,6 + xor r13,r11 + add r12,rdi + + mov rdi,rdx + add r12,QWORD PTR[rbp] + xor r14,rdx + + xor rdi,r8 + ror r13,14 + mov rcx,r8 + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rcx,r15 + add r10,r12 + add rcx,r12 + + lea rbp,QWORD PTR[24+rbp] + mov r13,QWORD PTR[120+rsp] + mov r15,QWORD PTR[96+rsp] + + mov r12,r13 + ror r13,7 + add rcx,r14 + mov r14,r15 + ror r15,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor r15,r14 + shr r14,6 + + ror r15,19 + xor r12,r13 + xor r15,r14 + add r12,QWORD PTR[56+rsp] + + add r12,QWORD PTR[112+rsp] + mov r13,r10 + add r12,r15 + mov r14,rcx + ror r13,23 + mov r15,r11 + + xor r13,r10 + ror r14,5 + xor r15,rax + + mov QWORD PTR[112+rsp],r12 + xor r14,rcx + and r15,r10 + + ror r13,4 + add r12,rbx + xor r15,rax + + ror r14,6 + xor r13,r10 + add r12,r15 + + mov r15,rcx + add r12,QWORD PTR[rbp] + xor r14,rcx + + xor r15,rdx + ror r13,14 + mov rbx,rdx + + and rdi,r15 + ror r14,28 + add r12,r13 + + xor rbx,rdi + add r9,r12 + add rbx,r12 + + lea rbp,QWORD PTR[8+rbp] + mov r13,QWORD PTR[rsp] + mov rdi,QWORD PTR[104+rsp] + + mov r12,r13 + ror r13,7 + add rbx,r14 + mov r14,rdi + ror rdi,42 + + xor r13,r12 + shr r12,7 + ror r13,1 + xor rdi,r14 + shr r14,6 + + ror rdi,19 + xor r12,r13 + xor rdi,r14 + add r12,QWORD PTR[64+rsp] + + add r12,QWORD PTR[120+rsp] + mov r13,r9 + add r12,rdi + mov r14,rbx + ror r13,23 + mov rdi,r10 + + xor r13,r9 + ror r14,5 + xor rdi,r11 + + mov QWORD PTR[120+rsp],r12 + xor r14,rbx + and rdi,r9 + + ror r13,4 + add r12,rax + xor rdi,r11 + + ror r14,6 + xor r13,r9 + add r12,rdi + + mov rdi,rbx + add r12,QWORD PTR[rbp] + xor r14,rbx + + xor rdi,rcx + ror r13,14 + mov rax,rcx + + and r15,rdi + ror r14,28 + add r12,r13 + + xor rax,r15 + add r8,r12 + add rax,r12 + + lea rbp,QWORD PTR[24+rbp] + cmp BYTE PTR[7+rbp],0 + jnz $L$rounds_16_xx + + mov rdi,QWORD PTR[((128+0))+rsp] + add rax,r14 + lea rsi,QWORD PTR[128+rsi] + + add rax,QWORD PTR[rdi] + add rbx,QWORD PTR[8+rdi] + add rcx,QWORD PTR[16+rdi] + add rdx,QWORD PTR[24+rdi] + add r8,QWORD PTR[32+rdi] + add r9,QWORD PTR[40+rdi] + add r10,QWORD PTR[48+rdi] + add r11,QWORD PTR[56+rdi] + + cmp rsi,QWORD PTR[((128+16))+rsp] + + mov QWORD PTR[rdi],rax + mov QWORD PTR[8+rdi],rbx + mov QWORD PTR[16+rdi],rcx + mov QWORD PTR[24+rdi],rdx + mov QWORD PTR[32+rdi],r8 + mov QWORD PTR[40+rdi],r9 + mov QWORD PTR[48+rdi],r10 + mov QWORD PTR[56+rdi],r11 + jb $L$loop + + mov rsi,QWORD PTR[((128+24))+rsp] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret 
+$L$SEH_end_sha512_block_data_order:: +sha512_block_data_order ENDP +ALIGN 64 + +K512:: + DQ 0428a2f98d728ae22h,07137449123ef65cdh + DQ 0428a2f98d728ae22h,07137449123ef65cdh + DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 0b5c0fbcfec4d3b2fh,0e9b5dba58189dbbch + DQ 03956c25bf348b538h,059f111f1b605d019h + DQ 03956c25bf348b538h,059f111f1b605d019h + DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0923f82a4af194f9bh,0ab1c5ed5da6d8118h + DQ 0d807aa98a3030242h,012835b0145706fbeh + DQ 0d807aa98a3030242h,012835b0145706fbeh + DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 0243185be4ee4b28ch,0550c7dc3d5ffb4e2h + DQ 072be5d74f27b896fh,080deb1fe3b1696b1h + DQ 072be5d74f27b896fh,080deb1fe3b1696b1h + DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 09bdc06a725c71235h,0c19bf174cf692694h + DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h + DQ 0e49b69c19ef14ad2h,0efbe4786384f25e3h + DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 00fc19dc68b8cd5b5h,0240ca1cc77ac9c65h + DQ 02de92c6f592b0275h,04a7484aa6ea6e483h + DQ 02de92c6f592b0275h,04a7484aa6ea6e483h + DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 05cb0a9dcbd41fbd4h,076f988da831153b5h + DQ 0983e5152ee66dfabh,0a831c66d2db43210h + DQ 0983e5152ee66dfabh,0a831c66d2db43210h + DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0b00327c898fb213fh,0bf597fc7beef0ee4h + DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h + DQ 0c6e00bf33da88fc2h,0d5a79147930aa725h + DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 006ca6351e003826fh,0142929670a0e6e70h + DQ 027b70a8546d22ffch,02e1b21385c26c926h + DQ 027b70a8546d22ffch,02e1b21385c26c926h + DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 04d2c6dfc5ac42aedh,053380d139d95b3dfh + DQ 0650a73548baf63deh,0766a0abb3c77b2a8h + DQ 0650a73548baf63deh,0766a0abb3c77b2a8h + DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 081c2c92e47edaee6h,092722c851482353bh + DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h + DQ 0a2bfe8a14cf10364h,0a81a664bbc423001h + DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0c24b8b70d0f89791h,0c76c51a30654be30h + DQ 0d192e819d6ef5218h,0d69906245565a910h + DQ 0d192e819d6ef5218h,0d69906245565a910h + DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 0f40e35855771202ah,0106aa07032bbd1b8h + DQ 019a4c116b8d2d0c8h,01e376c085141ab53h + DQ 019a4c116b8d2d0c8h,01e376c085141ab53h + DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 02748774cdf8eeb99h,034b0bcb5e19b48a8h + DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh + DQ 0391c0cb3c5c95a63h,04ed8aa4ae3418acbh + DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 05b9cca4f7763e373h,0682e6ff3d6b2b8a3h + DQ 0748f82ee5defb2fch,078a5636f43172f60h + DQ 0748f82ee5defb2fch,078a5636f43172f60h + DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 084c87814a1f0ab72h,08cc702081a6439ech + DQ 090befffa23631e28h,0a4506cebde82bde9h + DQ 090befffa23631e28h,0a4506cebde82bde9h + DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh + DQ 0bef9a3f7b2c67915h,0c67178f2e372532bh + DQ 0ca273eceea26619ch,0d186b8c721c0c207h + DQ 0ca273eceea26619ch,0d186b8c721c0c207h + DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h + DQ 0eada7dd6cde0eb1eh,0f57d4f7fee6ed178h + DQ 006f067aa72176fbah,00a637dc5a2c898a6h + DQ 006f067aa72176fbah,00a637dc5a2c898a6h + DQ 0113f9804bef90daeh,01b710b35131c471bh + DQ 0113f9804bef90daeh,01b710b35131c471bh + DQ 028db77f523047d84h,032caab7b40c72493h + DQ 028db77f523047d84h,032caab7b40c72493h + DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch + DQ 03c9ebe0a15c9bebch,0431d67c49c100d4ch + DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah + DQ 04cc5d4becb3e42b6h,0597f299cfc657e2ah + DQ 05fcb6fab3ad6faech,06c44198c4a475817h + DQ 05fcb6fab3ad6faech,06c44198c4a475817h + + DQ 
00001020304050607h,008090a0b0c0d0e0fh + DQ 00001020304050607h,008090a0b0c0d0e0fh +DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54 +DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + mov rsi,QWORD PTR[8+r9] + mov r11,QWORD PTR[56+r9] + + mov r10d,DWORD PTR[r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + mov r10d,DWORD PTR[4+r11] + lea r10,QWORD PTR[r10*1+rsi] + cmp rbx,r10 + jae $L$in_prologue + mov rsi,rax + mov rax,QWORD PTR[((128+24))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jb $L$in_prologue + + lea rsi,QWORD PTR[((128+32))+rsi] + lea rdi,QWORD PTR[512+r8] + mov ecx,12 + DD 0a548f3fch + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_sha512_block_data_order + DD imagerel $L$SEH_end_sha512_block_data_order + DD imagerel $L$SEH_info_sha512_block_data_order +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_sha512_block_data_order:: +DB 9,0,0,0 + DD imagerel se_handler + DD imagerel $L$prologue,imagerel $L$epilogue + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/whrlpool/wp-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/whrlpool/wp-x86_64.asm new file mode 100644 index 00000000000000..fa5bfbc7b20cd6 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/whrlpool/wp-x86_64.asm @@ -0,0 +1,974 @@ +OPTION DOTNAME +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC whirlpool_block + +ALIGN 16 +whirlpool_block PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_whirlpool_block:: + mov rdi,rcx + mov rsi,rdx + mov rdx,r8 + + + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + mov r11,rsp + sub rsp,128+40 + and rsp,-64 + + lea r10,QWORD PTR[128+rsp] + mov QWORD PTR[r10],rdi + mov QWORD PTR[8+r10],rsi + mov QWORD PTR[16+r10],rdx + mov QWORD PTR[32+r10],r11 +$L$prologue:: + + mov rbx,r10 + lea rbp,QWORD PTR[$L$table] + + xor rcx,rcx + xor rdx,rdx + mov r8,QWORD PTR[rdi] + mov 
r9,QWORD PTR[8+rdi] + mov r10,QWORD PTR[16+rdi] + mov r11,QWORD PTR[24+rdi] + mov r12,QWORD PTR[32+rdi] + mov r13,QWORD PTR[40+rdi] + mov r14,QWORD PTR[48+rdi] + mov r15,QWORD PTR[56+rdi] +$L$outerloop:: + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],r9 + mov QWORD PTR[16+rsp],r10 + mov QWORD PTR[24+rsp],r11 + mov QWORD PTR[32+rsp],r12 + mov QWORD PTR[40+rsp],r13 + mov QWORD PTR[48+rsp],r14 + mov QWORD PTR[56+rsp],r15 + xor r8,QWORD PTR[rsi] + xor r9,QWORD PTR[8+rsi] + xor r10,QWORD PTR[16+rsi] + xor r11,QWORD PTR[24+rsi] + xor r12,QWORD PTR[32+rsi] + xor r13,QWORD PTR[40+rsi] + xor r14,QWORD PTR[48+rsi] + xor r15,QWORD PTR[56+rsi] + mov QWORD PTR[((64+0))+rsp],r8 + mov QWORD PTR[((64+8))+rsp],r9 + mov QWORD PTR[((64+16))+rsp],r10 + mov QWORD PTR[((64+24))+rsp],r11 + mov QWORD PTR[((64+32))+rsp],r12 + mov QWORD PTR[((64+40))+rsp],r13 + mov QWORD PTR[((64+48))+rsp],r14 + mov QWORD PTR[((64+56))+rsp],r15 + xor rsi,rsi + mov QWORD PTR[24+rbx],rsi + jmp $L$round +ALIGN 16 +$L$round:: + mov r8,QWORD PTR[4096+rsi*8+rbp] + mov eax,DWORD PTR[rsp] + mov ebx,DWORD PTR[4+rsp] + movzx ecx,al + movzx edx,ah + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r8,QWORD PTR[rsi*8+rbp] + mov r9,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((0+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + mov r10,QWORD PTR[6+rsi*8+rbp] + mov r11,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + mov r12,QWORD PTR[4+rsi*8+rbp] + mov r13,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((0+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + mov r14,QWORD PTR[2+rsi*8+rbp] + mov r15,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r9,QWORD PTR[rsi*8+rbp] + xor r10,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((8+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r11,QWORD PTR[6+rsi*8+rbp] + xor r12,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r13,QWORD PTR[4+rsi*8+rbp] + xor r14,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((8+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r15,QWORD PTR[2+rsi*8+rbp] + xor r8,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r10,QWORD PTR[rsi*8+rbp] + xor r11,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((16+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r12,QWORD PTR[6+rsi*8+rbp] + xor r13,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r14,QWORD PTR[4+rsi*8+rbp] + xor r15,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((16+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r8,QWORD PTR[2+rsi*8+rbp] + xor r9,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r11,QWORD PTR[rsi*8+rbp] + xor r12,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((24+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD 
PTR[rdx*1+rdx] + movzx edx,bh + xor r13,QWORD PTR[6+rsi*8+rbp] + xor r14,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r15,QWORD PTR[4+rsi*8+rbp] + xor r8,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((24+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r9,QWORD PTR[2+rsi*8+rbp] + xor r10,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r12,QWORD PTR[rsi*8+rbp] + xor r13,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((32+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r14,QWORD PTR[6+rsi*8+rbp] + xor r15,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r8,QWORD PTR[4+rsi*8+rbp] + xor r9,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((32+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r10,QWORD PTR[2+rsi*8+rbp] + xor r11,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r13,QWORD PTR[rsi*8+rbp] + xor r14,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((40+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r15,QWORD PTR[6+rsi*8+rbp] + xor r8,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r9,QWORD PTR[4+rsi*8+rbp] + xor r10,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((40+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r11,QWORD PTR[2+rsi*8+rbp] + xor r12,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r14,QWORD PTR[rsi*8+rbp] + xor r15,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((48+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r8,QWORD PTR[6+rsi*8+rbp] + xor r9,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r10,QWORD PTR[4+rsi*8+rbp] + xor r11,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((48+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r12,QWORD PTR[2+rsi*8+rbp] + xor r13,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r15,QWORD PTR[rsi*8+rbp] + xor r8,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((56+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r9,QWORD PTR[6+rsi*8+rbp] + xor r10,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r11,QWORD PTR[4+rsi*8+rbp] + xor r12,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((56+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r13,QWORD PTR[2+rsi*8+rbp] + xor r14,QWORD PTR[1+rdi*8+rbp] + mov QWORD PTR[rsp],r8 + mov QWORD PTR[8+rsp],r9 + mov QWORD PTR[16+rsp],r10 + mov QWORD PTR[24+rsp],r11 + mov QWORD PTR[32+rsp],r12 + mov QWORD PTR[40+rsp],r13 + mov QWORD PTR[48+rsp],r14 + mov 
QWORD PTR[56+rsp],r15 + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r8,QWORD PTR[rsi*8+rbp] + xor r9,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+0+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r10,QWORD PTR[6+rsi*8+rbp] + xor r11,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r12,QWORD PTR[4+rsi*8+rbp] + xor r13,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+0+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r14,QWORD PTR[2+rsi*8+rbp] + xor r15,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r9,QWORD PTR[rsi*8+rbp] + xor r10,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+8+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r11,QWORD PTR[6+rsi*8+rbp] + xor r12,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r13,QWORD PTR[4+rsi*8+rbp] + xor r14,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+8+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r15,QWORD PTR[2+rsi*8+rbp] + xor r8,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r10,QWORD PTR[rsi*8+rbp] + xor r11,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+16+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r12,QWORD PTR[6+rsi*8+rbp] + xor r13,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r14,QWORD PTR[4+rsi*8+rbp] + xor r15,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+16+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r8,QWORD PTR[2+rsi*8+rbp] + xor r9,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r11,QWORD PTR[rsi*8+rbp] + xor r12,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+24+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r13,QWORD PTR[6+rsi*8+rbp] + xor r14,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r15,QWORD PTR[4+rsi*8+rbp] + xor r8,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+24+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r9,QWORD PTR[2+rsi*8+rbp] + xor r10,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r12,QWORD PTR[rsi*8+rbp] + xor r13,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+32+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r14,QWORD PTR[6+rsi*8+rbp] + xor r15,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r8,QWORD PTR[4+rsi*8+rbp] + xor r9,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+32+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx 
ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r10,QWORD PTR[2+rsi*8+rbp] + xor r11,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r13,QWORD PTR[rsi*8+rbp] + xor r14,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+40+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r15,QWORD PTR[6+rsi*8+rbp] + xor r8,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r9,QWORD PTR[4+rsi*8+rbp] + xor r10,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+40+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r11,QWORD PTR[2+rsi*8+rbp] + xor r12,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r14,QWORD PTR[rsi*8+rbp] + xor r15,QWORD PTR[7+rdi*8+rbp] + mov eax,DWORD PTR[((64+48+8))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r8,QWORD PTR[6+rsi*8+rbp] + xor r9,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r10,QWORD PTR[4+rsi*8+rbp] + xor r11,QWORD PTR[3+rdi*8+rbp] + mov ebx,DWORD PTR[((64+48+8+4))+rsp] + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r12,QWORD PTR[2+rsi*8+rbp] + xor r13,QWORD PTR[1+rdi*8+rbp] + shr eax,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r15,QWORD PTR[rsi*8+rbp] + xor r8,QWORD PTR[7+rdi*8+rbp] + + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r9,QWORD PTR[6+rsi*8+rbp] + xor r10,QWORD PTR[5+rdi*8+rbp] + shr ebx,16 + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,bl + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,bh + xor r11,QWORD PTR[4+rsi*8+rbp] + xor r12,QWORD PTR[3+rdi*8+rbp] + + lea rsi,QWORD PTR[rcx*1+rcx] + movzx ecx,al + lea rdi,QWORD PTR[rdx*1+rdx] + movzx edx,ah + xor r13,QWORD PTR[2+rsi*8+rbp] + xor r14,QWORD PTR[1+rdi*8+rbp] + lea rbx,QWORD PTR[128+rsp] + mov rsi,QWORD PTR[24+rbx] + add rsi,1 + cmp rsi,10 + je $L$roundsdone + + mov QWORD PTR[24+rbx],rsi + mov QWORD PTR[((64+0))+rsp],r8 + mov QWORD PTR[((64+8))+rsp],r9 + mov QWORD PTR[((64+16))+rsp],r10 + mov QWORD PTR[((64+24))+rsp],r11 + mov QWORD PTR[((64+32))+rsp],r12 + mov QWORD PTR[((64+40))+rsp],r13 + mov QWORD PTR[((64+48))+rsp],r14 + mov QWORD PTR[((64+56))+rsp],r15 + jmp $L$round +ALIGN 16 +$L$roundsdone:: + mov rdi,QWORD PTR[rbx] + mov rsi,QWORD PTR[8+rbx] + mov rax,QWORD PTR[16+rbx] + xor r8,QWORD PTR[rsi] + xor r9,QWORD PTR[8+rsi] + xor r10,QWORD PTR[16+rsi] + xor r11,QWORD PTR[24+rsi] + xor r12,QWORD PTR[32+rsi] + xor r13,QWORD PTR[40+rsi] + xor r14,QWORD PTR[48+rsi] + xor r15,QWORD PTR[56+rsi] + xor r8,QWORD PTR[rdi] + xor r9,QWORD PTR[8+rdi] + xor r10,QWORD PTR[16+rdi] + xor r11,QWORD PTR[24+rdi] + xor r12,QWORD PTR[32+rdi] + xor r13,QWORD PTR[40+rdi] + xor r14,QWORD PTR[48+rdi] + xor r15,QWORD PTR[56+rdi] + mov QWORD PTR[rdi],r8 + mov QWORD PTR[8+rdi],r9 + mov QWORD PTR[16+rdi],r10 + mov QWORD PTR[24+rdi],r11 + mov QWORD PTR[32+rdi],r12 + mov QWORD PTR[40+rdi],r13 + mov QWORD PTR[48+rdi],r14 + mov QWORD PTR[56+rdi],r15 + lea rsi,QWORD PTR[64+rsi] + sub rax,1 + jz $L$alldone + mov QWORD PTR[8+rbx],rsi + mov QWORD PTR[16+rbx],rax + jmp $L$outerloop 
+$L$alldone:: + mov rsi,QWORD PTR[32+rbx] + mov r15,QWORD PTR[rsi] + mov r14,QWORD PTR[8+rsi] + mov r13,QWORD PTR[16+rsi] + mov r12,QWORD PTR[24+rsi] + mov rbp,QWORD PTR[32+rsi] + mov rbx,QWORD PTR[40+rsi] + lea rsp,QWORD PTR[48+rsi] +$L$epilogue:: + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_whirlpool_block:: +whirlpool_block ENDP + +ALIGN 64 + +$L$table:: +DB 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216 +DB 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38 +DB 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184 +DB 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251 +DB 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203 +DB 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17 +DB 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9 +DB 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13 +DB 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155 +DB 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255 +DB 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12 +DB 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14 +DB 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150 +DB 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48 +DB 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109 +DB 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248 +DB 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71 +DB 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53 +DB 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55 +DB 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138 +DB 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210 +DB 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108 +DB 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132 +DB 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128 +DB 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245 +DB 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179 +DB 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33 +DB 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156 +DB 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67 +DB 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41 +DB 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93 +DB 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213 +DB 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189 +DB 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232 +DB 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146 +DB 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158 +DB 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19 +DB 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35 +DB 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32 +DB 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68 +DB 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162 +DB 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207 +DB 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124 +DB 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90 +DB 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80 +DB 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201 +DB 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20 +DB 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217 +DB 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60 +DB 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143 +DB 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144 +DB 
244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7 +DB 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221 +DB 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211 +DB 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45 +DB 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120 +DB 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151 +DB 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2 +DB 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115 +DB 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167 +DB 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246 +DB 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178 +DB 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73 +DB 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86 +DB 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112 +DB 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205 +DB 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187 +DB 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113 +DB 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123 +DB 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175 +DB 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69 +DB 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26 +DB 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212 +DB 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88 +DB 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46 +DB 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63 +DB 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172 +DB 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176 +DB 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239 +DB 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182 +DB 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92 +DB 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18 +DB 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147 +DB 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222 +DB 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198 +DB 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209 +DB 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59 +DB 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95 +DB 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49 +DB 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168 +DB 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185 +DB 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188 +DB 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62 +DB 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11 +DB 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191 +DB 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89 +DB 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242 +DB 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119 +DB 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51 +DB 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244 +DB 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39 +DB 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235 +DB 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137 +DB 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50 +DB 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84 +DB 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141 +DB 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100 +DB 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157 +DB 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61 +DB 
104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15 +DB 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202 +DB 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183 +DB 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125 +DB 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206 +DB 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127 +DB 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47 +DB 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99 +DB 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42 +DB 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204 +DB 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130 +DB 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122 +DB 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72 +DB 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149 +DB 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223 +DB 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77 +DB 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192 +DB 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145 +DB 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200 +DB 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91 +DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +DB 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249 +DB 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110 +DB 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225 +DB 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230 +DB 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40 +DB 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195 +DB 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116 +DB 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190 +DB 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29 +DB 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234 +DB 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87 +DB 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56 +DB 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173 +DB 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196 +DB 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218 +DB 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199 +DB 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219 +DB 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233 +DB 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106 +DB 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3 +DB 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74 +DB 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142 +DB 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96 +DB 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252 +DB 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70 +DB 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31 +DB 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118 +DB 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250 +DB 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54 +DB 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174 +DB 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75 +DB 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133 +DB 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126 +DB 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231 +DB 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85 +DB 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58 +DB 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129 +DB 
150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82 +DB 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98 +DB 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163 +DB 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16 +DB 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171 +DB 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208 +DB 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197 +DB 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236 +DB 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22 +DB 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148 +DB 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159 +DB 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229 +DB 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152 +DB 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23 +DB 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228 +DB 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161 +DB 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78 +DB 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66 +DB 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52 +DB 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8 +DB 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238 +DB 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97 +DB 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177 +DB 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79 +DB 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36 +DB 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227 +DB 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37 +DB 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34 +DB 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101 +DB 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121 +DB 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105 +DB 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169 +DB 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25 +DB 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254 +DB 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154 +DB 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240 +DB 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153 +DB 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131 +DB 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4 +DB 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102 +DB 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224 +DB 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193 +DB 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253 +DB 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64 +DB 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28 +DB 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24 +DB 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139 +DB 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81 +DB 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5 +DB 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140 +DB 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57 +DB 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170 +DB 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27 +DB 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220 +DB 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94 +DB 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160 +DB 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136 +DB 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103 +DB 
187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10 +DB 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135 +DB 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241 +DB 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114 +DB 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83 +DB 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1 +DB 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43 +DB 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164 +DB 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243 +DB 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21 +DB 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76 +DB 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165 +DB 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181 +DB 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180 +DB 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186 +DB 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166 +DB 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247 +DB 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6 +DB 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65 +DB 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215 +DB 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111 +DB 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30 +DB 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214 +DB 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226 +DB 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104 +DB 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44 +DB 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237 +DB 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117 +DB 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134 +DB 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107 +DB 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194 +DB 24,35,198,232,135,184,1,79 +DB 54,166,210,245,121,111,145,82 +DB 96,188,155,142,163,12,123,53 +DB 29,224,215,194,46,75,254,87 +DB 21,119,55,229,159,240,74,218 +DB 88,201,41,10,177,160,107,133 +DB 189,93,16,244,203,62,5,103 +DB 228,39,65,139,167,125,149,216 +DB 251,238,124,102,221,23,71,158 +DB 202,45,191,7,173,90,131,51 +EXTERN __imp_RtlVirtualUnwind:NEAR + +ALIGN 16 +se_handler PROC PRIVATE + push rsi + push rdi + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + pushfq + sub rsp,64 + + mov rax,QWORD PTR[120+r8] + mov rbx,QWORD PTR[248+r8] + + lea r10,QWORD PTR[$L$prologue] + cmp rbx,r10 + jb $L$in_prologue + + mov rax,QWORD PTR[152+r8] + + lea r10,QWORD PTR[$L$epilogue] + cmp rbx,r10 + jae $L$in_prologue + + mov rax,QWORD PTR[((128+32))+rax] + lea rax,QWORD PTR[48+rax] + + mov rbx,QWORD PTR[((-8))+rax] + mov rbp,QWORD PTR[((-16))+rax] + mov r12,QWORD PTR[((-24))+rax] + mov r13,QWORD PTR[((-32))+rax] + mov r14,QWORD PTR[((-40))+rax] + mov r15,QWORD PTR[((-48))+rax] + mov QWORD PTR[144+r8],rbx + mov QWORD PTR[160+r8],rbp + mov QWORD PTR[216+r8],r12 + mov QWORD PTR[224+r8],r13 + mov QWORD PTR[232+r8],r14 + mov QWORD PTR[240+r8],r15 + +$L$in_prologue:: + mov rdi,QWORD PTR[8+rax] + mov rsi,QWORD PTR[16+rax] + mov QWORD PTR[152+r8],rax + mov QWORD PTR[168+r8],rsi + mov QWORD PTR[176+r8],rdi + + mov rdi,QWORD PTR[40+r9] + mov rsi,r8 + mov ecx,154 + DD 0a548f3fch + + mov rsi,r9 + xor rcx,rcx + mov rdx,QWORD PTR[8+rsi] + mov r8,QWORD PTR[rsi] + mov r9,QWORD PTR[16+rsi] + mov r10,QWORD PTR[40+rsi] + lea r11,QWORD PTR[56+rsi] + lea r12,QWORD PTR[24+rsi] + mov QWORD PTR[32+rsp],r10 + mov QWORD PTR[40+rsp],r11 + mov QWORD 
PTR[48+rsp],r12 + mov QWORD PTR[56+rsp],rcx + call QWORD PTR[__imp_RtlVirtualUnwind] + + mov eax,1 + add rsp,64 + popfq + pop r15 + pop r14 + pop r13 + pop r12 + pop rbp + pop rbx + pop rdi + pop rsi + DB 0F3h,0C3h ;repret +se_handler ENDP + +.text$ ENDS +.pdata SEGMENT READONLY ALIGN(4) +ALIGN 4 + DD imagerel $L$SEH_begin_whirlpool_block + DD imagerel $L$SEH_end_whirlpool_block + DD imagerel $L$SEH_info_whirlpool_block + +.pdata ENDS +.xdata SEGMENT READONLY ALIGN(8) +ALIGN 8 +$L$SEH_info_whirlpool_block:: +DB 9,0,0,0 + DD imagerel se_handler + +.xdata ENDS +END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/x86_64cpuid.asm b/deps/openssl/asm_obsolete/x64-win32-masm/x86_64cpuid.asm new file mode 100644 index 00000000000000..c767b91f9a7cd1 --- /dev/null +++ b/deps/openssl/asm_obsolete/x64-win32-masm/x86_64cpuid.asm @@ -0,0 +1,264 @@ +OPTION DOTNAME +EXTERN OPENSSL_cpuid_setup:NEAR + +.CRT$XCU SEGMENT READONLY ALIGN(8) + DQ OPENSSL_cpuid_setup + + +.CRT$XCU ENDS +_DATA SEGMENT +COMM OPENSSL_ia32cap_P:DWORD:4 + +_DATA ENDS +.text$ SEGMENT ALIGN(256) 'CODE' + +PUBLIC OPENSSL_atomic_add + +ALIGN 16 +OPENSSL_atomic_add PROC PUBLIC + mov eax,DWORD PTR[rcx] +$L$spin:: lea r8,QWORD PTR[rax*1+rdx] +DB 0f0h + cmpxchg DWORD PTR[rcx],r8d + jne $L$spin + mov eax,r8d +DB 048h,098h + DB 0F3h,0C3h ;repret +OPENSSL_atomic_add ENDP + +PUBLIC OPENSSL_rdtsc + +ALIGN 16 +OPENSSL_rdtsc PROC PUBLIC + rdtsc + shl rdx,32 + or rax,rdx + DB 0F3h,0C3h ;repret +OPENSSL_rdtsc ENDP + +PUBLIC OPENSSL_ia32_cpuid + +ALIGN 16 +OPENSSL_ia32_cpuid PROC PUBLIC + mov QWORD PTR[8+rsp],rdi ;WIN64 prologue + mov QWORD PTR[16+rsp],rsi + mov rax,rsp +$L$SEH_begin_OPENSSL_ia32_cpuid:: + mov rdi,rcx + + + mov r8,rbx + + xor eax,eax + mov DWORD PTR[8+rdi],eax + cpuid + mov r11d,eax + + xor eax,eax + cmp ebx,0756e6547h + setne al + mov r9d,eax + cmp edx,049656e69h + setne al + or r9d,eax + cmp ecx,06c65746eh + setne al + or r9d,eax + jz $L$intel + + cmp ebx,068747541h + setne al + mov r10d,eax + cmp edx,069746E65h + setne al + or r10d,eax + cmp ecx,0444D4163h + setne al + or r10d,eax + jnz $L$intel + + + mov eax,080000000h + cpuid + cmp eax,080000001h + jb $L$intel + mov r10d,eax + mov eax,080000001h + cpuid + or r9d,ecx + and r9d,000000801h + + cmp r10d,080000008h + jb $L$intel + + mov eax,080000008h + cpuid + movzx r10,cl + inc r10 + + mov eax,1 + cpuid + bt edx,28 + jnc $L$generic + shr ebx,16 + cmp bl,r10b + ja $L$generic + and edx,0efffffffh + jmp $L$generic + +$L$intel:: + cmp r11d,4 + mov r10d,-1 + jb $L$nocacheinfo + + mov eax,4 + mov ecx,0 + cpuid + mov r10d,eax + shr r10d,14 + and r10d,0fffh + + cmp r11d,7 + jb $L$nocacheinfo + + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD PTR[8+rdi],ebx + +$L$nocacheinfo:: + mov eax,1 + cpuid + and edx,0bfefffffh + cmp r9d,0 + jne $L$notintel + or edx,040000000h + and ah,15 + cmp ah,15 + jne $L$notintel + or edx,000100000h +$L$notintel:: + bt edx,28 + jnc $L$generic + and edx,0efffffffh + cmp r10d,0 + je $L$generic + + or edx,010000000h + shr ebx,16 + cmp bl,1 + ja $L$generic + and edx,0efffffffh +$L$generic:: + and r9d,000000800h + and ecx,0fffff7ffh + or r9d,ecx + + mov r10d,edx + bt r9d,27 + jnc $L$clear_avx + xor ecx,ecx +DB 00fh,001h,0d0h + and eax,6 + cmp eax,6 + je $L$done +$L$clear_avx:: + mov eax,0efffe7ffh + and r9d,eax + and DWORD PTR[8+rdi],0ffffffdfh +$L$done:: + shl r9,32 + mov eax,r10d + mov rbx,r8 + or rax,r9 + mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue + mov rsi,QWORD PTR[16+rsp] + DB 0F3h,0C3h ;repret +$L$SEH_end_OPENSSL_ia32_cpuid:: +OPENSSL_ia32_cpuid ENDP + +PUBLIC 
OPENSSL_cleanse + +ALIGN 16 +OPENSSL_cleanse PROC PUBLIC + xor rax,rax + cmp rdx,15 + jae $L$ot + cmp rdx,0 + je $L$ret +$L$ittle:: + mov BYTE PTR[rcx],al + sub rdx,1 + lea rcx,QWORD PTR[1+rcx] + jnz $L$ittle +$L$ret:: + DB 0F3h,0C3h ;repret +ALIGN 16 +$L$ot:: + test rcx,7 + jz $L$aligned + mov BYTE PTR[rcx],al + lea rdx,QWORD PTR[((-1))+rdx] + lea rcx,QWORD PTR[1+rcx] + jmp $L$ot +$L$aligned:: + mov QWORD PTR[rcx],rax + lea rdx,QWORD PTR[((-8))+rdx] + test rdx,-8 + lea rcx,QWORD PTR[8+rcx] + jnz $L$aligned + cmp rdx,0 + jne $L$ittle + DB 0F3h,0C3h ;repret +OPENSSL_cleanse ENDP +PUBLIC OPENSSL_wipe_cpu + +ALIGN 16 +OPENSSL_wipe_cpu PROC PUBLIC + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + xor rcx,rcx + xor rdx,rdx + xor r8,r8 + xor r9,r9 + xor r10,r10 + xor r11,r11 + lea rax,QWORD PTR[8+rsp] + DB 0F3h,0C3h ;repret +OPENSSL_wipe_cpu ENDP +PUBLIC OPENSSL_ia32_rdrand + +ALIGN 16 +OPENSSL_ia32_rdrand PROC PUBLIC + mov ecx,8 +$L$oop_rdrand:: +DB 72,15,199,240 + jc $L$break_rdrand + loop $L$oop_rdrand +$L$break_rdrand:: + cmp rax,0 + cmove rax,rcx + DB 0F3h,0C3h ;repret +OPENSSL_ia32_rdrand ENDP + +PUBLIC OPENSSL_ia32_rdseed + +ALIGN 16 +OPENSSL_ia32_rdseed PROC PUBLIC + mov ecx,8 +$L$oop_rdseed:: +DB 72,15,199,248 + jc $L$break_rdseed + loop $L$oop_rdseed +$L$break_rdseed:: + cmp rax,0 + cmove rax,rcx + DB 0F3h,0C3h ;repret +OPENSSL_ia32_rdseed ENDP + +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/aes/aes-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/aes/aes-586.s new file mode 100644 index 00000000000000..2d7c1fd09df5a9 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/aes/aes-586.s @@ -0,0 +1,3244 @@ +.file "aes-586.s" +.text +.type _x86_AES_encrypt_compact,@function +.align 16 +_x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl 
-128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ecx,%edi + xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%ecx + andl %edx,%ebp + leal (%edx,%edx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %edx,%edi + xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%edx + andl %eax,%ebp + leal (%eax,%eax,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %eax,%edi + xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi + roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%eax + andl %ebx,%ebp + leal (%ebx,%ebx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ebx,%edi + xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ebx + xorl %edi,%esi + xorl %esi,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact +.type _sse_AES_encrypt_compact,@function +.align 16 +_sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl 
%esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $8,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $8,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + movd %mm2,%eax + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + movd %mm6,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $8,%esi + shrl $16,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + andl $255,%ebx + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx + shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi + orl %eax,%edx + shll $8,%esi + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld $8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L001loop +.align 16 +.L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact +.type _x86_AES_encrypt,@function +.align 16 +_x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + 
xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 
1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 +.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 +.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 
1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 
+.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 
97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.size _x86_AES_encrypt,.-_x86_AES_encrypt +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +AES_encrypt: +.L_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L004pic_point +.L004pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + leal .LAES_Te-.L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_encrypt,.-.L_AES_encrypt_begin +.type _x86_AES_decrypt_compact,@function +.align 16 +_x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl 
$454761243,%esi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $24,%eax + xorl %ebp,%ecx + roll $16,%ebx + xorl %eax,%ecx + roll $8,%ebp + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl %esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $24,%ebx + xorl %ebp,%edx + roll $16,%ecx + xorl %ebx,%edx + roll $8,%ebp + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%eax + roll $16,%edx + xorl %ecx,%eax + roll $8,%ebp + xorl %edx,%eax + xorl %ebp,%eax + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%ebp + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl 
$24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact +.type _sse_AES_decrypt_compact,@function +.align 16 +_sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L007loop: + pshufw $12,%mm0,%mm1 + pshufw $9,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $3,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movd %mm2,%eax + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi + movd %mm6,%ebx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $16,%esi + shrl $16,%eax + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shrl $16,%ebx + shll $8,%esi + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx + andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + orl %ebx,%edx + shll $16,%esi + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx + shll $24,%eax + orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + 
movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L007loop +.align 16 +.L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact +.type _x86_AES_decrypt,@function +.align 16 +_x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl 
-32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 
723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 +.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 +.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 
2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 +.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 
208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.size _x86_AES_decrypt,.-_x86_AES_decrypt +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +AES_decrypt: +.L_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L010pic_point +.L010pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + leal .LAES_Td-.L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L011x86 
+ movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_decrypt,.-.L_AES_decrypt_begin +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 +AES_cbc_encrypt: +.L_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je .L012drop_out + call .L013pic_point +.L013pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + cmpl $0,40(%esp) + leal .LAES_Te-.L013pic_point(%ebp),%ebp + jne .L014picked_te + leal .LAES_Td-.LAES_Te(%ebp),%ebp +.L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb .L015slow_way + testl $15,%ecx + jnz .L015slow_way + btl $28,(%eax) + jc .L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb .L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp .L017tbl_ok +.align 4 +.L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 4 +.L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb .L018do_copy + cmpl $3852,%ebx + jb .L019skip_copy +.align 4 +.L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +.L019skip_copy: + movl $16,%edi +.align 4 +.L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz .L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je .L021fast_decrypt + movl (%edi),%eax + movl 4(%edi),%ebx +.align 16 +.L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L023skip_ezero: + movl 28(%esp),%esp + popfl +.L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L021fast_decrypt: + cmpl 36(%esp),%esi + je .L024fast_dec_in_place + movl %edi,52(%esp) +.align 4 +.align 16 +.L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call 
_x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp .L026fast_dec_out +.align 16 +.L024fast_dec_in_place: +.L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L027fast_dec_in_place_loop +.align 4 +.L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je .L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb .L030slow_enc_tail + btl $25,52(%esp) + jnc .L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 16 +.L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq %mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L032slow_enc_loop_sse + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4 +.L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl 
%ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L033slow_enc_loop_x86 + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je .L034enc_in_place +.align 4 +.long 2767451785 + jmp .L035enc_skip_in_place +.L034enc_in_place: + leal (%edi,%ecx,1),%edi +.L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 4 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp .L033slow_enc_loop_x86 +.align 16 +.L029slow_decrypt: + btl $25,52(%esp) + jnc .L036slow_dec_loop_x86 +.align 4 +.L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_decrypt_compact + movl 32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi + movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc .L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz .L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc .L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz .L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin +.type _x86_AES_set_encrypt_key,@function +.align 16 
+_x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz .L040badpointer + testl $-1,%edi + jz .L040badpointer + call .L041pic_point +.L041pic_point: + popl %ebp + leal .LAES_Te-.L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je .L04210rounds + cmpl $192,%ecx + je .L04312rounds + cmpl $256,%ecx + je .L04414rounds + movl $-2,%eax + jmp .L045exit +.L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp .L04610shortcut +.align 4 +.L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +.L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl .L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp .L045exit +.L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp .L04812shortcut +.align 4 +.L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +.L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je .L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp .L04912loop +.L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp .L045exit +.L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp .L05114shortcut +.align 4 +.L05214loop: + movl 28(%edi),%edx +.L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl 
%eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je .L05314break + incl %ecx + movl %eax,%edx + movl 16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp .L05214loop +.L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp .L045exit +.L040badpointer: + movl $-1,%eax +.L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key +.globl private_AES_set_encrypt_key +.type private_AES_set_encrypt_key,@function +.align 16 +private_AES_set_encrypt_key: +.L_private_AES_set_encrypt_key_begin: + call _x86_AES_set_encrypt_key + ret +.size private_AES_set_encrypt_key,.-.L_private_AES_set_encrypt_key_begin +.globl private_AES_set_decrypt_key +.type private_AES_set_decrypt_key,@function +.align 16 +private_AES_set_decrypt_key: +.L_private_AES_set_decrypt_key_begin: + call _x86_AES_set_encrypt_key + cmpl $0,%eax + je .L054proceed + ret +.L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 4 +.L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne .L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 4 +.L056permute: + addl $16,%edi + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl 
$454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %ecx,%edx + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb .L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size private_AES_set_decrypt_key,.-.L_private_AES_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s b/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s new file mode 100644 index 00000000000000..a68f7cdbe9cbe6 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s @@ -0,0 +1,2196 @@ +.file "../openssl/crypto/aes/asm/aesni-x86.s" +.text +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + movups %xmm2,(%eax) + ret +.size aesni_encrypt,.-.L_aesni_encrypt_begin +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups 
(%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + movups %xmm2,(%eax) + ret +.size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.size _aesni_decrypt2,.-_aesni_decrypt2 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L006enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 
102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -16(%edx,%ecx,1),%xmm0 + jmp .L_aesni_encrypt6_enter +.align 16 +.L008enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L008enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -16(%edx,%ecx,1),%xmm0 + jmp .L_aesni_decrypt6_enter +.align 16 +.L009dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L009dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size 
_aesni_decrypt6,.-_aesni_decrypt6 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L010ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L011ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L012ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L013ecb_enc_loop6_enter +.align 16 +.L014ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L013ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L014ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L010ecb_ret +.L012ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L015ecb_enc_one + movups 16(%esi),%xmm3 + je .L016ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L017ecb_enc_three + movups 48(%esi),%xmm5 + je .L018ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L010ecb_ret +.align 16 +.L015ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L019enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L019enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L010ecb_ret +.align 16 +.L016ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L010ecb_ret +.align 16 +.L017ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L010ecb_ret +.align 16 +.L018ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L010ecb_ret +.align 16 +.L011ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L020ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L021ecb_dec_loop6_enter +.align 16 +.L022ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L021ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L022ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L010ecb_ret 
+.L020ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L023ecb_dec_one + movups 16(%esi),%xmm3 + je .L024ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L025ecb_dec_three + movups 48(%esi),%xmm5 + je .L026ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L010ecb_ret +.align 16 +.L023ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L027dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L027dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L010ecb_ret +.align 16 +.L024ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L010ecb_ret +.align 16 +.L025ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L010ecb_ret +.align 16 +.L026ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L010ecb_ret: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +.L028ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +.L029ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L029ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz .L028ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + 
movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L030enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L030enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L031ccm64_dec_outer +.align 16 +.L031ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L032ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +.L033ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L033ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp .L031ccm64_dec_outer +.align 16 +.L032ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L034enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L034enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L035ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb .L036ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp .L037ctr32_loop6 +.align 16 +.L037ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd 
$64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc .L037ctr32_loop6 + addl $6,%eax + jz .L038ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L036ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L039ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je .L040ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L041ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je .L042ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L038ctr32_ret +.align 16 +.L035ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L039ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L043enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L043enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L038ctr32_ret +.align 16 +.L040ctr32_two: + call _aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L038ctr32_ret +.align 16 +.L041ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L038ctr32_ret +.align 16 +.L042ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L038ctr32_ret: + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 
16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L044enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L044enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L045xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L046xts_enc_loop6 +.align 16 +.L046xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L046xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L045xts_enc_short: + addl $96,%eax + jz .L047xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L048xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L049xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L050xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L051xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 
+ pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L052xts_enc_done +.align 16 +.L048xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L053enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L053enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L052xts_enc_done +.align 16 +.L049xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L052xts_enc_done +.align 16 +.L050xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L052xts_enc_done +.align 16 +.L051xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L052xts_enc_done +.align 16 +.L047xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L054xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L055xts_enc_steal +.align 16 +.L052xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L054xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L055xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L055xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L056enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L056enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L054xts_enc_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: 
+.L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L057enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L057enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L058xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L059xts_dec_loop6 +.align 16 +.L059xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L059xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L058xts_dec_short: + addl $96,%eax + jz .L060xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L061xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L062xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L063xts_dec_three + pshufd $19,%xmm0,%xmm2 + 
pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L064xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L065xts_dec_done +.align 16 +.L061xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L066dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L066dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L065xts_dec_done +.align 16 +.L062xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L065xts_dec_done +.align 16 +.L063xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L065xts_dec_done +.align 16 +.L064xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L065xts_dec_done +.align 16 +.L060xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L067xts_dec_ret + movl %eax,112(%esp) + jmp .L068xts_dec_only_one_more +.align 16 +.L065xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L067xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L068xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L069dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L069dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) 
+.L070xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L070xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L071dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L071dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L067xts_dec_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: +.L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L072cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L073cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L074cbc_enc_tail + subl $16,%eax + jmp .L075cbc_enc_loop +.align 16 +.L075cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L076enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L076enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L075cbc_enc_loop + addl $16,%eax + jnz .L074cbc_enc_tail + movaps %xmm2,%xmm7 + jmp .L077cbc_ret +.L074cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L075cbc_enc_loop +.align 16 +.L073cbc_decrypt: + cmpl $80,%eax + jbe .L078cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L079cbc_dec_loop6_enter +.align 16 +.L080cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L079cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L080cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L081cbc_dec_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L078cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L082cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L083cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L084cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L085cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call 
_aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + subl $80,%eax + jmp .L081cbc_dec_tail_collected +.align 16 +.L082cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L086dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L086dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L081cbc_dec_tail_collected +.align 16 +.L083cbc_dec_two: + call _aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L081cbc_dec_tail_collected +.align 16 +.L084cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L081cbc_dec_tail_collected +.align 16 +.L085cbc_dec_four: + call _aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + subl $64,%eax +.L081cbc_dec_tail_collected: + andl $15,%eax + jnz .L087cbc_dec_tail_partial + movups %xmm2,(%edi) + jmp .L077cbc_ret +.align 16 +.L087cbc_dec_tail_partial: + movaps %xmm2,(%esp) + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 +.L077cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + movups %xmm7,(%ebp) +.L072cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin +.type _aesni_set_encrypt_key,@function +.align 16 +_aesni_set_encrypt_key: + testl %eax,%eax + jz .L088bad_pointer + testl %edx,%edx + jz .L088bad_pointer + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + leal 16(%edx),%edx + cmpl $256,%ecx + je .L08914rounds + cmpl $192,%ecx + je .L09012rounds + cmpl $128,%ecx + jne .L091bad_keybits +.align 16 +.L09210rounds: + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L093key_128_cold +.byte 102,15,58,223,200,2 + call .L094key_128 +.byte 102,15,58,223,200,4 + call .L094key_128 +.byte 102,15,58,223,200,8 + call .L094key_128 +.byte 102,15,58,223,200,16 + call .L094key_128 +.byte 102,15,58,223,200,32 + call .L094key_128 +.byte 102,15,58,223,200,64 + call .L094key_128 +.byte 102,15,58,223,200,128 + call .L094key_128 +.byte 102,15,58,223,200,27 + call .L094key_128 +.byte 102,15,58,223,200,54 + call .L094key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + xorl %eax,%eax + ret +.align 16 +.L094key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L093key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L09012rounds: + movq 16(%eax),%xmm2 + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L095key_192a_cold +.byte 102,15,58,223,202,2 + call .L096key_192b +.byte 102,15,58,223,202,4 + call .L097key_192a +.byte 102,15,58,223,202,8 + call .L096key_192b +.byte 
102,15,58,223,202,16 + call .L097key_192a +.byte 102,15,58,223,202,32 + call .L096key_192b +.byte 102,15,58,223,202,64 + call .L097key_192a +.byte 102,15,58,223,202,128 + call .L096key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + xorl %eax,%eax + ret +.align 16 +.L097key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L095key_192a_cold: + movaps %xmm2,%xmm5 +.L098key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L096key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L098key_192b_warm +.align 16 +.L08914rounds: + movups 16(%eax),%xmm2 + movl $13,%ecx + leal 16(%edx),%edx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L099key_256a_cold +.byte 102,15,58,223,200,1 + call .L100key_256b +.byte 102,15,58,223,202,2 + call .L101key_256a +.byte 102,15,58,223,200,2 + call .L100key_256b +.byte 102,15,58,223,202,4 + call .L101key_256a +.byte 102,15,58,223,200,4 + call .L100key_256b +.byte 102,15,58,223,202,8 + call .L101key_256a +.byte 102,15,58,223,200,8 + call .L100key_256b +.byte 102,15,58,223,202,16 + call .L101key_256a +.byte 102,15,58,223,200,16 + call .L100key_256b +.byte 102,15,58,223,202,32 + call .L101key_256a +.byte 102,15,58,223,200,32 + call .L100key_256b +.byte 102,15,58,223,202,64 + call .L101key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + ret +.align 16 +.L101key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L099key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L100key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 4 +.L088bad_pointer: + movl $-1,%eax + ret +.align 4 +.L091bad_keybits: + movl $-2,%eax + ret +.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +.L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + ret +.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L102dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L103dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L103dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + xorl %eax,%eax +.L102dec_key_ret: + ret +.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 
83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/aes/vpaes-x86.s b/deps/openssl/asm_obsolete/x86-elf-gas/aes/vpaes-x86.s new file mode 100644 index 00000000000000..5dc8d19633a8ae --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/aes/vpaes-x86.s @@ -0,0 +1,661 @@ +.file "vpaes-x86.s" +.text +.align 64 +.L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 64 +.type _vpaes_preheat,@function +.align 16 +_vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa -16(%ebp),%xmm6 + ret +.size _vpaes_preheat,.-_vpaes_preheat +.type _vpaes_encrypt_core,@function +.align 16 +_vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 +.byte 
102,15,56,0,208 + movdqa 16(%ebp),%xmm0 + pxor %xmm5,%xmm2 + psrld $4,%xmm1 + addl $16,%edx +.byte 102,15,56,0,193 + leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 + jmp .L000enc_entry +.align 16 +.L001enc_loop: + movdqa 32(%ebp),%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa 64(%ebp),%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 + movdqa 80(%ebp),%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + subl $1,%eax + pxor %xmm3,%xmm0 +.L000enc_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm5 + pxor %xmm1,%xmm3 + jnz .L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.size _vpaes_encrypt_core,.-_vpaes_encrypt_core +.type _vpaes_decrypt_core,@function +.align 16 +_vpaes_decrypt_core: + leal 608(%ebp),%ebx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp .L002dec_entry +.align 16 +.L003dec_loop: + movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax +.L002dec_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + psrld $4,%xmm1 +.byte 102,15,56,0,208 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 + jnz .L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.size _vpaes_decrypt_core,.-_vpaes_decrypt_core +.type _vpaes_schedule_core,@function +.align 16 +_vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call _vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl 
%edi,%edi + jnz .L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp .L005schedule_go +.L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +.L005schedule_go: + cmpl $192,%eax + ja .L006schedule_256 + je .L007schedule_192 +.L008schedule_128: + movl $10,%eax +.L009loop_schedule_128: + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + jmp .L009loop_schedule_128 +.align 16 +.L007schedule_192: + movdqu 8(%esi),%xmm0 + call _vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +.L011loop_schedule_192: + call _vpaes_schedule_round +.byte 102,15,58,15,198,8 + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + call _vpaes_schedule_mangle + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + call _vpaes_schedule_192_smear + jmp .L011loop_schedule_192 +.align 16 +.L006schedule_256: + movdqu 16(%esi),%xmm0 + call _vpaes_schedule_transform + movl $7,%eax +.L012loop_schedule_256: + call _vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call _vpaes_schedule_round + decl %eax + jz .L010schedule_mangle_last + call _vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call .L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp .L012loop_schedule_256 +.align 16 +.L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz .L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +.L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call _vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.size _vpaes_schedule_core,.-_vpaes_schedule_core +.type _vpaes_schedule_192_smear,@function +.align 16 +_vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear +.type _vpaes_schedule_round,@function +.align 16 +_vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +.L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.size _vpaes_schedule_round,.-_vpaes_schedule_round +.type _vpaes_schedule_transform,@function +.align 16 +_vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + 
movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.size _vpaes_schedule_transform,.-_vpaes_schedule_transform +.type _vpaes_schedule_mangle,@function +.align 16 +_vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz .L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp .L015schedule_mangle_both +.align 16 +.L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +.L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle +.globl vpaes_set_encrypt_key +.type vpaes_set_encrypt_key,@function +.align 16 +vpaes_set_encrypt_key: +.L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal .L_vpaes_consts+0x30-.L016pic_point,%ebp + call _vpaes_schedule_core +.L016pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin +.globl vpaes_set_decrypt_key +.type vpaes_set_decrypt_key,@function +.align 16 +vpaes_set_decrypt_key: +.L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal .L_vpaes_consts+0x30-.L017pic_point,%ebp + call _vpaes_schedule_core +.L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin +.globl vpaes_encrypt +.type vpaes_encrypt,@function +.align 16 +vpaes_encrypt: +.L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L018pic_point,%ebp + call _vpaes_preheat +.L018pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_encrypt,.-.L_vpaes_encrypt_begin +.globl vpaes_decrypt +.type 
vpaes_decrypt,@function +.align 16 +vpaes_decrypt: +.L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal .L_vpaes_consts+0x30-.L019pic_point,%ebp + call _vpaes_preheat +.L019pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call _vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_decrypt,.-.L_vpaes_decrypt_begin +.globl vpaes_cbc_encrypt +.type vpaes_cbc_encrypt,@function +.align 16 +vpaes_cbc_encrypt: +.L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc .L020cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal .L_vpaes_consts+0x30-.L021pic_point,%ebp + call _vpaes_preheat +.L021pic_point: + cmpl $0,%ecx + je .L022cbc_dec_loop + jmp .L023cbc_enc_loop +.align 16 +.L023cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call _vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L023cbc_enc_loop + jmp .L024cbc_done +.align 16 +.L022cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call _vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc .L022cbc_dec_loop +.L024cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +.L020cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bf/bf-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/bf/bf-586.s new file mode 100644 index 00000000000000..aa718d40cd5c2a --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/bf/bf-586.s @@ -0,0 +1,896 @@ +.file "bf-586.s" +.text +.globl BF_encrypt +.type BF_encrypt,@function +.align 16 +BF_encrypt: +.L_BF_encrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + xorl %eax,%eax + movl (%ebp),%ebx + xorl %ecx,%ecx + xorl %ebx,%edi + + + movl 4(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 8(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 12(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 
2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 16(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 20(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 24(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 28(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 32(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 36(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 40(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 44(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 48(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 52(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 
1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 56(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 60(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 64(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + + movl 20(%esp),%eax + xorl %ebx,%edi + movl 68(%ebp),%edx + xorl %edx,%esi + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size BF_encrypt,.-.L_BF_encrypt_begin +.globl BF_decrypt +.type BF_decrypt,@function +.align 16 +BF_decrypt: +.L_BF_decrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + xorl %eax,%eax + movl 68(%ebp),%ebx + xorl %ecx,%ecx + xorl %ebx,%edi + + + movl 64(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 60(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 56(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 52(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 48(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 44(%ebp),%edx + movl 
%esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 40(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 36(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 32(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 28(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 24(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 20(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 16(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%esi + + + movl 12(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl %ebx,%edi + + + movl 8(%ebp),%edx + movl %edi,%ebx + xorl %edx,%esi + shrl $16,%ebx + movl %edi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + xorl %eax,%eax + xorl 
%ebx,%esi + + + movl 4(%ebp),%edx + movl %esi,%ebx + xorl %edx,%edi + shrl $16,%ebx + movl %esi,%edx + movb %bh,%al + andl $255,%ebx + movb %dh,%cl + andl $255,%edx + movl 72(%ebp,%eax,4),%eax + movl 1096(%ebp,%ebx,4),%ebx + addl %eax,%ebx + movl 2120(%ebp,%ecx,4),%eax + xorl %eax,%ebx + movl 3144(%ebp,%edx,4),%edx + addl %edx,%ebx + + movl 20(%esp),%eax + xorl %ebx,%edi + movl (%ebp),%edx + xorl %edx,%esi + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size BF_decrypt,.-.L_BF_decrypt_begin +.globl BF_cbc_encrypt +.type BF_cbc_encrypt,@function +.align 16 +BF_cbc_encrypt: +.L_BF_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 56(%esp),%ecx + + movl 48(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L000decrypt + andl $4294967288,%ebp + movl 8(%esp),%eax + movl 12(%esp),%ebx + jz .L001encrypt_finish +.L002encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L002encrypt_loop +.L001encrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L003finish + call .L004PIC_point +.L004PIC_point: + popl %edx + leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L006ej7: + movb 6(%esi),%dh + shll $8,%edx +.L007ej6: + movb 5(%esi),%dh +.L008ej5: + movb 4(%esi),%dl +.L009ej4: + movl (%esi),%ecx + jmp .L010ejend +.L011ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L012ej2: + movb 1(%esi),%ch +.L013ej1: + movb (%esi),%cl +.L010ejend: + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L003finish +.L000decrypt: + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz .L014decrypt_finish +.L015decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,16(%esp) + movl %ebx,20(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L015decrypt_loop +.L014decrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L003finish + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_BF_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L016dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L017dj6: + movb %dh,5(%edi) +.L018dj5: + movb %dl,4(%edi) +.L019dj4: + movl %ecx,(%edi) + jmp .L020djend +.L021dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L022dj2: + movb %ch,1(%esi) +.L023dj1: + movb %cl,(%esi) +.L020djend: + jmp .L003finish 
+.L003finish: + movl 60(%esp),%ecx + addl $24,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L005cbc_enc_jmp_table: +.long 0 +.long .L013ej1-.L004PIC_point +.long .L012ej2-.L004PIC_point +.long .L011ej3-.L004PIC_point +.long .L009ej4-.L004PIC_point +.long .L008ej5-.L004PIC_point +.long .L007ej6-.L004PIC_point +.long .L006ej7-.L004PIC_point +.align 64 +.size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bn/bn-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/bn/bn-586.s new file mode 100644 index 00000000000000..3e20e9df09cc2a --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/bn/bn-586.s @@ -0,0 +1,1521 @@ +.file "../openssl/crypto/bn/asm/bn-586.s" +.text +.globl bn_mul_add_words +.type bn_mul_add_words,@function +.align 16 +bn_mul_add_words: +.L_bn_mul_add_words_begin: + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L000maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp .L001maw_sse2_entry +.align 16 +.L002maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz .L003maw_sse2_exit +.L001maw_sse2_entry: + testl $4294967288,%ecx + jnz .L002maw_sse2_unrolled +.align 4 +.L004maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L004maw_sse2_loop +.L003maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 16 +.L000maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz .L005maw_finish +.align 16 +.L006maw_loop: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 
16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz .L006maw_loop +.L005maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz .L007maw_finish2 + jmp .L008maw_end +.L007maw_finish2: + + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz .L008maw_end + + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L008maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_add_words,.-.L_bn_mul_add_words_begin +.globl bn_mul_words +.type bn_mul_words,@function +.align 16 +bn_mul_words: +.L_bn_mul_words_begin: + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L009mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 16 +.L010mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz .L010mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 16 +.L009mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz .L011mw_finish +.L012mw_loop: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + 
movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz .L011mw_finish + jmp .L012mw_loop +.L011mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz .L013mw_finish2 + jmp .L014mw_end +.L013mw_finish2: + + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz .L014mw_end + + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +.L014mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_words,.-.L_bn_mul_words_begin +.globl bn_sqr_words +.type bn_sqr_words,@function +.align 16 +bn_sqr_words: +.L_bn_sqr_words_begin: + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L015sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 16 +.L016sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz .L016sqr_sse2_loop + emms + ret +.align 16 +.L015sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz .L017sw_finish +.L018sw_loop: + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz .L018sw_loop +.L017sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz .L019sw_end + + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz .L019sw_end + + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz .L019sw_end + + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz .L019sw_end + + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz .L019sw_end + + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz .L019sw_end + + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz .L019sw_end + + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +.L019sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sqr_words,.-.L_bn_sqr_words_begin +.globl bn_div_words +.type bn_div_words,@function +.align 16 +bn_div_words: +.L_bn_div_words_begin: + movl 4(%esp),%edx + 
movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.size bn_div_words,.-.L_bn_div_words_begin +.globl bn_add_words +.type bn_add_words,@function +.align 16 +bn_add_words: +.L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L020aw_finish +.L021aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L021aw_loop +.L020aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L022aw_end + + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L022aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L022aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L022aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L022aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L022aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L022aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L022aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_add_words,.-.L_bn_add_words_begin +.globl bn_sub_words +.type bn_sub_words,@function +.align 16 +bn_sub_words: +.L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L023aw_finish +.L024aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + 
adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L024aw_loop +.L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L025aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L025aw_end + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L025aw_end + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L025aw_end + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L025aw_end + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L025aw_end + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L025aw_end + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +.L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_words,.-.L_bn_sub_words_begin +.globl bn_sub_part_words +.type bn_sub_part_words,@function +.align 16 +bn_sub_part_words: +.L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz .L026aw_finish +.L027aw_loop: + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + 
adcl $0,%eax + movl %ecx,20(%ebx) + + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L027aw_loop +.L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz .L028aw_end + + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +.L028aw_end: + cmpl $0,36(%esp) + je .L029pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je .L029pw_end + jge .L030pw_pos + + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz .L031pw_neg_finish +.L032pw_neg_loop: + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + + movl $0,%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz .L032pw_neg_loop +.L031pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz .L029pw_end + + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl 
%eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz .L029pw_end + + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp .L029pw_end +.L030pw_pos: + andl $4294967288,%ebp + jz .L033pw_pos_finish +.L034pw_pos_loop: + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L035pw_nc0 + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L036pw_nc1 + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L037pw_nc2 + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L038pw_nc3 + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L039pw_nc4 + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L040pw_nc5 + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L041pw_nc6 + + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc .L042pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L034pw_pos_loop +.L033pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz .L029pw_end + + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc .L043pw_tail_nc0 + decl %ebp + jz .L029pw_end + + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc .L044pw_tail_nc1 + decl %ebp + jz .L029pw_end + + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc .L045pw_tail_nc2 + decl %ebp + jz .L029pw_end + + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc .L046pw_tail_nc3 + decl %ebp + jz .L029pw_end + + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc .L047pw_tail_nc4 + decl %ebp + jz .L029pw_end + + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc .L048pw_tail_nc5 + decl %ebp + jz .L029pw_end + + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc .L049pw_tail_nc6 + movl $1,%eax + jmp .L029pw_end +.L050pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +.L035pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L036pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L037pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L038pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L039pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L040pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L041pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +.L042pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz .L050pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz .L051pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +.L043pw_tail_nc0: + decl %ebp + jz .L051pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +.L044pw_tail_nc1: + decl %ebp + jz .L051pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +.L045pw_tail_nc2: + 
decl %ebp + jz .L051pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +.L046pw_tail_nc3: + decl %ebp + jz .L051pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +.L047pw_tail_nc4: + decl %ebp + jz .L051pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +.L048pw_tail_nc5: + decl %ebp + jz .L051pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +.L049pw_tail_nc6: +.L051pw_nc_end: + movl $0,%eax +.L029pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_sub_part_words,.-.L_bn_sub_part_words_begin +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bn/co-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/bn/co-586.s new file mode 100644 index 00000000000000..1128e3e959a3a6 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/bn/co-586.s @@ -0,0 +1,1254 @@ +.file "../openssl/crypto/bn/asm/co-586.s" +.text +.globl bn_mul_comba8 +.type bn_mul_comba8,@function +.align 16 +bn_mul_comba8: +.L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl 
%eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + + mull %edx + addl 
%eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + + + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba8,.-.L_bn_mul_comba8_begin +.globl bn_mul_comba4 +.type bn_mul_comba4,@function +.align 16 +bn_mul_comba4: +.L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl 
%edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%eax) + + + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_mul_comba4,.-.L_bn_mul_comba4_begin +.globl bn_sqr_comba8 +.type bn_sqr_comba8,@function +.align 16 +bn_sqr_comba8: +.L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + 
adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + + + xorl %ecx,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba8,.-.L_bn_sqr_comba8_begin +.globl bn_sqr_comba4 +.type bn_sqr_comba4,@function +.align 16 +bn_sqr_comba4: +.L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + 
movl (%esi),%edx + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + + + xorl %ebp,%ebp + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + + + xorl %ebx,%ebx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + + + xorl %ecx,%ecx + + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + + + xorl %ebp,%ebp + + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size bn_sqr_comba4,.-.L_bn_sqr_comba4_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-gf2m.s b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-gf2m.s new file mode 100644 index 00000000000000..d8fff7c102714d --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-gf2m.s @@ -0,0 +1,343 @@ +.file "../openssl/crypto/bn/asm/x86-gf2m.s" +.text +.type _mul_1x1_mmx,@function +.align 16 +_mul_1x1_mmx: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + andl $1073741823,%ecx + leal (%edx,%edx,1),%ebp + movl $0,(%esp) + andl $2147483647,%edx + movd %eax,%mm2 + movd %ebx,%mm3 + movl %ecx,4(%esp) + xorl %edx,%ecx + pxor %mm5,%mm5 + pxor %mm4,%mm4 + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + pcmpgtd %mm2,%mm5 + paddd %mm2,%mm2 + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + pand %mm3,%mm5 + pcmpgtd %mm2,%mm4 + movl %ecx,20(%esp) + xorl %ecx,%ebp + psllq $31,%mm5 + pand %mm3,%mm4 + movl %edx,24(%esp) + movl $7,%esi + movl %ebp,28(%esp) + movl %esi,%ebp + andl %ebx,%esi + shrl $3,%ebx + movl %ebp,%edi + psllq $30,%mm4 + andl %ebx,%edi + shrl $3,%ebx + movd (%esp,%esi,4),%mm0 + movl %ebp,%esi + andl %ebx,%esi + shrl $3,%ebx + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $3,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $6,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $9,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $12,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $15,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $18,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $21,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $24,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + pxor %mm4,%mm0 + psllq $27,%mm2 + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + pxor %mm5,%mm0 + psllq $30,%mm1 + addl $36,%esp + pxor %mm1,%mm0 + ret +.size _mul_1x1_mmx,.-_mul_1x1_mmx +.type 
_mul_1x1_ialu,@function +.align 16 +_mul_1x1_ialu: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + leal (,%eax,4),%ebp + andl $1073741823,%ecx + leal (%eax,%eax,1),%edi + sarl $31,%eax + movl $0,(%esp) + andl $2147483647,%edx + movl %ecx,4(%esp) + xorl %edx,%ecx + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + movl %ecx,20(%esp) + xorl %ecx,%ebp + sarl $31,%edi + andl %ebx,%eax + movl %edx,24(%esp) + andl %ebx,%edi + movl %ebp,28(%esp) + movl %eax,%edx + shll $31,%eax + movl %edi,%ecx + shrl $1,%edx + movl $7,%esi + shll $30,%edi + andl %ebx,%esi + shrl $2,%ecx + xorl %edi,%eax + shrl $3,%ebx + movl $7,%edi + andl %ebx,%edi + shrl $3,%ebx + xorl %ecx,%edx + xorl (%esp,%esi,4),%eax + movl $7,%esi + andl %ebx,%esi + shrl $3,%ebx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $3,%ebp + andl %ebx,%edi + shrl $29,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $6,%ecx + andl %ebx,%esi + shrl $26,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $9,%ebp + andl %ebx,%edi + shrl $23,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $12,%ecx + andl %ebx,%esi + shrl $20,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $15,%ebp + andl %ebx,%edi + shrl $17,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $18,%ecx + andl %ebx,%esi + shrl $14,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $21,%ebp + andl %ebx,%edi + shrl $11,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $24,%ecx + andl %ebx,%esi + shrl $8,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl %ebp,%ecx + shll $27,%ebp + movl (%esp,%esi,4),%edi + shrl $5,%ecx + movl %edi,%esi + xorl %ebp,%eax + shll $30,%edi + xorl %ecx,%edx + shrl $2,%esi + xorl %edi,%eax + xorl %esi,%edx + addl $36,%esp + ret +.size _mul_1x1_ialu,.-_mul_1x1_ialu +.globl bn_GF2m_mul_2x2 +.type bn_GF2m_mul_2x2,@function +.align 16 +bn_GF2m_mul_2x2: +.L_bn_GF2m_mul_2x2_begin: + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%eax + movl 4(%edx),%edx + testl $8388608,%eax + jz .L000ialu + testl $16777216,%eax + jz .L001mmx + testl $2,%edx + jz .L001mmx + movups 8(%esp),%xmm0 + shufps $177,%xmm0,%xmm0 +.byte 102,15,58,68,192,1 + movl 4(%esp),%eax + movups %xmm0,(%eax) + ret +.align 16 +.L001mmx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%eax + movl 32(%esp),%ebx + call _mul_1x1_mmx + movq %mm0,%mm7 + movl 28(%esp),%eax + movl 36(%esp),%ebx + call _mul_1x1_mmx + movq %mm0,%mm6 + movl 24(%esp),%eax + movl 32(%esp),%ebx + xorl 28(%esp),%eax + xorl 36(%esp),%ebx + call _mul_1x1_mmx + pxor %mm7,%mm0 + movl 20(%esp),%eax + pxor %mm6,%mm0 + movq %mm0,%mm2 + psllq $32,%mm0 + popl %edi + psrlq $32,%mm2 + popl %esi + pxor %mm6,%mm0 + popl %ebx + pxor %mm7,%mm2 + movq %mm0,(%eax) + popl %ebp + movq %mm2,8(%eax) + emms + ret +.align 16 +.L000ialu: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $20,%esp + movl 44(%esp),%eax + movl 52(%esp),%ebx + call _mul_1x1_ialu + movl %eax,8(%esp) + movl %edx,12(%esp) + movl 48(%esp),%eax + movl 
56(%esp),%ebx + call _mul_1x1_ialu + movl %eax,(%esp) + movl %edx,4(%esp) + movl 44(%esp),%eax + movl 52(%esp),%ebx + xorl 48(%esp),%eax + xorl 56(%esp),%ebx + call _mul_1x1_ialu + movl 40(%esp),%ebp + movl (%esp),%ebx + movl 4(%esp),%ecx + movl 8(%esp),%edi + movl 12(%esp),%esi + xorl %edx,%eax + xorl %ecx,%edx + xorl %ebx,%eax + movl %ebx,(%ebp) + xorl %edi,%edx + movl %esi,12(%ebp) + xorl %esi,%eax + addl $20,%esp + xorl %esi,%edx + popl %edi + xorl %edx,%eax + popl %esi + movl %edx,8(%ebp) + popl %ebx + movl %eax,4(%ebp) + popl %ebp + ret +.size bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s new file mode 100644 index 00000000000000..1d815a0472f562 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/bn/x86-mont.s @@ -0,0 +1,456 @@ +.file "../openssl/crypto/bn/asm/x86-mont.s" +.text +.globl bn_mul_mont +.type bn_mul_mont,@function +.align 16 +bn_mul_mont: +.L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl .L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + movl %esp,%ebp + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%esp + negl %edi + movl %esp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%esp + xorl %esp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%esp + andl $-64,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %ebp,24(%esp) + leal OPENSSL_ia32cap_P,%eax + btl $26,(%eax) + jnc .L001non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 16 +.L0021st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl .L0021st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +.L003outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +.L004inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 
36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz .L004inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle .L003outer + emms + jmp .L005common_tail +.align 16 +.L001non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz .L006bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 16 +.L007mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L007mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp .L0082ndmadd +.align 16 +.L0091stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl .L0091stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 16 +.L0082ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0082ndmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je .L005common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp .L0091stmadd +.align 16 +.L006bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 16 +.L010sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl .L010sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl 
%ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 16 +.L0113rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl .L0113rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je .L005common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je .L012sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 16 +.L013sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle .L013sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +.L012sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp .L0113rdmadd +.align 16 +.L005common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 16 +.L014sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge .L014sub + sbbl $0,%eax + andl %eax,%esi + notl %eax + movl %edi,%ebp + andl %eax,%ebp + orl %ebp,%esi +.align 16 +.L015copy: + movl (%esi,%ebx,4),%eax + movl %eax,(%edi,%ebx,4) + movl %ecx,32(%esp,%ebx,4) + decl %ebx + jge .L015copy + movl 24(%esp),%esp + movl $1,%eax +.L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size bn_mul_mont,.-.L_bn_mul_mont_begin +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/camellia/cmll-x86.s b/deps/openssl/asm_obsolete/x86-elf-gas/camellia/cmll-x86.s new file mode 100644 index 00000000000000..5c87910e34eb36 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/camellia/cmll-x86.s @@ -0,0 +1,2375 @@ +.file "cmll-586.s" +.text +.globl Camellia_EncryptBlock_Rounds +.type Camellia_EncryptBlock_Rounds,@function +.align 16 +Camellia_EncryptBlock_Rounds: +.L_Camellia_EncryptBlock_Rounds_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + leal -127(%edi),%ecx + 
subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + leal (%edi,%eax,1),%eax + movl %ebx,20(%esp) + movl %eax,16(%esp) + call .L000pic_point +.L000pic_point: + popl %ebp + leal .LCamellia_SBOX-.L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call _x86_Camellia_encrypt + movl 20(%esp),%esp + bswap %eax + movl 32(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size Camellia_EncryptBlock_Rounds,.-.L_Camellia_EncryptBlock_Rounds_begin +.globl Camellia_EncryptBlock +.type Camellia_EncryptBlock,@function +.align 16 +Camellia_EncryptBlock: +.L_Camellia_EncryptBlock_begin: + movl $128,%eax + subl 4(%esp),%eax + movl $3,%eax + adcl $0,%eax + movl %eax,4(%esp) + jmp .L_Camellia_EncryptBlock_Rounds_begin +.size Camellia_EncryptBlock,.-.L_Camellia_EncryptBlock_begin +.globl Camellia_encrypt +.type Camellia_encrypt,@function +.align 16 +Camellia_encrypt: +.L_Camellia_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + movl 272(%edi),%eax + leal -127(%edi),%ecx + subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + leal (%edi,%eax,1),%eax + movl %ebx,20(%esp) + movl %eax,16(%esp) + call .L001pic_point +.L001pic_point: + popl %ebp + leal .LCamellia_SBOX-.L001pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call _x86_Camellia_encrypt + movl 20(%esp),%esp + bswap %eax + movl 24(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size Camellia_encrypt,.-.L_Camellia_encrypt_begin +.type _x86_Camellia_encrypt,@function +.align 16 +_x86_Camellia_encrypt: + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 16(%edi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) +.align 16 +.L002loop: + xorl %esi,%eax + xorl 20(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 24(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl 28(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 32(%edi),%esi + xorl %eax,%ebx + movl 
%ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl 36(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 40(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl 44(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 48(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl 52(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 56(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl 60(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 64(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + addl $64,%edi + cmpl 20(%esp),%edi + je .L003done + andl %eax,%esi + movl 16(%esp),%edx + roll $1,%esi + movl %edx,%ecx + xorl %esi,%ebx + orl 12(%edi),%ecx + movl %ebx,8(%esp) + xorl 12(%esp),%ecx + movl 4(%edi),%esi + movl %ecx,12(%esp) + orl %ebx,%esi + andl 8(%edi),%ecx + xorl %esi,%eax + roll $1,%ecx + movl %eax,4(%esp) + xorl %ecx,%edx + movl 16(%edi),%esi + movl %edx,16(%esp) + jmp .L002loop +.align 8 +.L003done: + movl %eax,%ecx + movl %ebx,%edx + movl 12(%esp),%eax + movl 16(%esp),%ebx + xorl %esi,%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.size _x86_Camellia_encrypt,.-_x86_Camellia_encrypt +.globl Camellia_DecryptBlock_Rounds +.type Camellia_DecryptBlock_Rounds,@function +.align 16 +Camellia_DecryptBlock_Rounds: +.L_Camellia_DecryptBlock_Rounds_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + leal -127(%edi),%ecx + subl 
%esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + movl %edi,16(%esp) + leal (%edi,%eax,1),%edi + movl %ebx,20(%esp) + call .L004pic_point +.L004pic_point: + popl %ebp + leal .LCamellia_SBOX-.L004pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call _x86_Camellia_decrypt + movl 20(%esp),%esp + bswap %eax + movl 32(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size Camellia_DecryptBlock_Rounds,.-.L_Camellia_DecryptBlock_Rounds_begin +.globl Camellia_DecryptBlock +.type Camellia_DecryptBlock,@function +.align 16 +Camellia_DecryptBlock: +.L_Camellia_DecryptBlock_begin: + movl $128,%eax + subl 4(%esp),%eax + movl $3,%eax + adcl $0,%eax + movl %eax,4(%esp) + jmp .L_Camellia_DecryptBlock_Rounds_begin +.size Camellia_DecryptBlock,.-.L_Camellia_DecryptBlock_begin +.globl Camellia_decrypt +.type Camellia_decrypt,@function +.align 16 +Camellia_decrypt: +.L_Camellia_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + movl 272(%edi),%eax + leal -127(%edi),%ecx + subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + movl %edi,16(%esp) + leal (%edi,%eax,1),%edi + movl %ebx,20(%esp) + call .L005pic_point +.L005pic_point: + popl %ebp + leal .LCamellia_SBOX-.L005pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call _x86_Camellia_decrypt + movl 20(%esp),%esp + bswap %eax + movl 24(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size Camellia_decrypt,.-.L_Camellia_decrypt_begin +.type _x86_Camellia_decrypt,@function +.align 16 +_x86_Camellia_decrypt: + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl -8(%edi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) +.align 16 +.L006loop: + xorl %esi,%eax + xorl -4(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl -16(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl -12(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl -24(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) 
+ xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl -20(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl -32(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl -28(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl -40(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl -36(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl -48(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl -44(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl -56(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + subl $64,%edi + cmpl 20(%esp),%edi + je .L007done + andl %eax,%esi + movl 16(%esp),%edx + roll $1,%esi + movl %edx,%ecx + xorl %esi,%ebx + orl 4(%edi),%ecx + movl %ebx,8(%esp) + xorl 12(%esp),%ecx + movl 12(%edi),%esi + movl %ecx,12(%esp) + orl %ebx,%esi + andl (%edi),%ecx + xorl %esi,%eax + roll $1,%ecx + movl %eax,4(%esp) + xorl %ecx,%edx + movl -8(%edi),%esi + movl %edx,16(%esp) + jmp .L006loop +.align 8 +.L007done: + movl %eax,%ecx + movl %ebx,%edx + movl 12(%esp),%eax + movl 16(%esp),%ebx + xorl %esi,%ecx + xorl 12(%edi),%edx + xorl (%edi),%eax + xorl 4(%edi),%ebx + ret +.size _x86_Camellia_decrypt,.-_x86_Camellia_decrypt +.globl Camellia_Ekeygen +.type Camellia_Ekeygen,@function +.align 16 +Camellia_Ekeygen: +.L_Camellia_Ekeygen_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $16,%esp + movl 36(%esp),%ebp + movl 40(%esp),%esi + movl 44(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + 
bswap %edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + cmpl $128,%ebp + je .L0081st128 + movl 16(%esi),%eax + movl 20(%esi),%ebx + cmpl $192,%ebp + je .L0091st192 + movl 24(%esi),%ecx + movl 28(%esi),%edx + jmp .L0101st256 +.align 4 +.L0091st192: + movl %eax,%ecx + movl %ebx,%edx + notl %ecx + notl %edx +.align 4 +.L0101st256: + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%edi) + movl %ebx,36(%edi) + movl %ecx,40(%edi) + movl %edx,44(%edi) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx +.align 4 +.L0081st128: + call .L011pic_point +.L011pic_point: + popl %ebp + leal .LCamellia_SBOX-.L011pic_point(%ebp),%ebp + leal .LCamellia_SIGMA-.LCamellia_SBOX(%ebp),%edi + movl (%edi),%esi + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + xorl %esi,%eax + xorl 4(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 12(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 8(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 8(%edi),%esi + xorl %ecx,%edx + movl %edx,12(%esp) + xorl %ebx,%ecx + movl %ecx,8(%esp) + xorl %esi,%ecx + xorl 12(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 4(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl (%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 16(%edi),%esi + xorl %eax,%ebx + movl %ebx,4(%esp) + xorl %edx,%eax + movl %eax,(%esp) + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 44(%esp),%esi + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 16(%edi),%esi + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + xorl %esi,%eax + xorl 20(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 12(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 8(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 24(%edi),%esi + xorl %ecx,%edx + movl %edx,12(%esp) + xorl %ebx,%ecx + movl %ecx,8(%esp) + xorl %esi,%ecx + xorl 28(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 4(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl (%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 32(%edi),%esi + xorl %eax,%ebx + movl 
%ebx,4(%esp) + xorl %edx,%eax + movl %eax,(%esp) + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 36(%esp),%esi + cmpl $128,%esi + jne .L0122nd256 + movl 44(%esp),%edi + leal 128(%edi),%edi + movl %eax,-112(%edi) + movl %ebx,-108(%edi) + movl %ecx,-104(%edi) + movl %edx,-100(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-80(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-76(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-72(%edi) + movl %edx,-68(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-64(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-60(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-56(%edi) + movl %edx,-52(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-32(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-28(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-16(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-12(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-8(%edi) + movl %edx,-4(%edi) + movl %ebx,%ebp + shll $2,%ebx + movl %ecx,%esi + shrl $30,%esi + shll $2,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $2,%edx + movl %ebx,32(%edi) + shrl $30,%esi + orl %esi,%ecx + shrl $30,%ebp + movl %eax,%esi + shrl $30,%esi + movl %ecx,36(%edi) + shll $2,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,40(%edi) + movl %eax,44(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,64(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,68(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,72(%edi) + movl %eax,76(%edi) + movl -128(%edi),%ebx + movl -124(%edi),%ecx + movl -120(%edi),%edx + movl -116(%edi),%eax + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + movl %ebx,-96(%edi) + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + movl %ecx,-92(%edi) + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-88(%edi) + movl %eax,-84(%edi) + movl %ebx,%ebp + shll $30,%ebx + movl %ecx,%esi + shrl $2,%esi + shll $30,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $30,%edx + movl %ebx,-48(%edi) + shrl $2,%esi + orl %esi,%ecx + shrl $2,%ebp + movl %eax,%esi + shrl $2,%esi + movl %ecx,-44(%edi) + shll $30,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-40(%edi) + movl %eax,-36(%edi) + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-24(%edi) + movl %eax,-20(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,(%edi) + 
shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,4(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,8(%edi) + movl %eax,12(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,16(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,20(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,24(%edi) + movl %eax,28(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,48(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,52(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,56(%edi) + movl %eax,60(%edi) + movl $3,%eax + jmp .L013done +.align 16 +.L0122nd256: + movl 44(%esp),%esi + movl %eax,48(%esi) + movl %ebx,52(%esi) + movl %ecx,56(%esi) + movl %edx,60(%esi) + xorl 32(%esi),%eax + xorl 36(%esi),%ebx + xorl 40(%esi),%ecx + xorl 44(%esi),%edx + movl 32(%edi),%esi + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + xorl %esi,%eax + xorl 36(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 12(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 8(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 40(%edi),%esi + xorl %ecx,%edx + movl %edx,12(%esp) + xorl %ebx,%ecx + movl %ecx,8(%esp) + xorl %esi,%ecx + xorl 44(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 4(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl (%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 48(%edi),%esi + xorl %eax,%ebx + movl %ebx,4(%esp) + xorl %edx,%eax + movl %eax,(%esp) + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 44(%esp),%edi + leal 128(%edi),%edi + movl %eax,-112(%edi) + movl %ebx,-108(%edi) + movl %ecx,-104(%edi) + movl %edx,-100(%edi) + movl %eax,%ebp + shll $30,%eax + movl %ebx,%esi + shrl $2,%esi + shll $30,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $30,%ecx + movl %eax,-48(%edi) + shrl $2,%esi + orl %esi,%ebx + shrl $2,%ebp + movl %edx,%esi + shrl $2,%esi + movl %ebx,-44(%edi) + shll $30,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-40(%edi) + movl %edx,-36(%edi) + movl %eax,%ebp + shll $30,%eax + movl %ebx,%esi + shrl $2,%esi + shll $30,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $30,%ecx + movl %eax,32(%edi) + shrl $2,%esi + orl %esi,%ebx + shrl $2,%ebp + movl %edx,%esi + shrl $2,%esi + movl %ebx,36(%edi) + shll $30,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,40(%edi) + movl %edx,44(%edi) + movl %ebx,%ebp + shll $19,%ebx + movl %ecx,%esi + shrl $13,%esi + shll $19,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $19,%edx + movl %ebx,128(%edi) + shrl $13,%esi + orl %esi,%ecx + shrl $13,%ebp + movl %eax,%esi + shrl 
$13,%esi + movl %ecx,132(%edi) + shll $19,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,136(%edi) + movl %eax,140(%edi) + movl -96(%edi),%ebx + movl -92(%edi),%ecx + movl -88(%edi),%edx + movl -84(%edi),%eax + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + movl %ebx,-96(%edi) + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + movl %ecx,-92(%edi) + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-88(%edi) + movl %eax,-84(%edi) + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + movl %ebx,-64(%edi) + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + movl %ecx,-60(%edi) + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-56(%edi) + movl %eax,-52(%edi) + movl %ebx,%ebp + shll $30,%ebx + movl %ecx,%esi + shrl $2,%esi + shll $30,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $30,%edx + movl %ebx,16(%edi) + shrl $2,%esi + orl %esi,%ecx + shrl $2,%ebp + movl %eax,%esi + shrl $2,%esi + movl %ecx,20(%edi) + shll $30,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,24(%edi) + movl %eax,28(%edi) + movl %ecx,%ebp + shll $2,%ecx + movl %edx,%esi + shrl $30,%esi + shll $2,%edx + orl %esi,%ecx + movl %eax,%esi + shll $2,%eax + movl %ecx,80(%edi) + shrl $30,%esi + orl %esi,%edx + shrl $30,%ebp + movl %ebx,%esi + shrl $30,%esi + movl %edx,84(%edi) + shll $2,%ebx + orl %esi,%eax + orl %ebp,%ebx + movl %eax,88(%edi) + movl %ebx,92(%edi) + movl -80(%edi),%ecx + movl -76(%edi),%edx + movl -72(%edi),%eax + movl -68(%edi),%ebx + movl %ecx,%ebp + shll $15,%ecx + movl %edx,%esi + shrl $17,%esi + shll $15,%edx + orl %esi,%ecx + movl %eax,%esi + shll $15,%eax + movl %ecx,-80(%edi) + shrl $17,%esi + orl %esi,%edx + shrl $17,%ebp + movl %ebx,%esi + shrl $17,%esi + movl %edx,-76(%edi) + shll $15,%ebx + orl %esi,%eax + orl %ebp,%ebx + movl %eax,-72(%edi) + movl %ebx,-68(%edi) + movl %ecx,%ebp + shll $30,%ecx + movl %edx,%esi + shrl $2,%esi + shll $30,%edx + orl %esi,%ecx + movl %eax,%esi + shll $30,%eax + movl %ecx,-16(%edi) + shrl $2,%esi + orl %esi,%edx + shrl $2,%ebp + movl %ebx,%esi + shrl $2,%esi + movl %edx,-12(%edi) + shll $30,%ebx + orl %esi,%eax + orl %ebp,%ebx + movl %eax,-8(%edi) + movl %ebx,-4(%edi) + movl %edx,64(%edi) + movl %eax,68(%edi) + movl %ebx,72(%edi) + movl %ecx,76(%edi) + movl %edx,%ebp + shll $17,%edx + movl %eax,%esi + shrl $15,%esi + shll $17,%eax + orl %esi,%edx + movl %ebx,%esi + shll $17,%ebx + movl %edx,96(%edi) + shrl $15,%esi + orl %esi,%eax + shrl $15,%ebp + movl %ecx,%esi + shrl $15,%esi + movl %eax,100(%edi) + shll $17,%ecx + orl %esi,%ebx + orl %ebp,%ecx + movl %ebx,104(%edi) + movl %ecx,108(%edi) + movl -128(%edi),%edx + movl -124(%edi),%eax + movl -120(%edi),%ebx + movl -116(%edi),%ecx + movl %eax,%ebp + shll $13,%eax + movl %ebx,%esi + shrl $19,%esi + shll $13,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $13,%ecx + movl %eax,-32(%edi) + shrl $19,%esi + orl %esi,%ebx + shrl $19,%ebp + movl %edx,%esi + shrl $19,%esi + movl %ebx,-28(%edi) + shll $13,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-24(%edi) + movl %edx,-20(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,4(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,8(%edi) 
+ movl %edx,12(%edi) + movl %eax,%ebp + shll $17,%eax + movl %ebx,%esi + shrl $15,%esi + shll $17,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $17,%ecx + movl %eax,48(%edi) + shrl $15,%esi + orl %esi,%ebx + shrl $15,%ebp + movl %edx,%esi + shrl $15,%esi + movl %ebx,52(%edi) + shll $17,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,56(%edi) + movl %edx,60(%edi) + movl %ebx,%ebp + shll $2,%ebx + movl %ecx,%esi + shrl $30,%esi + shll $2,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $2,%edx + movl %ebx,112(%edi) + shrl $30,%esi + orl %esi,%ecx + shrl $30,%ebp + movl %eax,%esi + shrl $30,%esi + movl %ecx,116(%edi) + shll $2,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,120(%edi) + movl %eax,124(%edi) + movl $4,%eax +.L013done: + leal 144(%edi),%edx + addl $16,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size Camellia_Ekeygen,.-.L_Camellia_Ekeygen_begin +.globl private_Camellia_set_key +.type private_Camellia_set_key,@function +.align 16 +private_Camellia_set_key: +.L_private_Camellia_set_key_begin: + pushl %ebx + movl 8(%esp),%ecx + movl 12(%esp),%ebx + movl 16(%esp),%edx + movl $-1,%eax + testl %ecx,%ecx + jz .L014done + testl %edx,%edx + jz .L014done + movl $-2,%eax + cmpl $256,%ebx + je .L015arg_ok + cmpl $192,%ebx + je .L015arg_ok + cmpl $128,%ebx + jne .L014done +.align 4 +.L015arg_ok: + pushl %edx + pushl %ecx + pushl %ebx + call .L_Camellia_Ekeygen_begin + addl $12,%esp + movl %eax,(%edx) + xorl %eax,%eax +.align 4 +.L014done: + popl %ebx + ret +.size private_Camellia_set_key,.-.L_private_Camellia_set_key_begin +.align 64 +.LCamellia_SIGMA: +.long 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 +.align 64 +.LCamellia_SBOX: +.long 1886416896,1886388336 +.long 2189591040,741081132 +.long 741092352,3014852787 +.long 3974949888,3233808576 +.long 3014898432,3840147684 +.long 656877312,1465319511 +.long 3233857536,3941204202 +.long 3857048832,2930639022 +.long 3840205824,589496355 +.long 2240120064,1802174571 +.long 1465341696,1162149957 +.long 892679424,2779054245 +.long 3941263872,3991732461 +.long 202116096,1330577487 +.long 2930683392,488439837 +.long 1094795520,2459041938 +.long 589505280,2256928902 +.long 4025478912,2947481775 +.long 1802201856,2088501372 +.long 2475922176,522125343 +.long 1162167552,1044250686 +.long 421075200,3705405660 +.long 2779096320,1583218782 +.long 555819264,185270283 +.long 3991792896,2795896998 +.long 235802112,960036921 +.long 1330597632,3587506389 +.long 1313754624,1566376029 +.long 488447232,3654877401 +.long 1701143808,1515847770 +.long 2459079168,1364262993 +.long 3183328512,1819017324 +.long 2256963072,2341142667 +.long 3099113472,2593783962 +.long 2947526400,4227531003 +.long 2408550144,2964324528 +.long 2088532992,1953759348 +.long 3958106880,724238379 +.long 522133248,4042260720 +.long 3469659648,2223243396 +.long 1044266496,3755933919 +.long 808464384,3419078859 +.long 3705461760,875823156 +.long 1600085760,1987444854 +.long 1583242752,1835860077 +.long 3318072576,2846425257 +.long 185273088,3520135377 +.long 437918208,67371012 +.long 2795939328,336855060 +.long 3789676800,976879674 +.long 960051456,3739091166 +.long 3402287616,286326801 +.long 3587560704,842137650 +.long 1195853568,2627469468 +.long 1566399744,1397948499 +.long 1027423488,4075946226 +.long 3654932736,4278059262 +.long 16843008,3486449871 +.long 1515870720,3284336835 +.long 3604403712,2054815866 +.long 1364283648,606339108 +.long 1448498688,3907518696 +.long 
1819044864,1616904288 +.long 1296911616,1768489065 +.long 2341178112,2863268010 +.long 218959104,2694840480 +.long 2593823232,2711683233 +.long 1717986816,1650589794 +.long 4227595008,1414791252 +.long 3435973632,505282590 +.long 2964369408,3772776672 +.long 757935360,1684275300 +.long 1953788928,269484048 +.long 303174144,0 +.long 724249344,2745368739 +.long 538976256,1970602101 +.long 4042321920,2324299914 +.long 2981212416,3873833190 +.long 2223277056,151584777 +.long 2576980224,3722248413 +.long 3755990784,2273771655 +.long 1280068608,2206400643 +.long 3419130624,3452764365 +.long 3267543552,2425356432 +.long 875836416,1936916595 +.long 2122219008,4143317238 +.long 1987474944,2644312221 +.long 84215040,3216965823 +.long 1835887872,1381105746 +.long 3082270464,3638034648 +.long 2846468352,3368550600 +.long 825307392,3334865094 +.long 3520188672,2172715137 +.long 387389184,1869545583 +.long 67372032,320012307 +.long 3621246720,1667432547 +.long 336860160,3924361449 +.long 1482184704,2812739751 +.long 976894464,2677997727 +.long 1633771776,3166437564 +.long 3739147776,690552873 +.long 454761216,4193845497 +.long 286331136,791609391 +.long 471604224,3031695540 +.long 842150400,2021130360 +.long 252645120,101056518 +.long 2627509248,3890675943 +.long 370546176,1903231089 +.long 1397969664,3570663636 +.long 404232192,2880110763 +.long 4076007936,2290614408 +.long 572662272,2374828173 +.long 4278124032,1920073842 +.long 1145324544,3115909305 +.long 3486502656,4177002744 +.long 2998055424,2896953516 +.long 3284386560,909508662 +.long 3048584448,707395626 +.long 2054846976,1010565180 +.long 2442236160,4059103473 +.long 606348288,1077936192 +.long 134744064,3553820883 +.long 3907577856,3149594811 +.long 2829625344,1128464451 +.long 1616928768,353697813 +.long 4244438016,2913796269 +.long 1768515840,2004287607 +.long 1347440640,2155872384 +.long 2863311360,2189557890 +.long 3503345664,3974889708 +.long 2694881280,656867367 +.long 2105376000,3856990437 +.long 2711724288,2240086149 +.long 2307492096,892665909 +.long 1650614784,202113036 +.long 2543294208,1094778945 +.long 1414812672,4025417967 +.long 1532713728,2475884691 +.long 505290240,421068825 +.long 2509608192,555810849 +.long 3772833792,235798542 +.long 4294967040,1313734734 +.long 1684300800,1701118053 +.long 3537031680,3183280317 +.long 269488128,3099066552 +.long 3301229568,2408513679 +.long 0,3958046955 +.long 1212696576,3469607118 +.long 2745410304,808452144 +.long 4160222976,1600061535 +.long 1970631936,3318022341 +.long 3688618752,437911578 +.long 2324335104,3789619425 +.long 50529024,3402236106 +.long 3873891840,1195835463 +.long 3671775744,1027407933 +.long 151587072,16842753 +.long 1061109504,3604349142 +.long 3722304768,1448476758 +.long 2492765184,1296891981 +.long 2273806080,218955789 +.long 1549556736,1717960806 +.long 2206434048,3435921612 +.long 33686016,757923885 +.long 3452816640,303169554 +.long 1246382592,538968096 +.long 2425393152,2981167281 +.long 858993408,2576941209 +.long 1936945920,1280049228 +.long 1734829824,3267494082 +.long 4143379968,2122186878 +.long 4092850944,84213765 +.long 2644352256,3082223799 +.long 2139062016,825294897 +.long 3217014528,387383319 +.long 3806519808,3621191895 +.long 1381126656,1482162264 +.long 2610666240,1633747041 +.long 3638089728,454754331 +.long 640034304,471597084 +.long 3368601600,252641295 +.long 926365440,370540566 +.long 3334915584,404226072 +.long 993737472,572653602 +.long 2172748032,1145307204 +.long 2526451200,2998010034 +.long 1869573888,3048538293 +.long 
1263225600,2442199185 +.long 320017152,134742024 +.long 3200171520,2829582504 +.long 1667457792,4244373756 +.long 774778368,1347420240 +.long 3924420864,3503292624 +.long 2038003968,2105344125 +.long 2812782336,2307457161 +.long 2358021120,2543255703 +.long 2678038272,1532690523 +.long 1852730880,2509570197 +.long 3166485504,4294902015 +.long 2391707136,3536978130 +.long 690563328,3301179588 +.long 4126536960,1212678216 +.long 4193908992,4160159991 +.long 3065427456,3688562907 +.long 791621376,50528259 +.long 4261281024,3671720154 +.long 3031741440,1061093439 +.long 1499027712,2492727444 +.long 2021160960,1549533276 +.long 2560137216,33685506 +.long 101058048,1246363722 +.long 1785358848,858980403 +.long 3890734848,1734803559 +.long 1179010560,4092788979 +.long 1903259904,2139029631 +.long 3132799488,3806462178 +.long 3570717696,2610626715 +.long 623191296,640024614 +.long 2880154368,926351415 +.long 1111638528,993722427 +.long 2290649088,2526412950 +.long 2728567296,1263206475 +.long 2374864128,3200123070 +.long 4210752000,774766638 +.long 1920102912,2037973113 +.long 117901056,2357985420 +.long 3115956480,1852702830 +.long 1431655680,2391670926 +.long 4177065984,4126474485 +.long 4008635904,3065381046 +.long 2896997376,4261216509 +.long 168430080,1499005017 +.long 909522432,2560098456 +.long 1229539584,1785331818 +.long 707406336,1178992710 +.long 1751672832,3132752058 +.long 1010580480,623181861 +.long 943208448,1111621698 +.long 4059164928,2728525986 +.long 2762253312,4210688250 +.long 1077952512,117899271 +.long 673720320,1431634005 +.long 3553874688,4008575214 +.long 2071689984,168427530 +.long 3149642496,1229520969 +.long 3385444608,1751646312 +.long 1128481536,943194168 +.long 3250700544,2762211492 +.long 353703168,673710120 +.long 3823362816,2071658619 +.long 2913840384,3385393353 +.long 4109693952,3250651329 +.long 2004317952,3823304931 +.long 3351758592,4109631732 +.long 2155905024,3351707847 +.long 2661195264,2661154974 +.long 14737632,939538488 +.long 328965,1090535745 +.long 5789784,369104406 +.long 14277081,1979741814 +.long 6776679,3640711641 +.long 5131854,2466288531 +.long 8487297,1610637408 +.long 13355979,4060148466 +.long 13224393,1912631922 +.long 723723,3254829762 +.long 11447982,2868947883 +.long 6974058,2583730842 +.long 14013909,1962964341 +.long 1579032,100664838 +.long 6118749,1459640151 +.long 8553090,2684395680 +.long 4605510,2432733585 +.long 14671839,4144035831 +.long 14079702,3036722613 +.long 2565927,3372272073 +.long 9079434,2717950626 +.long 3289650,2348846220 +.long 4934475,3523269330 +.long 4342338,2415956112 +.long 14408667,4127258358 +.long 1842204,117442311 +.long 10395294,2801837991 +.long 10263708,654321447 +.long 3815994,2382401166 +.long 13290186,2986390194 +.long 2434341,1224755529 +.long 8092539,3724599006 +.long 855309,1124090691 +.long 7434609,1543527516 +.long 6250335,3607156695 +.long 2039583,3338717127 +.long 16316664,1040203326 +.long 14145495,4110480885 +.long 4079166,2399178639 +.long 10329501,1728079719 +.long 8158332,520101663 +.long 6316128,402659352 +.long 12171705,1845522030 +.long 12500670,2936057775 +.long 12369084,788541231 +.long 9145227,3791708898 +.long 1447446,2231403909 +.long 3421236,218107149 +.long 5066061,1392530259 +.long 12829635,4026593520 +.long 7500402,2617285788 +.long 9803157,1694524773 +.long 11250603,3925928682 +.long 9342606,2734728099 +.long 12237498,2919280302 +.long 8026746,2650840734 +.long 11776947,3959483628 +.long 131586,2147516544 +.long 11842740,754986285 +.long 11382189,1795189611 +.long 
10658466,2818615464 +.long 11316396,721431339 +.long 14211288,905983542 +.long 10132122,2785060518 +.long 1513239,3305162181 +.long 1710618,2248181382 +.long 3487029,1291865421 +.long 13421772,855651123 +.long 16250871,4244700669 +.long 10066329,1711302246 +.long 6381921,1476417624 +.long 5921370,2516620950 +.long 15263976,973093434 +.long 2368548,150997257 +.long 5658198,2499843477 +.long 4210752,268439568 +.long 14803425,2013296760 +.long 6513507,3623934168 +.long 592137,1107313218 +.long 3355443,3422604492 +.long 12566463,4009816047 +.long 10000536,637543974 +.long 9934743,3842041317 +.long 8750469,1627414881 +.long 6842472,436214298 +.long 16579836,1056980799 +.long 15527148,989870907 +.long 657930,2181071490 +.long 14342874,3053500086 +.long 7303023,3674266587 +.long 5460819,3556824276 +.long 6447714,2550175896 +.long 10724259,3892373736 +.long 3026478,2332068747 +.long 526344,33554946 +.long 11513775,3942706155 +.long 2631720,167774730 +.long 11579568,738208812 +.long 7631988,486546717 +.long 12763842,2952835248 +.long 12434877,1862299503 +.long 3552822,2365623693 +.long 2236962,2281736328 +.long 3684408,234884622 +.long 6579300,419436825 +.long 1973790,2264958855 +.long 3750201,1308642894 +.long 2894892,184552203 +.long 10921638,2835392937 +.long 3158064,201329676 +.long 15066597,2030074233 +.long 4473924,285217041 +.long 16645629,2130739071 +.long 8947848,570434082 +.long 10461087,3875596263 +.long 6645093,1493195097 +.long 8882055,3774931425 +.long 7039851,3657489114 +.long 16053492,1023425853 +.long 2302755,3355494600 +.long 4737096,301994514 +.long 1052688,67109892 +.long 13750737,1946186868 +.long 5329233,1409307732 +.long 12632256,805318704 +.long 16382457,2113961598 +.long 13816530,3019945140 +.long 10526880,671098920 +.long 5592405,1426085205 +.long 10592673,1744857192 +.long 4276545,1342197840 +.long 16448250,3187719870 +.long 4408131,3489714384 +.long 1250067,3288384708 +.long 12895428,822096177 +.long 3092271,3405827019 +.long 11053224,704653866 +.long 11974326,2902502829 +.long 3947580,251662095 +.long 2829099,3389049546 +.long 12698049,1879076976 +.long 16777215,4278255615 +.long 13158600,838873650 +.long 10855845,1761634665 +.long 2105376,134219784 +.long 9013641,1644192354 +.long 0,0 +.long 9474192,603989028 +.long 4671303,3506491857 +.long 15724527,4211145723 +.long 15395562,3120609978 +.long 12040119,3976261101 +.long 1381653,1157645637 +.long 394758,2164294017 +.long 13487565,1929409395 +.long 11908533,1828744557 +.long 1184274,2214626436 +.long 8289918,2667618207 +.long 12303291,3993038574 +.long 2697513,1241533002 +.long 986895,3271607235 +.long 12105912,771763758 +.long 460551,3238052289 +.long 263172,16777473 +.long 10197915,3858818790 +.long 9737364,620766501 +.long 2171169,1207978056 +.long 6710886,2566953369 +.long 15132390,3103832505 +.long 13553358,3003167667 +.long 15592941,2063629179 +.long 15198183,4177590777 +.long 3881787,3456159438 +.long 16711422,3204497343 +.long 8355711,3741376479 +.long 12961221,1895854449 +.long 10790052,687876393 +.long 3618615,3439381965 +.long 11645361,1811967084 +.long 5000268,318771987 +.long 9539985,1677747300 +.long 7237230,2600508315 +.long 9276813,1660969827 +.long 7763574,2634063261 +.long 197379,3221274816 +.long 2960685,1258310475 +.long 14606046,3070277559 +.long 9868950,2768283045 +.long 2500134,2298513801 +.long 8224125,1593859935 +.long 13027014,2969612721 +.long 6052956,385881879 +.long 13882323,4093703412 +.long 15921906,3154164924 +.long 5197647,3540046803 +.long 1644825,1174423110 +.long 4144959,3472936911 
+.long 14474460,922761015 +.long 7960953,1577082462 +.long 1907997,1191200583 +.long 5395026,2483066004 +.long 15461355,4194368250 +.long 15987699,4227923196 +.long 7171437,1526750043 +.long 6184542,2533398423 +.long 16514043,4261478142 +.long 6908265,1509972570 +.long 11711154,2885725356 +.long 15790320,1006648380 +.long 3223857,1275087948 +.long 789516,50332419 +.long 13948116,889206069 +.long 13619151,4076925939 +.long 9211020,587211555 +.long 14869218,3087055032 +.long 7697781,1560304989 +.long 11119017,1778412138 +.long 4868682,2449511058 +.long 5723991,3573601749 +.long 8684676,553656609 +.long 1118481,1140868164 +.long 4539717,1358975313 +.long 1776411,3321939654 +.long 16119285,2097184125 +.long 15000804,956315961 +.long 921102,2197848963 +.long 7566195,3691044060 +.long 11184810,2852170410 +.long 15856113,2080406652 +.long 14540253,1996519287 +.long 5855577,1442862678 +.long 1315860,83887365 +.long 7105644,452991771 +.long 9605778,2751505572 +.long 5526612,352326933 +.long 13684944,872428596 +.long 7895160,503324190 +.long 7368816,469769244 +.long 14935011,4160813304 +.long 4802889,1375752786 +.long 8421504,536879136 +.long 5263440,335549460 +.long 10987431,3909151209 +.long 16185078,3170942397 +.long 7829367,3707821533 +.long 9671571,3825263844 +.long 8816262,2701173153 +.long 8618883,3758153952 +.long 2763306,2315291274 +.long 13092807,4043370993 +.long 5987163,3590379222 +.long 15329769,2046851706 +.long 15658734,3137387451 +.long 9408399,3808486371 +.long 65793,1073758272 +.long 4013373,1325420367 +.globl Camellia_cbc_encrypt +.type Camellia_cbc_encrypt,@function +.align 16 +Camellia_cbc_encrypt: +.L_Camellia_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je .L016enc_out + pushfl + cld + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 36(%esp),%edx + movl 40(%esp),%ebp + leal -64(%esp),%esi + andl $-64,%esi + leal -127(%edx),%edi + subl %esi,%edi + negl %edi + andl $960,%edi + subl %edi,%esi + movl 44(%esp),%edi + xchgl %esi,%esp + addl $4,%esp + movl %esi,20(%esp) + movl %eax,24(%esp) + movl %ebx,28(%esp) + movl %ecx,32(%esp) + movl %edx,36(%esp) + movl %ebp,40(%esp) + call .L017pic_point +.L017pic_point: + popl %ebp + leal .LCamellia_SBOX-.L017pic_point(%ebp),%ebp + movl $32,%esi +.align 4 +.L018prefetch_sbox: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + leal 128(%ebp),%ebp + decl %esi + jnz .L018prefetch_sbox + movl 36(%esp),%eax + subl $4096,%ebp + movl 24(%esp),%esi + movl 272(%eax),%edx + cmpl $0,%edi + je .L019DECRYPT + movl 32(%esp),%ecx + movl 40(%esp),%edi + shll $6,%edx + leal (%eax,%edx,1),%edx + movl %edx,16(%esp) + testl $4294967280,%ecx + jz .L020enc_tail + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4 +.L021enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + bswap %eax + xorl 12(%esi),%edx + bswap %ebx + movl 36(%esp),%edi + bswap %ecx + bswap %edx + call _x86_Camellia_encrypt + movl 24(%esp),%esi + movl 28(%esp),%edi + bswap %eax + bswap %ebx + bswap %ecx + movl %eax,(%edi) + bswap %edx + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%ecx + leal 16(%esi),%esi + movl %esi,24(%esp) + leal 16(%edi),%edx + movl %edx,28(%esp) + subl $16,%ecx + testl $4294967280,%ecx + movl %ecx,32(%esp) + jnz .L021enc_loop + testl $15,%ecx + jnz .L020enc_tail + movl 40(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl 
%edx,12(%esi) + movl 20(%esp),%esp + popfl +.L016enc_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4 +.L020enc_tail: + movl %edi,%eax + movl 28(%esp),%edi + pushl %eax + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je .L022enc_in_place +.align 4 +.long 2767451785 + jmp .L023enc_skip_in_place +.L022enc_in_place: + leal (%edi,%ecx,1),%edi +.L023enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 4 +.long 2868115081 + popl %edi + movl 28(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,32(%esp) + jmp .L021enc_loop +.align 16 +.L019DECRYPT: + shll $6,%edx + leal (%eax,%edx,1),%edx + movl %eax,16(%esp) + movl %edx,36(%esp) + cmpl 28(%esp),%esi + je .L024dec_in_place + movl 40(%esp),%edi + movl %edi,44(%esp) +.align 4 +.L025dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + movl 36(%esp),%edi + bswap %ecx + bswap %edx + call _x86_Camellia_decrypt + movl 44(%esp),%edi + movl 32(%esp),%esi + bswap %eax + bswap %ebx + bswap %ecx + xorl (%edi),%eax + bswap %edx + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc .L026dec_partial + movl %esi,32(%esp) + movl 24(%esp),%esi + movl 28(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl %esi,44(%esp) + leal 16(%esi),%esi + movl %esi,24(%esp) + leal 16(%edi),%edi + movl %edi,28(%esp) + jnz .L025dec_loop + movl 44(%esp),%edi +.L027dec_end: + movl 40(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp .L028dec_out +.align 4 +.L026dec_partial: + leal 44(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%ecx + movl %edi,%esi + movl 28(%esp),%edi +.long 2767451785 + movl 24(%esp),%edi + jmp .L027dec_end +.align 4 +.L024dec_in_place: +.L029dec_in_place_loop: + leal 44(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + bswap %eax + movl %edx,12(%edi) + bswap %ebx + movl 36(%esp),%edi + bswap %ecx + bswap %edx + call _x86_Camellia_decrypt + movl 40(%esp),%edi + movl 28(%esp),%esi + bswap %eax + bswap %ebx + bswap %ecx + xorl (%edi),%eax + bswap %edx + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,28(%esp) + leal 44(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 24(%esp),%esi + leal 16(%esi),%esi + movl %esi,24(%esp) + movl 32(%esp),%ecx + subl $16,%ecx + jc .L030dec_in_place_partial + movl %ecx,32(%esp) + jnz .L029dec_in_place_loop + jmp .L028dec_out +.align 4 +.L030dec_in_place_partial: + movl 28(%esp),%edi + leal 44(%esp),%esi + leal (%edi,%ecx,1),%edi + leal 16(%esi,%ecx,1),%esi + negl %ecx +.long 2767451785 +.align 4 +.L028dec_out: + movl 20(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size Camellia_cbc_encrypt,.-.L_Camellia_cbc_encrypt_begin +.byte 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/cast/cast-586.s 
b/deps/openssl/asm_obsolete/x86-elf-gas/cast/cast-586.s new file mode 100644 index 00000000000000..3ef7fe32b7d42b --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/cast/cast-586.s @@ -0,0 +1,933 @@ +.file "cast-586.s" +.text +.globl CAST_encrypt +.type CAST_encrypt,@function +.align 16 +CAST_encrypt: +.L_CAST_encrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + + movl 128(%ebp),%eax + pushl %eax + xorl %eax,%eax + + movl (%ebp),%edx + movl 4(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + + movl 8(%ebp),%edx + movl 12(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + + movl 16(%ebp),%edx + movl 20(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + + movl 24(%ebp),%edx + movl 28(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + + movl 32(%ebp),%edx + movl 36(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + + movl 40(%ebp),%edx + movl 44(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + + movl 48(%ebp),%edx + movl 52(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + + movl 56(%ebp),%edx + movl 60(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb 
%dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + + movl 64(%ebp),%edx + movl 68(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + + movl 72(%ebp),%edx + movl 76(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + + movl 80(%ebp),%edx + movl 84(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + + movl 88(%ebp),%edx + movl 92(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + + popl %edx + orl %edx,%edx + jnz .L000cast_enc_done + + movl 96(%ebp),%edx + movl 100(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + + movl 104(%ebp),%edx + movl 108(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + + movl 112(%ebp),%edx + movl 116(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + + movl 120(%ebp),%edx + movl 124(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + 
xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi +.L000cast_enc_done: + nop + movl 20(%esp),%eax + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size CAST_encrypt,.-.L_CAST_encrypt_begin +.globl CAST_decrypt +.type CAST_decrypt,@function +.align 16 +CAST_decrypt: +.L_CAST_decrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + + movl (%ebx),%edi + movl 4(%ebx),%esi + + movl 128(%ebp),%eax + orl %eax,%eax + jnz .L001cast_dec_skip + xorl %eax,%eax + + movl 120(%ebp),%edx + movl 124(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + + movl 112(%ebp),%edx + movl 116(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + + movl 104(%ebp),%edx + movl 108(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + + movl 96(%ebp),%edx + movl 100(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi +.L001cast_dec_skip: + + movl 88(%ebp),%edx + movl 92(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + + movl 80(%ebp),%edx + movl 84(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + + movl 72(%ebp),%edx + movl 76(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl 
%ebx,%ecx + xorl %ecx,%edi + + movl 64(%ebp),%edx + movl 68(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + + movl 56(%ebp),%edx + movl 60(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + + movl 48(%ebp),%edx + movl 52(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + + movl 40(%ebp),%edx + movl 44(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + + movl 32(%ebp),%edx + movl 36(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + + movl 24(%ebp),%edx + movl 28(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + + movl 16(%ebp),%edx + movl 20(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + + movl 8(%ebp),%edx + movl 12(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + + movl (%ebp),%edx + movl 4(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl 
$255,%edx + movl CAST_S_table0(,%ecx,4),%ecx + movl CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + nop + movl 20(%esp),%eax + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size CAST_decrypt,.-.L_CAST_decrypt_begin +.globl CAST_cbc_encrypt +.type CAST_cbc_encrypt,@function +.align 16 +CAST_cbc_encrypt: +.L_CAST_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 56(%esp),%ecx + + movl 48(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L002decrypt + andl $4294967288,%ebp + movl 8(%esp),%eax + movl 12(%esp),%ebx + jz .L003encrypt_finish +.L004encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_CAST_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L004encrypt_loop +.L003encrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L005finish + call .L006PIC_point +.L006PIC_point: + popl %edx + leal .L007cbc_enc_jmp_table-.L006PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L008ej7: + movb 6(%esi),%dh + shll $8,%edx +.L009ej6: + movb 5(%esi),%dh +.L010ej5: + movb 4(%esi),%dl +.L011ej4: + movl (%esi),%ecx + jmp .L012ejend +.L013ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L014ej2: + movb 1(%esi),%ch +.L015ej1: + movb (%esi),%cl +.L012ejend: + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_CAST_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L005finish +.L002decrypt: + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz .L016decrypt_finish +.L017decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_CAST_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,16(%esp) + movl %ebx,20(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L017decrypt_loop +.L016decrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz .L005finish + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call .L_CAST_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L018dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L019dj6: + movb %dh,5(%edi) +.L020dj5: + movb %dl,4(%edi) +.L021dj4: + movl %ecx,(%edi) + jmp .L022djend +.L023dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L024dj2: + movb %ch,1(%esi) +.L025dj1: + movb %cl,(%esi) +.L022djend: + jmp .L005finish +.L005finish: + movl 60(%esp),%ecx + addl $24,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret 
+.align 64 +.L007cbc_enc_jmp_table: +.long 0 +.long .L015ej1-.L006PIC_point +.long .L014ej2-.L006PIC_point +.long .L013ej3-.L006PIC_point +.long .L011ej4-.L006PIC_point +.long .L010ej5-.L006PIC_point +.long .L009ej6-.L006PIC_point +.long .L008ej7-.L006PIC_point +.align 64 +.size CAST_cbc_encrypt,.-.L_CAST_cbc_encrypt_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/des/crypt586.s b/deps/openssl/asm_obsolete/x86-elf-gas/des/crypt586.s new file mode 100644 index 00000000000000..46c81c493d0b8e --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/des/crypt586.s @@ -0,0 +1,875 @@ +.file "crypt586.s" +.text +.globl fcrypt_body +.type fcrypt_body,@function +.align 16 +fcrypt_body: +.L_fcrypt_body_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + + xorl %edi,%edi + xorl %esi,%esi + leal DES_SPtrans,%edx + pushl %edx + movl 28(%esp),%ebp + pushl $25 +.L000start: + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl (%ebp),%ebx + xorl %ebx,%eax + movl 4(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 8(%ebp),%ebx + xorl %ebx,%eax + movl 12(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 16(%ebp),%ebx + xorl %ebx,%eax + movl 20(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl 
%ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 24(%ebp),%ebx + xorl %ebx,%eax + movl 28(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 32(%ebp),%ebx + xorl %ebx,%eax + movl 36(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 40(%ebp),%ebx + xorl %ebx,%eax + movl 44(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 48(%ebp),%ebx + xorl %ebx,%eax + movl 52(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 
0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 56(%ebp),%ebx + xorl %ebx,%eax + movl 60(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 64(%ebp),%ebx + xorl %ebx,%eax + movl 68(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 72(%ebp),%ebx + xorl %ebx,%eax + movl 76(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 80(%ebp),%ebx + xorl %ebx,%eax + movl 84(%ebp),%ecx + xorl 
%esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 88(%ebp),%ebx + xorl %ebx,%eax + movl 92(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 96(%ebp),%ebx + xorl %ebx,%eax + movl 100(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 104(%ebp),%ebx + xorl %ebx,%eax + movl 108(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + 
movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 112(%ebp),%ebx + xorl %ebx,%eax + movl 116(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 120(%ebp),%ebx + xorl %ebx,%eax + movl 124(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + movl (%esp),%ebx + movl %edi,%eax + decl %ebx + movl %esi,%edi + movl %eax,%esi + movl %ebx,(%esp) + jnz .L000start + + + movl 28(%esp),%edx + rorl $1,%edi + movl %esi,%eax + xorl %edi,%esi + andl $0xaaaaaaaa,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $23,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $10,%esi + movl %esi,%eax + xorl %edi,%esi + andl $0x33333333,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $18,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xfff0000f,%edi + xorl %edi,%esi + xorl %edi,%eax + + roll $12,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xf0f0f0f0,%esi + xorl %esi,%edi + xorl %esi,%eax + + rorl $4,%eax + movl %eax,(%edx) + movl %edi,4(%edx) + addl $8,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size fcrypt_body,.-.L_fcrypt_body_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/des/des-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/des/des-586.s new file mode 100644 index 00000000000000..054e2b0d597efa --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/des/des-586.s @@ -0,0 +1,1838 @@ +.file "des-586.s" +.text +.globl DES_SPtrans +.type _x86_DES_encrypt,@function +.align 16 +_x86_DES_encrypt: + pushl %ecx + + movl (%ecx),%eax + xorl %ebx,%ebx + movl 4(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 
0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 8(%ecx),%eax + xorl %ebx,%ebx + movl 12(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 16(%ecx),%eax + xorl %ebx,%ebx + movl 20(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 24(%ecx),%eax + xorl %ebx,%ebx + movl 28(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 32(%ecx),%eax + xorl %ebx,%ebx + movl 36(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 40(%ecx),%eax + xorl %ebx,%ebx + movl 44(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 48(%ecx),%eax + xorl %ebx,%ebx + movl 52(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl 
$0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 56(%ecx),%eax + xorl %ebx,%ebx + movl 60(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 64(%ecx),%eax + xorl %ebx,%ebx + movl 68(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 72(%ecx),%eax + xorl %ebx,%ebx + movl 76(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 80(%ecx),%eax + xorl %ebx,%ebx + movl 84(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 88(%ecx),%eax + xorl %ebx,%ebx + movl 92(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 96(%ecx),%eax + xorl %ebx,%ebx + movl 100(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl 
$0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 104(%ecx),%eax + xorl %ebx,%ebx + movl 108(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 112(%ecx),%eax + xorl %ebx,%ebx + movl 116(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 120(%ecx),%eax + xorl %ebx,%ebx + movl 124(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + addl $4,%esp + ret +.size _x86_DES_encrypt,.-_x86_DES_encrypt +.type _x86_DES_decrypt,@function +.align 16 +_x86_DES_decrypt: + pushl %ecx + + movl 120(%ecx),%eax + xorl %ebx,%ebx + movl 124(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 112(%ecx),%eax + xorl %ebx,%ebx + movl 116(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 104(%ecx),%eax + xorl %ebx,%ebx + movl 108(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 
0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 96(%ecx),%eax + xorl %ebx,%ebx + movl 100(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 88(%ecx),%eax + xorl %ebx,%ebx + movl 92(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 80(%ecx),%eax + xorl %ebx,%ebx + movl 84(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 72(%ecx),%eax + xorl %ebx,%ebx + movl 76(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 64(%ecx),%eax + xorl %ebx,%ebx + movl 68(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 56(%ecx),%eax + xorl %ebx,%ebx + movl 60(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + 
movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 48(%ecx),%eax + xorl %ebx,%ebx + movl 52(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 40(%ecx),%eax + xorl %ebx,%ebx + movl 44(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 32(%ecx),%eax + xorl %ebx,%ebx + movl 36(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 24(%ecx),%eax + xorl %ebx,%ebx + movl 28(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl 16(%ecx),%eax + xorl %ebx,%ebx + movl 20(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + + movl 8(%ecx),%eax + xorl %ebx,%ebx + movl 12(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl 
(%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + + movl (%ecx),%eax + xorl %ebx,%ebx + movl 4(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + addl $4,%esp + ret +.size _x86_DES_decrypt,.-_x86_DES_decrypt +.globl DES_encrypt1 +.type DES_encrypt1,@function +.align 16 +DES_encrypt1: +.L_DES_encrypt1_begin: + pushl %esi + pushl %edi + + + movl 12(%esp),%esi + xorl %ecx,%ecx + pushl %ebx + pushl %ebp + movl (%esi),%eax + movl 28(%esp),%ebx + movl 4(%esi),%edi + + + roll $4,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0xf0f0f0f0,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $20,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xfff0000f,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $14,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x33333333,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $22,%esi + movl %esi,%eax + xorl %edi,%esi + andl $0x03fc03fc,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $9,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0xaaaaaaaa,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $1,%edi + call .L000pic_point +.L000pic_point: + popl %ebp + leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp + movl 24(%esp),%ecx + cmpl $0,%ebx + je .L001decrypt + call _x86_DES_encrypt + jmp .L002done +.L001decrypt: + call _x86_DES_decrypt +.L002done: + + + movl 20(%esp),%edx + rorl $1,%esi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%edx) + movl %esi,4(%edx) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.size DES_encrypt1,.-.L_DES_encrypt1_begin +.globl DES_encrypt2 +.type DES_encrypt2,@function +.align 16 +DES_encrypt2: +.L_DES_encrypt2_begin: + pushl %esi + pushl %edi + + + movl 12(%esp),%eax + xorl %ecx,%ecx + pushl %ebx + pushl %ebp + movl (%eax),%esi + movl 28(%esp),%ebx + roll $3,%esi + movl 4(%eax),%edi + roll $3,%edi + call .L003pic_point +.L003pic_point: + popl %ebp + leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp + movl 24(%esp),%ecx + cmpl $0,%ebx + je .L004decrypt + call _x86_DES_encrypt + jmp .L005done +.L004decrypt: + call _x86_DES_decrypt +.L005done: + + + rorl $3,%edi + movl 20(%esp),%eax + rorl $3,%esi + movl %edi,(%eax) + movl %esi,4(%eax) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.size DES_encrypt2,.-.L_DES_encrypt2_begin 
+.globl DES_encrypt3 +.type DES_encrypt3,@function +.align 16 +DES_encrypt3: +.L_DES_encrypt3_begin: + pushl %ebx + movl 8(%esp),%ebx + pushl %ebp + pushl %esi + pushl %edi + + + movl (%ebx),%edi + movl 4(%ebx),%esi + subl $12,%esp + + + roll $4,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%edx + xorl %edi,%esi + + roll $20,%esi + movl %esi,%edi + xorl %edx,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%edx + + roll $14,%edi + movl %edi,%esi + xorl %edx,%edi + andl $0x33333333,%edi + xorl %edi,%esi + xorl %edi,%edx + + roll $22,%edx + movl %edx,%edi + xorl %esi,%edx + andl $0x03fc03fc,%edx + xorl %edx,%edi + xorl %edx,%esi + + roll $9,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%edx + xorl %edi,%esi + + rorl $3,%edx + rorl $2,%esi + movl %esi,4(%ebx) + movl 36(%esp),%eax + movl %edx,(%ebx) + movl 40(%esp),%edi + movl 44(%esp),%esi + movl $1,8(%esp) + movl %eax,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $0,8(%esp) + movl %edi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $1,8(%esp) + movl %esi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + addl $12,%esp + movl (%ebx),%edi + movl 4(%ebx),%esi + + + roll $2,%esi + roll $3,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%ebx) + movl %esi,4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.size DES_encrypt3,.-.L_DES_encrypt3_begin +.globl DES_decrypt3 +.type DES_decrypt3,@function +.align 16 +DES_decrypt3: +.L_DES_decrypt3_begin: + pushl %ebx + movl 8(%esp),%ebx + pushl %ebp + pushl %esi + pushl %edi + + + movl (%ebx),%edi + movl 4(%ebx),%esi + subl $12,%esp + + + roll $4,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%edx + xorl %edi,%esi + + roll $20,%esi + movl %esi,%edi + xorl %edx,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%edx + + roll $14,%edi + movl %edi,%esi + xorl %edx,%edi + andl $0x33333333,%edi + xorl %edi,%esi + xorl %edi,%edx + + roll $22,%edx + movl %edx,%edi + xorl %esi,%edx + andl $0x03fc03fc,%edx + xorl %edx,%edi + xorl %edx,%esi + + roll $9,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%edx + xorl %edi,%esi + + rorl $3,%edx + rorl $2,%esi + movl %esi,4(%ebx) + movl 36(%esp),%esi + movl %edx,(%ebx) + movl 40(%esp),%edi + movl 44(%esp),%eax + movl $0,8(%esp) + movl %eax,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $1,8(%esp) + movl %edi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + movl $0,8(%esp) + movl %esi,4(%esp) + movl %ebx,(%esp) + call .L_DES_encrypt2_begin + addl $12,%esp + movl (%ebx),%edi + movl 4(%ebx),%esi + + + roll $2,%esi + roll $3,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl 
%esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%ebx) + movl %esi,4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.size DES_decrypt3,.-.L_DES_decrypt3_begin +.globl DES_ncbc_encrypt +.type DES_ncbc_encrypt,@function +.align 16 +DES_ncbc_encrypt: +.L_DES_ncbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 56(%esp),%ecx + + pushl %ecx + + movl 52(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L006decrypt + andl $4294967288,%ebp + movl 12(%esp),%eax + movl 16(%esp),%ebx + jz .L007encrypt_finish +.L008encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L008encrypt_loop +.L007encrypt_finish: + movl 56(%esp),%ebp + andl $7,%ebp + jz .L009finish + call .L010PIC_point +.L010PIC_point: + popl %edx + leal .L011cbc_enc_jmp_table-.L010PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L012ej7: + movb 6(%esi),%dh + shll $8,%edx +.L013ej6: + movb 5(%esi),%dh +.L014ej5: + movb 4(%esi),%dl +.L015ej4: + movl (%esi),%ecx + jmp .L016ejend +.L017ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L018ej2: + movb 1(%esi),%ch +.L019ej1: + movb (%esi),%cl +.L016ejend: + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L009finish +.L006decrypt: + andl $4294967288,%ebp + movl 20(%esp),%eax + movl 24(%esp),%ebx + jz .L020decrypt_finish +.L021decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl 20(%esp),%ecx + movl 24(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,20(%esp) + movl %ebx,24(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L021decrypt_loop +.L020decrypt_finish: + movl 56(%esp),%ebp + andl $7,%ebp + jz .L009finish + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call .L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl 20(%esp),%ecx + movl 24(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L022dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L023dj6: + movb %dh,5(%edi) +.L024dj5: + movb %dl,4(%edi) +.L025dj4: + movl %ecx,(%edi) + jmp .L026djend +.L027dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L028dj2: + movb %ch,1(%esi) +.L029dj1: + movb %cl,(%esi) +.L026djend: + jmp .L009finish +.L009finish: + movl 64(%esp),%ecx + addl $28,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L011cbc_enc_jmp_table: +.long 0 +.long .L019ej1-.L010PIC_point +.long .L018ej2-.L010PIC_point +.long .L017ej3-.L010PIC_point +.long .L015ej4-.L010PIC_point +.long .L014ej5-.L010PIC_point +.long 
.L013ej6-.L010PIC_point +.long .L012ej7-.L010PIC_point +.align 64 +.size DES_ncbc_encrypt,.-.L_DES_ncbc_encrypt_begin +.globl DES_ede3_cbc_encrypt +.type DES_ede3_cbc_encrypt,@function +.align 16 +DES_ede3_cbc_encrypt: +.L_DES_ede3_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + + movl 44(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + + movl 64(%esp),%ecx + + movl 56(%esp),%eax + pushl %eax + + movl 56(%esp),%eax + pushl %eax + + movl 56(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz .L030decrypt + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz .L031encrypt_finish +.L032encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_encrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L032encrypt_loop +.L031encrypt_finish: + movl 60(%esp),%ebp + andl $7,%ebp + jz .L033finish + call .L034PIC_point +.L034PIC_point: + popl %edx + leal .L035cbc_enc_jmp_table-.L034PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +.L036ej7: + movb 6(%esi),%dh + shll $8,%edx +.L037ej6: + movb 5(%esi),%dh +.L038ej5: + movb 4(%esi),%dl +.L039ej4: + movl (%esi),%ecx + jmp .L040ejend +.L041ej3: + movb 2(%esi),%ch + shll $8,%ecx +.L042ej2: + movb 1(%esi),%ch +.L043ej1: + movb (%esi),%cl +.L040ejend: + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_encrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp .L033finish +.L030decrypt: + andl $4294967288,%ebp + movl 24(%esp),%eax + movl 28(%esp),%ebx + jz .L044decrypt_finish +.L045decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_decrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl 24(%esp),%ecx + movl 28(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,24(%esp) + movl %ebx,28(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz .L045decrypt_loop +.L044decrypt_finish: + movl 60(%esp),%ebp + andl $7,%ebp + jz .L033finish + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call .L_DES_decrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl 24(%esp),%ecx + movl 28(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +.L046dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +.L047dj6: + movb %dh,5(%edi) +.L048dj5: + movb %dl,4(%edi) +.L049dj4: + movl %ecx,(%edi) + jmp .L050djend +.L051dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +.L052dj2: + movb %ch,1(%esi) +.L053dj1: + movb %cl,(%esi) +.L050djend: + jmp .L033finish +.L033finish: + movl 76(%esp),%ecx + addl $32,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L035cbc_enc_jmp_table: +.long 0 +.long .L043ej1-.L034PIC_point +.long .L042ej2-.L034PIC_point +.long .L041ej3-.L034PIC_point +.long .L039ej4-.L034PIC_point +.long .L038ej5-.L034PIC_point +.long .L037ej6-.L034PIC_point +.long .L036ej7-.L034PIC_point +.align 64 +.size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin +.align 64 +DES_SPtrans: +.Ldes_sptrans: 
+.long 34080768,524288,33554434,34080770 +.long 33554432,526338,524290,33554434 +.long 526338,34080768,34078720,2050 +.long 33556482,33554432,0,524290 +.long 524288,2,33556480,526336 +.long 34080770,34078720,2050,33556480 +.long 2,2048,526336,34078722 +.long 2048,33556482,34078722,0 +.long 0,34080770,33556480,524290 +.long 34080768,524288,2050,33556480 +.long 34078722,2048,526336,33554434 +.long 526338,2,33554434,34078720 +.long 34080770,526336,34078720,33556482 +.long 33554432,2050,524290,0 +.long 524288,33554432,33556482,34080768 +.long 2,34078722,2048,526338 +.long 1074823184,0,1081344,1074790400 +.long 1073741840,32784,1073774592,1081344 +.long 32768,1074790416,16,1073774592 +.long 1048592,1074823168,1074790400,16 +.long 1048576,1073774608,1074790416,32768 +.long 1081360,1073741824,0,1048592 +.long 1073774608,1081360,1074823168,1073741840 +.long 1073741824,1048576,32784,1074823184 +.long 1048592,1074823168,1073774592,1081360 +.long 1074823184,1048592,1073741840,0 +.long 1073741824,32784,1048576,1074790416 +.long 32768,1073741824,1081360,1073774608 +.long 1074823168,32768,0,1073741840 +.long 16,1074823184,1081344,1074790400 +.long 1074790416,1048576,32784,1073774592 +.long 1073774608,16,1074790400,1081344 +.long 67108865,67371264,256,67109121 +.long 262145,67108864,67109121,262400 +.long 67109120,262144,67371008,1 +.long 67371265,257,1,67371009 +.long 0,262145,67371264,256 +.long 257,67371265,262144,67108865 +.long 67371009,67109120,262401,67371008 +.long 262400,0,67108864,262401 +.long 67371264,256,1,262144 +.long 257,262145,67371008,67109121 +.long 0,67371264,262400,67371009 +.long 262145,67108864,67371265,1 +.long 262401,67108865,67108864,67371265 +.long 262144,67109120,67109121,262400 +.long 67109120,0,67371009,257 +.long 67108865,262401,256,67371008 +.long 4198408,268439552,8,272633864 +.long 0,272629760,268439560,4194312 +.long 272633856,268435464,268435456,4104 +.long 268435464,4198408,4194304,268435456 +.long 272629768,4198400,4096,8 +.long 4198400,268439560,272629760,4096 +.long 4104,0,4194312,272633856 +.long 268439552,272629768,272633864,4194304 +.long 272629768,4104,4194304,268435464 +.long 4198400,268439552,8,272629760 +.long 268439560,0,4096,4194312 +.long 0,272629768,272633856,4096 +.long 268435456,272633864,4198408,4194304 +.long 272633864,8,268439552,4198408 +.long 4194312,4198400,272629760,268439560 +.long 4104,268435456,268435464,272633856 +.long 134217728,65536,1024,134284320 +.long 134283296,134218752,66592,134283264 +.long 65536,32,134217760,66560 +.long 134218784,134283296,134284288,0 +.long 66560,134217728,65568,1056 +.long 134218752,66592,0,134217760 +.long 32,134218784,134284320,65568 +.long 134283264,1024,1056,134284288 +.long 134284288,134218784,65568,134283264 +.long 65536,32,134217760,134218752 +.long 134217728,66560,134284320,0 +.long 66592,134217728,1024,65568 +.long 134218784,1024,0,134284320 +.long 134283296,134284288,1056,65536 +.long 66560,134283296,134218752,1056 +.long 32,66592,134283264,134217760 +.long 2147483712,2097216,0,2149588992 +.long 2097216,8192,2147491904,2097152 +.long 8256,2149589056,2105344,2147483648 +.long 2147491840,2147483712,2149580800,2105408 +.long 2097152,2147491904,2149580864,0 +.long 8192,64,2149588992,2149580864 +.long 2149589056,2149580800,2147483648,8256 +.long 64,2105344,2105408,2147491840 +.long 8256,2147483648,2147491840,2105408 +.long 2149588992,2097216,0,2147491840 +.long 2147483648,8192,2149580864,2097152 +.long 2097216,2149589056,2105344,64 +.long 2149589056,2105344,2097152,2147491904 +.long 
2147483712,2149580800,2105408,0 +.long 8192,2147483712,2147491904,2149588992 +.long 2149580800,8256,64,2149580864 +.long 16384,512,16777728,16777220 +.long 16794116,16388,16896,0 +.long 16777216,16777732,516,16793600 +.long 4,16794112,16793600,516 +.long 16777732,16384,16388,16794116 +.long 0,16777728,16777220,16896 +.long 16793604,16900,16794112,4 +.long 16900,16793604,512,16777216 +.long 16900,16793600,16793604,516 +.long 16384,512,16777216,16793604 +.long 16777732,16900,16896,0 +.long 512,16777220,4,16777728 +.long 0,16777732,16777728,16896 +.long 516,16384,16794116,16777216 +.long 16794112,4,16388,16794116 +.long 16777220,16794112,16793600,16388 +.long 545259648,545390592,131200,0 +.long 537001984,8388736,545259520,545390720 +.long 128,536870912,8519680,131200 +.long 8519808,537002112,536871040,545259520 +.long 131072,8519808,8388736,537001984 +.long 545390720,536871040,0,8519680 +.long 536870912,8388608,537002112,545259648 +.long 8388608,131072,545390592,128 +.long 8388608,131072,536871040,545390720 +.long 131200,536870912,0,8519680 +.long 545259648,537002112,537001984,8388736 +.long 545390592,128,8388736,537001984 +.long 545390720,8388608,545259520,536871040 +.long 8519680,131200,537002112,545259520 +.long 128,545390592,8519808,0 +.long 536870912,545259648,131072,8519808 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/md5/md5-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/md5/md5-586.s new file mode 100644 index 00000000000000..e354c4ebcdea8c --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/md5/md5-586.s @@ -0,0 +1,679 @@ +.file "../openssl/crypto/md5/asm/md5-586.s" +.text +.globl md5_block_asm_data_order +.type md5_block_asm_data_order,@function +.align 16 +md5_block_asm_data_order: +.L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +.L000start: + + + movl %ecx,%edi + movl (%esi),%ebp + + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 
1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + + + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 3634488961(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl 
%edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + + + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl 
%eax,%edx + + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + + orl %edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae .L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/modes/ghash-x86.s b/deps/openssl/asm_obsolete/x86-elf-gas/modes/ghash-x86.s new file mode 100644 index 00000000000000..70e9493d1b3737 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/modes/ghash-x86.s @@ -0,0 +1,1265 @@ +.file "ghash-x86.s" +.text +.globl gcm_gmult_4bit_x86 +.type gcm_gmult_4bit_x86,@function +.align 16 +gcm_gmult_4bit_x86: +.L_gcm_gmult_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%edi + movl 108(%esp),%esi + movl (%edi),%ebp + movl 4(%edi),%edx + movl 8(%edi),%ecx + movl 12(%edi),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + 
movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) + movl %ebp,(%esp) + movl %edx,4(%esp) + movl %ecx,8(%esp) + movl %ebx,12(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp .L000x86_loop +.align 16 +.L000x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js .L001x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp .L000x86_loop +.align 16 +.L001x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin +.globl gcm_ghash_4bit_x86 +.type gcm_ghash_4bit_x86,@function +.align 16 +gcm_ghash_4bit_x86: +.L_gcm_ghash_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%ebx + movl 108(%esp),%esi + movl 112(%esp),%edi + movl 116(%esp),%ecx + addl %edi,%ecx + movl %ecx,116(%esp) + movl (%ebx),%ebp + movl 4(%ebx),%edx + movl 8(%ebx),%ecx + movl 12(%ebx),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) +.align 16 +.L002x86_outer_loop: + xorl 12(%edi),%ebx + xorl 8(%edi),%ecx + xorl 4(%edi),%edx + xorl (%edi),%ebp + movl %ebx,12(%esp) + movl %ecx,8(%esp) + movl %edx,4(%esp) + movl %ebp,(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp .L003x86_loop +.align 16 +.L003x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js .L004x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp .L003x86_loop +.align 16 +.L004x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 
112(%esp),%edi + leal 16(%edi),%edi + cmpl 116(%esp),%edi + movl %edi,112(%esp) + jb .L002x86_outer_loop + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin +.globl gcm_gmult_4bit_mmx +.type gcm_gmult_4bit_mmx,@function +.align 16 +gcm_gmult_4bit_mmx: +.L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call .L005pic_point +.L005pic_point: + popl %eax + leal .Lrem_4bit-.L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx + xorl %ecx,%ecx + movl %ebx,%edx + movb %dl,%cl + movl $14,%ebp + shlb $4,%cl + andl $240,%edx + movq 8(%esi,%ecx,1),%mm0 + movq (%esi,%ecx,1),%mm1 + movd %mm0,%ebx + jmp .L006mmx_loop +.align 16 +.L006mmx_loop: + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + movb (%edi,%ebp,1),%cl + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + decl %ebp + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + movl %ecx,%edx + pxor %mm2,%mm0 + js .L007mmx_break + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + jmp .L006mmx_loop +.align 16 +.L007mmx_break: + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + pxor %mm2,%mm0 + psrlq $32,%mm0 + movd %mm1,%edx + psrlq $32,%mm1 + movd %mm0,%ecx + movd %mm1,%ebp + bswap %ebx + bswap %edx + bswap %ecx + bswap %ebp + emms + movl %ebx,12(%edi) + movl %edx,4(%edi) + movl %ecx,8(%edi) + movl %ebp,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin +.globl gcm_ghash_4bit_mmx +.type gcm_ghash_4bit_mmx,@function +.align 16 +gcm_ghash_4bit_mmx: +.L_gcm_ghash_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call .L008pic_point +.L008pic_point: + popl %esi + leal .Lrem_8bit-.L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll 
$4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + movq 80(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx +.align 16 
+.L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb 
$15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne .L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin +.globl gcm_init_clmul +.type gcm_init_clmul,@function +.align 16 +gcm_init_clmul: +.L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call .L010pic +.L010pic: + 
popl %ecx + leal .Lbswap-.L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.size gcm_init_clmul,.-.L_gcm_init_clmul_begin +.globl gcm_gmult_clmul +.type gcm_gmult_clmul,@function +.align 16 +gcm_gmult_clmul: +.L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call .L011pic +.L011pic: + popl %ecx + leal .Lbswap-.L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin +.globl gcm_ghash_clmul +.type gcm_ghash_clmul,@function +.align 16 +gcm_ghash_clmul: +.L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call .L012pic +.L012pic: + popl %ecx + leal .Lbswap-.L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu (%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz .L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe .L014even_tail + jmp .L015mod_loop +.align 32 +.L015mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor 
%xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja .L015mod_loop +.L014even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz .L016done + movups (%edx),%xmm2 +.L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin +.align 64 +.Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.align 64 +.Lrem_8bit: +.value 0,450,900,582,1800,1738,1164,1358 +.value 3600,4050,3476,3158,2328,2266,2716,2910 +.value 7200,7650,8100,7782,6952,6890,6316,6510 +.value 4656,5106,4532,4214,5432,5370,5820,6014 +.value 14400,14722,15300,14854,16200,16010,15564,15630 +.value 13904,14226,13780,13334,12632,12442,13020,13086 +.value 9312,9634,10212,9766,9064,8874,8428,8494 +.value 10864,11186,10740,10294,11640,11450,12028,12094 +.value 28800,28994,29444,29382,30600,30282,29708,30158 +.value 32400,32594,32020,31958,31128,30810,31260,31710 +.value 27808,28002,28452,28390,27560,27242,26668,27118 +.value 25264,25458,24884,24822,26040,25722,26172,26622 +.value 18624,18690,19268,19078,20424,19978,19532,19854 +.value 18128,18194,17748,17558,16856,16410,16988,17310 +.value 21728,21794,22372,22182,21480,21034,20588,20910 +.value 23280,23346,22900,22710,24056,23610,24188,24510 +.value 
57600,57538,57988,58182,58888,59338,58764,58446 +.value 61200,61138,60564,60758,59416,59866,60316,59998 +.value 64800,64738,65188,65382,64040,64490,63916,63598 +.value 62256,62194,61620,61814,62520,62970,63420,63102 +.value 55616,55426,56004,56070,56904,57226,56780,56334 +.value 55120,54930,54484,54550,53336,53658,54236,53790 +.value 50528,50338,50916,50982,49768,50090,49644,49198 +.value 52080,51890,51444,51510,52344,52666,53244,52798 +.value 37248,36930,37380,37830,38536,38730,38156,38094 +.value 40848,40530,39956,40406,39064,39258,39708,39646 +.value 36256,35938,36388,36838,35496,35690,35116,35054 +.value 33712,33394,32820,33270,33976,34170,34620,34558 +.value 43456,43010,43588,43910,44744,44810,44364,44174 +.value 42960,42514,42068,42390,41176,41242,41820,41630 +.value 46560,46114,46692,47014,45800,45866,45420,45230 +.value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 64 +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/rc4/rc4-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/rc4/rc4-586.s new file mode 100644 index 00000000000000..fcb4adc14633c9 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/rc4/rc4-586.s @@ -0,0 +1,372 @@ +.file "rc4-586.s" +.text +.globl RC4 +.type RC4,@function +.align 16 +RC4: +.L_RC4_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebp + xorl %eax,%eax + xorl %ebx,%ebx + cmpl $0,%edx + je .L000abort + movb (%edi),%al + movb 4(%edi),%bl + addl $8,%edi + leal (%esi,%edx,1),%ecx + subl %esi,%ebp + movl %ecx,24(%esp) + incb %al + cmpl $-1,256(%edi) + je .L001RC4_CHAR + movl (%edi,%eax,4),%ecx + andl $-4,%edx + jz .L002loop1 + movl %ebp,32(%esp) + testl $-8,%edx + jz .L003go4loop4 + leal OPENSSL_ia32cap_P,%ebp + btl $26,(%ebp) + jnc .L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp .L004loop_mmx_enter +.align 16 +.L005loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +.L004loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl 
%dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb .L005loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je .L006done + jmp .L002loop1 +.align 16 +.L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +.L007loop4: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%eax,4),%ecx + movl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl 32(%esp),%ecx + orl (%edi,%edx,4),%ebp + rorl $8,%ebp + xorl (%esi),%ebp + cmpl 28(%esp),%esi + movl %ebp,(%ecx,%esi,1) + leal 4(%esi),%esi + movl (%edi,%eax,4),%ecx + jb .L007loop4 + cmpl 24(%esp),%esi + je .L006done + movl 32(%esp),%ebp +.align 16 +.L002loop1: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%edx,4),%edx + xorb (%esi),%dl + leal 1(%esi),%esi + movl (%edi,%eax,4),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb .L002loop1 + jmp .L006done +.align 16 +.L001RC4_CHAR: + movzbl (%edi,%eax,1),%ecx +.L008cloop1: + addb %cl,%bl + movzbl (%edi,%ebx,1),%edx + movb %cl,(%edi,%ebx,1) + movb %dl,(%edi,%eax,1) + addb %cl,%dl + movzbl (%edi,%edx,1),%edx + addb $1,%al + xorb (%esi),%dl + leal 1(%esi),%esi + movzbl (%edi,%eax,1),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb .L008cloop1 +.L006done: + decb %al + movl %ebx,-4(%edi) + movb %al,-8(%edi) +.L000abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size RC4,.-.L_RC4_begin +.globl private_RC4_set_key +.type private_RC4_set_key,@function +.align 16 +private_RC4_set_key: +.L_private_RC4_set_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%esi + leal OPENSSL_ia32cap_P,%edx + leal 8(%edi),%edi + leal (%esi,%ebp,1),%esi + negl %ebp + xorl 
%eax,%eax + movl %ebp,-4(%edi) + btl $20,(%edx) + jc .L009c1stloop +.align 16 +.L010w1stloop: + movl %eax,(%edi,%eax,4) + addb $1,%al + jnc .L010w1stloop + xorl %ecx,%ecx + xorl %edx,%edx +.align 16 +.L011w2ndloop: + movl (%edi,%ecx,4),%eax + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movl (%edi,%edx,4),%ebx + jnz .L012wnowrap + movl -4(%edi),%ebp +.L012wnowrap: + movl %eax,(%edi,%edx,4) + movl %ebx,(%edi,%ecx,4) + addb $1,%cl + jnc .L011w2ndloop + jmp .L013exit +.align 16 +.L009c1stloop: + movb %al,(%edi,%eax,1) + addb $1,%al + jnc .L009c1stloop + xorl %ecx,%ecx + xorl %edx,%edx + xorl %ebx,%ebx +.align 16 +.L014c2ndloop: + movb (%edi,%ecx,1),%al + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movb (%edi,%edx,1),%bl + jnz .L015cnowrap + movl -4(%edi),%ebp +.L015cnowrap: + movb %al,(%edi,%edx,1) + movb %bl,(%edi,%ecx,1) + addb $1,%cl + jnc .L014c2ndloop + movl $-1,256(%edi) +.L013exit: + xorl %eax,%eax + movl %eax,-8(%edi) + movl %eax,-4(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size private_RC4_set_key,.-.L_private_RC4_set_key_begin +.globl RC4_options +.type RC4_options,@function +.align 16 +RC4_options: +.L_RC4_options_begin: + call .L016pic_point +.L016pic_point: + popl %eax + leal .L017opts-.L016pic_point(%eax),%eax + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%edx + btl $20,%edx + jc .L0181xchar + btl $26,%edx + jnc .L019ret + addl $25,%eax + ret +.L0181xchar: + addl $12,%eax +.L019ret: + ret +.align 64 +.L017opts: +.byte 114,99,52,40,52,120,44,105,110,116,41,0 +.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 +.size RC4_options,.-.L_RC4_options_begin +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/ripemd/rmd-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/ripemd/rmd-586.s new file mode 100644 index 00000000000000..3c45fb91d0563b --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/ripemd/rmd-586.s @@ -0,0 +1,1965 @@ +.file "../openssl/crypto/ripemd/asm/rmd-586.s" +.text +.globl ripemd160_block_asm_data_order +.type ripemd160_block_asm_data_order,@function +.align 16 +ripemd160_block_asm_data_order: +.L_ripemd160_block_asm_data_order_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + pushl %esi + movl (%edx),%ecx + pushl %edi + movl 4(%edx),%esi + pushl %ebp + movl 8(%edx),%edi + pushl %ebx + subl $108,%esp +.L000start: + + movl (%eax),%ebx + movl 4(%eax),%ebp + movl %ebx,(%esp) + movl %ebp,4(%esp) + movl 8(%eax),%ebx + movl 12(%eax),%ebp + movl %ebx,8(%esp) + movl %ebp,12(%esp) + movl 16(%eax),%ebx + movl 20(%eax),%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl 24(%eax),%ebx + movl 28(%eax),%ebp + movl %ebx,24(%esp) + movl %ebp,28(%esp) + movl 32(%eax),%ebx + movl 36(%eax),%ebp + movl %ebx,32(%esp) + movl %ebp,36(%esp) + movl 40(%eax),%ebx + movl 44(%eax),%ebp + movl %ebx,40(%esp) + movl %ebp,44(%esp) + movl 48(%eax),%ebx + movl 52(%eax),%ebp + movl %ebx,48(%esp) + movl %ebp,52(%esp) + movl 56(%eax),%ebx + movl 60(%eax),%ebp + movl %ebx,56(%esp) + movl %ebp,60(%esp) + movl %edi,%eax + movl 12(%edx),%ebx + movl 16(%edx),%ebp + + xorl %ebx,%eax + movl (%esp),%edx + xorl %esi,%eax + addl %edx,%ecx + roll $10,%edi + addl %eax,%ecx + movl %esi,%eax + roll $11,%ecx + addl %ebp,%ecx + + xorl %edi,%eax + movl 4(%esp),%edx + xorl %ecx,%eax + addl %eax,%ebp + movl %ecx,%eax + roll 
$10,%esi + addl %edx,%ebp + xorl %esi,%eax + roll $14,%ebp + addl %ebx,%ebp + + movl 8(%esp),%edx + xorl %ebp,%eax + addl %edx,%ebx + roll $10,%ecx + addl %eax,%ebx + movl %ebp,%eax + roll $15,%ebx + addl %edi,%ebx + + xorl %ecx,%eax + movl 12(%esp),%edx + xorl %ebx,%eax + addl %eax,%edi + movl %ebx,%eax + roll $10,%ebp + addl %edx,%edi + xorl %ebp,%eax + roll $12,%edi + addl %esi,%edi + + movl 16(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $5,%esi + addl %ecx,%esi + + xorl %ebx,%eax + movl 20(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl %esi,%eax + roll $10,%edi + addl %edx,%ecx + xorl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + + movl 24(%esp),%edx + xorl %ecx,%eax + addl %edx,%ebp + roll $10,%esi + addl %eax,%ebp + movl %ecx,%eax + roll $7,%ebp + addl %ebx,%ebp + + xorl %esi,%eax + movl 28(%esp),%edx + xorl %ebp,%eax + addl %eax,%ebx + movl %ebp,%eax + roll $10,%ecx + addl %edx,%ebx + xorl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + + movl 32(%esp),%edx + xorl %ebx,%eax + addl %edx,%edi + roll $10,%ebp + addl %eax,%edi + movl %ebx,%eax + roll $11,%edi + addl %esi,%edi + + xorl %ebp,%eax + movl 36(%esp),%edx + xorl %edi,%eax + addl %eax,%esi + movl %edi,%eax + roll $10,%ebx + addl %edx,%esi + xorl %ebx,%eax + roll $13,%esi + addl %ecx,%esi + + movl 40(%esp),%edx + xorl %esi,%eax + addl %edx,%ecx + roll $10,%edi + addl %eax,%ecx + movl %esi,%eax + roll $14,%ecx + addl %ebp,%ecx + + xorl %edi,%eax + movl 44(%esp),%edx + xorl %ecx,%eax + addl %eax,%ebp + movl %ecx,%eax + roll $10,%esi + addl %edx,%ebp + xorl %esi,%eax + roll $15,%ebp + addl %ebx,%ebp + + movl 48(%esp),%edx + xorl %ebp,%eax + addl %edx,%ebx + roll $10,%ecx + addl %eax,%ebx + movl %ebp,%eax + roll $6,%ebx + addl %edi,%ebx + + xorl %ecx,%eax + movl 52(%esp),%edx + xorl %ebx,%eax + addl %eax,%edi + movl %ebx,%eax + roll $10,%ebp + addl %edx,%edi + xorl %ebp,%eax + roll $7,%edi + addl %esi,%edi + + movl 56(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $9,%esi + addl %ecx,%esi + + xorl %ebx,%eax + movl 60(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl $-1,%eax + roll $10,%edi + addl %edx,%ecx + movl 28(%esp),%edx + roll $8,%ecx + addl %ebp,%ecx + + addl %edx,%ebp + movl %esi,%edx + subl %ecx,%eax + andl %ecx,%edx + andl %edi,%eax + orl %eax,%edx + movl 16(%esp),%eax + roll $10,%esi + leal 1518500249(%ebp,%edx,1),%ebp + movl $-1,%edx + roll $7,%ebp + addl %ebx,%ebp + + addl %eax,%ebx + movl %ecx,%eax + subl %ebp,%edx + andl %ebp,%eax + andl %esi,%edx + orl %edx,%eax + movl 52(%esp),%edx + roll $10,%ecx + leal 1518500249(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $6,%ebx + addl %edi,%ebx + + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl $-1,%edx + roll $8,%edi + addl %esi,%edi + + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 40(%esp),%edx + roll $10,%ebx + leal 1518500249(%esi,%eax,1),%esi + movl $-1,%eax + roll $13,%esi + addl %ecx,%esi + + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%edi + leal 1518500249(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $11,%ecx + addl %ebp,%ecx + + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl 60(%esp),%edx + roll $10,%esi + leal 
1518500249(%ebp,%eax,1),%ebp + movl $-1,%eax + roll $9,%ebp + addl %ebx,%ebp + + addl %edx,%ebx + movl %ecx,%edx + subl %ebp,%eax + andl %ebp,%edx + andl %esi,%eax + orl %eax,%edx + movl 12(%esp),%eax + roll $10,%ecx + leal 1518500249(%ebx,%edx,1),%ebx + movl $-1,%edx + roll $7,%ebx + addl %edi,%ebx + + addl %eax,%edi + movl %ebp,%eax + subl %ebx,%edx + andl %ebx,%eax + andl %ecx,%edx + orl %edx,%eax + movl 48(%esp),%edx + roll $10,%ebp + leal 1518500249(%edi,%eax,1),%edi + movl $-1,%eax + roll $15,%edi + addl %esi,%edi + + addl %edx,%esi + movl %ebx,%edx + subl %edi,%eax + andl %edi,%edx + andl %ebp,%eax + orl %eax,%edx + movl (%esp),%eax + roll $10,%ebx + leal 1518500249(%esi,%edx,1),%esi + movl $-1,%edx + roll $7,%esi + addl %ecx,%esi + + addl %eax,%ecx + movl %edi,%eax + subl %esi,%edx + andl %esi,%eax + andl %ebx,%edx + orl %edx,%eax + movl 36(%esp),%edx + roll $10,%edi + leal 1518500249(%ecx,%eax,1),%ecx + movl $-1,%eax + roll $12,%ecx + addl %ebp,%ecx + + addl %edx,%ebp + movl %esi,%edx + subl %ecx,%eax + andl %ecx,%edx + andl %edi,%eax + orl %eax,%edx + movl 20(%esp),%eax + roll $10,%esi + leal 1518500249(%ebp,%edx,1),%ebp + movl $-1,%edx + roll $15,%ebp + addl %ebx,%ebp + + addl %eax,%ebx + movl %ecx,%eax + subl %ebp,%edx + andl %ebp,%eax + andl %esi,%edx + orl %edx,%eax + movl 8(%esp),%edx + roll $10,%ecx + leal 1518500249(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $9,%ebx + addl %edi,%ebx + + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl $-1,%edx + roll $11,%edi + addl %esi,%edi + + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 44(%esp),%edx + roll $10,%ebx + leal 1518500249(%esi,%eax,1),%esi + movl $-1,%eax + roll $7,%esi + addl %ecx,%esi + + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 32(%esp),%eax + roll $10,%edi + leal 1518500249(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $13,%ecx + addl %ebp,%ecx + + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl $-1,%edx + roll $10,%esi + leal 1518500249(%ebp,%eax,1),%ebp + subl %ecx,%edx + roll $12,%ebp + addl %ebx,%ebp + + movl 12(%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1859775393(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $11,%ebx + addl %edi,%ebx + + movl 40(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1859775393(%edi,%eax,1),%edi + subl %ebx,%edx + roll $13,%edi + addl %esi,%edi + + movl 56(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1859775393(%esi,%edx,1),%esi + subl %edi,%eax + roll $6,%esi + addl %ecx,%esi + + movl 16(%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1859775393(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $7,%ecx + addl %ebp,%ecx + + movl 36(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1859775393(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $14,%ebp + addl %ebx,%ebp + + movl 60(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl $-1,%edx + roll $10,%ecx + leal 1859775393(%ebx,%eax,1),%ebx + subl %ebp,%edx + roll $9,%ebx + addl %edi,%ebx + + movl 32(%esp),%eax + orl %ebx,%edx + addl %eax,%edi + xorl %ecx,%edx + movl $-1,%eax + roll 
$10,%ebp + leal 1859775393(%edi,%edx,1),%edi + subl %ebx,%eax + roll $13,%edi + addl %esi,%edi + + movl 4(%esp),%edx + orl %edi,%eax + addl %edx,%esi + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ebx + leal 1859775393(%esi,%eax,1),%esi + subl %edi,%edx + roll $15,%esi + addl %ecx,%esi + + movl 8(%esp),%eax + orl %esi,%edx + addl %eax,%ecx + xorl %ebx,%edx + movl $-1,%eax + roll $10,%edi + leal 1859775393(%ecx,%edx,1),%ecx + subl %esi,%eax + roll $14,%ecx + addl %ebp,%ecx + + movl 28(%esp),%edx + orl %ecx,%eax + addl %edx,%ebp + xorl %edi,%eax + movl $-1,%edx + roll $10,%esi + leal 1859775393(%ebp,%eax,1),%ebp + subl %ecx,%edx + roll $8,%ebp + addl %ebx,%ebp + + movl (%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1859775393(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $13,%ebx + addl %edi,%ebx + + movl 24(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1859775393(%edi,%eax,1),%edi + subl %ebx,%edx + roll $6,%edi + addl %esi,%edi + + movl 52(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1859775393(%esi,%edx,1),%esi + subl %edi,%eax + roll $5,%esi + addl %ecx,%esi + + movl 44(%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1859775393(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $12,%ecx + addl %ebp,%ecx + + movl 20(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1859775393(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $7,%ebp + addl %ebx,%ebp + + movl 48(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl $-1,%edx + roll $10,%ecx + leal 1859775393(%ebx,%eax,1),%ebx + movl %ecx,%eax + roll $5,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $11,%edi + addl %esi,%edi + + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 36(%esp),%eax + roll $10,%ebx + leal 2400959708(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $12,%esi + addl %ecx,%esi + + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 44(%esp),%eax + roll $10,%edi + leal 2400959708(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $14,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 40(%esp),%eax + roll $10,%esi + leal 2400959708(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $15,%ebp + addl %ebx,%ebp + + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl (%esp),%eax + roll $10,%ecx + leal 2400959708(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $14,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 32(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $15,%edi + addl %esi,%edi + + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 48(%esp),%eax + roll $10,%ebx + leal 2400959708(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $9,%esi + addl %ecx,%esi + + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 16(%esp),%eax + roll $10,%edi + leal 2400959708(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + 
movl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 52(%esp),%eax + roll $10,%esi + leal 2400959708(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $9,%ebp + addl %ebx,%ebp + + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 12(%esp),%eax + roll $10,%ecx + leal 2400959708(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $14,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 28(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $5,%edi + addl %esi,%edi + + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 60(%esp),%eax + roll $10,%ebx + leal 2400959708(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $6,%esi + addl %ecx,%esi + + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%edi + leal 2400959708(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 20(%esp),%eax + roll $10,%esi + leal 2400959708(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $6,%ebp + addl %ebx,%ebp + + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%ecx + leal 2400959708(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $5,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 8(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + subl %ebp,%edx + roll $12,%edi + addl %esi,%edi + + movl 16(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 2840853838(%esi,%edx,1),%esi + subl %ebx,%eax + roll $9,%esi + addl %ecx,%esi + + movl (%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 2840853838(%ecx,%eax,1),%ecx + subl %edi,%edx + roll $15,%ecx + addl %ebp,%ecx + + movl 20(%esp),%eax + orl %esi,%edx + addl %eax,%ebp + xorl %ecx,%edx + movl $-1,%eax + roll $10,%esi + leal 2840853838(%ebp,%edx,1),%ebp + subl %esi,%eax + roll $5,%ebp + addl %ebx,%ebp + + movl 36(%esp),%edx + orl %ecx,%eax + addl %edx,%ebx + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ecx + leal 2840853838(%ebx,%eax,1),%ebx + subl %ecx,%edx + roll $11,%ebx + addl %edi,%ebx + + movl 28(%esp),%eax + orl %ebp,%edx + addl %eax,%edi + xorl %ebx,%edx + movl $-1,%eax + roll $10,%ebp + leal 2840853838(%edi,%edx,1),%edi + subl %ebp,%eax + roll $6,%edi + addl %esi,%edi + + movl 48(%esp),%edx + orl %ebx,%eax + addl %edx,%esi + xorl %edi,%eax + movl $-1,%edx + roll $10,%ebx + leal 2840853838(%esi,%eax,1),%esi + subl %ebx,%edx + roll $8,%esi + addl %ecx,%esi + + movl 8(%esp),%eax + orl %edi,%edx + addl %eax,%ecx + xorl %esi,%edx + movl $-1,%eax + roll $10,%edi + leal 2840853838(%ecx,%edx,1),%ecx + subl %edi,%eax + roll $13,%ecx + addl %ebp,%ecx + + movl 40(%esp),%edx + orl %esi,%eax + addl %edx,%ebp + xorl %ecx,%eax + movl $-1,%edx + roll $10,%esi + leal 2840853838(%ebp,%eax,1),%ebp + subl %esi,%edx + roll $12,%ebp + addl %ebx,%ebp + + movl 56(%esp),%eax + orl %ecx,%edx + addl %eax,%ebx + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ecx + leal 2840853838(%ebx,%edx,1),%ebx + subl %ecx,%eax + roll $5,%ebx + addl %edi,%ebx + + 
movl 4(%esp),%edx + orl %ebp,%eax + addl %edx,%edi + xorl %ebx,%eax + movl $-1,%edx + roll $10,%ebp + leal 2840853838(%edi,%eax,1),%edi + subl %ebp,%edx + roll $12,%edi + addl %esi,%edi + + movl 12(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 2840853838(%esi,%edx,1),%esi + subl %ebx,%eax + roll $13,%esi + addl %ecx,%esi + + movl 32(%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 2840853838(%ecx,%eax,1),%ecx + subl %edi,%edx + roll $14,%ecx + addl %ebp,%ecx + + movl 44(%esp),%eax + orl %esi,%edx + addl %eax,%ebp + xorl %ecx,%edx + movl $-1,%eax + roll $10,%esi + leal 2840853838(%ebp,%edx,1),%ebp + subl %esi,%eax + roll $11,%ebp + addl %ebx,%ebp + + movl 24(%esp),%edx + orl %ecx,%eax + addl %edx,%ebx + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ecx + leal 2840853838(%ebx,%eax,1),%ebx + subl %ecx,%edx + roll $8,%ebx + addl %edi,%ebx + + movl 60(%esp),%eax + orl %ebp,%edx + addl %eax,%edi + xorl %ebx,%edx + movl $-1,%eax + roll $10,%ebp + leal 2840853838(%edi,%edx,1),%edi + subl %ebp,%eax + roll $5,%edi + addl %esi,%edi + + movl 52(%esp),%edx + orl %ebx,%eax + addl %edx,%esi + xorl %edi,%eax + movl 128(%esp),%edx + roll $10,%ebx + leal 2840853838(%esi,%eax,1),%esi + movl %ecx,64(%esp) + roll $6,%esi + addl %ecx,%esi + movl (%edx),%ecx + movl %esi,68(%esp) + movl %edi,72(%esp) + movl 4(%edx),%esi + movl %ebx,76(%esp) + movl 8(%edx),%edi + movl %ebp,80(%esp) + movl 12(%edx),%ebx + movl 16(%edx),%ebp + + movl $-1,%edx + subl %ebx,%edx + movl 20(%esp),%eax + orl %edi,%edx + addl %eax,%ecx + xorl %esi,%edx + movl $-1,%eax + roll $10,%edi + leal 1352829926(%ecx,%edx,1),%ecx + subl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + + movl 56(%esp),%edx + orl %esi,%eax + addl %edx,%ebp + xorl %ecx,%eax + movl $-1,%edx + roll $10,%esi + leal 1352829926(%ebp,%eax,1),%ebp + subl %esi,%edx + roll $9,%ebp + addl %ebx,%ebp + + movl 28(%esp),%eax + orl %ecx,%edx + addl %eax,%ebx + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ecx + leal 1352829926(%ebx,%edx,1),%ebx + subl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + + movl (%esp),%edx + orl %ebp,%eax + addl %edx,%edi + xorl %ebx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1352829926(%edi,%eax,1),%edi + subl %ebp,%edx + roll $11,%edi + addl %esi,%edi + + movl 36(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 1352829926(%esi,%edx,1),%esi + subl %ebx,%eax + roll $13,%esi + addl %ecx,%esi + + movl 8(%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 1352829926(%ecx,%eax,1),%ecx + subl %edi,%edx + roll $15,%ecx + addl %ebp,%ecx + + movl 44(%esp),%eax + orl %esi,%edx + addl %eax,%ebp + xorl %ecx,%edx + movl $-1,%eax + roll $10,%esi + leal 1352829926(%ebp,%edx,1),%ebp + subl %esi,%eax + roll $15,%ebp + addl %ebx,%ebp + + movl 16(%esp),%edx + orl %ecx,%eax + addl %edx,%ebx + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ecx + leal 1352829926(%ebx,%eax,1),%ebx + subl %ecx,%edx + roll $5,%ebx + addl %edi,%ebx + + movl 52(%esp),%eax + orl %ebp,%edx + addl %eax,%edi + xorl %ebx,%edx + movl $-1,%eax + roll $10,%ebp + leal 1352829926(%edi,%edx,1),%edi + subl %ebp,%eax + roll $7,%edi + addl %esi,%edi + + movl 24(%esp),%edx + orl %ebx,%eax + addl %edx,%esi + xorl %edi,%eax + movl $-1,%edx + roll $10,%ebx + leal 1352829926(%esi,%eax,1),%esi + subl %ebx,%edx + roll $7,%esi + addl %ecx,%esi + + movl 60(%esp),%eax + orl %edi,%edx + addl %eax,%ecx + xorl %esi,%edx + movl $-1,%eax + roll 
$10,%edi + leal 1352829926(%ecx,%edx,1),%ecx + subl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + + movl 32(%esp),%edx + orl %esi,%eax + addl %edx,%ebp + xorl %ecx,%eax + movl $-1,%edx + roll $10,%esi + leal 1352829926(%ebp,%eax,1),%ebp + subl %esi,%edx + roll $11,%ebp + addl %ebx,%ebp + + movl 4(%esp),%eax + orl %ecx,%edx + addl %eax,%ebx + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ecx + leal 1352829926(%ebx,%edx,1),%ebx + subl %ecx,%eax + roll $14,%ebx + addl %edi,%ebx + + movl 40(%esp),%edx + orl %ebp,%eax + addl %edx,%edi + xorl %ebx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1352829926(%edi,%eax,1),%edi + subl %ebp,%edx + roll $14,%edi + addl %esi,%edi + + movl 12(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 1352829926(%esi,%edx,1),%esi + subl %ebx,%eax + roll $12,%esi + addl %ecx,%esi + + movl 48(%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 1352829926(%ecx,%eax,1),%ecx + movl %edi,%eax + roll $6,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $9,%ebp + addl %ebx,%ebp + + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 44(%esp),%eax + roll $10,%ecx + leal 1548603684(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $13,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 12(%esp),%eax + roll $10,%ebp + leal 1548603684(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $15,%edi + addl %esi,%edi + + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 28(%esp),%eax + roll $10,%ebx + leal 1548603684(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $7,%esi + addl %ecx,%esi + + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl (%esp),%eax + roll $10,%edi + leal 1548603684(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $12,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 52(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $8,%ebp + addl %ebx,%ebp + + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 20(%esp),%eax + roll $10,%ecx + leal 1548603684(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 40(%esp),%eax + roll $10,%ebp + leal 1548603684(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $11,%edi + addl %esi,%edi + + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%ebx + leal 1548603684(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $7,%esi + addl %ecx,%esi + + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 60(%esp),%eax + roll $10,%edi + leal 1548603684(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $7,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 32(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $12,%ebp + addl %ebx,%ebp + + subl %esi,%edx + andl %ebp,%eax + 
andl %ecx,%edx + orl %eax,%edx + movl 48(%esp),%eax + roll $10,%ecx + leal 1548603684(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $7,%ebx + addl %edi,%ebx + + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 16(%esp),%eax + roll $10,%ebp + leal 1548603684(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $6,%edi + addl %esi,%edi + + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 36(%esp),%eax + roll $10,%ebx + leal 1548603684(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $15,%esi + addl %ecx,%esi + + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%edi + leal 1548603684(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $13,%ecx + addl %ebp,%ecx + + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 8(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + subl %ecx,%edx + roll $11,%ebp + addl %ebx,%ebp + + movl 60(%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1836072691(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $9,%ebx + addl %edi,%ebx + + movl 20(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1836072691(%edi,%eax,1),%edi + subl %ebx,%edx + roll $7,%edi + addl %esi,%edi + + movl 4(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1836072691(%esi,%edx,1),%esi + subl %edi,%eax + roll $15,%esi + addl %ecx,%esi + + movl 12(%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1836072691(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $11,%ecx + addl %ebp,%ecx + + movl 28(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1836072691(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $8,%ebp + addl %ebx,%ebp + + movl 56(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl $-1,%edx + roll $10,%ecx + leal 1836072691(%ebx,%eax,1),%ebx + subl %ebp,%edx + roll $6,%ebx + addl %edi,%ebx + + movl 24(%esp),%eax + orl %ebx,%edx + addl %eax,%edi + xorl %ecx,%edx + movl $-1,%eax + roll $10,%ebp + leal 1836072691(%edi,%edx,1),%edi + subl %ebx,%eax + roll $6,%edi + addl %esi,%edi + + movl 36(%esp),%edx + orl %edi,%eax + addl %edx,%esi + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ebx + leal 1836072691(%esi,%eax,1),%esi + subl %edi,%edx + roll $14,%esi + addl %ecx,%esi + + movl 44(%esp),%eax + orl %esi,%edx + addl %eax,%ecx + xorl %ebx,%edx + movl $-1,%eax + roll $10,%edi + leal 1836072691(%ecx,%edx,1),%ecx + subl %esi,%eax + roll $12,%ecx + addl %ebp,%ecx + + movl 32(%esp),%edx + orl %ecx,%eax + addl %edx,%ebp + xorl %edi,%eax + movl $-1,%edx + roll $10,%esi + leal 1836072691(%ebp,%eax,1),%ebp + subl %ecx,%edx + roll $13,%ebp + addl %ebx,%ebp + + movl 48(%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1836072691(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $5,%ebx + addl %edi,%ebx + + movl 8(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1836072691(%edi,%eax,1),%edi + subl %ebx,%edx + roll $14,%edi + addl %esi,%edi + + movl 40(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1836072691(%esi,%edx,1),%esi + subl %edi,%eax + roll $13,%esi + addl 
%ecx,%esi + + movl (%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1836072691(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $13,%ecx + addl %ebp,%ecx + + movl 16(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1836072691(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $7,%ebp + addl %ebx,%ebp + + movl 52(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl 32(%esp),%edx + roll $10,%ecx + leal 1836072691(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $5,%ebx + addl %edi,%ebx + + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%ebp + leal 2053994217(%edi,%edx,1),%edi + movl $-1,%edx + roll $15,%edi + addl %esi,%edi + + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 16(%esp),%edx + roll $10,%ebx + leal 2053994217(%esi,%eax,1),%esi + movl $-1,%eax + roll $5,%esi + addl %ecx,%esi + + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%edi + leal 2053994217(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $8,%ecx + addl %ebp,%ecx + + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl 12(%esp),%edx + roll $10,%esi + leal 2053994217(%ebp,%eax,1),%ebp + movl $-1,%eax + roll $11,%ebp + addl %ebx,%ebp + + addl %edx,%ebx + movl %ecx,%edx + subl %ebp,%eax + andl %ebp,%edx + andl %esi,%eax + orl %eax,%edx + movl 44(%esp),%eax + roll $10,%ecx + leal 2053994217(%ebx,%edx,1),%ebx + movl $-1,%edx + roll $14,%ebx + addl %edi,%ebx + + addl %eax,%edi + movl %ebp,%eax + subl %ebx,%edx + andl %ebx,%eax + andl %ecx,%edx + orl %edx,%eax + movl 60(%esp),%edx + roll $10,%ebp + leal 2053994217(%edi,%eax,1),%edi + movl $-1,%eax + roll $14,%edi + addl %esi,%edi + + addl %edx,%esi + movl %ebx,%edx + subl %edi,%eax + andl %edi,%edx + andl %ebp,%eax + orl %eax,%edx + movl (%esp),%eax + roll $10,%ebx + leal 2053994217(%esi,%edx,1),%esi + movl $-1,%edx + roll $6,%esi + addl %ecx,%esi + + addl %eax,%ecx + movl %edi,%eax + subl %esi,%edx + andl %esi,%eax + andl %ebx,%edx + orl %edx,%eax + movl 20(%esp),%edx + roll $10,%edi + leal 2053994217(%ecx,%eax,1),%ecx + movl $-1,%eax + roll $14,%ecx + addl %ebp,%ecx + + addl %edx,%ebp + movl %esi,%edx + subl %ecx,%eax + andl %ecx,%edx + andl %edi,%eax + orl %eax,%edx + movl 48(%esp),%eax + roll $10,%esi + leal 2053994217(%ebp,%edx,1),%ebp + movl $-1,%edx + roll $6,%ebp + addl %ebx,%ebp + + addl %eax,%ebx + movl %ecx,%eax + subl %ebp,%edx + andl %ebp,%eax + andl %esi,%edx + orl %edx,%eax + movl 8(%esp),%edx + roll $10,%ecx + leal 2053994217(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $9,%ebx + addl %edi,%ebx + + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 52(%esp),%eax + roll $10,%ebp + leal 2053994217(%edi,%edx,1),%edi + movl $-1,%edx + roll $12,%edi + addl %esi,%edi + + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 36(%esp),%edx + roll $10,%ebx + leal 2053994217(%esi,%eax,1),%esi + movl $-1,%eax + roll $9,%esi + addl %ecx,%esi + + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 28(%esp),%eax + roll $10,%edi + leal 2053994217(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $12,%ecx + addl %ebp,%ecx + + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + 
andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl 40(%esp),%edx + roll $10,%esi + leal 2053994217(%ebp,%eax,1),%ebp + movl $-1,%eax + roll $5,%ebp + addl %ebx,%ebp + + addl %edx,%ebx + movl %ecx,%edx + subl %ebp,%eax + andl %ebp,%edx + andl %esi,%eax + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%ecx + leal 2053994217(%ebx,%edx,1),%ebx + movl $-1,%edx + roll $15,%ebx + addl %edi,%ebx + + addl %eax,%edi + movl %ebp,%eax + subl %ebx,%edx + andl %ebx,%eax + andl %ecx,%edx + orl %eax,%edx + movl %ebx,%eax + roll $10,%ebp + leal 2053994217(%edi,%edx,1),%edi + xorl %ebp,%eax + roll $8,%edi + addl %esi,%edi + + movl 48(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $8,%esi + addl %ecx,%esi + + xorl %ebx,%eax + movl 60(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl %esi,%eax + roll $10,%edi + addl %edx,%ecx + xorl %edi,%eax + roll $5,%ecx + addl %ebp,%ecx + + movl 40(%esp),%edx + xorl %ecx,%eax + addl %edx,%ebp + roll $10,%esi + addl %eax,%ebp + movl %ecx,%eax + roll $12,%ebp + addl %ebx,%ebp + + xorl %esi,%eax + movl 16(%esp),%edx + xorl %ebp,%eax + addl %eax,%ebx + movl %ebp,%eax + roll $10,%ecx + addl %edx,%ebx + xorl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + + movl 4(%esp),%edx + xorl %ebx,%eax + addl %edx,%edi + roll $10,%ebp + addl %eax,%edi + movl %ebx,%eax + roll $12,%edi + addl %esi,%edi + + xorl %ebp,%eax + movl 20(%esp),%edx + xorl %edi,%eax + addl %eax,%esi + movl %edi,%eax + roll $10,%ebx + addl %edx,%esi + xorl %ebx,%eax + roll $5,%esi + addl %ecx,%esi + + movl 32(%esp),%edx + xorl %esi,%eax + addl %edx,%ecx + roll $10,%edi + addl %eax,%ecx + movl %esi,%eax + roll $14,%ecx + addl %ebp,%ecx + + xorl %edi,%eax + movl 28(%esp),%edx + xorl %ecx,%eax + addl %eax,%ebp + movl %ecx,%eax + roll $10,%esi + addl %edx,%ebp + xorl %esi,%eax + roll $6,%ebp + addl %ebx,%ebp + + movl 24(%esp),%edx + xorl %ebp,%eax + addl %edx,%ebx + roll $10,%ecx + addl %eax,%ebx + movl %ebp,%eax + roll $8,%ebx + addl %edi,%ebx + + xorl %ecx,%eax + movl 8(%esp),%edx + xorl %ebx,%eax + addl %eax,%edi + movl %ebx,%eax + roll $10,%ebp + addl %edx,%edi + xorl %ebp,%eax + roll $13,%edi + addl %esi,%edi + + movl 52(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $6,%esi + addl %ecx,%esi + + xorl %ebx,%eax + movl 56(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl %esi,%eax + roll $10,%edi + addl %edx,%ecx + xorl %edi,%eax + roll $5,%ecx + addl %ebp,%ecx + + movl (%esp),%edx + xorl %ecx,%eax + addl %edx,%ebp + roll $10,%esi + addl %eax,%ebp + movl %ecx,%eax + roll $15,%ebp + addl %ebx,%ebp + + xorl %esi,%eax + movl 12(%esp),%edx + xorl %ebp,%eax + addl %eax,%ebx + movl %ebp,%eax + roll $10,%ecx + addl %edx,%ebx + xorl %ecx,%eax + roll $13,%ebx + addl %edi,%ebx + + movl 36(%esp),%edx + xorl %ebx,%eax + addl %edx,%edi + roll $10,%ebp + addl %eax,%edi + movl %ebx,%eax + roll $11,%edi + addl %esi,%edi + + xorl %ebp,%eax + movl 44(%esp),%edx + xorl %edi,%eax + addl %eax,%esi + roll $10,%ebx + addl %edx,%esi + movl 128(%esp),%edx + roll $11,%esi + addl %ecx,%esi + movl 4(%edx),%eax + addl %eax,%ebx + movl 72(%esp),%eax + addl %eax,%ebx + movl 8(%edx),%eax + addl %eax,%ebp + movl 76(%esp),%eax + addl %eax,%ebp + movl 12(%edx),%eax + addl %eax,%ecx + movl 80(%esp),%eax + addl %eax,%ecx + movl 16(%edx),%eax + addl %eax,%esi + movl 64(%esp),%eax + addl %eax,%esi + movl (%edx),%eax + addl %eax,%edi + movl 68(%esp),%eax + addl %eax,%edi + movl 136(%esp),%eax + movl %ebx,(%edx) + movl %ebp,4(%edx) + movl 
%ecx,8(%edx) + subl $1,%eax + movl %esi,12(%edx) + movl %edi,16(%edx) + jle .L001get_out + movl %eax,136(%esp) + movl %ecx,%edi + movl 132(%esp),%eax + movl %ebx,%ecx + addl $64,%eax + movl %ebp,%esi + movl %eax,132(%esp) + jmp .L000start +.L001get_out: + addl $108,%esp + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.size ripemd160_block_asm_data_order,.-.L_ripemd160_block_asm_data_order_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha1-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha1-586.s new file mode 100644 index 00000000000000..816b1d55bb206b --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha1-586.s @@ -0,0 +1,2795 @@ +.file "sha1-586.s" +.text +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: +.L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L000pic_point +.L000pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%esi + leal .LK_XX_XX-.L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%edx + testl $512,%edx + jz .L001x86 + movl 8(%esi),%ecx + testl $16777216,%eax + jz .L001x86 + testl $536870912,%ecx + jnz .Lshaext_shortcut + jmp .Lssse3_shortcut +.align 16 +.L001x86: + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp .L002loop +.align 16 +.L002loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl 
%ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl 
%ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 
1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 
36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl 
%ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 16(%esp),%esi + 
xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi 
+ rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb .L002loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order,.-.L_sha1_block_data_order_begin +.type _sha1_block_data_order_shaext,@function +.align 16 +_sha1_block_data_order_shaext: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L003pic_point +.L003pic_point: + popl %ebp + leal .LK_XX_XX-.L003pic_point(%ebp),%ebp +.Lshaext_shortcut: + movl 20(%esp),%edi + movl %esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 102,15,56,0,251 + jmp .L004loop_shaext +.align 16 +.L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 
15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz .L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext +.type _sha1_block_data_order_ssse3,@function +.align 16 +_sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L005pic_point +.L005pic_point: + popl %ebp + leal .LK_XX_XX-.L005pic_point(%ebp),%ebp +.Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp .L006loop +.align 16 +.L006loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + 
movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor 
%xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa 
%xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + 
movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl 
%esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je .L007done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp .L006loop +.align 16 +.L007done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl 
%ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 +.align 64 +.LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha256-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha256-586.s new file mode 100644 index 00000000000000..836d91886bda53 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha256-586.s @@ -0,0 +1,4577 @@ +.file "sha512-586.s" +.text +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: +.L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K256-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz .L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz .L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz .L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + testl $512,%ebx + jnz .L005SSSE3 +.L003no_xmm: + 
subl %edi,%eax + cmpl $256,%eax + jae .L006unrolled + jmp .L002loop +.align 16 +.L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 16 +.L00700_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne .L00700_15 + movl 156(%esp),%ecx + jmp .L00816_63 +.align 16 +.L00816_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00816_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L002loop + movl 12(%esp),%esp + popl 
%edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 16 +.L006unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L009grand_loop +.align 16 +.L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 
12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl 
%ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + 
movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl 
$17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 
24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + 
xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl 
%ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + 
xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 
2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl 
%edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl 
%esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl 
$17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 
20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl 
%edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi 
+ movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb .L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L004shaext: + subl $32,%esp + movdqu (%esi),%xmm1 + leal 128(%ebp),%ebp + movdqu 16(%esi),%xmm2 + movdqa 128(%ebp),%xmm7 + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .L010loop_shaext +.align 16 +.L010loop_shaext: + movdqu (%edi),%xmm3 + movdqu 16(%edi),%xmm4 + movdqu 32(%edi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%edi),%xmm6 + movdqa %xmm2,16(%esp) + movdqa -128(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,(%esp) +.byte 15,56,203,202 + movdqa -112(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leal 64(%edi),%edi +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -80(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa -64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa -48(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -16(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa (%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 16(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 48(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd 
$14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + movdqa 96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa 128(%ebp),%xmm7 +.byte 15,56,203,202 + movdqa 112(%ebp),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + cmpl %edi,%eax + nop +.byte 15,56,203,202 + paddd 16(%esp),%xmm2 + paddd (%esp),%xmm1 + jnz .L010loop_shaext + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + movl 44(%esp),%esp + movdqu %xmm1,(%esi) + movdqu %xmm2,16(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L005SSSE3: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp .L011grand_ssse3 +.align 16 +.L011grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp .L012ssse3_00_47 +.align 16 +.L012ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl 
%eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl 
%ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 
102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L012ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + 
rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi 
+ xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 
88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L011grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha512-586.s b/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha512-586.s new file mode 100644 index 00000000000000..83329f439e8b79 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/sha/sha512-586.s @@ -0,0 +1,2829 @@ +.file "sha512-586.s" +.text +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: +.L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K512-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz .L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je .L003SSSE3 + subl $80,%esp + jmp .L004loop_sse2 +.align 16 +.L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp .L00500_14_sse2 +.align 16 +.L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + 
psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz .L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp .L00616_79_sse2 +.align 16 +.L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq 
%mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz .L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb .L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 32 +.L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp .L00800_47_ssse3 +.align 32 +.L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor 
%xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + 
psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor 
%xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 
48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + 
psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq 
$28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz .L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 
32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + 
psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq 
$4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 
32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb .L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 16 +.L00900_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl 
%edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne .L00900_15_x86 +.align 16 +.L01016_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + 
shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne .L01016_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb .L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 
2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.size sha512_block_data_order,.-.L_sha512_block_data_order_begin +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/whrlpool/wp-mmx.s b/deps/openssl/asm_obsolete/x86-elf-gas/whrlpool/wp-mmx.s new file mode 100644 index 00000000000000..37f50898ae6b53 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/whrlpool/wp-mmx.s @@ -0,0 +1,1107 @@ +.file "wp-mmx.s" +.text +.globl whirlpool_block_mmx +.type whirlpool_block_mmx,@function +.align 16 +whirlpool_block_mmx: +.L_whirlpool_block_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebp + movl %esp,%eax + subl $148,%esp + andl $-64,%esp + leal 128(%esp),%ebx + movl %esi,(%ebx) + movl %edi,4(%ebx) + movl %ebp,8(%ebx) + movl %eax,16(%ebx) + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001table-.L000pic_point(%ebp),%ebp + xorl %ecx,%ecx + xorl %edx,%edx + movq (%esi),%mm0 + movq 8(%esi),%mm1 + movq 16(%esi),%mm2 + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 +.L002outerloop: + movq %mm0,(%esp) + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm4,32(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + movq %mm7,56(%esp) + pxor (%edi),%mm0 + pxor 8(%edi),%mm1 + pxor 16(%edi),%mm2 + pxor 24(%edi),%mm3 + pxor 32(%edi),%mm4 + pxor 40(%edi),%mm5 + pxor 48(%edi),%mm6 + pxor 56(%edi),%mm7 + movq %mm0,64(%esp) + movq %mm1,72(%esp) + movq %mm2,80(%esp) + movq %mm3,88(%esp) + movq %mm4,96(%esp) + movq %mm5,104(%esp) + movq %mm6,112(%esp) + movq %mm7,120(%esp) + xorl %esi,%esi + movl %esi,12(%ebx) +.align 16 +.L003round: + movq 4096(%ebp,%esi,8),%mm0 + movl (%esp),%eax + movl 4(%esp),%ebx + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm0 + movq 7(%ebp,%edi,8),%mm1 + movl 8(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + movq 6(%ebp,%esi,8),%mm2 + movq 5(%ebp,%edi,8),%mm3 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + movq 4(%ebp,%esi,8),%mm4 + movq 3(%ebp,%edi,8),%mm5 + movl 12(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + movq 2(%ebp,%esi,8),%mm6 + movq 1(%ebp,%edi,8),%mm7 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm1 + pxor 7(%ebp,%edi,8),%mm2 + movl 16(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm3 + pxor 5(%ebp,%edi,8),%mm4 + 
shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm5 + pxor 3(%ebp,%edi,8),%mm6 + movl 20(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm7 + pxor 1(%ebp,%edi,8),%mm0 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm2 + pxor 7(%ebp,%edi,8),%mm3 + movl 24(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm4 + pxor 5(%ebp,%edi,8),%mm5 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm6 + pxor 3(%ebp,%edi,8),%mm7 + movl 28(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm0 + pxor 1(%ebp,%edi,8),%mm1 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm3 + pxor 7(%ebp,%edi,8),%mm4 + movl 32(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm5 + pxor 5(%ebp,%edi,8),%mm6 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm7 + pxor 3(%ebp,%edi,8),%mm0 + movl 36(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm1 + pxor 1(%ebp,%edi,8),%mm2 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm4 + pxor 7(%ebp,%edi,8),%mm5 + movl 40(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm6 + pxor 5(%ebp,%edi,8),%mm7 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm0 + pxor 3(%ebp,%edi,8),%mm1 + movl 44(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm2 + pxor 1(%ebp,%edi,8),%mm3 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm5 + pxor 7(%ebp,%edi,8),%mm6 + movl 48(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm7 + pxor 5(%ebp,%edi,8),%mm0 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm1 + pxor 3(%ebp,%edi,8),%mm2 + movl 52(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm3 + pxor 1(%ebp,%edi,8),%mm4 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm6 + pxor 7(%ebp,%edi,8),%mm7 + movl 56(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm0 + pxor 5(%ebp,%edi,8),%mm1 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm2 + pxor 3(%ebp,%edi,8),%mm3 + movl 60(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm4 + pxor 1(%ebp,%edi,8),%mm5 + shrl $16,%eax + leal 
(%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm7 + pxor 7(%ebp,%edi,8),%mm0 + movl 64(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm1 + pxor 5(%ebp,%edi,8),%mm2 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm3 + pxor 3(%ebp,%edi,8),%mm4 + movl 68(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm5 + pxor 1(%ebp,%edi,8),%mm6 + movq %mm0,(%esp) + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm4,32(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + movq %mm7,56(%esp) + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm0 + pxor 7(%ebp,%edi,8),%mm1 + movl 72(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm2 + pxor 5(%ebp,%edi,8),%mm3 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm4 + pxor 3(%ebp,%edi,8),%mm5 + movl 76(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm6 + pxor 1(%ebp,%edi,8),%mm7 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm1 + pxor 7(%ebp,%edi,8),%mm2 + movl 80(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm3 + pxor 5(%ebp,%edi,8),%mm4 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm5 + pxor 3(%ebp,%edi,8),%mm6 + movl 84(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm7 + pxor 1(%ebp,%edi,8),%mm0 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm2 + pxor 7(%ebp,%edi,8),%mm3 + movl 88(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm4 + pxor 5(%ebp,%edi,8),%mm5 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm6 + pxor 3(%ebp,%edi,8),%mm7 + movl 92(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm0 + pxor 1(%ebp,%edi,8),%mm1 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm3 + pxor 7(%ebp,%edi,8),%mm4 + movl 96(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm5 + pxor 5(%ebp,%edi,8),%mm6 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm7 + pxor 3(%ebp,%edi,8),%mm0 + movl 100(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm1 + pxor 1(%ebp,%edi,8),%mm2 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm4 + pxor 7(%ebp,%edi,8),%mm5 + movl 104(%esp),%eax + leal (%ecx,%ecx,1),%esi + 
movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm6 + pxor 5(%ebp,%edi,8),%mm7 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm0 + pxor 3(%ebp,%edi,8),%mm1 + movl 108(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm2 + pxor 1(%ebp,%edi,8),%mm3 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm5 + pxor 7(%ebp,%edi,8),%mm6 + movl 112(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm7 + pxor 5(%ebp,%edi,8),%mm0 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm1 + pxor 3(%ebp,%edi,8),%mm2 + movl 116(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm3 + pxor 1(%ebp,%edi,8),%mm4 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm6 + pxor 7(%ebp,%edi,8),%mm7 + movl 120(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm0 + pxor 5(%ebp,%edi,8),%mm1 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm2 + pxor 3(%ebp,%edi,8),%mm3 + movl 124(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm4 + pxor 1(%ebp,%edi,8),%mm5 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm7 + pxor 7(%ebp,%edi,8),%mm0 + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm1 + pxor 5(%ebp,%edi,8),%mm2 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm3 + pxor 3(%ebp,%edi,8),%mm4 + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm5 + pxor 1(%ebp,%edi,8),%mm6 + leal 128(%esp),%ebx + movl 12(%ebx),%esi + addl $1,%esi + cmpl $10,%esi + je .L004roundsdone + movl %esi,12(%ebx) + movq %mm0,64(%esp) + movq %mm1,72(%esp) + movq %mm2,80(%esp) + movq %mm3,88(%esp) + movq %mm4,96(%esp) + movq %mm5,104(%esp) + movq %mm6,112(%esp) + movq %mm7,120(%esp) + jmp .L003round +.align 16 +.L004roundsdone: + movl (%ebx),%esi + movl 4(%ebx),%edi + movl 8(%ebx),%eax + pxor (%edi),%mm0 + pxor 8(%edi),%mm1 + pxor 16(%edi),%mm2 + pxor 24(%edi),%mm3 + pxor 32(%edi),%mm4 + pxor 40(%edi),%mm5 + pxor 48(%edi),%mm6 + pxor 56(%edi),%mm7 + pxor (%esi),%mm0 + pxor 8(%esi),%mm1 + pxor 16(%esi),%mm2 + pxor 24(%esi),%mm3 + pxor 32(%esi),%mm4 + pxor 40(%esi),%mm5 + pxor 48(%esi),%mm6 + pxor 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal 64(%edi),%edi + subl $1,%eax + jz .L005alldone + movl %edi,4(%ebx) + movl %eax,8(%ebx) + jmp .L002outerloop +.L005alldone: + emms + movl 16(%ebx),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001table: +.byte 24,24,96,24,192,120,48,216 +.byte 24,24,96,24,192,120,48,216 +.byte 35,35,140,35,5,175,70,38 +.byte 
35,35,140,35,5,175,70,38 +.byte 198,198,63,198,126,249,145,184 +.byte 198,198,63,198,126,249,145,184 +.byte 232,232,135,232,19,111,205,251 +.byte 232,232,135,232,19,111,205,251 +.byte 135,135,38,135,76,161,19,203 +.byte 135,135,38,135,76,161,19,203 +.byte 184,184,218,184,169,98,109,17 +.byte 184,184,218,184,169,98,109,17 +.byte 1,1,4,1,8,5,2,9 +.byte 1,1,4,1,8,5,2,9 +.byte 79,79,33,79,66,110,158,13 +.byte 79,79,33,79,66,110,158,13 +.byte 54,54,216,54,173,238,108,155 +.byte 54,54,216,54,173,238,108,155 +.byte 166,166,162,166,89,4,81,255 +.byte 166,166,162,166,89,4,81,255 +.byte 210,210,111,210,222,189,185,12 +.byte 210,210,111,210,222,189,185,12 +.byte 245,245,243,245,251,6,247,14 +.byte 245,245,243,245,251,6,247,14 +.byte 121,121,249,121,239,128,242,150 +.byte 121,121,249,121,239,128,242,150 +.byte 111,111,161,111,95,206,222,48 +.byte 111,111,161,111,95,206,222,48 +.byte 145,145,126,145,252,239,63,109 +.byte 145,145,126,145,252,239,63,109 +.byte 82,82,85,82,170,7,164,248 +.byte 82,82,85,82,170,7,164,248 +.byte 96,96,157,96,39,253,192,71 +.byte 96,96,157,96,39,253,192,71 +.byte 188,188,202,188,137,118,101,53 +.byte 188,188,202,188,137,118,101,53 +.byte 155,155,86,155,172,205,43,55 +.byte 155,155,86,155,172,205,43,55 +.byte 142,142,2,142,4,140,1,138 +.byte 142,142,2,142,4,140,1,138 +.byte 163,163,182,163,113,21,91,210 +.byte 163,163,182,163,113,21,91,210 +.byte 12,12,48,12,96,60,24,108 +.byte 12,12,48,12,96,60,24,108 +.byte 123,123,241,123,255,138,246,132 +.byte 123,123,241,123,255,138,246,132 +.byte 53,53,212,53,181,225,106,128 +.byte 53,53,212,53,181,225,106,128 +.byte 29,29,116,29,232,105,58,245 +.byte 29,29,116,29,232,105,58,245 +.byte 224,224,167,224,83,71,221,179 +.byte 224,224,167,224,83,71,221,179 +.byte 215,215,123,215,246,172,179,33 +.byte 215,215,123,215,246,172,179,33 +.byte 194,194,47,194,94,237,153,156 +.byte 194,194,47,194,94,237,153,156 +.byte 46,46,184,46,109,150,92,67 +.byte 46,46,184,46,109,150,92,67 +.byte 75,75,49,75,98,122,150,41 +.byte 75,75,49,75,98,122,150,41 +.byte 254,254,223,254,163,33,225,93 +.byte 254,254,223,254,163,33,225,93 +.byte 87,87,65,87,130,22,174,213 +.byte 87,87,65,87,130,22,174,213 +.byte 21,21,84,21,168,65,42,189 +.byte 21,21,84,21,168,65,42,189 +.byte 119,119,193,119,159,182,238,232 +.byte 119,119,193,119,159,182,238,232 +.byte 55,55,220,55,165,235,110,146 +.byte 55,55,220,55,165,235,110,146 +.byte 229,229,179,229,123,86,215,158 +.byte 229,229,179,229,123,86,215,158 +.byte 159,159,70,159,140,217,35,19 +.byte 159,159,70,159,140,217,35,19 +.byte 240,240,231,240,211,23,253,35 +.byte 240,240,231,240,211,23,253,35 +.byte 74,74,53,74,106,127,148,32 +.byte 74,74,53,74,106,127,148,32 +.byte 218,218,79,218,158,149,169,68 +.byte 218,218,79,218,158,149,169,68 +.byte 88,88,125,88,250,37,176,162 +.byte 88,88,125,88,250,37,176,162 +.byte 201,201,3,201,6,202,143,207 +.byte 201,201,3,201,6,202,143,207 +.byte 41,41,164,41,85,141,82,124 +.byte 41,41,164,41,85,141,82,124 +.byte 10,10,40,10,80,34,20,90 +.byte 10,10,40,10,80,34,20,90 +.byte 177,177,254,177,225,79,127,80 +.byte 177,177,254,177,225,79,127,80 +.byte 160,160,186,160,105,26,93,201 +.byte 160,160,186,160,105,26,93,201 +.byte 107,107,177,107,127,218,214,20 +.byte 107,107,177,107,127,218,214,20 +.byte 133,133,46,133,92,171,23,217 +.byte 133,133,46,133,92,171,23,217 +.byte 189,189,206,189,129,115,103,60 +.byte 189,189,206,189,129,115,103,60 +.byte 93,93,105,93,210,52,186,143 +.byte 93,93,105,93,210,52,186,143 +.byte 16,16,64,16,128,80,32,144 +.byte 16,16,64,16,128,80,32,144 +.byte 244,244,247,244,243,3,245,7 
+.byte 244,244,247,244,243,3,245,7 +.byte 203,203,11,203,22,192,139,221 +.byte 203,203,11,203,22,192,139,221 +.byte 62,62,248,62,237,198,124,211 +.byte 62,62,248,62,237,198,124,211 +.byte 5,5,20,5,40,17,10,45 +.byte 5,5,20,5,40,17,10,45 +.byte 103,103,129,103,31,230,206,120 +.byte 103,103,129,103,31,230,206,120 +.byte 228,228,183,228,115,83,213,151 +.byte 228,228,183,228,115,83,213,151 +.byte 39,39,156,39,37,187,78,2 +.byte 39,39,156,39,37,187,78,2 +.byte 65,65,25,65,50,88,130,115 +.byte 65,65,25,65,50,88,130,115 +.byte 139,139,22,139,44,157,11,167 +.byte 139,139,22,139,44,157,11,167 +.byte 167,167,166,167,81,1,83,246 +.byte 167,167,166,167,81,1,83,246 +.byte 125,125,233,125,207,148,250,178 +.byte 125,125,233,125,207,148,250,178 +.byte 149,149,110,149,220,251,55,73 +.byte 149,149,110,149,220,251,55,73 +.byte 216,216,71,216,142,159,173,86 +.byte 216,216,71,216,142,159,173,86 +.byte 251,251,203,251,139,48,235,112 +.byte 251,251,203,251,139,48,235,112 +.byte 238,238,159,238,35,113,193,205 +.byte 238,238,159,238,35,113,193,205 +.byte 124,124,237,124,199,145,248,187 +.byte 124,124,237,124,199,145,248,187 +.byte 102,102,133,102,23,227,204,113 +.byte 102,102,133,102,23,227,204,113 +.byte 221,221,83,221,166,142,167,123 +.byte 221,221,83,221,166,142,167,123 +.byte 23,23,92,23,184,75,46,175 +.byte 23,23,92,23,184,75,46,175 +.byte 71,71,1,71,2,70,142,69 +.byte 71,71,1,71,2,70,142,69 +.byte 158,158,66,158,132,220,33,26 +.byte 158,158,66,158,132,220,33,26 +.byte 202,202,15,202,30,197,137,212 +.byte 202,202,15,202,30,197,137,212 +.byte 45,45,180,45,117,153,90,88 +.byte 45,45,180,45,117,153,90,88 +.byte 191,191,198,191,145,121,99,46 +.byte 191,191,198,191,145,121,99,46 +.byte 7,7,28,7,56,27,14,63 +.byte 7,7,28,7,56,27,14,63 +.byte 173,173,142,173,1,35,71,172 +.byte 173,173,142,173,1,35,71,172 +.byte 90,90,117,90,234,47,180,176 +.byte 90,90,117,90,234,47,180,176 +.byte 131,131,54,131,108,181,27,239 +.byte 131,131,54,131,108,181,27,239 +.byte 51,51,204,51,133,255,102,182 +.byte 51,51,204,51,133,255,102,182 +.byte 99,99,145,99,63,242,198,92 +.byte 99,99,145,99,63,242,198,92 +.byte 2,2,8,2,16,10,4,18 +.byte 2,2,8,2,16,10,4,18 +.byte 170,170,146,170,57,56,73,147 +.byte 170,170,146,170,57,56,73,147 +.byte 113,113,217,113,175,168,226,222 +.byte 113,113,217,113,175,168,226,222 +.byte 200,200,7,200,14,207,141,198 +.byte 200,200,7,200,14,207,141,198 +.byte 25,25,100,25,200,125,50,209 +.byte 25,25,100,25,200,125,50,209 +.byte 73,73,57,73,114,112,146,59 +.byte 73,73,57,73,114,112,146,59 +.byte 217,217,67,217,134,154,175,95 +.byte 217,217,67,217,134,154,175,95 +.byte 242,242,239,242,195,29,249,49 +.byte 242,242,239,242,195,29,249,49 +.byte 227,227,171,227,75,72,219,168 +.byte 227,227,171,227,75,72,219,168 +.byte 91,91,113,91,226,42,182,185 +.byte 91,91,113,91,226,42,182,185 +.byte 136,136,26,136,52,146,13,188 +.byte 136,136,26,136,52,146,13,188 +.byte 154,154,82,154,164,200,41,62 +.byte 154,154,82,154,164,200,41,62 +.byte 38,38,152,38,45,190,76,11 +.byte 38,38,152,38,45,190,76,11 +.byte 50,50,200,50,141,250,100,191 +.byte 50,50,200,50,141,250,100,191 +.byte 176,176,250,176,233,74,125,89 +.byte 176,176,250,176,233,74,125,89 +.byte 233,233,131,233,27,106,207,242 +.byte 233,233,131,233,27,106,207,242 +.byte 15,15,60,15,120,51,30,119 +.byte 15,15,60,15,120,51,30,119 +.byte 213,213,115,213,230,166,183,51 +.byte 213,213,115,213,230,166,183,51 +.byte 128,128,58,128,116,186,29,244 +.byte 128,128,58,128,116,186,29,244 +.byte 190,190,194,190,153,124,97,39 +.byte 190,190,194,190,153,124,97,39 +.byte 
205,205,19,205,38,222,135,235 +.byte 205,205,19,205,38,222,135,235 +.byte 52,52,208,52,189,228,104,137 +.byte 52,52,208,52,189,228,104,137 +.byte 72,72,61,72,122,117,144,50 +.byte 72,72,61,72,122,117,144,50 +.byte 255,255,219,255,171,36,227,84 +.byte 255,255,219,255,171,36,227,84 +.byte 122,122,245,122,247,143,244,141 +.byte 122,122,245,122,247,143,244,141 +.byte 144,144,122,144,244,234,61,100 +.byte 144,144,122,144,244,234,61,100 +.byte 95,95,97,95,194,62,190,157 +.byte 95,95,97,95,194,62,190,157 +.byte 32,32,128,32,29,160,64,61 +.byte 32,32,128,32,29,160,64,61 +.byte 104,104,189,104,103,213,208,15 +.byte 104,104,189,104,103,213,208,15 +.byte 26,26,104,26,208,114,52,202 +.byte 26,26,104,26,208,114,52,202 +.byte 174,174,130,174,25,44,65,183 +.byte 174,174,130,174,25,44,65,183 +.byte 180,180,234,180,201,94,117,125 +.byte 180,180,234,180,201,94,117,125 +.byte 84,84,77,84,154,25,168,206 +.byte 84,84,77,84,154,25,168,206 +.byte 147,147,118,147,236,229,59,127 +.byte 147,147,118,147,236,229,59,127 +.byte 34,34,136,34,13,170,68,47 +.byte 34,34,136,34,13,170,68,47 +.byte 100,100,141,100,7,233,200,99 +.byte 100,100,141,100,7,233,200,99 +.byte 241,241,227,241,219,18,255,42 +.byte 241,241,227,241,219,18,255,42 +.byte 115,115,209,115,191,162,230,204 +.byte 115,115,209,115,191,162,230,204 +.byte 18,18,72,18,144,90,36,130 +.byte 18,18,72,18,144,90,36,130 +.byte 64,64,29,64,58,93,128,122 +.byte 64,64,29,64,58,93,128,122 +.byte 8,8,32,8,64,40,16,72 +.byte 8,8,32,8,64,40,16,72 +.byte 195,195,43,195,86,232,155,149 +.byte 195,195,43,195,86,232,155,149 +.byte 236,236,151,236,51,123,197,223 +.byte 236,236,151,236,51,123,197,223 +.byte 219,219,75,219,150,144,171,77 +.byte 219,219,75,219,150,144,171,77 +.byte 161,161,190,161,97,31,95,192 +.byte 161,161,190,161,97,31,95,192 +.byte 141,141,14,141,28,131,7,145 +.byte 141,141,14,141,28,131,7,145 +.byte 61,61,244,61,245,201,122,200 +.byte 61,61,244,61,245,201,122,200 +.byte 151,151,102,151,204,241,51,91 +.byte 151,151,102,151,204,241,51,91 +.byte 0,0,0,0,0,0,0,0 +.byte 0,0,0,0,0,0,0,0 +.byte 207,207,27,207,54,212,131,249 +.byte 207,207,27,207,54,212,131,249 +.byte 43,43,172,43,69,135,86,110 +.byte 43,43,172,43,69,135,86,110 +.byte 118,118,197,118,151,179,236,225 +.byte 118,118,197,118,151,179,236,225 +.byte 130,130,50,130,100,176,25,230 +.byte 130,130,50,130,100,176,25,230 +.byte 214,214,127,214,254,169,177,40 +.byte 214,214,127,214,254,169,177,40 +.byte 27,27,108,27,216,119,54,195 +.byte 27,27,108,27,216,119,54,195 +.byte 181,181,238,181,193,91,119,116 +.byte 181,181,238,181,193,91,119,116 +.byte 175,175,134,175,17,41,67,190 +.byte 175,175,134,175,17,41,67,190 +.byte 106,106,181,106,119,223,212,29 +.byte 106,106,181,106,119,223,212,29 +.byte 80,80,93,80,186,13,160,234 +.byte 80,80,93,80,186,13,160,234 +.byte 69,69,9,69,18,76,138,87 +.byte 69,69,9,69,18,76,138,87 +.byte 243,243,235,243,203,24,251,56 +.byte 243,243,235,243,203,24,251,56 +.byte 48,48,192,48,157,240,96,173 +.byte 48,48,192,48,157,240,96,173 +.byte 239,239,155,239,43,116,195,196 +.byte 239,239,155,239,43,116,195,196 +.byte 63,63,252,63,229,195,126,218 +.byte 63,63,252,63,229,195,126,218 +.byte 85,85,73,85,146,28,170,199 +.byte 85,85,73,85,146,28,170,199 +.byte 162,162,178,162,121,16,89,219 +.byte 162,162,178,162,121,16,89,219 +.byte 234,234,143,234,3,101,201,233 +.byte 234,234,143,234,3,101,201,233 +.byte 101,101,137,101,15,236,202,106 +.byte 101,101,137,101,15,236,202,106 +.byte 186,186,210,186,185,104,105,3 +.byte 186,186,210,186,185,104,105,3 +.byte 47,47,188,47,101,147,94,74 +.byte 
47,47,188,47,101,147,94,74 +.byte 192,192,39,192,78,231,157,142 +.byte 192,192,39,192,78,231,157,142 +.byte 222,222,95,222,190,129,161,96 +.byte 222,222,95,222,190,129,161,96 +.byte 28,28,112,28,224,108,56,252 +.byte 28,28,112,28,224,108,56,252 +.byte 253,253,211,253,187,46,231,70 +.byte 253,253,211,253,187,46,231,70 +.byte 77,77,41,77,82,100,154,31 +.byte 77,77,41,77,82,100,154,31 +.byte 146,146,114,146,228,224,57,118 +.byte 146,146,114,146,228,224,57,118 +.byte 117,117,201,117,143,188,234,250 +.byte 117,117,201,117,143,188,234,250 +.byte 6,6,24,6,48,30,12,54 +.byte 6,6,24,6,48,30,12,54 +.byte 138,138,18,138,36,152,9,174 +.byte 138,138,18,138,36,152,9,174 +.byte 178,178,242,178,249,64,121,75 +.byte 178,178,242,178,249,64,121,75 +.byte 230,230,191,230,99,89,209,133 +.byte 230,230,191,230,99,89,209,133 +.byte 14,14,56,14,112,54,28,126 +.byte 14,14,56,14,112,54,28,126 +.byte 31,31,124,31,248,99,62,231 +.byte 31,31,124,31,248,99,62,231 +.byte 98,98,149,98,55,247,196,85 +.byte 98,98,149,98,55,247,196,85 +.byte 212,212,119,212,238,163,181,58 +.byte 212,212,119,212,238,163,181,58 +.byte 168,168,154,168,41,50,77,129 +.byte 168,168,154,168,41,50,77,129 +.byte 150,150,98,150,196,244,49,82 +.byte 150,150,98,150,196,244,49,82 +.byte 249,249,195,249,155,58,239,98 +.byte 249,249,195,249,155,58,239,98 +.byte 197,197,51,197,102,246,151,163 +.byte 197,197,51,197,102,246,151,163 +.byte 37,37,148,37,53,177,74,16 +.byte 37,37,148,37,53,177,74,16 +.byte 89,89,121,89,242,32,178,171 +.byte 89,89,121,89,242,32,178,171 +.byte 132,132,42,132,84,174,21,208 +.byte 132,132,42,132,84,174,21,208 +.byte 114,114,213,114,183,167,228,197 +.byte 114,114,213,114,183,167,228,197 +.byte 57,57,228,57,213,221,114,236 +.byte 57,57,228,57,213,221,114,236 +.byte 76,76,45,76,90,97,152,22 +.byte 76,76,45,76,90,97,152,22 +.byte 94,94,101,94,202,59,188,148 +.byte 94,94,101,94,202,59,188,148 +.byte 120,120,253,120,231,133,240,159 +.byte 120,120,253,120,231,133,240,159 +.byte 56,56,224,56,221,216,112,229 +.byte 56,56,224,56,221,216,112,229 +.byte 140,140,10,140,20,134,5,152 +.byte 140,140,10,140,20,134,5,152 +.byte 209,209,99,209,198,178,191,23 +.byte 209,209,99,209,198,178,191,23 +.byte 165,165,174,165,65,11,87,228 +.byte 165,165,174,165,65,11,87,228 +.byte 226,226,175,226,67,77,217,161 +.byte 226,226,175,226,67,77,217,161 +.byte 97,97,153,97,47,248,194,78 +.byte 97,97,153,97,47,248,194,78 +.byte 179,179,246,179,241,69,123,66 +.byte 179,179,246,179,241,69,123,66 +.byte 33,33,132,33,21,165,66,52 +.byte 33,33,132,33,21,165,66,52 +.byte 156,156,74,156,148,214,37,8 +.byte 156,156,74,156,148,214,37,8 +.byte 30,30,120,30,240,102,60,238 +.byte 30,30,120,30,240,102,60,238 +.byte 67,67,17,67,34,82,134,97 +.byte 67,67,17,67,34,82,134,97 +.byte 199,199,59,199,118,252,147,177 +.byte 199,199,59,199,118,252,147,177 +.byte 252,252,215,252,179,43,229,79 +.byte 252,252,215,252,179,43,229,79 +.byte 4,4,16,4,32,20,8,36 +.byte 4,4,16,4,32,20,8,36 +.byte 81,81,89,81,178,8,162,227 +.byte 81,81,89,81,178,8,162,227 +.byte 153,153,94,153,188,199,47,37 +.byte 153,153,94,153,188,199,47,37 +.byte 109,109,169,109,79,196,218,34 +.byte 109,109,169,109,79,196,218,34 +.byte 13,13,52,13,104,57,26,101 +.byte 13,13,52,13,104,57,26,101 +.byte 250,250,207,250,131,53,233,121 +.byte 250,250,207,250,131,53,233,121 +.byte 223,223,91,223,182,132,163,105 +.byte 223,223,91,223,182,132,163,105 +.byte 126,126,229,126,215,155,252,169 +.byte 126,126,229,126,215,155,252,169 +.byte 36,36,144,36,61,180,72,25 +.byte 36,36,144,36,61,180,72,25 +.byte 59,59,236,59,197,215,118,254 +.byte 
59,59,236,59,197,215,118,254 +.byte 171,171,150,171,49,61,75,154 +.byte 171,171,150,171,49,61,75,154 +.byte 206,206,31,206,62,209,129,240 +.byte 206,206,31,206,62,209,129,240 +.byte 17,17,68,17,136,85,34,153 +.byte 17,17,68,17,136,85,34,153 +.byte 143,143,6,143,12,137,3,131 +.byte 143,143,6,143,12,137,3,131 +.byte 78,78,37,78,74,107,156,4 +.byte 78,78,37,78,74,107,156,4 +.byte 183,183,230,183,209,81,115,102 +.byte 183,183,230,183,209,81,115,102 +.byte 235,235,139,235,11,96,203,224 +.byte 235,235,139,235,11,96,203,224 +.byte 60,60,240,60,253,204,120,193 +.byte 60,60,240,60,253,204,120,193 +.byte 129,129,62,129,124,191,31,253 +.byte 129,129,62,129,124,191,31,253 +.byte 148,148,106,148,212,254,53,64 +.byte 148,148,106,148,212,254,53,64 +.byte 247,247,251,247,235,12,243,28 +.byte 247,247,251,247,235,12,243,28 +.byte 185,185,222,185,161,103,111,24 +.byte 185,185,222,185,161,103,111,24 +.byte 19,19,76,19,152,95,38,139 +.byte 19,19,76,19,152,95,38,139 +.byte 44,44,176,44,125,156,88,81 +.byte 44,44,176,44,125,156,88,81 +.byte 211,211,107,211,214,184,187,5 +.byte 211,211,107,211,214,184,187,5 +.byte 231,231,187,231,107,92,211,140 +.byte 231,231,187,231,107,92,211,140 +.byte 110,110,165,110,87,203,220,57 +.byte 110,110,165,110,87,203,220,57 +.byte 196,196,55,196,110,243,149,170 +.byte 196,196,55,196,110,243,149,170 +.byte 3,3,12,3,24,15,6,27 +.byte 3,3,12,3,24,15,6,27 +.byte 86,86,69,86,138,19,172,220 +.byte 86,86,69,86,138,19,172,220 +.byte 68,68,13,68,26,73,136,94 +.byte 68,68,13,68,26,73,136,94 +.byte 127,127,225,127,223,158,254,160 +.byte 127,127,225,127,223,158,254,160 +.byte 169,169,158,169,33,55,79,136 +.byte 169,169,158,169,33,55,79,136 +.byte 42,42,168,42,77,130,84,103 +.byte 42,42,168,42,77,130,84,103 +.byte 187,187,214,187,177,109,107,10 +.byte 187,187,214,187,177,109,107,10 +.byte 193,193,35,193,70,226,159,135 +.byte 193,193,35,193,70,226,159,135 +.byte 83,83,81,83,162,2,166,241 +.byte 83,83,81,83,162,2,166,241 +.byte 220,220,87,220,174,139,165,114 +.byte 220,220,87,220,174,139,165,114 +.byte 11,11,44,11,88,39,22,83 +.byte 11,11,44,11,88,39,22,83 +.byte 157,157,78,157,156,211,39,1 +.byte 157,157,78,157,156,211,39,1 +.byte 108,108,173,108,71,193,216,43 +.byte 108,108,173,108,71,193,216,43 +.byte 49,49,196,49,149,245,98,164 +.byte 49,49,196,49,149,245,98,164 +.byte 116,116,205,116,135,185,232,243 +.byte 116,116,205,116,135,185,232,243 +.byte 246,246,255,246,227,9,241,21 +.byte 246,246,255,246,227,9,241,21 +.byte 70,70,5,70,10,67,140,76 +.byte 70,70,5,70,10,67,140,76 +.byte 172,172,138,172,9,38,69,165 +.byte 172,172,138,172,9,38,69,165 +.byte 137,137,30,137,60,151,15,181 +.byte 137,137,30,137,60,151,15,181 +.byte 20,20,80,20,160,68,40,180 +.byte 20,20,80,20,160,68,40,180 +.byte 225,225,163,225,91,66,223,186 +.byte 225,225,163,225,91,66,223,186 +.byte 22,22,88,22,176,78,44,166 +.byte 22,22,88,22,176,78,44,166 +.byte 58,58,232,58,205,210,116,247 +.byte 58,58,232,58,205,210,116,247 +.byte 105,105,185,105,111,208,210,6 +.byte 105,105,185,105,111,208,210,6 +.byte 9,9,36,9,72,45,18,65 +.byte 9,9,36,9,72,45,18,65 +.byte 112,112,221,112,167,173,224,215 +.byte 112,112,221,112,167,173,224,215 +.byte 182,182,226,182,217,84,113,111 +.byte 182,182,226,182,217,84,113,111 +.byte 208,208,103,208,206,183,189,30 +.byte 208,208,103,208,206,183,189,30 +.byte 237,237,147,237,59,126,199,214 +.byte 237,237,147,237,59,126,199,214 +.byte 204,204,23,204,46,219,133,226 +.byte 204,204,23,204,46,219,133,226 +.byte 66,66,21,66,42,87,132,104 +.byte 66,66,21,66,42,87,132,104 +.byte 152,152,90,152,180,194,45,44 +.byte 
152,152,90,152,180,194,45,44 +.byte 164,164,170,164,73,14,85,237 +.byte 164,164,170,164,73,14,85,237 +.byte 40,40,160,40,93,136,80,117 +.byte 40,40,160,40,93,136,80,117 +.byte 92,92,109,92,218,49,184,134 +.byte 92,92,109,92,218,49,184,134 +.byte 248,248,199,248,147,63,237,107 +.byte 248,248,199,248,147,63,237,107 +.byte 134,134,34,134,68,164,17,194 +.byte 134,134,34,134,68,164,17,194 +.byte 24,35,198,232,135,184,1,79 +.byte 54,166,210,245,121,111,145,82 +.byte 96,188,155,142,163,12,123,53 +.byte 29,224,215,194,46,75,254,87 +.byte 21,119,55,229,159,240,74,218 +.byte 88,201,41,10,177,160,107,133 +.byte 189,93,16,244,203,62,5,103 +.byte 228,39,65,139,167,125,149,216 +.byte 251,238,124,102,221,23,71,158 +.byte 202,45,191,7,173,90,131,51 +.size whirlpool_block_mmx,.-.L_whirlpool_block_mmx_begin diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/x86cpuid.s b/deps/openssl/asm_obsolete/x86-elf-gas/x86cpuid.s new file mode 100644 index 00000000000000..80a4d1a5183adf --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-elf-gas/x86cpuid.s @@ -0,0 +1,363 @@ +.file "x86cpuid.s" +.text +.globl OPENSSL_ia32_cpuid +.type OPENSSL_ia32_cpuid,@function +.align 16 +OPENSSL_ia32_cpuid: +.L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc .L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz .L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz .L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb .L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb .L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc .L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja .L002generic + andl $4026531839,%edx + jmp .L002generic +.L001intel: + cmpl $7,%edi + jb .L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +.L003cacheinfo: + cmpl $4,%edi + movl $-1,%edi + jb .L004nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +.L004nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne .L005notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne .L005notintel + orl $1048576,%edx +.L005notintel: + btl $28,%edx + jnc .L002generic + andl $4026531839,%edx + cmpl $0,%edi + je .L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .L002generic + andl $4026531839,%edx +.L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc .L006clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je .L007done + cmpl $2,%eax + je .L006clear_avx +.L008clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +.L006clear_avx: + andl $4026525695,%ebp + movl 20(%esp),%edi + andl $4294967263,8(%edi) +.L007done: + movl %esi,%eax + movl %ebp,%edx +.L000nocpuid: 
+ popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin +.globl OPENSSL_rdtsc +.type OPENSSL_rdtsc,@function +.align 16 +OPENSSL_rdtsc: +.L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + leal OPENSSL_ia32cap_P,%ecx + btl $4,(%ecx) + jnc .L009notsc + .byte 0x0f,0x31 +.L009notsc: + ret +.size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin +.globl OPENSSL_instrument_halt +.type OPENSSL_instrument_halt,@function +.align 16 +OPENSSL_instrument_halt: +.L_OPENSSL_instrument_halt_begin: + leal OPENSSL_ia32cap_P,%ecx + btl $4,(%ecx) + jnc .L010nohalt +.long 2421723150 + andl $3,%eax + jnz .L010nohalt + pushfl + popl %eax + btl $9,%eax + jnc .L010nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +.L010nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.size OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin +.globl OPENSSL_far_spin +.type OPENSSL_far_spin,@function +.align 16 +OPENSSL_far_spin: +.L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl $9,%eax + jnc .L011nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp .L012spin +.align 16 +.L012spin: + incl %eax + cmpl (%ecx),%edx + je .L012spin +.long 529567888 + ret +.L011nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.size OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin +.globl OPENSSL_wipe_cpu +.type OPENSSL_wipe_cpu,@function +.align 16 +OPENSSL_wipe_cpu: +.L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + leal OPENSSL_ia32cap_P,%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc .L013no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne .L014no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +.L014no_sse2: +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +.L013no_x87: + leal 4(%esp),%eax + ret +.size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin +.globl OPENSSL_atomic_add +.type OPENSSL_atomic_add,@function +.align 16 +OPENSSL_atomic_add: +.L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +.L015spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne .L015spin + movl %ebx,%eax + popl %ebx + ret +.size OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin +.globl OPENSSL_indirect_call +.type OPENSSL_indirect_call,@function +.align 16 +OPENSSL_indirect_call: +.L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.size OPENSSL_indirect_call,.-.L_OPENSSL_indirect_call_begin +.globl OPENSSL_cleanse +.type OPENSSL_cleanse,@function +.align 16 +OPENSSL_cleanse: +.L_OPENSSL_cleanse_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + xorl %eax,%eax + cmpl $7,%ecx + jae .L016lot + cmpl $0,%ecx + je .L017ret +.L018little: + movb %al,(%edx) + subl $1,%ecx + leal 1(%edx),%edx + jnz .L018little +.L017ret: + ret +.align 16 +.L016lot: + testl $3,%edx + jz .L019aligned + movb %al,(%edx) + leal -1(%ecx),%ecx + leal 1(%edx),%edx + jmp .L016lot +.L019aligned: + movl %eax,(%edx) + leal -4(%ecx),%ecx + testl $-4,%ecx + leal 
4(%edx),%edx + jnz .L019aligned + cmpl $0,%ecx + jne .L018little + ret +.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin +.globl OPENSSL_ia32_rdrand +.type OPENSSL_ia32_rdrand,@function +.align 16 +OPENSSL_ia32_rdrand: +.L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +.L020loop: +.byte 15,199,240 + jc .L021break + loop .L020loop +.L021break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,@function +.align 16 +OPENSSL_ia32_rdseed: +.L_OPENSSL_ia32_rdseed_begin: + movl $8,%ecx +.L022loop: +.byte 15,199,248 + jc .L023break + loop .L022loop +.L023break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin +.hidden OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,16,4 +.section .init + call OPENSSL_cpuid_setup diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aes-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aes-586.s new file mode 100644 index 00000000000000..12e9100222d438 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aes-586.s @@ -0,0 +1,3208 @@ +.file "aes-586.s" +.text +.align 4 +__x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 4,0x90 +L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ecx,%edi + xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%ecx + andl %edx,%ebp + leal (%edx,%edx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl 
$4278124286,%edi + subl %ebp,%esi + movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %edx,%edi + xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%edx + andl %eax,%ebp + leal (%eax,%eax,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %eax,%edi + xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi + roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%eax + andl %ebx,%ebp + leal (%ebx,%ebx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ebx,%edi + xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ebx + xorl %edi,%esi + xorl %esi,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.align 4 +__sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 4,0x90 +L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $8,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll 
$24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $8,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + movd %mm2,%eax + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + movd %mm6,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $8,%esi + shrl $16,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + andl $255,%ebx + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx + shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi + orl %eax,%edx + shll $8,%esi + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld $8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp L001loop +.align 4,0x90 +L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.align 4 +__x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 4,0x90 +L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl 
%edi,20(%esp) + jb L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 6,0x90 +LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 
453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 +.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 +.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 
2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 
81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.globl _AES_encrypt +.align 4 +_AES_encrypt: +L_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call L004pic_point +L004pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004pic_point(%ebp),%eax + leal LAES_Te-L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + 
btl $25,(%eax) + jnc L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call __sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call __x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 4,0x90 +L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $24,%eax + xorl %ebp,%ecx + roll $16,%ebx + xorl %eax,%ecx + roll $8,%ebp + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + 
subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl %esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $24,%ebx + xorl %ebp,%edx + roll $16,%ecx + xorl %ebx,%edx + roll $8,%ebp + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%eax + roll $16,%edx + xorl %ecx,%eax + roll $8,%ebp + xorl %edx,%eax + xorl %ebp,%eax + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%ebp + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx 
+ shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.align 4 +__sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 4,0x90 +L007loop: + pshufw $12,%mm0,%mm1 + pshufw $9,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $3,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movd %mm2,%eax + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi + movd %mm6,%ebx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $16,%esi + shrl $16,%eax + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shrl $16,%ebx + shll $8,%esi + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx + andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + orl %ebx,%edx + shll $16,%esi + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx + shll $24,%eax + orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor 
%mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp L007loop +.align 4,0x90 +L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.align 4 +__x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 4,0x90 +L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl 
(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 6,0x90 +LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 +.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 
+.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 
+.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 
208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.globl _AES_decrypt +.align 4 +_AES_decrypt: +L_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call L010pic_point +L010pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010pic_point(%ebp),%eax + leal LAES_Td-L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc L011x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call __sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call __x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _AES_cbc_encrypt +.align 4 +_AES_cbc_encrypt: +L_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je L012drop_out + call L013pic_point +L013pic_point: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L013pic_point(%ebp),%eax + cmpl $0,40(%esp) + leal LAES_Te-L013pic_point(%ebp),%ebp + jne L014picked_te + leal LAES_Td-LAES_Te(%ebp),%ebp +L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb L015slow_way + testl $15,%ecx + jnz L015slow_way + btl $28,(%eax) + jc L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + 
movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp L017tbl_ok +.align 2,0x90 +L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 2,0x90 +L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb L018do_copy + cmpl $3852,%ebx + jb L019skip_copy +.align 2,0x90 +L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +L019skip_copy: + movl $16,%edi +.align 2,0x90 +L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je L021fast_decrypt + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4,0x90 +L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call __x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2884892297 +L023skip_ezero: + movl 28(%esp),%esp + popfl +L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L021fast_decrypt: + cmpl 36(%esp),%esi + je L024fast_dec_in_place + movl %edi,52(%esp) +.align 2,0x90 +.align 4,0x90 +L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call __x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp L026fast_dec_out +.align 4,0x90 +L024fast_dec_in_place: +L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call __x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl 
%esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz L027fast_dec_in_place_loop +.align 2,0x90 +L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2884892297 +L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb L030slow_enc_tail + btl $25,52(%esp) + jnc L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 4,0x90 +L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call __sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq %mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae L032slow_enc_loop_sse + testl $15,%ecx + jnz L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 2,0x90 +L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call __x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae L033slow_enc_loop_x86 + testl $15,%ecx + jnz L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je L034enc_in_place +.align 2,0x90 +.long 2767451785 + jmp L035enc_skip_in_place +L034enc_in_place: + leal (%edi,%ecx,1),%edi +L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp L033slow_enc_loop_x86 +.align 4,0x90 +L029slow_decrypt: + btl $25,52(%esp) + jnc L036slow_dec_loop_x86 +.align 2,0x90 +L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call __sse_AES_decrypt_compact + movl 
32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi + movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 2,0x90 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call __x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 4,0x90 +L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 2,0x90 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz L040badpointer + testl $-1,%edi + jz L040badpointer + call L041pic_point +L041pic_point: + popl %ebp + leal LAES_Te-L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je L04210rounds + cmpl $192,%ecx + je L04312rounds + cmpl $256,%ecx + je L04414rounds + movl $-2,%eax + jmp L045exit +L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp L04610shortcut +.align 2,0x90 +L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 
896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp L045exit +L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp L04812shortcut +.align 2,0x90 +L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp L04912loop +L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp L045exit +L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp L05114shortcut +.align 2,0x90 +L05214loop: + movl 28(%edi),%edx +L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl %eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je L05314break + incl %ecx + movl %eax,%edx + movl 16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp L05214loop +L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp L045exit +L040badpointer: + movl $-1,%eax +L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _private_AES_set_encrypt_key +.align 4 +_private_AES_set_encrypt_key: +L_private_AES_set_encrypt_key_begin: + call __x86_AES_set_encrypt_key + ret +.globl _private_AES_set_decrypt_key +.align 4 +_private_AES_set_decrypt_key: +L_private_AES_set_decrypt_key_begin: + call __x86_AES_set_encrypt_key + cmpl $0,%eax + je L054proceed + ret +L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + 
movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 2,0x90 +L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 2,0x90 +L056permute: + addl $16,%edi + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %ecx,%edx + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %esi,%ebx + movl $2155905152,%ebp 
+ andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s new file mode 100644 index 00000000000000..cecd5f83f71e6d --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s @@ -0,0 +1,2156 @@ +.file "../openssl/crypto/aes/asm/aesni-x86.s" +.text +.globl _aesni_encrypt +.align 4 +_aesni_encrypt: +L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L000enc1_loop_1 +.byte 102,15,56,221,209 + movups %xmm2,(%eax) + ret +.globl _aesni_decrypt +.align 4 +_aesni_decrypt: +L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L001dec1_loop_2 +.byte 102,15,56,223,209 + movups %xmm2,(%eax) + ret +.align 4 +__aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.align 4 +__aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.align 4 +__aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L004enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 
102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz L004enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.align 4 +__aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +L005dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz L005dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.align 4 +__aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L006enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz L006enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.align 4 +__aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +L007dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz L007dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.align 4 +__aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movups -16(%edx,%ecx,1),%xmm0 + jmp L_aesni_encrypt6_enter +.align 4,0x90 +L008enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz L008enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 
102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.align 4 +__aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + addl $16,%ecx +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movups -16(%edx,%ecx,1),%xmm0 + jmp L_aesni_decrypt6_enter +.align 4,0x90 +L009dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz L009dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.globl _aesni_ecb_encrypt +.align 4 +_aesni_ecb_encrypt: +L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz L010ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz L011ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L012ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L013ecb_enc_loop6_enter +.align 4,0x90 +L014ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L013ecb_enc_loop6_enter: + call __aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L014ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L010ecb_ret +L012ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L015ecb_enc_one + movups 16(%esi),%xmm3 + je L016ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L017ecb_enc_three + movups 48(%esi),%xmm5 + je L018ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L010ecb_ret +.align 4,0x90 +L015ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L019enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L019enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp L010ecb_ret +.align 4,0x90 +L016ecb_enc_two: + call __aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L010ecb_ret +.align 
4,0x90 +L017ecb_enc_three: + call __aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L010ecb_ret +.align 4,0x90 +L018ecb_enc_four: + call __aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp L010ecb_ret +.align 4,0x90 +L011ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb L020ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp L021ecb_dec_loop6_enter +.align 4,0x90 +L022ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +L021ecb_dec_loop6_enter: + call __aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc L022ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz L010ecb_ret +L020ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb L023ecb_dec_one + movups 16(%esi),%xmm3 + je L024ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb L025ecb_dec_three + movups 48(%esi),%xmm5 + je L026ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L010ecb_ret +.align 4,0x90 +L023ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L027dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L027dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp L010ecb_ret +.align 4,0x90 +L024ecb_dec_two: + call __aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L010ecb_ret +.align 4,0x90 +L025ecb_dec_three: + call __aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L010ecb_ret +.align 4,0x90 +L026ecb_dec_four: + call __aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L010ecb_ret: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_encrypt_blocks +.align 4 +_aesni_ccm64_encrypt_blocks: +L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +L028ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +L029ccm64_enc2_loop: 
+.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L029ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz L028ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_decrypt_blocks +.align 4 +_aesni_ccm64_decrypt_blocks: +L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L030enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L030enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp L031ccm64_dec_outer +.align 4,0x90 +L031ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz L032ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +L033ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz L033ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp L031ccm64_dec_outer +.align 4,0x90 +L032ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +L034enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L034enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ctr32_encrypt_blocks +.align 4 +_aesni_ctr32_encrypt_blocks: +L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je L035ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl 
%ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb L036ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp L037ctr32_loop6 +.align 4,0x90 +L037ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc L037ctr32_loop6 + addl $6,%eax + jz L038ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +L036ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb L039ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je L040ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb L041ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je L042ctr32_four + por %xmm7,%xmm6 + call __aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp L038ctr32_ret +.align 4,0x90 +L035ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +L039ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L043enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L043enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp L038ctr32_ret +.align 4,0x90 +L040ctr32_two: + call __aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp L038ctr32_ret +.align 4,0x90 +L041ctr32_three: + call __aesni_encrypt3 + movups (%esi),%xmm5 + movups 
16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp L038ctr32_ret +.align 4,0x90 +L042ctr32_four: + call __aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +L038ctr32_ret: + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_encrypt +.align 4 +_aesni_xts_encrypt: +L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L044enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L044enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc L045xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L046xts_enc_loop6 +.align 4,0x90 +L046xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 
+ subl $96,%eax + jnc L046xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L045xts_enc_short: + addl $96,%eax + jz L047xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L048xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L049xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L050xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L051xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L052xts_enc_done +.align 4,0x90 +L048xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L053enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L053enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L052xts_enc_done +.align 4,0x90 +L049xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L052xts_enc_done +.align 4,0x90 +L050xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L052xts_enc_done +.align 4,0x90 +L051xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L052xts_enc_done +.align 4,0x90 +L047xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L054xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp L055xts_enc_steal +.align 4,0x90 +L052xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L054xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq 
%xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +L055xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L055xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L056enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L056enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +L054xts_enc_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_decrypt +.align 4 +_aesni_xts_decrypt: +L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L057enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L057enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc L058xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp L059xts_dec_loop6 +.align 4,0x90 +L059xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + 
pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc L059xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +L058xts_dec_short: + addl $96,%eax + jz L060xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb L061xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je L062xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb L063xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je L064xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp L065xts_dec_done +.align 4,0x90 +L061xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L066dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L066dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp L065xts_dec_done +.align 4,0x90 +L062xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L065xts_dec_done +.align 4,0x90 +L063xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp L065xts_dec_done +.align 4,0x90 +L064xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp L065xts_dec_done +.align 4,0x90 +L060xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz L067xts_dec_ret + movl %eax,112(%esp) + jmp L068xts_dec_only_one_more +.align 4,0x90 
+L065xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz L067xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +L068xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L069dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L069dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +L070xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz L070xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L071dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L071dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +L067xts_dec_ret: + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_cbc_encrypt +.align 4 +_aesni_cbc_encrypt: +L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz L072cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je L073cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb L074cbc_enc_tail + subl $16,%eax + jmp L075cbc_enc_loop +.align 4,0x90 +L075cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +L076enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L076enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc L075cbc_enc_loop + addl $16,%eax + jnz L074cbc_enc_tail + movaps %xmm2,%xmm7 + jmp L077cbc_ret +L074cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp L075cbc_enc_loop +.align 4,0x90 +L073cbc_decrypt: + cmpl $80,%eax + jbe L078cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp L079cbc_dec_loop6_enter +.align 4,0x90 +L080cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +L079cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + 
movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja L080cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle L081cbc_dec_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +L078cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe L082cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe L083cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe L084cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe L085cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + subl $80,%eax + jmp L081cbc_dec_tail_collected +.align 4,0x90 +L082cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +L086dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz L086dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp L081cbc_dec_tail_collected +.align 4,0x90 +L083cbc_dec_two: + call __aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp L081cbc_dec_tail_collected +.align 4,0x90 +L084cbc_dec_three: + call __aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp L081cbc_dec_tail_collected +.align 4,0x90 +L085cbc_dec_four: + call __aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + subl $64,%eax +L081cbc_dec_tail_collected: + andl $15,%eax + jnz L087cbc_dec_tail_partial + movups %xmm2,(%edi) + jmp L077cbc_ret +.align 4,0x90 +L087cbc_dec_tail_partial: + movaps %xmm2,(%esp) + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 +L077cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + movups %xmm7,(%ebp) +L072cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__aesni_set_encrypt_key: + testl %eax,%eax + jz L088bad_pointer + testl %edx,%edx + jz L088bad_pointer + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + leal 16(%edx),%edx + cmpl $256,%ecx + je L08914rounds + cmpl $192,%ecx + je L09012rounds + cmpl $128,%ecx + jne L091bad_keybits +.align 4,0x90 +L09210rounds: + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call L093key_128_cold +.byte 102,15,58,223,200,2 + call L094key_128 +.byte 102,15,58,223,200,4 + call L094key_128 +.byte 102,15,58,223,200,8 + call L094key_128 +.byte 102,15,58,223,200,16 + call L094key_128 +.byte 102,15,58,223,200,32 + call L094key_128 +.byte 102,15,58,223,200,64 + call L094key_128 +.byte 102,15,58,223,200,128 + call L094key_128 +.byte 102,15,58,223,200,27 + call L094key_128 +.byte 102,15,58,223,200,54 + call L094key_128 + movups %xmm0,(%edx) + movl 
%ecx,80(%edx) + xorl %eax,%eax + ret +.align 4,0x90 +L094key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +L093key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L09012rounds: + movq 16(%eax),%xmm2 + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call L095key_192a_cold +.byte 102,15,58,223,202,2 + call L096key_192b +.byte 102,15,58,223,202,4 + call L097key_192a +.byte 102,15,58,223,202,8 + call L096key_192b +.byte 102,15,58,223,202,16 + call L097key_192a +.byte 102,15,58,223,202,32 + call L096key_192b +.byte 102,15,58,223,202,64 + call L097key_192a +.byte 102,15,58,223,202,128 + call L096key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + xorl %eax,%eax + ret +.align 4,0x90 +L097key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 4,0x90 +L095key_192a_cold: + movaps %xmm2,%xmm5 +L098key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 4,0x90 +L096key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp L098key_192b_warm +.align 4,0x90 +L08914rounds: + movups 16(%eax),%xmm2 + movl $13,%ecx + leal 16(%edx),%edx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call L099key_256a_cold +.byte 102,15,58,223,200,1 + call L100key_256b +.byte 102,15,58,223,202,2 + call L101key_256a +.byte 102,15,58,223,200,2 + call L100key_256b +.byte 102,15,58,223,202,4 + call L101key_256a +.byte 102,15,58,223,200,4 + call L100key_256b +.byte 102,15,58,223,202,8 + call L101key_256a +.byte 102,15,58,223,200,8 + call L100key_256b +.byte 102,15,58,223,202,16 + call L101key_256a +.byte 102,15,58,223,200,16 + call L100key_256b +.byte 102,15,58,223,202,32 + call L101key_256a +.byte 102,15,58,223,200,32 + call L100key_256b +.byte 102,15,58,223,202,64 + call L101key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + ret +.align 4,0x90 +L101key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +L099key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 4,0x90 +L100key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 2,0x90 +L088bad_pointer: + movl $-1,%eax + ret +.align 2,0x90 +L091bad_keybits: + movl $-2,%eax + ret +.globl _aesni_set_encrypt_key +.align 4 +_aesni_set_encrypt_key: +L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + ret +.globl _aesni_set_decrypt_key +.align 4 +_aesni_set_decrypt_key: +L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz L102dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +L103dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + 
leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja L103dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + xorl %eax,%eax +L102dec_key_ret: + ret +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/aes/vpaes-x86.s b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/vpaes-x86.s new file mode 100644 index 00000000000000..81f7af82388417 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/vpaes-x86.s @@ -0,0 +1,635 @@ +.file "vpaes-x86.s" +.text +.align 6,0x90 +L_vpaes_consts: +.long 218628480,235210255,168496130,67568393 +.long 252381056,17041926,33884169,51187212 +.long 252645135,252645135,252645135,252645135 +.long 1512730624,3266504856,1377990664,3401244816 +.long 830229760,1275146365,2969422977,3447763452 +.long 3411033600,2979783055,338359620,2782886510 +.long 4209124096,907596821,221174255,1006095553 +.long 191964160,3799684038,3164090317,1589111125 +.long 182528256,1777043520,2877432650,3265356744 +.long 1874708224,3503451415,3305285752,363511674 +.long 1606117888,3487855781,1093350906,2384367825 +.long 197121,67569157,134941193,202313229 +.long 67569157,134941193,202313229,197121 +.long 134941193,202313229,197121,67569157 +.long 202313229,197121,67569157,134941193 +.long 33619971,100992007,168364043,235736079 +.long 235736079,33619971,100992007,168364043 +.long 168364043,235736079,33619971,100992007 +.long 100992007,168364043,235736079,33619971 +.long 50462976,117835012,185207048,252579084 +.long 252314880,51251460,117574920,184942860 +.long 184682752,252054788,50987272,118359308 +.long 118099200,185467140,251790600,50727180 +.long 2946363062,528716217,1300004225,1881839624 +.long 1532713819,1532713819,1532713819,1532713819 +.long 3602276352,4288629033,3737020424,4153884961 +.long 1354558464,32357713,2958822624,3775749553 +.long 1201988352,132424512,1572796698,503232858 +.long 2213177600,1597421020,4103937655,675398315 +.long 2749646592,4273543773,1511898873,121693092 +.long 3040248576,1103263732,2871565598,1608280554 +.long 2236667136,2588920351,482954393,64377734 +.long 3069987328,291237287,2117370568,3650299247 +.long 533321216,3573750986,2572112006,1401264716 +.long 1339849704,2721158661,548607111,3445553514 +.long 2128193280,3054596040,2183486460,1257083700 +.long 655635200,1165381986,3923443150,2344132524 +.long 190078720,256924420,290342170,357187870 +.long 1610966272,2263057382,4103205268,309794674 +.long 2592527872,2233205587,1335446729,3402964816 +.long 3973531904,3225098121,3002836325,1918774430 +.long 3870401024,2102906079,2284471353,4117666579 +.long 617007872,1021508343,366931923,691083277 +.long 2528395776,3491914898,2968704004,1613121270 +.long 3445188352,3247741094,844474987,4093578302 +.long 651481088,1190302358,1689581232,574775300 +.long 4289380608,206939853,2555985458,2489840491 +.long 2130264064,327674451,3566485037,3349835193 +.long 2470714624,316102159,3636825756,3393945945 +.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +.byte 118,101,114,115,105,116,121,41,0 +.align 6,0x90 +.align 4 +__vpaes_preheat: + addl (%esp),%ebp + movdqa -48(%ebp),%xmm7 + movdqa 
-16(%ebp),%xmm6 + ret +.align 4 +__vpaes_encrypt_core: + movl $16,%ecx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa (%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 +.byte 102,15,56,0,208 + movdqa 16(%ebp),%xmm0 + pxor %xmm5,%xmm2 + psrld $4,%xmm1 + addl $16,%edx +.byte 102,15,56,0,193 + leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 + jmp L000enc_entry +.align 4,0x90 +L001enc_loop: + movdqa 32(%ebp),%xmm4 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 +.byte 102,15,56,0,195 + pxor %xmm5,%xmm4 + movdqa 64(%ebp),%xmm5 + pxor %xmm4,%xmm0 + movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 + movdqa 80(%ebp),%xmm2 + movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 +.byte 102,15,56,0,193 + addl $16,%edx + pxor %xmm2,%xmm0 +.byte 102,15,56,0,220 + addl $16,%ecx + pxor %xmm0,%xmm3 +.byte 102,15,56,0,193 + andl $48,%ecx + subl $1,%eax + pxor %xmm3,%xmm0 +L000enc_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm6,%xmm0 +.byte 102,15,56,0,232 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 +.byte 102,15,56,0,224 + movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm5 + pxor %xmm1,%xmm3 + jnz L001enc_loop + movdqa 96(%ebp),%xmm4 + movdqa 112(%ebp),%xmm0 +.byte 102,15,56,0,226 + pxor %xmm5,%xmm4 +.byte 102,15,56,0,195 + movdqa 64(%ebx,%ecx,1),%xmm1 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,193 + ret +.align 4 +__vpaes_decrypt_core: + leal 608(%ebp),%ebx + movl 240(%edx),%eax + movdqa %xmm6,%xmm1 + movdqa -64(%ebx),%xmm2 + pandn %xmm0,%xmm1 + movl %eax,%ecx + psrld $4,%xmm1 + movdqu (%edx),%xmm5 + shll $4,%ecx + pand %xmm6,%xmm0 +.byte 102,15,56,0,208 + movdqa -48(%ebx),%xmm0 + xorl $48,%ecx +.byte 102,15,56,0,193 + andl $48,%ecx + pxor %xmm5,%xmm2 + movdqa 176(%ebp),%xmm5 + pxor %xmm2,%xmm0 + addl $16,%edx + leal -352(%ebx,%ecx,1),%ecx + jmp L002dec_entry +.align 4,0x90 +L003dec_loop: + movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 +.byte 102,15,56,0,226 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 + addl $16,%edx +.byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax +L002dec_entry: + movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 + pandn %xmm0,%xmm1 + pand %xmm6,%xmm0 + psrld $4,%xmm1 +.byte 102,15,56,0,208 + movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,217 + movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm7,%xmm2 +.byte 102,15,56,0,211 + movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 +.byte 102,15,56,0,220 + movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 + jnz L003dec_loop + movdqa 96(%ebx),%xmm4 +.byte 102,15,56,0,226 + pxor %xmm0,%xmm4 + movdqa 112(%ebx),%xmm0 + movdqa (%ecx),%xmm2 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 +.byte 102,15,56,0,194 + ret +.align 4 +__vpaes_schedule_core: + addl (%esp),%ebp + movdqu (%esi),%xmm0 + movdqa 320(%ebp),%xmm2 + movdqa %xmm0,%xmm3 + leal (%ebp),%ebx + movdqa %xmm2,4(%esp) + call 
__vpaes_schedule_transform + movdqa %xmm0,%xmm7 + testl %edi,%edi + jnz L004schedule_am_decrypting + movdqu %xmm0,(%edx) + jmp L005schedule_go +L004schedule_am_decrypting: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + movdqu %xmm3,(%edx) + xorl $48,%ecx +L005schedule_go: + cmpl $192,%eax + ja L006schedule_256 + je L007schedule_192 +L008schedule_128: + movl $10,%eax +L009loop_schedule_128: + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + jmp L009loop_schedule_128 +.align 4,0x90 +L007schedule_192: + movdqu 8(%esi),%xmm0 + call __vpaes_schedule_transform + movdqa %xmm0,%xmm6 + pxor %xmm4,%xmm4 + movhlps %xmm4,%xmm6 + movl $4,%eax +L011loop_schedule_192: + call __vpaes_schedule_round +.byte 102,15,58,15,198,8 + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + call __vpaes_schedule_mangle + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + jmp L011loop_schedule_192 +.align 4,0x90 +L006schedule_256: + movdqu 16(%esi),%xmm0 + call __vpaes_schedule_transform + movl $7,%eax +L012loop_schedule_256: + call __vpaes_schedule_mangle + movdqa %xmm0,%xmm6 + call __vpaes_schedule_round + decl %eax + jz L010schedule_mangle_last + call __vpaes_schedule_mangle + pshufd $255,%xmm0,%xmm0 + movdqa %xmm7,20(%esp) + movdqa %xmm6,%xmm7 + call L_vpaes_schedule_low_round + movdqa 20(%esp),%xmm7 + jmp L012loop_schedule_256 +.align 4,0x90 +L010schedule_mangle_last: + leal 384(%ebp),%ebx + testl %edi,%edi + jnz L013schedule_mangle_last_dec + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,193 + leal 352(%ebp),%ebx + addl $32,%edx +L013schedule_mangle_last_dec: + addl $-16,%edx + pxor 336(%ebp),%xmm0 + call __vpaes_schedule_transform + movdqu %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + ret +.align 4 +__vpaes_schedule_192_smear: + pshufd $128,%xmm6,%xmm1 + pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm6,%xmm0 + movhlps %xmm1,%xmm6 + ret +.align 4 +__vpaes_schedule_round: + movdqa 8(%esp),%xmm2 + pxor %xmm1,%xmm1 +.byte 102,15,58,15,202,15 +.byte 102,15,58,15,210,15 + pxor %xmm1,%xmm7 + pshufd $255,%xmm0,%xmm0 +.byte 102,15,58,15,192,1 + movdqa %xmm2,8(%esp) +L_vpaes_schedule_low_round: + movdqa %xmm7,%xmm1 + pslldq $4,%xmm7 + pxor %xmm1,%xmm7 + movdqa %xmm7,%xmm1 + pslldq $8,%xmm7 + pxor %xmm1,%xmm7 + pxor 336(%ebp),%xmm7 + movdqa -16(%ebp),%xmm4 + movdqa -48(%ebp),%xmm5 + movdqa %xmm4,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm4,%xmm0 + movdqa -32(%ebp),%xmm2 +.byte 102,15,56,0,208 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + movdqa %xmm5,%xmm4 +.byte 102,15,56,0,224 + pxor %xmm2,%xmm4 + movdqa %xmm5,%xmm2 +.byte 102,15,56,0,211 + pxor %xmm0,%xmm2 + movdqa %xmm5,%xmm3 +.byte 102,15,56,0,220 + pxor %xmm1,%xmm3 + movdqa 32(%ebp),%xmm4 +.byte 102,15,56,0,226 + movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,195 + pxor %xmm4,%xmm0 + pxor %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + ret +.align 4 +__vpaes_schedule_transform: + movdqa -16(%ebp),%xmm2 + movdqa %xmm2,%xmm1 + pandn %xmm0,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm0 + movdqa (%ebx),%xmm2 +.byte 102,15,56,0,208 + movdqa 16(%ebx),%xmm0 +.byte 102,15,56,0,193 + pxor %xmm2,%xmm0 + ret +.align 4 +__vpaes_schedule_mangle: + movdqa %xmm0,%xmm4 + movdqa 128(%ebp),%xmm5 + testl %edi,%edi + jnz 
L014schedule_mangle_dec + addl $16,%edx + pxor 336(%ebp),%xmm4 +.byte 102,15,56,0,229 + movdqa %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 +.byte 102,15,56,0,229 + pxor %xmm4,%xmm3 + jmp L015schedule_mangle_both +.align 4,0x90 +L014schedule_mangle_dec: + movdqa -16(%ebp),%xmm2 + leal 416(%ebp),%esi + movdqa %xmm2,%xmm1 + pandn %xmm4,%xmm1 + psrld $4,%xmm1 + pand %xmm2,%xmm4 + movdqa (%esi),%xmm2 +.byte 102,15,56,0,212 + movdqa 16(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 32(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 48(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 64(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 80(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 +.byte 102,15,56,0,221 + movdqa 96(%esi),%xmm2 +.byte 102,15,56,0,212 + pxor %xmm3,%xmm2 + movdqa 112(%esi),%xmm3 +.byte 102,15,56,0,217 + pxor %xmm2,%xmm3 + addl $-16,%edx +L015schedule_mangle_both: + movdqa 256(%ebp,%ecx,1),%xmm1 +.byte 102,15,56,0,217 + addl $-16,%ecx + andl $48,%ecx + movdqu %xmm3,(%edx) + ret +.globl _vpaes_set_encrypt_key +.align 4 +_vpaes_set_encrypt_key: +L_vpaes_set_encrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + movl $48,%ecx + movl $0,%edi + leal L_vpaes_consts+0x30-L016pic_point,%ebp + call __vpaes_schedule_core +L016pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_set_decrypt_key +.align 4 +_vpaes_set_decrypt_key: +L_vpaes_set_decrypt_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%eax + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movl %eax,%ebx + shrl $5,%ebx + addl $5,%ebx + movl %ebx,240(%edx) + shll $4,%ebx + leal 16(%edx,%ebx,1),%edx + movl $1,%edi + movl %eax,%ecx + shrl $1,%ecx + andl $32,%ecx + xorl $32,%ecx + leal L_vpaes_consts+0x30-L017pic_point,%ebp + call __vpaes_schedule_core +L017pic_point: + movl 48(%esp),%esp + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_encrypt +.align 4 +_vpaes_encrypt: +L_vpaes_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal L_vpaes_consts+0x30-L018pic_point,%ebp + call __vpaes_preheat +L018pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call __vpaes_encrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_decrypt +.align 4 +_vpaes_decrypt: +L_vpaes_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + leal L_vpaes_consts+0x30-L019pic_point,%ebp + call __vpaes_preheat +L019pic_point: + movl 20(%esp),%esi + leal -56(%esp),%ebx + movl 24(%esp),%edi + andl $-16,%ebx + movl 28(%esp),%edx + xchgl %esp,%ebx + movl %ebx,48(%esp) + movdqu (%esi),%xmm0 + call __vpaes_decrypt_core + movdqu %xmm0,(%edi) + movl 48(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _vpaes_cbc_encrypt +.align 4 +_vpaes_cbc_encrypt: +L_vpaes_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 
28(%esp),%eax + movl 32(%esp),%edx + subl $16,%eax + jc L020cbc_abort + leal -56(%esp),%ebx + movl 36(%esp),%ebp + andl $-16,%ebx + movl 40(%esp),%ecx + xchgl %esp,%ebx + movdqu (%ebp),%xmm1 + subl %esi,%edi + movl %ebx,48(%esp) + movl %edi,(%esp) + movl %edx,4(%esp) + movl %ebp,8(%esp) + movl %eax,%edi + leal L_vpaes_consts+0x30-L021pic_point,%ebp + call __vpaes_preheat +L021pic_point: + cmpl $0,%ecx + je L022cbc_dec_loop + jmp L023cbc_enc_loop +.align 4,0x90 +L023cbc_enc_loop: + movdqu (%esi),%xmm0 + pxor %xmm1,%xmm0 + call __vpaes_encrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + movdqa %xmm0,%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc L023cbc_enc_loop + jmp L024cbc_done +.align 4,0x90 +L022cbc_dec_loop: + movdqu (%esi),%xmm0 + movdqa %xmm1,16(%esp) + movdqa %xmm0,32(%esp) + call __vpaes_decrypt_core + movl (%esp),%ebx + movl 4(%esp),%edx + pxor 16(%esp),%xmm0 + movdqa 32(%esp),%xmm1 + movdqu %xmm0,(%ebx,%esi,1) + leal 16(%esi),%esi + subl $16,%edi + jnc L022cbc_dec_loop +L024cbc_done: + movl 8(%esp),%ebx + movl 48(%esp),%esp + movdqu %xmm1,(%ebx) +L020cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret diff --git a/deps/openssl/asm/x86-elf-gas/bf/bf-686.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bf/bf-586.s similarity index 88% rename from deps/openssl/asm/x86-elf-gas/bf/bf-686.s rename to deps/openssl/asm_obsolete/x86-macosx-gas/bf/bf-586.s index 69b0ce681b11ec..2b75536e78e668 100644 --- a/deps/openssl/asm/x86-elf-gas/bf/bf-686.s +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bf/bf-586.s @@ -1,27 +1,26 @@ .file "bf-686.s" .text -.globl BF_encrypt -.type BF_encrypt,@function -.align 16 -BF_encrypt: -.L_BF_encrypt_begin: +.globl _BF_encrypt +.align 4 +_BF_encrypt: +L_BF_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - + # Load the 2 words movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx - + # P pointer, s and enc flag movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl (%edi),%ecx - + # Round 0 rorl $16,%ecx movl 4(%edi),%esi movb %ch,%al @@ -40,7 +39,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 1 rorl $16,%edx movl 8(%edi),%esi movb %dh,%al @@ -59,7 +58,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 2 rorl $16,%ecx movl 12(%edi),%esi movb %ch,%al @@ -78,7 +77,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 3 rorl $16,%edx movl 16(%edi),%esi movb %dh,%al @@ -97,7 +96,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 4 rorl $16,%ecx movl 20(%edi),%esi movb %ch,%al @@ -116,7 +115,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 5 rorl $16,%edx movl 24(%edi),%esi movb %dh,%al @@ -135,7 +134,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 6 rorl $16,%ecx movl 28(%edi),%esi movb %ch,%al @@ -154,7 +153,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 7 rorl $16,%edx movl 32(%edi),%esi movb %dh,%al @@ -173,7 +172,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 8 rorl $16,%ecx movl 36(%edi),%esi movb %ch,%al @@ -192,7 +191,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 9 rorl $16,%edx movl 40(%edi),%esi movb %dh,%al @@ -211,7 +210,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 10 rorl $16,%ecx movl 44(%edi),%esi movb %ch,%al @@ -230,7 +229,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 11 rorl $16,%edx movl 48(%edi),%esi movb %dh,%al @@ -249,7 +248,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 12 rorl $16,%ecx movl 52(%edi),%esi movb %ch,%al @@ -268,7 +267,7 @@ BF_encrypt: xorl %eax,%eax 
xorl %esi,%edx - + # Round 13 rorl $16,%edx movl 56(%edi),%esi movb %dh,%al @@ -287,7 +286,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 14 rorl $16,%ecx movl 60(%edi),%esi movb %ch,%al @@ -306,7 +305,7 @@ BF_encrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 15 rorl $16,%edx movl 64(%edi),%esi movb %dh,%al @@ -333,29 +332,27 @@ BF_encrypt: popl %ebx popl %ebp ret -.size BF_encrypt,.-.L_BF_encrypt_begin -.globl BF_decrypt -.type BF_decrypt,@function -.align 16 -BF_decrypt: -.L_BF_decrypt_begin: +.globl _BF_decrypt +.align 4 +_BF_decrypt: +L_BF_decrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - + # Load the 2 words movl 20(%esp),%eax movl (%eax),%ecx movl 4(%eax),%edx - + # P pointer, s and enc flag movl 24(%esp),%edi xorl %eax,%eax xorl %ebx,%ebx xorl 68(%edi),%ecx - + # Round 16 rorl $16,%ecx movl 64(%edi),%esi movb %ch,%al @@ -374,7 +371,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 15 rorl $16,%edx movl 60(%edi),%esi movb %dh,%al @@ -393,7 +390,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 14 rorl $16,%ecx movl 56(%edi),%esi movb %ch,%al @@ -412,7 +409,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 13 rorl $16,%edx movl 52(%edi),%esi movb %dh,%al @@ -431,7 +428,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 12 rorl $16,%ecx movl 48(%edi),%esi movb %ch,%al @@ -450,7 +447,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 11 rorl $16,%edx movl 44(%edi),%esi movb %dh,%al @@ -469,7 +466,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 10 rorl $16,%ecx movl 40(%edi),%esi movb %ch,%al @@ -488,7 +485,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 9 rorl $16,%edx movl 36(%edi),%esi movb %dh,%al @@ -507,7 +504,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 8 rorl $16,%ecx movl 32(%edi),%esi movb %ch,%al @@ -526,7 +523,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 7 rorl $16,%edx movl 28(%edi),%esi movb %dh,%al @@ -545,7 +542,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 6 rorl $16,%ecx movl 24(%edi),%esi movb %ch,%al @@ -564,7 +561,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 5 rorl $16,%edx movl 20(%edi),%esi movb %dh,%al @@ -583,7 +580,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 4 rorl $16,%ecx movl 16(%edi),%esi movb %ch,%al @@ -602,7 +599,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 3 rorl $16,%edx movl 12(%edi),%esi movb %dh,%al @@ -621,7 +618,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%ecx - + # Round 2 rorl $16,%ecx movl 8(%edi),%esi movb %ch,%al @@ -640,7 +637,7 @@ BF_decrypt: xorl %eax,%eax xorl %esi,%edx - + # Round 1 rorl $16,%edx movl 4(%edi),%esi movb %dh,%al @@ -667,19 +664,17 @@ BF_decrypt: popl %ebx popl %ebp ret -.size BF_decrypt,.-.L_BF_decrypt_begin -.globl BF_cbc_encrypt -.type BF_cbc_encrypt,@function -.align 16 -BF_cbc_encrypt: -.L_BF_cbc_encrypt_begin: +.globl _BF_cbc_encrypt +.align 4 +_BF_cbc_encrypt: +L_BF_cbc_encrypt_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi movl 28(%esp),%ebp - + # getting iv ptr from parameter 4 movl 36(%esp),%ebx movl (%ebx),%esi movl 4(%ebx),%edi @@ -690,19 +685,19 @@ BF_cbc_encrypt: movl %esp,%ebx movl 36(%esp),%esi movl 40(%esp),%edi - + # getting encrypt flag from parameter 5 movl 56(%esp),%ecx - + # get and push parameter 3 movl 48(%esp),%eax pushl %eax pushl %ebx cmpl $0,%ecx - jz .L000decrypt + jz L000decrypt andl $4294967288,%ebp movl 8(%esp),%eax movl 12(%esp),%ebx - jz .L001encrypt_finish -.L002encrypt_loop: + jz L001encrypt_finish +L002encrypt_loop: 
movl (%esi),%ecx movl 4(%esi),%edx xorl %ecx,%eax @@ -711,7 +706,7 @@ BF_cbc_encrypt: bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) - call .L_BF_encrypt_begin + call L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax @@ -721,65 +716,65 @@ BF_cbc_encrypt: addl $8,%esi addl $8,%edi subl $8,%ebp - jnz .L002encrypt_loop -.L001encrypt_finish: + jnz L002encrypt_loop +L001encrypt_finish: movl 52(%esp),%ebp andl $7,%ebp - jz .L003finish - call .L004PIC_point -.L004PIC_point: + jz L003finish + call L004PIC_point +L004PIC_point: popl %edx - leal .L005cbc_enc_jmp_table-.L004PIC_point(%edx),%ecx + leal L005cbc_enc_jmp_table-L004PIC_point(%edx),%ecx movl (%ecx,%ebp,4),%ebp addl %edx,%ebp xorl %ecx,%ecx xorl %edx,%edx jmp *%ebp -.L006ej7: +L006ej7: movb 6(%esi),%dh shll $8,%edx -.L007ej6: +L007ej6: movb 5(%esi),%dh -.L008ej5: +L008ej5: movb 4(%esi),%dl -.L009ej4: +L009ej4: movl (%esi),%ecx - jmp .L010ejend -.L011ej3: + jmp L010ejend +L011ej3: movb 2(%esi),%ch shll $8,%ecx -.L012ej2: +L012ej2: movb 1(%esi),%ch -.L013ej1: +L013ej1: movb (%esi),%cl -.L010ejend: +L010ejend: xorl %ecx,%eax xorl %edx,%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) - call .L_BF_encrypt_begin + call L_BF_encrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax bswap %ebx movl %eax,(%edi) movl %ebx,4(%edi) - jmp .L003finish -.L000decrypt: + jmp L003finish +L000decrypt: andl $4294967288,%ebp movl 16(%esp),%eax movl 20(%esp),%ebx - jz .L014decrypt_finish -.L015decrypt_loop: + jz L014decrypt_finish +L015decrypt_loop: movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) - call .L_BF_decrypt_begin + call L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax @@ -797,18 +792,18 @@ BF_cbc_encrypt: addl $8,%esi addl $8,%edi subl $8,%ebp - jnz .L015decrypt_loop -.L014decrypt_finish: + jnz L015decrypt_loop +L014decrypt_finish: movl 52(%esp),%ebp andl $7,%ebp - jz .L003finish + jz L003finish movl (%esi),%eax movl 4(%esi),%ebx bswap %eax bswap %ebx movl %eax,8(%esp) movl %ebx,12(%esp) - call .L_BF_decrypt_begin + call L_BF_decrypt_begin movl 8(%esp),%eax movl 12(%esp),%ebx bswap %eax @@ -819,28 +814,28 @@ BF_cbc_encrypt: xorl %ebx,%edx movl (%esi),%eax movl 4(%esi),%ebx -.L016dj7: +L016dj7: rorl $16,%edx movb %dl,6(%edi) shrl $16,%edx -.L017dj6: +L017dj6: movb %dh,5(%edi) -.L018dj5: +L018dj5: movb %dl,4(%edi) -.L019dj4: +L019dj4: movl %ecx,(%edi) - jmp .L020djend -.L021dj3: + jmp L020djend +L021dj3: rorl $16,%ecx movb %cl,2(%edi) shll $16,%ecx -.L022dj2: +L022dj2: movb %ch,1(%esi) -.L023dj1: +L023dj1: movb %cl,(%esi) -.L020djend: - jmp .L003finish -.L003finish: +L020djend: + jmp L003finish +L003finish: movl 60(%esp),%ecx addl $24,%esp movl %eax,(%ecx) @@ -850,15 +845,14 @@ BF_cbc_encrypt: popl %ebx popl %ebp ret -.align 64 -.L005cbc_enc_jmp_table: +.align 6,0x90 +L005cbc_enc_jmp_table: .long 0 -.long .L013ej1-.L004PIC_point -.long .L012ej2-.L004PIC_point -.long .L011ej3-.L004PIC_point -.long .L009ej4-.L004PIC_point -.long .L008ej5-.L004PIC_point -.long .L007ej6-.L004PIC_point -.long .L006ej7-.L004PIC_point -.align 64 -.size BF_cbc_encrypt,.-.L_BF_cbc_encrypt_begin +.long L013ej1-L004PIC_point +.long L012ej2-L004PIC_point +.long L011ej3-L004PIC_point +.long L009ej4-L004PIC_point +.long L008ej5-L004PIC_point +.long L007ej6-L004PIC_point +.long L006ej7-L004PIC_point +.align 6,0x90 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/bn-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/bn-586.s new file mode 100644 index 
00000000000000..777121eae88f3f --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/bn-586.s @@ -0,0 +1,1520 @@ +.file "../openssl/crypto/bn/asm/bn-586.s" +.text +.globl _bn_mul_add_words +.align 4 +_bn_mul_add_words: +L_bn_mul_add_words_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L001maw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 + jmp L002maw_sse2_entry +.align 4,0x90 +L003maw_sse2_unrolled: + movd (%eax),%mm3 + paddq %mm3,%mm1 + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + movd 4(%edx),%mm4 + pmuludq %mm0,%mm4 + movd 8(%edx),%mm6 + pmuludq %mm0,%mm6 + movd 12(%edx),%mm7 + pmuludq %mm0,%mm7 + paddq %mm2,%mm1 + movd 4(%eax),%mm3 + paddq %mm4,%mm3 + movd 8(%eax),%mm5 + paddq %mm6,%mm5 + movd 12(%eax),%mm4 + paddq %mm4,%mm7 + movd %mm1,(%eax) + movd 16(%edx),%mm2 + pmuludq %mm0,%mm2 + psrlq $32,%mm1 + movd 20(%edx),%mm4 + pmuludq %mm0,%mm4 + paddq %mm3,%mm1 + movd 24(%edx),%mm6 + pmuludq %mm0,%mm6 + movd %mm1,4(%eax) + psrlq $32,%mm1 + movd 28(%edx),%mm3 + addl $32,%edx + pmuludq %mm0,%mm3 + paddq %mm5,%mm1 + movd 16(%eax),%mm5 + paddq %mm5,%mm2 + movd %mm1,8(%eax) + psrlq $32,%mm1 + paddq %mm7,%mm1 + movd 20(%eax),%mm5 + paddq %mm5,%mm4 + movd %mm1,12(%eax) + psrlq $32,%mm1 + paddq %mm2,%mm1 + movd 24(%eax),%mm5 + paddq %mm5,%mm6 + movd %mm1,16(%eax) + psrlq $32,%mm1 + paddq %mm4,%mm1 + movd 28(%eax),%mm5 + paddq %mm5,%mm3 + movd %mm1,20(%eax) + psrlq $32,%mm1 + paddq %mm6,%mm1 + movd %mm1,24(%eax) + psrlq $32,%mm1 + paddq %mm3,%mm1 + movd %mm1,28(%eax) + leal 32(%eax),%eax + psrlq $32,%mm1 + subl $8,%ecx + jz L004maw_sse2_exit +L002maw_sse2_entry: + testl $4294967288,%ecx + jnz L003maw_sse2_unrolled +.align 2,0x90 +L005maw_sse2_loop: + movd (%edx),%mm2 + movd (%eax),%mm3 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm3,%mm1 + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L005maw_sse2_loop +L004maw_sse2_exit: + movd %mm1,%eax + emms + ret +.align 4,0x90 +L001maw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 28(%esp),%ecx + movl 24(%esp),%ebx + andl $4294967288,%ecx + movl 32(%esp),%ebp + pushl %ecx + jz L006maw_finish +.align 4,0x90 +L007maw_loop: + # Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 28(%edi),%eax + adcl $0,%edx + movl %eax,28(%edi) + 
movl %edx,%esi + + subl $8,%ecx + leal 32(%ebx),%ebx + leal 32(%edi),%edi + jnz L007maw_loop +L006maw_finish: + movl 32(%esp),%ecx + andl $7,%ecx + jnz L008maw_finish2 + jmp L009maw_end +L008maw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl (%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 4(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,4(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 8(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,8(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 12(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,12(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 16(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,16(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 20(%edi),%eax + adcl $0,%edx + decl %ecx + movl %eax,20(%edi) + movl %edx,%esi + jz L009maw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ebp + addl %esi,%eax + adcl $0,%edx + addl 24(%edi),%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L009maw_end: + movl %esi,%eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_mul_words +.align 4 +_bn_mul_words: +L_bn_mul_words_begin: + call L010PIC_me_up +L010PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L011mw_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx + movd 16(%esp),%mm0 + pxor %mm1,%mm1 +.align 4,0x90 +L012mw_sse2_loop: + movd (%edx),%mm2 + pmuludq %mm0,%mm2 + leal 4(%edx),%edx + paddq %mm2,%mm1 + movd %mm1,(%eax) + subl $1,%ecx + psrlq $32,%mm1 + leal 4(%eax),%eax + jnz L012mw_sse2_loop + movd %mm1,%eax + emms + ret +.align 4,0x90 +L011mw_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + xorl %esi,%esi + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ebp + movl 32(%esp),%ecx + andl $4294967288,%ebp + jz L013mw_finish +L014mw_loop: + # Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,(%edi) + movl %edx,%esi + # Round 4 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + # Round 8 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + # Round 12 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + # Round 16 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + # Round 20 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + # Round 24 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi + # Round 28 + movl 28(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,28(%edi) + movl %edx,%esi + + addl $32,%ebx + addl $32,%edi + subl $8,%ebp + jz L013mw_finish + jmp L014mw_loop +L013mw_finish: + movl 28(%esp),%ebp + andl $7,%ebp + jnz L015mw_finish2 + jmp L016mw_end +L015mw_finish2: + # Tail Round 0 + movl (%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl 
%eax,(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 1 + movl 4(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,4(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 2 + movl 8(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,8(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 3 + movl 12(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,12(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 4 + movl 16(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,16(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 5 + movl 20(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,20(%edi) + movl %edx,%esi + decl %ebp + jz L016mw_end + # Tail Round 6 + movl 24(%ebx),%eax + mull %ecx + addl %esi,%eax + adcl $0,%edx + movl %eax,24(%edi) + movl %edx,%esi +L016mw_end: + movl %esi,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sqr_words +.align 4 +_bn_sqr_words: +L_bn_sqr_words_begin: + call L017PIC_me_up +L017PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L018sqr_non_sse2 + movl 4(%esp),%eax + movl 8(%esp),%edx + movl 12(%esp),%ecx +.align 4,0x90 +L019sqr_sse2_loop: + movd (%edx),%mm0 + pmuludq %mm0,%mm0 + leal 4(%edx),%edx + movq %mm0,(%eax) + subl $1,%ecx + leal 8(%eax),%eax + jnz L019sqr_sse2_loop + emms + ret +.align 4,0x90 +L018sqr_non_sse2: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebx + andl $4294967288,%ebx + jz L020sw_finish +L021sw_loop: + # Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + movl %edx,4(%esi) + # Round 4 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + movl %edx,12(%esi) + # Round 8 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + movl %edx,20(%esi) + # Round 12 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + movl %edx,28(%esi) + # Round 16 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + movl %edx,36(%esi) + # Round 20 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + movl %edx,44(%esi) + # Round 24 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) + # Round 28 + movl 28(%edi),%eax + mull %eax + movl %eax,56(%esi) + movl %edx,60(%esi) + + addl $32,%edi + addl $64,%esi + subl $8,%ebx + jnz L021sw_loop +L020sw_finish: + movl 28(%esp),%ebx + andl $7,%ebx + jz L022sw_end + # Tail Round 0 + movl (%edi),%eax + mull %eax + movl %eax,(%esi) + decl %ebx + movl %edx,4(%esi) + jz L022sw_end + # Tail Round 1 + movl 4(%edi),%eax + mull %eax + movl %eax,8(%esi) + decl %ebx + movl %edx,12(%esi) + jz L022sw_end + # Tail Round 2 + movl 8(%edi),%eax + mull %eax + movl %eax,16(%esi) + decl %ebx + movl %edx,20(%esi) + jz L022sw_end + # Tail Round 3 + movl 12(%edi),%eax + mull %eax + movl %eax,24(%esi) + decl %ebx + movl %edx,28(%esi) + jz L022sw_end + # Tail Round 4 + movl 16(%edi),%eax + mull %eax + movl %eax,32(%esi) + decl %ebx + movl %edx,36(%esi) + jz L022sw_end + # Tail Round 5 + movl 20(%edi),%eax + mull %eax + movl %eax,40(%esi) + decl %ebx + movl %edx,44(%esi) + jz L022sw_end + # Tail Round 6 + movl 24(%edi),%eax + mull %eax + movl %eax,48(%esi) + movl %edx,52(%esi) +L022sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_div_words +.align 4 +_bn_div_words: +L_bn_div_words_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + movl 12(%esp),%ecx + divl %ecx + ret +.globl _bn_add_words +.align 
4 +_bn_add_words: +L_bn_add_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L023aw_finish +L024aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L024aw_loop +L023aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L025aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L025aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L025aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L025aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L025aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L025aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L025aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + addl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + addl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L025aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_words +.align 4 +_bn_sub_words: +L_bn_sub_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L026aw_finish +L027aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl 
$0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L027aw_loop +L026aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L028aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L028aw_end + # Tail Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L028aw_end + # Tail Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L028aw_end + # Tail Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L028aw_end + # Tail Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L028aw_end + # Tail Round 5 + movl 20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L028aw_end + # Tail Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) +L028aw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _bn_sub_part_words +.align 4 +_bn_sub_part_words: +L_bn_sub_part_words_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp),%ebx + movl 24(%esp),%esi + movl 28(%esp),%edi + movl 32(%esp),%ebp + xorl %eax,%eax + andl $4294967288,%ebp + jz L029aw_finish +L030aw_loop: + # Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # Round 1 + movl 4(%esi),%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # Round 2 + movl 8(%esi),%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # Round 3 + movl 12(%esi),%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # Round 4 + movl 16(%esi),%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # Round 5 + movl 
20(%esi),%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # Round 6 + movl 24(%esi),%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # Round 7 + movl 28(%esi),%ecx + movl 28(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%esi + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L030aw_loop +L029aw_finish: + movl 32(%esp),%ebp + andl $7,%ebp + jz L031aw_end + # Tail Round 0 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 1 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 2 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 3 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 4 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 5 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx + decl %ebp + jz L031aw_end + # Tail Round 6 + movl (%esi),%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + addl $4,%esi + addl $4,%edi + addl $4,%ebx +L031aw_end: + cmpl $0,36(%esp) + je L032pw_end + movl 36(%esp),%ebp + cmpl $0,%ebp + je L032pw_end + jge L033pw_pos + # pw_neg + movl $0,%edx + subl %ebp,%edx + movl %edx,%ebp + andl $4294967288,%ebp + jz L034pw_neg_finish +L035pw_neg_loop: + # dl<0 Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,(%ebx) + # dl<0 Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,4(%ebx) + # dl<0 Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,8(%ebx) + # dl<0 Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,12(%ebx) + # dl<0 Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,16(%ebx) + # dl<0 Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,20(%ebx) + # dl<0 Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + # dl<0 Round 7 + movl $0,%ecx + movl 28(%edi),%edx + subl 
%eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,28(%ebx) + + addl $32,%edi + addl $32,%ebx + subl $8,%ebp + jnz L035pw_neg_loop +L034pw_neg_finish: + movl 36(%esp),%edx + movl $0,%ebp + subl %edx,%ebp + andl $7,%ebp + jz L032pw_end + # dl<0 Tail Round 0 + movl $0,%ecx + movl (%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,(%ebx) + jz L032pw_end + # dl<0 Tail Round 1 + movl $0,%ecx + movl 4(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,4(%ebx) + jz L032pw_end + # dl<0 Tail Round 2 + movl $0,%ecx + movl 8(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,8(%ebx) + jz L032pw_end + # dl<0 Tail Round 3 + movl $0,%ecx + movl 12(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,12(%ebx) + jz L032pw_end + # dl<0 Tail Round 4 + movl $0,%ecx + movl 16(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,16(%ebx) + jz L032pw_end + # dl<0 Tail Round 5 + movl $0,%ecx + movl 20(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + decl %ebp + movl %ecx,20(%ebx) + jz L032pw_end + # dl<0 Tail Round 6 + movl $0,%ecx + movl 24(%edi),%edx + subl %eax,%ecx + movl $0,%eax + adcl %eax,%eax + subl %edx,%ecx + adcl $0,%eax + movl %ecx,24(%ebx) + jmp L032pw_end +L033pw_pos: + andl $4294967288,%ebp + jz L036pw_pos_finish +L037pw_pos_loop: + # dl>0 Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L038pw_nc0 + # dl>0 Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L039pw_nc1 + # dl>0 Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L040pw_nc2 + # dl>0 Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L041pw_nc3 + # dl>0 Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L042pw_nc4 + # dl>0 Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L043pw_nc5 + # dl>0 Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L044pw_nc6 + # dl>0 Round 7 + movl 28(%esi),%ecx + subl %eax,%ecx + movl %ecx,28(%ebx) + jnc L045pw_nc7 + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L037pw_pos_loop +L036pw_pos_finish: + movl 36(%esp),%ebp + andl $7,%ebp + jz L032pw_end + # dl>0 Tail Round 0 + movl (%esi),%ecx + subl %eax,%ecx + movl %ecx,(%ebx) + jnc L046pw_tail_nc0 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 1 + movl 4(%esi),%ecx + subl %eax,%ecx + movl %ecx,4(%ebx) + jnc L047pw_tail_nc1 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 2 + movl 8(%esi),%ecx + subl %eax,%ecx + movl %ecx,8(%ebx) + jnc L048pw_tail_nc2 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 3 + movl 12(%esi),%ecx + subl %eax,%ecx + movl %ecx,12(%ebx) + jnc L049pw_tail_nc3 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 4 + movl 16(%esi),%ecx + subl %eax,%ecx + movl %ecx,16(%ebx) + jnc L050pw_tail_nc4 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 5 + movl 20(%esi),%ecx + subl %eax,%ecx + movl %ecx,20(%ebx) + jnc L051pw_tail_nc5 + decl %ebp + jz L032pw_end + # dl>0 Tail Round 6 + movl 24(%esi),%ecx + subl %eax,%ecx + movl %ecx,24(%ebx) + jnc L052pw_tail_nc6 + movl $1,%eax + jmp L032pw_end +L053pw_nc_loop: + movl (%esi),%ecx + movl %ecx,(%ebx) +L038pw_nc0: + movl 4(%esi),%ecx + movl %ecx,4(%ebx) 
+L039pw_nc1: + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L040pw_nc2: + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L041pw_nc3: + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L042pw_nc4: + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L043pw_nc5: + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L044pw_nc6: + movl 28(%esi),%ecx + movl %ecx,28(%ebx) +L045pw_nc7: + + addl $32,%esi + addl $32,%ebx + subl $8,%ebp + jnz L053pw_nc_loop + movl 36(%esp),%ebp + andl $7,%ebp + jz L054pw_nc_end + movl (%esi),%ecx + movl %ecx,(%ebx) +L046pw_tail_nc0: + decl %ebp + jz L054pw_nc_end + movl 4(%esi),%ecx + movl %ecx,4(%ebx) +L047pw_tail_nc1: + decl %ebp + jz L054pw_nc_end + movl 8(%esi),%ecx + movl %ecx,8(%ebx) +L048pw_tail_nc2: + decl %ebp + jz L054pw_nc_end + movl 12(%esi),%ecx + movl %ecx,12(%ebx) +L049pw_tail_nc3: + decl %ebp + jz L054pw_nc_end + movl 16(%esi),%ecx + movl %ecx,16(%ebx) +L050pw_tail_nc4: + decl %ebp + jz L054pw_nc_end + movl 20(%esi),%ecx + movl %ecx,20(%ebx) +L051pw_tail_nc5: + decl %ebp + jz L054pw_nc_end + movl 24(%esi),%ecx + movl %ecx,24(%ebx) +L052pw_tail_nc6: +L054pw_nc_end: + movl $0,%eax +L032pw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/co-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/co-586.s new file mode 100644 index 00000000000000..6174dea38f0243 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/co-586.s @@ -0,0 +1,1246 @@ +.file "../openssl/crypto/bn/asm/co-586.s" +.text +.globl _bn_mul_comba8 +.align 4 +_bn_mul_comba8: +L_bn_mul_comba8_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 16(%esi),%eax + # saved r[3] + # ################## 
Calculate word 4 + xorl %ebx,%ebx + # mul a[4]*b[0] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 20(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[5]*b[0] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[1] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[4] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 24(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[6]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[1] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[2] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[4] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[5] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,24(%eax) + movl 28(%esi),%eax + # saved r[6] + # ################## Calculate word 7 + xorl %ebx,%ebx + # mul a[7]*b[0] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[2] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[3] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[4] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[5] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[6] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + movl %ecx,28(%eax) + movl 28(%esi),%eax + # saved r[7] + # 
################## Calculate word 8 + xorl %ecx,%ecx + # mul a[7]*b[1] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[3] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 16(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[4] + mull %edx + addl %eax,%ebp + movl 12(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[3]*b[5] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[6] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + movl %ebp,32(%eax) + movl 28(%esi),%eax + # saved r[8] + # ################## Calculate word 9 + xorl %ebp,%ebp + # mul a[7]*b[2] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 16(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[4] + mull %edx + addl %eax,%ebx + movl 16(%esi),%eax + adcl %edx,%ecx + movl 20(%edi),%edx + adcl $0,%ebp + # mul a[4]*b[5] + mull %edx + addl %eax,%ebx + movl 12(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[3]*b[6] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + movl %ebx,36(%eax) + movl 28(%esi),%eax + # saved r[9] + # ################## Calculate word 10 + xorl %ebx,%ebx + # mul a[7]*b[3] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[4] + mull %edx + addl %eax,%ecx + movl 20(%esi),%eax + adcl %edx,%ebp + movl 20(%edi),%edx + adcl $0,%ebx + # mul a[5]*b[5] + mull %edx + addl %eax,%ecx + movl 16(%esi),%eax + adcl %edx,%ebp + movl 24(%edi),%edx + adcl $0,%ebx + # mul a[4]*b[6] + mull %edx + addl %eax,%ecx + movl 12(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[3]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 16(%edi),%edx + adcl $0,%ebx + movl %ecx,40(%eax) + movl 28(%esi),%eax + # saved r[10] + # ################## Calculate word 11 + xorl %ecx,%ecx + # mul a[7]*b[4] + mull %edx + addl %eax,%ebp + movl 24(%esi),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + # mul a[6]*b[5] + mull %edx + addl %eax,%ebp + movl 20(%esi),%eax + adcl %edx,%ebx + movl 24(%edi),%edx + adcl $0,%ecx + # mul a[5]*b[6] + mull %edx + addl %eax,%ebp + movl 16(%esi),%eax + adcl %edx,%ebx + movl 28(%edi),%edx + adcl $0,%ecx + # mul a[4]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 20(%edi),%edx + adcl $0,%ecx + movl %ebp,44(%eax) + movl 28(%esi),%eax + # saved r[11] + # ################## Calculate word 12 + xorl %ebp,%ebp + # mul a[7]*b[5] + mull %edx + addl %eax,%ebx + movl 24(%esi),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + # mul a[6]*b[6] + mull %edx + addl %eax,%ebx + movl 20(%esi),%eax + adcl %edx,%ecx + movl 28(%edi),%edx + adcl $0,%ebp + # mul a[5]*b[7] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 24(%edi),%edx + adcl $0,%ebp + movl %ebx,48(%eax) 
+ movl 28(%esi),%eax + # saved r[12] + # ################## Calculate word 13 + xorl %ebx,%ebx + # mul a[7]*b[6] + mull %edx + addl %eax,%ecx + movl 24(%esi),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + # mul a[6]*b[7] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 28(%edi),%edx + adcl $0,%ebx + movl %ecx,52(%eax) + movl 28(%esi),%eax + # saved r[13] + # ################## Calculate word 14 + xorl %ecx,%ecx + # mul a[7]*b[7] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%eax) + # saved r[14] + # save r[15] + movl %ebx,60(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_mul_comba4 +.align 4 +_bn_mul_comba4: +L_bn_mul_comba4_begin: + pushl %esi + movl 12(%esp),%esi + pushl %edi + movl 20(%esp),%edi + pushl %ebp + pushl %ebx + xorl %ebx,%ebx + movl (%esi),%eax + xorl %ecx,%ecx + movl (%edi),%edx + # ################## Calculate word 0 + xorl %ebp,%ebp + # mul a[0]*b[0] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl (%edi),%edx + adcl $0,%ebp + movl %ebx,(%eax) + movl 4(%esi),%eax + # saved r[0] + # ################## Calculate word 1 + xorl %ebx,%ebx + # mul a[1]*b[0] + mull %edx + addl %eax,%ecx + movl (%esi),%eax + adcl %edx,%ebp + movl 4(%edi),%edx + adcl $0,%ebx + # mul a[0]*b[1] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl (%edi),%edx + adcl $0,%ebx + movl %ecx,4(%eax) + movl 8(%esi),%eax + # saved r[1] + # ################## Calculate word 2 + xorl %ecx,%ecx + # mul a[2]*b[0] + mull %edx + addl %eax,%ebp + movl 4(%esi),%eax + adcl %edx,%ebx + movl 4(%edi),%edx + adcl $0,%ecx + # mul a[1]*b[1] + mull %edx + addl %eax,%ebp + movl (%esi),%eax + adcl %edx,%ebx + movl 8(%edi),%edx + adcl $0,%ecx + # mul a[0]*b[2] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl (%edi),%edx + adcl $0,%ecx + movl %ebp,8(%eax) + movl 12(%esi),%eax + # saved r[2] + # ################## Calculate word 3 + xorl %ebp,%ebp + # mul a[3]*b[0] + mull %edx + addl %eax,%ebx + movl 8(%esi),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + # mul a[2]*b[1] + mull %edx + addl %eax,%ebx + movl 4(%esi),%eax + adcl %edx,%ecx + movl 8(%edi),%edx + adcl $0,%ebp + # mul a[1]*b[2] + mull %edx + addl %eax,%ebx + movl (%esi),%eax + adcl %edx,%ecx + movl 12(%edi),%edx + adcl $0,%ebp + # mul a[0]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + movl 4(%edi),%edx + adcl $0,%ebp + movl %ebx,12(%eax) + movl 12(%esi),%eax + # saved r[3] + # ################## Calculate word 4 + xorl %ebx,%ebx + # mul a[3]*b[1] + mull %edx + addl %eax,%ecx + movl 8(%esi),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + # mul a[2]*b[2] + mull %edx + addl %eax,%ecx + movl 4(%esi),%eax + adcl %edx,%ebp + movl 12(%edi),%edx + adcl $0,%ebx + # mul a[1]*b[3] + mull %edx + addl %eax,%ecx + movl 20(%esp),%eax + adcl %edx,%ebp + movl 8(%edi),%edx + adcl $0,%ebx + movl %ecx,16(%eax) + movl 12(%esi),%eax + # saved r[4] + # ################## Calculate word 5 + xorl %ecx,%ecx + # mul a[3]*b[2] + mull %edx + addl %eax,%ebp + movl 8(%esi),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + # mul a[2]*b[3] + mull %edx + addl %eax,%ebp + movl 20(%esp),%eax + adcl %edx,%ebx + movl 12(%edi),%edx + adcl $0,%ecx + movl %ebp,20(%eax) + movl 12(%esi),%eax + # saved r[5] + # ################## Calculate word 6 + xorl %ebp,%ebp + # mul a[3]*b[3] + mull %edx + addl %eax,%ebx + movl 20(%esp),%eax + adcl %edx,%ecx + adcl $0,%ebp + movl 
%ebx,24(%eax) + # saved r[6] + # save r[7] + movl %ecx,28(%eax) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba8 +.align 4 +_bn_sqr_comba8: +L_bn_sqr_comba8_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl (%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[4]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 12(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl (%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 20(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[5]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + movl 4(%esi),%edx + # sqr a[4]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + movl (%esi),%edx + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[6]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[5]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 16(%esi),%eax + adcl $0,%ebp + movl 8(%esi),%edx + # sqr a[4]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,24(%edi) + movl 28(%esi),%eax + # saved r[6] + # ############### Calculate word 7 + xorl %ebx,%ebx + # sqr a[7]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + 
adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 4(%esi),%edx + # sqr a[6]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + movl 8(%esi),%edx + # sqr a[5]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%eax + adcl $0,%ebx + movl 12(%esi),%edx + # sqr a[4]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,28(%edi) + movl 4(%esi),%edx + # saved r[7] + # ############### Calculate word 8 + xorl %ecx,%ecx + # sqr a[7]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 8(%esi),%edx + # sqr a[6]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 20(%esi),%eax + adcl $0,%ecx + movl 12(%esi),%edx + # sqr a[5]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 16(%esi),%eax + adcl $0,%ecx + # sqr a[4]*a[4] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl 8(%esi),%edx + adcl $0,%ecx + movl %ebp,32(%edi) + movl 28(%esi),%eax + # saved r[8] + # ############### Calculate word 9 + xorl %ebp,%ebp + # sqr a[7]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + movl 12(%esi),%edx + # sqr a[6]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 20(%esi),%eax + adcl $0,%ebp + movl 16(%esi),%edx + # sqr a[5]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 28(%esi),%eax + adcl $0,%ebp + movl %ebx,36(%edi) + movl 12(%esi),%edx + # saved r[9] + # ############### Calculate word 10 + xorl %ebx,%ebx + # sqr a[7]*a[3] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 24(%esi),%eax + adcl $0,%ebx + movl 16(%esi),%edx + # sqr a[6]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 20(%esi),%eax + adcl $0,%ebx + # sqr a[5]*a[5] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 16(%esi),%edx + adcl $0,%ebx + movl %ecx,40(%edi) + movl 28(%esi),%eax + # saved r[10] + # ############### Calculate word 11 + xorl %ecx,%ecx + # sqr a[7]*a[4] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 24(%esi),%eax + adcl $0,%ecx + movl 20(%esi),%edx + # sqr a[6]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 28(%esi),%eax + adcl $0,%ecx + movl %ebp,44(%edi) + movl 20(%esi),%edx + # saved r[11] + # ############### Calculate word 12 + xorl %ebp,%ebp + # sqr a[7]*a[5] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%eax + adcl $0,%ebp + # sqr a[6]*a[6] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl 24(%esi),%edx + adcl $0,%ebp + movl %ebx,48(%edi) + movl 28(%esi),%eax + # saved r[12] + # ############### Calculate word 13 + xorl %ebx,%ebx + # sqr a[7]*a[6] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 28(%esi),%eax + adcl $0,%ebx + movl %ecx,52(%edi) + # saved r[13] + # ############### Calculate word 14 + xorl %ecx,%ecx + # sqr a[7]*a[7] + 
mull %eax + addl %eax,%ebp + adcl %edx,%ebx + adcl $0,%ecx + movl %ebp,56(%edi) + # saved r[14] + movl %ebx,60(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret +.globl _bn_sqr_comba4 +.align 4 +_bn_sqr_comba4: +L_bn_sqr_comba4_begin: + pushl %esi + pushl %edi + pushl %ebp + pushl %ebx + movl 20(%esp),%edi + movl 24(%esp),%esi + xorl %ebx,%ebx + xorl %ecx,%ecx + movl (%esi),%eax + # ############### Calculate word 0 + xorl %ebp,%ebp + # sqr a[0]*a[0] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + movl (%esi),%edx + adcl $0,%ebp + movl %ebx,(%edi) + movl 4(%esi),%eax + # saved r[0] + # ############### Calculate word 1 + xorl %ebx,%ebx + # sqr a[1]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + movl %ecx,4(%edi) + movl (%esi),%edx + # saved r[1] + # ############### Calculate word 2 + xorl %ecx,%ecx + # sqr a[2]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 4(%esi),%eax + adcl $0,%ecx + # sqr a[1]*a[1] + mull %eax + addl %eax,%ebp + adcl %edx,%ebx + movl (%esi),%edx + adcl $0,%ecx + movl %ebp,8(%edi) + movl 12(%esi),%eax + # saved r[2] + # ############### Calculate word 3 + xorl %ebp,%ebp + # sqr a[3]*a[0] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 8(%esi),%eax + adcl $0,%ebp + movl 4(%esi),%edx + # sqr a[2]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebp + addl %eax,%ebx + adcl %edx,%ecx + movl 12(%esi),%eax + adcl $0,%ebp + movl %ebx,12(%edi) + movl 4(%esi),%edx + # saved r[3] + # ############### Calculate word 4 + xorl %ebx,%ebx + # sqr a[3]*a[1] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ebx + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%eax + adcl $0,%ebx + # sqr a[2]*a[2] + mull %eax + addl %eax,%ecx + adcl %edx,%ebp + movl 8(%esi),%edx + adcl $0,%ebx + movl %ecx,16(%edi) + movl 12(%esi),%eax + # saved r[4] + # ############### Calculate word 5 + xorl %ecx,%ecx + # sqr a[3]*a[2] + mull %edx + addl %eax,%eax + adcl %edx,%edx + adcl $0,%ecx + addl %eax,%ebp + adcl %edx,%ebx + movl 12(%esi),%eax + adcl $0,%ecx + movl %ebp,20(%edi) + # saved r[5] + # ############### Calculate word 6 + xorl %ebp,%ebp + # sqr a[3]*a[3] + mull %eax + addl %eax,%ebx + adcl %edx,%ecx + adcl $0,%ebp + movl %ebx,24(%edi) + # saved r[6] + movl %ecx,28(%edi) + popl %ebx + popl %ebp + popl %edi + popl %esi + ret diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-gf2m.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-gf2m.s new file mode 100644 index 00000000000000..6aa546e3389c37 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-gf2m.s @@ -0,0 +1,344 @@ +.file "../openssl/crypto/bn/asm/x86-gf2m.s" +.text +.align 4 +__mul_1x1_mmx: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + andl $1073741823,%ecx + leal (%edx,%edx,1),%ebp + movl $0,(%esp) + andl $2147483647,%edx + movd %eax,%mm2 + movd %ebx,%mm3 + movl %ecx,4(%esp) + xorl %edx,%ecx + pxor %mm5,%mm5 + pxor %mm4,%mm4 + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + pcmpgtd %mm2,%mm5 + paddd %mm2,%mm2 + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + pand %mm3,%mm5 + pcmpgtd %mm2,%mm4 + movl %ecx,20(%esp) + xorl %ecx,%ebp + psllq $31,%mm5 + pand %mm3,%mm4 + movl %edx,24(%esp) + movl $7,%esi + movl %ebp,28(%esp) + movl %esi,%ebp + andl %ebx,%esi + shrl $3,%ebx + movl %ebp,%edi + psllq $30,%mm4 + andl %ebx,%edi + shrl $3,%ebx + movd (%esp,%esi,4),%mm0 + movl 
%ebp,%esi + andl %ebx,%esi + shrl $3,%ebx + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $3,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $6,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $9,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $12,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $15,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $18,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + movl %ebp,%edi + psllq $21,%mm2 + andl %ebx,%edi + shrl $3,%ebx + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + movl %ebp,%esi + psllq $24,%mm1 + andl %ebx,%esi + shrl $3,%ebx + pxor %mm1,%mm0 + movd (%esp,%edi,4),%mm2 + pxor %mm4,%mm0 + psllq $27,%mm2 + pxor %mm2,%mm0 + movd (%esp,%esi,4),%mm1 + pxor %mm5,%mm0 + psllq $30,%mm1 + addl $36,%esp + pxor %mm1,%mm0 + ret +.align 4 +__mul_1x1_ialu: + subl $36,%esp + movl %eax,%ecx + leal (%eax,%eax,1),%edx + leal (,%eax,4),%ebp + andl $1073741823,%ecx + leal (%eax,%eax,1),%edi + sarl $31,%eax + movl $0,(%esp) + andl $2147483647,%edx + movl %ecx,4(%esp) + xorl %edx,%ecx + movl %edx,8(%esp) + xorl %ebp,%edx + movl %ecx,12(%esp) + xorl %edx,%ecx + movl %ebp,16(%esp) + xorl %edx,%ebp + movl %ecx,20(%esp) + xorl %ecx,%ebp + sarl $31,%edi + andl %ebx,%eax + movl %edx,24(%esp) + andl %ebx,%edi + movl %ebp,28(%esp) + movl %eax,%edx + shll $31,%eax + movl %edi,%ecx + shrl $1,%edx + movl $7,%esi + shll $30,%edi + andl %ebx,%esi + shrl $2,%ecx + xorl %edi,%eax + shrl $3,%ebx + movl $7,%edi + andl %ebx,%edi + shrl $3,%ebx + xorl %ecx,%edx + xorl (%esp,%esi,4),%eax + movl $7,%esi + andl %ebx,%esi + shrl $3,%ebx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $3,%ebp + andl %ebx,%edi + shrl $29,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $6,%ecx + andl %ebx,%esi + shrl $26,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $9,%ebp + andl %ebx,%edi + shrl $23,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $12,%ecx + andl %ebx,%esi + shrl $20,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $15,%ebp + andl %ebx,%edi + shrl $17,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $18,%ecx + andl %ebx,%esi + shrl $14,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl $7,%edi + movl %ebp,%ecx + shll $21,%ebp + andl %ebx,%edi + shrl $11,%ecx + xorl %ebp,%eax + shrl $3,%ebx + xorl %ecx,%edx + movl (%esp,%esi,4),%ecx + movl $7,%esi + movl %ecx,%ebp + shll $24,%ecx + andl %ebx,%esi + shrl $8,%ebp + xorl %ecx,%eax + shrl $3,%ebx + xorl %ebp,%edx + movl (%esp,%edi,4),%ebp + movl %ebp,%ecx + shll $27,%ebp + movl (%esp,%esi,4),%edi + shrl $5,%ecx + movl %edi,%esi + xorl %ebp,%eax + shll $30,%edi + xorl %ecx,%edx + shrl $2,%esi + xorl %edi,%eax + xorl %esi,%edx + addl $36,%esp + ret +.globl _bn_GF2m_mul_2x2 +.align 4 +_bn_GF2m_mul_2x2: +L_bn_GF2m_mul_2x2_begin: + call L000PIC_me_up +L000PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%edx),%edx 
+ movl (%edx),%eax + movl 4(%edx),%edx + testl $8388608,%eax + jz L001ialu + testl $16777216,%eax + jz L002mmx + testl $2,%edx + jz L002mmx + movups 8(%esp),%xmm0 + shufps $177,%xmm0,%xmm0 +.byte 102,15,58,68,192,1 + movl 4(%esp),%eax + movups %xmm0,(%eax) + ret +.align 4,0x90 +L002mmx: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%eax + movl 32(%esp),%ebx + call __mul_1x1_mmx + movq %mm0,%mm7 + movl 28(%esp),%eax + movl 36(%esp),%ebx + call __mul_1x1_mmx + movq %mm0,%mm6 + movl 24(%esp),%eax + movl 32(%esp),%ebx + xorl 28(%esp),%eax + xorl 36(%esp),%ebx + call __mul_1x1_mmx + pxor %mm7,%mm0 + movl 20(%esp),%eax + pxor %mm6,%mm0 + movq %mm0,%mm2 + psllq $32,%mm0 + popl %edi + psrlq $32,%mm2 + popl %esi + pxor %mm6,%mm0 + popl %ebx + pxor %mm7,%mm2 + movq %mm0,(%eax) + popl %ebp + movq %mm2,8(%eax) + emms + ret +.align 4,0x90 +L001ialu: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $20,%esp + movl 44(%esp),%eax + movl 52(%esp),%ebx + call __mul_1x1_ialu + movl %eax,8(%esp) + movl %edx,12(%esp) + movl 48(%esp),%eax + movl 56(%esp),%ebx + call __mul_1x1_ialu + movl %eax,(%esp) + movl %edx,4(%esp) + movl 44(%esp),%eax + movl 52(%esp),%ebx + xorl 48(%esp),%eax + xorl 56(%esp),%ebx + call __mul_1x1_ialu + movl 40(%esp),%ebp + movl (%esp),%ebx + movl 4(%esp),%ecx + movl 8(%esp),%edi + movl 12(%esp),%esi + xorl %edx,%eax + xorl %ecx,%edx + xorl %ebx,%eax + movl %ebx,(%ebp) + xorl %edi,%edx + movl %esi,12(%ebp) + xorl %esi,%eax + addl $20,%esp + xorl %esi,%edx + popl %edi + xorl %edx,%eax + popl %esi + movl %edx,8(%ebp) + popl %ebx + movl %eax,4(%ebp) + popl %ebp + ret +.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +.byte 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s new file mode 100644 index 00000000000000..b544a46c6758af --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/bn/x86-mont.s @@ -0,0 +1,461 @@ +.file "../openssl/crypto/bn/asm/x86-mont.s" +.text +.globl _bn_mul_mont +.align 4 +_bn_mul_mont: +L_bn_mul_mont_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %eax,%eax + movl 40(%esp),%edi + cmpl $4,%edi + jl L000just_leave + leal 20(%esp),%esi + leal 24(%esp),%edx + movl %esp,%ebp + addl $2,%edi + negl %edi + leal -32(%esp,%edi,4),%esp + negl %edi + movl %esp,%eax + subl %edx,%eax + andl $2047,%eax + subl %eax,%esp + xorl %esp,%edx + andl $2048,%edx + xorl $2048,%edx + subl %edx,%esp + andl $-64,%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 16(%esi),%esi + movl (%esi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) + movl %esi,20(%esp) + leal -3(%edi),%ebx + movl %ebp,24(%esp) + call L001PIC_me_up +L001PIC_me_up: + popl %eax + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax + btl $26,(%eax) + jnc L002non_sse2 + movl $-1,%eax + movd %eax,%mm7 + movl 8(%esp),%esi + movl 12(%esp),%edi + movl 16(%esp),%ebp + xorl %edx,%edx + xorl %ecx,%ecx + movd (%edi),%mm4 + movd (%esi),%mm5 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + movq %mm5,%mm2 + movq %mm5,%mm0 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + 
pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + incl %ecx +.align 4,0x90 +L0031st: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + leal 1(%ecx),%ecx + cmpl %ebx,%ecx + jl L0031st + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm2,%mm3 + movq %mm3,32(%esp,%ebx,4) + incl %edx +L004outer: + xorl %ecx,%ecx + movd (%edi,%edx,4),%mm4 + movd (%esi),%mm5 + movd 32(%esp),%mm6 + movd (%ebp),%mm3 + pmuludq %mm4,%mm5 + paddq %mm6,%mm5 + movq %mm5,%mm0 + movq %mm5,%mm2 + pand %mm7,%mm0 + pmuludq 20(%esp),%mm5 + pmuludq %mm5,%mm3 + paddq %mm0,%mm3 + movd 36(%esp),%mm6 + movd 4(%ebp),%mm1 + movd 4(%esi),%mm0 + psrlq $32,%mm2 + psrlq $32,%mm3 + paddq %mm6,%mm2 + incl %ecx + decl %ebx +L005inner: + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + movd 36(%esp,%ecx,4),%mm6 + pand %mm7,%mm0 + movd 4(%ebp,%ecx,4),%mm1 + paddq %mm0,%mm3 + movd 4(%esi,%ecx,4),%mm0 + psrlq $32,%mm2 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm3 + paddq %mm6,%mm2 + decl %ebx + leal 1(%ecx),%ecx + jnz L005inner + movl %ecx,%ebx + pmuludq %mm4,%mm0 + pmuludq %mm5,%mm1 + paddq %mm0,%mm2 + paddq %mm1,%mm3 + movq %mm2,%mm0 + pand %mm7,%mm0 + paddq %mm0,%mm3 + movd %mm3,28(%esp,%ecx,4) + psrlq $32,%mm2 + psrlq $32,%mm3 + movd 36(%esp,%ebx,4),%mm6 + paddq %mm2,%mm3 + paddq %mm6,%mm3 + movq %mm3,32(%esp,%ebx,4) + leal 1(%edx),%edx + cmpl %ebx,%edx + jle L004outer + emms + jmp L006common_tail +.align 4,0x90 +L002non_sse2: + movl 8(%esp),%esi + leal 1(%ebx),%ebp + movl 12(%esp),%edi + xorl %ecx,%ecx + movl %esi,%edx + andl $1,%ebp + subl %edi,%edx + leal 4(%edi,%ebx,4),%eax + orl %edx,%ebp + movl (%edi),%edi + jz L007bn_sqr_mont + movl %eax,28(%esp) + movl (%esi),%eax + xorl %edx,%edx +.align 4,0x90 +L008mull: + movl %edx,%ebp + mull %edi + addl %eax,%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + movl (%esi,%ecx,4),%eax + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L008mull + movl %edx,%ebp + mull %edi + movl 20(%esp),%edi + addl %ebp,%eax + movl 16(%esp),%esi + adcl $0,%edx + imull 32(%esp),%edi + movl %eax,32(%esp,%ebx,4) + xorl %ecx,%ecx + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + movl (%esi),%eax + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + incl %ecx + jmp L0092ndmadd +.align 4,0x90 +L0101stmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,28(%esp,%ecx,4) + jl L0101stmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + addl %eax,%ebp + adcl $0,%edx + imull 32(%esp),%edi + xorl %ecx,%ecx + addl 36(%esp,%ebx,4),%edx + movl %ebp,32(%esp,%ebx,4) + adcl $0,%ecx + movl (%esi),%eax + movl %edx,36(%esp,%ebx,4) + movl %ecx,40(%esp,%ebx,4) + mull %edi + addl 32(%esp),%eax + movl 4(%esi),%eax + adcl $0,%edx + movl $1,%ecx +.align 4,0x90 +L0092ndmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0092ndmadd + 
movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + xorl %eax,%eax + movl 12(%esp),%ecx + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + leal 4(%ecx),%ecx + movl %edx,32(%esp,%ebx,4) + cmpl 28(%esp),%ecx + movl %eax,36(%esp,%ebx,4) + je L006common_tail + movl (%ecx),%edi + movl 8(%esp),%esi + movl %ecx,12(%esp) + xorl %ecx,%ecx + xorl %edx,%edx + movl (%esi),%eax + jmp L0101stmadd +.align 4,0x90 +L007bn_sqr_mont: + movl %ebx,(%esp) + movl %ecx,12(%esp) + movl %edi,%eax + mull %edi + movl %eax,32(%esp) + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx + incl %ecx +.align 4,0x90 +L011sqr: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal 1(%ecx),%ecx + adcl $0,%edx + leal (%ebx,%eax,2),%ebp + shrl $31,%eax + cmpl (%esp),%ecx + movl %eax,%ebx + movl %ebp,28(%esp,%ecx,4) + jl L011sqr + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + movl 20(%esp),%edi + adcl $0,%edx + movl 16(%esp),%esi + leal (%ebx,%eax,2),%ebp + imull 32(%esp),%edi + shrl $31,%eax + movl %ebp,32(%esp,%ecx,4) + leal (%eax,%edx,2),%ebp + movl (%esi),%eax + shrl $31,%edx + movl %ebp,36(%esp,%ecx,4) + movl %edx,40(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + movl %ecx,%ebx + adcl $0,%edx + movl 4(%esi),%eax + movl $1,%ecx +.align 4,0x90 +L0123rdmadd: + movl %edx,%ebp + mull %edi + addl 32(%esp,%ecx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + movl 4(%esi,%ecx,4),%eax + adcl $0,%edx + movl %ebp,28(%esp,%ecx,4) + movl %edx,%ebp + mull %edi + addl 36(%esp,%ecx,4),%ebp + leal 2(%ecx),%ecx + adcl $0,%edx + addl %eax,%ebp + movl (%esi,%ecx,4),%eax + adcl $0,%edx + cmpl %ebx,%ecx + movl %ebp,24(%esp,%ecx,4) + jl L0123rdmadd + movl %edx,%ebp + mull %edi + addl 32(%esp,%ebx,4),%ebp + adcl $0,%edx + addl %eax,%ebp + adcl $0,%edx + movl %ebp,28(%esp,%ebx,4) + movl 12(%esp),%ecx + xorl %eax,%eax + movl 8(%esp),%esi + addl 36(%esp,%ebx,4),%edx + adcl 40(%esp,%ebx,4),%eax + movl %edx,32(%esp,%ebx,4) + cmpl %ebx,%ecx + movl %eax,36(%esp,%ebx,4) + je L006common_tail + movl 4(%esi,%ecx,4),%edi + leal 1(%ecx),%ecx + movl %edi,%eax + movl %ecx,12(%esp) + mull %edi + addl 32(%esp,%ecx,4),%eax + adcl $0,%edx + movl %eax,32(%esp,%ecx,4) + xorl %ebp,%ebp + cmpl %ebx,%ecx + leal 1(%ecx),%ecx + je L013sqrlast + movl %edx,%ebx + shrl $1,%edx + andl $1,%ebx +.align 4,0x90 +L014sqradd: + movl (%esi,%ecx,4),%eax + movl %edx,%ebp + mull %edi + addl %ebp,%eax + leal (%eax,%eax,1),%ebp + adcl $0,%edx + shrl $31,%eax + addl 32(%esp,%ecx,4),%ebp + leal 1(%ecx),%ecx + adcl $0,%eax + addl %ebx,%ebp + adcl $0,%eax + cmpl (%esp),%ecx + movl %ebp,28(%esp,%ecx,4) + movl %eax,%ebx + jle L014sqradd + movl %edx,%ebp + addl %edx,%edx + shrl $31,%ebp + addl %ebx,%edx + adcl $0,%ebp +L013sqrlast: + movl 20(%esp),%edi + movl 16(%esp),%esi + imull 32(%esp),%edi + addl 32(%esp,%ecx,4),%edx + movl (%esi),%eax + adcl $0,%ebp + movl %edx,32(%esp,%ecx,4) + movl %ebp,36(%esp,%ecx,4) + mull %edi + addl 32(%esp),%eax + leal -1(%ecx),%ebx + adcl $0,%edx + movl $1,%ecx + movl 4(%esi),%eax + jmp L0123rdmadd +.align 4,0x90 +L006common_tail: + movl 16(%esp),%ebp + movl 4(%esp),%edi + leal 32(%esp),%esi + movl (%esi),%eax + movl %ebx,%ecx + xorl %edx,%edx +.align 4,0x90 +L015sub: + sbbl (%ebp,%edx,4),%eax + movl %eax,(%edi,%edx,4) + decl %ecx + movl 4(%esi,%edx,4),%eax + leal 1(%edx),%edx + jge L015sub + sbbl $0,%eax + andl %eax,%esi + notl %eax + movl %edi,%ebp + andl %eax,%ebp + orl %ebp,%esi +.align 4,0x90 +L016copy: + movl (%esi,%ebx,4),%eax + movl 
%eax,(%edi,%ebx,4) + movl %ecx,32(%esp,%ebx,4) + decl %ebx + jge L016copy + movl 24(%esp),%esp + movl $1,%eax +L000just_leave: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +.byte 111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/camellia/cmll-x86.s b/deps/openssl/asm_obsolete/x86-macosx-gas/camellia/cmll-x86.s new file mode 100644 index 00000000000000..2367cee780242b --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/camellia/cmll-x86.s @@ -0,0 +1,2353 @@ +.file "cmll-586.s" +.text +.globl _Camellia_EncryptBlock_Rounds +.align 4 +_Camellia_EncryptBlock_Rounds: +L_Camellia_EncryptBlock_Rounds_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + leal -127(%edi),%ecx + subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + leal (%edi,%eax,1),%eax + movl %ebx,20(%esp) + movl %eax,16(%esp) + call L000pic_point +L000pic_point: + popl %ebp + leal LCamellia_SBOX-L000pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call __x86_Camellia_encrypt + movl 20(%esp),%esp + bswap %eax + movl 32(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _Camellia_EncryptBlock +.align 4 +_Camellia_EncryptBlock: +L_Camellia_EncryptBlock_begin: + movl $128,%eax + subl 4(%esp),%eax + movl $3,%eax + adcl $0,%eax + movl %eax,4(%esp) + jmp L_Camellia_EncryptBlock_Rounds_begin +.globl _Camellia_encrypt +.align 4 +_Camellia_encrypt: +L_Camellia_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + movl 272(%edi),%eax + leal -127(%edi),%ecx + subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + leal (%edi,%eax,1),%eax + movl %ebx,20(%esp) + movl %eax,16(%esp) + call L001pic_point +L001pic_point: + popl %ebp + leal LCamellia_SBOX-L001pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call __x86_Camellia_encrypt + movl 20(%esp),%esp + bswap %eax + movl 24(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__x86_Camellia_encrypt: + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 16(%edi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) +.align 4,0x90 +L002loop: + xorl %esi,%eax + xorl 20(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 
4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 24(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl 28(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 32(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl 36(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 40(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl 44(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 48(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl 52(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 56(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl 60(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 64(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + addl $64,%edi + cmpl 20(%esp),%edi + je L003done + andl 
%eax,%esi + movl 16(%esp),%edx + roll $1,%esi + movl %edx,%ecx + xorl %esi,%ebx + orl 12(%edi),%ecx + movl %ebx,8(%esp) + xorl 12(%esp),%ecx + movl 4(%edi),%esi + movl %ecx,12(%esp) + orl %ebx,%esi + andl 8(%edi),%ecx + xorl %esi,%eax + roll $1,%ecx + movl %eax,4(%esp) + xorl %ecx,%edx + movl 16(%edi),%esi + movl %edx,16(%esp) + jmp L002loop +.align 3,0x90 +L003done: + movl %eax,%ecx + movl %ebx,%edx + movl 12(%esp),%eax + movl 16(%esp),%ebx + xorl %esi,%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.globl _Camellia_DecryptBlock_Rounds +.align 4 +_Camellia_DecryptBlock_Rounds: +L_Camellia_DecryptBlock_Rounds_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + leal -127(%edi),%ecx + subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + movl %edi,16(%esp) + leal (%edi,%eax,1),%edi + movl %ebx,20(%esp) + call L004pic_point +L004pic_point: + popl %ebp + leal LCamellia_SBOX-L004pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call __x86_Camellia_decrypt + movl 20(%esp),%esp + bswap %eax + movl 32(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _Camellia_DecryptBlock +.align 4 +_Camellia_DecryptBlock: +L_Camellia_DecryptBlock_begin: + movl $128,%eax + subl 4(%esp),%eax + movl $3,%eax + adcl $0,%eax + movl %eax,4(%esp) + jmp L_Camellia_DecryptBlock_Rounds_begin +.globl _Camellia_decrypt +.align 4 +_Camellia_decrypt: +L_Camellia_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%ebx + subl $28,%esp + andl $-64,%esp + movl 272(%edi),%eax + leal -127(%edi),%ecx + subl %esp,%ecx + negl %ecx + andl $960,%ecx + subl %ecx,%esp + addl $4,%esp + shll $6,%eax + movl %edi,16(%esp) + leal (%edi,%eax,1),%edi + movl %ebx,20(%esp) + call L005pic_point +L005pic_point: + popl %ebp + leal LCamellia_SBOX-L005pic_point(%ebp),%ebp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + bswap %ecx + bswap %edx + call __x86_Camellia_decrypt + movl 20(%esp),%esp + bswap %eax + movl 24(%esp),%esi + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__x86_Camellia_decrypt: + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl -8(%edi),%esi + movl %eax,4(%esp) + movl %ebx,8(%esp) + movl %ecx,12(%esp) + movl %edx,16(%esp) +.align 4,0x90 +L006loop: + xorl %esi,%eax + xorl -4(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl -16(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl -12(%edi),%edx + movzbl 
%ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl -24(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl -20(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl -32(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl -28(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl -40(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + xorl %esi,%eax + xorl -36(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 16(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 12(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl -48(%edi),%esi + xorl %ecx,%edx + movl %edx,16(%esp) + xorl %ebx,%ecx + movl %ecx,12(%esp) + xorl %esi,%ecx + xorl -44(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 8(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl 4(%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl -56(%edi),%esi + xorl %eax,%ebx + movl %ebx,8(%esp) + xorl %edx,%eax + movl %eax,4(%esp) + subl $64,%edi + cmpl 20(%esp),%edi + je L007done + andl %eax,%esi + movl 16(%esp),%edx + roll $1,%esi + movl %edx,%ecx + xorl %esi,%ebx + orl 4(%edi),%ecx + movl %ebx,8(%esp) + xorl 12(%esp),%ecx + movl 12(%edi),%esi + movl %ecx,12(%esp) + orl %ebx,%esi + andl (%edi),%ecx + xorl %esi,%eax + roll $1,%ecx + movl %eax,4(%esp) + xorl %ecx,%edx + movl -8(%edi),%esi + movl %edx,16(%esp) + jmp L006loop +.align 3,0x90 +L007done: + movl %eax,%ecx + movl %ebx,%edx + 
movl 12(%esp),%eax + movl 16(%esp),%ebx + xorl %esi,%ecx + xorl 12(%edi),%edx + xorl (%edi),%eax + xorl 4(%edi),%ebx + ret +.globl _Camellia_Ekeygen +.align 4 +_Camellia_Ekeygen: +L_Camellia_Ekeygen_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $16,%esp + movl 36(%esp),%ebp + movl 40(%esp),%esi + movl 44(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + cmpl $128,%ebp + je L0081st128 + movl 16(%esi),%eax + movl 20(%esi),%ebx + cmpl $192,%ebp + je L0091st192 + movl 24(%esi),%ecx + movl 28(%esi),%edx + jmp L0101st256 +.align 2,0x90 +L0091st192: + movl %eax,%ecx + movl %ebx,%edx + notl %ecx + notl %edx +.align 2,0x90 +L0101st256: + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%edi) + movl %ebx,36(%edi) + movl %ecx,40(%edi) + movl %edx,44(%edi) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx +.align 2,0x90 +L0081st128: + call L011pic_point +L011pic_point: + popl %ebp + leal LCamellia_SBOX-L011pic_point(%ebp),%ebp + leal LCamellia_SIGMA-LCamellia_SBOX(%ebp),%edi + movl (%edi),%esi + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + xorl %esi,%eax + xorl 4(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 12(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 8(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 8(%edi),%esi + xorl %ecx,%edx + movl %edx,12(%esp) + xorl %ebx,%ecx + movl %ecx,8(%esp) + xorl %esi,%ecx + xorl 12(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 4(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl (%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 16(%edi),%esi + xorl %eax,%ebx + movl %ebx,4(%esp) + xorl %edx,%eax + movl %eax,(%esp) + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 44(%esp),%esi + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 16(%edi),%esi + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + xorl %esi,%eax + xorl 20(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 12(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 8(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 24(%edi),%esi + xorl %ecx,%edx + movl %edx,12(%esp) + xorl %ebx,%ecx + movl %ecx,8(%esp) + xorl %esi,%ecx + xorl 28(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 
4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 4(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl (%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 32(%edi),%esi + xorl %eax,%ebx + movl %ebx,4(%esp) + xorl %edx,%eax + movl %eax,(%esp) + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 36(%esp),%esi + cmpl $128,%esi + jne L0122nd256 + movl 44(%esp),%edi + leal 128(%edi),%edi + movl %eax,-112(%edi) + movl %ebx,-108(%edi) + movl %ecx,-104(%edi) + movl %edx,-100(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-80(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-76(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-72(%edi) + movl %edx,-68(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-64(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-60(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-56(%edi) + movl %edx,-52(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-32(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-28(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,-16(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,-12(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-8(%edi) + movl %edx,-4(%edi) + movl %ebx,%ebp + shll $2,%ebx + movl %ecx,%esi + shrl $30,%esi + shll $2,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $2,%edx + movl %ebx,32(%edi) + shrl $30,%esi + orl %esi,%ecx + shrl $30,%ebp + movl %eax,%esi + shrl $30,%esi + movl %ecx,36(%edi) + shll $2,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,40(%edi) + movl %eax,44(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,64(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,68(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,72(%edi) + movl %eax,76(%edi) + movl -128(%edi),%ebx + movl -124(%edi),%ecx + movl -120(%edi),%edx + movl -116(%edi),%eax + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + movl %ebx,-96(%edi) + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + movl %ecx,-92(%edi) + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-88(%edi) + movl %eax,-84(%edi) + movl %ebx,%ebp + shll $30,%ebx + movl %ecx,%esi + shrl $2,%esi + shll $30,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $30,%edx + movl %ebx,-48(%edi) + shrl $2,%esi + orl %esi,%ecx + shrl $2,%ebp + movl %eax,%esi + shrl $2,%esi + movl %ecx,-44(%edi) + shll $30,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-40(%edi) + movl 
%eax,-36(%edi) + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-24(%edi) + movl %eax,-20(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,4(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,8(%edi) + movl %eax,12(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,16(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,20(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,24(%edi) + movl %eax,28(%edi) + movl %ebx,%ebp + shll $17,%ebx + movl %ecx,%esi + shrl $15,%esi + shll $17,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $17,%edx + movl %ebx,48(%edi) + shrl $15,%esi + orl %esi,%ecx + shrl $15,%ebp + movl %eax,%esi + shrl $15,%esi + movl %ecx,52(%edi) + shll $17,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,56(%edi) + movl %eax,60(%edi) + movl $3,%eax + jmp L013done +.align 4,0x90 +L0122nd256: + movl 44(%esp),%esi + movl %eax,48(%esi) + movl %ebx,52(%esi) + movl %ecx,56(%esi) + movl %edx,60(%esi) + xorl 32(%esi),%eax + xorl 36(%esi),%ebx + xorl 40(%esi),%ecx + xorl 44(%esi),%edx + movl 32(%edi),%esi + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + xorl %esi,%eax + xorl 36(%edi),%ebx + movzbl %ah,%esi + movl 2052(%ebp,%esi,8),%edx + movzbl %al,%esi + xorl 4(%ebp,%esi,8),%edx + shrl $16,%eax + movzbl %bl,%esi + movl (%ebp,%esi,8),%ecx + movzbl %ah,%esi + xorl (%ebp,%esi,8),%edx + movzbl %bh,%esi + xorl 4(%ebp,%esi,8),%ecx + shrl $16,%ebx + movzbl %al,%eax + xorl 2048(%ebp,%eax,8),%edx + movzbl %bh,%esi + movl 12(%esp),%eax + xorl %edx,%ecx + rorl $8,%edx + xorl 2048(%ebp,%esi,8),%ecx + movzbl %bl,%esi + movl 8(%esp),%ebx + xorl %eax,%edx + xorl 2052(%ebp,%esi,8),%ecx + movl 40(%edi),%esi + xorl %ecx,%edx + movl %edx,12(%esp) + xorl %ebx,%ecx + movl %ecx,8(%esp) + xorl %esi,%ecx + xorl 44(%edi),%edx + movzbl %ch,%esi + movl 2052(%ebp,%esi,8),%ebx + movzbl %cl,%esi + xorl 4(%ebp,%esi,8),%ebx + shrl $16,%ecx + movzbl %dl,%esi + movl (%ebp,%esi,8),%eax + movzbl %ch,%esi + xorl (%ebp,%esi,8),%ebx + movzbl %dh,%esi + xorl 4(%ebp,%esi,8),%eax + shrl $16,%edx + movzbl %cl,%ecx + xorl 2048(%ebp,%ecx,8),%ebx + movzbl %dh,%esi + movl 4(%esp),%ecx + xorl %ebx,%eax + rorl $8,%ebx + xorl 2048(%ebp,%esi,8),%eax + movzbl %dl,%esi + movl (%esp),%edx + xorl %ecx,%ebx + xorl 2052(%ebp,%esi,8),%eax + movl 48(%edi),%esi + xorl %eax,%ebx + movl %ebx,4(%esp) + xorl %edx,%eax + movl %eax,(%esp) + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 44(%esp),%edi + leal 128(%edi),%edi + movl %eax,-112(%edi) + movl %ebx,-108(%edi) + movl %ecx,-104(%edi) + movl %edx,-100(%edi) + movl %eax,%ebp + shll $30,%eax + movl %ebx,%esi + shrl $2,%esi + shll $30,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $30,%ecx + movl %eax,-48(%edi) + shrl $2,%esi + orl %esi,%ebx + shrl $2,%ebp + movl %edx,%esi + shrl $2,%esi + movl %ebx,-44(%edi) + shll $30,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-40(%edi) + movl %edx,-36(%edi) + movl %eax,%ebp + shll $30,%eax + movl %ebx,%esi + shrl $2,%esi + shll $30,%ebx + orl 
%esi,%eax + movl %ecx,%esi + shll $30,%ecx + movl %eax,32(%edi) + shrl $2,%esi + orl %esi,%ebx + shrl $2,%ebp + movl %edx,%esi + shrl $2,%esi + movl %ebx,36(%edi) + shll $30,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,40(%edi) + movl %edx,44(%edi) + movl %ebx,%ebp + shll $19,%ebx + movl %ecx,%esi + shrl $13,%esi + shll $19,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $19,%edx + movl %ebx,128(%edi) + shrl $13,%esi + orl %esi,%ecx + shrl $13,%ebp + movl %eax,%esi + shrl $13,%esi + movl %ecx,132(%edi) + shll $19,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,136(%edi) + movl %eax,140(%edi) + movl -96(%edi),%ebx + movl -92(%edi),%ecx + movl -88(%edi),%edx + movl -84(%edi),%eax + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + movl %ebx,-96(%edi) + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + movl %ecx,-92(%edi) + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-88(%edi) + movl %eax,-84(%edi) + movl %ebx,%ebp + shll $15,%ebx + movl %ecx,%esi + shrl $17,%esi + shll $15,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $15,%edx + movl %ebx,-64(%edi) + shrl $17,%esi + orl %esi,%ecx + shrl $17,%ebp + movl %eax,%esi + shrl $17,%esi + movl %ecx,-60(%edi) + shll $15,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,-56(%edi) + movl %eax,-52(%edi) + movl %ebx,%ebp + shll $30,%ebx + movl %ecx,%esi + shrl $2,%esi + shll $30,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $30,%edx + movl %ebx,16(%edi) + shrl $2,%esi + orl %esi,%ecx + shrl $2,%ebp + movl %eax,%esi + shrl $2,%esi + movl %ecx,20(%edi) + shll $30,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,24(%edi) + movl %eax,28(%edi) + movl %ecx,%ebp + shll $2,%ecx + movl %edx,%esi + shrl $30,%esi + shll $2,%edx + orl %esi,%ecx + movl %eax,%esi + shll $2,%eax + movl %ecx,80(%edi) + shrl $30,%esi + orl %esi,%edx + shrl $30,%ebp + movl %ebx,%esi + shrl $30,%esi + movl %edx,84(%edi) + shll $2,%ebx + orl %esi,%eax + orl %ebp,%ebx + movl %eax,88(%edi) + movl %ebx,92(%edi) + movl -80(%edi),%ecx + movl -76(%edi),%edx + movl -72(%edi),%eax + movl -68(%edi),%ebx + movl %ecx,%ebp + shll $15,%ecx + movl %edx,%esi + shrl $17,%esi + shll $15,%edx + orl %esi,%ecx + movl %eax,%esi + shll $15,%eax + movl %ecx,-80(%edi) + shrl $17,%esi + orl %esi,%edx + shrl $17,%ebp + movl %ebx,%esi + shrl $17,%esi + movl %edx,-76(%edi) + shll $15,%ebx + orl %esi,%eax + orl %ebp,%ebx + movl %eax,-72(%edi) + movl %ebx,-68(%edi) + movl %ecx,%ebp + shll $30,%ecx + movl %edx,%esi + shrl $2,%esi + shll $30,%edx + orl %esi,%ecx + movl %eax,%esi + shll $30,%eax + movl %ecx,-16(%edi) + shrl $2,%esi + orl %esi,%edx + shrl $2,%ebp + movl %ebx,%esi + shrl $2,%esi + movl %edx,-12(%edi) + shll $30,%ebx + orl %esi,%eax + orl %ebp,%ebx + movl %eax,-8(%edi) + movl %ebx,-4(%edi) + movl %edx,64(%edi) + movl %eax,68(%edi) + movl %ebx,72(%edi) + movl %ecx,76(%edi) + movl %edx,%ebp + shll $17,%edx + movl %eax,%esi + shrl $15,%esi + shll $17,%eax + orl %esi,%edx + movl %ebx,%esi + shll $17,%ebx + movl %edx,96(%edi) + shrl $15,%esi + orl %esi,%eax + shrl $15,%ebp + movl %ecx,%esi + shrl $15,%esi + movl %eax,100(%edi) + shll $17,%ecx + orl %esi,%ebx + orl %ebp,%ecx + movl %ebx,104(%edi) + movl %ecx,108(%edi) + movl -128(%edi),%edx + movl -124(%edi),%eax + movl -120(%edi),%ebx + movl -116(%edi),%ecx + movl %eax,%ebp + shll $13,%eax + movl %ebx,%esi + shrl $19,%esi + shll $13,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $13,%ecx + movl %eax,-32(%edi) + shrl $19,%esi + orl %esi,%ebx + 
shrl $19,%ebp + movl %edx,%esi + shrl $19,%esi + movl %ebx,-28(%edi) + shll $13,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,-24(%edi) + movl %edx,-20(%edi) + movl %eax,%ebp + shll $15,%eax + movl %ebx,%esi + shrl $17,%esi + shll $15,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $15,%ecx + movl %eax,(%edi) + shrl $17,%esi + orl %esi,%ebx + shrl $17,%ebp + movl %edx,%esi + shrl $17,%esi + movl %ebx,4(%edi) + shll $15,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl %eax,%ebp + shll $17,%eax + movl %ebx,%esi + shrl $15,%esi + shll $17,%ebx + orl %esi,%eax + movl %ecx,%esi + shll $17,%ecx + movl %eax,48(%edi) + shrl $15,%esi + orl %esi,%ebx + shrl $15,%ebp + movl %edx,%esi + shrl $15,%esi + movl %ebx,52(%edi) + shll $17,%edx + orl %esi,%ecx + orl %ebp,%edx + movl %ecx,56(%edi) + movl %edx,60(%edi) + movl %ebx,%ebp + shll $2,%ebx + movl %ecx,%esi + shrl $30,%esi + shll $2,%ecx + orl %esi,%ebx + movl %edx,%esi + shll $2,%edx + movl %ebx,112(%edi) + shrl $30,%esi + orl %esi,%ecx + shrl $30,%ebp + movl %eax,%esi + shrl $30,%esi + movl %ecx,116(%edi) + shll $2,%eax + orl %esi,%edx + orl %ebp,%eax + movl %edx,120(%edi) + movl %eax,124(%edi) + movl $4,%eax +L013done: + leal 144(%edi),%edx + addl $16,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _private_Camellia_set_key +.align 4 +_private_Camellia_set_key: +L_private_Camellia_set_key_begin: + pushl %ebx + movl 8(%esp),%ecx + movl 12(%esp),%ebx + movl 16(%esp),%edx + movl $-1,%eax + testl %ecx,%ecx + jz L014done + testl %edx,%edx + jz L014done + movl $-2,%eax + cmpl $256,%ebx + je L015arg_ok + cmpl $192,%ebx + je L015arg_ok + cmpl $128,%ebx + jne L014done +.align 2,0x90 +L015arg_ok: + pushl %edx + pushl %ecx + pushl %ebx + call L_Camellia_Ekeygen_begin + addl $12,%esp + movl %eax,(%edx) + xorl %eax,%eax +.align 2,0x90 +L014done: + popl %ebx + ret +.align 6,0x90 +LCamellia_SIGMA: +.long 2694735487,1003262091,3061508184,1286239154,3337565999,3914302142,1426019237,4057165596,283453434,3731369245,2958461122,3018244605,0,0,0,0 +.align 6,0x90 +LCamellia_SBOX: +.long 1886416896,1886388336 +.long 2189591040,741081132 +.long 741092352,3014852787 +.long 3974949888,3233808576 +.long 3014898432,3840147684 +.long 656877312,1465319511 +.long 3233857536,3941204202 +.long 3857048832,2930639022 +.long 3840205824,589496355 +.long 2240120064,1802174571 +.long 1465341696,1162149957 +.long 892679424,2779054245 +.long 3941263872,3991732461 +.long 202116096,1330577487 +.long 2930683392,488439837 +.long 1094795520,2459041938 +.long 589505280,2256928902 +.long 4025478912,2947481775 +.long 1802201856,2088501372 +.long 2475922176,522125343 +.long 1162167552,1044250686 +.long 421075200,3705405660 +.long 2779096320,1583218782 +.long 555819264,185270283 +.long 3991792896,2795896998 +.long 235802112,960036921 +.long 1330597632,3587506389 +.long 1313754624,1566376029 +.long 488447232,3654877401 +.long 1701143808,1515847770 +.long 2459079168,1364262993 +.long 3183328512,1819017324 +.long 2256963072,2341142667 +.long 3099113472,2593783962 +.long 2947526400,4227531003 +.long 2408550144,2964324528 +.long 2088532992,1953759348 +.long 3958106880,724238379 +.long 522133248,4042260720 +.long 3469659648,2223243396 +.long 1044266496,3755933919 +.long 808464384,3419078859 +.long 3705461760,875823156 +.long 1600085760,1987444854 +.long 1583242752,1835860077 +.long 3318072576,2846425257 +.long 185273088,3520135377 +.long 437918208,67371012 +.long 2795939328,336855060 +.long 3789676800,976879674 +.long 960051456,3739091166 +.long 
3402287616,286326801 +.long 3587560704,842137650 +.long 1195853568,2627469468 +.long 1566399744,1397948499 +.long 1027423488,4075946226 +.long 3654932736,4278059262 +.long 16843008,3486449871 +.long 1515870720,3284336835 +.long 3604403712,2054815866 +.long 1364283648,606339108 +.long 1448498688,3907518696 +.long 1819044864,1616904288 +.long 1296911616,1768489065 +.long 2341178112,2863268010 +.long 218959104,2694840480 +.long 2593823232,2711683233 +.long 1717986816,1650589794 +.long 4227595008,1414791252 +.long 3435973632,505282590 +.long 2964369408,3772776672 +.long 757935360,1684275300 +.long 1953788928,269484048 +.long 303174144,0 +.long 724249344,2745368739 +.long 538976256,1970602101 +.long 4042321920,2324299914 +.long 2981212416,3873833190 +.long 2223277056,151584777 +.long 2576980224,3722248413 +.long 3755990784,2273771655 +.long 1280068608,2206400643 +.long 3419130624,3452764365 +.long 3267543552,2425356432 +.long 875836416,1936916595 +.long 2122219008,4143317238 +.long 1987474944,2644312221 +.long 84215040,3216965823 +.long 1835887872,1381105746 +.long 3082270464,3638034648 +.long 2846468352,3368550600 +.long 825307392,3334865094 +.long 3520188672,2172715137 +.long 387389184,1869545583 +.long 67372032,320012307 +.long 3621246720,1667432547 +.long 336860160,3924361449 +.long 1482184704,2812739751 +.long 976894464,2677997727 +.long 1633771776,3166437564 +.long 3739147776,690552873 +.long 454761216,4193845497 +.long 286331136,791609391 +.long 471604224,3031695540 +.long 842150400,2021130360 +.long 252645120,101056518 +.long 2627509248,3890675943 +.long 370546176,1903231089 +.long 1397969664,3570663636 +.long 404232192,2880110763 +.long 4076007936,2290614408 +.long 572662272,2374828173 +.long 4278124032,1920073842 +.long 1145324544,3115909305 +.long 3486502656,4177002744 +.long 2998055424,2896953516 +.long 3284386560,909508662 +.long 3048584448,707395626 +.long 2054846976,1010565180 +.long 2442236160,4059103473 +.long 606348288,1077936192 +.long 134744064,3553820883 +.long 3907577856,3149594811 +.long 2829625344,1128464451 +.long 1616928768,353697813 +.long 4244438016,2913796269 +.long 1768515840,2004287607 +.long 1347440640,2155872384 +.long 2863311360,2189557890 +.long 3503345664,3974889708 +.long 2694881280,656867367 +.long 2105376000,3856990437 +.long 2711724288,2240086149 +.long 2307492096,892665909 +.long 1650614784,202113036 +.long 2543294208,1094778945 +.long 1414812672,4025417967 +.long 1532713728,2475884691 +.long 505290240,421068825 +.long 2509608192,555810849 +.long 3772833792,235798542 +.long 4294967040,1313734734 +.long 1684300800,1701118053 +.long 3537031680,3183280317 +.long 269488128,3099066552 +.long 3301229568,2408513679 +.long 0,3958046955 +.long 1212696576,3469607118 +.long 2745410304,808452144 +.long 4160222976,1600061535 +.long 1970631936,3318022341 +.long 3688618752,437911578 +.long 2324335104,3789619425 +.long 50529024,3402236106 +.long 3873891840,1195835463 +.long 3671775744,1027407933 +.long 151587072,16842753 +.long 1061109504,3604349142 +.long 3722304768,1448476758 +.long 2492765184,1296891981 +.long 2273806080,218955789 +.long 1549556736,1717960806 +.long 2206434048,3435921612 +.long 33686016,757923885 +.long 3452816640,303169554 +.long 1246382592,538968096 +.long 2425393152,2981167281 +.long 858993408,2576941209 +.long 1936945920,1280049228 +.long 1734829824,3267494082 +.long 4143379968,2122186878 +.long 4092850944,84213765 +.long 2644352256,3082223799 +.long 2139062016,825294897 +.long 3217014528,387383319 +.long 3806519808,3621191895 +.long 
1381126656,1482162264 +.long 2610666240,1633747041 +.long 3638089728,454754331 +.long 640034304,471597084 +.long 3368601600,252641295 +.long 926365440,370540566 +.long 3334915584,404226072 +.long 993737472,572653602 +.long 2172748032,1145307204 +.long 2526451200,2998010034 +.long 1869573888,3048538293 +.long 1263225600,2442199185 +.long 320017152,134742024 +.long 3200171520,2829582504 +.long 1667457792,4244373756 +.long 774778368,1347420240 +.long 3924420864,3503292624 +.long 2038003968,2105344125 +.long 2812782336,2307457161 +.long 2358021120,2543255703 +.long 2678038272,1532690523 +.long 1852730880,2509570197 +.long 3166485504,4294902015 +.long 2391707136,3536978130 +.long 690563328,3301179588 +.long 4126536960,1212678216 +.long 4193908992,4160159991 +.long 3065427456,3688562907 +.long 791621376,50528259 +.long 4261281024,3671720154 +.long 3031741440,1061093439 +.long 1499027712,2492727444 +.long 2021160960,1549533276 +.long 2560137216,33685506 +.long 101058048,1246363722 +.long 1785358848,858980403 +.long 3890734848,1734803559 +.long 1179010560,4092788979 +.long 1903259904,2139029631 +.long 3132799488,3806462178 +.long 3570717696,2610626715 +.long 623191296,640024614 +.long 2880154368,926351415 +.long 1111638528,993722427 +.long 2290649088,2526412950 +.long 2728567296,1263206475 +.long 2374864128,3200123070 +.long 4210752000,774766638 +.long 1920102912,2037973113 +.long 117901056,2357985420 +.long 3115956480,1852702830 +.long 1431655680,2391670926 +.long 4177065984,4126474485 +.long 4008635904,3065381046 +.long 2896997376,4261216509 +.long 168430080,1499005017 +.long 909522432,2560098456 +.long 1229539584,1785331818 +.long 707406336,1178992710 +.long 1751672832,3132752058 +.long 1010580480,623181861 +.long 943208448,1111621698 +.long 4059164928,2728525986 +.long 2762253312,4210688250 +.long 1077952512,117899271 +.long 673720320,1431634005 +.long 3553874688,4008575214 +.long 2071689984,168427530 +.long 3149642496,1229520969 +.long 3385444608,1751646312 +.long 1128481536,943194168 +.long 3250700544,2762211492 +.long 353703168,673710120 +.long 3823362816,2071658619 +.long 2913840384,3385393353 +.long 4109693952,3250651329 +.long 2004317952,3823304931 +.long 3351758592,4109631732 +.long 2155905024,3351707847 +.long 2661195264,2661154974 +.long 14737632,939538488 +.long 328965,1090535745 +.long 5789784,369104406 +.long 14277081,1979741814 +.long 6776679,3640711641 +.long 5131854,2466288531 +.long 8487297,1610637408 +.long 13355979,4060148466 +.long 13224393,1912631922 +.long 723723,3254829762 +.long 11447982,2868947883 +.long 6974058,2583730842 +.long 14013909,1962964341 +.long 1579032,100664838 +.long 6118749,1459640151 +.long 8553090,2684395680 +.long 4605510,2432733585 +.long 14671839,4144035831 +.long 14079702,3036722613 +.long 2565927,3372272073 +.long 9079434,2717950626 +.long 3289650,2348846220 +.long 4934475,3523269330 +.long 4342338,2415956112 +.long 14408667,4127258358 +.long 1842204,117442311 +.long 10395294,2801837991 +.long 10263708,654321447 +.long 3815994,2382401166 +.long 13290186,2986390194 +.long 2434341,1224755529 +.long 8092539,3724599006 +.long 855309,1124090691 +.long 7434609,1543527516 +.long 6250335,3607156695 +.long 2039583,3338717127 +.long 16316664,1040203326 +.long 14145495,4110480885 +.long 4079166,2399178639 +.long 10329501,1728079719 +.long 8158332,520101663 +.long 6316128,402659352 +.long 12171705,1845522030 +.long 12500670,2936057775 +.long 12369084,788541231 +.long 9145227,3791708898 +.long 1447446,2231403909 +.long 3421236,218107149 +.long 
5066061,1392530259 +.long 12829635,4026593520 +.long 7500402,2617285788 +.long 9803157,1694524773 +.long 11250603,3925928682 +.long 9342606,2734728099 +.long 12237498,2919280302 +.long 8026746,2650840734 +.long 11776947,3959483628 +.long 131586,2147516544 +.long 11842740,754986285 +.long 11382189,1795189611 +.long 10658466,2818615464 +.long 11316396,721431339 +.long 14211288,905983542 +.long 10132122,2785060518 +.long 1513239,3305162181 +.long 1710618,2248181382 +.long 3487029,1291865421 +.long 13421772,855651123 +.long 16250871,4244700669 +.long 10066329,1711302246 +.long 6381921,1476417624 +.long 5921370,2516620950 +.long 15263976,973093434 +.long 2368548,150997257 +.long 5658198,2499843477 +.long 4210752,268439568 +.long 14803425,2013296760 +.long 6513507,3623934168 +.long 592137,1107313218 +.long 3355443,3422604492 +.long 12566463,4009816047 +.long 10000536,637543974 +.long 9934743,3842041317 +.long 8750469,1627414881 +.long 6842472,436214298 +.long 16579836,1056980799 +.long 15527148,989870907 +.long 657930,2181071490 +.long 14342874,3053500086 +.long 7303023,3674266587 +.long 5460819,3556824276 +.long 6447714,2550175896 +.long 10724259,3892373736 +.long 3026478,2332068747 +.long 526344,33554946 +.long 11513775,3942706155 +.long 2631720,167774730 +.long 11579568,738208812 +.long 7631988,486546717 +.long 12763842,2952835248 +.long 12434877,1862299503 +.long 3552822,2365623693 +.long 2236962,2281736328 +.long 3684408,234884622 +.long 6579300,419436825 +.long 1973790,2264958855 +.long 3750201,1308642894 +.long 2894892,184552203 +.long 10921638,2835392937 +.long 3158064,201329676 +.long 15066597,2030074233 +.long 4473924,285217041 +.long 16645629,2130739071 +.long 8947848,570434082 +.long 10461087,3875596263 +.long 6645093,1493195097 +.long 8882055,3774931425 +.long 7039851,3657489114 +.long 16053492,1023425853 +.long 2302755,3355494600 +.long 4737096,301994514 +.long 1052688,67109892 +.long 13750737,1946186868 +.long 5329233,1409307732 +.long 12632256,805318704 +.long 16382457,2113961598 +.long 13816530,3019945140 +.long 10526880,671098920 +.long 5592405,1426085205 +.long 10592673,1744857192 +.long 4276545,1342197840 +.long 16448250,3187719870 +.long 4408131,3489714384 +.long 1250067,3288384708 +.long 12895428,822096177 +.long 3092271,3405827019 +.long 11053224,704653866 +.long 11974326,2902502829 +.long 3947580,251662095 +.long 2829099,3389049546 +.long 12698049,1879076976 +.long 16777215,4278255615 +.long 13158600,838873650 +.long 10855845,1761634665 +.long 2105376,134219784 +.long 9013641,1644192354 +.long 0,0 +.long 9474192,603989028 +.long 4671303,3506491857 +.long 15724527,4211145723 +.long 15395562,3120609978 +.long 12040119,3976261101 +.long 1381653,1157645637 +.long 394758,2164294017 +.long 13487565,1929409395 +.long 11908533,1828744557 +.long 1184274,2214626436 +.long 8289918,2667618207 +.long 12303291,3993038574 +.long 2697513,1241533002 +.long 986895,3271607235 +.long 12105912,771763758 +.long 460551,3238052289 +.long 263172,16777473 +.long 10197915,3858818790 +.long 9737364,620766501 +.long 2171169,1207978056 +.long 6710886,2566953369 +.long 15132390,3103832505 +.long 13553358,3003167667 +.long 15592941,2063629179 +.long 15198183,4177590777 +.long 3881787,3456159438 +.long 16711422,3204497343 +.long 8355711,3741376479 +.long 12961221,1895854449 +.long 10790052,687876393 +.long 3618615,3439381965 +.long 11645361,1811967084 +.long 5000268,318771987 +.long 9539985,1677747300 +.long 7237230,2600508315 +.long 9276813,1660969827 +.long 7763574,2634063261 +.long 197379,3221274816 
+.long 2960685,1258310475 +.long 14606046,3070277559 +.long 9868950,2768283045 +.long 2500134,2298513801 +.long 8224125,1593859935 +.long 13027014,2969612721 +.long 6052956,385881879 +.long 13882323,4093703412 +.long 15921906,3154164924 +.long 5197647,3540046803 +.long 1644825,1174423110 +.long 4144959,3472936911 +.long 14474460,922761015 +.long 7960953,1577082462 +.long 1907997,1191200583 +.long 5395026,2483066004 +.long 15461355,4194368250 +.long 15987699,4227923196 +.long 7171437,1526750043 +.long 6184542,2533398423 +.long 16514043,4261478142 +.long 6908265,1509972570 +.long 11711154,2885725356 +.long 15790320,1006648380 +.long 3223857,1275087948 +.long 789516,50332419 +.long 13948116,889206069 +.long 13619151,4076925939 +.long 9211020,587211555 +.long 14869218,3087055032 +.long 7697781,1560304989 +.long 11119017,1778412138 +.long 4868682,2449511058 +.long 5723991,3573601749 +.long 8684676,553656609 +.long 1118481,1140868164 +.long 4539717,1358975313 +.long 1776411,3321939654 +.long 16119285,2097184125 +.long 15000804,956315961 +.long 921102,2197848963 +.long 7566195,3691044060 +.long 11184810,2852170410 +.long 15856113,2080406652 +.long 14540253,1996519287 +.long 5855577,1442862678 +.long 1315860,83887365 +.long 7105644,452991771 +.long 9605778,2751505572 +.long 5526612,352326933 +.long 13684944,872428596 +.long 7895160,503324190 +.long 7368816,469769244 +.long 14935011,4160813304 +.long 4802889,1375752786 +.long 8421504,536879136 +.long 5263440,335549460 +.long 10987431,3909151209 +.long 16185078,3170942397 +.long 7829367,3707821533 +.long 9671571,3825263844 +.long 8816262,2701173153 +.long 8618883,3758153952 +.long 2763306,2315291274 +.long 13092807,4043370993 +.long 5987163,3590379222 +.long 15329769,2046851706 +.long 15658734,3137387451 +.long 9408399,3808486371 +.long 65793,1073758272 +.long 4013373,1325420367 +.globl _Camellia_cbc_encrypt +.align 4 +_Camellia_cbc_encrypt: +L_Camellia_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je L016enc_out + pushfl + cld + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 36(%esp),%edx + movl 40(%esp),%ebp + leal -64(%esp),%esi + andl $-64,%esi + leal -127(%edx),%edi + subl %esi,%edi + negl %edi + andl $960,%edi + subl %edi,%esi + movl 44(%esp),%edi + xchgl %esi,%esp + addl $4,%esp + movl %esi,20(%esp) + movl %eax,24(%esp) + movl %ebx,28(%esp) + movl %ecx,32(%esp) + movl %edx,36(%esp) + movl %ebp,40(%esp) + call L017pic_point +L017pic_point: + popl %ebp + leal LCamellia_SBOX-L017pic_point(%ebp),%ebp + movl $32,%esi +.align 2,0x90 +L018prefetch_sbox: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + leal 128(%ebp),%ebp + decl %esi + jnz L018prefetch_sbox + movl 36(%esp),%eax + subl $4096,%ebp + movl 24(%esp),%esi + movl 272(%eax),%edx + cmpl $0,%edi + je L019DECRYPT + movl 32(%esp),%ecx + movl 40(%esp),%edi + shll $6,%edx + leal (%eax,%edx,1),%edx + movl %edx,16(%esp) + testl $4294967280,%ecx + jz L020enc_tail + movl (%edi),%eax + movl 4(%edi),%ebx +.align 2,0x90 +L021enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + bswap %eax + xorl 12(%esi),%edx + bswap %ebx + movl 36(%esp),%edi + bswap %ecx + bswap %edx + call __x86_Camellia_encrypt + movl 24(%esp),%esi + movl 28(%esp),%edi + bswap %eax + bswap %ebx + bswap %ecx + movl %eax,(%edi) + bswap %edx + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%ecx + leal 16(%esi),%esi + movl %esi,24(%esp) + leal 16(%edi),%edx 
+ movl %edx,28(%esp) + subl $16,%ecx + testl $4294967280,%ecx + movl %ecx,32(%esp) + jnz L021enc_loop + testl $15,%ecx + jnz L020enc_tail + movl 40(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 20(%esp),%esp + popfl +L016enc_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 2,0x90 +L020enc_tail: + movl %edi,%eax + movl 28(%esp),%edi + pushl %eax + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je L022enc_in_place +.align 2,0x90 +.long 2767451785 + jmp L023enc_skip_in_place +L022enc_in_place: + leal (%edi,%ecx,1),%edi +L023enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 2,0x90 +.long 2868115081 + popl %edi + movl 28(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,32(%esp) + jmp L021enc_loop +.align 4,0x90 +L019DECRYPT: + shll $6,%edx + leal (%eax,%edx,1),%edx + movl %eax,16(%esp) + movl %edx,36(%esp) + cmpl 28(%esp),%esi + je L024dec_in_place + movl 40(%esp),%edi + movl %edi,44(%esp) +.align 2,0x90 +L025dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + bswap %eax + movl 12(%esi),%edx + bswap %ebx + movl 36(%esp),%edi + bswap %ecx + bswap %edx + call __x86_Camellia_decrypt + movl 44(%esp),%edi + movl 32(%esp),%esi + bswap %eax + bswap %ebx + bswap %ecx + xorl (%edi),%eax + bswap %edx + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc L026dec_partial + movl %esi,32(%esp) + movl 24(%esp),%esi + movl 28(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl %esi,44(%esp) + leal 16(%esi),%esi + movl %esi,24(%esp) + leal 16(%edi),%edi + movl %edi,28(%esp) + jnz L025dec_loop + movl 44(%esp),%edi +L027dec_end: + movl 40(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp L028dec_out +.align 2,0x90 +L026dec_partial: + leal 44(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%ecx + movl %edi,%esi + movl 28(%esp),%edi +.long 2767451785 + movl 24(%esp),%edi + jmp L027dec_end +.align 2,0x90 +L024dec_in_place: +L029dec_in_place_loop: + leal 44(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + bswap %eax + movl %edx,12(%edi) + bswap %ebx + movl 36(%esp),%edi + bswap %ecx + bswap %edx + call __x86_Camellia_decrypt + movl 40(%esp),%edi + movl 28(%esp),%esi + bswap %eax + bswap %ebx + bswap %ecx + xorl (%edi),%eax + bswap %edx + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,28(%esp) + leal 44(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 24(%esp),%esi + leal 16(%esi),%esi + movl %esi,24(%esp) + movl 32(%esp),%ecx + subl $16,%ecx + jc L030dec_in_place_partial + movl %ecx,32(%esp) + jnz L029dec_in_place_loop + jmp L028dec_out +.align 2,0x90 +L030dec_in_place_partial: + movl 28(%esp),%edi + leal 44(%esp),%esi + leal (%edi,%ecx,1),%edi + leal 16(%esi,%ecx,1),%esi + negl %ecx +.long 2767451785 +.align 2,0x90 +L028dec_out: + movl 20(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 
67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/cast/cast-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/cast/cast-586.s new file mode 100644 index 00000000000000..3e797b6145f527 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/cast/cast-586.s @@ -0,0 +1,927 @@ +.file "cast-586.s" +.text +.globl _CAST_encrypt +.align 4 +_CAST_encrypt: +L_CAST_encrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + # Load the 2 words + movl (%ebx),%edi + movl 4(%ebx),%esi + # Get short key flag + movl 128(%ebp),%eax + pushl %eax + xorl %eax,%eax + # round 0 + movl (%ebp),%edx + movl 4(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + # round 1 + movl 8(%ebp),%edx + movl 12(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + # round 2 + movl 16(%ebp),%edx + movl 20(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + # round 3 + movl 24(%ebp),%edx + movl 28(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + # round 4 + movl 32(%ebp),%edx + movl 36(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + # round 5 + movl 40(%ebp),%edx + movl 44(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + # round 6 + movl 48(%ebp),%edx + movl 52(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl 
$255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + # round 7 + movl 56(%ebp),%edx + movl 60(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + # round 8 + movl 64(%ebp),%edx + movl 68(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + # round 9 + movl 72(%ebp),%edx + movl 76(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + # round 10 + movl 80(%ebp),%edx + movl 84(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + # round 11 + movl 88(%ebp),%edx + movl 92(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + # test short key flag + popl %edx + orl %edx,%edx + jnz L000cast_enc_done + # round 12 + movl 96(%ebp),%edx + movl 100(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + # round 13 + movl 104(%ebp),%edx + movl 108(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + # round 14 + movl 112(%ebp),%edx + movl 116(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl 
_CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + # round 15 + movl 120(%ebp),%edx + movl 124(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi +L000cast_enc_done: + nop + movl 20(%esp),%eax + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _CAST_decrypt +.align 4 +_CAST_decrypt: +L_CAST_decrypt_begin: + + pushl %ebp + pushl %ebx + movl 12(%esp),%ebx + movl 16(%esp),%ebp + pushl %esi + pushl %edi + # Load the 2 words + movl (%ebx),%edi + movl 4(%ebx),%esi + # Get short key flag + movl 128(%ebp),%eax + orl %eax,%eax + jnz L001cast_dec_skip + xorl %eax,%eax + # round 15 + movl 120(%ebp),%edx + movl 124(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + # round 14 + movl 112(%ebp),%edx + movl 116(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + # round 13 + movl 104(%ebp),%edx + movl 108(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + # round 12 + movl 96(%ebp),%edx + movl 100(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi +L001cast_dec_skip: + # round 11 + movl 88(%ebp),%edx + movl 92(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + # round 10 + movl 80(%ebp),%edx + movl 84(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl 
_CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + # round 9 + movl 72(%ebp),%edx + movl 76(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + # round 8 + movl 64(%ebp),%edx + movl 68(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + # round 7 + movl 56(%ebp),%edx + movl 60(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + # round 6 + movl 48(%ebp),%edx + movl 52(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + # round 5 + movl 40(%ebp),%edx + movl 44(%ebp),%ecx + subl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%edi + # round 4 + movl 32(%ebp),%edx + movl 36(%ebp),%ecx + xorl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%esi + # round 3 + movl 24(%ebp),%edx + movl 28(%ebp),%ecx + addl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%edi + # round 2 + movl 16(%ebp),%edx + movl 20(%ebp),%ecx + subl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + xorl %ebx,%ecx + movl 
_CAST_S_table3(,%edx,4),%ebx + subl %ebx,%ecx + xorl %ecx,%esi + # round 1 + movl 8(%ebp),%edx + movl 12(%ebp),%ecx + xorl %esi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + addl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + xorl %ebx,%ecx + xorl %ecx,%edi + # round 0 + movl (%ebp),%edx + movl 4(%ebp),%ecx + addl %edi,%edx + roll %cl,%edx + movl %edx,%ebx + xorl %ecx,%ecx + movb %dh,%cl + andl $255,%ebx + shrl $16,%edx + xorl %eax,%eax + movb %dh,%al + andl $255,%edx + movl _CAST_S_table0(,%ecx,4),%ecx + movl _CAST_S_table1(,%ebx,4),%ebx + xorl %ebx,%ecx + movl _CAST_S_table2(,%eax,4),%ebx + subl %ebx,%ecx + movl _CAST_S_table3(,%edx,4),%ebx + addl %ebx,%ecx + xorl %ecx,%esi + nop + movl 20(%esp),%eax + movl %edi,4(%eax) + movl %esi,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _CAST_cbc_encrypt +.align 4 +_CAST_cbc_encrypt: +L_CAST_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + # getting iv ptr from parameter 4 + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + # getting encrypt flag from parameter 5 + movl 56(%esp),%ecx + # get and push parameter 3 + movl 48(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz L002decrypt + andl $4294967288,%ebp + movl 8(%esp),%eax + movl 12(%esp),%ebx + jz L003encrypt_finish +L004encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call L_CAST_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz L004encrypt_loop +L003encrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz L005finish + call L006PIC_point +L006PIC_point: + popl %edx + leal L007cbc_enc_jmp_table-L006PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +L008ej7: + movb 6(%esi),%dh + shll $8,%edx +L009ej6: + movb 5(%esi),%dh +L010ej5: + movb 4(%esi),%dl +L011ej4: + movl (%esi),%ecx + jmp L012ejend +L013ej3: + movb 2(%esi),%ch + shll $8,%ecx +L014ej2: + movb 1(%esi),%ch +L015ej1: + movb (%esi),%cl +L012ejend: + xorl %ecx,%eax + xorl %edx,%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call L_CAST_encrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp L005finish +L002decrypt: + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz L016decrypt_finish +L017decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl %ebx,12(%esp) + call L_CAST_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,16(%esp) + movl %ebx,20(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz L017decrypt_loop +L016decrypt_finish: + movl 52(%esp),%ebp + andl $7,%ebp + jz L005finish + movl (%esi),%eax + movl 4(%esi),%ebx + bswap %eax + bswap %ebx + movl %eax,8(%esp) + movl 
%ebx,12(%esp) + call L_CAST_decrypt_begin + movl 8(%esp),%eax + movl 12(%esp),%ebx + bswap %eax + bswap %ebx + movl 16(%esp),%ecx + movl 20(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +L018dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +L019dj6: + movb %dh,5(%edi) +L020dj5: + movb %dl,4(%edi) +L021dj4: + movl %ecx,(%edi) + jmp L022djend +L023dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +L024dj2: + movb %ch,1(%esi) +L025dj1: + movb %cl,(%esi) +L022djend: + jmp L005finish +L005finish: + movl 60(%esp),%ecx + addl $24,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L007cbc_enc_jmp_table: +.long 0 +.long L015ej1-L006PIC_point +.long L014ej2-L006PIC_point +.long L013ej3-L006PIC_point +.long L011ej4-L006PIC_point +.long L010ej5-L006PIC_point +.long L009ej6-L006PIC_point +.long L008ej7-L006PIC_point +.align 6,0x90 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/des/crypt586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/des/crypt586.s new file mode 100644 index 00000000000000..1731c53faac06e --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/des/crypt586.s @@ -0,0 +1,880 @@ +.file "crypt586.s" +.text +.globl _fcrypt_body +.align 4 +_fcrypt_body: +L_fcrypt_body_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + # Load the 2 words + xorl %edi,%edi + xorl %esi,%esi + call L000PIC_me_up +L000PIC_me_up: + popl %edx + movl L_DES_SPtrans$non_lazy_ptr-L000PIC_me_up(%edx),%edx + pushl %edx + movl 28(%esp),%ebp + pushl $25 +L001start: + + # Round 0 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl (%ebp),%ebx + xorl %ebx,%eax + movl 4(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 1 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 8(%ebp),%ebx + xorl %ebx,%eax + movl 12(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 2 + movl 36(%esp),%eax + movl %esi,%edx 
+ shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 16(%ebp),%ebx + xorl %ebx,%eax + movl 20(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 3 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 24(%ebp),%ebx + xorl %ebx,%eax + movl 28(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 4 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 32(%ebp),%ebx + xorl %ebx,%eax + movl 36(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 5 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 40(%ebp),%ebx + xorl %ebx,%eax + movl 44(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl 
$0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 6 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 48(%ebp),%ebx + xorl %ebx,%eax + movl 52(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 7 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 56(%ebp),%ebx + xorl %ebx,%eax + movl 60(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 8 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 64(%ebp),%ebx + xorl %ebx,%eax + movl 68(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 9 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 72(%ebp),%ebx + xorl %ebx,%eax + movl 76(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb 
%ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 10 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 80(%ebp),%ebx + xorl %ebx,%eax + movl 84(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 11 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 88(%ebp),%ebx + xorl %ebx,%eax + movl 92(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 12 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 96(%ebp),%ebx + xorl %ebx,%eax + movl 100(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 13 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + 
movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 104(%ebp),%ebx + xorl %ebx,%eax + movl 108(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + + # Round 14 + movl 36(%esp),%eax + movl %esi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %esi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 112(%ebp),%ebx + xorl %ebx,%eax + movl 116(%ebp),%ecx + xorl %esi,%eax + xorl %esi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%edi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%edi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%edi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%edi + movl 32(%esp),%ebp + + # Round 15 + movl 36(%esp),%eax + movl %edi,%edx + shrl $16,%edx + movl 40(%esp),%ecx + xorl %edi,%edx + andl %edx,%eax + andl %ecx,%edx + movl %eax,%ebx + shll $16,%ebx + movl %edx,%ecx + shll $16,%ecx + xorl %ebx,%eax + xorl %ecx,%edx + movl 120(%ebp),%ebx + xorl %ebx,%eax + movl 124(%ebp),%ecx + xorl %edi,%eax + xorl %edi,%edx + xorl %ecx,%edx + andl $0xfcfcfcfc,%eax + xorl %ebx,%ebx + andl $0xcfcfcfcf,%edx + xorl %ecx,%ecx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + movl 4(%esp),%ebp + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + movl 0x600(%ebp,%ebx,1),%ebx + xorl %ebx,%esi + movl 0x700(%ebp,%ecx,1),%ebx + xorl %ebx,%esi + movl 0x400(%ebp,%eax,1),%ebx + xorl %ebx,%esi + movl 0x500(%ebp,%edx,1),%ebx + xorl %ebx,%esi + movl 32(%esp),%ebp + movl (%esp),%ebx + movl %edi,%eax + decl %ebx + movl %esi,%edi + movl %eax,%esi + movl %ebx,(%esp) + jnz L001start + + # FP + movl 28(%esp),%edx + rorl $1,%edi + movl %esi,%eax + xorl %edi,%esi + andl $0xaaaaaaaa,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $23,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $10,%esi + movl %esi,%eax + xorl %edi,%esi + andl $0x33333333,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $18,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xfff0000f,%edi + xorl %edi,%esi + xorl %edi,%eax + + roll $12,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xf0f0f0f0,%esi + xorl %esi,%edi + xorl %esi,%eax + + rorl $4,%eax + movl %eax,(%edx) + movl %edi,4(%edx) + addl $8,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section 
__IMPORT,__pointers,non_lazy_symbol_pointers +L_DES_SPtrans$non_lazy_ptr: +.indirect_symbol _DES_SPtrans +.long 0 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/des/des-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/des/des-586.s new file mode 100644 index 00000000000000..43354871fcd4fa --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/des/des-586.s @@ -0,0 +1,1822 @@ +.file "des-586.s" +.text +.globl _DES_SPtrans +.align 4 +__x86_DES_encrypt: + pushl %ecx + # Round 0 + movl (%ecx),%eax + xorl %ebx,%ebx + movl 4(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 1 + movl 8(%ecx),%eax + xorl %ebx,%ebx + movl 12(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 2 + movl 16(%ecx),%eax + xorl %ebx,%ebx + movl 20(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 3 + movl 24(%ecx),%eax + xorl %ebx,%ebx + movl 28(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 4 + movl 32(%ecx),%eax + xorl %ebx,%ebx + movl 36(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 5 + movl 40(%ecx),%eax + xorl %ebx,%ebx + 
movl 44(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 6 + movl 48(%ecx),%eax + xorl %ebx,%ebx + movl 52(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 7 + movl 56(%ecx),%eax + xorl %ebx,%ebx + movl 60(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 8 + movl 64(%ecx),%eax + xorl %ebx,%ebx + movl 68(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 9 + movl 72(%ecx),%eax + xorl %ebx,%ebx + movl 76(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 10 + movl 80(%ecx),%eax + xorl %ebx,%ebx + movl 84(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 
0x500(%ebp,%edx,1),%edi + # Round 11 + movl 88(%ecx),%eax + xorl %ebx,%ebx + movl 92(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 12 + movl 96(%ecx),%eax + xorl %ebx,%ebx + movl 100(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 13 + movl 104(%ecx),%eax + xorl %ebx,%ebx + movl 108(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 14 + movl 112(%ecx),%eax + xorl %ebx,%ebx + movl 116(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 15 + movl 120(%ecx),%eax + xorl %ebx,%ebx + movl 124(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + addl $4,%esp + ret +.align 4 +__x86_DES_decrypt: + pushl %ecx + # Round 15 + movl 120(%ecx),%eax + xorl %ebx,%ebx + movl 124(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl 
+ andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 14 + movl 112(%ecx),%eax + xorl %ebx,%ebx + movl 116(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 13 + movl 104(%ecx),%eax + xorl %ebx,%ebx + movl 108(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 12 + movl 96(%ecx),%eax + xorl %ebx,%ebx + movl 100(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 11 + movl 88(%ecx),%eax + xorl %ebx,%ebx + movl 92(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 10 + movl 80(%ecx),%eax + xorl %ebx,%ebx + movl 84(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 9 + movl 72(%ecx),%eax + xorl %ebx,%ebx + movl 76(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 
0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 8 + movl 64(%ecx),%eax + xorl %ebx,%ebx + movl 68(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 7 + movl 56(%ecx),%eax + xorl %ebx,%ebx + movl 60(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 6 + movl 48(%ecx),%eax + xorl %ebx,%ebx + movl 52(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 5 + movl 40(%ecx),%eax + xorl %ebx,%ebx + movl 44(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 4 + movl 32(%ecx),%eax + xorl %ebx,%ebx + movl 36(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 3 + movl 24(%ecx),%eax + xorl %ebx,%ebx + movl 28(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb 
%dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 2 + movl 16(%ecx),%eax + xorl %ebx,%ebx + movl 20(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + # Round 1 + movl 8(%ecx),%eax + xorl %ebx,%ebx + movl 12(%ecx),%edx + xorl %esi,%eax + xorl %ecx,%ecx + xorl %esi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%edi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%edi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%edi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%edi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%edi + xorl 0x700(%ebp,%ecx,1),%edi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%edi + xorl 0x500(%ebp,%edx,1),%edi + # Round 0 + movl (%ecx),%eax + xorl %ebx,%ebx + movl 4(%ecx),%edx + xorl %edi,%eax + xorl %ecx,%ecx + xorl %edi,%edx + andl $0xfcfcfcfc,%eax + andl $0xcfcfcfcf,%edx + movb %al,%bl + movb %ah,%cl + rorl $4,%edx + xorl (%ebp,%ebx,1),%esi + movb %dl,%bl + xorl 0x200(%ebp,%ecx,1),%esi + movb %dh,%cl + shrl $16,%eax + xorl 0x100(%ebp,%ebx,1),%esi + movb %ah,%bl + shrl $16,%edx + xorl 0x300(%ebp,%ecx,1),%esi + movb %dh,%cl + andl $0xff,%eax + andl $0xff,%edx + xorl 0x600(%ebp,%ebx,1),%esi + xorl 0x700(%ebp,%ecx,1),%esi + movl (%esp),%ecx + xorl 0x400(%ebp,%eax,1),%esi + xorl 0x500(%ebp,%edx,1),%esi + addl $4,%esp + ret +.globl _DES_encrypt1 +.align 4 +_DES_encrypt1: +L_DES_encrypt1_begin: + pushl %esi + pushl %edi + + # Load the 2 words + movl 12(%esp),%esi + xorl %ecx,%ecx + pushl %ebx + pushl %ebp + movl (%esi),%eax + movl 28(%esp),%ebx + movl 4(%esi),%edi + + # IP + roll $4,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0xf0f0f0f0,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $20,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xfff0000f,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $14,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x33333333,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $22,%esi + movl %esi,%eax + xorl %edi,%esi + andl $0x03fc03fc,%esi + xorl %esi,%eax + xorl %esi,%edi + + roll $9,%eax + movl %eax,%esi + xorl %edi,%eax + andl $0xaaaaaaaa,%eax + xorl %eax,%esi + xorl %eax,%edi + + roll $1,%edi + call L000pic_point +L000pic_point: + popl %ebp + leal Ldes_sptrans-L000pic_point(%ebp),%ebp + movl 24(%esp),%ecx + cmpl $0,%ebx + je L001decrypt + call __x86_DES_encrypt + jmp L002done +L001decrypt: + call __x86_DES_decrypt +L002done: + + # FP + movl 20(%esp),%edx + rorl $1,%esi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl 
$0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%edx) + movl %esi,4(%edx) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.globl _DES_encrypt2 +.align 4 +_DES_encrypt2: +L_DES_encrypt2_begin: + pushl %esi + pushl %edi + + # Load the 2 words + movl 12(%esp),%eax + xorl %ecx,%ecx + pushl %ebx + pushl %ebp + movl (%eax),%esi + movl 28(%esp),%ebx + roll $3,%esi + movl 4(%eax),%edi + roll $3,%edi + call L003pic_point +L003pic_point: + popl %ebp + leal Ldes_sptrans-L003pic_point(%ebp),%ebp + movl 24(%esp),%ecx + cmpl $0,%ebx + je L004decrypt + call __x86_DES_encrypt + jmp L005done +L004decrypt: + call __x86_DES_decrypt +L005done: + + # Fixup + rorl $3,%edi + movl 20(%esp),%eax + rorl $3,%esi + movl %edi,(%eax) + movl %esi,4(%eax) + popl %ebp + popl %ebx + popl %edi + popl %esi + ret +.globl _DES_encrypt3 +.align 4 +_DES_encrypt3: +L_DES_encrypt3_begin: + pushl %ebx + movl 8(%esp),%ebx + pushl %ebp + pushl %esi + pushl %edi + + # Load the data words + movl (%ebx),%edi + movl 4(%ebx),%esi + subl $12,%esp + + # IP + roll $4,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%edx + xorl %edi,%esi + + roll $20,%esi + movl %esi,%edi + xorl %edx,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%edx + + roll $14,%edi + movl %edi,%esi + xorl %edx,%edi + andl $0x33333333,%edi + xorl %edi,%esi + xorl %edi,%edx + + roll $22,%edx + movl %edx,%edi + xorl %esi,%edx + andl $0x03fc03fc,%edx + xorl %edx,%edi + xorl %edx,%esi + + roll $9,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%edx + xorl %edi,%esi + + rorl $3,%edx + rorl $2,%esi + movl %esi,4(%ebx) + movl 36(%esp),%eax + movl %edx,(%ebx) + movl 40(%esp),%edi + movl 44(%esp),%esi + movl $1,8(%esp) + movl %eax,4(%esp) + movl %ebx,(%esp) + call L_DES_encrypt2_begin + movl $0,8(%esp) + movl %edi,4(%esp) + movl %ebx,(%esp) + call L_DES_encrypt2_begin + movl $1,8(%esp) + movl %esi,4(%esp) + movl %ebx,(%esp) + call L_DES_encrypt2_begin + addl $12,%esp + movl (%ebx),%edi + movl 4(%ebx),%esi + + # FP + roll $2,%esi + roll $3,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%ebx) + movl %esi,4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.globl _DES_decrypt3 +.align 4 +_DES_decrypt3: +L_DES_decrypt3_begin: + pushl %ebx + movl 8(%esp),%ebx + pushl %ebp + pushl %esi + pushl %edi + + # Load the data words + movl (%ebx),%edi + movl 4(%ebx),%esi + subl $12,%esp + + # IP + roll $4,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%edx + xorl %edi,%esi + + roll $20,%esi + movl %esi,%edi + xorl %edx,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%edx + + roll $14,%edi + movl %edi,%esi + xorl %edx,%edi + andl $0x33333333,%edi + xorl %edi,%esi + xorl %edi,%edx + + roll $22,%edx + movl %edx,%edi + xorl 
%esi,%edx + andl $0x03fc03fc,%edx + xorl %edx,%edi + xorl %edx,%esi + + roll $9,%edi + movl %edi,%edx + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%edx + xorl %edi,%esi + + rorl $3,%edx + rorl $2,%esi + movl %esi,4(%ebx) + movl 36(%esp),%esi + movl %edx,(%ebx) + movl 40(%esp),%edi + movl 44(%esp),%eax + movl $0,8(%esp) + movl %eax,4(%esp) + movl %ebx,(%esp) + call L_DES_encrypt2_begin + movl $1,8(%esp) + movl %edi,4(%esp) + movl %ebx,(%esp) + call L_DES_encrypt2_begin + movl $0,8(%esp) + movl %esi,4(%esp) + movl %ebx,(%esp) + call L_DES_encrypt2_begin + addl $12,%esp + movl (%ebx),%edi + movl 4(%ebx),%esi + + # FP + roll $2,%esi + roll $3,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0xaaaaaaaa,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $23,%eax + movl %eax,%edi + xorl %esi,%eax + andl $0x03fc03fc,%eax + xorl %eax,%edi + xorl %eax,%esi + + roll $10,%edi + movl %edi,%eax + xorl %esi,%edi + andl $0x33333333,%edi + xorl %edi,%eax + xorl %edi,%esi + + roll $18,%esi + movl %esi,%edi + xorl %eax,%esi + andl $0xfff0000f,%esi + xorl %esi,%edi + xorl %esi,%eax + + roll $12,%edi + movl %edi,%esi + xorl %eax,%edi + andl $0xf0f0f0f0,%edi + xorl %edi,%esi + xorl %edi,%eax + + rorl $4,%eax + movl %eax,(%ebx) + movl %esi,4(%ebx) + popl %edi + popl %esi + popl %ebp + popl %ebx + ret +.globl _DES_ncbc_encrypt +.align 4 +_DES_ncbc_encrypt: +L_DES_ncbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + # getting iv ptr from parameter 4 + movl 36(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + # getting encrypt flag from parameter 5 + movl 56(%esp),%ecx + # get and push parameter 5 + pushl %ecx + # get and push parameter 3 + movl 52(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz L006decrypt + andl $4294967288,%ebp + movl 12(%esp),%eax + movl 16(%esp),%ebx + jz L007encrypt_finish +L008encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz L008encrypt_loop +L007encrypt_finish: + movl 56(%esp),%ebp + andl $7,%ebp + jz L009finish + call L010PIC_point +L010PIC_point: + popl %edx + leal L011cbc_enc_jmp_table-L010PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +L012ej7: + movb 6(%esi),%dh + shll $8,%edx +L013ej6: + movb 5(%esi),%dh +L014ej5: + movb 4(%esi),%dl +L015ej4: + movl (%esi),%ecx + jmp L016ejend +L017ej3: + movb 2(%esi),%ch + shll $8,%ecx +L018ej2: + movb 1(%esi),%ch +L019ej1: + movb (%esi),%cl +L016ejend: + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp L009finish +L006decrypt: + andl $4294967288,%ebp + movl 20(%esp),%eax + movl 24(%esp),%ebx + jz L020decrypt_finish +L021decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl 20(%esp),%ecx + movl 24(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,20(%esp) + movl %ebx,24(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz L021decrypt_loop 
+L020decrypt_finish: + movl 56(%esp),%ebp + andl $7,%ebp + jz L009finish + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,12(%esp) + movl %ebx,16(%esp) + call L_DES_encrypt1_begin + movl 12(%esp),%eax + movl 16(%esp),%ebx + movl 20(%esp),%ecx + movl 24(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +L022dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +L023dj6: + movb %dh,5(%edi) +L024dj5: + movb %dl,4(%edi) +L025dj4: + movl %ecx,(%edi) + jmp L026djend +L027dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +L028dj2: + movb %ch,1(%esi) +L029dj1: + movb %cl,(%esi) +L026djend: + jmp L009finish +L009finish: + movl 64(%esp),%ecx + addl $28,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L011cbc_enc_jmp_table: +.long 0 +.long L019ej1-L010PIC_point +.long L018ej2-L010PIC_point +.long L017ej3-L010PIC_point +.long L015ej4-L010PIC_point +.long L014ej5-L010PIC_point +.long L013ej6-L010PIC_point +.long L012ej7-L010PIC_point +.align 6,0x90 +.globl _DES_ede3_cbc_encrypt +.align 4 +_DES_ede3_cbc_encrypt: +L_DES_ede3_cbc_encrypt_begin: + + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ebp + # getting iv ptr from parameter 6 + movl 44(%esp),%ebx + movl (%ebx),%esi + movl 4(%ebx),%edi + pushl %edi + pushl %esi + pushl %edi + pushl %esi + movl %esp,%ebx + movl 36(%esp),%esi + movl 40(%esp),%edi + # getting encrypt flag from parameter 7 + movl 64(%esp),%ecx + # get and push parameter 5 + movl 56(%esp),%eax + pushl %eax + # get and push parameter 4 + movl 56(%esp),%eax + pushl %eax + # get and push parameter 3 + movl 56(%esp),%eax + pushl %eax + pushl %ebx + cmpl $0,%ecx + jz L030decrypt + andl $4294967288,%ebp + movl 16(%esp),%eax + movl 20(%esp),%ebx + jz L031encrypt_finish +L032encrypt_loop: + movl (%esi),%ecx + movl 4(%esi),%edx + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call L_DES_encrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz L032encrypt_loop +L031encrypt_finish: + movl 60(%esp),%ebp + andl $7,%ebp + jz L033finish + call L034PIC_point +L034PIC_point: + popl %edx + leal L035cbc_enc_jmp_table-L034PIC_point(%edx),%ecx + movl (%ecx,%ebp,4),%ebp + addl %edx,%ebp + xorl %ecx,%ecx + xorl %edx,%edx + jmp *%ebp +L036ej7: + movb 6(%esi),%dh + shll $8,%edx +L037ej6: + movb 5(%esi),%dh +L038ej5: + movb 4(%esi),%dl +L039ej4: + movl (%esi),%ecx + jmp L040ejend +L041ej3: + movb 2(%esi),%ch + shll $8,%ecx +L042ej2: + movb 1(%esi),%ch +L043ej1: + movb (%esi),%cl +L040ejend: + xorl %ecx,%eax + xorl %edx,%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call L_DES_encrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl %eax,(%edi) + movl %ebx,4(%edi) + jmp L033finish +L030decrypt: + andl $4294967288,%ebp + movl 24(%esp),%eax + movl 28(%esp),%ebx + jz L044decrypt_finish +L045decrypt_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + call L_DES_decrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl 24(%esp),%ecx + movl 28(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx + movl %ecx,(%edi) + movl %edx,4(%edi) + movl %eax,24(%esp) + movl %ebx,28(%esp) + addl $8,%esi + addl $8,%edi + subl $8,%ebp + jnz L045decrypt_loop +L044decrypt_finish: + movl 60(%esp),%ebp + andl $7,%ebp + jz L033finish + movl (%esi),%eax + movl 4(%esi),%ebx + movl %eax,16(%esp) + movl %ebx,20(%esp) + 
call L_DES_decrypt3_begin + movl 16(%esp),%eax + movl 20(%esp),%ebx + movl 24(%esp),%ecx + movl 28(%esp),%edx + xorl %eax,%ecx + xorl %ebx,%edx + movl (%esi),%eax + movl 4(%esi),%ebx +L046dj7: + rorl $16,%edx + movb %dl,6(%edi) + shrl $16,%edx +L047dj6: + movb %dh,5(%edi) +L048dj5: + movb %dl,4(%edi) +L049dj4: + movl %ecx,(%edi) + jmp L050djend +L051dj3: + rorl $16,%ecx + movb %cl,2(%edi) + shll $16,%ecx +L052dj2: + movb %ch,1(%esi) +L053dj1: + movb %cl,(%esi) +L050djend: + jmp L033finish +L033finish: + movl 76(%esp),%ecx + addl $32,%esp + movl %eax,(%ecx) + movl %ebx,4(%ecx) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L035cbc_enc_jmp_table: +.long 0 +.long L043ej1-L034PIC_point +.long L042ej2-L034PIC_point +.long L041ej3-L034PIC_point +.long L039ej4-L034PIC_point +.long L038ej5-L034PIC_point +.long L037ej6-L034PIC_point +.long L036ej7-L034PIC_point +.align 6,0x90 +.align 6,0x90 +_DES_SPtrans: +Ldes_sptrans: +.long 34080768,524288,33554434,34080770 +.long 33554432,526338,524290,33554434 +.long 526338,34080768,34078720,2050 +.long 33556482,33554432,0,524290 +.long 524288,2,33556480,526336 +.long 34080770,34078720,2050,33556480 +.long 2,2048,526336,34078722 +.long 2048,33556482,34078722,0 +.long 0,34080770,33556480,524290 +.long 34080768,524288,2050,33556480 +.long 34078722,2048,526336,33554434 +.long 526338,2,33554434,34078720 +.long 34080770,526336,34078720,33556482 +.long 33554432,2050,524290,0 +.long 524288,33554432,33556482,34080768 +.long 2,34078722,2048,526338 +.long 1074823184,0,1081344,1074790400 +.long 1073741840,32784,1073774592,1081344 +.long 32768,1074790416,16,1073774592 +.long 1048592,1074823168,1074790400,16 +.long 1048576,1073774608,1074790416,32768 +.long 1081360,1073741824,0,1048592 +.long 1073774608,1081360,1074823168,1073741840 +.long 1073741824,1048576,32784,1074823184 +.long 1048592,1074823168,1073774592,1081360 +.long 1074823184,1048592,1073741840,0 +.long 1073741824,32784,1048576,1074790416 +.long 32768,1073741824,1081360,1073774608 +.long 1074823168,32768,0,1073741840 +.long 16,1074823184,1081344,1074790400 +.long 1074790416,1048576,32784,1073774592 +.long 1073774608,16,1074790400,1081344 +.long 67108865,67371264,256,67109121 +.long 262145,67108864,67109121,262400 +.long 67109120,262144,67371008,1 +.long 67371265,257,1,67371009 +.long 0,262145,67371264,256 +.long 257,67371265,262144,67108865 +.long 67371009,67109120,262401,67371008 +.long 262400,0,67108864,262401 +.long 67371264,256,1,262144 +.long 257,262145,67371008,67109121 +.long 0,67371264,262400,67371009 +.long 262145,67108864,67371265,1 +.long 262401,67108865,67108864,67371265 +.long 262144,67109120,67109121,262400 +.long 67109120,0,67371009,257 +.long 67108865,262401,256,67371008 +.long 4198408,268439552,8,272633864 +.long 0,272629760,268439560,4194312 +.long 272633856,268435464,268435456,4104 +.long 268435464,4198408,4194304,268435456 +.long 272629768,4198400,4096,8 +.long 4198400,268439560,272629760,4096 +.long 4104,0,4194312,272633856 +.long 268439552,272629768,272633864,4194304 +.long 272629768,4104,4194304,268435464 +.long 4198400,268439552,8,272629760 +.long 268439560,0,4096,4194312 +.long 0,272629768,272633856,4096 +.long 268435456,272633864,4198408,4194304 +.long 272633864,8,268439552,4198408 +.long 4194312,4198400,272629760,268439560 +.long 4104,268435456,268435464,272633856 +.long 134217728,65536,1024,134284320 +.long 134283296,134218752,66592,134283264 +.long 65536,32,134217760,66560 +.long 134218784,134283296,134284288,0 +.long 66560,134217728,65568,1056 +.long 
134218752,66592,0,134217760 +.long 32,134218784,134284320,65568 +.long 134283264,1024,1056,134284288 +.long 134284288,134218784,65568,134283264 +.long 65536,32,134217760,134218752 +.long 134217728,66560,134284320,0 +.long 66592,134217728,1024,65568 +.long 134218784,1024,0,134284320 +.long 134283296,134284288,1056,65536 +.long 66560,134283296,134218752,1056 +.long 32,66592,134283264,134217760 +.long 2147483712,2097216,0,2149588992 +.long 2097216,8192,2147491904,2097152 +.long 8256,2149589056,2105344,2147483648 +.long 2147491840,2147483712,2149580800,2105408 +.long 2097152,2147491904,2149580864,0 +.long 8192,64,2149588992,2149580864 +.long 2149589056,2149580800,2147483648,8256 +.long 64,2105344,2105408,2147491840 +.long 8256,2147483648,2147491840,2105408 +.long 2149588992,2097216,0,2147491840 +.long 2147483648,8192,2149580864,2097152 +.long 2097216,2149589056,2105344,64 +.long 2149589056,2105344,2097152,2147491904 +.long 2147483712,2149580800,2105408,0 +.long 8192,2147483712,2147491904,2149588992 +.long 2149580800,8256,64,2149580864 +.long 16384,512,16777728,16777220 +.long 16794116,16388,16896,0 +.long 16777216,16777732,516,16793600 +.long 4,16794112,16793600,516 +.long 16777732,16384,16388,16794116 +.long 0,16777728,16777220,16896 +.long 16793604,16900,16794112,4 +.long 16900,16793604,512,16777216 +.long 16900,16793600,16793604,516 +.long 16384,512,16777216,16793604 +.long 16777732,16900,16896,0 +.long 512,16777220,4,16777728 +.long 0,16777732,16777728,16896 +.long 516,16384,16794116,16777216 +.long 16794112,4,16388,16794116 +.long 16777220,16794112,16793600,16388 +.long 545259648,545390592,131200,0 +.long 537001984,8388736,545259520,545390720 +.long 128,536870912,8519680,131200 +.long 8519808,537002112,536871040,545259520 +.long 131072,8519808,8388736,537001984 +.long 545390720,536871040,0,8519680 +.long 536870912,8388608,537002112,545259648 +.long 8388608,131072,545390592,128 +.long 8388608,131072,536871040,545390720 +.long 131200,536870912,0,8519680 +.long 545259648,537002112,537001984,8388736 +.long 545390592,128,8388736,537001984 +.long 545390720,8388608,545259520,536871040 +.long 8519680,131200,537002112,545259520 +.long 128,545390592,8519808,0 +.long 536870912,545259648,131072,8519808 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/md5/md5-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/md5/md5-586.s new file mode 100644 index 00000000000000..cd5dd459ca0337 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/md5/md5-586.s @@ -0,0 +1,677 @@ +.file "../openssl/crypto/md5/asm/md5-586.s" +.text +.globl _md5_block_asm_data_order +.align 4 +_md5_block_asm_data_order: +L_md5_block_asm_data_order_begin: + pushl %esi + pushl %edi + movl 12(%esp),%edi + movl 16(%esp),%esi + movl 20(%esp),%ecx + pushl %ebp + shll $6,%ecx + pushl %ebx + addl %esi,%ecx + subl $64,%ecx + movl (%edi),%eax + pushl %ecx + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx +L000start: + + # R0 section + movl %ecx,%edi + movl (%esi),%ebp + # R0 0 + xorl %edx,%edi + andl %ebx,%edi + leal 3614090360(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 4(%esi),%ebp + addl %ebx,%eax + # R0 1 + xorl %ecx,%edi + andl %eax,%edi + leal 3905402710(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 8(%esi),%ebp + addl %eax,%edx + # R0 2 + xorl %ebx,%edi + andl %edx,%edi + leal 606105819(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 12(%esi),%ebp + addl %edx,%ecx + # R0 3 + xorl 
%eax,%edi + andl %ecx,%edi + leal 3250441966(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 16(%esi),%ebp + addl %ecx,%ebx + # R0 4 + xorl %edx,%edi + andl %ebx,%edi + leal 4118548399(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 20(%esi),%ebp + addl %ebx,%eax + # R0 5 + xorl %ecx,%edi + andl %eax,%edi + leal 1200080426(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 24(%esi),%ebp + addl %eax,%edx + # R0 6 + xorl %ebx,%edi + andl %edx,%edi + leal 2821735955(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 28(%esi),%ebp + addl %edx,%ecx + # R0 7 + xorl %eax,%edi + andl %ecx,%edi + leal 4249261313(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 32(%esi),%ebp + addl %ecx,%ebx + # R0 8 + xorl %edx,%edi + andl %ebx,%edi + leal 1770035416(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 36(%esi),%ebp + addl %ebx,%eax + # R0 9 + xorl %ecx,%edi + andl %eax,%edi + leal 2336552879(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 40(%esi),%ebp + addl %eax,%edx + # R0 10 + xorl %ebx,%edi + andl %edx,%edi + leal 4294925233(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 44(%esi),%ebp + addl %edx,%ecx + # R0 11 + xorl %eax,%edi + andl %ecx,%edi + leal 2304563134(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 48(%esi),%ebp + addl %ecx,%ebx + # R0 12 + xorl %edx,%edi + andl %ebx,%edi + leal 1804603682(%eax,%ebp,1),%eax + xorl %edx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $7,%eax + movl 52(%esi),%ebp + addl %ebx,%eax + # R0 13 + xorl %ecx,%edi + andl %eax,%edi + leal 4254626195(%edx,%ebp,1),%edx + xorl %ecx,%edi + addl %edi,%edx + movl %eax,%edi + roll $12,%edx + movl 56(%esi),%ebp + addl %eax,%edx + # R0 14 + xorl %ebx,%edi + andl %edx,%edi + leal 2792965006(%ecx,%ebp,1),%ecx + xorl %ebx,%edi + addl %edi,%ecx + movl %edx,%edi + roll $17,%ecx + movl 60(%esi),%ebp + addl %edx,%ecx + # R0 15 + xorl %eax,%edi + andl %ecx,%edi + leal 1236535329(%ebx,%ebp,1),%ebx + xorl %eax,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $22,%ebx + movl 4(%esi),%ebp + addl %ecx,%ebx + + # R1 section + # R1 16 + leal 4129170786(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 24(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 17 + leal 3225465664(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 44(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 18 + leal 643717713(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl (%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 19 + leal 3921069994(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 20 + leal 3593408605(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 40(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 21 + leal 38016083(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 60(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 22 + leal 3634488961(%ecx,%ebp,1),%ecx + xorl 
%edx,%edi + andl %ebx,%edi + movl 16(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 23 + leal 3889429448(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 36(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 24 + leal 568446438(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 56(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 25 + leal 3275163606(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 12(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 26 + leal 4107603335(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 32(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 27 + leal 1163531501(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 52(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + # R1 28 + leal 2850285829(%eax,%ebp,1),%eax + xorl %ebx,%edi + andl %edx,%edi + movl 8(%esi),%ebp + xorl %ecx,%edi + addl %edi,%eax + movl %ebx,%edi + roll $5,%eax + addl %ebx,%eax + # R1 29 + leal 4243563512(%edx,%ebp,1),%edx + xorl %eax,%edi + andl %ecx,%edi + movl 28(%esi),%ebp + xorl %ebx,%edi + addl %edi,%edx + movl %eax,%edi + roll $9,%edx + addl %eax,%edx + # R1 30 + leal 1735328473(%ecx,%ebp,1),%ecx + xorl %edx,%edi + andl %ebx,%edi + movl 48(%esi),%ebp + xorl %eax,%edi + addl %edi,%ecx + movl %edx,%edi + roll $14,%ecx + addl %edx,%ecx + # R1 31 + leal 2368359562(%ebx,%ebp,1),%ebx + xorl %ecx,%edi + andl %eax,%edi + movl 20(%esi),%ebp + xorl %edx,%edi + addl %edi,%ebx + movl %ecx,%edi + roll $20,%ebx + addl %ecx,%ebx + + # R2 section + # R2 32 + xorl %edx,%edi + xorl %ebx,%edi + leal 4294588738(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 32(%esi),%ebp + movl %ebx,%edi + # R2 33 + leal 2272392833(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 44(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 34 + xorl %ebx,%edi + xorl %edx,%edi + leal 1839030562(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 56(%esi),%ebp + movl %edx,%edi + # R2 35 + leal 4259657740(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 4(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 36 + xorl %edx,%edi + xorl %ebx,%edi + leal 2763975236(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 16(%esi),%ebp + movl %ebx,%edi + # R2 37 + leal 1272893353(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 28(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 38 + xorl %ebx,%edi + xorl %edx,%edi + leal 4139469664(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 40(%esi),%ebp + movl %edx,%edi + # R2 39 + leal 3200236656(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 52(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 40 + xorl %edx,%edi + xorl %ebx,%edi + leal 681279174(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl (%esi),%ebp + movl %ebx,%edi + # R2 41 + leal 3936430074(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 12(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 42 + xorl %ebx,%edi + xorl %edx,%edi + leal 
3572445317(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 24(%esi),%ebp + movl %edx,%edi + # R2 43 + leal 76029189(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl 36(%esi),%ebp + addl %edi,%ebx + movl %ecx,%edi + roll $23,%ebx + addl %ecx,%ebx + # R2 44 + xorl %edx,%edi + xorl %ebx,%edi + leal 3654602809(%eax,%ebp,1),%eax + addl %edi,%eax + roll $4,%eax + movl 48(%esi),%ebp + movl %ebx,%edi + # R2 45 + leal 3873151461(%edx,%ebp,1),%edx + addl %ebx,%eax + xorl %ecx,%edi + xorl %eax,%edi + movl 60(%esi),%ebp + addl %edi,%edx + movl %eax,%edi + roll $11,%edx + addl %eax,%edx + # R2 46 + xorl %ebx,%edi + xorl %edx,%edi + leal 530742520(%ecx,%ebp,1),%ecx + addl %edi,%ecx + roll $16,%ecx + movl 8(%esi),%ebp + movl %edx,%edi + # R2 47 + leal 3299628645(%ebx,%ebp,1),%ebx + addl %edx,%ecx + xorl %eax,%edi + xorl %ecx,%edi + movl (%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $23,%ebx + addl %ecx,%ebx + + # R3 section + # R3 48 + xorl %edx,%edi + orl %ebx,%edi + leal 4096336452(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 28(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 49 + orl %eax,%edi + leal 1126891415(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 56(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 50 + orl %edx,%edi + leal 2878612391(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 20(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 51 + orl %ecx,%edi + leal 4237533241(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 48(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 52 + orl %ebx,%edi + leal 1700485571(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 12(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 53 + orl %eax,%edi + leal 2399980690(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 40(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 54 + orl %edx,%edi + leal 4293915773(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 4(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 55 + orl %ecx,%edi + leal 2240044497(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 32(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 56 + orl %ebx,%edi + leal 1873313359(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 60(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 57 + orl %eax,%edi + leal 4264355552(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 24(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 58 + orl %edx,%edi + leal 2734768916(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 52(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 59 + orl %ecx,%edi + leal 1309151649(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 16(%esi),%ebp + addl %edi,%ebx + movl $-1,%edi + roll $21,%ebx + xorl %edx,%edi + addl %ecx,%ebx + # R3 60 + orl %ebx,%edi + leal 4149444226(%eax,%ebp,1),%eax + xorl %ecx,%edi + movl 44(%esi),%ebp + addl %edi,%eax + movl $-1,%edi + roll $6,%eax + xorl %ecx,%edi + addl %ebx,%eax + # R3 61 + orl %eax,%edi + leal 3174756917(%edx,%ebp,1),%edx + xorl %ebx,%edi + movl 8(%esi),%ebp + addl %edi,%edx + movl $-1,%edi + roll $10,%edx + xorl %ebx,%edi + addl %eax,%edx + # R3 62 + orl 
%edx,%edi + leal 718787259(%ecx,%ebp,1),%ecx + xorl %eax,%edi + movl 36(%esi),%ebp + addl %edi,%ecx + movl $-1,%edi + roll $15,%ecx + xorl %eax,%edi + addl %edx,%ecx + # R3 63 + orl %ecx,%edi + leal 3951481745(%ebx,%ebp,1),%ebx + xorl %edx,%edi + movl 24(%esp),%ebp + addl %edi,%ebx + addl $64,%esi + roll $21,%ebx + movl (%ebp),%edi + addl %ecx,%ebx + addl %edi,%eax + movl 4(%ebp),%edi + addl %edi,%ebx + movl 8(%ebp),%edi + addl %edi,%ecx + movl 12(%ebp),%edi + addl %edi,%edx + movl %eax,(%ebp) + movl %ebx,4(%ebp) + movl (%esp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + cmpl %esi,%edi + jae L000start + popl %eax + popl %ebx + popl %ebp + popl %edi + popl %esi + ret diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/modes/ghash-x86.s b/deps/openssl/asm_obsolete/x86-macosx-gas/modes/ghash-x86.s new file mode 100644 index 00000000000000..c68edef5be97d8 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/modes/ghash-x86.s @@ -0,0 +1,1251 @@ +.file "ghash-x86.s" +.text +.globl _gcm_gmult_4bit_x86 +.align 4 +_gcm_gmult_4bit_x86: +L_gcm_gmult_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%edi + movl 108(%esp),%esi + movl (%edi),%ebp + movl 4(%edi),%edx + movl 8(%edi),%ecx + movl 12(%edi),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl $3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) + movl %ebp,(%esp) + movl %edx,4(%esp) + movl %ecx,8(%esp) + movl %ebx,12(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp L000x86_loop +.align 4,0x90 +L000x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js L001x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp L000x86_loop +.align 4,0x90 +L001x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_ghash_4bit_x86 +.align 4 +_gcm_ghash_4bit_x86: +L_gcm_ghash_4bit_x86_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $84,%esp + movl 104(%esp),%ebx + movl 108(%esp),%esi + movl 112(%esp),%edi + movl 116(%esp),%ecx + addl %edi,%ecx + movl %ecx,116(%esp) + movl (%ebx),%ebp + movl 4(%ebx),%edx + movl 8(%ebx),%ecx + movl 12(%ebx),%ebx + movl $0,16(%esp) + movl $471859200,20(%esp) + movl $943718400,24(%esp) + movl $610271232,28(%esp) + movl $1887436800,32(%esp) + movl $1822425088,36(%esp) + movl $1220542464,40(%esp) + movl $1423966208,44(%esp) + movl $3774873600,48(%esp) + movl $4246732800,52(%esp) + movl $3644850176,56(%esp) + movl 
$3311403008,60(%esp) + movl $2441084928,64(%esp) + movl $2376073216,68(%esp) + movl $2847932416,72(%esp) + movl $3051356160,76(%esp) +.align 4,0x90 +L002x86_outer_loop: + xorl 12(%edi),%ebx + xorl 8(%edi),%ecx + xorl 4(%edi),%edx + xorl (%edi),%ebp + movl %ebx,12(%esp) + movl %ecx,8(%esp) + movl %edx,4(%esp) + movl %ebp,(%esp) + shrl $20,%ebx + andl $240,%ebx + movl 4(%esi,%ebx,1),%ebp + movl (%esi,%ebx,1),%edx + movl 12(%esi,%ebx,1),%ecx + movl 8(%esi,%ebx,1),%ebx + xorl %eax,%eax + movl $15,%edi + jmp L003x86_loop +.align 4,0x90 +L003x86_loop: + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + andb $240,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + decl %edi + js L004x86_break + movb %bl,%al + shrdl $4,%ecx,%ebx + andb $15,%al + shrdl $4,%edx,%ecx + shrdl $4,%ebp,%edx + shrl $4,%ebp + xorl 16(%esp,%eax,4),%ebp + movb (%esp,%edi,1),%al + shlb $4,%al + xorl 8(%esi,%eax,1),%ebx + xorl 12(%esi,%eax,1),%ecx + xorl (%esi,%eax,1),%edx + xorl 4(%esi,%eax,1),%ebp + jmp L003x86_loop +.align 4,0x90 +L004x86_break: + bswap %ebx + bswap %ecx + bswap %edx + bswap %ebp + movl 112(%esp),%edi + leal 16(%edi),%edi + cmpl 116(%esp),%edi + movl %edi,112(%esp) + jb L002x86_outer_loop + movl 104(%esp),%edi + movl %ebx,12(%edi) + movl %ecx,8(%edi) + movl %edx,4(%edi) + movl %ebp,(%edi) + addl $84,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_gmult_4bit_mmx +.align 4 +_gcm_gmult_4bit_mmx: +L_gcm_gmult_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%esi + call L005pic_point +L005pic_point: + popl %eax + leal Lrem_4bit-L005pic_point(%eax),%eax + movzbl 15(%edi),%ebx + xorl %ecx,%ecx + movl %ebx,%edx + movb %dl,%cl + movl $14,%ebp + shlb $4,%cl + andl $240,%edx + movq 8(%esi,%ecx,1),%mm0 + movq (%esi,%ecx,1),%mm1 + movd %mm0,%ebx + jmp L006mmx_loop +.align 4,0x90 +L006mmx_loop: + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + movb (%edi,%ebp,1),%cl + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + decl %ebp + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + movl %ecx,%edx + pxor %mm2,%mm0 + js L007mmx_break + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + jmp L006mmx_loop +.align 4,0x90 +L007mmx_break: + shlb $4,%cl + andl $15,%ebx + psrlq $4,%mm0 + andl $240,%edx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%ecx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%ecx,1),%mm1 + pxor %mm2,%mm0 + psrlq $4,%mm0 + andl $15,%ebx + movq %mm1,%mm2 + psrlq $4,%mm1 + pxor 8(%esi,%edx,1),%mm0 + psllq $60,%mm2 + pxor (%eax,%ebx,8),%mm1 + movd %mm0,%ebx + pxor (%esi,%edx,1),%mm1 + pxor %mm2,%mm0 + psrlq $32,%mm0 + movd %mm1,%edx + psrlq $32,%mm1 + movd %mm0,%ecx + movd %mm1,%ebp + bswap %ebx + bswap %edx + bswap %ecx + bswap %ebp + emms + movl %ebx,12(%edi) + movl %edx,4(%edi) + movl %ecx,8(%edi) + movl %ebp,(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_ghash_4bit_mmx +.align 4 +_gcm_ghash_4bit_mmx: +L_gcm_ghash_4bit_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%ebx + movl 28(%esp),%ecx + movl 32(%esp),%edx + movl %esp,%ebp + call L008pic_point 
+L008pic_point: + popl %esi + leal Lrem_8bit-L008pic_point(%esi),%esi + subl $544,%esp + andl $-64,%esp + subl $16,%esp + addl %ecx,%edx + movl %eax,544(%esp) + movl %edx,552(%esp) + movl %ebp,556(%esp) + addl $128,%ebx + leal 144(%esp),%edi + leal 400(%esp),%ebp + movl -120(%ebx),%edx + movq -120(%ebx),%mm0 + movq -128(%ebx),%mm3 + shll $4,%edx + movb %dl,(%esp) + movl -104(%ebx),%edx + movq -104(%ebx),%mm2 + movq -112(%ebx),%mm5 + movq %mm0,-128(%edi) + psrlq $4,%mm0 + movq %mm3,(%edi) + movq %mm3,%mm7 + psrlq $4,%mm3 + shll $4,%edx + movb %dl,1(%esp) + movl -88(%ebx),%edx + movq -88(%ebx),%mm1 + psllq $60,%mm7 + movq -96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-120(%edi) + psrlq $4,%mm2 + movq %mm5,8(%edi) + movq %mm5,%mm6 + movq %mm0,-128(%ebp) + psrlq $4,%mm5 + movq %mm3,(%ebp) + shll $4,%edx + movb %dl,2(%esp) + movl -72(%ebx),%edx + movq -72(%ebx),%mm0 + psllq $60,%mm6 + movq -80(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-112(%edi) + psrlq $4,%mm1 + movq %mm4,16(%edi) + movq %mm4,%mm7 + movq %mm2,-120(%ebp) + psrlq $4,%mm4 + movq %mm5,8(%ebp) + shll $4,%edx + movb %dl,3(%esp) + movl -56(%ebx),%edx + movq -56(%ebx),%mm2 + psllq $60,%mm7 + movq -64(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-104(%edi) + psrlq $4,%mm0 + movq %mm3,24(%edi) + movq %mm3,%mm6 + movq %mm1,-112(%ebp) + psrlq $4,%mm3 + movq %mm4,16(%ebp) + shll $4,%edx + movb %dl,4(%esp) + movl -40(%ebx),%edx + movq -40(%ebx),%mm1 + psllq $60,%mm6 + movq -48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-96(%edi) + psrlq $4,%mm2 + movq %mm5,32(%edi) + movq %mm5,%mm7 + movq %mm0,-104(%ebp) + psrlq $4,%mm5 + movq %mm3,24(%ebp) + shll $4,%edx + movb %dl,5(%esp) + movl -24(%ebx),%edx + movq -24(%ebx),%mm0 + psllq $60,%mm7 + movq -32(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-88(%edi) + psrlq $4,%mm1 + movq %mm4,40(%edi) + movq %mm4,%mm6 + movq %mm2,-96(%ebp) + psrlq $4,%mm4 + movq %mm5,32(%ebp) + shll $4,%edx + movb %dl,6(%esp) + movl -8(%ebx),%edx + movq -8(%ebx),%mm2 + psllq $60,%mm6 + movq -16(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-80(%edi) + psrlq $4,%mm0 + movq %mm3,48(%edi) + movq %mm3,%mm7 + movq %mm1,-88(%ebp) + psrlq $4,%mm3 + movq %mm4,40(%ebp) + shll $4,%edx + movb %dl,7(%esp) + movl 8(%ebx),%edx + movq 8(%ebx),%mm1 + psllq $60,%mm7 + movq (%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-72(%edi) + psrlq $4,%mm2 + movq %mm5,56(%edi) + movq %mm5,%mm6 + movq %mm0,-80(%ebp) + psrlq $4,%mm5 + movq %mm3,48(%ebp) + shll $4,%edx + movb %dl,8(%esp) + movl 24(%ebx),%edx + movq 24(%ebx),%mm0 + psllq $60,%mm6 + movq 16(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-64(%edi) + psrlq $4,%mm1 + movq %mm4,64(%edi) + movq %mm4,%mm7 + movq %mm2,-72(%ebp) + psrlq $4,%mm4 + movq %mm5,56(%ebp) + shll $4,%edx + movb %dl,9(%esp) + movl 40(%ebx),%edx + movq 40(%ebx),%mm2 + psllq $60,%mm7 + movq 32(%ebx),%mm5 + por %mm7,%mm1 + movq %mm0,-56(%edi) + psrlq $4,%mm0 + movq %mm3,72(%edi) + movq %mm3,%mm6 + movq %mm1,-64(%ebp) + psrlq $4,%mm3 + movq %mm4,64(%ebp) + shll $4,%edx + movb %dl,10(%esp) + movl 56(%ebx),%edx + movq 56(%ebx),%mm1 + psllq $60,%mm6 + movq 48(%ebx),%mm4 + por %mm6,%mm0 + movq %mm2,-48(%edi) + psrlq $4,%mm2 + movq %mm5,80(%edi) + movq %mm5,%mm7 + movq %mm0,-56(%ebp) + psrlq $4,%mm5 + movq %mm3,72(%ebp) + shll $4,%edx + movb %dl,11(%esp) + movl 72(%ebx),%edx + movq 72(%ebx),%mm0 + psllq $60,%mm7 + movq 64(%ebx),%mm3 + por %mm7,%mm2 + movq %mm1,-40(%edi) + psrlq $4,%mm1 + movq %mm4,88(%edi) + movq %mm4,%mm6 + movq %mm2,-48(%ebp) + psrlq $4,%mm4 + movq %mm5,80(%ebp) + shll $4,%edx + movb %dl,12(%esp) + movl 88(%ebx),%edx + movq 88(%ebx),%mm2 + psllq $60,%mm6 + 
movq 80(%ebx),%mm5 + por %mm6,%mm1 + movq %mm0,-32(%edi) + psrlq $4,%mm0 + movq %mm3,96(%edi) + movq %mm3,%mm7 + movq %mm1,-40(%ebp) + psrlq $4,%mm3 + movq %mm4,88(%ebp) + shll $4,%edx + movb %dl,13(%esp) + movl 104(%ebx),%edx + movq 104(%ebx),%mm1 + psllq $60,%mm7 + movq 96(%ebx),%mm4 + por %mm7,%mm0 + movq %mm2,-24(%edi) + psrlq $4,%mm2 + movq %mm5,104(%edi) + movq %mm5,%mm6 + movq %mm0,-32(%ebp) + psrlq $4,%mm5 + movq %mm3,96(%ebp) + shll $4,%edx + movb %dl,14(%esp) + movl 120(%ebx),%edx + movq 120(%ebx),%mm0 + psllq $60,%mm6 + movq 112(%ebx),%mm3 + por %mm6,%mm2 + movq %mm1,-16(%edi) + psrlq $4,%mm1 + movq %mm4,112(%edi) + movq %mm4,%mm7 + movq %mm2,-24(%ebp) + psrlq $4,%mm4 + movq %mm5,104(%ebp) + shll $4,%edx + movb %dl,15(%esp) + psllq $60,%mm7 + por %mm7,%mm1 + movq %mm0,-8(%edi) + psrlq $4,%mm0 + movq %mm3,120(%edi) + movq %mm3,%mm6 + movq %mm1,-16(%ebp) + psrlq $4,%mm3 + movq %mm4,112(%ebp) + psllq $60,%mm6 + por %mm6,%mm0 + movq %mm0,-8(%ebp) + movq %mm3,120(%ebp) + movq (%eax),%mm6 + movl 8(%eax),%ebx + movl 12(%eax),%edx +.align 4,0x90 +L009outer: + xorl 12(%ecx),%edx + xorl 8(%ecx),%ebx + pxor (%ecx),%mm6 + leal 16(%ecx),%ecx + movl %ebx,536(%esp) + movq %mm6,528(%esp) + movl %ecx,548(%esp) + xorl %eax,%eax + roll $8,%edx + movb %dl,%al + movl %eax,%ebp + andb $15,%al + shrl $4,%ebp + pxor %mm0,%mm0 + roll $8,%edx + pxor %mm1,%mm1 + pxor %mm2,%mm2 + movq 16(%esp,%eax,8),%mm7 + movq 144(%esp,%eax,8),%mm6 + movb %dl,%al + movd %mm7,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%edi + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 536(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 
16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 532(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 528(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm1,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm0 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + xorb (%esp,%ebp,1),%bl + movb %dl,%al + movd %mm7,%ecx + movzbl %bl,%ebx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%ebp + psrlq $8,%mm6 + pxor 272(%esp,%edi,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm0,%mm6 + shrl $4,%ebp + pinsrw $2,(%esi,%ebx,2),%mm2 + pxor 16(%esp,%eax,8),%mm7 + roll $8,%edx + pxor 144(%esp,%eax,8),%mm6 + pxor %mm3,%mm7 + pxor 400(%esp,%edi,8),%mm6 + xorb (%esp,%edi,1),%cl + movb %dl,%al + movl 524(%esp),%edx + movd %mm7,%ebx + movzbl %cl,%ecx + psrlq $8,%mm7 + movq %mm6,%mm3 + movl %eax,%edi + psrlq $8,%mm6 + pxor 272(%esp,%ebp,8),%mm7 + andb $15,%al + psllq $56,%mm3 + pxor %mm2,%mm6 + shrl $4,%edi + pinsrw $2,(%esi,%ecx,2),%mm1 + pxor 16(%esp,%eax,8),%mm7 + pxor 
144(%esp,%eax,8),%mm6 + xorb (%esp,%ebp,1),%bl + pxor %mm3,%mm7 + pxor 400(%esp,%ebp,8),%mm6 + movzbl %bl,%ebx + pxor %mm2,%mm2 + psllq $4,%mm1 + movd %mm7,%ecx + psrlq $4,%mm7 + movq %mm6,%mm3 + psrlq $4,%mm6 + shll $4,%ecx + pxor 16(%esp,%edi,8),%mm7 + psllq $60,%mm3 + movzbl %cl,%ecx + pxor %mm3,%mm7 + pxor 144(%esp,%edi,8),%mm6 + pinsrw $2,(%esi,%ebx,2),%mm0 + pxor %mm1,%mm6 + movd %mm7,%edx + pinsrw $3,(%esi,%ecx,2),%mm2 + psllq $12,%mm0 + pxor %mm0,%mm6 + psrlq $32,%mm7 + pxor %mm2,%mm6 + movl 548(%esp),%ecx + movd %mm7,%ebx + movq %mm6,%mm3 + psllw $8,%mm6 + psrlw $8,%mm3 + por %mm3,%mm6 + bswap %edx + pshufw $27,%mm6,%mm6 + bswap %ebx + cmpl 552(%esp),%ecx + jne L009outer + movl 544(%esp),%eax + movl %edx,12(%eax) + movl %ebx,8(%eax) + movq %mm6,(%eax) + movl 556(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _gcm_init_clmul +.align 4 +_gcm_init_clmul: +L_gcm_init_clmul_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + call L010pic +L010pic: + popl %ecx + leal Lbswap-L010pic(%ecx),%ecx + movdqu (%eax),%xmm2 + pshufd $78,%xmm2,%xmm2 + pshufd $255,%xmm2,%xmm4 + movdqa %xmm2,%xmm3 + psllq $1,%xmm2 + pxor %xmm5,%xmm5 + psrlq $63,%xmm3 + pcmpgtd %xmm4,%xmm5 + pslldq $8,%xmm3 + por %xmm3,%xmm2 + pand 16(%ecx),%xmm5 + pxor %xmm5,%xmm2 + movdqa %xmm2,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 + movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 + movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) + ret +.globl _gcm_gmult_clmul +.align 4 +_gcm_gmult_clmul: +L_gcm_gmult_clmul_begin: + movl 4(%esp),%eax + movl 8(%esp),%edx + call L011pic +L011pic: + popl %ecx + leal Lbswap-L011pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movups (%edx),%xmm2 +.byte 102,15,56,0,197 + movups 32(%edx),%xmm4 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pxor %xmm0,%xmm3 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + ret +.globl _gcm_ghash_clmul +.align 4 +_gcm_ghash_clmul: +L_gcm_ghash_clmul_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%eax + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebx + call L012pic +L012pic: + popl %ecx + leal Lbswap-L012pic(%ecx),%ecx + movdqu (%eax),%xmm0 + movdqa (%ecx),%xmm5 + movdqu 
(%edx),%xmm2 +.byte 102,15,56,0,197 + subl $16,%ebx + jz L013odd_tail + movdqu (%esi),%xmm3 + movdqu 16(%esi),%xmm6 +.byte 102,15,56,0,221 +.byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 + pxor %xmm3,%xmm0 + pshufd $78,%xmm6,%xmm3 + movdqa %xmm6,%xmm7 + pxor %xmm6,%xmm3 + leal 32(%esi),%esi +.byte 102,15,58,68,242,0 +.byte 102,15,58,68,250,17 +.byte 102,15,58,68,221,0 + movups 16(%edx),%xmm2 + nop + subl $32,%ebx + jbe L014even_tail + jmp L015mod_loop +.align 5,0x90 +L015mod_loop: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 + nop +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movups (%edx),%xmm2 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 + movdqu 16(%esi),%xmm6 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 +.byte 102,15,56,0,245 + pxor %xmm7,%xmm1 + movdqa %xmm6,%xmm7 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 +.byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 + pxor %xmm4,%xmm1 +.byte 102,15,58,68,250,17 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +.byte 102,15,58,68,221,0 + leal 32(%esi),%esi + subl $32,%ebx + ja L015mod_loop +L014even_tail: + pshufd $78,%xmm0,%xmm4 + movdqa %xmm0,%xmm1 + pxor %xmm0,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 + testl %ebx,%ebx + jnz L016done + movups (%edx),%xmm2 +L013odd_tail: + movdqu (%esi),%xmm3 +.byte 102,15,56,0,221 + pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm1 + pshufd $78,%xmm0,%xmm3 + pshufd $78,%xmm2,%xmm4 + pxor %xmm0,%xmm3 + pxor %xmm2,%xmm4 +.byte 102,15,58,68,194,0 +.byte 102,15,58,68,202,17 +.byte 102,15,58,68,220,0 + xorps %xmm0,%xmm3 + xorps %xmm1,%xmm3 + movdqa %xmm3,%xmm4 + psrldq $8,%xmm3 + pslldq $8,%xmm4 + pxor %xmm3,%xmm1 + pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 + psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 + pxor %xmm3,%xmm0 + psllq $57,%xmm0 + movdqa %xmm0,%xmm3 + pslldq $8,%xmm0 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 + pxor %xmm4,%xmm0 + psrlq $1,%xmm0 + pxor %xmm1,%xmm0 +L016done: +.byte 102,15,56,0,197 + movdqu %xmm0,(%eax) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +Lbswap: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +.align 6,0x90 +Lrem_8bit: +.value 0,450,900,582,1800,1738,1164,1358 +.value 3600,4050,3476,3158,2328,2266,2716,2910 +.value 
7200,7650,8100,7782,6952,6890,6316,6510 +.value 4656,5106,4532,4214,5432,5370,5820,6014 +.value 14400,14722,15300,14854,16200,16010,15564,15630 +.value 13904,14226,13780,13334,12632,12442,13020,13086 +.value 9312,9634,10212,9766,9064,8874,8428,8494 +.value 10864,11186,10740,10294,11640,11450,12028,12094 +.value 28800,28994,29444,29382,30600,30282,29708,30158 +.value 32400,32594,32020,31958,31128,30810,31260,31710 +.value 27808,28002,28452,28390,27560,27242,26668,27118 +.value 25264,25458,24884,24822,26040,25722,26172,26622 +.value 18624,18690,19268,19078,20424,19978,19532,19854 +.value 18128,18194,17748,17558,16856,16410,16988,17310 +.value 21728,21794,22372,22182,21480,21034,20588,20910 +.value 23280,23346,22900,22710,24056,23610,24188,24510 +.value 57600,57538,57988,58182,58888,59338,58764,58446 +.value 61200,61138,60564,60758,59416,59866,60316,59998 +.value 64800,64738,65188,65382,64040,64490,63916,63598 +.value 62256,62194,61620,61814,62520,62970,63420,63102 +.value 55616,55426,56004,56070,56904,57226,56780,56334 +.value 55120,54930,54484,54550,53336,53658,54236,53790 +.value 50528,50338,50916,50982,49768,50090,49644,49198 +.value 52080,51890,51444,51510,52344,52666,53244,52798 +.value 37248,36930,37380,37830,38536,38730,38156,38094 +.value 40848,40530,39956,40406,39064,39258,39708,39646 +.value 36256,35938,36388,36838,35496,35690,35116,35054 +.value 33712,33394,32820,33270,33976,34170,34620,34558 +.value 43456,43010,43588,43910,44744,44810,44364,44174 +.value 42960,42514,42068,42390,41176,41242,41820,41630 +.value 46560,46114,46692,47014,45800,45866,45420,45230 +.value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 6,0x90 +Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 +.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +.byte 0 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/rc4/rc4-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/rc4/rc4-586.s new file mode 100644 index 00000000000000..ac82e76498b3b7 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/rc4/rc4-586.s @@ -0,0 +1,379 @@ +.file "rc4-586.s" +.text +.globl _RC4 +.align 4 +_RC4: +L_RC4_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%edx + movl 28(%esp),%esi + movl 32(%esp),%ebp + xorl %eax,%eax + xorl %ebx,%ebx + cmpl $0,%edx + je L000abort + movb (%edi),%al + movb 4(%edi),%bl + addl $8,%edi + leal (%esi,%edx,1),%ecx + subl %esi,%ebp + movl %ecx,24(%esp) + incb %al + cmpl $-1,256(%edi) + je L001RC4_CHAR + movl (%edi,%eax,4),%ecx + andl $-4,%edx + jz L002loop1 + movl %ebp,32(%esp) + testl $-8,%edx + jz L003go4loop4 + call L004PIC_me_up +L004PIC_me_up: + popl %ebp + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L004PIC_me_up(%ebp),%ebp + btl $26,(%ebp) + jnc L003go4loop4 + movl 32(%esp),%ebp + andl $-8,%edx + leal -8(%esi,%edx,1),%edx + movl %edx,-4(%edi) + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + movq (%esi),%mm0 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 + jmp L005loop_mmx_enter +.align 4,0x90 +L006loop_mmx: + addb %cl,%bl + psllq $56,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl 
%eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movq (%esi),%mm0 + movq %mm2,-8(%ebp,%esi,1) + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm2 +L005loop_mmx_enter: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm0,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $8,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $16,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $24,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $32,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $40,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + addb %cl,%bl + psllq $48,%mm1 + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + incl %eax + addl %ecx,%edx + movzbl %al,%eax + movzbl %dl,%edx + pxor %mm1,%mm2 + movl (%edi,%eax,4),%ecx + movd (%edi,%edx,4),%mm1 + movl %ebx,%edx + xorl %ebx,%ebx + movb %dl,%bl + cmpl -4(%edi),%esi + leal 8(%esi),%esi + jb L006loop_mmx + psllq $56,%mm1 + pxor %mm1,%mm2 + movq %mm2,-8(%ebp,%esi,1) + emms + cmpl 24(%esp),%esi + je L007done + jmp L002loop1 +.align 4,0x90 +L003go4loop4: + leal -4(%esi,%edx,1),%edx + movl %edx,28(%esp) +L008loop4: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%eax,4),%ecx + movl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl (%edi,%eax,4),%ecx + orl (%edi,%edx,4),%ebp + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + rorl $8,%ebp + movl 32(%esp),%ecx + orl (%edi,%edx,4),%ebp + rorl $8,%ebp + xorl (%esi),%ebp + cmpl 28(%esp),%esi + movl %ebp,(%ecx,%esi,1) + leal 4(%esi),%esi + movl (%edi,%eax,4),%ecx + jb L008loop4 + cmpl 24(%esp),%esi + je L007done + movl 32(%esp),%ebp +.align 4,0x90 +L002loop1: + addb %cl,%bl + movl (%edi,%ebx,4),%edx + movl %ecx,(%edi,%ebx,4) + movl %edx,(%edi,%eax,4) + addl %ecx,%edx + incb %al + andl $255,%edx + movl (%edi,%edx,4),%edx + xorb (%esi),%dl + leal 1(%esi),%esi + movl (%edi,%eax,4),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb L002loop1 + jmp L007done +.align 4,0x90 
+L001RC4_CHAR: + movzbl (%edi,%eax,1),%ecx +L009cloop1: + addb %cl,%bl + movzbl (%edi,%ebx,1),%edx + movb %cl,(%edi,%ebx,1) + movb %dl,(%edi,%eax,1) + addb %cl,%dl + movzbl (%edi,%edx,1),%edx + addb $1,%al + xorb (%esi),%dl + leal 1(%esi),%esi + movzbl (%edi,%eax,1),%ecx + cmpl 24(%esp),%esi + movb %dl,-1(%ebp,%esi,1) + jb L009cloop1 +L007done: + decb %al + movl %ebx,-4(%edi) + movb %al,-8(%edi) +L000abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _private_RC4_set_key +.align 4 +_private_RC4_set_key: +L_private_RC4_set_key_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%esi + call L010PIC_me_up +L010PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%edx),%edx + leal 8(%edi),%edi + leal (%esi,%ebp,1),%esi + negl %ebp + xorl %eax,%eax + movl %ebp,-4(%edi) + btl $20,(%edx) + jc L011c1stloop +.align 4,0x90 +L012w1stloop: + movl %eax,(%edi,%eax,4) + addb $1,%al + jnc L012w1stloop + xorl %ecx,%ecx + xorl %edx,%edx +.align 4,0x90 +L013w2ndloop: + movl (%edi,%ecx,4),%eax + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movl (%edi,%edx,4),%ebx + jnz L014wnowrap + movl -4(%edi),%ebp +L014wnowrap: + movl %eax,(%edi,%edx,4) + movl %ebx,(%edi,%ecx,4) + addb $1,%cl + jnc L013w2ndloop + jmp L015exit +.align 4,0x90 +L011c1stloop: + movb %al,(%edi,%eax,1) + addb $1,%al + jnc L011c1stloop + xorl %ecx,%ecx + xorl %edx,%edx + xorl %ebx,%ebx +.align 4,0x90 +L016c2ndloop: + movb (%edi,%ecx,1),%al + addb (%esi,%ebp,1),%dl + addb %al,%dl + addl $1,%ebp + movb (%edi,%edx,1),%bl + jnz L017cnowrap + movl -4(%edi),%ebp +L017cnowrap: + movb %al,(%edi,%edx,1) + movb %bl,(%edi,%ecx,1) + addb $1,%cl + jnc L016c2ndloop + movl $-1,256(%edi) +L015exit: + xorl %eax,%eax + movl %eax,-8(%edi) + movl %eax,-4(%edi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _RC4_options +.align 4 +_RC4_options: +L_RC4_options_begin: + call L018pic_point +L018pic_point: + popl %eax + leal L019opts-L018pic_point(%eax),%eax + call L020PIC_me_up +L020PIC_me_up: + popl %edx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L020PIC_me_up(%edx),%edx + movl (%edx),%edx + btl $20,%edx + jc L0211xchar + btl $26,%edx + jnc L022ret + addl $25,%eax + ret +L0211xchar: + addl $12,%eax +L022ret: + ret +.align 6,0x90 +L019opts: +.byte 114,99,52,40,52,120,44,105,110,116,41,0 +.byte 114,99,52,40,49,120,44,99,104,97,114,41,0 +.byte 114,99,52,40,56,120,44,109,109,120,41,0 +.byte 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 6,0x90 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/ripemd/rmd-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/ripemd/rmd-586.s new file mode 100644 index 00000000000000..7323b2de7305a3 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/ripemd/rmd-586.s @@ -0,0 +1,1963 @@ +.file "../openssl/crypto/ripemd/asm/rmd-586.s" +.text +.globl _ripemd160_block_asm_data_order +.align 4 +_ripemd160_block_asm_data_order: +L_ripemd160_block_asm_data_order_begin: + movl 4(%esp),%edx + movl 8(%esp),%eax + pushl %esi + movl (%edx),%ecx + pushl %edi + movl 4(%edx),%esi + pushl %ebp + movl 8(%edx),%edi + pushl %ebx + subl $108,%esp +L000start: + + movl (%eax),%ebx + movl 4(%eax),%ebp + movl %ebx,(%esp) + movl 
%ebp,4(%esp) + movl 8(%eax),%ebx + movl 12(%eax),%ebp + movl %ebx,8(%esp) + movl %ebp,12(%esp) + movl 16(%eax),%ebx + movl 20(%eax),%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl 24(%eax),%ebx + movl 28(%eax),%ebp + movl %ebx,24(%esp) + movl %ebp,28(%esp) + movl 32(%eax),%ebx + movl 36(%eax),%ebp + movl %ebx,32(%esp) + movl %ebp,36(%esp) + movl 40(%eax),%ebx + movl 44(%eax),%ebp + movl %ebx,40(%esp) + movl %ebp,44(%esp) + movl 48(%eax),%ebx + movl 52(%eax),%ebp + movl %ebx,48(%esp) + movl %ebp,52(%esp) + movl 56(%eax),%ebx + movl 60(%eax),%ebp + movl %ebx,56(%esp) + movl %ebp,60(%esp) + movl %edi,%eax + movl 12(%edx),%ebx + movl 16(%edx),%ebp + # 0 + xorl %ebx,%eax + movl (%esp),%edx + xorl %esi,%eax + addl %edx,%ecx + roll $10,%edi + addl %eax,%ecx + movl %esi,%eax + roll $11,%ecx + addl %ebp,%ecx + # 1 + xorl %edi,%eax + movl 4(%esp),%edx + xorl %ecx,%eax + addl %eax,%ebp + movl %ecx,%eax + roll $10,%esi + addl %edx,%ebp + xorl %esi,%eax + roll $14,%ebp + addl %ebx,%ebp + # 2 + movl 8(%esp),%edx + xorl %ebp,%eax + addl %edx,%ebx + roll $10,%ecx + addl %eax,%ebx + movl %ebp,%eax + roll $15,%ebx + addl %edi,%ebx + # 3 + xorl %ecx,%eax + movl 12(%esp),%edx + xorl %ebx,%eax + addl %eax,%edi + movl %ebx,%eax + roll $10,%ebp + addl %edx,%edi + xorl %ebp,%eax + roll $12,%edi + addl %esi,%edi + # 4 + movl 16(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $5,%esi + addl %ecx,%esi + # 5 + xorl %ebx,%eax + movl 20(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl %esi,%eax + roll $10,%edi + addl %edx,%ecx + xorl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + # 6 + movl 24(%esp),%edx + xorl %ecx,%eax + addl %edx,%ebp + roll $10,%esi + addl %eax,%ebp + movl %ecx,%eax + roll $7,%ebp + addl %ebx,%ebp + # 7 + xorl %esi,%eax + movl 28(%esp),%edx + xorl %ebp,%eax + addl %eax,%ebx + movl %ebp,%eax + roll $10,%ecx + addl %edx,%ebx + xorl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + # 8 + movl 32(%esp),%edx + xorl %ebx,%eax + addl %edx,%edi + roll $10,%ebp + addl %eax,%edi + movl %ebx,%eax + roll $11,%edi + addl %esi,%edi + # 9 + xorl %ebp,%eax + movl 36(%esp),%edx + xorl %edi,%eax + addl %eax,%esi + movl %edi,%eax + roll $10,%ebx + addl %edx,%esi + xorl %ebx,%eax + roll $13,%esi + addl %ecx,%esi + # 10 + movl 40(%esp),%edx + xorl %esi,%eax + addl %edx,%ecx + roll $10,%edi + addl %eax,%ecx + movl %esi,%eax + roll $14,%ecx + addl %ebp,%ecx + # 11 + xorl %edi,%eax + movl 44(%esp),%edx + xorl %ecx,%eax + addl %eax,%ebp + movl %ecx,%eax + roll $10,%esi + addl %edx,%ebp + xorl %esi,%eax + roll $15,%ebp + addl %ebx,%ebp + # 12 + movl 48(%esp),%edx + xorl %ebp,%eax + addl %edx,%ebx + roll $10,%ecx + addl %eax,%ebx + movl %ebp,%eax + roll $6,%ebx + addl %edi,%ebx + # 13 + xorl %ecx,%eax + movl 52(%esp),%edx + xorl %ebx,%eax + addl %eax,%edi + movl %ebx,%eax + roll $10,%ebp + addl %edx,%edi + xorl %ebp,%eax + roll $7,%edi + addl %esi,%edi + # 14 + movl 56(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $9,%esi + addl %ecx,%esi + # 15 + xorl %ebx,%eax + movl 60(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl $-1,%eax + roll $10,%edi + addl %edx,%ecx + movl 28(%esp),%edx + roll $8,%ecx + addl %ebp,%ecx + # 16 + addl %edx,%ebp + movl %esi,%edx + subl %ecx,%eax + andl %ecx,%edx + andl %edi,%eax + orl %eax,%edx + movl 16(%esp),%eax + roll $10,%esi + leal 1518500249(%ebp,%edx,1),%ebp + movl $-1,%edx + roll $7,%ebp + addl %ebx,%ebp + # 17 + addl %eax,%ebx + movl %ecx,%eax + subl %ebp,%edx + andl %ebp,%eax + 
andl %esi,%edx + orl %edx,%eax + movl 52(%esp),%edx + roll $10,%ecx + leal 1518500249(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $6,%ebx + addl %edi,%ebx + # 18 + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl $-1,%edx + roll $8,%edi + addl %esi,%edi + # 19 + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 40(%esp),%edx + roll $10,%ebx + leal 1518500249(%esi,%eax,1),%esi + movl $-1,%eax + roll $13,%esi + addl %ecx,%esi + # 20 + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%edi + leal 1518500249(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $11,%ecx + addl %ebp,%ecx + # 21 + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl 60(%esp),%edx + roll $10,%esi + leal 1518500249(%ebp,%eax,1),%ebp + movl $-1,%eax + roll $9,%ebp + addl %ebx,%ebp + # 22 + addl %edx,%ebx + movl %ecx,%edx + subl %ebp,%eax + andl %ebp,%edx + andl %esi,%eax + orl %eax,%edx + movl 12(%esp),%eax + roll $10,%ecx + leal 1518500249(%ebx,%edx,1),%ebx + movl $-1,%edx + roll $7,%ebx + addl %edi,%ebx + # 23 + addl %eax,%edi + movl %ebp,%eax + subl %ebx,%edx + andl %ebx,%eax + andl %ecx,%edx + orl %edx,%eax + movl 48(%esp),%edx + roll $10,%ebp + leal 1518500249(%edi,%eax,1),%edi + movl $-1,%eax + roll $15,%edi + addl %esi,%edi + # 24 + addl %edx,%esi + movl %ebx,%edx + subl %edi,%eax + andl %edi,%edx + andl %ebp,%eax + orl %eax,%edx + movl (%esp),%eax + roll $10,%ebx + leal 1518500249(%esi,%edx,1),%esi + movl $-1,%edx + roll $7,%esi + addl %ecx,%esi + # 25 + addl %eax,%ecx + movl %edi,%eax + subl %esi,%edx + andl %esi,%eax + andl %ebx,%edx + orl %edx,%eax + movl 36(%esp),%edx + roll $10,%edi + leal 1518500249(%ecx,%eax,1),%ecx + movl $-1,%eax + roll $12,%ecx + addl %ebp,%ecx + # 26 + addl %edx,%ebp + movl %esi,%edx + subl %ecx,%eax + andl %ecx,%edx + andl %edi,%eax + orl %eax,%edx + movl 20(%esp),%eax + roll $10,%esi + leal 1518500249(%ebp,%edx,1),%ebp + movl $-1,%edx + roll $15,%ebp + addl %ebx,%ebp + # 27 + addl %eax,%ebx + movl %ecx,%eax + subl %ebp,%edx + andl %ebp,%eax + andl %esi,%edx + orl %edx,%eax + movl 8(%esp),%edx + roll $10,%ecx + leal 1518500249(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $9,%ebx + addl %edi,%ebx + # 28 + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl $-1,%edx + roll $11,%edi + addl %esi,%edi + # 29 + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 44(%esp),%edx + roll $10,%ebx + leal 1518500249(%esi,%eax,1),%esi + movl $-1,%eax + roll $7,%esi + addl %ecx,%esi + # 30 + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 32(%esp),%eax + roll $10,%edi + leal 1518500249(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $13,%ecx + addl %ebp,%ecx + # 31 + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl $-1,%edx + roll $10,%esi + leal 1518500249(%ebp,%eax,1),%ebp + subl %ecx,%edx + roll $12,%ebp + addl %ebx,%ebp + # 32 + movl 12(%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1859775393(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $11,%ebx + addl %edi,%ebx 
+ # 33 + movl 40(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1859775393(%edi,%eax,1),%edi + subl %ebx,%edx + roll $13,%edi + addl %esi,%edi + # 34 + movl 56(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1859775393(%esi,%edx,1),%esi + subl %edi,%eax + roll $6,%esi + addl %ecx,%esi + # 35 + movl 16(%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1859775393(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $7,%ecx + addl %ebp,%ecx + # 36 + movl 36(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1859775393(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $14,%ebp + addl %ebx,%ebp + # 37 + movl 60(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl $-1,%edx + roll $10,%ecx + leal 1859775393(%ebx,%eax,1),%ebx + subl %ebp,%edx + roll $9,%ebx + addl %edi,%ebx + # 38 + movl 32(%esp),%eax + orl %ebx,%edx + addl %eax,%edi + xorl %ecx,%edx + movl $-1,%eax + roll $10,%ebp + leal 1859775393(%edi,%edx,1),%edi + subl %ebx,%eax + roll $13,%edi + addl %esi,%edi + # 39 + movl 4(%esp),%edx + orl %edi,%eax + addl %edx,%esi + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ebx + leal 1859775393(%esi,%eax,1),%esi + subl %edi,%edx + roll $15,%esi + addl %ecx,%esi + # 40 + movl 8(%esp),%eax + orl %esi,%edx + addl %eax,%ecx + xorl %ebx,%edx + movl $-1,%eax + roll $10,%edi + leal 1859775393(%ecx,%edx,1),%ecx + subl %esi,%eax + roll $14,%ecx + addl %ebp,%ecx + # 41 + movl 28(%esp),%edx + orl %ecx,%eax + addl %edx,%ebp + xorl %edi,%eax + movl $-1,%edx + roll $10,%esi + leal 1859775393(%ebp,%eax,1),%ebp + subl %ecx,%edx + roll $8,%ebp + addl %ebx,%ebp + # 42 + movl (%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1859775393(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $13,%ebx + addl %edi,%ebx + # 43 + movl 24(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1859775393(%edi,%eax,1),%edi + subl %ebx,%edx + roll $6,%edi + addl %esi,%edi + # 44 + movl 52(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1859775393(%esi,%edx,1),%esi + subl %edi,%eax + roll $5,%esi + addl %ecx,%esi + # 45 + movl 44(%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1859775393(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $12,%ecx + addl %ebp,%ecx + # 46 + movl 20(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1859775393(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $7,%ebp + addl %ebx,%ebp + # 47 + movl 48(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl $-1,%edx + roll $10,%ecx + leal 1859775393(%ebx,%eax,1),%ebx + movl %ecx,%eax + roll $5,%ebx + addl %edi,%ebx + # 48 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $11,%edi + addl %esi,%edi + # 49 + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 36(%esp),%eax + roll $10,%ebx + leal 2400959708(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $12,%esi + addl %ecx,%esi + # 50 + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 44(%esp),%eax + roll $10,%edi + leal 2400959708(%ecx,%edx,1),%ecx + movl $-1,%edx + 
addl %eax,%ecx + movl %edi,%eax + roll $14,%ecx + addl %ebp,%ecx + # 51 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 40(%esp),%eax + roll $10,%esi + leal 2400959708(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $15,%ebp + addl %ebx,%ebp + # 52 + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl (%esp),%eax + roll $10,%ecx + leal 2400959708(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $14,%ebx + addl %edi,%ebx + # 53 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 32(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $15,%edi + addl %esi,%edi + # 54 + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 48(%esp),%eax + roll $10,%ebx + leal 2400959708(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $9,%esi + addl %ecx,%esi + # 55 + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 16(%esp),%eax + roll $10,%edi + leal 2400959708(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + # 56 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 52(%esp),%eax + roll $10,%esi + leal 2400959708(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $9,%ebp + addl %ebx,%ebp + # 57 + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 12(%esp),%eax + roll $10,%ecx + leal 2400959708(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $14,%ebx + addl %edi,%ebx + # 58 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 28(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $5,%edi + addl %esi,%edi + # 59 + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 60(%esp),%eax + roll $10,%ebx + leal 2400959708(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $6,%esi + addl %ecx,%esi + # 60 + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%edi + leal 2400959708(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + # 61 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 20(%esp),%eax + roll $10,%esi + leal 2400959708(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $6,%ebp + addl %ebx,%ebp + # 62 + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%ecx + leal 2400959708(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $5,%ebx + addl %edi,%ebx + # 63 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 8(%esp),%eax + roll $10,%ebp + leal 2400959708(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + subl %ebp,%edx + roll $12,%edi + addl %esi,%edi + # 64 + movl 16(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 2840853838(%esi,%edx,1),%esi + subl %ebx,%eax + roll $9,%esi + addl %ecx,%esi + # 65 + movl (%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 2840853838(%ecx,%eax,1),%ecx + subl %edi,%edx + roll $15,%ecx + addl %ebp,%ecx + # 66 + movl 20(%esp),%eax + orl %esi,%edx + addl %eax,%ebp + xorl %ecx,%edx + movl $-1,%eax + roll $10,%esi + leal 
2840853838(%ebp,%edx,1),%ebp + subl %esi,%eax + roll $5,%ebp + addl %ebx,%ebp + # 67 + movl 36(%esp),%edx + orl %ecx,%eax + addl %edx,%ebx + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ecx + leal 2840853838(%ebx,%eax,1),%ebx + subl %ecx,%edx + roll $11,%ebx + addl %edi,%ebx + # 68 + movl 28(%esp),%eax + orl %ebp,%edx + addl %eax,%edi + xorl %ebx,%edx + movl $-1,%eax + roll $10,%ebp + leal 2840853838(%edi,%edx,1),%edi + subl %ebp,%eax + roll $6,%edi + addl %esi,%edi + # 69 + movl 48(%esp),%edx + orl %ebx,%eax + addl %edx,%esi + xorl %edi,%eax + movl $-1,%edx + roll $10,%ebx + leal 2840853838(%esi,%eax,1),%esi + subl %ebx,%edx + roll $8,%esi + addl %ecx,%esi + # 70 + movl 8(%esp),%eax + orl %edi,%edx + addl %eax,%ecx + xorl %esi,%edx + movl $-1,%eax + roll $10,%edi + leal 2840853838(%ecx,%edx,1),%ecx + subl %edi,%eax + roll $13,%ecx + addl %ebp,%ecx + # 71 + movl 40(%esp),%edx + orl %esi,%eax + addl %edx,%ebp + xorl %ecx,%eax + movl $-1,%edx + roll $10,%esi + leal 2840853838(%ebp,%eax,1),%ebp + subl %esi,%edx + roll $12,%ebp + addl %ebx,%ebp + # 72 + movl 56(%esp),%eax + orl %ecx,%edx + addl %eax,%ebx + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ecx + leal 2840853838(%ebx,%edx,1),%ebx + subl %ecx,%eax + roll $5,%ebx + addl %edi,%ebx + # 73 + movl 4(%esp),%edx + orl %ebp,%eax + addl %edx,%edi + xorl %ebx,%eax + movl $-1,%edx + roll $10,%ebp + leal 2840853838(%edi,%eax,1),%edi + subl %ebp,%edx + roll $12,%edi + addl %esi,%edi + # 74 + movl 12(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 2840853838(%esi,%edx,1),%esi + subl %ebx,%eax + roll $13,%esi + addl %ecx,%esi + # 75 + movl 32(%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 2840853838(%ecx,%eax,1),%ecx + subl %edi,%edx + roll $14,%ecx + addl %ebp,%ecx + # 76 + movl 44(%esp),%eax + orl %esi,%edx + addl %eax,%ebp + xorl %ecx,%edx + movl $-1,%eax + roll $10,%esi + leal 2840853838(%ebp,%edx,1),%ebp + subl %esi,%eax + roll $11,%ebp + addl %ebx,%ebp + # 77 + movl 24(%esp),%edx + orl %ecx,%eax + addl %edx,%ebx + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ecx + leal 2840853838(%ebx,%eax,1),%ebx + subl %ecx,%edx + roll $8,%ebx + addl %edi,%ebx + # 78 + movl 60(%esp),%eax + orl %ebp,%edx + addl %eax,%edi + xorl %ebx,%edx + movl $-1,%eax + roll $10,%ebp + leal 2840853838(%edi,%edx,1),%edi + subl %ebp,%eax + roll $5,%edi + addl %esi,%edi + # 79 + movl 52(%esp),%edx + orl %ebx,%eax + addl %edx,%esi + xorl %edi,%eax + movl 128(%esp),%edx + roll $10,%ebx + leal 2840853838(%esi,%eax,1),%esi + movl %ecx,64(%esp) + roll $6,%esi + addl %ecx,%esi + movl (%edx),%ecx + movl %esi,68(%esp) + movl %edi,72(%esp) + movl 4(%edx),%esi + movl %ebx,76(%esp) + movl 8(%edx),%edi + movl %ebp,80(%esp) + movl 12(%edx),%ebx + movl 16(%edx),%ebp + # 80 + movl $-1,%edx + subl %ebx,%edx + movl 20(%esp),%eax + orl %edi,%edx + addl %eax,%ecx + xorl %esi,%edx + movl $-1,%eax + roll $10,%edi + leal 1352829926(%ecx,%edx,1),%ecx + subl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + # 81 + movl 56(%esp),%edx + orl %esi,%eax + addl %edx,%ebp + xorl %ecx,%eax + movl $-1,%edx + roll $10,%esi + leal 1352829926(%ebp,%eax,1),%ebp + subl %esi,%edx + roll $9,%ebp + addl %ebx,%ebp + # 82 + movl 28(%esp),%eax + orl %ecx,%edx + addl %eax,%ebx + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ecx + leal 1352829926(%ebx,%edx,1),%ebx + subl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + # 83 + movl (%esp),%edx + orl %ebp,%eax + addl %edx,%edi + xorl %ebx,%eax + movl $-1,%edx + roll $10,%ebp + leal 
1352829926(%edi,%eax,1),%edi + subl %ebp,%edx + roll $11,%edi + addl %esi,%edi + # 84 + movl 36(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 1352829926(%esi,%edx,1),%esi + subl %ebx,%eax + roll $13,%esi + addl %ecx,%esi + # 85 + movl 8(%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 1352829926(%ecx,%eax,1),%ecx + subl %edi,%edx + roll $15,%ecx + addl %ebp,%ecx + # 86 + movl 44(%esp),%eax + orl %esi,%edx + addl %eax,%ebp + xorl %ecx,%edx + movl $-1,%eax + roll $10,%esi + leal 1352829926(%ebp,%edx,1),%ebp + subl %esi,%eax + roll $15,%ebp + addl %ebx,%ebp + # 87 + movl 16(%esp),%edx + orl %ecx,%eax + addl %edx,%ebx + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ecx + leal 1352829926(%ebx,%eax,1),%ebx + subl %ecx,%edx + roll $5,%ebx + addl %edi,%ebx + # 88 + movl 52(%esp),%eax + orl %ebp,%edx + addl %eax,%edi + xorl %ebx,%edx + movl $-1,%eax + roll $10,%ebp + leal 1352829926(%edi,%edx,1),%edi + subl %ebp,%eax + roll $7,%edi + addl %esi,%edi + # 89 + movl 24(%esp),%edx + orl %ebx,%eax + addl %edx,%esi + xorl %edi,%eax + movl $-1,%edx + roll $10,%ebx + leal 1352829926(%esi,%eax,1),%esi + subl %ebx,%edx + roll $7,%esi + addl %ecx,%esi + # 90 + movl 60(%esp),%eax + orl %edi,%edx + addl %eax,%ecx + xorl %esi,%edx + movl $-1,%eax + roll $10,%edi + leal 1352829926(%ecx,%edx,1),%ecx + subl %edi,%eax + roll $8,%ecx + addl %ebp,%ecx + # 91 + movl 32(%esp),%edx + orl %esi,%eax + addl %edx,%ebp + xorl %ecx,%eax + movl $-1,%edx + roll $10,%esi + leal 1352829926(%ebp,%eax,1),%ebp + subl %esi,%edx + roll $11,%ebp + addl %ebx,%ebp + # 92 + movl 4(%esp),%eax + orl %ecx,%edx + addl %eax,%ebx + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ecx + leal 1352829926(%ebx,%edx,1),%ebx + subl %ecx,%eax + roll $14,%ebx + addl %edi,%ebx + # 93 + movl 40(%esp),%edx + orl %ebp,%eax + addl %edx,%edi + xorl %ebx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1352829926(%edi,%eax,1),%edi + subl %ebp,%edx + roll $14,%edi + addl %esi,%edi + # 94 + movl 12(%esp),%eax + orl %ebx,%edx + addl %eax,%esi + xorl %edi,%edx + movl $-1,%eax + roll $10,%ebx + leal 1352829926(%esi,%edx,1),%esi + subl %ebx,%eax + roll $12,%esi + addl %ecx,%esi + # 95 + movl 48(%esp),%edx + orl %edi,%eax + addl %edx,%ecx + xorl %esi,%eax + movl $-1,%edx + roll $10,%edi + leal 1352829926(%ecx,%eax,1),%ecx + movl %edi,%eax + roll $6,%ecx + addl %ebp,%ecx + # 96 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $9,%ebp + addl %ebx,%ebp + # 97 + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 44(%esp),%eax + roll $10,%ecx + leal 1548603684(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $13,%ebx + addl %edi,%ebx + # 98 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 12(%esp),%eax + roll $10,%ebp + leal 1548603684(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $15,%edi + addl %esi,%edi + # 99 + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 28(%esp),%eax + roll $10,%ebx + leal 1548603684(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $7,%esi + addl %ecx,%esi + # 100 + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl (%esp),%eax + roll $10,%edi + leal 1548603684(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $12,%ecx 
+ addl %ebp,%ecx + # 101 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 52(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $8,%ebp + addl %ebx,%ebp + # 102 + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 20(%esp),%eax + roll $10,%ecx + leal 1548603684(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + # 103 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 40(%esp),%eax + roll $10,%ebp + leal 1548603684(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $11,%edi + addl %esi,%edi + # 104 + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%ebx + leal 1548603684(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $7,%esi + addl %ecx,%esi + # 105 + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 60(%esp),%eax + roll $10,%edi + leal 1548603684(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $7,%ecx + addl %ebp,%ecx + # 106 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 32(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + movl %esi,%eax + roll $12,%ebp + addl %ebx,%ebp + # 107 + subl %esi,%edx + andl %ebp,%eax + andl %ecx,%edx + orl %eax,%edx + movl 48(%esp),%eax + roll $10,%ecx + leal 1548603684(%ebx,%edx,1),%ebx + movl $-1,%edx + addl %eax,%ebx + movl %ecx,%eax + roll $7,%ebx + addl %edi,%ebx + # 108 + subl %ecx,%edx + andl %ebx,%eax + andl %ebp,%edx + orl %eax,%edx + movl 16(%esp),%eax + roll $10,%ebp + leal 1548603684(%edi,%edx,1),%edi + movl $-1,%edx + addl %eax,%edi + movl %ebp,%eax + roll $6,%edi + addl %esi,%edi + # 109 + subl %ebp,%edx + andl %edi,%eax + andl %ebx,%edx + orl %eax,%edx + movl 36(%esp),%eax + roll $10,%ebx + leal 1548603684(%esi,%edx,1),%esi + movl $-1,%edx + addl %eax,%esi + movl %ebx,%eax + roll $15,%esi + addl %ecx,%esi + # 110 + subl %ebx,%edx + andl %esi,%eax + andl %edi,%edx + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%edi + leal 1548603684(%ecx,%edx,1),%ecx + movl $-1,%edx + addl %eax,%ecx + movl %edi,%eax + roll $13,%ecx + addl %ebp,%ecx + # 111 + subl %edi,%edx + andl %ecx,%eax + andl %esi,%edx + orl %eax,%edx + movl 8(%esp),%eax + roll $10,%esi + leal 1548603684(%ebp,%edx,1),%ebp + movl $-1,%edx + addl %eax,%ebp + subl %ecx,%edx + roll $11,%ebp + addl %ebx,%ebp + # 112 + movl 60(%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1836072691(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $9,%ebx + addl %edi,%ebx + # 113 + movl 20(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1836072691(%edi,%eax,1),%edi + subl %ebx,%edx + roll $7,%edi + addl %esi,%edi + # 114 + movl 4(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1836072691(%esi,%edx,1),%esi + subl %edi,%eax + roll $15,%esi + addl %ecx,%esi + # 115 + movl 12(%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1836072691(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $11,%ecx + addl %ebp,%ecx + # 116 + movl 28(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1836072691(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $8,%ebp + addl %ebx,%ebp + # 117 + movl 
56(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl $-1,%edx + roll $10,%ecx + leal 1836072691(%ebx,%eax,1),%ebx + subl %ebp,%edx + roll $6,%ebx + addl %edi,%ebx + # 118 + movl 24(%esp),%eax + orl %ebx,%edx + addl %eax,%edi + xorl %ecx,%edx + movl $-1,%eax + roll $10,%ebp + leal 1836072691(%edi,%edx,1),%edi + subl %ebx,%eax + roll $6,%edi + addl %esi,%edi + # 119 + movl 36(%esp),%edx + orl %edi,%eax + addl %edx,%esi + xorl %ebp,%eax + movl $-1,%edx + roll $10,%ebx + leal 1836072691(%esi,%eax,1),%esi + subl %edi,%edx + roll $14,%esi + addl %ecx,%esi + # 120 + movl 44(%esp),%eax + orl %esi,%edx + addl %eax,%ecx + xorl %ebx,%edx + movl $-1,%eax + roll $10,%edi + leal 1836072691(%ecx,%edx,1),%ecx + subl %esi,%eax + roll $12,%ecx + addl %ebp,%ecx + # 121 + movl 32(%esp),%edx + orl %ecx,%eax + addl %edx,%ebp + xorl %edi,%eax + movl $-1,%edx + roll $10,%esi + leal 1836072691(%ebp,%eax,1),%ebp + subl %ecx,%edx + roll $13,%ebp + addl %ebx,%ebp + # 122 + movl 48(%esp),%eax + orl %ebp,%edx + addl %eax,%ebx + xorl %esi,%edx + movl $-1,%eax + roll $10,%ecx + leal 1836072691(%ebx,%edx,1),%ebx + subl %ebp,%eax + roll $5,%ebx + addl %edi,%ebx + # 123 + movl 8(%esp),%edx + orl %ebx,%eax + addl %edx,%edi + xorl %ecx,%eax + movl $-1,%edx + roll $10,%ebp + leal 1836072691(%edi,%eax,1),%edi + subl %ebx,%edx + roll $14,%edi + addl %esi,%edi + # 124 + movl 40(%esp),%eax + orl %edi,%edx + addl %eax,%esi + xorl %ebp,%edx + movl $-1,%eax + roll $10,%ebx + leal 1836072691(%esi,%edx,1),%esi + subl %edi,%eax + roll $13,%esi + addl %ecx,%esi + # 125 + movl (%esp),%edx + orl %esi,%eax + addl %edx,%ecx + xorl %ebx,%eax + movl $-1,%edx + roll $10,%edi + leal 1836072691(%ecx,%eax,1),%ecx + subl %esi,%edx + roll $13,%ecx + addl %ebp,%ecx + # 126 + movl 16(%esp),%eax + orl %ecx,%edx + addl %eax,%ebp + xorl %edi,%edx + movl $-1,%eax + roll $10,%esi + leal 1836072691(%ebp,%edx,1),%ebp + subl %ecx,%eax + roll $7,%ebp + addl %ebx,%ebp + # 127 + movl 52(%esp),%edx + orl %ebp,%eax + addl %edx,%ebx + xorl %esi,%eax + movl 32(%esp),%edx + roll $10,%ecx + leal 1836072691(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $5,%ebx + addl %edi,%ebx + # 128 + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 24(%esp),%eax + roll $10,%ebp + leal 2053994217(%edi,%edx,1),%edi + movl $-1,%edx + roll $15,%edi + addl %esi,%edi + # 129 + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 16(%esp),%edx + roll $10,%ebx + leal 2053994217(%esi,%eax,1),%esi + movl $-1,%eax + roll $5,%esi + addl %ecx,%esi + # 130 + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 4(%esp),%eax + roll $10,%edi + leal 2053994217(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $8,%ecx + addl %ebp,%ecx + # 131 + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl 12(%esp),%edx + roll $10,%esi + leal 2053994217(%ebp,%eax,1),%ebp + movl $-1,%eax + roll $11,%ebp + addl %ebx,%ebp + # 132 + addl %edx,%ebx + movl %ecx,%edx + subl %ebp,%eax + andl %ebp,%edx + andl %esi,%eax + orl %eax,%edx + movl 44(%esp),%eax + roll $10,%ecx + leal 2053994217(%ebx,%edx,1),%ebx + movl $-1,%edx + roll $14,%ebx + addl %edi,%ebx + # 133 + addl %eax,%edi + movl %ebp,%eax + subl %ebx,%edx + andl %ebx,%eax + andl %ecx,%edx + orl %edx,%eax + movl 60(%esp),%edx + roll $10,%ebp + leal 2053994217(%edi,%eax,1),%edi + movl $-1,%eax + roll $14,%edi + addl %esi,%edi + # 134 + addl 
%edx,%esi + movl %ebx,%edx + subl %edi,%eax + andl %edi,%edx + andl %ebp,%eax + orl %eax,%edx + movl (%esp),%eax + roll $10,%ebx + leal 2053994217(%esi,%edx,1),%esi + movl $-1,%edx + roll $6,%esi + addl %ecx,%esi + # 135 + addl %eax,%ecx + movl %edi,%eax + subl %esi,%edx + andl %esi,%eax + andl %ebx,%edx + orl %edx,%eax + movl 20(%esp),%edx + roll $10,%edi + leal 2053994217(%ecx,%eax,1),%ecx + movl $-1,%eax + roll $14,%ecx + addl %ebp,%ecx + # 136 + addl %edx,%ebp + movl %esi,%edx + subl %ecx,%eax + andl %ecx,%edx + andl %edi,%eax + orl %eax,%edx + movl 48(%esp),%eax + roll $10,%esi + leal 2053994217(%ebp,%edx,1),%ebp + movl $-1,%edx + roll $6,%ebp + addl %ebx,%ebp + # 137 + addl %eax,%ebx + movl %ecx,%eax + subl %ebp,%edx + andl %ebp,%eax + andl %esi,%edx + orl %edx,%eax + movl 8(%esp),%edx + roll $10,%ecx + leal 2053994217(%ebx,%eax,1),%ebx + movl $-1,%eax + roll $9,%ebx + addl %edi,%ebx + # 138 + addl %edx,%edi + movl %ebp,%edx + subl %ebx,%eax + andl %ebx,%edx + andl %ecx,%eax + orl %eax,%edx + movl 52(%esp),%eax + roll $10,%ebp + leal 2053994217(%edi,%edx,1),%edi + movl $-1,%edx + roll $12,%edi + addl %esi,%edi + # 139 + addl %eax,%esi + movl %ebx,%eax + subl %edi,%edx + andl %edi,%eax + andl %ebp,%edx + orl %edx,%eax + movl 36(%esp),%edx + roll $10,%ebx + leal 2053994217(%esi,%eax,1),%esi + movl $-1,%eax + roll $9,%esi + addl %ecx,%esi + # 140 + addl %edx,%ecx + movl %edi,%edx + subl %esi,%eax + andl %esi,%edx + andl %ebx,%eax + orl %eax,%edx + movl 28(%esp),%eax + roll $10,%edi + leal 2053994217(%ecx,%edx,1),%ecx + movl $-1,%edx + roll $12,%ecx + addl %ebp,%ecx + # 141 + addl %eax,%ebp + movl %esi,%eax + subl %ecx,%edx + andl %ecx,%eax + andl %edi,%edx + orl %edx,%eax + movl 40(%esp),%edx + roll $10,%esi + leal 2053994217(%ebp,%eax,1),%ebp + movl $-1,%eax + roll $5,%ebp + addl %ebx,%ebp + # 142 + addl %edx,%ebx + movl %ecx,%edx + subl %ebp,%eax + andl %ebp,%edx + andl %esi,%eax + orl %eax,%edx + movl 56(%esp),%eax + roll $10,%ecx + leal 2053994217(%ebx,%edx,1),%ebx + movl $-1,%edx + roll $15,%ebx + addl %edi,%ebx + # 143 + addl %eax,%edi + movl %ebp,%eax + subl %ebx,%edx + andl %ebx,%eax + andl %ecx,%edx + orl %eax,%edx + movl %ebx,%eax + roll $10,%ebp + leal 2053994217(%edi,%edx,1),%edi + xorl %ebp,%eax + roll $8,%edi + addl %esi,%edi + # 144 + movl 48(%esp),%edx + xorl %edi,%eax + addl %edx,%esi + roll $10,%ebx + addl %eax,%esi + movl %edi,%eax + roll $8,%esi + addl %ecx,%esi + # 145 + xorl %ebx,%eax + movl 60(%esp),%edx + xorl %esi,%eax + addl %eax,%ecx + movl %esi,%eax + roll $10,%edi + addl %edx,%ecx + xorl %edi,%eax + roll $5,%ecx + addl %ebp,%ecx + # 146 + movl 40(%esp),%edx + xorl %ecx,%eax + addl %edx,%ebp + roll $10,%esi + addl %eax,%ebp + movl %ecx,%eax + roll $12,%ebp + addl %ebx,%ebp + # 147 + xorl %esi,%eax + movl 16(%esp),%edx + xorl %ebp,%eax + addl %eax,%ebx + movl %ebp,%eax + roll $10,%ecx + addl %edx,%ebx + xorl %ecx,%eax + roll $9,%ebx + addl %edi,%ebx + # 148 + movl 4(%esp),%edx + xorl %ebx,%eax + addl %edx,%edi + roll $10,%ebp + addl %eax,%edi + movl %ebx,%eax + roll $12,%edi + addl %esi,%edi + # 149 + xorl %ebp,%eax + movl 20(%esp),%edx + xorl %edi,%eax + addl %eax,%esi + movl %edi,%eax + roll $10,%ebx + addl %edx,%esi + xorl %ebx,%eax + roll $5,%esi + addl %ecx,%esi + # 150 + movl 32(%esp),%edx + xorl %esi,%eax + addl %edx,%ecx + roll $10,%edi + addl %eax,%ecx + movl %esi,%eax + roll $14,%ecx + addl %ebp,%ecx + # 151 + xorl %edi,%eax + movl 28(%esp),%edx + xorl %ecx,%eax + addl %eax,%ebp + movl %ecx,%eax + roll $10,%esi + addl %edx,%ebp + xorl %esi,%eax + 
roll $6,%ebp
+ addl %ebx,%ebp
+ # 152
+ movl 24(%esp),%edx
+ xorl %ebp,%eax
+ addl %edx,%ebx
+ roll $10,%ecx
+ addl %eax,%ebx
+ movl %ebp,%eax
+ roll $8,%ebx
+ addl %edi,%ebx
+ # 153
+ xorl %ecx,%eax
+ movl 8(%esp),%edx
+ xorl %ebx,%eax
+ addl %eax,%edi
+ movl %ebx,%eax
+ roll $10,%ebp
+ addl %edx,%edi
+ xorl %ebp,%eax
+ roll $13,%edi
+ addl %esi,%edi
+ # 154
+ movl 52(%esp),%edx
+ xorl %edi,%eax
+ addl %edx,%esi
+ roll $10,%ebx
+ addl %eax,%esi
+ movl %edi,%eax
+ roll $6,%esi
+ addl %ecx,%esi
+ # 155
+ xorl %ebx,%eax
+ movl 56(%esp),%edx
+ xorl %esi,%eax
+ addl %eax,%ecx
+ movl %esi,%eax
+ roll $10,%edi
+ addl %edx,%ecx
+ xorl %edi,%eax
+ roll $5,%ecx
+ addl %ebp,%ecx
+ # 156
+ movl (%esp),%edx
+ xorl %ecx,%eax
+ addl %edx,%ebp
+ roll $10,%esi
+ addl %eax,%ebp
+ movl %ecx,%eax
+ roll $15,%ebp
+ addl %ebx,%ebp
+ # 157
+ xorl %esi,%eax
+ movl 12(%esp),%edx
+ xorl %ebp,%eax
+ addl %eax,%ebx
+ movl %ebp,%eax
+ roll $10,%ecx
+ addl %edx,%ebx
+ xorl %ecx,%eax
+ roll $13,%ebx
+ addl %edi,%ebx
+ # 158
+ movl 36(%esp),%edx
+ xorl %ebx,%eax
+ addl %edx,%edi
+ roll $10,%ebp
+ addl %eax,%edi
+ movl %ebx,%eax
+ roll $11,%edi
+ addl %esi,%edi
+ # 159
+ xorl %ebp,%eax
+ movl 44(%esp),%edx
+ xorl %edi,%eax
+ addl %eax,%esi
+ roll $10,%ebx
+ addl %edx,%esi
+ movl 128(%esp),%edx
+ roll $11,%esi
+ addl %ecx,%esi
+ movl 4(%edx),%eax
+ addl %eax,%ebx
+ movl 72(%esp),%eax
+ addl %eax,%ebx
+ movl 8(%edx),%eax
+ addl %eax,%ebp
+ movl 76(%esp),%eax
+ addl %eax,%ebp
+ movl 12(%edx),%eax
+ addl %eax,%ecx
+ movl 80(%esp),%eax
+ addl %eax,%ecx
+ movl 16(%edx),%eax
+ addl %eax,%esi
+ movl 64(%esp),%eax
+ addl %eax,%esi
+ movl (%edx),%eax
+ addl %eax,%edi
+ movl 68(%esp),%eax
+ addl %eax,%edi
+ movl 136(%esp),%eax
+ movl %ebx,(%edx)
+ movl %ebp,4(%edx)
+ movl %ecx,8(%edx)
+ subl $1,%eax
+ movl %esi,12(%edx)
+ movl %edi,16(%edx)
+ jle L001get_out
+ movl %eax,136(%esp)
+ movl %ecx,%edi
+ movl 132(%esp),%eax
+ movl %ebx,%ecx
+ addl $64,%eax
+ movl %ebp,%esi
+ movl %eax,132(%esp)
+ jmp L000start
+L001get_out:
+ addl $108,%esp
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha1-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha1-586.s
new file mode 100644
index 00000000000000..a0fe22eb2eca7b
--- /dev/null
+++ b/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha1-586.s
@@ -0,0 +1,2793 @@
+.file "sha1-586.s"
+.text
+.globl _sha1_block_data_order
+.align 4
+_sha1_block_data_order:
+L_sha1_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call L000pic_point
+L000pic_point:
+ popl %ebp
+ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi
+ leal LK_XX_XX-L000pic_point(%ebp),%ebp
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ testl $512,%edx
+ jz L001x86
+ movl 8(%esi),%ecx
+ testl $16777216,%eax
+ jz L001x86
+ testl $536870912,%ecx
+ jnz Lshaext_shortcut
+ jmp Lssse3_shortcut
+.align 4,0x90
+L001x86:
+ movl 20(%esp),%ebp
+ movl 24(%esp),%esi
+ movl 28(%esp),%eax
+ subl $76,%esp
+ shll $6,%eax
+ addl %esi,%eax
+ movl %eax,104(%esp)
+ movl 16(%ebp),%edi
+ jmp L002loop
+.align 4,0x90
+L002loop:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl %ecx,8(%esp)
+ movl %edx,12(%esp)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,16(%esp)
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %edx,28(%esp)
+ movl
32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + # 00_15 0 + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 1 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 2 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 3 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 4 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 5 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 6 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 7 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 8 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 9 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 10 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 11 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 12 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 13 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 
1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 14 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 15 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + # 16_19 16 + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 16_19 17 + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 16_19 18 + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 16_19 19 + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 20 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 21 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 22 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 23 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 24 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 25 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + 
addl %ebp,%edi + # 20_39 26 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 27 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 28 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 29 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 30 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 31 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + # 20_39 32 + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + # 20_39 33 + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + # 20_39 34 + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + # 20_39 35 + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + # 20_39 36 + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + # 20_39 37 + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 
1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + # 20_39 38 + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + # 20_39 39 + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + # 40_59 40 + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + # 40_59 41 + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + # 40_59 42 + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + # 40_59 43 + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + # 40_59 44 + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + # 40_59 45 + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + # 40_59 46 + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + # 40_59 47 + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + # 40_59 48 + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 
32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + # 40_59 49 + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + # 40_59 50 + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + # 40_59 51 + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + # 40_59 52 + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + # 40_59 53 + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + # 40_59 54 + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + # 40_59 55 + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + # 40_59 56 + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + # 40_59 57 + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + # 40_59 58 + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 
28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + # 40_59 59 + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + # 20_39 60 + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + # 20_39 61 + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + # 20_39 62 + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + # 20_39 63 + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + # 20_39 64 + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 20_39 65 + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 20_39 66 + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 20_39 67 + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 68 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 69 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 
24(%esp),%ebx + addl %ebp,%ecx + # 20_39 70 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 71 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 72 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 73 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 74 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 75 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 76 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 77 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 78 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 79 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb L002loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__sha1_block_data_order_shaext: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L003pic_point +L003pic_point: + popl %ebp + leal LK_XX_XX-L003pic_point(%ebp),%ebp +Lshaext_shortcut: 
+ movl 20(%esp),%edi + movl %esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 102,15,56,0,251 + jmp L004loop_shaext +.align 4,0x90 +L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4 +__sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call L005pic_point +L005pic_point: + popl %ebp + leal LK_XX_XX-L005pic_point(%ebp),%ebp +Lssse3_shortcut: + movdqa (%ebp),%xmm7 + movdqa 16(%ebp),%xmm0 + movdqa 
32(%ebp),%xmm1 + movdqa 48(%ebp),%xmm2 + movdqa 64(%ebp),%xmm6 + movl 20(%esp),%edi + movl 24(%esp),%ebp + movl 28(%esp),%edx + movl %esp,%esi + subl $208,%esp + andl $-64,%esp + movdqa %xmm0,112(%esp) + movdqa %xmm1,128(%esp) + movdqa %xmm2,144(%esp) + shll $6,%edx + movdqa %xmm7,160(%esp) + addl %ebp,%edx + movdqa %xmm6,176(%esp) + addl $64,%ebp + movl %edi,192(%esp) + movl %ebp,196(%esp) + movl %edx,200(%esp) + movl %esi,204(%esp) + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl 16(%edi),%edi + movl %ebx,%esi + movdqu -64(%ebp),%xmm0 + movdqu -48(%ebp),%xmm1 + movdqu -32(%ebp),%xmm2 + movdqu -16(%ebp),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + movdqa %xmm7,96(%esp) +.byte 102,15,56,0,222 + paddd %xmm7,%xmm0 + paddd %xmm7,%xmm1 + paddd %xmm7,%xmm2 + movdqa %xmm0,(%esp) + psubd %xmm7,%xmm0 + movdqa %xmm1,16(%esp) + psubd %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + movl %ecx,%ebp + psubd %xmm7,%xmm2 + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp L006loop +.align 4,0x90 +L006loop: + rorl $2,%ebx + xorl %edx,%esi + movl %eax,%ebp + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx + paddd %xmm3,%xmm7 + movdqa %xmm0,64(%esp) + roll $5,%eax + addl %esi,%edi + psrldq $4,%xmm6 + andl %ebx,%ebp + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%edi + rorl $7,%eax + pxor %xmm2,%xmm6 + xorl %ecx,%ebp + movl %edi,%esi + addl 4(%esp),%edx + pxor %xmm6,%xmm4 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm7,48(%esp) + addl %ebp,%edx + andl %eax,%esi + movdqa %xmm4,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi + pslldq $12,%xmm0 + paddd %xmm4,%xmm4 + movl %edx,%ebp + addl 8(%esp),%ecx + psrld $31,%xmm6 + xorl %eax,%edi + roll $5,%edx + movdqa %xmm0,%xmm7 + addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi + psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx + por %xmm6,%xmm4 + xorl %eax,%ebp + movl %ecx,%esi + addl 12(%esp),%ebx + pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx + pxor %xmm0,%xmm4 + movdqa 96(%esp),%xmm0 + addl %ebp,%ebx + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi + movl %ebx,%ebp + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx + paddd %xmm4,%xmm0 + movdqa %xmm1,80(%esp) + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm7 + andl %ecx,%ebp + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm7 + xorl %edx,%ebp + movl %eax,%esi + addl 20(%esp),%edi + pxor %xmm7,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm0,(%esp) + addl %ebp,%edi + andl %ebx,%esi + movdqa %xmm5,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi + pslldq $12,%xmm1 + paddd %xmm5,%xmm5 + movl %edi,%ebp + addl 24(%esp),%edx + psrld $31,%xmm7 + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax + psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi + por %xmm7,%xmm5 + xorl %ebx,%ebp + movl %edx,%esi + addl 28(%esp),%ecx + pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx + pxor %xmm1,%xmm5 + movdqa 112(%esp),%xmm1 + addl %ebp,%ecx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx + paddd %xmm5,%xmm1 + movdqa %xmm2,96(%esp) + roll $5,%ecx + addl %esi,%ebx + psrldq 
$4,%xmm0 + andl %edx,%ebp + xorl %edi,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm0 + xorl %edi,%ebp + movl %ebx,%esi + addl 36(%esp),%eax + pxor %xmm0,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm1,16(%esp) + addl %ebp,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi + pslldq $12,%xmm2 + paddd %xmm6,%xmm6 + movl %eax,%ebp + addl 40(%esp),%edi + psrld $31,%xmm0 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm2,%xmm1 + addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx + psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax + por %xmm0,%xmm6 + xorl %ecx,%ebp + movdqa 64(%esp),%xmm0 + movl %edi,%esi + addl 44(%esp),%edx + pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi + pxor %xmm2,%xmm6 + movdqa 112(%esp),%xmm2 + addl %ebp,%edx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi + movl %edx,%ebp + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi + paddd %xmm6,%xmm2 + movdqa %xmm3,64(%esp) + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm1 + andl %edi,%ebp + xorl %eax,%edi + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm1 + xorl %eax,%ebp + movl %ecx,%esi + addl 52(%esp),%ebx + pxor %xmm1,%xmm7 + xorl %edi,%edx + roll $5,%ecx + movdqa %xmm2,32(%esp) + addl %ebp,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi + pslldq $12,%xmm3 + paddd %xmm7,%xmm7 + movl %ebx,%ebp + addl 56(%esp),%eax + psrld $31,%xmm1 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm3,%xmm2 + addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx + psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx + por %xmm1,%xmm7 + xorl %edx,%ebp + movdqa 80(%esp),%xmm1 + movl %eax,%esi + addl 60(%esp),%edi + pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax + pxor %xmm3,%xmm7 + movdqa 112(%esp),%xmm3 + addl %ebp,%edi + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 + xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi + movl %edi,%ebp + addl (%esp),%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,80(%esp) + xorl %ebx,%eax + roll $5,%edi + movdqa %xmm3,%xmm4 + addl %esi,%edx + paddd %xmm7,%xmm3 + andl %eax,%ebp + pxor %xmm2,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi + roll $5,%edx + pslld $2,%xmm0 + addl %ebp,%ecx + andl %edi,%esi + psrld $30,%xmm2 + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx + roll $5,%ecx + por %xmm2,%xmm0 + addl %esi,%ebx + andl %edx,%ebp + movdqa 96(%esp),%xmm2 + xorl %edi,%edx + addl %ecx,%ebx + addl 12(%esp),%eax + xorl %edi,%ebp + movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%esp),%edi + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm2,%xmm1 + movdqa %xmm5,96(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm4,%xmm5 + rorl $7,%ebx + paddd %xmm0,%xmm4 + addl %eax,%edi + pxor %xmm3,%xmm1 + addl 20(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%esp),%ecx + xorl %eax,%esi + psrld 
$30,%xmm3 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm3,%xmm1 + addl 28(%esp),%ebx + xorl %edi,%ebp + movdqa 64(%esp),%xmm3 + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx + addl 32(%esp),%eax + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + pxor %xmm3,%xmm2 + movdqa %xmm6,64(%esp) + addl %esi,%eax + xorl %edx,%ebp + movdqa 128(%esp),%xmm6 + rorl $7,%ecx + paddd %xmm1,%xmm5 + addl %ebx,%eax + pxor %xmm4,%xmm2 + addl 36(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + pslld $2,%xmm2 + addl 40(%esp),%edx + xorl %ebx,%esi + psrld $30,%xmm4 + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + por %xmm4,%xmm2 + addl 44(%esp),%ecx + xorl %eax,%ebp + movdqa 80(%esp),%xmm4 + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx + addl 48(%esp),%ebx + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + pxor %xmm4,%xmm3 + movdqa %xmm7,80(%esp) + addl %esi,%ebx + xorl %edi,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%edx + paddd %xmm2,%xmm6 + addl %ecx,%ebx + pxor %xmm5,%xmm3 + addl 52(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%esp),%edi + xorl %ecx,%esi + psrld $30,%xmm5 + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + por %xmm5,%xmm3 + addl 60(%esp),%edx + xorl %ebx,%ebp + movdqa 96(%esp),%xmm5 + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx + addl (%esp),%ecx + pxor %xmm0,%xmm4 + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + pxor %xmm5,%xmm4 + movdqa %xmm0,96(%esp) + addl %esi,%ecx + xorl %eax,%ebp + movdqa %xmm7,%xmm0 + rorl $7,%edi + paddd %xmm3,%xmm7 + addl %edx,%ecx + pxor %xmm6,%xmm4 + addl 4(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm6 + movdqa %xmm7,48(%esp) + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%esp),%eax + xorl %edx,%esi + psrld $30,%xmm6 + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + por %xmm6,%xmm4 + addl 12(%esp),%edi + xorl %ecx,%ebp + movdqa 64(%esp),%xmm6 + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi + addl 16(%esp),%edx + pxor %xmm1,%xmm5 + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + pxor %xmm6,%xmm5 + movdqa %xmm1,64(%esp) + addl %esi,%edx + xorl %ebx,%ebp + movdqa %xmm0,%xmm1 + rorl $7,%eax + paddd %xmm4,%xmm0 + addl %edi,%edx + pxor %xmm7,%xmm5 + addl 20(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm7 + movdqa %xmm0,(%esp) + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%esp),%ebx + xorl %edi,%esi + psrld $30,%xmm7 + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + por %xmm7,%xmm5 + addl 28(%esp),%eax + movdqa 80(%esp),%xmm7 + rorl $7,%ecx + movl 
%ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%esp),%edi + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) + movl %eax,%ebp + xorl %ecx,%esi + roll $5,%eax + movdqa %xmm1,%xmm2 + addl %esi,%edi + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx + addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp + movdqa %xmm6,%xmm0 + movdqa %xmm1,16(%esp) + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + pslld $2,%xmm6 + addl %ebp,%edx + xorl %eax,%esi + psrld $30,%xmm0 + xorl %ebx,%eax + addl %edi,%edx + addl 40(%esp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + por %xmm0,%xmm6 + movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 + roll $5,%edx + addl %esi,%ecx + xorl %edi,%ebp + xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 + addl 44(%esp),%ebx + andl %edi,%ebp + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + addl %ebp,%ebx + xorl %edx,%esi + xorl %edi,%edx + addl %ecx,%ebx + addl 48(%esp),%eax + pxor %xmm3,%xmm7 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movdqa %xmm3,96(%esp) + movl %ebx,%ebp + xorl %edx,%esi + roll $5,%ebx + movdqa 144(%esp),%xmm3 + addl %esi,%eax + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp + movdqa %xmm7,%xmm1 + movdqa %xmm2,32(%esp) + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + pslld $2,%xmm7 + addl %ebp,%edi + xorl %ebx,%esi + psrld $30,%xmm1 + xorl %ecx,%ebx + addl %eax,%edi + addl 56(%esp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + por %xmm1,%xmm7 + movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 + roll $5,%edi + addl %esi,%edx + xorl %eax,%ebp + xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 + addl 60(%esp),%ecx + andl %eax,%ebp + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + addl %ebp,%ecx + xorl %edi,%esi + xorl %eax,%edi + addl %edx,%ecx + addl (%esp),%ebx + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + pxor %xmm1,%xmm0 + movdqa %xmm4,64(%esp) + movl %ecx,%ebp + xorl %edi,%esi + roll $5,%ecx + movdqa %xmm3,%xmm4 + addl %esi,%ebx + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx + addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp + movdqa %xmm0,%xmm2 + movdqa %xmm3,48(%esp) + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + pslld $2,%xmm0 + addl %ebp,%eax + xorl %ecx,%esi + psrld $30,%xmm2 + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%esp),%edi + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + por %xmm2,%xmm0 + movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 + roll $5,%eax + addl %esi,%edi + xorl %ebx,%ebp + xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 + addl 12(%esp),%edx + andl %ebx,%ebp + xorl %ecx,%ebx + rorl $7,%eax + movl %edi,%esi + xorl %ebx,%ebp + roll $5,%edi + addl %ebp,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %edi,%edx + addl 16(%esp),%ecx + pxor %xmm5,%xmm1 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%edi + pxor %xmm2,%xmm1 + movdqa %xmm5,80(%esp) + movl %edx,%ebp + xorl %eax,%esi + roll $5,%edx + movdqa %xmm4,%xmm5 + addl %esi,%ecx + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor 
%xmm3,%xmm1 + xorl %eax,%edi + addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp + movdqa %xmm1,%xmm3 + movdqa %xmm4,(%esp) + xorl %eax,%edi + rorl $7,%edx + movl %ecx,%esi + xorl %edi,%ebp + roll $5,%ecx + pslld $2,%xmm1 + addl %ebp,%ebx + xorl %edx,%esi + psrld $30,%xmm3 + xorl %edi,%edx + addl %ecx,%ebx + addl 24(%esp),%eax + andl %edx,%esi + xorl %edi,%edx + rorl $7,%ecx + por %xmm3,%xmm1 + movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%ebp + xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 + addl 28(%esp),%edi + andl %ecx,%ebp + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%ebp + roll $5,%eax + addl %ebp,%edi + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%edi + addl 32(%esp),%edx + pxor %xmm6,%xmm2 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movdqa %xmm6,96(%esp) + movl %edi,%ebp + xorl %ebx,%esi + roll $5,%edi + movdqa %xmm5,%xmm6 + addl %esi,%edx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax + addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp + movdqa %xmm2,%xmm4 + movdqa %xmm5,16(%esp) + xorl %ebx,%eax + rorl $7,%edi + movl %edx,%esi + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm2 + addl %ebp,%ecx + xorl %edi,%esi + psrld $30,%xmm4 + xorl %eax,%edi + addl %edx,%ecx + addl 40(%esp),%ebx + andl %edi,%esi + xorl %eax,%edi + rorl $7,%edx + por %xmm4,%xmm2 + movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%ebp + xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 + addl 44(%esp),%eax + andl %edx,%ebp + xorl %edi,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%ebp + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%esp),%edi + pxor %xmm7,%xmm3 + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + pxor %xmm4,%xmm3 + movdqa %xmm7,64(%esp) + addl %esi,%edi + xorl %ecx,%ebp + movdqa %xmm6,%xmm7 + rorl $7,%ebx + paddd %xmm2,%xmm6 + addl %eax,%edi + pxor %xmm5,%xmm3 + addl 52(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + movdqa %xmm3,%xmm5 + movdqa %xmm6,32(%esp) + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%esp),%ecx + xorl %eax,%esi + psrld $30,%xmm5 + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + por %xmm5,%xmm3 + addl 60(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + movdqa %xmm7,48(%esp) + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + movl 196(%esp),%ebp + cmpl 200(%esp),%ebp + je L007done + movdqa 160(%esp),%xmm7 + movdqa 176(%esp),%xmm6 + movdqu (%ebp),%xmm0 + movdqu 16(%ebp),%xmm1 + movdqu 32(%ebp),%xmm2 + movdqu 48(%ebp),%xmm3 + addl $64,%ebp +.byte 102,15,56,0,198 + movl %ebp,196(%esp) + movdqa %xmm7,96(%esp) + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl 
$7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + paddd %xmm7,%xmm0 + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + psubd %xmm7,%xmm0 + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + paddd %xmm7,%xmm1 + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + psubd %xmm7,%xmm1 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + paddd %xmm7,%xmm2 + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + psubd %xmm7,%xmm2 + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %ecx,%ebx + movl %edx,12(%ebp) + xorl %edx,%ebx + movl %edi,16(%ebp) + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp L006loop +.align 4,0x90 +L007done: + addl 16(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp + roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi + movl %eax,%ebp + roll $5,%eax + addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp + movl %edi,%esi + roll $5,%edi + addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi + movl %edx,%ebp + roll $5,%edx + addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp + movl %ecx,%esi + roll $5,%ecx + addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi + movl %ebx,%ebp + roll $5,%ebx + addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp + movl %eax,%esi + roll $5,%eax + addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi + movl %edi,%ebp + roll $5,%edi + addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp + movl %edx,%esi + roll $5,%edx + addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi + movl %ecx,%ebp 
+ roll $5,%ecx + addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp + movl %ebx,%esi + roll $5,%ebx + addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax + movl 192(%esp),%ebp + addl (%ebp),%eax + movl 204(%esp),%esp + addl 4(%ebp),%esi + addl 8(%ebp),%ecx + movl %eax,(%ebp) + addl 12(%ebp),%edx + movl %esi,4(%ebp) + addl 16(%ebp),%edi + movl %ecx,8(%ebp) + movl %edx,12(%ebp) + movl %edi,16(%ebp) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +LK_XX_XX: +.long 1518500249,1518500249,1518500249,1518500249 +.long 1859775393,1859775393,1859775393,1859775393 +.long 2400959708,2400959708,2400959708,2400959708 +.long 3395469782,3395469782,3395469782,3395469782 +.long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha256-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha256-586.s new file mode 100644 index 00000000000000..37f532aa5fb686 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha256-586.s @@ -0,0 +1,4579 @@ +.file "sha512-586.s" +.text +.globl _sha256_block_data_order +.align 4 +_sha256_block_data_order: +L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K256-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + testl $512,%ebx + jnz L005SSSE3 +L003no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae L006unrolled + jmp L002loop +.align 4,0x90 +L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 
24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 4,0x90 +L00700_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne L00700_15 + movl 156(%esp),%ecx + jmp L00816_63 +.align 4,0x90 +L00816_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne L00816_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 4,0x90 +L006unrolled: + leal -96(%esp),%esp + 
movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp L009grand_loop +.align 4,0x90 +L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 
4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl 
%ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 
24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl 
%ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl 
%edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl 
%edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + 
movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + 
movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + 
xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + 
movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl 
%esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx 
+ addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl 
%ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl 
%ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L004shaext: + subl $32,%esp + movdqu (%esi),%xmm1 + leal 128(%ebp),%ebp + movdqu 16(%esi),%xmm2 + movdqa 128(%ebp),%xmm7 + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp L010loop_shaext +.align 4,0x90 +L010loop_shaext: + movdqu (%edi),%xmm3 + movdqu 16(%edi),%xmm4 + movdqu 32(%edi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%edi),%xmm6 + movdqa %xmm2,16(%esp) + movdqa -128(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,(%esp) +.byte 15,56,203,202 + movdqa -112(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leal 64(%edi),%edi +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 
15,56,204,229 +.byte 15,56,203,202 + movdqa -80(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa -64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa -48(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -16(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa (%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 16(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 48(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + movdqa 96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa 128(%ebp),%xmm7 +.byte 15,56,203,202 + movdqa 112(%ebp),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + cmpl %edi,%eax + nop +.byte 15,56,203,202 + paddd 16(%esp),%xmm2 + paddd (%esp),%xmm1 + jnz L010loop_shaext + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + movl 44(%esp),%esp + movdqu %xmm1,(%esi) + movdqu %xmm2,16(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L005SSSE3: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp L011grand_ssse3 +.align 4,0x90 
+L011grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp L012ssse3_00_47 +.align 4,0x90 +L012ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl 
$14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + 
xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 
8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne L012ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl 
%ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl 
%ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + 
movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb L011grand_ssse3 + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha512-586.s b/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha512-586.s new file mode 100644 index 00000000000000..d539b1d168116f --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/sha/sha512-586.s @@ -0,0 +1,2831 @@ +.file "sha512-586.s" +.text +.globl _sha512_block_data_order +.align 4 +_sha512_block_data_order: +L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K512-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K512(%ebp),%edx + movl (%edx),%ecx + testl $67108864,%ecx + jz L002loop_x86 + movl 4(%edx),%edx + movq (%esi),%mm0 + andl $16777216,%ecx + movq 8(%esi),%mm1 + andl $512,%edx + movq 16(%esi),%mm2 + orl %edx,%ecx + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je L003SSSE3 + subl $80,%esp + jmp L004loop_sse2 +.align 4,0x90 +L004loop_sse2: + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + pxor %mm1,%mm2 + movq %mm7,56(%esp) + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx + addl $8,%edi + movl $15,%edx + bswap %eax + bswap %ebx + jmp L00500_14_sse2 +.align 4,0x90 +L00500_14_sse2: + movd %eax,%mm1 + movl (%edi),%eax + movd %ebx,%mm7 + movl 4(%edi),%ebx + addl $8,%edi + bswap %eax + bswap %ebx + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + movq 48(%esp),%mm6 + decl %edx + jnz L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + 
psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp L00616_79_sse2 +.align 4,0x90 +L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq 
%mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 5,0x90 +L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp L00800_47_ssse3 +.align 5,0x90 +L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) 
+ pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + 
pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + 
psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 + movq %mm4,%mm1 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor 
%xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + 
pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq 
%mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq 
%mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movq %mm4,%mm1 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + 
psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + 
movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + cmpl %eax,%edi + jb L007loop_ssse3 + movl 76(%edx),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 4,0x90 +L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + 
bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 4,0x90 +L00900_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne L00900_15_x86 +.align 4,0x90 +L01016_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl 
%ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne L01016_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax 
+ movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/whrlpool/wp-mmx.s b/deps/openssl/asm_obsolete/x86-macosx-gas/whrlpool/wp-mmx.s new 
file mode 100644 index 00000000000000..379e585263db4d --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/whrlpool/wp-mmx.s @@ -0,0 +1,1105 @@ +.file "wp-mmx.s" +.text +.globl _whirlpool_block_mmx +.align 4 +_whirlpool_block_mmx: +L_whirlpool_block_mmx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%ebp + movl %esp,%eax + subl $148,%esp + andl $-64,%esp + leal 128(%esp),%ebx + movl %esi,(%ebx) + movl %edi,4(%ebx) + movl %ebp,8(%ebx) + movl %eax,16(%ebx) + call L000pic_point +L000pic_point: + popl %ebp + leal L001table-L000pic_point(%ebp),%ebp + xorl %ecx,%ecx + xorl %edx,%edx + movq (%esi),%mm0 + movq 8(%esi),%mm1 + movq 16(%esi),%mm2 + movq 24(%esi),%mm3 + movq 32(%esi),%mm4 + movq 40(%esi),%mm5 + movq 48(%esi),%mm6 + movq 56(%esi),%mm7 +L002outerloop: + movq %mm0,(%esp) + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm4,32(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + movq %mm7,56(%esp) + pxor (%edi),%mm0 + pxor 8(%edi),%mm1 + pxor 16(%edi),%mm2 + pxor 24(%edi),%mm3 + pxor 32(%edi),%mm4 + pxor 40(%edi),%mm5 + pxor 48(%edi),%mm6 + pxor 56(%edi),%mm7 + movq %mm0,64(%esp) + movq %mm1,72(%esp) + movq %mm2,80(%esp) + movq %mm3,88(%esp) + movq %mm4,96(%esp) + movq %mm5,104(%esp) + movq %mm6,112(%esp) + movq %mm7,120(%esp) + xorl %esi,%esi + movl %esi,12(%ebx) +.align 4,0x90 +L003round: + movq 4096(%ebp,%esi,8),%mm0 + movl (%esp),%eax + movl 4(%esp),%ebx + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm0 + movq 7(%ebp,%edi,8),%mm1 + movl 8(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + movq 6(%ebp,%esi,8),%mm2 + movq 5(%ebp,%edi,8),%mm3 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + movq 4(%ebp,%esi,8),%mm4 + movq 3(%ebp,%edi,8),%mm5 + movl 12(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + movq 2(%ebp,%esi,8),%mm6 + movq 1(%ebp,%edi,8),%mm7 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm1 + pxor 7(%ebp,%edi,8),%mm2 + movl 16(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm3 + pxor 5(%ebp,%edi,8),%mm4 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm5 + pxor 3(%ebp,%edi,8),%mm6 + movl 20(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm7 + pxor 1(%ebp,%edi,8),%mm0 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm2 + pxor 7(%ebp,%edi,8),%mm3 + movl 24(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm4 + pxor 5(%ebp,%edi,8),%mm5 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm6 + pxor 3(%ebp,%edi,8),%mm7 + movl 28(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm0 + pxor 1(%ebp,%edi,8),%mm1 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 
(%ebp,%esi,8),%mm3 + pxor 7(%ebp,%edi,8),%mm4 + movl 32(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm5 + pxor 5(%ebp,%edi,8),%mm6 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm7 + pxor 3(%ebp,%edi,8),%mm0 + movl 36(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm1 + pxor 1(%ebp,%edi,8),%mm2 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm4 + pxor 7(%ebp,%edi,8),%mm5 + movl 40(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm6 + pxor 5(%ebp,%edi,8),%mm7 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm0 + pxor 3(%ebp,%edi,8),%mm1 + movl 44(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm2 + pxor 1(%ebp,%edi,8),%mm3 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm5 + pxor 7(%ebp,%edi,8),%mm6 + movl 48(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm7 + pxor 5(%ebp,%edi,8),%mm0 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm1 + pxor 3(%ebp,%edi,8),%mm2 + movl 52(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm3 + pxor 1(%ebp,%edi,8),%mm4 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm6 + pxor 7(%ebp,%edi,8),%mm7 + movl 56(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm0 + pxor 5(%ebp,%edi,8),%mm1 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm2 + pxor 3(%ebp,%edi,8),%mm3 + movl 60(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm4 + pxor 1(%ebp,%edi,8),%mm5 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm7 + pxor 7(%ebp,%edi,8),%mm0 + movl 64(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm1 + pxor 5(%ebp,%edi,8),%mm2 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm3 + pxor 3(%ebp,%edi,8),%mm4 + movl 68(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm5 + pxor 1(%ebp,%edi,8),%mm6 + movq %mm0,(%esp) + movq %mm1,8(%esp) + movq %mm2,16(%esp) + movq %mm3,24(%esp) + movq %mm4,32(%esp) + movq %mm5,40(%esp) + movq %mm6,48(%esp) + movq %mm7,56(%esp) + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm0 + pxor 7(%ebp,%edi,8),%mm1 + movl 72(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm2 + 
pxor 5(%ebp,%edi,8),%mm3 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm4 + pxor 3(%ebp,%edi,8),%mm5 + movl 76(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm6 + pxor 1(%ebp,%edi,8),%mm7 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm1 + pxor 7(%ebp,%edi,8),%mm2 + movl 80(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm3 + pxor 5(%ebp,%edi,8),%mm4 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm5 + pxor 3(%ebp,%edi,8),%mm6 + movl 84(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm7 + pxor 1(%ebp,%edi,8),%mm0 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm2 + pxor 7(%ebp,%edi,8),%mm3 + movl 88(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm4 + pxor 5(%ebp,%edi,8),%mm5 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm6 + pxor 3(%ebp,%edi,8),%mm7 + movl 92(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm0 + pxor 1(%ebp,%edi,8),%mm1 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm3 + pxor 7(%ebp,%edi,8),%mm4 + movl 96(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm5 + pxor 5(%ebp,%edi,8),%mm6 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm7 + pxor 3(%ebp,%edi,8),%mm0 + movl 100(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm1 + pxor 1(%ebp,%edi,8),%mm2 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm4 + pxor 7(%ebp,%edi,8),%mm5 + movl 104(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm6 + pxor 5(%ebp,%edi,8),%mm7 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm0 + pxor 3(%ebp,%edi,8),%mm1 + movl 108(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm2 + pxor 1(%ebp,%edi,8),%mm3 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm5 + pxor 7(%ebp,%edi,8),%mm6 + movl 112(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm7 + pxor 5(%ebp,%edi,8),%mm0 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm1 + pxor 3(%ebp,%edi,8),%mm2 + movl 116(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm3 + pxor 
1(%ebp,%edi,8),%mm4 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm6 + pxor 7(%ebp,%edi,8),%mm7 + movl 120(%esp),%eax + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm0 + pxor 5(%ebp,%edi,8),%mm1 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm2 + pxor 3(%ebp,%edi,8),%mm3 + movl 124(%esp),%ebx + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm4 + pxor 1(%ebp,%edi,8),%mm5 + shrl $16,%eax + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor (%ebp,%esi,8),%mm7 + pxor 7(%ebp,%edi,8),%mm0 + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 6(%ebp,%esi,8),%mm1 + pxor 5(%ebp,%edi,8),%mm2 + shrl $16,%ebx + leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx + leal (%edx,%edx,1),%edi + movzbl %bh,%edx + pxor 4(%ebp,%esi,8),%mm3 + pxor 3(%ebp,%edi,8),%mm4 + leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx + leal (%edx,%edx,1),%edi + movzbl %ah,%edx + pxor 2(%ebp,%esi,8),%mm5 + pxor 1(%ebp,%edi,8),%mm6 + leal 128(%esp),%ebx + movl 12(%ebx),%esi + addl $1,%esi + cmpl $10,%esi + je L004roundsdone + movl %esi,12(%ebx) + movq %mm0,64(%esp) + movq %mm1,72(%esp) + movq %mm2,80(%esp) + movq %mm3,88(%esp) + movq %mm4,96(%esp) + movq %mm5,104(%esp) + movq %mm6,112(%esp) + movq %mm7,120(%esp) + jmp L003round +.align 4,0x90 +L004roundsdone: + movl (%ebx),%esi + movl 4(%ebx),%edi + movl 8(%ebx),%eax + pxor (%edi),%mm0 + pxor 8(%edi),%mm1 + pxor 16(%edi),%mm2 + pxor 24(%edi),%mm3 + pxor 32(%edi),%mm4 + pxor 40(%edi),%mm5 + pxor 48(%edi),%mm6 + pxor 56(%edi),%mm7 + pxor (%esi),%mm0 + pxor 8(%esi),%mm1 + pxor 16(%esi),%mm2 + pxor 24(%esi),%mm3 + pxor 32(%esi),%mm4 + pxor 40(%esi),%mm5 + pxor 48(%esi),%mm6 + pxor 56(%esi),%mm7 + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal 64(%edi),%edi + subl $1,%eax + jz L005alldone + movl %edi,4(%ebx) + movl %eax,8(%ebx) + jmp L002outerloop +L005alldone: + emms + movl 16(%ebx),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001table: +.byte 24,24,96,24,192,120,48,216 +.byte 24,24,96,24,192,120,48,216 +.byte 35,35,140,35,5,175,70,38 +.byte 35,35,140,35,5,175,70,38 +.byte 198,198,63,198,126,249,145,184 +.byte 198,198,63,198,126,249,145,184 +.byte 232,232,135,232,19,111,205,251 +.byte 232,232,135,232,19,111,205,251 +.byte 135,135,38,135,76,161,19,203 +.byte 135,135,38,135,76,161,19,203 +.byte 184,184,218,184,169,98,109,17 +.byte 184,184,218,184,169,98,109,17 +.byte 1,1,4,1,8,5,2,9 +.byte 1,1,4,1,8,5,2,9 +.byte 79,79,33,79,66,110,158,13 +.byte 79,79,33,79,66,110,158,13 +.byte 54,54,216,54,173,238,108,155 +.byte 54,54,216,54,173,238,108,155 +.byte 166,166,162,166,89,4,81,255 +.byte 166,166,162,166,89,4,81,255 +.byte 210,210,111,210,222,189,185,12 +.byte 210,210,111,210,222,189,185,12 +.byte 245,245,243,245,251,6,247,14 +.byte 245,245,243,245,251,6,247,14 +.byte 121,121,249,121,239,128,242,150 +.byte 121,121,249,121,239,128,242,150 +.byte 111,111,161,111,95,206,222,48 +.byte 111,111,161,111,95,206,222,48 +.byte 145,145,126,145,252,239,63,109 +.byte 145,145,126,145,252,239,63,109 +.byte 82,82,85,82,170,7,164,248 +.byte 82,82,85,82,170,7,164,248 +.byte 96,96,157,96,39,253,192,71 +.byte 
96,96,157,96,39,253,192,71 +.byte 188,188,202,188,137,118,101,53 +.byte 188,188,202,188,137,118,101,53 +.byte 155,155,86,155,172,205,43,55 +.byte 155,155,86,155,172,205,43,55 +.byte 142,142,2,142,4,140,1,138 +.byte 142,142,2,142,4,140,1,138 +.byte 163,163,182,163,113,21,91,210 +.byte 163,163,182,163,113,21,91,210 +.byte 12,12,48,12,96,60,24,108 +.byte 12,12,48,12,96,60,24,108 +.byte 123,123,241,123,255,138,246,132 +.byte 123,123,241,123,255,138,246,132 +.byte 53,53,212,53,181,225,106,128 +.byte 53,53,212,53,181,225,106,128 +.byte 29,29,116,29,232,105,58,245 +.byte 29,29,116,29,232,105,58,245 +.byte 224,224,167,224,83,71,221,179 +.byte 224,224,167,224,83,71,221,179 +.byte 215,215,123,215,246,172,179,33 +.byte 215,215,123,215,246,172,179,33 +.byte 194,194,47,194,94,237,153,156 +.byte 194,194,47,194,94,237,153,156 +.byte 46,46,184,46,109,150,92,67 +.byte 46,46,184,46,109,150,92,67 +.byte 75,75,49,75,98,122,150,41 +.byte 75,75,49,75,98,122,150,41 +.byte 254,254,223,254,163,33,225,93 +.byte 254,254,223,254,163,33,225,93 +.byte 87,87,65,87,130,22,174,213 +.byte 87,87,65,87,130,22,174,213 +.byte 21,21,84,21,168,65,42,189 +.byte 21,21,84,21,168,65,42,189 +.byte 119,119,193,119,159,182,238,232 +.byte 119,119,193,119,159,182,238,232 +.byte 55,55,220,55,165,235,110,146 +.byte 55,55,220,55,165,235,110,146 +.byte 229,229,179,229,123,86,215,158 +.byte 229,229,179,229,123,86,215,158 +.byte 159,159,70,159,140,217,35,19 +.byte 159,159,70,159,140,217,35,19 +.byte 240,240,231,240,211,23,253,35 +.byte 240,240,231,240,211,23,253,35 +.byte 74,74,53,74,106,127,148,32 +.byte 74,74,53,74,106,127,148,32 +.byte 218,218,79,218,158,149,169,68 +.byte 218,218,79,218,158,149,169,68 +.byte 88,88,125,88,250,37,176,162 +.byte 88,88,125,88,250,37,176,162 +.byte 201,201,3,201,6,202,143,207 +.byte 201,201,3,201,6,202,143,207 +.byte 41,41,164,41,85,141,82,124 +.byte 41,41,164,41,85,141,82,124 +.byte 10,10,40,10,80,34,20,90 +.byte 10,10,40,10,80,34,20,90 +.byte 177,177,254,177,225,79,127,80 +.byte 177,177,254,177,225,79,127,80 +.byte 160,160,186,160,105,26,93,201 +.byte 160,160,186,160,105,26,93,201 +.byte 107,107,177,107,127,218,214,20 +.byte 107,107,177,107,127,218,214,20 +.byte 133,133,46,133,92,171,23,217 +.byte 133,133,46,133,92,171,23,217 +.byte 189,189,206,189,129,115,103,60 +.byte 189,189,206,189,129,115,103,60 +.byte 93,93,105,93,210,52,186,143 +.byte 93,93,105,93,210,52,186,143 +.byte 16,16,64,16,128,80,32,144 +.byte 16,16,64,16,128,80,32,144 +.byte 244,244,247,244,243,3,245,7 +.byte 244,244,247,244,243,3,245,7 +.byte 203,203,11,203,22,192,139,221 +.byte 203,203,11,203,22,192,139,221 +.byte 62,62,248,62,237,198,124,211 +.byte 62,62,248,62,237,198,124,211 +.byte 5,5,20,5,40,17,10,45 +.byte 5,5,20,5,40,17,10,45 +.byte 103,103,129,103,31,230,206,120 +.byte 103,103,129,103,31,230,206,120 +.byte 228,228,183,228,115,83,213,151 +.byte 228,228,183,228,115,83,213,151 +.byte 39,39,156,39,37,187,78,2 +.byte 39,39,156,39,37,187,78,2 +.byte 65,65,25,65,50,88,130,115 +.byte 65,65,25,65,50,88,130,115 +.byte 139,139,22,139,44,157,11,167 +.byte 139,139,22,139,44,157,11,167 +.byte 167,167,166,167,81,1,83,246 +.byte 167,167,166,167,81,1,83,246 +.byte 125,125,233,125,207,148,250,178 +.byte 125,125,233,125,207,148,250,178 +.byte 149,149,110,149,220,251,55,73 +.byte 149,149,110,149,220,251,55,73 +.byte 216,216,71,216,142,159,173,86 +.byte 216,216,71,216,142,159,173,86 +.byte 251,251,203,251,139,48,235,112 +.byte 251,251,203,251,139,48,235,112 +.byte 238,238,159,238,35,113,193,205 +.byte 238,238,159,238,35,113,193,205 +.byte 
124,124,237,124,199,145,248,187 +.byte 124,124,237,124,199,145,248,187 +.byte 102,102,133,102,23,227,204,113 +.byte 102,102,133,102,23,227,204,113 +.byte 221,221,83,221,166,142,167,123 +.byte 221,221,83,221,166,142,167,123 +.byte 23,23,92,23,184,75,46,175 +.byte 23,23,92,23,184,75,46,175 +.byte 71,71,1,71,2,70,142,69 +.byte 71,71,1,71,2,70,142,69 +.byte 158,158,66,158,132,220,33,26 +.byte 158,158,66,158,132,220,33,26 +.byte 202,202,15,202,30,197,137,212 +.byte 202,202,15,202,30,197,137,212 +.byte 45,45,180,45,117,153,90,88 +.byte 45,45,180,45,117,153,90,88 +.byte 191,191,198,191,145,121,99,46 +.byte 191,191,198,191,145,121,99,46 +.byte 7,7,28,7,56,27,14,63 +.byte 7,7,28,7,56,27,14,63 +.byte 173,173,142,173,1,35,71,172 +.byte 173,173,142,173,1,35,71,172 +.byte 90,90,117,90,234,47,180,176 +.byte 90,90,117,90,234,47,180,176 +.byte 131,131,54,131,108,181,27,239 +.byte 131,131,54,131,108,181,27,239 +.byte 51,51,204,51,133,255,102,182 +.byte 51,51,204,51,133,255,102,182 +.byte 99,99,145,99,63,242,198,92 +.byte 99,99,145,99,63,242,198,92 +.byte 2,2,8,2,16,10,4,18 +.byte 2,2,8,2,16,10,4,18 +.byte 170,170,146,170,57,56,73,147 +.byte 170,170,146,170,57,56,73,147 +.byte 113,113,217,113,175,168,226,222 +.byte 113,113,217,113,175,168,226,222 +.byte 200,200,7,200,14,207,141,198 +.byte 200,200,7,200,14,207,141,198 +.byte 25,25,100,25,200,125,50,209 +.byte 25,25,100,25,200,125,50,209 +.byte 73,73,57,73,114,112,146,59 +.byte 73,73,57,73,114,112,146,59 +.byte 217,217,67,217,134,154,175,95 +.byte 217,217,67,217,134,154,175,95 +.byte 242,242,239,242,195,29,249,49 +.byte 242,242,239,242,195,29,249,49 +.byte 227,227,171,227,75,72,219,168 +.byte 227,227,171,227,75,72,219,168 +.byte 91,91,113,91,226,42,182,185 +.byte 91,91,113,91,226,42,182,185 +.byte 136,136,26,136,52,146,13,188 +.byte 136,136,26,136,52,146,13,188 +.byte 154,154,82,154,164,200,41,62 +.byte 154,154,82,154,164,200,41,62 +.byte 38,38,152,38,45,190,76,11 +.byte 38,38,152,38,45,190,76,11 +.byte 50,50,200,50,141,250,100,191 +.byte 50,50,200,50,141,250,100,191 +.byte 176,176,250,176,233,74,125,89 +.byte 176,176,250,176,233,74,125,89 +.byte 233,233,131,233,27,106,207,242 +.byte 233,233,131,233,27,106,207,242 +.byte 15,15,60,15,120,51,30,119 +.byte 15,15,60,15,120,51,30,119 +.byte 213,213,115,213,230,166,183,51 +.byte 213,213,115,213,230,166,183,51 +.byte 128,128,58,128,116,186,29,244 +.byte 128,128,58,128,116,186,29,244 +.byte 190,190,194,190,153,124,97,39 +.byte 190,190,194,190,153,124,97,39 +.byte 205,205,19,205,38,222,135,235 +.byte 205,205,19,205,38,222,135,235 +.byte 52,52,208,52,189,228,104,137 +.byte 52,52,208,52,189,228,104,137 +.byte 72,72,61,72,122,117,144,50 +.byte 72,72,61,72,122,117,144,50 +.byte 255,255,219,255,171,36,227,84 +.byte 255,255,219,255,171,36,227,84 +.byte 122,122,245,122,247,143,244,141 +.byte 122,122,245,122,247,143,244,141 +.byte 144,144,122,144,244,234,61,100 +.byte 144,144,122,144,244,234,61,100 +.byte 95,95,97,95,194,62,190,157 +.byte 95,95,97,95,194,62,190,157 +.byte 32,32,128,32,29,160,64,61 +.byte 32,32,128,32,29,160,64,61 +.byte 104,104,189,104,103,213,208,15 +.byte 104,104,189,104,103,213,208,15 +.byte 26,26,104,26,208,114,52,202 +.byte 26,26,104,26,208,114,52,202 +.byte 174,174,130,174,25,44,65,183 +.byte 174,174,130,174,25,44,65,183 +.byte 180,180,234,180,201,94,117,125 +.byte 180,180,234,180,201,94,117,125 +.byte 84,84,77,84,154,25,168,206 +.byte 84,84,77,84,154,25,168,206 +.byte 147,147,118,147,236,229,59,127 +.byte 147,147,118,147,236,229,59,127 +.byte 34,34,136,34,13,170,68,47 +.byte 34,34,136,34,13,170,68,47 
+.byte 100,100,141,100,7,233,200,99 +.byte 100,100,141,100,7,233,200,99 +.byte 241,241,227,241,219,18,255,42 +.byte 241,241,227,241,219,18,255,42 +.byte 115,115,209,115,191,162,230,204 +.byte 115,115,209,115,191,162,230,204 +.byte 18,18,72,18,144,90,36,130 +.byte 18,18,72,18,144,90,36,130 +.byte 64,64,29,64,58,93,128,122 +.byte 64,64,29,64,58,93,128,122 +.byte 8,8,32,8,64,40,16,72 +.byte 8,8,32,8,64,40,16,72 +.byte 195,195,43,195,86,232,155,149 +.byte 195,195,43,195,86,232,155,149 +.byte 236,236,151,236,51,123,197,223 +.byte 236,236,151,236,51,123,197,223 +.byte 219,219,75,219,150,144,171,77 +.byte 219,219,75,219,150,144,171,77 +.byte 161,161,190,161,97,31,95,192 +.byte 161,161,190,161,97,31,95,192 +.byte 141,141,14,141,28,131,7,145 +.byte 141,141,14,141,28,131,7,145 +.byte 61,61,244,61,245,201,122,200 +.byte 61,61,244,61,245,201,122,200 +.byte 151,151,102,151,204,241,51,91 +.byte 151,151,102,151,204,241,51,91 +.byte 0,0,0,0,0,0,0,0 +.byte 0,0,0,0,0,0,0,0 +.byte 207,207,27,207,54,212,131,249 +.byte 207,207,27,207,54,212,131,249 +.byte 43,43,172,43,69,135,86,110 +.byte 43,43,172,43,69,135,86,110 +.byte 118,118,197,118,151,179,236,225 +.byte 118,118,197,118,151,179,236,225 +.byte 130,130,50,130,100,176,25,230 +.byte 130,130,50,130,100,176,25,230 +.byte 214,214,127,214,254,169,177,40 +.byte 214,214,127,214,254,169,177,40 +.byte 27,27,108,27,216,119,54,195 +.byte 27,27,108,27,216,119,54,195 +.byte 181,181,238,181,193,91,119,116 +.byte 181,181,238,181,193,91,119,116 +.byte 175,175,134,175,17,41,67,190 +.byte 175,175,134,175,17,41,67,190 +.byte 106,106,181,106,119,223,212,29 +.byte 106,106,181,106,119,223,212,29 +.byte 80,80,93,80,186,13,160,234 +.byte 80,80,93,80,186,13,160,234 +.byte 69,69,9,69,18,76,138,87 +.byte 69,69,9,69,18,76,138,87 +.byte 243,243,235,243,203,24,251,56 +.byte 243,243,235,243,203,24,251,56 +.byte 48,48,192,48,157,240,96,173 +.byte 48,48,192,48,157,240,96,173 +.byte 239,239,155,239,43,116,195,196 +.byte 239,239,155,239,43,116,195,196 +.byte 63,63,252,63,229,195,126,218 +.byte 63,63,252,63,229,195,126,218 +.byte 85,85,73,85,146,28,170,199 +.byte 85,85,73,85,146,28,170,199 +.byte 162,162,178,162,121,16,89,219 +.byte 162,162,178,162,121,16,89,219 +.byte 234,234,143,234,3,101,201,233 +.byte 234,234,143,234,3,101,201,233 +.byte 101,101,137,101,15,236,202,106 +.byte 101,101,137,101,15,236,202,106 +.byte 186,186,210,186,185,104,105,3 +.byte 186,186,210,186,185,104,105,3 +.byte 47,47,188,47,101,147,94,74 +.byte 47,47,188,47,101,147,94,74 +.byte 192,192,39,192,78,231,157,142 +.byte 192,192,39,192,78,231,157,142 +.byte 222,222,95,222,190,129,161,96 +.byte 222,222,95,222,190,129,161,96 +.byte 28,28,112,28,224,108,56,252 +.byte 28,28,112,28,224,108,56,252 +.byte 253,253,211,253,187,46,231,70 +.byte 253,253,211,253,187,46,231,70 +.byte 77,77,41,77,82,100,154,31 +.byte 77,77,41,77,82,100,154,31 +.byte 146,146,114,146,228,224,57,118 +.byte 146,146,114,146,228,224,57,118 +.byte 117,117,201,117,143,188,234,250 +.byte 117,117,201,117,143,188,234,250 +.byte 6,6,24,6,48,30,12,54 +.byte 6,6,24,6,48,30,12,54 +.byte 138,138,18,138,36,152,9,174 +.byte 138,138,18,138,36,152,9,174 +.byte 178,178,242,178,249,64,121,75 +.byte 178,178,242,178,249,64,121,75 +.byte 230,230,191,230,99,89,209,133 +.byte 230,230,191,230,99,89,209,133 +.byte 14,14,56,14,112,54,28,126 +.byte 14,14,56,14,112,54,28,126 +.byte 31,31,124,31,248,99,62,231 +.byte 31,31,124,31,248,99,62,231 +.byte 98,98,149,98,55,247,196,85 +.byte 98,98,149,98,55,247,196,85 +.byte 212,212,119,212,238,163,181,58 +.byte 212,212,119,212,238,163,181,58 
+.byte 168,168,154,168,41,50,77,129 +.byte 168,168,154,168,41,50,77,129 +.byte 150,150,98,150,196,244,49,82 +.byte 150,150,98,150,196,244,49,82 +.byte 249,249,195,249,155,58,239,98 +.byte 249,249,195,249,155,58,239,98 +.byte 197,197,51,197,102,246,151,163 +.byte 197,197,51,197,102,246,151,163 +.byte 37,37,148,37,53,177,74,16 +.byte 37,37,148,37,53,177,74,16 +.byte 89,89,121,89,242,32,178,171 +.byte 89,89,121,89,242,32,178,171 +.byte 132,132,42,132,84,174,21,208 +.byte 132,132,42,132,84,174,21,208 +.byte 114,114,213,114,183,167,228,197 +.byte 114,114,213,114,183,167,228,197 +.byte 57,57,228,57,213,221,114,236 +.byte 57,57,228,57,213,221,114,236 +.byte 76,76,45,76,90,97,152,22 +.byte 76,76,45,76,90,97,152,22 +.byte 94,94,101,94,202,59,188,148 +.byte 94,94,101,94,202,59,188,148 +.byte 120,120,253,120,231,133,240,159 +.byte 120,120,253,120,231,133,240,159 +.byte 56,56,224,56,221,216,112,229 +.byte 56,56,224,56,221,216,112,229 +.byte 140,140,10,140,20,134,5,152 +.byte 140,140,10,140,20,134,5,152 +.byte 209,209,99,209,198,178,191,23 +.byte 209,209,99,209,198,178,191,23 +.byte 165,165,174,165,65,11,87,228 +.byte 165,165,174,165,65,11,87,228 +.byte 226,226,175,226,67,77,217,161 +.byte 226,226,175,226,67,77,217,161 +.byte 97,97,153,97,47,248,194,78 +.byte 97,97,153,97,47,248,194,78 +.byte 179,179,246,179,241,69,123,66 +.byte 179,179,246,179,241,69,123,66 +.byte 33,33,132,33,21,165,66,52 +.byte 33,33,132,33,21,165,66,52 +.byte 156,156,74,156,148,214,37,8 +.byte 156,156,74,156,148,214,37,8 +.byte 30,30,120,30,240,102,60,238 +.byte 30,30,120,30,240,102,60,238 +.byte 67,67,17,67,34,82,134,97 +.byte 67,67,17,67,34,82,134,97 +.byte 199,199,59,199,118,252,147,177 +.byte 199,199,59,199,118,252,147,177 +.byte 252,252,215,252,179,43,229,79 +.byte 252,252,215,252,179,43,229,79 +.byte 4,4,16,4,32,20,8,36 +.byte 4,4,16,4,32,20,8,36 +.byte 81,81,89,81,178,8,162,227 +.byte 81,81,89,81,178,8,162,227 +.byte 153,153,94,153,188,199,47,37 +.byte 153,153,94,153,188,199,47,37 +.byte 109,109,169,109,79,196,218,34 +.byte 109,109,169,109,79,196,218,34 +.byte 13,13,52,13,104,57,26,101 +.byte 13,13,52,13,104,57,26,101 +.byte 250,250,207,250,131,53,233,121 +.byte 250,250,207,250,131,53,233,121 +.byte 223,223,91,223,182,132,163,105 +.byte 223,223,91,223,182,132,163,105 +.byte 126,126,229,126,215,155,252,169 +.byte 126,126,229,126,215,155,252,169 +.byte 36,36,144,36,61,180,72,25 +.byte 36,36,144,36,61,180,72,25 +.byte 59,59,236,59,197,215,118,254 +.byte 59,59,236,59,197,215,118,254 +.byte 171,171,150,171,49,61,75,154 +.byte 171,171,150,171,49,61,75,154 +.byte 206,206,31,206,62,209,129,240 +.byte 206,206,31,206,62,209,129,240 +.byte 17,17,68,17,136,85,34,153 +.byte 17,17,68,17,136,85,34,153 +.byte 143,143,6,143,12,137,3,131 +.byte 143,143,6,143,12,137,3,131 +.byte 78,78,37,78,74,107,156,4 +.byte 78,78,37,78,74,107,156,4 +.byte 183,183,230,183,209,81,115,102 +.byte 183,183,230,183,209,81,115,102 +.byte 235,235,139,235,11,96,203,224 +.byte 235,235,139,235,11,96,203,224 +.byte 60,60,240,60,253,204,120,193 +.byte 60,60,240,60,253,204,120,193 +.byte 129,129,62,129,124,191,31,253 +.byte 129,129,62,129,124,191,31,253 +.byte 148,148,106,148,212,254,53,64 +.byte 148,148,106,148,212,254,53,64 +.byte 247,247,251,247,235,12,243,28 +.byte 247,247,251,247,235,12,243,28 +.byte 185,185,222,185,161,103,111,24 +.byte 185,185,222,185,161,103,111,24 +.byte 19,19,76,19,152,95,38,139 +.byte 19,19,76,19,152,95,38,139 +.byte 44,44,176,44,125,156,88,81 +.byte 44,44,176,44,125,156,88,81 +.byte 211,211,107,211,214,184,187,5 +.byte 
211,211,107,211,214,184,187,5 +.byte 231,231,187,231,107,92,211,140 +.byte 231,231,187,231,107,92,211,140 +.byte 110,110,165,110,87,203,220,57 +.byte 110,110,165,110,87,203,220,57 +.byte 196,196,55,196,110,243,149,170 +.byte 196,196,55,196,110,243,149,170 +.byte 3,3,12,3,24,15,6,27 +.byte 3,3,12,3,24,15,6,27 +.byte 86,86,69,86,138,19,172,220 +.byte 86,86,69,86,138,19,172,220 +.byte 68,68,13,68,26,73,136,94 +.byte 68,68,13,68,26,73,136,94 +.byte 127,127,225,127,223,158,254,160 +.byte 127,127,225,127,223,158,254,160 +.byte 169,169,158,169,33,55,79,136 +.byte 169,169,158,169,33,55,79,136 +.byte 42,42,168,42,77,130,84,103 +.byte 42,42,168,42,77,130,84,103 +.byte 187,187,214,187,177,109,107,10 +.byte 187,187,214,187,177,109,107,10 +.byte 193,193,35,193,70,226,159,135 +.byte 193,193,35,193,70,226,159,135 +.byte 83,83,81,83,162,2,166,241 +.byte 83,83,81,83,162,2,166,241 +.byte 220,220,87,220,174,139,165,114 +.byte 220,220,87,220,174,139,165,114 +.byte 11,11,44,11,88,39,22,83 +.byte 11,11,44,11,88,39,22,83 +.byte 157,157,78,157,156,211,39,1 +.byte 157,157,78,157,156,211,39,1 +.byte 108,108,173,108,71,193,216,43 +.byte 108,108,173,108,71,193,216,43 +.byte 49,49,196,49,149,245,98,164 +.byte 49,49,196,49,149,245,98,164 +.byte 116,116,205,116,135,185,232,243 +.byte 116,116,205,116,135,185,232,243 +.byte 246,246,255,246,227,9,241,21 +.byte 246,246,255,246,227,9,241,21 +.byte 70,70,5,70,10,67,140,76 +.byte 70,70,5,70,10,67,140,76 +.byte 172,172,138,172,9,38,69,165 +.byte 172,172,138,172,9,38,69,165 +.byte 137,137,30,137,60,151,15,181 +.byte 137,137,30,137,60,151,15,181 +.byte 20,20,80,20,160,68,40,180 +.byte 20,20,80,20,160,68,40,180 +.byte 225,225,163,225,91,66,223,186 +.byte 225,225,163,225,91,66,223,186 +.byte 22,22,88,22,176,78,44,166 +.byte 22,22,88,22,176,78,44,166 +.byte 58,58,232,58,205,210,116,247 +.byte 58,58,232,58,205,210,116,247 +.byte 105,105,185,105,111,208,210,6 +.byte 105,105,185,105,111,208,210,6 +.byte 9,9,36,9,72,45,18,65 +.byte 9,9,36,9,72,45,18,65 +.byte 112,112,221,112,167,173,224,215 +.byte 112,112,221,112,167,173,224,215 +.byte 182,182,226,182,217,84,113,111 +.byte 182,182,226,182,217,84,113,111 +.byte 208,208,103,208,206,183,189,30 +.byte 208,208,103,208,206,183,189,30 +.byte 237,237,147,237,59,126,199,214 +.byte 237,237,147,237,59,126,199,214 +.byte 204,204,23,204,46,219,133,226 +.byte 204,204,23,204,46,219,133,226 +.byte 66,66,21,66,42,87,132,104 +.byte 66,66,21,66,42,87,132,104 +.byte 152,152,90,152,180,194,45,44 +.byte 152,152,90,152,180,194,45,44 +.byte 164,164,170,164,73,14,85,237 +.byte 164,164,170,164,73,14,85,237 +.byte 40,40,160,40,93,136,80,117 +.byte 40,40,160,40,93,136,80,117 +.byte 92,92,109,92,218,49,184,134 +.byte 92,92,109,92,218,49,184,134 +.byte 248,248,199,248,147,63,237,107 +.byte 248,248,199,248,147,63,237,107 +.byte 134,134,34,134,68,164,17,194 +.byte 134,134,34,134,68,164,17,194 +.byte 24,35,198,232,135,184,1,79 +.byte 54,166,210,245,121,111,145,82 +.byte 96,188,155,142,163,12,123,53 +.byte 29,224,215,194,46,75,254,87 +.byte 21,119,55,229,159,240,74,218 +.byte 88,201,41,10,177,160,107,133 +.byte 189,93,16,244,203,62,5,103 +.byte 228,39,65,139,167,125,149,216 +.byte 251,238,124,102,221,23,71,158 +.byte 202,45,191,7,173,90,131,51 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/x86cpuid.s b/deps/openssl/asm_obsolete/x86-macosx-gas/x86cpuid.s new file mode 100644 index 00000000000000..3db70b61139e72 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/x86cpuid.s @@ -0,0 +1,355 @@ +.file "x86cpuid.s" +.text +.globl _OPENSSL_ia32_cpuid +.align 4 
+_OPENSSL_ia32_cpuid: +L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja L002generic + andl $4026531839,%edx + jmp L002generic +L001intel: + cmpl $7,%edi + jb L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +L003cacheinfo: + cmpl $4,%edi + movl $-1,%edi + jb L004nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +L004nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne L005notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne L005notintel + orl $1048576,%edx +L005notintel: + btl $28,%edx + jnc L002generic + andl $4026531839,%edx + cmpl $0,%edi + je L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja L002generic + andl $4026531839,%edx +L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc L006clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je L007done + cmpl $2,%eax + je L006clear_avx +L008clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +L006clear_avx: + andl $4026525695,%ebp + movl 20(%esp),%edi + andl $4294967263,8(%edi) +L007done: + movl %esi,%eax + movl %ebp,%edx +L000nocpuid: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_rdtsc +.align 4 +_OPENSSL_rdtsc: +L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + call L009PIC_me_up +L009PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L009PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc L010notsc + .byte 0x0f,0x31 +L010notsc: + ret +.globl _OPENSSL_instrument_halt +.align 4 +_OPENSSL_instrument_halt: +L_OPENSSL_instrument_halt_begin: + call L011PIC_me_up +L011PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L011PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc L012nohalt +.long 2421723150 + andl $3,%eax + jnz L012nohalt + pushfl + popl %eax + btl $9,%eax + jnc L012nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +L012nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_far_spin +.align 4 +_OPENSSL_far_spin: +L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl $9,%eax + jnc L013nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp L014spin +.align 4,0x90 +L014spin: + incl %eax + cmpl (%ecx),%edx + je 
L014spin +.long 529567888 + ret +L013nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_wipe_cpu +.align 4 +_OPENSSL_wipe_cpu: +L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + call L015PIC_me_up +L015PIC_me_up: + popl %ecx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L015PIC_me_up(%ecx),%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc L016no_x87 + andl $83886080,%ecx + cmpl $83886080,%ecx + jne L017no_sse2 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 +L017no_sse2: +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +L016no_x87: + leal 4(%esp),%eax + ret +.globl _OPENSSL_atomic_add +.align 4 +_OPENSSL_atomic_add: +L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +L018spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne L018spin + movl %ebx,%eax + popl %ebx + ret +.globl _OPENSSL_indirect_call +.align 4 +_OPENSSL_indirect_call: +L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.globl _OPENSSL_cleanse +.align 4 +_OPENSSL_cleanse: +L_OPENSSL_cleanse_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + xorl %eax,%eax + cmpl $7,%ecx + jae L019lot + cmpl $0,%ecx + je L020ret +L021little: + movb %al,(%edx) + subl $1,%ecx + leal 1(%edx),%edx + jnz L021little +L020ret: + ret +.align 4,0x90 +L019lot: + testl $3,%edx + jz L022aligned + movb %al,(%edx) + leal -1(%ecx),%ecx + leal 1(%edx),%edx + jmp L019lot +L022aligned: + movl %eax,(%edx) + leal -4(%ecx),%ecx + testl $-4,%ecx + leal 4(%edx),%edx + jnz L022aligned + cmpl $0,%ecx + jne L021little + ret +.globl _OPENSSL_ia32_rdrand +.align 4 +_OPENSSL_ia32_rdrand: +L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +L023loop: +.byte 15,199,240 + jc L024break + loop L023loop +L024break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.globl _OPENSSL_ia32_rdseed +.align 4 +_OPENSSL_ia32_rdseed: +L_OPENSSL_ia32_rdseed_begin: + movl $8,%ecx +L025loop: +.byte 15,199,248 + jc L026break + loop L025loop +L026break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 +.mod_init_func +.align 2 +.long _OPENSSL_cpuid_setup diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/aes/aes-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/aes/aes-586.asm new file mode 100644 index 00000000000000..ea853704c7458e --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/aes/aes-586.asm @@ -0,0 +1,3232 @@ +TITLE aes-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +__x86_AES_encrypt_compact PROC PRIVATE + mov DWORD PTR 20[esp],edi + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 24[esp],esi + mov edi,DWORD PTR [ebp-128] + mov esi,DWORD PTR [ebp-96] + mov edi,DWORD PTR [ebp-64] + mov esi,DWORD PTR [ebp-32] + mov edi,DWORD PTR [ebp] + mov esi,DWORD PTR 32[ebp] + mov edi,DWORD PTR 64[ebp] + mov esi,DWORD PTR 96[ebp] +ALIGN 16 +$L000loop: + mov esi,eax + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,ch + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,eax + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + and edx,255 + movzx edx,BYTE PTR [edx*1+ebp-128] + movzx eax,ah + movzx eax,BYTE PTR [eax*1+ebp-128] + shl eax,8 + xor edx,eax + mov eax,DWORD PTR 4[esp] + and ebx,255 + movzx ebx,BYTE PTR [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD PTR 8[esp] + movzx ecx,BYTE PTR [ecx*1+ebp-128] + shl ecx,24 + xor edx,ecx + mov ecx,esi + mov ebp,2155905152 + and ebp,ecx + lea edi,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,ecx + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,ecx + xor ecx,esi + ror edi,24 + xor esi,ebp + rol ecx,24 + xor esi,edi + mov ebp,2155905152 + xor ecx,esi + and ebp,edx + lea edi,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,edx + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,edx + xor edx,esi + ror edi,24 + xor esi,ebp + rol edx,24 + xor esi,edi + mov ebp,2155905152 + xor edx,esi + and ebp,eax + lea edi,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,eax + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,eax + xor eax,esi + ror edi,24 + xor esi,ebp + rol eax,24 + xor esi,edi + mov ebp,2155905152 + xor eax,esi + and ebp,ebx + lea edi,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 + and edi,4278124286 + sub esi,ebp + mov ebp,ebx + and esi,454761243 + ror ebp,16 + xor esi,edi + mov edi,ebx + xor ebx,esi + ror edi,24 + xor esi,ebp + rol ebx,24 + xor esi,edi + xor ebx,esi + mov edi,DWORD PTR 20[esp] + mov ebp,DWORD PTR 28[esp] + add edi,16 + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + cmp edi,DWORD PTR 24[esp] + mov DWORD PTR 20[esp],edi + jb $L000loop + mov esi,eax + and 
esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,ch + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,eax + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov edi,DWORD PTR 20[esp] + and edx,255 + movzx edx,BYTE PTR [edx*1+ebp-128] + movzx eax,ah + movzx eax,BYTE PTR [eax*1+ebp-128] + shl eax,8 + xor edx,eax + mov eax,DWORD PTR 4[esp] + and ebx,255 + movzx ebx,BYTE PTR [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD PTR 8[esp] + movzx ecx,BYTE PTR [ecx*1+ebp-128] + shl ecx,24 + xor edx,ecx + mov ecx,esi + xor eax,DWORD PTR 16[edi] + xor ebx,DWORD PTR 20[edi] + xor ecx,DWORD PTR 24[edi] + xor edx,DWORD PTR 28[edi] + ret +__x86_AES_encrypt_compact ENDP +ALIGN 16 +__sse_AES_encrypt_compact PROC PRIVATE + pxor mm0,QWORD PTR [edi] + pxor mm4,QWORD PTR 8[edi] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 24[esp],esi + mov eax,454761243 + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],eax + mov eax,DWORD PTR [ebp-128] + mov ebx,DWORD PTR [ebp-96] + mov ecx,DWORD PTR [ebp-64] + mov edx,DWORD PTR [ebp-32] + mov eax,DWORD PTR [ebp] + mov ebx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 64[ebp] + mov edx,DWORD PTR 96[ebp] +ALIGN 16 +$L001loop: + pshufw mm1,mm0,8 + pshufw mm5,mm4,13 + movd eax,mm1 + movd ebx,mm5 + mov DWORD PTR 20[esp],edi + movzx esi,al + movzx edx,ah + pshufw mm2,mm0,13 + movzx ecx,BYTE PTR [esi*1+ebp-128] + movzx edi,bl + movzx edx,BYTE PTR [edx*1+ebp-128] + shr eax,16 + shl edx,8 + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl esi,16 + pshufw mm6,mm4,8 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl esi,24 + shr ebx,16 + or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl esi,8 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al + shl esi,24 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl + movd eax,mm2 + movd mm0,ecx + movzx ecx,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl ecx,16 + movd ebx,mm6 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl esi,24 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl + shl esi,8 + shr ebx,16 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al + shr eax,16 + movd mm1,ecx + movzx ecx,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl ecx,16 + and eax,255 + or ecx,esi + punpckldq mm0,mm1 + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl esi,24 + and ebx,255 + movzx eax,BYTE PTR [eax*1+ebp-128] + or ecx,esi + shl eax,16 + movzx esi,BYTE PTR [edi*1+ebp-128] + or edx,eax + shl esi,8 + movzx ebx,BYTE PTR [ebx*1+ebp-128] + or ecx,esi + or edx,ebx 
+ mov edi,DWORD PTR 20[esp] + movd mm4,ecx + movd mm5,edx + punpckldq mm4,mm5 + add edi,16 + cmp edi,DWORD PTR 24[esp] + ja $L002out + movq mm2,QWORD PTR 8[esp] + pxor mm3,mm3 + pxor mm7,mm7 + movq mm1,mm0 + movq mm5,mm4 + pcmpgtb mm3,mm0 + pcmpgtb mm7,mm4 + pand mm3,mm2 + pand mm7,mm2 + pshufw mm2,mm0,177 + pshufw mm6,mm4,177 + paddb mm0,mm0 + paddb mm4,mm4 + pxor mm0,mm3 + pxor mm4,mm7 + pshufw mm3,mm2,177 + pshufw mm7,mm6,177 + pxor mm1,mm0 + pxor mm5,mm4 + pxor mm0,mm2 + pxor mm4,mm6 + movq mm2,mm3 + movq mm6,mm7 + pslld mm3,8 + pslld mm7,8 + psrld mm2,24 + psrld mm6,24 + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm0,mm2 + pxor mm4,mm6 + movq mm3,mm1 + movq mm7,mm5 + movq mm2,QWORD PTR [edi] + movq mm6,QWORD PTR 8[edi] + psrld mm1,8 + psrld mm5,8 + mov eax,DWORD PTR [ebp-128] + pslld mm3,24 + pslld mm7,24 + mov ebx,DWORD PTR [ebp-64] + pxor mm0,mm1 + pxor mm4,mm5 + mov ecx,DWORD PTR [ebp] + pxor mm0,mm3 + pxor mm4,mm7 + mov edx,DWORD PTR 64[ebp] + pxor mm0,mm2 + pxor mm4,mm6 + jmp $L001loop +ALIGN 16 +$L002out: + pxor mm0,QWORD PTR [edi] + pxor mm4,QWORD PTR 8[edi] + ret +__sse_AES_encrypt_compact ENDP +ALIGN 16 +__x86_AES_encrypt PROC PRIVATE + mov DWORD PTR 20[esp],edi + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 24[esp],esi +ALIGN 16 +$L003loop: + mov esi,eax + and esi,255 + mov esi,DWORD PTR [esi*8+ebp] + movzx edi,bh + xor esi,DWORD PTR 3[edi*8+ebp] + mov edi,ecx + shr edi,16 + and edi,255 + xor esi,DWORD PTR 2[edi*8+ebp] + mov edi,edx + shr edi,24 + xor esi,DWORD PTR 1[edi*8+ebp] + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + mov esi,DWORD PTR [esi*8+ebp] + movzx edi,ch + xor esi,DWORD PTR 3[edi*8+ebp] + mov edi,edx + shr edi,16 + and edi,255 + xor esi,DWORD PTR 2[edi*8+ebp] + mov edi,eax + shr edi,24 + xor esi,DWORD PTR 1[edi*8+ebp] + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + mov esi,DWORD PTR [esi*8+ebp] + movzx edi,dh + xor esi,DWORD PTR 3[edi*8+ebp] + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + xor esi,DWORD PTR 2[edi*8+ebp] + movzx edi,bh + xor esi,DWORD PTR 1[edi*8+ebp] + mov edi,DWORD PTR 20[esp] + mov edx,DWORD PTR [edx*8+ebp] + movzx eax,ah + xor edx,DWORD PTR 3[eax*8+ebp] + mov eax,DWORD PTR 4[esp] + and ebx,255 + xor edx,DWORD PTR 2[ebx*8+ebp] + mov ebx,DWORD PTR 8[esp] + xor edx,DWORD PTR 1[ecx*8+ebp] + mov ecx,esi + add edi,16 + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + cmp edi,DWORD PTR 24[esp] + mov DWORD PTR 20[esp],edi + jb $L003loop + mov esi,eax + and esi,255 + mov esi,DWORD PTR 2[esi*8+ebp] + and esi,255 + movzx edi,bh + mov edi,DWORD PTR [edi*8+ebp] + and edi,65280 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + mov edi,DWORD PTR [edi*8+ebp] + and edi,16711680 + xor esi,edi + mov edi,edx + shr edi,24 + mov edi,DWORD PTR 2[edi*8+ebp] + and edi,4278190080 + xor esi,edi + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + shr ebx,16 + mov esi,DWORD PTR 2[esi*8+ebp] + and esi,255 + movzx edi,ch + mov edi,DWORD PTR [edi*8+ebp] + and edi,65280 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + mov edi,DWORD PTR [edi*8+ebp] + and edi,16711680 + xor esi,edi + mov edi,eax + shr edi,24 + mov edi,DWORD PTR 2[edi*8+ebp] + and edi,4278190080 + xor esi,edi + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + shr ecx,24 + mov esi,DWORD PTR 2[esi*8+ebp] + and 
esi,255 + movzx edi,dh + mov edi,DWORD PTR [edi*8+ebp] + and edi,65280 + xor esi,edi + mov edi,eax + shr edi,16 + and edx,255 + and edi,255 + mov edi,DWORD PTR [edi*8+ebp] + and edi,16711680 + xor esi,edi + movzx edi,bh + mov edi,DWORD PTR 2[edi*8+ebp] + and edi,4278190080 + xor esi,edi + mov edi,DWORD PTR 20[esp] + and edx,255 + mov edx,DWORD PTR 2[edx*8+ebp] + and edx,255 + movzx eax,ah + mov eax,DWORD PTR [eax*8+ebp] + and eax,65280 + xor edx,eax + mov eax,DWORD PTR 4[esp] + and ebx,255 + mov ebx,DWORD PTR [ebx*8+ebp] + and ebx,16711680 + xor edx,ebx + mov ebx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 2[ecx*8+ebp] + and ecx,4278190080 + xor edx,ecx + mov ecx,esi + add edi,16 + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + ret +ALIGN 64 +$LAES_Te:: +DD 2774754246,2774754246 +DD 2222750968,2222750968 +DD 2574743534,2574743534 +DD 2373680118,2373680118 +DD 234025727,234025727 +DD 3177933782,3177933782 +DD 2976870366,2976870366 +DD 1422247313,1422247313 +DD 1345335392,1345335392 +DD 50397442,50397442 +DD 2842126286,2842126286 +DD 2099981142,2099981142 +DD 436141799,436141799 +DD 1658312629,1658312629 +DD 3870010189,3870010189 +DD 2591454956,2591454956 +DD 1170918031,1170918031 +DD 2642575903,2642575903 +DD 1086966153,1086966153 +DD 2273148410,2273148410 +DD 368769775,368769775 +DD 3948501426,3948501426 +DD 3376891790,3376891790 +DD 200339707,200339707 +DD 3970805057,3970805057 +DD 1742001331,1742001331 +DD 4255294047,4255294047 +DD 3937382213,3937382213 +DD 3214711843,3214711843 +DD 4154762323,4154762323 +DD 2524082916,2524082916 +DD 1539358875,1539358875 +DD 3266819957,3266819957 +DD 486407649,486407649 +DD 2928907069,2928907069 +DD 1780885068,1780885068 +DD 1513502316,1513502316 +DD 1094664062,1094664062 +DD 49805301,49805301 +DD 1338821763,1338821763 +DD 1546925160,1546925160 +DD 4104496465,4104496465 +DD 887481809,887481809 +DD 150073849,150073849 +DD 2473685474,2473685474 +DD 1943591083,1943591083 +DD 1395732834,1395732834 +DD 1058346282,1058346282 +DD 201589768,201589768 +DD 1388824469,1388824469 +DD 1696801606,1696801606 +DD 1589887901,1589887901 +DD 672667696,672667696 +DD 2711000631,2711000631 +DD 251987210,251987210 +DD 3046808111,3046808111 +DD 151455502,151455502 +DD 907153956,907153956 +DD 2608889883,2608889883 +DD 1038279391,1038279391 +DD 652995533,652995533 +DD 1764173646,1764173646 +DD 3451040383,3451040383 +DD 2675275242,2675275242 +DD 453576978,453576978 +DD 2659418909,2659418909 +DD 1949051992,1949051992 +DD 773462580,773462580 +DD 756751158,756751158 +DD 2993581788,2993581788 +DD 3998898868,3998898868 +DD 4221608027,4221608027 +DD 4132590244,4132590244 +DD 1295727478,1295727478 +DD 1641469623,1641469623 +DD 3467883389,3467883389 +DD 2066295122,2066295122 +DD 1055122397,1055122397 +DD 1898917726,1898917726 +DD 2542044179,2542044179 +DD 4115878822,4115878822 +DD 1758581177,1758581177 +DD 0,0 +DD 753790401,753790401 +DD 1612718144,1612718144 +DD 536673507,536673507 +DD 3367088505,3367088505 +DD 3982187446,3982187446 +DD 3194645204,3194645204 +DD 1187761037,1187761037 +DD 3653156455,3653156455 +DD 1262041458,1262041458 +DD 3729410708,3729410708 +DD 3561770136,3561770136 +DD 3898103984,3898103984 +DD 1255133061,1255133061 +DD 1808847035,1808847035 +DD 720367557,720367557 +DD 3853167183,3853167183 +DD 385612781,385612781 +DD 3309519750,3309519750 +DD 3612167578,3612167578 +DD 1429418854,1429418854 +DD 2491778321,2491778321 +DD 3477423498,3477423498 +DD 284817897,284817897 +DD 100794884,100794884 +DD 
2172616702,2172616702 +DD 4031795360,4031795360 +DD 1144798328,1144798328 +DD 3131023141,3131023141 +DD 3819481163,3819481163 +DD 4082192802,4082192802 +DD 4272137053,4272137053 +DD 3225436288,3225436288 +DD 2324664069,2324664069 +DD 2912064063,2912064063 +DD 3164445985,3164445985 +DD 1211644016,1211644016 +DD 83228145,83228145 +DD 3753688163,3753688163 +DD 3249976951,3249976951 +DD 1977277103,1977277103 +DD 1663115586,1663115586 +DD 806359072,806359072 +DD 452984805,452984805 +DD 250868733,250868733 +DD 1842533055,1842533055 +DD 1288555905,1288555905 +DD 336333848,336333848 +DD 890442534,890442534 +DD 804056259,804056259 +DD 3781124030,3781124030 +DD 2727843637,2727843637 +DD 3427026056,3427026056 +DD 957814574,957814574 +DD 1472513171,1472513171 +DD 4071073621,4071073621 +DD 2189328124,2189328124 +DD 1195195770,1195195770 +DD 2892260552,2892260552 +DD 3881655738,3881655738 +DD 723065138,723065138 +DD 2507371494,2507371494 +DD 2690670784,2690670784 +DD 2558624025,2558624025 +DD 3511635870,3511635870 +DD 2145180835,2145180835 +DD 1713513028,1713513028 +DD 2116692564,2116692564 +DD 2878378043,2878378043 +DD 2206763019,2206763019 +DD 3393603212,3393603212 +DD 703524551,703524551 +DD 3552098411,3552098411 +DD 1007948840,1007948840 +DD 2044649127,2044649127 +DD 3797835452,3797835452 +DD 487262998,487262998 +DD 1994120109,1994120109 +DD 1004593371,1004593371 +DD 1446130276,1446130276 +DD 1312438900,1312438900 +DD 503974420,503974420 +DD 3679013266,3679013266 +DD 168166924,168166924 +DD 1814307912,1814307912 +DD 3831258296,3831258296 +DD 1573044895,1573044895 +DD 1859376061,1859376061 +DD 4021070915,4021070915 +DD 2791465668,2791465668 +DD 2828112185,2828112185 +DD 2761266481,2761266481 +DD 937747667,937747667 +DD 2339994098,2339994098 +DD 854058965,854058965 +DD 1137232011,1137232011 +DD 1496790894,1496790894 +DD 3077402074,3077402074 +DD 2358086913,2358086913 +DD 1691735473,1691735473 +DD 3528347292,3528347292 +DD 3769215305,3769215305 +DD 3027004632,3027004632 +DD 4199962284,4199962284 +DD 133494003,133494003 +DD 636152527,636152527 +DD 2942657994,2942657994 +DD 2390391540,2390391540 +DD 3920539207,3920539207 +DD 403179536,403179536 +DD 3585784431,3585784431 +DD 2289596656,2289596656 +DD 1864705354,1864705354 +DD 1915629148,1915629148 +DD 605822008,605822008 +DD 4054230615,4054230615 +DD 3350508659,3350508659 +DD 1371981463,1371981463 +DD 602466507,602466507 +DD 2094914977,2094914977 +DD 2624877800,2624877800 +DD 555687742,555687742 +DD 3712699286,3712699286 +DD 3703422305,3703422305 +DD 2257292045,2257292045 +DD 2240449039,2240449039 +DD 2423288032,2423288032 +DD 1111375484,1111375484 +DD 3300242801,3300242801 +DD 2858837708,2858837708 +DD 3628615824,3628615824 +DD 84083462,84083462 +DD 32962295,32962295 +DD 302911004,302911004 +DD 2741068226,2741068226 +DD 1597322602,1597322602 +DD 4183250862,4183250862 +DD 3501832553,3501832553 +DD 2441512471,2441512471 +DD 1489093017,1489093017 +DD 656219450,656219450 +DD 3114180135,3114180135 +DD 954327513,954327513 +DD 335083755,335083755 +DD 3013122091,3013122091 +DD 856756514,856756514 +DD 3144247762,3144247762 +DD 1893325225,1893325225 +DD 2307821063,2307821063 +DD 2811532339,2811532339 +DD 3063651117,3063651117 +DD 572399164,572399164 +DD 2458355477,2458355477 +DD 552200649,552200649 +DD 1238290055,1238290055 +DD 4283782570,4283782570 +DD 2015897680,2015897680 +DD 2061492133,2061492133 +DD 2408352771,2408352771 +DD 4171342169,4171342169 +DD 2156497161,2156497161 +DD 386731290,386731290 +DD 3669999461,3669999461 +DD 837215959,837215959 +DD 
3326231172,3326231172 +DD 3093850320,3093850320 +DD 3275833730,3275833730 +DD 2962856233,2962856233 +DD 1999449434,1999449434 +DD 286199582,286199582 +DD 3417354363,3417354363 +DD 4233385128,4233385128 +DD 3602627437,3602627437 +DD 974525996,974525996 +DB 99,124,119,123,242,107,111,197 +DB 48,1,103,43,254,215,171,118 +DB 202,130,201,125,250,89,71,240 +DB 173,212,162,175,156,164,114,192 +DB 183,253,147,38,54,63,247,204 +DB 52,165,229,241,113,216,49,21 +DB 4,199,35,195,24,150,5,154 +DB 7,18,128,226,235,39,178,117 +DB 9,131,44,26,27,110,90,160 +DB 82,59,214,179,41,227,47,132 +DB 83,209,0,237,32,252,177,91 +DB 106,203,190,57,74,76,88,207 +DB 208,239,170,251,67,77,51,133 +DB 69,249,2,127,80,60,159,168 +DB 81,163,64,143,146,157,56,245 +DB 188,182,218,33,16,255,243,210 +DB 205,12,19,236,95,151,68,23 +DB 196,167,126,61,100,93,25,115 +DB 96,129,79,220,34,42,144,136 +DB 70,238,184,20,222,94,11,219 +DB 224,50,58,10,73,6,36,92 +DB 194,211,172,98,145,149,228,121 +DB 231,200,55,109,141,213,78,169 +DB 108,86,244,234,101,122,174,8 +DB 186,120,37,46,28,166,180,198 +DB 232,221,116,31,75,189,139,138 +DB 112,62,181,102,72,3,246,14 +DB 97,53,87,185,134,193,29,158 +DB 225,248,152,17,105,217,142,148 +DB 155,30,135,233,206,85,40,223 +DB 140,161,137,13,191,230,66,104 +DB 65,153,45,15,176,84,187,22 +DB 99,124,119,123,242,107,111,197 +DB 48,1,103,43,254,215,171,118 +DB 202,130,201,125,250,89,71,240 +DB 173,212,162,175,156,164,114,192 +DB 183,253,147,38,54,63,247,204 +DB 52,165,229,241,113,216,49,21 +DB 4,199,35,195,24,150,5,154 +DB 7,18,128,226,235,39,178,117 +DB 9,131,44,26,27,110,90,160 +DB 82,59,214,179,41,227,47,132 +DB 83,209,0,237,32,252,177,91 +DB 106,203,190,57,74,76,88,207 +DB 208,239,170,251,67,77,51,133 +DB 69,249,2,127,80,60,159,168 +DB 81,163,64,143,146,157,56,245 +DB 188,182,218,33,16,255,243,210 +DB 205,12,19,236,95,151,68,23 +DB 196,167,126,61,100,93,25,115 +DB 96,129,79,220,34,42,144,136 +DB 70,238,184,20,222,94,11,219 +DB 224,50,58,10,73,6,36,92 +DB 194,211,172,98,145,149,228,121 +DB 231,200,55,109,141,213,78,169 +DB 108,86,244,234,101,122,174,8 +DB 186,120,37,46,28,166,180,198 +DB 232,221,116,31,75,189,139,138 +DB 112,62,181,102,72,3,246,14 +DB 97,53,87,185,134,193,29,158 +DB 225,248,152,17,105,217,142,148 +DB 155,30,135,233,206,85,40,223 +DB 140,161,137,13,191,230,66,104 +DB 65,153,45,15,176,84,187,22 +DB 99,124,119,123,242,107,111,197 +DB 48,1,103,43,254,215,171,118 +DB 202,130,201,125,250,89,71,240 +DB 173,212,162,175,156,164,114,192 +DB 183,253,147,38,54,63,247,204 +DB 52,165,229,241,113,216,49,21 +DB 4,199,35,195,24,150,5,154 +DB 7,18,128,226,235,39,178,117 +DB 9,131,44,26,27,110,90,160 +DB 82,59,214,179,41,227,47,132 +DB 83,209,0,237,32,252,177,91 +DB 106,203,190,57,74,76,88,207 +DB 208,239,170,251,67,77,51,133 +DB 69,249,2,127,80,60,159,168 +DB 81,163,64,143,146,157,56,245 +DB 188,182,218,33,16,255,243,210 +DB 205,12,19,236,95,151,68,23 +DB 196,167,126,61,100,93,25,115 +DB 96,129,79,220,34,42,144,136 +DB 70,238,184,20,222,94,11,219 +DB 224,50,58,10,73,6,36,92 +DB 194,211,172,98,145,149,228,121 +DB 231,200,55,109,141,213,78,169 +DB 108,86,244,234,101,122,174,8 +DB 186,120,37,46,28,166,180,198 +DB 232,221,116,31,75,189,139,138 +DB 112,62,181,102,72,3,246,14 +DB 97,53,87,185,134,193,29,158 +DB 225,248,152,17,105,217,142,148 +DB 155,30,135,233,206,85,40,223 +DB 140,161,137,13,191,230,66,104 +DB 65,153,45,15,176,84,187,22 +DB 99,124,119,123,242,107,111,197 +DB 48,1,103,43,254,215,171,118 +DB 202,130,201,125,250,89,71,240 +DB 173,212,162,175,156,164,114,192 +DB 183,253,147,38,54,63,247,204 +DB 
52,165,229,241,113,216,49,21 +DB 4,199,35,195,24,150,5,154 +DB 7,18,128,226,235,39,178,117 +DB 9,131,44,26,27,110,90,160 +DB 82,59,214,179,41,227,47,132 +DB 83,209,0,237,32,252,177,91 +DB 106,203,190,57,74,76,88,207 +DB 208,239,170,251,67,77,51,133 +DB 69,249,2,127,80,60,159,168 +DB 81,163,64,143,146,157,56,245 +DB 188,182,218,33,16,255,243,210 +DB 205,12,19,236,95,151,68,23 +DB 196,167,126,61,100,93,25,115 +DB 96,129,79,220,34,42,144,136 +DB 70,238,184,20,222,94,11,219 +DB 224,50,58,10,73,6,36,92 +DB 194,211,172,98,145,149,228,121 +DB 231,200,55,109,141,213,78,169 +DB 108,86,244,234,101,122,174,8 +DB 186,120,37,46,28,166,180,198 +DB 232,221,116,31,75,189,139,138 +DB 112,62,181,102,72,3,246,14 +DB 97,53,87,185,134,193,29,158 +DB 225,248,152,17,105,217,142,148 +DB 155,30,135,233,206,85,40,223 +DB 140,161,137,13,191,230,66,104 +DB 65,153,45,15,176,84,187,22 +DD 1,2,4,8 +DD 16,32,64,128 +DD 27,54,0,0 +DD 0,0,0,0 +__x86_AES_encrypt ENDP +ALIGN 16 +_AES_encrypt PROC PUBLIC +$L_AES_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 28[esp] + mov eax,esp + sub esp,36 + and esp,-64 + lea ebx,DWORD PTR [edi-127] + sub ebx,esp + neg ebx + and ebx,960 + sub esp,ebx + add esp,4 + mov DWORD PTR 28[esp],eax + call $L004pic_point +$L004pic_point: + pop ebp + lea eax,DWORD PTR _OPENSSL_ia32cap_P + lea ebp,DWORD PTR ($LAES_Te-$L004pic_point)[ebp] + lea ebx,DWORD PTR 764[esp] + sub ebx,ebp + and ebx,768 + lea ebp,DWORD PTR 2176[ebx*1+ebp] + bt DWORD PTR [eax],25 + jnc $L005x86 + movq mm0,QWORD PTR [esi] + movq mm4,QWORD PTR 8[esi] + call __sse_AES_encrypt_compact + mov esp,DWORD PTR 28[esp] + mov esi,DWORD PTR 24[esp] + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm4 + emms + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 16 +$L005x86: + mov DWORD PTR 24[esp],ebp + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + call __x86_AES_encrypt_compact + mov esp,DWORD PTR 28[esp] + mov esi,DWORD PTR 24[esp] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +_AES_encrypt ENDP +ALIGN 16 +__x86_AES_decrypt_compact PROC PRIVATE + mov DWORD PTR 20[esp],edi + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 24[esp],esi + mov edi,DWORD PTR [ebp-128] + mov esi,DWORD PTR [ebp-96] + mov edi,DWORD PTR [ebp-64] + mov esi,DWORD PTR [ebp-32] + mov edi,DWORD PTR [ebp] + mov esi,DWORD PTR 32[ebp] + mov edi,DWORD PTR 64[ebp] + mov esi,DWORD PTR 96[ebp] +ALIGN 16 +$L006loop: + mov esi,eax + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ebx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,ah + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ecx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + movzx esi,BYTE PTR 
[esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + and edx,255 + movzx edx,BYTE PTR [edx*1+ebp-128] + movzx ecx,ch + movzx ecx,BYTE PTR [ecx*1+ebp-128] + shl ecx,8 + xor edx,ecx + mov ecx,esi + shr ebx,16 + and ebx,255 + movzx ebx,BYTE PTR [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + shr eax,24 + movzx eax,BYTE PTR [eax*1+ebp-128] + shl eax,24 + xor edx,eax + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea eax,DWORD PTR [ecx*1+ecx] + sub esi,edi + and eax,4278124286 + and esi,454761243 + xor eax,esi + mov edi,2155905152 + and edi,eax + mov esi,edi + shr edi,7 + lea ebx,DWORD PTR [eax*1+eax] + sub esi,edi + and ebx,4278124286 + and esi,454761243 + xor eax,ecx + xor ebx,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi + shr edi,7 + lea ebp,DWORD PTR [ebx*1+ebx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor ebx,ecx + rol ecx,8 + xor ebp,esi + xor ecx,eax + xor eax,ebp + xor ecx,ebx + xor ebx,ebp + rol eax,24 + xor ecx,ebp + rol ebx,16 + xor ecx,eax + rol ebp,8 + xor ecx,ebx + mov eax,DWORD PTR 4[esp] + xor ecx,ebp + mov DWORD PTR 12[esp],ecx + mov edi,2155905152 + and edi,edx + mov esi,edi + shr edi,7 + lea ebx,DWORD PTR [edx*1+edx] + sub esi,edi + and ebx,4278124286 + and esi,454761243 + xor ebx,esi + mov edi,2155905152 + and edi,ebx + mov esi,edi + shr edi,7 + lea ecx,DWORD PTR [ebx*1+ebx] + sub esi,edi + and ecx,4278124286 + and esi,454761243 + xor ebx,edx + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea ebp,DWORD PTR [ecx*1+ecx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor ecx,edx + rol edx,8 + xor ebp,esi + xor edx,ebx + xor ebx,ebp + xor edx,ecx + xor ecx,ebp + rol ebx,24 + xor edx,ebp + rol ecx,16 + xor edx,ebx + rol ebp,8 + xor edx,ecx + mov ebx,DWORD PTR 8[esp] + xor edx,ebp + mov DWORD PTR 16[esp],edx + mov edi,2155905152 + and edi,eax + mov esi,edi + shr edi,7 + lea ecx,DWORD PTR [eax*1+eax] + sub esi,edi + and ecx,4278124286 + and esi,454761243 + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea edx,DWORD PTR [ecx*1+ecx] + sub esi,edi + and edx,4278124286 + and esi,454761243 + xor ecx,eax + xor edx,esi + mov edi,2155905152 + and edi,edx + mov esi,edi + shr edi,7 + lea ebp,DWORD PTR [edx*1+edx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor edx,eax + rol eax,8 + xor ebp,esi + xor eax,ecx + xor ecx,ebp + xor eax,edx + xor edx,ebp + rol ecx,24 + xor eax,ebp + rol edx,16 + xor eax,ecx + rol ebp,8 + xor eax,edx + xor eax,ebp + mov edi,2155905152 + and edi,ebx + mov esi,edi + shr edi,7 + lea ecx,DWORD PTR [ebx*1+ebx] + sub esi,edi + and ecx,4278124286 + and esi,454761243 + xor ecx,esi + mov edi,2155905152 + and edi,ecx + mov esi,edi + shr edi,7 + lea edx,DWORD PTR [ecx*1+ecx] + sub esi,edi + and edx,4278124286 + and esi,454761243 + xor ecx,ebx + xor edx,esi + mov edi,2155905152 + and edi,edx + mov esi,edi + shr edi,7 + lea ebp,DWORD PTR [edx*1+edx] + sub esi,edi + and ebp,4278124286 + and esi,454761243 + xor edx,ebx + rol ebx,8 + xor ebp,esi + xor ebx,ecx + xor ecx,ebp + xor ebx,edx + xor edx,ebp + rol ecx,24 + xor ebx,ebp + rol edx,16 + xor ebx,ecx + rol ebp,8 + xor ebx,edx + mov ecx,DWORD PTR 12[esp] + xor ebx,ebp + mov edx,DWORD PTR 16[esp] + mov edi,DWORD PTR 20[esp] + mov ebp,DWORD PTR 28[esp] + add edi,16 + xor 
eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + cmp edi,DWORD PTR 24[esp] + mov DWORD PTR 20[esp],edi + jb $L006loop + mov esi,eax + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,dh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ebx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,ah + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,ecx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp-128] + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp-128] + shl edi,24 + xor esi,edi + mov edi,DWORD PTR 20[esp] + and edx,255 + movzx edx,BYTE PTR [edx*1+ebp-128] + movzx ecx,ch + movzx ecx,BYTE PTR [ecx*1+ebp-128] + shl ecx,8 + xor edx,ecx + mov ecx,esi + shr ebx,16 + and ebx,255 + movzx ebx,BYTE PTR [ebx*1+ebp-128] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD PTR 8[esp] + shr eax,24 + movzx eax,BYTE PTR [eax*1+ebp-128] + shl eax,24 + xor edx,eax + mov eax,DWORD PTR 4[esp] + xor eax,DWORD PTR 16[edi] + xor ebx,DWORD PTR 20[edi] + xor ecx,DWORD PTR 24[edi] + xor edx,DWORD PTR 28[edi] + ret +__x86_AES_decrypt_compact ENDP +ALIGN 16 +__sse_AES_decrypt_compact PROC PRIVATE + pxor mm0,QWORD PTR [edi] + pxor mm4,QWORD PTR 8[edi] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 24[esp],esi + mov eax,454761243 + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],eax + mov eax,DWORD PTR [ebp-128] + mov ebx,DWORD PTR [ebp-96] + mov ecx,DWORD PTR [ebp-64] + mov edx,DWORD PTR [ebp-32] + mov eax,DWORD PTR [ebp] + mov ebx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 64[ebp] + mov edx,DWORD PTR 96[ebp] +ALIGN 16 +$L007loop: + pshufw mm1,mm0,12 + pshufw mm5,mm4,9 + movd eax,mm1 + movd ebx,mm5 + mov DWORD PTR 20[esp],edi + movzx esi,al + movzx edx,ah + pshufw mm2,mm0,6 + movzx ecx,BYTE PTR [esi*1+ebp-128] + movzx edi,bl + movzx edx,BYTE PTR [edx*1+ebp-128] + shr eax,16 + shl edx,8 + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl esi,16 + pshufw mm6,mm4,3 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl esi,24 + shr ebx,16 + or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl esi,24 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al + shl esi,8 + movd eax,mm2 + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl + shl esi,16 + movd ebx,mm6 + movd mm0,ecx + movzx ecx,BYTE PTR [edi*1+ebp-128] + movzx edi,al + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bl + or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,ah + shl esi,16 + shr eax,16 + or edx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shr ebx,16 + shl esi,8 + movd mm1,edx + movzx edx,BYTE PTR [edi*1+ebp-128] + movzx edi,bh + shl edx,24 + and ebx,255 + or edx,esi + punpckldq mm0,mm1 + movzx esi,BYTE PTR [edi*1+ebp-128] + movzx edi,al + shl esi,8 + movzx 
eax,ah + movzx ebx,BYTE PTR [ebx*1+ebp-128] + or ecx,esi + movzx esi,BYTE PTR [edi*1+ebp-128] + or edx,ebx + shl esi,16 + movzx eax,BYTE PTR [eax*1+ebp-128] + or edx,esi + shl eax,24 + or ecx,eax + mov edi,DWORD PTR 20[esp] + movd mm4,edx + movd mm5,ecx + punpckldq mm4,mm5 + add edi,16 + cmp edi,DWORD PTR 24[esp] + ja $L008out + movq mm3,mm0 + movq mm7,mm4 + pshufw mm2,mm0,228 + pshufw mm6,mm4,228 + movq mm1,mm0 + movq mm5,mm4 + pshufw mm0,mm0,177 + pshufw mm4,mm4,177 + pslld mm2,8 + pslld mm6,8 + psrld mm3,8 + psrld mm7,8 + pxor mm0,mm2 + pxor mm4,mm6 + pxor mm0,mm3 + pxor mm4,mm7 + pslld mm2,16 + pslld mm6,16 + psrld mm3,16 + psrld mm7,16 + pxor mm0,mm2 + pxor mm4,mm6 + pxor mm0,mm3 + pxor mm4,mm7 + movq mm3,QWORD PTR 8[esp] + pxor mm2,mm2 + pxor mm6,mm6 + pcmpgtb mm2,mm1 + pcmpgtb mm6,mm5 + pand mm2,mm3 + pand mm6,mm3 + paddb mm1,mm1 + paddb mm5,mm5 + pxor mm1,mm2 + pxor mm5,mm6 + movq mm3,mm1 + movq mm7,mm5 + movq mm2,mm1 + movq mm6,mm5 + pxor mm0,mm1 + pxor mm4,mm5 + pslld mm3,24 + pslld mm7,24 + psrld mm2,8 + psrld mm6,8 + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm0,mm2 + pxor mm4,mm6 + movq mm2,QWORD PTR 8[esp] + pxor mm3,mm3 + pxor mm7,mm7 + pcmpgtb mm3,mm1 + pcmpgtb mm7,mm5 + pand mm3,mm2 + pand mm7,mm2 + paddb mm1,mm1 + paddb mm5,mm5 + pxor mm1,mm3 + pxor mm5,mm7 + pshufw mm3,mm1,177 + pshufw mm7,mm5,177 + pxor mm0,mm1 + pxor mm4,mm5 + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm3,mm3 + pxor mm7,mm7 + pcmpgtb mm3,mm1 + pcmpgtb mm7,mm5 + pand mm3,mm2 + pand mm7,mm2 + paddb mm1,mm1 + paddb mm5,mm5 + pxor mm1,mm3 + pxor mm5,mm7 + pxor mm0,mm1 + pxor mm4,mm5 + movq mm3,mm1 + movq mm7,mm5 + pshufw mm2,mm1,177 + pshufw mm6,mm5,177 + pxor mm0,mm2 + pxor mm4,mm6 + pslld mm1,8 + pslld mm5,8 + psrld mm3,8 + psrld mm7,8 + movq mm2,QWORD PTR [edi] + movq mm6,QWORD PTR 8[edi] + pxor mm0,mm1 + pxor mm4,mm5 + pxor mm0,mm3 + pxor mm4,mm7 + mov eax,DWORD PTR [ebp-128] + pslld mm1,16 + pslld mm5,16 + mov ebx,DWORD PTR [ebp-64] + psrld mm3,16 + psrld mm7,16 + mov ecx,DWORD PTR [ebp] + pxor mm0,mm1 + pxor mm4,mm5 + mov edx,DWORD PTR 64[ebp] + pxor mm0,mm3 + pxor mm4,mm7 + pxor mm0,mm2 + pxor mm4,mm6 + jmp $L007loop +ALIGN 16 +$L008out: + pxor mm0,QWORD PTR [edi] + pxor mm4,QWORD PTR 8[edi] + ret +__sse_AES_decrypt_compact ENDP +ALIGN 16 +__x86_AES_decrypt PROC PRIVATE + mov DWORD PTR 20[esp],edi + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 24[esp],esi +ALIGN 16 +$L009loop: + mov esi,eax + and esi,255 + mov esi,DWORD PTR [esi*8+ebp] + movzx edi,dh + xor esi,DWORD PTR 3[edi*8+ebp] + mov edi,ecx + shr edi,16 + and edi,255 + xor esi,DWORD PTR 2[edi*8+ebp] + mov edi,ebx + shr edi,24 + xor esi,DWORD PTR 1[edi*8+ebp] + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + mov esi,DWORD PTR [esi*8+ebp] + movzx edi,ah + xor esi,DWORD PTR 3[edi*8+ebp] + mov edi,edx + shr edi,16 + and edi,255 + xor esi,DWORD PTR 2[edi*8+ebp] + mov edi,ecx + shr edi,24 + xor esi,DWORD PTR 1[edi*8+ebp] + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + mov esi,DWORD PTR [esi*8+ebp] + movzx edi,bh + xor esi,DWORD PTR 3[edi*8+ebp] + mov edi,eax + shr edi,16 + and edi,255 + xor esi,DWORD PTR 2[edi*8+ebp] + mov edi,edx + shr edi,24 + xor esi,DWORD PTR 1[edi*8+ebp] + mov edi,DWORD PTR 20[esp] + and edx,255 + mov edx,DWORD PTR [edx*8+ebp] + movzx ecx,ch + xor edx,DWORD PTR 3[ecx*8+ebp] + mov ecx,esi + shr ebx,16 + and ebx,255 + xor edx,DWORD PTR 2[ebx*8+ebp] + mov 
ebx,DWORD PTR 8[esp] + shr eax,24 + xor edx,DWORD PTR 1[eax*8+ebp] + mov eax,DWORD PTR 4[esp] + add edi,16 + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + cmp edi,DWORD PTR 24[esp] + mov DWORD PTR 20[esp],edi + jb $L009loop + lea ebp,DWORD PTR 2176[ebp] + mov edi,DWORD PTR [ebp-128] + mov esi,DWORD PTR [ebp-96] + mov edi,DWORD PTR [ebp-64] + mov esi,DWORD PTR [ebp-32] + mov edi,DWORD PTR [ebp] + mov esi,DWORD PTR 32[ebp] + mov edi,DWORD PTR 64[ebp] + mov esi,DWORD PTR 96[ebp] + lea ebp,DWORD PTR [ebp-128] + mov esi,eax + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp] + movzx edi,dh + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,8 + xor esi,edi + mov edi,ecx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,16 + xor esi,edi + mov edi,ebx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,24 + xor esi,edi + mov DWORD PTR 4[esp],esi + mov esi,ebx + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp] + movzx edi,ah + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,8 + xor esi,edi + mov edi,edx + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,16 + xor esi,edi + mov edi,ecx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,24 + xor esi,edi + mov DWORD PTR 8[esp],esi + mov esi,ecx + and esi,255 + movzx esi,BYTE PTR [esi*1+ebp] + movzx edi,bh + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,8 + xor esi,edi + mov edi,eax + shr edi,16 + and edi,255 + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,16 + xor esi,edi + mov edi,edx + shr edi,24 + movzx edi,BYTE PTR [edi*1+ebp] + shl edi,24 + xor esi,edi + mov edi,DWORD PTR 20[esp] + and edx,255 + movzx edx,BYTE PTR [edx*1+ebp] + movzx ecx,ch + movzx ecx,BYTE PTR [ecx*1+ebp] + shl ecx,8 + xor edx,ecx + mov ecx,esi + shr ebx,16 + and ebx,255 + movzx ebx,BYTE PTR [ebx*1+ebp] + shl ebx,16 + xor edx,ebx + mov ebx,DWORD PTR 8[esp] + shr eax,24 + movzx eax,BYTE PTR [eax*1+ebp] + shl eax,24 + xor edx,eax + mov eax,DWORD PTR 4[esp] + lea ebp,DWORD PTR [ebp-2048] + add edi,16 + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + ret +ALIGN 64 +$LAES_Td:: +DD 1353184337,1353184337 +DD 1399144830,1399144830 +DD 3282310938,3282310938 +DD 2522752826,2522752826 +DD 3412831035,3412831035 +DD 4047871263,4047871263 +DD 2874735276,2874735276 +DD 2466505547,2466505547 +DD 1442459680,1442459680 +DD 4134368941,4134368941 +DD 2440481928,2440481928 +DD 625738485,625738485 +DD 4242007375,4242007375 +DD 3620416197,3620416197 +DD 2151953702,2151953702 +DD 2409849525,2409849525 +DD 1230680542,1230680542 +DD 1729870373,1729870373 +DD 2551114309,2551114309 +DD 3787521629,3787521629 +DD 41234371,41234371 +DD 317738113,317738113 +DD 2744600205,2744600205 +DD 3338261355,3338261355 +DD 3881799427,3881799427 +DD 2510066197,2510066197 +DD 3950669247,3950669247 +DD 3663286933,3663286933 +DD 763608788,763608788 +DD 3542185048,3542185048 +DD 694804553,694804553 +DD 1154009486,1154009486 +DD 1787413109,1787413109 +DD 2021232372,2021232372 +DD 1799248025,1799248025 +DD 3715217703,3715217703 +DD 3058688446,3058688446 +DD 397248752,397248752 +DD 1722556617,1722556617 +DD 3023752829,3023752829 +DD 407560035,407560035 +DD 2184256229,2184256229 +DD 1613975959,1613975959 +DD 1165972322,1165972322 +DD 3765920945,3765920945 +DD 2226023355,2226023355 +DD 480281086,480281086 +DD 2485848313,2485848313 +DD 1483229296,1483229296 +DD 436028815,436028815 +DD 2272059028,2272059028 +DD 3086515026,3086515026 +DD 601060267,601060267 +DD 3791801202,3791801202 +DD 
1468997603,1468997603 +DD 715871590,715871590 +DD 120122290,120122290 +DD 63092015,63092015 +DD 2591802758,2591802758 +DD 2768779219,2768779219 +DD 4068943920,4068943920 +DD 2997206819,2997206819 +DD 3127509762,3127509762 +DD 1552029421,1552029421 +DD 723308426,723308426 +DD 2461301159,2461301159 +DD 4042393587,4042393587 +DD 2715969870,2715969870 +DD 3455375973,3455375973 +DD 3586000134,3586000134 +DD 526529745,526529745 +DD 2331944644,2331944644 +DD 2639474228,2639474228 +DD 2689987490,2689987490 +DD 853641733,853641733 +DD 1978398372,1978398372 +DD 971801355,971801355 +DD 2867814464,2867814464 +DD 111112542,111112542 +DD 1360031421,1360031421 +DD 4186579262,4186579262 +DD 1023860118,1023860118 +DD 2919579357,2919579357 +DD 1186850381,1186850381 +DD 3045938321,3045938321 +DD 90031217,90031217 +DD 1876166148,1876166148 +DD 4279586912,4279586912 +DD 620468249,620468249 +DD 2548678102,2548678102 +DD 3426959497,3426959497 +DD 2006899047,2006899047 +DD 3175278768,3175278768 +DD 2290845959,2290845959 +DD 945494503,945494503 +DD 3689859193,3689859193 +DD 1191869601,1191869601 +DD 3910091388,3910091388 +DD 3374220536,3374220536 +DD 0,0 +DD 2206629897,2206629897 +DD 1223502642,1223502642 +DD 2893025566,2893025566 +DD 1316117100,1316117100 +DD 4227796733,4227796733 +DD 1446544655,1446544655 +DD 517320253,517320253 +DD 658058550,658058550 +DD 1691946762,1691946762 +DD 564550760,564550760 +DD 3511966619,3511966619 +DD 976107044,976107044 +DD 2976320012,2976320012 +DD 266819475,266819475 +DD 3533106868,3533106868 +DD 2660342555,2660342555 +DD 1338359936,1338359936 +DD 2720062561,2720062561 +DD 1766553434,1766553434 +DD 370807324,370807324 +DD 179999714,179999714 +DD 3844776128,3844776128 +DD 1138762300,1138762300 +DD 488053522,488053522 +DD 185403662,185403662 +DD 2915535858,2915535858 +DD 3114841645,3114841645 +DD 3366526484,3366526484 +DD 2233069911,2233069911 +DD 1275557295,1275557295 +DD 3151862254,3151862254 +DD 4250959779,4250959779 +DD 2670068215,2670068215 +DD 3170202204,3170202204 +DD 3309004356,3309004356 +DD 880737115,880737115 +DD 1982415755,1982415755 +DD 3703972811,3703972811 +DD 1761406390,1761406390 +DD 1676797112,1676797112 +DD 3403428311,3403428311 +DD 277177154,277177154 +DD 1076008723,1076008723 +DD 538035844,538035844 +DD 2099530373,2099530373 +DD 4164795346,4164795346 +DD 288553390,288553390 +DD 1839278535,1839278535 +DD 1261411869,1261411869 +DD 4080055004,4080055004 +DD 3964831245,3964831245 +DD 3504587127,3504587127 +DD 1813426987,1813426987 +DD 2579067049,2579067049 +DD 4199060497,4199060497 +DD 577038663,577038663 +DD 3297574056,3297574056 +DD 440397984,440397984 +DD 3626794326,3626794326 +DD 4019204898,4019204898 +DD 3343796615,3343796615 +DD 3251714265,3251714265 +DD 4272081548,4272081548 +DD 906744984,906744984 +DD 3481400742,3481400742 +DD 685669029,685669029 +DD 646887386,646887386 +DD 2764025151,2764025151 +DD 3835509292,3835509292 +DD 227702864,227702864 +DD 2613862250,2613862250 +DD 1648787028,1648787028 +DD 3256061430,3256061430 +DD 3904428176,3904428176 +DD 1593260334,1593260334 +DD 4121936770,4121936770 +DD 3196083615,3196083615 +DD 2090061929,2090061929 +DD 2838353263,2838353263 +DD 3004310991,3004310991 +DD 999926984,999926984 +DD 2809993232,2809993232 +DD 1852021992,1852021992 +DD 2075868123,2075868123 +DD 158869197,158869197 +DD 4095236462,4095236462 +DD 28809964,28809964 +DD 2828685187,2828685187 +DD 1701746150,1701746150 +DD 2129067946,2129067946 +DD 147831841,147831841 +DD 3873969647,3873969647 +DD 3650873274,3650873274 +DD 3459673930,3459673930 +DD 
3557400554,3557400554 +DD 3598495785,3598495785 +DD 2947720241,2947720241 +DD 824393514,824393514 +DD 815048134,815048134 +DD 3227951669,3227951669 +DD 935087732,935087732 +DD 2798289660,2798289660 +DD 2966458592,2966458592 +DD 366520115,366520115 +DD 1251476721,1251476721 +DD 4158319681,4158319681 +DD 240176511,240176511 +DD 804688151,804688151 +DD 2379631990,2379631990 +DD 1303441219,1303441219 +DD 1414376140,1414376140 +DD 3741619940,3741619940 +DD 3820343710,3820343710 +DD 461924940,461924940 +DD 3089050817,3089050817 +DD 2136040774,2136040774 +DD 82468509,82468509 +DD 1563790337,1563790337 +DD 1937016826,1937016826 +DD 776014843,776014843 +DD 1511876531,1511876531 +DD 1389550482,1389550482 +DD 861278441,861278441 +DD 323475053,323475053 +DD 2355222426,2355222426 +DD 2047648055,2047648055 +DD 2383738969,2383738969 +DD 2302415851,2302415851 +DD 3995576782,3995576782 +DD 902390199,902390199 +DD 3991215329,3991215329 +DD 1018251130,1018251130 +DD 1507840668,1507840668 +DD 1064563285,1064563285 +DD 2043548696,2043548696 +DD 3208103795,3208103795 +DD 3939366739,3939366739 +DD 1537932639,1537932639 +DD 342834655,342834655 +DD 2262516856,2262516856 +DD 2180231114,2180231114 +DD 1053059257,1053059257 +DD 741614648,741614648 +DD 1598071746,1598071746 +DD 1925389590,1925389590 +DD 203809468,203809468 +DD 2336832552,2336832552 +DD 1100287487,1100287487 +DD 1895934009,1895934009 +DD 3736275976,3736275976 +DD 2632234200,2632234200 +DD 2428589668,2428589668 +DD 1636092795,1636092795 +DD 1890988757,1890988757 +DD 1952214088,1952214088 +DD 1113045200,1113045200 +DB 82,9,106,213,48,54,165,56 +DB 191,64,163,158,129,243,215,251 +DB 124,227,57,130,155,47,255,135 +DB 52,142,67,68,196,222,233,203 +DB 84,123,148,50,166,194,35,61 +DB 238,76,149,11,66,250,195,78 +DB 8,46,161,102,40,217,36,178 +DB 118,91,162,73,109,139,209,37 +DB 114,248,246,100,134,104,152,22 +DB 212,164,92,204,93,101,182,146 +DB 108,112,72,80,253,237,185,218 +DB 94,21,70,87,167,141,157,132 +DB 144,216,171,0,140,188,211,10 +DB 247,228,88,5,184,179,69,6 +DB 208,44,30,143,202,63,15,2 +DB 193,175,189,3,1,19,138,107 +DB 58,145,17,65,79,103,220,234 +DB 151,242,207,206,240,180,230,115 +DB 150,172,116,34,231,173,53,133 +DB 226,249,55,232,28,117,223,110 +DB 71,241,26,113,29,41,197,137 +DB 111,183,98,14,170,24,190,27 +DB 252,86,62,75,198,210,121,32 +DB 154,219,192,254,120,205,90,244 +DB 31,221,168,51,136,7,199,49 +DB 177,18,16,89,39,128,236,95 +DB 96,81,127,169,25,181,74,13 +DB 45,229,122,159,147,201,156,239 +DB 160,224,59,77,174,42,245,176 +DB 200,235,187,60,131,83,153,97 +DB 23,43,4,126,186,119,214,38 +DB 225,105,20,99,85,33,12,125 +DB 82,9,106,213,48,54,165,56 +DB 191,64,163,158,129,243,215,251 +DB 124,227,57,130,155,47,255,135 +DB 52,142,67,68,196,222,233,203 +DB 84,123,148,50,166,194,35,61 +DB 238,76,149,11,66,250,195,78 +DB 8,46,161,102,40,217,36,178 +DB 118,91,162,73,109,139,209,37 +DB 114,248,246,100,134,104,152,22 +DB 212,164,92,204,93,101,182,146 +DB 108,112,72,80,253,237,185,218 +DB 94,21,70,87,167,141,157,132 +DB 144,216,171,0,140,188,211,10 +DB 247,228,88,5,184,179,69,6 +DB 208,44,30,143,202,63,15,2 +DB 193,175,189,3,1,19,138,107 +DB 58,145,17,65,79,103,220,234 +DB 151,242,207,206,240,180,230,115 +DB 150,172,116,34,231,173,53,133 +DB 226,249,55,232,28,117,223,110 +DB 71,241,26,113,29,41,197,137 +DB 111,183,98,14,170,24,190,27 +DB 252,86,62,75,198,210,121,32 +DB 154,219,192,254,120,205,90,244 +DB 31,221,168,51,136,7,199,49 +DB 177,18,16,89,39,128,236,95 +DB 96,81,127,169,25,181,74,13 +DB 45,229,122,159,147,201,156,239 +DB 
160,224,59,77,174,42,245,176 +DB 200,235,187,60,131,83,153,97 +DB 23,43,4,126,186,119,214,38 +DB 225,105,20,99,85,33,12,125 +DB 82,9,106,213,48,54,165,56 +DB 191,64,163,158,129,243,215,251 +DB 124,227,57,130,155,47,255,135 +DB 52,142,67,68,196,222,233,203 +DB 84,123,148,50,166,194,35,61 +DB 238,76,149,11,66,250,195,78 +DB 8,46,161,102,40,217,36,178 +DB 118,91,162,73,109,139,209,37 +DB 114,248,246,100,134,104,152,22 +DB 212,164,92,204,93,101,182,146 +DB 108,112,72,80,253,237,185,218 +DB 94,21,70,87,167,141,157,132 +DB 144,216,171,0,140,188,211,10 +DB 247,228,88,5,184,179,69,6 +DB 208,44,30,143,202,63,15,2 +DB 193,175,189,3,1,19,138,107 +DB 58,145,17,65,79,103,220,234 +DB 151,242,207,206,240,180,230,115 +DB 150,172,116,34,231,173,53,133 +DB 226,249,55,232,28,117,223,110 +DB 71,241,26,113,29,41,197,137 +DB 111,183,98,14,170,24,190,27 +DB 252,86,62,75,198,210,121,32 +DB 154,219,192,254,120,205,90,244 +DB 31,221,168,51,136,7,199,49 +DB 177,18,16,89,39,128,236,95 +DB 96,81,127,169,25,181,74,13 +DB 45,229,122,159,147,201,156,239 +DB 160,224,59,77,174,42,245,176 +DB 200,235,187,60,131,83,153,97 +DB 23,43,4,126,186,119,214,38 +DB 225,105,20,99,85,33,12,125 +DB 82,9,106,213,48,54,165,56 +DB 191,64,163,158,129,243,215,251 +DB 124,227,57,130,155,47,255,135 +DB 52,142,67,68,196,222,233,203 +DB 84,123,148,50,166,194,35,61 +DB 238,76,149,11,66,250,195,78 +DB 8,46,161,102,40,217,36,178 +DB 118,91,162,73,109,139,209,37 +DB 114,248,246,100,134,104,152,22 +DB 212,164,92,204,93,101,182,146 +DB 108,112,72,80,253,237,185,218 +DB 94,21,70,87,167,141,157,132 +DB 144,216,171,0,140,188,211,10 +DB 247,228,88,5,184,179,69,6 +DB 208,44,30,143,202,63,15,2 +DB 193,175,189,3,1,19,138,107 +DB 58,145,17,65,79,103,220,234 +DB 151,242,207,206,240,180,230,115 +DB 150,172,116,34,231,173,53,133 +DB 226,249,55,232,28,117,223,110 +DB 71,241,26,113,29,41,197,137 +DB 111,183,98,14,170,24,190,27 +DB 252,86,62,75,198,210,121,32 +DB 154,219,192,254,120,205,90,244 +DB 31,221,168,51,136,7,199,49 +DB 177,18,16,89,39,128,236,95 +DB 96,81,127,169,25,181,74,13 +DB 45,229,122,159,147,201,156,239 +DB 160,224,59,77,174,42,245,176 +DB 200,235,187,60,131,83,153,97 +DB 23,43,4,126,186,119,214,38 +DB 225,105,20,99,85,33,12,125 +__x86_AES_decrypt ENDP +ALIGN 16 +_AES_decrypt PROC PUBLIC +$L_AES_decrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 28[esp] + mov eax,esp + sub esp,36 + and esp,-64 + lea ebx,DWORD PTR [edi-127] + sub ebx,esp + neg ebx + and ebx,960 + sub esp,ebx + add esp,4 + mov DWORD PTR 28[esp],eax + call $L010pic_point +$L010pic_point: + pop ebp + lea eax,DWORD PTR _OPENSSL_ia32cap_P + lea ebp,DWORD PTR ($LAES_Td-$L010pic_point)[ebp] + lea ebx,DWORD PTR 764[esp] + sub ebx,ebp + and ebx,768 + lea ebp,DWORD PTR 2176[ebx*1+ebp] + bt DWORD PTR [eax],25 + jnc $L011x86 + movq mm0,QWORD PTR [esi] + movq mm4,QWORD PTR 8[esi] + call __sse_AES_decrypt_compact + mov esp,DWORD PTR 28[esp] + mov esi,DWORD PTR 24[esp] + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm4 + emms + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 16 +$L011x86: + mov DWORD PTR 24[esp],ebp + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + call __x86_AES_decrypt_compact + mov esp,DWORD PTR 28[esp] + mov esi,DWORD PTR 24[esp] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +_AES_decrypt ENDP +ALIGN 16 +_AES_cbc_encrypt PROC PUBLIC 
+$L_AES_cbc_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov ecx,DWORD PTR 28[esp] + cmp ecx,0 + je $L012drop_out + call $L013pic_point +$L013pic_point: + pop ebp + lea eax,DWORD PTR _OPENSSL_ia32cap_P + cmp DWORD PTR 40[esp],0 + lea ebp,DWORD PTR ($LAES_Te-$L013pic_point)[ebp] + jne $L014picked_te + lea ebp,DWORD PTR ($LAES_Td-$LAES_Te)[ebp] +$L014picked_te: + pushfd + cld + cmp ecx,512 + jb $L015slow_way + test ecx,15 + jnz $L015slow_way + bt DWORD PTR [eax],28 + jc $L015slow_way + lea esi,DWORD PTR [esp-324] + and esi,-64 + mov eax,ebp + lea ebx,DWORD PTR 2304[ebp] + mov edx,esi + and eax,4095 + and ebx,4095 + and edx,4095 + cmp edx,ebx + jb $L016tbl_break_out + sub edx,ebx + sub esi,edx + jmp $L017tbl_ok +ALIGN 4 +$L016tbl_break_out: + sub edx,eax + and edx,4095 + add edx,384 + sub esi,edx +ALIGN 4 +$L017tbl_ok: + lea edx,DWORD PTR 24[esp] + xchg esp,esi + add esp,4 + mov DWORD PTR 24[esp],ebp + mov DWORD PTR 28[esp],esi + mov eax,DWORD PTR [edx] + mov ebx,DWORD PTR 4[edx] + mov edi,DWORD PTR 12[edx] + mov esi,DWORD PTR 16[edx] + mov edx,DWORD PTR 20[edx] + mov DWORD PTR 32[esp],eax + mov DWORD PTR 36[esp],ebx + mov DWORD PTR 40[esp],ecx + mov DWORD PTR 44[esp],edi + mov DWORD PTR 48[esp],esi + mov DWORD PTR 316[esp],0 + mov ebx,edi + mov ecx,61 + sub ebx,ebp + mov esi,edi + and ebx,4095 + lea edi,DWORD PTR 76[esp] + cmp ebx,2304 + jb $L018do_copy + cmp ebx,3852 + jb $L019skip_copy +ALIGN 4 +$L018do_copy: + mov DWORD PTR 44[esp],edi +DD 2784229001 +$L019skip_copy: + mov edi,16 +ALIGN 4 +$L020prefetch_tbl: + mov eax,DWORD PTR [ebp] + mov ebx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 64[ebp] + mov esi,DWORD PTR 96[ebp] + lea ebp,DWORD PTR 128[ebp] + sub edi,1 + jnz $L020prefetch_tbl + sub ebp,2048 + mov esi,DWORD PTR 32[esp] + mov edi,DWORD PTR 48[esp] + cmp edx,0 + je $L021fast_decrypt + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] +ALIGN 16 +$L022fast_enc_loop: + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + xor eax,DWORD PTR [esi] + xor ebx,DWORD PTR 4[esi] + xor ecx,DWORD PTR 8[esi] + xor edx,DWORD PTR 12[esi] + mov edi,DWORD PTR 44[esp] + call __x86_AES_encrypt + mov esi,DWORD PTR 32[esp] + mov edi,DWORD PTR 36[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + lea esi,DWORD PTR 16[esi] + mov ecx,DWORD PTR 40[esp] + mov DWORD PTR 32[esp],esi + lea edx,DWORD PTR 16[edi] + mov DWORD PTR 36[esp],edx + sub ecx,16 + mov DWORD PTR 40[esp],ecx + jnz $L022fast_enc_loop + mov esi,DWORD PTR 48[esp] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + cmp DWORD PTR 316[esp],0 + mov edi,DWORD PTR 44[esp] + je $L023skip_ezero + mov ecx,60 + xor eax,eax +ALIGN 4 +DD 2884892297 +$L023skip_ezero: + mov esp,DWORD PTR 28[esp] + popfd +$L012drop_out: + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 16 +$L021fast_decrypt: + cmp esi,DWORD PTR 36[esp] + je $L024fast_dec_in_place + mov DWORD PTR 52[esp],edi +ALIGN 4 +ALIGN 16 +$L025fast_dec_loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov edi,DWORD PTR 44[esp] + call __x86_AES_decrypt + mov edi,DWORD PTR 52[esp] + mov esi,DWORD PTR 40[esp] + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov edi,DWORD PTR 36[esp] + mov esi,DWORD PTR 32[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 
8[edi],ecx + mov DWORD PTR 12[edi],edx + mov ecx,DWORD PTR 40[esp] + mov DWORD PTR 52[esp],esi + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 32[esp],esi + lea edi,DWORD PTR 16[edi] + mov DWORD PTR 36[esp],edi + sub ecx,16 + mov DWORD PTR 40[esp],ecx + jnz $L025fast_dec_loop + mov edi,DWORD PTR 52[esp] + mov esi,DWORD PTR 48[esp] + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + jmp $L026fast_dec_out +ALIGN 16 +$L024fast_dec_in_place: +$L027fast_dec_in_place_loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + lea edi,DWORD PTR 60[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov edi,DWORD PTR 44[esp] + call __x86_AES_decrypt + mov edi,DWORD PTR 48[esp] + mov esi,DWORD PTR 36[esp] + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 36[esp],esi + lea esi,DWORD PTR 60[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov esi,DWORD PTR 32[esp] + mov ecx,DWORD PTR 40[esp] + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 32[esp],esi + sub ecx,16 + mov DWORD PTR 40[esp],ecx + jnz $L027fast_dec_in_place_loop +ALIGN 4 +$L026fast_dec_out: + cmp DWORD PTR 316[esp],0 + mov edi,DWORD PTR 44[esp] + je $L028skip_dzero + mov ecx,60 + xor eax,eax +ALIGN 4 +DD 2884892297 +$L028skip_dzero: + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 16 +$L015slow_way: + mov eax,DWORD PTR [eax] + mov edi,DWORD PTR 36[esp] + lea esi,DWORD PTR [esp-80] + and esi,-64 + lea ebx,DWORD PTR [edi-143] + sub ebx,esi + neg ebx + and ebx,960 + sub esi,ebx + lea ebx,DWORD PTR 768[esi] + sub ebx,ebp + and ebx,768 + lea ebp,DWORD PTR 2176[ebx*1+ebp] + lea edx,DWORD PTR 24[esp] + xchg esp,esi + add esp,4 + mov DWORD PTR 24[esp],ebp + mov DWORD PTR 28[esp],esi + mov DWORD PTR 52[esp],eax + mov eax,DWORD PTR [edx] + mov ebx,DWORD PTR 4[edx] + mov esi,DWORD PTR 16[edx] + mov edx,DWORD PTR 20[edx] + mov DWORD PTR 32[esp],eax + mov DWORD PTR 36[esp],ebx + mov DWORD PTR 40[esp],ecx + mov DWORD PTR 44[esp],edi + mov DWORD PTR 48[esp],esi + mov edi,esi + mov esi,eax + cmp edx,0 + je $L029slow_decrypt + cmp ecx,16 + mov edx,ebx + jb $L030slow_enc_tail + bt DWORD PTR 52[esp],25 + jnc $L031slow_enc_x86 + movq mm0,QWORD PTR [edi] + movq mm4,QWORD PTR 8[edi] +ALIGN 16 +$L032slow_enc_loop_sse: + pxor mm0,QWORD PTR [esi] + pxor mm4,QWORD PTR 8[esi] + mov edi,DWORD PTR 44[esp] + call __sse_AES_encrypt_compact + mov esi,DWORD PTR 32[esp] + mov edi,DWORD PTR 36[esp] + mov ecx,DWORD PTR 40[esp] + movq QWORD PTR [edi],mm0 + movq QWORD PTR 8[edi],mm4 + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 32[esp],esi + lea edx,DWORD PTR 16[edi] + mov DWORD PTR 36[esp],edx + sub ecx,16 + cmp ecx,16 + mov DWORD PTR 40[esp],ecx + jae $L032slow_enc_loop_sse + test ecx,15 + jnz $L030slow_enc_tail + mov esi,DWORD PTR 48[esp] + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm4 + emms + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + 
ret + pushfd +ALIGN 16 +$L031slow_enc_x86: + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] +ALIGN 4 +$L033slow_enc_loop_x86: + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + xor eax,DWORD PTR [esi] + xor ebx,DWORD PTR 4[esi] + xor ecx,DWORD PTR 8[esi] + xor edx,DWORD PTR 12[esi] + mov edi,DWORD PTR 44[esp] + call __x86_AES_encrypt_compact + mov esi,DWORD PTR 32[esp] + mov edi,DWORD PTR 36[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov ecx,DWORD PTR 40[esp] + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 32[esp],esi + lea edx,DWORD PTR 16[edi] + mov DWORD PTR 36[esp],edx + sub ecx,16 + cmp ecx,16 + mov DWORD PTR 40[esp],ecx + jae $L033slow_enc_loop_x86 + test ecx,15 + jnz $L030slow_enc_tail + mov esi,DWORD PTR 48[esp] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 16 +$L030slow_enc_tail: + emms + mov edi,edx + mov ebx,16 + sub ebx,ecx + cmp edi,esi + je $L034enc_in_place +ALIGN 4 +DD 2767451785 + jmp $L035enc_skip_in_place +$L034enc_in_place: + lea edi,DWORD PTR [ecx*1+edi] +$L035enc_skip_in_place: + mov ecx,ebx + xor eax,eax +ALIGN 4 +DD 2868115081 + mov edi,DWORD PTR 48[esp] + mov esi,edx + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov DWORD PTR 40[esp],16 + jmp $L033slow_enc_loop_x86 +ALIGN 16 +$L029slow_decrypt: + bt DWORD PTR 52[esp],25 + jnc $L036slow_dec_loop_x86 +ALIGN 4 +$L037slow_dec_loop_sse: + movq mm0,QWORD PTR [esi] + movq mm4,QWORD PTR 8[esi] + mov edi,DWORD PTR 44[esp] + call __sse_AES_decrypt_compact + mov esi,DWORD PTR 32[esp] + lea eax,DWORD PTR 60[esp] + mov ebx,DWORD PTR 36[esp] + mov ecx,DWORD PTR 40[esp] + mov edi,DWORD PTR 48[esp] + movq mm1,QWORD PTR [esi] + movq mm5,QWORD PTR 8[esi] + pxor mm0,QWORD PTR [edi] + pxor mm4,QWORD PTR 8[edi] + movq QWORD PTR [edi],mm1 + movq QWORD PTR 8[edi],mm5 + sub ecx,16 + jc $L038slow_dec_partial_sse + movq QWORD PTR [ebx],mm0 + movq QWORD PTR 8[ebx],mm4 + lea ebx,DWORD PTR 16[ebx] + mov DWORD PTR 36[esp],ebx + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 32[esp],esi + mov DWORD PTR 40[esp],ecx + jnz $L037slow_dec_loop_sse + emms + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 16 +$L038slow_dec_partial_sse: + movq QWORD PTR [eax],mm0 + movq QWORD PTR 8[eax],mm4 + emms + add ecx,16 + mov edi,ebx + mov esi,eax +ALIGN 4 +DD 2767451785 + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 16 +$L036slow_dec_loop_x86: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + lea edi,DWORD PTR 60[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov edi,DWORD PTR 44[esp] + call __x86_AES_decrypt_compact + mov edi,DWORD PTR 48[esp] + mov esi,DWORD PTR 40[esp] + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + sub esi,16 + jc $L039slow_dec_partial_x86 + mov DWORD PTR 40[esp],esi + mov esi,DWORD PTR 36[esp] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 36[esp],esi + lea esi,DWORD PTR 60[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov 
edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov esi,DWORD PTR 32[esp] + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 32[esp],esi + jnz $L036slow_dec_loop_x86 + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 16 +$L039slow_dec_partial_x86: + lea esi,DWORD PTR 60[esp] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + mov esi,DWORD PTR 32[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov ecx,DWORD PTR 40[esp] + mov edi,DWORD PTR 36[esp] + lea esi,DWORD PTR 60[esp] +ALIGN 4 +DD 2767451785 + mov esp,DWORD PTR 28[esp] + popfd + pop edi + pop esi + pop ebx + pop ebp + ret +_AES_cbc_encrypt ENDP +ALIGN 16 +__x86_AES_set_encrypt_key PROC PRIVATE + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 32[esp] + test esi,-1 + jz $L040badpointer + test edi,-1 + jz $L040badpointer + call $L041pic_point +$L041pic_point: + pop ebp + lea ebp,DWORD PTR ($LAES_Te-$L041pic_point)[ebp] + lea ebp,DWORD PTR 2176[ebp] + mov eax,DWORD PTR [ebp-128] + mov ebx,DWORD PTR [ebp-96] + mov ecx,DWORD PTR [ebp-64] + mov edx,DWORD PTR [ebp-32] + mov eax,DWORD PTR [ebp] + mov ebx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 64[ebp] + mov edx,DWORD PTR 96[ebp] + mov ecx,DWORD PTR 28[esp] + cmp ecx,128 + je $L04210rounds + cmp ecx,192 + je $L04312rounds + cmp ecx,256 + je $L04414rounds + mov eax,-2 + jmp $L045exit +$L04210rounds: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + xor ecx,ecx + jmp $L04610shortcut +ALIGN 4 +$L04710loop: + mov eax,DWORD PTR [edi] + mov edx,DWORD PTR 12[edi] +$L04610shortcut: + movzx esi,dl + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,24 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shr edx,16 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,8 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shl ebx,16 + xor eax,ebx + xor eax,DWORD PTR 896[ecx*4+ebp] + mov DWORD PTR 16[edi],eax + xor eax,DWORD PTR 4[edi] + mov DWORD PTR 20[edi],eax + xor eax,DWORD PTR 8[edi] + mov DWORD PTR 24[edi],eax + xor eax,DWORD PTR 12[edi] + mov DWORD PTR 28[edi],eax + inc ecx + add edi,16 + cmp ecx,10 + jl $L04710loop + mov DWORD PTR 80[edi],10 + xor eax,eax + jmp $L045exit +$L04312rounds: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 20[esi] + mov DWORD PTR 16[edi],ecx + mov DWORD PTR 20[edi],edx + xor ecx,ecx + jmp $L04812shortcut +ALIGN 4 +$L04912loop: + mov eax,DWORD PTR [edi] + mov edx,DWORD PTR 20[edi] +$L04812shortcut: + movzx esi,dl + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,24 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shr edx,16 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,8 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shl ebx,16 + xor eax,ebx + xor eax,DWORD PTR 896[ecx*4+ebp] + mov DWORD 
PTR 24[edi],eax + xor eax,DWORD PTR 4[edi] + mov DWORD PTR 28[edi],eax + xor eax,DWORD PTR 8[edi] + mov DWORD PTR 32[edi],eax + xor eax,DWORD PTR 12[edi] + mov DWORD PTR 36[edi],eax + cmp ecx,7 + je $L05012break + inc ecx + xor eax,DWORD PTR 16[edi] + mov DWORD PTR 40[edi],eax + xor eax,DWORD PTR 20[edi] + mov DWORD PTR 44[edi],eax + add edi,24 + jmp $L04912loop +$L05012break: + mov DWORD PTR 72[edi],12 + xor eax,eax + jmp $L045exit +$L04414rounds: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov eax,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 28[esi] + mov DWORD PTR 16[edi],eax + mov DWORD PTR 20[edi],ebx + mov DWORD PTR 24[edi],ecx + mov DWORD PTR 28[edi],edx + xor ecx,ecx + jmp $L05114shortcut +ALIGN 4 +$L05214loop: + mov edx,DWORD PTR 28[edi] +$L05114shortcut: + mov eax,DWORD PTR [edi] + movzx esi,dl + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,24 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shr edx,16 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,8 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shl ebx,16 + xor eax,ebx + xor eax,DWORD PTR 896[ecx*4+ebp] + mov DWORD PTR 32[edi],eax + xor eax,DWORD PTR 4[edi] + mov DWORD PTR 36[edi],eax + xor eax,DWORD PTR 8[edi] + mov DWORD PTR 40[edi],eax + xor eax,DWORD PTR 12[edi] + mov DWORD PTR 44[edi],eax + cmp ecx,6 + je $L05314break + inc ecx + mov edx,eax + mov eax,DWORD PTR 16[edi] + movzx esi,dl + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shr edx,16 + shl ebx,8 + movzx esi,dl + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + movzx esi,dh + shl ebx,16 + xor eax,ebx + movzx ebx,BYTE PTR [esi*1+ebp-128] + shl ebx,24 + xor eax,ebx + mov DWORD PTR 48[edi],eax + xor eax,DWORD PTR 20[edi] + mov DWORD PTR 52[edi],eax + xor eax,DWORD PTR 24[edi] + mov DWORD PTR 56[edi],eax + xor eax,DWORD PTR 28[edi] + mov DWORD PTR 60[edi],eax + add edi,32 + jmp $L05214loop +$L05314break: + mov DWORD PTR 48[edi],14 + xor eax,eax + jmp $L045exit +$L040badpointer: + mov eax,-1 +$L045exit: + pop edi + pop esi + pop ebx + pop ebp + ret +__x86_AES_set_encrypt_key ENDP +ALIGN 16 +_private_AES_set_encrypt_key PROC PUBLIC +$L_private_AES_set_encrypt_key_begin:: + call __x86_AES_set_encrypt_key + ret +_private_AES_set_encrypt_key ENDP +ALIGN 16 +_private_AES_set_decrypt_key PROC PUBLIC +$L_private_AES_set_decrypt_key_begin:: + call __x86_AES_set_encrypt_key + cmp eax,0 + je $L054proceed + ret +$L054proceed: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 28[esp] + mov ecx,DWORD PTR 240[esi] + lea ecx,DWORD PTR [ecx*4] + lea edi,DWORD PTR [ecx*4+esi] +ALIGN 4 +$L055invert: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR [edi] + mov edx,DWORD PTR 4[edi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR [esi],ecx + mov DWORD PTR 4[esi],edx + mov eax,DWORD PTR 8[esi] + mov ebx,DWORD PTR 12[esi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov DWORD PTR 8[edi],eax + mov DWORD PTR 12[edi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + add esi,16 + sub edi,16 + cmp esi,edi + jne $L055invert + mov edi,DWORD PTR 28[esp] + mov esi,DWORD PTR 240[edi] + lea esi,DWORD PTR [esi*1+esi-2] + lea esi,DWORD PTR [esi*8+edi] + mov DWORD PTR 28[esp],esi + 
mov eax,DWORD PTR 16[edi] +ALIGN 4 +$L056permute: + add edi,16 + mov ebp,2155905152 + and ebp,eax + lea ebx,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ebx,4278124286 + and esi,454761243 + xor ebx,esi + mov ebp,2155905152 + and ebp,ebx + lea ecx,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ecx,4278124286 + and esi,454761243 + xor ebx,eax + xor ecx,esi + mov ebp,2155905152 + and ebp,ecx + lea edx,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 + xor ecx,eax + sub esi,ebp + and edx,4278124286 + and esi,454761243 + rol eax,8 + xor edx,esi + mov ebp,DWORD PTR 4[edi] + xor eax,ebx + xor ebx,edx + xor eax,ecx + rol ebx,24 + xor ecx,edx + xor eax,edx + rol ecx,16 + xor eax,ebx + rol edx,8 + xor eax,ecx + mov ebx,ebp + xor eax,edx + mov DWORD PTR [edi],eax + mov ebp,2155905152 + and ebp,ebx + lea ecx,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ecx,4278124286 + and esi,454761243 + xor ecx,esi + mov ebp,2155905152 + and ebp,ecx + lea edx,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and edx,4278124286 + and esi,454761243 + xor ecx,ebx + xor edx,esi + mov ebp,2155905152 + and ebp,edx + lea eax,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 + xor edx,ebx + sub esi,ebp + and eax,4278124286 + and esi,454761243 + rol ebx,8 + xor eax,esi + mov ebp,DWORD PTR 8[edi] + xor ebx,ecx + xor ecx,eax + xor ebx,edx + rol ecx,24 + xor edx,eax + xor ebx,eax + rol edx,16 + xor ebx,ecx + rol eax,8 + xor ebx,edx + mov ecx,ebp + xor ebx,eax + mov DWORD PTR 4[edi],ebx + mov ebp,2155905152 + and ebp,ecx + lea edx,DWORD PTR [ecx*1+ecx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and edx,4278124286 + and esi,454761243 + xor edx,esi + mov ebp,2155905152 + and ebp,edx + lea eax,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and eax,4278124286 + and esi,454761243 + xor edx,ecx + xor eax,esi + mov ebp,2155905152 + and ebp,eax + lea ebx,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 + xor eax,ecx + sub esi,ebp + and ebx,4278124286 + and esi,454761243 + rol ecx,8 + xor ebx,esi + mov ebp,DWORD PTR 12[edi] + xor ecx,edx + xor edx,ebx + xor ecx,eax + rol edx,24 + xor eax,ebx + xor ecx,ebx + rol eax,16 + xor ecx,edx + rol ebx,8 + xor ecx,eax + mov edx,ebp + xor ecx,ebx + mov DWORD PTR 8[edi],ecx + mov ebp,2155905152 + and ebp,edx + lea eax,DWORD PTR [edx*1+edx] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and eax,4278124286 + and esi,454761243 + xor eax,esi + mov ebp,2155905152 + and ebp,eax + lea ebx,DWORD PTR [eax*1+eax] + mov esi,ebp + shr ebp,7 + sub esi,ebp + and ebx,4278124286 + and esi,454761243 + xor eax,edx + xor ebx,esi + mov ebp,2155905152 + and ebp,ebx + lea ecx,DWORD PTR [ebx*1+ebx] + mov esi,ebp + shr ebp,7 + xor ebx,edx + sub esi,ebp + and ecx,4278124286 + and esi,454761243 + rol edx,8 + xor ecx,esi + mov ebp,DWORD PTR 16[edi] + xor edx,eax + xor eax,ecx + xor edx,ebx + rol eax,24 + xor ebx,ecx + xor edx,ecx + rol ebx,16 + xor edx,eax + rol ecx,8 + xor edx,ebx + mov eax,ebp + xor edx,ecx + mov DWORD PTR 12[edi],edx + cmp edi,DWORD PTR 28[esp] + jb $L056permute + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +_private_AES_set_decrypt_key ENDP +DB 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +DB 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +DB 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm 
b/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm
new file mode 100644
index 00000000000000..43fdb5a0345e93
--- /dev/null
+++ b/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm
@@ -0,0 +1,2184 @@
+TITLE ../openssl/crypto/aes/asm/aesni-x86.asm
+IF @Version LT 800
+ECHO MASM version 8.00 or later is strongly recommended.
+ENDIF
+.686
+.XMM
+IF @Version LT 800
+XMMWORD STRUCT 16
+DQ 2 dup (?)
+XMMWORD ENDS
+ENDIF
+
+.MODEL FLAT
+OPTION DOTNAME
+IF @Version LT 800
+.text$ SEGMENT PAGE 'CODE'
+ELSE
+.text$ SEGMENT ALIGN(64) 'CODE'
+ENDIF
+ALIGN 16
+_aesni_encrypt PROC PUBLIC
+$L_aesni_encrypt_begin::
+ mov eax,DWORD PTR 4[esp]
+ mov edx,DWORD PTR 12[esp]
+ movups xmm2,XMMWORD PTR [eax]
+ mov ecx,DWORD PTR 240[edx]
+ mov eax,DWORD PTR 8[esp]
+ movups xmm0,XMMWORD PTR [edx]
+ movups xmm1,XMMWORD PTR 16[edx]
+ lea edx,DWORD PTR 32[edx]
+ xorps xmm2,xmm0
+$L000enc1_loop_1:
+DB 102,15,56,220,209
+ dec ecx
+ movups xmm1,XMMWORD PTR [edx]
+ lea edx,DWORD PTR 16[edx]
+ jnz $L000enc1_loop_1
+DB 102,15,56,221,209
+ movups XMMWORD PTR [eax],xmm2
+ ret
+_aesni_encrypt ENDP
+ALIGN 16
+_aesni_decrypt PROC PUBLIC
+$L_aesni_decrypt_begin::
+ mov eax,DWORD PTR 4[esp]
+ mov edx,DWORD PTR 12[esp]
+ movups xmm2,XMMWORD PTR [eax]
+ mov ecx,DWORD PTR 240[edx]
+ mov eax,DWORD PTR 8[esp]
+ movups xmm0,XMMWORD PTR [edx]
+ movups xmm1,XMMWORD PTR 16[edx]
+ lea edx,DWORD PTR 32[edx]
+ xorps xmm2,xmm0
+$L001dec1_loop_2:
+DB 102,15,56,222,209
+ dec ecx
+ movups xmm1,XMMWORD PTR [edx]
+ lea edx,DWORD PTR 16[edx]
+ jnz $L001dec1_loop_2
+DB 102,15,56,223,209
+ movups XMMWORD PTR [eax],xmm2
+ ret
+_aesni_decrypt ENDP
+ALIGN 16
+__aesni_encrypt2 PROC PRIVATE
+ movups xmm0,XMMWORD PTR [edx]
+ shl ecx,4
+ movups xmm1,XMMWORD PTR 16[edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ movups xmm0,XMMWORD PTR 32[edx]
+ lea edx,DWORD PTR 32[ecx*1+edx]
+ neg ecx
+ add ecx,16
+$L002enc2_loop:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+ movups xmm1,XMMWORD PTR [ecx*1+edx]
+ add ecx,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+ movups xmm0,XMMWORD PTR [ecx*1+edx-16]
+ jnz $L002enc2_loop
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+ ret
+__aesni_encrypt2 ENDP
+ALIGN 16
+__aesni_decrypt2 PROC PRIVATE
+ movups xmm0,XMMWORD PTR [edx]
+ shl ecx,4
+ movups xmm1,XMMWORD PTR 16[edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ movups xmm0,XMMWORD PTR 32[edx]
+ lea edx,DWORD PTR 32[ecx*1+edx]
+ neg ecx
+ add ecx,16
+$L003dec2_loop:
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+ movups xmm1,XMMWORD PTR [ecx*1+edx]
+ add ecx,32
+DB 102,15,56,222,208
+DB 102,15,56,222,216
+ movups xmm0,XMMWORD PTR [ecx*1+edx-16]
+ jnz $L003dec2_loop
+DB 102,15,56,222,209
+DB 102,15,56,222,217
+DB 102,15,56,223,208
+DB 102,15,56,223,216
+ ret
+__aesni_decrypt2 ENDP
+ALIGN 16
+__aesni_encrypt3 PROC PRIVATE
+ movups xmm0,XMMWORD PTR [edx]
+ shl ecx,4
+ movups xmm1,XMMWORD PTR 16[edx]
+ xorps xmm2,xmm0
+ pxor xmm3,xmm0
+ pxor xmm4,xmm0
+ movups xmm0,XMMWORD PTR 32[edx]
+ lea edx,DWORD PTR 32[ecx*1+edx]
+ neg ecx
+ add ecx,16
+$L004enc3_loop:
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+ movups xmm1,XMMWORD PTR [ecx*1+edx]
+ add ecx,32
+DB 102,15,56,220,208
+DB 102,15,56,220,216
+DB 102,15,56,220,224
+ movups xmm0,XMMWORD PTR [ecx*1+edx-16]
+ jnz $L004enc3_loop
+DB 102,15,56,220,209
+DB 102,15,56,220,217
+DB 102,15,56,220,225
+DB 102,15,56,221,208
+DB 102,15,56,221,216
+DB 102,15,56,221,224
+ ret
+__aesni_encrypt3 ENDP
+ALIGN 16
+__aesni_decrypt3 PROC PRIVATE
+ movups xmm0,XMMWORD PTR [edx]
+ shl
ecx,4 + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx + add ecx,16 +$L005dec3_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L005dec3_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 + ret +__aesni_decrypt3 ENDP +ALIGN 16 +__aesni_encrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + shl ecx,4 + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 15,31,64,0 + add ecx,16 +$L006enc4_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L006enc4_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 + ret +__aesni_encrypt4 ENDP +ALIGN 16 +__aesni_decrypt4 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + shl ecx,4 + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 + pxor xmm5,xmm0 + movups xmm0,XMMWORD PTR 32[edx] + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 15,31,64,0 + add ecx,16 +$L007dec4_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L007dec4_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 + ret +__aesni_decrypt4 ENDP +ALIGN 16 +__aesni_encrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shl ecx,4 + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,220,209 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,220,217 + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 102,15,56,220,225 + pxor xmm7,xmm0 + add ecx,16 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jmp $L_aesni_encrypt6_enter +ALIGN 16 +$L008enc6_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +$L_aesni_encrypt6_enter:: + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 +DB 102,15,56,220,224 +DB 102,15,56,220,232 +DB 102,15,56,220,240 +DB 102,15,56,220,248 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L008enc6_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 +DB 102,15,56,221,208 +DB 102,15,56,221,216 +DB 102,15,56,221,224 +DB 102,15,56,221,232 +DB 102,15,56,221,240 +DB 102,15,56,221,248 + ret +__aesni_encrypt6 ENDP +ALIGN 16 +__aesni_decrypt6 PROC PRIVATE + movups xmm0,XMMWORD PTR [edx] + shl ecx,4 + movups xmm1,XMMWORD PTR 16[edx] + xorps 
xmm2,xmm0 + pxor xmm3,xmm0 + pxor xmm4,xmm0 +DB 102,15,56,222,209 + pxor xmm5,xmm0 + pxor xmm6,xmm0 +DB 102,15,56,222,217 + lea edx,DWORD PTR 32[ecx*1+edx] + neg ecx +DB 102,15,56,222,225 + pxor xmm7,xmm0 + add ecx,16 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jmp $L_aesni_decrypt6_enter +ALIGN 16 +$L009dec6_loop: +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +$L_aesni_decrypt6_enter:: + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,222,208 +DB 102,15,56,222,216 +DB 102,15,56,222,224 +DB 102,15,56,222,232 +DB 102,15,56,222,240 +DB 102,15,56,222,248 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L009dec6_loop +DB 102,15,56,222,209 +DB 102,15,56,222,217 +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 +DB 102,15,56,223,208 +DB 102,15,56,223,216 +DB 102,15,56,223,224 +DB 102,15,56,223,232 +DB 102,15,56,223,240 +DB 102,15,56,223,248 + ret +__aesni_decrypt6 ENDP +ALIGN 16 +_aesni_ecb_encrypt PROC PUBLIC +$L_aesni_ecb_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + and eax,-16 + jz $L010ecb_ret + mov ecx,DWORD PTR 240[edx] + test ebx,ebx + jz $L011ecb_decrypt + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb $L012ecb_enc_tail + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + movdqu xmm5,XMMWORD PTR 48[esi] + movdqu xmm6,XMMWORD PTR 64[esi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] + sub eax,96 + jmp $L013ecb_enc_loop6_enter +ALIGN 16 +$L014ecb_enc_loop6: + movups XMMWORD PTR [edi],xmm2 + movdqu xmm2,XMMWORD PTR [esi] + movups XMMWORD PTR 16[edi],xmm3 + movdqu xmm3,XMMWORD PTR 16[esi] + movups XMMWORD PTR 32[edi],xmm4 + movdqu xmm4,XMMWORD PTR 32[esi] + movups XMMWORD PTR 48[edi],xmm5 + movdqu xmm5,XMMWORD PTR 48[esi] + movups XMMWORD PTR 64[edi],xmm6 + movdqu xmm6,XMMWORD PTR 64[esi] + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] +$L013ecb_enc_loop6_enter: + call __aesni_encrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc $L014ecb_enc_loop6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + add eax,96 + jz $L010ecb_ret +$L012ecb_enc_tail: + movups xmm2,XMMWORD PTR [esi] + cmp eax,32 + jb $L015ecb_enc_one + movups xmm3,XMMWORD PTR 16[esi] + je $L016ecb_enc_two + movups xmm4,XMMWORD PTR 32[esi] + cmp eax,64 + jb $L017ecb_enc_three + movups xmm5,XMMWORD PTR 48[esi] + je $L018ecb_enc_four + movups xmm6,XMMWORD PTR 64[esi] + xorps xmm7,xmm7 + call __aesni_encrypt6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + jmp $L010ecb_ret +ALIGN 16 +$L015ecb_enc_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L019enc1_loop_3: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L019enc1_loop_3 +DB 102,15,56,221,209 + movups XMMWORD PTR [edi],xmm2 + jmp $L010ecb_ret +ALIGN 16 +$L016ecb_enc_two: + 
call __aesni_encrypt2 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + jmp $L010ecb_ret +ALIGN 16 +$L017ecb_enc_three: + call __aesni_encrypt3 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + jmp $L010ecb_ret +ALIGN 16 +$L018ecb_enc_four: + call __aesni_encrypt4 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + jmp $L010ecb_ret +ALIGN 16 +$L011ecb_decrypt: + mov ebp,edx + mov ebx,ecx + cmp eax,96 + jb $L020ecb_dec_tail + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + movdqu xmm5,XMMWORD PTR 48[esi] + movdqu xmm6,XMMWORD PTR 64[esi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] + sub eax,96 + jmp $L021ecb_dec_loop6_enter +ALIGN 16 +$L022ecb_dec_loop6: + movups XMMWORD PTR [edi],xmm2 + movdqu xmm2,XMMWORD PTR [esi] + movups XMMWORD PTR 16[edi],xmm3 + movdqu xmm3,XMMWORD PTR 16[esi] + movups XMMWORD PTR 32[edi],xmm4 + movdqu xmm4,XMMWORD PTR 32[esi] + movups XMMWORD PTR 48[edi],xmm5 + movdqu xmm5,XMMWORD PTR 48[esi] + movups XMMWORD PTR 64[edi],xmm6 + movdqu xmm6,XMMWORD PTR 64[esi] + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqu xmm7,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] +$L021ecb_dec_loop6_enter: + call __aesni_decrypt6 + mov edx,ebp + mov ecx,ebx + sub eax,96 + jnc $L022ecb_dec_loop6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + add eax,96 + jz $L010ecb_ret +$L020ecb_dec_tail: + movups xmm2,XMMWORD PTR [esi] + cmp eax,32 + jb $L023ecb_dec_one + movups xmm3,XMMWORD PTR 16[esi] + je $L024ecb_dec_two + movups xmm4,XMMWORD PTR 32[esi] + cmp eax,64 + jb $L025ecb_dec_three + movups xmm5,XMMWORD PTR 48[esi] + je $L026ecb_dec_four + movups xmm6,XMMWORD PTR 64[esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + jmp $L010ecb_ret +ALIGN 16 +$L023ecb_dec_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L027dec1_loop_4: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L027dec1_loop_4 +DB 102,15,56,223,209 + movups XMMWORD PTR [edi],xmm2 + jmp $L010ecb_ret +ALIGN 16 +$L024ecb_dec_two: + call __aesni_decrypt2 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + jmp $L010ecb_ret +ALIGN 16 +$L025ecb_dec_three: + call __aesni_decrypt3 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + jmp $L010ecb_ret +ALIGN 16 +$L026ecb_dec_four: + call __aesni_decrypt4 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 +$L010ecb_ret: + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ecb_encrypt ENDP +ALIGN 16 +_aesni_ccm64_encrypt_blocks PROC PUBLIC +$L_aesni_ccm64_encrypt_blocks_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + mov ecx,DWORD PTR 40[esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD PTR 
48[esp],ebp + movdqu xmm7,XMMWORD PTR [ebx] + movdqu xmm3,XMMWORD PTR [ecx] + mov ecx,DWORD PTR 240[edx] + mov DWORD PTR [esp],202182159 + mov DWORD PTR 4[esp],134810123 + mov DWORD PTR 8[esp],67438087 + mov DWORD PTR 12[esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD PTR 16[esp],ebx + mov DWORD PTR 20[esp],ebp + mov DWORD PTR 24[esp],ebp + mov DWORD PTR 28[esp],ebp + shl ecx,4 + mov ebx,16 + lea ebp,DWORD PTR [edx] + movdqa xmm5,XMMWORD PTR [esp] + movdqa xmm2,xmm7 + lea edx,DWORD PTR 32[ecx*1+edx] + sub ebx,ecx +DB 102,15,56,0,253 +$L028ccm64_enc_outer: + movups xmm0,XMMWORD PTR [ebp] + mov ecx,ebx + movups xmm6,XMMWORD PTR [esi] + xorps xmm2,xmm0 + movups xmm1,XMMWORD PTR 16[ebp] + xorps xmm0,xmm6 + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR 32[ebp] +$L029ccm64_enc2_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L029ccm64_enc2_loop +DB 102,15,56,220,209 +DB 102,15,56,220,217 + paddq xmm7,XMMWORD PTR 16[esp] + dec eax +DB 102,15,56,221,208 +DB 102,15,56,221,216 + lea esi,DWORD PTR 16[esi] + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups XMMWORD PTR [edi],xmm6 +DB 102,15,56,0,213 + lea edi,DWORD PTR 16[edi] + jnz $L028ccm64_enc_outer + mov esp,DWORD PTR 48[esp] + mov edi,DWORD PTR 40[esp] + movups XMMWORD PTR [edi],xmm3 + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ccm64_encrypt_blocks ENDP +ALIGN 16 +_aesni_ccm64_decrypt_blocks PROC PUBLIC +$L_aesni_ccm64_decrypt_blocks_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + mov ecx,DWORD PTR 40[esp] + mov ebp,esp + sub esp,60 + and esp,-16 + mov DWORD PTR 48[esp],ebp + movdqu xmm7,XMMWORD PTR [ebx] + movdqu xmm3,XMMWORD PTR [ecx] + mov ecx,DWORD PTR 240[edx] + mov DWORD PTR [esp],202182159 + mov DWORD PTR 4[esp],134810123 + mov DWORD PTR 8[esp],67438087 + mov DWORD PTR 12[esp],66051 + mov ebx,1 + xor ebp,ebp + mov DWORD PTR 16[esp],ebx + mov DWORD PTR 20[esp],ebp + mov DWORD PTR 24[esp],ebp + mov DWORD PTR 28[esp],ebp + movdqa xmm5,XMMWORD PTR [esp] + movdqa xmm2,xmm7 + mov ebp,edx + mov ebx,ecx +DB 102,15,56,0,253 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L030enc1_loop_5: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L030enc1_loop_5 +DB 102,15,56,221,209 + shl ebx,4 + mov ecx,16 + movups xmm6,XMMWORD PTR [esi] + paddq xmm7,XMMWORD PTR 16[esp] + lea esi,QWORD PTR 16[esi] + sub ecx,ebx + lea edx,DWORD PTR 32[ebx*1+ebp] + mov ebx,ecx + jmp $L031ccm64_dec_outer +ALIGN 16 +$L031ccm64_dec_outer: + xorps xmm6,xmm2 + movdqa xmm2,xmm7 + movups XMMWORD PTR [edi],xmm6 + lea edi,DWORD PTR 16[edi] +DB 102,15,56,0,213 + sub eax,1 + jz $L032ccm64_dec_break + movups xmm0,XMMWORD PTR [ebp] + mov ecx,ebx + movups xmm1,XMMWORD PTR 16[ebp] + xorps xmm6,xmm0 + xorps xmm2,xmm0 + xorps xmm3,xmm6 + movups xmm0,XMMWORD PTR 32[ebp] +$L033ccm64_dec2_loop: +DB 102,15,56,220,209 +DB 102,15,56,220,217 + movups xmm1,XMMWORD PTR [ecx*1+edx] + add ecx,32 +DB 102,15,56,220,208 +DB 102,15,56,220,216 + movups xmm0,XMMWORD PTR [ecx*1+edx-16] + jnz $L033ccm64_dec2_loop + movups xmm6,XMMWORD PTR [esi] + paddq xmm7,XMMWORD PTR 16[esp] +DB 102,15,56,220,209 +DB 102,15,56,220,217 +DB 102,15,56,221,208 +DB 102,15,56,221,216 + lea esi,QWORD PTR 16[esi] + jmp 
$L031ccm64_dec_outer +ALIGN 16 +$L032ccm64_dec_break: + mov ecx,DWORD PTR 240[ebp] + mov edx,ebp + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm6,xmm0 + lea edx,DWORD PTR 32[edx] + xorps xmm3,xmm6 +$L034enc1_loop_6: +DB 102,15,56,220,217 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L034enc1_loop_6 +DB 102,15,56,221,217 + mov esp,DWORD PTR 48[esp] + mov edi,DWORD PTR 40[esp] + movups XMMWORD PTR [edi],xmm3 + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ccm64_decrypt_blocks ENDP +ALIGN 16 +_aesni_ctr32_encrypt_blocks PROC PUBLIC +$L_aesni_ctr32_encrypt_blocks_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebx,DWORD PTR 36[esp] + mov ebp,esp + sub esp,88 + and esp,-16 + mov DWORD PTR 80[esp],ebp + cmp eax,1 + je $L035ctr32_one_shortcut + movdqu xmm7,XMMWORD PTR [ebx] + mov DWORD PTR [esp],202182159 + mov DWORD PTR 4[esp],134810123 + mov DWORD PTR 8[esp],67438087 + mov DWORD PTR 12[esp],66051 + mov ecx,6 + xor ebp,ebp + mov DWORD PTR 16[esp],ecx + mov DWORD PTR 20[esp],ecx + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],ebp +DB 102,15,58,22,251,3 +DB 102,15,58,34,253,3 + mov ecx,DWORD PTR 240[edx] + bswap ebx + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqa xmm2,XMMWORD PTR [esp] +DB 102,15,58,34,195,0 + lea ebp,DWORD PTR 3[ebx] +DB 102,15,58,34,205,0 + inc ebx +DB 102,15,58,34,195,1 + inc ebp +DB 102,15,58,34,205,1 + inc ebx +DB 102,15,58,34,195,2 + inc ebp +DB 102,15,58,34,205,2 + movdqa XMMWORD PTR 48[esp],xmm0 +DB 102,15,56,0,194 + movdqu xmm6,XMMWORD PTR [edx] + movdqa XMMWORD PTR 64[esp],xmm1 +DB 102,15,56,0,202 + pshufd xmm2,xmm0,192 + pshufd xmm3,xmm0,128 + cmp eax,6 + jb $L036ctr32_tail + pxor xmm7,xmm6 + shl ecx,4 + mov ebx,16 + movdqa XMMWORD PTR 32[esp],xmm7 + mov ebp,edx + sub ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] + sub eax,6 + jmp $L037ctr32_loop6 +ALIGN 16 +$L037ctr32_loop6: + pshufd xmm4,xmm0,64 + movdqa xmm0,XMMWORD PTR 32[esp] + pshufd xmm5,xmm1,192 + pxor xmm2,xmm0 + pshufd xmm6,xmm1,128 + pxor xmm3,xmm0 + pshufd xmm7,xmm1,64 + movups xmm1,XMMWORD PTR 16[ebp] + pxor xmm4,xmm0 + pxor xmm5,xmm0 +DB 102,15,56,220,209 + pxor xmm6,xmm0 + pxor xmm7,xmm0 +DB 102,15,56,220,217 + movups xmm0,XMMWORD PTR 32[ebp] + mov ecx,ebx +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + call $L_aesni_encrypt6_enter + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm3,xmm0 + movups XMMWORD PTR [edi],xmm2 + movdqa xmm0,XMMWORD PTR 16[esp] + xorps xmm4,xmm1 + movdqa xmm1,XMMWORD PTR 64[esp] + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + paddd xmm1,xmm0 + paddd xmm0,XMMWORD PTR 48[esp] + movdqa xmm2,XMMWORD PTR [esp] + movups xmm3,XMMWORD PTR 48[esi] + movups xmm4,XMMWORD PTR 64[esi] + xorps xmm5,xmm3 + movups xmm3,XMMWORD PTR 80[esi] + lea esi,DWORD PTR 96[esi] + movdqa XMMWORD PTR 48[esp],xmm0 +DB 102,15,56,0,194 + xorps xmm6,xmm4 + movups XMMWORD PTR 48[edi],xmm5 + xorps xmm7,xmm3 + movdqa XMMWORD PTR 64[esp],xmm1 +DB 102,15,56,0,202 + movups XMMWORD PTR 64[edi],xmm6 + pshufd xmm2,xmm0,192 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + pshufd xmm3,xmm0,128 + sub eax,6 + jnc $L037ctr32_loop6 + add eax,6 + jz $L038ctr32_ret + movdqu xmm7,XMMWORD PTR [ebp] + mov edx,ebp + pxor xmm7,XMMWORD PTR 32[esp] + mov ecx,DWORD PTR 240[ebp] +$L036ctr32_tail: + por 
xmm2,xmm7 + cmp eax,2 + jb $L039ctr32_one + pshufd xmm4,xmm0,64 + por xmm3,xmm7 + je $L040ctr32_two + pshufd xmm5,xmm1,192 + por xmm4,xmm7 + cmp eax,4 + jb $L041ctr32_three + pshufd xmm6,xmm1,128 + por xmm5,xmm7 + je $L042ctr32_four + por xmm6,xmm7 + call __aesni_encrypt6 + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm3,xmm0 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm4,xmm1 + movups xmm1,XMMWORD PTR 64[esi] + xorps xmm5,xmm0 + movups XMMWORD PTR [edi],xmm2 + xorps xmm6,xmm1 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + jmp $L038ctr32_ret +ALIGN 16 +$L035ctr32_one_shortcut: + movups xmm2,XMMWORD PTR [ebx] + mov ecx,DWORD PTR 240[edx] +$L039ctr32_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L043enc1_loop_7: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L043enc1_loop_7 +DB 102,15,56,221,209 + movups xmm6,XMMWORD PTR [esi] + xorps xmm6,xmm2 + movups XMMWORD PTR [edi],xmm6 + jmp $L038ctr32_ret +ALIGN 16 +$L040ctr32_two: + call __aesni_encrypt2 + movups xmm5,XMMWORD PTR [esi] + movups xmm6,XMMWORD PTR 16[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + jmp $L038ctr32_ret +ALIGN 16 +$L041ctr32_three: + call __aesni_encrypt3 + movups xmm5,XMMWORD PTR [esi] + movups xmm6,XMMWORD PTR 16[esi] + xorps xmm2,xmm5 + movups xmm7,XMMWORD PTR 32[esi] + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,xmm7 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + jmp $L038ctr32_ret +ALIGN 16 +$L042ctr32_four: + call __aesni_encrypt4 + movups xmm6,XMMWORD PTR [esi] + movups xmm7,XMMWORD PTR 16[esi] + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm2,xmm6 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm3,xmm7 + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,xmm1 + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,xmm0 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 +$L038ctr32_ret: + mov esp,DWORD PTR 80[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_ctr32_encrypt_blocks ENDP +ALIGN 16 +_aesni_xts_encrypt PROC PUBLIC +$L_aesni_xts_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov edx,DWORD PTR 36[esp] + mov esi,DWORD PTR 40[esp] + mov ecx,DWORD PTR 240[edx] + movups xmm2,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L044enc1_loop_8: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L044enc1_loop_8 +DB 102,15,56,221,209 + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebp,esp + sub esp,120 + mov ecx,DWORD PTR 240[edx] + and esp,-16 + mov DWORD PTR 96[esp],135 + mov DWORD PTR 100[esp],0 + mov DWORD PTR 104[esp],1 + mov DWORD PTR 108[esp],0 + mov DWORD PTR 112[esp],eax + mov DWORD PTR 116[esp],ebp + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,XMMWORD PTR 96[esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + mov ebp,edx + mov ebx,ecx + sub eax,96 + jc $L045xts_enc_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] + jmp $L046xts_enc_loop6 +ALIGN 16 +$L046xts_enc_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd 
xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 16[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 32[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 64[esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,XMMWORD PTR [ebp] + pand xmm7,xmm3 + movups xmm2,XMMWORD PTR [esi] + pxor xmm7,xmm1 + mov ecx,ebx + movdqu xmm3,XMMWORD PTR 16[esi] + xorps xmm2,xmm0 + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm3,xmm0 + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm4,xmm0 + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm5,xmm0 + movdqu xmm1,XMMWORD PTR 80[esi] + pxor xmm6,xmm0 + lea esi,DWORD PTR 96[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqa XMMWORD PTR 80[esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,XMMWORD PTR 16[ebp] + pxor xmm3,XMMWORD PTR 16[esp] + pxor xmm4,XMMWORD PTR 32[esp] +DB 102,15,56,220,209 + pxor xmm5,XMMWORD PTR 48[esp] + pxor xmm6,XMMWORD PTR 64[esp] +DB 102,15,56,220,217 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR 32[ebp] +DB 102,15,56,220,225 +DB 102,15,56,220,233 +DB 102,15,56,220,241 +DB 102,15,56,220,249 + call $L_aesni_encrypt6_enter + movdqa xmm1,XMMWORD PTR 80[esp] + pxor xmm0,xmm0 + xorps xmm2,XMMWORD PTR [esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,XMMWORD PTR 16[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 32[edi],xmm4 + xorps xmm6,XMMWORD PTR 64[esp] + movups XMMWORD PTR 48[edi],xmm5 + xorps xmm7,xmm1 + movups XMMWORD PTR 64[edi],xmm6 + pshufd xmm2,xmm0,19 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqa xmm3,XMMWORD PTR 96[esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + sub eax,96 + jnc $L046xts_enc_loop6 + mov ecx,DWORD PTR 240[ebp] + mov edx,ebp + mov ebx,ecx +$L045xts_enc_short: + add eax,96 + jz $L047xts_enc_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb $L048xts_enc_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je $L049xts_enc_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb $L050xts_enc_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa XMMWORD PTR [esp],xmm5 + movdqa XMMWORD PTR 16[esp],xmm6 + je $L051xts_enc_four + movdqa XMMWORD PTR 32[esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm3,XMMWORD PTR 16[esp] + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm4,XMMWORD PTR 32[esp] + lea esi,DWORD PTR 80[esi] + pxor xmm5,XMMWORD PTR 48[esp] + movdqa XMMWORD PTR 64[esp],xmm7 + pxor xmm6,xmm7 + call __aesni_encrypt6 + movaps xmm1,XMMWORD PTR 64[esp] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm6,xmm1 + movups XMMWORD PTR 
32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + lea edi,DWORD PTR 80[edi] + jmp $L052xts_enc_done +ALIGN 16 +$L048xts_enc_one: + movups xmm2,XMMWORD PTR [esi] + lea esi,DWORD PTR 16[esi] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L053enc1_loop_9: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L053enc1_loop_9 +DB 102,15,56,221,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] + movdqa xmm1,xmm5 + jmp $L052xts_enc_done +ALIGN 16 +$L049xts_enc_two: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + lea esi,DWORD PTR 32[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_encrypt2 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + lea edi,DWORD PTR 32[edi] + movdqa xmm1,xmm6 + jmp $L052xts_enc_done +ALIGN 16 +$L050xts_enc_three: + movaps xmm7,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + lea esi,DWORD PTR 48[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_encrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + lea edi,DWORD PTR 48[edi] + movdqa xmm1,xmm7 + jmp $L052xts_enc_done +ALIGN 16 +$L051xts_enc_four: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + xorps xmm2,XMMWORD PTR [esp] + movups xmm5,XMMWORD PTR 48[esi] + lea esi,DWORD PTR 64[esi] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_encrypt4 + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,xmm6 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + lea edi,DWORD PTR 64[edi] + movdqa xmm1,xmm6 + jmp $L052xts_enc_done +ALIGN 16 +$L047xts_enc_done6x: + mov eax,DWORD PTR 112[esp] + and eax,15 + jz $L054xts_enc_ret + movdqa xmm5,xmm1 + mov DWORD PTR 112[esp],eax + jmp $L055xts_enc_steal +ALIGN 16 +$L052xts_enc_done: + mov eax,DWORD PTR 112[esp] + pxor xmm0,xmm0 + and eax,15 + jz $L054xts_enc_ret + pcmpgtd xmm0,xmm1 + mov DWORD PTR 112[esp],eax + pshufd xmm5,xmm0,19 + paddq xmm1,xmm1 + pand xmm5,XMMWORD PTR 96[esp] + pxor xmm5,xmm1 +$L055xts_enc_steal: + movzx ecx,BYTE PTR [esi] + movzx edx,BYTE PTR [edi-16] + lea esi,DWORD PTR 1[esi] + mov BYTE PTR [edi-16],cl + mov BYTE PTR [edi],dl + lea edi,DWORD PTR 1[edi] + sub eax,1 + jnz $L055xts_enc_steal + sub edi,DWORD PTR 112[esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,XMMWORD PTR [edi-16] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L056enc1_loop_10: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L056enc1_loop_10 +DB 102,15,56,221,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi-16],xmm2 +$L054xts_enc_ret: + mov esp,DWORD PTR 116[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_xts_encrypt ENDP +ALIGN 16 +_aesni_xts_decrypt PROC PUBLIC +$L_aesni_xts_decrypt_begin:: + push ebp + push ebx + push esi + push edi + mov edx,DWORD PTR 36[esp] + mov esi,DWORD PTR 40[esp] + mov ecx,DWORD PTR 240[edx] + movups xmm2,XMMWORD PTR [esi] + 
movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L057enc1_loop_11: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L057enc1_loop_11 +DB 102,15,56,221,209 + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebp,esp + sub esp,120 + and esp,-16 + xor ebx,ebx + test eax,15 + setnz bl + shl ebx,4 + sub eax,ebx + mov DWORD PTR 96[esp],135 + mov DWORD PTR 100[esp],0 + mov DWORD PTR 104[esp],1 + mov DWORD PTR 108[esp],0 + mov DWORD PTR 112[esp],eax + mov DWORD PTR 116[esp],ebp + mov ecx,DWORD PTR 240[edx] + mov ebp,edx + mov ebx,ecx + movdqa xmm1,xmm2 + pxor xmm0,xmm0 + movdqa xmm3,XMMWORD PTR 96[esp] + pcmpgtd xmm0,xmm1 + and eax,-16 + sub eax,96 + jc $L058xts_dec_short + shl ecx,4 + mov ebx,16 + sub ebx,ecx + lea edx,DWORD PTR 32[ecx*1+edx] + jmp $L059xts_dec_loop6 +ALIGN 16 +$L059xts_dec_loop6: + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR [esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 16[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 32[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 64[esp],xmm1 + paddq xmm1,xmm1 + movups xmm0,XMMWORD PTR [ebp] + pand xmm7,xmm3 + movups xmm2,XMMWORD PTR [esi] + pxor xmm7,xmm1 + mov ecx,ebx + movdqu xmm3,XMMWORD PTR 16[esi] + xorps xmm2,xmm0 + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm3,xmm0 + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm4,xmm0 + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm5,xmm0 + movdqu xmm1,XMMWORD PTR 80[esi] + pxor xmm6,xmm0 + lea esi,DWORD PTR 96[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqa XMMWORD PTR 80[esp],xmm7 + pxor xmm7,xmm1 + movups xmm1,XMMWORD PTR 16[ebp] + pxor xmm3,XMMWORD PTR 16[esp] + pxor xmm4,XMMWORD PTR 32[esp] +DB 102,15,56,222,209 + pxor xmm5,XMMWORD PTR 48[esp] + pxor xmm6,XMMWORD PTR 64[esp] +DB 102,15,56,222,217 + pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR 32[ebp] +DB 102,15,56,222,225 +DB 102,15,56,222,233 +DB 102,15,56,222,241 +DB 102,15,56,222,249 + call $L_aesni_decrypt6_enter + movdqa xmm1,XMMWORD PTR 80[esp] + pxor xmm0,xmm0 + xorps xmm2,XMMWORD PTR [esp] + pcmpgtd xmm0,xmm1 + xorps xmm3,XMMWORD PTR 16[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 32[edi],xmm4 + xorps xmm6,XMMWORD PTR 64[esp] + movups XMMWORD PTR 48[edi],xmm5 + xorps xmm7,xmm1 + movups XMMWORD PTR 64[edi],xmm6 + pshufd xmm2,xmm0,19 + movups XMMWORD PTR 80[edi],xmm7 + lea edi,DWORD PTR 96[edi] + movdqa xmm3,XMMWORD PTR 96[esp] + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + sub eax,96 + jnc $L059xts_dec_loop6 + mov ecx,DWORD PTR 240[ebp] + mov edx,ebp + mov ebx,ecx +$L058xts_dec_short: + add eax,96 + jz $L060xts_dec_done6x + movdqa xmm5,xmm1 + cmp eax,32 + jb $L061xts_dec_one + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + je $L062xts_dec_two + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + 
pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + cmp eax,64 + jb $L063xts_dec_three + pshufd xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm7,xmm1 + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 + movdqa XMMWORD PTR [esp],xmm5 + movdqa XMMWORD PTR 16[esp],xmm6 + je $L064xts_dec_four + movdqa XMMWORD PTR 32[esp],xmm7 + pshufd xmm7,xmm0,19 + movdqa XMMWORD PTR 48[esp],xmm1 + paddq xmm1,xmm1 + pand xmm7,xmm3 + pxor xmm7,xmm1 + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + pxor xmm2,XMMWORD PTR [esp] + movdqu xmm5,XMMWORD PTR 48[esi] + pxor xmm3,XMMWORD PTR 16[esp] + movdqu xmm6,XMMWORD PTR 64[esi] + pxor xmm4,XMMWORD PTR 32[esp] + lea esi,DWORD PTR 80[esi] + pxor xmm5,XMMWORD PTR 48[esp] + movdqa XMMWORD PTR 64[esp],xmm7 + pxor xmm6,xmm7 + call __aesni_decrypt6 + movaps xmm1,XMMWORD PTR 64[esp] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,XMMWORD PTR 32[esp] + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,XMMWORD PTR 48[esp] + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm6,xmm1 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + movups XMMWORD PTR 64[edi],xmm6 + lea edi,DWORD PTR 80[edi] + jmp $L065xts_dec_done +ALIGN 16 +$L061xts_dec_one: + movups xmm2,XMMWORD PTR [esi] + lea esi,DWORD PTR 16[esi] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L066dec1_loop_12: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L066dec1_loop_12 +DB 102,15,56,223,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] + movdqa xmm1,xmm5 + jmp $L065xts_dec_done +ALIGN 16 +$L062xts_dec_two: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + lea esi,DWORD PTR 32[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + call __aesni_decrypt2 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + lea edi,DWORD PTR 32[edi] + movdqa xmm1,xmm6 + jmp $L065xts_dec_done +ALIGN 16 +$L063xts_dec_three: + movaps xmm7,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + lea esi,DWORD PTR 48[esi] + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + call __aesni_decrypt3 + xorps xmm2,xmm5 + xorps xmm3,xmm6 + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + lea edi,DWORD PTR 48[edi] + movdqa xmm1,xmm7 + jmp $L065xts_dec_done +ALIGN 16 +$L064xts_dec_four: + movaps xmm6,xmm1 + movups xmm2,XMMWORD PTR [esi] + movups xmm3,XMMWORD PTR 16[esi] + movups xmm4,XMMWORD PTR 32[esi] + xorps xmm2,XMMWORD PTR [esp] + movups xmm5,XMMWORD PTR 48[esi] + lea esi,DWORD PTR 64[esi] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + xorps xmm5,xmm6 + call __aesni_decrypt4 + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,XMMWORD PTR 16[esp] + xorps xmm4,xmm7 + movups XMMWORD PTR [edi],xmm2 + xorps xmm5,xmm6 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + lea edi,DWORD PTR 64[edi] + movdqa xmm1,xmm6 + jmp $L065xts_dec_done +ALIGN 16 +$L060xts_dec_done6x: + mov eax,DWORD PTR 112[esp] + and eax,15 + jz $L067xts_dec_ret + mov DWORD PTR 112[esp],eax + jmp $L068xts_dec_only_one_more +ALIGN 16 +$L065xts_dec_done: + mov eax,DWORD PTR 112[esp] + pxor xmm0,xmm0 + and eax,15 + jz $L067xts_dec_ret + pcmpgtd xmm0,xmm1 + mov DWORD PTR 112[esp],eax + pshufd 
xmm2,xmm0,19 + pxor xmm0,xmm0 + movdqa xmm3,XMMWORD PTR 96[esp] + paddq xmm1,xmm1 + pand xmm2,xmm3 + pcmpgtd xmm0,xmm1 + pxor xmm1,xmm2 +$L068xts_dec_only_one_more: + pshufd xmm5,xmm0,19 + movdqa xmm6,xmm1 + paddq xmm1,xmm1 + pand xmm5,xmm3 + pxor xmm5,xmm1 + mov edx,ebp + mov ecx,ebx + movups xmm2,XMMWORD PTR [esi] + xorps xmm2,xmm5 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L069dec1_loop_13: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L069dec1_loop_13 +DB 102,15,56,223,209 + xorps xmm2,xmm5 + movups XMMWORD PTR [edi],xmm2 +$L070xts_dec_steal: + movzx ecx,BYTE PTR 16[esi] + movzx edx,BYTE PTR [edi] + lea esi,DWORD PTR 1[esi] + mov BYTE PTR [edi],cl + mov BYTE PTR 16[edi],dl + lea edi,DWORD PTR 1[edi] + sub eax,1 + jnz $L070xts_dec_steal + sub edi,DWORD PTR 112[esp] + mov edx,ebp + mov ecx,ebx + movups xmm2,XMMWORD PTR [edi] + xorps xmm2,xmm6 + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L071dec1_loop_14: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L071dec1_loop_14 +DB 102,15,56,223,209 + xorps xmm2,xmm6 + movups XMMWORD PTR [edi],xmm2 +$L067xts_dec_ret: + mov esp,DWORD PTR 116[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_xts_decrypt ENDP +ALIGN 16 +_aesni_cbc_encrypt PROC PUBLIC +$L_aesni_cbc_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov ebx,esp + mov edi,DWORD PTR 24[esp] + sub ebx,24 + mov eax,DWORD PTR 28[esp] + and ebx,-16 + mov edx,DWORD PTR 32[esp] + mov ebp,DWORD PTR 36[esp] + test eax,eax + jz $L072cbc_abort + cmp DWORD PTR 40[esp],0 + xchg ebx,esp + movups xmm7,XMMWORD PTR [ebp] + mov ecx,DWORD PTR 240[edx] + mov ebp,edx + mov DWORD PTR 16[esp],ebx + mov ebx,ecx + je $L073cbc_decrypt + movaps xmm2,xmm7 + cmp eax,16 + jb $L074cbc_enc_tail + sub eax,16 + jmp $L075cbc_enc_loop +ALIGN 16 +$L075cbc_enc_loop: + movups xmm7,XMMWORD PTR [esi] + lea esi,DWORD PTR 16[esi] + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + xorps xmm7,xmm0 + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm7 +$L076enc1_loop_15: +DB 102,15,56,220,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L076enc1_loop_15 +DB 102,15,56,221,209 + mov ecx,ebx + mov edx,ebp + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] + sub eax,16 + jnc $L075cbc_enc_loop + add eax,16 + jnz $L074cbc_enc_tail + movaps xmm7,xmm2 + jmp $L077cbc_ret +$L074cbc_enc_tail: + mov ecx,eax +DD 2767451785 + mov ecx,16 + sub ecx,eax + xor eax,eax +DD 2868115081 + lea edi,DWORD PTR [edi-16] + mov ecx,ebx + mov esi,edi + mov edx,ebp + jmp $L075cbc_enc_loop +ALIGN 16 +$L073cbc_decrypt: + cmp eax,80 + jbe $L078cbc_dec_tail + movaps XMMWORD PTR [esp],xmm7 + sub eax,80 + jmp $L079cbc_dec_loop6_enter +ALIGN 16 +$L080cbc_dec_loop6: + movaps XMMWORD PTR [esp],xmm0 + movups XMMWORD PTR [edi],xmm7 + lea edi,DWORD PTR 16[edi] +$L079cbc_dec_loop6_enter: + movdqu xmm2,XMMWORD PTR [esi] + movdqu xmm3,XMMWORD PTR 16[esi] + movdqu xmm4,XMMWORD PTR 32[esi] + movdqu xmm5,XMMWORD PTR 48[esi] + movdqu xmm6,XMMWORD PTR 64[esi] + movdqu xmm7,XMMWORD PTR 80[esi] + call __aesni_decrypt6 + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm5,xmm1 + movups 
xmm1,XMMWORD PTR 64[esi] + xorps xmm6,xmm0 + movups xmm0,XMMWORD PTR 80[esi] + xorps xmm7,xmm1 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + lea esi,DWORD PTR 96[esi] + movups XMMWORD PTR 32[edi],xmm4 + mov ecx,ebx + movups XMMWORD PTR 48[edi],xmm5 + mov edx,ebp + movups XMMWORD PTR 64[edi],xmm6 + lea edi,DWORD PTR 80[edi] + sub eax,96 + ja $L080cbc_dec_loop6 + movaps xmm2,xmm7 + movaps xmm7,xmm0 + add eax,80 + jle $L081cbc_dec_tail_collected + movups XMMWORD PTR [edi],xmm2 + lea edi,DWORD PTR 16[edi] +$L078cbc_dec_tail: + movups xmm2,XMMWORD PTR [esi] + movaps xmm6,xmm2 + cmp eax,16 + jbe $L082cbc_dec_one + movups xmm3,XMMWORD PTR 16[esi] + movaps xmm5,xmm3 + cmp eax,32 + jbe $L083cbc_dec_two + movups xmm4,XMMWORD PTR 32[esi] + cmp eax,48 + jbe $L084cbc_dec_three + movups xmm5,XMMWORD PTR 48[esi] + cmp eax,64 + jbe $L085cbc_dec_four + movups xmm6,XMMWORD PTR 64[esi] + movaps XMMWORD PTR [esp],xmm7 + movups xmm2,XMMWORD PTR [esi] + xorps xmm7,xmm7 + call __aesni_decrypt6 + movups xmm1,XMMWORD PTR [esi] + movups xmm0,XMMWORD PTR 16[esi] + xorps xmm2,XMMWORD PTR [esp] + xorps xmm3,xmm1 + movups xmm1,XMMWORD PTR 32[esi] + xorps xmm4,xmm0 + movups xmm0,XMMWORD PTR 48[esi] + xorps xmm5,xmm1 + movups xmm7,XMMWORD PTR 64[esi] + xorps xmm6,xmm0 + movups XMMWORD PTR [edi],xmm2 + movups XMMWORD PTR 16[edi],xmm3 + movups XMMWORD PTR 32[edi],xmm4 + movups XMMWORD PTR 48[edi],xmm5 + lea edi,DWORD PTR 64[edi] + movaps xmm2,xmm6 + sub eax,80 + jmp $L081cbc_dec_tail_collected +ALIGN 16 +$L082cbc_dec_one: + movups xmm0,XMMWORD PTR [edx] + movups xmm1,XMMWORD PTR 16[edx] + lea edx,DWORD PTR 32[edx] + xorps xmm2,xmm0 +$L086dec1_loop_16: +DB 102,15,56,222,209 + dec ecx + movups xmm1,XMMWORD PTR [edx] + lea edx,DWORD PTR 16[edx] + jnz $L086dec1_loop_16 +DB 102,15,56,223,209 + xorps xmm2,xmm7 + movaps xmm7,xmm6 + sub eax,16 + jmp $L081cbc_dec_tail_collected +ALIGN 16 +$L083cbc_dec_two: + call __aesni_decrypt2 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + movaps xmm2,xmm3 + lea edi,DWORD PTR 16[edi] + movaps xmm7,xmm5 + sub eax,32 + jmp $L081cbc_dec_tail_collected +ALIGN 16 +$L084cbc_dec_three: + call __aesni_decrypt3 + xorps xmm2,xmm7 + xorps xmm3,xmm6 + xorps xmm4,xmm5 + movups XMMWORD PTR [edi],xmm2 + movaps xmm2,xmm4 + movups XMMWORD PTR 16[edi],xmm3 + lea edi,DWORD PTR 32[edi] + movups xmm7,XMMWORD PTR 32[esi] + sub eax,48 + jmp $L081cbc_dec_tail_collected +ALIGN 16 +$L085cbc_dec_four: + call __aesni_decrypt4 + movups xmm1,XMMWORD PTR 16[esi] + movups xmm0,XMMWORD PTR 32[esi] + xorps xmm2,xmm7 + movups xmm7,XMMWORD PTR 48[esi] + xorps xmm3,xmm6 + movups XMMWORD PTR [edi],xmm2 + xorps xmm4,xmm1 + movups XMMWORD PTR 16[edi],xmm3 + xorps xmm5,xmm0 + movups XMMWORD PTR 32[edi],xmm4 + lea edi,DWORD PTR 48[edi] + movaps xmm2,xmm5 + sub eax,64 +$L081cbc_dec_tail_collected: + and eax,15 + jnz $L087cbc_dec_tail_partial + movups XMMWORD PTR [edi],xmm2 + jmp $L077cbc_ret +ALIGN 16 +$L087cbc_dec_tail_partial: + movaps XMMWORD PTR [esp],xmm2 + mov ecx,16 + mov esi,esp + sub ecx,eax +DD 2767451785 +$L077cbc_ret: + mov esp,DWORD PTR 16[esp] + mov ebp,DWORD PTR 36[esp] + movups XMMWORD PTR [ebp],xmm7 +$L072cbc_abort: + pop edi + pop esi + pop ebx + pop ebp + ret +_aesni_cbc_encrypt ENDP +ALIGN 16 +__aesni_set_encrypt_key PROC PRIVATE + test eax,eax + jz $L088bad_pointer + test edx,edx + jz $L088bad_pointer + movups xmm0,XMMWORD PTR [eax] + xorps xmm4,xmm4 + lea edx,DWORD PTR 16[edx] + cmp ecx,256 + je $L08914rounds + cmp ecx,192 + je $L09012rounds + cmp ecx,128 + jne 
$L091bad_keybits +ALIGN 16 +$L09210rounds: + mov ecx,9 + movups XMMWORD PTR [edx-16],xmm0 +DB 102,15,58,223,200,1 + call $L093key_128_cold +DB 102,15,58,223,200,2 + call $L094key_128 +DB 102,15,58,223,200,4 + call $L094key_128 +DB 102,15,58,223,200,8 + call $L094key_128 +DB 102,15,58,223,200,16 + call $L094key_128 +DB 102,15,58,223,200,32 + call $L094key_128 +DB 102,15,58,223,200,64 + call $L094key_128 +DB 102,15,58,223,200,128 + call $L094key_128 +DB 102,15,58,223,200,27 + call $L094key_128 +DB 102,15,58,223,200,54 + call $L094key_128 + movups XMMWORD PTR [edx],xmm0 + mov DWORD PTR 80[edx],ecx + xor eax,eax + ret +ALIGN 16 +$L094key_128: + movups XMMWORD PTR [edx],xmm0 + lea edx,DWORD PTR 16[edx] +$L093key_128_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +ALIGN 16 +$L09012rounds: + movq xmm2,QWORD PTR 16[eax] + mov ecx,11 + movups XMMWORD PTR [edx-16],xmm0 +DB 102,15,58,223,202,1 + call $L095key_192a_cold +DB 102,15,58,223,202,2 + call $L096key_192b +DB 102,15,58,223,202,4 + call $L097key_192a +DB 102,15,58,223,202,8 + call $L096key_192b +DB 102,15,58,223,202,16 + call $L097key_192a +DB 102,15,58,223,202,32 + call $L096key_192b +DB 102,15,58,223,202,64 + call $L097key_192a +DB 102,15,58,223,202,128 + call $L096key_192b + movups XMMWORD PTR [edx],xmm0 + mov DWORD PTR 48[edx],ecx + xor eax,eax + ret +ALIGN 16 +$L097key_192a: + movups XMMWORD PTR [edx],xmm0 + lea edx,DWORD PTR 16[edx] +ALIGN 16 +$L095key_192a_cold: + movaps xmm5,xmm2 +$L098key_192b_warm: + shufps xmm4,xmm0,16 + movdqa xmm3,xmm2 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + pslldq xmm3,4 + xorps xmm0,xmm4 + pshufd xmm1,xmm1,85 + pxor xmm2,xmm3 + pxor xmm0,xmm1 + pshufd xmm3,xmm0,255 + pxor xmm2,xmm3 + ret +ALIGN 16 +$L096key_192b: + movaps xmm3,xmm0 + shufps xmm5,xmm0,68 + movups XMMWORD PTR [edx],xmm5 + shufps xmm3,xmm2,78 + movups XMMWORD PTR 16[edx],xmm3 + lea edx,DWORD PTR 32[edx] + jmp $L098key_192b_warm +ALIGN 16 +$L08914rounds: + movups xmm2,XMMWORD PTR 16[eax] + mov ecx,13 + lea edx,DWORD PTR 16[edx] + movups XMMWORD PTR [edx-32],xmm0 + movups XMMWORD PTR [edx-16],xmm2 +DB 102,15,58,223,202,1 + call $L099key_256a_cold +DB 102,15,58,223,200,1 + call $L100key_256b +DB 102,15,58,223,202,2 + call $L101key_256a +DB 102,15,58,223,200,2 + call $L100key_256b +DB 102,15,58,223,202,4 + call $L101key_256a +DB 102,15,58,223,200,4 + call $L100key_256b +DB 102,15,58,223,202,8 + call $L101key_256a +DB 102,15,58,223,200,8 + call $L100key_256b +DB 102,15,58,223,202,16 + call $L101key_256a +DB 102,15,58,223,200,16 + call $L100key_256b +DB 102,15,58,223,202,32 + call $L101key_256a +DB 102,15,58,223,200,32 + call $L100key_256b +DB 102,15,58,223,202,64 + call $L101key_256a + movups XMMWORD PTR [edx],xmm0 + mov DWORD PTR 16[edx],ecx + xor eax,eax + ret +ALIGN 16 +$L101key_256a: + movups XMMWORD PTR [edx],xmm2 + lea edx,DWORD PTR 16[edx] +$L099key_256a_cold: + shufps xmm4,xmm0,16 + xorps xmm0,xmm4 + shufps xmm4,xmm0,140 + xorps xmm0,xmm4 + shufps xmm1,xmm1,255 + xorps xmm0,xmm1 + ret +ALIGN 16 +$L100key_256b: + movups XMMWORD PTR [edx],xmm0 + lea edx,DWORD PTR 16[edx] + shufps xmm4,xmm2,16 + xorps xmm2,xmm4 + shufps xmm4,xmm2,140 + xorps xmm2,xmm4 + shufps xmm1,xmm1,170 + xorps xmm2,xmm1 + ret +ALIGN 4 +$L088bad_pointer: + mov eax,-1 + ret +ALIGN 4 +$L091bad_keybits: + mov eax,-2 + ret +__aesni_set_encrypt_key ENDP +ALIGN 16 +_aesni_set_encrypt_key PROC PUBLIC +$L_aesni_set_encrypt_key_begin:: + mov eax,DWORD PTR 4[esp] + mov ecx,DWORD PTR 8[esp] + mov 
edx,DWORD PTR 12[esp]
+ call __aesni_set_encrypt_key
+ ret
+_aesni_set_encrypt_key ENDP
+ALIGN 16
+_aesni_set_decrypt_key PROC PUBLIC
+$L_aesni_set_decrypt_key_begin::
+ mov eax,DWORD PTR 4[esp]
+ mov ecx,DWORD PTR 8[esp]
+ mov edx,DWORD PTR 12[esp]
+ call __aesni_set_encrypt_key
+ mov edx,DWORD PTR 12[esp]
+ shl ecx,4
+ test eax,eax
+ jnz $L102dec_key_ret
+ lea eax,DWORD PTR 16[ecx*1+edx]
+ movups xmm0,XMMWORD PTR [edx]
+ movups xmm1,XMMWORD PTR [eax]
+ movups XMMWORD PTR [eax],xmm0
+ movups XMMWORD PTR [edx],xmm1
+ lea edx,DWORD PTR 16[edx]
+ lea eax,DWORD PTR [eax-16]
+$L103dec_key_inverse:
+ movups xmm0,XMMWORD PTR [edx]
+ movups xmm1,XMMWORD PTR [eax]
+DB 102,15,56,219,192
+DB 102,15,56,219,201
+ lea edx,DWORD PTR 16[edx]
+ lea eax,DWORD PTR [eax-16]
+ movups XMMWORD PTR 16[eax],xmm0
+ movups XMMWORD PTR [edx-16],xmm1
+ cmp eax,edx
+ ja $L103dec_key_inverse
+ movups xmm0,XMMWORD PTR [edx]
+DB 102,15,56,219,192
+ movups XMMWORD PTR [edx],xmm0
+ xor eax,eax
+$L102dec_key_ret:
+ ret
+_aesni_set_decrypt_key ENDP
+DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB 115,108,46,111,114,103,62,0
+.text$ ENDS
+END
diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/aes/vpaes-x86.asm b/deps/openssl/asm_obsolete/x86-win32-masm/aes/vpaes-x86.asm
new file mode 100644
index 00000000000000..57afc202bb7f31
--- /dev/null
+++ b/deps/openssl/asm_obsolete/x86-win32-masm/aes/vpaes-x86.asm
@@ -0,0 +1,662 @@
+TITLE vpaes-x86.asm
+IF @Version LT 800
+ECHO MASM version 8.00 or later is strongly recommended.
+ENDIF
+.686
+.XMM
+IF @Version LT 800
+XMMWORD STRUCT 16
+DQ 2 dup (?)
+XMMWORD ENDS
+ENDIF
+
+.MODEL FLAT
+OPTION DOTNAME
+IF @Version LT 800
+.text$ SEGMENT PAGE 'CODE'
+ELSE
+.text$ SEGMENT ALIGN(64) 'CODE'
+ENDIF
+ALIGN 64
+$L_vpaes_consts::
+DD 218628480,235210255,168496130,67568393
+DD 252381056,17041926,33884169,51187212
+DD 252645135,252645135,252645135,252645135
+DD 1512730624,3266504856,1377990664,3401244816
+DD 830229760,1275146365,2969422977,3447763452
+DD 3411033600,2979783055,338359620,2782886510
+DD 4209124096,907596821,221174255,1006095553
+DD 191964160,3799684038,3164090317,1589111125
+DD 182528256,1777043520,2877432650,3265356744
+DD 1874708224,3503451415,3305285752,363511674
+DD 1606117888,3487855781,1093350906,2384367825
+DD 197121,67569157,134941193,202313229
+DD 67569157,134941193,202313229,197121
+DD 134941193,202313229,197121,67569157
+DD 202313229,197121,67569157,134941193
+DD 33619971,100992007,168364043,235736079
+DD 235736079,33619971,100992007,168364043
+DD 168364043,235736079,33619971,100992007
+DD 100992007,168364043,235736079,33619971
+DD 50462976,117835012,185207048,252579084
+DD 252314880,51251460,117574920,184942860
+DD 184682752,252054788,50987272,118359308
+DD 118099200,185467140,251790600,50727180
+DD 2946363062,528716217,1300004225,1881839624
+DD 1532713819,1532713819,1532713819,1532713819
+DD 3602276352,4288629033,3737020424,4153884961
+DD 1354558464,32357713,2958822624,3775749553
+DD 1201988352,132424512,1572796698,503232858
+DD 2213177600,1597421020,4103937655,675398315
+DD 2749646592,4273543773,1511898873,121693092
+DD 3040248576,1103263732,2871565598,1608280554
+DD 2236667136,2588920351,482954393,64377734
+DD 3069987328,291237287,2117370568,3650299247
+DD 533321216,3573750986,2572112006,1401264716
+DD 1339849704,2721158661,548607111,3445553514
+DD 2128193280,3054596040,2183486460,1257083700
+DD 655635200,1165381986,3923443150,2344132524
+DD 190078720,256924420,290342170,357187870 +DD 1610966272,2263057382,4103205268,309794674 +DD 2592527872,2233205587,1335446729,3402964816 +DD 3973531904,3225098121,3002836325,1918774430 +DD 3870401024,2102906079,2284471353,4117666579 +DD 617007872,1021508343,366931923,691083277 +DD 2528395776,3491914898,2968704004,1613121270 +DD 3445188352,3247741094,844474987,4093578302 +DD 651481088,1190302358,1689581232,574775300 +DD 4289380608,206939853,2555985458,2489840491 +DD 2130264064,327674451,3566485037,3349835193 +DD 2470714624,316102159,3636825756,3393945945 +DB 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +DB 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +DB 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +DB 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +DB 118,101,114,115,105,116,121,41,0 +ALIGN 64 +ALIGN 16 +__vpaes_preheat PROC PRIVATE + add ebp,DWORD PTR [esp] + movdqa xmm7,XMMWORD PTR [ebp-48] + movdqa xmm6,XMMWORD PTR [ebp-16] + ret +__vpaes_preheat ENDP +ALIGN 16 +__vpaes_encrypt_core PROC PRIVATE + mov ecx,16 + mov eax,DWORD PTR 240[edx] + movdqa xmm1,xmm6 + movdqa xmm2,XMMWORD PTR [ebp] + pandn xmm1,xmm0 + pand xmm0,xmm6 + movdqu xmm5,XMMWORD PTR [edx] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR 16[ebp] + pxor xmm2,xmm5 + psrld xmm1,4 + add edx,16 +DB 102,15,56,0,193 + lea ebx,DWORD PTR 192[ebp] + pxor xmm0,xmm2 + jmp $L000enc_entry +ALIGN 16 +$L001enc_loop: + movdqa xmm4,XMMWORD PTR 32[ebp] + movdqa xmm0,XMMWORD PTR 48[ebp] +DB 102,15,56,0,226 +DB 102,15,56,0,195 + pxor xmm4,xmm5 + movdqa xmm5,XMMWORD PTR 64[ebp] + pxor xmm0,xmm4 + movdqa xmm1,XMMWORD PTR [ecx*1+ebx-64] +DB 102,15,56,0,234 + movdqa xmm2,XMMWORD PTR 80[ebp] + movdqa xmm4,XMMWORD PTR [ecx*1+ebx] +DB 102,15,56,0,211 + movdqa xmm3,xmm0 + pxor xmm2,xmm5 +DB 102,15,56,0,193 + add edx,16 + pxor xmm0,xmm2 +DB 102,15,56,0,220 + add ecx,16 + pxor xmm3,xmm0 +DB 102,15,56,0,193 + and ecx,48 + sub eax,1 + pxor xmm0,xmm3 +$L000enc_entry: + movdqa xmm1,xmm6 + movdqa xmm5,XMMWORD PTR [ebp-32] + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm6 +DB 102,15,56,0,232 + movdqa xmm3,xmm7 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm7 + pxor xmm3,xmm5 +DB 102,15,56,0,224 + movdqa xmm2,xmm7 + pxor xmm4,xmm5 +DB 102,15,56,0,211 + movdqa xmm3,xmm7 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm5,XMMWORD PTR [edx] + pxor xmm3,xmm1 + jnz $L001enc_loop + movdqa xmm4,XMMWORD PTR 96[ebp] + movdqa xmm0,XMMWORD PTR 112[ebp] +DB 102,15,56,0,226 + pxor xmm4,xmm5 +DB 102,15,56,0,195 + movdqa xmm1,XMMWORD PTR 64[ecx*1+ebx] + pxor xmm0,xmm4 +DB 102,15,56,0,193 + ret +__vpaes_encrypt_core ENDP +ALIGN 16 +__vpaes_decrypt_core PROC PRIVATE + lea ebx,DWORD PTR 608[ebp] + mov eax,DWORD PTR 240[edx] + movdqa xmm1,xmm6 + movdqa xmm2,XMMWORD PTR [ebx-64] + pandn xmm1,xmm0 + mov ecx,eax + psrld xmm1,4 + movdqu xmm5,XMMWORD PTR [edx] + shl ecx,4 + pand xmm0,xmm6 +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR [ebx-48] + xor ecx,48 +DB 102,15,56,0,193 + and ecx,48 + pxor xmm2,xmm5 + movdqa xmm5,XMMWORD PTR 176[ebp] + pxor xmm0,xmm2 + add edx,16 + lea ecx,DWORD PTR [ecx*1+ebx-352] + jmp $L002dec_entry +ALIGN 16 +$L003dec_loop: + movdqa xmm4,XMMWORD PTR [ebx-32] + movdqa xmm1,XMMWORD PTR [ebx-16] +DB 102,15,56,0,226 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR [ebx] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR 16[ebx] +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR 32[ebx] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR 48[ebx] +DB 102,15,56,0,226 +DB 
102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + movdqa xmm4,XMMWORD PTR 64[ebx] + pxor xmm0,xmm1 + movdqa xmm1,XMMWORD PTR 80[ebx] +DB 102,15,56,0,226 +DB 102,15,56,0,197 +DB 102,15,56,0,203 + pxor xmm0,xmm4 + add edx,16 +DB 102,15,58,15,237,12 + pxor xmm0,xmm1 + sub eax,1 +$L002dec_entry: + movdqa xmm1,xmm6 + movdqa xmm2,XMMWORD PTR [ebp-32] + pandn xmm1,xmm0 + pand xmm0,xmm6 + psrld xmm1,4 +DB 102,15,56,0,208 + movdqa xmm3,xmm7 + pxor xmm0,xmm1 +DB 102,15,56,0,217 + movdqa xmm4,xmm7 + pxor xmm3,xmm2 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm7 +DB 102,15,56,0,211 + movdqa xmm3,xmm7 + pxor xmm2,xmm0 +DB 102,15,56,0,220 + movdqu xmm0,XMMWORD PTR [edx] + pxor xmm3,xmm1 + jnz $L003dec_loop + movdqa xmm4,XMMWORD PTR 96[ebx] +DB 102,15,56,0,226 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR 112[ebx] + movdqa xmm2,XMMWORD PTR [ecx] +DB 102,15,56,0,195 + pxor xmm0,xmm4 +DB 102,15,56,0,194 + ret +__vpaes_decrypt_core ENDP +ALIGN 16 +__vpaes_schedule_core PROC PRIVATE + add ebp,DWORD PTR [esp] + movdqu xmm0,XMMWORD PTR [esi] + movdqa xmm2,XMMWORD PTR 320[ebp] + movdqa xmm3,xmm0 + lea ebx,DWORD PTR [ebp] + movdqa XMMWORD PTR 4[esp],xmm2 + call __vpaes_schedule_transform + movdqa xmm7,xmm0 + test edi,edi + jnz $L004schedule_am_decrypting + movdqu XMMWORD PTR [edx],xmm0 + jmp $L005schedule_go +$L004schedule_am_decrypting: + movdqa xmm1,XMMWORD PTR 256[ecx*1+ebp] +DB 102,15,56,0,217 + movdqu XMMWORD PTR [edx],xmm3 + xor ecx,48 +$L005schedule_go: + cmp eax,192 + ja $L006schedule_256 + je $L007schedule_192 +$L008schedule_128: + mov eax,10 +$L009loop_schedule_128: + call __vpaes_schedule_round + dec eax + jz $L010schedule_mangle_last + call __vpaes_schedule_mangle + jmp $L009loop_schedule_128 +ALIGN 16 +$L007schedule_192: + movdqu xmm0,XMMWORD PTR 8[esi] + call __vpaes_schedule_transform + movdqa xmm6,xmm0 + pxor xmm4,xmm4 + movhlps xmm6,xmm4 + mov eax,4 +$L011loop_schedule_192: + call __vpaes_schedule_round +DB 102,15,58,15,198,8 + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + call __vpaes_schedule_mangle + call __vpaes_schedule_round + dec eax + jz $L010schedule_mangle_last + call __vpaes_schedule_mangle + call __vpaes_schedule_192_smear + jmp $L011loop_schedule_192 +ALIGN 16 +$L006schedule_256: + movdqu xmm0,XMMWORD PTR 16[esi] + call __vpaes_schedule_transform + mov eax,7 +$L012loop_schedule_256: + call __vpaes_schedule_mangle + movdqa xmm6,xmm0 + call __vpaes_schedule_round + dec eax + jz $L010schedule_mangle_last + call __vpaes_schedule_mangle + pshufd xmm0,xmm0,255 + movdqa XMMWORD PTR 20[esp],xmm7 + movdqa xmm7,xmm6 + call $L_vpaes_schedule_low_round + movdqa xmm7,XMMWORD PTR 20[esp] + jmp $L012loop_schedule_256 +ALIGN 16 +$L010schedule_mangle_last: + lea ebx,DWORD PTR 384[ebp] + test edi,edi + jnz $L013schedule_mangle_last_dec + movdqa xmm1,XMMWORD PTR 256[ecx*1+ebp] +DB 102,15,56,0,193 + lea ebx,DWORD PTR 352[ebp] + add edx,32 +$L013schedule_mangle_last_dec: + add edx,-16 + pxor xmm0,XMMWORD PTR 336[ebp] + call __vpaes_schedule_transform + movdqu XMMWORD PTR [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 + ret +__vpaes_schedule_core ENDP +ALIGN 16 +__vpaes_schedule_192_smear PROC PRIVATE + pshufd xmm1,xmm6,128 + pshufd xmm0,xmm7,254 + pxor xmm6,xmm1 + pxor xmm1,xmm1 + pxor xmm6,xmm0 + movdqa xmm0,xmm6 + movhlps xmm6,xmm1 + ret +__vpaes_schedule_192_smear ENDP +ALIGN 16 +__vpaes_schedule_round PROC PRIVATE + movdqa xmm2,XMMWORD PTR 8[esp] + pxor xmm1,xmm1 +DB 
102,15,58,15,202,15 +DB 102,15,58,15,210,15 + pxor xmm7,xmm1 + pshufd xmm0,xmm0,255 +DB 102,15,58,15,192,1 + movdqa XMMWORD PTR 8[esp],xmm2 +$L_vpaes_schedule_low_round:: + movdqa xmm1,xmm7 + pslldq xmm7,4 + pxor xmm7,xmm1 + movdqa xmm1,xmm7 + pslldq xmm7,8 + pxor xmm7,xmm1 + pxor xmm7,XMMWORD PTR 336[ebp] + movdqa xmm4,XMMWORD PTR [ebp-16] + movdqa xmm5,XMMWORD PTR [ebp-48] + movdqa xmm1,xmm4 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm4 + movdqa xmm2,XMMWORD PTR [ebp-32] +DB 102,15,56,0,208 + pxor xmm0,xmm1 + movdqa xmm3,xmm5 +DB 102,15,56,0,217 + pxor xmm3,xmm2 + movdqa xmm4,xmm5 +DB 102,15,56,0,224 + pxor xmm4,xmm2 + movdqa xmm2,xmm5 +DB 102,15,56,0,211 + pxor xmm2,xmm0 + movdqa xmm3,xmm5 +DB 102,15,56,0,220 + pxor xmm3,xmm1 + movdqa xmm4,XMMWORD PTR 32[ebp] +DB 102,15,56,0,226 + movdqa xmm0,XMMWORD PTR 48[ebp] +DB 102,15,56,0,195 + pxor xmm0,xmm4 + pxor xmm0,xmm7 + movdqa xmm7,xmm0 + ret +__vpaes_schedule_round ENDP +ALIGN 16 +__vpaes_schedule_transform PROC PRIVATE + movdqa xmm2,XMMWORD PTR [ebp-16] + movdqa xmm1,xmm2 + pandn xmm1,xmm0 + psrld xmm1,4 + pand xmm0,xmm2 + movdqa xmm2,XMMWORD PTR [ebx] +DB 102,15,56,0,208 + movdqa xmm0,XMMWORD PTR 16[ebx] +DB 102,15,56,0,193 + pxor xmm0,xmm2 + ret +__vpaes_schedule_transform ENDP +ALIGN 16 +__vpaes_schedule_mangle PROC PRIVATE + movdqa xmm4,xmm0 + movdqa xmm5,XMMWORD PTR 128[ebp] + test edi,edi + jnz $L014schedule_mangle_dec + add edx,16 + pxor xmm4,XMMWORD PTR 336[ebp] +DB 102,15,56,0,229 + movdqa xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 +DB 102,15,56,0,229 + pxor xmm3,xmm4 + jmp $L015schedule_mangle_both +ALIGN 16 +$L014schedule_mangle_dec: + movdqa xmm2,XMMWORD PTR [ebp-16] + lea esi,DWORD PTR 416[ebp] + movdqa xmm1,xmm2 + pandn xmm1,xmm4 + psrld xmm1,4 + pand xmm4,xmm2 + movdqa xmm2,XMMWORD PTR [esi] +DB 102,15,56,0,212 + movdqa xmm3,XMMWORD PTR 16[esi] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + movdqa xmm2,XMMWORD PTR 32[esi] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR 48[esi] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + movdqa xmm2,XMMWORD PTR 64[esi] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR 80[esi] +DB 102,15,56,0,217 + pxor xmm3,xmm2 +DB 102,15,56,0,221 + movdqa xmm2,XMMWORD PTR 96[esi] +DB 102,15,56,0,212 + pxor xmm2,xmm3 + movdqa xmm3,XMMWORD PTR 112[esi] +DB 102,15,56,0,217 + pxor xmm3,xmm2 + add edx,-16 +$L015schedule_mangle_both: + movdqa xmm1,XMMWORD PTR 256[ecx*1+ebp] +DB 102,15,56,0,217 + add ecx,-16 + and ecx,48 + movdqu XMMWORD PTR [edx],xmm3 + ret +__vpaes_schedule_mangle ENDP +ALIGN 16 +_vpaes_set_encrypt_key PROC PUBLIC +$L_vpaes_set_encrypt_key_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + lea ebx,DWORD PTR [esp-56] + mov eax,DWORD PTR 24[esp] + and ebx,-16 + mov edx,DWORD PTR 28[esp] + xchg ebx,esp + mov DWORD PTR 48[esp],ebx + mov ebx,eax + shr ebx,5 + add ebx,5 + mov DWORD PTR 240[edx],ebx + mov ecx,48 + mov edi,0 + mov ebp,OFFSET ($L_vpaes_consts+030h-$L016pic_point) + call __vpaes_schedule_core +$L016pic_point: + mov esp,DWORD PTR 48[esp] + xor eax,eax + pop edi + pop esi + pop ebx + pop ebp + ret +_vpaes_set_encrypt_key ENDP +ALIGN 16 +_vpaes_set_decrypt_key PROC PUBLIC +$L_vpaes_set_decrypt_key_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + lea ebx,DWORD PTR [esp-56] + mov eax,DWORD PTR 24[esp] + and ebx,-16 + mov edx,DWORD PTR 28[esp] + xchg ebx,esp + mov DWORD PTR 48[esp],ebx + mov ebx,eax + shr ebx,5 + add ebx,5 + mov DWORD PTR 240[edx],ebx + shl ebx,4 + 
lea edx,DWORD PTR 16[ebx*1+edx]
+ mov edi,1
+ mov ecx,eax
+ shr ecx,1
+ and ecx,32
+ xor ecx,32
+ mov ebp,OFFSET ($L_vpaes_consts+030h-$L017pic_point)
+ call __vpaes_schedule_core
+$L017pic_point:
+ mov esp,DWORD PTR 48[esp]
+ xor eax,eax
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_vpaes_set_decrypt_key ENDP
+ALIGN 16
+_vpaes_encrypt PROC PUBLIC
+$L_vpaes_encrypt_begin::
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov ebp,OFFSET ($L_vpaes_consts+030h-$L018pic_point)
+ call __vpaes_preheat
+$L018pic_point:
+ mov esi,DWORD PTR 20[esp]
+ lea ebx,DWORD PTR [esp-56]
+ mov edi,DWORD PTR 24[esp]
+ and ebx,-16
+ mov edx,DWORD PTR 28[esp]
+ xchg ebx,esp
+ mov DWORD PTR 48[esp],ebx
+ movdqu xmm0,XMMWORD PTR [esi]
+ call __vpaes_encrypt_core
+ movdqu XMMWORD PTR [edi],xmm0
+ mov esp,DWORD PTR 48[esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_vpaes_encrypt ENDP
+ALIGN 16
+_vpaes_decrypt PROC PUBLIC
+$L_vpaes_decrypt_begin::
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov ebp,OFFSET ($L_vpaes_consts+030h-$L019pic_point)
+ call __vpaes_preheat
+$L019pic_point:
+ mov esi,DWORD PTR 20[esp]
+ lea ebx,DWORD PTR [esp-56]
+ mov edi,DWORD PTR 24[esp]
+ and ebx,-16
+ mov edx,DWORD PTR 28[esp]
+ xchg ebx,esp
+ mov DWORD PTR 48[esp],ebx
+ movdqu xmm0,XMMWORD PTR [esi]
+ call __vpaes_decrypt_core
+ movdqu XMMWORD PTR [edi],xmm0
+ mov esp,DWORD PTR 48[esp]
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_vpaes_decrypt ENDP
+ALIGN 16
+_vpaes_cbc_encrypt PROC PUBLIC
+$L_vpaes_cbc_encrypt_begin::
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD PTR 20[esp]
+ mov edi,DWORD PTR 24[esp]
+ mov eax,DWORD PTR 28[esp]
+ mov edx,DWORD PTR 32[esp]
+ sub eax,16
+ jc $L020cbc_abort
+ lea ebx,DWORD PTR [esp-56]
+ mov ebp,DWORD PTR 36[esp]
+ and ebx,-16
+ mov ecx,DWORD PTR 40[esp]
+ xchg ebx,esp
+ movdqu xmm1,XMMWORD PTR [ebp]
+ sub edi,esi
+ mov DWORD PTR 48[esp],ebx
+ mov DWORD PTR [esp],edi
+ mov DWORD PTR 4[esp],edx
+ mov DWORD PTR 8[esp],ebp
+ mov edi,eax
+ mov ebp,OFFSET ($L_vpaes_consts+030h-$L021pic_point)
+ call __vpaes_preheat
+$L021pic_point:
+ cmp ecx,0
+ je $L022cbc_dec_loop
+ jmp $L023cbc_enc_loop
+ALIGN 16
+$L023cbc_enc_loop:
+ movdqu xmm0,XMMWORD PTR [esi]
+ pxor xmm0,xmm1
+ call __vpaes_encrypt_core
+ mov ebx,DWORD PTR [esp]
+ mov edx,DWORD PTR 4[esp]
+ movdqa xmm1,xmm0
+ movdqu XMMWORD PTR [esi*1+ebx],xmm0
+ lea esi,DWORD PTR 16[esi]
+ sub edi,16
+ jnc $L023cbc_enc_loop
+ jmp $L024cbc_done
+ALIGN 16
+$L022cbc_dec_loop:
+ movdqu xmm0,XMMWORD PTR [esi]
+ movdqa XMMWORD PTR 16[esp],xmm1
+ movdqa XMMWORD PTR 32[esp],xmm0
+ call __vpaes_decrypt_core
+ mov ebx,DWORD PTR [esp]
+ mov edx,DWORD PTR 4[esp]
+ pxor xmm0,XMMWORD PTR 16[esp]
+ movdqa xmm1,XMMWORD PTR 32[esp]
+ movdqu XMMWORD PTR [esi*1+ebx],xmm0
+ lea esi,DWORD PTR 16[esi]
+ sub edi,16
+ jnc $L022cbc_dec_loop
+$L024cbc_done:
+ mov ebx,DWORD PTR 8[esp]
+ mov esp,DWORD PTR 48[esp]
+ movdqu XMMWORD PTR [ebx],xmm1
+$L020cbc_abort:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_vpaes_cbc_encrypt ENDP
+.text$ ENDS
+END
diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bf/bf-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bf/bf-586.asm
new file mode 100644
index 00000000000000..218e8f5c7d445d
--- /dev/null
+++ b/deps/openssl/asm_obsolete/x86-win32-masm/bf/bf-586.asm
@@ -0,0 +1,902 @@
+TITLE bf-586.asm
+IF @Version LT 800
+ECHO MASM version 8.00 or later is strongly recommended.
+ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_BF_encrypt PROC PUBLIC +$L_BF_encrypt_begin:: + ; + push ebp + push ebx + mov ebx,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + push esi + push edi + ; Load the 2 words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + xor eax,eax + mov ebx,DWORD PTR [ebp] + xor ecx,ecx + xor edi,ebx + ; + ; Round 0 + mov edx,DWORD PTR 4[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 1 + mov edx,DWORD PTR 8[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 2 + mov edx,DWORD PTR 12[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 3 + mov edx,DWORD PTR 16[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 4 + mov edx,DWORD PTR 20[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 5 + mov edx,DWORD PTR 24[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 6 + mov edx,DWORD PTR 28[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 7 + mov edx,DWORD PTR 32[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 8 + mov edx,DWORD PTR 36[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 
1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 9 + mov edx,DWORD PTR 40[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 10 + mov edx,DWORD PTR 44[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 11 + mov edx,DWORD PTR 48[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 12 + mov edx,DWORD PTR 52[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 13 + mov edx,DWORD PTR 56[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 14 + mov edx,DWORD PTR 60[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 15 + mov edx,DWORD PTR 64[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + ; Load parameter 0 (16) enc=1 + mov eax,DWORD PTR 20[esp] + xor edi,ebx + mov edx,DWORD PTR 68[ebp] + xor esi,edx + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_BF_encrypt ENDP +ALIGN 16 +_BF_decrypt PROC PUBLIC +$L_BF_decrypt_begin:: + ; + push ebp + push ebx + mov ebx,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + push esi + push edi + ; Load the 2 words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + xor eax,eax + mov ebx,DWORD PTR 68[ebp] + xor ecx,ecx + xor edi,ebx + ; + ; Round 16 + mov edx,DWORD PTR 64[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + 
xor eax,eax + xor esi,ebx + ; + ; Round 15 + mov edx,DWORD PTR 60[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 14 + mov edx,DWORD PTR 56[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 13 + mov edx,DWORD PTR 52[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 12 + mov edx,DWORD PTR 48[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 11 + mov edx,DWORD PTR 44[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 10 + mov edx,DWORD PTR 40[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 9 + mov edx,DWORD PTR 36[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 8 + mov edx,DWORD PTR 32[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 7 + mov edx,DWORD PTR 28[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 6 + mov edx,DWORD PTR 24[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 
2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 5 + mov edx,DWORD PTR 20[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 4 + mov edx,DWORD PTR 16[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 3 + mov edx,DWORD PTR 12[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor edi,ebx + ; + ; Round 2 + mov edx,DWORD PTR 8[ebp] + mov ebx,edi + xor esi,edx + shr ebx,16 + mov edx,edi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + xor eax,eax + xor esi,ebx + ; + ; Round 1 + mov edx,DWORD PTR 4[ebp] + mov ebx,esi + xor edi,edx + shr ebx,16 + mov edx,esi + mov al,bh + and ebx,255 + mov cl,dh + and edx,255 + mov eax,DWORD PTR 72[eax*4+ebp] + mov ebx,DWORD PTR 1096[ebx*4+ebp] + add ebx,eax + mov eax,DWORD PTR 2120[ecx*4+ebp] + xor ebx,eax + mov edx,DWORD PTR 3144[edx*4+ebp] + add ebx,edx + ; Load parameter 0 (1) enc=0 + mov eax,DWORD PTR 20[esp] + xor edi,ebx + mov edx,DWORD PTR [ebp] + xor esi,edx + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_BF_decrypt ENDP +ALIGN 16 +_BF_cbc_encrypt PROC PUBLIC +$L_BF_cbc_encrypt_begin:: + ; + push ebp + push ebx + push esi + push edi + mov ebp,DWORD PTR 28[esp] + ; getting iv ptr from parameter 4 + mov ebx,DWORD PTR 36[esp] + mov esi,DWORD PTR [ebx] + mov edi,DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx,esp + mov esi,DWORD PTR 36[esp] + mov edi,DWORD PTR 40[esp] + ; getting encrypt flag from parameter 5 + mov ecx,DWORD PTR 56[esp] + ; get and push parameter 3 + mov eax,DWORD PTR 48[esp] + push eax + push ebx + cmp ecx,0 + jz $L000decrypt + and ebp,4294967288 + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + jz $L001encrypt_finish +$L002encrypt_loop: + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + xor eax,ecx + xor ebx,edx + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_BF_encrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L002encrypt_loop +$L001encrypt_finish: + mov ebp,DWORD PTR 52[esp] + and ebp,7 + jz $L003finish + call $L004PIC_point +$L004PIC_point: + pop edx + lea ecx,DWORD PTR ($L005cbc_enc_jmp_table-$L004PIC_point)[edx] + mov ebp,DWORD PTR [ebp*4+ecx] + add ebp,edx + xor ecx,ecx + xor edx,edx + jmp ebp +$L006ej7: + mov dh,BYTE PTR 6[esi] + shl edx,8 +$L007ej6: + mov dh,BYTE PTR 5[esi] +$L008ej5: + mov 
dl,BYTE PTR 4[esi]
+$L009ej4:
+ mov ecx,DWORD PTR [esi]
+ jmp $L010ejend
+$L011ej3:
+ mov ch,BYTE PTR 2[esi]
+ shl ecx,8
+$L012ej2:
+ mov ch,BYTE PTR 1[esi]
+$L013ej1:
+ mov cl,BYTE PTR [esi]
+$L010ejend:
+ xor eax,ecx
+ xor ebx,edx
+ bswap eax
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 12[esp],ebx
+ call $L_BF_encrypt_begin
+ mov eax,DWORD PTR 8[esp]
+ mov ebx,DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR [edi],eax
+ mov DWORD PTR 4[edi],ebx
+ jmp $L003finish
+$L000decrypt:
+ and ebp,4294967288
+ mov eax,DWORD PTR 16[esp]
+ mov ebx,DWORD PTR 20[esp]
+ jz $L014decrypt_finish
+$L015decrypt_loop:
+ mov eax,DWORD PTR [esi]
+ mov ebx,DWORD PTR 4[esi]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 12[esp],ebx
+ call $L_BF_decrypt_begin
+ mov eax,DWORD PTR 8[esp]
+ mov ebx,DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov ecx,DWORD PTR 16[esp]
+ mov edx,DWORD PTR 20[esp]
+ xor ecx,eax
+ xor edx,ebx
+ mov eax,DWORD PTR [esi]
+ mov ebx,DWORD PTR 4[esi]
+ mov DWORD PTR [edi],ecx
+ mov DWORD PTR 4[edi],edx
+ mov DWORD PTR 16[esp],eax
+ mov DWORD PTR 20[esp],ebx
+ add esi,8
+ add edi,8
+ sub ebp,8
+ jnz $L015decrypt_loop
+$L014decrypt_finish:
+ mov ebp,DWORD PTR 52[esp]
+ and ebp,7
+ jz $L003finish
+ mov eax,DWORD PTR [esi]
+ mov ebx,DWORD PTR 4[esi]
+ bswap eax
+ bswap ebx
+ mov DWORD PTR 8[esp],eax
+ mov DWORD PTR 12[esp],ebx
+ call $L_BF_decrypt_begin
+ mov eax,DWORD PTR 8[esp]
+ mov ebx,DWORD PTR 12[esp]
+ bswap eax
+ bswap ebx
+ mov ecx,DWORD PTR 16[esp]
+ mov edx,DWORD PTR 20[esp]
+ xor ecx,eax
+ xor edx,ebx
+ mov eax,DWORD PTR [esi]
+ mov ebx,DWORD PTR 4[esi]
+$L016dj7:
+ ror edx,16
+ mov BYTE PTR 6[edi],dl
+ shr edx,16
+$L017dj6:
+ mov BYTE PTR 5[edi],dh
+$L018dj5:
+ mov BYTE PTR 4[edi],dl
+$L019dj4:
+ mov DWORD PTR [edi],ecx
+ jmp $L020djend
+$L021dj3:
+ ror ecx,16
+ mov BYTE PTR 2[edi],cl
+ shl ecx,16
+$L022dj2:
+ mov BYTE PTR 1[esi],ch
+$L023dj1:
+ mov BYTE PTR [esi],cl
+$L020djend:
+ jmp $L003finish
+$L003finish:
+ mov ecx,DWORD PTR 60[esp]
+ add esp,24
+ mov DWORD PTR [ecx],eax
+ mov DWORD PTR 4[ecx],ebx
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+ALIGN 64
+$L005cbc_enc_jmp_table:
+DD 0
+DD $L013ej1-$L004PIC_point
+DD $L012ej2-$L004PIC_point
+DD $L011ej3-$L004PIC_point
+DD $L009ej4-$L004PIC_point
+DD $L008ej5-$L004PIC_point
+DD $L007ej6-$L004PIC_point
+DD $L006ej7-$L004PIC_point
+ALIGN 64
+_BF_cbc_encrypt ENDP
+.text$ ENDS
+END
diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bn/bn-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bn/bn-586.asm
new file mode 100644
index 00000000000000..916ed888615f8b
--- /dev/null
+++ b/deps/openssl/asm_obsolete/x86-win32-masm/bn/bn-586.asm
@@ -0,0 +1,1529 @@
+TITLE ../openssl/crypto/bn/asm/bn-586.asm
+IF @Version LT 800
+ECHO MASM version 8.00 or later is strongly recommended.
+ENDIF
+.686
+.XMM
+IF @Version LT 800
+XMMWORD STRUCT 16
+DQ 2 dup (?)
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_bn_mul_add_words PROC PUBLIC +$L_bn_mul_add_words_begin:: + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L000maw_non_sse2 + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] + movd mm0,DWORD PTR 16[esp] + pxor mm1,mm1 + jmp $L001maw_sse2_entry +ALIGN 16 +$L002maw_sse2_unrolled: + movd mm3,DWORD PTR [eax] + paddq mm1,mm3 + movd mm2,DWORD PTR [edx] + pmuludq mm2,mm0 + movd mm4,DWORD PTR 4[edx] + pmuludq mm4,mm0 + movd mm6,DWORD PTR 8[edx] + pmuludq mm6,mm0 + movd mm7,DWORD PTR 12[edx] + pmuludq mm7,mm0 + paddq mm1,mm2 + movd mm3,DWORD PTR 4[eax] + paddq mm3,mm4 + movd mm5,DWORD PTR 8[eax] + paddq mm5,mm6 + movd mm4,DWORD PTR 12[eax] + paddq mm7,mm4 + movd DWORD PTR [eax],mm1 + movd mm2,DWORD PTR 16[edx] + pmuludq mm2,mm0 + psrlq mm1,32 + movd mm4,DWORD PTR 20[edx] + pmuludq mm4,mm0 + paddq mm1,mm3 + movd mm6,DWORD PTR 24[edx] + pmuludq mm6,mm0 + movd DWORD PTR 4[eax],mm1 + psrlq mm1,32 + movd mm3,DWORD PTR 28[edx] + add edx,32 + pmuludq mm3,mm0 + paddq mm1,mm5 + movd mm5,DWORD PTR 16[eax] + paddq mm2,mm5 + movd DWORD PTR 8[eax],mm1 + psrlq mm1,32 + paddq mm1,mm7 + movd mm5,DWORD PTR 20[eax] + paddq mm4,mm5 + movd DWORD PTR 12[eax],mm1 + psrlq mm1,32 + paddq mm1,mm2 + movd mm5,DWORD PTR 24[eax] + paddq mm6,mm5 + movd DWORD PTR 16[eax],mm1 + psrlq mm1,32 + paddq mm1,mm4 + movd mm5,DWORD PTR 28[eax] + paddq mm3,mm5 + movd DWORD PTR 20[eax],mm1 + psrlq mm1,32 + paddq mm1,mm6 + movd DWORD PTR 24[eax],mm1 + psrlq mm1,32 + paddq mm1,mm3 + movd DWORD PTR 28[eax],mm1 + lea eax,DWORD PTR 32[eax] + psrlq mm1,32 + sub ecx,8 + jz $L003maw_sse2_exit +$L001maw_sse2_entry: + test ecx,4294967288 + jnz $L002maw_sse2_unrolled +ALIGN 4 +$L004maw_sse2_loop: + movd mm2,DWORD PTR [edx] + movd mm3,DWORD PTR [eax] + pmuludq mm2,mm0 + lea edx,DWORD PTR 4[edx] + paddq mm1,mm3 + paddq mm1,mm2 + movd DWORD PTR [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,DWORD PTR 4[eax] + jnz $L004maw_sse2_loop +$L003maw_sse2_exit: + movd eax,mm1 + emms + ret +ALIGN 16 +$L000maw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD PTR 20[esp] + mov ecx,DWORD PTR 28[esp] + mov ebx,DWORD PTR 24[esp] + and ecx,4294967288 + mov ebp,DWORD PTR 32[esp] + push ecx + jz $L005maw_finish +ALIGN 16 +$L006maw_loop: + ; Round 0 + mov eax,DWORD PTR [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR [edi] + adc edx,0 + mov DWORD PTR [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD PTR 4[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 4[edi] + adc edx,0 + mov DWORD PTR 4[edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD PTR 8[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 8[edi] + adc edx,0 + mov DWORD PTR 8[edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD PTR 12[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 12[edi] + adc edx,0 + mov DWORD PTR 12[edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD PTR 16[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 16[edi] + adc edx,0 + mov DWORD PTR 16[edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD PTR 20[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 20[edi] + adc edx,0 + mov DWORD PTR 20[edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD PTR 24[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 24[edi] + adc edx,0 + mov DWORD PTR 
24[edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD PTR 28[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 28[edi] + adc edx,0 + mov DWORD PTR 28[edi],eax + mov esi,edx + ; + sub ecx,8 + lea ebx,DWORD PTR 32[ebx] + lea edi,DWORD PTR 32[edi] + jnz $L006maw_loop +$L005maw_finish: + mov ecx,DWORD PTR 32[esp] + and ecx,7 + jnz $L007maw_finish2 + jmp $L008maw_end +$L007maw_finish2: + ; Tail Round 0 + mov eax,DWORD PTR [ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR [edi] + adc edx,0 + dec ecx + mov DWORD PTR [edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 1 + mov eax,DWORD PTR 4[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 4[edi] + adc edx,0 + dec ecx + mov DWORD PTR 4[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 2 + mov eax,DWORD PTR 8[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 8[edi] + adc edx,0 + dec ecx + mov DWORD PTR 8[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 3 + mov eax,DWORD PTR 12[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 12[edi] + adc edx,0 + dec ecx + mov DWORD PTR 12[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 4 + mov eax,DWORD PTR 16[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 16[edi] + adc edx,0 + dec ecx + mov DWORD PTR 16[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 5 + mov eax,DWORD PTR 20[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 20[edi] + adc edx,0 + dec ecx + mov DWORD PTR 20[edi],eax + mov esi,edx + jz $L008maw_end + ; Tail Round 6 + mov eax,DWORD PTR 24[ebx] + mul ebp + add eax,esi + adc edx,0 + add eax,DWORD PTR 24[edi] + adc edx,0 + mov DWORD PTR 24[edi],eax + mov esi,edx +$L008maw_end: + mov eax,esi + pop ecx + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_add_words ENDP +ALIGN 16 +_bn_mul_words PROC PUBLIC +$L_bn_mul_words_begin:: + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L009mw_non_sse2 + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] + movd mm0,DWORD PTR 16[esp] + pxor mm1,mm1 +ALIGN 16 +$L010mw_sse2_loop: + movd mm2,DWORD PTR [edx] + pmuludq mm2,mm0 + lea edx,DWORD PTR 4[edx] + paddq mm1,mm2 + movd DWORD PTR [eax],mm1 + sub ecx,1 + psrlq mm1,32 + lea eax,DWORD PTR 4[eax] + jnz $L010mw_sse2_loop + movd eax,mm1 + emms + ret +ALIGN 16 +$L009mw_non_sse2: + push ebp + push ebx + push esi + push edi + ; + xor esi,esi + mov edi,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + mov ebp,DWORD PTR 28[esp] + mov ecx,DWORD PTR 32[esp] + and ebp,4294967288 + jz $L011mw_finish +$L012mw_loop: + ; Round 0 + mov eax,DWORD PTR [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR [edi],eax + mov esi,edx + ; Round 4 + mov eax,DWORD PTR 4[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 4[edi],eax + mov esi,edx + ; Round 8 + mov eax,DWORD PTR 8[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 8[edi],eax + mov esi,edx + ; Round 12 + mov eax,DWORD PTR 12[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 12[edi],eax + mov esi,edx + ; Round 16 + mov eax,DWORD PTR 16[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 16[edi],eax + mov esi,edx + ; Round 20 + mov eax,DWORD PTR 20[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 20[edi],eax + mov esi,edx + ; Round 24 + mov eax,DWORD PTR 24[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 24[edi],eax + mov esi,edx + ; Round 28 + mov eax,DWORD PTR 28[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 28[edi],eax + mov esi,edx + ; + add ebx,32 + add 
edi,32 + sub ebp,8 + jz $L011mw_finish + jmp $L012mw_loop +$L011mw_finish: + mov ebp,DWORD PTR 28[esp] + and ebp,7 + jnz $L013mw_finish2 + jmp $L014mw_end +$L013mw_finish2: + ; Tail Round 0 + mov eax,DWORD PTR [ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR [edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 1 + mov eax,DWORD PTR 4[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 4[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 2 + mov eax,DWORD PTR 8[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 8[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 3 + mov eax,DWORD PTR 12[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 12[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 4 + mov eax,DWORD PTR 16[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 16[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 5 + mov eax,DWORD PTR 20[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 20[edi],eax + mov esi,edx + dec ebp + jz $L014mw_end + ; Tail Round 6 + mov eax,DWORD PTR 24[ebx] + mul ecx + add eax,esi + adc edx,0 + mov DWORD PTR 24[edi],eax + mov esi,edx +$L014mw_end: + mov eax,esi + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_words ENDP +ALIGN 16 +_bn_sqr_words PROC PUBLIC +$L_bn_sqr_words_begin:: + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L015sqr_non_sse2 + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] +ALIGN 16 +$L016sqr_sse2_loop: + movd mm0,DWORD PTR [edx] + pmuludq mm0,mm0 + lea edx,DWORD PTR 4[edx] + movq QWORD PTR [eax],mm0 + sub ecx,1 + lea eax,DWORD PTR 8[eax] + jnz $L016sqr_sse2_loop + emms + ret +ALIGN 16 +$L015sqr_non_sse2: + push ebp + push ebx + push esi + push edi + ; + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov ebx,DWORD PTR 28[esp] + and ebx,4294967288 + jz $L017sw_finish +$L018sw_loop: + ; Round 0 + mov eax,DWORD PTR [edi] + mul eax + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],edx + ; Round 4 + mov eax,DWORD PTR 4[edi] + mul eax + mov DWORD PTR 8[esi],eax + mov DWORD PTR 12[esi],edx + ; Round 8 + mov eax,DWORD PTR 8[edi] + mul eax + mov DWORD PTR 16[esi],eax + mov DWORD PTR 20[esi],edx + ; Round 12 + mov eax,DWORD PTR 12[edi] + mul eax + mov DWORD PTR 24[esi],eax + mov DWORD PTR 28[esi],edx + ; Round 16 + mov eax,DWORD PTR 16[edi] + mul eax + mov DWORD PTR 32[esi],eax + mov DWORD PTR 36[esi],edx + ; Round 20 + mov eax,DWORD PTR 20[edi] + mul eax + mov DWORD PTR 40[esi],eax + mov DWORD PTR 44[esi],edx + ; Round 24 + mov eax,DWORD PTR 24[edi] + mul eax + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],edx + ; Round 28 + mov eax,DWORD PTR 28[edi] + mul eax + mov DWORD PTR 56[esi],eax + mov DWORD PTR 60[esi],edx + ; + add edi,32 + add esi,64 + sub ebx,8 + jnz $L018sw_loop +$L017sw_finish: + mov ebx,DWORD PTR 28[esp] + and ebx,7 + jz $L019sw_end + ; Tail Round 0 + mov eax,DWORD PTR [edi] + mul eax + mov DWORD PTR [esi],eax + dec ebx + mov DWORD PTR 4[esi],edx + jz $L019sw_end + ; Tail Round 1 + mov eax,DWORD PTR 4[edi] + mul eax + mov DWORD PTR 8[esi],eax + dec ebx + mov DWORD PTR 12[esi],edx + jz $L019sw_end + ; Tail Round 2 + mov eax,DWORD PTR 8[edi] + mul eax + mov DWORD PTR 16[esi],eax + dec ebx + mov DWORD PTR 20[esi],edx + jz $L019sw_end + ; Tail Round 3 + mov eax,DWORD PTR 12[edi] + mul eax + mov DWORD PTR 24[esi],eax + dec ebx + mov DWORD PTR 28[esi],edx + jz $L019sw_end + ; Tail Round 4 + mov eax,DWORD PTR 16[edi] + mul eax + mov DWORD PTR 32[esi],eax + dec ebx 
+ mov DWORD PTR 36[esi],edx + jz $L019sw_end + ; Tail Round 5 + mov eax,DWORD PTR 20[edi] + mul eax + mov DWORD PTR 40[esi],eax + dec ebx + mov DWORD PTR 44[esi],edx + jz $L019sw_end + ; Tail Round 6 + mov eax,DWORD PTR 24[edi] + mul eax + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],edx +$L019sw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sqr_words ENDP +ALIGN 16 +_bn_div_words PROC PUBLIC +$L_bn_div_words_begin:: + mov edx,DWORD PTR 4[esp] + mov eax,DWORD PTR 8[esp] + mov ecx,DWORD PTR 12[esp] + div ecx + ret +_bn_div_words ENDP +ALIGN 16 +_bn_add_words PROC PUBLIC +$L_bn_add_words_begin:: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + and ebp,4294967288 + jz $L020aw_finish +$L021aw_loop: + ; Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx,DWORD PTR 28[esi] + mov edx,DWORD PTR 28[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L021aw_loop +$L020aw_finish: + mov ebp,DWORD PTR 32[esp] + and ebp,7 + jz $L022aw_end + ; Tail Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L022aw_end + ; Tail Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L022aw_end + ; Tail Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L022aw_end + ; Tail Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L022aw_end + ; Tail Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L022aw_end + ; Tail Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L022aw_end + ; Tail Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + add ecx,eax + mov eax,0 + adc eax,eax + add 
ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx +$L022aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_add_words ENDP +ALIGN 16 +_bn_sub_words PROC PUBLIC +$L_bn_sub_words_begin:: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + and ebp,4294967288 + jz $L023aw_finish +$L024aw_loop: + ; Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx,DWORD PTR 28[esi] + mov edx,DWORD PTR 28[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L024aw_loop +$L023aw_finish: + mov ebp,DWORD PTR 32[esp] + and ebp,7 + jz $L025aw_end + ; Tail Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L025aw_end + ; Tail Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L025aw_end + ; Tail Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L025aw_end + ; Tail Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L025aw_end + ; Tail Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L025aw_end + ; Tail Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L025aw_end + ; Tail Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx +$L025aw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sub_words ENDP +ALIGN 16 +_bn_sub_part_words PROC PUBLIC +$L_bn_sub_part_words_begin:: + push ebp + push ebx + push esi + push edi + ; + mov ebx,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + and ebp,4294967288 + jz $L026aw_finish +$L027aw_loop: + 
; Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx,DWORD PTR 4[esi] + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx,DWORD PTR 12[esi] + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx,DWORD PTR 16[esi] + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx,DWORD PTR 20[esi] + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx,DWORD PTR 28[esi] + mov edx,DWORD PTR 28[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add esi,32 + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L027aw_loop +$L026aw_finish: + mov ebp,DWORD PTR 32[esp] + and ebp,7 + jz $L028aw_end + ; Tail Round 0 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 1 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 2 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 3 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 4 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 5 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 + dec ebp + jz $L028aw_end + ; Tail Round 6 + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + add esi,4 + add edi,4 + add ebx,4 +$L028aw_end: + cmp DWORD PTR 36[esp],0 + je $L029pw_end + mov ebp,DWORD PTR 36[esp] + cmp ebp,0 + je $L029pw_end + jge $L030pw_pos + ; pw_neg + mov edx,0 + sub edx,ebp + mov ebp,edx + and ebp,4294967288 + jz $L031pw_neg_finish +$L032pw_neg_loop: + ; dl<0 Round 0 + mov ecx,0 + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR [ebx],ecx + ; dl<0 Round 1 + mov ecx,0 + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 4[ebx],ecx + ; dl<0 Round 2 + mov ecx,0 + mov 
edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 8[ebx],ecx + ; dl<0 Round 3 + mov ecx,0 + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 12[ebx],ecx + ; dl<0 Round 4 + mov ecx,0 + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 16[ebx],ecx + ; dl<0 Round 5 + mov ecx,0 + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 20[ebx],ecx + ; dl<0 Round 6 + mov ecx,0 + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + ; dl<0 Round 7 + mov ecx,0 + mov edx,DWORD PTR 28[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 28[ebx],ecx + ; + add edi,32 + add ebx,32 + sub ebp,8 + jnz $L032pw_neg_loop +$L031pw_neg_finish: + mov edx,DWORD PTR 36[esp] + mov ebp,0 + sub ebp,edx + and ebp,7 + jz $L029pw_end + ; dl<0 Tail Round 0 + mov ecx,0 + mov edx,DWORD PTR [edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 1 + mov ecx,0 + mov edx,DWORD PTR 4[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 2 + mov ecx,0 + mov edx,DWORD PTR 8[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 3 + mov ecx,0 + mov edx,DWORD PTR 12[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 4 + mov ecx,0 + mov edx,DWORD PTR 16[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 5 + mov ecx,0 + mov edx,DWORD PTR 20[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L029pw_end + ; dl<0 Tail Round 6 + mov ecx,0 + mov edx,DWORD PTR 24[edi] + sub ecx,eax + mov eax,0 + adc eax,eax + sub ecx,edx + adc eax,0 + mov DWORD PTR 24[ebx],ecx + jmp $L029pw_end +$L030pw_pos: + and ebp,4294967288 + jz $L033pw_pos_finish +$L034pw_pos_loop: + ; dl>0 Round 0 + mov ecx,DWORD PTR [esi] + sub ecx,eax + mov DWORD PTR [ebx],ecx + jnc $L035pw_nc0 + ; dl>0 Round 1 + mov ecx,DWORD PTR 4[esi] + sub ecx,eax + mov DWORD PTR 4[ebx],ecx + jnc $L036pw_nc1 + ; dl>0 Round 2 + mov ecx,DWORD PTR 8[esi] + sub ecx,eax + mov DWORD PTR 8[ebx],ecx + jnc $L037pw_nc2 + ; dl>0 Round 3 + mov ecx,DWORD PTR 12[esi] + sub ecx,eax + mov DWORD PTR 12[ebx],ecx + jnc $L038pw_nc3 + ; dl>0 Round 4 + mov ecx,DWORD PTR 16[esi] + sub ecx,eax + mov DWORD PTR 16[ebx],ecx + jnc $L039pw_nc4 + ; dl>0 Round 5 + mov ecx,DWORD PTR 20[esi] + sub ecx,eax + mov DWORD PTR 20[ebx],ecx + jnc $L040pw_nc5 + ; dl>0 Round 6 + mov ecx,DWORD PTR 24[esi] + sub ecx,eax + mov DWORD PTR 24[ebx],ecx + jnc $L041pw_nc6 + ; dl>0 Round 7 + mov ecx,DWORD PTR 28[esi] + sub ecx,eax + mov DWORD PTR 28[ebx],ecx + jnc $L042pw_nc7 + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz $L034pw_pos_loop +$L033pw_pos_finish: + mov ebp,DWORD PTR 36[esp] + and ebp,7 + jz $L029pw_end + ; dl>0 Tail Round 0 + mov ecx,DWORD PTR [esi] + sub ecx,eax + mov DWORD PTR [ebx],ecx + jnc $L043pw_tail_nc0 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 1 + mov ecx,DWORD PTR 4[esi] + sub 
ecx,eax + mov DWORD PTR 4[ebx],ecx + jnc $L044pw_tail_nc1 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 2 + mov ecx,DWORD PTR 8[esi] + sub ecx,eax + mov DWORD PTR 8[ebx],ecx + jnc $L045pw_tail_nc2 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 3 + mov ecx,DWORD PTR 12[esi] + sub ecx,eax + mov DWORD PTR 12[ebx],ecx + jnc $L046pw_tail_nc3 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 4 + mov ecx,DWORD PTR 16[esi] + sub ecx,eax + mov DWORD PTR 16[ebx],ecx + jnc $L047pw_tail_nc4 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 5 + mov ecx,DWORD PTR 20[esi] + sub ecx,eax + mov DWORD PTR 20[ebx],ecx + jnc $L048pw_tail_nc5 + dec ebp + jz $L029pw_end + ; dl>0 Tail Round 6 + mov ecx,DWORD PTR 24[esi] + sub ecx,eax + mov DWORD PTR 24[ebx],ecx + jnc $L049pw_tail_nc6 + mov eax,1 + jmp $L029pw_end +$L050pw_nc_loop: + mov ecx,DWORD PTR [esi] + mov DWORD PTR [ebx],ecx +$L035pw_nc0: + mov ecx,DWORD PTR 4[esi] + mov DWORD PTR 4[ebx],ecx +$L036pw_nc1: + mov ecx,DWORD PTR 8[esi] + mov DWORD PTR 8[ebx],ecx +$L037pw_nc2: + mov ecx,DWORD PTR 12[esi] + mov DWORD PTR 12[ebx],ecx +$L038pw_nc3: + mov ecx,DWORD PTR 16[esi] + mov DWORD PTR 16[ebx],ecx +$L039pw_nc4: + mov ecx,DWORD PTR 20[esi] + mov DWORD PTR 20[ebx],ecx +$L040pw_nc5: + mov ecx,DWORD PTR 24[esi] + mov DWORD PTR 24[ebx],ecx +$L041pw_nc6: + mov ecx,DWORD PTR 28[esi] + mov DWORD PTR 28[ebx],ecx +$L042pw_nc7: + ; + add esi,32 + add ebx,32 + sub ebp,8 + jnz $L050pw_nc_loop + mov ebp,DWORD PTR 36[esp] + and ebp,7 + jz $L051pw_nc_end + mov ecx,DWORD PTR [esi] + mov DWORD PTR [ebx],ecx +$L043pw_tail_nc0: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 4[esi] + mov DWORD PTR 4[ebx],ecx +$L044pw_tail_nc1: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 8[esi] + mov DWORD PTR 8[ebx],ecx +$L045pw_tail_nc2: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 12[esi] + mov DWORD PTR 12[ebx],ecx +$L046pw_tail_nc3: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 16[esi] + mov DWORD PTR 16[ebx],ecx +$L047pw_tail_nc4: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 20[esi] + mov DWORD PTR 20[ebx],ecx +$L048pw_tail_nc5: + dec ebp + jz $L051pw_nc_end + mov ecx,DWORD PTR 24[esi] + mov DWORD PTR 24[ebx],ecx +$L049pw_tail_nc6: +$L051pw_nc_end: + mov eax,0 +$L029pw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sub_part_words ENDP +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bn/co-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bn/co-586.asm new file mode 100644 index 00000000000000..a44b2b1b957268 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/bn/co-586.asm @@ -0,0 +1,1258 @@ +TITLE ../openssl/crypto/bn/asm/co-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. 
+ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_bn_mul_comba8 PROC PUBLIC +$L_bn_mul_comba8_begin:: + push esi + mov esi,DWORD PTR 12[esp] + push edi + mov edi,DWORD PTR 20[esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD PTR [esi] + xor ecx,ecx + mov edx,DWORD PTR [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR [edi] + adc ebp,0 + mov DWORD PTR [eax],ebx + mov eax,DWORD PTR 4[esi] + ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD PTR [esi] + adc ebp,edx + mov edx,DWORD PTR 4[edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR [edi] + adc ebx,0 + mov DWORD PTR 4[eax],ecx + mov eax,DWORD PTR 8[esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD PTR 4[esi] + adc ebx,edx + mov edx,DWORD PTR 4[edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD PTR [esi] + adc ebx,edx + mov edx,DWORD PTR 8[edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esp] + adc ebx,edx + mov edx,DWORD PTR [edi] + adc ecx,0 + mov DWORD PTR 8[eax],ebp + mov eax,DWORD PTR 12[esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD PTR 8[esi] + adc ecx,edx + mov edx,DWORD PTR 4[edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD PTR 4[esi] + adc ecx,edx + mov edx,DWORD PTR 8[edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD PTR [esi] + adc ecx,edx + mov edx,DWORD PTR 12[edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR [edi] + adc ebp,0 + mov DWORD PTR 12[eax],ebx + mov eax,DWORD PTR 16[esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[4]*b[0] + mul edx + add ecx,eax + mov eax,DWORD PTR 12[esi] + adc ebp,edx + mov edx,DWORD PTR 4[edi] + adc ebx,0 + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD PTR 8[esi] + adc ebp,edx + mov edx,DWORD PTR 8[edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD PTR 4[esi] + adc ebp,edx + mov edx,DWORD PTR 12[edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD PTR [esi] + adc ebp,edx + mov edx,DWORD PTR 16[edi] + adc ebx,0 + ; mul a[0]*b[4] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR [edi] + adc ebx,0 + mov DWORD PTR 16[eax],ecx + mov eax,DWORD PTR 20[esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[5]*b[0] + mul edx + add ebp,eax + mov eax,DWORD PTR 16[esi] + adc ebx,edx + mov edx,DWORD PTR 4[edi] + adc ecx,0 + ; mul a[4]*b[1] + mul edx + add ebp,eax + mov eax,DWORD PTR 12[esi] + adc ebx,edx + mov edx,DWORD PTR 8[edi] + adc ecx,0 + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD PTR 8[esi] + adc ebx,edx + mov edx,DWORD PTR 12[edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD PTR 4[esi] + adc ebx,edx + mov edx,DWORD PTR 16[edi] + adc ecx,0 + ; mul a[1]*b[4] + mul edx + add ebp,eax + mov eax,DWORD PTR [esi] + adc ebx,edx + mov edx,DWORD PTR 20[edi] + adc ecx,0 + ; mul a[0]*b[5] + mul edx + add ebp,eax + mov eax,DWORD 
PTR 20[esp] + adc ebx,edx + mov edx,DWORD PTR [edi] + adc ecx,0 + mov DWORD PTR 20[eax],ebp + mov eax,DWORD PTR 24[esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[6]*b[0] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esi] + adc ecx,edx + mov edx,DWORD PTR 4[edi] + adc ebp,0 + ; mul a[5]*b[1] + mul edx + add ebx,eax + mov eax,DWORD PTR 16[esi] + adc ecx,edx + mov edx,DWORD PTR 8[edi] + adc ebp,0 + ; mul a[4]*b[2] + mul edx + add ebx,eax + mov eax,DWORD PTR 12[esi] + adc ecx,edx + mov edx,DWORD PTR 12[edi] + adc ebp,0 + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD PTR 8[esi] + adc ecx,edx + mov edx,DWORD PTR 16[edi] + adc ebp,0 + ; mul a[2]*b[4] + mul edx + add ebx,eax + mov eax,DWORD PTR 4[esi] + adc ecx,edx + mov edx,DWORD PTR 20[edi] + adc ebp,0 + ; mul a[1]*b[5] + mul edx + add ebx,eax + mov eax,DWORD PTR [esi] + adc ecx,edx + mov edx,DWORD PTR 24[edi] + adc ebp,0 + ; mul a[0]*b[6] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR [edi] + adc ebp,0 + mov DWORD PTR 24[eax],ebx + mov eax,DWORD PTR 28[esi] + ; saved r[6] + ; ################## Calculate word 7 + xor ebx,ebx + ; mul a[7]*b[0] + mul edx + add ecx,eax + mov eax,DWORD PTR 24[esi] + adc ebp,edx + mov edx,DWORD PTR 4[edi] + adc ebx,0 + ; mul a[6]*b[1] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esi] + adc ebp,edx + mov edx,DWORD PTR 8[edi] + adc ebx,0 + ; mul a[5]*b[2] + mul edx + add ecx,eax + mov eax,DWORD PTR 16[esi] + adc ebp,edx + mov edx,DWORD PTR 12[edi] + adc ebx,0 + ; mul a[4]*b[3] + mul edx + add ecx,eax + mov eax,DWORD PTR 12[esi] + adc ebp,edx + mov edx,DWORD PTR 16[edi] + adc ebx,0 + ; mul a[3]*b[4] + mul edx + add ecx,eax + mov eax,DWORD PTR 8[esi] + adc ebp,edx + mov edx,DWORD PTR 20[edi] + adc ebx,0 + ; mul a[2]*b[5] + mul edx + add ecx,eax + mov eax,DWORD PTR 4[esi] + adc ebp,edx + mov edx,DWORD PTR 24[edi] + adc ebx,0 + ; mul a[1]*b[6] + mul edx + add ecx,eax + mov eax,DWORD PTR [esi] + adc ebp,edx + mov edx,DWORD PTR 28[edi] + adc ebx,0 + ; mul a[0]*b[7] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR 4[edi] + adc ebx,0 + mov DWORD PTR 28[eax],ecx + mov eax,DWORD PTR 28[esi] + ; saved r[7] + ; ################## Calculate word 8 + xor ecx,ecx + ; mul a[7]*b[1] + mul edx + add ebp,eax + mov eax,DWORD PTR 24[esi] + adc ebx,edx + mov edx,DWORD PTR 8[edi] + adc ecx,0 + ; mul a[6]*b[2] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esi] + adc ebx,edx + mov edx,DWORD PTR 12[edi] + adc ecx,0 + ; mul a[5]*b[3] + mul edx + add ebp,eax + mov eax,DWORD PTR 16[esi] + adc ebx,edx + mov edx,DWORD PTR 16[edi] + adc ecx,0 + ; mul a[4]*b[4] + mul edx + add ebp,eax + mov eax,DWORD PTR 12[esi] + adc ebx,edx + mov edx,DWORD PTR 20[edi] + adc ecx,0 + ; mul a[3]*b[5] + mul edx + add ebp,eax + mov eax,DWORD PTR 8[esi] + adc ebx,edx + mov edx,DWORD PTR 24[edi] + adc ecx,0 + ; mul a[2]*b[6] + mul edx + add ebp,eax + mov eax,DWORD PTR 4[esi] + adc ebx,edx + mov edx,DWORD PTR 28[edi] + adc ecx,0 + ; mul a[1]*b[7] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esp] + adc ebx,edx + mov edx,DWORD PTR 8[edi] + adc ecx,0 + mov DWORD PTR 32[eax],ebp + mov eax,DWORD PTR 28[esi] + ; saved r[8] + ; ################## Calculate word 9 + xor ebp,ebp + ; mul a[7]*b[2] + mul edx + add ebx,eax + mov eax,DWORD PTR 24[esi] + adc ecx,edx + mov edx,DWORD PTR 12[edi] + adc ebp,0 + ; mul a[6]*b[3] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esi] + adc ecx,edx + mov edx,DWORD PTR 16[edi] + adc ebp,0 + ; mul a[5]*b[4] + mul edx + add 
ebx,eax + mov eax,DWORD PTR 16[esi] + adc ecx,edx + mov edx,DWORD PTR 20[edi] + adc ebp,0 + ; mul a[4]*b[5] + mul edx + add ebx,eax + mov eax,DWORD PTR 12[esi] + adc ecx,edx + mov edx,DWORD PTR 24[edi] + adc ebp,0 + ; mul a[3]*b[6] + mul edx + add ebx,eax + mov eax,DWORD PTR 8[esi] + adc ecx,edx + mov edx,DWORD PTR 28[edi] + adc ebp,0 + ; mul a[2]*b[7] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR 12[edi] + adc ebp,0 + mov DWORD PTR 36[eax],ebx + mov eax,DWORD PTR 28[esi] + ; saved r[9] + ; ################## Calculate word 10 + xor ebx,ebx + ; mul a[7]*b[3] + mul edx + add ecx,eax + mov eax,DWORD PTR 24[esi] + adc ebp,edx + mov edx,DWORD PTR 16[edi] + adc ebx,0 + ; mul a[6]*b[4] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esi] + adc ebp,edx + mov edx,DWORD PTR 20[edi] + adc ebx,0 + ; mul a[5]*b[5] + mul edx + add ecx,eax + mov eax,DWORD PTR 16[esi] + adc ebp,edx + mov edx,DWORD PTR 24[edi] + adc ebx,0 + ; mul a[4]*b[6] + mul edx + add ecx,eax + mov eax,DWORD PTR 12[esi] + adc ebp,edx + mov edx,DWORD PTR 28[edi] + adc ebx,0 + ; mul a[3]*b[7] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR 16[edi] + adc ebx,0 + mov DWORD PTR 40[eax],ecx + mov eax,DWORD PTR 28[esi] + ; saved r[10] + ; ################## Calculate word 11 + xor ecx,ecx + ; mul a[7]*b[4] + mul edx + add ebp,eax + mov eax,DWORD PTR 24[esi] + adc ebx,edx + mov edx,DWORD PTR 20[edi] + adc ecx,0 + ; mul a[6]*b[5] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esi] + adc ebx,edx + mov edx,DWORD PTR 24[edi] + adc ecx,0 + ; mul a[5]*b[6] + mul edx + add ebp,eax + mov eax,DWORD PTR 16[esi] + adc ebx,edx + mov edx,DWORD PTR 28[edi] + adc ecx,0 + ; mul a[4]*b[7] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esp] + adc ebx,edx + mov edx,DWORD PTR 20[edi] + adc ecx,0 + mov DWORD PTR 44[eax],ebp + mov eax,DWORD PTR 28[esi] + ; saved r[11] + ; ################## Calculate word 12 + xor ebp,ebp + ; mul a[7]*b[5] + mul edx + add ebx,eax + mov eax,DWORD PTR 24[esi] + adc ecx,edx + mov edx,DWORD PTR 24[edi] + adc ebp,0 + ; mul a[6]*b[6] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esi] + adc ecx,edx + mov edx,DWORD PTR 28[edi] + adc ebp,0 + ; mul a[5]*b[7] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR 24[edi] + adc ebp,0 + mov DWORD PTR 48[eax],ebx + mov eax,DWORD PTR 28[esi] + ; saved r[12] + ; ################## Calculate word 13 + xor ebx,ebx + ; mul a[7]*b[6] + mul edx + add ecx,eax + mov eax,DWORD PTR 24[esi] + adc ebp,edx + mov edx,DWORD PTR 28[edi] + adc ebx,0 + ; mul a[6]*b[7] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR 28[edi] + adc ebx,0 + mov DWORD PTR 52[eax],ecx + mov eax,DWORD PTR 28[esi] + ; saved r[13] + ; ################## Calculate word 14 + xor ecx,ecx + ; mul a[7]*b[7] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esp] + adc ebx,edx + adc ecx,0 + mov DWORD PTR 56[eax],ebp + ; saved r[14] + ; save r[15] + mov DWORD PTR 60[eax],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +_bn_mul_comba8 ENDP +ALIGN 16 +_bn_mul_comba4 PROC PUBLIC +$L_bn_mul_comba4_begin:: + push esi + mov esi,DWORD PTR 12[esp] + push edi + mov edi,DWORD PTR 20[esp] + push ebp + push ebx + xor ebx,ebx + mov eax,DWORD PTR [esi] + xor ecx,ecx + mov edx,DWORD PTR [edi] + ; ################## Calculate word 0 + xor ebp,ebp + ; mul a[0]*b[0] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR [edi] + adc ebp,0 + mov DWORD PTR [eax],ebx + mov eax,DWORD PTR 4[esi] 
+ ; saved r[0] + ; ################## Calculate word 1 + xor ebx,ebx + ; mul a[1]*b[0] + mul edx + add ecx,eax + mov eax,DWORD PTR [esi] + adc ebp,edx + mov edx,DWORD PTR 4[edi] + adc ebx,0 + ; mul a[0]*b[1] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR [edi] + adc ebx,0 + mov DWORD PTR 4[eax],ecx + mov eax,DWORD PTR 8[esi] + ; saved r[1] + ; ################## Calculate word 2 + xor ecx,ecx + ; mul a[2]*b[0] + mul edx + add ebp,eax + mov eax,DWORD PTR 4[esi] + adc ebx,edx + mov edx,DWORD PTR 4[edi] + adc ecx,0 + ; mul a[1]*b[1] + mul edx + add ebp,eax + mov eax,DWORD PTR [esi] + adc ebx,edx + mov edx,DWORD PTR 8[edi] + adc ecx,0 + ; mul a[0]*b[2] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esp] + adc ebx,edx + mov edx,DWORD PTR [edi] + adc ecx,0 + mov DWORD PTR 8[eax],ebp + mov eax,DWORD PTR 12[esi] + ; saved r[2] + ; ################## Calculate word 3 + xor ebp,ebp + ; mul a[3]*b[0] + mul edx + add ebx,eax + mov eax,DWORD PTR 8[esi] + adc ecx,edx + mov edx,DWORD PTR 4[edi] + adc ebp,0 + ; mul a[2]*b[1] + mul edx + add ebx,eax + mov eax,DWORD PTR 4[esi] + adc ecx,edx + mov edx,DWORD PTR 8[edi] + adc ebp,0 + ; mul a[1]*b[2] + mul edx + add ebx,eax + mov eax,DWORD PTR [esi] + adc ecx,edx + mov edx,DWORD PTR 12[edi] + adc ebp,0 + ; mul a[0]*b[3] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + mov edx,DWORD PTR 4[edi] + adc ebp,0 + mov DWORD PTR 12[eax],ebx + mov eax,DWORD PTR 12[esi] + ; saved r[3] + ; ################## Calculate word 4 + xor ebx,ebx + ; mul a[3]*b[1] + mul edx + add ecx,eax + mov eax,DWORD PTR 8[esi] + adc ebp,edx + mov edx,DWORD PTR 8[edi] + adc ebx,0 + ; mul a[2]*b[2] + mul edx + add ecx,eax + mov eax,DWORD PTR 4[esi] + adc ebp,edx + mov edx,DWORD PTR 12[edi] + adc ebx,0 + ; mul a[1]*b[3] + mul edx + add ecx,eax + mov eax,DWORD PTR 20[esp] + adc ebp,edx + mov edx,DWORD PTR 8[edi] + adc ebx,0 + mov DWORD PTR 16[eax],ecx + mov eax,DWORD PTR 12[esi] + ; saved r[4] + ; ################## Calculate word 5 + xor ecx,ecx + ; mul a[3]*b[2] + mul edx + add ebp,eax + mov eax,DWORD PTR 8[esi] + adc ebx,edx + mov edx,DWORD PTR 12[edi] + adc ecx,0 + ; mul a[2]*b[3] + mul edx + add ebp,eax + mov eax,DWORD PTR 20[esp] + adc ebx,edx + mov edx,DWORD PTR 12[edi] + adc ecx,0 + mov DWORD PTR 20[eax],ebp + mov eax,DWORD PTR 12[esi] + ; saved r[5] + ; ################## Calculate word 6 + xor ebp,ebp + ; mul a[3]*b[3] + mul edx + add ebx,eax + mov eax,DWORD PTR 20[esp] + adc ecx,edx + adc ebp,0 + mov DWORD PTR 24[eax],ebx + ; saved r[6] + ; save r[7] + mov DWORD PTR 28[eax],ecx + pop ebx + pop ebp + pop edi + pop esi + ret +_bn_mul_comba4 ENDP +ALIGN 16 +_bn_sqr_comba8 PROC PUBLIC +$L_bn_sqr_comba8_begin:: + push esi + push edi + push ebp + push ebx + mov edi,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD PTR [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD PTR [esi] + adc ebp,0 + mov DWORD PTR [edi],ebx + mov eax,DWORD PTR 4[esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 8[esi] + adc ebx,0 + mov DWORD PTR 4[edi],ecx + mov edx,DWORD PTR [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 4[esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + 
add ebp,eax + adc ebx,edx + mov edx,DWORD PTR [esi] + adc ecx,0 + mov DWORD PTR 8[edi],ebp + mov eax,DWORD PTR 12[esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 8[esi] + adc ebp,0 + mov edx,DWORD PTR 4[esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 16[esi] + adc ebp,0 + mov DWORD PTR 12[edi],ebx + mov edx,DWORD PTR [esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[4]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 12[esi] + adc ebx,0 + mov edx,DWORD PTR 4[esi] + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 8[esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD PTR [esi] + adc ebx,0 + mov DWORD PTR 16[edi],ecx + mov eax,DWORD PTR 20[esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[5]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 16[esi] + adc ecx,0 + mov edx,DWORD PTR 4[esi] + ; sqr a[4]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 12[esi] + adc ecx,0 + mov edx,DWORD PTR 8[esi] + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 24[esi] + adc ecx,0 + mov DWORD PTR 20[edi],ebp + mov edx,DWORD PTR [esi] + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[6]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 20[esi] + adc ebp,0 + mov edx,DWORD PTR 4[esi] + ; sqr a[5]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 16[esi] + adc ebp,0 + mov edx,DWORD PTR 8[esi] + ; sqr a[4]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 12[esi] + adc ebp,0 + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD PTR [esi] + adc ebp,0 + mov DWORD PTR 24[edi],ebx + mov eax,DWORD PTR 28[esi] + ; saved r[6] + ; ############### Calculate word 7 + xor ebx,ebx + ; sqr a[7]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 24[esi] + adc ebx,0 + mov edx,DWORD PTR 4[esi] + ; sqr a[6]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 20[esi] + adc ebx,0 + mov edx,DWORD PTR 8[esi] + ; sqr a[5]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 16[esi] + adc ebx,0 + mov edx,DWORD PTR 12[esi] + ; sqr a[4]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 28[esi] + adc ebx,0 + mov DWORD PTR 28[edi],ecx + mov edx,DWORD PTR 4[esi] + ; saved r[7] + ; ############### Calculate word 8 + xor ecx,ecx + ; sqr a[7]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 24[esi] + adc ecx,0 + mov edx,DWORD PTR 8[esi] + ; sqr a[6]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 20[esi] + adc ecx,0 + mov edx,DWORD PTR 12[esi] + ; sqr a[5]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov 
eax,DWORD PTR 16[esi] + adc ecx,0 + ; sqr a[4]*a[4] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD PTR 8[esi] + adc ecx,0 + mov DWORD PTR 32[edi],ebp + mov eax,DWORD PTR 28[esi] + ; saved r[8] + ; ############### Calculate word 9 + xor ebp,ebp + ; sqr a[7]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 24[esi] + adc ebp,0 + mov edx,DWORD PTR 12[esi] + ; sqr a[6]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 20[esi] + adc ebp,0 + mov edx,DWORD PTR 16[esi] + ; sqr a[5]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 28[esi] + adc ebp,0 + mov DWORD PTR 36[edi],ebx + mov edx,DWORD PTR 12[esi] + ; saved r[9] + ; ############### Calculate word 10 + xor ebx,ebx + ; sqr a[7]*a[3] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 24[esi] + adc ebx,0 + mov edx,DWORD PTR 16[esi] + ; sqr a[6]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 20[esi] + adc ebx,0 + ; sqr a[5]*a[5] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD PTR 16[esi] + adc ebx,0 + mov DWORD PTR 40[edi],ecx + mov eax,DWORD PTR 28[esi] + ; saved r[10] + ; ############### Calculate word 11 + xor ecx,ecx + ; sqr a[7]*a[4] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 24[esi] + adc ecx,0 + mov edx,DWORD PTR 20[esi] + ; sqr a[6]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 28[esi] + adc ecx,0 + mov DWORD PTR 44[edi],ebp + mov edx,DWORD PTR 20[esi] + ; saved r[11] + ; ############### Calculate word 12 + xor ebp,ebp + ; sqr a[7]*a[5] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 24[esi] + adc ebp,0 + ; sqr a[6]*a[6] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD PTR 24[esi] + adc ebp,0 + mov DWORD PTR 48[edi],ebx + mov eax,DWORD PTR 28[esi] + ; saved r[12] + ; ############### Calculate word 13 + xor ebx,ebx + ; sqr a[7]*a[6] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 28[esi] + adc ebx,0 + mov DWORD PTR 52[edi],ecx + ; saved r[13] + ; ############### Calculate word 14 + xor ecx,ecx + ; sqr a[7]*a[7] + mul eax + add ebp,eax + adc ebx,edx + adc ecx,0 + mov DWORD PTR 56[edi],ebp + ; saved r[14] + mov DWORD PTR 60[edi],ebx + pop ebx + pop ebp + pop edi + pop esi + ret +_bn_sqr_comba8 ENDP +ALIGN 16 +_bn_sqr_comba4 PROC PUBLIC +$L_bn_sqr_comba4_begin:: + push esi + push edi + push ebp + push ebx + mov edi,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + xor ebx,ebx + xor ecx,ecx + mov eax,DWORD PTR [esi] + ; ############### Calculate word 0 + xor ebp,ebp + ; sqr a[0]*a[0] + mul eax + add ebx,eax + adc ecx,edx + mov edx,DWORD PTR [esi] + adc ebp,0 + mov DWORD PTR [edi],ebx + mov eax,DWORD PTR 4[esi] + ; saved r[0] + ; ############### Calculate word 1 + xor ebx,ebx + ; sqr a[1]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 8[esi] + adc ebx,0 + mov DWORD PTR 4[edi],ecx + mov edx,DWORD PTR [esi] + ; saved r[1] + ; ############### Calculate word 2 + xor ecx,ecx + ; sqr a[2]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 4[esi] + adc ecx,0 + ; sqr a[1]*a[1] + mul eax + add ebp,eax + adc ebx,edx + mov edx,DWORD PTR [esi] + adc ecx,0 + mov DWORD 
PTR 8[edi],ebp + mov eax,DWORD PTR 12[esi] + ; saved r[2] + ; ############### Calculate word 3 + xor ebp,ebp + ; sqr a[3]*a[0] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 8[esi] + adc ebp,0 + mov edx,DWORD PTR 4[esi] + ; sqr a[2]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebp,0 + add ebx,eax + adc ecx,edx + mov eax,DWORD PTR 12[esi] + adc ebp,0 + mov DWORD PTR 12[edi],ebx + mov edx,DWORD PTR 4[esi] + ; saved r[3] + ; ############### Calculate word 4 + xor ebx,ebx + ; sqr a[3]*a[1] + mul edx + add eax,eax + adc edx,edx + adc ebx,0 + add ecx,eax + adc ebp,edx + mov eax,DWORD PTR 8[esi] + adc ebx,0 + ; sqr a[2]*a[2] + mul eax + add ecx,eax + adc ebp,edx + mov edx,DWORD PTR 8[esi] + adc ebx,0 + mov DWORD PTR 16[edi],ecx + mov eax,DWORD PTR 12[esi] + ; saved r[4] + ; ############### Calculate word 5 + xor ecx,ecx + ; sqr a[3]*a[2] + mul edx + add eax,eax + adc edx,edx + adc ecx,0 + add ebp,eax + adc ebx,edx + mov eax,DWORD PTR 12[esi] + adc ecx,0 + mov DWORD PTR 20[edi],ebp + ; saved r[5] + ; ############### Calculate word 6 + xor ebp,ebp + ; sqr a[3]*a[3] + mul eax + add ebx,eax + adc ecx,edx + adc ebp,0 + mov DWORD PTR 24[edi],ebx + ; saved r[6] + mov DWORD PTR 28[edi],ecx + pop ebx + pop ebp + pop edi + pop esi + ret +_bn_sqr_comba4 ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-gf2m.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-gf2m.asm new file mode 100644 index 00000000000000..57adf3ace49e1f --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-gf2m.asm @@ -0,0 +1,361 @@ +TITLE ../openssl/crypto/bn/asm/x86-gf2m.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +__mul_1x1_mmx PROC PRIVATE + sub esp,36 + mov ecx,eax + lea edx,DWORD PTR [eax*1+eax] + and ecx,1073741823 + lea ebp,DWORD PTR [edx*1+edx] + mov DWORD PTR [esp],0 + and edx,2147483647 + movd mm2,eax + movd mm3,ebx + mov DWORD PTR 4[esp],ecx + xor ecx,edx + pxor mm5,mm5 + pxor mm4,mm4 + mov DWORD PTR 8[esp],edx + xor edx,ebp + mov DWORD PTR 12[esp],ecx + pcmpgtd mm5,mm2 + paddd mm2,mm2 + xor ecx,edx + mov DWORD PTR 16[esp],ebp + xor ebp,edx + pand mm5,mm3 + pcmpgtd mm4,mm2 + mov DWORD PTR 20[esp],ecx + xor ebp,ecx + psllq mm5,31 + pand mm4,mm3 + mov DWORD PTR 24[esp],edx + mov esi,7 + mov DWORD PTR 28[esp],ebp + mov ebp,esi + and esi,ebx + shr ebx,3 + mov edi,ebp + psllq mm4,30 + and edi,ebx + shr ebx,3 + movd mm0,DWORD PTR [esi*4+esp] + mov esi,ebp + and esi,ebx + shr ebx,3 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,3 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,6 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,9 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,12 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,15 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,18 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + mov edi,ebp + psllq mm2,21 + and edi,ebx + shr ebx,3 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + mov esi,ebp + psllq mm1,24 + and esi,ebx + shr ebx,3 + pxor mm0,mm1 + movd mm2,DWORD PTR [edi*4+esp] + pxor mm0,mm4 + psllq mm2,27 + pxor mm0,mm2 + movd mm1,DWORD PTR [esi*4+esp] + pxor mm0,mm5 + psllq mm1,30 + add esp,36 + pxor mm0,mm1 + ret +__mul_1x1_mmx ENDP +ALIGN 16 +__mul_1x1_ialu PROC PRIVATE + sub esp,36 + mov ecx,eax + lea edx,DWORD PTR [eax*1+eax] + lea ebp,DWORD PTR [eax*4] + and ecx,1073741823 + lea edi,DWORD PTR [eax*1+eax] + sar eax,31 + mov DWORD PTR [esp],0 + and edx,2147483647 + mov DWORD PTR 4[esp],ecx + xor ecx,edx + mov DWORD PTR 8[esp],edx + xor edx,ebp + mov DWORD PTR 12[esp],ecx + xor ecx,edx + mov DWORD PTR 16[esp],ebp + xor ebp,edx + mov DWORD PTR 20[esp],ecx + xor ebp,ecx + sar edi,31 + and eax,ebx + mov DWORD PTR 24[esp],edx + and edi,ebx + mov DWORD PTR 28[esp],ebp + mov edx,eax + shl eax,31 + mov ecx,edi + shr edx,1 + mov esi,7 + shl edi,30 + and esi,ebx + shr ecx,2 + xor eax,edi + shr ebx,3 + mov edi,7 + and edi,ebx + shr ebx,3 + xor edx,ecx + xor eax,DWORD PTR [esi*4+esp] + mov esi,7 + and esi,ebx + shr ebx,3 + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,3 + and edi,ebx + shr ecx,29 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,6 + and esi,ebx + shr ebp,26 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,9 + and edi,ebx + shr ecx,23 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,12 + and esi,ebx + shr ebp,20 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,15 + and edi,ebx + shr ecx,17 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,18 + and 
esi,ebx + shr ebp,14 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov edi,7 + mov ecx,ebp + shl ebp,21 + and edi,ebx + shr ecx,11 + xor eax,ebp + shr ebx,3 + xor edx,ecx + mov ecx,DWORD PTR [esi*4+esp] + mov esi,7 + mov ebp,ecx + shl ecx,24 + and esi,ebx + shr ebp,8 + xor eax,ecx + shr ebx,3 + xor edx,ebp + mov ebp,DWORD PTR [edi*4+esp] + mov ecx,ebp + shl ebp,27 + mov edi,DWORD PTR [esi*4+esp] + shr ecx,5 + mov esi,edi + xor eax,ebp + shl edi,30 + xor edx,ecx + shr esi,2 + xor eax,edi + xor edx,esi + add esp,36 + ret +__mul_1x1_ialu ENDP +ALIGN 16 +_bn_GF2m_mul_2x2 PROC PUBLIC +$L_bn_GF2m_mul_2x2_begin:: + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov eax,DWORD PTR [edx] + mov edx,DWORD PTR 4[edx] + test eax,8388608 + jz $L000ialu + test eax,16777216 + jz $L001mmx + test edx,2 + jz $L001mmx + movups xmm0,XMMWORD PTR 8[esp] + shufps xmm0,xmm0,177 +DB 102,15,58,68,192,1 + mov eax,DWORD PTR 4[esp] + movups XMMWORD PTR [eax],xmm0 + ret +ALIGN 16 +$L001mmx: + push ebp + push ebx + push esi + push edi + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 32[esp] + call __mul_1x1_mmx + movq mm7,mm0 + mov eax,DWORD PTR 28[esp] + mov ebx,DWORD PTR 36[esp] + call __mul_1x1_mmx + movq mm6,mm0 + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 32[esp] + xor eax,DWORD PTR 28[esp] + xor ebx,DWORD PTR 36[esp] + call __mul_1x1_mmx + pxor mm0,mm7 + mov eax,DWORD PTR 20[esp] + pxor mm0,mm6 + movq mm2,mm0 + psllq mm0,32 + pop edi + psrlq mm2,32 + pop esi + pxor mm0,mm6 + pop ebx + pxor mm2,mm7 + movq QWORD PTR [eax],mm0 + pop ebp + movq QWORD PTR 8[eax],mm2 + emms + ret +ALIGN 16 +$L000ialu: + push ebp + push ebx + push esi + push edi + sub esp,20 + mov eax,DWORD PTR 44[esp] + mov ebx,DWORD PTR 52[esp] + call __mul_1x1_ialu + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],edx + mov eax,DWORD PTR 48[esp] + mov ebx,DWORD PTR 56[esp] + call __mul_1x1_ialu + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],edx + mov eax,DWORD PTR 44[esp] + mov ebx,DWORD PTR 52[esp] + xor eax,DWORD PTR 48[esp] + xor ebx,DWORD PTR 56[esp] + call __mul_1x1_ialu + mov ebp,DWORD PTR 40[esp] + mov ebx,DWORD PTR [esp] + mov ecx,DWORD PTR 4[esp] + mov edi,DWORD PTR 8[esp] + mov esi,DWORD PTR 12[esp] + xor eax,edx + xor edx,ecx + xor eax,ebx + mov DWORD PTR [ebp],ebx + xor edx,edi + mov DWORD PTR 12[ebp],esi + xor eax,esi + add esp,20 + xor edx,esi + pop edi + xor eax,edx + pop esi + mov DWORD PTR 8[ebp],edx + pop ebx + mov DWORD PTR 4[ebp],eax + pop ebp + ret +_bn_GF2m_mul_2x2 ENDP +DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105 +DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm new file mode 100644 index 00000000000000..9bfa4dc8eb1c88 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/bn/x86-mont.asm @@ -0,0 +1,476 @@ +TITLE ../openssl/crypto/bn/asm/x86-mont.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_bn_mul_mont PROC PUBLIC +$L_bn_mul_mont_begin:: + push ebp + push ebx + push esi + push edi + xor eax,eax + mov edi,DWORD PTR 40[esp] + cmp edi,4 + jl $L000just_leave + lea esi,DWORD PTR 20[esp] + lea edx,DWORD PTR 24[esp] + mov ebp,esp + add edi,2 + neg edi + lea esp,DWORD PTR [edi*4+esp-32] + neg edi + mov eax,esp + sub eax,edx + and eax,2047 + sub esp,eax + xor edx,esp + and edx,2048 + xor edx,2048 + sub esp,edx + and esp,-64 + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov esi,DWORD PTR 16[esi] + mov esi,DWORD PTR [esi] + mov DWORD PTR 4[esp],eax + mov DWORD PTR 8[esp],ebx + mov DWORD PTR 12[esp],ecx + mov DWORD PTR 16[esp],edx + mov DWORD PTR 20[esp],esi + lea ebx,DWORD PTR [edi-3] + mov DWORD PTR 24[esp],ebp + lea eax,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [eax],26 + jnc $L001non_sse2 + mov eax,-1 + movd mm7,eax + mov esi,DWORD PTR 8[esp] + mov edi,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + xor edx,edx + xor ecx,ecx + movd mm4,DWORD PTR [edi] + movd mm5,DWORD PTR [esi] + movd mm3,DWORD PTR [ebp] + pmuludq mm5,mm4 + movq mm2,mm5 + movq mm0,mm5 + pand mm0,mm7 + pmuludq mm5,QWORD PTR 20[esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm1,DWORD PTR 4[ebp] + movd mm0,DWORD PTR 4[esi] + psrlq mm2,32 + psrlq mm3,32 + inc ecx +ALIGN 16 +$L0021st: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + movd mm1,DWORD PTR 4[ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD PTR 4[ecx*4+esi] + psrlq mm2,32 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm3,32 + lea ecx,DWORD PTR 1[ecx] + cmp ecx,ebx + jl $L0021st + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + paddq mm3,mm2 + movq QWORD PTR 32[ebx*4+esp],mm3 + inc edx +$L003outer: + xor ecx,ecx + movd mm4,DWORD PTR [edx*4+edi] + movd mm5,DWORD PTR [esi] + movd mm6,DWORD PTR 32[esp] + movd mm3,DWORD PTR [ebp] + pmuludq mm5,mm4 + paddq mm5,mm6 + movq mm0,mm5 + movq mm2,mm5 + pand mm0,mm7 + pmuludq mm5,QWORD PTR 20[esp] + pmuludq mm3,mm5 + paddq mm3,mm0 + movd mm6,DWORD PTR 36[esp] + movd mm1,DWORD PTR 4[ebp] + movd mm0,DWORD PTR 4[esi] + psrlq mm2,32 + psrlq mm3,32 + paddq mm2,mm6 + inc ecx + dec ebx +$L004inner: + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + movd mm6,DWORD PTR 36[ecx*4+esp] + pand mm0,mm7 + movd mm1,DWORD PTR 4[ecx*4+ebp] + paddq mm3,mm0 + movd mm0,DWORD PTR 4[ecx*4+esi] + psrlq mm2,32 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm3,32 + paddq mm2,mm6 + dec ebx + lea ecx,DWORD PTR 1[ecx] + jnz $L004inner + mov ebx,ecx + pmuludq mm0,mm4 + pmuludq mm1,mm5 + paddq mm2,mm0 + paddq mm3,mm1 + movq mm0,mm2 + pand mm0,mm7 + paddq mm3,mm0 + movd DWORD PTR 28[ecx*4+esp],mm3 + psrlq mm2,32 + psrlq mm3,32 + movd mm6,DWORD PTR 36[ebx*4+esp] + paddq mm3,mm2 + paddq mm3,mm6 + movq QWORD PTR 32[ebx*4+esp],mm3 + lea edx,DWORD PTR 1[edx] + cmp edx,ebx + jle $L003outer + emms + jmp $L005common_tail +ALIGN 16 +$L001non_sse2: + mov esi,DWORD PTR 8[esp] + lea ebp,DWORD PTR 1[ebx] + mov edi,DWORD PTR 12[esp] + xor ecx,ecx + mov edx,esi + and ebp,1 + sub edx,edi + lea eax,DWORD PTR 4[ebx*4+edi] + or ebp,edx + mov edi,DWORD PTR [edi] + jz $L006bn_sqr_mont + mov DWORD PTR 28[esp],eax + mov 
eax,DWORD PTR [esi] + xor edx,edx +ALIGN 16 +$L007mull: + mov ebp,edx + mul edi + add ebp,eax + lea ecx,DWORD PTR 1[ecx] + adc edx,0 + mov eax,DWORD PTR [ecx*4+esi] + cmp ecx,ebx + mov DWORD PTR 28[ecx*4+esp],ebp + jl $L007mull + mov ebp,edx + mul edi + mov edi,DWORD PTR 20[esp] + add eax,ebp + mov esi,DWORD PTR 16[esp] + adc edx,0 + imul edi,DWORD PTR 32[esp] + mov DWORD PTR 32[ebx*4+esp],eax + xor ecx,ecx + mov DWORD PTR 36[ebx*4+esp],edx + mov DWORD PTR 40[ebx*4+esp],ecx + mov eax,DWORD PTR [esi] + mul edi + add eax,DWORD PTR 32[esp] + mov eax,DWORD PTR 4[esi] + adc edx,0 + inc ecx + jmp $L0082ndmadd +ALIGN 16 +$L0091stmadd: + mov ebp,edx + mul edi + add ebp,DWORD PTR 32[ecx*4+esp] + lea ecx,DWORD PTR 1[ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD PTR [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD PTR 28[ecx*4+esp],ebp + jl $L0091stmadd + mov ebp,edx + mul edi + add eax,DWORD PTR 32[ebx*4+esp] + mov edi,DWORD PTR 20[esp] + adc edx,0 + mov esi,DWORD PTR 16[esp] + add ebp,eax + adc edx,0 + imul edi,DWORD PTR 32[esp] + xor ecx,ecx + add edx,DWORD PTR 36[ebx*4+esp] + mov DWORD PTR 32[ebx*4+esp],ebp + adc ecx,0 + mov eax,DWORD PTR [esi] + mov DWORD PTR 36[ebx*4+esp],edx + mov DWORD PTR 40[ebx*4+esp],ecx + mul edi + add eax,DWORD PTR 32[esp] + mov eax,DWORD PTR 4[esi] + adc edx,0 + mov ecx,1 +ALIGN 16 +$L0082ndmadd: + mov ebp,edx + mul edi + add ebp,DWORD PTR 32[ecx*4+esp] + lea ecx,DWORD PTR 1[ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD PTR [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD PTR 24[ecx*4+esp],ebp + jl $L0082ndmadd + mov ebp,edx + mul edi + add ebp,DWORD PTR 32[ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD PTR 28[ebx*4+esp],ebp + xor eax,eax + mov ecx,DWORD PTR 12[esp] + add edx,DWORD PTR 36[ebx*4+esp] + adc eax,DWORD PTR 40[ebx*4+esp] + lea ecx,DWORD PTR 4[ecx] + mov DWORD PTR 32[ebx*4+esp],edx + cmp ecx,DWORD PTR 28[esp] + mov DWORD PTR 36[ebx*4+esp],eax + je $L005common_tail + mov edi,DWORD PTR [ecx] + mov esi,DWORD PTR 8[esp] + mov DWORD PTR 12[esp],ecx + xor ecx,ecx + xor edx,edx + mov eax,DWORD PTR [esi] + jmp $L0091stmadd +ALIGN 16 +$L006bn_sqr_mont: + mov DWORD PTR [esp],ebx + mov DWORD PTR 12[esp],ecx + mov eax,edi + mul edi + mov DWORD PTR 32[esp],eax + mov ebx,edx + shr edx,1 + and ebx,1 + inc ecx +ALIGN 16 +$L010sqr: + mov eax,DWORD PTR [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ecx,DWORD PTR 1[ecx] + adc edx,0 + lea ebp,DWORD PTR [eax*2+ebx] + shr eax,31 + cmp ecx,DWORD PTR [esp] + mov ebx,eax + mov DWORD PTR 28[ecx*4+esp],ebp + jl $L010sqr + mov eax,DWORD PTR [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + mov edi,DWORD PTR 20[esp] + adc edx,0 + mov esi,DWORD PTR 16[esp] + lea ebp,DWORD PTR [eax*2+ebx] + imul edi,DWORD PTR 32[esp] + shr eax,31 + mov DWORD PTR 32[ecx*4+esp],ebp + lea ebp,DWORD PTR [edx*2+eax] + mov eax,DWORD PTR [esi] + shr edx,31 + mov DWORD PTR 36[ecx*4+esp],ebp + mov DWORD PTR 40[ecx*4+esp],edx + mul edi + add eax,DWORD PTR 32[esp] + mov ebx,ecx + adc edx,0 + mov eax,DWORD PTR 4[esi] + mov ecx,1 +ALIGN 16 +$L0113rdmadd: + mov ebp,edx + mul edi + add ebp,DWORD PTR 32[ecx*4+esp] + adc edx,0 + add ebp,eax + mov eax,DWORD PTR 4[ecx*4+esi] + adc edx,0 + mov DWORD PTR 28[ecx*4+esp],ebp + mov ebp,edx + mul edi + add ebp,DWORD PTR 36[ecx*4+esp] + lea ecx,DWORD PTR 2[ecx] + adc edx,0 + add ebp,eax + mov eax,DWORD PTR [ecx*4+esi] + adc edx,0 + cmp ecx,ebx + mov DWORD PTR 24[ecx*4+esp],ebp + jl $L0113rdmadd + mov ebp,edx + mul edi + add ebp,DWORD PTR 32[ebx*4+esp] + adc edx,0 + add ebp,eax + adc edx,0 + mov DWORD PTR 
28[ebx*4+esp],ebp + mov ecx,DWORD PTR 12[esp] + xor eax,eax + mov esi,DWORD PTR 8[esp] + add edx,DWORD PTR 36[ebx*4+esp] + adc eax,DWORD PTR 40[ebx*4+esp] + mov DWORD PTR 32[ebx*4+esp],edx + cmp ecx,ebx + mov DWORD PTR 36[ebx*4+esp],eax + je $L005common_tail + mov edi,DWORD PTR 4[ecx*4+esi] + lea ecx,DWORD PTR 1[ecx] + mov eax,edi + mov DWORD PTR 12[esp],ecx + mul edi + add eax,DWORD PTR 32[ecx*4+esp] + adc edx,0 + mov DWORD PTR 32[ecx*4+esp],eax + xor ebp,ebp + cmp ecx,ebx + lea ecx,DWORD PTR 1[ecx] + je $L012sqrlast + mov ebx,edx + shr edx,1 + and ebx,1 +ALIGN 16 +$L013sqradd: + mov eax,DWORD PTR [ecx*4+esi] + mov ebp,edx + mul edi + add eax,ebp + lea ebp,DWORD PTR [eax*1+eax] + adc edx,0 + shr eax,31 + add ebp,DWORD PTR 32[ecx*4+esp] + lea ecx,DWORD PTR 1[ecx] + adc eax,0 + add ebp,ebx + adc eax,0 + cmp ecx,DWORD PTR [esp] + mov DWORD PTR 28[ecx*4+esp],ebp + mov ebx,eax + jle $L013sqradd + mov ebp,edx + add edx,edx + shr ebp,31 + add edx,ebx + adc ebp,0 +$L012sqrlast: + mov edi,DWORD PTR 20[esp] + mov esi,DWORD PTR 16[esp] + imul edi,DWORD PTR 32[esp] + add edx,DWORD PTR 32[ecx*4+esp] + mov eax,DWORD PTR [esi] + adc ebp,0 + mov DWORD PTR 32[ecx*4+esp],edx + mov DWORD PTR 36[ecx*4+esp],ebp + mul edi + add eax,DWORD PTR 32[esp] + lea ebx,DWORD PTR [ecx-1] + adc edx,0 + mov ecx,1 + mov eax,DWORD PTR 4[esi] + jmp $L0113rdmadd +ALIGN 16 +$L005common_tail: + mov ebp,DWORD PTR 16[esp] + mov edi,DWORD PTR 4[esp] + lea esi,DWORD PTR 32[esp] + mov eax,DWORD PTR [esi] + mov ecx,ebx + xor edx,edx +ALIGN 16 +$L014sub: + sbb eax,DWORD PTR [edx*4+ebp] + mov DWORD PTR [edx*4+edi],eax + dec ecx + mov eax,DWORD PTR 4[edx*4+esi] + lea edx,DWORD PTR 1[edx] + jge $L014sub + sbb eax,0 + and esi,eax + not eax + mov ebp,edi + and ebp,eax + or esi,ebp +ALIGN 16 +$L015copy: + mov eax,DWORD PTR [ebx*4+esi] + mov DWORD PTR [ebx*4+edi],eax + mov DWORD PTR 32[ebx*4+esp],ecx + dec ebx + jge $L015copy + mov esp,DWORD PTR 24[esp] + mov eax,1 +$L000just_leave: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_mont ENDP +DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +DB 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +DB 111,114,103,62,0 +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/camellia/cmll-x86.asm b/deps/openssl/asm_obsolete/x86-win32-masm/camellia/cmll-x86.asm new file mode 100644 index 00000000000000..6aac94e4906752 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/camellia/cmll-x86.asm @@ -0,0 +1,2370 @@ +TITLE cmll-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. 
+ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_Camellia_EncryptBlock_Rounds PROC PUBLIC +$L_Camellia_EncryptBlock_Rounds_begin:: + push ebp + push ebx + push esi + push edi + mov eax,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebx,esp + sub esp,28 + and esp,-64 + lea ecx,DWORD PTR [edi-127] + sub ecx,esp + neg ecx + and ecx,960 + sub esp,ecx + add esp,4 + shl eax,6 + lea eax,DWORD PTR [eax*1+edi] + mov DWORD PTR 20[esp],ebx + mov DWORD PTR 16[esp],eax + call $L000pic_point +$L000pic_point: + pop ebp + lea ebp,DWORD PTR ($LCamellia_SBOX-$L000pic_point)[ebp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + bswap eax + mov edx,DWORD PTR 12[esi] + bswap ebx + bswap ecx + bswap edx + call __x86_Camellia_encrypt + mov esp,DWORD PTR 20[esp] + bswap eax + mov esi,DWORD PTR 32[esp] + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +_Camellia_EncryptBlock_Rounds ENDP +ALIGN 16 +_Camellia_EncryptBlock PROC PUBLIC +$L_Camellia_EncryptBlock_begin:: + mov eax,128 + sub eax,DWORD PTR 4[esp] + mov eax,3 + adc eax,0 + mov DWORD PTR 4[esp],eax + jmp $L_Camellia_EncryptBlock_Rounds_begin +_Camellia_EncryptBlock ENDP +ALIGN 16 +_Camellia_encrypt PROC PUBLIC +$L_Camellia_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 28[esp] + mov ebx,esp + sub esp,28 + and esp,-64 + mov eax,DWORD PTR 272[edi] + lea ecx,DWORD PTR [edi-127] + sub ecx,esp + neg ecx + and ecx,960 + sub esp,ecx + add esp,4 + shl eax,6 + lea eax,DWORD PTR [eax*1+edi] + mov DWORD PTR 20[esp],ebx + mov DWORD PTR 16[esp],eax + call $L001pic_point +$L001pic_point: + pop ebp + lea ebp,DWORD PTR ($LCamellia_SBOX-$L001pic_point)[ebp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + bswap eax + mov edx,DWORD PTR 12[esi] + bswap ebx + bswap ecx + bswap edx + call __x86_Camellia_encrypt + mov esp,DWORD PTR 20[esp] + bswap eax + mov esi,DWORD PTR 24[esp] + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +_Camellia_encrypt ENDP +ALIGN 16 +__x86_Camellia_encrypt PROC PRIVATE + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov esi,DWORD PTR 16[edi] + mov DWORD PTR 4[esp],eax + mov DWORD PTR 8[esp],ebx + mov DWORD PTR 12[esp],ecx + mov DWORD PTR 16[esp],edx +ALIGN 16 +$L002loop: + xor eax,esi + xor ebx,DWORD PTR 20[edi] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 16[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 12[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 24[edi] + xor edx,ecx + mov DWORD PTR 16[esp],edx + xor ecx,ebx + mov DWORD PTR 12[esp],ecx + xor ecx,esi + xor edx,DWORD PTR 28[edi] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 
4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 8[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR 4[esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 32[edi] + xor ebx,eax + mov DWORD PTR 8[esp],ebx + xor eax,edx + mov DWORD PTR 4[esp],eax + xor eax,esi + xor ebx,DWORD PTR 36[edi] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 16[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 12[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 40[edi] + xor edx,ecx + mov DWORD PTR 16[esp],edx + xor ecx,ebx + mov DWORD PTR 12[esp],ecx + xor ecx,esi + xor edx,DWORD PTR 44[edi] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 8[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR 4[esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 48[edi] + xor ebx,eax + mov DWORD PTR 8[esp],ebx + xor eax,edx + mov DWORD PTR 4[esp],eax + xor eax,esi + xor ebx,DWORD PTR 52[edi] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 16[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 12[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 56[edi] + xor edx,ecx + mov DWORD PTR 16[esp],edx + xor ecx,ebx + mov DWORD PTR 12[esp],ecx + xor ecx,esi + xor edx,DWORD PTR 60[edi] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 8[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR 4[esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 64[edi] + xor ebx,eax + mov DWORD PTR 8[esp],ebx + xor eax,edx + mov DWORD PTR 4[esp],eax + add edi,64 + cmp edi,DWORD PTR 20[esp] + je $L003done + and esi,eax + mov edx,DWORD PTR 16[esp] + rol esi,1 + mov ecx,edx + xor ebx,esi + or ecx,DWORD PTR 12[edi] + mov DWORD PTR 8[esp],ebx + xor ecx,DWORD PTR 12[esp] + mov esi,DWORD PTR 4[edi] + mov DWORD PTR 12[esp],ecx + or esi,ebx + and ecx,DWORD PTR 8[edi] + xor eax,esi + rol ecx,1 + 
mov DWORD PTR 4[esp],eax + xor edx,ecx + mov esi,DWORD PTR 16[edi] + mov DWORD PTR 16[esp],edx + jmp $L002loop +ALIGN 8 +$L003done: + mov ecx,eax + mov edx,ebx + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + xor eax,esi + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + ret +__x86_Camellia_encrypt ENDP +ALIGN 16 +_Camellia_DecryptBlock_Rounds PROC PUBLIC +$L_Camellia_DecryptBlock_Rounds_begin:: + push ebp + push ebx + push esi + push edi + mov eax,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov ebx,esp + sub esp,28 + and esp,-64 + lea ecx,DWORD PTR [edi-127] + sub ecx,esp + neg ecx + and ecx,960 + sub esp,ecx + add esp,4 + shl eax,6 + mov DWORD PTR 16[esp],edi + lea edi,DWORD PTR [eax*1+edi] + mov DWORD PTR 20[esp],ebx + call $L004pic_point +$L004pic_point: + pop ebp + lea ebp,DWORD PTR ($LCamellia_SBOX-$L004pic_point)[ebp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + bswap eax + mov edx,DWORD PTR 12[esi] + bswap ebx + bswap ecx + bswap edx + call __x86_Camellia_decrypt + mov esp,DWORD PTR 20[esp] + bswap eax + mov esi,DWORD PTR 32[esp] + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +_Camellia_DecryptBlock_Rounds ENDP +ALIGN 16 +_Camellia_DecryptBlock PROC PUBLIC +$L_Camellia_DecryptBlock_begin:: + mov eax,128 + sub eax,DWORD PTR 4[esp] + mov eax,3 + adc eax,0 + mov DWORD PTR 4[esp],eax + jmp $L_Camellia_DecryptBlock_Rounds_begin +_Camellia_DecryptBlock ENDP +ALIGN 16 +_Camellia_decrypt PROC PUBLIC +$L_Camellia_decrypt_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 28[esp] + mov ebx,esp + sub esp,28 + and esp,-64 + mov eax,DWORD PTR 272[edi] + lea ecx,DWORD PTR [edi-127] + sub ecx,esp + neg ecx + and ecx,960 + sub esp,ecx + add esp,4 + shl eax,6 + mov DWORD PTR 16[esp],edi + lea edi,DWORD PTR [eax*1+edi] + mov DWORD PTR 20[esp],ebx + call $L005pic_point +$L005pic_point: + pop ebp + lea ebp,DWORD PTR ($LCamellia_SBOX-$L005pic_point)[ebp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + bswap eax + mov edx,DWORD PTR 12[esi] + bswap ebx + bswap ecx + bswap edx + call __x86_Camellia_decrypt + mov esp,DWORD PTR 20[esp] + bswap eax + mov esi,DWORD PTR 24[esp] + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + pop edi + pop esi + pop ebx + pop ebp + ret +_Camellia_decrypt ENDP +ALIGN 16 +__x86_Camellia_decrypt PROC PRIVATE + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov esi,DWORD PTR [edi-8] + mov DWORD PTR 4[esp],eax + mov DWORD PTR 8[esp],ebx + mov DWORD PTR 12[esp],ecx + mov DWORD PTR 16[esp],edx +ALIGN 16 +$L006loop: + xor eax,esi + xor ebx,DWORD PTR [edi-4] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 16[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 12[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR [edi-16] + xor 
edx,ecx + mov DWORD PTR 16[esp],edx + xor ecx,ebx + mov DWORD PTR 12[esp],ecx + xor ecx,esi + xor edx,DWORD PTR [edi-12] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 8[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR 4[esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR [edi-24] + xor ebx,eax + mov DWORD PTR 8[esp],ebx + xor eax,edx + mov DWORD PTR 4[esp],eax + xor eax,esi + xor ebx,DWORD PTR [edi-20] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 16[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 12[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR [edi-32] + xor edx,ecx + mov DWORD PTR 16[esp],edx + xor ecx,ebx + mov DWORD PTR 12[esp],ecx + xor ecx,esi + xor edx,DWORD PTR [edi-28] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 8[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR 4[esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR [edi-40] + xor ebx,eax + mov DWORD PTR 8[esp],ebx + xor eax,edx + mov DWORD PTR 4[esp],eax + xor eax,esi + xor ebx,DWORD PTR [edi-36] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 16[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 12[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR [edi-48] + xor edx,ecx + mov DWORD PTR 16[esp],edx + xor ecx,ebx + mov DWORD PTR 12[esp],ecx + xor ecx,esi + xor edx,DWORD PTR [edi-44] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 8[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR 4[esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR [edi-56] + xor ebx,eax + mov DWORD PTR 8[esp],ebx + xor eax,edx + mov DWORD PTR 4[esp],eax + sub edi,64 + cmp edi,DWORD PTR 20[esp] + je $L007done + and esi,eax + mov edx,DWORD PTR 16[esp] + rol esi,1 + mov ecx,edx + 
xor ebx,esi + or ecx,DWORD PTR 4[edi] + mov DWORD PTR 8[esp],ebx + xor ecx,DWORD PTR 12[esp] + mov esi,DWORD PTR 12[edi] + mov DWORD PTR 12[esp],ecx + or esi,ebx + and ecx,DWORD PTR [edi] + xor eax,esi + rol ecx,1 + mov DWORD PTR 4[esp],eax + xor edx,ecx + mov esi,DWORD PTR [edi-8] + mov DWORD PTR 16[esp],edx + jmp $L006loop +ALIGN 8 +$L007done: + mov ecx,eax + mov edx,ebx + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + xor ecx,esi + xor edx,DWORD PTR 12[edi] + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + ret +__x86_Camellia_decrypt ENDP +ALIGN 16 +_Camellia_Ekeygen PROC PUBLIC +$L_Camellia_Ekeygen_begin:: + push ebp + push ebx + push esi + push edi + sub esp,16 + mov ebp,DWORD PTR 36[esp] + mov esi,DWORD PTR 40[esp] + mov edi,DWORD PTR 44[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + cmp ebp,128 + je $L0081st128 + mov eax,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + cmp ebp,192 + je $L0091st192 + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 28[esi] + jmp $L0101st256 +ALIGN 4 +$L0091st192: + mov ecx,eax + mov edx,ebx + not ecx + not edx +ALIGN 4 +$L0101st256: + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR 32[edi],eax + mov DWORD PTR 36[edi],ebx + mov DWORD PTR 40[edi],ecx + mov DWORD PTR 44[edi],edx + xor eax,DWORD PTR [edi] + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] +ALIGN 4 +$L0081st128: + call $L011pic_point +$L011pic_point: + pop ebp + lea ebp,DWORD PTR ($LCamellia_SBOX-$L011pic_point)[ebp] + lea edi,DWORD PTR ($LCamellia_SIGMA-$LCamellia_SBOX)[ebp] + mov esi,DWORD PTR [edi] + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edx + xor eax,esi + xor ebx,DWORD PTR 4[edi] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 12[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 8[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 8[edi] + xor edx,ecx + mov DWORD PTR 12[esp],edx + xor ecx,ebx + mov DWORD PTR 8[esp],ecx + xor ecx,esi + xor edx,DWORD PTR 12[edi] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 4[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR [esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 16[edi] + xor ebx,eax + mov DWORD PTR 4[esp],ebx + xor eax,edx + mov DWORD PTR [esp],eax + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov esi,DWORD PTR 44[esp] + xor eax,DWORD PTR [esi] + xor ebx,DWORD PTR 4[esi] + xor ecx,DWORD PTR 8[esi] + xor edx,DWORD PTR 12[esi] + mov esi,DWORD PTR 16[edi] + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov DWORD PTR 8[esp],ecx + mov 
DWORD PTR 12[esp],edx + xor eax,esi + xor ebx,DWORD PTR 20[edi] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 12[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 8[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 24[edi] + xor edx,ecx + mov DWORD PTR 12[esp],edx + xor ecx,ebx + mov DWORD PTR 8[esp],ecx + xor ecx,esi + xor edx,DWORD PTR 28[edi] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 4[esp] + xor eax,ebx + ror ebx,8 + xor eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR [esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 32[edi] + xor ebx,eax + mov DWORD PTR 4[esp],ebx + xor eax,edx + mov DWORD PTR [esp],eax + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov esi,DWORD PTR 36[esp] + cmp esi,128 + jne $L0122nd256 + mov edi,DWORD PTR 44[esp] + lea edi,DWORD PTR 128[edi] + mov DWORD PTR [edi-112],eax + mov DWORD PTR [edi-108],ebx + mov DWORD PTR [edi-104],ecx + mov DWORD PTR [edi-100],edx + mov ebp,eax + shl eax,15 + mov esi,ebx + shr esi,17 + shl ebx,15 + or eax,esi + mov esi,ecx + shl ecx,15 + mov DWORD PTR [edi-80],eax + shr esi,17 + or ebx,esi + shr ebp,17 + mov esi,edx + shr esi,17 + mov DWORD PTR [edi-76],ebx + shl edx,15 + or ecx,esi + or edx,ebp + mov DWORD PTR [edi-72],ecx + mov DWORD PTR [edi-68],edx + mov ebp,eax + shl eax,15 + mov esi,ebx + shr esi,17 + shl ebx,15 + or eax,esi + mov esi,ecx + shl ecx,15 + mov DWORD PTR [edi-64],eax + shr esi,17 + or ebx,esi + shr ebp,17 + mov esi,edx + shr esi,17 + mov DWORD PTR [edi-60],ebx + shl edx,15 + or ecx,esi + or edx,ebp + mov DWORD PTR [edi-56],ecx + mov DWORD PTR [edi-52],edx + mov ebp,eax + shl eax,15 + mov esi,ebx + shr esi,17 + shl ebx,15 + or eax,esi + mov esi,ecx + shl ecx,15 + mov DWORD PTR [edi-32],eax + shr esi,17 + or ebx,esi + shr ebp,17 + mov esi,edx + shr esi,17 + mov DWORD PTR [edi-28],ebx + shl edx,15 + or ecx,esi + or edx,ebp + mov ebp,eax + shl eax,15 + mov esi,ebx + shr esi,17 + shl ebx,15 + or eax,esi + mov esi,ecx + shl ecx,15 + mov DWORD PTR [edi-16],eax + shr esi,17 + or ebx,esi + shr ebp,17 + mov esi,edx + shr esi,17 + mov DWORD PTR [edi-12],ebx + shl edx,15 + or ecx,esi + or edx,ebp + mov DWORD PTR [edi-8],ecx + mov DWORD PTR [edi-4],edx + mov ebp,ebx + shl ebx,2 + mov esi,ecx + shr esi,30 + shl ecx,2 + or ebx,esi + mov esi,edx + shl edx,2 + mov DWORD PTR 32[edi],ebx + shr esi,30 + or ecx,esi + shr ebp,30 + mov esi,eax + shr esi,30 + mov DWORD PTR 36[edi],ecx + shl eax,2 + or edx,esi + or eax,ebp + mov DWORD PTR 40[edi],edx + mov DWORD PTR 44[edi],eax + mov ebp,ebx + shl ebx,17 + mov esi,ecx + shr esi,15 + shl ecx,17 + or ebx,esi + mov esi,edx + shl edx,17 + mov DWORD PTR 64[edi],ebx + shr esi,15 + or ecx,esi + shr ebp,15 + mov esi,eax + shr esi,15 + mov DWORD PTR 68[edi],ecx + shl eax,17 + or edx,esi + or eax,ebp + mov DWORD PTR 72[edi],edx + mov DWORD PTR 76[edi],eax + mov 
ebx,DWORD PTR [edi-128] + mov ecx,DWORD PTR [edi-124] + mov edx,DWORD PTR [edi-120] + mov eax,DWORD PTR [edi-116] + mov ebp,ebx + shl ebx,15 + mov esi,ecx + shr esi,17 + shl ecx,15 + or ebx,esi + mov esi,edx + shl edx,15 + mov DWORD PTR [edi-96],ebx + shr esi,17 + or ecx,esi + shr ebp,17 + mov esi,eax + shr esi,17 + mov DWORD PTR [edi-92],ecx + shl eax,15 + or edx,esi + or eax,ebp + mov DWORD PTR [edi-88],edx + mov DWORD PTR [edi-84],eax + mov ebp,ebx + shl ebx,30 + mov esi,ecx + shr esi,2 + shl ecx,30 + or ebx,esi + mov esi,edx + shl edx,30 + mov DWORD PTR [edi-48],ebx + shr esi,2 + or ecx,esi + shr ebp,2 + mov esi,eax + shr esi,2 + mov DWORD PTR [edi-44],ecx + shl eax,30 + or edx,esi + or eax,ebp + mov DWORD PTR [edi-40],edx + mov DWORD PTR [edi-36],eax + mov ebp,ebx + shl ebx,15 + mov esi,ecx + shr esi,17 + shl ecx,15 + or ebx,esi + mov esi,edx + shl edx,15 + shr esi,17 + or ecx,esi + shr ebp,17 + mov esi,eax + shr esi,17 + shl eax,15 + or edx,esi + or eax,ebp + mov DWORD PTR [edi-24],edx + mov DWORD PTR [edi-20],eax + mov ebp,ebx + shl ebx,17 + mov esi,ecx + shr esi,15 + shl ecx,17 + or ebx,esi + mov esi,edx + shl edx,17 + mov DWORD PTR [edi],ebx + shr esi,15 + or ecx,esi + shr ebp,15 + mov esi,eax + shr esi,15 + mov DWORD PTR 4[edi],ecx + shl eax,17 + or edx,esi + or eax,ebp + mov DWORD PTR 8[edi],edx + mov DWORD PTR 12[edi],eax + mov ebp,ebx + shl ebx,17 + mov esi,ecx + shr esi,15 + shl ecx,17 + or ebx,esi + mov esi,edx + shl edx,17 + mov DWORD PTR 16[edi],ebx + shr esi,15 + or ecx,esi + shr ebp,15 + mov esi,eax + shr esi,15 + mov DWORD PTR 20[edi],ecx + shl eax,17 + or edx,esi + or eax,ebp + mov DWORD PTR 24[edi],edx + mov DWORD PTR 28[edi],eax + mov ebp,ebx + shl ebx,17 + mov esi,ecx + shr esi,15 + shl ecx,17 + or ebx,esi + mov esi,edx + shl edx,17 + mov DWORD PTR 48[edi],ebx + shr esi,15 + or ecx,esi + shr ebp,15 + mov esi,eax + shr esi,15 + mov DWORD PTR 52[edi],ecx + shl eax,17 + or edx,esi + or eax,ebp + mov DWORD PTR 56[edi],edx + mov DWORD PTR 60[edi],eax + mov eax,3 + jmp $L013done +ALIGN 16 +$L0122nd256: + mov esi,DWORD PTR 44[esp] + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],ebx + mov DWORD PTR 56[esi],ecx + mov DWORD PTR 60[esi],edx + xor eax,DWORD PTR 32[esi] + xor ebx,DWORD PTR 36[esi] + xor ecx,DWORD PTR 40[esi] + xor edx,DWORD PTR 44[esi] + mov esi,DWORD PTR 32[edi] + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edx + xor eax,esi + xor ebx,DWORD PTR 36[edi] + movzx esi,ah + mov edx,DWORD PTR 2052[esi*8+ebp] + movzx esi,al + xor edx,DWORD PTR 4[esi*8+ebp] + shr eax,16 + movzx esi,bl + mov ecx,DWORD PTR [esi*8+ebp] + movzx esi,ah + xor edx,DWORD PTR [esi*8+ebp] + movzx esi,bh + xor ecx,DWORD PTR 4[esi*8+ebp] + shr ebx,16 + movzx eax,al + xor edx,DWORD PTR 2048[eax*8+ebp] + movzx esi,bh + mov eax,DWORD PTR 12[esp] + xor ecx,edx + ror edx,8 + xor ecx,DWORD PTR 2048[esi*8+ebp] + movzx esi,bl + mov ebx,DWORD PTR 8[esp] + xor edx,eax + xor ecx,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 40[edi] + xor edx,ecx + mov DWORD PTR 12[esp],edx + xor ecx,ebx + mov DWORD PTR 8[esp],ecx + xor ecx,esi + xor edx,DWORD PTR 44[edi] + movzx esi,ch + mov ebx,DWORD PTR 2052[esi*8+ebp] + movzx esi,cl + xor ebx,DWORD PTR 4[esi*8+ebp] + shr ecx,16 + movzx esi,dl + mov eax,DWORD PTR [esi*8+ebp] + movzx esi,ch + xor ebx,DWORD PTR [esi*8+ebp] + movzx esi,dh + xor eax,DWORD PTR 4[esi*8+ebp] + shr edx,16 + movzx ecx,cl + xor ebx,DWORD PTR 2048[ecx*8+ebp] + movzx esi,dh + mov ecx,DWORD PTR 4[esp] + xor eax,ebx + ror ebx,8 + xor 
eax,DWORD PTR 2048[esi*8+ebp] + movzx esi,dl + mov edx,DWORD PTR [esp] + xor ebx,ecx + xor eax,DWORD PTR 2052[esi*8+ebp] + mov esi,DWORD PTR 48[edi] + xor ebx,eax + mov DWORD PTR 4[esp],ebx + xor eax,edx + mov DWORD PTR [esp],eax + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov edi,DWORD PTR 44[esp] + lea edi,DWORD PTR 128[edi] + mov DWORD PTR [edi-112],eax + mov DWORD PTR [edi-108],ebx + mov DWORD PTR [edi-104],ecx + mov DWORD PTR [edi-100],edx + mov ebp,eax + shl eax,30 + mov esi,ebx + shr esi,2 + shl ebx,30 + or eax,esi + mov esi,ecx + shl ecx,30 + mov DWORD PTR [edi-48],eax + shr esi,2 + or ebx,esi + shr ebp,2 + mov esi,edx + shr esi,2 + mov DWORD PTR [edi-44],ebx + shl edx,30 + or ecx,esi + or edx,ebp + mov DWORD PTR [edi-40],ecx + mov DWORD PTR [edi-36],edx + mov ebp,eax + shl eax,30 + mov esi,ebx + shr esi,2 + shl ebx,30 + or eax,esi + mov esi,ecx + shl ecx,30 + mov DWORD PTR 32[edi],eax + shr esi,2 + or ebx,esi + shr ebp,2 + mov esi,edx + shr esi,2 + mov DWORD PTR 36[edi],ebx + shl edx,30 + or ecx,esi + or edx,ebp + mov DWORD PTR 40[edi],ecx + mov DWORD PTR 44[edi],edx + mov ebp,ebx + shl ebx,19 + mov esi,ecx + shr esi,13 + shl ecx,19 + or ebx,esi + mov esi,edx + shl edx,19 + mov DWORD PTR 128[edi],ebx + shr esi,13 + or ecx,esi + shr ebp,13 + mov esi,eax + shr esi,13 + mov DWORD PTR 132[edi],ecx + shl eax,19 + or edx,esi + or eax,ebp + mov DWORD PTR 136[edi],edx + mov DWORD PTR 140[edi],eax + mov ebx,DWORD PTR [edi-96] + mov ecx,DWORD PTR [edi-92] + mov edx,DWORD PTR [edi-88] + mov eax,DWORD PTR [edi-84] + mov ebp,ebx + shl ebx,15 + mov esi,ecx + shr esi,17 + shl ecx,15 + or ebx,esi + mov esi,edx + shl edx,15 + mov DWORD PTR [edi-96],ebx + shr esi,17 + or ecx,esi + shr ebp,17 + mov esi,eax + shr esi,17 + mov DWORD PTR [edi-92],ecx + shl eax,15 + or edx,esi + or eax,ebp + mov DWORD PTR [edi-88],edx + mov DWORD PTR [edi-84],eax + mov ebp,ebx + shl ebx,15 + mov esi,ecx + shr esi,17 + shl ecx,15 + or ebx,esi + mov esi,edx + shl edx,15 + mov DWORD PTR [edi-64],ebx + shr esi,17 + or ecx,esi + shr ebp,17 + mov esi,eax + shr esi,17 + mov DWORD PTR [edi-60],ecx + shl eax,15 + or edx,esi + or eax,ebp + mov DWORD PTR [edi-56],edx + mov DWORD PTR [edi-52],eax + mov ebp,ebx + shl ebx,30 + mov esi,ecx + shr esi,2 + shl ecx,30 + or ebx,esi + mov esi,edx + shl edx,30 + mov DWORD PTR 16[edi],ebx + shr esi,2 + or ecx,esi + shr ebp,2 + mov esi,eax + shr esi,2 + mov DWORD PTR 20[edi],ecx + shl eax,30 + or edx,esi + or eax,ebp + mov DWORD PTR 24[edi],edx + mov DWORD PTR 28[edi],eax + mov ebp,ecx + shl ecx,2 + mov esi,edx + shr esi,30 + shl edx,2 + or ecx,esi + mov esi,eax + shl eax,2 + mov DWORD PTR 80[edi],ecx + shr esi,30 + or edx,esi + shr ebp,30 + mov esi,ebx + shr esi,30 + mov DWORD PTR 84[edi],edx + shl ebx,2 + or eax,esi + or ebx,ebp + mov DWORD PTR 88[edi],eax + mov DWORD PTR 92[edi],ebx + mov ecx,DWORD PTR [edi-80] + mov edx,DWORD PTR [edi-76] + mov eax,DWORD PTR [edi-72] + mov ebx,DWORD PTR [edi-68] + mov ebp,ecx + shl ecx,15 + mov esi,edx + shr esi,17 + shl edx,15 + or ecx,esi + mov esi,eax + shl eax,15 + mov DWORD PTR [edi-80],ecx + shr esi,17 + or edx,esi + shr ebp,17 + mov esi,ebx + shr esi,17 + mov DWORD PTR [edi-76],edx + shl ebx,15 + or eax,esi + or ebx,ebp + mov DWORD PTR [edi-72],eax + mov DWORD PTR [edi-68],ebx + mov ebp,ecx + shl ecx,30 + mov esi,edx + shr esi,2 + shl edx,30 + or ecx,esi + mov esi,eax + shl eax,30 + mov DWORD PTR [edi-16],ecx + shr esi,2 + or edx,esi + shr ebp,2 + mov esi,ebx + shr esi,2 + mov DWORD PTR [edi-12],edx + shl ebx,30 + or eax,esi + or 
ebx,ebp + mov DWORD PTR [edi-8],eax + mov DWORD PTR [edi-4],ebx + mov DWORD PTR 64[edi],edx + mov DWORD PTR 68[edi],eax + mov DWORD PTR 72[edi],ebx + mov DWORD PTR 76[edi],ecx + mov ebp,edx + shl edx,17 + mov esi,eax + shr esi,15 + shl eax,17 + or edx,esi + mov esi,ebx + shl ebx,17 + mov DWORD PTR 96[edi],edx + shr esi,15 + or eax,esi + shr ebp,15 + mov esi,ecx + shr esi,15 + mov DWORD PTR 100[edi],eax + shl ecx,17 + or ebx,esi + or ecx,ebp + mov DWORD PTR 104[edi],ebx + mov DWORD PTR 108[edi],ecx + mov edx,DWORD PTR [edi-128] + mov eax,DWORD PTR [edi-124] + mov ebx,DWORD PTR [edi-120] + mov ecx,DWORD PTR [edi-116] + mov ebp,eax + shl eax,13 + mov esi,ebx + shr esi,19 + shl ebx,13 + or eax,esi + mov esi,ecx + shl ecx,13 + mov DWORD PTR [edi-32],eax + shr esi,19 + or ebx,esi + shr ebp,19 + mov esi,edx + shr esi,19 + mov DWORD PTR [edi-28],ebx + shl edx,13 + or ecx,esi + or edx,ebp + mov DWORD PTR [edi-24],ecx + mov DWORD PTR [edi-20],edx + mov ebp,eax + shl eax,15 + mov esi,ebx + shr esi,17 + shl ebx,15 + or eax,esi + mov esi,ecx + shl ecx,15 + mov DWORD PTR [edi],eax + shr esi,17 + or ebx,esi + shr ebp,17 + mov esi,edx + shr esi,17 + mov DWORD PTR 4[edi],ebx + shl edx,15 + or ecx,esi + or edx,ebp + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov ebp,eax + shl eax,17 + mov esi,ebx + shr esi,15 + shl ebx,17 + or eax,esi + mov esi,ecx + shl ecx,17 + mov DWORD PTR 48[edi],eax + shr esi,15 + or ebx,esi + shr ebp,15 + mov esi,edx + shr esi,15 + mov DWORD PTR 52[edi],ebx + shl edx,17 + or ecx,esi + or edx,ebp + mov DWORD PTR 56[edi],ecx + mov DWORD PTR 60[edi],edx + mov ebp,ebx + shl ebx,2 + mov esi,ecx + shr esi,30 + shl ecx,2 + or ebx,esi + mov esi,edx + shl edx,2 + mov DWORD PTR 112[edi],ebx + shr esi,30 + or ecx,esi + shr ebp,30 + mov esi,eax + shr esi,30 + mov DWORD PTR 116[edi],ecx + shl eax,2 + or edx,esi + or eax,ebp + mov DWORD PTR 120[edi],edx + mov DWORD PTR 124[edi],eax + mov eax,4 +$L013done: + lea edx,DWORD PTR 144[edi] + add esp,16 + pop edi + pop esi + pop ebx + pop ebp + ret +_Camellia_Ekeygen ENDP +ALIGN 16 +_private_Camellia_set_key PROC PUBLIC +$L_private_Camellia_set_key_begin:: + push ebx + mov ecx,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + mov edx,DWORD PTR 16[esp] + mov eax,-1 + test ecx,ecx + jz $L014done + test edx,edx + jz $L014done + mov eax,-2 + cmp ebx,256 + je $L015arg_ok + cmp ebx,192 + je $L015arg_ok + cmp ebx,128 + jne $L014done +ALIGN 4 +$L015arg_ok: + push edx + push ecx + push ebx + call $L_Camellia_Ekeygen_begin + add esp,12 + mov DWORD PTR [edx],eax + xor eax,eax +ALIGN 4 +$L014done: + pop ebx + ret +_private_Camellia_set_key ENDP +ALIGN 64 +$LCamellia_SIGMA:: +DD 2694735487,1003262091,3061508184,1286239154 +DD 3337565999,3914302142,1426019237,4057165596 +DD 283453434,3731369245,2958461122,3018244605 +DD 0,0,0,0 +ALIGN 64 +$LCamellia_SBOX:: +DD 1886416896,1886388336 +DD 2189591040,741081132 +DD 741092352,3014852787 +DD 3974949888,3233808576 +DD 3014898432,3840147684 +DD 656877312,1465319511 +DD 3233857536,3941204202 +DD 3857048832,2930639022 +DD 3840205824,589496355 +DD 2240120064,1802174571 +DD 1465341696,1162149957 +DD 892679424,2779054245 +DD 3941263872,3991732461 +DD 202116096,1330577487 +DD 2930683392,488439837 +DD 1094795520,2459041938 +DD 589505280,2256928902 +DD 4025478912,2947481775 +DD 1802201856,2088501372 +DD 2475922176,522125343 +DD 1162167552,1044250686 +DD 421075200,3705405660 +DD 2779096320,1583218782 +DD 555819264,185270283 +DD 3991792896,2795896998 +DD 235802112,960036921 +DD 1330597632,3587506389 +DD 1313754624,1566376029 
+DD 488447232,3654877401 +DD 1701143808,1515847770 +DD 2459079168,1364262993 +DD 3183328512,1819017324 +DD 2256963072,2341142667 +DD 3099113472,2593783962 +DD 2947526400,4227531003 +DD 2408550144,2964324528 +DD 2088532992,1953759348 +DD 3958106880,724238379 +DD 522133248,4042260720 +DD 3469659648,2223243396 +DD 1044266496,3755933919 +DD 808464384,3419078859 +DD 3705461760,875823156 +DD 1600085760,1987444854 +DD 1583242752,1835860077 +DD 3318072576,2846425257 +DD 185273088,3520135377 +DD 437918208,67371012 +DD 2795939328,336855060 +DD 3789676800,976879674 +DD 960051456,3739091166 +DD 3402287616,286326801 +DD 3587560704,842137650 +DD 1195853568,2627469468 +DD 1566399744,1397948499 +DD 1027423488,4075946226 +DD 3654932736,4278059262 +DD 16843008,3486449871 +DD 1515870720,3284336835 +DD 3604403712,2054815866 +DD 1364283648,606339108 +DD 1448498688,3907518696 +DD 1819044864,1616904288 +DD 1296911616,1768489065 +DD 2341178112,2863268010 +DD 218959104,2694840480 +DD 2593823232,2711683233 +DD 1717986816,1650589794 +DD 4227595008,1414791252 +DD 3435973632,505282590 +DD 2964369408,3772776672 +DD 757935360,1684275300 +DD 1953788928,269484048 +DD 303174144,0 +DD 724249344,2745368739 +DD 538976256,1970602101 +DD 4042321920,2324299914 +DD 2981212416,3873833190 +DD 2223277056,151584777 +DD 2576980224,3722248413 +DD 3755990784,2273771655 +DD 1280068608,2206400643 +DD 3419130624,3452764365 +DD 3267543552,2425356432 +DD 875836416,1936916595 +DD 2122219008,4143317238 +DD 1987474944,2644312221 +DD 84215040,3216965823 +DD 1835887872,1381105746 +DD 3082270464,3638034648 +DD 2846468352,3368550600 +DD 825307392,3334865094 +DD 3520188672,2172715137 +DD 387389184,1869545583 +DD 67372032,320012307 +DD 3621246720,1667432547 +DD 336860160,3924361449 +DD 1482184704,2812739751 +DD 976894464,2677997727 +DD 1633771776,3166437564 +DD 3739147776,690552873 +DD 454761216,4193845497 +DD 286331136,791609391 +DD 471604224,3031695540 +DD 842150400,2021130360 +DD 252645120,101056518 +DD 2627509248,3890675943 +DD 370546176,1903231089 +DD 1397969664,3570663636 +DD 404232192,2880110763 +DD 4076007936,2290614408 +DD 572662272,2374828173 +DD 4278124032,1920073842 +DD 1145324544,3115909305 +DD 3486502656,4177002744 +DD 2998055424,2896953516 +DD 3284386560,909508662 +DD 3048584448,707395626 +DD 2054846976,1010565180 +DD 2442236160,4059103473 +DD 606348288,1077936192 +DD 134744064,3553820883 +DD 3907577856,3149594811 +DD 2829625344,1128464451 +DD 1616928768,353697813 +DD 4244438016,2913796269 +DD 1768515840,2004287607 +DD 1347440640,2155872384 +DD 2863311360,2189557890 +DD 3503345664,3974889708 +DD 2694881280,656867367 +DD 2105376000,3856990437 +DD 2711724288,2240086149 +DD 2307492096,892665909 +DD 1650614784,202113036 +DD 2543294208,1094778945 +DD 1414812672,4025417967 +DD 1532713728,2475884691 +DD 505290240,421068825 +DD 2509608192,555810849 +DD 3772833792,235798542 +DD 4294967040,1313734734 +DD 1684300800,1701118053 +DD 3537031680,3183280317 +DD 269488128,3099066552 +DD 3301229568,2408513679 +DD 0,3958046955 +DD 1212696576,3469607118 +DD 2745410304,808452144 +DD 4160222976,1600061535 +DD 1970631936,3318022341 +DD 3688618752,437911578 +DD 2324335104,3789619425 +DD 50529024,3402236106 +DD 3873891840,1195835463 +DD 3671775744,1027407933 +DD 151587072,16842753 +DD 1061109504,3604349142 +DD 3722304768,1448476758 +DD 2492765184,1296891981 +DD 2273806080,218955789 +DD 1549556736,1717960806 +DD 2206434048,3435921612 +DD 33686016,757923885 +DD 3452816640,303169554 +DD 1246382592,538968096 +DD 2425393152,2981167281 +DD 858993408,2576941209 +DD 
1936945920,1280049228 +DD 1734829824,3267494082 +DD 4143379968,2122186878 +DD 4092850944,84213765 +DD 2644352256,3082223799 +DD 2139062016,825294897 +DD 3217014528,387383319 +DD 3806519808,3621191895 +DD 1381126656,1482162264 +DD 2610666240,1633747041 +DD 3638089728,454754331 +DD 640034304,471597084 +DD 3368601600,252641295 +DD 926365440,370540566 +DD 3334915584,404226072 +DD 993737472,572653602 +DD 2172748032,1145307204 +DD 2526451200,2998010034 +DD 1869573888,3048538293 +DD 1263225600,2442199185 +DD 320017152,134742024 +DD 3200171520,2829582504 +DD 1667457792,4244373756 +DD 774778368,1347420240 +DD 3924420864,3503292624 +DD 2038003968,2105344125 +DD 2812782336,2307457161 +DD 2358021120,2543255703 +DD 2678038272,1532690523 +DD 1852730880,2509570197 +DD 3166485504,4294902015 +DD 2391707136,3536978130 +DD 690563328,3301179588 +DD 4126536960,1212678216 +DD 4193908992,4160159991 +DD 3065427456,3688562907 +DD 791621376,50528259 +DD 4261281024,3671720154 +DD 3031741440,1061093439 +DD 1499027712,2492727444 +DD 2021160960,1549533276 +DD 2560137216,33685506 +DD 101058048,1246363722 +DD 1785358848,858980403 +DD 3890734848,1734803559 +DD 1179010560,4092788979 +DD 1903259904,2139029631 +DD 3132799488,3806462178 +DD 3570717696,2610626715 +DD 623191296,640024614 +DD 2880154368,926351415 +DD 1111638528,993722427 +DD 2290649088,2526412950 +DD 2728567296,1263206475 +DD 2374864128,3200123070 +DD 4210752000,774766638 +DD 1920102912,2037973113 +DD 117901056,2357985420 +DD 3115956480,1852702830 +DD 1431655680,2391670926 +DD 4177065984,4126474485 +DD 4008635904,3065381046 +DD 2896997376,4261216509 +DD 168430080,1499005017 +DD 909522432,2560098456 +DD 1229539584,1785331818 +DD 707406336,1178992710 +DD 1751672832,3132752058 +DD 1010580480,623181861 +DD 943208448,1111621698 +DD 4059164928,2728525986 +DD 2762253312,4210688250 +DD 1077952512,117899271 +DD 673720320,1431634005 +DD 3553874688,4008575214 +DD 2071689984,168427530 +DD 3149642496,1229520969 +DD 3385444608,1751646312 +DD 1128481536,943194168 +DD 3250700544,2762211492 +DD 353703168,673710120 +DD 3823362816,2071658619 +DD 2913840384,3385393353 +DD 4109693952,3250651329 +DD 2004317952,3823304931 +DD 3351758592,4109631732 +DD 2155905024,3351707847 +DD 2661195264,2661154974 +DD 14737632,939538488 +DD 328965,1090535745 +DD 5789784,369104406 +DD 14277081,1979741814 +DD 6776679,3640711641 +DD 5131854,2466288531 +DD 8487297,1610637408 +DD 13355979,4060148466 +DD 13224393,1912631922 +DD 723723,3254829762 +DD 11447982,2868947883 +DD 6974058,2583730842 +DD 14013909,1962964341 +DD 1579032,100664838 +DD 6118749,1459640151 +DD 8553090,2684395680 +DD 4605510,2432733585 +DD 14671839,4144035831 +DD 14079702,3036722613 +DD 2565927,3372272073 +DD 9079434,2717950626 +DD 3289650,2348846220 +DD 4934475,3523269330 +DD 4342338,2415956112 +DD 14408667,4127258358 +DD 1842204,117442311 +DD 10395294,2801837991 +DD 10263708,654321447 +DD 3815994,2382401166 +DD 13290186,2986390194 +DD 2434341,1224755529 +DD 8092539,3724599006 +DD 855309,1124090691 +DD 7434609,1543527516 +DD 6250335,3607156695 +DD 2039583,3338717127 +DD 16316664,1040203326 +DD 14145495,4110480885 +DD 4079166,2399178639 +DD 10329501,1728079719 +DD 8158332,520101663 +DD 6316128,402659352 +DD 12171705,1845522030 +DD 12500670,2936057775 +DD 12369084,788541231 +DD 9145227,3791708898 +DD 1447446,2231403909 +DD 3421236,218107149 +DD 5066061,1392530259 +DD 12829635,4026593520 +DD 7500402,2617285788 +DD 9803157,1694524773 +DD 11250603,3925928682 +DD 9342606,2734728099 +DD 12237498,2919280302 +DD 8026746,2650840734 +DD 
11776947,3959483628 +DD 131586,2147516544 +DD 11842740,754986285 +DD 11382189,1795189611 +DD 10658466,2818615464 +DD 11316396,721431339 +DD 14211288,905983542 +DD 10132122,2785060518 +DD 1513239,3305162181 +DD 1710618,2248181382 +DD 3487029,1291865421 +DD 13421772,855651123 +DD 16250871,4244700669 +DD 10066329,1711302246 +DD 6381921,1476417624 +DD 5921370,2516620950 +DD 15263976,973093434 +DD 2368548,150997257 +DD 5658198,2499843477 +DD 4210752,268439568 +DD 14803425,2013296760 +DD 6513507,3623934168 +DD 592137,1107313218 +DD 3355443,3422604492 +DD 12566463,4009816047 +DD 10000536,637543974 +DD 9934743,3842041317 +DD 8750469,1627414881 +DD 6842472,436214298 +DD 16579836,1056980799 +DD 15527148,989870907 +DD 657930,2181071490 +DD 14342874,3053500086 +DD 7303023,3674266587 +DD 5460819,3556824276 +DD 6447714,2550175896 +DD 10724259,3892373736 +DD 3026478,2332068747 +DD 526344,33554946 +DD 11513775,3942706155 +DD 2631720,167774730 +DD 11579568,738208812 +DD 7631988,486546717 +DD 12763842,2952835248 +DD 12434877,1862299503 +DD 3552822,2365623693 +DD 2236962,2281736328 +DD 3684408,234884622 +DD 6579300,419436825 +DD 1973790,2264958855 +DD 3750201,1308642894 +DD 2894892,184552203 +DD 10921638,2835392937 +DD 3158064,201329676 +DD 15066597,2030074233 +DD 4473924,285217041 +DD 16645629,2130739071 +DD 8947848,570434082 +DD 10461087,3875596263 +DD 6645093,1493195097 +DD 8882055,3774931425 +DD 7039851,3657489114 +DD 16053492,1023425853 +DD 2302755,3355494600 +DD 4737096,301994514 +DD 1052688,67109892 +DD 13750737,1946186868 +DD 5329233,1409307732 +DD 12632256,805318704 +DD 16382457,2113961598 +DD 13816530,3019945140 +DD 10526880,671098920 +DD 5592405,1426085205 +DD 10592673,1744857192 +DD 4276545,1342197840 +DD 16448250,3187719870 +DD 4408131,3489714384 +DD 1250067,3288384708 +DD 12895428,822096177 +DD 3092271,3405827019 +DD 11053224,704653866 +DD 11974326,2902502829 +DD 3947580,251662095 +DD 2829099,3389049546 +DD 12698049,1879076976 +DD 16777215,4278255615 +DD 13158600,838873650 +DD 10855845,1761634665 +DD 2105376,134219784 +DD 9013641,1644192354 +DD 0,0 +DD 9474192,603989028 +DD 4671303,3506491857 +DD 15724527,4211145723 +DD 15395562,3120609978 +DD 12040119,3976261101 +DD 1381653,1157645637 +DD 394758,2164294017 +DD 13487565,1929409395 +DD 11908533,1828744557 +DD 1184274,2214626436 +DD 8289918,2667618207 +DD 12303291,3993038574 +DD 2697513,1241533002 +DD 986895,3271607235 +DD 12105912,771763758 +DD 460551,3238052289 +DD 263172,16777473 +DD 10197915,3858818790 +DD 9737364,620766501 +DD 2171169,1207978056 +DD 6710886,2566953369 +DD 15132390,3103832505 +DD 13553358,3003167667 +DD 15592941,2063629179 +DD 15198183,4177590777 +DD 3881787,3456159438 +DD 16711422,3204497343 +DD 8355711,3741376479 +DD 12961221,1895854449 +DD 10790052,687876393 +DD 3618615,3439381965 +DD 11645361,1811967084 +DD 5000268,318771987 +DD 9539985,1677747300 +DD 7237230,2600508315 +DD 9276813,1660969827 +DD 7763574,2634063261 +DD 197379,3221274816 +DD 2960685,1258310475 +DD 14606046,3070277559 +DD 9868950,2768283045 +DD 2500134,2298513801 +DD 8224125,1593859935 +DD 13027014,2969612721 +DD 6052956,385881879 +DD 13882323,4093703412 +DD 15921906,3154164924 +DD 5197647,3540046803 +DD 1644825,1174423110 +DD 4144959,3472936911 +DD 14474460,922761015 +DD 7960953,1577082462 +DD 1907997,1191200583 +DD 5395026,2483066004 +DD 15461355,4194368250 +DD 15987699,4227923196 +DD 7171437,1526750043 +DD 6184542,2533398423 +DD 16514043,4261478142 +DD 6908265,1509972570 +DD 11711154,2885725356 +DD 15790320,1006648380 +DD 3223857,1275087948 +DD 
789516,50332419 +DD 13948116,889206069 +DD 13619151,4076925939 +DD 9211020,587211555 +DD 14869218,3087055032 +DD 7697781,1560304989 +DD 11119017,1778412138 +DD 4868682,2449511058 +DD 5723991,3573601749 +DD 8684676,553656609 +DD 1118481,1140868164 +DD 4539717,1358975313 +DD 1776411,3321939654 +DD 16119285,2097184125 +DD 15000804,956315961 +DD 921102,2197848963 +DD 7566195,3691044060 +DD 11184810,2852170410 +DD 15856113,2080406652 +DD 14540253,1996519287 +DD 5855577,1442862678 +DD 1315860,83887365 +DD 7105644,452991771 +DD 9605778,2751505572 +DD 5526612,352326933 +DD 13684944,872428596 +DD 7895160,503324190 +DD 7368816,469769244 +DD 14935011,4160813304 +DD 4802889,1375752786 +DD 8421504,536879136 +DD 5263440,335549460 +DD 10987431,3909151209 +DD 16185078,3170942397 +DD 7829367,3707821533 +DD 9671571,3825263844 +DD 8816262,2701173153 +DD 8618883,3758153952 +DD 2763306,2315291274 +DD 13092807,4043370993 +DD 5987163,3590379222 +DD 15329769,2046851706 +DD 15658734,3137387451 +DD 9408399,3808486371 +DD 65793,1073758272 +DD 4013373,1325420367 +ALIGN 16 +_Camellia_cbc_encrypt PROC PUBLIC +$L_Camellia_cbc_encrypt_begin:: + push ebp + push ebx + push esi + push edi + mov ecx,DWORD PTR 28[esp] + cmp ecx,0 + je $L016enc_out + pushfd + cld + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 28[esp] + mov edx,DWORD PTR 36[esp] + mov ebp,DWORD PTR 40[esp] + lea esi,DWORD PTR [esp-64] + and esi,-64 + lea edi,DWORD PTR [edx-127] + sub edi,esi + neg edi + and edi,960 + sub esi,edi + mov edi,DWORD PTR 44[esp] + xchg esp,esi + add esp,4 + mov DWORD PTR 20[esp],esi + mov DWORD PTR 24[esp],eax + mov DWORD PTR 28[esp],ebx + mov DWORD PTR 32[esp],ecx + mov DWORD PTR 36[esp],edx + mov DWORD PTR 40[esp],ebp + call $L017pic_point +$L017pic_point: + pop ebp + lea ebp,DWORD PTR ($LCamellia_SBOX-$L017pic_point)[ebp] + mov esi,32 +ALIGN 4 +$L018prefetch_sbox: + mov eax,DWORD PTR [ebp] + mov ebx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 64[ebp] + mov edx,DWORD PTR 96[ebp] + lea ebp,DWORD PTR 128[ebp] + dec esi + jnz $L018prefetch_sbox + mov eax,DWORD PTR 36[esp] + sub ebp,4096 + mov esi,DWORD PTR 24[esp] + mov edx,DWORD PTR 272[eax] + cmp edi,0 + je $L019DECRYPT + mov ecx,DWORD PTR 32[esp] + mov edi,DWORD PTR 40[esp] + shl edx,6 + lea edx,DWORD PTR [edx*1+eax] + mov DWORD PTR 16[esp],edx + test ecx,4294967280 + jz $L020enc_tail + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] +ALIGN 4 +$L021enc_loop: + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + xor eax,DWORD PTR [esi] + xor ebx,DWORD PTR 4[esi] + xor ecx,DWORD PTR 8[esi] + bswap eax + xor edx,DWORD PTR 12[esi] + bswap ebx + mov edi,DWORD PTR 36[esp] + bswap ecx + bswap edx + call __x86_Camellia_encrypt + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + bswap eax + bswap ebx + bswap ecx + mov DWORD PTR [edi],eax + bswap edx + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov ecx,DWORD PTR 32[esp] + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 24[esp],esi + lea edx,DWORD PTR 16[edi] + mov DWORD PTR 28[esp],edx + sub ecx,16 + test ecx,4294967280 + mov DWORD PTR 32[esp],ecx + jnz $L021enc_loop + test ecx,15 + jnz $L020enc_tail + mov esi,DWORD PTR 40[esp] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + mov esp,DWORD PTR 20[esp] + popfd +$L016enc_out: + pop edi + pop esi + pop ebx + pop ebp + ret + pushfd +ALIGN 4 +$L020enc_tail: + mov eax,edi + mov edi,DWORD PTR 28[esp] + push eax + mov ebx,16 + sub ebx,ecx + cmp 
edi,esi + je $L022enc_in_place +ALIGN 4 +DD 2767451785 + jmp $L023enc_skip_in_place +$L022enc_in_place: + lea edi,DWORD PTR [ecx*1+edi] +$L023enc_skip_in_place: + mov ecx,ebx + xor eax,eax +ALIGN 4 +DD 2868115081 + pop edi + mov esi,DWORD PTR 28[esp] + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov DWORD PTR 32[esp],16 + jmp $L021enc_loop +ALIGN 16 +$L019DECRYPT: + shl edx,6 + lea edx,DWORD PTR [edx*1+eax] + mov DWORD PTR 16[esp],eax + mov DWORD PTR 36[esp],edx + cmp esi,DWORD PTR 28[esp] + je $L024dec_in_place + mov edi,DWORD PTR 40[esp] + mov DWORD PTR 44[esp],edi +ALIGN 4 +$L025dec_loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + bswap eax + mov edx,DWORD PTR 12[esi] + bswap ebx + mov edi,DWORD PTR 36[esp] + bswap ecx + bswap edx + call __x86_Camellia_decrypt + mov edi,DWORD PTR 44[esp] + mov esi,DWORD PTR 32[esp] + bswap eax + bswap ebx + bswap ecx + xor eax,DWORD PTR [edi] + bswap edx + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + sub esi,16 + jc $L026dec_partial + mov DWORD PTR 32[esp],esi + mov esi,DWORD PTR 24[esp] + mov edi,DWORD PTR 28[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov DWORD PTR 44[esp],esi + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 24[esp],esi + lea edi,DWORD PTR 16[edi] + mov DWORD PTR 28[esp],edi + jnz $L025dec_loop + mov edi,DWORD PTR 44[esp] +$L027dec_end: + mov esi,DWORD PTR 40[esp] + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + jmp $L028dec_out +ALIGN 4 +$L026dec_partial: + lea edi,DWORD PTR 44[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + lea ecx,DWORD PTR 16[esi] + mov esi,edi + mov edi,DWORD PTR 28[esp] +DD 2767451785 + mov edi,DWORD PTR 24[esp] + jmp $L027dec_end +ALIGN 4 +$L024dec_in_place: +$L029dec_in_place_loop: + lea edi,DWORD PTR 44[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + bswap eax + mov DWORD PTR 12[edi],edx + bswap ebx + mov edi,DWORD PTR 36[esp] + bswap ecx + bswap edx + call __x86_Camellia_decrypt + mov edi,DWORD PTR 40[esp] + mov esi,DWORD PTR 28[esp] + bswap eax + bswap ebx + bswap ecx + xor eax,DWORD PTR [edi] + bswap edx + xor ebx,DWORD PTR 4[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 12[edi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 28[esp],esi + lea esi,DWORD PTR 44[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 12[edi],edx + mov esi,DWORD PTR 24[esp] + lea esi,DWORD PTR 16[esi] + mov DWORD PTR 24[esp],esi + mov ecx,DWORD PTR 32[esp] + sub ecx,16 + jc $L030dec_in_place_partial + mov DWORD PTR 32[esp],ecx + jnz $L029dec_in_place_loop + jmp $L028dec_out +ALIGN 4 +$L030dec_in_place_partial: + mov edi,DWORD PTR 28[esp] + lea esi,DWORD PTR 44[esp] + lea edi,DWORD PTR [ecx*1+edi] + lea esi,DWORD PTR 16[ecx*1+esi] + neg ecx +DD 2767451785 +ALIGN 4 +$L028dec_out: + mov esp,DWORD PTR 20[esp] + popfd + pop edi + pop esi + pop ebx + 
pop ebp + ret +_Camellia_cbc_encrypt ENDP +DB 67,97,109,101,108,108,105,97,32,102,111,114,32,120,56,54 +DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +DB 115,108,46,111,114,103,62,0 +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/cast/cast-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/cast/cast-586.asm new file mode 100644 index 00000000000000..0801d204e20353 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/cast/cast-586.asm @@ -0,0 +1,947 @@ +TITLE cast-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +EXTERN _CAST_S_table0:NEAR +EXTERN _CAST_S_table1:NEAR +EXTERN _CAST_S_table2:NEAR +EXTERN _CAST_S_table3:NEAR +ALIGN 16 +_CAST_encrypt PROC PUBLIC +$L_CAST_encrypt_begin:: + ; + push ebp + push ebx + mov ebx,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + push esi + push edi + ; Load the 2 words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + ; Get short key flag + mov eax,DWORD PTR 128[ebp] + push eax + xor eax,eax + ; round 0 + mov edx,DWORD PTR [ebp] + mov ecx,DWORD PTR 4[ebp] + add edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor edi,ecx + ; round 1 + mov edx,DWORD PTR 8[ebp] + mov ecx,DWORD PTR 12[ebp] + xor edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor esi,ecx + ; round 2 + mov edx,DWORD PTR 16[ebp] + mov ecx,DWORD PTR 20[ebp] + sub edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor edi,ecx + ; round 3 + mov edx,DWORD PTR 24[ebp] + mov ecx,DWORD PTR 28[ebp] + add edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor esi,ecx + ; round 4 + mov edx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 36[ebp] + xor edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor edi,ecx + ; round 5 + mov edx,DWORD PTR 40[ebp] + mov ecx,DWORD PTR 44[ebp] + sub edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD 
PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor esi,ecx + ; round 6 + mov edx,DWORD PTR 48[ebp] + mov ecx,DWORD PTR 52[ebp] + add edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor edi,ecx + ; round 7 + mov edx,DWORD PTR 56[ebp] + mov ecx,DWORD PTR 60[ebp] + xor edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor esi,ecx + ; round 8 + mov edx,DWORD PTR 64[ebp] + mov ecx,DWORD PTR 68[ebp] + sub edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor edi,ecx + ; round 9 + mov edx,DWORD PTR 72[ebp] + mov ecx,DWORD PTR 76[ebp] + add edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor esi,ecx + ; round 10 + mov edx,DWORD PTR 80[ebp] + mov ecx,DWORD PTR 84[ebp] + xor edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor edi,ecx + ; round 11 + mov edx,DWORD PTR 88[ebp] + mov ecx,DWORD PTR 92[ebp] + sub edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor esi,ecx + ; test short key flag + pop edx + or edx,edx + jnz $L000cast_enc_done + ; round 12 + mov edx,DWORD PTR 96[ebp] + mov ecx,DWORD PTR 100[ebp] + add edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor edi,ecx + ; round 13 + mov edx,DWORD PTR 104[ebp] + mov ecx,DWORD PTR 108[ebp] + xor edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR 
_CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor esi,ecx + ; round 14 + mov edx,DWORD PTR 112[ebp] + mov ecx,DWORD PTR 116[ebp] + sub edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor edi,ecx + ; round 15 + mov edx,DWORD PTR 120[ebp] + mov ecx,DWORD PTR 124[ebp] + add edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor esi,ecx +$L000cast_enc_done: + nop + mov eax,DWORD PTR 20[esp] + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_CAST_encrypt ENDP +EXTERN _CAST_S_table0:NEAR +EXTERN _CAST_S_table1:NEAR +EXTERN _CAST_S_table2:NEAR +EXTERN _CAST_S_table3:NEAR +ALIGN 16 +_CAST_decrypt PROC PUBLIC +$L_CAST_decrypt_begin:: + ; + push ebp + push ebx + mov ebx,DWORD PTR 12[esp] + mov ebp,DWORD PTR 16[esp] + push esi + push edi + ; Load the 2 words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + ; Get short key flag + mov eax,DWORD PTR 128[ebp] + or eax,eax + jnz $L001cast_dec_skip + xor eax,eax + ; round 15 + mov edx,DWORD PTR 120[ebp] + mov ecx,DWORD PTR 124[ebp] + add edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor edi,ecx + ; round 14 + mov edx,DWORD PTR 112[ebp] + mov ecx,DWORD PTR 116[ebp] + sub edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor esi,ecx + ; round 13 + mov edx,DWORD PTR 104[ebp] + mov ecx,DWORD PTR 108[ebp] + xor edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor edi,ecx + ; round 12 + mov edx,DWORD PTR 96[ebp] + mov ecx,DWORD PTR 100[ebp] + add edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor esi,ecx +$L001cast_dec_skip: + ; round 11 + mov edx,DWORD PTR 88[ebp] + mov ecx,DWORD PTR 92[ebp] + sub edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR 
_CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor edi,ecx + ; round 10 + mov edx,DWORD PTR 80[ebp] + mov ecx,DWORD PTR 84[ebp] + xor edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor esi,ecx + ; round 9 + mov edx,DWORD PTR 72[ebp] + mov ecx,DWORD PTR 76[ebp] + add edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor edi,ecx + ; round 8 + mov edx,DWORD PTR 64[ebp] + mov ecx,DWORD PTR 68[ebp] + sub edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor esi,ecx + ; round 7 + mov edx,DWORD PTR 56[ebp] + mov ecx,DWORD PTR 60[ebp] + xor edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor edi,ecx + ; round 6 + mov edx,DWORD PTR 48[ebp] + mov ecx,DWORD PTR 52[ebp] + add edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor esi,ecx + ; round 5 + mov edx,DWORD PTR 40[ebp] + mov ecx,DWORD PTR 44[ebp] + sub edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor edi,ecx + ; round 4 + mov edx,DWORD PTR 32[ebp] + mov ecx,DWORD PTR 36[ebp] + xor edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor esi,ecx + ; round 3 + mov edx,DWORD PTR 24[ebp] + mov ecx,DWORD PTR 28[ebp] + add edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov 
ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor edi,ecx + ; round 2 + mov edx,DWORD PTR 16[ebp] + mov ecx,DWORD PTR 20[ebp] + sub edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + sub ecx,ebx + xor esi,ecx + ; round 1 + mov edx,DWORD PTR 8[ebp] + mov ecx,DWORD PTR 12[ebp] + xor edx,esi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + add ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + xor ecx,ebx + xor edi,ecx + ; round 0 + mov edx,DWORD PTR [ebp] + mov ecx,DWORD PTR 4[ebp] + add edx,edi + rol edx,cl + mov ebx,edx + xor ecx,ecx + mov cl,dh + and ebx,255 + shr edx,16 + xor eax,eax + mov al,dh + and edx,255 + mov ecx,DWORD PTR _CAST_S_table0[ecx*4] + mov ebx,DWORD PTR _CAST_S_table1[ebx*4] + xor ecx,ebx + mov ebx,DWORD PTR _CAST_S_table2[eax*4] + sub ecx,ebx + mov ebx,DWORD PTR _CAST_S_table3[edx*4] + add ecx,ebx + xor esi,ecx + nop + mov eax,DWORD PTR 20[esp] + mov DWORD PTR 4[eax],edi + mov DWORD PTR [eax],esi + pop edi + pop esi + pop ebx + pop ebp + ret +_CAST_decrypt ENDP +ALIGN 16 +_CAST_cbc_encrypt PROC PUBLIC +$L_CAST_cbc_encrypt_begin:: + ; + push ebp + push ebx + push esi + push edi + mov ebp,DWORD PTR 28[esp] + ; getting iv ptr from parameter 4 + mov ebx,DWORD PTR 36[esp] + mov esi,DWORD PTR [ebx] + mov edi,DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx,esp + mov esi,DWORD PTR 36[esp] + mov edi,DWORD PTR 40[esp] + ; getting encrypt flag from parameter 5 + mov ecx,DWORD PTR 56[esp] + ; get and push parameter 3 + mov eax,DWORD PTR 48[esp] + push eax + push ebx + cmp ecx,0 + jz $L002decrypt + and ebp,4294967288 + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + jz $L003encrypt_finish +$L004encrypt_loop: + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + xor eax,ecx + xor ebx,edx + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_CAST_encrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L004encrypt_loop +$L003encrypt_finish: + mov ebp,DWORD PTR 52[esp] + and ebp,7 + jz $L005finish + call $L006PIC_point +$L006PIC_point: + pop edx + lea ecx,DWORD PTR ($L007cbc_enc_jmp_table-$L006PIC_point)[edx] + mov ebp,DWORD PTR [ebp*4+ecx] + add ebp,edx + xor ecx,ecx + xor edx,edx + jmp ebp +$L008ej7: + mov dh,BYTE PTR 6[esi] + shl edx,8 +$L009ej6: + mov dh,BYTE PTR 5[esi] +$L010ej5: + mov dl,BYTE PTR 4[esi] +$L011ej4: + mov ecx,DWORD PTR [esi] + jmp $L012ejend +$L013ej3: + mov ch,BYTE PTR 2[esi] + shl ecx,8 +$L014ej2: + mov ch,BYTE PTR 1[esi] +$L015ej1: + mov cl,BYTE PTR [esi] +$L012ejend: + xor eax,ecx + xor ebx,edx + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_CAST_encrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + jmp $L005finish +$L002decrypt: + and ebp,4294967288 + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + jz $L016decrypt_finish +$L017decrypt_loop: 
+ mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_CAST_decrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov ecx,DWORD PTR 16[esp] + mov edx,DWORD PTR 20[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR [edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L017decrypt_loop +$L016decrypt_finish: + mov ebp,DWORD PTR 52[esp] + and ebp,7 + jz $L005finish + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + bswap eax + bswap ebx + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + call $L_CAST_decrypt_begin + mov eax,DWORD PTR 8[esp] + mov ebx,DWORD PTR 12[esp] + bswap eax + bswap ebx + mov ecx,DWORD PTR 16[esp] + mov edx,DWORD PTR 20[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] +$L018dj7: + ror edx,16 + mov BYTE PTR 6[edi],dl + shr edx,16 +$L019dj6: + mov BYTE PTR 5[edi],dh +$L020dj5: + mov BYTE PTR 4[edi],dl +$L021dj4: + mov DWORD PTR [edi],ecx + jmp $L022djend +$L023dj3: + ror ecx,16 + mov BYTE PTR 2[edi],cl + shl ecx,16 +$L024dj2: + mov BYTE PTR 1[esi],ch +$L025dj1: + mov BYTE PTR [esi],cl +$L022djend: + jmp $L005finish +$L005finish: + mov ecx,DWORD PTR 60[esp] + add esp,24 + mov DWORD PTR [ecx],eax + mov DWORD PTR 4[ecx],ebx + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L007cbc_enc_jmp_table: +DD 0 +DD $L015ej1-$L006PIC_point +DD $L014ej2-$L006PIC_point +DD $L013ej3-$L006PIC_point +DD $L011ej4-$L006PIC_point +DD $L010ej5-$L006PIC_point +DD $L009ej6-$L006PIC_point +DD $L008ej7-$L006PIC_point +ALIGN 64 +_CAST_cbc_encrypt ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/des/crypt586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/des/crypt586.asm new file mode 100644 index 00000000000000..6ca04c3afbdb1b --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/des/crypt586.asm @@ -0,0 +1,886 @@ +TITLE crypt586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. 
+ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +EXTERN _DES_SPtrans:NEAR +ALIGN 16 +_fcrypt_body PROC PUBLIC +$L_fcrypt_body_begin:: + push ebp + push ebx + push esi + push edi + ; + ; Load the 2 words + xor edi,edi + xor esi,esi + lea edx,DWORD PTR _DES_SPtrans + push edx + mov ebp,DWORD PTR 28[esp] + push 25 +$L000start: + ; + ; Round 0 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR [ebp] + xor eax,ebx + mov ecx,DWORD PTR 4[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 1 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 8[ebp] + xor eax,ebx + mov ecx,DWORD PTR 12[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 2 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 16[ebp] + xor eax,ebx + mov ecx,DWORD PTR 20[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 3 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 24[ebp] + xor eax,ebx + mov ecx,DWORD PTR 28[ebp] + xor 
eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 4 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 32[ebp] + xor eax,ebx + mov ecx,DWORD PTR 36[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 5 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 40[ebp] + xor eax,ebx + mov ecx,DWORD PTR 44[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 6 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 48[ebp] + xor eax,ebx + mov ecx,DWORD PTR 52[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 7 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr 
edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 56[ebp] + xor eax,ebx + mov ecx,DWORD PTR 60[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 8 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 64[ebp] + xor eax,ebx + mov ecx,DWORD PTR 68[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 9 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 72[ebp] + xor eax,ebx + mov ecx,DWORD PTR 76[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 10 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 80[ebp] + xor eax,ebx + mov ecx,DWORD PTR 84[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov 
ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 11 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 88[ebp] + xor eax,ebx + mov ecx,DWORD PTR 92[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 12 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 96[ebp] + xor eax,ebx + mov ecx,DWORD PTR 100[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 13 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 104[ebp] + xor eax,ebx + mov ecx,DWORD PTR 108[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 14 + mov eax,DWORD PTR 36[esp] + mov edx,esi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,esi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 112[ebp] + xor eax,ebx + mov ecx,DWORD PTR 116[ebp] + xor eax,esi + xor edx,esi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD 
PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor edi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor edi,ebx + mov ebp,DWORD PTR 32[esp] + ; + ; Round 15 + mov eax,DWORD PTR 36[esp] + mov edx,edi + shr edx,16 + mov ecx,DWORD PTR 40[esp] + xor edx,edi + and eax,edx + and edx,ecx + mov ebx,eax + shl ebx,16 + mov ecx,edx + shl ecx,16 + xor eax,ebx + xor edx,ecx + mov ebx,DWORD PTR 120[ebp] + xor eax,ebx + mov ecx,DWORD PTR 124[ebp] + xor eax,edi + xor edx,edi + xor edx,ecx + and eax,0fcfcfcfch + xor ebx,ebx + and edx,0cfcfcfcfh + xor ecx,ecx + mov bl,al + mov cl,ah + ror edx,4 + mov ebp,DWORD PTR 4[esp] + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + mov ebx,DWORD PTR 0600h[ebx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0700h[ecx*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0400h[eax*1+ebp] + xor esi,ebx + mov ebx,DWORD PTR 0500h[edx*1+ebp] + xor esi,ebx + mov ebp,DWORD PTR 32[esp] + mov ebx,DWORD PTR [esp] + mov eax,edi + dec ebx + mov edi,esi + mov esi,eax + mov DWORD PTR [esp],ebx + jnz $L000start + ; + ; FP + mov edx,DWORD PTR 28[esp] + ror edi,1 + mov eax,esi + xor esi,edi + and esi,0aaaaaaaah + xor eax,esi + xor edi,esi + ; + rol eax,23 + mov esi,eax + xor eax,edi + and eax,003fc03fch + xor esi,eax + xor edi,eax + ; + rol esi,10 + mov eax,esi + xor esi,edi + and esi,033333333h + xor eax,esi + xor edi,esi + ; + rol edi,18 + mov esi,edi + xor edi,eax + and edi,0fff0000fh + xor esi,edi + xor eax,edi + ; + rol esi,12 + mov edi,esi + xor esi,eax + and esi,0f0f0f0f0h + xor edi,esi + xor eax,esi + ; + ror eax,4 + mov DWORD PTR [edx],eax + mov DWORD PTR 4[edx],edi + add esp,8 + pop edi + pop esi + pop ebx + pop ebp + ret +_fcrypt_body ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/des/des-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/des/des-586.asm new file mode 100644 index 00000000000000..ecae90ec7cbe5b --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/des/des-586.asm @@ -0,0 +1,1836 @@ +TITLE des-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. 
+ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +PUBLIC _DES_SPtrans +ALIGN 16 +__x86_DES_encrypt PROC PRIVATE + push ecx + ; Round 0 + mov eax,DWORD PTR [ecx] + xor ebx,ebx + mov edx,DWORD PTR 4[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 1 + mov eax,DWORD PTR 8[ecx] + xor ebx,ebx + mov edx,DWORD PTR 12[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 2 + mov eax,DWORD PTR 16[ecx] + xor ebx,ebx + mov edx,DWORD PTR 20[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 3 + mov eax,DWORD PTR 24[ecx] + xor ebx,ebx + mov edx,DWORD PTR 28[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 4 + mov eax,DWORD PTR 32[ecx] + xor ebx,ebx + mov edx,DWORD PTR 36[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 5 + mov eax,DWORD PTR 40[ecx] + xor ebx,ebx + mov edx,DWORD PTR 44[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] 
+ mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 6 + mov eax,DWORD PTR 48[ecx] + xor ebx,ebx + mov edx,DWORD PTR 52[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 7 + mov eax,DWORD PTR 56[ecx] + xor ebx,ebx + mov edx,DWORD PTR 60[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 8 + mov eax,DWORD PTR 64[ecx] + xor ebx,ebx + mov edx,DWORD PTR 68[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 9 + mov eax,DWORD PTR 72[ecx] + xor ebx,ebx + mov edx,DWORD PTR 76[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 10 + mov eax,DWORD PTR 80[ecx] + xor ebx,ebx + mov edx,DWORD PTR 84[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 11 + mov eax,DWORD PTR 88[ecx] + xor 
ebx,ebx + mov edx,DWORD PTR 92[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 12 + mov eax,DWORD PTR 96[ecx] + xor ebx,ebx + mov edx,DWORD PTR 100[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 13 + mov eax,DWORD PTR 104[ecx] + xor ebx,ebx + mov edx,DWORD PTR 108[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 14 + mov eax,DWORD PTR 112[ecx] + xor ebx,ebx + mov edx,DWORD PTR 116[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 15 + mov eax,DWORD PTR 120[ecx] + xor ebx,ebx + mov edx,DWORD PTR 124[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + add esp,4 + ret +__x86_DES_encrypt ENDP +ALIGN 16 +__x86_DES_decrypt PROC PRIVATE + push ecx + ; Round 15 + mov eax,DWORD PTR 120[ecx] + xor ebx,ebx + mov edx,DWORD PTR 124[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor 
edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 14 + mov eax,DWORD PTR 112[ecx] + xor ebx,ebx + mov edx,DWORD PTR 116[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 13 + mov eax,DWORD PTR 104[ecx] + xor ebx,ebx + mov edx,DWORD PTR 108[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 12 + mov eax,DWORD PTR 96[ecx] + xor ebx,ebx + mov edx,DWORD PTR 100[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 11 + mov eax,DWORD PTR 88[ecx] + xor ebx,ebx + mov edx,DWORD PTR 92[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 10 + mov eax,DWORD PTR 80[ecx] + xor ebx,ebx + mov edx,DWORD PTR 84[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 9 + mov eax,DWORD PTR 72[ecx] + xor ebx,ebx + mov edx,DWORD PTR 76[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + 
mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 8 + mov eax,DWORD PTR 64[ecx] + xor ebx,ebx + mov edx,DWORD PTR 68[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 7 + mov eax,DWORD PTR 56[ecx] + xor ebx,ebx + mov edx,DWORD PTR 60[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 6 + mov eax,DWORD PTR 48[ecx] + xor ebx,ebx + mov edx,DWORD PTR 52[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 5 + mov eax,DWORD PTR 40[ecx] + xor ebx,ebx + mov edx,DWORD PTR 44[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 4 + mov eax,DWORD PTR 32[ecx] + xor ebx,ebx + mov edx,DWORD PTR 36[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 
0500h[edx*1+ebp] + ; Round 3 + mov eax,DWORD PTR 24[ecx] + xor ebx,ebx + mov edx,DWORD PTR 28[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 2 + mov eax,DWORD PTR 16[ecx] + xor ebx,ebx + mov edx,DWORD PTR 20[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + ; Round 1 + mov eax,DWORD PTR 8[ecx] + xor ebx,ebx + mov edx,DWORD PTR 12[ecx] + xor eax,esi + xor ecx,ecx + xor edx,esi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor edi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor edi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor edi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor edi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor edi,DWORD PTR 0600h[ebx*1+ebp] + xor edi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor edi,DWORD PTR 0400h[eax*1+ebp] + xor edi,DWORD PTR 0500h[edx*1+ebp] + ; Round 0 + mov eax,DWORD PTR [ecx] + xor ebx,ebx + mov edx,DWORD PTR 4[ecx] + xor eax,edi + xor ecx,ecx + xor edx,edi + and eax,0fcfcfcfch + and edx,0cfcfcfcfh + mov bl,al + mov cl,ah + ror edx,4 + xor esi,DWORD PTR [ebx*1+ebp] + mov bl,dl + xor esi,DWORD PTR 0200h[ecx*1+ebp] + mov cl,dh + shr eax,16 + xor esi,DWORD PTR 0100h[ebx*1+ebp] + mov bl,ah + shr edx,16 + xor esi,DWORD PTR 0300h[ecx*1+ebp] + mov cl,dh + and eax,0ffh + and edx,0ffh + xor esi,DWORD PTR 0600h[ebx*1+ebp] + xor esi,DWORD PTR 0700h[ecx*1+ebp] + mov ecx,DWORD PTR [esp] + xor esi,DWORD PTR 0400h[eax*1+ebp] + xor esi,DWORD PTR 0500h[edx*1+ebp] + add esp,4 + ret +__x86_DES_decrypt ENDP +ALIGN 16 +_DES_encrypt1 PROC PUBLIC +$L_DES_encrypt1_begin:: + push esi + push edi + ; + ; Load the 2 words + mov esi,DWORD PTR 12[esp] + xor ecx,ecx + push ebx + push ebp + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 28[esp] + mov edi,DWORD PTR 4[esi] + ; + ; IP + rol eax,4 + mov esi,eax + xor eax,edi + and eax,0f0f0f0f0h + xor esi,eax + xor edi,eax + ; + rol edi,20 + mov eax,edi + xor edi,esi + and edi,0fff0000fh + xor eax,edi + xor esi,edi + ; + rol eax,14 + mov edi,eax + xor eax,esi + and eax,033333333h + xor edi,eax + xor esi,eax + ; + rol esi,22 + mov eax,esi + xor esi,edi + and esi,003fc03fch + xor eax,esi + xor edi,esi + ; + rol eax,9 + mov esi,eax + xor eax,edi + and eax,0aaaaaaaah + xor esi,eax + xor edi,eax + ; + rol edi,1 + call $L000pic_point +$L000pic_point: + pop ebp + lea ebp,DWORD PTR ($Ldes_sptrans-$L000pic_point)[ebp] + mov ecx,DWORD PTR 24[esp] + cmp ebx,0 + je $L001decrypt + call __x86_DES_encrypt + jmp $L002done +$L001decrypt: + call __x86_DES_decrypt +$L002done: + ; + 
; FP + mov edx,DWORD PTR 20[esp] + ror esi,1 + mov eax,edi + xor edi,esi + and edi,0aaaaaaaah + xor eax,edi + xor esi,edi + ; + rol eax,23 + mov edi,eax + xor eax,esi + and eax,003fc03fch + xor edi,eax + xor esi,eax + ; + rol edi,10 + mov eax,edi + xor edi,esi + and edi,033333333h + xor eax,edi + xor esi,edi + ; + rol esi,18 + mov edi,esi + xor esi,eax + and esi,0fff0000fh + xor edi,esi + xor eax,esi + ; + rol edi,12 + mov esi,edi + xor edi,eax + and edi,0f0f0f0f0h + xor esi,edi + xor eax,edi + ; + ror eax,4 + mov DWORD PTR [edx],eax + mov DWORD PTR 4[edx],esi + pop ebp + pop ebx + pop edi + pop esi + ret +_DES_encrypt1 ENDP +ALIGN 16 +_DES_encrypt2 PROC PUBLIC +$L_DES_encrypt2_begin:: + push esi + push edi + ; + ; Load the 2 words + mov eax,DWORD PTR 12[esp] + xor ecx,ecx + push ebx + push ebp + mov esi,DWORD PTR [eax] + mov ebx,DWORD PTR 28[esp] + rol esi,3 + mov edi,DWORD PTR 4[eax] + rol edi,3 + call $L003pic_point +$L003pic_point: + pop ebp + lea ebp,DWORD PTR ($Ldes_sptrans-$L003pic_point)[ebp] + mov ecx,DWORD PTR 24[esp] + cmp ebx,0 + je $L004decrypt + call __x86_DES_encrypt + jmp $L005done +$L004decrypt: + call __x86_DES_decrypt +$L005done: + ; + ; Fixup + ror edi,3 + mov eax,DWORD PTR 20[esp] + ror esi,3 + mov DWORD PTR [eax],edi + mov DWORD PTR 4[eax],esi + pop ebp + pop ebx + pop edi + pop esi + ret +_DES_encrypt2 ENDP +ALIGN 16 +_DES_encrypt3 PROC PUBLIC +$L_DES_encrypt3_begin:: + push ebx + mov ebx,DWORD PTR 8[esp] + push ebp + push esi + push edi + ; + ; Load the data words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + sub esp,12 + ; + ; IP + rol edi,4 + mov edx,edi + xor edi,esi + and edi,0f0f0f0f0h + xor edx,edi + xor esi,edi + ; + rol esi,20 + mov edi,esi + xor esi,edx + and esi,0fff0000fh + xor edi,esi + xor edx,esi + ; + rol edi,14 + mov esi,edi + xor edi,edx + and edi,033333333h + xor esi,edi + xor edx,edi + ; + rol edx,22 + mov edi,edx + xor edx,esi + and edx,003fc03fch + xor edi,edx + xor esi,edx + ; + rol edi,9 + mov edx,edi + xor edi,esi + and edi,0aaaaaaaah + xor edx,edi + xor esi,edi + ; + ror edx,3 + ror esi,2 + mov DWORD PTR 4[ebx],esi + mov eax,DWORD PTR 36[esp] + mov DWORD PTR [ebx],edx + mov edi,DWORD PTR 40[esp] + mov esi,DWORD PTR 44[esp] + mov DWORD PTR 8[esp],1 + mov DWORD PTR 4[esp],eax + mov DWORD PTR [esp],ebx + call $L_DES_encrypt2_begin + mov DWORD PTR 8[esp],0 + mov DWORD PTR 4[esp],edi + mov DWORD PTR [esp],ebx + call $L_DES_encrypt2_begin + mov DWORD PTR 8[esp],1 + mov DWORD PTR 4[esp],esi + mov DWORD PTR [esp],ebx + call $L_DES_encrypt2_begin + add esp,12 + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + ; + ; FP + rol esi,2 + rol edi,3 + mov eax,edi + xor edi,esi + and edi,0aaaaaaaah + xor eax,edi + xor esi,edi + ; + rol eax,23 + mov edi,eax + xor eax,esi + and eax,003fc03fch + xor edi,eax + xor esi,eax + ; + rol edi,10 + mov eax,edi + xor edi,esi + and edi,033333333h + xor eax,edi + xor esi,edi + ; + rol esi,18 + mov edi,esi + xor esi,eax + and esi,0fff0000fh + xor edi,esi + xor eax,esi + ; + rol edi,12 + mov esi,edi + xor edi,eax + and edi,0f0f0f0f0h + xor esi,edi + xor eax,edi + ; + ror eax,4 + mov DWORD PTR [ebx],eax + mov DWORD PTR 4[ebx],esi + pop edi + pop esi + pop ebp + pop ebx + ret +_DES_encrypt3 ENDP +ALIGN 16 +_DES_decrypt3 PROC PUBLIC +$L_DES_decrypt3_begin:: + push ebx + mov ebx,DWORD PTR 8[esp] + push ebp + push esi + push edi + ; + ; Load the data words + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + sub esp,12 + ; + ; IP + rol edi,4 + mov edx,edi + xor edi,esi + and edi,0f0f0f0f0h + xor edx,edi + xor 
esi,edi + ; + rol esi,20 + mov edi,esi + xor esi,edx + and esi,0fff0000fh + xor edi,esi + xor edx,esi + ; + rol edi,14 + mov esi,edi + xor edi,edx + and edi,033333333h + xor esi,edi + xor edx,edi + ; + rol edx,22 + mov edi,edx + xor edx,esi + and edx,003fc03fch + xor edi,edx + xor esi,edx + ; + rol edi,9 + mov edx,edi + xor edi,esi + and edi,0aaaaaaaah + xor edx,edi + xor esi,edi + ; + ror edx,3 + ror esi,2 + mov DWORD PTR 4[ebx],esi + mov esi,DWORD PTR 36[esp] + mov DWORD PTR [ebx],edx + mov edi,DWORD PTR 40[esp] + mov eax,DWORD PTR 44[esp] + mov DWORD PTR 8[esp],0 + mov DWORD PTR 4[esp],eax + mov DWORD PTR [esp],ebx + call $L_DES_encrypt2_begin + mov DWORD PTR 8[esp],1 + mov DWORD PTR 4[esp],edi + mov DWORD PTR [esp],ebx + call $L_DES_encrypt2_begin + mov DWORD PTR 8[esp],0 + mov DWORD PTR 4[esp],esi + mov DWORD PTR [esp],ebx + call $L_DES_encrypt2_begin + add esp,12 + mov edi,DWORD PTR [ebx] + mov esi,DWORD PTR 4[ebx] + ; + ; FP + rol esi,2 + rol edi,3 + mov eax,edi + xor edi,esi + and edi,0aaaaaaaah + xor eax,edi + xor esi,edi + ; + rol eax,23 + mov edi,eax + xor eax,esi + and eax,003fc03fch + xor edi,eax + xor esi,eax + ; + rol edi,10 + mov eax,edi + xor edi,esi + and edi,033333333h + xor eax,edi + xor esi,edi + ; + rol esi,18 + mov edi,esi + xor esi,eax + and esi,0fff0000fh + xor edi,esi + xor eax,esi + ; + rol edi,12 + mov esi,edi + xor edi,eax + and edi,0f0f0f0f0h + xor esi,edi + xor eax,edi + ; + ror eax,4 + mov DWORD PTR [ebx],eax + mov DWORD PTR 4[ebx],esi + pop edi + pop esi + pop ebp + pop ebx + ret +_DES_decrypt3 ENDP +ALIGN 16 +_DES_ncbc_encrypt PROC PUBLIC +$L_DES_ncbc_encrypt_begin:: + ; + push ebp + push ebx + push esi + push edi + mov ebp,DWORD PTR 28[esp] + ; getting iv ptr from parameter 4 + mov ebx,DWORD PTR 36[esp] + mov esi,DWORD PTR [ebx] + mov edi,DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx,esp + mov esi,DWORD PTR 36[esp] + mov edi,DWORD PTR 40[esp] + ; getting encrypt flag from parameter 5 + mov ecx,DWORD PTR 56[esp] + ; get and push parameter 5 + push ecx + ; get and push parameter 3 + mov eax,DWORD PTR 52[esp] + push eax + push ebx + cmp ecx,0 + jz $L006decrypt + and ebp,4294967288 + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + jz $L007encrypt_finish +$L008encrypt_loop: + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + xor eax,ecx + xor ebx,edx + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call $L_DES_encrypt1_begin + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L008encrypt_loop +$L007encrypt_finish: + mov ebp,DWORD PTR 56[esp] + and ebp,7 + jz $L009finish + call $L010PIC_point +$L010PIC_point: + pop edx + lea ecx,DWORD PTR ($L011cbc_enc_jmp_table-$L010PIC_point)[edx] + mov ebp,DWORD PTR [ebp*4+ecx] + add ebp,edx + xor ecx,ecx + xor edx,edx + jmp ebp +$L012ej7: + mov dh,BYTE PTR 6[esi] + shl edx,8 +$L013ej6: + mov dh,BYTE PTR 5[esi] +$L014ej5: + mov dl,BYTE PTR 4[esi] +$L015ej4: + mov ecx,DWORD PTR [esi] + jmp $L016ejend +$L017ej3: + mov ch,BYTE PTR 2[esi] + shl ecx,8 +$L018ej2: + mov ch,BYTE PTR 1[esi] +$L019ej1: + mov cl,BYTE PTR [esi] +$L016ejend: + xor eax,ecx + xor ebx,edx + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call $L_DES_encrypt1_begin + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + jmp $L009finish +$L006decrypt: + and ebp,4294967288 + mov eax,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + jz $L020decrypt_finish 
+$L021decrypt_loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call $L_DES_encrypt1_begin + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + mov ecx,DWORD PTR 20[esp] + mov edx,DWORD PTR 24[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR [edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 20[esp],eax + mov DWORD PTR 24[esp],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L021decrypt_loop +$L020decrypt_finish: + mov ebp,DWORD PTR 56[esp] + and ebp,7 + jz $L009finish + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR 12[esp],eax + mov DWORD PTR 16[esp],ebx + call $L_DES_encrypt1_begin + mov eax,DWORD PTR 12[esp] + mov ebx,DWORD PTR 16[esp] + mov ecx,DWORD PTR 20[esp] + mov edx,DWORD PTR 24[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] +$L022dj7: + ror edx,16 + mov BYTE PTR 6[edi],dl + shr edx,16 +$L023dj6: + mov BYTE PTR 5[edi],dh +$L024dj5: + mov BYTE PTR 4[edi],dl +$L025dj4: + mov DWORD PTR [edi],ecx + jmp $L026djend +$L027dj3: + ror ecx,16 + mov BYTE PTR 2[edi],cl + shl ecx,16 +$L028dj2: + mov BYTE PTR 1[esi],ch +$L029dj1: + mov BYTE PTR [esi],cl +$L026djend: + jmp $L009finish +$L009finish: + mov ecx,DWORD PTR 64[esp] + add esp,28 + mov DWORD PTR [ecx],eax + mov DWORD PTR 4[ecx],ebx + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L011cbc_enc_jmp_table: +DD 0 +DD $L019ej1-$L010PIC_point +DD $L018ej2-$L010PIC_point +DD $L017ej3-$L010PIC_point +DD $L015ej4-$L010PIC_point +DD $L014ej5-$L010PIC_point +DD $L013ej6-$L010PIC_point +DD $L012ej7-$L010PIC_point +ALIGN 64 +_DES_ncbc_encrypt ENDP +ALIGN 16 +_DES_ede3_cbc_encrypt PROC PUBLIC +$L_DES_ede3_cbc_encrypt_begin:: + ; + push ebp + push ebx + push esi + push edi + mov ebp,DWORD PTR 28[esp] + ; getting iv ptr from parameter 6 + mov ebx,DWORD PTR 44[esp] + mov esi,DWORD PTR [ebx] + mov edi,DWORD PTR 4[ebx] + push edi + push esi + push edi + push esi + mov ebx,esp + mov esi,DWORD PTR 36[esp] + mov edi,DWORD PTR 40[esp] + ; getting encrypt flag from parameter 7 + mov ecx,DWORD PTR 64[esp] + ; get and push parameter 5 + mov eax,DWORD PTR 56[esp] + push eax + ; get and push parameter 4 + mov eax,DWORD PTR 56[esp] + push eax + ; get and push parameter 3 + mov eax,DWORD PTR 56[esp] + push eax + push ebx + cmp ecx,0 + jz $L030decrypt + and ebp,4294967288 + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + jz $L031encrypt_finish +$L032encrypt_loop: + mov ecx,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + xor eax,ecx + xor ebx,edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call $L_DES_encrypt3_begin + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L032encrypt_loop +$L031encrypt_finish: + mov ebp,DWORD PTR 60[esp] + and ebp,7 + jz $L033finish + call $L034PIC_point +$L034PIC_point: + pop edx + lea ecx,DWORD PTR ($L035cbc_enc_jmp_table-$L034PIC_point)[edx] + mov ebp,DWORD PTR [ebp*4+ecx] + add ebp,edx + xor ecx,ecx + xor edx,edx + jmp ebp +$L036ej7: + mov dh,BYTE PTR 6[esi] + shl edx,8 +$L037ej6: + mov dh,BYTE PTR 5[esi] +$L038ej5: + mov dl,BYTE PTR 4[esi] +$L039ej4: + mov ecx,DWORD PTR [esi] + jmp $L040ejend +$L041ej3: + mov ch,BYTE PTR 2[esi] + shl ecx,8 +$L042ej2: + mov ch,BYTE PTR 1[esi] +$L043ej1: + mov cl,BYTE PTR [esi] +$L040ejend: + xor eax,ecx + xor ebx,edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call 
$L_DES_encrypt3_begin + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + mov DWORD PTR [edi],eax + mov DWORD PTR 4[edi],ebx + jmp $L033finish +$L030decrypt: + and ebp,4294967288 + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 28[esp] + jz $L044decrypt_finish +$L045decrypt_loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call $L_DES_decrypt3_begin + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + mov ecx,DWORD PTR 24[esp] + mov edx,DWORD PTR 28[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR [edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 24[esp],eax + mov DWORD PTR 28[esp],ebx + add esi,8 + add edi,8 + sub ebp,8 + jnz $L045decrypt_loop +$L044decrypt_finish: + mov ebp,DWORD PTR 60[esp] + and ebp,7 + jz $L033finish + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + call $L_DES_decrypt3_begin + mov eax,DWORD PTR 16[esp] + mov ebx,DWORD PTR 20[esp] + mov ecx,DWORD PTR 24[esp] + mov edx,DWORD PTR 28[esp] + xor ecx,eax + xor edx,ebx + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] +$L046dj7: + ror edx,16 + mov BYTE PTR 6[edi],dl + shr edx,16 +$L047dj6: + mov BYTE PTR 5[edi],dh +$L048dj5: + mov BYTE PTR 4[edi],dl +$L049dj4: + mov DWORD PTR [edi],ecx + jmp $L050djend +$L051dj3: + ror ecx,16 + mov BYTE PTR 2[edi],cl + shl ecx,16 +$L052dj2: + mov BYTE PTR 1[esi],ch +$L053dj1: + mov BYTE PTR [esi],cl +$L050djend: + jmp $L033finish +$L033finish: + mov ecx,DWORD PTR 76[esp] + add esp,32 + mov DWORD PTR [ecx],eax + mov DWORD PTR 4[ecx],ebx + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L035cbc_enc_jmp_table: +DD 0 +DD $L043ej1-$L034PIC_point +DD $L042ej2-$L034PIC_point +DD $L041ej3-$L034PIC_point +DD $L039ej4-$L034PIC_point +DD $L038ej5-$L034PIC_point +DD $L037ej6-$L034PIC_point +DD $L036ej7-$L034PIC_point +ALIGN 64 +_DES_ede3_cbc_encrypt ENDP +ALIGN 64 +_DES_SPtrans:: +$Ldes_sptrans:: +DD 34080768,524288,33554434,34080770 +DD 33554432,526338,524290,33554434 +DD 526338,34080768,34078720,2050 +DD 33556482,33554432,0,524290 +DD 524288,2,33556480,526336 +DD 34080770,34078720,2050,33556480 +DD 2,2048,526336,34078722 +DD 2048,33556482,34078722,0 +DD 0,34080770,33556480,524290 +DD 34080768,524288,2050,33556480 +DD 34078722,2048,526336,33554434 +DD 526338,2,33554434,34078720 +DD 34080770,526336,34078720,33556482 +DD 33554432,2050,524290,0 +DD 524288,33554432,33556482,34080768 +DD 2,34078722,2048,526338 +DD 1074823184,0,1081344,1074790400 +DD 1073741840,32784,1073774592,1081344 +DD 32768,1074790416,16,1073774592 +DD 1048592,1074823168,1074790400,16 +DD 1048576,1073774608,1074790416,32768 +DD 1081360,1073741824,0,1048592 +DD 1073774608,1081360,1074823168,1073741840 +DD 1073741824,1048576,32784,1074823184 +DD 1048592,1074823168,1073774592,1081360 +DD 1074823184,1048592,1073741840,0 +DD 1073741824,32784,1048576,1074790416 +DD 32768,1073741824,1081360,1073774608 +DD 1074823168,32768,0,1073741840 +DD 16,1074823184,1081344,1074790400 +DD 1074790416,1048576,32784,1073774592 +DD 1073774608,16,1074790400,1081344 +DD 67108865,67371264,256,67109121 +DD 262145,67108864,67109121,262400 +DD 67109120,262144,67371008,1 +DD 67371265,257,1,67371009 +DD 0,262145,67371264,256 +DD 257,67371265,262144,67108865 +DD 67371009,67109120,262401,67371008 +DD 262400,0,67108864,262401 +DD 67371264,256,1,262144 +DD 257,262145,67371008,67109121 +DD 0,67371264,262400,67371009 +DD 262145,67108864,67371265,1 +DD 
262401,67108865,67108864,67371265 +DD 262144,67109120,67109121,262400 +DD 67109120,0,67371009,257 +DD 67108865,262401,256,67371008 +DD 4198408,268439552,8,272633864 +DD 0,272629760,268439560,4194312 +DD 272633856,268435464,268435456,4104 +DD 268435464,4198408,4194304,268435456 +DD 272629768,4198400,4096,8 +DD 4198400,268439560,272629760,4096 +DD 4104,0,4194312,272633856 +DD 268439552,272629768,272633864,4194304 +DD 272629768,4104,4194304,268435464 +DD 4198400,268439552,8,272629760 +DD 268439560,0,4096,4194312 +DD 0,272629768,272633856,4096 +DD 268435456,272633864,4198408,4194304 +DD 272633864,8,268439552,4198408 +DD 4194312,4198400,272629760,268439560 +DD 4104,268435456,268435464,272633856 +DD 134217728,65536,1024,134284320 +DD 134283296,134218752,66592,134283264 +DD 65536,32,134217760,66560 +DD 134218784,134283296,134284288,0 +DD 66560,134217728,65568,1056 +DD 134218752,66592,0,134217760 +DD 32,134218784,134284320,65568 +DD 134283264,1024,1056,134284288 +DD 134284288,134218784,65568,134283264 +DD 65536,32,134217760,134218752 +DD 134217728,66560,134284320,0 +DD 66592,134217728,1024,65568 +DD 134218784,1024,0,134284320 +DD 134283296,134284288,1056,65536 +DD 66560,134283296,134218752,1056 +DD 32,66592,134283264,134217760 +DD 2147483712,2097216,0,2149588992 +DD 2097216,8192,2147491904,2097152 +DD 8256,2149589056,2105344,2147483648 +DD 2147491840,2147483712,2149580800,2105408 +DD 2097152,2147491904,2149580864,0 +DD 8192,64,2149588992,2149580864 +DD 2149589056,2149580800,2147483648,8256 +DD 64,2105344,2105408,2147491840 +DD 8256,2147483648,2147491840,2105408 +DD 2149588992,2097216,0,2147491840 +DD 2147483648,8192,2149580864,2097152 +DD 2097216,2149589056,2105344,64 +DD 2149589056,2105344,2097152,2147491904 +DD 2147483712,2149580800,2105408,0 +DD 8192,2147483712,2147491904,2149588992 +DD 2149580800,8256,64,2149580864 +DD 16384,512,16777728,16777220 +DD 16794116,16388,16896,0 +DD 16777216,16777732,516,16793600 +DD 4,16794112,16793600,516 +DD 16777732,16384,16388,16794116 +DD 0,16777728,16777220,16896 +DD 16793604,16900,16794112,4 +DD 16900,16793604,512,16777216 +DD 16900,16793600,16793604,516 +DD 16384,512,16777216,16793604 +DD 16777732,16900,16896,0 +DD 512,16777220,4,16777728 +DD 0,16777732,16777728,16896 +DD 516,16384,16794116,16777216 +DD 16794112,4,16388,16794116 +DD 16777220,16794112,16793600,16388 +DD 545259648,545390592,131200,0 +DD 537001984,8388736,545259520,545390720 +DD 128,536870912,8519680,131200 +DD 8519808,537002112,536871040,545259520 +DD 131072,8519808,8388736,537001984 +DD 545390720,536871040,0,8519680 +DD 536870912,8388608,537002112,545259648 +DD 8388608,131072,545390592,128 +DD 8388608,131072,536871040,545390720 +DD 131200,536870912,0,8519680 +DD 545259648,537002112,537001984,8388736 +DD 545390592,128,8388736,537001984 +DD 545390720,8388608,545259520,536871040 +DD 8519680,131200,537002112,545259520 +DD 128,545390592,8519808,0 +DD 536870912,545259648,131072,8519808 +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/md5/md5-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/md5/md5-586.asm new file mode 100644 index 00000000000000..ae47efcb7a9af4 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/md5/md5-586.asm @@ -0,0 +1,689 @@ +TITLE ../openssl/crypto/md5/asm/md5-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. 
+ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_md5_block_asm_data_order PROC PUBLIC +$L_md5_block_asm_data_order_begin:: + push esi + push edi + mov edi,DWORD PTR 12[esp] + mov esi,DWORD PTR 16[esp] + mov ecx,DWORD PTR 20[esp] + push ebp + shl ecx,6 + push ebx + add ecx,esi + sub ecx,64 + mov eax,DWORD PTR [edi] + push ecx + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] +$L000start: + ; + ; R0 section + mov edi,ecx + mov ebp,DWORD PTR [esi] + ; R0 0 + xor edi,edx + and edi,ebx + lea eax,DWORD PTR 3614090360[ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD PTR 4[esi] + add eax,ebx + ; R0 1 + xor edi,ecx + and edi,eax + lea edx,DWORD PTR 3905402710[ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD PTR 8[esi] + add edx,eax + ; R0 2 + xor edi,ebx + and edi,edx + lea ecx,DWORD PTR 606105819[ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD PTR 12[esi] + add ecx,edx + ; R0 3 + xor edi,eax + and edi,ecx + lea ebx,DWORD PTR 3250441966[ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD PTR 16[esi] + add ebx,ecx + ; R0 4 + xor edi,edx + and edi,ebx + lea eax,DWORD PTR 4118548399[ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD PTR 20[esi] + add eax,ebx + ; R0 5 + xor edi,ecx + and edi,eax + lea edx,DWORD PTR 1200080426[ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD PTR 24[esi] + add edx,eax + ; R0 6 + xor edi,ebx + and edi,edx + lea ecx,DWORD PTR 2821735955[ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD PTR 28[esi] + add ecx,edx + ; R0 7 + xor edi,eax + and edi,ecx + lea ebx,DWORD PTR 4249261313[ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD PTR 32[esi] + add ebx,ecx + ; R0 8 + xor edi,edx + and edi,ebx + lea eax,DWORD PTR 1770035416[ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD PTR 36[esi] + add eax,ebx + ; R0 9 + xor edi,ecx + and edi,eax + lea edx,DWORD PTR 2336552879[ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD PTR 40[esi] + add edx,eax + ; R0 10 + xor edi,ebx + and edi,edx + lea ecx,DWORD PTR 4294925233[ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD PTR 44[esi] + add ecx,edx + ; R0 11 + xor edi,eax + and edi,ecx + lea ebx,DWORD PTR 2304563134[ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD PTR 48[esi] + add ebx,ecx + ; R0 12 + xor edi,edx + and edi,ebx + lea eax,DWORD PTR 1804603682[ebp*1+eax] + xor edi,edx + add eax,edi + mov edi,ebx + rol eax,7 + mov ebp,DWORD PTR 52[esi] + add eax,ebx + ; R0 13 + xor edi,ecx + and edi,eax + lea edx,DWORD PTR 4254626195[ebp*1+edx] + xor edi,ecx + add edx,edi + mov edi,eax + rol edx,12 + mov ebp,DWORD PTR 56[esi] + add edx,eax + ; R0 14 + xor edi,ebx + and edi,edx + lea ecx,DWORD PTR 2792965006[ebp*1+ecx] + xor edi,ebx + add ecx,edi + mov edi,edx + rol ecx,17 + mov ebp,DWORD PTR 60[esi] + add ecx,edx + ; R0 15 + xor edi,eax + and edi,ecx + lea ebx,DWORD PTR 1236535329[ebp*1+ebx] + xor edi,eax + add ebx,edi + mov edi,ecx + rol ebx,22 + mov ebp,DWORD PTR 4[esi] + add ebx,ecx + ; + ; R1 section + ; R1 16 + lea eax,DWORD PTR 4129170786[ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD PTR 24[esi] + xor edi,ecx + add eax,edi + mov 
edi,ebx + rol eax,5 + add eax,ebx + ; R1 17 + lea edx,DWORD PTR 3225465664[ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD PTR 44[esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 18 + lea ecx,DWORD PTR 643717713[ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD PTR [esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 19 + lea ebx,DWORD PTR 3921069994[ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD PTR 20[esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 20 + lea eax,DWORD PTR 3593408605[ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD PTR 40[esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 21 + lea edx,DWORD PTR 38016083[ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD PTR 60[esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 22 + lea ecx,DWORD PTR 3634488961[ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD PTR 16[esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 23 + lea ebx,DWORD PTR 3889429448[ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD PTR 36[esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 24 + lea eax,DWORD PTR 568446438[ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD PTR 56[esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 25 + lea edx,DWORD PTR 3275163606[ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD PTR 12[esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 26 + lea ecx,DWORD PTR 4107603335[ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD PTR 32[esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 27 + lea ebx,DWORD PTR 1163531501[ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD PTR 52[esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; R1 28 + lea eax,DWORD PTR 2850285829[ebp*1+eax] + xor edi,ebx + and edi,edx + mov ebp,DWORD PTR 8[esi] + xor edi,ecx + add eax,edi + mov edi,ebx + rol eax,5 + add eax,ebx + ; R1 29 + lea edx,DWORD PTR 4243563512[ebp*1+edx] + xor edi,eax + and edi,ecx + mov ebp,DWORD PTR 28[esi] + xor edi,ebx + add edx,edi + mov edi,eax + rol edx,9 + add edx,eax + ; R1 30 + lea ecx,DWORD PTR 1735328473[ebp*1+ecx] + xor edi,edx + and edi,ebx + mov ebp,DWORD PTR 48[esi] + xor edi,eax + add ecx,edi + mov edi,edx + rol ecx,14 + add ecx,edx + ; R1 31 + lea ebx,DWORD PTR 2368359562[ebp*1+ebx] + xor edi,ecx + and edi,eax + mov ebp,DWORD PTR 20[esi] + xor edi,edx + add ebx,edi + mov edi,ecx + rol ebx,20 + add ebx,ecx + ; + ; R2 section + ; R2 32 + xor edi,edx + xor edi,ebx + lea eax,DWORD PTR 4294588738[ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD PTR 32[esi] + mov edi,ebx + ; R2 33 + lea edx,DWORD PTR 2272392833[ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD PTR 44[esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 34 + xor edi,ebx + xor edi,edx + lea ecx,DWORD PTR 1839030562[ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD PTR 56[esi] + mov edi,edx + ; R2 35 + lea ebx,DWORD PTR 4259657740[ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD PTR 4[esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 36 + xor edi,edx + xor edi,ebx + lea eax,DWORD PTR 2763975236[ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD PTR 16[esi] + mov edi,ebx + ; R2 37 + lea edx,DWORD PTR 1272893353[ebp*1+edx] + add 
eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD PTR 28[esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 38 + xor edi,ebx + xor edi,edx + lea ecx,DWORD PTR 4139469664[ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD PTR 40[esi] + mov edi,edx + ; R2 39 + lea ebx,DWORD PTR 3200236656[ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD PTR 52[esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 40 + xor edi,edx + xor edi,ebx + lea eax,DWORD PTR 681279174[ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD PTR [esi] + mov edi,ebx + ; R2 41 + lea edx,DWORD PTR 3936430074[ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD PTR 12[esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 42 + xor edi,ebx + xor edi,edx + lea ecx,DWORD PTR 3572445317[ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD PTR 24[esi] + mov edi,edx + ; R2 43 + lea ebx,DWORD PTR 76029189[ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD PTR 36[esi] + add ebx,edi + mov edi,ecx + rol ebx,23 + add ebx,ecx + ; R2 44 + xor edi,edx + xor edi,ebx + lea eax,DWORD PTR 3654602809[ebp*1+eax] + add eax,edi + rol eax,4 + mov ebp,DWORD PTR 48[esi] + mov edi,ebx + ; R2 45 + lea edx,DWORD PTR 3873151461[ebp*1+edx] + add eax,ebx + xor edi,ecx + xor edi,eax + mov ebp,DWORD PTR 60[esi] + add edx,edi + mov edi,eax + rol edx,11 + add edx,eax + ; R2 46 + xor edi,ebx + xor edi,edx + lea ecx,DWORD PTR 530742520[ebp*1+ecx] + add ecx,edi + rol ecx,16 + mov ebp,DWORD PTR 8[esi] + mov edi,edx + ; R2 47 + lea ebx,DWORD PTR 3299628645[ebp*1+ebx] + add ecx,edx + xor edi,eax + xor edi,ecx + mov ebp,DWORD PTR [esi] + add ebx,edi + mov edi,-1 + rol ebx,23 + add ebx,ecx + ; + ; R3 section + ; R3 48 + xor edi,edx + or edi,ebx + lea eax,DWORD PTR 4096336452[ebp*1+eax] + xor edi,ecx + mov ebp,DWORD PTR 28[esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 49 + or edi,eax + lea edx,DWORD PTR 1126891415[ebp*1+edx] + xor edi,ebx + mov ebp,DWORD PTR 56[esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 50 + or edi,edx + lea ecx,DWORD PTR 2878612391[ebp*1+ecx] + xor edi,eax + mov ebp,DWORD PTR 20[esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 51 + or edi,ecx + lea ebx,DWORD PTR 4237533241[ebp*1+ebx] + xor edi,edx + mov ebp,DWORD PTR 48[esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 52 + or edi,ebx + lea eax,DWORD PTR 1700485571[ebp*1+eax] + xor edi,ecx + mov ebp,DWORD PTR 12[esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 53 + or edi,eax + lea edx,DWORD PTR 2399980690[ebp*1+edx] + xor edi,ebx + mov ebp,DWORD PTR 40[esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 54 + or edi,edx + lea ecx,DWORD PTR 4293915773[ebp*1+ecx] + xor edi,eax + mov ebp,DWORD PTR 4[esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 55 + or edi,ecx + lea ebx,DWORD PTR 2240044497[ebp*1+ebx] + xor edi,edx + mov ebp,DWORD PTR 32[esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 56 + or edi,ebx + lea eax,DWORD PTR 1873313359[ebp*1+eax] + xor edi,ecx + mov ebp,DWORD PTR 60[esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 57 + or edi,eax + lea edx,DWORD PTR 4264355552[ebp*1+edx] + xor edi,ebx + mov ebp,DWORD PTR 24[esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 58 + or edi,edx + lea ecx,DWORD 
PTR 2734768916[ebp*1+ecx] + xor edi,eax + mov ebp,DWORD PTR 52[esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 59 + or edi,ecx + lea ebx,DWORD PTR 1309151649[ebp*1+ebx] + xor edi,edx + mov ebp,DWORD PTR 16[esi] + add ebx,edi + mov edi,-1 + rol ebx,21 + xor edi,edx + add ebx,ecx + ; R3 60 + or edi,ebx + lea eax,DWORD PTR 4149444226[ebp*1+eax] + xor edi,ecx + mov ebp,DWORD PTR 44[esi] + add eax,edi + mov edi,-1 + rol eax,6 + xor edi,ecx + add eax,ebx + ; R3 61 + or edi,eax + lea edx,DWORD PTR 3174756917[ebp*1+edx] + xor edi,ebx + mov ebp,DWORD PTR 8[esi] + add edx,edi + mov edi,-1 + rol edx,10 + xor edi,ebx + add edx,eax + ; R3 62 + or edi,edx + lea ecx,DWORD PTR 718787259[ebp*1+ecx] + xor edi,eax + mov ebp,DWORD PTR 36[esi] + add ecx,edi + mov edi,-1 + rol ecx,15 + xor edi,eax + add ecx,edx + ; R3 63 + or edi,ecx + lea ebx,DWORD PTR 3951481745[ebp*1+ebx] + xor edi,edx + mov ebp,DWORD PTR 24[esp] + add ebx,edi + add esi,64 + rol ebx,21 + mov edi,DWORD PTR [ebp] + add ebx,ecx + add eax,edi + mov edi,DWORD PTR 4[ebp] + add ebx,edi + mov edi,DWORD PTR 8[ebp] + add ecx,edi + mov edi,DWORD PTR 12[ebp] + add edx,edi + mov DWORD PTR [ebp],eax + mov DWORD PTR 4[ebp],ebx + mov edi,DWORD PTR [esp] + mov DWORD PTR 8[ebp],ecx + mov DWORD PTR 12[ebp],edx + cmp edi,esi + jae $L000start + pop eax + pop ebx + pop ebp + pop edi + pop esi + ret +_md5_block_asm_data_order ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/modes/ghash-x86.asm b/deps/openssl/asm_obsolete/x86-win32-masm/modes/ghash-x86.asm new file mode 100644 index 00000000000000..d18bb128c591d7 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/modes/ghash-x86.asm @@ -0,0 +1,1274 @@ +TITLE ghash-x86.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_gcm_gmult_4bit_x86 PROC PUBLIC +$L_gcm_gmult_4bit_x86_begin:: + push ebp + push ebx + push esi + push edi + sub esp,84 + mov edi,DWORD PTR 104[esp] + mov esi,DWORD PTR 108[esp] + mov ebp,DWORD PTR [edi] + mov edx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov ebx,DWORD PTR 12[edi] + mov DWORD PTR 16[esp],0 + mov DWORD PTR 20[esp],471859200 + mov DWORD PTR 24[esp],943718400 + mov DWORD PTR 28[esp],610271232 + mov DWORD PTR 32[esp],1887436800 + mov DWORD PTR 36[esp],1822425088 + mov DWORD PTR 40[esp],1220542464 + mov DWORD PTR 44[esp],1423966208 + mov DWORD PTR 48[esp],3774873600 + mov DWORD PTR 52[esp],4246732800 + mov DWORD PTR 56[esp],3644850176 + mov DWORD PTR 60[esp],3311403008 + mov DWORD PTR 64[esp],2441084928 + mov DWORD PTR 68[esp],2376073216 + mov DWORD PTR 72[esp],2847932416 + mov DWORD PTR 76[esp],3051356160 + mov DWORD PTR [esp],ebp + mov DWORD PTR 4[esp],edx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],ebx + shr ebx,20 + and ebx,240 + mov ebp,DWORD PTR 4[ebx*1+esi] + mov edx,DWORD PTR [ebx*1+esi] + mov ecx,DWORD PTR 12[ebx*1+esi] + mov ebx,DWORD PTR 8[ebx*1+esi] + xor eax,eax + mov edi,15 + jmp $L000x86_loop +ALIGN 16 +$L000x86_loop: + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD PTR 16[eax*4+esp] + mov al,BYTE PTR [edi*1+esp] + and al,240 + xor ebx,DWORD PTR 8[eax*1+esi] + xor ecx,DWORD PTR 12[eax*1+esi] + xor edx,DWORD PTR [eax*1+esi] + xor ebp,DWORD PTR 4[eax*1+esi] + dec edi + js $L001x86_break + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD PTR 16[eax*4+esp] + mov al,BYTE PTR [edi*1+esp] + shl al,4 + xor ebx,DWORD PTR 8[eax*1+esi] + xor ecx,DWORD PTR 12[eax*1+esi] + xor edx,DWORD PTR [eax*1+esi] + xor ebp,DWORD PTR 4[eax*1+esi] + jmp $L000x86_loop +ALIGN 16 +$L001x86_break: + bswap ebx + bswap ecx + bswap edx + bswap ebp + mov edi,DWORD PTR 104[esp] + mov DWORD PTR 12[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR [edi],ebp + add esp,84 + pop edi + pop esi + pop ebx + pop ebp + ret +_gcm_gmult_4bit_x86 ENDP +ALIGN 16 +_gcm_ghash_4bit_x86 PROC PUBLIC +$L_gcm_ghash_4bit_x86_begin:: + push ebp + push ebx + push esi + push edi + sub esp,84 + mov ebx,DWORD PTR 104[esp] + mov esi,DWORD PTR 108[esp] + mov edi,DWORD PTR 112[esp] + mov ecx,DWORD PTR 116[esp] + add ecx,edi + mov DWORD PTR 116[esp],ecx + mov ebp,DWORD PTR [ebx] + mov edx,DWORD PTR 4[ebx] + mov ecx,DWORD PTR 8[ebx] + mov ebx,DWORD PTR 12[ebx] + mov DWORD PTR 16[esp],0 + mov DWORD PTR 20[esp],471859200 + mov DWORD PTR 24[esp],943718400 + mov DWORD PTR 28[esp],610271232 + mov DWORD PTR 32[esp],1887436800 + mov DWORD PTR 36[esp],1822425088 + mov DWORD PTR 40[esp],1220542464 + mov DWORD PTR 44[esp],1423966208 + mov DWORD PTR 48[esp],3774873600 + mov DWORD PTR 52[esp],4246732800 + mov DWORD PTR 56[esp],3644850176 + mov DWORD PTR 60[esp],3311403008 + mov DWORD PTR 64[esp],2441084928 + mov DWORD PTR 68[esp],2376073216 + mov DWORD PTR 72[esp],2847932416 + mov DWORD PTR 76[esp],3051356160 +ALIGN 16 +$L002x86_outer_loop: + xor ebx,DWORD PTR 12[edi] + xor ecx,DWORD PTR 8[edi] + xor edx,DWORD PTR 4[edi] + xor ebp,DWORD PTR [edi] + mov DWORD PTR 12[esp],ebx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 4[esp],edx + mov DWORD PTR [esp],ebp + shr ebx,20 + and ebx,240 + mov ebp,DWORD PTR 4[ebx*1+esi] + mov edx,DWORD PTR [ebx*1+esi] 
+ mov ecx,DWORD PTR 12[ebx*1+esi] + mov ebx,DWORD PTR 8[ebx*1+esi] + xor eax,eax + mov edi,15 + jmp $L003x86_loop +ALIGN 16 +$L003x86_loop: + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD PTR 16[eax*4+esp] + mov al,BYTE PTR [edi*1+esp] + and al,240 + xor ebx,DWORD PTR 8[eax*1+esi] + xor ecx,DWORD PTR 12[eax*1+esi] + xor edx,DWORD PTR [eax*1+esi] + xor ebp,DWORD PTR 4[eax*1+esi] + dec edi + js $L004x86_break + mov al,bl + shrd ebx,ecx,4 + and al,15 + shrd ecx,edx,4 + shrd edx,ebp,4 + shr ebp,4 + xor ebp,DWORD PTR 16[eax*4+esp] + mov al,BYTE PTR [edi*1+esp] + shl al,4 + xor ebx,DWORD PTR 8[eax*1+esi] + xor ecx,DWORD PTR 12[eax*1+esi] + xor edx,DWORD PTR [eax*1+esi] + xor ebp,DWORD PTR 4[eax*1+esi] + jmp $L003x86_loop +ALIGN 16 +$L004x86_break: + bswap ebx + bswap ecx + bswap edx + bswap ebp + mov edi,DWORD PTR 112[esp] + lea edi,DWORD PTR 16[edi] + cmp edi,DWORD PTR 116[esp] + mov DWORD PTR 112[esp],edi + jb $L002x86_outer_loop + mov edi,DWORD PTR 104[esp] + mov DWORD PTR 12[edi],ebx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR 4[edi],edx + mov DWORD PTR [edi],ebp + add esp,84 + pop edi + pop esi + pop ebx + pop ebp + ret +_gcm_ghash_4bit_x86 ENDP +ALIGN 16 +_gcm_gmult_4bit_mmx PROC PUBLIC +$L_gcm_gmult_4bit_mmx_begin:: + push ebp + push ebx + push esi + push edi + mov edi,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + call $L005pic_point +$L005pic_point: + pop eax + lea eax,DWORD PTR ($Lrem_4bit-$L005pic_point)[eax] + movzx ebx,BYTE PTR 15[edi] + xor ecx,ecx + mov edx,ebx + mov cl,dl + mov ebp,14 + shl cl,4 + and edx,240 + movq mm0,QWORD PTR 8[ecx*1+esi] + movq mm1,QWORD PTR [ecx*1+esi] + movd ebx,mm0 + jmp $L006mmx_loop +ALIGN 16 +$L006mmx_loop: + psrlq mm0,4 + and ebx,15 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,QWORD PTR 8[edx*1+esi] + mov cl,BYTE PTR [ebp*1+edi] + psllq mm2,60 + pxor mm1,QWORD PTR [ebx*8+eax] + dec ebp + movd ebx,mm0 + pxor mm1,QWORD PTR [edx*1+esi] + mov edx,ecx + pxor mm0,mm2 + js $L007mmx_break + shl cl,4 + and ebx,15 + psrlq mm0,4 + and edx,240 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,QWORD PTR 8[ecx*1+esi] + psllq mm2,60 + pxor mm1,QWORD PTR [ebx*8+eax] + movd ebx,mm0 + pxor mm1,QWORD PTR [ecx*1+esi] + pxor mm0,mm2 + jmp $L006mmx_loop +ALIGN 16 +$L007mmx_break: + shl cl,4 + and ebx,15 + psrlq mm0,4 + and edx,240 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,QWORD PTR 8[ecx*1+esi] + psllq mm2,60 + pxor mm1,QWORD PTR [ebx*8+eax] + movd ebx,mm0 + pxor mm1,QWORD PTR [ecx*1+esi] + pxor mm0,mm2 + psrlq mm0,4 + and ebx,15 + movq mm2,mm1 + psrlq mm1,4 + pxor mm0,QWORD PTR 8[edx*1+esi] + psllq mm2,60 + pxor mm1,QWORD PTR [ebx*8+eax] + movd ebx,mm0 + pxor mm1,QWORD PTR [edx*1+esi] + pxor mm0,mm2 + psrlq mm0,32 + movd edx,mm1 + psrlq mm1,32 + movd ecx,mm0 + movd ebp,mm1 + bswap ebx + bswap edx + bswap ecx + bswap ebp + emms + mov DWORD PTR 12[edi],ebx + mov DWORD PTR 4[edi],edx + mov DWORD PTR 8[edi],ecx + mov DWORD PTR [edi],ebp + pop edi + pop esi + pop ebx + pop ebp + ret +_gcm_gmult_4bit_mmx ENDP +ALIGN 16 +_gcm_ghash_4bit_mmx PROC PUBLIC +$L_gcm_ghash_4bit_mmx_begin:: + push ebp + push ebx + push esi + push edi + mov eax,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + mov ecx,DWORD PTR 28[esp] + mov edx,DWORD PTR 32[esp] + mov ebp,esp + call $L008pic_point +$L008pic_point: + pop esi + lea esi,DWORD PTR ($Lrem_8bit-$L008pic_point)[esi] + sub esp,544 + and esp,-64 + sub esp,16 + add edx,ecx + mov DWORD PTR 544[esp],eax + mov DWORD PTR 552[esp],edx + mov DWORD PTR 556[esp],ebp + add ebx,128 + lea edi,DWORD PTR 144[esp] + lea 
ebp,DWORD PTR 400[esp] + mov edx,DWORD PTR [ebx-120] + movq mm0,QWORD PTR [ebx-120] + movq mm3,QWORD PTR [ebx-128] + shl edx,4 + mov BYTE PTR [esp],dl + mov edx,DWORD PTR [ebx-104] + movq mm2,QWORD PTR [ebx-104] + movq mm5,QWORD PTR [ebx-112] + movq QWORD PTR [edi-128],mm0 + psrlq mm0,4 + movq QWORD PTR [edi],mm3 + movq mm7,mm3 + psrlq mm3,4 + shl edx,4 + mov BYTE PTR 1[esp],dl + mov edx,DWORD PTR [ebx-88] + movq mm1,QWORD PTR [ebx-88] + psllq mm7,60 + movq mm4,QWORD PTR [ebx-96] + por mm0,mm7 + movq QWORD PTR [edi-120],mm2 + psrlq mm2,4 + movq QWORD PTR 8[edi],mm5 + movq mm6,mm5 + movq QWORD PTR [ebp-128],mm0 + psrlq mm5,4 + movq QWORD PTR [ebp],mm3 + shl edx,4 + mov BYTE PTR 2[esp],dl + mov edx,DWORD PTR [ebx-72] + movq mm0,QWORD PTR [ebx-72] + psllq mm6,60 + movq mm3,QWORD PTR [ebx-80] + por mm2,mm6 + movq QWORD PTR [edi-112],mm1 + psrlq mm1,4 + movq QWORD PTR 16[edi],mm4 + movq mm7,mm4 + movq QWORD PTR [ebp-120],mm2 + psrlq mm4,4 + movq QWORD PTR 8[ebp],mm5 + shl edx,4 + mov BYTE PTR 3[esp],dl + mov edx,DWORD PTR [ebx-56] + movq mm2,QWORD PTR [ebx-56] + psllq mm7,60 + movq mm5,QWORD PTR [ebx-64] + por mm1,mm7 + movq QWORD PTR [edi-104],mm0 + psrlq mm0,4 + movq QWORD PTR 24[edi],mm3 + movq mm6,mm3 + movq QWORD PTR [ebp-112],mm1 + psrlq mm3,4 + movq QWORD PTR 16[ebp],mm4 + shl edx,4 + mov BYTE PTR 4[esp],dl + mov edx,DWORD PTR [ebx-40] + movq mm1,QWORD PTR [ebx-40] + psllq mm6,60 + movq mm4,QWORD PTR [ebx-48] + por mm0,mm6 + movq QWORD PTR [edi-96],mm2 + psrlq mm2,4 + movq QWORD PTR 32[edi],mm5 + movq mm7,mm5 + movq QWORD PTR [ebp-104],mm0 + psrlq mm5,4 + movq QWORD PTR 24[ebp],mm3 + shl edx,4 + mov BYTE PTR 5[esp],dl + mov edx,DWORD PTR [ebx-24] + movq mm0,QWORD PTR [ebx-24] + psllq mm7,60 + movq mm3,QWORD PTR [ebx-32] + por mm2,mm7 + movq QWORD PTR [edi-88],mm1 + psrlq mm1,4 + movq QWORD PTR 40[edi],mm4 + movq mm6,mm4 + movq QWORD PTR [ebp-96],mm2 + psrlq mm4,4 + movq QWORD PTR 32[ebp],mm5 + shl edx,4 + mov BYTE PTR 6[esp],dl + mov edx,DWORD PTR [ebx-8] + movq mm2,QWORD PTR [ebx-8] + psllq mm6,60 + movq mm5,QWORD PTR [ebx-16] + por mm1,mm6 + movq QWORD PTR [edi-80],mm0 + psrlq mm0,4 + movq QWORD PTR 48[edi],mm3 + movq mm7,mm3 + movq QWORD PTR [ebp-88],mm1 + psrlq mm3,4 + movq QWORD PTR 40[ebp],mm4 + shl edx,4 + mov BYTE PTR 7[esp],dl + mov edx,DWORD PTR 8[ebx] + movq mm1,QWORD PTR 8[ebx] + psllq mm7,60 + movq mm4,QWORD PTR [ebx] + por mm0,mm7 + movq QWORD PTR [edi-72],mm2 + psrlq mm2,4 + movq QWORD PTR 56[edi],mm5 + movq mm6,mm5 + movq QWORD PTR [ebp-80],mm0 + psrlq mm5,4 + movq QWORD PTR 48[ebp],mm3 + shl edx,4 + mov BYTE PTR 8[esp],dl + mov edx,DWORD PTR 24[ebx] + movq mm0,QWORD PTR 24[ebx] + psllq mm6,60 + movq mm3,QWORD PTR 16[ebx] + por mm2,mm6 + movq QWORD PTR [edi-64],mm1 + psrlq mm1,4 + movq QWORD PTR 64[edi],mm4 + movq mm7,mm4 + movq QWORD PTR [ebp-72],mm2 + psrlq mm4,4 + movq QWORD PTR 56[ebp],mm5 + shl edx,4 + mov BYTE PTR 9[esp],dl + mov edx,DWORD PTR 40[ebx] + movq mm2,QWORD PTR 40[ebx] + psllq mm7,60 + movq mm5,QWORD PTR 32[ebx] + por mm1,mm7 + movq QWORD PTR [edi-56],mm0 + psrlq mm0,4 + movq QWORD PTR 72[edi],mm3 + movq mm6,mm3 + movq QWORD PTR [ebp-64],mm1 + psrlq mm3,4 + movq QWORD PTR 64[ebp],mm4 + shl edx,4 + mov BYTE PTR 10[esp],dl + mov edx,DWORD PTR 56[ebx] + movq mm1,QWORD PTR 56[ebx] + psllq mm6,60 + movq mm4,QWORD PTR 48[ebx] + por mm0,mm6 + movq QWORD PTR [edi-48],mm2 + psrlq mm2,4 + movq QWORD PTR 80[edi],mm5 + movq mm7,mm5 + movq QWORD PTR [ebp-56],mm0 + psrlq mm5,4 + movq QWORD PTR 72[ebp],mm3 + shl edx,4 + mov BYTE PTR 11[esp],dl + mov edx,DWORD PTR 
72[ebx] + movq mm0,QWORD PTR 72[ebx] + psllq mm7,60 + movq mm3,QWORD PTR 64[ebx] + por mm2,mm7 + movq QWORD PTR [edi-40],mm1 + psrlq mm1,4 + movq QWORD PTR 88[edi],mm4 + movq mm6,mm4 + movq QWORD PTR [ebp-48],mm2 + psrlq mm4,4 + movq QWORD PTR 80[ebp],mm5 + shl edx,4 + mov BYTE PTR 12[esp],dl + mov edx,DWORD PTR 88[ebx] + movq mm2,QWORD PTR 88[ebx] + psllq mm6,60 + movq mm5,QWORD PTR 80[ebx] + por mm1,mm6 + movq QWORD PTR [edi-32],mm0 + psrlq mm0,4 + movq QWORD PTR 96[edi],mm3 + movq mm7,mm3 + movq QWORD PTR [ebp-40],mm1 + psrlq mm3,4 + movq QWORD PTR 88[ebp],mm4 + shl edx,4 + mov BYTE PTR 13[esp],dl + mov edx,DWORD PTR 104[ebx] + movq mm1,QWORD PTR 104[ebx] + psllq mm7,60 + movq mm4,QWORD PTR 96[ebx] + por mm0,mm7 + movq QWORD PTR [edi-24],mm2 + psrlq mm2,4 + movq QWORD PTR 104[edi],mm5 + movq mm6,mm5 + movq QWORD PTR [ebp-32],mm0 + psrlq mm5,4 + movq QWORD PTR 96[ebp],mm3 + shl edx,4 + mov BYTE PTR 14[esp],dl + mov edx,DWORD PTR 120[ebx] + movq mm0,QWORD PTR 120[ebx] + psllq mm6,60 + movq mm3,QWORD PTR 112[ebx] + por mm2,mm6 + movq QWORD PTR [edi-16],mm1 + psrlq mm1,4 + movq QWORD PTR 112[edi],mm4 + movq mm7,mm4 + movq QWORD PTR [ebp-24],mm2 + psrlq mm4,4 + movq QWORD PTR 104[ebp],mm5 + shl edx,4 + mov BYTE PTR 15[esp],dl + psllq mm7,60 + por mm1,mm7 + movq QWORD PTR [edi-8],mm0 + psrlq mm0,4 + movq QWORD PTR 120[edi],mm3 + movq mm6,mm3 + movq QWORD PTR [ebp-16],mm1 + psrlq mm3,4 + movq QWORD PTR 112[ebp],mm4 + psllq mm6,60 + por mm0,mm6 + movq QWORD PTR [ebp-8],mm0 + movq QWORD PTR 120[ebp],mm3 + movq mm6,QWORD PTR [eax] + mov ebx,DWORD PTR 8[eax] + mov edx,DWORD PTR 12[eax] +ALIGN 16 +$L009outer: + xor edx,DWORD PTR 12[ecx] + xor ebx,DWORD PTR 8[ecx] + pxor mm6,QWORD PTR [ecx] + lea ecx,DWORD PTR 16[ecx] + mov DWORD PTR 536[esp],ebx + movq QWORD PTR 528[esp],mm6 + mov DWORD PTR 548[esp],ecx + xor eax,eax + rol edx,8 + mov al,dl + mov ebp,eax + and al,15 + shr ebp,4 + pxor mm0,mm0 + rol edx,8 + pxor mm1,mm1 + pxor mm2,mm2 + movq mm7,QWORD PTR 16[eax*8+esp] + movq mm6,QWORD PTR 144[eax*8+esp] + mov al,dl + movd ebx,mm7 + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + shr edi,4 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + shr ebp,4 + pinsrw mm2,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 536[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr ebp,4 + pinsrw mm0,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + movd 
ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr edi,4 + pinsrw mm2,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr ebp,4 + pinsrw mm1,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 532[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr edi,4 + pinsrw mm0,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr ebp,4 + pinsrw mm2,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr ebp,4 + pinsrw mm0,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 528[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr edi,4 + pinsrw mm2,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr ebp,4 + pinsrw mm1,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm1 + shr edi,4 + pinsrw mm0,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + mov al,dl + 
movd ecx,mm7 + movzx ebx,bl + psrlq mm7,8 + movq mm3,mm6 + mov ebp,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[edi*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm0 + shr ebp,4 + pinsrw mm2,WORD PTR [ebx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + rol edx,8 + pxor mm6,QWORD PTR 144[eax*8+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[edi*8+esp] + xor cl,BYTE PTR [edi*1+esp] + mov al,dl + mov edx,DWORD PTR 524[esp] + movd ebx,mm7 + movzx ecx,cl + psrlq mm7,8 + movq mm3,mm6 + mov edi,eax + psrlq mm6,8 + pxor mm7,QWORD PTR 272[ebp*8+esp] + and al,15 + psllq mm3,56 + pxor mm6,mm2 + shr edi,4 + pinsrw mm1,WORD PTR [ecx*2+esi],2 + pxor mm7,QWORD PTR 16[eax*8+esp] + pxor mm6,QWORD PTR 144[eax*8+esp] + xor bl,BYTE PTR [ebp*1+esp] + pxor mm7,mm3 + pxor mm6,QWORD PTR 400[ebp*8+esp] + movzx ebx,bl + pxor mm2,mm2 + psllq mm1,4 + movd ecx,mm7 + psrlq mm7,4 + movq mm3,mm6 + psrlq mm6,4 + shl ecx,4 + pxor mm7,QWORD PTR 16[edi*8+esp] + psllq mm3,60 + movzx ecx,cl + pxor mm7,mm3 + pxor mm6,QWORD PTR 144[edi*8+esp] + pinsrw mm0,WORD PTR [ebx*2+esi],2 + pxor mm6,mm1 + movd edx,mm7 + pinsrw mm2,WORD PTR [ecx*2+esi],3 + psllq mm0,12 + pxor mm6,mm0 + psrlq mm7,32 + pxor mm6,mm2 + mov ecx,DWORD PTR 548[esp] + movd ebx,mm7 + movq mm3,mm6 + psllw mm6,8 + psrlw mm3,8 + por mm6,mm3 + bswap edx + pshufw mm6,mm6,27 + bswap ebx + cmp ecx,DWORD PTR 552[esp] + jne $L009outer + mov eax,DWORD PTR 544[esp] + mov DWORD PTR 12[eax],edx + mov DWORD PTR 8[eax],ebx + movq QWORD PTR [eax],mm6 + mov esp,DWORD PTR 556[esp] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +_gcm_ghash_4bit_mmx ENDP +ALIGN 16 +_gcm_init_clmul PROC PUBLIC +$L_gcm_init_clmul_begin:: + mov edx,DWORD PTR 4[esp] + mov eax,DWORD PTR 8[esp] + call $L010pic +$L010pic: + pop ecx + lea ecx,DWORD PTR ($Lbswap-$L010pic)[ecx] + movdqu xmm2,XMMWORD PTR [eax] + pshufd xmm2,xmm2,78 + pshufd xmm4,xmm2,255 + movdqa xmm3,xmm2 + psllq xmm2,1 + pxor xmm5,xmm5 + psrlq xmm3,63 + pcmpgtd xmm5,xmm4 + pslldq xmm3,8 + por xmm2,xmm3 + pand xmm5,XMMWORD PTR 16[ecx] + pxor xmm2,xmm5 + movdqa xmm0,xmm2 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + pshufd xmm3,xmm2,78 + pshufd xmm4,xmm0,78 + pxor xmm3,xmm2 + movdqu XMMWORD PTR [edx],xmm2 + pxor xmm4,xmm0 + movdqu XMMWORD PTR 16[edx],xmm0 +DB 102,15,58,15,227,8 + movdqu XMMWORD PTR 32[edx],xmm4 + ret +_gcm_init_clmul ENDP +ALIGN 16 +_gcm_gmult_clmul PROC PUBLIC +$L_gcm_gmult_clmul_begin:: + mov eax,DWORD PTR 4[esp] + mov edx,DWORD PTR 8[esp] + call $L011pic +$L011pic: + pop ecx + lea ecx,DWORD PTR ($Lbswap-$L011pic)[ecx] + movdqu xmm0,XMMWORD PTR [eax] + movdqa xmm5,XMMWORD PTR [ecx] + movups xmm2,XMMWORD PTR [edx] +DB 102,15,56,0,197 + movups xmm4,XMMWORD PTR 32[edx] + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pxor xmm3,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 
+ pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,56,0,197 + movdqu XMMWORD PTR [eax],xmm0 + ret +_gcm_gmult_clmul ENDP +ALIGN 16 +_gcm_ghash_clmul PROC PUBLIC +$L_gcm_ghash_clmul_begin:: + push ebp + push ebx + push esi + push edi + mov eax,DWORD PTR 20[esp] + mov edx,DWORD PTR 24[esp] + mov esi,DWORD PTR 28[esp] + mov ebx,DWORD PTR 32[esp] + call $L012pic +$L012pic: + pop ecx + lea ecx,DWORD PTR ($Lbswap-$L012pic)[ecx] + movdqu xmm0,XMMWORD PTR [eax] + movdqa xmm5,XMMWORD PTR [ecx] + movdqu xmm2,XMMWORD PTR [edx] +DB 102,15,56,0,197 + sub ebx,16 + jz $L013odd_tail + movdqu xmm3,XMMWORD PTR [esi] + movdqu xmm6,XMMWORD PTR 16[esi] +DB 102,15,56,0,221 +DB 102,15,56,0,245 + movdqu xmm5,XMMWORD PTR 32[edx] + pxor xmm0,xmm3 + pshufd xmm3,xmm6,78 + movdqa xmm7,xmm6 + pxor xmm3,xmm6 + lea esi,DWORD PTR 32[esi] +DB 102,15,58,68,242,0 +DB 102,15,58,68,250,17 +DB 102,15,58,68,221,0 + movups xmm2,XMMWORD PTR 16[edx] + nop + sub ebx,32 + jbe $L014even_tail + jmp $L015mod_loop +ALIGN 32 +$L015mod_loop: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 + nop +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,229,16 + movups xmm2,XMMWORD PTR [edx] + xorps xmm0,xmm6 + movdqa xmm5,XMMWORD PTR [ecx] + xorps xmm1,xmm7 + movdqu xmm7,XMMWORD PTR [esi] + pxor xmm3,xmm0 + movdqu xmm6,XMMWORD PTR 16[esi] + pxor xmm3,xmm1 +DB 102,15,56,0,253 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 +DB 102,15,56,0,245 + pxor xmm1,xmm7 + movdqa xmm7,xmm6 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 +DB 102,15,58,68,242,0 + movups xmm5,XMMWORD PTR 32[edx] + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + pshufd xmm3,xmm7,78 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm3,xmm7 + pxor xmm1,xmm4 +DB 102,15,58,68,250,17 + movups xmm2,XMMWORD PTR 16[edx] + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +DB 102,15,58,68,221,0 + lea esi,DWORD PTR 32[esi] + sub ebx,32 + ja $L015mod_loop +$L014even_tail: + pshufd xmm4,xmm0,78 + movdqa xmm1,xmm0 + pxor xmm4,xmm0 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,229,16 + movdqa xmm5,XMMWORD PTR [ecx] + xorps xmm0,xmm6 + xorps xmm1,xmm7 + pxor xmm3,xmm0 + pxor xmm3,xmm1 + pxor xmm4,xmm3 + movdqa xmm3,xmm4 + psrldq xmm4,8 + pslldq xmm3,8 + pxor xmm1,xmm4 + pxor xmm0,xmm3 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 + test ebx,ebx + jnz $L016done + movups xmm2,XMMWORD PTR [edx] +$L013odd_tail: + movdqu xmm3,XMMWORD PTR [esi] +DB 102,15,56,0,221 + pxor xmm0,xmm3 + movdqa xmm1,xmm0 + pshufd xmm3,xmm0,78 + pshufd xmm4,xmm2,78 + pxor xmm3,xmm0 + pxor xmm4,xmm2 +DB 102,15,58,68,194,0 +DB 102,15,58,68,202,17 +DB 102,15,58,68,220,0 + xorps xmm3,xmm0 + xorps xmm3,xmm1 + movdqa xmm4,xmm3 + psrldq xmm3,8 + pslldq xmm4,8 + pxor xmm1,xmm3 + pxor xmm0,xmm4 + movdqa xmm4,xmm0 + movdqa xmm3,xmm0 + psllq xmm0,5 + pxor xmm3,xmm0 + psllq xmm0,1 + pxor 
xmm0,xmm3 + psllq xmm0,57 + movdqa xmm3,xmm0 + pslldq xmm0,8 + psrldq xmm3,8 + pxor xmm0,xmm4 + pxor xmm1,xmm3 + movdqa xmm4,xmm0 + psrlq xmm0,1 + pxor xmm1,xmm4 + pxor xmm4,xmm0 + psrlq xmm0,5 + pxor xmm0,xmm4 + psrlq xmm0,1 + pxor xmm0,xmm1 +$L016done: +DB 102,15,56,0,197 + movdqu XMMWORD PTR [eax],xmm0 + pop edi + pop esi + pop ebx + pop ebp + ret +_gcm_ghash_clmul ENDP +ALIGN 64 +$Lbswap:: +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +ALIGN 64 +$Lrem_8bit:: +DW 0,450,900,582,1800,1738,1164,1358 +DW 3600,4050,3476,3158,2328,2266,2716,2910 +DW 7200,7650,8100,7782,6952,6890,6316,6510 +DW 4656,5106,4532,4214,5432,5370,5820,6014 +DW 14400,14722,15300,14854,16200,16010,15564,15630 +DW 13904,14226,13780,13334,12632,12442,13020,13086 +DW 9312,9634,10212,9766,9064,8874,8428,8494 +DW 10864,11186,10740,10294,11640,11450,12028,12094 +DW 28800,28994,29444,29382,30600,30282,29708,30158 +DW 32400,32594,32020,31958,31128,30810,31260,31710 +DW 27808,28002,28452,28390,27560,27242,26668,27118 +DW 25264,25458,24884,24822,26040,25722,26172,26622 +DW 18624,18690,19268,19078,20424,19978,19532,19854 +DW 18128,18194,17748,17558,16856,16410,16988,17310 +DW 21728,21794,22372,22182,21480,21034,20588,20910 +DW 23280,23346,22900,22710,24056,23610,24188,24510 +DW 57600,57538,57988,58182,58888,59338,58764,58446 +DW 61200,61138,60564,60758,59416,59866,60316,59998 +DW 64800,64738,65188,65382,64040,64490,63916,63598 +DW 62256,62194,61620,61814,62520,62970,63420,63102 +DW 55616,55426,56004,56070,56904,57226,56780,56334 +DW 55120,54930,54484,54550,53336,53658,54236,53790 +DW 50528,50338,50916,50982,49768,50090,49644,49198 +DW 52080,51890,51444,51510,52344,52666,53244,52798 +DW 37248,36930,37380,37830,38536,38730,38156,38094 +DW 40848,40530,39956,40406,39064,39258,39708,39646 +DW 36256,35938,36388,36838,35496,35690,35116,35054 +DW 33712,33394,32820,33270,33976,34170,34620,34558 +DW 43456,43010,43588,43910,44744,44810,44364,44174 +DW 42960,42514,42068,42390,41176,41242,41820,41630 +DW 46560,46114,46692,47014,45800,45866,45420,45230 +DW 48112,47666,47220,47542,48376,48442,49020,48830 +ALIGN 64 +$Lrem_4bit:: +DD 0,0,0,471859200 +DD 0,943718400,0,610271232 +DD 0,1887436800,0,1822425088 +DD 0,1220542464,0,1423966208 +DD 0,3774873600,0,4246732800 +DD 0,3644850176,0,3311403008 +DD 0,2441084928,0,2376073216 +DD 0,2847932416,0,3051356160 +DB 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +DB 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +DB 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +DB 0 +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/rc4/rc4-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/rc4/rc4-586.asm new file mode 100644 index 00000000000000..90ab38ab8a1f0a --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/rc4/rc4-586.asm @@ -0,0 +1,388 @@ +TITLE rc4-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_RC4 PROC PUBLIC +$L_RC4_begin:: + push ebp + push ebx + push esi + push edi + mov edi,DWORD PTR 20[esp] + mov edx,DWORD PTR 24[esp] + mov esi,DWORD PTR 28[esp] + mov ebp,DWORD PTR 32[esp] + xor eax,eax + xor ebx,ebx + cmp edx,0 + je $L000abort + mov al,BYTE PTR [edi] + mov bl,BYTE PTR 4[edi] + add edi,8 + lea ecx,DWORD PTR [edx*1+esi] + sub ebp,esi + mov DWORD PTR 24[esp],ecx + inc al + cmp DWORD PTR 256[edi],-1 + je $L001RC4_CHAR + mov ecx,DWORD PTR [eax*4+edi] + and edx,-4 + jz $L002loop1 + mov DWORD PTR 32[esp],ebp + test edx,-8 + jz $L003go4loop4 + lea ebp,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [ebp],26 + jnc $L003go4loop4 + mov ebp,DWORD PTR 32[esp] + and edx,-8 + lea edx,DWORD PTR [edx*1+esi-8] + mov DWORD PTR [edi-4],edx + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + movq mm0,QWORD PTR [esi] + mov ecx,DWORD PTR [eax*4+edi] + movd mm2,DWORD PTR [edx*4+edi] + jmp $L004loop_mmx_enter +ALIGN 16 +$L005loop_mmx: + add bl,cl + psllq mm1,56 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + movq mm0,QWORD PTR [esi] + movq QWORD PTR [esi*1+ebp-8],mm2 + mov ecx,DWORD PTR [eax*4+edi] + movd mm2,DWORD PTR [edx*4+edi] +$L004loop_mmx_enter: + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm0 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,8 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,16 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,24 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,32 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,40 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + add bl,cl + psllq mm1,48 + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + inc eax + add edx,ecx + movzx eax,al + movzx edx,dl + pxor mm2,mm1 + mov ecx,DWORD PTR [eax*4+edi] + movd mm1,DWORD PTR [edx*4+edi] + mov edx,ebx + xor ebx,ebx + mov bl,dl + cmp esi,DWORD PTR [edi-4] + lea esi,DWORD PTR 8[esi] + jb $L005loop_mmx + psllq mm1,56 + pxor mm2,mm1 + movq QWORD PTR [esi*1+ebp-8],mm2 + emms + 
cmp esi,DWORD PTR 24[esp] + je $L006done + jmp $L002loop1 +ALIGN 16 +$L003go4loop4: + lea edx,DWORD PTR [edx*1+esi-4] + mov DWORD PTR 28[esp],edx +$L007loop4: + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + mov ecx,DWORD PTR [eax*4+edi] + mov ebp,DWORD PTR [edx*4+edi] + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + ror ebp,8 + mov ecx,DWORD PTR [eax*4+edi] + or ebp,DWORD PTR [edx*4+edi] + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + ror ebp,8 + mov ecx,DWORD PTR [eax*4+edi] + or ebp,DWORD PTR [edx*4+edi] + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + ror ebp,8 + mov ecx,DWORD PTR 32[esp] + or ebp,DWORD PTR [edx*4+edi] + ror ebp,8 + xor ebp,DWORD PTR [esi] + cmp esi,DWORD PTR 28[esp] + mov DWORD PTR [esi*1+ecx],ebp + lea esi,DWORD PTR 4[esi] + mov ecx,DWORD PTR [eax*4+edi] + jb $L007loop4 + cmp esi,DWORD PTR 24[esp] + je $L006done + mov ebp,DWORD PTR 32[esp] +ALIGN 16 +$L002loop1: + add bl,cl + mov edx,DWORD PTR [ebx*4+edi] + mov DWORD PTR [ebx*4+edi],ecx + mov DWORD PTR [eax*4+edi],edx + add edx,ecx + inc al + and edx,255 + mov edx,DWORD PTR [edx*4+edi] + xor dl,BYTE PTR [esi] + lea esi,DWORD PTR 1[esi] + mov ecx,DWORD PTR [eax*4+edi] + cmp esi,DWORD PTR 24[esp] + mov BYTE PTR [esi*1+ebp-1],dl + jb $L002loop1 + jmp $L006done +ALIGN 16 +$L001RC4_CHAR: + movzx ecx,BYTE PTR [eax*1+edi] +$L008cloop1: + add bl,cl + movzx edx,BYTE PTR [ebx*1+edi] + mov BYTE PTR [ebx*1+edi],cl + mov BYTE PTR [eax*1+edi],dl + add dl,cl + movzx edx,BYTE PTR [edx*1+edi] + add al,1 + xor dl,BYTE PTR [esi] + lea esi,DWORD PTR 1[esi] + movzx ecx,BYTE PTR [eax*1+edi] + cmp esi,DWORD PTR 24[esp] + mov BYTE PTR [esi*1+ebp-1],dl + jb $L008cloop1 +$L006done: + dec al + mov DWORD PTR [edi-4],ebx + mov BYTE PTR [edi-8],al +$L000abort: + pop edi + pop esi + pop ebx + pop ebp + ret +_RC4 ENDP +ALIGN 16 +_private_RC4_set_key PROC PUBLIC +$L_private_RC4_set_key_begin:: + push ebp + push ebx + push esi + push edi + mov edi,DWORD PTR 20[esp] + mov ebp,DWORD PTR 24[esp] + mov esi,DWORD PTR 28[esp] + lea edx,DWORD PTR _OPENSSL_ia32cap_P + lea edi,DWORD PTR 8[edi] + lea esi,DWORD PTR [ebp*1+esi] + neg ebp + xor eax,eax + mov DWORD PTR [edi-4],ebp + bt DWORD PTR [edx],20 + jc $L009c1stloop +ALIGN 16 +$L010w1stloop: + mov DWORD PTR [eax*4+edi],eax + add al,1 + jnc $L010w1stloop + xor ecx,ecx + xor edx,edx +ALIGN 16 +$L011w2ndloop: + mov eax,DWORD PTR [ecx*4+edi] + add dl,BYTE PTR [ebp*1+esi] + add dl,al + add ebp,1 + mov ebx,DWORD PTR [edx*4+edi] + jnz $L012wnowrap + mov ebp,DWORD PTR [edi-4] +$L012wnowrap: + mov DWORD PTR [edx*4+edi],eax + mov DWORD PTR [ecx*4+edi],ebx + add cl,1 + jnc $L011w2ndloop + jmp $L013exit +ALIGN 16 +$L009c1stloop: + mov BYTE PTR [eax*1+edi],al + add al,1 + jnc $L009c1stloop + xor ecx,ecx + xor edx,edx + xor ebx,ebx +ALIGN 16 +$L014c2ndloop: + mov al,BYTE PTR [ecx*1+edi] + add dl,BYTE PTR [ebp*1+esi] + add dl,al + add ebp,1 + mov bl,BYTE PTR [edx*1+edi] + jnz $L015cnowrap + mov ebp,DWORD PTR [edi-4] +$L015cnowrap: + mov BYTE PTR [edx*1+edi],al + mov BYTE PTR [ecx*1+edi],bl + add cl,1 + jnc $L014c2ndloop + mov DWORD PTR 256[edi],-1 +$L013exit: + xor eax,eax + mov DWORD PTR [edi-8],eax + mov DWORD PTR [edi-4],eax + pop 
edi + pop esi + pop ebx + pop ebp + ret +_private_RC4_set_key ENDP +ALIGN 16 +_RC4_options PROC PUBLIC +$L_RC4_options_begin:: + call $L016pic_point +$L016pic_point: + pop eax + lea eax,DWORD PTR ($L017opts-$L016pic_point)[eax] + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov edx,DWORD PTR [edx] + bt edx,20 + jc $L0181xchar + bt edx,26 + jnc $L019ret + add eax,25 + ret +$L0181xchar: + add eax,12 +$L019ret: + ret +ALIGN 64 +$L017opts: +DB 114,99,52,40,52,120,44,105,110,116,41,0 +DB 114,99,52,40,49,120,44,99,104,97,114,41,0 +DB 114,99,52,40,56,120,44,109,109,120,41,0 +DB 82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89 +DB 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +DB 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +ALIGN 64 +_RC4_options ENDP +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/ripemd/rmd-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/ripemd/rmd-586.asm new file mode 100644 index 00000000000000..9bcd60a0b9120c --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/ripemd/rmd-586.asm @@ -0,0 +1,1975 @@ +TITLE ../openssl/crypto/ripemd/asm/rmd-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_ripemd160_block_asm_data_order PROC PUBLIC +$L_ripemd160_block_asm_data_order_begin:: + mov edx,DWORD PTR 4[esp] + mov eax,DWORD PTR 8[esp] + push esi + mov ecx,DWORD PTR [edx] + push edi + mov esi,DWORD PTR 4[edx] + push ebp + mov edi,DWORD PTR 8[edx] + push ebx + sub esp,108 +$L000start: + ; + mov ebx,DWORD PTR [eax] + mov ebp,DWORD PTR 4[eax] + mov DWORD PTR [esp],ebx + mov DWORD PTR 4[esp],ebp + mov ebx,DWORD PTR 8[eax] + mov ebp,DWORD PTR 12[eax] + mov DWORD PTR 8[esp],ebx + mov DWORD PTR 12[esp],ebp + mov ebx,DWORD PTR 16[eax] + mov ebp,DWORD PTR 20[eax] + mov DWORD PTR 16[esp],ebx + mov DWORD PTR 20[esp],ebp + mov ebx,DWORD PTR 24[eax] + mov ebp,DWORD PTR 28[eax] + mov DWORD PTR 24[esp],ebx + mov DWORD PTR 28[esp],ebp + mov ebx,DWORD PTR 32[eax] + mov ebp,DWORD PTR 36[eax] + mov DWORD PTR 32[esp],ebx + mov DWORD PTR 36[esp],ebp + mov ebx,DWORD PTR 40[eax] + mov ebp,DWORD PTR 44[eax] + mov DWORD PTR 40[esp],ebx + mov DWORD PTR 44[esp],ebp + mov ebx,DWORD PTR 48[eax] + mov ebp,DWORD PTR 52[eax] + mov DWORD PTR 48[esp],ebx + mov DWORD PTR 52[esp],ebp + mov ebx,DWORD PTR 56[eax] + mov ebp,DWORD PTR 60[eax] + mov DWORD PTR 56[esp],ebx + mov DWORD PTR 60[esp],ebp + mov eax,edi + mov ebx,DWORD PTR 12[edx] + mov ebp,DWORD PTR 16[edx] + ; 0 + xor eax,ebx + mov edx,DWORD PTR [esp] + xor eax,esi + add ecx,edx + rol edi,10 + add ecx,eax + mov eax,esi + rol ecx,11 + add ecx,ebp + ; 1 + xor eax,edi + mov edx,DWORD PTR 4[esp] + xor eax,ecx + add ebp,eax + mov eax,ecx + rol esi,10 + add ebp,edx + xor eax,esi + rol ebp,14 + add ebp,ebx + ; 2 + mov edx,DWORD PTR 8[esp] + xor eax,ebp + add ebx,edx + rol ecx,10 + add ebx,eax + mov eax,ebp + rol ebx,15 + add ebx,edi + ; 3 + xor eax,ecx + mov edx,DWORD PTR 12[esp] + xor eax,ebx + add edi,eax + mov eax,ebx + rol ebp,10 + add edi,edx + xor eax,ebp + rol edi,12 + add edi,esi + ; 4 + mov edx,DWORD PTR 16[esp] + xor eax,edi + add esi,edx + rol ebx,10 + add esi,eax + mov eax,edi + rol esi,5 + add esi,ecx + ; 5 + xor eax,ebx + mov edx,DWORD PTR 20[esp] + xor eax,esi + add ecx,eax + mov eax,esi + rol edi,10 + add ecx,edx + xor eax,edi + rol ecx,8 + add ecx,ebp + ; 6 + mov 
edx,DWORD PTR 24[esp] + xor eax,ecx + add ebp,edx + rol esi,10 + add ebp,eax + mov eax,ecx + rol ebp,7 + add ebp,ebx + ; 7 + xor eax,esi + mov edx,DWORD PTR 28[esp] + xor eax,ebp + add ebx,eax + mov eax,ebp + rol ecx,10 + add ebx,edx + xor eax,ecx + rol ebx,9 + add ebx,edi + ; 8 + mov edx,DWORD PTR 32[esp] + xor eax,ebx + add edi,edx + rol ebp,10 + add edi,eax + mov eax,ebx + rol edi,11 + add edi,esi + ; 9 + xor eax,ebp + mov edx,DWORD PTR 36[esp] + xor eax,edi + add esi,eax + mov eax,edi + rol ebx,10 + add esi,edx + xor eax,ebx + rol esi,13 + add esi,ecx + ; 10 + mov edx,DWORD PTR 40[esp] + xor eax,esi + add ecx,edx + rol edi,10 + add ecx,eax + mov eax,esi + rol ecx,14 + add ecx,ebp + ; 11 + xor eax,edi + mov edx,DWORD PTR 44[esp] + xor eax,ecx + add ebp,eax + mov eax,ecx + rol esi,10 + add ebp,edx + xor eax,esi + rol ebp,15 + add ebp,ebx + ; 12 + mov edx,DWORD PTR 48[esp] + xor eax,ebp + add ebx,edx + rol ecx,10 + add ebx,eax + mov eax,ebp + rol ebx,6 + add ebx,edi + ; 13 + xor eax,ecx + mov edx,DWORD PTR 52[esp] + xor eax,ebx + add edi,eax + mov eax,ebx + rol ebp,10 + add edi,edx + xor eax,ebp + rol edi,7 + add edi,esi + ; 14 + mov edx,DWORD PTR 56[esp] + xor eax,edi + add esi,edx + rol ebx,10 + add esi,eax + mov eax,edi + rol esi,9 + add esi,ecx + ; 15 + xor eax,ebx + mov edx,DWORD PTR 60[esp] + xor eax,esi + add ecx,eax + mov eax,-1 + rol edi,10 + add ecx,edx + mov edx,DWORD PTR 28[esp] + rol ecx,8 + add ecx,ebp + ; 16 + add ebp,edx + mov edx,esi + sub eax,ecx + and edx,ecx + and eax,edi + or edx,eax + mov eax,DWORD PTR 16[esp] + rol esi,10 + lea ebp,DWORD PTR 1518500249[edx*1+ebp] + mov edx,-1 + rol ebp,7 + add ebp,ebx + ; 17 + add ebx,eax + mov eax,ecx + sub edx,ebp + and eax,ebp + and edx,esi + or eax,edx + mov edx,DWORD PTR 52[esp] + rol ecx,10 + lea ebx,DWORD PTR 1518500249[eax*1+ebx] + mov eax,-1 + rol ebx,6 + add ebx,edi + ; 18 + add edi,edx + mov edx,ebp + sub eax,ebx + and edx,ebx + and eax,ecx + or edx,eax + mov eax,DWORD PTR 4[esp] + rol ebp,10 + lea edi,DWORD PTR 1518500249[edx*1+edi] + mov edx,-1 + rol edi,8 + add edi,esi + ; 19 + add esi,eax + mov eax,ebx + sub edx,edi + and eax,edi + and edx,ebp + or eax,edx + mov edx,DWORD PTR 40[esp] + rol ebx,10 + lea esi,DWORD PTR 1518500249[eax*1+esi] + mov eax,-1 + rol esi,13 + add esi,ecx + ; 20 + add ecx,edx + mov edx,edi + sub eax,esi + and edx,esi + and eax,ebx + or edx,eax + mov eax,DWORD PTR 24[esp] + rol edi,10 + lea ecx,DWORD PTR 1518500249[edx*1+ecx] + mov edx,-1 + rol ecx,11 + add ecx,ebp + ; 21 + add ebp,eax + mov eax,esi + sub edx,ecx + and eax,ecx + and edx,edi + or eax,edx + mov edx,DWORD PTR 60[esp] + rol esi,10 + lea ebp,DWORD PTR 1518500249[eax*1+ebp] + mov eax,-1 + rol ebp,9 + add ebp,ebx + ; 22 + add ebx,edx + mov edx,ecx + sub eax,ebp + and edx,ebp + and eax,esi + or edx,eax + mov eax,DWORD PTR 12[esp] + rol ecx,10 + lea ebx,DWORD PTR 1518500249[edx*1+ebx] + mov edx,-1 + rol ebx,7 + add ebx,edi + ; 23 + add edi,eax + mov eax,ebp + sub edx,ebx + and eax,ebx + and edx,ecx + or eax,edx + mov edx,DWORD PTR 48[esp] + rol ebp,10 + lea edi,DWORD PTR 1518500249[eax*1+edi] + mov eax,-1 + rol edi,15 + add edi,esi + ; 24 + add esi,edx + mov edx,ebx + sub eax,edi + and edx,edi + and eax,ebp + or edx,eax + mov eax,DWORD PTR [esp] + rol ebx,10 + lea esi,DWORD PTR 1518500249[edx*1+esi] + mov edx,-1 + rol esi,7 + add esi,ecx + ; 25 + add ecx,eax + mov eax,edi + sub edx,esi + and eax,esi + and edx,ebx + or eax,edx + mov edx,DWORD PTR 36[esp] + rol edi,10 + lea ecx,DWORD PTR 1518500249[eax*1+ecx] + mov eax,-1 + rol ecx,12 + add 
ecx,ebp + ; 26 + add ebp,edx + mov edx,esi + sub eax,ecx + and edx,ecx + and eax,edi + or edx,eax + mov eax,DWORD PTR 20[esp] + rol esi,10 + lea ebp,DWORD PTR 1518500249[edx*1+ebp] + mov edx,-1 + rol ebp,15 + add ebp,ebx + ; 27 + add ebx,eax + mov eax,ecx + sub edx,ebp + and eax,ebp + and edx,esi + or eax,edx + mov edx,DWORD PTR 8[esp] + rol ecx,10 + lea ebx,DWORD PTR 1518500249[eax*1+ebx] + mov eax,-1 + rol ebx,9 + add ebx,edi + ; 28 + add edi,edx + mov edx,ebp + sub eax,ebx + and edx,ebx + and eax,ecx + or edx,eax + mov eax,DWORD PTR 56[esp] + rol ebp,10 + lea edi,DWORD PTR 1518500249[edx*1+edi] + mov edx,-1 + rol edi,11 + add edi,esi + ; 29 + add esi,eax + mov eax,ebx + sub edx,edi + and eax,edi + and edx,ebp + or eax,edx + mov edx,DWORD PTR 44[esp] + rol ebx,10 + lea esi,DWORD PTR 1518500249[eax*1+esi] + mov eax,-1 + rol esi,7 + add esi,ecx + ; 30 + add ecx,edx + mov edx,edi + sub eax,esi + and edx,esi + and eax,ebx + or edx,eax + mov eax,DWORD PTR 32[esp] + rol edi,10 + lea ecx,DWORD PTR 1518500249[edx*1+ecx] + mov edx,-1 + rol ecx,13 + add ecx,ebp + ; 31 + add ebp,eax + mov eax,esi + sub edx,ecx + and eax,ecx + and edx,edi + or eax,edx + mov edx,-1 + rol esi,10 + lea ebp,DWORD PTR 1518500249[eax*1+ebp] + sub edx,ecx + rol ebp,12 + add ebp,ebx + ; 32 + mov eax,DWORD PTR 12[esp] + or edx,ebp + add ebx,eax + xor edx,esi + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 1859775393[edx*1+ebx] + sub eax,ebp + rol ebx,11 + add ebx,edi + ; 33 + mov edx,DWORD PTR 40[esp] + or eax,ebx + add edi,edx + xor eax,ecx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 1859775393[eax*1+edi] + sub edx,ebx + rol edi,13 + add edi,esi + ; 34 + mov eax,DWORD PTR 56[esp] + or edx,edi + add esi,eax + xor edx,ebp + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 1859775393[edx*1+esi] + sub eax,edi + rol esi,6 + add esi,ecx + ; 35 + mov edx,DWORD PTR 16[esp] + or eax,esi + add ecx,edx + xor eax,ebx + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 1859775393[eax*1+ecx] + sub edx,esi + rol ecx,7 + add ecx,ebp + ; 36 + mov eax,DWORD PTR 36[esp] + or edx,ecx + add ebp,eax + xor edx,edi + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 1859775393[edx*1+ebp] + sub eax,ecx + rol ebp,14 + add ebp,ebx + ; 37 + mov edx,DWORD PTR 60[esp] + or eax,ebp + add ebx,edx + xor eax,esi + mov edx,-1 + rol ecx,10 + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + sub edx,ebp + rol ebx,9 + add ebx,edi + ; 38 + mov eax,DWORD PTR 32[esp] + or edx,ebx + add edi,eax + xor edx,ecx + mov eax,-1 + rol ebp,10 + lea edi,DWORD PTR 1859775393[edx*1+edi] + sub eax,ebx + rol edi,13 + add edi,esi + ; 39 + mov edx,DWORD PTR 4[esp] + or eax,edi + add esi,edx + xor eax,ebp + mov edx,-1 + rol ebx,10 + lea esi,DWORD PTR 1859775393[eax*1+esi] + sub edx,edi + rol esi,15 + add esi,ecx + ; 40 + mov eax,DWORD PTR 8[esp] + or edx,esi + add ecx,eax + xor edx,ebx + mov eax,-1 + rol edi,10 + lea ecx,DWORD PTR 1859775393[edx*1+ecx] + sub eax,esi + rol ecx,14 + add ecx,ebp + ; 41 + mov edx,DWORD PTR 28[esp] + or eax,ecx + add ebp,edx + xor eax,edi + mov edx,-1 + rol esi,10 + lea ebp,DWORD PTR 1859775393[eax*1+ebp] + sub edx,ecx + rol ebp,8 + add ebp,ebx + ; 42 + mov eax,DWORD PTR [esp] + or edx,ebp + add ebx,eax + xor edx,esi + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 1859775393[edx*1+ebx] + sub eax,ebp + rol ebx,13 + add ebx,edi + ; 43 + mov edx,DWORD PTR 24[esp] + or eax,ebx + add edi,edx + xor eax,ecx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 1859775393[eax*1+edi] + sub edx,ebx + rol edi,6 + add edi,esi + ; 44 + mov eax,DWORD PTR 52[esp] + or edx,edi + add esi,eax + xor 
edx,ebp + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 1859775393[edx*1+esi] + sub eax,edi + rol esi,5 + add esi,ecx + ; 45 + mov edx,DWORD PTR 44[esp] + or eax,esi + add ecx,edx + xor eax,ebx + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 1859775393[eax*1+ecx] + sub edx,esi + rol ecx,12 + add ecx,ebp + ; 46 + mov eax,DWORD PTR 20[esp] + or edx,ecx + add ebp,eax + xor edx,edi + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 1859775393[edx*1+ebp] + sub eax,ecx + rol ebp,7 + add ebp,ebx + ; 47 + mov edx,DWORD PTR 48[esp] + or eax,ebp + add ebx,edx + xor eax,esi + mov edx,-1 + rol ecx,10 + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,ecx + rol ebx,5 + add ebx,edi + ; 48 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 4[esp] + rol ebp,10 + lea edi,DWORD PTR 2400959708[edx*1+edi] + mov edx,-1 + add edi,eax + mov eax,ebp + rol edi,11 + add edi,esi + ; 49 + sub edx,ebp + and eax,edi + and edx,ebx + or edx,eax + mov eax,DWORD PTR 36[esp] + rol ebx,10 + lea esi,DWORD PTR 2400959708[edx*1+esi] + mov edx,-1 + add esi,eax + mov eax,ebx + rol esi,12 + add esi,ecx + ; 50 + sub edx,ebx + and eax,esi + and edx,edi + or edx,eax + mov eax,DWORD PTR 44[esp] + rol edi,10 + lea ecx,DWORD PTR 2400959708[edx*1+ecx] + mov edx,-1 + add ecx,eax + mov eax,edi + rol ecx,14 + add ecx,ebp + ; 51 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 40[esp] + rol esi,10 + lea ebp,DWORD PTR 2400959708[edx*1+ebp] + mov edx,-1 + add ebp,eax + mov eax,esi + rol ebp,15 + add ebp,ebx + ; 52 + sub edx,esi + and eax,ebp + and edx,ecx + or edx,eax + mov eax,DWORD PTR [esp] + rol ecx,10 + lea ebx,DWORD PTR 2400959708[edx*1+ebx] + mov edx,-1 + add ebx,eax + mov eax,ecx + rol ebx,14 + add ebx,edi + ; 53 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 32[esp] + rol ebp,10 + lea edi,DWORD PTR 2400959708[edx*1+edi] + mov edx,-1 + add edi,eax + mov eax,ebp + rol edi,15 + add edi,esi + ; 54 + sub edx,ebp + and eax,edi + and edx,ebx + or edx,eax + mov eax,DWORD PTR 48[esp] + rol ebx,10 + lea esi,DWORD PTR 2400959708[edx*1+esi] + mov edx,-1 + add esi,eax + mov eax,ebx + rol esi,9 + add esi,ecx + ; 55 + sub edx,ebx + and eax,esi + and edx,edi + or edx,eax + mov eax,DWORD PTR 16[esp] + rol edi,10 + lea ecx,DWORD PTR 2400959708[edx*1+ecx] + mov edx,-1 + add ecx,eax + mov eax,edi + rol ecx,8 + add ecx,ebp + ; 56 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 52[esp] + rol esi,10 + lea ebp,DWORD PTR 2400959708[edx*1+ebp] + mov edx,-1 + add ebp,eax + mov eax,esi + rol ebp,9 + add ebp,ebx + ; 57 + sub edx,esi + and eax,ebp + and edx,ecx + or edx,eax + mov eax,DWORD PTR 12[esp] + rol ecx,10 + lea ebx,DWORD PTR 2400959708[edx*1+ebx] + mov edx,-1 + add ebx,eax + mov eax,ecx + rol ebx,14 + add ebx,edi + ; 58 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 28[esp] + rol ebp,10 + lea edi,DWORD PTR 2400959708[edx*1+edi] + mov edx,-1 + add edi,eax + mov eax,ebp + rol edi,5 + add edi,esi + ; 59 + sub edx,ebp + and eax,edi + and edx,ebx + or edx,eax + mov eax,DWORD PTR 60[esp] + rol ebx,10 + lea esi,DWORD PTR 2400959708[edx*1+esi] + mov edx,-1 + add esi,eax + mov eax,ebx + rol esi,6 + add esi,ecx + ; 60 + sub edx,ebx + and eax,esi + and edx,edi + or edx,eax + mov eax,DWORD PTR 56[esp] + rol edi,10 + lea ecx,DWORD PTR 2400959708[edx*1+ecx] + mov edx,-1 + add ecx,eax + mov eax,edi + rol ecx,8 + add ecx,ebp + ; 61 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 20[esp] + rol esi,10 + lea ebp,DWORD PTR 
2400959708[edx*1+ebp] + mov edx,-1 + add ebp,eax + mov eax,esi + rol ebp,6 + add ebp,ebx + ; 62 + sub edx,esi + and eax,ebp + and edx,ecx + or edx,eax + mov eax,DWORD PTR 24[esp] + rol ecx,10 + lea ebx,DWORD PTR 2400959708[edx*1+ebx] + mov edx,-1 + add ebx,eax + mov eax,ecx + rol ebx,5 + add ebx,edi + ; 63 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 8[esp] + rol ebp,10 + lea edi,DWORD PTR 2400959708[edx*1+edi] + mov edx,-1 + add edi,eax + sub edx,ebp + rol edi,12 + add edi,esi + ; 64 + mov eax,DWORD PTR 16[esp] + or edx,ebx + add esi,eax + xor edx,edi + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 2840853838[edx*1+esi] + sub eax,ebx + rol esi,9 + add esi,ecx + ; 65 + mov edx,DWORD PTR [esp] + or eax,edi + add ecx,edx + xor eax,esi + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 2840853838[eax*1+ecx] + sub edx,edi + rol ecx,15 + add ecx,ebp + ; 66 + mov eax,DWORD PTR 20[esp] + or edx,esi + add ebp,eax + xor edx,ecx + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 2840853838[edx*1+ebp] + sub eax,esi + rol ebp,5 + add ebp,ebx + ; 67 + mov edx,DWORD PTR 36[esp] + or eax,ecx + add ebx,edx + xor eax,ebp + mov edx,-1 + rol ecx,10 + lea ebx,DWORD PTR 2840853838[eax*1+ebx] + sub edx,ecx + rol ebx,11 + add ebx,edi + ; 68 + mov eax,DWORD PTR 28[esp] + or edx,ebp + add edi,eax + xor edx,ebx + mov eax,-1 + rol ebp,10 + lea edi,DWORD PTR 2840853838[edx*1+edi] + sub eax,ebp + rol edi,6 + add edi,esi + ; 69 + mov edx,DWORD PTR 48[esp] + or eax,ebx + add esi,edx + xor eax,edi + mov edx,-1 + rol ebx,10 + lea esi,DWORD PTR 2840853838[eax*1+esi] + sub edx,ebx + rol esi,8 + add esi,ecx + ; 70 + mov eax,DWORD PTR 8[esp] + or edx,edi + add ecx,eax + xor edx,esi + mov eax,-1 + rol edi,10 + lea ecx,DWORD PTR 2840853838[edx*1+ecx] + sub eax,edi + rol ecx,13 + add ecx,ebp + ; 71 + mov edx,DWORD PTR 40[esp] + or eax,esi + add ebp,edx + xor eax,ecx + mov edx,-1 + rol esi,10 + lea ebp,DWORD PTR 2840853838[eax*1+ebp] + sub edx,esi + rol ebp,12 + add ebp,ebx + ; 72 + mov eax,DWORD PTR 56[esp] + or edx,ecx + add ebx,eax + xor edx,ebp + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 2840853838[edx*1+ebx] + sub eax,ecx + rol ebx,5 + add ebx,edi + ; 73 + mov edx,DWORD PTR 4[esp] + or eax,ebp + add edi,edx + xor eax,ebx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 2840853838[eax*1+edi] + sub edx,ebp + rol edi,12 + add edi,esi + ; 74 + mov eax,DWORD PTR 12[esp] + or edx,ebx + add esi,eax + xor edx,edi + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 2840853838[edx*1+esi] + sub eax,ebx + rol esi,13 + add esi,ecx + ; 75 + mov edx,DWORD PTR 32[esp] + or eax,edi + add ecx,edx + xor eax,esi + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 2840853838[eax*1+ecx] + sub edx,edi + rol ecx,14 + add ecx,ebp + ; 76 + mov eax,DWORD PTR 44[esp] + or edx,esi + add ebp,eax + xor edx,ecx + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 2840853838[edx*1+ebp] + sub eax,esi + rol ebp,11 + add ebp,ebx + ; 77 + mov edx,DWORD PTR 24[esp] + or eax,ecx + add ebx,edx + xor eax,ebp + mov edx,-1 + rol ecx,10 + lea ebx,DWORD PTR 2840853838[eax*1+ebx] + sub edx,ecx + rol ebx,8 + add ebx,edi + ; 78 + mov eax,DWORD PTR 60[esp] + or edx,ebp + add edi,eax + xor edx,ebx + mov eax,-1 + rol ebp,10 + lea edi,DWORD PTR 2840853838[edx*1+edi] + sub eax,ebp + rol edi,5 + add edi,esi + ; 79 + mov edx,DWORD PTR 52[esp] + or eax,ebx + add esi,edx + xor eax,edi + mov edx,DWORD PTR 128[esp] + rol ebx,10 + lea esi,DWORD PTR 2840853838[eax*1+esi] + mov DWORD PTR 64[esp],ecx + rol esi,6 + add esi,ecx + mov ecx,DWORD PTR [edx] + mov DWORD PTR 68[esp],esi + mov 
DWORD PTR 72[esp],edi + mov esi,DWORD PTR 4[edx] + mov DWORD PTR 76[esp],ebx + mov edi,DWORD PTR 8[edx] + mov DWORD PTR 80[esp],ebp + mov ebx,DWORD PTR 12[edx] + mov ebp,DWORD PTR 16[edx] + ; 80 + mov edx,-1 + sub edx,ebx + mov eax,DWORD PTR 20[esp] + or edx,edi + add ecx,eax + xor edx,esi + mov eax,-1 + rol edi,10 + lea ecx,DWORD PTR 1352829926[edx*1+ecx] + sub eax,edi + rol ecx,8 + add ecx,ebp + ; 81 + mov edx,DWORD PTR 56[esp] + or eax,esi + add ebp,edx + xor eax,ecx + mov edx,-1 + rol esi,10 + lea ebp,DWORD PTR 1352829926[eax*1+ebp] + sub edx,esi + rol ebp,9 + add ebp,ebx + ; 82 + mov eax,DWORD PTR 28[esp] + or edx,ecx + add ebx,eax + xor edx,ebp + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 1352829926[edx*1+ebx] + sub eax,ecx + rol ebx,9 + add ebx,edi + ; 83 + mov edx,DWORD PTR [esp] + or eax,ebp + add edi,edx + xor eax,ebx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 1352829926[eax*1+edi] + sub edx,ebp + rol edi,11 + add edi,esi + ; 84 + mov eax,DWORD PTR 36[esp] + or edx,ebx + add esi,eax + xor edx,edi + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 1352829926[edx*1+esi] + sub eax,ebx + rol esi,13 + add esi,ecx + ; 85 + mov edx,DWORD PTR 8[esp] + or eax,edi + add ecx,edx + xor eax,esi + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 1352829926[eax*1+ecx] + sub edx,edi + rol ecx,15 + add ecx,ebp + ; 86 + mov eax,DWORD PTR 44[esp] + or edx,esi + add ebp,eax + xor edx,ecx + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 1352829926[edx*1+ebp] + sub eax,esi + rol ebp,15 + add ebp,ebx + ; 87 + mov edx,DWORD PTR 16[esp] + or eax,ecx + add ebx,edx + xor eax,ebp + mov edx,-1 + rol ecx,10 + lea ebx,DWORD PTR 1352829926[eax*1+ebx] + sub edx,ecx + rol ebx,5 + add ebx,edi + ; 88 + mov eax,DWORD PTR 52[esp] + or edx,ebp + add edi,eax + xor edx,ebx + mov eax,-1 + rol ebp,10 + lea edi,DWORD PTR 1352829926[edx*1+edi] + sub eax,ebp + rol edi,7 + add edi,esi + ; 89 + mov edx,DWORD PTR 24[esp] + or eax,ebx + add esi,edx + xor eax,edi + mov edx,-1 + rol ebx,10 + lea esi,DWORD PTR 1352829926[eax*1+esi] + sub edx,ebx + rol esi,7 + add esi,ecx + ; 90 + mov eax,DWORD PTR 60[esp] + or edx,edi + add ecx,eax + xor edx,esi + mov eax,-1 + rol edi,10 + lea ecx,DWORD PTR 1352829926[edx*1+ecx] + sub eax,edi + rol ecx,8 + add ecx,ebp + ; 91 + mov edx,DWORD PTR 32[esp] + or eax,esi + add ebp,edx + xor eax,ecx + mov edx,-1 + rol esi,10 + lea ebp,DWORD PTR 1352829926[eax*1+ebp] + sub edx,esi + rol ebp,11 + add ebp,ebx + ; 92 + mov eax,DWORD PTR 4[esp] + or edx,ecx + add ebx,eax + xor edx,ebp + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 1352829926[edx*1+ebx] + sub eax,ecx + rol ebx,14 + add ebx,edi + ; 93 + mov edx,DWORD PTR 40[esp] + or eax,ebp + add edi,edx + xor eax,ebx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 1352829926[eax*1+edi] + sub edx,ebp + rol edi,14 + add edi,esi + ; 94 + mov eax,DWORD PTR 12[esp] + or edx,ebx + add esi,eax + xor edx,edi + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 1352829926[edx*1+esi] + sub eax,ebx + rol esi,12 + add esi,ecx + ; 95 + mov edx,DWORD PTR 48[esp] + or eax,edi + add ecx,edx + xor eax,esi + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 1352829926[eax*1+ecx] + mov eax,edi + rol ecx,6 + add ecx,ebp + ; 96 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 24[esp] + rol esi,10 + lea ebp,DWORD PTR 1548603684[edx*1+ebp] + mov edx,-1 + add ebp,eax + mov eax,esi + rol ebp,9 + add ebp,ebx + ; 97 + sub edx,esi + and eax,ebp + and edx,ecx + or edx,eax + mov eax,DWORD PTR 44[esp] + rol ecx,10 + lea ebx,DWORD PTR 1548603684[edx*1+ebx] + mov edx,-1 + add ebx,eax + mov 
eax,ecx + rol ebx,13 + add ebx,edi + ; 98 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 12[esp] + rol ebp,10 + lea edi,DWORD PTR 1548603684[edx*1+edi] + mov edx,-1 + add edi,eax + mov eax,ebp + rol edi,15 + add edi,esi + ; 99 + sub edx,ebp + and eax,edi + and edx,ebx + or edx,eax + mov eax,DWORD PTR 28[esp] + rol ebx,10 + lea esi,DWORD PTR 1548603684[edx*1+esi] + mov edx,-1 + add esi,eax + mov eax,ebx + rol esi,7 + add esi,ecx + ; 100 + sub edx,ebx + and eax,esi + and edx,edi + or edx,eax + mov eax,DWORD PTR [esp] + rol edi,10 + lea ecx,DWORD PTR 1548603684[edx*1+ecx] + mov edx,-1 + add ecx,eax + mov eax,edi + rol ecx,12 + add ecx,ebp + ; 101 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 52[esp] + rol esi,10 + lea ebp,DWORD PTR 1548603684[edx*1+ebp] + mov edx,-1 + add ebp,eax + mov eax,esi + rol ebp,8 + add ebp,ebx + ; 102 + sub edx,esi + and eax,ebp + and edx,ecx + or edx,eax + mov eax,DWORD PTR 20[esp] + rol ecx,10 + lea ebx,DWORD PTR 1548603684[edx*1+ebx] + mov edx,-1 + add ebx,eax + mov eax,ecx + rol ebx,9 + add ebx,edi + ; 103 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 40[esp] + rol ebp,10 + lea edi,DWORD PTR 1548603684[edx*1+edi] + mov edx,-1 + add edi,eax + mov eax,ebp + rol edi,11 + add edi,esi + ; 104 + sub edx,ebp + and eax,edi + and edx,ebx + or edx,eax + mov eax,DWORD PTR 56[esp] + rol ebx,10 + lea esi,DWORD PTR 1548603684[edx*1+esi] + mov edx,-1 + add esi,eax + mov eax,ebx + rol esi,7 + add esi,ecx + ; 105 + sub edx,ebx + and eax,esi + and edx,edi + or edx,eax + mov eax,DWORD PTR 60[esp] + rol edi,10 + lea ecx,DWORD PTR 1548603684[edx*1+ecx] + mov edx,-1 + add ecx,eax + mov eax,edi + rol ecx,7 + add ecx,ebp + ; 106 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 32[esp] + rol esi,10 + lea ebp,DWORD PTR 1548603684[edx*1+ebp] + mov edx,-1 + add ebp,eax + mov eax,esi + rol ebp,12 + add ebp,ebx + ; 107 + sub edx,esi + and eax,ebp + and edx,ecx + or edx,eax + mov eax,DWORD PTR 48[esp] + rol ecx,10 + lea ebx,DWORD PTR 1548603684[edx*1+ebx] + mov edx,-1 + add ebx,eax + mov eax,ecx + rol ebx,7 + add ebx,edi + ; 108 + sub edx,ecx + and eax,ebx + and edx,ebp + or edx,eax + mov eax,DWORD PTR 16[esp] + rol ebp,10 + lea edi,DWORD PTR 1548603684[edx*1+edi] + mov edx,-1 + add edi,eax + mov eax,ebp + rol edi,6 + add edi,esi + ; 109 + sub edx,ebp + and eax,edi + and edx,ebx + or edx,eax + mov eax,DWORD PTR 36[esp] + rol ebx,10 + lea esi,DWORD PTR 1548603684[edx*1+esi] + mov edx,-1 + add esi,eax + mov eax,ebx + rol esi,15 + add esi,ecx + ; 110 + sub edx,ebx + and eax,esi + and edx,edi + or edx,eax + mov eax,DWORD PTR 4[esp] + rol edi,10 + lea ecx,DWORD PTR 1548603684[edx*1+ecx] + mov edx,-1 + add ecx,eax + mov eax,edi + rol ecx,13 + add ecx,ebp + ; 111 + sub edx,edi + and eax,ecx + and edx,esi + or edx,eax + mov eax,DWORD PTR 8[esp] + rol esi,10 + lea ebp,DWORD PTR 1548603684[edx*1+ebp] + mov edx,-1 + add ebp,eax + sub edx,ecx + rol ebp,11 + add ebp,ebx + ; 112 + mov eax,DWORD PTR 60[esp] + or edx,ebp + add ebx,eax + xor edx,esi + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 1836072691[edx*1+ebx] + sub eax,ebp + rol ebx,9 + add ebx,edi + ; 113 + mov edx,DWORD PTR 20[esp] + or eax,ebx + add edi,edx + xor eax,ecx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 1836072691[eax*1+edi] + sub edx,ebx + rol edi,7 + add edi,esi + ; 114 + mov eax,DWORD PTR 4[esp] + or edx,edi + add esi,eax + xor edx,ebp + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 1836072691[edx*1+esi] + sub eax,edi + rol 
esi,15 + add esi,ecx + ; 115 + mov edx,DWORD PTR 12[esp] + or eax,esi + add ecx,edx + xor eax,ebx + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 1836072691[eax*1+ecx] + sub edx,esi + rol ecx,11 + add ecx,ebp + ; 116 + mov eax,DWORD PTR 28[esp] + or edx,ecx + add ebp,eax + xor edx,edi + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 1836072691[edx*1+ebp] + sub eax,ecx + rol ebp,8 + add ebp,ebx + ; 117 + mov edx,DWORD PTR 56[esp] + or eax,ebp + add ebx,edx + xor eax,esi + mov edx,-1 + rol ecx,10 + lea ebx,DWORD PTR 1836072691[eax*1+ebx] + sub edx,ebp + rol ebx,6 + add ebx,edi + ; 118 + mov eax,DWORD PTR 24[esp] + or edx,ebx + add edi,eax + xor edx,ecx + mov eax,-1 + rol ebp,10 + lea edi,DWORD PTR 1836072691[edx*1+edi] + sub eax,ebx + rol edi,6 + add edi,esi + ; 119 + mov edx,DWORD PTR 36[esp] + or eax,edi + add esi,edx + xor eax,ebp + mov edx,-1 + rol ebx,10 + lea esi,DWORD PTR 1836072691[eax*1+esi] + sub edx,edi + rol esi,14 + add esi,ecx + ; 120 + mov eax,DWORD PTR 44[esp] + or edx,esi + add ecx,eax + xor edx,ebx + mov eax,-1 + rol edi,10 + lea ecx,DWORD PTR 1836072691[edx*1+ecx] + sub eax,esi + rol ecx,12 + add ecx,ebp + ; 121 + mov edx,DWORD PTR 32[esp] + or eax,ecx + add ebp,edx + xor eax,edi + mov edx,-1 + rol esi,10 + lea ebp,DWORD PTR 1836072691[eax*1+ebp] + sub edx,ecx + rol ebp,13 + add ebp,ebx + ; 122 + mov eax,DWORD PTR 48[esp] + or edx,ebp + add ebx,eax + xor edx,esi + mov eax,-1 + rol ecx,10 + lea ebx,DWORD PTR 1836072691[edx*1+ebx] + sub eax,ebp + rol ebx,5 + add ebx,edi + ; 123 + mov edx,DWORD PTR 8[esp] + or eax,ebx + add edi,edx + xor eax,ecx + mov edx,-1 + rol ebp,10 + lea edi,DWORD PTR 1836072691[eax*1+edi] + sub edx,ebx + rol edi,14 + add edi,esi + ; 124 + mov eax,DWORD PTR 40[esp] + or edx,edi + add esi,eax + xor edx,ebp + mov eax,-1 + rol ebx,10 + lea esi,DWORD PTR 1836072691[edx*1+esi] + sub eax,edi + rol esi,13 + add esi,ecx + ; 125 + mov edx,DWORD PTR [esp] + or eax,esi + add ecx,edx + xor eax,ebx + mov edx,-1 + rol edi,10 + lea ecx,DWORD PTR 1836072691[eax*1+ecx] + sub edx,esi + rol ecx,13 + add ecx,ebp + ; 126 + mov eax,DWORD PTR 16[esp] + or edx,ecx + add ebp,eax + xor edx,edi + mov eax,-1 + rol esi,10 + lea ebp,DWORD PTR 1836072691[edx*1+ebp] + sub eax,ecx + rol ebp,7 + add ebp,ebx + ; 127 + mov edx,DWORD PTR 52[esp] + or eax,ebp + add ebx,edx + xor eax,esi + mov edx,DWORD PTR 32[esp] + rol ecx,10 + lea ebx,DWORD PTR 1836072691[eax*1+ebx] + mov eax,-1 + rol ebx,5 + add ebx,edi + ; 128 + add edi,edx + mov edx,ebp + sub eax,ebx + and edx,ebx + and eax,ecx + or edx,eax + mov eax,DWORD PTR 24[esp] + rol ebp,10 + lea edi,DWORD PTR 2053994217[edx*1+edi] + mov edx,-1 + rol edi,15 + add edi,esi + ; 129 + add esi,eax + mov eax,ebx + sub edx,edi + and eax,edi + and edx,ebp + or eax,edx + mov edx,DWORD PTR 16[esp] + rol ebx,10 + lea esi,DWORD PTR 2053994217[eax*1+esi] + mov eax,-1 + rol esi,5 + add esi,ecx + ; 130 + add ecx,edx + mov edx,edi + sub eax,esi + and edx,esi + and eax,ebx + or edx,eax + mov eax,DWORD PTR 4[esp] + rol edi,10 + lea ecx,DWORD PTR 2053994217[edx*1+ecx] + mov edx,-1 + rol ecx,8 + add ecx,ebp + ; 131 + add ebp,eax + mov eax,esi + sub edx,ecx + and eax,ecx + and edx,edi + or eax,edx + mov edx,DWORD PTR 12[esp] + rol esi,10 + lea ebp,DWORD PTR 2053994217[eax*1+ebp] + mov eax,-1 + rol ebp,11 + add ebp,ebx + ; 132 + add ebx,edx + mov edx,ecx + sub eax,ebp + and edx,ebp + and eax,esi + or edx,eax + mov eax,DWORD PTR 44[esp] + rol ecx,10 + lea ebx,DWORD PTR 2053994217[edx*1+ebx] + mov edx,-1 + rol ebx,14 + add ebx,edi + ; 133 + add edi,eax + mov eax,ebp + sub 
edx,ebx + and eax,ebx + and edx,ecx + or eax,edx + mov edx,DWORD PTR 60[esp] + rol ebp,10 + lea edi,DWORD PTR 2053994217[eax*1+edi] + mov eax,-1 + rol edi,14 + add edi,esi + ; 134 + add esi,edx + mov edx,ebx + sub eax,edi + and edx,edi + and eax,ebp + or edx,eax + mov eax,DWORD PTR [esp] + rol ebx,10 + lea esi,DWORD PTR 2053994217[edx*1+esi] + mov edx,-1 + rol esi,6 + add esi,ecx + ; 135 + add ecx,eax + mov eax,edi + sub edx,esi + and eax,esi + and edx,ebx + or eax,edx + mov edx,DWORD PTR 20[esp] + rol edi,10 + lea ecx,DWORD PTR 2053994217[eax*1+ecx] + mov eax,-1 + rol ecx,14 + add ecx,ebp + ; 136 + add ebp,edx + mov edx,esi + sub eax,ecx + and edx,ecx + and eax,edi + or edx,eax + mov eax,DWORD PTR 48[esp] + rol esi,10 + lea ebp,DWORD PTR 2053994217[edx*1+ebp] + mov edx,-1 + rol ebp,6 + add ebp,ebx + ; 137 + add ebx,eax + mov eax,ecx + sub edx,ebp + and eax,ebp + and edx,esi + or eax,edx + mov edx,DWORD PTR 8[esp] + rol ecx,10 + lea ebx,DWORD PTR 2053994217[eax*1+ebx] + mov eax,-1 + rol ebx,9 + add ebx,edi + ; 138 + add edi,edx + mov edx,ebp + sub eax,ebx + and edx,ebx + and eax,ecx + or edx,eax + mov eax,DWORD PTR 52[esp] + rol ebp,10 + lea edi,DWORD PTR 2053994217[edx*1+edi] + mov edx,-1 + rol edi,12 + add edi,esi + ; 139 + add esi,eax + mov eax,ebx + sub edx,edi + and eax,edi + and edx,ebp + or eax,edx + mov edx,DWORD PTR 36[esp] + rol ebx,10 + lea esi,DWORD PTR 2053994217[eax*1+esi] + mov eax,-1 + rol esi,9 + add esi,ecx + ; 140 + add ecx,edx + mov edx,edi + sub eax,esi + and edx,esi + and eax,ebx + or edx,eax + mov eax,DWORD PTR 28[esp] + rol edi,10 + lea ecx,DWORD PTR 2053994217[edx*1+ecx] + mov edx,-1 + rol ecx,12 + add ecx,ebp + ; 141 + add ebp,eax + mov eax,esi + sub edx,ecx + and eax,ecx + and edx,edi + or eax,edx + mov edx,DWORD PTR 40[esp] + rol esi,10 + lea ebp,DWORD PTR 2053994217[eax*1+ebp] + mov eax,-1 + rol ebp,5 + add ebp,ebx + ; 142 + add ebx,edx + mov edx,ecx + sub eax,ebp + and edx,ebp + and eax,esi + or edx,eax + mov eax,DWORD PTR 56[esp] + rol ecx,10 + lea ebx,DWORD PTR 2053994217[edx*1+ebx] + mov edx,-1 + rol ebx,15 + add ebx,edi + ; 143 + add edi,eax + mov eax,ebp + sub edx,ebx + and eax,ebx + and edx,ecx + or edx,eax + mov eax,ebx + rol ebp,10 + lea edi,DWORD PTR 2053994217[edx*1+edi] + xor eax,ebp + rol edi,8 + add edi,esi + ; 144 + mov edx,DWORD PTR 48[esp] + xor eax,edi + add esi,edx + rol ebx,10 + add esi,eax + mov eax,edi + rol esi,8 + add esi,ecx + ; 145 + xor eax,ebx + mov edx,DWORD PTR 60[esp] + xor eax,esi + add ecx,eax + mov eax,esi + rol edi,10 + add ecx,edx + xor eax,edi + rol ecx,5 + add ecx,ebp + ; 146 + mov edx,DWORD PTR 40[esp] + xor eax,ecx + add ebp,edx + rol esi,10 + add ebp,eax + mov eax,ecx + rol ebp,12 + add ebp,ebx + ; 147 + xor eax,esi + mov edx,DWORD PTR 16[esp] + xor eax,ebp + add ebx,eax + mov eax,ebp + rol ecx,10 + add ebx,edx + xor eax,ecx + rol ebx,9 + add ebx,edi + ; 148 + mov edx,DWORD PTR 4[esp] + xor eax,ebx + add edi,edx + rol ebp,10 + add edi,eax + mov eax,ebx + rol edi,12 + add edi,esi + ; 149 + xor eax,ebp + mov edx,DWORD PTR 20[esp] + xor eax,edi + add esi,eax + mov eax,edi + rol ebx,10 + add esi,edx + xor eax,ebx + rol esi,5 + add esi,ecx + ; 150 + mov edx,DWORD PTR 32[esp] + xor eax,esi + add ecx,edx + rol edi,10 + add ecx,eax + mov eax,esi + rol ecx,14 + add ecx,ebp + ; 151 + xor eax,edi + mov edx,DWORD PTR 28[esp] + xor eax,ecx + add ebp,eax + mov eax,ecx + rol esi,10 + add ebp,edx + xor eax,esi + rol ebp,6 + add ebp,ebx + ; 152 + mov edx,DWORD PTR 24[esp] + xor eax,ebp + add ebx,edx + rol ecx,10 + add ebx,eax + mov 
eax,ebp + rol ebx,8 + add ebx,edi + ; 153 + xor eax,ecx + mov edx,DWORD PTR 8[esp] + xor eax,ebx + add edi,eax + mov eax,ebx + rol ebp,10 + add edi,edx + xor eax,ebp + rol edi,13 + add edi,esi + ; 154 + mov edx,DWORD PTR 52[esp] + xor eax,edi + add esi,edx + rol ebx,10 + add esi,eax + mov eax,edi + rol esi,6 + add esi,ecx + ; 155 + xor eax,ebx + mov edx,DWORD PTR 56[esp] + xor eax,esi + add ecx,eax + mov eax,esi + rol edi,10 + add ecx,edx + xor eax,edi + rol ecx,5 + add ecx,ebp + ; 156 + mov edx,DWORD PTR [esp] + xor eax,ecx + add ebp,edx + rol esi,10 + add ebp,eax + mov eax,ecx + rol ebp,15 + add ebp,ebx + ; 157 + xor eax,esi + mov edx,DWORD PTR 12[esp] + xor eax,ebp + add ebx,eax + mov eax,ebp + rol ecx,10 + add ebx,edx + xor eax,ecx + rol ebx,13 + add ebx,edi + ; 158 + mov edx,DWORD PTR 36[esp] + xor eax,ebx + add edi,edx + rol ebp,10 + add edi,eax + mov eax,ebx + rol edi,11 + add edi,esi + ; 159 + xor eax,ebp + mov edx,DWORD PTR 44[esp] + xor eax,edi + add esi,eax + rol ebx,10 + add esi,edx + mov edx,DWORD PTR 128[esp] + rol esi,11 + add esi,ecx + mov eax,DWORD PTR 4[edx] + add ebx,eax + mov eax,DWORD PTR 72[esp] + add ebx,eax + mov eax,DWORD PTR 8[edx] + add ebp,eax + mov eax,DWORD PTR 76[esp] + add ebp,eax + mov eax,DWORD PTR 12[edx] + add ecx,eax + mov eax,DWORD PTR 80[esp] + add ecx,eax + mov eax,DWORD PTR 16[edx] + add esi,eax + mov eax,DWORD PTR 64[esp] + add esi,eax + mov eax,DWORD PTR [edx] + add edi,eax + mov eax,DWORD PTR 68[esp] + add edi,eax + mov eax,DWORD PTR 136[esp] + mov DWORD PTR [edx],ebx + mov DWORD PTR 4[edx],ebp + mov DWORD PTR 8[edx],ecx + sub eax,1 + mov DWORD PTR 12[edx],esi + mov DWORD PTR 16[edx],edi + jle $L001get_out + mov DWORD PTR 136[esp],eax + mov edi,ecx + mov eax,DWORD PTR 132[esp] + mov ecx,ebx + add eax,64 + mov esi,ebp + mov DWORD PTR 132[esp],eax + jmp $L000start +$L001get_out: + add esp,108 + pop ebx + pop ebp + pop edi + pop esi + ret +_ripemd160_block_asm_data_order ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha1-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha1-586.asm new file mode 100644 index 00000000000000..38aaf17445b4b8 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha1-586.asm @@ -0,0 +1,2813 @@ +TITLE sha1-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_sha1_block_data_order PROC PUBLIC +$L_sha1_block_data_order_begin:: + push ebp + push ebx + push esi + push edi + call $L000pic_point +$L000pic_point: + pop ebp + lea esi,DWORD PTR _OPENSSL_ia32cap_P + lea ebp,DWORD PTR ($LK_XX_XX-$L000pic_point)[ebp] + mov eax,DWORD PTR [esi] + mov edx,DWORD PTR 4[esi] + test edx,512 + jz $L001x86 + mov ecx,DWORD PTR 8[esi] + test eax,16777216 + jz $L001x86 + test ecx,536870912 + jnz $Lshaext_shortcut + jmp $Lssse3_shortcut +ALIGN 16 +$L001x86: + mov ebp,DWORD PTR 20[esp] + mov esi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + sub esp,76 + shl eax,6 + add eax,esi + mov DWORD PTR 104[esp],eax + mov edi,DWORD PTR 16[ebp] + jmp $L002loop +ALIGN 16 +$L002loop: + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edx + mov eax,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 28[esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR 16[esp],eax + mov DWORD PTR 20[esp],ebx + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],edx + mov eax,DWORD PTR 32[esi] + mov ebx,DWORD PTR 36[esi] + mov ecx,DWORD PTR 40[esi] + mov edx,DWORD PTR 44[esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR 32[esp],eax + mov DWORD PTR 36[esp],ebx + mov DWORD PTR 40[esp],ecx + mov DWORD PTR 44[esp],edx + mov eax,DWORD PTR 48[esi] + mov ebx,DWORD PTR 52[esi] + mov ecx,DWORD PTR 56[esi] + mov edx,DWORD PTR 60[esi] + bswap eax + bswap ebx + bswap ecx + bswap edx + mov DWORD PTR 48[esp],eax + mov DWORD PTR 52[esp],ebx + mov DWORD PTR 56[esp],ecx + mov DWORD PTR 60[esp],edx + mov DWORD PTR 100[esp],esi + mov eax,DWORD PTR [ebp] + mov ebx,DWORD PTR 4[ebp] + mov ecx,DWORD PTR 8[ebp] + mov edx,DWORD PTR 12[ebp] + ; 00_15 0 + mov esi,ecx + mov ebp,eax + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD PTR [esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,DWORD PTR 1518500249[edi*1+ebp] + add ebp,esi + ; 00_15 1 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD PTR 4[esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,DWORD PTR 1518500249[edx*1+ebp] + add ebp,edi + ; 00_15 2 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD PTR 8[esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,DWORD PTR 1518500249[ecx*1+ebp] + add ebp,edx + ; 00_15 3 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD PTR 12[esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,DWORD PTR 1518500249[ebx*1+ebp] + add ebp,ecx + ; 00_15 4 + mov ebx,edi + mov ecx,ebp + rol ebp,5 + xor ebx,esi + add ebp,eax + mov eax,DWORD PTR 16[esp] + and ebx,edx + ror edx,2 + xor ebx,esi + lea ebp,DWORD PTR 1518500249[eax*1+ebp] + add ebp,ebx + ; 00_15 5 + mov eax,edx + mov ebx,ebp + rol ebp,5 + xor eax,edi + add ebp,esi + mov esi,DWORD PTR 20[esp] + and eax,ecx + ror ecx,2 + xor eax,edi + lea ebp,DWORD PTR 1518500249[esi*1+ebp] + add ebp,eax + ; 00_15 6 + mov esi,ecx + mov eax,ebp + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD PTR 24[esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,DWORD PTR 1518500249[edi*1+ebp] + add ebp,esi + ; 00_15 7 + mov 
edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD PTR 28[esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,DWORD PTR 1518500249[edx*1+ebp] + add ebp,edi + ; 00_15 8 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD PTR 32[esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,DWORD PTR 1518500249[ecx*1+ebp] + add ebp,edx + ; 00_15 9 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD PTR 36[esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,DWORD PTR 1518500249[ebx*1+ebp] + add ebp,ecx + ; 00_15 10 + mov ebx,edi + mov ecx,ebp + rol ebp,5 + xor ebx,esi + add ebp,eax + mov eax,DWORD PTR 40[esp] + and ebx,edx + ror edx,2 + xor ebx,esi + lea ebp,DWORD PTR 1518500249[eax*1+ebp] + add ebp,ebx + ; 00_15 11 + mov eax,edx + mov ebx,ebp + rol ebp,5 + xor eax,edi + add ebp,esi + mov esi,DWORD PTR 44[esp] + and eax,ecx + ror ecx,2 + xor eax,edi + lea ebp,DWORD PTR 1518500249[esi*1+ebp] + add ebp,eax + ; 00_15 12 + mov esi,ecx + mov eax,ebp + rol ebp,5 + xor esi,edx + add ebp,edi + mov edi,DWORD PTR 48[esp] + and esi,ebx + ror ebx,2 + xor esi,edx + lea ebp,DWORD PTR 1518500249[edi*1+ebp] + add ebp,esi + ; 00_15 13 + mov edi,ebx + mov esi,ebp + rol ebp,5 + xor edi,ecx + add ebp,edx + mov edx,DWORD PTR 52[esp] + and edi,eax + ror eax,2 + xor edi,ecx + lea ebp,DWORD PTR 1518500249[edx*1+ebp] + add ebp,edi + ; 00_15 14 + mov edx,eax + mov edi,ebp + rol ebp,5 + xor edx,ebx + add ebp,ecx + mov ecx,DWORD PTR 56[esp] + and edx,esi + ror esi,2 + xor edx,ebx + lea ebp,DWORD PTR 1518500249[ecx*1+ebp] + add ebp,edx + ; 00_15 15 + mov ecx,esi + mov edx,ebp + rol ebp,5 + xor ecx,eax + add ebp,ebx + mov ebx,DWORD PTR 60[esp] + and ecx,edi + ror edi,2 + xor ecx,eax + lea ebp,DWORD PTR 1518500249[ebx*1+ebp] + mov ebx,DWORD PTR [esp] + add ecx,ebp + ; 16_19 16 + mov ebp,edi + xor ebx,DWORD PTR 8[esp] + xor ebp,esi + xor ebx,DWORD PTR 32[esp] + and ebp,edx + xor ebx,DWORD PTR 52[esp] + rol ebx,1 + xor ebp,esi + add eax,ebp + mov ebp,ecx + ror edx,2 + mov DWORD PTR [esp],ebx + rol ebp,5 + lea ebx,DWORD PTR 1518500249[eax*1+ebx] + mov eax,DWORD PTR 4[esp] + add ebx,ebp + ; 16_19 17 + mov ebp,edx + xor eax,DWORD PTR 12[esp] + xor ebp,edi + xor eax,DWORD PTR 36[esp] + and ebp,ecx + xor eax,DWORD PTR 56[esp] + rol eax,1 + xor ebp,edi + add esi,ebp + mov ebp,ebx + ror ecx,2 + mov DWORD PTR 4[esp],eax + rol ebp,5 + lea eax,DWORD PTR 1518500249[esi*1+eax] + mov esi,DWORD PTR 8[esp] + add eax,ebp + ; 16_19 18 + mov ebp,ecx + xor esi,DWORD PTR 16[esp] + xor ebp,edx + xor esi,DWORD PTR 40[esp] + and ebp,ebx + xor esi,DWORD PTR 60[esp] + rol esi,1 + xor ebp,edx + add edi,ebp + mov ebp,eax + ror ebx,2 + mov DWORD PTR 8[esp],esi + rol ebp,5 + lea esi,DWORD PTR 1518500249[edi*1+esi] + mov edi,DWORD PTR 12[esp] + add esi,ebp + ; 16_19 19 + mov ebp,ebx + xor edi,DWORD PTR 20[esp] + xor ebp,ecx + xor edi,DWORD PTR 44[esp] + and ebp,eax + xor edi,DWORD PTR [esp] + rol edi,1 + xor ebp,ecx + add edx,ebp + mov ebp,esi + ror eax,2 + mov DWORD PTR 12[esp],edi + rol ebp,5 + lea edi,DWORD PTR 1518500249[edx*1+edi] + mov edx,DWORD PTR 16[esp] + add edi,ebp + ; 20_39 20 + mov ebp,esi + xor edx,DWORD PTR 24[esp] + xor ebp,eax + xor edx,DWORD PTR 48[esp] + xor ebp,ebx + xor edx,DWORD PTR 4[esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR 16[esp],edx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 20[esp] + add edx,ebp + ; 20_39 21 + mov ebp,edi + xor ecx,DWORD PTR 28[esp] + xor ebp,esi + xor 
ecx,DWORD PTR 52[esp] + xor ebp,eax + xor ecx,DWORD PTR 8[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 20[esp],ecx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 24[esp] + add ecx,ebp + ; 20_39 22 + mov ebp,edx + xor ebx,DWORD PTR 32[esp] + xor ebp,edi + xor ebx,DWORD PTR 56[esp] + xor ebp,esi + xor ebx,DWORD PTR 12[esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD PTR 24[esp],ebx + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,DWORD PTR 28[esp] + add ebx,ebp + ; 20_39 23 + mov ebp,ecx + xor eax,DWORD PTR 36[esp] + xor ebp,edx + xor eax,DWORD PTR 60[esp] + xor ebp,edi + xor eax,DWORD PTR 16[esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD PTR 28[esp],eax + lea eax,DWORD PTR 1859775393[esi*1+eax] + mov esi,DWORD PTR 32[esp] + add eax,ebp + ; 20_39 24 + mov ebp,ebx + xor esi,DWORD PTR 40[esp] + xor ebp,ecx + xor esi,DWORD PTR [esp] + xor ebp,edx + xor esi,DWORD PTR 20[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD PTR 32[esp],esi + lea esi,DWORD PTR 1859775393[edi*1+esi] + mov edi,DWORD PTR 36[esp] + add esi,ebp + ; 20_39 25 + mov ebp,eax + xor edi,DWORD PTR 44[esp] + xor ebp,ebx + xor edi,DWORD PTR 4[esp] + xor ebp,ecx + xor edi,DWORD PTR 24[esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD PTR 36[esp],edi + lea edi,DWORD PTR 1859775393[edx*1+edi] + mov edx,DWORD PTR 40[esp] + add edi,ebp + ; 20_39 26 + mov ebp,esi + xor edx,DWORD PTR 48[esp] + xor ebp,eax + xor edx,DWORD PTR 8[esp] + xor ebp,ebx + xor edx,DWORD PTR 28[esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR 40[esp],edx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 44[esp] + add edx,ebp + ; 20_39 27 + mov ebp,edi + xor ecx,DWORD PTR 52[esp] + xor ebp,esi + xor ecx,DWORD PTR 12[esp] + xor ebp,eax + xor ecx,DWORD PTR 32[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 44[esp],ecx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 48[esp] + add ecx,ebp + ; 20_39 28 + mov ebp,edx + xor ebx,DWORD PTR 56[esp] + xor ebp,edi + xor ebx,DWORD PTR 16[esp] + xor ebp,esi + xor ebx,DWORD PTR 36[esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD PTR 48[esp],ebx + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,DWORD PTR 52[esp] + add ebx,ebp + ; 20_39 29 + mov ebp,ecx + xor eax,DWORD PTR 60[esp] + xor ebp,edx + xor eax,DWORD PTR 20[esp] + xor ebp,edi + xor eax,DWORD PTR 40[esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD PTR 52[esp],eax + lea eax,DWORD PTR 1859775393[esi*1+eax] + mov esi,DWORD PTR 56[esp] + add eax,ebp + ; 20_39 30 + mov ebp,ebx + xor esi,DWORD PTR [esp] + xor ebp,ecx + xor esi,DWORD PTR 24[esp] + xor ebp,edx + xor esi,DWORD PTR 44[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD PTR 56[esp],esi + lea esi,DWORD PTR 1859775393[edi*1+esi] + mov edi,DWORD PTR 60[esp] + add esi,ebp + ; 20_39 31 + mov ebp,eax + xor edi,DWORD PTR 4[esp] + xor ebp,ebx + xor edi,DWORD PTR 28[esp] + xor ebp,ecx + xor edi,DWORD PTR 48[esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD PTR 60[esp],edi + lea edi,DWORD PTR 1859775393[edx*1+edi] + mov edx,DWORD PTR [esp] + add edi,ebp + ; 20_39 32 + mov ebp,esi + xor edx,DWORD PTR 8[esp] + xor ebp,eax + xor edx,DWORD PTR 32[esp] + xor ebp,ebx + xor edx,DWORD PTR 52[esp] + rol edx,1 + add ecx,ebp + ror 
esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR [esp],edx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 4[esp] + add edx,ebp + ; 20_39 33 + mov ebp,edi + xor ecx,DWORD PTR 12[esp] + xor ebp,esi + xor ecx,DWORD PTR 36[esp] + xor ebp,eax + xor ecx,DWORD PTR 56[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 4[esp],ecx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 8[esp] + add ecx,ebp + ; 20_39 34 + mov ebp,edx + xor ebx,DWORD PTR 16[esp] + xor ebp,edi + xor ebx,DWORD PTR 40[esp] + xor ebp,esi + xor ebx,DWORD PTR 60[esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD PTR 8[esp],ebx + lea ebx,DWORD PTR 1859775393[eax*1+ebx] + mov eax,DWORD PTR 12[esp] + add ebx,ebp + ; 20_39 35 + mov ebp,ecx + xor eax,DWORD PTR 20[esp] + xor ebp,edx + xor eax,DWORD PTR 44[esp] + xor ebp,edi + xor eax,DWORD PTR [esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD PTR 12[esp],eax + lea eax,DWORD PTR 1859775393[esi*1+eax] + mov esi,DWORD PTR 16[esp] + add eax,ebp + ; 20_39 36 + mov ebp,ebx + xor esi,DWORD PTR 24[esp] + xor ebp,ecx + xor esi,DWORD PTR 48[esp] + xor ebp,edx + xor esi,DWORD PTR 4[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD PTR 16[esp],esi + lea esi,DWORD PTR 1859775393[edi*1+esi] + mov edi,DWORD PTR 20[esp] + add esi,ebp + ; 20_39 37 + mov ebp,eax + xor edi,DWORD PTR 28[esp] + xor ebp,ebx + xor edi,DWORD PTR 52[esp] + xor ebp,ecx + xor edi,DWORD PTR 8[esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD PTR 20[esp],edi + lea edi,DWORD PTR 1859775393[edx*1+edi] + mov edx,DWORD PTR 24[esp] + add edi,ebp + ; 20_39 38 + mov ebp,esi + xor edx,DWORD PTR 32[esp] + xor ebp,eax + xor edx,DWORD PTR 56[esp] + xor ebp,ebx + xor edx,DWORD PTR 12[esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR 24[esp],edx + lea edx,DWORD PTR 1859775393[ecx*1+edx] + mov ecx,DWORD PTR 28[esp] + add edx,ebp + ; 20_39 39 + mov ebp,edi + xor ecx,DWORD PTR 36[esp] + xor ebp,esi + xor ecx,DWORD PTR 60[esp] + xor ebp,eax + xor ecx,DWORD PTR 16[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 28[esp],ecx + lea ecx,DWORD PTR 1859775393[ebx*1+ecx] + mov ebx,DWORD PTR 32[esp] + add ecx,ebp + ; 40_59 40 + mov ebp,edi + xor ebx,DWORD PTR 40[esp] + xor ebp,esi + xor ebx,DWORD PTR [esp] + and ebp,edx + xor ebx,DWORD PTR 20[esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD PTR 32[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD PTR 36[esp] + add ebx,ebp + ; 40_59 41 + mov ebp,edx + xor eax,DWORD PTR 44[esp] + xor ebp,edi + xor eax,DWORD PTR 4[esp] + and ebp,ecx + xor eax,DWORD PTR 24[esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD PTR 36[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD PTR 40[esp] + add eax,ebp + ; 40_59 42 + mov ebp,ecx + xor esi,DWORD PTR 48[esp] + xor ebp,edx + xor esi,DWORD PTR 8[esp] + and ebp,ebx + xor esi,DWORD PTR 28[esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD PTR 40[esp],esi + lea esi,DWORD PTR 2400959708[ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD PTR 44[esp] + add esi,ebp + ; 40_59 43 + mov ebp,ebx + xor edi,DWORD PTR 52[esp] + xor ebp,ecx + xor edi,DWORD PTR 12[esp] + and ebp,eax + xor edi,DWORD PTR 32[esp] + rol 
edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD PTR 44[esp],edi + lea edi,DWORD PTR 2400959708[ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD PTR 48[esp] + add edi,ebp + ; 40_59 44 + mov ebp,eax + xor edx,DWORD PTR 56[esp] + xor ebp,ebx + xor edx,DWORD PTR 16[esp] + and ebp,esi + xor edx,DWORD PTR 36[esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD PTR 48[esp],edx + lea edx,DWORD PTR 2400959708[ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD PTR 52[esp] + add edx,ebp + ; 40_59 45 + mov ebp,esi + xor ecx,DWORD PTR 60[esp] + xor ebp,eax + xor ecx,DWORD PTR 20[esp] + and ebp,edi + xor ecx,DWORD PTR 40[esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD PTR 52[esp],ecx + lea ecx,DWORD PTR 2400959708[ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD PTR 56[esp] + add ecx,ebp + ; 40_59 46 + mov ebp,edi + xor ebx,DWORD PTR [esp] + xor ebp,esi + xor ebx,DWORD PTR 24[esp] + and ebp,edx + xor ebx,DWORD PTR 44[esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD PTR 56[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD PTR 60[esp] + add ebx,ebp + ; 40_59 47 + mov ebp,edx + xor eax,DWORD PTR 4[esp] + xor ebp,edi + xor eax,DWORD PTR 28[esp] + and ebp,ecx + xor eax,DWORD PTR 48[esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD PTR 60[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD PTR [esp] + add eax,ebp + ; 40_59 48 + mov ebp,ecx + xor esi,DWORD PTR 8[esp] + xor ebp,edx + xor esi,DWORD PTR 32[esp] + and ebp,ebx + xor esi,DWORD PTR 52[esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD PTR [esp],esi + lea esi,DWORD PTR 2400959708[ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD PTR 4[esp] + add esi,ebp + ; 40_59 49 + mov ebp,ebx + xor edi,DWORD PTR 12[esp] + xor ebp,ecx + xor edi,DWORD PTR 36[esp] + and ebp,eax + xor edi,DWORD PTR 56[esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD PTR 4[esp],edi + lea edi,DWORD PTR 2400959708[ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD PTR 8[esp] + add edi,ebp + ; 40_59 50 + mov ebp,eax + xor edx,DWORD PTR 16[esp] + xor ebp,ebx + xor edx,DWORD PTR 40[esp] + and ebp,esi + xor edx,DWORD PTR 60[esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD PTR 8[esp],edx + lea edx,DWORD PTR 2400959708[ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD PTR 12[esp] + add edx,ebp + ; 40_59 51 + mov ebp,esi + xor ecx,DWORD PTR 20[esp] + xor ebp,eax + xor ecx,DWORD PTR 44[esp] + and ebp,edi + xor ecx,DWORD PTR [esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD PTR 12[esp],ecx + lea ecx,DWORD PTR 2400959708[ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD PTR 16[esp] + add ecx,ebp + ; 40_59 52 + mov ebp,edi + xor ebx,DWORD PTR 24[esp] + xor ebp,esi + xor ebx,DWORD PTR 48[esp] + and ebp,edx + xor ebx,DWORD PTR 4[esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD PTR 16[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD PTR 20[esp] + add ebx,ebp + ; 40_59 53 + mov ebp,edx + xor eax,DWORD PTR 28[esp] + xor ebp,edi + xor eax,DWORD PTR 52[esp] + and ebp,ecx + xor eax,DWORD PTR 8[esp] + rol 
eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD PTR 20[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD PTR 24[esp] + add eax,ebp + ; 40_59 54 + mov ebp,ecx + xor esi,DWORD PTR 32[esp] + xor ebp,edx + xor esi,DWORD PTR 56[esp] + and ebp,ebx + xor esi,DWORD PTR 12[esp] + rol esi,1 + add ebp,edi + ror ebx,2 + mov edi,eax + rol edi,5 + mov DWORD PTR 24[esp],esi + lea esi,DWORD PTR 2400959708[ebp*1+esi] + mov ebp,ecx + add esi,edi + and ebp,edx + mov edi,DWORD PTR 28[esp] + add esi,ebp + ; 40_59 55 + mov ebp,ebx + xor edi,DWORD PTR 36[esp] + xor ebp,ecx + xor edi,DWORD PTR 60[esp] + and ebp,eax + xor edi,DWORD PTR 16[esp] + rol edi,1 + add ebp,edx + ror eax,2 + mov edx,esi + rol edx,5 + mov DWORD PTR 28[esp],edi + lea edi,DWORD PTR 2400959708[ebp*1+edi] + mov ebp,ebx + add edi,edx + and ebp,ecx + mov edx,DWORD PTR 32[esp] + add edi,ebp + ; 40_59 56 + mov ebp,eax + xor edx,DWORD PTR 40[esp] + xor ebp,ebx + xor edx,DWORD PTR [esp] + and ebp,esi + xor edx,DWORD PTR 20[esp] + rol edx,1 + add ebp,ecx + ror esi,2 + mov ecx,edi + rol ecx,5 + mov DWORD PTR 32[esp],edx + lea edx,DWORD PTR 2400959708[ebp*1+edx] + mov ebp,eax + add edx,ecx + and ebp,ebx + mov ecx,DWORD PTR 36[esp] + add edx,ebp + ; 40_59 57 + mov ebp,esi + xor ecx,DWORD PTR 44[esp] + xor ebp,eax + xor ecx,DWORD PTR 4[esp] + and ebp,edi + xor ecx,DWORD PTR 24[esp] + rol ecx,1 + add ebp,ebx + ror edi,2 + mov ebx,edx + rol ebx,5 + mov DWORD PTR 36[esp],ecx + lea ecx,DWORD PTR 2400959708[ebp*1+ecx] + mov ebp,esi + add ecx,ebx + and ebp,eax + mov ebx,DWORD PTR 40[esp] + add ecx,ebp + ; 40_59 58 + mov ebp,edi + xor ebx,DWORD PTR 48[esp] + xor ebp,esi + xor ebx,DWORD PTR 8[esp] + and ebp,edx + xor ebx,DWORD PTR 28[esp] + rol ebx,1 + add ebp,eax + ror edx,2 + mov eax,ecx + rol eax,5 + mov DWORD PTR 40[esp],ebx + lea ebx,DWORD PTR 2400959708[ebp*1+ebx] + mov ebp,edi + add ebx,eax + and ebp,esi + mov eax,DWORD PTR 44[esp] + add ebx,ebp + ; 40_59 59 + mov ebp,edx + xor eax,DWORD PTR 52[esp] + xor ebp,edi + xor eax,DWORD PTR 12[esp] + and ebp,ecx + xor eax,DWORD PTR 32[esp] + rol eax,1 + add ebp,esi + ror ecx,2 + mov esi,ebx + rol esi,5 + mov DWORD PTR 44[esp],eax + lea eax,DWORD PTR 2400959708[ebp*1+eax] + mov ebp,edx + add eax,esi + and ebp,edi + mov esi,DWORD PTR 48[esp] + add eax,ebp + ; 20_39 60 + mov ebp,ebx + xor esi,DWORD PTR 56[esp] + xor ebp,ecx + xor esi,DWORD PTR 16[esp] + xor ebp,edx + xor esi,DWORD PTR 36[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD PTR 48[esp],esi + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 52[esp] + add esi,ebp + ; 20_39 61 + mov ebp,eax + xor edi,DWORD PTR 60[esp] + xor ebp,ebx + xor edi,DWORD PTR 20[esp] + xor ebp,ecx + xor edi,DWORD PTR 40[esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD PTR 52[esp],edi + lea edi,DWORD PTR 3395469782[edx*1+edi] + mov edx,DWORD PTR 56[esp] + add edi,ebp + ; 20_39 62 + mov ebp,esi + xor edx,DWORD PTR [esp] + xor ebp,eax + xor edx,DWORD PTR 24[esp] + xor ebp,ebx + xor edx,DWORD PTR 44[esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR 56[esp],edx + lea edx,DWORD PTR 3395469782[ecx*1+edx] + mov ecx,DWORD PTR 60[esp] + add edx,ebp + ; 20_39 63 + mov ebp,edi + xor ecx,DWORD PTR 4[esp] + xor ebp,esi + xor ecx,DWORD PTR 28[esp] + xor ebp,eax + xor ecx,DWORD PTR 48[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 60[esp],ecx + lea ecx,DWORD PTR 
3395469782[ebx*1+ecx] + mov ebx,DWORD PTR [esp] + add ecx,ebp + ; 20_39 64 + mov ebp,edx + xor ebx,DWORD PTR 8[esp] + xor ebp,edi + xor ebx,DWORD PTR 32[esp] + xor ebp,esi + xor ebx,DWORD PTR 52[esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD PTR [esp],ebx + lea ebx,DWORD PTR 3395469782[eax*1+ebx] + mov eax,DWORD PTR 4[esp] + add ebx,ebp + ; 20_39 65 + mov ebp,ecx + xor eax,DWORD PTR 12[esp] + xor ebp,edx + xor eax,DWORD PTR 36[esp] + xor ebp,edi + xor eax,DWORD PTR 56[esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD PTR 4[esp],eax + lea eax,DWORD PTR 3395469782[esi*1+eax] + mov esi,DWORD PTR 8[esp] + add eax,ebp + ; 20_39 66 + mov ebp,ebx + xor esi,DWORD PTR 16[esp] + xor ebp,ecx + xor esi,DWORD PTR 40[esp] + xor ebp,edx + xor esi,DWORD PTR 60[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD PTR 8[esp],esi + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 12[esp] + add esi,ebp + ; 20_39 67 + mov ebp,eax + xor edi,DWORD PTR 20[esp] + xor ebp,ebx + xor edi,DWORD PTR 44[esp] + xor ebp,ecx + xor edi,DWORD PTR [esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD PTR 12[esp],edi + lea edi,DWORD PTR 3395469782[edx*1+edi] + mov edx,DWORD PTR 16[esp] + add edi,ebp + ; 20_39 68 + mov ebp,esi + xor edx,DWORD PTR 24[esp] + xor ebp,eax + xor edx,DWORD PTR 48[esp] + xor ebp,ebx + xor edx,DWORD PTR 4[esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR 16[esp],edx + lea edx,DWORD PTR 3395469782[ecx*1+edx] + mov ecx,DWORD PTR 20[esp] + add edx,ebp + ; 20_39 69 + mov ebp,edi + xor ecx,DWORD PTR 28[esp] + xor ebp,esi + xor ecx,DWORD PTR 52[esp] + xor ebp,eax + xor ecx,DWORD PTR 8[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 20[esp],ecx + lea ecx,DWORD PTR 3395469782[ebx*1+ecx] + mov ebx,DWORD PTR 24[esp] + add ecx,ebp + ; 20_39 70 + mov ebp,edx + xor ebx,DWORD PTR 32[esp] + xor ebp,edi + xor ebx,DWORD PTR 56[esp] + xor ebp,esi + xor ebx,DWORD PTR 12[esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD PTR 24[esp],ebx + lea ebx,DWORD PTR 3395469782[eax*1+ebx] + mov eax,DWORD PTR 28[esp] + add ebx,ebp + ; 20_39 71 + mov ebp,ecx + xor eax,DWORD PTR 36[esp] + xor ebp,edx + xor eax,DWORD PTR 60[esp] + xor ebp,edi + xor eax,DWORD PTR 16[esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + mov DWORD PTR 28[esp],eax + lea eax,DWORD PTR 3395469782[esi*1+eax] + mov esi,DWORD PTR 32[esp] + add eax,ebp + ; 20_39 72 + mov ebp,ebx + xor esi,DWORD PTR 40[esp] + xor ebp,ecx + xor esi,DWORD PTR [esp] + xor ebp,edx + xor esi,DWORD PTR 20[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + mov DWORD PTR 32[esp],esi + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 36[esp] + add esi,ebp + ; 20_39 73 + mov ebp,eax + xor edi,DWORD PTR 44[esp] + xor ebp,ebx + xor edi,DWORD PTR 4[esp] + xor ebp,ecx + xor edi,DWORD PTR 24[esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + mov DWORD PTR 36[esp],edi + lea edi,DWORD PTR 3395469782[edx*1+edi] + mov edx,DWORD PTR 40[esp] + add edi,ebp + ; 20_39 74 + mov ebp,esi + xor edx,DWORD PTR 48[esp] + xor ebp,eax + xor edx,DWORD PTR 8[esp] + xor ebp,ebx + xor edx,DWORD PTR 28[esp] + rol edx,1 + add ecx,ebp + ror esi,2 + mov ebp,edi + rol ebp,5 + mov DWORD PTR 40[esp],edx + lea edx,DWORD PTR 3395469782[ecx*1+edx] + mov ecx,DWORD PTR 44[esp] + add edx,ebp + ; 20_39 75 + mov ebp,edi + xor ecx,DWORD 
PTR 52[esp] + xor ebp,esi + xor ecx,DWORD PTR 12[esp] + xor ebp,eax + xor ecx,DWORD PTR 32[esp] + rol ecx,1 + add ebx,ebp + ror edi,2 + mov ebp,edx + rol ebp,5 + mov DWORD PTR 44[esp],ecx + lea ecx,DWORD PTR 3395469782[ebx*1+ecx] + mov ebx,DWORD PTR 48[esp] + add ecx,ebp + ; 20_39 76 + mov ebp,edx + xor ebx,DWORD PTR 56[esp] + xor ebp,edi + xor ebx,DWORD PTR 16[esp] + xor ebp,esi + xor ebx,DWORD PTR 36[esp] + rol ebx,1 + add eax,ebp + ror edx,2 + mov ebp,ecx + rol ebp,5 + mov DWORD PTR 48[esp],ebx + lea ebx,DWORD PTR 3395469782[eax*1+ebx] + mov eax,DWORD PTR 52[esp] + add ebx,ebp + ; 20_39 77 + mov ebp,ecx + xor eax,DWORD PTR 60[esp] + xor ebp,edx + xor eax,DWORD PTR 20[esp] + xor ebp,edi + xor eax,DWORD PTR 40[esp] + rol eax,1 + add esi,ebp + ror ecx,2 + mov ebp,ebx + rol ebp,5 + lea eax,DWORD PTR 3395469782[esi*1+eax] + mov esi,DWORD PTR 56[esp] + add eax,ebp + ; 20_39 78 + mov ebp,ebx + xor esi,DWORD PTR [esp] + xor ebp,ecx + xor esi,DWORD PTR 24[esp] + xor ebp,edx + xor esi,DWORD PTR 44[esp] + rol esi,1 + add edi,ebp + ror ebx,2 + mov ebp,eax + rol ebp,5 + lea esi,DWORD PTR 3395469782[edi*1+esi] + mov edi,DWORD PTR 60[esp] + add esi,ebp + ; 20_39 79 + mov ebp,eax + xor edi,DWORD PTR 4[esp] + xor ebp,ebx + xor edi,DWORD PTR 28[esp] + xor ebp,ecx + xor edi,DWORD PTR 48[esp] + rol edi,1 + add edx,ebp + ror eax,2 + mov ebp,esi + rol ebp,5 + lea edi,DWORD PTR 3395469782[edx*1+edi] + add edi,ebp + mov ebp,DWORD PTR 96[esp] + mov edx,DWORD PTR 100[esp] + add edi,DWORD PTR [ebp] + add esi,DWORD PTR 4[ebp] + add eax,DWORD PTR 8[ebp] + add ebx,DWORD PTR 12[ebp] + add ecx,DWORD PTR 16[ebp] + mov DWORD PTR [ebp],edi + add edx,64 + mov DWORD PTR 4[ebp],esi + cmp edx,DWORD PTR 104[esp] + mov DWORD PTR 8[ebp],eax + mov edi,ecx + mov DWORD PTR 12[ebp],ebx + mov esi,edx + mov DWORD PTR 16[ebp],ecx + jb $L002loop + add esp,76 + pop edi + pop esi + pop ebx + pop ebp + ret +_sha1_block_data_order ENDP +ALIGN 16 +__sha1_block_data_order_shaext PROC PRIVATE + push ebp + push ebx + push esi + push edi + call $L003pic_point +$L003pic_point: + pop ebp + lea ebp,DWORD PTR ($LK_XX_XX-$L003pic_point)[ebp] +$Lshaext_shortcut:: + mov edi,DWORD PTR 20[esp] + mov ebx,esp + mov esi,DWORD PTR 24[esp] + mov ecx,DWORD PTR 28[esp] + sub esp,32 + movdqu xmm0,XMMWORD PTR [edi] + movd xmm1,DWORD PTR 16[edi] + and esp,-32 + movdqa xmm3,XMMWORD PTR 80[ebp] + movdqu xmm4,XMMWORD PTR [esi] + pshufd xmm0,xmm0,27 + movdqu xmm5,XMMWORD PTR 16[esi] + pshufd xmm1,xmm1,27 + movdqu xmm6,XMMWORD PTR 32[esi] +DB 102,15,56,0,227 + movdqu xmm7,XMMWORD PTR 48[esi] +DB 102,15,56,0,235 +DB 102,15,56,0,243 +DB 102,15,56,0,251 + jmp $L004loop_shaext +ALIGN 16 +$L004loop_shaext: + dec ecx + lea eax,DWORD PTR 64[esi] + movdqa XMMWORD PTR [esp],xmm1 + paddd xmm1,xmm4 + cmovne esi,eax + movdqa XMMWORD PTR 16[esp],xmm0 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,0 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,0 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,215 + pxor 
xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,1 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,1 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 +DB 15,56,201,229 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,213 + pxor xmm4,xmm6 +DB 15,56,201,238 +DB 15,56,202,231 + movdqa xmm1,xmm0 +DB 15,58,204,194,2 +DB 15,56,200,206 + pxor xmm5,xmm7 +DB 15,56,202,236 +DB 15,56,201,247 + movdqa xmm2,xmm0 +DB 15,58,204,193,2 +DB 15,56,200,215 + pxor xmm6,xmm4 +DB 15,56,201,252 +DB 15,56,202,245 + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,204 + pxor xmm7,xmm5 +DB 15,56,202,254 + movdqu xmm4,XMMWORD PTR [esi] + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,213 + movdqu xmm5,XMMWORD PTR 16[esi] +DB 102,15,56,0,227 + movdqa xmm1,xmm0 +DB 15,58,204,194,3 +DB 15,56,200,206 + movdqu xmm6,XMMWORD PTR 32[esi] +DB 102,15,56,0,235 + movdqa xmm2,xmm0 +DB 15,58,204,193,3 +DB 15,56,200,215 + movdqu xmm7,XMMWORD PTR 48[esi] +DB 102,15,56,0,243 + movdqa xmm1,xmm0 +DB 15,58,204,194,3 + movdqa xmm2,XMMWORD PTR [esp] +DB 102,15,56,0,251 +DB 15,56,200,202 + paddd xmm0,XMMWORD PTR 16[esp] + jnz $L004loop_shaext + pshufd xmm0,xmm0,27 + pshufd xmm1,xmm1,27 + movdqu XMMWORD PTR [edi],xmm0 + movd DWORD PTR 16[edi],xmm1 + mov esp,ebx + pop edi + pop esi + pop ebx + pop ebp + ret +__sha1_block_data_order_shaext ENDP +ALIGN 16 +__sha1_block_data_order_ssse3 PROC PRIVATE + push ebp + push ebx + push esi + push edi + call $L005pic_point +$L005pic_point: + pop ebp + lea ebp,DWORD PTR ($LK_XX_XX-$L005pic_point)[ebp] +$Lssse3_shortcut:: + movdqa xmm7,XMMWORD PTR [ebp] + movdqa xmm0,XMMWORD PTR 16[ebp] + movdqa xmm1,XMMWORD PTR 32[ebp] + movdqa xmm2,XMMWORD PTR 48[ebp] + movdqa xmm6,XMMWORD PTR 64[ebp] + mov edi,DWORD PTR 20[esp] + mov ebp,DWORD PTR 24[esp] + mov edx,DWORD PTR 28[esp] + mov esi,esp + sub esp,208 + and esp,-64 + movdqa XMMWORD PTR 112[esp],xmm0 + movdqa XMMWORD PTR 128[esp],xmm1 + movdqa XMMWORD PTR 144[esp],xmm2 + shl edx,6 + movdqa XMMWORD PTR 160[esp],xmm7 + add edx,ebp + movdqa XMMWORD PTR 176[esp],xmm6 + add ebp,64 + mov DWORD PTR 192[esp],edi + mov DWORD PTR 196[esp],ebp + mov DWORD PTR 200[esp],edx + mov DWORD PTR 204[esp],esi + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + mov edi,DWORD PTR 16[edi] + mov esi,ebx + movdqu xmm0,XMMWORD PTR [ebp-64] + movdqu xmm1,XMMWORD PTR [ebp-48] + movdqu xmm2,XMMWORD PTR [ebp-32] + movdqu xmm3,XMMWORD PTR [ebp-16] +DB 102,15,56,0,198 +DB 102,15,56,0,206 +DB 102,15,56,0,214 + movdqa XMMWORD PTR 96[esp],xmm7 +DB 102,15,56,0,222 + paddd xmm0,xmm7 + paddd xmm1,xmm7 + paddd xmm2,xmm7 + movdqa XMMWORD PTR [esp],xmm0 + psubd xmm0,xmm7 + movdqa XMMWORD PTR 16[esp],xmm1 + psubd xmm1,xmm7 + movdqa XMMWORD PTR 32[esp],xmm2 + mov ebp,ecx + psubd xmm2,xmm7 + xor ebp,edx + pshufd xmm4,xmm0,238 + and esi,ebp + jmp $L006loop +ALIGN 16 +$L006loop: + ror ebx,2 + xor esi,edx + mov ebp,eax + punpcklqdq xmm4,xmm1 + movdqa xmm6,xmm3 + add edi,DWORD PTR [esp] + xor ebx,ecx + paddd xmm7,xmm3 + movdqa XMMWORD PTR 64[esp],xmm0 + rol eax,5 + add edi,esi + psrldq xmm6,4 + and ebp,ebx + xor ebx,ecx + 
pxor xmm4,xmm0 + add edi,eax + ror eax,7 + pxor xmm6,xmm2 + xor ebp,ecx + mov esi,edi + add edx,DWORD PTR 4[esp] + pxor xmm4,xmm6 + xor eax,ebx + rol edi,5 + movdqa XMMWORD PTR 48[esp],xmm7 + add edx,ebp + and esi,eax + movdqa xmm0,xmm4 + xor eax,ebx + add edx,edi + ror edi,7 + movdqa xmm6,xmm4 + xor esi,ebx + pslldq xmm0,12 + paddd xmm4,xmm4 + mov ebp,edx + add ecx,DWORD PTR 8[esp] + psrld xmm6,31 + xor edi,eax + rol edx,5 + movdqa xmm7,xmm0 + add ecx,esi + and ebp,edi + xor edi,eax + psrld xmm0,30 + add ecx,edx + ror edx,7 + por xmm4,xmm6 + xor ebp,eax + mov esi,ecx + add ebx,DWORD PTR 12[esp] + pslld xmm7,2 + xor edx,edi + rol ecx,5 + pxor xmm4,xmm0 + movdqa xmm0,XMMWORD PTR 96[esp] + add ebx,ebp + and esi,edx + pxor xmm4,xmm7 + pshufd xmm5,xmm1,238 + xor edx,edi + add ebx,ecx + ror ecx,7 + xor esi,edi + mov ebp,ebx + punpcklqdq xmm5,xmm2 + movdqa xmm7,xmm4 + add eax,DWORD PTR 16[esp] + xor ecx,edx + paddd xmm0,xmm4 + movdqa XMMWORD PTR 80[esp],xmm1 + rol ebx,5 + add eax,esi + psrldq xmm7,4 + and ebp,ecx + xor ecx,edx + pxor xmm5,xmm1 + add eax,ebx + ror ebx,7 + pxor xmm7,xmm3 + xor ebp,edx + mov esi,eax + add edi,DWORD PTR 20[esp] + pxor xmm5,xmm7 + xor ebx,ecx + rol eax,5 + movdqa XMMWORD PTR [esp],xmm0 + add edi,ebp + and esi,ebx + movdqa xmm1,xmm5 + xor ebx,ecx + add edi,eax + ror eax,7 + movdqa xmm7,xmm5 + xor esi,ecx + pslldq xmm1,12 + paddd xmm5,xmm5 + mov ebp,edi + add edx,DWORD PTR 24[esp] + psrld xmm7,31 + xor eax,ebx + rol edi,5 + movdqa xmm0,xmm1 + add edx,esi + and ebp,eax + xor eax,ebx + psrld xmm1,30 + add edx,edi + ror edi,7 + por xmm5,xmm7 + xor ebp,ebx + mov esi,edx + add ecx,DWORD PTR 28[esp] + pslld xmm0,2 + xor edi,eax + rol edx,5 + pxor xmm5,xmm1 + movdqa xmm1,XMMWORD PTR 112[esp] + add ecx,ebp + and esi,edi + pxor xmm5,xmm0 + pshufd xmm6,xmm2,238 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + punpcklqdq xmm6,xmm3 + movdqa xmm0,xmm5 + add ebx,DWORD PTR 32[esp] + xor edx,edi + paddd xmm1,xmm5 + movdqa XMMWORD PTR 96[esp],xmm2 + rol ecx,5 + add ebx,esi + psrldq xmm0,4 + and ebp,edx + xor edx,edi + pxor xmm6,xmm2 + add ebx,ecx + ror ecx,7 + pxor xmm0,xmm4 + xor ebp,edi + mov esi,ebx + add eax,DWORD PTR 36[esp] + pxor xmm6,xmm0 + xor ecx,edx + rol ebx,5 + movdqa XMMWORD PTR 16[esp],xmm1 + add eax,ebp + and esi,ecx + movdqa xmm2,xmm6 + xor ecx,edx + add eax,ebx + ror ebx,7 + movdqa xmm0,xmm6 + xor esi,edx + pslldq xmm2,12 + paddd xmm6,xmm6 + mov ebp,eax + add edi,DWORD PTR 40[esp] + psrld xmm0,31 + xor ebx,ecx + rol eax,5 + movdqa xmm1,xmm2 + add edi,esi + and ebp,ebx + xor ebx,ecx + psrld xmm2,30 + add edi,eax + ror eax,7 + por xmm6,xmm0 + xor ebp,ecx + movdqa xmm0,XMMWORD PTR 64[esp] + mov esi,edi + add edx,DWORD PTR 44[esp] + pslld xmm1,2 + xor eax,ebx + rol edi,5 + pxor xmm6,xmm2 + movdqa xmm2,XMMWORD PTR 112[esp] + add edx,ebp + and esi,eax + pxor xmm6,xmm1 + pshufd xmm7,xmm3,238 + xor eax,ebx + add edx,edi + ror edi,7 + xor esi,ebx + mov ebp,edx + punpcklqdq xmm7,xmm4 + movdqa xmm1,xmm6 + add ecx,DWORD PTR 48[esp] + xor edi,eax + paddd xmm2,xmm6 + movdqa XMMWORD PTR 64[esp],xmm3 + rol edx,5 + add ecx,esi + psrldq xmm1,4 + and ebp,edi + xor edi,eax + pxor xmm7,xmm3 + add ecx,edx + ror edx,7 + pxor xmm1,xmm5 + xor ebp,eax + mov esi,ecx + add ebx,DWORD PTR 52[esp] + pxor xmm7,xmm1 + xor edx,edi + rol ecx,5 + movdqa XMMWORD PTR 32[esp],xmm2 + add ebx,ebp + and esi,edx + movdqa xmm3,xmm7 + xor edx,edi + add ebx,ecx + ror ecx,7 + movdqa xmm1,xmm7 + xor esi,edi + pslldq xmm3,12 + paddd xmm7,xmm7 + mov ebp,ebx + add eax,DWORD PTR 56[esp] + psrld 
xmm1,31 + xor ecx,edx + rol ebx,5 + movdqa xmm2,xmm3 + add eax,esi + and ebp,ecx + xor ecx,edx + psrld xmm3,30 + add eax,ebx + ror ebx,7 + por xmm7,xmm1 + xor ebp,edx + movdqa xmm1,XMMWORD PTR 80[esp] + mov esi,eax + add edi,DWORD PTR 60[esp] + pslld xmm2,2 + xor ebx,ecx + rol eax,5 + pxor xmm7,xmm3 + movdqa xmm3,XMMWORD PTR 112[esp] + add edi,ebp + and esi,ebx + pxor xmm7,xmm2 + pshufd xmm2,xmm6,238 + xor ebx,ecx + add edi,eax + ror eax,7 + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + xor esi,ecx + mov ebp,edi + add edx,DWORD PTR [esp] + pxor xmm0,xmm1 + movdqa XMMWORD PTR 80[esp],xmm4 + xor eax,ebx + rol edi,5 + movdqa xmm4,xmm3 + add edx,esi + paddd xmm3,xmm7 + and ebp,eax + pxor xmm0,xmm2 + xor eax,ebx + add edx,edi + ror edi,7 + xor ebp,ebx + movdqa xmm2,xmm0 + movdqa XMMWORD PTR 48[esp],xmm3 + mov esi,edx + add ecx,DWORD PTR 4[esp] + xor edi,eax + rol edx,5 + pslld xmm0,2 + add ecx,ebp + and esi,edi + psrld xmm2,30 + xor edi,eax + add ecx,edx + ror edx,7 + xor esi,eax + mov ebp,ecx + add ebx,DWORD PTR 8[esp] + xor edx,edi + rol ecx,5 + por xmm0,xmm2 + add ebx,esi + and ebp,edx + movdqa xmm2,XMMWORD PTR 96[esp] + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 12[esp] + xor ebp,edi + mov esi,ebx + pshufd xmm3,xmm7,238 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 16[esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm1,xmm2 + movdqa XMMWORD PTR 96[esp],xmm5 + add edi,esi + xor ebp,ecx + movdqa xmm5,xmm4 + ror ebx,7 + paddd xmm4,xmm0 + add edi,eax + pxor xmm1,xmm3 + add edx,DWORD PTR 20[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm3,xmm1 + movdqa XMMWORD PTR [esp],xmm4 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm1,2 + add ecx,DWORD PTR 24[esp] + xor esi,eax + psrld xmm3,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + por xmm1,xmm3 + add ebx,DWORD PTR 28[esp] + xor ebp,edi + movdqa xmm3,XMMWORD PTR 64[esp] + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + pshufd xmm4,xmm0,238 + add ebx,ecx + add eax,DWORD PTR 32[esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + xor esi,edx + mov ebp,ebx + rol ebx,5 + pxor xmm2,xmm3 + movdqa XMMWORD PTR 64[esp],xmm6 + add eax,esi + xor ebp,edx + movdqa xmm6,XMMWORD PTR 128[esp] + ror ecx,7 + paddd xmm5,xmm1 + add eax,ebx + pxor xmm2,xmm4 + add edi,DWORD PTR 36[esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + movdqa xmm4,xmm2 + movdqa XMMWORD PTR 16[esp],xmm5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + pslld xmm2,2 + add edx,DWORD PTR 40[esp] + xor esi,ebx + psrld xmm4,30 + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + por xmm2,xmm4 + add ecx,DWORD PTR 44[esp] + xor ebp,eax + movdqa xmm4,XMMWORD PTR 80[esp] + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + pshufd xmm5,xmm1,238 + add ecx,edx + add ebx,DWORD PTR 48[esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,edi + mov ebp,ecx + rol ecx,5 + pxor xmm3,xmm4 + movdqa XMMWORD PTR 80[esp],xmm7 + add ebx,esi + xor ebp,edi + movdqa xmm7,xmm6 + ror edx,7 + paddd xmm6,xmm2 + add ebx,ecx + pxor xmm3,xmm5 + add eax,DWORD PTR 52[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + movdqa xmm5,xmm3 + movdqa XMMWORD PTR 32[esp],xmm6 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + pslld xmm3,2 + add edi,DWORD PTR 56[esp] + xor esi,ecx + psrld xmm5,30 + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + por xmm3,xmm5 + add edx,DWORD PTR 60[esp] + xor ebp,ebx + 
movdqa xmm5,XMMWORD PTR 96[esp] + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + pshufd xmm6,xmm2,238 + add edx,edi + add ecx,DWORD PTR [esp] + pxor xmm4,xmm0 + punpcklqdq xmm6,xmm3 + xor esi,eax + mov ebp,edx + rol edx,5 + pxor xmm4,xmm5 + movdqa XMMWORD PTR 96[esp],xmm0 + add ecx,esi + xor ebp,eax + movdqa xmm0,xmm7 + ror edi,7 + paddd xmm7,xmm3 + add ecx,edx + pxor xmm4,xmm6 + add ebx,DWORD PTR 4[esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + movdqa xmm6,xmm4 + movdqa XMMWORD PTR 48[esp],xmm7 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + pslld xmm4,2 + add eax,DWORD PTR 8[esp] + xor esi,edx + psrld xmm6,30 + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + por xmm4,xmm6 + add edi,DWORD PTR 12[esp] + xor ebp,ecx + movdqa xmm6,XMMWORD PTR 64[esp] + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + pshufd xmm7,xmm3,238 + add edi,eax + add edx,DWORD PTR 16[esp] + pxor xmm5,xmm1 + punpcklqdq xmm7,xmm4 + xor esi,ebx + mov ebp,edi + rol edi,5 + pxor xmm5,xmm6 + movdqa XMMWORD PTR 64[esp],xmm1 + add edx,esi + xor ebp,ebx + movdqa xmm1,xmm0 + ror eax,7 + paddd xmm0,xmm4 + add edx,edi + pxor xmm5,xmm7 + add ecx,DWORD PTR 20[esp] + xor ebp,eax + mov esi,edx + rol edx,5 + movdqa xmm7,xmm5 + movdqa XMMWORD PTR [esp],xmm0 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + pslld xmm5,2 + add ebx,DWORD PTR 24[esp] + xor esi,edi + psrld xmm7,30 + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + por xmm5,xmm7 + add eax,DWORD PTR 28[esp] + movdqa xmm7,XMMWORD PTR 80[esp] + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pshufd xmm0,xmm4,238 + add eax,ebp + xor esi,ecx + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 32[esp] + pxor xmm6,xmm2 + punpcklqdq xmm0,xmm5 + and esi,ecx + xor ecx,edx + ror ebx,7 + pxor xmm6,xmm7 + movdqa XMMWORD PTR 80[esp],xmm2 + mov ebp,eax + xor esi,ecx + rol eax,5 + movdqa xmm2,xmm1 + add edi,esi + paddd xmm1,xmm5 + xor ebp,ebx + pxor xmm6,xmm0 + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 36[esp] + and ebp,ebx + movdqa xmm0,xmm6 + movdqa XMMWORD PTR 16[esp],xmm1 + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + pslld xmm6,2 + add edx,ebp + xor esi,eax + psrld xmm0,30 + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 40[esp] + and esi,eax + xor eax,ebx + ror edi,7 + por xmm6,xmm0 + mov ebp,edx + xor esi,eax + movdqa xmm0,XMMWORD PTR 96[esp] + rol edx,5 + add ecx,esi + xor ebp,edi + xor edi,eax + add ecx,edx + pshufd xmm1,xmm5,238 + add ebx,DWORD PTR 44[esp] + and ebp,edi + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + add ebx,ebp + xor esi,edx + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 48[esp] + pxor xmm7,xmm3 + punpcklqdq xmm1,xmm6 + and esi,edx + xor edx,edi + ror ecx,7 + pxor xmm7,xmm0 + movdqa XMMWORD PTR 96[esp],xmm3 + mov ebp,ebx + xor esi,edx + rol ebx,5 + movdqa xmm3,XMMWORD PTR 144[esp] + add eax,esi + paddd xmm2,xmm6 + xor ebp,ecx + pxor xmm7,xmm1 + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 52[esp] + and ebp,ecx + movdqa xmm1,xmm7 + movdqa XMMWORD PTR 32[esp],xmm2 + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + pslld xmm7,2 + add edi,ebp + xor esi,ebx + psrld xmm1,30 + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 56[esp] + and esi,ebx + xor ebx,ecx + ror eax,7 + por xmm7,xmm1 + mov ebp,edi + xor esi,ebx + movdqa xmm1,XMMWORD PTR 64[esp] + rol edi,5 + add edx,esi + xor ebp,eax + xor eax,ebx + add edx,edi + pshufd xmm2,xmm6,238 + add ecx,DWORD PTR 60[esp] + and ebp,eax + xor eax,ebx + ror 
edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + add ecx,ebp + xor esi,edi + xor edi,eax + add ecx,edx + add ebx,DWORD PTR [esp] + pxor xmm0,xmm4 + punpcklqdq xmm2,xmm7 + and esi,edi + xor edi,eax + ror edx,7 + pxor xmm0,xmm1 + movdqa XMMWORD PTR 64[esp],xmm4 + mov ebp,ecx + xor esi,edi + rol ecx,5 + movdqa xmm4,xmm3 + add ebx,esi + paddd xmm3,xmm7 + xor ebp,edx + pxor xmm0,xmm2 + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 4[esp] + and ebp,edx + movdqa xmm2,xmm0 + movdqa XMMWORD PTR 48[esp],xmm3 + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + pslld xmm0,2 + add eax,ebp + xor esi,ecx + psrld xmm2,30 + xor ecx,edx + add eax,ebx + add edi,DWORD PTR 8[esp] + and esi,ecx + xor ecx,edx + ror ebx,7 + por xmm0,xmm2 + mov ebp,eax + xor esi,ecx + movdqa xmm2,XMMWORD PTR 80[esp] + rol eax,5 + add edi,esi + xor ebp,ebx + xor ebx,ecx + add edi,eax + pshufd xmm3,xmm7,238 + add edx,DWORD PTR 12[esp] + and ebp,ebx + xor ebx,ecx + ror eax,7 + mov esi,edi + xor ebp,ebx + rol edi,5 + add edx,ebp + xor esi,eax + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 16[esp] + pxor xmm1,xmm5 + punpcklqdq xmm3,xmm0 + and esi,eax + xor eax,ebx + ror edi,7 + pxor xmm1,xmm2 + movdqa XMMWORD PTR 80[esp],xmm5 + mov ebp,edx + xor esi,eax + rol edx,5 + movdqa xmm5,xmm4 + add ecx,esi + paddd xmm4,xmm0 + xor ebp,edi + pxor xmm1,xmm3 + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 20[esp] + and ebp,edi + movdqa xmm3,xmm1 + movdqa XMMWORD PTR [esp],xmm4 + xor edi,eax + ror edx,7 + mov esi,ecx + xor ebp,edi + rol ecx,5 + pslld xmm1,2 + add ebx,ebp + xor esi,edx + psrld xmm3,30 + xor edx,edi + add ebx,ecx + add eax,DWORD PTR 24[esp] + and esi,edx + xor edx,edi + ror ecx,7 + por xmm1,xmm3 + mov ebp,ebx + xor esi,edx + movdqa xmm3,XMMWORD PTR 96[esp] + rol ebx,5 + add eax,esi + xor ebp,ecx + xor ecx,edx + add eax,ebx + pshufd xmm4,xmm0,238 + add edi,DWORD PTR 28[esp] + and ebp,ecx + xor ecx,edx + ror ebx,7 + mov esi,eax + xor ebp,ecx + rol eax,5 + add edi,ebp + xor esi,ebx + xor ebx,ecx + add edi,eax + add edx,DWORD PTR 32[esp] + pxor xmm2,xmm6 + punpcklqdq xmm4,xmm1 + and esi,ebx + xor ebx,ecx + ror eax,7 + pxor xmm2,xmm3 + movdqa XMMWORD PTR 96[esp],xmm6 + mov ebp,edi + xor esi,ebx + rol edi,5 + movdqa xmm6,xmm5 + add edx,esi + paddd xmm5,xmm1 + xor ebp,eax + pxor xmm2,xmm4 + xor eax,ebx + add edx,edi + add ecx,DWORD PTR 36[esp] + and ebp,eax + movdqa xmm4,xmm2 + movdqa XMMWORD PTR 16[esp],xmm5 + xor eax,ebx + ror edi,7 + mov esi,edx + xor ebp,eax + rol edx,5 + pslld xmm2,2 + add ecx,ebp + xor esi,edi + psrld xmm4,30 + xor edi,eax + add ecx,edx + add ebx,DWORD PTR 40[esp] + and esi,edi + xor edi,eax + ror edx,7 + por xmm2,xmm4 + mov ebp,ecx + xor esi,edi + movdqa xmm4,XMMWORD PTR 64[esp] + rol ecx,5 + add ebx,esi + xor ebp,edx + xor edx,edi + add ebx,ecx + pshufd xmm5,xmm1,238 + add eax,DWORD PTR 44[esp] + and ebp,edx + xor edx,edi + ror ecx,7 + mov esi,ebx + xor ebp,edx + rol ebx,5 + add eax,ebp + xor esi,edx + add eax,ebx + add edi,DWORD PTR 48[esp] + pxor xmm3,xmm7 + punpcklqdq xmm5,xmm2 + xor esi,ecx + mov ebp,eax + rol eax,5 + pxor xmm3,xmm4 + movdqa XMMWORD PTR 64[esp],xmm7 + add edi,esi + xor ebp,ecx + movdqa xmm7,xmm6 + ror ebx,7 + paddd xmm6,xmm2 + add edi,eax + pxor xmm3,xmm5 + add edx,DWORD PTR 52[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + movdqa xmm5,xmm3 + movdqa XMMWORD PTR 32[esp],xmm6 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + pslld xmm3,2 + add ecx,DWORD PTR 56[esp] + xor esi,eax + psrld xmm5,30 + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + 
por xmm3,xmm5 + add ebx,DWORD PTR 60[esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR [esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + paddd xmm7,xmm3 + add eax,ebx + add edi,DWORD PTR 4[esp] + xor ebp,ecx + mov esi,eax + movdqa XMMWORD PTR 48[esp],xmm7 + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 8[esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 12[esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + mov ebp,DWORD PTR 196[esp] + cmp ebp,DWORD PTR 200[esp] + je $L007done + movdqa xmm7,XMMWORD PTR 160[esp] + movdqa xmm6,XMMWORD PTR 176[esp] + movdqu xmm0,XMMWORD PTR [ebp] + movdqu xmm1,XMMWORD PTR 16[ebp] + movdqu xmm2,XMMWORD PTR 32[ebp] + movdqu xmm3,XMMWORD PTR 48[ebp] + add ebp,64 +DB 102,15,56,0,198 + mov DWORD PTR 196[esp],ebp + movdqa XMMWORD PTR 96[esp],xmm7 + add ebx,DWORD PTR 16[esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 +DB 102,15,56,0,206 + add ebx,ecx + add eax,DWORD PTR 20[esp] + xor ebp,edx + mov esi,ebx + paddd xmm0,xmm7 + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + movdqa XMMWORD PTR [esp],xmm0 + add eax,ebx + add edi,DWORD PTR 24[esp] + xor esi,ecx + mov ebp,eax + psubd xmm0,xmm7 + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 28[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 32[esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 +DB 102,15,56,0,214 + add ecx,edx + add ebx,DWORD PTR 36[esp] + xor ebp,edi + mov esi,ecx + paddd xmm1,xmm7 + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + movdqa XMMWORD PTR 16[esp],xmm1 + add ebx,ecx + add eax,DWORD PTR 40[esp] + xor esi,edx + mov ebp,ebx + psubd xmm1,xmm7 + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 44[esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 48[esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 +DB 102,15,56,0,222 + add edx,edi + add ecx,DWORD PTR 52[esp] + xor ebp,eax + mov esi,edx + paddd xmm2,xmm7 + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + movdqa XMMWORD PTR 32[esp],xmm2 + add ecx,edx + add ebx,DWORD PTR 56[esp] + xor esi,edi + mov ebp,ecx + psubd xmm2,xmm7 + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 60[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD PTR 192[esp] + add eax,DWORD PTR [ebp] + add esi,DWORD PTR 4[ebp] + add ecx,DWORD PTR 8[ebp] + mov DWORD PTR [ebp],eax + add edx,DWORD PTR 12[ebp] + mov DWORD PTR 4[ebp],esi + add edi,DWORD PTR 16[ebp] + mov DWORD PTR 8[ebp],ecx + mov ebx,ecx + mov DWORD PTR 12[ebp],edx + xor ebx,edx + mov DWORD PTR 16[ebp],edi + mov ebp,esi + pshufd xmm4,xmm0,238 + and esi,ebx + mov ebx,ebp + jmp $L006loop +ALIGN 16 +$L007done: + add ebx,DWORD PTR 16[esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 20[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + xor esi,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 24[esp] + xor esi,ecx + mov ebp,eax + rol eax,5 + add edi,esi + xor ebp,ecx + ror ebx,7 + add 
edi,eax + add edx,DWORD PTR 28[esp] + xor ebp,ebx + mov esi,edi + rol edi,5 + add edx,ebp + xor esi,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 32[esp] + xor esi,eax + mov ebp,edx + rol edx,5 + add ecx,esi + xor ebp,eax + ror edi,7 + add ecx,edx + add ebx,DWORD PTR 36[esp] + xor ebp,edi + mov esi,ecx + rol ecx,5 + add ebx,ebp + xor esi,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 40[esp] + xor esi,edx + mov ebp,ebx + rol ebx,5 + add eax,esi + xor ebp,edx + ror ecx,7 + add eax,ebx + add edi,DWORD PTR 44[esp] + xor ebp,ecx + mov esi,eax + rol eax,5 + add edi,ebp + xor esi,ecx + ror ebx,7 + add edi,eax + add edx,DWORD PTR 48[esp] + xor esi,ebx + mov ebp,edi + rol edi,5 + add edx,esi + xor ebp,ebx + ror eax,7 + add edx,edi + add ecx,DWORD PTR 52[esp] + xor ebp,eax + mov esi,edx + rol edx,5 + add ecx,ebp + xor esi,eax + ror edi,7 + add ecx,edx + add ebx,DWORD PTR 56[esp] + xor esi,edi + mov ebp,ecx + rol ecx,5 + add ebx,esi + xor ebp,edi + ror edx,7 + add ebx,ecx + add eax,DWORD PTR 60[esp] + xor ebp,edx + mov esi,ebx + rol ebx,5 + add eax,ebp + ror ecx,7 + add eax,ebx + mov ebp,DWORD PTR 192[esp] + add eax,DWORD PTR [ebp] + mov esp,DWORD PTR 204[esp] + add esi,DWORD PTR 4[ebp] + add ecx,DWORD PTR 8[ebp] + mov DWORD PTR [ebp],eax + add edx,DWORD PTR 12[ebp] + mov DWORD PTR 4[ebp],esi + add edi,DWORD PTR 16[ebp] + mov DWORD PTR 8[ebp],ecx + mov DWORD PTR 12[ebp],edx + mov DWORD PTR 16[ebp],edi + pop edi + pop esi + pop ebx + pop ebp + ret +__sha1_block_data_order_ssse3 ENDP +ALIGN 64 +$LK_XX_XX:: +DD 1518500249,1518500249,1518500249,1518500249 +DD 1859775393,1859775393,1859775393,1859775393 +DD 2400959708,2400959708,2400959708,2400959708 +DD 3395469782,3395469782,3395469782,3395469782 +DD 66051,67438087,134810123,202182159 +DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +DB 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +DB 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha256-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha256-586.asm new file mode 100644 index 00000000000000..b6af4ab0640332 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha256-586.asm @@ -0,0 +1,4612 @@ +TITLE sha512-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_sha256_block_data_order PROC PUBLIC +$L_sha256_block_data_order_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov ebx,esp + call $L000pic_point +$L000pic_point: + pop ebp + lea ebp,DWORD PTR ($L001K256-$L000pic_point)[ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD PTR [esp],esi + mov DWORD PTR 4[esp],edi + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov ecx,DWORD PTR [edx] + mov ebx,DWORD PTR 4[edx] + test ecx,1048576 + jnz $L002loop + mov edx,DWORD PTR 8[edx] + test ecx,16777216 + jz $L003no_xmm + and ecx,1073741824 + and ebx,268435968 + test edx,536870912 + jnz $L004shaext + or ecx,ebx + and ecx,1342177280 + cmp ecx,1342177280 + test ebx,512 + jnz $L005SSSE3 +$L003no_xmm: + sub eax,edi + cmp eax,256 + jae $L006unrolled + jmp $L002loop +ALIGN 16 +$L002loop: + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + bswap eax + mov edx,DWORD PTR 12[edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD PTR 16[edi] + mov ebx,DWORD PTR 20[edi] + mov ecx,DWORD PTR 24[edi] + bswap eax + mov edx,DWORD PTR 28[edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD PTR 32[edi] + mov ebx,DWORD PTR 36[edi] + mov ecx,DWORD PTR 40[edi] + bswap eax + mov edx,DWORD PTR 44[edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + mov eax,DWORD PTR 48[edi] + mov ebx,DWORD PTR 52[edi] + mov ecx,DWORD PTR 56[edi] + bswap eax + mov edx,DWORD PTR 60[edi] + bswap ebx + push eax + bswap ecx + push ebx + bswap edx + push ecx + push edx + add edi,64 + lea esp,DWORD PTR [esp-36] + mov DWORD PTR 104[esp],edi + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edi,DWORD PTR 12[esi] + mov DWORD PTR 8[esp],ebx + xor ebx,ecx + mov DWORD PTR 12[esp],ecx + mov DWORD PTR 16[esp],edi + mov DWORD PTR [esp],ebx + mov edx,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov edi,DWORD PTR 28[esi] + mov DWORD PTR 24[esp],ebx + mov DWORD PTR 28[esp],ecx + mov DWORD PTR 32[esp],edi +ALIGN 16 +$L00700_15: + mov ecx,edx + mov esi,DWORD PTR 24[esp] + ror ecx,14 + mov edi,DWORD PTR 28[esp] + xor ecx,edx + xor esi,edi + mov ebx,DWORD PTR 96[esp] + ror ecx,5 + and esi,edx + mov DWORD PTR 20[esp],edx + xor edx,ecx + add ebx,DWORD PTR 32[esp] + xor esi,edi + ror edx,6 + mov ecx,eax + add ebx,esi + ror ecx,9 + add ebx,edx + mov edi,DWORD PTR 8[esp] + xor ecx,eax + mov DWORD PTR 4[esp],eax + lea esp,DWORD PTR [esp-4] + ror ecx,11 + mov esi,DWORD PTR [ebp] + xor ecx,eax + mov edx,DWORD PTR 20[esp] + xor eax,edi + ror ecx,2 + add ebx,esi + mov DWORD PTR [esp],eax + add edx,ebx + and eax,DWORD PTR 4[esp] + add ebx,ecx + xor eax,edi + add ebp,4 + add eax,ebx + cmp esi,3248222580 + jne $L00700_15 + mov ecx,DWORD PTR 156[esp] + jmp $L00816_63 +ALIGN 16 +$L00816_63: + mov ebx,ecx + mov esi,DWORD PTR 104[esp] + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 160[esp] + shr edi,10 + add ebx,DWORD PTR 124[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 24[esp] + ror ecx,14 + add ebx,edi + mov edi,DWORD PTR 
28[esp] + xor ecx,edx + xor esi,edi + mov DWORD PTR 96[esp],ebx + ror ecx,5 + and esi,edx + mov DWORD PTR 20[esp],edx + xor edx,ecx + add ebx,DWORD PTR 32[esp] + xor esi,edi + ror edx,6 + mov ecx,eax + add ebx,esi + ror ecx,9 + add ebx,edx + mov edi,DWORD PTR 8[esp] + xor ecx,eax + mov DWORD PTR 4[esp],eax + lea esp,DWORD PTR [esp-4] + ror ecx,11 + mov esi,DWORD PTR [ebp] + xor ecx,eax + mov edx,DWORD PTR 20[esp] + xor eax,edi + ror ecx,2 + add ebx,esi + mov DWORD PTR [esp],eax + add edx,ebx + and eax,DWORD PTR 4[esp] + add ebx,ecx + xor eax,edi + mov ecx,DWORD PTR 156[esp] + add ebp,4 + add eax,ebx + cmp esi,3329325298 + jne $L00816_63 + mov esi,DWORD PTR 356[esp] + mov ebx,DWORD PTR 8[esp] + mov ecx,DWORD PTR 16[esp] + add eax,DWORD PTR [esi] + add ebx,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov eax,DWORD PTR 24[esp] + mov ebx,DWORD PTR 28[esp] + mov ecx,DWORD PTR 32[esp] + mov edi,DWORD PTR 360[esp] + add edx,DWORD PTR 16[esi] + add eax,DWORD PTR 20[esi] + add ebx,DWORD PTR 24[esi] + add ecx,DWORD PTR 28[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],eax + mov DWORD PTR 24[esi],ebx + mov DWORD PTR 28[esi],ecx + lea esp,DWORD PTR 356[esp] + sub ebp,256 + cmp edi,DWORD PTR 8[esp] + jb $L002loop + mov esp,DWORD PTR 12[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L001K256: +DD 1116352408,1899447441,3049323471,3921009573 +DD 961987163,1508970993,2453635748,2870763221 +DD 3624381080,310598401,607225278,1426881987 +DD 1925078388,2162078206,2614888103,3248222580 +DD 3835390401,4022224774,264347078,604807628 +DD 770255983,1249150122,1555081692,1996064986 +DD 2554220882,2821834349,2952996808,3210313671 +DD 3336571891,3584528711,113926993,338241895 +DD 666307205,773529912,1294757372,1396182291 +DD 1695183700,1986661051,2177026350,2456956037 +DD 2730485921,2820302411,3259730800,3345764771 +DD 3516065817,3600352804,4094571909,275423344 +DD 430227734,506948616,659060556,883997877 +DD 958139571,1322822218,1537002063,1747873779 +DD 1955562222,2024104815,2227730452,2361852424 +DD 2428436474,2756734187,3204031479,3329325298 +DD 66051,67438087,134810123,202182159 +DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +ALIGN 16 +$L006unrolled: + lea esp,DWORD PTR [esp-96] + mov eax,DWORD PTR [esi] + mov ebp,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov ebx,DWORD PTR 12[esi] + mov DWORD PTR 4[esp],ebp + xor ebp,ecx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],ebx + mov edx,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov esi,DWORD PTR 28[esi] + mov DWORD PTR 20[esp],ebx + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],esi + jmp $L009grand_loop +ALIGN 16 +$L009grand_loop: + mov ebx,DWORD PTR [edi] + mov ecx,DWORD PTR 4[edi] + bswap ebx + mov esi,DWORD PTR 8[edi] + bswap ecx + mov DWORD PTR 32[esp],ebx + bswap esi + mov DWORD PTR 36[esp],ecx + mov DWORD PTR 40[esp],esi + mov ebx,DWORD PTR 12[edi] + mov ecx,DWORD PTR 16[edi] + bswap ebx + mov esi,DWORD PTR 20[edi] + bswap ecx + mov DWORD PTR 44[esp],ebx + bswap esi + mov DWORD PTR 48[esp],ecx + mov DWORD PTR 52[esp],esi + mov ebx,DWORD PTR 24[edi] + mov ecx,DWORD PTR 28[edi] + bswap ebx + mov esi,DWORD PTR 32[edi] + bswap ecx + mov DWORD PTR 56[esp],ebx + bswap 
esi + mov DWORD PTR 60[esp],ecx + mov DWORD PTR 64[esp],esi + mov ebx,DWORD PTR 36[edi] + mov ecx,DWORD PTR 40[edi] + bswap ebx + mov esi,DWORD PTR 44[edi] + bswap ecx + mov DWORD PTR 68[esp],ebx + bswap esi + mov DWORD PTR 72[esp],ecx + mov DWORD PTR 76[esp],esi + mov ebx,DWORD PTR 48[edi] + mov ecx,DWORD PTR 52[edi] + bswap ebx + mov esi,DWORD PTR 56[edi] + bswap ecx + mov DWORD PTR 80[esp],ebx + bswap esi + mov DWORD PTR 84[esp],ecx + mov DWORD PTR 88[esp],esi + mov ebx,DWORD PTR 60[edi] + add edi,64 + bswap ebx + mov DWORD PTR 100[esp],edi + mov DWORD PTR 92[esp],ebx + mov ecx,edx + mov esi,DWORD PTR 20[esp] + ror edx,14 + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov ebx,DWORD PTR 32[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1116352408[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov ebx,DWORD PTR 36[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1899447441[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 12[esp] + ror edx,14 + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov ebx,DWORD PTR 40[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3049323471[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov ebx,DWORD PTR 44[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3921009573[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 4[esp] + ror edx,14 + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov ebx,DWORD PTR 48[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 961987163[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR [esp] + ror edx,14 + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov ebx,DWORD PTR 52[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD 
PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1508970993[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 28[esp] + ror edx,14 + mov edi,DWORD PTR [esp] + xor edx,ecx + mov ebx,DWORD PTR 56[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2453635748[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov ebx,DWORD PTR 60[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2870763221[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 20[esp] + ror edx,14 + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov ebx,DWORD PTR 64[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3624381080[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov ebx,DWORD PTR 68[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 310598401[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 12[esp] + ror edx,14 + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov ebx,DWORD PTR 72[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 607225278[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov ebx,DWORD PTR 76[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD 
PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1426881987[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 4[esp] + ror edx,14 + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov ebx,DWORD PTR 80[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1925078388[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR [esp] + ror edx,14 + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov ebx,DWORD PTR 84[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2162078206[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov ecx,edx + mov esi,DWORD PTR 28[esp] + ror edx,14 + mov edi,DWORD PTR [esp] + xor edx,ecx + mov ebx,DWORD PTR 88[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2614888103[edx*1+ebx] + xor ecx,esi + xor ebp,edi + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov esi,edx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov ebx,DWORD PTR 92[esp] + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3248222580[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 36[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 88[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 32[esp] + shr edi,10 + add ebx,DWORD PTR 68[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 32[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3835390401[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 40[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 92[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 36[esp] + shr edi,10 + add ebx,DWORD PTR 72[esp] + mov esi,edx + 
xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 36[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 4022224774[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 44[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 32[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 40[esp] + shr edi,10 + add ebx,DWORD PTR 76[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 40[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 264347078[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 48[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 36[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 44[esp] + shr edi,10 + add ebx,DWORD PTR 80[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 44[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 604807628[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 52[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 40[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 48[esp] + shr edi,10 + add ebx,DWORD PTR 84[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 48[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 770255983[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 56[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 44[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 52[esp] + shr edi,10 + add ebx,DWORD PTR 88[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 52[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi 
+ mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1249150122[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 60[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 48[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 56[esp] + shr edi,10 + add ebx,DWORD PTR 92[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 56[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1555081692[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 64[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 52[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 60[esp] + shr edi,10 + add ebx,DWORD PTR 32[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 60[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1996064986[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 68[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 56[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 64[esp] + shr edi,10 + add ebx,DWORD PTR 36[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 64[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2554220882[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 72[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 60[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 68[esp] + shr edi,10 + add ebx,DWORD PTR 40[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 68[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 
[esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2821834349[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 76[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 64[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 72[esp] + shr edi,10 + add ebx,DWORD PTR 44[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 72[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2952996808[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 80[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 68[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 76[esp] + shr edi,10 + add ebx,DWORD PTR 48[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 76[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3210313671[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 84[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 72[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 80[esp] + shr edi,10 + add ebx,DWORD PTR 52[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 80[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3336571891[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 88[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 76[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 84[esp] + shr edi,10 + add ebx,DWORD PTR 56[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 84[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3584528711[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov 
ecx,DWORD PTR 92[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 80[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 88[esp] + shr edi,10 + add ebx,DWORD PTR 60[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 88[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 113926993[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 32[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 84[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 92[esp] + shr edi,10 + add ebx,DWORD PTR 64[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 92[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 338241895[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 36[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 88[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 32[esp] + shr edi,10 + add ebx,DWORD PTR 68[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 32[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 666307205[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 40[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 92[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 36[esp] + shr edi,10 + add ebx,DWORD PTR 72[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 36[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 773529912[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 44[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 32[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor 
ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 40[esp] + shr edi,10 + add ebx,DWORD PTR 76[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 40[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1294757372[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 48[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 36[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 44[esp] + shr edi,10 + add ebx,DWORD PTR 80[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 44[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1396182291[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 52[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 40[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 48[esp] + shr edi,10 + add ebx,DWORD PTR 84[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 48[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1695183700[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 56[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 44[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 52[esp] + shr edi,10 + add ebx,DWORD PTR 88[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 52[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1986661051[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 60[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 48[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 56[esp] + shr edi,10 + add ebx,DWORD PTR 92[esp] + mov ecx,edx + xor edi,esi + 
mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 56[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2177026350[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 64[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 52[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 60[esp] + shr edi,10 + add ebx,DWORD PTR 32[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 60[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2456956037[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 68[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 56[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 64[esp] + shr edi,10 + add ebx,DWORD PTR 36[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 64[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2730485921[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 72[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 60[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 68[esp] + shr edi,10 + add ebx,DWORD PTR 40[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 68[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2820302411[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 76[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 64[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 72[esp] + shr edi,10 + add ebx,DWORD PTR 44[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 72[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov 
DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3259730800[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 80[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 68[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 76[esp] + shr edi,10 + add ebx,DWORD PTR 48[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 76[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3345764771[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 84[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 72[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 80[esp] + shr edi,10 + add ebx,DWORD PTR 52[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 80[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3516065817[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 88[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 76[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 84[esp] + shr edi,10 + add ebx,DWORD PTR 56[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 84[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3600352804[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 92[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 80[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 88[esp] + shr edi,10 + add ebx,DWORD PTR 60[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 88[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + 
xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 4094571909[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 32[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 84[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 92[esp] + shr edi,10 + add ebx,DWORD PTR 64[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 92[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 275423344[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 36[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 88[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 32[esp] + shr edi,10 + add ebx,DWORD PTR 68[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 32[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 430227734[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 40[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 92[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 36[esp] + shr edi,10 + add ebx,DWORD PTR 72[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 36[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 506948616[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 44[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 32[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 40[esp] + shr edi,10 + add ebx,DWORD PTR 76[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 40[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 659060556[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 
48[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 36[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 44[esp] + shr edi,10 + add ebx,DWORD PTR 80[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 44[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 883997877[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 52[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 40[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 48[esp] + shr edi,10 + add ebx,DWORD PTR 84[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 48[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 958139571[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 56[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 44[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 52[esp] + shr edi,10 + add ebx,DWORD PTR 88[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 52[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1322822218[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 60[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 48[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 56[esp] + shr edi,10 + add ebx,DWORD PTR 92[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + mov DWORD PTR 56[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1537002063[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 64[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 52[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr 
ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 60[esp] + shr edi,10 + add ebx,DWORD PTR 32[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + mov DWORD PTR 60[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 1747873779[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 68[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 56[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 64[esp] + shr edi,10 + add ebx,DWORD PTR 36[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 20[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 24[esp] + xor edx,ecx + mov DWORD PTR 64[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 28[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 4[esp] + xor ecx,eax + mov DWORD PTR [esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 1955562222[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 72[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 12[esp] + add ebp,ecx + mov ecx,DWORD PTR 60[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 68[esp] + shr edi,10 + add ebx,DWORD PTR 40[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 16[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 20[esp] + xor edx,esi + mov DWORD PTR 68[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 12[esp],esi + xor edx,esi + add ebx,DWORD PTR 24[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR [esp] + xor esi,ebp + mov DWORD PTR 28[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2024104815[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 76[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,esi + mov esi,DWORD PTR 64[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 72[esp] + shr edi,10 + add ebx,DWORD PTR 44[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 12[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 16[esp] + xor edx,ecx + mov DWORD PTR 72[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 20[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 28[esp] + xor ecx,eax + mov DWORD PTR 24[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2227730452[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 80[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 4[esp] + add ebp,ecx + mov ecx,DWORD PTR 68[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 76[esp] + shr edi,10 + add ebx,DWORD PTR 48[esp] + mov esi,edx + xor edi,ecx + mov 
ecx,DWORD PTR 8[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 12[esp] + xor edx,esi + mov DWORD PTR 76[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 4[esp],esi + xor edx,esi + add ebx,DWORD PTR 16[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 24[esp] + xor esi,ebp + mov DWORD PTR 20[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2361852424[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 84[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,esi + mov esi,DWORD PTR 72[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 80[esp] + shr edi,10 + add ebx,DWORD PTR 52[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 4[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 8[esp] + xor edx,ecx + mov DWORD PTR 80[esp],ebx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + add ebx,DWORD PTR 12[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 20[esp] + xor ecx,eax + mov DWORD PTR 16[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 2428436474[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 88[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 28[esp] + add ebp,ecx + mov ecx,DWORD PTR 76[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 84[esp] + shr edi,10 + add ebx,DWORD PTR 56[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR [esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 4[esp] + xor edx,esi + mov DWORD PTR 84[esp],ebx + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 28[esp],esi + xor edx,esi + add ebx,DWORD PTR 8[esp] + xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 16[esp] + xor esi,ebp + mov DWORD PTR 12[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 2756734187[edx*1+ebx] + xor esi,ecx + xor eax,edi + mov ecx,DWORD PTR 92[esp] + ror esi,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,esi + mov esi,DWORD PTR 80[esp] + mov ebx,ecx + ror ecx,11 + mov edi,esi + ror esi,2 + xor ecx,ebx + shr ebx,3 + ror ecx,7 + xor esi,edi + xor ebx,ecx + ror esi,17 + add ebx,DWORD PTR 88[esp] + shr edi,10 + add ebx,DWORD PTR 60[esp] + mov ecx,edx + xor edi,esi + mov esi,DWORD PTR 28[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR [esp] + xor edx,ecx + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + add ebx,DWORD PTR 4[esp] + xor edi,esi + ror edx,6 + mov ecx,eax + add ebx,edi + ror ecx,9 + mov esi,eax + mov edi,DWORD PTR 12[esp] + xor ecx,eax + mov DWORD PTR 8[esp],eax + xor eax,edi + ror ecx,11 + and ebp,eax + lea edx,DWORD PTR 3204031479[edx*1+ebx] + xor ecx,esi + xor ebp,edi + mov esi,DWORD PTR 32[esp] + ror ecx,2 + add ebp,edx + add edx,DWORD PTR 20[esp] + add ebp,ecx + mov ecx,DWORD PTR 84[esp] + mov ebx,esi + ror esi,11 + mov edi,ecx + ror ecx,2 + xor esi,ebx + shr ebx,3 + ror esi,7 + xor ecx,edi + xor ebx,esi + ror ecx,17 + add ebx,DWORD PTR 92[esp] + shr edi,10 + add ebx,DWORD PTR 64[esp] + mov esi,edx + xor edi,ecx + mov ecx,DWORD PTR 24[esp] + ror edx,14 + add ebx,edi + mov edi,DWORD PTR 28[esp] + xor edx,esi + xor ecx,edi + ror edx,5 + and ecx,esi + mov DWORD PTR 20[esp],esi + xor edx,esi + add ebx,DWORD PTR [esp] + 
xor edi,ecx + ror edx,6 + mov esi,ebp + add ebx,edi + ror esi,9 + mov ecx,ebp + mov edi,DWORD PTR 8[esp] + xor esi,ebp + mov DWORD PTR 4[esp],ebp + xor ebp,edi + ror esi,11 + and eax,ebp + lea edx,DWORD PTR 3329325298[edx*1+ebx] + xor esi,ecx + xor eax,edi + ror esi,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,esi + mov esi,DWORD PTR 96[esp] + xor ebp,edi + mov ecx,DWORD PTR 12[esp] + add eax,DWORD PTR [esi] + add ebp,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebp + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov DWORD PTR 4[esp],ebp + xor ebp,edi + mov DWORD PTR 8[esp],edi + mov DWORD PTR 12[esp],ecx + mov edi,DWORD PTR 20[esp] + mov ebx,DWORD PTR 24[esp] + mov ecx,DWORD PTR 28[esp] + add edx,DWORD PTR 16[esi] + add edi,DWORD PTR 20[esi] + add ebx,DWORD PTR 24[esi] + add ecx,DWORD PTR 28[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],edi + mov DWORD PTR 24[esi],ebx + mov DWORD PTR 28[esi],ecx + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 100[esp] + mov DWORD PTR 24[esp],ebx + mov DWORD PTR 28[esp],ecx + cmp edi,DWORD PTR 104[esp] + jb $L009grand_loop + mov esp,DWORD PTR 108[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L004shaext: + sub esp,32 + movdqu xmm1,XMMWORD PTR [esi] + lea ebp,DWORD PTR 128[ebp] + movdqu xmm2,XMMWORD PTR 16[esi] + movdqa xmm7,XMMWORD PTR 128[ebp] + pshufd xmm0,xmm1,27 + pshufd xmm1,xmm1,177 + pshufd xmm2,xmm2,27 +DB 102,15,58,15,202,8 + punpcklqdq xmm2,xmm0 + jmp $L010loop_shaext +ALIGN 16 +$L010loop_shaext: + movdqu xmm3,XMMWORD PTR [edi] + movdqu xmm4,XMMWORD PTR 16[edi] + movdqu xmm5,XMMWORD PTR 32[edi] +DB 102,15,56,0,223 + movdqu xmm6,XMMWORD PTR 48[edi] + movdqa XMMWORD PTR 16[esp],xmm2 + movdqa xmm0,XMMWORD PTR [ebp-128] + paddd xmm0,xmm3 +DB 102,15,56,0,231 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + nop + movdqa XMMWORD PTR [esp],xmm1 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-112] + paddd xmm0,xmm4 +DB 102,15,56,0,239 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + lea edi,DWORD PTR 64[edi] +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-96] + paddd xmm0,xmm5 +DB 102,15,56,0,247 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-80] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-64] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-48] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-32] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp-16] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR [ebp] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + 
nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 16[ebp] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 + nop + paddd xmm6,xmm7 +DB 15,56,204,220 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 32[ebp] + paddd xmm0,xmm5 +DB 15,56,205,245 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm6 +DB 102,15,58,15,253,4 + nop + paddd xmm3,xmm7 +DB 15,56,204,229 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 48[ebp] + paddd xmm0,xmm6 +DB 15,56,205,222 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm3 +DB 102,15,58,15,254,4 + nop + paddd xmm4,xmm7 +DB 15,56,204,238 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 64[ebp] + paddd xmm0,xmm3 +DB 15,56,205,227 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm4 +DB 102,15,58,15,251,4 + nop + paddd xmm5,xmm7 +DB 15,56,204,243 +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 80[ebp] + paddd xmm0,xmm4 +DB 15,56,205,236 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + movdqa xmm7,xmm5 +DB 102,15,58,15,252,4 +DB 15,56,203,202 + paddd xmm6,xmm7 + movdqa xmm0,XMMWORD PTR 96[ebp] + paddd xmm0,xmm5 +DB 15,56,203,209 + pshufd xmm0,xmm0,14 +DB 15,56,205,245 + movdqa xmm7,XMMWORD PTR 128[ebp] +DB 15,56,203,202 + movdqa xmm0,XMMWORD PTR 112[ebp] + paddd xmm0,xmm6 + nop +DB 15,56,203,209 + pshufd xmm0,xmm0,14 + cmp eax,edi + nop +DB 15,56,203,202 + paddd xmm2,XMMWORD PTR 16[esp] + paddd xmm1,XMMWORD PTR [esp] + jnz $L010loop_shaext + pshufd xmm2,xmm2,177 + pshufd xmm7,xmm1,27 + pshufd xmm1,xmm1,177 + punpckhqdq xmm1,xmm2 +DB 102,15,58,15,215,8 + mov esp,DWORD PTR 44[esp] + movdqu XMMWORD PTR [esi],xmm1 + movdqu XMMWORD PTR 16[esi],xmm2 + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L005SSSE3: + lea esp,DWORD PTR [esp-96] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edi,DWORD PTR 12[esi] + mov DWORD PTR 4[esp],ebx + xor ebx,ecx + mov DWORD PTR 8[esp],ecx + mov DWORD PTR 12[esp],edi + mov edx,DWORD PTR 16[esi] + mov edi,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov esi,DWORD PTR 28[esi] + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 100[esp] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esp],esi + movdqa xmm7,XMMWORD PTR 256[ebp] + jmp $L011grand_ssse3 +ALIGN 16 +$L011grand_ssse3: + movdqu xmm0,XMMWORD PTR [edi] + movdqu xmm1,XMMWORD PTR 16[edi] + movdqu xmm2,XMMWORD PTR 32[edi] + movdqu xmm3,XMMWORD PTR 48[edi] + add edi,64 +DB 102,15,56,0,199 + mov DWORD PTR 100[esp],edi +DB 102,15,56,0,207 + movdqa xmm4,XMMWORD PTR [ebp] +DB 102,15,56,0,215 + movdqa xmm5,XMMWORD PTR 16[ebp] + paddd xmm4,xmm0 +DB 102,15,56,0,223 + movdqa xmm6,XMMWORD PTR 32[ebp] + paddd xmm5,xmm1 + movdqa xmm7,XMMWORD PTR 48[ebp] + movdqa XMMWORD PTR 32[esp],xmm4 + paddd xmm6,xmm2 + movdqa XMMWORD PTR 48[esp],xmm5 + paddd xmm7,xmm3 + movdqa XMMWORD PTR 64[esp],xmm6 + movdqa XMMWORD PTR 80[esp],xmm7 + jmp $L012ssse3_00_47 +ALIGN 16 +$L012ssse3_00_47: + add ebp,64 + mov ecx,edx + movdqa xmm4,xmm1 + ror edx,14 + mov esi,DWORD PTR 20[esp] + movdqa xmm7,xmm3 + xor edx,ecx + mov edi,DWORD PTR 24[esp] +DB 102,15,58,15,224,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,250,4 + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 4[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD PTR [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm3,250 
+ xor ecx,esi + add edx,DWORD PTR 32[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 12[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 12[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm4 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 36[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 8[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm0,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + pshufd xmm7,xmm0,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 40[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 4[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR [ebp] + and esi,ecx + mov DWORD PTR 4[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + paddd xmm0,xmm7 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + paddd xmm6,xmm0 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 44[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + movdqa XMMWORD PTR 32[esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm2 + ror edx,14 + mov esi,DWORD PTR 4[esp] + movdqa xmm7,xmm0 + xor edx,ecx + mov edi,DWORD PTR 8[esp] +DB 102,15,58,15,225,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,251,4 + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 20[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD PTR 16[esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm0,250 + xor ecx,esi + add edx,DWORD PTR 48[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 28[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 28[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm4 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi 
+ psrlq xmm6,2 + add edx,DWORD PTR 52[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 24[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm1,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + pshufd xmm7,xmm1,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 56[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 20[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR 16[ebp] + and esi,ecx + mov DWORD PTR 20[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + paddd xmm1,xmm7 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + paddd xmm6,xmm1 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 60[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + movdqa XMMWORD PTR 48[esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm3 + ror edx,14 + mov esi,DWORD PTR 20[esp] + movdqa xmm7,xmm1 + xor edx,ecx + mov edi,DWORD PTR 24[esp] +DB 102,15,58,15,226,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,248,4 + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 4[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD PTR [esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm1,250 + xor ecx,esi + add edx,DWORD PTR 64[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 12[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 12[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm4 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 68[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 8[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm2,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + pshufd xmm7,xmm2,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 72[esp] + xor ebx,edi + ror 
ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 4[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR 32[ebp] + and esi,ecx + mov DWORD PTR 4[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + paddd xmm2,xmm7 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + paddd xmm6,xmm2 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 76[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + movdqa XMMWORD PTR 64[esp],xmm6 + mov ecx,edx + movdqa xmm4,xmm0 + ror edx,14 + mov esi,DWORD PTR 4[esp] + movdqa xmm7,xmm2 + xor edx,ecx + mov edi,DWORD PTR 8[esp] +DB 102,15,58,15,227,4 + xor esi,edi + ror edx,5 + and esi,ecx +DB 102,15,58,15,249,4 + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + movdqa xmm5,xmm4 + ror edx,6 + mov ecx,eax + movdqa xmm6,xmm4 + add edx,edi + mov edi,DWORD PTR 20[esp] + psrld xmm4,3 + mov esi,eax + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD PTR 16[esp],eax + xor ecx,eax + psrld xmm6,7 + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + pshufd xmm7,xmm2,250 + xor ecx,esi + add edx,DWORD PTR 80[esp] + pslld xmm5,14 + xor ebx,edi + ror ecx,2 + pxor xmm4,xmm6 + add ebx,edx + add edx,DWORD PTR 28[esp] + psrld xmm6,11 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm4,xmm5 + mov esi,DWORD PTR [esp] + xor edx,ecx + pslld xmm5,11 + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + pxor xmm4,xmm6 + and esi,ecx + mov DWORD PTR 28[esp],ecx + movdqa xmm6,xmm7 + xor edx,ecx + xor edi,esi + ror edx,6 + pxor xmm4,xmm5 + mov ecx,ebx + add edx,edi + psrld xmm7,10 + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm4 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + psrlq xmm6,17 + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + pxor xmm7,xmm6 + and eax,ebx + xor ecx,esi + psrlq xmm6,2 + add edx,DWORD PTR 84[esp] + xor eax,edi + ror ecx,2 + pxor xmm7,xmm6 + add eax,edx + add edx,DWORD PTR 24[esp] + pshufd xmm7,xmm7,128 + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + psrldq xmm7,8 + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + paddd xmm3,xmm7 + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + pshufd xmm7,xmm3,80 + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + movdqa xmm6,xmm7 + ror ecx,11 + psrld xmm7,10 + and ebx,eax + psrlq xmm6,17 + xor ecx,esi + add edx,DWORD PTR 88[esp] + xor ebx,edi + ror ecx,2 + pxor xmm7,xmm6 + add ebx,edx + add edx,DWORD PTR 20[esp] + psrlq xmm6,2 + add ebx,ecx + mov ecx,edx + ror edx,14 + pxor xmm7,xmm6 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + pshufd xmm7,xmm7,8 + xor esi,edi + ror edx,5 + movdqa xmm6,XMMWORD PTR 48[ebp] + and esi,ecx + mov DWORD PTR 20[esp],ecx + pslldq xmm7,8 + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + paddd xmm3,xmm7 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + paddd xmm6,xmm3 + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 92[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx 
+ movdqa XMMWORD PTR 80[esp],xmm6 + cmp DWORD PTR 64[ebp],66051 + jne $L012ssse3_00_47 + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 20[esp] + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 32[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 36[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 40[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 44[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 4[esp] + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 16[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 48[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR [esp] + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 52[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD 
PTR 4[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 56[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 60[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 20[esp] + xor edx,ecx + mov edi,DWORD PTR 24[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 16[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 4[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR [esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 28[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 64[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 12[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 16[esp] + xor edx,ecx + mov edi,DWORD PTR 20[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 12[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR [esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 28[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 24[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 68[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 8[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 12[esp] + xor edx,ecx + mov edi,DWORD PTR 16[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 8[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 28[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 24[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 20[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 72[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 4[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 8[esp] + xor edx,ecx + mov edi,DWORD PTR 12[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 4[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 24[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 20[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 16[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 76[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR [esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 4[esp] + xor edx,ecx + mov edi,DWORD PTR 8[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR [esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 20[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 16[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 12[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 80[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 28[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR [esp] + xor edx,ecx + mov edi,DWORD PTR 4[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 28[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 16[esp] + mov esi,ebx + ror 
ecx,9 + mov DWORD PTR 12[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR 8[esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 84[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 24[esp] + add eax,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 28[esp] + xor edx,ecx + mov edi,DWORD PTR [esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 24[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,eax + add edx,edi + mov edi,DWORD PTR 12[esp] + mov esi,eax + ror ecx,9 + mov DWORD PTR 8[esp],eax + xor ecx,eax + xor eax,edi + add edx,DWORD PTR 4[esp] + ror ecx,11 + and ebx,eax + xor ecx,esi + add edx,DWORD PTR 88[esp] + xor ebx,edi + ror ecx,2 + add ebx,edx + add edx,DWORD PTR 20[esp] + add ebx,ecx + mov ecx,edx + ror edx,14 + mov esi,DWORD PTR 24[esp] + xor edx,ecx + mov edi,DWORD PTR 28[esp] + xor esi,edi + ror edx,5 + and esi,ecx + mov DWORD PTR 20[esp],ecx + xor edx,ecx + xor edi,esi + ror edx,6 + mov ecx,ebx + add edx,edi + mov edi,DWORD PTR 8[esp] + mov esi,ebx + ror ecx,9 + mov DWORD PTR 4[esp],ebx + xor ecx,ebx + xor ebx,edi + add edx,DWORD PTR [esp] + ror ecx,11 + and eax,ebx + xor ecx,esi + add edx,DWORD PTR 92[esp] + xor eax,edi + ror ecx,2 + add eax,edx + add edx,DWORD PTR 16[esp] + add eax,ecx + mov esi,DWORD PTR 96[esp] + xor ebx,edi + mov ecx,DWORD PTR 12[esp] + add eax,DWORD PTR [esi] + add ebx,DWORD PTR 4[esi] + add edi,DWORD PTR 8[esi] + add ecx,DWORD PTR 12[esi] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + mov DWORD PTR 8[esi],edi + mov DWORD PTR 12[esi],ecx + mov DWORD PTR 4[esp],ebx + xor ebx,edi + mov DWORD PTR 8[esp],edi + mov DWORD PTR 12[esp],ecx + mov edi,DWORD PTR 20[esp] + mov ecx,DWORD PTR 24[esp] + add edx,DWORD PTR 16[esi] + add edi,DWORD PTR 20[esi] + add ecx,DWORD PTR 24[esi] + mov DWORD PTR 16[esi],edx + mov DWORD PTR 20[esi],edi + mov DWORD PTR 20[esp],edi + mov edi,DWORD PTR 28[esp] + mov DWORD PTR 24[esi],ecx + add edi,DWORD PTR 28[esi] + mov DWORD PTR 24[esp],ecx + mov DWORD PTR 28[esi],edi + mov DWORD PTR 28[esp],edi + mov edi,DWORD PTR 100[esp] + movdqa xmm7,XMMWORD PTR 64[ebp] + sub ebp,192 + cmp edi,DWORD PTR 104[esp] + jb $L011grand_ssse3 + mov esp,DWORD PTR 108[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +_sha256_block_data_order ENDP +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha512-586.asm b/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha512-586.asm new file mode 100644 index 00000000000000..9a57e5af84be78 --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/sha/sha512-586.asm @@ -0,0 +1,2849 @@ +TITLE sha512-586.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) 
+XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_sha512_block_data_order PROC PUBLIC +$L_sha512_block_data_order_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov eax,DWORD PTR 28[esp] + mov ebx,esp + call $L000pic_point +$L000pic_point: + pop ebp + lea ebp,DWORD PTR ($L001K512-$L000pic_point)[ebp] + sub esp,16 + and esp,-64 + shl eax,7 + add eax,edi + mov DWORD PTR [esp],esi + mov DWORD PTR 4[esp],edi + mov DWORD PTR 8[esp],eax + mov DWORD PTR 12[esp],ebx + lea edx,DWORD PTR _OPENSSL_ia32cap_P + mov ecx,DWORD PTR [edx] + test ecx,67108864 + jz $L002loop_x86 + mov edx,DWORD PTR 4[edx] + movq mm0,QWORD PTR [esi] + and ecx,16777216 + movq mm1,QWORD PTR 8[esi] + and edx,512 + movq mm2,QWORD PTR 16[esi] + or ecx,edx + movq mm3,QWORD PTR 24[esi] + movq mm4,QWORD PTR 32[esi] + movq mm5,QWORD PTR 40[esi] + movq mm6,QWORD PTR 48[esi] + movq mm7,QWORD PTR 56[esi] + cmp ecx,16777728 + je $L003SSSE3 + sub esp,80 + jmp $L004loop_sse2 +ALIGN 16 +$L004loop_sse2: + movq QWORD PTR 8[esp],mm1 + movq QWORD PTR 16[esp],mm2 + movq QWORD PTR 24[esp],mm3 + movq QWORD PTR 40[esp],mm5 + movq QWORD PTR 48[esp],mm6 + pxor mm2,mm1 + movq QWORD PTR 56[esp],mm7 + movq mm3,mm0 + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + add edi,8 + mov edx,15 + bswap eax + bswap ebx + jmp $L00500_14_sse2 +ALIGN 16 +$L00500_14_sse2: + movd mm1,eax + mov eax,DWORD PTR [edi] + movd mm7,ebx + mov ebx,DWORD PTR 4[edi] + add edi,8 + bswap eax + bswap ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + movq mm6,QWORD PTR 48[esp] + dec edx + jnz $L00500_14_sse2 + movd mm1,eax + movd mm7,ebx + punpckldq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq mm0,mm3 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,QWORD PTR 192[esp] + paddq mm3,mm2 + movq mm2,mm0 + add ebp,8 + paddq mm3,mm6 + pxor mm0,mm0 + mov edx,32 + jmp $L00616_79_sse2 +ALIGN 16 
+$L00616_79_sse2: + movq mm5,QWORD PTR 88[esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm0,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,QWORD PTR 200[esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,QWORD PTR 128[esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,QWORD PTR 40[esp] + pxor mm1,mm6 + movq mm6,QWORD PTR 48[esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm7,QWORD PTR 192[esp] + paddq mm2,mm6 + add ebp,8 + movq mm5,QWORD PTR 88[esp] + movq mm1,mm7 + psrlq mm7,1 + movq mm6,mm5 + psrlq mm5,6 + psllq mm1,56 + paddq mm2,mm3 + movq mm3,mm7 + psrlq mm7,6 + pxor mm3,mm1 + psllq mm1,7 + pxor mm3,mm7 + psrlq mm7,1 + pxor mm3,mm1 + movq mm1,mm5 + psrlq mm5,13 + pxor mm7,mm3 + psllq mm6,3 + pxor mm1,mm5 + paddq mm7,QWORD PTR 200[esp] + pxor mm1,mm6 + psrlq mm5,42 + paddq mm7,QWORD PTR 128[esp] + pxor mm1,mm5 + psllq mm6,42 + movq mm5,QWORD PTR 40[esp] + pxor mm1,mm6 + movq mm6,QWORD PTR 48[esp] + paddq mm7,mm1 + movq mm1,mm4 + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + movq QWORD PTR 72[esp],mm7 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + paddq mm7,QWORD PTR [ebp] + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + sub esp,8 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm7,QWORD PTR 192[esp] + paddq mm0,mm6 + add ebp,8 + dec edx + jnz $L00616_79_sse2 + paddq mm0,mm3 + movq mm1,QWORD PTR 8[esp] + movq mm3,QWORD PTR 24[esp] + movq mm5,QWORD PTR 40[esp] + movq mm6,QWORD PTR 48[esp] + movq mm7,QWORD PTR 56[esp] + pxor mm2,mm1 + paddq mm0,QWORD PTR [esi] + paddq mm1,QWORD PTR 8[esi] + paddq mm2,QWORD PTR 16[esi] + paddq mm3,QWORD PTR 24[esi] + paddq mm4,QWORD PTR 32[esi] + paddq mm5,QWORD PTR 40[esi] + paddq mm6,QWORD PTR 48[esi] + paddq mm7,QWORD PTR 56[esi] + mov eax,640 + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm1 + movq QWORD PTR 16[esi],mm2 + movq QWORD PTR 24[esi],mm3 + movq QWORD PTR 32[esi],mm4 + movq QWORD PTR 40[esi],mm5 + movq QWORD PTR 48[esi],mm6 + movq QWORD PTR 56[esi],mm7 + lea esp,DWORD PTR [eax*1+esp] + sub ebp,eax + cmp edi,DWORD PTR 88[esp] + jb $L004loop_sse2 + mov esp,DWORD PTR 92[esp] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 32 +$L003SSSE3: + lea edx,DWORD PTR 
[esp-64] + sub esp,256 + movdqa xmm1,XMMWORD PTR 640[ebp] + movdqu xmm0,XMMWORD PTR [edi] +DB 102,15,56,0,193 + movdqa xmm3,XMMWORD PTR [ebp] + movdqa xmm2,xmm1 + movdqu xmm1,XMMWORD PTR 16[edi] + paddq xmm3,xmm0 +DB 102,15,56,0,202 + movdqa XMMWORD PTR [edx-128],xmm3 + movdqa xmm4,XMMWORD PTR 16[ebp] + movdqa xmm3,xmm2 + movdqu xmm2,XMMWORD PTR 32[edi] + paddq xmm4,xmm1 +DB 102,15,56,0,211 + movdqa XMMWORD PTR [edx-112],xmm4 + movdqa xmm5,XMMWORD PTR 32[ebp] + movdqa xmm4,xmm3 + movdqu xmm3,XMMWORD PTR 48[edi] + paddq xmm5,xmm2 +DB 102,15,56,0,220 + movdqa XMMWORD PTR [edx-96],xmm5 + movdqa xmm6,XMMWORD PTR 48[ebp] + movdqa xmm5,xmm4 + movdqu xmm4,XMMWORD PTR 64[edi] + paddq xmm6,xmm3 +DB 102,15,56,0,229 + movdqa XMMWORD PTR [edx-80],xmm6 + movdqa xmm7,XMMWORD PTR 64[ebp] + movdqa xmm6,xmm5 + movdqu xmm5,XMMWORD PTR 80[edi] + paddq xmm7,xmm4 +DB 102,15,56,0,238 + movdqa XMMWORD PTR [edx-64],xmm7 + movdqa XMMWORD PTR [edx],xmm0 + movdqa xmm0,XMMWORD PTR 80[ebp] + movdqa xmm7,xmm6 + movdqu xmm6,XMMWORD PTR 96[edi] + paddq xmm0,xmm5 +DB 102,15,56,0,247 + movdqa XMMWORD PTR [edx-48],xmm0 + movdqa XMMWORD PTR 16[edx],xmm1 + movdqa xmm1,XMMWORD PTR 96[ebp] + movdqa xmm0,xmm7 + movdqu xmm7,XMMWORD PTR 112[edi] + paddq xmm1,xmm6 +DB 102,15,56,0,248 + movdqa XMMWORD PTR [edx-32],xmm1 + movdqa XMMWORD PTR 32[edx],xmm2 + movdqa xmm2,XMMWORD PTR 112[ebp] + movdqa xmm0,XMMWORD PTR [edx] + paddq xmm2,xmm7 + movdqa XMMWORD PTR [edx-16],xmm2 + nop +ALIGN 32 +$L007loop_ssse3: + movdqa xmm2,XMMWORD PTR 16[edx] + movdqa XMMWORD PTR 48[edx],xmm3 + lea ebp,DWORD PTR 128[ebp] + movq QWORD PTR 8[esp],mm1 + mov ebx,edi + movq QWORD PTR 16[esp],mm2 + lea edi,DWORD PTR 128[edi] + movq QWORD PTR 24[esp],mm3 + cmp edi,eax + movq QWORD PTR 40[esp],mm5 + cmovb ebx,edi + movq QWORD PTR 48[esp],mm6 + mov ecx,4 + pxor mm2,mm1 + movq QWORD PTR 56[esp],mm7 + pxor mm3,mm3 + jmp $L00800_47_ssse3 +ALIGN 32 +$L00800_47_ssse3: + movdqa xmm3,xmm5 + movdqa xmm1,xmm2 +DB 102,15,58,15,208,8 + movdqa XMMWORD PTR [edx],xmm4 +DB 102,15,58,15,220,8 + movdqa xmm4,xmm2 + psrlq xmm2,7 + paddq xmm0,xmm3 + movdqa xmm3,xmm4 + psrlq xmm4,1 + psllq xmm3,56 + pxor xmm2,xmm4 + psrlq xmm4,7 + pxor xmm2,xmm3 + psllq xmm3,7 + pxor xmm2,xmm4 + movdqa xmm4,xmm7 + pxor xmm2,xmm3 + movdqa xmm3,xmm7 + psrlq xmm4,6 + paddq xmm0,xmm2 + movdqa xmm2,xmm7 + psrlq xmm3,19 + psllq xmm2,3 + pxor xmm4,xmm3 + psrlq xmm3,42 + pxor xmm4,xmm2 + psllq xmm2,42 + pxor xmm4,xmm3 + movdqa xmm3,XMMWORD PTR 32[edx] + pxor xmm4,xmm2 + movdqa xmm2,XMMWORD PTR [ebp] + movq mm1,mm4 + paddq xmm0,xmm4 + movq mm7,QWORD PTR [edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + paddq xmm2,xmm0 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-120] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor 
mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-128],xmm2 + movdqa xmm4,xmm6 + movdqa xmm2,xmm3 +DB 102,15,58,15,217,8 + movdqa XMMWORD PTR 16[edx],xmm5 +DB 102,15,58,15,229,8 + movdqa xmm5,xmm3 + psrlq xmm3,7 + paddq xmm1,xmm4 + movdqa xmm4,xmm5 + psrlq xmm5,1 + psllq xmm4,56 + pxor xmm3,xmm5 + psrlq xmm5,7 + pxor xmm3,xmm4 + psllq xmm4,7 + pxor xmm3,xmm5 + movdqa xmm5,xmm0 + pxor xmm3,xmm4 + movdqa xmm4,xmm0 + psrlq xmm5,6 + paddq xmm1,xmm3 + movdqa xmm3,xmm0 + psrlq xmm4,19 + psllq xmm3,3 + pxor xmm5,xmm4 + psrlq xmm4,42 + pxor xmm5,xmm3 + psllq xmm3,42 + pxor xmm5,xmm4 + movdqa xmm4,XMMWORD PTR 48[edx] + pxor xmm5,xmm3 + movdqa xmm3,XMMWORD PTR 16[ebp] + movq mm1,mm4 + paddq xmm1,xmm5 + movq mm7,QWORD PTR [edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + paddq xmm3,xmm1 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-112],xmm3 + movdqa xmm5,xmm7 + movdqa xmm3,xmm4 +DB 102,15,58,15,226,8 + movdqa XMMWORD PTR 32[edx],xmm6 +DB 102,15,58,15,238,8 + movdqa xmm6,xmm4 + psrlq xmm4,7 + paddq xmm2,xmm5 + movdqa xmm5,xmm6 + psrlq xmm6,1 + psllq xmm5,56 + pxor xmm4,xmm6 + psrlq xmm6,7 + pxor xmm4,xmm5 + psllq xmm5,7 + pxor xmm4,xmm6 + movdqa xmm6,xmm1 + pxor xmm4,xmm5 + movdqa xmm5,xmm1 + psrlq xmm6,6 + paddq xmm2,xmm4 + movdqa xmm4,xmm1 + psrlq xmm5,19 + psllq xmm4,3 + pxor xmm6,xmm5 + psrlq xmm5,42 + pxor xmm6,xmm4 + psllq xmm4,42 + pxor xmm6,xmm5 + movdqa xmm5,XMMWORD PTR [edx] + pxor xmm6,xmm4 + movdqa xmm4,XMMWORD PTR 32[ebp] + movq mm1,mm4 + 
paddq xmm2,xmm6 + movq mm7,QWORD PTR [edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + paddq xmm4,xmm2 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-96],xmm4 + movdqa xmm6,xmm0 + movdqa xmm4,xmm5 +DB 102,15,58,15,235,8 + movdqa XMMWORD PTR 48[edx],xmm7 +DB 102,15,58,15,247,8 + movdqa xmm7,xmm5 + psrlq xmm5,7 + paddq xmm3,xmm6 + movdqa xmm6,xmm7 + psrlq xmm7,1 + psllq xmm6,56 + pxor xmm5,xmm7 + psrlq xmm7,7 + pxor xmm5,xmm6 + psllq xmm6,7 + pxor xmm5,xmm7 + movdqa xmm7,xmm2 + pxor xmm5,xmm6 + movdqa xmm6,xmm2 + psrlq xmm7,6 + paddq xmm3,xmm5 + movdqa xmm5,xmm2 + psrlq xmm6,19 + psllq xmm5,3 + pxor xmm7,xmm6 + psrlq xmm6,42 + pxor xmm7,xmm5 + psllq xmm5,42 + pxor xmm7,xmm6 + movdqa xmm6,XMMWORD PTR 16[edx] + pxor xmm7,xmm5 + movdqa xmm5,XMMWORD PTR 48[ebp] + movq mm1,mm4 + paddq xmm3,xmm7 + movq mm7,QWORD PTR [edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + paddq xmm5,xmm3 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq 
mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-80],xmm5 + movdqa xmm7,xmm1 + movdqa xmm5,xmm6 +DB 102,15,58,15,244,8 + movdqa XMMWORD PTR [edx],xmm0 +DB 102,15,58,15,248,8 + movdqa xmm0,xmm6 + psrlq xmm6,7 + paddq xmm4,xmm7 + movdqa xmm7,xmm0 + psrlq xmm0,1 + psllq xmm7,56 + pxor xmm6,xmm0 + psrlq xmm0,7 + pxor xmm6,xmm7 + psllq xmm7,7 + pxor xmm6,xmm0 + movdqa xmm0,xmm3 + pxor xmm6,xmm7 + movdqa xmm7,xmm3 + psrlq xmm0,6 + paddq xmm4,xmm6 + movdqa xmm6,xmm3 + psrlq xmm7,19 + psllq xmm6,3 + pxor xmm0,xmm7 + psrlq xmm7,42 + pxor xmm0,xmm6 + psllq xmm6,42 + pxor xmm0,xmm7 + movdqa xmm7,XMMWORD PTR 32[edx] + pxor xmm0,xmm6 + movdqa xmm6,XMMWORD PTR 64[ebp] + movq mm1,mm4 + paddq xmm4,xmm0 + movq mm7,QWORD PTR [edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + paddq xmm6,xmm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-64],xmm6 + movdqa xmm0,xmm2 + movdqa xmm6,xmm7 +DB 102,15,58,15,253,8 + movdqa XMMWORD PTR 16[edx],xmm1 +DB 102,15,58,15,193,8 + movdqa xmm1,xmm7 + psrlq xmm7,7 + paddq xmm5,xmm0 + movdqa xmm0,xmm1 + psrlq xmm1,1 + psllq xmm0,56 + pxor xmm7,xmm1 + psrlq xmm1,7 + pxor xmm7,xmm0 + psllq xmm0,7 + pxor xmm7,xmm1 + movdqa xmm1,xmm4 + pxor xmm7,xmm0 + movdqa xmm0,xmm4 + psrlq xmm1,6 + paddq xmm5,xmm7 + movdqa xmm7,xmm4 + psrlq xmm0,19 + psllq xmm7,3 + pxor xmm1,xmm0 + psrlq xmm0,42 + pxor xmm1,xmm7 + psllq xmm7,42 + pxor xmm1,xmm0 + movdqa xmm0,XMMWORD PTR 48[edx] + pxor xmm1,xmm7 + movdqa xmm7,XMMWORD PTR 80[ebp] + movq mm1,mm4 + paddq xmm5,xmm1 + movq mm7,QWORD PTR [edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + paddq xmm7,xmm5 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD 
PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-48],xmm7 + movdqa xmm1,xmm3 + movdqa xmm7,xmm0 +DB 102,15,58,15,198,8 + movdqa XMMWORD PTR 32[edx],xmm2 +DB 102,15,58,15,202,8 + movdqa xmm2,xmm0 + psrlq xmm0,7 + paddq xmm6,xmm1 + movdqa xmm1,xmm2 + psrlq xmm2,1 + psllq xmm1,56 + pxor xmm0,xmm2 + psrlq xmm2,7 + pxor xmm0,xmm1 + psllq xmm1,7 + pxor xmm0,xmm2 + movdqa xmm2,xmm5 + pxor xmm0,xmm1 + movdqa xmm1,xmm5 + psrlq xmm2,6 + paddq xmm6,xmm0 + movdqa xmm0,xmm5 + psrlq xmm1,19 + psllq xmm0,3 + pxor xmm2,xmm1 + psrlq xmm1,42 + pxor xmm2,xmm0 + psllq xmm0,42 + pxor xmm2,xmm1 + movdqa xmm1,XMMWORD PTR [edx] + pxor xmm2,xmm0 + movdqa xmm0,XMMWORD PTR 96[ebp] + movq mm1,mm4 + paddq xmm6,xmm2 + movq mm7,QWORD PTR [edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + paddq xmm0,xmm6 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + 
movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-32],xmm0 + movdqa xmm2,xmm4 + movdqa xmm0,xmm1 +DB 102,15,58,15,207,8 + movdqa XMMWORD PTR 48[edx],xmm3 +DB 102,15,58,15,211,8 + movdqa xmm3,xmm1 + psrlq xmm1,7 + paddq xmm7,xmm2 + movdqa xmm2,xmm3 + psrlq xmm3,1 + psllq xmm2,56 + pxor xmm1,xmm3 + psrlq xmm3,7 + pxor xmm1,xmm2 + psllq xmm2,7 + pxor xmm1,xmm3 + movdqa xmm3,xmm6 + pxor xmm1,xmm2 + movdqa xmm2,xmm6 + psrlq xmm3,6 + paddq xmm7,xmm1 + movdqa xmm1,xmm6 + psrlq xmm2,19 + psllq xmm1,3 + pxor xmm3,xmm2 + psrlq xmm2,42 + pxor xmm3,xmm1 + psllq xmm1,42 + pxor xmm3,xmm2 + movdqa xmm2,XMMWORD PTR 16[edx] + pxor xmm3,xmm1 + movdqa xmm1,XMMWORD PTR 112[ebp] + movq mm1,mm4 + paddq xmm7,xmm3 + movq mm7,QWORD PTR [edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + paddq xmm1,xmm7 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-16],xmm1 + lea ebp,DWORD PTR 128[ebp] + dec ecx + jnz $L00800_47_ssse3 + movdqa xmm1,XMMWORD PTR [ebp] + lea ebp,DWORD PTR [ebp-640] + movdqu xmm0,XMMWORD PTR [ebx] +DB 102,15,56,0,193 + movdqa xmm3,XMMWORD PTR [ebp] + movdqa xmm2,xmm1 + movdqu xmm1,XMMWORD PTR 16[ebx] + paddq xmm3,xmm0 +DB 102,15,56,0,202 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-128] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-120] + pxor mm5,mm6 + psrlq 
mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-128],xmm3 + movdqa xmm4,XMMWORD PTR 16[ebp] + movdqa xmm3,xmm2 + movdqu xmm2,XMMWORD PTR 32[ebx] + paddq xmm4,xmm1 +DB 102,15,56,0,211 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-112] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-104] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-112],xmm4 + movdqa xmm5,XMMWORD PTR 32[ebp] + movdqa xmm4,xmm3 + movdqu xmm3,XMMWORD PTR 48[ebx] + paddq xmm5,xmm2 +DB 102,15,56,0,220 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-96] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-88] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 
56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-96],xmm5 + movdqa xmm6,XMMWORD PTR 48[ebp] + movdqa xmm5,xmm4 + movdqu xmm4,XMMWORD PTR 64[ebx] + paddq xmm6,xmm3 +DB 102,15,56,0,229 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-80] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-72] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-80],xmm6 + movdqa xmm7,XMMWORD PTR 64[ebp] + movdqa xmm6,xmm5 + movdqu xmm5,XMMWORD PTR 80[ebx] + paddq xmm7,xmm4 +DB 102,15,56,0,238 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-64] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 32[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR [esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 56[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 24[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 8[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 32[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 40[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-56] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 24[esp],mm4 + pand mm5,mm4 + 
psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 56[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 48[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 16[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR [esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 24[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 32[esp] + movdqa XMMWORD PTR [edx-64],xmm7 + movdqa XMMWORD PTR [edx],xmm0 + movdqa xmm0,XMMWORD PTR 80[ebp] + movdqa xmm7,xmm6 + movdqu xmm6,XMMWORD PTR 96[ebx] + paddq xmm0,xmm5 +DB 102,15,56,0,247 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-48] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 16[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 48[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 40[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 8[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 56[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 16[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 24[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-40] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 8[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 40[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 32[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR [esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 48[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 8[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 16[esp] + movdqa XMMWORD PTR [edx-48],xmm0 + movdqa XMMWORD PTR 16[edx],xmm1 + movdqa xmm1,XMMWORD PTR 96[ebp] + movdqa xmm0,xmm7 + movdqu xmm7,XMMWORD PTR 112[ebx] + paddq xmm1,xmm6 +DB 102,15,56,0,248 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-32] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR [esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 32[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 24[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 56[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 40[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR [esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 8[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-24] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD 
PTR 56[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 24[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 16[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 48[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 32[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 56[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR [esp] + movdqa XMMWORD PTR [edx-32],xmm1 + movdqa XMMWORD PTR 32[edx],xmm2 + movdqa xmm2,XMMWORD PTR 112[ebp] + movdqa xmm0,XMMWORD PTR [edx] + paddq xmm2,xmm7 + movq mm1,mm4 + movq mm7,QWORD PTR [edx-16] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 48[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm0,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 16[esp],mm0 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR 8[esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 40[esp] + paddq mm3,mm7 + movq mm5,mm0 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm0 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 24[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm0,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm2,mm0 + psllq mm6,6 + pxor mm7,mm5 + pxor mm2,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 48[esp] + paddq mm2,mm6 + movq mm6,QWORD PTR 56[esp] + movq mm1,mm4 + movq mm7,QWORD PTR [edx-8] + pxor mm5,mm6 + psrlq mm1,14 + movq QWORD PTR 40[esp],mm4 + pand mm5,mm4 + psllq mm4,23 + paddq mm2,mm3 + movq mm3,mm1 + psrlq mm1,4 + pxor mm5,mm6 + pxor mm3,mm4 + psllq mm4,23 + pxor mm3,mm1 + movq QWORD PTR 8[esp],mm2 + paddq mm7,mm5 + pxor mm3,mm4 + psrlq mm1,23 + paddq mm7,QWORD PTR [esp] + pxor mm3,mm1 + psllq mm4,4 + pxor mm3,mm4 + movq mm4,QWORD PTR 32[esp] + paddq mm3,mm7 + movq mm5,mm2 + psrlq mm5,28 + paddq mm4,mm3 + movq mm6,mm2 + movq mm7,mm5 + psllq mm6,25 + movq mm1,QWORD PTR 16[esp] + psrlq mm5,6 + pxor mm7,mm6 + psllq mm6,5 + pxor mm7,mm5 + pxor mm2,mm1 + psrlq mm5,5 + pxor mm7,mm6 + pand mm0,mm2 + psllq mm6,6 + pxor mm7,mm5 + pxor mm0,mm1 + pxor mm6,mm7 + movq mm5,QWORD PTR 40[esp] + paddq mm0,mm6 + movq mm6,QWORD PTR 48[esp] + movdqa XMMWORD PTR [edx-16],xmm2 + movq mm1,QWORD PTR 8[esp] + paddq mm0,mm3 + movq mm3,QWORD PTR 24[esp] + movq mm7,QWORD PTR 56[esp] + pxor mm2,mm1 + paddq mm0,QWORD PTR [esi] + paddq mm1,QWORD PTR 8[esi] + paddq mm2,QWORD PTR 16[esi] + paddq mm3,QWORD PTR 24[esi] + paddq mm4,QWORD PTR 32[esi] + paddq mm5,QWORD PTR 40[esi] + paddq mm6,QWORD PTR 48[esi] + paddq mm7,QWORD PTR 56[esi] + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm1 + movq QWORD PTR 16[esi],mm2 + movq QWORD PTR 24[esi],mm3 + movq QWORD PTR 32[esi],mm4 + movq QWORD PTR 40[esi],mm5 + movq QWORD PTR 48[esi],mm6 + movq QWORD PTR 56[esi],mm7 + cmp edi,eax + jb $L007loop_ssse3 + mov esp,DWORD PTR 76[edx] + emms + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 16 +$L002loop_x86: + mov eax,DWORD PTR [edi] + mov ebx,DWORD PTR 4[edi] + mov ecx,DWORD PTR 8[edi] + mov edx,DWORD PTR 12[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 16[edi] + mov ebx,DWORD PTR 20[edi] + mov ecx,DWORD PTR 24[edi] + mov edx,DWORD PTR 28[edi] + bswap eax + 
bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 32[edi] + mov ebx,DWORD PTR 36[edi] + mov ecx,DWORD PTR 40[edi] + mov edx,DWORD PTR 44[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 48[edi] + mov ebx,DWORD PTR 52[edi] + mov ecx,DWORD PTR 56[edi] + mov edx,DWORD PTR 60[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 64[edi] + mov ebx,DWORD PTR 68[edi] + mov ecx,DWORD PTR 72[edi] + mov edx,DWORD PTR 76[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 80[edi] + mov ebx,DWORD PTR 84[edi] + mov ecx,DWORD PTR 88[edi] + mov edx,DWORD PTR 92[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 96[edi] + mov ebx,DWORD PTR 100[edi] + mov ecx,DWORD PTR 104[edi] + mov edx,DWORD PTR 108[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + mov eax,DWORD PTR 112[edi] + mov ebx,DWORD PTR 116[edi] + mov ecx,DWORD PTR 120[edi] + mov edx,DWORD PTR 124[edi] + bswap eax + bswap ebx + bswap ecx + bswap edx + push eax + push ebx + push ecx + push edx + add edi,128 + sub esp,72 + mov DWORD PTR 204[esp],edi + lea edi,DWORD PTR 8[esp] + mov ecx,16 +DD 2784229001 +ALIGN 16 +$L00900_15_x86: + mov ecx,DWORD PTR 40[esp] + mov edx,DWORD PTR 44[esp] + mov esi,ecx + shr ecx,9 + mov edi,edx + shr edx,9 + mov ebx,ecx + shl esi,14 + mov eax,edx + shl edi,14 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor eax,ecx + shl esi,4 + xor ebx,edx + shl edi,4 + xor ebx,esi + shr ecx,4 + xor eax,edi + shr edx,4 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD PTR 48[esp] + mov edx,DWORD PTR 52[esp] + mov esi,DWORD PTR 56[esp] + mov edi,DWORD PTR 60[esp] + add eax,DWORD PTR 64[esp] + adc ebx,DWORD PTR 68[esp] + xor ecx,esi + xor edx,edi + and ecx,DWORD PTR 40[esp] + and edx,DWORD PTR 44[esp] + add eax,DWORD PTR 192[esp] + adc ebx,DWORD PTR 196[esp] + xor ecx,esi + xor edx,edi + mov esi,DWORD PTR [ebp] + mov edi,DWORD PTR 4[ebp] + add eax,ecx + adc ebx,edx + mov ecx,DWORD PTR 32[esp] + mov edx,DWORD PTR 36[esp] + add eax,esi + adc ebx,edi + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + add eax,ecx + adc ebx,edx + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov DWORD PTR 32[esp],eax + mov DWORD PTR 36[esp],ebx + mov esi,ecx + shr ecx,2 + mov edi,edx + shr edx,2 + mov ebx,ecx + shl esi,4 + mov eax,edx + shl edi,4 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor ebx,ecx + shl esi,21 + xor eax,edx + shl edi,21 + xor eax,esi + shr ecx,21 + xor ebx,edi + shr edx,21 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov esi,DWORD PTR 16[esp] + mov edi,DWORD PTR 20[esp] + add eax,DWORD PTR [esp] + adc ebx,DWORD PTR 4[esp] + or ecx,esi + or edx,edi + and ecx,DWORD PTR 24[esp] + and edx,DWORD PTR 28[esp] + and esi,DWORD PTR 8[esp] + and edi,DWORD PTR 12[esp] + or ecx,esi + or edx,edi + add eax,ecx + adc ebx,edx + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov dl,BYTE PTR [ebp] + sub esp,8 + lea ebp,DWORD PTR 8[ebp] + cmp dl,148 + jne $L00900_15_x86 +ALIGN 16 +$L01016_79_x86: + mov ecx,DWORD PTR 312[esp] + mov edx,DWORD PTR 316[esp] + mov esi,ecx + shr ecx,1 + mov edi,edx + shr edx,1 + mov eax,ecx + shl esi,24 + 
mov ebx,edx + shl edi,24 + xor ebx,esi + shr ecx,6 + xor eax,edi + shr edx,6 + xor eax,ecx + shl esi,7 + xor ebx,edx + shl edi,1 + xor ebx,esi + shr ecx,1 + xor eax,edi + shr edx,1 + xor eax,ecx + shl edi,6 + xor ebx,edx + xor eax,edi + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov ecx,DWORD PTR 208[esp] + mov edx,DWORD PTR 212[esp] + mov esi,ecx + shr ecx,6 + mov edi,edx + shr edx,6 + mov eax,ecx + shl esi,3 + mov ebx,edx + shl edi,3 + xor eax,esi + shr ecx,13 + xor ebx,edi + shr edx,13 + xor eax,ecx + shl esi,10 + xor ebx,edx + shl edi,10 + xor ebx,esi + shr ecx,10 + xor eax,edi + shr edx,10 + xor ebx,ecx + shl edi,13 + xor eax,edx + xor eax,edi + mov ecx,DWORD PTR 320[esp] + mov edx,DWORD PTR 324[esp] + add eax,DWORD PTR [esp] + adc ebx,DWORD PTR 4[esp] + mov esi,DWORD PTR 248[esp] + mov edi,DWORD PTR 252[esp] + add eax,ecx + adc ebx,edx + add eax,esi + adc ebx,edi + mov DWORD PTR 192[esp],eax + mov DWORD PTR 196[esp],ebx + mov ecx,DWORD PTR 40[esp] + mov edx,DWORD PTR 44[esp] + mov esi,ecx + shr ecx,9 + mov edi,edx + shr edx,9 + mov ebx,ecx + shl esi,14 + mov eax,edx + shl edi,14 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor eax,ecx + shl esi,4 + xor ebx,edx + shl edi,4 + xor ebx,esi + shr ecx,4 + xor eax,edi + shr edx,4 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD PTR 48[esp] + mov edx,DWORD PTR 52[esp] + mov esi,DWORD PTR 56[esp] + mov edi,DWORD PTR 60[esp] + add eax,DWORD PTR 64[esp] + adc ebx,DWORD PTR 68[esp] + xor ecx,esi + xor edx,edi + and ecx,DWORD PTR 40[esp] + and edx,DWORD PTR 44[esp] + add eax,DWORD PTR 192[esp] + adc ebx,DWORD PTR 196[esp] + xor ecx,esi + xor edx,edi + mov esi,DWORD PTR [ebp] + mov edi,DWORD PTR 4[ebp] + add eax,ecx + adc ebx,edx + mov ecx,DWORD PTR 32[esp] + mov edx,DWORD PTR 36[esp] + add eax,esi + adc ebx,edi + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + add eax,ecx + adc ebx,edx + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov DWORD PTR 32[esp],eax + mov DWORD PTR 36[esp],ebx + mov esi,ecx + shr ecx,2 + mov edi,edx + shr edx,2 + mov ebx,ecx + shl esi,4 + mov eax,edx + shl edi,4 + xor ebx,esi + shr ecx,5 + xor eax,edi + shr edx,5 + xor ebx,ecx + shl esi,21 + xor eax,edx + shl edi,21 + xor eax,esi + shr ecx,21 + xor ebx,edi + shr edx,21 + xor eax,ecx + shl esi,5 + xor ebx,edx + shl edi,5 + xor eax,esi + xor ebx,edi + mov ecx,DWORD PTR 8[esp] + mov edx,DWORD PTR 12[esp] + mov esi,DWORD PTR 16[esp] + mov edi,DWORD PTR 20[esp] + add eax,DWORD PTR [esp] + adc ebx,DWORD PTR 4[esp] + or ecx,esi + or edx,edi + and ecx,DWORD PTR 24[esp] + and edx,DWORD PTR 28[esp] + and esi,DWORD PTR 8[esp] + and edi,DWORD PTR 12[esp] + or ecx,esi + or edx,edi + add eax,ecx + adc ebx,edx + mov DWORD PTR [esp],eax + mov DWORD PTR 4[esp],ebx + mov dl,BYTE PTR [ebp] + sub esp,8 + lea ebp,DWORD PTR 8[ebp] + cmp dl,23 + jne $L01016_79_x86 + mov esi,DWORD PTR 840[esp] + mov edi,DWORD PTR 844[esp] + mov eax,DWORD PTR [esi] + mov ebx,DWORD PTR 4[esi] + mov ecx,DWORD PTR 8[esi] + mov edx,DWORD PTR 12[esi] + add eax,DWORD PTR 8[esp] + adc ebx,DWORD PTR 12[esp] + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],ebx + add ecx,DWORD PTR 16[esp] + adc edx,DWORD PTR 20[esp] + mov DWORD PTR 8[esi],ecx + mov DWORD PTR 12[esi],edx + mov eax,DWORD PTR 16[esi] + mov ebx,DWORD PTR 20[esi] + mov ecx,DWORD PTR 24[esi] + mov edx,DWORD PTR 28[esi] + add eax,DWORD PTR 24[esp] + adc ebx,DWORD PTR 28[esp] + mov DWORD PTR 16[esi],eax + mov DWORD PTR 20[esi],ebx + add ecx,DWORD PTR 32[esp] + adc edx,DWORD PTR 
36[esp] + mov DWORD PTR 24[esi],ecx + mov DWORD PTR 28[esi],edx + mov eax,DWORD PTR 32[esi] + mov ebx,DWORD PTR 36[esi] + mov ecx,DWORD PTR 40[esi] + mov edx,DWORD PTR 44[esi] + add eax,DWORD PTR 40[esp] + adc ebx,DWORD PTR 44[esp] + mov DWORD PTR 32[esi],eax + mov DWORD PTR 36[esi],ebx + add ecx,DWORD PTR 48[esp] + adc edx,DWORD PTR 52[esp] + mov DWORD PTR 40[esi],ecx + mov DWORD PTR 44[esi],edx + mov eax,DWORD PTR 48[esi] + mov ebx,DWORD PTR 52[esi] + mov ecx,DWORD PTR 56[esi] + mov edx,DWORD PTR 60[esi] + add eax,DWORD PTR 56[esp] + adc ebx,DWORD PTR 60[esp] + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],ebx + add ecx,DWORD PTR 64[esp] + adc edx,DWORD PTR 68[esp] + mov DWORD PTR 56[esi],ecx + mov DWORD PTR 60[esi],edx + add esp,840 + sub ebp,640 + cmp edi,DWORD PTR 8[esp] + jb $L002loop_x86 + mov esp,DWORD PTR 12[esp] + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L001K512: +DD 3609767458,1116352408 +DD 602891725,1899447441 +DD 3964484399,3049323471 +DD 2173295548,3921009573 +DD 4081628472,961987163 +DD 3053834265,1508970993 +DD 2937671579,2453635748 +DD 3664609560,2870763221 +DD 2734883394,3624381080 +DD 1164996542,310598401 +DD 1323610764,607225278 +DD 3590304994,1426881987 +DD 4068182383,1925078388 +DD 991336113,2162078206 +DD 633803317,2614888103 +DD 3479774868,3248222580 +DD 2666613458,3835390401 +DD 944711139,4022224774 +DD 2341262773,264347078 +DD 2007800933,604807628 +DD 1495990901,770255983 +DD 1856431235,1249150122 +DD 3175218132,1555081692 +DD 2198950837,1996064986 +DD 3999719339,2554220882 +DD 766784016,2821834349 +DD 2566594879,2952996808 +DD 3203337956,3210313671 +DD 1034457026,3336571891 +DD 2466948901,3584528711 +DD 3758326383,113926993 +DD 168717936,338241895 +DD 1188179964,666307205 +DD 1546045734,773529912 +DD 1522805485,1294757372 +DD 2643833823,1396182291 +DD 2343527390,1695183700 +DD 1014477480,1986661051 +DD 1206759142,2177026350 +DD 344077627,2456956037 +DD 1290863460,2730485921 +DD 3158454273,2820302411 +DD 3505952657,3259730800 +DD 106217008,3345764771 +DD 3606008344,3516065817 +DD 1432725776,3600352804 +DD 1467031594,4094571909 +DD 851169720,275423344 +DD 3100823752,430227734 +DD 1363258195,506948616 +DD 3750685593,659060556 +DD 3785050280,883997877 +DD 3318307427,958139571 +DD 3812723403,1322822218 +DD 2003034995,1537002063 +DD 3602036899,1747873779 +DD 1575990012,1955562222 +DD 1125592928,2024104815 +DD 2716904306,2227730452 +DD 442776044,2361852424 +DD 593698344,2428436474 +DD 3733110249,2756734187 +DD 2999351573,3204031479 +DD 3815920427,3329325298 +DD 3928383900,3391569614 +DD 566280711,3515267271 +DD 3454069534,3940187606 +DD 4000239992,4118630271 +DD 1914138554,116418474 +DD 2731055270,174292421 +DD 3203993006,289380356 +DD 320620315,460393269 +DD 587496836,685471733 +DD 1086792851,852142971 +DD 365543100,1017036298 +DD 2618297676,1126000580 +DD 3409855158,1288033470 +DD 4234509866,1501505948 +DD 987167468,1607167915 +DD 1246189591,1816402316 +DD 67438087,66051 +DD 202182159,134810123 +_sha512_block_data_order ENDP +DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +DB 62,0 +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/whrlpool/wp-mmx.asm b/deps/openssl/asm_obsolete/x86-win32-masm/whrlpool/wp-mmx.asm new file mode 100644 index 00000000000000..22a17d3510940c --- 
/dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/whrlpool/wp-mmx.asm @@ -0,0 +1,1124 @@ +TITLE wp-mmx.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_whirlpool_block_mmx PROC PUBLIC +$L_whirlpool_block_mmx_begin:: + push ebp + push ebx + push esi + push edi + mov esi,DWORD PTR 20[esp] + mov edi,DWORD PTR 24[esp] + mov ebp,DWORD PTR 28[esp] + mov eax,esp + sub esp,148 + and esp,-64 + lea ebx,DWORD PTR 128[esp] + mov DWORD PTR [ebx],esi + mov DWORD PTR 4[ebx],edi + mov DWORD PTR 8[ebx],ebp + mov DWORD PTR 16[ebx],eax + call $L000pic_point +$L000pic_point: + pop ebp + lea ebp,DWORD PTR ($L001table-$L000pic_point)[ebp] + xor ecx,ecx + xor edx,edx + movq mm0,QWORD PTR [esi] + movq mm1,QWORD PTR 8[esi] + movq mm2,QWORD PTR 16[esi] + movq mm3,QWORD PTR 24[esi] + movq mm4,QWORD PTR 32[esi] + movq mm5,QWORD PTR 40[esi] + movq mm6,QWORD PTR 48[esi] + movq mm7,QWORD PTR 56[esi] +$L002outerloop: + movq QWORD PTR [esp],mm0 + movq QWORD PTR 8[esp],mm1 + movq QWORD PTR 16[esp],mm2 + movq QWORD PTR 24[esp],mm3 + movq QWORD PTR 32[esp],mm4 + movq QWORD PTR 40[esp],mm5 + movq QWORD PTR 48[esp],mm6 + movq QWORD PTR 56[esp],mm7 + pxor mm0,QWORD PTR [edi] + pxor mm1,QWORD PTR 8[edi] + pxor mm2,QWORD PTR 16[edi] + pxor mm3,QWORD PTR 24[edi] + pxor mm4,QWORD PTR 32[edi] + pxor mm5,QWORD PTR 40[edi] + pxor mm6,QWORD PTR 48[edi] + pxor mm7,QWORD PTR 56[edi] + movq QWORD PTR 64[esp],mm0 + movq QWORD PTR 72[esp],mm1 + movq QWORD PTR 80[esp],mm2 + movq QWORD PTR 88[esp],mm3 + movq QWORD PTR 96[esp],mm4 + movq QWORD PTR 104[esp],mm5 + movq QWORD PTR 112[esp],mm6 + movq QWORD PTR 120[esp],mm7 + xor esi,esi + mov DWORD PTR 12[ebx],esi +ALIGN 16 +$L003round: + movq mm0,QWORD PTR 4096[esi*8+ebp] + mov eax,DWORD PTR [esp] + mov ebx,DWORD PTR 4[esp] + movzx ecx,al + movzx edx,ah + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm0,QWORD PTR [esi*8+ebp] + movq mm1,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 8[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + movq mm2,QWORD PTR 6[esi*8+ebp] + movq mm3,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + movq mm4,QWORD PTR 4[esi*8+ebp] + movq mm5,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 12[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + movq mm6,QWORD PTR 2[esi*8+ebp] + movq mm7,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm1,QWORD PTR [esi*8+ebp] + pxor mm2,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 16[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm3,QWORD PTR 6[esi*8+ebp] + pxor mm4,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm5,QWORD PTR 4[esi*8+ebp] + pxor mm6,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 20[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm7,QWORD PTR 2[esi*8+ebp] + pxor mm0,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + 
movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm2,QWORD PTR [esi*8+ebp] + pxor mm3,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 24[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm4,QWORD PTR 6[esi*8+ebp] + pxor mm5,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm6,QWORD PTR 4[esi*8+ebp] + pxor mm7,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 28[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm0,QWORD PTR 2[esi*8+ebp] + pxor mm1,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm3,QWORD PTR [esi*8+ebp] + pxor mm4,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 32[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm5,QWORD PTR 6[esi*8+ebp] + pxor mm6,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm7,QWORD PTR 4[esi*8+ebp] + pxor mm0,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 36[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm1,QWORD PTR 2[esi*8+ebp] + pxor mm2,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm4,QWORD PTR [esi*8+ebp] + pxor mm5,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 40[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm6,QWORD PTR 6[esi*8+ebp] + pxor mm7,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm0,QWORD PTR 4[esi*8+ebp] + pxor mm1,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 44[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm2,QWORD PTR 2[esi*8+ebp] + pxor mm3,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm5,QWORD PTR [esi*8+ebp] + pxor mm6,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 48[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm7,QWORD PTR 6[esi*8+ebp] + pxor mm0,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm1,QWORD PTR 4[esi*8+ebp] + pxor mm2,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 52[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm3,QWORD PTR 2[esi*8+ebp] + pxor mm4,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm6,QWORD PTR [esi*8+ebp] + pxor mm7,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 56[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm0,QWORD PTR 6[esi*8+ebp] + pxor mm1,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm2,QWORD PTR 4[esi*8+ebp] + pxor mm3,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 60[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm4,QWORD PTR 
2[esi*8+ebp] + pxor mm5,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm7,QWORD PTR [esi*8+ebp] + pxor mm0,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 64[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm1,QWORD PTR 6[esi*8+ebp] + pxor mm2,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm3,QWORD PTR 4[esi*8+ebp] + pxor mm4,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 68[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm5,QWORD PTR 2[esi*8+ebp] + pxor mm6,QWORD PTR 1[edi*8+ebp] + movq QWORD PTR [esp],mm0 + movq QWORD PTR 8[esp],mm1 + movq QWORD PTR 16[esp],mm2 + movq QWORD PTR 24[esp],mm3 + movq QWORD PTR 32[esp],mm4 + movq QWORD PTR 40[esp],mm5 + movq QWORD PTR 48[esp],mm6 + movq QWORD PTR 56[esp],mm7 + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm0,QWORD PTR [esi*8+ebp] + pxor mm1,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 72[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm2,QWORD PTR 6[esi*8+ebp] + pxor mm3,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm4,QWORD PTR 4[esi*8+ebp] + pxor mm5,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 76[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm6,QWORD PTR 2[esi*8+ebp] + pxor mm7,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm1,QWORD PTR [esi*8+ebp] + pxor mm2,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 80[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm3,QWORD PTR 6[esi*8+ebp] + pxor mm4,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm5,QWORD PTR 4[esi*8+ebp] + pxor mm6,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 84[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm7,QWORD PTR 2[esi*8+ebp] + pxor mm0,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm2,QWORD PTR [esi*8+ebp] + pxor mm3,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 88[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm4,QWORD PTR 6[esi*8+ebp] + pxor mm5,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm6,QWORD PTR 4[esi*8+ebp] + pxor mm7,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 92[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm0,QWORD PTR 2[esi*8+ebp] + pxor mm1,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm3,QWORD PTR [esi*8+ebp] + pxor mm4,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 96[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm5,QWORD PTR 6[esi*8+ebp] + pxor mm6,QWORD PTR 
5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm7,QWORD PTR 4[esi*8+ebp] + pxor mm0,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 100[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm1,QWORD PTR 2[esi*8+ebp] + pxor mm2,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm4,QWORD PTR [esi*8+ebp] + pxor mm5,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 104[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm6,QWORD PTR 6[esi*8+ebp] + pxor mm7,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm0,QWORD PTR 4[esi*8+ebp] + pxor mm1,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 108[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm2,QWORD PTR 2[esi*8+ebp] + pxor mm3,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm5,QWORD PTR [esi*8+ebp] + pxor mm6,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 112[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm7,QWORD PTR 6[esi*8+ebp] + pxor mm0,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm1,QWORD PTR 4[esi*8+ebp] + pxor mm2,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 116[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm3,QWORD PTR 2[esi*8+ebp] + pxor mm4,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm6,QWORD PTR [esi*8+ebp] + pxor mm7,QWORD PTR 7[edi*8+ebp] + mov eax,DWORD PTR 120[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm0,QWORD PTR 6[esi*8+ebp] + pxor mm1,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm2,QWORD PTR 4[esi*8+ebp] + pxor mm3,QWORD PTR 3[edi*8+ebp] + mov ebx,DWORD PTR 124[esp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm4,QWORD PTR 2[esi*8+ebp] + pxor mm5,QWORD PTR 1[edi*8+ebp] + shr eax,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm7,QWORD PTR [esi*8+ebp] + pxor mm0,QWORD PTR 7[edi*8+ebp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm1,QWORD PTR 6[esi*8+ebp] + pxor mm2,QWORD PTR 5[edi*8+ebp] + shr ebx,16 + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,bl + lea edi,DWORD PTR [edx*1+edx] + movzx edx,bh + pxor mm3,QWORD PTR 4[esi*8+ebp] + pxor mm4,QWORD PTR 3[edi*8+ebp] + lea esi,DWORD PTR [ecx*1+ecx] + movzx ecx,al + lea edi,DWORD PTR [edx*1+edx] + movzx edx,ah + pxor mm5,QWORD PTR 2[esi*8+ebp] + pxor mm6,QWORD PTR 1[edi*8+ebp] + lea ebx,DWORD PTR 128[esp] + mov esi,DWORD PTR 12[ebx] + add esi,1 + cmp esi,10 + je $L004roundsdone + mov DWORD PTR 12[ebx],esi + movq QWORD PTR 64[esp],mm0 + movq QWORD PTR 72[esp],mm1 + movq QWORD PTR 80[esp],mm2 + movq QWORD PTR 88[esp],mm3 + movq QWORD PTR 96[esp],mm4 + movq QWORD PTR 104[esp],mm5 + 
movq QWORD PTR 112[esp],mm6 + movq QWORD PTR 120[esp],mm7 + jmp $L003round +ALIGN 16 +$L004roundsdone: + mov esi,DWORD PTR [ebx] + mov edi,DWORD PTR 4[ebx] + mov eax,DWORD PTR 8[ebx] + pxor mm0,QWORD PTR [edi] + pxor mm1,QWORD PTR 8[edi] + pxor mm2,QWORD PTR 16[edi] + pxor mm3,QWORD PTR 24[edi] + pxor mm4,QWORD PTR 32[edi] + pxor mm5,QWORD PTR 40[edi] + pxor mm6,QWORD PTR 48[edi] + pxor mm7,QWORD PTR 56[edi] + pxor mm0,QWORD PTR [esi] + pxor mm1,QWORD PTR 8[esi] + pxor mm2,QWORD PTR 16[esi] + pxor mm3,QWORD PTR 24[esi] + pxor mm4,QWORD PTR 32[esi] + pxor mm5,QWORD PTR 40[esi] + pxor mm6,QWORD PTR 48[esi] + pxor mm7,QWORD PTR 56[esi] + movq QWORD PTR [esi],mm0 + movq QWORD PTR 8[esi],mm1 + movq QWORD PTR 16[esi],mm2 + movq QWORD PTR 24[esi],mm3 + movq QWORD PTR 32[esi],mm4 + movq QWORD PTR 40[esi],mm5 + movq QWORD PTR 48[esi],mm6 + movq QWORD PTR 56[esi],mm7 + lea edi,DWORD PTR 64[edi] + sub eax,1 + jz $L005alldone + mov DWORD PTR 4[ebx],edi + mov DWORD PTR 8[ebx],eax + jmp $L002outerloop +$L005alldone: + emms + mov esp,DWORD PTR 16[ebx] + pop edi + pop esi + pop ebx + pop ebp + ret +ALIGN 64 +$L001table: +DB 24,24,96,24,192,120,48,216 +DB 24,24,96,24,192,120,48,216 +DB 35,35,140,35,5,175,70,38 +DB 35,35,140,35,5,175,70,38 +DB 198,198,63,198,126,249,145,184 +DB 198,198,63,198,126,249,145,184 +DB 232,232,135,232,19,111,205,251 +DB 232,232,135,232,19,111,205,251 +DB 135,135,38,135,76,161,19,203 +DB 135,135,38,135,76,161,19,203 +DB 184,184,218,184,169,98,109,17 +DB 184,184,218,184,169,98,109,17 +DB 1,1,4,1,8,5,2,9 +DB 1,1,4,1,8,5,2,9 +DB 79,79,33,79,66,110,158,13 +DB 79,79,33,79,66,110,158,13 +DB 54,54,216,54,173,238,108,155 +DB 54,54,216,54,173,238,108,155 +DB 166,166,162,166,89,4,81,255 +DB 166,166,162,166,89,4,81,255 +DB 210,210,111,210,222,189,185,12 +DB 210,210,111,210,222,189,185,12 +DB 245,245,243,245,251,6,247,14 +DB 245,245,243,245,251,6,247,14 +DB 121,121,249,121,239,128,242,150 +DB 121,121,249,121,239,128,242,150 +DB 111,111,161,111,95,206,222,48 +DB 111,111,161,111,95,206,222,48 +DB 145,145,126,145,252,239,63,109 +DB 145,145,126,145,252,239,63,109 +DB 82,82,85,82,170,7,164,248 +DB 82,82,85,82,170,7,164,248 +DB 96,96,157,96,39,253,192,71 +DB 96,96,157,96,39,253,192,71 +DB 188,188,202,188,137,118,101,53 +DB 188,188,202,188,137,118,101,53 +DB 155,155,86,155,172,205,43,55 +DB 155,155,86,155,172,205,43,55 +DB 142,142,2,142,4,140,1,138 +DB 142,142,2,142,4,140,1,138 +DB 163,163,182,163,113,21,91,210 +DB 163,163,182,163,113,21,91,210 +DB 12,12,48,12,96,60,24,108 +DB 12,12,48,12,96,60,24,108 +DB 123,123,241,123,255,138,246,132 +DB 123,123,241,123,255,138,246,132 +DB 53,53,212,53,181,225,106,128 +DB 53,53,212,53,181,225,106,128 +DB 29,29,116,29,232,105,58,245 +DB 29,29,116,29,232,105,58,245 +DB 224,224,167,224,83,71,221,179 +DB 224,224,167,224,83,71,221,179 +DB 215,215,123,215,246,172,179,33 +DB 215,215,123,215,246,172,179,33 +DB 194,194,47,194,94,237,153,156 +DB 194,194,47,194,94,237,153,156 +DB 46,46,184,46,109,150,92,67 +DB 46,46,184,46,109,150,92,67 +DB 75,75,49,75,98,122,150,41 +DB 75,75,49,75,98,122,150,41 +DB 254,254,223,254,163,33,225,93 +DB 254,254,223,254,163,33,225,93 +DB 87,87,65,87,130,22,174,213 +DB 87,87,65,87,130,22,174,213 +DB 21,21,84,21,168,65,42,189 +DB 21,21,84,21,168,65,42,189 +DB 119,119,193,119,159,182,238,232 +DB 119,119,193,119,159,182,238,232 +DB 55,55,220,55,165,235,110,146 +DB 55,55,220,55,165,235,110,146 +DB 229,229,179,229,123,86,215,158 +DB 229,229,179,229,123,86,215,158 +DB 159,159,70,159,140,217,35,19 +DB 159,159,70,159,140,217,35,19 +DB 
240,240,231,240,211,23,253,35 +DB 240,240,231,240,211,23,253,35 +DB 74,74,53,74,106,127,148,32 +DB 74,74,53,74,106,127,148,32 +DB 218,218,79,218,158,149,169,68 +DB 218,218,79,218,158,149,169,68 +DB 88,88,125,88,250,37,176,162 +DB 88,88,125,88,250,37,176,162 +DB 201,201,3,201,6,202,143,207 +DB 201,201,3,201,6,202,143,207 +DB 41,41,164,41,85,141,82,124 +DB 41,41,164,41,85,141,82,124 +DB 10,10,40,10,80,34,20,90 +DB 10,10,40,10,80,34,20,90 +DB 177,177,254,177,225,79,127,80 +DB 177,177,254,177,225,79,127,80 +DB 160,160,186,160,105,26,93,201 +DB 160,160,186,160,105,26,93,201 +DB 107,107,177,107,127,218,214,20 +DB 107,107,177,107,127,218,214,20 +DB 133,133,46,133,92,171,23,217 +DB 133,133,46,133,92,171,23,217 +DB 189,189,206,189,129,115,103,60 +DB 189,189,206,189,129,115,103,60 +DB 93,93,105,93,210,52,186,143 +DB 93,93,105,93,210,52,186,143 +DB 16,16,64,16,128,80,32,144 +DB 16,16,64,16,128,80,32,144 +DB 244,244,247,244,243,3,245,7 +DB 244,244,247,244,243,3,245,7 +DB 203,203,11,203,22,192,139,221 +DB 203,203,11,203,22,192,139,221 +DB 62,62,248,62,237,198,124,211 +DB 62,62,248,62,237,198,124,211 +DB 5,5,20,5,40,17,10,45 +DB 5,5,20,5,40,17,10,45 +DB 103,103,129,103,31,230,206,120 +DB 103,103,129,103,31,230,206,120 +DB 228,228,183,228,115,83,213,151 +DB 228,228,183,228,115,83,213,151 +DB 39,39,156,39,37,187,78,2 +DB 39,39,156,39,37,187,78,2 +DB 65,65,25,65,50,88,130,115 +DB 65,65,25,65,50,88,130,115 +DB 139,139,22,139,44,157,11,167 +DB 139,139,22,139,44,157,11,167 +DB 167,167,166,167,81,1,83,246 +DB 167,167,166,167,81,1,83,246 +DB 125,125,233,125,207,148,250,178 +DB 125,125,233,125,207,148,250,178 +DB 149,149,110,149,220,251,55,73 +DB 149,149,110,149,220,251,55,73 +DB 216,216,71,216,142,159,173,86 +DB 216,216,71,216,142,159,173,86 +DB 251,251,203,251,139,48,235,112 +DB 251,251,203,251,139,48,235,112 +DB 238,238,159,238,35,113,193,205 +DB 238,238,159,238,35,113,193,205 +DB 124,124,237,124,199,145,248,187 +DB 124,124,237,124,199,145,248,187 +DB 102,102,133,102,23,227,204,113 +DB 102,102,133,102,23,227,204,113 +DB 221,221,83,221,166,142,167,123 +DB 221,221,83,221,166,142,167,123 +DB 23,23,92,23,184,75,46,175 +DB 23,23,92,23,184,75,46,175 +DB 71,71,1,71,2,70,142,69 +DB 71,71,1,71,2,70,142,69 +DB 158,158,66,158,132,220,33,26 +DB 158,158,66,158,132,220,33,26 +DB 202,202,15,202,30,197,137,212 +DB 202,202,15,202,30,197,137,212 +DB 45,45,180,45,117,153,90,88 +DB 45,45,180,45,117,153,90,88 +DB 191,191,198,191,145,121,99,46 +DB 191,191,198,191,145,121,99,46 +DB 7,7,28,7,56,27,14,63 +DB 7,7,28,7,56,27,14,63 +DB 173,173,142,173,1,35,71,172 +DB 173,173,142,173,1,35,71,172 +DB 90,90,117,90,234,47,180,176 +DB 90,90,117,90,234,47,180,176 +DB 131,131,54,131,108,181,27,239 +DB 131,131,54,131,108,181,27,239 +DB 51,51,204,51,133,255,102,182 +DB 51,51,204,51,133,255,102,182 +DB 99,99,145,99,63,242,198,92 +DB 99,99,145,99,63,242,198,92 +DB 2,2,8,2,16,10,4,18 +DB 2,2,8,2,16,10,4,18 +DB 170,170,146,170,57,56,73,147 +DB 170,170,146,170,57,56,73,147 +DB 113,113,217,113,175,168,226,222 +DB 113,113,217,113,175,168,226,222 +DB 200,200,7,200,14,207,141,198 +DB 200,200,7,200,14,207,141,198 +DB 25,25,100,25,200,125,50,209 +DB 25,25,100,25,200,125,50,209 +DB 73,73,57,73,114,112,146,59 +DB 73,73,57,73,114,112,146,59 +DB 217,217,67,217,134,154,175,95 +DB 217,217,67,217,134,154,175,95 +DB 242,242,239,242,195,29,249,49 +DB 242,242,239,242,195,29,249,49 +DB 227,227,171,227,75,72,219,168 +DB 227,227,171,227,75,72,219,168 +DB 91,91,113,91,226,42,182,185 +DB 91,91,113,91,226,42,182,185 +DB 136,136,26,136,52,146,13,188 +DB 
136,136,26,136,52,146,13,188 +DB 154,154,82,154,164,200,41,62 +DB 154,154,82,154,164,200,41,62 +DB 38,38,152,38,45,190,76,11 +DB 38,38,152,38,45,190,76,11 +DB 50,50,200,50,141,250,100,191 +DB 50,50,200,50,141,250,100,191 +DB 176,176,250,176,233,74,125,89 +DB 176,176,250,176,233,74,125,89 +DB 233,233,131,233,27,106,207,242 +DB 233,233,131,233,27,106,207,242 +DB 15,15,60,15,120,51,30,119 +DB 15,15,60,15,120,51,30,119 +DB 213,213,115,213,230,166,183,51 +DB 213,213,115,213,230,166,183,51 +DB 128,128,58,128,116,186,29,244 +DB 128,128,58,128,116,186,29,244 +DB 190,190,194,190,153,124,97,39 +DB 190,190,194,190,153,124,97,39 +DB 205,205,19,205,38,222,135,235 +DB 205,205,19,205,38,222,135,235 +DB 52,52,208,52,189,228,104,137 +DB 52,52,208,52,189,228,104,137 +DB 72,72,61,72,122,117,144,50 +DB 72,72,61,72,122,117,144,50 +DB 255,255,219,255,171,36,227,84 +DB 255,255,219,255,171,36,227,84 +DB 122,122,245,122,247,143,244,141 +DB 122,122,245,122,247,143,244,141 +DB 144,144,122,144,244,234,61,100 +DB 144,144,122,144,244,234,61,100 +DB 95,95,97,95,194,62,190,157 +DB 95,95,97,95,194,62,190,157 +DB 32,32,128,32,29,160,64,61 +DB 32,32,128,32,29,160,64,61 +DB 104,104,189,104,103,213,208,15 +DB 104,104,189,104,103,213,208,15 +DB 26,26,104,26,208,114,52,202 +DB 26,26,104,26,208,114,52,202 +DB 174,174,130,174,25,44,65,183 +DB 174,174,130,174,25,44,65,183 +DB 180,180,234,180,201,94,117,125 +DB 180,180,234,180,201,94,117,125 +DB 84,84,77,84,154,25,168,206 +DB 84,84,77,84,154,25,168,206 +DB 147,147,118,147,236,229,59,127 +DB 147,147,118,147,236,229,59,127 +DB 34,34,136,34,13,170,68,47 +DB 34,34,136,34,13,170,68,47 +DB 100,100,141,100,7,233,200,99 +DB 100,100,141,100,7,233,200,99 +DB 241,241,227,241,219,18,255,42 +DB 241,241,227,241,219,18,255,42 +DB 115,115,209,115,191,162,230,204 +DB 115,115,209,115,191,162,230,204 +DB 18,18,72,18,144,90,36,130 +DB 18,18,72,18,144,90,36,130 +DB 64,64,29,64,58,93,128,122 +DB 64,64,29,64,58,93,128,122 +DB 8,8,32,8,64,40,16,72 +DB 8,8,32,8,64,40,16,72 +DB 195,195,43,195,86,232,155,149 +DB 195,195,43,195,86,232,155,149 +DB 236,236,151,236,51,123,197,223 +DB 236,236,151,236,51,123,197,223 +DB 219,219,75,219,150,144,171,77 +DB 219,219,75,219,150,144,171,77 +DB 161,161,190,161,97,31,95,192 +DB 161,161,190,161,97,31,95,192 +DB 141,141,14,141,28,131,7,145 +DB 141,141,14,141,28,131,7,145 +DB 61,61,244,61,245,201,122,200 +DB 61,61,244,61,245,201,122,200 +DB 151,151,102,151,204,241,51,91 +DB 151,151,102,151,204,241,51,91 +DB 0,0,0,0,0,0,0,0 +DB 0,0,0,0,0,0,0,0 +DB 207,207,27,207,54,212,131,249 +DB 207,207,27,207,54,212,131,249 +DB 43,43,172,43,69,135,86,110 +DB 43,43,172,43,69,135,86,110 +DB 118,118,197,118,151,179,236,225 +DB 118,118,197,118,151,179,236,225 +DB 130,130,50,130,100,176,25,230 +DB 130,130,50,130,100,176,25,230 +DB 214,214,127,214,254,169,177,40 +DB 214,214,127,214,254,169,177,40 +DB 27,27,108,27,216,119,54,195 +DB 27,27,108,27,216,119,54,195 +DB 181,181,238,181,193,91,119,116 +DB 181,181,238,181,193,91,119,116 +DB 175,175,134,175,17,41,67,190 +DB 175,175,134,175,17,41,67,190 +DB 106,106,181,106,119,223,212,29 +DB 106,106,181,106,119,223,212,29 +DB 80,80,93,80,186,13,160,234 +DB 80,80,93,80,186,13,160,234 +DB 69,69,9,69,18,76,138,87 +DB 69,69,9,69,18,76,138,87 +DB 243,243,235,243,203,24,251,56 +DB 243,243,235,243,203,24,251,56 +DB 48,48,192,48,157,240,96,173 +DB 48,48,192,48,157,240,96,173 +DB 239,239,155,239,43,116,195,196 +DB 239,239,155,239,43,116,195,196 +DB 63,63,252,63,229,195,126,218 +DB 63,63,252,63,229,195,126,218 +DB 85,85,73,85,146,28,170,199 +DB 
85,85,73,85,146,28,170,199 +DB 162,162,178,162,121,16,89,219 +DB 162,162,178,162,121,16,89,219 +DB 234,234,143,234,3,101,201,233 +DB 234,234,143,234,3,101,201,233 +DB 101,101,137,101,15,236,202,106 +DB 101,101,137,101,15,236,202,106 +DB 186,186,210,186,185,104,105,3 +DB 186,186,210,186,185,104,105,3 +DB 47,47,188,47,101,147,94,74 +DB 47,47,188,47,101,147,94,74 +DB 192,192,39,192,78,231,157,142 +DB 192,192,39,192,78,231,157,142 +DB 222,222,95,222,190,129,161,96 +DB 222,222,95,222,190,129,161,96 +DB 28,28,112,28,224,108,56,252 +DB 28,28,112,28,224,108,56,252 +DB 253,253,211,253,187,46,231,70 +DB 253,253,211,253,187,46,231,70 +DB 77,77,41,77,82,100,154,31 +DB 77,77,41,77,82,100,154,31 +DB 146,146,114,146,228,224,57,118 +DB 146,146,114,146,228,224,57,118 +DB 117,117,201,117,143,188,234,250 +DB 117,117,201,117,143,188,234,250 +DB 6,6,24,6,48,30,12,54 +DB 6,6,24,6,48,30,12,54 +DB 138,138,18,138,36,152,9,174 +DB 138,138,18,138,36,152,9,174 +DB 178,178,242,178,249,64,121,75 +DB 178,178,242,178,249,64,121,75 +DB 230,230,191,230,99,89,209,133 +DB 230,230,191,230,99,89,209,133 +DB 14,14,56,14,112,54,28,126 +DB 14,14,56,14,112,54,28,126 +DB 31,31,124,31,248,99,62,231 +DB 31,31,124,31,248,99,62,231 +DB 98,98,149,98,55,247,196,85 +DB 98,98,149,98,55,247,196,85 +DB 212,212,119,212,238,163,181,58 +DB 212,212,119,212,238,163,181,58 +DB 168,168,154,168,41,50,77,129 +DB 168,168,154,168,41,50,77,129 +DB 150,150,98,150,196,244,49,82 +DB 150,150,98,150,196,244,49,82 +DB 249,249,195,249,155,58,239,98 +DB 249,249,195,249,155,58,239,98 +DB 197,197,51,197,102,246,151,163 +DB 197,197,51,197,102,246,151,163 +DB 37,37,148,37,53,177,74,16 +DB 37,37,148,37,53,177,74,16 +DB 89,89,121,89,242,32,178,171 +DB 89,89,121,89,242,32,178,171 +DB 132,132,42,132,84,174,21,208 +DB 132,132,42,132,84,174,21,208 +DB 114,114,213,114,183,167,228,197 +DB 114,114,213,114,183,167,228,197 +DB 57,57,228,57,213,221,114,236 +DB 57,57,228,57,213,221,114,236 +DB 76,76,45,76,90,97,152,22 +DB 76,76,45,76,90,97,152,22 +DB 94,94,101,94,202,59,188,148 +DB 94,94,101,94,202,59,188,148 +DB 120,120,253,120,231,133,240,159 +DB 120,120,253,120,231,133,240,159 +DB 56,56,224,56,221,216,112,229 +DB 56,56,224,56,221,216,112,229 +DB 140,140,10,140,20,134,5,152 +DB 140,140,10,140,20,134,5,152 +DB 209,209,99,209,198,178,191,23 +DB 209,209,99,209,198,178,191,23 +DB 165,165,174,165,65,11,87,228 +DB 165,165,174,165,65,11,87,228 +DB 226,226,175,226,67,77,217,161 +DB 226,226,175,226,67,77,217,161 +DB 97,97,153,97,47,248,194,78 +DB 97,97,153,97,47,248,194,78 +DB 179,179,246,179,241,69,123,66 +DB 179,179,246,179,241,69,123,66 +DB 33,33,132,33,21,165,66,52 +DB 33,33,132,33,21,165,66,52 +DB 156,156,74,156,148,214,37,8 +DB 156,156,74,156,148,214,37,8 +DB 30,30,120,30,240,102,60,238 +DB 30,30,120,30,240,102,60,238 +DB 67,67,17,67,34,82,134,97 +DB 67,67,17,67,34,82,134,97 +DB 199,199,59,199,118,252,147,177 +DB 199,199,59,199,118,252,147,177 +DB 252,252,215,252,179,43,229,79 +DB 252,252,215,252,179,43,229,79 +DB 4,4,16,4,32,20,8,36 +DB 4,4,16,4,32,20,8,36 +DB 81,81,89,81,178,8,162,227 +DB 81,81,89,81,178,8,162,227 +DB 153,153,94,153,188,199,47,37 +DB 153,153,94,153,188,199,47,37 +DB 109,109,169,109,79,196,218,34 +DB 109,109,169,109,79,196,218,34 +DB 13,13,52,13,104,57,26,101 +DB 13,13,52,13,104,57,26,101 +DB 250,250,207,250,131,53,233,121 +DB 250,250,207,250,131,53,233,121 +DB 223,223,91,223,182,132,163,105 +DB 223,223,91,223,182,132,163,105 +DB 126,126,229,126,215,155,252,169 +DB 126,126,229,126,215,155,252,169 +DB 36,36,144,36,61,180,72,25 +DB 36,36,144,36,61,180,72,25 +DB 
59,59,236,59,197,215,118,254 +DB 59,59,236,59,197,215,118,254 +DB 171,171,150,171,49,61,75,154 +DB 171,171,150,171,49,61,75,154 +DB 206,206,31,206,62,209,129,240 +DB 206,206,31,206,62,209,129,240 +DB 17,17,68,17,136,85,34,153 +DB 17,17,68,17,136,85,34,153 +DB 143,143,6,143,12,137,3,131 +DB 143,143,6,143,12,137,3,131 +DB 78,78,37,78,74,107,156,4 +DB 78,78,37,78,74,107,156,4 +DB 183,183,230,183,209,81,115,102 +DB 183,183,230,183,209,81,115,102 +DB 235,235,139,235,11,96,203,224 +DB 235,235,139,235,11,96,203,224 +DB 60,60,240,60,253,204,120,193 +DB 60,60,240,60,253,204,120,193 +DB 129,129,62,129,124,191,31,253 +DB 129,129,62,129,124,191,31,253 +DB 148,148,106,148,212,254,53,64 +DB 148,148,106,148,212,254,53,64 +DB 247,247,251,247,235,12,243,28 +DB 247,247,251,247,235,12,243,28 +DB 185,185,222,185,161,103,111,24 +DB 185,185,222,185,161,103,111,24 +DB 19,19,76,19,152,95,38,139 +DB 19,19,76,19,152,95,38,139 +DB 44,44,176,44,125,156,88,81 +DB 44,44,176,44,125,156,88,81 +DB 211,211,107,211,214,184,187,5 +DB 211,211,107,211,214,184,187,5 +DB 231,231,187,231,107,92,211,140 +DB 231,231,187,231,107,92,211,140 +DB 110,110,165,110,87,203,220,57 +DB 110,110,165,110,87,203,220,57 +DB 196,196,55,196,110,243,149,170 +DB 196,196,55,196,110,243,149,170 +DB 3,3,12,3,24,15,6,27 +DB 3,3,12,3,24,15,6,27 +DB 86,86,69,86,138,19,172,220 +DB 86,86,69,86,138,19,172,220 +DB 68,68,13,68,26,73,136,94 +DB 68,68,13,68,26,73,136,94 +DB 127,127,225,127,223,158,254,160 +DB 127,127,225,127,223,158,254,160 +DB 169,169,158,169,33,55,79,136 +DB 169,169,158,169,33,55,79,136 +DB 42,42,168,42,77,130,84,103 +DB 42,42,168,42,77,130,84,103 +DB 187,187,214,187,177,109,107,10 +DB 187,187,214,187,177,109,107,10 +DB 193,193,35,193,70,226,159,135 +DB 193,193,35,193,70,226,159,135 +DB 83,83,81,83,162,2,166,241 +DB 83,83,81,83,162,2,166,241 +DB 220,220,87,220,174,139,165,114 +DB 220,220,87,220,174,139,165,114 +DB 11,11,44,11,88,39,22,83 +DB 11,11,44,11,88,39,22,83 +DB 157,157,78,157,156,211,39,1 +DB 157,157,78,157,156,211,39,1 +DB 108,108,173,108,71,193,216,43 +DB 108,108,173,108,71,193,216,43 +DB 49,49,196,49,149,245,98,164 +DB 49,49,196,49,149,245,98,164 +DB 116,116,205,116,135,185,232,243 +DB 116,116,205,116,135,185,232,243 +DB 246,246,255,246,227,9,241,21 +DB 246,246,255,246,227,9,241,21 +DB 70,70,5,70,10,67,140,76 +DB 70,70,5,70,10,67,140,76 +DB 172,172,138,172,9,38,69,165 +DB 172,172,138,172,9,38,69,165 +DB 137,137,30,137,60,151,15,181 +DB 137,137,30,137,60,151,15,181 +DB 20,20,80,20,160,68,40,180 +DB 20,20,80,20,160,68,40,180 +DB 225,225,163,225,91,66,223,186 +DB 225,225,163,225,91,66,223,186 +DB 22,22,88,22,176,78,44,166 +DB 22,22,88,22,176,78,44,166 +DB 58,58,232,58,205,210,116,247 +DB 58,58,232,58,205,210,116,247 +DB 105,105,185,105,111,208,210,6 +DB 105,105,185,105,111,208,210,6 +DB 9,9,36,9,72,45,18,65 +DB 9,9,36,9,72,45,18,65 +DB 112,112,221,112,167,173,224,215 +DB 112,112,221,112,167,173,224,215 +DB 182,182,226,182,217,84,113,111 +DB 182,182,226,182,217,84,113,111 +DB 208,208,103,208,206,183,189,30 +DB 208,208,103,208,206,183,189,30 +DB 237,237,147,237,59,126,199,214 +DB 237,237,147,237,59,126,199,214 +DB 204,204,23,204,46,219,133,226 +DB 204,204,23,204,46,219,133,226 +DB 66,66,21,66,42,87,132,104 +DB 66,66,21,66,42,87,132,104 +DB 152,152,90,152,180,194,45,44 +DB 152,152,90,152,180,194,45,44 +DB 164,164,170,164,73,14,85,237 +DB 164,164,170,164,73,14,85,237 +DB 40,40,160,40,93,136,80,117 +DB 40,40,160,40,93,136,80,117 +DB 92,92,109,92,218,49,184,134 +DB 92,92,109,92,218,49,184,134 +DB 248,248,199,248,147,63,237,107 +DB 
248,248,199,248,147,63,237,107 +DB 134,134,34,134,68,164,17,194 +DB 134,134,34,134,68,164,17,194 +DB 24,35,198,232,135,184,1,79 +DB 54,166,210,245,121,111,145,82 +DB 96,188,155,142,163,12,123,53 +DB 29,224,215,194,46,75,254,87 +DB 21,119,55,229,159,240,74,218 +DB 88,201,41,10,177,160,107,133 +DB 189,93,16,244,203,62,5,103 +DB 228,39,65,139,167,125,149,216 +DB 251,238,124,102,221,23,71,158 +DB 202,45,191,7,173,90,131,51 +_whirlpool_block_mmx ENDP +.text$ ENDS +END diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/x86cpuid.asm b/deps/openssl/asm_obsolete/x86-win32-masm/x86cpuid.asm new file mode 100644 index 00000000000000..b4462fc2aa72bd --- /dev/null +++ b/deps/openssl/asm_obsolete/x86-win32-masm/x86cpuid.asm @@ -0,0 +1,366 @@ +TITLE x86cpuid.asm +IF @Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.686 +.XMM +IF @Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF + +.MODEL FLAT +OPTION DOTNAME +IF @Version LT 800 +.text$ SEGMENT PAGE 'CODE' +ELSE +.text$ SEGMENT ALIGN(64) 'CODE' +ENDIF +ALIGN 16 +_OPENSSL_ia32_cpuid PROC PUBLIC +$L_OPENSSL_ia32_cpuid_begin:: + push ebp + push ebx + push esi + push edi + xor edx,edx + pushfd + pop eax + mov ecx,eax + xor eax,2097152 + push eax + popfd + pushfd + pop eax + xor ecx,eax + xor eax,eax + bt ecx,21 + jnc $L000nocpuid + mov esi,DWORD PTR 20[esp] + mov DWORD PTR 8[esi],eax + cpuid + mov edi,eax + xor eax,eax + cmp ebx,1970169159 + setne al + mov ebp,eax + cmp edx,1231384169 + setne al + or ebp,eax + cmp ecx,1818588270 + setne al + or ebp,eax + jz $L001intel + cmp ebx,1752462657 + setne al + mov esi,eax + cmp edx,1769238117 + setne al + or esi,eax + cmp ecx,1145913699 + setne al + or esi,eax + jnz $L001intel + mov eax,2147483648 + cpuid + cmp eax,2147483649 + jb $L001intel + mov esi,eax + mov eax,2147483649 + cpuid + or ebp,ecx + and ebp,2049 + cmp esi,2147483656 + jb $L001intel + mov eax,2147483656 + cpuid + movzx esi,cl + inc esi + mov eax,1 + xor ecx,ecx + cpuid + bt edx,28 + jnc $L002generic + shr ebx,16 + and ebx,255 + cmp ebx,esi + ja $L002generic + and edx,4026531839 + jmp $L002generic +$L001intel: + cmp edi,7 + jb $L003cacheinfo + mov esi,DWORD PTR 20[esp] + mov eax,7 + xor ecx,ecx + cpuid + mov DWORD PTR 8[esi],ebx +$L003cacheinfo: + cmp edi,4 + mov edi,-1 + jb $L004nocacheinfo + mov eax,4 + mov ecx,0 + cpuid + mov edi,eax + shr edi,14 + and edi,4095 +$L004nocacheinfo: + mov eax,1 + xor ecx,ecx + cpuid + and edx,3220176895 + cmp ebp,0 + jne $L005notintel + or edx,1073741824 + and ah,15 + cmp ah,15 + jne $L005notintel + or edx,1048576 +$L005notintel: + bt edx,28 + jnc $L002generic + and edx,4026531839 + cmp edi,0 + je $L002generic + or edx,268435456 + shr ebx,16 + cmp bl,1 + ja $L002generic + and edx,4026531839 +$L002generic: + and ebp,2048 + and ecx,4294965247 + mov esi,edx + or ebp,ecx + bt ecx,27 + jnc $L006clear_avx + xor ecx,ecx +DB 15,1,208 + and eax,6 + cmp eax,6 + je $L007done + cmp eax,2 + je $L006clear_avx +$L008clear_xmm: + and ebp,4261412861 + and esi,4278190079 +$L006clear_avx: + and ebp,4026525695 + mov edi,DWORD PTR 20[esp] + and DWORD PTR 8[edi],4294967263 +$L007done: + mov eax,esi + mov edx,ebp +$L000nocpuid: + pop edi + pop esi + pop ebx + pop ebp + ret +_OPENSSL_ia32_cpuid ENDP +;EXTERN _OPENSSL_ia32cap_P:NEAR +ALIGN 16 +_OPENSSL_rdtsc PROC PUBLIC +$L_OPENSSL_rdtsc_begin:: + xor eax,eax + xor edx,edx + lea ecx,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [ecx],4 + jnc $L009notsc + rdtsc +$L009notsc: + ret +_OPENSSL_rdtsc ENDP +ALIGN 16 +_OPENSSL_instrument_halt PROC 
PUBLIC +$L_OPENSSL_instrument_halt_begin:: + lea ecx,DWORD PTR _OPENSSL_ia32cap_P + bt DWORD PTR [ecx],4 + jnc $L010nohalt +DD 2421723150 + and eax,3 + jnz $L010nohalt + pushfd + pop eax + bt eax,9 + jnc $L010nohalt + rdtsc + push edx + push eax + hlt + rdtsc + sub eax,DWORD PTR [esp] + sbb edx,DWORD PTR 4[esp] + add esp,8 + ret +$L010nohalt: + xor eax,eax + xor edx,edx + ret +_OPENSSL_instrument_halt ENDP +ALIGN 16 +_OPENSSL_far_spin PROC PUBLIC +$L_OPENSSL_far_spin_begin:: + pushfd + pop eax + bt eax,9 + jnc $L011nospin + mov eax,DWORD PTR 4[esp] + mov ecx,DWORD PTR 8[esp] +DD 2430111262 + xor eax,eax + mov edx,DWORD PTR [ecx] + jmp $L012spin +ALIGN 16 +$L012spin: + inc eax + cmp edx,DWORD PTR [ecx] + je $L012spin +DD 529567888 + ret +$L011nospin: + xor eax,eax + xor edx,edx + ret +_OPENSSL_far_spin ENDP +ALIGN 16 +_OPENSSL_wipe_cpu PROC PUBLIC +$L_OPENSSL_wipe_cpu_begin:: + xor eax,eax + xor edx,edx + lea ecx,DWORD PTR _OPENSSL_ia32cap_P + mov ecx,DWORD PTR [ecx] + bt DWORD PTR [ecx],1 + jnc $L013no_x87 + and ecx,83886080 + cmp ecx,83886080 + jne $L014no_sse2 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 +$L014no_sse2: +DD 4007259865,4007259865,4007259865,4007259865 +DD 2430851995 +$L013no_x87: + lea eax,DWORD PTR 4[esp] + ret +_OPENSSL_wipe_cpu ENDP +ALIGN 16 +_OPENSSL_atomic_add PROC PUBLIC +$L_OPENSSL_atomic_add_begin:: + mov edx,DWORD PTR 4[esp] + mov ecx,DWORD PTR 8[esp] + push ebx + nop + mov eax,DWORD PTR [edx] +$L015spin: + lea ebx,DWORD PTR [ecx*1+eax] + nop +DD 447811568 + jne $L015spin + mov eax,ebx + pop ebx + ret +_OPENSSL_atomic_add ENDP +ALIGN 16 +_OPENSSL_indirect_call PROC PUBLIC +$L_OPENSSL_indirect_call_begin:: + push ebp + mov ebp,esp + sub esp,28 + mov ecx,DWORD PTR 12[ebp] + mov DWORD PTR [esp],ecx + mov edx,DWORD PTR 16[ebp] + mov DWORD PTR 4[esp],edx + mov eax,DWORD PTR 20[ebp] + mov DWORD PTR 8[esp],eax + mov eax,DWORD PTR 24[ebp] + mov DWORD PTR 12[esp],eax + mov eax,DWORD PTR 28[ebp] + mov DWORD PTR 16[esp],eax + mov eax,DWORD PTR 32[ebp] + mov DWORD PTR 20[esp],eax + mov eax,DWORD PTR 36[ebp] + mov DWORD PTR 24[esp],eax + call DWORD PTR 8[ebp] + mov esp,ebp + pop ebp + ret +_OPENSSL_indirect_call ENDP +ALIGN 16 +_OPENSSL_cleanse PROC PUBLIC +$L_OPENSSL_cleanse_begin:: + mov edx,DWORD PTR 4[esp] + mov ecx,DWORD PTR 8[esp] + xor eax,eax + cmp ecx,7 + jae $L016lot + cmp ecx,0 + je $L017ret +$L018little: + mov BYTE PTR [edx],al + sub ecx,1 + lea edx,DWORD PTR 1[edx] + jnz $L018little +$L017ret: + ret +ALIGN 16 +$L016lot: + test edx,3 + jz $L019aligned + mov BYTE PTR [edx],al + lea ecx,DWORD PTR [ecx-1] + lea edx,DWORD PTR 1[edx] + jmp $L016lot +$L019aligned: + mov DWORD PTR [edx],eax + lea ecx,DWORD PTR [ecx-4] + test ecx,-4 + lea edx,DWORD PTR 4[edx] + jnz $L019aligned + cmp ecx,0 + jne $L018little + ret +_OPENSSL_cleanse ENDP +ALIGN 16 +_OPENSSL_ia32_rdrand PROC PUBLIC +$L_OPENSSL_ia32_rdrand_begin:: + mov ecx,8 +$L020loop: +DB 15,199,240 + jc $L021break + loop $L020loop +$L021break: + cmp eax,0 + cmove eax,ecx + ret +_OPENSSL_ia32_rdrand ENDP +ALIGN 16 +_OPENSSL_ia32_rdseed PROC PUBLIC +$L_OPENSSL_ia32_rdseed_begin:: + mov ecx,8 +$L022loop: +DB 15,199,248 + jc $L023break + loop $L022loop +$L023break: + cmp eax,0 + cmove eax,ecx + ret +_OPENSSL_ia32_rdseed ENDP +.text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS +.CRT$XCU SEGMENT DWORD PUBLIC 'DATA' +EXTERN _OPENSSL_cpuid_setup:NEAR +DD _OPENSSL_cpuid_setup +.CRT$XCU ENDS +END diff --git 
a/deps/openssl/config/Makefile b/deps/openssl/config/Makefile index 818f671a1d3521..02f926e7362275 100644 --- a/deps/openssl/config/Makefile +++ b/deps/openssl/config/Makefile @@ -3,8 +3,8 @@ CONFIGURE = ./Configure COPT = no-shared no-symlinks ARCHS = BSD-x86 BSD-x86_64 VC-WIN32 VC-WIN64A darwin64-x86_64-cc \ -darwin-i386-cc linux-armv4 linux-elf linux-x86_64 solaris-x86-gcc \ -solaris64-x86_64-gcc +darwin-i386-cc linux-aarch64 linux-armv4 linux-elf linux-x32 \ +linux-x86_64 solaris-x86-gcc solaris64-x86_64-gcc CFG = opensslconf.h SRC_CFG = ../openssl/crypto/$(CFG) @@ -14,36 +14,17 @@ BACKUP_EXT = iojsbackup # OPENSSL_CPUID_OBJ is defined in openssl.gypi for use --openssl-no-asm CPUIDFIX = 's/\#define OPENSSL_CPUID_OBJ$$//;' -X32FIX = 's/\#define OPENSSL_CPUID_OBJ$$//;\ -s/RC4_CHUNK unsigned long$$/RC4_CHUNK unsigned long long/;\ -s/define SIXTY_FOUR_BIT_LONG$$/undef SIXTY_FOUR_BIT_LONG/;\ -s/undef SIXTY_FOUR_BIT$$/define SIXTY_FOUR_BIT/;' - MACFIX ='s/define RC4_INT unsigned char$$/define RC4_INT unsigned int/;' -WINFIX = 'if(/ifndef OPENSSL_DOING_MAKEDEPEND$$/){\ -print "\n\#ifndef OPENSSL_NO_DYNAMIC_ENGINE\n";\ -print "\# define OPENSSL_NO_DYNAMIC_ENGINE\n";\ -print "\#endif\n";\ -print "\#ifndef OPENSSL_NO_CAPIENG\n";\ -print "\# define OPENSSL_NO_CAPIENG\n";\ -print "\#endif\n\n";}' - PHONY = all clean backup restore .PHONY: $(PHONY) -all: backup $(ARCHS) linux-x32 cleanconf fixdarwin64 fixwin restore +all: backup $(ARCHS) cleanconf fixdarwin64 restore $(ARCHS): cd ../openssl; $(PERL) $(CONFIGURE) $(COPT) $@ > /dev/null $(PERL) -p -e $(CPUIDFIX) $(SRC_CFG) > ./archs/$@/$(CFG) -# linux-x32 was made by comparing define values of opensslconf.h which -# was generated `Configure linux-x32' in openssl-1.0.2a -linux-x32: - cd ../openssl; $(PERL) $(CONFIGURE) $(COPT) linux-x86_64 > /dev/null; - $(PERL) -p -e $(X32FIX) $(SRC_CFG) > ./archs/$@/$(CFG) - # The current openssl release does not use RC4 asm since it explicitly # specified as `$asm=~s/rc4\-[^:]+//;` in # https://github.com/openssl/openssl/blob/OpenSSL_1_0_1-stable/Configure#L584 @@ -53,14 +34,6 @@ linux-x32: fixdarwin64: $(PERL) -pi -e $(MACFIX) ./archs/darwin64-x86_64-cc/$(CFG) - -# OPENSSL_NO_DYNAMIC_ENGINE is needed for building static -# library. OPENSSL_NO_CAPIENG is needed to avoid build errors on -# Win. See the comments in `deps/openssl/openssl/engines/e_capi.c` for -# detail. 
-fixwin: - $(PERL) -pi -e $(WINFIX) ./archs/VC-WIN32/$(CFG) ./archs/VC-WIN64A/$(CFG) - # backup files to avoid to be overwritten backup: @for f in $(BACKUP_FILES); do \ diff --git a/deps/openssl/config/archs/BSD-x86/opensslconf.h b/deps/openssl/config/archs/BSD-x86/opensslconf.h index fb6363d56a364c..7d9b6d56beb7a0 100644 --- a/deps/openssl/config/archs/BSD-x86/opensslconf.h +++ b/deps/openssl/config/archs/BSD-x86/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/BSD-x86_64/opensslconf.h b/deps/openssl/config/archs/BSD-x86_64/opensslconf.h index 4acabe0e4609c5..68c38a25abbe8f 100644 --- a/deps/openssl/config/archs/BSD-x86_64/opensslconf.h +++ b/deps/openssl/config/archs/BSD-x86_64/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. 
Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/VC-WIN32/opensslconf.h b/deps/openssl/config/archs/VC-WIN32/opensslconf.h index 16705fda02d181..9e7d774a69f7c0 100644 --- a/deps/openssl/config/archs/VC-WIN32/opensslconf.h +++ b/deps/openssl/config/archs/VC-WIN32/opensslconf.h @@ -8,14 +8,6 @@ extern "C" { #ifndef OPENSSL_SYSNAME_WIN32 # define OPENSSL_SYSNAME_WIN32 #endif - -#ifndef OPENSSL_NO_DYNAMIC_ENGINE -# define OPENSSL_NO_DYNAMIC_ENGINE -#endif -#ifndef OPENSSL_NO_CAPIENG -# define OPENSSL_NO_CAPIENG -#endif - #ifndef OPENSSL_DOING_MAKEDEPEND @@ -31,6 +23,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -43,6 +38,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -73,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -85,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -217,7 +221,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. 
Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/VC-WIN64A/opensslconf.h b/deps/openssl/config/archs/VC-WIN64A/opensslconf.h index 22fafd631d1579..fbcd27df62aa96 100644 --- a/deps/openssl/config/archs/VC-WIN64A/opensslconf.h +++ b/deps/openssl/config/archs/VC-WIN64A/opensslconf.h @@ -8,14 +8,6 @@ extern "C" { #ifndef OPENSSL_SYSNAME_WIN64A # define OPENSSL_SYSNAME_WIN64A #endif - -#ifndef OPENSSL_NO_DYNAMIC_ENGINE -# define OPENSSL_NO_DYNAMIC_ENGINE -#endif -#ifndef OPENSSL_NO_CAPIENG -# define OPENSSL_NO_CAPIENG -#endif - #ifndef OPENSSL_DOING_MAKEDEPEND @@ -31,6 +23,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -43,6 +38,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -73,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -85,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -217,7 +221,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/darwin-i386-cc/opensslconf.h b/deps/openssl/config/archs/darwin-i386-cc/opensslconf.h index 6df0465e536eae..300e7672d1ae19 100644 --- a/deps/openssl/config/archs/darwin-i386-cc/opensslconf.h +++ b/deps/openssl/config/archs/darwin-i386-cc/opensslconf.h @@ -23,6 +23,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -35,6 +38,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -68,6 +74,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -80,6 +89,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -211,7 +223,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. 
Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/darwin64-x86_64-cc/opensslconf.h b/deps/openssl/config/archs/darwin64-x86_64-cc/opensslconf.h index 349623bd4e139c..f4d36c35c760f0 100644 --- a/deps/openssl/config/archs/darwin64-x86_64-cc/opensslconf.h +++ b/deps/openssl/config/archs/darwin64-x86_64-cc/opensslconf.h @@ -23,6 +23,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -35,6 +38,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -68,6 +74,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -80,6 +89,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -211,7 +223,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/linux-aarch64/opensslconf.h b/deps/openssl/config/archs/linux-aarch64/opensslconf.h new file mode 100644 index 00000000000000..0de7cf7eec66b4 --- /dev/null +++ b/deps/openssl/config/archs/linux-aarch64/opensslconf.h @@ -0,0 +1,258 @@ +/* opensslconf.h */ +/* WARNING: Generated automatically from opensslconf.h.in by Configure. */ + +#ifdef __cplusplus +extern "C" { +#endif +/* OpenSSL was configured with the following options: */ +#ifndef OPENSSL_DOING_MAKEDEPEND + + +#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 +# define OPENSSL_NO_EC_NISTP_64_GCC_128 +#endif +#ifndef OPENSSL_NO_GMP +# define OPENSSL_NO_GMP +#endif +#ifndef OPENSSL_NO_JPAKE +# define OPENSSL_NO_JPAKE +#endif +#ifndef OPENSSL_NO_KRB5 +# define OPENSSL_NO_KRB5 +#endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif +#ifndef OPENSSL_NO_MD2 +# define OPENSSL_NO_MD2 +#endif +#ifndef OPENSSL_NO_RC5 +# define OPENSSL_NO_RC5 +#endif +#ifndef OPENSSL_NO_RFC3779 +# define OPENSSL_NO_RFC3779 +#endif +#ifndef OPENSSL_NO_SCTP +# define OPENSSL_NO_SCTP +#endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif +#ifndef OPENSSL_NO_STORE +# define OPENSSL_NO_STORE +#endif +#ifndef OPENSSL_NO_UNIT_TEST +# define OPENSSL_NO_UNIT_TEST +#endif + +#endif /* OPENSSL_DOING_MAKEDEPEND */ + +#ifndef OPENSSL_THREADS +# define OPENSSL_THREADS +#endif +#ifndef OPENSSL_NO_DYNAMIC_ENGINE +# define OPENSSL_NO_DYNAMIC_ENGINE +#endif + +/* The OPENSSL_NO_* macros are also defined as NO_* if the application + asks for it. This is a transient feature that is provided for those + who haven't had the time to do the appropriate changes in their + applications. 
*/ +#ifdef OPENSSL_ALGORITHM_DEFINES +# if defined(OPENSSL_NO_EC_NISTP_64_GCC_128) && !defined(NO_EC_NISTP_64_GCC_128) +# define NO_EC_NISTP_64_GCC_128 +# endif +# if defined(OPENSSL_NO_GMP) && !defined(NO_GMP) +# define NO_GMP +# endif +# if defined(OPENSSL_NO_JPAKE) && !defined(NO_JPAKE) +# define NO_JPAKE +# endif +# if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) +# define NO_KRB5 +# endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif +# if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) +# define NO_MD2 +# endif +# if defined(OPENSSL_NO_RC5) && !defined(NO_RC5) +# define NO_RC5 +# endif +# if defined(OPENSSL_NO_RFC3779) && !defined(NO_RFC3779) +# define NO_RFC3779 +# endif +# if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) +# define NO_SCTP +# endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif +# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) +# define NO_STORE +# endif +# if defined(OPENSSL_NO_UNIT_TEST) && !defined(NO_UNIT_TEST) +# define NO_UNIT_TEST +# endif +#endif + + + +/* crypto/opensslconf.h.in */ + +/* Generate 80386 code? */ +#undef I386_ONLY + +#if !(defined(VMS) || defined(__VMS)) /* VMS uses logical names instead */ +#if defined(HEADER_CRYPTLIB_H) && !defined(OPENSSLDIR) +#define ENGINESDIR "/usr/local/ssl/lib/engines" +#define OPENSSLDIR "/usr/local/ssl" +#endif +#endif + +#undef OPENSSL_UNISTD +#define OPENSSL_UNISTD + +#undef OPENSSL_EXPORT_VAR_AS_FUNCTION + +#if defined(HEADER_IDEA_H) && !defined(IDEA_INT) +#define IDEA_INT unsigned int +#endif + +#if defined(HEADER_MD2_H) && !defined(MD2_INT) +#define MD2_INT unsigned int +#endif + +#if defined(HEADER_RC2_H) && !defined(RC2_INT) +/* I need to put in a mod for the alpha - eay */ +#define RC2_INT unsigned int +#endif + +#if defined(HEADER_RC4_H) +#if !defined(RC4_INT) +/* using int types make the structure larger but make the code faster + * on most boxes I have tested - up to %20 faster. */ +/* + * I don't know what does "most" mean, but declaring "int" is a must on: + * - Intel P6 because partial register stalls are very expensive; + * - elder Alpha because it lacks byte load/store instructions; + */ +#define RC4_INT unsigned char +#endif +#if !defined(RC4_CHUNK) +/* + * This enables code handling data aligned at natural CPU word + * boundary. See crypto/rc4/rc4_enc.c for further details. + */ +#define RC4_CHUNK unsigned long +#endif +#endif + +#if (defined(HEADER_NEW_DES_H) || defined(HEADER_DES_H)) && !defined(DES_LONG) +/* If this is set to 'unsigned int' on a DEC Alpha, this gives about a + * %20 speed up (longs are 8 bytes, int's are 4). */ +#ifndef DES_LONG +#define DES_LONG unsigned int +#endif +#endif + +#if defined(HEADER_BN_H) && !defined(CONFIG_HEADER_BN_H) +#define CONFIG_HEADER_BN_H +#undef BN_LLONG + +/* Should we define BN_DIV2W here? 
*/ + +/* Only one for the following should be defined */ +#define SIXTY_FOUR_BIT_LONG +#undef SIXTY_FOUR_BIT +#undef THIRTY_TWO_BIT +#endif + +#if defined(HEADER_RC4_LOCL_H) && !defined(CONFIG_HEADER_RC4_LOCL_H) +#define CONFIG_HEADER_RC4_LOCL_H +/* if this is defined data[i] is used instead of *data, this is a %20 + * speedup on x86 */ +#undef RC4_INDEX +#endif + +#if defined(HEADER_BF_LOCL_H) && !defined(CONFIG_HEADER_BF_LOCL_H) +#define CONFIG_HEADER_BF_LOCL_H +#define BF_PTR +#endif /* HEADER_BF_LOCL_H */ + +#if defined(HEADER_DES_LOCL_H) && !defined(CONFIG_HEADER_DES_LOCL_H) +#define CONFIG_HEADER_DES_LOCL_H +#ifndef DES_DEFAULT_OPTIONS +/* the following is tweaked from a config script, that is why it is a + * protected undef/define */ +#ifndef DES_PTR +#undef DES_PTR +#endif + +/* This helps C compiler generate the correct code for multiple functional + * units. It reduces register dependancies at the expense of 2 more + * registers */ +#ifndef DES_RISC1 +#undef DES_RISC1 +#endif + +#ifndef DES_RISC2 +#undef DES_RISC2 +#endif + +#if defined(DES_RISC1) && defined(DES_RISC2) +YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! +#endif + +/* Unroll the inner loop, this sometimes helps, sometimes hinders. + * Very mucy CPU dependant */ +#ifndef DES_UNROLL +#define DES_UNROLL +#endif + +/* These default values were supplied by + * Peter Gutman + * They are only used if nothing else has been defined */ +#if !defined(DES_PTR) && !defined(DES_RISC1) && !defined(DES_RISC2) && !defined(DES_UNROLL) +/* Special defines which change the way the code is built depending on the + CPU and OS. For SGI machines you can use _MIPS_SZLONG (32 or 64) to find + even newer MIPS CPU's, but at the moment one size fits all for + optimization options. Older Sparc's work better with only UNROLL, but + there's no way to tell at compile time what it is you're running on */ + +#if defined( sun ) /* Newer Sparc's */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#elif defined( __ultrix ) /* Older MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined( __osf1__ ) /* Alpha */ +# define DES_PTR +# define DES_RISC2 +#elif defined ( _AIX ) /* RS6000 */ + /* Unknown */ +#elif defined( __hpux ) /* HP-PA */ + /* Unknown */ +#elif defined( __aux ) /* 68K */ + /* Unknown */ +#elif defined( __dgux ) /* 88K (but P6 in latest boxes) */ +# define DES_UNROLL +#elif defined( __sgi ) /* Newer MIPS */ +# define DES_PTR +# define DES_RISC2 +# define DES_UNROLL +#elif defined(i386) || defined(__i386__) /* x86 boxes, should be gcc */ +# define DES_PTR +# define DES_RISC1 +# define DES_UNROLL +#endif /* Systems-specific speed defines */ +#endif + +#endif /* DES_DEFAULT_OPTIONS */ +#endif /* HEADER_DES_LOCL_H */ +#ifdef __cplusplus +} +#endif diff --git a/deps/openssl/config/archs/linux-armv4/opensslconf.h b/deps/openssl/config/archs/linux-armv4/opensslconf.h index 9b5cf3499baf68..b4d7f8144d06ac 100644 --- a/deps/openssl/config/archs/linux-armv4/opensslconf.h +++ b/deps/openssl/config/archs/linux-armv4/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if 
defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/linux-elf/opensslconf.h b/deps/openssl/config/archs/linux-elf/opensslconf.h index fb6363d56a364c..7d9b6d56beb7a0 100644 --- a/deps/openssl/config/archs/linux-elf/opensslconf.h +++ b/deps/openssl/config/archs/linux-elf/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. 
Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/linux-x32/opensslconf.h b/deps/openssl/config/archs/linux-x32/opensslconf.h index 8cd62e788410ce..e2ca78ef5b6695 100644 --- a/deps/openssl/config/archs/linux-x32/opensslconf.h +++ b/deps/openssl/config/archs/linux-x32/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/linux-x86_64/opensslconf.h b/deps/openssl/config/archs/linux-x86_64/opensslconf.h index 4acabe0e4609c5..68c38a25abbe8f 100644 --- a/deps/openssl/config/archs/linux-x86_64/opensslconf.h +++ b/deps/openssl/config/archs/linux-x86_64/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. 
Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/solaris-x86-gcc/opensslconf.h b/deps/openssl/config/archs/solaris-x86-gcc/opensslconf.h index fb6363d56a364c..7d9b6d56beb7a0 100644 --- a/deps/openssl/config/archs/solaris-x86-gcc/opensslconf.h +++ b/deps/openssl/config/archs/solaris-x86-gcc/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/archs/solaris64-x86_64-gcc/opensslconf.h b/deps/openssl/config/archs/solaris64-x86_64-gcc/opensslconf.h index 4acabe0e4609c5..68c38a25abbe8f 100644 --- a/deps/openssl/config/archs/solaris64-x86_64-gcc/opensslconf.h +++ b/deps/openssl/config/archs/solaris64-x86_64-gcc/opensslconf.h @@ -20,6 +20,9 @@ extern "C" { #ifndef OPENSSL_NO_KRB5 # define OPENSSL_NO_KRB5 #endif +#ifndef OPENSSL_NO_LIBUNBOUND +# define OPENSSL_NO_LIBUNBOUND +#endif #ifndef OPENSSL_NO_MD2 # define OPENSSL_NO_MD2 #endif @@ -32,6 +35,9 @@ extern "C" { #ifndef OPENSSL_NO_SCTP # define OPENSSL_NO_SCTP #endif +#ifndef OPENSSL_NO_SSL_TRACE +# define OPENSSL_NO_SSL_TRACE +#endif #ifndef OPENSSL_NO_STORE # define OPENSSL_NO_STORE #endif @@ -65,6 +71,9 @@ extern "C" { # if defined(OPENSSL_NO_KRB5) && !defined(NO_KRB5) # define NO_KRB5 # endif +# if defined(OPENSSL_NO_LIBUNBOUND) && !defined(NO_LIBUNBOUND) +# define NO_LIBUNBOUND +# endif # if defined(OPENSSL_NO_MD2) && !defined(NO_MD2) # define NO_MD2 # endif @@ -77,6 +86,9 @@ extern "C" { # if defined(OPENSSL_NO_SCTP) && !defined(NO_SCTP) # define NO_SCTP # endif +# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE) +# define NO_SSL_TRACE +# endif # if defined(OPENSSL_NO_STORE) && !defined(NO_STORE) # define NO_STORE # endif @@ -208,7 +220,7 @@ YOU SHOULD NOT HAVE BOTH DES_RISC1 AND DES_RISC2 DEFINED!!!!! even newer MIPS CPU's, but at the moment one size fits all for optimization options. 
Older Sparc's work better with only UNROLL, but there's no way to tell at compile time what it is you're running on */ - + #if defined( sun ) /* Newer Sparc's */ # define DES_PTR # define DES_RISC1 diff --git a/deps/openssl/config/opensslconf.h b/deps/openssl/config/opensslconf.h index ad49a0f983838d..76efd0863e8bca 100644 --- a/deps/openssl/config/opensslconf.h +++ b/deps/openssl/config/opensslconf.h @@ -19,10 +19,10 @@ | --dest-os | --dest-cpu | OpenSSL target arch | CI | | --------- | ---------- | -------------------- | --- | | linux | ia32 | linux-elf | o | - | linux | x32 | patched linux-x86_64 | - | + | linux | x32 | linux-x32 | - | | linux | x64 | linux-x86_64 | o | | linux | arm | linux-armv4 | o | - | linux | arm64 | N/A | - | + | linux | arm64 | linux-aarch64 | o | | mac | ia32 | darwin-i386-cc | o | | mac | x64 | darwin64-x86-cc | o | | win | ia32 | VC-WIN32 | - | @@ -90,7 +90,7 @@ #elif defined(OPENSSL_LINUX) && defined(__arm__) # include "./archs/linux-armv4/opensslconf.h" #elif defined(OPENSSL_LINUX) && defined(__aarch64__) - /* Not Supported Yet */ +# include "./archs/linux-aarch64/opensslconf.h" #elif defined(__APPLE__) && defined(__MACH__) && defined(__i386__) # include "./archs/darwin-i386-cc/opensslconf.h" #elif defined(__APPLE__) && defined(__MACH__) && defined(__x86_64__) diff --git a/deps/openssl/doc/UPGRADING.md b/deps/openssl/doc/UPGRADING.md new file mode 100644 index 00000000000000..81f129c4326cb0 --- /dev/null +++ b/deps/openssl/doc/UPGRADING.md @@ -0,0 +1,191 @@
+## How to upgrade openssl library in io.js
+
+This document describes the procedure for upgrading openssl from 1.0.1m
+to 1.0.2a in io.js.
+
+### Build System and Upgrading Overview
+The openssl build system is based on the `Configure` perl script in
+`deps/openssl/openssl`. For example, running `Configure linux-x86_64`
+in the openssl repository generates a `Makefile` and `opensslconf.h`
+for the linux-x86_64 target architecture.
+
+The `Makefile` contains the list of asm files that are generated by
+perl scripts during the build, so that the build can make the best use
+of the hardware capabilities of each type of CPU.
+
+`Configure TABLE` shows the various build parameters that depend on
+each os and arch.
+
+In io.js, the build target is defined by the `--dest-os` and
+`--dest-cpu` configure options, which differ from the targets defined
+in openssl, and its build system is gyp, which is based on python.
+Therefore we cannot use the openssl build system directly.
+
+In order to build openssl with gyp in io.js, the opensslconf.h and asm
+files are generated in advance for the supported platforms.
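+
+For illustration, here is a minimal sketch of how the configuration for
+a single target could be produced by hand. The `no-shared no-symlinks`
+options are the ones used by `deps/openssl/config/Makefile`, which
+automates this for every supported target (and additionally strips the
+OPENSSL_CPUID_OBJ define, see step 4 below); the plain copy at the end
+is only an approximation of that Makefile rule.
+
+````
+# Generate Makefile and crypto/opensslconf.h for one openssl target.
+cd deps/openssl/openssl
+perl ./Configure no-shared no-symlinks linux-x86_64 > /dev/null
+
+# Copy the generated header into the per-arch directory that
+# deps/openssl/config/opensslconf.h dispatches to via compiler macros.
+cp crypto/opensslconf.h ../config/archs/linux-x86_64/opensslconf.h
+````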
+ +Here is a table that maps the conf (opensslconf.h) and asm support between the openssl targets and the os and cpu configure parameters of io.js. The platforms tested in CI are also listed. + +| --dest-os | --dest-cpu | conf | asm | openssl target | CI | +|---------- |----------- |----- |----- |------------------- |--- | +| linux | ia32 | o | o |linux-elf | o | +| linux | x32 | o | x(*2)|linux-x32 | x | +| linux | x64 | o | o |linux-x86_64 | o | +| linux | arm | o | o |linux-arm | o | +| linux | arm64 | o | o |linux-aarch64 | o | +| mac | ia32 | o | o |darwin-i386-cc | - | +| mac | x64 | o | o |darwin64-x86_64-cc | o | +| win | ia32 | o | o(*3)|VC-WIN32 | x | +| win | x64 | o | o |VC-WIN64A | o | +| solaris | ia32 | o | o |solaris-x86-gcc | o | +| solaris | x64 | o | o |solaris64-x86_64-gcc| o | +| freebsd | ia32 | o | o |BSD-x86 | o | +| freebsd | x64 | o | o |BSD-x86_64 | o | +| openbsd | ia32 | o | o |BSD-x86 | x | +| openbsd | x64 | o | o |BSD-x86_64 | x | +| others | ia32 | x(*1)| o | - | x | +| others | x64 | x(*1)| o | - | x | +| others | arm | x(*1)| o | - | x | +| others | arm64 | x(*1)| o | - | x | +| others | others | x(*1)| x(*2)| - | x | + +- (*1) use linux-elf as a fallback configuration +- (*2) no-asm used +- (*3) currently masm (Microsoft Macro Assembler) is used, but it is no longer supported in openssl. We need to move to nasm or yasm. + +All parameters such as sources, defines, cflags and others that are generated by the openssl Makefile are written into `deps/openssl/openssl.gypi`. + +The header file `deps/openssl/openssl/crypto/opensslconf.h` is generated by `Configure` and varies for each os and arch, so we added a new `deps/openssl/config/opensslconf.h` that includes the conf file for each target from `deps/openssl/config/archs/*/opensslconf.h` by checking pre-defined compiler macros. This procedure can be run automatically with `deps/openssl/config/Makefile`. + +Assembler support is one of the key features of openssl, but the asm files are generated dynamically by perl from `deps/openssl/openssl/crypto/*/asm/*.pl` during the build. Furthermore, these perl scripts check the assembler version and generate asm files according to the instructions supported by each assembler. + +Since perl is not a build requirement of io.js, they all have to be generated in advance and stored statically in the repository. We provide two sets of asm files: asm_latest (avx2 and addx supported) in `deps/openssl/asm`, and asm_obsolete (without avx1/2 and addx) in `deps/openssl/asm_obsolete`; which set is used depends on the features supported by the assembler. Each directory has a `Makefile` to generate the asm files with the perl scripts in the openssl sources. + +`configure` and gyp check the version of assemblers such as gnu as (gas), llvm and Visual Studio. `deps/openssl/openssl.gypi` determines which asm files should be used: asm_latest needs gas >= 2.23, llvm >= 3.3 or MSVS_VERSION >= '2012' (ml64 >= 12), as defined in https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha512-x86_64.pl#L112-L129; otherwise the asm_obsolete files are used. + +The following are the detailed steps to upgrade the openssl version from 1.0.1m to 1.0.2a in io.js. + +### 1. Replace openssl source in `deps/openssl/openssl` +Remove the old openssl sources in `deps/openssl/openssl`. Get the original openssl sources from https://www.openssl.org/source/openssl-1.0.2a.tar.gz and extract all files into `deps/openssl/openssl`.
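+
+A minimal sketch of this step, assuming a POSIX shell with `curl` and GNU `tar` available (the exact commands are not prescribed by this document):
+
+````
+# remove the old sources, then fetch and unpack 1.0.2a in their place
+cd deps/openssl
+rm -rf openssl/*
+curl -O https://www.openssl.org/source/openssl-1.0.2a.tar.gz
+tar -xzf openssl-1.0.2a.tar.gz --strip-components=1 -C openssl
+rm openssl-1.0.2a.tar.gz
+````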
+ +### 2. Apply private patches +There are three kinds of private patches to be applied to openssl-1.0.2a. + +- The two fixes for the assembly errors on ia32 win32. masm is no longer supported in openssl; we should move to nasm or yasm in a future version of io.js. + +- The fix for openssl-cli built on win. The key-press requirement of openssl-cli on win causes timeout failures in several tests. + +- Backport patches for the alternative cert chain feature from openssl-1.1.x. Root certs with a 1024-bit RSA key length were deprecated in io.js. When a tls server has a cross-signed root cert, an io.js client fails with a CERT_UNTRUSTED error because openssl does not find the alternate cert chain. This fix adds that feature, but it was made against the current master, which is openssl-1.1.x, so we backported it privately into openssl-1.0.2 for io.js. + +### 3. Replace openssl header files in `deps/openssl/openssl/include/openssl` +All header files in `deps/openssl/openssl/include/openssl/*.h` are symbolic links in the distributed release tar.gz. They cause issues on Windows, so they are replaced by files that include the real header file, such as +```` +#include "../../crypto/aes/aes.h" +```` +### 4. Change `opensslconf.h` to fit each platform +The opensslconf.h for each target is created in advance by running `deps/openssl/openssl/Configure {target}` and copying the result into `deps/openssl/config/archs/{target}/opensslconf.h`. `deps/openssl/config/opensslconf.h` includes the file for each target by checking pre-defined compiler macros. These can be generated with `deps/openssl/config/Makefile`. + +We should remove the OPENSSL_CPUID_OBJ define from opensslconf.h because it causes a build error when the --openssl-no-asm option is specified. Instead, OPENSSL_CPUID_OBJ is defined in `deps/openssl/openssl.gypi` according to the configure options. + +One fix to opensslconf.h is needed on 64-bit MacOS. The current openssl release does not use the RC4 asm, since it is explicitly excluded by `$asm=~s/rc4\-[^:]+//;` in https://github.com/openssl/openssl/blob/OpenSSL_1_0_1-stable/Configure#L584, but io.js has used the RC4 asm on MacOS for a long time. Change the type of RC4_INT to `unsigned int` in the opensslconf.h of darwin64-x86_64-cc so that the RC4 asm keeps working. + +### 5. Update openssl.gyp and openssl.gypi +Sources, cflags and define parameters that depend on each target can be obtained via `Configure TABLE`. The list is given in the table of [define and cflags changes in openssl-1.0.2a](openssl_define_list.pdf). + +There is no way to verify all the necessary sources automatically. We can only carefully look at the source list and compiled objects in the openssl Makefile and compare them with the compiled objects stored under `out/Release/obj.target/openssl/deps/openssl/` in io.js. + +### 6. ASM files for openssl +We provide two sets of asm files. One is for the latest assembler and the other is for older ones. + +### 6.1. asm files for the latest compiler +These were made with `deps/openssl/asm/Makefile`: +- Updated asm files for each platform as required by openssl-1.0.2a. +- Some perl files need the CC and ASM environment variables. Added a check that these envs exist. The affected asm files are generated with CC=gcc and ASM=nasm on Linux. See `deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl`. +- Added new 32-bit targets/rules with an sse2 flag (OPENSSL_IA32_SSE2) to generate asm that uses SSE2. +- Generating the sha512 asm files on x86_64 needs an output filename that contains 512. Added new rules so that stdout is not used for the output. +- The PERLASM_SCHEME of linux-armv4 is `void` as defined in the openssl Configure. Changed its target/rule, and all directories are moved from arm-elf-gas to arm-void-gas. +- Added a new rule for armv8 asm generation. + +With CC=gcc and ASM=nasm exported in the environment, run the make command and check that the new asm files are generated.
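+
+For example, on Linux (a sketch assuming gcc and nasm are installed):
+
+````
+cd deps/openssl/asm
+# the perl scripts check these to pick the instruction sets to emit
+export CC=gcc
+export ASM=nasm
+make
+````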
+ +### 6.2. asm files for the older compiler +For the older assembler, the version checks of CC and ASM should be skipped when generating the asm files with the perl scripts. Copy the files from `deps/openssl/asm` into `deps/openssl/asm_obsolete`, change the rules to generate the asm files into these directories, and remove the check of the CC and ASM envs. + +Without the CC and ASM environment variables set, run the make command and check that the new asm files for the older compilers are generated. diff --git a/deps/openssl/doc/openssl_define_list.pdf b/deps/openssl/doc/openssl_define_list.pdf new file mode 100644 index 00000000000000..efbc85ff01e1b2 Binary files /dev/null and b/deps/openssl/doc/openssl_define_list.pdf differ diff --git a/deps/openssl/openssl-cli.gypi b/deps/openssl/openssl-cli.gypi index c0d4a2a64493c6..1209d64e46dbc2 100644 --- a/deps/openssl/openssl-cli.gypi +++ b/deps/openssl/openssl-cli.gypi @@ -5,6 +5,7 @@ 'defines': [ 'MONOLITH' ], + 'includes': ['openssl.gypi'], 'sources': ['<@(openssl_cli_sources)'], 'conditions': [ ['OS=="solaris"', { diff --git a/deps/openssl/openssl.gyp b/deps/openssl/openssl.gyp index 01ac2c5e051401..5a3dc9b6c74210 100644 --- a/deps/openssl/openssl.gyp +++ b/deps/openssl/openssl.gyp @@ -6,13 +6,15 @@ 'variables': { 'is_clang': 0, 'gcc_version': 0, - 'openssl_no_asm%': 0 + 'openssl_no_asm%': 0, + 'llvm_version%': 0, + 'gas_version%': 0, }, - 'includes': ['openssl.gypi'], 'targets': [ { 'target_name': 'openssl', 'type': '<(library)', + 'includes': ['openssl.gypi'], 'sources': ['<@(openssl_sources)'], 'sources/': [ ['exclude', 'md2/.*$'], @@ -30,7 +32,7 @@ 'conditions': [ ['target_arch=="arm"', { 'defines': ['<@(openssl_defines_asm)'], - 'sources': ['<@(openssl_sources_arm_elf_gas)'], + 'sources': ['<@(openssl_sources_arm_void_gas)'], }, 'target_arch=="ia32" and OS=="mac"', { 'defines': [ '<@(openssl_defines_asm)', @@ -69,6 +71,9 @@ '<@(openssl_defines_x64_elf)', ], 'sources': ['<@(openssl_sources_x64_elf_gas)'], + }, 'target_arch=="arm64"', { + 'defines': ['<@(openssl_defines_arm64)',], + 'sources': ['<@(openssl_sources_arm64_linux64_gas)'], + }, { # Other architectures don't use assembly.
'defines': ['OPENSSL_NO_ASM'], @@ -96,6 +101,7 @@ } ], 'target_defaults': { + 'includes': ['openssl.gypi'], 'include_dirs': ['<@(openssl_default_include_dirs)'], 'defines': ['<@(openssl_default_defines_all)'], 'conditions': [ diff --git a/deps/openssl/openssl.gypi b/deps/openssl/openssl.gypi index faad91331a4f90..840b479e05d634 100644 --- a/deps/openssl/openssl.gypi +++ b/deps/openssl/openssl.gypi @@ -4,7 +4,6 @@ 'openssl/ssl/bio_ssl.c', 'openssl/ssl/d1_both.c', 'openssl/ssl/d1_clnt.c', - 'openssl/ssl/d1_enc.c', 'openssl/ssl/d1_lib.c', 'openssl/ssl/d1_meth.c', 'openssl/ssl/d1_pkt.c', @@ -23,17 +22,18 @@ 'openssl/ssl/s2_pkt.c', 'openssl/ssl/s2_srvr.c', 'openssl/ssl/s3_both.c', + 'openssl/ssl/s3_cbc.c', 'openssl/ssl/s3_clnt.c', 'openssl/ssl/s3_enc.c', 'openssl/ssl/s3_lib.c', 'openssl/ssl/s3_meth.c', 'openssl/ssl/s3_pkt.c', 'openssl/ssl/s3_srvr.c', - 'openssl/ssl/s3_cbc.c', 'openssl/ssl/ssl_algs.c', 'openssl/ssl/ssl_asn1.c', 'openssl/ssl/ssl_cert.c', 'openssl/ssl/ssl_ciph.c', + 'openssl/ssl/ssl_conf.c', 'openssl/ssl/ssl_err.c', 'openssl/ssl/ssl_err2.c', 'openssl/ssl/ssl_lib.c', @@ -41,12 +41,15 @@ 'openssl/ssl/ssl_sess.c', 'openssl/ssl/ssl_stat.c', 'openssl/ssl/ssl_txt.c', + 'openssl/ssl/ssl_utst.c', 'openssl/ssl/t1_clnt.c', 'openssl/ssl/t1_enc.c', + 'openssl/ssl/t1_ext.c', 'openssl/ssl/t1_lib.c', 'openssl/ssl/t1_meth.c', 'openssl/ssl/t1_reneg.c', 'openssl/ssl/t1_srvr.c', + 'openssl/ssl/t1_trce.c', 'openssl/ssl/tls_srp.c', 'openssl/crypto/aes/aes_cfb.c', 'openssl/crypto/aes/aes_ctr.c', @@ -206,6 +209,7 @@ 'openssl/crypto/cms/cms_err.c', 'openssl/crypto/cms/cms_ess.c', 'openssl/crypto/cms/cms_io.c', + 'openssl/crypto/cms/cms_kari.c', 'openssl/crypto/cms/cms_lib.c', 'openssl/crypto/cms/cms_pwri.c', 'openssl/crypto/cms/cms_sd.c', @@ -254,10 +258,12 @@ 'openssl/crypto/dh/dh_depr.c', 'openssl/crypto/dh/dh_err.c', 'openssl/crypto/dh/dh_gen.c', + 'openssl/crypto/dh/dh_kdf.c', 'openssl/crypto/dh/dh_key.c', 'openssl/crypto/dh/dh_lib.c', 'openssl/crypto/dh/dh_pmeth.c', 'openssl/crypto/dh/dh_prn.c', + 'openssl/crypto/dh/dh_rfc5114.c', 'openssl/crypto/dsa/dsa_ameth.c', 'openssl/crypto/dsa/dsa_asn1.c', 'openssl/crypto/dsa/dsa_depr.c', @@ -305,6 +311,7 @@ 'openssl/crypto/ec/ecp_oct.c', 'openssl/crypto/ec/ecp_smpl.c', 'openssl/crypto/ecdh/ech_err.c', + 'openssl/crypto/ecdh/ech_kdf.c', 'openssl/crypto/ecdh/ech_key.c', 'openssl/crypto/ecdh/ech_lib.c', 'openssl/crypto/ecdh/ech_ossl.c', @@ -327,7 +334,6 @@ 'openssl/crypto/engine/eng_openssl.c', 'openssl/crypto/engine/eng_pkey.c', 'openssl/crypto/engine/eng_rdrand.c', - 'openssl/crypto/engine/eng_rsax.c', 'openssl/crypto/engine/eng_table.c', 'openssl/crypto/engine/tb_asnmth.c', 'openssl/crypto/engine/tb_cipher.c', @@ -353,6 +359,7 @@ 'openssl/crypto/evp/digest.c', 'openssl/crypto/evp/e_aes.c', 'openssl/crypto/evp/e_aes_cbc_hmac_sha1.c', + 'openssl/crypto/evp/e_aes_cbc_hmac_sha256.c', 'openssl/crypto/evp/e_bf.c', 'openssl/crypto/evp/e_camellia.c', 'openssl/crypto/evp/e_cast.c', @@ -372,7 +379,6 @@ 'openssl/crypto/evp/evp_cnf.c', 'openssl/crypto/evp/evp_enc.c', 'openssl/crypto/evp/evp_err.c', - 'openssl/crypto/evp/evp_fips.c', 'openssl/crypto/evp/evp_key.c', 'openssl/crypto/evp/evp_lib.c', 'openssl/crypto/evp/evp_pbe.c', @@ -416,8 +422,6 @@ 'openssl/crypto/krb5/krb5_asn.c', 'openssl/crypto/lhash/lh_stats.c', 'openssl/crypto/lhash/lhash.c', - 'openssl/crypto/md2/md2_dgst.c', - 'openssl/crypto/md2/md2_one.c', 'openssl/crypto/md4/md4_dgst.c', 'openssl/crypto/md4/md4_one.c', 'openssl/crypto/md5/md5_dgst.c', @@ -433,6 +437,7 @@ 'openssl/crypto/modes/cts128.c', 
'openssl/crypto/modes/gcm128.c', 'openssl/crypto/modes/ofb128.c', + 'openssl/crypto/modes/wrap128.c', 'openssl/crypto/modes/xts128.c', 'openssl/crypto/o_dir.c', 'openssl/crypto/o_fips.c', @@ -540,10 +545,6 @@ 'openssl/crypto/srp/srp_lib.c', 'openssl/crypto/srp/srp_vfy.c', 'openssl/crypto/stack/stack.c', - 'openssl/crypto/store/str_err.c', - 'openssl/crypto/store/str_lib.c', - 'openssl/crypto/store/str_mem.c', - 'openssl/crypto/store/str_meth.c', 'openssl/crypto/ts/ts_asn1.c', 'openssl/crypto/ts/ts_conf.c', 'openssl/crypto/ts/ts_err.c', @@ -556,7 +557,6 @@ 'openssl/crypto/ts/ts_rsp_verify.c', 'openssl/crypto/ts/ts_verify_ctx.c', 'openssl/crypto/txt_db/txt_db.c', - 'openssl/crypto/ui/ui_compat.c', 'openssl/crypto/ui/ui_err.c', 'openssl/crypto/ui/ui_lib.c', 'openssl/crypto/ui/ui_openssl.c', @@ -619,6 +619,7 @@ 'openssl/crypto/x509v3/v3_pmaps.c', 'openssl/crypto/x509v3/v3_prn.c', 'openssl/crypto/x509v3/v3_purp.c', + 'openssl/crypto/x509v3/v3_scts.c', 'openssl/crypto/x509v3/v3_skey.c', 'openssl/crypto/x509v3/v3_sxnet.c', 'openssl/crypto/x509v3/v3_utl.c', @@ -632,7 +633,7 @@ 'openssl/engines/e_gmp.c', 'openssl/engines/e_nuron.c', 'openssl/engines/e_sureware.c', - 'openssl/engines/e_ubsec.c' + 'openssl/engines/e_ubsec.c', ], 'openssl_sources_no_asm': [ 'openssl/crypto/aes/aes_cbc.c', @@ -650,20 +651,21 @@ 'openssl/crypto/rc4/rc4_skey.c', 'openssl/crypto/whrlpool/wp_block.c' ], - 'openssl_sources_ia32_elf_gas': [ + 'openssl_sources_asm_latest_ia32_elf_gas': [ 'asm/x86-elf-gas/aes/aes-586.s', 'asm/x86-elf-gas/aes/aesni-x86.s', 'asm/x86-elf-gas/aes/vpaes-x86.s', - 'asm/x86-elf-gas/bf/bf-686.s', + 'asm/x86-elf-gas/bf/bf-586.s', + 'asm/x86-elf-gas/bn/bn-586.s', + 'asm/x86-elf-gas/bn/co-586.s', 'asm/x86-elf-gas/bn/x86-mont.s', - 'asm/x86-elf-gas/bn/x86.s', + 'asm/x86-elf-gas/bn/x86-gf2m.s', 'asm/x86-elf-gas/camellia/cmll-x86.s', 'asm/x86-elf-gas/cast/cast-586.s', 'asm/x86-elf-gas/des/crypt586.s', 'asm/x86-elf-gas/des/des-586.s', 'asm/x86-elf-gas/md5/md5-586.s', 'asm/x86-elf-gas/rc4/rc4-586.s', - 'asm/x86-elf-gas/rc5/rc5-586.s', 'asm/x86-elf-gas/ripemd/rmd-586.s', 'asm/x86-elf-gas/sha/sha1-586.s', 'asm/x86-elf-gas/sha/sha256-586.s', @@ -671,51 +673,117 @@ 'asm/x86-elf-gas/whrlpool/wp-mmx.s', 'asm/x86-elf-gas/modes/ghash-x86.s', 'asm/x86-elf-gas/x86cpuid.s', + ], + 'openssl_sources_asm_obsolete_ia32_elf_gas': [ + 'asm_obsolete/x86-elf-gas/aes/aes-586.s', + 'asm_obsolete/x86-elf-gas/aes/aesni-x86.s', + 'asm_obsolete/x86-elf-gas/aes/vpaes-x86.s', + 'asm_obsolete/x86-elf-gas/bf/bf-586.s', + 'asm_obsolete/x86-elf-gas/bn/bn-586.s', + 'asm_obsolete/x86-elf-gas/bn/co-586.s', + 'asm_obsolete/x86-elf-gas/bn/x86-mont.s', + 'asm_obsolete/x86-elf-gas/bn/x86-gf2m.s', + 'asm_obsolete/x86-elf-gas/camellia/cmll-x86.s', + 'asm_obsolete/x86-elf-gas/cast/cast-586.s', + 'asm_obsolete/x86-elf-gas/des/crypt586.s', + 'asm_obsolete/x86-elf-gas/des/des-586.s', + 'asm_obsolete/x86-elf-gas/md5/md5-586.s', + 'asm_obsolete/x86-elf-gas/rc4/rc4-586.s', + 'asm_obsolete/x86-elf-gas/ripemd/rmd-586.s', + 'asm_obsolete/x86-elf-gas/sha/sha1-586.s', + 'asm_obsolete/x86-elf-gas/sha/sha256-586.s', + 'asm_obsolete/x86-elf-gas/sha/sha512-586.s', + 'asm_obsolete/x86-elf-gas/whrlpool/wp-mmx.s', + 'asm_obsolete/x86-elf-gas/modes/ghash-x86.s', + 'asm_obsolete/x86-elf-gas/x86cpuid.s', + ], + 'openssl_sources_common_ia32': [ 'openssl/crypto/whrlpool/wp_block.c' ], - 'openssl_sources_x64_elf_gas': [ + 'openssl_sources_asm_latest_x64_elf_gas': [ 'asm/x64-elf-gas/aes/aes-x86_64.s', + 'asm/x64-elf-gas/aes/aesni-mb-x86_64.s', + 
'asm/x64-elf-gas/aes/aesni-sha256-x86_64.s', 'asm/x64-elf-gas/aes/aesni-x86_64.s', 'asm/x64-elf-gas/aes/vpaes-x86_64.s', 'asm/x64-elf-gas/aes/bsaes-x86_64.s', 'asm/x64-elf-gas/aes/aesni-sha1-x86_64.s', - 'asm/x64-elf-gas/bn/modexp512-x86_64.s', + 'asm/x64-elf-gas/bn/rsaz-avx2.s', + 'asm/x64-elf-gas/bn/rsaz-x86_64.s', 'asm/x64-elf-gas/bn/x86_64-mont.s', 'asm/x64-elf-gas/bn/x86_64-mont5.s', 'asm/x64-elf-gas/bn/x86_64-gf2m.s', 'asm/x64-elf-gas/camellia/cmll-x86_64.s', + 'asm/x64-elf-gas/ec/ecp_nistz256-x86_64.s', 'asm/x64-elf-gas/md5/md5-x86_64.s', 'asm/x64-elf-gas/rc4/rc4-x86_64.s', 'asm/x64-elf-gas/rc4/rc4-md5-x86_64.s', + 'asm/x64-elf-gas/sha/sha1-mb-x86_64.s', 'asm/x64-elf-gas/sha/sha1-x86_64.s', + 'asm/x64-elf-gas/sha/sha256-mb-x86_64.s', 'asm/x64-elf-gas/sha/sha256-x86_64.s', 'asm/x64-elf-gas/sha/sha512-x86_64.s', 'asm/x64-elf-gas/whrlpool/wp-x86_64.s', + 'asm/x64-elf-gas/modes/aesni-gcm-x86_64.s', 'asm/x64-elf-gas/modes/ghash-x86_64.s', 'asm/x64-elf-gas/x86_64cpuid.s', + ], + 'openssl_sources_asm_obsolete_x64_elf_gas': [ + 'asm_obsolete/x64-elf-gas/aes/aes-x86_64.s', + 'asm_obsolete/x64-elf-gas/aes/aesni-mb-x86_64.s', + 'asm_obsolete/x64-elf-gas/aes/aesni-sha256-x86_64.s', + 'asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s', + 'asm_obsolete/x64-elf-gas/aes/vpaes-x86_64.s', + 'asm_obsolete/x64-elf-gas/aes/bsaes-x86_64.s', + 'asm_obsolete/x64-elf-gas/aes/aesni-sha1-x86_64.s', + 'asm_obsolete/x64-elf-gas/bn/rsaz-avx2.s', + 'asm_obsolete/x64-elf-gas/bn/rsaz-x86_64.s', + 'asm_obsolete/x64-elf-gas/bn/x86_64-mont.s', + 'asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s', + 'asm_obsolete/x64-elf-gas/bn/x86_64-gf2m.s', + 'asm_obsolete/x64-elf-gas/camellia/cmll-x86_64.s', + 'asm_obsolete/x64-elf-gas/ec/ecp_nistz256-x86_64.s', + 'asm_obsolete/x64-elf-gas/md5/md5-x86_64.s', + 'asm_obsolete/x64-elf-gas/rc4/rc4-x86_64.s', + 'asm_obsolete/x64-elf-gas/rc4/rc4-md5-x86_64.s', + 'asm_obsolete/x64-elf-gas/sha/sha1-mb-x86_64.s', + 'asm_obsolete/x64-elf-gas/sha/sha1-x86_64.s', + 'asm_obsolete/x64-elf-gas/sha/sha256-mb-x86_64.s', + 'asm_obsolete/x64-elf-gas/sha/sha256-x86_64.s', + 'asm_obsolete/x64-elf-gas/sha/sha512-x86_64.s', + 'asm_obsolete/x64-elf-gas/whrlpool/wp-x86_64.s', + 'asm_obsolete/x64-elf-gas/modes/aesni-gcm-x86_64.s', + 'asm_obsolete/x64-elf-gas/modes/ghash-x86_64.s', + 'asm_obsolete/x64-elf-gas/x86_64cpuid.s', + ], + 'openssl_sources_common_x64_elf_gas': [ # Non-generated asm 'openssl/crypto/bn/asm/x86_64-gcc.c', # No asm available 'openssl/crypto/bf/bf_enc.c', + 'openssl/crypto/bn/rsaz_exp.c', 'openssl/crypto/cast/c_enc.c', 'openssl/crypto/camellia/cmll_misc.c', 'openssl/crypto/des/des_enc.c', - 'openssl/crypto/des/fcrypt_b.c' + 'openssl/crypto/des/fcrypt_b.c', + 'openssl/crypto/ec/ecp_nistz256.c', + 'openssl/crypto/ui/ui_compat.c' ], - 'openssl_sources_ia32_mac_gas': [ + 'openssl_sources_asm_latest_ia32_mac_gas': [ 'asm/x86-macosx-gas/aes/aes-586.s', 'asm/x86-macosx-gas/aes/aesni-x86.s', 'asm/x86-macosx-gas/aes/vpaes-x86.s', - 'asm/x86-macosx-gas/bf/bf-686.s', + 'asm/x86-macosx-gas/bf/bf-586.s', + 'asm/x86-macosx-gas/bn/bn-586.s', + 'asm/x86-macosx-gas/bn/co-586.s', 'asm/x86-macosx-gas/bn/x86-mont.s', - 'asm/x86-macosx-gas/bn/x86.s', + 'asm/x86-macosx-gas/bn/x86-gf2m.s', 'asm/x86-macosx-gas/camellia/cmll-x86.s', 'asm/x86-macosx-gas/cast/cast-586.s', 'asm/x86-macosx-gas/des/crypt586.s', 'asm/x86-macosx-gas/des/des-586.s', 'asm/x86-macosx-gas/md5/md5-586.s', 'asm/x86-macosx-gas/rc4/rc4-586.s', - 'asm/x86-macosx-gas/rc5/rc5-586.s', 'asm/x86-macosx-gas/ripemd/rmd-586.s', 
'asm/x86-macosx-gas/sha/sha1-586.s', 'asm/x86-macosx-gas/sha/sha256-586.s', @@ -723,45 +791,108 @@ 'asm/x86-macosx-gas/whrlpool/wp-mmx.s', 'asm/x86-macosx-gas/modes/ghash-x86.s', 'asm/x86-macosx-gas/x86cpuid.s', - 'openssl/crypto/whrlpool/wp_block.c' ], - 'openssl_sources_x64_mac_gas': [ + 'openssl_sources_asm_obsolete_ia32_mac_gas': [ + 'asm_obsolete/x86-macosx-gas/aes/aes-586.s', + 'asm_obsolete/x86-macosx-gas/aes/aesni-x86.s', + 'asm_obsolete/x86-macosx-gas/aes/vpaes-x86.s', + 'asm_obsolete/x86-macosx-gas/bf/bf-586.s', + 'asm_obsolete/x86-macosx-gas/bn/bn-586.s', + 'asm_obsolete/x86-macosx-gas/bn/co-586.s', + 'asm_obsolete/x86-macosx-gas/bn/x86-mont.s', + 'asm_obsolete/x86-macosx-gas/bn/x86-gf2m.s', + 'asm_obsolete/x86-macosx-gas/camellia/cmll-x86.s', + 'asm_obsolete/x86-macosx-gas/cast/cast-586.s', + 'asm_obsolete/x86-macosx-gas/des/crypt586.s', + 'asm_obsolete/x86-macosx-gas/des/des-586.s', + 'asm_obsolete/x86-macosx-gas/md5/md5-586.s', + 'asm_obsolete/x86-macosx-gas/rc4/rc4-586.s', + 'asm_obsolete/x86-macosx-gas/ripemd/rmd-586.s', + 'asm_obsolete/x86-macosx-gas/sha/sha1-586.s', + 'asm_obsolete/x86-macosx-gas/sha/sha256-586.s', + 'asm_obsolete/x86-macosx-gas/sha/sha512-586.s', + 'asm_obsolete/x86-macosx-gas/whrlpool/wp-mmx.s', + 'asm_obsolete/x86-macosx-gas/modes/ghash-x86.s', + 'asm_obsolete/x86-macosx-gas/x86cpuid.s', + ], + 'openssl_sources_asm_latest_x64_mac_gas': [ 'asm/x64-macosx-gas/aes/aes-x86_64.s', 'asm/x64-macosx-gas/aes/aesni-x86_64.s', 'asm/x64-macosx-gas/aes/vpaes-x86_64.s', + 'asm/x64-macosx-gas/aes/aesni-mb-x86_64.s', + 'asm/x64-macosx-gas/aes/aesni-sha256-x86_64.s', 'asm/x64-macosx-gas/aes/bsaes-x86_64.s', 'asm/x64-macosx-gas/aes/aesni-sha1-x86_64.s', - 'asm/x64-macosx-gas/bn/modexp512-x86_64.s', + 'asm/x64-macosx-gas/bn/rsaz-avx2.s', + 'asm/x64-macosx-gas/bn/rsaz-x86_64.s', 'asm/x64-macosx-gas/bn/x86_64-mont.s', 'asm/x64-macosx-gas/bn/x86_64-mont5.s', 'asm/x64-macosx-gas/bn/x86_64-gf2m.s', 'asm/x64-macosx-gas/camellia/cmll-x86_64.s', + 'asm/x64-macosx-gas/ec/ecp_nistz256-x86_64.s', 'asm/x64-macosx-gas/md5/md5-x86_64.s', - 'asm/x64-macosx-gas/rc4/rc4-x86_64.s', - 'asm/x64-macosx-gas/rc4/rc4-md5-x86_64.s', + 'asm/x64-macosx-gas/sha/sha1-mb-x86_64.s', 'asm/x64-macosx-gas/sha/sha1-x86_64.s', + 'asm/x64-macosx-gas/sha/sha256-mb-x86_64.s', 'asm/x64-macosx-gas/sha/sha256-x86_64.s', 'asm/x64-macosx-gas/sha/sha512-x86_64.s', 'asm/x64-macosx-gas/whrlpool/wp-x86_64.s', + 'asm/x64-macosx-gas/modes/aesni-gcm-x86_64.s', 'asm/x64-macosx-gas/modes/ghash-x86_64.s', 'asm/x64-macosx-gas/x86_64cpuid.s', + ], + 'openssl_sources_asm_obsolete_x64_mac_gas': [ + 'asm_obsolete/x64-macosx-gas/aes/aes-x86_64.s', + 'asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s', + 'asm_obsolete/x64-macosx-gas/aes/vpaes-x86_64.s', + 'asm_obsolete/x64-macosx-gas/aes/aesni-mb-x86_64.s', + 'asm_obsolete/x64-macosx-gas/aes/aesni-sha256-x86_64.s', + 'asm_obsolete/x64-macosx-gas/aes/bsaes-x86_64.s', + 'asm_obsolete/x64-macosx-gas/aes/aesni-sha1-x86_64.s', + 'asm_obsolete/x64-macosx-gas/bn/rsaz-avx2.s', + 'asm_obsolete/x64-macosx-gas/bn/rsaz-x86_64.s', + 'asm_obsolete/x64-macosx-gas/bn/x86_64-mont.s', + 'asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s', + 'asm_obsolete/x64-macosx-gas/bn/x86_64-gf2m.s', + 'asm_obsolete/x64-macosx-gas/camellia/cmll-x86_64.s', + 'asm_obsolete/x64-macosx-gas/ec/ecp_nistz256-x86_64.s', + 'asm_obsolete/x64-macosx-gas/md5/md5-x86_64.s', + 'asm_obsolete/x64-macosx-gas/sha/sha1-mb-x86_64.s', + 'asm_obsolete/x64-macosx-gas/sha/sha1-x86_64.s', + 
'asm_obsolete/x64-macosx-gas/sha/sha256-mb-x86_64.s', + 'asm_obsolete/x64-macosx-gas/sha/sha256-x86_64.s', + 'asm_obsolete/x64-macosx-gas/sha/sha512-x86_64.s', + 'asm_obsolete/x64-macosx-gas/whrlpool/wp-x86_64.s', + 'asm_obsolete/x64-macosx-gas/modes/aesni-gcm-x86_64.s', + 'asm_obsolete/x64-macosx-gas/modes/ghash-x86_64.s', + 'asm_obsolete/x64-macosx-gas/x86_64cpuid.s', + ], + 'openssl_sources_common_x64_mac_gas': [ # Non-generated asm 'openssl/crypto/bn/asm/x86_64-gcc.c', # No asm available 'openssl/crypto/bf/bf_enc.c', + 'openssl/crypto/bn/rsaz_exp.c', 'openssl/crypto/cast/c_enc.c', 'openssl/crypto/camellia/cmll_misc.c', 'openssl/crypto/des/des_enc.c', - 'openssl/crypto/des/fcrypt_b.c' + 'openssl/crypto/des/fcrypt_b.c', + 'openssl/crypto/ec/ecp_nistz256.c', + 'openssl/crypto/ui/ui_compat.c', + 'openssl/crypto/rc4/rc4_skey.c', + 'openssl/crypto/rc4/rc4_enc.c', ], - 'openssl_sources_arm_elf_gas': [ - 'asm/arm-elf-gas/aes/aes-armv4.s', - 'asm/arm-elf-gas/bn/armv4-mont.s', - 'asm/arm-elf-gas/bn/armv4-gf2m.s', - 'asm/arm-elf-gas/sha/sha1-armv4-large.s', - 'asm/arm-elf-gas/sha/sha512-armv4.s', - 'asm/arm-elf-gas/sha/sha256-armv4.s', - 'asm/arm-elf-gas/modes/ghash-armv4.s', + 'openssl_sources_arm_void_gas': [ + 'asm/arm-void-gas/aes/aes-armv4.S', + 'asm/arm-void-gas/aes/bsaes-armv7.S', + 'asm/arm-void-gas/aes/aesv8-armx.S', + 'asm/arm-void-gas/bn/armv4-mont.S', + 'asm/arm-void-gas/bn/armv4-gf2m.S', + 'asm/arm-void-gas/sha/sha1-armv4-large.S', + 'asm/arm-void-gas/sha/sha512-armv4.S', + 'asm/arm-void-gas/sha/sha256-armv4.S', + 'asm/arm-void-gas/modes/ghash-armv4.S', + 'asm/arm-void-gas/modes/ghashv8-armx.S', # No asm available 'openssl/crypto/aes/aes_cbc.c', 'openssl/crypto/bf/bf_enc.c', @@ -774,25 +905,52 @@ 'openssl/crypto/des/fcrypt_b.c', 'openssl/crypto/rc4/rc4_enc.c', 'openssl/crypto/rc4/rc4_skey.c', + 'openssl/crypto/ui/ui_compat.c', 'openssl/crypto/whrlpool/wp_block.c', # PCAP stuff 'openssl/crypto/armcap.c', 'openssl/crypto/armv4cpuid.S', ], - 'openssl_sources_ia32_win_masm': [ + 'openssl_sources_arm64_linux64_gas': [ + 'asm/arm64-linux64-gas/aes/aesv8-armx.S', + 'asm/arm64-linux64-gas/modes/ghashv8-armx.S', + 'asm/arm64-linux64-gas/sha/sha1-armv8.S', + 'asm/arm64-linux64-gas/sha/sha256-armv8.S', + 'asm/arm64-linux64-gas/sha/sha512-armv8.S', + # No asm available + 'openssl/crypto/aes/aes_core.c', + 'openssl/crypto/aes/aes_cbc.c', + 'openssl/crypto/bn/bn_asm.c', + 'openssl/crypto/bf/bf_enc.c', + 'openssl/crypto/cast/c_enc.c', + 'openssl/crypto/camellia/camellia.c', + 'openssl/crypto/camellia/cmll_cbc.c', + 'openssl/crypto/camellia/cmll_misc.c', + 'openssl/crypto/des/des_enc.c', + 'openssl/crypto/des/fcrypt_b.c', + 'openssl/crypto/rc4/rc4_enc.c', + 'openssl/crypto/rc4/rc4_skey.c', + 'openssl/crypto/whrlpool/wp_block.c', + 'openssl/crypto/mem_clr.c', + # PCAP stuff + 'openssl/crypto/armcap.c', + 'openssl/crypto/arm64cpuid.S', + ], + 'openssl_sources_asm_ia32_win_masm': [ 'asm/x86-win32-masm/aes/aes-586.asm', 'asm/x86-win32-masm/aes/aesni-x86.asm', 'asm/x86-win32-masm/aes/vpaes-x86.asm', - 'asm/x86-win32-masm/bf/bf-686.asm', + 'asm/x86-win32-masm/bf/bf-586.asm', + 'asm/x86-win32-masm/bn/bn-586.asm', + 'asm/x86-win32-masm/bn/co-586.asm', 'asm/x86-win32-masm/bn/x86-mont.asm', - 'asm/x86-win32-masm/bn/x86.asm', + 'asm/x86-win32-masm/bn/x86-gf2m.asm', 'asm/x86-win32-masm/camellia/cmll-x86.asm', 'asm/x86-win32-masm/cast/cast-586.asm', 'asm/x86-win32-masm/des/crypt586.asm', 'asm/x86-win32-masm/des/des-586.asm', 'asm/x86-win32-masm/md5/md5-586.asm', 'asm/x86-win32-masm/rc4/rc4-586.asm', - 
'asm/x86-win32-masm/rc5/rc5-586.asm', 'asm/x86-win32-masm/ripemd/rmd-586.asm', 'asm/x86-win32-masm/sha/sha1-586.asm', 'asm/x86-win32-masm/sha/sha256-586.asm', @@ -800,35 +958,131 @@ 'asm/x86-win32-masm/whrlpool/wp-mmx.asm', 'asm/x86-win32-masm/modes/ghash-x86.asm', 'asm/x86-win32-masm/x86cpuid.asm', - 'openssl/crypto/whrlpool/wp_block.c' ], - 'openssl_sources_x64_win_masm': [ + 'openssl_sources_asm_latest_x64_win_masm': [ 'asm/x64-win32-masm/aes/aes-x86_64.asm', 'asm/x64-win32-masm/aes/aesni-x86_64.asm', + 'asm/x64-win32-masm/aes/aesni-mb-x86_64.asm', + 'asm/x64-win32-masm/aes/aesni-sha256-x86_64.asm', 'asm/x64-win32-masm/aes/vpaes-x86_64.asm', 'asm/x64-win32-masm/aes/bsaes-x86_64.asm', 'asm/x64-win32-masm/aes/aesni-sha1-x86_64.asm', - 'asm/x64-win32-masm/bn/modexp512-x86_64.asm', + 'asm/x64-win32-masm/bn/rsaz-avx2.asm', + 'asm/x64-win32-masm/bn/rsaz-x86_64.asm', 'asm/x64-win32-masm/bn/x86_64-mont.asm', 'asm/x64-win32-masm/bn/x86_64-mont5.asm', 'asm/x64-win32-masm/bn/x86_64-gf2m.asm', 'asm/x64-win32-masm/camellia/cmll-x86_64.asm', + 'asm/x64-win32-masm/ec/ecp_nistz256-x86_64.asm', 'asm/x64-win32-masm/md5/md5-x86_64.asm', 'asm/x64-win32-masm/rc4/rc4-x86_64.asm', 'asm/x64-win32-masm/rc4/rc4-md5-x86_64.asm', + 'asm/x64-win32-masm/sha/sha1-mb-x86_64.asm', 'asm/x64-win32-masm/sha/sha1-x86_64.asm', + 'asm/x64-win32-masm/sha/sha256-mb-x86_64.asm', 'asm/x64-win32-masm/sha/sha256-x86_64.asm', 'asm/x64-win32-masm/sha/sha512-x86_64.asm', 'asm/x64-win32-masm/whrlpool/wp-x86_64.asm', + 'asm/x64-win32-masm/modes/aesni-gcm-x86_64.asm', 'asm/x64-win32-masm/modes/ghash-x86_64.asm', 'asm/x64-win32-masm/x86_64cpuid.asm', + ], + 'openssl_sources_asm_obsolete_x64_win_masm': [ + 'asm_obsolete/x64-win32-masm/aes/aes-x86_64.asm', + 'asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm', + 'asm_obsolete/x64-win32-masm/aes/aesni-mb-x86_64.asm', + 'asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm', + 'asm_obsolete/x64-win32-masm/aes/vpaes-x86_64.asm', + 'asm_obsolete/x64-win32-masm/aes/bsaes-x86_64.asm', + 'asm_obsolete/x64-win32-masm/aes/aesni-sha1-x86_64.asm', + 'asm_obsolete/x64-win32-masm/bn/rsaz-avx2.asm', + 'asm_obsolete/x64-win32-masm/bn/rsaz-x86_64.asm', + 'asm_obsolete/x64-win32-masm/bn/x86_64-mont.asm', + 'asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm', + 'asm_obsolete/x64-win32-masm/bn/x86_64-gf2m.asm', + 'asm_obsolete/x64-win32-masm/camellia/cmll-x86_64.asm', + 'asm_obsolete/x64-win32-masm/ec/ecp_nistz256-x86_64.asm', + 'asm_obsolete/x64-win32-masm/md5/md5-x86_64.asm', + 'asm_obsolete/x64-win32-masm/rc4/rc4-x86_64.asm', + 'asm_obsolete/x64-win32-masm/rc4/rc4-md5-x86_64.asm', + 'asm_obsolete/x64-win32-masm/sha/sha1-mb-x86_64.asm', + 'asm_obsolete/x64-win32-masm/sha/sha1-x86_64.asm', + 'asm_obsolete/x64-win32-masm/sha/sha256-mb-x86_64.asm', + 'asm_obsolete/x64-win32-masm/sha/sha256-x86_64.asm', + 'asm_obsolete/x64-win32-masm/sha/sha512-x86_64.asm', + 'asm_obsolete/x64-win32-masm/whrlpool/wp-x86_64.asm', + 'asm_obsolete/x64-win32-masm/modes/aesni-gcm-x86_64.asm', + 'asm_obsolete/x64-win32-masm/modes/ghash-x86_64.asm', + 'asm_obsolete/x64-win32-masm/x86_64cpuid.asm', + ], + 'openssl_sources_common_x64_win_masm': [ # No asm available 'openssl/crypto/bn/bn_asm.c', 'openssl/crypto/bf/bf_enc.c', + 'openssl/crypto/bn/rsaz_exp.c', 'openssl/crypto/cast/c_enc.c', 'openssl/crypto/camellia/cmll_misc.c', 'openssl/crypto/des/des_enc.c', - 'openssl/crypto/des/fcrypt_b.c' + 'openssl/crypto/des/fcrypt_b.c', + 'openssl/crypto/ec/ecp_nistz256.c', + 'openssl/crypto/ui/ui_compat.c' + ], + 'openssl_sources_ia32_win_masm': 
[ + '<@(openssl_sources_asm_ia32_win_masm)', + '<@(openssl_sources_common_ia32)', + ], + # + # Asm files are changed depending on the version of assembler. + # We provide two sets of asm files, one is asm_latest(avx2 and + # addx supported) and the other asm_obsolete(without avx1/2 and addx) + # The asm_latest follows the version as defined in + # https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/ec/asm/ecp_nistz256-avx2.pl#L45-L67 + # + 'conditions': [ + ['(OS=="win" and MSVS_VERSION>="2012") or ' + 'llvm_version>="3.3" or gas_version>="2.23"', { + 'openssl_sources_x64_win_masm': [ + '<@(openssl_sources_asm_latest_x64_win_masm)', + '<@(openssl_sources_common_x64_win_masm)', + ], + 'openssl_sources_ia32_mac_gas': [ + '<@(openssl_sources_asm_latest_ia32_mac_gas)', + '<@(openssl_sources_common_ia32)', + ], + 'openssl_sources_x64_mac_gas': [ + '<@(openssl_sources_asm_latest_x64_mac_gas)', + '<@(openssl_sources_common_x64_mac_gas)', + ], + 'openssl_sources_ia32_elf_gas': [ + '<@(openssl_sources_asm_latest_ia32_elf_gas)', + '<@(openssl_sources_common_ia32)', + ], + 'openssl_sources_x64_elf_gas': [ + '<@(openssl_sources_asm_latest_x64_elf_gas)', + '<@(openssl_sources_common_x64_elf_gas)', + ], + }, { + 'openssl_sources_x64_win_masm': [ + '<@(openssl_sources_asm_obsolete_x64_win_masm)', + '<@(openssl_sources_common_x64_win_masm)', + ], + 'openssl_sources_ia32_mac_gas': [ + '<@(openssl_sources_asm_obsolete_ia32_mac_gas)', + '<@(openssl_sources_common_ia32)', + ], + 'openssl_sources_x64_mac_gas': [ + '<@(openssl_sources_asm_obsolete_x64_mac_gas)', + '<@(openssl_sources_common_x64_mac_gas)', + ], + 'openssl_sources_ia32_elf_gas': [ + '<@(openssl_sources_asm_obsolete_ia32_elf_gas)', + '<@(openssl_sources_common_ia32)', + ], + 'openssl_sources_x64_elf_gas': [ + '<@(openssl_sources_asm_obsolete_x64_elf_gas)', + '<@(openssl_sources_common_x64_elf_gas)', + ], + }] ], 'openssl_cli_sources': [ 'openssl/apps/app_rand.c', @@ -893,6 +1147,12 @@ 'SHA512_ASM', 'GHASH_ASM', ], + 'openssl_defines_arm64': [ + 'OPENSSL_CPUID_OBJ', + 'SHA1_ASM', + 'SHA256_ASM', + 'SHA512_ASM', + ], 'openssl_defines_non_arm': [ 'VPAES_ASM', 'BN_ASM', @@ -909,6 +1169,11 @@ ], 'openssl_defines_all_win': [ 'DSO_WIN32', + '_CRT_SECURE_NO_DEPRECATE', + # following two defines are moved from openssconf.h + 'OPENSSL_NO_DYNAMIC_ENGINE', + # to avoid build errors on Win. See openssl/engines/e_capi.c + 'OPENSSL_NO_CAPIENG', ], 'openssl_defines_all_non_win': [ 'DSO_DLFCN', @@ -916,12 +1181,15 @@ ], 'openssl_defines_ia32_elf': [ '<@(openssl_defines_non_arm)', + 'OPENSSL_BN_ASM_PART_WORDS', ], 'openssl_defines_ia32_mac': [ '<@(openssl_defines_non_arm)', + 'OPENSSL_BN_ASM_PART_WORDS', ], 'openssl_defines_ia32_win': [ '<@(openssl_defines_non_arm)', + 'OPENSSL_BN_ASM_PART_WORDS', ], 'openssl_defines_x64_elf': [ '<@(openssl_defines_non_arm)', diff --git a/deps/openssl/openssl/CHANGES b/deps/openssl/openssl/CHANGES index 74179ab8723f8a..3044aa7dd91d44 100644 --- a/deps/openssl/openssl/CHANGES +++ b/deps/openssl/openssl/CHANGES @@ -2,7 +2,49 @@ OpenSSL CHANGES _______________ - Changes between 1.0.1l and 1.0.1m [19 Mar 2015] + Changes between 1.0.2 and 1.0.2a [19 Mar 2015] + + *) ClientHello sigalgs DoS fix + + If a client connects to an OpenSSL 1.0.2 server and renegotiates with an + invalid signature algorithms extension a NULL pointer dereference will + occur. This can be exploited in a DoS attack against the server. + + This issue was was reported to OpenSSL by David Ramos of Stanford + University. 
+ (CVE-2015-0291) + [Stephen Henson and Matt Caswell] + + *) Multiblock corrupted pointer fix + + OpenSSL 1.0.2 introduced the "multiblock" performance improvement. This + feature only applies on 64 bit x86 architecture platforms that support AES + NI instructions. A defect in the implementation of "multiblock" can cause + OpenSSL's internal write buffer to become incorrectly set to NULL when + using non-blocking IO. Typically, when the user application is using a + socket BIO for writing, this will only result in a failed connection. + However if some other BIO is used then it is likely that a segmentation + fault will be triggered, thus enabling a potential DoS attack. + + This issue was reported to OpenSSL by Daniel Danner and Rainer Mueller. + (CVE-2015-0290) + [Matt Caswell] + + *) Segmentation fault in DTLSv1_listen fix + + The DTLSv1_listen function is intended to be stateless and processes the + initial ClientHello from many peers. It is common for user code to loop + over the call to DTLSv1_listen until a valid ClientHello is received with + an associated cookie. A defect in the implementation of DTLSv1_listen means + that state is preserved in the SSL object from one invocation to the next + that can lead to a segmentation fault. Errors processing the initial + ClientHello can trigger this scenario. An example of such an error could be + that a DTLS1.0 only client is attempting to connect to a DTLS1.2 only + server. + + This issue was reported to OpenSSL by Per Allansson. + (CVE-2015-0207) + [Matt Caswell] *) Segmentation fault in ASN1_TYPE_cmp fix @@ -15,6 +57,20 @@ (CVE-2015-0286) [Stephen Henson] + *) Segmentation fault for invalid PSS parameters fix + + The signature verification routines will crash with a NULL pointer + dereference if presented with an ASN.1 signature using the RSA PSS + algorithm and invalid parameters. Since these routines are used to verify + certificate signature algorithms this can be used to crash any + certificate verification operation and exploited in a DoS attack. Any + application which performs certificate verification is vulnerable including + OpenSSL clients and servers which enable client authentication. + + This issue was was reported to OpenSSL by Brian Carpenter. + (CVE-2015-0208) + [Stephen Henson] + *) ASN.1 structure reuse memory corruption fix Reusing a structure in ASN.1 parsing may allow an attacker to cause @@ -53,6 +109,36 @@ (CVE-2015-0293) [Emilia Käsper] + *) Empty CKE with client auth and DHE fix + + If client auth is used then a server can seg fault in the event of a DHE + ciphersuite being selected and a zero length ClientKeyExchange message + being sent by the client. This could be exploited in a DoS attack. + (CVE-2015-1787) + [Matt Caswell] + + *) Handshake with unseeded PRNG fix + + Under certain conditions an OpenSSL 1.0.2 client can complete a handshake + with an unseeded PRNG. The conditions are: + - The client is on a platform where the PRNG has not been seeded + automatically, and the user has not seeded manually + - A protocol specific client method version has been used (i.e. not + SSL_client_methodv23) + - A ciphersuite is used that does not require additional random data from + the PRNG beyond the initial ClientHello client random (e.g. PSK-RC4-SHA). + + If the handshake succeeds then the client random that has been used will + have been generated from a PRNG with insufficient entropy and therefore the + output may be predictable. 
+ + For example using the following command with an unseeded openssl will + succeed on an unpatched platform: + + openssl s_client -psk 1a2b3c4d -tls1_2 -cipher PSK-RC4-SHA + (CVE-2015-0285) + [Matt Caswell] + *) Use After Free following d2i_ECPrivatekey error fix A malformed EC private key file consumed via the d2i_ECPrivateKey function @@ -79,6 +165,336 @@ *) Removed the export ciphers from the DEFAULT ciphers [Kurt Roeckx] + Changes between 1.0.1l and 1.0.2 [22 Jan 2015] + + *) Facilitate "universal" ARM builds targeting range of ARM ISAs, e.g. + ARMv5 through ARMv8, as opposite to "locking" it to single one. + So far those who have to target multiple plaforms would compromise + and argue that binary targeting say ARMv5 would still execute on + ARMv8. "Universal" build resolves this compromise by providing + near-optimal performance even on newer platforms. + [Andy Polyakov] + + *) Accelerated NIST P-256 elliptic curve implementation for x86_64 + (other platforms pending). + [Shay Gueron & Vlad Krasnov (Intel Corp), Andy Polyakov] + + *) Add support for the SignedCertificateTimestampList certificate and + OCSP response extensions from RFC6962. + [Rob Stradling] + + *) Fix ec_GFp_simple_points_make_affine (thus, EC_POINTs_mul etc.) + for corner cases. (Certain input points at infinity could lead to + bogus results, with non-infinity inputs mapped to infinity too.) + [Bodo Moeller] + + *) Initial support for PowerISA 2.0.7, first implemented in POWER8. + This covers AES, SHA256/512 and GHASH. "Initial" means that most + common cases are optimized and there still is room for further + improvements. Vector Permutation AES for Altivec is also added. + [Andy Polyakov] + + *) Add support for little-endian ppc64 Linux target. + [Marcelo Cerri (IBM)] + + *) Initial support for AMRv8 ISA crypto extensions. This covers AES, + SHA1, SHA256 and GHASH. "Initial" means that most common cases + are optimized and there still is room for further improvements. + Both 32- and 64-bit modes are supported. + [Andy Polyakov, Ard Biesheuvel (Linaro)] + + *) Improved ARMv7 NEON support. + [Andy Polyakov] + + *) Support for SPARC Architecture 2011 crypto extensions, first + implemented in SPARC T4. This covers AES, DES, Camellia, SHA1, + SHA256/512, MD5, GHASH and modular exponentiation. + [Andy Polyakov, David Miller] + + *) Accelerated modular exponentiation for Intel processors, a.k.a. + RSAZ. + [Shay Gueron & Vlad Krasnov (Intel Corp)] + + *) Support for new and upcoming Intel processors, including AVX2, + BMI and SHA ISA extensions. This includes additional "stitched" + implementations, AESNI-SHA256 and GCM, and multi-buffer support + for TLS encrypt. + + This work was sponsored by Intel Corp. + [Andy Polyakov] + + *) Support for DTLS 1.2. This adds two sets of DTLS methods: DTLS_*_method() + supports both DTLS 1.2 and 1.0 and should use whatever version the peer + supports and DTLSv1_2_*_method() which supports DTLS 1.2 only. + [Steve Henson] + + *) Use algorithm specific chains in SSL_CTX_use_certificate_chain_file(): + this fixes a limiation in previous versions of OpenSSL. + [Steve Henson] + + *) Extended RSA OAEP support via EVP_PKEY API. Options to specify digest, + MGF1 digest and OAEP label. + [Steve Henson] + + *) Add EVP support for key wrapping algorithms, to avoid problems with + existing code the flag EVP_CIPHER_CTX_WRAP_ALLOW has to be set in + the EVP_CIPHER_CTX or an error is returned. Add AES and DES3 wrap + algorithms and include tests cases. 
+ [Steve Henson] + + *) Add functions to allocate and set the fields of an ECDSA_METHOD + structure. + [Douglas E. Engert, Steve Henson] + + *) New functions OPENSSL_gmtime_diff and ASN1_TIME_diff to find the + difference in days and seconds between two tm or ASN1_TIME structures. + [Steve Henson] + + *) Add -rev test option to s_server to just reverse order of characters + received by client and send back to server. Also prints an abbreviated + summary of the connection parameters. + [Steve Henson] + + *) New option -brief for s_client and s_server to print out a brief summary + of connection parameters. + [Steve Henson] + + *) Add callbacks for arbitrary TLS extensions. + [Trevor Perrin and Ben Laurie] + + *) New option -crl_download in several openssl utilities to download CRLs + from CRLDP extension in certificates. + [Steve Henson] + + *) New options -CRL and -CRLform for s_client and s_server for CRLs. + [Steve Henson] + + *) New function X509_CRL_diff to generate a delta CRL from the difference + of two full CRLs. Add support to "crl" utility. + [Steve Henson] + + *) New functions to set lookup_crls function and to retrieve + X509_STORE from X509_STORE_CTX. + [Steve Henson] + + *) Print out deprecated issuer and subject unique ID fields in + certificates. + [Steve Henson] + + *) Extend OCSP I/O functions so they can be used for simple general purpose + HTTP as well as OCSP. New wrapper function which can be used to download + CRLs using the OCSP API. + [Steve Henson] + + *) Delegate command line handling in s_client/s_server to SSL_CONF APIs. + [Steve Henson] + + *) SSL_CONF* functions. These provide a common framework for application + configuration using configuration files or command lines. + [Steve Henson] + + *) SSL/TLS tracing code. This parses out SSL/TLS records using the + message callback and prints the results. Needs compile time option + "enable-ssl-trace". New options to s_client and s_server to enable + tracing. + [Steve Henson] + + *) New ctrl and macro to retrieve supported points extensions. + Print out extension in s_server and s_client. + [Steve Henson] + + *) New functions to retrieve certificate signature and signature + OID NID. + [Steve Henson] + + *) Add functions to retrieve and manipulate the raw cipherlist sent by a + client to OpenSSL. + [Steve Henson] + + *) New Suite B modes for TLS code. These use and enforce the requirements + of RFC6460: restrict ciphersuites, only permit Suite B algorithms and + only use Suite B curves. The Suite B modes can be set by using the + strings "SUITEB128", "SUITEB192" or "SUITEB128ONLY" for the cipherstring. + [Steve Henson] + + *) New chain verification flags for Suite B levels of security. Check + algorithms are acceptable when flags are set in X509_verify_cert. + [Steve Henson] + + *) Make tls1_check_chain return a set of flags indicating checks passed + by a certificate chain. Add additional tests to handle client + certificates: checks for matching certificate type and issuer name + comparison. + [Steve Henson] + + *) If an attempt is made to use a signature algorithm not in the peer + preference list abort the handshake. If client has no suitable + signature algorithms in response to a certificate request do not + use the certificate. + [Steve Henson] + + *) If server EC tmp key is not in client preference list abort handshake. + [Steve Henson] + + *) Add support for certificate stores in CERT structure. This makes it + possible to have different stores per SSL structure or one store in + the parent SSL_CTX. 
Include distint stores for certificate chain + verification and chain building. New ctrl SSL_CTRL_BUILD_CERT_CHAIN + to build and store a certificate chain in CERT structure: returing + an error if the chain cannot be built: this will allow applications + to test if a chain is correctly configured. + + Note: if the CERT based stores are not set then the parent SSL_CTX + store is used to retain compatibility with existing behaviour. + + [Steve Henson] + + *) New function ssl_set_client_disabled to set a ciphersuite disabled + mask based on the current session, check mask when sending client + hello and checking the requested ciphersuite. + [Steve Henson] + + *) New ctrls to retrieve and set certificate types in a certificate + request message. Print out received values in s_client. If certificate + types is not set with custom values set sensible values based on + supported signature algorithms. + [Steve Henson] + + *) Support for distinct client and server supported signature algorithms. + [Steve Henson] + + *) Add certificate callback. If set this is called whenever a certificate + is required by client or server. An application can decide which + certificate chain to present based on arbitrary criteria: for example + supported signature algorithms. Add very simple example to s_server. + This fixes many of the problems and restrictions of the existing client + certificate callback: for example you can now clear an existing + certificate and specify the whole chain. + [Steve Henson] + + *) Add new "valid_flags" field to CERT_PKEY structure which determines what + the certificate can be used for (if anything). Set valid_flags field + in new tls1_check_chain function. Simplify ssl_set_cert_masks which used + to have similar checks in it. + + Add new "cert_flags" field to CERT structure and include a "strict mode". + This enforces some TLS certificate requirements (such as only permitting + certificate signature algorithms contained in the supported algorithms + extension) which some implementations ignore: this option should be used + with caution as it could cause interoperability issues. + [Steve Henson] + + *) Update and tidy signature algorithm extension processing. Work out + shared signature algorithms based on preferences and peer algorithms + and print them out in s_client and s_server. Abort handshake if no + shared signature algorithms. + [Steve Henson] + + *) Add new functions to allow customised supported signature algorithms + for SSL and SSL_CTX structures. Add options to s_client and s_server + to support them. + [Steve Henson] + + *) New function SSL_certs_clear() to delete all references to certificates + from an SSL structure. Before this once a certificate had been added + it couldn't be removed. + [Steve Henson] + + *) Integrate hostname, email address and IP address checking with certificate + verification. New verify options supporting checking in opensl utility. + [Steve Henson] + + *) Fixes and wildcard matching support to hostname and email checking + functions. Add manual page. + [Florian Weimer (Red Hat Product Security Team)] + + *) New functions to check a hostname email or IP address against a + certificate. Add options x509 utility to print results of checks against + a certificate. + [Steve Henson] + + *) Fix OCSP checking. + [Rob Stradling and Ben Laurie] + + *) Initial experimental support for explicitly trusted non-root CAs. + OpenSSL still tries to build a complete chain to a root but if an + intermediate CA has a trust setting included that is used. 
The first + setting is used: whether to trust (e.g., -addtrust option to the x509 + utility) or reject. + [Steve Henson] + + *) Add -trusted_first option which attempts to find certificates in the + trusted store even if an untrusted chain is also supplied. + [Steve Henson] + + *) MIPS assembly pack updates: support for MIPS32r2 and SmartMIPS ASE, + platform support for Linux and Android. + [Andy Polyakov] + + *) Support for linux-x32, ILP32 environment in x86_64 framework. + [Andy Polyakov] + + *) Experimental multi-implementation support for FIPS capable OpenSSL. + When in FIPS mode the approved implementations are used as normal, + when not in FIPS mode the internal unapproved versions are used instead. + This means that the FIPS capable OpenSSL isn't forced to use the + (often lower perfomance) FIPS implementations outside FIPS mode. + [Steve Henson] + + *) Transparently support X9.42 DH parameters when calling + PEM_read_bio_DHparameters. This means existing applications can handle + the new parameter format automatically. + [Steve Henson] + + *) Initial experimental support for X9.42 DH parameter format: mainly + to support use of 'q' parameter for RFC5114 parameters. + [Steve Henson] + + *) Add DH parameters from RFC5114 including test data to dhtest. + [Steve Henson] + + *) Support for automatic EC temporary key parameter selection. If enabled + the most preferred EC parameters are automatically used instead of + hardcoded fixed parameters. Now a server just has to call: + SSL_CTX_set_ecdh_auto(ctx, 1) and the server will automatically + support ECDH and use the most appropriate parameters. + [Steve Henson] + + *) Enhance and tidy EC curve and point format TLS extension code. Use + static structures instead of allocation if default values are used. + New ctrls to set curves we wish to support and to retrieve shared curves. + Print out shared curves in s_server. New options to s_server and s_client + to set list of supported curves. + [Steve Henson] + + *) New ctrls to retrieve supported signature algorithms and + supported curve values as an array of NIDs. Extend openssl utility + to print out received values. + [Steve Henson] + + *) Add new APIs EC_curve_nist2nid and EC_curve_nid2nist which convert + between NIDs and the more common NIST names such as "P-256". Enhance + ecparam utility and ECC method to recognise the NIST names for curves. + [Steve Henson] + + *) Enhance SSL/TLS certificate chain handling to support different + chains for each certificate instead of one chain in the parent SSL_CTX. + [Steve Henson] + + *) Support for fixed DH ciphersuite client authentication: where both + server and client use DH certificates with common parameters. + [Steve Henson] + + *) Support for fixed DH ciphersuites: those requiring DH server + certificates. + [Steve Henson] + + *) New function i2d_re_X509_tbs for re-encoding the TBS portion of + the certificate. + Note: Related 1.0.2-beta specific macros X509_get_cert_info, + X509_CINF_set_modified, X509_CINF_get_issuer, X509_CINF_get_extensions and + X509_CINF_get_signature were reverted post internal team review. + Changes between 1.0.1k and 1.0.1l [15 Jan 2015] *) Build fixes for the Windows and OpenVMS platforms diff --git a/deps/openssl/openssl/Configure b/deps/openssl/openssl/Configure index d7ecf973c0a698..f776e23359e283 100755 --- a/deps/openssl/openssl/Configure +++ b/deps/openssl/openssl/Configure @@ -105,6 +105,8 @@ my $usage="Usage: Configure [no- ...] [enable- ...] 
[experimenta my $gcc_devteam_warn = "-Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED"; +my $clang_disabled_warnings = "-Wno-language-extension-token -Wno-extended-offsetof -Wno-padded -Wno-shorten-64-to-32 -Wno-format-nonliteral -Wno-missing-noreturn -Wno-unused-parameter -Wno-sign-conversion -Wno-unreachable-code -Wno-conversion -Wno-documentation -Wno-missing-variable-declarations -Wno-cast-align -Wno-incompatible-pointer-types-discards-qualifiers -Wno-missing-variable-declarations -Wno-missing-field-initializers -Wno-unused-macros -Wno-disabled-macro-expansion -Wno-conditional-uninitialized -Wno-switch-enum"; + my $strict_warnings = 0; my $x86_gcc_des="DES_PTR DES_RISC1 DES_UNROLL"; @@ -124,24 +126,25 @@ my $tlib="-lnsl -lsocket"; my $bits1="THIRTY_TWO_BIT "; my $bits2="SIXTY_FOUR_BIT "; -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:"; +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o::des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:"; my $x86_elf_asm="$x86_asm:elf"; -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:"; -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void"; -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; -my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o 
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; -my $no_asm=":::::::::::::::void"; +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o rsaz_exp.o rsaz-x86_64.o rsaz-avx2.o:ecp_nistz256.o ecp_nistz256-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o aesni-sha256-x86_64.o aesni-mb-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o sha1-mb-x86_64.o sha256-mb-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:"; +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o::des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void"; +my $sparcv8_asm=":sparcv8.o::des_enc-sparc.o fcrypt_b.o:::::::::::::void"; +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::sha1-alpha.o:::::::ghash-alpha.o::void"; +my $mips64_asm=":bn-mips.o mips-mont.o:::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; +my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//; +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o:::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o:::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o::void"; +my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:"; +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o:::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; +my $ppc32_asm=$ppc64_asm; +my $no_asm="::::::::::::::::void"; # As for $BSDthreads. Idea is to maintain "collective" set of flags, # which would cover all BSD flavors. -pthread applies to them all, @@ -152,7 +155,7 @@ my $no_asm=":::::::::::::::void"; # seems to be sufficient? 
my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; -#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib +#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $ec_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib my %table=( # File 'TABLE' (created by 'make TABLE') contains the data from this list, @@ -174,14 +177,14 @@ my %table=( "debug-ben-debug-64", "gcc:$gcc_devteam_warn -Wno-error=overlength-strings -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -g3 -O3 -pipe::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-ben-macos", "cc:$gcc_devteam_warn -arch i386 -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -O3 -DL_ENDIAN -g3 -pipe::(unknown)::-Wl,-search_paths_first::::", "debug-ben-macos-gcc46", "gcc-mp-4.6:$gcc_devteam_warn -Wconversion -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -O3 -DL_ENDIAN -g3 -pipe::(unknown)::::::", -"debug-ben-darwin64","cc:$gcc_devteam_warn -Wno-language-extension-token -Wno-extended-offsetof -arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"debug-ben-darwin64","cc:$gcc_devteam_warn -g -Wno-language-extension-token -Wno-extended-offsetof -arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"debug-ben-debug-64-clang", "clang:$gcc_devteam_warn -Wno-error=overlength-strings -Wno-error=extended-offsetof -Qunused-arguments -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -g3 -O3 -pipe::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-ben-no-opt", "gcc: -Wall -Wmissing-prototypes -Wstrict-prototypes -Wmissing-declarations -DDEBUG_SAFESTACK -DCRYPTO_MDEBUG -Werror -DL_ENDIAN -DTERMIOS -Wall -g3::(unknown)::::::", "debug-ben-strict", "gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DCONST_STRICT -O2 -Wall -Wshadow -Werror -Wpointer-arith -Wcast-qual -Wwrite-strings -pipe::(unknown)::::::", "debug-rse","cc:-DTERMIOS -DL_ENDIAN -pipe -O -g -ggdb3 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}", "debug-bodo", "gcc:$gcc_devteam_warn -Wno-error=overlength-strings -DBN_DEBUG -DBN_DEBUG_RAND -DCONF_DEBUG -DBIO_PAIR_DEBUG -m64 -DL_ENDIAN -DTERMIO -g 
-DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", -"debug-ulf", "gcc:-DTERMIOS -DL_ENDIAN -march=i486 -Wall -DBN_DEBUG -DBN_DEBUG_RAND -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DOPENSSL_NO_ASM -g -Wformat -Wshadow -Wmissing-prototypes -Wmissing-declarations:::CYGWIN32:::${no_asm}:win32:cygwin-shared:::.dll", "debug-steve64", "gcc:$gcc_devteam_warn -m64 -DL_ENDIAN -DTERMIO -DCONF_DEBUG -DDEBUG_SAFESTACK -Wno-overlength-strings -g::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"debug-steve32", "gcc:$gcc_devteam_warn -m32 -DL_ENDIAN -DCONF_DEBUG -DDEBUG_SAFESTACK -g -pipe::-D_REENTRANT::-rdynamic -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debug-steve32", "gcc:$gcc_devteam_warn -m32 -DL_ENDIAN -DCONF_DEBUG -DDEBUG_SAFESTACK -Wno-overlength-strings -g -pipe::-D_REENTRANT::-rdynamic -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-steve-opt", "gcc:$gcc_devteam_warn -m64 -O3 -DL_ENDIAN -DTERMIO -DCONF_DEBUG -DDEBUG_SAFESTACK -g::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-levitte-linux-elf","gcc:-DLEVITTE_DEBUG -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -ggdb -g3 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-levitte-linux-noasm","gcc:-DLEVITTE_DEBUG -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DOPENSSL_NO_ASM -DL_ENDIAN -ggdb -g3 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -193,9 +196,9 @@ my %table=( "debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -mcpu=pentiumpro -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn", "debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -march=i486 -Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -g -march=i486 -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes_x86core.o aes_cbc.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o::des-586.o crypt586.o:aes_x86core.o aes_cbc.o 
aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o::ghash-x86.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-generic32","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -g -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debug-linux-generic64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DTERMIO -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-linux-x86_64","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -m64 -DL_ENDIAN -g -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", "dist", "cc:-O::(unknown)::::::", @@ -225,7 +228,7 @@ my %table=( "solaris64-x86_64-gcc","gcc:-m64 -O3 -Wall -DL_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:solaris-shared:-fPIC:-m64 -shared -static-libgcc:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/64", #### Solaris x86 with Sun C setups -"solaris-x86-cc","cc:-fast -O -Xa::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"solaris-x86-cc","cc:-fast -xarch=generic -O -Xa::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-KPIC:-G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris64-x86_64-cc","cc:-fast -xarch=amd64 -xstrconst -Xa -DL_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:solaris-shared:-KPIC:-xarch=amd64 -G -dy -z text:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/64", #### SPARC Solaris with GNU C setups @@ -300,7 +303,7 @@ my %table=( "hpux-parisc-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${no_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "hpux-parisc1_1-gcc","gcc:-O3 -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-Wl,+s -ldld:BN_LLONG DES_PTR DES_UNROLL DES_RISC1:${parisc11_asm}:dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa1.1", "hpux-parisc2-gcc","gcc:-march=2.0 -O3 -DB_ENDIAN -D_REENTRANT::::-Wl,+s -ldld:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_PTR DES_UNROLL DES_RISC1:".eval{my $asm=$parisc20_asm;$asm=~s/2W\./2\./;$asm=~s/:64/:32/;$asm}.":dl:hpux-shared:-fPIC:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_32", -"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 DES_INT::pa-risc2W.o::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64", +"hpux64-parisc2-gcc","gcc:-O3 -DB_ENDIAN -D_REENTRANT::::-ldl:SIXTY_FOUR_BIT_LONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC1 
DES_INT::pa-risc2W.o:::::::::::::::void:dlfcn:hpux-shared:-fpic:-shared:.sl.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/pa20_64", # More attempts at unified 10.X and 11.X targets for HP C compiler. # @@ -347,20 +350,57 @@ my %table=( # throw in -D[BL]_ENDIAN, whichever appropriate... "linux-generic32","gcc:-O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc", "gcc:-DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -# It's believed that majority of ARM toolchains predefine appropriate -march. -# If you compiler does not, do complement config command line with one! -"linux-armv4", "gcc:-O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + +####################################################################### +# Note that -march is not among compiler options in below linux-armv4 +# target line. Not specifying one is intentional to give you choice to: +# +# a) rely on your compiler default by not specifying one; +# b) specify your target platform explicitly for optimal performance, +# e.g. -march=armv6 or -march=armv7-a; +# c) build "universal" binary that targets *range* of platforms by +# specifying minimum and maximum supported architecture; +# +# As for c) option. It actually makes no sense to specify maximum to be +# less than ARMv7, because it's the least requirement for run-time +# switch between platform-specific code paths. And without run-time +# switch performance would be equivalent to one for minimum. Secondly, +# there are some natural limitations that you'd have to accept and +# respect. Most notably you can *not* build "universal" binary for +# big-endian platform. This is because ARMv7 processor always picks +# instructions in little-endian order. Another similar limitation is +# that -mthumb can't "cross" -march=armv6t2 boundary, because that's +# where it became Thumb-2. Well, this limitation is a bit artificial, +# because it's not really impossible, but it's deemed too tricky to +# support. And of course you have to be sure that your binutils are +# actually up to the task of handling maximum target platform. With all +# this in mind here is an example of how to configure "universal" build: +# +# ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8 +# +"linux-armv4", "gcc: -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-aarch64","gcc: -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +# Configure script adds minimally required -march for assembly support, +# if no -march was specified at command line. mips32 and mips64 below +# refer to contemporary MIPS Architecture specifications, MIPS32 and +# MIPS64, rather than to kernel bitness. 
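The note just above about mips32/mips64 refers to the hunk further down in this Configure diff that prepends a minimal ISA flag when the caller passed neither a -mips* nor a -march= option. A rough standalone restatement of that logic, as a sketch only (hypothetical inputs, and ignoring the $no_asm check that the real hunk also applies):

use strict;
use warnings;

# Mirrors the "linux.*-mips" hunk below: prepend the smallest ISA level the
# assembly modules need, unless the caller already chose one.
sub mips_min_arch {
    my ($target, $flags, $cflags) = @_;
    if ($target =~ /linux.*\-mips/ && $flags !~ /\-m(ips|arch=)/) {
        $cflags = "-mips2 $cflags" if $target =~ /mips32/;
        $cflags = "-mips3 $cflags" if $target =~ /mips64/;
    }
    return $cflags;
}

print mips_min_arch('linux-mips32',   '',                '-mabi=32 -O3'), "\n"; # gets -mips2 prepended
print mips_min_arch('linux64-mips64', '-march=mips64r2', '-mabi=64 -O3'), "\n"; # left alone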
+"linux-mips32", "gcc:-mabi=32 -O3 -Wall -DBN_DIV3W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-mips64", "gcc:-mabi=n32 -O3 -Wall -DBN_DIV3W::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:n32:dlfcn:linux-shared:-fPIC:-mabi=n32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::32", +"linux64-mips64", "gcc:-mabi=64 -O3 -Wall -DBN_DIV3W::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:64:dlfcn:linux-shared:-fPIC:-mabi=64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", #### IA-32 targets... -"linux-ia32-icc", "icc:-DL_ENDIAN -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-ia32-icc", "icc:-DL_ENDIAN -O2::-D_REENTRANT::-ldl -no_cpprt:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-elf", "gcc:-DL_ENDIAN -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-aout", "gcc:-DL_ENDIAN -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out", #### "linux-generic64","gcc:-O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ppc64", "gcc:-m64 -DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", -"linux-ia64", "gcc:-DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"linux-ia64-ecc","ecc:-DL_ENDIAN -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"linux-ia64-icc","icc:-DL_ENDIAN -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-ppc64le","gcc:-m64 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", +"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-ia64-icc","icc:-DL_ENDIAN -O2 -Wall::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-x86_64", "gcc:-m64 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +"linux-x86_64-clang", "clang: -m64 -DL_ENDIAN -O3 -Weverything $clang_disabled_warnings -Qunused-arguments::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +"linux-x86_64-icc", "icc:-DL_ENDIAN -O2::-D_REENTRANT::-ldl 
-no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +"linux-x32", "gcc:-mx32 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-mx32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::x32", "linux64-s390x", "gcc:-m64 -DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", #### So called "highgprs" target for z/Architecture CPUs # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see @@ -407,6 +447,7 @@ my %table=( "android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"android-mips","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### *BSD [do see comment about ${BSDthreads} above!] "BSD-generic32","gcc:-O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -454,11 +495,11 @@ my %table=( # UnixWare 2.0x fails destest with -O. "unixware-2.0","cc:-DFILIO_H -DNO_STRINGS_H::-Kthread::-lsocket -lnsl -lresolv -lx:${x86_gcc_des} ${x86_gcc_opts}:::", "unixware-2.1","cc:-O -DFILIO_H::-Kthread::-lsocket -lnsl -lresolv -lx:${x86_gcc_des} ${x86_gcc_opts}:::", -"unixware-7","cc:-O -DFILIO_H -Kalloca::-Kthread::-lsocket -lnsl:BN_LLONG MD2_CHAR RC4_INDEX ${x86_gcc_des}:${x86_elf_asm}:dlfcn:svr5-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"unixware-7-gcc","gcc:-DL_ENDIAN -DFILIO_H -O3 -fomit-frame-pointer -march=pentium -Wall::-D_REENTRANT::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:gnu-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"unixware-7","cc:-O -DFILIO_H -Kalloca::-Kthread::-lsocket -lnsl:BN_LLONG MD2_CHAR RC4_INDEX ${x86_gcc_des}:${x86_elf_asm}-1:dlfcn:svr5-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"unixware-7-gcc","gcc:-DL_ENDIAN -DFILIO_H -O3 -fomit-frame-pointer -march=pentium -Wall::-D_REENTRANT::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}-1:dlfcn:gnu-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # SCO 5 - Ben Laurie says the -O breaks the SCO cc. 
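Several table entries above and below splice in an eval{ ... } block (for example the hpux-parisc2-gcc line just shown, and the darwin64 entries later) instead of a plain variable. The block simply copies a shared asm string, applies a couple of substitutions and returns the modified copy, so one base definition can serve several closely related targets. A tiny standalone illustration, with a shortened stand-in string rather than the real value:

use strict;
use warnings;

# Shortened stand-in for $parisc20_asm; the real string is defined above.
my $parisc20_asm = "pariscid.o:pa-risc2W.o parisc-mont.o:::aes-parisc.o:ghash-parisc.o::64";

# Same idiom as the hpux-parisc2-gcc entry: copy, tweak, return the copy.
my $pa20_32 = eval {
    my $asm = $parisc20_asm;
    $asm =~ s/2W\./2\./;     # pa-risc2W.o -> pa-risc2.o
    $asm =~ s/:64/:32/;      # flip the trailing 64 marker to 32
    $asm;
};

print "$pa20_32\n";          # derived 32-bit flavour
print "$parisc20_asm\n";     # the original is left untouched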
-"sco5-cc", "cc:-belf::(unknown)::-lsocket -lnsl:${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:svr3-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"sco5-gcc", "gcc:-O3 -fomit-frame-pointer::(unknown)::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:svr3-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"sco5-cc", "cc:-belf::(unknown)::-lsocket -lnsl:${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}-1:dlfcn:svr3-shared:-Kpic::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"sco5-gcc", "gcc:-O3 -fomit-frame-pointer::(unknown)::-lsocket -lnsl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}-1:dlfcn:svr3-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### IBM's AIX. "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", @@ -518,9 +559,9 @@ my %table=( # Visual C targets # # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64 -"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32", +"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32", "VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32", -"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32", +"debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ghash-ia64.o::ias:win32", "debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:".eval{my $asm=$x86_64_asm;$asm=~s/x86_64-gcc\.o/bn_asm.o/;$asm}.":auto:win32", # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement # 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE' @@ -547,9 +588,8 @@ my %table=( "UWIN", "cc:-DTERMIOS -DL_ENDIAN -O -Wall:::UWIN::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${no_asm}:win32", # Cygwin -"Cygwin-pre1.3", "gcc:-DTERMIOS -DL_ENDIAN -fomit-frame-pointer -O3 -m486 -Wall::(unknown):CYGWIN32::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${no_asm}:win32", -"Cygwin", "gcc:-DTERMIOS -DL_ENDIAN -fomit-frame-pointer -O3 -march=i486 -Wall:::CYGWIN32::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:coff:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a", -"debug-Cygwin", 
"gcc:-DTERMIOS -DL_ENDIAN -march=i486 -Wall -DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DOPENSSL_NO_ASM -g -Wformat -Wshadow -Wmissing-prototypes -Wmissing-declarations -Werror:::CYGWIN32:::${no_asm}:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a", +"Cygwin", "gcc:-DTERMIOS -DL_ENDIAN -fomit-frame-pointer -O3 -march=i486 -Wall:::CYGWIN::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:coff:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a", +"Cygwin-x86_64", "gcc:-DTERMIOS -DL_ENDIAN -O3 -Wall:::CYGWIN::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:mingw64:dlfcn:cygwin-shared:-D_WINDLL:-shared:.dll.a", # NetWare from David Ward (dsward@novell.com) # requires either MetroWerks NLM development tools, or gcc / nlmconv @@ -581,7 +621,8 @@ my %table=( "darwin64-ppc-cc","cc:-arch ppc64 -O3 -DB_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc64_asm}:osx64:dlfcn:darwin-shared:-fPIC -fno-common:-arch ppc64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "darwin-i386-cc","cc:-arch i386 -O3 -fomit-frame-pointer -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:".eval{my $asm=$x86_asm;$asm=~s/cast\-586\.o//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-i386-cc","cc:-arch i386 -g3 -DL_ENDIAN::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR:${x86_asm}:macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch i386 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", -"darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"darwin64-x86_64-cc","cc:-arch x86_64 -O3 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", +"debug-darwin64-x86_64-cc","cc:-arch x86_64 -ggdb -g2 -O0 -DL_ENDIAN -Wall::-D_REENTRANT:MACOSX:-Wl,-search_paths_first%:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$x86_64_asm;$asm=~s/rc4\-[^:]+//;$asm}.":macosx:dlfcn:darwin-shared:-fPIC -fno-common:-arch x86_64 -dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", "debug-darwin-ppc-cc","cc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DCRYPTO_MDEBUG -DB_ENDIAN -g -Wall -O::-D_REENTRANT:MACOSX::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${ppc32_asm}:osx32:dlfcn:darwin-shared:-fPIC:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", # iPhoneOS/iOS "iphoneos-cross","llvm-gcc:-O3 -isysroot \$(CROSS_TOP)/SDKs/\$(CROSS_SDK) -fomit-frame-pointer -fno-common::-D_REENTRANT:iOS:-Wl,-search_paths_first%:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:darwin-shared:-fPIC -fno-common:-dynamiclib:.\$(SHLIB_MAJOR).\$(SHLIB_MINOR).dylib", @@ -634,6 +675,7 @@ my $idx_lflags = $idx++; my $idx_bn_ops = $idx++; my $idx_cpuid_obj = $idx++; my $idx_bn_obj = $idx++; +my $idx_ec_obj = $idx++; my $idx_des_obj = $idx++; my $idx_aes_obj = $idx++; my $idx_bf_obj = $idx++; @@ -714,11 +756,13 @@ my %disabled = ( # "what" => "comment" [or special keyword "experimental 
"ec_nistp_64_gcc_128" => "default", "gmp" => "default", "jpake" => "experimental", + "libunbound" => "experimental", "md2" => "default", "rc5" => "default", "rfc3779" => "default", "sctp" => "default", "shared" => "default", + "ssl-trace" => "default", "store" => "experimental", "unit-test" => "default", "zlib" => "default", @@ -728,7 +772,7 @@ my @experimental = (); # This is what $depflags will look like with the above defaults # (we need this to see if we should advise the user to run "make depend"): -my $default_depflags = " -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST"; +my $default_depflags = " -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST"; # Explicit "no-..." options will be collected in %disabled along with the defaults. # To remove something from %disabled, use "enable-foo" (unless it's experimental). @@ -873,16 +917,7 @@ PROCESS_ARGS: } elsif (/^[-+]/) { - if (/^-[lL](.*)$/ or /^-Wl,/) - { - $libs.=$_." "; - } - elsif (/^-[^-]/ or /^\+/) - { - $_ =~ s/%([0-9a-f]{1,2})/chr(hex($1))/gei; - $flags.=$_." "; - } - elsif (/^--prefix=(.*)$/) + if (/^--prefix=(.*)$/) { $prefix=$1; } @@ -926,10 +961,14 @@ PROCESS_ARGS: { $cross_compile_prefix=$1; } - else + elsif (/^-[lL](.*)$/ or /^-Wl,/) + { + $libs.=$_." "; + } + else # common if (/^[-+]/), just pass down... { - print STDERR $usage; - exit(1); + $_ =~ s/%([0-9a-f]{1,2})/chr(hex($1))/gei; + $flags.=$_." "; } } elsif ($_ =~ /^([^:]+):(.+)$/) @@ -1164,6 +1203,7 @@ my $lflags = $fields[$idx_lflags]; my $bn_ops = $fields[$idx_bn_ops]; my $cpuid_obj = $fields[$idx_cpuid_obj]; my $bn_obj = $fields[$idx_bn_obj]; +my $ec_obj = $fields[$idx_ec_obj]; my $des_obj = $fields[$idx_des_obj]; my $aes_obj = $fields[$idx_aes_obj]; my $bf_obj = $fields[$idx_bf_obj]; @@ -1209,6 +1249,12 @@ if ($target =~ /^mingw/ && `$cc --target-help 2>&1` !~ m/\-mno\-cygwin/m) $shared_ldflag =~ s/\-mno\-cygwin\s*//; } +if ($target =~ /linux.*\-mips/ && !$no_asm && $flags !~ /\-m(ips|arch=)/) { + # minimally required architecture flags for assembly modules + $cflags="-mips2 $cflags" if ($target =~ /mips32/); + $cflags="-mips3 $cflags" if ($target =~ /mips64/); +} + my $no_shared_warn=0; my $no_user_cflags=0; @@ -1335,7 +1381,7 @@ $lflags="$libs$lflags" if ($libs ne ""); if ($no_asm) { - $cpuid_obj=$bn_obj= + $cpuid_obj=$bn_obj=$ec_obj= $des_obj=$aes_obj=$bf_obj=$cast_obj=$rc4_obj=$rc5_obj=$cmll_obj= $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj=""; } @@ -1416,6 +1462,7 @@ if ($target =~ /\-icc$/) # Intel C compiler } if ($iccver>=8) { + $cflags=~s/\-KPIC/-fPIC/; # Eliminate unnecessary dependency from libirc.a. This is # essential for shared library support, as otherwise # apps/openssl can end up in endless loop upon startup... 
@@ -1423,12 +1470,17 @@ if ($target =~ /\-icc$/) # Intel C compiler } if ($iccver>=9) { - $cflags.=" -i-static"; - $cflags=~s/\-no_cpprt/-no-cpprt/; + $lflags.=" -i-static"; + $lflags=~s/\-no_cpprt/-no-cpprt/; } if ($iccver>=10) { - $cflags=~s/\-i\-static/-static-intel/; + $lflags=~s/\-i\-static/-static-intel/; + } + if ($iccver>=11) + { + $cflags.=" -no-intel-extensions"; # disable Cilk + $lflags=~s/\-no\-cpprt/-no-cxxlib/; } } @@ -1509,7 +1561,7 @@ if ($rmd160_obj =~ /\.o$/) } if ($aes_obj =~ /\.o$/) { - $cflags.=" -DAES_ASM"; + $cflags.=" -DAES_ASM" if ($aes_obj =~ m/\baes\-/);; # aes-ctr.o is not a real file, only indication that assembler # module implements AES_ctr32_encrypt... $cflags.=" -DAES_CTR_ASM" if ($aes_obj =~ s/\s*aes\-ctr\.o//); @@ -1531,10 +1583,14 @@ else { $wp_obj="wp_block.o"; } $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/); -if ($modes_obj =~ /ghash/) +if ($modes_obj =~ /ghash\-/) { $cflags.=" -DGHASH_ASM"; } +if ($ec_obj =~ /ecp_nistz256/) + { + $cflags.=" -DECP_NISTZ256_ASM"; + } # "Stringify" the C flags string. This permits it to be made part of a string # and works as well on command lines. @@ -1575,7 +1631,7 @@ if ($shlib_version_number =~ /(^[0-9]*)\.([0-9\.]*)/) if ($strict_warnings) { my $wopt; - die "ERROR --strict-warnings requires gcc" unless ($cc =~ /gcc$/); + die "ERROR --strict-warnings requires gcc or clang" unless ($cc =~ /gcc$/ or $cc =~ /clang$/); foreach $wopt (split /\s+/, $gcc_devteam_warn) { $cflags .= " $wopt" unless ($cflags =~ /$wopt/) @@ -1638,6 +1694,7 @@ while () s/^EXE_EXT=.*$/EXE_EXT= $exe_ext/; s/^CPUID_OBJ=.*$/CPUID_OBJ= $cpuid_obj/; s/^BN_ASM=.*$/BN_ASM= $bn_obj/; + s/^EC_ASM=.*$/EC_ASM= $ec_obj/; s/^DES_ENC=.*$/DES_ENC= $des_obj/; s/^AES_ENC=.*$/AES_ENC= $aes_obj/; s/^BF_ENC=.*$/BF_ENC= $bf_obj/; @@ -1699,6 +1756,7 @@ print "CFLAG =$cflags\n"; print "EX_LIBS =$lflags\n"; print "CPUID_OBJ =$cpuid_obj\n"; print "BN_ASM =$bn_obj\n"; +print "EC_ASM =$ec_obj\n"; print "DES_ENC =$des_obj\n"; print "AES_ENC =$aes_obj\n"; print "BF_ENC =$bf_obj\n"; @@ -1997,7 +2055,7 @@ BEGIN VALUE "ProductVersion", "$version\\0" // Optional: //VALUE "Comments", "\\0" - VALUE "LegalCopyright", "Copyright © 1998-2005 The OpenSSL Project. Copyright © 1995-1998 Eric A. Young, Tim J. Hudson. All rights reserved.\\0" + VALUE "LegalCopyright", "Copyright © 1998-2005 The OpenSSL Project. Copyright © 1995-1998 Eric A. Young, Tim J. Hudson. All rights reserved.\\0" //VALUE "LegalTrademarks", "\\0" //VALUE "PrivateBuild", "\\0" //VALUE "SpecialBuild", "\\0" @@ -2106,12 +2164,12 @@ sub print_table_entry { my $target = shift; - (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, - my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, - my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, - my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, - my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, - my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multilib)= + my ($cc, $cflags, $unistd, $thread_cflag, $sys_id, $lflags, + $bn_ops, $cpuid_obj, $bn_obj, $ec_obj, $des_obj, $aes_obj, $bf_obj, + $md5_obj, $sha1_obj, $cast_obj, $rc4_obj, $rmd160_obj, + $rc5_obj, $wp_obj, $cmll_obj, $modes_obj, $engines_obj, + $perlasm_scheme, $dso_scheme, $shared_target, $shared_cflag, + $shared_ldflag, $shared_extension, $ranlib, $arflags, $multilib)= split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); print <. -OpenSSL 1.0.1e was released on Feb 11th, 2013. 
+OpenSSL 1.0.1a was released on Apr 19th, 2012. In addition to the current stable release, you can also access daily snapshots of the OpenSSL development version at makefile.$BRANCH +make -f makefile.$BRANCH init diff --git a/deps/openssl/openssl/GitMake b/deps/openssl/openssl/GitMake new file mode 100755 index 00000000000000..47beffd6bc1f70 --- /dev/null +++ b/deps/openssl/openssl/GitMake @@ -0,0 +1,5 @@ +#!/bin/sh + +BRANCH=`git rev-parse --abbrev-ref HEAD` + +make -f makefile.$BRANCH $@ diff --git a/deps/openssl/openssl/Makefile b/deps/openssl/openssl/Makefile index d3f31f0764c8a9..c6b9e9d5cb7b2f 100644 --- a/deps/openssl/openssl/Makefile +++ b/deps/openssl/openssl/Makefile @@ -4,16 +4,16 @@ ## Makefile for OpenSSL ## -VERSION=1.0.1m +VERSION=1.0.2a MAJOR=1 -MINOR=0.1 +MINOR=0.2 SHLIB_VERSION_NUMBER=1.0.0 SHLIB_VERSION_HISTORY= SHLIB_MAJOR=1 SHLIB_MINOR=0.0 SHLIB_EXT= PLATFORM=dist -OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-store no-unit-test no-zlib no-zlib-dynamic static-engine +OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-store no-unit-test no-zlib no-zlib-dynamic static-engine CONFIGURE_ARGS=dist SHLIB_TARGET= @@ -61,7 +61,7 @@ OPENSSLDIR=/usr/local/ssl CC= cc CFLAG= -O -DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST +DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST PEX_LIBS= EX_LIBS= EXE_EXT= @@ -71,7 +71,7 @@ RANLIB= /usr/bin/ranlib NM= nm PERL= /usr/bin/perl TAR= tar -TARFLAGS= --no-recursion --record-size=10240 +TARFLAGS= --no-recursion MAKEDEPPROG=makedepend LIBDIR=lib @@ -90,6 +90,7 @@ PROCESSOR= # CPUID module collects small commonly used assembler snippets CPUID_OBJ= mem_clr.o BN_ASM= bn_asm.o +EC_ASM= DES_ENC= des_enc.o fcrypt_b.o AES_ENC= aes_core.o aes_cbc.o BF_ENC= bf_enc.o @@ -223,8 +224,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \ EXE_EXT='$(EXE_EXT)' SHARED_LIBS='$(SHARED_LIBS)' \ SHLIB_EXT='$(SHLIB_EXT)' SHLIB_TARGET='$(SHLIB_TARGET)' \ PEX_LIBS='$(PEX_LIBS)' EX_LIBS='$(EX_LIBS)' \ - CPUID_OBJ='$(CPUID_OBJ)' \ - BN_ASM='$(BN_ASM)' DES_ENC='$(DES_ENC)' \ + CPUID_OBJ='$(CPUID_OBJ)' BN_ASM='$(BN_ASM)' \ + EC_ASM='$(EC_ASM)' DES_ENC='$(DES_ENC)' \ AES_ENC='$(AES_ENC)' CMLL_ENC='$(CMLL_ENC)' \ BF_ENC='$(BF_ENC)' CAST_ENC='$(CAST_ENC)' \ RC4_ENC='$(RC4_ENC)' RC5_ENC='$(RC5_ENC)' \ @@ -328,7 +329,7 @@ clean-shared: done; \ fi; \ ( set -x; rm -f lib$$i$(SHLIB_EXT) ); \ - if [ "$(PLATFORM)" = "Cygwin" ]; then \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ ( set -x; rm -f cyg$$i$(SHLIB_EXT) lib$$i$(SHLIB_EXT).a ); \ fi; \ done @@ -377,11 +378,11 @@ libssl.pc: Makefile echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \ echo 'includedir=$${prefix}/include'; \ echo ''; \ - echo 'Name: OpenSSL'; \ + echo 'Name: OpenSSL-libssl'; \ echo 'Description: Secure Sockets Layer and cryptography libraries'; \ echo 'Version: '$(VERSION); \ - echo 'Requires: '; \ - echo 'Libs: -L$${libdir} -lssl -lcrypto'; \ + echo 'Requires.private: libcrypto'; \ + echo 'Libs: -L$${libdir} -lssl'; \ echo 'Libs.private: $(EX_LIBS)'; \ echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > libssl.pc @@ -394,10 +395,7 @@ 
openssl.pc: Makefile echo 'Name: OpenSSL'; \ echo 'Description: Secure Sockets Layer and cryptography libraries and tools'; \ echo 'Version: '$(VERSION); \ - echo 'Requires: '; \ - echo 'Libs: -L$${libdir} -lssl -lcrypto'; \ - echo 'Libs.private: $(EX_LIBS)'; \ - echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > openssl.pc + echo 'Requires: libssl libcrypto' ) > openssl.pc Makefile: Makefile.org Configure config @echo "Makefile is older than Makefile.org, Configure or config." @@ -573,11 +571,7 @@ install_sw: do \ if [ -f "$$i" -o -f "$$i.a" ]; then \ ( echo installing $$i; \ - if [ "$(PLATFORM)" != "Cygwin" ]; then \ - cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ - chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ - mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ - else \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ c=`echo $$i | sed 's/^lib\(.*\)\.dll\.a/cyg\1-$(SHLIB_VERSION_NUMBER).dll/'`; \ cp $$c $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \ chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \ @@ -585,6 +579,10 @@ install_sw: cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ + else \ + cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ + chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ + mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ fi ); \ if expr $(PLATFORM) : 'mingw' > /dev/null; then \ ( case $$i in \ @@ -617,6 +615,10 @@ install_sw: install_html_docs: here="`pwd`"; \ + filecase=; \ + case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \ + filecase=-i; \ + esac; \ for subdir in apps crypto ssl; do \ mkdir -p $(INSTALL_PREFIX)$(HTMLDIR)/$$subdir; \ for i in doc/$$subdir/*.pod; do \ @@ -645,9 +647,9 @@ install_docs: @pod2man="`cd ./util; ./pod2mantest $(PERL)`"; \ here="`pwd`"; \ filecase=; \ - if [ "$(PLATFORM)" = "DJGPP" -o "$(PLATFORM)" = "Cygwin" -o "$(PLATFORM)" = "mingw" ]; then \ + case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \ filecase=-i; \ - fi; \ + esac; \ set -e; for i in doc/apps/*.pod; do \ fn=`basename $$i .pod`; \ sec=`$(PERL) util/extract-section.pl 1 < $$i`; \ diff --git a/deps/openssl/openssl/Makefile.bak b/deps/openssl/openssl/Makefile.bak index 6e30d434504b33..190c0b627d95db 100644 --- a/deps/openssl/openssl/Makefile.bak +++ b/deps/openssl/openssl/Makefile.bak @@ -4,16 +4,16 @@ ## Makefile for OpenSSL ## -VERSION=1.0.1m-dev +VERSION=1.0.2a-dev MAJOR=1 -MINOR=0.1 +MINOR=0.2 SHLIB_VERSION_NUMBER=1.0.0 SHLIB_VERSION_HISTORY= SHLIB_MAJOR=1 SHLIB_MINOR=0.0 SHLIB_EXT= PLATFORM=gcc -OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-store no-unit-test no-zlib no-zlib-dynamic static-engine +OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-store no-unit-test no-zlib no-zlib-dynamic static-engine CONFIGURE_ARGS=gcc SHLIB_TARGET= @@ -61,7 +61,7 @@ OPENSSLDIR=/usr/local/ssl CC= gcc CFLAG= -O3 -DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST +DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND 
-DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST PEX_LIBS= EX_LIBS= EXE_EXT= @@ -71,7 +71,7 @@ RANLIB= /usr/bin/ranlib NM= nm PERL= /usr/bin/perl TAR= tar -TARFLAGS= --no-recursion --record-size=10240 +TARFLAGS= --no-recursion MAKEDEPPROG= gcc LIBDIR=lib @@ -90,6 +90,7 @@ PROCESSOR= # CPUID module collects small commonly used assembler snippets CPUID_OBJ= mem_clr.o BN_ASM= bn_asm.o +EC_ASM= DES_ENC= des_enc.o fcrypt_b.o AES_ENC= aes_core.o aes_cbc.o BF_ENC= bf_enc.o @@ -223,8 +224,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \ EXE_EXT='$(EXE_EXT)' SHARED_LIBS='$(SHARED_LIBS)' \ SHLIB_EXT='$(SHLIB_EXT)' SHLIB_TARGET='$(SHLIB_TARGET)' \ PEX_LIBS='$(PEX_LIBS)' EX_LIBS='$(EX_LIBS)' \ - CPUID_OBJ='$(CPUID_OBJ)' \ - BN_ASM='$(BN_ASM)' DES_ENC='$(DES_ENC)' \ + CPUID_OBJ='$(CPUID_OBJ)' BN_ASM='$(BN_ASM)' \ + EC_ASM='$(EC_ASM)' DES_ENC='$(DES_ENC)' \ AES_ENC='$(AES_ENC)' CMLL_ENC='$(CMLL_ENC)' \ BF_ENC='$(BF_ENC)' CAST_ENC='$(CAST_ENC)' \ RC4_ENC='$(RC4_ENC)' RC5_ENC='$(RC5_ENC)' \ @@ -328,7 +329,7 @@ clean-shared: done; \ fi; \ ( set -x; rm -f lib$$i$(SHLIB_EXT) ); \ - if [ "$(PLATFORM)" = "Cygwin" ]; then \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ ( set -x; rm -f cyg$$i$(SHLIB_EXT) lib$$i$(SHLIB_EXT).a ); \ fi; \ done @@ -377,11 +378,11 @@ libssl.pc: Makefile echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \ echo 'includedir=$${prefix}/include'; \ echo ''; \ - echo 'Name: OpenSSL'; \ + echo 'Name: OpenSSL-libssl'; \ echo 'Description: Secure Sockets Layer and cryptography libraries'; \ echo 'Version: '$(VERSION); \ - echo 'Requires: '; \ - echo 'Libs: -L$${libdir} -lssl -lcrypto'; \ + echo 'Requires.private: libcrypto'; \ + echo 'Libs: -L$${libdir} -lssl'; \ echo 'Libs.private: $(EX_LIBS)'; \ echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > libssl.pc @@ -394,10 +395,7 @@ openssl.pc: Makefile echo 'Name: OpenSSL'; \ echo 'Description: Secure Sockets Layer and cryptography libraries and tools'; \ echo 'Version: '$(VERSION); \ - echo 'Requires: '; \ - echo 'Libs: -L$${libdir} -lssl -lcrypto'; \ - echo 'Libs.private: $(EX_LIBS)'; \ - echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > openssl.pc + echo 'Requires: libssl libcrypto' ) > openssl.pc Makefile: Makefile.org Configure config @echo "Makefile is older than Makefile.org, Configure or config." 
@@ -573,11 +571,7 @@ install_sw: do \ if [ -f "$$i" -o -f "$$i.a" ]; then \ ( echo installing $$i; \ - if [ "$(PLATFORM)" != "Cygwin" ]; then \ - cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ - chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ - mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ - else \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ c=`echo $$i | sed 's/^lib\(.*\)\.dll\.a/cyg\1-$(SHLIB_VERSION_NUMBER).dll/'`; \ cp $$c $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \ chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \ @@ -585,6 +579,10 @@ install_sw: cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ + else \ + cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ + chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ + mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ fi ); \ if expr $(PLATFORM) : 'mingw' > /dev/null; then \ ( case $$i in \ @@ -617,6 +615,10 @@ install_sw: install_html_docs: here="`pwd`"; \ + filecase=; \ + case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \ + filecase=-i; \ + esac; \ for subdir in apps crypto ssl; do \ mkdir -p $(INSTALL_PREFIX)$(HTMLDIR)/$$subdir; \ for i in doc/$$subdir/*.pod; do \ @@ -645,9 +647,9 @@ install_docs: @pod2man="`cd ./util; ./pod2mantest $(PERL)`"; \ here="`pwd`"; \ filecase=; \ - if [ "$(PLATFORM)" = "DJGPP" -o "$(PLATFORM)" = "Cygwin" -o "$(PLATFORM)" = "mingw" ]; then \ + case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \ filecase=-i; \ - fi; \ + esac; \ set -e; for i in doc/apps/*.pod; do \ fn=`basename $$i .pod`; \ sec=`$(PERL) util/extract-section.pl 1 < $$i`; \ diff --git a/deps/openssl/openssl/Makefile.org b/deps/openssl/openssl/Makefile.org index c92806f9201fe5..b7a3f96c9f4ab5 100644 --- a/deps/openssl/openssl/Makefile.org +++ b/deps/openssl/openssl/Makefile.org @@ -69,7 +69,7 @@ RANLIB= ranlib NM= nm PERL= perl TAR= tar -TARFLAGS= --no-recursion --record-size=10240 +TARFLAGS= --no-recursion MAKEDEPPROG=makedepend LIBDIR=lib @@ -88,6 +88,7 @@ PROCESSOR= # CPUID module collects small commonly used assembler snippets CPUID_OBJ= BN_ASM= bn_asm.o +EC_ASM= DES_ENC= des_enc.o fcrypt_b.o AES_ENC= aes_core.o aes_cbc.o BF_ENC= bf_enc.o @@ -221,8 +222,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \ EXE_EXT='$(EXE_EXT)' SHARED_LIBS='$(SHARED_LIBS)' \ SHLIB_EXT='$(SHLIB_EXT)' SHLIB_TARGET='$(SHLIB_TARGET)' \ PEX_LIBS='$(PEX_LIBS)' EX_LIBS='$(EX_LIBS)' \ - CPUID_OBJ='$(CPUID_OBJ)' \ - BN_ASM='$(BN_ASM)' DES_ENC='$(DES_ENC)' \ + CPUID_OBJ='$(CPUID_OBJ)' BN_ASM='$(BN_ASM)' \ + EC_ASM='$(EC_ASM)' DES_ENC='$(DES_ENC)' \ AES_ENC='$(AES_ENC)' CMLL_ENC='$(CMLL_ENC)' \ BF_ENC='$(BF_ENC)' CAST_ENC='$(CAST_ENC)' \ RC4_ENC='$(RC4_ENC)' RC5_ENC='$(RC5_ENC)' \ @@ -326,7 +327,7 @@ clean-shared: done; \ fi; \ ( set -x; rm -f lib$$i$(SHLIB_EXT) ); \ - if [ "$(PLATFORM)" = "Cygwin" ]; then \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ ( set -x; rm -f cyg$$i$(SHLIB_EXT) lib$$i$(SHLIB_EXT).a ); \ fi; \ done @@ -375,11 +376,11 @@ libssl.pc: Makefile echo 'libdir=$${exec_prefix}/$(LIBDIR)'; \ echo 'includedir=$${prefix}/include'; \ echo ''; \ - echo 'Name: OpenSSL'; \ + echo 'Name: OpenSSL-libssl'; \ echo 'Description: Secure Sockets Layer and cryptography libraries'; \ echo 'Version: 
'$(VERSION); \ - echo 'Requires: '; \ - echo 'Libs: -L$${libdir} -lssl -lcrypto'; \ + echo 'Requires.private: libcrypto'; \ + echo 'Libs: -L$${libdir} -lssl'; \ echo 'Libs.private: $(EX_LIBS)'; \ echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > libssl.pc @@ -392,10 +393,7 @@ openssl.pc: Makefile echo 'Name: OpenSSL'; \ echo 'Description: Secure Sockets Layer and cryptography libraries and tools'; \ echo 'Version: '$(VERSION); \ - echo 'Requires: '; \ - echo 'Libs: -L$${libdir} -lssl -lcrypto'; \ - echo 'Libs.private: $(EX_LIBS)'; \ - echo 'Cflags: -I$${includedir} $(KRB5_INCLUDES)' ) > openssl.pc + echo 'Requires: libssl libcrypto' ) > openssl.pc Makefile: Makefile.org Configure config @echo "Makefile is older than Makefile.org, Configure or config." @@ -571,11 +569,7 @@ install_sw: do \ if [ -f "$$i" -o -f "$$i.a" ]; then \ ( echo installing $$i; \ - if [ "$(PLATFORM)" != "Cygwin" ]; then \ - cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ - chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ - mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ - else \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ c=`echo $$i | sed 's/^lib\(.*\)\.dll\.a/cyg\1-$(SHLIB_VERSION_NUMBER).dll/'`; \ cp $$c $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \ chmod 755 $(INSTALL_PREFIX)$(INSTALLTOP)/bin/$$c.new; \ @@ -583,6 +577,10 @@ install_sw: cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ + else \ + cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ + chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new; \ + mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/$$i; \ fi ); \ if expr $(PLATFORM) : 'mingw' > /dev/null; then \ ( case $$i in \ @@ -615,6 +613,10 @@ install_sw: install_html_docs: here="`pwd`"; \ + filecase=; \ + case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \ + filecase=-i; \ + esac; \ for subdir in apps crypto ssl; do \ mkdir -p $(INSTALL_PREFIX)$(HTMLDIR)/$$subdir; \ for i in doc/$$subdir/*.pod; do \ @@ -643,9 +645,9 @@ install_docs: @pod2man="`cd ./util; ./pod2mantest $(PERL)`"; \ here="`pwd`"; \ filecase=; \ - if [ "$(PLATFORM)" = "DJGPP" -o "$(PLATFORM)" = "Cygwin" -o "$(PLATFORM)" = "mingw" ]; then \ + case "$(PLATFORM)" in DJGPP|Cygwin*|mingw*|darwin*-*-cc) \ filecase=-i; \ - fi; \ + esac; \ set -e; for i in doc/apps/*.pod; do \ fn=`basename $$i .pod`; \ sec=`$(PERL) util/extract-section.pl 1 < $$i`; \ diff --git a/deps/openssl/openssl/NEWS b/deps/openssl/openssl/NEWS index 12616d2c1ff50e..682c583da56cd7 100644 --- a/deps/openssl/openssl/NEWS +++ b/deps/openssl/openssl/NEWS @@ -5,16 +5,33 @@ This file gives a brief overview of the major changes between each OpenSSL release. For more details please read the CHANGES file. 
-  Major changes between OpenSSL 1.0.1l and OpenSSL 1.0.1m [19 Mar 2015]
+  Major changes between OpenSSL 1.0.2 and OpenSSL 1.0.2a [19 Mar 2015]
+      o OpenSSL 1.0.2 ClientHello sigalgs DoS fix (CVE-2015-0291)
+      o Multiblock corrupted pointer fix (CVE-2015-0290)
+      o Segmentation fault in DTLSv1_listen fix (CVE-2015-0207)
       o Segmentation fault in ASN1_TYPE_cmp fix (CVE-2015-0286)
+      o Segmentation fault for invalid PSS parameters fix (CVE-2015-0208)
       o ASN.1 structure reuse memory corruption fix (CVE-2015-0287)
       o PKCS7 NULL pointer dereferences fix (CVE-2015-0289)
       o DoS via reachable assert in SSLv2 servers fix (CVE-2015-0293)
+      o Empty CKE with client auth and DHE fix (CVE-2015-1787)
+      o Handshake with unseeded PRNG fix (CVE-2015-0285)
       o Use After Free following d2i_ECPrivatekey error fix (CVE-2015-0209)
       o X509_to_X509_REQ NULL pointer deref fix (CVE-2015-0288)
       o Removed the export ciphers from the DEFAULT ciphers
+  Major changes between OpenSSL 1.0.1l and OpenSSL 1.0.2 [22 Jan 2015]:
+
+      o Suite B support for TLS 1.2 and DTLS 1.2
+      o Support for DTLS 1.2
+      o TLS automatic EC curve selection.
+      o API to set TLS supported signature algorithms and curves
+      o SSL_CONF configuration API.
+      o TLS Brainpool support.
+      o ALPN support.
+      o CMS support for RSA-PSS, RSA-OAEP, ECDH and X9.42 DH.
+
   Major changes between OpenSSL 1.0.1k and OpenSSL 1.0.1l [15 Jan 2015]
       o Build fixes for the Windows and OpenVMS platforms
diff --git a/deps/openssl/openssl/README b/deps/openssl/openssl/README
index ecdcfb2370d1f8..8ce093dd43ac3b 100644
--- a/deps/openssl/openssl/README
+++ b/deps/openssl/openssl/README
@@ -1,5 +1,5 @@
- OpenSSL 1.0.1m 19 Mar 2015
+ OpenSSL 1.0.2a 19 Mar 2015
  Copyright (c) 1998-2011 The OpenSSL Project
  Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson
@@ -90,32 +90,6 @@
  SSL/TLS Client and Server Tests
  Handling of S/MIME signed or encrypted mail
-
- PATENTS
- -------
-
- Various companies hold various patents for various algorithms in various
- locations around the world. _YOU_ are responsible for ensuring that your use
- of any algorithms is legal by checking if there are any patents in your
- country. The file contains some of the patents that we know about or are
- rumored to exist. This is not a definitive list.
-
- RSA Security holds software patents on the RC5 algorithm. If you
- intend to use this cipher, you must contact RSA Security for
- licensing conditions. Their web page is http://www.rsasecurity.com/.
-
- RC4 is a trademark of RSA Security, so use of this label should perhaps
- only be used with RSA Security's permission.
-
- The IDEA algorithm is patented by Ascom in Austria, France, Germany, Italy,
- Japan, the Netherlands, Spain, Sweden, Switzerland, UK and the USA. They
- should be contacted if that algorithm is to be used; their web page is
- http://www.ascom.ch/.
-
- NTT and Mitsubishi have patents and pending patents on the Camellia
- algorithm, but allow use at no charge without requiring an explicit
- licensing agreement: http://info.isl.ntt.co.jp/crypt/eng/info/chiteki.html
-
  INSTALLATION
  ------------
@@ -161,8 +135,7 @@
  - Problem Description (steps that will reproduce the problem, if known)
  - Stack Traceback (if the application dumps core)
- Report the bug to the OpenSSL project via the Request Tracker
- (http://www.openssl.org/support/rt.html) by mail to:
+ Email the report to:
       openssl-bugs@openssl.org
@@ -170,10 +143,11 @@
  or support queries. Just because something doesn't work the way you expect
  does not mean it is necessarily a bug in OpenSSL.
- Note that mail to openssl-bugs@openssl.org is recorded in the publicly - readable request tracker database and is forwarded to a public - mailing list. Confidential mail may be sent to openssl-security@openssl.org - (PGP key available from the key servers). + Note that mail to openssl-bugs@openssl.org is recorded in the public + request tracker database (see https://www.openssl.org/support/rt.html + for details) and also forwarded to a public mailing list. Confidential + mail may be sent to openssl-security@openssl.org (PGP key available from + the key servers). HOW TO CONTRIBUTE TO OpenSSL ---------------------------- diff --git a/deps/openssl/openssl/apps/apps.c b/deps/openssl/openssl/apps/apps.c index 9862afde3a775f..b0acbc7c14fa29 100644 --- a/deps/openssl/openssl/apps/apps.c +++ b/deps/openssl/openssl/apps/apps.c @@ -119,7 +119,7 @@ #include #include #include -#if !defined(OPENSSL_SYSNAME_WIN32) && !defined(NETWARE_CLIB) +#if !defined(OPENSSL_SYSNAME_WIN32) && !defined(OPENSSL_SYSNAME_WINCE) && !defined(NETWARE_CLIB) # include #endif #include @@ -285,6 +285,8 @@ int str2fmt(char *s) return (FORMAT_PKCS12); else if ((*s == 'E') || (*s == 'e')) return (FORMAT_ENGINE); + else if ((*s == 'H') || (*s == 'h')) + return FORMAT_HTTP; else if ((*s == 'P') || (*s == 'p')) { if (s[1] == 'V' || s[1] == 'v') return FORMAT_PVK; @@ -787,12 +789,72 @@ static int load_pkcs12(BIO *err, BIO *in, const char *desc, return ret; } +int load_cert_crl_http(const char *url, BIO *err, + X509 **pcert, X509_CRL **pcrl) +{ + char *host = NULL, *port = NULL, *path = NULL; + BIO *bio = NULL; + OCSP_REQ_CTX *rctx = NULL; + int use_ssl, rv = 0; + if (!OCSP_parse_url(url, &host, &port, &path, &use_ssl)) + goto err; + if (use_ssl) { + if (err) + BIO_puts(err, "https not supported\n"); + goto err; + } + bio = BIO_new_connect(host); + if (!bio || !BIO_set_conn_port(bio, port)) + goto err; + rctx = OCSP_REQ_CTX_new(bio, 1024); + if (!rctx) + goto err; + if (!OCSP_REQ_CTX_http(rctx, "GET", path)) + goto err; + if (!OCSP_REQ_CTX_add1_header(rctx, "Host", host)) + goto err; + if (pcert) { + do { + rv = X509_http_nbio(rctx, pcert); + } + while (rv == -1); + } else { + do { + rv = X509_CRL_http_nbio(rctx, pcrl); + } while (rv == -1); + } + + err: + if (host) + OPENSSL_free(host); + if (path) + OPENSSL_free(path); + if (port) + OPENSSL_free(port); + if (bio) + BIO_free_all(bio); + if (rctx) + OCSP_REQ_CTX_free(rctx); + if (rv != 1) { + if (bio && err) + BIO_printf(bio_err, "Error loading %s from %s\n", + pcert ? 
"certificate" : "CRL", url); + ERR_print_errors(bio_err); + } + return rv; +} + X509 *load_cert(BIO *err, const char *file, int format, const char *pass, ENGINE *e, const char *cert_descrip) { X509 *x = NULL; BIO *cert; + if (format == FORMAT_HTTP) { + load_cert_crl_http(file, err, &x, NULL); + return x; + } + if ((cert = BIO_new(BIO_s_file())) == NULL) { ERR_print_errors(err); goto end; @@ -850,6 +912,49 @@ X509 *load_cert(BIO *err, const char *file, int format, return (x); } +X509_CRL *load_crl(const char *infile, int format) +{ + X509_CRL *x = NULL; + BIO *in = NULL; + + if (format == FORMAT_HTTP) { + load_cert_crl_http(infile, bio_err, NULL, &x); + return x; + } + + in = BIO_new(BIO_s_file()); + if (in == NULL) { + ERR_print_errors(bio_err); + goto end; + } + + if (infile == NULL) + BIO_set_fp(in, stdin, BIO_NOCLOSE); + else { + if (BIO_read_filename(in, infile) <= 0) { + perror(infile); + goto end; + } + } + if (format == FORMAT_ASN1) + x = d2i_X509_CRL_bio(in, NULL); + else if (format == FORMAT_PEM) + x = PEM_read_bio_X509_CRL(in, NULL, NULL, NULL); + else { + BIO_printf(bio_err, "bad input format specified for input crl\n"); + goto end; + } + if (x == NULL) { + BIO_printf(bio_err, "unable to load CRL\n"); + ERR_print_errors(bio_err); + goto end; + } + + end: + BIO_free(in); + return (x); +} + EVP_PKEY *load_key(BIO *err, const char *file, int format, int maybe_stdin, const char *pass, ENGINE *e, const char *key_descrip) { @@ -2159,6 +2264,9 @@ int args_verify(char ***pargs, int *pargc, char **oldargs = *pargs; char *arg = **pargs, *argn = (*pargs)[1]; time_t at_time = 0; + char *hostname = NULL; + char *email = NULL; + char *ipasc = NULL; if (!strcmp(arg, "-policy")) { if (!argn) *badarg = 1; @@ -2212,6 +2320,21 @@ int args_verify(char ***pargs, int *pargc, at_time = (time_t)timestamp; } (*pargs)++; + } else if (strcmp(arg, "-verify_hostname") == 0) { + if (!argn) + *badarg = 1; + hostname = argn; + (*pargs)++; + } else if (strcmp(arg, "-verify_email") == 0) { + if (!argn) + *badarg = 1; + email = argn; + (*pargs)++; + } else if (strcmp(arg, "-verify_ip") == 0) { + if (!argn) + *badarg = 1; + ipasc = argn; + (*pargs)++; } else if (!strcmp(arg, "-ignore_critical")) flags |= X509_V_FLAG_IGNORE_CRITICAL; else if (!strcmp(arg, "-issuer_checks")) @@ -2238,6 +2361,16 @@ int args_verify(char ***pargs, int *pargc, flags |= X509_V_FLAG_NOTIFY_POLICY; else if (!strcmp(arg, "-check_ss_sig")) flags |= X509_V_FLAG_CHECK_SS_SIGNATURE; + else if (!strcmp(arg, "-trusted_first")) + flags |= X509_V_FLAG_TRUSTED_FIRST; + else if (!strcmp(arg, "-suiteB_128_only")) + flags |= X509_V_FLAG_SUITEB_128_LOS_ONLY; + else if (!strcmp(arg, "-suiteB_128")) + flags |= X509_V_FLAG_SUITEB_128_LOS; + else if (!strcmp(arg, "-suiteB_192")) + flags |= X509_V_FLAG_SUITEB_192_LOS; + else if (!strcmp(arg, "-partial_chain")) + flags |= X509_V_FLAG_PARTIAL_CHAIN; else return 0; @@ -2267,6 +2400,15 @@ int args_verify(char ***pargs, int *pargc, if (at_time) X509_VERIFY_PARAM_set_time(*pm, at_time); + if (hostname && !X509_VERIFY_PARAM_set1_host(*pm, hostname, 0)) + *badarg = 1; + + if (email && !X509_VERIFY_PARAM_set1_email(*pm, email, 0)) + *badarg = 1; + + if (ipasc && !X509_VERIFY_PARAM_set1_ip_asc(*pm, ipasc)) + *badarg = 1; + end: (*pargs)++; @@ -2550,6 +2692,9 @@ void jpake_client_auth(BIO *out, BIO *conn, const char *secret) BIO_puts(out, "JPAKE authentication succeeded, setting PSK\n"); + if (psk_key) + OPENSSL_free(psk_key); + psk_key = BN_bn2hex(JPAKE_get_shared_key(ctx)); BIO_pop(bconn); @@ -2579,6 +2724,9 @@ void 
jpake_server_auth(BIO *out, BIO *conn, const char *secret) BIO_puts(out, "JPAKE authentication succeeded, setting PSK\n"); + if (psk_key) + OPENSSL_free(psk_key); + psk_key = BN_bn2hex(JPAKE_get_shared_key(ctx)); BIO_pop(bconn); @@ -2589,7 +2737,7 @@ void jpake_server_auth(BIO *out, BIO *conn, const char *secret) #endif -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +#ifndef OPENSSL_NO_TLSEXT /*- * next_protos_parse parses a comma separated list of strings into a string * in a format suitable for passing to SSL_CTX_set_next_protos_advertised. @@ -2628,8 +2776,106 @@ unsigned char *next_protos_parse(unsigned short *outlen, const char *in) *outlen = len + 1; return out; } -#endif /* !OPENSSL_NO_TLSEXT && - * !OPENSSL_NO_NEXTPROTONEG */ +#endif /* ndef OPENSSL_NO_TLSEXT */ + +void print_cert_checks(BIO *bio, X509 *x, + const char *checkhost, + const char *checkemail, const char *checkip) +{ + if (x == NULL) + return; + if (checkhost) { + BIO_printf(bio, "Hostname %s does%s match certificate\n", + checkhost, X509_check_host(x, checkhost, 0, 0, NULL) == 1 + ? "" : " NOT"); + } + + if (checkemail) { + BIO_printf(bio, "Email %s does%s match certificate\n", + checkemail, X509_check_email(x, checkemail, 0, + 0) ? "" : " NOT"); + } + + if (checkip) { + BIO_printf(bio, "IP %s does%s match certificate\n", + checkip, X509_check_ip_asc(x, checkip, 0) ? "" : " NOT"); + } +} + +/* Get first http URL from a DIST_POINT structure */ + +static const char *get_dp_url(DIST_POINT *dp) +{ + GENERAL_NAMES *gens; + GENERAL_NAME *gen; + int i, gtype; + ASN1_STRING *uri; + if (!dp->distpoint || dp->distpoint->type != 0) + return NULL; + gens = dp->distpoint->name.fullname; + for (i = 0; i < sk_GENERAL_NAME_num(gens); i++) { + gen = sk_GENERAL_NAME_value(gens, i); + uri = GENERAL_NAME_get0_value(gen, &gtype); + if (gtype == GEN_URI && ASN1_STRING_length(uri) > 6) { + char *uptr = (char *)ASN1_STRING_data(uri); + if (!strncmp(uptr, "http://", 7)) + return uptr; + } + } + return NULL; +} + +/* + * Look through a CRLDP structure and attempt to find an http URL to + * downloads a CRL from. + */ + +static X509_CRL *load_crl_crldp(STACK_OF(DIST_POINT) *crldp) +{ + int i; + const char *urlptr = NULL; + for (i = 0; i < sk_DIST_POINT_num(crldp); i++) { + DIST_POINT *dp = sk_DIST_POINT_value(crldp, i); + urlptr = get_dp_url(dp); + if (urlptr) + return load_crl(urlptr, FORMAT_HTTP); + } + return NULL; +} + +/* + * Example of downloading CRLs from CRLDP: not usable for real world as it + * always downloads, doesn't support non-blocking I/O and doesn't cache + * anything.
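/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * print_cert_checks() above is a thin wrapper around the new
 * X509_check_host()/_check_email()/_check_ip_asc() helpers. Used directly
 * they return 1 on match, 0 on mismatch and a negative value on error;
 * "cert" is assumed to be an X509 already loaded by the caller.
 */
#include <openssl/x509v3.h>

static int cert_matches_host(X509 *cert, const char *host)
{
    /* chklen 0 => host is NUL-terminated; flags 0 => default matching rules */
    int rv = X509_check_host(cert, host, 0, 0, NULL);
    return rv == 1;   /* treat errors (rv < 0) as "no match" */
}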
+ */ + +static STACK_OF(X509_CRL) *crls_http_cb(X509_STORE_CTX *ctx, X509_NAME *nm) +{ + X509 *x; + STACK_OF(X509_CRL) *crls = NULL; + X509_CRL *crl; + STACK_OF(DIST_POINT) *crldp; + x = X509_STORE_CTX_get_current_cert(ctx); + crldp = X509_get_ext_d2i(x, NID_crl_distribution_points, NULL, NULL); + crl = load_crl_crldp(crldp); + sk_DIST_POINT_pop_free(crldp, DIST_POINT_free); + if (!crl) + return NULL; + crls = sk_X509_CRL_new_null(); + sk_X509_CRL_push(crls, crl); + /* Try to download delta CRL */ + crldp = X509_get_ext_d2i(x, NID_freshest_crl, NULL, NULL); + crl = load_crl_crldp(crldp); + sk_DIST_POINT_pop_free(crldp, DIST_POINT_free); + if (crl) + sk_X509_CRL_push(crls, crl); + return crls; +} + +void store_setup_crl_download(X509_STORE *st) +{ + X509_STORE_set_lookup_crls_cb(st, crls_http_cb); +} /* * Platform-specific sections diff --git a/deps/openssl/openssl/apps/apps.h b/deps/openssl/openssl/apps/apps.h index 33b293e5b2151b..8276e708694dd1 100644 --- a/deps/openssl/openssl/apps/apps.h +++ b/deps/openssl/openssl/apps/apps.h @@ -205,7 +205,7 @@ extern BIO *bio_err; # endif # endif -# ifdef OPENSSL_SYSNAME_WIN32 +# if defined(OPENSSL_SYSNAME_WIN32) || defined(OPENSSL_SYSNAME_WINCE) # define openssl_fdset(a,b) FD_SET((unsigned int)a, b) # else # define openssl_fdset(a,b) FD_SET(a, b) @@ -245,6 +245,9 @@ int app_passwd(BIO *err, char *arg1, char *arg2, char **pass1, char **pass2); int add_oid_section(BIO *err, CONF *conf); X509 *load_cert(BIO *err, const char *file, int format, const char *pass, ENGINE *e, const char *cert_descrip); +X509_CRL *load_crl(const char *infile, int format); +int load_cert_crl_http(const char *url, BIO *err, + X509 **pcert, X509_CRL **pcrl); EVP_PKEY *load_key(BIO *err, const char *file, int format, int maybe_stdin, const char *pass, ENGINE *e, const char *key_descrip); EVP_PKEY *load_pubkey(BIO *err, const char *file, int format, int maybe_stdin, @@ -262,8 +265,9 @@ ENGINE *setup_engine(BIO *err, const char *engine, int debug); # ifndef OPENSSL_NO_OCSP OCSP_RESPONSE *process_responder(BIO *err, OCSP_REQUEST *req, - char *host, char *path, char *port, - int use_ssl, STACK_OF(CONF_VALUE) *headers, + const char *host, const char *path, + const char *port, int use_ssl, + const STACK_OF(CONF_VALUE) *headers, int req_timeout); # endif @@ -334,10 +338,15 @@ void jpake_client_auth(BIO *out, BIO *conn, const char *secret); void jpake_server_auth(BIO *out, BIO *conn, const char *secret); # endif -# if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +# ifndef OPENSSL_NO_TLSEXT unsigned char *next_protos_parse(unsigned short *outlen, const char *in); -# endif /* !OPENSSL_NO_TLSEXT && - * !OPENSSL_NO_NEXTPROTONEG */ +# endif /* ndef OPENSSL_NO_TLSEXT */ + +void print_cert_checks(BIO *bio, X509 *x, + const char *checkhost, + const char *checkemail, const char *checkip); + +void store_setup_crl_download(X509_STORE *st); # define FORMAT_UNDEF 0 # define FORMAT_ASN1 1 @@ -353,6 +362,7 @@ unsigned char *next_protos_parse(unsigned short *outlen, const char *in); # define FORMAT_ASN1RSA 10 /* DER RSAPubicKey format */ # define FORMAT_MSBLOB 11 /* MS Key blob format */ # define FORMAT_PVK 12 /* MS PVK file format */ +# define FORMAT_HTTP 13 /* Download using HTTP */ # define EXT_COPY_NONE 0 # define EXT_COPY_ADD 1 diff --git a/deps/openssl/openssl/apps/ca.c b/deps/openssl/openssl/apps/ca.c index 5d29a64c57a07a..d64ec4f14ce591 100644 --- a/deps/openssl/openssl/apps/ca.c +++ b/deps/openssl/openssl/apps/ca.c @@ -479,6 +479,11 @@ int MAIN(int argc, char **argv) goto 
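/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * store_setup_crl_download() above (declared in apps.h) only installs
 * crls_http_cb() via X509_STORE_set_lookup_crls_cb(). For the callback to
 * fire during verification the store also needs CRL checking enabled:
 */
#include <openssl/x509_vfy.h>

static void enable_crl_download(X509_STORE *store)
{
    /* fetch CRLs on demand from the CRLDP extension (see crls_http_cb above) */
    store_setup_crl_download(store);
    /* and actually check them for every certificate in the chain */
    X509_STORE_set_flags(store, X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
}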
bad; infile = *(++argv); dorevoke = 1; + } else if (strcmp(*argv, "-valid") == 0) { + if (--argc < 1) + goto bad; + infile = *(++argv); + dorevoke = 2; } else if (strcmp(*argv, "-extensions") == 0) { if (--argc < 1) goto bad; @@ -1441,6 +1446,8 @@ int MAIN(int argc, char **argv) revcert = load_cert(bio_err, infile, FORMAT_PEM, NULL, e, infile); if (revcert == NULL) goto err; + if (dorevoke == 2) + rev_type = -1; j = do_revoke(revcert, db, rev_type, rev_arg); if (j <= 0) goto err; @@ -1968,8 +1975,12 @@ static int do_body(X509 **xret, EVP_PKEY *pkey, X509 *x509, if (enddate == NULL) X509_time_adj_ex(X509_get_notAfter(ret), days, 0, NULL); - else + else { + int tdays; ASN1_TIME_set_string(X509_get_notAfter(ret), enddate); + ASN1_TIME_diff(&tdays, NULL, NULL, X509_get_notAfter(ret)); + days = tdays; + } if (!X509_set_subject_name(ret, subject)) goto err; @@ -2409,13 +2420,20 @@ static int do_revoke(X509 *x509, CA_DB *db, int type, char *value) } /* Revoke Certificate */ - ok = do_revoke(x509, db, type, value); + if (type == -1) + ok = 1; + else + ok = do_revoke(x509, db, type, value); goto err; } else if (index_name_cmp_noconst(row, rrow)) { BIO_printf(bio_err, "ERROR:name does not match %s\n", row[DB_name]); goto err; + } else if (type == -1) { + BIO_printf(bio_err, "ERROR:Already present, serial number %s\n", + row[DB_serial]); + goto err; } else if (rrow[DB_type][0] == 'R') { BIO_printf(bio_err, "ERROR:Already revoked, serial number %s\n", row[DB_serial]); diff --git a/deps/openssl/openssl/apps/ciphers.c b/deps/openssl/openssl/apps/ciphers.c index f299175f11e15f..66636d2dfd18e2 100644 --- a/deps/openssl/openssl/apps/ciphers.c +++ b/deps/openssl/openssl/apps/ciphers.c @@ -85,6 +85,9 @@ int MAIN(int argc, char **argv) { int ret = 1, i; int verbose = 0, Verbose = 0; +#ifndef OPENSSL_NO_SSL_TRACE + int stdname = 0; +#endif const char **pp; const char *p; int badops = 0; @@ -119,6 +122,10 @@ int MAIN(int argc, char **argv) verbose = 1; else if (strcmp(*argv, "-V") == 0) verbose = Verbose = 1; +#ifndef OPENSSL_NO_SSL_TRACE + else if (strcmp(*argv, "-stdname") == 0) + stdname = verbose = 1; +#endif #ifndef OPENSSL_NO_SSL2 else if (strcmp(*argv, "-ssl2") == 0) meth = SSLv2_client_method(); @@ -202,7 +209,14 @@ int MAIN(int argc, char **argv) id1, id2, id3); } } - +#ifndef OPENSSL_NO_SSL_TRACE + if (stdname) { + const char *nm = SSL_CIPHER_standard_name(c); + if (nm == NULL) + nm = "UNKNOWN"; + BIO_printf(STDout, "%s - ", nm); + } +#endif BIO_puts(STDout, SSL_CIPHER_description(c, buf, sizeof buf)); } } diff --git a/deps/openssl/openssl/apps/cms.c b/deps/openssl/openssl/apps/cms.c index 0cc4b46d8e2825..2c922537c5c0b6 100644 --- a/deps/openssl/openssl/apps/cms.c +++ b/deps/openssl/openssl/apps/cms.c @@ -75,6 +75,8 @@ static void receipt_request_print(BIO *out, CMS_ContentInfo *cms); static CMS_ReceiptRequest *make_receipt_request(STACK_OF(OPENSSL_STRING) *rr_to, int rr_allorfirst, STACK_OF(OPENSSL_STRING) *rr_from); +static int cms_set_pkey_param(EVP_PKEY_CTX *pctx, + STACK_OF(OPENSSL_STRING) *param); # define SMIME_OP 0x10 # define SMIME_IP 0x20 @@ -98,6 +100,14 @@ static CMS_ReceiptRequest *make_receipt_request(STACK_OF(OPENSSL_STRING) int verify_err = 0; +typedef struct cms_key_param_st cms_key_param; + +struct cms_key_param_st { + int idx; + STACK_OF(OPENSSL_STRING) *param; + cms_key_param *next; +}; + int MAIN(int, char **); int MAIN(int argc, char **argv) @@ -112,7 +122,7 @@ int MAIN(int argc, char **argv) STACK_OF(OPENSSL_STRING) *sksigners = NULL, *skkeys = NULL; char *certfile = NULL, 
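/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The do_body() change above recovers "days" from an explicit -enddate by
 * calling ASN1_TIME_diff() with a NULL "from" time, which means "measure
 * from now". The same pattern gives the number of whole days until a
 * certificate expires; "cert" is assumed to come from the caller.
 */
#include <openssl/asn1.h>
#include <openssl/x509.h>

static int days_until_expiry(X509 *cert, int *pdays)
{
    int sec = 0;
    /* from == NULL: difference is measured from the current time */
    return ASN1_TIME_diff(pdays, &sec, NULL, X509_get_notAfter(cert));
}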
*keyfile = NULL, *contfile = NULL; char *certsoutfile = NULL; - const EVP_CIPHER *cipher = NULL; + const EVP_CIPHER *cipher = NULL, *wrap_cipher = NULL; CMS_ContentInfo *cms = NULL, *rcms = NULL; X509_STORE *store = NULL; X509 *cert = NULL, *recip = NULL, *signer = NULL; @@ -140,6 +150,8 @@ int MAIN(int argc, char **argv) unsigned char *pwri_pass = NULL, *pwri_tmp = NULL; size_t secret_keylen = 0, secret_keyidlen = 0; + cms_key_param *key_first = NULL, *key_param = NULL; + ASN1_OBJECT *econtent_type = NULL; X509_VERIFY_PARAM *vpm = NULL; @@ -201,6 +213,8 @@ int MAIN(int argc, char **argv) cipher = EVP_des_ede3_cbc(); else if (!strcmp(*args, "-des")) cipher = EVP_des_cbc(); + else if (!strcmp(*args, "-des3-wrap")) + wrap_cipher = EVP_des_ede3_wrap(); # endif # ifndef OPENSSL_NO_SEED else if (!strcmp(*args, "-seed")) @@ -221,6 +235,12 @@ int MAIN(int argc, char **argv) cipher = EVP_aes_192_cbc(); else if (!strcmp(*args, "-aes256")) cipher = EVP_aes_256_cbc(); + else if (!strcmp(*args, "-aes128-wrap")) + wrap_cipher = EVP_aes_128_wrap(); + else if (!strcmp(*args, "-aes192-wrap")) + wrap_cipher = EVP_aes_192_wrap(); + else if (!strcmp(*args, "-aes256-wrap")) + wrap_cipher = EVP_aes_256_wrap(); # endif # ifndef OPENSSL_NO_CAMELLIA else if (!strcmp(*args, "-camellia128")) @@ -378,7 +398,17 @@ int MAIN(int argc, char **argv) } else if (!strcmp(*args, "-recip")) { if (!args[1]) goto argerr; - recipfile = *++args; + if (operation == SMIME_ENCRYPT) { + if (!encerts) + encerts = sk_X509_new_null(); + cert = load_cert(bio_err, *++args, FORMAT_PEM, + NULL, e, "recipient certificate file"); + if (!cert) + goto end; + sk_X509_push(encerts, cert); + cert = NULL; + } else + recipfile = *++args; } else if (!strcmp(*args, "-certsout")) { if (!args[1]) goto argerr; @@ -413,6 +443,40 @@ int MAIN(int argc, char **argv) if (!args[1]) goto argerr; keyform = str2fmt(*++args); + } else if (!strcmp(*args, "-keyopt")) { + int keyidx = -1; + if (!args[1]) + goto argerr; + if (operation == SMIME_ENCRYPT) { + if (encerts) + keyidx += sk_X509_num(encerts); + } else { + if (keyfile || signerfile) + keyidx++; + if (skkeys) + keyidx += sk_OPENSSL_STRING_num(skkeys); + } + if (keyidx < 0) { + BIO_printf(bio_err, "No key specified\n"); + goto argerr; + } + if (key_param == NULL || key_param->idx != keyidx) { + cms_key_param *nparam; + nparam = OPENSSL_malloc(sizeof(cms_key_param)); + if(!nparam) { + BIO_printf(bio_err, "Out of memory\n"); + goto argerr; + } + nparam->idx = keyidx; + nparam->param = sk_OPENSSL_STRING_new_null(); + nparam->next = NULL; + if (key_first == NULL) + key_first = nparam; + else + key_param->next = nparam; + key_param = nparam; + } + sk_OPENSSL_STRING_push(key_param->param, *++args); } else if (!strcmp(*args, "-rctform")) { if (!args[1]) goto argerr; @@ -502,7 +566,7 @@ int MAIN(int argc, char **argv) badarg = 1; } } else if (operation == SMIME_ENCRYPT) { - if (!*args && !secret_key && !pwri_pass) { + if (!*args && !secret_key && !pwri_pass && !encerts) { BIO_printf(bio_err, "No recipient(s) certificate(s) specified\n"); badarg = 1; } @@ -567,6 +631,7 @@ int MAIN(int argc, char **argv) "-inkey file input private key (if not signer or recipient)\n"); BIO_printf(bio_err, "-keyform arg input private key format (PEM or ENGINE)\n"); + BIO_printf(bio_err, "-keyopt nm:v set public key parameters\n"); BIO_printf(bio_err, "-out file output file\n"); BIO_printf(bio_err, "-outform arg output format SMIME (default), PEM or DER\n"); @@ -650,7 +715,7 @@ int MAIN(int argc, char **argv) goto end; } - if (*args) + if 
(*args && !encerts) encerts = sk_X509_new_null(); while (*args) { if (!(cert = load_cert(bio_err, *args, FORMAT_PEM, @@ -802,10 +867,39 @@ int MAIN(int argc, char **argv) } else if (operation == SMIME_COMPRESS) { cms = CMS_compress(in, -1, flags); } else if (operation == SMIME_ENCRYPT) { + int i; flags |= CMS_PARTIAL; - cms = CMS_encrypt(encerts, in, cipher, flags); + cms = CMS_encrypt(NULL, in, cipher, flags); if (!cms) goto end; + for (i = 0; i < sk_X509_num(encerts); i++) { + CMS_RecipientInfo *ri; + cms_key_param *kparam; + int tflags = flags; + X509 *x = sk_X509_value(encerts, i); + for (kparam = key_first; kparam; kparam = kparam->next) { + if (kparam->idx == i) { + tflags |= CMS_KEY_PARAM; + break; + } + } + ri = CMS_add1_recipient_cert(cms, x, tflags); + if (!ri) + goto end; + if (kparam) { + EVP_PKEY_CTX *pctx; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!cms_set_pkey_param(pctx, kparam->param)) + goto end; + } + if (CMS_RecipientInfo_type(ri) == CMS_RECIPINFO_AGREE + && wrap_cipher) { + EVP_CIPHER_CTX *wctx; + wctx = CMS_RecipientInfo_kari_get0_ctx(ri); + EVP_EncryptInit_ex(wctx, wrap_cipher, NULL, NULL, NULL); + } + } + if (secret_key) { if (!CMS_add0_recipient_key(cms, NID_undef, secret_key, secret_keylen, @@ -878,8 +972,11 @@ int MAIN(int argc, char **argv) flags |= CMS_REUSE_DIGEST; for (i = 0; i < sk_OPENSSL_STRING_num(sksigners); i++) { CMS_SignerInfo *si; + cms_key_param *kparam; + int tflags = flags; signerfile = sk_OPENSSL_STRING_value(sksigners, i); keyfile = sk_OPENSSL_STRING_value(skkeys, i); + signer = load_cert(bio_err, signerfile, FORMAT_PEM, NULL, e, "signer certificate"); if (!signer) @@ -888,9 +985,21 @@ int MAIN(int argc, char **argv) "signing key file"); if (!key) goto end; - si = CMS_add1_signer(cms, signer, key, sign_md, flags); + for (kparam = key_first; kparam; kparam = kparam->next) { + if (kparam->idx == i) { + tflags |= CMS_KEY_PARAM; + break; + } + } + si = CMS_add1_signer(cms, signer, key, sign_md, tflags); if (!si) goto end; + if (kparam) { + EVP_PKEY_CTX *pctx; + pctx = CMS_SignerInfo_get0_pkey_ctx(si); + if (!cms_set_pkey_param(pctx, kparam->param)) + goto end; + } if (rr && !CMS_add1_ReceiptRequest(si, rr)) goto end; X509_free(signer); @@ -1045,6 +1154,13 @@ int MAIN(int argc, char **argv) sk_OPENSSL_STRING_free(rr_to); if (rr_from) sk_OPENSSL_STRING_free(rr_from); + for (key_param = key_first; key_param;) { + cms_key_param *tparam; + sk_OPENSSL_STRING_free(key_param->param); + tparam = key_param->next; + OPENSSL_free(key_param); + key_param = tparam; + } X509_STORE_free(store); X509_free(cert); X509_free(recip); @@ -1218,4 +1334,22 @@ static CMS_ReceiptRequest *make_receipt_request(STACK_OF(OPENSSL_STRING) return NULL; } +static int cms_set_pkey_param(EVP_PKEY_CTX *pctx, + STACK_OF(OPENSSL_STRING) *param) +{ + char *keyopt; + int i; + if (sk_OPENSSL_STRING_num(param) <= 0) + return 1; + for (i = 0; i < sk_OPENSSL_STRING_num(param); i++) { + keyopt = sk_OPENSSL_STRING_value(param, i); + if (pkey_ctrl_string(pctx, keyopt) <= 0) { + BIO_printf(bio_err, "parameter error \"%s\"\n", keyopt); + ERR_print_errors(bio_err); + return 0; + } + } + return 1; +} + #endif diff --git a/deps/openssl/openssl/apps/crl.c b/deps/openssl/openssl/apps/crl.c index 0a05870ca1fc83..c9c3a5f6d19651 100644 --- a/deps/openssl/openssl/apps/crl.c +++ b/deps/openssl/openssl/apps/crl.c @@ -96,7 +96,6 @@ static const char *crl_usage[] = { NULL }; -static X509_CRL *load_crl(char *file, int format); static BIO *bio_out = NULL; int MAIN(int, char **); @@ -106,10 +105,10 @@ 
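/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The -keyopt handling above relies on the new CMS_KEY_PARAM flag: the CMS
 * structure is created with no recipients, each recipient is added with
 * CMS_add1_recipient_cert(), and its EVP_PKEY_CTX is adjusted before
 * CMS_final(). A minimal RSA-OAEP encryption along those lines might look
 * like this; "rcert" and "in" are assumed to be supplied by the caller.
 */
#include <openssl/cms.h>
#include <openssl/evp.h>

static CMS_ContentInfo *encrypt_oaep(X509 *rcert, BIO *in)
{
    int flags = CMS_BINARY | CMS_PARTIAL;
    CMS_ContentInfo *cms = CMS_encrypt(NULL, in, EVP_aes_128_cbc(), flags);
    CMS_RecipientInfo *ri;
    EVP_PKEY_CTX *pctx;

    if (cms == NULL)
        return NULL;
    /* CMS_KEY_PARAM defers finalising this RecipientInfo until CMS_final() */
    ri = CMS_add1_recipient_cert(cms, rcert, flags | CMS_KEY_PARAM);
    if (ri == NULL)
        goto err;
    pctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
    /* equivalent of "-keyopt rsa_padding_mode:oaep" */
    if (EVP_PKEY_CTX_ctrl_str(pctx, "rsa_padding_mode", "oaep") <= 0)
        goto err;
    if (!CMS_final(cms, in, NULL, flags))
        goto err;
    return cms;
 err:
    CMS_ContentInfo_free(cms);
    return NULL;
}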
int MAIN(int argc, char **argv) unsigned long nmflag = 0; X509_CRL *x = NULL; char *CAfile = NULL, *CApath = NULL; - int ret = 1, i, num, badops = 0; + int ret = 1, i, num, badops = 0, badsig = 0; BIO *out = NULL; - int informat, outformat; - char *infile = NULL, *outfile = NULL; + int informat, outformat, keyformat; + char *infile = NULL, *outfile = NULL, *crldiff = NULL, *keyfile = NULL; int hash = 0, issuer = 0, lastupdate = 0, nextupdate = 0, noout = 0, text = 0; #ifndef OPENSSL_NO_MD5 @@ -147,6 +146,7 @@ int MAIN(int argc, char **argv) informat = FORMAT_PEM; outformat = FORMAT_PEM; + keyformat = FORMAT_PEM; argc--; argv++; @@ -173,6 +173,18 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; infile = *(++argv); + } else if (strcmp(*argv, "-gendelta") == 0) { + if (--argc < 1) + goto bad; + crldiff = *(++argv); + } else if (strcmp(*argv, "-key") == 0) { + if (--argc < 1) + goto bad; + keyfile = *(++argv); + } else if (strcmp(*argv, "-keyform") == 0) { + if (--argc < 1) + goto bad; + keyformat = str2fmt(*(++argv)); } else if (strcmp(*argv, "-out") == 0) { if (--argc < 1) goto bad; @@ -214,6 +226,8 @@ int MAIN(int argc, char **argv) fingerprint = ++num; else if (strcmp(*argv, "-crlnumber") == 0) crlnumber = ++num; + else if (strcmp(*argv, "-badsig") == 0) + badsig = 1; else if ((md_alg = EVP_get_digestbyname(*argv + 1))) { /* ok */ digest = md_alg; @@ -281,6 +295,33 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "verify OK\n"); } + if (crldiff) { + X509_CRL *newcrl, *delta; + if (!keyfile) { + BIO_puts(bio_err, "Missing CRL signing key\n"); + goto end; + } + newcrl = load_crl(crldiff, informat); + if (!newcrl) + goto end; + pkey = load_key(bio_err, keyfile, keyformat, 0, NULL, NULL, + "CRL signing key"); + if (!pkey) { + X509_CRL_free(newcrl); + goto end; + } + delta = X509_CRL_diff(x, newcrl, pkey, digest, 0); + X509_CRL_free(newcrl); + EVP_PKEY_free(pkey); + if (delta) { + X509_CRL_free(x); + x = delta; + } else { + BIO_puts(bio_err, "Error creating delta CRL\n"); + goto end; + } + } + if (num) { for (i = 1; i <= num; i++) { if (issuer == i) { @@ -369,6 +410,9 @@ int MAIN(int argc, char **argv) goto end; } + if (badsig) + x->signature->data[x->signature->length - 1] ^= 0x1; + if (outformat == FORMAT_ASN1) i = (int)i2d_X509_CRL_bio(out, x); else if (outformat == FORMAT_PEM) @@ -383,6 +427,8 @@ int MAIN(int argc, char **argv) } ret = 0; end: + if (ret != 0) + ERR_print_errors(bio_err); BIO_free_all(out); BIO_free_all(bio_out); bio_out = NULL; @@ -394,41 +440,3 @@ int MAIN(int argc, char **argv) apps_shutdown(); OPENSSL_EXIT(ret); } - -static X509_CRL *load_crl(char *infile, int format) -{ - X509_CRL *x = NULL; - BIO *in = NULL; - - in = BIO_new(BIO_s_file()); - if (in == NULL) { - ERR_print_errors(bio_err); - goto end; - } - - if (infile == NULL) - BIO_set_fp(in, stdin, BIO_NOCLOSE); - else { - if (BIO_read_filename(in, infile) <= 0) { - perror(infile); - goto end; - } - } - if (format == FORMAT_ASN1) - x = d2i_X509_CRL_bio(in, NULL); - else if (format == FORMAT_PEM) - x = PEM_read_bio_X509_CRL(in, NULL, NULL, NULL); - else { - BIO_printf(bio_err, "bad input format specified for input crl\n"); - goto end; - } - if (x == NULL) { - BIO_printf(bio_err, "unable to load CRL\n"); - ERR_print_errors(bio_err); - goto end; - } - - end: - BIO_free(in); - return (x); -} diff --git a/deps/openssl/openssl/apps/dgst.c b/deps/openssl/openssl/apps/dgst.c index ad2f2348ac09c9..95e5fa3fc7b7d5 100644 --- a/deps/openssl/openssl/apps/dgst.c +++ b/deps/openssl/openssl/apps/dgst.c @@ -103,7 
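/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The new -gendelta option above boils down to one call to X509_CRL_diff(),
 * which signs a delta CRL describing the changes between "base" and "newer".
 * "skey" is assumed to be the CRL issuer's private key; the digest choice is
 * the caller's.
 */
#include <openssl/evp.h>
#include <openssl/x509.h>

static X509_CRL *make_delta_crl(X509_CRL *base, X509_CRL *newer, EVP_PKEY *skey)
{
    /* last argument is reserved flags; the crl app passes 0 as well */
    return X509_CRL_diff(base, newer, skey, EVP_sha256(), 0);
}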
+103,7 @@ int MAIN(int, char **); int MAIN(int argc, char **argv) { - ENGINE *e = NULL; + ENGINE *e = NULL, *impl = NULL; unsigned char *buf = NULL; int i, err = 1; const EVP_MD *md = NULL, *m; @@ -124,6 +124,7 @@ int MAIN(int argc, char **argv) char *passargin = NULL, *passin = NULL; #ifndef OPENSSL_NO_ENGINE char *engine = NULL; + int engine_impl = 0; #endif char *hmac_key = NULL; char *mac_name = NULL; @@ -199,7 +200,8 @@ int MAIN(int argc, char **argv) break; engine = *(++argv); e = setup_engine(bio_err, engine, 0); - } + } else if (strcmp(*argv, "-engine_impl") == 0) + engine_impl = 1; #endif else if (strcmp(*argv, "-hex") == 0) out_bin = 0; @@ -284,6 +286,10 @@ int MAIN(int argc, char **argv) EVP_MD_do_all_sorted(list_md_fn, bio_err); goto end; } +#ifndef OPENSSL_NO_ENGINE + if (engine_impl) + impl = e; +#endif in = BIO_new(BIO_s_file()); bmd = BIO_new(BIO_f_md()); @@ -357,7 +363,7 @@ int MAIN(int argc, char **argv) if (mac_name) { EVP_PKEY_CTX *mac_ctx = NULL; int r = 0; - if (!init_gen_str(bio_err, &mac_ctx, mac_name, e, 0)) + if (!init_gen_str(bio_err, &mac_ctx, mac_name, impl, 0)) goto mac_end; if (macopts) { char *macopt; @@ -391,7 +397,7 @@ int MAIN(int argc, char **argv) } if (hmac_key) { - sigkey = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, e, + sigkey = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, impl, (unsigned char *)hmac_key, -1); if (!sigkey) goto end; @@ -407,9 +413,9 @@ int MAIN(int argc, char **argv) goto end; } if (do_verify) - r = EVP_DigestVerifyInit(mctx, &pctx, md, NULL, sigkey); + r = EVP_DigestVerifyInit(mctx, &pctx, md, impl, sigkey); else - r = EVP_DigestSignInit(mctx, &pctx, md, NULL, sigkey); + r = EVP_DigestSignInit(mctx, &pctx, md, impl, sigkey); if (!r) { BIO_printf(bio_err, "Error setting context\n"); ERR_print_errors(bio_err); @@ -429,9 +435,15 @@ int MAIN(int argc, char **argv) } /* we use md as a filter, reading from 'in' */ else { + EVP_MD_CTX *mctx = NULL; + if (!BIO_get_md_ctx(bmd, &mctx)) { + BIO_printf(bio_err, "Error getting context\n"); + ERR_print_errors(bio_err); + goto end; + } if (md == NULL) md = EVP_md5(); - if (!BIO_set_md(bmd, md)) { + if (!EVP_DigestInit_ex(mctx, md, impl)) { BIO_printf(bio_err, "Error setting digest %s\n", pname); ERR_print_errors(bio_err); goto end; @@ -483,7 +495,8 @@ int MAIN(int argc, char **argv) EVP_PKEY_asn1_get0_info(NULL, NULL, NULL, NULL, &sig_name, ameth); } - md_name = EVP_MD_name(md); + if (md) + md_name = EVP_MD_name(md); } err = 0; for (i = 0; i < argc; i++) { @@ -581,9 +594,12 @@ int do_fp(BIO *out, unsigned char *buf, BIO *bp, int sep, int binout, BIO_printf(out, "%02x", buf[i]); BIO_printf(out, " *%s\n", file); } else { - if (sig_name) - BIO_printf(out, "%s-%s(%s)= ", sig_name, md_name, file); - else if (md_name) + if (sig_name) { + BIO_puts(out, sig_name); + if (md_name) + BIO_printf(out, "-%s", md_name); + BIO_printf(out, "(%s)= ", file); + } else if (md_name) BIO_printf(out, "%s(%s)= ", md_name, file); else BIO_printf(out, "(%s)= ", file); diff --git a/deps/openssl/openssl/apps/dhparam.c b/deps/openssl/openssl/apps/dhparam.c index f86311c28fd363..57199a8d2ad867 100644 --- a/deps/openssl/openssl/apps/dhparam.c +++ b/deps/openssl/openssl/apps/dhparam.c @@ -130,7 +130,7 @@ # undef PROG # define PROG dhparam_main -# define DEFBITS 512 +# define DEFBITS 2048 /*- * -inform arg - input format - default PEM (DER or PEM) @@ -254,7 +254,7 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, " -5 generate parameters using 5 as the generator value\n"); BIO_printf(bio_err, - " numbits number of bits in to generate 
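/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The dgst changes above thread an optional ENGINE ("impl") into
 * EVP_DigestInit_ex()/EVP_DigestSignInit() so that -engine_impl can route
 * digests through the engine. With a NULL ENGINE the default software
 * implementation is used; the same calls compute an HMAC from a raw key.
 * On entry *outlen must hold the size of "out" (EVP_MAX_MD_SIZE is enough).
 */
#include <openssl/evp.h>

static int hmac_sha256(const unsigned char *key, int keylen,
                       const unsigned char *msg, size_t msglen,
                       unsigned char *out, size_t *outlen)
{
    int ok = 0;
    EVP_MD_CTX *mctx = EVP_MD_CTX_create();
    EVP_PKEY *pkey = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, NULL, key, keylen);

    if (mctx == NULL || pkey == NULL)
        goto err;
    /* fourth argument is the ENGINE; NULL picks the default implementation */
    if (EVP_DigestSignInit(mctx, NULL, EVP_sha256(), NULL, pkey) <= 0)
        goto err;
    if (EVP_DigestSignUpdate(mctx, msg, msglen) <= 0)
        goto err;
    ok = EVP_DigestSignFinal(mctx, out, outlen) > 0;
 err:
    if (pkey)
        EVP_PKEY_free(pkey);
    if (mctx)
        EVP_MD_CTX_destroy(mctx);
    return ok;
}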
(default 512)\n"); + " numbits number of bits in to generate (default 2048)\n"); # ifndef OPENSSL_NO_ENGINE BIO_printf(bio_err, " -engine e use engine e, possibly a hardware device.\n"); @@ -489,9 +489,12 @@ int MAIN(int argc, char **argv) if (!noout) { if (outformat == FORMAT_ASN1) i = i2d_DHparams_bio(out, dh); - else if (outformat == FORMAT_PEM) - i = PEM_write_bio_DHparams(out, dh); - else { + else if (outformat == FORMAT_PEM) { + if (dh->q) + i = PEM_write_bio_DHxparams(out, dh); + else + i = PEM_write_bio_DHparams(out, dh); + } else { BIO_printf(bio_err, "bad output format specified for outfile\n"); goto end; } diff --git a/deps/openssl/openssl/apps/ecparam.c b/deps/openssl/openssl/apps/ecparam.c index 1f340a9b84e024..06ac77b838a380 100644 --- a/deps/openssl/openssl/apps/ecparam.c +++ b/deps/openssl/openssl/apps/ecparam.c @@ -370,6 +370,9 @@ int MAIN(int argc, char **argv) } else nid = OBJ_sn2nid(curve_name); + if (nid == 0) + nid = EC_curve_nist2nid(curve_name); + if (nid == 0) { BIO_printf(bio_err, "unknown curve name (%s)\n", curve_name); goto end; diff --git a/deps/openssl/openssl/apps/gendh.c b/deps/openssl/openssl/apps/gendh.c index adaa101783dfd1..fef6f1b1774d15 100644 --- a/deps/openssl/openssl/apps/gendh.c +++ b/deps/openssl/openssl/apps/gendh.c @@ -80,7 +80,7 @@ # include # include -# define DEFBITS 512 +# define DEFBITS 2048 # undef PROG # define PROG gendh_main diff --git a/deps/openssl/openssl/apps/genrsa.c b/deps/openssl/openssl/apps/genrsa.c index 2eabadcc6b28ef..91e6550a5767c5 100644 --- a/deps/openssl/openssl/apps/genrsa.c +++ b/deps/openssl/openssl/apps/genrsa.c @@ -80,7 +80,7 @@ # include # include -# define DEFBITS 1024 +# define DEFBITS 2048 # undef PROG # define PROG genrsa_main diff --git a/deps/openssl/openssl/apps/makeapps.com b/deps/openssl/openssl/apps/makeapps.com index 71a333674473c8..47457afc7c5731 100644 --- a/deps/openssl/openssl/apps/makeapps.com +++ b/deps/openssl/openssl/apps/makeapps.com @@ -776,7 +776,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR" $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" $ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," +$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," $ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS $ ENDIF $! 
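/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The ecparam change above lets curve names be given either as OpenSSL short
 * names ("prime256v1") or as NIST names ("P-256"). Resolving a user-supplied
 * name the same way:
 */
#include <openssl/objects.h>
#include <openssl/ec.h>

static int curve_name_to_nid(const char *name)
{
    int nid = OBJ_sn2nid(name);        /* e.g. "prime256v1" */
    if (nid == 0)
        nid = EC_curve_nist2nid(name); /* e.g. "P-256" */
    return nid;                        /* 0 if the curve is unknown */
}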
diff --git a/deps/openssl/openssl/apps/ocsp.c b/deps/openssl/openssl/apps/ocsp.c index 71def2611af962..ebb3732cd76f37 100644 --- a/deps/openssl/openssl/apps/ocsp.c +++ b/deps/openssl/openssl/apps/ocsp.c @@ -110,16 +110,17 @@ static int print_ocsp_summary(BIO *out, OCSP_BASICRESP *bs, OCSP_REQUEST *req, static int make_ocsp_response(OCSP_RESPONSE **resp, OCSP_REQUEST *req, CA_DB *db, X509 *ca, X509 *rcert, - EVP_PKEY *rkey, STACK_OF(X509) *rother, - unsigned long flags, int nmin, int ndays); + EVP_PKEY *rkey, const EVP_MD *md, + STACK_OF(X509) *rother, unsigned long flags, + int nmin, int ndays, int badsig); static char **lookup_serial(CA_DB *db, ASN1_INTEGER *ser); -static BIO *init_responder(char *port); +static BIO *init_responder(const char *port); static int do_responder(OCSP_REQUEST **preq, BIO **pcbio, BIO *acbio, - char *port); + const char *port); static int send_ocsp_response(BIO *cbio, OCSP_RESPONSE *resp); -static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, char *path, - STACK_OF(CONF_VALUE) *headers, +static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, const char *path, + const STACK_OF(CONF_VALUE) *headers, OCSP_REQUEST *req, int req_timeout); # undef PROG @@ -154,12 +155,14 @@ int MAIN(int argc, char **argv) long nsec = MAX_VALIDITY_PERIOD, maxage = -1; char *CAfile = NULL, *CApath = NULL; X509_STORE *store = NULL; + X509_VERIFY_PARAM *vpm = NULL; STACK_OF(X509) *sign_other = NULL, *verify_other = NULL, *rother = NULL; char *sign_certfile = NULL, *verify_certfile = NULL, *rcertfile = NULL; unsigned long sign_flags = 0, verify_flags = 0, rflags = 0; int ret = 1; int accept_count = -1; int badarg = 0; + int badsig = 0; int i; int ignore_err = 0; STACK_OF(OPENSSL_STRING) *reqnames = NULL; @@ -170,7 +173,7 @@ int MAIN(int argc, char **argv) char *rca_filename = NULL; CA_DB *rdb = NULL; int nmin = 0, ndays = -1; - const EVP_MD *cert_id_md = NULL; + const EVP_MD *cert_id_md = NULL, *rsign_md = NULL; if (bio_err == NULL) bio_err = BIO_new_fp(stderr, BIO_NOCLOSE); @@ -264,6 +267,8 @@ int MAIN(int argc, char **argv) verify_flags |= OCSP_TRUSTOTHER; else if (!strcmp(*args, "-no_intern")) verify_flags |= OCSP_NOINTERN; + else if (!strcmp(*args, "-badsig")) + badsig = 1; else if (!strcmp(*args, "-text")) { req_text = 1; resp_text = 1; @@ -320,6 +325,10 @@ int MAIN(int argc, char **argv) CApath = *args; } else badarg = 1; + } else if (args_verify(&args, NULL, &badarg, bio_err, &vpm)) { + if (badarg) + goto end; + continue; } else if (!strcmp(*args, "-validity_period")) { if (args[1]) { args++; @@ -465,6 +474,14 @@ int MAIN(int argc, char **argv) rcertfile = *args; } else badarg = 1; + } else if (!strcmp(*args, "-rmd")) { + if (args[1]) { + args++; + rsign_md = EVP_get_digestbyname(*args); + if (!rsign_md) + badarg = 1; + } else + badarg = 1; } else if ((cert_id_md = EVP_get_digestbyname((*args) + 1)) == NULL) { badarg = 1; } @@ -582,7 +599,10 @@ int MAIN(int argc, char **argv) add_nonce = 0; if (!req && reqin) { - derbio = BIO_new_file(reqin, "rb"); + if (!strcmp(reqin, "-")) + derbio = BIO_new_fp(stdin, BIO_NOCLOSE); + else + derbio = BIO_new_file(reqin, "rb"); if (!derbio) { BIO_printf(bio_err, "Error Opening OCSP request file\n"); goto end; @@ -679,7 +699,10 @@ int MAIN(int argc, char **argv) OCSP_REQUEST_print(out, req, 0); if (reqout) { - derbio = BIO_new_file(reqout, "wb"); + if (!strcmp(reqout, "-")) + derbio = BIO_new_fp(stdout, BIO_NOCLOSE); + else + derbio = BIO_new_file(reqout, "wb"); if (!derbio) { BIO_printf(bio_err, "Error opening file %s\n", reqout); goto 
end; @@ -704,7 +727,7 @@ int MAIN(int argc, char **argv) if (rdb) { i = make_ocsp_response(&resp, req, rdb, rca_cert, rsigner, rkey, - rother, rflags, nmin, ndays); + rsign_md, rother, rflags, nmin, ndays, badsig); if (cbio) send_ocsp_response(cbio, resp); } else if (host) { @@ -719,7 +742,10 @@ int MAIN(int argc, char **argv) goto end; # endif } else if (respin) { - derbio = BIO_new_file(respin, "rb"); + if (!strcmp(respin, "-")) + derbio = BIO_new_fp(stdin, BIO_NOCLOSE); + else + derbio = BIO_new_file(respin, "rb"); if (!derbio) { BIO_printf(bio_err, "Error Opening OCSP response file\n"); goto end; @@ -739,7 +765,10 @@ int MAIN(int argc, char **argv) done_resp: if (respout) { - derbio = BIO_new_file(respout, "wb"); + if (!strcmp(respout, "-")) + derbio = BIO_new_fp(stdout, BIO_NOCLOSE); + else + derbio = BIO_new_file(respout, "wb"); if (!derbio) { BIO_printf(bio_err, "Error opening file %s\n", respout); goto end; @@ -776,6 +805,10 @@ int MAIN(int argc, char **argv) resp = NULL; goto redo_accept; } + ret = 0; + goto end; + } else if (ridx_filename) { + ret = 0; goto end; } @@ -783,6 +816,8 @@ int MAIN(int argc, char **argv) store = setup_verify(bio_err, CAfile, CApath); if (!store) goto end; + if (vpm) + X509_STORE_set1_param(store, vpm); if (verify_certfile) { verify_other = load_certs(bio_err, verify_certfile, FORMAT_PEM, NULL, e, "validator certificate"); @@ -797,37 +832,38 @@ int MAIN(int argc, char **argv) goto end; } + ret = 0; + if (!noverify) { if (req && ((i = OCSP_check_nonce(req, bs)) <= 0)) { if (i == -1) BIO_printf(bio_err, "WARNING: no nonce in response\n"); else { BIO_printf(bio_err, "Nonce Verify error\n"); + ret = 1; goto end; } } i = OCSP_basic_verify(bs, verify_other, store, verify_flags); - if (i < 0) - i = OCSP_basic_verify(bs, NULL, store, 0); - if (i <= 0) { BIO_printf(bio_err, "Response Verify Failure\n"); ERR_print_errors(bio_err); + ret = 1; } else BIO_printf(bio_err, "Response verify OK\n"); } if (!print_ocsp_summary(out, bs, req, reqnames, ids, nsec, maxage)) - goto end; - - ret = 0; + ret = 1; end: ERR_print_errors(bio_err); X509_free(signer); X509_STORE_free(store); + if (vpm) + X509_VERIFY_PARAM_free(vpm); EVP_PKEY_free(key); EVP_PKEY_free(rkey); X509_free(issuer); @@ -982,8 +1018,9 @@ static int print_ocsp_summary(BIO *out, OCSP_BASICRESP *bs, OCSP_REQUEST *req, static int make_ocsp_response(OCSP_RESPONSE **resp, OCSP_REQUEST *req, CA_DB *db, X509 *ca, X509 *rcert, - EVP_PKEY *rkey, STACK_OF(X509) *rother, - unsigned long flags, int nmin, int ndays) + EVP_PKEY *rkey, const EVP_MD *rmd, + STACK_OF(X509) *rother, unsigned long flags, + int nmin, int ndays, int badsig) { ASN1_TIME *thisupd = NULL, *nextupd = NULL; OCSP_CERTID *cid, *ca_id = NULL; @@ -1067,7 +1104,10 @@ static int make_ocsp_response(OCSP_RESPONSE **resp, OCSP_REQUEST *req, OCSP_copy_nonce(bs, req); - OCSP_basic_sign(bs, rcert, rkey, NULL, rother, flags); + OCSP_basic_sign(bs, rcert, rkey, rmd, rother, flags); + + if (badsig) + bs->signature->data[bs->signature->length - 1] ^= 0x1; *resp = OCSP_response_create(OCSP_RESPONSE_STATUS_SUCCESSFUL, bs); @@ -1103,7 +1143,7 @@ static char **lookup_serial(CA_DB *db, ASN1_INTEGER *ser) /* Quick and dirty OCSP server: read in and parse input request */ -static BIO *init_responder(char *port) +static BIO *init_responder(const char *port) { BIO *acbio = NULL, *bufbio = NULL; bufbio = BIO_new(BIO_f_buffer()); @@ -1135,7 +1175,7 @@ static BIO *init_responder(char *port) } static int do_responder(OCSP_REQUEST **preq, BIO **pcbio, BIO *acbio, - char *port) + 
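/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The verification block above now treats any OCSP_basic_verify() failure as
 * fatal instead of silently retrying without untrusted certificates. A caller
 * checking a response against a trust store follows the same two steps: nonce
 * check (when the request is available), then signature/chain check. This
 * sketch is stricter than the app, which only warns when the response omits
 * the nonce.
 */
#include <openssl/ocsp.h>

static int check_ocsp_response(OCSP_REQUEST *req, OCSP_BASICRESP *bs,
                               X509_STORE *store)
{
    if (req != NULL && OCSP_check_nonce(req, bs) <= 0)
        return 0;                       /* nonce missing or mismatched */
    return OCSP_basic_verify(bs, NULL, store, 0) > 0;
}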
const char *port) { int have_post = 0, len; OCSP_REQUEST *req = NULL; @@ -1196,8 +1236,8 @@ static int send_ocsp_response(BIO *cbio, OCSP_RESPONSE *resp) return 1; } -static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, char *path, - STACK_OF(CONF_VALUE) *headers, +static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, const char *path, + const STACK_OF(CONF_VALUE) *headers, OCSP_REQUEST *req, int req_timeout) { int fd; @@ -1284,8 +1324,9 @@ static OCSP_RESPONSE *query_responder(BIO *err, BIO *cbio, char *path, } OCSP_RESPONSE *process_responder(BIO *err, OCSP_REQUEST *req, - char *host, char *path, char *port, - int use_ssl, STACK_OF(CONF_VALUE) *headers, + const char *host, const char *path, + const char *port, int use_ssl, + const STACK_OF(CONF_VALUE) *headers, int req_timeout) { BIO *cbio = NULL; diff --git a/deps/openssl/openssl/apps/openssl-vms.cnf b/deps/openssl/openssl/apps/openssl-vms.cnf index 45e46a0fb4394b..94baac12fdb034 100644 --- a/deps/openssl/openssl/apps/openssl-vms.cnf +++ b/deps/openssl/openssl/apps/openssl-vms.cnf @@ -103,7 +103,7 @@ emailAddress = optional #################################################################### [ req ] -default_bits = 1024 +default_bits = 2048 default_keyfile = privkey.pem distinguished_name = req_distinguished_name attributes = req_attributes diff --git a/deps/openssl/openssl/apps/openssl.cnf b/deps/openssl/openssl/apps/openssl.cnf index 18760c6e673d76..1eb86c40126308 100644 --- a/deps/openssl/openssl/apps/openssl.cnf +++ b/deps/openssl/openssl/apps/openssl.cnf @@ -103,7 +103,7 @@ emailAddress = optional #################################################################### [ req ] -default_bits = 1024 +default_bits = 2048 default_keyfile = privkey.pem distinguished_name = req_distinguished_name attributes = req_attributes diff --git a/deps/openssl/openssl/apps/pkcs8.c b/deps/openssl/openssl/apps/pkcs8.c index 5c7290ed09bc5c..5099e18417e900 100644 --- a/deps/openssl/openssl/apps/pkcs8.c +++ b/deps/openssl/openssl/apps/pkcs8.c @@ -124,6 +124,16 @@ int MAIN(int argc, char **argv) } } else badarg = 1; + } else if (!strcmp(*args, "-v2prf")) { + if (args[1]) { + args++; + pbe_nid = OBJ_txt2nid(*args); + if (!EVP_PBE_find(EVP_PBE_TYPE_PRF, pbe_nid, NULL, NULL, 0)) { + BIO_printf(bio_err, "Unknown PRF algorithm %s\n", *args); + badarg = 1; + } + } else + badarg = 1; } else if (!strcmp(*args, "-inform")) { if (args[1]) { args++; diff --git a/deps/openssl/openssl/apps/s_apps.h b/deps/openssl/openssl/apps/s_apps.h index 2769b899ed35d9..5b54bfdc4e1ca3 100644 --- a/deps/openssl/openssl/apps/s_apps.h +++ b/deps/openssl/openssl/apps/s_apps.h @@ -152,15 +152,21 @@ typedef fd_mask fd_set; #define PROTOCOL "tcp" int do_server(int port, int type, int *ret, - int (*cb) (char *hostname, int s, unsigned char *context), - unsigned char *context); + int (*cb) (char *hostname, int s, int stype, + unsigned char *context), unsigned char *context, + int naccept); #ifdef HEADER_X509_H int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx); #endif #ifdef HEADER_SSL_H int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file); -int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key); +int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key, + STACK_OF(X509) *chain, int build_chain); +int ssl_print_sigalgs(BIO *out, SSL *s); +int ssl_print_point_formats(BIO *out, SSL *s); +int ssl_print_curves(BIO *out, SSL *s, int noshared); #endif +int ssl_print_tmp_key(BIO *out, SSL *s); int init_client(int *sock, char *server, int port, int 
type); int should_retry(int i); int extract_port(char *str, short *port_ptr); @@ -182,3 +188,24 @@ int MS_CALLBACK generate_cookie_callback(SSL *ssl, unsigned char *cookie, unsigned int *cookie_len); int MS_CALLBACK verify_cookie_callback(SSL *ssl, unsigned char *cookie, unsigned int cookie_len); + +typedef struct ssl_excert_st SSL_EXCERT; + +void ssl_ctx_set_excert(SSL_CTX *ctx, SSL_EXCERT *exc); +void ssl_excert_free(SSL_EXCERT *exc); +int args_excert(char ***pargs, int *pargc, + int *badarg, BIO *err, SSL_EXCERT **pexc); +int load_excert(SSL_EXCERT **pexc, BIO *err); +void print_ssl_summary(BIO *bio, SSL *s); +#ifdef HEADER_SSL_H +int args_ssl(char ***pargs, int *pargc, SSL_CONF_CTX *cctx, + int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr); +int args_ssl_call(SSL_CTX *ctx, BIO *err, SSL_CONF_CTX *cctx, + STACK_OF(OPENSSL_STRING) *str, int no_ecdhe, int no_jpake); +int ssl_ctx_add_crls(SSL_CTX *ctx, STACK_OF(X509_CRL) *crls, + int crl_download); +int ssl_load_stores(SSL_CTX *ctx, const char *vfyCApath, + const char *vfyCAfile, const char *chCApath, + const char *chCAfile, STACK_OF(X509_CRL) *crls, + int crl_download); +#endif diff --git a/deps/openssl/openssl/apps/s_cb.c b/deps/openssl/openssl/apps/s_cb.c index 7918500a6cd7c7..f6e6bcd765e1db 100644 --- a/deps/openssl/openssl/apps/s_cb.c +++ b/deps/openssl/openssl/apps/s_cb.c @@ -125,6 +125,7 @@ #define COOKIE_SECRET_LENGTH 16 int verify_depth = 0; +int verify_quiet = 0; int verify_error = X509_V_OK; int verify_return_error = 0; unsigned char cookie_secret[COOKIE_SECRET_LENGTH]; @@ -139,13 +140,16 @@ int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx) err = X509_STORE_CTX_get_error(ctx); depth = X509_STORE_CTX_get_error_depth(ctx); - BIO_printf(bio_err, "depth=%d ", depth); - if (err_cert) { - X509_NAME_print_ex(bio_err, X509_get_subject_name(err_cert), - 0, XN_FLAG_ONELINE); - BIO_puts(bio_err, "\n"); - } else - BIO_puts(bio_err, "\n"); + if (!verify_quiet || !ok) { + BIO_printf(bio_err, "depth=%d ", depth); + if (err_cert) { + X509_NAME_print_ex(bio_err, + X509_get_subject_name(err_cert), + 0, XN_FLAG_ONELINE); + BIO_puts(bio_err, "\n"); + } else + BIO_puts(bio_err, "\n"); + } if (!ok) { BIO_printf(bio_err, "verify error:num=%d:%s\n", err, X509_verify_cert_error_string(err)); @@ -178,13 +182,14 @@ int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx) BIO_printf(bio_err, "\n"); break; case X509_V_ERR_NO_EXPLICIT_POLICY: - policies_print(bio_err, ctx); + if (!verify_quiet) + policies_print(bio_err, ctx); break; } - if (err == X509_V_OK && ok == 2) + if (err == X509_V_OK && ok == 2 && !verify_quiet) policies_print(bio_err, ctx); - - BIO_printf(bio_err, "verify return:%d\n", ok); + if (ok && !verify_quiet) + BIO_printf(bio_err, "verify return:%d\n", ok); return (ok); } @@ -245,8 +250,10 @@ int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file) return (1); } -int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key) +int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key, + STACK_OF(X509) *chain, int build_chain) { + int chflags = chain ? 
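/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The verify_callback changes above only print per-depth details when
 * verification fails or -quiet was not given. A minimal callback of the same
 * shape, suitable for SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, ...), is:
 */
#include <stdio.h>
#include <openssl/ssl.h>
#include <openssl/x509.h>

static int quiet_verify_cb(int ok, X509_STORE_CTX *ctx)
{
    if (!ok) {
        int err = X509_STORE_CTX_get_error(ctx);
        fprintf(stderr, "verify error at depth %d: %s\n",
                X509_STORE_CTX_get_error_depth(ctx),
                X509_verify_cert_error_string(err));
    }
    return ok;   /* returning 0 aborts the handshake */
}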
SSL_BUILD_CHAIN_FLAG_CHECK : 0; if (cert == NULL) return 1; if (SSL_CTX_use_certificate(ctx, cert) <= 0) { @@ -254,6 +261,7 @@ int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key) ERR_print_errors(bio_err); return 0; } + if (SSL_CTX_use_PrivateKey(ctx, key) <= 0) { BIO_printf(bio_err, "error setting private key\n"); ERR_print_errors(bio_err); @@ -268,6 +276,263 @@ int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key) "Private key does not match the certificate public key\n"); return 0; } + if (chain && !SSL_CTX_set1_chain(ctx, chain)) { + BIO_printf(bio_err, "error setting certificate chain\n"); + ERR_print_errors(bio_err); + return 0; + } + if (build_chain && !SSL_CTX_build_cert_chain(ctx, chflags)) { + BIO_printf(bio_err, "error building certificate chain\n"); + ERR_print_errors(bio_err); + return 0; + } + return 1; +} + +static void ssl_print_client_cert_types(BIO *bio, SSL *s) +{ + const unsigned char *p; + int i; + int cert_type_num = SSL_get0_certificate_types(s, &p); + if (!cert_type_num) + return; + BIO_puts(bio, "Client Certificate Types: "); + for (i = 0; i < cert_type_num; i++) { + unsigned char cert_type = p[i]; + char *cname; + switch (cert_type) { + case TLS_CT_RSA_SIGN: + cname = "RSA sign"; + break; + + case TLS_CT_DSS_SIGN: + cname = "DSA sign"; + break; + + case TLS_CT_RSA_FIXED_DH: + cname = "RSA fixed DH"; + break; + + case TLS_CT_DSS_FIXED_DH: + cname = "DSS fixed DH"; + break; + + case TLS_CT_ECDSA_SIGN: + cname = "ECDSA sign"; + break; + + case TLS_CT_RSA_FIXED_ECDH: + cname = "RSA fixed ECDH"; + break; + + case TLS_CT_ECDSA_FIXED_ECDH: + cname = "ECDSA fixed ECDH"; + break; + + case TLS_CT_GOST94_SIGN: + cname = "GOST94 Sign"; + break; + + case TLS_CT_GOST01_SIGN: + cname = "GOST01 Sign"; + break; + + default: + cname = NULL; + } + + if (i) + BIO_puts(bio, ", "); + + if (cname) + BIO_puts(bio, cname); + else + BIO_printf(bio, "UNKNOWN (%d),", cert_type); + } + BIO_puts(bio, "\n"); +} + +static int do_print_sigalgs(BIO *out, SSL *s, int shared) +{ + int i, nsig, client; + client = SSL_is_server(s) ? 
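/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * set_cert_key_stuff() above gains two steps: install an explicit extra chain
 * with SSL_CTX_set1_chain(), or let OpenSSL assemble and sanity-check one
 * with SSL_CTX_build_cert_chain(). Stripped of the app plumbing that is:
 */
#include <openssl/ssl.h>

static int install_cert(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key,
                        STACK_OF(X509) *chain)
{
    if (SSL_CTX_use_certificate(ctx, cert) <= 0 ||
        SSL_CTX_use_PrivateKey(ctx, key) <= 0 ||
        !SSL_CTX_check_private_key(ctx))
        return 0;
    if (chain != NULL)
        return SSL_CTX_set1_chain(ctx, chain) > 0;
    /* no chain supplied: build one from the cert store and verify it */
    return SSL_CTX_build_cert_chain(ctx, SSL_BUILD_CHAIN_FLAG_CHECK) > 0;
}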
0 : 1; + if (shared) + nsig = SSL_get_shared_sigalgs(s, -1, NULL, NULL, NULL, NULL, NULL); + else + nsig = SSL_get_sigalgs(s, -1, NULL, NULL, NULL, NULL, NULL); + if (nsig == 0) + return 1; + + if (shared) + BIO_puts(out, "Shared "); + + if (client) + BIO_puts(out, "Requested "); + BIO_puts(out, "Signature Algorithms: "); + for (i = 0; i < nsig; i++) { + int hash_nid, sign_nid; + unsigned char rhash, rsign; + const char *sstr = NULL; + if (shared) + SSL_get_shared_sigalgs(s, i, &sign_nid, &hash_nid, NULL, + &rsign, &rhash); + else + SSL_get_sigalgs(s, i, &sign_nid, &hash_nid, NULL, &rsign, &rhash); + if (i) + BIO_puts(out, ":"); + if (sign_nid == EVP_PKEY_RSA) + sstr = "RSA"; + else if (sign_nid == EVP_PKEY_DSA) + sstr = "DSA"; + else if (sign_nid == EVP_PKEY_EC) + sstr = "ECDSA"; + if (sstr) + BIO_printf(out, "%s+", sstr); + else + BIO_printf(out, "0x%02X+", (int)rsign); + if (hash_nid != NID_undef) + BIO_printf(out, "%s", OBJ_nid2sn(hash_nid)); + else + BIO_printf(out, "0x%02X", (int)rhash); + } + BIO_puts(out, "\n"); + return 1; +} + +int ssl_print_sigalgs(BIO *out, SSL *s) +{ + int mdnid; + if (!SSL_is_server(s)) + ssl_print_client_cert_types(out, s); + do_print_sigalgs(out, s, 0); + do_print_sigalgs(out, s, 1); + if (SSL_get_peer_signature_nid(s, &mdnid)) + BIO_printf(out, "Peer signing digest: %s\n", OBJ_nid2sn(mdnid)); + return 1; +} + +#ifndef OPENSSL_NO_EC +int ssl_print_point_formats(BIO *out, SSL *s) +{ + int i, nformats; + const char *pformats; + nformats = SSL_get0_ec_point_formats(s, &pformats); + if (nformats <= 0) + return 1; + BIO_puts(out, "Supported Elliptic Curve Point Formats: "); + for (i = 0; i < nformats; i++, pformats++) { + if (i) + BIO_puts(out, ":"); + switch (*pformats) { + case TLSEXT_ECPOINTFORMAT_uncompressed: + BIO_puts(out, "uncompressed"); + break; + + case TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime: + BIO_puts(out, "ansiX962_compressed_prime"); + break; + + case TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2: + BIO_puts(out, "ansiX962_compressed_char2"); + break; + + default: + BIO_printf(out, "unknown(%d)", (int)*pformats); + break; + + } + } + if (nformats <= 0) + BIO_puts(out, "NONE"); + BIO_puts(out, "\n"); + return 1; +} + +int ssl_print_curves(BIO *out, SSL *s, int noshared) +{ + int i, ncurves, *curves, nid; + const char *cname; + ncurves = SSL_get1_curves(s, NULL); + if (ncurves <= 0) + return 1; + curves = OPENSSL_malloc(ncurves * sizeof(int)); + if(!curves) { + BIO_puts(out, "Malloc error getting supported curves\n"); + return 0; + } + SSL_get1_curves(s, curves); + + + BIO_puts(out, "Supported Elliptic Curves: "); + for (i = 0; i < ncurves; i++) { + if (i) + BIO_puts(out, ":"); + nid = curves[i]; + /* If unrecognised print out hex version */ + if (nid & TLSEXT_nid_unknown) + BIO_printf(out, "0x%04X", nid & 0xFFFF); + else { + /* Use NIST name for curve if it exists */ + cname = EC_curve_nid2nist(nid); + if (!cname) + cname = OBJ_nid2sn(nid); + BIO_printf(out, "%s", cname); + } + } + if (ncurves == 0) + BIO_puts(out, "NONE"); + OPENSSL_free(curves); + if (noshared) { + BIO_puts(out, "\n"); + return 1; + } + BIO_puts(out, "\nShared Elliptic curves: "); + ncurves = SSL_get_shared_curve(s, -1); + for (i = 0; i < ncurves; i++) { + if (i) + BIO_puts(out, ":"); + nid = SSL_get_shared_curve(s, i); + cname = EC_curve_nid2nist(nid); + if (!cname) + cname = OBJ_nid2sn(nid); + BIO_printf(out, "%s", cname); + } + if (ncurves == 0) + BIO_puts(out, "NONE"); + BIO_puts(out, "\n"); + return 1; +} +#endif +int ssl_print_tmp_key(BIO *out, SSL *s) +{ + 
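/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * ssl_print_curves() above is built on SSL_get1_curves(), called once with
 * NULL to size the array and once to fill it. Listing the connection's
 * supported-curves list (the app additionally special-cases unknown nids,
 * omitted here):
 */
#include <stdio.h>
#include <openssl/crypto.h>
#include <openssl/objects.h>
#include <openssl/ec.h>
#include <openssl/ssl.h>

static void list_curves(SSL *s)
{
    int i, n = SSL_get1_curves(s, NULL);
    int *curves;

    if (n <= 0)
        return;
    curves = OPENSSL_malloc(n * sizeof(int));
    if (curves == NULL)
        return;
    SSL_get1_curves(s, curves);
    for (i = 0; i < n; i++) {
        const char *name = EC_curve_nid2nist(curves[i]);
        if (name == NULL)
            name = OBJ_nid2sn(curves[i]);
        printf("%s%s", i ? ":" : "", name);
    }
    printf("\n");
    OPENSSL_free(curves);
}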
EVP_PKEY *key; + if (!SSL_get_server_tmp_key(s, &key)) + return 1; + BIO_puts(out, "Server Temp Key: "); + switch (EVP_PKEY_id(key)) { + case EVP_PKEY_RSA: + BIO_printf(out, "RSA, %d bits\n", EVP_PKEY_bits(key)); + break; + + case EVP_PKEY_DH: + BIO_printf(out, "DH, %d bits\n", EVP_PKEY_bits(key)); + break; +#ifndef OPENSSL_NO_ECDH + case EVP_PKEY_EC: + { + EC_KEY *ec = EVP_PKEY_get1_EC_KEY(key); + int nid; + const char *cname; + nid = EC_GROUP_get_curve_name(EC_KEY_get0_group(ec)); + EC_KEY_free(ec); + cname = EC_curve_nid2nist(nid); + if (!cname) + cname = OBJ_nid2sn(nid); + BIO_printf(out, "ECDH, %s, %d bits\n", cname, EVP_PKEY_bits(key)); + } +#endif + } + EVP_PKEY_free(key); return 1; } @@ -883,3 +1148,504 @@ int MS_CALLBACK verify_cookie_callback(SSL *ssl, unsigned char *cookie, return 0; } + +/* + * Example of extended certificate handling. Where the standard support of + * one certificate per algorithm is not sufficient an application can decide + * which certificate(s) to use at runtime based on whatever criteria it deems + * appropriate. + */ + +/* Linked list of certificates, keys and chains */ +struct ssl_excert_st { + int certform; + const char *certfile; + int keyform; + const char *keyfile; + const char *chainfile; + X509 *cert; + EVP_PKEY *key; + STACK_OF(X509) *chain; + int build_chain; + struct ssl_excert_st *next, *prev; +}; + +struct chain_flags { + int flag; + const char *name; +}; + +struct chain_flags chain_flags_list[] = { + {CERT_PKEY_VALID, "Overall Validity"}, + {CERT_PKEY_SIGN, "Sign with EE key"}, + {CERT_PKEY_EE_SIGNATURE, "EE signature"}, + {CERT_PKEY_CA_SIGNATURE, "CA signature"}, + {CERT_PKEY_EE_PARAM, "EE key parameters"}, + {CERT_PKEY_CA_PARAM, "CA key parameters"}, + {CERT_PKEY_EXPLICIT_SIGN, "Explicity sign with EE key"}, + {CERT_PKEY_ISSUER_NAME, "Issuer Name"}, + {CERT_PKEY_CERT_TYPE, "Certificate Type"}, + {0, NULL} +}; + +static void print_chain_flags(BIO *out, SSL *s, int flags) +{ + struct chain_flags *ctmp = chain_flags_list; + while (ctmp->name) { + BIO_printf(out, "\t%s: %s\n", ctmp->name, + flags & ctmp->flag ? "OK" : "NOT OK"); + ctmp++; + } + BIO_printf(out, "\tSuite B: "); + if (SSL_set_cert_flags(s, 0) & SSL_CERT_FLAG_SUITEB_128_LOS) + BIO_puts(out, flags & CERT_PKEY_SUITEB ? "OK\n" : "NOT OK\n"); + else + BIO_printf(out, "not tested\n"); +} + +/* + * Very basic selection callback: just use any certificate chain reported as + * valid. More sophisticated could prioritise according to local policy. + */ +static int set_cert_cb(SSL *ssl, void *arg) +{ + int i, rv; + SSL_EXCERT *exc = arg; +#ifdef CERT_CB_TEST_RETRY + static int retry_cnt; + if (retry_cnt < 5) { + retry_cnt++; + fprintf(stderr, "Certificate callback retry test: count %d\n", + retry_cnt); + return -1; + } +#endif + SSL_certs_clear(ssl); + + if (!exc) + return 1; + + /* + * Go to end of list and traverse backwards since we prepend newer + * entries this retains the original order. 
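/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * ssl_print_tmp_key() above is driven by the new SSL_get_server_tmp_key()
 * call, which a client can use after the handshake to inspect the ephemeral
 * key the server offered:
 */
#include <openssl/bio.h>
#include <openssl/evp.h>
#include <openssl/ssl.h>

static void log_tmp_key(BIO *out, SSL *s)
{
    EVP_PKEY *key = NULL;
    if (!SSL_get_server_tmp_key(s, &key))
        return;                          /* e.g. a non-(EC)DHE cipher suite */
    BIO_printf(out, "ephemeral key: type %d, %d bits\n",
               EVP_PKEY_id(key), EVP_PKEY_bits(key));
    EVP_PKEY_free(key);                  /* the call returns a new reference */
}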
+ */ + while (exc->next) + exc = exc->next; + + i = 0; + + while (exc) { + i++; + rv = SSL_check_chain(ssl, exc->cert, exc->key, exc->chain); + BIO_printf(bio_err, "Checking cert chain %d:\nSubject: ", i); + X509_NAME_print_ex(bio_err, X509_get_subject_name(exc->cert), 0, + XN_FLAG_ONELINE); + BIO_puts(bio_err, "\n"); + + print_chain_flags(bio_err, ssl, rv); + if (rv & CERT_PKEY_VALID) { + SSL_use_certificate(ssl, exc->cert); + SSL_use_PrivateKey(ssl, exc->key); + /* + * NB: we wouldn't normally do this as it is not efficient + * building chains on each connection better to cache the chain + * in advance. + */ + if (exc->build_chain) { + if (!SSL_build_cert_chain(ssl, 0)) + return 0; + } else if (exc->chain) + SSL_set1_chain(ssl, exc->chain); + } + exc = exc->prev; + } + return 1; +} + +void ssl_ctx_set_excert(SSL_CTX *ctx, SSL_EXCERT *exc) +{ + SSL_CTX_set_cert_cb(ctx, set_cert_cb, exc); +} + +static int ssl_excert_prepend(SSL_EXCERT **pexc) +{ + SSL_EXCERT *exc; + exc = OPENSSL_malloc(sizeof(SSL_EXCERT)); + if (!exc) + return 0; + exc->certfile = NULL; + exc->keyfile = NULL; + exc->chainfile = NULL; + exc->cert = NULL; + exc->key = NULL; + exc->chain = NULL; + exc->prev = NULL; + exc->build_chain = 0; + + exc->next = *pexc; + *pexc = exc; + + if (exc->next) { + exc->certform = exc->next->certform; + exc->keyform = exc->next->keyform; + exc->next->prev = exc; + } else { + exc->certform = FORMAT_PEM; + exc->keyform = FORMAT_PEM; + } + return 1; + +} + +void ssl_excert_free(SSL_EXCERT *exc) +{ + SSL_EXCERT *curr; + while (exc) { + if (exc->cert) + X509_free(exc->cert); + if (exc->key) + EVP_PKEY_free(exc->key); + if (exc->chain) + sk_X509_pop_free(exc->chain, X509_free); + curr = exc; + exc = exc->next; + OPENSSL_free(curr); + } +} + +int load_excert(SSL_EXCERT **pexc, BIO *err) +{ + SSL_EXCERT *exc = *pexc; + if (!exc) + return 1; + /* If nothing in list, free and set to NULL */ + if (!exc->certfile && !exc->next) { + ssl_excert_free(exc); + *pexc = NULL; + return 1; + } + for (; exc; exc = exc->next) { + if (!exc->certfile) { + BIO_printf(err, "Missing filename\n"); + return 0; + } + exc->cert = load_cert(err, exc->certfile, exc->certform, + NULL, NULL, "Server Certificate"); + if (!exc->cert) + return 0; + if (exc->keyfile) { + exc->key = load_key(err, exc->keyfile, exc->keyform, + 0, NULL, NULL, "Server Key"); + } else { + exc->key = load_key(err, exc->certfile, exc->certform, + 0, NULL, NULL, "Server Key"); + } + if (!exc->key) + return 0; + if (exc->chainfile) { + exc->chain = load_certs(err, + exc->chainfile, FORMAT_PEM, + NULL, NULL, "Server Chain"); + if (!exc->chain) + return 0; + } + } + return 1; +} + +int args_excert(char ***pargs, int *pargc, + int *badarg, BIO *err, SSL_EXCERT **pexc) +{ + char *arg = **pargs, *argn = (*pargs)[1]; + SSL_EXCERT *exc = *pexc; + int narg = 2; + if (!exc) { + if (ssl_excert_prepend(&exc)) + *pexc = exc; + else { + BIO_printf(err, "Error initialising xcert\n"); + *badarg = 1; + goto err; + } + } + if (strcmp(arg, "-xcert") == 0) { + if (!argn) { + *badarg = 1; + return 1; + } + if (exc->certfile && !ssl_excert_prepend(&exc)) { + BIO_printf(err, "Error adding xcert\n"); + *badarg = 1; + goto err; + } + exc->certfile = argn; + } else if (strcmp(arg, "-xkey") == 0) { + if (!argn) { + *badarg = 1; + return 1; + } + if (exc->keyfile) { + BIO_printf(err, "Key already specified\n"); + *badarg = 1; + return 1; + } + exc->keyfile = argn; + } else if (strcmp(arg, "-xchain") == 0) { + if (!argn) { + *badarg = 1; + return 1; + } + if (exc->chainfile) { + 
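/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * The extended-certificate machinery above hangs off SSL_CTX_set_cert_cb().
 * A much smaller callback that validates a single candidate cert/key pair
 * against what the peer advertised could look like this; "my_creds" and its
 * fields are assumed to be owned by the application.
 */
#include <openssl/ssl.h>

struct my_creds {
    X509 *cert;
    EVP_PKEY *key;
    STACK_OF(X509) *chain;
};

static int pick_cert_cb(SSL *ssl, void *arg)
{
    struct my_creds *c = arg;
    /* bitmask of CERT_PKEY_* flags; CERT_PKEY_VALID means "usable here" */
    if (SSL_check_chain(ssl, c->cert, c->key, c->chain) & CERT_PKEY_VALID) {
        SSL_use_certificate(ssl, c->cert);
        SSL_use_PrivateKey(ssl, c->key);
        SSL_set1_chain(ssl, c->chain);
    }
    return 1;   /* 1 = continue handshake, 0 = abort, -1 = retry later */
}

/* registration: SSL_CTX_set_cert_cb(ctx, pick_cert_cb, &creds); */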
BIO_printf(err, "Chain already specified\n"); + *badarg = 1; + return 1; + } + exc->chainfile = argn; + } else if (strcmp(arg, "-xchain_build") == 0) { + narg = 1; + exc->build_chain = 1; + } else if (strcmp(arg, "-xcertform") == 0) { + if (!argn) { + *badarg = 1; + goto err; + } + exc->certform = str2fmt(argn); + } else if (strcmp(arg, "-xkeyform") == 0) { + if (!argn) { + *badarg = 1; + goto err; + } + exc->keyform = str2fmt(argn); + } else + return 0; + + (*pargs) += narg; + + if (pargc) + *pargc -= narg; + + *pexc = exc; + + return 1; + + err: + ERR_print_errors(err); + ssl_excert_free(exc); + *pexc = NULL; + return 1; +} + +static void print_raw_cipherlist(BIO *bio, SSL *s) +{ + const unsigned char *rlist; + static const unsigned char scsv_id[] = { 0, 0, 0xFF }; + size_t i, rlistlen, num; + if (!SSL_is_server(s)) + return; + num = SSL_get0_raw_cipherlist(s, NULL); + rlistlen = SSL_get0_raw_cipherlist(s, &rlist); + BIO_puts(bio, "Client cipher list: "); + for (i = 0; i < rlistlen; i += num, rlist += num) { + const SSL_CIPHER *c = SSL_CIPHER_find(s, rlist); + if (i) + BIO_puts(bio, ":"); + if (c) + BIO_puts(bio, SSL_CIPHER_get_name(c)); + else if (!memcmp(rlist, scsv_id - num + 3, num)) + BIO_puts(bio, "SCSV"); + else { + size_t j; + BIO_puts(bio, "0x"); + for (j = 0; j < num; j++) + BIO_printf(bio, "%02X", rlist[j]); + } + } + BIO_puts(bio, "\n"); +} + +void print_ssl_summary(BIO *bio, SSL *s) +{ + const SSL_CIPHER *c; + X509 *peer; + /* + * const char *pnam = SSL_is_server(s) ? "client" : "server"; + */ + BIO_printf(bio, "Protocol version: %s\n", SSL_get_version(s)); + print_raw_cipherlist(bio, s); + c = SSL_get_current_cipher(s); + BIO_printf(bio, "Ciphersuite: %s\n", SSL_CIPHER_get_name(c)); + do_print_sigalgs(bio, s, 0); + peer = SSL_get_peer_certificate(s); + if (peer) { + int nid; + BIO_puts(bio, "Peer certificate: "); + X509_NAME_print_ex(bio, X509_get_subject_name(peer), + 0, XN_FLAG_ONELINE); + BIO_puts(bio, "\n"); + if (SSL_get_peer_signature_nid(s, &nid)) + BIO_printf(bio, "Hash used: %s\n", OBJ_nid2sn(nid)); + } else + BIO_puts(bio, "No peer certificate\n"); + if (peer) + X509_free(peer); +#ifndef OPENSSL_NO_EC + ssl_print_point_formats(bio, s); + if (SSL_is_server(s)) + ssl_print_curves(bio, s, 1); + else + ssl_print_tmp_key(bio, s); +#else + if (!SSL_is_server(s)) + ssl_print_tmp_key(bio, s); +#endif +} + +int args_ssl(char ***pargs, int *pargc, SSL_CONF_CTX *cctx, + int *badarg, BIO *err, STACK_OF(OPENSSL_STRING) **pstr) +{ + char *arg = **pargs, *argn = (*pargs)[1]; + int rv; + + /* Attempt to run SSL configuration command */ + rv = SSL_CONF_cmd_argv(cctx, pargc, pargs); + /* If parameter not recognised just return */ + if (rv == 0) + return 0; + /* see if missing argument error */ + if (rv == -3) { + BIO_printf(err, "%s needs an argument\n", arg); + *badarg = 1; + goto end; + } + /* Check for some other error */ + if (rv < 0) { + BIO_printf(err, "Error with command: \"%s %s\"\n", + arg, argn ? 
argn : ""); + *badarg = 1; + goto end; + } + /* Store command and argument */ + /* If only one argument processed store value as NULL */ + if (rv == 1) + argn = NULL; + if (!*pstr) + *pstr = sk_OPENSSL_STRING_new_null(); + if (!*pstr || !sk_OPENSSL_STRING_push(*pstr, arg) || + !sk_OPENSSL_STRING_push(*pstr, argn)) { + BIO_puts(err, "Memory allocation failure\n"); + goto end; + } + + end: + if (*badarg) + ERR_print_errors(err); + + return 1; +} + +int args_ssl_call(SSL_CTX *ctx, BIO *err, SSL_CONF_CTX *cctx, + STACK_OF(OPENSSL_STRING) *str, int no_ecdhe, int no_jpake) +{ + int i; + SSL_CONF_CTX_set_ssl_ctx(cctx, ctx); + for (i = 0; i < sk_OPENSSL_STRING_num(str); i += 2) { + const char *param = sk_OPENSSL_STRING_value(str, i); + const char *value = sk_OPENSSL_STRING_value(str, i + 1); + /* + * If no_ecdhe or named curve already specified don't need a default. + */ + if (!no_ecdhe && !strcmp(param, "-named_curve")) + no_ecdhe = 1; +#ifndef OPENSSL_NO_JPAKE + if (!no_jpake && !strcmp(param, "-cipher")) { + BIO_puts(err, "JPAKE sets cipher to PSK\n"); + return 0; + } +#endif + if (SSL_CONF_cmd(cctx, param, value) <= 0) { + BIO_printf(err, "Error with command: \"%s %s\"\n", + param, value ? value : ""); + ERR_print_errors(err); + return 0; + } + } + /* + * This is a special case to keep existing s_server functionality: if we + * don't have any curve specified *and* we haven't disabled ECDHE then + * use P-256. + */ + if (!no_ecdhe) { + if (SSL_CONF_cmd(cctx, "-named_curve", "P-256") <= 0) { + BIO_puts(err, "Error setting EC curve\n"); + ERR_print_errors(err); + return 0; + } + } +#ifndef OPENSSL_NO_JPAKE + if (!no_jpake) { + if (SSL_CONF_cmd(cctx, "-cipher", "PSK") <= 0) { + BIO_puts(err, "Error setting cipher to PSK\n"); + ERR_print_errors(err); + return 0; + } + } +#endif + if (!SSL_CONF_CTX_finish(cctx)) { + BIO_puts(err, "Error finishing context\n"); + ERR_print_errors(err); + return 0; + } + return 1; +} + +static int add_crls_store(X509_STORE *st, STACK_OF(X509_CRL) *crls) +{ + X509_CRL *crl; + int i; + for (i = 0; i < sk_X509_CRL_num(crls); i++) { + crl = sk_X509_CRL_value(crls, i); + X509_STORE_add_crl(st, crl); + } + return 1; +} + +int ssl_ctx_add_crls(SSL_CTX *ctx, STACK_OF(X509_CRL) *crls, int crl_download) +{ + X509_STORE *st; + st = SSL_CTX_get_cert_store(ctx); + add_crls_store(st, crls); + if (crl_download) + store_setup_crl_download(st); + return 1; +} + +int ssl_load_stores(SSL_CTX *ctx, + const char *vfyCApath, const char *vfyCAfile, + const char *chCApath, const char *chCAfile, + STACK_OF(X509_CRL) *crls, int crl_download) +{ + X509_STORE *vfy = NULL, *ch = NULL; + int rv = 0; + if (vfyCApath || vfyCAfile) { + vfy = X509_STORE_new(); + if (!X509_STORE_load_locations(vfy, vfyCAfile, vfyCApath)) + goto err; + add_crls_store(vfy, crls); + SSL_CTX_set1_verify_cert_store(ctx, vfy); + if (crl_download) + store_setup_crl_download(vfy); + } + if (chCApath || chCAfile) { + ch = X509_STORE_new(); + if (!X509_STORE_load_locations(ch, chCAfile, chCApath)) + goto err; + SSL_CTX_set1_chain_cert_store(ctx, ch); + } + rv = 1; + err: + if (vfy) + X509_STORE_free(vfy); + if (ch) + X509_STORE_free(ch); + return rv; +} diff --git a/deps/openssl/openssl/apps/s_client.c b/deps/openssl/openssl/apps/s_client.c index b34d38afea5623..7112b246d1d0de 100644 --- a/deps/openssl/openssl/apps/s_client.c +++ b/deps/openssl/openssl/apps/s_client.c @@ -209,6 +209,7 @@ typedef unsigned int u_int; extern int verify_depth; extern int verify_error; extern int verify_return_error; +extern int verify_quiet; #ifdef 
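/*
 * [Editor's note: illustrative sketch, not part of the upstream patch.]
 * args_ssl() and args_ssl_call() above are wrappers around the new SSL_CONF
 * API: commands are collected from the command line and later replayed
 * against the SSL_CTX. The core sequence, without the deferred-list
 * bookkeeping, is:
 */
#include <openssl/ssl.h>

static int configure_ctx(SSL_CTX *ctx)
{
    int ok = 0;
    SSL_CONF_CTX *cctx = SSL_CONF_CTX_new();

    if (cctx == NULL)
        return 0;
    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT | SSL_CONF_FLAG_CMDLINE);
    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);
    /* same commands the apps accept via -cipher / -named_curve etc. */
    if (SSL_CONF_cmd(cctx, "-cipher", "HIGH:!aNULL") <= 0)
        goto end;
    if (SSL_CONF_cmd(cctx, "-named_curve", "P-256") <= 0)
        goto end;
    ok = SSL_CONF_CTX_finish(cctx);
 end:
    SSL_CONF_CTX_free(cctx);
    return ok;
}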
FIONBIO static int c_nbio = 0; @@ -231,8 +232,10 @@ static void print_stuff(BIO *berr, SSL *con, int full); static int ocsp_resp_cb(SSL *s, void *arg); #endif static BIO *bio_c_out = NULL; +static BIO *bio_c_msg = NULL; static int c_quiet = 0; static int c_ign_eof = 0; +static int c_brief = 0; #ifndef OPENSSL_NO_PSK /* Default PSK identity and key */ @@ -311,6 +314,12 @@ static void sc_usage(void) BIO_printf(bio_err, " -connect host:port - who to connect to (default is %s:%s)\n", SSL_HOST_NAME, PORT_STR); + BIO_printf(bio_err, + " -verify_host host - check peer certificate matches \"host\"\n"); + BIO_printf(bio_err, + " -verify_email email - check peer certificate matches \"email\"\n"); + BIO_printf(bio_err, + " -verify_ip ipaddr - check peer certificate matches \"ipaddr\"\n"); BIO_printf(bio_err, " -verify arg - turn on peer certificate verification\n"); @@ -418,11 +427,15 @@ static void sc_usage(void) " -status - request certificate status from server\n"); BIO_printf(bio_err, " -no_ticket - disable use of RFC4507bis session tickets\n"); -# ifndef OPENSSL_NO_NEXTPROTONEG + BIO_printf(bio_err, + " -serverinfo types - send empty ClientHello extensions (comma-separated numbers)\n"); +#endif +#ifndef OPENSSL_NO_NEXTPROTONEG BIO_printf(bio_err, " -nextprotoneg arg - enable NPN extension, considering named protocols supported (comma-separated list)\n"); -# endif #endif + BIO_printf(bio_err, + " -alpn arg - enable ALPN extension, considering named protocols supported (comma-separated list)\n"); BIO_printf(bio_err, " -legacy_renegotiation - enable use of legacy renegotiation (dangerous)\n"); #ifndef OPENSSL_NO_SRTP @@ -610,6 +623,27 @@ static int next_proto_cb(SSL *s, unsigned char **out, unsigned char *outlen, return SSL_TLSEXT_ERR_OK; } # endif /* ndef OPENSSL_NO_NEXTPROTONEG */ + +static int serverinfo_cli_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, size_t inlen, + int *al, void *arg) +{ + char pem_name[100]; + unsigned char ext_buf[4 + 65536]; + + /* Reconstruct the type/len fields prior to extension data */ + ext_buf[0] = ext_type >> 8; + ext_buf[1] = ext_type & 0xFF; + ext_buf[2] = inlen >> 8; + ext_buf[3] = inlen & 0xFF; + memcpy(ext_buf + 4, in, inlen); + + BIO_snprintf(pem_name, sizeof(pem_name), "SERVERINFO FOR EXTENSION %d", + ext_type); + PEM_write_bio(bio_c_out, pem_name, "", ext_buf, 4 + inlen); + return 1; +} + #endif enum { @@ -625,7 +659,7 @@ int MAIN(int, char **); int MAIN(int argc, char **argv) { - unsigned int off = 0, clr = 0; + int build_chain = 0; SSL *con = NULL; #ifndef OPENSSL_NO_KRB5 KSSL_CTX *kctx; @@ -638,13 +672,16 @@ int MAIN(int argc, char **argv) short port = PORT; int full_log = 1; char *host = SSL_HOST_NAME; - char *cert_file = NULL, *key_file = NULL; + char *cert_file = NULL, *key_file = NULL, *chain_file = NULL; int cert_format = FORMAT_PEM, key_format = FORMAT_PEM; char *passarg = NULL, *pass = NULL; X509 *cert = NULL; EVP_PKEY *key = NULL; - char *CApath = NULL, *CAfile = NULL, *cipher = NULL; - int reconnect = 0, badop = 0, verify = SSL_VERIFY_NONE, bugs = 0; + STACK_OF(X509) *chain = NULL; + char *CApath = NULL, *CAfile = NULL; + char *chCApath = NULL, *chCAfile = NULL; + char *vfyCApath = NULL, *vfyCAfile = NULL; + int reconnect = 0, badop = 0, verify = SSL_VERIFY_NONE; int crlf = 0; int write_tty, read_tty, write_ssl, read_ssl, tty_on, ssl_pending; SSL_CTX *ctx = NULL; @@ -677,6 +714,10 @@ int MAIN(int argc, char **argv) # ifndef OPENSSL_NO_NEXTPROTONEG const char *next_proto_neg_in = NULL; # endif + const char *alpn_in = NULL; +# 
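serverinfo_cli_parse_cb() above only takes effect once it is registered for each requested extension type; the registration happens further down via SSL_CTX_add_client_custom_ext(). A hedged, standalone sketch of that pattern; the extension number 0x1234 and all identifiers are examples only:

#include <openssl/ssl.h>

static int example_parse_cb(SSL *s, unsigned int ext_type,
                            const unsigned char *in, size_t inlen,
                            int *al, void *arg)
{
    /* inspect the extension body here; returning non-zero accepts it */
    return 1;
}

static int register_example_ext(SSL_CTX *ctx)
{
    /* NULL add/free callbacks: an empty extension is sent in the ClientHello,
     * which matches the "-serverinfo" help text above */
    return SSL_CTX_add_client_custom_ext(ctx, 0x1234,
                                         NULL, NULL, NULL,
                                         example_parse_cb, NULL);
}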
define MAX_SI_TYPES 100 + unsigned short serverinfo_types[MAX_SI_TYPES]; + int serverinfo_types_count = 0; #endif char *sess_in = NULL; char *sess_out = NULL; @@ -686,13 +727,25 @@ int MAIN(int argc, char **argv) int enable_timeouts = 0; long socket_mtu = 0; #ifndef OPENSSL_NO_JPAKE - char *jpake_secret = NULL; + static char *jpake_secret = NULL; +# define no_jpake !jpake_secret +#else +# define no_jpake 1 #endif #ifndef OPENSSL_NO_SRP char *srppass = NULL; int srp_lateuser = 0; SRP_ARG srp_arg = { NULL, NULL, 0, 0, 0, 1024 }; #endif + SSL_EXCERT *exc = NULL; + + SSL_CONF_CTX *cctx = NULL; + STACK_OF(OPENSSL_STRING) *ssl_args = NULL; + + char *crl_file = NULL; + int crl_format = FORMAT_PEM; + int crl_download = 0; + STACK_OF(X509_CRL) *crls = NULL; meth = SSLv23_client_method(); @@ -710,6 +763,12 @@ int MAIN(int argc, char **argv) if (!load_config(bio_err, NULL)) goto end; + cctx = SSL_CONF_CTX_new(); + if (!cctx) + goto end; + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CMDLINE); + if (((cbuf = OPENSSL_malloc(BUFSIZZ)) == NULL) || ((sbuf = OPENSSL_malloc(BUFSIZZ)) == NULL) || ((mbuf = OPENSSL_malloc(BUFSIZZ)) == NULL)) { @@ -746,12 +805,19 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; verify_depth = atoi(*(++argv)); - BIO_printf(bio_err, "verify depth is %d\n", verify_depth); + if (!c_quiet) + BIO_printf(bio_err, "verify depth is %d\n", verify_depth); } else if (strcmp(*argv, "-cert") == 0) { if (--argc < 1) goto bad; cert_file = *(++argv); - } else if (strcmp(*argv, "-sess_out") == 0) { + } else if (strcmp(*argv, "-CRL") == 0) { + if (--argc < 1) + goto bad; + crl_file = *(++argv); + } else if (strcmp(*argv, "-crl_download") == 0) + crl_download = 1; + else if (strcmp(*argv, "-sess_out") == 0) { if (--argc < 1) goto bad; sess_out = *(++argv); @@ -763,13 +829,31 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; cert_format = str2fmt(*(++argv)); + } else if (strcmp(*argv, "-CRLform") == 0) { + if (--argc < 1) + goto bad; + crl_format = str2fmt(*(++argv)); } else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) { if (badarg) goto bad; continue; } else if (strcmp(*argv, "-verify_return_error") == 0) verify_return_error = 1; - else if (strcmp(*argv, "-prexit") == 0) + else if (strcmp(*argv, "-verify_quiet") == 0) + verify_quiet = 1; + else if (strcmp(*argv, "-brief") == 0) { + c_brief = 1; + verify_quiet = 1; + c_quiet = 1; + } else if (args_excert(&argv, &argc, &badarg, bio_err, &exc)) { + if (badarg) + goto bad; + continue; + } else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args)) { + if (badarg) + goto bad; + continue; + } else if (strcmp(*argv, "-prexit") == 0) prexit = 1; else if (strcmp(*argv, "-crlf") == 0) crlf = 1; @@ -796,6 +880,15 @@ int MAIN(int argc, char **argv) #endif else if (strcmp(*argv, "-msg") == 0) c_msg = 1; + else if (strcmp(*argv, "-msgfile") == 0) { + if (--argc < 1) + goto bad; + bio_c_msg = BIO_new_file(*(++argv), "w"); + } +#ifndef OPENSSL_NO_SSL_TRACE + else if (strcmp(*argv, "-trace") == 0) + c_msg = 2; +#endif else if (strcmp(*argv, "-showcerts") == 0) c_showcerts = 1; else if (strcmp(*argv, "-nbio_test") == 0) @@ -864,11 +957,15 @@ int MAIN(int argc, char **argv) meth = TLSv1_client_method(); #endif #ifndef OPENSSL_NO_DTLS1 - else if (strcmp(*argv, "-dtls1") == 0) { + else if (strcmp(*argv, "-dtls") == 0) { + meth = DTLS_client_method(); + socket_type = SOCK_DGRAM; + } else if (strcmp(*argv, "-dtls1") == 0) { meth = DTLSv1_client_method(); socket_type = 
SOCK_DGRAM; - } else if (strcmp(*argv, "-fallback_scsv") == 0) { - fallback_scsv = 1; + } else if (strcmp(*argv, "-dtls1_2") == 0) { + meth = DTLSv1_2_client_method(); + socket_type = SOCK_DGRAM; } else if (strcmp(*argv, "-timeout") == 0) enable_timeouts = 1; else if (strcmp(*argv, "-mtu") == 0) { @@ -877,9 +974,9 @@ int MAIN(int argc, char **argv) socket_mtu = atol(*(++argv)); } #endif - else if (strcmp(*argv, "-bugs") == 0) - bugs = 1; - else if (strcmp(*argv, "-keyform") == 0) { + else if (strcmp(*argv, "-fallback_scsv") == 0) { + fallback_scsv = 1; + } else if (strcmp(*argv, "-keyform") == 0) { if (--argc < 1) goto bad; key_format = str2fmt(*(++argv)); @@ -887,6 +984,10 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; passarg = *(++argv); + } else if (strcmp(*argv, "-cert_chain") == 0) { + if (--argc < 1) + goto bad; + chain_file = *(++argv); } else if (strcmp(*argv, "-key") == 0) { if (--argc < 1) goto bad; @@ -897,27 +998,30 @@ int MAIN(int argc, char **argv) if (--argc < 1) goto bad; CApath = *(++argv); - } else if (strcmp(*argv, "-CAfile") == 0) { + } else if (strcmp(*argv, "-chainCApath") == 0) { + if (--argc < 1) + goto bad; + chCApath = *(++argv); + } else if (strcmp(*argv, "-verifyCApath") == 0) { + if (--argc < 1) + goto bad; + vfyCApath = *(++argv); + } else if (strcmp(*argv, "-build_chain") == 0) + build_chain = 1; + else if (strcmp(*argv, "-CAfile") == 0) { if (--argc < 1) goto bad; CAfile = *(++argv); - } else if (strcmp(*argv, "-no_tls1_2") == 0) - off |= SSL_OP_NO_TLSv1_2; - else if (strcmp(*argv, "-no_tls1_1") == 0) - off |= SSL_OP_NO_TLSv1_1; - else if (strcmp(*argv, "-no_tls1") == 0) - off |= SSL_OP_NO_TLSv1; - else if (strcmp(*argv, "-no_ssl3") == 0) - off |= SSL_OP_NO_SSLv3; - else if (strcmp(*argv, "-no_ssl2") == 0) - off |= SSL_OP_NO_SSLv2; - else if (strcmp(*argv, "-no_comp") == 0) { - off |= SSL_OP_NO_COMPRESSION; + } else if (strcmp(*argv, "-chainCAfile") == 0) { + if (--argc < 1) + goto bad; + chCAfile = *(++argv); + } else if (strcmp(*argv, "-verifyCAfile") == 0) { + if (--argc < 1) + goto bad; + vfyCAfile = *(++argv); } #ifndef OPENSSL_NO_TLSEXT - else if (strcmp(*argv, "-no_ticket") == 0) { - off |= SSL_OP_NO_TICKET; - } # ifndef OPENSSL_NO_NEXTPROTONEG else if (strcmp(*argv, "-nextprotoneg") == 0) { if (--argc < 1) @@ -925,20 +1029,32 @@ int MAIN(int argc, char **argv) next_proto_neg_in = *(++argv); } # endif -#endif - else if (strcmp(*argv, "-serverpref") == 0) - off |= SSL_OP_CIPHER_SERVER_PREFERENCE; - else if (strcmp(*argv, "-legacy_renegotiation") == 0) - off |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION; - else if (strcmp(*argv, "-legacy_server_connect") == 0) { - off |= SSL_OP_LEGACY_SERVER_CONNECT; - } else if (strcmp(*argv, "-no_legacy_server_connect") == 0) { - clr |= SSL_OP_LEGACY_SERVER_CONNECT; - } else if (strcmp(*argv, "-cipher") == 0) { + else if (strcmp(*argv, "-alpn") == 0) { + if (--argc < 1) + goto bad; + alpn_in = *(++argv); + } else if (strcmp(*argv, "-serverinfo") == 0) { + char *c; + int start = 0; + int len; + if (--argc < 1) goto bad; - cipher = *(++argv); + c = *(++argv); + serverinfo_types_count = 0; + len = strlen(c); + for (i = 0; i <= len; ++i) { + if (i == len || c[i] == ',') { + serverinfo_types[serverinfo_types_count] + = atoi(c + start); + serverinfo_types_count++; + start = i + 1; + } + if (serverinfo_types_count == MAX_SI_TYPES) + break; + } } +#endif #ifdef FIONBIO else if (strcmp(*argv, "-nbio") == 0) { c_nbio = 1; @@ -1029,11 +1145,6 @@ int MAIN(int argc, char **argv) goto end; } psk_identity = "JPAKE"; - 
if (cipher) { - BIO_printf(bio_err, "JPAKE sets cipher to PSK\n"); - goto end; - } - cipher = "PSK"; } #endif @@ -1092,6 +1203,33 @@ int MAIN(int argc, char **argv) } } + if (chain_file) { + chain = load_certs(bio_err, chain_file, FORMAT_PEM, + NULL, e, "client certificate chain"); + if (!chain) + goto end; + } + + if (crl_file) { + X509_CRL *crl; + crl = load_crl(crl_file, crl_format); + if (!crl) { + BIO_puts(bio_err, "Error loading CRL\n"); + ERR_print_errors(bio_err); + goto end; + } + crls = sk_X509_CRL_new_null(); + if (!crls || !sk_X509_CRL_push(crls, crl)) { + BIO_puts(bio_err, "Error adding CRL\n"); + ERR_print_errors(bio_err); + X509_CRL_free(crl); + goto end; + } + } + + if (!load_excert(&exc, bio_err)) + goto end; + if (!app_RAND_load_file(NULL, bio_err, 1) && inrand == NULL && !RAND_status()) { BIO_printf(bio_err, @@ -1102,8 +1240,10 @@ int MAIN(int argc, char **argv) app_RAND_load_files(inrand)); if (bio_c_out == NULL) { - if (c_quiet && !c_debug && !c_msg) { + if (c_quiet && !c_debug) { bio_c_out = BIO_new(BIO_s_null()); + if (c_msg && !bio_c_msg) + bio_c_msg = BIO_new_fp(stdout, BIO_NOCLOSE); } else { if (bio_c_out == NULL) bio_c_out = BIO_new_fp(stdout, BIO_NOCLOSE); @@ -1125,6 +1265,17 @@ int MAIN(int argc, char **argv) if (vpm) SSL_CTX_set1_param(ctx, vpm); + if (!args_ssl_call(ctx, bio_err, cctx, ssl_args, 1, no_jpake)) { + ERR_print_errors(bio_err); + goto end; + } + + if (!ssl_load_stores(ctx, vfyCApath, vfyCAfile, chCApath, chCAfile, + crls, crl_download)) { + BIO_printf(bio_err, "Error loading store locations\n"); + ERR_print_errors(bio_err); + goto end; + } #ifndef OPENSSL_NO_ENGINE if (ssl_client_engine) { if (!SSL_CTX_set_client_cert_engine(ctx, ssl_client_engine)) { @@ -1154,35 +1305,43 @@ int MAIN(int argc, char **argv) if (srtp_profiles != NULL) SSL_CTX_set_tlsext_use_srtp(ctx, srtp_profiles); #endif - if (bugs) - SSL_CTX_set_options(ctx, SSL_OP_ALL | off); - else - SSL_CTX_set_options(ctx, off); + if (exc) + ssl_ctx_set_excert(ctx, exc); - if (clr) - SSL_CTX_clear_options(ctx, clr); - -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) if (next_proto.data) SSL_CTX_set_next_proto_select_cb(ctx, next_proto_cb, &next_proto); +# endif + if (alpn_in) { + unsigned short alpn_len; + unsigned char *alpn = next_protos_parse(&alpn_len, alpn_in); + + if (alpn == NULL) { + BIO_printf(bio_err, "Error parsing -alpn argument\n"); + goto end; + } + SSL_CTX_set_alpn_protos(ctx, alpn, alpn_len); + OPENSSL_free(alpn); + } +#endif +#ifndef OPENSSL_NO_TLSEXT + for (i = 0; i < serverinfo_types_count; i++) { + SSL_CTX_add_client_custom_ext(ctx, + serverinfo_types[i], + NULL, NULL, NULL, + serverinfo_cli_parse_cb, NULL); + } #endif if (state) SSL_CTX_set_info_callback(ctx, apps_ssl_info_callback); - if (cipher != NULL) - if (!SSL_CTX_set_cipher_list(ctx, cipher)) { - BIO_printf(bio_err, "error setting cipher list\n"); - ERR_print_errors(bio_err); - goto end; - } #if 0 - else - SSL_CTX_set_cipher_list(ctx, getenv("SSL_CIPHER")); + else + SSL_CTX_set_cipher_list(ctx, getenv("SSL_CIPHER")); #endif SSL_CTX_set_verify(ctx, verify, verify_callback); - if (!set_cert_key_stuff(ctx, cert, key)) - goto end; if ((!SSL_CTX_load_verify_locations(ctx, CAfile, CApath)) || (!SSL_CTX_set_default_verify_paths(ctx))) { @@ -1192,6 +1351,11 @@ int MAIN(int argc, char **argv) ERR_print_errors(bio_err); /* goto end; */ } + + ssl_ctx_add_crls(ctx, crls, crl_download); + if (!set_cert_key_stuff(ctx, cert, key, chain, 
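For the -alpn path above, next_protos_parse() turns the comma-separated argument into the ALPN wire format: each protocol name preceded by a one-byte length. A sketch of handing a fixed list straight to SSL_CTX_set_alpn_protos(), which, unusually, returns 0 on success; the protocol names are examples only:

#include <openssl/ssl.h>

static const unsigned char example_alpn[] = {
    8, 'h', 't', 't', 'p', '/', '1', '.', '1',   /* "http/1.1" */
    6, 's', 'p', 'd', 'y', '/', '3'              /* "spdy/3"   */
};

static int set_example_alpn(SSL_CTX *ctx)
{
    return SSL_CTX_set_alpn_protos(ctx, example_alpn,
                                   sizeof(example_alpn)) == 0;
}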
build_chain)) + goto end; + #ifndef OPENSSL_NO_TLSEXT if (servername != NULL) { tlsextcbp.biodebug = bio_err; @@ -1283,7 +1447,7 @@ int MAIN(int argc, char **argv) if (c_Pause & 0x01) SSL_set_debug(con, 1); - if (SSL_version(con) == DTLS1_VERSION) { + if (socket_type == SOCK_DGRAM) { sbio = BIO_new_dgram(s, BIO_NOCLOSE); if (getsockname(s, &peer, (void *)&peerlen) < 0) { @@ -1337,8 +1501,13 @@ int MAIN(int argc, char **argv) BIO_set_callback_arg(sbio, (char *)bio_c_out); } if (c_msg) { - SSL_set_msg_callback(con, msg_cb); - SSL_set_msg_callback_arg(con, bio_c_out); +#ifndef OPENSSL_NO_SSL_TRACE + if (c_msg == 2) + SSL_set_msg_callback(con, SSL_trace); + else +#endif + SSL_set_msg_callback(con, msg_cb); + SSL_set_msg_callback_arg(con, bio_c_msg ? bio_c_msg : bio_c_out); } #ifndef OPENSSL_NO_TLSEXT if (c_tlsextdebug) { @@ -1521,6 +1690,11 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "Error writing session file %s\n", sess_out); } + if (c_brief) { + BIO_puts(bio_err, "CONNECTION ESTABLISHED\n"); + print_ssl_summary(bio_err, con); + } + print_stuff(bio_c_out, con, full_log); if (full_log > 0) full_log--; @@ -1785,7 +1959,10 @@ int MAIN(int argc, char **argv) break; case SSL_ERROR_SYSCALL: ret = get_last_socket_error(); - BIO_printf(bio_err, "read:errno=%d\n", ret); + if (c_brief) + BIO_puts(bio_err, "CONNECTION CLOSED BY SERVER\n"); + else + BIO_printf(bio_err, "read:errno=%d\n", ret); goto shut; case SSL_ERROR_ZERO_RETURN: BIO_printf(bio_c_out, "closed\n"); @@ -1885,12 +2062,25 @@ int MAIN(int argc, char **argv) SSL_CTX_free(ctx); if (cert) X509_free(cert); + if (crls) + sk_X509_CRL_pop_free(crls, X509_CRL_free); if (key) EVP_PKEY_free(key); + if (chain) + sk_X509_pop_free(chain, X509_free); if (pass) OPENSSL_free(pass); if (vpm) X509_VERIFY_PARAM_free(vpm); + ssl_excert_free(exc); + if (ssl_args) + sk_OPENSSL_STRING_free(ssl_args); + if (cctx) + SSL_CONF_CTX_free(cctx); +#ifndef OPENSSL_NO_JPAKE + if (jpake_secret && psk_key) + OPENSSL_free(psk_key); +#endif if (cbuf != NULL) { OPENSSL_cleanse(cbuf, BUFSIZZ); OPENSSL_free(cbuf); @@ -1907,6 +2097,10 @@ int MAIN(int argc, char **argv) BIO_free(bio_c_out); bio_c_out = NULL; } + if (bio_c_msg != NULL) { + BIO_free(bio_c_msg); + bio_c_msg = NULL; + } apps_shutdown(); OPENSSL_EXIT(ret); } @@ -2000,6 +2194,9 @@ static void print_stuff(BIO *bio, SSL *s, int full) BIO_write(bio, "\n", 1); } + ssl_print_sigalgs(bio, s); + ssl_print_tmp_key(bio, s); + BIO_printf(bio, "---\nSSL handshake has read %ld bytes and written %ld bytes\n", BIO_number_read(SSL_get_rbio(s)), @@ -2039,7 +2236,8 @@ static void print_stuff(BIO *bio, SSL *s, int full) } #endif -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) if (next_proto.status != -1) { const unsigned char *proto; unsigned int proto_len; @@ -2048,6 +2246,18 @@ static void print_stuff(BIO *bio, SSL *s, int full) BIO_write(bio, proto, proto_len); BIO_write(bio, "\n", 1); } +# endif + { + const unsigned char *proto; + unsigned int proto_len; + SSL_get0_alpn_selected(s, &proto, &proto_len); + if (proto_len > 0) { + BIO_printf(bio, "ALPN protocol: "); + BIO_write(bio, proto, proto_len); + BIO_write(bio, "\n", 1); + } else + BIO_printf(bio, "No ALPN negotiated\n"); + } #endif #ifndef OPENSSL_NO_SRTP diff --git a/deps/openssl/openssl/apps/s_server.c b/deps/openssl/openssl/apps/s_server.c index bcf5c3313714c1..a8491acfdd3af0 100644 --- a/deps/openssl/openssl/apps/s_server.c +++ b/deps/openssl/openssl/apps/s_server.c @@ 
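The print_stuff() addition above is the standard way to report the ALPN outcome after the handshake: SSL_get0_alpn_selected() yields a length-bounded, non-NUL-terminated name, with a zero length meaning nothing was negotiated. A minimal sketch of the same check outside the apps code:

#include <stdio.h>
#include <openssl/ssl.h>

static void show_alpn_example(const SSL *s)
{
    const unsigned char *proto;
    unsigned int len;

    SSL_get0_alpn_selected(s, &proto, &len);
    if (len > 0)
        printf("ALPN protocol: %.*s\n", (int)len, proto);
    else
        printf("No ALPN negotiated\n");
}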
-209,14 +209,17 @@ typedef unsigned int u_int; #ifndef OPENSSL_NO_RSA static RSA MS_CALLBACK *tmp_rsa_cb(SSL *s, int is_export, int keylength); #endif -static int sv_body(char *hostname, int s, unsigned char *context); -static int www_body(char *hostname, int s, unsigned char *context); +static int sv_body(char *hostname, int s, int stype, unsigned char *context); +static int www_body(char *hostname, int s, int stype, unsigned char *context); +static int rev_body(char *hostname, int s, int stype, unsigned char *context); static void close_accept_socket(void); static void sv_usage(void); static int init_ssl_connection(SSL *s); static void print_stats(BIO *bp, SSL_CTX *ctx); static int generate_session_id(const SSL *ssl, unsigned char *id, unsigned int *id_len); +static void init_session_cache_ctx(SSL_CTX *sctx); +static void free_sessions(void); #ifndef OPENSSL_NO_DH static DH *load_dh_param(const char *dhfile); static DH *get_dh512(void); @@ -268,16 +271,16 @@ static int accept_socket = -1; #undef PROG #define PROG s_server_main -extern int verify_depth, verify_return_error; +extern int verify_depth, verify_return_error, verify_quiet; -static char *cipher = NULL; static int s_server_verify = SSL_VERIFY_NONE; static int s_server_session_id_context = 1; /* anything will do */ -static const char *s_cert_file = TEST_CERT, *s_key_file = NULL; +static const char *s_cert_file = TEST_CERT, *s_key_file = + NULL, *s_chain_file = NULL; #ifndef OPENSSL_NO_TLSEXT static const char *s_cert_file2 = TEST_CERT2, *s_key_file2 = NULL; #endif -static char *s_dcert_file = NULL, *s_dkey_file = NULL; +static char *s_dcert_file = NULL, *s_dkey_file = NULL, *s_dchain_file = NULL; #ifdef FIONBIO static int s_nbio = 0; #endif @@ -290,14 +293,18 @@ static SSL_CTX *ctx2 = NULL; static int www = 0; static BIO *bio_s_out = NULL; +static BIO *bio_s_msg = NULL; static int s_debug = 0; #ifndef OPENSSL_NO_TLSEXT static int s_tlsextdebug = 0; static int s_tlsextstatus = 0; static int cert_status_cb(SSL *s, void *arg); #endif +static int no_resume_ephemeral = 0; static int s_msg = 0; static int s_quiet = 0; +static int s_ign_eof = 0; +static int s_brief = 0; static char *keymatexportlabel = NULL; static int keymatexportlen = 20; @@ -314,6 +321,12 @@ static long socket_mtu; static int cert_chain = 0; #endif +#ifndef OPENSSL_NO_TLSEXT +static BIO *serverinfo_in = NULL; +static const char *s_serverinfo_file = NULL; + +#endif + #ifndef OPENSSL_NO_PSK static char *psk_identity = "Client_identity"; char *psk_key = NULL; /* by default PSK is not used */ @@ -429,12 +442,13 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) static void s_server_init(void) { accept_socket = -1; - cipher = NULL; s_server_verify = SSL_VERIFY_NONE; s_dcert_file = NULL; s_dkey_file = NULL; + s_dchain_file = NULL; s_cert_file = TEST_CERT; s_key_file = NULL; + s_chain_file = NULL; # ifndef OPENSSL_NO_TLSEXT s_cert_file2 = TEST_CERT2; s_key_file2 = NULL; @@ -451,6 +465,7 @@ static void s_server_init(void) s_debug = 0; s_msg = 0; s_quiet = 0; + s_brief = 0; hack = 0; # ifndef OPENSSL_NO_ENGINE engine_id = NULL; @@ -464,6 +479,12 @@ static void sv_usage(void) BIO_printf(bio_err, "\n"); BIO_printf(bio_err, " -accept arg - port to accept on (default is %d)\n", PORT); + BIO_printf(bio_err, + " -verify_host host - check peer certificate matches \"host\"\n"); + BIO_printf(bio_err, + " -verify_email email - check peer certificate matches \"email\"\n"); + BIO_printf(bio_err, + " -verify_ip ipaddr - check peer certificate matches 
\"ipaddr\"\n"); BIO_printf(bio_err, " -context arg - set session ID context\n"); BIO_printf(bio_err, " -verify arg - turn on peer certificate verification\n"); @@ -473,6 +494,16 @@ static void sv_usage(void) " -verify_return_error - return verification errors\n"); BIO_printf(bio_err, " -cert arg - certificate file to use\n"); BIO_printf(bio_err, " (default is %s)\n", TEST_CERT); +#ifndef OPENSSL_NO_TLSEXT + BIO_printf(bio_err, + " -serverinfo arg - PEM serverinfo file for certificate\n"); + BIO_printf(bio_err, + " -auth - send and receive RFC 5878 TLS auth extensions and supplemental data\n"); + BIO_printf(bio_err, + " -auth_require_reneg - Do not send TLS auth extensions until renegotiation\n"); +#endif + BIO_printf(bio_err, + " -no_resumption_on_reneg - set SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION flag\n"); BIO_printf(bio_err, " -crl_check - check the peer certificate has not been revoked by its CA.\n" " The CRL(s) are appended to the certificate file\n"); @@ -549,6 +580,7 @@ static void sv_usage(void) BIO_printf(bio_err, " -tls1_1 - Just talk TLSv1.1\n"); BIO_printf(bio_err, " -tls1 - Just talk TLSv1\n"); BIO_printf(bio_err, " -dtls1 - Just talk DTLSv1\n"); + BIO_printf(bio_err, " -dtls1_2 - Just talk DTLSv1.2\n"); BIO_printf(bio_err, " -timeout - Enable timeouts\n"); BIO_printf(bio_err, " -mtu - Set link layer MTU\n"); BIO_printf(bio_err, " -chain - Read a certificate chain\n"); @@ -608,6 +640,8 @@ static void sv_usage(void) BIO_printf(bio_err, " -use_srtp profiles - Offer SRTP key management with a colon-separated profile list\n"); # endif + BIO_printf(bio_err, + " -alpn arg - set the advertised protocols for the ALPN extension (comma-separated list)\n"); #endif BIO_printf(bio_err, " -keymatexport label - Export keying material using label\n"); @@ -968,12 +1002,53 @@ static int next_proto_cb(SSL *s, const unsigned char **data, } # endif /* ndef OPENSSL_NO_NEXTPROTONEG */ -#endif +/* This the context that we pass to alpn_cb */ +typedef struct tlsextalpnctx_st { + unsigned char *data; + unsigned short len; +} tlsextalpnctx; + +static int alpn_cb(SSL *s, const unsigned char **out, unsigned char *outlen, + const unsigned char *in, unsigned int inlen, void *arg) +{ + tlsextalpnctx *alpn_ctx = arg; + + if (!s_quiet) { + /* We can assume that |in| is syntactically valid. 
*/ + unsigned i; + BIO_printf(bio_s_out, "ALPN protocols advertised by the client: "); + for (i = 0; i < inlen;) { + if (i) + BIO_write(bio_s_out, ", ", 2); + BIO_write(bio_s_out, &in[i + 1], in[i]); + i += in[i] + 1; + } + BIO_write(bio_s_out, "\n", 1); + } + + if (SSL_select_next_proto + ((unsigned char **)out, outlen, alpn_ctx->data, alpn_ctx->len, in, + inlen) != OPENSSL_NPN_NEGOTIATED) { + return SSL_TLSEXT_ERR_NOACK; + } + + if (!s_quiet) { + BIO_printf(bio_s_out, "ALPN protocols selected: "); + BIO_write(bio_s_out, *out, *outlen); + BIO_write(bio_s_out, "\n", 1); + } + + return SSL_TLSEXT_ERR_OK; +} +#endif /* ndef OPENSSL_NO_TLSEXT */ int MAIN(int, char **); #ifndef OPENSSL_NO_JPAKE static char *jpake_secret = NULL; +# define no_jpake !jpake_secret +#else +# define no_jpake 1 #endif #ifndef OPENSSL_NO_SRP static srpsrvparm srp_callback_parm; @@ -988,18 +1063,14 @@ int MAIN(int argc, char *argv[]) int badarg = 0; short port = PORT; char *CApath = NULL, *CAfile = NULL; + char *chCApath = NULL, *chCAfile = NULL; + char *vfyCApath = NULL, *vfyCAfile = NULL; unsigned char *context = NULL; char *dhfile = NULL; -#ifndef OPENSSL_NO_ECDH - char *named_curve = NULL; -#endif - int badop = 0, bugs = 0; + int badop = 0; int ret = 1; - int off = 0; - int no_tmp_rsa = 0, no_dhe = 0, nocert = 0; -#ifndef OPENSSL_NO_ECDH - int no_ecdhe; -#endif + int build_chain = 0; + int no_tmp_rsa = 0, no_dhe = 0, no_ecdhe = 0, nocert = 0; int state = 0; const SSL_METHOD *meth = NULL; int socket_type = SOCK_STREAM; @@ -1010,16 +1081,20 @@ int MAIN(int argc, char *argv[]) char *dpassarg = NULL, *dpass = NULL; int s_dcert_format = FORMAT_PEM, s_dkey_format = FORMAT_PEM; X509 *s_cert = NULL, *s_dcert = NULL; + STACK_OF(X509) *s_chain = NULL, *s_dchain = NULL; EVP_PKEY *s_key = NULL, *s_dkey = NULL; - int no_cache = 0; + int no_cache = 0, ext_cache = 0; + int rev = 0, naccept = -1; #ifndef OPENSSL_NO_TLSEXT EVP_PKEY *s_key2 = NULL; X509 *s_cert2 = NULL; tlsextctx tlsextcbp = { NULL, NULL, SSL_TLSEXT_ERR_ALERT_WARNING }; # ifndef OPENSSL_NO_NEXTPROTONEG const char *next_proto_neg_in = NULL; - tlsextnextprotoctx next_proto; + tlsextnextprotoctx next_proto = { NULL, 0 }; # endif + const char *alpn_in = NULL; + tlsextalpnctx alpn_ctx = { NULL, 0 }; #endif #ifndef OPENSSL_NO_PSK /* by default do not send a PSK identity hint */ @@ -1029,6 +1104,15 @@ int MAIN(int argc, char *argv[]) char *srpuserseed = NULL; char *srp_verifier_file = NULL; #endif + SSL_EXCERT *exc = NULL; + SSL_CONF_CTX *cctx = NULL; + STACK_OF(OPENSSL_STRING) *ssl_args = NULL; + + char *crl_file = NULL; + int crl_format = FORMAT_PEM; + int crl_download = 0; + STACK_OF(X509_CRL) *crls = NULL; + meth = SSLv23_server_method(); local_argc = argc; @@ -1045,6 +1129,12 @@ int MAIN(int argc, char *argv[]) if (!load_config(bio_err, NULL)) goto end; + cctx = SSL_CONF_CTX_new(); + if (!cctx) + goto end; + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_SERVER); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CMDLINE); + verify_depth = 0; #ifdef FIONBIO s_nbio = 0; @@ -1060,12 +1150,21 @@ int MAIN(int argc, char *argv[]) goto bad; if (!extract_port(*(++argv), &port)) goto bad; + } else if (strcmp(*argv, "-naccept") == 0) { + if (--argc < 1) + goto bad; + naccept = atol(*(++argv)); + if (naccept <= 0) { + BIO_printf(bio_err, "bad accept value %s\n", *argv); + goto bad; + } } else if (strcmp(*argv, "-verify") == 0) { s_server_verify = SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE; if (--argc < 1) goto bad; verify_depth = atoi(*(++argv)); - BIO_printf(bio_err, "verify depth is 
%d\n", verify_depth); + if (!s_quiet) + BIO_printf(bio_err, "verify depth is %d\n", verify_depth); } else if (strcmp(*argv, "-Verify") == 0) { s_server_verify = SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT | @@ -1073,9 +1172,10 @@ int MAIN(int argc, char *argv[]) if (--argc < 1) goto bad; verify_depth = atoi(*(++argv)); - BIO_printf(bio_err, - "verify depth is %d, must return a certificate\n", - verify_depth); + if (!s_quiet) + BIO_printf(bio_err, + "verify depth is %d, must return a certificate\n", + verify_depth); } else if (strcmp(*argv, "-context") == 0) { if (--argc < 1) goto bad; @@ -1084,7 +1184,20 @@ int MAIN(int argc, char *argv[]) if (--argc < 1) goto bad; s_cert_file = *(++argv); - } else if (strcmp(*argv, "-certform") == 0) { + } else if (strcmp(*argv, "-CRL") == 0) { + if (--argc < 1) + goto bad; + crl_file = *(++argv); + } else if (strcmp(*argv, "-crl_download") == 0) + crl_download = 1; +#ifndef OPENSSL_NO_TLSEXT + else if (strcmp(*argv, "-serverinfo") == 0) { + if (--argc < 1) + goto bad; + s_serverinfo_file = *(++argv); + } +#endif + else if (strcmp(*argv, "-certform") == 0) { if (--argc < 1) goto bad; s_cert_format = str2fmt(*(++argv)); @@ -1100,19 +1213,15 @@ int MAIN(int argc, char *argv[]) if (--argc < 1) goto bad; passarg = *(++argv); - } else if (strcmp(*argv, "-dhparam") == 0) { + } else if (strcmp(*argv, "-cert_chain") == 0) { if (--argc < 1) goto bad; - dhfile = *(++argv); - } -#ifndef OPENSSL_NO_ECDH - else if (strcmp(*argv, "-named_curve") == 0) { + s_chain_file = *(++argv); + } else if (strcmp(*argv, "-dhparam") == 0) { if (--argc < 1) goto bad; - named_curve = *(++argv); - } -#endif - else if (strcmp(*argv, "-dcertform") == 0) { + dhfile = *(++argv); + } else if (strcmp(*argv, "-dcertform") == 0) { if (--argc < 1) goto bad; s_dcert_format = str2fmt(*(++argv)); @@ -1132,32 +1241,62 @@ int MAIN(int argc, char *argv[]) if (--argc < 1) goto bad; s_dkey_file = *(++argv); + } else if (strcmp(*argv, "-dcert_chain") == 0) { + if (--argc < 1) + goto bad; + s_dchain_file = *(++argv); } else if (strcmp(*argv, "-nocert") == 0) { nocert = 1; } else if (strcmp(*argv, "-CApath") == 0) { if (--argc < 1) goto bad; CApath = *(++argv); + } else if (strcmp(*argv, "-chainCApath") == 0) { + if (--argc < 1) + goto bad; + chCApath = *(++argv); + } else if (strcmp(*argv, "-verifyCApath") == 0) { + if (--argc < 1) + goto bad; + vfyCApath = *(++argv); } else if (strcmp(*argv, "-no_cache") == 0) no_cache = 1; - else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) { + else if (strcmp(*argv, "-ext_cache") == 0) + ext_cache = 1; + else if (strcmp(*argv, "-CRLform") == 0) { + if (--argc < 1) + goto bad; + crl_format = str2fmt(*(++argv)); + } else if (args_verify(&argv, &argc, &badarg, bio_err, &vpm)) { + if (badarg) + goto bad; + continue; + } else if (args_excert(&argv, &argc, &badarg, bio_err, &exc)) { + if (badarg) + goto bad; + continue; + } else if (args_ssl(&argv, &argc, cctx, &badarg, bio_err, &ssl_args)) { if (badarg) goto bad; continue; } else if (strcmp(*argv, "-verify_return_error") == 0) verify_return_error = 1; - else if (strcmp(*argv, "-serverpref") == 0) { - off |= SSL_OP_CIPHER_SERVER_PREFERENCE; - } else if (strcmp(*argv, "-legacy_renegotiation") == 0) - off |= SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION; - else if (strcmp(*argv, "-cipher") == 0) { + else if (strcmp(*argv, "-verify_quiet") == 0) + verify_quiet = 1; + else if (strcmp(*argv, "-build_chain") == 0) + build_chain = 1; + else if (strcmp(*argv, "-CAfile") == 0) { if (--argc < 1) goto bad; - cipher = 
*(++argv); - } else if (strcmp(*argv, "-CAfile") == 0) { + CAfile = *(++argv); + } else if (strcmp(*argv, "-chainCAfile") == 0) { if (--argc < 1) goto bad; - CAfile = *(++argv); + chCAfile = *(++argv); + } else if (strcmp(*argv, "-verifyCAfile") == 0) { + if (--argc < 1) + goto bad; + vfyCAfile = *(++argv); } #ifdef FIONBIO else if (strcmp(*argv, "-nbio") == 0) { @@ -1169,7 +1308,11 @@ int MAIN(int argc, char *argv[]) s_nbio = 1; #endif s_nbio_test = 1; - } else if (strcmp(*argv, "-debug") == 0) { + } else if (strcmp(*argv, "-ign_eof") == 0) + s_ign_eof = 1; + else if (strcmp(*argv, "-no_ign_eof") == 0) + s_ign_eof = 0; + else if (strcmp(*argv, "-debug") == 0) { s_debug = 1; } #ifndef OPENSSL_NO_TLSEXT @@ -1200,7 +1343,17 @@ int MAIN(int argc, char *argv[]) #endif else if (strcmp(*argv, "-msg") == 0) { s_msg = 1; - } else if (strcmp(*argv, "-hack") == 0) { + } else if (strcmp(*argv, "-msgfile") == 0) { + if (--argc < 1) + goto bad; + bio_s_msg = BIO_new_file(*(++argv), "w"); + } +#ifndef OPENSSL_NO_SSL_TRACE + else if (strcmp(*argv, "-trace") == 0) { + s_msg = 2; + } +#endif + else if (strcmp(*argv, "-hack") == 0) { hack = 1; } else if (strcmp(*argv, "-state") == 0) { state = 1; @@ -1208,18 +1361,19 @@ int MAIN(int argc, char *argv[]) s_crlf = 1; } else if (strcmp(*argv, "-quiet") == 0) { s_quiet = 1; - } else if (strcmp(*argv, "-bugs") == 0) { - bugs = 1; + } else if (strcmp(*argv, "-brief") == 0) { + s_quiet = 1; + s_brief = 1; + verify_quiet = 1; } else if (strcmp(*argv, "-no_tmp_rsa") == 0) { no_tmp_rsa = 1; } else if (strcmp(*argv, "-no_dhe") == 0) { no_dhe = 1; - } -#ifndef OPENSSL_NO_ECDH - else if (strcmp(*argv, "-no_ecdhe") == 0) { + } else if (strcmp(*argv, "-no_ecdhe") == 0) { no_ecdhe = 1; + } else if (strcmp(*argv, "-no_resume_ephemeral") == 0) { + no_resume_ephemeral = 1; } -#endif #ifndef OPENSSL_NO_PSK else if (strcmp(*argv, "-psk_hint") == 0) { if (--argc < 1) @@ -1252,32 +1406,18 @@ int MAIN(int argc, char *argv[]) meth = TLSv1_server_method(); } #endif - else if (strcmp(*argv, "-www") == 0) { + else if (strcmp(*argv, "-rev") == 0) { + rev = 1; + } else if (strcmp(*argv, "-www") == 0) { www = 1; } else if (strcmp(*argv, "-WWW") == 0) { www = 2; } else if (strcmp(*argv, "-HTTP") == 0) { www = 3; - } else if (strcmp(*argv, "-no_ssl2") == 0) { - off |= SSL_OP_NO_SSLv2; - } else if (strcmp(*argv, "-no_ssl3") == 0) { - off |= SSL_OP_NO_SSLv3; - } else if (strcmp(*argv, "-no_tls1") == 0) { - off |= SSL_OP_NO_TLSv1; - } else if (strcmp(*argv, "-no_tls1_1") == 0) { - off |= SSL_OP_NO_TLSv1_1; - } else if (strcmp(*argv, "-no_tls1_2") == 0) { - off |= SSL_OP_NO_TLSv1_2; - } else if (strcmp(*argv, "-no_comp") == 0) { - off |= SSL_OP_NO_COMPRESSION; } -#ifndef OPENSSL_NO_TLSEXT - else if (strcmp(*argv, "-no_ticket") == 0) { - off |= SSL_OP_NO_TICKET; - } -#endif #ifndef OPENSSL_NO_SSL2 else if (strcmp(*argv, "-ssl2") == 0) { + no_ecdhe = 1; meth = SSLv2_server_method(); } #endif @@ -1296,9 +1436,15 @@ int MAIN(int argc, char *argv[]) } #endif #ifndef OPENSSL_NO_DTLS1 - else if (strcmp(*argv, "-dtls1") == 0) { + else if (strcmp(*argv, "-dtls") == 0) { + meth = DTLS_server_method(); + socket_type = SOCK_DGRAM; + } else if (strcmp(*argv, "-dtls1") == 0) { meth = DTLSv1_server_method(); socket_type = SOCK_DGRAM; + } else if (strcmp(*argv, "-dtls1_2") == 0) { + meth = DTLSv1_2_server_method(); + socket_type = SOCK_DGRAM; } else if (strcmp(*argv, "-timeout") == 0) enable_timeouts = 1; else if (strcmp(*argv, "-mtu") == 0) { @@ -1348,6 +1494,11 @@ int MAIN(int argc, char *argv[]) 
next_proto_neg_in = *(++argv); } # endif + else if (strcmp(*argv, "-alpn") == 0) { + if (--argc < 1) + goto bad; + alpn_in = *(++argv); + } #endif #if !defined(OPENSSL_NO_JPAKE) && !defined(OPENSSL_NO_PSK) else if (strcmp(*argv, "-jpake") == 0) { @@ -1400,11 +1551,6 @@ int MAIN(int argc, char *argv[]) goto end; } psk_identity = "JPAKE"; - if (cipher) { - BIO_printf(bio_err, "JPAKE sets cipher to PSK\n"); - goto end; - } - cipher = "PSK"; } #endif @@ -1427,6 +1573,9 @@ int MAIN(int argc, char *argv[]) s_key_file2 = s_cert_file2; #endif + if (!load_excert(&exc, bio_err)) + goto end; + if (nocert == 0) { s_key = load_key(bio_err, s_key_file, s_key_format, 0, pass, e, "server certificate private key file"); @@ -1442,6 +1591,12 @@ int MAIN(int argc, char *argv[]) ERR_print_errors(bio_err); goto end; } + if (s_chain_file) { + s_chain = load_certs(bio_err, s_chain_file, FORMAT_PEM, + NULL, e, "server certificate chain"); + if (!s_chain) + goto end; + } #ifndef OPENSSL_NO_TLSEXT if (tlsextcbp.servername) { s_key2 = load_key(bio_err, s_key_file2, s_key_format, 0, pass, e, @@ -1459,9 +1614,10 @@ int MAIN(int argc, char *argv[]) goto end; } } -#endif +#endif /* OPENSSL_NO_TLSEXT */ } -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +#if !defined(OPENSSL_NO_TLSEXT) +# if !defined(OPENSSL_NO_NEXTPROTONEG) if (next_proto_neg_in) { unsigned short len; next_proto.data = next_protos_parse(&len, next_proto_neg_in); @@ -1471,8 +1627,34 @@ int MAIN(int argc, char *argv[]) } else { next_proto.data = NULL; } +# endif + alpn_ctx.data = NULL; + if (alpn_in) { + unsigned short len; + alpn_ctx.data = next_protos_parse(&len, alpn_in); + if (alpn_ctx.data == NULL) + goto end; + alpn_ctx.len = len; + } #endif + if (crl_file) { + X509_CRL *crl; + crl = load_crl(crl_file, crl_format); + if (!crl) { + BIO_puts(bio_err, "Error loading CRL\n"); + ERR_print_errors(bio_err); + goto end; + } + crls = sk_X509_CRL_new_null(); + if (!crls || !sk_X509_CRL_push(crls, crl)) { + BIO_puts(bio_err, "Error adding CRL\n"); + ERR_print_errors(bio_err); + X509_CRL_free(crl); + goto end; + } + } + if (s_dcert_file) { if (s_dkey_file == NULL) @@ -1492,6 +1674,12 @@ int MAIN(int argc, char *argv[]) ERR_print_errors(bio_err); goto end; } + if (s_dchain_file) { + s_dchain = load_certs(bio_err, s_dchain_file, FORMAT_PEM, + NULL, e, "second server certificate chain"); + if (!s_dchain) + goto end; + } } @@ -1505,8 +1693,10 @@ int MAIN(int argc, char *argv[]) app_RAND_load_files(inrand)); if (bio_s_out == NULL) { - if (s_quiet && !s_debug && !s_msg) { + if (s_quiet && !s_debug) { bio_s_out = BIO_new(BIO_s_null()); + if (s_msg && !bio_s_msg) + bio_s_msg = BIO_new_fp(stdout, BIO_NOCLOSE); } else { if (bio_s_out == NULL) bio_s_out = BIO_new_fp(stdout, BIO_NOCLOSE); @@ -1546,16 +1736,17 @@ int MAIN(int argc, char *argv[]) BIO_printf(bio_err, "id_prefix '%s' set.\n", session_id_prefix); } SSL_CTX_set_quiet_shutdown(ctx, 1); - if (bugs) - SSL_CTX_set_options(ctx, SSL_OP_ALL); if (hack) SSL_CTX_set_options(ctx, SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG); - SSL_CTX_set_options(ctx, off); + if (exc) + ssl_ctx_set_excert(ctx, exc); if (state) SSL_CTX_set_info_callback(ctx, apps_ssl_info_callback); if (no_cache) SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_OFF); + else if (ext_cache) + init_session_cache_ctx(ctx); else SSL_CTX_sess_set_cache_size(ctx, 128); @@ -1586,6 +1777,17 @@ int MAIN(int argc, char *argv[]) if (vpm) SSL_CTX_set1_param(ctx, vpm); + ssl_ctx_add_crls(ctx, crls, 0); + + if (!args_ssl_call(ctx, bio_err, cctx, 
ssl_args, no_ecdhe, no_jpake)) + goto end; + + if (!ssl_load_stores(ctx, vfyCApath, vfyCAfile, chCApath, chCAfile, + crls, crl_download)) { + BIO_printf(bio_err, "Error loading store locations\n"); + ERR_print_errors(bio_err); + goto end; + } #ifndef OPENSSL_NO_TLSEXT if (s_cert2) { ctx2 = SSL_CTX_new(meth); @@ -1613,17 +1815,18 @@ int MAIN(int argc, char *argv[]) BIO_printf(bio_err, "id_prefix '%s' set.\n", session_id_prefix); } SSL_CTX_set_quiet_shutdown(ctx2, 1); - if (bugs) - SSL_CTX_set_options(ctx2, SSL_OP_ALL); if (hack) SSL_CTX_set_options(ctx2, SSL_OP_NETSCAPE_DEMO_CIPHER_CHANGE_BUG); - SSL_CTX_set_options(ctx2, off); + if (exc) + ssl_ctx_set_excert(ctx2, exc); if (state) SSL_CTX_set_info_callback(ctx2, apps_ssl_info_callback); if (no_cache) SSL_CTX_set_session_cache_mode(ctx2, SSL_SESS_CACHE_OFF); + else if (ext_cache) + init_session_cache_ctx(ctx2); else SSL_CTX_sess_set_cache_size(ctx2, 128); @@ -1633,12 +1836,20 @@ int MAIN(int argc, char *argv[]) } if (vpm) SSL_CTX_set1_param(ctx2, vpm); + + ssl_ctx_add_crls(ctx2, crls, 0); + + if (!args_ssl_call(ctx2, bio_err, cctx, ssl_args, no_ecdhe, no_jpake)) + goto end; + } # ifndef OPENSSL_NO_NEXTPROTONEG if (next_proto.data) SSL_CTX_set_next_protos_advertised_cb(ctx, next_proto_cb, &next_proto); # endif + if (alpn_ctx.data) + SSL_CTX_set_alpn_select_cb(ctx, alpn_cb, &alpn_ctx); #endif #ifndef OPENSSL_NO_DH @@ -1678,54 +1889,21 @@ int MAIN(int argc, char *argv[]) } #endif -#ifndef OPENSSL_NO_ECDH - if (!no_ecdhe) { - EC_KEY *ecdh = NULL; - - if (named_curve) { - int nid = OBJ_sn2nid(named_curve); - - if (nid == 0) { - BIO_printf(bio_err, "unknown curve name (%s)\n", named_curve); - goto end; - } - ecdh = EC_KEY_new_by_curve_name(nid); - if (ecdh == NULL) { - BIO_printf(bio_err, "unable to create curve (%s)\n", - named_curve); - goto end; - } - } - - if (ecdh != NULL) { - BIO_printf(bio_s_out, "Setting temp ECDH parameters\n"); - } else { - BIO_printf(bio_s_out, "Using default temp ECDH parameters\n"); - ecdh = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); - if (ecdh == NULL) { - BIO_printf(bio_err, "unable to create curve (nistp256)\n"); - goto end; - } - } - (void)BIO_flush(bio_s_out); - - SSL_CTX_set_tmp_ecdh(ctx, ecdh); -# ifndef OPENSSL_NO_TLSEXT - if (ctx2) - SSL_CTX_set_tmp_ecdh(ctx2, ecdh); -# endif - EC_KEY_free(ecdh); + if (!set_cert_key_stuff(ctx, s_cert, s_key, s_chain, build_chain)) + goto end; +#ifndef OPENSSL_NO_TLSEXT + if (s_serverinfo_file != NULL + && !SSL_CTX_use_serverinfo_file(ctx, s_serverinfo_file)) { + ERR_print_errors(bio_err); + goto end; } #endif - - if (!set_cert_key_stuff(ctx, s_cert, s_key)) - goto end; #ifndef OPENSSL_NO_TLSEXT - if (ctx2 && !set_cert_key_stuff(ctx2, s_cert2, s_key2)) + if (ctx2 && !set_cert_key_stuff(ctx2, s_cert2, s_key2, NULL, build_chain)) goto end; #endif if (s_dcert != NULL) { - if (!set_cert_key_stuff(ctx, s_dcert, s_dkey)) + if (!set_cert_key_stuff(ctx, s_dcert, s_dkey, s_dchain, build_chain)) goto end; } #ifndef OPENSSL_NO_RSA @@ -1784,20 +1962,6 @@ int MAIN(int argc, char *argv[]) } #endif - if (cipher != NULL) { - if (!SSL_CTX_set_cipher_list(ctx, cipher)) { - BIO_printf(bio_err, "error setting cipher list\n"); - ERR_print_errors(bio_err); - goto end; - } -#ifndef OPENSSL_NO_TLSEXT - if (ctx2 && !SSL_CTX_set_cipher_list(ctx2, cipher)) { - BIO_printf(bio_err, "error setting cipher list\n"); - ERR_print_errors(bio_err); - goto end; - } -#endif - } SSL_CTX_set_verify(ctx, s_server_verify, verify_callback); SSL_CTX_set_session_id_context(ctx, (void *)&s_server_session_id_context, 
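The registration shown above, SSL_CTX_set_alpn_select_cb(ctx, alpn_cb, &alpn_ctx), pairs the server with a callback that picks the first server-preferred protocol the client also offered. A reduced sketch of that selection logic, assuming a hard-coded preference list instead of the -alpn argument:

#include <openssl/ssl.h>

static const unsigned char server_protos[] = {
    8, 'h', 't', 't', 'p', '/', '1', '.', '1'    /* "http/1.1" */
};

static int example_alpn_select(SSL *s, const unsigned char **out,
                               unsigned char *outlen,
                               const unsigned char *in, unsigned int inlen,
                               void *arg)
{
    /* walks the client's list against ours; NOACK means "no ALPN answer" */
    if (SSL_select_next_proto((unsigned char **)out, outlen,
                              server_protos, sizeof(server_protos),
                              in, inlen) != OPENSSL_NPN_NEGOTIATED)
        return SSL_TLSEXT_ERR_NOACK;
    return SSL_TLSEXT_ERR_OK;
}

/* registered with: SSL_CTX_set_alpn_select_cb(ctx, example_alpn_select, NULL); */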
sizeof s_server_session_id_context); @@ -1849,10 +2013,15 @@ int MAIN(int argc, char *argv[]) BIO_printf(bio_s_out, "ACCEPT\n"); (void)BIO_flush(bio_s_out); - if (www) - do_server(port, socket_type, &accept_socket, www_body, context); + if (rev) + do_server(port, socket_type, &accept_socket, rev_body, context, + naccept); + else if (www) + do_server(port, socket_type, &accept_socket, www_body, context, + naccept); else - do_server(port, socket_type, &accept_socket, sv_body, context); + do_server(port, socket_type, &accept_socket, sv_body, context, + naccept); print_stats(bio_s_out, ctx); ret = 0; end: @@ -1860,18 +2029,25 @@ int MAIN(int argc, char *argv[]) SSL_CTX_free(ctx); if (s_cert) X509_free(s_cert); + if (crls) + sk_X509_CRL_pop_free(crls, X509_CRL_free); if (s_dcert) X509_free(s_dcert); if (s_key) EVP_PKEY_free(s_key); if (s_dkey) EVP_PKEY_free(s_dkey); + if (s_chain) + sk_X509_pop_free(s_chain, X509_free); + if (s_dchain) + sk_X509_pop_free(s_dchain, X509_free); if (pass) OPENSSL_free(pass); if (dpass) OPENSSL_free(dpass); if (vpm) X509_VERIFY_PARAM_free(vpm); + free_sessions(); #ifndef OPENSSL_NO_TLSEXT if (tlscstatp.host) OPENSSL_free(tlscstatp.host); @@ -1885,11 +2061,32 @@ int MAIN(int argc, char *argv[]) X509_free(s_cert2); if (s_key2) EVP_PKEY_free(s_key2); + if (serverinfo_in != NULL) + BIO_free(serverinfo_in); +# ifndef OPENSSL_NO_NEXTPROTONEG + if (next_proto.data) + OPENSSL_free(next_proto.data); +# endif + if (alpn_ctx.data) + OPENSSL_free(alpn_ctx.data); +#endif + ssl_excert_free(exc); + if (ssl_args) + sk_OPENSSL_STRING_free(ssl_args); + if (cctx) + SSL_CONF_CTX_free(cctx); +#ifndef OPENSSL_NO_JPAKE + if (jpake_secret && psk_key) + OPENSSL_free(psk_key); #endif if (bio_s_out != NULL) { BIO_free(bio_s_out); bio_s_out = NULL; } + if (bio_s_msg != NULL) { + BIO_free(bio_s_msg); + bio_s_msg = NULL; + } apps_shutdown(); OPENSSL_EXIT(ret); } @@ -1922,7 +2119,7 @@ static void print_stats(BIO *bio, SSL_CTX *ssl_ctx) SSL_CTX_sess_get_cache_size(ssl_ctx)); } -static int sv_body(char *hostname, int s, unsigned char *context) +static int sv_body(char *hostname, int s, int stype, unsigned char *context) { char *buf = NULL; fd_set readfds; @@ -1986,7 +2183,7 @@ static int sv_body(char *hostname, int s, unsigned char *context) # endif #endif - if (SSL_version(con) == DTLS1_VERSION) { + if (stype == SOCK_DGRAM) { sbio = BIO_new_dgram(s, BIO_NOCLOSE); @@ -2045,8 +2242,13 @@ static int sv_body(char *hostname, int s, unsigned char *context) BIO_set_callback_arg(SSL_get_rbio(con), (char *)bio_s_out); } if (s_msg) { - SSL_set_msg_callback(con, msg_cb); - SSL_set_msg_callback_arg(con, bio_s_out); +#ifndef OPENSSL_NO_SSL_TRACE + if (s_msg == 2) + SSL_set_msg_callback(con, SSL_trace); + else +#endif + SSL_set_msg_callback(con, msg_cb); + SSL_set_msg_callback_arg(con, bio_s_msg ? 
bio_s_msg : bio_s_out); } #ifndef OPENSSL_NO_TLSEXT if (s_tlsextdebug) { @@ -2144,7 +2346,7 @@ static int sv_body(char *hostname, int s, unsigned char *context) assert(lf_num == 0); } else i = raw_read_stdin(buf, bufsize); - if (!s_quiet) { + if (!s_quiet && !s_brief) { if ((i <= 0) || (buf[0] == 'Q')) { BIO_printf(bio_s_out, "DONE\n"); SHUTDOWN(s); @@ -2357,6 +2559,16 @@ static int init_ssl_connection(SSL *con) unsigned char *exportedkeymat; i = SSL_accept(con); +#ifdef CERT_CB_TEST_RETRY + { + while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP + && SSL_state(con) == SSL3_ST_SR_CLNT_HELLO_C) { + fprintf(stderr, + "LOOKUP from certificate callback during accept\n"); + i = SSL_accept(con); + } + } +#endif #ifndef OPENSSL_NO_SRP while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) { BIO_printf(bio_s_out, "LOOKUP during accept %s\n", @@ -2372,6 +2584,7 @@ static int init_ssl_connection(SSL *con) i = SSL_accept(con); } #endif + if (i <= 0) { if (BIO_sock_should_retry(i)) { BIO_printf(bio_s_out, "DELAY\n"); @@ -2383,11 +2596,15 @@ static int init_ssl_connection(SSL *con) if (verify_error != X509_V_OK) { BIO_printf(bio_err, "verify error:%s\n", X509_verify_cert_error_string(verify_error)); - } else - ERR_print_errors(bio_err); + } + /* Always print any error messages */ + ERR_print_errors(bio_err); return (0); } + if (s_brief) + print_ssl_summary(bio_err, con); + PEM_write_bio_SSL_SESSION(bio_s_out, SSL_get_session(con)); peer = SSL_get_peer_certificate(con); @@ -2404,6 +2621,11 @@ static int init_ssl_connection(SSL *con) if (SSL_get_shared_ciphers(con, buf, sizeof buf) != NULL) BIO_printf(bio_s_out, "Shared ciphers:%s\n", buf); str = SSL_CIPHER_get_name(SSL_get_current_cipher(con)); + ssl_print_sigalgs(bio_s_out, con); +#ifndef OPENSSL_NO_EC + ssl_print_point_formats(bio_s_out, con); + ssl_print_curves(bio_s_out, con, 0); +#endif BIO_printf(bio_s_out, "CIPHER is %s\n", (str != NULL) ? str : "(NONE)"); #if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) @@ -2503,7 +2725,7 @@ static int load_CA(SSL_CTX *ctx, char *file) } #endif -static int www_body(char *hostname, int s, unsigned char *context) +static int www_body(char *hostname, int s, int stype, unsigned char *context) { char *buf = NULL; int ret = 1; @@ -2578,8 +2800,13 @@ static int www_body(char *hostname, int s, unsigned char *context) BIO_set_callback_arg(SSL_get_rbio(con), (char *)bio_s_out); } if (s_msg) { - SSL_set_msg_callback(con, msg_cb); - SSL_set_msg_callback_arg(con, bio_s_out); +#ifndef OPENSSL_NO_SSL_TRACE + if (s_msg == 2) + SSL_set_msg_callback(con, SSL_trace); + else +#endif + SSL_set_msg_callback(con, msg_cb); + SSL_set_msg_callback_arg(con, bio_s_msg ? bio_s_msg : bio_s_out); } for (;;) { @@ -2698,6 +2925,10 @@ static int www_body(char *hostname, int s, unsigned char *context) } BIO_puts(io, "\n"); } + ssl_print_sigalgs(io, con); +#ifndef OPENSSL_NO_EC + ssl_print_curves(io, con, 0); +#endif BIO_printf(io, (SSL_cache_hit(con) ? 
"---\nReused, " : "---\nNew, ")); c = SSL_get_current_cipher(con); @@ -2880,6 +3111,140 @@ static int www_body(char *hostname, int s, unsigned char *context) return (ret); } +static int rev_body(char *hostname, int s, int stype, unsigned char *context) +{ + char *buf = NULL; + int i; + int ret = 1; + SSL *con; + BIO *io, *ssl_bio, *sbio; +#ifndef OPENSSL_NO_KRB5 + KSSL_CTX *kctx; +#endif + + buf = OPENSSL_malloc(bufsize); + if (buf == NULL) + return (0); + io = BIO_new(BIO_f_buffer()); + ssl_bio = BIO_new(BIO_f_ssl()); + if ((io == NULL) || (ssl_bio == NULL)) + goto err; + + /* lets make the output buffer a reasonable size */ + if (!BIO_set_write_buffer_size(io, bufsize)) + goto err; + + if ((con = SSL_new(ctx)) == NULL) + goto err; +#ifndef OPENSSL_NO_TLSEXT + if (s_tlsextdebug) { + SSL_set_tlsext_debug_callback(con, tlsext_cb); + SSL_set_tlsext_debug_arg(con, bio_s_out); + } +#endif +#ifndef OPENSSL_NO_KRB5 + if ((kctx = kssl_ctx_new()) != NULL) { + kssl_ctx_setstring(kctx, KSSL_SERVICE, KRB5SVC); + kssl_ctx_setstring(kctx, KSSL_KEYTAB, KRB5KEYTAB); + } +#endif /* OPENSSL_NO_KRB5 */ + if (context) + SSL_set_session_id_context(con, context, strlen((char *)context)); + + sbio = BIO_new_socket(s, BIO_NOCLOSE); + SSL_set_bio(con, sbio, sbio); + SSL_set_accept_state(con); + + BIO_set_ssl(ssl_bio, con, BIO_CLOSE); + BIO_push(io, ssl_bio); +#ifdef CHARSET_EBCDIC + io = BIO_push(BIO_new(BIO_f_ebcdic_filter()), io); +#endif + + if (s_debug) { + SSL_set_debug(con, 1); + BIO_set_callback(SSL_get_rbio(con), bio_dump_callback); + BIO_set_callback_arg(SSL_get_rbio(con), (char *)bio_s_out); + } + if (s_msg) { +#ifndef OPENSSL_NO_SSL_TRACE + if (s_msg == 2) + SSL_set_msg_callback(con, SSL_trace); + else +#endif + SSL_set_msg_callback(con, msg_cb); + SSL_set_msg_callback_arg(con, bio_s_msg ? 
bio_s_msg : bio_s_out); + } + + for (;;) { + i = BIO_do_handshake(io); + if (i > 0) + break; + if (!BIO_should_retry(io)) { + BIO_puts(bio_err, "CONNECTION FAILURE\n"); + ERR_print_errors(bio_err); + goto end; + } + } + BIO_printf(bio_err, "CONNECTION ESTABLISHED\n"); + print_ssl_summary(bio_err, con); + + for (;;) { + i = BIO_gets(io, buf, bufsize - 1); + if (i < 0) { /* error */ + if (!BIO_should_retry(io)) { + if (!s_quiet) + ERR_print_errors(bio_err); + goto err; + } else { + BIO_printf(bio_s_out, "read R BLOCK\n"); +#if defined(OPENSSL_SYS_NETWARE) + delay(1000); +#elif !defined(OPENSSL_SYS_MSDOS) && !defined(__DJGPP__) + sleep(1); +#endif + continue; + } + } else if (i == 0) { /* end of input */ + ret = 1; + BIO_printf(bio_err, "CONNECTION CLOSED\n"); + goto end; + } else { + char *p = buf + i - 1; + while (i && (*p == '\n' || *p == '\r')) { + p--; + i--; + } + if (!s_ign_eof && i == 5 && !strncmp(buf, "CLOSE", 5)) { + ret = 1; + BIO_printf(bio_err, "CONNECTION CLOSED\n"); + goto end; + } + BUF_reverse((unsigned char *)buf, NULL, i); + buf[i] = '\n'; + BIO_write(io, buf, i + 1); + for (;;) { + i = BIO_flush(io); + if (i > 0) + break; + if (!BIO_should_retry(io)) + goto end; + } + } + } + end: + /* make sure we re-use sessions */ + SSL_set_shutdown(con, SSL_SENT_SHUTDOWN | SSL_RECEIVED_SHUTDOWN); + + err: + + if (buf != NULL) + OPENSSL_free(buf); + if (io != NULL) + BIO_free_all(io); + return (ret); +} + #ifndef OPENSSL_NO_RSA static RSA MS_CALLBACK *tmp_rsa_cb(SSL *s, int is_export, int keylength) { @@ -2934,3 +3299,116 @@ static int generate_session_id(const SSL *ssl, unsigned char *id, return 0; return 1; } + +/* + * By default s_server uses an in-memory cache which caches SSL_SESSION + * structures without any serialisation. This hides some bugs which only + * become apparent in deployed servers. By implementing a basic external + * session cache some issues can be debugged using s_server. 
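rev_body() above implements a trivial line-reversal service for protocol testing; its core is BUF_reverse(), which reverses the buffer in place when the source argument is NULL. A minimal sketch, assuming a NUL-terminated line:

#include <string.h>
#include <openssl/buffer.h>

static void reverse_line_example(char *line)
{
    /* NULL "in" means: reverse "out" in place */
    BUF_reverse((unsigned char *)line, NULL, strlen(line));
}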
+ */ + +typedef struct simple_ssl_session_st { + unsigned char *id; + unsigned int idlen; + unsigned char *der; + int derlen; + struct simple_ssl_session_st *next; +} simple_ssl_session; + +static simple_ssl_session *first = NULL; + +static int add_session(SSL *ssl, SSL_SESSION *session) +{ + simple_ssl_session *sess; + unsigned char *p; + + sess = OPENSSL_malloc(sizeof(simple_ssl_session)); + if(!sess) { + BIO_printf(bio_err, "Out of memory adding session to external cache\n"); + return 0; + } + + SSL_SESSION_get_id(session, &sess->idlen); + sess->derlen = i2d_SSL_SESSION(session, NULL); + + sess->id = BUF_memdup(SSL_SESSION_get_id(session, NULL), sess->idlen); + + sess->der = OPENSSL_malloc(sess->derlen); + if(!sess->id || !sess->der) { + BIO_printf(bio_err, "Out of memory adding session to external cache\n"); + + if(sess->id) + OPENSSL_free(sess->id); + if(sess->der) + OPENSSL_free(sess->der); + OPENSSL_free(sess); + return 0; + } + p = sess->der; + i2d_SSL_SESSION(session, &p); + + sess->next = first; + first = sess; + BIO_printf(bio_err, "New session added to external cache\n"); + return 0; +} + +static SSL_SESSION *get_session(SSL *ssl, unsigned char *id, int idlen, + int *do_copy) +{ + simple_ssl_session *sess; + *do_copy = 0; + for (sess = first; sess; sess = sess->next) { + if (idlen == (int)sess->idlen && !memcmp(sess->id, id, idlen)) { + const unsigned char *p = sess->der; + BIO_printf(bio_err, "Lookup session: cache hit\n"); + return d2i_SSL_SESSION(NULL, &p, sess->derlen); + } + } + BIO_printf(bio_err, "Lookup session: cache miss\n"); + return NULL; +} + +static void del_session(SSL_CTX *sctx, SSL_SESSION *session) +{ + simple_ssl_session *sess, *prev = NULL; + const unsigned char *id; + unsigned int idlen; + id = SSL_SESSION_get_id(session, &idlen); + for (sess = first; sess; sess = sess->next) { + if (idlen == sess->idlen && !memcmp(sess->id, id, idlen)) { + if (prev) + prev->next = sess->next; + else + first = sess->next; + OPENSSL_free(sess->id); + OPENSSL_free(sess->der); + OPENSSL_free(sess); + return; + } + prev = sess; + } +} + +static void init_session_cache_ctx(SSL_CTX *sctx) +{ + SSL_CTX_set_session_cache_mode(sctx, + SSL_SESS_CACHE_NO_INTERNAL | + SSL_SESS_CACHE_SERVER); + SSL_CTX_sess_set_new_cb(sctx, add_session); + SSL_CTX_sess_set_get_cb(sctx, get_session); + SSL_CTX_sess_set_remove_cb(sctx, del_session); +} + +static void free_sessions(void) +{ + simple_ssl_session *sess, *tsess; + for (sess = first; sess;) { + OPENSSL_free(sess->id); + OPENSSL_free(sess->der); + tsess = sess; + sess = sess->next; + OPENSSL_free(tsess); + } + first = NULL; +} diff --git a/deps/openssl/openssl/apps/s_socket.c b/deps/openssl/openssl/apps/s_socket.c index 9e5565d16a4b3c..77a7688f8d0c81 100644 --- a/deps/openssl/openssl/apps/s_socket.c +++ b/deps/openssl/openssl/apps/s_socket.c @@ -290,8 +290,9 @@ static int init_client_ip(int *sock, unsigned char ip[4], int port, int type) } int do_server(int port, int type, int *ret, - int (*cb) (char *hostname, int s, unsigned char *context), - unsigned char *context) + int (*cb) (char *hostname, int s, int stype, + unsigned char *context), unsigned char *context, + int naccept) { int sock; char *name = NULL; @@ -313,12 +314,14 @@ int do_server(int port, int type, int *ret, } } else sock = accept_socket; - i = (*cb) (name, sock, context); + i = (*cb) (name, sock, type, context); if (name != NULL) OPENSSL_free(name); if (type == SOCK_STREAM) SHUTDOWN2(sock); - if (i < 0) { + if (naccept != -1) + naccept--; + if (i < 0 || naccept == 0) { 
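The external cache above is useful precisely because it forces every stored session through DER serialisation, which the default in-memory cache never exercises. A sketch of that round trip in isolation, mirroring what add_session() and get_session() do:

#include <openssl/ssl.h>

static SSL_SESSION *session_roundtrip_example(SSL_SESSION *in)
{
    int len = i2d_SSL_SESSION(in, NULL);
    unsigned char *der, *p;
    const unsigned char *q;
    SSL_SESSION *out;

    if (len <= 0 || (der = OPENSSL_malloc(len)) == NULL)
        return NULL;
    p = der;
    i2d_SSL_SESSION(in, &p);          /* serialise */
    q = der;
    out = d2i_SSL_SESSION(NULL, &q, len);   /* NULL if the encoding is broken */
    OPENSSL_free(der);
    return out;
}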
SHUTDOWN2(accept_socket); return (i); } diff --git a/deps/openssl/openssl/apps/smime.c b/deps/openssl/openssl/apps/smime.c index 7b351f601cbb55..764509f23f47c3 100644 --- a/deps/openssl/openssl/apps/smime.c +++ b/deps/openssl/openssl/apps/smime.c @@ -632,6 +632,12 @@ int MAIN(int argc, char **argv) p7 = PKCS7_sign(NULL, NULL, other, in, flags); if (!p7) goto end; + if (flags & PKCS7_NOCERTS) { + for (i = 0; i < sk_X509_num(other); i++) { + X509 *x = sk_X509_value(other, i); + PKCS7_add_certificate(p7, x); + } + } } else flags |= PKCS7_REUSE_DIGEST; for (i = 0; i < sk_OPENSSL_STRING_num(sksigners); i++) { diff --git a/deps/openssl/openssl/apps/speed.c b/deps/openssl/openssl/apps/speed.c index 7d9fd8ac9823d7..7b1acc18994d8d 100644 --- a/deps/openssl/openssl/apps/speed.c +++ b/deps/openssl/openssl/apps/speed.c @@ -366,6 +366,8 @@ static void *KDF1_SHA1(const void *in, size_t inlen, void *out, } # endif /* OPENSSL_NO_ECDH */ +static void multiblock_speed(const EVP_CIPHER *evp_cipher); + int MAIN(int, char **); int MAIN(int argc, char **argv) @@ -646,6 +648,7 @@ int MAIN(int argc, char **argv) # ifndef NO_FORK int multi = 0; # endif + int multiblock = 0; # ifndef TIMES usertime = -1; @@ -776,6 +779,9 @@ int MAIN(int argc, char **argv) mr = 1; j--; /* Otherwise, -mr gets confused with an * algorithm. */ + } else if (argc > 0 && !strcmp(*argv, "-mb")) { + multiblock = 1; + j--; } else # ifndef OPENSSL_NO_MD2 if (strcmp(*argv, "md2") == 0) @@ -1941,6 +1947,20 @@ int MAIN(int argc, char **argv) # endif if (doit[D_EVP]) { +# ifdef EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + if (multiblock && evp_cipher) { + if (! + (EVP_CIPHER_flags(evp_cipher) & + EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK)) { + fprintf(stderr, "%s is not multi-block capable\n", + OBJ_nid2ln(evp_cipher->nid)); + goto end; + } + multiblock_speed(evp_cipher); + mret = 0; + goto end; + } +# endif for (j = 0; j < SIZE_NUM; j++) { if (evp_cipher) { EVP_CIPHER_CTX ctx; @@ -2742,4 +2762,112 @@ static int do_multi(int multi) return 1; } # endif + +static void multiblock_speed(const EVP_CIPHER *evp_cipher) +{ + static int mblengths[] = + { 8 * 1024, 2 * 8 * 1024, 4 * 8 * 1024, 8 * 8 * 1024, 8 * 16 * 1024 }; + int j, count, num = sizeof(lengths) / sizeof(lengths[0]); + const char *alg_name; + unsigned char *inp, *out, no_key[32], no_iv[16]; + EVP_CIPHER_CTX ctx; + double d = 0.0; + + inp = OPENSSL_malloc(mblengths[num - 1]); + out = OPENSSL_malloc(mblengths[num - 1] + 1024); + if(!inp || !out) { + BIO_printf(bio_err,"Out of memory\n"); + goto end; + } + + + EVP_CIPHER_CTX_init(&ctx); + EVP_EncryptInit_ex(&ctx, evp_cipher, NULL, no_key, no_iv); + EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_AEAD_SET_MAC_KEY, sizeof(no_key), + no_key); + alg_name = OBJ_nid2ln(evp_cipher->nid); + + for (j = 0; j < num; j++) { + print_message(alg_name, 0, mblengths[j]); + Time_F(START); + for (count = 0, run = 1; run && count < 0x7fffffff; count++) { + unsigned char aad[13]; + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb_param; + size_t len = mblengths[j]; + int packlen; + + memset(aad, 0, 8); /* avoid uninitialized values */ + aad[8] = 23; /* SSL3_RT_APPLICATION_DATA */ + aad[9] = 3; /* version */ + aad[10] = 2; + aad[11] = 0; /* length */ + aad[12] = 0; + mb_param.out = NULL; + mb_param.inp = aad; + mb_param.len = len; + mb_param.interleave = 8; + + packlen = EVP_CIPHER_CTX_ctrl(&ctx, + EVP_CTRL_TLS1_1_MULTIBLOCK_AAD, + sizeof(mb_param), &mb_param); + + if (packlen > 0) { + mb_param.out = out; + mb_param.inp = inp; + mb_param.len = len; + EVP_CIPHER_CTX_ctrl(&ctx, + 
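The smime.c hunk above changes the -nocerts behaviour: PKCS7_NOCERTS suppresses both the signer's certificate and the auxiliary certificates passed to PKCS7_sign(), so certificates supplied via -certfile are now re-attached explicitly. A hedged sketch of the same idea with the public PKCS#7 API; identifiers are illustrative:

#include <openssl/pkcs7.h>

static PKCS7 *sign_with_extra_certs(X509 *signcert, EVP_PKEY *pkey,
                                    STACK_OF(X509) *extra, BIO *data)
{
    int i;
    PKCS7 *p7 = PKCS7_sign(signcert, pkey, NULL, data, PKCS7_NOCERTS);

    if (p7 == NULL)
        return NULL;
    /* re-attach the auxiliary certificates that PKCS7_NOCERTS suppressed */
    for (i = 0; i < sk_X509_num(extra); i++)
        PKCS7_add_certificate(p7, sk_X509_value(extra, i));
    return p7;
}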
EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT, + sizeof(mb_param), &mb_param); + } else { + int pad; + + RAND_bytes(out, 16); + len += 16; + aad[11] = len >> 8; + aad[12] = len; + pad = EVP_CIPHER_CTX_ctrl(&ctx, + EVP_CTRL_AEAD_TLS1_AAD, 13, aad); + EVP_Cipher(&ctx, out, inp, len + pad); + } + } + d = Time_F(STOP); + BIO_printf(bio_err, + mr ? "+R:%d:%s:%f\n" + : "%d %s's in %.2fs\n", count, "evp", d); + results[D_EVP][j] = ((double)count) / d * mblengths[j]; + } + + if (mr) { + fprintf(stdout, "+H"); + for (j = 0; j < num; j++) + fprintf(stdout, ":%d", mblengths[j]); + fprintf(stdout, "\n"); + fprintf(stdout, "+F:%d:%s", D_EVP, alg_name); + for (j = 0; j < num; j++) + fprintf(stdout, ":%.2f", results[D_EVP][j]); + fprintf(stdout, "\n"); + } else { + fprintf(stdout, + "The 'numbers' are in 1000s of bytes per second processed.\n"); + fprintf(stdout, "type "); + for (j = 0; j < num; j++) + fprintf(stdout, "%7d bytes", mblengths[j]); + fprintf(stdout, "\n"); + fprintf(stdout, "%-24s", alg_name); + + for (j = 0; j < num; j++) { + if (results[D_EVP][j] > 10000) + fprintf(stdout, " %11.2fk", results[D_EVP][j] / 1e3); + else + fprintf(stdout, " %11.2f ", results[D_EVP][j]); + } + fprintf(stdout, "\n"); + } + +end: + if(inp) + OPENSSL_free(inp); + if(out) + OPENSSL_free(out); +} #endif diff --git a/deps/openssl/openssl/apps/verify.c b/deps/openssl/openssl/apps/verify.c index 7295c769f304da..b3ba53d97f02ae 100644 --- a/deps/openssl/openssl/apps/verify.c +++ b/deps/openssl/openssl/apps/verify.c @@ -88,6 +88,7 @@ int MAIN(int argc, char **argv) X509_STORE *cert_ctx = NULL; X509_LOOKUP *lookup = NULL; X509_VERIFY_PARAM *vpm = NULL; + int crl_download = 0; #ifndef OPENSSL_NO_ENGINE char *engine = NULL; #endif @@ -136,7 +137,8 @@ int MAIN(int argc, char **argv) if (argc-- < 1) goto end; crlfile = *(++argv); - } + } else if (strcmp(*argv, "-crl_download") == 0) + crl_download = 1; #ifndef OPENSSL_NO_ENGINE else if (strcmp(*argv, "-engine") == 0) { if (--argc < 1) @@ -214,6 +216,9 @@ int MAIN(int argc, char **argv) } ret = 0; + + if (crl_download) + store_setup_crl_download(cert_ctx); if (argc < 1) { if (1 != check(cert_ctx, NULL, untrusted, trusted, crls, e)) ret = -1; diff --git a/deps/openssl/openssl/apps/x509.c b/deps/openssl/openssl/apps/x509.c index 929359b0da0be6..864a60dda2e7ff 100644 --- a/deps/openssl/openssl/apps/x509.c +++ b/deps/openssl/openssl/apps/x509.c @@ -150,6 +150,9 @@ static const char *x509_usage[] = { " -engine e - use engine e, possibly a hardware device.\n", #endif " -certopt arg - various certificate text options\n", + " -checkhost host - check certificate matches \"host\"\n", + " -checkemail email - check certificate matches \"email\"\n", + " -checkip ipaddr - check certificate matches \"ipaddr\"\n", NULL }; @@ -163,6 +166,9 @@ static int x509_certify(X509_STORE *ctx, char *CAfile, const EVP_MD *digest, char *section, ASN1_INTEGER *sno); static int purpose_print(BIO *bio, X509 *cert, X509_PURPOSE *pt); static int reqfile = 0; +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL +static int force_version = 2; +#endif int MAIN(int, char **); @@ -174,15 +180,16 @@ int MAIN(int argc, char **argv) X509 *x = NULL, *xca = NULL; ASN1_OBJECT *objtmp; STACK_OF(OPENSSL_STRING) *sigopts = NULL; - EVP_PKEY *Upkey = NULL, *CApkey = NULL; + EVP_PKEY *Upkey = NULL, *CApkey = NULL, *fkey = NULL; ASN1_INTEGER *sno = NULL; - int i, num, badops = 0; + int i, num, badops = 0, badsig = 0; BIO *out = NULL; BIO *STDout = NULL; STACK_OF(ASN1_OBJECT) *trust = NULL, *reject = NULL; int informat, outformat, keyformat, CAformat, 
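The -checkhost, -checkemail and -checkip switches added to the x509 usage text above are front ends for the X509_check_host(), X509_check_email() and X509_check_ip_asc() checkers that ship with this OpenSSL version. A rough sketch of calling them directly, assuming cert is an already loaded X509 *:

    #include <openssl/x509v3.h>

    /* Each checker returns 1 on match, 0 on mismatch, negative on error. */
    int cert_matches(X509 *cert, const char *host,
                     const char *email, const char *ip)
    {
        if (host && X509_check_host(cert, host, 0, 0, NULL) != 1)
            return 0;
        if (email && X509_check_email(cert, email, 0, 0) != 1)
            return 0;
        if (ip && X509_check_ip_asc(cert, ip, 0) != 1)
            return 0;
        return 1;
    }
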
CAkeyformat; char *infile = NULL, *outfile = NULL, *keyfile = NULL, *CAfile = NULL; char *CAkeyfile = NULL, *CAserial = NULL; + char *fkeyfile = NULL; char *alias = NULL; int text = 0, serial = 0, subject = 0, issuer = 0, startdate = 0, enddate = 0; @@ -208,6 +215,9 @@ int MAIN(int argc, char **argv) int need_rand = 0; int checkend = 0, checkoffset = 0; unsigned long nmflag = 0, certflag = 0; + char *checkhost = NULL; + char *checkemail = NULL; + char *checkip = NULL; #ifndef OPENSSL_NO_ENGINE char *engine = NULL; #endif @@ -274,7 +284,15 @@ int MAIN(int argc, char **argv) sigopts = sk_OPENSSL_STRING_new_null(); if (!sigopts || !sk_OPENSSL_STRING_push(sigopts, *(++argv))) goto bad; - } else if (strcmp(*argv, "-days") == 0) { + } +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + else if (strcmp(*argv, "-force_version") == 0) { + if (--argc < 1) + goto bad; + force_version = atoi(*(++argv)) - 1; + } +#endif + else if (strcmp(*argv, "-days") == 0) { if (--argc < 1) goto bad; days = atoi(*(++argv)); @@ -327,6 +345,10 @@ int MAIN(int argc, char **argv) goto bad; if (!(sno = s2i_ASN1_INTEGER(NULL, *(++argv)))) goto bad; + } else if (strcmp(*argv, "-force_pubkey") == 0) { + if (--argc < 1) + goto bad; + fkeyfile = *(++argv); } else if (strcmp(*argv, "-addtrust") == 0) { if (--argc < 1) goto bad; @@ -424,6 +446,18 @@ int MAIN(int argc, char **argv) goto bad; checkoffset = atoi(*(++argv)); checkend = 1; + } else if (strcmp(*argv, "-checkhost") == 0) { + if (--argc < 1) + goto bad; + checkhost = *(++argv); + } else if (strcmp(*argv, "-checkemail") == 0) { + if (--argc < 1) + goto bad; + checkemail = *(++argv); + } else if (strcmp(*argv, "-checkip") == 0) { + if (--argc < 1) + goto bad; + checkip = *(++argv); } else if (strcmp(*argv, "-noout") == 0) noout = ++num; else if (strcmp(*argv, "-trustout") == 0) @@ -447,6 +481,8 @@ int MAIN(int argc, char **argv) #endif else if (strcmp(*argv, "-ocspid") == 0) ocspid = ++num; + else if (strcmp(*argv, "-badsig") == 0) + badsig = 1; else if ((md_alg = EVP_get_digestbyname(*argv + 1))) { /* ok */ digest = md_alg; @@ -484,6 +520,13 @@ int MAIN(int argc, char **argv) goto end; } + if (fkeyfile) { + fkey = load_pubkey(bio_err, fkeyfile, keyformat, 0, + NULL, e, "Forced key"); + if (fkey == NULL) + goto end; + } + if ((CAkeyfile == NULL) && (CA_flag) && (CAformat == FORMAT_PEM)) { CAkeyfile = CAfile; } else if ((CA_flag) && (CAkeyfile == NULL)) { @@ -605,10 +648,13 @@ int MAIN(int argc, char **argv) X509_gmtime_adj(X509_get_notBefore(x), 0); X509_time_adj_ex(X509_get_notAfter(x), days, 0, NULL); - - pkey = X509_REQ_get_pubkey(req); - X509_set_pubkey(x, pkey); - EVP_PKEY_free(pkey); + if (fkey) + X509_set_pubkey(x, fkey); + else { + pkey = X509_REQ_get_pubkey(req); + X509_set_pubkey(x, pkey); + EVP_PKEY_free(pkey); + } } else x = load_cert(bio_err, infile, informat, NULL, e, "Certificate"); @@ -937,11 +983,16 @@ int MAIN(int argc, char **argv) goto end; } + print_cert_checks(STDout, x, checkhost, checkemail, checkip); + if (noout) { ret = 0; goto end; } + if (badsig) + x->signature->data[x->signature->length - 1] ^= 0x1; + if (outformat == FORMAT_ASN1) i = i2d_X509_bio(out, x); else if (outformat == FORMAT_PEM) { @@ -982,6 +1033,7 @@ int MAIN(int argc, char **argv) X509_free(xca); EVP_PKEY_free(Upkey); EVP_PKEY_free(CApkey); + EVP_PKEY_free(fkey); if (sigopts) sk_OPENSSL_STRING_free(sigopts); X509_REQ_free(rq); @@ -1101,7 +1153,11 @@ static int x509_certify(X509_STORE *ctx, char *CAfile, const EVP_MD *digest, if (conf) { X509V3_CTX ctx2; +#ifdef 
OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + X509_set_version(x, force_version); +#else X509_set_version(x, 2); /* version 3 certificate */ +#endif X509V3_set_ctx(&ctx2, xca, x, NULL, NULL, 0); X509V3_set_nconf(&ctx2, conf); if (!X509V3_EXT_add_nconf(conf, &ctx2, section, x)) @@ -1186,7 +1242,11 @@ static int sign(X509 *x, EVP_PKEY *pkey, int days, int clrext, } if (conf) { X509V3_CTX ctx; +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + X509_set_version(x, force_version); +#else X509_set_version(x, 2); /* version 3 certificate */ +#endif X509V3_set_ctx(&ctx, x, x, NULL, NULL, 0); X509V3_set_nconf(&ctx, conf); if (!X509V3_EXT_add_nconf(conf, &ctx, section, x)) diff --git a/deps/openssl/openssl/config b/deps/openssl/openssl/config index 41fa2a6b8296d9..77f730f093e61b 100755 --- a/deps/openssl/openssl/config +++ b/deps/openssl/openssl/config @@ -587,15 +587,33 @@ case "$GUESSOS" in fi ;; ppc64-*-linux2) + if [ -z "$KERNEL_BITS" ]; then + echo "WARNING! If you wish to build 64-bit library, then you have to" + echo " invoke './Configure linux-ppc64' *manually*." + if [ "$TEST" = "false" -a -t 1 ]; then + echo " You have about 5 seconds to press Ctrl-C to abort." + (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 + fi + fi + if [ "$KERNEL_BITS" = "64" ]; then + OUT="linux-ppc64" + else + OUT="linux-ppc" + (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32" + fi + ;; + ppc64le-*-linux2) OUT="linux-ppc64le" ;; + ppc-*-linux2) OUT="linux-ppc" ;; + mips64*-*-linux2) echo "WARNING! If you wish to build 64-bit library, then you have to" - echo " invoke './Configure linux-ppc64' *manually*." + echo " invoke './Configure linux64-mips64' *manually*." if [ "$TEST" = "false" -a -t 1 ]; then echo " You have about 5 seconds to press Ctrl-C to abort." 
(trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 fi - OUT="linux-ppc" + OUT="linux-mips64" ;; - ppc-*-linux2) OUT="linux-ppc" ;; + mips*-*-linux2) OUT="linux-mips32" ;; ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;; ppcgen-*-vxworks*) OUT="vxworks-ppcgen" ;; pentium-*-vxworks*) OUT="vxworks-pentium" ;; @@ -644,6 +662,7 @@ case "$GUESSOS" in armv[1-3]*-*-linux2) OUT="linux-generic32" ;; armv[7-9]*-*-linux2) OUT="linux-armv4"; options="$options -march=armv7-a" ;; arm*-*-linux2) OUT="linux-armv4" ;; + aarch64-*-linux2) OUT="linux-aarch64" ;; sh*b-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;; sh*-*-linux2) OUT="linux-generic32"; options="$options -DL_ENDIAN" ;; m68k*-*-linux2) OUT="linux-generic32"; options="$options -DB_ENDIAN" ;; diff --git a/deps/openssl/openssl/crypto/Makefile b/deps/openssl/openssl/crypto/Makefile index 2355661f40fa31..9a39e934ad5c8e 100644 --- a/deps/openssl/openssl/crypto/Makefile +++ b/deps/openssl/openssl/crypto/Makefile @@ -74,9 +74,9 @@ ia64cpuid.s: ia64cpuid.S; $(CC) $(CFLAGS) -E ia64cpuid.S > $@ ppccpuid.s: ppccpuid.pl; $(PERL) ppccpuid.pl $(PERLASM_SCHEME) $@ pariscid.s: pariscid.pl; $(PERL) pariscid.pl $(PERLASM_SCHEME) $@ alphacpuid.s: alphacpuid.pl - (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \ + (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ $(PERL) alphacpuid.pl > $$preproc && \ - $(CC) -E $$preproc > $@ && rm $$preproc) + $(CC) -E -P $$preproc > $@ && rm $$preproc) testapps: [ -z "$(THIS)" ] || ( if echo $(SDIRS) | fgrep ' des '; \ @@ -88,7 +88,7 @@ subdirs: @target=all; $(RECURSIVE_MAKE) files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO + $(PERL) $(TOP)/util/files.pl "CPUID_OBJ=$(CPUID_OBJ)" Makefile >> $(TOP)/MINFO @target=files; $(RECURSIVE_MAKE) links: @@ -102,7 +102,7 @@ lib: $(LIB) @touch lib $(LIB): $(LIBOBJ) $(AR) $(LIB) $(LIBOBJ) - [ -z "$(FIPSLIBDIR)" ] || $(AR) $(LIB) $(FIPSLIBDIR)fipscanister.o + test -z "$(FIPSLIBDIR)" || $(AR) $(LIB) $(FIPSLIBDIR)fipscanister.o $(RANLIB) $(LIB) || echo Never mind. 
shared: buildinf.h lib subdirs diff --git a/deps/openssl/openssl/crypto/aes/Makefile b/deps/openssl/openssl/crypto/aes/Makefile index 45ede0a0b459c6..b94ca72a41a343 100644 --- a/deps/openssl/openssl/crypto/aes/Makefile +++ b/deps/openssl/openssl/crypto/aes/Makefile @@ -65,12 +65,22 @@ aesni-x86_64.s: asm/aesni-x86_64.pl $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@ aesni-sha1-x86_64.s: asm/aesni-sha1-x86_64.pl $(PERL) asm/aesni-sha1-x86_64.pl $(PERLASM_SCHEME) > $@ +aesni-sha256-x86_64.s: asm/aesni-sha256-x86_64.pl + $(PERL) asm/aesni-sha256-x86_64.pl $(PERLASM_SCHEME) > $@ +aesni-mb-x86_64.s: asm/aesni-mb-x86_64.pl + $(PERL) asm/aesni-mb-x86_64.pl $(PERLASM_SCHEME) > $@ aes-sparcv9.s: asm/aes-sparcv9.pl $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ +aest4-sparcv9.s: asm/aest4-sparcv9.pl ../perlasm/sparcv9_modes.pl + $(PERL) asm/aest4-sparcv9.pl $(CFLAGS) > $@ aes-ppc.s: asm/aes-ppc.pl $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ +vpaes-ppc.s: asm/vpaes-ppc.pl + $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@ +aesp8-ppc.s: asm/aesp8-ppc.pl + $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ aes-parisc.s: asm/aes-parisc.pl $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ @@ -78,12 +88,18 @@ aes-parisc.s: asm/aes-parisc.pl aes-mips.S: asm/aes-mips.pl $(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@ +aesv8-armx.S: asm/aesv8-armx.pl + $(PERL) asm/aesv8-armx.pl $(PERLASM_SCHEME) $@ +aesv8-armx.o: aesv8-armx.S + # GNU make "catch all" aes-%.S: asm/aes-%.pl; $(PERL) $< $(PERLASM_SCHEME) > $@ aes-armv4.o: aes-armv4.S +bsaes-%.S: asm/bsaes-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ +bsaes-armv7.o: bsaes-armv7.S files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO + $(PERL) $(TOP)/util/files.pl "AES_ENC=$(AES_ENC)" Makefile >> $(TOP)/MINFO links: @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) @@ -147,7 +163,7 @@ aes_wrap.o: ../../e_os.h ../../include/openssl/aes.h aes_wrap.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h aes_wrap.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h aes_wrap.o: ../../include/openssl/err.h ../../include/openssl/lhash.h -aes_wrap.o: ../../include/openssl/opensslconf.h +aes_wrap.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h aes_wrap.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h aes_wrap.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h aes_wrap.o: ../../include/openssl/symhacks.h ../cryptlib.h aes_wrap.c diff --git a/deps/openssl/openssl/crypto/aes/aes_wrap.c b/deps/openssl/openssl/crypto/aes/aes_wrap.c index b1ab8e25ef3703..b7b64d57a4870a 100644 --- a/deps/openssl/openssl/crypto/aes/aes_wrap.c +++ b/deps/openssl/openssl/crypto/aes/aes_wrap.c @@ -54,197 +54,19 @@ #include "cryptlib.h" #include -#include - -static const unsigned char default_iv[] = { - 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, -}; +#include int AES_wrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out, const unsigned char *in, unsigned int inlen) { - unsigned char *A, B[16], *R; - unsigned int i, j, t; - if ((inlen & 0x7) || (inlen < 8)) - return -1; - A = B; - t = 1; - memcpy(out + 8, in, inlen); - if (!iv) - iv = default_iv; - - memcpy(A, iv, 8); - - for (j = 0; j < 6; j++) { - R = out + 8; - for (i = 0; i < inlen; i += 8, t++, R += 8) { - memcpy(B + 8, R, 8); - AES_encrypt(B, B, key); - A[7] ^= (unsigned char)(t & 0xff); - if (t > 0xff) { - A[6] ^= (unsigned char)((t >> 8) & 0xff); - A[5] ^= (unsigned char)((t >> 16) & 0xff); - A[4] ^= (unsigned char)((t >> 24) & 0xff); - } - memcpy(R, 
B + 8, 8); - } - } - memcpy(out, A, 8); - return inlen + 8; + return CRYPTO_128_wrap(key, iv, out, in, inlen, (block128_f) AES_encrypt); } int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, unsigned char *out, const unsigned char *in, unsigned int inlen) { - unsigned char *A, B[16], *R; - unsigned int i, j, t; - inlen -= 8; - if (inlen & 0x7) - return -1; - if (inlen < 8) - return -1; - A = B; - t = 6 * (inlen >> 3); - memcpy(A, in, 8); - memcpy(out, in + 8, inlen); - for (j = 0; j < 6; j++) { - R = out + inlen - 8; - for (i = 0; i < inlen; i += 8, t--, R -= 8) { - A[7] ^= (unsigned char)(t & 0xff); - if (t > 0xff) { - A[6] ^= (unsigned char)((t >> 8) & 0xff); - A[5] ^= (unsigned char)((t >> 16) & 0xff); - A[4] ^= (unsigned char)((t >> 24) & 0xff); - } - memcpy(B + 8, R, 8); - AES_decrypt(B, B, key); - memcpy(R, B + 8, 8); - } - } - if (!iv) - iv = default_iv; - if (memcmp(A, iv, 8)) { - OPENSSL_cleanse(out, inlen); - return 0; - } - return inlen; -} - -#ifdef AES_WRAP_TEST - -int AES_wrap_unwrap_test(const unsigned char *kek, int keybits, - const unsigned char *iv, - const unsigned char *eout, - const unsigned char *key, int keylen) -{ - unsigned char *otmp = NULL, *ptmp = NULL; - int r, ret = 0; - AES_KEY wctx; - otmp = OPENSSL_malloc(keylen + 8); - ptmp = OPENSSL_malloc(keylen); - if (!otmp || !ptmp) - return 0; - if (AES_set_encrypt_key(kek, keybits, &wctx)) - goto err; - r = AES_wrap_key(&wctx, iv, otmp, key, keylen); - if (r <= 0) - goto err; - - if (eout && memcmp(eout, otmp, keylen)) - goto err; - - if (AES_set_decrypt_key(kek, keybits, &wctx)) - goto err; - r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r); - - if (memcmp(key, ptmp, keylen)) - goto err; - - ret = 1; - - err: - if (otmp) - OPENSSL_free(otmp); - if (ptmp) - OPENSSL_free(ptmp); - - return ret; - + return CRYPTO_128_unwrap(key, iv, out, in, inlen, + (block128_f) AES_decrypt); } - -int main(int argc, char **argv) -{ - - static const unsigned char kek[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - }; - - static const unsigned char key[] = { - 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f - }; - - static const unsigned char e1[] = { - 0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47, - 0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82, - 0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5 - }; - - static const unsigned char e2[] = { - 0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35, - 0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2, - 0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d - }; - - static const unsigned char e3[] = { - 0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2, - 0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a, - 0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7 - }; - - static const unsigned char e4[] = { - 0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32, - 0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc, - 0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93, - 0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2 - }; - - static const unsigned char e5[] = { - 0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f, - 0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4, - 0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95, - 0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1 - }; - - static const unsigned char e6[] = { - 0x28, 0xc9, 0xf4, 0x04, 0xc4, 
0xb8, 0x10, 0xf4, - 0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26, - 0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26, - 0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b, - 0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21 - }; - - AES_KEY wctx, xctx; - int ret; - ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24); - fprintf(stderr, "Key test result %d\n", ret); - ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32); - fprintf(stderr, "Key test result %d\n", ret); -} - -#endif diff --git a/deps/openssl/openssl/crypto/aes/aes_x86core.c b/deps/openssl/openssl/crypto/aes/aes_x86core.c index 1defbb1abfb28b..c869ed7198520d 100644 --- a/deps/openssl/openssl/crypto/aes/aes_x86core.c +++ b/deps/openssl/openssl/crypto/aes/aes_x86core.c @@ -89,8 +89,10 @@ typedef unsigned long long u64; #endif #undef ROTATE -#if defined(_MSC_VER) || defined(__ICC) -# define ROTATE(a,n) _lrotl(a,n) +#if defined(_MSC_VER) +# define ROTATE(a,n) _lrotl(a,n) +#elif defined(__ICC) +# define ROTATE(a,n) _rotl(a,n) #elif defined(__GNUC__) && __GNUC__>=2 # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) # define ROTATE(a,n) ({ register unsigned int ret; \ diff --git a/deps/openssl/openssl/crypto/aes/asm/aes-586.pl b/deps/openssl/openssl/crypto/aes/asm/aes-586.pl index 687ed811be4796..451d0e0ed1e214 100755 --- a/deps/openssl/openssl/crypto/aes/asm/aes-586.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aes-586.pl @@ -39,7 +39,7 @@ # but exhibits up to 10% improvement on other cores. # # Second version is "monolithic" replacement for aes_core.c, which in -# addition to AES_[de|en]crypt implements private_AES_set_[de|en]cryption_key. +# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key. # This made it possible to implement little-endian variant of the # algorithm without modifying the base C code. Motivating factor for # the undertaken effort was that it appeared that in tight IA-32 @@ -103,11 +103,12 @@ # byte for 128-bit key. # # ECB encrypt ECB decrypt CBC large chunk -# P4 56[60] 84[100] 23 -# AMD K8 48[44] 70[79] 18 -# PIII 41[50] 61[91] 24 -# Core 2 32[38] 45[70] 18.5 -# Pentium 120 160 77 +# P4 52[54] 83[95] 23 +# AMD K8 46[41] 66[70] 18 +# PIII 41[50] 60[77] 24 +# Core 2 31[36] 45[64] 18.5 +# Atom 76[100] 96[138] 60 +# Pentium 115 150 77 # # Version 4.1 switches to compact S-box even in key schedule setup. # @@ -242,7 +243,7 @@ sub encvert() { my ($te,@s) = @_; - my $v0 = $acc, $v1 = $key; + my ($v0,$v1) = ($acc,$key); &mov ($v0,$s[3]); # copy s3 &mov (&DWP(4,"esp"),$s[2]); # save s2 @@ -299,7 +300,7 @@ () # Another experimental routine, which features "horizontal spin," but # eliminates one reference to stack. Strangely enough runs slower... 
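The aes_wrap.c rewrite above reduces AES_wrap_key() and AES_unwrap_key() to thin wrappers over CRYPTO_128_wrap() and CRYPTO_128_unwrap() from modes.h, and drops the old AES_WRAP_TEST driver along with its RFC 3394 vectors. A rough replacement check using the public API and the first of those vectors (e1), assuming it is built against this tree and linked with libcrypto:

    #include <openssl/aes.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        static const unsigned char kek[16] = {
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
            0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
        };
        static const unsigned char key[16] = {
            0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
            0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
        };
        static const unsigned char e1[24] = {
            0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47,
            0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82,
            0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5
        };
        unsigned char out[24];
        AES_KEY wctx;

        if (AES_set_encrypt_key(kek, 128, &wctx) != 0)
            return 1;
        /* NULL iv selects the default 0xA6.. IV; returns inlen + 8 on success. */
        if (AES_wrap_key(&wctx, NULL, out, key, sizeof(key)) != (int)sizeof(out) ||
            memcmp(out, e1, sizeof(e1)) != 0) {
            fprintf(stderr, "RFC 3394 wrap vector mismatch\n");
            return 1;
        }
        printf("RFC 3394 128-bit wrap vector OK\n");
        return 0;
    }
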
sub enchoriz() -{ my $v0 = $key, $v1 = $acc; +{ my ($v0,$v1) = ($key,$acc); &movz ($v0,&LB($s0)); # 3, 2, 1, 0* &rotr ($s2,8); # 8,11,10, 9 @@ -427,7 +428,7 @@ () ###################################################################### sub enccompact() -{ my $Fn = mov; +{ my $Fn = \&mov; while ($#_>5) { pop(@_); $Fn=sub{}; } my ($i,$te,@s)=@_; my $tmp = $key; @@ -476,24 +477,25 @@ () my $tmp = $tbl; my $r2 = $key ; - &mov ($acc,$s[$i]); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); + &and ($tmp,$s[$i]); &lea ($r2,&DWP(0,$s[$i],$s[$i])); - &sub ($acc,$tmp); + &mov ($acc,$tmp); + &shr ($tmp,7); &and ($r2,0xfefefefe); - &and ($acc,0x1b1b1b1b); + &sub ($acc,$tmp); &mov ($tmp,$s[$i]); + &and ($acc,0x1b1b1b1b); + &rotr ($tmp,16); &xor ($acc,$r2); # r2 + &mov ($r2,$s[$i]); &xor ($s[$i],$acc); # r0 ^ r2 + &rotr ($r2,16+8); + &xor ($acc,$tmp); &rotl ($s[$i],24); - &xor ($s[$i],$acc) # ROTATE(r2^r0,24) ^ r2 - &rotr ($tmp,16); - &xor ($s[$i],$tmp); - &rotr ($tmp,8); - &xor ($s[$i],$tmp); + &xor ($acc,$r2); + &mov ($tmp,0x80808080) if ($i!=1); + &xor ($s[$i],$acc); # ROTATE(r2^r0,24) ^ r2 } &function_begin_B("_x86_AES_encrypt_compact"); @@ -526,6 +528,7 @@ () &enccompact(1,$tbl,$s1,$s2,$s3,$s0,1); &enccompact(2,$tbl,$s2,$s3,$s0,$s1,1); &enccompact(3,$tbl,$s3,$s0,$s1,$s2,1); + &mov ($tbl,0x80808080); &enctransform(2); &enctransform(3); &enctransform(0); @@ -607,82 +610,84 @@ () &pshufw ("mm5","mm4",0x0d); # 15,14,11,10 &movd ("eax","mm1"); # 5, 4, 1, 0 &movd ("ebx","mm5"); # 15,14,11,10 + &mov ($__key,$key); &movz ($acc,&LB("eax")); # 0 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0 - &pshufw ("mm2","mm0",0x0d); # 7, 6, 3, 2 &movz ("edx",&HB("eax")); # 1 + &pshufw ("mm2","mm0",0x0d); # 7, 6, 3, 2 + &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0 + &movz ($key,&LB("ebx")); # 10 &movz ("edx",&BP(-128,$tbl,"edx",1)); # 1 - &shl ("edx",8); # 1 &shr ("eax",16); # 5, 4 + &shl ("edx",8); # 1 - &movz ($acc,&LB("ebx")); # 10 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 10 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 10 + &movz ($key,&HB("ebx")); # 11 &shl ($acc,16); # 10 - &or ("ecx",$acc); # 10 &pshufw ("mm6","mm4",0x08); # 13,12, 9, 8 - &movz ($acc,&HB("ebx")); # 11 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 11 + &or ("ecx",$acc); # 10 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 11 + &movz ($key,&HB("eax")); # 5 &shl ($acc,24); # 11 - &or ("edx",$acc); # 11 &shr ("ebx",16); # 15,14 + &or ("edx",$acc); # 11 - &movz ($acc,&HB("eax")); # 5 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 5 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 5 + &movz ($key,&HB("ebx")); # 15 &shl ($acc,8); # 5 &or ("ecx",$acc); # 5 - &movz ($acc,&HB("ebx")); # 15 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 15 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 15 + &movz ($key,&LB("eax")); # 4 &shl ($acc,24); # 15 &or ("ecx",$acc); # 15 - &movd ("mm0","ecx"); # t[0] collected - &movz ($acc,&LB("eax")); # 4 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 4 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 4 + &movz ($key,&LB("ebx")); # 14 &movd ("eax","mm2"); # 7, 6, 3, 2 - &movz ($acc,&LB("ebx")); # 14 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 14 - &shl ($acc,16); # 14 + &movd ("mm0","ecx"); # t[0] collected + &movz ("ecx",&BP(-128,$tbl,$key,1)); # 14 + &movz ($key,&HB("eax")); # 3 + &shl ("ecx",16); # 14 + &movd ("ebx","mm6"); # 13,12, 9, 8 &or ("ecx",$acc); # 14 - &movd ("ebx","mm6"); # 13,12, 9, 8 - &movz ($acc,&HB("eax")); # 3 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 3 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 3 + &movz ($key,&HB("ebx")); # 9 &shl ($acc,24); # 3 &or ("ecx",$acc); # 3 - &movz 
($acc,&HB("ebx")); # 9 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 9 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 9 + &movz ($key,&LB("ebx")); # 8 &shl ($acc,8); # 9 + &shr ("ebx",16); # 13,12 &or ("ecx",$acc); # 9 - &movd ("mm1","ecx"); # t[1] collected - &movz ($acc,&LB("ebx")); # 8 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 8 - &shr ("ebx",16); # 13,12 - &movz ($acc,&LB("eax")); # 2 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 2 - &shl ($acc,16); # 2 - &or ("ecx",$acc); # 2 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 8 + &movz ($key,&LB("eax")); # 2 &shr ("eax",16); # 7, 6 + &movd ("mm1","ecx"); # t[1] collected + &movz ("ecx",&BP(-128,$tbl,$key,1)); # 2 + &movz ($key,&HB("eax")); # 7 + &shl ("ecx",16); # 2 + &and ("eax",0xff); # 6 + &or ("ecx",$acc); # 2 &punpckldq ("mm0","mm1"); # t[0,1] collected - &movz ($acc,&HB("eax")); # 7 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 7 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 7 + &movz ($key,&HB("ebx")); # 13 &shl ($acc,24); # 7 - &or ("ecx",$acc); # 7 - &and ("eax",0xff); # 6 + &and ("ebx",0xff); # 12 &movz ("eax",&BP(-128,$tbl,"eax",1)); # 6 + &or ("ecx",$acc); # 7 &shl ("eax",16); # 6 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 13 &or ("edx","eax"); # 6 - &movz ($acc,&HB("ebx")); # 13 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 13 &shl ($acc,8); # 13 - &or ("ecx",$acc); # 13 - &movd ("mm4","ecx"); # t[2] collected - &and ("ebx",0xff); # 12 &movz ("ebx",&BP(-128,$tbl,"ebx",1)); # 12 + &or ("ecx",$acc); # 13 &or ("edx","ebx"); # 12 + &mov ($key,$__key); + &movd ("mm4","ecx"); # t[2] collected &movd ("mm5","edx"); # t[3] collected &punpckldq ("mm4","mm5"); # t[2,3] collected @@ -1222,7 +1227,7 @@ () ###################################################################### sub deccompact() -{ my $Fn = mov; +{ my $Fn = \&mov; while ($#_>5) { pop(@_); $Fn=sub{}; } my ($i,$td,@s)=@_; my $tmp = $key; @@ -1270,30 +1275,30 @@ () my $tp4 = @s[($i+3)%4]; $tp4 = @s[3] if ($i==1); my $tp8 = $tbl; - &mov ($acc,$s[$i]); - &and ($acc,0x80808080); - &mov ($tmp,$acc); + &mov ($tmp,0x80808080); + &and ($tmp,$s[$i]); + &mov ($acc,$tmp); &shr ($tmp,7); &lea ($tp2,&DWP(0,$s[$i],$s[$i])); &sub ($acc,$tmp); &and ($tp2,0xfefefefe); &and ($acc,0x1b1b1b1b); - &xor ($acc,$tp2); - &mov ($tp2,$acc); + &xor ($tp2,$acc); + &mov ($tmp,0x80808080); - &and ($acc,0x80808080); - &mov ($tmp,$acc); + &and ($tmp,$tp2); + &mov ($acc,$tmp); &shr ($tmp,7); &lea ($tp4,&DWP(0,$tp2,$tp2)); &sub ($acc,$tmp); &and ($tp4,0xfefefefe); &and ($acc,0x1b1b1b1b); &xor ($tp2,$s[$i]); # tp2^tp1 - &xor ($acc,$tp4); - &mov ($tp4,$acc); + &xor ($tp4,$acc); + &mov ($tmp,0x80808080); - &and ($acc,0x80808080); - &mov ($tmp,$acc); + &and ($tmp,$tp4); + &mov ($acc,$tmp); &shr ($tmp,7); &lea ($tp8,&DWP(0,$tp4,$tp4)); &sub ($acc,$tmp); @@ -1305,13 +1310,13 @@ () &xor ($s[$i],$tp2); &xor ($tp2,$tp8); - &rotl ($tp2,24); &xor ($s[$i],$tp4); &xor ($tp4,$tp8); - &rotl ($tp4,16); + &rotl ($tp2,24); &xor ($s[$i],$tp8); # ^= tp8^(tp4^tp1)^(tp2^tp1) - &rotl ($tp8,8); + &rotl ($tp4,16); &xor ($s[$i],$tp2); # ^= ROTATE(tp8^tp2^tp1,24) + &rotl ($tp8,8); &xor ($s[$i],$tp4); # ^= ROTATE(tp8^tp4^tp1,16) &mov ($s[0],$__s0) if($i==2); #prefetch $s0 &mov ($s[1],$__s1) if($i==3); #prefetch $s1 @@ -1389,85 +1394,87 @@ () sub sse_deccompact() { &pshufw ("mm1","mm0",0x0c); # 7, 6, 1, 0 + &pshufw ("mm5","mm4",0x09); # 13,12,11,10 &movd ("eax","mm1"); # 7, 6, 1, 0 + &movd ("ebx","mm5"); # 13,12,11,10 + &mov ($__key,$key); - &pshufw ("mm5","mm4",0x09); # 13,12,11,10 &movz ($acc,&LB("eax")); # 0 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0 - &movd ("ebx","mm5"); # 
13,12,11,10 &movz ("edx",&HB("eax")); # 1 + &pshufw ("mm2","mm0",0x06); # 3, 2, 5, 4 + &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 0 + &movz ($key,&LB("ebx")); # 10 &movz ("edx",&BP(-128,$tbl,"edx",1)); # 1 + &shr ("eax",16); # 7, 6 &shl ("edx",8); # 1 - &pshufw ("mm2","mm0",0x06); # 3, 2, 5, 4 - &movz ($acc,&LB("ebx")); # 10 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 10 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 10 + &movz ($key,&HB("ebx")); # 11 &shl ($acc,16); # 10 + &pshufw ("mm6","mm4",0x03); # 9, 8,15,14 &or ("ecx",$acc); # 10 - &shr ("eax",16); # 7, 6 - &movz ($acc,&HB("ebx")); # 11 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 11 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 11 + &movz ($key,&HB("eax")); # 7 &shl ($acc,24); # 11 - &or ("edx",$acc); # 11 &shr ("ebx",16); # 13,12 + &or ("edx",$acc); # 11 - &pshufw ("mm6","mm4",0x03); # 9, 8,15,14 - &movz ($acc,&HB("eax")); # 7 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 7 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 7 + &movz ($key,&HB("ebx")); # 13 &shl ($acc,24); # 7 &or ("ecx",$acc); # 7 - &movz ($acc,&HB("ebx")); # 13 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 13 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 13 + &movz ($key,&LB("eax")); # 6 &shl ($acc,8); # 13 + &movd ("eax","mm2"); # 3, 2, 5, 4 &or ("ecx",$acc); # 13 - &movd ("mm0","ecx"); # t[0] collected - &movz ($acc,&LB("eax")); # 6 - &movd ("eax","mm2"); # 3, 2, 5, 4 - &movz ("ecx",&BP(-128,$tbl,$acc,1)); # 6 - &shl ("ecx",16); # 6 - &movz ($acc,&LB("ebx")); # 12 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 6 + &movz ($key,&LB("ebx")); # 12 + &shl ($acc,16); # 6 &movd ("ebx","mm6"); # 9, 8,15,14 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 12 + &movd ("mm0","ecx"); # t[0] collected + &movz ("ecx",&BP(-128,$tbl,$key,1)); # 12 + &movz ($key,&LB("eax")); # 4 &or ("ecx",$acc); # 12 - &movz ($acc,&LB("eax")); # 4 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 4 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 4 + &movz ($key,&LB("ebx")); # 14 &or ("edx",$acc); # 4 - &movz ($acc,&LB("ebx")); # 14 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 14 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 14 + &movz ($key,&HB("eax")); # 5 &shl ($acc,16); # 14 + &shr ("eax",16); # 3, 2 &or ("edx",$acc); # 14 - &movd ("mm1","edx"); # t[1] collected - &movz ($acc,&HB("eax")); # 5 - &movz ("edx",&BP(-128,$tbl,$acc,1)); # 5 - &shl ("edx",8); # 5 - &movz ($acc,&HB("ebx")); # 15 - &shr ("eax",16); # 3, 2 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 15 - &shl ($acc,24); # 15 - &or ("edx",$acc); # 15 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 5 + &movz ($key,&HB("ebx")); # 15 &shr ("ebx",16); # 9, 8 + &shl ($acc,8); # 5 + &movd ("mm1","edx"); # t[1] collected + &movz ("edx",&BP(-128,$tbl,$key,1)); # 15 + &movz ($key,&HB("ebx")); # 9 + &shl ("edx",24); # 15 + &and ("ebx",0xff); # 8 + &or ("edx",$acc); # 15 &punpckldq ("mm0","mm1"); # t[0,1] collected - &movz ($acc,&HB("ebx")); # 9 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 9 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 9 + &movz ($key,&LB("eax")); # 2 &shl ($acc,8); # 9 - &or ("ecx",$acc); # 9 - &and ("ebx",0xff); # 8 + &movz ("eax",&HB("eax")); # 3 &movz ("ebx",&BP(-128,$tbl,"ebx",1)); # 8 + &or ("ecx",$acc); # 9 + &movz ($acc,&BP(-128,$tbl,$key,1)); # 2 &or ("edx","ebx"); # 8 - &movz ($acc,&LB("eax")); # 2 - &movz ($acc,&BP(-128,$tbl,$acc,1)); # 2 &shl ($acc,16); # 2 - &or ("edx",$acc); # 2 - &movd ("mm4","edx"); # t[2] collected - &movz ("eax",&HB("eax")); # 3 &movz ("eax",&BP(-128,$tbl,"eax",1)); # 3 + &or ("edx",$acc); # 2 &shl ("eax",24); # 3 &or ("ecx","eax"); # 3 + &mov ($key,$__key); + &movd ("mm4","edx"); # t[2] collected &movd 
("mm5","ecx"); # t[3] collected &punpckldq ("mm4","mm5"); # t[2,3] collected @@ -2181,8 +2188,8 @@ () &mov ("ecx",240/4); &xor ("eax","eax"); &align (4); - &data_word(0xABF3F689); # rep stosd - &set_label("skip_ezero") + &data_word(0xABF3F689); # rep stosd + &set_label("skip_ezero"); &mov ("esp",$_esp); &popf (); &set_label("drop_out"); @@ -2301,8 +2308,8 @@ () &mov ("ecx",240/4); &xor ("eax","eax"); &align (4); - &data_word(0xABF3F689); # rep stosd - &set_label("skip_dzero") + &data_word(0xABF3F689); # rep stosd + &set_label("skip_dzero"); &mov ("esp",$_esp); &popf (); &function_end_A(); @@ -2865,32 +2872,32 @@ () { my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_; my $tmp = $tbl; - &mov ($acc,$tp1); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); + &mov ($tmp,0x80808080); + &and ($tmp,$tp1); &lea ($tp2,&DWP(0,$tp1,$tp1)); + &mov ($acc,$tmp); + &shr ($tmp,7); &sub ($acc,$tmp); &and ($tp2,0xfefefefe); &and ($acc,0x1b1b1b1b); - &xor ($acc,$tp2); - &mov ($tp2,$acc); + &xor ($tp2,$acc); + &mov ($tmp,0x80808080); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); + &and ($tmp,$tp2); &lea ($tp4,&DWP(0,$tp2,$tp2)); + &mov ($acc,$tmp); + &shr ($tmp,7); &sub ($acc,$tmp); &and ($tp4,0xfefefefe); &and ($acc,0x1b1b1b1b); &xor ($tp2,$tp1); # tp2^tp1 - &xor ($acc,$tp4); - &mov ($tp4,$acc); + &xor ($tp4,$acc); + &mov ($tmp,0x80808080); - &and ($acc,0x80808080); - &mov ($tmp,$acc); - &shr ($tmp,7); + &and ($tmp,$tp4); &lea ($tp8,&DWP(0,$tp4,$tp4)); + &mov ($acc,$tmp); + &shr ($tmp,7); &xor ($tp4,$tp1); # tp4^tp1 &sub ($acc,$tmp); &and ($tp8,0xfefefefe); diff --git a/deps/openssl/openssl/crypto/aes/asm/aes-armv4.pl b/deps/openssl/openssl/crypto/aes/asm/aes-armv4.pl index 86b86c4a0fbda8..4f8917089f6c2a 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aes-armv4.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aes-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. 
@@ -51,9 +51,23 @@ $rounds="r12"; $code=<<___; -#include "arm_arch.h" +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + .text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else .code 32 +# endif +#endif .type AES_Te,%object .align 5 @@ -167,7 +181,11 @@ .type AES_encrypt,%function .align 5 AES_encrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_encrypt +#else + adr r3,AES_encrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 @@ -409,11 +427,21 @@ .align 5 private_AES_set_encrypt_key: _armv4_AES_set_encrypt_key: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_set_encrypt_key +#else + adr r3,private_AES_set_encrypt_key +#endif teq r0,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt teq r2,#0 +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif moveq r0,#-1 beq .Labrt @@ -422,6 +450,9 @@ teq r1,#192 beq .Lok teq r1,#256 +#if __ARM_ARCH__>=7 + itt ne @ Thumb2 thing, sanity check in ARM +#endif movne r0,#-1 bne .Labrt @@ -576,6 +607,9 @@ str $s2,[$key,#-16] subs $rounds,$rounds,#1 str $s3,[$key,#-12] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,$key,#216 beq .Ldone @@ -645,6 +679,9 @@ str $s2,[$key,#-24] subs $rounds,$rounds,#1 str $s3,[$key,#-20] +#if __ARM_ARCH__>=7 + itt eq @ Thumb2 thing, sanity check in ARM +#endif subeq r2,$key,#256 beq .Ldone @@ -674,11 +711,17 @@ str $i3,[$key,#-4] b .L256_loop +.align 2 .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key .global private_AES_set_decrypt_key @@ -688,34 +731,57 @@ str lr,[sp,#-4]! 
@ push lr bl _armv4_AES_set_encrypt_key teq r0,#0 - ldrne lr,[sp],#4 @ pop lr + ldr lr,[sp],#4 @ pop lr bne .Labrt - stmdb sp!,{r4-r12} + mov r0,r2 @ AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key +.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key - ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2, - mov $key,r2 @ which is AES_KEY *key - mov $i1,r2 - add $i2,r2,$rounds,lsl#4 +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.global AES_set_enc2dec_key +.type AES_set_enc2dec_key,%function +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4-r12,lr} + + ldr $rounds,[r0,#240] + mov $i1,r0 @ input + add $i2,r0,$rounds,lsl#4 + mov $key,r1 @ ouput + add $tbl,r1,$rounds,lsl#4 + str $rounds,[r1,#240] + +.Linv: ldr $s0,[$i1],#16 + ldr $s1,[$i1,#-12] + ldr $s2,[$i1,#-8] + ldr $s3,[$i1,#-4] + ldr $t1,[$i2],#-16 + ldr $t2,[$i2,#16+4] + ldr $t3,[$i2,#16+8] + ldr $i3,[$i2,#16+12] + str $s0,[$tbl],#-16 + str $s1,[$tbl,#16+4] + str $s2,[$tbl,#16+8] + str $s3,[$tbl,#16+12] + str $t1,[$key],#16 + str $t2,[$key,#-12] + str $t3,[$key,#-8] + str $i3,[$key,#-4] + teq $i1,$i2 + bne .Linv -.Linv: ldr $s0,[$i1] + ldr $s0,[$i1] ldr $s1,[$i1,#4] ldr $s2,[$i1,#8] ldr $s3,[$i1,#12] - ldr $t1,[$i2] - ldr $t2,[$i2,#4] - ldr $t3,[$i2,#8] - ldr $i3,[$i2,#12] - str $s0,[$i2],#-16 - str $s1,[$i2,#16+4] - str $s2,[$i2,#16+8] - str $s3,[$i2,#16+12] - str $t1,[$i1],#16 - str $t2,[$i1,#-12] - str $t3,[$i1,#-8] - str $i3,[$i1,#-4] - teq $i1,$i2 - bne .Linv + str $s0,[$key] + str $s1,[$key,#4] + str $s2,[$key,#8] + str $s3,[$key,#12] + sub $key,$key,$rounds,lsl#3 ___ $mask80=$i1; $mask1b=$i2; @@ -773,7 +839,7 @@ moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.size AES_set_enc2dec_key,.-AES_set_enc2dec_key .type AES_Td,%object .align 5 @@ -883,7 +949,11 @@ .type AES_decrypt,%function .align 5 AES_decrypt: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ AES_decrypt +#else + adr r3,AES_decrypt +#endif stmdb sp!,{r1,r4-r12,lr} mov $rounds,r0 @ inp mov $key,r2 @@ -1080,8 +1150,9 @@ ldrb $t3,[$tbl,$i3] @ Td4[s0>>0] and $i3,lr,$s1,lsr#8 + add $s1,$tbl,$s1,lsr#24 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0] - ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24] + ldrb $s1,[$s1] @ Td4[s1>>24] ldrb $i2,[$tbl,$i2] @ Td4[s1>>16] eor $s0,$i1,$s0,lsl#24 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8] @@ -1094,7 +1165,8 @@ ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] and $i3,lr,$s2,lsr#16 - ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] + add $s2,$tbl,$s2,lsr#24 + ldrb $s2,[$s2] @ Td4[s2>>24] eor $s0,$s0,$i1,lsl#8 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] eor $s1,$i2,$s1,lsl#16 @@ -1106,8 +1178,9 @@ ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] and $i3,lr,$s3 @ i2 + add $s3,$tbl,$s3,lsr#24 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] - ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] + ldrb $s3,[$s3] @ Td4[s3>>24] eor $s0,$s0,$i1,lsl#16 ldr $i1,[$key,#0] eor $s1,$s1,$i2,lsl#8 @@ -1130,5 +1203,15 @@ ___ $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx\tlr/gm; + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + print $code; close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/aes/asm/aes-mips.pl b/deps/openssl/openssl/crypto/aes/asm/aes-mips.pl index 537c8d3172b684..4de3ee26bb74cf 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aes-mips.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aes-mips.pl @@ 
-20,6 +20,13 @@ # thing about this module is its endian neutrality, which means that # it processes data without ever changing byte order... +# September 2012 +# +# Add MIPS32R2 (~10% less instructions) and SmartMIPS ASE (further +# ~25% less instructions) code. Note that there is no run-time switch, +# instead, code path is chosen upon pre-process time, pass -mips32r2 +# or/and -msmartmips. + ###################################################################### # There is a number of MIPS ABI in use, O32 and N32/64 are most # widely used. Then there is a new contender: NUBI. It appears that if @@ -47,11 +54,12 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_INS="dins"; $REG_S="sd"; $REG_L="ld"; $PTR_SLL="dsll"; # incidentally works even on n32 @@ -59,6 +67,7 @@ } else { $PTR_ADD="add"; $PTR_SUB="sub"; + $PTR_INS="ins"; $REG_S="sw"; $REG_L="lw"; $PTR_SLL="sll"; @@ -89,7 +98,11 @@ # include #endif -#if !defined(__vxworks) || defined(__pic__) +#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) +#define _MIPS_ARCH_MIPS32R2 +#endif + +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif .set noat @@ -125,6 +138,89 @@ xor $s3,$t3 sub $cnt,1 +#if defined(__mips_smartmips) + ext $i0,$s1,16,8 +.Loop_enc: + ext $i1,$s2,16,8 + ext $i2,$s3,16,8 + ext $i3,$s0,16,8 + lwxs $t0,$i0($Tbl) # Te1[s1>>16] + ext $i0,$s2,8,8 + lwxs $t1,$i1($Tbl) # Te1[s2>>16] + ext $i1,$s3,8,8 + lwxs $t2,$i2($Tbl) # Te1[s3>>16] + ext $i2,$s0,8,8 + lwxs $t3,$i3($Tbl) # Te1[s0>>16] + ext $i3,$s1,8,8 + + lwxs $t4,$i0($Tbl) # Te2[s2>>8] + ext $i0,$s3,0,8 + lwxs $t5,$i1($Tbl) # Te2[s3>>8] + ext $i1,$s0,0,8 + lwxs $t6,$i2($Tbl) # Te2[s0>>8] + ext $i2,$s1,0,8 + lwxs $t7,$i3($Tbl) # Te2[s1>>8] + ext $i3,$s2,0,8 + + lwxs $t8,$i0($Tbl) # Te3[s3] + ext $i0,$s0,24,8 + lwxs $t9,$i1($Tbl) # Te3[s0] + ext $i1,$s1,24,8 + lwxs $t10,$i2($Tbl) # Te3[s1] + ext $i2,$s2,24,8 + lwxs $t11,$i3($Tbl) # Te3[s2] + ext $i3,$s3,24,8 + + rotr $t0,$t0,8 + rotr $t1,$t1,8 + rotr $t2,$t2,8 + rotr $t3,$t3,8 + + rotr $t4,$t4,16 + rotr $t5,$t5,16 + rotr $t6,$t6,16 + rotr $t7,$t7,16 + + xor $t0,$t4 + lwxs $t4,$i0($Tbl) # Te0[s0>>24] + xor $t1,$t5 + lwxs $t5,$i1($Tbl) # Te0[s1>>24] + xor $t2,$t6 + lwxs $t6,$i2($Tbl) # Te0[s2>>24] + xor $t3,$t7 + lwxs $t7,$i3($Tbl) # Te0[s3>>24] + + rotr $t8,$t8,24 + lw $s0,0($key0) + rotr $t9,$t9,24 + lw $s1,4($key0) + rotr $t10,$t10,24 + lw $s2,8($key0) + rotr $t11,$t11,24 + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + sub $cnt,1 + $PTR_ADD $key0,16 + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + .set noreorder + bnez $cnt,.Loop_enc + ext $i0,$s1,16,8 + + _xtr $i0,$s1,16-2 +#else _xtr $i0,$s1,16-2 .Loop_enc: _xtr $i1,$s2,16-2 @@ -138,19 +234,29 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + lw $t0,0($i0) # Te1[s1>>16] + _xtr $i0,$s2,8-2 + lw $t1,0($i1) # Te1[s2>>16] + _xtr $i1,$s3,8-2 + lw $t2,0($i2) # Te1[s3>>16] + _xtr $i2,$s0,8-2 + lw $t3,0($i3) # Te1[s0>>16] + _xtr $i3,$s1,8-2 +#else lwl $t0,3($i0) # Te1[s1>>16] lwl $t1,3($i1) # Te1[s2>>16] lwl $t2,3($i2) # 
Te1[s3>>16] lwl $t3,3($i3) # Te1[s0>>16] lwr $t0,2($i0) # Te1[s1>>16] - lwr $t1,2($i1) # Te1[s2>>16] - lwr $t2,2($i2) # Te1[s3>>16] - lwr $t3,2($i3) # Te1[s0>>16] - _xtr $i0,$s2,8-2 + lwr $t1,2($i1) # Te1[s2>>16] _xtr $i1,$s3,8-2 + lwr $t2,2($i2) # Te1[s3>>16] _xtr $i2,$s0,8-2 + lwr $t3,2($i3) # Te1[s0>>16] _xtr $i3,$s1,8-2 +#endif and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -159,19 +265,88 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + rotr $t0,$t0,8 + rotr $t1,$t1,8 + rotr $t2,$t2,8 + rotr $t3,$t3,8 +# if defined(_MIPSEL) + lw $t4,0($i0) # Te2[s2>>8] + _xtr $i0,$s3,0-2 + lw $t5,0($i1) # Te2[s3>>8] + _xtr $i1,$s0,0-2 + lw $t6,0($i2) # Te2[s0>>8] + _xtr $i2,$s1,0-2 + lw $t7,0($i3) # Te2[s1>>8] + _xtr $i3,$s2,0-2 + + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lw $t8,0($i0) # Te3[s3] + $PTR_INS $i0,$s0,2,8 + lw $t9,0($i1) # Te3[s0] + $PTR_INS $i1,$s1,2,8 + lw $t10,0($i2) # Te3[s1] + $PTR_INS $i2,$s2,2,8 + lw $t11,0($i3) # Te3[s2] + $PTR_INS $i3,$s3,2,8 +# else + lw $t4,0($i0) # Te2[s2>>8] + $PTR_INS $i0,$s3,2,8 + lw $t5,0($i1) # Te2[s3>>8] + $PTR_INS $i1,$s0,2,8 + lw $t6,0($i2) # Te2[s0>>8] + $PTR_INS $i2,$s1,2,8 + lw $t7,0($i3) # Te2[s1>>8] + $PTR_INS $i3,$s2,2,8 + + lw $t8,0($i0) # Te3[s3] + _xtr $i0,$s0,24-2 + lw $t9,0($i1) # Te3[s0] + _xtr $i1,$s1,24-2 + lw $t10,0($i2) # Te3[s1] + _xtr $i2,$s2,24-2 + lw $t11,0($i3) # Te3[s2] + _xtr $i3,$s3,24-2 + + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl +# endif + rotr $t4,$t4,16 + rotr $t5,$t5,16 + rotr $t6,$t6,16 + rotr $t7,$t7,16 + + rotr $t8,$t8,24 + rotr $t9,$t9,24 + rotr $t10,$t10,24 + rotr $t11,$t11,24 +#else lwl $t4,2($i0) # Te2[s2>>8] lwl $t5,2($i1) # Te2[s3>>8] lwl $t6,2($i2) # Te2[s0>>8] lwl $t7,2($i3) # Te2[s1>>8] lwr $t4,1($i0) # Te2[s2>>8] - lwr $t5,1($i1) # Te2[s3>>8] - lwr $t6,1($i2) # Te2[s0>>8] - lwr $t7,1($i3) # Te2[s1>>8] - _xtr $i0,$s3,0-2 + lwr $t5,1($i1) # Te2[s3>>8] _xtr $i1,$s0,0-2 + lwr $t6,1($i2) # Te2[s0>>8] _xtr $i2,$s1,0-2 + lwr $t7,1($i3) # Te2[s1>>8] _xtr $i3,$s2,0-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -185,14 +360,14 @@ lwl $t10,1($i2) # Te3[s1] lwl $t11,1($i3) # Te3[s2] lwr $t8,0($i0) # Te3[s3] - lwr $t9,0($i1) # Te3[s0] - lwr $t10,0($i2) # Te3[s1] - lwr $t11,0($i3) # Te3[s2] - _xtr $i0,$s0,24-2 + lwr $t9,0($i1) # Te3[s0] _xtr $i1,$s1,24-2 + lwr $t10,0($i2) # Te3[s1] _xtr $i2,$s2,24-2 + lwr $t11,0($i3) # Te3[s2] _xtr $i3,$s3,24-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -201,24 +376,24 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#endif xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 lw $t4,0($i0) # Te0[s0>>24] + xor $t1,$t5 lw $t5,0($i1) # Te0[s1>>24] + xor $t2,$t6 lw $t6,0($i2) # Te0[s2>>24] + xor $t3,$t7 lw $t7,0($i3) # Te0[s3>>24] - lw $s0,0($key0) - lw $s1,4($key0) - lw $s2,8($key0) - lw $s3,12($key0) - xor $t0,$t8 + lw $s0,0($key0) xor $t1,$t9 + lw $s1,4($key0) xor $t2,$t10 + lw $s2,8($key0) xor $t3,$t11 + lw $s3,12($key0) xor $t0,$t4 xor $t1,$t5 @@ -234,6 +409,7 @@ .set noreorder bnez $cnt,.Loop_enc _xtr $i0,$s1,16-2 +#endif .set reorder _xtr $i1,$s2,16-2 @@ -248,14 +424,14 @@ $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl lbu $t0,2($i0) # Te4[s1>>16] - lbu $t1,2($i1) # Te4[s2>>16] - lbu $t2,2($i2) # Te4[s3>>16] - lbu $t3,2($i3) # Te4[s0>>16] - _xtr $i0,$s2,8-2 + lbu $t1,2($i1) # Te4[s2>>16] _xtr $i1,$s3,8-2 + 
lbu $t2,2($i2) # Te4[s3>>16] _xtr $i2,$s0,8-2 + lbu $t3,2($i3) # Te4[s0>>16] _xtr $i3,$s1,8-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -264,15 +440,44 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) +# if defined(_MIPSEL) lbu $t4,2($i0) # Te4[s2>>8] + $PTR_INS $i0,$s0,2,8 lbu $t5,2($i1) # Te4[s3>>8] + $PTR_INS $i1,$s1,2,8 lbu $t6,2($i2) # Te4[s0>>8] + $PTR_INS $i2,$s2,2,8 lbu $t7,2($i3) # Te4[s1>>8] + $PTR_INS $i3,$s3,2,8 + lbu $t8,2($i0) # Te4[s0>>24] + _xtr $i0,$s3,0-2 + lbu $t9,2($i1) # Te4[s1>>24] + _xtr $i1,$s0,0-2 + lbu $t10,2($i2) # Te4[s2>>24] + _xtr $i2,$s1,0-2 + lbu $t11,2($i3) # Te4[s3>>24] + _xtr $i3,$s2,0-2 + + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl +# else + lbu $t4,2($i0) # Te4[s2>>8] _xtr $i0,$s0,24-2 + lbu $t5,2($i1) # Te4[s3>>8] _xtr $i1,$s1,24-2 + lbu $t6,2($i2) # Te4[s0>>8] _xtr $i2,$s2,24-2 + lbu $t7,2($i3) # Te4[s1>>8] _xtr $i3,$s3,24-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -282,18 +487,76 @@ $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl lbu $t8,2($i0) # Te4[s0>>24] + $PTR_INS $i0,$s3,2,8 lbu $t9,2($i1) # Te4[s1>>24] + $PTR_INS $i1,$s0,2,8 lbu $t10,2($i2) # Te4[s2>>24] + $PTR_INS $i2,$s1,2,8 lbu $t11,2($i3) # Te4[s3>>24] + $PTR_INS $i3,$s2,2,8 +# endif + _ins $t0,16 + _ins $t1,16 + _ins $t2,16 + _ins $t3,16 + _ins2 $t0,$t4,8 + lbu $t4,2($i0) # Te4[s3] + _ins2 $t1,$t5,8 + lbu $t5,2($i1) # Te4[s0] + _ins2 $t2,$t6,8 + lbu $t6,2($i2) # Te4[s1] + _ins2 $t3,$t7,8 + lbu $t7,2($i3) # Te4[s2] + + _ins2 $t0,$t8,24 + lw $s0,0($key0) + _ins2 $t1,$t9,24 + lw $s1,4($key0) + _ins2 $t2,$t10,24 + lw $s2,8($key0) + _ins2 $t3,$t11,24 + lw $s3,12($key0) + + _ins2 $t0,$t4,0 + _ins2 $t1,$t5,0 + _ins2 $t2,$t6,0 + _ins2 $t3,$t7,0 +#else + lbu $t4,2($i0) # Te4[s2>>8] + _xtr $i0,$s0,24-2 + lbu $t5,2($i1) # Te4[s3>>8] + _xtr $i1,$s1,24-2 + lbu $t6,2($i2) # Te4[s0>>8] + _xtr $i2,$s2,24-2 + lbu $t7,2($i3) # Te4[s1>>8] + _xtr $i3,$s3,24-2 + + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t8,2($i0) # Te4[s0>>24] _xtr $i0,$s3,0-2 + lbu $t9,2($i1) # Te4[s1>>24] _xtr $i1,$s0,0-2 + lbu $t10,2($i2) # Te4[s2>>24] _xtr $i2,$s1,0-2 + lbu $t11,2($i3) # Te4[s3>>24] _xtr $i3,$s2,0-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl _ins $t0,16 _ins $t1,16 @@ -306,27 +569,21 @@ _ins $t7,8 xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl lbu $t4,2($i0) # Te4[s3] + xor $t1,$t5 lbu $t5,2($i1) # Te4[s0] + xor $t2,$t6 lbu $t6,2($i2) # Te4[s1] + xor $t3,$t7 lbu $t7,2($i3) # Te4[s2] _ins $t8,24 - _ins $t9,24 - _ins $t10,24 - _ins $t11,24 - lw $s0,0($key0) + _ins $t9,24 lw $s1,4($key0) + _ins $t10,24 lw $s2,8($key0) + _ins $t11,24 lw $s3,12($key0) xor $t0,$t8 @@ -343,7 +600,7 @@ xor $t1,$t5 xor $t2,$t6 xor $t3,$t7 - +#endif xor $s0,$t0 xor $s1,$t1 xor $s2,$t2 @@ -455,6 +712,89 @@ xor $s3,$t3 sub $cnt,1 +#if defined(__mips_smartmips) + ext $i0,$s3,16,8 +.Loop_dec: + ext $i1,$s0,16,8 + ext $i2,$s1,16,8 + ext $i3,$s2,16,8 + lwxs $t0,$i0($Tbl) # Td1[s3>>16] + ext $i0,$s2,8,8 + lwxs $t1,$i1($Tbl) # Td1[s0>>16] + ext $i1,$s3,8,8 + lwxs $t2,$i2($Tbl) # Td1[s1>>16] + ext $i2,$s0,8,8 + lwxs $t3,$i3($Tbl) # Td1[s2>>16] + ext $i3,$s1,8,8 + + lwxs $t4,$i0($Tbl) # Td2[s2>>8] + 
ext $i0,$s1,0,8 + lwxs $t5,$i1($Tbl) # Td2[s3>>8] + ext $i1,$s2,0,8 + lwxs $t6,$i2($Tbl) # Td2[s0>>8] + ext $i2,$s3,0,8 + lwxs $t7,$i3($Tbl) # Td2[s1>>8] + ext $i3,$s0,0,8 + + lwxs $t8,$i0($Tbl) # Td3[s1] + ext $i0,$s0,24,8 + lwxs $t9,$i1($Tbl) # Td3[s2] + ext $i1,$s1,24,8 + lwxs $t10,$i2($Tbl) # Td3[s3] + ext $i2,$s2,24,8 + lwxs $t11,$i3($Tbl) # Td3[s0] + ext $i3,$s3,24,8 + + rotr $t0,$t0,8 + rotr $t1,$t1,8 + rotr $t2,$t2,8 + rotr $t3,$t3,8 + + rotr $t4,$t4,16 + rotr $t5,$t5,16 + rotr $t6,$t6,16 + rotr $t7,$t7,16 + + xor $t0,$t4 + lwxs $t4,$i0($Tbl) # Td0[s0>>24] + xor $t1,$t5 + lwxs $t5,$i1($Tbl) # Td0[s1>>24] + xor $t2,$t6 + lwxs $t6,$i2($Tbl) # Td0[s2>>24] + xor $t3,$t7 + lwxs $t7,$i3($Tbl) # Td0[s3>>24] + + rotr $t8,$t8,24 + lw $s0,0($key0) + rotr $t9,$t9,24 + lw $s1,4($key0) + rotr $t10,$t10,24 + lw $s2,8($key0) + rotr $t11,$t11,24 + lw $s3,12($key0) + + xor $t0,$t8 + xor $t1,$t9 + xor $t2,$t10 + xor $t3,$t11 + + xor $t0,$t4 + xor $t1,$t5 + xor $t2,$t6 + xor $t3,$t7 + + sub $cnt,1 + $PTR_ADD $key0,16 + xor $s0,$t0 + xor $s1,$t1 + xor $s2,$t2 + xor $s3,$t3 + .set noreorder + bnez $cnt,.Loop_dec + ext $i0,$s3,16,8 + + _xtr $i0,$s3,16-2 +#else _xtr $i0,$s3,16-2 .Loop_dec: _xtr $i1,$s0,16-2 @@ -468,19 +808,88 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + lw $t0,0($i0) # Td1[s3>>16] + _xtr $i0,$s2,8-2 + lw $t1,0($i1) # Td1[s0>>16] + _xtr $i1,$s3,8-2 + lw $t2,0($i2) # Td1[s1>>16] + _xtr $i2,$s0,8-2 + lw $t3,0($i3) # Td1[s2>>16] + _xtr $i3,$s1,8-2 +#else lwl $t0,3($i0) # Td1[s3>>16] lwl $t1,3($i1) # Td1[s0>>16] lwl $t2,3($i2) # Td1[s1>>16] lwl $t3,3($i3) # Td1[s2>>16] lwr $t0,2($i0) # Td1[s3>>16] - lwr $t1,2($i1) # Td1[s0>>16] - lwr $t2,2($i2) # Td1[s1>>16] - lwr $t3,2($i3) # Td1[s2>>16] - _xtr $i0,$s2,8-2 + lwr $t1,2($i1) # Td1[s0>>16] _xtr $i1,$s3,8-2 + lwr $t2,2($i2) # Td1[s1>>16] _xtr $i2,$s0,8-2 + lwr $t3,2($i3) # Td1[s2>>16] _xtr $i3,$s1,8-2 +#endif + + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + rotr $t0,$t0,8 + rotr $t1,$t1,8 + rotr $t2,$t2,8 + rotr $t3,$t3,8 +# if defined(_MIPSEL) + lw $t4,0($i0) # Td2[s2>>8] + _xtr $i0,$s1,0-2 + lw $t5,0($i1) # Td2[s3>>8] + _xtr $i1,$s2,0-2 + lw $t6,0($i2) # Td2[s0>>8] + _xtr $i2,$s3,0-2 + lw $t7,0($i3) # Td2[s1>>8] + _xtr $i3,$s0,0-2 + + and $i0,0x3fc + and $i1,0x3fc + and $i2,0x3fc + and $i3,0x3fc + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lw $t8,0($i0) # Td3[s1] + $PTR_INS $i0,$s0,2,8 + lw $t9,0($i1) # Td3[s2] + $PTR_INS $i1,$s1,2,8 + lw $t10,0($i2) # Td3[s3] + $PTR_INS $i2,$s2,2,8 + lw $t11,0($i3) # Td3[s0] + $PTR_INS $i3,$s3,2,8 +#else + lw $t4,0($i0) # Td2[s2>>8] + $PTR_INS $i0,$s1,2,8 + lw $t5,0($i1) # Td2[s3>>8] + $PTR_INS $i1,$s2,2,8 + lw $t6,0($i2) # Td2[s0>>8] + $PTR_INS $i2,$s3,2,8 + lw $t7,0($i3) # Td2[s1>>8] + $PTR_INS $i3,$s0,2,8 + + lw $t8,0($i0) # Td3[s1] + _xtr $i0,$s0,24-2 + lw $t9,0($i1) # Td3[s2] + _xtr $i1,$s1,24-2 + lw $t10,0($i2) # Td3[s3] + _xtr $i2,$s2,24-2 + lw $t11,0($i3) # Td3[s0] + _xtr $i3,$s3,24-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -489,19 +898,30 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#endif + rotr $t4,$t4,16 + rotr $t5,$t5,16 + rotr $t6,$t6,16 + rotr $t7,$t7,16 + + rotr $t8,$t8,24 + rotr $t9,$t9,24 + rotr $t10,$t10,24 + rotr $t11,$t11,24 +#else lwl $t4,2($i0) # Td2[s2>>8] lwl $t5,2($i1) # Td2[s3>>8] lwl 
$t6,2($i2) # Td2[s0>>8] lwl $t7,2($i3) # Td2[s1>>8] lwr $t4,1($i0) # Td2[s2>>8] - lwr $t5,1($i1) # Td2[s3>>8] - lwr $t6,1($i2) # Td2[s0>>8] - lwr $t7,1($i3) # Td2[s1>>8] - _xtr $i0,$s1,0-2 + lwr $t5,1($i1) # Td2[s3>>8] _xtr $i1,$s2,0-2 + lwr $t6,1($i2) # Td2[s0>>8] _xtr $i2,$s3,0-2 + lwr $t7,1($i3) # Td2[s1>>8] _xtr $i3,$s0,0-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -515,14 +935,14 @@ lwl $t10,1($i2) # Td3[s3] lwl $t11,1($i3) # Td3[s0] lwr $t8,0($i0) # Td3[s1] - lwr $t9,0($i1) # Td3[s2] - lwr $t10,0($i2) # Td3[s3] - lwr $t11,0($i3) # Td3[s0] - _xtr $i0,$s0,24-2 + lwr $t9,0($i1) # Td3[s2] _xtr $i1,$s1,24-2 + lwr $t10,0($i2) # Td3[s3] _xtr $i2,$s2,24-2 + lwr $t11,0($i3) # Td3[s0] _xtr $i3,$s3,24-2 + and $i0,0x3fc and $i1,0x3fc and $i2,0x3fc @@ -531,27 +951,25 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#endif xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - lw $t4,0($i0) # Td0[s0>>24] + xor $t1,$t5 lw $t5,0($i1) # Td0[s1>>24] + xor $t2,$t6 lw $t6,0($i2) # Td0[s2>>24] + xor $t3,$t7 lw $t7,0($i3) # Td0[s3>>24] - lw $s0,0($key0) - lw $s1,4($key0) - lw $s2,8($key0) - lw $s3,12($key0) - xor $t0,$t8 + lw $s0,0($key0) xor $t1,$t9 + lw $s1,4($key0) xor $t2,$t10 + lw $s2,8($key0) xor $t3,$t11 + lw $s3,12($key0) xor $t0,$t4 xor $t1,$t5 @@ -567,38 +985,39 @@ .set noreorder bnez $cnt,.Loop_dec _xtr $i0,$s3,16-2 +#endif .set reorder lw $t4,1024($Tbl) # prefetch Td4 - lw $t5,1024+32($Tbl) - lw $t6,1024+64($Tbl) - lw $t7,1024+96($Tbl) - lw $t8,1024+128($Tbl) - lw $t9,1024+160($Tbl) - lw $t10,1024+192($Tbl) - lw $t11,1024+224($Tbl) - _xtr $i0,$s3,16 + lw $t5,1024+32($Tbl) _xtr $i1,$s0,16 + lw $t6,1024+64($Tbl) _xtr $i2,$s1,16 + lw $t7,1024+96($Tbl) _xtr $i3,$s2,16 + lw $t8,1024+128($Tbl) and $i0,0xff + lw $t9,1024+160($Tbl) and $i1,0xff + lw $t10,1024+192($Tbl) and $i2,0xff + lw $t11,1024+224($Tbl) and $i3,0xff + $PTR_ADD $i0,$Tbl $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl lbu $t0,1024($i0) # Td4[s3>>16] - lbu $t1,1024($i1) # Td4[s0>>16] - lbu $t2,1024($i2) # Td4[s1>>16] - lbu $t3,1024($i3) # Td4[s2>>16] - _xtr $i0,$s2,8 + lbu $t1,1024($i1) # Td4[s0>>16] _xtr $i1,$s3,8 + lbu $t2,1024($i2) # Td4[s1>>16] _xtr $i2,$s0,8 + lbu $t3,1024($i3) # Td4[s2>>16] _xtr $i3,$s1,8 + and $i0,0xff and $i1,0xff and $i2,0xff @@ -607,29 +1026,108 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) +# if defined(_MIPSEL) lbu $t4,1024($i0) # Td4[s2>>8] + $PTR_INS $i0,$s0,0,8 lbu $t5,1024($i1) # Td4[s3>>8] + $PTR_INS $i1,$s1,0,8 lbu $t6,1024($i2) # Td4[s0>>8] + $PTR_INS $i2,$s2,0,8 lbu $t7,1024($i3) # Td4[s1>>8] + $PTR_INS $i3,$s3,0,8 + lbu $t8,1024($i0) # Td4[s0>>24] + _xtr $i0,$s1,0 + lbu $t9,1024($i1) # Td4[s1>>24] + _xtr $i1,$s2,0 + lbu $t10,1024($i2) # Td4[s2>>24] + _xtr $i2,$s3,0 + lbu $t11,1024($i3) # Td4[s3>>24] + _xtr $i3,$s0,0 + + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl +# else + lbu $t4,1024($i0) # Td4[s2>>8] _xtr $i0,$s0,24 + lbu $t5,1024($i1) # Td4[s3>>8] _xtr $i1,$s1,24 + lbu $t6,1024($i2) # Td4[s0>>8] _xtr $i2,$s2,24 + lbu $t7,1024($i3) # Td4[s1>>8] _xtr $i3,$s3,24 + $PTR_ADD $i0,$Tbl $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl lbu $t8,1024($i0) # Td4[s0>>24] + $PTR_INS $i0,$s1,0,8 lbu $t9,1024($i1) # Td4[s1>>24] + $PTR_INS $i1,$s2,0,8 lbu $t10,1024($i2) # Td4[s2>>24] + $PTR_INS $i2,$s3,0,8 lbu $t11,1024($i3) # Td4[s3>>24] + $PTR_INS $i3,$s0,0,8 +# endif + _ins $t0,16 + _ins $t1,16 + _ins $t2,16 + _ins $t3,16 + + _ins2 $t0,$t4,8 + lbu $t4,1024($i0) # Td4[s1] 
+ _ins2 $t1,$t5,8 + lbu $t5,1024($i1) # Td4[s2] + _ins2 $t2,$t6,8 + lbu $t6,1024($i2) # Td4[s3] + _ins2 $t3,$t7,8 + lbu $t7,1024($i3) # Td4[s0] + + _ins2 $t0,$t8,24 + lw $s0,0($key0) + _ins2 $t1,$t9,24 + lw $s1,4($key0) + _ins2 $t2,$t10,24 + lw $s2,8($key0) + _ins2 $t3,$t11,24 + lw $s3,12($key0) + _ins2 $t0,$t4,0 + _ins2 $t1,$t5,0 + _ins2 $t2,$t6,0 + _ins2 $t3,$t7,0 +#else + lbu $t4,1024($i0) # Td4[s2>>8] + _xtr $i0,$s0,24 + lbu $t5,1024($i1) # Td4[s3>>8] + _xtr $i1,$s1,24 + lbu $t6,1024($i2) # Td4[s0>>8] + _xtr $i2,$s2,24 + lbu $t7,1024($i3) # Td4[s1>>8] + _xtr $i3,$s3,24 + + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + lbu $t8,1024($i0) # Td4[s0>>24] _xtr $i0,$s1,0 + lbu $t9,1024($i1) # Td4[s1>>24] _xtr $i1,$s2,0 + lbu $t10,1024($i2) # Td4[s2>>24] _xtr $i2,$s3,0 + lbu $t11,1024($i3) # Td4[s3>>24] _xtr $i3,$s0,0 + $PTR_ADD $i0,$Tbl + $PTR_ADD $i1,$Tbl + $PTR_ADD $i2,$Tbl + $PTR_ADD $i3,$Tbl + _ins $t0,16 _ins $t1,16 _ins $t2,16 @@ -641,44 +1139,38 @@ _ins $t7,8 xor $t0,$t4 - xor $t1,$t5 - xor $t2,$t6 - xor $t3,$t7 - - $PTR_ADD $i0,$Tbl - $PTR_ADD $i1,$Tbl - $PTR_ADD $i2,$Tbl - $PTR_ADD $i3,$Tbl lbu $t4,1024($i0) # Td4[s1] + xor $t1,$t5 lbu $t5,1024($i1) # Td4[s2] + xor $t2,$t6 lbu $t6,1024($i2) # Td4[s3] + xor $t3,$t7 lbu $t7,1024($i3) # Td4[s0] _ins $t8,24 - _ins $t9,24 - _ins $t10,24 - _ins $t11,24 - lw $s0,0($key0) + _ins $t9,24 lw $s1,4($key0) + _ins $t10,24 lw $s2,8($key0) + _ins $t11,24 lw $s3,12($key0) - _ins $t4,0 - _ins $t5,0 - _ins $t6,0 - _ins $t7,0 - - xor $t0,$t8 xor $t1,$t9 xor $t2,$t10 xor $t3,$t11 + _ins $t4,0 + _ins $t5,0 + _ins $t6,0 + _ins $t7,0 + xor $t0,$t4 xor $t1,$t5 xor $t2,$t6 xor $t3,$t7 +#endif xor $s0,$t0 xor $s1,$t1 @@ -791,7 +1283,7 @@ beqz $inp,.Lekey_done li $t0,-1 beqz $key,.Lekey_done - $PTR_ADD $rcon,$Tbl,1024+256 + $PTR_ADD $rcon,$Tbl,256 .set reorder lwl $rk0,0+$MSB($inp) # load 128 bits @@ -843,10 +1335,10 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) + lbu $i0,0($i0) + lbu $i1,0($i1) + lbu $i2,0($i2) + lbu $i3,0($i3) sw $rk0,0($key) sw $rk1,4($key) @@ -898,10 +1390,10 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) + lbu $i0,0($i0) + lbu $i1,0($i1) + lbu $i2,0($i2) + lbu $i3,0($i3) sw $rk0,0($key) sw $rk1,4($key) @@ -957,10 +1449,10 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) + lbu $i0,0($i0) + lbu $i1,0($i1) + lbu $i2,0($i2) + lbu $i3,0($i3) sw $rk0,0($key) sw $rk1,4($key) @@ -999,10 +1491,10 @@ $PTR_ADD $i1,$Tbl $PTR_ADD $i2,$Tbl $PTR_ADD $i3,$Tbl - lbu $i0,1024($i0) - lbu $i1,1024($i1) - lbu $i2,1024($i2) - lbu $i3,1024($i3) + lbu $i0,0($i0) + lbu $i1,0($i1) + lbu $i2,0($i2) + lbu $i3,0($i3) sll $i0,24 sll $i1,16 sll $i2,8 @@ -1064,7 +1556,7 @@ ___ $code.=<<___; .set reorder - la $Tbl,AES_Te # PIC-ified 'load address' + la $Tbl,AES_Te4 # PIC-ified 'load address' bal _mips_AES_set_encrypt_key @@ -1119,7 +1611,7 @@ ___ $code.=<<___; .set reorder - la $Tbl,AES_Te # PIC-ified 'load address' + la $Tbl,AES_Te4 # PIC-ified 'load address' bal _mips_AES_set_encrypt_key @@ -1190,6 +1682,16 @@ xor $tpb,$tp9,$tp2 xor $tpd,$tp9,$tp4 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + rotr $tp1,$tpd,16 + xor $tpe,$tp2 + rotr $tp2,$tp9,8 + xor $tpe,$tp1 + rotr $tp4,$tpb,24 + xor $tpe,$tp2 + lw $tp1,4($key) # modulo-scheduled + xor $tpe,$tp4 
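
The `rotr` instructions in the R2 branch just above keep rotate amounts written for big-endian data; when the post-processor runs for a little-endian target, the `rotr` substitution added near the end of this script rewrites each amount N to 32-N, the same mirroring the `_ins`/`_ins2` pseudo-ops apply to their byte positions. Rotating right by 32-N is simply rotating left by N. A quick numeric check, as a sketch:

    # Sketch: rotr by (32-N) equals rotl by N on 32-bit words, so mirroring
    # the rotate amounts is enough to adapt the R2 path to little-endian.
    sub rotr32 {
        my ($x, $n) = @_;
        $n %= 32;
        return $n ? ((($x >> $n) | ($x << (32 - $n))) & 0xffffffff) : $x;
    }
    sub rotl32 { my ($x, $n) = @_; return rotr32($x, (32 - $n) % 32) }

    my $x = 0x80402010;
    printf "rotr(x,32-8) = %08x\n", rotr32($x, 32 - 8);   # 40201080
    printf "rotl(x,8)    = %08x\n", rotl32($x, 8);        # identical
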
+#else _ror $tp1,$tpd,16 xor $tpe,$tp2 _ror $tp2,$tpd,-16 @@ -1204,6 +1706,7 @@ xor $tpe,$tp1 lw $tp1,4($key) # modulo-scheduled xor $tpe,$tp2 +#endif sub $cnt,1 sw $tpe,0($key) $PTR_ADD $key,4 @@ -1234,7 +1737,7 @@ # Tables are kept in endian-neutral manner $code.=<<___; .rdata -.align 6 +.align 10 AES_Te: .byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 .byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d @@ -1365,46 +1868,6 @@ .byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc .byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a -.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 -.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 -.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 -.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 -.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc -.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 -.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a -.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 -.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 -.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 -.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b -.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf -.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 -.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 -.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 -.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 -.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 -.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 -.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 -.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb -.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c -.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 -.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 -.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 -.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 -.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a -.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e -.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e -.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 -.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf -.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 -.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 - -.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon -.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 -.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 -.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 -.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 - -.align 6 AES_Td: .byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 .byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 @@ -1567,6 +2030,46 @@ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + +AES_Te4: +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 
0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon +.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 +.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 +.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 +.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 ___ foreach (split("\n",$code)) { @@ -1583,6 +2086,9 @@ s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ sprintf("sll\t$1,$2,%d",$big_endian ? eval($3) : eval("24-$3"))/e or + s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/ + sprintf("ins\t$1,$2,%d,8",$big_endian ? eval($3) + : eval("24-$3"))/e or s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/ sprintf("srl\t$1,$2,%d",$big_endian ? eval($3) : eval("$3*-1"))/e or @@ -1605,6 +2111,11 @@ sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e; } + if (!$big_endian) { + s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e; + s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e; + } + print $_,"\n"; } diff --git a/deps/openssl/openssl/crypto/aes/asm/aes-ppc.pl b/deps/openssl/openssl/crypto/aes/asm/aes-ppc.pl index 7c52cbe5f9fa9c..7a99fc3d0452e3 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aes-ppc.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aes-ppc.pl @@ -45,6 +45,8 @@ $PUSH ="stw"; } else { die "nonsense $flavour"; } +$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or @@ -68,7 +70,7 @@ () $Tbl0="r3"; $Tbl1="r6"; $Tbl2="r7"; -$Tbl3="r2"; +$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack $s0="r8"; $s1="r9"; @@ -76,7 +78,7 @@ () $s3="r11"; $t0="r12"; -$t1="r13"; +$t1="r0"; # stay away from "r13"; $t2="r14"; $t3="r15"; @@ -100,9 +102,6 @@ () $acc14="r30"; $acc15="r31"; -# stay away from TLS pointer -if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } -else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } $mask80=$Tbl2; $mask1b=$Tbl3; @@ -337,8 +336,7 @@ () $STU $sp,-$FRAME($sp) mflr r0 - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) + $PUSH $out,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) @@ -365,16 +363,61 @@ () bne Lenc_unaligned Lenc_unaligned_ok: +___ +$code.=<<___ if (!$LITTLE_ENDIAN); lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) lwz $s3,12($inp) +___ +$code.=<<___ if ($LITTLE_ENDIAN); + lwz $t0,0($inp) + lwz $t1,4($inp) + lwz $t2,8($inp) + lwz $t3,12($inp) + rotlwi $s0,$t0,8 + rotlwi $s1,$t1,8 + rotlwi $s2,$t2,8 + rotlwi $s3,$t3,8 + rlwimi $s0,$t0,24,0,7 + rlwimi $s1,$t1,24,0,7 + rlwimi $s2,$t2,24,0,7 + rlwimi $s3,$t3,24,0,7 + rlwimi $s0,$t0,24,16,23 + rlwimi $s1,$t1,24,16,23 + rlwimi $s2,$t2,24,16,23 + rlwimi $s3,$t3,24,16,23 +___ +$code.=<<___; bl LAES_Te bl Lppc_AES_encrypt_compact + $POP $out,`$FRAME-$SIZE_T*19`($sp) +___ +$code.=<<___ if ($LITTLE_ENDIAN); + rotlwi $t0,$s0,8 + rotlwi $t1,$s1,8 + rotlwi $t2,$s2,8 + rotlwi $t3,$s3,8 + rlwimi $t0,$s0,24,0,7 + rlwimi $t1,$s1,24,0,7 + rlwimi $t2,$s2,24,0,7 + rlwimi $t3,$s3,24,0,7 + rlwimi $t0,$s0,24,16,23 + rlwimi $t1,$s1,24,16,23 + rlwimi $t2,$s2,24,16,23 + rlwimi $t3,$s3,24,16,23 + stw $t0,0($out) + stw $t1,4($out) + stw $t2,8($out) + stw $t3,12($out) +___ +$code.=<<___ if (!$LITTLE_ENDIAN); stw $s0,0($out) stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) +___ +$code.=<<___; b Lenc_done Lenc_unaligned: @@ -417,6 +460,7 @@ () bl LAES_Te bl Lppc_AES_encrypt_compact + $POP $out,`$FRAME-$SIZE_T*19`($sp) extrwi $acc00,$s0,8,0 extrwi $acc01,$s0,8,8 @@ -449,8 +493,6 @@ () Lenc_done: $POP r0,`$FRAME+$LRSAVE`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp) @@ -764,6 +806,7 @@ () blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .AES_encrypt,.-.AES_encrypt .globl .AES_decrypt .align 7 @@ -771,8 +814,7 @@ () $STU $sp,-$FRAME($sp) mflr r0 - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) + $PUSH $out,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) @@ -799,16 +841,61 @@ () bne Ldec_unaligned Ldec_unaligned_ok: +___ +$code.=<<___ if (!$LITTLE_ENDIAN); lwz $s0,0($inp) lwz $s1,4($inp) lwz $s2,8($inp) lwz $s3,12($inp) +___ +$code.=<<___ if ($LITTLE_ENDIAN); + lwz $t0,0($inp) + lwz $t1,4($inp) + lwz $t2,8($inp) + lwz $t3,12($inp) + rotlwi $s0,$t0,8 + rotlwi $s1,$t1,8 + rotlwi $s2,$t2,8 + rotlwi $s3,$t3,8 + rlwimi $s0,$t0,24,0,7 + rlwimi $s1,$t1,24,0,7 + rlwimi $s2,$t2,24,0,7 + rlwimi $s3,$t3,24,0,7 + rlwimi $s0,$t0,24,16,23 + rlwimi $s1,$t1,24,16,23 + rlwimi $s2,$t2,24,16,23 + rlwimi $s3,$t3,24,16,23 +___ +$code.=<<___; bl LAES_Td bl Lppc_AES_decrypt_compact + $POP $out,`$FRAME-$SIZE_T*19`($sp) +___ 
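
The little-endian path added here brackets the table-driven rounds with a three-instruction byte swap: after `rotlwi t,s,8` the word holds bytes B1 B2 B3 B0, and the two `rlwimi ...,24,0,7` / `...,24,16,23` inserts pull B3 and B1 back in from the 24-bit rotation, leaving B3 B2 B1 B0. The rounds themselves keep working on big-endian words; only the loads and stores around them change. A short sketch that checks the equivalence on a plain 32-bit value:

    # Sketch: model rotlwi/rlwimi and confirm the triple is a full byte swap.
    sub rotlw { my ($x, $n) = @_; return (($x << $n) | ($x >> (32 - $n))) & 0xffffffff }

    sub ppc_bswap {
        my $s = shift;
        my $t = rotlw($s, 8);                                    # rotlwi t,s,8
        $t = ($t & 0x00ffffff) | (rotlw($s, 24) & 0xff000000);   # rlwimi t,s,24,0,7
        $t = ($t & 0xffff00ff) | (rotlw($s, 24) & 0x0000ff00);   # rlwimi t,s,24,16,23
        return $t;
    }

    printf "%08x -> %08x\n", 0x00010203, ppc_bswap(0x00010203);  # 03020100
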
+$code.=<<___ if ($LITTLE_ENDIAN); + rotlwi $t0,$s0,8 + rotlwi $t1,$s1,8 + rotlwi $t2,$s2,8 + rotlwi $t3,$s3,8 + rlwimi $t0,$s0,24,0,7 + rlwimi $t1,$s1,24,0,7 + rlwimi $t2,$s2,24,0,7 + rlwimi $t3,$s3,24,0,7 + rlwimi $t0,$s0,24,16,23 + rlwimi $t1,$s1,24,16,23 + rlwimi $t2,$s2,24,16,23 + rlwimi $t3,$s3,24,16,23 + stw $t0,0($out) + stw $t1,4($out) + stw $t2,8($out) + stw $t3,12($out) +___ +$code.=<<___ if (!$LITTLE_ENDIAN); stw $s0,0($out) stw $s1,4($out) stw $s2,8($out) stw $s3,12($out) +___ +$code.=<<___; b Ldec_done Ldec_unaligned: @@ -851,6 +938,7 @@ () bl LAES_Td bl Lppc_AES_decrypt_compact + $POP $out,`$FRAME-$SIZE_T*19`($sp) extrwi $acc00,$s0,8,0 extrwi $acc01,$s0,8,8 @@ -883,8 +971,6 @@ () Ldec_done: $POP r0,`$FRAME+$LRSAVE`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp) @@ -1355,6 +1441,7 @@ () blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .AES_decrypt,.-.AES_decrypt .asciz "AES for PPC, CRYPTOGAMS by " .align 7 diff --git a/deps/openssl/openssl/crypto/aes/asm/aes-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aes-x86_64.pl index 34cbb5d844299f..47f416375d1eb4 100755 --- a/deps/openssl/openssl/crypto/aes/asm/aes-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aes-x86_64.pl @@ -19,9 +19,10 @@ # Performance in number of cycles per processed byte for 128-bit key: # # ECB encrypt ECB decrypt CBC large chunk -# AMD64 33 41 13.0 -# EM64T 38 59 18.6(*) -# Core 2 30 43 14.5(*) +# AMD64 33 43 13.0 +# EM64T 38 56 18.6(*) +# Core 2 30 42 14.5(*) +# Atom 65 86 32.1(*) # # (*) with hyper-threading off @@ -366,68 +367,66 @@ () movzb `&lo("$s0")`,$t0 movzb `&lo("$s1")`,$t1 movzb `&lo("$s2")`,$t2 - movzb ($sbox,$t0,1),$t0 - movzb ($sbox,$t1,1),$t1 - movzb ($sbox,$t2,1),$t2 - movzb `&lo("$s3")`,$t3 movzb `&hi("$s1")`,$acc0 movzb `&hi("$s2")`,$acc1 + shr \$16,$s2 + movzb `&hi("$s3")`,$acc2 + movzb ($sbox,$t0,1),$t0 + movzb ($sbox,$t1,1),$t1 + movzb ($sbox,$t2,1),$t2 movzb ($sbox,$t3,1),$t3 - movzb ($sbox,$acc0,1),$t4 #$t0 - movzb ($sbox,$acc1,1),$t5 #$t1 - movzb `&hi("$s3")`,$acc2 + movzb ($sbox,$acc0,1),$t4 #$t0 movzb `&hi("$s0")`,$acc0 - shr \$16,$s2 + movzb ($sbox,$acc1,1),$t5 #$t1 + movzb `&lo("$s2")`,$acc1 movzb ($sbox,$acc2,1),$acc2 #$t2 movzb ($sbox,$acc0,1),$acc0 #$t3 - shr \$16,$s3 - movzb `&lo("$s2")`,$acc1 shl \$8,$t4 + shr \$16,$s3 shl \$8,$t5 - movzb ($sbox,$acc1,1),$acc1 #$t0 xor $t4,$t0 - xor $t5,$t1 - - movzb `&lo("$s3")`,$t4 shr \$16,$s0 + movzb `&lo("$s3")`,$t4 shr \$16,$s1 - movzb `&lo("$s0")`,$t5 + xor $t5,$t1 shl \$8,$acc2 - shl \$8,$acc0 - movzb ($sbox,$t4,1),$t4 #$t1 - movzb ($sbox,$t5,1),$t5 #$t2 + movzb `&lo("$s0")`,$t5 + movzb ($sbox,$acc1,1),$acc1 #$t0 xor $acc2,$t2 - xor $acc0,$t3 + shl \$8,$acc0 movzb `&lo("$s1")`,$acc2 - movzb `&hi("$s3")`,$acc0 shl \$16,$acc1 - movzb ($sbox,$acc2,1),$acc2 #$t3 - movzb ($sbox,$acc0,1),$acc0 #$t0 + xor $acc0,$t3 + movzb ($sbox,$t4,1),$t4 #$t1 + movzb `&hi("$s3")`,$acc0 + movzb ($sbox,$t5,1),$t5 #$t2 xor $acc1,$t0 - movzb `&hi("$s0")`,$acc1 shr \$8,$s2 + movzb `&hi("$s0")`,$acc1 + shl \$16,$t4 shr \$8,$s1 + shl \$16,$t5 + xor $t4,$t1 + movzb ($sbox,$acc2,1),$acc2 #$t3 + movzb ($sbox,$acc0,1),$acc0 #$t0 movzb ($sbox,$acc1,1),$acc1 #$t1 movzb ($sbox,$s2,1),$s3 #$t3 movzb ($sbox,$s1,1),$s2 #$t2 - shl \$16,$t4 - shl \$16,$t5 + shl \$16,$acc2 - xor $t4,$t1 xor $t5,$t2 - xor $acc2,$t3 - shl \$24,$acc0 + xor $acc2,$t3 shl \$24,$acc1 - shl \$24,$s3 xor $acc0,$t0 - shl \$24,$s2 + shl \$24,$s3 xor $acc1,$t1 + shl 
\$24,$s2 mov $t0,$s0 mov $t1,$s1 xor $t2,$s2 @@ -466,12 +465,12 @@ () { my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d"); $code.=<<___; - mov $s0,$acc0 - mov $s1,$acc1 - and \$0x80808080,$acc0 - and \$0x80808080,$acc1 - mov $acc0,$t0 - mov $acc1,$t1 + mov \$0x80808080,$t0 + mov \$0x80808080,$t1 + and $s0,$t0 + and $s1,$t1 + mov $t0,$acc0 + mov $t1,$acc1 shr \$7,$t0 lea ($s0,$s0),$r20 shr \$7,$t1 @@ -489,25 +488,25 @@ () xor $r20,$s0 xor $r21,$s1 - mov $s2,$acc0 - mov $s3,$acc1 + mov \$0x80808080,$t2 rol \$24,$s0 + mov \$0x80808080,$t3 rol \$24,$s1 - and \$0x80808080,$acc0 - and \$0x80808080,$acc1 + and $s2,$t2 + and $s3,$t3 xor $r20,$s0 xor $r21,$s1 - mov $acc0,$t2 - mov $acc1,$t3 + mov $t2,$acc0 ror \$16,$t0 + mov $t3,$acc1 ror \$16,$t1 - shr \$7,$t2 lea ($s2,$s2),$r20 + shr \$7,$t2 xor $t0,$s0 - xor $t1,$s1 shr \$7,$t3 - lea ($s3,$s3),$r21 + xor $t1,$s1 ror \$8,$t0 + lea ($s3,$s3),$r21 ror \$8,$t1 sub $t2,$acc0 sub $t3,$acc1 @@ -523,23 +522,23 @@ () xor $acc0,$r20 xor $acc1,$r21 + ror \$16,$t2 xor $r20,$s2 + ror \$16,$t3 xor $r21,$s3 rol \$24,$s2 + mov 0($sbox),$acc0 # prefetch Te4 rol \$24,$s3 xor $r20,$s2 - xor $r21,$s3 - mov 0($sbox),$acc0 # prefetch Te4 - ror \$16,$t2 - ror \$16,$t3 mov 64($sbox),$acc1 - xor $t2,$s2 - xor $t3,$s3 + xor $r21,$s3 mov 128($sbox),$r20 + xor $t2,$s2 ror \$8,$t2 + xor $t3,$s3 ror \$8,$t3 - mov 192($sbox),$r21 xor $t2,$s2 + mov 192($sbox),$r21 xor $t3,$s3 ___ } @@ -936,70 +935,69 @@ () movzb `&lo("$s0")`,$t0 movzb `&lo("$s1")`,$t1 movzb `&lo("$s2")`,$t2 - movzb ($sbox,$t0,1),$t0 - movzb ($sbox,$t1,1),$t1 - movzb ($sbox,$t2,1),$t2 - movzb `&lo("$s3")`,$t3 movzb `&hi("$s3")`,$acc0 movzb `&hi("$s0")`,$acc1 + shr \$16,$s3 + movzb `&hi("$s1")`,$acc2 + movzb ($sbox,$t0,1),$t0 + movzb ($sbox,$t1,1),$t1 + movzb ($sbox,$t2,1),$t2 movzb ($sbox,$t3,1),$t3 - movzb ($sbox,$acc0,1),$t4 #$t0 - movzb ($sbox,$acc1,1),$t5 #$t1 - movzb `&hi("$s1")`,$acc2 + movzb ($sbox,$acc0,1),$t4 #$t0 movzb `&hi("$s2")`,$acc0 - shr \$16,$s2 + movzb ($sbox,$acc1,1),$t5 #$t1 movzb ($sbox,$acc2,1),$acc2 #$t2 movzb ($sbox,$acc0,1),$acc0 #$t3 - shr \$16,$s3 - movzb `&lo("$s2")`,$acc1 - shl \$8,$t4 + shr \$16,$s2 shl \$8,$t5 - movzb ($sbox,$acc1,1),$acc1 #$t0 - xor $t4,$t0 - xor $t5,$t1 - - movzb `&lo("$s3")`,$t4 + shl \$8,$t4 + movzb `&lo("$s2")`,$acc1 shr \$16,$s0 + xor $t4,$t0 shr \$16,$s1 - movzb `&lo("$s0")`,$t5 + movzb `&lo("$s3")`,$t4 + shl \$8,$acc2 + xor $t5,$t1 shl \$8,$acc0 - movzb ($sbox,$t4,1),$t4 #$t1 - movzb ($sbox,$t5,1),$t5 #$t2 + movzb `&lo("$s0")`,$t5 + movzb ($sbox,$acc1,1),$acc1 #$t0 xor $acc2,$t2 - xor $acc0,$t3 - movzb `&lo("$s1")`,$acc2 - movzb `&hi("$s1")`,$acc0 + shl \$16,$acc1 + xor $acc0,$t3 + movzb ($sbox,$t4,1),$t4 #$t1 + movzb `&hi("$s1")`,$acc0 movzb ($sbox,$acc2,1),$acc2 #$t3 - movzb ($sbox,$acc0,1),$acc0 #$t0 xor $acc1,$t0 - + movzb ($sbox,$t5,1),$t5 #$t2 movzb `&hi("$s2")`,$acc1 + + shl \$16,$acc2 shl \$16,$t4 shl \$16,$t5 - movzb ($sbox,$acc1,1),$s1 #$t1 + xor $acc2,$t3 + movzb `&hi("$s3")`,$acc2 xor $t4,$t1 + shr \$8,$s0 xor $t5,$t2 - movzb `&hi("$s3")`,$acc1 - shr \$8,$s0 - shl \$16,$acc2 - movzb ($sbox,$acc1,1),$s2 #$t2 + movzb ($sbox,$acc0,1),$acc0 #$t0 + movzb ($sbox,$acc1,1),$s1 #$t1 + movzb ($sbox,$acc2,1),$s2 #$t2 movzb ($sbox,$s0,1),$s3 #$t3 - xor $acc2,$t3 + mov $t0,$s0 shl \$24,$acc0 shl \$24,$s1 shl \$24,$s2 - xor $acc0,$t0 + xor $acc0,$s0 shl \$24,$s3 xor $t1,$s1 - mov $t0,$s0 xor $t2,$s2 xor $t3,$s3 ___ @@ -1014,12 +1012,12 @@ () my $prefetch = shift; $code.=<<___; - mov $tp10,$acc0 - mov $tp18,$acc8 - and $mask80,$acc0 - and $mask80,$acc8 - mov 
$acc0,$tp40 - mov $acc8,$tp48 + mov $mask80,$tp40 + mov $mask80,$tp48 + and $tp10,$tp40 + and $tp18,$tp48 + mov $tp40,$acc0 + mov $tp48,$acc8 shr \$7,$tp40 lea ($tp10,$tp10),$tp20 shr \$7,$tp48 @@ -1030,15 +1028,15 @@ () and $maskfe,$tp28 and $mask1b,$acc0 and $mask1b,$acc8 - xor $tp20,$acc0 - xor $tp28,$acc8 - mov $acc0,$tp20 - mov $acc8,$tp28 - - and $mask80,$acc0 - and $mask80,$acc8 - mov $acc0,$tp80 - mov $acc8,$tp88 + xor $acc0,$tp20 + xor $acc8,$tp28 + mov $mask80,$tp80 + mov $mask80,$tp88 + + and $tp20,$tp80 + and $tp28,$tp88 + mov $tp80,$acc0 + mov $tp88,$acc8 shr \$7,$tp80 lea ($tp20,$tp20),$tp40 shr \$7,$tp88 @@ -1049,15 +1047,15 @@ () and $maskfe,$tp48 and $mask1b,$acc0 and $mask1b,$acc8 - xor $tp40,$acc0 - xor $tp48,$acc8 - mov $acc0,$tp40 - mov $acc8,$tp48 - - and $mask80,$acc0 - and $mask80,$acc8 - mov $acc0,$tp80 - mov $acc8,$tp88 + xor $acc0,$tp40 + xor $acc8,$tp48 + mov $mask80,$tp80 + mov $mask80,$tp88 + + and $tp40,$tp80 + and $tp48,$tp88 + mov $tp80,$acc0 + mov $tp88,$acc8 shr \$7,$tp80 xor $tp10,$tp20 # tp2^=tp1 shr \$7,$tp88 @@ -1082,51 +1080,51 @@ () mov $tp10,$acc0 mov $tp18,$acc8 xor $tp80,$tp40 # tp4^tp1^=tp8 - xor $tp88,$tp48 # tp4^tp1^=tp8 shr \$32,$acc0 + xor $tp88,$tp48 # tp4^tp1^=tp8 shr \$32,$acc8 xor $tp20,$tp80 # tp8^=tp8^tp2^tp1=tp2^tp1 - xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1 rol \$8,`&LO("$tp10")` # ROTATE(tp1^tp8,8) + xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1 rol \$8,`&LO("$tp18")` # ROTATE(tp1^tp8,8) xor $tp40,$tp80 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2 + rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8) xor $tp48,$tp88 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2 - rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8) rol \$8,`&LO("$acc8")` # ROTATE(tp1^tp8,8) xor `&LO("$tp80")`,`&LO("$tp10")` - xor `&LO("$tp88")`,`&LO("$tp18")` shr \$32,$tp80 + xor `&LO("$tp88")`,`&LO("$tp18")` shr \$32,$tp88 xor `&LO("$tp80")`,`&LO("$acc0")` xor `&LO("$tp88")`,`&LO("$acc8")` mov $tp20,$tp80 - mov $tp28,$tp88 - shr \$32,$tp80 - shr \$32,$tp88 rol \$24,`&LO("$tp20")` # ROTATE(tp2^tp1^tp8,24) + mov $tp28,$tp88 rol \$24,`&LO("$tp28")` # ROTATE(tp2^tp1^tp8,24) - rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24) - rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24) + shr \$32,$tp80 xor `&LO("$tp20")`,`&LO("$tp10")` + shr \$32,$tp88 xor `&LO("$tp28")`,`&LO("$tp18")` + rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24) mov $tp40,$tp20 + rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24) mov $tp48,$tp28 + shr \$32,$tp20 xor `&LO("$tp80")`,`&LO("$acc0")` + shr \$32,$tp28 xor `&LO("$tp88")`,`&LO("$acc8")` `"mov 0($sbox),$mask80" if ($prefetch)` - shr \$32,$tp20 - shr \$32,$tp28 - `"mov 64($sbox),$maskfe" if ($prefetch)` rol \$16,`&LO("$tp40")` # ROTATE(tp4^tp1^tp8,16) + `"mov 64($sbox),$maskfe" if ($prefetch)` rol \$16,`&LO("$tp48")` # ROTATE(tp4^tp1^tp8,16) `"mov 128($sbox),$mask1b" if ($prefetch)` rol \$16,`&LO("$tp20")` # ROTATE(tp4^tp1^tp8,16) - rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16) `"mov 192($sbox),$tp80" if ($prefetch)` xor `&LO("$tp40")`,`&LO("$tp10")` + rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16) xor `&LO("$tp48")`,`&LO("$tp18")` `"mov 256($sbox),$tp88" if ($prefetch)` xor `&LO("$tp20")`,`&LO("$acc0")` @@ -1302,10 +1300,6 @@ () call _x86_64_AES_set_encrypt_key - mov 8(%rsp),%r15 - mov 16(%rsp),%r14 - mov 24(%rsp),%r13 - mov 32(%rsp),%r12 mov 40(%rsp),%rbp mov 48(%rsp),%rbx add \$56,%rsp diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl new file mode 100644 index 00000000000000..33b1aed3c0b4c3 --- /dev/null +++ 
b/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl @@ -0,0 +1,1395 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# Multi-buffer AES-NI procedures process several independent buffers +# in parallel by interleaving independent instructions. +# +# Cycles per byte for interleave factor 4: +# +# asymptotic measured +# --------------------------- +# Westmere 5.00/4=1.25 5.13/4=1.28 +# Atom 15.0/4=3.75 ?15.7/4=3.93 +# Sandy Bridge 5.06/4=1.27 5.18/4=1.29 +# Ivy Bridge 5.06/4=1.27 5.14/4=1.29 +# Haswell 4.44/4=1.11 4.44/4=1.11 +# Bulldozer 5.75/4=1.44 5.76/4=1.44 +# +# Cycles per byte for interleave factor 8 (not implemented for +# pre-AVX processors, where higher interleave factor incidentally +# doesn't result in improvement): +# +# asymptotic measured +# --------------------------- +# Sandy Bridge 5.06/8=0.64 7.10/8=0.89(*) +# Ivy Bridge 5.06/8=0.64 7.14/8=0.89(*) +# Haswell 5.00/8=0.63 5.00/8=0.63 +# Bulldozer 5.75/8=0.72 5.77/8=0.72 +# +# (*) Sandy/Ivy Bridge are known to handle high interleave factors +# suboptimally; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +$avx=0; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +# void aesni_multi_cbc_encrypt ( +# struct { void *inp,*out; int blocks; double iv[2]; } inp[8]; +# const AES_KEY *key, +# int num); /* 1 or 2 */ +# +$inp="%rdi"; # 1st arg +$key="%rsi"; # 2nd arg +$num="%edx"; + +@inptr=map("%r$_",(8..11)); +@outptr=map("%r$_",(12..15)); + +($rndkey0,$rndkey1)=("%xmm0","%xmm1"); +@out=map("%xmm$_",(2..5)); +@inp=map("%xmm$_",(6..9)); +($counters,$mask,$zero)=map("%xmm$_",(10..12)); + +($rounds,$one,$sink,$offset)=("%eax","%ecx","%rbp","%rbx"); + +$code.=<<___; +.text + +.extern OPENSSL_ia32cap_P + +.globl aesni_multi_cbc_encrypt +.type aesni_multi_cbc_encrypt,\@function,3 +.align 32 +aesni_multi_cbc_encrypt: +___ +$code.=<<___ if ($avx); + cmp \$2,$num + jb .Lenc_non_avx + mov OPENSSL_ia32cap_P+4(%rip),%ecx + test \$`1<<28`,%ecx # AVX bit + jnz _avx_cbc_enc_shortcut + jmp .Lenc_non_avx +.align 16 +.Lenc_non_avx: +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + 
movaps %xmm9,0x30(%rsp) + movaps %xmm10,0x40(%rsp) + movaps %xmm11,0x50(%rsp) + movaps %xmm12,0x60(%rsp) + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +___ +$code.=<<___; + # stack layout + # + # +0 output sink + # +16 input sink [original %rsp and $num] + # +32 counters + + sub \$48,%rsp + and \$-64,%rsp + mov %rax,16(%rsp) # original %rsp + +.Lenc4x_body: + movdqu ($key),$zero # 0-round key + lea 0x78($key),$key # size optimization + lea 40*2($inp),$inp + +.Lenc4x_loop_grande: + mov $num,24(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + mov `40*$i+16-40*2`($inp),$one # borrow $one for number of blocks + mov `40*$i+0-40*2`($inp),@inptr[$i] + cmp $num,$one + mov `40*$i+8-40*2`($inp),@outptr[$i] + cmovg $one,$num # find maximum + test $one,$one + movdqu `40*$i+24-40*2`($inp),@out[$i] # load IV + mov $one,`32+4*$i`(%rsp) # initialize counters + cmovle %rsp,@inptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Lenc4x_done + + movups 0x10-0x78($key),$rndkey1 + pxor $zero,@out[0] + movups 0x20-0x78($key),$rndkey0 + pxor $zero,@out[1] + mov 0xf0-0x78($key),$rounds + pxor $zero,@out[2] + movdqu (@inptr[0]),@inp[0] # load inputs + pxor $zero,@out[3] + movdqu (@inptr[1]),@inp[1] + pxor @inp[0],@out[0] + movdqu (@inptr[2]),@inp[2] + pxor @inp[1],@out[1] + movdqu (@inptr[3]),@inp[3] + pxor @inp[2],@out[2] + pxor @inp[3],@out[3] + movdqa 32(%rsp),$counters # load counters + xor $offset,$offset + jmp .Loop_enc4x + +.align 32 +.Loop_enc4x: + add \$16,$offset + lea 16(%rsp),$sink # sink pointer + mov \$1,$one # constant of 1 + sub $offset,$sink + + aesenc $rndkey1,@out[0] + prefetcht0 31(@inptr[0],$offset) # prefetch input + prefetcht0 31(@inptr[1],$offset) + aesenc $rndkey1,@out[1] + prefetcht0 31(@inptr[2],$offset) + prefetcht0 31(@inptr[2],$offset) + aesenc $rndkey1,@out[2] + aesenc $rndkey1,@out[3] + movups 0x30-0x78($key),$rndkey1 +___ +for($i=0;$i<4;$i++) { +my $rndkey = ($i&1) ? 
$rndkey1 : $rndkey0; +$code.=<<___; + cmp `32+4*$i`(%rsp),$one + aesenc $rndkey,@out[0] + aesenc $rndkey,@out[1] + aesenc $rndkey,@out[2] + cmovge $sink,@inptr[$i] # cancel input + cmovg $sink,@outptr[$i] # sink output + aesenc $rndkey,@out[3] + movups `0x40+16*$i-0x78`($key),$rndkey +___ +} +$code.=<<___; + movdqa $counters,$mask + aesenc $rndkey0,@out[0] + prefetcht0 15(@outptr[0],$offset) # prefetch output + prefetcht0 15(@outptr[1],$offset) + aesenc $rndkey0,@out[1] + prefetcht0 15(@outptr[2],$offset) + prefetcht0 15(@outptr[3],$offset) + aesenc $rndkey0,@out[2] + aesenc $rndkey0,@out[3] + movups 0x80-0x78($key),$rndkey0 + pxor $zero,$zero + + aesenc $rndkey1,@out[0] + pcmpgtd $zero,$mask + movdqu -0x78($key),$zero # reload 0-round key + aesenc $rndkey1,@out[1] + paddd $mask,$counters # decrement counters + movdqa $counters,32(%rsp) # update counters + aesenc $rndkey1,@out[2] + aesenc $rndkey1,@out[3] + movups 0x90-0x78($key),$rndkey1 + + cmp \$11,$rounds + + aesenc $rndkey0,@out[0] + aesenc $rndkey0,@out[1] + aesenc $rndkey0,@out[2] + aesenc $rndkey0,@out[3] + movups 0xa0-0x78($key),$rndkey0 + + jb .Lenc4x_tail + + aesenc $rndkey1,@out[0] + aesenc $rndkey1,@out[1] + aesenc $rndkey1,@out[2] + aesenc $rndkey1,@out[3] + movups 0xb0-0x78($key),$rndkey1 + + aesenc $rndkey0,@out[0] + aesenc $rndkey0,@out[1] + aesenc $rndkey0,@out[2] + aesenc $rndkey0,@out[3] + movups 0xc0-0x78($key),$rndkey0 + + je .Lenc4x_tail + + aesenc $rndkey1,@out[0] + aesenc $rndkey1,@out[1] + aesenc $rndkey1,@out[2] + aesenc $rndkey1,@out[3] + movups 0xd0-0x78($key),$rndkey1 + + aesenc $rndkey0,@out[0] + aesenc $rndkey0,@out[1] + aesenc $rndkey0,@out[2] + aesenc $rndkey0,@out[3] + movups 0xe0-0x78($key),$rndkey0 + jmp .Lenc4x_tail + +.align 32 +.Lenc4x_tail: + aesenc $rndkey1,@out[0] + aesenc $rndkey1,@out[1] + aesenc $rndkey1,@out[2] + aesenc $rndkey1,@out[3] + movdqu (@inptr[0],$offset),@inp[0] + movdqu 0x10-0x78($key),$rndkey1 + + aesenclast $rndkey0,@out[0] + movdqu (@inptr[1],$offset),@inp[1] + pxor $zero,@inp[0] + aesenclast $rndkey0,@out[1] + movdqu (@inptr[2],$offset),@inp[2] + pxor $zero,@inp[1] + aesenclast $rndkey0,@out[2] + movdqu (@inptr[3],$offset),@inp[3] + pxor $zero,@inp[2] + aesenclast $rndkey0,@out[3] + movdqu 0x20-0x78($key),$rndkey0 + pxor $zero,@inp[3] + + movups @out[0],-16(@outptr[0],$offset) + pxor @inp[0],@out[0] + movups @out[1],-16(@outptr[1],$offset) + pxor @inp[1],@out[1] + movups @out[2],-16(@outptr[2],$offset) + pxor @inp[2],@out[2] + movups @out[3],-16(@outptr[3],$offset) + pxor @inp[3],@out[3] + + dec $num + jnz .Loop_enc4x + + mov 16(%rsp),%rax # original %rsp + mov 24(%rsp),$num + + #pxor @inp[0],@out[0] + #pxor @inp[1],@out[1] + #movdqu @out[0],`40*0+24-40*2`($inp) # output iv FIX ME! + #pxor @inp[2],@out[2] + #movdqu @out[1],`40*1+24-40*2`($inp) + #pxor @inp[3],@out[3] + #movdqu @out[2],`40*2+24-40*2`($inp) # won't fix, let caller + #movdqu @out[3],`40*3+24-40*2`($inp) # figure this out... 
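
The loop above drives up to four independent CBC streams from an array of 40-byte descriptors (input pointer, output pointer, block count, 16-byte IV, matching the struct sketched in the comment near the top of this file and the `40*$i` offsets). The largest per-lane block count becomes the trip count, and the `cmov` instructions ("cancel input" / "sink output") point exhausted lanes at a scratch sink, so all four lanes execute the same straight-line code on every iteration. A small Perl model of just that scheduling decision, with hypothetical block counts:

    # Sketch of the lane scheduling only (no AES): run for the largest
    # counter and redirect exhausted lanes to a sink.
    use List::Util qw(max);

    my @blocks = (5, 3, 4, 1);      # hypothetical per-lane block counts
    my $num    = max @blocks;       # "find maximum"

    for my $block (1 .. $num) {
        my @lanes = map { $block <= $blocks[$_] ? "lane${_}:process"
                                                : "lane${_}:sink" } 0 .. $#blocks;
        print "block $block  @lanes\n";
    }
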
+ + lea `40*4`($inp),$inp + dec $num + jnz .Lenc4x_loop_grande + +.Lenc4x_done: +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + #movaps -0x68(%rax),%xmm13 + #movaps -0x58(%rax),%xmm14 + #movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lenc4x_epilogue: + ret +.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt + +.globl aesni_multi_cbc_decrypt +.type aesni_multi_cbc_decrypt,\@function,3 +.align 32 +aesni_multi_cbc_decrypt: +___ +$code.=<<___ if ($avx); + cmp \$2,$num + jb .Ldec_non_avx + mov OPENSSL_ia32cap_P+4(%rip),%ecx + test \$`1<<28`,%ecx # AVX bit + jnz _avx_cbc_dec_shortcut + jmp .Ldec_non_avx +.align 16 +.Ldec_non_avx: +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,0x40(%rsp) + movaps %xmm11,0x50(%rsp) + movaps %xmm12,0x60(%rsp) + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +___ +$code.=<<___; + # stack layout + # + # +0 output sink + # +16 input sink [original %rsp and $num] + # +32 counters + + sub \$48,%rsp + and \$-64,%rsp + mov %rax,16(%rsp) # original %rsp + +.Ldec4x_body: + movdqu ($key),$zero # 0-round key + lea 0x78($key),$key # size optimization + lea 40*2($inp),$inp + +.Ldec4x_loop_grande: + mov $num,24(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + mov `40*$i+16-40*2`($inp),$one # borrow $one for number of blocks + mov `40*$i+0-40*2`($inp),@inptr[$i] + cmp $num,$one + mov `40*$i+8-40*2`($inp),@outptr[$i] + cmovg $one,$num # find maximum + test $one,$one + movdqu `40*$i+24-40*2`($inp),@inp[$i] # load IV + mov $one,`32+4*$i`(%rsp) # initialize counters + cmovle %rsp,@inptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldec4x_done + + movups 0x10-0x78($key),$rndkey1 + movups 0x20-0x78($key),$rndkey0 + mov 0xf0-0x78($key),$rounds + movdqu (@inptr[0]),@out[0] # load inputs + movdqu (@inptr[1]),@out[1] + pxor $zero,@out[0] + movdqu (@inptr[2]),@out[2] + pxor $zero,@out[1] + movdqu (@inptr[3]),@out[3] + pxor $zero,@out[2] + pxor $zero,@out[3] + movdqa 32(%rsp),$counters # load counters + xor $offset,$offset + jmp .Loop_dec4x + +.align 32 +.Loop_dec4x: + add \$16,$offset + lea 16(%rsp),$sink # sink pointer + mov \$1,$one # constant of 1 + sub $offset,$sink + + aesdec $rndkey1,@out[0] + prefetcht0 31(@inptr[0],$offset) # prefetch input + prefetcht0 31(@inptr[1],$offset) + aesdec $rndkey1,@out[1] + prefetcht0 31(@inptr[2],$offset) + prefetcht0 31(@inptr[3],$offset) + aesdec $rndkey1,@out[2] + aesdec $rndkey1,@out[3] + movups 0x30-0x78($key),$rndkey1 +___ +for($i=0;$i<4;$i++) { +my $rndkey = ($i&1) ? 
$rndkey1 : $rndkey0; +$code.=<<___; + cmp `32+4*$i`(%rsp),$one + aesdec $rndkey,@out[0] + aesdec $rndkey,@out[1] + aesdec $rndkey,@out[2] + cmovge $sink,@inptr[$i] # cancel input + cmovg $sink,@outptr[$i] # sink output + aesdec $rndkey,@out[3] + movups `0x40+16*$i-0x78`($key),$rndkey +___ +} +$code.=<<___; + movdqa $counters,$mask + aesdec $rndkey0,@out[0] + prefetcht0 15(@outptr[0],$offset) # prefetch output + prefetcht0 15(@outptr[1],$offset) + aesdec $rndkey0,@out[1] + prefetcht0 15(@outptr[2],$offset) + prefetcht0 15(@outptr[3],$offset) + aesdec $rndkey0,@out[2] + aesdec $rndkey0,@out[3] + movups 0x80-0x78($key),$rndkey0 + pxor $zero,$zero + + aesdec $rndkey1,@out[0] + pcmpgtd $zero,$mask + movdqu -0x78($key),$zero # reload 0-round key + aesdec $rndkey1,@out[1] + paddd $mask,$counters # decrement counters + movdqa $counters,32(%rsp) # update counters + aesdec $rndkey1,@out[2] + aesdec $rndkey1,@out[3] + movups 0x90-0x78($key),$rndkey1 + + cmp \$11,$rounds + + aesdec $rndkey0,@out[0] + aesdec $rndkey0,@out[1] + aesdec $rndkey0,@out[2] + aesdec $rndkey0,@out[3] + movups 0xa0-0x78($key),$rndkey0 + + jb .Ldec4x_tail + + aesdec $rndkey1,@out[0] + aesdec $rndkey1,@out[1] + aesdec $rndkey1,@out[2] + aesdec $rndkey1,@out[3] + movups 0xb0-0x78($key),$rndkey1 + + aesdec $rndkey0,@out[0] + aesdec $rndkey0,@out[1] + aesdec $rndkey0,@out[2] + aesdec $rndkey0,@out[3] + movups 0xc0-0x78($key),$rndkey0 + + je .Ldec4x_tail + + aesdec $rndkey1,@out[0] + aesdec $rndkey1,@out[1] + aesdec $rndkey1,@out[2] + aesdec $rndkey1,@out[3] + movups 0xd0-0x78($key),$rndkey1 + + aesdec $rndkey0,@out[0] + aesdec $rndkey0,@out[1] + aesdec $rndkey0,@out[2] + aesdec $rndkey0,@out[3] + movups 0xe0-0x78($key),$rndkey0 + jmp .Ldec4x_tail + +.align 32 +.Ldec4x_tail: + aesdec $rndkey1,@out[0] + aesdec $rndkey1,@out[1] + aesdec $rndkey1,@out[2] + pxor $rndkey0,@inp[0] + pxor $rndkey0,@inp[1] + aesdec $rndkey1,@out[3] + movdqu 0x10-0x78($key),$rndkey1 + pxor $rndkey0,@inp[2] + pxor $rndkey0,@inp[3] + movdqu 0x20-0x78($key),$rndkey0 + + aesdeclast @inp[0],@out[0] + aesdeclast @inp[1],@out[1] + movdqu -16(@inptr[0],$offset),@inp[0] # load next IV + movdqu -16(@inptr[1],$offset),@inp[1] + aesdeclast @inp[2],@out[2] + aesdeclast @inp[3],@out[3] + movdqu -16(@inptr[2],$offset),@inp[2] + movdqu -16(@inptr[3],$offset),@inp[3] + + movups @out[0],-16(@outptr[0],$offset) + movdqu (@inptr[0],$offset),@out[0] + movups @out[1],-16(@outptr[1],$offset) + movdqu (@inptr[1],$offset),@out[1] + pxor $zero,@out[0] + movups @out[2],-16(@outptr[2],$offset) + movdqu (@inptr[2],$offset),@out[2] + pxor $zero,@out[1] + movups @out[3],-16(@outptr[3],$offset) + movdqu (@inptr[3],$offset),@out[3] + pxor $zero,@out[2] + pxor $zero,@out[3] + + dec $num + jnz .Loop_dec4x + + mov 16(%rsp),%rax # original %rsp + mov 24(%rsp),$num + + lea `40*4`($inp),$inp + dec $num + jnz .Ldec4x_loop_grande + +.Ldec4x_done: +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + #movaps -0x68(%rax),%xmm13 + #movaps -0x58(%rax),%xmm14 + #movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Ldec4x_epilogue: + ret +.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt +___ + + if ($avx) {{{ +my @ptr=map("%r$_",(8..15)); +my $offload=$sink; + +my @out=map("%xmm$_",(2..9)); +my 
@inp=map("%xmm$_",(10..13)); +my ($counters,$zero)=("%xmm14","%xmm15"); + +$code.=<<___; +.type aesni_multi_cbc_encrypt_avx,\@function,3 +.align 32 +aesni_multi_cbc_encrypt_avx: +_avx_cbc_enc_shortcut: + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,0x40(%rsp) + movaps %xmm11,0x50(%rsp) + movaps %xmm12,-0x78(%rax) + movaps %xmm13,-0x68(%rax) + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +___ +$code.=<<___; + # stack layout + # + # +0 output sink + # +16 input sink [original %rsp and $num] + # +32 counters + # +64 distances between inputs and outputs + # +128 off-load area for @inp[0..3] + + sub \$192,%rsp + and \$-128,%rsp + mov %rax,16(%rsp) # original %rsp + +.Lenc8x_body: + vzeroupper + vmovdqu ($key),$zero # 0-round key + lea 0x78($key),$key # size optimization + lea 40*4($inp),$inp + shr \$1,$num + +.Lenc8x_loop_grande: + #mov $num,24(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<8;$i++) { + my $temp = $i ? $offload : $offset; + $code.=<<___; + mov `40*$i+16-40*4`($inp),$one # borrow $one for number of blocks + mov `40*$i+0-40*4`($inp),@ptr[$i] # input pointer + cmp $num,$one + mov `40*$i+8-40*4`($inp),$temp # output pointer + cmovg $one,$num # find maximum + test $one,$one + vmovdqu `40*$i+24-40*4`($inp),@out[$i] # load IV + mov $one,`32+4*$i`(%rsp) # initialize counters + cmovle %rsp,@ptr[$i] # cancel input + sub @ptr[$i],$temp # distance between input and output + mov $temp,`64+8*$i`(%rsp) # initialize distances +___ +} +$code.=<<___; + test $num,$num + jz .Lenc8x_done + + vmovups 0x10-0x78($key),$rndkey1 + vmovups 0x20-0x78($key),$rndkey0 + mov 0xf0-0x78($key),$rounds + + vpxor (@ptr[0]),$zero,@inp[0] # load inputs and xor with 0-round + lea 128(%rsp),$offload # offload area + vpxor (@ptr[1]),$zero,@inp[1] + vpxor (@ptr[2]),$zero,@inp[2] + vpxor (@ptr[3]),$zero,@inp[3] + vpxor @inp[0],@out[0],@out[0] + vpxor (@ptr[4]),$zero,@inp[0] + vpxor @inp[1],@out[1],@out[1] + vpxor (@ptr[5]),$zero,@inp[1] + vpxor @inp[2],@out[2],@out[2] + vpxor (@ptr[6]),$zero,@inp[2] + vpxor @inp[3],@out[3],@out[3] + vpxor (@ptr[7]),$zero,@inp[3] + vpxor @inp[0],@out[4],@out[4] + mov \$1,$one # constant of 1 + vpxor @inp[1],@out[5],@out[5] + vpxor @inp[2],@out[6],@out[6] + vpxor @inp[3],@out[7],@out[7] + jmp .Loop_enc8x + +.align 32 +.Loop_enc8x: +___ +for($i=0;$i<8;$i++) { +my $rndkey=($i&1)?$rndkey0:$rndkey1; +$code.=<<___; + vaesenc $rndkey,@out[0],@out[0] + cmp 32+4*$i(%rsp),$one +___ +$code.=<<___ if ($i); + mov 64+8*$i(%rsp),$offset +___ +$code.=<<___; + vaesenc $rndkey,@out[1],@out[1] + prefetcht0 31(@ptr[$i]) # prefetch input + vaesenc $rndkey,@out[2],@out[2] +___ +$code.=<<___ if ($i>1); + prefetcht0 15(@ptr[$i-2]) # prefetch output +___ +$code.=<<___; + vaesenc $rndkey,@out[3],@out[3] + lea (@ptr[$i],$offset),$offset + cmovge %rsp,@ptr[$i] # cancel input + vaesenc $rndkey,@out[4],@out[4] + cmovg %rsp,$offset # sink output + vaesenc $rndkey,@out[5],@out[5] + sub @ptr[$i],$offset + vaesenc $rndkey,@out[6],@out[6] + vpxor 16(@ptr[$i]),$zero,@inp[$i%4] # load input and xor with 0-round + mov $offset,64+8*$i(%rsp) + vaesenc $rndkey,@out[7],@out[7] + vmovups `16*(3+$i)-0x78`($key),$rndkey + lea 16(@ptr[$i],$offset),@ptr[$i] # switch to output +___ +$code.=<<___ if ($i<4) + vmovdqu @inp[$i%4],`16*$i`($offload) # off-load +___ +} +$code.=<<___; + vmovdqu 32(%rsp),$counters + 
prefetcht0 15(@ptr[$i-2]) # prefetch output + prefetcht0 15(@ptr[$i-1]) + cmp \$11,$rounds + jb .Lenc8x_tail + + vaesenc $rndkey1,@out[0],@out[0] + vaesenc $rndkey1,@out[1],@out[1] + vaesenc $rndkey1,@out[2],@out[2] + vaesenc $rndkey1,@out[3],@out[3] + vaesenc $rndkey1,@out[4],@out[4] + vaesenc $rndkey1,@out[5],@out[5] + vaesenc $rndkey1,@out[6],@out[6] + vaesenc $rndkey1,@out[7],@out[7] + vmovups 0xb0-0x78($key),$rndkey1 + + vaesenc $rndkey0,@out[0],@out[0] + vaesenc $rndkey0,@out[1],@out[1] + vaesenc $rndkey0,@out[2],@out[2] + vaesenc $rndkey0,@out[3],@out[3] + vaesenc $rndkey0,@out[4],@out[4] + vaesenc $rndkey0,@out[5],@out[5] + vaesenc $rndkey0,@out[6],@out[6] + vaesenc $rndkey0,@out[7],@out[7] + vmovups 0xc0-0x78($key),$rndkey0 + je .Lenc8x_tail + + vaesenc $rndkey1,@out[0],@out[0] + vaesenc $rndkey1,@out[1],@out[1] + vaesenc $rndkey1,@out[2],@out[2] + vaesenc $rndkey1,@out[3],@out[3] + vaesenc $rndkey1,@out[4],@out[4] + vaesenc $rndkey1,@out[5],@out[5] + vaesenc $rndkey1,@out[6],@out[6] + vaesenc $rndkey1,@out[7],@out[7] + vmovups 0xd0-0x78($key),$rndkey1 + + vaesenc $rndkey0,@out[0],@out[0] + vaesenc $rndkey0,@out[1],@out[1] + vaesenc $rndkey0,@out[2],@out[2] + vaesenc $rndkey0,@out[3],@out[3] + vaesenc $rndkey0,@out[4],@out[4] + vaesenc $rndkey0,@out[5],@out[5] + vaesenc $rndkey0,@out[6],@out[6] + vaesenc $rndkey0,@out[7],@out[7] + vmovups 0xe0-0x78($key),$rndkey0 + +.Lenc8x_tail: + vaesenc $rndkey1,@out[0],@out[0] + vpxor $zero,$zero,$zero + vaesenc $rndkey1,@out[1],@out[1] + vaesenc $rndkey1,@out[2],@out[2] + vpcmpgtd $zero,$counters,$zero + vaesenc $rndkey1,@out[3],@out[3] + vaesenc $rndkey1,@out[4],@out[4] + vpaddd $counters,$zero,$zero # decrement counters + vmovdqu 48(%rsp),$counters + vaesenc $rndkey1,@out[5],@out[5] + mov 64(%rsp),$offset # pre-load 1st offset + vaesenc $rndkey1,@out[6],@out[6] + vaesenc $rndkey1,@out[7],@out[7] + vmovups 0x10-0x78($key),$rndkey1 + + vaesenclast $rndkey0,@out[0],@out[0] + vmovdqa $zero,32(%rsp) # update counters + vpxor $zero,$zero,$zero + vaesenclast $rndkey0,@out[1],@out[1] + vaesenclast $rndkey0,@out[2],@out[2] + vpcmpgtd $zero,$counters,$zero + vaesenclast $rndkey0,@out[3],@out[3] + vaesenclast $rndkey0,@out[4],@out[4] + vpaddd $zero,$counters,$counters # decrement counters + vmovdqu -0x78($key),$zero # 0-round + vaesenclast $rndkey0,@out[5],@out[5] + vaesenclast $rndkey0,@out[6],@out[6] + vmovdqa $counters,48(%rsp) # update counters + vaesenclast $rndkey0,@out[7],@out[7] + vmovups 0x20-0x78($key),$rndkey0 + + vmovups @out[0],-16(@ptr[0]) # write output + sub $offset,@ptr[0] # switch to input + vpxor 0x00($offload),@out[0],@out[0] + vmovups @out[1],-16(@ptr[1]) + sub `64+1*8`(%rsp),@ptr[1] + vpxor 0x10($offload),@out[1],@out[1] + vmovups @out[2],-16(@ptr[2]) + sub `64+2*8`(%rsp),@ptr[2] + vpxor 0x20($offload),@out[2],@out[2] + vmovups @out[3],-16(@ptr[3]) + sub `64+3*8`(%rsp),@ptr[3] + vpxor 0x30($offload),@out[3],@out[3] + vmovups @out[4],-16(@ptr[4]) + sub `64+4*8`(%rsp),@ptr[4] + vpxor @inp[0],@out[4],@out[4] + vmovups @out[5],-16(@ptr[5]) + sub `64+5*8`(%rsp),@ptr[5] + vpxor @inp[1],@out[5],@out[5] + vmovups @out[6],-16(@ptr[6]) + sub `64+6*8`(%rsp),@ptr[6] + vpxor @inp[2],@out[6],@out[6] + vmovups @out[7],-16(@ptr[7]) + sub `64+7*8`(%rsp),@ptr[7] + vpxor @inp[3],@out[7],@out[7] + + dec $num + jnz .Loop_enc8x + + mov 16(%rsp),%rax # original %rsp + #mov 24(%rsp),$num + #lea `40*8`($inp),$inp + #dec $num + #jnz .Lenc8x_loop_grande + +.Lenc8x_done: + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps 
-0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lenc8x_epilogue: + ret +.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx + +.type aesni_multi_cbc_decrypt_avx,\@function,3 +.align 32 +aesni_multi_cbc_decrypt_avx: +_avx_cbc_dec_shortcut: + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,0x40(%rsp) + movaps %xmm11,0x50(%rsp) + movaps %xmm12,-0x78(%rax) + movaps %xmm13,-0x68(%rax) + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +___ +$code.=<<___; + # stack layout + # + # +0 output sink + # +16 input sink [original %rsp and $num] + # +32 counters + # +64 distances between inputs and outputs + # +128 off-load area for @inp[0..3] + # +192 IV/input offload + + sub \$256,%rsp + and \$-256,%rsp + sub \$192,%rsp + mov %rax,16(%rsp) # original %rsp + +.Ldec8x_body: + vzeroupper + vmovdqu ($key),$zero # 0-round key + lea 0x78($key),$key # size optimization + lea 40*4($inp),$inp + shr \$1,$num + +.Ldec8x_loop_grande: + #mov $num,24(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<8;$i++) { + my $temp = $i ? $offload : $offset; + $code.=<<___; + mov `40*$i+16-40*4`($inp),$one # borrow $one for number of blocks + mov `40*$i+0-40*4`($inp),@ptr[$i] # input pointer + cmp $num,$one + mov `40*$i+8-40*4`($inp),$temp # output pointer + cmovg $one,$num # find maximum + test $one,$one + vmovdqu `40*$i+24-40*4`($inp),@out[$i] # load IV + mov $one,`32+4*$i`(%rsp) # initialize counters + cmovle %rsp,@ptr[$i] # cancel input + sub @ptr[$i],$temp # distance between input and output + mov $temp,`64+8*$i`(%rsp) # initialize distances + vmovdqu @out[$i],`192+16*$i`(%rsp) # offload IV +___ +} +$code.=<<___; + test $num,$num + jz .Ldec8x_done + + vmovups 0x10-0x78($key),$rndkey1 + vmovups 0x20-0x78($key),$rndkey0 + mov 0xf0-0x78($key),$rounds + lea 192+128(%rsp),$offload # offload area + + vmovdqu (@ptr[0]),@out[0] # load inputs + vmovdqu (@ptr[1]),@out[1] + vmovdqu (@ptr[2]),@out[2] + vmovdqu (@ptr[3]),@out[3] + vmovdqu (@ptr[4]),@out[4] + vmovdqu (@ptr[5]),@out[5] + vmovdqu (@ptr[6]),@out[6] + vmovdqu (@ptr[7]),@out[7] + vmovdqu @out[0],0x00($offload) # offload inputs + vpxor $zero,@out[0],@out[0] # xor inputs with 0-round + vmovdqu @out[1],0x10($offload) + vpxor $zero,@out[1],@out[1] + vmovdqu @out[2],0x20($offload) + vpxor $zero,@out[2],@out[2] + vmovdqu @out[3],0x30($offload) + vpxor $zero,@out[3],@out[3] + vmovdqu @out[4],0x40($offload) + vpxor $zero,@out[4],@out[4] + vmovdqu @out[5],0x50($offload) + vpxor $zero,@out[5],@out[5] + vmovdqu @out[6],0x60($offload) + vpxor $zero,@out[6],@out[6] + vmovdqu @out[7],0x70($offload) + vpxor $zero,@out[7],@out[7] + xor \$0x80,$offload + mov \$1,$one # constant of 1 + jmp .Loop_dec8x + +.align 32 +.Loop_dec8x: +___ +for($i=0;$i<8;$i++) { +my $rndkey=($i&1)?$rndkey0:$rndkey1; +$code.=<<___; + vaesdec $rndkey,@out[0],@out[0] + cmp 32+4*$i(%rsp),$one +___ +$code.=<<___ if ($i); + mov 64+8*$i(%rsp),$offset +___ +$code.=<<___; + vaesdec $rndkey,@out[1],@out[1] + prefetcht0 31(@ptr[$i]) # prefetch 
input + vaesdec $rndkey,@out[2],@out[2] +___ +$code.=<<___ if ($i>1); + prefetcht0 15(@ptr[$i-2]) # prefetch output +___ +$code.=<<___; + vaesdec $rndkey,@out[3],@out[3] + lea (@ptr[$i],$offset),$offset + cmovge %rsp,@ptr[$i] # cancel input + vaesdec $rndkey,@out[4],@out[4] + cmovg %rsp,$offset # sink output + vaesdec $rndkey,@out[5],@out[5] + sub @ptr[$i],$offset + vaesdec $rndkey,@out[6],@out[6] + vmovdqu 16(@ptr[$i]),@inp[$i%4] # load input + mov $offset,64+8*$i(%rsp) + vaesdec $rndkey,@out[7],@out[7] + vmovups `16*(3+$i)-0x78`($key),$rndkey + lea 16(@ptr[$i],$offset),@ptr[$i] # switch to output +___ +$code.=<<___ if ($i<4); + vmovdqu @inp[$i%4],`128+16*$i`(%rsp) # off-load +___ +} +$code.=<<___; + vmovdqu 32(%rsp),$counters + prefetcht0 15(@ptr[$i-2]) # prefetch output + prefetcht0 15(@ptr[$i-1]) + cmp \$11,$rounds + jb .Ldec8x_tail + + vaesdec $rndkey1,@out[0],@out[0] + vaesdec $rndkey1,@out[1],@out[1] + vaesdec $rndkey1,@out[2],@out[2] + vaesdec $rndkey1,@out[3],@out[3] + vaesdec $rndkey1,@out[4],@out[4] + vaesdec $rndkey1,@out[5],@out[5] + vaesdec $rndkey1,@out[6],@out[6] + vaesdec $rndkey1,@out[7],@out[7] + vmovups 0xb0-0x78($key),$rndkey1 + + vaesdec $rndkey0,@out[0],@out[0] + vaesdec $rndkey0,@out[1],@out[1] + vaesdec $rndkey0,@out[2],@out[2] + vaesdec $rndkey0,@out[3],@out[3] + vaesdec $rndkey0,@out[4],@out[4] + vaesdec $rndkey0,@out[5],@out[5] + vaesdec $rndkey0,@out[6],@out[6] + vaesdec $rndkey0,@out[7],@out[7] + vmovups 0xc0-0x78($key),$rndkey0 + je .Ldec8x_tail + + vaesdec $rndkey1,@out[0],@out[0] + vaesdec $rndkey1,@out[1],@out[1] + vaesdec $rndkey1,@out[2],@out[2] + vaesdec $rndkey1,@out[3],@out[3] + vaesdec $rndkey1,@out[4],@out[4] + vaesdec $rndkey1,@out[5],@out[5] + vaesdec $rndkey1,@out[6],@out[6] + vaesdec $rndkey1,@out[7],@out[7] + vmovups 0xd0-0x78($key),$rndkey1 + + vaesdec $rndkey0,@out[0],@out[0] + vaesdec $rndkey0,@out[1],@out[1] + vaesdec $rndkey0,@out[2],@out[2] + vaesdec $rndkey0,@out[3],@out[3] + vaesdec $rndkey0,@out[4],@out[4] + vaesdec $rndkey0,@out[5],@out[5] + vaesdec $rndkey0,@out[6],@out[6] + vaesdec $rndkey0,@out[7],@out[7] + vmovups 0xe0-0x78($key),$rndkey0 + +.Ldec8x_tail: + vaesdec $rndkey1,@out[0],@out[0] + vpxor $zero,$zero,$zero + vaesdec $rndkey1,@out[1],@out[1] + vaesdec $rndkey1,@out[2],@out[2] + vpcmpgtd $zero,$counters,$zero + vaesdec $rndkey1,@out[3],@out[3] + vaesdec $rndkey1,@out[4],@out[4] + vpaddd $counters,$zero,$zero # decrement counters + vmovdqu 48(%rsp),$counters + vaesdec $rndkey1,@out[5],@out[5] + mov 64(%rsp),$offset # pre-load 1st offset + vaesdec $rndkey1,@out[6],@out[6] + vaesdec $rndkey1,@out[7],@out[7] + vmovups 0x10-0x78($key),$rndkey1 + + vaesdeclast $rndkey0,@out[0],@out[0] + vmovdqa $zero,32(%rsp) # update counters + vpxor $zero,$zero,$zero + vaesdeclast $rndkey0,@out[1],@out[1] + vpxor 0x00($offload),@out[0],@out[0] # xor with IV + vaesdeclast $rndkey0,@out[2],@out[2] + vpxor 0x10($offload),@out[1],@out[1] + vpcmpgtd $zero,$counters,$zero + vaesdeclast $rndkey0,@out[3],@out[3] + vpxor 0x20($offload),@out[2],@out[2] + vaesdeclast $rndkey0,@out[4],@out[4] + vpxor 0x30($offload),@out[3],@out[3] + vpaddd $zero,$counters,$counters # decrement counters + vmovdqu -0x78($key),$zero # 0-round + vaesdeclast $rndkey0,@out[5],@out[5] + vpxor 0x40($offload),@out[4],@out[4] + vaesdeclast $rndkey0,@out[6],@out[6] + vpxor 0x50($offload),@out[5],@out[5] + vmovdqa $counters,48(%rsp) # update counters + vaesdeclast $rndkey0,@out[7],@out[7] + vpxor 0x60($offload),@out[6],@out[6] + vmovups 0x20-0x78($key),$rndkey0 + + vmovups 
@out[0],-16(@ptr[0]) # write output + sub $offset,@ptr[0] # switch to input + vmovdqu 128+0(%rsp),@out[0] + vpxor 0x70($offload),@out[7],@out[7] + vmovups @out[1],-16(@ptr[1]) + sub `64+1*8`(%rsp),@ptr[1] + vmovdqu @out[0],0x00($offload) + vpxor $zero,@out[0],@out[0] + vmovdqu 128+16(%rsp),@out[1] + vmovups @out[2],-16(@ptr[2]) + sub `64+2*8`(%rsp),@ptr[2] + vmovdqu @out[1],0x10($offload) + vpxor $zero,@out[1],@out[1] + vmovdqu 128+32(%rsp),@out[2] + vmovups @out[3],-16(@ptr[3]) + sub `64+3*8`(%rsp),@ptr[3] + vmovdqu @out[2],0x20($offload) + vpxor $zero,@out[2],@out[2] + vmovdqu 128+48(%rsp),@out[3] + vmovups @out[4],-16(@ptr[4]) + sub `64+4*8`(%rsp),@ptr[4] + vmovdqu @out[3],0x30($offload) + vpxor $zero,@out[3],@out[3] + vmovdqu @inp[0],0x40($offload) + vpxor @inp[0],$zero,@out[4] + vmovups @out[5],-16(@ptr[5]) + sub `64+5*8`(%rsp),@ptr[5] + vmovdqu @inp[1],0x50($offload) + vpxor @inp[1],$zero,@out[5] + vmovups @out[6],-16(@ptr[6]) + sub `64+6*8`(%rsp),@ptr[6] + vmovdqu @inp[2],0x60($offload) + vpxor @inp[2],$zero,@out[6] + vmovups @out[7],-16(@ptr[7]) + sub `64+7*8`(%rsp),@ptr[7] + vmovdqu @inp[3],0x70($offload) + vpxor @inp[3],$zero,@out[7] + + xor \$128,$offload + dec $num + jnz .Loop_dec8x + + mov 16(%rsp),%rax # original %rsp + #mov 24(%rsp),$num + #lea `40*8`($inp),$inp + #dec $num + #jnz .Ldec8x_loop_grande + +.Ldec8x_done: + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Ldec8x_epilogue: + ret +.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx +___ + }}} + +if ($win64) { +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov 16(%rax),%rax # pull saved stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore cotnext->R12 + mov %r13,224($context) # restore cotnext->R13 + mov %r14,232($context) # restore cotnext->R14 + mov %r15,240($context) # restore cotnext->R15 + + lea -56-10*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore 
context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_aesni_multi_cbc_encrypt + .rva .LSEH_end_aesni_multi_cbc_encrypt + .rva .LSEH_info_aesni_multi_cbc_encrypt + .rva .LSEH_begin_aesni_multi_cbc_decrypt + .rva .LSEH_end_aesni_multi_cbc_decrypt + .rva .LSEH_info_aesni_multi_cbc_decrypt +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_aesni_multi_cbc_encrypt_avx + .rva .LSEH_end_aesni_multi_cbc_encrypt_avx + .rva .LSEH_info_aesni_multi_cbc_encrypt_avx + .rva .LSEH_begin_aesni_multi_cbc_decrypt_avx + .rva .LSEH_end_aesni_multi_cbc_decrypt_avx + .rva .LSEH_info_aesni_multi_cbc_decrypt_avx +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_aesni_multi_cbc_encrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Lenc4x_body,.Lenc4x_epilogue # HandlerData[] +.LSEH_info_aesni_multi_cbc_decrypt: + .byte 9,0,0,0 + .rva se_handler + .rva .Ldec4x_body,.Ldec4x_epilogue # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_aesni_multi_cbc_encrypt_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Lenc8x_body,.Lenc8x_epilogue # HandlerData[] +.LSEH_info_aesni_multi_cbc_decrypt_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Ldec8x_body,.Ldec8x_epilogue # HandlerData[] +___ +} +#################################################################### + +sub rex { + local *opcode=shift; + my ($dst,$src)=@_; + my $rex=0; + + $rex|=0x04 if($dst>=8); + $rex|=0x01 if($src>=8); + push @opcode,$rex|0x40 if($rex); +} + +sub aesni { + my $line=shift; + my @opcode=(0x66); + + if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + rex(\@opcode,$4,$3); + push @opcode,0x0f,0x3a,0xdf; + push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M + my $c=$2; + push @opcode,$c=~/^0/?oct($c):$c; + return ".byte\t".join(',',@opcode); + } + elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my %opcodelet = ( + "aesimc" => 0xdb, + "aesenc" => 0xdc, "aesenclast" => 0xdd, + "aesdec" => 0xde, "aesdeclast" => 0xdf + ); + return undef if (!defined($opcodelet{$1})); + rex(\@opcode,$3,$2); + push @opcode,0x0f,0x38,$opcodelet{$1}; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } + elsif ($line=~/(aes[a-z]+)\s+([0x1-9a-fA-F]*)\(%rsp\),\s*%xmm([0-9]+)/) { + my %opcodelet = ( + "aesenc" => 0xdc, "aesenclast" => 0xdd, + "aesdec" => 0xde, "aesdeclast" => 0xdf + ); + return undef if (!defined($opcodelet{$1})); + my $off = $2; + push @opcode,0x44 if ($3>=8); + push @opcode,0x0f,0x38,$opcodelet{$1}; + push @opcode,0x44|(($3&7)<<3),0x24; # ModR/M + push @opcode,($off=~/^0/?oct($off):$off)&0xff; + return ".byte\t".join(',',@opcode); + } + return $line; +} + +$code =~ 
s/\`([^\`]*)\`/eval($1)/gem; +$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; + +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl index 3c8f6c19e7bd4a..97992adca7c348 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -21,16 +21,25 @@ # subroutine: # # AES-128-CBC +SHA1 stitch gain -# Westmere 3.77[+5.6] 9.37 6.65 +41% -# Sandy Bridge 5.05[+5.2(6.3)] 10.25(11.35) 6.16(7.08) +67%(+60%) +# Westmere 3.77[+5.3] 9.07 6.55 +38% +# Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%) +# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74% +# Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%) +# Bulldozer 5.77[+6.0] 11.72 6.37 +84% # # AES-192-CBC -# Westmere 4.51 10.11 6.97 +45% -# Sandy Bridge 6.05 11.25(12.35) 6.34(7.27) +77%(+70%) +# Westmere 4.51 9.81 6.80 +44% +# Sandy Bridge 6.05 11.06(12.15) 6.11(7.19) +81%(+69%) +# Ivy Bridge 6.05 10.65 6.07 +75% +# Haswell 5.29 8.86(9.44) 5.32(5.32) +67%(+77%) +# Bulldozer 6.89 12.84 6.96 +84% # # AES-256-CBC -# Westmere 5.25 10.85 7.25 +50% -# Sandy Bridge 7.05 12.25(13.35) 7.06(7.70) +74%(+73%) +# Westmere 5.25 10.55 7.21 +46% +# Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%) +# Ivy Bridge 7.05 11.65 7.12 +64% +# Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%) +# Bulldozer 8.00 13.95 8.25 +69% # # (*) There are two code paths: SSSE3 and AVX. See sha1-568.pl for # background information. Above numbers in parentheses are SSSE3 @@ -45,8 +54,25 @@ # standalone AESNI-CBC decrypt: # # AES-128-CBC AES-192-CBC AES-256-CBC -# Westmere 1.31 1.55 1.80 -# Sandy Bridge 0.93 1.06 1.22 +# Westmere 1.25 1.50 1.75 +# Sandy Bridge 0.74 0.91 1.09 +# Ivy Bridge 0.74 0.90 1.11 +# Haswell 0.63 0.76 0.88 +# Bulldozer 0.70 0.85 0.99 + +# And indeed: +# +# AES-256-CBC +SHA1 stitch gain +# Westmere 1.75 7.20 6.68 +7.8% +# Sandy Bridge 1.09 6.09(7.22) 5.82(6.95) +4.6%(+3.9%) +# Ivy Bridge 1.11 5.70 5.45 +4.6% +# Haswell 0.88 4.45(5.00) 4.39(4.69) +1.4%(*)(+6.6%) +# Bulldozer 0.99 6.95 5.95 +17%(**) +# +# (*) Tiny improvement coefficient on Haswell is because we compare +# AVX1 stitch to sum with AVX2 SHA1. 
+# (**) Execution is fully dominated by integer code sequence and +# SIMD still hardly shows [in single-process benchmark;-] $flavour = shift; $output = shift; @@ -68,6 +94,11 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && `ml64 2>&1` =~ /Version ([0-9]+)\./ && $1>=10); +$avx=1 if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/ && $2>=3.0); + +$shaext=1; ### set to zero if compiling for 1.0.1 + +$stitched_decrypt=0; open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; @@ -86,11 +117,15 @@ .globl aesni_cbc_sha1_enc .type aesni_cbc_sha1_enc,\@abi-omnipotent -.align 16 +.align 32 aesni_cbc_sha1_enc: # caller should check for SSSE3 and AES-NI bits mov OPENSSL_ia32cap_P+0(%rip),%r10d - mov OPENSSL_ia32cap_P+4(%rip),%r11d + mov OPENSSL_ia32cap_P+4(%rip),%r11 +___ +$code.=<<___ if ($shaext); + bt \$61,%r11 # check SHA bit + jc aesni_cbc_sha1_enc_shaext ___ $code.=<<___ if ($avx); and \$`1<<28`,%r11d # mask AVX bit @@ -112,10 +147,21 @@ my @Tx=map("%xmm$_",(8..10)); my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization my @T=("%esi","%edi"); -my $j=0; my $jj=0; my $r=0; my $sn=0; +my $j=0; my $jj=0; my $r=0; my $sn=0; my $rx=0; my $K_XX_XX="%r11"; -my ($iv,$in,$rndkey0)=map("%xmm$_",(11..13)); -my @rndkey=("%xmm14","%xmm15"); +my ($rndkey0,$iv,$in)=map("%xmm$_",(11..13)); # for enc +my @rndkey=("%xmm14","%xmm15"); # for enc +my ($inout0,$inout1,$inout2,$inout3)=map("%xmm$_",(12..15)); # for dec + +if (1) { # reassign for Atom Silvermont + # The goal is to minimize amount of instructions with more than + # 3 prefix bytes. Or in more practical terms to keep AES-NI *and* + # SSSE3 instructions to upper half of the register bank. + @X=map("%xmm$_",(8..11,4..7)); + @Tx=map("%xmm$_",(12,13,3)); + ($iv,$in,$rndkey0)=map("%xmm$_",(2,14,15)); + @rndkey=("%xmm0","%xmm1"); +} sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm { my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; @@ -129,7 +175,7 @@ () $code.=<<___; .type aesni_cbc_sha1_enc_ssse3,\@function,6 -.align 16 +.align 32 aesni_cbc_sha1_enc_ssse3: mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact @@ -161,16 +207,16 @@ () mov $in0,%r12 # reassign arguments mov $out,%r13 mov $len,%r14 - mov $key,%r15 + lea 112($key),%r15 # size optimization movdqu ($ivp),$iv # load IV mov $ivp,88(%rsp) # save $ivp ___ -my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments +($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments my $rounds="${ivp}d"; $code.=<<___; shl \$6,$len sub $in0,$out - mov 240($key),$rounds + mov 240-112($key),$rounds add $inp,$len # end of input lea K_XX_XX(%rip),$K_XX_XX @@ -180,19 +226,22 @@ () mov 12($ctx),$D mov $B,@T[0] # magic seed mov 16($ctx),$E + mov $C,@T[1] + xor $D,@T[1] + and @T[1],@T[0] - movdqa 64($K_XX_XX),@X[2] # pbswap mask + movdqa 64($K_XX_XX),@Tx[2] # pbswap mask movdqa 0($K_XX_XX),@Tx[1] # K_00_19 movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] movdqu 16($inp),@X[-3&7] movdqu 32($inp),@X[-2&7] movdqu 48($inp),@X[-1&7] - pshufb @X[2],@X[-4&7] # byte swap + pshufb @Tx[2],@X[-4&7] # byte swap + pshufb @Tx[2],@X[-3&7] + pshufb @Tx[2],@X[-2&7] add \$64,$inp - pshufb @X[2],@X[-3&7] - pshufb @X[2],@X[-2&7] - pshufb @X[2],@X[-1&7] paddd @Tx[1],@X[-4&7] # add K_00_19 + pshufb @Tx[2],@X[-1&7] paddd @Tx[1],@X[-3&7] paddd @Tx[1],@X[-2&7] movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU @@ -201,8 +250,8 @@ () psubd @Tx[1],@X[-3&7] movdqa @X[-2&7],32(%rsp) psubd @Tx[1],@X[-2&7] - 
movups ($key),$rndkey0 # $key[0] - movups 16($key),$rndkey[0] # forward reference + movups -112($key),$rndkey0 # $key[0] + movups 16-112($key),$rndkey[0] # forward reference jmp .Loop_ssse3 ___ @@ -219,31 +268,31 @@ () ___ $code.=<<___; xorps $in,$iv + movups `32+16*$k-112`($key),$rndkey[1] aesenc $rndkey[0],$iv - movups `32+16*$k`($key),$rndkey[1] ___ } elsif ($k==9) { $sn++; $code.=<<___; cmp \$11,$rounds jb .Laesenclast$sn - movups `32+16*($k+0)`($key),$rndkey[1] + movups `32+16*($k+0)-112`($key),$rndkey[1] aesenc $rndkey[0],$iv - movups `32+16*($k+1)`($key),$rndkey[0] + movups `32+16*($k+1)-112`($key),$rndkey[0] aesenc $rndkey[1],$iv je .Laesenclast$sn - movups `32+16*($k+2)`($key),$rndkey[1] + movups `32+16*($k+2)-112`($key),$rndkey[1] aesenc $rndkey[0],$iv - movups `32+16*($k+3)`($key),$rndkey[0] + movups `32+16*($k+3)-112`($key),$rndkey[0] aesenc $rndkey[1],$iv .Laesenclast$sn: aesenclast $rndkey[0],$iv - movups 16($key),$rndkey[1] # forward reference + movups 16-112($key),$rndkey[1] # forward reference ___ } else { $code.=<<___; + movups `32+16*$k-112`($key),$rndkey[1] aesenc $rndkey[0],$iv - movups `32+16*$k`($key),$rndkey[1] ___ } $r++; unshift(@rndkey,pop(@rndkey)); @@ -255,74 +304,75 @@ () my @insns = (&$body,&$body,&$body,&$body); # 40 instructions my ($a,$b,$c,$d,$e); - &movdqa (@X[0],@X[-3&7]); - eval(shift(@insns)); + eval(shift(@insns)); # ror + &pshufd (@X[0],@X[-4&7],0xee); # was &movdqa (@X[0],@X[-3&7]); eval(shift(@insns)); &movdqa (@Tx[0],@X[-1&7]); - &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" + &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); - &paddd (@Tx[1],@X[-1&7]); + &punpcklqdq(@X[0],@X[-3&7]); # compose "X[-14]" in "X[0]", was &palignr(@X[0],@X[-4&7],8); eval(shift(@insns)); + eval(shift(@insns)); # rol eval(shift(@insns)); &psrldq (@Tx[0],4); # "X[-3]", 3 dwords eval(shift(@insns)); eval(shift(@insns)); + &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" eval(shift(@insns)); - eval(shift(@insns)); - + eval(shift(@insns)); # ror &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - eval(shift(@insns)); &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" eval(shift(@insns)); - eval(shift(@insns)); + eval(shift(@insns)); # rol &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); &movdqa (@Tx[2],@X[0]); - &movdqa (@Tx[0],@X[0]); - eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); # ror + &movdqa (@Tx[0],@X[0]); eval(shift(@insns)); &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword &paddd (@X[0],@X[0]); eval(shift(@insns)); eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); &psrld (@Tx[0],31); eval(shift(@insns)); + eval(shift(@insns)); # rol eval(shift(@insns)); &movdqa (@Tx[1],@Tx[2]); eval(shift(@insns)); eval(shift(@insns)); &psrld (@Tx[2],30); - &por (@X[0],@Tx[0]); # "X[0]"<<<=1 eval(shift(@insns)); + eval(shift(@insns)); # ror + &por (@X[0],@Tx[0]); # "X[0]"<<<=1 eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); &pslld (@Tx[1],2); &pxor (@X[0],@Tx[2]); - eval(shift(@insns)); eval(shift(@insns)); &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + eval(shift(@insns)); # rol eval(shift(@insns)); eval(shift(@insns)); &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 + &pshufd (@Tx[1],@X[-1&7],0xee) if ($Xi==7); # was &movdqa (@Tx[0],@X[-1&7]) in Xupdate_ssse3_32_79 foreach (@insns) { eval; } # remaining instructions [if any] @@ -333,27 +383,30 @@ () sub 
Xupdate_ssse3_32_79() { use integer; my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my @insns = (&$body,&$body,&$body,&$body); # 32 to 44 instructions my ($a,$b,$c,$d,$e); - &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8); - eval(shift(@insns)); # body_20_39 + eval(shift(@insns)) if ($Xi==8); &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]" + eval(shift(@insns)) if ($Xi==8); + eval(shift(@insns)); # body_20_39 eval(shift(@insns)); + eval(shift(@insns)) if (@insns[1] =~ /_ror/); + eval(shift(@insns)) if (@insns[0] =~ /_ror/); + &punpcklqdq(@Tx[0],@X[-1&7]); # compose "X[-6]", was &palignr(@Tx[0],@X[-2&7],8); eval(shift(@insns)); eval(shift(@insns)); # rol &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" eval(shift(@insns)); - eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); + eval(shift(@insns)); if ($Xi%5) { &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... } else { # ... or load next one &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); } - &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); # ror + &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]" @@ -361,29 +414,31 @@ () eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); # rol + eval(shift(@insns)) if (@insns[0] =~ /_ror/); &movdqa (@Tx[0],@X[0]); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU eval(shift(@insns)); # ror eval(shift(@insns)); + eval(shift(@insns)); # body_20_39 &pslld (@X[0],2); - eval(shift(@insns)); # body_20_39 eval(shift(@insns)); - &psrld (@Tx[0],30); eval(shift(@insns)); - eval(shift(@insns)); # rol + &psrld (@Tx[0],30); + eval(shift(@insns)) if (@insns[0] =~ /_rol/);# rol eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); # ror - eval(shift(@insns)); &por (@X[0],@Tx[0]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 eval(shift(@insns)); - &movdqa (@Tx[1],@X[0]) if ($Xi<19); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)) if (@insns[1] =~ /_rol/); + eval(shift(@insns)) if (@insns[0] =~ /_rol/); + &pshufd(@Tx[1],@X[-1&7],0xee) if ($Xi<19); # was &movdqa (@Tx[1],@X[0]) eval(shift(@insns)); eval(shift(@insns)); # rol eval(shift(@insns)); @@ -404,9 +459,10 @@ () my ($a,$b,$c,$d,$e); eval(shift(@insns)); - &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); @@ -415,17 +471,17 @@ () foreach (@insns) { eval; } # remaining instructions &cmp ($inp,$len); - &je (".Ldone_ssse3"); + &je (shift); unshift(@Tx,pop(@Tx)); - &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask + &movdqa (@Tx[2],"64($K_XX_XX)"); # pbswap mask &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19 &movdqu (@X[-4&7],"0($inp)"); # load input &movdqu (@X[-3&7],"16($inp)"); &movdqu (@X[-2&7],"32($inp)"); &movdqu (@X[-1&7],"48($inp)"); - &pshufb (@X[-4&7],@X[2]); # byte swap + &pshufb (@X[-4&7],@Tx[2]); # byte swap &add ($inp,64); $Xi=0; @@ -439,7 +495,10 @@ () eval(shift(@insns)); eval(shift(@insns)); - &pshufb (@X[($Xi-3)&7],@X[2]); + eval(shift(@insns)); + &pshufb (@X[($Xi-3)&7],@Tx[2]); + eval(shift(@insns)); + eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); &paddd (@X[($Xi-4)&7],@Tx[1]); @@ -450,6 +509,8 @@ () &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); &psubd 
(@X[($Xi-4)&7],@Tx[1]); foreach (@insns) { eval; } @@ -465,76 +526,106 @@ () foreach (@insns) { eval; } } -sub body_00_19 () { - use integer; - my ($k,$n); - my @r=( +my @body_00_19 = ( '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer - '&xor ($c,$d);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&xor ($c,$d);', # restore $c - '&xor (@T[0],$d);', - '&add ($e,$a);', '&$_ror ($b,$j?7:2);', # $b>>>2 - '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&xor (@T[0],$d);', + '&mov (@T[1],$a);', # $b for next round + + '&add ($e,eval(4*($j&15))."(%rsp)");',# X[]+K xfer + '&xor ($b,$c);', # $c^$d for next round + + '&$_rol ($a,5);', + '&add ($e,@T[0]);', + '&and (@T[1],$b);', # ($b&($c^$d)) for next round + + '&xor ($b,$c);', # restore $b + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); + +sub body_00_19 () { # ((c^d)&b)^d + # on start @T[0]=(c^d)&b + return &body_20_39() if ($rx==19); $rx++; + + use integer; + my ($k,$n); + my @r=@body_00_19; + $n = scalar(@r); $k = (($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); $jj++; + return @r; } -sub body_20_39 () { - use integer; - my ($k,$n); - my @r=( +my @body_20_39 = ( '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&xor (@T[0],$d);', # ($b^$d) - '&mov (@T[1],$a);', # $b in next round + '&add ($e,eval(4*($j&15))."(%rsp)");',# X[]+K xfer + '&xor (@T[0],$d) if($j==19);'. + '&xor (@T[0],$c) if($j> 19);', # ($b^$d^$c) + '&mov (@T[1],$a);', # $b for next round + '&$_rol ($a,5);', - '&xor (@T[0],$c);', # ($b^$d^$c) - '&add ($e,$a);', + '&add ($e,@T[0]);', + '&xor (@T[1],$c) if ($j< 79);', # $b^$d for next round + '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); + +sub body_20_39 () { # b^d^c + # on entry @T[0]=b^d + return &body_40_59() if ($rx==39); $rx++; + + use integer; + my ($k,$n); + my @r=@body_20_39; + $n = scalar(@r); $k = (($jj+1)*8/20)*20*$n/8; # 8 aesencs per these 20 rounds - @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); + @r[$k%$n].='&$aesenc();' if ($jj==$k/$n && $rx!=20); $jj++; + return @r; } -sub body_40_59 () { - use integer; - my ($k,$n); - my @r=( +my @body_40_59 = ( '($a,$b,$c,$d,$e)=@V;'. - '&mov (@T[1],$c);', - '&xor ($c,$d);', - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&and (@T[1],$d);', - '&and (@T[0],$c);', # ($b&($c^$d)) + '&add ($e,eval(4*($j&15))."(%rsp)");',# X[]+K xfer + '&and (@T[0],$c) if ($j>=40);', # (b^c)&(c^d) + '&xor ($c,$d) if ($j>=40);', # restore $c + '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[1]);', - '&mov (@T[1],$a);', # $b in next round + '&mov (@T[1],$a);', # $b for next round + '&xor (@T[0],$c);', + '&$_rol ($a,5);', '&add ($e,@T[0]);', - '&xor ($c,$d);', # restore $c - '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&xor (@T[1],$c) if ($j==59);'. 
+ '&xor (@T[1],$b) if ($j< 59);', # b^c for next round + + '&xor ($b,$c) if ($j< 59);', # c^d for next round + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); + +sub body_40_59 () { # ((b^c)&(c^d))^c + # on entry @T[0]=(b^c), (c^=d) + $rx++; + + use integer; + my ($k,$n); + my @r=@body_40_59; + $n = scalar(@r); $k=(($jj+1)*12/20)*20*$n/12; # 12 aesencs per these 20 rounds - @r[$k%$n].='&$aesenc();' if ($jj==$k/$n); + @r[$k%$n].='&$aesenc();' if ($jj==$k/$n && $rx!=40); $jj++; + return @r; } $code.=<<___; -.align 16 +.align 32 .Loop_ssse3: ___ &Xupdate_ssse3_16_31(\&body_00_19); @@ -553,7 +644,7 @@ () &Xupdate_ssse3_32_79(\&body_40_59); &Xupdate_ssse3_32_79(\&body_40_59); &Xupdate_ssse3_32_79(\&body_20_39); - &Xuplast_ssse3_80(\&body_20_39); # can jump to "done" + &Xuplast_ssse3_80(\&body_20_39,".Ldone_ssse3"); # can jump to "done" $saved_j=$j; @saved_V=@V; $saved_r=$r; @saved_rndkey=@rndkey; @@ -575,11 +666,13 @@ () mov @T[0],4($ctx) mov @T[0],$B # magic seed mov $C,8($ctx) + mov $C,@T[1] mov $D,12($ctx) + xor $D,@T[1] mov $E,16($ctx) + and @T[1],@T[0] jmp .Loop_ssse3 -.align 16 .Ldone_ssse3: ___ $jj=$j=$saved_j; @V=@saved_V; @@ -631,7 +724,278 @@ () .size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3 ___ -$j=$jj=$r=$sn=0; + if ($stitched_decrypt) {{{ +# reset +($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); +$j=$jj=$r=$rx=0; +$Xi=4; + +# reassign for Atom Silvermont (see above) +($inout0,$inout1,$inout2,$inout3,$rndkey0)=map("%xmm$_",(0..4)); +@X=map("%xmm$_",(8..13,6,7)); +@Tx=map("%xmm$_",(14,15,5)); + +my @aes256_dec = ( + '&movdqu($inout0,"0x00($in0)");', + '&movdqu($inout1,"0x10($in0)"); &pxor ($inout0,$rndkey0);', + '&movdqu($inout2,"0x20($in0)"); &pxor ($inout1,$rndkey0);', + '&movdqu($inout3,"0x30($in0)"); &pxor ($inout2,$rndkey0);', + + '&pxor ($inout3,$rndkey0); &movups ($rndkey0,"16-112($key)");', + '&movaps("64(%rsp)",@X[2]);', # save IV, originally @X[3] + undef,undef + ); +for ($i=0;$i<13;$i++) { + push (@aes256_dec,( + '&aesdec ($inout0,$rndkey0);', + '&aesdec ($inout1,$rndkey0);', + '&aesdec ($inout2,$rndkey0);', + '&aesdec ($inout3,$rndkey0); &movups($rndkey0,"'.(16*($i+2)-112).'($key)");' + )); + push (@aes256_dec,(undef,undef)) if (($i>=3 && $i<=5) || $i>=11); + push (@aes256_dec,(undef,undef)) if ($i==5); +} +push(@aes256_dec,( + '&aesdeclast ($inout0,$rndkey0); &movups (@X[0],"0x00($in0)");', + '&aesdeclast ($inout1,$rndkey0); &movups (@X[1],"0x10($in0)");', + '&aesdeclast ($inout2,$rndkey0); &movups (@X[2],"0x20($in0)");', + '&aesdeclast ($inout3,$rndkey0); &movups (@X[3],"0x30($in0)");', + + '&xorps ($inout0,"64(%rsp)"); &movdqu ($rndkey0,"-112($key)");', + '&xorps ($inout1,@X[0]); &movups ("0x00($out,$in0)",$inout0);', + '&xorps ($inout2,@X[1]); &movups ("0x10($out,$in0)",$inout1);', + '&xorps ($inout3,@X[2]); &movups ("0x20($out,$in0)",$inout2);', + + '&movups ("0x30($out,$in0)",$inout3);' + )); + +sub body_00_19_dec () { # ((c^d)&b)^d + # on start @T[0]=(c^d)&b + return &body_20_39_dec() if ($rx==19); + + my @r=@body_00_19; + + unshift (@r,@aes256_dec[$rx]) if (@aes256_dec[$rx]); + $rx++; + + return @r; +} + +sub body_20_39_dec () { # b^d^c + # on entry @T[0]=b^d + return &body_40_59_dec() if ($rx==39); + + my @r=@body_20_39; + + unshift (@r,@aes256_dec[$rx]) if (@aes256_dec[$rx]); + $rx++; + + return @r; +} + +sub body_40_59_dec () { # ((b^c)&(c^d))^c + # on entry @T[0]=(b^c), (c^=d) + + my @r=@body_40_59; + + unshift (@r,@aes256_dec[$rx]) if (@aes256_dec[$rx]); + $rx++; + + return @r; +} + 
+$code.=<<___; +.globl aesni256_cbc_sha1_dec +.type aesni256_cbc_sha1_dec,\@abi-omnipotent +.align 32 +aesni256_cbc_sha1_dec: + # caller should check for SSSE3 and AES-NI bits + mov OPENSSL_ia32cap_P+0(%rip),%r10d + mov OPENSSL_ia32cap_P+4(%rip),%r11d +___ +$code.=<<___ if ($avx); + and \$`1<<28`,%r11d # mask AVX bit + and \$`1<<30`,%r10d # mask "Intel CPU" bit + or %r11d,%r10d + cmp \$`1<<28|1<<30`,%r10d + je aesni256_cbc_sha1_dec_avx +___ +$code.=<<___; + jmp aesni256_cbc_sha1_dec_ssse3 + ret +.size aesni256_cbc_sha1_dec,.-aesni256_cbc_sha1_dec + +.type aesni256_cbc_sha1_dec_ssse3,\@function,6 +.align 32 +aesni256_cbc_sha1_dec_ssse3: + mov `($win64?56:8)`(%rsp),$inp # load 7th argument + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + lea `-104-($win64?10*16:0)`(%rsp),%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,96+0(%rsp) + movaps %xmm7,96+16(%rsp) + movaps %xmm8,96+32(%rsp) + movaps %xmm9,96+48(%rsp) + movaps %xmm10,96+64(%rsp) + movaps %xmm11,96+80(%rsp) + movaps %xmm12,96+96(%rsp) + movaps %xmm13,96+112(%rsp) + movaps %xmm14,96+128(%rsp) + movaps %xmm15,96+144(%rsp) +.Lprologue_dec_ssse3: +___ +$code.=<<___; + mov $in0,%r12 # reassign arguments + mov $out,%r13 + mov $len,%r14 + lea 112($key),%r15 # size optimization + movdqu ($ivp),@X[3] # load IV + #mov $ivp,88(%rsp) # save $ivp +___ +($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments +$code.=<<___; + shl \$6,$len + sub $in0,$out + add $inp,$len # end of input + + lea K_XX_XX(%rip),$K_XX_XX + mov 0($ctx),$A # load context + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov $B,@T[0] # magic seed + mov 16($ctx),$E + mov $C,@T[1] + xor $D,@T[1] + and @T[1],@T[0] + + movdqa 64($K_XX_XX),@Tx[2] # pbswap mask + movdqa 0($K_XX_XX),@Tx[1] # K_00_19 + movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] + movdqu 16($inp),@X[-3&7] + movdqu 32($inp),@X[-2&7] + movdqu 48($inp),@X[-1&7] + pshufb @Tx[2],@X[-4&7] # byte swap + add \$64,$inp + pshufb @Tx[2],@X[-3&7] + pshufb @Tx[2],@X[-2&7] + pshufb @Tx[2],@X[-1&7] + paddd @Tx[1],@X[-4&7] # add K_00_19 + paddd @Tx[1],@X[-3&7] + paddd @Tx[1],@X[-2&7] + movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU + psubd @Tx[1],@X[-4&7] # restore X[] + movdqa @X[-3&7],16(%rsp) + psubd @Tx[1],@X[-3&7] + movdqa @X[-2&7],32(%rsp) + psubd @Tx[1],@X[-2&7] + movdqu -112($key),$rndkey0 # $key[0] + jmp .Loop_dec_ssse3 + +.align 32 +.Loop_dec_ssse3: +___ + &Xupdate_ssse3_16_31(\&body_00_19_dec); + &Xupdate_ssse3_16_31(\&body_00_19_dec); + &Xupdate_ssse3_16_31(\&body_00_19_dec); + &Xupdate_ssse3_16_31(\&body_00_19_dec); + &Xupdate_ssse3_32_79(\&body_00_19_dec); + &Xupdate_ssse3_32_79(\&body_20_39_dec); + &Xupdate_ssse3_32_79(\&body_20_39_dec); + &Xupdate_ssse3_32_79(\&body_20_39_dec); + &Xupdate_ssse3_32_79(\&body_20_39_dec); + &Xupdate_ssse3_32_79(\&body_20_39_dec); + &Xupdate_ssse3_32_79(\&body_40_59_dec); + &Xupdate_ssse3_32_79(\&body_40_59_dec); + &Xupdate_ssse3_32_79(\&body_40_59_dec); + &Xupdate_ssse3_32_79(\&body_40_59_dec); + &Xupdate_ssse3_32_79(\&body_40_59_dec); + &Xupdate_ssse3_32_79(\&body_20_39_dec); + &Xuplast_ssse3_80(\&body_20_39_dec,".Ldone_dec_ssse3"); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + $saved_rx=$rx; + + &Xloop_ssse3(\&body_20_39_dec); + &Xloop_ssse3(\&body_20_39_dec); + &Xloop_ssse3(\&body_20_39_dec); + + eval(@aes256_dec[-1]); # last store +$code.=<<___; + lea 64($in0),$in0 + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + add 12($ctx),$D + mov $A,0($ctx) + add 16($ctx),$E + mov @T[0],4($ctx) + mov @T[0],$B # magic 
seed + mov $C,8($ctx) + mov $C,@T[1] + mov $D,12($ctx) + xor $D,@T[1] + mov $E,16($ctx) + and @T[1],@T[0] + jmp .Loop_dec_ssse3 + +.Ldone_dec_ssse3: +___ + $jj=$j=$saved_j; @V=@saved_V; + $rx=$saved_rx; + + &Xtail_ssse3(\&body_20_39_dec); + &Xtail_ssse3(\&body_20_39_dec); + &Xtail_ssse3(\&body_20_39_dec); + + eval(@aes256_dec[-1]); # last store +$code.=<<___; + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + mov $A,0($ctx) + add 12($ctx),$D + mov @T[0],4($ctx) + add 16($ctx),$E + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + movups @X[3],($ivp) # write IV +___ +$code.=<<___ if ($win64); + movaps 96+0(%rsp),%xmm6 + movaps 96+16(%rsp),%xmm7 + movaps 96+32(%rsp),%xmm8 + movaps 96+48(%rsp),%xmm9 + movaps 96+64(%rsp),%xmm10 + movaps 96+80(%rsp),%xmm11 + movaps 96+96(%rsp),%xmm12 + movaps 96+112(%rsp),%xmm13 + movaps 96+128(%rsp),%xmm14 + movaps 96+144(%rsp),%xmm15 +___ +$code.=<<___; + lea `104+($win64?10*16:0)`(%rsp),%rsi + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_dec_ssse3: + ret +.size aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3 +___ + }}} +$j=$jj=$r=$rx=0; if ($avx) { my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); @@ -641,13 +1005,17 @@ () my @Tx=map("%xmm$_",(8..10)); my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization my @T=("%esi","%edi"); +my ($rndkey0,$iv,$in)=map("%xmm$_",(11..13)); +my @rndkey=("%xmm14","%xmm15"); +my ($inout0,$inout1,$inout2,$inout3)=map("%xmm$_",(12..15)); # for dec +my $Kx=@Tx[2]; my $_rol=sub { &shld(@_[0],@_) }; my $_ror=sub { &shrd(@_[0],@_) }; $code.=<<___; .type aesni_cbc_sha1_enc_avx,\@function,6 -.align 16 +.align 32 aesni_cbc_sha1_enc_avx: mov `($win64?56:8)`(%rsp),$inp # load 7th argument #shr \$6,$len # debugging artefact @@ -680,17 +1048,16 @@ () mov $in0,%r12 # reassign arguments mov $out,%r13 mov $len,%r14 - mov $key,%r15 + lea 112($key),%r15 # size optimization vmovdqu ($ivp),$iv # load IV mov $ivp,88(%rsp) # save $ivp ___ -my ($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments +($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments my $rounds="${ivp}d"; $code.=<<___; shl \$6,$len sub $in0,$out - mov 240($key),$rounds - add \$112,$key # size optimization + mov 240-112($key),$rounds add $inp,$len # end of input lea K_XX_XX(%rip),$K_XX_XX @@ -700,9 +1067,12 @@ () mov 12($ctx),$D mov $B,@T[0] # magic seed mov 16($ctx),$E + mov $C,@T[1] + xor $D,@T[1] + and @T[1],@T[0] vmovdqa 64($K_XX_XX),@X[2] # pbswap mask - vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19 + vmovdqa 0($K_XX_XX),$Kx # K_00_19 vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] vmovdqu 16($inp),@X[-3&7] vmovdqu 32($inp),@X[-2&7] @@ -712,13 +1082,13 @@ () vpshufb @X[2],@X[-3&7],@X[-3&7] vpshufb @X[2],@X[-2&7],@X[-2&7] vpshufb @X[2],@X[-1&7],@X[-1&7] - vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19 - vpaddd @Tx[1],@X[-3&7],@X[1] - vpaddd @Tx[1],@X[-2&7],@X[2] + vpaddd $Kx,@X[-4&7],@X[0] # add K_00_19 + vpaddd $Kx,@X[-3&7],@X[1] + vpaddd $Kx,@X[-2&7],@X[2] vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU vmovdqa @X[1],16(%rsp) vmovdqa @X[2],32(%rsp) - vmovups -112($key),$rndkey0 # $key[0] + vmovups -112($key),$rndkey[1] # $key[0] vmovups 16-112($key),$rndkey[0] # forward reference jmp .Loop_avx ___ @@ -728,14 +1098,14 @@ () my ($n,$k)=($r/10,$r%10); if ($k==0) { $code.=<<___; - vmovups `16*$n`($in0),$in # load input - vxorps $rndkey0,$in,$in + vmovdqu `16*$n`($in0),$in # 
load input + vpxor $rndkey[1],$in,$in ___ $code.=<<___ if ($n); vmovups $iv,`16*($n-1)`($out,$in0) # write output ___ $code.=<<___; - vxorps $in,$iv,$iv + vpxor $in,$iv,$iv vaesenc $rndkey[0],$iv,$iv vmovups `32+16*$k-112`($key),$rndkey[1] ___ @@ -755,6 +1125,7 @@ () vmovups `32+16*($k+3)-112`($key),$rndkey[0] .Lvaesenclast$sn: vaesenclast $rndkey[0],$iv,$iv + vmovups -112($key),$rndkey[0] vmovups 16-112($key),$rndkey[1] # forward reference ___ } else { @@ -778,10 +1149,10 @@ () eval(shift(@insns)); eval(shift(@insns)); - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + &vpaddd (@Tx[1],$Kx,@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); - &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords + &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords eval(shift(@insns)); eval(shift(@insns)); &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" @@ -807,31 +1178,31 @@ () eval(shift(@insns)); eval(shift(@insns)); - &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword + &vpslldq(@Tx[1],@X[0],12); # "X[0]"<<96, extract one dword &vpaddd (@X[0],@X[0],@X[0]); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - &vpsrld (@Tx[1],@Tx[2],30); &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 + &vpsrld (@Tx[0],@Tx[1],30); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - &vpslld (@Tx[2],@Tx[2],2); - &vpxor (@X[0],@X[0],@Tx[1]); + &vpslld (@Tx[1],@Tx[1],2); + &vpxor (@X[0],@X[0],@Tx[0]); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 + &vpxor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 eval(shift(@insns)); eval(shift(@insns)); - &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + &vmovdqa ($Kx,eval(16*(($Xi)/5))."($K_XX_XX)") if ($Xi%5==0); # K_XX_XX eval(shift(@insns)); eval(shift(@insns)); @@ -839,7 +1210,6 @@ () foreach (@insns) { eval; } # remaining instructions [if any] $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); } sub Xupdate_avx_32_79() @@ -858,12 +1228,8 @@ () &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" eval(shift(@insns)); eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); - if ($Xi%5) { - &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... - } else { # ... 
or load next one - &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); - } - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + &vpaddd (@Tx[1],$Kx,@X[-1&7]); + &vmovdqa ($Kx,eval(16*($Xi/5))."($K_XX_XX)") if ($Xi%5==0); eval(shift(@insns)); # ror eval(shift(@insns)); @@ -893,7 +1259,6 @@ () &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 eval(shift(@insns)); # body_20_39 eval(shift(@insns)); - &vmovdqa (@Tx[1],@X[0]) if ($Xi<19); eval(shift(@insns)); eval(shift(@insns)); # rol eval(shift(@insns)); @@ -904,7 +1269,6 @@ () foreach (@insns) { eval; } # remaining instructions $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); } sub Xuplast_avx_80() @@ -914,28 +1278,26 @@ () my ($a,$b,$c,$d,$e); eval(shift(@insns)); - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + &vpaddd (@Tx[1],$Kx,@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU + &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU foreach (@insns) { eval; } # remaining instructions &cmp ($inp,$len); - &je (".Ldone_avx"); + &je (shift); - unshift(@Tx,pop(@Tx)); - - &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask - &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19 + &vmovdqa(@Tx[1],"64($K_XX_XX)"); # pbswap mask + &vmovdqa($Kx,"0($K_XX_XX)"); # K_00_19 &vmovdqu(@X[-4&7],"0($inp)"); # load input &vmovdqu(@X[-3&7],"16($inp)"); &vmovdqu(@X[-2&7],"32($inp)"); &vmovdqu(@X[-1&7],"48($inp)"); - &vpshufb(@X[-4&7],@X[-4&7],@X[2]); # byte swap + &vpshufb(@X[-4&7],@X[-4&7],@Tx[1]); # byte swap &add ($inp,64); $Xi=0; @@ -949,15 +1311,15 @@ () eval(shift(@insns)); eval(shift(@insns)); - &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); + &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@Tx[1]); eval(shift(@insns)); eval(shift(@insns)); - &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]); + &vpaddd (@Tx[0],@X[($Xi-4)&7],$Kx); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - &vmovdqa(eval(16*$Xi)."(%rsp)",@X[$Xi&7]); # X[]+K xfer to IALU + &vmovdqa(eval(16*$Xi)."(%rsp)",@Tx[0]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); @@ -975,7 +1337,7 @@ () } $code.=<<___; -.align 16 +.align 32 .Loop_avx: ___ &Xupdate_avx_16_31(\&body_00_19); @@ -994,7 +1356,7 @@ () &Xupdate_avx_32_79(\&body_40_59); &Xupdate_avx_32_79(\&body_40_59); &Xupdate_avx_32_79(\&body_20_39); - &Xuplast_avx_80(\&body_20_39); # can jump to "done" + &Xuplast_avx_80(\&body_20_39,".Ldone_avx"); # can jump to "done" $saved_j=$j; @saved_V=@V; $saved_r=$r; @saved_rndkey=@rndkey; @@ -1016,11 +1378,13 @@ () mov @T[0],4($ctx) mov @T[0],$B # magic seed mov $C,8($ctx) + mov $C,@T[1] mov $D,12($ctx) + xor $D,@T[1] mov $E,16($ctx) + and @T[1],@T[0] jmp .Loop_avx -.align 16 .Ldone_avx: ___ $jj=$j=$saved_j; @V=@saved_V; @@ -1072,6 +1436,218 @@ () ret .size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx ___ + + if ($stitched_decrypt) {{{ +# reset +($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); + +$j=$jj=$r=$rx=0; +$Xi=4; + +@aes256_dec = ( + '&vpxor ($inout0,$rndkey0,"0x00($in0)");', + '&vpxor ($inout1,$rndkey0,"0x10($in0)");', + '&vpxor ($inout2,$rndkey0,"0x20($in0)");', + '&vpxor ($inout3,$rndkey0,"0x30($in0)");', + + '&vmovups($rndkey0,"16-112($key)");', + '&vmovups("64(%rsp)",@X[2]);', # save IV, originally @X[3] + undef,undef + ); +for ($i=0;$i<13;$i++) { + push (@aes256_dec,( + '&vaesdec ($inout0,$inout0,$rndkey0);', + '&vaesdec ($inout1,$inout1,$rndkey0);', + '&vaesdec ($inout2,$inout2,$rndkey0);', + '&vaesdec ($inout3,$inout3,$rndkey0); 
&vmovups($rndkey0,"'.(16*($i+2)-112).'($key)");' + )); + push (@aes256_dec,(undef,undef)) if (($i>=3 && $i<=5) || $i>=11); + push (@aes256_dec,(undef,undef)) if ($i==5); +} +push(@aes256_dec,( + '&vaesdeclast ($inout0,$inout0,$rndkey0); &vmovups(@X[0],"0x00($in0)");', + '&vaesdeclast ($inout1,$inout1,$rndkey0); &vmovups(@X[1],"0x10($in0)");', + '&vaesdeclast ($inout2,$inout2,$rndkey0); &vmovups(@X[2],"0x20($in0)");', + '&vaesdeclast ($inout3,$inout3,$rndkey0); &vmovups(@X[3],"0x30($in0)");', + + '&vxorps ($inout0,$inout0,"64(%rsp)"); &vmovdqu($rndkey0,"-112($key)");', + '&vxorps ($inout1,$inout1,@X[0]); &vmovups("0x00($out,$in0)",$inout0);', + '&vxorps ($inout2,$inout2,@X[1]); &vmovups("0x10($out,$in0)",$inout1);', + '&vxorps ($inout3,$inout3,@X[2]); &vmovups("0x20($out,$in0)",$inout2);', + + '&vmovups ("0x30($out,$in0)",$inout3);' + )); + +$code.=<<___; +.type aesni256_cbc_sha1_dec_avx,\@function,6 +.align 32 +aesni256_cbc_sha1_dec_avx: + mov `($win64?56:8)`(%rsp),$inp # load 7th argument + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + lea `-104-($win64?10*16:0)`(%rsp),%rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,96+0(%rsp) + movaps %xmm7,96+16(%rsp) + movaps %xmm8,96+32(%rsp) + movaps %xmm9,96+48(%rsp) + movaps %xmm10,96+64(%rsp) + movaps %xmm11,96+80(%rsp) + movaps %xmm12,96+96(%rsp) + movaps %xmm13,96+112(%rsp) + movaps %xmm14,96+128(%rsp) + movaps %xmm15,96+144(%rsp) +.Lprologue_dec_avx: +___ +$code.=<<___; + vzeroall + mov $in0,%r12 # reassign arguments + mov $out,%r13 + mov $len,%r14 + lea 112($key),%r15 # size optimization + vmovdqu ($ivp),@X[3] # load IV +___ +($in0,$out,$len,$key)=map("%r$_",(12..15)); # reassign arguments +$code.=<<___; + shl \$6,$len + sub $in0,$out + add $inp,$len # end of input + + lea K_XX_XX(%rip),$K_XX_XX + mov 0($ctx),$A # load context + mov 4($ctx),$B + mov 8($ctx),$C + mov 12($ctx),$D + mov $B,@T[0] # magic seed + mov 16($ctx),$E + mov $C,@T[1] + xor $D,@T[1] + and @T[1],@T[0] + + vmovdqa 64($K_XX_XX),@X[2] # pbswap mask + vmovdqa 0($K_XX_XX),$Kx # K_00_19 + vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] + vmovdqu 16($inp),@X[-3&7] + vmovdqu 32($inp),@X[-2&7] + vmovdqu 48($inp),@X[-1&7] + vpshufb @X[2],@X[-4&7],@X[-4&7] # byte swap + add \$64,$inp + vpshufb @X[2],@X[-3&7],@X[-3&7] + vpshufb @X[2],@X[-2&7],@X[-2&7] + vpshufb @X[2],@X[-1&7],@X[-1&7] + vpaddd $Kx,@X[-4&7],@X[0] # add K_00_19 + vpaddd $Kx,@X[-3&7],@X[1] + vpaddd $Kx,@X[-2&7],@X[2] + vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU + vmovdqa @X[1],16(%rsp) + vmovdqa @X[2],32(%rsp) + vmovups -112($key),$rndkey0 # $key[0] + jmp .Loop_dec_avx + +.align 32 +.Loop_dec_avx: +___ + &Xupdate_avx_16_31(\&body_00_19_dec); + &Xupdate_avx_16_31(\&body_00_19_dec); + &Xupdate_avx_16_31(\&body_00_19_dec); + &Xupdate_avx_16_31(\&body_00_19_dec); + &Xupdate_avx_32_79(\&body_00_19_dec); + &Xupdate_avx_32_79(\&body_20_39_dec); + &Xupdate_avx_32_79(\&body_20_39_dec); + &Xupdate_avx_32_79(\&body_20_39_dec); + &Xupdate_avx_32_79(\&body_20_39_dec); + &Xupdate_avx_32_79(\&body_20_39_dec); + &Xupdate_avx_32_79(\&body_40_59_dec); + &Xupdate_avx_32_79(\&body_40_59_dec); + &Xupdate_avx_32_79(\&body_40_59_dec); + &Xupdate_avx_32_79(\&body_40_59_dec); + &Xupdate_avx_32_79(\&body_40_59_dec); + &Xupdate_avx_32_79(\&body_20_39_dec); + &Xuplast_avx_80(\&body_20_39_dec,".Ldone_dec_avx"); # can jump to "done" + + $saved_j=$j; @saved_V=@V; + $saved_rx=$rx; + + &Xloop_avx(\&body_20_39_dec); + &Xloop_avx(\&body_20_39_dec); + &Xloop_avx(\&body_20_39_dec); + + eval(@aes256_dec[-1]); # last store 
+$code.=<<___; + lea 64($in0),$in0 + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + add 12($ctx),$D + mov $A,0($ctx) + add 16($ctx),$E + mov @T[0],4($ctx) + mov @T[0],$B # magic seed + mov $C,8($ctx) + mov $C,@T[1] + mov $D,12($ctx) + xor $D,@T[1] + mov $E,16($ctx) + and @T[1],@T[0] + jmp .Loop_dec_avx + +.Ldone_dec_avx: +___ + $jj=$j=$saved_j; @V=@saved_V; + $rx=$saved_rx; + + &Xtail_avx(\&body_20_39_dec); + &Xtail_avx(\&body_20_39_dec); + &Xtail_avx(\&body_20_39_dec); + + eval(@aes256_dec[-1]); # last store +$code.=<<___; + + add 0($ctx),$A # update context + add 4($ctx),@T[0] + add 8($ctx),$C + mov $A,0($ctx) + add 12($ctx),$D + mov @T[0],4($ctx) + add 16($ctx),$E + mov $C,8($ctx) + mov $D,12($ctx) + mov $E,16($ctx) + vmovups @X[3],($ivp) # write IV + vzeroall +___ +$code.=<<___ if ($win64); + movaps 96+0(%rsp),%xmm6 + movaps 96+16(%rsp),%xmm7 + movaps 96+32(%rsp),%xmm8 + movaps 96+48(%rsp),%xmm9 + movaps 96+64(%rsp),%xmm10 + movaps 96+80(%rsp),%xmm11 + movaps 96+96(%rsp),%xmm12 + movaps 96+112(%rsp),%xmm13 + movaps 96+128(%rsp),%xmm14 + movaps 96+144(%rsp),%xmm15 +___ +$code.=<<___; + lea `104+($win64?10*16:0)`(%rsp),%rsi + mov 0(%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_dec_avx: + ret +.size aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx +___ + }}} } $code.=<<___; .align 64 @@ -1081,11 +1657,180 @@ () .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 .asciz "AESNI-CBC+SHA1 stitch for x86_64, CRYPTOGAMS by " .align 64 ___ + if ($shaext) {{{ +($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); + +$rounds="%r11d"; + +($iv,$in,$rndkey0)=map("%xmm$_",(2,14,15)); +@rndkey=("%xmm0","%xmm1"); +$r=0; + +my ($BSWAP,$ABCD,$E,$E_,$ABCD_SAVE,$E_SAVE)=map("%xmm$_",(7..12)); +my @MSG=map("%xmm$_",(3..6)); + +$code.=<<___; +.type aesni_cbc_sha1_enc_shaext,\@function,6 +.align 32 +aesni_cbc_sha1_enc_shaext: + mov `($win64?56:8)`(%rsp),$inp # load 7th argument +___ +$code.=<<___ if ($win64); + lea `-8-10*16`(%rsp),%rsp + movaps %xmm6,-8-10*16(%rax) + movaps %xmm7,-8-9*16(%rax) + movaps %xmm8,-8-8*16(%rax) + movaps %xmm9,-8-7*16(%rax) + movaps %xmm10,-8-6*16(%rax) + movaps %xmm11,-8-5*16(%rax) + movaps %xmm12,-8-4*16(%rax) + movaps %xmm13,-8-3*16(%rax) + movaps %xmm14,-8-2*16(%rax) + movaps %xmm15,-8-1*16(%rax) +.Lprologue_shaext: +___ +$code.=<<___; + movdqu ($ctx),$ABCD + movd 16($ctx),$E + movdqa K_XX_XX+0x50(%rip),$BSWAP # byte-n-word swap + + mov 240($key),$rounds + sub $in0,$out + movups ($key),$rndkey0 # $key[0] + movups 16($key),$rndkey[0] # forward reference + lea 112($key),$key # size optimization + + pshufd \$0b00011011,$ABCD,$ABCD # flip word order + pshufd \$0b00011011,$E,$E # flip word order + jmp .Loop_shaext +.align 16 +.Loop_shaext: +___ + &$aesenc(); +$code.=<<___; + movdqu ($inp),@MSG[0] + movdqa $E,$E_SAVE # offload $E + pshufb $BSWAP,@MSG[0] + movdqu 0x10($inp),@MSG[1] + movdqa $ABCD,$ABCD_SAVE # offload $ABCD +___ + &$aesenc(); +$code.=<<___; + pshufb $BSWAP,@MSG[1] + + paddd @MSG[0],$E + movdqu 0x20($inp),@MSG[2] + lea 0x40($inp),$inp + pxor $E_SAVE,@MSG[0] # black magic +___ + &$aesenc(); +$code.=<<___; + pxor $E_SAVE,@MSG[0] # black magic + movdqa $ABCD,$E_ + pshufb $BSWAP,@MSG[2] + sha1rnds4 \$0,$E,$ABCD # 0-3 + 
sha1nexte @MSG[1],$E_ +___ + &$aesenc(); +$code.=<<___; + sha1msg1 @MSG[1],@MSG[0] + movdqu -0x10($inp),@MSG[3] + movdqa $ABCD,$E + pshufb $BSWAP,@MSG[3] +___ + &$aesenc(); +$code.=<<___; + sha1rnds4 \$0,$E_,$ABCD # 4-7 + sha1nexte @MSG[2],$E + pxor @MSG[2],@MSG[0] + sha1msg1 @MSG[2],@MSG[1] +___ + &$aesenc(); + +for($i=2;$i<20-4;$i++) { +$code.=<<___; + movdqa $ABCD,$E_ + sha1rnds4 \$`int($i/5)`,$E,$ABCD # 8-11 + sha1nexte @MSG[3],$E_ +___ + &$aesenc(); +$code.=<<___; + sha1msg2 @MSG[3],@MSG[0] + pxor @MSG[3],@MSG[1] + sha1msg1 @MSG[3],@MSG[2] +___ + ($E,$E_)=($E_,$E); + push(@MSG,shift(@MSG)); + + &$aesenc(); +} +$code.=<<___; + movdqa $ABCD,$E_ + sha1rnds4 \$3,$E,$ABCD # 64-67 + sha1nexte @MSG[3],$E_ + sha1msg2 @MSG[3],@MSG[0] + pxor @MSG[3],@MSG[1] +___ + &$aesenc(); +$code.=<<___; + movdqa $ABCD,$E + sha1rnds4 \$3,$E_,$ABCD # 68-71 + sha1nexte @MSG[0],$E + sha1msg2 @MSG[0],@MSG[1] +___ + &$aesenc(); +$code.=<<___; + movdqa $E_SAVE,@MSG[0] + movdqa $ABCD,$E_ + sha1rnds4 \$3,$E,$ABCD # 72-75 + sha1nexte @MSG[1],$E_ +___ + &$aesenc(); +$code.=<<___; + movdqa $ABCD,$E + sha1rnds4 \$3,$E_,$ABCD # 76-79 + sha1nexte $MSG[0],$E +___ + while($r<40) { &$aesenc(); } # remaining aesenc's +$code.=<<___; + dec $len + + paddd $ABCD_SAVE,$ABCD + movups $iv,48($out,$in0) # write output + lea 64($in0),$in0 + jnz .Loop_shaext + + pshufd \$0b00011011,$ABCD,$ABCD + pshufd \$0b00011011,$E,$E + movups $iv,($ivp) # write IV + movdqu $ABCD,($ctx) + movd $E,16($ctx) +___ +$code.=<<___ if ($win64); + movaps -8-10*16(%rax),%xmm6 + movaps -8-9*16(%rax),%xmm7 + movaps -8-8*16(%rax),%xmm8 + movaps -8-7*16(%rax),%xmm9 + movaps -8-6*16(%rax),%xmm10 + movaps -8-5*16(%rax),%xmm11 + movaps -8-4*16(%rax),%xmm12 + movaps -8-3*16(%rax),%xmm13 + movaps -8-2*16(%rax),%xmm14 + movaps -8-1*16(%rax),%xmm15 + mov %rax,%rsp +.Lepilogue_shaext: +___ +$code.=<<___; + ret +.size aesni_cbc_sha1_enc_shaext,.-aesni_cbc_sha1_enc_shaext +___ + }}} # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, # CONTEXT *context,DISPATCHER_CONTEXT *disp) if ($win64) { @@ -1127,7 +1872,21 @@ () lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail +___ +$code.=<<___ if ($shaext); + lea aesni_cbc_sha1_enc_shaext(%rip),%r10 + cmp %r10,%rbx + jb .Lseh_no_shaext + lea (%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + lea 168(%rax),%rax # adjust stack pointer + jmp .Lcommon_seh_tail +.Lseh_no_shaext: +___ +$code.=<<___; lea 96(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 mov \$20,%ecx @@ -1199,6 +1958,11 @@ () .rva .LSEH_end_aesni_cbc_sha1_enc_avx .rva .LSEH_info_aesni_cbc_sha1_enc_avx ___ +$code.=<<___ if ($shaext); + .rva .LSEH_begin_aesni_cbc_sha1_enc_shaext + .rva .LSEH_end_aesni_cbc_sha1_enc_shaext + .rva .LSEH_info_aesni_cbc_sha1_enc_shaext +___ $code.=<<___; .section .xdata .align 8 @@ -1213,6 +1977,12 @@ () .rva ssse3_handler .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] ___ +$code.=<<___ if ($shaext); +.LSEH_info_aesni_cbc_sha1_enc_shaext: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_shaext,.Lepilogue_shaext # HandlerData[] +___ } #################################################################### @@ -1223,28 +1993,65 @@ sub rex { $rex|=0x04 if($dst>=8); $rex|=0x01 if($src>=8); - push @opcode,$rex|0x40 if($rex); + unshift @opcode,$rex|0x40 if($rex); +} + +sub sha1rnds4 { + if (@_[0] =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x3a,0xcc); + rex(\@opcode,$3,$2); + push 
@opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + return ".byte\t".join(',',@opcode); + } else { + return "sha1rnds4\t".@_[0]; + } +} + +sub sha1op38 { + my $instr = shift; + my %opcodelet = ( + "sha1nexte" => 0xc8, + "sha1msg1" => 0xc9, + "sha1msg2" => 0xca ); + + if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x38); + rex(\@opcode,$2,$1); + push @opcode,$opcodelet{$instr}; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } else { + return $instr."\t".@_[0]; + } } sub aesni { my $line=shift; - my @opcode=(0x66); + my @opcode=(0x0f,0x38); if ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { my %opcodelet = ( - "aesenc" => 0xdc, "aesenclast" => 0xdd + "aesenc" => 0xdc, "aesenclast" => 0xdd, + "aesdec" => 0xde, "aesdeclast" => 0xdf ); return undef if (!defined($opcodelet{$1})); rex(\@opcode,$3,$2); - push @opcode,0x0f,0x38,$opcodelet{$1}; - push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + push @opcode,$opcodelet{$1},0xc0|($2&7)|(($3&7)<<3); # ModR/M + unshift @opcode,0x66; return ".byte\t".join(',',@opcode); } return $line; } -$code =~ s/\`([^\`]*)\`/eval($1)/gem; -$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo or + s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo or + s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/geo; -print $code; + print $_,"\n"; +} close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl new file mode 100644 index 00000000000000..c1fce89834fc3f --- /dev/null +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl @@ -0,0 +1,1708 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# January 2013 +# +# This is AESNI-CBC+SHA256 stitch implementation. The idea, as spelled +# in http://download.intel.com/design/intarch/papers/323686.pdf, is +# that since AESNI-CBC encrypt exhibit *very* low instruction-level +# parallelism, interleaving it with another algorithm would allow to +# utilize processor resources better and achieve better performance. +# SHA256 instruction sequences(*) are taken from sha512-x86_64.pl and +# AESNI code is weaved into it. As SHA256 dominates execution time, +# stitch performance does not depend on AES key length. 
Below are +# performance numbers in cycles per processed byte, less is better, +# for standalone AESNI-CBC encrypt, standalone SHA256, and stitched +# subroutine: +# +# AES-128/-192/-256+SHA256 this(**)gain +# Sandy Bridge 5.05/6.05/7.05+11.6 13.0 +28%/36%/43% +# Ivy Bridge 5.05/6.05/7.05+10.3 11.6 +32%/41%/50% +# Haswell 4.43/5.29/6.19+7.80 8.79 +39%/49%/59% +# Bulldozer 5.77/6.89/8.00+13.7 13.7 +42%/50%/58% +# +# (*) there are XOP, AVX1 and AVX2 code pathes, meaning that +# Westmere is omitted from loop, this is because gain was not +# estimated high enough to justify the effort; +# (**) these are EVP-free results, results obtained with 'speed +# -evp aes-256-cbc-hmac-sha256' will vary by percent or two; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=12); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +$shaext=$avx; ### set to zero if compiling for 1.0.1 +$avx=1 if (!$shaext && $avx); + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +$func="aesni_cbc_sha256_enc"; +$TABLE="K256"; +$SZ=4; +@ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx", + "%r8d","%r9d","%r10d","%r11d"); +($T1,$a0,$a1,$a2,$a3)=("%r12d","%r13d","%r14d","%r15d","%esi"); +@Sigma0=( 2,13,22); +@Sigma1=( 6,11,25); +@sigma0=( 7,18, 3); +@sigma1=(17,19,10); +$rounds=64; + +######################################################################## +# void aesni_cbc_sha256_enc(const void *inp, +# void *out, +# size_t length, +# const AES_KEY *key, +# unsigned char *iv, +# SHA256_CTX *ctx, +# const void *in0); +($inp, $out, $len, $key, $ivp, $ctx, $in0) = +("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); + +$Tbl="%rbp"; + +$_inp="16*$SZ+0*8(%rsp)"; +$_out="16*$SZ+1*8(%rsp)"; +$_end="16*$SZ+2*8(%rsp)"; +$_key="16*$SZ+3*8(%rsp)"; +$_ivp="16*$SZ+4*8(%rsp)"; +$_ctx="16*$SZ+5*8(%rsp)"; +$_in0="16*$SZ+6*8(%rsp)"; +$_rsp="16*$SZ+7*8(%rsp)"; +$framesz=16*$SZ+8*8; + +$code=<<___; +.text + +.extern OPENSSL_ia32cap_P +.globl $func +.type $func,\@abi-omnipotent +.align 16 +$func: +___ + if ($avx) { +$code.=<<___; + lea OPENSSL_ia32cap_P(%rip),%r11 + mov \$1,%eax + cmp \$0,`$win64?"%rcx":"%rdi"` + je .Lprobe + mov 0(%r11),%eax + mov 4(%r11),%r10 +___ +$code.=<<___ if ($shaext); + bt \$61,%r10 # check for SHA + jc ${func}_shaext +___ +$code.=<<___; + mov %r10,%r11 + shr \$32,%r11 + + test \$`1<<11`,%r10d # check for XOP + jnz ${func}_xop +___ +$code.=<<___ if ($avx>1); + and \$`1<<8|1<<5|1<<3`,%r11d # check for BMI2+AVX2+BMI1 + cmp \$`1<<8|1<<5|1<<3`,%r11d + je ${func}_avx2 +___ +$code.=<<___; + and \$`1<<30`,%eax # mask "Intel CPU" bit + and \$`1<<28|1<<9`,%r10d # mask AVX+SSSE3 bits + or %eax,%r10d + cmp \$`1<<28|1<<9|1<<30`,%r10d + je ${func}_avx + ud2 +___ + 
} +$code.=<<___; + xor %eax,%eax + cmp \$0,`$win64?"%rcx":"%rdi"` + je .Lprobe + ud2 +.Lprobe: + ret +.size $func,.-$func + +.align 64 +.type $TABLE,\@object +$TABLE: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + .long 0,0,0,0, 0,0,0,0, -1,-1,-1,-1 + .long 0,0,0,0, 0,0,0,0 + .asciz "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by " +.align 64 +___ + +###################################################################### +# SIMD code paths +# +{{{ +($iv,$inout,$roundkey,$temp, + $mask10,$mask12,$mask14,$offload)=map("%xmm$_",(8..15)); + +$aesni_cbc_idx=0; +@aesni_cbc_block = ( +## &vmovdqu ($roundkey,"0x00-0x80($inp)");' +## &vmovdqu ($inout,($inp)); +## &mov ($_inp,$inp); + + '&vpxor ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x10-0x80($inp)");', + + '&vpxor ($inout,$inout,$iv);', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x20-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x30-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x40-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x50-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x60-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x70-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x80-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x90-0x80($inp)");', + + '&vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0xa0-0x80($inp)");', + + '&vaesenclast ($temp,$inout,$roundkey);'. + ' &vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0xb0-0x80($inp)");', + + '&vpand ($iv,$temp,$mask10);'. + ' &vaesenc ($inout,$inout,$roundkey);'. 
+ ' &vmovdqu ($roundkey,"0xc0-0x80($inp)");', + + '&vaesenclast ($temp,$inout,$roundkey);'. + ' &vaesenc ($inout,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0xd0-0x80($inp)");', + + '&vpand ($temp,$temp,$mask12);'. + ' &vaesenc ($inout,$inout,$roundkey);'. + '&vmovdqu ($roundkey,"0xe0-0x80($inp)");', + + '&vpor ($iv,$iv,$temp);'. + ' &vaesenclast ($temp,$inout,$roundkey);'. + ' &vmovdqu ($roundkey,"0x00-0x80($inp)");' + +## &mov ($inp,$_inp); +## &mov ($out,$_out); +## &vpand ($temp,$temp,$mask14); +## &vpor ($iv,$iv,$temp); +## &vmovdqu ($iv,($out,$inp); +## &lea (inp,16($inp)); +); + +my $a4=$T1; +my ($a,$b,$c,$d,$e,$f,$g,$h); + +sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; + my $arg = pop; + $arg = "\$$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'. + + '&ror ($a0,$Sigma1[2]-$Sigma1[1])', + '&mov ($a,$a1)', + '&mov ($a4,$f)', + + '&xor ($a0,$e)', + '&ror ($a1,$Sigma0[2]-$Sigma0[1])', + '&xor ($a4,$g)', # f^g + + '&ror ($a0,$Sigma1[1]-$Sigma1[0])', + '&xor ($a1,$a)', + '&and ($a4,$e)', # (f^g)&e + + @aesni_cbc_block[$aesni_cbc_idx++]. + '&xor ($a0,$e)', + '&add ($h,$SZ*($i&15)."(%rsp)")', # h+=X[i]+K[i] + '&mov ($a2,$a)', + + '&ror ($a1,$Sigma0[1]-$Sigma0[0])', + '&xor ($a4,$g)', # Ch(e,f,g)=((f^g)&e)^g + '&xor ($a2,$b)', # a^b, b^c in next round + + '&ror ($a0,$Sigma1[0])', # Sigma1(e) + '&add ($h,$a4)', # h+=Ch(e,f,g) + '&and ($a3,$a2)', # (b^c)&(a^b) + + '&xor ($a1,$a)', + '&add ($h,$a0)', # h+=Sigma1(e) + '&xor ($a3,$b)', # Maj(a,b,c)=Ch(a^b,c,b) + + '&add ($d,$h)', # d+=h + '&ror ($a1,$Sigma0[0])', # Sigma0(a) + '&add ($h,$a3)', # h+=Maj(a,b,c) + + '&mov ($a0,$d)', + '&add ($a1,$h);'. 
# h+=Sigma0(a) + '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;' + ); +} + +if ($avx) {{ +###################################################################### +# XOP code path +# +$code.=<<___; +.type ${func}_xop,\@function,6 +.align 64 +${func}_xop: +.Lxop_shortcut: + mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%r11 # copy %rsp + sub \$`$framesz+$win64*16*10`,%rsp + and \$-64,%rsp # align stack frame + + shl \$6,$len + sub $inp,$out # re-bias + sub $inp,$in0 + add $inp,$len # end of input + + #mov $inp,$_inp # saved later + mov $out,$_out + mov $len,$_end + #mov $key,$_key # remains resident in $inp register + mov $ivp,$_ivp + mov $ctx,$_ctx + mov $in0,$_in0 + mov %r11,$_rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,`$framesz+16*0`(%rsp) + movaps %xmm7,`$framesz+16*1`(%rsp) + movaps %xmm8,`$framesz+16*2`(%rsp) + movaps %xmm9,`$framesz+16*3`(%rsp) + movaps %xmm10,`$framesz+16*4`(%rsp) + movaps %xmm11,`$framesz+16*5`(%rsp) + movaps %xmm12,`$framesz+16*6`(%rsp) + movaps %xmm13,`$framesz+16*7`(%rsp) + movaps %xmm14,`$framesz+16*8`(%rsp) + movaps %xmm15,`$framesz+16*9`(%rsp) +___ +$code.=<<___; +.Lprologue_xop: + vzeroall + + mov $inp,%r12 # borrow $a4 + lea 0x80($key),$inp # size optimization, reassign + lea $TABLE+`$SZ*2*$rounds+32`(%rip),%r13 # borrow $a0 + mov 0xf0-0x80($inp),%r14d # rounds, borrow $a1 + mov $ctx,%r15 # borrow $a2 + mov $in0,%rsi # borrow $a3 + vmovdqu ($ivp),$iv # load IV + sub \$9,%r14 + + mov $SZ*0(%r15),$A + mov $SZ*1(%r15),$B + mov $SZ*2(%r15),$C + mov $SZ*3(%r15),$D + mov $SZ*4(%r15),$E + mov $SZ*5(%r15),$F + mov $SZ*6(%r15),$G + mov $SZ*7(%r15),$H + + vmovdqa 0x00(%r13,%r14,8),$mask14 + vmovdqa 0x10(%r13,%r14,8),$mask12 + vmovdqa 0x20(%r13,%r14,8),$mask10 + vmovdqu 0x00-0x80($inp),$roundkey + jmp .Lloop_xop +___ + if ($SZ==4) { # SHA256 + my @X = map("%xmm$_",(0..3)); + my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7)); + +$code.=<<___; +.align 16 +.Lloop_xop: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu 0x00(%rsi,%r12),@X[0] + vmovdqu 0x10(%rsi,%r12),@X[1] + vmovdqu 0x20(%rsi,%r12),@X[2] + vmovdqu 0x30(%rsi,%r12),@X[3] + vpshufb $t3,@X[0],@X[0] + lea $TABLE(%rip),$Tbl + vpshufb $t3,@X[1],@X[1] + vpshufb $t3,@X[2],@X[2] + vpaddd 0x00($Tbl),@X[0],$t0 + vpshufb $t3,@X[3],@X[3] + vpaddd 0x20($Tbl),@X[1],$t1 + vpaddd 0x40($Tbl),@X[2],$t2 + vpaddd 0x60($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + mov $A,$a1 + vmovdqa $t1,0x10(%rsp) + mov $B,$a3 + vmovdqa $t2,0x20(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x30(%rsp) + mov $E,$a0 + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + sub \$-16*2*$SZ,$Tbl # size optimization + vmovdqu (%r12),$inout # $a4 + mov %r12,$_inp # $a4 +___ +sub XOP_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 104 instructions + + &vpalignr ($t0,@X[1],@X[0],$SZ); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + &vpalignr ($t3,@X[3],@X[2],$SZ); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t1,$t0,8*$SZ-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrld ($t0,$t0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t3); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t2,$t1,$sigma0[1]-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t0,$t0,$t1); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + 
eval(shift(@insns)); + &vprotd ($t3,@X[3],8*$SZ-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t0,$t0,$t2); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrld ($t2,@X[3],$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t0); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t1,$t3,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t1); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrldq ($t3,$t3,8); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t3); # X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t3,@X[0],8*$SZ-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrld ($t2,@X[0],$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t1,$t3,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t1); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpslldq ($t3,$t3,8); # 22 instructions + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t3); # X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd ($t2,@X[0],16*2*$j."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (16*$j."(%rsp)",$t2); +} + + $aesni_cbc_idx=0; + for ($i=0,$j=0; $j<4; $j++) { + &XOP_256_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &mov ("%r12",$_inp); # borrow $a4 + &vpand ($temp,$temp,$mask14); + &mov ("%r15",$_out); # borrow $a2 + &vpor ($iv,$iv,$temp); + &vmovdqu ("(%r15,%r12)",$iv); # write output + &lea ("%r12","16(%r12)"); # inp++ + + &cmpb ($SZ-1+16*2*$SZ."($Tbl)",0); + &jne (".Lxop_00_47"); + + &vmovdqu ($inout,"(%r12)"); + &mov ($_inp,"%r12"); + + $aesni_cbc_idx=0; + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } + } +$code.=<<___; + mov $_inp,%r12 # borrow $a4 + mov $_out,%r13 # borrow $a0 + mov $_ctx,%r15 # borrow $a2 + mov $_in0,%rsi # borrow $a3 + + vpand $mask14,$temp,$temp + mov $a1,$A + vpor $temp,$iv,$iv + vmovdqu $iv,(%r13,%r12) # write output + lea 16(%r12),%r12 # inp++ + + add $SZ*0(%r15),$A + add $SZ*1(%r15),$B + add $SZ*2(%r15),$C + add $SZ*3(%r15),$D + add $SZ*4(%r15),$E + add $SZ*5(%r15),$F + add $SZ*6(%r15),$G + add $SZ*7(%r15),$H + + cmp $_end,%r12 + + mov $A,$SZ*0(%r15) + mov $B,$SZ*1(%r15) + mov $C,$SZ*2(%r15) + mov $D,$SZ*3(%r15) + mov $E,$SZ*4(%r15) + mov $F,$SZ*5(%r15) + mov $G,$SZ*6(%r15) + mov $H,$SZ*7(%r15) + + jb .Lloop_xop + + mov $_ivp,$ivp + mov $_rsp,%rsi + vmovdqu $iv,($ivp) # output IV + vzeroall +___ +$code.=<<___ if ($win64); + movaps `$framesz+16*0`(%rsp),%xmm6 + movaps `$framesz+16*1`(%rsp),%xmm7 + movaps `$framesz+16*2`(%rsp),%xmm8 + movaps `$framesz+16*3`(%rsp),%xmm9 + movaps `$framesz+16*4`(%rsp),%xmm10 + movaps `$framesz+16*5`(%rsp),%xmm11 + movaps `$framesz+16*6`(%rsp),%xmm12 + movaps `$framesz+16*7`(%rsp),%xmm13 + movaps `$framesz+16*8`(%rsp),%xmm14 
+ movaps `$framesz+16*9`(%rsp),%xmm15 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_xop: + ret +.size ${func}_xop,.-${func}_xop +___ +###################################################################### +# AVX+shrd code path +# +local *ror = sub { &shrd(@_[0],@_) }; + +$code.=<<___; +.type ${func}_avx,\@function,6 +.align 64 +${func}_avx: +.Lavx_shortcut: + mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%r11 # copy %rsp + sub \$`$framesz+$win64*16*10`,%rsp + and \$-64,%rsp # align stack frame + + shl \$6,$len + sub $inp,$out # re-bias + sub $inp,$in0 + add $inp,$len # end of input + + #mov $inp,$_inp # saved later + mov $out,$_out + mov $len,$_end + #mov $key,$_key # remains resident in $inp register + mov $ivp,$_ivp + mov $ctx,$_ctx + mov $in0,$_in0 + mov %r11,$_rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,`$framesz+16*0`(%rsp) + movaps %xmm7,`$framesz+16*1`(%rsp) + movaps %xmm8,`$framesz+16*2`(%rsp) + movaps %xmm9,`$framesz+16*3`(%rsp) + movaps %xmm10,`$framesz+16*4`(%rsp) + movaps %xmm11,`$framesz+16*5`(%rsp) + movaps %xmm12,`$framesz+16*6`(%rsp) + movaps %xmm13,`$framesz+16*7`(%rsp) + movaps %xmm14,`$framesz+16*8`(%rsp) + movaps %xmm15,`$framesz+16*9`(%rsp) +___ +$code.=<<___; +.Lprologue_avx: + vzeroall + + mov $inp,%r12 # borrow $a4 + lea 0x80($key),$inp # size optimization, reassign + lea $TABLE+`$SZ*2*$rounds+32`(%rip),%r13 # borrow $a0 + mov 0xf0-0x80($inp),%r14d # rounds, borrow $a1 + mov $ctx,%r15 # borrow $a2 + mov $in0,%rsi # borrow $a3 + vmovdqu ($ivp),$iv # load IV + sub \$9,%r14 + + mov $SZ*0(%r15),$A + mov $SZ*1(%r15),$B + mov $SZ*2(%r15),$C + mov $SZ*3(%r15),$D + mov $SZ*4(%r15),$E + mov $SZ*5(%r15),$F + mov $SZ*6(%r15),$G + mov $SZ*7(%r15),$H + + vmovdqa 0x00(%r13,%r14,8),$mask14 + vmovdqa 0x10(%r13,%r14,8),$mask12 + vmovdqa 0x20(%r13,%r14,8),$mask10 + vmovdqu 0x00-0x80($inp),$roundkey +___ + if ($SZ==4) { # SHA256 + my @X = map("%xmm$_",(0..3)); + my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7)); + +$code.=<<___; + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu 0x00(%rsi,%r12),@X[0] + vmovdqu 0x10(%rsi,%r12),@X[1] + vmovdqu 0x20(%rsi,%r12),@X[2] + vmovdqu 0x30(%rsi,%r12),@X[3] + vpshufb $t3,@X[0],@X[0] + lea $TABLE(%rip),$Tbl + vpshufb $t3,@X[1],@X[1] + vpshufb $t3,@X[2],@X[2] + vpaddd 0x00($Tbl),@X[0],$t0 + vpshufb $t3,@X[3],@X[3] + vpaddd 0x20($Tbl),@X[1],$t1 + vpaddd 0x40($Tbl),@X[2],$t2 + vpaddd 0x60($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + mov $A,$a1 + vmovdqa $t1,0x10(%rsp) + mov $B,$a3 + vmovdqa $t2,0x20(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x30(%rsp) + mov $E,$a0 + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + sub \$-16*2*$SZ,$Tbl # size optimization + vmovdqu (%r12),$inout # $a4 + mov %r12,$_inp # $a4 +___ +sub Xupdate_256_AVX () { + ( + '&vpalignr ($t0,@X[1],@X[0],$SZ)', # X[1..4] + '&vpalignr ($t3,@X[3],@X[2],$SZ)', # X[9..12] + '&vpsrld ($t2,$t0,$sigma0[0]);', + '&vpaddd (@X[0],@X[0],$t3)', # X[0..3] += X[9..12] + '&vpsrld ($t3,$t0,$sigma0[2])', + '&vpslld ($t1,$t0,8*$SZ-$sigma0[1]);', + '&vpxor ($t0,$t3,$t2)', + '&vpshufd ($t3,@X[3],0b11111010)',# X[14..15] + '&vpsrld ($t2,$t2,$sigma0[1]-$sigma0[0]);', + '&vpxor ($t0,$t0,$t1)', + '&vpslld ($t1,$t1,$sigma0[1]-$sigma0[0]);', + '&vpxor ($t0,$t0,$t2)', + '&vpsrld ($t2,$t3,$sigma1[2]);', + '&vpxor ($t0,$t0,$t1)', # sigma0(X[1..4]) + '&vpsrlq 
($t3,$t3,$sigma1[0]);', + '&vpaddd (@X[0],@X[0],$t0)', # X[0..3] += sigma0(X[1..4]) + '&vpxor ($t2,$t2,$t3);', + '&vpsrlq ($t3,$t3,$sigma1[1]-$sigma1[0])', + '&vpxor ($t2,$t2,$t3)', # sigma1(X[14..15]) + '&vpshufd ($t2,$t2,0b10000100)', + '&vpsrldq ($t2,$t2,8)', + '&vpaddd (@X[0],@X[0],$t2)', # X[0..1] += sigma1(X[14..15]) + '&vpshufd ($t3,@X[0],0b01010000)',# X[16..17] + '&vpsrld ($t2,$t3,$sigma1[2])', + '&vpsrlq ($t3,$t3,$sigma1[0])', + '&vpxor ($t2,$t2,$t3);', + '&vpsrlq ($t3,$t3,$sigma1[1]-$sigma1[0])', + '&vpxor ($t2,$t2,$t3)', + '&vpshufd ($t2,$t2,0b11101000)', + '&vpslldq ($t2,$t2,8)', + '&vpaddd (@X[0],@X[0],$t2)' # X[2..3] += sigma1(X[16..17]) + ); +} + +sub AVX_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 104 instructions + + foreach (Xupdate_256_AVX()) { # 29 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + } + &vpaddd ($t2,@X[0],16*2*$j."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (16*$j."(%rsp)",$t2); +} + + $aesni_cbc_idx=0; + for ($i=0,$j=0; $j<4; $j++) { + &AVX_256_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &mov ("%r12",$_inp); # borrow $a4 + &vpand ($temp,$temp,$mask14); + &mov ("%r15",$_out); # borrow $a2 + &vpor ($iv,$iv,$temp); + &vmovdqu ("(%r15,%r12)",$iv); # write output + &lea ("%r12","16(%r12)"); # inp++ + + &cmpb ($SZ-1+16*2*$SZ."($Tbl)",0); + &jne (".Lavx_00_47"); + + &vmovdqu ($inout,"(%r12)"); + &mov ($_inp,"%r12"); + + $aesni_cbc_idx=0; + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } + + } +$code.=<<___; + mov $_inp,%r12 # borrow $a4 + mov $_out,%r13 # borrow $a0 + mov $_ctx,%r15 # borrow $a2 + mov $_in0,%rsi # borrow $a3 + + vpand $mask14,$temp,$temp + mov $a1,$A + vpor $temp,$iv,$iv + vmovdqu $iv,(%r13,%r12) # write output + lea 16(%r12),%r12 # inp++ + + add $SZ*0(%r15),$A + add $SZ*1(%r15),$B + add $SZ*2(%r15),$C + add $SZ*3(%r15),$D + add $SZ*4(%r15),$E + add $SZ*5(%r15),$F + add $SZ*6(%r15),$G + add $SZ*7(%r15),$H + + cmp $_end,%r12 + + mov $A,$SZ*0(%r15) + mov $B,$SZ*1(%r15) + mov $C,$SZ*2(%r15) + mov $D,$SZ*3(%r15) + mov $E,$SZ*4(%r15) + mov $F,$SZ*5(%r15) + mov $G,$SZ*6(%r15) + mov $H,$SZ*7(%r15) + jb .Lloop_avx + + mov $_ivp,$ivp + mov $_rsp,%rsi + vmovdqu $iv,($ivp) # output IV + vzeroall +___ +$code.=<<___ if ($win64); + movaps `$framesz+16*0`(%rsp),%xmm6 + movaps `$framesz+16*1`(%rsp),%xmm7 + movaps `$framesz+16*2`(%rsp),%xmm8 + movaps `$framesz+16*3`(%rsp),%xmm9 + movaps `$framesz+16*4`(%rsp),%xmm10 + movaps `$framesz+16*5`(%rsp),%xmm11 + movaps `$framesz+16*6`(%rsp),%xmm12 + movaps `$framesz+16*7`(%rsp),%xmm13 + movaps `$framesz+16*8`(%rsp),%xmm14 + movaps `$framesz+16*9`(%rsp),%xmm15 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_avx: + ret +.size ${func}_avx,.-${func}_avx +___ + +if ($avx>1) {{ +###################################################################### +# AVX2+BMI code path +# +my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp +my $PUSH8=8*2*$SZ; +use integer; + +sub bodyx_00_15 () { + # at start $a1 should be zero, $a3 - $b^$c and $a4 copy of $f + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'. 
+ + '&add ($h,(32*($i/(16/$SZ))+$SZ*($i%(16/$SZ)))%$PUSH8.$base)', # h+=X[i]+K[i] + '&and ($a4,$e)', # f&e + '&rorx ($a0,$e,$Sigma1[2])', + '&rorx ($a2,$e,$Sigma1[1])', + + '&lea ($a,"($a,$a1)")', # h+=Sigma0(a) from the past + '&lea ($h,"($h,$a4)")', + '&andn ($a4,$e,$g)', # ~e&g + '&xor ($a0,$a2)', + + '&rorx ($a1,$e,$Sigma1[0])', + '&lea ($h,"($h,$a4)")', # h+=Ch(e,f,g)=(e&f)+(~e&g) + '&xor ($a0,$a1)', # Sigma1(e) + '&mov ($a2,$a)', + + '&rorx ($a4,$a,$Sigma0[2])', + '&lea ($h,"($h,$a0)")', # h+=Sigma1(e) + '&xor ($a2,$b)', # a^b, b^c in next round + '&rorx ($a1,$a,$Sigma0[1])', + + '&rorx ($a0,$a,$Sigma0[0])', + '&lea ($d,"($d,$h)")', # d+=h + '&and ($a3,$a2)', # (b^c)&(a^b) + @aesni_cbc_block[$aesni_cbc_idx++]. + '&xor ($a1,$a4)', + + '&xor ($a3,$b)', # Maj(a,b,c)=Ch(a^b,c,b) + '&xor ($a1,$a0)', # Sigma0(a) + '&lea ($h,"($h,$a3)");'. # h+=Maj(a,b,c) + '&mov ($a4,$e)', # copy of f in future + + '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;' + ); + # and at the finish one has to $a+=$a1 +} + +$code.=<<___; +.type ${func}_avx2,\@function,6 +.align 64 +${func}_avx2: +.Lavx2_shortcut: + mov `($win64?56:8)`(%rsp),$in0 # load 7th parameter + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%r11 # copy %rsp + sub \$`2*$SZ*$rounds+8*8+$win64*16*10`,%rsp + and \$-256*$SZ,%rsp # align stack frame + add \$`2*$SZ*($rounds-8)`,%rsp + + shl \$6,$len + sub $inp,$out # re-bias + sub $inp,$in0 + add $inp,$len # end of input + + #mov $inp,$_inp # saved later + #mov $out,$_out # kept in $offload + mov $len,$_end + #mov $key,$_key # remains resident in $inp register + mov $ivp,$_ivp + mov $ctx,$_ctx + mov $in0,$_in0 + mov %r11,$_rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,`$framesz+16*0`(%rsp) + movaps %xmm7,`$framesz+16*1`(%rsp) + movaps %xmm8,`$framesz+16*2`(%rsp) + movaps %xmm9,`$framesz+16*3`(%rsp) + movaps %xmm10,`$framesz+16*4`(%rsp) + movaps %xmm11,`$framesz+16*5`(%rsp) + movaps %xmm12,`$framesz+16*6`(%rsp) + movaps %xmm13,`$framesz+16*7`(%rsp) + movaps %xmm14,`$framesz+16*8`(%rsp) + movaps %xmm15,`$framesz+16*9`(%rsp) +___ +$code.=<<___; +.Lprologue_avx2: + vzeroall + + mov $inp,%r13 # borrow $a0 + vpinsrq \$1,$out,$offload,$offload + lea 0x80($key),$inp # size optimization, reassign + lea $TABLE+`$SZ*2*$rounds+32`(%rip),%r12 # borrow $a4 + mov 0xf0-0x80($inp),%r14d # rounds, borrow $a1 + mov $ctx,%r15 # borrow $a2 + mov $in0,%rsi # borrow $a3 + vmovdqu ($ivp),$iv # load IV + lea -9(%r14),%r14 + + vmovdqa 0x00(%r12,%r14,8),$mask14 + vmovdqa 0x10(%r12,%r14,8),$mask12 + vmovdqa 0x20(%r12,%r14,8),$mask10 + + sub \$-16*$SZ,%r13 # inp++, size optimization + mov $SZ*0(%r15),$A + lea (%rsi,%r13),%r12 # borrow $a0 + mov $SZ*1(%r15),$B + cmp $len,%r13 # $_end + mov $SZ*2(%r15),$C + cmove %rsp,%r12 # next block or random data + mov $SZ*3(%r15),$D + mov $SZ*4(%r15),$E + mov $SZ*5(%r15),$F + mov $SZ*6(%r15),$G + mov $SZ*7(%r15),$H + vmovdqu 0x00-0x80($inp),$roundkey +___ + if ($SZ==4) { # SHA256 + my @X = map("%ymm$_",(0..3)); + my ($t0,$t1,$t2,$t3) = map("%ymm$_",(4..7)); + +$code.=<<___; + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu -16*$SZ+0(%rsi,%r13),%xmm0 + vmovdqu -16*$SZ+16(%rsi,%r13),%xmm1 + vmovdqu -16*$SZ+32(%rsi,%r13),%xmm2 + vmovdqu -16*$SZ+48(%rsi,%r13),%xmm3 + + vinserti128 \$1,(%r12),@X[0],@X[0] + vinserti128 \$1,16(%r12),@X[1],@X[1] + vpshufb $t3,@X[0],@X[0] + vinserti128 \$1,32(%r12),@X[2],@X[2] + vpshufb $t3,@X[1],@X[1] + vinserti128 \$1,48(%r12),@X[3],@X[3] + + lea $TABLE(%rip),$Tbl + vpshufb 
$t3,@X[2],@X[2] + lea -16*$SZ(%r13),%r13 + vpaddd 0x00($Tbl),@X[0],$t0 + vpshufb $t3,@X[3],@X[3] + vpaddd 0x20($Tbl),@X[1],$t1 + vpaddd 0x40($Tbl),@X[2],$t2 + vpaddd 0x60($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + xor $a1,$a1 + vmovdqa $t1,0x20(%rsp) + lea -$PUSH8(%rsp),%rsp + mov $B,$a3 + vmovdqa $t2,0x00(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x20(%rsp) + mov $F,$a4 + sub \$-16*2*$SZ,$Tbl # size optimization + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: + vmovdqu (%r13),$inout + vpinsrq \$0,%r13,$offload,$offload +___ + +sub AVX2_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 96 instructions +my $base = "+2*$PUSH8(%rsp)"; + + &lea ("%rsp","-$PUSH8(%rsp)") if (($j%2)==0); + foreach (Xupdate_256_AVX()) { # 29 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + } + &vpaddd ($t2,@X[0],16*2*$j."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa ((32*$j)%$PUSH8."(%rsp)",$t2); +} + $aesni_cbc_idx=0; + for ($i=0,$j=0; $j<4; $j++) { + &AVX2_256_00_47($j,\&bodyx_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &vmovq ("%r13",$offload); # borrow $a0 + &vpextrq ("%r15",$offload,1); # borrow $a2 + &vpand ($temp,$temp,$mask14); + &vpor ($iv,$iv,$temp); + &vmovdqu ("(%r15,%r13)",$iv); # write output + &lea ("%r13","16(%r13)"); # inp++ + + &lea ($Tbl,16*2*$SZ."($Tbl)"); + &cmpb (($SZ-1)."($Tbl)",0); + &jne (".Lavx2_00_47"); + + &vmovdqu ($inout,"(%r13)"); + &vpinsrq ($offload,$offload,"%r13",0); + + $aesni_cbc_idx=0; + for ($i=0; $i<16; ) { + my $base=$i<8?"+$PUSH8(%rsp)":"(%rsp)"; + foreach(bodyx_00_15()) { eval; } + } + } +$code.=<<___; + vpextrq \$1,$offload,%r12 # $_out, borrow $a4 + vmovq $offload,%r13 # $_inp, borrow $a0 + mov `2*$SZ*$rounds+5*8`(%rsp),%r15 # $_ctx, borrow $a2 + add $a1,$A + lea `2*$SZ*($rounds-8)`(%rsp),$Tbl + + vpand $mask14,$temp,$temp + vpor $temp,$iv,$iv + vmovdqu $iv,(%r12,%r13) # write output + lea 16(%r13),%r13 + + add $SZ*0(%r15),$A + add $SZ*1(%r15),$B + add $SZ*2(%r15),$C + add $SZ*3(%r15),$D + add $SZ*4(%r15),$E + add $SZ*5(%r15),$F + add $SZ*6(%r15),$G + add $SZ*7(%r15),$H + + mov $A,$SZ*0(%r15) + mov $B,$SZ*1(%r15) + mov $C,$SZ*2(%r15) + mov $D,$SZ*3(%r15) + mov $E,$SZ*4(%r15) + mov $F,$SZ*5(%r15) + mov $G,$SZ*6(%r15) + mov $H,$SZ*7(%r15) + + cmp `$PUSH8+2*8`($Tbl),%r13 # $_end + je .Ldone_avx2 + + xor $a1,$a1 + mov $B,$a3 + mov $F,$a4 + xor $C,$a3 # magic + jmp .Lower_avx2 +.align 16 +.Lower_avx2: + vmovdqu (%r13),$inout + vpinsrq \$0,%r13,$offload,$offload +___ + $aesni_cbc_idx=0; + for ($i=0; $i<16; ) { + my $base="+16($Tbl)"; + foreach(bodyx_00_15()) { eval; } + &lea ($Tbl,"-$PUSH8($Tbl)") if ($i==8); + } +$code.=<<___; + vmovq $offload,%r13 # borrow $a0 + vpextrq \$1,$offload,%r15 # borrow $a2 + vpand $mask14,$temp,$temp + vpor $temp,$iv,$iv + lea -$PUSH8($Tbl),$Tbl + vmovdqu $iv,(%r15,%r13) # write output + lea 16(%r13),%r13 # inp++ + cmp %rsp,$Tbl + jae .Lower_avx2 + + mov `2*$SZ*$rounds+5*8`(%rsp),%r15 # $_ctx, borrow $a2 + lea 16*$SZ(%r13),%r13 + mov `2*$SZ*$rounds+6*8`(%rsp),%rsi # $_in0, borrow $a3 + add $a1,$A + lea `2*$SZ*($rounds-8)`(%rsp),%rsp + + add $SZ*0(%r15),$A + add $SZ*1(%r15),$B + add $SZ*2(%r15),$C + add $SZ*3(%r15),$D + add $SZ*4(%r15),$E + add $SZ*5(%r15),$F + add $SZ*6(%r15),$G + lea (%rsi,%r13),%r12 + add $SZ*7(%r15),$H + + cmp $_end,%r13 + + mov $A,$SZ*0(%r15) + cmove %rsp,%r12 # next block or stale data + mov $B,$SZ*1(%r15) + mov $C,$SZ*2(%r15) + mov $D,$SZ*3(%r15) + mov $E,$SZ*4(%r15) + mov $F,$SZ*5(%r15) + 
mov $G,$SZ*6(%r15) + mov $H,$SZ*7(%r15) + + jbe .Loop_avx2 + lea (%rsp),$Tbl + +.Ldone_avx2: + lea ($Tbl),%rsp + mov $_ivp,$ivp + mov $_rsp,%rsi + vmovdqu $iv,($ivp) # output IV + vzeroall +___ +$code.=<<___ if ($win64); + movaps `$framesz+16*0`(%rsp),%xmm6 + movaps `$framesz+16*1`(%rsp),%xmm7 + movaps `$framesz+16*2`(%rsp),%xmm8 + movaps `$framesz+16*3`(%rsp),%xmm9 + movaps `$framesz+16*4`(%rsp),%xmm10 + movaps `$framesz+16*5`(%rsp),%xmm11 + movaps `$framesz+16*6`(%rsp),%xmm12 + movaps `$framesz+16*7`(%rsp),%xmm13 + movaps `$framesz+16*8`(%rsp),%xmm14 + movaps `$framesz+16*9`(%rsp),%xmm15 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_avx2: + ret +.size ${func}_avx2,.-${func}_avx2 +___ +}} +}} +{{ +my ($in0,$out,$len,$key,$ivp,$ctx,$inp)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9","%r10"); + +my ($rounds,$Tbl)=("%r11d","%rbx"); + +my ($iv,$in,$rndkey0)=map("%xmm$_",(6,14,15)); +my @rndkey=("%xmm4","%xmm5"); +my $r=0; +my $sn=0; + +my ($Wi,$ABEF,$CDGH,$TMP,$BSWAP,$ABEF_SAVE,$CDGH_SAVE)=map("%xmm$_",(0..3,7..9)); +my @MSG=map("%xmm$_",(10..13)); + +my $aesenc=sub { + use integer; + my ($n,$k)=($r/10,$r%10); + if ($k==0) { + $code.=<<___; + movups `16*$n`($in0),$in # load input + xorps $rndkey0,$in +___ + $code.=<<___ if ($n); + movups $iv,`16*($n-1)`($out,$in0) # write output +___ + $code.=<<___; + xorps $in,$iv + movups `32+16*$k-112`($key),$rndkey[1] + aesenc $rndkey[0],$iv +___ + } elsif ($k==9) { + $sn++; + $code.=<<___; + cmp \$11,$rounds + jb .Laesenclast$sn + movups `32+16*($k+0)-112`($key),$rndkey[1] + aesenc $rndkey[0],$iv + movups `32+16*($k+1)-112`($key),$rndkey[0] + aesenc $rndkey[1],$iv + je .Laesenclast$sn + movups `32+16*($k+2)-112`($key),$rndkey[1] + aesenc $rndkey[0],$iv + movups `32+16*($k+3)-112`($key),$rndkey[0] + aesenc $rndkey[1],$iv +.Laesenclast$sn: + aesenclast $rndkey[0],$iv + movups 16-112($key),$rndkey[1] # forward reference + nop +___ + } else { + $code.=<<___; + movups `32+16*$k-112`($key),$rndkey[1] + aesenc $rndkey[0],$iv +___ + } + $r++; unshift(@rndkey,pop(@rndkey)); +}; + +if ($shaext) { +my $Tbl="%rax"; + +$code.=<<___; +.type ${func}_shaext,\@function,6 +.align 32 +${func}_shaext: + mov `($win64?56:8)`(%rsp),$inp # load 7th argument +___ +$code.=<<___ if ($win64); + lea `-8-10*16`(%rsp),%rsp + movaps %xmm6,-8-10*16(%rax) + movaps %xmm7,-8-9*16(%rax) + movaps %xmm8,-8-8*16(%rax) + movaps %xmm9,-8-7*16(%rax) + movaps %xmm10,-8-6*16(%rax) + movaps %xmm11,-8-5*16(%rax) + movaps %xmm12,-8-4*16(%rax) + movaps %xmm13,-8-3*16(%rax) + movaps %xmm14,-8-2*16(%rax) + movaps %xmm15,-8-1*16(%rax) +.Lprologue_shaext: +___ +$code.=<<___; + lea K256+0x80(%rip),$Tbl + movdqu ($ctx),$ABEF # DCBA + movdqu 16($ctx),$CDGH # HGFE + movdqa 0x200-0x80($Tbl),$TMP # byte swap mask + + mov 240($key),$rounds + sub $in0,$out + movups ($key),$rndkey0 # $key[0] + movups 16($key),$rndkey[0] # forward reference + lea 112($key),$key # size optimization + + pshufd \$0x1b,$ABEF,$Wi # ABCD + pshufd \$0xb1,$ABEF,$ABEF # CDAB + pshufd \$0x1b,$CDGH,$CDGH # EFGH + movdqa $TMP,$BSWAP # offload + palignr \$8,$CDGH,$ABEF # ABEF + punpcklqdq $Wi,$CDGH # CDGH + + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + movdqu ($inp),@MSG[0] + movdqu 0x10($inp),@MSG[1] + movdqu 0x20($inp),@MSG[2] + pshufb $TMP,@MSG[0] + movdqu 0x30($inp),@MSG[3] + + movdqa 0*32-0x80($Tbl),$Wi + paddd @MSG[0],$Wi + pshufb $TMP,@MSG[1] + movdqa $CDGH,$CDGH_SAVE # offload + movdqa $ABEF,$ABEF_SAVE # offload +___ + 
&$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 0-3 + pshufd \$0x0e,$Wi,$Wi +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF + + movdqa 1*32-0x80($Tbl),$Wi + paddd @MSG[1],$Wi + pshufb $TMP,@MSG[2] + lea 0x40($inp),$inp +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 4-7 + pshufd \$0x0e,$Wi,$Wi +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF + + movdqa 2*32-0x80($Tbl),$Wi + paddd @MSG[2],$Wi + pshufb $TMP,@MSG[3] + sha256msg1 @MSG[1],@MSG[0] +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 8-11 + pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[3],$TMP + palignr \$4,@MSG[2],$TMP + paddd $TMP,@MSG[0] +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF + + movdqa 3*32-0x80($Tbl),$Wi + paddd @MSG[3],$Wi + sha256msg2 @MSG[3],@MSG[0] + sha256msg1 @MSG[2],@MSG[1] +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 12-15 + pshufd \$0x0e,$Wi,$Wi +___ + &$aesenc(); +$code.=<<___; + movdqa @MSG[0],$TMP + palignr \$4,@MSG[3],$TMP + paddd $TMP,@MSG[1] + sha256rnds2 $CDGH,$ABEF +___ +for($i=4;$i<16-3;$i++) { + &$aesenc() if (($r%10)==0); +$code.=<<___; + movdqa $i*32-0x80($Tbl),$Wi + paddd @MSG[0],$Wi + sha256msg2 @MSG[0],@MSG[1] + sha256msg1 @MSG[3],@MSG[2] +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 16-19... + pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[1],$TMP + palignr \$4,@MSG[0],$TMP + paddd $TMP,@MSG[2] +___ + &$aesenc(); + &$aesenc() if ($r==19); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF +___ + push(@MSG,shift(@MSG)); +} +$code.=<<___; + movdqa 13*32-0x80($Tbl),$Wi + paddd @MSG[0],$Wi + sha256msg2 @MSG[0],@MSG[1] + sha256msg1 @MSG[3],@MSG[2] +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 52-55 + pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[1],$TMP + palignr \$4,@MSG[0],$TMP + paddd $TMP,@MSG[2] +___ + &$aesenc(); + &$aesenc(); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF + + movdqa 14*32-0x80($Tbl),$Wi + paddd @MSG[1],$Wi + sha256msg2 @MSG[1],@MSG[2] + movdqa $BSWAP,$TMP +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 56-59 + pshufd \$0x0e,$Wi,$Wi +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF + + movdqa 15*32-0x80($Tbl),$Wi + paddd @MSG[2],$Wi +___ + &$aesenc(); + &$aesenc(); +$code.=<<___; + sha256rnds2 $ABEF,$CDGH # 60-63 + pshufd \$0x0e,$Wi,$Wi +___ + &$aesenc(); +$code.=<<___; + sha256rnds2 $CDGH,$ABEF + #pxor $CDGH,$rndkey0 # black magic +___ + while ($r<40) { &$aesenc(); } # remaining aesenc's +$code.=<<___; + #xorps $CDGH,$rndkey0 # black magic + paddd $CDGH_SAVE,$CDGH + paddd $ABEF_SAVE,$ABEF + + dec $len + movups $iv,48($out,$in0) # write output + lea 64($in0),$in0 + jnz .Loop_shaext + + pshufd \$0xb1,$CDGH,$CDGH # DCHG + pshufd \$0x1b,$ABEF,$TMP # FEBA + pshufd \$0xb1,$ABEF,$ABEF # BAFE + punpckhqdq $CDGH,$ABEF # DCBA + palignr \$8,$TMP,$CDGH # HGFE + + movups $iv,($ivp) # write IV + movdqu $ABEF,($ctx) + movdqu $CDGH,16($ctx) +___ +$code.=<<___ if ($win64); + movaps 0*16(%rsp),%xmm6 + movaps 1*16(%rsp),%xmm7 + movaps 2*16(%rsp),%xmm8 + movaps 3*16(%rsp),%xmm9 + movaps 4*16(%rsp),%xmm10 + movaps 5*16(%rsp),%xmm11 + movaps 6*16(%rsp),%xmm12 + movaps 7*16(%rsp),%xmm13 + movaps 8*16(%rsp),%xmm14 + movaps 9*16(%rsp),%xmm15 + lea 8+10*16(%rsp),%rsp +.Lepilogue_shaext: +___ +$code.=<<___; + ret +.size ${func}_shaext,.-${func}_shaext +___ +} +}}}}} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___ if ($avx); +.extern 
__imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HanderlData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue +___ +$code.=<<___ if ($shaext); + lea aesni_cbc_sha256_enc_shaext(%rip),%r10 + cmp %r10,%rbx + jb .Lnot_in_shaext + + lea (%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + lea 168(%rax),%rax # adjust stack pointer + jmp .Lin_prologue +.Lnot_in_shaext: +___ +$code.=<<___ if ($avx>1); + lea .Lavx2_shortcut(%rip),%r10 + cmp %r10,%rbx # context->RipRbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + + lea 16*$SZ+8*8(%rsi),%rsi # Xmm6- save area + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata + .rva .LSEH_begin_${func}_xop + .rva .LSEH_end_${func}_xop + .rva .LSEH_info_${func}_xop + + .rva .LSEH_begin_${func}_avx + .rva .LSEH_end_${func}_avx + .rva .LSEH_info_${func}_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_${func}_avx2 + .rva .LSEH_end_${func}_avx2 + .rva .LSEH_info_${func}_avx2 +___ +$code.=<<___ if ($shaext); + .rva .LSEH_begin_${func}_shaext + .rva .LSEH_end_${func}_shaext + .rva .LSEH_info_${func}_shaext +___ +$code.=<<___ if ($avx); +.section .xdata +.align 8 +.LSEH_info_${func}_xop: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_xop,.Lepilogue_xop # HandlerData[] + +.LSEH_info_${func}_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] +___ +$code.=<<___ if ($avx>1); +.LSEH_info_${func}_avx2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_avx2,.Lepilogue_avx2 # HandlerData[] +___ +$code.=<<___ if ($shaext); +.LSEH_info_${func}_shaext: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_shaext,.Lepilogue_shaext # HandlerData[] +___ +} + 
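
The se_handler above restores the saved general-purpose and XMM registers only when the faulting RIP falls between a routine's .Lprologue_* and .Lepilogue_* labels, whose image-relative addresses are carried in HandlerData[] of the .xdata records. A minimal standalone Perl sketch of just that range check follows; it is not part of the patch, it does not model RtlVirtualUnwind, and the addresses fed to it are made-up values purely for illustration.

    #!/usr/bin/env perl
    use strict;
    use warnings;

    # HandlerData[0]/[1] hold RVAs of the prologue and epilogue labels.
    sub frame_needs_restore {
        my ($rip, $image_base, $prologue_rva, $epilogue_rva) = @_;
        my $prologue = $image_base + $prologue_rva;   # lea (%rsi,%r10),%r10
        my $epilogue = $image_base + $epilogue_rva;
        return 0 if $rip <  $prologue;    # still in prologue  -> .Lin_prologue
        return 0 if $rip >= $epilogue;    # epilogue already ran -> .Lin_prologue
        return 1;                         # inside the body: pull saved regs
    }

    # hypothetical numbers, for illustration only
    printf "restore saved registers: %d\n",
           frame_needs_restore(0x140001200, 0x140000000, 0x1100, 0x1400);
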
+#################################################################### +sub rex { + local *opcode=shift; + my ($dst,$src)=@_; + my $rex=0; + + $rex|=0x04 if($dst>=8); + $rex|=0x01 if($src>=8); + unshift @opcode,$rex|0x40 if($rex); +} + +{ + my %opcodelet = ( + "sha256rnds2" => 0xcb, + "sha256msg1" => 0xcc, + "sha256msg2" => 0xcd ); + + sub sha256op38 { + my $instr = shift; + + if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x38); + rex(\@opcode,$2,$1); + push @opcode,$opcodelet{$instr}; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } else { + return $instr."\t".@_[0]; + } + } +} + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/gem; +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl index 3dc345b585f623..3deb86aed636e1 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -43,6 +43,17 @@ # Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing # one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09. +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS ECB +# Westmere 3.77/1.37 1.37 1.52 1.27 +# * Bridge 5.07/0.98 0.99 1.09 0.91 +# Haswell 4.44/0.80 0.97 1.03 0.72 +# Atom 5.77/3.56 3.67 4.03 3.46 +# Bulldozer 5.80/0.98 1.05 1.24 0.93 + $PREFIX="aesni"; # if $PREFIX is set to "AES", the script # generates drop-in replacement for # crypto/aes/asm/aes-586.pl:-) @@ -54,8 +65,8 @@ &asm_init($ARGV[0],$0); -if ($PREFIX eq "aesni") { $movekey=*movups; } -else { $movekey=*movups; } +if ($PREFIX eq "aesni") { $movekey=\&movups; } +else { $movekey=\&movups; } $len="eax"; $rounds="ecx"; @@ -196,37 +207,71 @@ sub aesni_generate1 # fully unrolled loop # every *2nd* cycle. Thus 3x interleave was the one providing optimal # utilization, i.e. when subroutine's throughput is virtually same as # of non-interleaved subroutine [for number of input blocks up to 3]. -# This is why it makes no sense to implement 2x subroutine. -# aes[enc|dec] latency in next processor generation is 8, but the -# instructions can be scheduled every cycle. Optimal interleave for -# new processor is therefore 8x, but it's unfeasible to accommodate it -# in XMM registers addreassable in 32-bit mode and therefore 6x is -# used instead... +# This is why it originally made no sense to implement 2x subroutine. +# But times change and it became appropriate to spend extra 192 bytes +# on 2x subroutine on Atom Silvermont account. For processors that +# can schedule aes[enc|dec] every cycle optimal interleave factor +# equals to corresponding instructions latency. 8x is optimal for +# * Bridge, but it's unfeasible to accommodate such implementation +# in XMM registers addreassable in 32-bit mode and therefore maximum +# of 6x is used instead... 
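
The interleave reasoning above can be made concrete with a small back-of-the-envelope model: with n independent blocks in flight, each round of a given block can start only once its own previous round's result is available (latency) and once the same round has been issued for the other blocks. The standalone Perl sketch below is not part of the patch; the latency/issue figures are illustrative assumptions in the spirit of the comment above, meant only to show why the sweet spot sits near latency divided by issue interval (about 3x on a Westmere-class core, 8x on a Bridge-class core, capped at 6x here by the eight XMM registers addressable in 32-bit mode).

    #!/usr/bin/env perl
    use strict;
    use warnings;

    my $rounds = 10;                     # AES-128
    my %cpu = (                          # [latency, cycles between issues] -- assumptions
        'Westmere-like (latency 6, issue every 2nd cycle)' => [6, 2],
        'Bridge-like   (latency 8, issue every cycle)'     => [8, 1],
    );

    for my $name (sort keys %cpu) {
        my ($lat, $issue) = @{ $cpu{$name} };
        print "$name\n";
        for my $n (1, 2, 3, 4, 6, 8) {   # interleave factors
            # a round of one block is gated by its own latency or by issuing
            # the same round for the other blocks, whichever takes longer
            my $per_round = $lat > $n * $issue ? $lat : $n * $issue;
            printf "  %dx interleave: ~%.1f cycles/block\n",
                   $n, $rounds * $per_round / $n;
        }
    }
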
+ +sub aesni_generate2 +{ my $p=shift; + + &function_begin_B("_aesni_${p}rypt2"); + &$movekey ($rndkey0,&QWP(0,$key)); + &shl ($rounds,4); + &$movekey ($rndkey1,&QWP(16,$key)); + &xorps ($inout0,$rndkey0); + &pxor ($inout1,$rndkey0); + &$movekey ($rndkey0,&QWP(32,$key)); + &lea ($key,&DWP(32,$key,$rounds)); + &neg ($rounds); + &add ($rounds,16); + + &set_label("${p}2_loop"); + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + &$movekey ($rndkey1,&QWP(0,$key,$rounds)); + &add ($rounds,32); + eval"&aes${p} ($inout0,$rndkey0)"; + eval"&aes${p} ($inout1,$rndkey0)"; + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); + &jnz (&label("${p}2_loop")); + eval"&aes${p} ($inout0,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + eval"&aes${p}last ($inout0,$rndkey0)"; + eval"&aes${p}last ($inout1,$rndkey0)"; + &ret(); + &function_end_B("_aesni_${p}rypt2"); +} sub aesni_generate3 { my $p=shift; &function_begin_B("_aesni_${p}rypt3"); &$movekey ($rndkey0,&QWP(0,$key)); - &shr ($rounds,1); + &shl ($rounds,4); &$movekey ($rndkey1,&QWP(16,$key)); - &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout2,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(32,$key)); + &lea ($key,&DWP(32,$key,$rounds)); + &neg ($rounds); + &add ($rounds,16); &set_label("${p}3_loop"); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; - &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; - &$movekey ($rndkey1,&QWP(16,$key)); + &$movekey ($rndkey1,&QWP(0,$key,$rounds)); + &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; - &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}3_loop")); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; @@ -248,27 +293,29 @@ sub aesni_generate4 &function_begin_B("_aesni_${p}rypt4"); &$movekey ($rndkey0,&QWP(0,$key)); &$movekey ($rndkey1,&QWP(16,$key)); - &shr ($rounds,1); - &lea ($key,&DWP(32,$key)); + &shl ($rounds,4); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); &pxor ($inout2,$rndkey0); &pxor ($inout3,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(32,$key)); + &lea ($key,&DWP(32,$key,$rounds)); + &neg ($rounds); + &data_byte (0x0f,0x1f,0x40,0x00); + &add ($rounds,16); &set_label("${p}4_loop"); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; - &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; - &$movekey ($rndkey1,&QWP(16,$key)); + &$movekey ($rndkey1,&QWP(0,$key,$rounds)); + &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; - &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}4_loop")); eval"&aes${p} ($inout0,$rndkey1)"; @@ -289,43 +336,43 @@ sub aesni_generate6 &function_begin_B("_aesni_${p}rypt6"); &static_label("_aesni_${p}rypt6_enter"); &$movekey ($rndkey0,&QWP(0,$key)); - &shr ($rounds,1); + &shl ($rounds,4); &$movekey ($rndkey1,&QWP(16,$key)); - &lea ($key,&DWP(32,$key)); &xorps ($inout0,$rndkey0); &pxor ($inout1,$rndkey0); # pxor does better here - eval"&aes${p} ($inout0,$rndkey1)"; &pxor ($inout2,$rndkey0); - eval"&aes${p} ($inout1,$rndkey1)"; + eval"&aes${p} ($inout0,$rndkey1)"; &pxor ($inout3,$rndkey0); - &dec ($rounds); - eval"&aes${p} 
($inout2,$rndkey1)"; &pxor ($inout4,$rndkey0); - eval"&aes${p} ($inout3,$rndkey1)"; + eval"&aes${p} ($inout1,$rndkey1)"; + &lea ($key,&DWP(32,$key,$rounds)); + &neg ($rounds); + eval"&aes${p} ($inout2,$rndkey1)"; &pxor ($inout5,$rndkey0); + &add ($rounds,16); + eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; - &$movekey ($rndkey0,&QWP(0,$key)); eval"&aes${p} ($inout5,$rndkey1)"; + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jmp (&label("_aesni_${p}rypt6_enter")); &set_label("${p}6_loop",16); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; - &dec ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; - &set_label("_aesni_${p}rypt6_enter",16); - &$movekey ($rndkey1,&QWP(16,$key)); + &set_label("_aesni_${p}rypt6_enter"); + &$movekey ($rndkey1,&QWP(0,$key,$rounds)); + &add ($rounds,32); eval"&aes${p} ($inout0,$rndkey0)"; eval"&aes${p} ($inout1,$rndkey0)"; - &lea ($key,&DWP(32,$key)); eval"&aes${p} ($inout2,$rndkey0)"; eval"&aes${p} ($inout3,$rndkey0)"; eval"&aes${p} ($inout4,$rndkey0)"; eval"&aes${p} ($inout5,$rndkey0)"; - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("${p}6_loop")); eval"&aes${p} ($inout0,$rndkey1)"; @@ -343,6 +390,8 @@ sub aesni_generate6 &ret(); &function_end_B("_aesni_${p}rypt6"); } +&aesni_generate2("enc") if ($PREFIX eq "aesni"); +&aesni_generate2("dec"); &aesni_generate3("enc") if ($PREFIX eq "aesni"); &aesni_generate3("dec"); &aesni_generate4("enc") if ($PREFIX eq "aesni"); @@ -446,8 +495,7 @@ sub aesni_generate6 &jmp (&label("ecb_ret")); &set_label("ecb_enc_two",16); - &xorps ($inout2,$inout2); - &call ("_aesni_encrypt3"); + &call ("_aesni_encrypt2"); &movups (&QWP(0,$out),$inout0); &movups (&QWP(0x10,$out),$inout1); &jmp (&label("ecb_ret")); @@ -547,8 +595,7 @@ sub aesni_generate6 &jmp (&label("ecb_ret")); &set_label("ecb_dec_two",16); - &xorps ($inout2,$inout2); - &call ("_aesni_decrypt3"); + &call ("_aesni_decrypt2"); &movups (&QWP(0,$out),$inout0); &movups (&QWP(0x10,$out),$inout1); &jmp (&label("ecb_ret")); @@ -610,11 +657,13 @@ sub aesni_generate6 &mov (&DWP(24,"esp"),$key_); &mov (&DWP(28,"esp"),$key_); - &shr ($rounds,1); + &shl ($rounds,4); + &mov ($rounds_,16); &lea ($key_,&DWP(0,$key)); &movdqa ($inout3,&QWP(0,"esp")); &movdqa ($inout0,$ivec); - &mov ($rounds_,$rounds); + &lea ($key,&DWP(32,$key,$rounds)); + &sub ($rounds_,$rounds); &pshufb ($ivec,$inout3); &set_label("ccm64_enc_outer"); @@ -625,33 +674,31 @@ sub aesni_generate6 &xorps ($inout0,$rndkey0); &$movekey ($rndkey1,&QWP(16,$key_)); &xorps ($rndkey0,$in0); - &lea ($key,&DWP(32,$key_)); &xorps ($cmac,$rndkey0); # cmac^=inp - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(32,$key_)); &set_label("ccm64_enc2_loop"); &aesenc ($inout0,$rndkey1); - &dec ($rounds); &aesenc ($cmac,$rndkey1); - &$movekey ($rndkey1,&QWP(16,$key)); + &$movekey ($rndkey1,&QWP(0,$key,$rounds)); + &add ($rounds,32); &aesenc ($inout0,$rndkey0); - &lea ($key,&DWP(32,$key)); &aesenc ($cmac,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("ccm64_enc2_loop")); &aesenc ($inout0,$rndkey1); &aesenc ($cmac,$rndkey1); &paddq ($ivec,&QWP(16,"esp")); + &dec ($len); &aesenclast ($inout0,$rndkey0); &aesenclast ($cmac,$rndkey0); - &dec ($len); &lea ($inp,&DWP(16,$inp)); &xorps ($in0,$inout0); # inp^=E(ivec) &movdqa ($inout0,$ivec); &movups (&QWP(0,$out),$in0); # save output - &lea 
($out,&DWP(16,$out)); &pshufb ($inout0,$inout3); + &lea ($out,&DWP(16,$out)); &jnz (&label("ccm64_enc_outer")); &mov ("esp",&DWP(48,"esp")); @@ -700,15 +747,19 @@ sub aesni_generate6 { &aesni_inline_generate1("enc"); } else { &call ("_aesni_encrypt1"); } + &shl ($rounds_,4); + &mov ($rounds,16); &movups ($in0,&QWP(0,$inp)); # load inp &paddq ($ivec,&QWP(16,"esp")); &lea ($inp,&QWP(16,$inp)); + &sub ($rounds,$rounds_); + &lea ($key,&DWP(32,$key_,$rounds_)); + &mov ($rounds_,$rounds); &jmp (&label("ccm64_dec_outer")); &set_label("ccm64_dec_outer",16); &xorps ($in0,$inout0); # inp ^= E(ivec) &movdqa ($inout0,$ivec); - &mov ($rounds,$rounds_); &movups (&QWP(0,$out),$in0); # save output &lea ($out,&DWP(16,$out)); &pshufb ($inout0,$inout3); @@ -717,34 +768,33 @@ sub aesni_generate6 &jz (&label("ccm64_dec_break")); &$movekey ($rndkey0,&QWP(0,$key_)); - &shr ($rounds,1); + &mov ($rounds,$rounds_); &$movekey ($rndkey1,&QWP(16,$key_)); &xorps ($in0,$rndkey0); - &lea ($key,&DWP(32,$key_)); &xorps ($inout0,$rndkey0); &xorps ($cmac,$in0); # cmac^=out - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(32,$key_)); &set_label("ccm64_dec2_loop"); &aesenc ($inout0,$rndkey1); - &dec ($rounds); &aesenc ($cmac,$rndkey1); - &$movekey ($rndkey1,&QWP(16,$key)); + &$movekey ($rndkey1,&QWP(0,$key,$rounds)); + &add ($rounds,32); &aesenc ($inout0,$rndkey0); - &lea ($key,&DWP(32,$key)); &aesenc ($cmac,$rndkey0); - &$movekey ($rndkey0,&QWP(0,$key)); + &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); &jnz (&label("ccm64_dec2_loop")); &movups ($in0,&QWP(0,$inp)); # load inp &paddq ($ivec,&QWP(16,"esp")); &aesenc ($inout0,$rndkey1); &aesenc ($cmac,$rndkey1); - &lea ($inp,&QWP(16,$inp)); &aesenclast ($inout0,$rndkey0); &aesenclast ($cmac,$rndkey0); + &lea ($inp,&QWP(16,$inp)); &jmp (&label("ccm64_dec_outer")); &set_label("ccm64_dec_break",16); + &mov ($rounds,&DWP(240,$key_)); &mov ($key,$key_); if ($inline) { &aesni_inline_generate1("enc",$cmac,$in0); } @@ -763,7 +813,7 @@ sub aesni_generate6 # const char *ivec); # # Handles only complete blocks, operates on 32-bit counter and -# does not update *ivec! (see engine/eng_aesni.c for details) +# does not update *ivec! 
(see crypto/modes/ctr128.c for details) # # stack layout: # 0 pshufb mask @@ -810,66 +860,61 @@ sub aesni_generate6 # compose 2 vectors of 3x32-bit counters &bswap ($rounds_); - &pxor ($rndkey1,$rndkey1); &pxor ($rndkey0,$rndkey0); + &pxor ($rndkey1,$rndkey1); &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask - &pinsrd ($rndkey1,$rounds_,0); + &pinsrd ($rndkey0,$rounds_,0); &lea ($key_,&DWP(3,$rounds_)); - &pinsrd ($rndkey0,$key_,0); + &pinsrd ($rndkey1,$key_,0); &inc ($rounds_); - &pinsrd ($rndkey1,$rounds_,1); + &pinsrd ($rndkey0,$rounds_,1); &inc ($key_); - &pinsrd ($rndkey0,$key_,1); + &pinsrd ($rndkey1,$key_,1); &inc ($rounds_); - &pinsrd ($rndkey1,$rounds_,2); + &pinsrd ($rndkey0,$rounds_,2); &inc ($key_); - &pinsrd ($rndkey0,$key_,2); - &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet - &pshufb ($rndkey1,$inout0); # byte swap - &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet + &pinsrd ($rndkey1,$key_,2); + &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet &pshufb ($rndkey0,$inout0); # byte swap + &movdqu ($inout4,&QWP(0,$key)); # key[0] + &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet + &pshufb ($rndkey1,$inout0); # byte swap - &pshufd ($inout0,$rndkey1,3<<6); # place counter to upper dword - &pshufd ($inout1,$rndkey1,2<<6); + &pshufd ($inout0,$rndkey0,3<<6); # place counter to upper dword + &pshufd ($inout1,$rndkey0,2<<6); &cmp ($len,6); &jb (&label("ctr32_tail")); - &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec - &shr ($rounds,1); + &pxor ($inout5,$inout4); # counter-less ivec^key[0] + &shl ($rounds,4); + &mov ($rounds_,16); + &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec^key[0] &mov ($key_,$key); # backup $key - &mov ($rounds_,$rounds); # backup $rounds + &sub ($rounds_,$rounds); # backup twisted $rounds + &lea ($key,&DWP(32,$key,$rounds)); &sub ($len,6); &jmp (&label("ctr32_loop6")); &set_label("ctr32_loop6",16); - &pshufd ($inout2,$rndkey1,1<<6); - &movdqa ($rndkey1,&QWP(32,"esp")); # pull counter-less ivec - &pshufd ($inout3,$rndkey0,3<<6); - &por ($inout0,$rndkey1); # merge counter-less ivec - &pshufd ($inout4,$rndkey0,2<<6); - &por ($inout1,$rndkey1); - &pshufd ($inout5,$rndkey0,1<<6); - &por ($inout2,$rndkey1); - &por ($inout3,$rndkey1); - &por ($inout4,$rndkey1); - &por ($inout5,$rndkey1); - - # inlining _aesni_encrypt6's prologue gives ~4% improvement... - &$movekey ($rndkey0,&QWP(0,$key_)); - &$movekey ($rndkey1,&QWP(16,$key_)); - &lea ($key,&DWP(32,$key_)); - &dec ($rounds); - &pxor ($inout0,$rndkey0); + # inlining _aesni_encrypt6's prologue gives ~6% improvement... 
+ &pshufd ($inout2,$rndkey0,1<<6); + &movdqa ($rndkey0,&QWP(32,"esp")); # pull counter-less ivec + &pshufd ($inout3,$rndkey1,3<<6); + &pxor ($inout0,$rndkey0); # merge counter-less ivec + &pshufd ($inout4,$rndkey1,2<<6); &pxor ($inout1,$rndkey0); - &aesenc ($inout0,$rndkey1); + &pshufd ($inout5,$rndkey1,1<<6); + &$movekey ($rndkey1,&QWP(16,$key_)); &pxor ($inout2,$rndkey0); - &aesenc ($inout1,$rndkey1); &pxor ($inout3,$rndkey0); - &aesenc ($inout2,$rndkey1); + &aesenc ($inout0,$rndkey1); &pxor ($inout4,$rndkey0); - &aesenc ($inout3,$rndkey1); &pxor ($inout5,$rndkey0); + &aesenc ($inout1,$rndkey1); + &$movekey ($rndkey0,&QWP(32,$key_)); + &mov ($rounds,$rounds_); + &aesenc ($inout2,$rndkey1); + &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); - &$movekey ($rndkey0,&QWP(0,$key)); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); @@ -882,12 +927,12 @@ sub aesni_generate6 &movups (&QWP(0,$out),$inout0); &movdqa ($rndkey0,&QWP(16,"esp")); # load increment &xorps ($inout2,$rndkey1); - &movdqa ($rndkey1,&QWP(48,"esp")); # load 1st triplet + &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet &movups (&QWP(0x10,$out),$inout1); &movups (&QWP(0x20,$out),$inout2); - &paddd ($rndkey1,$rndkey0); # 1st triplet increment - &paddd ($rndkey0,&QWP(64,"esp")); # 2nd triplet increment + &paddd ($rndkey1,$rndkey0); # 2nd triplet increment + &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask &movups ($inout1,&QWP(0x30,$inp)); @@ -895,44 +940,44 @@ sub aesni_generate6 &xorps ($inout3,$inout1); &movups ($inout1,&QWP(0x50,$inp)); &lea ($inp,&DWP(0x60,$inp)); - &movdqa (&QWP(48,"esp"),$rndkey1); # save 1st triplet - &pshufb ($rndkey1,$inout0); # byte swap + &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet + &pshufb ($rndkey0,$inout0); # byte swap &xorps ($inout4,$inout2); &movups (&QWP(0x30,$out),$inout3); &xorps ($inout5,$inout1); - &movdqa (&QWP(64,"esp"),$rndkey0); # save 2nd triplet - &pshufb ($rndkey0,$inout0); # byte swap + &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet + &pshufb ($rndkey1,$inout0); # byte swap &movups (&QWP(0x40,$out),$inout4); - &pshufd ($inout0,$rndkey1,3<<6); + &pshufd ($inout0,$rndkey0,3<<6); &movups (&QWP(0x50,$out),$inout5); &lea ($out,&DWP(0x60,$out)); - &mov ($rounds,$rounds_); - &pshufd ($inout1,$rndkey1,2<<6); + &pshufd ($inout1,$rndkey0,2<<6); &sub ($len,6); &jnc (&label("ctr32_loop6")); &add ($len,6); &jz (&label("ctr32_ret")); + &movdqu ($inout5,&QWP(0,$key_)); &mov ($key,$key_); - &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds - &movdqa ($inout5,&QWP(32,"esp")); # pull count-less ivec + &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec + &mov ($rounds,&DWP(240,$key_)); # restore $rounds &set_label("ctr32_tail"); &por ($inout0,$inout5); &cmp ($len,2); &jb (&label("ctr32_one")); - &pshufd ($inout2,$rndkey1,1<<6); + &pshufd ($inout2,$rndkey0,1<<6); &por ($inout1,$inout5); &je (&label("ctr32_two")); - &pshufd ($inout3,$rndkey0,3<<6); + &pshufd ($inout3,$rndkey1,3<<6); &por ($inout2,$inout5); &cmp ($len,4); &jb (&label("ctr32_three")); - &pshufd ($inout4,$rndkey0,2<<6); + &pshufd ($inout4,$rndkey1,2<<6); &por ($inout3,$inout5); &je (&label("ctr32_four")); @@ -970,7 +1015,7 @@ sub aesni_generate6 &jmp (&label("ctr32_ret")); &set_label("ctr32_two",16); - &call ("_aesni_encrypt3"); + &call ("_aesni_encrypt2"); &movups ($inout3,&QWP(0,$inp)); &movups ($inout4,&QWP(0x10,$inp)); &xorps ($inout0,$inout3); @@ -1057,8 +1102,10 @@ sub aesni_generate6 &sub ($len,16*6); &jc 
(&label("xts_enc_short")); - &shr ($rounds,1); - &mov ($rounds_,$rounds); + &shl ($rounds,4); + &mov ($rounds_,16); + &sub ($rounds_,$rounds); + &lea ($key,&DWP(32,$key,$rounds)); &jmp (&label("xts_enc_loop6")); &set_label("xts_enc_loop6",16); @@ -1080,6 +1127,7 @@ sub aesni_generate6 &pxor ($inout5,$tweak); # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] + &mov ($rounds,$rounds_); # restore $rounds &movdqu ($inout1,&QWP(16*1,$inp)); &xorps ($inout0,$rndkey0); # input^=rndkey[0] &movdqu ($inout2,&QWP(16*2,$inp)); @@ -1096,19 +1144,17 @@ sub aesni_generate6 &pxor ($inout5,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key_)); - &lea ($key,&DWP(32,$key_)); &pxor ($inout1,&QWP(16*1,"esp")); - &aesenc ($inout0,$rndkey1); &pxor ($inout2,&QWP(16*2,"esp")); - &aesenc ($inout1,$rndkey1); + &aesenc ($inout0,$rndkey1); &pxor ($inout3,&QWP(16*3,"esp")); - &dec ($rounds); - &aesenc ($inout2,$rndkey1); &pxor ($inout4,&QWP(16*4,"esp")); - &aesenc ($inout3,$rndkey1); + &aesenc ($inout1,$rndkey1); &pxor ($inout5,$rndkey0); + &$movekey ($rndkey0,&QWP(32,$key_)); + &aesenc ($inout2,$rndkey1); + &aesenc ($inout3,$rndkey1); &aesenc ($inout4,$rndkey1); - &$movekey ($rndkey0,&QWP(0,$key)); &aesenc ($inout5,$rndkey1); &call (&label("_aesni_encrypt6_enter")); @@ -1135,13 +1181,12 @@ sub aesni_generate6 &paddq ($tweak,$tweak); # &psllq($tweak,1); &pand ($twres,$twmask); # isolate carry and residue &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &mov ($rounds,$rounds_); # restore $rounds &pxor ($tweak,$twres); &sub ($len,16*6); &jnc (&label("xts_enc_loop6")); - &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds + &mov ($rounds,&DWP(240,$key_)); # restore $rounds &mov ($key,$key_); # restore $key &mov ($rounds_,$rounds); @@ -1241,9 +1286,8 @@ sub aesni_generate6 &lea ($inp,&DWP(16*2,$inp)); &xorps ($inout0,$inout3); # input^=tweak &xorps ($inout1,$inout4); - &xorps ($inout2,$inout2); - &call ("_aesni_encrypt3"); + &call ("_aesni_encrypt2"); &xorps ($inout0,$inout3); # output^=tweak &xorps ($inout1,$inout4); @@ -1399,8 +1443,10 @@ sub aesni_generate6 &sub ($len,16*6); &jc (&label("xts_dec_short")); - &shr ($rounds,1); - &mov ($rounds_,$rounds); + &shl ($rounds,4); + &mov ($rounds_,16); + &sub ($rounds_,$rounds); + &lea ($key,&DWP(32,$key,$rounds)); &jmp (&label("xts_dec_loop6")); &set_label("xts_dec_loop6",16); @@ -1422,6 +1468,7 @@ sub aesni_generate6 &pxor ($inout5,$tweak); # inline _aesni_encrypt6 prologue and flip xor with tweak and key[0] + &mov ($rounds,$rounds_); &movdqu ($inout1,&QWP(16*1,$inp)); &xorps ($inout0,$rndkey0); # input^=rndkey[0] &movdqu ($inout2,&QWP(16*2,$inp)); @@ -1438,19 +1485,17 @@ sub aesni_generate6 &pxor ($inout5,$rndkey1); &$movekey ($rndkey1,&QWP(16,$key_)); - &lea ($key,&DWP(32,$key_)); &pxor ($inout1,&QWP(16*1,"esp")); - &aesdec ($inout0,$rndkey1); &pxor ($inout2,&QWP(16*2,"esp")); - &aesdec ($inout1,$rndkey1); + &aesdec ($inout0,$rndkey1); &pxor ($inout3,&QWP(16*3,"esp")); - &dec ($rounds); - &aesdec ($inout2,$rndkey1); &pxor ($inout4,&QWP(16*4,"esp")); - &aesdec ($inout3,$rndkey1); + &aesdec ($inout1,$rndkey1); &pxor ($inout5,$rndkey0); + &$movekey ($rndkey0,&QWP(32,$key_)); + &aesdec ($inout2,$rndkey1); + &aesdec ($inout3,$rndkey1); &aesdec ($inout4,$rndkey1); - &$movekey ($rndkey0,&QWP(0,$key)); &aesdec ($inout5,$rndkey1); &call (&label("_aesni_decrypt6_enter")); @@ -1477,13 +1522,12 @@ sub aesni_generate6 &paddq ($tweak,$tweak); # &psllq($tweak,1); &pand ($twres,$twmask); # isolate carry and residue &pcmpgtd($twtmp,$tweak); # broadcast upper bits - &mov 
($rounds,$rounds_); # restore $rounds &pxor ($tweak,$twres); &sub ($len,16*6); &jnc (&label("xts_dec_loop6")); - &lea ($rounds,&DWP(1,"",$rounds,2)); # restore $rounds + &mov ($rounds,&DWP(240,$key_)); # restore $rounds &mov ($key,$key_); # restore $key &mov ($rounds_,$rounds); @@ -1584,7 +1628,7 @@ sub aesni_generate6 &xorps ($inout0,$inout3); # input^=tweak &xorps ($inout1,$inout4); - &call ("_aesni_decrypt3"); + &call ("_aesni_decrypt2"); &xorps ($inout0,$inout3); # output^=tweak &xorps ($inout1,$inout4); @@ -1816,7 +1860,7 @@ sub aesni_generate6 &movups (&QWP(0x10,$out),$inout1); &lea ($inp,&DWP(0x60,$inp)); &movups (&QWP(0x20,$out),$inout2); - &mov ($rounds,$rounds_) # restore $rounds + &mov ($rounds,$rounds_); # restore $rounds &movups (&QWP(0x30,$out),$inout3); &mov ($key,$key_); # restore $key &movups (&QWP(0x40,$out),$inout4); @@ -1884,8 +1928,7 @@ sub aesni_generate6 &jmp (&label("cbc_dec_tail_collected")); &set_label("cbc_dec_two",16); - &xorps ($inout2,$inout2); - &call ("_aesni_decrypt3"); + &call ("_aesni_decrypt2"); &xorps ($inout0,$ivec); &xorps ($inout1,$in0); &movups (&QWP(0,$out),$inout0); @@ -2015,7 +2058,7 @@ sub aesni_generate6 &set_label("12rounds",16); &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey &mov ($rounds,11); - &$movekey (&QWP(-16,$key),"xmm0") # round 0 + &$movekey (&QWP(-16,$key),"xmm0"); # round 0 &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 &call (&label("key_192a_cold")); &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 @@ -2152,7 +2195,7 @@ sub aesni_generate6 &mov ($key,&wparam(2)); &call ("_aesni_set_encrypt_key"); &mov ($key,&wparam(2)); - &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key + &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key &test ("eax","eax"); &jnz (&label("dec_key_ret")); &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl index c9270dfddc182d..5f6174635f68f5 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -129,8 +129,8 @@ # # Further data for other parallelizable modes: # -# CBC decrypt 1.16 0.93 0.93 -# CTR 1.14 0.91 n/a +# CBC decrypt 1.16 0.93 0.74 +# CTR 1.14 0.91 0.74 # # Well, given 3x column it's probably inappropriate to call the limit # asymptotic, if it can be surpassed, isn't it? What happens there? @@ -153,10 +153,25 @@ # April 2011 # -# Add aesni_xts_[en|de]crypt. Westmere spends 1.33 cycles processing -# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.97. Just like +# Add aesni_xts_[en|de]crypt. Westmere spends 1.25 cycles processing +# one byte out of 8KB with 128-bit key, Sandy Bridge - 0.90. Just like # in CTR mode AES instruction interleave factor was chosen to be 6x. +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). 
+# +# CBC en-/decrypt CTR XTS ECB +# Westmere 3.77/1.25 1.25 1.25 1.26 +# * Bridge 5.07/0.74 0.75 0.90 0.85 +# Haswell 4.44/0.63 0.63 0.73 0.63 +# Atom 5.75/3.54 3.56 4.12 3.87(*) +# Bulldozer 5.77/0.70 0.72 0.90 0.70 +# +# (*) Atom ECB result is suboptimal because of penalties incurred +# by operations on %xmm8-15. As ECB is not considered +# critical, nothing was done to mitigate the problem. + $PREFIX="aesni"; # if $PREFIX is set to "AES", the script # generates drop-in replacement for # crypto/aes/asm/aes-x86_64.pl:-) @@ -180,6 +195,7 @@ ("%rdi","%rsi","%rdx","%rcx"); # Unix order $code=".text\n"; +$code.=".extern OPENSSL_ia32cap_P\n"; $rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! # this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... @@ -272,10 +288,49 @@ sub aesni_generate1 { # every *2nd* cycle. Thus 3x interleave was the one providing optimal # utilization, i.e. when subroutine's throughput is virtually same as # of non-interleaved subroutine [for number of input blocks up to 3]. -# This is why it makes no sense to implement 2x subroutine. -# aes[enc|dec] latency in next processor generation is 8, but the -# instructions can be scheduled every cycle. Optimal interleave for -# new processor is therefore 8x... +# This is why it originally made no sense to implement 2x subroutine. +# But times change and it became appropriate to spend extra 192 bytes +# on 2x subroutine on Atom Silvermont account. For processors that +# can schedule aes[enc|dec] every cycle optimal interleave factor +# equals to corresponding instructions latency. 8x is optimal for +# * Bridge and "super-optimal" for other Intel CPUs... + +sub aesni_generate2 { +my $dir=shift; +# As already mentioned it takes in $key and $rounds, which are *not* +# preserved. $inout[0-1] is cipher/clear text... 
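
As a forward reference for the generator body below: instead of halving $rounds and advancing $key by 32 every iteration as the old code did, the rewritten _aesni_${dir}rypt2 (and the 3/4/6/8-block variants) shifts $rounds left by 4, points $key just past the end of the key schedule, and walks it with a negative index bumped by 32 per iteration, so the loop condition comes for free from the add. The standalone Perl sketch below simulates only that address arithmetic; it assumes $rounds is the value aesni_set_encrypt_key stores at 240($key) (9, 11 or 13, one less than the AES round count, as the "rounds-1" comment elsewhere in this patch suggests) and is not part of the patch itself.

    #!/usr/bin/env perl
    use strict;
    use warnings;

    for my $rounds (9, 11, 13) {           # as stored by aesni_set_encrypt_key
        my $key  = 0;                      # pretend base of the key schedule
        my $rax  = $rounds << 4;           # shl  \$4,$rounds
        my @used = (0, 16, 32);            # K0 (whitening), K1, K2 preloaded
        $key += 32 + $rax;                 # lea  32($key,$rounds),$key
        $rax  = 16 - $rax;                 # neg  %rax; add \$16,%rax
        while (1) {
            push @used, $key + $rax;       # $movkey ($key,%rax),$rndkey1
            $rax += 32;                    # add  \$32,%rax
            push @used, $key + $rax - 16;  # $movkey -16($key,%rax),$rndkey0
            last if $rax == 0;             # jnz  .L*_loop2
        }
        # visits 0,16,...,16*($rounds+1): the whole schedule, in order
        printf "rounds=%2d -> offsets %s\n", $rounds, join(',', @used);
    }
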
+$code.=<<___; +.type _aesni_${dir}rypt2,\@abi-omnipotent +.align 16 +_aesni_${dir}rypt2: + $movkey ($key),$rndkey0 + shl \$4,$rounds + $movkey 16($key),$rndkey1 + xorps $rndkey0,$inout0 + xorps $rndkey0,$inout1 + $movkey 32($key),$rndkey0 + lea 32($key,$rounds),$key + neg %rax # $rounds + add \$16,%rax + +.L${dir}_loop2: + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + $movkey ($key,%rax),$rndkey1 + add \$32,%rax + aes${dir} $rndkey0,$inout0 + aes${dir} $rndkey0,$inout1 + $movkey -16($key,%rax),$rndkey0 + jnz .L${dir}_loop2 + + aes${dir} $rndkey1,$inout0 + aes${dir} $rndkey1,$inout1 + aes${dir}last $rndkey0,$inout0 + aes${dir}last $rndkey0,$inout1 + ret +.size _aesni_${dir}rypt2,.-_aesni_${dir}rypt2 +___ +} sub aesni_generate3 { my $dir=shift; # As already mentioned it takes in $key and $rounds, which are *not* @@ -285,25 +340,26 @@ sub aesni_generate3 { .align 16 _aesni_${dir}rypt3: $movkey ($key),$rndkey0 - shr \$1,$rounds + shl \$4,$rounds $movkey 16($key),$rndkey1 - lea 32($key),$key xorps $rndkey0,$inout0 xorps $rndkey0,$inout1 xorps $rndkey0,$inout2 - $movkey ($key),$rndkey0 + $movkey 32($key),$rndkey0 + lea 32($key,$rounds),$key + neg %rax # $rounds + add \$16,%rax .L${dir}_loop3: aes${dir} $rndkey1,$inout0 aes${dir} $rndkey1,$inout1 - dec $rounds aes${dir} $rndkey1,$inout2 - $movkey 16($key),$rndkey1 + $movkey ($key,%rax),$rndkey1 + add \$32,%rax aes${dir} $rndkey0,$inout0 aes${dir} $rndkey0,$inout1 - lea 32($key),$key aes${dir} $rndkey0,$inout2 - $movkey ($key),$rndkey0 + $movkey -16($key,%rax),$rndkey0 jnz .L${dir}_loop3 aes${dir} $rndkey1,$inout0 @@ -329,28 +385,30 @@ sub aesni_generate4 { .align 16 _aesni_${dir}rypt4: $movkey ($key),$rndkey0 - shr \$1,$rounds + shl \$4,$rounds $movkey 16($key),$rndkey1 - lea 32($key),$key xorps $rndkey0,$inout0 xorps $rndkey0,$inout1 xorps $rndkey0,$inout2 xorps $rndkey0,$inout3 - $movkey ($key),$rndkey0 + $movkey 32($key),$rndkey0 + lea 32($key,$rounds),$key + neg %rax # $rounds + .byte 0x0f,0x1f,0x00 + add \$16,%rax .L${dir}_loop4: aes${dir} $rndkey1,$inout0 aes${dir} $rndkey1,$inout1 - dec $rounds aes${dir} $rndkey1,$inout2 aes${dir} $rndkey1,$inout3 - $movkey 16($key),$rndkey1 + $movkey ($key,%rax),$rndkey1 + add \$32,%rax aes${dir} $rndkey0,$inout0 aes${dir} $rndkey0,$inout1 - lea 32($key),$key aes${dir} $rndkey0,$inout2 aes${dir} $rndkey0,$inout3 - $movkey ($key),$rndkey0 + $movkey -16($key,%rax),$rndkey0 jnz .L${dir}_loop4 aes${dir} $rndkey1,$inout0 @@ -374,43 +432,43 @@ sub aesni_generate6 { .align 16 _aesni_${dir}rypt6: $movkey ($key),$rndkey0 - shr \$1,$rounds + shl \$4,$rounds $movkey 16($key),$rndkey1 - lea 32($key),$key xorps $rndkey0,$inout0 pxor $rndkey0,$inout1 - aes${dir} $rndkey1,$inout0 pxor $rndkey0,$inout2 + aes${dir} $rndkey1,$inout0 + lea 32($key,$rounds),$key + neg %rax # $rounds aes${dir} $rndkey1,$inout1 pxor $rndkey0,$inout3 - aes${dir} $rndkey1,$inout2 pxor $rndkey0,$inout4 - aes${dir} $rndkey1,$inout3 + aes${dir} $rndkey1,$inout2 pxor $rndkey0,$inout5 - dec $rounds + add \$16,%rax + aes${dir} $rndkey1,$inout3 aes${dir} $rndkey1,$inout4 - $movkey ($key),$rndkey0 aes${dir} $rndkey1,$inout5 + $movkey -16($key,%rax),$rndkey0 jmp .L${dir}_loop6_enter .align 16 .L${dir}_loop6: aes${dir} $rndkey1,$inout0 aes${dir} $rndkey1,$inout1 - dec $rounds aes${dir} $rndkey1,$inout2 aes${dir} $rndkey1,$inout3 aes${dir} $rndkey1,$inout4 aes${dir} $rndkey1,$inout5 -.L${dir}_loop6_enter: # happens to be 16-byte aligned - $movkey 16($key),$rndkey1 +.L${dir}_loop6_enter: + $movkey ($key,%rax),$rndkey1 + add \$32,%rax aes${dir} 
$rndkey0,$inout0 aes${dir} $rndkey0,$inout1 - lea 32($key),$key aes${dir} $rndkey0,$inout2 aes${dir} $rndkey0,$inout3 aes${dir} $rndkey0,$inout4 aes${dir} $rndkey0,$inout5 - $movkey ($key),$rndkey0 + $movkey -16($key,%rax),$rndkey0 jnz .L${dir}_loop6 aes${dir} $rndkey1,$inout0 @@ -438,52 +496,51 @@ sub aesni_generate8 { .align 16 _aesni_${dir}rypt8: $movkey ($key),$rndkey0 - shr \$1,$rounds + shl \$4,$rounds $movkey 16($key),$rndkey1 - lea 32($key),$key xorps $rndkey0,$inout0 xorps $rndkey0,$inout1 - aes${dir} $rndkey1,$inout0 pxor $rndkey0,$inout2 - aes${dir} $rndkey1,$inout1 pxor $rndkey0,$inout3 - aes${dir} $rndkey1,$inout2 pxor $rndkey0,$inout4 - aes${dir} $rndkey1,$inout3 + lea 32($key,$rounds),$key + neg %rax # $rounds + aes${dir} $rndkey1,$inout0 + add \$16,%rax pxor $rndkey0,$inout5 - dec $rounds - aes${dir} $rndkey1,$inout4 + aes${dir} $rndkey1,$inout1 pxor $rndkey0,$inout6 - aes${dir} $rndkey1,$inout5 pxor $rndkey0,$inout7 - $movkey ($key),$rndkey0 + aes${dir} $rndkey1,$inout2 + aes${dir} $rndkey1,$inout3 + aes${dir} $rndkey1,$inout4 + aes${dir} $rndkey1,$inout5 aes${dir} $rndkey1,$inout6 aes${dir} $rndkey1,$inout7 - $movkey 16($key),$rndkey1 + $movkey -16($key,%rax),$rndkey0 jmp .L${dir}_loop8_enter .align 16 .L${dir}_loop8: aes${dir} $rndkey1,$inout0 aes${dir} $rndkey1,$inout1 - dec $rounds aes${dir} $rndkey1,$inout2 aes${dir} $rndkey1,$inout3 aes${dir} $rndkey1,$inout4 aes${dir} $rndkey1,$inout5 aes${dir} $rndkey1,$inout6 aes${dir} $rndkey1,$inout7 - $movkey 16($key),$rndkey1 -.L${dir}_loop8_enter: # happens to be 16-byte aligned +.L${dir}_loop8_enter: + $movkey ($key,%rax),$rndkey1 + add \$32,%rax aes${dir} $rndkey0,$inout0 aes${dir} $rndkey0,$inout1 - lea 32($key),$key aes${dir} $rndkey0,$inout2 aes${dir} $rndkey0,$inout3 aes${dir} $rndkey0,$inout4 aes${dir} $rndkey0,$inout5 aes${dir} $rndkey0,$inout6 aes${dir} $rndkey0,$inout7 - $movkey ($key),$rndkey0 + $movkey -16($key,%rax),$rndkey0 jnz .L${dir}_loop8 aes${dir} $rndkey1,$inout0 @@ -506,6 +563,8 @@ sub aesni_generate8 { .size _aesni_${dir}rypt8,.-_aesni_${dir}rypt8 ___ } +&aesni_generate2("enc") if ($PREFIX eq "aesni"); +&aesni_generate2("dec"); &aesni_generate3("enc") if ($PREFIX eq "aesni"); &aesni_generate3("dec"); &aesni_generate4("enc") if ($PREFIX eq "aesni"); @@ -637,8 +696,7 @@ sub aesni_generate8 { jmp .Lecb_ret .align 16 .Lecb_enc_two: - xorps $inout2,$inout2 - call _aesni_encrypt3 + call _aesni_encrypt2 movups $inout0,($out) movups $inout1,0x10($out) jmp .Lecb_ret @@ -774,8 +832,7 @@ sub aesni_generate8 { jmp .Lecb_ret .align 16 .Lecb_dec_two: - xorps $inout2,$inout2 - call _aesni_decrypt3 + call _aesni_decrypt2 movups $inout0,($out) movups $inout1,0x10($out) jmp .Lecb_ret @@ -842,7 +899,8 @@ sub aesni_generate8 { { my $cmac="%r9"; # 6th argument -my $increment="%xmm6"; +my $increment="%xmm9"; +my $iv="%xmm6"; my $bswap_mask="%xmm7"; $code.=<<___; @@ -865,49 +923,49 @@ sub aesni_generate8 { movdqa .Lincrement64(%rip),$increment movdqa .Lbswap_mask(%rip),$bswap_mask - shr \$1,$rounds + shl \$4,$rounds + mov \$16,$rnds_ lea 0($key),$key_ movdqu ($cmac),$inout1 movdqa $iv,$inout0 - mov $rounds,$rnds_ + lea 32($key,$rounds),$key # end of key schedule pshufb $bswap_mask,$iv + sub %rax,%r10 # twisted $rounds jmp .Lccm64_enc_outer .align 16 .Lccm64_enc_outer: $movkey ($key_),$rndkey0 - mov $rnds_,$rounds + mov %r10,%rax movups ($inp),$in0 # load inp xorps $rndkey0,$inout0 # counter $movkey 16($key_),$rndkey1 xorps $in0,$rndkey0 - lea 32($key_),$key xorps $rndkey0,$inout1 # cmac^=inp - $movkey ($key),$rndkey0 + $movkey 
32($key_),$rndkey0 .Lccm64_enc2_loop: aesenc $rndkey1,$inout0 - dec $rounds aesenc $rndkey1,$inout1 - $movkey 16($key),$rndkey1 + $movkey ($key,%rax),$rndkey1 + add \$32,%rax aesenc $rndkey0,$inout0 - lea 32($key),$key aesenc $rndkey0,$inout1 - $movkey 0($key),$rndkey0 + $movkey -16($key,%rax),$rndkey0 jnz .Lccm64_enc2_loop aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 paddq $increment,$iv + dec $len aesenclast $rndkey0,$inout0 aesenclast $rndkey0,$inout1 - dec $len lea 16($inp),$inp xorps $inout0,$in0 # inp ^= E(iv) movdqa $iv,$inout0 movups $in0,($out) # save output - lea 16($out),$out pshufb $bswap_mask,$inout0 + lea 16($out),$out jnz .Lccm64_enc_outer movups $inout1,($cmac) @@ -953,15 +1011,19 @@ sub aesni_generate8 { ___ &aesni_generate1("enc",$key,$rounds); $code.=<<___; + shl \$4,$rnds_ + mov \$16,$rounds movups ($inp),$in0 # load inp paddq $increment,$iv lea 16($inp),$inp + sub %r10,%rax # twisted $rounds + lea 32($key_,$rnds_),$key # end of key schedule + mov %rax,%r10 jmp .Lccm64_dec_outer .align 16 .Lccm64_dec_outer: xorps $inout0,$in0 # inp ^= E(iv) movdqa $iv,$inout0 - mov $rnds_,$rounds movups $in0,($out) # save output lea 16($out),$out pshufb $bswap_mask,$inout0 @@ -970,36 +1032,36 @@ sub aesni_generate8 { jz .Lccm64_dec_break $movkey ($key_),$rndkey0 - shr \$1,$rounds + mov %r10,%rax $movkey 16($key_),$rndkey1 xorps $rndkey0,$in0 - lea 32($key_),$key xorps $rndkey0,$inout0 xorps $in0,$inout1 # cmac^=out - $movkey ($key),$rndkey0 - + $movkey 32($key_),$rndkey0 + jmp .Lccm64_dec2_loop +.align 16 .Lccm64_dec2_loop: aesenc $rndkey1,$inout0 - dec $rounds aesenc $rndkey1,$inout1 - $movkey 16($key),$rndkey1 + $movkey ($key,%rax),$rndkey1 + add \$32,%rax aesenc $rndkey0,$inout0 - lea 32($key),$key aesenc $rndkey0,$inout1 - $movkey 0($key),$rndkey0 + $movkey -16($key,%rax),$rndkey0 jnz .Lccm64_dec2_loop movups ($inp),$in0 # load inp paddq $increment,$iv aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 - lea 16($inp),$inp aesenclast $rndkey0,$inout0 aesenclast $rndkey0,$inout1 + lea 16($inp),$inp jmp .Lccm64_dec_outer .align 16 .Lccm64_dec_break: #xorps $in0,$inout1 # cmac^=out + mov 240($key_),$rounds ___ &aesni_generate1("enc",$key_,$rounds,$inout1,$in0); $code.=<<___; @@ -1024,220 +1086,479 @@ sub aesni_generate8 { # const char *ivec); # # Handles only complete blocks, operates on 32-bit counter and -# does not update *ivec! (see engine/eng_aesni.c for details) +# does not update *ivec! (see crypto/modes/ctr128.c for details) # +# Overhaul based on suggestions from Shay Gueron and Vlad Krasnov, +# http://rt.openssl.org/Ticket/Display.html?id=3021&user=guest&pass=guest. +# Keywords are full unroll and modulo-schedule counter calculations +# with zero-round key xor. 
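For reference, the behaviour the comment above describes — a CTR implementation that treats only the last 32 bits of the IV as a big-endian counter and never writes the counter back to *ivec — corresponds to the following scalar sketch. This is illustrative C, not part of the patch; block128_f and the enc callback are placeholder names rather than the OpenSSL API. The patched assembly additionally folds round key 0 into the pre-computed counter blocks (the "zero-round key xor" mentioned above), which the sketch omits for clarity.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
                           const void *key);

/* Scalar model of the ctr32 data flow (illustrative only). */
static void ctr32_encrypt_blocks_ref(const uint8_t *in, uint8_t *out,
                                     size_t blocks, const void *key,
                                     const uint8_t ivec[16], block128_f enc)
{
    uint8_t ctrblk[16], ks[16];
    uint32_t ctr;
    size_t i;

    memcpy(ctrblk, ivec, 16);
    /* the 32-bit counter sits in bytes 12..15 of the IV, big-endian */
    ctr = ((uint32_t)ctrblk[12] << 24) | ((uint32_t)ctrblk[13] << 16) |
          ((uint32_t)ctrblk[14] << 8)  |  (uint32_t)ctrblk[15];

    while (blocks--) {
        enc(ctrblk, ks, key);               /* keystream block */
        for (i = 0; i < 16; i++)
            out[i] = in[i] ^ ks[i];
        in += 16; out += 16;
        ctr++;                              /* only the low 32 bits count up */
        ctrblk[12] = (uint8_t)(ctr >> 24);
        ctrblk[13] = (uint8_t)(ctr >> 16);
        ctrblk[14] = (uint8_t)(ctr >> 8);
        ctrblk[15] = (uint8_t)ctr;
    }
    /* *ivec is deliberately left untouched, as the comment above notes */
}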
{ -my $reserved = $win64?0:-0x28; -my ($in0,$in1,$in2,$in3)=map("%xmm$_",(8..11)); -my ($iv0,$iv1,$ivec)=("%xmm12","%xmm13","%xmm14"); -my $bswap_mask="%xmm15"; +my ($in0,$in1,$in2,$in3,$in4,$in5)=map("%xmm$_",(10..15)); +my ($key0,$ctr)=("${key_}d","${ivp}d"); +my $frame_size = 0x80 + ($win64?160:0); $code.=<<___; .globl aesni_ctr32_encrypt_blocks .type aesni_ctr32_encrypt_blocks,\@function,5 .align 16 aesni_ctr32_encrypt_blocks: + lea (%rsp),%rax + push %rbp + sub \$$frame_size,%rsp + and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - lea -0xc8(%rsp),%rsp - movaps %xmm6,0x20(%rsp) - movaps %xmm7,0x30(%rsp) - movaps %xmm8,0x40(%rsp) - movaps %xmm9,0x50(%rsp) - movaps %xmm10,0x60(%rsp) - movaps %xmm11,0x70(%rsp) - movaps %xmm12,0x80(%rsp) - movaps %xmm13,0x90(%rsp) - movaps %xmm14,0xa0(%rsp) - movaps %xmm15,0xb0(%rsp) + movaps %xmm6,-0xa8(%rax) + movaps %xmm7,-0x98(%rax) + movaps %xmm8,-0x88(%rax) + movaps %xmm9,-0x78(%rax) + movaps %xmm10,-0x68(%rax) + movaps %xmm11,-0x58(%rax) + movaps %xmm12,-0x48(%rax) + movaps %xmm13,-0x38(%rax) + movaps %xmm14,-0x28(%rax) + movaps %xmm15,-0x18(%rax) .Lctr32_body: ___ $code.=<<___; + lea -8(%rax),%rbp + cmp \$1,$len je .Lctr32_one_shortcut - movdqu ($ivp),$ivec - movdqa .Lbswap_mask(%rip),$bswap_mask - xor $rounds,$rounds - pextrd \$3,$ivec,$rnds_ # pull 32-bit counter - pinsrd \$3,$rounds,$ivec # wipe 32-bit counter - + movdqu ($ivp),$inout0 + movdqu ($key),$rndkey0 + mov 12($ivp),$ctr # counter LSB + pxor $rndkey0,$inout0 + mov 12($key),$key0 # 0-round key LSB + movdqa $inout0,0x00(%rsp) # populate counter block + bswap $ctr + movdqa $inout0,$inout1 + movdqa $inout0,$inout2 + movdqa $inout0,$inout3 + movdqa $inout0,0x40(%rsp) + movdqa $inout0,0x50(%rsp) + movdqa $inout0,0x60(%rsp) + mov %rdx,%r10 # borrow %rdx + movdqa $inout0,0x70(%rsp) + + lea 1($ctr),%rax + lea 2($ctr),%rdx + bswap %eax + bswap %edx + xor $key0,%eax + xor $key0,%edx + pinsrd \$3,%eax,$inout1 + lea 3($ctr),%rax + movdqa $inout1,0x10(%rsp) + pinsrd \$3,%edx,$inout2 + bswap %eax + mov %r10,%rdx # restore %rdx + lea 4($ctr),%r10 + movdqa $inout2,0x20(%rsp) + xor $key0,%eax + bswap %r10d + pinsrd \$3,%eax,$inout3 + xor $key0,%r10d + movdqa $inout3,0x30(%rsp) + lea 5($ctr),%r9 + mov %r10d,0x40+12(%rsp) + bswap %r9d + lea 6($ctr),%r10 mov 240($key),$rounds # key->rounds - bswap $rnds_ - pxor $iv0,$iv0 # vector of 3 32-bit counters - pxor $iv1,$iv1 # vector of 3 32-bit counters - pinsrd \$0,$rnds_,$iv0 - lea 3($rnds_),$key_ - pinsrd \$0,$key_,$iv1 - inc $rnds_ - pinsrd \$1,$rnds_,$iv0 - inc $key_ - pinsrd \$1,$key_,$iv1 - inc $rnds_ - pinsrd \$2,$rnds_,$iv0 - inc $key_ - pinsrd \$2,$key_,$iv1 - movdqa $iv0,$reserved(%rsp) - pshufb $bswap_mask,$iv0 - movdqa $iv1,`$reserved+0x10`(%rsp) - pshufb $bswap_mask,$iv1 - - pshufd \$`3<<6`,$iv0,$inout0 # place counter to upper dword - pshufd \$`2<<6`,$iv0,$inout1 - pshufd \$`1<<6`,$iv0,$inout2 - cmp \$6,$len + xor $key0,%r9d + bswap %r10d + mov %r9d,0x50+12(%rsp) + xor $key0,%r10d + lea 7($ctr),%r9 + mov %r10d,0x60+12(%rsp) + bswap %r9d + mov OPENSSL_ia32cap_P+4(%rip),%r10d + xor $key0,%r9d + and \$`1<<26|1<<22`,%r10d # isolate XSAVE+MOVBE + mov %r9d,0x70+12(%rsp) + + $movkey 0x10($key),$rndkey1 + + movdqa 0x40(%rsp),$inout4 + movdqa 0x50(%rsp),$inout5 + + cmp \$8,$len jb .Lctr32_tail - shr \$1,$rounds - mov $key,$key_ # backup $key - mov $rounds,$rnds_ # backup $rounds + sub \$6,$len + cmp \$`1<<22`,%r10d # check for MOVBE without XSAVE + je .Lctr32_6x + + lea 0x80($key),$key # size optimization + sub 
\$2,$len + jmp .Lctr32_loop8 + +.align 16 +.Lctr32_6x: + shl \$4,$rounds + mov \$48,$rnds_ + bswap $key0 + lea 32($key,$rounds),$key # end of key schedule + sub %rax,%r10 # twisted $rounds jmp .Lctr32_loop6 .align 16 .Lctr32_loop6: - pshufd \$`3<<6`,$iv1,$inout3 - por $ivec,$inout0 # merge counter-less ivec - $movkey ($key_),$rndkey0 - pshufd \$`2<<6`,$iv1,$inout4 - por $ivec,$inout1 - $movkey 16($key_),$rndkey1 - pshufd \$`1<<6`,$iv1,$inout5 - por $ivec,$inout2 - por $ivec,$inout3 - xorps $rndkey0,$inout0 - por $ivec,$inout4 - por $ivec,$inout5 - - # inline _aesni_encrypt6 and interleave last rounds - # with own code... + add \$6,$ctr + $movkey -48($key,$rnds_),$rndkey0 + aesenc $rndkey1,$inout0 + mov $ctr,%eax + xor $key0,%eax + aesenc $rndkey1,$inout1 + movbe %eax,`0x00+12`(%rsp) + lea 1($ctr),%eax + aesenc $rndkey1,$inout2 + xor $key0,%eax + movbe %eax,`0x10+12`(%rsp) + aesenc $rndkey1,$inout3 + lea 2($ctr),%eax + xor $key0,%eax + aesenc $rndkey1,$inout4 + movbe %eax,`0x20+12`(%rsp) + lea 3($ctr),%eax + aesenc $rndkey1,$inout5 + $movkey -32($key,$rnds_),$rndkey1 + xor $key0,%eax - pxor $rndkey0,$inout1 + aesenc $rndkey0,$inout0 + movbe %eax,`0x30+12`(%rsp) + lea 4($ctr),%eax + aesenc $rndkey0,$inout1 + xor $key0,%eax + movbe %eax,`0x40+12`(%rsp) + aesenc $rndkey0,$inout2 + lea 5($ctr),%eax + xor $key0,%eax + aesenc $rndkey0,$inout3 + movbe %eax,`0x50+12`(%rsp) + mov %r10,%rax # mov $rnds_,$rounds + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + $movkey -16($key,$rnds_),$rndkey0 + + call .Lenc_loop6 + + movdqu ($inp),$inout6 + movdqu 0x10($inp),$inout7 + movdqu 0x20($inp),$in0 + movdqu 0x30($inp),$in1 + movdqu 0x40($inp),$in2 + movdqu 0x50($inp),$in3 + lea 0x60($inp),$inp + $movkey -64($key,$rnds_),$rndkey1 + pxor $inout0,$inout6 + movaps 0x00(%rsp),$inout0 + pxor $inout1,$inout7 + movaps 0x10(%rsp),$inout1 + pxor $inout2,$in0 + movaps 0x20(%rsp),$inout2 + pxor $inout3,$in1 + movaps 0x30(%rsp),$inout3 + pxor $inout4,$in2 + movaps 0x40(%rsp),$inout4 + pxor $inout5,$in3 + movaps 0x50(%rsp),$inout5 + movdqu $inout6,($out) + movdqu $inout7,0x10($out) + movdqu $in0,0x20($out) + movdqu $in1,0x30($out) + movdqu $in2,0x40($out) + movdqu $in3,0x50($out) + lea 0x60($out),$out + + sub \$6,$len + jnc .Lctr32_loop6 + + add \$6,$len + jz .Lctr32_done + + lea -48($rnds_),$rounds + lea -80($key,$rnds_),$key # restore $key + neg $rounds + shr \$4,$rounds # restore $rounds + jmp .Lctr32_tail + +.align 32 +.Lctr32_loop8: + add \$8,$ctr + movdqa 0x60(%rsp),$inout6 aesenc $rndkey1,$inout0 - lea 32($key_),$key - pxor $rndkey0,$inout2 + mov $ctr,%r9d + movdqa 0x70(%rsp),$inout7 aesenc $rndkey1,$inout1 - movdqa .Lincrement32(%rip),$iv1 - pxor $rndkey0,$inout3 + bswap %r9d + $movkey 0x20-0x80($key),$rndkey0 aesenc $rndkey1,$inout2 - movdqa $reserved(%rsp),$iv0 - pxor $rndkey0,$inout4 + xor $key0,%r9d + nop aesenc $rndkey1,$inout3 - pxor $rndkey0,$inout5 - $movkey ($key),$rndkey0 - dec $rounds + mov %r9d,0x00+12(%rsp) + lea 1($ctr),%r9 aesenc $rndkey1,$inout4 aesenc $rndkey1,$inout5 - jmp .Lctr32_enc_loop6_enter -.align 16 -.Lctr32_enc_loop6: + aesenc $rndkey1,$inout6 + aesenc $rndkey1,$inout7 + $movkey 0x30-0x80($key),$rndkey1 +___ +for($i=2;$i<8;$i++) { +my $rndkeyx = ($i&1)?$rndkey1:$rndkey0; +$code.=<<___; + bswap %r9d + aesenc $rndkeyx,$inout0 + aesenc $rndkeyx,$inout1 + xor $key0,%r9d + .byte 0x66,0x90 + aesenc $rndkeyx,$inout2 + aesenc $rndkeyx,$inout3 + mov %r9d,`0x10*($i-1)`+12(%rsp) + lea $i($ctr),%r9 + aesenc $rndkeyx,$inout4 + aesenc $rndkeyx,$inout5 + aesenc $rndkeyx,$inout6 + aesenc 
$rndkeyx,$inout7 + $movkey `0x20+0x10*$i`-0x80($key),$rndkeyx +___ +} +$code.=<<___; + bswap %r9d + aesenc $rndkey0,$inout0 + aesenc $rndkey0,$inout1 + aesenc $rndkey0,$inout2 + xor $key0,%r9d + movdqu 0x00($inp),$in0 + aesenc $rndkey0,$inout3 + mov %r9d,0x70+12(%rsp) + cmp \$11,$rounds + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + aesenc $rndkey0,$inout6 + aesenc $rndkey0,$inout7 + $movkey 0xa0-0x80($key),$rndkey0 + + jb .Lctr32_enc_done + aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 - dec $rounds aesenc $rndkey1,$inout2 aesenc $rndkey1,$inout3 aesenc $rndkey1,$inout4 aesenc $rndkey1,$inout5 -.Lctr32_enc_loop6_enter: - $movkey 16($key),$rndkey1 + aesenc $rndkey1,$inout6 + aesenc $rndkey1,$inout7 + $movkey 0xb0-0x80($key),$rndkey1 + aesenc $rndkey0,$inout0 aesenc $rndkey0,$inout1 - lea 32($key),$key aesenc $rndkey0,$inout2 aesenc $rndkey0,$inout3 aesenc $rndkey0,$inout4 aesenc $rndkey0,$inout5 - $movkey ($key),$rndkey0 - jnz .Lctr32_enc_loop6 + aesenc $rndkey0,$inout6 + aesenc $rndkey0,$inout7 + $movkey 0xc0-0x80($key),$rndkey0 + je .Lctr32_enc_done aesenc $rndkey1,$inout0 - paddd $iv1,$iv0 # increment counter vector aesenc $rndkey1,$inout1 - paddd `$reserved+0x10`(%rsp),$iv1 aesenc $rndkey1,$inout2 - movdqa $iv0,$reserved(%rsp) # save counter vector aesenc $rndkey1,$inout3 - movdqa $iv1,`$reserved+0x10`(%rsp) aesenc $rndkey1,$inout4 - pshufb $bswap_mask,$iv0 # byte swap aesenc $rndkey1,$inout5 - pshufb $bswap_mask,$iv1 + aesenc $rndkey1,$inout6 + aesenc $rndkey1,$inout7 + $movkey 0xd0-0x80($key),$rndkey1 - aesenclast $rndkey0,$inout0 - movups ($inp),$in0 # load input - aesenclast $rndkey0,$inout1 + aesenc $rndkey0,$inout0 + aesenc $rndkey0,$inout1 + aesenc $rndkey0,$inout2 + aesenc $rndkey0,$inout3 + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + aesenc $rndkey0,$inout6 + aesenc $rndkey0,$inout7 + $movkey 0xe0-0x80($key),$rndkey0 + jmp .Lctr32_enc_done + +.align 16 +.Lctr32_enc_done: + movdqu 0x10($inp),$in1 + pxor $rndkey0,$in0 + movdqu 0x20($inp),$in2 + pxor $rndkey0,$in1 + movdqu 0x30($inp),$in3 + pxor $rndkey0,$in2 + movdqu 0x40($inp),$in4 + pxor $rndkey0,$in3 + movdqu 0x50($inp),$in5 + pxor $rndkey0,$in4 + pxor $rndkey0,$in5 + aesenc $rndkey1,$inout0 + aesenc $rndkey1,$inout1 + aesenc $rndkey1,$inout2 + aesenc $rndkey1,$inout3 + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 + aesenc $rndkey1,$inout6 + aesenc $rndkey1,$inout7 + movdqu 0x60($inp),$rndkey1 + lea 0x80($inp),$inp + + aesenclast $in0,$inout0 + pxor $rndkey0,$rndkey1 + movdqu 0x70-0x80($inp),$in0 + aesenclast $in1,$inout1 + pxor $rndkey0,$in0 + movdqa 0x00(%rsp),$in1 # load next counter block + aesenclast $in2,$inout2 + aesenclast $in3,$inout3 + movdqa 0x10(%rsp),$in2 + movdqa 0x20(%rsp),$in3 + aesenclast $in4,$inout4 + aesenclast $in5,$inout5 + movdqa 0x30(%rsp),$in4 + movdqa 0x40(%rsp),$in5 + aesenclast $rndkey1,$inout6 + movdqa 0x50(%rsp),$rndkey0 + $movkey 0x10-0x80($key),$rndkey1 + aesenclast $in0,$inout7 + + movups $inout0,($out) # store output + movdqa $in1,$inout0 + movups $inout1,0x10($out) + movdqa $in2,$inout1 + movups $inout2,0x20($out) + movdqa $in3,$inout2 + movups $inout3,0x30($out) + movdqa $in4,$inout3 + movups $inout4,0x40($out) + movdqa $in5,$inout4 + movups $inout5,0x50($out) + movdqa $rndkey0,$inout5 + movups $inout6,0x60($out) + movups $inout7,0x70($out) + lea 0x80($out),$out + + sub \$8,$len + jnc .Lctr32_loop8 + + add \$8,$len + jz .Lctr32_done + lea -0x80($key),$key + +.Lctr32_tail: + lea 16($key),$key + cmp \$4,$len + jb .Lctr32_loop3 + je .Lctr32_loop4 + + shl \$4,$rounds + 
movdqa 0x60(%rsp),$inout6 + pxor $inout7,$inout7 + + $movkey 16($key),$rndkey0 + aesenc $rndkey1,$inout0 + aesenc $rndkey1,$inout1 + lea 32-16($key,$rounds),$key + neg %rax + aesenc $rndkey1,$inout2 + add \$16,%rax + movups ($inp),$in0 + aesenc $rndkey1,$inout3 + aesenc $rndkey1,$inout4 + movups 0x10($inp),$in1 + movups 0x20($inp),$in2 + aesenc $rndkey1,$inout5 + aesenc $rndkey1,$inout6 + + call .Lenc_loop8_enter + + movdqu 0x30($inp),$in3 + pxor $in0,$inout0 + movdqu 0x40($inp),$in0 + pxor $in1,$inout1 + movdqu $inout0,($out) + pxor $in2,$inout2 + movdqu $inout1,0x10($out) + pxor $in3,$inout3 + movdqu $inout2,0x20($out) + pxor $in0,$inout4 + movdqu $inout3,0x30($out) + movdqu $inout4,0x40($out) + cmp \$6,$len + jb .Lctr32_done + + movups 0x50($inp),$in1 + xorps $in1,$inout5 + movups $inout5,0x50($out) + je .Lctr32_done + + movups 0x60($inp),$in2 + xorps $in2,$inout6 + movups $inout6,0x60($out) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop4: + aesenc $rndkey1,$inout0 + lea 16($key),$key + dec $rounds + aesenc $rndkey1,$inout1 + aesenc $rndkey1,$inout2 + aesenc $rndkey1,$inout3 + $movkey ($key),$rndkey1 + jnz .Lctr32_loop4 + aesenclast $rndkey1,$inout0 + aesenclast $rndkey1,$inout1 + movups ($inp),$in0 movups 0x10($inp),$in1 - aesenclast $rndkey0,$inout2 + aesenclast $rndkey1,$inout2 + aesenclast $rndkey1,$inout3 movups 0x20($inp),$in2 - aesenclast $rndkey0,$inout3 movups 0x30($inp),$in3 - aesenclast $rndkey0,$inout4 - movups 0x40($inp),$rndkey1 - aesenclast $rndkey0,$inout5 - movups 0x50($inp),$rndkey0 - lea 0x60($inp),$inp - - xorps $inout0,$in0 # xor - pshufd \$`3<<6`,$iv0,$inout0 - xorps $inout1,$in1 - pshufd \$`2<<6`,$iv0,$inout1 - movups $in0,($out) # store output - xorps $inout2,$in2 - pshufd \$`1<<6`,$iv0,$inout2 - movups $in1,0x10($out) - xorps $inout3,$in3 - movups $in2,0x20($out) - xorps $inout4,$rndkey1 - movups $in3,0x30($out) - xorps $inout5,$rndkey0 - movups $rndkey1,0x40($out) - movups $rndkey0,0x50($out) - lea 0x60($out),$out - mov $rnds_,$rounds - sub \$6,$len - jnc .Lctr32_loop6 - add \$6,$len - jz .Lctr32_done - mov $key_,$key # restore $key - lea 1($rounds,$rounds),$rounds # restore original value + xorps $in0,$inout0 + movups $inout0,($out) + xorps $in1,$inout1 + movups $inout1,0x10($out) + pxor $in2,$inout2 + movdqu $inout2,0x20($out) + pxor $in3,$inout3 + movdqu $inout3,0x30($out) + jmp .Lctr32_done + +.align 32 +.Lctr32_loop3: + aesenc $rndkey1,$inout0 + lea 16($key),$key + dec $rounds + aesenc $rndkey1,$inout1 + aesenc $rndkey1,$inout2 + $movkey ($key),$rndkey1 + jnz .Lctr32_loop3 + aesenclast $rndkey1,$inout0 + aesenclast $rndkey1,$inout1 + aesenclast $rndkey1,$inout2 -.Lctr32_tail: - por $ivec,$inout0 movups ($inp),$in0 + xorps $in0,$inout0 + movups $inout0,($out) cmp \$2,$len - jb .Lctr32_one + jb .Lctr32_done - por $ivec,$inout1 movups 0x10($inp),$in1 - je .Lctr32_two + xorps $in1,$inout1 + movups $inout1,0x10($out) + je .Lctr32_done - pshufd \$`3<<6`,$iv1,$inout3 - por $ivec,$inout2 movups 0x20($inp),$in2 - cmp \$4,$len - jb .Lctr32_three - - pshufd \$`2<<6`,$iv1,$inout4 - por $ivec,$inout3 - movups 0x30($inp),$in3 - je .Lctr32_four - - por $ivec,$inout4 - xorps $inout5,$inout5 - - call _aesni_encrypt6 - - movups 0x40($inp),$rndkey1 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - xorps $inout2,$in2 - movups $in1,0x10($out) - xorps $inout3,$in3 - movups $in2,0x20($out) - xorps $inout4,$rndkey1 - movups $in3,0x30($out) - movups $rndkey1,0x40($out) + xorps $in2,$inout2 + movups $inout2,0x20($out) jmp .Lctr32_done .align 16 @@ -1245,64 +1566,32 @@ 
sub aesni_generate8 { movups ($ivp),$inout0 movups ($inp),$in0 mov 240($key),$rounds # key->rounds -.Lctr32_one: ___ &aesni_generate1("enc",$key,$rounds); $code.=<<___; - xorps $inout0,$in0 - movups $in0,($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_two: - xorps $inout2,$inout2 - call _aesni_encrypt3 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - movups $in1,0x10($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_three: - call _aesni_encrypt3 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - xorps $inout2,$in2 - movups $in1,0x10($out) - movups $in2,0x20($out) + xorps $in0,$inout0 + movups $inout0,($out) jmp .Lctr32_done .align 16 -.Lctr32_four: - call _aesni_encrypt4 - xorps $inout0,$in0 - xorps $inout1,$in1 - movups $in0,($out) - xorps $inout2,$in2 - movups $in1,0x10($out) - xorps $inout3,$in3 - movups $in2,0x20($out) - movups $in3,0x30($out) - .Lctr32_done: ___ $code.=<<___ if ($win64); - movaps 0x20(%rsp),%xmm6 - movaps 0x30(%rsp),%xmm7 - movaps 0x40(%rsp),%xmm8 - movaps 0x50(%rsp),%xmm9 - movaps 0x60(%rsp),%xmm10 - movaps 0x70(%rsp),%xmm11 - movaps 0x80(%rsp),%xmm12 - movaps 0x90(%rsp),%xmm13 - movaps 0xa0(%rsp),%xmm14 - movaps 0xb0(%rsp),%xmm15 - lea 0xc8(%rsp),%rsp -.Lctr32_ret: + movaps -0xa0(%rbp),%xmm6 + movaps -0x90(%rbp),%xmm7 + movaps -0x80(%rbp),%xmm8 + movaps -0x70(%rbp),%xmm9 + movaps -0x60(%rbp),%xmm10 + movaps -0x50(%rbp),%xmm11 + movaps -0x40(%rbp),%xmm12 + movaps -0x30(%rbp),%xmm13 + movaps -0x20(%rbp),%xmm14 + movaps -0x10(%rbp),%xmm15 ___ $code.=<<___; + lea (%rbp),%rsp + pop %rbp +.Lctr32_epilogue: ret .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks ___ @@ -1317,243 +1606,286 @@ sub aesni_generate8 { my @tweak=map("%xmm$_",(10..15)); my ($twmask,$twres,$twtmp)=("%xmm8","%xmm9",@tweak[4]); my ($key2,$ivp,$len_)=("%r8","%r9","%r9"); -my $frame_size = 0x68 + ($win64?160:0); +my $frame_size = 0x70 + ($win64?160:0); $code.=<<___; .globl aesni_xts_encrypt .type aesni_xts_encrypt,\@function,6 .align 16 aesni_xts_encrypt: - lea -$frame_size(%rsp),%rsp + lea (%rsp),%rax + push %rbp + sub \$$frame_size,%rsp + and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - movaps %xmm6,0x60(%rsp) - movaps %xmm7,0x70(%rsp) - movaps %xmm8,0x80(%rsp) - movaps %xmm9,0x90(%rsp) - movaps %xmm10,0xa0(%rsp) - movaps %xmm11,0xb0(%rsp) - movaps %xmm12,0xc0(%rsp) - movaps %xmm13,0xd0(%rsp) - movaps %xmm14,0xe0(%rsp) - movaps %xmm15,0xf0(%rsp) + movaps %xmm6,-0xa8(%rax) + movaps %xmm7,-0x98(%rax) + movaps %xmm8,-0x88(%rax) + movaps %xmm9,-0x78(%rax) + movaps %xmm10,-0x68(%rax) + movaps %xmm11,-0x58(%rax) + movaps %xmm12,-0x48(%rax) + movaps %xmm13,-0x38(%rax) + movaps %xmm14,-0x28(%rax) + movaps %xmm15,-0x18(%rax) .Lxts_enc_body: ___ $code.=<<___; - movups ($ivp),@tweak[5] # load clear-text tweak + lea -8(%rax),%rbp + movups ($ivp),$inout0 # load clear-text tweak mov 240(%r8),$rounds # key2->rounds mov 240($key),$rnds_ # key1->rounds ___ # generate the tweak - &aesni_generate1("enc",$key2,$rounds,@tweak[5]); + &aesni_generate1("enc",$key2,$rounds,$inout0); $code.=<<___; + $movkey ($key),$rndkey0 # zero round key mov $key,$key_ # backup $key mov $rnds_,$rounds # backup $rounds + shl \$4,$rnds_ mov $len,$len_ # backup $len and \$-16,$len + $movkey 16($key,$rnds_),$rndkey1 # last round key + movdqa .Lxts_magic(%rip),$twmask - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp # broadcast upper bits + movdqa $inout0,@tweak[5] + pshufd \$0x5f,$inout0,$twres + pxor $rndkey0,$rndkey1 ___ + # alternative tweak calculation 
algorithm is based on suggestions + # by Shay Gueron. psrad doesn't conflict with AES-NI instructions + # and should help in the future... for ($i=0;$i<4;$i++) { $code.=<<___; - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp + movdqa $twres,$twtmp + paddd $twres,$twres movdqa @tweak[5],@tweak[$i] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - pand $twmask,$twres # isolate carry and residue - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - pxor $twres,@tweak[5] + psrad \$31,$twtmp # broadcast upper bits + paddq @tweak[5],@tweak[5] + pand $twmask,$twtmp + pxor $rndkey0,@tweak[$i] + pxor $twtmp,@tweak[5] ___ } $code.=<<___; + movdqa @tweak[5],@tweak[4] + psrad \$31,$twres + paddq @tweak[5],@tweak[5] + pand $twmask,$twres + pxor $rndkey0,@tweak[4] + pxor $twres,@tweak[5] + movaps $rndkey1,0x60(%rsp) # save round[0]^round[last] + sub \$16*6,$len jc .Lxts_enc_short - shr \$1,$rounds - sub \$1,$rounds - mov $rounds,$rnds_ + mov \$16+96,$rounds + lea 32($key_,$rnds_),$key # end of key schedule + sub %r10,%rax # twisted $rounds + $movkey 16($key_),$rndkey1 + mov %rax,%r10 # backup twisted $rounds + lea .Lxts_magic(%rip),%r8 jmp .Lxts_enc_grandloop -.align 16 +.align 32 .Lxts_enc_grandloop: - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak movdqu `16*0`($inp),$inout0 # load input - pand $twmask,$twres # isolate carry and residue + movdqa $rndkey0,$twmask movdqu `16*1`($inp),$inout1 - pxor $twres,@tweak[5] - + pxor @tweak[0],$inout0 movdqu `16*2`($inp),$inout2 - pxor @tweak[0],$inout0 # input^=tweak - movdqu `16*3`($inp),$inout3 pxor @tweak[1],$inout1 - movdqu `16*4`($inp),$inout4 + aesenc $rndkey1,$inout0 + movdqu `16*3`($inp),$inout3 pxor @tweak[2],$inout2 - movdqu `16*5`($inp),$inout5 - lea `16*6`($inp),$inp + aesenc $rndkey1,$inout1 + movdqu `16*4`($inp),$inout4 pxor @tweak[3],$inout3 - $movkey ($key_),$rndkey0 + aesenc $rndkey1,$inout2 + movdqu `16*5`($inp),$inout5 + pxor @tweak[5],$twmask # round[0]^=tweak[5] + movdqa 0x60(%rsp),$twres # load round[0]^round[last] pxor @tweak[4],$inout4 - pxor @tweak[5],$inout5 + aesenc $rndkey1,$inout3 + $movkey 32($key_),$rndkey0 + lea `16*6`($inp),$inp + pxor $twmask,$inout5 - # inline _aesni_encrypt6 and interleave first and last rounds - # with own code... 
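The "alternative tweak calculation" referred to above is the usual multiply-by-x step in GF(2^128) under the x^128 + x^7 + x^2 + x + 1 polynomial — hence the 0x87 in .Lxts_magic. A scalar sketch of one tweak update follows (illustrative C, not part of the patch); in the vector code the same effect is obtained with psrad-generated sign masks and the {0x87, 0, 1, 0} constant, whose second non-zero word also propagates the carry between the two 64-bit halves that paddq would otherwise drop.

#include <stdint.h>

/* One XTS tweak update: multiply the 128-bit tweak by x in GF(2^128).
 * t[0] is the low 64 bits, t[1] the high 64 bits of the little-endian tweak.
 * Illustrative only. */
static void xts_mul_x(uint64_t t[2])
{
    uint64_t carry = t[1] >> 63;               /* bit 127 falls off the top   */
    t[1] = (t[1] << 1) | (t[0] >> 63);          /* shift left by one, w/ carry */
    t[0] = (t[0] << 1) ^ (0x87 & (0 - carry));  /* xor x^7+x^2+x+1 if it did   */
}

As the inline comments note, the patched loop also pre-xors each tweak with round key 0 and stashes round[0]^round[last] at 0x60(%rsp), so the per-block tweak xors can be merged into the first and last AES rounds.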
- $movkey 16($key_),$rndkey1 - pxor $rndkey0,$inout0 - pxor $rndkey0,$inout1 - movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks - aesenc $rndkey1,$inout0 - lea 32($key_),$key - pxor $rndkey0,$inout2 + pxor $twres,@tweak[0] + aesenc $rndkey1,$inout4 + pxor $twres,@tweak[1] + movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key + aesenc $rndkey1,$inout5 + $movkey 48($key_),$rndkey1 + pxor $twres,@tweak[2] + + aesenc $rndkey0,$inout0 + pxor $twres,@tweak[3] movdqa @tweak[1],`16*1`(%rsp) - aesenc $rndkey1,$inout1 - pxor $rndkey0,$inout3 + aesenc $rndkey0,$inout1 + pxor $twres,@tweak[4] movdqa @tweak[2],`16*2`(%rsp) - aesenc $rndkey1,$inout2 - pxor $rndkey0,$inout4 - movdqa @tweak[3],`16*3`(%rsp) - aesenc $rndkey1,$inout3 - pxor $rndkey0,$inout5 - $movkey ($key),$rndkey0 - dec $rounds + aesenc $rndkey0,$inout2 + aesenc $rndkey0,$inout3 + pxor $twres,$twmask movdqa @tweak[4],`16*4`(%rsp) - aesenc $rndkey1,$inout4 - movdqa @tweak[5],`16*5`(%rsp) - aesenc $rndkey1,$inout5 - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp - jmp .Lxts_enc_loop6_enter - -.align 16 + aesenc $rndkey0,$inout4 + aesenc $rndkey0,$inout5 + $movkey 64($key_),$rndkey0 + movdqa $twmask,`16*5`(%rsp) + pshufd \$0x5f,@tweak[5],$twres + jmp .Lxts_enc_loop6 +.align 32 .Lxts_enc_loop6: aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 - dec $rounds aesenc $rndkey1,$inout2 aesenc $rndkey1,$inout3 aesenc $rndkey1,$inout4 aesenc $rndkey1,$inout5 -.Lxts_enc_loop6_enter: - $movkey 16($key),$rndkey1 + $movkey -64($key,%rax),$rndkey1 + add \$32,%rax + aesenc $rndkey0,$inout0 aesenc $rndkey0,$inout1 - lea 32($key),$key aesenc $rndkey0,$inout2 aesenc $rndkey0,$inout3 aesenc $rndkey0,$inout4 aesenc $rndkey0,$inout5 - $movkey ($key),$rndkey0 + $movkey -80($key,%rax),$rndkey0 jnz .Lxts_enc_loop6 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqa (%r8),$twmask + movdqa $twres,$twtmp + paddd $twres,$twres aesenc $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue + paddq @tweak[5],@tweak[5] + psrad \$31,$twtmp aesenc $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcast upper bits + pand $twmask,$twtmp + $movkey ($key_),@tweak[0] # load round[0] aesenc $rndkey1,$inout2 - pxor $twres,@tweak[5] aesenc $rndkey1,$inout3 aesenc $rndkey1,$inout4 + pxor $twtmp,@tweak[5] + movaps @tweak[0],@tweak[1] # copy round[0] aesenc $rndkey1,$inout5 - $movkey 16($key),$rndkey1 + $movkey -64($key),$rndkey1 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[0] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqa $twres,$twtmp aesenc $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue + paddd $twres,$twres + pxor @tweak[5],@tweak[0] aesenc $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits + psrad \$31,$twtmp + paddq @tweak[5],@tweak[5] aesenc $rndkey0,$inout2 - pxor $twres,@tweak[5] aesenc $rndkey0,$inout3 + pand $twmask,$twtmp + movaps @tweak[1],@tweak[2] aesenc $rndkey0,$inout4 + pxor $twtmp,@tweak[5] + movdqa $twres,$twtmp aesenc $rndkey0,$inout5 - $movkey 32($key),$rndkey0 + $movkey -48($key),$rndkey0 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[1] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak + paddd $twres,$twres aesenc $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue + pxor @tweak[5],@tweak[1] + psrad \$31,$twtmp aesenc $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits + paddq @tweak[5],@tweak[5] + pand $twmask,$twtmp aesenc $rndkey1,$inout2 - pxor 
$twres,@tweak[5] aesenc $rndkey1,$inout3 + movdqa @tweak[3],`16*3`(%rsp) + pxor $twtmp,@tweak[5] aesenc $rndkey1,$inout4 + movaps @tweak[2],@tweak[3] + movdqa $twres,$twtmp aesenc $rndkey1,$inout5 + $movkey -32($key),$rndkey1 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[2] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesenclast $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue - aesenclast $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesenclast $rndkey0,$inout2 - pxor $twres,@tweak[5] - aesenclast $rndkey0,$inout3 - aesenclast $rndkey0,$inout4 - aesenclast $rndkey0,$inout5 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[3] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - xorps `16*0`(%rsp),$inout0 # output^=tweak - pand $twmask,$twres # isolate carry and residue - xorps `16*1`(%rsp),$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits + paddd $twres,$twres + aesenc $rndkey0,$inout0 + pxor @tweak[5],@tweak[2] + psrad \$31,$twtmp + aesenc $rndkey0,$inout1 + paddq @tweak[5],@tweak[5] + pand $twmask,$twtmp + aesenc $rndkey0,$inout2 + aesenc $rndkey0,$inout3 + aesenc $rndkey0,$inout4 + pxor $twtmp,@tweak[5] + movaps @tweak[3],@tweak[4] + aesenc $rndkey0,$inout5 + + movdqa $twres,$rndkey0 + paddd $twres,$twres + aesenc $rndkey1,$inout0 + pxor @tweak[5],@tweak[3] + psrad \$31,$rndkey0 + aesenc $rndkey1,$inout1 + paddq @tweak[5],@tweak[5] + pand $twmask,$rndkey0 + aesenc $rndkey1,$inout2 + aesenc $rndkey1,$inout3 + pxor $rndkey0,@tweak[5] + $movkey ($key_),$rndkey0 + aesenc $rndkey1,$inout4 + aesenc $rndkey1,$inout5 + $movkey 16($key_),$rndkey1 + + pxor @tweak[5],@tweak[4] + aesenclast `16*0`(%rsp),$inout0 + psrad \$31,$twres + paddq @tweak[5],@tweak[5] + aesenclast `16*1`(%rsp),$inout1 + aesenclast `16*2`(%rsp),$inout2 + pand $twmask,$twres + mov %r10,%rax # restore $rounds + aesenclast `16*3`(%rsp),$inout3 + aesenclast `16*4`(%rsp),$inout4 + aesenclast `16*5`(%rsp),$inout5 pxor $twres,@tweak[5] - xorps `16*2`(%rsp),$inout2 - movups $inout0,`16*0`($out) # write output - xorps `16*3`(%rsp),$inout3 - movups $inout1,`16*1`($out) - xorps `16*4`(%rsp),$inout4 - movups $inout2,`16*2`($out) - xorps `16*5`(%rsp),$inout5 - movups $inout3,`16*3`($out) - mov $rnds_,$rounds # restore $rounds - movups $inout4,`16*4`($out) - movups $inout5,`16*5`($out) lea `16*6`($out),$out + movups $inout0,`-16*6`($out) # write output + movups $inout1,`-16*5`($out) + movups $inout2,`-16*4`($out) + movups $inout3,`-16*3`($out) + movups $inout4,`-16*2`($out) + movups $inout5,`-16*1`($out) sub \$16*6,$len jnc .Lxts_enc_grandloop - lea 3($rounds,$rounds),$rounds # restore original value + mov \$16+96,$rounds + sub $rnds_,$rounds mov $key_,$key # restore $key - mov $rounds,$rnds_ # backup $rounds + shr \$4,$rounds # restore original value .Lxts_enc_short: + mov $rounds,$rnds_ # backup $rounds + pxor $rndkey0,@tweak[0] add \$16*6,$len jz .Lxts_enc_done + pxor $rndkey0,@tweak[1] cmp \$0x20,$len jb .Lxts_enc_one + pxor $rndkey0,@tweak[2] je .Lxts_enc_two + pxor $rndkey0,@tweak[3] cmp \$0x40,$len jb .Lxts_enc_three + pxor $rndkey0,@tweak[4] je .Lxts_enc_four - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movdqu ($inp),$inout0 - pand $twmask,$twres # isolate carry and residue - movdqu 16*1($inp),$inout1 - pxor $twres,@tweak[5] - + movdqu ($inp),$inout0 + movdqu 16*1($inp),$inout1 movdqu 16*2($inp),$inout2 pxor @tweak[0],$inout0 movdqu 16*3($inp),$inout3 @@ -1602,7 +1934,7 
@@ sub aesni_generate8 { xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 - call _aesni_encrypt3 + call _aesni_encrypt2 xorps @tweak[0],$inout0 movdqa @tweak[2],@tweak[0] @@ -1648,15 +1980,15 @@ sub aesni_generate8 { call _aesni_encrypt4 - xorps @tweak[0],$inout0 - movdqa @tweak[5],@tweak[0] - xorps @tweak[1],$inout1 - xorps @tweak[2],$inout2 - movups $inout0,($out) - xorps @tweak[3],$inout3 - movups $inout1,16*1($out) - movups $inout2,16*2($out) - movups $inout3,16*3($out) + pxor @tweak[0],$inout0 + movdqa @tweak[4],@tweak[0] + pxor @tweak[1],$inout1 + pxor @tweak[2],$inout2 + movdqu $inout0,($out) + pxor @tweak[3],$inout3 + movdqu $inout1,16*1($out) + movdqu $inout2,16*2($out) + movdqu $inout3,16*3($out) lea 16*4($out),$out jmp .Lxts_enc_done @@ -1691,19 +2023,20 @@ sub aesni_generate8 { .Lxts_enc_ret: ___ $code.=<<___ if ($win64); - movaps 0x60(%rsp),%xmm6 - movaps 0x70(%rsp),%xmm7 - movaps 0x80(%rsp),%xmm8 - movaps 0x90(%rsp),%xmm9 - movaps 0xa0(%rsp),%xmm10 - movaps 0xb0(%rsp),%xmm11 - movaps 0xc0(%rsp),%xmm12 - movaps 0xd0(%rsp),%xmm13 - movaps 0xe0(%rsp),%xmm14 - movaps 0xf0(%rsp),%xmm15 + movaps -0xa0(%rbp),%xmm6 + movaps -0x90(%rbp),%xmm7 + movaps -0x80(%rbp),%xmm8 + movaps -0x70(%rbp),%xmm9 + movaps -0x60(%rbp),%xmm10 + movaps -0x50(%rbp),%xmm11 + movaps -0x40(%rbp),%xmm12 + movaps -0x30(%rbp),%xmm13 + movaps -0x20(%rbp),%xmm14 + movaps -0x10(%rbp),%xmm15 ___ $code.=<<___; - lea $frame_size(%rsp),%rsp + lea (%rbp),%rsp + pop %rbp .Lxts_enc_epilogue: ret .size aesni_xts_encrypt,.-aesni_xts_encrypt @@ -1714,28 +2047,32 @@ sub aesni_generate8 { .type aesni_xts_decrypt,\@function,6 .align 16 aesni_xts_decrypt: - lea -$frame_size(%rsp),%rsp + lea (%rsp),%rax + push %rbp + sub \$$frame_size,%rsp + and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - movaps %xmm6,0x60(%rsp) - movaps %xmm7,0x70(%rsp) - movaps %xmm8,0x80(%rsp) - movaps %xmm9,0x90(%rsp) - movaps %xmm10,0xa0(%rsp) - movaps %xmm11,0xb0(%rsp) - movaps %xmm12,0xc0(%rsp) - movaps %xmm13,0xd0(%rsp) - movaps %xmm14,0xe0(%rsp) - movaps %xmm15,0xf0(%rsp) + movaps %xmm6,-0xa8(%rax) + movaps %xmm7,-0x98(%rax) + movaps %xmm8,-0x88(%rax) + movaps %xmm9,-0x78(%rax) + movaps %xmm10,-0x68(%rax) + movaps %xmm11,-0x58(%rax) + movaps %xmm12,-0x48(%rax) + movaps %xmm13,-0x38(%rax) + movaps %xmm14,-0x28(%rax) + movaps %xmm15,-0x18(%rax) .Lxts_dec_body: ___ $code.=<<___; - movups ($ivp),@tweak[5] # load clear-text tweak + lea -8(%rax),%rbp + movups ($ivp),$inout0 # load clear-text tweak mov 240($key2),$rounds # key2->rounds mov 240($key),$rnds_ # key1->rounds ___ # generate the tweak - &aesni_generate1("enc",$key2,$rounds,@tweak[5]); + &aesni_generate1("enc",$key2,$rounds,$inout0); $code.=<<___; xor %eax,%eax # if ($len%16) len-=16; test \$15,$len @@ -1743,213 +2080,249 @@ sub aesni_generate8 { shl \$4,%rax sub %rax,$len + $movkey ($key),$rndkey0 # zero round key mov $key,$key_ # backup $key mov $rnds_,$rounds # backup $rounds + shl \$4,$rnds_ mov $len,$len_ # backup $len and \$-16,$len + $movkey 16($key,$rnds_),$rndkey1 # last round key + movdqa .Lxts_magic(%rip),$twmask - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp # broadcast upper bits + movdqa $inout0,@tweak[5] + pshufd \$0x5f,$inout0,$twres + pxor $rndkey0,$rndkey1 ___ for ($i=0;$i<4;$i++) { $code.=<<___; - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp + movdqa $twres,$twtmp + paddd $twres,$twres movdqa @tweak[5],@tweak[$i] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - pand $twmask,$twres # isolate carry and residue - pcmpgtd @tweak[5],$twtmp 
# broadcat upper bits - pxor $twres,@tweak[5] + psrad \$31,$twtmp # broadcast upper bits + paddq @tweak[5],@tweak[5] + pand $twmask,$twtmp + pxor $rndkey0,@tweak[$i] + pxor $twtmp,@tweak[5] ___ } $code.=<<___; + movdqa @tweak[5],@tweak[4] + psrad \$31,$twres + paddq @tweak[5],@tweak[5] + pand $twmask,$twres + pxor $rndkey0,@tweak[4] + pxor $twres,@tweak[5] + movaps $rndkey1,0x60(%rsp) # save round[0]^round[last] + sub \$16*6,$len jc .Lxts_dec_short - shr \$1,$rounds - sub \$1,$rounds - mov $rounds,$rnds_ + mov \$16+96,$rounds + lea 32($key_,$rnds_),$key # end of key schedule + sub %r10,%rax # twisted $rounds + $movkey 16($key_),$rndkey1 + mov %rax,%r10 # backup twisted $rounds + lea .Lxts_magic(%rip),%r8 jmp .Lxts_dec_grandloop -.align 16 +.align 32 .Lxts_dec_grandloop: - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak movdqu `16*0`($inp),$inout0 # load input - pand $twmask,$twres # isolate carry and residue + movdqa $rndkey0,$twmask movdqu `16*1`($inp),$inout1 - pxor $twres,@tweak[5] - + pxor @tweak[0],$inout0 movdqu `16*2`($inp),$inout2 - pxor @tweak[0],$inout0 # input^=tweak - movdqu `16*3`($inp),$inout3 pxor @tweak[1],$inout1 - movdqu `16*4`($inp),$inout4 + aesdec $rndkey1,$inout0 + movdqu `16*3`($inp),$inout3 pxor @tweak[2],$inout2 - movdqu `16*5`($inp),$inout5 - lea `16*6`($inp),$inp + aesdec $rndkey1,$inout1 + movdqu `16*4`($inp),$inout4 pxor @tweak[3],$inout3 - $movkey ($key_),$rndkey0 + aesdec $rndkey1,$inout2 + movdqu `16*5`($inp),$inout5 + pxor @tweak[5],$twmask # round[0]^=tweak[5] + movdqa 0x60(%rsp),$twres # load round[0]^round[last] pxor @tweak[4],$inout4 - pxor @tweak[5],$inout5 + aesdec $rndkey1,$inout3 + $movkey 32($key_),$rndkey0 + lea `16*6`($inp),$inp + pxor $twmask,$inout5 - # inline _aesni_decrypt6 and interleave first and last rounds - # with own code... 
- $movkey 16($key_),$rndkey1 - pxor $rndkey0,$inout0 - pxor $rndkey0,$inout1 - movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks - aesdec $rndkey1,$inout0 - lea 32($key_),$key - pxor $rndkey0,$inout2 - movdqa @tweak[1],`16*1`(%rsp) - aesdec $rndkey1,$inout1 - pxor $rndkey0,$inout3 - movdqa @tweak[2],`16*2`(%rsp) - aesdec $rndkey1,$inout2 - pxor $rndkey0,$inout4 - movdqa @tweak[3],`16*3`(%rsp) - aesdec $rndkey1,$inout3 - pxor $rndkey0,$inout5 - $movkey ($key),$rndkey0 - dec $rounds - movdqa @tweak[4],`16*4`(%rsp) + pxor $twres,@tweak[0] aesdec $rndkey1,$inout4 - movdqa @tweak[5],`16*5`(%rsp) + pxor $twres,@tweak[1] + movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key aesdec $rndkey1,$inout5 - pxor $twtmp,$twtmp - pcmpgtd @tweak[5],$twtmp - jmp .Lxts_dec_loop6_enter + $movkey 48($key_),$rndkey1 + pxor $twres,@tweak[2] -.align 16 + aesdec $rndkey0,$inout0 + pxor $twres,@tweak[3] + movdqa @tweak[1],`16*1`(%rsp) + aesdec $rndkey0,$inout1 + pxor $twres,@tweak[4] + movdqa @tweak[2],`16*2`(%rsp) + aesdec $rndkey0,$inout2 + aesdec $rndkey0,$inout3 + pxor $twres,$twmask + movdqa @tweak[4],`16*4`(%rsp) + aesdec $rndkey0,$inout4 + aesdec $rndkey0,$inout5 + $movkey 64($key_),$rndkey0 + movdqa $twmask,`16*5`(%rsp) + pshufd \$0x5f,@tweak[5],$twres + jmp .Lxts_dec_loop6 +.align 32 .Lxts_dec_loop6: aesdec $rndkey1,$inout0 aesdec $rndkey1,$inout1 - dec $rounds aesdec $rndkey1,$inout2 aesdec $rndkey1,$inout3 aesdec $rndkey1,$inout4 aesdec $rndkey1,$inout5 -.Lxts_dec_loop6_enter: - $movkey 16($key),$rndkey1 + $movkey -64($key,%rax),$rndkey1 + add \$32,%rax + aesdec $rndkey0,$inout0 aesdec $rndkey0,$inout1 - lea 32($key),$key aesdec $rndkey0,$inout2 aesdec $rndkey0,$inout3 aesdec $rndkey0,$inout4 aesdec $rndkey0,$inout5 - $movkey ($key),$rndkey0 + $movkey -80($key,%rax),$rndkey0 jnz .Lxts_dec_loop6 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqa (%r8),$twmask + movdqa $twres,$twtmp + paddd $twres,$twres aesdec $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue + paddq @tweak[5],@tweak[5] + psrad \$31,$twtmp aesdec $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcast upper bits + pand $twmask,$twtmp + $movkey ($key_),@tweak[0] # load round[0] aesdec $rndkey1,$inout2 - pxor $twres,@tweak[5] aesdec $rndkey1,$inout3 aesdec $rndkey1,$inout4 + pxor $twtmp,@tweak[5] + movaps @tweak[0],@tweak[1] # copy round[0] aesdec $rndkey1,$inout5 - $movkey 16($key),$rndkey1 + $movkey -64($key),$rndkey1 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[0] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak + movdqa $twres,$twtmp aesdec $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue + paddd $twres,$twres + pxor @tweak[5],@tweak[0] aesdec $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits + psrad \$31,$twtmp + paddq @tweak[5],@tweak[5] aesdec $rndkey0,$inout2 - pxor $twres,@tweak[5] aesdec $rndkey0,$inout3 + pand $twmask,$twtmp + movaps @tweak[1],@tweak[2] aesdec $rndkey0,$inout4 + pxor $twtmp,@tweak[5] + movdqa $twres,$twtmp aesdec $rndkey0,$inout5 - $movkey 32($key),$rndkey0 + $movkey -48($key),$rndkey0 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[1] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak + paddd $twres,$twres aesdec $rndkey1,$inout0 - pand $twmask,$twres # isolate carry and residue + pxor @tweak[5],@tweak[1] + psrad \$31,$twtmp aesdec $rndkey1,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits + paddq @tweak[5],@tweak[5] + pand $twmask,$twtmp 
aesdec $rndkey1,$inout2 - pxor $twres,@tweak[5] aesdec $rndkey1,$inout3 + movdqa @tweak[3],`16*3`(%rsp) + pxor $twtmp,@tweak[5] aesdec $rndkey1,$inout4 + movaps @tweak[2],@tweak[3] + movdqa $twres,$twtmp aesdec $rndkey1,$inout5 + $movkey -32($key),$rndkey1 - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[2] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - aesdeclast $rndkey0,$inout0 - pand $twmask,$twres # isolate carry and residue - aesdeclast $rndkey0,$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits - aesdeclast $rndkey0,$inout2 - pxor $twres,@tweak[5] - aesdeclast $rndkey0,$inout3 - aesdeclast $rndkey0,$inout4 - aesdeclast $rndkey0,$inout5 - - pshufd \$0x13,$twtmp,$twres - pxor $twtmp,$twtmp - movdqa @tweak[5],@tweak[3] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - xorps `16*0`(%rsp),$inout0 # output^=tweak - pand $twmask,$twres # isolate carry and residue - xorps `16*1`(%rsp),$inout1 - pcmpgtd @tweak[5],$twtmp # broadcat upper bits + paddd $twres,$twres + aesdec $rndkey0,$inout0 + pxor @tweak[5],@tweak[2] + psrad \$31,$twtmp + aesdec $rndkey0,$inout1 + paddq @tweak[5],@tweak[5] + pand $twmask,$twtmp + aesdec $rndkey0,$inout2 + aesdec $rndkey0,$inout3 + aesdec $rndkey0,$inout4 + pxor $twtmp,@tweak[5] + movaps @tweak[3],@tweak[4] + aesdec $rndkey0,$inout5 + + movdqa $twres,$rndkey0 + paddd $twres,$twres + aesdec $rndkey1,$inout0 + pxor @tweak[5],@tweak[3] + psrad \$31,$rndkey0 + aesdec $rndkey1,$inout1 + paddq @tweak[5],@tweak[5] + pand $twmask,$rndkey0 + aesdec $rndkey1,$inout2 + aesdec $rndkey1,$inout3 + pxor $rndkey0,@tweak[5] + $movkey ($key_),$rndkey0 + aesdec $rndkey1,$inout4 + aesdec $rndkey1,$inout5 + $movkey 16($key_),$rndkey1 + + pxor @tweak[5],@tweak[4] + aesdeclast `16*0`(%rsp),$inout0 + psrad \$31,$twres + paddq @tweak[5],@tweak[5] + aesdeclast `16*1`(%rsp),$inout1 + aesdeclast `16*2`(%rsp),$inout2 + pand $twmask,$twres + mov %r10,%rax # restore $rounds + aesdeclast `16*3`(%rsp),$inout3 + aesdeclast `16*4`(%rsp),$inout4 + aesdeclast `16*5`(%rsp),$inout5 pxor $twres,@tweak[5] - xorps `16*2`(%rsp),$inout2 - movups $inout0,`16*0`($out) # write output - xorps `16*3`(%rsp),$inout3 - movups $inout1,`16*1`($out) - xorps `16*4`(%rsp),$inout4 - movups $inout2,`16*2`($out) - xorps `16*5`(%rsp),$inout5 - movups $inout3,`16*3`($out) - mov $rnds_,$rounds # restore $rounds - movups $inout4,`16*4`($out) - movups $inout5,`16*5`($out) lea `16*6`($out),$out + movups $inout0,`-16*6`($out) # write output + movups $inout1,`-16*5`($out) + movups $inout2,`-16*4`($out) + movups $inout3,`-16*3`($out) + movups $inout4,`-16*2`($out) + movups $inout5,`-16*1`($out) sub \$16*6,$len jnc .Lxts_dec_grandloop - lea 3($rounds,$rounds),$rounds # restore original value + mov \$16+96,$rounds + sub $rnds_,$rounds mov $key_,$key # restore $key - mov $rounds,$rnds_ # backup $rounds + shr \$4,$rounds # restore original value .Lxts_dec_short: + mov $rounds,$rnds_ # backup $rounds + pxor $rndkey0,@tweak[0] + pxor $rndkey0,@tweak[1] add \$16*6,$len jz .Lxts_dec_done + pxor $rndkey0,@tweak[2] cmp \$0x20,$len jb .Lxts_dec_one + pxor $rndkey0,@tweak[3] je .Lxts_dec_two + pxor $rndkey0,@tweak[4] cmp \$0x40,$len jb .Lxts_dec_three je .Lxts_dec_four - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movdqu ($inp),$inout0 - pand $twmask,$twres # isolate carry and residue - movdqu 16*1($inp),$inout1 - pxor $twres,@tweak[5] - + movdqu ($inp),$inout0 + movdqu 16*1($inp),$inout1 movdqu 16*2($inp),$inout2 pxor @tweak[0],$inout0 movdqu 
16*3($inp),$inout3 @@ -2008,7 +2381,7 @@ sub aesni_generate8 { xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 - call _aesni_decrypt3 + call _aesni_decrypt2 xorps @tweak[0],$inout0 movdqa @tweak[2],@tweak[0] @@ -2034,7 +2407,7 @@ sub aesni_generate8 { xorps @tweak[0],$inout0 movdqa @tweak[3],@tweak[0] xorps @tweak[1],$inout1 - movdqa @tweak[5],@tweak[1] + movdqa @tweak[4],@tweak[1] xorps @tweak[2],$inout2 movups $inout0,($out) movups $inout1,16*1($out) @@ -2044,14 +2417,8 @@ sub aesni_generate8 { .align 16 .Lxts_dec_four: - pshufd \$0x13,$twtmp,$twres - movdqa @tweak[5],@tweak[4] - paddq @tweak[5],@tweak[5] # psllq 1,$tweak - movups ($inp),$inout0 - pand $twmask,$twres # isolate carry and residue - movups 16*1($inp),$inout1 - pxor $twres,@tweak[5] - + movups ($inp),$inout0 + movups 16*1($inp),$inout1 movups 16*2($inp),$inout2 xorps @tweak[0],$inout0 movups 16*3($inp),$inout3 @@ -2062,16 +2429,16 @@ sub aesni_generate8 { call _aesni_decrypt4 - xorps @tweak[0],$inout0 + pxor @tweak[0],$inout0 movdqa @tweak[4],@tweak[0] - xorps @tweak[1],$inout1 + pxor @tweak[1],$inout1 movdqa @tweak[5],@tweak[1] - xorps @tweak[2],$inout2 - movups $inout0,($out) - xorps @tweak[3],$inout3 - movups $inout1,16*1($out) - movups $inout2,16*2($out) - movups $inout3,16*3($out) + pxor @tweak[2],$inout2 + movdqu $inout0,($out) + pxor @tweak[3],$inout3 + movdqu $inout1,16*1($out) + movdqu $inout2,16*2($out) + movdqu $inout3,16*3($out) lea 16*4($out),$out jmp .Lxts_dec_done @@ -2117,19 +2484,20 @@ sub aesni_generate8 { .Lxts_dec_ret: ___ $code.=<<___ if ($win64); - movaps 0x60(%rsp),%xmm6 - movaps 0x70(%rsp),%xmm7 - movaps 0x80(%rsp),%xmm8 - movaps 0x90(%rsp),%xmm9 - movaps 0xa0(%rsp),%xmm10 - movaps 0xb0(%rsp),%xmm11 - movaps 0xc0(%rsp),%xmm12 - movaps 0xd0(%rsp),%xmm13 - movaps 0xe0(%rsp),%xmm14 - movaps 0xf0(%rsp),%xmm15 + movaps -0xa0(%rbp),%xmm6 + movaps -0x90(%rbp),%xmm7 + movaps -0x80(%rbp),%xmm8 + movaps -0x70(%rbp),%xmm9 + movaps -0x60(%rbp),%xmm10 + movaps -0x50(%rbp),%xmm11 + movaps -0x40(%rbp),%xmm12 + movaps -0x30(%rbp),%xmm13 + movaps -0x20(%rbp),%xmm14 + movaps -0x10(%rbp),%xmm15 ___ $code.=<<___; - lea $frame_size(%rsp),%rsp + lea (%rbp),%rsp + pop %rbp .Lxts_dec_epilogue: ret .size aesni_xts_decrypt,.-aesni_xts_decrypt @@ -2141,7 +2509,10 @@ sub aesni_generate8 { # size_t length, const AES_KEY *key, # unsigned char *ivp,const int enc); { -my $reserved = $win64?0x40:-0x18; # used in decrypt +my $frame_size = 0x10 + ($win64?0xa0:0); # used in decrypt +my ($iv,$in0,$in1,$in2,$in3,$in4)=map("%xmm$_",(10..15)); +my $inp_=$key_; + $code.=<<___; .globl ${PREFIX}_cbc_encrypt .type ${PREFIX}_cbc_encrypt,\@function,6 @@ -2197,276 +2568,398 @@ sub aesni_generate8 { #--------------------------- CBC DECRYPT ------------------------------# .align 16 .Lcbc_decrypt: + lea (%rsp),%rax + push %rbp + sub \$$frame_size,%rsp + and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) + movaps %xmm6,0x10(%rsp) + movaps %xmm7,0x20(%rsp) + movaps %xmm8,0x30(%rsp) + movaps %xmm9,0x40(%rsp) + movaps %xmm10,0x50(%rsp) + movaps %xmm11,0x60(%rsp) + movaps %xmm12,0x70(%rsp) + movaps %xmm13,0x80(%rsp) + movaps %xmm14,0x90(%rsp) + movaps %xmm15,0xa0(%rsp) .Lcbc_decrypt_body: ___ $code.=<<___; + lea -8(%rax),%rbp movups ($ivp),$iv mov $rnds_,$rounds - cmp \$0x70,$len + cmp \$0x50,$len jbe .Lcbc_dec_tail - shr \$1,$rnds_ - sub \$0x70,$len - mov $rnds_,$rounds - movaps 
$iv,$reserved(%rsp) + + $movkey ($key),$rndkey0 + movdqu 0x00($inp),$inout0 # load input + movdqu 0x10($inp),$inout1 + movdqa $inout0,$in0 + movdqu 0x20($inp),$inout2 + movdqa $inout1,$in1 + movdqu 0x30($inp),$inout3 + movdqa $inout2,$in2 + movdqu 0x40($inp),$inout4 + movdqa $inout3,$in3 + movdqu 0x50($inp),$inout5 + movdqa $inout4,$in4 + mov OPENSSL_ia32cap_P+4(%rip),%r9d + cmp \$0x70,$len + jbe .Lcbc_dec_six_or_seven + + and \$`1<<26|1<<22`,%r9d # isolate XSAVE+MOVBE + sub \$0x50,$len + cmp \$`1<<22`,%r9d # check for MOVBE without XSAVE + je .Lcbc_dec_loop6_enter + sub \$0x20,$len + lea 0x70($key),$key # size optimization jmp .Lcbc_dec_loop8_enter .align 16 .Lcbc_dec_loop8: - movaps $rndkey0,$reserved(%rsp) # save IV movups $inout7,($out) lea 0x10($out),$out .Lcbc_dec_loop8_enter: - $movkey ($key),$rndkey0 - movups ($inp),$inout0 # load input - movups 0x10($inp),$inout1 - $movkey 16($key),$rndkey1 + movdqu 0x60($inp),$inout6 + pxor $rndkey0,$inout0 + movdqu 0x70($inp),$inout7 + pxor $rndkey0,$inout1 + $movkey 0x10-0x70($key),$rndkey1 + pxor $rndkey0,$inout2 + xor $inp_,$inp_ + cmp \$0x70,$len # is there at least 0x60 bytes ahead? + pxor $rndkey0,$inout3 + pxor $rndkey0,$inout4 + pxor $rndkey0,$inout5 + pxor $rndkey0,$inout6 - lea 32($key),$key - movdqu 0x20($inp),$inout2 - xorps $rndkey0,$inout0 - movdqu 0x30($inp),$inout3 - xorps $rndkey0,$inout1 - movdqu 0x40($inp),$inout4 aesdec $rndkey1,$inout0 - pxor $rndkey0,$inout2 - movdqu 0x50($inp),$inout5 + pxor $rndkey0,$inout7 + $movkey 0x20-0x70($key),$rndkey0 aesdec $rndkey1,$inout1 - pxor $rndkey0,$inout3 - movdqu 0x60($inp),$inout6 aesdec $rndkey1,$inout2 - pxor $rndkey0,$inout4 - movdqu 0x70($inp),$inout7 aesdec $rndkey1,$inout3 - pxor $rndkey0,$inout5 - dec $rounds aesdec $rndkey1,$inout4 - pxor $rndkey0,$inout6 aesdec $rndkey1,$inout5 - pxor $rndkey0,$inout7 - $movkey ($key),$rndkey0 aesdec $rndkey1,$inout6 + setnc ${inp_}b + shl \$7,$inp_ aesdec $rndkey1,$inout7 - $movkey 16($key),$rndkey1 - - call .Ldec_loop8_enter + add $inp,$inp_ + $movkey 0x30-0x70($key),$rndkey1 +___ +for($i=1;$i<12;$i++) { +my $rndkeyx = ($i&1)?$rndkey0:$rndkey1; +$code.=<<___ if ($i==7); + cmp \$11,$rounds +___ +$code.=<<___; + aesdec $rndkeyx,$inout0 + aesdec $rndkeyx,$inout1 + aesdec $rndkeyx,$inout2 + aesdec $rndkeyx,$inout3 + aesdec $rndkeyx,$inout4 + aesdec $rndkeyx,$inout5 + aesdec $rndkeyx,$inout6 + aesdec $rndkeyx,$inout7 + $movkey `0x30+0x10*$i`-0x70($key),$rndkeyx +___ +$code.=<<___ if ($i<6 || (!($i&1) && $i>7)); + nop +___ +$code.=<<___ if ($i==7); + jb .Lcbc_dec_done +___ +$code.=<<___ if ($i==9); + je .Lcbc_dec_done +___ +$code.=<<___ if ($i==11); + jmp .Lcbc_dec_done +___ +} +$code.=<<___; +.align 16 +.Lcbc_dec_done: + aesdec $rndkey1,$inout0 + aesdec $rndkey1,$inout1 + pxor $rndkey0,$iv + pxor $rndkey0,$in0 + aesdec $rndkey1,$inout2 + aesdec $rndkey1,$inout3 + pxor $rndkey0,$in1 + pxor $rndkey0,$in2 + aesdec $rndkey1,$inout4 + aesdec $rndkey1,$inout5 + pxor $rndkey0,$in3 + pxor $rndkey0,$in4 + aesdec $rndkey1,$inout6 + aesdec $rndkey1,$inout7 + movdqu 0x50($inp),$rndkey1 + + aesdeclast $iv,$inout0 + movdqu 0x60($inp),$iv # borrow $iv + pxor $rndkey0,$rndkey1 + aesdeclast $in0,$inout1 + pxor $rndkey0,$iv + movdqu 0x70($inp),$rndkey0 # next IV + aesdeclast $in1,$inout2 + lea 0x80($inp),$inp + movdqu 0x00($inp_),$in0 + aesdeclast $in2,$inout3 + aesdeclast $in3,$inout4 + movdqu 0x10($inp_),$in1 + movdqu 0x20($inp_),$in2 + aesdeclast $in4,$inout5 + aesdeclast $rndkey1,$inout6 + movdqu 0x30($inp_),$in3 + movdqu 0x40($inp_),$in4 + aesdeclast 
$iv,$inout7 + movdqa $rndkey0,$iv # return $iv + movdqu 0x50($inp_),$rndkey1 + $movkey -0x70($key),$rndkey0 + + movups $inout0,($out) # store output + movdqa $in0,$inout0 + movups $inout1,0x10($out) + movdqa $in1,$inout1 + movups $inout2,0x20($out) + movdqa $in2,$inout2 + movups $inout3,0x30($out) + movdqa $in3,$inout3 + movups $inout4,0x40($out) + movdqa $in4,$inout4 + movups $inout5,0x50($out) + movdqa $rndkey1,$inout5 + movups $inout6,0x60($out) + lea 0x70($out),$out - movups ($inp),$rndkey1 # re-load input - movups 0x10($inp),$rndkey0 - xorps $reserved(%rsp),$inout0 # ^= IV - xorps $rndkey1,$inout1 - movups 0x20($inp),$rndkey1 - xorps $rndkey0,$inout2 - movups 0x30($inp),$rndkey0 - xorps $rndkey1,$inout3 - movups 0x40($inp),$rndkey1 - xorps $rndkey0,$inout4 - movups 0x50($inp),$rndkey0 - xorps $rndkey1,$inout5 - movups 0x60($inp),$rndkey1 - xorps $rndkey0,$inout6 - movups 0x70($inp),$rndkey0 # IV - xorps $rndkey1,$inout7 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - mov $rnds_,$rounds # restore $rounds - movups $inout4,0x40($out) - mov $key_,$key # restore $key - movups $inout5,0x50($out) - lea 0x80($inp),$inp - movups $inout6,0x60($out) - lea 0x70($out),$out sub \$0x80,$len ja .Lcbc_dec_loop8 movaps $inout7,$inout0 - movaps $rndkey0,$iv + lea -0x70($key),$key add \$0x70,$len jle .Lcbc_dec_tail_collected - movups $inout0,($out) - lea 1($rnds_,$rnds_),$rounds + movups $inout7,($out) lea 0x10($out),$out + cmp \$0x50,$len + jbe .Lcbc_dec_tail + + movaps $in0,$inout0 +.Lcbc_dec_six_or_seven: + cmp \$0x60,$len + ja .Lcbc_dec_seven + + movaps $inout5,$inout6 + call _aesni_decrypt6 + pxor $iv,$inout0 # ^= IV + movaps $inout6,$iv + pxor $in0,$inout1 + movdqu $inout0,($out) + pxor $in1,$inout2 + movdqu $inout1,0x10($out) + pxor $in2,$inout3 + movdqu $inout2,0x20($out) + pxor $in3,$inout4 + movdqu $inout3,0x30($out) + pxor $in4,$inout5 + movdqu $inout4,0x40($out) + lea 0x50($out),$out + movdqa $inout5,$inout0 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_seven: + movups 0x60($inp),$inout6 + xorps $inout7,$inout7 + call _aesni_decrypt8 + movups 0x50($inp),$inout7 + pxor $iv,$inout0 # ^= IV + movups 0x60($inp),$iv + pxor $in0,$inout1 + movdqu $inout0,($out) + pxor $in1,$inout2 + movdqu $inout1,0x10($out) + pxor $in2,$inout3 + movdqu $inout2,0x20($out) + pxor $in3,$inout4 + movdqu $inout3,0x30($out) + pxor $in4,$inout5 + movdqu $inout4,0x40($out) + pxor $inout7,$inout6 + movdqu $inout5,0x50($out) + lea 0x60($out),$out + movdqa $inout6,$inout0 + jmp .Lcbc_dec_tail_collected + +.align 16 +.Lcbc_dec_loop6: + movups $inout5,($out) + lea 0x10($out),$out + movdqu 0x00($inp),$inout0 # load input + movdqu 0x10($inp),$inout1 + movdqa $inout0,$in0 + movdqu 0x20($inp),$inout2 + movdqa $inout1,$in1 + movdqu 0x30($inp),$inout3 + movdqa $inout2,$in2 + movdqu 0x40($inp),$inout4 + movdqa $inout3,$in3 + movdqu 0x50($inp),$inout5 + movdqa $inout4,$in4 +.Lcbc_dec_loop6_enter: + lea 0x60($inp),$inp + movdqa $inout5,$inout6 + + call _aesni_decrypt6 + + pxor $iv,$inout0 # ^= IV + movdqa $inout6,$iv + pxor $in0,$inout1 + movdqu $inout0,($out) + pxor $in1,$inout2 + movdqu $inout1,0x10($out) + pxor $in2,$inout3 + movdqu $inout2,0x20($out) + pxor $in3,$inout4 + mov $key_,$key + movdqu $inout3,0x30($out) + pxor $in4,$inout5 + mov $rnds_,$rounds + movdqu $inout4,0x40($out) + lea 0x50($out),$out + sub \$0x60,$len + ja .Lcbc_dec_loop6 + + movdqa $inout5,$inout0 + add \$0x50,$len + jle .Lcbc_dec_tail_collected + movups $inout5,($out) + lea 0x10($out),$out + 
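For orientation in the unrolled decrypt paths above and the tail handling that follows: the underlying CBC-decrypt data flow is simply to decrypt each block, xor in the previous ciphertext block (or the IV for the first block), and hand the last ciphertext block back through the IV pointer as the new chaining value. A minimal scalar sketch under those assumptions (illustrative C, not part of the patch; block128_f/dec are placeholder names, not the OpenSSL API):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
                           const void *key);

/* Reference CBC decryption for whole blocks (illustrative only). */
static void cbc_decrypt_ref(const uint8_t *in, uint8_t *out, size_t len,
                            const void *key, uint8_t ivec[16], block128_f dec)
{
    uint8_t prev[16], tmp[16];
    size_t i;

    memcpy(prev, ivec, 16);
    while (len >= 16) {
        memcpy(tmp, in, 16);          /* keep ciphertext: it is the next IV */
        dec(in, out, key);
        for (i = 0; i < 16; i++)
            out[i] ^= prev[i];
        memcpy(prev, tmp, 16);
        in += 16; out += 16; len -= 16;
    }
    memcpy(ivec, prev, 16);           /* chaining value handed back to caller */
}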
.Lcbc_dec_tail: movups ($inp),$inout0 - movaps $inout0,$in0 - cmp \$0x10,$len + sub \$0x10,$len jbe .Lcbc_dec_one movups 0x10($inp),$inout1 - movaps $inout1,$in1 - cmp \$0x20,$len + movaps $inout0,$in0 + sub \$0x10,$len jbe .Lcbc_dec_two movups 0x20($inp),$inout2 - movaps $inout2,$in2 - cmp \$0x30,$len + movaps $inout1,$in1 + sub \$0x10,$len jbe .Lcbc_dec_three movups 0x30($inp),$inout3 - cmp \$0x40,$len + movaps $inout2,$in2 + sub \$0x10,$len jbe .Lcbc_dec_four movups 0x40($inp),$inout4 - cmp \$0x50,$len - jbe .Lcbc_dec_five - - movups 0x50($inp),$inout5 - cmp \$0x60,$len - jbe .Lcbc_dec_six - - movups 0x60($inp),$inout6 - movaps $iv,$reserved(%rsp) # save IV - call _aesni_decrypt8 - movups ($inp),$rndkey1 - movups 0x10($inp),$rndkey0 - xorps $reserved(%rsp),$inout0 # ^= IV - xorps $rndkey1,$inout1 - movups 0x20($inp),$rndkey1 - xorps $rndkey0,$inout2 - movups 0x30($inp),$rndkey0 - xorps $rndkey1,$inout3 - movups 0x40($inp),$rndkey1 - xorps $rndkey0,$inout4 - movups 0x50($inp),$rndkey0 - xorps $rndkey1,$inout5 - movups 0x60($inp),$iv # IV - xorps $rndkey0,$inout6 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - movups $inout5,0x50($out) - lea 0x60($out),$out - movaps $inout6,$inout0 - sub \$0x70,$len + movaps $inout3,$in3 + movaps $inout4,$in4 + xorps $inout5,$inout5 + call _aesni_decrypt6 + pxor $iv,$inout0 + movaps $in4,$iv + pxor $in0,$inout1 + movdqu $inout0,($out) + pxor $in1,$inout2 + movdqu $inout1,0x10($out) + pxor $in2,$inout3 + movdqu $inout2,0x20($out) + pxor $in3,$inout4 + movdqu $inout3,0x30($out) + lea 0x40($out),$out + movdqa $inout4,$inout0 + sub \$0x10,$len jmp .Lcbc_dec_tail_collected + .align 16 .Lcbc_dec_one: + movaps $inout0,$in0 ___ &aesni_generate1("dec",$key,$rounds); $code.=<<___; xorps $iv,$inout0 movaps $in0,$iv - sub \$0x10,$len jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_two: - xorps $inout2,$inout2 - call _aesni_decrypt3 - xorps $iv,$inout0 - xorps $in0,$inout1 - movups $inout0,($out) + movaps $inout1,$in1 + call _aesni_decrypt2 + pxor $iv,$inout0 movaps $in1,$iv - movaps $inout1,$inout0 + pxor $in0,$inout1 + movdqu $inout0,($out) + movdqa $inout1,$inout0 lea 0x10($out),$out - sub \$0x20,$len jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_three: + movaps $inout2,$in2 call _aesni_decrypt3 - xorps $iv,$inout0 - xorps $in0,$inout1 - movups $inout0,($out) - xorps $in1,$inout2 - movups $inout1,0x10($out) + pxor $iv,$inout0 movaps $in2,$iv - movaps $inout2,$inout0 + pxor $in0,$inout1 + movdqu $inout0,($out) + pxor $in1,$inout2 + movdqu $inout1,0x10($out) + movdqa $inout2,$inout0 lea 0x20($out),$out - sub \$0x30,$len jmp .Lcbc_dec_tail_collected .align 16 .Lcbc_dec_four: + movaps $inout3,$in3 call _aesni_decrypt4 - xorps $iv,$inout0 - movups 0x30($inp),$iv - xorps $in0,$inout1 - movups $inout0,($out) - xorps $in1,$inout2 - movups $inout1,0x10($out) - xorps $in2,$inout3 - movups $inout2,0x20($out) - movaps $inout3,$inout0 + pxor $iv,$inout0 + movaps $in3,$iv + pxor $in0,$inout1 + movdqu $inout0,($out) + pxor $in1,$inout2 + movdqu $inout1,0x10($out) + pxor $in2,$inout3 + movdqu $inout2,0x20($out) + movdqa $inout3,$inout0 lea 0x30($out),$out - sub \$0x40,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_five: - xorps $inout5,$inout5 - call _aesni_decrypt6 - movups 0x10($inp),$rndkey1 - movups 0x20($inp),$rndkey0 - xorps $iv,$inout0 - xorps $in0,$inout1 - xorps $rndkey1,$inout2 - movups 0x30($inp),$rndkey1 - xorps $rndkey0,$inout3 - movups 0x40($inp),$iv - xorps 
$rndkey1,$inout4 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - lea 0x40($out),$out - movaps $inout4,$inout0 - sub \$0x50,$len - jmp .Lcbc_dec_tail_collected -.align 16 -.Lcbc_dec_six: - call _aesni_decrypt6 - movups 0x10($inp),$rndkey1 - movups 0x20($inp),$rndkey0 - xorps $iv,$inout0 - xorps $in0,$inout1 - xorps $rndkey1,$inout2 - movups 0x30($inp),$rndkey1 - xorps $rndkey0,$inout3 - movups 0x40($inp),$rndkey0 - xorps $rndkey1,$inout4 - movups 0x50($inp),$iv - xorps $rndkey0,$inout5 - movups $inout0,($out) - movups $inout1,0x10($out) - movups $inout2,0x20($out) - movups $inout3,0x30($out) - movups $inout4,0x40($out) - lea 0x50($out),$out - movaps $inout5,$inout0 - sub \$0x60,$len jmp .Lcbc_dec_tail_collected + .align 16 .Lcbc_dec_tail_collected: - and \$15,$len movups $iv,($ivp) + and \$15,$len jnz .Lcbc_dec_tail_partial movups $inout0,($out) jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: - movaps $inout0,$reserved(%rsp) + movaps $inout0,(%rsp) mov \$16,%rcx mov $out,%rdi sub $len,%rcx - lea $reserved(%rsp),%rsi + lea (%rsp),%rsi .long 0x9066A4F3 # rep movsb .Lcbc_dec_ret: ___ $code.=<<___ if ($win64); - movaps (%rsp),%xmm6 - movaps 0x10(%rsp),%xmm7 - movaps 0x20(%rsp),%xmm8 - movaps 0x30(%rsp),%xmm9 - lea 0x58(%rsp),%rsp + movaps 0x10(%rsp),%xmm6 + movaps 0x20(%rsp),%xmm7 + movaps 0x30(%rsp),%xmm8 + movaps 0x40(%rsp),%xmm9 + movaps 0x50(%rsp),%xmm10 + movaps 0x60(%rsp),%xmm11 + movaps 0x70(%rsp),%xmm12 + movaps 0x80(%rsp),%xmm13 + movaps 0x90(%rsp),%xmm14 + movaps 0xa0(%rsp),%xmm15 ___ $code.=<<___; + lea (%rbp),%rsp + pop %rbp .Lcbc_ret: ret .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt @@ -2733,6 +3226,8 @@ sub aesni_generate8 { .long 1,0,0,0 .Lxts_magic: .long 0x87,0,1,0 +.Lincrement1: + .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 .asciz "AES for Intel AES-NI, CRYPTOGAMS by " .align 64 @@ -2791,45 +3286,9 @@ sub aesni_generate8 { jmp .Lcommon_seh_tail .size ecb_ccm64_se_handler,.-ecb_ccm64_se_handler -.type ctr32_se_handler,\@abi-omnipotent +.type ctr_xts_se_handler,\@abi-omnipotent .align 16 -ctr32_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lctr32_body(%rip),%r10 - cmp %r10,%rbx # context->Rip<"prologue" label - jb .Lcommon_seh_tail - - mov 152($context),%rax # pull context->Rsp - - lea .Lctr32_ret(%rip),%r10 - cmp %r10,%rbx - jae .Lcommon_seh_tail - - lea 0x20(%rax),%rsi # %xmm save area - lea 512($context),%rdi # &context.Xmm6 - mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) - .long 0xa548f3fc # cld; rep movsq - lea 0xc8(%rax),%rax # adjust stack pointer - - jmp .Lcommon_seh_tail -.size ctr32_se_handler,.-ctr32_se_handler - -.type xts_se_handler,\@abi-omnipotent -.align 16 -xts_se_handler: +ctr_xts_se_handler: push %rsi push %rdi push %rbx @@ -2859,14 +3318,14 @@ sub aesni_generate8 { cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail - lea 0x60(%rax),%rsi # %xmm save area + mov 160($context),%rax # pull context->Rbp + lea -0xa0(%rax),%rsi # %xmm save area lea 512($context),%rdi # & context.Xmm6 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) .long 0xa548f3fc # cld; rep movsq - lea 0x68+160(%rax),%rax # adjust stack pointer - jmp .Lcommon_seh_tail -.size xts_se_handler,.-xts_se_handler + jmp .Lcommon_rbp_tail +.size ctr_xts_se_handler,.-ctr_xts_se_handler ___ $code.=<<___; .type cbc_se_handler,\@abi-omnipotent @@ 
-2898,11 +3357,16 @@ sub aesni_generate8 { cmp %r10,%rbx # context->Rip>="epilogue" label jae .Lcommon_seh_tail - lea 0(%rax),%rsi # top of stack + lea 16(%rax),%rsi # %xmm save area lea 512($context),%rdi # &context.Xmm6 - mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) .long 0xa548f3fc # cld; rep movsq - lea 0x58(%rax),%rax # adjust stack pointer + +.Lcommon_rbp_tail: + mov 160($context),%rax # pull context->Rbp + mov (%rax),%rbp # restore saved %rbp + lea 8(%rax),%rax # adjust stack pointer + mov %rbp,160($context) # restore context->Rbp jmp .Lcommon_seh_tail .Lrestore_cbc_rax: @@ -3006,14 +3470,15 @@ sub aesni_generate8 { .rva .Lccm64_dec_body,.Lccm64_dec_ret # HandlerData[] .LSEH_info_ctr32: .byte 9,0,0,0 - .rva ctr32_se_handler + .rva ctr_xts_se_handler + .rva .Lctr32_body,.Lctr32_epilogue # HandlerData[] .LSEH_info_xts_enc: .byte 9,0,0,0 - .rva xts_se_handler + .rva ctr_xts_se_handler .rva .Lxts_enc_body,.Lxts_enc_epilogue # HandlerData[] .LSEH_info_xts_dec: .byte 9,0,0,0 - .rva xts_se_handler + .rva ctr_xts_se_handler .rva .Lxts_dec_body,.Lxts_dec_epilogue # HandlerData[] ___ $code.=<<___; @@ -3060,11 +3525,30 @@ sub aesni { push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M return ".byte\t".join(',',@opcode); } + elsif ($line=~/(aes[a-z]+)\s+([0x1-9a-fA-F]*)\(%rsp\),\s*%xmm([0-9]+)/) { + my %opcodelet = ( + "aesenc" => 0xdc, "aesenclast" => 0xdd, + "aesdec" => 0xde, "aesdeclast" => 0xdf + ); + return undef if (!defined($opcodelet{$1})); + my $off = $2; + push @opcode,0x44 if ($3>=8); + push @opcode,0x0f,0x38,$opcodelet{$1}; + push @opcode,0x44|(($3&7)<<3),0x24; # ModR/M + push @opcode,($off=~/^0/?oct($off):$off)&0xff; + return ".byte\t".join(',',@opcode); + } return $line; } +sub movbe { + ".byte 0x0f,0x38,0xf1,0x44,0x24,".shift; +} + $code =~ s/\`([^\`]*)\`/eval($1)/gem; $code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; +#$code =~ s/\bmovbe\s+%eax/bswap %eax; mov %eax/gm; # debugging artefact +$code =~ s/\bmovbe\s+%eax,\s*([0-9]+)\(%rsp\)/movbe($1)/gem; print $code; diff --git a/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl b/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl new file mode 100755 index 00000000000000..a1891cc03caa6b --- /dev/null +++ b/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl @@ -0,0 +1,1942 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for AES instructions as per PowerISA +# specification version 2.07, first implemented by POWER8 processor. +# The module is endian-agnostic in sense that it supports both big- +# and little-endian cases. Data alignment in parallelizable modes is +# handled with VSX loads and stores, which implies MSR.VSX flag being +# set. It should also be noted that ISA specification doesn't prohibit +# alignment exceptions for these instructions on page boundaries. +# Initially alignment was handled in pure AltiVec/VMX way [when data +# is aligned programmatically, which in turn guarantees exception- +# free execution], but it turned to hamper performance when vcipher +# instructions are interleaved. 
It's reckoned that eventual +# misalignment penalties at page boundaries are in average lower +# than additional overhead in pure AltiVec approach. + +$flavour = shift; + +if ($flavour =~ /64/) { + $SIZE_T =8; + $LRSAVE =2*$SIZE_T; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; + $UCMP ="cmpld"; + $SHL ="sldi"; +} elsif ($flavour =~ /32/) { + $SIZE_T =4; + $LRSAVE =$SIZE_T; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; + $UCMP ="cmplw"; + $SHL ="slwi"; +} else { die "nonsense $flavour"; } + +$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$FRAME=8*$SIZE_T; +$prefix="aes_p8"; + +$sp="r1"; +$vrsave="r12"; + +######################################################################### +{{{ # Key setup procedures # +my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); +my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); +my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); + +$code.=<<___; +.machine "any" + +.text + +.align 7 +rcon: +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev +.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev +.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev +.long 0,0,0,0 ?asis +Lconsts: + mflr r0 + bcl 20,31,\$+4 + mflr $ptr #vvvvv "distance between . and rcon + addi $ptr,$ptr,-0x48 + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " + +.globl .${prefix}_set_encrypt_key +.align 5 +.${prefix}_set_encrypt_key: +Lset_encrypt_key: + mflr r11 + $PUSH r11,$LRSAVE($sp) + + li $ptr,-1 + ${UCMP}i $inp,0 + beq- Lenc_key_abort # if ($inp==0) return -1; + ${UCMP}i $out,0 + beq- Lenc_key_abort # if ($out==0) return -1; + li $ptr,-2 + cmpwi $bits,128 + blt- Lenc_key_abort + cmpwi $bits,256 + bgt- Lenc_key_abort + andi. 
r0,$bits,0x3f + bne- Lenc_key_abort + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + bl Lconsts + mtlr r11 + + neg r9,$inp + lvx $in0,0,$inp + addi $inp,$inp,15 # 15 is not typo + lvsr $key,0,r9 # borrow $key + li r8,0x20 + cmpwi $bits,192 + lvx $in1,0,$inp + le?vspltisb $mask,0x0f # borrow $mask + lvx $rcon,0,$ptr + le?vxor $key,$key,$mask # adjust for byte swap + lvx $mask,r8,$ptr + addi $ptr,$ptr,0x10 + vperm $in0,$in0,$in1,$key # align [and byte swap in LE] + li $cnt,8 + vxor $zero,$zero,$zero + mtctr $cnt + + ?lvsr $outperm,0,$out + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$zero,$outmask,$outperm + + blt Loop128 + addi $inp,$inp,8 + beq L192 + addi $inp,$inp,8 + b L256 + +.align 4 +Loop128: + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + bdnz Loop128 + + lvx $rcon,0,$ptr # last two round keys + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + + addi $inp,$out,15 # 15 is not typo + addi $out,$out,0x50 + + li $rounds,10 + b Ldone + +.align 4 +L192: + lvx $tmp,0,$inp + li $cnt,4 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + vspltisb $key,8 # borrow $key + mtctr $cnt + vsububm $mask,$mask,$key # adjust the mask + +Loop192: + vperm $key,$in1,$in1,$mask # roate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vcipherlast $key,$key,$rcon + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + + vsldoi $stage,$zero,$in1,8 + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vsldoi $stage,$stage,$in0,8 + + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vsldoi $stage,$in0,$in1,8 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vperm 
$outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + stvx $stage,0,$out + addi $out,$out,16 + + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdnz Loop192 + + li $rounds,12 + addi $out,$out,0x20 + b Ldone + +.align 4 +L256: + lvx $tmp,0,$inp + li $cnt,7 + li $rounds,14 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + mtctr $cnt + +Loop256: + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in1,$in1,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdz Ldone + + vspltw $key,$in0,3 # just splat + vsldoi $tmp,$zero,$in1,12 # >>32 + vsbox $key,$key + + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + + vxor $in1,$in1,$key + b Loop256 + +.align 4 +Ldone: + lvx $in1,0,$inp # redundant in aligned case + vsel $in1,$outhead,$in1,$outmask + stvx $in1,0,$inp + li $ptr,0 + mtspr 256,$vrsave + stw $rounds,0($out) + +Lenc_key_abort: + mr r3,$ptr + blr + .long 0 + .byte 0,12,0x14,1,0,0,3,0 + .long 0 +.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key + +.globl .${prefix}_set_decrypt_key +.align 5 +.${prefix}_set_decrypt_key: + $STU $sp,-$FRAME($sp) + mflr r10 + $PUSH r10,$FRAME+$LRSAVE($sp) + bl Lset_encrypt_key + mtlr r10 + + cmpwi r3,0 + bne- Ldec_key_abort + + slwi $cnt,$rounds,4 + subi $inp,$out,240 # first round key + srwi $rounds,$rounds,1 + add $out,$inp,$cnt # last round key + mtctr $rounds + +Ldeckey: + lwz r0, 0($inp) + lwz r6, 4($inp) + lwz r7, 8($inp) + lwz r8, 12($inp) + addi $inp,$inp,16 + lwz r9, 0($out) + lwz r10,4($out) + lwz r11,8($out) + lwz r12,12($out) + stw r0, 0($out) + stw r6, 4($out) + stw r7, 8($out) + stw r8, 12($out) + subi $out,$out,16 + stw r9, -16($inp) + stw r10,-12($inp) + stw r11,-8($inp) + stw r12,-4($inp) + bdnz Ldeckey + + xor r3,r3,r3 # return value +Ldec_key_abort: + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,0,3,0 + .long 0 +.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key +___ +}}} +######################################################################### +{{{ # Single block en- and decrypt procedures # +sub gen_block () { +my $dir = shift; +my $n = $dir eq "de" ? 
"n" : ""; +my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); + +$code.=<<___; +.globl .${prefix}_${dir}crypt +.align 5 +.${prefix}_${dir}crypt: + lwz $rounds,240($key) + lis r0,0xfc00 + mfspr $vrsave,256 + li $idx,15 # 15 is not typo + mtspr 256,r0 + + lvx v0,0,$inp + neg r11,$out + lvx v1,$idx,$inp + lvsl v2,0,$inp # inpperm + le?vspltisb v4,0x0f + ?lvsl v3,0,r11 # outperm + le?vxor v2,v2,v4 + li $idx,16 + vperm v0,v0,v1,v2 # align [and byte swap in LE] + lvx v1,0,$key + ?lvsl v5,0,$key # keyperm + srwi $rounds,$rounds,1 + lvx v2,$idx,$key + addi $idx,$idx,16 + subi $rounds,$rounds,1 + ?vperm v1,v1,v2,v5 # align round key + + vxor v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + mtctr $rounds + +Loop_${dir}c: + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + addi $idx,$idx,16 + ?vperm v1,v1,v2,v5 + v${n}cipher v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + bdnz Loop_${dir}c + + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + ?vperm v1,v1,v2,v5 + v${n}cipherlast v0,v0,v1 + + vspltisb v2,-1 + vxor v1,v1,v1 + li $idx,15 # 15 is not typo + ?vperm v2,v1,v2,v3 # outmask + le?vxor v3,v3,v4 + lvx v1,0,$out # outhead + vperm v0,v0,v0,v3 # rotate [and byte swap in LE] + vsel v1,v1,v0,v2 + lvx v4,$idx,$out + stvx v1,0,$out + vsel v0,v0,v4,v2 + stvx v0,$idx,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,3,0 + .long 0 +.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +######################################################################### +{{{ # CBC en- and decrypt procedures # +my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= + map("v$_",(4..10)); +$code.=<<___; +.globl .${prefix}_cbc_encrypt +.align 5 +.${prefix}_cbc_encrypt: + ${UCMP}i $len,16 + bltlr- + + cmpwi $enc,0 # test direction + lis r0,0xffe0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + beq Lcbc_dec + +Lcbc_enc: + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $inout,$inout,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + vxor $inout,$inout,$ivec + +Loop_cbc_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm 
$rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $ivec,$inout,$rndkey0 + ${UCMP}i $len,16 + + vperm $tmp,$ivec,$ivec,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + ${UCMP}i $len,128 + bge _aesp8_cbc_decrypt8x + vmr $tmp,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $tmp,$tmp,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$tmp,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + +Loop_cbc_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipherlast $inout,$inout,$rndkey0 + ${UCMP}i $len,16 + + vxor $inout,$inout,$ivec + vmr $ivec,$tmp + vperm $tmp,$inout,$inout,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_dec + +Lcbc_done: + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + neg $enc,$ivp # write [unaligned] iv + li $idx,15 # 15 is not typo + vxor $rndkey0,$rndkey0,$rndkey0 + vspltisb $outmask,-1 + le?vspltisb $tmp,0x0f + ?lvsl $outperm,0,$enc + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + lvx $outhead,0,$ivp + vperm $ivec,$ivec,$ivec,$outperm + vsel $inout,$outhead,$ivec,$outmask + lvx $inptail,$idx,$ivp + stvx $inout,0,$ivp + vsel $inout,$ivec,$inptail,$outmask + stvx $inout,$idx,$ivp + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CBC decrypt procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment + +$code.=<<___; +.align 5 +_aesp8_cbc_decrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH 
r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + subi $len,$len,128 # bias + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_cbc_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_cbc_dec_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + #lvx $inptail,0,$inp # "caller" already did this + #addi $inp,$inp,15 # 15 is not typo + subi $inp,$inp,15 # undo "caller" + + le?li $idx,8 + lvx_u $in0,$x00,$inp # load first 8 "words" + le?lvsl $inpperm,0,$idx + le?vspltisb $tmp,0x0f + lvx_u $in1,$x10,$inp + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + lvx_u $in2,$x20,$inp + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in3,$x30,$inp + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in4,$x40,$inp + le?vperm $in2,$in2,$in2,$inpperm + vxor $out0,$in0,$rndkey0 + lvx_u $in5,$x50,$inp + le?vperm $in3,$in3,$in3,$inpperm + vxor $out1,$in1,$rndkey0 + lvx_u $in6,$x60,$inp + le?vperm $in4,$in4,$in4,$inpperm + vxor $out2,$in2,$rndkey0 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + le?vperm $in5,$in5,$in5,$inpperm + vxor $out3,$in3,$rndkey0 + le?vperm $in6,$in6,$in6,$inpperm + vxor $out4,$in4,$rndkey0 + le?vperm $in7,$in7,$in7,$inpperm + vxor $out5,$in5,$rndkey0 + vxor $out6,$in6,$rndkey0 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x + + subic $len,$len,128 # $len-=128 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + subfe. 
r0,r0,r0 # borrow?-1:0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + and r0,r0,$len + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vncipher $out0,$out0,v30 + vxor $ivec,$ivec,v31 # xor with last round key + vncipher $out1,$out1,v30 + vxor $in0,$in0,v31 + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + vncipherlast $out0,$out0,$ivec + vncipherlast $out1,$out1,$in0 + lvx_u $in0,$x00,$inp # load next input block + vncipherlast $out2,$out2,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out3,$out3,$in2 + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in2,$x20,$inp + vncipherlast $out4,$out4,$in3 + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in3,$x30,$inp + vncipherlast $out5,$out5,$in4 + le?vperm $in2,$in2,$in2,$inpperm + lvx_u $in4,$x40,$inp + vncipherlast $out6,$out6,$in5 + le?vperm $in3,$in3,$in3,$inpperm + lvx_u $in5,$x50,$inp + vncipherlast $out7,$out7,$in6 + le?vperm $in4,$in4,$in4,$inpperm + lvx_u $in6,$x60,$inp + vmr $ivec,$in7 + le?vperm $in5,$in5,$in5,$inpperm + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $in6,$in6,$in6,$inpperm + vxor $out0,$in0,$rndkey0 + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $in7,$in7,$in7,$inpperm + vxor $out1,$in1,$rndkey0 + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$rndkey0 + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$rndkey0 + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$rndkey0 + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + vxor $out5,$in5,$rndkey0 + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + vxor $out6,$in6,$rndkey0 + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + beq Loop_cbc_dec8x # did $len-=128 borrow? + + addic. $len,$len,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: # up to 7 "words" tail... 
+ vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x_tail + + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + + vncipher $out1,$out1,v30 + vxor $ivec,$ivec,v31 # last round key + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + cmplwi $len,32 # switch($len) + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi $len,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi $len,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + vncipherlast $out1,$out1,$ivec + vncipherlast $out2,$out2,$in1 + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out1,$out1,$out1,$inpperm + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x00,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x10,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x20,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x30,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x40,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x50,$out + stvx_u $out7,$x60,$out + addi $out,$out,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + vncipherlast $out2,$out2,$ivec + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out2,$out2,$out2,$inpperm + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x00,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x10,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x20,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x30,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u 
$out6,$x40,$out + stvx_u $out7,$x50,$out + addi $out,$out,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + vncipherlast $out3,$out3,$ivec + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out3,$out3,$out3,$inpperm + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x00,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x10,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x20,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x30,$out + stvx_u $out7,$x40,$out + addi $out,$out,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + vncipherlast $out4,$out4,$ivec + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out4,$out4,$out4,$inpperm + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x00,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x10,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x20,$out + stvx_u $out7,$x30,$out + addi $out,$out,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + vncipherlast $out5,$out5,$ivec + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out5,$out5,$out5,$inpperm + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x00,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x10,$out + stvx_u $out7,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + vncipherlast $out6,$out6,$ivec + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out6,$out6,$out6,$inpperm + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x00,$out + stvx_u $out7,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + vncipherlast $out7,$out7,$ivec + vmr $ivec,$in7 + + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out7,0,$out + addi $out,$out,0x10 + +Lcbc_dec8x_done: + le?vperm $ivec,$ivec,$ivec,$inpperm + stvx_u $ivec,0,$ivp # write [unaligned] iv + + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt +___ +}} }}} + +######################################################################### +{{{ # CTR procedure[s] # +my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my 
($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= + map("v$_",(4..11)); +my $dat=$tmp; + +$code.=<<___; +.globl .${prefix}_ctr32_encrypt_blocks +.align 5 +.${prefix}_ctr32_encrypt_blocks: + ${UCMP}i $len,1 + bltlr- + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + vspltisb $one,1 + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + vsldoi $one,$rndkey0,$one,1 + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + + ${UCMP}i $len,8 + bge _aesp8_ctr32_encrypt8x + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + lvx $rndkey0,0,$key + mtctr $rounds + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_ctr32_enc + + vadduwm $ivec,$ivec,$one + vmr $dat,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + subic. $len,$len,1 # blocks-- + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + vperm $dat,$dat,$inptail,$inpperm + li $idx,16 + ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm + lvx $rndkey0,0,$key + vxor $dat,$dat,$rndkey1 # last round key + vcipherlast $inout,$inout,$dat + + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + vperm $inout,$inout,$inout,$outperm + vsel $dat,$outhead,$inout,$outmask + mtctr $rounds + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vmr $outhead,$inout + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + stvx $dat,0,$out + addi $out,$out,16 + bne Loop_ctr32_enc + + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CTR procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment +my ($two,$three,$four)=($outhead,$outperm,$outmask); + +$code.=<<___; +.align 5 +_aesp8_ctr32_encrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + 
stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_ctr32_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_ctr32_enc_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vadduwm $two,$one,$one + subi $inp,$inp,15 # undo "caller" + $SHL $len,$len,4 + + vadduwm $out1,$ivec,$one # counter values ... + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] + le?li $idx,8 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + le?lvsl $inpperm,0,$idx + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + le?vspltisb $tmp,0x0f + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + vxor $out7,$out7,$rndkey0 + + mtctr $rounds + b Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 +Loop_ctr32_enc8x_middle: + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_ctr32_enc8x + + subic r11,$len,256 # $len-256, borrow $key_ + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 + + subfe r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + + and r0,r0,r11 + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + vcipher $out6,$out6,v26 + vcipher $out7,$out7,v26 + lvx v24,$x00,$key_ # re-pre-load round[1] + + subic $len,$len,129 # $len-=129 + vcipher $out0,$out0,v27 + addi $len,$len,1 # $len-=128 really + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + vcipher $out6,$out6,v27 + vcipher $out7,$out7,v27 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vcipher $out0,$out0,v28 + lvx_u $in0,$x00,$inp # load input + vcipher $out1,$out1,v28 + lvx_u $in1,$x10,$inp + vcipher $out2,$out2,v28 + lvx_u $in2,$x20,$inp + vcipher $out3,$out3,v28 + lvx_u $in3,$x30,$inp + vcipher $out4,$out4,v28 + lvx_u $in4,$x40,$inp + vcipher $out5,$out5,v28 + lvx_u $in5,$x50,$inp + vcipher $out6,$out6,v28 + lvx_u $in6,$x60,$inp + vcipher $out7,$out7,v28 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v29 + le?vperm $in1,$in1,$in1,$inpperm + vcipher $out2,$out2,v29 + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out3,$out3,v29 + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out4,$out4,v29 + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out5,$out5,v29 + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out6,$out6,v29 + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out7,$out7,v29 + le?vperm $in7,$in7,$in7,$inpperm + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + subfe. 
r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v30 + vxor $in0,$in0,v31 # xor with last round key + vcipher $out1,$out1,v30 + vxor $in1,$in1,v31 + vcipher $out2,$out2,v30 + vxor $in2,$in2,v31 + vcipher $out3,$out3,v30 + vxor $in3,$in3,v31 + vcipher $out4,$out4,v30 + vxor $in4,$in4,v31 + vcipher $out5,$out5,v30 + vxor $in5,$in5,v31 + vcipher $out6,$out6,v30 + vxor $in6,$in6,v31 + vcipher $out7,$out7,v30 + vxor $in7,$in7,v31 + + bne Lctr32_enc8x_break # did $len-129 borrow? + + vcipherlast $in0,$out0,$in0 + vcipherlast $in1,$out1,$in1 + vadduwm $out1,$ivec,$one # counter values ... + vcipherlast $in2,$out2,$in2 + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + vcipherlast $in3,$out3,$in3 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + vcipherlast $in4,$out4,$in4 + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + vcipherlast $in5,$out5,$in5 + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + vcipherlast $in6,$out6,$in6 + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vcipherlast $in7,$out7,$in7 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + le?vperm $in0,$in0,$in0,$inpperm + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + le?vperm $in1,$in1,$in1,$inpperm + vxor $out7,$out7,$rndkey0 + mtctr $rounds + + vcipher $out0,$out0,v24 + stvx_u $in0,$x00,$out + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out1,$out1,v24 + stvx_u $in1,$x10,$out + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out2,$out2,v24 + stvx_u $in2,$x20,$out + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out3,$out3,v24 + stvx_u $in3,$x30,$out + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out4,$out4,v24 + stvx_u $in4,$x40,$out + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out5,$out5,v24 + stvx_u $in5,$x50,$out + le?vperm $in7,$in7,$in7,$inpperm + vcipher $out6,$out6,v24 + stvx_u $in6,$x60,$out + vcipher $out7,$out7,v24 + stvx_u $in7,$x70,$out + addi $out,$out,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi $len,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi $len,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi $len,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi $len,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + vcipherlast $out0,$out0,$in0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + vcipherlast $out5,$out5,$in5 + vcipherlast $out6,$out6,$in6 + vcipherlast $out7,$out7,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + vcipherlast $out0,$out0,$in1 + vcipherlast $out1,$out1,$in2 + vcipherlast $out2,$out2,$in3 + vcipherlast $out3,$out3,$in4 + vcipherlast $out4,$out4,$in5 + vcipherlast $out5,$out5,$in6 + vcipherlast $out6,$out6,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u 
$out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + stvx_u $out6,$x60,$out + addi $out,$out,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + vcipherlast $out0,$out0,$in2 + vcipherlast $out1,$out1,$in3 + vcipherlast $out2,$out2,$in4 + vcipherlast $out3,$out3,$in5 + vcipherlast $out4,$out4,$in6 + vcipherlast $out5,$out5,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + stvx_u $out5,$x50,$out + addi $out,$out,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + vcipherlast $out0,$out0,$in3 + vcipherlast $out1,$out1,$in4 + vcipherlast $out2,$out2,$in5 + vcipherlast $out3,$out3,$in6 + vcipherlast $out4,$out4,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + vcipherlast $out0,$out0,$in4 + vcipherlast $out1,$out1,$in5 + vcipherlast $out2,$out2,$in6 + vcipherlast $out3,$out3,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + vcipherlast $out0,$out0,$in5 + vcipherlast $out1,$out1,$in6 + vcipherlast $out2,$out2,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lctr32_enc8x_two: + vcipherlast $out0,$out0,$in6 + vcipherlast $out1,$out1,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lctr32_enc8x_one: + vcipherlast $out0,$out0,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + stvx_u $out0,0,$out + addi $out,$out,0x10 + +Lctr32_enc8x_done: + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP 
r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks +___ +}} }}} + +my $consts=1; +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + # constants table endian-specific conversion + if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { + my $conv=$3; + my @bytes=(); + + # convert to endian-agnostic format + if ($1 eq "long") { + foreach (split(/,\s*/,$2)) { + my $l = /^0/?oct:int; + push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; + } + } else { + @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); + } + + # little-endian conversion + if ($flavour =~ /le$/o) { + SWITCH: for($conv) { + /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; + } + } + + #emit + print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; + next; + } + $consts=0 if (m/Lconsts:/o); # end of table + + # instructions prefixed with '?' are endian-specific and need + # to be adjusted accordingly... + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o or + s/\?lvsr/lvsl/o or + s/\?lvsl/lvsr/o or + s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or + s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or + s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; + } else { # big-endian + s/le\?/#le#/o or + s/be\?//o or + s/\?([a-z]+)/$1/o; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/aest4-sparcv9.pl b/deps/openssl/openssl/crypto/aes/asm/aest4-sparcv9.pl new file mode 100644 index 00000000000000..536f23b47c70bd --- /dev/null +++ b/deps/openssl/openssl/crypto/aes/asm/aest4-sparcv9.pl @@ -0,0 +1,919 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by David S. Miller and Andy Polyakov +# . The module is licensed under 2-clause BSD +# license. October 2012. All rights reserved. +# ==================================================================== + +###################################################################### +# AES for SPARC T4. +# +# AES round instructions complete in 3 cycles and can be issued every +# cycle. It means that round calculations should take 4*rounds cycles, +# because any given round instruction depends on result of *both* +# previous instructions: +# +# |0 |1 |2 |3 |4 +# |01|01|01| +# |23|23|23| +# |01|01|... +# |23|... +# +# Provided that fxor [with IV] takes 3 cycles to complete, critical +# path length for CBC encrypt would be 3+4*rounds, or in other words +# it should process one byte in at least (3+4*rounds)/16 cycles. This +# estimate doesn't account for "collateral" instructions, such as +# fetching input from memory, xor-ing it with zero-round key and +# storing the result. Yet, *measured* performance [for data aligned +# at 64-bit boundary!] deviates from this equation by less than 0.5%: +# +# 128-bit key 192- 256- +# CBC encrypt 2.70/2.90(*) 3.20/3.40 3.70/3.90 +# (*) numbers after slash are for +# misaligned data. +# +# Out-of-order execution logic managed to fully overlap "collateral" +# instructions with those on critical path. Amazing! 
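# A minimal standalone Perl sketch (not part of the upstream patch) of the
# cycles-per-byte bound quoted in the note above, assuming the stated 3-cycle
# fxor on the critical path plus a 4*rounds-cycle serial round dependency.
# It prints 2.69/3.19/3.69, matching the measured aligned CBC-encrypt figures
# (2.70/3.20/3.70) to within the quoted 0.5% deviation.
foreach my $bits (128, 192, 256) {
    my $rounds = 6 + $bits/32;                 # AES-128/192/256 -> 10/12/14 rounds
    printf "%3d-bit key: >= %.2f cycles per byte\n",
           $bits, (3 + 4*$rounds)/16;          # (3 + 4*rounds)/16 from the estimate above
}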
+# +# As with Intel AES-NI, question is if it's possible to improve +# performance of parallelizeable modes by interleaving round +# instructions. Provided round instruction latency and throughput +# optimal interleave factor is 2. But can we expect 2x performance +# improvement? Well, as round instructions can be issued one per +# cycle, they don't saturate the 2-way issue pipeline and therefore +# there is room for "collateral" calculations... Yet, 2x speed-up +# over CBC encrypt remains unattaintable: +# +# 128-bit key 192- 256- +# CBC decrypt 1.64/2.11 1.89/2.37 2.23/2.61 +# CTR 1.64/2.08(*) 1.89/2.33 2.23/2.61 +# (*) numbers after slash are for +# misaligned data. +# +# Estimates based on amount of instructions under assumption that +# round instructions are not pairable with any other instruction +# suggest that latter is the actual case and pipeline runs +# underutilized. It should be noted that T4 out-of-order execution +# logic is so capable that performance gain from 2x interleave is +# not even impressive, ~7-13% over non-interleaved code, largest +# for 256-bit keys. + +# To anchor to something else, software implementation processes +# one byte in 29 cycles with 128-bit key on same processor. Intel +# Sandy Bridge encrypts byte in 5.07 cycles in CBC mode and decrypts +# in 0.93, naturally with AES-NI. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "sparcv9_modes.pl"; + +&asm_init(@ARGV); + +$::evp=1; # if $evp is set to 0, script generates module with +# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry +# points. These however are not fully compatible with openssl/aes.h, +# because they expect AES_KEY to be aligned at 64-bit boundary. When +# used through EVP, alignment is arranged at EVP layer. Second thing +# that is arranged by EVP is at least 32-bit alignment of IV. + +###################################################################### +# single-round subroutines +# +{ +my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5)); + +$code.=<<___ if ($::abibits==64); +.register %g2,#scratch +.register %g3,#scratch + +___ +$code.=<<___; +.text + +.globl aes_t4_encrypt +.align 32 +aes_t4_encrypt: + andcc $inp, 7, %g1 ! is input aligned? + andn $inp, 7, $inp + + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 + + ldx [$inp + 0], %o4 + bz,pt %icc, 1f + ldx [$inp + 8], %o5 + ldx [$inp + 16], $inp + sll %g1, 3, %g1 + sub %g0, %g1, %o3 + sllx %o4, %g1, %o4 + sllx %o5, %g1, %g1 + srlx %o5, %o3, %o5 + srlx $inp, %o3, %o3 + or %o5, %o4, %o4 + or %o3, %g1, %o5 +1: + ld [$key + 240], $rounds + ldd [$key + 16], %f12 + ldd [$key + 24], %f14 + xor %g4, %o4, %o4 + xor %g5, %o5, %o5 + movxtod %o4, %f0 + movxtod %o5, %f2 + srl $rounds, 1, $rounds + ldd [$key + 32], %f16 + sub $rounds, 1, $rounds + ldd [$key + 40], %f18 + add $key, 48, $key + +.Lenc: + aes_eround01 %f12, %f0, %f2, %f4 + aes_eround23 %f14, %f0, %f2, %f2 + ldd [$key + 0], %f12 + ldd [$key + 8], %f14 + sub $rounds,1,$rounds + aes_eround01 %f16, %f4, %f2, %f0 + aes_eround23 %f18, %f4, %f2, %f2 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + brnz,pt $rounds, .Lenc + add $key, 32, $key + + andcc $out, 7, $tmp ! is output aligned? 
+ aes_eround01 %f12, %f0, %f2, %f4 + aes_eround23 %f14, %f0, %f2, %f2 + aes_eround01_l %f16, %f4, %f2, %f0 + aes_eround23_l %f18, %f4, %f2, %f2 + + bnz,pn %icc, 2f + nop + + std %f0, [$out + 0] + retl + std %f2, [$out + 8] + +2: alignaddrl $out, %g0, $out + mov 0xff, $mask + srl $mask, $tmp, $mask + + faligndata %f0, %f0, %f4 + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $mask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $mask, $mask + retl + stda %f8, [$out + $mask]0xc0 ! partial store +.type aes_t4_encrypt,#function +.size aes_t4_encrypt,.-aes_t4_encrypt + +.globl aes_t4_decrypt +.align 32 +aes_t4_decrypt: + andcc $inp, 7, %g1 ! is input aligned? + andn $inp, 7, $inp + + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 + + ldx [$inp + 0], %o4 + bz,pt %icc, 1f + ldx [$inp + 8], %o5 + ldx [$inp + 16], $inp + sll %g1, 3, %g1 + sub %g0, %g1, %o3 + sllx %o4, %g1, %o4 + sllx %o5, %g1, %g1 + srlx %o5, %o3, %o5 + srlx $inp, %o3, %o3 + or %o5, %o4, %o4 + or %o3, %g1, %o5 +1: + ld [$key + 240], $rounds + ldd [$key + 16], %f12 + ldd [$key + 24], %f14 + xor %g4, %o4, %o4 + xor %g5, %o5, %o5 + movxtod %o4, %f0 + movxtod %o5, %f2 + srl $rounds, 1, $rounds + ldd [$key + 32], %f16 + sub $rounds, 1, $rounds + ldd [$key + 40], %f18 + add $key, 48, $key + +.Ldec: + aes_dround01 %f12, %f0, %f2, %f4 + aes_dround23 %f14, %f0, %f2, %f2 + ldd [$key + 0], %f12 + ldd [$key + 8], %f14 + sub $rounds,1,$rounds + aes_dround01 %f16, %f4, %f2, %f0 + aes_dround23 %f18, %f4, %f2, %f2 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + brnz,pt $rounds, .Ldec + add $key, 32, $key + + andcc $out, 7, $tmp ! is output aligned? + aes_dround01 %f12, %f0, %f2, %f4 + aes_dround23 %f14, %f0, %f2, %f2 + aes_dround01_l %f16, %f4, %f2, %f0 + aes_dround23_l %f18, %f4, %f2, %f2 + + bnz,pn %icc, 2f + nop + + std %f0, [$out + 0] + retl + std %f2, [$out + 8] + +2: alignaddrl $out, %g0, $out + mov 0xff, $mask + srl $mask, $tmp, $mask + + faligndata %f0, %f0, %f4 + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $mask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $mask, $mask + retl + stda %f8, [$out + $mask]0xc0 ! 
partial store +.type aes_t4_decrypt,#function +.size aes_t4_decrypt,.-aes_t4_decrypt +___ +} + +###################################################################### +# key setup subroutines +# +{ +my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5)); +$code.=<<___; +.globl aes_t4_set_encrypt_key +.align 32 +aes_t4_set_encrypt_key: +.Lset_encrypt_key: + and $inp, 7, $tmp + alignaddr $inp, %g0, $inp + cmp $bits, 192 + ldd [$inp + 0], %f0 + bl,pt %icc,.L128 + ldd [$inp + 8], %f2 + + be,pt %icc,.L192 + ldd [$inp + 16], %f4 + brz,pt $tmp, .L256aligned + ldd [$inp + 24], %f6 + + ldd [$inp + 32], %f8 + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + faligndata %f6, %f8, %f6 +.L256aligned: +___ +for ($i=0; $i<6; $i++) { + $code.=<<___; + std %f0, [$out + `32*$i+0`] + aes_kexpand1 %f0, %f6, $i, %f0 + std %f2, [$out + `32*$i+8`] + aes_kexpand2 %f2, %f0, %f2 + std %f4, [$out + `32*$i+16`] + aes_kexpand0 %f4, %f2, %f4 + std %f6, [$out + `32*$i+24`] + aes_kexpand2 %f6, %f4, %f6 +___ +} +$code.=<<___; + std %f0, [$out + `32*$i+0`] + aes_kexpand1 %f0, %f6, $i, %f0 + std %f2, [$out + `32*$i+8`] + aes_kexpand2 %f2, %f0, %f2 + std %f4, [$out + `32*$i+16`] + std %f6, [$out + `32*$i+24`] + std %f0, [$out + `32*$i+32`] + std %f2, [$out + `32*$i+40`] + + mov 14, $tmp + st $tmp, [$out + 240] + retl + xor %o0, %o0, %o0 + +.align 16 +.L192: + brz,pt $tmp, .L192aligned + nop + + ldd [$inp + 24], %f6 + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 +.L192aligned: +___ +for ($i=0; $i<7; $i++) { + $code.=<<___; + std %f0, [$out + `24*$i+0`] + aes_kexpand1 %f0, %f4, $i, %f0 + std %f2, [$out + `24*$i+8`] + aes_kexpand2 %f2, %f0, %f2 + std %f4, [$out + `24*$i+16`] + aes_kexpand2 %f4, %f2, %f4 +___ +} +$code.=<<___; + std %f0, [$out + `24*$i+0`] + aes_kexpand1 %f0, %f4, $i, %f0 + std %f2, [$out + `24*$i+8`] + aes_kexpand2 %f2, %f0, %f2 + std %f4, [$out + `24*$i+16`] + std %f0, [$out + `24*$i+24`] + std %f2, [$out + `24*$i+32`] + + mov 12, $tmp + st $tmp, [$out + 240] + retl + xor %o0, %o0, %o0 + +.align 16 +.L128: + brz,pt $tmp, .L128aligned + nop + + ldd [$inp + 16], %f4 + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 +.L128aligned: +___ +for ($i=0; $i<10; $i++) { + $code.=<<___; + std %f0, [$out + `16*$i+0`] + aes_kexpand1 %f0, %f2, $i, %f0 + std %f2, [$out + `16*$i+8`] + aes_kexpand2 %f2, %f0, %f2 +___ +} +$code.=<<___; + std %f0, [$out + `16*$i+0`] + std %f2, [$out + `16*$i+8`] + + mov 10, $tmp + st $tmp, [$out + 240] + retl + xor %o0, %o0, %o0 +.type aes_t4_set_encrypt_key,#function +.size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key + +.globl aes_t4_set_decrypt_key +.align 32 +aes_t4_set_decrypt_key: + mov %o7, %o5 + call .Lset_encrypt_key + nop + + mov %o5, %o7 + sll $tmp, 4, $inp ! $tmp is number of rounds + add $tmp, 2, $tmp + add $out, $inp, $inp ! $inp=$out+16*rounds + srl $tmp, 2, $tmp ! 
$tmp=(rounds+2)/4 + +.Lkey_flip: + ldd [$out + 0], %f0 + ldd [$out + 8], %f2 + ldd [$out + 16], %f4 + ldd [$out + 24], %f6 + ldd [$inp + 0], %f8 + ldd [$inp + 8], %f10 + ldd [$inp - 16], %f12 + ldd [$inp - 8], %f14 + sub $tmp, 1, $tmp + std %f0, [$inp + 0] + std %f2, [$inp + 8] + std %f4, [$inp - 16] + std %f6, [$inp - 8] + std %f8, [$out + 0] + std %f10, [$out + 8] + std %f12, [$out + 16] + std %f14, [$out + 24] + add $out, 32, $out + brnz $tmp, .Lkey_flip + sub $inp, 32, $inp + + retl + xor %o0, %o0, %o0 +.type aes_t4_set_decrypt_key,#function +.size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key +___ +} + +{{{ +my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5)); +my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7)); + +$code.=<<___; +.align 32 +_aes128_encrypt_1x: +___ +for ($i=0; $i<4; $i++) { + $code.=<<___; + aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_eround01 %f48, %f0, %f2, %f4 + aes_eround23 %f50, %f0, %f2, %f2 + aes_eround01_l %f52, %f4, %f2, %f0 + retl + aes_eround23_l %f54, %f4, %f2, %f2 +.type _aes128_encrypt_1x,#function +.size _aes128_encrypt_1x,.-_aes128_encrypt_1x + +.align 32 +_aes128_encrypt_2x: +___ +for ($i=0; $i<4; $i++) { + $code.=<<___; + aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 + aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ +} +$code.=<<___; + aes_eround01 %f48, %f0, %f2, %f8 + aes_eround23 %f50, %f0, %f2, %f2 + aes_eround01 %f48, %f4, %f6, %f10 + aes_eround23 %f50, %f4, %f6, %f6 + aes_eround01_l %f52, %f8, %f2, %f0 + aes_eround23_l %f54, %f8, %f2, %f2 + aes_eround01_l %f52, %f10, %f6, %f4 + retl + aes_eround23_l %f54, %f10, %f6, %f6 +.type _aes128_encrypt_2x,#function +.size _aes128_encrypt_2x,.-_aes128_encrypt_2x + +.align 32 +_aes128_loadkey: + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 +___ +for ($i=2; $i<22;$i++) { # load key schedule + $code.=<<___; + ldd [$key + `8*$i`], %f`12+2*$i` +___ +} +$code.=<<___; + retl + nop +.type _aes128_loadkey,#function +.size _aes128_loadkey,.-_aes128_loadkey +_aes128_load_enckey=_aes128_loadkey +_aes128_load_deckey=_aes128_loadkey + +___ + +&alg_cbc_encrypt_implement("aes",128); +if ($::evp) { + &alg_ctr32_implement("aes",128); + &alg_xts_implement("aes",128,"en"); + &alg_xts_implement("aes",128,"de"); +} +&alg_cbc_decrypt_implement("aes",128); + +$code.=<<___; +.align 32 +_aes128_decrypt_1x: +___ +for ($i=0; $i<4; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_dround01 %f48, %f0, %f2, %f4 + aes_dround23 %f50, %f0, %f2, %f2 + aes_dround01_l %f52, %f4, %f2, %f0 + retl + aes_dround23_l %f54, %f4, %f2, %f2 +.type _aes128_decrypt_1x,#function +.size _aes128_decrypt_1x,.-_aes128_decrypt_1x + +.align 32 +_aes128_decrypt_2x: +___ +for ($i=0; $i<4; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f8, 
%f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ +} +$code.=<<___; + aes_dround01 %f48, %f0, %f2, %f8 + aes_dround23 %f50, %f0, %f2, %f2 + aes_dround01 %f48, %f4, %f6, %f10 + aes_dround23 %f50, %f4, %f6, %f6 + aes_dround01_l %f52, %f8, %f2, %f0 + aes_dround23_l %f54, %f8, %f2, %f2 + aes_dround01_l %f52, %f10, %f6, %f4 + retl + aes_dround23_l %f54, %f10, %f6, %f6 +.type _aes128_decrypt_2x,#function +.size _aes128_decrypt_2x,.-_aes128_decrypt_2x +___ + +$code.=<<___; +.align 32 +_aes192_encrypt_1x: +___ +for ($i=0; $i<5; $i++) { + $code.=<<___; + aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_eround01 %f56, %f0, %f2, %f4 + aes_eround23 %f58, %f0, %f2, %f2 + aes_eround01_l %f60, %f4, %f2, %f0 + retl + aes_eround23_l %f62, %f4, %f2, %f2 +.type _aes192_encrypt_1x,#function +.size _aes192_encrypt_1x,.-_aes192_encrypt_1x + +.align 32 +_aes192_encrypt_2x: +___ +for ($i=0; $i<5; $i++) { + $code.=<<___; + aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 + aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ +} +$code.=<<___; + aes_eround01 %f56, %f0, %f2, %f8 + aes_eround23 %f58, %f0, %f2, %f2 + aes_eround01 %f56, %f4, %f6, %f10 + aes_eround23 %f58, %f4, %f6, %f6 + aes_eround01_l %f60, %f8, %f2, %f0 + aes_eround23_l %f62, %f8, %f2, %f2 + aes_eround01_l %f60, %f10, %f6, %f4 + retl + aes_eround23_l %f62, %f10, %f6, %f6 +.type _aes192_encrypt_2x,#function +.size _aes192_encrypt_2x,.-_aes192_encrypt_2x + +.align 32 +_aes256_encrypt_1x: + aes_eround01 %f16, %f0, %f2, %f4 + aes_eround23 %f18, %f0, %f2, %f2 + ldd [$key + 208], %f16 + ldd [$key + 216], %f18 + aes_eround01 %f20, %f4, %f2, %f0 + aes_eround23 %f22, %f4, %f2, %f2 + ldd [$key + 224], %f20 + ldd [$key + 232], %f22 +___ +for ($i=1; $i<6; $i++) { + $code.=<<___; + aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_eround01 %f16, %f0, %f2, %f4 + aes_eround23 %f18, %f0, %f2, %f2 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + aes_eround01_l %f20, %f4, %f2, %f0 + aes_eround23_l %f22, %f4, %f2, %f2 + ldd [$key + 32], %f20 + retl + ldd [$key + 40], %f22 +.type _aes256_encrypt_1x,#function +.size _aes256_encrypt_1x,.-_aes256_encrypt_1x + +.align 32 +_aes256_encrypt_2x: + aes_eround01 %f16, %f0, %f2, %f8 + aes_eround23 %f18, %f0, %f2, %f2 + aes_eround01 %f16, %f4, %f6, %f10 + aes_eround23 %f18, %f4, %f6, %f6 + ldd [$key + 208], %f16 + ldd [$key + 216], %f18 + aes_eround01 %f20, %f8, %f2, %f0 + aes_eround23 %f22, %f8, %f2, %f2 + aes_eround01 %f20, %f10, %f6, %f4 + aes_eround23 %f22, %f10, %f6, %f6 + ldd [$key + 224], %f20 + ldd [$key + 232], %f22 +___ +for ($i=1; $i<6; $i++) { + $code.=<<___; + aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 + aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ +} 
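The %f`16+8*$i+0`-style operands produced by these generation loops are placeholders: $i interpolates when each heredoc is appended to $code, and the arithmetic between the backticks is evaluated only when the accumulated text is finally emitted as assembly (presumably via the shared &emit_assembler() helper here; the same backtick-eval idiom is spelled out at the start of this section for the PowerPC module). A stand-alone sketch of that expansion, not taken from the patch:

#!/usr/bin/env perl
# Minimal illustration of the perlasm register-numbering idiom used by
# the round-generation loops above (assumption: simplified to a plain
# double-quoted string instead of a heredoc).
my $code = '';
for (my $i = 1; $i < 6; $i++) {
    # $i interpolates now, leaving e.g. %f`16+8*1+0` in the text.
    $code .= "\taes_eround01\t%f`16+8*$i+0`, %f0, %f2, %f8\n";
}
# Evaluate everything between backticks before printing, so the first
# emitted line becomes "aes_eround01  %f24, %f0, %f2, %f8".
$code =~ s/\`([^\`]*)\`/eval($1)/ge;
print $code;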
+$code.=<<___; + aes_eround01 %f16, %f0, %f2, %f8 + aes_eround23 %f18, %f0, %f2, %f2 + aes_eround01 %f16, %f4, %f6, %f10 + aes_eround23 %f18, %f4, %f6, %f6 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + aes_eround01_l %f20, %f8, %f2, %f0 + aes_eround23_l %f22, %f8, %f2, %f2 + aes_eround01_l %f20, %f10, %f6, %f4 + aes_eround23_l %f22, %f10, %f6, %f6 + ldd [$key + 32], %f20 + retl + ldd [$key + 40], %f22 +.type _aes256_encrypt_2x,#function +.size _aes256_encrypt_2x,.-_aes256_encrypt_2x + +.align 32 +_aes192_loadkey: + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 +___ +for ($i=2; $i<26;$i++) { # load key schedule + $code.=<<___; + ldd [$key + `8*$i`], %f`12+2*$i` +___ +} +$code.=<<___; + retl + nop +.type _aes192_loadkey,#function +.size _aes192_loadkey,.-_aes192_loadkey +_aes256_loadkey=_aes192_loadkey +_aes192_load_enckey=_aes192_loadkey +_aes192_load_deckey=_aes192_loadkey +_aes256_load_enckey=_aes192_loadkey +_aes256_load_deckey=_aes192_loadkey +___ + +&alg_cbc_encrypt_implement("aes",256); +&alg_cbc_encrypt_implement("aes",192); +if ($::evp) { + &alg_ctr32_implement("aes",256); + &alg_xts_implement("aes",256,"en"); + &alg_xts_implement("aes",256,"de"); + &alg_ctr32_implement("aes",192); +} +&alg_cbc_decrypt_implement("aes",192); +&alg_cbc_decrypt_implement("aes",256); + +$code.=<<___; +.align 32 +_aes256_decrypt_1x: + aes_dround01 %f16, %f0, %f2, %f4 + aes_dround23 %f18, %f0, %f2, %f2 + ldd [$key + 208], %f16 + ldd [$key + 216], %f18 + aes_dround01 %f20, %f4, %f2, %f0 + aes_dround23 %f22, %f4, %f2, %f2 + ldd [$key + 224], %f20 + ldd [$key + 232], %f22 +___ +for ($i=1; $i<6; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_dround01 %f16, %f0, %f2, %f4 + aes_dround23 %f18, %f0, %f2, %f2 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + aes_dround01_l %f20, %f4, %f2, %f0 + aes_dround23_l %f22, %f4, %f2, %f2 + ldd [$key + 32], %f20 + retl + ldd [$key + 40], %f22 +.type _aes256_decrypt_1x,#function +.size _aes256_decrypt_1x,.-_aes256_decrypt_1x + +.align 32 +_aes256_decrypt_2x: + aes_dround01 %f16, %f0, %f2, %f8 + aes_dround23 %f18, %f0, %f2, %f2 + aes_dround01 %f16, %f4, %f6, %f10 + aes_dround23 %f18, %f4, %f6, %f6 + ldd [$key + 208], %f16 + ldd [$key + 216], %f18 + aes_dround01 %f20, %f8, %f2, %f0 + aes_dround23 %f22, %f8, %f2, %f2 + aes_dround01 %f20, %f10, %f6, %f4 + aes_dround23 %f22, %f10, %f6, %f6 + ldd [$key + 224], %f20 + ldd [$key + 232], %f22 +___ +for ($i=1; $i<6; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ +} +$code.=<<___; + aes_dround01 %f16, %f0, %f2, %f8 + aes_dround23 %f18, %f0, %f2, %f2 + aes_dround01 %f16, %f4, %f6, %f10 + aes_dround23 %f18, %f4, %f6, %f6 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + aes_dround01_l %f20, %f8, %f2, %f0 + aes_dround23_l %f22, %f8, %f2, %f2 + aes_dround01_l %f20, %f10, %f6, %f4 + aes_dround23_l %f22, %f10, %f6, %f6 + ldd [$key + 32], %f20 + retl + ldd [$key + 40], %f22 +.type _aes256_decrypt_2x,#function +.size _aes256_decrypt_2x,.-_aes256_decrypt_2x + +.align 32 +_aes192_decrypt_1x: +___ +for ($i=0; $i<5; $i++) { + 
$code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 +___ +} +$code.=<<___; + aes_dround01 %f56, %f0, %f2, %f4 + aes_dround23 %f58, %f0, %f2, %f2 + aes_dround01_l %f60, %f4, %f2, %f0 + retl + aes_dround23_l %f62, %f4, %f2, %f2 +.type _aes192_decrypt_1x,#function +.size _aes192_decrypt_1x,.-_aes192_decrypt_1x + +.align 32 +_aes192_decrypt_2x: +___ +for ($i=0; $i<5; $i++) { + $code.=<<___; + aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 + aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 + aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 + aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 + aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 + aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 + aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 + aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 +___ +} +$code.=<<___; + aes_dround01 %f56, %f0, %f2, %f8 + aes_dround23 %f58, %f0, %f2, %f2 + aes_dround01 %f56, %f4, %f6, %f10 + aes_dround23 %f58, %f4, %f6, %f6 + aes_dround01_l %f60, %f8, %f2, %f0 + aes_dround23_l %f62, %f8, %f2, %f2 + aes_dround01_l %f60, %f10, %f6, %f4 + retl + aes_dround23_l %f62, %f10, %f6, %f6 +.type _aes192_decrypt_2x,#function +.size _aes192_decrypt_2x,.-_aes192_decrypt_2x +___ +}}} + +if (!$::evp) { +$code.=<<___; +.global AES_encrypt +AES_encrypt=aes_t4_encrypt +.global AES_decrypt +AES_decrypt=aes_t4_decrypt +.global AES_set_encrypt_key +.align 32 +AES_set_encrypt_key: + andcc %o2, 7, %g0 ! check alignment + bnz,a,pn %icc, 1f + mov -1, %o0 + brz,a,pn %o0, 1f + mov -1, %o0 + brz,a,pn %o2, 1f + mov -1, %o0 + andncc %o1, 0x1c0, %g0 + bnz,a,pn %icc, 1f + mov -2, %o0 + cmp %o1, 128 + bl,a,pn %icc, 1f + mov -2, %o0 + b aes_t4_set_encrypt_key + nop +1: retl + nop +.type AES_set_encrypt_key,#function +.size AES_set_encrypt_key,.-AES_set_encrypt_key + +.global AES_set_decrypt_key +.align 32 +AES_set_decrypt_key: + andcc %o2, 7, %g0 ! check alignment + bnz,a,pn %icc, 1f + mov -1, %o0 + brz,a,pn %o0, 1f + mov -1, %o0 + brz,a,pn %o2, 1f + mov -1, %o0 + andncc %o1, 0x1c0, %g0 + bnz,a,pn %icc, 1f + mov -2, %o0 + cmp %o1, 128 + bl,a,pn %icc, 1f + mov -2, %o0 + b aes_t4_set_decrypt_key + nop +1: retl + nop +.type AES_set_decrypt_key,#function +.size AES_set_decrypt_key,.-AES_set_decrypt_key +___ + +my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5)); + +$code.=<<___; +.globl AES_cbc_encrypt +.align 32 +AES_cbc_encrypt: + ld [$key + 240], %g1 + nop + brz $enc, .Lcbc_decrypt + cmp %g1, 12 + + bl,pt %icc, aes128_t4_cbc_encrypt + nop + be,pn %icc, aes192_t4_cbc_encrypt + nop + ba aes256_t4_cbc_encrypt + nop + +.Lcbc_decrypt: + bl,pt %icc, aes128_t4_cbc_decrypt + nop + be,pn %icc, aes192_t4_cbc_decrypt + nop + ba aes256_t4_cbc_decrypt + nop +.type AES_cbc_encrypt,#function +.size AES_cbc_encrypt,.-AES_cbc_encrypt +___ +} +$code.=<<___; +.asciz "AES for SPARC T4, David S. Miller, Andy Polyakov" +.align 4 +___ + +&emit_assembler(); + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl b/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl new file mode 100755 index 00000000000000..1e93f86852b403 --- /dev/null +++ b/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl @@ -0,0 +1,962 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. 
For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for ARMv8 AES instructions. The +# module is endian-agnostic in sense that it supports both big- and +# little-endian cases. As does it support both 32- and 64-bit modes +# of operation. Latter is achieved by limiting amount of utilized +# registers to 16, which implies additional NEON load and integer +# instructions. This has no effect on mighty Apple A7, where results +# are literally equal to the theoretical estimates based on AES +# instruction latencies and issue rates. On Cortex-A53, an in-order +# execution core, this costs up to 10-15%, which is partially +# compensated by implementing dedicated code path for 128-bit +# CBC encrypt case. On Cortex-A57 parallelizable mode performance +# seems to be limited by sheer amount of NEON instructions... +# +# Performance in cycles per byte processed with 128-bit key: +# +# CBC enc CBC dec CTR +# Apple A7 2.39 1.20 1.20 +# Cortex-A53 2.45 1.87 1.94 +# Cortex-A57 3.64 1.34 1.32 + +$flavour = shift; +open STDOUT,">".shift; + +$prefix="aes_v8"; + +$code=<<___; +#include "arm_arch.h" + +#if __ARM_MAX_ARCH__>=7 +.text +___ +$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); +$code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/); + #^^^^^^ this is done to simplify adoption by not depending + # on latest binutils. + +# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax, +# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to +# maintain both 32- and 64-bit codes within single module and +# transliterate common code to either flavour with regex vodoo. +# +{{{ +my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12"); +my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)= + $flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10)); + + +$code.=<<___; +.align 5 +rcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl ${prefix}_set_encrypt_key +.type ${prefix}_set_encrypt_key,%function +.align 5 +${prefix}_set_encrypt_key: +.Lenc_key: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 +___ +$code.=<<___; + mov $ptr,#-1 + cmp $inp,#0 + b.eq .Lenc_key_abort + cmp $out,#0 + b.eq .Lenc_key_abort + mov $ptr,#-2 + cmp $bits,#128 + b.lt .Lenc_key_abort + cmp $bits,#256 + b.gt .Lenc_key_abort + tst $bits,#0x3f + b.ne .Lenc_key_abort + + adr $ptr,rcon + cmp $bits,#192 + + veor $zero,$zero,$zero + vld1.8 {$in0},[$inp],#16 + mov $bits,#8 // reuse $bits + vld1.32 {$rcon,$mask},[$ptr],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + b.ne .Loop128 + + vld1.32 {$rcon},[$ptr] + + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + + vtbl.8 $key,{$in0},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in0},[$out],#16 + aese $key,$zero + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + veor $in0,$in0,$key + vst1.32 {$in0},[$out] + add $out,$out,#0x50 + + mov $rounds,#10 + b .Ldone + +.align 4 +.L192: + vld1.8 {$in1},[$inp],#8 + vmov.i8 $key,#8 // borrow $key + vst1.32 {$in0},[$out],#16 + vsub.i8 $mask,$mask,$key // adjust the mask + +.Loop192: + vtbl.8 $key,{$in1},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in1},[$out],#8 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + + vdup.32 $tmp,${in0}[3] + veor $tmp,$tmp,$in1 + veor $key,$key,$rcon + vext.8 $in1,$zero,$in1,#12 + vshl.u8 $rcon,$rcon,#1 + veor $in1,$in1,$tmp + veor $in0,$in0,$key + veor $in1,$in1,$key + vst1.32 {$in0},[$out],#16 + b.ne .Loop192 + + mov $rounds,#12 + add $out,$out,#0x20 + b .Ldone + +.align 4 +.L256: + vld1.8 {$in1},[$inp] + mov $bits,#7 + mov $rounds,#14 + vst1.32 {$in0},[$out],#16 + +.Loop256: + vtbl.8 $key,{$in1},$mask + vext.8 $tmp,$zero,$in0,#12 + vst1.32 {$in1},[$out],#16 + aese $key,$zero + subs $bits,$bits,#1 + + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in0,$in0,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $key,$key,$rcon + veor $in0,$in0,$tmp + vshl.u8 $rcon,$rcon,#1 + veor $in0,$in0,$key + vst1.32 {$in0},[$out],#16 + b.eq .Ldone + + vdup.32 $key,${in0}[3] // just splat + vext.8 $tmp,$zero,$in1,#12 + aese $key,$zero + + veor $in1,$in1,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in1,$in1,$tmp + vext.8 $tmp,$zero,$tmp,#12 + veor $in1,$in1,$tmp + + veor $in1,$in1,$key + b .Loop256 + +.Ldone: + str $rounds,[$out] + mov $ptr,#0 + +.Lenc_key_abort: + mov x0,$ptr // return value + `"ldr x29,[sp],#16" if ($flavour =~ /64/)` + ret +.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key + +.globl ${prefix}_set_decrypt_key +.type ${prefix}_set_decrypt_key,%function +.align 5 +${prefix}_set_decrypt_key: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + stmdb sp!,{r4,lr} +___ +$code.=<<___; + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub $out,$out,#240 // restore original $out + mov x4,#-16 + add $inp,$out,x12,lsl#4 // end of key schedule + + vld1.32 {v0.16b},[$out] + vld1.32 {v1.16b},[$inp] + vst1.32 {v0.16b},[$inp],x4 + vst1.32 {v1.16b},[$out],#16 + +.Loop_imc: + vld1.32 {v0.16b},[$out] + vld1.32 {v1.16b},[$inp] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + vst1.32 {v0.16b},[$inp],x4 + vst1.32 {v1.16b},[$out],#16 + cmp $inp,$out + b.hi .Loop_imc + + vld1.32 {v0.16b},[$out] + aesimc v0.16b,v0.16b + vst1.32 {v0.16b},[$inp] + + eor x0,x0,x0 // return value +.Ldec_key_abort: +___ +$code.=<<___ if ($flavour !~ /64/); + ldmia sp!,{r4,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldp x29,x30,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key +___ +}}} +{{{ +sub gen_block () { +my $dir = shift; +my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc"); +my ($inp,$out,$key)=map("x$_",(0..2)); +my $rounds="w3"; +my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..3)); + +$code.=<<___; +.globl ${prefix}_${dir}crypt +.type ${prefix}_${dir}crypt,%function +.align 5 +${prefix}_${dir}crypt: + ldr $rounds,[$key,#240] + vld1.32 {$rndkey0},[$key],#16 + vld1.8 {$inout},[$inp] + sub $rounds,$rounds,#2 + vld1.32 {$rndkey1},[$key],#16 + +.Loop_${dir}c: + aes$e $inout,$rndkey0 + vld1.32 {$rndkey0},[$key],#16 + aes$mc $inout,$inout + subs $rounds,$rounds,#2 + aes$e $inout,$rndkey1 + vld1.32 {$rndkey1},[$key],#16 + aes$mc $inout,$inout + b.gt .Loop_${dir}c + + aes$e $inout,$rndkey0 + vld1.32 {$rndkey0},[$key] + aes$mc $inout,$inout + aes$e $inout,$rndkey1 + veor $inout,$inout,$rndkey0 + + vst1.8 {$inout},[$out] + ret +.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +{{{ +my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); my $enc="w5"; +my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12"); +my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); + +my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); + +### q8-q15 preloaded key schedule + +$code.=<<___; +.globl ${prefix}_cbc_encrypt +.type ${prefix}_cbc_encrypt,%function +.align 5 +${prefix}_cbc_encrypt: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + mov ip,sp + stmdb sp!,{r4-r8,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load remaining args +___ +$code.=<<___; + subs $len,$len,#16 + mov $step,#16 + b.lo .Lcbc_abort + cclr $step,eq + + cmp $enc,#0 // en- or decrypting? + ldr $rounds,[$key,#240] + and $len,$len,#-16 + vld1.8 {$ivec},[$ivp] + vld1.8 {$dat},[$inp],$step + + vld1.32 {q8-q9},[$key] // load key schedule... 
+ sub $rounds,$rounds,#6 + add $key_,$key,x5,lsl#4 // pointer to last 7 round keys + sub $rounds,$rounds,#2 + vld1.32 {q10-q11},[$key_],#32 + vld1.32 {q12-q13},[$key_],#32 + vld1.32 {q14-q15},[$key_],#32 + vld1.32 {$rndlast},[$key_] + + add $key_,$key,#32 + mov $cnt,$rounds + b.eq .Lcbc_dec + + cmp $rounds,#2 + veor $dat,$dat,$ivec + veor $rndzero_n_last,q8,$rndlast + b.eq .Lcbc_enc128 + +.Loop_cbc_enc: + aese $dat,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat,$dat + subs $cnt,$cnt,#2 + aese $dat,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat,$dat + b.gt .Loop_cbc_enc + + aese $dat,q8 + aesmc $dat,$dat + subs $len,$len,#16 + aese $dat,q9 + aesmc $dat,$dat + cclr $step,eq + aese $dat,q10 + aesmc $dat,$dat + add $key_,$key,#16 + aese $dat,q11 + aesmc $dat,$dat + vld1.8 {q8},[$inp],$step + aese $dat,q12 + aesmc $dat,$dat + veor q8,q8,$rndzero_n_last + aese $dat,q13 + aesmc $dat,$dat + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + aese $dat,q14 + aesmc $dat,$dat + aese $dat,q15 + + mov $cnt,$rounds + veor $ivec,$dat,$rndlast + vst1.8 {$ivec},[$out],#16 + b.hs .Loop_cbc_enc + + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + vld1.32 {$in0-$in1},[$key_] + aese $dat,q8 + aesmc $dat,$dat + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese $dat,q8 + aesmc $dat,$dat + vst1.8 {$ivec},[$out],#16 +.Lenter_cbc_enc128: + aese $dat,q9 + aesmc $dat,$dat + subs $len,$len,#16 + aese $dat,$in0 + aesmc $dat,$dat + cclr $step,eq + aese $dat,$in1 + aesmc $dat,$dat + aese $dat,q10 + aesmc $dat,$dat + aese $dat,q11 + aesmc $dat,$dat + vld1.8 {q8},[$inp],$step + aese $dat,q12 + aesmc $dat,$dat + aese $dat,q13 + aesmc $dat,$dat + aese $dat,q14 + aesmc $dat,$dat + veor q8,q8,$rndzero_n_last + aese $dat,q15 + veor $ivec,$dat,$rndlast + b.hs .Loop_cbc_enc128 + + vst1.8 {$ivec},[$out],#16 + b .Lcbc_done +___ +{ +my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); +$code.=<<___; +.align 5 +.Lcbc_dec: + vld1.8 {$dat2},[$inp],#16 + subs $len,$len,#32 // bias + add $cnt,$rounds,#2 + vorr $in1,$dat,$dat + vorr $dat1,$dat,$dat + vorr $in2,$dat2,$dat2 + b.lo .Lcbc_dec_tail + + vorr $dat1,$dat2,$dat2 + vld1.8 {$dat2},[$inp],#16 + vorr $in0,$dat,$dat + vorr $in1,$dat1,$dat1 + vorr $in2,$dat2,$dat2 + +.Loop3x_cbc_dec: + aesd $dat0,q8 + aesd $dat1,q8 + aesd $dat2,q8 + vld1.32 {q8},[$key_],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + subs $cnt,$cnt,#2 + aesd $dat0,q9 + aesd $dat1,q9 + aesd $dat2,q9 + vld1.32 {q9},[$key_],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + b.gt .Loop3x_cbc_dec + + aesd $dat0,q8 + aesd $dat1,q8 + aesd $dat2,q8 + veor $tmp0,$ivec,$rndlast + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + veor $tmp1,$in0,$rndlast + aesd $dat0,q9 + aesd $dat1,q9 + aesd $dat2,q9 + veor $tmp2,$in1,$rndlast + subs $len,$len,#0x30 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + vorr $ivec,$in2,$in2 + mov.lo x6,$len // x6, $cnt, is zero at this point + aesd $dat0,q12 + aesd $dat1,q12 + aesd $dat2,q12 + add $inp,$inp,x6 // $inp is adjusted in such way that + // at exit from the loop $dat1-$dat2 + // are loaded with last "words" + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + mov $key_,$key + aesd $dat0,q13 + aesd $dat1,q13 + aesd $dat2,q13 + vld1.8 {$in0},[$inp],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + vld1.8 {$in1},[$inp],#16 + aesd $dat0,q14 + aesd $dat1,q14 + aesd $dat2,q14 + vld1.8 {$in2},[$inp],#16 + aesimc $dat0,$dat0 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + aesd 
$dat0,q15 + aesd $dat1,q15 + aesd $dat2,q15 + + add $cnt,$rounds,#2 + veor $tmp0,$tmp0,$dat0 + veor $tmp1,$tmp1,$dat1 + veor $dat2,$dat2,$tmp2 + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + vorr $dat0,$in0,$in0 + vst1.8 {$tmp0},[$out],#16 + vorr $dat1,$in1,$in1 + vst1.8 {$tmp1},[$out],#16 + vst1.8 {$dat2},[$out],#16 + vorr $dat2,$in2,$in2 + b.hs .Loop3x_cbc_dec + + cmn $len,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd $dat1,q8 + aesd $dat2,q8 + vld1.32 {q8},[$key_],#16 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + subs $cnt,$cnt,#2 + aesd $dat1,q9 + aesd $dat2,q9 + vld1.32 {q9},[$key_],#16 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + b.gt .Lcbc_dec_tail + + aesd $dat1,q8 + aesd $dat2,q8 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + aesd $dat1,q9 + aesd $dat2,q9 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + aesd $dat1,q12 + aesd $dat2,q12 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + cmn $len,#0x20 + aesd $dat1,q13 + aesd $dat2,q13 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + veor $tmp1,$ivec,$rndlast + aesd $dat1,q14 + aesd $dat2,q14 + aesimc $dat1,$dat1 + aesimc $dat2,$dat2 + veor $tmp2,$in1,$rndlast + aesd $dat1,q15 + aesd $dat2,q15 + b.eq .Lcbc_dec_one + veor $tmp1,$tmp1,$dat1 + veor $tmp2,$tmp2,$dat2 + vorr $ivec,$in2,$in2 + vst1.8 {$tmp1},[$out],#16 + vst1.8 {$tmp2},[$out],#16 + b .Lcbc_done + +.Lcbc_dec_one: + veor $tmp1,$tmp1,$dat2 + vorr $ivec,$in2,$in2 + vst1.8 {$tmp1},[$out],#16 + +.Lcbc_done: + vst1.8 {$ivec},[$ivp] +.Lcbc_abort: +___ +} +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r8,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldr x29,[sp],#16 + ret +___ +$code.=<<___; +.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt +___ +}}} +{{{ +my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4)); +my ($rounds,$cnt,$key_)=("w5","w6","x7"); +my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12)); +my $step="x12"; # aliases with $tctr2 + +my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); +my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9)); + +my ($dat,$tmp)=($dat0,$tmp0); + +### q8-q15 preloaded key schedule + +$code.=<<___; +.globl ${prefix}_ctr32_encrypt_blocks +.type ${prefix}_ctr32_encrypt_blocks,%function +.align 5 +${prefix}_ctr32_encrypt_blocks: +___ +$code.=<<___ if ($flavour =~ /64/); + stp x29,x30,[sp,#-16]! + add x29,sp,#0 +___ +$code.=<<___ if ($flavour !~ /64/); + mov ip,sp + stmdb sp!,{r4-r10,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldr r4, [ip] @ load remaining arg +___ +$code.=<<___; + ldr $rounds,[$key,#240] + + ldr $ctr, [$ivp, #12] + vld1.32 {$dat0},[$ivp] + + vld1.32 {q8-q9},[$key] // load key schedule... 
+ sub $rounds,$rounds,#4 + mov $step,#16 + cmp $len,#2 + add $key_,$key,x5,lsl#4 // pointer to last 5 round keys + sub $rounds,$rounds,#2 + vld1.32 {q12-q13},[$key_],#32 + vld1.32 {q14-q15},[$key_],#32 + vld1.32 {$rndlast},[$key_] + add $key_,$key,#32 + mov $cnt,$rounds + cclr $step,lo +#ifndef __ARMEB__ + rev $ctr, $ctr +#endif + vorr $dat1,$dat0,$dat0 + add $tctr1, $ctr, #1 + vorr $dat2,$dat0,$dat0 + add $ctr, $ctr, #2 + vorr $ivec,$dat0,$dat0 + rev $tctr1, $tctr1 + vmov.32 ${dat1}[3],$tctr1 + b.ls .Lctr32_tail + rev $tctr2, $ctr + sub $len,$len,#3 // bias + vmov.32 ${dat2}[3],$tctr2 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese $dat0,q8 + aese $dat1,q8 + aese $dat2,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aesmc $dat2,$dat2 + subs $cnt,$cnt,#2 + aese $dat0,q9 + aese $dat1,q9 + aese $dat2,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aesmc $dat2,$dat2 + b.gt .Loop3x_ctr32 + + aese $dat0,q8 + aese $dat1,q8 + aese $dat2,q8 + mov $key_,$key + aesmc $tmp0,$dat0 + vld1.8 {$in0},[$inp],#16 + aesmc $tmp1,$dat1 + aesmc $dat2,$dat2 + vorr $dat0,$ivec,$ivec + aese $tmp0,q9 + vld1.8 {$in1},[$inp],#16 + aese $tmp1,q9 + aese $dat2,q9 + vorr $dat1,$ivec,$ivec + aesmc $tmp0,$tmp0 + vld1.8 {$in2},[$inp],#16 + aesmc $tmp1,$tmp1 + aesmc $tmp2,$dat2 + vorr $dat2,$ivec,$ivec + add $tctr0,$ctr,#1 + aese $tmp0,q12 + aese $tmp1,q12 + aese $tmp2,q12 + veor $in0,$in0,$rndlast + add $tctr1,$ctr,#2 + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + aesmc $tmp2,$tmp2 + veor $in1,$in1,$rndlast + add $ctr,$ctr,#3 + aese $tmp0,q13 + aese $tmp1,q13 + aese $tmp2,q13 + veor $in2,$in2,$rndlast + rev $tctr0,$tctr0 + aesmc $tmp0,$tmp0 + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + aesmc $tmp1,$tmp1 + aesmc $tmp2,$tmp2 + vmov.32 ${dat0}[3], $tctr0 + rev $tctr1,$tctr1 + aese $tmp0,q14 + aese $tmp1,q14 + aese $tmp2,q14 + vmov.32 ${dat1}[3], $tctr1 + rev $tctr2,$ctr + aesmc $tmp0,$tmp0 + aesmc $tmp1,$tmp1 + aesmc $tmp2,$tmp2 + vmov.32 ${dat2}[3], $tctr2 + subs $len,$len,#3 + aese $tmp0,q15 + aese $tmp1,q15 + aese $tmp2,q15 + + mov $cnt,$rounds + veor $in0,$in0,$tmp0 + veor $in1,$in1,$tmp1 + veor $in2,$in2,$tmp2 + vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + vst1.8 {$in0},[$out],#16 + vst1.8 {$in1},[$out],#16 + vst1.8 {$in2},[$out],#16 + b.hs .Loop3x_ctr32 + + adds $len,$len,#3 + b.eq .Lctr32_done + cmp $len,#1 + mov $step,#16 + cclr $step,eq + +.Lctr32_tail: + aese $dat0,q8 + aese $dat1,q8 + vld1.32 {q8},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + subs $cnt,$cnt,#2 + aese $dat0,q9 + aese $dat1,q9 + vld1.32 {q9},[$key_],#16 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + b.gt .Lctr32_tail + + aese $dat0,q8 + aese $dat1,q8 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q9 + aese $dat1,q9 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + vld1.8 {$in0},[$inp],$step + aese $dat0,q12 + aese $dat1,q12 + vld1.8 {$in1},[$inp] + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q13 + aese $dat1,q13 + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + aese $dat0,q14 + aese $dat1,q14 + veor $in0,$in0,$rndlast + aesmc $dat0,$dat0 + aesmc $dat1,$dat1 + veor $in1,$in1,$rndlast + aese $dat0,q15 + aese $dat1,q15 + + cmp $len,#1 + veor $in0,$in0,$dat0 + veor $in1,$in1,$dat1 + vst1.8 {$in0},[$out],#16 + b.eq .Lctr32_done + vst1.8 {$in1},[$out] + +.Lctr32_done: +___ +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r10,pc} +___ +$code.=<<___ if ($flavour =~ /64/); + ldr x29,[sp],#16 + ret +___ +$code.=<<___; +.size 
${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks +___ +}}} +$code.=<<___; +#endif +___ +######################################## +if ($flavour =~ /64/) { ######## 64-bit code + my %opcode = ( + "aesd" => 0x4e285800, "aese" => 0x4e284800, + "aesimc"=> 0x4e287800, "aesmc" => 0x4e286800 ); + + local *unaes = sub { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5), + $mnemonic,$arg; + }; + + foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers + s/@\s/\/\//o; # old->new style commentary + + #s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or + s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or + s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel $2,$3,$2,$1/o or + s/vmov\.i8/movi/o or # fix up legacy mnemonics + s/vext\.8/ext/o or + s/vrev32\.8/rev32/o or + s/vtst\.8/cmtst/o or + s/vshr/ushr/o or + s/^(\s+)v/$1/o or # strip off v prefix + s/\bbx\s+lr\b/ret/o; + + # fix up remainig legacy suffixes + s/\.[ui]?8//o; + m/\],#8/o and s/\.16b/\.8b/go; + s/\.[ui]?32//o and s/\.16b/\.4s/go; + s/\.[ui]?64//o and s/\.16b/\.2d/go; + s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; + + print $_,"\n"; + } +} else { ######## 32-bit code + my %opcode = ( + "aesd" => 0xf3b00340, "aese" => 0xf3b00300, + "aesimc"=> 0xf3b003c0, "aesmc" => 0xf3b00380 ); + + local *unaes = sub { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<1) |(($2&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + }; + + sub unvtbl { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o && + sprintf "vtbl.8 d%d,{q%d},d%d\n\t". + "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; + } + + sub unvdup32 { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + } + + sub unvmov32 { + my $arg=shift; + + $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o && + sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3; + } + + foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers + s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers + s/\/\/\s?/@ /o; # new->old style commentary + + # fix up remainig new-style suffixes + s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or + s/\],#[0-9]+/]!/o; + + s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo or + s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or + s/vtbl\.8\s+(.*)/unvtbl($1)/geo or + s/vdup\.32\s+(.*)/unvdup32($1)/geo or + s/vmov\.32\s+(.*)/unvmov32($1)/geo or + s/^(\s+)b\./$1b/o or + s/^(\s+)mov\./$1mov/o or + s/^(\s+)ret/$1bx\tlr/o; + + print $_,"\n"; + } +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl b/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl new file mode 100644 index 00000000000000..fcc81d1a493374 --- /dev/null +++ b/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl @@ -0,0 +1,2469 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. 
The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Specific modes and adaptation for Linux kernel by Ard Biesheuvel +# . Permission to use under GPL terms is +# granted. +# ==================================================================== + +# Bit-sliced AES for ARM NEON +# +# February 2012. +# +# This implementation is direct adaptation of bsaes-x86_64 module for +# ARM NEON. Except that this module is endian-neutral [in sense that +# it can be compiled for either endianness] by courtesy of vld1.8's +# neutrality. Initial version doesn't implement interface to OpenSSL, +# only low-level primitives and unsupported entry points, just enough +# to collect performance results, which for Cortex-A8 core are: +# +# encrypt 19.5 cycles per byte processed with 128-bit key +# decrypt 22.1 cycles per byte processed with 128-bit key +# key conv. 440 cycles per 128-bit key/0.18 of 8x block +# +# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, +# which is [much] worse than anticipated (for further details see +# http://www.openssl.org/~appro/Snapdragon-S4.html). +# +# Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code +# manages in 20.0 cycles]. +# +# When comparing to x86_64 results keep in mind that NEON unit is +# [mostly] single-issue and thus can't [fully] benefit from +# instruction-level parallelism. And when comparing to aes-armv4 +# results keep in mind key schedule conversion overhead (see +# bsaes-x86_64.pl for further details)... +# +# + +# April-August 2013 +# +# Add CBC, CTR and XTS subroutines, adapt for kernel use. +# +# + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); +my @XMM=map("q$_",(0..15)); + +{ +my ($key,$rounds,$const)=("r4","r5","r6"); + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub Sbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + &InBasisChange (@b); + &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); + &OutBasisChange (@b[7,1,4,2,6,5,0,3]); +} + +sub InBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[2], @b[2], @b[1] + veor @b[5], @b[5], @b[6] + veor @b[3], @b[3], @b[0] + veor @b[6], @b[6], @b[2] + veor @b[5], @b[5], @b[0] + + veor @b[6], @b[6], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[4], @b[4], @b[5] + + veor @b[2], @b[2], @b[7] + veor @b[3], @b[3], @b[1] + veor @b[1], @b[1], @b[5] +___ +} + +sub OutBasisChange { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb +my @b=@_[0..7]; +$code.=<<___; + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] + veor @b[4], @b[4], @b[6] + veor @b[2], @b[2], @b[0] + veor @b[6], @b[6], @b[1] + + veor @b[1], @b[1], @b[5] + veor @b[5], @b[5], @b[3] + veor @b[3], @b[3], @b[7] + veor @b[7], @b[7], @b[5] + veor @b[2], @b[2], @b[5] + + veor @b[4], @b[4], @b[7] +___ +} + +sub InvSbox { +# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb +# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb +my @b=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; + 
&InvInBasisChange (@b); + &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); + &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); +} + +sub InvInBasisChange { # OutBasisChange in reverse (with twist) +my @b=@_[5,1,2,6,3,7,0,4]; +$code.=<<___ + veor @b[1], @b[1], @b[7] + veor @b[4], @b[4], @b[7] + + veor @b[7], @b[7], @b[5] + veor @b[1], @b[1], @b[3] + veor @b[2], @b[2], @b[5] + veor @b[3], @b[3], @b[7] + + veor @b[6], @b[6], @b[1] + veor @b[2], @b[2], @b[0] + veor @b[5], @b[5], @b[3] + veor @b[4], @b[4], @b[6] + veor @b[0], @b[0], @b[6] + veor @b[1], @b[1], @b[4] +___ +} + +sub InvOutBasisChange { # InBasisChange in reverse +my @b=@_[2,5,7,3,6,1,0,4]; +$code.=<<___; + veor @b[1], @b[1], @b[5] + veor @b[2], @b[2], @b[7] + + veor @b[3], @b[3], @b[1] + veor @b[4], @b[4], @b[5] + veor @b[7], @b[7], @b[5] + veor @b[3], @b[3], @b[4] + veor @b[5], @b[5], @b[0] + veor @b[3], @b[3], @b[7] + veor @b[6], @b[6], @b[2] + veor @b[2], @b[2], @b[1] + veor @b[6], @b[6], @b[3] + + veor @b[3], @b[3], @b[0] + veor @b[5], @b[5], @b[6] +___ +} + +sub Mul_GF4 { +#;************************************************************* +#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * +#;************************************************************* +my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $t1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $t1, $t0 + veor $x0, $x0, $t1 +___ +} + +sub Mul_GF4_N { # not used, see next subroutine +# multiply and scale by N +my ($x0,$x1,$y0,$y1,$t0)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + vand $t0, $t0, $x0 + veor $x0, $x0, $x1 + vand $x1, $x1, $y0 + vand $x0, $x0, $y1 + veor $x1, $x1, $x0 + veor $x0, $x0, $t0 +___ +} + +sub Mul_GF4_N_GF4 { +# interleaved Mul_GF4_N and Mul_GF4 +my ($x0,$x1,$y0,$y1,$t0, + $x2,$x3,$y2,$y3,$t1)=@_; +$code.=<<___; + veor $t0, $y0, $y1 + veor $t1, $y2, $y3 + vand $t0, $t0, $x0 + vand $t1, $t1, $x2 + veor $x0, $x0, $x1 + veor $x2, $x2, $x3 + vand $x1, $x1, $y0 + vand $x3, $x3, $y2 + vand $x0, $x0, $y1 + vand $x2, $x2, $y3 + veor $x1, $x1, $x0 + veor $x2, $x2, $x3 + veor $x0, $x0, $t0 + veor $x3, $x3, $t1 +___ +} +sub Mul_GF16_2 { +my @x=@_[0..7]; +my @y=@_[8..11]; +my @t=@_[12..15]; +$code.=<<___; + veor @t[0], @x[0], @x[2] + veor @t[1], @x[1], @x[3] +___ + &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[2], @x[3], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @x[0], @x[0], @t[0] + veor @x[2], @x[2], @t[0] + veor @x[1], @x[1], @t[1] + veor @x[3], @x[3], @t[1] + + veor @t[0], @x[4], @x[6] + veor @t[1], @x[5], @x[7] +___ + &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], + @x[6], @x[7], @y[2], @y[3], @t[2]); +$code.=<<___; + veor @y[0], @y[0], @y[2] + veor @y[1], @y[1], @y[3] +___ + &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); +$code.=<<___; + veor @x[4], @x[4], @t[0] + veor @x[6], @x[6], @t[0] + veor @x[5], @x[5], @t[1] + veor @x[7], @x[7], @t[1] +___ +} +sub Inv_GF256 { +#;******************************************************************** +#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * +#;******************************************************************** +my @x=@_[0..7]; +my @t=@_[8..11]; +my @s=@_[12..15]; +# direct optimizations from hardware +$code.=<<___; + veor @t[3], @x[4], @x[6] + veor @t[2], @x[5], @x[7] + veor @t[1], @x[1], @x[3] + veor @s[1], @x[7], @x[6] + vmov @t[0], @t[2] + veor @s[0], @x[0], @x[2] + + vorr @t[2], @t[2], @t[1] + veor 
@s[3], @t[3], @t[0] + vand @s[2], @t[3], @s[0] + vorr @t[3], @t[3], @s[0] + veor @s[0], @s[0], @t[1] + vand @t[0], @t[0], @t[1] + veor @t[1], @x[3], @x[2] + vand @s[3], @s[3], @s[0] + vand @s[1], @s[1], @t[1] + veor @t[1], @x[4], @x[5] + veor @s[0], @x[1], @x[0] + veor @t[3], @t[3], @s[1] + veor @t[2], @t[2], @s[1] + vand @s[1], @t[1], @s[0] + vorr @t[1], @t[1], @s[0] + veor @t[3], @t[3], @s[3] + veor @t[0], @t[0], @s[1] + veor @t[2], @t[2], @s[2] + veor @t[1], @t[1], @s[3] + veor @t[0], @t[0], @s[2] + vand @s[0], @x[7], @x[3] + veor @t[1], @t[1], @s[2] + vand @s[1], @x[6], @x[2] + vand @s[2], @x[5], @x[1] + vorr @s[3], @x[4], @x[0] + veor @t[3], @t[3], @s[0] + veor @t[1], @t[1], @s[2] + veor @t[0], @t[0], @s[3] + veor @t[2], @t[2], @s[1] + + @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 + + @ new smaller inversion + + vand @s[2], @t[3], @t[1] + vmov @s[0], @t[0] + + veor @s[1], @t[2], @s[2] + veor @s[3], @t[0], @s[2] + veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] + + vbsl @s[1], @t[1], @t[0] + vbsl @s[3], @t[3], @t[2] + veor @t[3], @t[3], @t[2] + + vbsl @s[0], @s[1], @s[2] + vbsl @t[0], @s[2], @s[1] + + vand @s[2], @s[0], @s[3] + veor @t[1], @t[1], @t[0] + + veor @s[2], @s[2], @t[3] +___ +# output in s3, s2, s1, t1 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 + +# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 + &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); + +### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb +} + +# AES linear components + +sub ShiftRows { +my @x=@_[0..7]; +my @t=@_[8..11]; +my $mask=pop; +$code.=<<___; + vldmia $key!, {@t[0]-@t[3]} + veor @t[0], @t[0], @x[0] + veor @t[1], @t[1], @x[1] + vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` + vldmia $key!, {@t[0]} + veor @t[2], @t[2], @x[2] + vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` + vldmia $key!, {@t[1]} + veor @t[3], @t[3], @x[3] + vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` + vldmia $key!, {@t[2]} + vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` + vldmia $key!, {@t[3]} + veor @t[0], @t[0], @x[4] + veor @t[1], @t[1], @x[5] + vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` + veor @t[2], @t[2], @x[6] + vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` + veor @t[3], @t[3], @x[7] + vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` + vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` + vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` +___ +} + +sub MixColumns { +# modified to emit output in order suitable for feeding back to aesenc[last] +my @x=@_[0..7]; +my @t=@_[8..15]; +my $inv=@_[16]; # optional +$code.=<<___; + vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) + vext.8 @t[2], @x[2], @x[2], #12 + veor @x[1], @x[1], @t[1] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[2], @x[2], @t[2] + vext.8 @t[4], @x[4], @x[4], #12 + veor @x[3], @x[3], @t[3] + vext.8 @t[5], @x[5], @x[5], #12 + veor @x[4], @x[4], @t[4] + vext.8 @t[6], @x[6], @x[6], #12 + veor @x[5], @x[5], @t[5] + vext.8 @t[7], @x[7], @x[7], #12 + veor @x[6], @x[6], @t[6] + + veor @t[1], @t[1], @x[0] + veor @x[7], @x[7], @t[7] + vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) + veor @t[2], @t[2], 
@x[1] + veor @t[0], @t[0], @x[7] + veor @t[1], @t[1], @x[7] + vext.8 @x[1], @x[1], @x[1], #8 + veor @t[5], @t[5], @x[4] + veor @x[0], @x[0], @t[0] + veor @t[6], @t[6], @x[5] + veor @x[1], @x[1], @t[1] + vext.8 @t[0], @x[4], @x[4], #8 + veor @t[4], @t[4], @x[3] + vext.8 @t[1], @x[5], @x[5], #8 + veor @t[7], @t[7], @x[6] + vext.8 @x[4], @x[3], @x[3], #8 + veor @t[3], @t[3], @x[2] + vext.8 @x[5], @x[7], @x[7], #8 + veor @t[4], @t[4], @x[7] + vext.8 @x[3], @x[6], @x[6], #8 + veor @t[3], @t[3], @x[7] + vext.8 @x[6], @x[2], @x[2], #8 + veor @x[7], @t[1], @t[5] +___ +$code.=<<___ if (!$inv); + veor @x[2], @t[0], @t[4] + veor @x[4], @x[4], @t[3] + veor @x[5], @x[5], @t[7] + veor @x[3], @x[3], @t[6] + @ vmov @x[2], @t[0] + veor @x[6], @x[6], @t[2] + @ vmov @x[7], @t[1] +___ +$code.=<<___ if ($inv); + veor @t[3], @t[3], @x[4] + veor @x[5], @x[5], @t[7] + veor @x[2], @x[3], @t[6] + veor @x[3], @t[0], @t[4] + veor @x[4], @x[6], @t[2] + vmov @x[6], @t[3] + @ vmov @x[7], @t[1] +___ +} + +sub InvMixColumns_orig { +my @x=@_[0..7]; +my @t=@_[8..15]; + +$code.=<<___; + @ multiplication by 0x0e + vext.8 @t[7], @x[7], @x[7], #12 + vmov @t[2], @x[2] + veor @x[2], @x[2], @x[5] @ 2 5 + veor @x[7], @x[7], @x[5] @ 7 5 + vext.8 @t[0], @x[0], @x[0], #12 + vmov @t[5], @x[5] + veor @x[5], @x[5], @x[0] @ 5 0 [1] + veor @x[0], @x[0], @x[1] @ 0 1 + vext.8 @t[1], @x[1], @x[1], #12 + veor @x[1], @x[1], @x[2] @ 1 25 + veor @x[0], @x[0], @x[6] @ 01 6 [2] + vext.8 @t[3], @x[3], @x[3], #12 + veor @x[1], @x[1], @x[3] @ 125 3 [4] + veor @x[2], @x[2], @x[0] @ 25 016 [3] + veor @x[3], @x[3], @x[7] @ 3 75 + veor @x[7], @x[7], @x[6] @ 75 6 [0] + vext.8 @t[6], @x[6], @x[6], #12 + vmov @t[4], @x[4] + veor @x[6], @x[6], @x[4] @ 6 4 + veor @x[4], @x[4], @x[3] @ 4 375 [6] + veor @x[3], @x[3], @x[7] @ 375 756=36 + veor @x[6], @x[6], @t[5] @ 64 5 [7] + veor @x[3], @x[3], @t[2] @ 36 2 + vext.8 @t[5], @t[5], @t[5], #12 + veor @x[3], @x[3], @t[4] @ 362 4 [5] +___ + my @y = @x[7,5,0,2,1,3,4,6]; +$code.=<<___; + @ multiplication by 0x0b + veor @y[1], @y[1], @y[0] + veor @y[0], @y[0], @t[0] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[5] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[1], @y[1], @t[6] + veor @y[0], @y[0], @t[7] + veor @t[7], @t[7], @t[6] @ clobber t[7] + + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @y[0] + vext.8 @t[0], @t[0], @t[0], #12 + veor @y[2], @y[2], @t[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + veor @y[2], @y[2], @t[2] + veor @y[3], @y[3], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[7] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[3], @y[3], @t[3] + veor @y[6], @y[6], @t[3] + veor @y[4], @y[4], @t[3] + veor @y[7], @y[7], @t[4] + vext.8 @t[3], @t[3], @t[3], #12 + veor @y[5], @y[5], @t[4] + veor @y[7], @y[7], @t[7] + veor @t[7], @t[7], @t[5] @ clobber t[7] even more + veor @y[3], @y[3], @t[5] + veor @y[4], @y[4], @t[4] + + veor @y[5], @y[5], @t[7] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[6], @y[6], @t[7] + veor @y[4], @y[4], @t[7] + + veor @t[7], @t[7], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + + @ multiplication by 0x0d + veor @y[4], @y[4], @y[7] + veor @t[7], @t[7], @t[6] @ restore t[7] + veor @y[7], @y[7], @t[4] + vext.8 @t[6], @t[6], @t[6], #12 + veor @y[2], @y[2], @t[0] + veor @y[7], @y[7], @t[5] + vext.8 @t[7], @t[7], @t[7], #12 + veor @y[2], @y[2], @t[2] + + veor @y[3], @y[3], @y[1] + veor @y[1], @y[1], @t[1] + veor @y[0], @y[0], @t[0] + veor @y[3], @y[3], @t[0] + veor @y[1], @y[1], @t[5] + veor @y[0], @y[0], @t[5] + vext.8 @t[0], @t[0], 
@t[0], #12 + veor @y[1], @y[1], @t[7] + veor @y[0], @y[0], @t[6] + veor @y[3], @y[3], @y[1] + veor @y[4], @y[4], @t[1] + vext.8 @t[1], @t[1], @t[1], #12 + + veor @y[7], @y[7], @t[7] + veor @y[4], @y[4], @t[2] + veor @y[5], @y[5], @t[2] + veor @y[2], @y[2], @t[6] + veor @t[6], @t[6], @t[3] @ clobber t[6] + vext.8 @t[2], @t[2], @t[2], #12 + veor @y[4], @y[4], @y[7] + veor @y[3], @y[3], @t[6] + + veor @y[6], @y[6], @t[6] + veor @y[5], @y[5], @t[5] + vext.8 @t[5], @t[5], @t[5], #12 + veor @y[6], @y[6], @t[4] + vext.8 @t[4], @t[4], @t[4], #12 + veor @y[5], @y[5], @t[6] + veor @y[6], @y[6], @t[7] + vext.8 @t[7], @t[7], @t[7], #12 + veor @t[6], @t[6], @t[3] @ restore t[6] + vext.8 @t[3], @t[3], @t[3], #12 + + @ multiplication by 0x09 + veor @y[4], @y[4], @y[1] + veor @t[1], @t[1], @y[1] @ t[1]=y[1] + veor @t[0], @t[0], @t[5] @ clobber t[0] + vext.8 @t[6], @t[6], @t[6], #12 + veor @t[1], @t[1], @t[5] + veor @y[3], @y[3], @t[0] + veor @t[0], @t[0], @y[0] @ t[0]=y[0] + veor @t[1], @t[1], @t[6] + veor @t[6], @t[6], @t[7] @ clobber t[6] + veor @y[4], @y[4], @t[1] + veor @y[7], @y[7], @t[4] + veor @y[6], @y[6], @t[3] + veor @y[5], @y[5], @t[2] + veor @t[4], @t[4], @y[4] @ t[4]=y[4] + veor @t[3], @t[3], @y[3] @ t[3]=y[3] + veor @t[5], @t[5], @y[5] @ t[5]=y[5] + veor @t[2], @t[2], @y[2] @ t[2]=y[2] + veor @t[3], @t[3], @t[7] + veor @XMM[5], @t[5], @t[6] + veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] + veor @XMM[2], @t[2], @t[6] + veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] + + vmov @XMM[0], @t[0] + vmov @XMM[1], @t[1] + @ vmov @XMM[2], @t[2] + vmov @XMM[3], @t[3] + vmov @XMM[4], @t[4] + @ vmov @XMM[5], @t[5] + @ vmov @XMM[6], @t[6] + @ vmov @XMM[7], @t[7] +___ +} + +sub InvMixColumns { +my @x=@_[0..7]; +my @t=@_[8..15]; + +# Thanks to Jussi Kivilinna for providing pointer to +# +# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | +# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | +# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | +# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | + +$code.=<<___; + @ multiplication by 0x05-0x00-0x04-0x00 + vext.8 @t[0], @x[0], @x[0], #8 + vext.8 @t[6], @x[6], @x[6], #8 + vext.8 @t[7], @x[7], @x[7], #8 + veor @t[0], @t[0], @x[0] + vext.8 @t[1], @x[1], @x[1], #8 + veor @t[6], @t[6], @x[6] + vext.8 @t[2], @x[2], @x[2], #8 + veor @t[7], @t[7], @x[7] + vext.8 @t[3], @x[3], @x[3], #8 + veor @t[1], @t[1], @x[1] + vext.8 @t[4], @x[4], @x[4], #8 + veor @t[2], @t[2], @x[2] + vext.8 @t[5], @x[5], @x[5], #8 + veor @t[3], @t[3], @x[3] + veor @t[4], @t[4], @x[4] + veor @t[5], @t[5], @x[5] + + veor @x[0], @x[0], @t[6] + veor @x[1], @x[1], @t[6] + veor @x[2], @x[2], @t[0] + veor @x[4], @x[4], @t[2] + veor @x[3], @x[3], @t[1] + veor @x[1], @x[1], @t[7] + veor @x[2], @x[2], @t[7] + veor @x[4], @x[4], @t[6] + veor @x[5], @x[5], @t[3] + veor @x[3], @x[3], @t[6] + veor @x[6], @x[6], @t[4] + veor @x[4], @x[4], @t[7] + veor @x[5], @x[5], @t[7] + veor @x[7], @x[7], @t[5] +___ + &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 +} + +sub swapmove { +my ($a,$b,$n,$mask,$t)=@_; +$code.=<<___; + vshr.u64 $t, $b, #$n + veor $t, $t, $a + vand $t, $t, $mask + veor $a, $a, $t + vshl.u64 $t, $t, #$n + veor $b, $b, $t +___ +} +sub swapmove2x { +my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; +$code.=<<___; + vshr.u64 $t0, $b0, #$n + vshr.u64 $t1, $b1, #$n + veor $t0, $t0, $a0 + veor $t1, $t1, $a1 + vand $t0, $t0, $mask + vand $t1, $t1, $mask + veor $a0, $a0, $t0 + vshl.u64 $t0, $t0, #$n + veor $a1, $a1, $t1 + vshl.u64 $t1, $t1, #$n + veor $b0, $b0, $t0 + veor $b1, $b1, $t1 +___ +} + +sub bitslice { +my 
@x=reverse(@_[0..7]); +my ($t0,$t1,$t2,$t3)=@_[8..11]; +$code.=<<___; + vmov.i8 $t0,#0x55 @ compose .LBS0 + vmov.i8 $t1,#0x33 @ compose .LBS1 +___ + &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); + &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); +$code.=<<___; + vmov.i8 $t0,#0x0f @ compose .LBS2 +___ + &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); + &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + + &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); + &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" + +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +# define VFP_ABI_FRAME 0x40 +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +# define VFP_ABI_FRAME 0 +# define BSAES_ASM_EXTENDED_KEY +# define XTS_CHAIN_TWEAK +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ +#endif + +#ifdef __thumb__ +# define adrl adr +#endif + +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.text +.syntax unified @ ARMv7-capable assembler is expected to handle this +#ifdef __thumb2__ +.thumb +#else +.code 32 +#endif + +.type _bsaes_decrypt8,%function +.align 4 +_bsaes_decrypt8: + adr $const,_bsaes_decrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + add $const,$const,#.LM0ISR-_bsaes_decrypt8 + + vldmia $const!, {@XMM[8]} @ .LM0ISR + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Ldec_sbox +.align 4 +.Ldec_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Ldec_sbox:\n"; + &InvSbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Ldec_done +___ + &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LISR + ite eq @ Thumb2 thing, sanity check in ARM + addeq $const,$const,#0x10 + bne .Ldec_loop + vldmia $const, {@XMM[12]} @ .LISRM0 + b .Ldec_loop +.align 4 +.Ldec_done: +___ + &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_decrypt8,.-_bsaes_decrypt8 + +.type _bsaes_const,%object +.align 6 +_bsaes_const: +.LM0ISR: @ InvShiftRows constants + .quad 0x0a0e0206070b0f03, 
0x0004080c0d010509 +.LISR: + .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 +.LISRM0: + .quad 0x01040b0e0205080f, 0x0306090c00070a0d +.LM0SR: @ ShiftRows constants + .quad 0x0a0e02060f03070b, 0x0004080c05090d01 +.LSR: + .quad 0x0504070600030201, 0x0f0e0d0c0a09080b +.LSRM0: + .quad 0x0304090e00050a0f, 0x01060b0c0207080d +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.LREVM0SR: + .quad 0x090d01050c000408, 0x03070b0f060a0e02 +.asciz "Bit-sliced AES for NEON, CRYPTOGAMS by " +.align 6 +.size _bsaes_const,.-_bsaes_const + +.type _bsaes_encrypt8,%function +.align 4 +_bsaes_encrypt8: + adr $const,_bsaes_encrypt8 + vldmia $key!, {@XMM[9]} @ round 0 key + sub $const,$const,#_bsaes_encrypt8-.LM0SR + + vldmia $const!, {@XMM[8]} @ .LM0SR +_bsaes_encrypt8_alt: + veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key + veor @XMM[11], @XMM[1], @XMM[9] + vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` + veor @XMM[12], @XMM[2], @XMM[9] + vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` + veor @XMM[13], @XMM[3], @XMM[9] + vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` + veor @XMM[14], @XMM[4], @XMM[9] + vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` + veor @XMM[15], @XMM[5], @XMM[9] + vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` + veor @XMM[10], @XMM[6], @XMM[9] + vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` + veor @XMM[11], @XMM[7], @XMM[9] + vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` + vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` + vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` +_bsaes_encrypt8_bitslice: +___ + &bitslice (@XMM[0..7, 8..11]); +$code.=<<___; + sub $rounds,$rounds,#1 + b .Lenc_sbox +.align 4 +.Lenc_loop: +___ + &ShiftRows (@XMM[0..7, 8..12]); +$code.=".Lenc_sbox:\n"; + &Sbox (@XMM[0..7, 8..15]); +$code.=<<___; + subs $rounds,$rounds,#1 + bcc .Lenc_done +___ + &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); +$code.=<<___; + vldmia $const, {@XMM[12]} @ .LSR + ite eq @ Thumb2 thing, samity check in ARM + addeq $const,$const,#0x10 + bne .Lenc_loop + vldmia $const, {@XMM[12]} @ .LSRM0 + b .Lenc_loop +.align 4 +.Lenc_done: +___ + # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb + &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); +$code.=<<___; + vldmia $key, {@XMM[8]} @ last round key + veor @XMM[4], @XMM[4], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[8] + veor @XMM[3], @XMM[3], @XMM[8] + veor @XMM[7], @XMM[7], @XMM[8] + veor @XMM[2], @XMM[2], @XMM[8] + veor @XMM[5], @XMM[5], @XMM[8] + veor @XMM[0], @XMM[0], @XMM[8] + veor @XMM[1], @XMM[1], @XMM[8] + bx lr +.size _bsaes_encrypt8,.-_bsaes_encrypt8 +___ +} +{ +my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); + +sub bitslice_key { +my @x=reverse(@_[0..7]); +my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; + + &swapmove (@x[0,1],1,$bs0,$t2,$t3); +$code.=<<___; + @ &swapmove(@x[2,3],1,$t0,$t2,$t3); + vmov @x[2], @x[0] + vmov @x[3], @x[1] +___ + #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); + + &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); +$code.=<<___; + @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); + vmov @x[4], @x[0] + vmov @x[6], @x[2] + vmov @x[5], @x[1] + vmov @x[7], @x[3] +___ + &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); + &swapmove2x 
(@x[2,6,3,7],4,$bs2,$t2,$t3); +} + +$code.=<<___; +.type _bsaes_key_convert,%function +.align 4 +_bsaes_key_convert: + adr $const,_bsaes_key_convert + vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key + sub $const,$const,#_bsaes_key_convert-.LM0 + vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key + + vmov.i8 @XMM[8], #0x01 @ bit masks + vmov.i8 @XMM[9], #0x02 + vmov.i8 @XMM[10], #0x04 + vmov.i8 @XMM[11], #0x08 + vmov.i8 @XMM[12], #0x10 + vmov.i8 @XMM[13], #0x20 + vldmia $const, {@XMM[14]} @ .LM0 + +#ifdef __ARMEL__ + vrev32.8 @XMM[7], @XMM[7] + vrev32.8 @XMM[15], @XMM[15] +#endif + sub $rounds,$rounds,#1 + vstmia $out!, {@XMM[7]} @ save round 0 key + b .Lkey_loop + +.align 4 +.Lkey_loop: + vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` + vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` + vmov.i8 @XMM[6], #0x40 + vmov.i8 @XMM[15], #0x80 + + vtst.8 @XMM[0], @XMM[7], @XMM[8] + vtst.8 @XMM[1], @XMM[7], @XMM[9] + vtst.8 @XMM[2], @XMM[7], @XMM[10] + vtst.8 @XMM[3], @XMM[7], @XMM[11] + vtst.8 @XMM[4], @XMM[7], @XMM[12] + vtst.8 @XMM[5], @XMM[7], @XMM[13] + vtst.8 @XMM[6], @XMM[7], @XMM[6] + vtst.8 @XMM[7], @XMM[7], @XMM[15] + vld1.8 {@XMM[15]}, [$inp]! @ load next round key + vmvn @XMM[0], @XMM[0] @ "pnot" + vmvn @XMM[1], @XMM[1] + vmvn @XMM[5], @XMM[5] + vmvn @XMM[6], @XMM[6] +#ifdef __ARMEL__ + vrev32.8 @XMM[15], @XMM[15] +#endif + subs $rounds,$rounds,#1 + vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key + bne .Lkey_loop + + vmov.i8 @XMM[7],#0x63 @ compose .L63 + @ don't save last round key + bx lr +.size _bsaes_key_convert,.-_bsaes_key_convert +___ +} + +if (0) { # following four functions are unsupported interface + # used for benchmarking... +$code.=<<___; +.globl bsaes_enc_key_convert +.type bsaes_enc_key_convert,%function +.align 4 +bsaes_enc_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_enc_key_convert,.-bsaes_enc_key_convert + +.globl bsaes_encrypt_128 +.type bsaes_encrypt_128,%function +.align 4 +bsaes_encrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Lenc128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_encrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! 
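The InvMixColumns factorization credited to Jussi Kivilinna above (the 0e 0b 0d 09 circulant expressed as the MixColumns 02 03 01 01 circulant times the sparse 05 00 04 00 circulant) can be checked independently. A minimal standalone Perl sketch, not part of the generated module, assuming only the AES field polynomial 0x11b:

#!/usr/bin/env perl
# Standalone check: over GF(2^8) mod x^8+x^4+x^3+x+1 (0x11b), the product of
# the circulant with first row (02 03 01 01) and the circulant with first row
# (05 00 04 00) has first row (0e 0b 0d 09).
use strict;
use warnings;

sub gf_mul {                                 # GF(2^8) multiply, AES polynomial
    my ($a, $b) = @_;
    my $r = 0;
    for my $bit (0 .. 7) {
        $r ^= $a if ($b >> $bit) & 1;
        $a <<= 1;
        $a ^= 0x11b if $a & 0x100;
    }
    return $r;
}

my @mc  = (0x02, 0x03, 0x01, 0x01);          # MixColumns, first row
my @aux = (0x05, 0x00, 0x04, 0x00);          # auxiliary matrix, first row
my @row;
for my $j (0 .. 3) {                         # first row of the circulant product
    my $acc = 0;
    for my $k (0 .. 3) {
        $acc ^= gf_mul($mc[$k], $aux[($j - $k) % 4]);
    }
    push @row, $acc;
}
printf "%02x %02x %02x %02x\n", @row;        # prints: 0e 0b 0d 09

Since all three matrices are circulants, verifying the first row of the product verifies the whole identity.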
+ bhi .Lenc128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_encrypt_128,.-bsaes_encrypt_128 + +.globl bsaes_dec_key_convert +.type bsaes_dec_key_convert,%function +.align 4 +bsaes_dec_key_convert: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so + + ldr r5,[$inp,#240] @ pass rounds + mov r4,$inp @ pass key + mov r12,$out @ pass key schedule + bl _bsaes_key_convert + vldmia $out, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $out, {@XMM[7]} + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_dec_key_convert,.-bsaes_dec_key_convert + +.globl bsaes_decrypt_128 +.type bsaes_decrypt_128,%function +.align 4 +bsaes_decrypt_128: + stmdb sp!,{r4-r6,lr} + vstmdb sp!,{d8-d15} @ ABI specification says so +.Ldec128_loop: + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! + mov r4,$key @ pass the key + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! + mov r5,#10 @ pass rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + + bl _bsaes_decrypt8 + + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + subs $len,$len,#0x80 + vst1.8 {@XMM[5]}, [$out]! + bhi .Ldec128_loop + + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r6,pc} +.size bsaes_decrypt_128,.-bsaes_decrypt_128 +___ +} +{ +my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10)); +my ($keysched)=("sp"); + +$code.=<<___; +.extern AES_cbc_encrypt +.extern AES_decrypt + +.global bsaes_cbc_encrypt +.type bsaes_cbc_encrypt,%function +.align 5 +bsaes_cbc_encrypt: +#ifndef __KERNEL__ + cmp $len, #128 +#ifndef __thumb__ + blo AES_cbc_encrypt +#else + bhs 1f + b AES_cbc_encrypt +1: +#endif +#endif + + @ it is up to the caller to make sure we are called with enc == 0 + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ivp, [ip] @ IV is 1st arg on the stack + mov $len, $len, lsr#4 @ len in 16 byte blocks + sub sp, #0x10 @ scratch space to carry over the IV + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ sifze of bit-slices key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + vldmia $keysched, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia $keysched, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: +#endif + + vld1.8 {@XMM[15]}, [$ivp] @ load IV + b .Lcbc_dec_loop + +.align 4 +.Lcbc_dec_loop: + subs $len, $len, #0x8 + bmi .Lcbc_dec_loop_finish + + vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input + vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! 
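The .Lcbc_dec_loop surrounding this point pulls in eight ciphertext blocks per iteration, but the recurrence it implements is the ordinary CBC one: each plaintext block is the decrypted ciphertext XORed with the previous ciphertext block, and the last ciphertext block is kept (and finally written back through $ivp) so the next call chains correctly. A hypothetical scalar Perl model, where dec_block stands in for a single-block decrypt and is not a routine from this file:

# Hypothetical scalar model of the chaining done by .Lcbc_dec_loop;
# $dec_block is assumed to be a code ref that decrypts one 16-byte block.
sub cbc_decrypt_model {
    my ($dec_block, $iv, @ctext) = @_;       # IV and blocks are 16-byte strings
    my @ptext;
    for my $c (@ctext) {
        push @ptext, $dec_block->($c) ^ $iv; # string "^" XORs byte-wise in Perl
        $iv = $c;                            # last ciphertext becomes the next IV
    }
    return ($iv, @ptext);                    # $iv chains into the next call
}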
+ mov r5, $rounds + vld1.8 {@XMM[6]-@XMM[7]}, [$inp] + sub $inp, $inp, #0x60 + vstmia $fp, {@XMM[15]} @ put aside IV + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + veor @XMM[5], @XMM[5], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + vst1.8 {@XMM[5]}, [$out]! + + b .Lcbc_dec_loop + +.Lcbc_dec_loop_finish: + adds $len, $len, #8 + beq .Lcbc_dec_done + + vld1.8 {@XMM[0]}, [$inp]! @ load input + cmp $len, #2 + blo .Lcbc_dec_one + vld1.8 {@XMM[1]}, [$inp]! +#ifndef BSAES_ASM_EXTENDED_KEY + mov r4, $keysched @ pass the key +#else + add r4, $key, #248 +#endif + mov r5, $rounds + vstmia $fp, {@XMM[15]} @ put aside IV + beq .Lcbc_dec_two + vld1.8 {@XMM[2]}, [$inp]! + cmp $len, #4 + blo .Lcbc_dec_three + vld1.8 {@XMM[3]}, [$inp]! + beq .Lcbc_dec_four + vld1.8 {@XMM[4]}, [$inp]! + cmp $len, #6 + blo .Lcbc_dec_five + vld1.8 {@XMM[5]}, [$inp]! + beq .Lcbc_dec_six + vld1.8 {@XMM[6]}, [$inp]! + sub $inp, $inp, #0x70 + + bl _bsaes_decrypt8 + + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[3], @XMM[3], @XMM[13] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + vst1.8 {@XMM[3]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_six: + sub $inp, $inp, #0x60 + bl _bsaes_decrypt8 + vldmia $fp,{@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + veor @XMM[2], @XMM[2], @XMM[11] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[7], @XMM[7], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + vst1.8 {@XMM[7]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_five: + sub $inp, $inp, #0x50 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[2], @XMM[2], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + vst1.8 {@XMM[2]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_four: + sub $inp, $inp, #0x40 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! 
@ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[4], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + vst1.8 {@XMM[4]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_three: + sub $inp, $inp, #0x30 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! + veor @XMM[1], @XMM[1], @XMM[8] + veor @XMM[6], @XMM[6], @XMM[9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + vst1.8 {@XMM[6]}, [$out]! + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_two: + sub $inp, $inp, #0x20 + bl _bsaes_decrypt8 + vldmia $fp, {@XMM[14]} @ reload IV + vld1.8 {@XMM[8]}, [$inp]! @ reload input + veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV + vld1.8 {@XMM[15]}, [$inp]! @ reload input + veor @XMM[1], @XMM[1], @XMM[8] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + b .Lcbc_dec_done +.align 4 +.Lcbc_dec_one: + sub $inp, $inp, #0x10 + mov $rounds, $out @ save original out pointer + mov $out, $fp @ use the iv scratch space as out buffer + mov r2, $key + vmov @XMM[4],@XMM[15] @ just in case ensure that IV + vmov @XMM[5],@XMM[0] @ and input are preserved + bl AES_decrypt + vld1.8 {@XMM[0]}, [$fp,:64] @ load result + veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV + vmov @XMM[15], @XMM[5] @ @XMM[5] holds input + vst1.8 {@XMM[0]}, [$rounds] @ write output + +.Lcbc_dec_done: +#ifndef BSAES_ASM_EXTENDED_KEY + vmov.i32 q0, #0 + vmov.i32 q1, #0 +.Lcbc_dec_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lcbc_dec_bzero +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + vst1.8 {@XMM[15]}, [$ivp] @ return IV + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} +.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt +___ +} +{ +my ($inp,$out,$len,$key, $ctr,$fp,$rounds)=(map("r$_",(0..3,8..10))); +my $const = "r6"; # shared with _bsaes_encrypt8_alt +my $keysched = "sp"; + +$code.=<<___; +.extern AES_encrypt +.global bsaes_ctr32_encrypt_blocks +.type bsaes_ctr32_encrypt_blocks,%function +.align 5 +bsaes_ctr32_encrypt_blocks: + cmp $len, #8 @ use plain AES for + blo .Lctr_enc_short @ small sizes + + mov ip, sp + stmdb sp!, {r4-r10, lr} + VFP_ABI_PUSH + ldr $ctr, [ip] @ ctr is 1st arg on the stack + sub sp, sp, #0x10 @ scratch space to carry over the ctr + mov $fp, sp @ save sp + + ldr $rounds, [$key, #240] @ get # of rounds +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + add r12, #`128-32` @ size of bit-sliced key schedule + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 @ sp is $keysched + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + + vld1.8 {@XMM[0]}, [$ctr] @ load counter + add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr + vldmia $keysched, {@XMM[4]} @ load round0 key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + @ populate the key schedule + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key + +.align 2 +0: add r12, $key, #248 + vld1.8 
{@XMM[0]}, [$ctr] @ load counter + adrl $ctr, .LREVM0SR @ borrow $ctr + vldmia r12, {@XMM[4]} @ load round0 key + sub sp, #0x10 @ place for adjusted round0 key +#endif + + vmov.i32 @XMM[8],#1 @ compose 1<<96 + veor @XMM[9],@XMM[9],@XMM[9] + vrev32.8 @XMM[0],@XMM[0] + vext.8 @XMM[8],@XMM[9],@XMM[8],#4 + vrev32.8 @XMM[4],@XMM[4] + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vstmia $keysched, {@XMM[4]} @ save adjusted round0 key + b .Lctr_enc_loop + +.align 4 +.Lctr_enc_loop: + vadd.u32 @XMM[10], @XMM[8], @XMM[9] @ compose 3<<96 + vadd.u32 @XMM[1], @XMM[0], @XMM[8] @ +1 + vadd.u32 @XMM[2], @XMM[0], @XMM[9] @ +2 + vadd.u32 @XMM[3], @XMM[0], @XMM[10] @ +3 + vadd.u32 @XMM[4], @XMM[1], @XMM[10] + vadd.u32 @XMM[5], @XMM[2], @XMM[10] + vadd.u32 @XMM[6], @XMM[3], @XMM[10] + vadd.u32 @XMM[7], @XMM[4], @XMM[10] + vadd.u32 @XMM[10], @XMM[5], @XMM[10] @ next counter + + @ Borrow prologue from _bsaes_encrypt8 to use the opportunity + @ to flip byte order in 32-bit counter + + vldmia $keysched, {@XMM[9]} @ load round0 key +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, $keysched, #0x10 @ pass next round key +#else + add r4, $key, #`248+16` +#endif + vldmia $ctr, {@XMM[8]} @ .LREVM0SR + mov r5, $rounds @ pass rounds + vstmia $fp, {@XMM[10]} @ save next counter + sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants + + bl _bsaes_encrypt8_alt + + subs $len, $len, #8 + blo .Lctr_enc_loop_done + + vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input + vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! + veor @XMM[0], @XMM[8] + veor @XMM[1], @XMM[9] + vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! + veor @XMM[4], @XMM[10] + veor @XMM[6], @XMM[11] + vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output + veor @XMM[7], @XMM[13] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[4]}, [$out]! + veor @XMM[5], @XMM[15] + vst1.8 {@XMM[6]}, [$out]! + vmov.i32 @XMM[8], #1 @ compose 1<<96 + vst1.8 {@XMM[3]}, [$out]! + veor @XMM[9], @XMM[9], @XMM[9] + vst1.8 {@XMM[7]}, [$out]! + vext.8 @XMM[8], @XMM[9], @XMM[8], #4 + vst1.8 {@XMM[2]}, [$out]! + vadd.u32 @XMM[9],@XMM[8],@XMM[8] @ compose 2<<96 + vst1.8 {@XMM[5]}, [$out]! + vldmia $fp, {@XMM[0]} @ load counter + + bne .Lctr_enc_loop + b .Lctr_enc_done + +.align 4 +.Lctr_enc_loop_done: + add $len, $len, #8 + vld1.8 {@XMM[8]}, [$inp]! @ load input + veor @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! @ write output + cmp $len, #2 + blo .Lctr_enc_done + vld1.8 {@XMM[9]}, [$inp]! + veor @XMM[1], @XMM[9] + vst1.8 {@XMM[1]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[10]}, [$inp]! + veor @XMM[4], @XMM[10] + vst1.8 {@XMM[4]}, [$out]! + cmp $len, #4 + blo .Lctr_enc_done + vld1.8 {@XMM[11]}, [$inp]! + veor @XMM[6], @XMM[11] + vst1.8 {@XMM[6]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[12]}, [$inp]! + veor @XMM[3], @XMM[12] + vst1.8 {@XMM[3]}, [$out]! + cmp $len, #6 + blo .Lctr_enc_done + vld1.8 {@XMM[13]}, [$inp]! + veor @XMM[7], @XMM[13] + vst1.8 {@XMM[7]}, [$out]! + beq .Lctr_enc_done + vld1.8 {@XMM[14]}, [$inp] + veor @XMM[2], @XMM[14] + vst1.8 {@XMM[2]}, [$out]! 
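bsaes_ctr32_encrypt_blocks follows the ctr32 convention that the code above makes visible: vrev32.8 byte-swaps the big-endian counter words so the low word can be bumped with vadd.u32 against the composed 1<<96 constant, eight counters at a time, and the borrowed _bsaes_encrypt8 prologue flips the bytes back through .LREVM0SR while entering the bit-sliced domain; the scalar .Lctr_enc_short path below does the same one block at a time with rev/add/rev. In both paths only the last 32-bit word is ever incremented; wrap-around into the upper 96 bits is left to the surrounding CTR glue. A minimal Perl sketch of that counter update, assuming the counter block is a 16-byte string:

# Minimal sketch of the ctr32 update: only the last (big-endian) 32-bit word
# of the 16-byte counter block is incremented, wrapping modulo 2^32.
sub ctr32_inc {
    my ($ivec) = @_;                                  # 16-byte counter block
    my $low = unpack("N", substr($ivec, 12, 4));      # big-endian low word
    substr($ivec, 12, 4) = pack("N", ($low + 1) & 0xffffffff);
    return $ivec;
}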
+ +.Lctr_enc_done: + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifndef BSAES_ASM_EXTENDED_KEY +.Lctr_enc_bzero: @ wipe key schedule [if any] + vstmia $keysched!, {q0-q1} + cmp $keysched, $fp + bne .Lctr_enc_bzero +#else + vstmia $keysched, {q0-q1} +#endif + + mov sp, $fp + add sp, #0x10 @ add sp,$fp,#0x10 is no good for thumb + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.align 4 +.Lctr_enc_short: + ldr ip, [sp] @ ctr pointer is passed on stack + stmdb sp!, {r4-r8, lr} + + mov r4, $inp @ copy arguments + mov r5, $out + mov r6, $len + mov r7, $key + ldr r8, [ip, #12] @ load counter LSW + vld1.8 {@XMM[1]}, [ip] @ load whole counter value +#ifdef __ARMEL__ + rev r8, r8 +#endif + sub sp, sp, #0x10 + vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value + sub sp, sp, #0x10 + +.Lctr_enc_short_loop: + add r0, sp, #0x10 @ input counter value + mov r1, sp @ output on the stack + mov r2, r7 @ key + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [r4]! @ load input + vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter + add r8, r8, #1 +#ifdef __ARMEL__ + rev r0, r8 + str r0, [sp, #0x1c] @ next counter value +#else + str r8, [sp, #0x1c] @ next counter value +#endif + veor @XMM[0],@XMM[0],@XMM[1] + vst1.8 {@XMM[0]}, [r5]! @ store output + subs r6, r6, #1 + bne .Lctr_enc_short_loop + + vmov.i32 q0, #0 + vmov.i32 q1, #0 + vstmia sp!, {q0-q1} + + ldmia sp!, {r4-r8, pc} +.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks +___ +} +{ +###################################################################### +# void bsaes_xts_[en|de]crypt(const char *inp,char *out,size_t len, +# const AES_KEY *key1, const AES_KEY *key2, +# const unsigned char iv[16]); +# +my ($inp,$out,$len,$key,$rounds,$magic,$fp)=(map("r$_",(7..10,1..3))); +my $const="r6"; # returned by _bsaes_key_convert +my $twmask=@XMM[5]; +my @T=@XMM[6..7]; + +$code.=<<___; +.globl bsaes_xts_encrypt +.type bsaes_xts_encrypt,%function +.align 4 +bsaes_xts_encrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0,sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} @ save last round key +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + veor @XMM[7], @XMM[7], @XMM[15] @ fix up last round key + vstmia r12, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + + vld1.8 {@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + subs $len, #0x80 + blo .Lxts_enc_short + b .Lxts_enc_loop + +.align 4 +.Lxts_enc_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + 
vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_enc_loop + +.Lxts_enc_short: + adds $len, #0x70 + bmi .Lxts_enc_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_enc_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[2], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! 
+ + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done + +@ put this in range for both ARM and Thumb mode adr instructions +.align 5 +.Lxts_magic: + .quad 1, 0x87 + +.align 5 +.Lxts_enc_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + veor @XMM[10], @XMM[3], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[6], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[4], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! 
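The tweak schedule built by the loops above, together with the .Lxts_magic constant (.quad 1, 0x87), is repeated multiplication of the 128-bit tweak by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1: vadd.u64 doubles each 64-bit half, and the vshr.s64/vand/vswp/veor sequence propagates the carry between the halves and folds 0x87 into the low byte whenever bit 127 falls off, all branch-free. A byte-wise Perl reference for one doubling step, treating the tweak as a 16-byte little-endian string; this is a sketch for checking, not code from this module:

# One XTS tweak-doubling step: tweak := tweak * x in GF(2^128), reduced by
# x^128 + x^7 + x^2 + x + 1; byte 0 of the 16-byte string is least significant.
sub xts_double {
    my @t = unpack("C16", $_[0]);
    my $carry = 0;
    for my $i (0 .. 15) {
        my $v = ($t[$i] << 1) | $carry;
        $carry = $v >> 8;                    # bit shifted out of this byte
        $t[$i] = $v & 0xff;
    }
    $t[0] ^= 0x87 if $carry;                 # reduce when bit 127 was shifted out
    return pack("C16", @t);
}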
+ + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_encrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_enc_done +.align 4 +.Lxts_enc_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_enc_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_enc_ret + sub r6, $out, #0x10 + +.Lxts_enc_steal: + ldrb r0, [$inp], #1 + ldrb r1, [$out, #-0x10] + strb r0, [$out, #-0x10] + strb r1, [$out], #1 + + subs $len, #1 + bhi .Lxts_enc_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_encrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_enc_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_enc_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_enc_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_encrypt,.-bsaes_xts_encrypt + +.globl bsaes_xts_decrypt +.type bsaes_xts_decrypt,%function +.align 4 +bsaes_xts_decrypt: + mov ip, sp + stmdb sp!, {r4-r10, lr} @ 0x20 + VFP_ABI_PUSH + mov r6, sp @ future $fp + + mov $inp, r0 + mov $out, r1 + mov $len, r2 + mov $key, r3 + + sub r0, sp, #0x10 @ 0x10 + bic r0, #0xf @ align at 16 bytes + mov sp, r0 + +#ifdef XTS_CHAIN_TWEAK + ldr r0, [ip] @ pointer to input tweak +#else + @ generate initial tweak + ldr r0, [ip, #4] @ iv[] + mov r1, sp + ldr r2, [ip, #0] @ key2 + bl AES_encrypt + mov r0, sp @ pointer to initial tweak +#endif + + ldr $rounds, [$key, #240] @ get # of rounds + mov $fp, r6 +#ifndef BSAES_ASM_EXTENDED_KEY + @ allocate the key schedule on the stack + sub r12, sp, $rounds, lsl#7 @ 128 bytes per inner round key + @ add r12, #`128-32` @ size of bit-sliced key schedule + sub r12, #`32+16` @ place for tweak[9] + + @ populate the key schedule + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + mov sp, r12 + add r12, #0x90 @ pass key schedule + bl _bsaes_key_convert + add r4, sp, #0x90 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} +#else + ldr r12, [$key, #244] + eors r12, #1 + beq 0f + + str r12, [$key, #244] + mov r4, $key @ pass key + mov r5, $rounds @ pass # of rounds + add r12, $key, #248 @ pass key schedule + bl _bsaes_key_convert + add r4, $key, #248 + vldmia r4, {@XMM[6]} + vstmia r12, {@XMM[15]} @ save last round key + veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key + vstmia r4, {@XMM[7]} + +.align 2 +0: sub sp, #0x90 @ place for tweak[9] +#endif + vld1.8 
{@XMM[8]}, [r0] @ initial tweak + adr $magic, .Lxts_magic + + tst $len, #0xf @ if not multiple of 16 + it ne @ Thumb2 thing, sanity check in ARM + subne $len, #0x10 @ subtract another 16 bytes + subs $len, #0x80 + + blo .Lxts_dec_short + b .Lxts_dec_loop + +.align 4 +.Lxts_dec_loop: + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + vadd.u64 @XMM[8], @XMM[15], @XMM[15] + vst1.64 {@XMM[15]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + veor @XMM[8], @XMM[8], @T[0] + vst1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + veor @XMM[7], @XMM[7], @XMM[15] + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]-@XMM[15]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + veor @XMM[13], @XMM[5], @XMM[15] + vst1.8 {@XMM[12]-@XMM[13]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + + subs $len, #0x80 + bpl .Lxts_dec_loop + +.Lxts_dec_short: + adds $len, #0x70 + bmi .Lxts_dec_done + + vldmia $magic, {$twmask} @ load XTS magic + vshr.s64 @T[0], @XMM[8], #63 + mov r0, sp + vand @T[0], @T[0], $twmask +___ +for($i=9;$i<16;$i++) { +$code.=<<___; + vadd.u64 @XMM[$i], @XMM[$i-1], @XMM[$i-1] + vst1.64 {@XMM[$i-1]}, [r0,:128]! + vswp `&Dhi("@T[0]")`,`&Dlo("@T[0]")` + vshr.s64 @T[1], @XMM[$i], #63 + veor @XMM[$i], @XMM[$i], @T[0] + vand @T[1], @T[1], $twmask +___ + @T=reverse(@T); + +$code.=<<___ if ($i>=10); + vld1.8 {@XMM[$i-10]}, [$inp]! + subs $len, #0x10 + bmi .Lxts_dec_`$i-9` +___ +$code.=<<___ if ($i>=11); + veor @XMM[$i-11], @XMM[$i-11], @XMM[$i-3] +___ +} +$code.=<<___; + sub $len, #0x10 + vst1.64 {@XMM[15]}, [r0,:128] @ next round tweak + + vld1.8 {@XMM[6]}, [$inp]! + veor @XMM[5], @XMM[5], @XMM[13] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[6], @XMM[6], @XMM[14] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vld1.64 {@XMM[14]}, [r0,:128]! + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! 
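bsaes_xts_decrypt differs from the encrypt path right at the top of this section: the tst $len, #0xf / subne $len, #0x10 pair holds one full block back from the bulk loops whenever the length is not a multiple of 16, because with ciphertext stealing that block must be decrypted under the next tweak rather than the current one (hence the extra tweak computed in the .Lxts_dec_done path further down). A tiny hypothetical Perl helper for the same bookkeeping, with names invented for illustration:

# Hypothetical helper mirroring the length adjustment above: split an XTS
# decrypt request into the bytes handled by the bulk loops and the bytes
# (one full block plus the partial tail) left for ciphertext stealing.
sub xts_dec_split {
    my ($len) = @_;
    my $tail = $len & 0xf;                   # bytes in the final partial block
    my $bulk = $len - $tail;                 # whole 16-byte blocks
    $bulk -= 16 if $tail;                    # hold one block back for stealing
    return ($bulk, $tail ? $tail + 16 : 0);  # (bulk bytes, stealing bytes)
}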
+ veor @XMM[11], @XMM[7], @XMM[13] + veor @XMM[12], @XMM[3], @XMM[14] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + vst1.8 {@XMM[12]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_6: + vst1.64 {@XMM[14]}, [r0,:128] @ next round tweak + + veor @XMM[4], @XMM[4], @XMM[12] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[5], @XMM[5], @XMM[13] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]-@XMM[13]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + veor @XMM[11], @XMM[7], @XMM[13] + vst1.8 {@XMM[10]-@XMM[11]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_5: + vst1.64 {@XMM[13]}, [r0,:128] @ next round tweak + + veor @XMM[3], @XMM[3], @XMM[11] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[4], @XMM[4], @XMM[12] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + vld1.64 {@XMM[12]}, [r0,:128]! + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + veor @XMM[10], @XMM[2], @XMM[12] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + vst1.8 {@XMM[10]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_4: + vst1.64 {@XMM[12]}, [r0,:128] @ next round tweak + + veor @XMM[2], @XMM[2], @XMM[10] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[3], @XMM[3], @XMM[11] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[ 8]-@XMM[ 9]}, [r0,:128]! + vld1.64 {@XMM[10]-@XMM[11]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + veor @XMM[9], @XMM[4], @XMM[11] + vst1.8 {@XMM[8]-@XMM[9]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_3: + vst1.64 {@XMM[11]}, [r0,:128] @ next round tweak + + veor @XMM[1], @XMM[1], @XMM[9] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[2], @XMM[2], @XMM[10] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + vld1.64 {@XMM[10]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + veor @XMM[8], @XMM[6], @XMM[10] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + vst1.8 {@XMM[8]}, [$out]! 
+ + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_2: + vst1.64 {@XMM[10]}, [r0,:128] @ next round tweak + + veor @XMM[0], @XMM[0], @XMM[8] +#ifndef BSAES_ASM_EXTENDED_KEY + add r4, sp, #0x90 @ pass key schedule +#else + add r4, $key, #248 @ pass key schedule +#endif + veor @XMM[1], @XMM[1], @XMM[9] + mov r5, $rounds @ pass rounds + mov r0, sp + + bl _bsaes_decrypt8 + + vld1.64 {@XMM[8]-@XMM[9]}, [r0,:128]! + veor @XMM[0], @XMM[0], @XMM[ 8] + veor @XMM[1], @XMM[1], @XMM[ 9] + vst1.8 {@XMM[0]-@XMM[1]}, [$out]! + + vld1.64 {@XMM[8]}, [r0,:128] @ next round tweak + b .Lxts_dec_done +.align 4 +.Lxts_dec_1: + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + mov r5, $magic @ preserve magic + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [$out]! + mov $fp, r4 + mov $magic, r5 + + vmov @XMM[8], @XMM[9] @ next round tweak + +.Lxts_dec_done: +#ifndef XTS_CHAIN_TWEAK + adds $len, #0x10 + beq .Lxts_dec_ret + + @ calculate one round of extra tweak for the stolen ciphertext + vldmia $magic, {$twmask} + vshr.s64 @XMM[6], @XMM[8], #63 + vand @XMM[6], @XMM[6], $twmask + vadd.u64 @XMM[9], @XMM[8], @XMM[8] + vswp `&Dhi("@XMM[6]")`,`&Dlo("@XMM[6]")` + veor @XMM[9], @XMM[9], @XMM[6] + + @ perform the final decryption with the last tweak value + vld1.8 {@XMM[0]}, [$inp]! + mov r0, sp + veor @XMM[0], @XMM[0], @XMM[9] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + mov r4, $fp @ preserve fp + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[9] + vst1.8 {@XMM[0]}, [$out] + + mov r6, $out +.Lxts_dec_steal: + ldrb r1, [$out] + ldrb r0, [$inp], #1 + strb r1, [$out, #0x10] + strb r0, [$out], #1 + + subs $len, #1 + bhi .Lxts_dec_steal + + vld1.8 {@XMM[0]}, [r6] + mov r0, sp + veor @XMM[0], @XMM[8] + mov r1, sp + vst1.8 {@XMM[0]}, [sp,:128] + mov r2, $key + + bl AES_decrypt + + vld1.8 {@XMM[0]}, [sp,:128] + veor @XMM[0], @XMM[0], @XMM[8] + vst1.8 {@XMM[0]}, [r6] + mov $fp, r4 +#endif + +.Lxts_dec_ret: + bic r0, $fp, #0xf + vmov.i32 q0, #0 + vmov.i32 q1, #0 +#ifdef XTS_CHAIN_TWEAK + ldr r1, [$fp, #0x20+VFP_ABI_FRAME] @ chain tweak +#endif +.Lxts_dec_bzero: @ wipe key schedule [if any] + vstmia sp!, {q0-q1} + cmp sp, r0 + bne .Lxts_dec_bzero + + mov sp, $fp +#ifdef XTS_CHAIN_TWEAK + vst1.8 {@XMM[8]}, [r1] +#endif + VFP_ABI_POP + ldmia sp!, {r4-r10, pc} @ return + +.size bsaes_xts_decrypt,.-bsaes_xts_decrypt +___ +} +$code.=<<___; +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/bsaes-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/bsaes-x86_64.pl index 41b90f08443f51..3f7d33c45bce71 100644 --- a/deps/openssl/openssl/crypto/aes/asm/bsaes-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/bsaes-x86_64.pl @@ -38,8 +38,9 @@ # Emilia's this(*) difference # # Core 2 9.30 8.69 +7% -# Nehalem(**) 7.63 6.98 +9% -# Atom 17.1 17.4 -2%(***) +# Nehalem(**) 7.63 6.88 +11% +# Atom 17.1 16.4 +4% +# Silvermont - 12.9 # # (*) Comparison is not completely fair, because "this" is ECB, # i.e. no extra processing such as counter values calculation @@ -50,14 +51,6 @@ # (**) Results were collected on Westmere, which is considered to # be equivalent to Nehalem for this code. 
# -# (***) Slowdown on Atom is rather strange per se, because original -# implementation has a number of 9+-bytes instructions, which -# are bad for Atom front-end, and which I eliminated completely. -# In attempt to address deterioration sbox() was tested in FP -# SIMD "domain" (movaps instead of movdqa, xorps instead of -# pxor, etc.). While it resulted in nominal 4% improvement on -# Atom, it hurted Westmere by more than 2x factor. -# # As for key schedule conversion subroutine. Interface to OpenSSL # relies on per-invocation on-the-fly conversion. This naturally # has impact on performance, especially for short inputs. Conversion @@ -67,7 +60,7 @@ # conversion conversion/8x block # Core 2 240 0.22 # Nehalem 180 0.20 -# Atom 430 0.19 +# Atom 430 0.20 # # The ratio values mean that 128-byte blocks will be processed # 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%, @@ -83,9 +76,10 @@ # Add decryption procedure. Performance in CPU cycles spent to decrypt # one byte out of 4096-byte buffer with 128-bit key is: # -# Core 2 9.83 -# Nehalem 7.74 -# Atom 19.0 +# Core 2 9.98 +# Nehalem 7.80 +# Atom 17.9 +# Silvermont 14.0 # # November 2011. # @@ -434,21 +428,21 @@ sub ShiftRows { $code.=<<___; pxor 0x00($key),@x[0] pxor 0x10($key),@x[1] - pshufb $mask,@x[0] pxor 0x20($key),@x[2] - pshufb $mask,@x[1] pxor 0x30($key),@x[3] - pshufb $mask,@x[2] + pshufb $mask,@x[0] + pshufb $mask,@x[1] pxor 0x40($key),@x[4] - pshufb $mask,@x[3] pxor 0x50($key),@x[5] - pshufb $mask,@x[4] + pshufb $mask,@x[2] + pshufb $mask,@x[3] pxor 0x60($key),@x[6] - pshufb $mask,@x[5] pxor 0x70($key),@x[7] + pshufb $mask,@x[4] + pshufb $mask,@x[5] pshufb $mask,@x[6] - lea 0x80($key),$key pshufb $mask,@x[7] + lea 0x80($key),$key ___ } @@ -820,18 +814,18 @@ sub bitslice { movdqa 0x50($const), @XMM[8] # .LM0SR pxor @XMM[9], @XMM[0] # xor with round0 key pxor @XMM[9], @XMM[1] - pshufb @XMM[8], @XMM[0] pxor @XMM[9], @XMM[2] - pshufb @XMM[8], @XMM[1] pxor @XMM[9], @XMM[3] - pshufb @XMM[8], @XMM[2] + pshufb @XMM[8], @XMM[0] + pshufb @XMM[8], @XMM[1] pxor @XMM[9], @XMM[4] - pshufb @XMM[8], @XMM[3] pxor @XMM[9], @XMM[5] - pshufb @XMM[8], @XMM[4] + pshufb @XMM[8], @XMM[2] + pshufb @XMM[8], @XMM[3] pxor @XMM[9], @XMM[6] - pshufb @XMM[8], @XMM[5] pxor @XMM[9], @XMM[7] + pshufb @XMM[8], @XMM[4] + pshufb @XMM[8], @XMM[5] pshufb @XMM[8], @XMM[6] pshufb @XMM[8], @XMM[7] _bsaes_encrypt8_bitslice: @@ -884,18 +878,18 @@ sub bitslice { movdqa -0x30($const), @XMM[8] # .LM0ISR pxor @XMM[9], @XMM[0] # xor with round0 key pxor @XMM[9], @XMM[1] - pshufb @XMM[8], @XMM[0] pxor @XMM[9], @XMM[2] - pshufb @XMM[8], @XMM[1] pxor @XMM[9], @XMM[3] - pshufb @XMM[8], @XMM[2] + pshufb @XMM[8], @XMM[0] + pshufb @XMM[8], @XMM[1] pxor @XMM[9], @XMM[4] - pshufb @XMM[8], @XMM[3] pxor @XMM[9], @XMM[5] - pshufb @XMM[8], @XMM[4] + pshufb @XMM[8], @XMM[2] + pshufb @XMM[8], @XMM[3] pxor @XMM[9], @XMM[6] - pshufb @XMM[8], @XMM[5] pxor @XMM[9], @XMM[7] + pshufb @XMM[8], @XMM[4] + pshufb @XMM[8], @XMM[5] pshufb @XMM[8], @XMM[6] pshufb @XMM[8], @XMM[7] ___ @@ -1937,21 +1931,21 @@ sub bitslice_key { movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR pxor @XMM[9], @XMM[0] # xor with round0 key pxor @XMM[9], @XMM[1] - pshufb @XMM[8], @XMM[0] pxor @XMM[9], @XMM[2] - pshufb @XMM[8], @XMM[1] pxor @XMM[9], @XMM[3] - pshufb @XMM[8], @XMM[2] + pshufb @XMM[8], @XMM[0] + pshufb @XMM[8], @XMM[1] pxor @XMM[9], @XMM[4] - pshufb @XMM[8], @XMM[3] pxor @XMM[9], @XMM[5] - pshufb @XMM[8], @XMM[4] + pshufb @XMM[8], @XMM[2] + pshufb @XMM[8], @XMM[3] pxor @XMM[9], @XMM[6] - pshufb @XMM[8], 
@XMM[5] pxor @XMM[9], @XMM[7] + pshufb @XMM[8], @XMM[4] + pshufb @XMM[8], @XMM[5] pshufb @XMM[8], @XMM[6] - lea .LBS0(%rip), %r11 # constants table pshufb @XMM[8], @XMM[7] + lea .LBS0(%rip), %r11 # constants table mov %ebx,%r10d # pass rounds call _bsaes_encrypt8_bitslice diff --git a/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl b/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl new file mode 100644 index 00000000000000..7fda60ed9e4d59 --- /dev/null +++ b/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl @@ -0,0 +1,1512 @@ +#!/usr/bin/env perl + +###################################################################### +## Constant-time SSSE3 AES core implementation. +## version 0.1 +## +## By Mike Hamburg (Stanford University), 2009 +## Public domain. +## +## For details see http://shiftleft.org/papers/vector_aes/ and +## http://crypto.stanford.edu/vpaes/. + +# CBC encrypt/decrypt performance in cycles per byte processed with +# 128-bit key. +# +# aes-ppc.pl this +# G4e 35.5/52.1/(23.8) 11.9(*)/15.4 +# POWER6 42.7/54.3/(28.2) 63.0/92.8(**) +# POWER7 32.3/42.9/(18.4) 18.5/23.3 +# +# (*) This is ~10% worse than reported in paper. The reason is +# twofold. This module doesn't make any assumption about +# key schedule (or data for that matter) alignment and handles +# it in-line. Secondly it, being transliterated from +# vpaes-x86_64.pl, relies on "nested inversion" better suited +# for Intel CPUs. +# (**) Inadequate POWER6 performance is due to astronomic AltiVec +# latency, 9 cycles per simple logical operation. + +$flavour = shift; + +if ($flavour =~ /64/) { + $SIZE_T =8; + $LRSAVE =2*$SIZE_T; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; + $UCMP ="cmpld"; +} elsif ($flavour =~ /32/) { + $SIZE_T =4; + $LRSAVE =$SIZE_T; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; + $UCMP ="cmplw"; +} else { die "nonsense $flavour"; } + +$sp="r1"; +$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$code.=<<___; +.machine "any" + +.text + +.align 7 # totally strategic alignment +_vpaes_consts: +Lk_mc_forward: # mc_forward + .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv + .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv + .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv + .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv +Lk_mc_backward: # mc_backward + .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv + .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv + .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv + .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv +Lk_sr: # sr + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv + .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv + .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv + .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv + +## +## "Hot" constants +## +Lk_inv: # inv, inva + .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev + .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev +Lk_ipt: # input transform (lo, hi) + .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev + .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev +Lk_sbo: # sbou, sbot + .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev + .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev 
+Lk_sb1: # sb1u, sb1t + .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev + .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev +Lk_sb2: # sb2u, sb2t + .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev + .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev + +## +## Decryption stuff +## +Lk_dipt: # decryption input transform + .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev + .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev +Lk_dsbo: # decryption sbox final output + .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev + .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev +Lk_dsb9: # decryption sbox output *9*u, *9*t + .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev + .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev +Lk_dsbd: # decryption sbox output *D*u, *D*t + .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev + .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev +Lk_dsbb: # decryption sbox output *B*u, *B*t + .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev + .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev +Lk_dsbe: # decryption sbox output *E*u, *E*t + .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev + .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev + +## +## Key schedule constants +## +Lk_dksd: # decryption key schedule: invskew x*D + .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev + .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev +Lk_dksb: # decryption key schedule: invskew x*B + .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev + .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev +Lk_dkse: # decryption key schedule: invskew x*E + 0x63 + .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev + .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev +Lk_dks9: # decryption key schedule: invskew x*9 + .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev + .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev + +Lk_rcon: # rcon + .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis +Lk_s63: + .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis + +Lk_opt: # output transform + .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev + .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev +Lk_deskew: # deskew tables: inverts the sbox's "skew" + .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev + .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev +.align 5 +Lconsts: + mflr r0 + bcl 20,31,\$+4 + mflr r12 #vvvvv "distance between . and _vpaes_consts + addi r12,r12,-0x308 + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)" +.align 6 +___ + +my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31)); +{ +my ($inp,$out,$key) = map("r$_",(3..5)); + +my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15)); +my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19)); +my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23)); + +$code.=<<___; +## +## _aes_preheat +## +## Fills register %r10 -> .aes_consts (so you can -fPIC) +## and %xmm9-%xmm15 as specified below. 
+## +.align 4 +_vpaes_encrypt_preheat: + mflr r8 + bl Lconsts + mtlr r8 + li r11, 0xc0 # Lk_inv + li r10, 0xd0 + li r9, 0xe0 # Lk_ipt + li r8, 0xf0 + vxor v7, v7, v7 # 0x00..00 + vspltisb v8,4 # 0x04..04 + vspltisb v9,0x0f # 0x0f..0f + lvx $invlo, r12, r11 + li r11, 0x100 + lvx $invhi, r12, r10 + li r10, 0x110 + lvx $iptlo, r12, r9 + li r9, 0x120 + lvx $ipthi, r12, r8 + li r8, 0x130 + lvx $sbou, r12, r11 + li r11, 0x140 + lvx $sbot, r12, r10 + li r10, 0x150 + lvx $sb1u, r12, r9 + lvx $sb1t, r12, r8 + lvx $sb2u, r12, r11 + lvx $sb2t, r12, r10 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +## +## _aes_encrypt_core +## +## AES-encrypt %xmm0. +## +## Inputs: +## %xmm0 = input +## %xmm9-%xmm15 as in _vpaes_preheat +## (%rdx) = scheduled keys +## +## Output in %xmm0 +## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax +## +## +.align 5 +_vpaes_encrypt_core: + lwz r8, 240($key) # pull rounds + li r9, 16 + lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key + li r11, 0x10 + lvx v6, r9, $key + addi r9, r9, 16 + ?vperm v5, v5, v6, $keyperm # align round key + addi r10, r11, 0x40 + vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 + vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1 + vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2 + vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0 + vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 + mtctr r8 + b Lenc_entry + +.align 4 +Lenc_loop: + # middle of middle round + vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u + lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + addi r11, r11, 16 + vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t + vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + andi. r11, r11, 0x30 # and \$0x30, %r11 # ... mod 4 + vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u + vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A + vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t + lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] + addi r10, r11, 0x40 + vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B + vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A + vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D + vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B + vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C + vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D + vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D + +Lenc_entry: + # top of round + vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i + vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k + vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j + vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + vand v0, v0, v9 + vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + vmr v5, v6 + lvx v6, r9, $key # vmovdqu (%r9), %xmm5 + vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + addi r9, r9, 16 + vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io + ?vperm v5, v5, v6, $keyperm # align round key + vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + bdnz Lenc_loop + + # middle of last round + addi r10, r11, 0x80 + # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo + # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 + vperm v4, $sbou, v7, v2 # vpshufb %xmm2, 
%xmm4, %xmm4 # 4 = sbou + lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] + vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t + vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k + vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A + vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.globl .vpaes_encrypt +.align 5 +.vpaes_encrypt: + $STU $sp,-$FRAME($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mflr r6 + mfspr r7, 256 # save vrsave + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + stw r7,`$FRAME-4`($sp) # save vrsave + li r0, -1 + $PUSH r6,`$FRAME+$LRSAVE`($sp) + mtspr 256, r0 # preserve all AltiVec registers + + bl _vpaes_encrypt_preheat + + ?lvsl $inpperm, 0, $inp # prepare for unaligned access + lvx v0, 0, $inp + addi $inp, $inp, 15 # 15 is not a typo + ?lvsr $outperm, 0, $out + ?lvsl $keyperm, 0, $key # prepare for unaligned access + vnor $outmask, v7, v7 # 0xff..ff + lvx $inptail, 0, $inp # redundant in aligned case + ?vperm $outmask, v7, $outmask, $outperm + lvx $outhead, 0, $out + ?vperm v0, v0, $inptail, $inpperm + + bl _vpaes_encrypt_core + + vperm v0, v0, v0, $outperm # rotate right/left + vsel v1, $outhead, v0, $outmask + vmr $outhead, v0 + stvx v1, 0, $out + addi $out, $out, 15 # 15 is not a typo + ######## + + lvx v1, 0, $out # redundant in aligned case + vsel v1, $outhead, v1, $outmask + stvx v1, 0, $out + + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mtlr r6 + mtspr 256, r7 # restore vrsave + lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,0x04,1,0x80,0,3,0 + .long 0 +.size .vpaes_encrypt,.-.vpaes_encrypt + +.align 4 +_vpaes_decrypt_preheat: + mflr r8 + bl Lconsts + mtlr r8 + li r11, 0xc0 # Lk_inv + li r10, 0xd0 + li r9, 0x160 # Ldipt + li r8, 0x170 + vxor v7, v7, v7 # 0x00..00 + vspltisb v8,4 # 0x04..04 + vspltisb v9,0x0f # 0x0f..0f + lvx $invlo, r12, r11 + li r11, 0x180 + lvx $invhi, r12, r10 + li r10, 0x190 + lvx $iptlo, r12, r9 + li r9, 0x1a0 + lvx $ipthi, r12, r8 + li r8, 0x1b0 + lvx $sbou, r12, r11 + li r11, 0x1c0 + lvx $sbot, r12, r10 + li r10, 0x1d0 + lvx $sb9u, r12, r9 + li r9, 0x1e0 + lvx $sb9t, r12, r8 + li r8, 0x1f0 + lvx $sbdu, r12, r11 + li r11, 0x200 + lvx $sbdt, r12, r10 + li r10, 0x210 + lvx $sbbu, r12, r9 + lvx $sbbt, r12, r8 + lvx $sbeu, r12, r11 + lvx $sbet, r12, r10 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +## +## Decryption core +## +## Same API as encryption core. 
+## +.align 4 +_vpaes_decrypt_core: + lwz r8, 240($key) # pull rounds + li r9, 16 + lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key + li r11, 0x30 + lvx v6, r9, $key + addi r9, r9, 16 + ?vperm v5, v5, v6, $keyperm # align round key + vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 + vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 + vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0 + vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2 + vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 + mtctr r8 + b Ldec_entry + +.align 4 +Ldec_loop: +# +# Inverse mix columns +# + lvx v0, r12, r11 # v5 and v0 are flipped + # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u + # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t + vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u + subi r11, r11, 16 + vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t + andi. r11, r11, 0x30 + vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 + # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu + vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt + + vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu + vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch + vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt + vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu + vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt + + vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu + vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch + vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt + vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu + vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet + + vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu + vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch + vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet + vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch + vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch + +Ldec_entry: + # top of round + vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i + vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j + vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + vand v0, v0, v9 + vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak + vmr v5, v6 + lvx v6, r9, $key # vmovdqu (%r9), %xmm0 + vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak + addi r9, r9, 16 + vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io + ?vperm v5, v5, v6, $keyperm # align round key + vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo + bdnz Ldec_loop + + # middle of last round + addi r10, r11, 0x80 + # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou + vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou + # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot + lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 + vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t + vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k + vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A + vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0 + blr + 
.long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.globl .vpaes_decrypt +.align 5 +.vpaes_decrypt: + $STU $sp,-$FRAME($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mflr r6 + mfspr r7, 256 # save vrsave + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + stw r7,`$FRAME-4`($sp) # save vrsave + li r0, -1 + $PUSH r6,`$FRAME+$LRSAVE`($sp) + mtspr 256, r0 # preserve all AltiVec registers + + bl _vpaes_decrypt_preheat + + ?lvsl $inpperm, 0, $inp # prepare for unaligned access + lvx v0, 0, $inp + addi $inp, $inp, 15 # 15 is not a typo + ?lvsr $outperm, 0, $out + ?lvsl $keyperm, 0, $key + vnor $outmask, v7, v7 # 0xff..ff + lvx $inptail, 0, $inp # redundant in aligned case + ?vperm $outmask, v7, $outmask, $outperm + lvx $outhead, 0, $out + ?vperm v0, v0, $inptail, $inpperm + + bl _vpaes_decrypt_core + + vperm v0, v0, v0, $outperm # rotate right/left + vsel v1, $outhead, v0, $outmask + vmr $outhead, v0 + stvx v1, 0, $out + addi $out, $out, 15 # 15 is not a typo + ######## + + lvx v1, 0, $out # redundant in aligned case + vsel v1, $outhead, v1, $outmask + stvx v1, 0, $out + + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mtlr r6 + mtspr 256, r7 # restore vrsave + lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,0x04,1,0x80,0,3,0 + .long 0 +.size .vpaes_decrypt,.-.vpaes_decrypt + +.globl .vpaes_cbc_encrypt +.align 5 +.vpaes_cbc_encrypt: + ${UCMP}i r5,16 + bltlr- + + $STU $sp,-`($FRAME+2*$SIZE_T)`($sp) + mflr r0 + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mfspr r12, 256 + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + stw r12,`$FRAME-4`($sp) # save vrsave + $PUSH r30,`$FRAME+$SIZE_T*0`($sp) + $PUSH r31,`$FRAME+$SIZE_T*1`($sp) + li r9, -16 + $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) + + and r30, r5, r9 # copy length&-16 + mr r5, r6 # copy pointer to key + mr r31, r7 # copy pointer to iv + blt Lcbc_abort + cmpwi r8, 0 # test direction + li r6, -1 + mr r7, r12 # copy vrsave + mtspr 256, r6 # preserve all AltiVec registers + + lvx v24, 0, r31 # load [potentially unaligned] iv + li r9, 15 + ?lvsl $inpperm, 0, r31 + lvx v25, r9, r31 + ?vperm v24, v24, v25, $inpperm + + neg r8, $inp # prepare for unaligned access + vxor v7, v7, v7 + ?lvsl $keyperm, 0, $key + ?lvsr $outperm, 0, $out + ?lvsr $inpperm, 0, r8 # -$inp + vnor $outmask, v7, v7 # 0xff..ff + lvx $inptail, 0, $inp + ?vperm $outmask, v7, $outmask, $outperm + addi $inp, $inp, 15 # 15 is not a typo + lvx $outhead, 0, $out + + beq Lcbc_decrypt + + bl 
_vpaes_encrypt_preheat + li r0, 16 + +Lcbc_enc_loop: + vmr v0, $inptail + lvx $inptail, 0, $inp + addi $inp, $inp, 16 + ?vperm v0, v0, $inptail, $inpperm + vxor v0, v0, v24 # ^= iv + + bl _vpaes_encrypt_core + + vmr v24, v0 # put aside iv + sub. r30, r30, r0 # len -= 16 + vperm v0, v0, v0, $outperm # rotate right/left + vsel v1, $outhead, v0, $outmask + vmr $outhead, v0 + stvx v1, 0, $out + addi $out, $out, 16 + bne Lcbc_enc_loop + + b Lcbc_done + +.align 5 +Lcbc_decrypt: + bl _vpaes_decrypt_preheat + li r0, 16 + +Lcbc_dec_loop: + vmr v0, $inptail + lvx $inptail, 0, $inp + addi $inp, $inp, 16 + ?vperm v0, v0, $inptail, $inpperm + vmr v25, v0 # put aside input + + bl _vpaes_decrypt_core + + vxor v0, v0, v24 # ^= iv + vmr v24, v25 + sub. r30, r30, r0 # len -= 16 + vperm v0, v0, v0, $outperm # rotate right/left + vsel v1, $outhead, v0, $outmask + vmr $outhead, v0 + stvx v1, 0, $out + addi $out, $out, 16 + bne Lcbc_dec_loop + +Lcbc_done: + addi $out, $out, -1 + lvx v1, 0, $out # redundant in aligned case + vsel v1, $outhead, v1, $outmask + stvx v1, 0, $out + + neg r8, r31 # write [potentially unaligned] iv + ?lvsl $outperm, 0, r8 + li r6, 15 + vnor $outmask, v7, v7 # 0xff..ff + ?vperm $outmask, v7, $outmask, $outperm + lvx $outhead, 0, r31 + vperm v24, v24, v24, $outperm # rotate right/left + vsel v0, $outhead, v24, $outmask + lvx v1, r6, r31 + stvx v0, 0, r31 + vsel v1, v24, v1, $outmask + stvx v1, r6, r31 + + mtspr 256, r7 # restore vrsave + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp +Lcbc_abort: + $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) + $POP r30,`$FRAME+$SIZE_T*0`($sp) + $POP r31,`$FRAME+$SIZE_T*1`($sp) + mtlr r0 + addi $sp,$sp,`$FRAME+$SIZE_T*2` + blr + .long 0 + .byte 0,12,0x04,1,0x80,2,6,0 + .long 0 +.size .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt +___ +} +{ +my ($inp,$bits,$out)=map("r$_",(3..5)); +my $dir="cr1"; +my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24)); + +$code.=<<___; +######################################################## +## ## +## AES key schedule ## +## ## +######################################################## +.align 4 +_vpaes_key_preheat: + mflr r8 + bl Lconsts + mtlr r8 + li r11, 0xc0 # Lk_inv + li r10, 0xd0 + li r9, 0xe0 # L_ipt + li r8, 0xf0 + + vspltisb v8,4 # 0x04..04 + vxor v9,v9,v9 # 0x00..00 + lvx $invlo, r12, r11 # Lk_inv + li r11, 0x120 + lvx $invhi, r12, r10 + li r10, 0x130 + lvx $iptlo, r12, r9 # Lk_ipt + li r9, 0x220 + lvx $ipthi, r12, r8 + li r8, 0x230 + + lvx v14, r12, r11 # Lk_sb1 + li r11, 0x240 + lvx v15, r12, r10 + li r10, 0x250 + + lvx v16, r12, r9 # Lk_dksd + li r9, 0x260 + lvx v17, r12, r8 + li r8, 0x270 + lvx v18, r12, r11 # Lk_dksb + li r11, 0x280 + lvx v19, r12, r10 + li r10, 0x290 + lvx v20, r12, r9 # Lk_dkse + li r9, 0x2a0 + lvx v21, r12, r8 + li r8, 0x2b0 + lvx v22, r12, r11 # Lk_dks9 + lvx v23, r12, r10 + + lvx v24, r12, r9 # Lk_rcon + lvx v25, 0, r12 # Lk_mc_forward[0] + lvx v26, r12, r8 # Lks63 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 4 +_vpaes_schedule_core: + mflr r7 + + bl _vpaes_key_preheat # load the tables + + #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned) + neg r8, $inp # prepare for 
unaligned access + lvx v0, 0, $inp + addi $inp, $inp, 15 # 15 is not typo + ?lvsr $inpperm, 0, r8 # -$inp + lvx v6, 0, $inp # v6 serves as inptail + addi $inp, $inp, 8 + ?vperm v0, v0, v6, $inpperm + + # input transform + vmr v3, v0 # vmovdqa %xmm0, %xmm3 + bl _vpaes_schedule_transform + vmr v7, v0 # vmovdqa %xmm0, %xmm7 + + bne $dir, Lschedule_am_decrypting + + # encrypting, output zeroth round key after transform + li r8, 0x30 # mov \$0x30,%r8d + addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 + + ?lvsr $outperm, 0, $out # prepare for unaligned access + vnor $outmask, v9, v9 # 0xff..ff + lvx $outhead, 0, $out + ?vperm $outmask, v9, $outmask, $outperm + + #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx) + vperm v1, v0, v0, $outperm # rotate right/left + vsel v2, $outhead, v1, $outmask + vmr $outhead, v1 + stvx v2, 0, $out + b Lschedule_go + +Lschedule_am_decrypting: + srwi r8, $bits, 1 # shr \$1,%r8d + andi. r8, r8, 32 # and \$32,%r8d + xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 + addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 + # decrypting, output zeroth round key after shiftrows + lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 + vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 + + neg r0, $out # prepare for unaligned access + ?lvsl $outperm, 0, r0 + addi $out, $out, 15 # 15 is not typo + vnor $outmask, v9, v9 # 0xff..ff + lvx $outhead, 0, $out + ?vperm $outmask, $outmask, v9, $outperm + + #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx) + vperm v4, v4, v4, $outperm # rotate right/left + vsel v2, $outhead, v4, $outmask + vmr $outhead, v4 + stvx v2, 0, $out + xori r8, r8, 0x30 # xor \$0x30, %r8 + +Lschedule_go: + cmplwi $bits, 192 # cmp \$192, %esi + bgt Lschedule_256 + beq Lschedule_192 + # 128: fall though + +## +## .schedule_128 +## +## 128-bit specific part of key schedule. +## +## This schedule is really simple, because all its parts +## are accomplished by the subroutines. +## +Lschedule_128: + li r0, 10 # mov \$10, %esi + mtctr r0 + +Loop_schedule_128: + bl _vpaes_schedule_round + bdz Lschedule_mangle_last # dec %esi + bl _vpaes_schedule_mangle # write output + b Loop_schedule_128 + +## +## .aes_schedule_192 +## +## 192-bit specific part of key schedule. +## +## The main body of this schedule is the same as the 128-bit +## schedule, but with more smearing. The long, high side is +## stored in %xmm7 as before, and the short, low side is in +## the high bits of %xmm6. +## +## This schedule is somewhat nastier, however, because each +## round produces 192 bits of key material, or 1.5 round keys. +## Therefore, on each cycle we do 2 rounds and produce 3 round +## keys. +## +.align 4 +Lschedule_192: + li r0, 4 # mov \$4, %esi + lvx v0, 0, $inp + ?vperm v0, v6, v0, $inpperm + ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) + bl _vpaes_schedule_transform # input transform + ?vsldoi v6, v0, v9, 8 + ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros + mtctr r0 + +Loop_schedule_192: + bl _vpaes_schedule_round + ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0 + bl _vpaes_schedule_mangle # save key n + bl _vpaes_schedule_192_smear + bl _vpaes_schedule_mangle # save key n+1 + bl _vpaes_schedule_round + bdz Lschedule_mangle_last # dec %esi + bl _vpaes_schedule_mangle # save key n+2 + bl _vpaes_schedule_192_smear + b Loop_schedule_192 + +## +## .aes_schedule_256 +## +## 256-bit specific part of key schedule. +## +## The structure here is very similar to the 128-bit +## schedule, but with an additional "low side" in +## %xmm6. 
The low side's rounds are the same as the +## high side's, except no rcon and no rotation. +## +.align 4 +Lschedule_256: + li r0, 7 # mov \$7, %esi + addi $inp, $inp, 8 + lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) + ?vperm v0, v6, v0, $inpperm + bl _vpaes_schedule_transform # input transform + mtctr r0 + +Loop_schedule_256: + bl _vpaes_schedule_mangle # output low result + vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 + + # high round + bl _vpaes_schedule_round + bdz Lschedule_mangle_last # dec %esi + bl _vpaes_schedule_mangle + + # low round. swap xmm7 and xmm6 + ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 + vmr v5, v7 # vmovdqa %xmm7, %xmm5 + vmr v7, v6 # vmovdqa %xmm6, %xmm7 + bl _vpaes_schedule_low_round + vmr v7, v5 # vmovdqa %xmm5, %xmm7 + + b Loop_schedule_256 +## +## .aes_schedule_mangle_last +## +## Mangler for last round of key schedule +## Mangles %xmm0 +## when encrypting, outputs out(%xmm0) ^ 63 +## when decrypting, outputs unskew(%xmm0) +## +## Always called right before return... jumps to cleanup and exits +## +.align 4 +Lschedule_mangle_last: + # schedule last round key from xmm0 + li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11 + li r9, 0x2f0 + bne $dir, Lschedule_mangle_last_dec + + # encrypting + lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1 + li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform + li r9, 0x2d0 # prepare to output transform + vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute + + lvx $iptlo, r11, r12 # reload $ipt + lvx $ipthi, r9, r12 + addi $out, $out, 16 # add \$16, %rdx + vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform # output transform + + #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key + vperm v0, v0, v0, $outperm # rotate right/left + vsel v2, $outhead, v0, $outmask + vmr $outhead, v0 + stvx v2, 0, $out + + addi $out, $out, 15 # 15 is not typo + lvx v1, 0, $out # redundant in aligned case + vsel v1, $outhead, v1, $outmask + stvx v1, 0, $out + b Lschedule_mangle_done + +.align 4 +Lschedule_mangle_last_dec: + lvx $iptlo, r11, r12 # reload $ipt + lvx $ipthi, r9, r12 + addi $out, $out, -16 # add \$-16, %rdx + vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 + bl _vpaes_schedule_transform # output transform + + #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key + vperm v0, v0, v0, $outperm # rotate right/left + vsel v2, $outhead, v0, $outmask + vmr $outhead, v0 + stvx v2, 0, $out + + addi $out, $out, -15 # -15 is not typo + lvx v1, 0, $out # redundant in aligned case + vsel v1, $outhead, v1, $outmask + stvx v1, 0, $out + +Lschedule_mangle_done: + mtlr r7 + # cleanup + vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0 + vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1 + vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2 + vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3 + vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 + vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5 + vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6 + vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7 + + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +## +## .aes_schedule_192_smear +## +## Smear the short, low side in the 192-bit key schedule. 
+## +## Inputs: +## %xmm7: high side, b a x y +## %xmm6: low side, d c 0 0 +## %xmm13: 0 +## +## Outputs: +## %xmm6: b+c+d b+c 0 0 +## %xmm0: b+c+d b+c b a +## +.align 4 +_vpaes_schedule_192_smear: + ?vspltw v0, v7, 3 + ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 + ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 + vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a + vmr v0, v6 + ?vsldoi v6, v6, v9, 8 + ?vsldoi v6, v9, v6, 8 # clobber low side with zeros + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +## +## .aes_schedule_round +## +## Runs one main round of the key schedule on %xmm0, %xmm7 +## +## Specifically, runs subbytes on the high dword of %xmm0 +## then rotates it by one byte and xors into the low dword of +## %xmm7. +## +## Adds rcon from low byte of %xmm8, then rotates %xmm8 for +## next rcon. +## +## Smears the dwords of %xmm7 by xoring the low into the +## second low, result into third, result into highest. +## +## Returns results in %xmm7 = %xmm0. +## Clobbers %xmm1-%xmm4, %r11. +## +.align 4 +_vpaes_schedule_round: + # extract rcon from xmm8 + #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 + ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1 + ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8 + vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 + + # rotate + ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 + ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0 + + # fall through... + + # low round: same as high round, but no rotation and no rcon. +_vpaes_schedule_low_round: + # smear xmm7 + ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1 + vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 + vspltisb v1, 0x0f # 0x0f..0f + ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4 + + # subbytes + vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k + vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i + vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7 + vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k + vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j + vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i + vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k + vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j + vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7 + vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak + vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k + vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak + vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io + vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo + vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou + vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t + vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output + + # add in smeared stuff + vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0 + vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +## +## .aes_schedule_transform +## +## Linear-transform %xmm0 according to tables at (%r11) +## +## Requires that %xmm9 = 0x0F0F... 
as in preheat +## Output in %xmm0 +## Clobbers %xmm2 +## +.align 4 +_vpaes_schedule_transform: + #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1 + vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 + # vmovdqa (%r11), %xmm2 # lo + vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 + # vmovdqa 16(%r11), %xmm1 # hi + vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0 + vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +## +## .aes_schedule_mangle +## +## Mangle xmm0 from (basis-transformed) standard version +## to our version. +## +## On encrypt, +## xor with 0x63 +## multiply by circulant 0,1,1,1 +## apply shiftrows transform +## +## On decrypt, +## xor with 0x63 +## multiply by "inverse mixcolumns" circulant E,B,D,9 +## deskew +## apply shiftrows transform +## +## +## Writes out to (%rdx), and increments or decrements it +## Keeps track of round number mod 4 in %r8 +## Preserves xmm0 +## Clobbers xmm1-xmm5 +## +.align 4 +_vpaes_schedule_mangle: + #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later + # vmovdqa .Lk_mc_forward(%rip),%xmm5 + bne $dir, Lschedule_mangle_dec + + # encrypting + vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4 + addi $out, $out, 16 # add \$16, %rdx + vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4 + vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1 + vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3 + vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4 + lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 + vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3 + + vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 + addi r8, r8, -16 # add \$-16, %r8 + andi. r8, r8, 0x30 # and \$0x30, %r8 + + #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) + vperm v1, v3, v3, $outperm # rotate right/left + vsel v2, $outhead, v1, $outmask + vmr $outhead, v1 + stvx v2, 0, $out + blr + +.align 4 +Lschedule_mangle_dec: + # inverse mix columns + # lea .Lk_dksd(%rip),%r11 + vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi + #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo + + # vmovdqa 0x00(%r11), %xmm2 + vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2 + # vmovdqa 0x10(%r11), %xmm3 + vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3 + vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 + vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 + + # vmovdqa 0x20(%r11), %xmm2 + vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2 + vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 + # vmovdqa 0x30(%r11), %xmm3 + vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3 + vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 + vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 + + # vmovdqa 0x40(%r11), %xmm2 + vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2 + vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 + # vmovdqa 0x50(%r11), %xmm3 + vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3 + vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 + + # vmovdqa 0x60(%r11), %xmm2 + vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2 + vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 + # vmovdqa 0x70(%r11), %xmm4 + vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4 + lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 + vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 + vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3 + + addi $out, $out, -16 # add \$-16, %rdx + + vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 + addi r8, r8, -16 # add \$-16, %r8 + andi. 
r8, r8, 0x30 # and \$0x30, %r8 + + #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) + vperm v1, v3, v3, $outperm # rotate right/left + vsel v2, $outhead, v1, $outmask + vmr $outhead, v1 + stvx v2, 0, $out + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.globl .vpaes_set_encrypt_key +.align 5 +.vpaes_set_encrypt_key: + $STU $sp,-$FRAME($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mflr r0 + mfspr r6, 256 # save vrsave + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + stw r6,`$FRAME-4`($sp) # save vrsave + li r7, -1 + $PUSH r0, `$FRAME+$LRSAVE`($sp) + mtspr 256, r7 # preserve all AltiVec registers + + srwi r9, $bits, 5 # shr \$5,%eax + addi r9, r9, 6 # add \$5,%eax + stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + cmplw $dir, $bits, $bits # set encrypt direction + li r8, 0x30 # mov \$0x30,%r8d + bl _vpaes_schedule_core + + $POP r0, `$FRAME+$LRSAVE`($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mtspr 256, r6 # restore vrsave + mtlr r0 + xor r3, r3, r3 + lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,0x04,1,0x80,0,3,0 + .long 0 +.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key + +.globl .vpaes_set_decrypt_key +.align 4 +.vpaes_set_decrypt_key: + $STU $sp,-$FRAME($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mflr r0 + mfspr r6, 256 # save vrsave + stvx v20,r10,$sp + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + stw r6,`$FRAME-4`($sp) # save vrsave + li r7, -1 + $PUSH r0, `$FRAME+$LRSAVE`($sp) + mtspr 256, r7 # preserve all AltiVec registers + + srwi r9, $bits, 5 # shr \$5,%eax + addi r9, r9, 6 # add \$5,%eax + stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; + + slwi r9, r9, 4 # shl \$4,%eax + add $out, $out, r9 # lea (%rdx,%rax),%rdx + + cmplwi $dir, $bits, 0 # set decrypt direction + srwi r8, $bits, 1 # shr \$1,%r8d + andi. 
r8, r8, 32 # and \$32,%r8d + xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 + bl _vpaes_schedule_core + + $POP r0, `$FRAME+$LRSAVE`($sp) + li r10,`15+6*$SIZE_T` + li r11,`31+6*$SIZE_T` + mtspr 256, r6 # restore vrsave + mtlr r0 + xor r3, r3, r3 + lvx v20,r10,$sp + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,0x04,1,0x80,0,3,0 + .long 0 +.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key +___ +} + +my $consts=1; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + # constants table endian-specific conversion + if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) { + my $conv=$2; + my @bytes=(); + + # convert to endian-agnostic format + foreach (split(/,\s+/,$1)) { + my $l = /^0/?oct:int; + push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; + } + + # little-endian conversion + if ($flavour =~ /le$/o) { + SWITCH: for($conv) { + /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; + } + } + + #emit + print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; + next; + } + $consts=0 if (m/Lconsts:/o); # end of table + + # instructions prefixed with '?' are endian-specific and need + # to be adjusted accordingly... + if ($flavour =~ /le$/o) { # little-endian + s/\?lvsr/lvsl/o or + s/\?lvsl/lvsr/o or + s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or + s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or + s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; + } else { # big-endian + s/\?([a-z]+)/$1/o; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/aes/asm/vpaes-x86.pl b/deps/openssl/openssl/crypto/aes/asm/vpaes-x86.pl index 1533e2c3042c7f..2ba149c3f9d599 100644 --- a/deps/openssl/openssl/crypto/aes/asm/vpaes-x86.pl +++ b/deps/openssl/openssl/crypto/aes/asm/vpaes-x86.pl @@ -27,9 +27,10 @@ # # aes-586.pl vpaes-x86.pl # -# Core 2(**) 29.1/42.3/18.3 22.0/25.6(***) -# Nehalem 27.9/40.4/18.1 10.3/12.0 -# Atom 102./119./60.1 64.5/85.3(***) +# Core 2(**) 28.1/41.4/18.3 21.9/25.2(***) +# Nehalem 27.9/40.4/18.1 10.2/11.9 +# Atom 70.7/92.1/60.1 61.1/75.4(***) +# Silvermont 45.4/62.9/24.1 49.2/61.1(***) # # (*) "Hyper-threading" in the context refers rather to cache shared # among multiple cores, than to specifically Intel HTT. As vast @@ -40,8 +41,8 @@ # (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. # # (***) Less impressive improvement on Core 2 and Atom is due to slow -# pshufb, yet it's respectable +32%/65% improvement on Core 2 -# and +58%/40% on Atom (as implied, over "hyper-threading-safe" +# pshufb, yet it's respectable +28%/64% improvement on Core 2 +# and +15% on Atom (as implied, over "hyper-threading-safe" # code path). 
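The post-processing loop at the end of vpaes-ppc.pl above keeps the constant table endian-agnostic: each ".long" row is first split into big-endian bytes, then on little-endian flavours the rows tagged "?rev" are byte-reversed and the rows tagged "?inv" have every byte XORed with 0x0f, while "?asis" rows are emitted unchanged. Below is a minimal, standalone C sketch of that byte-level transformation; the helper names are illustrative only and are not part of the patch.

#include <stdint.h>
#include <stddef.h>

/* Split one ".long a, b, c, d" row into 16 bytes, most significant byte
 * first, exactly as the Perl loop does before any conversion. */
static void row_to_bytes(const uint32_t w[4], uint8_t b[16])
{
    for (size_t i = 0; i < 4; i++) {
        b[4 * i + 0] = (uint8_t)(w[i] >> 24);
        b[4 * i + 1] = (uint8_t)(w[i] >> 16);
        b[4 * i + 2] = (uint8_t)(w[i] >> 8);
        b[4 * i + 3] = (uint8_t)(w[i] >> 0);
    }
}

/* "?rev" on a little-endian flavour: reverse the whole 16-byte row. */
static void convert_rev(uint8_t b[16])
{
    for (size_t i = 0; i < 8; i++) {
        uint8_t t = b[i];
        b[i] = b[15 - i];
        b[15 - i] = t;
    }
}

/* "?inv" on a little-endian flavour: XOR every byte with 0x0f (these rows
 * hold 0x00..0x0f shuffle indices for vperm). */
static void convert_inv(uint8_t b[16])
{
    for (size_t i = 0; i < 16; i++)
        b[i] ^= 0x0f;
}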
# # @@ -183,35 +184,35 @@ &movdqa ("xmm1","xmm6") &movdqa ("xmm2",&QWP($k_ipt,$const)); &pandn ("xmm1","xmm0"); - &movdqu ("xmm5",&QWP(0,$key)); - &psrld ("xmm1",4); &pand ("xmm0","xmm6"); + &movdqu ("xmm5",&QWP(0,$key)); &pshufb ("xmm2","xmm0"); &movdqa ("xmm0",&QWP($k_ipt+16,$const)); - &pshufb ("xmm0","xmm1"); &pxor ("xmm2","xmm5"); - &pxor ("xmm0","xmm2"); + &psrld ("xmm1",4); &add ($key,16); + &pshufb ("xmm0","xmm1"); &lea ($base,&DWP($k_mc_backward,$const)); + &pxor ("xmm0","xmm2"); &jmp (&label("enc_entry")); &set_label("enc_loop",16); # middle of middle round &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u - &pshufb ("xmm4","xmm2"); # 4 = sb1u - &pxor ("xmm4","xmm5"); # 4 = sb1u + k &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t + &pshufb ("xmm4","xmm2"); # 4 = sb1u &pshufb ("xmm0","xmm3"); # 0 = sb1t - &pxor ("xmm0","xmm4"); # 0 = A + &pxor ("xmm4","xmm5"); # 4 = sb1u + k &movdqa ("xmm5",&QWP($k_sb2,$const)); # 4 : sb2u - &pshufb ("xmm5","xmm2"); # 4 = sb2u + &pxor ("xmm0","xmm4"); # 0 = A &movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[] + &pshufb ("xmm5","xmm2"); # 4 = sb2u &movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t - &pshufb ("xmm2","xmm3"); # 2 = sb2t - &pxor ("xmm2","xmm5"); # 2 = 2A &movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[] + &pshufb ("xmm2","xmm3"); # 2 = sb2t &movdqa ("xmm3","xmm0"); # 3 = A + &pxor ("xmm2","xmm5"); # 2 = 2A &pshufb ("xmm0","xmm1"); # 0 = B &add ($key,16); # next key &pxor ("xmm0","xmm2"); # 0 = 2A+B @@ -220,30 +221,30 @@ &pxor ("xmm3","xmm0"); # 3 = 2A+B+D &pshufb ("xmm0","xmm1"); # 0 = 2B+C &and ($magic,0x30); # ... mod 4 - &pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D &sub ($round,1); # nr-- + &pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D &set_label("enc_entry"); # top of round &movdqa ("xmm1","xmm6"); # 1 : i + &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k &pandn ("xmm1","xmm0"); # 1 = i<<4 &psrld ("xmm1",4); # 1 = i &pand ("xmm0","xmm6"); # 0 = k - &movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k &pshufb ("xmm5","xmm0"); # 2 = a/k - &pxor ("xmm0","xmm1"); # 0 = j &movdqa ("xmm3","xmm7"); # 3 : 1/i + &pxor ("xmm0","xmm1"); # 0 = j &pshufb ("xmm3","xmm1"); # 3 = 1/i - &pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k &movdqa ("xmm4","xmm7"); # 4 : 1/j + &pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k &pshufb ("xmm4","xmm0"); # 4 = 1/j - &pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k &movdqa ("xmm2","xmm7"); # 2 : 1/iak + &pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k &pshufb ("xmm2","xmm3"); # 2 = 1/iak - &pxor ("xmm2","xmm0"); # 2 = io &movdqa ("xmm3","xmm7"); # 3 : 1/jak - &movdqu ("xmm5",&QWP(0,$key)); + &pxor ("xmm2","xmm0"); # 2 = io &pshufb ("xmm3","xmm4"); # 3 = 1/jak + &movdqu ("xmm5",&QWP(0,$key)); &pxor ("xmm3","xmm1"); # 3 = jo &jnz (&label("enc_loop")); @@ -265,8 +266,8 @@ ## Same API as encryption core. 
## &function_begin_B("_vpaes_decrypt_core"); - &mov ($round,&DWP(240,$key)); &lea ($base,&DWP($k_dsbd,$const)); + &mov ($round,&DWP(240,$key)); &movdqa ("xmm1","xmm6"); &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base)); &pandn ("xmm1","xmm0"); @@ -292,62 +293,61 @@ ## Inverse mix columns ## &movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u + &movdqa ("xmm1",&QWP(-0x10,$base)); # 0 : sb9t &pshufb ("xmm4","xmm2"); # 4 = sb9u - &pxor ("xmm4","xmm0"); - &movdqa ("xmm0",&QWP(-0x10,$base)); # 0 : sb9t - &pshufb ("xmm0","xmm3"); # 0 = sb9t - &pxor ("xmm0","xmm4"); # 0 = ch - &add ($key,16); # next round key - - &pshufb ("xmm0","xmm5"); # MC ch + &pshufb ("xmm1","xmm3"); # 0 = sb9t + &pxor ("xmm0","xmm4"); &movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu - &pshufb ("xmm4","xmm2"); # 4 = sbdu - &pxor ("xmm4","xmm0"); # 4 = ch - &movdqa ("xmm0",&QWP(0x10,$base)); # 0 : sbdt - &pshufb ("xmm0","xmm3"); # 0 = sbdt - &pxor ("xmm0","xmm4"); # 0 = ch - &sub ($round,1); # nr-- + &pxor ("xmm0","xmm1"); # 0 = ch + &movdqa ("xmm1",&QWP(0x10,$base)); # 0 : sbdt + &pshufb ("xmm4","xmm2"); # 4 = sbdu &pshufb ("xmm0","xmm5"); # MC ch + &pshufb ("xmm1","xmm3"); # 0 = sbdt + &pxor ("xmm0","xmm4"); # 4 = ch &movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu - &pshufb ("xmm4","xmm2"); # 4 = sbbu - &pxor ("xmm4","xmm0"); # 4 = ch - &movdqa ("xmm0",&QWP(0x30,$base)); # 0 : sbbt - &pshufb ("xmm0","xmm3"); # 0 = sbbt - &pxor ("xmm0","xmm4"); # 0 = ch + &pxor ("xmm0","xmm1"); # 0 = ch + &movdqa ("xmm1",&QWP(0x30,$base)); # 0 : sbbt + &pshufb ("xmm4","xmm2"); # 4 = sbbu &pshufb ("xmm0","xmm5"); # MC ch + &pshufb ("xmm1","xmm3"); # 0 = sbbt + &pxor ("xmm0","xmm4"); # 4 = ch &movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu - &pshufb ("xmm4","xmm2"); # 4 = sbeu - &pxor ("xmm4","xmm0"); # 4 = ch - &movdqa ("xmm0",&QWP(0x50,$base)); # 0 : sbet - &pshufb ("xmm0","xmm3"); # 0 = sbet - &pxor ("xmm0","xmm4"); # 0 = ch + &pxor ("xmm0","xmm1"); # 0 = ch + &movdqa ("xmm1",&QWP(0x50,$base)); # 0 : sbet + &pshufb ("xmm4","xmm2"); # 4 = sbeu + &pshufb ("xmm0","xmm5"); # MC ch + &pshufb ("xmm1","xmm3"); # 0 = sbet + &pxor ("xmm0","xmm4"); # 4 = ch + &add ($key,16); # next round key &palignr("xmm5","xmm5",12); + &pxor ("xmm0","xmm1"); # 0 = ch + &sub ($round,1); # nr-- &set_label("dec_entry"); # top of round &movdqa ("xmm1","xmm6"); # 1 : i + &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k &pandn ("xmm1","xmm0"); # 1 = i<<4 - &psrld ("xmm1",4); # 1 = i &pand ("xmm0","xmm6"); # 0 = k - &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k + &psrld ("xmm1",4); # 1 = i &pshufb ("xmm2","xmm0"); # 2 = a/k - &pxor ("xmm0","xmm1"); # 0 = j &movdqa ("xmm3","xmm7"); # 3 : 1/i + &pxor ("xmm0","xmm1"); # 0 = j &pshufb ("xmm3","xmm1"); # 3 = 1/i - &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k &movdqa ("xmm4","xmm7"); # 4 : 1/j + &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k &pshufb ("xmm4","xmm0"); # 4 = 1/j &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k &movdqa ("xmm2","xmm7"); # 2 : 1/iak &pshufb ("xmm2","xmm3"); # 2 = 1/iak - &pxor ("xmm2","xmm0"); # 2 = io &movdqa ("xmm3","xmm7"); # 3 : 1/jak + &pxor ("xmm2","xmm0"); # 2 = io &pshufb ("xmm3","xmm4"); # 3 = 1/jak - &pxor ("xmm3","xmm1"); # 3 = jo &movdqu ("xmm0",&QWP(0,$key)); + &pxor ("xmm3","xmm1"); # 3 = jo &jnz (&label("dec_loop")); # middle of last round @@ -542,12 +542,12 @@ ## %xmm0: b+c+d b+c b a ## &function_begin_B("_vpaes_schedule_192_smear"); - &pshufd ("xmm0","xmm6",0x80); # d c 0 0 -> c 0 0 0 - &pxor ("xmm6","xmm0"); # -> c+d c 0 0 + &pshufd ("xmm1","xmm6",0x80); # d c 0 0 -> c 0 0 0 &pshufd 
("xmm0","xmm7",0xFE); # b a _ _ -> b b b a + &pxor ("xmm6","xmm1"); # -> c+d c 0 0 + &pxor ("xmm1","xmm1"); &pxor ("xmm6","xmm0"); # -> b+c+d b+c b a &movdqa ("xmm0","xmm6"); - &pxor ("xmm1","xmm1"); &movhlps("xmm6","xmm1"); # clobber low side with zeros &ret (); &function_end_B("_vpaes_schedule_192_smear"); diff --git a/deps/openssl/openssl/crypto/aes/asm/vpaes-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/vpaes-x86_64.pl index bd7f45b8509174..f2ef318fae4e13 100644 --- a/deps/openssl/openssl/crypto/aes/asm/vpaes-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/vpaes-x86_64.pl @@ -27,9 +27,10 @@ # # aes-x86_64.pl vpaes-x86_64.pl # -# Core 2(**) 30.5/43.7/14.3 21.8/25.7(***) -# Nehalem 30.5/42.2/14.6 9.8/11.8 -# Atom 63.9/79.0/32.1 64.0/84.8(***) +# Core 2(**) 29.6/41.1/14.3 21.9/25.2(***) +# Nehalem 29.6/40.3/14.6 10.0/11.8 +# Atom 57.3/74.2/32.1 60.9/77.2(***) +# Silvermont 52.7/64.0/19.5 48.8/60.8(***) # # (*) "Hyper-threading" in the context refers rather to cache shared # among multiple cores, than to specifically Intel HTT. As vast @@ -40,7 +41,7 @@ # (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe. # # (***) Less impressive improvement on Core 2 and Atom is due to slow -# pshufb, yet it's respectable +40%/78% improvement on Core 2 +# pshufb, yet it's respectable +36%/62% improvement on Core 2 # (as implied, over "hyper-threading-safe" code path). # # @@ -95,8 +96,8 @@ movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi pshufb %xmm1, %xmm0 pxor %xmm5, %xmm2 - pxor %xmm2, %xmm0 add \$16, %r9 + pxor %xmm2, %xmm0 lea .Lk_mc_backward(%rip),%r10 jmp .Lenc_entry @@ -104,19 +105,19 @@ .Lenc_loop: # middle of middle round movdqa %xmm13, %xmm4 # 4 : sb1u - pshufb %xmm2, %xmm4 # 4 = sb1u - pxor %xmm5, %xmm4 # 4 = sb1u + k movdqa %xmm12, %xmm0 # 0 : sb1t + pshufb %xmm2, %xmm4 # 4 = sb1u pshufb %xmm3, %xmm0 # 0 = sb1t - pxor %xmm4, %xmm0 # 0 = A + pxor %xmm5, %xmm4 # 4 = sb1u + k movdqa %xmm15, %xmm5 # 4 : sb2u - pshufb %xmm2, %xmm5 # 4 = sb2u + pxor %xmm4, %xmm0 # 0 = A movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] + pshufb %xmm2, %xmm5 # 4 = sb2u + movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] movdqa %xmm14, %xmm2 # 2 : sb2t pshufb %xmm3, %xmm2 # 2 = sb2t - pxor %xmm5, %xmm2 # 2 = 2A - movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] movdqa %xmm0, %xmm3 # 3 = A + pxor %xmm5, %xmm2 # 2 = 2A pshufb %xmm1, %xmm0 # 0 = B add \$16, %r9 # next key pxor %xmm2, %xmm0 # 0 = 2A+B @@ -125,30 +126,30 @@ pxor %xmm0, %xmm3 # 3 = 2A+B+D pshufb %xmm1, %xmm0 # 0 = 2B+C and \$0x30, %r11 # ... 
mod 4 - pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D sub \$1,%rax # nr-- + pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D .Lenc_entry: # top of round movdqa %xmm9, %xmm1 # 1 : i + movdqa %xmm11, %xmm5 # 2 : a/k pandn %xmm0, %xmm1 # 1 = i<<4 psrld \$4, %xmm1 # 1 = i pand %xmm9, %xmm0 # 0 = k - movdqa %xmm11, %xmm5 # 2 : a/k pshufb %xmm0, %xmm5 # 2 = a/k - pxor %xmm1, %xmm0 # 0 = j movdqa %xmm10, %xmm3 # 3 : 1/i + pxor %xmm1, %xmm0 # 0 = j pshufb %xmm1, %xmm3 # 3 = 1/i - pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k movdqa %xmm10, %xmm4 # 4 : 1/j + pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k pshufb %xmm0, %xmm4 # 4 = 1/j - pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k movdqa %xmm10, %xmm2 # 2 : 1/iak + pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k pshufb %xmm3, %xmm2 # 2 = 1/iak - pxor %xmm0, %xmm2 # 2 = io movdqa %xmm10, %xmm3 # 3 : 1/jak - movdqu (%r9), %xmm5 + pxor %xmm0, %xmm2 # 2 = io pshufb %xmm4, %xmm3 # 3 = 1/jak + movdqu (%r9), %xmm5 pxor %xmm1, %xmm3 # 3 = jo jnz .Lenc_loop @@ -201,62 +202,61 @@ ## Inverse mix columns ## movdqa -0x20(%r10),%xmm4 # 4 : sb9u + movdqa -0x10(%r10),%xmm1 # 0 : sb9t pshufb %xmm2, %xmm4 # 4 = sb9u - pxor %xmm0, %xmm4 - movdqa -0x10(%r10),%xmm0 # 0 : sb9t - pshufb %xmm3, %xmm0 # 0 = sb9t - pxor %xmm4, %xmm0 # 0 = ch - add \$16, %r9 # next round key - - pshufb %xmm5, %xmm0 # MC ch + pshufb %xmm3, %xmm1 # 0 = sb9t + pxor %xmm4, %xmm0 movdqa 0x00(%r10),%xmm4 # 4 : sbdu + pxor %xmm1, %xmm0 # 0 = ch + movdqa 0x10(%r10),%xmm1 # 0 : sbdt + pshufb %xmm2, %xmm4 # 4 = sbdu - pxor %xmm0, %xmm4 # 4 = ch - movdqa 0x10(%r10),%xmm0 # 0 : sbdt - pshufb %xmm3, %xmm0 # 0 = sbdt - pxor %xmm4, %xmm0 # 0 = ch - sub \$1,%rax # nr-- - pshufb %xmm5, %xmm0 # MC ch + pshufb %xmm3, %xmm1 # 0 = sbdt + pxor %xmm4, %xmm0 # 4 = ch movdqa 0x20(%r10),%xmm4 # 4 : sbbu + pxor %xmm1, %xmm0 # 0 = ch + movdqa 0x30(%r10),%xmm1 # 0 : sbbt + pshufb %xmm2, %xmm4 # 4 = sbbu - pxor %xmm0, %xmm4 # 4 = ch - movdqa 0x30(%r10),%xmm0 # 0 : sbbt - pshufb %xmm3, %xmm0 # 0 = sbbt - pxor %xmm4, %xmm0 # 0 = ch - pshufb %xmm5, %xmm0 # MC ch + pshufb %xmm3, %xmm1 # 0 = sbbt + pxor %xmm4, %xmm0 # 4 = ch movdqa 0x40(%r10),%xmm4 # 4 : sbeu - pshufb %xmm2, %xmm4 # 4 = sbeu - pxor %xmm0, %xmm4 # 4 = ch - movdqa 0x50(%r10),%xmm0 # 0 : sbet - pshufb %xmm3, %xmm0 # 0 = sbet - pxor %xmm4, %xmm0 # 0 = ch + pxor %xmm1, %xmm0 # 0 = ch + movdqa 0x50(%r10),%xmm1 # 0 : sbet + pshufb %xmm2, %xmm4 # 4 = sbeu + pshufb %xmm5, %xmm0 # MC ch + pshufb %xmm3, %xmm1 # 0 = sbet + pxor %xmm4, %xmm0 # 4 = ch + add \$16, %r9 # next round key palignr \$12, %xmm5, %xmm5 - + pxor %xmm1, %xmm0 # 0 = ch + sub \$1,%rax # nr-- + .Ldec_entry: # top of round movdqa %xmm9, %xmm1 # 1 : i pandn %xmm0, %xmm1 # 1 = i<<4 + movdqa %xmm11, %xmm2 # 2 : a/k psrld \$4, %xmm1 # 1 = i pand %xmm9, %xmm0 # 0 = k - movdqa %xmm11, %xmm2 # 2 : a/k pshufb %xmm0, %xmm2 # 2 = a/k - pxor %xmm1, %xmm0 # 0 = j movdqa %xmm10, %xmm3 # 3 : 1/i + pxor %xmm1, %xmm0 # 0 = j pshufb %xmm1, %xmm3 # 3 = 1/i - pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k movdqa %xmm10, %xmm4 # 4 : 1/j + pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k pshufb %xmm0, %xmm4 # 4 = 1/j pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k movdqa %xmm10, %xmm2 # 2 : 1/iak pshufb %xmm3, %xmm2 # 2 = 1/iak - pxor %xmm0, %xmm2 # 2 = io movdqa %xmm10, %xmm3 # 3 : 1/jak + pxor %xmm0, %xmm2 # 2 = io pshufb %xmm4, %xmm3 # 3 = 1/jak - pxor %xmm1, %xmm3 # 3 = jo movdqu (%r9), %xmm0 + pxor %xmm1, %xmm3 # 3 = jo jnz .Ldec_loop # middle of last round @@ -464,12 +464,12 @@ .type _vpaes_schedule_192_smear,\@abi-omnipotent .align 16 _vpaes_schedule_192_smear: - pshufd \$0x80, %xmm6, %xmm0 # d 
c 0 0 -> c 0 0 0 - pxor %xmm0, %xmm6 # -> c+d c 0 0 + pshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a + pxor %xmm1, %xmm6 # -> c+d c 0 0 + pxor %xmm1, %xmm1 pxor %xmm0, %xmm6 # -> b+c+d b+c b a movdqa %xmm6, %xmm0 - pxor %xmm1, %xmm1 movhlps %xmm1, %xmm6 # clobber low side with zeros ret .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear diff --git a/deps/openssl/openssl/crypto/arm64cpuid.S b/deps/openssl/openssl/crypto/arm64cpuid.S new file mode 100644 index 00000000000000..4778ac1deacc30 --- /dev/null +++ b/deps/openssl/openssl/crypto/arm64cpuid.S @@ -0,0 +1,46 @@ +#include "arm_arch.h" + +.text +.arch armv8-a+crypto + +.align 5 +.global _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + orr v15.16b, v15.16b, v15.16b + ret +.size _armv7_neon_probe,.-_armv7_neon_probe + +.global _armv7_tick +.type _armv7_tick,%function +_armv7_tick: + mrs x0, CNTVCT_EL0 + ret +.size _armv7_tick,.-_armv7_tick + +.global _armv8_aes_probe +.type _armv8_aes_probe,%function +_armv8_aes_probe: + aese v0.16b, v0.16b + ret +.size _armv8_aes_probe,.-_armv8_aes_probe + +.global _armv8_sha1_probe +.type _armv8_sha1_probe,%function +_armv8_sha1_probe: + sha1h s0, s0 + ret +.size _armv8_sha1_probe,.-_armv8_sha1_probe + +.global _armv8_sha256_probe +.type _armv8_sha256_probe,%function +_armv8_sha256_probe: + sha256su0 v0.4s, v0.4s + ret +.size _armv8_sha256_probe,.-_armv8_sha256_probe +.global _armv8_pmull_probe +.type _armv8_pmull_probe,%function +_armv8_pmull_probe: + pmull v0.1q, v0.1d, v0.1d + ret +.size _armv8_pmull_probe,.-_armv8_pmull_probe diff --git a/deps/openssl/openssl/crypto/arm_arch.h b/deps/openssl/openssl/crypto/arm_arch.h index b654371625960d..9d6e58880d0e60 100644 --- a/deps/openssl/openssl/crypto/arm_arch.h +++ b/deps/openssl/openssl/crypto/arm_arch.h @@ -10,13 +10,24 @@ # define __ARMEL__ # endif # elif defined(__GNUC__) +# if defined(__aarch64__) +# define __ARM_ARCH__ 8 +# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define __ARMEB__ +# else +# define __ARMEL__ +# endif /* * Why doesn't gcc define __ARM_ARCH__? Instead it defines * bunch of below macros. See all_architectires[] table in * gcc/config/arm/arm.c. On a side note it defines * __ARMEL__/__ARMEB__ for little-/big-endian. 
*/ -# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ +# elif defined(__ARM_ARCH) +# define __ARM_ARCH__ __ARM_ARCH +# elif defined(__ARM_ARCH_8A__) +# define __ARM_ARCH__ 8 +# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ defined(__ARM_ARCH_7EM__) # define __ARM_ARCH__ 7 @@ -41,11 +52,27 @@ # include # endif +# if !defined(__ARM_MAX_ARCH__) +# define __ARM_MAX_ARCH__ __ARM_ARCH__ +# endif + +# if __ARM_MAX_ARCH__<__ARM_ARCH__ +# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__" +# elif __ARM_MAX_ARCH__!=__ARM_ARCH__ +# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__) +# error "can't build universal big-endian binary" +# endif +# endif + # if !__ASSEMBLER__ extern unsigned int OPENSSL_armcap_P; - -# define ARMV7_NEON (1<<0) -# define ARMV7_TICK (1<<1) # endif +# define ARMV7_NEON (1<<0) +# define ARMV7_TICK (1<<1) +# define ARMV8_AES (1<<2) +# define ARMV8_SHA1 (1<<3) +# define ARMV8_SHA256 (1<<4) +# define ARMV8_PMULL (1<<5) + #endif diff --git a/deps/openssl/openssl/crypto/armcap.c b/deps/openssl/openssl/crypto/armcap.c index 28522ea8678454..356fa152871fac 100644 --- a/deps/openssl/openssl/crypto/armcap.c +++ b/deps/openssl/openssl/crypto/armcap.c @@ -7,8 +7,18 @@ #include "arm_arch.h" -unsigned int OPENSSL_armcap_P; +unsigned int OPENSSL_armcap_P = 0; +#if __ARM_MAX_ARCH__<7 +void OPENSSL_cpuid_setup(void) +{ +} + +unsigned long OPENSSL_rdtsc(void) +{ + return 0; +} +#else static sigset_t all_masked; static sigjmp_buf ill_jmp; @@ -22,9 +32,13 @@ static void ill_handler(int sig) * ARM compilers support inline assembler... */ void _armv7_neon_probe(void); -unsigned int _armv7_tick(void); +void _armv8_aes_probe(void); +void _armv8_sha1_probe(void); +void _armv8_sha256_probe(void); +void _armv8_pmull_probe(void); +unsigned long _armv7_tick(void); -unsigned int OPENSSL_rdtsc(void) +unsigned long OPENSSL_rdtsc(void) { if (OPENSSL_armcap_P & ARMV7_TICK) return _armv7_tick(); @@ -32,9 +46,44 @@ unsigned int OPENSSL_rdtsc(void) return 0; } -#if defined(__GNUC__) && __GNUC__>=2 +/* + * Use a weak reference to getauxval() so we can use it if it is available but + * don't break the build if it is not. + */ +# if defined(__GNUC__) && __GNUC__>=2 void OPENSSL_cpuid_setup(void) __attribute__ ((constructor)); -#endif +extern unsigned long getauxval(unsigned long type) __attribute__ ((weak)); +# else +static unsigned long (*getauxval) (unsigned long) = NULL; +# endif + +/* + * ARM puts the the feature bits for Crypto Extensions in AT_HWCAP2, whereas + * AArch64 used AT_HWCAP. 
+ */ +# if defined(__arm__) || defined (__arm) +# define HWCAP 16 + /* AT_HWCAP */ +# define HWCAP_NEON (1 << 12) + +# define HWCAP_CE 26 + /* AT_HWCAP2 */ +# define HWCAP_CE_AES (1 << 0) +# define HWCAP_CE_PMULL (1 << 1) +# define HWCAP_CE_SHA1 (1 << 2) +# define HWCAP_CE_SHA256 (1 << 3) +# elif defined(__aarch64__) +# define HWCAP 16 + /* AT_HWCAP */ +# define HWCAP_NEON (1 << 1) + +# define HWCAP_CE HWCAP +# define HWCAP_CE_AES (1 << 3) +# define HWCAP_CE_PMULL (1 << 4) +# define HWCAP_CE_SHA1 (1 << 5) +# define HWCAP_CE_SHA256 (1 << 6) +# endif + void OPENSSL_cpuid_setup(void) { char *e; @@ -47,7 +96,7 @@ void OPENSSL_cpuid_setup(void) trigger = 1; if ((e = getenv("OPENSSL_armcap"))) { - OPENSSL_armcap_P = strtoul(e, NULL, 0); + OPENSSL_armcap_P = (unsigned int)strtoul(e, NULL, 0); return; } @@ -67,9 +116,42 @@ void OPENSSL_cpuid_setup(void) sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset); sigaction(SIGILL, &ill_act, &ill_oact); - if (sigsetjmp(ill_jmp, 1) == 0) { + if (getauxval != NULL) { + if (getauxval(HWCAP) & HWCAP_NEON) { + unsigned long hwcap = getauxval(HWCAP_CE); + + OPENSSL_armcap_P |= ARMV7_NEON; + + if (hwcap & HWCAP_CE_AES) + OPENSSL_armcap_P |= ARMV8_AES; + + if (hwcap & HWCAP_CE_PMULL) + OPENSSL_armcap_P |= ARMV8_PMULL; + + if (hwcap & HWCAP_CE_SHA1) + OPENSSL_armcap_P |= ARMV8_SHA1; + + if (hwcap & HWCAP_CE_SHA256) + OPENSSL_armcap_P |= ARMV8_SHA256; + } + } else if (sigsetjmp(ill_jmp, 1) == 0) { _armv7_neon_probe(); OPENSSL_armcap_P |= ARMV7_NEON; + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_pmull_probe(); + OPENSSL_armcap_P |= ARMV8_PMULL | ARMV8_AES; + } else if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_aes_probe(); + OPENSSL_armcap_P |= ARMV8_AES; + } + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sha1_probe(); + OPENSSL_armcap_P |= ARMV8_SHA1; + } + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sha256_probe(); + OPENSSL_armcap_P |= ARMV8_SHA256; + } } if (sigsetjmp(ill_jmp, 1) == 0) { _armv7_tick(); @@ -79,3 +161,4 @@ void OPENSSL_cpuid_setup(void) sigaction(SIGILL, &ill_oact, NULL); sigprocmask(SIG_SETMASK, &oset, NULL); } +#endif diff --git a/deps/openssl/openssl/crypto/armv4cpuid.S b/deps/openssl/openssl/crypto/armv4cpuid.S index 2d618deaa43e43..65010ae4fe065c 100644 --- a/deps/openssl/openssl/crypto/armv4cpuid.S +++ b/deps/openssl/openssl/crypto/armv4cpuid.S @@ -4,20 +4,6 @@ .code 32 .align 5 -.global _armv7_neon_probe -.type _armv7_neon_probe,%function -_armv7_neon_probe: - .word 0xf26ee1fe @ vorr q15,q15,q15 - .word 0xe12fff1e @ bx lr -.size _armv7_neon_probe,.-_armv7_neon_probe - -.global _armv7_tick -.type _armv7_tick,%function -_armv7_tick: - mrc p15,0,r0,c9,c13,0 - .word 0xe12fff1e @ bx lr -.size _armv7_tick,.-_armv7_tick - .global OPENSSL_atomic_add .type OPENSSL_atomic_add,%function OPENSSL_atomic_add: @@ -28,7 +14,7 @@ OPENSSL_atomic_add: cmp r2,#0 bne .Ladd mov r0,r3 - .word 0xe12fff1e @ bx lr + bx lr #else stmdb sp!,{r4-r6,lr} ldr r2,.Lspinlock @@ -81,62 +67,131 @@ OPENSSL_cleanse: adds r1,r1,#4 bne .Little .Lcleanse_done: +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_cleanse,.-OPENSSL_cleanse +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.align 5 +.global _armv7_neon_probe +.type _armv7_neon_probe,%function +_armv7_neon_probe: + vorr q0,q0,q0 + bx lr +.size _armv7_neon_probe,.-_armv7_neon_probe + +.global _armv7_tick +.type _armv7_tick,%function +_armv7_tick: + mrrc p15,1,r0,r1,c14 @ CNTVCT + bx lr +.size _armv7_tick,.-_armv7_tick + +.global _armv8_aes_probe +.type 
_armv8_aes_probe,%function +_armv8_aes_probe: + .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0 + bx lr +.size _armv8_aes_probe,.-_armv8_aes_probe + +.global _armv8_sha1_probe +.type _armv8_sha1_probe,%function +_armv8_sha1_probe: + .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0 + bx lr +.size _armv8_sha1_probe,.-_armv8_sha1_probe + +.global _armv8_sha256_probe +.type _armv8_sha256_probe,%function +_armv8_sha256_probe: + .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0 + bx lr +.size _armv8_sha256_probe,.-_armv8_sha256_probe +.global _armv8_pmull_probe +.type _armv8_pmull_probe,%function +_armv8_pmull_probe: + .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0 + bx lr +.size _armv8_pmull_probe,.-_armv8_pmull_probe +#endif + .global OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,%function OPENSSL_wipe_cpu: +#if __ARM_MAX_ARCH__>=7 ldr r0,.LOPENSSL_armcap adr r1,.LOPENSSL_armcap ldr r0,[r1,r0] +#endif eor r2,r2,r2 eor r3,r3,r3 eor ip,ip,ip +#if __ARM_MAX_ARCH__>=7 tst r0,#1 beq .Lwipe_done - .word 0xf3000150 @ veor q0, q0, q0 - .word 0xf3022152 @ veor q1, q1, q1 - .word 0xf3044154 @ veor q2, q2, q2 - .word 0xf3066156 @ veor q3, q3, q3 - .word 0xf34001f0 @ veor q8, q8, q8 - .word 0xf34221f2 @ veor q9, q9, q9 - .word 0xf34441f4 @ veor q10, q10, q10 - .word 0xf34661f6 @ veor q11, q11, q11 - .word 0xf34881f8 @ veor q12, q12, q12 - .word 0xf34aa1fa @ veor q13, q13, q13 - .word 0xf34cc1fc @ veor q14, q14, q14 - .word 0xf34ee1fe @ veor q15, q15, q15 + veor q0, q0, q0 + veor q1, q1, q1 + veor q2, q2, q2 + veor q3, q3, q3 + veor q8, q8, q8 + veor q9, q9, q9 + veor q10, q10, q10 + veor q11, q11, q11 + veor q12, q12, q12 + veor q13, q13, q13 + veor q14, q14, q14 + veor q15, q15, q15 .Lwipe_done: +#endif mov r0,sp +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu .global OPENSSL_instrument_bus .type OPENSSL_instrument_bus,%function OPENSSL_instrument_bus: eor r0,r0,r0 +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus .global OPENSSL_instrument_bus2 .type OPENSSL_instrument_bus2,%function OPENSSL_instrument_bus2: eor r0,r0,r0 +#if __ARM_ARCH__>=5 + bx lr +#else tst lr,#1 moveq pc,lr .word 0xe12fff1e @ bx lr +#endif .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2 .align 5 +#if __ARM_MAX_ARCH__>=7 .LOPENSSL_armcap: .word OPENSSL_armcap_P-.LOPENSSL_armcap +#endif #if __ARM_ARCH__>=6 .align 5 #else diff --git a/deps/openssl/openssl/crypto/asn1/Makefile b/deps/openssl/openssl/crypto/asn1/Makefile index f7787005d454ab..2e2a097399be6f 100644 --- a/deps/openssl/openssl/crypto/asn1/Makefile +++ b/deps/openssl/openssl/crypto/asn1/Makefile @@ -174,7 +174,7 @@ a_gentm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h a_gentm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h a_gentm.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h a_gentm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -a_gentm.o: ../cryptlib.h ../o_time.h a_gentm.c +a_gentm.o: ../cryptlib.h ../o_time.h a_gentm.c asn1_locl.h a_i2d_fp.o: ../../e_os.h ../../include/openssl/asn1.h a_i2d_fp.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h a_i2d_fp.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h @@ -275,6 +275,7 @@ a_time.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h a_time.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h a_time.o: 
../../include/openssl/safestack.h ../../include/openssl/stack.h a_time.o: ../../include/openssl/symhacks.h ../cryptlib.h ../o_time.h a_time.c +a_time.o: asn1_locl.h a_type.o: ../../e_os.h ../../include/openssl/asn1.h a_type.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h a_type.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h @@ -291,7 +292,7 @@ a_utctm.o: ../../include/openssl/err.h ../../include/openssl/lhash.h a_utctm.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h a_utctm.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h a_utctm.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -a_utctm.o: ../cryptlib.h ../o_time.h a_utctm.c +a_utctm.o: ../cryptlib.h ../o_time.h a_utctm.c asn1_locl.h a_utf8.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h a_utf8.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h a_utf8.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h diff --git a/deps/openssl/openssl/crypto/asn1/a_gentm.c b/deps/openssl/openssl/crypto/asn1/a_gentm.c index 8b3ef716e334f6..fa76dcac91f34b 100644 --- a/deps/openssl/openssl/crypto/asn1/a_gentm.c +++ b/deps/openssl/openssl/crypto/asn1/a_gentm.c @@ -65,6 +65,7 @@ #include "cryptlib.h" #include "o_time.h" #include +#include "asn1_locl.h" #if 0 @@ -117,7 +118,7 @@ ASN1_GENERALIZEDTIME *d2i_ASN1_GENERALIZEDTIME(ASN1_GENERALIZEDTIME **a, #endif -int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) +int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d) { static const int min[9] = { 0, 0, 1, 1, 0, 0, 0, 0, 0 }; static const int max[9] = { 99, 99, 12, 31, 23, 59, 59, 12, 59 }; @@ -139,6 +140,8 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) for (i = 0; i < 7; i++) { if ((i == 6) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) { i++; + if (tm) + tm->tm_sec = 0; break; } if ((a[o] < '0') || (a[o] > '9')) @@ -155,6 +158,31 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + switch (i) { + case 0: + tm->tm_year = n * 100 - 1900; + break; + case 1: + tm->tm_year += n; + break; + case 2: + tm->tm_mon = n - 1; + break; + case 3: + tm->tm_mday = n; + break; + case 4: + tm->tm_hour = n; + break; + case 5: + tm->tm_min = n; + break; + case 6: + tm->tm_sec = n; + break; + } + } } /* * Optional fractional seconds: decimal point followed by one or more @@ -174,6 +202,7 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) if (a[o] == 'Z') o++; else if ((a[o] == '+') || (a[o] == '-')) { + int offsign = a[o] == '-' ? -1 : 1, offset = 0; o++; if (o + 4 > l) goto err; @@ -187,9 +216,17 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) n = (n * 10) + a[o] - '0'; if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + if (i == 7) + offset = n * 3600; + else if (i == 8) + offset += n * 60; + } o++; } - } else { + if (offset && !OPENSSL_gmtime_adj(tm, 0, offset * offsign)) + return 0; + } else if (a[o]) { /* Missing time zone information. 
*/ goto err; } @@ -198,6 +235,11 @@ int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *d) return (0); } +int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *d) +{ + return asn1_generalizedtime_to_tm(NULL, d); +} + int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str) { ASN1_GENERALIZEDTIME t; diff --git a/deps/openssl/openssl/crypto/asn1/a_time.c b/deps/openssl/openssl/crypto/asn1/a_time.c index c81f0de5a9e826..fcb2d565cdd0ac 100644 --- a/deps/openssl/openssl/crypto/asn1/a_time.c +++ b/deps/openssl/openssl/crypto/asn1/a_time.c @@ -66,6 +66,7 @@ #include "cryptlib.h" #include "o_time.h" #include +#include "asn1_locl.h" IMPLEMENT_ASN1_MSTRING(ASN1_TIME, B_ASN1_TIME) @@ -196,3 +197,32 @@ int ASN1_TIME_set_string(ASN1_TIME *s, const char *str) return 1; } + +static int asn1_time_to_tm(struct tm *tm, const ASN1_TIME *t) +{ + if (t == NULL) { + time_t now_t; + time(&now_t); + if (OPENSSL_gmtime(&now_t, tm)) + return 1; + return 0; + } + + if (t->type == V_ASN1_UTCTIME) + return asn1_utctime_to_tm(tm, t); + else if (t->type == V_ASN1_GENERALIZEDTIME) + return asn1_generalizedtime_to_tm(tm, t); + + return 0; +} + +int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to) +{ + struct tm tm_from, tm_to; + if (!asn1_time_to_tm(&tm_from, from)) + return 0; + if (!asn1_time_to_tm(&tm_to, to)) + return 0; + return OPENSSL_gmtime_diff(pday, psec, &tm_from, &tm_to); +} diff --git a/deps/openssl/openssl/crypto/asn1/a_utctm.c b/deps/openssl/openssl/crypto/asn1/a_utctm.c index 179de6d395900a..724a10be4ed6ab 100644 --- a/deps/openssl/openssl/crypto/asn1/a_utctm.c +++ b/deps/openssl/openssl/crypto/asn1/a_utctm.c @@ -61,6 +61,7 @@ #include "cryptlib.h" #include "o_time.h" #include +#include "asn1_locl.h" #if 0 int i2d_ASN1_UTCTIME(ASN1_UTCTIME *a, unsigned char **pp) @@ -109,7 +110,7 @@ ASN1_UTCTIME *d2i_ASN1_UTCTIME(ASN1_UTCTIME **a, unsigned char **pp, #endif -int ASN1_UTCTIME_check(ASN1_UTCTIME *d) +int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d) { static const int min[8] = { 0, 1, 1, 0, 0, 0, 0, 0 }; static const int max[8] = { 99, 12, 31, 23, 59, 59, 12, 59 }; @@ -127,6 +128,8 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d) for (i = 0; i < 6; i++) { if ((i == 5) && ((a[o] == 'Z') || (a[o] == '+') || (a[o] == '-'))) { i++; + if (tm) + tm->tm_sec = 0; break; } if ((a[o] < '0') || (a[o] > '9')) @@ -143,10 +146,33 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d) if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + switch (i) { + case 0: + tm->tm_year = n < 50 ? n + 100 : n; + break; + case 1: + tm->tm_mon = n - 1; + break; + case 2: + tm->tm_mday = n; + break; + case 3: + tm->tm_hour = n; + break; + case 4: + tm->tm_min = n; + break; + case 5: + tm->tm_sec = n; + break; + } + } } if (a[o] == 'Z') o++; else if ((a[o] == '+') || (a[o] == '-')) { + int offsign = a[o] == '-' ? 
-1 : 1, offset = 0; o++; if (o + 4 > l) goto err; @@ -160,12 +186,25 @@ int ASN1_UTCTIME_check(ASN1_UTCTIME *d) n = (n * 10) + a[o] - '0'; if ((n < min[i]) || (n > max[i])) goto err; + if (tm) { + if (i == 6) + offset = n * 3600; + else if (i == 7) + offset += n * 60; + } o++; } + if (offset && !OPENSSL_gmtime_adj(tm, 0, offset * offsign)) + return 0; } - return (o == l); + return o == l; err: - return (0); + return 0; +} + +int ASN1_UTCTIME_check(const ASN1_UTCTIME *d) +{ + return asn1_utctime_to_tm(NULL, d); } int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str) @@ -249,43 +288,26 @@ ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, int ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t) { - struct tm *tm; - struct tm data; - int offset; - int year; - -#define g2(p) (((p)[0]-'0')*10+(p)[1]-'0') - - if (s->data[12] == 'Z') - offset = 0; - else { - offset = g2(s->data + 13) * 60 + g2(s->data + 15); - if (s->data[12] == '-') - offset = -offset; - } + struct tm stm, ttm; + int day, sec; - t -= offset * 60; /* FIXME: may overflow in extreme cases */ + if (!asn1_utctime_to_tm(&stm, s)) + return -2; - tm = OPENSSL_gmtime(&t, &data); - /* - * NB: -1, 0, 1 already valid return values so use -2 to indicate error. - */ - if (tm == NULL) + if (!OPENSSL_gmtime(&t, &ttm)) return -2; -#define return_cmp(a,b) if ((a)<(b)) return -1; else if ((a)>(b)) return 1 - year = g2(s->data); - if (year < 50) - year += 100; - return_cmp(year, tm->tm_year); - return_cmp(g2(s->data + 2) - 1, tm->tm_mon); - return_cmp(g2(s->data + 4), tm->tm_mday); - return_cmp(g2(s->data + 6), tm->tm_hour); - return_cmp(g2(s->data + 8), tm->tm_min); - return_cmp(g2(s->data + 10), tm->tm_sec); -#undef g2 -#undef return_cmp + if (!OPENSSL_gmtime_diff(&day, &sec, &ttm, &stm)) + return -2; + if (day > 0) + return 1; + if (day < 0) + return -1; + if (sec > 0) + return 1; + if (sec < 0) + return -1; return 0; } diff --git a/deps/openssl/openssl/crypto/asn1/ameth_lib.c b/deps/openssl/openssl/crypto/asn1/ameth_lib.c index 45f3f4056049f8..02300dfedf0697 100644 --- a/deps/openssl/openssl/crypto/asn1/ameth_lib.c +++ b/deps/openssl/openssl/crypto/asn1/ameth_lib.c @@ -68,6 +68,7 @@ extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[]; extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[]; extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth; +extern const EVP_PKEY_ASN1_METHOD dhx_asn1_meth; extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth; extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth; extern const EVP_PKEY_ASN1_METHOD cmac_asn1_meth; @@ -92,7 +93,10 @@ static const EVP_PKEY_ASN1_METHOD *standard_methods[] = { &eckey_asn1_meth, #endif &hmac_asn1_meth, - &cmac_asn1_meth + &cmac_asn1_meth, +#ifndef OPENSSL_NO_DH + &dhx_asn1_meth +#endif }; typedef int sk_cmp_fn_type(const char *const *a, const char *const *b); diff --git a/deps/openssl/openssl/crypto/asn1/asn1.h b/deps/openssl/openssl/crypto/asn1/asn1.h index 39b7833f582f26..68e791fcdbe8e5 100644 --- a/deps/openssl/openssl/crypto/asn1/asn1.h +++ b/deps/openssl/openssl/crypto/asn1/asn1.h @@ -207,13 +207,13 @@ typedef struct asn1_const_ctx_st { # define ASN1_OBJECT_FLAG_CRITICAL 0x02/* critical x509v3 object id */ # define ASN1_OBJECT_FLAG_DYNAMIC_STRINGS 0x04/* internal use */ # define ASN1_OBJECT_FLAG_DYNAMIC_DATA 0x08/* internal use */ -typedef struct asn1_object_st { +struct asn1_object_st { const char *sn, *ln; int nid; int length; const unsigned char *data; /* data remains const after init */ int flags; /* Should we free this one */ -} ASN1_OBJECT; +}; # define 
ASN1_STRING_FLAG_BITS_LEFT 0x08/* Set if 0x07 has bits left value */ /* @@ -843,7 +843,7 @@ int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y); DECLARE_ASN1_FUNCTIONS(ASN1_ENUMERATED) -int ASN1_UTCTIME_check(ASN1_UTCTIME *a); +int ASN1_UTCTIME_check(const ASN1_UTCTIME *a); ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s, time_t t); ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, int offset_day, long offset_sec); @@ -853,13 +853,15 @@ int ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t); time_t ASN1_UTCTIME_get(const ASN1_UTCTIME *s); # endif -int ASN1_GENERALIZEDTIME_check(ASN1_GENERALIZEDTIME *a); +int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *a); ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(ASN1_GENERALIZEDTIME *s, time_t t); ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(ASN1_GENERALIZEDTIME *s, time_t t, int offset_day, long offset_sec); int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str); +int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to); DECLARE_ASN1_FUNCTIONS(ASN1_OCTET_STRING) ASN1_OCTET_STRING *ASN1_OCTET_STRING_dup(const ASN1_OCTET_STRING *a); diff --git a/deps/openssl/openssl/crypto/asn1/asn1_locl.h b/deps/openssl/openssl/crypto/asn1/asn1_locl.h index 9f5ed847271832..4c004fab9a8dd0 100644 --- a/deps/openssl/openssl/crypto/asn1/asn1_locl.h +++ b/deps/openssl/openssl/crypto/asn1/asn1_locl.h @@ -59,6 +59,9 @@ /* Internal ASN1 structures and functions: not for application use */ +int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d); +int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d); + /* ASN1 print context structure */ struct asn1_pctx_st { diff --git a/deps/openssl/openssl/crypto/asn1/t_x509.c b/deps/openssl/openssl/crypto/asn1/t_x509.c index 4e7c45dd5ddc89..8aab55130c9545 100644 --- a/deps/openssl/openssl/crypto/asn1/t_x509.c +++ b/deps/openssl/openssl/crypto/asn1/t_x509.c @@ -228,6 +228,21 @@ int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflags, } } + if (!(cflag & X509_FLAG_NO_IDS)) { + if (ci->issuerUID) { + if (BIO_printf(bp, "%8sIssuer Unique ID: ", "") <= 0) + goto err; + if (!X509_signature_dump(bp, ci->issuerUID, 12)) + goto err; + } + if (ci->subjectUID) { + if (BIO_printf(bp, "%8sSubject Unique ID: ", "") <= 0) + goto err; + if (!X509_signature_dump(bp, ci->subjectUID, 12)) + goto err; + } + } + if (!(cflag & X509_FLAG_NO_EXTENSIONS)) X509V3_extensions_print(bp, "X509v3 extensions", ci->extensions, cflag, 8); diff --git a/deps/openssl/openssl/crypto/asn1/x_crl.c b/deps/openssl/openssl/crypto/asn1/x_crl.c index e258c714b2499b..027950330d8b67 100644 --- a/deps/openssl/openssl/crypto/asn1/x_crl.c +++ b/deps/openssl/openssl/crypto/asn1/x_crl.c @@ -58,8 +58,8 @@ #include #include "cryptlib.h" -#include "asn1_locl.h" #include +#include "asn1_locl.h" #include #include @@ -341,6 +341,8 @@ ASN1_SEQUENCE_ref(X509_CRL, crl_cb, CRYPTO_LOCK_X509_CRL) = { IMPLEMENT_ASN1_FUNCTIONS(X509_REVOKED) +IMPLEMENT_ASN1_DUP_FUNCTION(X509_REVOKED) + IMPLEMENT_ASN1_FUNCTIONS(X509_CRL_INFO) IMPLEMENT_ASN1_FUNCTIONS(X509_CRL) diff --git a/deps/openssl/openssl/crypto/asn1/x_x509.c b/deps/openssl/openssl/crypto/asn1/x_x509.c index d51b76e79e1680..55319acf9ce18d 100644 --- a/deps/openssl/openssl/crypto/asn1/x_x509.c +++ b/deps/openssl/openssl/crypto/asn1/x_x509.c @@ -208,3 +208,23 @@ int i2d_X509_AUX(X509 *a, unsigned char **pp) length += i2d_X509_CERT_AUX(a->aux, pp); return length; } + +int i2d_re_X509_tbs(X509 *x, unsigned char **pp) +{ + 
x->cert_info->enc.modified = 1; + return i2d_X509_CINF(x->cert_info, pp); +} + +void X509_get0_signature(ASN1_BIT_STRING **psig, X509_ALGOR **palg, + const X509 *x) +{ + if (psig) + *psig = x->signature; + if (palg) + *palg = x->sig_alg; +} + +int X509_get_signature_nid(const X509 *x) +{ + return OBJ_obj2nid(x->sig_alg->algorithm); +} diff --git a/deps/openssl/openssl/crypto/bio/b_dump.c b/deps/openssl/openssl/crypto/bio/b_dump.c index 3293c724c94f86..ed8e521449a445 100644 --- a/deps/openssl/openssl/crypto/bio/b_dump.c +++ b/deps/openssl/openssl/crypto/bio/b_dump.c @@ -182,3 +182,28 @@ int BIO_dump_indent(BIO *bp, const char *s, int len, int indent) { return BIO_dump_indent_cb(write_bio, bp, s, len, indent); } + +int BIO_hex_string(BIO *out, int indent, int width, unsigned char *data, + int datalen) +{ + int i, j = 0; + + if (datalen < 1) + return 1; + + for (i = 0; i < datalen - 1; i++) { + if (i && !j) + BIO_printf(out, "%*s", indent, ""); + + BIO_printf(out, "%02X:", data[i]); + + j = (j + 1) % width; + if (!j) + BIO_printf(out, "\n"); + } + + if (i && !j) + BIO_printf(out, "%*s", indent, ""); + BIO_printf(out, "%02X", data[datalen - 1]); + return 1; +} diff --git a/deps/openssl/openssl/crypto/bio/b_sock.c b/deps/openssl/openssl/crypto/bio/b_sock.c index bda882c40b0831..5bad0a2bada284 100644 --- a/deps/openssl/openssl/crypto/bio/b_sock.c +++ b/deps/openssl/openssl/crypto/bio/b_sock.c @@ -225,13 +225,17 @@ int BIO_get_port(const char *str, unsigned short *port_ptr) int BIO_sock_error(int sock) { int j, i; - int size; + union { + size_t s; + int i; + } size; # if defined(OPENSSL_SYS_BEOS_R5) return 0; # endif - size = sizeof(int); + /* heuristic way to adapt for platforms that expect 64-bit optlen */ + size.s = 0, size.i = sizeof(j); /* * Note: under Windows the third parameter is of type (char *) whereas * under other systems it is (void *) if you don't have a cast it will diff --git a/deps/openssl/openssl/crypto/bio/bio.h b/deps/openssl/openssl/crypto/bio/bio.h index be9cd0eff39b58..7878fb1984f241 100644 --- a/deps/openssl/openssl/crypto/bio/bio.h +++ b/deps/openssl/openssl/crypto/bio/bio.h @@ -174,6 +174,7 @@ extern "C" { # define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45/* Next DTLS handshake timeout * to adjust socket timeouts */ +# define BIO_CTRL_DGRAM_SET_DONT_FRAG 48 # define BIO_CTRL_DGRAM_GET_MTU_OVERHEAD 49 @@ -725,6 +726,9 @@ int BIO_dump_indent(BIO *b, const char *bytes, int len, int indent); int BIO_dump_fp(FILE *fp, const char *s, int len); int BIO_dump_indent_fp(FILE *fp, const char *s, int len, int indent); # endif +int BIO_hex_string(BIO *out, int indent, int width, unsigned char *data, + int datalen); + struct hostent *BIO_gethostbyname(const char *name); /*- * We might want a thread-safe interface too: @@ -761,8 +765,8 @@ int BIO_dgram_sctp_wait_for_dry(BIO *b); int BIO_dgram_sctp_msg_waiting(BIO *b); # endif BIO *BIO_new_fd(int fd, int close_flag); -BIO *BIO_new_connect(char *host_port); -BIO *BIO_new_accept(char *host_port); +BIO *BIO_new_connect(const char *host_port); +BIO *BIO_new_accept(const char *host_port); int BIO_new_bio_pair(BIO **bio1, size_t writebuf1, BIO **bio2, size_t writebuf2); diff --git a/deps/openssl/openssl/crypto/bio/bio_err.c b/deps/openssl/openssl/crypto/bio/bio_err.c index 6dd6162fc93aa7..d9007aa3d32da0 100644 --- a/deps/openssl/openssl/crypto/bio/bio_err.c +++ b/deps/openssl/openssl/crypto/bio/bio_err.c @@ -1,6 +1,6 @@ /* crypto/bio/bio_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The 
OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions diff --git a/deps/openssl/openssl/crypto/bio/bss_acpt.c b/deps/openssl/openssl/crypto/bio/bss_acpt.c index d08292c3e9ac6c..4a5e39bd387b39 100644 --- a/deps/openssl/openssl/crypto/bio/bss_acpt.c +++ b/deps/openssl/openssl/crypto/bio/bss_acpt.c @@ -445,7 +445,7 @@ static int acpt_puts(BIO *bp, const char *str) return (ret); } -BIO *BIO_new_accept(char *str) +BIO *BIO_new_accept(const char *str) { BIO *ret; diff --git a/deps/openssl/openssl/crypto/bio/bss_conn.c b/deps/openssl/openssl/crypto/bio/bss_conn.c index 6a5e8de8812ac7..42d0afffbc6c4c 100644 --- a/deps/openssl/openssl/crypto/bio/bss_conn.c +++ b/deps/openssl/openssl/crypto/bio/bss_conn.c @@ -585,7 +585,7 @@ static int conn_puts(BIO *bp, const char *str) return (ret); } -BIO *BIO_new_connect(char *str) +BIO *BIO_new_connect(const char *str) { BIO *ret; diff --git a/deps/openssl/openssl/crypto/bio/bss_dgram.c b/deps/openssl/openssl/crypto/bio/bss_dgram.c index b495db26e142fc..388d90d02ef319 100644 --- a/deps/openssl/openssl/crypto/bio/bss_dgram.c +++ b/deps/openssl/openssl/crypto/bio/bss_dgram.c @@ -65,7 +65,7 @@ #include #ifndef OPENSSL_NO_DGRAM -# if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VMS) +# if defined(OPENSSL_SYS_VMS) # include # endif @@ -80,6 +80,10 @@ # define IP_MTU 14 /* linux is lame */ # endif +# if OPENSSL_USE_IPV6 && !defined(IPPROTO_IPV6) +# define IPPROTO_IPV6 41 /* windows is lame */ +# endif + # if defined(__FreeBSD__) && defined(IN6_IS_ADDR_V4MAPPED) /* Standard definition causes type-punning problems. */ # undef IN6_IS_ADDR_V4MAPPED @@ -495,8 +499,8 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) int *ip; struct sockaddr *to = NULL; bio_dgram_data *data = NULL; -# if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU)) int sockopt_val = 0; +# if defined(OPENSSL_SYS_LINUX) && (defined(IP_MTU_DISCOVER) || defined(IP_MTU)) socklen_t sockopt_len; /* assume that system supporting IP_MTU is * modern enough to define socklen_t */ socklen_t addr_len; @@ -881,6 +885,61 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) ret = 0; break; # endif + case BIO_CTRL_DGRAM_SET_DONT_FRAG: + sockopt_val = num ? 1 : 0; + + switch (data->peer.sa.sa_family) { + case AF_INET: +# if defined(IP_DONTFRAG) + if ((ret = setsockopt(b->num, IPPROTO_IP, IP_DONTFRAG, + &sockopt_val, sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# elif defined(OPENSSL_SYS_LINUX) && defined(IP_MTUDISCOVER) + if ((sockopt_val = num ? IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT), + (ret = setsockopt(b->num, IPPROTO_IP, IP_MTU_DISCOVER, + &sockopt_val, sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# elif defined(OPENSSL_SYS_WINDOWS) && defined(IP_DONTFRAGMENT) + if ((ret = setsockopt(b->num, IPPROTO_IP, IP_DONTFRAGMENT, + (const char *)&sockopt_val, + sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# else + ret = -1; +# endif + break; +# if OPENSSL_USE_IPV6 + case AF_INET6: +# if defined(IPV6_DONTFRAG) + if ((ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_DONTFRAG, + (const void *)&sockopt_val, + sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# elif defined(OPENSSL_SYS_LINUX) && defined(IPV6_MTUDISCOVER) + if ((sockopt_val = num ? 
IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT), + (ret = setsockopt(b->num, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &sockopt_val, sizeof(sockopt_val))) < 0) { + perror("setsockopt"); + ret = -1; + } +# else + ret = -1; +# endif + break; +# endif + default: + ret = -1; + break; + } + break; case BIO_CTRL_DGRAM_GET_MTU_OVERHEAD: ret = dgram_get_mtu_overhead(data); break; @@ -1994,11 +2053,22 @@ int BIO_dgram_non_fatal_error(int err) static void get_current_time(struct timeval *t) { -# ifdef OPENSSL_SYS_WIN32 - struct _timeb tb; - _ftime(&tb); - t->tv_sec = (long)tb.time; - t->tv_usec = (long)tb.millitm * 1000; +# if defined(_WIN32) + SYSTEMTIME st; + union { + unsigned __int64 ul; + FILETIME ft; + } now; + + GetSystemTime(&st); + SystemTimeToFileTime(&st, &now.ft); +# ifdef __MINGW32__ + now.ul -= 116444736000000000ULL; +# else + now.ul -= 116444736000000000UI64; /* re-bias to 1/1/1970 */ +# endif + t->tv_sec = (long)(now.ul / 10000000); + t->tv_usec = ((int)(now.ul % 10000000)) / 10; # elif defined(OPENSSL_SYS_VMS) struct timeb tb; ftime(&tb); diff --git a/deps/openssl/openssl/crypto/bio/bss_fd.c b/deps/openssl/openssl/crypto/bio/bss_fd.c index ccef578154d710..5f4e34481b0ed6 100644 --- a/deps/openssl/openssl/crypto/bio/bss_fd.c +++ b/deps/openssl/openssl/crypto/bio/bss_fd.c @@ -63,9 +63,27 @@ #if defined(OPENSSL_NO_POSIX_IO) /* - * One can argue that one should implement dummy placeholder for - * BIO_s_fd here... + * Dummy placeholder for BIO_s_fd... */ +BIO *BIO_new_fd(int fd, int close_flag) +{ + return NULL; +} + +int BIO_fd_non_fatal_error(int err) +{ + return 0; +} + +int BIO_fd_should_retry(int i) +{ + return 0; +} + +BIO_METHOD *BIO_s_fd(void) +{ + return NULL; +} #else /* * As for unconditional usage of "UPLINK" interface in this module. diff --git a/deps/openssl/openssl/crypto/bn/Makefile b/deps/openssl/openssl/crypto/bn/Makefile index 6dd136be5d6b7f..5361dc82785d18 100644 --- a/deps/openssl/openssl/crypto/bn/Makefile +++ b/deps/openssl/openssl/crypto/bn/Makefile @@ -77,6 +77,12 @@ sparcv9a-mont.s: asm/sparcv9a-mont.pl $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ sparcv9-mont.s: asm/sparcv9-mont.pl $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ +vis3-mont.s: asm/vis3-mont.pl + $(PERL) asm/vis3-mont.pl $(CFLAGS) > $@ +sparct4-mont.S: asm/sparct4-mont.pl + $(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@ +sparcv9-gf2m.S: asm/sparcv9-gf2m.pl + $(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@ bn-mips3.o: asm/mips3.s @if [ "$(CC)" = "gcc" ]; then \ @@ -102,8 +108,10 @@ x86_64-mont5.s: asm/x86_64-mont5.pl $(PERL) asm/x86_64-mont5.pl $(PERLASM_SCHEME) > $@ x86_64-gf2m.s: asm/x86_64-gf2m.pl $(PERL) asm/x86_64-gf2m.pl $(PERLASM_SCHEME) > $@ -modexp512-x86_64.s: asm/modexp512-x86_64.pl - $(PERL) asm/modexp512-x86_64.pl $(PERLASM_SCHEME) > $@ +rsaz-x86_64.s: asm/rsaz-x86_64.pl + $(PERL) asm/rsaz-x86_64.pl $(PERLASM_SCHEME) > $@ +rsaz-avx2.s: asm/rsaz-avx2.pl + $(PERL) asm/rsaz-avx2.pl $(PERLASM_SCHEME) > $@ bn-ia64.s: asm/ia64.S $(CC) $(CFLAGS) -E asm/ia64.S > $@ @@ -125,14 +133,15 @@ ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@ ppc64-mont.s: asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@ alpha-mont.s: asm/alpha-mont.pl - (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \ + (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ $(PERL) asm/alpha-mont.pl > $$preproc && \ - $(CC) -E $$preproc > $@ && rm $$preproc) + $(CC) -E -P $$preproc > $@ && rm $$preproc) # GNU make "catch all" -%-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@ +%-mont.S: asm/%-mont.pl; $(PERL) $< 
$(PERLASM_SCHEME) $@ %-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@ +armv4-mont.o: armv4-mont.S armv4-gf2m.o: armv4-gf2m.S files: diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl index c52e0b75b5b623..8f529c95cf0509 100644 --- a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -20,48 +20,26 @@ # length, more for longer keys. Even though NEON 1x1 multiplication # runs in even less cycles, ~30, improvement is measurable only on # longer keys. One has to optimize code elsewhere to get NEON glow... +# +# April 2014 +# +# Double bn_GF2m_mul_2x2 performance by using algorithm from paper +# referred below, which improves ECDH and ECDSA verify benchmarks +# by 18-40%. +# +# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software +# Polynomial Multiplication on ARM Processors using the NEON Engine. +# +# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } - $code=<<___; #include "arm_arch.h" .text .code 32 - -#if __ARM_ARCH__>=7 -.fpu neon - -.type mul_1x1_neon,%function -.align 5 -mul_1x1_neon: - vshl.u64 `&Dlo("q1")`,d16,#8 @ q1-q3 are slided $a - vmull.p8 `&Q("d0")`,d16,d17 @ a·bb - vshl.u64 `&Dlo("q2")`,d16,#16 - vmull.p8 q1,`&Dlo("q1")`,d17 @ a<<8·bb - vshl.u64 `&Dlo("q3")`,d16,#24 - vmull.p8 q2,`&Dlo("q2")`,d17 @ a<<16·bb - vshr.u64 `&Dlo("q1")`,#8 - vmull.p8 q3,`&Dlo("q3")`,d17 @ a<<24·bb - vshl.u64 `&Dhi("q1")`,#24 - veor d0,`&Dlo("q1")` - vshr.u64 `&Dlo("q2")`,#16 - veor d0,`&Dhi("q1")` - vshl.u64 `&Dhi("q2")`,#16 - veor d0,`&Dlo("q2")` - vshr.u64 `&Dlo("q3")`,#24 - veor d0,`&Dhi("q2")` - vshl.u64 `&Dhi("q3")`,#8 - veor d0,`&Dlo("q3")` - veor d0,`&Dhi("q3")` - bx lr -.size mul_1x1_neon,.-mul_1x1_neon -#endif ___ ################ # private interface to mul_1x1_ialu @@ -159,56 +137,17 @@ # void bn_GF2m_mul_2x2(BN_ULONG *r, # BN_ULONG a1,BN_ULONG a0, # BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 - -($A1,$B1,$A0,$B0,$A1B1,$A0B0)=map("d$_",(18..23)); - +{ $code.=<<___; .global bn_GF2m_mul_2x2 .type bn_GF2m_mul_2x2,%function .align 5 bn_GF2m_mul_2x2: -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 ldr r12,.LOPENSSL_armcap .Lpic: ldr r12,[pc,r12] tst r12,#1 - beq .Lialu - - veor $A1,$A1 - vmov.32 $B1,r3,r3 @ two copies of b1 - vmov.32 ${A1}[0],r1 @ a1 - - veor $A0,$A0 - vld1.32 ${B0}[],[sp,:32] @ two copies of b0 - vmov.32 ${A0}[0],r2 @ a0 - mov r12,lr - - vmov d16,$A1 - vmov d17,$B1 - bl mul_1x1_neon @ a1·b1 - vmov $A1B1,d0 - - vmov d16,$A0 - vmov d17,$B0 - bl mul_1x1_neon @ a0·b0 - vmov $A0B0,d0 - - veor d16,$A0,$A1 - veor d17,$B0,$B1 - veor $A0,$A0B0,$A1B1 - bl mul_1x1_neon @ (a0+a1)·(b0+b1) - - veor d0,$A0 @ (a0+a1)·(b0+b1)-a0·b0-a1·b1 - vshl.u64 d1,d0,#32 - vshr.u64 d0,d0,#32 - veor $A0B0,d1 - veor $A1B1,d0 - vst1.32 {${A0B0}[0]},[r0,:32]! - vst1.32 {${A0B0}[1]},[r0,:32]! - vst1.32 {${A1B1}[0]},[r0,:32]! 
- vst1.32 {${A1B1}[1]},[r0,:32] - bx r12 -.align 4 -.Lialu: + bne .LNEON #endif ___ $ret="r10"; # reassigned 1st argument @@ -260,8 +199,72 @@ moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif +___ +} +{ +my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12)); +my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31)); + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.align 5 +.LNEON: + ldr r12, [sp] @ 5th argument + vmov.32 $a, r2, r1 + vmov.32 $b, r12, r3 + vmov.i64 $k48, #0x0000ffffffffffff + vmov.i64 $k32, #0x00000000ffffffff + vmov.i64 $k16, #0x000000000000ffff + + vext.8 $t0#lo, $a, $a, #1 @ A1 + vmull.p8 $t0, $t0#lo, $b @ F = A1*B + vext.8 $r#lo, $b, $b, #1 @ B1 + vmull.p8 $r, $a, $r#lo @ E = A*B1 + vext.8 $t1#lo, $a, $a, #2 @ A2 + vmull.p8 $t1, $t1#lo, $b @ H = A2*B + vext.8 $t3#lo, $b, $b, #2 @ B2 + vmull.p8 $t3, $a, $t3#lo @ G = A*B2 + vext.8 $t2#lo, $a, $a, #3 @ A3 + veor $t0, $t0, $r @ L = E + F + vmull.p8 $t2, $t2#lo, $b @ J = A3*B + vext.8 $r#lo, $b, $b, #3 @ B3 + veor $t1, $t1, $t3 @ M = G + H + vmull.p8 $r, $a, $r#lo @ I = A*B3 + veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8 + vand $t0#hi, $t0#hi, $k48 + vext.8 $t3#lo, $b, $b, #4 @ B4 + veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16 + vand $t1#hi, $t1#hi, $k32 + vmull.p8 $t3, $a, $t3#lo @ K = A*B4 + veor $t2, $t2, $r @ N = I + J + veor $t0#lo, $t0#lo, $t0#hi + veor $t1#lo, $t1#lo, $t1#hi + veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24 + vand $t2#hi, $t2#hi, $k16 + vext.8 $t0, $t0, $t0, #15 + veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32 + vmov.i64 $t3#hi, #0 + vext.8 $t1, $t1, $t1, #14 + veor $t2#lo, $t2#lo, $t2#hi + vmull.p8 $r, $a, $b @ D = A*B + vext.8 $t3, $t3, $t3, #12 + vext.8 $t2, $t2, $t2, #13 + veor $t0, $t0, $t1 + veor $t2, $t2, $t3 + veor $r, $r, $t0 + veor $r, $r, $t2 + + vst1.32 {$r}, [r0] + ret @ bx lr +#endif +___ +} +$code.=<<___; .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 .align 5 .LOPENSSL_armcap: .word OPENSSL_armcap_P-(.Lpic+8) @@ -269,10 +272,18 @@ .asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by " .align 5 +#if __ARM_MAX_ARCH__>=7 .comm OPENSSL_armcap_P,4,4 +#endif ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl index f78a8b5f0f5573..1d330e9f8aa311 100644 --- a/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/armv4-mont.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -23,6 +23,21 @@ # than 1/2KB. Windows CE port would be trivial, as it's exclusively # about decorations, ABI and instruction syntax are identical. +# November 2013 +# +# Add NEON code path, which handles lengths divisible by 8. 
RSA/DSA +# performance improvement on Cortex-A8 is ~45-100% depending on key +# length, more for longer keys. On Cortex-A15 the span is ~10-105%. +# On Snapdragon S4 improvement was measured to vary from ~70% to +# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is +# rather because original integer-only code seems to perform +# suboptimally on S4. Situation on Cortex-A9 is unfortunately +# different. It's being looked into, but the trouble is that +# performance for vectors longer than 256 bits is actually couple +# of percent worse than for integer-only code. The code is chosen +# for execution on all NEON-capable processors, because gain on +# others outweighs the marginal loss on Cortex-A9. + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -52,16 +67,40 @@ $_num="$num,#15*4"; $_bpend=$_num; $code=<<___; +#include "arm_arch.h" + .text +.code 32 + +#if __ARM_MAX_ARCH__>=7 +.align 5 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-bn_mul_mont +#endif .global bn_mul_mont .type bn_mul_mont,%function -.align 2 +.align 5 bn_mul_mont: + ldr ip,[sp,#4] @ load num stmdb sp!,{r0,r2} @ sp points at argument block - ldr $num,[sp,#3*4] @ load num - cmp $num,#2 +#if __ARM_MAX_ARCH__>=7 + tst ip,#7 + bne .Lialu + adr r0,bn_mul_mont + ldr r2,.LOPENSSL_armcap + ldr r0,[r0,r2] + tst r0,#1 @ NEON available? + ldmia sp, {r0,r2} + beq .Lialu + add sp,sp,#8 + b bn_mul8x_mont_neon +.align 4 +.Lialu: +#endif + cmp ip,#2 + mov $num,ip @ load num movlt r0,#0 addlt sp,sp,#2*4 blt .Labrt @@ -191,14 +230,447 @@ ldmia sp!,{r4-r12,lr} @ restore registers add sp,sp,#2*4 @ skip over {r0,r2} mov r0,#1 -.Labrt: tst lr,#1 +.Labrt: +#if __ARM_ARCH__>=5 + ret @ bx lr +#else + tst lr,#1 moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) +#endif .size bn_mul_mont,.-bn_mul_mont -.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by " +___ +{ +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +my ($A0,$A1,$A2,$A3)=map("d$_",(0..3)); +my ($N0,$N1,$N2,$N3)=map("d$_",(4..7)); +my ($Z,$Temp)=("q4","q5"); +my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13)); +my ($Bi,$Ni,$M0)=map("d$_",(28..31)); +my $zero=&Dlo($Z); +my $temp=&Dlo($Temp); + +my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5)); +my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9)); + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type bn_mul8x_mont_neon,%function +.align 5 +bn_mul8x_mont_neon: + mov ip,sp + stmdb sp!,{r4-r11} + vstmdb sp!,{d8-d15} @ ABI specification says so + ldmia ip,{r4-r5} @ load rest of parameter block + + sub $toutptr,sp,#16 + vld1.32 {${Bi}[0]}, [$bptr,:32]! + sub $toutptr,$toutptr,$num,lsl#4 + vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-( + and $toutptr,$toutptr,#-64 + vld1.32 {${M0}[0]}, [$n0,:32] + mov sp,$toutptr @ alloca + veor $zero,$zero,$zero + subs $inner,$num,#8 + vzip.16 $Bi,$zero + + vmull.u32 $A0xB,$Bi,${A0}[0] + vmull.u32 $A1xB,$Bi,${A0}[1] + vmull.u32 $A2xB,$Bi,${A1}[0] + vshl.i64 $temp,`&Dhi("$A0xB")`,#16 + vmull.u32 $A3xB,$Bi,${A1}[1] + + vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + veor $zero,$zero,$zero + vmul.u32 $Ni,$temp,$M0 + + vmull.u32 $A4xB,$Bi,${A2}[0] + vld1.32 {$N0-$N3}, [$nptr]! + vmull.u32 $A5xB,$Bi,${A2}[1] + vmull.u32 $A6xB,$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmull.u32 $A7xB,$Bi,${A3}[1] + + bne .LNEON_1st + + @ special case for num=8, everything is in register bank... 
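+	@ (that is, all eight accumulator registers q6-q13 stay resident in
+	@ NEON registers and the .LNEON_outer8 loop below never spills partial
+	@ sums to the scratch area that the general .LNEON_1st/.LNEON_inner
+	@ path streams through)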
+ + vmlal.u32 $A0xB,$Ni,${N0}[0] + sub $outer,$num,#1 + vmlal.u32 $A1xB,$Ni,${N0}[1] + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vmov $Temp,$A0xB + vmlal.u32 $A5xB,$Ni,${N2}[1] + vmov $A0xB,$A1xB + vmlal.u32 $A6xB,$Ni,${N3}[0] + vmov $A1xB,$A2xB + vmlal.u32 $A7xB,$Ni,${N3}[1] + vmov $A2xB,$A3xB + vmov $A3xB,$A4xB + vshr.u64 $temp,$temp,#16 + vmov $A4xB,$A5xB + vmov $A5xB,$A6xB + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + vmov $A6xB,$A7xB + veor $A7xB,$A7xB + vshr.u64 $temp,$temp,#16 + + b .LNEON_outer8 + +.align 4 +.LNEON_outer8: + vld1.32 {${Bi}[0]}, [$bptr,:32]! + veor $zero,$zero,$zero + vzip.16 $Bi,$zero + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + + vmlal.u32 $A0xB,$Bi,${A0}[0] + vmlal.u32 $A1xB,$Bi,${A0}[1] + vmlal.u32 $A2xB,$Bi,${A1}[0] + vshl.i64 $temp,`&Dhi("$A0xB")`,#16 + vmlal.u32 $A3xB,$Bi,${A1}[1] + + vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + veor $zero,$zero,$zero + subs $outer,$outer,#1 + vmul.u32 $Ni,$temp,$M0 + + vmlal.u32 $A4xB,$Bi,${A2}[0] + vmlal.u32 $A5xB,$Bi,${A2}[1] + vmlal.u32 $A6xB,$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmlal.u32 $A7xB,$Bi,${A3}[1] + + vmlal.u32 $A0xB,$Ni,${N0}[0] + vmlal.u32 $A1xB,$Ni,${N0}[1] + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vmov $Temp,$A0xB + vmlal.u32 $A5xB,$Ni,${N2}[1] + vmov $A0xB,$A1xB + vmlal.u32 $A6xB,$Ni,${N3}[0] + vmov $A1xB,$A2xB + vmlal.u32 $A7xB,$Ni,${N3}[1] + vmov $A2xB,$A3xB + vmov $A3xB,$A4xB + vshr.u64 $temp,$temp,#16 + vmov $A4xB,$A5xB + vmov $A5xB,$A6xB + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + vmov $A6xB,$A7xB + veor $A7xB,$A7xB + vshr.u64 $temp,$temp,#16 + + bne .LNEON_outer8 + + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + mov $toutptr,sp + vshr.u64 $temp,`&Dlo("$A0xB")`,#16 + mov $inner,$num + vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp + add $tinptr,sp,#16 + vshr.u64 $temp,`&Dhi("$A0xB")`,#16 + vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` + + b .LNEON_tail2 + +.align 4 +.LNEON_1st: + vmlal.u32 $A0xB,$Ni,${N0}[0] + vld1.32 {$A0-$A3}, [$aptr]! + vmlal.u32 $A1xB,$Ni,${N0}[1] + subs $inner,$inner,#8 + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vld1.32 {$N0-$N1}, [$nptr]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vmlal.u32 $A7xB,$Ni,${N3}[1] + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + + vmull.u32 $A0xB,$Bi,${A0}[0] + vld1.32 {$N2-$N3}, [$nptr]! + vmull.u32 $A1xB,$Bi,${A0}[1] + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + vmull.u32 $A2xB,$Bi,${A1}[0] + vmull.u32 $A3xB,$Bi,${A1}[1] + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + + vmull.u32 $A4xB,$Bi,${A2}[0] + vmull.u32 $A5xB,$Bi,${A2}[1] + vmull.u32 $A6xB,$Bi,${A3}[0] + vmull.u32 $A7xB,$Bi,${A3}[1] + + bne .LNEON_1st + + vmlal.u32 $A0xB,$Ni,${N0}[0] + add $tinptr,sp,#16 + vmlal.u32 $A1xB,$Ni,${N0}[1] + sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr + vmlal.u32 $A2xB,$Ni,${N1}[0] + vld1.64 {$Temp}, [sp,:128] + vmlal.u32 $A3xB,$Ni,${N1}[1] + sub $outer,$num,#1 + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vshr.u64 $temp,$temp,#16 + vld1.64 {$A0xB}, [$tinptr, :128]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + vmlal.u32 $A7xB,$Ni,${N3}[1] + + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + veor $Z,$Z,$Z + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! 
+ vst1.64 {$Z}, [$toutptr,:128] + vshr.u64 $temp,$temp,#16 + + b .LNEON_outer + +.align 4 +.LNEON_outer: + vld1.32 {${Bi}[0]}, [$bptr,:32]! + sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr + vld1.32 {$A0-$A3}, [$aptr]! + veor $zero,$zero,$zero + mov $toutptr,sp + vzip.16 $Bi,$zero + sub $inner,$num,#8 + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + + vmlal.u32 $A0xB,$Bi,${A0}[0] + vld1.64 {$A3xB-$A4xB},[$tinptr,:256]! + vmlal.u32 $A1xB,$Bi,${A0}[1] + vmlal.u32 $A2xB,$Bi,${A1}[0] + vld1.64 {$A5xB-$A6xB},[$tinptr,:256]! + vmlal.u32 $A3xB,$Bi,${A1}[1] + + vshl.i64 $temp,`&Dhi("$A0xB")`,#16 + veor $zero,$zero,$zero + vadd.u64 $temp,$temp,`&Dlo("$A0xB")` + vld1.64 {$A7xB},[$tinptr,:128]! + vmul.u32 $Ni,$temp,$M0 + + vmlal.u32 $A4xB,$Bi,${A2}[0] + vld1.32 {$N0-$N3}, [$nptr]! + vmlal.u32 $A5xB,$Bi,${A2}[1] + vmlal.u32 $A6xB,$Bi,${A3}[0] + vzip.16 $Ni,$zero + vmlal.u32 $A7xB,$Bi,${A3}[1] + +.LNEON_inner: + vmlal.u32 $A0xB,$Ni,${N0}[0] + vld1.32 {$A0-$A3}, [$aptr]! + vmlal.u32 $A1xB,$Ni,${N0}[1] + subs $inner,$inner,#8 + vmlal.u32 $A2xB,$Ni,${N1}[0] + vmlal.u32 $A3xB,$Ni,${N1}[1] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vld1.64 {$A0xB}, [$tinptr, :128]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vmlal.u32 $A7xB,$Ni,${N3}[1] + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + + vmlal.u32 $A0xB,$Bi,${A0}[0] + vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! + vmlal.u32 $A1xB,$Bi,${A0}[1] + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + vmlal.u32 $A2xB,$Bi,${A1}[0] + vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! + vmlal.u32 $A3xB,$Bi,${A1}[1] + vld1.32 {$N0-$N3}, [$nptr]! + + vmlal.u32 $A4xB,$Bi,${A2}[0] + vld1.64 {$A7xB}, [$tinptr, :128]! + vmlal.u32 $A5xB,$Bi,${A2}[1] + vmlal.u32 $A6xB,$Bi,${A3}[0] + vmlal.u32 $A7xB,$Bi,${A3}[1] + + bne .LNEON_inner + + vmlal.u32 $A0xB,$Ni,${N0}[0] + add $tinptr,sp,#16 + vmlal.u32 $A1xB,$Ni,${N0}[1] + sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr + vmlal.u32 $A2xB,$Ni,${N1}[0] + vld1.64 {$Temp}, [sp,:128] + vmlal.u32 $A3xB,$Ni,${N1}[1] + subs $outer,$outer,#1 + + vmlal.u32 $A4xB,$Ni,${N2}[0] + vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]! + vmlal.u32 $A5xB,$Ni,${N2}[1] + vld1.64 {$A0xB}, [$tinptr, :128]! + vshr.u64 $temp,$temp,#16 + vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]! + vmlal.u32 $A6xB,$Ni,${N3}[0] + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vmlal.u32 $A7xB,$Ni,${N3}[1] + + vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]! + vadd.u64 $temp,$temp,`&Dhi("$Temp")` + vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]! + vshr.u64 $temp,$temp,#16 + + bne .LNEON_outer + + mov $toutptr,sp + mov $inner,$num + +.LNEON_tail: + vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp + vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]! + vshr.u64 $temp,`&Dlo("$A0xB")`,#16 + vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp + vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]! + vshr.u64 $temp,`&Dhi("$A0xB")`,#16 + vld1.64 {$A7xB}, [$tinptr, :128]! + vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")` + +.LNEON_tail2: + vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp + vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A1xB")`,#16 + vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp + vshr.u64 $temp,`&Dhi("$A1xB")`,#16 + vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")` + + vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp + vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]! 
+ vshr.u64 $temp,`&Dlo("$A2xB")`,#16 + vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp + vshr.u64 $temp,`&Dhi("$A2xB")`,#16 + vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")` + + vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp + vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A3xB")`,#16 + vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp + vshr.u64 $temp,`&Dhi("$A3xB")`,#16 + vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")` + + vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp + vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A4xB")`,#16 + vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp + vshr.u64 $temp,`&Dhi("$A4xB")`,#16 + vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")` + + vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp + vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A5xB")`,#16 + vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp + vshr.u64 $temp,`&Dhi("$A5xB")`,#16 + vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")` + + vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp + vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A6xB")`,#16 + vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp + vld1.64 {$A0xB}, [$tinptr, :128]! + vshr.u64 $temp,`&Dhi("$A6xB")`,#16 + vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")` + + vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp + vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]! + vshr.u64 $temp,`&Dlo("$A7xB")`,#16 + vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp + vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]! + vshr.u64 $temp,`&Dhi("$A7xB")`,#16 + vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")` + subs $inner,$inner,#8 + vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]! + + bne .LNEON_tail + + vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit + sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr + subs $aptr,sp,#0 @ clear carry flag + add $bptr,sp,$num,lsl#2 + +.LNEON_sub: + ldmia $aptr!, {r4-r7} + ldmia $nptr!, {r8-r11} + sbcs r8, r4,r8 + sbcs r9, r5,r9 + sbcs r10,r6,r10 + sbcs r11,r7,r11 + teq $aptr,$bptr @ preserves carry + stmia $rptr!, {r8-r11} + bne .LNEON_sub + + ldr r10, [$aptr] @ load top-most bit + veor q0,q0,q0 + sub r11,$bptr,sp @ this is num*4 + veor q1,q1,q1 + mov $aptr,sp + sub $rptr,$rptr,r11 @ rewind $rptr + mov $nptr,$bptr @ second 3/4th of frame + sbcs r10,r10,#0 @ result is carry flag + +.LNEON_copy_n_zap: + ldmia $aptr!, {r4-r7} + ldmia $rptr, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [$nptr,:256]! @ wipe + movcc r11,r7 + ldmia $aptr, {r4-r7} + stmia $rptr!, {r8-r11} + sub $aptr,$aptr,#16 + ldmia $rptr, {r8-r11} + movcc r8, r4 + vst1.64 {q0-q1}, [$aptr,:256]! @ wipe + movcc r9, r5 + movcc r10,r6 + vst1.64 {q0-q1}, [$nptr,:256]! 
@ wipe + movcc r11,r7 + teq $aptr,$bptr @ preserves carry + stmia $rptr!, {r8-r11} + bne .LNEON_copy_n_zap + + sub sp,ip,#96 + vldmia sp!,{d8-d15} + ldmia sp!,{r4-r11} + ret @ bx lr +.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon +#endif +___ +} +$code.=<<___; +.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by " .align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif ___ +$code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx lr/gm; print $code; close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl index caae04ed3a8644..a33cdf411121ff 100644 --- a/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/mips-mont.pl @@ -46,7 +46,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 diff --git a/deps/openssl/openssl/crypto/bn/asm/mips.pl b/deps/openssl/openssl/crypto/bn/asm/mips.pl index 215c9a7483257b..acafde5e5685cc 100644 --- a/deps/openssl/openssl/crypto/bn/asm/mips.pl +++ b/deps/openssl/openssl/crypto/bn/asm/mips.pl @@ -48,7 +48,7 @@ # has to content with 40-85% improvement depending on benchmark and # key length, more for longer keys. -$flavour = shift; +$flavour = shift || "o32"; while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; diff --git a/deps/openssl/openssl/crypto/bn/asm/mips3.s b/deps/openssl/openssl/crypto/bn/asm/mips3.s new file mode 100644 index 00000000000000..dca4105c7db1b4 --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/asm/mips3.s @@ -0,0 +1,2201 @@ +.rdata +.asciiz "mips3.s, Version 1.1" +.asciiz "MIPS III/IV ISA artwork by Andy Polyakov " + +/* + * ==================================================================== + * Written by Andy Polyakov for the OpenSSL + * project. + * + * Rights for redistribution and usage in source and binary forms are + * granted according to the OpenSSL license. Warranty of any kind is + * disclaimed. + * ==================================================================== + */ + +/* + * This is my modest contributon to the OpenSSL project (see + * http://www.openssl.org/ for more information about it) and is + * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c + * module. For updates see http://fy.chalmers.se/~appro/hpe/. + * + * The module is designed to work with either of the "new" MIPS ABI(5), + * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under + * IRIX 5.x not only because it doesn't support new ABIs but also + * because 5.x kernels put R4x00 CPU into 32-bit mode and all those + * 64-bit instructions (daddu, dmultu, etc.) found below gonna only + * cause illegal instruction exception:-( + * + * In addition the code depends on preprocessor flags set up by MIPSpro + * compiler driver (either as or cc) and therefore (probably?) can't be + * compiled by the GNU assembler. GNU C driver manages fine though... + * I mean as long as -mmips-as is specified or is the default option, + * because then it simply invokes /usr/bin/as which in turn takes + * perfect care of the preprocessor definitions. Another neat feature + * offered by the MIPSpro assembler is an optimization pass. 
This gave + * me the opportunity to have the code looking more regular as all those + * architecture dependent instruction rescheduling details were left to + * the assembler. Cool, huh? + * + * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' + * goes way over 3 times faster! + * + * + */ +#include +#include + +#if _MIPS_ISA>=4 +#define MOVNZ(cond,dst,src) \ + movn dst,src,cond +#else +#define MOVNZ(cond,dst,src) \ + .set noreorder; \ + bnezl cond,.+8; \ + move dst,src; \ + .set reorder +#endif + +.text + +.set noat +.set reorder + +#define MINUS4 v1 + +.align 5 +LEAF(bn_mul_add_words) + .set noreorder + bgtzl a2,.L_bn_mul_add_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_mul_add_words_proceed: + li MINUS4,-4 + and ta0,a2,MINUS4 + move v0,zero + beqz ta0,.L_bn_mul_add_words_tail + +.L_bn_mul_add_words_loop: + dmultu t0,a3 + ld t1,0(a0) + ld t2,8(a1) + ld t3,8(a0) + ld ta0,16(a1) + ld ta1,16(a0) + daddu t1,v0 + sltu v0,t1,v0 /* All manuals say it "compares 32-bit + * values", but it seems to work fine + * even on 64-bit registers. */ + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,0(a0) + daddu v0,AT + + dmultu t2,a3 + ld ta2,24(a1) + ld ta3,24(a0) + daddu t3,v0 + sltu v0,t3,v0 + mflo AT + mfhi t2 + daddu t3,AT + daddu v0,t2 + sltu AT,t3,AT + sd t3,8(a0) + daddu v0,AT + + dmultu ta0,a3 + subu a2,4 + PTR_ADD a0,32 + PTR_ADD a1,32 + daddu ta1,v0 + sltu v0,ta1,v0 + mflo AT + mfhi ta0 + daddu ta1,AT + daddu v0,ta0 + sltu AT,ta1,AT + sd ta1,-16(a0) + daddu v0,AT + + + dmultu ta2,a3 + and ta0,a2,MINUS4 + daddu ta3,v0 + sltu v0,ta3,v0 + mflo AT + mfhi ta2 + daddu ta3,AT + daddu v0,ta2 + sltu AT,ta3,AT + sd ta3,-8(a0) + daddu v0,AT + .set noreorder + bgtzl ta0,.L_bn_mul_add_words_loop + ld t0,0(a1) + + bnezl a2,.L_bn_mul_add_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_mul_add_words_return: + jr ra + +.L_bn_mul_add_words_tail: + dmultu t0,a3 + ld t1,0(a0) + subu a2,1 + daddu t1,v0 + sltu v0,t1,v0 + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,0(a0) + daddu v0,AT + beqz a2,.L_bn_mul_add_words_return + + ld t0,8(a1) + dmultu t0,a3 + ld t1,8(a0) + subu a2,1 + daddu t1,v0 + sltu v0,t1,v0 + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,8(a0) + daddu v0,AT + beqz a2,.L_bn_mul_add_words_return + + ld t0,16(a1) + dmultu t0,a3 + ld t1,16(a0) + daddu t1,v0 + sltu v0,t1,v0 + mflo AT + mfhi t0 + daddu t1,AT + daddu v0,t0 + sltu AT,t1,AT + sd t1,16(a0) + daddu v0,AT + jr ra +END(bn_mul_add_words) + +.align 5 +LEAF(bn_mul_words) + .set noreorder + bgtzl a2,.L_bn_mul_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_mul_words_proceed: + li MINUS4,-4 + and ta0,a2,MINUS4 + move v0,zero + beqz ta0,.L_bn_mul_words_tail + +.L_bn_mul_words_loop: + dmultu t0,a3 + ld t2,8(a1) + ld ta0,16(a1) + ld ta2,24(a1) + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,0(a0) + daddu v0,t1,t0 + + dmultu t2,a3 + subu a2,4 + PTR_ADD a0,32 + PTR_ADD a1,32 + mflo AT + mfhi t2 + daddu v0,AT + sltu t3,v0,AT + sd v0,-24(a0) + daddu v0,t3,t2 + + dmultu ta0,a3 + mflo AT + mfhi ta0 + daddu v0,AT + sltu ta1,v0,AT + sd v0,-16(a0) + daddu v0,ta1,ta0 + + + dmultu ta2,a3 + and ta0,a2,MINUS4 + mflo AT + mfhi ta2 + daddu v0,AT + sltu ta3,v0,AT + sd v0,-8(a0) + daddu v0,ta3,ta2 + .set noreorder + bgtzl ta0,.L_bn_mul_words_loop + ld t0,0(a1) + + bnezl a2,.L_bn_mul_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_mul_words_return: + jr ra + +.L_bn_mul_words_tail: + dmultu t0,a3 + subu a2,1 + mflo 
AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,0(a0) + daddu v0,t1,t0 + beqz a2,.L_bn_mul_words_return + + ld t0,8(a1) + dmultu t0,a3 + subu a2,1 + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,8(a0) + daddu v0,t1,t0 + beqz a2,.L_bn_mul_words_return + + ld t0,16(a1) + dmultu t0,a3 + mflo AT + mfhi t0 + daddu v0,AT + sltu t1,v0,AT + sd v0,16(a0) + daddu v0,t1,t0 + jr ra +END(bn_mul_words) + +.align 5 +LEAF(bn_sqr_words) + .set noreorder + bgtzl a2,.L_bn_sqr_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_sqr_words_proceed: + li MINUS4,-4 + and ta0,a2,MINUS4 + move v0,zero + beqz ta0,.L_bn_sqr_words_tail + +.L_bn_sqr_words_loop: + dmultu t0,t0 + ld t2,8(a1) + ld ta0,16(a1) + ld ta2,24(a1) + mflo t1 + mfhi t0 + sd t1,0(a0) + sd t0,8(a0) + + dmultu t2,t2 + subu a2,4 + PTR_ADD a0,64 + PTR_ADD a1,32 + mflo t3 + mfhi t2 + sd t3,-48(a0) + sd t2,-40(a0) + + dmultu ta0,ta0 + mflo ta1 + mfhi ta0 + sd ta1,-32(a0) + sd ta0,-24(a0) + + + dmultu ta2,ta2 + and ta0,a2,MINUS4 + mflo ta3 + mfhi ta2 + sd ta3,-16(a0) + sd ta2,-8(a0) + + .set noreorder + bgtzl ta0,.L_bn_sqr_words_loop + ld t0,0(a1) + + bnezl a2,.L_bn_sqr_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_sqr_words_return: + move v0,zero + jr ra + +.L_bn_sqr_words_tail: + dmultu t0,t0 + subu a2,1 + mflo t1 + mfhi t0 + sd t1,0(a0) + sd t0,8(a0) + beqz a2,.L_bn_sqr_words_return + + ld t0,8(a1) + dmultu t0,t0 + subu a2,1 + mflo t1 + mfhi t0 + sd t1,16(a0) + sd t0,24(a0) + beqz a2,.L_bn_sqr_words_return + + ld t0,16(a1) + dmultu t0,t0 + mflo t1 + mfhi t0 + sd t1,32(a0) + sd t0,40(a0) + jr ra +END(bn_sqr_words) + +.align 5 +LEAF(bn_add_words) + .set noreorder + bgtzl a3,.L_bn_add_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_add_words_proceed: + li MINUS4,-4 + and AT,a3,MINUS4 + move v0,zero + beqz AT,.L_bn_add_words_tail + +.L_bn_add_words_loop: + ld ta0,0(a2) + subu a3,4 + ld t1,8(a1) + and AT,a3,MINUS4 + ld t2,16(a1) + PTR_ADD a2,32 + ld t3,24(a1) + PTR_ADD a0,32 + ld ta1,-24(a2) + PTR_ADD a1,32 + ld ta2,-16(a2) + ld ta3,-8(a2) + daddu ta0,t0 + sltu t8,ta0,t0 + daddu t0,ta0,v0 + sltu v0,t0,ta0 + sd t0,-32(a0) + daddu v0,t8 + + daddu ta1,t1 + sltu t9,ta1,t1 + daddu t1,ta1,v0 + sltu v0,t1,ta1 + sd t1,-24(a0) + daddu v0,t9 + + daddu ta2,t2 + sltu t8,ta2,t2 + daddu t2,ta2,v0 + sltu v0,t2,ta2 + sd t2,-16(a0) + daddu v0,t8 + + daddu ta3,t3 + sltu t9,ta3,t3 + daddu t3,ta3,v0 + sltu v0,t3,ta3 + sd t3,-8(a0) + daddu v0,t9 + + .set noreorder + bgtzl AT,.L_bn_add_words_loop + ld t0,0(a1) + + bnezl a3,.L_bn_add_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_add_words_return: + jr ra + +.L_bn_add_words_tail: + ld ta0,0(a2) + daddu ta0,t0 + subu a3,1 + sltu t8,ta0,t0 + daddu t0,ta0,v0 + sltu v0,t0,ta0 + sd t0,0(a0) + daddu v0,t8 + beqz a3,.L_bn_add_words_return + + ld t1,8(a1) + ld ta1,8(a2) + daddu ta1,t1 + subu a3,1 + sltu t9,ta1,t1 + daddu t1,ta1,v0 + sltu v0,t1,ta1 + sd t1,8(a0) + daddu v0,t9 + beqz a3,.L_bn_add_words_return + + ld t2,16(a1) + ld ta2,16(a2) + daddu ta2,t2 + sltu t8,ta2,t2 + daddu t2,ta2,v0 + sltu v0,t2,ta2 + sd t2,16(a0) + daddu v0,t8 + jr ra +END(bn_add_words) + +.align 5 +LEAF(bn_sub_words) + .set noreorder + bgtzl a3,.L_bn_sub_words_proceed + ld t0,0(a1) + jr ra + move v0,zero + .set reorder + +.L_bn_sub_words_proceed: + li MINUS4,-4 + and AT,a3,MINUS4 + move v0,zero + beqz AT,.L_bn_sub_words_tail + +.L_bn_sub_words_loop: + ld ta0,0(a2) + subu a3,4 + ld t1,8(a1) + and AT,a3,MINUS4 + ld t2,16(a1) + PTR_ADD a2,32 + ld t3,24(a1) + PTR_ADD a0,32 + ld ta1,-24(a2) + PTR_ADD 
a1,32 + ld ta2,-16(a2) + ld ta3,-8(a2) + sltu t8,t0,ta0 + dsubu t0,ta0 + dsubu ta0,t0,v0 + sd ta0,-32(a0) + MOVNZ (t0,v0,t8) + + sltu t9,t1,ta1 + dsubu t1,ta1 + dsubu ta1,t1,v0 + sd ta1,-24(a0) + MOVNZ (t1,v0,t9) + + + sltu t8,t2,ta2 + dsubu t2,ta2 + dsubu ta2,t2,v0 + sd ta2,-16(a0) + MOVNZ (t2,v0,t8) + + sltu t9,t3,ta3 + dsubu t3,ta3 + dsubu ta3,t3,v0 + sd ta3,-8(a0) + MOVNZ (t3,v0,t9) + + .set noreorder + bgtzl AT,.L_bn_sub_words_loop + ld t0,0(a1) + + bnezl a3,.L_bn_sub_words_tail + ld t0,0(a1) + .set reorder + +.L_bn_sub_words_return: + jr ra + +.L_bn_sub_words_tail: + ld ta0,0(a2) + subu a3,1 + sltu t8,t0,ta0 + dsubu t0,ta0 + dsubu ta0,t0,v0 + MOVNZ (t0,v0,t8) + sd ta0,0(a0) + beqz a3,.L_bn_sub_words_return + + ld t1,8(a1) + subu a3,1 + ld ta1,8(a2) + sltu t9,t1,ta1 + dsubu t1,ta1 + dsubu ta1,t1,v0 + MOVNZ (t1,v0,t9) + sd ta1,8(a0) + beqz a3,.L_bn_sub_words_return + + ld t2,16(a1) + ld ta2,16(a2) + sltu t8,t2,ta2 + dsubu t2,ta2 + dsubu ta2,t2,v0 + MOVNZ (t2,v0,t8) + sd ta2,16(a0) + jr ra +END(bn_sub_words) + +#undef MINUS4 + +.align 5 +LEAF(bn_div_3_words) + .set reorder + move a3,a0 /* we know that bn_div_words doesn't + * touch a3, ta2, ta3 and preserves a2 + * so that we can save two arguments + * and return address in registers + * instead of stack:-) + */ + ld a0,(a3) + move ta2,a1 + ld a1,-8(a3) + bne a0,a2,.L_bn_div_3_words_proceed + li v0,-1 + jr ra +.L_bn_div_3_words_proceed: + move ta3,ra + bal bn_div_words + move ra,ta3 + dmultu ta2,v0 + ld t2,-16(a3) + move ta0,zero + mfhi t1 + mflo t0 + sltu t8,t1,v1 +.L_bn_div_3_words_inner_loop: + bnez t8,.L_bn_div_3_words_inner_loop_done + sgeu AT,t2,t0 + seq t9,t1,v1 + and AT,t9 + sltu t3,t0,ta2 + daddu v1,a2 + dsubu t1,t3 + dsubu t0,ta2 + sltu t8,t1,v1 + sltu ta0,v1,a2 + or t8,ta0 + .set noreorder + beqzl AT,.L_bn_div_3_words_inner_loop + dsubu v0,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + jr ra +END(bn_div_3_words) + +.align 5 +LEAF(bn_div_words) + .set noreorder + bnezl a2,.L_bn_div_words_proceed + move v1,zero + jr ra + li v0,-1 /* I'd rather signal div-by-zero + * which can be done with 'break 7' */ + +.L_bn_div_words_proceed: + bltz a2,.L_bn_div_words_body + move t9,v1 + dsll a2,1 + bgtz a2,.-4 + addu t9,1 + + .set reorder + negu t1,t9 + li t2,-1 + dsll t2,t1 + and t2,a0 + dsrl AT,a1,t1 + .set noreorder + bnezl t2,.+8 + break 6 /* signal overflow */ + .set reorder + dsll a0,t9 + dsll a1,t9 + or a0,AT + +#define QT ta0 +#define HH ta1 +#define DH v1 +.L_bn_div_words_body: + dsrl DH,a2,32 + sgeu AT,a0,a2 + .set noreorder + bnezl AT,.+8 + dsubu a0,a2 + .set reorder + + li QT,-1 + dsrl HH,a0,32 + dsrl QT,32 /* q=0xffffffff */ + beq DH,HH,.L_bn_div_words_skip_div1 + ddivu zero,a0,DH + mflo QT +.L_bn_div_words_skip_div1: + dmultu a2,QT + dsll t3,a0,32 + dsrl AT,a1,32 + or t3,AT + mflo t0 + mfhi t1 +.L_bn_div_words_inner_loop1: + sltu t2,t3,t0 + seq t8,HH,t1 + sltu AT,HH,t1 + and t2,t8 + sltu v0,t0,a2 + or AT,t2 + .set noreorder + beqz AT,.L_bn_div_words_inner_loop1_done + dsubu t1,v0 + dsubu t0,a2 + b .L_bn_div_words_inner_loop1 + dsubu QT,1 + .set reorder +.L_bn_div_words_inner_loop1_done: + + dsll a1,32 + dsubu a0,t3,t0 + dsll v0,QT,32 + + li QT,-1 + dsrl HH,a0,32 + dsrl QT,32 /* q=0xffffffff */ + beq DH,HH,.L_bn_div_words_skip_div2 + ddivu zero,a0,DH + mflo QT +.L_bn_div_words_skip_div2: +#undef DH + dmultu a2,QT + dsll t3,a0,32 + dsrl AT,a1,32 + or t3,AT + mflo t0 + mfhi t1 +.L_bn_div_words_inner_loop2: + sltu t2,t3,t0 + seq t8,HH,t1 + sltu AT,HH,t1 + and t2,t8 + sltu v1,t0,a2 + or AT,t2 + .set noreorder + beqz 
AT,.L_bn_div_words_inner_loop2_done + dsubu t1,v1 + dsubu t0,a2 + b .L_bn_div_words_inner_loop2 + dsubu QT,1 + .set reorder +.L_bn_div_words_inner_loop2_done: +#undef HH + + dsubu a0,t3,t0 + or v0,QT + dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ + dsrl a2,t9 /* restore a2 */ + jr ra +#undef QT +END(bn_div_words) + +#define a_0 t0 +#define a_1 t1 +#define a_2 t2 +#define a_3 t3 +#define b_0 ta0 +#define b_1 ta1 +#define b_2 ta2 +#define b_3 ta3 + +#define a_4 s0 +#define a_5 s2 +#define a_6 s4 +#define a_7 a1 /* once we load a[7] we don't need a anymore */ +#define b_4 s1 +#define b_5 s3 +#define b_6 s5 +#define b_7 a2 /* once we load b[7] we don't need b anymore */ + +#define t_1 t8 +#define t_2 t9 + +#define c_1 v0 +#define c_2 v1 +#define c_3 a3 + +#define FRAME_SIZE 48 + +.align 5 +LEAF(bn_mul_comba8) + .set noreorder + PTR_SUB sp,FRAME_SIZE + .frame sp,64,ra + .set reorder + ld a_0,0(a1) /* If compiled with -mips3 option on + * R5000 box assembler barks on this + * line with "shouldn't have mult/div + * as last instruction in bb (R10K + * bug)" warning. If anybody out there + * has a clue about how to circumvent + * this do send me a note. + * + */ + ld b_0,0(a2) + ld a_1,8(a1) + ld a_2,16(a1) + ld a_3,24(a1) + ld b_1,8(a2) + ld b_2,16(a2) + ld b_3,24(a2) + dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + sd s0,0(sp) + sd s1,8(sp) + sd s2,16(sp) + sd s3,24(sp) + sd s4,32(sp) + sd s5,40(sp) + mflo c_1 + mfhi c_2 + + dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ + ld a_4,32(a1) + ld a_5,40(a1) + ld a_6,48(a1) + ld a_7,56(a1) + ld b_4,32(a2) + ld b_5,40(a2) + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ + ld b_6,48(a2) + ld b_7,56(a2) + sd c_1,0(a0) /* r[0]=c1; */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + sd c_2,8(a0) /* r[1]=c2; */ + + dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) /* r[2]=c3; */ + + dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) /* r[3]=c1; */ + + dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ 
+ mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) /* r[4]=c2; */ + + dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) /* r[5]=c3; */ + + dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,48(a0) /* r[6]=c1; */ + + dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ + mflo t_1 + mfhi 
t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,56(a0) /* r[7]=c2; */ + + dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,64(a0) /* r[8]=c3; */ + + dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,72(a0) /* r[9]=c1; */ + + dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu 
c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,80(a0) /* r[10]=c2; */ + + dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,88(a0) /* r[11]=c3; */ + + dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,96(a0) /* r[12]=c1; */ + + dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,104(a0) /* r[13]=c2; */ + + dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ + ld s0,0(sp) + ld s1,8(sp) + ld s2,16(sp) + ld s3,24(sp) + ld s4,32(sp) + ld s5,40(sp) + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sd c_3,112(a0) /* r[14]=c3; */ + sd c_1,120(a0) /* r[15]=c1; */ + + PTR_ADD sp,FRAME_SIZE + + jr ra +END(bn_mul_comba8) + +.align 5 +LEAF(bn_mul_comba4) + .set reorder + ld a_0,0(a1) + ld b_0,0(a2) + ld a_1,8(a1) + ld a_2,16(a1) + dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + ld a_3,24(a1) + ld b_1,8(a2) + ld b_2,16(a2) + ld b_3,24(a2) + mflo c_1 + mfhi c_2 + sd c_1,0(a0) + + dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + sd c_2,8(a0) + + dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) + + dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu c_3,c_2,t_2 + dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ 
+ mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) + + dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu c_1,c_3,t_2 + dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) + + dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu c_2,c_1,t_2 + dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) + + dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sd c_1,48(a0) + sd c_2,56(a0) + + jr ra +END(bn_mul_comba4) + +#undef a_4 +#undef a_5 +#undef a_6 +#undef a_7 +#define a_4 b_0 +#define a_5 b_1 +#define a_6 b_2 +#define a_7 b_3 + +.align 5 +LEAF(bn_sqr_comba8) + .set reorder + ld a_0,0(a1) + ld a_1,8(a1) + ld a_2,16(a1) + ld a_3,24(a1) + + dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + ld a_4,32(a1) + ld a_5,40(a1) + ld a_6,48(a1) + ld a_7,56(a1) + mflo c_1 + mfhi c_2 + sd c_1,0(a0) + + dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + sd c_2,8(a0) + + dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) + + dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) + + dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt 
a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) + + dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) + + dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,48(a0) + + dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,56(a0) + + dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + 
dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,64(a0) + + dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,72(a0) + + dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_1,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,80(a0) + + dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_2,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,88(a0) + + dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,96(a0) + + dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,104(a0) + + dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sd c_3,112(a0) + sd 
c_1,120(a0) + + jr ra +END(bn_sqr_comba8) + +.align 5 +LEAF(bn_sqr_comba4) + .set reorder + ld a_0,0(a1) + ld a_1,8(a1) + ld a_2,16(a1) + ld a_3,24(a1) + dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ + mflo c_1 + mfhi c_2 + sd c_1,0(a0) + + dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu c_3,t_2,AT + sd c_2,8(a0) + + dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,16(a0) + + dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt c_3,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + slt AT,t_2,zero + daddu c_3,AT + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sltu AT,c_2,t_2 + daddu c_3,AT + sd c_1,24(a0) + + dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + slt c_1,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ + mflo t_1 + mfhi t_2 + daddu c_2,t_1 + sltu AT,c_2,t_1 + daddu t_2,AT + daddu c_3,t_2 + sltu AT,c_3,t_2 + daddu c_1,AT + sd c_2,32(a0) + + dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ + mflo t_1 + mfhi t_2 + slt c_2,t_2,zero + dsll t_2,1 + slt a2,t_1,zero + daddu t_2,a2 + dsll t_1,1 + daddu c_3,t_1 + sltu AT,c_3,t_1 + daddu t_2,AT + daddu c_1,t_2 + sltu AT,c_1,t_2 + daddu c_2,AT + sd c_3,40(a0) + + dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ + mflo t_1 + mfhi t_2 + daddu c_1,t_1 + sltu AT,c_1,t_1 + daddu t_2,AT + daddu c_2,t_2 + sd c_1,48(a0) + sd c_2,56(a0) + + jr ra +END(bn_sqr_comba4) diff --git a/deps/openssl/openssl/crypto/bn/asm/modexp512-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/modexp512-x86_64.pl deleted file mode 100644 index bfd6e975416de8..00000000000000 --- a/deps/openssl/openssl/crypto/bn/asm/modexp512-x86_64.pl +++ /dev/null @@ -1,1497 +0,0 @@ -#!/usr/bin/env perl -# -# Copyright (c) 2010-2011 Intel Corp. -# Author: Vinodh.Gopal@intel.com -# Jim Guilford -# Erdinc.Ozturk@intel.com -# Maxim.Perminov@intel.com -# -# More information about algorithm used can be found at: -# http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf -# -# ==================================================================== -# Copyright (c) 2011 The OpenSSL Project. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# 3. All advertising materials mentioning features or use of this -# software must display the following acknowledgment: -# "This product includes software developed by the OpenSSL Project -# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" -# -# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to -# endorse or promote products derived from this software without -# prior written permission. For written permission, please contact -# licensing@OpenSSL.org. -# -# 5. Products derived from this software may not be called "OpenSSL" -# nor may "OpenSSL" appear in their names without prior written -# permission of the OpenSSL Project. -# -# 6. Redistributions of any form whatsoever must retain the following -# acknowledgment: -# "This product includes software developed by the OpenSSL Project -# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" -# -# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY -# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR -# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. 
-# ==================================================================== - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -use strict; -my $code=".text\n\n"; -my $m=0; - -# -# Define x512 macros -# - -#MULSTEP_512_ADD MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src1, src2, add_src, tmp1, tmp2 -# -# uses rax, rdx, and args -sub MULSTEP_512_ADD -{ - my ($x, $DST, $SRC2, $ASRC, $OP, $TMP)=@_; - my @X=@$x; # make a copy -$code.=<<___; - mov (+8*0)($SRC2), %rax - mul $OP # rdx:rax = %OP * [0] - mov ($ASRC), $X[0] - add %rax, $X[0] - adc \$0, %rdx - mov $X[0], $DST -___ -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov %rdx, $TMP - - mov (+8*$i)($SRC2), %rax - mul $OP # rdx:rax = %OP * [$i] - mov (+8*$i)($ASRC), $X[$i] - add %rax, $X[$i] - adc \$0, %rdx - add $TMP, $X[$i] - adc \$0, %rdx -___ -} -$code.=<<___; - mov %rdx, $X[0] -___ -} - -#MULSTEP_512 MACRO x7, x6, x5, x4, x3, x2, x1, x0, dst, src2, src1_val, tmp -# -# uses rax, rdx, and args -sub MULSTEP_512 -{ - my ($x, $DST, $SRC2, $OP, $TMP)=@_; - my @X=@$x; # make a copy -$code.=<<___; - mov (+8*0)($SRC2), %rax - mul $OP # rdx:rax = %OP * [0] - add %rax, $X[0] - adc \$0, %rdx - mov $X[0], $DST -___ -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov %rdx, $TMP - - mov (+8*$i)($SRC2), %rax - mul $OP # rdx:rax = %OP * [$i] - add %rax, $X[$i] - adc \$0, %rdx - add $TMP, $X[$i] - adc \$0, %rdx -___ -} -$code.=<<___; - mov %rdx, $X[0] -___ -} - -# -# Swizzle Macros -# - -# macro to copy data from flat space to swizzled table -#MACRO swizzle pDst, pSrc, tmp1, tmp2 -# pDst and pSrc are modified -sub swizzle -{ - my ($pDst, $pSrc, $cnt, $d0)=@_; -$code.=<<___; - mov \$8, $cnt -loop_$m: - mov ($pSrc), $d0 - mov $d0#w, ($pDst) - shr \$16, $d0 - mov $d0#w, (+64*1)($pDst) - shr \$16, $d0 - mov $d0#w, (+64*2)($pDst) - shr \$16, $d0 - mov $d0#w, (+64*3)($pDst) - lea 8($pSrc), $pSrc - lea 64*4($pDst), $pDst - dec $cnt - jnz loop_$m -___ - - $m++; -} - -# macro to copy data from swizzled table to flat space -#MACRO unswizzle pDst, pSrc, tmp*3 -sub unswizzle -{ - my ($pDst, $pSrc, $cnt, $d0, $d1)=@_; -$code.=<<___; - mov \$4, $cnt -loop_$m: - movzxw (+64*3+256*0)($pSrc), $d0 - movzxw (+64*3+256*1)($pSrc), $d1 - shl \$16, $d0 - shl \$16, $d1 - mov (+64*2+256*0)($pSrc), $d0#w - mov (+64*2+256*1)($pSrc), $d1#w - shl \$16, $d0 - shl \$16, $d1 - mov (+64*1+256*0)($pSrc), $d0#w - mov (+64*1+256*1)($pSrc), $d1#w - shl \$16, $d0 - shl \$16, $d1 - mov (+64*0+256*0)($pSrc), $d0#w - mov (+64*0+256*1)($pSrc), $d1#w - mov $d0, (+8*0)($pDst) - mov $d1, (+8*1)($pDst) - lea 256*2($pSrc), $pSrc - lea 8*2($pDst), $pDst - sub \$1, $cnt - jnz loop_$m -___ - - $m++; -} - -# -# Data Structures -# - -# Reduce Data -# -# -# Offset Value -# 0C0 Carries -# 0B8 X2[10] -# 0B0 X2[9] -# 0A8 X2[8] -# 0A0 X2[7] -# 098 X2[6] -# 090 X2[5] -# 088 X2[4] -# 080 X2[3] -# 078 X2[2] -# 070 X2[1] -# 068 X2[0] -# 060 X1[12] P[10] -# 058 X1[11] P[9] Z[8] -# 050 X1[10] P[8] Z[7] -# 048 X1[9] P[7] Z[6] -# 040 X1[8] P[6] Z[5] -# 038 X1[7] P[5] Z[4] -# 030 X1[6] P[4] Z[3] -# 028 X1[5] P[3] Z[2] -# 020 X1[4] P[2] Z[1] -# 018 X1[3] P[1] Z[0] -# 010 X1[2] P[0] Y[2] -# 008 X1[1] Q[1] Y[1] -# 000 X1[0] Q[0] Y[0] - -my 
$X1_offset = 0; # 13 qwords -my $X2_offset = $X1_offset + 13*8; # 11 qwords -my $Carries_offset = $X2_offset + 11*8; # 1 qword -my $Q_offset = 0; # 2 qwords -my $P_offset = $Q_offset + 2*8; # 11 qwords -my $Y_offset = 0; # 3 qwords -my $Z_offset = $Y_offset + 3*8; # 9 qwords - -my $Red_Data_Size = $Carries_offset + 1*8; # (25 qwords) - -# -# Stack Frame -# -# -# offset value -# ... -# ... -# 280 Garray - -# 278 tmp16[15] -# ... ... -# 200 tmp16[0] - -# 1F8 tmp[7] -# ... ... -# 1C0 tmp[0] - -# 1B8 GT[7] -# ... ... -# 180 GT[0] - -# 178 Reduce Data -# ... ... -# 0B8 Reduce Data -# 0B0 reserved -# 0A8 reserved -# 0A0 reserved -# 098 reserved -# 090 reserved -# 088 reduce result addr -# 080 exp[8] - -# ... -# 048 exp[1] -# 040 exp[0] - -# 038 reserved -# 030 loop_idx -# 028 pg -# 020 i -# 018 pData ; arg 4 -# 010 pG ; arg 2 -# 008 pResult ; arg 1 -# 000 rsp ; stack pointer before subtract - -my $rsp_offset = 0; -my $pResult_offset = 8*1 + $rsp_offset; -my $pG_offset = 8*1 + $pResult_offset; -my $pData_offset = 8*1 + $pG_offset; -my $i_offset = 8*1 + $pData_offset; -my $pg_offset = 8*1 + $i_offset; -my $loop_idx_offset = 8*1 + $pg_offset; -my $reserved1_offset = 8*1 + $loop_idx_offset; -my $exp_offset = 8*1 + $reserved1_offset; -my $red_result_addr_offset= 8*9 + $exp_offset; -my $reserved2_offset = 8*1 + $red_result_addr_offset; -my $Reduce_Data_offset = 8*5 + $reserved2_offset; -my $GT_offset = $Red_Data_Size + $Reduce_Data_offset; -my $tmp_offset = 8*8 + $GT_offset; -my $tmp16_offset = 8*8 + $tmp_offset; -my $garray_offset = 8*16 + $tmp16_offset; -my $mem_size = 8*8*32 + $garray_offset; - -# -# Offsets within Reduce Data -# -# -# struct MODF_2FOLD_MONT_512_C1_DATA { -# UINT64 t[8][8]; -# UINT64 m[8]; -# UINT64 m1[8]; /* 2^768 % m */ -# UINT64 m2[8]; /* 2^640 % m */ -# UINT64 k1[2]; /* (- 1/m) % 2^128 */ -# }; - -my $T = 0; -my $M = 512; # = 8 * 8 * 8 -my $M1 = 576; # = 8 * 8 * 9 /* += 8 * 8 */ -my $M2 = 640; # = 8 * 8 * 10 /* += 8 * 8 */ -my $K1 = 704; # = 8 * 8 * 11 /* += 8 * 8 */ - -# -# FUNCTIONS -# - -{{{ -# -# MULADD_128x512 : Function to multiply 128-bits (2 qwords) by 512-bits (8 qwords) -# and add 512-bits (8 qwords) -# to get 640 bits (10 qwords) -# Input: 128-bit mul source: [rdi+8*1], rbp -# 512-bit mul source: [rsi+8*n] -# 512-bit add source: r15, r14, ..., r9, r8 -# Output: r9, r8, r15, r14, r13, r12, r11, r10, [rcx+8*1], [rcx+8*0] -# Clobbers all regs except: rcx, rsi, rdi -$code.=<<___; -.type MULADD_128x512,\@abi-omnipotent -.align 16 -MULADD_128x512: -___ - &MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx"); -$code.=<<___; - mov (+8*1)(%rdi), %rbp -___ - &MULSTEP_512([map("%r$_",(9..15,8))], "(+8*1)(%rcx)", "%rsi", "%rbp", "%rbx"); -$code.=<<___; - ret -.size MULADD_128x512,.-MULADD_128x512 -___ -}}} - -{{{ -#MULADD_256x512 MACRO pDst, pA, pB, OP, TMP, X7, X6, X5, X4, X3, X2, X1, X0 -# -# Inputs: pDst: Destination (768 bits, 12 qwords) -# pA: Multiplicand (1024 bits, 16 qwords) -# pB: Multiplicand (512 bits, 8 qwords) -# Dst = Ah * B + Al -# where Ah is (in qwords) A[15:12] (256 bits) and Al is A[7:0] (512 bits) -# Results in X3 X2 X1 X0 X7 X6 X5 X4 Dst[3:0] -# Uses registers: arguments, RAX, RDX -sub MULADD_256x512 -{ - my ($pDst, $pA, $pB, $OP, $TMP, $X)=@_; -$code.=<<___; - mov (+8*12)($pA), $OP -___ - &MULSTEP_512_ADD($X, "(+8*0)($pDst)", $pB, $pA, $OP, $TMP); - push(@$X,shift(@$X)); - -$code.=<<___; - mov (+8*13)($pA), $OP -___ - &MULSTEP_512($X, "(+8*1)($pDst)", $pB, $OP, $TMP); - push(@$X,shift(@$X)); - -$code.=<<___; - mov (+8*14)($pA), $OP 
-___ - &MULSTEP_512($X, "(+8*2)($pDst)", $pB, $OP, $TMP); - push(@$X,shift(@$X)); - -$code.=<<___; - mov (+8*15)($pA), $OP -___ - &MULSTEP_512($X, "(+8*3)($pDst)", $pB, $OP, $TMP); - push(@$X,shift(@$X)); -} - -# -# mont_reduce(UINT64 *x, /* 1024 bits, 16 qwords */ -# UINT64 *m, /* 512 bits, 8 qwords */ -# MODF_2FOLD_MONT_512_C1_DATA *data, -# UINT64 *r) /* 512 bits, 8 qwords */ -# Input: x (number to be reduced): tmp16 (Implicit) -# m (modulus): [pM] (Implicit) -# data (reduce data): [pData] (Implicit) -# Output: r (result): Address in [red_res_addr] -# result also in: r9, r8, r15, r14, r13, r12, r11, r10 - -my @X=map("%r$_",(8..15)); - -$code.=<<___; -.type mont_reduce,\@abi-omnipotent -.align 16 -mont_reduce: -___ - -my $STACK_DEPTH = 8; - # - # X1 = Xh * M1 + Xl -$code.=<<___; - lea (+$Reduce_Data_offset+$X1_offset+$STACK_DEPTH)(%rsp), %rdi # pX1 (Dst) 769 bits, 13 qwords - mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rsi # pM1 (Bsrc) 512 bits, 8 qwords - add \$$M1, %rsi - lea (+$tmp16_offset+$STACK_DEPTH)(%rsp), %rcx # X (Asrc) 1024 bits, 16 qwords - -___ - - &MULADD_256x512("%rdi", "%rcx", "%rsi", "%rbp", "%rbx", \@X); # rotates @X 4 times - # results in r11, r10, r9, r8, r15, r14, r13, r12, X1[3:0] - -$code.=<<___; - xor %rax, %rax - # X1 += xl - add (+8*8)(%rcx), $X[4] - adc (+8*9)(%rcx), $X[5] - adc (+8*10)(%rcx), $X[6] - adc (+8*11)(%rcx), $X[7] - adc \$0, %rax - # X1 is now rax, r11-r8, r15-r12, tmp16[3:0] - - # - # check for carry ;; carry stored in rax - mov $X[4], (+8*8)(%rdi) # rdi points to X1 - mov $X[5], (+8*9)(%rdi) - mov $X[6], %rbp - mov $X[7], (+8*11)(%rdi) - - mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp) - - mov (+8*0)(%rdi), $X[4] - mov (+8*1)(%rdi), $X[5] - mov (+8*2)(%rdi), $X[6] - mov (+8*3)(%rdi), $X[7] - - # X1 is now stored in: X1[11], rbp, X1[9:8], r15-r8 - # rdi -> X1 - # rsi -> M1 - - # - # X2 = Xh * M2 + Xl - # do first part (X2 = Xh * M2) - add \$8*10, %rdi # rdi -> pXh ; 128 bits, 2 qwords - # Xh is actually { [rdi+8*1], rbp } - add \$`$M2-$M1`, %rsi # rsi -> M2 - lea (+$Reduce_Data_offset+$X2_offset+$STACK_DEPTH)(%rsp), %rcx # rcx -> pX2 ; 641 bits, 11 qwords -___ - unshift(@X,pop(@X)); unshift(@X,pop(@X)); -$code.=<<___; - - call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8 - # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0] - mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rax - - # X2 += Xl - add (+8*8-8*10)(%rdi), $X[6] # (-8*10) is to adjust rdi -> Xh to Xl - adc (+8*9-8*10)(%rdi), $X[7] - mov $X[6], (+8*8)(%rcx) - mov $X[7], (+8*9)(%rcx) - - adc %rax, %rax - mov %rax, (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp) - - lea (+$Reduce_Data_offset+$Q_offset+$STACK_DEPTH)(%rsp), %rdi # rdi -> pQ ; 128 bits, 2 qwords - add \$`$K1-$M2`, %rsi # rsi -> pK1 ; 128 bits, 2 qwords - - # MUL_128x128t128 rdi, rcx, rsi ; Q = X2 * K1 (bottom half) - # B1:B0 = rsi[1:0] = K1[1:0] - # A1:A0 = rcx[1:0] = X2[1:0] - # Result = rdi[1],rbp = Q[1],rbp - mov (%rsi), %r8 # B0 - mov (+8*1)(%rsi), %rbx # B1 - - mov (%rcx), %rax # A0 - mul %r8 # B0 - mov %rax, %rbp - mov %rdx, %r9 - - mov (+8*1)(%rcx), %rax # A1 - mul %r8 # B0 - add %rax, %r9 - - mov (%rcx), %rax # A0 - mul %rbx # B1 - add %rax, %r9 - - mov %r9, (+8*1)(%rdi) - # end MUL_128x128t128 - - sub \$`$K1-$M`, %rsi - - mov (%rcx), $X[6] - mov (+8*1)(%rcx), $X[7] # r9:r8 = X2[1:0] - - call MULADD_128x512 # args in rcx, rdi / rbp, rsi, r15-r8 - # result in r9, r8, r15, r14, r13, r12, r11, r10, X2[1:0] - - # load first half of m to rdx, rdi, rbx, rax - # moved 
this here for efficiency - mov (+8*0)(%rsi), %rax - mov (+8*1)(%rsi), %rbx - mov (+8*2)(%rsi), %rdi - mov (+8*3)(%rsi), %rdx - - # continue with reduction - mov (+$Reduce_Data_offset+$Carries_offset+$STACK_DEPTH)(%rsp), %rbp - - add (+8*8)(%rcx), $X[6] - adc (+8*9)(%rcx), $X[7] - - #accumulate the final carry to rbp - adc %rbp, %rbp - - # Add in overflow corrections: R = (X2>>128) += T[overflow] - # R = {r9, r8, r15, r14, ..., r10} - shl \$3, %rbp - mov (+$pData_offset+$STACK_DEPTH)(%rsp), %rcx # rsi -> Data (and points to T) - add %rcx, %rbp # pT ; 512 bits, 8 qwords, spread out - - # rsi will be used to generate a mask after the addition - xor %rsi, %rsi - - add (+8*8*0)(%rbp), $X[0] - adc (+8*8*1)(%rbp), $X[1] - adc (+8*8*2)(%rbp), $X[2] - adc (+8*8*3)(%rbp), $X[3] - adc (+8*8*4)(%rbp), $X[4] - adc (+8*8*5)(%rbp), $X[5] - adc (+8*8*6)(%rbp), $X[6] - adc (+8*8*7)(%rbp), $X[7] - - # if there is a carry: rsi = 0xFFFFFFFFFFFFFFFF - # if carry is clear: rsi = 0x0000000000000000 - sbb \$0, %rsi - - # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m - and %rsi, %rax - and %rsi, %rbx - and %rsi, %rdi - and %rsi, %rdx - - mov \$1, %rbp - sub %rax, $X[0] - sbb %rbx, $X[1] - sbb %rdi, $X[2] - sbb %rdx, $X[3] - - # if there is a borrow: rbp = 0 - # if there is no borrow: rbp = 1 - # this is used to save the borrows in between the first half and the 2nd half of the subtraction of m - sbb \$0, %rbp - - #load second half of m to rdx, rdi, rbx, rax - - add \$$M, %rcx - mov (+8*4)(%rcx), %rax - mov (+8*5)(%rcx), %rbx - mov (+8*6)(%rcx), %rdi - mov (+8*7)(%rcx), %rdx - - # use the rsi mask as before - # if carry is clear, subtract 0. Otherwise, subtract 256 bits of m - and %rsi, %rax - and %rsi, %rbx - and %rsi, %rdi - and %rsi, %rdx - - # if rbp = 0, there was a borrow before, it is moved to the carry flag - # if rbp = 1, there was not a borrow before, carry flag is cleared - sub \$1, %rbp - - sbb %rax, $X[4] - sbb %rbx, $X[5] - sbb %rdi, $X[6] - sbb %rdx, $X[7] - - # write R back to memory - - mov (+$red_result_addr_offset+$STACK_DEPTH)(%rsp), %rsi - mov $X[0], (+8*0)(%rsi) - mov $X[1], (+8*1)(%rsi) - mov $X[2], (+8*2)(%rsi) - mov $X[3], (+8*3)(%rsi) - mov $X[4], (+8*4)(%rsi) - mov $X[5], (+8*5)(%rsi) - mov $X[6], (+8*6)(%rsi) - mov $X[7], (+8*7)(%rsi) - - ret -.size mont_reduce,.-mont_reduce -___ -}}} - -{{{ -#MUL_512x512 MACRO pDst, pA, pB, x7, x6, x5, x4, x3, x2, x1, x0, tmp*2 -# -# Inputs: pDst: Destination (1024 bits, 16 qwords) -# pA: Multiplicand (512 bits, 8 qwords) -# pB: Multiplicand (512 bits, 8 qwords) -# Uses registers rax, rdx, args -# B operand in [pB] and also in x7...x0 -sub MUL_512x512 -{ - my ($pDst, $pA, $pB, $x, $OP, $TMP, $pDst_o)=@_; - my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/); - my @X=@$x; # make a copy - -$code.=<<___; - mov (+8*0)($pA), $OP - - mov $X[0], %rax - mul $OP # rdx:rax = %OP * [0] - mov %rax, (+$pDst_o+8*0)($pDst) - mov %rdx, $X[0] -___ -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov $X[$i], %rax - mul $OP # rdx:rax = %OP * [$i] - add %rax, $X[$i-1] - adc \$0, %rdx - mov %rdx, $X[$i] -___ -} - -for(my $i=1;$i<8;$i++) { -$code.=<<___; - mov (+8*$i)($pA), $OP -___ - - &MULSTEP_512(\@X, "(+$pDst_o+8*$i)($pDst)", $pB, $OP, $TMP); - push(@X,shift(@X)); -} - -$code.=<<___; - mov $X[0], (+$pDst_o+8*8)($pDst) - mov $X[1], (+$pDst_o+8*9)($pDst) - mov $X[2], (+$pDst_o+8*10)($pDst) - mov $X[3], (+$pDst_o+8*11)($pDst) - mov $X[4], (+$pDst_o+8*12)($pDst) - mov $X[5], (+$pDst_o+8*13)($pDst) - mov $X[6], (+$pDst_o+8*14)($pDst) - mov $X[7], 
(+$pDst_o+8*15)($pDst) -___ -} - -# -# mont_mul_a3b : subroutine to compute (Src1 * Src2) % M (all 512-bits) -# Input: src1: Address of source 1: rdi -# src2: Address of source 2: rsi -# Output: dst: Address of destination: [red_res_addr] -# src2 and result also in: r9, r8, r15, r14, r13, r12, r11, r10 -# Temp: Clobbers [tmp16], all registers -$code.=<<___; -.type mont_mul_a3b,\@abi-omnipotent -.align 16 -mont_mul_a3b: - # - # multiply tmp = src1 * src2 - # For multiply: dst = rcx, src1 = rdi, src2 = rsi - # stack depth is extra 8 from call -___ - &MUL_512x512("%rsp+$tmp16_offset+8", "%rdi", "%rsi", [map("%r$_",(10..15,8..9))], "%rbp", "%rbx"); -$code.=<<___; - # - # Dst = tmp % m - # Call reduce(tmp, m, data, dst) - - # tail recursion optimization: jmp to mont_reduce and return from there - jmp mont_reduce - # call mont_reduce - # ret -.size mont_mul_a3b,.-mont_mul_a3b -___ -}}} - -{{{ -#SQR_512 MACRO pDest, pA, x7, x6, x5, x4, x3, x2, x1, x0, tmp*4 -# -# Input in memory [pA] and also in x7...x0 -# Uses all argument registers plus rax and rdx -# -# This version computes all of the off-diagonal terms into memory, -# and then it adds in the diagonal terms - -sub SQR_512 -{ - my ($pDst, $pA, $x, $A, $tmp, $x7, $x6, $pDst_o)=@_; - my ($pDst, $pDst_o) = ($pDst =~ m/([^+]*)\+?(.*)?/); - my @X=@$x; # make a copy -$code.=<<___; - # ------------------ - # first pass 01...07 - # ------------------ - mov $X[0], $A - - mov $X[1],%rax - mul $A - mov %rax, (+$pDst_o+8*1)($pDst) -___ -for(my $i=2;$i<8;$i++) { -$code.=<<___; - mov %rdx, $X[$i-2] - mov $X[$i],%rax - mul $A - add %rax, $X[$i-2] - adc \$0, %rdx -___ -} -$code.=<<___; - mov %rdx, $x7 - - mov $X[0], (+$pDst_o+8*2)($pDst) - - # ------------------ - # second pass 12...17 - # ------------------ - - mov (+8*1)($pA), $A - - mov (+8*2)($pA),%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - mov $X[1], (+$pDst_o+8*3)($pDst) - - mov %rdx, $X[0] - mov (+8*3)($pA),%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - add $X[0], $X[2] - adc \$0, %rdx - mov $X[2], (+$pDst_o+8*4)($pDst) - - mov %rdx, $X[0] - mov (+8*4)($pA),%rax - mul $A - add %rax, $X[3] - adc \$0, %rdx - add $X[0], $X[3] - adc \$0, %rdx - - mov %rdx, $X[0] - mov (+8*5)($pA),%rax - mul $A - add %rax, $X[4] - adc \$0, %rdx - add $X[0], $X[4] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - add $X[0], $X[5] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $x7 - adc \$0, %rdx - add $X[0], $x7 - adc \$0, %rdx - - mov %rdx, $X[1] - - # ------------------ - # third pass 23...27 - # ------------------ - mov (+8*2)($pA), $A - - mov (+8*3)($pA),%rax - mul $A - add %rax, $X[3] - adc \$0, %rdx - mov $X[3], (+$pDst_o+8*5)($pDst) - - mov %rdx, $X[0] - mov (+8*4)($pA),%rax - mul $A - add %rax, $X[4] - adc \$0, %rdx - add $X[0], $X[4] - adc \$0, %rdx - mov $X[4], (+$pDst_o+8*6)($pDst) - - mov %rdx, $X[0] - mov (+8*5)($pA),%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - add $X[0], $X[5] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $x7 - adc \$0, %rdx - add $X[0], $x7 - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - add $X[0], $X[1] - adc \$0, %rdx - - mov %rdx, $X[2] - - # ------------------ - # fourth pass 34...37 - # ------------------ - - mov (+8*3)($pA), $A - - mov (+8*4)($pA),%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - mov $X[5], (+$pDst_o+8*7)($pDst) - - mov %rdx, $X[0] - mov (+8*5)($pA),%rax - mul $A - add %rax, $x7 - adc \$0, %rdx - add $X[0], 
$x7 - adc \$0, %rdx - mov $x7, (+$pDst_o+8*8)($pDst) - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - add $X[0], $X[1] - adc \$0, %rdx - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - add $X[0], $X[2] - adc \$0, %rdx - - mov %rdx, $X[5] - - # ------------------ - # fifth pass 45...47 - # ------------------ - mov (+8*4)($pA), $A - - mov (+8*5)($pA),%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - mov $X[1], (+$pDst_o+8*9)($pDst) - - mov %rdx, $X[0] - mov $X[6],%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - add $X[0], $X[2] - adc \$0, %rdx - mov $X[2], (+$pDst_o+8*10)($pDst) - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - add $X[0], $X[5] - adc \$0, %rdx - - mov %rdx, $X[1] - - # ------------------ - # sixth pass 56...57 - # ------------------ - mov (+8*5)($pA), $A - - mov $X[6],%rax - mul $A - add %rax, $X[5] - adc \$0, %rdx - mov $X[5], (+$pDst_o+8*11)($pDst) - - mov %rdx, $X[0] - mov $X[7],%rax - mul $A - add %rax, $X[1] - adc \$0, %rdx - add $X[0], $X[1] - adc \$0, %rdx - mov $X[1], (+$pDst_o+8*12)($pDst) - - mov %rdx, $X[2] - - # ------------------ - # seventh pass 67 - # ------------------ - mov $X[6], $A - - mov $X[7],%rax - mul $A - add %rax, $X[2] - adc \$0, %rdx - mov $X[2], (+$pDst_o+8*13)($pDst) - - mov %rdx, (+$pDst_o+8*14)($pDst) - - # start finalize (add in squares, and double off-terms) - mov (+$pDst_o+8*1)($pDst), $X[0] - mov (+$pDst_o+8*2)($pDst), $X[1] - mov (+$pDst_o+8*3)($pDst), $X[2] - mov (+$pDst_o+8*4)($pDst), $X[3] - mov (+$pDst_o+8*5)($pDst), $X[4] - mov (+$pDst_o+8*6)($pDst), $X[5] - - mov (+8*3)($pA), %rax - mul %rax - mov %rax, $x6 - mov %rdx, $X[6] - - add $X[0], $X[0] - adc $X[1], $X[1] - adc $X[2], $X[2] - adc $X[3], $X[3] - adc $X[4], $X[4] - adc $X[5], $X[5] - adc \$0, $X[6] - - mov (+8*0)($pA), %rax - mul %rax - mov %rax, (+$pDst_o+8*0)($pDst) - mov %rdx, $A - - mov (+8*1)($pA), %rax - mul %rax - - add $A, $X[0] - adc %rax, $X[1] - adc \$0, %rdx - - mov %rdx, $A - mov $X[0], (+$pDst_o+8*1)($pDst) - mov $X[1], (+$pDst_o+8*2)($pDst) - - mov (+8*2)($pA), %rax - mul %rax - - add $A, $X[2] - adc %rax, $X[3] - adc \$0, %rdx - - mov %rdx, $A - - mov $X[2], (+$pDst_o+8*3)($pDst) - mov $X[3], (+$pDst_o+8*4)($pDst) - - xor $tmp, $tmp - add $A, $X[4] - adc $x6, $X[5] - adc \$0, $tmp - - mov $X[4], (+$pDst_o+8*5)($pDst) - mov $X[5], (+$pDst_o+8*6)($pDst) - - # %%tmp has 0/1 in column 7 - # %%A6 has a full value in column 7 - - mov (+$pDst_o+8*7)($pDst), $X[0] - mov (+$pDst_o+8*8)($pDst), $X[1] - mov (+$pDst_o+8*9)($pDst), $X[2] - mov (+$pDst_o+8*10)($pDst), $X[3] - mov (+$pDst_o+8*11)($pDst), $X[4] - mov (+$pDst_o+8*12)($pDst), $X[5] - mov (+$pDst_o+8*13)($pDst), $x6 - mov (+$pDst_o+8*14)($pDst), $x7 - - mov $X[7], %rax - mul %rax - mov %rax, $X[7] - mov %rdx, $A - - add $X[0], $X[0] - adc $X[1], $X[1] - adc $X[2], $X[2] - adc $X[3], $X[3] - adc $X[4], $X[4] - adc $X[5], $X[5] - adc $x6, $x6 - adc $x7, $x7 - adc \$0, $A - - add $tmp, $X[0] - - mov (+8*4)($pA), %rax - mul %rax - - add $X[6], $X[0] - adc %rax, $X[1] - adc \$0, %rdx - - mov %rdx, $tmp - - mov $X[0], (+$pDst_o+8*7)($pDst) - mov $X[1], (+$pDst_o+8*8)($pDst) - - mov (+8*5)($pA), %rax - mul %rax - - add $tmp, $X[2] - adc %rax, $X[3] - adc \$0, %rdx - - mov %rdx, $tmp - - mov $X[2], (+$pDst_o+8*9)($pDst) - mov $X[3], (+$pDst_o+8*10)($pDst) - - mov (+8*6)($pA), %rax - mul %rax - - add $tmp, $X[4] - adc %rax, $X[5] - adc \$0, %rdx - - mov $X[4], (+$pDst_o+8*11)($pDst) - mov $X[5], (+$pDst_o+8*12)($pDst) - - 
add %rdx, $x6 - adc $X[7], $x7 - adc \$0, $A - - mov $x6, (+$pDst_o+8*13)($pDst) - mov $x7, (+$pDst_o+8*14)($pDst) - mov $A, (+$pDst_o+8*15)($pDst) -___ -} - -# -# sqr_reduce: subroutine to compute Result = reduce(Result * Result) -# -# input and result also in: r9, r8, r15, r14, r13, r12, r11, r10 -# -$code.=<<___; -.type sqr_reduce,\@abi-omnipotent -.align 16 -sqr_reduce: - mov (+$pResult_offset+8)(%rsp), %rcx -___ - &SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi"); -$code.=<<___; - # tail recursion optimization: jmp to mont_reduce and return from there - jmp mont_reduce - # call mont_reduce - # ret -.size sqr_reduce,.-sqr_reduce -___ -}}} - -# -# MAIN FUNCTION -# - -#mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */ -# UINT64 *g, /* 512 bits, 8 qwords */ -# UINT64 *exp, /* 512 bits, 8 qwords */ -# struct mod_ctx_512 *data) - -# window size = 5 -# table size = 2^5 = 32 -#table_entries equ 32 -#table_size equ table_entries * 8 -$code.=<<___; -.globl mod_exp_512 -.type mod_exp_512,\@function,4 -mod_exp_512: - push %rbp - push %rbx - push %r12 - push %r13 - push %r14 - push %r15 - - # adjust stack down and then align it with cache boundary - mov %rsp, %r8 - sub \$$mem_size, %rsp - and \$-64, %rsp - - # store previous stack pointer and arguments - mov %r8, (+$rsp_offset)(%rsp) - mov %rdi, (+$pResult_offset)(%rsp) - mov %rsi, (+$pG_offset)(%rsp) - mov %rcx, (+$pData_offset)(%rsp) -.Lbody: - # transform g into montgomery space - # GT = reduce(g * C2) = reduce(g * (2^256)) - # reduce expects to have the input in [tmp16] - pxor %xmm4, %xmm4 - movdqu (+16*0)(%rsi), %xmm0 - movdqu (+16*1)(%rsi), %xmm1 - movdqu (+16*2)(%rsi), %xmm2 - movdqu (+16*3)(%rsi), %xmm3 - movdqa %xmm4, (+$tmp16_offset+16*0)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*1)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp) - movdqa %xmm0, (+$tmp16_offset+16*2)(%rsp) - movdqa %xmm1, (+$tmp16_offset+16*3)(%rsp) - movdqa %xmm2, (+$tmp16_offset+16*4)(%rsp) - movdqa %xmm3, (+$tmp16_offset+16*5)(%rsp) - - # load pExp before rdx gets blown away - movdqu (+16*0)(%rdx), %xmm0 - movdqu (+16*1)(%rdx), %xmm1 - movdqu (+16*2)(%rdx), %xmm2 - movdqu (+16*3)(%rdx), %xmm3 - - lea (+$GT_offset)(%rsp), %rbx - mov %rbx, (+$red_result_addr_offset)(%rsp) - call mont_reduce - - # Initialize tmp = C - lea (+$tmp_offset)(%rsp), %rcx - xor %rax, %rax - mov %rax, (+8*0)(%rcx) - mov %rax, (+8*1)(%rcx) - mov %rax, (+8*3)(%rcx) - mov %rax, (+8*4)(%rcx) - mov %rax, (+8*5)(%rcx) - mov %rax, (+8*6)(%rcx) - mov %rax, (+8*7)(%rcx) - mov %rax, (+$exp_offset+8*8)(%rsp) - movq \$1, (+8*2)(%rcx) - - lea (+$garray_offset)(%rsp), %rbp - mov %rcx, %rsi # pTmp - mov %rbp, %rdi # Garray[][0] -___ - - &swizzle("%rdi", "%rcx", "%rax", "%rbx"); - - # for (rax = 31; rax != 0; rax--) { - # tmp = reduce(tmp * G) - # swizzle(pg, tmp); - # pg += 2; } -$code.=<<___; - mov \$31, %rax - mov %rax, (+$i_offset)(%rsp) - mov %rbp, (+$pg_offset)(%rsp) - # rsi -> pTmp - mov %rsi, (+$red_result_addr_offset)(%rsp) - mov (+8*0)(%rsi), %r10 - mov (+8*1)(%rsi), %r11 - mov (+8*2)(%rsi), %r12 - mov (+8*3)(%rsi), %r13 - mov (+8*4)(%rsi), %r14 - mov (+8*5)(%rsi), %r15 - mov (+8*6)(%rsi), %r8 - mov (+8*7)(%rsi), %r9 -init_loop: - lea (+$GT_offset)(%rsp), %rdi - call mont_mul_a3b - lea (+$tmp_offset)(%rsp), %rsi - mov (+$pg_offset)(%rsp), %rbp - add \$2, %rbp - mov %rbp, (+$pg_offset)(%rsp) - mov %rsi, %rcx # rcx = rsi = addr of tmp -___ - - &swizzle("%rbp", "%rcx", "%rax", "%rbx"); -$code.=<<___; - mov 
(+$i_offset)(%rsp), %rax - sub \$1, %rax - mov %rax, (+$i_offset)(%rsp) - jne init_loop - - # - # Copy exponent onto stack - movdqa %xmm0, (+$exp_offset+16*0)(%rsp) - movdqa %xmm1, (+$exp_offset+16*1)(%rsp) - movdqa %xmm2, (+$exp_offset+16*2)(%rsp) - movdqa %xmm3, (+$exp_offset+16*3)(%rsp) - - - # - # Do exponentiation - # Initialize result to G[exp{511:507}] - mov (+$exp_offset+62)(%rsp), %eax - mov %rax, %rdx - shr \$11, %rax - and \$0x07FF, %edx - mov %edx, (+$exp_offset+62)(%rsp) - lea (+$garray_offset)(%rsp,%rax,2), %rsi - mov (+$pResult_offset)(%rsp), %rdx -___ - - &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax"); - - # - # Loop variables - # rcx = [loop_idx] = index: 510-5 to 0 by 5 -$code.=<<___; - movq \$505, (+$loop_idx_offset)(%rsp) - - mov (+$pResult_offset)(%rsp), %rcx - mov %rcx, (+$red_result_addr_offset)(%rsp) - mov (+8*0)(%rcx), %r10 - mov (+8*1)(%rcx), %r11 - mov (+8*2)(%rcx), %r12 - mov (+8*3)(%rcx), %r13 - mov (+8*4)(%rcx), %r14 - mov (+8*5)(%rcx), %r15 - mov (+8*6)(%rcx), %r8 - mov (+8*7)(%rcx), %r9 - jmp sqr_2 - -main_loop_a3b: - call sqr_reduce - call sqr_reduce - call sqr_reduce -sqr_2: - call sqr_reduce - call sqr_reduce - - # - # Do multiply, first look up proper value in Garray - mov (+$loop_idx_offset)(%rsp), %rcx # bit index - mov %rcx, %rax - shr \$4, %rax # rax is word pointer - mov (+$exp_offset)(%rsp,%rax,2), %edx - and \$15, %rcx - shrq %cl, %rdx - and \$0x1F, %rdx - - lea (+$garray_offset)(%rsp,%rdx,2), %rsi - lea (+$tmp_offset)(%rsp), %rdx - mov %rdx, %rdi -___ - - &unswizzle("%rdx", "%rsi", "%rbp", "%rbx", "%rax"); - # rdi = tmp = pG - - # - # Call mod_mul_a1(pDst, pSrc1, pSrc2, pM, pData) - # result result pG M Data -$code.=<<___; - mov (+$pResult_offset)(%rsp), %rsi - call mont_mul_a3b - - # - # finish loop - mov (+$loop_idx_offset)(%rsp), %rcx - sub \$5, %rcx - mov %rcx, (+$loop_idx_offset)(%rsp) - jge main_loop_a3b - - # - -end_main_loop_a3b: - # transform result out of Montgomery space - # result = reduce(result) - mov (+$pResult_offset)(%rsp), %rdx - pxor %xmm4, %xmm4 - movdqu (+16*0)(%rdx), %xmm0 - movdqu (+16*1)(%rdx), %xmm1 - movdqu (+16*2)(%rdx), %xmm2 - movdqu (+16*3)(%rdx), %xmm3 - movdqa %xmm4, (+$tmp16_offset+16*4)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*5)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*6)(%rsp) - movdqa %xmm4, (+$tmp16_offset+16*7)(%rsp) - movdqa %xmm0, (+$tmp16_offset+16*0)(%rsp) - movdqa %xmm1, (+$tmp16_offset+16*1)(%rsp) - movdqa %xmm2, (+$tmp16_offset+16*2)(%rsp) - movdqa %xmm3, (+$tmp16_offset+16*3)(%rsp) - call mont_reduce - - # If result > m, subract m - # load result into r15:r8 - mov (+$pResult_offset)(%rsp), %rax - mov (+8*0)(%rax), %r8 - mov (+8*1)(%rax), %r9 - mov (+8*2)(%rax), %r10 - mov (+8*3)(%rax), %r11 - mov (+8*4)(%rax), %r12 - mov (+8*5)(%rax), %r13 - mov (+8*6)(%rax), %r14 - mov (+8*7)(%rax), %r15 - - # subtract m - mov (+$pData_offset)(%rsp), %rbx - add \$$M, %rbx - - sub (+8*0)(%rbx), %r8 - sbb (+8*1)(%rbx), %r9 - sbb (+8*2)(%rbx), %r10 - sbb (+8*3)(%rbx), %r11 - sbb (+8*4)(%rbx), %r12 - sbb (+8*5)(%rbx), %r13 - sbb (+8*6)(%rbx), %r14 - sbb (+8*7)(%rbx), %r15 - - # if Carry is clear, replace result with difference - mov (+8*0)(%rax), %rsi - mov (+8*1)(%rax), %rdi - mov (+8*2)(%rax), %rcx - mov (+8*3)(%rax), %rdx - cmovnc %r8, %rsi - cmovnc %r9, %rdi - cmovnc %r10, %rcx - cmovnc %r11, %rdx - mov %rsi, (+8*0)(%rax) - mov %rdi, (+8*1)(%rax) - mov %rcx, (+8*2)(%rax) - mov %rdx, (+8*3)(%rax) - - mov (+8*4)(%rax), %rsi - mov (+8*5)(%rax), %rdi - mov (+8*6)(%rax), %rcx - mov (+8*7)(%rax), %rdx - cmovnc %r12, 
%rsi - cmovnc %r13, %rdi - cmovnc %r14, %rcx - cmovnc %r15, %rdx - mov %rsi, (+8*4)(%rax) - mov %rdi, (+8*5)(%rax) - mov %rcx, (+8*6)(%rax) - mov %rdx, (+8*7)(%rax) - - mov (+$rsp_offset)(%rsp), %rsi - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbx - mov 40(%rsi),%rbp - lea 48(%rsi),%rsp -.Lepilogue: - ret -.size mod_exp_512, . - mod_exp_512 -___ - -if ($win64) { -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -my $rec="%rcx"; -my $frame="%rdx"; -my $context="%r8"; -my $disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type mod_exp_512_se_handler,\@abi-omnipotent -.align 16 -mod_exp_512_se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lbody(%rip),%r10 - cmp %r10,%rbx # context->RipRsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=epilogue label - jae .Lin_prologue - - mov $rsp_offset(%rax),%rax # pull saved Rsp - - mov 32(%rax),%rbx - mov 40(%rax),%rbp - mov 24(%rax),%r12 - mov 16(%rax),%r13 - mov 8(%rax),%r14 - mov 0(%rax),%r15 - lea 48(%rax),%rax - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call *__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size mod_exp_512_se_handler,.-mod_exp_512_se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_mod_exp_512 - .rva .LSEH_end_mod_exp_512 - .rva .LSEH_info_mod_exp_512 - -.section .xdata -.align 8 -.LSEH_info_mod_exp_512: - .byte 9,0,0,0 - .rva mod_exp_512_se_handler -___ -} - -sub reg_part { -my ($reg,$conv)=@_; - if ($reg =~ /%r[0-9]+/) { $reg .= $conv; } - elsif ($conv eq "b") { $reg =~ s/%[er]([^x]+)x?/%$1l/; } - elsif ($conv eq "w") { $reg =~ s/%[er](.+)/%$1/; } - elsif ($conv eq "d") { $reg =~ s/%[er](.+)/%e$1/; } - return $reg; -} - -$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem; -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/(\(\+[^)]+\))/eval $1/gem; -print $code; -close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl index f9b6992ccc82bd..da69c6aaaf6a55 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ppc-mont.pl @@ -325,6 +325,7 
@@ .long 0 .byte 0,12,4,0,0x80,12,6,0 .long 0 +.size .bn_mul_mont_int,.-.bn_mul_mont_int .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " ___ diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc.pl b/deps/openssl/openssl/crypto/bn/asm/ppc.pl index 1249ce22998897..04df1fe5cc6534 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ppc.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ppc.pl @@ -392,6 +392,7 @@ .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 +.size .bn_sqr_comba4,.-.bn_sqr_comba4 # # NOTE: The following label name should be changed to @@ -819,6 +820,7 @@ .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 +.size .bn_sqr_comba8,.-.bn_sqr_comba8 # # NOTE: The following label name should be changed to @@ -972,6 +974,7 @@ .long 0 .byte 0,12,0x14,0,0,0,3,0 .long 0 +.size .bn_mul_comba4,.-.bn_mul_comba4 # # NOTE: The following label name should be changed to @@ -1510,6 +1513,7 @@ .long 0 .byte 0,12,0x14,0,0,0,3,0 .long 0 +.size .bn_mul_comba8,.-.bn_mul_comba8 # # NOTE: The following label name should be changed to @@ -1560,6 +1564,7 @@ .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 +.size .bn_sub_words,.-.bn_sub_words # # NOTE: The following label name should be changed to @@ -1605,6 +1610,7 @@ .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 +.size .bn_add_words,.-.bn_add_words # # NOTE: The following label name should be changed to @@ -1720,6 +1726,7 @@ .long 0 .byte 0,12,0x14,0,0,0,3,0 .long 0 +.size .bn_div_words,.-.bn_div_words # # NOTE: The following label name should be changed to @@ -1761,6 +1768,7 @@ .long 0 .byte 0,12,0x14,0,0,0,3,0 .long 0 +.size .bn_sqr_words,.-.bn_sqr_words # # NOTE: The following label name should be changed to @@ -1866,6 +1874,7 @@ .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 +.size bn_mul_words,.-bn_mul_words # # NOTE: The following label name should be changed to @@ -1991,6 +2000,7 @@ .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 +.size .bn_mul_add_words,.-.bn_mul_add_words .align 4 EOF $data =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl index a14e769ad055d9..68e3733e3f79cd 100644 --- a/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/ppc64-mont.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -65,6 +65,14 @@ # others alternative would be to break dependence on upper halves of # GPRs by sticking to 32-bit integer operations... +# December 2012 + +# Remove above mentioned dependence on GPRs' upper halves in 32-bit +# build. No signal masking overhead, but integer instructions are +# *more* numerous... It's still "universally" faster than 32-bit +# ppc-mont.pl, but improvement coefficient is not as impressive +# for longer keys... + $flavour = shift; if ($flavour =~ /32/) { @@ -110,6 +118,9 @@ $j="r11"; $i="r12"; # non-volatile registers +$c1="r19"; +$n1="r20"; +$a1="r21"; $nap_d="r22"; # interleaved ap and np in double format $a0="r23"; # ap[0] $t0="r24"; # temporary registers @@ -180,8 +191,8 @@ # . . # +-------------------------------+ # . . -# -12*size_t +-------------------------------+ -# | 10 saved gpr, r22-r31 | +# -13*size_t +-------------------------------+ +# | 13 saved gpr, r19-r31 | # . . # . . 
# -12*8 +-------------------------------+ @@ -215,6 +226,9 @@ mr $i,$sp $STUX $sp,$sp,$tp ; alloca + $PUSH r19,`-12*8-13*$SIZE_T`($i) + $PUSH r20,`-12*8-12*$SIZE_T`($i) + $PUSH r21,`-12*8-11*$SIZE_T`($i) $PUSH r22,`-12*8-10*$SIZE_T`($i) $PUSH r23,`-12*8-9*$SIZE_T`($i) $PUSH r24,`-12*8-8*$SIZE_T`($i) @@ -237,40 +251,26 @@ stfd f29,`-3*8`($i) stfd f30,`-2*8`($i) stfd f31,`-1*8`($i) -___ -$code.=<<___ if ($SIZE_T==8); - ld $a0,0($ap) ; pull ap[0] value - ld $n0,0($n0) ; pull n0[0] value - ld $t3,0($bp) ; bp[0] -___ -$code.=<<___ if ($SIZE_T==4); - mr $t1,$n0 - lwz $a0,0($ap) ; pull ap[0,1] value - lwz $t0,4($ap) - lwz $n0,0($t1) ; pull n0[0,1] value - lwz $t1,4($t1) - lwz $t3,0($bp) ; bp[0,1] - lwz $t2,4($bp) - insrdi $a0,$t0,32,0 - insrdi $n0,$t1,32,0 - insrdi $t3,$t2,32,0 -___ -$code.=<<___; + addi $tp,$sp,`$FRAME+$TRANSFER+8+64` li $i,-64 add $nap_d,$tp,$num and $nap_d,$nap_d,$i ; align to 64 bytes - - mulld $t7,$a0,$t3 ; ap[0]*bp[0] ; nap_d is off by 1, because it's used with stfdu/lfdu addi $nap_d,$nap_d,-8 srwi $j,$num,`3+1` ; counter register, num/2 - mulld $t7,$t7,$n0 ; tp[0]*n0 addi $j,$j,-1 addi $tp,$sp,`$FRAME+$TRANSFER-8` li $carry,0 mtctr $j +___ + +$code.=<<___ if ($SIZE_T==8); + ld $a0,0($ap) ; pull ap[0] value + ld $t3,0($bp) ; bp[0] + ld $n0,0($n0) ; pull n0[0] value + mulld $t7,$a0,$t3 ; ap[0]*bp[0] ; transfer bp[0] to FPU as 4x16-bit values extrdi $t0,$t3,16,48 extrdi $t1,$t3,16,32 @@ -280,6 +280,8 @@ std $t1,`$FRAME+8`($sp) std $t2,`$FRAME+16`($sp) std $t3,`$FRAME+24`($sp) + + mulld $t7,$t7,$n0 ; tp[0]*n0 ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values extrdi $t4,$t7,16,48 extrdi $t5,$t7,16,32 @@ -289,21 +291,61 @@ std $t5,`$FRAME+40`($sp) std $t6,`$FRAME+48`($sp) std $t7,`$FRAME+56`($sp) -___ -$code.=<<___ if ($SIZE_T==8); - lwz $t0,4($ap) ; load a[j] as 32-bit word pair - lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair + + extrdi $t0,$a0,32,32 ; lwz $t0,4($ap) + extrdi $t1,$a0,32,0 ; lwz $t1,0($ap) + lwz $t2,12($ap) ; load a[1] as 32-bit word pair lwz $t3,8($ap) - lwz $t4,4($np) ; load n[j] as 32-bit word pair + lwz $t4,4($np) ; load n[0] as 32-bit word pair lwz $t5,0($np) - lwz $t6,12($np) ; load n[j+1] as 32-bit word pair + lwz $t6,12($np) ; load n[1] as 32-bit word pair lwz $t7,8($np) ___ $code.=<<___ if ($SIZE_T==4); - lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs - lwz $t1,4($ap) - lwz $t2,8($ap) + lwz $a0,0($ap) ; pull ap[0,1] value + mr $n1,$n0 + lwz $a1,4($ap) + li $c1,0 + lwz $t1,0($bp) ; bp[0,1] + lwz $t3,4($bp) + lwz $n0,0($n1) ; pull n0[0,1] value + lwz $n1,4($n1) + + mullw $t4,$a0,$t1 ; mulld ap[0]*bp[0] + mulhwu $t5,$a0,$t1 + mullw $t6,$a1,$t1 + mullw $t7,$a0,$t3 + add $t5,$t5,$t6 + add $t5,$t5,$t7 + ; transfer bp[0] to FPU as 4x16-bit values + extrwi $t0,$t1,16,16 + extrwi $t1,$t1,16,0 + extrwi $t2,$t3,16,16 + extrwi $t3,$t3,16,0 + std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) + + mullw $t0,$t4,$n0 ; mulld tp[0]*n0 + mulhwu $t1,$t4,$n0 + mullw $t2,$t5,$n0 + mullw $t3,$t4,$n1 + add $t1,$t1,$t2 + add $t1,$t1,$t3 + ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values + extrwi $t4,$t0,16,16 + extrwi $t5,$t0,16,0 + extrwi $t6,$t1,16,16 + extrwi $t7,$t1,16,0 + std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) + + mr $t0,$a0 ; lwz $t0,0($ap) + mr $t1,$a1 ; lwz $t1,4($ap) + lwz $t2,8($ap) ; load a[j..j+3] as 32-bit word pairs lwz $t3,12($ap) lwz $t4,0($np) ; load 
n[j..j+3] as 32-bit word pairs lwz $t5,4($np) @@ -319,7 +361,7 @@ lfd $nb,`$FRAME+40`($sp) lfd $nc,`$FRAME+48`($sp) lfd $nd,`$FRAME+56`($sp) - std $t0,`$FRAME+64`($sp) + std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build std $t1,`$FRAME+72`($sp) std $t2,`$FRAME+80`($sp) std $t3,`$FRAME+88`($sp) @@ -441,7 +483,7 @@ lwz $t7,12($np) ___ $code.=<<___; - std $t0,`$FRAME+64`($sp) + std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build std $t1,`$FRAME+72`($sp) std $t2,`$FRAME+80`($sp) std $t3,`$FRAME+88`($sp) @@ -449,6 +491,9 @@ std $t5,`$FRAME+104`($sp) std $t6,`$FRAME+112`($sp) std $t7,`$FRAME+120`($sp) +___ +if ($SIZE_T==8 or $flavour =~ /osx/) { +$code.=<<___; ld $t0,`$FRAME+0`($sp) ld $t1,`$FRAME+8`($sp) ld $t2,`$FRAME+16`($sp) @@ -457,6 +502,20 @@ ld $t5,`$FRAME+40`($sp) ld $t6,`$FRAME+48`($sp) ld $t7,`$FRAME+56`($sp) +___ +} else { +$code.=<<___; + lwz $t1,`$FRAME+0`($sp) + lwz $t0,`$FRAME+4`($sp) + lwz $t3,`$FRAME+8`($sp) + lwz $t2,`$FRAME+12`($sp) + lwz $t5,`$FRAME+16`($sp) + lwz $t4,`$FRAME+20`($sp) + lwz $t7,`$FRAME+24`($sp) + lwz $t6,`$FRAME+28`($sp) +___ +} +$code.=<<___; lfd $A0,`$FRAME+64`($sp) lfd $A1,`$FRAME+72`($sp) lfd $A2,`$FRAME+80`($sp) @@ -488,7 +547,9 @@ fmadd $T0b,$A0,$bb,$dotb stfd $A2,24($nap_d) ; save a[j+1] in double format stfd $A3,32($nap_d) - +___ +if ($SIZE_T==8 or $flavour =~ /osx/) { +$code.=<<___; fmadd $T1a,$A0,$bc,$T1a fmadd $T1b,$A0,$bd,$T1b fmadd $T2a,$A1,$bc,$T2a @@ -561,11 +622,123 @@ stfd $T3b,`$FRAME+56`($sp) std $t0,8($tp) ; tp[j-1] stdu $t4,16($tp) ; tp[j] +___ +} else { +$code.=<<___; + fmadd $T1a,$A0,$bc,$T1a + fmadd $T1b,$A0,$bd,$T1b + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + fmadd $T2a,$A1,$bc,$T2a + fmadd $T2b,$A1,$bd,$T2b + stfd $N0,40($nap_d) ; save n[j] in double format + stfd $N1,48($nap_d) + srwi $c1,$t1,16 + insrwi $carry,$t1,16,0 + fmadd $T3a,$A2,$bc,$T3a + fmadd $T3b,$A2,$bd,$T3b + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + fmul $dota,$A3,$bc + fmul $dotb,$A3,$bd + stfd $N2,56($nap_d) ; save n[j+1] in double format + stfdu $N3,64($nap_d) + insrwi $t0,$t2,16,0 ; 0..31 bits + srwi $c1,$t3,16 + insrwi $carry,$t3,16,0 + + fmadd $T1a,$N1,$na,$T1a + fmadd $T1b,$N1,$nb,$T1b + lwz $t3,`$FRAME+32`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36`($sp) ; permuted $t0 + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + fmadd $T2a,$N2,$na,$T2a + fmadd $T2b,$N2,$nb,$T2b + srwi $c1,$t5,16 + insrwi $carry,$t5,16,0 + fmadd $T3a,$N3,$na,$T3a + fmadd $T3b,$N3,$nb,$T3b + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + fmadd $T0a,$N0,$na,$T0a + fmadd $T0b,$N0,$nb,$T0b + insrwi $t4,$t6,16,0 ; 32..63 bits + srwi $c1,$t7,16 + insrwi $carry,$t7,16,0 + + fmadd $T1a,$N0,$nc,$T1a + fmadd $T1b,$N0,$nd,$T1b + lwz $t7,`$FRAME+40`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44`($sp) ; permuted $t2 + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + fmadd $T2a,$N1,$nc,$T2a + fmadd $T2b,$N1,$nd,$T2b + stw $t0,12($tp) ; tp[j-1] + stw $t4,8($tp) + srwi $c1,$t3,16 + insrwi $carry,$t3,16,0 + fmadd $T3a,$N2,$nc,$T3a + fmadd $T3b,$N2,$nd,$T3b + lwz $t1,`$FRAME+48`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52`($sp) ; permuted $t4 + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + fmadd $dota,$N3,$nc,$dota + fmadd $dotb,$N3,$nd,$dotb + insrwi $t2,$t6,16,0 ; 64..95 bits + srwi $c1,$t7,16 + insrwi $carry,$t7,16,0 + + fctid $T0a,$T0a + fctid $T0b,$T0b + lwz $t5,`$FRAME+56`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + 
fctid $T1a,$T1a + fctid $T1b,$T1b + srwi $c1,$t1,16 + insrwi $carry,$t1,16,0 + fctid $T2a,$T2a + fctid $T2b,$T2b + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + fctid $T3a,$T3a + fctid $T3b,$T3b + insrwi $t0,$t4,16,0 ; 96..127 bits + srwi $c1,$t5,16 + insrwi $carry,$t5,16,0 + + stfd $T0a,`$FRAME+0`($sp) + stfd $T0b,`$FRAME+8`($sp) + stfd $T1a,`$FRAME+16`($sp) + stfd $T1b,`$FRAME+24`($sp) + stfd $T2a,`$FRAME+32`($sp) + stfd $T2b,`$FRAME+40`($sp) + stfd $T3a,`$FRAME+48`($sp) + stfd $T3b,`$FRAME+56`($sp) + stw $t2,20($tp) ; tp[j] + stwu $t0,16($tp) +___ +} +$code.=<<___; bdnz- L1st fctid $dota,$dota fctid $dotb,$dotb - +___ +if ($SIZE_T==8 or $flavour =~ /osx/) { +$code.=<<___; ld $t0,`$FRAME+0`($sp) ld $t1,`$FRAME+8`($sp) ld $t2,`$FRAME+16`($sp) @@ -611,33 +784,117 @@ insrdi $t6,$t7,48,0 srdi $ovf,$t7,48 std $t6,8($tp) ; tp[num-1] +___ +} else { +$code.=<<___; + lwz $t1,`$FRAME+0`($sp) + lwz $t0,`$FRAME+4`($sp) + lwz $t3,`$FRAME+8`($sp) + lwz $t2,`$FRAME+12`($sp) + lwz $t5,`$FRAME+16`($sp) + lwz $t4,`$FRAME+20`($sp) + lwz $t7,`$FRAME+24`($sp) + lwz $t6,`$FRAME+28`($sp) + stfd $dota,`$FRAME+64`($sp) + stfd $dotb,`$FRAME+72`($sp) + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + insrwi $carry,$t1,16,0 + srwi $c1,$t1,16 + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + insrwi $t0,$t2,16,0 ; 0..31 bits + insrwi $carry,$t3,16,0 + srwi $c1,$t3,16 + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + insrwi $carry,$t5,16,0 + srwi $c1,$t5,16 + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + insrwi $t4,$t6,16,0 ; 32..63 bits + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + stw $t0,12($tp) ; tp[j-1] + stw $t4,8($tp) + + lwz $t3,`$FRAME+32`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36`($sp) ; permuted $t0 + lwz $t7,`$FRAME+40`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44`($sp) ; permuted $t2 + lwz $t1,`$FRAME+48`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52`($sp) ; permuted $t4 + lwz $t5,`$FRAME+56`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + insrwi $carry,$t3,16,0 + srwi $c1,$t3,16 + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + insrwi $t2,$t6,16,0 ; 64..95 bits + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + insrwi $carry,$t1,16,0 + srwi $c1,$t1,16 + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + insrwi $t0,$t4,16,0 ; 96..127 bits + insrwi $carry,$t5,16,0 + srwi $c1,$t5,16 + stw $t2,20($tp) ; tp[j] + stwu $t0,16($tp) + + lwz $t7,`$FRAME+64`($sp) + lwz $t6,`$FRAME+68`($sp) + lwz $t5,`$FRAME+72`($sp) + lwz $t4,`$FRAME+76`($sp) + + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + + insrwi $t6,$t4,16,0 + srwi $t4,$t4,16 + insrwi $t4,$t5,16,0 + srwi $ovf,$t5,16 + stw $t6,12($tp) ; tp[num-1] + stw $t4,8($tp) +___ +} +$code.=<<___; slwi $t7,$num,2 subf $nap_d,$t7,$nap_d ; rewind pointer li $i,8 ; i=1 .align 5 Louter: -___ -$code.=<<___ if ($SIZE_T==8); - ldx $t3,$bp,$i ; bp[i] -___ -$code.=<<___ if ($SIZE_T==4); - add $t0,$bp,$i - lwz $t3,0($t0) ; bp[i,i+1] - lwz $t0,4($t0) - insrdi $t3,$t0,32,0 -___ -$code.=<<___; - ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] - mulld $t7,$a0,$t3 ; ap[0]*bp[i] - addi $tp,$sp,`$FRAME+$TRANSFER` - add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] li $carry,0 - mulld $t7,$t7,$n0 ; tp[0]*n0 mtctr $j +___ +$code.=<<___ if ($SIZE_T==8); + ldx $t3,$bp,$i ; bp[i] + ld 
$t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] + mulld $t7,$a0,$t3 ; ap[0]*bp[i] + add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] ; transfer bp[i] to FPU as 4x16-bit values extrdi $t0,$t3,16,48 extrdi $t1,$t3,16,32 @@ -647,6 +904,8 @@ std $t1,`$FRAME+8`($sp) std $t2,`$FRAME+16`($sp) std $t3,`$FRAME+24`($sp) + + mulld $t7,$t7,$n0 ; tp[0]*n0 ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values extrdi $t4,$t7,16,48 extrdi $t5,$t7,16,32 @@ -656,7 +915,50 @@ std $t5,`$FRAME+40`($sp) std $t6,`$FRAME+48`($sp) std $t7,`$FRAME+56`($sp) +___ +$code.=<<___ if ($SIZE_T==4); + add $t0,$bp,$i + li $c1,0 + lwz $t1,0($t0) ; bp[i,i+1] + lwz $t3,4($t0) + + mullw $t4,$a0,$t1 ; ap[0]*bp[i] + lwz $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0] + mulhwu $t5,$a0,$t1 + lwz $t2,`$FRAME+$TRANSFER+8`($sp) ; tp[0] + mullw $t6,$a1,$t1 + mullw $t7,$a0,$t3 + add $t5,$t5,$t6 + add $t5,$t5,$t7 + addc $t4,$t4,$t0 ; ap[0]*bp[i]+tp[0] + adde $t5,$t5,$t2 + ; transfer bp[i] to FPU as 4x16-bit values + extrwi $t0,$t1,16,16 + extrwi $t1,$t1,16,0 + extrwi $t2,$t3,16,16 + extrwi $t3,$t3,16,0 + std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) + mullw $t0,$t4,$n0 ; mulld tp[0]*n0 + mulhwu $t1,$t4,$n0 + mullw $t2,$t5,$n0 + mullw $t3,$t4,$n1 + add $t1,$t1,$t2 + add $t1,$t1,$t3 + ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values + extrwi $t4,$t0,16,16 + extrwi $t5,$t0,16,0 + extrwi $t6,$t1,16,16 + extrwi $t7,$t1,16,0 + std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) +___ +$code.=<<___; lfd $A0,8($nap_d) ; load a[j] in double format lfd $A1,16($nap_d) lfd $A2,24($nap_d) ; load a[j+1] in double format @@ -769,7 +1071,9 @@ fmul $dotb,$A3,$bd lfd $A2,24($nap_d) ; load a[j+1] in double format lfd $A3,32($nap_d) - +___ +if ($SIZE_T==8 or $flavour =~ /osx/) { +$code.=<<___; fmadd $T1a,$N1,$na,$T1a fmadd $T1b,$N1,$nb,$T1b ld $t0,`$FRAME+0`($sp) @@ -856,10 +1160,131 @@ addze $carry,$carry std $t3,-16($tp) ; tp[j-1] std $t5,-8($tp) ; tp[j] +___ +} else { +$code.=<<___; + fmadd $T1a,$N1,$na,$T1a + fmadd $T1b,$N1,$nb,$T1b + lwz $t1,`$FRAME+0`($sp) + lwz $t0,`$FRAME+4`($sp) + fmadd $T2a,$N2,$na,$T2a + fmadd $T2b,$N2,$nb,$T2b + lwz $t3,`$FRAME+8`($sp) + lwz $t2,`$FRAME+12`($sp) + fmadd $T3a,$N3,$na,$T3a + fmadd $T3b,$N3,$nb,$T3b + lwz $t5,`$FRAME+16`($sp) + lwz $t4,`$FRAME+20`($sp) + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + fmadd $T0a,$N0,$na,$T0a + fmadd $T0b,$N0,$nb,$T0b + lwz $t7,`$FRAME+24`($sp) + lwz $t6,`$FRAME+28`($sp) + srwi $c1,$t1,16 + insrwi $carry,$t1,16,0 + + fmadd $T1a,$N0,$nc,$T1a + fmadd $T1b,$N0,$nd,$T1b + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + fmadd $T2a,$N1,$nc,$T2a + fmadd $T2b,$N1,$nd,$T2b + insrwi $t0,$t2,16,0 ; 0..31 bits + srwi $c1,$t3,16 + insrwi $carry,$t3,16,0 + fmadd $T3a,$N2,$nc,$T3a + fmadd $T3b,$N2,$nd,$T3b + lwz $t2,12($tp) ; tp[j] + lwz $t3,8($tp) + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + fmadd $dota,$N3,$nc,$dota + fmadd $dotb,$N3,$nd,$dotb + srwi $c1,$t5,16 + insrwi $carry,$t5,16,0 + + fctid $T0a,$T0a + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + fctid $T0b,$T0b + insrwi $t4,$t6,16,0 ; 32..63 bits + srwi $c1,$t7,16 + insrwi $carry,$t7,16,0 + fctid $T1a,$T1a + addc $t0,$t0,$t2 + adde $t4,$t4,$t3 + lwz $t3,`$FRAME+32`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36`($sp) ; permuted $t0 + fctid $T1b,$T1b + addze $carry,$carry + addze $c1,$c1 + stw $t0,4($tp) ; tp[j-1] + stw $t4,0($tp) + 
fctid $T2a,$T2a + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + lwz $t7,`$FRAME+40`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44`($sp) ; permuted $t2 + fctid $T2b,$T2b + srwi $c1,$t3,16 + insrwi $carry,$t3,16,0 + lwz $t1,`$FRAME+48`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52`($sp) ; permuted $t4 + fctid $T3a,$T3a + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + lwz $t5,`$FRAME+56`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + fctid $T3b,$T3b + + insrwi $t2,$t6,16,0 ; 64..95 bits + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + lwz $t6,20($tp) + lwzu $t7,16($tp) + addc $t0,$t0,$carry + stfd $T0a,`$FRAME+0`($sp) + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + stfd $T0b,`$FRAME+8`($sp) + insrwi $carry,$t1,16,0 + srwi $c1,$t1,16 + addc $t4,$t4,$carry + stfd $T1a,`$FRAME+16`($sp) + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + insrwi $t0,$t4,16,0 ; 96..127 bits + stfd $T1b,`$FRAME+24`($sp) + insrwi $carry,$t5,16,0 + srwi $c1,$t5,16 + + addc $t2,$t2,$t6 + stfd $T2a,`$FRAME+32`($sp) + adde $t0,$t0,$t7 + stfd $T2b,`$FRAME+40`($sp) + addze $carry,$carry + stfd $T3a,`$FRAME+48`($sp) + addze $c1,$c1 + stfd $T3b,`$FRAME+56`($sp) + stw $t2,-4($tp) ; tp[j] + stw $t0,-8($tp) +___ +} +$code.=<<___; bdnz- Linner fctid $dota,$dota fctid $dotb,$dotb +___ +if ($SIZE_T==8 or $flavour =~ /osx/) { +$code.=<<___; ld $t0,`$FRAME+0`($sp) ld $t1,`$FRAME+8`($sp) ld $t2,`$FRAME+16`($sp) @@ -926,7 +1351,116 @@ insrdi $t6,$t7,48,0 srdi $ovf,$t7,48 std $t6,0($tp) ; tp[num-1] +___ +} else { +$code.=<<___; + lwz $t1,`$FRAME+0`($sp) + lwz $t0,`$FRAME+4`($sp) + lwz $t3,`$FRAME+8`($sp) + lwz $t2,`$FRAME+12`($sp) + lwz $t5,`$FRAME+16`($sp) + lwz $t4,`$FRAME+20`($sp) + lwz $t7,`$FRAME+24`($sp) + lwz $t6,`$FRAME+28`($sp) + stfd $dota,`$FRAME+64`($sp) + stfd $dotb,`$FRAME+72`($sp) + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + insrwi $carry,$t1,16,0 + srwi $c1,$t1,16 + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + insrwi $t0,$t2,16,0 ; 0..31 bits + lwz $t2,12($tp) ; tp[j] + insrwi $carry,$t3,16,0 + srwi $c1,$t3,16 + lwz $t3,8($tp) + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + insrwi $carry,$t5,16,0 + srwi $c1,$t5,16 + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + insrwi $t4,$t6,16,0 ; 32..63 bits + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + + addc $t0,$t0,$t2 + adde $t4,$t4,$t3 + addze $carry,$carry + addze $c1,$c1 + stw $t0,4($tp) ; tp[j-1] + stw $t4,0($tp) + + lwz $t3,`$FRAME+32`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36`($sp) ; permuted $t0 + lwz $t7,`$FRAME+40`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44`($sp) ; permuted $t2 + lwz $t1,`$FRAME+48`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52`($sp) ; permuted $t4 + lwz $t5,`$FRAME+56`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + + addc $t2,$t2,$carry + adde $t3,$t3,$c1 + srwi $carry,$t2,16 + insrwi $carry,$t3,16,0 + srwi $c1,$t3,16 + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + srwi $carry,$t6,16 + insrwi $t2,$t6,16,0 ; 64..95 bits + lwz $t6,20($tp) + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + lwzu $t7,16($tp) + addc $t0,$t0,$carry + adde $t1,$t1,$c1 + srwi $carry,$t0,16 + insrwi $carry,$t1,16,0 + srwi $c1,$t1,16 + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + srwi $carry,$t4,16 + insrwi $t0,$t4,16,0 ; 96..127 bits + insrwi $carry,$t5,16,0 + srwi $c1,$t5,16 + + addc $t2,$t2,$t6 + adde $t0,$t0,$t7 + lwz $t7,`$FRAME+64`($sp) + lwz $t6,`$FRAME+68`($sp) + addze $carry,$carry + addze $c1,$c1 + lwz $t5,`$FRAME+72`($sp) + lwz $t4,`$FRAME+76`($sp) + + addc $t6,$t6,$carry + adde $t7,$t7,$c1 + 
stw $t2,-4($tp) ; tp[j] + stw $t0,-8($tp) + addc $t6,$t6,$ovf + addze $t7,$t7 + srwi $carry,$t6,16 + insrwi $carry,$t7,16,0 + srwi $c1,$t7,16 + addc $t4,$t4,$carry + adde $t5,$t5,$c1 + + insrwi $t6,$t4,16,0 + srwi $t4,$t4,16 + insrwi $t4,$t5,16,0 + srwi $ovf,$t5,16 + stw $t6,4($tp) ; tp[num-1] + stw $t4,0($tp) +___ +} +$code.=<<___; slwi $t7,$num,2 addi $i,$i,8 subf $nap_d,$t7,$nap_d ; rewind pointer @@ -994,14 +1528,14 @@ mtctr $j .align 4 -Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order - ldu $t2,16($tp) +Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order + lwz $t1,8($tp) + lwz $t2,20($tp) + lwzu $t3,16($tp) lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order lwz $t5,8($np) lwz $t6,12($np) lwzu $t7,16($np) - extrdi $t1,$t0,32,0 - extrdi $t3,$t2,32,0 subfe $t4,$t4,$t0 ; tp[j]-np[j] stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1] @@ -1052,6 +1586,9 @@ $code.=<<___; $POP $i,0($sp) li r3,1 ; signal "handled" + $POP r19,`-12*8-13*$SIZE_T`($i) + $POP r20,`-12*8-12*$SIZE_T`($i) + $POP r21,`-12*8-11*$SIZE_T`($i) $POP r22,`-12*8-10*$SIZE_T`($i) $POP r23,`-12*8-9*$SIZE_T`($i) $POP r24,`-12*8-8*$SIZE_T`($i) @@ -1077,8 +1614,9 @@ mr $sp,$i blr .long 0 - .byte 0,12,4,0,0x8c,10,6,0 + .byte 0,12,4,0,0x8c,13,6,0 .long 0 +.size .$fname,.-.$fname .asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by " ___ diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl new file mode 100755 index 00000000000000..3b6ccf83d13e4b --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl @@ -0,0 +1,1898 @@ +#!/usr/bin/env perl + +############################################################################## +# # +# Copyright (c) 2012, Intel Corporation # +# # +# All rights reserved. # +# # +# Redistribution and use in source and binary forms, with or without # +# modification, are permitted provided that the following conditions are # +# met: # +# # +# * Redistributions of source code must retain the above copyright # +# notice, this list of conditions and the following disclaimer. # +# # +# * Redistributions in binary form must reproduce the above copyright # +# notice, this list of conditions and the following disclaimer in the # +# documentation and/or other materials provided with the # +# distribution. # +# # +# * Neither the name of the Intel Corporation nor the names of its # +# contributors may be used to endorse or promote products derived from # +# this software without specific prior written permission. # +# # +# # +# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY # +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR # +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# +# # +############################################################################## +# Developers and authors: # +# Shay Gueron (1, 2), and Vlad Krasnov (1) # +# (1) Intel Corporation, Israel Development Center, Haifa, Israel # +# (2) University of Haifa, Israel # +############################################################################## +# Reference: # +# [1] S. Gueron, V. Krasnov: "Software Implementation of Modular # +# Exponentiation, Using Advanced Vector Instructions Architectures", # +# F. Ozbudak and F. Rodriguez-Henriquez (Eds.): WAIFI 2012, LNCS 7369, # +# pp. 119?135, 2012. Springer-Verlag Berlin Heidelberg 2012 # +# [2] S. Gueron: "Efficient Software Implementations of Modular # +# Exponentiation", Journal of Cryptographic Engineering 2:31-43 (2012). # +# [3] S. Gueron, V. Krasnov: "Speeding up Big-numbers Squaring",IEEE # +# Proceedings of 9th International Conference on Information Technology: # +# New Generations (ITNG 2012), pp.821-823 (2012) # +# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis # +# resistant 1024-bit modular exponentiation, for optimizing RSA2048 # +# on AVX2 capable x86_64 platforms", # +# http://rt.openssl.org/Ticket/Display.html?id=2850&user=guest&pass=guest# +############################################################################## +# +# +13% improvement over original submission by +# +# rsa2048 sign/sec OpenSSL 1.0.1 scalar(*) this +# 2.3GHz Haswell 621 765/+23% 1113/+79% +# 2.3GHz Broadwell(**) 688 1200(***)/+74% 1120/+63% +# +# (*) if system doesn't support AVX2, for reference purposes; +# (**) scaled to 2.3GHz to simplify comparison; +# (***) scalar AD*X code is faster than AVX2 and is preferred code +# path for Broadwell; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); + $addx = ($1>=2.23); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); + $addx = ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + $addx = ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 + $avx = ($ver>=3.0) + ($ver>=3.01); + $addx = ($ver>=3.03); +} + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT = *OUT; + +if ($avx>1) {{{ +{ # void AMS_WW( +my $rp="%rdi"; # BN_ULONG *rp, +my $ap="%rsi"; # const BN_ULONG *ap, +my $np="%rdx"; # const BN_ULONG *np, +my $n0="%ecx"; # const BN_ULONG n0, +my $rep="%r8d"; # int repeat); + +# The registers that hold the accumulated redundant result +# The AMM works on 1024 bit operands, and redundant word size is 29 +# Therefore: ceil(1024/29)/4 = 9 +my $ACC0="%ymm0"; +my $ACC1="%ymm1"; +my $ACC2="%ymm2"; +my $ACC3="%ymm3"; +my $ACC4="%ymm4"; +my $ACC5="%ymm5"; +my $ACC6="%ymm6"; +my $ACC7="%ymm7"; +my $ACC8="%ymm8"; +my $ACC9="%ymm9"; +# Registers that hold the broadcasted words of bp, currently used +my 
$B1="%ymm10"; +my $B2="%ymm11"; +# Registers that hold the broadcasted words of Y, currently used +my $Y1="%ymm12"; +my $Y2="%ymm13"; +# Helper registers +my $TEMP1="%ymm14"; +my $AND_MASK="%ymm15"; +# alu registers that hold the first words of the ACC +my $r0="%r9"; +my $r1="%r10"; +my $r2="%r11"; +my $r3="%r12"; + +my $i="%r14d"; # loop counter +my $tmp = "%r15"; + +my $FrameSize=32*18+32*8; # place for A^2 and 2*A + +my $aap=$r0; +my $tp0="%rbx"; +my $tp1=$r3; +my $tpa=$tmp; + +$np="%r13"; # reassigned argument + +$code.=<<___; +.text + +.globl rsaz_1024_sqr_avx2 +.type rsaz_1024_sqr_avx2,\@function,5 +.align 64 +rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 + lea (%rsp), %rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + vzeroupper +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + vmovaps %xmm6,-0xd8(%rax) + vmovaps %xmm7,-0xc8(%rax) + vmovaps %xmm8,-0xb8(%rax) + vmovaps %xmm9,-0xa8(%rax) + vmovaps %xmm10,-0x98(%rax) + vmovaps %xmm11,-0x88(%rax) + vmovaps %xmm12,-0x78(%rax) + vmovaps %xmm13,-0x68(%rax) + vmovaps %xmm14,-0x58(%rax) + vmovaps %xmm15,-0x48(%rax) +.Lsqr_1024_body: +___ +$code.=<<___; + mov %rax,%rbp + mov %rdx, $np # reassigned argument + sub \$$FrameSize, %rsp + mov $np, $tmp + sub \$-128, $rp # size optimization + sub \$-128, $ap + sub \$-128, $np + + and \$4095, $tmp # see if $np crosses page + add \$32*10, $tmp + shr \$12, $tmp + vpxor $ACC9,$ACC9,$ACC9 + jz .Lsqr_1024_no_n_copy + + # unaligned 256-bit load that crosses page boundary can + # cause >2x performance degradation here, so if $np does + # cross page boundary, copy it to stack and make sure stack + # frame doesn't... + sub \$32*10,%rsp + vmovdqu 32*0-128($np), $ACC0 + and \$-2048, %rsp + vmovdqu 32*1-128($np), $ACC1 + vmovdqu 32*2-128($np), $ACC2 + vmovdqu 32*3-128($np), $ACC3 + vmovdqu 32*4-128($np), $ACC4 + vmovdqu 32*5-128($np), $ACC5 + vmovdqu 32*6-128($np), $ACC6 + vmovdqu 32*7-128($np), $ACC7 + vmovdqu 32*8-128($np), $ACC8 + lea $FrameSize+128(%rsp),$np + vmovdqu $ACC0, 32*0-128($np) + vmovdqu $ACC1, 32*1-128($np) + vmovdqu $ACC2, 32*2-128($np) + vmovdqu $ACC3, 32*3-128($np) + vmovdqu $ACC4, 32*4-128($np) + vmovdqu $ACC5, 32*5-128($np) + vmovdqu $ACC6, 32*6-128($np) + vmovdqu $ACC7, 32*7-128($np) + vmovdqu $ACC8, 32*8-128($np) + vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero + +.Lsqr_1024_no_n_copy: + and \$-1024, %rsp + + vmovdqu 32*1-128($ap), $ACC1 + vmovdqu 32*2-128($ap), $ACC2 + vmovdqu 32*3-128($ap), $ACC3 + vmovdqu 32*4-128($ap), $ACC4 + vmovdqu 32*5-128($ap), $ACC5 + vmovdqu 32*6-128($ap), $ACC6 + vmovdqu 32*7-128($ap), $ACC7 + vmovdqu 32*8-128($ap), $ACC8 + + lea 192(%rsp), $tp0 # 64+128=192 + vpbroadcastq .Land_mask(%rip), $AND_MASK + jmp .LOOP_GRANDE_SQR_1024 + +.align 32 +.LOOP_GRANDE_SQR_1024: + lea 32*18+128(%rsp), $aap # size optimization + lea 448(%rsp), $tp1 # 64+128+256=448 + + # the squaring is performed as described in Variant B of + # "Speeding up Big-Number Squaring", so start by calculating + # the A*2=A+A vector + vpaddq $ACC1, $ACC1, $ACC1 + vpbroadcastq 32*0-128($ap), $B1 + vpaddq $ACC2, $ACC2, $ACC2 + vmovdqa $ACC1, 32*0-128($aap) + vpaddq $ACC3, $ACC3, $ACC3 + vmovdqa $ACC2, 32*1-128($aap) + vpaddq $ACC4, $ACC4, $ACC4 + vmovdqa $ACC3, 32*2-128($aap) + vpaddq $ACC5, $ACC5, $ACC5 + vmovdqa $ACC4, 32*3-128($aap) + vpaddq $ACC6, $ACC6, $ACC6 + vmovdqa $ACC5, 32*4-128($aap) + vpaddq $ACC7, $ACC7, $ACC7 + vmovdqa $ACC6, 32*5-128($aap) + vpaddq $ACC8, $ACC8, $ACC8 + vmovdqa $ACC7, 32*6-128($aap) + vpxor $ACC9, $ACC9, $ACC9 + 
vmovdqa $ACC8, 32*7-128($aap) + + vpmuludq 32*0-128($ap), $B1, $ACC0 + vpbroadcastq 32*1-128($ap), $B2 + vmovdqu $ACC9, 32*9-192($tp0) # zero upper half + vpmuludq $B1, $ACC1, $ACC1 + vmovdqu $ACC9, 32*10-448($tp1) + vpmuludq $B1, $ACC2, $ACC2 + vmovdqu $ACC9, 32*11-448($tp1) + vpmuludq $B1, $ACC3, $ACC3 + vmovdqu $ACC9, 32*12-448($tp1) + vpmuludq $B1, $ACC4, $ACC4 + vmovdqu $ACC9, 32*13-448($tp1) + vpmuludq $B1, $ACC5, $ACC5 + vmovdqu $ACC9, 32*14-448($tp1) + vpmuludq $B1, $ACC6, $ACC6 + vmovdqu $ACC9, 32*15-448($tp1) + vpmuludq $B1, $ACC7, $ACC7 + vmovdqu $ACC9, 32*16-448($tp1) + vpmuludq $B1, $ACC8, $ACC8 + vpbroadcastq 32*2-128($ap), $B1 + vmovdqu $ACC9, 32*17-448($tp1) + + mov $ap, $tpa + mov \$4, $i + jmp .Lsqr_entry_1024 +___ +$TEMP0=$Y1; +$TEMP2=$Y2; +$code.=<<___; +.align 32 +.LOOP_SQR_1024: + vpbroadcastq 32*1-128($tpa), $B2 + vpmuludq 32*0-128($ap), $B1, $ACC0 + vpaddq 32*0-192($tp0), $ACC0, $ACC0 + vpmuludq 32*0-128($aap), $B1, $ACC1 + vpaddq 32*1-192($tp0), $ACC1, $ACC1 + vpmuludq 32*1-128($aap), $B1, $ACC2 + vpaddq 32*2-192($tp0), $ACC2, $ACC2 + vpmuludq 32*2-128($aap), $B1, $ACC3 + vpaddq 32*3-192($tp0), $ACC3, $ACC3 + vpmuludq 32*3-128($aap), $B1, $ACC4 + vpaddq 32*4-192($tp0), $ACC4, $ACC4 + vpmuludq 32*4-128($aap), $B1, $ACC5 + vpaddq 32*5-192($tp0), $ACC5, $ACC5 + vpmuludq 32*5-128($aap), $B1, $ACC6 + vpaddq 32*6-192($tp0), $ACC6, $ACC6 + vpmuludq 32*6-128($aap), $B1, $ACC7 + vpaddq 32*7-192($tp0), $ACC7, $ACC7 + vpmuludq 32*7-128($aap), $B1, $ACC8 + vpbroadcastq 32*2-128($tpa), $B1 + vpaddq 32*8-192($tp0), $ACC8, $ACC8 +.Lsqr_entry_1024: + vmovdqu $ACC0, 32*0-192($tp0) + vmovdqu $ACC1, 32*1-192($tp0) + + vpmuludq 32*1-128($ap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC2, $ACC2 + vpmuludq 32*1-128($aap), $B2, $TEMP1 + vpaddq $TEMP1, $ACC3, $ACC3 + vpmuludq 32*2-128($aap), $B2, $TEMP2 + vpaddq $TEMP2, $ACC4, $ACC4 + vpmuludq 32*3-128($aap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC5, $ACC5 + vpmuludq 32*4-128($aap), $B2, $TEMP1 + vpaddq $TEMP1, $ACC6, $ACC6 + vpmuludq 32*5-128($aap), $B2, $TEMP2 + vpaddq $TEMP2, $ACC7, $ACC7 + vpmuludq 32*6-128($aap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC8, $ACC8 + vpmuludq 32*7-128($aap), $B2, $ACC0 + vpbroadcastq 32*3-128($tpa), $B2 + vpaddq 32*9-192($tp0), $ACC0, $ACC0 + + vmovdqu $ACC2, 32*2-192($tp0) + vmovdqu $ACC3, 32*3-192($tp0) + + vpmuludq 32*2-128($ap), $B1, $TEMP2 + vpaddq $TEMP2, $ACC4, $ACC4 + vpmuludq 32*2-128($aap), $B1, $TEMP0 + vpaddq $TEMP0, $ACC5, $ACC5 + vpmuludq 32*3-128($aap), $B1, $TEMP1 + vpaddq $TEMP1, $ACC6, $ACC6 + vpmuludq 32*4-128($aap), $B1, $TEMP2 + vpaddq $TEMP2, $ACC7, $ACC7 + vpmuludq 32*5-128($aap), $B1, $TEMP0 + vpaddq $TEMP0, $ACC8, $ACC8 + vpmuludq 32*6-128($aap), $B1, $TEMP1 + vpaddq $TEMP1, $ACC0, $ACC0 + vpmuludq 32*7-128($aap), $B1, $ACC1 + vpbroadcastq 32*4-128($tpa), $B1 + vpaddq 32*10-448($tp1), $ACC1, $ACC1 + + vmovdqu $ACC4, 32*4-192($tp0) + vmovdqu $ACC5, 32*5-192($tp0) + + vpmuludq 32*3-128($ap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC6, $ACC6 + vpmuludq 32*3-128($aap), $B2, $TEMP1 + vpaddq $TEMP1, $ACC7, $ACC7 + vpmuludq 32*4-128($aap), $B2, $TEMP2 + vpaddq $TEMP2, $ACC8, $ACC8 + vpmuludq 32*5-128($aap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC0, $ACC0 + vpmuludq 32*6-128($aap), $B2, $TEMP1 + vpaddq $TEMP1, $ACC1, $ACC1 + vpmuludq 32*7-128($aap), $B2, $ACC2 + vpbroadcastq 32*5-128($tpa), $B2 + vpaddq 32*11-448($tp1), $ACC2, $ACC2 + + vmovdqu $ACC6, 32*6-192($tp0) + vmovdqu $ACC7, 32*7-192($tp0) + + vpmuludq 32*4-128($ap), $B1, $TEMP0 + vpaddq $TEMP0, $ACC8, $ACC8 + vpmuludq 32*4-128($aap), $B1, $TEMP1 + vpaddq $TEMP1, 
$ACC0, $ACC0 + vpmuludq 32*5-128($aap), $B1, $TEMP2 + vpaddq $TEMP2, $ACC1, $ACC1 + vpmuludq 32*6-128($aap), $B1, $TEMP0 + vpaddq $TEMP0, $ACC2, $ACC2 + vpmuludq 32*7-128($aap), $B1, $ACC3 + vpbroadcastq 32*6-128($tpa), $B1 + vpaddq 32*12-448($tp1), $ACC3, $ACC3 + + vmovdqu $ACC8, 32*8-192($tp0) + vmovdqu $ACC0, 32*9-192($tp0) + lea 8($tp0), $tp0 + + vpmuludq 32*5-128($ap), $B2, $TEMP2 + vpaddq $TEMP2, $ACC1, $ACC1 + vpmuludq 32*5-128($aap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC2, $ACC2 + vpmuludq 32*6-128($aap), $B2, $TEMP1 + vpaddq $TEMP1, $ACC3, $ACC3 + vpmuludq 32*7-128($aap), $B2, $ACC4 + vpbroadcastq 32*7-128($tpa), $B2 + vpaddq 32*13-448($tp1), $ACC4, $ACC4 + + vmovdqu $ACC1, 32*10-448($tp1) + vmovdqu $ACC2, 32*11-448($tp1) + + vpmuludq 32*6-128($ap), $B1, $TEMP0 + vpaddq $TEMP0, $ACC3, $ACC3 + vpmuludq 32*6-128($aap), $B1, $TEMP1 + vpbroadcastq 32*8-128($tpa), $ACC0 # borrow $ACC0 for $B1 + vpaddq $TEMP1, $ACC4, $ACC4 + vpmuludq 32*7-128($aap), $B1, $ACC5 + vpbroadcastq 32*0+8-128($tpa), $B1 # for next iteration + vpaddq 32*14-448($tp1), $ACC5, $ACC5 + + vmovdqu $ACC3, 32*12-448($tp1) + vmovdqu $ACC4, 32*13-448($tp1) + lea 8($tpa), $tpa + + vpmuludq 32*7-128($ap), $B2, $TEMP0 + vpaddq $TEMP0, $ACC5, $ACC5 + vpmuludq 32*7-128($aap), $B2, $ACC6 + vpaddq 32*15-448($tp1), $ACC6, $ACC6 + + vpmuludq 32*8-128($ap), $ACC0, $ACC7 + vmovdqu $ACC5, 32*14-448($tp1) + vpaddq 32*16-448($tp1), $ACC7, $ACC7 + vmovdqu $ACC6, 32*15-448($tp1) + vmovdqu $ACC7, 32*16-448($tp1) + lea 8($tp1), $tp1 + + dec $i + jnz .LOOP_SQR_1024 +___ +$ZERO = $ACC9; +$TEMP0 = $B1; +$TEMP2 = $B2; +$TEMP3 = $Y1; +$TEMP4 = $Y2; +$code.=<<___; + #we need to fix indexes 32-39 to avoid overflow + vmovdqu 32*8(%rsp), $ACC8 # 32*8-192($tp0), + vmovdqu 32*9(%rsp), $ACC1 # 32*9-192($tp0) + vmovdqu 32*10(%rsp), $ACC2 # 32*10-192($tp0) + lea 192(%rsp), $tp0 # 64+128=192 + + vpsrlq \$29, $ACC8, $TEMP1 + vpand $AND_MASK, $ACC8, $ACC8 + vpsrlq \$29, $ACC1, $TEMP2 + vpand $AND_MASK, $ACC1, $ACC1 + + vpermq \$0x93, $TEMP1, $TEMP1 + vpxor $ZERO, $ZERO, $ZERO + vpermq \$0x93, $TEMP2, $TEMP2 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC8, $ACC8 + vpblendd \$3, $TEMP2, $ZERO, $TEMP2 + vpaddq $TEMP1, $ACC1, $ACC1 + vpaddq $TEMP2, $ACC2, $ACC2 + vmovdqu $ACC1, 32*9-192($tp0) + vmovdqu $ACC2, 32*10-192($tp0) + + mov (%rsp), %rax + mov 8(%rsp), $r1 + mov 16(%rsp), $r2 + mov 24(%rsp), $r3 + vmovdqu 32*1(%rsp), $ACC1 + vmovdqu 32*2-192($tp0), $ACC2 + vmovdqu 32*3-192($tp0), $ACC3 + vmovdqu 32*4-192($tp0), $ACC4 + vmovdqu 32*5-192($tp0), $ACC5 + vmovdqu 32*6-192($tp0), $ACC6 + vmovdqu 32*7-192($tp0), $ACC7 + + mov %rax, $r0 + imull $n0, %eax + and \$0x1fffffff, %eax + vmovd %eax, $Y1 + + mov %rax, %rdx + imulq -128($np), %rax + vpbroadcastq $Y1, $Y1 + add %rax, $r0 + mov %rdx, %rax + imulq 8-128($np), %rax + shr \$29, $r0 + add %rax, $r1 + mov %rdx, %rax + imulq 16-128($np), %rax + add $r0, $r1 + add %rax, $r2 + imulq 24-128($np), %rdx + add %rdx, $r3 + + mov $r1, %rax + imull $n0, %eax + and \$0x1fffffff, %eax + + mov \$9, $i + jmp .LOOP_REDUCE_1024 + +.align 32 +.LOOP_REDUCE_1024: + vmovd %eax, $Y2 + vpbroadcastq $Y2, $Y2 + + vpmuludq 32*1-128($np), $Y1, $TEMP0 + mov %rax, %rdx + imulq -128($np), %rax + vpaddq $TEMP0, $ACC1, $ACC1 + add %rax, $r1 + vpmuludq 32*2-128($np), $Y1, $TEMP1 + mov %rdx, %rax + imulq 8-128($np), %rax + vpaddq $TEMP1, $ACC2, $ACC2 + vpmuludq 32*3-128($np), $Y1, $TEMP2 + .byte 0x67 + add %rax, $r2 + .byte 0x67 + mov %rdx, %rax + imulq 16-128($np), %rax + shr \$29, $r1 + 
vpaddq $TEMP2, $ACC3, $ACC3 + vpmuludq 32*4-128($np), $Y1, $TEMP0 + add %rax, $r3 + add $r1, $r2 + vpaddq $TEMP0, $ACC4, $ACC4 + vpmuludq 32*5-128($np), $Y1, $TEMP1 + mov $r2, %rax + imull $n0, %eax + vpaddq $TEMP1, $ACC5, $ACC5 + vpmuludq 32*6-128($np), $Y1, $TEMP2 + and \$0x1fffffff, %eax + vpaddq $TEMP2, $ACC6, $ACC6 + vpmuludq 32*7-128($np), $Y1, $TEMP0 + vpaddq $TEMP0, $ACC7, $ACC7 + vpmuludq 32*8-128($np), $Y1, $TEMP1 + vmovd %eax, $Y1 + #vmovdqu 32*1-8-128($np), $TEMP2 # moved below + vpaddq $TEMP1, $ACC8, $ACC8 + #vmovdqu 32*2-8-128($np), $TEMP0 # moved below + vpbroadcastq $Y1, $Y1 + + vpmuludq 32*1-8-128($np), $Y2, $TEMP2 # see above + vmovdqu 32*3-8-128($np), $TEMP1 + mov %rax, %rdx + imulq -128($np), %rax + vpaddq $TEMP2, $ACC1, $ACC1 + vpmuludq 32*2-8-128($np), $Y2, $TEMP0 # see above + vmovdqu 32*4-8-128($np), $TEMP2 + add %rax, $r2 + mov %rdx, %rax + imulq 8-128($np), %rax + vpaddq $TEMP0, $ACC2, $ACC2 + add $r3, %rax + shr \$29, $r2 + vpmuludq $Y2, $TEMP1, $TEMP1 + vmovdqu 32*5-8-128($np), $TEMP0 + add $r2, %rax + vpaddq $TEMP1, $ACC3, $ACC3 + vpmuludq $Y2, $TEMP2, $TEMP2 + vmovdqu 32*6-8-128($np), $TEMP1 + .byte 0x67 + mov %rax, $r3 + imull $n0, %eax + vpaddq $TEMP2, $ACC4, $ACC4 + vpmuludq $Y2, $TEMP0, $TEMP0 + .byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 # vmovdqu 32*7-8-128($np), $TEMP2 + and \$0x1fffffff, %eax + vpaddq $TEMP0, $ACC5, $ACC5 + vpmuludq $Y2, $TEMP1, $TEMP1 + vmovdqu 32*8-8-128($np), $TEMP0 + vpaddq $TEMP1, $ACC6, $ACC6 + vpmuludq $Y2, $TEMP2, $TEMP2 + vmovdqu 32*9-8-128($np), $ACC9 + vmovd %eax, $ACC0 # borrow ACC0 for Y2 + imulq -128($np), %rax + vpaddq $TEMP2, $ACC7, $ACC7 + vpmuludq $Y2, $TEMP0, $TEMP0 + vmovdqu 32*1-16-128($np), $TEMP1 + vpbroadcastq $ACC0, $ACC0 + vpaddq $TEMP0, $ACC8, $ACC8 + vpmuludq $Y2, $ACC9, $ACC9 + vmovdqu 32*2-16-128($np), $TEMP2 + add %rax, $r3 + +___ +($ACC0,$Y2)=($Y2,$ACC0); +$code.=<<___; + vmovdqu 32*1-24-128($np), $ACC0 + vpmuludq $Y1, $TEMP1, $TEMP1 + vmovdqu 32*3-16-128($np), $TEMP0 + vpaddq $TEMP1, $ACC1, $ACC1 + vpmuludq $Y2, $ACC0, $ACC0 + vpmuludq $Y1, $TEMP2, $TEMP2 + .byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff # vmovdqu 32*4-16-128($np), $TEMP1 + vpaddq $ACC1, $ACC0, $ACC0 + vpaddq $TEMP2, $ACC2, $ACC2 + vpmuludq $Y1, $TEMP0, $TEMP0 + vmovdqu 32*5-16-128($np), $TEMP2 + .byte 0x67 + vmovq $ACC0, %rax + vmovdqu $ACC0, (%rsp) # transfer $r0-$r3 + vpaddq $TEMP0, $ACC3, $ACC3 + vpmuludq $Y1, $TEMP1, $TEMP1 + vmovdqu 32*6-16-128($np), $TEMP0 + vpaddq $TEMP1, $ACC4, $ACC4 + vpmuludq $Y1, $TEMP2, $TEMP2 + vmovdqu 32*7-16-128($np), $TEMP1 + vpaddq $TEMP2, $ACC5, $ACC5 + vpmuludq $Y1, $TEMP0, $TEMP0 + vmovdqu 32*8-16-128($np), $TEMP2 + vpaddq $TEMP0, $ACC6, $ACC6 + vpmuludq $Y1, $TEMP1, $TEMP1 + shr \$29, $r3 + vmovdqu 32*9-16-128($np), $TEMP0 + add $r3, %rax + vpaddq $TEMP1, $ACC7, $ACC7 + vpmuludq $Y1, $TEMP2, $TEMP2 + #vmovdqu 32*2-24-128($np), $TEMP1 # moved below + mov %rax, $r0 + imull $n0, %eax + vpaddq $TEMP2, $ACC8, $ACC8 + vpmuludq $Y1, $TEMP0, $TEMP0 + and \$0x1fffffff, %eax + vmovd %eax, $Y1 + vmovdqu 32*3-24-128($np), $TEMP2 + .byte 0x67 + vpaddq $TEMP0, $ACC9, $ACC9 + vpbroadcastq $Y1, $Y1 + + vpmuludq 32*2-24-128($np), $Y2, $TEMP1 # see above + vmovdqu 32*4-24-128($np), $TEMP0 + mov %rax, %rdx + imulq -128($np), %rax + mov 8(%rsp), $r1 + vpaddq $TEMP1, $ACC2, $ACC1 + vpmuludq $Y2, $TEMP2, $TEMP2 + vmovdqu 32*5-24-128($np), $TEMP1 + add %rax, $r0 + mov %rdx, %rax + imulq 8-128($np), %rax + .byte 0x67 + shr \$29, $r0 + mov 16(%rsp), $r2 + vpaddq $TEMP2, $ACC3, $ACC2 + vpmuludq $Y2, $TEMP0, $TEMP0 
+ vmovdqu 32*6-24-128($np), $TEMP2 + add %rax, $r1 + mov %rdx, %rax + imulq 16-128($np), %rax + vpaddq $TEMP0, $ACC4, $ACC3 + vpmuludq $Y2, $TEMP1, $TEMP1 + vmovdqu 32*7-24-128($np), $TEMP0 + imulq 24-128($np), %rdx # future $r3 + add %rax, $r2 + lea ($r0,$r1), %rax + vpaddq $TEMP1, $ACC5, $ACC4 + vpmuludq $Y2, $TEMP2, $TEMP2 + vmovdqu 32*8-24-128($np), $TEMP1 + mov %rax, $r1 + imull $n0, %eax + vpmuludq $Y2, $TEMP0, $TEMP0 + vpaddq $TEMP2, $ACC6, $ACC5 + vmovdqu 32*9-24-128($np), $TEMP2 + and \$0x1fffffff, %eax + vpaddq $TEMP0, $ACC7, $ACC6 + vpmuludq $Y2, $TEMP1, $TEMP1 + add 24(%rsp), %rdx + vpaddq $TEMP1, $ACC8, $ACC7 + vpmuludq $Y2, $TEMP2, $TEMP2 + vpaddq $TEMP2, $ACC9, $ACC8 + vmovq $r3, $ACC9 + mov %rdx, $r3 + + dec $i + jnz .LOOP_REDUCE_1024 +___ +($ACC0,$Y2)=($Y2,$ACC0); +$code.=<<___; + lea 448(%rsp), $tp1 # size optimization + vpaddq $ACC9, $Y2, $ACC0 + vpxor $ZERO, $ZERO, $ZERO + + vpaddq 32*9-192($tp0), $ACC0, $ACC0 + vpaddq 32*10-448($tp1), $ACC1, $ACC1 + vpaddq 32*11-448($tp1), $ACC2, $ACC2 + vpaddq 32*12-448($tp1), $ACC3, $ACC3 + vpaddq 32*13-448($tp1), $ACC4, $ACC4 + vpaddq 32*14-448($tp1), $ACC5, $ACC5 + vpaddq 32*15-448($tp1), $ACC6, $ACC6 + vpaddq 32*16-448($tp1), $ACC7, $ACC7 + vpaddq 32*17-448($tp1), $ACC8, $ACC8 + + vpsrlq \$29, $ACC0, $TEMP1 + vpand $AND_MASK, $ACC0, $ACC0 + vpsrlq \$29, $ACC1, $TEMP2 + vpand $AND_MASK, $ACC1, $ACC1 + vpsrlq \$29, $ACC2, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC2, $ACC2 + vpsrlq \$29, $ACC3, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC3, $ACC3 + vpermq \$0x93, $TEMP3, $TEMP3 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP4, $TEMP4 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC0, $ACC0 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC1, $ACC1 + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC2, $ACC2 + vpblendd \$3, $TEMP4, $ZERO, $TEMP4 + vpaddq $TEMP3, $ACC3, $ACC3 + vpaddq $TEMP4, $ACC4, $ACC4 + + vpsrlq \$29, $ACC0, $TEMP1 + vpand $AND_MASK, $ACC0, $ACC0 + vpsrlq \$29, $ACC1, $TEMP2 + vpand $AND_MASK, $ACC1, $ACC1 + vpsrlq \$29, $ACC2, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC2, $ACC2 + vpsrlq \$29, $ACC3, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC3, $ACC3 + vpermq \$0x93, $TEMP3, $TEMP3 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP4, $TEMP4 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC0, $ACC0 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC1, $ACC1 + vmovdqu $ACC0, 32*0-128($rp) + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC2, $ACC2 + vmovdqu $ACC1, 32*1-128($rp) + vpblendd \$3, $TEMP4, $ZERO, $TEMP4 + vpaddq $TEMP3, $ACC3, $ACC3 + vmovdqu $ACC2, 32*2-128($rp) + vpaddq $TEMP4, $ACC4, $ACC4 + vmovdqu $ACC3, 32*3-128($rp) +___ +$TEMP5=$ACC0; +$code.=<<___; + vpsrlq \$29, $ACC4, $TEMP1 + vpand $AND_MASK, $ACC4, $ACC4 + vpsrlq \$29, $ACC5, $TEMP2 + vpand $AND_MASK, $ACC5, $ACC5 + vpsrlq \$29, $ACC6, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC6, $ACC6 + vpsrlq \$29, $ACC7, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC7, $ACC7 + vpsrlq \$29, $ACC8, $TEMP5 + vpermq \$0x93, $TEMP3, $TEMP3 + vpand $AND_MASK, $ACC8, $ACC8 + vpermq \$0x93, $TEMP4, $TEMP4 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP5, $TEMP5 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC4, $ACC4 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC5, $ACC5 + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, 
$ACC6, $ACC6 + vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 + vpaddq $TEMP3, $ACC7, $ACC7 + vpaddq $TEMP4, $ACC8, $ACC8 + + vpsrlq \$29, $ACC4, $TEMP1 + vpand $AND_MASK, $ACC4, $ACC4 + vpsrlq \$29, $ACC5, $TEMP2 + vpand $AND_MASK, $ACC5, $ACC5 + vpsrlq \$29, $ACC6, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC6, $ACC6 + vpsrlq \$29, $ACC7, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC7, $ACC7 + vpsrlq \$29, $ACC8, $TEMP5 + vpermq \$0x93, $TEMP3, $TEMP3 + vpand $AND_MASK, $ACC8, $ACC8 + vpermq \$0x93, $TEMP4, $TEMP4 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP5, $TEMP5 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC4, $ACC4 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC5, $ACC5 + vmovdqu $ACC4, 32*4-128($rp) + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC6, $ACC6 + vmovdqu $ACC5, 32*5-128($rp) + vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 + vpaddq $TEMP3, $ACC7, $ACC7 + vmovdqu $ACC6, 32*6-128($rp) + vpaddq $TEMP4, $ACC8, $ACC8 + vmovdqu $ACC7, 32*7-128($rp) + vmovdqu $ACC8, 32*8-128($rp) + + mov $rp, $ap + dec $rep + jne .LOOP_GRANDE_SQR_1024 + + vzeroall + mov %rbp, %rax +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp # restore %rsp +.Lsqr_1024_epilogue: + ret +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +___ +} + +{ # void AMM_WW( +my $rp="%rdi"; # BN_ULONG *rp, +my $ap="%rsi"; # const BN_ULONG *ap, +my $bp="%rdx"; # const BN_ULONG *bp, +my $np="%rcx"; # const BN_ULONG *np, +my $n0="%r8d"; # unsigned int n0); + +# The registers that hold the accumulated redundant result +# The AMM works on 1024 bit operands, and redundant word size is 29 +# Therefore: ceil(1024/29)/4 = 9 +my $ACC0="%ymm0"; +my $ACC1="%ymm1"; +my $ACC2="%ymm2"; +my $ACC3="%ymm3"; +my $ACC4="%ymm4"; +my $ACC5="%ymm5"; +my $ACC6="%ymm6"; +my $ACC7="%ymm7"; +my $ACC8="%ymm8"; +my $ACC9="%ymm9"; + +# Registers that hold the broadcasted words of multiplier, currently used +my $Bi="%ymm10"; +my $Yi="%ymm11"; + +# Helper registers +my $TEMP0=$ACC0; +my $TEMP1="%ymm12"; +my $TEMP2="%ymm13"; +my $ZERO="%ymm14"; +my $AND_MASK="%ymm15"; + +# alu registers that hold the first words of the ACC +my $r0="%r9"; +my $r1="%r10"; +my $r2="%r11"; +my $r3="%r12"; + +my $i="%r14d"; +my $tmp="%r15"; + +$bp="%r13"; # reassigned argument + +$code.=<<___; +.globl rsaz_1024_mul_avx2 +.type rsaz_1024_mul_avx2,\@function,5 +.align 64 +rsaz_1024_mul_avx2: + lea (%rsp), %rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + vzeroupper + lea -0xa8(%rsp),%rsp + vmovaps %xmm6,-0xd8(%rax) + vmovaps %xmm7,-0xc8(%rax) + vmovaps %xmm8,-0xb8(%rax) + vmovaps %xmm9,-0xa8(%rax) + vmovaps %xmm10,-0x98(%rax) + vmovaps %xmm11,-0x88(%rax) + vmovaps %xmm12,-0x78(%rax) + vmovaps %xmm13,-0x68(%rax) + vmovaps %xmm14,-0x58(%rax) + vmovaps %xmm15,-0x48(%rax) +.Lmul_1024_body: +___ +$code.=<<___; + mov %rax,%rbp + vzeroall + mov %rdx, $bp # reassigned argument + sub \$64,%rsp + + # unaligned 256-bit load that crosses page boundary can + # cause severe performance degradation here, so if $ap does + # cross page boundary, swap 
it with $bp [meaning that caller + # is advised to lay down $ap and $bp next to each other, so + # that only one can cross page boundary]. + .byte 0x67,0x67 + mov $ap, $tmp + and \$4095, $tmp + add \$32*10, $tmp + shr \$12, $tmp + mov $ap, $tmp + cmovnz $bp, $ap + cmovnz $tmp, $bp + + mov $np, $tmp + sub \$-128,$ap # size optimization + sub \$-128,$np + sub \$-128,$rp + + and \$4095, $tmp # see if $np crosses page + add \$32*10, $tmp + .byte 0x67,0x67 + shr \$12, $tmp + jz .Lmul_1024_no_n_copy + + # unaligned 256-bit load that crosses page boundary can + # cause severe performance degradation here, so if $np does + # cross page boundary, copy it to stack and make sure stack + # frame doesn't... + sub \$32*10,%rsp + vmovdqu 32*0-128($np), $ACC0 + and \$-512, %rsp + vmovdqu 32*1-128($np), $ACC1 + vmovdqu 32*2-128($np), $ACC2 + vmovdqu 32*3-128($np), $ACC3 + vmovdqu 32*4-128($np), $ACC4 + vmovdqu 32*5-128($np), $ACC5 + vmovdqu 32*6-128($np), $ACC6 + vmovdqu 32*7-128($np), $ACC7 + vmovdqu 32*8-128($np), $ACC8 + lea 64+128(%rsp),$np + vmovdqu $ACC0, 32*0-128($np) + vpxor $ACC0, $ACC0, $ACC0 + vmovdqu $ACC1, 32*1-128($np) + vpxor $ACC1, $ACC1, $ACC1 + vmovdqu $ACC2, 32*2-128($np) + vpxor $ACC2, $ACC2, $ACC2 + vmovdqu $ACC3, 32*3-128($np) + vpxor $ACC3, $ACC3, $ACC3 + vmovdqu $ACC4, 32*4-128($np) + vpxor $ACC4, $ACC4, $ACC4 + vmovdqu $ACC5, 32*5-128($np) + vpxor $ACC5, $ACC5, $ACC5 + vmovdqu $ACC6, 32*6-128($np) + vpxor $ACC6, $ACC6, $ACC6 + vmovdqu $ACC7, 32*7-128($np) + vpxor $ACC7, $ACC7, $ACC7 + vmovdqu $ACC8, 32*8-128($np) + vmovdqa $ACC0, $ACC8 + vmovdqu $ACC9, 32*9-128($np) # $ACC9 is zero after vzeroall +.Lmul_1024_no_n_copy: + and \$-64,%rsp + + mov ($bp), %rbx + vpbroadcastq ($bp), $Bi + vmovdqu $ACC0, (%rsp) # clear top of stack + xor $r0, $r0 + .byte 0x67 + xor $r1, $r1 + xor $r2, $r2 + xor $r3, $r3 + + vmovdqu .Land_mask(%rip), $AND_MASK + mov \$9, $i + vmovdqu $ACC9, 32*9-128($rp) # $ACC9 is zero after vzeroall + jmp .Loop_mul_1024 + +.align 32 +.Loop_mul_1024: + vpsrlq \$29, $ACC3, $ACC9 # correct $ACC3(*) + mov %rbx, %rax + imulq -128($ap), %rax + add $r0, %rax + mov %rbx, $r1 + imulq 8-128($ap), $r1 + add 8(%rsp), $r1 + + mov %rax, $r0 + imull $n0, %eax + and \$0x1fffffff, %eax + + mov %rbx, $r2 + imulq 16-128($ap), $r2 + add 16(%rsp), $r2 + + mov %rbx, $r3 + imulq 24-128($ap), $r3 + add 24(%rsp), $r3 + vpmuludq 32*1-128($ap),$Bi,$TEMP0 + vmovd %eax, $Yi + vpaddq $TEMP0,$ACC1,$ACC1 + vpmuludq 32*2-128($ap),$Bi,$TEMP1 + vpbroadcastq $Yi, $Yi + vpaddq $TEMP1,$ACC2,$ACC2 + vpmuludq 32*3-128($ap),$Bi,$TEMP2 + vpand $AND_MASK, $ACC3, $ACC3 # correct $ACC3 + vpaddq $TEMP2,$ACC3,$ACC3 + vpmuludq 32*4-128($ap),$Bi,$TEMP0 + vpaddq $TEMP0,$ACC4,$ACC4 + vpmuludq 32*5-128($ap),$Bi,$TEMP1 + vpaddq $TEMP1,$ACC5,$ACC5 + vpmuludq 32*6-128($ap),$Bi,$TEMP2 + vpaddq $TEMP2,$ACC6,$ACC6 + vpmuludq 32*7-128($ap),$Bi,$TEMP0 + vpermq \$0x93, $ACC9, $ACC9 # correct $ACC3 + vpaddq $TEMP0,$ACC7,$ACC7 + vpmuludq 32*8-128($ap),$Bi,$TEMP1 + vpbroadcastq 8($bp), $Bi + vpaddq $TEMP1,$ACC8,$ACC8 + + mov %rax,%rdx + imulq -128($np),%rax + add %rax,$r0 + mov %rdx,%rax + imulq 8-128($np),%rax + add %rax,$r1 + mov %rdx,%rax + imulq 16-128($np),%rax + add %rax,$r2 + shr \$29, $r0 + imulq 24-128($np),%rdx + add %rdx,$r3 + add $r0, $r1 + + vpmuludq 32*1-128($np),$Yi,$TEMP2 + vmovq $Bi, %rbx + vpaddq $TEMP2,$ACC1,$ACC1 + vpmuludq 32*2-128($np),$Yi,$TEMP0 + vpaddq $TEMP0,$ACC2,$ACC2 + vpmuludq 32*3-128($np),$Yi,$TEMP1 + vpaddq $TEMP1,$ACC3,$ACC3 + vpmuludq 32*4-128($np),$Yi,$TEMP2 + vpaddq $TEMP2,$ACC4,$ACC4 + vpmuludq 
32*5-128($np),$Yi,$TEMP0 + vpaddq $TEMP0,$ACC5,$ACC5 + vpmuludq 32*6-128($np),$Yi,$TEMP1 + vpaddq $TEMP1,$ACC6,$ACC6 + vpmuludq 32*7-128($np),$Yi,$TEMP2 + vpblendd \$3, $ZERO, $ACC9, $ACC9 # correct $ACC3 + vpaddq $TEMP2,$ACC7,$ACC7 + vpmuludq 32*8-128($np),$Yi,$TEMP0 + vpaddq $ACC9, $ACC3, $ACC3 # correct $ACC3 + vpaddq $TEMP0,$ACC8,$ACC8 + + mov %rbx, %rax + imulq -128($ap),%rax + add %rax,$r1 + vmovdqu -8+32*1-128($ap),$TEMP1 + mov %rbx, %rax + imulq 8-128($ap),%rax + add %rax,$r2 + vmovdqu -8+32*2-128($ap),$TEMP2 + + mov $r1, %rax + imull $n0, %eax + and \$0x1fffffff, %eax + + imulq 16-128($ap),%rbx + add %rbx,$r3 + vpmuludq $Bi,$TEMP1,$TEMP1 + vmovd %eax, $Yi + vmovdqu -8+32*3-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC1,$ACC1 + vpmuludq $Bi,$TEMP2,$TEMP2 + vpbroadcastq $Yi, $Yi + vmovdqu -8+32*4-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC2,$ACC2 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovdqu -8+32*5-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC3,$ACC3 + vpmuludq $Bi,$TEMP1,$TEMP1 + vmovdqu -8+32*6-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC4,$ACC4 + vpmuludq $Bi,$TEMP2,$TEMP2 + vmovdqu -8+32*7-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC5,$ACC5 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovdqu -8+32*8-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC6,$ACC6 + vpmuludq $Bi,$TEMP1,$TEMP1 + vmovdqu -8+32*9-128($ap),$ACC9 + vpaddq $TEMP1,$ACC7,$ACC7 + vpmuludq $Bi,$TEMP2,$TEMP2 + vpaddq $TEMP2,$ACC8,$ACC8 + vpmuludq $Bi,$ACC9,$ACC9 + vpbroadcastq 16($bp), $Bi + + mov %rax,%rdx + imulq -128($np),%rax + add %rax,$r1 + vmovdqu -8+32*1-128($np),$TEMP0 + mov %rdx,%rax + imulq 8-128($np),%rax + add %rax,$r2 + vmovdqu -8+32*2-128($np),$TEMP1 + shr \$29, $r1 + imulq 16-128($np),%rdx + add %rdx,$r3 + add $r1, $r2 + + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovq $Bi, %rbx + vmovdqu -8+32*3-128($np),$TEMP2 + vpaddq $TEMP0,$ACC1,$ACC1 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu -8+32*4-128($np),$TEMP0 + vpaddq $TEMP1,$ACC2,$ACC2 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -8+32*5-128($np),$TEMP1 + vpaddq $TEMP2,$ACC3,$ACC3 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovdqu -8+32*6-128($np),$TEMP2 + vpaddq $TEMP0,$ACC4,$ACC4 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu -8+32*7-128($np),$TEMP0 + vpaddq $TEMP1,$ACC5,$ACC5 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -8+32*8-128($np),$TEMP1 + vpaddq $TEMP2,$ACC6,$ACC6 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovdqu -8+32*9-128($np),$TEMP2 + vpaddq $TEMP0,$ACC7,$ACC7 + vpmuludq $Yi,$TEMP1,$TEMP1 + vpaddq $TEMP1,$ACC8,$ACC8 + vpmuludq $Yi,$TEMP2,$TEMP2 + vpaddq $TEMP2,$ACC9,$ACC9 + + vmovdqu -16+32*1-128($ap),$TEMP0 + mov %rbx,%rax + imulq -128($ap),%rax + add $r2,%rax + + vmovdqu -16+32*2-128($ap),$TEMP1 + mov %rax,$r2 + imull $n0, %eax + and \$0x1fffffff, %eax + + imulq 8-128($ap),%rbx + add %rbx,$r3 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovd %eax, $Yi + vmovdqu -16+32*3-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC1,$ACC1 + vpmuludq $Bi,$TEMP1,$TEMP1 + vpbroadcastq $Yi, $Yi + vmovdqu -16+32*4-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC2,$ACC2 + vpmuludq $Bi,$TEMP2,$TEMP2 + vmovdqu -16+32*5-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC3,$ACC3 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovdqu -16+32*6-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC4,$ACC4 + vpmuludq $Bi,$TEMP1,$TEMP1 + vmovdqu -16+32*7-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC5,$ACC5 + vpmuludq $Bi,$TEMP2,$TEMP2 + vmovdqu -16+32*8-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC6,$ACC6 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovdqu -16+32*9-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC7,$ACC7 + vpmuludq $Bi,$TEMP1,$TEMP1 + vpaddq $TEMP1,$ACC8,$ACC8 + vpmuludq $Bi,$TEMP2,$TEMP2 + vpbroadcastq 24($bp), $Bi + vpaddq $TEMP2,$ACC9,$ACC9 + + vmovdqu -16+32*1-128($np),$TEMP0 + mov %rax,%rdx + imulq 
-128($np),%rax + add %rax,$r2 + vmovdqu -16+32*2-128($np),$TEMP1 + imulq 8-128($np),%rdx + add %rdx,$r3 + shr \$29, $r2 + + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovq $Bi, %rbx + vmovdqu -16+32*3-128($np),$TEMP2 + vpaddq $TEMP0,$ACC1,$ACC1 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu -16+32*4-128($np),$TEMP0 + vpaddq $TEMP1,$ACC2,$ACC2 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -16+32*5-128($np),$TEMP1 + vpaddq $TEMP2,$ACC3,$ACC3 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovdqu -16+32*6-128($np),$TEMP2 + vpaddq $TEMP0,$ACC4,$ACC4 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu -16+32*7-128($np),$TEMP0 + vpaddq $TEMP1,$ACC5,$ACC5 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -16+32*8-128($np),$TEMP1 + vpaddq $TEMP2,$ACC6,$ACC6 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovdqu -16+32*9-128($np),$TEMP2 + vpaddq $TEMP0,$ACC7,$ACC7 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu -24+32*1-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC8,$ACC8 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -24+32*2-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC9,$ACC9 + + add $r2, $r3 + imulq -128($ap),%rbx + add %rbx,$r3 + + mov $r3, %rax + imull $n0, %eax + and \$0x1fffffff, %eax + + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovd %eax, $Yi + vmovdqu -24+32*3-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC1,$ACC1 + vpmuludq $Bi,$TEMP1,$TEMP1 + vpbroadcastq $Yi, $Yi + vmovdqu -24+32*4-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC2,$ACC2 + vpmuludq $Bi,$TEMP2,$TEMP2 + vmovdqu -24+32*5-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC3,$ACC3 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovdqu -24+32*6-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC4,$ACC4 + vpmuludq $Bi,$TEMP1,$TEMP1 + vmovdqu -24+32*7-128($ap),$TEMP0 + vpaddq $TEMP1,$ACC5,$ACC5 + vpmuludq $Bi,$TEMP2,$TEMP2 + vmovdqu -24+32*8-128($ap),$TEMP1 + vpaddq $TEMP2,$ACC6,$ACC6 + vpmuludq $Bi,$TEMP0,$TEMP0 + vmovdqu -24+32*9-128($ap),$TEMP2 + vpaddq $TEMP0,$ACC7,$ACC7 + vpmuludq $Bi,$TEMP1,$TEMP1 + vpaddq $TEMP1,$ACC8,$ACC8 + vpmuludq $Bi,$TEMP2,$TEMP2 + vpbroadcastq 32($bp), $Bi + vpaddq $TEMP2,$ACC9,$ACC9 + add \$32, $bp # $bp++ + + vmovdqu -24+32*1-128($np),$TEMP0 + imulq -128($np),%rax + add %rax,$r3 + shr \$29, $r3 + + vmovdqu -24+32*2-128($np),$TEMP1 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovq $Bi, %rbx + vmovdqu -24+32*3-128($np),$TEMP2 + vpaddq $TEMP0,$ACC1,$ACC0 # $ACC0==$TEMP0 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu $ACC0, (%rsp) # transfer $r0-$r3 + vpaddq $TEMP1,$ACC2,$ACC1 + vmovdqu -24+32*4-128($np),$TEMP0 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -24+32*5-128($np),$TEMP1 + vpaddq $TEMP2,$ACC3,$ACC2 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovdqu -24+32*6-128($np),$TEMP2 + vpaddq $TEMP0,$ACC4,$ACC3 + vpmuludq $Yi,$TEMP1,$TEMP1 + vmovdqu -24+32*7-128($np),$TEMP0 + vpaddq $TEMP1,$ACC5,$ACC4 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovdqu -24+32*8-128($np),$TEMP1 + vpaddq $TEMP2,$ACC6,$ACC5 + vpmuludq $Yi,$TEMP0,$TEMP0 + vmovdqu -24+32*9-128($np),$TEMP2 + mov $r3, $r0 + vpaddq $TEMP0,$ACC7,$ACC6 + vpmuludq $Yi,$TEMP1,$TEMP1 + add (%rsp), $r0 + vpaddq $TEMP1,$ACC8,$ACC7 + vpmuludq $Yi,$TEMP2,$TEMP2 + vmovq $r3, $TEMP1 + vpaddq $TEMP2,$ACC9,$ACC8 + + dec $i + jnz .Loop_mul_1024 +___ + +# (*) Original implementation was correcting ACC1-ACC3 for overflow +# after 7 loop runs, or after 28 iterations, or 56 additions. +# But as we underutilize resources, it's possible to correct in +# each iteration with marginal performance loss. But then, as +# we do it in each iteration, we can correct less digits, and +# avoid performance penalties completely. Also note that we +# correct only three digits out of four. This works because +# most significant digit is subjected to less additions. 
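The carry-correction comment above boils down to partial normalization of base-2**29 digits. As a rough illustration of the arithmetic only (the vector code below handles four 29-bit digits per ymm register and interleaves the passes; this sketch ignores all of that scheduling), a hypothetical Python version of one such carry pass, with illustrative names:

MASK29 = (1 << 29) - 1

def carry_pass(digits):
    """One partial carry pass: keep the low 29 bits of every digit and
    push the excess into the next digit, which is the arithmetic behind
    the vpsrlq/vpand/vpermq/vpblendd chain that follows."""
    out, carry = [], 0
    for v in digits:
        v += carry
        out.append(v & MASK29)
        carry = v >> 29
    return out, carry

def value(digits):
    return sum(v << (29 * i) for i, v in enumerate(digits))

digits = [(1 << 33) + 5, 3, 1 << 30, 7]        # redundant base-2**29 digits
reduced, top = carry_pass(digits)
assert all(d <= MASK29 for d in reduced)
assert value(reduced) + (top << (29 * len(digits))) == value(digits)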
+ +$TEMP0 = $ACC9; +$TEMP3 = $Bi; +$TEMP4 = $Yi; +$code.=<<___; + vpermq \$0, $AND_MASK, $AND_MASK + vpaddq (%rsp), $TEMP1, $ACC0 + + vpsrlq \$29, $ACC0, $TEMP1 + vpand $AND_MASK, $ACC0, $ACC0 + vpsrlq \$29, $ACC1, $TEMP2 + vpand $AND_MASK, $ACC1, $ACC1 + vpsrlq \$29, $ACC2, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC2, $ACC2 + vpsrlq \$29, $ACC3, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC3, $ACC3 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP3, $TEMP3 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpermq \$0x93, $TEMP4, $TEMP4 + vpaddq $TEMP0, $ACC0, $ACC0 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC1, $ACC1 + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC2, $ACC2 + vpblendd \$3, $TEMP4, $ZERO, $TEMP4 + vpaddq $TEMP3, $ACC3, $ACC3 + vpaddq $TEMP4, $ACC4, $ACC4 + + vpsrlq \$29, $ACC0, $TEMP1 + vpand $AND_MASK, $ACC0, $ACC0 + vpsrlq \$29, $ACC1, $TEMP2 + vpand $AND_MASK, $ACC1, $ACC1 + vpsrlq \$29, $ACC2, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC2, $ACC2 + vpsrlq \$29, $ACC3, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC3, $ACC3 + vpermq \$0x93, $TEMP3, $TEMP3 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP4, $TEMP4 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC0, $ACC0 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC1, $ACC1 + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC2, $ACC2 + vpblendd \$3, $TEMP4, $ZERO, $TEMP4 + vpaddq $TEMP3, $ACC3, $ACC3 + vpaddq $TEMP4, $ACC4, $ACC4 + + vmovdqu $ACC0, 0-128($rp) + vmovdqu $ACC1, 32-128($rp) + vmovdqu $ACC2, 64-128($rp) + vmovdqu $ACC3, 96-128($rp) +___ + +$TEMP5=$ACC0; +$code.=<<___; + vpsrlq \$29, $ACC4, $TEMP1 + vpand $AND_MASK, $ACC4, $ACC4 + vpsrlq \$29, $ACC5, $TEMP2 + vpand $AND_MASK, $ACC5, $ACC5 + vpsrlq \$29, $ACC6, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC6, $ACC6 + vpsrlq \$29, $ACC7, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC7, $ACC7 + vpsrlq \$29, $ACC8, $TEMP5 + vpermq \$0x93, $TEMP3, $TEMP3 + vpand $AND_MASK, $ACC8, $ACC8 + vpermq \$0x93, $TEMP4, $TEMP4 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP5, $TEMP5 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC4, $ACC4 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC5, $ACC5 + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC6, $ACC6 + vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 + vpaddq $TEMP3, $ACC7, $ACC7 + vpaddq $TEMP4, $ACC8, $ACC8 + + vpsrlq \$29, $ACC4, $TEMP1 + vpand $AND_MASK, $ACC4, $ACC4 + vpsrlq \$29, $ACC5, $TEMP2 + vpand $AND_MASK, $ACC5, $ACC5 + vpsrlq \$29, $ACC6, $TEMP3 + vpermq \$0x93, $TEMP1, $TEMP1 + vpand $AND_MASK, $ACC6, $ACC6 + vpsrlq \$29, $ACC7, $TEMP4 + vpermq \$0x93, $TEMP2, $TEMP2 + vpand $AND_MASK, $ACC7, $ACC7 + vpsrlq \$29, $ACC8, $TEMP5 + vpermq \$0x93, $TEMP3, $TEMP3 + vpand $AND_MASK, $ACC8, $ACC8 + vpermq \$0x93, $TEMP4, $TEMP4 + + vpblendd \$3, $ZERO, $TEMP1, $TEMP0 + vpermq \$0x93, $TEMP5, $TEMP5 + vpblendd \$3, $TEMP1, $TEMP2, $TEMP1 + vpaddq $TEMP0, $ACC4, $ACC4 + vpblendd \$3, $TEMP2, $TEMP3, $TEMP2 + vpaddq $TEMP1, $ACC5, $ACC5 + vpblendd \$3, $TEMP3, $TEMP4, $TEMP3 + vpaddq $TEMP2, $ACC6, $ACC6 + vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 + vpaddq $TEMP3, $ACC7, $ACC7 + vpaddq $TEMP4, $ACC8, $ACC8 + + vmovdqu $ACC4, 128-128($rp) + vmovdqu $ACC5, 160-128($rp) + vmovdqu $ACC6, 192-128($rp) + vmovdqu $ACC7, 224-128($rp) + vmovdqu $ACC8, 256-128($rp) + vzeroupper + + mov %rbp, %rax +___ +$code.=<<___ if 
($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp # restore %rsp +.Lmul_1024_epilogue: + ret +.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 +___ +} +{ +my ($out,$inp) = $win64 ? ("%rcx","%rdx") : ("%rdi","%rsi"); +my @T = map("%r$_",(8..11)); + +$code.=<<___; +.globl rsaz_1024_red2norm_avx2 +.type rsaz_1024_red2norm_avx2,\@abi-omnipotent +.align 32 +rsaz_1024_red2norm_avx2: + sub \$-128,$inp # size optimization + xor %rax,%rax +___ + +for ($j=0,$i=0; $i<16; $i++) { + my $k=0; + while (29*$j<64*($i+1)) { # load data till boundary + $code.=" mov `8*$j-128`($inp), @T[0]\n"; + $j++; $k++; push(@T,shift(@T)); + } + $l=$k; + while ($k>1) { # shift loaded data but last value + $code.=" shl \$`29*($j-$k)`,@T[-$k]\n"; + $k--; + } + $code.=<<___; # shift last value + mov @T[-1], @T[0] + shl \$`29*($j-1)`, @T[-1] + shr \$`-29*($j-1)`, @T[0] +___ + while ($l) { # accumulate all values + $code.=" add @T[-$l], %rax\n"; + $l--; + } + $code.=<<___; + adc \$0, @T[0] # consume eventual carry + mov %rax, 8*$i($out) + mov @T[0], %rax +___ + push(@T,shift(@T)); +} +$code.=<<___; + ret +.size rsaz_1024_red2norm_avx2,.-rsaz_1024_red2norm_avx2 + +.globl rsaz_1024_norm2red_avx2 +.type rsaz_1024_norm2red_avx2,\@abi-omnipotent +.align 32 +rsaz_1024_norm2red_avx2: + sub \$-128,$out # size optimization + mov ($inp),@T[0] + mov \$0x1fffffff,%eax +___ +for ($j=0,$i=0; $i<16; $i++) { + $code.=" mov `8*($i+1)`($inp),@T[1]\n" if ($i<15); + $code.=" xor @T[1],@T[1]\n" if ($i==15); + my $k=1; + while (29*($j+1)<64*($i+1)) { + $code.=<<___; + mov @T[0],@T[-$k] + shr \$`29*$j`,@T[-$k] + and %rax,@T[-$k] # &0x1fffffff + mov @T[-$k],`8*$j-128`($out) +___ + $j++; $k++; + } + $code.=<<___; + shrd \$`29*$j`,@T[1],@T[0] + and %rax,@T[0] + mov @T[0],`8*$j-128`($out) +___ + $j++; + push(@T,shift(@T)); +} +$code.=<<___; + mov @T[0],`8*$j-128`($out) # zero + mov @T[0],`8*($j+1)-128`($out) + mov @T[0],`8*($j+2)-128`($out) + mov @T[0],`8*($j+3)-128`($out) + ret +.size rsaz_1024_norm2red_avx2,.-rsaz_1024_norm2red_avx2 +___ +} +{ +my ($out,$inp,$power) = $win64 ? 
("%rcx","%rdx","%r8d") : ("%rdi","%rsi","%edx"); + +$code.=<<___; +.globl rsaz_1024_scatter5_avx2 +.type rsaz_1024_scatter5_avx2,\@abi-omnipotent +.align 32 +rsaz_1024_scatter5_avx2: + vzeroupper + vmovdqu .Lscatter_permd(%rip),%ymm5 + shl \$4,$power + lea ($out,$power),$out + mov \$9,%eax + jmp .Loop_scatter_1024 + +.align 32 +.Loop_scatter_1024: + vmovdqu ($inp),%ymm0 + lea 32($inp),$inp + vpermd %ymm0,%ymm5,%ymm0 + vmovdqu %xmm0,($out) + lea 16*32($out),$out + dec %eax + jnz .Loop_scatter_1024 + + vzeroupper + ret +.size rsaz_1024_scatter5_avx2,.-rsaz_1024_scatter5_avx2 + +.globl rsaz_1024_gather5_avx2 +.type rsaz_1024_gather5_avx2,\@abi-omnipotent +.align 32 +rsaz_1024_gather5_avx2: +___ +$code.=<<___ if ($win64); + lea -0x88(%rsp),%rax + vzeroupper +.LSEH_begin_rsaz_1024_gather5: + # I can't trust assembler to use specific encoding:-( + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax),%rsp + .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6,-0x20(%rax) + .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7,-0x10(%rax) + .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8,0(%rax) + .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9,0x10(%rax) + .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10,0x20(%rax) + .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11,0x30(%rax) + .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12,0x40(%rax) + .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13,0x50(%rax) + .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14,0x60(%rax) + .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15,0x70(%rax) +___ +$code.=<<___; + lea .Lgather_table(%rip),%r11 + mov $power,%eax + and \$3,$power + shr \$2,%eax # cache line number + shl \$4,$power # offset within cache line + + vmovdqu -32(%r11),%ymm7 # .Lgather_permd + vpbroadcastb 8(%r11,%rax), %xmm8 + vpbroadcastb 7(%r11,%rax), %xmm9 + vpbroadcastb 6(%r11,%rax), %xmm10 + vpbroadcastb 5(%r11,%rax), %xmm11 + vpbroadcastb 4(%r11,%rax), %xmm12 + vpbroadcastb 3(%r11,%rax), %xmm13 + vpbroadcastb 2(%r11,%rax), %xmm14 + vpbroadcastb 1(%r11,%rax), %xmm15 + + lea 64($inp,$power),$inp + mov \$64,%r11 # size optimization + mov \$9,%eax + jmp .Loop_gather_1024 + +.align 32 +.Loop_gather_1024: + vpand -64($inp), %xmm8,%xmm0 + vpand ($inp), %xmm9,%xmm1 + vpand 64($inp), %xmm10,%xmm2 + vpand ($inp,%r11,2), %xmm11,%xmm3 + vpor %xmm0,%xmm1,%xmm1 + vpand 64($inp,%r11,2), %xmm12,%xmm4 + vpor %xmm2,%xmm3,%xmm3 + vpand ($inp,%r11,4), %xmm13,%xmm5 + vpor %xmm1,%xmm3,%xmm3 + vpand 64($inp,%r11,4), %xmm14,%xmm6 + vpor %xmm4,%xmm5,%xmm5 + vpand -128($inp,%r11,8), %xmm15,%xmm2 + lea ($inp,%r11,8),$inp + vpor %xmm3,%xmm5,%xmm5 + vpor %xmm2,%xmm6,%xmm6 + vpor %xmm5,%xmm6,%xmm6 + vpermd %ymm6,%ymm7,%ymm6 + vmovdqu %ymm6,($out) + lea 32($out),$out + dec %eax + jnz .Loop_gather_1024 + + vpxor %ymm0,%ymm0,%ymm0 + vmovdqu %ymm0,($out) + vzeroupper +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + movaps 0x40(%rsp),%xmm10 + movaps 0x50(%rsp),%xmm11 + movaps 0x60(%rsp),%xmm12 + movaps 0x70(%rsp),%xmm13 + movaps 0x80(%rsp),%xmm14 + movaps 0x90(%rsp),%xmm15 + lea 0xa8(%rsp),%rsp +.LSEH_end_rsaz_1024_gather5: +___ +$code.=<<___; + ret +.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 +___ +} + +$code.=<<___; +.extern OPENSSL_ia32cap_P +.globl rsaz_avx2_eligible +.type rsaz_avx2_eligible,\@abi-omnipotent +.align 32 +rsaz_avx2_eligible: + mov OPENSSL_ia32cap_P+8(%rip),%eax +___ +$code.=<<___ if ($addx); + mov \$`1<<8|1<<19`,%ecx + mov \$0,%edx + and %eax,%ecx + cmp \$`1<<8|1<<19`,%ecx # check for BMI2+AD*X + cmove %edx,%eax +___ 
+$code.=<<___; + and \$`1<<5`,%eax + shr \$5,%eax + ret +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.align 64 +.Land_mask: + .quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 +.Lscatter_permd: + .long 0,2,4,6,7,7,7,7 +.Lgather_permd: + .long 0,7,1,7,2,7,3,7 +.Lgather_table: + .byte 0,0,0,0,0,0,0,0, 0xff,0,0,0,0,0,0,0 +.align 64 +___ + +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___ +.extern __imp_RtlVirtualUnwind +.type rsaz_se_handler,\@abi-omnipotent +.align 16 +rsaz_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 160($context),%rax # pull context->Rbp + + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + mov %r15,240($context) + mov %r14,232($context) + mov %r13,224($context) + mov %r12,216($context) + mov %rbp,160($context) + mov %rbx,144($context) + + lea -0xd8(%rax),%rsi # %xmm save area + lea 512($context),%rdi # & context.Xmm6 + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size rsaz_se_handler,.-rsaz_se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_rsaz_1024_sqr_avx2 + .rva .LSEH_end_rsaz_1024_sqr_avx2 + .rva .LSEH_info_rsaz_1024_sqr_avx2 + + .rva .LSEH_begin_rsaz_1024_mul_avx2 + .rva .LSEH_end_rsaz_1024_mul_avx2 + .rva .LSEH_info_rsaz_1024_mul_avx2 + + .rva .LSEH_begin_rsaz_1024_gather5 + .rva .LSEH_end_rsaz_1024_gather5 + .rva .LSEH_info_rsaz_1024_gather5 +.section .xdata +.align 8 +.LSEH_info_rsaz_1024_sqr_avx2: + .byte 9,0,0,0 + .rva rsaz_se_handler + .rva .Lsqr_1024_body,.Lsqr_1024_epilogue +.LSEH_info_rsaz_1024_mul_avx2: + .byte 9,0,0,0 + .rva rsaz_se_handler + .rva .Lmul_1024_body,.Lmul_1024_epilogue +.LSEH_info_rsaz_1024_gather5: + .byte 0x01,0x33,0x16,0x00 + .byte 0x36,0xf8,0x09,0x00 #vmovaps 0x90(rsp),xmm15 + .byte 0x31,0xe8,0x08,0x00 #vmovaps 0x80(rsp),xmm14 + .byte 0x2c,0xd8,0x07,0x00 #vmovaps 0x70(rsp),xmm13 + .byte 0x27,0xc8,0x06,0x00 #vmovaps 0x60(rsp),xmm12 + .byte 0x22,0xb8,0x05,0x00 #vmovaps 0x50(rsp),xmm11 + .byte 
0x1d,0xa8,0x04,0x00 #vmovaps 0x40(rsp),xmm10 + .byte 0x18,0x98,0x03,0x00 #vmovaps 0x30(rsp),xmm9 + .byte 0x13,0x88,0x02,0x00 #vmovaps 0x20(rsp),xmm8 + .byte 0x0e,0x78,0x01,0x00 #vmovaps 0x10(rsp),xmm7 + .byte 0x09,0x68,0x00,0x00 #vmovaps 0x00(rsp),xmm6 + .byte 0x04,0x01,0x15,0x00 #sub rsp,0xa8 +___ +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + + s/\b(sh[rl]d?\s+\$)(-?[0-9]+)/$1.$2%64/ge or + + s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go; + print $_,"\n"; +} + +}}} else {{{ +print <<___; # assembler is too old +.text + +.globl rsaz_avx2_eligible +.type rsaz_avx2_eligible,\@abi-omnipotent +rsaz_avx2_eligible: + xor %eax,%eax + ret +.size rsaz_avx2_eligible,.-rsaz_avx2_eligible + +.globl rsaz_1024_sqr_avx2 +.globl rsaz_1024_mul_avx2 +.globl rsaz_1024_norm2red_avx2 +.globl rsaz_1024_red2norm_avx2 +.globl rsaz_1024_scatter5_avx2 +.globl rsaz_1024_gather5_avx2 +.type rsaz_1024_sqr_avx2,\@abi-omnipotent +rsaz_1024_sqr_avx2: +rsaz_1024_mul_avx2: +rsaz_1024_norm2red_avx2: +rsaz_1024_red2norm_avx2: +rsaz_1024_scatter5_avx2: +rsaz_1024_gather5_avx2: + .byte 0x0f,0x0b # ud2 + ret +.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 +___ +}}} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl new file mode 100755 index 00000000000000..3bd45dbac01d5b --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl @@ -0,0 +1,2144 @@ +#!/usr/bin/env perl + +############################################################################## +# # +# Copyright (c) 2012, Intel Corporation # +# # +# All rights reserved. # +# # +# Redistribution and use in source and binary forms, with or without # +# modification, are permitted provided that the following conditions are # +# met: # +# # +# * Redistributions of source code must retain the above copyright # +# notice, this list of conditions and the following disclaimer. # +# # +# * Redistributions in binary form must reproduce the above copyright # +# notice, this list of conditions and the following disclaimer in the # +# documentation and/or other materials provided with the # +# distribution. # +# # +# * Neither the name of the Intel Corporation nor the names of its # +# contributors may be used to endorse or promote products derived from # +# this software without specific prior written permission. # +# # +# # +# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY # +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR # +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# +# # +############################################################################## +# Developers and authors: # +# Shay Gueron (1, 2), and Vlad Krasnov (1) # +# (1) Intel Architecture Group, Microprocessor and Chipset Development, # +# Israel Development Center, Haifa, Israel # +# (2) University of Haifa # +############################################################################## +# Reference: # +# [1] S. Gueron, "Efficient Software Implementations of Modular # +# Exponentiation", http://eprint.iacr.org/2011/239 # +# [2] S. Gueron, V. Krasnov. "Speeding up Big-Numbers Squaring". # +# IEEE Proceedings of 9th International Conference on Information # +# Technology: New Generations (ITNG 2012), 821-823 (2012). # +# [3] S. Gueron, Efficient Software Implementations of Modular Exponentiation# +# Journal of Cryptographic Engineering 2:31-43 (2012). # +# [4] S. Gueron, V. Krasnov: "[PATCH] Efficient and side channel analysis # +# resistant 512-bit and 1024-bit modular exponentiation for optimizing # +# RSA1024 and RSA2048 on x86_64 platforms", # +# http://rt.openssl.org/Ticket/Display.html?id=2582&user=guest&pass=guest# +############################################################################## + +# While original submission covers 512- and 1024-bit exponentiation, +# this module is limited to 512-bit version only (and as such +# accelerates RSA1024 sign). This is because improvement for longer +# keys is not high enough to justify the effort, highest measured +# was ~5% on Westmere. [This is relative to OpenSSL 1.0.2, upcoming +# for the moment of this writing!] Nor does this module implement +# "monolithic" complete exponentiation jumbo-subroutine, but adheres +# to more modular mixture of C and assembly. And it's optimized even +# for processors other than Intel Core family (see table below for +# improvement coefficients). 
+# +# +# RSA1024 sign/sec this/original |this/rsax(*) this/fips(*) +# ----------------+--------------------------- +# Opteron +13% |+5% +20% +# Bulldozer -0% |-1% +10% +# P4 +11% |+7% +8% +# Westmere +5% |+14% +17% +# Sandy Bridge +2% |+12% +29% +# Ivy Bridge +1% |+11% +35% +# Haswell(**) -0% |+12% +39% +# Atom +13% |+11% +4% +# VIA Nano +70% |+9% +25% +# +# (*) rsax engine and fips numbers are presented for reference +# purposes; +# (**) MULX was attempted, but found to give only marginal improvement; + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $addx = ($1>=2.23); +} + +if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $addx = ($1>=2.10); +} + +if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $addx = ($1>=12); +} + +if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 + $addx = ($ver>=3.03); +} + +($out, $inp, $mod) = ("%rdi", "%rsi", "%rbp"); # common internal API +{ +my ($out,$inp,$mod,$n0,$times) = ("%rdi","%rsi","%rdx","%rcx","%r8d"); + +$code.=<<___; +.text + +.extern OPENSSL_ia32cap_P + +.globl rsaz_512_sqr +.type rsaz_512_sqr,\@function,5 +.align 32 +rsaz_512_sqr: # 25-29% faster than rsaz_512_mul + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + subq \$128+24, %rsp +.Lsqr_body: + movq $mod, %rbp # common argument + movq ($inp), %rdx + movq 8($inp), %rax + movq $n0, 128(%rsp) +___ +$code.=<<___ if ($addx); + movl \$0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl \$0x80100,%r11d # check for MULX and ADO/CX + je .Loop_sqrx +___ +$code.=<<___; + jmp .Loop_sqr + +.align 32 +.Loop_sqr: + movl $times,128+8(%rsp) +#first iteration + movq %rdx, %rbx + mulq %rdx + movq %rax, %r8 + movq 16($inp), %rax + movq %rdx, %r9 + + mulq %rbx + addq %rax, %r9 + movq 24($inp), %rax + movq %rdx, %r10 + adcq \$0, %r10 + + mulq %rbx + addq %rax, %r10 + movq 32($inp), %rax + movq %rdx, %r11 + adcq \$0, %r11 + + mulq %rbx + addq %rax, %r11 + movq 40($inp), %rax + movq %rdx, %r12 + adcq \$0, %r12 + + mulq %rbx + addq %rax, %r12 + movq 48($inp), %rax + movq %rdx, %r13 + adcq \$0, %r13 + + mulq %rbx + addq %rax, %r13 + movq 56($inp), %rax + movq %rdx, %r14 + adcq \$0, %r14 + + mulq %rbx + addq %rax, %r14 + movq %rbx, %rax + movq %rdx, %r15 + adcq \$0, %r15 + + addq %r8, %r8 #shlq \$1, %r8 + movq %r9, %rcx + adcq %r9, %r9 #shld \$1, %r8, %r9 + + mulq %rax + movq %rax, (%rsp) + addq %rdx, %r8 + adcq \$0, %r9 + + movq %r8, 8(%rsp) + shrq \$63, %rcx + +#second iteration + movq 8($inp), %r8 + movq 16($inp), %rax + mulq %r8 + addq %rax, %r10 + movq 24($inp), %rax + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r8 + addq %rax, %r11 + movq 32($inp), %rax + adcq \$0, %rdx + addq %rbx, %r11 + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r8 + addq %rax, %r12 + movq 40($inp), %rax + adcq \$0, %rdx + addq %rbx, %r12 + movq %rdx, %rbx + adcq \$0, %rbx + + 
mulq %r8 + addq %rax, %r13 + movq 48($inp), %rax + adcq \$0, %rdx + addq %rbx, %r13 + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r8 + addq %rax, %r14 + movq 56($inp), %rax + adcq \$0, %rdx + addq %rbx, %r14 + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r8 + addq %rax, %r15 + movq %r8, %rax + adcq \$0, %rdx + addq %rbx, %r15 + movq %rdx, %r8 + movq %r10, %rdx + adcq \$0, %r8 + + add %rdx, %rdx + lea (%rcx,%r10,2), %r10 #shld \$1, %rcx, %r10 + movq %r11, %rbx + adcq %r11, %r11 #shld \$1, %r10, %r11 + + mulq %rax + addq %rax, %r9 + adcq %rdx, %r10 + adcq \$0, %r11 + + movq %r9, 16(%rsp) + movq %r10, 24(%rsp) + shrq \$63, %rbx + +#third iteration + movq 16($inp), %r9 + movq 24($inp), %rax + mulq %r9 + addq %rax, %r12 + movq 32($inp), %rax + movq %rdx, %rcx + adcq \$0, %rcx + + mulq %r9 + addq %rax, %r13 + movq 40($inp), %rax + adcq \$0, %rdx + addq %rcx, %r13 + movq %rdx, %rcx + adcq \$0, %rcx + + mulq %r9 + addq %rax, %r14 + movq 48($inp), %rax + adcq \$0, %rdx + addq %rcx, %r14 + movq %rdx, %rcx + adcq \$0, %rcx + + mulq %r9 + movq %r12, %r10 + lea (%rbx,%r12,2), %r12 #shld \$1, %rbx, %r12 + addq %rax, %r15 + movq 56($inp), %rax + adcq \$0, %rdx + addq %rcx, %r15 + movq %rdx, %rcx + adcq \$0, %rcx + + mulq %r9 + shrq \$63, %r10 + addq %rax, %r8 + movq %r9, %rax + adcq \$0, %rdx + addq %rcx, %r8 + movq %rdx, %r9 + adcq \$0, %r9 + + movq %r13, %rcx + leaq (%r10,%r13,2), %r13 #shld \$1, %r12, %r13 + + mulq %rax + addq %rax, %r11 + adcq %rdx, %r12 + adcq \$0, %r13 + + movq %r11, 32(%rsp) + movq %r12, 40(%rsp) + shrq \$63, %rcx + +#fourth iteration + movq 24($inp), %r10 + movq 32($inp), %rax + mulq %r10 + addq %rax, %r14 + movq 40($inp), %rax + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r10 + addq %rax, %r15 + movq 48($inp), %rax + adcq \$0, %rdx + addq %rbx, %r15 + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r10 + movq %r14, %r12 + leaq (%rcx,%r14,2), %r14 #shld \$1, %rcx, %r14 + addq %rax, %r8 + movq 56($inp), %rax + adcq \$0, %rdx + addq %rbx, %r8 + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r10 + shrq \$63, %r12 + addq %rax, %r9 + movq %r10, %rax + adcq \$0, %rdx + addq %rbx, %r9 + movq %rdx, %r10 + adcq \$0, %r10 + + movq %r15, %rbx + leaq (%r12,%r15,2),%r15 #shld \$1, %r14, %r15 + + mulq %rax + addq %rax, %r13 + adcq %rdx, %r14 + adcq \$0, %r15 + + movq %r13, 48(%rsp) + movq %r14, 56(%rsp) + shrq \$63, %rbx + +#fifth iteration + movq 32($inp), %r11 + movq 40($inp), %rax + mulq %r11 + addq %rax, %r8 + movq 48($inp), %rax + movq %rdx, %rcx + adcq \$0, %rcx + + mulq %r11 + addq %rax, %r9 + movq 56($inp), %rax + adcq \$0, %rdx + movq %r8, %r12 + leaq (%rbx,%r8,2), %r8 #shld \$1, %rbx, %r8 + addq %rcx, %r9 + movq %rdx, %rcx + adcq \$0, %rcx + + mulq %r11 + shrq \$63, %r12 + addq %rax, %r10 + movq %r11, %rax + adcq \$0, %rdx + addq %rcx, %r10 + movq %rdx, %r11 + adcq \$0, %r11 + + movq %r9, %rcx + leaq (%r12,%r9,2), %r9 #shld \$1, %r8, %r9 + + mulq %rax + addq %rax, %r15 + adcq %rdx, %r8 + adcq \$0, %r9 + + movq %r15, 64(%rsp) + movq %r8, 72(%rsp) + shrq \$63, %rcx + +#sixth iteration + movq 40($inp), %r12 + movq 48($inp), %rax + mulq %r12 + addq %rax, %r10 + movq 56($inp), %rax + movq %rdx, %rbx + adcq \$0, %rbx + + mulq %r12 + addq %rax, %r11 + movq %r12, %rax + movq %r10, %r15 + leaq (%rcx,%r10,2), %r10 #shld \$1, %rcx, %r10 + adcq \$0, %rdx + shrq \$63, %r15 + addq %rbx, %r11 + movq %rdx, %r12 + adcq \$0, %r12 + + movq %r11, %rbx + leaq (%r15,%r11,2), %r11 #shld \$1, %r10, %r11 + + mulq %rax + addq %rax, %r9 + adcq %rdx, %r10 + adcq \$0, %r11 + + movq %r9, 80(%rsp) + movq %r10, 88(%rsp) + 
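The numbered "iteration" blocks of .Loop_sqr implement schoolbook squaring with the optimization visible in the shld/lea-by-2 comments: every off-diagonal product is computed once and doubled, and the diagonal squares are added on top. A rough Python sketch of that decomposition over 64-bit limbs, with carry handling and register scheduling deliberately omitted:

import random

BASE = 1 << 64

def limb_square(a):
    """Square an n-limb number by doubling each cross product a[i]*a[j]
    (i < j) and adding the diagonal squares a[i]**2, the same
    decomposition the assembly performs with mulq plus shift-by-one."""
    acc = 0
    for i in range(len(a)):
        acc += a[i] * a[i] * BASE ** (2 * i)            # diagonal term
        for j in range(i + 1, len(a)):
            acc += 2 * a[i] * a[j] * BASE ** (i + j)    # doubled cross term
    return acc

limbs = [random.randrange(BASE) for _ in range(8)]      # one 512-bit operand
n = sum(v * BASE ** i for i, v in enumerate(limbs))
assert limb_square(limbs) == n * n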
+#seventh iteration + movq 48($inp), %r13 + movq 56($inp), %rax + mulq %r13 + addq %rax, %r12 + movq %r13, %rax + movq %rdx, %r13 + adcq \$0, %r13 + + xorq %r14, %r14 + shlq \$1, %rbx + adcq %r12, %r12 #shld \$1, %rbx, %r12 + adcq %r13, %r13 #shld \$1, %r12, %r13 + adcq %r14, %r14 #shld \$1, %r13, %r14 + + mulq %rax + addq %rax, %r11 + adcq %rdx, %r12 + adcq \$0, %r13 + + movq %r11, 96(%rsp) + movq %r12, 104(%rsp) + +#eighth iteration + movq 56($inp), %rax + mulq %rax + addq %rax, %r13 + adcq \$0, %rdx + + addq %rdx, %r14 + + movq %r13, 112(%rsp) + movq %r14, 120(%rsp) + + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reduce + + addq 64(%rsp), %r8 + adcq 72(%rsp), %r9 + adcq 80(%rsp), %r10 + adcq 88(%rsp), %r11 + adcq 96(%rsp), %r12 + adcq 104(%rsp), %r13 + adcq 112(%rsp), %r14 + adcq 120(%rsp), %r15 + sbbq %rcx, %rcx + + call __rsaz_512_subtract + + movq %r8, %rdx + movq %r9, %rax + movl 128+8(%rsp), $times + movq $out, $inp + + decl $times + jnz .Loop_sqr +___ +if ($addx) { +$code.=<<___; + jmp .Lsqr_tail + +.align 32 +.Loop_sqrx: + movl $times,128+8(%rsp) + movq $out, %xmm0 # off-load + movq %rbp, %xmm1 # off-load +#first iteration + mulx %rax, %r8, %r9 + + mulx 16($inp), %rcx, %r10 + xor %rbp, %rbp # cf=0, of=0 + + mulx 24($inp), %rax, %r11 + adcx %rcx, %r9 + + mulx 32($inp), %rcx, %r12 + adcx %rax, %r10 + + mulx 40($inp), %rax, %r13 + adcx %rcx, %r11 + + .byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00 # mulx 48($inp), %rcx, %r14 + adcx %rax, %r12 + adcx %rcx, %r13 + + .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 # mulx 56($inp), %rax, %r15 + adcx %rax, %r14 + adcx %rbp, %r15 # %rbp is 0 + + mov %r9, %rcx + shld \$1, %r8, %r9 + shl \$1, %r8 + + xor %ebp, %ebp + mulx %rdx, %rax, %rdx + adcx %rdx, %r8 + mov 8($inp), %rdx + adcx %rbp, %r9 + + mov %rax, (%rsp) + mov %r8, 8(%rsp) + +#second iteration + mulx 16($inp), %rax, %rbx + adox %rax, %r10 + adcx %rbx, %r11 + + .byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r8 + adox $out, %r11 + adcx %r8, %r12 + + mulx 32($inp), %rax, %rbx + adox %rax, %r12 + adcx %rbx, %r13 + + mulx 40($inp), $out, %r8 + adox $out, %r13 + adcx %r8, %r14 + + .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx + adox %rax, %r14 + adcx %rbx, %r15 + + .byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 # mulx 56($inp), $out, %r8 + adox $out, %r15 + adcx %rbp, %r8 + adox %rbp, %r8 + + mov %r11, %rbx + shld \$1, %r10, %r11 + shld \$1, %rcx, %r10 + + xor %ebp,%ebp + mulx %rdx, %rax, %rcx + mov 16($inp), %rdx + adcx %rax, %r9 + adcx %rcx, %r10 + adcx %rbp, %r11 + + mov %r9, 16(%rsp) + .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp) + +#third iteration + .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9 + adox $out, %r12 + adcx %r9, %r13 + + mulx 32($inp), %rax, %rcx + adox %rax, %r13 + adcx %rcx, %r14 + + mulx 40($inp), $out, %r9 + adox $out, %r14 + adcx %r9, %r15 + + .byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rcx + adox %rax, %r15 + adcx %rcx, %r8 + + .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00 # mulx 56($inp), $out, %r9 + adox $out, %r8 + adcx %rbp, %r9 + adox %rbp, %r9 + + mov %r13, %rcx + shld \$1, %r12, %r13 + shld \$1, %rbx, %r12 + + xor %ebp, %ebp + mulx %rdx, %rax, %rdx + adcx %rax, %r11 + adcx %rdx, %r12 + mov 24($inp), %rdx + adcx %rbp, %r13 + + mov %r11, 32(%rsp) + .byte 
0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp) + +#fourth iteration + .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx + adox %rax, %r14 + adcx %rbx, %r15 + + mulx 40($inp), $out, %r10 + adox $out, %r15 + adcx %r10, %r8 + + mulx 48($inp), %rax, %rbx + adox %rax, %r8 + adcx %rbx, %r9 + + mulx 56($inp), $out, %r10 + adox $out, %r9 + adcx %rbp, %r10 + adox %rbp, %r10 + + .byte 0x66 + mov %r15, %rbx + shld \$1, %r14, %r15 + shld \$1, %rcx, %r14 + + xor %ebp, %ebp + mulx %rdx, %rax, %rdx + adcx %rax, %r13 + adcx %rdx, %r14 + mov 32($inp), %rdx + adcx %rbp, %r15 + + mov %r13, 48(%rsp) + mov %r14, 56(%rsp) + +#fifth iteration + .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11 + adox $out, %r8 + adcx %r11, %r9 + + mulx 48($inp), %rax, %rcx + adox %rax, %r9 + adcx %rcx, %r10 + + mulx 56($inp), $out, %r11 + adox $out, %r10 + adcx %rbp, %r11 + adox %rbp, %r11 + + mov %r9, %rcx + shld \$1, %r8, %r9 + shld \$1, %rbx, %r8 + + xor %ebp, %ebp + mulx %rdx, %rax, %rdx + adcx %rax, %r15 + adcx %rdx, %r8 + mov 40($inp), %rdx + adcx %rbp, %r9 + + mov %r15, 64(%rsp) + mov %r8, 72(%rsp) + +#sixth iteration + .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx + adox %rax, %r10 + adcx %rbx, %r11 + + .byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 # mulx 56($inp), $out, %r12 + adox $out, %r11 + adcx %rbp, %r12 + adox %rbp, %r12 + + mov %r11, %rbx + shld \$1, %r10, %r11 + shld \$1, %rcx, %r10 + + xor %ebp, %ebp + mulx %rdx, %rax, %rdx + adcx %rax, %r9 + adcx %rdx, %r10 + mov 48($inp), %rdx + adcx %rbp, %r11 + + mov %r9, 80(%rsp) + mov %r10, 88(%rsp) + +#seventh iteration + .byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 # mulx 56($inp), %rax, %r13 + adox %rax, %r12 + adox %rbp, %r13 + + xor %r14, %r14 + shld \$1, %r13, %r14 + shld \$1, %r12, %r13 + shld \$1, %rbx, %r12 + + xor %ebp, %ebp + mulx %rdx, %rax, %rdx + adcx %rax, %r11 + adcx %rdx, %r12 + mov 56($inp), %rdx + adcx %rbp, %r13 + + .byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 # mov %r11, 96(%rsp) + .byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 # mov %r12, 104(%rsp) + +#eighth iteration + mulx %rdx, %rax, %rdx + adox %rax, %r13 + adox %rbp, %rdx + + .byte 0x66 + add %rdx, %r14 + + movq %r13, 112(%rsp) + movq %r14, 120(%rsp) + movq %xmm0, $out + movq %xmm1, %rbp + + movq 128(%rsp), %rdx # pull $n0 + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reducex + + addq 64(%rsp), %r8 + adcq 72(%rsp), %r9 + adcq 80(%rsp), %r10 + adcq 88(%rsp), %r11 + adcq 96(%rsp), %r12 + adcq 104(%rsp), %r13 + adcq 112(%rsp), %r14 + adcq 120(%rsp), %r15 + sbbq %rcx, %rcx + + call __rsaz_512_subtract + + movq %r8, %rdx + movq %r9, %rax + movl 128+8(%rsp), $times + movq $out, $inp + + decl $times + jnz .Loop_sqrx + +.Lsqr_tail: +___ +} +$code.=<<___; + + leaq 128+24+48(%rsp), %rax + movq -48(%rax), %r15 + movq -40(%rax), %r14 + movq -32(%rax), %r13 + movq -24(%rax), %r12 + movq -16(%rax), %rbp + movq -8(%rax), %rbx + leaq (%rax), %rsp +.Lsqr_epilogue: + ret +.size rsaz_512_sqr,.-rsaz_512_sqr +___ +} +{ +my ($out,$ap,$bp,$mod,$n0) = ("%rdi","%rsi","%rdx","%rcx","%r8"); +$code.=<<___; +.globl rsaz_512_mul +.type rsaz_512_mul,\@function,5 +.align 32 +rsaz_512_mul: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + subq \$128+24, %rsp +.Lmul_body: + movq $out, %xmm0 # off-load arguments + movq $mod, %xmm1 + movq $n0, 
128(%rsp) +___ +$code.=<<___ if ($addx); + movl \$0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl \$0x80100,%r11d # check for MULX and ADO/CX + je .Lmulx +___ +$code.=<<___; + movq ($bp), %rbx # pass b[0] + movq $bp, %rbp # pass argument + call __rsaz_512_mul + + movq %xmm0, $out + movq %xmm1, %rbp + + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reduce +___ +$code.=<<___ if ($addx); + jmp .Lmul_tail + +.align 32 +.Lmulx: + movq $bp, %rbp # pass argument + movq ($bp), %rdx # pass b[0] + call __rsaz_512_mulx + + movq %xmm0, $out + movq %xmm1, %rbp + + movq 128(%rsp), %rdx # pull $n0 + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reducex +.Lmul_tail: +___ +$code.=<<___; + addq 64(%rsp), %r8 + adcq 72(%rsp), %r9 + adcq 80(%rsp), %r10 + adcq 88(%rsp), %r11 + adcq 96(%rsp), %r12 + adcq 104(%rsp), %r13 + adcq 112(%rsp), %r14 + adcq 120(%rsp), %r15 + sbbq %rcx, %rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp), %rax + movq -48(%rax), %r15 + movq -40(%rax), %r14 + movq -32(%rax), %r13 + movq -24(%rax), %r12 + movq -16(%rax), %rbp + movq -8(%rax), %rbx + leaq (%rax), %rsp +.Lmul_epilogue: + ret +.size rsaz_512_mul,.-rsaz_512_mul +___ +} +{ +my ($out,$ap,$bp,$mod,$n0,$pwr) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d"); +$code.=<<___; +.globl rsaz_512_mul_gather4 +.type rsaz_512_mul_gather4,\@function,6 +.align 32 +rsaz_512_mul_gather4: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + mov $pwr, $pwr + subq \$128+24, %rsp +.Lmul_gather4_body: +___ +$code.=<<___ if ($addx); + movl \$0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl \$0x80100,%r11d # check for MULX and ADO/CX + je .Lmulx_gather +___ +$code.=<<___; + movl 64($bp,$pwr,4), %eax + movq $out, %xmm0 # off-load arguments + movl ($bp,$pwr,4), %ebx + movq $mod, %xmm1 + movq $n0, 128(%rsp) + + shlq \$32, %rax + or %rax, %rbx + movq ($ap), %rax + movq 8($ap), %rcx + leaq 128($bp,$pwr,4), %rbp + mulq %rbx # 0 iteration + movq %rax, (%rsp) + movq %rcx, %rax + movq %rdx, %r8 + + mulq %rbx + movd (%rbp), %xmm4 + addq %rax, %r8 + movq 16($ap), %rax + movq %rdx, %r9 + adcq \$0, %r9 + + mulq %rbx + movd 64(%rbp), %xmm5 + addq %rax, %r9 + movq 24($ap), %rax + movq %rdx, %r10 + adcq \$0, %r10 + + mulq %rbx + pslldq \$4, %xmm5 + addq %rax, %r10 + movq 32($ap), %rax + movq %rdx, %r11 + adcq \$0, %r11 + + mulq %rbx + por %xmm5, %xmm4 + addq %rax, %r11 + movq 40($ap), %rax + movq %rdx, %r12 + adcq \$0, %r12 + + mulq %rbx + addq %rax, %r12 + movq 48($ap), %rax + movq %rdx, %r13 + adcq \$0, %r13 + + mulq %rbx + leaq 128(%rbp), %rbp + addq %rax, %r13 + movq 56($ap), %rax + movq %rdx, %r14 + adcq \$0, %r14 + + mulq %rbx + movq %xmm4, %rbx + addq %rax, %r14 + movq ($ap), %rax + movq %rdx, %r15 + adcq \$0, %r15 + + leaq 8(%rsp), %rdi + movl \$7, %ecx + jmp .Loop_mul_gather + +.align 32 +.Loop_mul_gather: + mulq %rbx + addq %rax, %r8 + movq 8($ap), %rax + movq %r8, (%rdi) + movq %rdx, %r8 + adcq \$0, %r8 + + mulq %rbx + movd (%rbp), %xmm4 + addq %rax, %r9 + movq 16($ap), %rax + adcq \$0, %rdx + addq %r9, %r8 + movq %rdx, %r9 + adcq \$0, %r9 + + mulq %rbx + movd 64(%rbp), %xmm5 + addq %rax, %r10 + movq 24($ap), %rax + adcq \$0, %rdx + addq %r10, %r9 + movq %rdx, %r10 + adcq \$0, %r10 + + mulq %rbx + pslldq \$4, %xmm5 + addq %rax, 
%r11 + movq 32($ap), %rax + adcq \$0, %rdx + addq %r11, %r10 + movq %rdx, %r11 + adcq \$0, %r11 + + mulq %rbx + por %xmm5, %xmm4 + addq %rax, %r12 + movq 40($ap), %rax + adcq \$0, %rdx + addq %r12, %r11 + movq %rdx, %r12 + adcq \$0, %r12 + + mulq %rbx + addq %rax, %r13 + movq 48($ap), %rax + adcq \$0, %rdx + addq %r13, %r12 + movq %rdx, %r13 + adcq \$0, %r13 + + mulq %rbx + addq %rax, %r14 + movq 56($ap), %rax + adcq \$0, %rdx + addq %r14, %r13 + movq %rdx, %r14 + adcq \$0, %r14 + + mulq %rbx + movq %xmm4, %rbx + addq %rax, %r15 + movq ($ap), %rax + adcq \$0, %rdx + addq %r15, %r14 + movq %rdx, %r15 + adcq \$0, %r15 + + leaq 128(%rbp), %rbp + leaq 8(%rdi), %rdi + + decl %ecx + jnz .Loop_mul_gather + + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq %r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + + movq %xmm0, $out + movq %xmm1, %rbp + + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reduce +___ +$code.=<<___ if ($addx); + jmp .Lmul_gather_tail + +.align 32 +.Lmulx_gather: + mov 64($bp,$pwr,4), %eax + movq $out, %xmm0 # off-load arguments + lea 128($bp,$pwr,4), %rbp + mov ($bp,$pwr,4), %edx + movq $mod, %xmm1 + mov $n0, 128(%rsp) + + shl \$32, %rax + or %rax, %rdx + mulx ($ap), %rbx, %r8 # 0 iteration + mov %rbx, (%rsp) + xor %edi, %edi # cf=0, of=0 + + mulx 8($ap), %rax, %r9 + movd (%rbp), %xmm4 + + mulx 16($ap), %rbx, %r10 + movd 64(%rbp), %xmm5 + adcx %rax, %r8 + + mulx 24($ap), %rax, %r11 + pslldq \$4, %xmm5 + adcx %rbx, %r9 + + mulx 32($ap), %rbx, %r12 + por %xmm5, %xmm4 + adcx %rax, %r10 + + mulx 40($ap), %rax, %r13 + adcx %rbx, %r11 + + mulx 48($ap), %rbx, %r14 + lea 128(%rbp), %rbp + adcx %rax, %r12 + + mulx 56($ap), %rax, %r15 + movq %xmm4, %rdx + adcx %rbx, %r13 + adcx %rax, %r14 + mov %r8, %rbx + adcx %rdi, %r15 # %rdi is 0 + + mov \$-7, %rcx + jmp .Loop_mulx_gather + +.align 32 +.Loop_mulx_gather: + mulx ($ap), %rax, %r8 + adcx %rax, %rbx + adox %r9, %r8 + + mulx 8($ap), %rax, %r9 + .byte 0x66,0x0f,0x6e,0xa5,0x00,0x00,0x00,0x00 # movd (%rbp), %xmm4 + adcx %rax, %r8 + adox %r10, %r9 + + mulx 16($ap), %rax, %r10 + movd 64(%rbp), %xmm5 + lea 128(%rbp), %rbp + adcx %rax, %r9 + adox %r11, %r10 + + .byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 # mulx 24($ap), %rax, %r11 + pslldq \$4, %xmm5 + por %xmm5, %xmm4 + adcx %rax, %r10 + adox %r12, %r11 + + mulx 32($ap), %rax, %r12 + adcx %rax, %r11 + adox %r13, %r12 + + mulx 40($ap), %rax, %r13 + adcx %rax, %r12 + adox %r14, %r13 + + .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 # mulx 48($ap), %rax, %r14 + adcx %rax, %r13 + adox %r15, %r14 + + mulx 56($ap), %rax, %r15 + movq %xmm4, %rdx + mov %rbx, 64(%rsp,%rcx,8) + adcx %rax, %r14 + adox %rdi, %r15 + mov %r8, %rbx + adcx %rdi, %r15 # cf=0 + + inc %rcx # of=0 + jnz .Loop_mulx_gather + + mov %r8, 64(%rsp) + mov %r9, 64+8(%rsp) + mov %r10, 64+16(%rsp) + mov %r11, 64+24(%rsp) + mov %r12, 64+32(%rsp) + mov %r13, 64+40(%rsp) + mov %r14, 64+48(%rsp) + mov %r15, 64+56(%rsp) + + movq %xmm0, $out + movq %xmm1, %rbp + + mov 128(%rsp), %rdx # pull $n0 + mov (%rsp), %r8 + mov 8(%rsp), %r9 + mov 16(%rsp), %r10 + mov 24(%rsp), %r11 + mov 32(%rsp), %r12 + mov 40(%rsp), %r13 + mov 48(%rsp), %r14 + mov 56(%rsp), %r15 + + call __rsaz_512_reducex + +.Lmul_gather_tail: +___ +$code.=<<___; + addq 64(%rsp), %r8 + adcq 72(%rsp), %r9 + adcq 80(%rsp), %r10 + adcq 88(%rsp), %r11 + adcq 96(%rsp), 
%r12 + adcq 104(%rsp), %r13 + adcq 112(%rsp), %r14 + adcq 120(%rsp), %r15 + sbbq %rcx, %rcx + + call __rsaz_512_subtract + + leaq 128+24+48(%rsp), %rax + movq -48(%rax), %r15 + movq -40(%rax), %r14 + movq -32(%rax), %r13 + movq -24(%rax), %r12 + movq -16(%rax), %rbp + movq -8(%rax), %rbx + leaq (%rax), %rsp +.Lmul_gather4_epilogue: + ret +.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4 +___ +} +{ +my ($out,$ap,$mod,$n0,$tbl,$pwr) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d"); +$code.=<<___; +.globl rsaz_512_mul_scatter4 +.type rsaz_512_mul_scatter4,\@function,6 +.align 32 +rsaz_512_mul_scatter4: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + mov $pwr, $pwr + subq \$128+24, %rsp +.Lmul_scatter4_body: + leaq ($tbl,$pwr,4), $tbl + movq $out, %xmm0 # off-load arguments + movq $mod, %xmm1 + movq $tbl, %xmm2 + movq $n0, 128(%rsp) + + movq $out, %rbp +___ +$code.=<<___ if ($addx); + movl \$0x80100,%r11d + andl OPENSSL_ia32cap_P+8(%rip),%r11d + cmpl \$0x80100,%r11d # check for MULX and ADO/CX + je .Lmulx_scatter +___ +$code.=<<___; + movq ($out),%rbx # pass b[0] + call __rsaz_512_mul + + movq %xmm0, $out + movq %xmm1, %rbp + + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reduce +___ +$code.=<<___ if ($addx); + jmp .Lmul_scatter_tail + +.align 32 +.Lmulx_scatter: + movq ($out), %rdx # pass b[0] + call __rsaz_512_mulx + + movq %xmm0, $out + movq %xmm1, %rbp + + movq 128(%rsp), %rdx # pull $n0 + movq (%rsp), %r8 + movq 8(%rsp), %r9 + movq 16(%rsp), %r10 + movq 24(%rsp), %r11 + movq 32(%rsp), %r12 + movq 40(%rsp), %r13 + movq 48(%rsp), %r14 + movq 56(%rsp), %r15 + + call __rsaz_512_reducex + +.Lmul_scatter_tail: +___ +$code.=<<___; + addq 64(%rsp), %r8 + adcq 72(%rsp), %r9 + adcq 80(%rsp), %r10 + adcq 88(%rsp), %r11 + adcq 96(%rsp), %r12 + adcq 104(%rsp), %r13 + adcq 112(%rsp), %r14 + adcq 120(%rsp), %r15 + movq %xmm2, $inp + sbbq %rcx, %rcx + + call __rsaz_512_subtract + + movl %r8d, 64*0($inp) # scatter + shrq \$32, %r8 + movl %r9d, 64*2($inp) + shrq \$32, %r9 + movl %r10d, 64*4($inp) + shrq \$32, %r10 + movl %r11d, 64*6($inp) + shrq \$32, %r11 + movl %r12d, 64*8($inp) + shrq \$32, %r12 + movl %r13d, 64*10($inp) + shrq \$32, %r13 + movl %r14d, 64*12($inp) + shrq \$32, %r14 + movl %r15d, 64*14($inp) + shrq \$32, %r15 + movl %r8d, 64*1($inp) + movl %r9d, 64*3($inp) + movl %r10d, 64*5($inp) + movl %r11d, 64*7($inp) + movl %r12d, 64*9($inp) + movl %r13d, 64*11($inp) + movl %r14d, 64*13($inp) + movl %r15d, 64*15($inp) + + leaq 128+24+48(%rsp), %rax + movq -48(%rax), %r15 + movq -40(%rax), %r14 + movq -32(%rax), %r13 + movq -24(%rax), %r12 + movq -16(%rax), %rbp + movq -8(%rax), %rbx + leaq (%rax), %rsp +.Lmul_scatter4_epilogue: + ret +.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4 +___ +} +{ +my ($out,$inp,$mod,$n0) = ("%rdi","%rsi","%rdx","%rcx"); +$code.=<<___; +.globl rsaz_512_mul_by_one +.type rsaz_512_mul_by_one,\@function,4 +.align 32 +rsaz_512_mul_by_one: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + subq \$128+24, %rsp +.Lmul_by_one_body: +___ +$code.=<<___ if ($addx); + movl OPENSSL_ia32cap_P+8(%rip),%eax +___ +$code.=<<___; + movq $mod, %rbp # reassign argument + movq $n0, 128(%rsp) + + movq ($inp), %r8 + pxor %xmm0, %xmm0 + movq 8($inp), %r9 + movq 16($inp), %r10 + movq 24($inp), %r11 + movq 32($inp), %r12 + movq 40($inp), %r13 + movq 48($inp), %r14 + movq 56($inp), %r15 + + movdqa %xmm0, 
(%rsp) + movdqa %xmm0, 16(%rsp) + movdqa %xmm0, 32(%rsp) + movdqa %xmm0, 48(%rsp) + movdqa %xmm0, 64(%rsp) + movdqa %xmm0, 80(%rsp) + movdqa %xmm0, 96(%rsp) +___ +$code.=<<___ if ($addx); + andl \$0x80100,%eax + cmpl \$0x80100,%eax # check for MULX and ADO/CX + je .Lby_one_callx +___ +$code.=<<___; + call __rsaz_512_reduce +___ +$code.=<<___ if ($addx); + jmp .Lby_one_tail +.align 32 +.Lby_one_callx: + movq 128(%rsp), %rdx # pull $n0 + call __rsaz_512_reducex +.Lby_one_tail: +___ +$code.=<<___; + movq %r8, ($out) + movq %r9, 8($out) + movq %r10, 16($out) + movq %r11, 24($out) + movq %r12, 32($out) + movq %r13, 40($out) + movq %r14, 48($out) + movq %r15, 56($out) + + leaq 128+24+48(%rsp), %rax + movq -48(%rax), %r15 + movq -40(%rax), %r14 + movq -32(%rax), %r13 + movq -24(%rax), %r12 + movq -16(%rax), %rbp + movq -8(%rax), %rbx + leaq (%rax), %rsp +.Lmul_by_one_epilogue: + ret +.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one +___ +} +{ # __rsaz_512_reduce + # + # input: %r8-%r15, %rbp - mod, 128(%rsp) - n0 + # output: %r8-%r15 + # clobbers: everything except %rbp and %rdi +$code.=<<___; +.type __rsaz_512_reduce,\@abi-omnipotent +.align 32 +__rsaz_512_reduce: + movq %r8, %rbx + imulq 128+8(%rsp), %rbx + movq 0(%rbp), %rax + movl \$8, %ecx + jmp .Lreduction_loop + +.align 32 +.Lreduction_loop: + mulq %rbx + movq 8(%rbp), %rax + negq %r8 + movq %rdx, %r8 + adcq \$0, %r8 + + mulq %rbx + addq %rax, %r9 + movq 16(%rbp), %rax + adcq \$0, %rdx + addq %r9, %r8 + movq %rdx, %r9 + adcq \$0, %r9 + + mulq %rbx + addq %rax, %r10 + movq 24(%rbp), %rax + adcq \$0, %rdx + addq %r10, %r9 + movq %rdx, %r10 + adcq \$0, %r10 + + mulq %rbx + addq %rax, %r11 + movq 32(%rbp), %rax + adcq \$0, %rdx + addq %r11, %r10 + movq 128+8(%rsp), %rsi + #movq %rdx, %r11 + #adcq \$0, %r11 + adcq \$0, %rdx + movq %rdx, %r11 + + mulq %rbx + addq %rax, %r12 + movq 40(%rbp), %rax + adcq \$0, %rdx + imulq %r8, %rsi + addq %r12, %r11 + movq %rdx, %r12 + adcq \$0, %r12 + + mulq %rbx + addq %rax, %r13 + movq 48(%rbp), %rax + adcq \$0, %rdx + addq %r13, %r12 + movq %rdx, %r13 + adcq \$0, %r13 + + mulq %rbx + addq %rax, %r14 + movq 56(%rbp), %rax + adcq \$0, %rdx + addq %r14, %r13 + movq %rdx, %r14 + adcq \$0, %r14 + + mulq %rbx + movq %rsi, %rbx + addq %rax, %r15 + movq 0(%rbp), %rax + adcq \$0, %rdx + addq %r15, %r14 + movq %rdx, %r15 + adcq \$0, %r15 + + decl %ecx + jne .Lreduction_loop + + ret +.size __rsaz_512_reduce,.-__rsaz_512_reduce +___ +} +if ($addx) { + # __rsaz_512_reducex + # + # input: %r8-%r15, %rbp - mod, 128(%rsp) - n0 + # output: %r8-%r15 + # clobbers: everything except %rbp and %rdi +$code.=<<___; +.type __rsaz_512_reducex,\@abi-omnipotent +.align 32 +__rsaz_512_reducex: + #movq 128+8(%rsp), %rdx # pull $n0 + imulq %r8, %rdx + xorq %rsi, %rsi # cf=0,of=0 + movl \$8, %ecx + jmp .Lreduction_loopx + +.align 32 +.Lreduction_loopx: + mov %r8, %rbx + mulx 0(%rbp), %rax, %r8 + adcx %rbx, %rax + adox %r9, %r8 + + mulx 8(%rbp), %rax, %r9 + adcx %rax, %r8 + adox %r10, %r9 + + mulx 16(%rbp), %rbx, %r10 + adcx %rbx, %r9 + adox %r11, %r10 + + mulx 24(%rbp), %rbx, %r11 + adcx %rbx, %r10 + adox %r12, %r11 + + .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 # mulx 32(%rbp), %rbx, %r12 + mov %rdx, %rax + mov %r8, %rdx + adcx %rbx, %r11 + adox %r13, %r12 + + mulx 128+8(%rsp), %rbx, %rdx + mov %rax, %rdx + + mulx 40(%rbp), %rax, %r13 + adcx %rax, %r12 + adox %r14, %r13 + + .byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 # mulx 48(%rbp), %rax, %r14 + adcx %rax, %r13 + adox %r15, %r14 + + mulx 56(%rbp), %rax, %r15 + mov %rbx, %rdx 
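The two reduction helpers in this hunk, __rsaz_512_reduce and its MULX counterpart __rsaz_512_reducex, run eight passes of word-wise Montgomery reduction: each pass chooses m = t0 * n0 mod 2^64 so that adding m times the modulus clears the lowest word of the running value (kept in %r8-%r15), which is then shifted out. A compact Python sketch of that arithmetic, with an illustrative modulus and no claim about the exact register flow:

def mont_reduce(t, mod, n0, nwords=8, word=1 << 64):
    """Word-wise Montgomery reduction of a value t < mod * 2**(64*nwords):
    each pass cancels the lowest 64-bit word by adding a multiple of mod,
    then drops it.  The result is congruent to t * R**-1 mod mod."""
    for _ in range(nwords):
        m = (t * n0) % word          # m = t[0] * n0 mod 2**64
        t = (t + m * mod) // word    # low word is now zero; shift it out
    return t

mod = 2**512 - 569                          # an odd 512-bit modulus (illustrative)
R = 2**(64 * 8)
n0 = (-pow(mod, -1, 1 << 64)) % (1 << 64)   # the precomputed -mod**-1 mod 2**64
x = 123456789 * R % mod                     # a value in Montgomery form
assert mont_reduce(x, mod, n0) % mod == 123456789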
+ adcx %rax, %r14 + adox %rsi, %r15 # %rsi is 0 + adcx %rsi, %r15 # cf=0 + + decl %ecx # of=0 + jne .Lreduction_loopx + + ret +.size __rsaz_512_reducex,.-__rsaz_512_reducex +___ +} +{ # __rsaz_512_subtract + # input: %r8-%r15, %rdi - $out, %rbp - $mod, %rcx - mask + # output: + # clobbers: everything but %rdi, %rsi and %rbp +$code.=<<___; +.type __rsaz_512_subtract,\@abi-omnipotent +.align 32 +__rsaz_512_subtract: + movq %r8, ($out) + movq %r9, 8($out) + movq %r10, 16($out) + movq %r11, 24($out) + movq %r12, 32($out) + movq %r13, 40($out) + movq %r14, 48($out) + movq %r15, 56($out) + + movq 0($mod), %r8 + movq 8($mod), %r9 + negq %r8 + notq %r9 + andq %rcx, %r8 + movq 16($mod), %r10 + andq %rcx, %r9 + notq %r10 + movq 24($mod), %r11 + andq %rcx, %r10 + notq %r11 + movq 32($mod), %r12 + andq %rcx, %r11 + notq %r12 + movq 40($mod), %r13 + andq %rcx, %r12 + notq %r13 + movq 48($mod), %r14 + andq %rcx, %r13 + notq %r14 + movq 56($mod), %r15 + andq %rcx, %r14 + notq %r15 + andq %rcx, %r15 + + addq ($out), %r8 + adcq 8($out), %r9 + adcq 16($out), %r10 + adcq 24($out), %r11 + adcq 32($out), %r12 + adcq 40($out), %r13 + adcq 48($out), %r14 + adcq 56($out), %r15 + + movq %r8, ($out) + movq %r9, 8($out) + movq %r10, 16($out) + movq %r11, 24($out) + movq %r12, 32($out) + movq %r13, 40($out) + movq %r14, 48($out) + movq %r15, 56($out) + + ret +.size __rsaz_512_subtract,.-__rsaz_512_subtract +___ +} +{ # __rsaz_512_mul + # + # input: %rsi - ap, %rbp - bp + # ouput: + # clobbers: everything +my ($ap,$bp) = ("%rsi","%rbp"); +$code.=<<___; +.type __rsaz_512_mul,\@abi-omnipotent +.align 32 +__rsaz_512_mul: + leaq 8(%rsp), %rdi + + movq ($ap), %rax + mulq %rbx + movq %rax, (%rdi) + movq 8($ap), %rax + movq %rdx, %r8 + + mulq %rbx + addq %rax, %r8 + movq 16($ap), %rax + movq %rdx, %r9 + adcq \$0, %r9 + + mulq %rbx + addq %rax, %r9 + movq 24($ap), %rax + movq %rdx, %r10 + adcq \$0, %r10 + + mulq %rbx + addq %rax, %r10 + movq 32($ap), %rax + movq %rdx, %r11 + adcq \$0, %r11 + + mulq %rbx + addq %rax, %r11 + movq 40($ap), %rax + movq %rdx, %r12 + adcq \$0, %r12 + + mulq %rbx + addq %rax, %r12 + movq 48($ap), %rax + movq %rdx, %r13 + adcq \$0, %r13 + + mulq %rbx + addq %rax, %r13 + movq 56($ap), %rax + movq %rdx, %r14 + adcq \$0, %r14 + + mulq %rbx + addq %rax, %r14 + movq ($ap), %rax + movq %rdx, %r15 + adcq \$0, %r15 + + leaq 8($bp), $bp + leaq 8(%rdi), %rdi + + movl \$7, %ecx + jmp .Loop_mul + +.align 32 +.Loop_mul: + movq ($bp), %rbx + mulq %rbx + addq %rax, %r8 + movq 8($ap), %rax + movq %r8, (%rdi) + movq %rdx, %r8 + adcq \$0, %r8 + + mulq %rbx + addq %rax, %r9 + movq 16($ap), %rax + adcq \$0, %rdx + addq %r9, %r8 + movq %rdx, %r9 + adcq \$0, %r9 + + mulq %rbx + addq %rax, %r10 + movq 24($ap), %rax + adcq \$0, %rdx + addq %r10, %r9 + movq %rdx, %r10 + adcq \$0, %r10 + + mulq %rbx + addq %rax, %r11 + movq 32($ap), %rax + adcq \$0, %rdx + addq %r11, %r10 + movq %rdx, %r11 + adcq \$0, %r11 + + mulq %rbx + addq %rax, %r12 + movq 40($ap), %rax + adcq \$0, %rdx + addq %r12, %r11 + movq %rdx, %r12 + adcq \$0, %r12 + + mulq %rbx + addq %rax, %r13 + movq 48($ap), %rax + adcq \$0, %rdx + addq %r13, %r12 + movq %rdx, %r13 + adcq \$0, %r13 + + mulq %rbx + addq %rax, %r14 + movq 56($ap), %rax + adcq \$0, %rdx + addq %r14, %r13 + movq %rdx, %r14 + leaq 8($bp), $bp + adcq \$0, %r14 + + mulq %rbx + addq %rax, %r15 + movq ($ap), %rax + adcq \$0, %rdx + addq %r15, %r14 + movq %rdx, %r15 + adcq \$0, %r15 + + leaq 8(%rdi), %rdi + + decl %ecx + jnz .Loop_mul + + movq %r8, (%rdi) + movq %r9, 8(%rdi) + movq %r10, 16(%rdi) + movq 
%r11, 24(%rdi) + movq %r12, 32(%rdi) + movq %r13, 40(%rdi) + movq %r14, 48(%rdi) + movq %r15, 56(%rdi) + + ret +.size __rsaz_512_mul,.-__rsaz_512_mul +___ +} +if ($addx) { + # __rsaz_512_mulx + # + # input: %rsi - ap, %rbp - bp + # ouput: + # clobbers: everything +my ($ap,$bp,$zero) = ("%rsi","%rbp","%rdi"); +$code.=<<___; +.type __rsaz_512_mulx,\@abi-omnipotent +.align 32 +__rsaz_512_mulx: + mulx ($ap), %rbx, %r8 # initial %rdx preloaded by caller + mov \$-6, %rcx + + mulx 8($ap), %rax, %r9 + movq %rbx, 8(%rsp) + + mulx 16($ap), %rbx, %r10 + adc %rax, %r8 + + mulx 24($ap), %rax, %r11 + adc %rbx, %r9 + + mulx 32($ap), %rbx, %r12 + adc %rax, %r10 + + mulx 40($ap), %rax, %r13 + adc %rbx, %r11 + + mulx 48($ap), %rbx, %r14 + adc %rax, %r12 + + mulx 56($ap), %rax, %r15 + mov 8($bp), %rdx + adc %rbx, %r13 + adc %rax, %r14 + adc \$0, %r15 + + xor $zero, $zero # cf=0,of=0 + jmp .Loop_mulx + +.align 32 +.Loop_mulx: + movq %r8, %rbx + mulx ($ap), %rax, %r8 + adcx %rax, %rbx + adox %r9, %r8 + + mulx 8($ap), %rax, %r9 + adcx %rax, %r8 + adox %r10, %r9 + + mulx 16($ap), %rax, %r10 + adcx %rax, %r9 + adox %r11, %r10 + + mulx 24($ap), %rax, %r11 + adcx %rax, %r10 + adox %r12, %r11 + + .byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 # mulx 32($ap), %rax, %r12 + adcx %rax, %r11 + adox %r13, %r12 + + mulx 40($ap), %rax, %r13 + adcx %rax, %r12 + adox %r14, %r13 + + mulx 48($ap), %rax, %r14 + adcx %rax, %r13 + adox %r15, %r14 + + mulx 56($ap), %rax, %r15 + movq 64($bp,%rcx,8), %rdx + movq %rbx, 8+64-8(%rsp,%rcx,8) + adcx %rax, %r14 + adox $zero, %r15 + adcx $zero, %r15 # cf=0 + + inc %rcx # of=0 + jnz .Loop_mulx + + movq %r8, %rbx + mulx ($ap), %rax, %r8 + adcx %rax, %rbx + adox %r9, %r8 + + .byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 # mulx 8($ap), %rax, %r9 + adcx %rax, %r8 + adox %r10, %r9 + + .byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 # mulx 16($ap), %rax, %r10 + adcx %rax, %r9 + adox %r11, %r10 + + mulx 24($ap), %rax, %r11 + adcx %rax, %r10 + adox %r12, %r11 + + mulx 32($ap), %rax, %r12 + adcx %rax, %r11 + adox %r13, %r12 + + mulx 40($ap), %rax, %r13 + adcx %rax, %r12 + adox %r14, %r13 + + .byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 # mulx 48($ap), %rax, %r14 + adcx %rax, %r13 + adox %r15, %r14 + + .byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 # mulx 56($ap), %rax, %r15 + adcx %rax, %r14 + adox $zero, %r15 + adcx $zero, %r15 + + mov %rbx, 8+64-8(%rsp) + mov %r8, 8+64(%rsp) + mov %r9, 8+64+8(%rsp) + mov %r10, 8+64+16(%rsp) + mov %r11, 8+64+24(%rsp) + mov %r12, 8+64+32(%rsp) + mov %r13, 8+64+40(%rsp) + mov %r14, 8+64+48(%rsp) + mov %r15, 8+64+56(%rsp) + + ret +.size __rsaz_512_mulx,.-__rsaz_512_mulx +___ +} +{ +my ($out,$inp,$power)= $win64 ? 
("%rcx","%rdx","%r8d") : ("%rdi","%rsi","%edx"); +$code.=<<___; +.globl rsaz_512_scatter4 +.type rsaz_512_scatter4,\@abi-omnipotent +.align 16 +rsaz_512_scatter4: + leaq ($out,$power,4), $out + movl \$8, %r9d + jmp .Loop_scatter +.align 16 +.Loop_scatter: + movq ($inp), %rax + leaq 8($inp), $inp + movl %eax, ($out) + shrq \$32, %rax + movl %eax, 64($out) + leaq 128($out), $out + decl %r9d + jnz .Loop_scatter + ret +.size rsaz_512_scatter4,.-rsaz_512_scatter4 + +.globl rsaz_512_gather4 +.type rsaz_512_gather4,\@abi-omnipotent +.align 16 +rsaz_512_gather4: + leaq ($inp,$power,4), $inp + movl \$8, %r9d + jmp .Loop_gather +.align 16 +.Loop_gather: + movl ($inp), %eax + movl 64($inp), %r8d + leaq 128($inp), $inp + shlq \$32, %r8 + or %r8, %rax + movq %rax, ($out) + leaq 8($out), $out + decl %r9d + jnz .Loop_gather + ret +.size rsaz_512_gather4,.-rsaz_512_gather4 +___ +} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + lea 128+24+48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size sqr_handler,.-sqr_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_rsaz_512_sqr + .rva .LSEH_end_rsaz_512_sqr + .rva .LSEH_info_rsaz_512_sqr + + .rva .LSEH_begin_rsaz_512_mul + .rva .LSEH_end_rsaz_512_mul + .rva .LSEH_info_rsaz_512_mul + + .rva .LSEH_begin_rsaz_512_mul_gather4 + .rva .LSEH_end_rsaz_512_mul_gather4 + .rva .LSEH_info_rsaz_512_mul_gather4 + + .rva 
.LSEH_begin_rsaz_512_mul_scatter4 + .rva .LSEH_end_rsaz_512_mul_scatter4 + .rva .LSEH_info_rsaz_512_mul_scatter4 + + .rva .LSEH_begin_rsaz_512_mul_by_one + .rva .LSEH_end_rsaz_512_mul_by_one + .rva .LSEH_info_rsaz_512_mul_by_one + +.section .xdata +.align 8 +.LSEH_info_rsaz_512_sqr: + .byte 9,0,0,0 + .rva se_handler + .rva .Lsqr_body,.Lsqr_epilogue # HandlerData[] +.LSEH_info_rsaz_512_mul: + .byte 9,0,0,0 + .rva se_handler + .rva .Lmul_body,.Lmul_epilogue # HandlerData[] +.LSEH_info_rsaz_512_mul_gather4: + .byte 9,0,0,0 + .rva se_handler + .rva .Lmul_gather4_body,.Lmul_gather4_epilogue # HandlerData[] +.LSEH_info_rsaz_512_mul_scatter4: + .byte 9,0,0,0 + .rva se_handler + .rva .Lmul_scatter4_body,.Lmul_scatter4_epilogue # HandlerData[] +.LSEH_info_rsaz_512_mul_by_one: + .byte 9,0,0,0 + .rva se_handler + .rva .Lmul_by_one_body,.Lmul_by_one_epilogue # HandlerData[] +___ +} + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl new file mode 100755 index 00000000000000..71b45002a42f73 --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/asm/sparct4-mont.pl @@ -0,0 +1,1222 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by David S. Miller and Andy Polyakov +# . The module is licensed under 2-clause BSD +# license. November 2012. All rights reserved. +# ==================================================================== + +###################################################################### +# Montgomery squaring-n-multiplication module for SPARC T4. +# +# The module consists of three parts: +# +# 1) collection of "single-op" subroutines that perform single +# operation, Montgomery squaring or multiplication, on 512-, +# 1024-, 1536- and 2048-bit operands; +# 2) collection of "multi-op" subroutines that perform 5 squaring and +# 1 multiplication operations on operands of above lengths; +# 3) fall-back and helper VIS3 subroutines. +# +# RSA sign is dominated by multi-op subroutine, while RSA verify and +# DSA - by single-op. Special note about 4096-bit RSA verify result. +# Operands are too long for dedicated hardware and it's handled by +# VIS3 code, which is why you don't see any improvement. It's surely +# possible to improve it [by deploying 'mpmul' instruction], maybe in +# the future... +# +# Performance improvement. 
+# +# 64-bit process, VIS3: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000628s 0.000028s 1592.4 35434.4 +# rsa 2048 bits 0.003282s 0.000106s 304.7 9438.3 +# rsa 4096 bits 0.025866s 0.000340s 38.7 2940.9 +# dsa 1024 bits 0.000301s 0.000332s 3323.7 3013.9 +# dsa 2048 bits 0.001056s 0.001233s 946.9 810.8 +# +# 64-bit process, this module: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000256s 0.000016s 3904.4 61411.9 +# rsa 2048 bits 0.000946s 0.000029s 1056.8 34292.7 +# rsa 4096 bits 0.005061s 0.000340s 197.6 2940.5 +# dsa 1024 bits 0.000176s 0.000195s 5674.7 5130.5 +# dsa 2048 bits 0.000296s 0.000354s 3383.2 2827.6 +# +###################################################################### +# 32-bit process, VIS3: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000665s 0.000028s 1504.8 35233.3 +# rsa 2048 bits 0.003349s 0.000106s 298.6 9433.4 +# rsa 4096 bits 0.025959s 0.000341s 38.5 2934.8 +# dsa 1024 bits 0.000320s 0.000341s 3123.3 2929.6 +# dsa 2048 bits 0.001101s 0.001260s 908.2 793.4 +# +# 32-bit process, this module: +# sign verify sign/s verify/s +# rsa 1024 bits 0.000301s 0.000017s 3317.1 60240.0 +# rsa 2048 bits 0.001034s 0.000030s 966.9 33812.7 +# rsa 4096 bits 0.005244s 0.000341s 190.7 2935.4 +# dsa 1024 bits 0.000201s 0.000205s 4976.1 4879.2 +# dsa 2048 bits 0.000328s 0.000360s 3051.1 2774.2 +# +# 32-bit code is prone to performance degradation as interrupt rate +# dispatched to CPU executing the code grows. This is because in +# standard process of handling interrupt in 32-bit process context +# upper halves of most integer registers used as input or output are +# zeroed. This renders result invalid, and operation has to be re-run. +# If CPU is "bothered" with timer interrupts only, the penalty is +# hardly measurable. But in order to mitigate this problem for higher +# interrupt rates contemporary Linux kernel recognizes biased stack +# even in 32-bit process context and preserves full register contents. +# See http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=517ffce4e1a03aea979fe3a18a3dd1761a24fafb +# for details. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "sparcv9_modes.pl"; + +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ +.register %g2,#scratch +.register %g3,#scratch +#endif + +.section ".text",#alloc,#execinstr + +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif +___ + +######################################################################## +# Register layout for mont[mul|sqr] instructions. +# For details see "Oracle SPARC Architecture 2011" manual at +# http://www.oracle.com/technetwork/server-storage/sun-sparc-enterprise/documentation/. +# +my @R=map("%f".2*$_,(0..11,30,31,12..29)); +my @N=(map("%l$_",(0..7)),map("%o$_",(0..5))); @N=(@N,@N,@N[0..3]); +my @A=(@N[0..13],@R[14..31]); +my @B=(map("%i$_",(0..5)),map("%l$_",(0..7))); @B=(@B,@B,map("%o$_",(0..3))); + +######################################################################## +# int bn_mul_mont_t4_$NUM(u64 *rp,const u64 *ap,const u64 *bp, +# const u64 *np,const BN_ULONG *n0); +# +sub generate_bn_mul_mont_t4() { +my $NUM=shift; +my ($rp,$ap,$bp,$np,$sentinel)=map("%g$_",(1..5)); + +$code.=<<___; +.globl bn_mul_mont_t4_$NUM +.align 32 +bn_mul_mont_t4_$NUM: +#ifdef __arch64__ + mov 0,$sentinel + mov -128,%g4 +#elif defined(SPARCV9_64BIT_STACK) + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+0],%g1 ! 
OPENSSL_sparcv9_P[0] + mov -2047,%g4 + and %g1,SPARCV9_64BIT_STACK,%g1 + movrz %g1,0,%g4 + mov -1,$sentinel + add %g4,-128,%g4 +#else + mov -1,$sentinel + mov -128,%g4 +#endif + sllx $sentinel,32,$sentinel + save %sp,%g4,%sp +#ifndef __arch64__ + save %sp,-128,%sp ! warm it up + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + restore + restore + restore + restore + restore + restore +#endif + and %sp,1,%g4 + or $sentinel,%fp,%fp + or %g4,$sentinel,$sentinel + + ! copy arguments to global registers + mov %i0,$rp + mov %i1,$ap + mov %i2,$bp + mov %i3,$np + ld [%i4+0],%f1 ! load *n0 + ld [%i4+4],%f0 + fsrc2 %f0,%f60 +___ + +# load ap[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +my $lo=$i<13?@A[$i+1]:"%o7"; +$code.=<<___; + ld [$ap+$i*8+0],$lo + ld [$ap+$i*8+4],@A[$i] + sllx @A[$i],32,@A[$i] + or $lo,@A[$i],@A[$i] +___ +} +for(; $i<$NUM; $i++) { +my ($hi,$lo)=("%f".2*($i%4),"%f".(2*($i%4)+1)); +$code.=<<___; + ld [$ap+$i*8+0],$lo + ld [$ap+$i*8+4],$hi + fsrc2 $hi,@A[$i] +___ +} +# load np[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +my $lo=$i<13?@N[$i+1]:"%o7"; +$code.=<<___; + ld [$np+$i*8+0],$lo + ld [$np+$i*8+4],@N[$i] + sllx @N[$i],32,@N[$i] + or $lo,@N[$i],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<28 && $i<$NUM; $i++) { +my $lo=$i<27?@N[$i+1]:"%o7"; +$code.=<<___; + ld [$np+$i*8+0],$lo + ld [$np+$i*8+4],@N[$i] + sllx @N[$i],32,@N[$i] + or $lo,@N[$i],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i++) { +my $lo=($i<$NUM-1)?@N[$i+1]:"%o7"; +$code.=<<___; + ld [$np+$i*8+0],$lo + ld [$np+$i*8+4],@N[$i] + sllx @N[$i],32,@N[$i] + or $lo,@N[$i],@N[$i] +___ +} +$code.=<<___; + cmp $ap,$bp + be SIZE_T_CC,.Lmsquare_$NUM + nop +___ + +# load bp[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +my $lo=$i<13?@B[$i+1]:"%o7"; +$code.=<<___; + ld [$bp+$i*8+0],$lo + ld [$bp+$i*8+4],@B[$i] + sllx @B[$i],32,@B[$i] + or $lo,@B[$i],@B[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i++) { +my $lo=($i<$NUM-1)?@B[$i+1]:"%o7"; +$code.=<<___; + ld [$bp+$i*8+0],$lo + ld [$bp+$i*8+4],@B[$i] + sllx @B[$i],32,@B[$i] + or $lo,@B[$i],@B[$i] +___ +} +# magic ################################################################ +$code.=<<___; + .word 0x81b02920+$NUM-1 ! montmul $NUM-1 +.Lmresume_$NUM: + fbu,pn %fcc3,.Lmabort_$NUM +#ifndef __arch64__ + and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Lmabort_$NUM +#endif + nop +#ifdef __arch64__ + restore + restore + restore + restore + restore +#else + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Lmabort1_$NUM + restore +#endif +___ + +# save tp[$NUM] ######################################################## +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + movxtod @A[$i],@R[$i] +___ +} +$code.=<<___; +#ifdef __arch64__ + restore +#else + and %fp,$sentinel,$sentinel + restore + and $sentinel,1,%o7 + and %fp,$sentinel,$sentinel + srl %fp,0,%fp ! just in case? 
+ or %o7,$sentinel,$sentinel + brz,a,pn $sentinel,.Lmdone_$NUM + mov 0,%i0 ! return failure +#endif +___ +for($i=0; $i<12 && $i<$NUM; $i++) { +@R[$i] =~ /%f([0-9]+)/; +my $lo = "%f".($1+1); +$code.=<<___; + st $lo,[$rp+$i*8+0] + st @R[$i],[$rp+$i*8+4] +___ +} +for(; $i<$NUM; $i++) { +my ($hi,$lo)=("%f".2*($i%4),"%f".(2*($i%4)+1)); +$code.=<<___; + fsrc2 @R[$i],$hi + st $lo,[$rp+$i*8+0] + st $hi,[$rp+$i*8+4] +___ +} +$code.=<<___; + mov 1,%i0 ! return success +.Lmdone_$NUM: + ret + restore + +.Lmabort_$NUM: + restore + restore + restore + restore + restore +.Lmabort1_$NUM: + restore + + mov 0,%i0 ! return failure + ret + restore + +.align 32 +.Lmsquare_$NUM: + save %sp,-128,%sp; or $sentinel,%fp,%fp + save %sp,-128,%sp; or $sentinel,%fp,%fp + .word 0x81b02940+$NUM-1 ! montsqr $NUM-1 + ba .Lmresume_$NUM + nop +.type bn_mul_mont_t4_$NUM, #function +.size bn_mul_mont_t4_$NUM, .-bn_mul_mont_t4_$NUM +___ +} + +for ($i=8;$i<=32;$i+=8) { + &generate_bn_mul_mont_t4($i); +} + +######################################################################## +# +sub load_ccr { +my ($ptbl,$pwr,$ccr,$skip_wr)=@_; +$code.=<<___; + srl $pwr, 2, %o4 + and $pwr, 3, %o5 + and %o4, 7, %o4 + sll %o5, 3, %o5 ! offset within first cache line + add %o5, $ptbl, $ptbl ! of the pwrtbl + or %g0, 1, %o5 + sll %o5, %o4, $ccr +___ +$code.=<<___ if (!$skip_wr); + wr $ccr, %g0, %ccr +___ +} +sub load_b_pair { +my ($pwrtbl,$B0,$B1)=@_; + +$code.=<<___; + ldx [$pwrtbl+0*32], $B0 + ldx [$pwrtbl+8*32], $B1 + ldx [$pwrtbl+1*32], %o4 + ldx [$pwrtbl+9*32], %o5 + movvs %icc, %o4, $B0 + ldx [$pwrtbl+2*32], %o4 + movvs %icc, %o5, $B1 + ldx [$pwrtbl+10*32],%o5 + move %icc, %o4, $B0 + ldx [$pwrtbl+3*32], %o4 + move %icc, %o5, $B1 + ldx [$pwrtbl+11*32],%o5 + movneg %icc, %o4, $B0 + ldx [$pwrtbl+4*32], %o4 + movneg %icc, %o5, $B1 + ldx [$pwrtbl+12*32],%o5 + movcs %xcc, %o4, $B0 + ldx [$pwrtbl+5*32],%o4 + movcs %xcc, %o5, $B1 + ldx [$pwrtbl+13*32],%o5 + movvs %xcc, %o4, $B0 + ldx [$pwrtbl+6*32], %o4 + movvs %xcc, %o5, $B1 + ldx [$pwrtbl+14*32],%o5 + move %xcc, %o4, $B0 + ldx [$pwrtbl+7*32], %o4 + move %xcc, %o5, $B1 + ldx [$pwrtbl+15*32],%o5 + movneg %xcc, %o4, $B0 + add $pwrtbl,16*32, $pwrtbl + movneg %xcc, %o5, $B1 +___ +} +sub load_b { +my ($pwrtbl,$Bi)=@_; + +$code.=<<___; + ldx [$pwrtbl+0*32], $Bi + ldx [$pwrtbl+1*32], %o4 + ldx [$pwrtbl+2*32], %o5 + movvs %icc, %o4, $Bi + ldx [$pwrtbl+3*32], %o4 + move %icc, %o5, $Bi + ldx [$pwrtbl+4*32], %o5 + movneg %icc, %o4, $Bi + ldx [$pwrtbl+5*32], %o4 + movcs %xcc, %o5, $Bi + ldx [$pwrtbl+6*32], %o5 + movvs %xcc, %o4, $Bi + ldx [$pwrtbl+7*32], %o4 + move %xcc, %o5, $Bi + add $pwrtbl,8*32, $pwrtbl + movneg %xcc, %o4, $Bi +___ +} + +######################################################################## +# int bn_pwr5_mont_t4_$NUM(u64 *tp,const u64 *np,const BN_ULONG *n0, +# const u64 *pwrtbl,int pwr,int stride); +# +sub generate_bn_pwr5_mont_t4() { +my $NUM=shift; +my ($tp,$np,$pwrtbl,$pwr,$sentinel)=map("%g$_",(1..5)); + +$code.=<<___; +.globl bn_pwr5_mont_t4_$NUM +.align 32 +bn_pwr5_mont_t4_$NUM: +#ifdef __arch64__ + mov 0,$sentinel + mov -128,%g4 +#elif defined(SPARCV9_64BIT_STACK) + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+0],%g1 ! OPENSSL_sparcv9_P[0] + mov -2047,%g4 + and %g1,SPARCV9_64BIT_STACK,%g1 + movrz %g1,0,%g4 + mov -1,$sentinel + add %g4,-128,%g4 +#else + mov -1,$sentinel + mov -128,%g4 +#endif + sllx $sentinel,32,$sentinel + save %sp,%g4,%sp +#ifndef __arch64__ + save %sp,-128,%sp ! 
warm it up + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + save %sp,-128,%sp + restore + restore + restore + restore + restore + restore +#endif + and %sp,1,%g4 + or $sentinel,%fp,%fp + or %g4,$sentinel,$sentinel + + ! copy arguments to global registers + mov %i0,$tp + mov %i1,$np + ld [%i2+0],%f1 ! load *n0 + ld [%i2+4],%f0 + mov %i3,$pwrtbl + srl %i4,%g0,%i4 ! pack last arguments + sllx %i5,32,$pwr + or %i4,$pwr,$pwr + fsrc2 %f0,%f60 +___ + +# load tp[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + ldx [$tp+$i*8],@A[$i] +___ +} +for(; $i<$NUM; $i++) { +$code.=<<___; + ldd [$tp+$i*8],@A[$i] +___ +} +# load np[$NUM] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + ldx [$np+$i*8],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<28 && $i<$NUM; $i++) { +$code.=<<___; + ldx [$np+$i*8],@N[$i] +___ +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i++) { +$code.=<<___; + ldx [$np+$i*8],@N[$i] +___ +} +# load pwrtbl[pwr] ######################################################## +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp + + srlx $pwr, 32, %o4 ! unpack $pwr + srl $pwr, %g0, %o5 + sub %o4, 5, %o4 + mov $pwrtbl, %o7 + sllx %o4, 32, $pwr ! re-pack $pwr + or %o5, $pwr, $pwr + srl %o5, %o4, %o5 +___ + &load_ccr("%o7","%o5","%o4"); +$code.=<<___; + b .Lstride_$NUM + nop +.align 16 +.Lstride_$NUM: +___ +for($i=0; $i<14 && $i<$NUM; $i+=2) { + &load_b_pair("%o7",@B[$i],@B[$i+1]); +} +$code.=<<___; + save %sp,-128,%sp; or $sentinel,%fp,%fp +___ +for(; $i<$NUM; $i+=2) { + &load_b_pair("%i7",@B[$i],@B[$i+1]); +} +$code.=<<___; + srax $pwr, 32, %o4 ! unpack $pwr + srl $pwr, %g0, %o5 + sub %o4, 5, %o4 + mov $pwrtbl, %i7 + sllx %o4, 32, $pwr ! re-pack $pwr + or %o5, $pwr, $pwr + srl %o5, %o4, %o5 +___ + &load_ccr("%i7","%o5","%o4",1); + +# magic ################################################################ +for($i=0; $i<5; $i++) { +$code.=<<___; + .word 0x81b02940+$NUM-1 ! montsqr $NUM-1 + fbu,pn %fcc3,.Labort_$NUM +#ifndef __arch64__ + and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Labort_$NUM +#endif + nop +___ +} +$code.=<<___; + wr %o4, %g0, %ccr + .word 0x81b02920+$NUM-1 ! montmul $NUM-1 + fbu,pn %fcc3,.Labort_$NUM +#ifndef __arch64__ + and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Labort_$NUM +#endif + + srax $pwr, 32, %o4 +#ifdef __arch64__ + brgez %o4,.Lstride_$NUM + restore + restore + restore + restore + restore +#else + brgez %o4,.Lstride_$NUM + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + restore; and %fp,$sentinel,$sentinel + brz,pn $sentinel,.Labort1_$NUM + restore +#endif +___ + +# save tp[$NUM] ######################################################## +for($i=0; $i<14 && $i<$NUM; $i++) { +$code.=<<___; + movxtod @A[$i],@R[$i] +___ +} +$code.=<<___; +#ifdef __arch64__ + restore +#else + and %fp,$sentinel,$sentinel + restore + and $sentinel,1,%o7 + and %fp,$sentinel,$sentinel + srl %fp,0,%fp ! just in case? + or %o7,$sentinel,$sentinel + brz,a,pn $sentinel,.Ldone_$NUM + mov 0,%i0 ! return failure +#endif +___ +for($i=0; $i<$NUM; $i++) { +$code.=<<___; + std @R[$i],[$tp+$i*8] +___ +} +$code.=<<___; + mov 1,%i0 ! 
return success +.Ldone_$NUM: + ret + restore + +.Labort_$NUM: + restore + restore + restore + restore + restore +.Labort1_$NUM: + restore + + mov 0,%i0 ! return failure + ret + restore +.type bn_pwr5_mont_t4_$NUM, #function +.size bn_pwr5_mont_t4_$NUM, .-bn_pwr5_mont_t4_$NUM +___ +} + +for ($i=8;$i<=32;$i+=8) { + &generate_bn_pwr5_mont_t4($i); +} + +{ +######################################################################## +# Fall-back subroutines +# +# copy of bn_mul_mont_vis3 adjusted for vectors of 64-bit values +# +($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)= + (map("%g$_",(1..5)),map("%o$_",(0..5,7))); + +# int bn_mul_mont( +$rp="%o0"; # u64 *rp, +$ap="%o1"; # const u64 *ap, +$bp="%o2"; # const u64 *bp, +$np="%o3"; # const u64 *np, +$n0p="%o4"; # const BN_ULONG *n0, +$num="%o5"; # int num); # caller ensures that num is >=3 +$code.=<<___; +.globl bn_mul_mont_t4 +.align 32 +bn_mul_mont_t4: + add %sp, STACK_BIAS, %g4 ! real top of stack + sll $num, 3, $num ! size in bytes + add $num, 63, %g1 + andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes + sub %g4, %g1, %g1 + andn %g1, 63, %g1 ! align at 64 byte + sub %g1, STACK_FRAME, %g1 ! new top of stack + sub %g1, %g4, %g1 + + save %sp, %g1, %sp +___ +# +-------------------------------+<----- %sp +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 tmp[0] | +# +-------------------------------+ +# . . +# . . +# +-------------------------------+<----- aligned at 64 bytes +# . . +($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5)); +($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz)=map("%l$_",(0..7)); +($ovf,$i)=($t0,$t1); +$code.=<<___; + ld [$n0p+0], $t0 ! pull n0[0..1] value + ld [$n0p+4], $t1 + add %sp, STACK_BIAS+STACK_FRAME, $tp + ldx [$bp+0], $m0 ! m0=bp[0] + sllx $t1, 32, $n0 + add $bp, 8, $bp + or $t0, $n0, $n0 + + ldx [$ap+0], $aj ! ap[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[0] + umulxhi $aj, $m0, $hi0 + + ldx [$ap+8], $aj ! ap[1] + add $ap, 16, $ap + ldx [$np+0], $nj ! np[0] + + mulx $lo0, $n0, $m1 ! "tp[0]"*n0 + + mulx $aj, $m0, $alo ! ap[1]*bp[0] + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + + ldx [$np+8], $nj ! np[1] + + addcc $lo0, $lo1, $lo1 + add $np, 16, $np + addxc %g0, $hi1, $hi1 + + mulx $nj, $m1, $nlo ! np[1]*m1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .L1st + sub $num, 24, $cnt ! cnt=num-3 + +.align 16 +.L1st: + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 + + ldx [$ap+0], $aj ! ap[j] + addcc $nlo, $hi1, $lo1 + add $ap, 8, $ap + addxc $nj, %g0, $hi1 ! nhi=nj + + ldx [$np+0], $nj ! np[j] + mulx $aj, $m0, $alo ! ap[j]*bp[0] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $nlo ! np[j]*m1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + umulxhi $nj, $m1, $nj ! nhi=nj + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp ! tp++ + + brnz,pt $cnt, .L1st + sub $cnt, 8, $cnt ! j-- +!.L1st + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp + + addcc $hi0, $hi1, $hi1 + addxc %g0, %g0, $ovf ! upmost overflow bit + stxa $hi1, [$tp]0xe2 + add $tp, 8, $tp + + ba .Louter + sub $num, 16, $i ! i=num-2 + +.align 16 +.Louter: + ldx [$bp+0], $m0 ! m0=bp[i] + add $bp, 8, $bp + + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + + ldx [$ap+0], $aj ! ap[0] + ldx [$np+0], $nj ! np[0] + + mulx $aj, $m0, $lo0 ! 
ap[0]*bp[i] + ldx [$tp], $tj ! tp[0] + umulxhi $aj, $m0, $hi0 + ldx [$ap+8], $aj ! ap[1] + addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0] + mulx $aj, $m0, $alo ! ap[1]*bp[i] + addxc %g0, $hi0, $hi0 + mulx $lo0, $n0, $m1 ! tp[0]*n0 + umulxhi $aj, $m0, $aj ! ahi=aj + mulx $nj, $m1, $lo1 ! np[0]*m1 + add $ap, 16, $ap + umulxhi $nj, $m1, $hi1 + ldx [$np+8], $nj ! np[1] + add $np, 16, $np + addcc $lo1, $lo0, $lo1 + mulx $nj, $m1, $nlo ! np[1]*m1 + addxc %g0, $hi1, $hi1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .Linner + sub $num, 24, $cnt ! cnt=num-3 +.align 16 +.Linner: + addcc $alo, $hi0, $lo0 + ldx [$tp+8], $tj ! tp[j] + addxc $aj, %g0, $hi0 ! ahi=aj + ldx [$ap+0], $aj ! ap[j] + add $ap, 8, $ap + addcc $nlo, $hi1, $lo1 + mulx $aj, $m0, $alo ! ap[j]*bp[i] + addxc $nj, %g0, $hi1 ! nhi=nj + ldx [$np+0], $nj ! np[j] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + mulx $nj, $m1, $nlo ! np[j]*m1 + addxc %g0, $hi0, $hi0 + umulxhi $nj, $m1, $nj ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + add $tp, 8, $tp + brnz,pt $cnt, .Linner + sub $cnt, 8, $cnt +!.Linner + ldx [$tp+8], $tj ! tp[j] + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + addxc %g0, $hi0, $hi0 + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + + subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc + addxccc $hi1, $hi0, $hi1 + addxc %g0, %g0, $ovf + stx $hi1, [$tp+8] + add $tp, 16, $tp + + brnz,pt $i, .Louter + sub $i, 8, $i + + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + ba .Lsub + subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc + +.align 16 +.Lsub: + ldx [$tp], $tj + add $tp, 8, $tp + ldx [$np+0], $nj + add $np, 8, $np + subccc $tj, $nj, $t2 ! tp[j]-np[j] + srlx $tj, 32, $tj + srlx $nj, 32, $nj + subccc $tj, $nj, $t3 + add $rp, 8, $rp + st $t2, [$rp-4] ! reverse order + st $t3, [$rp-8] + brnz,pt $cnt, .Lsub + sub $cnt, 8, $cnt + + sub $np, $num, $np ! rewind + sub $tp, $num, $tp + sub $rp, $num, $rp + + subc $ovf, %g0, $ovf ! handle upmost overflow bit + and $tp, $ovf, $ap + andn $rp, $ovf, $np + or $np, $ap, $ap ! ap=borrow?tp:rp + ba .Lcopy + sub $num, 8, $cnt + +.align 16 +.Lcopy: ! copy or in-place refresh + ldx [$ap+0], $t2 + add $ap, 8, $ap + stx %g0, [$tp] ! zap + add $tp, 8, $tp + stx $t2, [$rp+0] + add $rp, 8, $rp + brnz $cnt, .Lcopy + sub $cnt, 8, $cnt + + mov 1, %o0 + ret + restore +.type bn_mul_mont_t4, #function +.size bn_mul_mont_t4, .-bn_mul_mont_t4 +___ + +# int bn_mul_mont_gather5( +$rp="%o0"; # u64 *rp, +$ap="%o1"; # const u64 *ap, +$bp="%o2"; # const u64 *pwrtbl, +$np="%o3"; # const u64 *np, +$n0p="%o4"; # const BN_ULONG *n0, +$num="%o5"; # int num, # caller ensures that num is >=3 + # int power); +$code.=<<___; +.globl bn_mul_mont_gather5_t4 +.align 32 +bn_mul_mont_gather5_t4: + add %sp, STACK_BIAS, %g4 ! real top of stack + sll $num, 3, $num ! size in bytes + add $num, 63, %g1 + andn %g1, 63, %g1 ! buffer size rounded up to 64 bytes + sub %g4, %g1, %g1 + andn %g1, 63, %g1 ! align at 64 byte + sub %g1, STACK_FRAME, %g1 ! new top of stack + sub %g1, %g4, %g1 + LDPTR [%sp+STACK_7thARG], %g4 ! load power, 7th argument + + save %sp, %g1, %sp +___ +# +-------------------------------+<----- %sp +# . . 
+# +-------------------------------+<----- aligned at 64 bytes +# | __int64 tmp[0] | +# +-------------------------------+ +# . . +# . . +# +-------------------------------+<----- aligned at 64 bytes +# . . +($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5)); +($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$ccr)=map("%l$_",(0..7)); +($ovf,$i)=($t0,$t1); + &load_ccr($bp,"%g4",$ccr); + &load_b($bp,$m0,"%o7"); # m0=bp[0] + +$code.=<<___; + ld [$n0p+0], $t0 ! pull n0[0..1] value + ld [$n0p+4], $t1 + add %sp, STACK_BIAS+STACK_FRAME, $tp + sllx $t1, 32, $n0 + or $t0, $n0, $n0 + + ldx [$ap+0], $aj ! ap[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[0] + umulxhi $aj, $m0, $hi0 + + ldx [$ap+8], $aj ! ap[1] + add $ap, 16, $ap + ldx [$np+0], $nj ! np[0] + + mulx $lo0, $n0, $m1 ! "tp[0]"*n0 + + mulx $aj, $m0, $alo ! ap[1]*bp[0] + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + + ldx [$np+8], $nj ! np[1] + + addcc $lo0, $lo1, $lo1 + add $np, 16, $np + addxc %g0, $hi1, $hi1 + + mulx $nj, $m1, $nlo ! np[1]*m1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .L1st_g5 + sub $num, 24, $cnt ! cnt=num-3 + +.align 16 +.L1st_g5: + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 + + ldx [$ap+0], $aj ! ap[j] + addcc $nlo, $hi1, $lo1 + add $ap, 8, $ap + addxc $nj, %g0, $hi1 ! nhi=nj + + ldx [$np+0], $nj ! np[j] + mulx $aj, $m0, $alo ! ap[j]*bp[0] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $nlo ! np[j]*m1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + umulxhi $nj, $m1, $nj ! nhi=nj + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp ! tp++ + + brnz,pt $cnt, .L1st_g5 + sub $cnt, 8, $cnt ! j-- +!.L1st_g5 + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp + + addcc $hi0, $hi1, $hi1 + addxc %g0, %g0, $ovf ! upmost overflow bit + stxa $hi1, [$tp]0xe2 + add $tp, 8, $tp + + ba .Louter_g5 + sub $num, 16, $i ! i=num-2 + +.align 16 +.Louter_g5: + wr $ccr, %g0, %ccr +___ + &load_b($bp,$m0); # m0=bp[i] +$code.=<<___; + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + + ldx [$ap+0], $aj ! ap[0] + ldx [$np+0], $nj ! np[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[i] + ldx [$tp], $tj ! tp[0] + umulxhi $aj, $m0, $hi0 + ldx [$ap+8], $aj ! ap[1] + addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0] + mulx $aj, $m0, $alo ! ap[1]*bp[i] + addxc %g0, $hi0, $hi0 + mulx $lo0, $n0, $m1 ! tp[0]*n0 + umulxhi $aj, $m0, $aj ! ahi=aj + mulx $nj, $m1, $lo1 ! np[0]*m1 + add $ap, 16, $ap + umulxhi $nj, $m1, $hi1 + ldx [$np+8], $nj ! np[1] + add $np, 16, $np + addcc $lo1, $lo0, $lo1 + mulx $nj, $m1, $nlo ! np[1]*m1 + addxc %g0, $hi1, $hi1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .Linner_g5 + sub $num, 24, $cnt ! cnt=num-3 +.align 16 +.Linner_g5: + addcc $alo, $hi0, $lo0 + ldx [$tp+8], $tj ! tp[j] + addxc $aj, %g0, $hi0 ! ahi=aj + ldx [$ap+0], $aj ! ap[j] + add $ap, 8, $ap + addcc $nlo, $hi1, $lo1 + mulx $aj, $m0, $alo ! ap[j]*bp[i] + addxc $nj, %g0, $hi1 ! nhi=nj + ldx [$np+0], $nj ! np[j] + add $np, 8, $np + umulxhi $aj, $m0, $aj ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + mulx $nj, $m1, $nlo ! np[j]*m1 + addxc %g0, $hi0, $hi0 + umulxhi $nj, $m1, $nj ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + add $tp, 8, $tp + brnz,pt $cnt, .Linner_g5 + sub $cnt, 8, $cnt +!.Linner_g5 + ldx [$tp+8], $tj ! 
tp[j] + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + addxc %g0, $hi0, $hi0 + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + + subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc + addxccc $hi1, $hi0, $hi1 + addxc %g0, %g0, $ovf + stx $hi1, [$tp+8] + add $tp, 16, $tp + + brnz,pt $i, .Louter_g5 + sub $i, 8, $i + + sub $ap, $num, $ap ! rewind + sub $np, $num, $np + sub $tp, $num, $tp + ba .Lsub_g5 + subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc + +.align 16 +.Lsub_g5: + ldx [$tp], $tj + add $tp, 8, $tp + ldx [$np+0], $nj + add $np, 8, $np + subccc $tj, $nj, $t2 ! tp[j]-np[j] + srlx $tj, 32, $tj + srlx $nj, 32, $nj + subccc $tj, $nj, $t3 + add $rp, 8, $rp + st $t2, [$rp-4] ! reverse order + st $t3, [$rp-8] + brnz,pt $cnt, .Lsub_g5 + sub $cnt, 8, $cnt + + sub $np, $num, $np ! rewind + sub $tp, $num, $tp + sub $rp, $num, $rp + + subc $ovf, %g0, $ovf ! handle upmost overflow bit + and $tp, $ovf, $ap + andn $rp, $ovf, $np + or $np, $ap, $ap ! ap=borrow?tp:rp + ba .Lcopy_g5 + sub $num, 8, $cnt + +.align 16 +.Lcopy_g5: ! copy or in-place refresh + ldx [$ap+0], $t2 + add $ap, 8, $ap + stx %g0, [$tp] ! zap + add $tp, 8, $tp + stx $t2, [$rp+0] + add $rp, 8, $rp + brnz $cnt, .Lcopy_g5 + sub $cnt, 8, $cnt + + mov 1, %o0 + ret + restore +.type bn_mul_mont_gather5_t4, #function +.size bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4 +___ +} + +$code.=<<___; +.globl bn_flip_t4 +.align 32 +bn_flip_t4: +.Loop_flip: + ld [%o1+0], %o4 + sub %o2, 1, %o2 + ld [%o1+4], %o5 + add %o1, 8, %o1 + st %o5, [%o0+0] + st %o4, [%o0+4] + brnz %o2, .Loop_flip + add %o0, 8, %o0 + retl + nop +.type bn_flip_t4, #function +.size bn_flip_t4, .-bn_flip_t4 + +.globl bn_flip_n_scatter5_t4 +.align 32 +bn_flip_n_scatter5_t4: + sll %o3, 3, %o3 + srl %o1, 1, %o1 + add %o3, %o2, %o2 ! &pwrtbl[pwr] + sub %o1, 1, %o1 +.Loop_flip_n_scatter5: + ld [%o0+0], %o4 ! inp[i] + ld [%o0+4], %o5 + add %o0, 8, %o0 + sllx %o5, 32, %o5 + or %o4, %o5, %o5 + stx %o5, [%o2] + add %o2, 32*8, %o2 + brnz %o1, .Loop_flip_n_scatter5 + sub %o1, 1, %o1 + retl + nop +.type bn_flip_n_scatter5_t4, #function +.size bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4 + +.globl bn_gather5_t4 +.align 32 +bn_gather5_t4: +___ + &load_ccr("%o2","%o3","%g1"); +$code.=<<___; + sub %o1, 1, %o1 +.Loop_gather5: +___ + &load_b("%o2","%g1"); +$code.=<<___; + stx %g1, [%o0] + add %o0, 8, %o0 + brnz %o1, .Loop_gather5 + sub %o1, 1, %o1 + + retl + nop +.type bn_gather5_t4, #function +.size bn_gather5_t4, .-bn_gather5_t4 + +.asciz "Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov" +.align 4 +___ + +&emit_assembler(); + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl new file mode 100644 index 00000000000000..ab94cd917c41a0 --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/asm/sparcv9-gf2m.pl @@ -0,0 +1,190 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. 
+# ==================================================================== +# +# October 2012 +# +# The module implements bn_GF2m_mul_2x2 polynomial multiplication used +# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for +# the time being... Except that it has two code paths: one suitable +# for all SPARCv9 processors and one for VIS3-capable ones. Former +# delivers ~25-45% more, more for longer keys, heaviest DH and DSA +# verify operations on venerable UltraSPARC II. On T4 VIS3 code is +# ~100-230% faster than gcc-generated code and ~35-90% faster than +# the pure SPARCv9 code path. + +$locals=16*8; + +$tab="%l0"; + +@T=("%g2","%g3"); +@i=("%g4","%g5"); + +($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5)); +($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo; + +$code.=<<___; +#include + +#ifdef __arch64__ +.register %g2,#scratch +.register %g3,#scratch +#endif + +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif + +.globl bn_GF2m_mul_2x2 +.align 16 +bn_GF2m_mul_2x2: + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0] + + andcc %g1, SPARCV9_VIS3, %g0 + bz,pn %icc,.Lsoftware + nop + + sllx %o1, 32, %o1 + sllx %o3, 32, %o3 + or %o2, %o1, %o1 + or %o4, %o3, %o3 + .word 0x95b262ab ! xmulx %o1, %o3, %o2 + .word 0x99b262cb ! xmulxhi %o1, %o3, %o4 + srlx %o2, 32, %o1 ! 13 cycles later + st %o2, [%o0+0] + st %o1, [%o0+4] + srlx %o4, 32, %o3 + st %o4, [%o0+8] + retl + st %o3, [%o0+12] + +.align 16 +.Lsoftware: + save %sp,-STACK_FRAME-$locals,%sp + + sllx %i1,32,$a + mov -1,$a12 + sllx %i3,32,$b + or %i2,$a,$a + srlx $a12,1,$a48 ! 0x7fff... + or %i4,$b,$b + srlx $a12,2,$a12 ! 0x3fff... + add %sp,STACK_BIAS+STACK_FRAME,$tab + + sllx $a,2,$a4 + mov $a,$a1 + sllx $a,1,$a2 + + srax $a4,63,@i[1] ! broadcast 61st bit + and $a48,$a4,$a4 ! (a<<2)&0x7fff... + srlx $a48,2,$a48 + srax $a2,63,@i[0] ! broadcast 62nd bit + and $a12,$a2,$a2 ! (a<<1)&0x3fff... + srax $a1,63,$lo ! broadcast 63rd bit + and $a48,$a1,$a1 ! (a<<0)&0x1fff... + + sllx $a1,3,$a8 + and $b,$lo,$lo + and $b,@i[0],@i[0] + and $b,@i[1],@i[1] + + stx %g0,[$tab+0*8] ! tab[0]=0 + xor $a1,$a2,$a12 + stx $a1,[$tab+1*8] ! tab[1]=a1 + stx $a2,[$tab+2*8] ! tab[2]=a2 + xor $a4,$a8,$a48 + stx $a12,[$tab+3*8] ! tab[3]=a1^a2 + xor $a4,$a1,$a1 + + stx $a4,[$tab+4*8] ! tab[4]=a4 + xor $a4,$a2,$a2 + stx $a1,[$tab+5*8] ! tab[5]=a1^a4 + xor $a4,$a12,$a12 + stx $a2,[$tab+6*8] ! tab[6]=a2^a4 + xor $a48,$a1,$a1 + stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4 + xor $a48,$a2,$a2 + + stx $a8,[$tab+8*8] ! tab[8]=a8 + xor $a48,$a12,$a12 + stx $a1,[$tab+9*8] ! tab[9]=a1^a8 + xor $a4,$a1,$a1 + stx $a2,[$tab+10*8] ! tab[10]=a2^a8 + xor $a4,$a2,$a2 + stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8 + + xor $a4,$a12,$a12 + stx $a48,[$tab+12*8] ! tab[12]=a4^a8 + srlx $lo,1,$hi + stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8 + sllx $lo,63,$lo + stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8 + srlx @i[0],2,@T[0] + stx $a12,[$tab+15*8] ! 
tab[15]=a1^a2^a4^a8 + + sllx @i[0],62,$a1 + sllx $b,3,@i[0] + srlx @i[1],3,@T[1] + and @i[0],`0xf<<3`,@i[0] + sllx @i[1],61,$a2 + ldx [$tab+@i[0]],@i[0] + srlx $b,4-3,@i[1] + xor @T[0],$hi,$hi + and @i[1],`0xf<<3`,@i[1] + xor $a1,$lo,$lo + ldx [$tab+@i[1]],@i[1] + xor @T[1],$hi,$hi + + xor @i[0],$lo,$lo + srlx $b,8-3,@i[0] + xor $a2,$lo,$lo + and @i[0],`0xf<<3`,@i[0] +___ +for($n=1;$n<14;$n++) { +$code.=<<___; + sllx @i[1],`$n*4`,@T[0] + ldx [$tab+@i[0]],@i[0] + srlx @i[1],`64-$n*4`,@T[1] + xor @T[0],$lo,$lo + srlx $b,`($n+2)*4`-3,@i[1] + xor @T[1],$hi,$hi + and @i[1],`0xf<<3`,@i[1] +___ + push(@i,shift(@i)); push(@T,shift(@T)); +} +$code.=<<___; + sllx @i[1],`$n*4`,@T[0] + ldx [$tab+@i[0]],@i[0] + srlx @i[1],`64-$n*4`,@T[1] + xor @T[0],$lo,$lo + + sllx @i[0],`($n+1)*4`,@T[0] + xor @T[1],$hi,$hi + srlx @i[0],`64-($n+1)*4`,@T[1] + xor @T[0],$lo,$lo + xor @T[1],$hi,$hi + + srlx $lo,32,%i1 + st $lo,[%i0+0] + st %i1,[%i0+4] + srlx $hi,32,%i2 + st $hi,[%i0+8] + st %i2,[%i0+12] + + ret + restore +.type bn_GF2m_mul_2x2,#function +.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 +.asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by " +.align 4 +___ + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl new file mode 100644 index 00000000000000..a1357de0e99af4 --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl @@ -0,0 +1,373 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# October 2012. +# +# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and +# onward. There are three new instructions used here: umulxhi, +# addxc[cc] and initializing store. On T3 RSA private key operations +# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key +# lengths. This is without dedicated squaring procedure. On T4 +# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly +# for reference purposes, because T4 has dedicated Montgomery +# multiplication and squaring *instructions* that deliver even more. + +$bits=32; +for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } +if ($bits==64) { $bias=2047; $frame=192; } +else { $bias=0; $frame=112; } + +$code.=<<___ if ($bits==64); +.register %g2,#scratch +.register %g3,#scratch +___ +$code.=<<___; +.section ".text",#alloc,#execinstr +___ + +($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)= + (map("%g$_",(1..5)),map("%o$_",(0..5,7))); + +# int bn_mul_mont( +$rp="%o0"; # BN_ULONG *rp, +$ap="%o1"; # const BN_ULONG *ap, +$bp="%o2"; # const BN_ULONG *bp, +$np="%o3"; # const BN_ULONG *np, +$n0p="%o4"; # const BN_ULONG *n0, +$num="%o5"; # int num); # caller ensures that num is even + # and >=6 +$code.=<<___; +.globl bn_mul_mont_vis3 +.align 32 +bn_mul_mont_vis3: + add %sp, $bias, %g4 ! real top of stack + sll $num, 2, $num ! size in bytes + add $num, 63, %g5 + andn %g5, 63, %g5 ! buffer size rounded up to 64 bytes + add %g5, %g5, %g1 + add %g5, %g1, %g1 ! 3*buffer size + sub %g4, %g1, %g1 + andn %g1, 63, %g1 ! align at 64 byte + sub %g1, $frame, %g1 ! 
new top of stack + sub %g1, %g4, %g1 + + save %sp, %g1, %sp +___ + +# +-------------------------------+<----- %sp +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 tmp[0] | +# +-------------------------------+ +# . . +# . . +# +-------------------------------+<----- aligned at 64 bytes +# | __int64 ap[1..0] | converted ap[] +# +-------------------------------+ +# | __int64 np[1..0] | converted np[] +# +-------------------------------+ +# | __int64 ap[3..2] | +# . . +# . . +# +-------------------------------+ +($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5)); +($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$anp)=map("%l$_",(0..7)); +($ovf,$i)=($t0,$t1); +$code.=<<___; + ld [$n0p+0], $t0 ! pull n0[0..1] value + add %sp, $bias+$frame, $tp + ld [$n0p+4], $t1 + add $tp, %g5, $anp + ld [$bp+0], $t2 ! m0=bp[0] + sllx $t1, 32, $n0 + ld [$bp+4], $t3 + or $t0, $n0, $n0 + add $bp, 8, $bp + + ld [$ap+0], $t0 ! ap[0] + sllx $t3, 32, $m0 + ld [$ap+4], $t1 + or $t2, $m0, $m0 + + ld [$ap+8], $t2 ! ap[1] + sllx $t1, 32, $aj + ld [$ap+12], $t3 + or $t0, $aj, $aj + add $ap, 16, $ap + stxa $aj, [$anp]0xe2 ! converted ap[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[0] + umulxhi $aj, $m0, $hi0 + + ld [$np+0], $t0 ! np[0] + sllx $t3, 32, $aj + ld [$np+4], $t1 + or $t2, $aj, $aj + + ld [$np+8], $t2 ! np[1] + sllx $t1, 32, $nj + ld [$np+12], $t3 + or $t0, $nj, $nj + add $np, 16, $np + stx $nj, [$anp+8] ! converted np[0] + + mulx $lo0, $n0, $m1 ! "tp[0]"*n0 + stx $aj, [$anp+16] ! converted ap[1] + + mulx $aj, $m0, $alo ! ap[1]*bp[0] + umulxhi $aj, $m0, $aj ! ahi=aj + + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + + sllx $t3, 32, $nj + or $t2, $nj, $nj + stx $nj, [$anp+24] ! converted np[1] + add $anp, 32, $anp + + addcc $lo0, $lo1, $lo1 + addxc %g0, $hi1, $hi1 + + mulx $nj, $m1, $nlo ! np[1]*m1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .L1st + sub $num, 24, $cnt ! cnt=num-3 + +.align 16 +.L1st: + ld [$ap+0], $t0 ! ap[j] + addcc $alo, $hi0, $lo0 + ld [$ap+4], $t1 + addxc $aj, %g0, $hi0 + + sllx $t1, 32, $aj + add $ap, 8, $ap + or $t0, $aj, $aj + stxa $aj, [$anp]0xe2 ! converted ap[j] + + ld [$np+0], $t2 ! np[j] + addcc $nlo, $hi1, $lo1 + ld [$np+4], $t3 + addxc $nj, %g0, $hi1 ! nhi=nj + + sllx $t3, 32, $nj + add $np, 8, $np + mulx $aj, $m0, $alo ! ap[j]*bp[0] + or $t2, $nj, $nj + umulxhi $aj, $m0, $aj ! ahi=aj + stx $nj, [$anp+8] ! converted np[j] + add $anp, 16, $anp ! anp++ + + mulx $nj, $m1, $nlo ! np[j]*m1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + umulxhi $nj, $m1, $nj ! nhi=nj + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp ! tp++ + + brnz,pt $cnt, .L1st + sub $cnt, 8, $cnt ! j-- +!.L1st + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 + addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] + addxc %g0, $hi1, $hi1 + stxa $lo1, [$tp]0xe2 ! tp[j-1] + add $tp, 8, $tp + + addcc $hi0, $hi1, $hi1 + addxc %g0, %g0, $ovf ! upmost overflow bit + stxa $hi1, [$tp]0xe2 + add $tp, 8, $tp + + ba .Louter + sub $num, 16, $i ! i=num-2 + +.align 16 +.Louter: + ld [$bp+0], $t2 ! m0=bp[i] + ld [$bp+4], $t3 + + sub $anp, $num, $anp ! rewind + sub $tp, $num, $tp + sub $anp, $num, $anp + + add $bp, 8, $bp + sllx $t3, 32, $m0 + ldx [$anp+0], $aj ! ap[0] + or $t2, $m0, $m0 + ldx [$anp+8], $nj ! np[0] + + mulx $aj, $m0, $lo0 ! ap[0]*bp[i] + ldx [$tp], $tj ! tp[0] + umulxhi $aj, $m0, $hi0 + ldx [$anp+16], $aj ! ap[1] + addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0] + mulx $aj, $m0, $alo ! 
ap[1]*bp[i] + addxc %g0, $hi0, $hi0 + mulx $lo0, $n0, $m1 ! tp[0]*n0 + umulxhi $aj, $m0, $aj ! ahi=aj + mulx $nj, $m1, $lo1 ! np[0]*m1 + umulxhi $nj, $m1, $hi1 + ldx [$anp+24], $nj ! np[1] + add $anp, 32, $anp + addcc $lo1, $lo0, $lo1 + mulx $nj, $m1, $nlo ! np[1]*m1 + addxc %g0, $hi1, $hi1 + umulxhi $nj, $m1, $nj ! nhi=nj + + ba .Linner + sub $num, 24, $cnt ! cnt=num-3 +.align 16 +.Linner: + addcc $alo, $hi0, $lo0 + ldx [$tp+8], $tj ! tp[j] + addxc $aj, %g0, $hi0 ! ahi=aj + ldx [$anp+0], $aj ! ap[j] + addcc $nlo, $hi1, $lo1 + mulx $aj, $m0, $alo ! ap[j]*bp[i] + addxc $nj, %g0, $hi1 ! nhi=nj + ldx [$anp+8], $nj ! np[j] + add $anp, 16, $anp + umulxhi $aj, $m0, $aj ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + mulx $nj, $m1, $nlo ! np[j]*m1 + addxc %g0, $hi0, $hi0 + umulxhi $nj, $m1, $nj ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + add $tp, 8, $tp + brnz,pt $cnt, .Linner + sub $cnt, 8, $cnt +!.Linner + ldx [$tp+8], $tj ! tp[j] + addcc $alo, $hi0, $lo0 + addxc $aj, %g0, $hi0 ! ahi=aj + addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j] + addxc %g0, $hi0, $hi0 + + addcc $nlo, $hi1, $lo1 + addxc $nj, %g0, $hi1 ! nhi=nj + addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j] + addxc %g0, $hi1, $hi1 + stx $lo1, [$tp] ! tp[j-1] + + subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc + addxccc $hi1, $hi0, $hi1 + addxc %g0, %g0, $ovf + stx $hi1, [$tp+8] + add $tp, 16, $tp + + brnz,pt $i, .Louter + sub $i, 8, $i + + sub $anp, $num, $anp ! rewind + sub $tp, $num, $tp + sub $anp, $num, $anp + ba .Lsub + subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc + +.align 16 +.Lsub: + ldx [$tp], $tj + add $tp, 8, $tp + ldx [$anp+8], $nj + add $anp, 16, $anp + subccc $tj, $nj, $t2 ! tp[j]-np[j] + srlx $tj, 32, $tj + srlx $nj, 32, $nj + subccc $tj, $nj, $t3 + add $rp, 8, $rp + st $t2, [$rp-4] ! reverse order + st $t3, [$rp-8] + brnz,pt $cnt, .Lsub + sub $cnt, 8, $cnt + + sub $anp, $num, $anp ! rewind + sub $tp, $num, $tp + sub $anp, $num, $anp + sub $rp, $num, $rp + + subc $ovf, %g0, $ovf ! handle upmost overflow bit + and $tp, $ovf, $ap + andn $rp, $ovf, $np + or $np, $ap, $ap ! ap=borrow?tp:rp + ba .Lcopy + sub $num, 8, $cnt + +.align 16 +.Lcopy: ! copy or in-place refresh + ld [$ap+0], $t2 + ld [$ap+4], $t3 + add $ap, 8, $ap + stx %g0, [$tp] ! zap + add $tp, 8, $tp + stx %g0, [$anp] ! zap + stx %g0, [$anp+8] + add $anp, 16, $anp + st $t3, [$rp+0] ! flip order + st $t2, [$rp+4] + add $rp, 8, $rp + brnz $cnt, .Lcopy + sub $cnt, 8, $cnt + + mov 1, %o0 + ret + restore +.type bn_mul_mont_vis3, #function +.size bn_mul_mont_vis3, .-bn_mul_mont_vis3 +.asciz "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by " +.align 4 +___ + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. 
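+#
+# As a worked example of the encoding scheme handled by unvis3() below
+# (derived purely from the opcode table and formula in that subroutine,
+# so treat the exact value as illustrative): "umulxhi %o1,%o2,%o3" maps
+# to rs1=9, rs2=10, rd=11 and opf=0x016, hence it would be emitted as
+#
+#	.word	0x97b242ca	! umulxhi	%o1,%o2,%o3
+#
+# i.e. 0x81b00000|11<<25|9<<14|0x016<<5|10, the same pattern as the
+# hard-coded xmulx/xmulxhi words in sparcv9-gf2m.pl earlier in this patch.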
+sub unvis3 { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my ($ref,$opf); +my %visopf = ( "addxc" => 0x011, + "addxccc" => 0x013, + "umulxhi" => 0x016 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%([goli])([0-9])/); + $_=$bias{$1}+$2; + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unvis3($1,$2,$3,$4) + /ge; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c index 9c5074b308586c..d5488866e0cfd4 100644 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-gcc.c @@ -55,7 +55,7 @@ * machine. */ -# ifdef _WIN64 +# if defined(_WIN64) || !defined(__LP64__) # define BN_ULONG unsigned long long # else # define BN_ULONG unsigned long @@ -63,7 +63,6 @@ # undef mul # undef mul_add -# undef sqr /*- * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; @@ -99,8 +98,8 @@ : "cc"); \ (r)=carry, carry=high; \ } while (0) - -# define sqr(r0,r1,a) \ +# undef sqr +# define sqr(r0,r1,a) \ asm ("mulq %2" \ : "=a"(r0),"=d"(r1) \ : "a"(a) \ @@ -204,20 +203,22 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, int n) { - BN_ULONG ret = 0, i = 0; + BN_ULONG ret; + size_t i = 0; if (n <= 0) return 0; - asm volatile (" subq %2,%2 \n" + asm volatile (" subq %0,%0 \n" /* clear carry */ + " jmp 1f \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " adcq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" - " leaq 1(%2),%2 \n" + " lea 1(%2),%2 \n" " loop 1b \n" - " sbbq %0,%0 \n":"=&a" (ret), "+c"(n), - "=&r"(i) + " sbbq %0,%0 \n":"=&r" (ret), "+c"(n), + "+r"(i) :"r"(rp), "r"(ap), "r"(bp) :"cc", "memory"); @@ -228,20 +229,22 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, int n) { - BN_ULONG ret = 0, i = 0; + BN_ULONG ret; + size_t i = 0; if (n <= 0) return 0; - asm volatile (" subq %2,%2 \n" + asm volatile (" subq %0,%0 \n" /* clear borrow */ + " jmp 1f \n" ".p2align 4 \n" "1: movq (%4,%2,8),%0 \n" " sbbq (%5,%2,8),%0 \n" " movq %0,(%3,%2,8) \n" - " leaq 1(%2),%2 \n" + " lea 1(%2),%2 \n" " loop 1b \n" - " sbbq %0,%0 \n":"=&a" (ret), "+c"(n), - "=&r"(i) + " sbbq %0,%0 \n":"=&r" (ret), "+c"(n), + "+r"(i) :"r"(rp), "r"(ap), "r"(bp) :"cc", "memory"); @@ -313,55 +316,58 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) */ # if 0 /* original macros are kept for reference purposes */ -# define mul_add_c(a,b,c0,c1,c2) { \ - BN_ULONG ta=(a),tb=(b); \ - t1 = ta * tb; \ - t2 = BN_UMULT_HIGH(ta,tb); \ - c0 += t1; t2 += (c0&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $addx = ($1>=2.23); +} + +if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $addx = ($1>=2.10); +} + +if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $addx = ($1>=12); +} + # int bn_mul_mont( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, @@ -61,6 +86,8 @@ $code=<<___; .text +.extern OPENSSL_ia32cap_P + .globl 
bn_mul_mont .type bn_mul_mont,\@function,6 .align 16 @@ -69,9 +96,16 @@ jnz .Lmul_enter cmp \$8,${num}d jb .Lmul_enter +___ +$code.=<<___ if ($addx); + mov OPENSSL_ia32cap_P+8(%rip),%r11d +___ +$code.=<<___; cmp $ap,$bp jne .Lmul4x_enter - jmp .Lsqr4x_enter + test \$7,${num}d + jz .Lsqr8x_enter + jmp .Lmul4x_enter .align 16 .Lmul_enter: @@ -227,7 +261,7 @@ lea 1($i),$i # i++ cmp $num,$i - jl .Louter + jb .Louter xor $i,$i # i=0 and clear CF! mov (%rsp),%rax # tp[0] @@ -280,6 +314,13 @@ .align 16 bn_mul4x_mont: .Lmul4x_enter: +___ +$code.=<<___ if ($addx); + and \$0x80100,%r11d + cmp \$0x80100,%r11d + je .Lmulx4x_enter +___ +$code.=<<___; push %rbx push %rbp push %r12 @@ -401,7 +442,7 @@ mov $N[1],-32(%rsp,$j,8) # tp[j-1] mov %rdx,$N[0] cmp $num,$j - jl .L1st4x + jb .L1st4x mulq $m0 # ap[j]*bp[0] add %rax,$A[0] @@ -549,7 +590,7 @@ mov $N[1],-32(%rsp,$j,8) # tp[j-1] mov %rdx,$N[0] cmp $num,$j - jl .Linner4x + jb .Linner4x mulq $m0 # ap[j]*bp[i] add %rax,$A[0] @@ -595,7 +636,7 @@ mov $N[1],(%rsp,$j,8) # store upmost overflow bit cmp $num,$i - jl .Louter4x + jb .Louter4x ___ { my @ri=("%rax","%rdx",$m0,$m1); @@ -688,25 +729,30 @@ }}} {{{ ###################################################################### -# void bn_sqr4x_mont( +# void bn_sqr8x_mont( my $rptr="%rdi"; # const BN_ULONG *rptr, my $aptr="%rsi"; # const BN_ULONG *aptr, my $bptr="%rdx"; # not used my $nptr="%rcx"; # const BN_ULONG *nptr, my $n0 ="%r8"; # const BN_ULONG *n0); -my $num ="%r9"; # int num, has to be divisible by 4 and - # not less than 8 +my $num ="%r9"; # int num, has to be divisible by 8 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); my @A0=("%r10","%r11"); my @A1=("%r12","%r13"); my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); +$code.=<<___ if ($addx); +.extern bn_sqrx8x_internal # see x86_64-mont5 module +___ $code.=<<___; -.type bn_sqr4x_mont,\@function,6 -.align 16 -bn_sqr4x_mont: -.Lsqr4x_enter: +.extern bn_sqr8x_internal # see x86_64-mont5 module + +.type bn_sqr8x_mont,\@function,6 +.align 32 +bn_sqr8x_mont: +.Lsqr8x_enter: + mov %rsp,%rax push %rbx push %rbp push %r12 @@ -714,787 +760,445 @@ push %r14 push %r15 + mov ${num}d,%r10d shl \$3,${num}d # convert $num to bytes - xor %r10,%r10 - mov %rsp,%r11 # put aside %rsp - sub $num,%r10 # -$num - mov ($n0),$n0 # *n0 - lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num) - and \$-1024,%rsp # minimize TLB usage - ############################################################## - # Stack layout - # - # +0 saved $num, used in reduction section - # +8 &t[2*$num], used in reduction section - # +32 saved $rptr - # +40 saved $nptr - # +48 saved *n0 - # +56 saved %rsp - # +64 t[2*$num] - # - mov $rptr,32(%rsp) # save $rptr - mov $nptr,40(%rsp) - mov $n0, 48(%rsp) - mov %r11, 56(%rsp) # save original %rsp -.Lsqr4x_body: + shl \$3+2,%r10 # 4*$num + neg $num + ############################################################## - # Squaring part: - # - # a) multiply-n-add everything but a[i]*a[i]; - # b) shift result of a) by 1 to the left and accumulate - # a[i]*a[i] products; + # ensure that stack frame doesn't alias with $aptr modulo + # 4096. this is done to allow memory disambiguation logic + # do its job. 
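+	# (4096 because the load/store disambiguation hardware typically
+	# compares only the low 12 address bits, so addresses that are
+	# equal modulo 4096 can be flagged as conflicting even when they
+	# refer to different data.)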
# - lea 32(%r10),$i # $i=-($num-32) - lea ($aptr,$num),$aptr # end of a[] buffer, ($aptr,$i)=&ap[2] - - mov $num,$j # $j=$num - - # comments apply to $num==8 case - mov -32($aptr,$i),$a0 # a[0] - lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] - mov -24($aptr,$i),%rax # a[1] - lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] - mov -16($aptr,$i),$ai # a[2] - mov %rax,$a1 - - mul $a0 # a[1]*a[0] - mov %rax,$A0[0] # a[1]*a[0] - mov $ai,%rax # a[2] - mov %rdx,$A0[1] - mov $A0[0],-24($tptr,$i) # t[1] - - xor $A0[0],$A0[0] - mul $a0 # a[2]*a[0] - add %rax,$A0[1] - mov $ai,%rax - adc %rdx,$A0[0] - mov $A0[1],-16($tptr,$i) # t[2] - - lea -16($i),$j # j=-16 - - - mov 8($aptr,$j),$ai # a[3] - mul $a1 # a[2]*a[1] - mov %rax,$A1[0] # a[2]*a[1]+t[3] - mov $ai,%rax - mov %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - lea 16($j),$j - adc \$0,$A0[1] - mul $a0 # a[3]*a[0] - add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr,$j) # t[3] - jmp .Lsqr4x_1st + lea -64(%rsp,$num,4),%r11 + mov ($n0),$n0 # *n0 + sub $aptr,%r11 + and \$4095,%r11 + cmp %r11,%r10 + jb .Lsqr8x_sp_alt + sub %r11,%rsp # align with $aptr + lea -64(%rsp,$num,4),%rsp # alloca(frame+4*$num) + jmp .Lsqr8x_sp_done + +.align 32 +.Lsqr8x_sp_alt: + lea 4096-64(,$num,4),%r10 # 4096-frame-4*$num + lea -64(%rsp,$num,4),%rsp # alloca(frame+4*$num) + sub %r10,%r11 + mov \$0,%r10 + cmovc %r10,%r11 + sub %r11,%rsp +.Lsqr8x_sp_done: + and \$-64,%rsp + mov $num,%r10 + neg $num + + lea 64(%rsp,$num,2),%r11 # copy of modulus + mov $n0, 32(%rsp) + mov %rax, 40(%rsp) # save original %rsp +.Lsqr8x_body: + + mov $num,$i + movq %r11, %xmm2 # save pointer to modulus copy + shr \$3+2,$i + mov OPENSSL_ia32cap_P+8(%rip),%eax + jmp .Lsqr8x_copy_n + +.align 32 +.Lsqr8x_copy_n: + movq 8*0($nptr),%xmm0 + movq 8*1($nptr),%xmm1 + movq 8*2($nptr),%xmm3 + movq 8*3($nptr),%xmm4 + lea 8*4($nptr),$nptr + movdqa %xmm0,16*0(%r11) + movdqa %xmm1,16*1(%r11) + movdqa %xmm3,16*2(%r11) + movdqa %xmm4,16*3(%r11) + lea 16*4(%r11),%r11 + dec $i + jnz .Lsqr8x_copy_n -.align 16 -.Lsqr4x_1st: - mov ($aptr,$j),$ai # a[4] - xor $A1[0],$A1[0] - mul $a1 # a[3]*a[1] - add %rax,$A1[1] # a[3]*a[1]+t[4] - mov $ai,%rax - adc %rdx,$A1[0] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] - adc \$0,$A0[0] - mul $a0 # a[4]*a[0] - add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] - mov $ai,%rax # a[3] - adc %rdx,$A0[0] - mov $A0[1],($tptr,$j) # t[4] - - - mov 8($aptr,$j),$ai # a[5] - xor $A1[1],$A1[1] - mul $a1 # a[4]*a[3] - add %rax,$A1[0] # a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - adc \$0,$A0[1] - mul $a0 # a[5]*a[2] - add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],8($tptr,$j) # t[5] - - mov 16($aptr,$j),$ai # a[6] - xor $A1[0],$A1[0] - mul $a1 # a[5]*a[3] - add %rax,$A1[1] # a[5]*a[3]+t[6] - mov $ai,%rax - adc %rdx,$A1[0] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] - adc \$0,$A0[0] - mul $a0 # a[6]*a[2] - add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6] - mov $ai,%rax # a[3] - adc %rdx,$A0[0] - mov $A0[1],16($tptr,$j) # t[6] - - - mov 24($aptr,$j),$ai # a[7] - xor $A1[1],$A1[1] - mul $a1 # a[6]*a[5] - add %rax,$A1[0] # a[6]*a[5]+t[7] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - lea 32($j),$j - adc \$0,$A0[1] - mul $a0 # a[7]*a[4] - add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr,$j) # t[7] - - cmp \$0,$j - jne .Lsqr4x_1st - - xor $A1[0],$A1[0] - add $A0[1],$A1[1] - adc 
\$0,$A1[0] - mul $a1 # a[7]*a[5] - add %rax,$A1[1] - adc %rdx,$A1[0] - - mov $A1[1],($tptr) # t[8] - lea 16($i),$i - mov $A1[0],8($tptr) # t[9] - jmp .Lsqr4x_outer + pxor %xmm0,%xmm0 + movq $rptr,%xmm1 # save $rptr + movq %r10, %xmm3 # -$num +___ +$code.=<<___ if ($addx); + and \$0x80100,%eax + cmp \$0x80100,%eax + jne .Lsqr8x_nox -.align 16 -.Lsqr4x_outer: # comments apply to $num==6 case - mov -32($aptr,$i),$a0 # a[0] - lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] - mov -24($aptr,$i),%rax # a[1] - lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] - mov -16($aptr,$i),$ai # a[2] - mov %rax,$a1 - - mov -24($tptr,$i),$A0[0] # t[1] - xor $A0[1],$A0[1] - mul $a0 # a[1]*a[0] - add %rax,$A0[0] # a[1]*a[0]+t[1] - mov $ai,%rax # a[2] - adc %rdx,$A0[1] - mov $A0[0],-24($tptr,$i) # t[1] - - xor $A0[0],$A0[0] - add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2] - adc \$0,$A0[0] - mul $a0 # a[2]*a[0] - add %rax,$A0[1] - mov $ai,%rax - adc %rdx,$A0[0] - mov $A0[1],-16($tptr,$i) # t[2] - - lea -16($i),$j # j=-16 - xor $A1[0],$A1[0] - - - mov 8($aptr,$j),$ai # a[3] - xor $A1[1],$A1[1] - add 8($tptr,$j),$A1[0] - adc \$0,$A1[1] - mul $a1 # a[2]*a[1] - add %rax,$A1[0] # a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - adc \$0,$A0[1] - mul $a0 # a[3]*a[0] - add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],8($tptr,$j) # t[3] - - lea 16($j),$j - jmp .Lsqr4x_inner + call bn_sqrx8x_internal # see x86_64-mont5 module -.align 16 -.Lsqr4x_inner: - mov ($aptr,$j),$ai # a[4] - xor $A1[0],$A1[0] - add ($tptr,$j),$A1[1] - adc \$0,$A1[0] - mul $a1 # a[3]*a[1] - add %rax,$A1[1] # a[3]*a[1]+t[4] - mov $ai,%rax - adc %rdx,$A1[0] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] - adc \$0,$A0[0] - mul $a0 # a[4]*a[0] - add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] - mov $ai,%rax # a[3] - adc %rdx,$A0[0] - mov $A0[1],($tptr,$j) # t[4] - - mov 8($aptr,$j),$ai # a[5] - xor $A1[1],$A1[1] - add 8($tptr,$j),$A1[0] - adc \$0,$A1[1] - mul $a1 # a[4]*a[3] - add %rax,$A1[0] # a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A1[1] - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - lea 16($j),$j # j++ - adc \$0,$A0[1] - mul $a0 # a[5]*a[2] - add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] - mov $ai,%rax - adc %rdx,$A0[1] - mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below - - cmp \$0,$j - jne .Lsqr4x_inner - - xor $A1[0],$A1[0] - add $A0[1],$A1[1] - adc \$0,$A1[0] - mul $a1 # a[5]*a[3] - add %rax,$A1[1] - adc %rdx,$A1[0] - - mov $A1[1],($tptr) # t[6], "preloaded t[2]" below - mov $A1[0],8($tptr) # t[7], "preloaded t[3]" below - - add \$16,$i - jnz .Lsqr4x_outer - - # comments apply to $num==4 case - mov -32($aptr),$a0 # a[0] - lea 64(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] - mov -24($aptr),%rax # a[1] - lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] - mov -16($aptr),$ai # a[2] - mov %rax,$a1 - - xor $A0[1],$A0[1] - mul $a0 # a[1]*a[0] - add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1] - mov $ai,%rax # a[2] - adc %rdx,$A0[1] - mov $A0[0],-24($tptr) # t[1] - - xor $A0[0],$A0[0] - add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2] - adc \$0,$A0[0] - mul $a0 # a[2]*a[0] - add %rax,$A0[1] - mov $ai,%rax - adc %rdx,$A0[0] - mov $A0[1],-16($tptr) # t[2] - - mov -8($aptr),$ai # a[3] - mul $a1 # a[2]*a[1] - add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3] - mov $ai,%rax - adc \$0,%rdx - - xor $A0[1],$A0[1] - add $A1[0],$A0[0] - mov %rdx,$A1[1] - adc \$0,$A0[1] - mul $a0 # a[3]*a[0] - add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] - mov $ai,%rax 
- adc %rdx,$A0[1] - mov $A0[0],-8($tptr) # t[3] - - xor $A1[0],$A1[0] - add $A0[1],$A1[1] - adc \$0,$A1[0] - mul $a1 # a[3]*a[1] - add %rax,$A1[1] - mov -16($aptr),%rax # a[2] - adc %rdx,$A1[0] - - mov $A1[1],($tptr) # t[4] - mov $A1[0],8($tptr) # t[5] - - mul $ai # a[2]*a[3] + pxor %xmm0,%xmm0 + lea 48(%rsp),%rax + lea 64(%rsp,$num,2),%rdx + shr \$3+2,$num + mov 40(%rsp),%rsi # restore %rsp + jmp .Lsqr8x_zero + +.align 32 +.Lsqr8x_nox: ___ -{ -my ($shift,$carry)=($a0,$a1); -my @S=(@A1,$ai,$n0); $code.=<<___; - add \$16,$i - xor $shift,$shift - sub $num,$i # $i=16-$num - xor $carry,$carry - - add $A1[0],%rax # t[5] - adc \$0,%rdx - mov %rax,8($tptr) # t[5] - mov %rdx,16($tptr) # t[6] - mov $carry,24($tptr) # t[7] - - mov -16($aptr,$i),%rax # a[0] - lea 64(%rsp,$num,2),$tptr - xor $A0[0],$A0[0] # t[0] - mov -24($tptr,$i,2),$A0[1] # t[1] - - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov -8($aptr,$i),%rax # a[i+1] # prefetch - mov $S[0],-32($tptr,$i,2) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift - mov $S[1],-24($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[2] - mov 0($aptr,$i),%rax # a[i+1] # prefetch - mov $S[2],-16($tptr,$i,2) - adc %rdx,$S[3] - lea 16($i),$i - mov $S[3],-40($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - jmp .Lsqr4x_shift_n_add + call bn_sqr8x_internal # see x86_64-mont5 module -.align 16 -.Lsqr4x_shift_n_add: - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov -16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov -8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov -8($aptr,$i),%rax # a[i+1] # prefetch - mov $S[0],-32($tptr,$i,2) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift - mov $S[1],-24($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mov 0($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 8($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[2] - mov 0($aptr,$i),%rax # a[i+1] # prefetch - mov $S[2],-16($tptr,$i,2) - adc %rdx,$S[3] - - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - mov $S[3],-8($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov 16($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 24($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov 8($aptr,$i),%rax # a[i+1] # prefetch - mov $S[0],0($tptr,$i,2) - adc %rdx,$S[1] - - lea 
($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift - mov $S[1],8($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mov 32($tptr,$i,2),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov 40($tptr,$i,2),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[2] - mov 16($aptr,$i),%rax # a[i+1] # prefetch - mov $S[2],16($tptr,$i,2) - adc %rdx,$S[3] - mov $S[3],24($tptr,$i,2) - sbb $carry,$carry # mov cf,$carry - add \$32,$i - jnz .Lsqr4x_shift_n_add - - lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[1] # | t[2*i]>>63 - mov -16($tptr),$A0[0] # t[2*i+2] # prefetch - mov $A0[1],$shift # shift=t[2*i+1]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch - adc %rax,$S[0] - mov -8($aptr),%rax # a[i+1] # prefetch - mov $S[0],-32($tptr) - adc %rdx,$S[1] - - lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift - mov $S[1],-24($tptr) - sbb $carry,$carry # mov cf,$carry - shr \$63,$A0[0] - lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | - shr \$63,$A0[1] - or $A0[0],$S[3] # | t[2*i]>>63 - mul %rax # a[i]*a[i] - neg $carry # mov $carry,cf - adc %rax,$S[2] - adc %rdx,$S[3] - mov $S[2],-16($tptr) - mov $S[3],-8($tptr) -___ -} -############################################################## -# Montgomery reduction part, "word-by-word" algorithm. -# -{ -my ($topbit,$nptr)=("%rbp",$aptr); -my ($m0,$m1)=($a0,$a1); -my @Ni=("%rbx","%r9"); -$code.=<<___; - mov 40(%rsp),$nptr # restore $nptr - mov 48(%rsp),$n0 # restore *n0 - xor $j,$j - mov $num,0(%rsp) # save $num - sub $num,$j # $j=-$num - mov 64(%rsp),$A0[0] # t[0] # modsched # - mov $n0,$m0 # # modsched # - lea 64(%rsp,$num,2),%rax # end of t[] buffer - lea 64(%rsp,$num),$tptr # end of t[] window - mov %rax,8(%rsp) # save end of t[] buffer - lea ($nptr,$num),$nptr # end of n[] buffer - xor $topbit,$topbit # $topbit=0 - - mov 0($nptr,$j),%rax # n[0] # modsched # - mov 8($nptr,$j),$Ni[1] # n[1] # modsched # - imulq $A0[0],$m0 # m0=t[0]*n0 # modsched # - mov %rax,$Ni[0] # # modsched # - jmp .Lsqr4x_mont_outer + pxor %xmm0,%xmm0 + lea 48(%rsp),%rax + lea 64(%rsp,$num,2),%rdx + shr \$3+2,$num + mov 40(%rsp),%rsi # restore %rsp + jmp .Lsqr8x_zero + +.align 32 +.Lsqr8x_zero: + movdqa %xmm0,16*0(%rax) # wipe t + movdqa %xmm0,16*1(%rax) + movdqa %xmm0,16*2(%rax) + movdqa %xmm0,16*3(%rax) + lea 16*4(%rax),%rax + movdqa %xmm0,16*0(%rdx) # wipe n + movdqa %xmm0,16*1(%rdx) + movdqa %xmm0,16*2(%rdx) + movdqa %xmm0,16*3(%rdx) + lea 16*4(%rdx),%rdx + dec $num + jnz .Lsqr8x_zero -.align 16 -.Lsqr4x_mont_outer: - xor $A0[1],$A0[1] - mul $m0 # n[0]*m0 - add %rax,$A0[0] # n[0]*m0+t[0] - mov $Ni[1],%rax - adc %rdx,$A0[1] - mov $n0,$m1 + mov \$1,%rax + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lsqr8x_epilogue: + ret +.size bn_sqr8x_mont,.-bn_sqr8x_mont +___ +}}} + +if ($addx) {{{ +my $bp="%rdx"; # original value - xor $A0[0],$A0[0] - add 8($tptr,$j),$A0[1] - adc \$0,$A0[0] - mul $m0 # n[1]*m0 - add %rax,$A0[1] # n[1]*m0+t[1] - mov $Ni[0],%rax - adc %rdx,$A0[0] - - imulq $A0[1],$m1 - - mov 16($nptr,$j),$Ni[0] # n[2] - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[0]*m1 - add %rax,$A1[0] # n[0]*m1+"t[1]" - mov $Ni[0],%rax - adc %rdx,$A1[1] - mov $A1[0],8($tptr,$j) # 
"t[1]" - - xor $A0[1],$A0[1] - add 16($tptr,$j),$A0[0] - adc \$0,$A0[1] - mul $m0 # n[2]*m0 - add %rax,$A0[0] # n[2]*m0+t[2] - mov $Ni[1],%rax - adc %rdx,$A0[1] - - mov 24($nptr,$j),$Ni[1] # n[3] - xor $A1[0],$A1[0] - add $A0[0],$A1[1] - adc \$0,$A1[0] - mul $m1 # n[1]*m1 - add %rax,$A1[1] # n[1]*m1+"t[2]" - mov $Ni[1],%rax - adc %rdx,$A1[0] - mov $A1[1],16($tptr,$j) # "t[2]" - - xor $A0[0],$A0[0] - add 24($tptr,$j),$A0[1] - lea 32($j),$j - adc \$0,$A0[0] - mul $m0 # n[3]*m0 - add %rax,$A0[1] # n[3]*m0+t[3] - mov $Ni[0],%rax - adc %rdx,$A0[0] - jmp .Lsqr4x_mont_inner +$code.=<<___; +.type bn_mulx4x_mont,\@function,6 +.align 32 +bn_mulx4x_mont: +.Lmulx4x_enter: + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 -.align 16 -.Lsqr4x_mont_inner: - mov ($nptr,$j),$Ni[0] # n[4] - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[2]*m1 - add %rax,$A1[0] # n[2]*m1+"t[3]" - mov $Ni[0],%rax - adc %rdx,$A1[1] - mov $A1[0],-8($tptr,$j) # "t[3]" - - xor $A0[1],$A0[1] - add ($tptr,$j),$A0[0] - adc \$0,$A0[1] - mul $m0 # n[4]*m0 - add %rax,$A0[0] # n[4]*m0+t[4] - mov $Ni[1],%rax - adc %rdx,$A0[1] - - mov 8($nptr,$j),$Ni[1] # n[5] - xor $A1[0],$A1[0] - add $A0[0],$A1[1] - adc \$0,$A1[0] - mul $m1 # n[3]*m1 - add %rax,$A1[1] # n[3]*m1+"t[4]" - mov $Ni[1],%rax - adc %rdx,$A1[0] - mov $A1[1],($tptr,$j) # "t[4]" - - xor $A0[0],$A0[0] - add 8($tptr,$j),$A0[1] - adc \$0,$A0[0] - mul $m0 # n[5]*m0 - add %rax,$A0[1] # n[5]*m0+t[5] - mov $Ni[0],%rax - adc %rdx,$A0[0] - - - mov 16($nptr,$j),$Ni[0] # n[6] - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[4]*m1 - add %rax,$A1[0] # n[4]*m1+"t[5]" - mov $Ni[0],%rax - adc %rdx,$A1[1] - mov $A1[0],8($tptr,$j) # "t[5]" - - xor $A0[1],$A0[1] - add 16($tptr,$j),$A0[0] - adc \$0,$A0[1] - mul $m0 # n[6]*m0 - add %rax,$A0[0] # n[6]*m0+t[6] - mov $Ni[1],%rax - adc %rdx,$A0[1] - - mov 24($nptr,$j),$Ni[1] # n[7] - xor $A1[0],$A1[0] - add $A0[0],$A1[1] - adc \$0,$A1[0] - mul $m1 # n[5]*m1 - add %rax,$A1[1] # n[5]*m1+"t[6]" - mov $Ni[1],%rax - adc %rdx,$A1[0] - mov $A1[1],16($tptr,$j) # "t[6]" - - xor $A0[0],$A0[0] - add 24($tptr,$j),$A0[1] - lea 32($j),$j - adc \$0,$A0[0] - mul $m0 # n[7]*m0 - add %rax,$A0[1] # n[7]*m0+t[7] - mov $Ni[0],%rax - adc %rdx,$A0[0] - cmp \$0,$j - jne .Lsqr4x_mont_inner - - sub 0(%rsp),$j # $j=-$num # modsched # - mov $n0,$m0 # # modsched # - - xor $A1[1],$A1[1] - add $A0[1],$A1[0] - adc \$0,$A1[1] - mul $m1 # n[6]*m1 - add %rax,$A1[0] # n[6]*m1+"t[7]" - mov $Ni[1],%rax - adc %rdx,$A1[1] - mov $A1[0],-8($tptr) # "t[7]" - - xor $A0[1],$A0[1] - add ($tptr),$A0[0] # +t[8] - adc \$0,$A0[1] - mov 0($nptr,$j),$Ni[0] # n[0] # modsched # - add $topbit,$A0[0] - adc \$0,$A0[1] - - imulq 16($tptr,$j),$m0 # m0=t[0]*n0 # modsched # - xor $A1[0],$A1[0] - mov 8($nptr,$j),$Ni[1] # n[1] # modsched # - add $A0[0],$A1[1] - mov 16($tptr,$j),$A0[0] # t[0] # modsched # - adc \$0,$A1[0] - mul $m1 # n[7]*m1 - add %rax,$A1[1] # n[7]*m1+"t[8]" - mov $Ni[0],%rax # # modsched # - adc %rdx,$A1[0] - mov $A1[1],($tptr) # "t[8]" - - xor $topbit,$topbit - add 8($tptr),$A1[0] # +t[9] - adc $topbit,$topbit - add $A0[1],$A1[0] - lea 16($tptr),$tptr # "t[$num]>>128" - adc \$0,$topbit - mov $A1[0],-8($tptr) # "t[9]" - cmp 8(%rsp),$tptr # are we done? 
- jb .Lsqr4x_mont_outer - - mov 0(%rsp),$num # restore $num - mov $topbit,($tptr) # save $topbit + shl \$3,${num}d # convert $num to bytes + .byte 0x67 + xor %r10,%r10 + sub $num,%r10 # -$num + mov ($n0),$n0 # *n0 + lea -72(%rsp,%r10),%rsp # alloca(frame+$num+8) + lea ($bp,$num),%r10 + and \$-128,%rsp + ############################################################## + # Stack layout + # +0 num + # +8 off-loaded &b[i] + # +16 end of b[num] + # +24 saved n0 + # +32 saved rp + # +40 saved %rsp + # +48 inner counter + # +56 + # +64 tmp[num+1] + # + mov $num,0(%rsp) # save $num + shr \$5,$num + mov %r10,16(%rsp) # end of b[num] + sub \$1,$num + mov $n0, 24(%rsp) # save *n0 + mov $rp, 32(%rsp) # save $rp + mov %rax,40(%rsp) # save original %rsp + mov $num,48(%rsp) # inner counter + jmp .Lmulx4x_body + +.align 32 +.Lmulx4x_body: ___ -} -############################################################## -# Post-condition, 4x unrolled copy from bn_mul_mont -# -{ -my ($tptr,$nptr)=("%rbx",$aptr); -my @ri=("%rax","%rdx","%r10","%r11"); +my ($aptr, $bptr, $nptr, $tptr, $mi, $bi, $zero, $num)= + ("%rsi","%rdi","%rcx","%rbx","%r8","%r9","%rbp","%rax"); +my $rptr=$bptr; $code.=<<___; - mov 64(%rsp,$num),@ri[0] # tp[0] - lea 64(%rsp,$num),$tptr # upper half of t[2*$num] holds result - mov 40(%rsp),$nptr # restore $nptr - shr \$5,$num # num/4 - mov 8($tptr),@ri[1] # t[1] - xor $i,$i # i=0 and clear CF! - - mov 32(%rsp),$rptr # restore $rptr - sub 0($nptr),@ri[0] - mov 16($tptr),@ri[2] # t[2] - mov 24($tptr),@ri[3] # t[3] - sbb 8($nptr),@ri[1] - lea -1($num),$j # j=num/4-1 - jmp .Lsqr4x_sub -.align 16 -.Lsqr4x_sub: - mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i] - sbb 16($nptr,$i,8),@ri[2] - mov 32($tptr,$i,8),@ri[0] # tp[i+1] - mov 40($tptr,$i,8),@ri[1] - sbb 24($nptr,$i,8),@ri[3] - mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i] - sbb 32($nptr,$i,8),@ri[0] - mov 48($tptr,$i,8),@ri[2] - mov 56($tptr,$i,8),@ri[3] - sbb 40($nptr,$i,8),@ri[1] - lea 4($i),$i # i++ - dec $j # doesn't affect CF! - jnz .Lsqr4x_sub - - mov @ri[0],0($rptr,$i,8) # rp[i]=tp[i]-np[i] - mov 32($tptr,$i,8),@ri[0] # load overflow bit - sbb 16($nptr,$i,8),@ri[2] - mov @ri[1],8($rptr,$i,8) # rp[i]=tp[i]-np[i] - sbb 24($nptr,$i,8),@ri[3] - mov @ri[2],16($rptr,$i,8) # rp[i]=tp[i]-np[i] - - sbb \$0,@ri[0] # handle upmost overflow bit - mov @ri[3],24($rptr,$i,8) # rp[i]=tp[i]-np[i] - xor $i,$i # i=0 - and @ri[0],$tptr - not @ri[0] - mov $rptr,$nptr - and @ri[0],$nptr - lea -1($num),$j - or $nptr,$tptr # tp=borrow?tp:rp + lea 8($bp),$bptr + mov ($bp),%rdx # b[0], $bp==%rdx actually + lea 64+32(%rsp),$tptr + mov %rdx,$bi + + mulx 0*8($aptr),$mi,%rax # a[0]*b[0] + mulx 1*8($aptr),%r11,%r14 # a[1]*b[0] + add %rax,%r11 + mov $bptr,8(%rsp) # off-load &b[i] + mulx 2*8($aptr),%r12,%r13 # ... 
+ adc %r14,%r12 + adc \$0,%r13 + + mov $mi,$bptr # borrow $bptr + imulq 24(%rsp),$mi # "t[0]"*n0 + xor $zero,$zero # cf=0, of=0 + + mulx 3*8($aptr),%rax,%r14 + mov $mi,%rdx + lea 4*8($aptr),$aptr + adcx %rax,%r13 + adcx $zero,%r14 # cf=0 + + mulx 0*8($nptr),%rax,%r10 + adcx %rax,$bptr # discarded + adox %r11,%r10 + mulx 1*8($nptr),%rax,%r11 + adcx %rax,%r10 + adox %r12,%r11 + .byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 # mulx 2*8($nptr),%rax,%r12 + mov 48(%rsp),$bptr # counter value + mov %r10,-4*8($tptr) + adcx %rax,%r11 + adox %r13,%r12 + mulx 3*8($nptr),%rax,%r15 + mov $bi,%rdx + mov %r11,-3*8($tptr) + adcx %rax,%r12 + adox $zero,%r15 # of=0 + lea 4*8($nptr),$nptr + mov %r12,-2*8($tptr) + + jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcx $zero,%r15 # cf=0, modulo-scheduled + mulx 0*8($aptr),%r10,%rax # a[4]*b[0] + adcx %r14,%r10 + mulx 1*8($aptr),%r11,%r14 # a[5]*b[0] + adcx %rax,%r11 + mulx 2*8($aptr),%r12,%rax # ... + adcx %r14,%r12 + mulx 3*8($aptr),%r13,%r14 + .byte 0x67,0x67 + mov $mi,%rdx + adcx %rax,%r13 + adcx $zero,%r14 # cf=0 + lea 4*8($aptr),$aptr + lea 4*8($tptr),$tptr + + adox %r15,%r10 + mulx 0*8($nptr),%rax,%r15 + adcx %rax,%r10 + adox %r15,%r11 + mulx 1*8($nptr),%rax,%r15 + adcx %rax,%r11 + adox %r15,%r12 + mulx 2*8($nptr),%rax,%r15 + mov %r10,-5*8($tptr) + adcx %rax,%r12 + mov %r11,-4*8($tptr) + adox %r15,%r13 + mulx 3*8($nptr),%rax,%r15 + mov $bi,%rdx + mov %r12,-3*8($tptr) + adcx %rax,%r13 + adox $zero,%r15 + lea 4*8($nptr),$nptr + mov %r13,-2*8($tptr) + + dec $bptr # of=0, pass cf + jnz .Lmulx4x_1st + + mov 0(%rsp),$num # load num + mov 8(%rsp),$bptr # re-load &b[i] + adc $zero,%r15 # modulo-scheduled + add %r15,%r14 + sbb %r15,%r15 # top-most carry + mov %r14,-1*8($tptr) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + mov ($bptr),%rdx # b[i] + lea 8($bptr),$bptr # b++ + sub $num,$aptr # rewind $aptr + mov %r15,($tptr) # save top-most carry + lea 64+4*8(%rsp),$tptr + sub $num,$nptr # rewind $nptr + + mulx 0*8($aptr),$mi,%r11 # a[0]*b[i] + xor %ebp,%ebp # xor $zero,$zero # cf=0, of=0 + mov %rdx,$bi + mulx 1*8($aptr),%r14,%r12 # a[1]*b[i] + adox -4*8($tptr),$mi + adcx %r14,%r11 + mulx 2*8($aptr),%r15,%r13 # ... + adox -3*8($tptr),%r11 + adcx %r15,%r12 + adox $zero,%r12 + adcx $zero,%r13 + + mov $bptr,8(%rsp) # off-load &b[i] + .byte 0x67 + mov $mi,%r15 + imulq 24(%rsp),$mi # "t[0]"*n0 + xor %ebp,%ebp # xor $zero,$zero # cf=0, of=0 + + mulx 3*8($aptr),%rax,%r14 + mov $mi,%rdx + adox -2*8($tptr),%r12 + adcx %rax,%r13 + adox -1*8($tptr),%r13 + adcx $zero,%r14 + lea 4*8($aptr),$aptr + adox $zero,%r14 + + mulx 0*8($nptr),%rax,%r10 + adcx %rax,%r15 # discarded + adox %r11,%r10 + mulx 1*8($nptr),%rax,%r11 + adcx %rax,%r10 + adox %r12,%r11 + mulx 2*8($nptr),%rax,%r12 + mov %r10,-4*8($tptr) + adcx %rax,%r11 + adox %r13,%r12 + mulx 3*8($nptr),%rax,%r15 + mov $bi,%rdx + mov %r11,-3*8($tptr) + lea 4*8($nptr),$nptr + adcx %rax,%r12 + adox $zero,%r15 # of=0 + mov 48(%rsp),$bptr # counter value + mov %r12,-2*8($tptr) + + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulx 0*8($aptr),%r10,%rax # a[4]*b[i] + adcx $zero,%r15 # cf=0, modulo-scheduled + adox %r14,%r10 + mulx 1*8($aptr),%r11,%r14 # a[5]*b[i] + adcx 0*8($tptr),%r10 + adox %rax,%r11 + mulx 2*8($aptr),%r12,%rax # ... 
+ adcx 1*8($tptr),%r11 + adox %r14,%r12 + mulx 3*8($aptr),%r13,%r14 + mov $mi,%rdx + adcx 2*8($tptr),%r12 + adox %rax,%r13 + adcx 3*8($tptr),%r13 + adox $zero,%r14 # of=0 + lea 4*8($aptr),$aptr + lea 4*8($tptr),$tptr + adcx $zero,%r14 # cf=0 + + adox %r15,%r10 + mulx 0*8($nptr),%rax,%r15 + adcx %rax,%r10 + adox %r15,%r11 + mulx 1*8($nptr),%rax,%r15 + adcx %rax,%r11 + adox %r15,%r12 + mulx 2*8($nptr),%rax,%r15 + mov %r10,-5*8($tptr) + adcx %rax,%r12 + adox %r15,%r13 + mulx 3*8($nptr),%rax,%r15 + mov $bi,%rdx + mov %r11,-4*8($tptr) + mov %r12,-3*8($tptr) + adcx %rax,%r13 + adox $zero,%r15 + lea 4*8($nptr),$nptr + mov %r13,-2*8($tptr) + + dec $bptr # of=0, pass cf + jnz .Lmulx4x_inner + + mov 0(%rsp),$num # load num + mov 8(%rsp),$bptr # re-load &b[i] + adc $zero,%r15 # modulo-scheduled + sub 0*8($tptr),$zero # pull top-most carry + adc %r15,%r14 + mov -8($nptr),$mi + sbb %r15,%r15 # top-most carry + mov %r14,-1*8($tptr) + + cmp 16(%rsp),$bptr + jne .Lmulx4x_outer + + sub %r14,$mi # compare top-most words + sbb $mi,$mi + or $mi,%r15 + + neg $num + xor %rdx,%rdx + mov 32(%rsp),$rptr # restore rp + lea 64(%rsp),$tptr pxor %xmm0,%xmm0 - lea 64(%rsp,$num,8),$nptr - movdqu ($tptr),%xmm1 - lea ($nptr,$num,8),$nptr - movdqa %xmm0,64(%rsp) # zap lower half of temporary vector - movdqa %xmm0,($nptr) # zap upper half of temporary vector - movdqu %xmm1,($rptr) - jmp .Lsqr4x_copy -.align 16 -.Lsqr4x_copy: # copy or in-place refresh - movdqu 16($tptr,$i),%xmm2 - movdqu 32($tptr,$i),%xmm1 - movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector - movdqa %xmm0,96(%rsp,$i) # zap lower half of temporary vector - movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector - movdqa %xmm0,32($nptr,$i) # zap upper half of temporary vector - movdqu %xmm2,16($rptr,$i) - movdqu %xmm1,32($rptr,$i) - lea 32($i),$i - dec $j - jnz .Lsqr4x_copy - - movdqu 16($tptr,$i),%xmm2 - movdqa %xmm0,80(%rsp,$i) # zap lower half of temporary vector - movdqa %xmm0,16($nptr,$i) # zap upper half of temporary vector - movdqu %xmm2,16($rptr,$i) -___ -} -$code.=<<___; - mov 56(%rsp),%rsi # restore %rsp + mov 0*8($nptr,$num),%r8 + mov 1*8($nptr,$num),%r9 + neg %r8 + jmp .Lmulx4x_sub_entry + +.align 32 +.Lmulx4x_sub: + mov 0*8($nptr,$num),%r8 + mov 1*8($nptr,$num),%r9 + not %r8 +.Lmulx4x_sub_entry: + mov 2*8($nptr,$num),%r10 + not %r9 + and %r15,%r8 + mov 3*8($nptr,$num),%r11 + not %r10 + and %r15,%r9 + not %r11 + and %r15,%r10 + and %r15,%r11 + + neg %rdx # mov %rdx,%cf + adc 0*8($tptr),%r8 + adc 1*8($tptr),%r9 + movdqa %xmm0,($tptr) + adc 2*8($tptr),%r10 + adc 3*8($tptr),%r11 + movdqa %xmm0,16($tptr) + lea 4*8($tptr),$tptr + sbb %rdx,%rdx # mov %cf,%rdx + + mov %r8,0*8($rptr) + mov %r9,1*8($rptr) + mov %r10,2*8($rptr) + mov %r11,3*8($rptr) + lea 4*8($rptr),$rptr + + add \$32,$num + jnz .Lmulx4x_sub + + mov 40(%rsp),%rsi # restore %rsp mov \$1,%rax - mov 0(%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lsqr4x_epilogue: + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lmulx4x_epilogue: ret -.size bn_sqr4x_mont,.-bn_sqr4x_mont +.size bn_mulx4x_mont,.-bn_mulx4x_mont ___ }}} $code.=<<___; @@ -1581,18 +1285,22 @@ mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip - lea .Lsqr4x_body(%rip),%r10 + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea 
(%rsi,%r10),%r10 # end of prologue label cmp %r10,%rbx # context->Rip<.Lsqr_body jb .Lcommon_seh_tail mov 152($context),%rax # pull context->Rsp - lea .Lsqr4x_epilogue(%rip),%r10 + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue jae .Lcommon_seh_tail - mov 56(%rax),%rax # pull saved stack pointer - lea 48(%rax),%rax + mov 40(%rax),%rax # pull saved stack pointer mov -8(%rax),%rbx mov -16(%rax),%rbp @@ -1657,10 +1365,16 @@ .rva .LSEH_end_bn_mul4x_mont .rva .LSEH_info_bn_mul4x_mont - .rva .LSEH_begin_bn_sqr4x_mont - .rva .LSEH_end_bn_sqr4x_mont - .rva .LSEH_info_bn_sqr4x_mont - + .rva .LSEH_begin_bn_sqr8x_mont + .rva .LSEH_end_bn_sqr8x_mont + .rva .LSEH_info_bn_sqr8x_mont +___ +$code.=<<___ if ($addx); + .rva .LSEH_begin_bn_mulx4x_mont + .rva .LSEH_end_bn_mulx4x_mont + .rva .LSEH_info_bn_mulx4x_mont +___ +$code.=<<___; .section .xdata .align 8 .LSEH_info_bn_mul_mont: @@ -1671,9 +1385,16 @@ .byte 9,0,0,0 .rva mul_handler .rva .Lmul4x_body,.Lmul4x_epilogue # HandlerData[] -.LSEH_info_bn_sqr4x_mont: +.LSEH_info_bn_sqr8x_mont: + .byte 9,0,0,0 + .rva sqr_handler + .rva .Lsqr8x_body,.Lsqr8x_epilogue # HandlerData[] +___ +$code.=<<___ if ($addx); +.LSEH_info_bn_mulx4x_mont: .byte 9,0,0,0 .rva sqr_handler + .rva .Lmulx4x_body,.Lmulx4x_epilogue # HandlerData[] ___ } diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl index dae0fe24531802..fa22c30b1fdd60 100755 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -17,6 +17,13 @@ # is implemented, so that scatter-/gathering can be tuned without # bn_exp.c modifications. +# August 2013. +# +# Add MULX/AD*X code paths and additional interfaces to optimize for +# branch prediction unit. For input lengths that are multiples of 8 +# the np argument is not just modulus value, but one interleaved +# with 0. This is to optimize post-condition... + $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } @@ -31,6 +38,21 @@ open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $addx = ($1>=2.23); +} + +if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $addx = ($1>=2.10); +} + +if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $addx = ($1>=12); +} + # int bn_mul_mont_gather5( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, @@ -53,19 +75,25 @@ $code=<<___; .text +.extern OPENSSL_ia32cap_P + .globl bn_mul_mont_gather5 .type bn_mul_mont_gather5,\@function,6 .align 64 bn_mul_mont_gather5: - test \$3,${num}d + test \$7,${num}d jnz .Lmul_enter - cmp \$8,${num}d - jb .Lmul_enter +___ +$code.=<<___ if ($addx); + mov OPENSSL_ia32cap_P+8(%rip),%r11d +___ +$code.=<<___; jmp .Lmul4x_enter .align 16 .Lmul_enter: mov ${num}d,${num}d + mov %rsp,%rax mov `($win64?56:8)`(%rsp),%r10d # load 7th argument push %rbx push %rbp @@ -78,10 +106,8 @@ lea -0x28(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) -.Lmul_alloca: ___ $code.=<<___; - mov %rsp,%rax lea 2($num),%r11 neg %r11 lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)) @@ -287,7 +313,7 @@ lea 1($i),$i # i++ cmp $num,$i - jl .Louter + jb .Louter xor $i,$i # i=0 and clear CF! 
mov (%rsp),%rax # tp[0] @@ -323,18 +349,17 @@ mov \$1,%rax ___ $code.=<<___ if ($win64); - movaps (%rsi),%xmm6 - movaps 0x10(%rsi),%xmm7 - lea 0x28(%rsi),%rsi + movaps -88(%rsi),%xmm6 + movaps -72(%rsi),%xmm7 ___ $code.=<<___; - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp .Lmul_epilogue: ret .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 @@ -344,11 +369,18 @@ my @N=("%r13","%rdi"); $code.=<<___; .type bn_mul4x_mont_gather5,\@function,6 -.align 16 +.align 32 bn_mul4x_mont_gather5: .Lmul4x_enter: - mov ${num}d,${num}d - mov `($win64?56:8)`(%rsp),%r10d # load 7th argument +___ +$code.=<<___ if ($addx); + and \$0x80100,%r11d + cmp \$0x80100,%r11d + je .Lmulx4x_enter +___ +$code.=<<___; + .byte 0x67 + mov %rsp,%rax push %rbx push %rbp push %r12 @@ -360,23 +392,78 @@ lea -0x28(%rsp),%rsp movaps %xmm6,(%rsp) movaps %xmm7,0x10(%rsp) -.Lmul4x_alloca: ___ $code.=<<___; - mov %rsp,%rax - lea 4($num),%r11 - neg %r11 - lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+4)) - and \$-1024,%rsp # minimize TLB usage - - mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp + .byte 0x67 + mov ${num}d,%r10d + shl \$3,${num}d + shl \$3+2,%r10d # 4*$num + neg $num # -$num + + ############################################################## + # ensure that stack frame doesn't alias with $aptr+4*$num + # modulo 4096, which covers ret[num], am[num] and n[2*num] + # (see bn_exp.c). this is done to allow memory disambiguation + # logic do its magic. [excessive frame is allocated in order + # to allow bn_from_mont8x to clear it.] + # + lea -64(%rsp,$num,2),%r11 + sub $ap,%r11 + and \$4095,%r11 + cmp %r11,%r10 + jb .Lmul4xsp_alt + sub %r11,%rsp # align with $ap + lea -64(%rsp,$num,2),%rsp # alloca(128+num*8) + jmp .Lmul4xsp_done + +.align 32 +.Lmul4xsp_alt: + lea 4096-64(,$num,2),%r10 + lea -64(%rsp,$num,2),%rsp # alloca(128+num*8) + sub %r10,%r11 + mov \$0,%r10 + cmovc %r10,%r11 + sub %r11,%rsp +.Lmul4xsp_done: + and \$-64,%rsp + neg $num + + mov %rax,40(%rsp) .Lmul4x_body: - mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp - mov %rdx,%r12 # reassign $bp + + call mul4x_internal + + mov 40(%rsp),%rsi # restore %rsp + mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps -88(%rsi),%xmm6 + movaps -72(%rsi),%xmm7 +___ +$code.=<<___; + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lmul4x_epilogue: + ret +.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 + +.type mul4x_internal,\@abi-omnipotent +.align 32 +mul4x_internal: + shl \$5,$num + mov `($win64?56:8)`(%rax),%r10d # load 7th argument + lea 256(%rdx,$num),%r13 + shr \$5,$num # restore $num ___ $bp="%r12"; $STRIDE=2**5*8; # 5 is "window size" $N=$STRIDE/4; # should match cache line size + $tp=$i; $code.=<<___; mov %r10,%r11 shr \$`log($N/8)/log(2)`,%r10 @@ -384,459 +471,2771 @@ not %r10 lea .Lmagic_masks(%rip),%rax and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" - lea 96($bp,%r11,8),$bp # pointer within 1st cache line + lea 96(%rdx,%r11,8),$bp # pointer within 1st cache line movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which movq 8(%rax,%r10,8),%xmm5 # cache line contains element + add \$7,%r11 movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument movq 24(%rax,%r10,8),%xmm7 + and \$7,%r11 movq `0*$STRIDE/4-96`($bp),%xmm0 + lea $STRIDE($bp),$tp # borrow $tp movq 
`1*$STRIDE/4-96`($bp),%xmm1 pand %xmm4,%xmm0 movq `2*$STRIDE/4-96`($bp),%xmm2 pand %xmm5,%xmm1 movq `3*$STRIDE/4-96`($bp),%xmm3 pand %xmm6,%xmm2 + .byte 0x67 por %xmm1,%xmm0 + movq `0*$STRIDE/4-96`($tp),%xmm1 + .byte 0x67 pand %xmm7,%xmm3 + .byte 0x67 por %xmm2,%xmm0 - lea $STRIDE($bp),$bp + movq `1*$STRIDE/4-96`($tp),%xmm2 + .byte 0x67 + pand %xmm4,%xmm1 + .byte 0x67 por %xmm3,%xmm0 + movq `2*$STRIDE/4-96`($tp),%xmm3 movq %xmm0,$m0 # m0=bp[0] + movq `3*$STRIDE/4-96`($tp),%xmm0 + mov %r13,16+8(%rsp) # save end of b[num] + mov $rp, 56+8(%rsp) # save $rp + mov ($n0),$n0 # pull n0[0] value mov ($ap),%rax - - xor $i,$i # i=0 - xor $j,$j # j=0 - - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 + lea ($ap,$num),$ap # end of a[num] + neg $num mov $n0,$m1 mulq $m0 # ap[0]*bp[0] mov %rax,$A[0] mov ($np),%rax - movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 + pand %xmm5,%xmm2 + pand %xmm6,%xmm3 + por %xmm2,%xmm1 imulq $A[0],$m1 # "tp[0]"*n0 + ############################################################## + # $tp is chosen so that writing to top-most element of the + # vector occurs just "above" references to powers table, + # "above" modulo cache-line size, which effectively precludes + # possibility of memory disambiguation logic failure when + # accessing the table. + # + lea 64+8(%rsp,%r11,8),$tp mov %rdx,$A[1] - por %xmm2,%xmm0 - lea $STRIDE($bp),$bp - por %xmm3,%xmm0 + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + lea 2*$STRIDE($bp),$bp + por %xmm1,%xmm0 mulq $m1 # np[0]*m1 add %rax,$A[0] # discarded - mov 8($ap),%rax + mov 8($ap,$num),%rax adc \$0,%rdx mov %rdx,$N[1] mulq $m0 add %rax,$A[1] - mov 8($np),%rax + mov 16*1($np),%rax # interleaved with 0, therefore 16*n adc \$0,%rdx mov %rdx,$A[0] mulq $m1 add %rax,$N[1] - mov 16($ap),%rax + mov 16($ap,$num),%rax adc \$0,%rdx add $A[1],$N[1] - lea 4($j),$j # j++ + lea 4*8($num),$j # j=4 + lea 16*4($np),$np adc \$0,%rdx - mov $N[1],(%rsp) + mov $N[1],($tp) mov %rdx,$N[0] jmp .L1st4x -.align 16 + +.align 32 .L1st4x: mulq $m0 # ap[j]*bp[0] add %rax,$A[0] - mov -16($np,$j,8),%rax + mov -16*2($np),%rax + lea 32($tp),$tp adc \$0,%rdx mov %rdx,$A[1] mulq $m1 # np[j]*m1 add %rax,$N[0] - mov -8($ap,$j,8),%rax + mov -8($ap,$j),%rax adc \$0,%rdx add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov $N[0],-24($tp) # tp[j-1] mov %rdx,$N[1] mulq $m0 # ap[j]*bp[0] add %rax,$A[1] - mov -8($np,$j,8),%rax + mov -16*1($np),%rax adc \$0,%rdx mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov ($ap,$j,8),%rax + mov ($ap,$j),%rax adc \$0,%rdx add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov $N[1],-16($tp) # tp[j-1] mov %rdx,$N[0] mulq $m0 # ap[j]*bp[0] add %rax,$A[0] - mov ($np,$j,8),%rax + mov 16*0($np),%rax adc \$0,%rdx mov %rdx,$A[1] mulq $m1 # np[j]*m1 add %rax,$N[0] - mov 8($ap,$j,8),%rax + mov 8($ap,$j),%rax adc \$0,%rdx add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov $N[0],-8(%rsp,$j,8) # tp[j-1] + mov $N[0],-8($tp) # tp[j-1] mov %rdx,$N[1] mulq $m0 # ap[j]*bp[0] add %rax,$A[1] - mov 8($np,$j,8),%rax + mov 16*1($np),%rax adc \$0,%rdx - lea 4($j),$j # j++ mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov -16($ap,$j,8),%rax + mov 16($ap,$j),%rax adc \$0,%rdx add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] + lea 16*4($np),$np adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov $N[1],($tp) # tp[j-1] mov %rdx,$N[0] - cmp $num,$j - jl .L1st4x + 
+ add \$32,$j # j+=4 + jnz .L1st4x mulq $m0 # ap[j]*bp[0] add %rax,$A[0] - mov -16($np,$j,8),%rax + mov -16*2($np),%rax + lea 32($tp),$tp adc \$0,%rdx mov %rdx,$A[1] mulq $m1 # np[j]*m1 add %rax,$N[0] - mov -8($ap,$j,8),%rax + mov -8($ap),%rax adc \$0,%rdx add $A[0],$N[0] # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov $N[0],-24($tp) # tp[j-1] mov %rdx,$N[1] mulq $m0 # ap[j]*bp[0] add %rax,$A[1] - mov -8($np,$j,8),%rax + mov -16*1($np),%rax adc \$0,%rdx mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov ($ap),%rax # ap[0] + mov ($ap,$num),%rax # ap[0] adc \$0,%rdx add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0] adc \$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov $N[1],-16($tp) # tp[j-1] mov %rdx,$N[0] movq %xmm0,$m0 # bp[1] + lea ($np,$num,2),$np # rewind $np xor $N[1],$N[1] add $A[0],$N[0] adc \$0,$N[1] - mov $N[0],-8(%rsp,$j,8) - mov $N[1],(%rsp,$j,8) # store upmost overflow bit + mov $N[0],-8($tp) - lea 1($i),$i # i++ -.align 4 -.Louter4x: - xor $j,$j # j=0 - movq `0*$STRIDE/4-96`($bp),%xmm0 - movq `1*$STRIDE/4-96`($bp),%xmm1 - pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($bp),%xmm2 - pand %xmm5,%xmm1 + jmp .Louter4x - mov (%rsp),$A[0] +.align 32 +.Louter4x: + mov ($tp,$num),$A[0] mov $n0,$m1 mulq $m0 # ap[0]*bp[i] add %rax,$A[0] # ap[0]*bp[i]+tp[0] mov ($np),%rax adc \$0,%rdx + movq `0*$STRIDE/4-96`($bp),%xmm0 + movq `1*$STRIDE/4-96`($bp),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bp),%xmm2 + pand %xmm5,%xmm1 movq `3*$STRIDE/4-96`($bp),%xmm3 - pand %xmm6,%xmm2 - por %xmm1,%xmm0 - pand %xmm7,%xmm3 imulq $A[0],$m1 # tp[0]*n0 + .byte 0x67 mov %rdx,$A[1] + mov $N[1],($tp) # store upmost overflow bit + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 por %xmm2,%xmm0 + lea ($tp,$num),$tp # rewind $tp lea $STRIDE($bp),$bp por %xmm3,%xmm0 mulq $m1 # np[0]*m1 add %rax,$A[0] # "$N[0]", discarded - mov 8($ap),%rax + mov 8($ap,$num),%rax adc \$0,%rdx mov %rdx,$N[1] mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov 8($np),%rax + mov 16*1($np),%rax # interleaved with 0, therefore 16*n adc \$0,%rdx - add 8(%rsp),$A[1] # +tp[1] + add 8($tp),$A[1] # +tp[1] adc \$0,%rdx mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov 16($ap),%rax + mov 16($ap,$num),%rax adc \$0,%rdx add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j] - lea 4($j),$j # j+=2 + lea 4*8($num),$j # j=4 + lea 16*4($np),$np adc \$0,%rdx mov %rdx,$N[0] jmp .Linner4x -.align 16 + +.align 32 .Linner4x: mulq $m0 # ap[j]*bp[i] add %rax,$A[0] - mov -16($np,$j,8),%rax + mov -16*2($np),%rax adc \$0,%rdx - add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + add 16($tp),$A[0] # ap[j]*bp[i]+tp[j] + lea 32($tp),$tp adc \$0,%rdx mov %rdx,$A[1] mulq $m1 # np[j]*m1 add %rax,$N[0] - mov -8($ap,$j,8),%rax + mov -8($ap,$j),%rax adc \$0,%rdx add $A[0],$N[0] adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov $N[1],-32($tp) # tp[j-1] mov %rdx,$N[1] mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov -8($np,$j,8),%rax + mov -16*1($np),%rax adc \$0,%rdx - add -8(%rsp,$j,8),$A[1] + add -8($tp),$A[1] adc \$0,%rdx mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov ($ap,$j,8),%rax + mov ($ap,$j),%rax adc \$0,%rdx add $A[1],$N[1] adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov $N[0],-24($tp) # tp[j-1] mov %rdx,$N[0] mulq $m0 # ap[j]*bp[i] add %rax,$A[0] - mov ($np,$j,8),%rax + mov 16*0($np),%rax adc \$0,%rdx - add (%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + add ($tp),$A[0] # ap[j]*bp[i]+tp[j] adc \$0,%rdx mov %rdx,$A[1] mulq $m1 # np[j]*m1 add %rax,$N[0] - mov 8($ap,$j,8),%rax + mov 8($ap,$j),%rax adc \$0,%rdx add $A[0],$N[0] adc 
\$0,%rdx - mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov $N[1],-16($tp) # tp[j-1] mov %rdx,$N[1] mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov 8($np,$j,8),%rax + mov 16*1($np),%rax adc \$0,%rdx - add 8(%rsp,$j,8),$A[1] + add 8($tp),$A[1] adc \$0,%rdx - lea 4($j),$j # j++ mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov -16($ap,$j,8),%rax + mov 16($ap,$j),%rax adc \$0,%rdx add $A[1],$N[1] + lea 16*4($np),$np adc \$0,%rdx - mov $N[0],-40(%rsp,$j,8) # tp[j-1] + mov $N[0],-8($tp) # tp[j-1] mov %rdx,$N[0] - cmp $num,$j - jl .Linner4x + + add \$32,$j # j+=4 + jnz .Linner4x mulq $m0 # ap[j]*bp[i] add %rax,$A[0] - mov -16($np,$j,8),%rax + mov -16*2($np),%rax adc \$0,%rdx - add -16(%rsp,$j,8),$A[0] # ap[j]*bp[i]+tp[j] + add 16($tp),$A[0] # ap[j]*bp[i]+tp[j] + lea 32($tp),$tp adc \$0,%rdx mov %rdx,$A[1] mulq $m1 # np[j]*m1 add %rax,$N[0] - mov -8($ap,$j,8),%rax + mov -8($ap),%rax adc \$0,%rdx add $A[0],$N[0] adc \$0,%rdx - mov $N[1],-32(%rsp,$j,8) # tp[j-1] + mov $N[1],-32($tp) # tp[j-1] mov %rdx,$N[1] mulq $m0 # ap[j]*bp[i] add %rax,$A[1] - mov -8($np,$j,8),%rax + mov $m1,%rax + mov -16*1($np),$m1 adc \$0,%rdx - add -8(%rsp,$j,8),$A[1] + add -8($tp),$A[1] adc \$0,%rdx - lea 1($i),$i # i++ mov %rdx,$A[0] mulq $m1 # np[j]*m1 add %rax,$N[1] - mov ($ap),%rax # ap[0] + mov ($ap,$num),%rax # ap[0] adc \$0,%rdx add $A[1],$N[1] adc \$0,%rdx - mov $N[0],-24(%rsp,$j,8) # tp[j-1] + mov $N[0],-24($tp) # tp[j-1] mov %rdx,$N[0] movq %xmm0,$m0 # bp[i+1] - mov $N[1],-16(%rsp,$j,8) # tp[j-1] + mov $N[1],-16($tp) # tp[j-1] + lea ($np,$num,2),$np # rewind $np xor $N[1],$N[1] add $A[0],$N[0] adc \$0,$N[1] - add (%rsp,$num,8),$N[0] # pull upmost overflow bit - adc \$0,$N[1] - mov $N[0],-8(%rsp,$j,8) - mov $N[1],(%rsp,$j,8) # store upmost overflow bit + add ($tp),$N[0] # pull upmost overflow bit + adc \$0,$N[1] # upmost overflow bit + mov $N[0],-8($tp) - cmp $num,$i - jl .Louter4x + cmp 16+8(%rsp),$bp + jb .Louter4x ___ -{ -my @ri=("%rax","%rdx",$m0,$m1); +if (1) { $code.=<<___; - mov 16(%rsp,$num,8),$rp # restore $rp - mov 0(%rsp),@ri[0] # tp[0] - pxor %xmm0,%xmm0 - mov 8(%rsp),@ri[1] # tp[1] - shr \$2,$num # num/=4 - lea (%rsp),$ap # borrow ap for tp - xor $i,$i # i=0 and clear CF! - - sub 0($np),@ri[0] - mov 16($ap),@ri[2] # tp[2] - mov 24($ap),@ri[3] # tp[3] - sbb 8($np),@ri[1] - lea -1($num),$j # j=num/4-1 + sub $N[0],$m1 # compare top-most words + adc $j,$j # $j is zero + or $j,$N[1] + xor \$1,$N[1] + lea ($tp,$num),%rbx # tptr in .sqr4x_sub + lea ($np,$N[1],8),%rbp # nptr in .sqr4x_sub + mov %r9,%rcx + sar \$3+2,%rcx # cf=0 + mov 56+8(%rsp),%rdi # rptr in .sqr4x_sub + jmp .Lsqr4x_sub +___ +} else { +my @ri=("%rax",$bp,$m0,$m1); +my $rp="%rdx"; +$code.=<<___ + xor \$1,$N[1] + lea ($tp,$num),$tp # rewind $tp + sar \$5,$num # cf=0 + lea ($np,$N[1],8),$np + mov 56+8(%rsp),$rp # restore $rp jmp .Lsub4x -.align 16 + +.align 32 .Lsub4x: - mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 16($np,$i,8),@ri[2] - mov 32($ap,$i,8),@ri[0] # tp[i+1] - mov 40($ap,$i,8),@ri[1] - sbb 24($np,$i,8),@ri[3] - mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] - mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 32($np,$i,8),@ri[0] - mov 48($ap,$i,8),@ri[2] - mov 56($ap,$i,8),@ri[3] - sbb 40($np,$i,8),@ri[1] - lea 4($i),$i # i++ - dec $j # doesnn't affect CF! 
+ .byte 0x66 + mov 8*0($tp),@ri[0] + mov 8*1($tp),@ri[1] + .byte 0x66 + sbb 16*0($np),@ri[0] + mov 8*2($tp),@ri[2] + sbb 16*1($np),@ri[1] + mov 3*8($tp),@ri[3] + lea 4*8($tp),$tp + sbb 16*2($np),@ri[2] + mov @ri[0],8*0($rp) + sbb 16*3($np),@ri[3] + lea 16*4($np),$np + mov @ri[1],8*1($rp) + mov @ri[2],8*2($rp) + mov @ri[3],8*3($rp) + lea 8*4($rp),$rp + + inc $num jnz .Lsub4x - mov @ri[0],0($rp,$i,8) # rp[i]=tp[i]-np[i] - mov 32($ap,$i,8),@ri[0] # load overflow bit - sbb 16($np,$i,8),@ri[2] - mov @ri[1],8($rp,$i,8) # rp[i]=tp[i]-np[i] - sbb 24($np,$i,8),@ri[3] - mov @ri[2],16($rp,$i,8) # rp[i]=tp[i]-np[i] + ret +___ +} +$code.=<<___; +.size mul4x_internal,.-mul4x_internal +___ +}}} + {{{ +###################################################################### +# void bn_power5( +my $rptr="%rdi"; # BN_ULONG *rptr, +my $aptr="%rsi"; # const BN_ULONG *aptr, +my $bptr="%rdx"; # const void *table, +my $nptr="%rcx"; # const BN_ULONG *nptr, +my $n0 ="%r8"; # const BN_ULONG *n0); +my $num ="%r9"; # int num, has to be divisible by 8 + # int pwr + +my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); +my @A0=("%r10","%r11"); +my @A1=("%r12","%r13"); +my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); - sbb \$0,@ri[0] # handle upmost overflow bit - mov @ri[3],24($rp,$i,8) # rp[i]=tp[i]-np[i] - xor $i,$i # i=0 - and @ri[0],$ap - not @ri[0] - mov $rp,$np - and @ri[0],$np - lea -1($num),$j - or $np,$ap # ap=borrow?tp:rp +$code.=<<___; +.globl bn_power5 +.type bn_power5,\@function,6 +.align 32 +bn_power5: +___ +$code.=<<___ if ($addx); + mov OPENSSL_ia32cap_P+8(%rip),%r11d + and \$0x80100,%r11d + cmp \$0x80100,%r11d + je .Lpowerx5_enter +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +___ +$code.=<<___; + mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes + shl \$3+2,%r10d # 4*$num + neg $num + mov ($n0),$n0 # *n0 + + ############################################################## + # ensure that stack frame doesn't alias with $aptr+4*$num + # modulo 4096, which covers ret[num], am[num] and n[2*num] + # (see bn_exp.c). this is done to allow memory disambiguation + # logic do its magic. 
+ # + lea -64(%rsp,$num,2),%r11 + sub $aptr,%r11 + and \$4095,%r11 + cmp %r11,%r10 + jb .Lpwr_sp_alt + sub %r11,%rsp # align with $aptr + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + jmp .Lpwr_sp_done + +.align 32 +.Lpwr_sp_alt: + lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + sub %r10,%r11 + mov \$0,%r10 + cmovc %r10,%r11 + sub %r11,%rsp +.Lpwr_sp_done: + and \$-64,%rsp + mov $num,%r10 + neg $num + + ############################################################## + # Stack layout + # + # +0 saved $num, used in reduction section + # +8 &t[2*$num], used in reduction section + # +32 saved *n0 + # +40 saved %rsp + # +48 t[2*$num] + # + mov $n0, 32(%rsp) + mov %rax, 40(%rsp) # save original %rsp +.Lpower5_body: + movq $rptr,%xmm1 # save $rptr + movq $nptr,%xmm2 # save $nptr + movq %r10, %xmm3 # -$num + movq $bptr,%xmm4 + + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + call __bn_sqr8x_internal + + movq %xmm2,$nptr + movq %xmm4,$bptr + mov $aptr,$rptr + mov 40(%rsp),%rax + lea 32(%rsp),$n0 + + call mul4x_internal + + mov 40(%rsp),%rsi # restore %rsp + mov \$1,%rax + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lpower5_epilogue: + ret +.size bn_power5,.-bn_power5 + +.globl bn_sqr8x_internal +.hidden bn_sqr8x_internal +.type bn_sqr8x_internal,\@abi-omnipotent +.align 32 +bn_sqr8x_internal: +__bn_sqr8x_internal: + ############################################################## + # Squaring part: + # + # a) multiply-n-add everything but a[i]*a[i]; + # b) shift result of a) by 1 to the left and accumulate + # a[i]*a[i] products; + # + ############################################################## + # a[1]a[0] + # a[2]a[0] + # a[3]a[0] + # a[2]a[1] + # a[4]a[0] + # a[3]a[1] + # a[5]a[0] + # a[4]a[1] + # a[3]a[2] + # a[6]a[0] + # a[5]a[1] + # a[4]a[2] + # a[7]a[0] + # a[6]a[1] + # a[5]a[2] + # a[4]a[3] + # a[7]a[1] + # a[6]a[2] + # a[5]a[3] + # a[7]a[2] + # a[6]a[3] + # a[5]a[4] + # a[7]a[3] + # a[6]a[4] + # a[7]a[4] + # a[6]a[5] + # a[7]a[5] + # a[7]a[6] + # a[1]a[0] + # a[2]a[0] + # a[3]a[0] + # a[4]a[0] + # a[5]a[0] + # a[6]a[0] + # a[7]a[0] + # a[2]a[1] + # a[3]a[1] + # a[4]a[1] + # a[5]a[1] + # a[6]a[1] + # a[7]a[1] + # a[3]a[2] + # a[4]a[2] + # a[5]a[2] + # a[6]a[2] + # a[7]a[2] + # a[4]a[3] + # a[5]a[3] + # a[6]a[3] + # a[7]a[3] + # a[5]a[4] + # a[6]a[4] + # a[7]a[4] + # a[6]a[5] + # a[7]a[5] + # a[7]a[6] + # a[0]a[0] + # a[1]a[1] + # a[2]a[2] + # a[3]a[3] + # a[4]a[4] + # a[5]a[5] + # a[6]a[6] + # a[7]a[7] + + lea 32(%r10),$i # $i=-($num-32) + lea ($aptr,$num),$aptr # end of a[] buffer, ($aptr,$i)=&ap[2] + + mov $num,$j # $j=$num + + # comments apply to $num==8 case + mov -32($aptr,$i),$a0 # a[0] + lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] + mov -24($aptr,$i),%rax # a[1] + lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] + mov -16($aptr,$i),$ai # a[2] + mov %rax,$a1 + + mul $a0 # a[1]*a[0] + mov %rax,$A0[0] # a[1]*a[0] + mov $ai,%rax # a[2] + mov %rdx,$A0[1] + mov $A0[0],-24($tptr,$i) # t[1] + + mul $a0 # a[2]*a[0] + add %rax,$A0[1] + mov $ai,%rax + adc \$0,%rdx + mov $A0[1],-16($tptr,$i) # t[2] + mov %rdx,$A0[0] + + + mov -8($aptr,$i),$ai # a[3] + mul $a1 # a[2]*a[1] + mov %rax,$A1[0] # a[2]*a[1]+t[3] + mov $ai,%rax + mov %rdx,$A1[1] + + lea ($i),$j + mul $a0 # a[3]*a[0] + add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] + mov $ai,%rax + mov %rdx,$A0[1] 
+ adc \$0,$A0[1] + add $A1[0],$A0[0] + adc \$0,$A0[1] + mov $A0[0],-8($tptr,$j) # t[3] + jmp .Lsqr4x_1st + +.align 32 +.Lsqr4x_1st: + mov ($aptr,$j),$ai # a[4] + mul $a1 # a[3]*a[1] + add %rax,$A1[1] # a[3]*a[1]+t[4] + mov $ai,%rax + mov %rdx,$A1[0] + adc \$0,$A1[0] + + mul $a0 # a[4]*a[0] + add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] + mov $ai,%rax # a[3] + mov 8($aptr,$j),$ai # a[5] + mov %rdx,$A0[0] + adc \$0,$A0[0] + add $A1[1],$A0[1] + adc \$0,$A0[0] + + + mul $a1 # a[4]*a[3] + add %rax,$A1[0] # a[4]*a[3]+t[5] + mov $ai,%rax + mov $A0[1],($tptr,$j) # t[4] + mov %rdx,$A1[1] + adc \$0,$A1[1] + + mul $a0 # a[5]*a[2] + add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] + mov $ai,%rax + mov 16($aptr,$j),$ai # a[6] + mov %rdx,$A0[1] + adc \$0,$A0[1] + add $A1[0],$A0[0] + adc \$0,$A0[1] + + mul $a1 # a[5]*a[3] + add %rax,$A1[1] # a[5]*a[3]+t[6] + mov $ai,%rax + mov $A0[0],8($tptr,$j) # t[5] + mov %rdx,$A1[0] + adc \$0,$A1[0] + + mul $a0 # a[6]*a[2] + add %rax,$A0[1] # a[6]*a[2]+a[5]*a[3]+t[6] + mov $ai,%rax # a[3] + mov 24($aptr,$j),$ai # a[7] + mov %rdx,$A0[0] + adc \$0,$A0[0] + add $A1[1],$A0[1] + adc \$0,$A0[0] + + + mul $a1 # a[6]*a[5] + add %rax,$A1[0] # a[6]*a[5]+t[7] + mov $ai,%rax + mov $A0[1],16($tptr,$j) # t[6] + mov %rdx,$A1[1] + adc \$0,$A1[1] + lea 32($j),$j + + mul $a0 # a[7]*a[4] + add %rax,$A0[0] # a[7]*a[4]+a[6]*a[5]+t[6] + mov $ai,%rax + mov %rdx,$A0[1] + adc \$0,$A0[1] + add $A1[0],$A0[0] + adc \$0,$A0[1] + mov $A0[0],-8($tptr,$j) # t[7] + + cmp \$0,$j + jne .Lsqr4x_1st + + mul $a1 # a[7]*a[5] + add %rax,$A1[1] + lea 16($i),$i + adc \$0,%rdx + add $A0[1],$A1[1] + adc \$0,%rdx + + mov $A1[1],($tptr) # t[8] + mov %rdx,$A1[0] + mov %rdx,8($tptr) # t[9] + jmp .Lsqr4x_outer + +.align 32 +.Lsqr4x_outer: # comments apply to $num==6 case + mov -32($aptr,$i),$a0 # a[0] + lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] + mov -24($aptr,$i),%rax # a[1] + lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] + mov -16($aptr,$i),$ai # a[2] + mov %rax,$a1 + + mul $a0 # a[1]*a[0] + mov -24($tptr,$i),$A0[0] # t[1] + add %rax,$A0[0] # a[1]*a[0]+t[1] + mov $ai,%rax # a[2] + adc \$0,%rdx + mov $A0[0],-24($tptr,$i) # t[1] + mov %rdx,$A0[1] + + mul $a0 # a[2]*a[0] + add %rax,$A0[1] + mov $ai,%rax + adc \$0,%rdx + add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2] + mov %rdx,$A0[0] + adc \$0,$A0[0] + mov $A0[1],-16($tptr,$i) # t[2] + + xor $A1[0],$A1[0] + + mov -8($aptr,$i),$ai # a[3] + mul $a1 # a[2]*a[1] + add %rax,$A1[0] # a[2]*a[1]+t[3] + mov $ai,%rax + adc \$0,%rdx + add -8($tptr,$i),$A1[0] + mov %rdx,$A1[1] + adc \$0,$A1[1] + + mul $a0 # a[3]*a[0] + add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] + mov $ai,%rax + adc \$0,%rdx + add $A1[0],$A0[0] + mov %rdx,$A0[1] + adc \$0,$A0[1] + mov $A0[0],-8($tptr,$i) # t[3] + + lea ($i),$j + jmp .Lsqr4x_inner + +.align 32 +.Lsqr4x_inner: + mov ($aptr,$j),$ai # a[4] + mul $a1 # a[3]*a[1] + add %rax,$A1[1] # a[3]*a[1]+t[4] + mov $ai,%rax + mov %rdx,$A1[0] + adc \$0,$A1[0] + add ($tptr,$j),$A1[1] + adc \$0,$A1[0] + + .byte 0x67 + mul $a0 # a[4]*a[0] + add %rax,$A0[1] # a[4]*a[0]+a[3]*a[1]+t[4] + mov $ai,%rax # a[3] + mov 8($aptr,$j),$ai # a[5] + mov %rdx,$A0[0] + adc \$0,$A0[0] + add $A1[1],$A0[1] + adc \$0,$A0[0] + + mul $a1 # a[4]*a[3] + add %rax,$A1[0] # a[4]*a[3]+t[5] + mov $A0[1],($tptr,$j) # t[4] + mov $ai,%rax + mov %rdx,$A1[1] + adc \$0,$A1[1] + add 8($tptr,$j),$A1[0] + lea 16($j),$j # j++ + adc \$0,$A1[1] + + mul $a0 # a[5]*a[2] + add %rax,$A0[0] # a[5]*a[2]+a[4]*a[3]+t[5] + mov $ai,%rax + adc \$0,%rdx + add $A1[0],$A0[0] + mov %rdx,$A0[1] + adc 
\$0,$A0[1] + mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below + + cmp \$0,$j + jne .Lsqr4x_inner + + .byte 0x67 + mul $a1 # a[5]*a[3] + add %rax,$A1[1] + adc \$0,%rdx + add $A0[1],$A1[1] + adc \$0,%rdx + + mov $A1[1],($tptr) # t[6], "preloaded t[2]" below + mov %rdx,$A1[0] + mov %rdx,8($tptr) # t[7], "preloaded t[3]" below + + add \$16,$i + jnz .Lsqr4x_outer + + # comments apply to $num==4 case + mov -32($aptr),$a0 # a[0] + lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num] + mov -24($aptr),%rax # a[1] + lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"] + mov -16($aptr),$ai # a[2] + mov %rax,$a1 + + mul $a0 # a[1]*a[0] + add %rax,$A0[0] # a[1]*a[0]+t[1], preloaded t[1] + mov $ai,%rax # a[2] + mov %rdx,$A0[1] + adc \$0,$A0[1] + + mul $a0 # a[2]*a[0] + add %rax,$A0[1] + mov $ai,%rax + mov $A0[0],-24($tptr) # t[1] + mov %rdx,$A0[0] + adc \$0,$A0[0] + add $A1[1],$A0[1] # a[2]*a[0]+t[2], preloaded t[2] + mov -8($aptr),$ai # a[3] + adc \$0,$A0[0] + + mul $a1 # a[2]*a[1] + add %rax,$A1[0] # a[2]*a[1]+t[3], preloaded t[3] + mov $ai,%rax + mov $A0[1],-16($tptr) # t[2] + mov %rdx,$A1[1] + adc \$0,$A1[1] + + mul $a0 # a[3]*a[0] + add %rax,$A0[0] # a[3]*a[0]+a[2]*a[1]+t[3] + mov $ai,%rax + mov %rdx,$A0[1] + adc \$0,$A0[1] + add $A1[0],$A0[0] + adc \$0,$A0[1] + mov $A0[0],-8($tptr) # t[3] + + mul $a1 # a[3]*a[1] + add %rax,$A1[1] + mov -16($aptr),%rax # a[2] + adc \$0,%rdx + add $A0[1],$A1[1] + adc \$0,%rdx + + mov $A1[1],($tptr) # t[4] + mov %rdx,$A1[0] + mov %rdx,8($tptr) # t[5] + + mul $ai # a[2]*a[3] +___ +{ +my ($shift,$carry)=($a0,$a1); +my @S=(@A1,$ai,$n0); +$code.=<<___; + add \$16,$i + xor $shift,$shift + sub $num,$i # $i=16-$num + xor $carry,$carry + + add $A1[0],%rax # t[5] + adc \$0,%rdx + mov %rax,8($tptr) # t[5] + mov %rdx,16($tptr) # t[6] + mov $carry,24($tptr) # t[7] + + mov -16($aptr,$i),%rax # a[0] + lea 48+8(%rsp),$tptr + xor $A0[0],$A0[0] # t[0] + mov 8($tptr),$A0[1] # t[1] + + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov 16($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 24($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov -8($aptr,$i),%rax # a[i+1] # prefetch + mov $S[0],($tptr) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift + mov $S[1],8($tptr) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mov 32($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 40($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[2] + mov 0($aptr,$i),%rax # a[i+1] # prefetch + mov $S[2],16($tptr) + adc %rdx,$S[3] + lea 16($i),$i + mov $S[3],24($tptr) + sbb $carry,$carry # mov cf,$carry + lea 64($tptr),$tptr + jmp .Lsqr4x_shift_n_add + +.align 32 +.Lsqr4x_shift_n_add: + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov -16($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov -8($aptr,$i),%rax # a[i+1] # prefetch + mov $S[0],-32($tptr) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift + 
mov $S[1],-24($tptr) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mov 0($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 8($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[2] + mov 0($aptr,$i),%rax # a[i+1] # prefetch + mov $S[2],-16($tptr) + adc %rdx,$S[3] + + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + mov $S[3],-8($tptr) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov 16($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 24($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov 8($aptr,$i),%rax # a[i+1] # prefetch + mov $S[0],0($tptr) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1 | shift + mov $S[1],8($tptr) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mov 32($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov 40($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[2] + mov 16($aptr,$i),%rax # a[i+1] # prefetch + mov $S[2],16($tptr) + adc %rdx,$S[3] + mov $S[3],24($tptr) + sbb $carry,$carry # mov cf,$carry + lea 64($tptr),$tptr + add \$32,$i + jnz .Lsqr4x_shift_n_add + + lea ($shift,$A0[0],2),$S[0] # t[2*i]<<1 | shift + .byte 0x67 + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[1] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[1] # | t[2*i]>>63 + mov -16($tptr),$A0[0] # t[2*i+2] # prefetch + mov $A0[1],$shift # shift=t[2*i+1]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch + adc %rax,$S[0] + mov -8($aptr),%rax # a[i+1] # prefetch + mov $S[0],-32($tptr) + adc %rdx,$S[1] + + lea ($shift,$A0[0],2),$S[2] # t[2*i]<<1|shift + mov $S[1],-24($tptr) + sbb $carry,$carry # mov cf,$carry + shr \$63,$A0[0] + lea ($j,$A0[1],2),$S[3] # t[2*i+1]<<1 | + shr \$63,$A0[1] + or $A0[0],$S[3] # | t[2*i]>>63 + mul %rax # a[i]*a[i] + neg $carry # mov $carry,cf + adc %rax,$S[2] + adc %rdx,$S[3] + mov $S[2],-16($tptr) + mov $S[3],-8($tptr) +___ +} +###################################################################### +# Montgomery reduction part, "word-by-word" algorithm. +# +# This new path is inspired by multiple submissions from Intel, by +# Shay Gueron, Vlad Krasnov, Erdinc Ozturk, James Guilford, +# Vinodh Gopal... 
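The comment above describes ordinary word-by-word Montgomery reduction. As a reading aid only (not part of the patch), the Python sketch below shows the same algorithm with plain integers: it ignores the zero-interleaved n[] layout (the 16*n strides used by .L8x_reduce/.L8x_tail), the eight-limb unrolling and the carry scheduling of the assembly, and the helper names are made up. Python 3.8+ is assumed for pow(n, -1, W).

W = 1 << 64
MASK = W - 1

def limbs(x, k):
    # split integer x into k little-endian 64-bit limbs (demo helper)
    return [(x >> (64 * i)) & MASK for i in range(k)]

def from_limbs(v):
    return sum(l << (64 * i) for i, l in enumerate(v))

def mont_reduce(t, n, n0):
    # t: 2*num limbs of a square/product (value < n*R); n: num limbs, odd;
    # n0 = -n[0]^-1 mod 2^64.  Returns (t * R^-1) mod n, with R = 2^(64*num).
    num = len(n)
    t = list(t) + [0]                    # room for the top-most carry
    for i in range(num):
        m = (t[i] * n0) & MASK           # cf. "imulq 32+8(%rsp),$m0  # n0*a[0]"
        carry = 0
        for j in range(num):             # inner pass over n[], cf. .L8x_reduce
            acc = t[i + j] + m * n[j] + carry
            t[i + j] = acc & MASK
            carry = acc >> 64
        k = i + num                      # fold the carry into the upper half
        while carry:
            acc = t[k] + carry
            t[k] = acc & MASK
            carry = acc >> 64
            k += 1
    r, mod = from_limbs(t[num:]), from_limbs(n)
    if r >= mod:                         # final conditional subtraction,
        r -= mod                         # cf. the .Lsqr4x_sub post-condition
    return limbs(r, num)

if __name__ == "__main__":               # quick self-check of the sketch
    import random
    num = 8                              # 512-bit modulus, a multiple of 8 limbs
    n = random.getrandbits(64 * num) | (1 << (64 * num - 1)) | 1
    n0 = (-pow(n, -1, W)) % W
    a = random.randrange(n)
    r = from_limbs(mont_reduce(limbs(a * a, 2 * num), limbs(n, num), n0))
    assert r == a * a * pow(2, -64 * num, n) % n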
+{ +my ($nptr,$tptr,$carry,$m0)=("%rbp","%rdi","%rsi","%rbx"); - movdqu ($ap),%xmm1 - movdqa %xmm0,(%rsp) - movdqu %xmm1,($rp) - jmp .Lcopy4x -.align 16 -.Lcopy4x: # copy or in-place refresh - movdqu 16($ap,$i),%xmm2 - movdqu 32($ap,$i),%xmm1 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) - movdqa %xmm0,32(%rsp,$i) - movdqu %xmm1,32($rp,$i) - lea 32($i),$i - dec $j - jnz .Lcopy4x - - shl \$2,$num - movdqu 16($ap,$i),%xmm2 - movdqa %xmm0,16(%rsp,$i) - movdqu %xmm2,16($rp,$i) +$code.=<<___; + movq %xmm2,$nptr +sqr8x_reduction: + xor %rax,%rax + lea ($nptr,$num,2),%rcx # end of n[] + lea 48+8(%rsp,$num,2),%rdx # end of t[] buffer + mov %rcx,0+8(%rsp) + lea 48+8(%rsp,$num),$tptr # end of initial t[] window + mov %rdx,8+8(%rsp) + neg $num + jmp .L8x_reduction_loop + +.align 32 +.L8x_reduction_loop: + lea ($tptr,$num),$tptr # start of current t[] window + .byte 0x66 + mov 8*0($tptr),$m0 + mov 8*1($tptr),%r9 + mov 8*2($tptr),%r10 + mov 8*3($tptr),%r11 + mov 8*4($tptr),%r12 + mov 8*5($tptr),%r13 + mov 8*6($tptr),%r14 + mov 8*7($tptr),%r15 + mov %rax,(%rdx) # store top-most carry bit + lea 8*8($tptr),$tptr + + .byte 0x67 + mov $m0,%r8 + imulq 32+8(%rsp),$m0 # n0*a[0] + mov 16*0($nptr),%rax # n[0] + mov \$8,%ecx + jmp .L8x_reduce + +.align 32 +.L8x_reduce: + mulq $m0 + mov 16*1($nptr),%rax # n[1] + neg %r8 + mov %rdx,%r8 + adc \$0,%r8 + + mulq $m0 + add %rax,%r9 + mov 16*2($nptr),%rax + adc \$0,%rdx + add %r9,%r8 + mov $m0,48-8+8(%rsp,%rcx,8) # put aside n0*a[i] + mov %rdx,%r9 + adc \$0,%r9 + + mulq $m0 + add %rax,%r10 + mov 16*3($nptr),%rax + adc \$0,%rdx + add %r10,%r9 + mov 32+8(%rsp),$carry # pull n0, borrow $carry + mov %rdx,%r10 + adc \$0,%r10 + + mulq $m0 + add %rax,%r11 + mov 16*4($nptr),%rax + adc \$0,%rdx + imulq %r8,$carry # modulo-scheduled + add %r11,%r10 + mov %rdx,%r11 + adc \$0,%r11 + + mulq $m0 + add %rax,%r12 + mov 16*5($nptr),%rax + adc \$0,%rdx + add %r12,%r11 + mov %rdx,%r12 + adc \$0,%r12 + + mulq $m0 + add %rax,%r13 + mov 16*6($nptr),%rax + adc \$0,%rdx + add %r13,%r12 + mov %rdx,%r13 + adc \$0,%r13 + + mulq $m0 + add %rax,%r14 + mov 16*7($nptr),%rax + adc \$0,%rdx + add %r14,%r13 + mov %rdx,%r14 + adc \$0,%r14 + + mulq $m0 + mov $carry,$m0 # n0*a[i] + add %rax,%r15 + mov 16*0($nptr),%rax # n[0] + adc \$0,%rdx + add %r15,%r14 + mov %rdx,%r15 + adc \$0,%r15 + + dec %ecx + jnz .L8x_reduce + + lea 16*8($nptr),$nptr + xor %rax,%rax + mov 8+8(%rsp),%rdx # pull end of t[] + cmp 0+8(%rsp),$nptr # end of n[]? 
+ jae .L8x_no_tail + + .byte 0x66 + add 8*0($tptr),%r8 + adc 8*1($tptr),%r9 + adc 8*2($tptr),%r10 + adc 8*3($tptr),%r11 + adc 8*4($tptr),%r12 + adc 8*5($tptr),%r13 + adc 8*6($tptr),%r14 + adc 8*7($tptr),%r15 + sbb $carry,$carry # top carry + + mov 48+56+8(%rsp),$m0 # pull n0*a[0] + mov \$8,%ecx + mov 16*0($nptr),%rax + jmp .L8x_tail + +.align 32 +.L8x_tail: + mulq $m0 + add %rax,%r8 + mov 16*1($nptr),%rax + mov %r8,($tptr) # save result + mov %rdx,%r8 + adc \$0,%r8 + + mulq $m0 + add %rax,%r9 + mov 16*2($nptr),%rax + adc \$0,%rdx + add %r9,%r8 + lea 8($tptr),$tptr # $tptr++ + mov %rdx,%r9 + adc \$0,%r9 + + mulq $m0 + add %rax,%r10 + mov 16*3($nptr),%rax + adc \$0,%rdx + add %r10,%r9 + mov %rdx,%r10 + adc \$0,%r10 + + mulq $m0 + add %rax,%r11 + mov 16*4($nptr),%rax + adc \$0,%rdx + add %r11,%r10 + mov %rdx,%r11 + adc \$0,%r11 + + mulq $m0 + add %rax,%r12 + mov 16*5($nptr),%rax + adc \$0,%rdx + add %r12,%r11 + mov %rdx,%r12 + adc \$0,%r12 + + mulq $m0 + add %rax,%r13 + mov 16*6($nptr),%rax + adc \$0,%rdx + add %r13,%r12 + mov %rdx,%r13 + adc \$0,%r13 + + mulq $m0 + add %rax,%r14 + mov 16*7($nptr),%rax + adc \$0,%rdx + add %r14,%r13 + mov %rdx,%r14 + adc \$0,%r14 + + mulq $m0 + mov 48-16+8(%rsp,%rcx,8),$m0# pull n0*a[i] + add %rax,%r15 + adc \$0,%rdx + add %r15,%r14 + mov 16*0($nptr),%rax # pull n[0] + mov %rdx,%r15 + adc \$0,%r15 + + dec %ecx + jnz .L8x_tail + + lea 16*8($nptr),$nptr + mov 8+8(%rsp),%rdx # pull end of t[] + cmp 0+8(%rsp),$nptr # end of n[]? + jae .L8x_tail_done # break out of loop + + mov 48+56+8(%rsp),$m0 # pull n0*a[0] + neg $carry + mov 8*0($nptr),%rax # pull n[0] + adc 8*0($tptr),%r8 + adc 8*1($tptr),%r9 + adc 8*2($tptr),%r10 + adc 8*3($tptr),%r11 + adc 8*4($tptr),%r12 + adc 8*5($tptr),%r13 + adc 8*6($tptr),%r14 + adc 8*7($tptr),%r15 + sbb $carry,$carry # top carry + + mov \$8,%ecx + jmp .L8x_tail + +.align 32 +.L8x_tail_done: + add (%rdx),%r8 # can this overflow? + xor %rax,%rax + + neg $carry +.L8x_no_tail: + adc 8*0($tptr),%r8 + adc 8*1($tptr),%r9 + adc 8*2($tptr),%r10 + adc 8*3($tptr),%r11 + adc 8*4($tptr),%r12 + adc 8*5($tptr),%r13 + adc 8*6($tptr),%r14 + adc 8*7($tptr),%r15 + adc \$0,%rax # top-most carry + mov -16($nptr),%rcx # np[num-1] + xor $carry,$carry + + movq %xmm2,$nptr # restore $nptr + + mov %r8,8*0($tptr) # store top 512 bits + mov %r9,8*1($tptr) + movq %xmm3,$num # $num is %r9, can't be moved upwards + mov %r10,8*2($tptr) + mov %r11,8*3($tptr) + mov %r12,8*4($tptr) + mov %r13,8*5($tptr) + mov %r14,8*6($tptr) + mov %r15,8*7($tptr) + lea 8*8($tptr),$tptr + + cmp %rdx,$tptr # end of t[]? 
+ jb .L8x_reduction_loop +___ +} +############################################################## +# Post-condition, 4x unrolled +# +{ +my ($tptr,$nptr)=("%rbx","%rbp"); +$code.=<<___; + #xor %rsi,%rsi # %rsi was $carry above + sub %r15,%rcx # compare top-most words + lea (%rdi,$num),$tptr # %rdi was $tptr above + adc %rsi,%rsi + mov $num,%rcx + or %rsi,%rax + movq %xmm1,$rptr # restore $rptr + xor \$1,%rax + movq %xmm1,$aptr # prepare for back-to-back call + lea ($nptr,%rax,8),$nptr + sar \$3+2,%rcx # cf=0 + jmp .Lsqr4x_sub + +.align 32 +.Lsqr4x_sub: + .byte 0x66 + mov 8*0($tptr),%r12 + mov 8*1($tptr),%r13 + sbb 16*0($nptr),%r12 + mov 8*2($tptr),%r14 + sbb 16*1($nptr),%r13 + mov 8*3($tptr),%r15 + lea 8*4($tptr),$tptr + sbb 16*2($nptr),%r14 + mov %r12,8*0($rptr) + sbb 16*3($nptr),%r15 + lea 16*4($nptr),$nptr + mov %r13,8*1($rptr) + mov %r14,8*2($rptr) + mov %r15,8*3($rptr) + lea 8*4($rptr),$rptr + + inc %rcx # pass %cf + jnz .Lsqr4x_sub ___ } $code.=<<___; - mov 8(%rsp,$num,8),%rsi # restore %rsp - mov \$1,%rax + mov $num,%r10 # prepare for back-to-back call + neg $num # restore $num + ret +.size bn_sqr8x_internal,.-bn_sqr8x_internal +___ +{ +$code.=<<___; +.globl bn_from_montgomery +.type bn_from_montgomery,\@abi-omnipotent +.align 32 +bn_from_montgomery: + testl \$7,`($win64?"48(%rsp)":"%r9d")` + jz bn_from_mont8x + xor %eax,%eax + ret +.size bn_from_montgomery,.-bn_from_montgomery + +.type bn_from_mont8x,\@function,6 +.align 32 +bn_from_mont8x: + .byte 0x67 + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 ___ $code.=<<___ if ($win64); - movaps (%rsi),%xmm6 - movaps 0x10(%rsi),%xmm7 - lea 0x28(%rsi),%rsi + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) ___ $code.=<<___; - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lmul4x_epilogue: + .byte 0x67 + mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes + shl \$3+2,%r10d # 4*$num + neg $num + mov ($n0),$n0 # *n0 + + ############################################################## + # ensure that stack frame doesn't alias with $aptr+4*$num + # modulo 4096, which covers ret[num], am[num] and n[2*num] + # (see bn_exp.c). this is done to allow memory disambiguation + # logic do its magic. 
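
The lea/sub/cmovc sequence that follows is easier to digest written out in C. The helper name place_frame and the uintptr_t typing are assumptions for illustration only; sp is the incoming stack pointer, ap the a[] argument and num the operand size in bytes, and the goal, per the comment above, is to keep the scratch frame from partially overlapping the ret/am/n area at the same offsets within a 4 KB page, which would trip the memory-disambiguation logic.

#include <stdint.h>
#include <stddef.h>

/*
 * The frame-placement arithmetic below, written out in C.  The scratch
 * frame is 2*num bytes plus 64 bytes of fixed slots.  Helper name and
 * types are illustrative, not OpenSSL's.
 */
static uintptr_t place_frame(uintptr_t sp, uintptr_t ap, size_t num)
{
    uintptr_t cand = sp - 2 * num - 64;      /* lea -64(%rsp,$num,2),%r11   */
    uintptr_t dist = (cand - ap) & 4095;     /* distance within a 4 KB page */

    if (4 * num >= dist) {
        /* the frame's page offset falls inside the 4*num-byte window:
         * drop it so that it lines up with ap exactly ("align with $aptr") */
        sp = cand - dist;
    } else {
        /* otherwise allocate and cap the extra page-offset distance at
         * 4096-64-2*num bytes (the sub/cmovc pair)                          */
        uintptr_t limit = 4096 - 64 - 2 * num;
        sp = cand;
        if (dist > limit)
            sp -= dist - limit;
    }
    return sp & ~(uintptr_t)63;              /* and $-64,%rsp               */
}

The same placement pattern is repeated below in bn_mulx4x_mont_gather5 and bn_powerx5.
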
+ # + lea -64(%rsp,$num,2),%r11 + sub $aptr,%r11 + and \$4095,%r11 + cmp %r11,%r10 + jb .Lfrom_sp_alt + sub %r11,%rsp # align with $aptr + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + jmp .Lfrom_sp_done + +.align 32 +.Lfrom_sp_alt: + lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + sub %r10,%r11 + mov \$0,%r10 + cmovc %r10,%r11 + sub %r11,%rsp +.Lfrom_sp_done: + and \$-64,%rsp + mov $num,%r10 + neg $num + + ############################################################## + # Stack layout + # + # +0 saved $num, used in reduction section + # +8 &t[2*$num], used in reduction section + # +32 saved *n0 + # +40 saved %rsp + # +48 t[2*$num] + # + mov $n0, 32(%rsp) + mov %rax, 40(%rsp) # save original %rsp +.Lfrom_body: + mov $num,%r11 + lea 48(%rsp),%rax + pxor %xmm0,%xmm0 + jmp .Lmul_by_1 + +.align 32 +.Lmul_by_1: + movdqu ($aptr),%xmm1 + movdqu 16($aptr),%xmm2 + movdqu 32($aptr),%xmm3 + movdqa %xmm0,(%rax,$num) + movdqu 48($aptr),%xmm4 + movdqa %xmm0,16(%rax,$num) + .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 # lea 64($aptr),$aptr + movdqa %xmm1,(%rax) + movdqa %xmm0,32(%rax,$num) + movdqa %xmm2,16(%rax) + movdqa %xmm0,48(%rax,$num) + movdqa %xmm3,32(%rax) + movdqa %xmm4,48(%rax) + lea 64(%rax),%rax + sub \$64,%r11 + jnz .Lmul_by_1 + + movq $rptr,%xmm1 + movq $nptr,%xmm2 + .byte 0x67 + mov $nptr,%rbp + movq %r10, %xmm3 # -num +___ +$code.=<<___ if ($addx); + mov OPENSSL_ia32cap_P+8(%rip),%r11d + and \$0x80100,%r11d + cmp \$0x80100,%r11d + jne .Lfrom_mont_nox + + lea (%rax,$num),$rptr + call sqrx8x_reduction + + pxor %xmm0,%xmm0 + lea 48(%rsp),%rax + mov 40(%rsp),%rsi # restore %rsp + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_nox: +___ +$code.=<<___; + call sqr8x_reduction + + pxor %xmm0,%xmm0 + lea 48(%rsp),%rax + mov 40(%rsp),%rsi # restore %rsp + jmp .Lfrom_mont_zero + +.align 32 +.Lfrom_mont_zero: + movdqa %xmm0,16*0(%rax) + movdqa %xmm0,16*1(%rax) + movdqa %xmm0,16*2(%rax) + movdqa %xmm0,16*3(%rax) + lea 16*4(%rax),%rax + sub \$32,$num + jnz .Lfrom_mont_zero + + mov \$1,%rax + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lfrom_epilogue: ret -.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 +.size bn_from_mont8x,.-bn_from_mont8x ___ +} }}} + +if ($addx) {{{ +my $bp="%rdx"; # restore original value + +$code.=<<___; +.type bn_mulx4x_mont_gather5,\@function,6 +.align 32 +bn_mulx4x_mont_gather5: +.Lmulx4x_enter: + .byte 0x67 + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +___ +$code.=<<___; + .byte 0x67 + mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes + shl \$3+2,%r10d # 4*$num + neg $num # -$num + mov ($n0),$n0 # *n0 + + ############################################################## + # ensure that stack frame doesn't alias with $aptr+4*$num + # modulo 4096, which covers a[num], ret[num] and n[2*num] + # (see bn_exp.c). this is done to allow memory disambiguation + # logic do its magic. [excessive frame is allocated in order + # to allow bn_from_mont8x to clear it.] 
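
Stepping back to bn_from_mont8x as a whole before the mulx path continues: converting a value out of the Montgomery domain is just a Montgomery reduction of the value itself, i.e. a Montgomery multiplication by 1. That is why the function only copies a[] into the lower half of t[] (.Lmul_by_1), zeroes the upper half, and runs the same sqr8x/sqrx8x reduction. In terms of the redc_words() sketch shown earlier (both helpers and MAX_WORDS are illustrative names, not OpenSSL's):

#include <stdint.h>
#include <string.h>

#define MAX_WORDS 64    /* assumes num <= 64 words, i.e. up to 4096-bit m */

/* What bn_from_mont8x computes: r = a * 2^(-64*n) mod m. */
static void from_montgomery(uint64_t *r, const uint64_t *a,
                            const uint64_t *m, uint64_t n0, int n)
{
    uint64_t t[2 * MAX_WORDS];                  /* the frame's t[2*num]      */

    memcpy(t, a, n * sizeof(uint64_t));         /* .Lmul_by_1: copy a[] ...  */
    memset(t + n, 0, n * sizeof(uint64_t));     /* ... and zero the top half */
    redc_words(r, t, m, n0, n);                 /* then the 8x reduction     */
}

bn_from_montgomery above dispatches here only when num is a multiple of 8; the bn_exp.c hunk further down calls it once after the power ladder and falls back to BN_from_montgomery when it returns zero.
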
+ # + lea -64(%rsp,$num,2),%r11 + sub $ap,%r11 + and \$4095,%r11 + cmp %r11,%r10 + jb .Lmulx4xsp_alt + sub %r11,%rsp # align with $aptr + lea -64(%rsp,$num,2),%rsp # alloca(frame+$num) + jmp .Lmulx4xsp_done + +.align 32 +.Lmulx4xsp_alt: + lea 4096-64(,$num,2),%r10 # 4096-frame-$num + lea -64(%rsp,$num,2),%rsp # alloca(frame+$num) + sub %r10,%r11 + mov \$0,%r10 + cmovc %r10,%r11 + sub %r11,%rsp +.Lmulx4xsp_done: + and \$-64,%rsp # ensure alignment + ############################################################## + # Stack layout + # +0 -num + # +8 off-loaded &b[i] + # +16 end of b[num] + # +24 inner counter + # +32 saved n0 + # +40 saved %rsp + # +48 + # +56 saved rp + # +64 tmp[num+1] + # + mov $n0, 32(%rsp) # save *n0 + mov %rax,40(%rsp) # save original %rsp +.Lmulx4x_body: + call mulx4x_internal + + mov 40(%rsp),%rsi # restore %rsp + mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps -88(%rsi),%xmm6 + movaps -72(%rsi),%xmm7 +___ +$code.=<<___; + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lmulx4x_epilogue: + ret +.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 + +.type mulx4x_internal,\@abi-omnipotent +.align 32 +mulx4x_internal: + .byte 0x4c,0x89,0x8c,0x24,0x08,0x00,0x00,0x00 # mov $num,8(%rsp) # save -$num + .byte 0x67 + neg $num # restore $num + shl \$5,$num + lea 256($bp,$num),%r13 + shr \$5+5,$num + mov `($win64?56:8)`(%rax),%r10d # load 7th argument + sub \$1,$num + mov %r13,16+8(%rsp) # end of b[num] + mov $num,24+8(%rsp) # inner counter + mov $rp, 56+8(%rsp) # save $rp +___ +my ($aptr, $bptr, $nptr, $tptr, $mi, $bi, $zero, $num)= + ("%rsi","%rdi","%rcx","%rbx","%r8","%r9","%rbp","%rax"); +my $rptr=$bptr; +my $STRIDE=2**5*8; # 5 is "window size" +my $N=$STRIDE/4; # should match cache line size +$code.=<<___; + mov %r10,%r11 + shr \$`log($N/8)/log(2)`,%r10 + and \$`$N/8-1`,%r11 + not %r10 + lea .Lmagic_masks(%rip),%rax + and \$`2**5/($N/8)-1`,%r10 # 5 is "window size" + lea 96($bp,%r11,8),$bptr # pointer within 1st cache line + movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which + movq 8(%rax,%r10,8),%xmm5 # cache line contains element + add \$7,%r11 + movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument + movq 24(%rax,%r10,8),%xmm7 + and \$7,%r11 + + movq `0*$STRIDE/4-96`($bptr),%xmm0 + lea $STRIDE($bptr),$tptr # borrow $tptr + movq `1*$STRIDE/4-96`($bptr),%xmm1 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bptr),%xmm2 + pand %xmm5,%xmm1 + movq `3*$STRIDE/4-96`($bptr),%xmm3 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + movq `0*$STRIDE/4-96`($tptr),%xmm1 + pand %xmm7,%xmm3 + por %xmm2,%xmm0 + movq `1*$STRIDE/4-96`($tptr),%xmm2 + por %xmm3,%xmm0 + .byte 0x67,0x67 + pand %xmm4,%xmm1 + movq `2*$STRIDE/4-96`($tptr),%xmm3 + + movq %xmm0,%rdx # bp[0] + movq `3*$STRIDE/4-96`($tptr),%xmm0 + lea 2*$STRIDE($bptr),$bptr # next &b[i] + pand %xmm5,%xmm2 + .byte 0x67,0x67 + pand %xmm6,%xmm3 + ############################################################## + # $tptr is chosen so that writing to top-most element of the + # vector occurs just "above" references to powers table, + # "above" modulo cache-line size, which effectively precludes + # possibility of memory disambiguation logic failure when + # accessing the table. + # + lea 64+8*4+8(%rsp,%r11,8),$tptr + + mov %rdx,$bi + mulx 0*8($aptr),$mi,%rax # a[0]*b[0] + mulx 1*8($aptr),%r11,%r12 # a[1]*b[0] + add %rax,%r11 + mulx 2*8($aptr),%rax,%r13 # ... 
+ adc %rax,%r12 + adc \$0,%r13 + mulx 3*8($aptr),%rax,%r14 + + mov $mi,%r15 + imulq 32+8(%rsp),$mi # "t[0]"*n0 + xor $zero,$zero # cf=0, of=0 + mov $mi,%rdx + + por %xmm2,%xmm1 + pand %xmm7,%xmm0 + por %xmm3,%xmm1 + mov $bptr,8+8(%rsp) # off-load &b[i] + por %xmm1,%xmm0 + + .byte 0x48,0x8d,0xb6,0x20,0x00,0x00,0x00 # lea 4*8($aptr),$aptr + adcx %rax,%r13 + adcx $zero,%r14 # cf=0 + + mulx 0*16($nptr),%rax,%r10 + adcx %rax,%r15 # discarded + adox %r11,%r10 + mulx 1*16($nptr),%rax,%r11 + adcx %rax,%r10 + adox %r12,%r11 + mulx 2*16($nptr),%rax,%r12 + mov 24+8(%rsp),$bptr # counter value + .byte 0x66 + mov %r10,-8*4($tptr) + adcx %rax,%r11 + adox %r13,%r12 + mulx 3*16($nptr),%rax,%r15 + .byte 0x67,0x67 + mov $bi,%rdx + mov %r11,-8*3($tptr) + adcx %rax,%r12 + adox $zero,%r15 # of=0 + .byte 0x48,0x8d,0x89,0x40,0x00,0x00,0x00 # lea 4*16($nptr),$nptr + mov %r12,-8*2($tptr) + #jmp .Lmulx4x_1st + +.align 32 +.Lmulx4x_1st: + adcx $zero,%r15 # cf=0, modulo-scheduled + mulx 0*8($aptr),%r10,%rax # a[4]*b[0] + adcx %r14,%r10 + mulx 1*8($aptr),%r11,%r14 # a[5]*b[0] + adcx %rax,%r11 + mulx 2*8($aptr),%r12,%rax # ... + adcx %r14,%r12 + mulx 3*8($aptr),%r13,%r14 + .byte 0x67,0x67 + mov $mi,%rdx + adcx %rax,%r13 + adcx $zero,%r14 # cf=0 + lea 4*8($aptr),$aptr + lea 4*8($tptr),$tptr + + adox %r15,%r10 + mulx 0*16($nptr),%rax,%r15 + adcx %rax,%r10 + adox %r15,%r11 + mulx 1*16($nptr),%rax,%r15 + adcx %rax,%r11 + adox %r15,%r12 + mulx 2*16($nptr),%rax,%r15 + mov %r10,-5*8($tptr) + adcx %rax,%r12 + mov %r11,-4*8($tptr) + adox %r15,%r13 + mulx 3*16($nptr),%rax,%r15 + mov $bi,%rdx + mov %r12,-3*8($tptr) + adcx %rax,%r13 + adox $zero,%r15 + lea 4*16($nptr),$nptr + mov %r13,-2*8($tptr) + + dec $bptr # of=0, pass cf + jnz .Lmulx4x_1st + + mov 8(%rsp),$num # load -num + movq %xmm0,%rdx # bp[1] + adc $zero,%r15 # modulo-scheduled + lea ($aptr,$num),$aptr # rewind $aptr + add %r15,%r14 + mov 8+8(%rsp),$bptr # re-load &b[i] + adc $zero,$zero # top-most carry + mov %r14,-1*8($tptr) + jmp .Lmulx4x_outer + +.align 32 +.Lmulx4x_outer: + mov $zero,($tptr) # save top-most carry + lea 4*8($tptr,$num),$tptr # rewind $tptr + mulx 0*8($aptr),$mi,%r11 # a[0]*b[i] + xor $zero,$zero # cf=0, of=0 + mov %rdx,$bi + mulx 1*8($aptr),%r14,%r12 # a[1]*b[i] + adox -4*8($tptr),$mi # +t[0] + adcx %r14,%r11 + mulx 2*8($aptr),%r15,%r13 # ... 
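+	# Note on the mulx/adcx/adox pattern used throughout this loop:
+	# mulx produces a 64x64->128-bit product without touching the flags,
+	# adcx adds using only CF and adox adds using only OF, so two
+	# independent carry chains run in parallel, one accumulating the
+	# partial products and one folding in the previously stored t[]
+	# words, without serializing on a single carry flag.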
+ adox -3*8($tptr),%r11 + adcx %r15,%r12 + mulx 3*8($aptr),%rdx,%r14 + adox -2*8($tptr),%r12 + adcx %rdx,%r13 + lea ($nptr,$num,2),$nptr # rewind $nptr + lea 4*8($aptr),$aptr + adox -1*8($tptr),%r13 + adcx $zero,%r14 + adox $zero,%r14 + + .byte 0x67 + mov $mi,%r15 + imulq 32+8(%rsp),$mi # "t[0]"*n0 + + movq `0*$STRIDE/4-96`($bptr),%xmm0 + .byte 0x67,0x67 + mov $mi,%rdx + movq `1*$STRIDE/4-96`($bptr),%xmm1 + .byte 0x67 + pand %xmm4,%xmm0 + movq `2*$STRIDE/4-96`($bptr),%xmm2 + .byte 0x67 + pand %xmm5,%xmm1 + movq `3*$STRIDE/4-96`($bptr),%xmm3 + add \$$STRIDE,$bptr # next &b[i] + .byte 0x67 + pand %xmm6,%xmm2 + por %xmm1,%xmm0 + pand %xmm7,%xmm3 + xor $zero,$zero # cf=0, of=0 + mov $bptr,8+8(%rsp) # off-load &b[i] + + mulx 0*16($nptr),%rax,%r10 + adcx %rax,%r15 # discarded + adox %r11,%r10 + mulx 1*16($nptr),%rax,%r11 + adcx %rax,%r10 + adox %r12,%r11 + mulx 2*16($nptr),%rax,%r12 + adcx %rax,%r11 + adox %r13,%r12 + mulx 3*16($nptr),%rax,%r15 + mov $bi,%rdx + por %xmm2,%xmm0 + mov 24+8(%rsp),$bptr # counter value + mov %r10,-8*4($tptr) + por %xmm3,%xmm0 + adcx %rax,%r12 + mov %r11,-8*3($tptr) + adox $zero,%r15 # of=0 + mov %r12,-8*2($tptr) + lea 4*16($nptr),$nptr + jmp .Lmulx4x_inner + +.align 32 +.Lmulx4x_inner: + mulx 0*8($aptr),%r10,%rax # a[4]*b[i] + adcx $zero,%r15 # cf=0, modulo-scheduled + adox %r14,%r10 + mulx 1*8($aptr),%r11,%r14 # a[5]*b[i] + adcx 0*8($tptr),%r10 + adox %rax,%r11 + mulx 2*8($aptr),%r12,%rax # ... + adcx 1*8($tptr),%r11 + adox %r14,%r12 + mulx 3*8($aptr),%r13,%r14 + mov $mi,%rdx + adcx 2*8($tptr),%r12 + adox %rax,%r13 + adcx 3*8($tptr),%r13 + adox $zero,%r14 # of=0 + lea 4*8($aptr),$aptr + lea 4*8($tptr),$tptr + adcx $zero,%r14 # cf=0 + + adox %r15,%r10 + mulx 0*16($nptr),%rax,%r15 + adcx %rax,%r10 + adox %r15,%r11 + mulx 1*16($nptr),%rax,%r15 + adcx %rax,%r11 + adox %r15,%r12 + mulx 2*16($nptr),%rax,%r15 + mov %r10,-5*8($tptr) + adcx %rax,%r12 + adox %r15,%r13 + mov %r11,-4*8($tptr) + mulx 3*16($nptr),%rax,%r15 + mov $bi,%rdx + lea 4*16($nptr),$nptr + mov %r12,-3*8($tptr) + adcx %rax,%r13 + adox $zero,%r15 + mov %r13,-2*8($tptr) + + dec $bptr # of=0, pass cf + jnz .Lmulx4x_inner + + mov 0+8(%rsp),$num # load -num + movq %xmm0,%rdx # bp[i+1] + adc $zero,%r15 # modulo-scheduled + sub 0*8($tptr),$bptr # pull top-most carry to %cf + mov 8+8(%rsp),$bptr # re-load &b[i] + mov 16+8(%rsp),%r10 + adc %r15,%r14 + lea ($aptr,$num),$aptr # rewind $aptr + adc $zero,$zero # top-most carry + mov %r14,-1*8($tptr) + + cmp %r10,$bptr + jb .Lmulx4x_outer + + mov -16($nptr),%r10 + xor %r15,%r15 + sub %r14,%r10 # compare top-most words + adc %r15,%r15 + or %r15,$zero + xor \$1,$zero + lea ($tptr,$num),%rdi # rewind $tptr + lea ($nptr,$num,2),$nptr # rewind $nptr + .byte 0x67,0x67 + sar \$3+2,$num # cf=0 + lea ($nptr,$zero,8),%rbp + mov 56+8(%rsp),%rdx # restore rp + mov $num,%rcx + jmp .Lsqrx4x_sub # common post-condition +.size mulx4x_internal,.-mulx4x_internal +___ +} { +###################################################################### +# void bn_power5( +my $rptr="%rdi"; # BN_ULONG *rptr, +my $aptr="%rsi"; # const BN_ULONG *aptr, +my $bptr="%rdx"; # const void *table, +my $nptr="%rcx"; # const BN_ULONG *nptr, +my $n0 ="%r8"; # const BN_ULONG *n0); +my $num ="%r9"; # int num, has to be divisible by 8 + # int pwr); + +my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); +my @A0=("%r10","%r11"); +my @A1=("%r12","%r13"); +my ($a0,$a1,$ai)=("%r14","%r15","%rbx"); + +$code.=<<___; +.type bn_powerx5,\@function,6 +.align 32 +bn_powerx5: +.Lpowerx5_enter: + .byte 0x67 + mov %rsp,%rax + push %rbx + 
push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0x28(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) +___ +$code.=<<___; + .byte 0x67 + mov ${num}d,%r10d + shl \$3,${num}d # convert $num to bytes + shl \$3+2,%r10d # 4*$num + neg $num + mov ($n0),$n0 # *n0 + + ############################################################## + # ensure that stack frame doesn't alias with $aptr+4*$num + # modulo 4096, which covers ret[num], am[num] and n[2*num] + # (see bn_exp.c). this is done to allow memory disambiguation + # logic do its magic. + # + lea -64(%rsp,$num,2),%r11 + sub $aptr,%r11 + and \$4095,%r11 + cmp %r11,%r10 + jb .Lpwrx_sp_alt + sub %r11,%rsp # align with $aptr + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + jmp .Lpwrx_sp_done + +.align 32 +.Lpwrx_sp_alt: + lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num + lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num) + sub %r10,%r11 + mov \$0,%r10 + cmovc %r10,%r11 + sub %r11,%rsp +.Lpwrx_sp_done: + and \$-64,%rsp + mov $num,%r10 + neg $num + + ############################################################## + # Stack layout + # + # +0 saved $num, used in reduction section + # +8 &t[2*$num], used in reduction section + # +16 intermediate carry bit + # +24 top-most carry bit, used in reduction section + # +32 saved *n0 + # +40 saved %rsp + # +48 t[2*$num] + # + pxor %xmm0,%xmm0 + movq $rptr,%xmm1 # save $rptr + movq $nptr,%xmm2 # save $nptr + movq %r10, %xmm3 # -$num + movq $bptr,%xmm4 + mov $n0, 32(%rsp) + mov %rax, 40(%rsp) # save original %rsp +.Lpowerx5_body: + + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + call __bn_sqrx8x_internal + + mov %r10,$num # -num + mov $aptr,$rptr + movq %xmm2,$nptr + movq %xmm4,$bptr + mov 40(%rsp),%rax + + call mulx4x_internal + + mov 40(%rsp),%rsi # restore %rsp + mov \$1,%rax +___ +$code.=<<___ if ($win64); + movaps -88(%rsi),%xmm6 + movaps -72(%rsi),%xmm7 +___ +$code.=<<___; + mov -48(%rsi),%r15 + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lpowerx5_epilogue: + ret +.size bn_powerx5,.-bn_powerx5 + +.globl bn_sqrx8x_internal +.hidden bn_sqrx8x_internal +.type bn_sqrx8x_internal,\@abi-omnipotent +.align 32 +bn_sqrx8x_internal: +__bn_sqrx8x_internal: + ################################################################## + # Squaring part: + # + # a) multiply-n-add everything but a[i]*a[i]; + # b) shift result of a) by 1 to the left and accumulate + # a[i]*a[i] products; + # + ################################################################## + # a[7]a[7]a[6]a[6]a[5]a[5]a[4]a[4]a[3]a[3]a[2]a[2]a[1]a[1]a[0]a[0] + # a[1]a[0] + # a[2]a[0] + # a[3]a[0] + # a[2]a[1] + # a[3]a[1] + # a[3]a[2] + # + # a[4]a[0] + # a[5]a[0] + # a[6]a[0] + # a[7]a[0] + # a[4]a[1] + # a[5]a[1] + # a[6]a[1] + # a[7]a[1] + # a[4]a[2] + # a[5]a[2] + # a[6]a[2] + # a[7]a[2] + # a[4]a[3] + # a[5]a[3] + # a[6]a[3] + # a[7]a[3] + # + # a[5]a[4] + # a[6]a[4] + # a[7]a[4] + # a[6]a[5] + # a[7]a[5] + # a[7]a[6] + # a[7]a[7]a[6]a[6]a[5]a[5]a[4]a[4]a[3]a[3]a[2]a[2]a[1]a[1]a[0]a[0] +___ +{ +my ($zero,$carry)=("%rbp","%rcx"); +my $aaptr=$zero; +$code.=<<___; + lea 48+8(%rsp),$tptr + lea ($aptr,$num),$aaptr + mov $num,0+8(%rsp) # save $num + mov $aaptr,8+8(%rsp) # save end of $aptr + jmp .Lsqr8x_zero_start + +.align 32 +.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +.Lsqrx8x_zero: + .byte 0x3e + movdqa %xmm0,0*8($tptr) + movdqa 
%xmm0,2*8($tptr) + movdqa %xmm0,4*8($tptr) + movdqa %xmm0,6*8($tptr) +.Lsqr8x_zero_start: # aligned at 32 + movdqa %xmm0,8*8($tptr) + movdqa %xmm0,10*8($tptr) + movdqa %xmm0,12*8($tptr) + movdqa %xmm0,14*8($tptr) + lea 16*8($tptr),$tptr + sub \$64,$num + jnz .Lsqrx8x_zero + + mov 0*8($aptr),%rdx # a[0], modulo-scheduled + #xor %r9,%r9 # t[1], ex-$num, zero already + xor %r10,%r10 + xor %r11,%r11 + xor %r12,%r12 + xor %r13,%r13 + xor %r14,%r14 + xor %r15,%r15 + lea 48+8(%rsp),$tptr + xor $zero,$zero # cf=0, cf=0 + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_loop: + mulx 1*8($aptr),%r8,%rax # a[1]*a[0] + adcx %r9,%r8 # a[1]*a[0]+=t[1] + adox %rax,%r10 + mulx 2*8($aptr),%r9,%rax # a[2]*a[0] + adcx %r10,%r9 + adox %rax,%r11 + .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 # mulx 3*8($aptr),%r10,%rax # ... + adcx %r11,%r10 + adox %rax,%r12 + .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 # mulx 4*8($aptr),%r11,%rax + adcx %r12,%r11 + adox %rax,%r13 + mulx 5*8($aptr),%r12,%rax + adcx %r13,%r12 + adox %rax,%r14 + mulx 6*8($aptr),%r13,%rax + adcx %r14,%r13 + adox %r15,%rax + mulx 7*8($aptr),%r14,%r15 + mov 1*8($aptr),%rdx # a[1] + adcx %rax,%r14 + adox $zero,%r15 + adc 8*8($tptr),%r15 + mov %r8,1*8($tptr) # t[1] + mov %r9,2*8($tptr) # t[2] + sbb $carry,$carry # mov %cf,$carry + xor $zero,$zero # cf=0, of=0 + + + mulx 2*8($aptr),%r8,%rbx # a[2]*a[1] + mulx 3*8($aptr),%r9,%rax # a[3]*a[1] + adcx %r10,%r8 + adox %rbx,%r9 + mulx 4*8($aptr),%r10,%rbx # ... + adcx %r11,%r9 + adox %rax,%r10 + .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 # mulx 5*8($aptr),%r11,%rax + adcx %r12,%r10 + adox %rbx,%r11 + .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 6*8($aptr),%r12,%rbx + adcx %r13,%r11 + adox %r14,%r12 + .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 # mulx 7*8($aptr),%r13,%r14 + mov 2*8($aptr),%rdx # a[2] + adcx %rax,%r12 + adox %rbx,%r13 + adcx %r15,%r13 + adox $zero,%r14 # of=0 + adcx $zero,%r14 # cf=0 + + mov %r8,3*8($tptr) # t[3] + mov %r9,4*8($tptr) # t[4] + + mulx 3*8($aptr),%r8,%rbx # a[3]*a[2] + mulx 4*8($aptr),%r9,%rax # a[4]*a[2] + adcx %r10,%r8 + adox %rbx,%r9 + mulx 5*8($aptr),%r10,%rbx # ... + adcx %r11,%r9 + adox %rax,%r10 + .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 # mulx 6*8($aptr),%r11,%rax + adcx %r12,%r10 + adox %r13,%r11 + .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 # mulx 7*8($aptr),%r12,%r13 + .byte 0x3e + mov 3*8($aptr),%rdx # a[3] + adcx %rbx,%r11 + adox %rax,%r12 + adcx %r14,%r12 + mov %r8,5*8($tptr) # t[5] + mov %r9,6*8($tptr) # t[6] + mulx 4*8($aptr),%r8,%rax # a[4]*a[3] + adox $zero,%r13 # of=0 + adcx $zero,%r13 # cf=0 + + mulx 5*8($aptr),%r9,%rbx # a[5]*a[3] + adcx %r10,%r8 + adox %rax,%r9 + mulx 6*8($aptr),%r10,%rax # ... 
+ adcx %r11,%r9 + adox %r12,%r10 + mulx 7*8($aptr),%r11,%r12 + mov 4*8($aptr),%rdx # a[4] + mov 5*8($aptr),%r14 # a[5] + adcx %rbx,%r10 + adox %rax,%r11 + mov 6*8($aptr),%r15 # a[6] + adcx %r13,%r11 + adox $zero,%r12 # of=0 + adcx $zero,%r12 # cf=0 + + mov %r8,7*8($tptr) # t[7] + mov %r9,8*8($tptr) # t[8] + + mulx %r14,%r9,%rax # a[5]*a[4] + mov 7*8($aptr),%r8 # a[7] + adcx %r10,%r9 + mulx %r15,%r10,%rbx # a[6]*a[4] + adox %rax,%r10 + adcx %r11,%r10 + mulx %r8,%r11,%rax # a[7]*a[4] + mov %r14,%rdx # a[5] + adox %rbx,%r11 + adcx %r12,%r11 + #adox $zero,%rax # of=0 + adcx $zero,%rax # cf=0 + + mulx %r15,%r14,%rbx # a[6]*a[5] + mulx %r8,%r12,%r13 # a[7]*a[5] + mov %r15,%rdx # a[6] + lea 8*8($aptr),$aptr + adcx %r14,%r11 + adox %rbx,%r12 + adcx %rax,%r12 + adox $zero,%r13 + + .byte 0x67,0x67 + mulx %r8,%r8,%r14 # a[7]*a[6] + adcx %r8,%r13 + adcx $zero,%r14 + + cmp 8+8(%rsp),$aptr + je .Lsqrx8x_outer_break + + neg $carry # mov $carry,%cf + mov \$-8,%rcx + mov $zero,%r15 + mov 8*8($tptr),%r8 + adcx 9*8($tptr),%r9 # +=t[9] + adcx 10*8($tptr),%r10 # ... + adcx 11*8($tptr),%r11 + adc 12*8($tptr),%r12 + adc 13*8($tptr),%r13 + adc 14*8($tptr),%r14 + adc 15*8($tptr),%r15 + lea ($aptr),$aaptr + lea 2*64($tptr),$tptr + sbb %rax,%rax # mov %cf,$carry + + mov -64($aptr),%rdx # a[0] + mov %rax,16+8(%rsp) # offload $carry + mov $tptr,24+8(%rsp) + + #lea 8*8($tptr),$tptr # see 2*8*8($tptr) above + xor %eax,%eax # cf=0, of=0 + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_loop: + mov %r8,%rbx + mulx 0*8($aaptr),%rax,%r8 # a[8]*a[i] + adcx %rax,%rbx # +=t[8] + adox %r9,%r8 + + mulx 1*8($aaptr),%rax,%r9 # ... + adcx %rax,%r8 + adox %r10,%r9 + + mulx 2*8($aaptr),%rax,%r10 + adcx %rax,%r9 + adox %r11,%r10 + + mulx 3*8($aaptr),%rax,%r11 + adcx %rax,%r10 + adox %r12,%r11 + + .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 # mulx 4*8($aaptr),%rax,%r12 + adcx %rax,%r11 + adox %r13,%r12 + + mulx 5*8($aaptr),%rax,%r13 + adcx %rax,%r12 + adox %r14,%r13 + + mulx 6*8($aaptr),%rax,%r14 + mov %rbx,($tptr,%rcx,8) # store t[8+i] + mov \$0,%ebx + adcx %rax,%r13 + adox %r15,%r14 + + .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 # mulx 7*8($aaptr),%rax,%r15 + mov 8($aptr,%rcx,8),%rdx # a[i] + adcx %rax,%r14 + adox %rbx,%r15 # %rbx is 0, of=0 + adcx %rbx,%r15 # cf=0 + + .byte 0x67 + inc %rcx # of=0 + jnz .Lsqrx8x_loop + + lea 8*8($aaptr),$aaptr + mov \$-8,%rcx + cmp 8+8(%rsp),$aaptr # done? 
+ je .Lsqrx8x_break + + sub 16+8(%rsp),%rbx # mov 16(%rsp),%cf + .byte 0x66 + mov -64($aptr),%rdx + adcx 0*8($tptr),%r8 + adcx 1*8($tptr),%r9 + adc 2*8($tptr),%r10 + adc 3*8($tptr),%r11 + adc 4*8($tptr),%r12 + adc 5*8($tptr),%r13 + adc 6*8($tptr),%r14 + adc 7*8($tptr),%r15 + lea 8*8($tptr),$tptr + .byte 0x67 + sbb %rax,%rax # mov %cf,%rax + xor %ebx,%ebx # cf=0, of=0 + mov %rax,16+8(%rsp) # offload carry + jmp .Lsqrx8x_loop + +.align 32 +.Lsqrx8x_break: + sub 16+8(%rsp),%r8 # consume last carry + mov 24+8(%rsp),$carry # initial $tptr, borrow $carry + mov 0*8($aptr),%rdx # a[8], modulo-scheduled + xor %ebp,%ebp # xor $zero,$zero + mov %r8,0*8($tptr) + cmp $carry,$tptr # cf=0, of=0 + je .Lsqrx8x_outer_loop + + mov %r9,1*8($tptr) + mov 1*8($carry),%r9 + mov %r10,2*8($tptr) + mov 2*8($carry),%r10 + mov %r11,3*8($tptr) + mov 3*8($carry),%r11 + mov %r12,4*8($tptr) + mov 4*8($carry),%r12 + mov %r13,5*8($tptr) + mov 5*8($carry),%r13 + mov %r14,6*8($tptr) + mov 6*8($carry),%r14 + mov %r15,7*8($tptr) + mov 7*8($carry),%r15 + mov $carry,$tptr + jmp .Lsqrx8x_outer_loop + +.align 32 +.Lsqrx8x_outer_break: + mov %r9,9*8($tptr) # t[9] + movq %xmm3,%rcx # -$num + mov %r10,10*8($tptr) # ... + mov %r11,11*8($tptr) + mov %r12,12*8($tptr) + mov %r13,13*8($tptr) + mov %r14,14*8($tptr) +___ +} { +my $i="%rcx"; +$code.=<<___; + lea 48+8(%rsp),$tptr + mov ($aptr,$i),%rdx # a[0] + + mov 8($tptr),$A0[1] # t[1] + xor $A0[0],$A0[0] # t[0], of=0, cf=0 + mov 0+8(%rsp),$num # restore $num + adox $A0[1],$A0[1] + mov 16($tptr),$A1[0] # t[2] # prefetch + mov 24($tptr),$A1[1] # t[3] # prefetch + #jmp .Lsqrx4x_shift_n_add # happens to be aligned + +.align 32 +.Lsqrx4x_shift_n_add: + mulx %rdx,%rax,%rbx + adox $A1[0],$A1[0] + adcx $A0[0],%rax + .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 # mov 8($aptr,$i),%rdx # a[i+1] # prefetch + .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 # mov 32($tptr),$A0[0] # t[2*i+4] # prefetch + adox $A1[1],$A1[1] + adcx $A0[1],%rbx + mov 40($tptr),$A0[1] # t[2*i+4+1] # prefetch + mov %rax,0($tptr) + mov %rbx,8($tptr) + + mulx %rdx,%rax,%rbx + adox $A0[0],$A0[0] + adcx $A1[0],%rax + mov 16($aptr,$i),%rdx # a[i+2] # prefetch + mov 48($tptr),$A1[0] # t[2*i+6] # prefetch + adox $A0[1],$A0[1] + adcx $A1[1],%rbx + mov 56($tptr),$A1[1] # t[2*i+6+1] # prefetch + mov %rax,16($tptr) + mov %rbx,24($tptr) + + mulx %rdx,%rax,%rbx + adox $A1[0],$A1[0] + adcx $A0[0],%rax + mov 24($aptr,$i),%rdx # a[i+3] # prefetch + lea 32($i),$i + mov 64($tptr),$A0[0] # t[2*i+8] # prefetch + adox $A1[1],$A1[1] + adcx $A0[1],%rbx + mov 72($tptr),$A0[1] # t[2*i+8+1] # prefetch + mov %rax,32($tptr) + mov %rbx,40($tptr) + + mulx %rdx,%rax,%rbx + adox $A0[0],$A0[0] + adcx $A1[0],%rax + jrcxz .Lsqrx4x_shift_n_add_break + .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 # mov 0($aptr,$i),%rdx # a[i+4] # prefetch + adox $A0[1],$A0[1] + adcx $A1[1],%rbx + mov 80($tptr),$A1[0] # t[2*i+10] # prefetch + mov 88($tptr),$A1[1] # t[2*i+10+1] # prefetch + mov %rax,48($tptr) + mov %rbx,56($tptr) + lea 64($tptr),$tptr + nop + jmp .Lsqrx4x_shift_n_add + +.align 32 +.Lsqrx4x_shift_n_add_break: + adcx $A1[1],%rbx + mov %rax,48($tptr) + mov %rbx,56($tptr) + lea 64($tptr),$tptr # end of t[] buffer +___ +} +###################################################################### +# Montgomery reduction part, "word-by-word" algorithm. +# +# This new path is inspired by multiple submissions from Intel, by +# Shay Gueron, Vlad Krasnov, Erdinc Ozturk, James Guilford, +# Vinodh Gopal... 
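
Before the reduction code: the .Lsqrx4x_shift_n_add pass above (and its non-ADX twin .Lsqr4x_shift_n_add earlier) finishes the squaring by doubling the accumulated cross products and folding in the a[i]*a[i] terms. A scalar C rendering of that step, with illustrative names and the GCC/Clang __int128 extension; the unrolled code interleaves the shift, the multiply and the prefetch loads instead of making two separate passes.

#include <stdint.h>

/*
 * Finish a schoolbook squaring: t[0..2n-1] already holds the sum of all
 * cross products a[i]*a[j] with i < j; double it and add the squares
 * a[i]^2.  Illustrative only.
 */
static void sqr_shift_n_add(uint64_t *t, const uint64_t *a, int n)
{
    uint64_t carry = 0;

    for (int i = 0; i < 2 * n; i++) {        /* t <<= 1: double the cross products */
        uint64_t hi = t[i] >> 63;
        t[i] = (t[i] << 1) | carry;
        carry = hi;                          /* final carry is provably zero */
    }

    uint64_t c = 0;
    for (int i = 0; i < n; i++) {            /* t += a[i]^2 at word 2*i */
        unsigned __int128 sq  = (unsigned __int128)a[i] * a[i];
        unsigned __int128 acc = (unsigned __int128)t[2 * i] + (uint64_t)sq + c;
        t[2 * i] = (uint64_t)acc;
        acc = (unsigned __int128)t[2 * i + 1] +
              (uint64_t)(sq >> 64) + (uint64_t)(acc >> 64);
        t[2 * i + 1] = (uint64_t)acc;
        c = (uint64_t)(acc >> 64);           /* carried into word 2*i+2 next */
    }
    /* c is zero here as well: the full square fits in 2*n words */
}
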
+{ +my ($nptr,$carry,$m0)=("%rbp","%rsi","%rdx"); + +$code.=<<___; + movq %xmm2,$nptr +sqrx8x_reduction: + xor %eax,%eax # initial top-most carry bit + mov 32+8(%rsp),%rbx # n0 + mov 48+8(%rsp),%rdx # "%r8", 8*0($tptr) + lea -128($nptr,$num,2),%rcx # end of n[] + #lea 48+8(%rsp,$num,2),$tptr # end of t[] buffer + mov %rcx, 0+8(%rsp) # save end of n[] + mov $tptr,8+8(%rsp) # save end of t[] + + lea 48+8(%rsp),$tptr # initial t[] window + jmp .Lsqrx8x_reduction_loop + +.align 32 +.Lsqrx8x_reduction_loop: + mov 8*1($tptr),%r9 + mov 8*2($tptr),%r10 + mov 8*3($tptr),%r11 + mov 8*4($tptr),%r12 + mov %rdx,%r8 + imulq %rbx,%rdx # n0*a[i] + mov 8*5($tptr),%r13 + mov 8*6($tptr),%r14 + mov 8*7($tptr),%r15 + mov %rax,24+8(%rsp) # store top-most carry bit + + lea 8*8($tptr),$tptr + xor $carry,$carry # cf=0,of=0 + mov \$-8,%rcx + jmp .Lsqrx8x_reduce + +.align 32 +.Lsqrx8x_reduce: + mov %r8, %rbx + mulx 16*0($nptr),%rax,%r8 # n[0] + adcx %rbx,%rax # discarded + adox %r9,%r8 + + mulx 16*1($nptr),%rbx,%r9 # n[1] + adcx %rbx,%r8 + adox %r10,%r9 + + mulx 16*2($nptr),%rbx,%r10 + adcx %rbx,%r9 + adox %r11,%r10 + + mulx 16*3($nptr),%rbx,%r11 + adcx %rbx,%r10 + adox %r12,%r11 + + .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x40,0x00,0x00,0x00 # mulx 16*4($nptr),%rbx,%r12 + mov %rdx,%rax + mov %r8,%rdx + adcx %rbx,%r11 + adox %r13,%r12 + + mulx 32+8(%rsp),%rbx,%rdx # %rdx discarded + mov %rax,%rdx + mov %rax,64+48+8(%rsp,%rcx,8) # put aside n0*a[i] + + mulx 16*5($nptr),%rax,%r13 + adcx %rax,%r12 + adox %r14,%r13 + + mulx 16*6($nptr),%rax,%r14 + adcx %rax,%r13 + adox %r15,%r14 + + mulx 16*7($nptr),%rax,%r15 + mov %rbx,%rdx + adcx %rax,%r14 + adox $carry,%r15 # $carry is 0 + adcx $carry,%r15 # cf=0 + + .byte 0x67,0x67,0x67 + inc %rcx # of=0 + jnz .Lsqrx8x_reduce + + mov $carry,%rax # xor %rax,%rax + cmp 0+8(%rsp),$nptr # end of n[]? + jae .Lsqrx8x_no_tail + + mov 48+8(%rsp),%rdx # pull n0*a[0] + add 8*0($tptr),%r8 + lea 16*8($nptr),$nptr + mov \$-8,%rcx + adcx 8*1($tptr),%r9 + adcx 8*2($tptr),%r10 + adc 8*3($tptr),%r11 + adc 8*4($tptr),%r12 + adc 8*5($tptr),%r13 + adc 8*6($tptr),%r14 + adc 8*7($tptr),%r15 + lea 8*8($tptr),$tptr + sbb %rax,%rax # top carry + + xor $carry,$carry # of=0, cf=0 + mov %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail: + mov %r8,%rbx + mulx 16*0($nptr),%rax,%r8 + adcx %rax,%rbx + adox %r9,%r8 + + mulx 16*1($nptr),%rax,%r9 + adcx %rax,%r8 + adox %r10,%r9 + + mulx 16*2($nptr),%rax,%r10 + adcx %rax,%r9 + adox %r11,%r10 + + mulx 16*3($nptr),%rax,%r11 + adcx %rax,%r10 + adox %r12,%r11 + + .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x40,0x00,0x00,0x00 # mulx 16*4($nptr),%rax,%r12 + adcx %rax,%r11 + adox %r13,%r12 + + mulx 16*5($nptr),%rax,%r13 + adcx %rax,%r12 + adox %r14,%r13 + + mulx 16*6($nptr),%rax,%r14 + adcx %rax,%r13 + adox %r15,%r14 + + mulx 16*7($nptr),%rax,%r15 + mov 72+48+8(%rsp,%rcx,8),%rdx # pull n0*a[i] + adcx %rax,%r14 + adox $carry,%r15 + mov %rbx,($tptr,%rcx,8) # save result + mov %r8,%rbx + adcx $carry,%r15 # cf=0 + + inc %rcx # of=0 + jnz .Lsqrx8x_tail + + cmp 0+8(%rsp),$nptr # end of n[]? 
+ jae .Lsqrx8x_tail_done # break out of loop + + sub 16+8(%rsp),$carry # mov 16(%rsp),%cf + mov 48+8(%rsp),%rdx # pull n0*a[0] + lea 16*8($nptr),$nptr + adc 8*0($tptr),%r8 + adc 8*1($tptr),%r9 + adc 8*2($tptr),%r10 + adc 8*3($tptr),%r11 + adc 8*4($tptr),%r12 + adc 8*5($tptr),%r13 + adc 8*6($tptr),%r14 + adc 8*7($tptr),%r15 + lea 8*8($tptr),$tptr + sbb %rax,%rax + sub \$8,%rcx # mov \$-8,%rcx + + xor $carry,$carry # of=0, cf=0 + mov %rax,16+8(%rsp) + jmp .Lsqrx8x_tail + +.align 32 +.Lsqrx8x_tail_done: + add 24+8(%rsp),%r8 # can this overflow? + mov $carry,%rax # xor %rax,%rax + + sub 16+8(%rsp),$carry # mov 16(%rsp),%cf +.Lsqrx8x_no_tail: # %cf is 0 if jumped here + adc 8*0($tptr),%r8 + movq %xmm3,%rcx + adc 8*1($tptr),%r9 + mov 16*7($nptr),$carry + movq %xmm2,$nptr # restore $nptr + adc 8*2($tptr),%r10 + adc 8*3($tptr),%r11 + adc 8*4($tptr),%r12 + adc 8*5($tptr),%r13 + adc 8*6($tptr),%r14 + adc 8*7($tptr),%r15 + adc %rax,%rax # top-most carry + + mov 32+8(%rsp),%rbx # n0 + mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8" + + mov %r8,8*0($tptr) # store top 512 bits + lea 8*8($tptr),%r8 # borrow %r8 + mov %r9,8*1($tptr) + mov %r10,8*2($tptr) + mov %r11,8*3($tptr) + mov %r12,8*4($tptr) + mov %r13,8*5($tptr) + mov %r14,8*6($tptr) + mov %r15,8*7($tptr) + + lea 8*8($tptr,%rcx),$tptr # start of current t[] window + cmp 8+8(%rsp),%r8 # end of t[]? + jb .Lsqrx8x_reduction_loop +___ +} +############################################################## +# Post-condition, 4x unrolled +# +{ +my ($rptr,$nptr)=("%rdx","%rbp"); +my @ri=map("%r$_",(10..13)); +my @ni=map("%r$_",(14..15)); +$code.=<<___; + xor %rbx,%rbx + sub %r15,%rsi # compare top-most words + adc %rbx,%rbx + mov %rcx,%r10 # -$num + .byte 0x67 + or %rbx,%rax + .byte 0x67 + mov %rcx,%r9 # -$num + xor \$1,%rax + sar \$3+2,%rcx # cf=0 + #lea 48+8(%rsp,%r9),$tptr + lea ($nptr,%rax,8),$nptr + movq %xmm1,$rptr # restore $rptr + movq %xmm1,$aptr # prepare for back-to-back call + jmp .Lsqrx4x_sub + +.align 32 +.Lsqrx4x_sub: + .byte 0x66 + mov 8*0($tptr),%r12 + mov 8*1($tptr),%r13 + sbb 16*0($nptr),%r12 + mov 8*2($tptr),%r14 + sbb 16*1($nptr),%r13 + mov 8*3($tptr),%r15 + lea 8*4($tptr),$tptr + sbb 16*2($nptr),%r14 + mov %r12,8*0($rptr) + sbb 16*3($nptr),%r15 + lea 16*4($nptr),$nptr + mov %r13,8*1($rptr) + mov %r14,8*2($rptr) + mov %r15,8*3($rptr) + lea 8*4($rptr),$rptr + + inc %rcx + jnz .Lsqrx4x_sub +___ +} +$code.=<<___; + neg %r9 # restore $num + ret +.size bn_sqrx8x_internal,.-bn_sqrx8x_internal +___ +}}} { -my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order - ("%rdi","%rsi","%rdx","%rcx"); # Unix order +my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%edx","%r8", "%r9d") : # Win64 order + ("%rdi","%esi","%rdx","%ecx"); # Unix order my $out=$inp; my $STRIDE=2**5*8; my $N=$STRIDE/4; $code.=<<___; +.globl bn_get_bits5 +.type bn_get_bits5,\@abi-omnipotent +.align 16 +bn_get_bits5: + mov $inp,%r10 + mov $num,%ecx + shr \$3,$num + movzw (%r10,$num),%eax + and \$7,%ecx + shrl %cl,%eax + and \$31,%eax + ret +.size bn_get_bits5,.-bn_get_bits5 + .globl bn_scatter5 .type bn_scatter5,\@abi-omnipotent .align 16 @@ -868,13 +3267,13 @@ .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) ___ $code.=<<___; - mov $idx,%r11 + mov $idx,%r11d shr \$`log($N/8)/log(2)`,$idx and \$`$N/8-1`,%r11 not $idx lea .Lmagic_masks(%rip),%rax and \$`2**5/($N/8)-1`,$idx # 5 is "window size" - lea 96($tbl,%r11,8),$tbl # pointer within 1st cache line + lea 128($tbl,%r11,8),$tbl # pointer within 1st cache line movq 0(%rax,$idx,8),%xmm4 # set of masks denoting 
which movq 8(%rax,$idx,8),%xmm5 # cache line contains element movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument @@ -882,15 +3281,16 @@ jmp .Lgather .align 16 .Lgather: - movq `0*$STRIDE/4-96`($tbl),%xmm0 - movq `1*$STRIDE/4-96`($tbl),%xmm1 + movq `0*$STRIDE/4-128`($tbl),%xmm0 + movq `1*$STRIDE/4-128`($tbl),%xmm1 pand %xmm4,%xmm0 - movq `2*$STRIDE/4-96`($tbl),%xmm2 + movq `2*$STRIDE/4-128`($tbl),%xmm2 pand %xmm5,%xmm1 - movq `3*$STRIDE/4-96`($tbl),%xmm3 + movq `3*$STRIDE/4-128`($tbl),%xmm3 pand %xmm6,%xmm2 por %xmm1,%xmm0 pand %xmm7,%xmm3 + .byte 0x67,0x67 por %xmm2,%xmm0 lea $STRIDE($tbl),$tbl por %xmm3,%xmm0 @@ -954,26 +3354,27 @@ cmp %r10,%rbx # context->RipRipRsp - mov 8(%r11),%r10d # HandlerData[2] + mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # epilogue label cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail + lea .Lmul_epilogue(%rip),%r10 + cmp %r10,%rbx + jb .Lbody_40 + mov 192($context),%r10 # pull $num mov 8(%rax,%r10,8),%rax # pull saved stack pointer + jmp .Lbody_proceed + +.Lbody_40: + mov 40(%rax),%rax # pull saved stack pointer +.Lbody_proceed: - movaps (%rax),%xmm0 - movaps 16(%rax),%xmm1 - lea `40+48`(%rax),%rax + movaps -88(%rax),%xmm0 + movaps -72(%rax),%xmm1 mov -8(%rax),%rbx mov -16(%rax),%rbp @@ -1040,6 +3441,24 @@ .rva .LSEH_end_bn_mul4x_mont_gather5 .rva .LSEH_info_bn_mul4x_mont_gather5 + .rva .LSEH_begin_bn_power5 + .rva .LSEH_end_bn_power5 + .rva .LSEH_info_bn_power5 + + .rva .LSEH_begin_bn_from_mont8x + .rva .LSEH_end_bn_from_mont8x + .rva .LSEH_info_bn_from_mont8x +___ +$code.=<<___ if ($addx); + .rva .LSEH_begin_bn_mulx4x_mont_gather5 + .rva .LSEH_end_bn_mulx4x_mont_gather5 + .rva .LSEH_info_bn_mulx4x_mont_gather5 + + .rva .LSEH_begin_bn_powerx5 + .rva .LSEH_end_bn_powerx5 + .rva .LSEH_info_bn_powerx5 +___ +$code.=<<___; .rva .LSEH_begin_bn_gather5 .rva .LSEH_end_bn_gather5 .rva .LSEH_info_bn_gather5 @@ -1049,12 +3468,36 @@ .LSEH_info_bn_mul_mont_gather5: .byte 9,0,0,0 .rva mul_handler - .rva .Lmul_alloca,.Lmul_body,.Lmul_epilogue # HandlerData[] + .rva .Lmul_body,.Lmul_epilogue # HandlerData[] .align 8 .LSEH_info_bn_mul4x_mont_gather5: .byte 9,0,0,0 .rva mul_handler - .rva .Lmul4x_alloca,.Lmul4x_body,.Lmul4x_epilogue # HandlerData[] + .rva .Lmul4x_body,.Lmul4x_epilogue # HandlerData[] +.align 8 +.LSEH_info_bn_power5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lpower5_body,.Lpower5_epilogue # HandlerData[] +.align 8 +.LSEH_info_bn_from_mont8x: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lfrom_body,.Lfrom_epilogue # HandlerData[] +___ +$code.=<<___ if ($addx); +.align 8 +.LSEH_info_bn_mulx4x_mont_gather5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lmulx4x_body,.Lmulx4x_epilogue # HandlerData[] +.align 8 +.LSEH_info_bn_powerx5: + .byte 9,0,0,0 + .rva mul_handler + .rva .Lpowerx5_body,.Lpowerx5_epilogue # HandlerData[] +___ +$code.=<<___; .align 8 .LSEH_info_bn_gather5: .byte 0x01,0x0d,0x05,0x00 diff --git a/deps/openssl/openssl/crypto/bn/bn.h b/deps/openssl/openssl/crypto/bn/bn.h index 7d57e9834abd70..78709d384082de 100644 --- a/deps/openssl/openssl/crypto/bn/bn.h +++ b/deps/openssl/openssl/crypto/bn/bn.h @@ -256,24 +256,6 @@ extern "C" { # define BN_HEX_FMT2 "%08X" # endif -/* - * 2011-02-22 SMS. In various places, a size_t variable or a type cast to - * size_t was used to perform integer-only operations on pointers. This - * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t - * is still only 32 bits. 
What's needed in these cases is an integer type - * with the same size as a pointer, which size_t is not certain to be. The - * only fix here is VMS-specific. - */ -# if defined(OPENSSL_SYS_VMS) -# if __INITIAL_POINTER_SIZE == 64 -# define PTR_SIZE_INT long long -# else /* __INITIAL_POINTER_SIZE == 64 */ -# define PTR_SIZE_INT int -# endif /* __INITIAL_POINTER_SIZE == 64 [else] */ -# else /* defined(OPENSSL_SYS_VMS) */ -# define PTR_SIZE_INT size_t -# endif /* defined(OPENSSL_SYS_VMS) [else] */ - # define BN_DEFAULT_BITS 1280 # define BN_FLG_MALLOCED 0x01 diff --git a/deps/openssl/openssl/crypto/bn/bn_asm.c b/deps/openssl/openssl/crypto/bn/bn_asm.c index 114acf3dc24826..03a33cffe5eebd 100644 --- a/deps/openssl/openssl/crypto/bn/bn_asm.c +++ b/deps/openssl/openssl/crypto/bn/bn_asm.c @@ -489,121 +489,144 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, * c=(c2,c1,c0) */ +# ifdef BN_LLONG /* - * Keep in mind that carrying into high part of multiplication result - * can not overflow, because it cannot be all-ones. + * Keep in mind that additions to multiplication result can not + * overflow, because its high half cannot be all-ones. */ -# ifdef BN_LLONG -# define mul_add_c(a,b,c0,c1,c2) \ - t=(BN_ULLONG)a*b; \ - t1=(BN_ULONG)Lw(t); \ - t2=(BN_ULONG)Hw(t); \ - c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ - c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; - -# define mul_add_c2(a,b,c0,c1,c2) \ - t=(BN_ULLONG)a*b; \ - tt=(t+t)&BN_MASK; \ - if (tt < t) c2++; \ - t1=(BN_ULONG)Lw(tt); \ - t2=(BN_ULONG)Hw(tt); \ - c0=(c0+t1)&BN_MASK2; \ - if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ - c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; - -# define sqr_add_c(a,i,c0,c1,c2) \ - t=(BN_ULLONG)a[i]*a[i]; \ - t1=(BN_ULONG)Lw(t); \ - t2=(BN_ULONG)Hw(t); \ - c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ - c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; +# define mul_add_c(a,b,c0,c1,c2) do { \ + BN_ULONG hi; \ + BN_ULLONG t = (BN_ULLONG)(a)*(b); \ + t += c0; /* no carry */ \ + c0 = (BN_ULONG)Lw(t); \ + hi = (BN_ULONG)Hw(t); \ + c1 = (c1+hi)&BN_MASK2; if (c1 +#endif + +#undef RSAZ_ENABLED +#if defined(OPENSSL_BN_ASM_MONT) && \ + (defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64)) +# include "rsaz_exp.h" +# define RSAZ_ENABLED +#endif + +#undef SPARC_T4_MONT +#if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc)) +# include "sparc_arch.h" +extern unsigned int OPENSSL_sparcv9cap_P[]; +# define SPARC_T4_MONT #endif /* maximum precomputation table size for *variable* sliding windows */ @@ -464,6 +481,23 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, wstart = bits - 1; /* The top bit of the window */ wend = 0; /* The bottom bit of the window */ +#if 1 /* by Shay Gueron's suggestion */ + j = m->top; /* borrow j */ + if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { + if (bn_wexpand(r, j) == NULL) + goto err; + /* 2^(top*BN_BITS2) - m */ + r->d[0] = (0 - m->d[0]) & BN_MASK2; + for (i = 1; i < j; i++) + r->d[i] = (~m->d[i]) & BN_MASK2; + r->top = j; + /* + * Upper words will be zero if the corresponding words of 'm' were + * 0xfff[...], so decrement r->top accordingly. 
+ */ + bn_correct_top(r); + } else +#endif if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) goto err; for (;;) { @@ -515,6 +549,17 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, if (wstart < 0) break; } +#if defined(SPARC_T4_MONT) + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { + j = mont->N.top; /* borrow j */ + val[0]->d[0] = 1; /* borrow val[0] */ + for (i = 1; i < j; i++) + val[0]->d[i] = 0; + val[0]->top = j; + if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx)) + goto err; + } else +#endif if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; ret = 1; @@ -526,6 +571,27 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, return (ret); } +#if defined(SPARC_T4_MONT) +static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) +{ + BN_ULONG ret = 0; + int wordpos; + + wordpos = bitpos / BN_BITS2; + bitpos %= BN_BITS2; + if (wordpos >= 0 && wordpos < a->top) { + ret = a->d[wordpos] & BN_MASK2; + if (bitpos) { + ret >>= bitpos; + if (++wordpos < a->top) + ret |= a->d[wordpos] << (BN_BITS2 - bitpos); + } + } + + return ret & BN_MASK2; +} +#endif + /* * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific * layout so that accessing any of these table values shows the same access @@ -594,6 +660,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, int powerbufLen = 0; unsigned char *powerbuf = NULL; BIGNUM tmp, am; +#if defined(SPARC_T4_MONT) + unsigned int t4 = 0; +#endif bn_check_top(a); bn_check_top(p); @@ -626,21 +695,62 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, goto err; } +#ifdef RSAZ_ENABLED + /* + * If the size of the operands allow it, perform the optimized + * RSAZ exponentiation. For further information see + * crypto/bn/rsaz_exp.c and accompanying assembly modules. + */ + if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) + && rsaz_avx2_eligible()) { + if (NULL == bn_wexpand(rr, 16)) + goto err; + RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, + mont->n0[0]); + rr->top = 16; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; + goto err; + } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { + if (NULL == bn_wexpand(rr, 8)) + goto err; + RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); + rr->top = 8; + rr->neg = 0; + bn_correct_top(rr); + ret = 1; + goto err; + } +#endif + /* Get the window size to use with size of p. */ window = BN_window_bits_for_ctime_exponent_size(bits); +#if defined(SPARC_T4_MONT) + if (window >= 5 && (top & 15) == 0 && top <= 64 && + (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) == + (CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0])) + window = 5; + else +#endif #if defined(OPENSSL_BN_ASM_MONT5) - if (window == 6 && bits <= 1024) - window = 5; /* ~5% improvement of 2048-bit RSA sign */ + if (window >= 5) { + window = 5; /* ~5% improvement for RSA2048 sign, and even + * for RSA4096 */ + if ((top & 7) == 0) + powerbufLen += 2 * top * sizeof(m->d[0]); + } #endif + (void)0; /* * Allocate a buffer large enough to hold all of the pre-computed powers * of am, am itself and tmp. */ numPowers = 1 << window; - powerbufLen = sizeof(m->d[0]) * (top * numPowers + - ((2 * top) > - numPowers ? (2 * top) : numPowers)); + powerbufLen += sizeof(m->d[0]) * (top * numPowers + + ((2 * top) > + numPowers ? 
(2 * top) : numPowers)); #ifdef alloca if (powerbufLen < 3072) powerbufFree = @@ -670,15 +780,17 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, tmp.flags = am.flags = BN_FLG_STATIC_DATA; /* prepare a^0 in Montgomery domain */ -#if 1 +#if 1 /* by Shay Gueron's suggestion */ + if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { + /* 2^(top*BN_BITS2) - m */ + tmp.d[0] = (0 - m->d[0]) & BN_MASK2; + for (i = 1; i < top; i++) + tmp.d[i] = (~m->d[i]) & BN_MASK2; + tmp.top = top; + } else +#endif if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) goto err; -#else - tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ - for (i = 1; i < top; i++) - tmp.d[i] = (~m->d[i]) & BN_MASK2; - tmp.top = top; -#endif /* prepare a^1 in Montgomery domain */ if (a->neg || BN_ucmp(a, m) >= 0) { @@ -689,6 +801,138 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } else if (!BN_to_montgomery(&am, a, mont, ctx)) goto err; +#if defined(SPARC_T4_MONT) + if (t4) { + typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np, + const BN_ULONG *n0, const void *table, + int power, int bits); + static const bn_pwr5_mont_f pwr5_funcs[4] = { + bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16, + bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 + }; + bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1]; + + typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp, + const BN_ULONG *np, const BN_ULONG *n0); + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0); + static const bn_mul_mont_f mul_funcs[4] = { + bn_mul_mont_t4_8, bn_mul_mont_t4_16, + bn_mul_mont_t4_24, bn_mul_mont_t4_32 + }; + bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1]; + + void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0, int num); + void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap, + const void *bp, const BN_ULONG *np, + const BN_ULONG *n0, int num); + void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap, + const void *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num, + void *table, size_t power); + void bn_gather5_t4(BN_ULONG *out, size_t num, + void *table, size_t power); + void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num); + + BN_ULONG *np = mont->N.d, *n0 = mont->n0; + int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less + * than 32 */ + + /* + * BN_to_montgomery can contaminate words above .top [in + * BN_DEBUG[_DEBUG] build]... 
+ */ + for (i = am.top; i < top; i++) + am.d[i] = 0; + for (i = tmp.top; i < top; i++) + tmp.d[i] = 0; + + bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0); + bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1); + if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) && + !(*mul_worker) (tmp.d, am.d, am.d, np, n0)) + bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top); + bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2); + + for (i = 3; i < 32; i++) { + /* Calculate a^i = a^(i-1) * a */ + if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) && + !(*mul_worker) (tmp.d, tmp.d, am.d, np, n0)) + bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top); + bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i); + } + + /* switch to 64-bit domain */ + np = alloca(top * sizeof(BN_ULONG)); + top /= 2; + bn_flip_t4(np, mont->N.d, top); + + bits--; + for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) + wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + bn_gather5_t4(tmp.d, top, powerbuf, wvalue); + + /* + * Scan the exponent one window at a time starting from the most + * significant bits. + */ + while (bits >= 0) { + if (bits < stride) + stride = bits + 1; + bits -= stride; + wvalue = bn_get_bits(p, bits + 1); + + if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) + continue; + /* retry once and fall back */ + if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) + continue; + + bits += stride - 5; + wvalue >>= stride - 5; + wvalue &= 31; + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top, + wvalue); + } + + bn_flip_t4(tmp.d, tmp.d, top); + top *= 2; + /* back to 32-bit domain */ + tmp.top = top; + bn_correct_top(&tmp); + OPENSSL_cleanse(np, top * sizeof(BN_ULONG)); + } else +#endif #if defined(OPENSSL_BN_ASM_MONT5) if (window == 5 && top > 1) { /* @@ -707,8 +951,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power); void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); + void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, + const void *table, const BN_ULONG *np, + const BN_ULONG *n0, int num, int power); + int bn_get_bits5(const BN_ULONG *ap, int off); + int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *not_used, const BN_ULONG *np, + const BN_ULONG *n0, int num); - BN_ULONG *np = mont->N.d, *n0 = mont->n0; + BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2; /* * BN_to_montgomery can contaminate words above .top [in @@ -719,6 +970,12 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, for (i = tmp.top; i < top; i++) tmp.d[i] = 0; + if (top & 7) + np2 = np; + else + for (np2 = am.d + top, i = 0; i < top; i++) + np2[2 * i] = np[i]; + bn_scatter5(tmp.d, top, powerbuf, 0); bn_scatter5(am.d, am.top, powerbuf, 1); bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); @@ -727,7 +984,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, # if 0 for (i = 3; i < 32; i++) { /* Calculate a^i = a^(i-1) * a */ - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); } # else @@ -738,7 +995,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } 
for (i = 3; i < 8; i += 2) { int j; - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); for (j = 2 * i; j < 32; j *= 2) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); @@ -746,13 +1003,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } for (; i < 16; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_scatter5(tmp.d, top, powerbuf, 2 * i); } for (; i < 32; i += 2) { - bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); + bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); bn_scatter5(tmp.d, top, powerbuf, i); } # endif @@ -765,20 +1022,34 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { - for (wvalue = 0, i = 0; i < 5; i++, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + if (top & 7) + while (bits >= 0) { + for (wvalue = 0, i = 0; i < 5; i++, bits--) + wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); - bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); + bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, + wvalue); + } else { + while (bits >= 0) { + wvalue = bn_get_bits5(p->d, bits - 4); + bits -= 5; + bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); + } } + ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top); tmp.top = top; bn_correct_top(&tmp); + if (ret) { + if (!BN_copy(rr, &tmp)) + ret = 0; + goto err; /* non-zero ret means it's not error */ + } } else #endif { @@ -844,6 +1115,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } /* Convert the final result from montgomery to standard format */ +#if defined(SPARC_T4_MONT) + if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) { + am.d[0] = 1; /* borrow am */ + for (i = 1; i < top; i++) + am.d[i] = 0; + if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx)) + goto err; + } else +#endif if (!BN_from_montgomery(rr, &tmp, mont, ctx)) goto err; ret = 1; diff --git a/deps/openssl/openssl/crypto/bn/bn_lcl.h b/deps/openssl/openssl/crypto/bn/bn_lcl.h index 1059d1d4b5c8f9..7cd58830e93452 100644 --- a/deps/openssl/openssl/crypto/bn/bn_lcl.h +++ b/deps/openssl/openssl/crypto/bn/bn_lcl.h @@ -204,6 +204,24 @@ extern "C" { # define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32)/* 32 */ # define BN_MONT_CTX_SET_SIZE_WORD (64)/* 32 */ +/* + * 2011-02-22 SMS. In various places, a size_t variable or a type cast to + * size_t was used to perform integer-only operations on pointers. This + * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t + * is still only 32 bits. 
What's needed in these cases is an integer type + * with the same size as a pointer, which size_t is not certain to be. The + * only fix here is VMS-specific. + */ +# if defined(OPENSSL_SYS_VMS) +# if __INITIAL_POINTER_SIZE == 64 +# define PTR_SIZE_INT long long +# else /* __INITIAL_POINTER_SIZE == 64 */ +# define PTR_SIZE_INT int +# endif /* __INITIAL_POINTER_SIZE == 64 [else] */ +# elif !defined(PTR_SIZE_INT) /* defined(OPENSSL_SYS_VMS) */ +# define PTR_SIZE_INT size_t +# endif /* defined(OPENSSL_SYS_VMS) [else] */ + # if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) /* * BN_UMULT_HIGH section. @@ -295,6 +313,15 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, : "r"(a), "r"(b)); # endif # endif +# elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG) +# if defined(__GNUC__) && __GNUC__>=2 +# define BN_UMULT_HIGH(a,b) ({ \ + register BN_ULONG ret; \ + asm ("umulh %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a), "r"(b)); \ + ret; }) +# endif # endif /* cpu */ # endif /* OPENSSL_NO_ASM */ diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.c b/deps/openssl/openssl/crypto/bn/rsaz_exp.c new file mode 100644 index 00000000000000..a486b154c2e8ba --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.c @@ -0,0 +1,336 @@ +/***************************************************************************** +* * +* Copyright (c) 2012, Intel Corporation * +* * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are * +* met: * +* * +* * Redistributions of source code must retain the above copyright * +* notice, this list of conditions and the following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright * +* notice, this list of conditions and the following disclaimer in the * +* documentation and/or other materials provided with the * +* distribution. * +* * +* * Neither the name of the Intel Corporation nor the names of its * +* contributors may be used to endorse or promote products derived from * +* this software without specific prior written permission. * +* * +* * +* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * +* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * +* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * +* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * +* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * +* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +* * +****************************************************************************** +* Developers and authors: * +* Shay Gueron (1, 2), and Vlad Krasnov (1) * +* (1) Intel Corporation, Israel Development Center, Haifa, Israel * +* (2) University of Haifa, Israel * +*****************************************************************************/ + +#include "rsaz_exp.h" + +/* + * See crypto/bn/asm/rsaz-avx2.pl for further details. 
+ */ +void rsaz_1024_norm2red_avx2(void *red, const void *norm); +void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b, + const void *n, BN_ULONG k); +void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k, + int cnt); +void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i); +void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i); +void rsaz_1024_red2norm_avx2(void *norm, const void *red); + +#if defined(__GNUC__) +# define ALIGN64 __attribute__((aligned(64))) +#elif defined(_MSC_VER) +# define ALIGN64 __declspec(align(64)) +#elif defined(__SUNPRO_C) +# define ALIGN64 +# pragma align 64(one,two80) +#else +/* not fatal, might hurt performance a little */ +# define ALIGN64 +#endif + +ALIGN64 static const BN_ULONG one[40] = { + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +ALIGN64 static const BN_ULONG two80[40] = { + 0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], + const BN_ULONG base_norm[16], + const BN_ULONG exponent[16], + const BN_ULONG m_norm[16], const BN_ULONG RR[16], + BN_ULONG k0) +{ + unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */ + unsigned char *p_str = storage + (64 - ((size_t)storage % 64)); + unsigned char *a_inv, *m, *result; + unsigned char *table_s = p_str + 320 * 3; + unsigned char *R2 = table_s; /* borrow */ + int index; + int wvalue; + + if ((((size_t)p_str & 4095) + 320) >> 12) { + result = p_str; + a_inv = p_str + 320; + m = p_str + 320 * 2; /* should not cross page */ + } else { + m = p_str; /* should not cross page */ + result = p_str + 320; + a_inv = p_str + 320 * 2; + } + + rsaz_1024_norm2red_avx2(m, m_norm); + rsaz_1024_norm2red_avx2(a_inv, base_norm); + rsaz_1024_norm2red_avx2(R2, RR); + + rsaz_1024_mul_avx2(R2, R2, R2, m, k0); + rsaz_1024_mul_avx2(R2, R2, two80, m, k0); + + /* table[0] = 1 */ + rsaz_1024_mul_avx2(result, R2, one, m, k0); + /* table[1] = a_inv^1 */ + rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); + + rsaz_1024_scatter5_avx2(table_s, result, 0); + rsaz_1024_scatter5_avx2(table_s, a_inv, 1); + + /* table[2] = a_inv^2 */ + rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 2); +#if 0 + /* this is almost 2x smaller and less than 1% slower */ + for (index = 3; index < 32; index++) { + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, index); + } +#else + /* table[4] = a_inv^4 */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 4); + /* table[8] = a_inv^8 */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 8); + /* table[16] = a_inv^16 */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 16); + /* table[17] = a_inv^17 */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 17); + + /* table[3] */ + rsaz_1024_gather5_avx2(result, table_s, 2); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 3); + /* table[6] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 6); + /* table[12] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 12); + /* table[24] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + 
rsaz_1024_scatter5_avx2(table_s, result, 24); + /* table[25] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 25); + + /* table[5] */ + rsaz_1024_gather5_avx2(result, table_s, 4); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 5); + /* table[10] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 10); + /* table[20] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 20); + /* table[21] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 21); + + /* table[7] */ + rsaz_1024_gather5_avx2(result, table_s, 6); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 7); + /* table[14] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 14); + /* table[28] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 28); + /* table[29] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 29); + + /* table[9] */ + rsaz_1024_gather5_avx2(result, table_s, 8); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 9); + /* table[18] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 18); + /* table[19] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 19); + + /* table[11] */ + rsaz_1024_gather5_avx2(result, table_s, 10); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 11); + /* table[22] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 22); + /* table[23] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 23); + + /* table[13] */ + rsaz_1024_gather5_avx2(result, table_s, 12); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 13); + /* table[26] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 26); + /* table[27] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 27); + + /* table[15] */ + rsaz_1024_gather5_avx2(result, table_s, 14); + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 15); + /* table[30] */ + rsaz_1024_sqr_avx2(result, result, m, k0, 1); + rsaz_1024_scatter5_avx2(table_s, result, 30); + /* table[31] */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + rsaz_1024_scatter5_avx2(table_s, result, 31); +#endif + + /* load first window */ + p_str = (unsigned char *)exponent; + wvalue = p_str[127] >> 3; + rsaz_1024_gather5_avx2(result, table_s, wvalue); + + index = 1014; + + while (index > -1) { /* loop for the remaining 127 windows */ + + rsaz_1024_sqr_avx2(result, result, m, k0, 5); + + wvalue = *((unsigned short *)&p_str[index / 8]); + wvalue = (wvalue >> (index % 8)) & 31; + index -= 5; + + rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + } + + /* square four times */ + rsaz_1024_sqr_avx2(result, result, m, k0, 4); + + wvalue = p_str[0] & 15; + + rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ + rsaz_1024_mul_avx2(result, result, a_inv, m, k0); + + /* from Montgomery */ + 
rsaz_1024_mul_avx2(result, result, one, m, k0); + + rsaz_1024_red2norm_avx2(result_norm, result); + + OPENSSL_cleanse(storage, sizeof(storage)); +} + +/* + * See crypto/bn/rsaz-x86_64.pl for further details. + */ +void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n, + BN_ULONG k); +void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n, + BN_ULONG k, const void *tbl, unsigned int power); +void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl, + const void *n, BN_ULONG k, unsigned int power); +void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k); +void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k, + int cnt); +void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power); +void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power); + +void RSAZ_512_mod_exp(BN_ULONG result[8], + const BN_ULONG base[8], const BN_ULONG exponent[8], + const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8]) +{ + unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */ + unsigned char *table = storage + (64 - ((size_t)storage % 64)); + BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8); + BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8); + unsigned char *p_str = (unsigned char *)exponent; + int index; + unsigned int wvalue; + + /* table[0] = 1_inv */ + temp[0] = 0 - m[0]; + temp[1] = ~m[1]; + temp[2] = ~m[2]; + temp[3] = ~m[3]; + temp[4] = ~m[4]; + temp[5] = ~m[5]; + temp[6] = ~m[6]; + temp[7] = ~m[7]; + rsaz_512_scatter4(table, temp, 0); + + /* table [1] = a_inv^1 */ + rsaz_512_mul(a_inv, base, RR, m, k0); + rsaz_512_scatter4(table, a_inv, 1); + + /* table [2] = a_inv^2 */ + rsaz_512_sqr(temp, a_inv, m, k0, 1); + rsaz_512_scatter4(table, temp, 2); + + for (index = 3; index < 16; index++) + rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index); + + /* load first window */ + wvalue = p_str[63]; + + rsaz_512_gather4(temp, table, wvalue >> 4); + rsaz_512_sqr(temp, temp, m, k0, 4); + rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf); + + for (index = 62; index >= 0; index--) { + wvalue = p_str[index]; + + rsaz_512_sqr(temp, temp, m, k0, 4); + rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4); + + rsaz_512_sqr(temp, temp, m, k0, 4); + rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f); + } + + /* from Montgomery */ + rsaz_512_mul_by_one(result, temp, m, k0); + + OPENSSL_cleanse(storage, sizeof(storage)); +} diff --git a/deps/openssl/openssl/crypto/bn/rsaz_exp.h b/deps/openssl/openssl/crypto/bn/rsaz_exp.h new file mode 100644 index 00000000000000..bb71fb1e197b22 --- /dev/null +++ b/deps/openssl/openssl/crypto/bn/rsaz_exp.h @@ -0,0 +1,47 @@ +/****************************************************************************** +* Copyright(c) 2012, Intel Corp. +* Developers and authors: +* Shay Gueron (1, 2), and Vlad Krasnov (1) +* (1) Intel Corporation, Israel Development Center, Haifa, Israel +* (2) University of Haifa, Israel +****************************************************************************** +* LICENSE: +* This submission to OpenSSL is to be made available under the OpenSSL +* license, and only to the OpenSSL project, in order to allow integration +* into the publicly distributed code. +* The use of this code, or portions of this code, or concepts embedded in +* this code, or modification of this code and/or algorithm(s) in it, or the +* use of this code for any other purpose than stated above, requires special +* licensing. 
+******************************************************************************
+* DISCLAIMER:
+* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS
+* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT
+* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.
+******************************************************************************/
+
+#ifndef RSAZ_EXP_H
+# define RSAZ_EXP_H
+
+# include <openssl/bn.h>
+
+void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
+ const BN_ULONG base_norm[16],
+ const BN_ULONG exponent[16],
+ const BN_ULONG m_norm[16], const BN_ULONG RR[16],
+ BN_ULONG k0);
+int rsaz_avx2_eligible();
+
+void RSAZ_512_mod_exp(BN_ULONG result[8],
+ const BN_ULONG base_norm[8], const BN_ULONG exponent[8],
+ const BN_ULONG m_norm[8], BN_ULONG k0,
+ const BN_ULONG RR[8]);
+#endif
diff --git a/deps/openssl/openssl/crypto/buffer/buf_str.c b/deps/openssl/openssl/crypto/buffer/buf_str.c
index fdde3d7db4ba0c..ebc5ab4646cef2 100644
--- a/deps/openssl/openssl/crypto/buffer/buf_str.c
+++ b/deps/openssl/openssl/crypto/buffer/buf_str.c
@@ -60,6 +60,15 @@ #include "cryptlib.h" #include <openssl/buffer.h>
+size_t BUF_strnlen(const char *str, size_t maxlen)
+{
+ const char *p;
+
+ for (p = str; maxlen-- != 0 && *p != '\0'; ++p) ;
+
+ return p - str;
+}
+
char *BUF_strdup(const char *str) { if (str == NULL)
@@ -74,6 +83,8 @@ char *BUF_strndup(const char *str, size_t siz) if (str == NULL) return (NULL);
+ siz = BUF_strnlen(str, siz);
+
ret = OPENSSL_malloc(siz + 1); if (ret == NULL) { BUFerr(BUF_F_BUF_STRNDUP, ERR_R_MALLOC_FAILURE);
diff --git a/deps/openssl/openssl/crypto/buffer/buffer.h b/deps/openssl/openssl/crypto/buffer/buffer.h
index 632df93c657ca7..c343dd772f1efb 100644
--- a/deps/openssl/openssl/crypto/buffer/buffer.h
+++ b/deps/openssl/openssl/crypto/buffer/buffer.h
@@ -84,6 +84,7 @@ BUF_MEM *BUF_MEM_new(void); void BUF_MEM_free(BUF_MEM *a); int BUF_MEM_grow(BUF_MEM *str, size_t len); int BUF_MEM_grow_clean(BUF_MEM *str, size_t len);
+size_t BUF_strnlen(const char *str, size_t maxlen);
char *BUF_strdup(const char *str); char *BUF_strndup(const char *str, size_t siz); void *BUF_memdup(const void *data, size_t siz);
diff --git a/deps/openssl/openssl/crypto/camellia/Makefile b/deps/openssl/openssl/crypto/camellia/Makefile
index 6ce6fc99cd20f0..60e896054f494c 100644
--- a/deps/openssl/openssl/crypto/camellia/Makefile
+++ b/deps/openssl/openssl/crypto/camellia/Makefile
@@ -48,6 +48,8 @@ cmll-x86.s: asm/cmll-x86.pl ../perlasm/x86asm.pl $(PERL) asm/cmll-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ cmll-x86_64.s: asm/cmll-x86_64.pl $(PERL) asm/cmll-x86_64.pl $(PERLASM_SCHEME) > $@
+cmllt4-sparcv9.s: asm/cmllt4-sparcv9.pl ../perlasm/sparcv9_modes.pl
+ $(PERL) asm/cmllt4-sparcv9.pl $(CFLAGS) > $@
files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
diff --git a/deps/openssl/openssl/crypto/camellia/asm/cmll-x86_64.pl b/deps/openssl/openssl/crypto/camellia/asm/cmll-x86_64.pl index
9f4b82fa482195..d94f46b887e516 100644 --- a/deps/openssl/openssl/crypto/camellia/asm/cmll-x86_64.pl +++ b/deps/openssl/openssl/crypto/camellia/asm/cmll-x86_64.pl @@ -72,7 +72,7 @@ sub Camellia_Feistel { my $seed=defined(@_[1])?@_[1]:0; my $scale=$seed<0?-8:8; my $j=($i&1)*2; -my $s0=@S[($j)%4],$s1=@S[($j+1)%4],$s2=@S[($j+2)%4],$s3=@S[($j+3)%4]; +my ($s0,$s1,$s2,$s3)=(@S[($j)%4],@S[($j+1)%4],@S[($j+2)%4],@S[($j+3)%4]); $code.=<<___; xor $s0,$t0 # t0^=key[0] @@ -409,7 +409,7 @@ sub _rotl128 { push %r15 .Lkey_prologue: - mov %rdi,$keyend # put away arguments, keyBitLength + mov %edi,${keyend}d # put away arguments, keyBitLength mov %rdx,$out # keyTable mov 0(%rsi),@S[0] # load 0-127 bits diff --git a/deps/openssl/openssl/crypto/camellia/asm/cmllt4-sparcv9.pl b/deps/openssl/openssl/crypto/camellia/asm/cmllt4-sparcv9.pl new file mode 100644 index 00000000000000..a813168b42ec47 --- /dev/null +++ b/deps/openssl/openssl/crypto/camellia/asm/cmllt4-sparcv9.pl @@ -0,0 +1,929 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by David S. Miller and Andy Polyakov +# . The module is licensed under 2-clause BSD +# license. October 2012. All rights reserved. +# ==================================================================== + +###################################################################### +# Camellia for SPARC T4. +# +# As with AES below results [for aligned data] are virtually identical +# to critical path lenths for 3-cycle instruction latency: +# +# 128-bit key 192/256- +# CBC encrypt 4.14/4.21(*) 5.46/5.52 +# (*) numbers after slash are for +# misaligned data. +# +# As with Intel AES-NI, question is if it's possible to improve +# performance of parallelizeable modes by interleaving round +# instructions. In Camellia every instruction is dependent on +# previous, which means that there is place for 2 additional ones +# in between two dependent. Can we expect 3x performance improvement? +# At least one can argue that it should be possible to break 2x +# barrier... For some reason not even 2x appears to be possible: +# +# 128-bit key 192/256- +# CBC decrypt 2.21/2.74 2.99/3.40 +# CTR 2.15/2.68(*) 2.93/3.34 +# (*) numbers after slash are for +# misaligned data. +# +# This is for 2x interleave. But compared to 1x interleave CBC decrypt +# improved by ... 0% for 128-bit key, and 11% for 192/256-bit one. +# So that out-of-order execution logic can take non-interleaved code +# to 1.87x, but can't take 2x interleaved one any further. There +# surely is some explanation... As result 3x interleave was not even +# attempted. Instead an effort was made to share specific modes +# implementations with AES module (therefore sparct4_modes.pl). +# +# To anchor to something else, software C implementation processes +# one byte in 38 cycles with 128-bit key on same processor. + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "sparcv9_modes.pl"; + +&asm_init(@ARGV); + +$::evp=1; # if $evp is set to 0, script generates module with +# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt +# entry points. These are fully compatible with openssl/camellia.h. + +###################################################################### +# single-round subroutines +# +{ +my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5)); + +$code=<<___; +.text + +.globl cmll_t4_encrypt +.align 32 +cmll_t4_encrypt: + andcc $inp, 7, %g1 ! is input aligned? 
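+ ! note: when the input is misaligned, the three ldx loads and the
+ ! sllx/srlx/or sequence below re-assemble the 16 plaintext bytes
+ ! before the pre-whitening xor with k[0,1]/k[2,3]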
+ andn $inp, 7, $inp + + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 + + ldx [$inp + 0], %o4 + bz,pt %icc, 1f + ldx [$inp + 8], %o5 + ldx [$inp + 16], $inp + sll %g1, 3, %g1 + sub %g0, %g1, %o3 + sllx %o4, %g1, %o4 + sllx %o5, %g1, %g1 + srlx %o5, %o3, %o5 + srlx $inp, %o3, %o3 + or %o5, %o4, %o4 + or %o3, %g1, %o5 +1: + ld [$key + 272], $rounds ! grandRounds, 3 or 4 + ldd [$key + 16], %f12 + ldd [$key + 24], %f14 + xor %g4, %o4, %o4 + xor %g5, %o5, %o5 + ldd [$key + 32], %f16 + ldd [$key + 40], %f18 + movxtod %o4, %f0 + movxtod %o5, %f2 + ldd [$key + 48], %f20 + ldd [$key + 56], %f22 + sub $rounds, 1, $rounds + ldd [$key + 64], %f24 + ldd [$key + 72], %f26 + add $key, 80, $key + +.Lenc: + camellia_f %f12, %f2, %f0, %f2 + ldd [$key + 0], %f12 + sub $rounds,1,$rounds + camellia_f %f14, %f0, %f2, %f0 + ldd [$key + 8], %f14 + camellia_f %f16, %f2, %f0, %f2 + ldd [$key + 16], %f16 + camellia_f %f18, %f0, %f2, %f0 + ldd [$key + 24], %f18 + camellia_f %f20, %f2, %f0, %f2 + ldd [$key + 32], %f20 + camellia_f %f22, %f0, %f2, %f0 + ldd [$key + 40], %f22 + camellia_fl %f24, %f0, %f0 + ldd [$key + 48], %f24 + camellia_fli %f26, %f2, %f2 + ldd [$key + 56], %f26 + brnz,pt $rounds, .Lenc + add $key, 64, $key + + andcc $out, 7, $tmp ! is output aligned? + camellia_f %f12, %f2, %f0, %f2 + camellia_f %f14, %f0, %f2, %f0 + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + camellia_f %f20, %f2, %f0, %f4 + camellia_f %f22, %f0, %f4, %f2 + fxor %f24, %f4, %f0 + fxor %f26, %f2, %f2 + + bnz,pn %icc, 2f + nop + + std %f0, [$out + 0] + retl + std %f2, [$out + 8] + +2: alignaddrl $out, %g0, $out + mov 0xff, $mask + srl $mask, $tmp, $mask + + faligndata %f0, %f0, %f4 + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $mask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $mask, $mask + retl + stda %f8, [$out + $mask]0xc0 ! partial store +.type cmll_t4_encrypt,#function +.size cmll_t4_encrypt,.-cmll_t4_encrypt + +.globl cmll_t4_decrypt +.align 32 +cmll_t4_decrypt: + ld [$key + 272], $rounds ! grandRounds, 3 or 4 + andcc $inp, 7, %g1 ! is input aligned? + andn $inp, 7, $inp + + sll $rounds, 6, $rounds + add $rounds, $key, $key + + ldx [$inp + 0], %o4 + bz,pt %icc, 1f + ldx [$inp + 8], %o5 + ldx [$inp + 16], $inp + sll %g1, 3, %g1 + sub %g0, %g1, %g4 + sllx %o4, %g1, %o4 + sllx %o5, %g1, %g1 + srlx %o5, %g4, %o5 + srlx $inp, %g4, %g4 + or %o5, %o4, %o4 + or %g4, %g1, %o5 +1: + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 + ldd [$key - 8], %f12 + ldd [$key - 16], %f14 + xor %g4, %o4, %o4 + xor %g5, %o5, %o5 + ldd [$key - 24], %f16 + ldd [$key - 32], %f18 + movxtod %o4, %f0 + movxtod %o5, %f2 + ldd [$key - 40], %f20 + ldd [$key - 48], %f22 + sub $rounds, 64, $rounds + ldd [$key - 56], %f24 + ldd [$key - 64], %f26 + sub $key, 64, $key + +.Ldec: + camellia_f %f12, %f2, %f0, %f2 + ldd [$key - 8], %f12 + sub $rounds, 64, $rounds + camellia_f %f14, %f0, %f2, %f0 + ldd [$key - 16], %f14 + camellia_f %f16, %f2, %f0, %f2 + ldd [$key - 24], %f16 + camellia_f %f18, %f0, %f2, %f0 + ldd [$key - 32], %f18 + camellia_f %f20, %f2, %f0, %f2 + ldd [$key - 40], %f20 + camellia_f %f22, %f0, %f2, %f0 + ldd [$key - 48], %f22 + camellia_fl %f24, %f0, %f0 + ldd [$key - 56], %f24 + camellia_fli %f26, %f2, %f2 + ldd [$key - 64], %f26 + brnz,pt $rounds, .Ldec + sub $key, 64, $key + + andcc $out, 7, $tmp ! is output aligned? 
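+ ! note: the final six camellia_f rounds and the output whitening follow;
+ ! a misaligned destination is written at label 2 with faligndata and
+ ! two partial stores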
+ camellia_f %f12, %f2, %f0, %f2 + camellia_f %f14, %f0, %f2, %f0 + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + camellia_f %f20, %f2, %f0, %f4 + camellia_f %f22, %f0, %f4, %f2 + fxor %f26, %f4, %f0 + fxor %f24, %f2, %f2 + + bnz,pn %icc, 2f + nop + + std %f0, [$out + 0] + retl + std %f2, [$out + 8] + +2: alignaddrl $out, %g0, $out + mov 0xff, $mask + srl $mask, $tmp, $mask + + faligndata %f0, %f0, %f4 + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $mask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $mask, $mask + retl + stda %f8, [$out + $mask]0xc0 ! partial store +.type cmll_t4_decrypt,#function +.size cmll_t4_decrypt,.-cmll_t4_decrypt +___ +} + +###################################################################### +# key setup subroutines +# +{ +sub ROTL128 { + my $rot = shift; + + "srlx %o4, 64-$rot, %g4\n\t". + "sllx %o4, $rot, %o4\n\t". + "srlx %o5, 64-$rot, %g5\n\t". + "sllx %o5, $rot, %o5\n\t". + "or %o4, %g5, %o4\n\t". + "or %o5, %g4, %o5"; +} + +my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5)); +$code.=<<___; +.globl cmll_t4_set_key +.align 32 +cmll_t4_set_key: + and $inp, 7, $tmp + alignaddr $inp, %g0, $inp + cmp $bits, 192 + ldd [$inp + 0], %f0 + bl,pt %icc,.L128 + ldd [$inp + 8], %f2 + + be,pt %icc,.L192 + ldd [$inp + 16], %f4 + + brz,pt $tmp, .L256aligned + ldd [$inp + 24], %f6 + + ldd [$inp + 32], %f8 + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + b .L256aligned + faligndata %f6, %f8, %f6 + +.align 16 +.L192: + brz,a,pt $tmp, .L256aligned + fnot2 %f4, %f6 + + ldd [$inp + 24], %f6 + nop + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + fnot2 %f4, %f6 + +.L256aligned: + std %f0, [$out + 0] ! k[0, 1] + fsrc2 %f0, %f28 + std %f2, [$out + 8] ! k[2, 3] + fsrc2 %f2, %f30 + fxor %f4, %f0, %f0 + b .L128key + fxor %f6, %f2, %f2 + +.align 16 +.L128: + brz,pt $tmp, .L128aligned + nop + + ldd [$inp + 16], %f4 + nop + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + +.L128aligned: + std %f0, [$out + 0] ! k[0, 1] + fsrc2 %f0, %f28 + std %f2, [$out + 8] ! k[2, 3] + fsrc2 %f2, %f30 + +.L128key: + mov %o7, %o5 +1: call .+8 + add %o7, SIGMA-1b, %o4 + mov %o5, %o7 + + ldd [%o4 + 0], %f16 + ldd [%o4 + 8], %f18 + ldd [%o4 + 16], %f20 + ldd [%o4 + 24], %f22 + + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + fxor %f28, %f0, %f0 + fxor %f30, %f2, %f2 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f22, %f0, %f2, %f0 + + bge,pn %icc, .L256key + nop + std %f0, [$out + 0x10] ! k[ 4, 5] + std %f2, [$out + 0x18] ! k[ 6, 7] + + movdtox %f0, %o4 + movdtox %f2, %o5 + `&ROTL128(15)` + stx %o4, [$out + 0x30] ! k[12, 13] + stx %o5, [$out + 0x38] ! k[14, 15] + `&ROTL128(15)` + stx %o4, [$out + 0x40] ! k[16, 17] + stx %o5, [$out + 0x48] ! k[18, 19] + `&ROTL128(15)` + stx %o4, [$out + 0x60] ! k[24, 25] + `&ROTL128(15)` + stx %o4, [$out + 0x70] ! k[28, 29] + stx %o5, [$out + 0x78] ! k[30, 31] + `&ROTL128(34)` + stx %o4, [$out + 0xa0] ! k[40, 41] + stx %o5, [$out + 0xa8] ! k[42, 43] + `&ROTL128(17)` + stx %o4, [$out + 0xc0] ! k[48, 49] + stx %o5, [$out + 0xc8] ! k[50, 51] + + movdtox %f28, %o4 ! k[ 0, 1] + movdtox %f30, %o5 ! k[ 2, 3] + `&ROTL128(15)` + stx %o4, [$out + 0x20] ! k[ 8, 9] + stx %o5, [$out + 0x28] ! k[10, 11] + `&ROTL128(30)` + stx %o4, [$out + 0x50] ! k[20, 21] + stx %o5, [$out + 0x58] ! k[22, 23] + `&ROTL128(15)` + stx %o5, [$out + 0x68] ! k[26, 27] + `&ROTL128(17)` + stx %o4, [$out + 0x80] ! k[32, 33] + stx %o5, [$out + 0x88] ! 
k[34, 35] + `&ROTL128(17)` + stx %o4, [$out + 0x90] ! k[36, 37] + stx %o5, [$out + 0x98] ! k[38, 39] + `&ROTL128(17)` + stx %o4, [$out + 0xb0] ! k[44, 45] + stx %o5, [$out + 0xb8] ! k[46, 47] + + mov 3, $tmp + st $tmp, [$out + 0x110] + retl + xor %o0, %o0, %o0 + +.align 16 +.L256key: + ldd [%o4 + 32], %f24 + ldd [%o4 + 40], %f26 + + std %f0, [$out + 0x30] ! k[12, 13] + std %f2, [$out + 0x38] ! k[14, 15] + + fxor %f4, %f0, %f0 + fxor %f6, %f2, %f2 + camellia_f %f24, %f2, %f0, %f2 + camellia_f %f26, %f0, %f2, %f0 + + std %f0, [$out + 0x10] ! k[ 4, 5] + std %f2, [$out + 0x18] ! k[ 6, 7] + + movdtox %f0, %o4 + movdtox %f2, %o5 + `&ROTL128(30)` + stx %o4, [$out + 0x50] ! k[20, 21] + stx %o5, [$out + 0x58] ! k[22, 23] + `&ROTL128(30)` + stx %o4, [$out + 0xa0] ! k[40, 41] + stx %o5, [$out + 0xa8] ! k[42, 43] + `&ROTL128(51)` + stx %o4, [$out + 0x100] ! k[64, 65] + stx %o5, [$out + 0x108] ! k[66, 67] + + movdtox %f4, %o4 ! k[ 8, 9] + movdtox %f6, %o5 ! k[10, 11] + `&ROTL128(15)` + stx %o4, [$out + 0x20] ! k[ 8, 9] + stx %o5, [$out + 0x28] ! k[10, 11] + `&ROTL128(15)` + stx %o4, [$out + 0x40] ! k[16, 17] + stx %o5, [$out + 0x48] ! k[18, 19] + `&ROTL128(30)` + stx %o4, [$out + 0x90] ! k[36, 37] + stx %o5, [$out + 0x98] ! k[38, 39] + `&ROTL128(34)` + stx %o4, [$out + 0xd0] ! k[52, 53] + stx %o5, [$out + 0xd8] ! k[54, 55] + ldx [$out + 0x30], %o4 ! k[12, 13] + ldx [$out + 0x38], %o5 ! k[14, 15] + `&ROTL128(15)` + stx %o4, [$out + 0x30] ! k[12, 13] + stx %o5, [$out + 0x38] ! k[14, 15] + `&ROTL128(30)` + stx %o4, [$out + 0x70] ! k[28, 29] + stx %o5, [$out + 0x78] ! k[30, 31] + srlx %o4, 32, %g4 + srlx %o5, 32, %g5 + st %o4, [$out + 0xc0] ! k[48] + st %g5, [$out + 0xc4] ! k[49] + st %o5, [$out + 0xc8] ! k[50] + st %g4, [$out + 0xcc] ! k[51] + `&ROTL128(49)` + stx %o4, [$out + 0xe0] ! k[56, 57] + stx %o5, [$out + 0xe8] ! k[58, 59] + + movdtox %f28, %o4 ! k[ 0, 1] + movdtox %f30, %o5 ! k[ 2, 3] + `&ROTL128(45)` + stx %o4, [$out + 0x60] ! k[24, 25] + stx %o5, [$out + 0x68] ! k[26, 27] + `&ROTL128(15)` + stx %o4, [$out + 0x80] ! k[32, 33] + stx %o5, [$out + 0x88] ! k[34, 35] + `&ROTL128(17)` + stx %o4, [$out + 0xb0] ! k[44, 45] + stx %o5, [$out + 0xb8] ! k[46, 47] + `&ROTL128(34)` + stx %o4, [$out + 0xf0] ! k[60, 61] + stx %o5, [$out + 0xf8] ! k[62, 63] + + mov 4, $tmp + st $tmp, [$out + 0x110] + retl + xor %o0, %o0, %o0 +.type cmll_t4_set_key,#function +.size cmll_t4_set_key,.-cmll_t4_set_key +.align 32 +SIGMA: + .long 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2 + .long 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c + .long 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd +.type SIGMA,#object +.size SIGMA,.-SIGMA +.asciz "Camellia for SPARC T4, David S. 
Miller, Andy Polyakov" +___ +} + +{{{ +my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5)); +my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7)); + +$code.=<<___; +.align 32 +_cmll128_load_enckey: + ldx [$key + 0], %g4 + ldx [$key + 8], %g5 +___ +for ($i=2; $i<26;$i++) { # load key schedule + $code.=<<___; + ldd [$key + `8*$i`], %f`12+2*$i` +___ +} +$code.=<<___; + retl + nop +.type _cmll128_load_enckey,#function +.size _cmll128_load_enckey,.-_cmll128_load_enckey +_cmll256_load_enckey=_cmll128_load_enckey + +.align 32 +_cmll256_load_deckey: + ldd [$key + 64], %f62 + ldd [$key + 72], %f60 + b .Load_deckey + add $key, 64, $key +_cmll128_load_deckey: + ldd [$key + 0], %f60 + ldd [$key + 8], %f62 +.Load_deckey: +___ +for ($i=2; $i<24;$i++) { # load key schedule + $code.=<<___; + ldd [$key + `8*$i`], %f`62-2*$i` +___ +} +$code.=<<___; + ldx [$key + 192], %g4 + retl + ldx [$key + 200], %g5 +.type _cmll256_load_deckey,#function +.size _cmll256_load_deckey,.-_cmll256_load_deckey + +.align 32 +_cmll128_encrypt_1x: +___ +for ($i=0; $i<3; $i++) { + $code.=<<___; + camellia_f %f`16+16*$i+0`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+2`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+4`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+6`, %f0, %f2, %f0 +___ +$code.=<<___ if ($i<2); + camellia_f %f`16+16*$i+8`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+10`, %f0, %f2, %f0 + camellia_fl %f`16+16*$i+12`, %f0, %f0 + camellia_fli %f`16+16*$i+14`, %f2, %f2 +___ +} +$code.=<<___; + camellia_f %f56, %f2, %f0, %f4 + camellia_f %f58, %f0, %f4, %f2 + fxor %f60, %f4, %f0 + retl + fxor %f62, %f2, %f2 +.type _cmll128_encrypt_1x,#function +.size _cmll128_encrypt_1x,.-_cmll128_encrypt_1x +_cmll128_decrypt_1x=_cmll128_encrypt_1x + +.align 32 +_cmll128_encrypt_2x: +___ +for ($i=0; $i<3; $i++) { + $code.=<<___; + camellia_f %f`16+16*$i+0`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+0`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+2`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+2`, %f4, %f6, %f4 + camellia_f %f`16+16*$i+4`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+4`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+6`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+6`, %f4, %f6, %f4 +___ +$code.=<<___ if ($i<2); + camellia_f %f`16+16*$i+8`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+8`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+10`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+10`, %f4, %f6, %f4 + camellia_fl %f`16+16*$i+12`, %f0, %f0 + camellia_fl %f`16+16*$i+12`, %f4, %f4 + camellia_fli %f`16+16*$i+14`, %f2, %f2 + camellia_fli %f`16+16*$i+14`, %f6, %f6 +___ +} +$code.=<<___; + camellia_f %f56, %f2, %f0, %f8 + camellia_f %f56, %f6, %f4, %f10 + camellia_f %f58, %f0, %f8, %f2 + camellia_f %f58, %f4, %f10, %f6 + fxor %f60, %f8, %f0 + fxor %f60, %f10, %f4 + fxor %f62, %f2, %f2 + retl + fxor %f62, %f6, %f6 +.type _cmll128_encrypt_2x,#function +.size _cmll128_encrypt_2x,.-_cmll128_encrypt_2x +_cmll128_decrypt_2x=_cmll128_encrypt_2x + +.align 32 +_cmll256_encrypt_1x: + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + ldd [$key + 208], %f16 + ldd [$key + 216], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f22, %f0, %f2, %f0 + ldd [$key + 224], %f20 + ldd [$key + 232], %f22 + camellia_f %f24, %f2, %f0, %f2 + camellia_f %f26, %f0, %f2, %f0 + ldd [$key + 240], %f24 + ldd [$key + 248], %f26 + camellia_fl %f28, %f0, %f0 + camellia_fli %f30, %f2, %f2 + ldd [$key + 256], %f28 + ldd [$key + 264], %f30 +___ +for ($i=1; $i<3; $i++) { + $code.=<<___; + camellia_f %f`16+16*$i+0`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+2`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+4`, %f2, %f0, %f2 
+ camellia_f %f`16+16*$i+6`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+8`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+10`, %f0, %f2, %f0 + camellia_fl %f`16+16*$i+12`, %f0, %f0 + camellia_fli %f`16+16*$i+14`, %f2, %f2 +___ +} +$code.=<<___; + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f22, %f0, %f2, %f0 + ldd [$key + 32], %f20 + ldd [$key + 40], %f22 + camellia_f %f24, %f2, %f0, %f4 + camellia_f %f26, %f0, %f4, %f2 + ldd [$key + 48], %f24 + ldd [$key + 56], %f26 + fxor %f28, %f4, %f0 + fxor %f30, %f2, %f2 + ldd [$key + 64], %f28 + retl + ldd [$key + 72], %f30 +.type _cmll256_encrypt_1x,#function +.size _cmll256_encrypt_1x,.-_cmll256_encrypt_1x + +.align 32 +_cmll256_encrypt_2x: + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f16, %f6, %f4, %f6 + camellia_f %f18, %f0, %f2, %f0 + camellia_f %f18, %f4, %f6, %f4 + ldd [$key + 208], %f16 + ldd [$key + 216], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f20, %f6, %f4, %f6 + camellia_f %f22, %f0, %f2, %f0 + camellia_f %f22, %f4, %f6, %f4 + ldd [$key + 224], %f20 + ldd [$key + 232], %f22 + camellia_f %f24, %f2, %f0, %f2 + camellia_f %f24, %f6, %f4, %f6 + camellia_f %f26, %f0, %f2, %f0 + camellia_f %f26, %f4, %f6, %f4 + ldd [$key + 240], %f24 + ldd [$key + 248], %f26 + camellia_fl %f28, %f0, %f0 + camellia_fl %f28, %f4, %f4 + camellia_fli %f30, %f2, %f2 + camellia_fli %f30, %f6, %f6 + ldd [$key + 256], %f28 + ldd [$key + 264], %f30 +___ +for ($i=1; $i<3; $i++) { + $code.=<<___; + camellia_f %f`16+16*$i+0`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+0`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+2`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+2`, %f4, %f6, %f4 + camellia_f %f`16+16*$i+4`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+4`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+6`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+6`, %f4, %f6, %f4 + camellia_f %f`16+16*$i+8`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+8`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+10`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+10`, %f4, %f6, %f4 + camellia_fl %f`16+16*$i+12`, %f0, %f0 + camellia_fl %f`16+16*$i+12`, %f4, %f4 + camellia_fli %f`16+16*$i+14`, %f2, %f2 + camellia_fli %f`16+16*$i+14`, %f6, %f6 +___ +} +$code.=<<___; + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f16, %f6, %f4, %f6 + camellia_f %f18, %f0, %f2, %f0 + camellia_f %f18, %f4, %f6, %f4 + ldd [$key + 16], %f16 + ldd [$key + 24], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f20, %f6, %f4, %f6 + camellia_f %f22, %f0, %f2, %f0 + camellia_f %f22, %f4, %f6, %f4 + ldd [$key + 32], %f20 + ldd [$key + 40], %f22 + camellia_f %f24, %f2, %f0, %f8 + camellia_f %f24, %f6, %f4, %f10 + camellia_f %f26, %f0, %f8, %f2 + camellia_f %f26, %f4, %f10, %f6 + ldd [$key + 48], %f24 + ldd [$key + 56], %f26 + fxor %f28, %f8, %f0 + fxor %f28, %f10, %f4 + fxor %f30, %f2, %f2 + fxor %f30, %f6, %f6 + ldd [$key + 64], %f28 + retl + ldd [$key + 72], %f30 +.type _cmll256_encrypt_2x,#function +.size _cmll256_encrypt_2x,.-_cmll256_encrypt_2x + +.align 32 +_cmll256_decrypt_1x: + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + ldd [$key - 8], %f16 + ldd [$key - 16], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f22, %f0, %f2, %f0 + ldd [$key - 24], %f20 + ldd [$key - 32], %f22 + camellia_f %f24, %f2, %f0, %f2 + camellia_f %f26, %f0, %f2, %f0 + ldd [$key - 40], %f24 + ldd [$key - 48], %f26 + camellia_fl %f28, %f0, %f0 + camellia_fli %f30, %f2, %f2 + ldd [$key - 56], %f28 + ldd [$key - 64], %f30 +___ +for ($i=1; $i<3; $i++) { + 
$code.=<<___; + camellia_f %f`16+16*$i+0`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+2`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+4`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+6`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+8`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+10`, %f0, %f2, %f0 + camellia_fl %f`16+16*$i+12`, %f0, %f0 + camellia_fli %f`16+16*$i+14`, %f2, %f2 +___ +} +$code.=<<___; + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f18, %f0, %f2, %f0 + ldd [$key + 184], %f16 + ldd [$key + 176], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f22, %f0, %f2, %f0 + ldd [$key + 168], %f20 + ldd [$key + 160], %f22 + camellia_f %f24, %f2, %f0, %f4 + camellia_f %f26, %f0, %f4, %f2 + ldd [$key + 152], %f24 + ldd [$key + 144], %f26 + fxor %f30, %f4, %f0 + fxor %f28, %f2, %f2 + ldd [$key + 136], %f28 + retl + ldd [$key + 128], %f30 +.type _cmll256_decrypt_1x,#function +.size _cmll256_decrypt_1x,.-_cmll256_decrypt_1x + +.align 32 +_cmll256_decrypt_2x: + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f16, %f6, %f4, %f6 + camellia_f %f18, %f0, %f2, %f0 + camellia_f %f18, %f4, %f6, %f4 + ldd [$key - 8], %f16 + ldd [$key - 16], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f20, %f6, %f4, %f6 + camellia_f %f22, %f0, %f2, %f0 + camellia_f %f22, %f4, %f6, %f4 + ldd [$key - 24], %f20 + ldd [$key - 32], %f22 + camellia_f %f24, %f2, %f0, %f2 + camellia_f %f24, %f6, %f4, %f6 + camellia_f %f26, %f0, %f2, %f0 + camellia_f %f26, %f4, %f6, %f4 + ldd [$key - 40], %f24 + ldd [$key - 48], %f26 + camellia_fl %f28, %f0, %f0 + camellia_fl %f28, %f4, %f4 + camellia_fli %f30, %f2, %f2 + camellia_fli %f30, %f6, %f6 + ldd [$key - 56], %f28 + ldd [$key - 64], %f30 +___ +for ($i=1; $i<3; $i++) { + $code.=<<___; + camellia_f %f`16+16*$i+0`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+0`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+2`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+2`, %f4, %f6, %f4 + camellia_f %f`16+16*$i+4`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+4`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+6`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+6`, %f4, %f6, %f4 + camellia_f %f`16+16*$i+8`, %f2, %f0, %f2 + camellia_f %f`16+16*$i+8`, %f6, %f4, %f6 + camellia_f %f`16+16*$i+10`, %f0, %f2, %f0 + camellia_f %f`16+16*$i+10`, %f4, %f6, %f4 + camellia_fl %f`16+16*$i+12`, %f0, %f0 + camellia_fl %f`16+16*$i+12`, %f4, %f4 + camellia_fli %f`16+16*$i+14`, %f2, %f2 + camellia_fli %f`16+16*$i+14`, %f6, %f6 +___ +} +$code.=<<___; + camellia_f %f16, %f2, %f0, %f2 + camellia_f %f16, %f6, %f4, %f6 + camellia_f %f18, %f0, %f2, %f0 + camellia_f %f18, %f4, %f6, %f4 + ldd [$key + 184], %f16 + ldd [$key + 176], %f18 + camellia_f %f20, %f2, %f0, %f2 + camellia_f %f20, %f6, %f4, %f6 + camellia_f %f22, %f0, %f2, %f0 + camellia_f %f22, %f4, %f6, %f4 + ldd [$key + 168], %f20 + ldd [$key + 160], %f22 + camellia_f %f24, %f2, %f0, %f8 + camellia_f %f24, %f6, %f4, %f10 + camellia_f %f26, %f0, %f8, %f2 + camellia_f %f26, %f4, %f10, %f6 + ldd [$key + 152], %f24 + ldd [$key + 144], %f26 + fxor %f30, %f8, %f0 + fxor %f30, %f10, %f4 + fxor %f28, %f2, %f2 + fxor %f28, %f6, %f6 + ldd [$key + 136], %f28 + retl + ldd [$key + 128], %f30 +.type _cmll256_decrypt_2x,#function +.size _cmll256_decrypt_2x,.-_cmll256_decrypt_2x +___ + +&alg_cbc_encrypt_implement("cmll",128); +&alg_cbc_encrypt_implement("cmll",256); + +&alg_cbc_decrypt_implement("cmll",128); +&alg_cbc_decrypt_implement("cmll",256); + +if ($::evp) { + &alg_ctr32_implement("cmll",128); + &alg_ctr32_implement("cmll",256); +} +}}} + +if (!$::evp) { +$code.=<<___; +.global Camellia_encrypt +Camellia_encrypt=cmll_t4_encrypt +.global 
Camellia_decrypt +Camellia_decrypt=cmll_t4_decrypt +.global Camellia_set_key +.align 32 +Camellia_set_key: + andcc %o2, 7, %g0 ! double-check alignment + bnz,a,pn %icc, 1f + mov -1, %o0 + brz,a,pn %o0, 1f + mov -1, %o0 + brz,a,pn %o2, 1f + mov -1, %o0 + andncc %o1, 0x1c0, %g0 + bnz,a,pn %icc, 1f + mov -2, %o0 + cmp %o1, 128 + bl,a,pn %icc, 1f + mov -2, %o0 + b cmll_t4_set_key + nop +1: retl + nop +.type Camellia_set_key,#function +.size Camellia_set_key,.-Camellia_set_key +___ + +my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5)); + +$code.=<<___; +.globl Camellia_cbc_encrypt +.align 32 +Camellia_cbc_encrypt: + ld [$key + 272], %g1 + nop + brz $enc, .Lcbc_decrypt + cmp %g1, 3 + + be,pt %icc, cmll128_t4_cbc_encrypt + nop + ba cmll256_t4_cbc_encrypt + nop + +.Lcbc_decrypt: + be,pt %icc, cmll128_t4_cbc_decrypt + nop + ba cmll256_t4_cbc_decrypt + nop +.type Camellia_cbc_encrypt,#function +.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt +___ +} + +&emit_assembler(); + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/cast/cast_lcl.h b/deps/openssl/openssl/crypto/cast/cast_lcl.h index 7c4ad41b215748..b0f08294e37356 100644 --- a/deps/openssl/openssl/crypto/cast/cast_lcl.h +++ b/deps/openssl/openssl/crypto/cast/cast_lcl.h @@ -152,6 +152,8 @@ #if defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER) # define ROTL(a,n) (_lrotl(a,n)) +#elif defined(PEDANTIC) +# define ROTL(a,n) ((((a)<<(n))&0xffffffffL)|((a)>>((32-(n))&31))) #else # define ROTL(a,n) ((((a)<<(n))&0xffffffffL)|((a)>>(32-(n)))) #endif diff --git a/deps/openssl/openssl/crypto/cms/Makefile b/deps/openssl/openssl/crypto/cms/Makefile index 9820adb21278bf..644fef399ee989 100644 --- a/deps/openssl/openssl/crypto/cms/Makefile +++ b/deps/openssl/openssl/crypto/cms/Makefile @@ -19,10 +19,10 @@ APPS= LIB=$(TOP)/libcrypto.a LIBSRC= cms_lib.c cms_asn1.c cms_att.c cms_io.c cms_smime.c cms_err.c \ cms_sd.c cms_dd.c cms_cd.c cms_env.c cms_enc.c cms_ess.c \ - cms_pwri.c + cms_pwri.c cms_kari.c LIBOBJ= cms_lib.o cms_asn1.o cms_att.o cms_io.o cms_smime.o cms_err.o \ cms_sd.o cms_dd.o cms_cd.o cms_env.o cms_enc.o cms_ess.o \ - cms_pwri.o + cms_pwri.o cms_kari.o SRC= $(LIBSRC) @@ -218,20 +218,39 @@ cms_io.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h cms_io.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h cms_io.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h cms_io.o: cms_io.c cms_lcl.h +cms_kari.o: ../../e_os.h ../../include/openssl/aes.h +cms_kari.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h +cms_kari.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h +cms_kari.o: ../../include/openssl/cms.h ../../include/openssl/conf.h +cms_kari.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +cms_kari.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h +cms_kari.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h +cms_kari.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h +cms_kari.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +cms_kari.o: ../../include/openssl/opensslconf.h +cms_kari.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +cms_kari.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h +cms_kari.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h +cms_kari.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h +cms_kari.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +cms_kari.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h 
+cms_kari.o: ../../include/openssl/x509v3.h ../asn1/asn1_locl.h ../cryptlib.h +cms_kari.o: cms_kari.c cms_lcl.h cms_lib.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h cms_lib.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h -cms_lib.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -cms_lib.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -cms_lib.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h -cms_lib.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h -cms_lib.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h -cms_lib.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h -cms_lib.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pem.h -cms_lib.o: ../../include/openssl/pem2.h ../../include/openssl/pkcs7.h -cms_lib.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h -cms_lib.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -cms_lib.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h cms.h -cms_lib.o: cms_lcl.h cms_lib.c +cms_lib.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h +cms_lib.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +cms_lib.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +cms_lib.o: ../../include/openssl/err.h ../../include/openssl/evp.h +cms_lib.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +cms_lib.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h +cms_lib.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +cms_lib.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h +cms_lib.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h +cms_lib.o: ../../include/openssl/sha.h ../../include/openssl/stack.h +cms_lib.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h +cms_lib.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h +cms_lib.o: cms.h cms_lcl.h cms_lib.c cms_pwri.o: ../../e_os.h ../../include/openssl/aes.h cms_pwri.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h cms_pwri.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h @@ -281,4 +300,4 @@ cms_smime.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h cms_smime.o: ../../include/openssl/sha.h ../../include/openssl/stack.h cms_smime.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h cms_smime.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h -cms_smime.o: ../cryptlib.h cms_lcl.h cms_smime.c +cms_smime.o: ../asn1/asn1_locl.h ../cryptlib.h cms_lcl.h cms_smime.c diff --git a/deps/openssl/openssl/crypto/cms/cms.h b/deps/openssl/openssl/crypto/cms/cms.h index a2281eda5b0a9e..e6c7f964bf48c0 100644 --- a/deps/openssl/openssl/crypto/cms/cms.h +++ b/deps/openssl/openssl/crypto/cms/cms.h @@ -72,9 +72,12 @@ typedef struct CMS_RevocationInfoChoice_st CMS_RevocationInfoChoice; typedef struct CMS_RecipientInfo_st CMS_RecipientInfo; typedef struct CMS_ReceiptRequest_st CMS_ReceiptRequest; typedef struct CMS_Receipt_st CMS_Receipt; +typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey; +typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute; DECLARE_STACK_OF(CMS_SignerInfo) DECLARE_STACK_OF(GENERAL_NAMES) +DECLARE_STACK_OF(CMS_RecipientEncryptedKey) DECLARE_ASN1_FUNCTIONS(CMS_ContentInfo) DECLARE_ASN1_FUNCTIONS(CMS_ReceiptRequest) DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) @@ -82,6 +85,7 @@ DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) # 
define CMS_SIGNERINFO_ISSUER_SERIAL 0 # define CMS_SIGNERINFO_KEYIDENTIFIER 1 +# define CMS_RECIPINFO_NONE -1 # define CMS_RECIPINFO_TRANS 0 # define CMS_RECIPINFO_AGREE 1 # define CMS_RECIPINFO_KEK 2 @@ -111,6 +115,7 @@ DECLARE_ASN1_PRINT_FUNCTION(CMS_ContentInfo) # define CMS_REUSE_DIGEST 0x8000 # define CMS_USE_KEYID 0x10000 # define CMS_DEBUG_DECRYPT 0x20000 +# define CMS_KEY_PARAM 0x40000 const ASN1_OBJECT *CMS_get0_type(CMS_ContentInfo *cms); @@ -189,6 +194,7 @@ int CMS_decrypt_set1_password(CMS_ContentInfo *cms, STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms); int CMS_RecipientInfo_type(CMS_RecipientInfo *ri); +EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri); CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher); CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, X509 *recip, unsigned int flags); @@ -234,6 +240,7 @@ CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, const EVP_CIPHER *kekciph); int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); +int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); int CMS_uncompress(CMS_ContentInfo *cms, BIO *dcont, BIO *out, unsigned int flags); @@ -256,6 +263,8 @@ int CMS_SignedData_init(CMS_ContentInfo *cms); CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, X509 *signer, EVP_PKEY *pk, const EVP_MD *md, unsigned int flags); +EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si); +EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si); STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); @@ -268,6 +277,7 @@ int CMS_set1_signers_certs(CMS_ContentInfo *cms, STACK_OF(X509) *certs, void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, X509 **signer, X509_ALGOR **pdig, X509_ALGOR **psig); +ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si); int CMS_SignerInfo_sign(CMS_SignerInfo *si); int CMS_SignerInfo_verify(CMS_SignerInfo *si); int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain); @@ -331,8 +341,37 @@ void CMS_ReceiptRequest_get0_values(CMS_ReceiptRequest *rr, int *pallorfirst, STACK_OF(GENERAL_NAMES) **plist, STACK_OF(GENERAL_NAMES) **prto); - # endif +int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pukm); +STACK_OF(CMS_RecipientEncryptedKey) +*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri); + +int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri, + X509_ALGOR **pubalg, + ASN1_BIT_STRING **pubkey, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno); + +int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert); + +int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek, + ASN1_OCTET_STRING **keyid, + ASN1_GENERALIZEDTIME **tm, + CMS_OtherKeyAttribute **other, + X509_NAME **issuer, ASN1_INTEGER **sno); +int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek, + X509 *cert); +int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk); +EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri); +int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri, + CMS_RecipientEncryptedKey *rek); + +int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg, + ASN1_OCTET_STRING *ukm, int keylen); /* BEGIN ERROR CODES */ /* @@ -377,6 +416,7 @@ void ERR_load_CMS_strings(void); # define 
CMS_F_CMS_ENVELOPEDDATA_CREATE 124 # define CMS_F_CMS_ENVELOPEDDATA_INIT_BIO 125 # define CMS_F_CMS_ENVELOPED_DATA_INIT 126 +# define CMS_F_CMS_ENV_ASN1_CTRL 171 # define CMS_F_CMS_FINAL 127 # define CMS_F_CMS_GET0_CERTIFICATE_CHOICES 128 # define CMS_F_CMS_GET0_CONTENT 129 @@ -388,6 +428,12 @@ void ERR_load_CMS_strings(void); # define CMS_F_CMS_RECEIPTREQUEST_CREATE0 159 # define CMS_F_CMS_RECEIPT_VERIFY 160 # define CMS_F_CMS_RECIPIENTINFO_DECRYPT 134 +# define CMS_F_CMS_RECIPIENTINFO_ENCRYPT 169 +# define CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT 178 +# define CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG 175 +# define CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID 173 +# define CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS 172 +# define CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP 174 # define CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT 135 # define CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT 136 # define CMS_F_CMS_RECIPIENTINFO_KEKRI_GET0_ID 137 @@ -401,6 +447,9 @@ void ERR_load_CMS_strings(void); # define CMS_F_CMS_RECIPIENTINFO_SET0_KEY 144 # define CMS_F_CMS_RECIPIENTINFO_SET0_PASSWORD 168 # define CMS_F_CMS_RECIPIENTINFO_SET0_PKEY 145 +# define CMS_F_CMS_SD_ASN1_CTRL 170 +# define CMS_F_CMS_SET1_IAS 176 +# define CMS_F_CMS_SET1_KEYID 177 # define CMS_F_CMS_SET1_SIGNERIDENTIFIER 146 # define CMS_F_CMS_SET_DETACHED 147 # define CMS_F_CMS_SIGN 148 @@ -452,6 +501,7 @@ void ERR_load_CMS_strings(void); # define CMS_R_NOT_A_SIGNED_RECEIPT 165 # define CMS_R_NOT_ENCRYPTED_DATA 122 # define CMS_R_NOT_KEK 123 +# define CMS_R_NOT_KEY_AGREEMENT 181 # define CMS_R_NOT_KEY_TRANSPORT 124 # define CMS_R_NOT_PWRI 177 # define CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE 125 diff --git a/deps/openssl/openssl/crypto/cms/cms_asn1.c b/deps/openssl/openssl/crypto/cms/cms_asn1.c index f9f267afdcd7c3..81a3407f12490b 100644 --- a/deps/openssl/openssl/crypto/cms/cms_asn1.c +++ b/deps/openssl/openssl/crypto/cms/cms_asn1.c @@ -97,6 +97,8 @@ static int cms_si_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, EVP_PKEY_free(si->pkey); if (si->signer) X509_free(si->signer); + if (si->pctx) + EVP_MD_CTX_cleanup(&si->mctx); } return 1; } @@ -164,10 +166,21 @@ ASN1_CHOICE(CMS_KeyAgreeRecipientIdentifier) = { ASN1_IMP(CMS_KeyAgreeRecipientIdentifier, d.rKeyId, CMS_RecipientKeyIdentifier, 0) } ASN1_CHOICE_END(CMS_KeyAgreeRecipientIdentifier) -ASN1_SEQUENCE(CMS_RecipientEncryptedKey) = { +static int cms_rek_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) +{ + CMS_RecipientEncryptedKey *rek = (CMS_RecipientEncryptedKey *)*pval; + if (operation == ASN1_OP_FREE_POST) { + if (rek->pkey) + EVP_PKEY_free(rek->pkey); + } + return 1; +} + +ASN1_SEQUENCE_cb(CMS_RecipientEncryptedKey, cms_rek_cb) = { ASN1_SIMPLE(CMS_RecipientEncryptedKey, rid, CMS_KeyAgreeRecipientIdentifier), ASN1_SIMPLE(CMS_RecipientEncryptedKey, encryptedKey, ASN1_OCTET_STRING) -} ASN1_SEQUENCE_END(CMS_RecipientEncryptedKey) +} ASN1_SEQUENCE_END_cb(CMS_RecipientEncryptedKey, CMS_RecipientEncryptedKey) ASN1_SEQUENCE(CMS_OriginatorPublicKey) = { ASN1_SIMPLE(CMS_OriginatorPublicKey, algorithm, X509_ALGOR), @@ -180,13 +193,29 @@ ASN1_CHOICE(CMS_OriginatorIdentifierOrKey) = { ASN1_IMP(CMS_OriginatorIdentifierOrKey, d.originatorKey, CMS_OriginatorPublicKey, 1) } ASN1_CHOICE_END(CMS_OriginatorIdentifierOrKey) -ASN1_SEQUENCE(CMS_KeyAgreeRecipientInfo) = { +static int cms_kari_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, + void *exarg) +{ + CMS_KeyAgreeRecipientInfo *kari = (CMS_KeyAgreeRecipientInfo *)*pval; + if (operation == ASN1_OP_NEW_POST) { + 
EVP_CIPHER_CTX_init(&kari->ctx); + EVP_CIPHER_CTX_set_flags(&kari->ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW); + kari->pctx = NULL; + } else if (operation == ASN1_OP_FREE_POST) { + if (kari->pctx) + EVP_PKEY_CTX_free(kari->pctx); + EVP_CIPHER_CTX_cleanup(&kari->ctx); + } + return 1; +} + +ASN1_SEQUENCE_cb(CMS_KeyAgreeRecipientInfo, cms_kari_cb) = { ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, version, LONG), ASN1_EXP(CMS_KeyAgreeRecipientInfo, originator, CMS_OriginatorIdentifierOrKey, 0), ASN1_EXP_OPT(CMS_KeyAgreeRecipientInfo, ukm, ASN1_OCTET_STRING, 1), ASN1_SIMPLE(CMS_KeyAgreeRecipientInfo, keyEncryptionAlgorithm, X509_ALGOR), ASN1_SEQUENCE_OF(CMS_KeyAgreeRecipientInfo, recipientEncryptedKeys, CMS_RecipientEncryptedKey) -} ASN1_SEQUENCE_END(CMS_KeyAgreeRecipientInfo) +} ASN1_SEQUENCE_END_cb(CMS_KeyAgreeRecipientInfo, CMS_KeyAgreeRecipientInfo) ASN1_SEQUENCE(CMS_KEKIdentifier) = { ASN1_SIMPLE(CMS_KEKIdentifier, keyIdentifier, ASN1_OCTET_STRING), @@ -225,6 +254,8 @@ static int cms_ri_cb(int operation, ASN1_VALUE **pval, const ASN1_ITEM *it, EVP_PKEY_free(ktri->pkey); if (ktri->recip) X509_free(ktri->recip); + if (ktri->pctx) + EVP_PKEY_CTX_free(ktri->pctx); } else if (ri->type == CMS_RECIPINFO_KEK) { CMS_KEKRecipientInfo *kekri = ri->d.kekri; if (kekri->key) { @@ -379,3 +410,50 @@ ASN1_SEQUENCE(CMS_Receipt) = { ASN1_SIMPLE(CMS_Receipt, signedContentIdentifier, ASN1_OCTET_STRING), ASN1_SIMPLE(CMS_Receipt, originatorSignatureValue, ASN1_OCTET_STRING) } ASN1_SEQUENCE_END(CMS_Receipt) + +/* + * Utilities to encode the CMS_SharedInfo structure used during key + * derivation. + */ + +typedef struct { + X509_ALGOR *keyInfo; + ASN1_OCTET_STRING *entityUInfo; + ASN1_OCTET_STRING *suppPubInfo; +} CMS_SharedInfo; + +ASN1_SEQUENCE(CMS_SharedInfo) = { + ASN1_SIMPLE(CMS_SharedInfo, keyInfo, X509_ALGOR), + ASN1_EXP_OPT(CMS_SharedInfo, entityUInfo, ASN1_OCTET_STRING, 0), + ASN1_EXP_OPT(CMS_SharedInfo, suppPubInfo, ASN1_OCTET_STRING, 2), +} ASN1_SEQUENCE_END(CMS_SharedInfo) + +int CMS_SharedInfo_encode(unsigned char **pder, X509_ALGOR *kekalg, + ASN1_OCTET_STRING *ukm, int keylen) +{ + union { + CMS_SharedInfo *pecsi; + ASN1_VALUE *a; + } intsi = { + NULL + }; + + ASN1_OCTET_STRING oklen; + unsigned char kl[4]; + CMS_SharedInfo ecsi; + + keylen <<= 3; + kl[0] = (keylen >> 24) & 0xff; + kl[1] = (keylen >> 16) & 0xff; + kl[2] = (keylen >> 8) & 0xff; + kl[3] = keylen & 0xff; + oklen.length = 4; + oklen.data = kl; + oklen.type = V_ASN1_OCTET_STRING; + oklen.flags = 0; + ecsi.keyInfo = kekalg; + ecsi.entityUInfo = ukm; + ecsi.suppPubInfo = &oklen; + intsi.pecsi = &ecsi; + return ASN1_item_i2d(intsi.a, pder, ASN1_ITEM_rptr(CMS_SharedInfo)); +} diff --git a/deps/openssl/openssl/crypto/cms/cms_env.c b/deps/openssl/openssl/crypto/cms/cms_env.c index 1c3046ce9238d6..93c06cb00a8f37 100644 --- a/deps/openssl/openssl/crypto/cms/cms_env.c +++ b/deps/openssl/openssl/crypto/cms/cms_env.c @@ -100,6 +100,36 @@ static CMS_EnvelopedData *cms_enveloped_data_init(CMS_ContentInfo *cms) return cms_get0_enveloped(cms); } +int cms_env_asn1_ctrl(CMS_RecipientInfo *ri, int cmd) +{ + EVP_PKEY *pkey; + int i; + if (ri->type == CMS_RECIPINFO_TRANS) + pkey = ri->d.ktri->pkey; + else if (ri->type == CMS_RECIPINFO_AGREE) { + EVP_PKEY_CTX *pctx = ri->d.kari->pctx; + if (!pctx) + return 0; + pkey = EVP_PKEY_CTX_get0_pkey(pctx); + if (!pkey) + return 0; + } else + return 0; + if (!pkey->ameth || !pkey->ameth->pkey_ctrl) + return 1; + i = pkey->ameth->pkey_ctrl(pkey, ASN1_PKEY_CTRL_CMS_ENVELOPE, cmd, ri); + if (i == -2) { + 
CMSerr(CMS_F_CMS_ENV_ASN1_CTRL, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + return 0; + } + if (i <= 0) { + CMSerr(CMS_F_CMS_ENV_ASN1_CTRL, CMS_R_CTRL_FAILURE); + return 0; + } + return 1; +} + STACK_OF(CMS_RecipientInfo) *CMS_get0_RecipientInfos(CMS_ContentInfo *cms) { CMS_EnvelopedData *env; @@ -114,6 +144,15 @@ int CMS_RecipientInfo_type(CMS_RecipientInfo *ri) return ri->type; } +EVP_PKEY_CTX *CMS_RecipientInfo_get0_pkey_ctx(CMS_RecipientInfo *ri) +{ + if (ri->type == CMS_RECIPINFO_TRANS) + return ri->d.ktri->pctx; + else if (ri->type == CMS_RECIPINFO_AGREE) + return ri->d.kari->pctx; + return NULL; +} + CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher) { CMS_ContentInfo *cms; @@ -137,19 +176,63 @@ CMS_ContentInfo *CMS_EnvelopedData_create(const EVP_CIPHER *cipher) /* Key Transport Recipient Info (KTRI) routines */ +/* Initialise a ktri based on passed certificate and key */ + +static int cms_RecipientInfo_ktri_init(CMS_RecipientInfo *ri, X509 *recip, + EVP_PKEY *pk, unsigned int flags) +{ + CMS_KeyTransRecipientInfo *ktri; + int idtype; + + ri->d.ktri = M_ASN1_new_of(CMS_KeyTransRecipientInfo); + if (!ri->d.ktri) + return 0; + ri->type = CMS_RECIPINFO_TRANS; + + ktri = ri->d.ktri; + + if (flags & CMS_USE_KEYID) { + ktri->version = 2; + idtype = CMS_RECIPINFO_KEYIDENTIFIER; + } else { + ktri->version = 0; + idtype = CMS_RECIPINFO_ISSUER_SERIAL; + } + + /* + * Not a typo: RecipientIdentifier and SignerIdentifier are the same + * structure. + */ + + if (!cms_set1_SignerIdentifier(ktri->rid, recip, idtype)) + return 0; + + CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509); + CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY); + ktri->pkey = pk; + ktri->recip = recip; + + if (flags & CMS_KEY_PARAM) { + ktri->pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); + if (!ktri->pctx) + return 0; + if (EVP_PKEY_encrypt_init(ktri->pctx) <= 0) + return 0; + } else if (!cms_env_asn1_ctrl(ri, 0)) + return 0; + return 1; +} + /* - * Add a recipient certificate. For now only handle key transport. If we ever - * handle key agreement will need updating. + * Add a recipient certificate using appropriate type of RecipientInfo */ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, X509 *recip, unsigned int flags) { CMS_RecipientInfo *ri = NULL; - CMS_KeyTransRecipientInfo *ktri; CMS_EnvelopedData *env; EVP_PKEY *pk = NULL; - int i, type; env = cms_get0_enveloped(cms); if (!env) goto err; @@ -159,59 +242,36 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, if (!ri) goto merr; - /* Initialize and add key transport recipient info */ - - ri->d.ktri = M_ASN1_new_of(CMS_KeyTransRecipientInfo); - if (!ri->d.ktri) - goto merr; - ri->type = CMS_RECIPINFO_TRANS; - - ktri = ri->d.ktri; - - X509_check_purpose(recip, -1, -1); pk = X509_get_pubkey(recip); if (!pk) { CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, CMS_R_ERROR_GETTING_PUBLIC_KEY); goto err; } - CRYPTO_add(&recip->references, 1, CRYPTO_LOCK_X509); - ktri->pkey = pk; - ktri->recip = recip; - if (flags & CMS_USE_KEYID) { - ktri->version = 2; - if (env->version < 2) - env->version = 2; - type = CMS_RECIPINFO_KEYIDENTIFIER; - } else { - ktri->version = 0; - type = CMS_RECIPINFO_ISSUER_SERIAL; - } + switch (cms_pkey_get_ri_type(pk)) { - /* - * Not a typo: RecipientIdentifier and SignerIdentifier are the same - * structure. 
- */ + case CMS_RECIPINFO_TRANS: + if (!cms_RecipientInfo_ktri_init(ri, recip, pk, flags)) + goto err; + break; - if (!cms_set1_SignerIdentifier(ktri->rid, recip, type)) + case CMS_RECIPINFO_AGREE: + if (!cms_RecipientInfo_kari_init(ri, recip, pk, flags)) + goto err; + break; + + default: + CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); goto err; - if (pk->ameth && pk->ameth->pkey_ctrl) { - i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_ENVELOPE, 0, ri); - if (i == -2) { - CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, - CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); - goto err; - } - if (i <= 0) { - CMSerr(CMS_F_CMS_ADD1_RECIPIENT_CERT, CMS_R_CTRL_FAILURE); - goto err; - } } if (!sk_CMS_RecipientInfo_push(env->recipientInfos, ri)) goto merr; + EVP_PKEY_free(pk); + return ri; merr: @@ -219,6 +279,8 @@ CMS_RecipientInfo *CMS_add1_recipient_cert(CMS_ContentInfo *cms, err: if (ri) M_ASN1_free_of(ri, CMS_RecipientInfo); + if (pk) + EVP_PKEY_free(pk); return NULL; } @@ -288,7 +350,7 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, { CMS_KeyTransRecipientInfo *ktri; CMS_EncryptedContentInfo *ec; - EVP_PKEY_CTX *pctx = NULL; + EVP_PKEY_CTX *pctx; unsigned char *ek = NULL; size_t eklen; @@ -301,12 +363,19 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, ktri = ri->d.ktri; ec = cms->d.envelopedData->encryptedContentInfo; - pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); - if (!pctx) - return 0; + pctx = ktri->pctx; - if (EVP_PKEY_encrypt_init(pctx) <= 0) - goto err; + if (pctx) { + if (!cms_env_asn1_ctrl(ri, 0)) + goto err; + } else { + pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); + if (!pctx) + return 0; + + if (EVP_PKEY_encrypt_init(pctx) <= 0) + goto err; + } if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_ENCRYPT, EVP_PKEY_CTRL_CMS_ENCRYPT, 0, ri) <= 0) { @@ -333,8 +402,10 @@ static int cms_RecipientInfo_ktri_encrypt(CMS_ContentInfo *cms, ret = 1; err: - if (pctx) + if (pctx) { EVP_PKEY_CTX_free(pctx); + ktri->pctx = NULL; + } if (ek) OPENSSL_free(ek); return ret; @@ -347,7 +418,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) { CMS_KeyTransRecipientInfo *ktri = ri->d.ktri; - EVP_PKEY_CTX *pctx = NULL; + EVP_PKEY *pkey = ktri->pkey; unsigned char *ek = NULL; size_t eklen; int ret = 0; @@ -359,20 +430,23 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, return 0; } - pctx = EVP_PKEY_CTX_new(ktri->pkey, NULL); - if (!pctx) + ktri->pctx = EVP_PKEY_CTX_new(pkey, NULL); + if (!ktri->pctx) return 0; - if (EVP_PKEY_decrypt_init(pctx) <= 0) + if (EVP_PKEY_decrypt_init(ktri->pctx) <= 0) goto err; - if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_DECRYPT, + if (!cms_env_asn1_ctrl(ri, 1)) + goto err; + + if (EVP_PKEY_CTX_ctrl(ktri->pctx, -1, EVP_PKEY_OP_DECRYPT, EVP_PKEY_CTRL_CMS_DECRYPT, 0, ri) <= 0) { CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CTRL_ERROR); goto err; } - if (EVP_PKEY_decrypt(pctx, NULL, &eklen, + if (EVP_PKEY_decrypt(ktri->pctx, NULL, &eklen, ktri->encryptedKey->data, ktri->encryptedKey->length) <= 0) goto err; @@ -384,7 +458,7 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, goto err; } - if (EVP_PKEY_decrypt(pctx, ek, &eklen, + if (EVP_PKEY_decrypt(ktri->pctx, ek, &eklen, ktri->encryptedKey->data, ktri->encryptedKey->length) <= 0) { CMSerr(CMS_F_CMS_RECIPIENTINFO_KTRI_DECRYPT, CMS_R_CMS_LIB); @@ -402,8 +476,10 @@ static int cms_RecipientInfo_ktri_decrypt(CMS_ContentInfo *cms, ec->keylen = eklen; err: - if (pctx) - EVP_PKEY_CTX_free(pctx); + if (ktri->pctx) { + 
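The CMS_KEY_PARAM path above keeps a live EVP_PKEY_CTX in the ktri so the caller can adjust key-transport parameters before cms_RecipientInfo_ktri_encrypt() wraps the content-encryption key. A hedged usage sketch, assuming the OpenSSL 1.0.2 CMS API, with rcert and in supplied by the caller and error handling reduced to early exits:

#include <openssl/bio.h>
#include <openssl/cms.h>
#include <openssl/evp.h>
#include <openssl/rsa.h>
#include <openssl/x509.h>

/* Sketch: EnvelopedData for one RSA recipient, switching the key transport
 * step to RSAES-OAEP through the per-recipient context that CMS_KEY_PARAM
 * leaves accessible. */
static CMS_ContentInfo *envelope_with_oaep(X509 *rcert, BIO *in)
{
    CMS_ContentInfo *cms;
    CMS_RecipientInfo *ri;
    EVP_PKEY_CTX *pctx;

    /* CMS_PARTIAL: do not finalise yet, parameters still need tweaking */
    cms = CMS_encrypt(NULL, in, EVP_aes_128_cbc(), CMS_PARTIAL);
    if (!cms)
        return NULL;

    ri = CMS_add1_recipient_cert(cms, rcert, CMS_KEY_PARAM);
    if (!ri)
        goto err;

    pctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
    if (!pctx || EVP_PKEY_CTX_set_rsa_padding(pctx, RSA_PKCS1_OAEP_PADDING) <= 0)
        goto err;

    /* The CEK is generated and wrapped here, during finalisation */
    if (!CMS_final(cms, in, NULL, 0))
        goto err;
    return cms;
 err:
    CMS_ContentInfo_free(cms);
    return NULL;
}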
EVP_PKEY_CTX_free(ktri->pctx); + ktri->pctx = NULL; + } if (!ret && ek) OPENSSL_free(ek); @@ -745,12 +821,99 @@ int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) } } +int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri) +{ + switch (ri->type) { + case CMS_RECIPINFO_TRANS: + return cms_RecipientInfo_ktri_encrypt(cms, ri); + + case CMS_RECIPINFO_AGREE: + return cms_RecipientInfo_kari_encrypt(cms, ri); + + case CMS_RECIPINFO_KEK: + return cms_RecipientInfo_kekri_encrypt(cms, ri); + break; + + case CMS_RECIPINFO_PASS: + return cms_RecipientInfo_pwri_crypt(cms, ri, 1); + break; + + default: + CMSerr(CMS_F_CMS_RECIPIENTINFO_ENCRYPT, + CMS_R_UNSUPPORTED_RECIPIENT_TYPE); + return 0; + } +} + +/* Check structures and fixup version numbers (if necessary) */ + +static void cms_env_set_originfo_version(CMS_EnvelopedData *env) +{ + CMS_OriginatorInfo *org = env->originatorInfo; + int i; + if (org == NULL) + return; + for (i = 0; i < sk_CMS_CertificateChoices_num(org->certificates); i++) { + CMS_CertificateChoices *cch; + cch = sk_CMS_CertificateChoices_value(org->certificates, i); + if (cch->type == CMS_CERTCHOICE_OTHER) { + env->version = 4; + return; + } else if (cch->type == CMS_CERTCHOICE_V2ACERT) { + if (env->version < 3) + env->version = 3; + } + } + + for (i = 0; i < sk_CMS_RevocationInfoChoice_num(org->crls); i++) { + CMS_RevocationInfoChoice *rch; + rch = sk_CMS_RevocationInfoChoice_value(org->crls, i); + if (rch->type == CMS_REVCHOICE_OTHER) { + env->version = 4; + return; + } + } +} + +static void cms_env_set_version(CMS_EnvelopedData *env) +{ + int i; + CMS_RecipientInfo *ri; + + /* + * Can't set version higher than 4 so if 4 or more already nothing to do. + */ + if (env->version >= 4) + return; + + cms_env_set_originfo_version(env); + + if (env->version >= 3) + return; + + for (i = 0; i < sk_CMS_RecipientInfo_num(env->recipientInfos); i++) { + ri = sk_CMS_RecipientInfo_value(env->recipientInfos, i); + if (ri->type == CMS_RECIPINFO_PASS || ri->type == CMS_RECIPINFO_OTHER) { + env->version = 3; + return; + } else if (ri->type != CMS_RECIPINFO_TRANS + || ri->d.ktri->version != 0) { + env->version = 2; + } + } + if (env->version == 2) + return; + if (env->originatorInfo || env->unprotectedAttrs) + env->version = 2; + env->version = 0; +} + BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) { CMS_EncryptedContentInfo *ec; STACK_OF(CMS_RecipientInfo) *rinfos; CMS_RecipientInfo *ri; - int i, r, ok = 0; + int i, ok = 0; BIO *ret; /* Get BIO first to set up key */ @@ -769,32 +932,13 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) for (i = 0; i < sk_CMS_RecipientInfo_num(rinfos); i++) { ri = sk_CMS_RecipientInfo_value(rinfos, i); - - switch (ri->type) { - case CMS_RECIPINFO_TRANS: - r = cms_RecipientInfo_ktri_encrypt(cms, ri); - break; - - case CMS_RECIPINFO_KEK: - r = cms_RecipientInfo_kekri_encrypt(cms, ri); - break; - - case CMS_RECIPINFO_PASS: - r = cms_RecipientInfo_pwri_crypt(cms, ri, 1); - break; - - default: - CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, - CMS_R_UNSUPPORTED_RECIPIENT_TYPE); - goto err; - } - - if (r <= 0) { + if (CMS_RecipientInfo_encrypt(cms, ri) <= 0) { CMSerr(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO, CMS_R_ERROR_SETTING_RECIPIENTINFO); goto err; } } + cms_env_set_version(cms->d.envelopedData); ok = 1; @@ -812,3 +956,19 @@ BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms) return NULL; } + +/* + * Get RecipientInfo type (if any) supported by a key (public or private). 
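cms_env_set_version() above follows the version selection rules of RFC 5652, section 6.1. A compact restatement of that decision tree as a self-contained sketch; the struct is a hypothetical, simplified stand-in for the real EnvelopedData fields so the rules stay readable:

/* Hypothetical, flattened view of an EnvelopedData, for illustration only. */
struct env_view {
    int has_originator_info;
    int originator_has_other_certs_or_crls; /* "other" certificate/CRL choices */
    int originator_has_v2_attr_certs;
    int has_unprotected_attrs;
    int has_pwri_or_ori;        /* any password or "other" RecipientInfo       */
    int all_ri_version_zero;    /* every RecipientInfo is a version 0 ktri     */
};

/* RFC 5652, section 6.1: CMSVersion of EnvelopedData. */
static int enveloped_data_version(const struct env_view *e)
{
    if (e->has_originator_info && e->originator_has_other_certs_or_crls)
        return 4;
    if ((e->has_originator_info && e->originator_has_v2_attr_certs) ||
        e->has_pwri_or_ori)
        return 3;
    if (!e->has_originator_info && !e->has_unprotected_attrs &&
        e->all_ri_version_zero)
        return 0;
    return 2;
}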
To + * retain compatibility with previous behaviour if the ctrl value isn't + * supported we assume key transport. + */ +int cms_pkey_get_ri_type(EVP_PKEY *pk) +{ + if (pk->ameth && pk->ameth->pkey_ctrl) { + int i, r; + i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_RI_TYPE, 0, &r); + if (i > 0) + return r; + } + return CMS_RECIPINFO_TRANS; +} diff --git a/deps/openssl/openssl/crypto/cms/cms_err.c b/deps/openssl/openssl/crypto/cms/cms_err.c index faf2fccd3b8c40..15572ea348c8e7 100644 --- a/deps/openssl/openssl/crypto/cms/cms_err.c +++ b/deps/openssl/openssl/crypto/cms/cms_err.c @@ -1,6 +1,6 @@ /* crypto/cms/cms_err.c */ /* ==================================================================== - * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -110,6 +110,7 @@ static ERR_STRING_DATA CMS_str_functs[] = { {ERR_FUNC(CMS_F_CMS_ENVELOPEDDATA_INIT_BIO), "cms_EnvelopedData_init_bio"}, {ERR_FUNC(CMS_F_CMS_ENVELOPED_DATA_INIT), "CMS_ENVELOPED_DATA_INIT"}, + {ERR_FUNC(CMS_F_CMS_ENV_ASN1_CTRL), "cms_env_asn1_ctrl"}, {ERR_FUNC(CMS_F_CMS_FINAL), "CMS_final"}, {ERR_FUNC(CMS_F_CMS_GET0_CERTIFICATE_CHOICES), "CMS_GET0_CERTIFICATE_CHOICES"}, @@ -124,6 +125,17 @@ static ERR_STRING_DATA CMS_str_functs[] = { "CMS_ReceiptRequest_create0"}, {ERR_FUNC(CMS_F_CMS_RECEIPT_VERIFY), "cms_Receipt_verify"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_DECRYPT), "CMS_RecipientInfo_decrypt"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_ENCRYPT), "CMS_RecipientInfo_encrypt"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT), + "cms_RecipientInfo_kari_encrypt"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG), + "CMS_RecipientInfo_kari_get0_alg"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID), + "CMS_RecipientInfo_kari_get0_orig_id"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS), + "CMS_RecipientInfo_kari_get0_reks"}, + {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP), + "CMS_RecipientInfo_kari_orig_id_cmp"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_DECRYPT), "CMS_RECIPIENTINFO_KEKRI_DECRYPT"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_KEKRI_ENCRYPT), @@ -150,6 +162,9 @@ static ERR_STRING_DATA CMS_str_functs[] = { "CMS_RecipientInfo_set0_password"}, {ERR_FUNC(CMS_F_CMS_RECIPIENTINFO_SET0_PKEY), "CMS_RecipientInfo_set0_pkey"}, + {ERR_FUNC(CMS_F_CMS_SD_ASN1_CTRL), "CMS_SD_ASN1_CTRL"}, + {ERR_FUNC(CMS_F_CMS_SET1_IAS), "cms_set1_ias"}, + {ERR_FUNC(CMS_F_CMS_SET1_KEYID), "cms_set1_keyid"}, {ERR_FUNC(CMS_F_CMS_SET1_SIGNERIDENTIFIER), "cms_set1_SignerIdentifier"}, {ERR_FUNC(CMS_F_CMS_SET_DETACHED), "CMS_set_detached"}, {ERR_FUNC(CMS_F_CMS_SIGN), "CMS_sign"}, @@ -221,6 +236,7 @@ static ERR_STRING_DATA CMS_str_reasons[] = { {ERR_REASON(CMS_R_NOT_A_SIGNED_RECEIPT), "not a signed receipt"}, {ERR_REASON(CMS_R_NOT_ENCRYPTED_DATA), "not encrypted data"}, {ERR_REASON(CMS_R_NOT_KEK), "not kek"}, + {ERR_REASON(CMS_R_NOT_KEY_AGREEMENT), "not key agreement"}, {ERR_REASON(CMS_R_NOT_KEY_TRANSPORT), "not key transport"}, {ERR_REASON(CMS_R_NOT_PWRI), "not pwri"}, {ERR_REASON(CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE), diff --git a/deps/openssl/openssl/crypto/cms/cms_kari.c b/deps/openssl/openssl/crypto/cms/cms_kari.c new file mode 100644 index 00000000000000..f8a6cbadb06c4a --- /dev/null +++ b/deps/openssl/openssl/crypto/cms/cms_kari.c @@ -0,0 +1,461 @@ +/* crypto/cms/cms_kari.c */ +/* + * Written by Dr Stephen N Henson 
(steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ==================================================================== + */ + +#include "cryptlib.h" +#include +#include +#include +#include +#include +#include +#include +#include "cms_lcl.h" +#include "asn1_locl.h" + +DECLARE_ASN1_ITEM(CMS_KeyAgreeRecipientInfo) +DECLARE_ASN1_ITEM(CMS_RecipientEncryptedKey) +DECLARE_ASN1_ITEM(CMS_OriginatorPublicKey) + +/* Key Agreement Recipient Info (KARI) routines */ + +int CMS_RecipientInfo_kari_get0_alg(CMS_RecipientInfo *ri, + X509_ALGOR **palg, + ASN1_OCTET_STRING **pukm) +{ + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ALG, + CMS_R_NOT_KEY_AGREEMENT); + return 0; + } + if (palg) + *palg = ri->d.kari->keyEncryptionAlgorithm; + if (pukm) + *pukm = ri->d.kari->ukm; + return 1; +} + +/* Retrieve recipient encrypted keys from a kari */ + +STACK_OF(CMS_RecipientEncryptedKey) +*CMS_RecipientInfo_kari_get0_reks(CMS_RecipientInfo *ri) +{ + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_REKS, + CMS_R_NOT_KEY_AGREEMENT); + return NULL; + } + return ri->d.kari->recipientEncryptedKeys; +} + +int CMS_RecipientInfo_kari_get0_orig_id(CMS_RecipientInfo *ri, + X509_ALGOR **pubalg, + ASN1_BIT_STRING **pubkey, + ASN1_OCTET_STRING **keyid, + X509_NAME **issuer, + ASN1_INTEGER **sno) +{ + CMS_OriginatorIdentifierOrKey *oik; + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_GET0_ORIG_ID, + CMS_R_NOT_KEY_AGREEMENT); + return 0; + } + oik = ri->d.kari->originator; + if (issuer) + *issuer = NULL; + if (sno) + *sno = NULL; + if (keyid) + *keyid = NULL; + if (pubalg) + *pubalg = NULL; + if (pubkey) + *pubkey = NULL; + if (oik->type == CMS_OIK_ISSUER_SERIAL) { + if (issuer) + *issuer = oik->d.issuerAndSerialNumber->issuer; + if (sno) + *sno = oik->d.issuerAndSerialNumber->serialNumber; + } else if (oik->type == CMS_OIK_KEYIDENTIFIER) { + if (keyid) + *keyid = oik->d.subjectKeyIdentifier; + } else if (oik->type == CMS_OIK_PUBKEY) { + if (pubalg) + *pubalg = oik->d.originatorKey->algorithm; + if (pubkey) + *pubkey = oik->d.originatorKey->publicKey; + } else + return 0; + return 1; +} + +int CMS_RecipientInfo_kari_orig_id_cmp(CMS_RecipientInfo *ri, X509 *cert) +{ + CMS_OriginatorIdentifierOrKey *oik; + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_ORIG_ID_CMP, + CMS_R_NOT_KEY_AGREEMENT); + return -2; + } + oik = ri->d.kari->originator; + if (oik->type == CMS_OIK_ISSUER_SERIAL) + return cms_ias_cert_cmp(oik->d.issuerAndSerialNumber, cert); + else if (oik->type == CMS_OIK_KEYIDENTIFIER) + return cms_keyid_cert_cmp(oik->d.subjectKeyIdentifier, cert); + return -1; +} + +int CMS_RecipientEncryptedKey_get0_id(CMS_RecipientEncryptedKey *rek, + ASN1_OCTET_STRING **keyid, + ASN1_GENERALIZEDTIME **tm, + CMS_OtherKeyAttribute **other, + X509_NAME **issuer, ASN1_INTEGER **sno) +{ + CMS_KeyAgreeRecipientIdentifier *rid = rek->rid; + if (rid->type == CMS_REK_ISSUER_SERIAL) { + if (issuer) + *issuer = rid->d.issuerAndSerialNumber->issuer; + if (sno) + *sno = rid->d.issuerAndSerialNumber->serialNumber; + if (keyid) + *keyid = NULL; + if (tm) + *tm = NULL; + if (other) + *other = NULL; + } else if (rid->type == CMS_REK_KEYIDENTIFIER) { + if (keyid) + *keyid = rid->d.rKeyId->subjectKeyIdentifier; + if (tm) + *tm = rid->d.rKeyId->date; + if (other) + *other = rid->d.rKeyId->other; + if (issuer) + *issuer = NULL; + if (sno) + *sno = NULL; + } else + return 0; + return 1; +} + +int CMS_RecipientEncryptedKey_cert_cmp(CMS_RecipientEncryptedKey *rek, + X509 *cert) +{ + 
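The KARI accessors introduced above are enough to locate the RecipientEncryptedKey that belongs to a particular recipient and decrypt only that entry. A hedged sketch of that loop, assuming the new 1.0.2 accessors and a caller-supplied certificate and private key; it mirrors what cms_kari_set1_pkey() in cms_smime.c does further down in this patch:

#include <openssl/cms.h>
#include <openssl/evp.h>
#include <openssl/x509.h>

/* Sketch: decrypt the CEK from a key agreement RecipientInfo using the
 * recipient's certificate and private key.  Returns 1 on success. */
static int kari_decrypt_for_cert(CMS_ContentInfo *cms, CMS_RecipientInfo *ri,
                                 EVP_PKEY *pkey, X509 *cert)
{
    STACK_OF(CMS_RecipientEncryptedKey) *reks;
    int i, rv = 0;

    if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_AGREE)
        return 0;
    reks = CMS_RecipientInfo_kari_get0_reks(ri);
    for (i = 0; i < sk_CMS_RecipientEncryptedKey_num(reks); i++) {
        CMS_RecipientEncryptedKey *rek =
            sk_CMS_RecipientEncryptedKey_value(reks, i);
        if (CMS_RecipientEncryptedKey_cert_cmp(rek, cert))
            continue;                                /* not our entry */
        CMS_RecipientInfo_kari_set0_pkey(ri, pkey);  /* sets up derive ctx */
        rv = CMS_RecipientInfo_kari_decrypt(cms, ri, rek);
        CMS_RecipientInfo_kari_set0_pkey(ri, NULL);  /* drop the derive ctx */
        break;
    }
    return rv > 0;
}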
CMS_KeyAgreeRecipientIdentifier *rid = rek->rid; + if (rid->type == CMS_REK_ISSUER_SERIAL) + return cms_ias_cert_cmp(rid->d.issuerAndSerialNumber, cert); + else if (rid->type == CMS_REK_KEYIDENTIFIER) + return cms_keyid_cert_cmp(rid->d.rKeyId->subjectKeyIdentifier, cert); + else + return -1; +} + +int CMS_RecipientInfo_kari_set0_pkey(CMS_RecipientInfo *ri, EVP_PKEY *pk) +{ + EVP_PKEY_CTX *pctx; + CMS_KeyAgreeRecipientInfo *kari = ri->d.kari; + if (kari->pctx) { + EVP_PKEY_CTX_free(kari->pctx); + kari->pctx = NULL; + } + if (!pk) + return 1; + pctx = EVP_PKEY_CTX_new(pk, NULL); + if (!pctx || !EVP_PKEY_derive_init(pctx)) + goto err; + kari->pctx = pctx; + return 1; + err: + if (pctx) + EVP_PKEY_CTX_free(pctx); + return 0; +} + +EVP_CIPHER_CTX *CMS_RecipientInfo_kari_get0_ctx(CMS_RecipientInfo *ri) +{ + if (ri->type == CMS_RECIPINFO_AGREE) + return &ri->d.kari->ctx; + return NULL; +} + +/* + * Derive KEK and decrypt/encrypt with it to produce either the original CEK + * or the encrypted CEK. + */ + +static int cms_kek_cipher(unsigned char **pout, size_t *poutlen, + const unsigned char *in, size_t inlen, + CMS_KeyAgreeRecipientInfo *kari, int enc) +{ + /* Key encryption key */ + unsigned char kek[EVP_MAX_KEY_LENGTH]; + size_t keklen; + int rv = 0; + unsigned char *out = NULL; + int outlen; + keklen = EVP_CIPHER_CTX_key_length(&kari->ctx); + if (keklen > EVP_MAX_KEY_LENGTH) + return 0; + /* Derive KEK */ + if (EVP_PKEY_derive(kari->pctx, kek, &keklen) <= 0) + goto err; + /* Set KEK in context */ + if (!EVP_CipherInit_ex(&kari->ctx, NULL, NULL, kek, NULL, enc)) + goto err; + /* obtain output length of ciphered key */ + if (!EVP_CipherUpdate(&kari->ctx, NULL, &outlen, in, inlen)) + goto err; + out = OPENSSL_malloc(outlen); + if (!out) + goto err; + if (!EVP_CipherUpdate(&kari->ctx, out, &outlen, in, inlen)) + goto err; + *pout = out; + *poutlen = (size_t)outlen; + rv = 1; + + err: + OPENSSL_cleanse(kek, keklen); + if (!rv && out) + OPENSSL_free(out); + EVP_CIPHER_CTX_cleanup(&kari->ctx); + EVP_PKEY_CTX_free(kari->pctx); + kari->pctx = NULL; + return rv; +} + +int CMS_RecipientInfo_kari_decrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri, + CMS_RecipientEncryptedKey *rek) +{ + int rv = 0; + unsigned char *enckey = NULL, *cek = NULL; + size_t enckeylen; + size_t ceklen; + CMS_EncryptedContentInfo *ec; + enckeylen = rek->encryptedKey->length; + enckey = rek->encryptedKey->data; + /* Setup all parameters to derive KEK */ + if (!cms_env_asn1_ctrl(ri, 1)) + goto err; + /* Attempt to decrypt CEK */ + if (!cms_kek_cipher(&cek, &ceklen, enckey, enckeylen, ri->d.kari, 0)) + goto err; + ec = cms->d.envelopedData->encryptedContentInfo; + if (ec->key) { + OPENSSL_cleanse(ec->key, ec->keylen); + OPENSSL_free(ec->key); + } + ec->key = cek; + ec->keylen = ceklen; + cek = NULL; + rv = 1; + err: + if (cek) + OPENSSL_free(cek); + return rv; +} + +/* Create ephemeral key and initialise context based on it */ +static int cms_kari_create_ephemeral_key(CMS_KeyAgreeRecipientInfo *kari, + EVP_PKEY *pk) +{ + EVP_PKEY_CTX *pctx = NULL; + EVP_PKEY *ekey = NULL; + int rv = 0; + pctx = EVP_PKEY_CTX_new(pk, NULL); + if (!pctx) + goto err; + if (EVP_PKEY_keygen_init(pctx) <= 0) + goto err; + if (EVP_PKEY_keygen(pctx, &ekey) <= 0) + goto err; + EVP_PKEY_CTX_free(pctx); + pctx = EVP_PKEY_CTX_new(ekey, NULL); + if (!pctx) + goto err; + if (EVP_PKEY_derive_init(pctx) <= 0) + goto err; + kari->pctx = pctx; + rv = 1; + err: + if (!rv && pctx) + EVP_PKEY_CTX_free(pctx); + if (ekey) + EVP_PKEY_free(ekey); + return rv; +} + +/* 
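cms_kari_create_ephemeral_key() above generates the sender's one-off key by using the recipient's public key as a keygen template, then builds the derive context from the ephemeral key; cms_kek_cipher() later feeds the derived secret into the wrap cipher. The same pattern outside of CMS, as a hedged sketch with plain EVP calls and shortened error paths:

#include <openssl/crypto.h>
#include <openssl/evp.h>

/* Sketch: generate an ephemeral key on the same group as `peer` and derive
 * a shared secret against `peer`.  Caller frees *secret with OPENSSL_free. */
static int ephemeral_derive(EVP_PKEY *peer, unsigned char **secret,
                            size_t *secretlen)
{
    EVP_PKEY *eph = NULL;
    EVP_PKEY_CTX *kctx = NULL, *dctx = NULL;
    int ok = 0;

    /* Key generation: the peer key supplies the parameters (e.g. EC group). */
    kctx = EVP_PKEY_CTX_new(peer, NULL);
    if (!kctx || EVP_PKEY_keygen_init(kctx) <= 0 ||
        EVP_PKEY_keygen(kctx, &eph) <= 0)
        goto err;

    /* Derivation: our ephemeral private key against the peer's public key. */
    dctx = EVP_PKEY_CTX_new(eph, NULL);
    if (!dctx || EVP_PKEY_derive_init(dctx) <= 0 ||
        EVP_PKEY_derive_set_peer(dctx, peer) <= 0 ||
        EVP_PKEY_derive(dctx, NULL, secretlen) <= 0)
        goto err;
    *secret = OPENSSL_malloc(*secretlen);
    if (!*secret || EVP_PKEY_derive(dctx, *secret, secretlen) <= 0)
        goto err;
    ok = 1;
 err:
    EVP_PKEY_CTX_free(kctx);
    EVP_PKEY_CTX_free(dctx);
    EVP_PKEY_free(eph);
    return ok;
}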
Initialise a ktri based on passed certificate and key */ + +int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, + EVP_PKEY *pk, unsigned int flags) +{ + CMS_KeyAgreeRecipientInfo *kari; + CMS_RecipientEncryptedKey *rek = NULL; + + ri->d.kari = M_ASN1_new_of(CMS_KeyAgreeRecipientInfo); + if (!ri->d.kari) + return 0; + ri->type = CMS_RECIPINFO_AGREE; + + kari = ri->d.kari; + kari->version = 3; + + rek = M_ASN1_new_of(CMS_RecipientEncryptedKey); + if (!sk_CMS_RecipientEncryptedKey_push(kari->recipientEncryptedKeys, rek)) { + M_ASN1_free_of(rek, CMS_RecipientEncryptedKey); + return 0; + } + + if (flags & CMS_USE_KEYID) { + rek->rid->type = CMS_REK_KEYIDENTIFIER; + if (!cms_set1_keyid(&rek->rid->d.rKeyId->subjectKeyIdentifier, recip)) + return 0; + } else { + rek->rid->type = CMS_REK_ISSUER_SERIAL; + if (!cms_set1_ias(&rek->rid->d.issuerAndSerialNumber, recip)) + return 0; + } + + /* Create ephemeral key */ + if (!cms_kari_create_ephemeral_key(kari, pk)) + return 0; + + CRYPTO_add(&pk->references, 1, CRYPTO_LOCK_EVP_PKEY); + rek->pkey = pk; + return 1; +} + +static int cms_wrap_init(CMS_KeyAgreeRecipientInfo *kari, + const EVP_CIPHER *cipher) +{ + EVP_CIPHER_CTX *ctx = &kari->ctx; + const EVP_CIPHER *kekcipher; + int keylen = EVP_CIPHER_key_length(cipher); + /* If a suitable wrap algorithm is already set nothing to do */ + kekcipher = EVP_CIPHER_CTX_cipher(ctx); + + if (kekcipher) { + if (EVP_CIPHER_CTX_mode(ctx) != EVP_CIPH_WRAP_MODE) + return 0; + return 1; + } + /* + * Pick a cipher based on content encryption cipher. If it is DES3 use + * DES3 wrap otherwise use AES wrap similar to key size. + */ + if (EVP_CIPHER_type(cipher) == NID_des_ede3_cbc) + kekcipher = EVP_des_ede3_wrap(); + else if (keylen <= 16) + kekcipher = EVP_aes_128_wrap(); + else if (keylen <= 24) + kekcipher = EVP_aes_192_wrap(); + else + kekcipher = EVP_aes_256_wrap(); + return EVP_EncryptInit_ex(ctx, kekcipher, NULL, NULL, NULL); +} + +/* Encrypt content key in key agreement recipient info */ + +int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri) +{ + CMS_KeyAgreeRecipientInfo *kari; + CMS_EncryptedContentInfo *ec; + CMS_RecipientEncryptedKey *rek; + STACK_OF(CMS_RecipientEncryptedKey) *reks; + int i; + + if (ri->type != CMS_RECIPINFO_AGREE) { + CMSerr(CMS_F_CMS_RECIPIENTINFO_KARI_ENCRYPT, CMS_R_NOT_KEY_AGREEMENT); + return 0; + } + kari = ri->d.kari; + reks = kari->recipientEncryptedKeys; + ec = cms->d.envelopedData->encryptedContentInfo; + /* Initialise wrap algorithm parameters */ + if (!cms_wrap_init(kari, ec->cipher)) + return 0; + /* + * If no orignator key set up initialise for ephemeral key the public key + * ASN1 structure will set the actual public key value. 
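cms_wrap_init() above picks a KEK cipher (3DES wrap or an AES wrap sized to the content cipher) and drives it through the generic EVP interface, which is why cms_kari_cb() in cms_asn1.c sets EVP_CIPHER_CTX_FLAG_WRAP_ALLOW when the context is created. A standalone sketch of the same RFC 3394 wrap using the 1.0.2-style stack-allocated EVP_CIPHER_CTX; the key and CEK buffers are assumed to be caller-provided test data:

#include <openssl/evp.h>

/* Sketch: wrap a CEK under a 16-byte KEK with AES-128 key wrap.
 * Output is input length + 8 bytes.  Returns the wrapped length or -1. */
static int wrap_cek(unsigned char *out, const unsigned char kek[16],
                    const unsigned char *cek, int ceklen)
{
    EVP_CIPHER_CTX ctx;
    int outlen = -1;

    EVP_CIPHER_CTX_init(&ctx);
    /* Wrap-mode ciphers refuse to run unless this flag is set. */
    EVP_CIPHER_CTX_set_flags(&ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
    /* NULL IV selects the default RFC 3394 integrity check value. */
    if (!EVP_EncryptInit_ex(&ctx, EVP_aes_128_wrap(), NULL, kek, NULL) ||
        !EVP_EncryptUpdate(&ctx, out, &outlen, cek, ceklen))
        outlen = -1;                  /* outlen == ceklen + 8 on success */
    EVP_CIPHER_CTX_cleanup(&ctx);
    return outlen;
}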
+ */ + if (kari->originator->type == -1) { + CMS_OriginatorIdentifierOrKey *oik = kari->originator; + oik->type = CMS_OIK_PUBKEY; + oik->d.originatorKey = M_ASN1_new_of(CMS_OriginatorPublicKey); + if (!oik->d.originatorKey) + return 0; + } + /* Initialise KDF algorithm */ + if (!cms_env_asn1_ctrl(ri, 0)) + return 0; + /* For each rek, derive KEK, encrypt CEK */ + for (i = 0; i < sk_CMS_RecipientEncryptedKey_num(reks); i++) { + unsigned char *enckey; + size_t enckeylen; + rek = sk_CMS_RecipientEncryptedKey_value(reks, i); + if (EVP_PKEY_derive_set_peer(kari->pctx, rek->pkey) <= 0) + return 0; + if (!cms_kek_cipher(&enckey, &enckeylen, ec->key, ec->keylen, + kari, 1)) + return 0; + ASN1_STRING_set0(rek->encryptedKey, enckey, enckeylen); + } + + return 1; + +} diff --git a/deps/openssl/openssl/crypto/cms/cms_lcl.h b/deps/openssl/openssl/crypto/cms/cms_lcl.h index 4f4c4c7434f36d..20f2c25f5ae987 100644 --- a/deps/openssl/openssl/crypto/cms/cms_lcl.h +++ b/deps/openssl/openssl/crypto/cms/cms_lcl.h @@ -84,11 +84,9 @@ typedef struct CMS_KeyTransRecipientInfo_st CMS_KeyTransRecipientInfo; typedef struct CMS_OriginatorPublicKey_st CMS_OriginatorPublicKey; typedef struct CMS_OriginatorIdentifierOrKey_st CMS_OriginatorIdentifierOrKey; typedef struct CMS_KeyAgreeRecipientInfo_st CMS_KeyAgreeRecipientInfo; -typedef struct CMS_OtherKeyAttribute_st CMS_OtherKeyAttribute; typedef struct CMS_RecipientKeyIdentifier_st CMS_RecipientKeyIdentifier; typedef struct CMS_KeyAgreeRecipientIdentifier_st CMS_KeyAgreeRecipientIdentifier; -typedef struct CMS_RecipientEncryptedKey_st CMS_RecipientEncryptedKey; typedef struct CMS_KEKIdentifier_st CMS_KEKIdentifier; typedef struct CMS_KEKRecipientInfo_st CMS_KEKRecipientInfo; typedef struct CMS_PasswordRecipientInfo_st CMS_PasswordRecipientInfo; @@ -138,6 +136,9 @@ struct CMS_SignerInfo_st { /* Signing certificate and key */ X509 *signer; EVP_PKEY *pkey; + /* Digest and public key context for alternative parameters */ + EVP_MD_CTX mctx; + EVP_PKEY_CTX *pctx; }; struct CMS_SignerIdentifier_st { @@ -194,6 +195,8 @@ struct CMS_KeyTransRecipientInfo_st { /* Recipient Key and cert */ X509 *recip; EVP_PKEY *pkey; + /* Public key context for this operation */ + EVP_PKEY_CTX *pctx; }; struct CMS_KeyAgreeRecipientInfo_st { @@ -202,6 +205,10 @@ struct CMS_KeyAgreeRecipientInfo_st { ASN1_OCTET_STRING *ukm; X509_ALGOR *keyEncryptionAlgorithm; STACK_OF(CMS_RecipientEncryptedKey) *recipientEncryptedKeys; + /* Public key context associated with current operation */ + EVP_PKEY_CTX *pctx; + /* Cipher context for CEK wrapping */ + EVP_CIPHER_CTX ctx; }; struct CMS_OriginatorIdentifierOrKey_st { @@ -221,6 +228,8 @@ struct CMS_OriginatorPublicKey_st { struct CMS_RecipientEncryptedKey_st { CMS_KeyAgreeRecipientIdentifier *rid; ASN1_OCTET_STRING *encryptedKey; + /* Public key associated with this recipient */ + EVP_PKEY *pkey; }; struct CMS_KeyAgreeRecipientIdentifier_st { @@ -394,6 +403,13 @@ DECLARE_ASN1_ALLOC_FUNCTIONS(CMS_IssuerAndSerialNumber) # define CMS_RECIPINFO_ISSUER_SERIAL 0 # define CMS_RECIPINFO_KEYIDENTIFIER 1 +# define CMS_REK_ISSUER_SERIAL 0 +# define CMS_REK_KEYIDENTIFIER 1 + +# define CMS_OIK_ISSUER_SERIAL 0 +# define CMS_OIK_KEYIDENTIFIER 1 +# define CMS_OIK_PUBKEY 2 + BIO *cms_content_bio(CMS_ContentInfo *cms); CMS_ContentInfo *cms_Data_create(void); @@ -420,6 +436,11 @@ BIO *cms_DigestAlgorithm_init_bio(X509_ALGOR *digestAlgorithm); int cms_DigestAlgorithm_find_ctx(EVP_MD_CTX *mctx, BIO *chain, X509_ALGOR *mdalg); +int cms_ias_cert_cmp(CMS_IssuerAndSerialNumber *ias, X509 
*cert); +int cms_keyid_cert_cmp(ASN1_OCTET_STRING *keyid, X509 *cert); +int cms_set1_ias(CMS_IssuerAndSerialNumber **pias, X509 *cert); +int cms_set1_keyid(ASN1_OCTET_STRING **pkeyid, X509 *cert); + BIO *cms_EncryptedContent_init_bio(CMS_EncryptedContentInfo *ec); BIO *cms_EncryptedData_init_bio(CMS_ContentInfo *cms); int cms_EncryptedContent_init(CMS_EncryptedContentInfo *ec, @@ -432,6 +453,13 @@ ASN1_OCTET_STRING *cms_encode_Receipt(CMS_SignerInfo *si); BIO *cms_EnvelopedData_init_bio(CMS_ContentInfo *cms); CMS_EnvelopedData *cms_get0_enveloped(CMS_ContentInfo *cms); +int cms_env_asn1_ctrl(CMS_RecipientInfo *ri, int cmd); +int cms_pkey_get_ri_type(EVP_PKEY *pk); +/* KARI routines */ +int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, + EVP_PKEY *pk, unsigned int flags); +int cms_RecipientInfo_kari_encrypt(CMS_ContentInfo *cms, + CMS_RecipientInfo *ri); /* PWRI routines */ int cms_RecipientInfo_pwri_crypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, diff --git a/deps/openssl/openssl/crypto/cms/cms_lib.c b/deps/openssl/openssl/crypto/cms/cms_lib.c index e9384616803806..d6cb60d02d1525 100644 --- a/deps/openssl/openssl/crypto/cms/cms_lib.c +++ b/deps/openssl/openssl/crypto/cms/cms_lib.c @@ -53,7 +53,7 @@ */ #include -#include +#include #include #include #include @@ -593,3 +593,60 @@ STACK_OF(X509_CRL) *CMS_get1_crls(CMS_ContentInfo *cms) } return crls; } + +int cms_ias_cert_cmp(CMS_IssuerAndSerialNumber *ias, X509 *cert) +{ + int ret; + ret = X509_NAME_cmp(ias->issuer, X509_get_issuer_name(cert)); + if (ret) + return ret; + return ASN1_INTEGER_cmp(ias->serialNumber, X509_get_serialNumber(cert)); +} + +int cms_keyid_cert_cmp(ASN1_OCTET_STRING *keyid, X509 *cert) +{ + X509_check_purpose(cert, -1, -1); + if (!cert->skid) + return -1; + return ASN1_OCTET_STRING_cmp(keyid, cert->skid); +} + +int cms_set1_ias(CMS_IssuerAndSerialNumber **pias, X509 *cert) +{ + CMS_IssuerAndSerialNumber *ias; + ias = M_ASN1_new_of(CMS_IssuerAndSerialNumber); + if (!ias) + goto err; + if (!X509_NAME_set(&ias->issuer, X509_get_issuer_name(cert))) + goto err; + if (!ASN1_STRING_copy(ias->serialNumber, X509_get_serialNumber(cert))) + goto err; + if (*pias) + M_ASN1_free_of(*pias, CMS_IssuerAndSerialNumber); + *pias = ias; + return 1; + err: + if (ias) + M_ASN1_free_of(ias, CMS_IssuerAndSerialNumber); + CMSerr(CMS_F_CMS_SET1_IAS, ERR_R_MALLOC_FAILURE); + return 0; +} + +int cms_set1_keyid(ASN1_OCTET_STRING **pkeyid, X509 *cert) +{ + ASN1_OCTET_STRING *keyid = NULL; + X509_check_purpose(cert, -1, -1); + if (!cert->skid) { + CMSerr(CMS_F_CMS_SET1_KEYID, CMS_R_CERTIFICATE_HAS_NO_KEYID); + return 0; + } + keyid = ASN1_STRING_dup(cert->skid); + if (!keyid) { + CMSerr(CMS_F_CMS_SET1_KEYID, ERR_R_MALLOC_FAILURE); + return 0; + } + if (*pkeyid) + ASN1_OCTET_STRING_free(*pkeyid); + *pkeyid = keyid; + return 1; +} diff --git a/deps/openssl/openssl/crypto/cms/cms_sd.c b/deps/openssl/openssl/crypto/cms/cms_sd.c index 6daa2627dbab57..721ffd5afb8535 100644 --- a/deps/openssl/openssl/crypto/cms/cms_sd.c +++ b/deps/openssl/openssl/crypto/cms/cms_sd.c @@ -55,6 +55,7 @@ #include "cryptlib.h" #include #include +#include #include #include #include @@ -197,27 +198,13 @@ int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type) { switch (type) { case CMS_SIGNERINFO_ISSUER_SERIAL: - sid->d.issuerAndSerialNumber = - M_ASN1_new_of(CMS_IssuerAndSerialNumber); - if (!sid->d.issuerAndSerialNumber) - goto merr; - if (!X509_NAME_set(&sid->d.issuerAndSerialNumber->issuer, - X509_get_issuer_name(cert))) - goto 
merr; - if (!ASN1_STRING_copy(sid->d.issuerAndSerialNumber->serialNumber, - X509_get_serialNumber(cert))) - goto merr; + if (!cms_set1_ias(&sid->d.issuerAndSerialNumber, cert)) + return 0; break; case CMS_SIGNERINFO_KEYIDENTIFIER: - if (!cert->skid) { - CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, - CMS_R_CERTIFICATE_HAS_NO_KEYID); + if (!cms_set1_keyid(&sid->d.subjectKeyIdentifier, cert)) return 0; - } - sid->d.subjectKeyIdentifier = ASN1_STRING_dup(cert->skid); - if (!sid->d.subjectKeyIdentifier) - goto merr; break; default: @@ -228,11 +215,6 @@ int cms_set1_SignerIdentifier(CMS_SignerIdentifier *sid, X509 *cert, int type) sid->type = type; return 1; - - merr: - CMSerr(CMS_F_CMS_SET1_SIGNERIDENTIFIER, ERR_R_MALLOC_FAILURE); - return 0; - } int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid, @@ -255,23 +237,32 @@ int cms_SignerIdentifier_get0_signer_id(CMS_SignerIdentifier *sid, int cms_SignerIdentifier_cert_cmp(CMS_SignerIdentifier *sid, X509 *cert) { - int ret; - if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) { - ret = X509_NAME_cmp(sid->d.issuerAndSerialNumber->issuer, - X509_get_issuer_name(cert)); - if (ret) - return ret; - return ASN1_INTEGER_cmp(sid->d.issuerAndSerialNumber->serialNumber, - X509_get_serialNumber(cert)); - } else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) { - X509_check_purpose(cert, -1, -1); - if (!cert->skid) - return -1; - return ASN1_OCTET_STRING_cmp(sid->d.subjectKeyIdentifier, cert->skid); - } else + if (sid->type == CMS_SIGNERINFO_ISSUER_SERIAL) + return cms_ias_cert_cmp(sid->d.issuerAndSerialNumber, cert); + else if (sid->type == CMS_SIGNERINFO_KEYIDENTIFIER) + return cms_keyid_cert_cmp(sid->d.subjectKeyIdentifier, cert); + else return -1; } +static int cms_sd_asn1_ctrl(CMS_SignerInfo *si, int cmd) +{ + EVP_PKEY *pkey = si->pkey; + int i; + if (!pkey->ameth || !pkey->ameth->pkey_ctrl) + return 1; + i = pkey->ameth->pkey_ctrl(pkey, ASN1_PKEY_CTRL_CMS_SIGN, cmd, si); + if (i == -2) { + CMSerr(CMS_F_CMS_SD_ASN1_CTRL, CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + return 0; + } + if (i <= 0) { + CMSerr(CMS_F_CMS_SD_ASN1_CTRL, CMS_R_CTRL_FAILURE); + return 0; + } + return 1; +} + CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, X509 *signer, EVP_PKEY *pk, const EVP_MD *md, unsigned int flags) @@ -298,6 +289,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, si->pkey = pk; si->signer = signer; + EVP_MD_CTX_init(&si->mctx); + si->pctx = NULL; if (flags & CMS_USE_KEYID) { si->version = 3; @@ -350,19 +343,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, } } - if (pk->ameth && pk->ameth->pkey_ctrl) { - i = pk->ameth->pkey_ctrl(pk, ASN1_PKEY_CTRL_CMS_SIGN, 0, si); - if (i == -2) { - CMSerr(CMS_F_CMS_ADD1_SIGNER, - CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); - goto err; - } - if (i <= 0) { - CMSerr(CMS_F_CMS_ADD1_SIGNER, CMS_R_CTRL_FAILURE); - goto err; - } - } - + if (!(flags & CMS_KEY_PARAM) && !cms_sd_asn1_ctrl(si, 0)) + goto err; if (!(flags & CMS_NOATTR)) { /* * Initialialize signed attributes strutucture so other attributes @@ -386,7 +368,8 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, if (flags & CMS_REUSE_DIGEST) { if (!cms_copy_messageDigest(cms, si)) goto err; - if (!(flags & CMS_PARTIAL) && !CMS_SignerInfo_sign(si)) + if (!(flags & (CMS_PARTIAL | CMS_KEY_PARAM)) && + !CMS_SignerInfo_sign(si)) goto err; } } @@ -397,6 +380,20 @@ CMS_SignerInfo *CMS_add1_signer(CMS_ContentInfo *cms, goto merr; } + if (flags & CMS_KEY_PARAM) { + if (flags & CMS_NOATTR) { + si->pctx = EVP_PKEY_CTX_new(si->pkey, NULL); + if (!si->pctx) + goto 
err; + if (EVP_PKEY_sign_init(si->pctx) <= 0) + goto err; + if (EVP_PKEY_CTX_set_signature_md(si->pctx, md) <= 0) + goto err; + } else if (EVP_DigestSignInit(&si->mctx, &si->pctx, md, NULL, pk) <= + 0) + goto err; + } + if (!sd->signerInfos) sd->signerInfos = sk_CMS_SignerInfo_new_null(); if (!sd->signerInfos || !sk_CMS_SignerInfo_push(sd->signerInfos, si)) @@ -443,6 +440,16 @@ static int cms_add1_signingTime(CMS_SignerInfo *si, ASN1_TIME *t) } +EVP_PKEY_CTX *CMS_SignerInfo_get0_pkey_ctx(CMS_SignerInfo *si) +{ + return si->pctx; +} + +EVP_MD_CTX *CMS_SignerInfo_get0_md_ctx(CMS_SignerInfo *si) +{ + return &si->mctx; +} + STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms) { CMS_SignedData *sd; @@ -561,11 +568,17 @@ void CMS_SignerInfo_get0_algs(CMS_SignerInfo *si, EVP_PKEY **pk, *psig = si->signatureAlgorithm; } +ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si) +{ + return si->signature; +} + static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, CMS_SignerInfo *si, BIO *chain) { EVP_MD_CTX mctx; int r = 0; + EVP_PKEY_CTX *pctx = NULL; EVP_MD_CTX_init(&mctx); if (!si->pkey) { @@ -575,6 +588,9 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm)) goto err; + /* Set SignerInfo algortihm details if we used custom parametsr */ + if (si->pctx && !cms_sd_asn1_ctrl(si, 0)) + goto err; /* * If any signed attributes calculate and add messageDigest attribute @@ -596,6 +612,23 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, goto err; if (!CMS_SignerInfo_sign(si)) goto err; + } else if (si->pctx) { + unsigned char *sig; + size_t siglen; + unsigned char md[EVP_MAX_MD_SIZE]; + unsigned int mdlen; + pctx = si->pctx; + if (!EVP_DigestFinal_ex(&mctx, md, &mdlen)) + goto err; + siglen = EVP_PKEY_size(si->pkey); + sig = OPENSSL_malloc(siglen); + if (!sig) { + CMSerr(CMS_F_CMS_SIGNERINFO_CONTENT_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } + if (EVP_PKEY_sign(pctx, sig, &siglen, md, mdlen) <= 0) + goto err; + ASN1_STRING_set0(si->signature, sig, siglen); } else { unsigned char *sig; unsigned int siglen; @@ -616,6 +649,8 @@ static int cms_SignerInfo_content_sign(CMS_ContentInfo *cms, err: EVP_MD_CTX_cleanup(&mctx); + if (pctx) + EVP_PKEY_CTX_free(pctx); return r; } @@ -637,7 +672,7 @@ int cms_SignedData_final(CMS_ContentInfo *cms, BIO *chain) int CMS_SignerInfo_sign(CMS_SignerInfo *si) { - EVP_MD_CTX mctx; + EVP_MD_CTX *mctx = &si->mctx; EVP_PKEY_CTX *pctx; unsigned char *abuf = NULL; int alen; @@ -648,15 +683,18 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) if (md == NULL) return 0; - EVP_MD_CTX_init(&mctx); - if (CMS_signed_get_attr_by_NID(si, NID_pkcs9_signingTime, -1) < 0) { if (!cms_add1_signingTime(si, NULL)) goto err; } - if (EVP_DigestSignInit(&mctx, &pctx, md, NULL, si->pkey) <= 0) - goto err; + if (si->pctx) + pctx = si->pctx; + else { + EVP_MD_CTX_init(mctx); + if (EVP_DigestSignInit(mctx, &pctx, md, NULL, si->pkey) <= 0) + goto err; + } if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN, EVP_PKEY_CTRL_CMS_SIGN, 0, si) <= 0) { @@ -668,15 +706,15 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) ASN1_ITEM_rptr(CMS_Attributes_Sign)); if (!abuf) goto err; - if (EVP_DigestSignUpdate(&mctx, abuf, alen) <= 0) + if (EVP_DigestSignUpdate(mctx, abuf, alen) <= 0) goto err; - if (EVP_DigestSignFinal(&mctx, NULL, &siglen) <= 0) + if (EVP_DigestSignFinal(mctx, NULL, &siglen) <= 0) goto err; OPENSSL_free(abuf); abuf = OPENSSL_malloc(siglen); if (!abuf) goto err; - if 
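On the signing side, CMS_KEY_PARAM defers the asn1 ctrl and exposes the signing EVP_PKEY_CTX through CMS_SignerInfo_get0_pkey_ctx() so parameters can be set before the SignerInfo is finalised. A hedged usage sketch for an RSA-PSS signature, assuming the 1.0.2 CMS API and a caller-provided certificate and key:

#include <openssl/bio.h>
#include <openssl/cms.h>
#include <openssl/evp.h>
#include <openssl/rsa.h>
#include <openssl/x509.h>

/* Sketch: SignedData with PSS padding selected via the per-signer key
 * context that CMS_KEY_PARAM exposes.  `scert` and `skey` are assumed. */
static CMS_ContentInfo *sign_with_pss(X509 *scert, EVP_PKEY *skey, BIO *in)
{
    CMS_ContentInfo *cms;
    CMS_SignerInfo *si;
    EVP_PKEY_CTX *pctx;

    cms = CMS_sign(NULL, NULL, NULL, NULL, CMS_PARTIAL);
    if (!cms)
        return NULL;
    si = CMS_add1_signer(cms, scert, skey, EVP_sha256(),
                         CMS_PARTIAL | CMS_KEY_PARAM);
    if (!si)
        goto err;

    pctx = CMS_SignerInfo_get0_pkey_ctx(si);
    if (!pctx ||
        EVP_PKEY_CTX_set_rsa_padding(pctx, RSA_PKCS1_PSS_PADDING) <= 0 ||
        EVP_PKEY_CTX_set_rsa_pss_saltlen(pctx, -1) <= 0)
        goto err;

    /* Signature is produced here, via CMS_SignerInfo_sign() above */
    if (!CMS_final(cms, in, NULL, 0))
        goto err;
    return cms;
 err:
    CMS_ContentInfo_free(cms);
    return NULL;
}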
(EVP_DigestSignFinal(&mctx, abuf, &siglen) <= 0) + if (EVP_DigestSignFinal(mctx, abuf, &siglen) <= 0) goto err; if (EVP_PKEY_CTX_ctrl(pctx, -1, EVP_PKEY_OP_SIGN, @@ -685,7 +723,7 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) goto err; } - EVP_MD_CTX_cleanup(&mctx); + EVP_MD_CTX_cleanup(mctx); ASN1_STRING_set0(si->signature, abuf, siglen); @@ -694,15 +732,14 @@ int CMS_SignerInfo_sign(CMS_SignerInfo *si) err: if (abuf) OPENSSL_free(abuf); - EVP_MD_CTX_cleanup(&mctx); + EVP_MD_CTX_cleanup(mctx); return 0; } int CMS_SignerInfo_verify(CMS_SignerInfo *si) { - EVP_MD_CTX mctx; - EVP_PKEY_CTX *pctx; + EVP_MD_CTX *mctx = &si->mctx; unsigned char *abuf = NULL; int alen, r = -1; const EVP_MD *md = NULL; @@ -715,26 +752,29 @@ int CMS_SignerInfo_verify(CMS_SignerInfo *si) md = EVP_get_digestbyobj(si->digestAlgorithm->algorithm); if (md == NULL) return -1; - EVP_MD_CTX_init(&mctx); - if (EVP_DigestVerifyInit(&mctx, &pctx, md, NULL, si->pkey) <= 0) + EVP_MD_CTX_init(mctx); + if (EVP_DigestVerifyInit(mctx, &si->pctx, md, NULL, si->pkey) <= 0) + goto err; + + if (!cms_sd_asn1_ctrl(si, 1)) goto err; alen = ASN1_item_i2d((ASN1_VALUE *)si->signedAttrs, &abuf, ASN1_ITEM_rptr(CMS_Attributes_Verify)); if (!abuf) goto err; - r = EVP_DigestVerifyUpdate(&mctx, abuf, alen); + r = EVP_DigestVerifyUpdate(mctx, abuf, alen); OPENSSL_free(abuf); if (r <= 0) { r = -1; goto err; } - r = EVP_DigestVerifyFinal(&mctx, + r = EVP_DigestVerifyFinal(mctx, si->signature->data, si->signature->length); if (r <= 0) CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY, CMS_R_VERIFICATION_FAILURE); err: - EVP_MD_CTX_cleanup(&mctx); + EVP_MD_CTX_cleanup(mctx); return r; } @@ -773,7 +813,10 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) { ASN1_OCTET_STRING *os = NULL; EVP_MD_CTX mctx; + EVP_PKEY_CTX *pkctx = NULL; int r = -1; + unsigned char mval[EVP_MAX_MD_SIZE]; + unsigned int mlen; EVP_MD_CTX_init(&mctx); /* If we have any signed attributes look for messageDigest value */ if (CMS_signed_get_attr_count(si) >= 0) { @@ -790,16 +833,15 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) if (!cms_DigestAlgorithm_find_ctx(&mctx, chain, si->digestAlgorithm)) goto err; + if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) { + CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, + CMS_R_UNABLE_TO_FINALIZE_CONTEXT); + goto err; + } + /* If messageDigest found compare it */ if (os) { - unsigned char mval[EVP_MAX_MD_SIZE]; - unsigned int mlen; - if (EVP_DigestFinal_ex(&mctx, mval, &mlen) <= 0) { - CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, - CMS_R_UNABLE_TO_FINALIZE_CONTEXT); - goto err; - } if (mlen != (unsigned int)os->length) { CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, CMS_R_MESSAGEDIGEST_ATTRIBUTE_WRONG_LENGTH); @@ -813,8 +855,17 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) } else r = 1; } else { - r = EVP_VerifyFinal(&mctx, si->signature->data, - si->signature->length, si->pkey); + const EVP_MD *md = EVP_MD_CTX_md(&mctx); + pkctx = EVP_PKEY_CTX_new(si->pkey, NULL); + if (EVP_PKEY_verify_init(pkctx) <= 0) + goto err; + if (EVP_PKEY_CTX_set_signature_md(pkctx, md) <= 0) + goto err; + si->pctx = pkctx; + if (!cms_sd_asn1_ctrl(si, 1)) + goto err; + r = EVP_PKEY_verify(pkctx, si->signature->data, + si->signature->length, mval, mlen); if (r <= 0) { CMSerr(CMS_F_CMS_SIGNERINFO_VERIFY_CONTENT, CMS_R_VERIFICATION_FAILURE); @@ -823,6 +874,8 @@ int CMS_SignerInfo_verify_content(CMS_SignerInfo *si, BIO *chain) } err: + if (pkctx) + EVP_PKEY_CTX_free(pkctx); EVP_MD_CTX_cleanup(&mctx); return r; diff --git 
a/deps/openssl/openssl/crypto/cms/cms_smime.c b/deps/openssl/openssl/crypto/cms/cms_smime.c index 8851603f6fb1db..8729e3f9c00499 100644 --- a/deps/openssl/openssl/crypto/cms/cms_smime.c +++ b/deps/openssl/openssl/crypto/cms/cms_smime.c @@ -59,6 +59,7 @@ #include #include #include "cms_lcl.h" +#include "asn1_locl.h" static int cms_copy_content(BIO *out, BIO *in, unsigned int flags) { @@ -567,25 +568,63 @@ CMS_ContentInfo *CMS_encrypt(STACK_OF(X509) *certs, BIO *data, return NULL; } +static int cms_kari_set1_pkey(CMS_ContentInfo *cms, CMS_RecipientInfo *ri, + EVP_PKEY *pk, X509 *cert) +{ + int i; + STACK_OF(CMS_RecipientEncryptedKey) *reks; + CMS_RecipientEncryptedKey *rek; + reks = CMS_RecipientInfo_kari_get0_reks(ri); + if (!cert) + return 0; + for (i = 0; i < sk_CMS_RecipientEncryptedKey_num(reks); i++) { + int rv; + rek = sk_CMS_RecipientEncryptedKey_value(reks, i); + if (CMS_RecipientEncryptedKey_cert_cmp(rek, cert)) + continue; + CMS_RecipientInfo_kari_set0_pkey(ri, pk); + rv = CMS_RecipientInfo_kari_decrypt(cms, ri, rek); + CMS_RecipientInfo_kari_set0_pkey(ri, NULL); + if (rv > 0) + return 1; + return -1; + } + return 0; +} + int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) { STACK_OF(CMS_RecipientInfo) *ris; CMS_RecipientInfo *ri; - int i, r; - int debug = 0, ri_match = 0; + int i, r, ri_type; + int debug = 0, match_ri = 0; ris = CMS_get0_RecipientInfos(cms); if (ris) debug = cms->d.envelopedData->encryptedContentInfo->debug; + ri_type = cms_pkey_get_ri_type(pk); + if (ri_type == CMS_RECIPINFO_NONE) { + CMSerr(CMS_F_CMS_DECRYPT_SET1_PKEY, + CMS_R_NOT_SUPPORTED_FOR_THIS_KEY_TYPE); + return 0; + } + for (i = 0; i < sk_CMS_RecipientInfo_num(ris); i++) { ri = sk_CMS_RecipientInfo_value(ris, i); - if (CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS) + if (CMS_RecipientInfo_type(ri) != ri_type) continue; - ri_match = 1; + match_ri = 1; + if (ri_type == CMS_RECIPINFO_AGREE) { + r = cms_kari_set1_pkey(cms, ri, pk, cert); + if (r > 0) + return 1; + if (r < 0) + return 0; + } /* * If we have a cert try matching RecipientInfo otherwise try them * all. */ - if (!cert || (CMS_RecipientInfo_ktri_cert_cmp(ri, cert) == 0)) { + else if (!cert || !CMS_RecipientInfo_ktri_cert_cmp(ri, cert)) { CMS_RecipientInfo_set0_pkey(ri, pk); r = CMS_RecipientInfo_decrypt(cms, ri); CMS_RecipientInfo_set0_pkey(ri, NULL); @@ -613,7 +652,7 @@ int CMS_decrypt_set1_pkey(CMS_ContentInfo *cms, EVP_PKEY *pk, X509 *cert) } } /* If no cert and not debugging always return success */ - if (ri_match && !cert && !debug) { + if (match_ri && !cert && !debug) { ERR_clear_error(); return 1; } diff --git a/deps/openssl/openssl/crypto/cryptlib.c b/deps/openssl/openssl/crypto/cryptlib.c index 6353bbe3bc2b66..98526d73dc29d0 100644 --- a/deps/openssl/openssl/crypto/cryptlib.c +++ b/deps/openssl/openssl/crypto/cryptlib.c @@ -653,7 +653,7 @@ const char *CRYPTO_get_lock_name(int type) defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) -unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[4]; unsigned long *OPENSSL_ia32cap_loc(void) { if (sizeof(long) == 4) @@ -663,6 +663,9 @@ unsigned long *OPENSSL_ia32cap_loc(void) * is 32-bit. 
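With CMS_decrypt_set1_pkey() now dispatching on cms_pkey_get_ri_type(), decrypting an EnvelopedData addressed to an EC certificate needs nothing beyond the usual one-shot call. A short usage sketch, with the S/MIME input on in, plaintext written to out, and all handles assumed to be set up by the caller:

#include <stdio.h>
#include <openssl/cms.h>
#include <openssl/err.h>

/* Sketch: read an S/MIME CMS message and decrypt it with the recipient's
 * certificate and private key (RSA key transport or EC key agreement). */
static int decrypt_smime(BIO *in, BIO *out, EVP_PKEY *rkey, X509 *rcert)
{
    CMS_ContentInfo *cms = SMIME_read_CMS(in, NULL);
    int ok = 0;

    if (!cms)
        return 0;
    /* CMS_decrypt() walks the RecipientInfos; for an EC key it now looks
     * for a matching kari entry instead of a ktri one. */
    if (CMS_decrypt(cms, rkey, rcert, NULL, out, 0))
        ok = 1;
    else
        ERR_print_errors_fp(stderr);
    CMS_ContentInfo_free(cms);
    return ok;
}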
*/ OPENSSL_ia32cap_P[1] = 0; + + OPENSSL_ia32cap_P[2] = 0; + return (unsigned long *)OPENSSL_ia32cap_P; } @@ -676,7 +679,7 @@ typedef unsigned long long IA32CAP; void OPENSSL_cpuid_setup(void) { static int trigger = 0; - IA32CAP OPENSSL_ia32_cpuid(void); + IA32CAP OPENSSL_ia32_cpuid(unsigned int *); IA32CAP vec; char *env; @@ -694,9 +697,23 @@ void OPENSSL_cpuid_setup(void) vec = strtoul(env + off, NULL, 0); # endif if (off) - vec = OPENSSL_ia32_cpuid() & ~vec; + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P) & ~vec; + else if (env[0] == ':') + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); + + OPENSSL_ia32cap_P[2] = 0; + if ((env = strchr(env, ':'))) { + unsigned int vecx; + env++; + off = (env[0] == '~') ? 1 : 0; + vecx = strtoul(env + off, NULL, 0); + if (off) + OPENSSL_ia32cap_P[2] &= ~vecx; + else + OPENSSL_ia32cap_P[2] = vecx; + } } else - vec = OPENSSL_ia32_cpuid(); + vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); /* * |(1<<10) sets a reserved bit to signal that variable @@ -706,6 +723,8 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ia32cap_P[0] = (unsigned int)vec | (1 << 10); OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32); } +# else +unsigned int OPENSSL_ia32cap_P[4]; # endif #else @@ -859,8 +878,12 @@ void OPENSSL_showfatal(const char *fmta, ...) if ((h = GetStdHandle(STD_ERROR_HANDLE)) != NULL && GetFileType(h) != FILE_TYPE_UNKNOWN) { /* must be console application */ + int len; + DWORD out; + va_start(ap, fmta); - vfprintf(stderr, fmta, ap); + len = _vsnprintf((char *)buf, sizeof(buf), fmta, ap); + WriteFile(h, buf, len < 0 ? sizeof(buf) : (DWORD) len, &out, NULL); va_end(ap); return; } @@ -967,7 +990,9 @@ void OpenSSLDie(const char *file, int line, const char *assertion) /* * Win32 abort() customarily shows a dialog, but we just did that... */ +# if !defined(_WIN32_WCE) raise(SIGABRT); +# endif _exit(3); #endif } diff --git a/deps/openssl/openssl/crypto/crypto-lib.com b/deps/openssl/openssl/crypto/crypto-lib.com index a136f4b0b64b94..1423cac288d243 100644 --- a/deps/openssl/openssl/crypto/crypto-lib.com +++ b/deps/openssl/openssl/crypto/crypto-lib.com @@ -214,7 +214,7 @@ $! The contents of these variables are copied from the LIBOBJ variable in the $! corresponding Makefile from each corresponding subdirectory, with .o stripped $! and spaces replaced with commas. 
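The cryptlib.c hunk above teaches OPENSSL_cpuid_setup() a two-word form of the OPENSSL_ia32cap environment variable: the value before an optional ':' masks (with a leading '~') or replaces the first 64 capability bits, and the word after the ':' does the same for the new extended word in OPENSSL_ia32cap_P[2]. A stripped-down, self-contained sketch of just that parsing, with hypothetical variable names standing in for the capability array and no CPUID call:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for the real capability words. */
static unsigned long long cap_lo;   /* words [0] and [1]          */
static unsigned int cap_ex;         /* word  [2] (extended flags) */

/* Parse "[~]value[:[~]value]" the way the patched OPENSSL_cpuid_setup()
 * interprets OPENSSL_ia32cap; `detected` plays the role of the CPUID result. */
static void parse_ia32cap(const char *env, unsigned long long detected)
{
    int off = (env[0] == '~') ? 1 : 0;
    unsigned long long vec = strtoull(env + off, NULL, 0);

    cap_lo = off ? (detected & ~vec) : vec;
    if (env[0] == ':')           /* empty first word keeps the detected bits */
        cap_lo = detected;

    cap_ex = 0;
    if ((env = strchr(env, ':')) != NULL) {
        unsigned int vecx;
        env++;
        off = (env[0] == '~') ? 1 : 0;
        vecx = (unsigned int)strtoul(env + off, NULL, 0);
        if (off)
            cap_ex &= ~vecx;     /* as in the patch: masks an already-zeroed word */
        else
            cap_ex = vecx;
    }
}

int main(void)
{
    parse_ia32cap("~0x1000000000:0x20", 0xffffffffffULL);
    printf("lo=%llx ex=%x\n", cap_lo, cap_ex);   /* lo=efffffffff ex=20 */
    return 0;
}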
$ LIB_ = "cryptlib,mem,mem_dbg,cversion,ex_data,cpt_err,ebcdic,"+ - - "uid,o_time,o_str,o_dir,o_fips.c,o_init,fips_ers,mem_clr" + "uid,o_time,o_str,o_dir,o_fips,o_init,fips_ers,mem_clr" $ LIB_OBJECTS = "o_names,obj_dat,obj_lib,obj_err,obj_xref" $ LIB_MD2 = "md2_dgst,md2_one" $ LIB_MD4 = "md4_dgst,md4_one" @@ -231,17 +231,19 @@ $ LIB_DES = "set_key,ecb_enc,cbc_enc,"+ - "des_enc,fcrypt_b,"+ - "fcrypt,xcbc_enc,rpc_enc,cbc_cksm,"+ - "ede_cbcm_enc,des_old,des_old2,read2pwd" +$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ - + "aes_core,aes_cbc" $ LIB_RC2 = "rc2_ecb,rc2_skey,rc2_cbc,rc2cfb64,rc2ofb64" $ LIB_RC4 = "rc4_enc,rc4_skey,rc4_utl" $ LIB_RC5 = "rc5_skey,rc5_ecb,rc5_enc,rc5cfb64,rc5ofb64" $ LIB_IDEA = "i_cbc,i_cfb64,i_ofb64,i_ecb,i_skey" $ LIB_BF = "bf_skey,bf_ecb,bf_enc,bf_cfb64,bf_ofb64" $ LIB_CAST = "c_skey,c_ecb,c_enc,c_cfb64,c_ofb64" -$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,cmll_utl,"+ - - "camellia,cmll_misc,cmll_cbc" +$ LIB_CAMELLIA = "cmll_ecb,cmll_ofb,cmll_cfb,cmll_ctr,"+ - + "cmll_utl,camellia,cmll_misc,cmll_cbc" $ LIB_SEED = "seed,seed_ecb,seed_cbc,seed_cfb,seed_ofb" $ LIB_MODES = "cbc128,ctr128,cts128,cfb128,ofb128,gcm128,"+ - - "ccm128,xts128" + "ccm128,xts128,wrap128" $ LIB_BN_ASM = "[.asm]vms.mar,vms-helper" $ IF F$TRNLNM("OPENSSL_NO_ASM") .OR. ARCH .NES. "VAX" THEN - LIB_BN_ASM = "bn_asm" @@ -263,8 +265,8 @@ $ LIB_DSA = "dsa_gen,dsa_key,dsa_lib,dsa_asn1,dsa_vrf,dsa_sign,"+ - "dsa_err,dsa_ossl,dsa_depr,dsa_ameth,dsa_pmeth,dsa_prn" $ LIB_ECDSA = "ecs_lib,ecs_asn1,ecs_ossl,ecs_sign,ecs_vrf,ecs_err" $ LIB_DH = "dh_asn1,dh_gen,dh_key,dh_lib,dh_check,dh_err,dh_depr,"+ - - "dh_ameth,dh_pmeth,dh_prn" -$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err" + "dh_ameth,dh_pmeth,dh_prn,dh_rfc5114,dh_kdf" +$ LIB_ECDH = "ech_lib,ech_ossl,ech_key,ech_err,ech_kdf" $ LIB_DSO = "dso_dl,dso_dlfcn,dso_err,dso_lib,dso_null,"+ - "dso_openssl,dso_win32,dso_vms,dso_beos" $ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ - @@ -272,9 +274,7 @@ $ LIB_ENGINE = "eng_err,eng_lib,eng_list,eng_init,eng_ctrl,"+ - "tb_rsa,tb_dsa,tb_ecdsa,tb_dh,tb_ecdh,tb_rand,tb_store,"+ - "tb_cipher,tb_digest,tb_pkmeth,tb_asnmth,"+ - "eng_openssl,eng_cnf,eng_dyn,eng_cryptodev,"+ - - "eng_rsax,eng_rdrand" -$ LIB_AES = "aes_misc,aes_ecb,aes_cfb,aes_ofb,aes_ctr,aes_ige,aes_wrap,"+ - - "aes_core,aes_cbc" + "eng_rdrand" $ LIB_BUFFER = "buffer,buf_str,buf_err" $ LIB_BIO = "bio_lib,bio_cb,bio_err,"+ - "bss_mem,bss_null,bss_fd,"+ - @@ -298,8 +298,8 @@ $ LIB_EVP_2 = "m_null,m_md2,m_md4,m_md5,m_sha,m_sha1,m_wp," + - "bio_md,bio_b64,bio_enc,evp_err,e_null,"+ - "c_all,c_allc,c_alld,evp_lib,bio_ok,"+- "evp_pkey,evp_pbe,p5_crpt,p5_crpt2" -$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,evp_fips,"+ - - "e_aes_cbc_hmac_sha1,e_rc4_hmac_md5" +$ LIB_EVP_3 = "e_old,pmeth_lib,pmeth_fn,pmeth_gn,m_sigver,"+ - + "e_aes_cbc_hmac_sha1,e_aes_cbc_hmac_sha256,e_rc4_hmac_md5" $ LIB_ASN1 = "a_object,a_bitstr,a_utctm,a_gentm,a_time,a_int,a_octet,"+ - "a_print,a_type,a_set,a_dup,a_d2i_fp,a_i2d_fp,"+ - "a_enum,a_utf8,a_sign,a_digest,a_verify,a_mbstr,a_strex,"+ - @@ -326,7 +326,7 @@ $ LIB_X509V3 = "v3_bcons,v3_bitst,v3_conf,v3_extku,v3_ia5,v3_lib,"+ - "v3_int,v3_enum,v3_sxnet,v3_cpols,v3_crld,v3_purp,v3_info,"+ - "v3_ocsp,v3_akeya,v3_pmaps,v3_pcons,v3_ncons,v3_pcia,v3_pci,"+ - "pcy_cache,pcy_node,pcy_data,pcy_map,pcy_tree,pcy_lib,"+ - - "v3_asid,v3_addr" + "v3_asid,v3_addr,v3_scts" $ LIB_CONF = "conf_err,conf_lib,conf_api,conf_def,conf_mod,conf_mall,conf_sap" $ LIB_TXT_DB = "txt_db" $ LIB_PKCS7 = 
"pk7_asn1,pk7_lib,pkcs7err,pk7_doit,pk7_smime,pk7_attr,"+ - @@ -343,7 +343,7 @@ $ LIB_UI = "ui_err,ui_lib,ui_openssl,ui_util"+LIB_UI_COMPAT $ LIB_KRB5 = "krb5_asn" $ LIB_CMS = "cms_lib,cms_asn1,cms_att,cms_io,cms_smime,cms_err,"+ - "cms_sd,cms_dd,cms_cd,cms_env,cms_enc,cms_ess,"+ - - "cms_pwri" + "cms_pwri,cms_kari" $ LIB_PQUEUE = "pqueue" $ LIB_TS = "ts_err,ts_req_utils,ts_req_print,ts_rsp_utils,ts_rsp_print,"+ - "ts_rsp_sign,ts_rsp_verify,ts_verify_ctx,ts_lib,ts_conf,"+ - @@ -1141,7 +1141,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR" $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" $ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," +$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," $ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS $ ENDIF $! diff --git a/deps/openssl/openssl/crypto/cversion.c b/deps/openssl/openssl/crypto/cversion.c index 9e6f50d78182f3..bfff6995c12eef 100644 --- a/deps/openssl/openssl/crypto/cversion.c +++ b/deps/openssl/openssl/crypto/cversion.c @@ -68,7 +68,11 @@ const char *SSLeay_version(int t) return OPENSSL_VERSION_TEXT; if (t == SSLEAY_BUILT_ON) { #ifdef DATE +# ifdef OPENSSL_USE_BUILD_DATE return (DATE); +# else + return ("built on: reproducible build, date unspecified"); +# endif #else return ("built on: date not available"); #endif diff --git a/deps/openssl/openssl/crypto/des/Makefile b/deps/openssl/openssl/crypto/des/Makefile index a6e1001329a533..060c64795e2974 100644 --- a/deps/openssl/openssl/crypto/des/Makefile +++ b/deps/openssl/openssl/crypto/des/Makefile @@ -61,6 +61,8 @@ des: des.o cbc3_enc.o lib des_enc-sparc.S: asm/des_enc.m4 m4 -B 8192 asm/des_enc.m4 > des_enc-sparc.S +dest4-sparcv9.s: asm/dest4-sparcv9.pl + $(PERL) asm/dest4-sparcv9.pl $(CFLAGS) > $@ des-586.s: asm/des-586.pl ../perlasm/x86asm.pl ../perlasm/cbc.pl $(PERL) asm/des-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@ diff --git a/deps/openssl/openssl/crypto/des/asm/des-586.pl b/deps/openssl/openssl/crypto/des/asm/des-586.pl index 5b5f39cebd13ca..bd6a7dd6b76b02 100644 --- a/deps/openssl/openssl/crypto/des/asm/des-586.pl +++ b/deps/openssl/openssl/crypto/des/asm/des-586.pl @@ -25,6 +25,7 @@ # the folded loop is only 3% slower than unrolled, but >7 times smaller &public_label("DES_SPtrans"); +&static_label("des_sptrans"); &DES_encrypt_internal(); &DES_decrypt_internal(); @@ -158,7 +159,7 @@ sub DES_encrypt &call (&label("pic_point")); &set_label("pic_point"); &blindpop($trans); - &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans)); + &lea ($trans,&DWP(&label("des_sptrans")."-".&label("pic_point"),$trans)); &mov( "ecx", &wparam(1) ); @@ -315,6 +316,7 @@ sub FP_new sub DES_SPtrans { &set_label("DES_SPtrans",64); + &set_label("des_sptrans"); &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); &data_word(0x00080802, 0x02080800, 0x02080000, 0x00000802); diff --git a/deps/openssl/openssl/crypto/des/asm/des_enc.m4 b/deps/openssl/openssl/crypto/des/asm/des_enc.m4 index 328059547820c5..dda08e126dc6c4 100644 --- a/deps/openssl/openssl/crypto/des/asm/des_enc.m4 +++ b/deps/openssl/openssl/crypto/des/asm/des_enc.m4 @@ -46,6 +46,8 @@ .ident "des_enc.m4 2.1" .file "des_enc-sparc.S" +#include + #if defined(__SUNPRO_C) && defined(__sparcv9) # define ABI64 /* They've said -xarch=v9 at command line */ #elif defined(__GNUC__) && 
defined(__arch64__) diff --git a/deps/openssl/openssl/crypto/des/asm/dest4-sparcv9.pl b/deps/openssl/openssl/crypto/des/asm/dest4-sparcv9.pl new file mode 100644 index 00000000000000..1dc60243d4fbda --- /dev/null +++ b/deps/openssl/openssl/crypto/des/asm/dest4-sparcv9.pl @@ -0,0 +1,617 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by David S. Miller and Andy Polyakov +# . The module is licensed under 2-clause BSD +# license. March 2013. All rights reserved. +# ==================================================================== + +###################################################################### +# DES for SPARC T4. +# +# As with other hardware-assisted ciphers CBC encrypt results [for +# aligned data] are virtually identical to critical path lengths: +# +# DES Triple-DES +# CBC encrypt 4.14/4.15(*) 11.7/11.7 +# CBC decrypt 1.77/4.11(**) 6.42/7.47 +# +# (*) numbers after slash are for +# misaligned data; +# (**) this is result for largest +# block size, unlike all other +# cases smaller blocks results +# are better[?]; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm"); +require "sparcv9_modes.pl"; + +&asm_init(@ARGV); + +$code.=<<___ if ($::abibits==64); +.register %g2,#scratch +.register %g3,#scratch +___ + +$code.=<<___; +.text +___ + +{ my ($inp,$out)=("%o0","%o1"); + +$code.=<<___; +.align 32 +.globl des_t4_key_expand +.type des_t4_key_expand,#function +des_t4_key_expand: + andcc $inp, 0x7, %g0 + alignaddr $inp, %g0, $inp + bz,pt %icc, 1f + ldd [$inp + 0x00], %f0 + ldd [$inp + 0x08], %f2 + faligndata %f0, %f2, %f0 +1: des_kexpand %f0, 0, %f0 + des_kexpand %f0, 1, %f2 + std %f0, [$out + 0x00] + des_kexpand %f2, 3, %f6 + std %f2, [$out + 0x08] + des_kexpand %f2, 2, %f4 + des_kexpand %f6, 3, %f10 + std %f6, [$out + 0x18] + des_kexpand %f6, 2, %f8 + std %f4, [$out + 0x10] + des_kexpand %f10, 3, %f14 + std %f10, [$out + 0x28] + des_kexpand %f10, 2, %f12 + std %f8, [$out + 0x20] + des_kexpand %f14, 1, %f16 + std %f14, [$out + 0x38] + des_kexpand %f16, 3, %f20 + std %f12, [$out + 0x30] + des_kexpand %f16, 2, %f18 + std %f16, [$out + 0x40] + des_kexpand %f20, 3, %f24 + std %f20, [$out + 0x50] + des_kexpand %f20, 2, %f22 + std %f18, [$out + 0x48] + des_kexpand %f24, 3, %f28 + std %f24, [$out + 0x60] + des_kexpand %f24, 2, %f26 + std %f22, [$out + 0x58] + des_kexpand %f28, 1, %f30 + std %f28, [$out + 0x70] + std %f26, [$out + 0x68] + retl + std %f30, [$out + 0x78] +.size des_t4_key_expand,.-des_t4_key_expand +___ +} +{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4)); + my ($ileft,$iright,$omask) = map("%g$_",(1..3)); + +$code.=<<___; +.globl des_t4_cbc_encrypt +.align 32 +des_t4_cbc_encrypt: + cmp $len, 0 + be,pn $::size_t_cc, .Lcbc_abort + nop + ld [$ivec + 0], %f0 ! load ivec + ld [$ivec + 4], %f1 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x00], %f4 ! 
load key schedule + ldd [$key + 0x08], %f6 + ldd [$key + 0x10], %f8 + ldd [$key + 0x18], %f10 + ldd [$key + 0x20], %f12 + ldd [$key + 0x28], %f14 + ldd [$key + 0x30], %f16 + ldd [$key + 0x38], %f18 + ldd [$key + 0x40], %f20 + ldd [$key + 0x48], %f22 + ldd [$key + 0x50], %f24 + ldd [$key + 0x58], %f26 + ldd [$key + 0x60], %f28 + ldd [$key + 0x68], %f30 + ldd [$key + 0x70], %f32 + ldd [$key + 0x78], %f34 + +.Ldes_cbc_enc_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f2 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + fxor %f2, %f0, %f0 ! ^= ivec + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + des_round %f20, %f22, %f0, %f0 + des_round %f24, %f26, %f0, %f0 + des_round %f28, %f30, %f0, %f0 + des_round %f32, %f34, %f0, %f0 + des_iip %f0, %f0 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_cbc_enc_loop + add $out, 8, $out + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] +.Lcbc_abort: + retl + nop + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~4x deterioration + ! in inp==out case + faligndata %f0, %f0, %f2 ! handle unaligned output + + stda %f2, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f2, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_cbc_enc_loop+4 + orn %g0, $omask, $omask + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] +.type des_t4_cbc_encrypt,#function +.size des_t4_cbc_encrypt,.-des_t4_cbc_encrypt + +.globl des_t4_cbc_decrypt +.align 32 +des_t4_cbc_decrypt: + cmp $len, 0 + be,pn $::size_t_cc, .Lcbc_abort + nop + ld [$ivec + 0], %f2 ! load ivec + ld [$ivec + 4], %f3 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x78], %f4 ! load key schedule + ldd [$key + 0x70], %f6 + ldd [$key + 0x68], %f8 + ldd [$key + 0x60], %f10 + ldd [$key + 0x58], %f12 + ldd [$key + 0x50], %f14 + ldd [$key + 0x48], %f16 + ldd [$key + 0x40], %f18 + ldd [$key + 0x38], %f20 + ldd [$key + 0x30], %f22 + ldd [$key + 0x28], %f24 + ldd [$key + 0x20], %f26 + ldd [$key + 0x18], %f28 + ldd [$key + 0x10], %f30 + ldd [$key + 0x08], %f32 + ldd [$key + 0x00], %f34 + +.Ldes_cbc_dec_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f0 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + des_round %f20, %f22, %f0, %f0 + des_round %f24, %f26, %f0, %f0 + des_round %f28, %f30, %f0, %f0 + des_round %f32, %f34, %f0, %f0 + des_iip %f0, %f0 + + fxor %f2, %f0, %f0 ! ^= ivec + movxtod %g4, %f2 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_cbc_dec_loop + add $out, 8, $out + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! 
and ~4x deterioration + ! in inp==out case + faligndata %f0, %f0, %f0 ! handle unaligned output + + stda %f0, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f0, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_cbc_dec_loop+4 + orn %g0, $omask, $omask + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] +.type des_t4_cbc_decrypt,#function +.size des_t4_cbc_decrypt,.-des_t4_cbc_decrypt +___ + +# One might wonder why does one have back-to-back des_iip/des_ip +# pairs between EDE passes. Indeed, aren't they inverse of each other? +# They almost are. Outcome of the pair is 32-bit words being swapped +# in target register. Consider pair of des_iip/des_ip as a way to +# perform the due swap, it's actually fastest way in this case. + +$code.=<<___; +.globl des_t4_ede3_cbc_encrypt +.align 32 +des_t4_ede3_cbc_encrypt: + cmp $len, 0 + be,pn $::size_t_cc, .Lcbc_abort + nop + ld [$ivec + 0], %f0 ! load ivec + ld [$ivec + 4], %f1 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x00], %f4 ! load key schedule + ldd [$key + 0x08], %f6 + ldd [$key + 0x10], %f8 + ldd [$key + 0x18], %f10 + ldd [$key + 0x20], %f12 + ldd [$key + 0x28], %f14 + ldd [$key + 0x30], %f16 + ldd [$key + 0x38], %f18 + ldd [$key + 0x40], %f20 + ldd [$key + 0x48], %f22 + ldd [$key + 0x50], %f24 + ldd [$key + 0x58], %f26 + ldd [$key + 0x60], %f28 + ldd [$key + 0x68], %f30 + ldd [$key + 0x70], %f32 + ldd [$key + 0x78], %f34 + +.Ldes_ede3_cbc_enc_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f2 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + fxor %f2, %f0, %f0 ! 
^= ivec + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + ldd [$key + 0x100-0x08], %f36 + ldd [$key + 0x100-0x10], %f38 + des_round %f20, %f22, %f0, %f0 + ldd [$key + 0x100-0x18], %f40 + ldd [$key + 0x100-0x20], %f42 + des_round %f24, %f26, %f0, %f0 + ldd [$key + 0x100-0x28], %f44 + ldd [$key + 0x100-0x30], %f46 + des_round %f28, %f30, %f0, %f0 + ldd [$key + 0x100-0x38], %f48 + ldd [$key + 0x100-0x40], %f50 + des_round %f32, %f34, %f0, %f0 + ldd [$key + 0x100-0x48], %f52 + ldd [$key + 0x100-0x50], %f54 + des_iip %f0, %f0 + + ldd [$key + 0x100-0x58], %f56 + ldd [$key + 0x100-0x60], %f58 + des_ip %f0, %f0 + ldd [$key + 0x100-0x68], %f60 + ldd [$key + 0x100-0x70], %f62 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x100-0x78], %f36 + ldd [$key + 0x100-0x80], %f38 + des_round %f40, %f42, %f0, %f0 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + ldd [$key + 0x100+0x00], %f40 + ldd [$key + 0x100+0x08], %f42 + des_round %f52, %f54, %f0, %f0 + ldd [$key + 0x100+0x10], %f44 + ldd [$key + 0x100+0x18], %f46 + des_round %f56, %f58, %f0, %f0 + ldd [$key + 0x100+0x20], %f48 + ldd [$key + 0x100+0x28], %f50 + des_round %f60, %f62, %f0, %f0 + ldd [$key + 0x100+0x30], %f52 + ldd [$key + 0x100+0x38], %f54 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x100+0x40], %f56 + ldd [$key + 0x100+0x48], %f58 + des_iip %f0, %f0 + + ldd [$key + 0x100+0x50], %f60 + ldd [$key + 0x100+0x58], %f62 + des_ip %f0, %f0 + ldd [$key + 0x100+0x60], %f36 + ldd [$key + 0x100+0x68], %f38 + des_round %f40, %f42, %f0, %f0 + ldd [$key + 0x100+0x70], %f40 + ldd [$key + 0x100+0x78], %f42 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + des_round %f52, %f54, %f0, %f0 + des_round %f56, %f58, %f0, %f0 + des_round %f60, %f62, %f0, %f0 + des_round %f36, %f38, %f0, %f0 + des_round %f40, %f42, %f0, %f0 + des_iip %f0, %f0 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_ede3_cbc_enc_loop + add $out, 8, $out + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~2x deterioration + ! in inp==out case + faligndata %f0, %f0, %f2 ! handle unaligned output + + stda %f2, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f2, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_ede3_cbc_enc_loop+4 + orn %g0, $omask, $omask + + st %f0, [$ivec + 0] ! write out ivec + retl + st %f1, [$ivec + 4] +.type des_t4_ede3_cbc_encrypt,#function +.size des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt + +.globl des_t4_ede3_cbc_decrypt +.align 32 +des_t4_ede3_cbc_decrypt: + cmp $len, 0 + be,pn $::size_t_cc, .Lcbc_abort + nop + ld [$ivec + 0], %f2 ! load ivec + ld [$ivec + 4], %f3 + + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 0xff, $omask + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + sub %g0, $ileft, $iright + and $out, 7, %g4 + alignaddrl $out, %g0, $out + srl $omask, %g4, $omask + srlx $len, 3, $len + movrz %g4, 0, $omask + prefetch [$out], 22 + + ldd [$key + 0x100+0x78], %f4 ! 
load key schedule + ldd [$key + 0x100+0x70], %f6 + ldd [$key + 0x100+0x68], %f8 + ldd [$key + 0x100+0x60], %f10 + ldd [$key + 0x100+0x58], %f12 + ldd [$key + 0x100+0x50], %f14 + ldd [$key + 0x100+0x48], %f16 + ldd [$key + 0x100+0x40], %f18 + ldd [$key + 0x100+0x38], %f20 + ldd [$key + 0x100+0x30], %f22 + ldd [$key + 0x100+0x28], %f24 + ldd [$key + 0x100+0x20], %f26 + ldd [$key + 0x100+0x18], %f28 + ldd [$key + 0x100+0x10], %f30 + ldd [$key + 0x100+0x08], %f32 + ldd [$key + 0x100+0x00], %f34 + +.Ldes_ede3_cbc_dec_loop: + ldx [$inp + 0], %g4 + brz,pt $ileft, 4f + nop + + ldx [$inp + 8], %g5 + sllx %g4, $ileft, %g4 + srlx %g5, $iright, %g5 + or %g5, %g4, %g4 +4: + movxtod %g4, %f0 + prefetch [$inp + 8+63], 20 + add $inp, 8, $inp + prefetch [$out + 63], 22 + + des_ip %f0, %f0 + des_round %f4, %f6, %f0, %f0 + des_round %f8, %f10, %f0, %f0 + des_round %f12, %f14, %f0, %f0 + des_round %f16, %f18, %f0, %f0 + ldd [$key + 0x80+0x00], %f36 + ldd [$key + 0x80+0x08], %f38 + des_round %f20, %f22, %f0, %f0 + ldd [$key + 0x80+0x10], %f40 + ldd [$key + 0x80+0x18], %f42 + des_round %f24, %f26, %f0, %f0 + ldd [$key + 0x80+0x20], %f44 + ldd [$key + 0x80+0x28], %f46 + des_round %f28, %f30, %f0, %f0 + ldd [$key + 0x80+0x30], %f48 + ldd [$key + 0x80+0x38], %f50 + des_round %f32, %f34, %f0, %f0 + ldd [$key + 0x80+0x40], %f52 + ldd [$key + 0x80+0x48], %f54 + des_iip %f0, %f0 + + ldd [$key + 0x80+0x50], %f56 + ldd [$key + 0x80+0x58], %f58 + des_ip %f0, %f0 + ldd [$key + 0x80+0x60], %f60 + ldd [$key + 0x80+0x68], %f62 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x80+0x70], %f36 + ldd [$key + 0x80+0x78], %f38 + des_round %f40, %f42, %f0, %f0 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + ldd [$key + 0x80-0x08], %f40 + ldd [$key + 0x80-0x10], %f42 + des_round %f52, %f54, %f0, %f0 + ldd [$key + 0x80-0x18], %f44 + ldd [$key + 0x80-0x20], %f46 + des_round %f56, %f58, %f0, %f0 + ldd [$key + 0x80-0x28], %f48 + ldd [$key + 0x80-0x30], %f50 + des_round %f60, %f62, %f0, %f0 + ldd [$key + 0x80-0x38], %f52 + ldd [$key + 0x80-0x40], %f54 + des_round %f36, %f38, %f0, %f0 + ldd [$key + 0x80-0x48], %f56 + ldd [$key + 0x80-0x50], %f58 + des_iip %f0, %f0 + + ldd [$key + 0x80-0x58], %f60 + ldd [$key + 0x80-0x60], %f62 + des_ip %f0, %f0 + ldd [$key + 0x80-0x68], %f36 + ldd [$key + 0x80-0x70], %f38 + des_round %f40, %f42, %f0, %f0 + ldd [$key + 0x80-0x78], %f40 + ldd [$key + 0x80-0x80], %f42 + des_round %f44, %f46, %f0, %f0 + des_round %f48, %f50, %f0, %f0 + des_round %f52, %f54, %f0, %f0 + des_round %f56, %f58, %f0, %f0 + des_round %f60, %f62, %f0, %f0 + des_round %f36, %f38, %f0, %f0 + des_round %f40, %f42, %f0, %f0 + des_iip %f0, %f0 + + fxor %f2, %f0, %f0 ! ^= ivec + movxtod %g4, %f2 + + brnz,pn $omask, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + brnz,pt $len, .Ldes_ede3_cbc_dec_loop + add $out, 8, $out + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] + +.align 16 +2: ldxa [$inp]0x82, %g4 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f0 ! handle unaligned output + + stda %f0, [$out + $omask]0xc0 ! partial store + add $out, 8, $out + orn %g0, $omask, $omask + stda %f0, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .Ldes_ede3_cbc_dec_loop+4 + orn %g0, $omask, $omask + + st %f2, [$ivec + 0] ! write out ivec + retl + st %f3, [$ivec + 4] +.type des_t4_ede3_cbc_decrypt,#function +.size des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt +___ +} +$code.=<<___; +.asciz "DES for SPARC T4, David S. 
Miller, Andy Polyakov" +.align 4 +___ + +&emit_assembler(); + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/des/des_locl.h b/deps/openssl/openssl/crypto/des/des_locl.h index 1a8e41dd29c7a2..23ea9d32a7be5f 100644 --- a/deps/openssl/openssl/crypto/des/des_locl.h +++ b/deps/openssl/openssl/crypto/des/des_locl.h @@ -162,8 +162,10 @@ } \ } -# if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) || defined(__ICC) +# if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) # define ROTATE(a,n) (_lrotr(a,n)) +# elif defined(__ICC) +# define ROTATE(a,n) (_rotr(a,n)) # elif defined(__GNUC__) && __GNUC__>=2 && !defined(__STRICT_ANSI__) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) # define ROTATE(a,n) ({ register unsigned int ret; \ diff --git a/deps/openssl/openssl/crypto/des/read_pwd.c b/deps/openssl/openssl/crypto/des/read_pwd.c index 16ba0a9eb2a1f7..514a7063b4bf3d 100644 --- a/deps/openssl/openssl/crypto/des/read_pwd.c +++ b/deps/openssl/openssl/crypto/des/read_pwd.c @@ -172,7 +172,7 @@ # include #endif -#if defined(OPENSSL_SYS_MSDOS) && !defined(__CYGWIN32__) && !defined(OPENSSL_SYS_WINCE) +#if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WINCE) # include # define fgets(a,b,c) noecho_fgets(a,b,c) #endif diff --git a/deps/openssl/openssl/crypto/dh/Makefile b/deps/openssl/openssl/crypto/dh/Makefile index f23b4f7fde84e8..f447907820c1a6 100644 --- a/deps/openssl/openssl/crypto/dh/Makefile +++ b/deps/openssl/openssl/crypto/dh/Makefile @@ -18,9 +18,9 @@ APPS= LIB=$(TOP)/libcrypto.a LIBSRC= dh_asn1.c dh_gen.c dh_key.c dh_lib.c dh_check.c dh_err.c dh_depr.c \ - dh_ameth.c dh_pmeth.c dh_prn.c + dh_ameth.c dh_pmeth.c dh_prn.c dh_rfc5114.c dh_kdf.c LIBOBJ= dh_asn1.o dh_gen.o dh_key.o dh_lib.o dh_check.o dh_err.o dh_depr.o \ - dh_ameth.o dh_pmeth.o dh_prn.o + dh_ameth.o dh_pmeth.o dh_prn.o dh_rfc5114.o dh_kdf.o SRC= $(LIBSRC) @@ -78,13 +78,13 @@ clean: dh_ameth.o: ../../e_os.h ../../include/openssl/asn1.h dh_ameth.o: ../../include/openssl/bio.h ../../include/openssl/bn.h -dh_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -dh_ameth.o: ../../include/openssl/dh.h ../../include/openssl/e_os2.h -dh_ameth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -dh_ameth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h -dh_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h -dh_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h -dh_ameth.o: ../../include/openssl/opensslconf.h +dh_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h +dh_ameth.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h +dh_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +dh_ameth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +dh_ameth.o: ../../include/openssl/err.h ../../include/openssl/evp.h +dh_ameth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +dh_ameth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h dh_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h dh_ameth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h dh_ameth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h @@ -132,6 +132,19 @@ dh_gen.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h dh_gen.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h dh_gen.o: 
../../include/openssl/stack.h ../../include/openssl/symhacks.h dh_gen.o: ../cryptlib.h dh_gen.c +dh_kdf.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h +dh_kdf.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h +dh_kdf.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h +dh_kdf.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +dh_kdf.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +dh_kdf.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h +dh_kdf.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +dh_kdf.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +dh_kdf.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h +dh_kdf.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h +dh_kdf.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +dh_kdf.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h +dh_kdf.o: dh_kdf.c dh_key.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h dh_key.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h dh_key.o: ../../include/openssl/dh.h ../../include/openssl/e_os2.h @@ -158,11 +171,12 @@ dh_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h dh_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h dh_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h dh_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h -dh_pmeth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h -dh_pmeth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h -dh_pmeth.o: ../../include/openssl/err.h ../../include/openssl/evp.h -dh_pmeth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h -dh_pmeth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h +dh_pmeth.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h +dh_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h +dh_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h +dh_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h +dh_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +dh_pmeth.o: ../../include/openssl/opensslconf.h dh_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h dh_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h dh_pmeth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h @@ -178,3 +192,11 @@ dh_prn.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h dh_prn.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h dh_prn.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h dh_prn.o: ../../include/openssl/symhacks.h ../cryptlib.h dh_prn.c +dh_rfc5114.o: ../../e_os.h ../../include/openssl/bio.h +dh_rfc5114.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h +dh_rfc5114.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h +dh_rfc5114.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h +dh_rfc5114.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h +dh_rfc5114.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +dh_rfc5114.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +dh_rfc5114.o: ../../include/openssl/symhacks.h ../cryptlib.h dh_rfc5114.c diff --git a/deps/openssl/openssl/crypto/dh/dh.h b/deps/openssl/openssl/crypto/dh/dh.h index 4cbaa9784dba88..0502f1a9cc14dc 100644 --- 
a/deps/openssl/openssl/crypto/dh/dh.h +++ b/deps/openssl/openssl/crypto/dh/dh.h @@ -167,6 +167,9 @@ struct dh_st { # define DH_CHECK_P_NOT_SAFE_PRIME 0x02 # define DH_UNABLE_TO_CHECK_GENERATOR 0x04 # define DH_NOT_SUITABLE_GENERATOR 0x08 +# define DH_CHECK_Q_NOT_PRIME 0x10 +# define DH_CHECK_INVALID_Q_VALUE 0x20 +# define DH_CHECK_INVALID_J_VALUE 0x40 /* DH_check_pub_key error codes */ # define DH_CHECK_PUBKEY_TOO_SMALL 0x01 @@ -217,8 +220,11 @@ int DH_check(const DH *dh, int *codes); int DH_check_pub_key(const DH *dh, const BIGNUM *pub_key, int *codes); int DH_generate_key(DH *dh); int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh); +int DH_compute_key_padded(unsigned char *key, const BIGNUM *pub_key, DH *dh); DH *d2i_DHparams(DH **a, const unsigned char **pp, long length); int i2d_DHparams(const DH *a, unsigned char **pp); +DH *d2i_DHxparams(DH **a, const unsigned char **pp, long length); +int i2d_DHxparams(const DH *a, unsigned char **pp); # ifndef OPENSSL_NO_FP_API int DHparams_print_fp(FILE *fp, const DH *x); # endif @@ -228,16 +234,109 @@ int DHparams_print(BIO *bp, const DH *x); int DHparams_print(char *bp, const DH *x); # endif +/* RFC 5114 parameters */ +DH *DH_get_1024_160(void); +DH *DH_get_2048_224(void); +DH *DH_get_2048_256(void); + +/* RFC2631 KDF */ +int DH_KDF_X9_42(unsigned char *out, size_t outlen, + const unsigned char *Z, size_t Zlen, + ASN1_OBJECT *key_oid, + const unsigned char *ukm, size_t ukmlen, const EVP_MD *md); + # define EVP_PKEY_CTX_set_dh_paramgen_prime_len(ctx, len) \ EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \ EVP_PKEY_CTRL_DH_PARAMGEN_PRIME_LEN, len, NULL) +# define EVP_PKEY_CTX_set_dh_paramgen_subprime_len(ctx, len) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \ + EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN, len, NULL) + +# define EVP_PKEY_CTX_set_dh_paramgen_type(ctx, typ) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \ + EVP_PKEY_CTRL_DH_PARAMGEN_TYPE, typ, NULL) + # define EVP_PKEY_CTX_set_dh_paramgen_generator(ctx, gen) \ EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DH, EVP_PKEY_OP_PARAMGEN, \ EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR, gen, NULL) +# define EVP_PKEY_CTX_set_dh_rfc5114(ctx, gen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, EVP_PKEY_OP_PARAMGEN, \ + EVP_PKEY_CTRL_DH_RFC5114, gen, NULL) + +# define EVP_PKEY_CTX_set_dhx_rfc5114(ctx, gen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, EVP_PKEY_OP_PARAMGEN, \ + EVP_PKEY_CTRL_DH_RFC5114, gen, NULL) + +# define EVP_PKEY_CTX_set_dh_kdf_type(ctx, kdf) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_DH_KDF_TYPE, kdf, NULL) + +# define EVP_PKEY_CTX_get_dh_kdf_type(ctx) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_DH_KDF_TYPE, -2, NULL) + +# define EVP_PKEY_CTX_set0_dh_kdf_oid(ctx, oid) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_DH_KDF_OID, 0, (void *)oid) + +# define EVP_PKEY_CTX_get0_dh_kdf_oid(ctx, poid) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_DH_KDF_OID, 0, (void *)poid) + +# define EVP_PKEY_CTX_set_dh_kdf_md(ctx, md) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_DH_KDF_MD, 0, (void *)md) + +# define EVP_PKEY_CTX_get_dh_kdf_md(ctx, pmd) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_DH_KDF_MD, 0, (void *)pmd) + +# define EVP_PKEY_CTX_set_dh_kdf_outlen(ctx, len) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ 
+ EVP_PKEY_CTRL_DH_KDF_OUTLEN, len, NULL) + +# define EVP_PKEY_CTX_get_dh_kdf_outlen(ctx, plen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN, 0, (void *)plen) + +# define EVP_PKEY_CTX_set0_dh_kdf_ukm(ctx, p, plen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_DH_KDF_UKM, plen, (void *)p) + +# define EVP_PKEY_CTX_get0_dh_kdf_ukm(ctx, p) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_DHX, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_DH_KDF_UKM, 0, (void *)p) + # define EVP_PKEY_CTRL_DH_PARAMGEN_PRIME_LEN (EVP_PKEY_ALG_CTRL + 1) # define EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR (EVP_PKEY_ALG_CTRL + 2) +# define EVP_PKEY_CTRL_DH_RFC5114 (EVP_PKEY_ALG_CTRL + 3) +# define EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN (EVP_PKEY_ALG_CTRL + 4) +# define EVP_PKEY_CTRL_DH_PARAMGEN_TYPE (EVP_PKEY_ALG_CTRL + 5) +# define EVP_PKEY_CTRL_DH_KDF_TYPE (EVP_PKEY_ALG_CTRL + 6) +# define EVP_PKEY_CTRL_DH_KDF_MD (EVP_PKEY_ALG_CTRL + 7) +# define EVP_PKEY_CTRL_GET_DH_KDF_MD (EVP_PKEY_ALG_CTRL + 8) +# define EVP_PKEY_CTRL_DH_KDF_OUTLEN (EVP_PKEY_ALG_CTRL + 9) +# define EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN (EVP_PKEY_ALG_CTRL + 10) +# define EVP_PKEY_CTRL_DH_KDF_UKM (EVP_PKEY_ALG_CTRL + 11) +# define EVP_PKEY_CTRL_GET_DH_KDF_UKM (EVP_PKEY_ALG_CTRL + 12) +# define EVP_PKEY_CTRL_DH_KDF_OID (EVP_PKEY_ALG_CTRL + 13) +# define EVP_PKEY_CTRL_GET_DH_KDF_OID (EVP_PKEY_ALG_CTRL + 14) + +/* KDF types */ +# define EVP_PKEY_DH_KDF_NONE 1 +# define EVP_PKEY_DH_KDF_X9_42 2 /* BEGIN ERROR CODES */ /* @@ -252,6 +351,9 @@ void ERR_load_DH_strings(void); # define DH_F_COMPUTE_KEY 102 # define DH_F_DHPARAMS_PRINT_FP 101 # define DH_F_DH_BUILTIN_GENPARAMS 106 +# define DH_F_DH_CMS_DECRYPT 117 +# define DH_F_DH_CMS_SET_PEERKEY 118 +# define DH_F_DH_CMS_SET_SHARED_INFO 119 # define DH_F_DH_COMPUTE_KEY 114 # define DH_F_DH_GENERATE_KEY 115 # define DH_F_DH_GENERATE_PARAMETERS_EX 116 @@ -273,6 +375,7 @@ void ERR_load_DH_strings(void); # define DH_R_BN_ERROR 106 # define DH_R_DECODE_ERROR 104 # define DH_R_INVALID_PUBKEY 102 +# define DH_R_KDF_PARAMETER_ERROR 112 # define DH_R_KEYS_NOT_SET 108 # define DH_R_KEY_SIZE_TOO_SMALL 110 # define DH_R_MODULUS_TOO_LARGE 103 @@ -280,6 +383,8 @@ void ERR_load_DH_strings(void); # define DH_R_NO_PARAMETERS_SET 107 # define DH_R_NO_PRIVATE_VALUE 100 # define DH_R_PARAMETER_ENCODING_ERROR 105 +# define DH_R_PEER_KEY_ERROR 113 +# define DH_R_SHARED_INFO_ERROR 114 #ifdef __cplusplus } diff --git a/deps/openssl/openssl/crypto/dh/dh_ameth.c b/deps/openssl/openssl/crypto/dh/dh_ameth.c index 1dec109835817f..c6bfc2d3f45bcf 100644 --- a/deps/openssl/openssl/crypto/dh/dh_ameth.c +++ b/deps/openssl/openssl/crypto/dh/dh_ameth.c @@ -63,6 +63,31 @@ #include #include #include "asn1_locl.h" +#ifndef OPENSSL_NO_CMS +# include +#endif + +extern const EVP_PKEY_ASN1_METHOD dhx_asn1_meth; + +/* + * i2d/d2i like DH parameter functions which use the appropriate routine for + * PKCS#3 DH or X9.42 DH. 
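+ *
+ * The dispatch keys off pkey->ameth: keys bound to the X9.42 method
+ * (dhx_asn1_meth, EVP_PKEY_DHX) use d2i_DHxparams/i2d_DHxparams, while
+ * plain EVP_PKEY_DH keys keep using the PKCS#3 DHparams routines.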
+ */ + +static DH *d2i_dhp(const EVP_PKEY *pkey, const unsigned char **pp, + long length) +{ + if (pkey->ameth == &dhx_asn1_meth) + return d2i_DHxparams(NULL, pp, length); + return d2i_DHparams(NULL, pp, length); +} + +static int i2d_dhp(const EVP_PKEY *pkey, const DH *a, unsigned char **pp) +{ + if (pkey->ameth == &dhx_asn1_meth) + return i2d_DHxparams(a, pp); + return i2d_DHparams(a, pp); +} static void int_dh_free(EVP_PKEY *pkey) { @@ -94,7 +119,7 @@ static int dh_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey) pm = pstr->data; pmlen = pstr->length; - if (!(dh = d2i_DHparams(NULL, &pm, pmlen))) { + if (!(dh = d2i_dhp(pkey, &pm, pmlen))) { DHerr(DH_F_DH_PUB_DECODE, DH_R_DECODE_ERROR); goto err; } @@ -111,7 +136,7 @@ static int dh_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey) } ASN1_INTEGER_free(public_key); - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh); return 1; err: @@ -139,7 +164,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE); goto err; } - str->length = i2d_DHparams(dh, &str->data); + str->length = i2d_dhp(pkey, dh, &str->data); if (str->length <= 0) { DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE); goto err; @@ -159,7 +184,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) goto err; } - if (X509_PUBKEY_set0_param(pk, OBJ_nid2obj(EVP_PKEY_DH), + if (X509_PUBKEY_set0_param(pk, OBJ_nid2obj(pkey->ameth->pkey_id), ptype, str, penc, penclen)) return 1; @@ -204,7 +229,7 @@ static int dh_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) pstr = pval; pm = pstr->data; pmlen = pstr->length; - if (!(dh = d2i_DHparams(NULL, &pm, pmlen))) + if (!(dh = d2i_dhp(pkey, &pm, pmlen))) goto decerr; /* We have parameters now set private key */ if (!(dh->priv_key = ASN1_INTEGER_to_BN(privkey, NULL))) { @@ -215,7 +240,7 @@ static int dh_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8) if (!DH_generate_key(dh)) goto dherr; - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh); ASN1_STRING_clear_free(privkey); @@ -243,7 +268,7 @@ static int dh_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) goto err; } - params->length = i2d_DHparams(pkey->pkey.dh, ¶ms->data); + params->length = i2d_dhp(pkey, pkey->pkey.dh, ¶ms->data); if (params->length <= 0) { DHerr(DH_F_DH_PRIV_ENCODE, ERR_R_MALLOC_FAILURE); goto err; @@ -263,7 +288,7 @@ static int dh_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey) ASN1_STRING_clear_free(prkey); prkey = NULL; - if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(NID_dhKeyAgreement), 0, + if (!PKCS8_pkey_set0(p8, OBJ_nid2obj(pkey->ameth->pkey_id), 0, V_ASN1_SEQUENCE, params, dp, dplen)) goto err; @@ -292,17 +317,17 @@ static int dh_param_decode(EVP_PKEY *pkey, const unsigned char **pder, int derlen) { DH *dh; - if (!(dh = d2i_DHparams(NULL, pder, derlen))) { + if (!(dh = d2i_dhp(pkey, pder, derlen))) { DHerr(DH_F_DH_PARAM_DECODE, ERR_R_DH_LIB); return 0; } - EVP_PKEY_assign_DH(pkey, dh); + EVP_PKEY_assign(pkey, pkey->ameth->pkey_id, dh); return 1; } static int dh_param_encode(const EVP_PKEY *pkey, unsigned char **pder) { - return i2d_DHparams(pkey->pkey.dh, pder); + return i2d_dhp(pkey, pkey->pkey.dh, pder); } static int do_dh_print(BIO *bp, const DH *x, int indent, @@ -334,15 +359,18 @@ static int do_dh_print(BIO *bp, const DH *x, int indent, } update_buflen(x->g, &buf_len); + update_buflen(x->q, &buf_len); + update_buflen(x->j, &buf_len); + update_buflen(x->counter, &buf_len); update_buflen(pub_key, &buf_len); 
update_buflen(priv_key, &buf_len); if (ptype == 2) - ktype = "PKCS#3 DH Private-Key"; + ktype = "DH Private-Key"; else if (ptype == 1) - ktype = "PKCS#3 DH Public-Key"; + ktype = "DH Public-Key"; else - ktype = "PKCS#3 DH Parameters"; + ktype = "DH Parameters"; m = OPENSSL_malloc(buf_len + 10); if (m == NULL) { @@ -364,6 +392,29 @@ static int do_dh_print(BIO *bp, const DH *x, int indent, goto err; if (!ASN1_bn_print(bp, "generator:", x->g, m, indent)) goto err; + if (x->q && !ASN1_bn_print(bp, "subgroup order:", x->q, m, indent)) + goto err; + if (x->j && !ASN1_bn_print(bp, "subgroup factor:", x->j, m, indent)) + goto err; + if (x->seed) { + int i; + BIO_indent(bp, indent, 128); + BIO_puts(bp, "seed:"); + for (i = 0; i < x->seedlen; i++) { + if ((i % 15) == 0) { + if (BIO_puts(bp, "\n") <= 0 + || !BIO_indent(bp, indent + 4, 128)) + goto err; + } + if (BIO_printf(bp, "%02x%s", x->seed[i], + ((i + 1) == x->seedlen) ? "" : ":") <= 0) + goto err; + } + if (BIO_write(bp, "\n", 1) <= 0) + return (0); + } + if (x->counter && !ASN1_bn_print(bp, "counter:", x->counter, m, indent)) + goto err; if (x->length != 0) { BIO_indent(bp, indent, 128); if (BIO_printf(bp, "recommended-private-length: %d bits\n", @@ -396,29 +447,76 @@ static int dh_cmp_parameters(const EVP_PKEY *a, const EVP_PKEY *b) if (BN_cmp(a->pkey.dh->p, b->pkey.dh->p) || BN_cmp(a->pkey.dh->g, b->pkey.dh->g)) return 0; - else - return 1; + else if (a->ameth == &dhx_asn1_meth) { + if (BN_cmp(a->pkey.dh->q, b->pkey.dh->q)) + return 0; + } + return 1; } -static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from) +static int int_dh_bn_cpy(BIGNUM **dst, const BIGNUM *src) { BIGNUM *a; + if (src) { + a = BN_dup(src); + if (!a) + return 0; + } else + a = NULL; + if (*dst) + BN_free(*dst); + *dst = a; + return 1; +} - if ((a = BN_dup(from->pkey.dh->p)) == NULL) +static int int_dh_param_copy(DH *to, const DH *from, int is_x942) +{ + if (is_x942 == -1) + is_x942 = ! 
!from->q; + if (!int_dh_bn_cpy(&to->p, from->p)) return 0; - if (to->pkey.dh->p != NULL) - BN_free(to->pkey.dh->p); - to->pkey.dh->p = a; - - if ((a = BN_dup(from->pkey.dh->g)) == NULL) + if (!int_dh_bn_cpy(&to->g, from->g)) return 0; - if (to->pkey.dh->g != NULL) - BN_free(to->pkey.dh->g); - to->pkey.dh->g = a; - + if (is_x942) { + if (!int_dh_bn_cpy(&to->q, from->q)) + return 0; + if (!int_dh_bn_cpy(&to->j, from->j)) + return 0; + if (to->seed) { + OPENSSL_free(to->seed); + to->seed = NULL; + to->seedlen = 0; + } + if (from->seed) { + to->seed = BUF_memdup(from->seed, from->seedlen); + if (!to->seed) + return 0; + to->seedlen = from->seedlen; + } + } else + to->length = from->length; return 1; } +DH *DHparams_dup(DH *dh) +{ + DH *ret; + ret = DH_new(); + if (!ret) + return NULL; + if (!int_dh_param_copy(ret, dh, -1)) { + DH_free(ret); + return NULL; + } + return ret; +} + +static int dh_copy_parameters(EVP_PKEY *to, const EVP_PKEY *from) +{ + return int_dh_param_copy(to->pkey.dh, from->pkey.dh, + from->ameth == &dhx_asn1_meth); +} + static int dh_missing_parameters(const EVP_PKEY *a) { if (!a->pkey.dh->p || !a->pkey.dh->g) @@ -459,6 +557,33 @@ int DHparams_print(BIO *bp, const DH *x) return do_dh_print(bp, x, 4, NULL, 0); } +#ifndef OPENSSL_NO_CMS +static int dh_cms_decrypt(CMS_RecipientInfo *ri); +static int dh_cms_encrypt(CMS_RecipientInfo *ri); +#endif + +static int dh_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) +{ + switch (op) { +#ifndef OPENSSL_NO_CMS + + case ASN1_PKEY_CTRL_CMS_ENVELOPE: + if (arg1 == 1) + return dh_cms_decrypt(arg2); + else if (arg1 == 0) + return dh_cms_encrypt(arg2); + return -2; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_AGREE; + return 1; +#endif + default: + return -2; + } + +} + const EVP_PKEY_ASN1_METHOD dh_asn1_meth = { EVP_PKEY_DH, EVP_PKEY_DH, @@ -490,3 +615,343 @@ const EVP_PKEY_ASN1_METHOD dh_asn1_meth = { int_dh_free, 0 }; + +const EVP_PKEY_ASN1_METHOD dhx_asn1_meth = { + EVP_PKEY_DHX, + EVP_PKEY_DHX, + 0, + + "X9.42 DH", + "OpenSSL X9.42 DH method", + + dh_pub_decode, + dh_pub_encode, + dh_pub_cmp, + dh_public_print, + + dh_priv_decode, + dh_priv_encode, + dh_private_print, + + int_dh_size, + dh_bits, + + dh_param_decode, + dh_param_encode, + dh_missing_parameters, + dh_copy_parameters, + dh_cmp_parameters, + dh_param_print, + 0, + + int_dh_free, + dh_pkey_ctrl +}; + +#ifndef OPENSSL_NO_CMS + +static int dh_cms_set_peerkey(EVP_PKEY_CTX *pctx, + X509_ALGOR *alg, ASN1_BIT_STRING *pubkey) +{ + ASN1_OBJECT *aoid; + int atype; + void *aval; + ASN1_INTEGER *public_key = NULL; + int rv = 0; + EVP_PKEY *pkpeer = NULL, *pk = NULL; + DH *dhpeer = NULL; + const unsigned char *p; + int plen; + + X509_ALGOR_get0(&aoid, &atype, &aval, alg); + if (OBJ_obj2nid(aoid) != NID_dhpublicnumber) + goto err; + /* Only absent parameters allowed in RFC XXXX */ + if (atype != V_ASN1_UNDEF && atype == V_ASN1_NULL) + goto err; + + pk = EVP_PKEY_CTX_get0_pkey(pctx); + if (!pk) + goto err; + if (pk->type != EVP_PKEY_DHX) + goto err; + /* Get parameters from parent key */ + dhpeer = DHparams_dup(pk->pkey.dh); + /* We have parameters now set public key */ + plen = ASN1_STRING_length(pubkey); + p = ASN1_STRING_data(pubkey); + if (!p || !plen) + goto err; + + if (!(public_key = d2i_ASN1_INTEGER(NULL, &p, plen))) { + DHerr(DH_F_DH_CMS_SET_PEERKEY, DH_R_DECODE_ERROR); + goto err; + } + + /* We have parameters now set public key */ + if (!(dhpeer->pub_key = ASN1_INTEGER_to_BN(public_key, NULL))) { + DHerr(DH_F_DH_CMS_SET_PEERKEY, 
DH_R_BN_DECODE_ERROR); + goto err; + } + + pkpeer = EVP_PKEY_new(); + if (!pkpeer) + goto err; + EVP_PKEY_assign(pkpeer, pk->ameth->pkey_id, dhpeer); + dhpeer = NULL; + if (EVP_PKEY_derive_set_peer(pctx, pkpeer) > 0) + rv = 1; + err: + if (public_key) + ASN1_INTEGER_free(public_key); + if (pkpeer) + EVP_PKEY_free(pkpeer); + if (dhpeer) + DH_free(dhpeer); + return rv; +} + +static int dh_cms_set_shared_info(EVP_PKEY_CTX *pctx, CMS_RecipientInfo *ri) +{ + int rv = 0; + + X509_ALGOR *alg, *kekalg = NULL; + ASN1_OCTET_STRING *ukm; + const unsigned char *p; + unsigned char *dukm = NULL; + size_t dukmlen = 0; + int keylen, plen; + const EVP_CIPHER *kekcipher; + EVP_CIPHER_CTX *kekctx; + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &alg, &ukm)) + goto err; + + /* + * For DH we only have one OID permissible. If ever any more get defined + * we will need something cleverer. + */ + if (OBJ_obj2nid(alg->algorithm) != NID_id_smime_alg_ESDH) { + DHerr(DH_F_DH_CMS_SET_SHARED_INFO, DH_R_KDF_PARAMETER_ERROR); + goto err; + } + + if (EVP_PKEY_CTX_set_dh_kdf_type(pctx, EVP_PKEY_DH_KDF_X9_42) <= 0) + goto err; + + if (EVP_PKEY_CTX_set_dh_kdf_md(pctx, EVP_sha1()) <= 0) + goto err; + + if (alg->parameter->type != V_ASN1_SEQUENCE) + goto err; + + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + kekalg = d2i_X509_ALGOR(NULL, &p, plen); + if (!kekalg) + goto err; + kekctx = CMS_RecipientInfo_kari_get0_ctx(ri); + if (!kekctx) + goto err; + kekcipher = EVP_get_cipherbyobj(kekalg->algorithm); + if (!kekcipher || EVP_CIPHER_mode(kekcipher) != EVP_CIPH_WRAP_MODE) + goto err; + if (!EVP_EncryptInit_ex(kekctx, kekcipher, NULL, NULL, NULL)) + goto err; + if (EVP_CIPHER_asn1_to_param(kekctx, kekalg->parameter) <= 0) + goto err; + + keylen = EVP_CIPHER_CTX_key_length(kekctx); + if (EVP_PKEY_CTX_set_dh_kdf_outlen(pctx, keylen) <= 0) + goto err; + /* Use OBJ_nid2obj to ensure we use built in OID that isn't freed */ + if (EVP_PKEY_CTX_set0_dh_kdf_oid(pctx, + OBJ_nid2obj(EVP_CIPHER_type(kekcipher))) + <= 0) + goto err; + + if (ukm) { + dukmlen = ASN1_STRING_length(ukm); + dukm = BUF_memdup(ASN1_STRING_data(ukm), dukmlen); + if (!dukm) + goto err; + } + + if (EVP_PKEY_CTX_set0_dh_kdf_ukm(pctx, dukm, dukmlen) <= 0) + goto err; + dukm = NULL; + + rv = 1; + err: + if (kekalg) + X509_ALGOR_free(kekalg); + if (dukm) + OPENSSL_free(dukm); + return rv; +} + +static int dh_cms_decrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* See if we need to set peer key */ + if (!EVP_PKEY_CTX_get0_peerkey(pctx)) { + X509_ALGOR *alg; + ASN1_BIT_STRING *pubkey; + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &alg, &pubkey, + NULL, NULL, NULL)) + return 0; + if (!alg || !pubkey) + return 0; + if (!dh_cms_set_peerkey(pctx, alg, pubkey)) { + DHerr(DH_F_DH_CMS_DECRYPT, DH_R_PEER_KEY_ERROR); + return 0; + } + } + /* Set DH derivation parameters and initialise unwrap context */ + if (!dh_cms_set_shared_info(pctx, ri)) { + DHerr(DH_F_DH_CMS_DECRYPT, DH_R_SHARED_INFO_ERROR); + return 0; + } + return 1; +} + +static int dh_cms_encrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + EVP_PKEY *pkey; + EVP_CIPHER_CTX *ctx; + int keylen; + X509_ALGOR *talg, *wrap_alg = NULL; + ASN1_OBJECT *aoid; + ASN1_BIT_STRING *pubkey; + ASN1_STRING *wrap_str; + ASN1_OCTET_STRING *ukm; + unsigned char *penc = NULL, *dukm = NULL; + int penclen; + size_t dukmlen = 0; + int rv = 0; + int kdf_type, wrap_nid; + const EVP_MD *kdf_md; + pctx = 
CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* Get ephemeral key */ + pkey = EVP_PKEY_CTX_get0_pkey(pctx); + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &talg, &pubkey, + NULL, NULL, NULL)) + goto err; + X509_ALGOR_get0(&aoid, NULL, NULL, talg); + /* Is everything uninitialised? */ + if (aoid == OBJ_nid2obj(NID_undef)) { + ASN1_INTEGER *pubk; + pubk = BN_to_ASN1_INTEGER(pkey->pkey.dh->pub_key, NULL); + if (!pubk) + goto err; + /* Set the key */ + + penclen = i2d_ASN1_INTEGER(pubk, &penc); + ASN1_INTEGER_free(pubk); + if (penclen <= 0) + goto err; + ASN1_STRING_set0(pubkey, penc, penclen); + pubkey->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07); + pubkey->flags |= ASN1_STRING_FLAG_BITS_LEFT; + + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(NID_dhpublicnumber), + V_ASN1_UNDEF, NULL); + } + + /* See if custom paraneters set */ + kdf_type = EVP_PKEY_CTX_get_dh_kdf_type(pctx); + if (kdf_type <= 0) + goto err; + if (!EVP_PKEY_CTX_get_dh_kdf_md(pctx, &kdf_md)) + goto err; + + if (kdf_type == EVP_PKEY_DH_KDF_NONE) { + kdf_type = EVP_PKEY_DH_KDF_X9_42; + if (EVP_PKEY_CTX_set_dh_kdf_type(pctx, kdf_type) <= 0) + goto err; + } else if (kdf_type != EVP_PKEY_DH_KDF_X9_42) + /* Unknown KDF */ + goto err; + if (kdf_md == NULL) { + /* Only SHA1 supported */ + kdf_md = EVP_sha1(); + if (EVP_PKEY_CTX_set_dh_kdf_md(pctx, kdf_md) <= 0) + goto err; + } else if (EVP_MD_type(kdf_md) != NID_sha1) + /* Unsupported digest */ + goto err; + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &talg, &ukm)) + goto err; + + /* Get wrap NID */ + ctx = CMS_RecipientInfo_kari_get0_ctx(ri); + wrap_nid = EVP_CIPHER_CTX_type(ctx); + if (EVP_PKEY_CTX_set0_dh_kdf_oid(pctx, OBJ_nid2obj(wrap_nid)) <= 0) + goto err; + keylen = EVP_CIPHER_CTX_key_length(ctx); + + /* Package wrap algorithm in an AlgorithmIdentifier */ + + wrap_alg = X509_ALGOR_new(); + if (!wrap_alg) + goto err; + wrap_alg->algorithm = OBJ_nid2obj(wrap_nid); + wrap_alg->parameter = ASN1_TYPE_new(); + if (!wrap_alg->parameter) + goto err; + if (EVP_CIPHER_param_to_asn1(ctx, wrap_alg->parameter) <= 0) + goto err; + if (ASN1_TYPE_get(wrap_alg->parameter) == NID_undef) { + ASN1_TYPE_free(wrap_alg->parameter); + wrap_alg->parameter = NULL; + } + + if (EVP_PKEY_CTX_set_dh_kdf_outlen(pctx, keylen) <= 0) + goto err; + + if (ukm) { + dukmlen = ASN1_STRING_length(ukm); + dukm = BUF_memdup(ASN1_STRING_data(ukm), dukmlen); + if (!dukm) + goto err; + } + + if (EVP_PKEY_CTX_set0_dh_kdf_ukm(pctx, dukm, dukmlen) <= 0) + goto err; + dukm = NULL; + + /* + * Now need to wrap encoding of wrap AlgorithmIdentifier into parameter + * of another AlgorithmIdentifier. 
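+ * The DER encoding of wrap_alg (the key-wrap cipher and its parameters)
+ * becomes the V_ASN1_SEQUENCE parameter of the outer id-smime-alg-ESDH
+ * AlgorithmIdentifier installed on talg just below.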
+ */ + penc = NULL; + penclen = i2d_X509_ALGOR(wrap_alg, &penc); + if (!penc || !penclen) + goto err; + wrap_str = ASN1_STRING_new(); + if (!wrap_str) + goto err; + ASN1_STRING_set0(wrap_str, penc, penclen); + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(NID_id_smime_alg_ESDH), + V_ASN1_SEQUENCE, wrap_str); + + rv = 1; + + err: + if (penc) + OPENSSL_free(penc); + if (wrap_alg) + X509_ALGOR_free(wrap_alg); + return rv; +} + +#endif diff --git a/deps/openssl/openssl/crypto/dh/dh_asn1.c b/deps/openssl/openssl/crypto/dh/dh_asn1.c index e6ee3cfc66cc13..f470214399b642 100644 --- a/deps/openssl/openssl/crypto/dh/dh_asn1.c +++ b/deps/openssl/openssl/crypto/dh/dh_asn1.c @@ -89,7 +89,101 @@ ASN1_SEQUENCE_cb(DHparams, dh_cb) = { IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(DH, DHparams, DHparams) -DH *DHparams_dup(DH *dh) +/* + * Internal only structures for handling X9.42 DH: this gets translated to or + * from a DH structure straight away. + */ + +typedef struct { + ASN1_BIT_STRING *seed; + BIGNUM *counter; +} int_dhvparams; + +typedef struct { + BIGNUM *p; + BIGNUM *q; + BIGNUM *g; + BIGNUM *j; + int_dhvparams *vparams; +} int_dhx942_dh; + +ASN1_SEQUENCE(DHvparams) = { + ASN1_SIMPLE(int_dhvparams, seed, ASN1_BIT_STRING), + ASN1_SIMPLE(int_dhvparams, counter, BIGNUM) +} ASN1_SEQUENCE_END_name(int_dhvparams, DHvparams) + +ASN1_SEQUENCE(DHxparams) = { + ASN1_SIMPLE(int_dhx942_dh, p, BIGNUM), + ASN1_SIMPLE(int_dhx942_dh, g, BIGNUM), + ASN1_SIMPLE(int_dhx942_dh, q, BIGNUM), + ASN1_OPT(int_dhx942_dh, j, BIGNUM), + ASN1_OPT(int_dhx942_dh, vparams, DHvparams), +} ASN1_SEQUENCE_END_name(int_dhx942_dh, DHxparams) + +int_dhx942_dh *d2i_int_dhx(int_dhx942_dh **a, + const unsigned char **pp, long length); +int i2d_int_dhx(const int_dhx942_dh *a, unsigned char **pp); + +IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(int_dhx942_dh, DHxparams, int_dhx) + +/* Application leve function: read in X9.42 DH parameters into DH structure */ + +DH *d2i_DHxparams(DH **a, const unsigned char **pp, long length) { - return ASN1_item_dup(ASN1_ITEM_rptr(DHparams), dh); + int_dhx942_dh *dhx = NULL; + DH *dh = NULL; + dh = DH_new(); + if (!dh) + return NULL; + dhx = d2i_int_dhx(NULL, pp, length); + if (!dhx) { + DH_free(dh); + return NULL; + } + + if (a) { + if (*a) + DH_free(*a); + *a = dh; + } + + dh->p = dhx->p; + dh->q = dhx->q; + dh->g = dhx->g; + dh->j = dhx->j; + + if (dhx->vparams) { + dh->seed = dhx->vparams->seed->data; + dh->seedlen = dhx->vparams->seed->length; + dh->counter = dhx->vparams->counter; + dhx->vparams->seed->data = NULL; + ASN1_BIT_STRING_free(dhx->vparams->seed); + OPENSSL_free(dhx->vparams); + dhx->vparams = NULL; + } + + OPENSSL_free(dhx); + return dh; +} + +int i2d_DHxparams(const DH *dh, unsigned char **pp) +{ + int_dhx942_dh dhx; + int_dhvparams dhv; + ASN1_BIT_STRING bs; + dhx.p = dh->p; + dhx.g = dh->g; + dhx.q = dh->q; + dhx.j = dh->j; + if (dh->counter && dh->seed && dh->seedlen > 0) { + bs.flags = ASN1_STRING_FLAG_BITS_LEFT; + bs.data = dh->seed; + bs.length = dh->seedlen; + dhv.seed = &bs; + dhv.counter = dh->counter; + dhx.vparams = &dhv; + } else + dhx.vparams = NULL; + + return i2d_int_dhx(&dhx, pp); } diff --git a/deps/openssl/openssl/crypto/dh/dh_check.c b/deps/openssl/openssl/crypto/dh/dh_check.c index c39ed97ba6275e..347467c6a43369 100644 --- a/deps/openssl/openssl/crypto/dh/dh_check.c +++ b/deps/openssl/openssl/crypto/dh/dh_check.c @@ -76,17 +76,43 @@ int DH_check(const DH *dh, int *ret) int ok = 0; BN_CTX *ctx = NULL; BN_ULONG l; - BIGNUM *q = NULL; + BIGNUM *t1 = NULL, *t2 = NULL; 
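+    /*
+     * When the X9.42 subgroup order q is present, the branch below checks
+     * that 1 < g < p, that g^q == 1 (mod p), that q is prime and divides
+     * p - 1, and that any supplied j equals (p - 1) / q; without q the
+     * original generator and safe-prime tests still apply.  A caller can
+     * test the new codes like the existing ones, e.g. (sketch only,
+     * reject_parameters() is a hypothetical handler):
+     *
+     *     int codes = 0;
+     *     if (DH_check(dh, &codes) && (codes & DH_CHECK_Q_NOT_PRIME))
+     *         reject_parameters();
+     */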
*ret = 0; ctx = BN_CTX_new(); if (ctx == NULL) goto err; - q = BN_new(); - if (q == NULL) + BN_CTX_start(ctx); + t1 = BN_CTX_get(ctx); + if (t1 == NULL) + goto err; + t2 = BN_CTX_get(ctx); + if (t2 == NULL) goto err; - if (BN_is_word(dh->g, DH_GENERATOR_2)) { + if (dh->q) { + if (BN_cmp(dh->g, BN_value_one()) <= 0) + *ret |= DH_NOT_SUITABLE_GENERATOR; + else if (BN_cmp(dh->g, dh->p) >= 0) + *ret |= DH_NOT_SUITABLE_GENERATOR; + else { + /* Check g^q == 1 mod p */ + if (!BN_mod_exp(t1, dh->g, dh->q, dh->p, ctx)) + goto err; + if (!BN_is_one(t1)) + *ret |= DH_NOT_SUITABLE_GENERATOR; + } + if (!BN_is_prime_ex(dh->q, BN_prime_checks, ctx, NULL)) + *ret |= DH_CHECK_Q_NOT_PRIME; + /* Check p == 1 mod q i.e. q divides p - 1 */ + if (!BN_div(t1, t2, dh->p, dh->q, ctx)) + goto err; + if (!BN_is_one(t2)) + *ret |= DH_CHECK_INVALID_Q_VALUE; + if (dh->j && BN_cmp(dh->j, t1)) + *ret |= DH_CHECK_INVALID_J_VALUE; + + } else if (BN_is_word(dh->g, DH_GENERATOR_2)) { l = BN_mod_word(dh->p, 24); if (l != 11) *ret |= DH_NOT_SUITABLE_GENERATOR; @@ -107,18 +133,18 @@ int DH_check(const DH *dh, int *ret) if (!BN_is_prime_ex(dh->p, BN_prime_checks, ctx, NULL)) *ret |= DH_CHECK_P_NOT_PRIME; - else { - if (!BN_rshift1(q, dh->p)) + else if (!dh->q) { + if (!BN_rshift1(t1, dh->p)) goto err; - if (!BN_is_prime_ex(q, BN_prime_checks, ctx, NULL)) + if (!BN_is_prime_ex(t1, BN_prime_checks, ctx, NULL)) *ret |= DH_CHECK_P_NOT_SAFE_PRIME; } ok = 1; err: - if (ctx != NULL) + if (ctx != NULL) { + BN_CTX_end(ctx); BN_CTX_free(ctx); - if (q != NULL) - BN_free(q); + } return (ok); } diff --git a/deps/openssl/openssl/crypto/dh/dh_err.c b/deps/openssl/openssl/crypto/dh/dh_err.c index 6ed5eb789016ed..b890cca81748c1 100644 --- a/deps/openssl/openssl/crypto/dh/dh_err.c +++ b/deps/openssl/openssl/crypto/dh/dh_err.c @@ -1,6 +1,6 @@ /* crypto/dh/dh_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -73,6 +73,9 @@ static ERR_STRING_DATA DH_str_functs[] = { {ERR_FUNC(DH_F_COMPUTE_KEY), "COMPUTE_KEY"}, {ERR_FUNC(DH_F_DHPARAMS_PRINT_FP), "DHparams_print_fp"}, {ERR_FUNC(DH_F_DH_BUILTIN_GENPARAMS), "DH_BUILTIN_GENPARAMS"}, + {ERR_FUNC(DH_F_DH_CMS_DECRYPT), "DH_CMS_DECRYPT"}, + {ERR_FUNC(DH_F_DH_CMS_SET_PEERKEY), "DH_CMS_SET_PEERKEY"}, + {ERR_FUNC(DH_F_DH_CMS_SET_SHARED_INFO), "DH_CMS_SET_SHARED_INFO"}, {ERR_FUNC(DH_F_DH_COMPUTE_KEY), "DH_compute_key"}, {ERR_FUNC(DH_F_DH_GENERATE_KEY), "DH_generate_key"}, {ERR_FUNC(DH_F_DH_GENERATE_PARAMETERS_EX), "DH_generate_parameters_ex"}, @@ -96,6 +99,7 @@ static ERR_STRING_DATA DH_str_reasons[] = { {ERR_REASON(DH_R_BN_ERROR), "bn error"}, {ERR_REASON(DH_R_DECODE_ERROR), "decode error"}, {ERR_REASON(DH_R_INVALID_PUBKEY), "invalid public key"}, + {ERR_REASON(DH_R_KDF_PARAMETER_ERROR), "kdf parameter error"}, {ERR_REASON(DH_R_KEYS_NOT_SET), "keys not set"}, {ERR_REASON(DH_R_KEY_SIZE_TOO_SMALL), "key size too small"}, {ERR_REASON(DH_R_MODULUS_TOO_LARGE), "modulus too large"}, @@ -103,6 +107,8 @@ static ERR_STRING_DATA DH_str_reasons[] = { {ERR_REASON(DH_R_NO_PARAMETERS_SET), "no parameters set"}, {ERR_REASON(DH_R_NO_PRIVATE_VALUE), "no private value"}, {ERR_REASON(DH_R_PARAMETER_ENCODING_ERROR), "parameter encoding error"}, + {ERR_REASON(DH_R_PEER_KEY_ERROR), "peer key error"}, + {ERR_REASON(DH_R_SHARED_INFO_ERROR), "shared info error"}, {0, NULL} }; diff --git a/deps/openssl/openssl/crypto/dh/dh_kdf.c b/deps/openssl/openssl/crypto/dh/dh_kdf.c new file mode 100644 index 00000000000000..a882cb286e0eae --- /dev/null +++ b/deps/openssl/openssl/crypto/dh/dh_kdf.c @@ -0,0 +1,187 @@ +/* crypto/dh/dh_kdf.c */ +/* + * Written by Stephen Henson for the OpenSSL project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. 
Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include +#include +#include +#include +#include + +/* Key derivation from X9.42/RFC2631 */ + +#define DH_KDF_MAX (1L << 30) + +/* Skip past an ASN1 structure: for OBJECT skip content octets too */ + +static int skip_asn1(unsigned char **pp, long *plen, int exptag) +{ + const unsigned char *q = *pp; + int i, tag, xclass; + long tmplen; + i = ASN1_get_object(&q, &tmplen, &tag, &xclass, *plen); + if (i & 0x80) + return 0; + if (tag != exptag || xclass != V_ASN1_UNIVERSAL) + return 0; + if (tag == V_ASN1_OBJECT) + q += tmplen; + *plen -= q - *pp; + *pp = (unsigned char *)q; + return 1; +} + +/* + * Encode the DH shared info structure, return an offset to the counter value + * so we can update the structure without reencoding it. 
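+ * The counter is emitted with a 4-byte "magic" placeholder (F3 17 22 53);
+ * after encoding, the placeholder is located again by skipping the outer
+ * SEQUENCEs, the key OID and the OCTET STRING header, and is verified with
+ * CRYPTO_memcmp before *pctr is set.  DH_KDF_X9_42 can then write each
+ * big-endian counter value straight into the cached DER encoding.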
+ */ + +static int dh_sharedinfo_encode(unsigned char **pder, unsigned char **pctr, + ASN1_OBJECT *key_oid, size_t outlen, + const unsigned char *ukm, size_t ukmlen) +{ + unsigned char *p; + int derlen; + long tlen; + /* "magic" value to check offset is sane */ + static unsigned char ctr[4] = { 0xF3, 0x17, 0x22, 0x53 }; + X509_ALGOR atmp; + ASN1_OCTET_STRING ctr_oct, ukm_oct, *pukm_oct; + ASN1_TYPE ctr_atype; + if (ukmlen > DH_KDF_MAX || outlen > DH_KDF_MAX) + return 0; + ctr_oct.data = ctr; + ctr_oct.length = 4; + ctr_oct.flags = 0; + ctr_oct.type = V_ASN1_OCTET_STRING; + ctr_atype.type = V_ASN1_OCTET_STRING; + ctr_atype.value.octet_string = &ctr_oct; + atmp.algorithm = key_oid; + atmp.parameter = &ctr_atype; + if (ukm) { + ukm_oct.type = V_ASN1_OCTET_STRING; + ukm_oct.flags = 0; + ukm_oct.data = (unsigned char *)ukm; + ukm_oct.length = ukmlen; + pukm_oct = &ukm_oct; + } else + pukm_oct = NULL; + derlen = CMS_SharedInfo_encode(pder, &atmp, pukm_oct, outlen); + if (derlen <= 0) + return 0; + p = *pder; + tlen = derlen; + if (!skip_asn1(&p, &tlen, V_ASN1_SEQUENCE)) + return 0; + if (!skip_asn1(&p, &tlen, V_ASN1_SEQUENCE)) + return 0; + if (!skip_asn1(&p, &tlen, V_ASN1_OBJECT)) + return 0; + if (!skip_asn1(&p, &tlen, V_ASN1_OCTET_STRING)) + return 0; + if (CRYPTO_memcmp(p, ctr, 4)) + return 0; + *pctr = p; + return derlen; +} + +int DH_KDF_X9_42(unsigned char *out, size_t outlen, + const unsigned char *Z, size_t Zlen, + ASN1_OBJECT *key_oid, + const unsigned char *ukm, size_t ukmlen, const EVP_MD *md) +{ + EVP_MD_CTX mctx; + int rv = 0; + unsigned int i; + size_t mdlen; + unsigned char *der = NULL, *ctr; + int derlen; + if (Zlen > DH_KDF_MAX) + return 0; + mdlen = EVP_MD_size(md); + EVP_MD_CTX_init(&mctx); + derlen = dh_sharedinfo_encode(&der, &ctr, key_oid, outlen, ukm, ukmlen); + if (derlen == 0) + goto err; + for (i = 1;; i++) { + unsigned char mtmp[EVP_MAX_MD_SIZE]; + EVP_DigestInit_ex(&mctx, md, NULL); + if (!EVP_DigestUpdate(&mctx, Z, Zlen)) + goto err; + ctr[3] = i & 0xFF; + ctr[2] = (i >> 8) & 0xFF; + ctr[1] = (i >> 16) & 0xFF; + ctr[0] = (i >> 24) & 0xFF; + if (!EVP_DigestUpdate(&mctx, der, derlen)) + goto err; + if (outlen >= mdlen) { + if (!EVP_DigestFinal(&mctx, out, NULL)) + goto err; + outlen -= mdlen; + if (outlen == 0) + break; + out += mdlen; + } else { + if (!EVP_DigestFinal(&mctx, mtmp, NULL)) + goto err; + memcpy(out, mtmp, outlen); + OPENSSL_cleanse(mtmp, mdlen); + break; + } + } + rv = 1; + err: + if (der) + OPENSSL_free(der); + EVP_MD_CTX_cleanup(&mctx); + return rv; +} diff --git a/deps/openssl/openssl/crypto/dh/dh_key.c b/deps/openssl/openssl/crypto/dh/dh_key.c index 9e1d8e58225beb..1d80fb2c5f600e 100644 --- a/deps/openssl/openssl/crypto/dh/dh_key.c +++ b/deps/openssl/openssl/crypto/dh/dh_key.c @@ -94,6 +94,20 @@ int DH_compute_key(unsigned char *key, const BIGNUM *pub_key, DH *dh) return dh->meth->compute_key(key, pub_key, dh); } +int DH_compute_key_padded(unsigned char *key, const BIGNUM *pub_key, DH *dh) +{ + int rv, pad; + rv = dh->meth->compute_key(key, pub_key, dh); + if (rv <= 0) + return rv; + pad = BN_num_bytes(dh->p) - rv; + if (pad > 0) { + memmove(key + pad, key, rv); + memset(key, 0, pad); + } + return rv + pad; +} + static DH_METHOD dh_ossl = { "OpenSSL DH Method", generate_key, diff --git a/deps/openssl/openssl/crypto/dh/dh_pmeth.c b/deps/openssl/openssl/crypto/dh/dh_pmeth.c index 65bc3882c7f214..b3a31472ab2a33 100644 --- a/deps/openssl/openssl/crypto/dh/dh_pmeth.c +++ b/deps/openssl/openssl/crypto/dh/dh_pmeth.c @@ -63,6 +63,10 @@ #include #include 
#include +#ifndef OPENSSL_NO_DSA +# include +#endif +#include #include "evp_locl.h" /* DH pkey context structure */ @@ -72,9 +76,23 @@ typedef struct { int prime_len; int generator; int use_dsa; + int subprime_len; + /* message digest used for parameter generation */ + const EVP_MD *md; + int rfc5114_param; /* Keygen callback info */ int gentmp[2]; - /* message digest */ + /* KDF (if any) to use for DH */ + char kdf_type; + /* OID to use for KDF */ + ASN1_OBJECT *kdf_oid; + /* Message digest to use for key derivation */ + const EVP_MD *kdf_md; + /* User key material */ + unsigned char *kdf_ukm; + size_t kdf_ukmlen; + /* KDF output length */ + size_t kdf_outlen; } DH_PKEY_CTX; static int pkey_dh_init(EVP_PKEY_CTX *ctx) @@ -84,8 +102,18 @@ static int pkey_dh_init(EVP_PKEY_CTX *ctx) if (!dctx) return 0; dctx->prime_len = 1024; + dctx->subprime_len = -1; dctx->generator = 2; dctx->use_dsa = 0; + dctx->md = NULL; + dctx->rfc5114_param = 0; + + dctx->kdf_type = EVP_PKEY_DH_KDF_NONE; + dctx->kdf_oid = NULL; + dctx->kdf_md = NULL; + dctx->kdf_ukm = NULL; + dctx->kdf_ukmlen = 0; + dctx->kdf_outlen = 0; ctx->data = dctx; ctx->keygen_info = dctx->gentmp; @@ -102,16 +130,35 @@ static int pkey_dh_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) sctx = src->data; dctx = dst->data; dctx->prime_len = sctx->prime_len; + dctx->subprime_len = sctx->subprime_len; dctx->generator = sctx->generator; dctx->use_dsa = sctx->use_dsa; + dctx->md = sctx->md; + dctx->rfc5114_param = sctx->rfc5114_param; + + dctx->kdf_type = sctx->kdf_type; + dctx->kdf_oid = OBJ_dup(sctx->kdf_oid); + if (!dctx->kdf_oid) + return 0; + dctx->kdf_md = sctx->kdf_md; + if (dctx->kdf_ukm) { + dctx->kdf_ukm = BUF_memdup(sctx->kdf_ukm, sctx->kdf_ukmlen); + dctx->kdf_ukmlen = sctx->kdf_ukmlen; + } + dctx->kdf_outlen = sctx->kdf_outlen; return 1; } static void pkey_dh_cleanup(EVP_PKEY_CTX *ctx) { DH_PKEY_CTX *dctx = ctx->data; - if (dctx) + if (dctx) { + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); + if (dctx->kdf_oid) + ASN1_OBJECT_free(dctx->kdf_oid); OPENSSL_free(dctx); + } } static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) @@ -124,14 +171,89 @@ static int pkey_dh_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->prime_len = p1; return 1; + case EVP_PKEY_CTRL_DH_PARAMGEN_SUBPRIME_LEN: + if (dctx->use_dsa == 0) + return -2; + dctx->subprime_len = p1; + return 1; + case EVP_PKEY_CTRL_DH_PARAMGEN_GENERATOR: + if (dctx->use_dsa) + return -2; dctx->generator = p1; return 1; + case EVP_PKEY_CTRL_DH_PARAMGEN_TYPE: +#ifdef OPENSSL_NO_DSA + if (p1 != 0) + return -2; +#else + if (p1 < 0 || p1 > 2) + return -2; +#endif + dctx->use_dsa = p1; + return 1; + + case EVP_PKEY_CTRL_DH_RFC5114: + if (p1 < 1 || p1 > 3) + return -2; + dctx->rfc5114_param = p1; + return 1; + case EVP_PKEY_CTRL_PEER_KEY: /* Default behaviour is OK */ return 1; + case EVP_PKEY_CTRL_DH_KDF_TYPE: + if (p1 == -2) + return dctx->kdf_type; + if (p1 != EVP_PKEY_DH_KDF_NONE && p1 != EVP_PKEY_DH_KDF_X9_42) + return -2; + dctx->kdf_type = p1; + return 1; + + case EVP_PKEY_CTRL_DH_KDF_MD: + dctx->kdf_md = p2; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_MD: + *(const EVP_MD **)p2 = dctx->kdf_md; + return 1; + + case EVP_PKEY_CTRL_DH_KDF_OUTLEN: + if (p1 <= 0) + return -2; + dctx->kdf_outlen = (size_t)p1; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_OUTLEN: + *(int *)p2 = dctx->kdf_outlen; + return 1; + + case EVP_PKEY_CTRL_DH_KDF_UKM: + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); + dctx->kdf_ukm = p2; + if (p2) + dctx->kdf_ukmlen = p1; + else + 
dctx->kdf_ukmlen = 0; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_UKM: + *(unsigned char **)p2 = dctx->kdf_ukm; + return dctx->kdf_ukmlen; + + case EVP_PKEY_CTRL_DH_KDF_OID: + if (dctx->kdf_oid) + ASN1_OBJECT_free(dctx->kdf_oid); + dctx->kdf_oid = p2; + return 1; + + case EVP_PKEY_CTRL_GET_DH_KDF_OID: + *(ASN1_OBJECT **)p2 = dctx->kdf_oid; + return 1; + default: return -2; @@ -146,30 +268,139 @@ static int pkey_dh_ctrl_str(EVP_PKEY_CTX *ctx, len = atoi(value); return EVP_PKEY_CTX_set_dh_paramgen_prime_len(ctx, len); } + if (!strcmp(type, "dh_rfc5114")) { + DH_PKEY_CTX *dctx = ctx->data; + int len; + len = atoi(value); + if (len < 0 || len > 3) + return -2; + dctx->rfc5114_param = len; + return 1; + } if (!strcmp(type, "dh_paramgen_generator")) { int len; len = atoi(value); return EVP_PKEY_CTX_set_dh_paramgen_generator(ctx, len); } + if (!strcmp(type, "dh_paramgen_subprime_len")) { + int len; + len = atoi(value); + return EVP_PKEY_CTX_set_dh_paramgen_subprime_len(ctx, len); + } + if (!strcmp(type, "dh_paramgen_type")) { + int typ; + typ = atoi(value); + return EVP_PKEY_CTX_set_dh_paramgen_type(ctx, typ); + } return -2; } +#ifndef OPENSSL_NO_DSA + +extern int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, + const EVP_MD *evpmd, + const unsigned char *seed_in, size_t seed_len, + unsigned char *seed_out, int *counter_ret, + unsigned long *h_ret, BN_GENCB *cb); + +extern int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, + const unsigned char *seed_in, + size_t seed_len, int idx, + unsigned char *seed_out, int *counter_ret, + unsigned long *h_ret, BN_GENCB *cb); + +static DSA *dsa_dh_generate(DH_PKEY_CTX *dctx, BN_GENCB *pcb) +{ + DSA *ret; + int rv = 0; + int prime_len = dctx->prime_len; + int subprime_len = dctx->subprime_len; + const EVP_MD *md = dctx->md; + if (dctx->use_dsa > 2) + return NULL; + ret = DSA_new(); + if (!ret) + return NULL; + if (subprime_len == -1) { + if (prime_len >= 2048) + subprime_len = 256; + else + subprime_len = 160; + } + if (md == NULL) { + if (prime_len >= 2048) + md = EVP_sha256(); + else + md = EVP_sha1(); + } + if (dctx->use_dsa == 1) + rv = dsa_builtin_paramgen(ret, prime_len, subprime_len, md, + NULL, 0, NULL, NULL, NULL, pcb); + else if (dctx->use_dsa == 2) + rv = dsa_builtin_paramgen2(ret, prime_len, subprime_len, md, + NULL, 0, -1, NULL, NULL, NULL, pcb); + if (rv <= 0) { + DSA_free(ret); + return NULL; + } + return ret; +} + +#endif + static int pkey_dh_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { DH *dh = NULL; DH_PKEY_CTX *dctx = ctx->data; BN_GENCB *pcb, cb; int ret; + if (dctx->rfc5114_param) { + switch (dctx->rfc5114_param) { + case 1: + dh = DH_get_1024_160(); + break; + + case 2: + dh = DH_get_2048_224(); + break; + + case 3: + dh = DH_get_2048_256(); + break; + + default: + return -2; + } + EVP_PKEY_assign(pkey, EVP_PKEY_DHX, dh); + return 1; + } + if (ctx->pkey_gencb) { pcb = &cb; evp_pkey_set_cb_translate(pcb, ctx); } else pcb = NULL; +#ifndef OPENSSL_NO_DSA + if (dctx->use_dsa) { + DSA *dsa_dh; + dsa_dh = dsa_dh_generate(dctx, pcb); + if (!dsa_dh) + return 0; + dh = DSA_dup_DH(dsa_dh); + DSA_free(dsa_dh); + if (!dh) + return 0; + EVP_PKEY_assign(pkey, EVP_PKEY_DHX, dh); + return 1; + } +#endif dh = DH_new(); if (!dh) return 0; ret = DH_generate_parameters_ex(dh, dctx->prime_len, dctx->generator, pcb); + if (ret) EVP_PKEY_assign_DH(pkey, dh); else @@ -187,7 +418,7 @@ static int pkey_dh_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) dh = DH_new(); if (!dh) return 0; - EVP_PKEY_assign_DH(pkey, dh); + 
EVP_PKEY_assign(pkey, ctx->pmeth->pkey_id, dh); /* Note: if error return, pkey is freed by parent routine */ if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey)) return 0; @@ -198,21 +429,96 @@ static int pkey_dh_derive(EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen) { int ret; + DH *dh; + DH_PKEY_CTX *dctx = ctx->data; + BIGNUM *dhpub; if (!ctx->pkey || !ctx->peerkey) { DHerr(DH_F_PKEY_DH_DERIVE, DH_R_KEYS_NOT_SET); return 0; } - ret = DH_compute_key(key, ctx->peerkey->pkey.dh->pub_key, - ctx->pkey->pkey.dh); - if (ret < 0) + dh = ctx->pkey->pkey.dh; + dhpub = ctx->peerkey->pkey.dh->pub_key; + if (dctx->kdf_type == EVP_PKEY_DH_KDF_NONE) { + if (key == NULL) { + *keylen = DH_size(dh); + return 1; + } + ret = DH_compute_key(key, dhpub, dh); + if (ret < 0) + return ret; + *keylen = ret; + return 1; + } else if (dctx->kdf_type == EVP_PKEY_DH_KDF_X9_42) { + unsigned char *Z = NULL; + size_t Zlen = 0; + if (!dctx->kdf_outlen || !dctx->kdf_oid) + return 0; + if (key == NULL) { + *keylen = dctx->kdf_outlen; + return 1; + } + if (*keylen != dctx->kdf_outlen) + return 0; + ret = 0; + Zlen = DH_size(dh); + Z = OPENSSL_malloc(Zlen); + if(!Z) { + goto err; + } + if (DH_compute_key_padded(Z, dhpub, dh) <= 0) + goto err; + if (!DH_KDF_X9_42(key, *keylen, Z, Zlen, dctx->kdf_oid, + dctx->kdf_ukm, dctx->kdf_ukmlen, dctx->kdf_md)) + goto err; + *keylen = dctx->kdf_outlen; + ret = 1; + err: + if (Z) { + OPENSSL_cleanse(Z, Zlen); + OPENSSL_free(Z); + } return ret; - *keylen = ret; + } return 1; } const EVP_PKEY_METHOD dh_pkey_meth = { EVP_PKEY_DH, - EVP_PKEY_FLAG_AUTOARGLEN, + 0, + pkey_dh_init, + pkey_dh_copy, + pkey_dh_cleanup, + + 0, + pkey_dh_paramgen, + + 0, + pkey_dh_keygen, + + 0, + 0, + + 0, + 0, + + 0, 0, + + 0, 0, 0, 0, + + 0, 0, + + 0, 0, + + 0, + pkey_dh_derive, + + pkey_dh_ctrl, + pkey_dh_ctrl_str +}; + +const EVP_PKEY_METHOD dhx_pkey_meth = { + EVP_PKEY_DHX, + 0, pkey_dh_init, pkey_dh_copy, pkey_dh_cleanup, diff --git a/deps/openssl/openssl/crypto/dh/dh_rfc5114.c b/deps/openssl/openssl/crypto/dh/dh_rfc5114.c new file mode 100644 index 00000000000000..e96e2aa3fc52aa --- /dev/null +++ b/deps/openssl/openssl/crypto/dh/dh_rfc5114.c @@ -0,0 +1,285 @@ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project + * 2011. + */ +/* ==================================================================== + * Copyright (c) 2011 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. 
Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#include +#include "cryptlib.h" +#include +#include + +/* DH parameters from RFC5114 */ + +#if BN_BITS2 == 64 +static const BN_ULONG dh1024_160_p[] = { + 0xDF1FB2BC2E4A4371ULL, 0xE68CFDA76D4DA708ULL, 0x45BF37DF365C1A65ULL, + 0xA151AF5F0DC8B4BDULL, 0xFAA31A4FF55BCCC0ULL, 0x4EFFD6FAE5644738ULL, + 0x98488E9C219A7372ULL, 0xACCBDD7D90C4BD70ULL, 0x24975C3CD49B83BFULL, + 0x13ECB4AEA9061123ULL, 0x9838EF1E2EE652C0ULL, 0x6073E28675A23D18ULL, + 0x9A6A9DCA52D23B61ULL, 0x52C99FBCFB06A3C6ULL, 0xDE92DE5EAE5D54ECULL, + 0xB10B8F96A080E01DULL +}; + +static const BN_ULONG dh1024_160_g[] = { + 0x855E6EEB22B3B2E5ULL, 0x858F4DCEF97C2A24ULL, 0x2D779D5918D08BC8ULL, + 0xD662A4D18E73AFA3ULL, 0x1DBF0A0169B6A28AULL, 0xA6A24C087A091F53ULL, + 0x909D0D2263F80A76ULL, 0xD7FBD7D3B9A92EE1ULL, 0x5E91547F9E2749F4ULL, + 0x160217B4B01B886AULL, 0x777E690F5504F213ULL, 0x266FEA1E5C41564BULL, + 0xD6406CFF14266D31ULL, 0xF8104DD258AC507FULL, 0x6765A442EFB99905ULL, + 0xA4D1CBD5C3FD3412ULL +}; + +static const BN_ULONG dh1024_160_q[] = { + 0x64B7CB9D49462353ULL, 0x81A8DF278ABA4E7DULL, 0x00000000F518AA87ULL +}; + +static const BN_ULONG dh2048_224_p[] = { + 0x0AC4DFFE0C10E64FULL, 0xCF9DE5384E71B81CULL, 0x7EF363E2FFA31F71ULL, + 0xE3FB73C16B8E75B9ULL, 0xC9B53DCF4BA80A29ULL, 0x23F10B0E16E79763ULL, + 0xC52172E413042E9BULL, 0xBE60E69CC928B2B9ULL, 0x80CD86A1B9E587E8ULL, + 0x315D75E198C641A4ULL, 0xCDF93ACC44328387ULL, 0x15987D9ADC0A486DULL, + 0x7310F7121FD5A074ULL, 0x278273C7DE31EFDCULL, 0x1602E714415D9330ULL, + 0x81286130BC8985DBULL, 0xB3BF8A3170918836ULL, 0x6A00E0A0B9C49708ULL, + 0xC6BA0B2C8BBC27BEULL, 0xC9F98D11ED34DBF6ULL, 0x7AD5B7D0B6C12207ULL, + 0xD91E8FEF55B7394BULL, 0x9037C9EDEFDA4DF8ULL, 0x6D3F8152AD6AC212ULL, + 0x1DE6B85A1274A0A6ULL, 0xEB3D688A309C180EULL, 0xAF9A3C407BA1DF15ULL, + 0xE6FA141DF95A56DBULL, 0xB54B1597B61D0A75ULL, 0xA20D64E5683B9FD1ULL, + 0xD660FAA79559C51FULL, 0xAD107E1E9123A9D0ULL +}; + +static const BN_ULONG dh2048_224_g[] = { + 0x84B890D3191F2BFAULL, 0x81BC087F2A7065B3ULL, 0x19C418E1F6EC0179ULL, + 0x7B5A0F1C71CFFF4CULL, 0xEDFE72FE9B6AA4BDULL, 0x81E1BCFE94B30269ULL, + 0x566AFBB48D6C0191ULL, 
0xB539CCE3409D13CDULL, 0x6AA21E7F5F2FF381ULL, + 0xD9E263E4770589EFULL, 0x10E183EDD19963DDULL, 0xB70A8137150B8EEBULL, + 0x051AE3D428C8F8ACULL, 0xBB77A86F0C1AB15BULL, 0x6E3025E316A330EFULL, + 0x19529A45D6F83456ULL, 0xF180EB34118E98D1ULL, 0xB5F6C6B250717CBEULL, + 0x09939D54DA7460CDULL, 0xE247150422EA1ED4ULL, 0xB8A762D0521BC98AULL, + 0xF4D027275AC1348BULL, 0xC17669101999024AULL, 0xBE5E9001A8D66AD7ULL, + 0xC57DB17C620A8652ULL, 0xAB739D7700C29F52ULL, 0xDD921F01A70C4AFAULL, + 0xA6824A4E10B9A6F0ULL, 0x74866A08CFE4FFE3ULL, 0x6CDEBE7B89998CAFULL, + 0x9DF30B5C8FFDAC50ULL, 0xAC4032EF4F2D9AE3ULL +}; + +static const BN_ULONG dh2048_224_q[] = { + 0xBF389A99B36371EBULL, 0x1F80535A4738CEBCULL, 0xC58D93FE99717710ULL, + 0x00000000801C0D34ULL +}; + +static const BN_ULONG dh2048_256_p[] = { + 0xDB094AE91E1A1597ULL, 0x693877FAD7EF09CAULL, 0x6116D2276E11715FULL, + 0xA4B54330C198AF12ULL, 0x75F26375D7014103ULL, 0xC3A3960A54E710C3ULL, + 0xDED4010ABD0BE621ULL, 0xC0B857F689962856ULL, 0xB3CA3F7971506026ULL, + 0x1CCACB83E6B486F6ULL, 0x67E144E514056425ULL, 0xF6A167B5A41825D9ULL, + 0x3AD8347796524D8EULL, 0xF13C6D9A51BFA4ABULL, 0x2D52526735488A0EULL, + 0xB63ACAE1CAA6B790ULL, 0x4FDB70C581B23F76ULL, 0xBC39A0BF12307F5CULL, + 0xB941F54EB1E59BB8ULL, 0x6C5BFC11D45F9088ULL, 0x22E0B1EF4275BF7BULL, + 0x91F9E6725B4758C0ULL, 0x5A8A9D306BCF67EDULL, 0x209E0C6497517ABDULL, + 0x3BF4296D830E9A7CULL, 0x16C3D91134096FAAULL, 0xFAF7DF4561B2AA30ULL, + 0xE00DF8F1D61957D4ULL, 0x5D2CEED4435E3B00ULL, 0x8CEEF608660DD0F2ULL, + 0xFFBBD19C65195999ULL, 0x87A8E61DB4B6663CULL +}; + +static const BN_ULONG dh2048_256_g[] = { + 0x664B4C0F6CC41659ULL, 0x5E2327CFEF98C582ULL, 0xD647D148D4795451ULL, + 0x2F63078490F00EF8ULL, 0x184B523D1DB246C3ULL, 0xC7891428CDC67EB6ULL, + 0x7FD028370DF92B52ULL, 0xB3353BBB64E0EC37ULL, 0xECD06E1557CD0915ULL, + 0xB7D2BBD2DF016199ULL, 0xC8484B1E052588B9ULL, 0xDB2A3B7313D3FE14ULL, + 0xD052B985D182EA0AULL, 0xA4BD1BFFE83B9C80ULL, 0xDFC967C1FB3F2E55ULL, + 0xB5045AF2767164E1ULL, 0x1D14348F6F2F9193ULL, 0x64E67982428EBC83ULL, + 0x8AC376D282D6ED38ULL, 0x777DE62AAAB8A862ULL, 0xDDF463E5E9EC144BULL, + 0x0196F931C77A57F2ULL, 0xA55AE31341000A65ULL, 0x901228F8C28CBB18ULL, + 0xBC3773BF7E8C6F62ULL, 0xBE3A6C1B0C6B47B1ULL, 0xFF4FED4AAC0BB555ULL, + 0x10DBC15077BE463FULL, 0x07F4793A1A0BA125ULL, 0x4CA7B18F21EF2054ULL, + 0x2E77506660EDBD48ULL, 0x3FB32C9B73134D0BULL +}; + +static const BN_ULONG dh2048_256_q[] = { + 0xA308B0FE64F5FBD3ULL, 0x99B1A47D1EB3750BULL, 0xB447997640129DA2ULL, + 0x8CF83642A709A097ULL +}; + +#elif BN_BITS2 == 32 + +static const BN_ULONG dh1024_160_p[] = { + 0x2E4A4371, 0xDF1FB2BC, 0x6D4DA708, 0xE68CFDA7, 0x365C1A65, 0x45BF37DF, + 0x0DC8B4BD, 0xA151AF5F, 0xF55BCCC0, 0xFAA31A4F, 0xE5644738, 0x4EFFD6FA, + 0x219A7372, 0x98488E9C, 0x90C4BD70, 0xACCBDD7D, 0xD49B83BF, 0x24975C3C, + 0xA9061123, 0x13ECB4AE, 0x2EE652C0, 0x9838EF1E, 0x75A23D18, 0x6073E286, + 0x52D23B61, 0x9A6A9DCA, 0xFB06A3C6, 0x52C99FBC, 0xAE5D54EC, 0xDE92DE5E, + 0xA080E01D, 0xB10B8F96 +}; + +static const BN_ULONG dh1024_160_g[] = { + 0x22B3B2E5, 0x855E6EEB, 0xF97C2A24, 0x858F4DCE, 0x18D08BC8, 0x2D779D59, + 0x8E73AFA3, 0xD662A4D1, 0x69B6A28A, 0x1DBF0A01, 0x7A091F53, 0xA6A24C08, + 0x63F80A76, 0x909D0D22, 0xB9A92EE1, 0xD7FBD7D3, 0x9E2749F4, 0x5E91547F, + 0xB01B886A, 0x160217B4, 0x5504F213, 0x777E690F, 0x5C41564B, 0x266FEA1E, + 0x14266D31, 0xD6406CFF, 0x58AC507F, 0xF8104DD2, 0xEFB99905, 0x6765A442, + 0xC3FD3412, 0xA4D1CBD5 +}; + +static const BN_ULONG dh1024_160_q[] = { + 0x49462353, 0x64B7CB9D, 0x8ABA4E7D, 0x81A8DF27, 0xF518AA87 +}; + +static const BN_ULONG dh2048_224_p[] = { + 
0x0C10E64F, 0x0AC4DFFE, 0x4E71B81C, 0xCF9DE538, 0xFFA31F71, 0x7EF363E2, + 0x6B8E75B9, 0xE3FB73C1, 0x4BA80A29, 0xC9B53DCF, 0x16E79763, 0x23F10B0E, + 0x13042E9B, 0xC52172E4, 0xC928B2B9, 0xBE60E69C, 0xB9E587E8, 0x80CD86A1, + 0x98C641A4, 0x315D75E1, 0x44328387, 0xCDF93ACC, 0xDC0A486D, 0x15987D9A, + 0x1FD5A074, 0x7310F712, 0xDE31EFDC, 0x278273C7, 0x415D9330, 0x1602E714, + 0xBC8985DB, 0x81286130, 0x70918836, 0xB3BF8A31, 0xB9C49708, 0x6A00E0A0, + 0x8BBC27BE, 0xC6BA0B2C, 0xED34DBF6, 0xC9F98D11, 0xB6C12207, 0x7AD5B7D0, + 0x55B7394B, 0xD91E8FEF, 0xEFDA4DF8, 0x9037C9ED, 0xAD6AC212, 0x6D3F8152, + 0x1274A0A6, 0x1DE6B85A, 0x309C180E, 0xEB3D688A, 0x7BA1DF15, 0xAF9A3C40, + 0xF95A56DB, 0xE6FA141D, 0xB61D0A75, 0xB54B1597, 0x683B9FD1, 0xA20D64E5, + 0x9559C51F, 0xD660FAA7, 0x9123A9D0, 0xAD107E1E +}; + +static const BN_ULONG dh2048_224_g[] = { + 0x191F2BFA, 0x84B890D3, 0x2A7065B3, 0x81BC087F, 0xF6EC0179, 0x19C418E1, + 0x71CFFF4C, 0x7B5A0F1C, 0x9B6AA4BD, 0xEDFE72FE, 0x94B30269, 0x81E1BCFE, + 0x8D6C0191, 0x566AFBB4, 0x409D13CD, 0xB539CCE3, 0x5F2FF381, 0x6AA21E7F, + 0x770589EF, 0xD9E263E4, 0xD19963DD, 0x10E183ED, 0x150B8EEB, 0xB70A8137, + 0x28C8F8AC, 0x051AE3D4, 0x0C1AB15B, 0xBB77A86F, 0x16A330EF, 0x6E3025E3, + 0xD6F83456, 0x19529A45, 0x118E98D1, 0xF180EB34, 0x50717CBE, 0xB5F6C6B2, + 0xDA7460CD, 0x09939D54, 0x22EA1ED4, 0xE2471504, 0x521BC98A, 0xB8A762D0, + 0x5AC1348B, 0xF4D02727, 0x1999024A, 0xC1766910, 0xA8D66AD7, 0xBE5E9001, + 0x620A8652, 0xC57DB17C, 0x00C29F52, 0xAB739D77, 0xA70C4AFA, 0xDD921F01, + 0x10B9A6F0, 0xA6824A4E, 0xCFE4FFE3, 0x74866A08, 0x89998CAF, 0x6CDEBE7B, + 0x8FFDAC50, 0x9DF30B5C, 0x4F2D9AE3, 0xAC4032EF +}; + +static const BN_ULONG dh2048_224_q[] = { + 0xB36371EB, 0xBF389A99, 0x4738CEBC, 0x1F80535A, 0x99717710, 0xC58D93FE, + 0x801C0D34 +}; + +static const BN_ULONG dh2048_256_p[] = { + 0x1E1A1597, 0xDB094AE9, 0xD7EF09CA, 0x693877FA, 0x6E11715F, 0x6116D227, + 0xC198AF12, 0xA4B54330, 0xD7014103, 0x75F26375, 0x54E710C3, 0xC3A3960A, + 0xBD0BE621, 0xDED4010A, 0x89962856, 0xC0B857F6, 0x71506026, 0xB3CA3F79, + 0xE6B486F6, 0x1CCACB83, 0x14056425, 0x67E144E5, 0xA41825D9, 0xF6A167B5, + 0x96524D8E, 0x3AD83477, 0x51BFA4AB, 0xF13C6D9A, 0x35488A0E, 0x2D525267, + 0xCAA6B790, 0xB63ACAE1, 0x81B23F76, 0x4FDB70C5, 0x12307F5C, 0xBC39A0BF, + 0xB1E59BB8, 0xB941F54E, 0xD45F9088, 0x6C5BFC11, 0x4275BF7B, 0x22E0B1EF, + 0x5B4758C0, 0x91F9E672, 0x6BCF67ED, 0x5A8A9D30, 0x97517ABD, 0x209E0C64, + 0x830E9A7C, 0x3BF4296D, 0x34096FAA, 0x16C3D911, 0x61B2AA30, 0xFAF7DF45, + 0xD61957D4, 0xE00DF8F1, 0x435E3B00, 0x5D2CEED4, 0x660DD0F2, 0x8CEEF608, + 0x65195999, 0xFFBBD19C, 0xB4B6663C, 0x87A8E61D +}; + +static const BN_ULONG dh2048_256_g[] = { + 0x6CC41659, 0x664B4C0F, 0xEF98C582, 0x5E2327CF, 0xD4795451, 0xD647D148, + 0x90F00EF8, 0x2F630784, 0x1DB246C3, 0x184B523D, 0xCDC67EB6, 0xC7891428, + 0x0DF92B52, 0x7FD02837, 0x64E0EC37, 0xB3353BBB, 0x57CD0915, 0xECD06E15, + 0xDF016199, 0xB7D2BBD2, 0x052588B9, 0xC8484B1E, 0x13D3FE14, 0xDB2A3B73, + 0xD182EA0A, 0xD052B985, 0xE83B9C80, 0xA4BD1BFF, 0xFB3F2E55, 0xDFC967C1, + 0x767164E1, 0xB5045AF2, 0x6F2F9193, 0x1D14348F, 0x428EBC83, 0x64E67982, + 0x82D6ED38, 0x8AC376D2, 0xAAB8A862, 0x777DE62A, 0xE9EC144B, 0xDDF463E5, + 0xC77A57F2, 0x0196F931, 0x41000A65, 0xA55AE313, 0xC28CBB18, 0x901228F8, + 0x7E8C6F62, 0xBC3773BF, 0x0C6B47B1, 0xBE3A6C1B, 0xAC0BB555, 0xFF4FED4A, + 0x77BE463F, 0x10DBC150, 0x1A0BA125, 0x07F4793A, 0x21EF2054, 0x4CA7B18F, + 0x60EDBD48, 0x2E775066, 0x73134D0B, 0x3FB32C9B +}; + +static const BN_ULONG dh2048_256_q[] = { + 0x64F5FBD3, 0xA308B0FE, 0x1EB3750B, 0x99B1A47D, 0x40129DA2, 0xB4479976, 
+ 0xA709A097, 0x8CF83642 +}; + +#else +# error "unsupported BN_BITS2" +#endif + +/* Macro to make a BIGNUM from static data */ + +#define make_dh_bn(x) static const BIGNUM _bignum_##x = { (BN_ULONG *) x, \ + sizeof(x)/sizeof(BN_ULONG),\ + sizeof(x)/sizeof(BN_ULONG),\ + 0, BN_FLG_STATIC_DATA } + +/* + * Macro to make a DH structure from BIGNUM data. NB: although just copying + * the BIGNUM static pointers would be more efficient we can't as they get + * wiped using BN_clear_free() when DH_free() is called. + */ + +#define make_dh(x) \ +DH * DH_get_##x(void) \ + { \ + DH *dh; \ + make_dh_bn(dh##x##_p); \ + make_dh_bn(dh##x##_q); \ + make_dh_bn(dh##x##_g); \ + dh = DH_new(); \ + if (!dh) \ + return NULL; \ + dh->p = BN_dup(&_bignum_dh##x##_p); \ + dh->g = BN_dup(&_bignum_dh##x##_g); \ + dh->q = BN_dup(&_bignum_dh##x##_q); \ + if (!dh->p || !dh->q || !dh->g) \ + { \ + DH_free(dh); \ + return NULL; \ + } \ + return dh; \ + } + +make_dh(1024_160) +make_dh(2048_224) +make_dh(2048_256) diff --git a/deps/openssl/openssl/crypto/dh/dhtest.c b/deps/openssl/openssl/crypto/dh/dhtest.c index 5a4ee9a35d83d7..c9dd76bc75e175 100644 --- a/deps/openssl/openssl/crypto/dh/dhtest.c +++ b/deps/openssl/openssl/crypto/dh/dhtest.c @@ -96,6 +96,8 @@ static int MS_CALLBACK cb(int p, int n, BN_GENCB *arg); static const char rnd_seed[] = "string to make the random number generator think it has entropy"; +static int run_rfc5114_tests(void); + int main(int argc, char *argv[]) { BN_GENCB _cb; @@ -199,6 +201,8 @@ int main(int argc, char *argv[]) ret = 1; } else ret = 0; + if (!run_rfc5114_tests()) + ret = 1; err: ERR_print_errors_fp(stderr); @@ -238,4 +242,323 @@ static int MS_CALLBACK cb(int p, int n, BN_GENCB *arg) # endif return 1; } + +/* Test data from RFC 5114 */ + +static const unsigned char dhtest_1024_160_xA[] = { + 0xB9, 0xA3, 0xB3, 0xAE, 0x8F, 0xEF, 0xC1, 0xA2, 0x93, 0x04, 0x96, 0x50, + 0x70, 0x86, 0xF8, 0x45, 0x5D, 0x48, 0x94, 0x3E +}; + +static const unsigned char dhtest_1024_160_yA[] = { + 0x2A, 0x85, 0x3B, 0x3D, 0x92, 0x19, 0x75, 0x01, 0xB9, 0x01, 0x5B, 0x2D, + 0xEB, 0x3E, 0xD8, 0x4F, 0x5E, 0x02, 0x1D, 0xCC, 0x3E, 0x52, 0xF1, 0x09, + 0xD3, 0x27, 0x3D, 0x2B, 0x75, 0x21, 0x28, 0x1C, 0xBA, 0xBE, 0x0E, 0x76, + 0xFF, 0x57, 0x27, 0xFA, 0x8A, 0xCC, 0xE2, 0x69, 0x56, 0xBA, 0x9A, 0x1F, + 0xCA, 0x26, 0xF2, 0x02, 0x28, 0xD8, 0x69, 0x3F, 0xEB, 0x10, 0x84, 0x1D, + 0x84, 0xA7, 0x36, 0x00, 0x54, 0xEC, 0xE5, 0xA7, 0xF5, 0xB7, 0xA6, 0x1A, + 0xD3, 0xDF, 0xB3, 0xC6, 0x0D, 0x2E, 0x43, 0x10, 0x6D, 0x87, 0x27, 0xDA, + 0x37, 0xDF, 0x9C, 0xCE, 0x95, 0xB4, 0x78, 0x75, 0x5D, 0x06, 0xBC, 0xEA, + 0x8F, 0x9D, 0x45, 0x96, 0x5F, 0x75, 0xA5, 0xF3, 0xD1, 0xDF, 0x37, 0x01, + 0x16, 0x5F, 0xC9, 0xE5, 0x0C, 0x42, 0x79, 0xCE, 0xB0, 0x7F, 0x98, 0x95, + 0x40, 0xAE, 0x96, 0xD5, 0xD8, 0x8E, 0xD7, 0x76 +}; + +static const unsigned char dhtest_1024_160_xB[] = { + 0x93, 0x92, 0xC9, 0xF9, 0xEB, 0x6A, 0x7A, 0x6A, 0x90, 0x22, 0xF7, 0xD8, + 0x3E, 0x72, 0x23, 0xC6, 0x83, 0x5B, 0xBD, 0xDA +}; + +static const unsigned char dhtest_1024_160_yB[] = { + 0x71, 0x7A, 0x6C, 0xB0, 0x53, 0x37, 0x1F, 0xF4, 0xA3, 0xB9, 0x32, 0x94, + 0x1C, 0x1E, 0x56, 0x63, 0xF8, 0x61, 0xA1, 0xD6, 0xAD, 0x34, 0xAE, 0x66, + 0x57, 0x6D, 0xFB, 0x98, 0xF6, 0xC6, 0xCB, 0xF9, 0xDD, 0xD5, 0xA5, 0x6C, + 0x78, 0x33, 0xF6, 0xBC, 0xFD, 0xFF, 0x09, 0x55, 0x82, 0xAD, 0x86, 0x8E, + 0x44, 0x0E, 0x8D, 0x09, 0xFD, 0x76, 0x9E, 0x3C, 0xEC, 0xCD, 0xC3, 0xD3, + 0xB1, 0xE4, 0xCF, 0xA0, 0x57, 0x77, 0x6C, 0xAA, 0xF9, 0x73, 0x9B, 0x6A, + 0x9F, 0xEE, 0x8E, 0x74, 0x11, 0xF8, 0xD6, 0xDA, 0xC0, 0x9D, 0x6A, 0x4E, + 
0xDB, 0x46, 0xCC, 0x2B, 0x5D, 0x52, 0x03, 0x09, 0x0E, 0xAE, 0x61, 0x26, + 0x31, 0x1E, 0x53, 0xFD, 0x2C, 0x14, 0xB5, 0x74, 0xE6, 0xA3, 0x10, 0x9A, + 0x3D, 0xA1, 0xBE, 0x41, 0xBD, 0xCE, 0xAA, 0x18, 0x6F, 0x5C, 0xE0, 0x67, + 0x16, 0xA2, 0xB6, 0xA0, 0x7B, 0x3C, 0x33, 0xFE +}; + +static const unsigned char dhtest_1024_160_Z[] = { + 0x5C, 0x80, 0x4F, 0x45, 0x4D, 0x30, 0xD9, 0xC4, 0xDF, 0x85, 0x27, 0x1F, + 0x93, 0x52, 0x8C, 0x91, 0xDF, 0x6B, 0x48, 0xAB, 0x5F, 0x80, 0xB3, 0xB5, + 0x9C, 0xAA, 0xC1, 0xB2, 0x8F, 0x8A, 0xCB, 0xA9, 0xCD, 0x3E, 0x39, 0xF3, + 0xCB, 0x61, 0x45, 0x25, 0xD9, 0x52, 0x1D, 0x2E, 0x64, 0x4C, 0x53, 0xB8, + 0x07, 0xB8, 0x10, 0xF3, 0x40, 0x06, 0x2F, 0x25, 0x7D, 0x7D, 0x6F, 0xBF, + 0xE8, 0xD5, 0xE8, 0xF0, 0x72, 0xE9, 0xB6, 0xE9, 0xAF, 0xDA, 0x94, 0x13, + 0xEA, 0xFB, 0x2E, 0x8B, 0x06, 0x99, 0xB1, 0xFB, 0x5A, 0x0C, 0xAC, 0xED, + 0xDE, 0xAE, 0xAD, 0x7E, 0x9C, 0xFB, 0xB3, 0x6A, 0xE2, 0xB4, 0x20, 0x83, + 0x5B, 0xD8, 0x3A, 0x19, 0xFB, 0x0B, 0x5E, 0x96, 0xBF, 0x8F, 0xA4, 0xD0, + 0x9E, 0x34, 0x55, 0x25, 0x16, 0x7E, 0xCD, 0x91, 0x55, 0x41, 0x6F, 0x46, + 0xF4, 0x08, 0xED, 0x31, 0xB6, 0x3C, 0x6E, 0x6D +}; + +static const unsigned char dhtest_2048_224_xA[] = { + 0x22, 0xE6, 0x26, 0x01, 0xDB, 0xFF, 0xD0, 0x67, 0x08, 0xA6, 0x80, 0xF7, + 0x47, 0xF3, 0x61, 0xF7, 0x6D, 0x8F, 0x4F, 0x72, 0x1A, 0x05, 0x48, 0xE4, + 0x83, 0x29, 0x4B, 0x0C +}; + +static const unsigned char dhtest_2048_224_yA[] = { + 0x1B, 0x3A, 0x63, 0x45, 0x1B, 0xD8, 0x86, 0xE6, 0x99, 0xE6, 0x7B, 0x49, + 0x4E, 0x28, 0x8B, 0xD7, 0xF8, 0xE0, 0xD3, 0x70, 0xBA, 0xDD, 0xA7, 0xA0, + 0xEF, 0xD2, 0xFD, 0xE7, 0xD8, 0xF6, 0x61, 0x45, 0xCC, 0x9F, 0x28, 0x04, + 0x19, 0x97, 0x5E, 0xB8, 0x08, 0x87, 0x7C, 0x8A, 0x4C, 0x0C, 0x8E, 0x0B, + 0xD4, 0x8D, 0x4A, 0x54, 0x01, 0xEB, 0x1E, 0x87, 0x76, 0xBF, 0xEE, 0xE1, + 0x34, 0xC0, 0x38, 0x31, 0xAC, 0x27, 0x3C, 0xD9, 0xD6, 0x35, 0xAB, 0x0C, + 0xE0, 0x06, 0xA4, 0x2A, 0x88, 0x7E, 0x3F, 0x52, 0xFB, 0x87, 0x66, 0xB6, + 0x50, 0xF3, 0x80, 0x78, 0xBC, 0x8E, 0xE8, 0x58, 0x0C, 0xEF, 0xE2, 0x43, + 0x96, 0x8C, 0xFC, 0x4F, 0x8D, 0xC3, 0xDB, 0x08, 0x45, 0x54, 0x17, 0x1D, + 0x41, 0xBF, 0x2E, 0x86, 0x1B, 0x7B, 0xB4, 0xD6, 0x9D, 0xD0, 0xE0, 0x1E, + 0xA3, 0x87, 0xCB, 0xAA, 0x5C, 0xA6, 0x72, 0xAF, 0xCB, 0xE8, 0xBD, 0xB9, + 0xD6, 0x2D, 0x4C, 0xE1, 0x5F, 0x17, 0xDD, 0x36, 0xF9, 0x1E, 0xD1, 0xEE, + 0xDD, 0x65, 0xCA, 0x4A, 0x06, 0x45, 0x5C, 0xB9, 0x4C, 0xD4, 0x0A, 0x52, + 0xEC, 0x36, 0x0E, 0x84, 0xB3, 0xC9, 0x26, 0xE2, 0x2C, 0x43, 0x80, 0xA3, + 0xBF, 0x30, 0x9D, 0x56, 0x84, 0x97, 0x68, 0xB7, 0xF5, 0x2C, 0xFD, 0xF6, + 0x55, 0xFD, 0x05, 0x3A, 0x7E, 0xF7, 0x06, 0x97, 0x9E, 0x7E, 0x58, 0x06, + 0xB1, 0x7D, 0xFA, 0xE5, 0x3A, 0xD2, 0xA5, 0xBC, 0x56, 0x8E, 0xBB, 0x52, + 0x9A, 0x7A, 0x61, 0xD6, 0x8D, 0x25, 0x6F, 0x8F, 0xC9, 0x7C, 0x07, 0x4A, + 0x86, 0x1D, 0x82, 0x7E, 0x2E, 0xBC, 0x8C, 0x61, 0x34, 0x55, 0x31, 0x15, + 0xB7, 0x0E, 0x71, 0x03, 0x92, 0x0A, 0xA1, 0x6D, 0x85, 0xE5, 0x2B, 0xCB, + 0xAB, 0x8D, 0x78, 0x6A, 0x68, 0x17, 0x8F, 0xA8, 0xFF, 0x7C, 0x2F, 0x5C, + 0x71, 0x64, 0x8D, 0x6F +}; + +static const unsigned char dhtest_2048_224_xB[] = { + 0x4F, 0xF3, 0xBC, 0x96, 0xC7, 0xFC, 0x6A, 0x6D, 0x71, 0xD3, 0xB3, 0x63, + 0x80, 0x0A, 0x7C, 0xDF, 0xEF, 0x6F, 0xC4, 0x1B, 0x44, 0x17, 0xEA, 0x15, + 0x35, 0x3B, 0x75, 0x90 +}; + +static const unsigned char dhtest_2048_224_yB[] = { + 0x4D, 0xCE, 0xE9, 0x92, 0xA9, 0x76, 0x2A, 0x13, 0xF2, 0xF8, 0x38, 0x44, + 0xAD, 0x3D, 0x77, 0xEE, 0x0E, 0x31, 0xC9, 0x71, 0x8B, 0x3D, 0xB6, 0xC2, + 0x03, 0x5D, 0x39, 0x61, 0x18, 0x2C, 0x3E, 0x0B, 0xA2, 0x47, 0xEC, 0x41, + 0x82, 0xD7, 0x60, 0xCD, 0x48, 0xD9, 0x95, 0x99, 0x97, 
0x06, 0x22, 0xA1, + 0x88, 0x1B, 0xBA, 0x2D, 0xC8, 0x22, 0x93, 0x9C, 0x78, 0xC3, 0x91, 0x2C, + 0x66, 0x61, 0xFA, 0x54, 0x38, 0xB2, 0x07, 0x66, 0x22, 0x2B, 0x75, 0xE2, + 0x4C, 0x2E, 0x3A, 0xD0, 0xC7, 0x28, 0x72, 0x36, 0x12, 0x95, 0x25, 0xEE, + 0x15, 0xB5, 0xDD, 0x79, 0x98, 0xAA, 0x04, 0xC4, 0xA9, 0x69, 0x6C, 0xAC, + 0xD7, 0x17, 0x20, 0x83, 0xA9, 0x7A, 0x81, 0x66, 0x4E, 0xAD, 0x2C, 0x47, + 0x9E, 0x44, 0x4E, 0x4C, 0x06, 0x54, 0xCC, 0x19, 0xE2, 0x8D, 0x77, 0x03, + 0xCE, 0xE8, 0xDA, 0xCD, 0x61, 0x26, 0xF5, 0xD6, 0x65, 0xEC, 0x52, 0xC6, + 0x72, 0x55, 0xDB, 0x92, 0x01, 0x4B, 0x03, 0x7E, 0xB6, 0x21, 0xA2, 0xAC, + 0x8E, 0x36, 0x5D, 0xE0, 0x71, 0xFF, 0xC1, 0x40, 0x0A, 0xCF, 0x07, 0x7A, + 0x12, 0x91, 0x3D, 0xD8, 0xDE, 0x89, 0x47, 0x34, 0x37, 0xAB, 0x7B, 0xA3, + 0x46, 0x74, 0x3C, 0x1B, 0x21, 0x5D, 0xD9, 0xC1, 0x21, 0x64, 0xA7, 0xE4, + 0x05, 0x31, 0x18, 0xD1, 0x99, 0xBE, 0xC8, 0xEF, 0x6F, 0xC5, 0x61, 0x17, + 0x0C, 0x84, 0xC8, 0x7D, 0x10, 0xEE, 0x9A, 0x67, 0x4A, 0x1F, 0xA8, 0xFF, + 0xE1, 0x3B, 0xDF, 0xBA, 0x1D, 0x44, 0xDE, 0x48, 0x94, 0x6D, 0x68, 0xDC, + 0x0C, 0xDD, 0x77, 0x76, 0x35, 0xA7, 0xAB, 0x5B, 0xFB, 0x1E, 0x4B, 0xB7, + 0xB8, 0x56, 0xF9, 0x68, 0x27, 0x73, 0x4C, 0x18, 0x41, 0x38, 0xE9, 0x15, + 0xD9, 0xC3, 0x00, 0x2E, 0xBC, 0xE5, 0x31, 0x20, 0x54, 0x6A, 0x7E, 0x20, + 0x02, 0x14, 0x2B, 0x6C +}; + +static const unsigned char dhtest_2048_224_Z[] = { + 0x34, 0xD9, 0xBD, 0xDC, 0x1B, 0x42, 0x17, 0x6C, 0x31, 0x3F, 0xEA, 0x03, + 0x4C, 0x21, 0x03, 0x4D, 0x07, 0x4A, 0x63, 0x13, 0xBB, 0x4E, 0xCD, 0xB3, + 0x70, 0x3F, 0xFF, 0x42, 0x45, 0x67, 0xA4, 0x6B, 0xDF, 0x75, 0x53, 0x0E, + 0xDE, 0x0A, 0x9D, 0xA5, 0x22, 0x9D, 0xE7, 0xD7, 0x67, 0x32, 0x28, 0x6C, + 0xBC, 0x0F, 0x91, 0xDA, 0x4C, 0x3C, 0x85, 0x2F, 0xC0, 0x99, 0xC6, 0x79, + 0x53, 0x1D, 0x94, 0xC7, 0x8A, 0xB0, 0x3D, 0x9D, 0xEC, 0xB0, 0xA4, 0xE4, + 0xCA, 0x8B, 0x2B, 0xB4, 0x59, 0x1C, 0x40, 0x21, 0xCF, 0x8C, 0xE3, 0xA2, + 0x0A, 0x54, 0x1D, 0x33, 0x99, 0x40, 0x17, 0xD0, 0x20, 0x0A, 0xE2, 0xC9, + 0x51, 0x6E, 0x2F, 0xF5, 0x14, 0x57, 0x79, 0x26, 0x9E, 0x86, 0x2B, 0x0F, + 0xB4, 0x74, 0xA2, 0xD5, 0x6D, 0xC3, 0x1E, 0xD5, 0x69, 0xA7, 0x70, 0x0B, + 0x4C, 0x4A, 0xB1, 0x6B, 0x22, 0xA4, 0x55, 0x13, 0x53, 0x1E, 0xF5, 0x23, + 0xD7, 0x12, 0x12, 0x07, 0x7B, 0x5A, 0x16, 0x9B, 0xDE, 0xFF, 0xAD, 0x7A, + 0xD9, 0x60, 0x82, 0x84, 0xC7, 0x79, 0x5B, 0x6D, 0x5A, 0x51, 0x83, 0xB8, + 0x70, 0x66, 0xDE, 0x17, 0xD8, 0xD6, 0x71, 0xC9, 0xEB, 0xD8, 0xEC, 0x89, + 0x54, 0x4D, 0x45, 0xEC, 0x06, 0x15, 0x93, 0xD4, 0x42, 0xC6, 0x2A, 0xB9, + 0xCE, 0x3B, 0x1C, 0xB9, 0x94, 0x3A, 0x1D, 0x23, 0xA5, 0xEA, 0x3B, 0xCF, + 0x21, 0xA0, 0x14, 0x71, 0xE6, 0x7E, 0x00, 0x3E, 0x7F, 0x8A, 0x69, 0xC7, + 0x28, 0xBE, 0x49, 0x0B, 0x2F, 0xC8, 0x8C, 0xFE, 0xB9, 0x2D, 0xB6, 0xA2, + 0x15, 0xE5, 0xD0, 0x3C, 0x17, 0xC4, 0x64, 0xC9, 0xAC, 0x1A, 0x46, 0xE2, + 0x03, 0xE1, 0x3F, 0x95, 0x29, 0x95, 0xFB, 0x03, 0xC6, 0x9D, 0x3C, 0xC4, + 0x7F, 0xCB, 0x51, 0x0B, 0x69, 0x98, 0xFF, 0xD3, 0xAA, 0x6D, 0xE7, 0x3C, + 0xF9, 0xF6, 0x38, 0x69 +}; + +static const unsigned char dhtest_2048_256_xA[] = { + 0x08, 0x81, 0x38, 0x2C, 0xDB, 0x87, 0x66, 0x0C, 0x6D, 0xC1, 0x3E, 0x61, + 0x49, 0x38, 0xD5, 0xB9, 0xC8, 0xB2, 0xF2, 0x48, 0x58, 0x1C, 0xC5, 0xE3, + 0x1B, 0x35, 0x45, 0x43, 0x97, 0xFC, 0xE5, 0x0E +}; + +static const unsigned char dhtest_2048_256_yA[] = { + 0x2E, 0x93, 0x80, 0xC8, 0x32, 0x3A, 0xF9, 0x75, 0x45, 0xBC, 0x49, 0x41, + 0xDE, 0xB0, 0xEC, 0x37, 0x42, 0xC6, 0x2F, 0xE0, 0xEC, 0xE8, 0x24, 0xA6, + 0xAB, 0xDB, 0xE6, 0x6C, 0x59, 0xBE, 0xE0, 0x24, 0x29, 0x11, 0xBF, 0xB9, + 0x67, 0x23, 0x5C, 0xEB, 0xA3, 0x5A, 0xE1, 0x3E, 0x4E, 0xC7, 0x52, 0xBE, + 
0x63, 0x0B, 0x92, 0xDC, 0x4B, 0xDE, 0x28, 0x47, 0xA9, 0xC6, 0x2C, 0xB8, + 0x15, 0x27, 0x45, 0x42, 0x1F, 0xB7, 0xEB, 0x60, 0xA6, 0x3C, 0x0F, 0xE9, + 0x15, 0x9F, 0xCC, 0xE7, 0x26, 0xCE, 0x7C, 0xD8, 0x52, 0x3D, 0x74, 0x50, + 0x66, 0x7E, 0xF8, 0x40, 0xE4, 0x91, 0x91, 0x21, 0xEB, 0x5F, 0x01, 0xC8, + 0xC9, 0xB0, 0xD3, 0xD6, 0x48, 0xA9, 0x3B, 0xFB, 0x75, 0x68, 0x9E, 0x82, + 0x44, 0xAC, 0x13, 0x4A, 0xF5, 0x44, 0x71, 0x1C, 0xE7, 0x9A, 0x02, 0xDC, + 0xC3, 0x42, 0x26, 0x68, 0x47, 0x80, 0xDD, 0xDC, 0xB4, 0x98, 0x59, 0x41, + 0x06, 0xC3, 0x7F, 0x5B, 0xC7, 0x98, 0x56, 0x48, 0x7A, 0xF5, 0xAB, 0x02, + 0x2A, 0x2E, 0x5E, 0x42, 0xF0, 0x98, 0x97, 0xC1, 0xA8, 0x5A, 0x11, 0xEA, + 0x02, 0x12, 0xAF, 0x04, 0xD9, 0xB4, 0xCE, 0xBC, 0x93, 0x7C, 0x3C, 0x1A, + 0x3E, 0x15, 0xA8, 0xA0, 0x34, 0x2E, 0x33, 0x76, 0x15, 0xC8, 0x4E, 0x7F, + 0xE3, 0xB8, 0xB9, 0xB8, 0x7F, 0xB1, 0xE7, 0x3A, 0x15, 0xAF, 0x12, 0xA3, + 0x0D, 0x74, 0x6E, 0x06, 0xDF, 0xC3, 0x4F, 0x29, 0x0D, 0x79, 0x7C, 0xE5, + 0x1A, 0xA1, 0x3A, 0xA7, 0x85, 0xBF, 0x66, 0x58, 0xAF, 0xF5, 0xE4, 0xB0, + 0x93, 0x00, 0x3C, 0xBE, 0xAF, 0x66, 0x5B, 0x3C, 0x2E, 0x11, 0x3A, 0x3A, + 0x4E, 0x90, 0x52, 0x69, 0x34, 0x1D, 0xC0, 0x71, 0x14, 0x26, 0x68, 0x5F, + 0x4E, 0xF3, 0x7E, 0x86, 0x8A, 0x81, 0x26, 0xFF, 0x3F, 0x22, 0x79, 0xB5, + 0x7C, 0xA6, 0x7E, 0x29 +}; + +static const unsigned char dhtest_2048_256_xB[] = { + 0x7D, 0x62, 0xA7, 0xE3, 0xEF, 0x36, 0xDE, 0x61, 0x7B, 0x13, 0xD1, 0xAF, + 0xB8, 0x2C, 0x78, 0x0D, 0x83, 0xA2, 0x3B, 0xD4, 0xEE, 0x67, 0x05, 0x64, + 0x51, 0x21, 0xF3, 0x71, 0xF5, 0x46, 0xA5, 0x3D +}; + +static const unsigned char dhtest_2048_256_yB[] = { + 0x57, 0x5F, 0x03, 0x51, 0xBD, 0x2B, 0x1B, 0x81, 0x74, 0x48, 0xBD, 0xF8, + 0x7A, 0x6C, 0x36, 0x2C, 0x1E, 0x28, 0x9D, 0x39, 0x03, 0xA3, 0x0B, 0x98, + 0x32, 0xC5, 0x74, 0x1F, 0xA2, 0x50, 0x36, 0x3E, 0x7A, 0xCB, 0xC7, 0xF7, + 0x7F, 0x3D, 0xAC, 0xBC, 0x1F, 0x13, 0x1A, 0xDD, 0x8E, 0x03, 0x36, 0x7E, + 0xFF, 0x8F, 0xBB, 0xB3, 0xE1, 0xC5, 0x78, 0x44, 0x24, 0x80, 0x9B, 0x25, + 0xAF, 0xE4, 0xD2, 0x26, 0x2A, 0x1A, 0x6F, 0xD2, 0xFA, 0xB6, 0x41, 0x05, + 0xCA, 0x30, 0xA6, 0x74, 0xE0, 0x7F, 0x78, 0x09, 0x85, 0x20, 0x88, 0x63, + 0x2F, 0xC0, 0x49, 0x23, 0x37, 0x91, 0xAD, 0x4E, 0xDD, 0x08, 0x3A, 0x97, + 0x8B, 0x88, 0x3E, 0xE6, 0x18, 0xBC, 0x5E, 0x0D, 0xD0, 0x47, 0x41, 0x5F, + 0x2D, 0x95, 0xE6, 0x83, 0xCF, 0x14, 0x82, 0x6B, 0x5F, 0xBE, 0x10, 0xD3, + 0xCE, 0x41, 0xC6, 0xC1, 0x20, 0xC7, 0x8A, 0xB2, 0x00, 0x08, 0xC6, 0x98, + 0xBF, 0x7F, 0x0B, 0xCA, 0xB9, 0xD7, 0xF4, 0x07, 0xBE, 0xD0, 0xF4, 0x3A, + 0xFB, 0x29, 0x70, 0xF5, 0x7F, 0x8D, 0x12, 0x04, 0x39, 0x63, 0xE6, 0x6D, + 0xDD, 0x32, 0x0D, 0x59, 0x9A, 0xD9, 0x93, 0x6C, 0x8F, 0x44, 0x13, 0x7C, + 0x08, 0xB1, 0x80, 0xEC, 0x5E, 0x98, 0x5C, 0xEB, 0xE1, 0x86, 0xF3, 0xD5, + 0x49, 0x67, 0x7E, 0x80, 0x60, 0x73, 0x31, 0xEE, 0x17, 0xAF, 0x33, 0x80, + 0xA7, 0x25, 0xB0, 0x78, 0x23, 0x17, 0xD7, 0xDD, 0x43, 0xF5, 0x9D, 0x7A, + 0xF9, 0x56, 0x8A, 0x9B, 0xB6, 0x3A, 0x84, 0xD3, 0x65, 0xF9, 0x22, 0x44, + 0xED, 0x12, 0x09, 0x88, 0x21, 0x93, 0x02, 0xF4, 0x29, 0x24, 0xC7, 0xCA, + 0x90, 0xB8, 0x9D, 0x24, 0xF7, 0x1B, 0x0A, 0xB6, 0x97, 0x82, 0x3D, 0x7D, + 0xEB, 0x1A, 0xFF, 0x5B, 0x0E, 0x8E, 0x4A, 0x45, 0xD4, 0x9F, 0x7F, 0x53, + 0x75, 0x7E, 0x19, 0x13 +}; + +static const unsigned char dhtest_2048_256_Z[] = { + 0x86, 0xC7, 0x0B, 0xF8, 0xD0, 0xBB, 0x81, 0xBB, 0x01, 0x07, 0x8A, 0x17, + 0x21, 0x9C, 0xB7, 0xD2, 0x72, 0x03, 0xDB, 0x2A, 0x19, 0xC8, 0x77, 0xF1, + 0xD1, 0xF1, 0x9F, 0xD7, 0xD7, 0x7E, 0xF2, 0x25, 0x46, 0xA6, 0x8F, 0x00, + 0x5A, 0xD5, 0x2D, 0xC8, 0x45, 0x53, 0xB7, 0x8F, 0xC6, 0x03, 0x30, 0xBE, + 0x51, 0xEA, 0x7C, 0x06, 
0x72, 0xCA, 0xC1, 0x51, 0x5E, 0x4B, 0x35, 0xC0, + 0x47, 0xB9, 0xA5, 0x51, 0xB8, 0x8F, 0x39, 0xDC, 0x26, 0xDA, 0x14, 0xA0, + 0x9E, 0xF7, 0x47, 0x74, 0xD4, 0x7C, 0x76, 0x2D, 0xD1, 0x77, 0xF9, 0xED, + 0x5B, 0xC2, 0xF1, 0x1E, 0x52, 0xC8, 0x79, 0xBD, 0x95, 0x09, 0x85, 0x04, + 0xCD, 0x9E, 0xEC, 0xD8, 0xA8, 0xF9, 0xB3, 0xEF, 0xBD, 0x1F, 0x00, 0x8A, + 0xC5, 0x85, 0x30, 0x97, 0xD9, 0xD1, 0x83, 0x7F, 0x2B, 0x18, 0xF7, 0x7C, + 0xD7, 0xBE, 0x01, 0xAF, 0x80, 0xA7, 0xC7, 0xB5, 0xEA, 0x3C, 0xA5, 0x4C, + 0xC0, 0x2D, 0x0C, 0x11, 0x6F, 0xEE, 0x3F, 0x95, 0xBB, 0x87, 0x39, 0x93, + 0x85, 0x87, 0x5D, 0x7E, 0x86, 0x74, 0x7E, 0x67, 0x6E, 0x72, 0x89, 0x38, + 0xAC, 0xBF, 0xF7, 0x09, 0x8E, 0x05, 0xBE, 0x4D, 0xCF, 0xB2, 0x40, 0x52, + 0xB8, 0x3A, 0xEF, 0xFB, 0x14, 0x78, 0x3F, 0x02, 0x9A, 0xDB, 0xDE, 0x7F, + 0x53, 0xFA, 0xE9, 0x20, 0x84, 0x22, 0x40, 0x90, 0xE0, 0x07, 0xCE, 0xE9, + 0x4D, 0x4B, 0xF2, 0xBA, 0xCE, 0x9F, 0xFD, 0x4B, 0x57, 0xD2, 0xAF, 0x7C, + 0x72, 0x4D, 0x0C, 0xAA, 0x19, 0xBF, 0x05, 0x01, 0xF6, 0xF1, 0x7B, 0x4A, + 0xA1, 0x0F, 0x42, 0x5E, 0x3E, 0xA7, 0x60, 0x80, 0xB4, 0xB9, 0xD6, 0xB3, + 0xCE, 0xFE, 0xA1, 0x15, 0xB2, 0xCE, 0xB8, 0x78, 0x9B, 0xB8, 0xA3, 0xB0, + 0xEA, 0x87, 0xFE, 0xBE, 0x63, 0xB6, 0xC8, 0xF8, 0x46, 0xEC, 0x6D, 0xB0, + 0xC2, 0x6C, 0x5D, 0x7C +}; + +typedef struct { + DH *(*get_param) (void); + const unsigned char *xA; + size_t xA_len; + const unsigned char *yA; + size_t yA_len; + const unsigned char *xB; + size_t xB_len; + const unsigned char *yB; + size_t yB_len; + const unsigned char *Z; + size_t Z_len; +} rfc5114_td; + +# define make_rfc5114_td(pre) { \ + DH_get_##pre, \ + dhtest_##pre##_xA, sizeof(dhtest_##pre##_xA), \ + dhtest_##pre##_yA, sizeof(dhtest_##pre##_yA), \ + dhtest_##pre##_xB, sizeof(dhtest_##pre##_xB), \ + dhtest_##pre##_yB, sizeof(dhtest_##pre##_yB), \ + dhtest_##pre##_Z, sizeof(dhtest_##pre##_Z) \ + } + +static const rfc5114_td rfctd[] = { + make_rfc5114_td(1024_160), + make_rfc5114_td(2048_224), + make_rfc5114_td(2048_256) +}; + +static int run_rfc5114_tests(void) +{ + int i; + for (i = 0; i < (int)(sizeof(rfctd) / sizeof(rfc5114_td)); i++) { + DH *dhA, *dhB; + unsigned char *Z1 = NULL, *Z2 = NULL; + const rfc5114_td *td = rfctd + i; + /* Set up DH structures setting key components */ + dhA = td->get_param(); + dhB = td->get_param(); + if (!dhA || !dhB) + goto bad_err; + + dhA->priv_key = BN_bin2bn(td->xA, td->xA_len, NULL); + dhA->pub_key = BN_bin2bn(td->yA, td->yA_len, NULL); + + dhB->priv_key = BN_bin2bn(td->xB, td->xB_len, NULL); + dhB->pub_key = BN_bin2bn(td->yB, td->yB_len, NULL); + + if (!dhA->priv_key || !dhA->pub_key + || !dhB->priv_key || !dhB->pub_key) + goto bad_err; + + if ((td->Z_len != (size_t)DH_size(dhA)) + || (td->Z_len != (size_t)DH_size(dhB))) + goto err; + + Z1 = OPENSSL_malloc(DH_size(dhA)); + Z2 = OPENSSL_malloc(DH_size(dhB)); + /* + * Work out shared secrets using both sides and compare with expected + * values. 
+ */ + if (!DH_compute_key(Z1, dhB->pub_key, dhA)) + goto bad_err; + if (!DH_compute_key(Z2, dhA->pub_key, dhB)) + goto bad_err; + + if (memcmp(Z1, td->Z, td->Z_len)) + goto err; + if (memcmp(Z2, td->Z, td->Z_len)) + goto err; + + printf("RFC5114 parameter test %d OK\n", i + 1); + + DH_free(dhA); + DH_free(dhB); + OPENSSL_free(Z1); + OPENSSL_free(Z2); + + } + return 1; + bad_err: + fprintf(stderr, "Initalisation error RFC5114 set %d\n", i + 1); + ERR_print_errors_fp(stderr); + return 0; + err: + fprintf(stderr, "Test failed RFC5114 set %d\n", i + 1); + return 0; +} + #endif diff --git a/deps/openssl/openssl/crypto/dsa/dsa.h b/deps/openssl/openssl/crypto/dsa/dsa.h index a2f0ee786370e2..545358fd02b2ad 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa.h +++ b/deps/openssl/openssl/crypto/dsa/dsa.h @@ -287,6 +287,7 @@ void ERR_load_DSA_strings(void); # define DSA_F_DO_DSA_PRINT 104 # define DSA_F_DSAPARAMS_PRINT 100 # define DSA_F_DSAPARAMS_PRINT_FP 101 +# define DSA_F_DSA_BUILTIN_PARAMGEN2 126 # define DSA_F_DSA_DO_SIGN 112 # define DSA_F_DSA_DO_VERIFY 113 # define DSA_F_DSA_GENERATE_KEY 124 @@ -316,12 +317,14 @@ void ERR_load_DSA_strings(void); # define DSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE 100 # define DSA_R_DECODE_ERROR 104 # define DSA_R_INVALID_DIGEST_TYPE 106 +# define DSA_R_INVALID_PARAMETERS 112 # define DSA_R_MISSING_PARAMETERS 101 # define DSA_R_MODULUS_TOO_LARGE 103 # define DSA_R_NEED_NEW_SETUP_VALUES 110 # define DSA_R_NON_FIPS_DSA_METHOD 111 # define DSA_R_NO_PARAMETERS_SET 107 # define DSA_R_PARAMETER_ENCODING_ERROR 105 +# define DSA_R_Q_NOT_PRIME 113 #ifdef __cplusplus } diff --git a/deps/openssl/openssl/crypto/dsa/dsa_ameth.c b/deps/openssl/openssl/crypto/dsa/dsa_ameth.c index a2840eaed0950c..2a5cd71371a761 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_ameth.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_ameth.c @@ -601,10 +601,14 @@ static int dsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) X509_ALGOR_set0(alg2, OBJ_nid2obj(snid), V_ASN1_UNDEF, 0); } return 1; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_NONE; + return 1; #endif case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 2; default: diff --git a/deps/openssl/openssl/crypto/dsa/dsa_err.c b/deps/openssl/openssl/crypto/dsa/dsa_err.c index 746f5dfe6dc789..f5ddc66b8a73f5 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_err.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_err.c @@ -1,6 +1,6 @@ /* crypto/dsa/dsa_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -74,6 +74,7 @@ static ERR_STRING_DATA DSA_str_functs[] = { {ERR_FUNC(DSA_F_DO_DSA_PRINT), "DO_DSA_PRINT"}, {ERR_FUNC(DSA_F_DSAPARAMS_PRINT), "DSAparams_print"}, {ERR_FUNC(DSA_F_DSAPARAMS_PRINT_FP), "DSAparams_print_fp"}, + {ERR_FUNC(DSA_F_DSA_BUILTIN_PARAMGEN2), "DSA_BUILTIN_PARAMGEN2"}, {ERR_FUNC(DSA_F_DSA_DO_SIGN), "DSA_do_sign"}, {ERR_FUNC(DSA_F_DSA_DO_VERIFY), "DSA_do_verify"}, {ERR_FUNC(DSA_F_DSA_GENERATE_KEY), "DSA_generate_key"}, @@ -107,12 +108,14 @@ static ERR_STRING_DATA DSA_str_reasons[] = { "data too large for key size"}, {ERR_REASON(DSA_R_DECODE_ERROR), "decode error"}, {ERR_REASON(DSA_R_INVALID_DIGEST_TYPE), "invalid digest type"}, + {ERR_REASON(DSA_R_INVALID_PARAMETERS), "invalid parameters"}, {ERR_REASON(DSA_R_MISSING_PARAMETERS), "missing parameters"}, {ERR_REASON(DSA_R_MODULUS_TOO_LARGE), "modulus too large"}, {ERR_REASON(DSA_R_NEED_NEW_SETUP_VALUES), "need new setup values"}, {ERR_REASON(DSA_R_NON_FIPS_DSA_METHOD), "non fips dsa method"}, {ERR_REASON(DSA_R_NO_PARAMETERS_SET), "no parameters set"}, {ERR_REASON(DSA_R_PARAMETER_ENCODING_ERROR), "parameter encoding error"}, + {ERR_REASON(DSA_R_Q_NOT_PRIME), "q not prime"}, {0, NULL} }; diff --git a/deps/openssl/openssl/crypto/dsa/dsa_gen.c b/deps/openssl/openssl/crypto/dsa/dsa_gen.c index c808ab60972287..89200369392410 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_gen.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_gen.c @@ -86,6 +86,8 @@ # include "dsa_locl.h" # ifdef OPENSSL_FIPS +/* Workaround bug in prototype */ +# define fips_dsa_builtin_paramgen2 fips_dsa_paramgen_bad # include # endif @@ -382,4 +384,371 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, BN_MONT_CTX_free(mont); return ok; } + +# ifdef OPENSSL_FIPS +# undef fips_dsa_builtin_paramgen2 +extern int fips_dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, + const unsigned char *seed_in, + size_t seed_len, int idx, + unsigned char *seed_out, + int *counter_ret, unsigned long *h_ret, + BN_GENCB *cb); +# endif + +/* + * This is a parameter generation algorithm for the DSA2 algorithm as + * described in FIPS 186-3. 
+ */ + +int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, const unsigned char *seed_in, + size_t seed_len, int idx, unsigned char *seed_out, + int *counter_ret, unsigned long *h_ret, + BN_GENCB *cb) +{ + int ok = -1; + unsigned char *seed = NULL, *seed_tmp = NULL; + unsigned char md[EVP_MAX_MD_SIZE]; + int mdsize; + BIGNUM *r0, *W, *X, *c, *test; + BIGNUM *g = NULL, *q = NULL, *p = NULL; + BN_MONT_CTX *mont = NULL; + int i, k, n = 0, m = 0, qsize = N >> 3; + int counter = 0; + int r = 0; + BN_CTX *ctx = NULL; + EVP_MD_CTX mctx; + unsigned int h = 2; + +# ifdef OPENSSL_FIPS + + if (FIPS_mode()) + return fips_dsa_builtin_paramgen2(ret, L, N, evpmd, + seed_in, seed_len, idx, + seed_out, counter_ret, h_ret, cb); +# endif + + EVP_MD_CTX_init(&mctx); + + if (evpmd == NULL) { + if (N == 160) + evpmd = EVP_sha1(); + else if (N == 224) + evpmd = EVP_sha224(); + else + evpmd = EVP_sha256(); + } + + mdsize = EVP_MD_size(evpmd); + /* If unverificable g generation only don't need seed */ + if (!ret->p || !ret->q || idx >= 0) { + if (seed_len == 0) + seed_len = mdsize; + + seed = OPENSSL_malloc(seed_len); + + if (seed_out) + seed_tmp = seed_out; + else + seed_tmp = OPENSSL_malloc(seed_len); + + if (!seed || !seed_tmp) + goto err; + + if (seed_in) + memcpy(seed, seed_in, seed_len); + + } + + if ((ctx = BN_CTX_new()) == NULL) + goto err; + + if ((mont = BN_MONT_CTX_new()) == NULL) + goto err; + + BN_CTX_start(ctx); + r0 = BN_CTX_get(ctx); + g = BN_CTX_get(ctx); + W = BN_CTX_get(ctx); + X = BN_CTX_get(ctx); + c = BN_CTX_get(ctx); + test = BN_CTX_get(ctx); + + /* if p, q already supplied generate g only */ + if (ret->p && ret->q) { + p = ret->p; + q = ret->q; + if (idx >= 0) + memcpy(seed_tmp, seed, seed_len); + goto g_only; + } else { + p = BN_CTX_get(ctx); + q = BN_CTX_get(ctx); + } + + if (!BN_lshift(test, BN_value_one(), L - 1)) + goto err; + for (;;) { + for (;;) { /* find q */ + unsigned char *pmd; + /* step 1 */ + if (!BN_GENCB_call(cb, 0, m++)) + goto err; + + if (!seed_in) { + if (RAND_pseudo_bytes(seed, seed_len) < 0) + goto err; + } + /* step 2 */ + if (!EVP_Digest(seed, seed_len, md, NULL, evpmd, NULL)) + goto err; + /* Take least significant bits of md */ + if (mdsize > qsize) + pmd = md + mdsize - qsize; + else + pmd = md; + + if (mdsize < qsize) + memset(md + mdsize, 0, qsize - mdsize); + + /* step 3 */ + pmd[0] |= 0x80; + pmd[qsize - 1] |= 0x01; + if (!BN_bin2bn(pmd, qsize, q)) + goto err; + + /* step 4 */ + r = BN_is_prime_fasttest_ex(q, DSS_prime_checks, ctx, + seed_in ? 
1 : 0, cb); + if (r > 0) + break; + if (r != 0) + goto err; + /* Provided seed didn't produce a prime: error */ + if (seed_in) { + ok = 0; + DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_Q_NOT_PRIME); + goto err; + } + + /* do a callback call */ + /* step 5 */ + } + /* Copy seed to seed_out before we mess with it */ + if (seed_out) + memcpy(seed_out, seed, seed_len); + + if (!BN_GENCB_call(cb, 2, 0)) + goto err; + if (!BN_GENCB_call(cb, 3, 0)) + goto err; + + /* step 6 */ + counter = 0; + /* "offset = 1" */ + + n = (L - 1) / (mdsize << 3); + + for (;;) { + if ((counter != 0) && !BN_GENCB_call(cb, 0, counter)) + goto err; + + /* step 7 */ + BN_zero(W); + /* now 'buf' contains "SEED + offset - 1" */ + for (k = 0; k <= n; k++) { + /* + * obtain "SEED + offset + k" by incrementing: + */ + for (i = seed_len - 1; i >= 0; i--) { + seed[i]++; + if (seed[i] != 0) + break; + } + + if (!EVP_Digest(seed, seed_len, md, NULL, evpmd, NULL)) + goto err; + + /* step 8 */ + if (!BN_bin2bn(md, mdsize, r0)) + goto err; + if (!BN_lshift(r0, r0, (mdsize << 3) * k)) + goto err; + if (!BN_add(W, W, r0)) + goto err; + } + + /* more of step 8 */ + if (!BN_mask_bits(W, L - 1)) + goto err; + if (!BN_copy(X, W)) + goto err; + if (!BN_add(X, X, test)) + goto err; + + /* step 9 */ + if (!BN_lshift1(r0, q)) + goto err; + if (!BN_mod(c, X, r0, ctx)) + goto err; + if (!BN_sub(r0, c, BN_value_one())) + goto err; + if (!BN_sub(p, X, r0)) + goto err; + + /* step 10 */ + if (BN_cmp(p, test) >= 0) { + /* step 11 */ + r = BN_is_prime_fasttest_ex(p, DSS_prime_checks, ctx, 1, cb); + if (r > 0) + goto end; /* found it */ + if (r != 0) + goto err; + } + + /* step 13 */ + counter++; + /* "offset = offset + n + 1" */ + + /* step 14 */ + if (counter >= (int)(4 * L)) + break; + } + if (seed_in) { + ok = 0; + DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN2, DSA_R_INVALID_PARAMETERS); + goto err; + } + } + end: + if (!BN_GENCB_call(cb, 2, 1)) + goto err; + + g_only: + + /* We now need to generate g */ + /* Set r0=(p-1)/q */ + if (!BN_sub(test, p, BN_value_one())) + goto err; + if (!BN_div(r0, NULL, test, q, ctx)) + goto err; + + if (idx < 0) { + if (!BN_set_word(test, h)) + goto err; + } else + h = 1; + if (!BN_MONT_CTX_set(mont, p, ctx)) + goto err; + + for (;;) { + static const unsigned char ggen[4] = { 0x67, 0x67, 0x65, 0x6e }; + if (idx >= 0) { + md[0] = idx & 0xff; + md[1] = (h >> 8) & 0xff; + md[2] = h & 0xff; + if (!EVP_DigestInit_ex(&mctx, evpmd, NULL)) + goto err; + if (!EVP_DigestUpdate(&mctx, seed_tmp, seed_len)) + goto err; + if (!EVP_DigestUpdate(&mctx, ggen, sizeof(ggen))) + goto err; + if (!EVP_DigestUpdate(&mctx, md, 3)) + goto err; + if (!EVP_DigestFinal_ex(&mctx, md, NULL)) + goto err; + if (!BN_bin2bn(md, mdsize, test)) + goto err; + } + /* g=test^r0%p */ + if (!BN_mod_exp_mont(g, test, r0, p, ctx, mont)) + goto err; + if (!BN_is_one(g)) + break; + if (idx < 0 && !BN_add(test, test, BN_value_one())) + goto err; + h++; + if (idx >= 0 && h > 0xffff) + goto err; + } + + if (!BN_GENCB_call(cb, 3, 1)) + goto err; + + ok = 1; + err: + if (ok == 1) { + if (p != ret->p) { + if (ret->p) + BN_free(ret->p); + ret->p = BN_dup(p); + } + if (q != ret->q) { + if (ret->q) + BN_free(ret->q); + ret->q = BN_dup(q); + } + if (ret->g) + BN_free(ret->g); + ret->g = BN_dup(g); + if (ret->p == NULL || ret->q == NULL || ret->g == NULL) { + ok = -1; + goto err; + } + if (counter_ret != NULL) + *counter_ret = counter; + if (h_ret != NULL) + *h_ret = h; + } + if (seed) + OPENSSL_free(seed); + if (seed_out != seed_tmp) + OPENSSL_free(seed_tmp); + if (ctx) { + 
BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + if (mont != NULL) + BN_MONT_CTX_free(mont); + EVP_MD_CTX_cleanup(&mctx); + return ok; +} + +int dsa_paramgen_check_g(DSA *dsa) +{ + BN_CTX *ctx; + BIGNUM *tmp; + BN_MONT_CTX *mont = NULL; + int rv = -1; + ctx = BN_CTX_new(); + if (!ctx) + return -1; + BN_CTX_start(ctx); + if (BN_cmp(dsa->g, BN_value_one()) <= 0) + return 0; + if (BN_cmp(dsa->g, dsa->p) >= 0) + return 0; + tmp = BN_CTX_get(ctx); + if (!tmp) + goto err; + if ((mont = BN_MONT_CTX_new()) == NULL) + goto err; + if (!BN_MONT_CTX_set(mont, dsa->p, ctx)) + goto err; + /* Work out g^q mod p */ + if (!BN_mod_exp_mont(tmp, dsa->g, dsa->q, dsa->p, ctx, mont)) + goto err; + if (!BN_cmp(tmp, BN_value_one())) + rv = 1; + else + rv = 0; + err: + BN_CTX_end(ctx); + if (mont) + BN_MONT_CTX_free(mont); + BN_CTX_free(ctx); + return rv; + +} #endif diff --git a/deps/openssl/openssl/crypto/dsa/dsa_locl.h b/deps/openssl/openssl/crypto/dsa/dsa_locl.h index f32ee964d0e335..9c23c3ef90e2df 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_locl.h +++ b/deps/openssl/openssl/crypto/dsa/dsa_locl.h @@ -59,3 +59,11 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, size_t seed_len, unsigned char *seed_out, int *counter_ret, unsigned long *h_ret, BN_GENCB *cb); + +int dsa_builtin_paramgen2(DSA *ret, size_t L, size_t N, + const EVP_MD *evpmd, const unsigned char *seed_in, + size_t seed_len, int idx, unsigned char *seed_out, + int *counter_ret, unsigned long *h_ret, + BN_GENCB *cb); + +int dsa_paramgen_check_g(DSA *dsa); diff --git a/deps/openssl/openssl/crypto/dsa/dsa_ossl.c b/deps/openssl/openssl/crypto/dsa/dsa_ossl.c index b30eab0319ffca..665f40a77978d5 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_ossl.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_ossl.c @@ -398,11 +398,7 @@ static int dsa_do_verify(const unsigned char *dgst, int dgst_len, ret = (BN_ucmp(&u1, sig->r) == 0); err: - /* - * XXX: surely this is wrong - if ret is 0, it just didn't verify; there - * is no error in BN. 
Test should be ret == -1 (Ben) - */ - if (ret != 1) + if (ret < 0) DSAerr(DSA_F_DSA_DO_VERIFY, ERR_R_BN_LIB); if (ctx != NULL) BN_CTX_free(ctx); diff --git a/deps/openssl/openssl/crypto/dsa/dsa_pmeth.c b/deps/openssl/openssl/crypto/dsa/dsa_pmeth.c index 0d480f6a707ef9..42b8bb0862511d 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_pmeth.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_pmeth.c @@ -197,6 +197,10 @@ static int pkey_dsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->md = p2; return 1; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = dctx->md; + return 1; + case EVP_PKEY_CTRL_DIGESTINIT: case EVP_PKEY_CTRL_PKCS7_SIGN: case EVP_PKEY_CTRL_CMS_SIGN: diff --git a/deps/openssl/openssl/crypto/dso/dso_win32.c b/deps/openssl/openssl/crypto/dso/dso_win32.c index 54c943dbf30ea2..c65234e9c44bb5 100644 --- a/deps/openssl/openssl/crypto/dso/dso_win32.c +++ b/deps/openssl/openssl/crypto/dso/dso_win32.c @@ -633,7 +633,6 @@ static int win32_pathbyaddr(void *addr, char *path, int sz) CREATETOOLHELP32SNAPSHOT create_snap; CLOSETOOLHELP32SNAPSHOT close_snap; MODULE32 module_first, module_next; - int len; if (addr == NULL) { union { @@ -694,25 +693,29 @@ static int win32_pathbyaddr(void *addr, char *path, int sz) return WideCharToMultiByte(CP_ACP, 0, me32.szExePath, -1, path, sz, NULL, NULL); # else - len = (int)wcslen(me32.szExePath); - if (sz <= 0) - return len + 1; - if (len >= sz) - len = sz - 1; - for (i = 0; i < len; i++) - path[i] = (char)me32.szExePath[i]; - path[len++] = 0; - return len; + { + int i, len = (int)wcslen(me32.szExePath); + if (sz <= 0) + return len + 1; + if (len >= sz) + len = sz - 1; + for (i = 0; i < len; i++) + path[i] = (char)me32.szExePath[i]; + path[len++] = 0; + return len; + } # endif # else - len = (int)strlen(me32.szExePath); - if (sz <= 0) - return len + 1; - if (len >= sz) - len = sz - 1; - memcpy(path, me32.szExePath, len); - path[len++] = 0; - return len; + { + int len = (int)strlen(me32.szExePath); + if (sz <= 0) + return len + 1; + if (len >= sz) + len = sz - 1; + memcpy(path, me32.szExePath, len); + path[len++] = 0; + return len; + } # endif } } while ((*module_next) (hModuleSnap, &me32)); diff --git a/deps/openssl/openssl/crypto/ec/Makefile b/deps/openssl/openssl/crypto/ec/Makefile index f85fc845ca2b31..0d9f3ab256a0af 100644 --- a/deps/openssl/openssl/crypto/ec/Makefile +++ b/deps/openssl/openssl/crypto/ec/Makefile @@ -11,6 +11,8 @@ MAKEFILE= Makefile AR= ar r CFLAGS= $(INCLUDES) $(CFLAG) +ASFLAGS= $(INCLUDES) $(ASFLAG) +AFLAGS= $(ASFLAGS) GENERAL=Makefile TEST=ectest.c @@ -27,7 +29,7 @@ LIBOBJ= ec_lib.o ecp_smpl.o ecp_mont.o ecp_nist.o ec_cvt.o ec_mult.o\ ec_err.o ec_curve.o ec_check.o ec_print.o ec_asn1.o ec_key.o\ ec2_smpl.o ec2_mult.o ec_ameth.o ec_pmeth.o eck_prn.o \ ecp_nistp224.o ecp_nistp256.o ecp_nistp521.o ecp_nistputil.o \ - ecp_oct.o ec2_oct.o ec_oct.o + ecp_oct.o ec2_oct.o ec_oct.o $(EC_ASM) SRC= $(LIBSRC) @@ -46,6 +48,12 @@ lib: $(LIBOBJ) $(RANLIB) $(LIB) || echo Never mind. 
@touch lib +ecp_nistz256-x86_64.s: asm/ecp_nistz256-x86_64.pl + $(PERL) asm/ecp_nistz256-x86_64.pl $(PERLASM_SCHEME) > $@ + +ecp_nistz256-avx2.s: asm/ecp_nistz256-avx2.pl + $(PERL) asm/ecp_nistz256-avx2.pl $(PERLASM_SCHEME) > $@ + files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -108,14 +116,14 @@ ec2_smpl.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h ec2_smpl.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h ec2_smpl.o: ../../include/openssl/symhacks.h ec2_smpl.c ec_lcl.h ec_ameth.o: ../../e_os.h ../../include/openssl/asn1.h -ec_ameth.o: ../../include/openssl/bio.h ../../include/openssl/bn.h -ec_ameth.o: ../../include/openssl/buffer.h ../../include/openssl/cms.h -ec_ameth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -ec_ameth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -ec_ameth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h -ec_ameth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h -ec_ameth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h -ec_ameth.o: ../../include/openssl/opensslconf.h +ec_ameth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h +ec_ameth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h +ec_ameth.o: ../../include/openssl/cms.h ../../include/openssl/crypto.h +ec_ameth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +ec_ameth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +ec_ameth.o: ../../include/openssl/err.h ../../include/openssl/evp.h +ec_ameth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +ec_ameth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h ec_ameth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h ec_ameth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h ec_ameth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h @@ -196,18 +204,19 @@ ec_oct.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h ec_oct.o: ../../include/openssl/symhacks.h ec_lcl.h ec_oct.c ec_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h ec_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h -ec_pmeth.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -ec_pmeth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h -ec_pmeth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h -ec_pmeth.o: ../../include/openssl/err.h ../../include/openssl/evp.h -ec_pmeth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h -ec_pmeth.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h +ec_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h +ec_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +ec_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h +ec_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h +ec_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h +ec_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +ec_pmeth.o: ../../include/openssl/opensslconf.h ec_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h ec_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h ec_pmeth.o: ../../include/openssl/sha.h ../../include/openssl/stack.h ec_pmeth.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h ec_pmeth.o: ../../include/openssl/x509_vfy.h ../cryptlib.h ../evp/evp_locl.h -ec_pmeth.o: ec_pmeth.c 
+ec_pmeth.o: ec_lcl.h ec_pmeth.c ec_print.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h ec_print.o: ../../include/openssl/bn.h ../../include/openssl/crypto.h ec_print.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h diff --git a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-avx2.pl b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-avx2.pl new file mode 100755 index 00000000000000..4c220aa645f14d --- /dev/null +++ b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-avx2.pl @@ -0,0 +1,2093 @@ +#!/usr/bin/env perl + +############################################################################## +# # +# Copyright 2014 Intel Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +# # +############################################################################## +# # +# Developers and authors: # +# Shay Gueron (1, 2), and Vlad Krasnov (1) # +# (1) Intel Corporation, Israel Development Center # +# (2) University of Haifa # +# Reference: # +# S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with# +# 256 Bit Primes" # +# # +############################################################################## + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); + $addx = ($1>=2.23); +} + +if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); + $addx = ($1>=2.10); +} + +if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + $addx = ($1>=12); +} + +if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 + $avx = ($ver>=3.0) + ($ver>=3.01); + $addx = ($ver>=3.03); +} + +if ($avx>=2) {{ +$digit_size = "\$29"; +$n_digits = "\$9"; + +$code.=<<___; +.text + +.align 64 +.LAVX2_AND_MASK: +.LAVX2_POLY: +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x00040000, 0x00040000, 0x00040000, 0x00040000 +.quad 0x1fe00000, 0x1fe00000, 0x1fe00000, 0x1fe00000 +.quad 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff + +.LAVX2_POLY_x2: +.quad 0x7FFFFFFC, 0x7FFFFFFC, 
0x7FFFFFFC, 0x7FFFFFFC +.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC +.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC +.quad 0x400007FC, 0x400007FC, 0x400007FC, 0x400007FC +.quad 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE +.quad 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE +.quad 0x400FFFFE, 0x400FFFFE, 0x400FFFFE, 0x400FFFFE +.quad 0x7F7FFFFE, 0x7F7FFFFE, 0x7F7FFFFE, 0x7F7FFFFE +.quad 0x03FFFFFC, 0x03FFFFFC, 0x03FFFFFC, 0x03FFFFFC + +.LAVX2_POLY_x8: +.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8 +.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8 +.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8 +.quad 0x80000FF8, 0x80000FF8, 0x80000FF8, 0x80000FF8 +.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC +.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC +.quad 0x801FFFFC, 0x801FFFFC, 0x801FFFFC, 0x801FFFFC +.quad 0xFEFFFFFC, 0xFEFFFFFC, 0xFEFFFFFC, 0xFEFFFFFC +.quad 0x07FFFFF8, 0x07FFFFF8, 0x07FFFFF8, 0x07FFFFF8 + +.LONE: +.quad 0x00000020, 0x00000020, 0x00000020, 0x00000020 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x1fffc000, 0x1fffc000, 0x1fffc000, 0x1fffc000 +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1f7fffff, 0x1f7fffff, 0x1f7fffff, 0x1f7fffff +.quad 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 + +# RR = 2^266 mod p in AVX2 format, to transform from the native OpenSSL +# Montgomery form (*2^256) to our format (*2^261) + +.LTO_MONT_AVX2: +.quad 0x00000400, 0x00000400, 0x00000400, 0x00000400 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x1ff80000, 0x1ff80000, 0x1ff80000, 0x1ff80000 +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x00000003, 0x00000003, 0x00000003, 0x00000003 + +.LFROM_MONT_AVX2: +.quad 0x00000001, 0x00000001, 0x00000001, 0x00000001 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 +.quad 0x1ffffe00, 0x1ffffe00, 0x1ffffe00, 0x1ffffe00 +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff +.quad 0x1ffbffff, 0x1ffbffff, 0x1ffbffff, 0x1ffbffff +.quad 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff +.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 + +.LIntOne: +.long 1,1,1,1,1,1,1,1 +___ + +{ +# This function recieves a pointer to an array of four affine points +# (X, Y, <1>) and rearanges the data for AVX2 execution, while +# converting it to 2^29 radix redundant form + +my ($X0,$X1,$X2,$X3, $Y0,$Y1,$Y2,$Y3, + $T0,$T1,$T2,$T3, $T4,$T5,$T6,$T7)=map("%ymm$_",(0..15)); + +$code.=<<___; +.globl ecp_nistz256_avx2_transpose_convert +.type ecp_nistz256_avx2_transpose_convert,\@function,2 +.align 64 +ecp_nistz256_avx2_transpose_convert: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -8-16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + # Load the data + vmovdqa 32*0(%rsi), $X0 + lea 
112(%rsi), %rax # size optimization + vmovdqa 32*1(%rsi), $Y0 + lea .LAVX2_AND_MASK(%rip), %rdx + vmovdqa 32*2(%rsi), $X1 + vmovdqa 32*3(%rsi), $Y1 + vmovdqa 32*4-112(%rax), $X2 + vmovdqa 32*5-112(%rax), $Y2 + vmovdqa 32*6-112(%rax), $X3 + vmovdqa 32*7-112(%rax), $Y3 + + # Transpose X and Y independently + vpunpcklqdq $X1, $X0, $T0 # T0 = [B2 A2 B0 A0] + vpunpcklqdq $X3, $X2, $T1 # T1 = [D2 C2 D0 C0] + vpunpckhqdq $X1, $X0, $T2 # T2 = [B3 A3 B1 A1] + vpunpckhqdq $X3, $X2, $T3 # T3 = [D3 C3 D1 C1] + + vpunpcklqdq $Y1, $Y0, $T4 + vpunpcklqdq $Y3, $Y2, $T5 + vpunpckhqdq $Y1, $Y0, $T6 + vpunpckhqdq $Y3, $Y2, $T7 + + vperm2i128 \$0x20, $T1, $T0, $X0 # X0 = [D0 C0 B0 A0] + vperm2i128 \$0x20, $T3, $T2, $X1 # X1 = [D1 C1 B1 A1] + vperm2i128 \$0x31, $T1, $T0, $X2 # X2 = [D2 C2 B2 A2] + vperm2i128 \$0x31, $T3, $T2, $X3 # X3 = [D3 C3 B3 A3] + + vperm2i128 \$0x20, $T5, $T4, $Y0 + vperm2i128 \$0x20, $T7, $T6, $Y1 + vperm2i128 \$0x31, $T5, $T4, $Y2 + vperm2i128 \$0x31, $T7, $T6, $Y3 + vmovdqa (%rdx), $T7 + + vpand (%rdx), $X0, $T0 # out[0] = in[0] & mask; + vpsrlq \$29, $X0, $X0 + vpand $T7, $X0, $T1 # out[1] = (in[0] >> shift) & mask; + vpsrlq \$29, $X0, $X0 + vpsllq \$6, $X1, $T2 + vpxor $X0, $T2, $T2 + vpand $T7, $T2, $T2 # out[2] = ((in[0] >> (shift*2)) ^ (in[1] << (64-shift*2))) & mask; + vpsrlq \$23, $X1, $X1 + vpand $T7, $X1, $T3 # out[3] = (in[1] >> ((shift*3)%64)) & mask; + vpsrlq \$29, $X1, $X1 + vpsllq \$12, $X2, $T4 + vpxor $X1, $T4, $T4 + vpand $T7, $T4, $T4 # out[4] = ((in[1] >> ((shift*4)%64)) ^ (in[2] << (64*2-shift*4))) & mask; + vpsrlq \$17, $X2, $X2 + vpand $T7, $X2, $T5 # out[5] = (in[2] >> ((shift*5)%64)) & mask; + vpsrlq \$29, $X2, $X2 + vpsllq \$18, $X3, $T6 + vpxor $X2, $T6, $T6 + vpand $T7, $T6, $T6 # out[6] = ((in[2] >> ((shift*6)%64)) ^ (in[3] << (64*3-shift*6))) & mask; + vpsrlq \$11, $X3, $X3 + vmovdqa $T0, 32*0(%rdi) + lea 112(%rdi), %rax # size optimization + vpand $T7, $X3, $T0 # out[7] = (in[3] >> ((shift*7)%64)) & mask; + vpsrlq \$29, $X3, $X3 # out[8] = (in[3] >> ((shift*8)%64)) & mask; + + vmovdqa $T1, 32*1(%rdi) + vmovdqa $T2, 32*2(%rdi) + vmovdqa $T3, 32*3(%rdi) + vmovdqa $T4, 32*4-112(%rax) + vmovdqa $T5, 32*5-112(%rax) + vmovdqa $T6, 32*6-112(%rax) + vmovdqa $T0, 32*7-112(%rax) + vmovdqa $X3, 32*8-112(%rax) + lea 448(%rdi), %rax # size optimization + + vpand $T7, $Y0, $T0 # out[0] = in[0] & mask; + vpsrlq \$29, $Y0, $Y0 + vpand $T7, $Y0, $T1 # out[1] = (in[0] >> shift) & mask; + vpsrlq \$29, $Y0, $Y0 + vpsllq \$6, $Y1, $T2 + vpxor $Y0, $T2, $T2 + vpand $T7, $T2, $T2 # out[2] = ((in[0] >> (shift*2)) ^ (in[1] << (64-shift*2))) & mask; + vpsrlq \$23, $Y1, $Y1 + vpand $T7, $Y1, $T3 # out[3] = (in[1] >> ((shift*3)%64)) & mask; + vpsrlq \$29, $Y1, $Y1 + vpsllq \$12, $Y2, $T4 + vpxor $Y1, $T4, $T4 + vpand $T7, $T4, $T4 # out[4] = ((in[1] >> ((shift*4)%64)) ^ (in[2] << (64*2-shift*4))) & mask; + vpsrlq \$17, $Y2, $Y2 + vpand $T7, $Y2, $T5 # out[5] = (in[2] >> ((shift*5)%64)) & mask; + vpsrlq \$29, $Y2, $Y2 + vpsllq \$18, $Y3, $T6 + vpxor $Y2, $T6, $T6 + vpand $T7, $T6, $T6 # out[6] = ((in[2] >> ((shift*6)%64)) ^ (in[3] << (64*3-shift*6))) & mask; + vpsrlq \$11, $Y3, $Y3 + vmovdqa $T0, 32*9-448(%rax) + vpand $T7, $Y3, $T0 # out[7] = (in[3] >> ((shift*7)%64)) & mask; + vpsrlq \$29, $Y3, $Y3 # out[8] = (in[3] >> ((shift*8)%64)) & mask; + + vmovdqa $T1, 32*10-448(%rax) + vmovdqa $T2, 32*11-448(%rax) + vmovdqa $T3, 32*12-448(%rax) + vmovdqa $T4, 32*13-448(%rax) + vmovdqa $T5, 32*14-448(%rax) + vmovdqa $T6, 32*15-448(%rax) + vmovdqa $T0, 32*16-448(%rax) + vmovdqa $Y3, 
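The vpand/vpsrlq/vpsllq sequence above is a four-wide version of a simple scalar conversion: four 64-bit words are cut into nine 29-bit digits, and the odd-looking shift counts (6, 23, 12, 17, 18, 11) are just the distances between a digit boundary 29*i and the nearest 64-bit word boundary; the vpunpck/vperm2i128 transpose before it merely regroups the four input points so that lane l of every register belongs to point l. A scalar sketch of the split, following the out[0..8] comments literally (Python, illustration only, not part of the patch):

    MASK = (1 << 29) - 1

    def to_digits(w):
        # literal transcription of the out[0..8] comments; '|' is used where
        # the asm uses vpxor, since the combined bit ranges never overlap
        return [  w[0]                        & MASK,
                 (w[0] >> 29)                 & MASK,
                ((w[0] >> 58) | (w[1] <<  6)) & MASK,
                 (w[1] >> 23)                 & MASK,
                ((w[1] >> 52) | (w[2] << 12)) & MASK,
                 (w[2] >> 17)                 & MASK,
                ((w[2] >> 46) | (w[3] << 18)) & MASK,
                 (w[3] >> 11)                 & MASK,
                  w[3] >> 40]                 # top digit, only 24 bits wide

    w = [0x0123456789abcdef, 0xfedcba9876543210,
         0x0f1e2d3c4b5a6978, 0x8877665544332211]
    d = to_digits(w)
    assert sum(x << (29 * i) for i, x in enumerate(d)) == \
           sum(x << (64 * i) for i, x in enumerate(w))

ecp_nistz256_avx2_convert_transpose_back, defined next, performs the inverse: it shifts the nine digits back into four 64-bit words and undoes the transpose.
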
32*17-448(%rax) + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*0(%rsp), %xmm6 + movaps 16*1(%rsp), %xmm7 + movaps 16*2(%rsp), %xmm8 + movaps 16*3(%rsp), %xmm9 + movaps 16*4(%rsp), %xmm10 + movaps 16*5(%rsp), %xmm11 + movaps 16*6(%rsp), %xmm12 + movaps 16*7(%rsp), %xmm13 + movaps 16*8(%rsp), %xmm14 + movaps 16*9(%rsp), %xmm15 + lea 8+16*10(%rsp), %rsp +___ +$code.=<<___; + ret +.size ecp_nistz256_avx2_transpose_convert,.-ecp_nistz256_avx2_transpose_convert +___ +} +{ +################################################################################ +# This function recieves a pointer to an array of four AVX2 formatted points +# (X, Y, Z) convert the data to normal representation, and rearanges the data + +my ($D0,$D1,$D2,$D3, $D4,$D5,$D6,$D7, $D8)=map("%ymm$_",(0..8)); +my ($T0,$T1,$T2,$T3, $T4,$T5,$T6)=map("%ymm$_",(9..15)); + +$code.=<<___; + +.globl ecp_nistz256_avx2_convert_transpose_back +.type ecp_nistz256_avx2_convert_transpose_back,\@function,2 +.align 32 +ecp_nistz256_avx2_convert_transpose_back: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -8-16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + mov \$3, %ecx + +.Lconv_loop: + vmovdqa 32*0(%rsi), $D0 + lea 160(%rsi), %rax # size optimization + vmovdqa 32*1(%rsi), $D1 + vmovdqa 32*2(%rsi), $D2 + vmovdqa 32*3(%rsi), $D3 + vmovdqa 32*4-160(%rax), $D4 + vmovdqa 32*5-160(%rax), $D5 + vmovdqa 32*6-160(%rax), $D6 + vmovdqa 32*7-160(%rax), $D7 + vmovdqa 32*8-160(%rax), $D8 + + vpsllq \$29, $D1, $D1 + vpsllq \$58, $D2, $T0 + vpaddq $D1, $D0, $D0 + vpaddq $T0, $D0, $D0 # out[0] = (in[0]) ^ (in[1] << shift*1) ^ (in[2] << shift*2); + + vpsrlq \$6, $D2, $D2 + vpsllq \$23, $D3, $D3 + vpsllq \$52, $D4, $T1 + vpaddq $D2, $D3, $D3 + vpaddq $D3, $T1, $D1 # out[1] = (in[2] >> (64*1-shift*2)) ^ (in[3] << shift*3%64) ^ (in[4] << shift*4%64); + + vpsrlq \$12, $D4, $D4 + vpsllq \$17, $D5, $D5 + vpsllq \$46, $D6, $T2 + vpaddq $D4, $D5, $D5 + vpaddq $D5, $T2, $D2 # out[2] = (in[4] >> (64*2-shift*4)) ^ (in[5] << shift*5%64) ^ (in[6] << shift*6%64); + + vpsrlq \$18, $D6, $D6 + vpsllq \$11, $D7, $D7 + vpsllq \$40, $D8, $T3 + vpaddq $D6, $D7, $D7 + vpaddq $D7, $T3, $D3 # out[3] = (in[6] >> (64*3-shift*6)) ^ (in[7] << shift*7%64) ^ (in[8] << shift*8%64); + + vpunpcklqdq $D1, $D0, $T0 # T0 = [B2 A2 B0 A0] + vpunpcklqdq $D3, $D2, $T1 # T1 = [D2 C2 D0 C0] + vpunpckhqdq $D1, $D0, $T2 # T2 = [B3 A3 B1 A1] + vpunpckhqdq $D3, $D2, $T3 # T3 = [D3 C3 D1 C1] + + vperm2i128 \$0x20, $T1, $T0, $D0 # X0 = [D0 C0 B0 A0] + vperm2i128 \$0x20, $T3, $T2, $D1 # X1 = [D1 C1 B1 A1] + vperm2i128 \$0x31, $T1, $T0, $D2 # X2 = [D2 C2 B2 A2] + vperm2i128 \$0x31, $T3, $T2, $D3 # X3 = [D3 C3 B3 A3] + + vmovdqa $D0, 32*0(%rdi) + vmovdqa $D1, 32*3(%rdi) + vmovdqa $D2, 32*6(%rdi) + vmovdqa $D3, 32*9(%rdi) + + lea 32*9(%rsi), %rsi + lea 32*1(%rdi), %rdi + + dec %ecx + jnz .Lconv_loop + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*0(%rsp), %xmm6 + movaps 16*1(%rsp), %xmm7 + movaps 16*2(%rsp), %xmm8 + movaps 16*3(%rsp), %xmm9 + movaps 16*4(%rsp), %xmm10 + movaps 16*5(%rsp), %xmm11 + movaps 16*6(%rsp), %xmm12 + movaps 16*7(%rsp), %xmm13 + movaps 16*8(%rsp), %xmm14 + movaps 16*9(%rsp), %xmm15 + lea 8+16*10(%rsp), %rsp +___ +$code.=<<___; + ret +.size 
ecp_nistz256_avx2_convert_transpose_back,.-ecp_nistz256_avx2_convert_transpose_back +___ +} +{ +my ($r_ptr,$a_ptr,$b_ptr,$itr)=("%rdi","%rsi","%rdx","%ecx"); +my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4,$ACC5,$ACC6,$ACC7,$ACC8)=map("%ymm$_",(0..8)); +my ($B,$Y,$T0,$AND_MASK,$OVERFLOW)=map("%ymm$_",(9..13)); + +sub NORMALIZE { +my $ret=<<___; + vpsrlq $digit_size, $ACC0, $T0 + vpand $AND_MASK, $ACC0, $ACC0 + vpaddq $T0, $ACC1, $ACC1 + + vpsrlq $digit_size, $ACC1, $T0 + vpand $AND_MASK, $ACC1, $ACC1 + vpaddq $T0, $ACC2, $ACC2 + + vpsrlq $digit_size, $ACC2, $T0 + vpand $AND_MASK, $ACC2, $ACC2 + vpaddq $T0, $ACC3, $ACC3 + + vpsrlq $digit_size, $ACC3, $T0 + vpand $AND_MASK, $ACC3, $ACC3 + vpaddq $T0, $ACC4, $ACC4 + + vpsrlq $digit_size, $ACC4, $T0 + vpand $AND_MASK, $ACC4, $ACC4 + vpaddq $T0, $ACC5, $ACC5 + + vpsrlq $digit_size, $ACC5, $T0 + vpand $AND_MASK, $ACC5, $ACC5 + vpaddq $T0, $ACC6, $ACC6 + + vpsrlq $digit_size, $ACC6, $T0 + vpand $AND_MASK, $ACC6, $ACC6 + vpaddq $T0, $ACC7, $ACC7 + + vpsrlq $digit_size, $ACC7, $T0 + vpand $AND_MASK, $ACC7, $ACC7 + vpaddq $T0, $ACC8, $ACC8 + #vpand $AND_MASK, $ACC8, $ACC8 +___ + $ret; +} + +sub STORE { +my $ret=<<___; + vmovdqa $ACC0, 32*0(%rdi) + lea 160(%rdi), %rax # size optimization + vmovdqa $ACC1, 32*1(%rdi) + vmovdqa $ACC2, 32*2(%rdi) + vmovdqa $ACC3, 32*3(%rdi) + vmovdqa $ACC4, 32*4-160(%rax) + vmovdqa $ACC5, 32*5-160(%rax) + vmovdqa $ACC6, 32*6-160(%rax) + vmovdqa $ACC7, 32*7-160(%rax) + vmovdqa $ACC8, 32*8-160(%rax) +___ + $ret; +} + +$code.=<<___; +.type avx2_normalize,\@abi-omnipotent +.align 32 +avx2_normalize: + vpsrlq $digit_size, $ACC0, $T0 + vpand $AND_MASK, $ACC0, $ACC0 + vpaddq $T0, $ACC1, $ACC1 + + vpsrlq $digit_size, $ACC1, $T0 + vpand $AND_MASK, $ACC1, $ACC1 + vpaddq $T0, $ACC2, $ACC2 + + vpsrlq $digit_size, $ACC2, $T0 + vpand $AND_MASK, $ACC2, $ACC2 + vpaddq $T0, $ACC3, $ACC3 + + vpsrlq $digit_size, $ACC3, $T0 + vpand $AND_MASK, $ACC3, $ACC3 + vpaddq $T0, $ACC4, $ACC4 + + vpsrlq $digit_size, $ACC4, $T0 + vpand $AND_MASK, $ACC4, $ACC4 + vpaddq $T0, $ACC5, $ACC5 + + vpsrlq $digit_size, $ACC5, $T0 + vpand $AND_MASK, $ACC5, $ACC5 + vpaddq $T0, $ACC6, $ACC6 + + vpsrlq $digit_size, $ACC6, $T0 + vpand $AND_MASK, $ACC6, $ACC6 + vpaddq $T0, $ACC7, $ACC7 + + vpsrlq $digit_size, $ACC7, $T0 + vpand $AND_MASK, $ACC7, $ACC7 + vpaddq $T0, $ACC8, $ACC8 + #vpand $AND_MASK, $ACC8, $ACC8 + + ret +.size avx2_normalize,.-avx2_normalize + +.type avx2_normalize_n_store,\@abi-omnipotent +.align 32 +avx2_normalize_n_store: + vpsrlq $digit_size, $ACC0, $T0 + vpand $AND_MASK, $ACC0, $ACC0 + vpaddq $T0, $ACC1, $ACC1 + + vpsrlq $digit_size, $ACC1, $T0 + vpand $AND_MASK, $ACC1, $ACC1 + vmovdqa $ACC0, 32*0(%rdi) + lea 160(%rdi), %rax # size optimization + vpaddq $T0, $ACC2, $ACC2 + + vpsrlq $digit_size, $ACC2, $T0 + vpand $AND_MASK, $ACC2, $ACC2 + vmovdqa $ACC1, 32*1(%rdi) + vpaddq $T0, $ACC3, $ACC3 + + vpsrlq $digit_size, $ACC3, $T0 + vpand $AND_MASK, $ACC3, $ACC3 + vmovdqa $ACC2, 32*2(%rdi) + vpaddq $T0, $ACC4, $ACC4 + + vpsrlq $digit_size, $ACC4, $T0 + vpand $AND_MASK, $ACC4, $ACC4 + vmovdqa $ACC3, 32*3(%rdi) + vpaddq $T0, $ACC5, $ACC5 + + vpsrlq $digit_size, $ACC5, $T0 + vpand $AND_MASK, $ACC5, $ACC5 + vmovdqa $ACC4, 32*4-160(%rax) + vpaddq $T0, $ACC6, $ACC6 + + vpsrlq $digit_size, $ACC6, $T0 + vpand $AND_MASK, $ACC6, $ACC6 + vmovdqa $ACC5, 32*5-160(%rax) + vpaddq $T0, $ACC7, $ACC7 + + vpsrlq $digit_size, $ACC7, $T0 + vpand $AND_MASK, $ACC7, $ACC7 + vmovdqa $ACC6, 32*6-160(%rax) + vpaddq $T0, $ACC8, $ACC8 + #vpand $AND_MASK, $ACC8, $ACC8 + vmovdqa $ACC7, 
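Because every 29-bit digit lives in its own 64-bit lane, intermediate sums and products are allowed to grow well past 29 bits; avx2_normalize's only job is to push the excess upward again. Per lane it is the loop below (Python sketch, illustration only; note that, like the commented-out vpand in the asm, the top digit is deliberately left unmasked):

    MASK = (1 << 29) - 1

    def normalize(d):
        # one carry-propagation pass over nine digits, as in avx2_normalize
        d = list(d)
        for i in range(8):
            d[i + 1] += d[i] >> 29      # carry into the next digit
            d[i] &= MASK                # keep 29 bits here
        return d                        # d[8] may still exceed 29 bits

    value = lambda d: sum(x << (29 * i) for i, x in enumerate(d))
    d = [MASK + 12345, 7, 0, 2**33, 5, 0, 0, 1, 0]
    assert value(normalize(d)) == value(d)   # the represented value is unchanged

avx2_normalize_n_store is the same pass fused with the stores of the STORE macro.
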
32*7-160(%rax) + vmovdqa $ACC8, 32*8-160(%rax) + + ret +.size avx2_normalize_n_store,.-avx2_normalize_n_store + +################################################################################ +# void avx2_mul_x4(void* RESULTx4, void *Ax4, void *Bx4); +.type avx2_mul_x4,\@abi-omnipotent +.align 32 +avx2_mul_x4: + lea .LAVX2_POLY(%rip), %rax + + vpxor $ACC0, $ACC0, $ACC0 + vpxor $ACC1, $ACC1, $ACC1 + vpxor $ACC2, $ACC2, $ACC2 + vpxor $ACC3, $ACC3, $ACC3 + vpxor $ACC4, $ACC4, $ACC4 + vpxor $ACC5, $ACC5, $ACC5 + vpxor $ACC6, $ACC6, $ACC6 + vpxor $ACC7, $ACC7, $ACC7 + + vmovdqa 32*7(%rax), %ymm14 + vmovdqa 32*8(%rax), %ymm15 + + mov $n_digits, $itr + lea -512($a_ptr), $a_ptr # strategic bias to control u-op density + jmp .Lavx2_mul_x4_loop + +.align 32 +.Lavx2_mul_x4_loop: + vmovdqa 32*0($b_ptr), $B + lea 32*1($b_ptr), $b_ptr + + vpmuludq 32*0+512($a_ptr), $B, $T0 + vpmuludq 32*1+512($a_ptr), $B, $OVERFLOW # borrow $OVERFLOW + vpaddq $T0, $ACC0, $ACC0 + vpmuludq 32*2+512($a_ptr), $B, $T0 + vpaddq $OVERFLOW, $ACC1, $ACC1 + vpand $AND_MASK, $ACC0, $Y + vpmuludq 32*3+512($a_ptr), $B, $OVERFLOW + vpaddq $T0, $ACC2, $ACC2 + vpmuludq 32*4+512($a_ptr), $B, $T0 + vpaddq $OVERFLOW, $ACC3, $ACC3 + vpmuludq 32*5+512($a_ptr), $B, $OVERFLOW + vpaddq $T0, $ACC4, $ACC4 + vpmuludq 32*6+512($a_ptr), $B, $T0 + vpaddq $OVERFLOW, $ACC5, $ACC5 + vpmuludq 32*7+512($a_ptr), $B, $OVERFLOW + vpaddq $T0, $ACC6, $ACC6 + + # Skip some multiplications, optimizing for the constant poly + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*8+512($a_ptr), $B, $ACC8 + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + .byte 0x67 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $OVERFLOW + .byte 0x67 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $T0 + vpaddq $OVERFLOW, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $OVERFLOW + vpaddq $T0, $ACC7, $ACC6 + vpaddq $OVERFLOW, $ACC8, $ACC7 + + dec $itr + jnz .Lavx2_mul_x4_loop + + vpxor $ACC8, $ACC8, $ACC8 + + ret +.size avx2_mul_x4,.-avx2_mul_x4 + +# Function optimized for the constant 1 +################################################################################ +# void avx2_mul_by1_x4(void* RESULTx4, void *Ax4); +.type avx2_mul_by1_x4,\@abi-omnipotent +.align 32 +avx2_mul_by1_x4: + lea .LAVX2_POLY(%rip), %rax + + vpxor $ACC0, $ACC0, $ACC0 + vpxor $ACC1, $ACC1, $ACC1 + vpxor $ACC2, $ACC2, $ACC2 + vpxor $ACC3, $ACC3, $ACC3 + vpxor $ACC4, $ACC4, $ACC4 + vpxor $ACC5, $ACC5, $ACC5 + vpxor $ACC6, $ACC6, $ACC6 + vpxor $ACC7, $ACC7, $ACC7 + vpxor $ACC8, $ACC8, $ACC8 + + vmovdqa 32*3+.LONE(%rip), %ymm14 + vmovdqa 32*7+.LONE(%rip), %ymm15 + + mov $n_digits, $itr + jmp .Lavx2_mul_by1_x4_loop + +.align 32 +.Lavx2_mul_by1_x4_loop: + vmovdqa 32*0($a_ptr), $B + .byte 0x48,0x8d,0xb6,0x20,0,0,0 # lea 32*1($a_ptr), $a_ptr + + vpsllq \$5, $B, $OVERFLOW + vpmuludq %ymm14, $B, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC3 + .byte 0x67 + vpmuludq $AND_MASK, $B, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $T0, $ACC4, $ACC4 + vpaddq $T0, $ACC5, $ACC5 + vpaddq $T0, $ACC6, $ACC6 + vpsllq \$23, $B, $T0 + + .byte 0x67,0x67 + vpmuludq %ymm15, $B, $OVERFLOW + vpsubq $T0, $ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + .byte 0x67,0x67 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, 
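avx2_mul_x4 runs its loop $n_digits times, consuming one 29-bit digit of B per pass while folding in one digit-serial Montgomery reduction step; the "skip some multiplications" comment works because the .LAVX2_POLY digits are runs of all-ones, zeros and a lone 2^18, so several vpmuludq turn into shifts or vanish. Stripped of the vectorization and of those shortcuts, each lane computes the function below. The essential observation is that p is -1 modulo 2^29, so the per-digit quotient is simply the low 29 bits of the accumulator (a sketch of the arithmetic only, not of the register scheduling):

    p = 2**256 - 2**224 + 2**192 + 2**96 - 1
    MASK = (1 << 29) - 1

    def mont_mul_29(a, b):
        # digit-serial Montgomery multiplication, nine digits of 29 bits:
        # returns a*b*2^-261 mod p
        acc = a * b
        for _ in range(9):
            y = acc & MASK               # p = -1 mod 2^29, so -1/p = 1 mod 2^29
            acc = (acc + y * p) >> 29    # low digit is now zero; shift it out
        return acc % p

    R = 2**261
    a, b = 0x1234567 * 977, 0xfedcba98765
    assert mont_mul_29(a * R % p, b * R % p) == a * b * R % p

avx2_mul_by1_x4 and avx2_sqr_x4 below are the same computation specialized for b = 1 and for b = a.
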
$T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $OVERFLOW + vmovdqa $ACC5, $ACC4 + vpmuludq 32*7(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC6, $ACC5 + vpaddq $T0, $ACC7, $ACC6 + vpmuludq 32*8(%rax), $Y, $ACC7 + + dec $itr + jnz .Lavx2_mul_by1_x4_loop + + ret +.size avx2_mul_by1_x4,.-avx2_mul_by1_x4 + +################################################################################ +# void avx2_sqr_x4(void* RESULTx4, void *Ax4, void *Bx4); +.type avx2_sqr_x4,\@abi-omnipotent +.align 32 +avx2_sqr_x4: + lea .LAVX2_POLY(%rip), %rax + + vmovdqa 32*7(%rax), %ymm14 + vmovdqa 32*8(%rax), %ymm15 + + vmovdqa 32*0($a_ptr), $B + vmovdqa 32*1($a_ptr), $ACC1 + vmovdqa 32*2($a_ptr), $ACC2 + vmovdqa 32*3($a_ptr), $ACC3 + vmovdqa 32*4($a_ptr), $ACC4 + vmovdqa 32*5($a_ptr), $ACC5 + vmovdqa 32*6($a_ptr), $ACC6 + vmovdqa 32*7($a_ptr), $ACC7 + vpaddq $ACC1, $ACC1, $ACC1 # 2*$ACC0..7 + vmovdqa 32*8($a_ptr), $ACC8 + vpaddq $ACC2, $ACC2, $ACC2 + vmovdqa $ACC1, 32*0(%rcx) + vpaddq $ACC3, $ACC3, $ACC3 + vmovdqa $ACC2, 32*1(%rcx) + vpaddq $ACC4, $ACC4, $ACC4 + vmovdqa $ACC3, 32*2(%rcx) + vpaddq $ACC5, $ACC5, $ACC5 + vmovdqa $ACC4, 32*3(%rcx) + vpaddq $ACC6, $ACC6, $ACC6 + vmovdqa $ACC5, 32*4(%rcx) + vpaddq $ACC7, $ACC7, $ACC7 + vmovdqa $ACC6, 32*5(%rcx) + vpaddq $ACC8, $ACC8, $ACC8 + vmovdqa $ACC7, 32*6(%rcx) + vmovdqa $ACC8, 32*7(%rcx) + + #itr 1 + vpmuludq $B, $B, $ACC0 + vpmuludq $B, $ACC1, $ACC1 + vpand $AND_MASK, $ACC0, $Y + vpmuludq $B, $ACC2, $ACC2 + vpmuludq $B, $ACC3, $ACC3 + vpmuludq $B, $ACC4, $ACC4 + vpmuludq $B, $ACC5, $ACC5 + vpmuludq $B, $ACC6, $ACC6 + vpmuludq $AND_MASK, $Y, $T0 + vpmuludq $B, $ACC7, $ACC7 + vpmuludq $B, $ACC8, $ACC8 + vmovdqa 32*1($a_ptr), $B + + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 2 + vpmuludq $B, $B, $OVERFLOW + vpand $AND_MASK, $ACC0, $Y + vpmuludq 32*1(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC1, $ACC1 + vpmuludq 32*2(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC2, $ACC2 + vpmuludq 32*3(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC3, $ACC3 + vpmuludq 32*4(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC4, $ACC4 + vpmuludq 32*5(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC5, $ACC5 + vpmuludq 32*6(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*2($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 3 + vpmuludq $B, $B, $T0 + vpand $AND_MASK, $ACC0, $Y + vpmuludq 32*2(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC2, $ACC2 + vpmuludq 32*3(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC3, $ACC3 + vpmuludq 32*4(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC4, $ACC4 + vpmuludq 32*5(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC5, $ACC5 + vpmuludq 32*6(%rcx), $B, $OVERFLOW + vpaddq $T0, 
$ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*3($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 4 + vpmuludq $B, $B, $OVERFLOW + vpmuludq 32*3(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC3, $ACC3 + vpmuludq 32*4(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC4, $ACC4 + vpmuludq 32*5(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC5, $ACC5 + vpmuludq 32*6(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*4($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 5 + vpmuludq $B, $B, $T0 + vpmuludq 32*4(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC4, $ACC4 + vpmuludq 32*5(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC5, $ACC5 + vpmuludq 32*6(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*5($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3+.LAVX2_POLY(%rip), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 6 + vpmuludq $B, $B, $OVERFLOW + vpmuludq 32*5(%rcx), $B, $T0 + vpaddq $OVERFLOW, $ACC5, $ACC5 + vpmuludq 32*6(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*6($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 7 + vpmuludq $B, $B, $T0 + vpmuludq 32*6(%rcx), $B, $OVERFLOW + vpaddq $T0, $ACC6, $ACC6 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*7($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, 
$T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 8 + vpmuludq $B, $B, $OVERFLOW + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC7 + vpmuludq 32*7(%rcx), $B, $ACC8 + vmovdqa 32*8($a_ptr), $B + vpaddq $T0, $ACC0, $OVERFLOW + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpand $AND_MASK, $ACC0, $Y + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + #itr 9 + vpmuludq $B, $B, $ACC8 + + vpmuludq $AND_MASK, $Y, $T0 + vpaddq $T0, $ACC0, $OVERFLOW + vpsrlq $digit_size, $OVERFLOW, $OVERFLOW + vpaddq $T0, $ACC1, $ACC0 + vpaddq $T0, $ACC2, $ACC1 + vpmuludq 32*3(%rax), $Y, $T0 + vpaddq $OVERFLOW, $ACC0, $ACC0 + vpaddq $T0, $ACC3, $ACC2 + vmovdqa $ACC4, $ACC3 + vpsllq \$18, $Y, $T0 + vmovdqa $ACC5, $ACC4 + vpmuludq %ymm14, $Y, $OVERFLOW + vpaddq $T0, $ACC6, $ACC5 + vpmuludq %ymm15, $Y, $T0 + vpaddq $OVERFLOW, $ACC7, $ACC6 + vpaddq $T0, $ACC8, $ACC7 + + vpxor $ACC8, $ACC8, $ACC8 + + ret +.size avx2_sqr_x4,.-avx2_sqr_x4 + +################################################################################ +# void avx2_sub_x4(void* RESULTx4, void *Ax4, void *Bx4); +.type avx2_sub_x4,\@abi-omnipotent +.align 32 +avx2_sub_x4: + vmovdqa 32*0($a_ptr), $ACC0 + lea 160($a_ptr), $a_ptr + lea .LAVX2_POLY_x8+128(%rip), %rax + lea 128($b_ptr), $b_ptr + vmovdqa 32*1-160($a_ptr), $ACC1 + vmovdqa 32*2-160($a_ptr), $ACC2 + vmovdqa 32*3-160($a_ptr), $ACC3 + vmovdqa 32*4-160($a_ptr), $ACC4 + vmovdqa 32*5-160($a_ptr), $ACC5 + vmovdqa 32*6-160($a_ptr), $ACC6 + vmovdqa 32*7-160($a_ptr), $ACC7 + vmovdqa 32*8-160($a_ptr), $ACC8 + + vpaddq 32*0-128(%rax), $ACC0, $ACC0 + vpaddq 32*1-128(%rax), $ACC1, $ACC1 + vpaddq 32*2-128(%rax), $ACC2, $ACC2 + vpaddq 32*3-128(%rax), $ACC3, $ACC3 + vpaddq 32*4-128(%rax), $ACC4, $ACC4 + vpaddq 32*5-128(%rax), $ACC5, $ACC5 + vpaddq 32*6-128(%rax), $ACC6, $ACC6 + vpaddq 32*7-128(%rax), $ACC7, $ACC7 + vpaddq 32*8-128(%rax), $ACC8, $ACC8 + + vpsubq 32*0-128($b_ptr), $ACC0, $ACC0 + vpsubq 32*1-128($b_ptr), $ACC1, $ACC1 + vpsubq 32*2-128($b_ptr), $ACC2, $ACC2 + vpsubq 32*3-128($b_ptr), $ACC3, $ACC3 + vpsubq 32*4-128($b_ptr), $ACC4, $ACC4 + vpsubq 32*5-128($b_ptr), $ACC5, $ACC5 + vpsubq 32*6-128($b_ptr), $ACC6, $ACC6 + vpsubq 32*7-128($b_ptr), $ACC7, $ACC7 + vpsubq 32*8-128($b_ptr), $ACC8, $ACC8 + + ret +.size avx2_sub_x4,.-avx2_sub_x4 + +.type avx2_select_n_store,\@abi-omnipotent +.align 32 +avx2_select_n_store: + vmovdqa `8+32*9*8`(%rsp), $Y + vpor `8+32*9*8+32`(%rsp), $Y, $Y + + vpandn $ACC0, $Y, $ACC0 + vpandn $ACC1, $Y, $ACC1 + vpandn $ACC2, $Y, $ACC2 + vpandn $ACC3, $Y, $ACC3 + vpandn $ACC4, $Y, $ACC4 + vpandn $ACC5, $Y, $ACC5 + vpandn $ACC6, $Y, $ACC6 + vmovdqa `8+32*9*8+32`(%rsp), $B + vpandn $ACC7, $Y, $ACC7 + vpandn `8+32*9*8`(%rsp), $B, $B + vpandn $ACC8, $Y, $ACC8 + + vpand 32*0(%rsi), $B, $T0 + lea 160(%rsi), %rax + vpand 32*1(%rsi), $B, $Y + vpxor $T0, $ACC0, $ACC0 + vpand 32*2(%rsi), $B, $T0 + vpxor $Y, $ACC1, $ACC1 + vpand 32*3(%rsi), $B, $Y + vpxor $T0, $ACC2, $ACC2 + vpand 32*4-160(%rax), $B, $T0 + vpxor $Y, $ACC3, $ACC3 + vpand 32*5-160(%rax), $B, $Y + vpxor $T0, $ACC4, $ACC4 + vpand 32*6-160(%rax), $B, $T0 
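Two details of the helpers above are worth spelling out. avx2_sub_x4 adds the .LAVX2_POLY_x8 table (a large multiple of p, so the value modulo p is unchanged) before subtracting, which keeps every lane nonnegative without any borrow handling. avx2_select_n_store then deals with the point-at-infinity special cases in constant time: two all-ones/all-zero masks saved on the stack record whether either input point was zero, and plain AND/ANDN/XOR pick between the freshly computed value and the two fallback operands passed in %rsi and %rdx, with no data-dependent branch. A one-lane illustration of that mask-select idiom (the helper name is mine, not OpenSSL's):

    def masked_select(computed, if_a_inf, if_b_inf, a_is_inf, b_is_inf):
        # branchless three-way choice with all-ones / all-zero masks
        ones = (1 << 64) - 1
        ma = ones if a_is_inf else 0
        mb = ones if b_is_inf else 0
        return ((computed & ~(ma | mb) & ones) ^
                (if_a_inf & (ma & ~mb) & ones) ^
                (if_b_inf & mb))

    assert masked_select(0x111, 0x222, 0x333, False, False) == 0x111
    assert masked_select(0x111, 0x222, 0x333, True,  False) == 0x222
    assert masked_select(0x111, 0x222, 0x333, False, True)  == 0x333
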
+ vpxor $Y, $ACC5, $ACC5 + vpand 32*7-160(%rax), $B, $Y + vpxor $T0, $ACC6, $ACC6 + vpand 32*8-160(%rax), $B, $T0 + vmovdqa `8+32*9*8+32`(%rsp), $B + vpxor $Y, $ACC7, $ACC7 + + vpand 32*0(%rdx), $B, $Y + lea 160(%rdx), %rax + vpxor $T0, $ACC8, $ACC8 + vpand 32*1(%rdx), $B, $T0 + vpxor $Y, $ACC0, $ACC0 + vpand 32*2(%rdx), $B, $Y + vpxor $T0, $ACC1, $ACC1 + vpand 32*3(%rdx), $B, $T0 + vpxor $Y, $ACC2, $ACC2 + vpand 32*4-160(%rax), $B, $Y + vpxor $T0, $ACC3, $ACC3 + vpand 32*5-160(%rax), $B, $T0 + vpxor $Y, $ACC4, $ACC4 + vpand 32*6-160(%rax), $B, $Y + vpxor $T0, $ACC5, $ACC5 + vpand 32*7-160(%rax), $B, $T0 + vpxor $Y, $ACC6, $ACC6 + vpand 32*8-160(%rax), $B, $Y + vpxor $T0, $ACC7, $ACC7 + vpxor $Y, $ACC8, $ACC8 + `&STORE` + + ret +.size avx2_select_n_store,.-avx2_select_n_store +___ +$code.=<<___ if (0); # inlined +################################################################################ +# void avx2_mul_by2_x4(void* RESULTx4, void *Ax4); +.type avx2_mul_by2_x4,\@abi-omnipotent +.align 32 +avx2_mul_by2_x4: + vmovdqa 32*0($a_ptr), $ACC0 + lea 160($a_ptr), %rax + vmovdqa 32*1($a_ptr), $ACC1 + vmovdqa 32*2($a_ptr), $ACC2 + vmovdqa 32*3($a_ptr), $ACC3 + vmovdqa 32*4-160(%rax), $ACC4 + vmovdqa 32*5-160(%rax), $ACC5 + vmovdqa 32*6-160(%rax), $ACC6 + vmovdqa 32*7-160(%rax), $ACC7 + vmovdqa 32*8-160(%rax), $ACC8 + + vpaddq $ACC0, $ACC0, $ACC0 + vpaddq $ACC1, $ACC1, $ACC1 + vpaddq $ACC2, $ACC2, $ACC2 + vpaddq $ACC3, $ACC3, $ACC3 + vpaddq $ACC4, $ACC4, $ACC4 + vpaddq $ACC5, $ACC5, $ACC5 + vpaddq $ACC6, $ACC6, $ACC6 + vpaddq $ACC7, $ACC7, $ACC7 + vpaddq $ACC8, $ACC8, $ACC8 + + ret +.size avx2_mul_by2_x4,.-avx2_mul_by2_x4 +___ +my ($r_ptr_in,$a_ptr_in,$b_ptr_in)=("%rdi","%rsi","%rdx"); +my ($r_ptr,$a_ptr,$b_ptr)=("%r8","%r9","%r10"); + +$code.=<<___; +################################################################################ +# void ecp_nistz256_avx2_point_add_affine_x4(void* RESULTx4, void *Ax4, void *Bx4); +.globl ecp_nistz256_avx2_point_add_affine_x4 +.type ecp_nistz256_avx2_point_add_affine_x4,\@function,3 +.align 32 +ecp_nistz256_avx2_point_add_affine_x4: + mov %rsp, %rax + push %rbp + vzeroupper +___ +$code.=<<___ if ($win64); + lea -16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + lea -8(%rax), %rbp + +# Result + 32*0 = Result.X +# Result + 32*9 = Result.Y +# Result + 32*18 = Result.Z + +# A + 32*0 = A.X +# A + 32*9 = A.Y +# A + 32*18 = A.Z + +# B + 32*0 = B.X +# B + 32*9 = B.Y + + sub \$`32*9*8+32*2+32*8`, %rsp + and \$-64, %rsp + + mov $r_ptr_in, $r_ptr + mov $a_ptr_in, $a_ptr + mov $b_ptr_in, $b_ptr + + vmovdqa 32*0($a_ptr_in), %ymm0 + vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK + vpxor %ymm1, %ymm1, %ymm1 + lea 256($a_ptr_in), %rax # size optimization + vpor 32*1($a_ptr_in), %ymm0, %ymm0 + vpor 32*2($a_ptr_in), %ymm0, %ymm0 + vpor 32*3($a_ptr_in), %ymm0, %ymm0 + vpor 32*4-256(%rax), %ymm0, %ymm0 + lea 256(%rax), %rcx # size optimization + vpor 32*5-256(%rax), %ymm0, %ymm0 + vpor 32*6-256(%rax), %ymm0, %ymm0 + vpor 32*7-256(%rax), %ymm0, %ymm0 + vpor 32*8-256(%rax), %ymm0, %ymm0 + vpor 32*9-256(%rax), %ymm0, %ymm0 + vpor 32*10-256(%rax), %ymm0, %ymm0 + vpor 32*11-256(%rax), %ymm0, %ymm0 + vpor 32*12-512(%rcx), %ymm0, %ymm0 + vpor 32*13-512(%rcx), %ymm0, %ymm0 + vpor 
32*14-512(%rcx), %ymm0, %ymm0 + vpor 32*15-512(%rcx), %ymm0, %ymm0 + vpor 32*16-512(%rcx), %ymm0, %ymm0 + vpor 32*17-512(%rcx), %ymm0, %ymm0 + vpcmpeqq %ymm1, %ymm0, %ymm0 + vmovdqa %ymm0, `32*9*8`(%rsp) + + vpxor %ymm1, %ymm1, %ymm1 + vmovdqa 32*0($b_ptr), %ymm0 + lea 256($b_ptr), %rax # size optimization + vpor 32*1($b_ptr), %ymm0, %ymm0 + vpor 32*2($b_ptr), %ymm0, %ymm0 + vpor 32*3($b_ptr), %ymm0, %ymm0 + vpor 32*4-256(%rax), %ymm0, %ymm0 + lea 256(%rax), %rcx # size optimization + vpor 32*5-256(%rax), %ymm0, %ymm0 + vpor 32*6-256(%rax), %ymm0, %ymm0 + vpor 32*7-256(%rax), %ymm0, %ymm0 + vpor 32*8-256(%rax), %ymm0, %ymm0 + vpor 32*9-256(%rax), %ymm0, %ymm0 + vpor 32*10-256(%rax), %ymm0, %ymm0 + vpor 32*11-256(%rax), %ymm0, %ymm0 + vpor 32*12-512(%rcx), %ymm0, %ymm0 + vpor 32*13-512(%rcx), %ymm0, %ymm0 + vpor 32*14-512(%rcx), %ymm0, %ymm0 + vpor 32*15-512(%rcx), %ymm0, %ymm0 + vpor 32*16-512(%rcx), %ymm0, %ymm0 + vpor 32*17-512(%rcx), %ymm0, %ymm0 + vpcmpeqq %ymm1, %ymm0, %ymm0 + vmovdqa %ymm0, `32*9*8+32`(%rsp) + + # Z1^2 = Z1*Z1 + lea `32*9*2`($a_ptr), %rsi + lea `32*9*2`(%rsp), %rdi + lea `32*9*8+32*2`(%rsp), %rcx # temporary vector + call avx2_sqr_x4 + call avx2_normalize_n_store + + # U2 = X2*Z1^2 + lea `32*9*0`($b_ptr), %rsi + lea `32*9*2`(%rsp), %rdx + lea `32*9*0`(%rsp), %rdi + call avx2_mul_x4 + #call avx2_normalize + `&STORE` + + # S2 = Z1*Z1^2 = Z1^3 + lea `32*9*2`($a_ptr), %rsi + lea `32*9*2`(%rsp), %rdx + lea `32*9*1`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # S2 = S2*Y2 = Y2*Z1^3 + lea `32*9*1`($b_ptr), %rsi + lea `32*9*1`(%rsp), %rdx + lea `32*9*1`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # H = U2 - U1 = U2 - X1 + lea `32*9*0`(%rsp), %rsi + lea `32*9*0`($a_ptr), %rdx + lea `32*9*3`(%rsp), %rdi + call avx2_sub_x4 + call avx2_normalize_n_store + + # R = S2 - S1 = S2 - Y1 + lea `32*9*1`(%rsp), %rsi + lea `32*9*1`($a_ptr), %rdx + lea `32*9*4`(%rsp), %rdi + call avx2_sub_x4 + call avx2_normalize_n_store + + # Z3 = H*Z1*Z2 + lea `32*9*3`(%rsp), %rsi + lea `32*9*2`($a_ptr), %rdx + lea `32*9*2`($r_ptr), %rdi + call avx2_mul_x4 + call avx2_normalize + + lea .LONE(%rip), %rsi + lea `32*9*2`($a_ptr), %rdx + call avx2_select_n_store + + # R^2 = R^2 + lea `32*9*4`(%rsp), %rsi + lea `32*9*6`(%rsp), %rdi + lea `32*9*8+32*2`(%rsp), %rcx # temporary vector + call avx2_sqr_x4 + call avx2_normalize_n_store + + # H^2 = H^2 + lea `32*9*3`(%rsp), %rsi + lea `32*9*5`(%rsp), %rdi + call avx2_sqr_x4 + call avx2_normalize_n_store + + # H^3 = H^2*H + lea `32*9*3`(%rsp), %rsi + lea `32*9*5`(%rsp), %rdx + lea `32*9*7`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # U2 = U1*H^2 + lea `32*9*0`($a_ptr), %rsi + lea `32*9*5`(%rsp), %rdx + lea `32*9*0`(%rsp), %rdi + call avx2_mul_x4 + #call avx2_normalize + `&STORE` + + # Hsqr = U2*2 + #lea 32*9*0(%rsp), %rsi + #lea 32*9*5(%rsp), %rdi + #call avx2_mul_by2_x4 + + vpaddq $ACC0, $ACC0, $ACC0 # inlined avx2_mul_by2_x4 + lea `32*9*5`(%rsp), %rdi + vpaddq $ACC1, $ACC1, $ACC1 + vpaddq $ACC2, $ACC2, $ACC2 + vpaddq $ACC3, $ACC3, $ACC3 + vpaddq $ACC4, $ACC4, $ACC4 + vpaddq $ACC5, $ACC5, $ACC5 + vpaddq $ACC6, $ACC6, $ACC6 + vpaddq $ACC7, $ACC7, $ACC7 + vpaddq $ACC8, $ACC8, $ACC8 + call avx2_normalize_n_store + + # X3 = R^2 - H^3 + #lea 32*9*6(%rsp), %rsi + #lea 32*9*7(%rsp), %rdx + #lea 32*9*5(%rsp), %rcx + #lea 32*9*0($r_ptr), %rdi + #call avx2_sub_x4 + #NORMALIZE + #STORE + + # X3 = X3 - U2*2 + #lea 32*9*0($r_ptr), %rsi + #lea 32*9*0($r_ptr), %rdi + #call avx2_sub_x4 + #NORMALIZE + #STORE + + lea 
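From "# Z1^2 = Z1*Z1" onward the comments spell out a textbook mixed addition of a Jacobian point (X1, Y1, Z1) and an affine point (x2, y2), evaluated for four pairs at once; the Y3 steps follow just below. The sketch here replays those comments for a single pair over P-256 and checks the result against a plain affine computation (Python 3.8+ for the modular-inverse pow; the helper names and test values are mine, the curve constants are the standard P-256 generator; illustration only, not part of the patch):

    p  = 2**256 - 2**224 + 2**192 + 2**96 - 1
    a  = p - 3
    gx = 0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296
    gy = 0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5

    def affine_add(P, Q):                       # reference chord/tangent rule
        (x1, y1), (x2, y2) = P, Q
        if P == Q:
            lam = (3 * x1 * x1 + a) * pow(2 * y1, -1, p) % p
        else:
            lam = (y2 - y1) * pow(x2 - x1, -1, p) % p
        x3 = (lam * lam - x1 - x2) % p
        return x3, (lam * (x1 - x3) - y1) % p

    def madd(X1, Y1, Z1, x2, y2):               # the sequence in the comments
        Z1Z1 = Z1 * Z1 % p                      # Z1^2
        U2   = x2 * Z1Z1 % p                    # U2 = X2*Z1^2
        S2   = y2 * Z1 * Z1Z1 % p               # S2 = Y2*Z1^3
        H    = (U2 - X1) % p                    # H = U2 - U1
        R    = (S2 - Y1) % p                    # R = S2 - S1
        Z3   = H * Z1 % p                       # Z3 = H*Z1*Z2, with Z2 = 1
        H2, H3 = H * H % p, H * H * H % p
        U2   = X1 * H2 % p                      # U2 = U1*H^2
        X3   = (R * R - H3 - 2 * U2) % p        # X3 = R^2 - H^3 - U2*2
        Y3   = (R * (U2 - X3) - Y1 * H3) % p
        return X3, Y3, Z3

    G  = (gx, gy)
    P2 = affine_add(G, G)                       # 2G, affine
    X3, Y3, Z3 = madd(P2[0], P2[1], 1, gx, gy)  # 2G (Jacobian, Z = 1) + G
    zi = pow(Z3, -1, p)
    assert (X3 * zi * zi % p, Y3 * zi**3 % p) == affine_add(P2, G)   # = 3G

The x4 variants here do the same thing four lanes at a time, with the infinity masks prepared above patching in the correct answer whenever one of the pairs involves the point at infinity.
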
`32*9*6+128`(%rsp), %rsi + lea .LAVX2_POLY_x2+128(%rip), %rax + lea `32*9*7+128`(%rsp), %rdx + lea `32*9*5+128`(%rsp), %rcx + lea `32*9*0`($r_ptr), %rdi + + vmovdqa 32*0-128(%rsi), $ACC0 + vmovdqa 32*1-128(%rsi), $ACC1 + vmovdqa 32*2-128(%rsi), $ACC2 + vmovdqa 32*3-128(%rsi), $ACC3 + vmovdqa 32*4-128(%rsi), $ACC4 + vmovdqa 32*5-128(%rsi), $ACC5 + vmovdqa 32*6-128(%rsi), $ACC6 + vmovdqa 32*7-128(%rsi), $ACC7 + vmovdqa 32*8-128(%rsi), $ACC8 + + vpaddq 32*0-128(%rax), $ACC0, $ACC0 + vpaddq 32*1-128(%rax), $ACC1, $ACC1 + vpaddq 32*2-128(%rax), $ACC2, $ACC2 + vpaddq 32*3-128(%rax), $ACC3, $ACC3 + vpaddq 32*4-128(%rax), $ACC4, $ACC4 + vpaddq 32*5-128(%rax), $ACC5, $ACC5 + vpaddq 32*6-128(%rax), $ACC6, $ACC6 + vpaddq 32*7-128(%rax), $ACC7, $ACC7 + vpaddq 32*8-128(%rax), $ACC8, $ACC8 + + vpsubq 32*0-128(%rdx), $ACC0, $ACC0 + vpsubq 32*1-128(%rdx), $ACC1, $ACC1 + vpsubq 32*2-128(%rdx), $ACC2, $ACC2 + vpsubq 32*3-128(%rdx), $ACC3, $ACC3 + vpsubq 32*4-128(%rdx), $ACC4, $ACC4 + vpsubq 32*5-128(%rdx), $ACC5, $ACC5 + vpsubq 32*6-128(%rdx), $ACC6, $ACC6 + vpsubq 32*7-128(%rdx), $ACC7, $ACC7 + vpsubq 32*8-128(%rdx), $ACC8, $ACC8 + + vpsubq 32*0-128(%rcx), $ACC0, $ACC0 + vpsubq 32*1-128(%rcx), $ACC1, $ACC1 + vpsubq 32*2-128(%rcx), $ACC2, $ACC2 + vpsubq 32*3-128(%rcx), $ACC3, $ACC3 + vpsubq 32*4-128(%rcx), $ACC4, $ACC4 + vpsubq 32*5-128(%rcx), $ACC5, $ACC5 + vpsubq 32*6-128(%rcx), $ACC6, $ACC6 + vpsubq 32*7-128(%rcx), $ACC7, $ACC7 + vpsubq 32*8-128(%rcx), $ACC8, $ACC8 + call avx2_normalize + + lea 32*0($b_ptr), %rsi + lea 32*0($a_ptr), %rdx + call avx2_select_n_store + + # H = U2 - X3 + lea `32*9*0`(%rsp), %rsi + lea `32*9*0`($r_ptr), %rdx + lea `32*9*3`(%rsp), %rdi + call avx2_sub_x4 + call avx2_normalize_n_store + + # + lea `32*9*3`(%rsp), %rsi + lea `32*9*4`(%rsp), %rdx + lea `32*9*3`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # + lea `32*9*7`(%rsp), %rsi + lea `32*9*1`($a_ptr), %rdx + lea `32*9*1`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # + lea `32*9*3`(%rsp), %rsi + lea `32*9*1`(%rsp), %rdx + lea `32*9*1`($r_ptr), %rdi + call avx2_sub_x4 + call avx2_normalize + + lea 32*9($b_ptr), %rsi + lea 32*9($a_ptr), %rdx + call avx2_select_n_store + + #lea 32*9*0($r_ptr), %rsi + #lea 32*9*0($r_ptr), %rdi + #call avx2_mul_by1_x4 + #NORMALIZE + #STORE + + lea `32*9*1`($r_ptr), %rsi + lea `32*9*1`($r_ptr), %rdi + call avx2_mul_by1_x4 + call avx2_normalize_n_store + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps %xmm6, -16*10(%rbp) + movaps %xmm7, -16*9(%rbp) + movaps %xmm8, -16*8(%rbp) + movaps %xmm9, -16*7(%rbp) + movaps %xmm10, -16*6(%rbp) + movaps %xmm11, -16*5(%rbp) + movaps %xmm12, -16*4(%rbp) + movaps %xmm13, -16*3(%rbp) + movaps %xmm14, -16*2(%rbp) + movaps %xmm15, -16*1(%rbp) +___ +$code.=<<___; + mov %rbp, %rsp + pop %rbp + ret +.size ecp_nistz256_avx2_point_add_affine_x4,.-ecp_nistz256_avx2_point_add_affine_x4 + +################################################################################ +# void ecp_nistz256_avx2_point_add_affines_x4(void* RESULTx4, void *Ax4, void *Bx4); +.globl ecp_nistz256_avx2_point_add_affines_x4 +.type ecp_nistz256_avx2_point_add_affines_x4,\@function,3 +.align 32 +ecp_nistz256_avx2_point_add_affines_x4: + mov %rsp, %rax + push %rbp + vzeroupper +___ +$code.=<<___ if ($win64); + lea -16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + 
vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + lea -8(%rax), %rbp + +# Result + 32*0 = Result.X +# Result + 32*9 = Result.Y +# Result + 32*18 = Result.Z + +# A + 32*0 = A.X +# A + 32*9 = A.Y + +# B + 32*0 = B.X +# B + 32*9 = B.Y + + sub \$`32*9*8+32*2+32*8`, %rsp + and \$-64, %rsp + + mov $r_ptr_in, $r_ptr + mov $a_ptr_in, $a_ptr + mov $b_ptr_in, $b_ptr + + vmovdqa 32*0($a_ptr_in), %ymm0 + vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK + vpxor %ymm1, %ymm1, %ymm1 + lea 256($a_ptr_in), %rax # size optimization + vpor 32*1($a_ptr_in), %ymm0, %ymm0 + vpor 32*2($a_ptr_in), %ymm0, %ymm0 + vpor 32*3($a_ptr_in), %ymm0, %ymm0 + vpor 32*4-256(%rax), %ymm0, %ymm0 + lea 256(%rax), %rcx # size optimization + vpor 32*5-256(%rax), %ymm0, %ymm0 + vpor 32*6-256(%rax), %ymm0, %ymm0 + vpor 32*7-256(%rax), %ymm0, %ymm0 + vpor 32*8-256(%rax), %ymm0, %ymm0 + vpor 32*9-256(%rax), %ymm0, %ymm0 + vpor 32*10-256(%rax), %ymm0, %ymm0 + vpor 32*11-256(%rax), %ymm0, %ymm0 + vpor 32*12-512(%rcx), %ymm0, %ymm0 + vpor 32*13-512(%rcx), %ymm0, %ymm0 + vpor 32*14-512(%rcx), %ymm0, %ymm0 + vpor 32*15-512(%rcx), %ymm0, %ymm0 + vpor 32*16-512(%rcx), %ymm0, %ymm0 + vpor 32*17-512(%rcx), %ymm0, %ymm0 + vpcmpeqq %ymm1, %ymm0, %ymm0 + vmovdqa %ymm0, `32*9*8`(%rsp) + + vpxor %ymm1, %ymm1, %ymm1 + vmovdqa 32*0($b_ptr), %ymm0 + lea 256($b_ptr), %rax # size optimization + vpor 32*1($b_ptr), %ymm0, %ymm0 + vpor 32*2($b_ptr), %ymm0, %ymm0 + vpor 32*3($b_ptr), %ymm0, %ymm0 + vpor 32*4-256(%rax), %ymm0, %ymm0 + lea 256(%rax), %rcx # size optimization + vpor 32*5-256(%rax), %ymm0, %ymm0 + vpor 32*6-256(%rax), %ymm0, %ymm0 + vpor 32*7-256(%rax), %ymm0, %ymm0 + vpor 32*8-256(%rax), %ymm0, %ymm0 + vpor 32*9-256(%rax), %ymm0, %ymm0 + vpor 32*10-256(%rax), %ymm0, %ymm0 + vpor 32*11-256(%rax), %ymm0, %ymm0 + vpor 32*12-512(%rcx), %ymm0, %ymm0 + vpor 32*13-512(%rcx), %ymm0, %ymm0 + vpor 32*14-512(%rcx), %ymm0, %ymm0 + vpor 32*15-512(%rcx), %ymm0, %ymm0 + vpor 32*16-512(%rcx), %ymm0, %ymm0 + vpor 32*17-512(%rcx), %ymm0, %ymm0 + vpcmpeqq %ymm1, %ymm0, %ymm0 + vmovdqa %ymm0, `32*9*8+32`(%rsp) + + # H = U2 - U1 = X2 - X1 + lea `32*9*0`($b_ptr), %rsi + lea `32*9*0`($a_ptr), %rdx + lea `32*9*3`(%rsp), %rdi + call avx2_sub_x4 + call avx2_normalize_n_store + + # R = S2 - S1 = Y2 - Y1 + lea `32*9*1`($b_ptr), %rsi + lea `32*9*1`($a_ptr), %rdx + lea `32*9*4`(%rsp), %rdi + call avx2_sub_x4 + call avx2_normalize_n_store + + # Z3 = H*Z1*Z2 = H + lea `32*9*3`(%rsp), %rsi + lea `32*9*2`($r_ptr), %rdi + call avx2_mul_by1_x4 + call avx2_normalize + + vmovdqa `32*9*8`(%rsp), $B + vpor `32*9*8+32`(%rsp), $B, $B + + vpandn $ACC0, $B, $ACC0 + lea .LONE+128(%rip), %rax + vpandn $ACC1, $B, $ACC1 + vpandn $ACC2, $B, $ACC2 + vpandn $ACC3, $B, $ACC3 + vpandn $ACC4, $B, $ACC4 + vpandn $ACC5, $B, $ACC5 + vpandn $ACC6, $B, $ACC6 + vpandn $ACC7, $B, $ACC7 + + vpand 32*0-128(%rax), $B, $T0 + vpandn $ACC8, $B, $ACC8 + vpand 32*1-128(%rax), $B, $Y + vpxor $T0, $ACC0, $ACC0 + vpand 32*2-128(%rax), $B, $T0 + vpxor $Y, $ACC1, $ACC1 + vpand 32*3-128(%rax), $B, $Y + vpxor $T0, $ACC2, $ACC2 + vpand 32*4-128(%rax), $B, $T0 + vpxor $Y, $ACC3, $ACC3 + vpand 32*5-128(%rax), $B, $Y + vpxor $T0, $ACC4, $ACC4 + vpand 32*6-128(%rax), $B, $T0 + vpxor $Y, $ACC5, $ACC5 + vpand 32*7-128(%rax), $B, $Y + vpxor $T0, $ACC6, $ACC6 + vpand 32*8-128(%rax), $B, $T0 + vpxor $Y, $ACC7, $ACC7 + vpxor $T0, $ACC8, $ACC8 + `&STORE` + + # R^2 = R^2 + lea `32*9*4`(%rsp), %rsi + lea `32*9*6`(%rsp), %rdi + lea `32*9*8+32*2`(%rsp), %rcx # temporary 
vector + call avx2_sqr_x4 + call avx2_normalize_n_store + + # H^2 = H^2 + lea `32*9*3`(%rsp), %rsi + lea `32*9*5`(%rsp), %rdi + call avx2_sqr_x4 + call avx2_normalize_n_store + + # H^3 = H^2*H + lea `32*9*3`(%rsp), %rsi + lea `32*9*5`(%rsp), %rdx + lea `32*9*7`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # U2 = U1*H^2 + lea `32*9*0`($a_ptr), %rsi + lea `32*9*5`(%rsp), %rdx + lea `32*9*0`(%rsp), %rdi + call avx2_mul_x4 + #call avx2_normalize + `&STORE` + + # Hsqr = U2*2 + #lea 32*9*0(%rsp), %rsi + #lea 32*9*5(%rsp), %rdi + #call avx2_mul_by2_x4 + + vpaddq $ACC0, $ACC0, $ACC0 # inlined avx2_mul_by2_x4 + lea `32*9*5`(%rsp), %rdi + vpaddq $ACC1, $ACC1, $ACC1 + vpaddq $ACC2, $ACC2, $ACC2 + vpaddq $ACC3, $ACC3, $ACC3 + vpaddq $ACC4, $ACC4, $ACC4 + vpaddq $ACC5, $ACC5, $ACC5 + vpaddq $ACC6, $ACC6, $ACC6 + vpaddq $ACC7, $ACC7, $ACC7 + vpaddq $ACC8, $ACC8, $ACC8 + call avx2_normalize_n_store + + # X3 = R^2 - H^3 + #lea 32*9*6(%rsp), %rsi + #lea 32*9*7(%rsp), %rdx + #lea 32*9*5(%rsp), %rcx + #lea 32*9*0($r_ptr), %rdi + #call avx2_sub_x4 + #NORMALIZE + #STORE + + # X3 = X3 - U2*2 + #lea 32*9*0($r_ptr), %rsi + #lea 32*9*0($r_ptr), %rdi + #call avx2_sub_x4 + #NORMALIZE + #STORE + + lea `32*9*6+128`(%rsp), %rsi + lea .LAVX2_POLY_x2+128(%rip), %rax + lea `32*9*7+128`(%rsp), %rdx + lea `32*9*5+128`(%rsp), %rcx + lea `32*9*0`($r_ptr), %rdi + + vmovdqa 32*0-128(%rsi), $ACC0 + vmovdqa 32*1-128(%rsi), $ACC1 + vmovdqa 32*2-128(%rsi), $ACC2 + vmovdqa 32*3-128(%rsi), $ACC3 + vmovdqa 32*4-128(%rsi), $ACC4 + vmovdqa 32*5-128(%rsi), $ACC5 + vmovdqa 32*6-128(%rsi), $ACC6 + vmovdqa 32*7-128(%rsi), $ACC7 + vmovdqa 32*8-128(%rsi), $ACC8 + + vpaddq 32*0-128(%rax), $ACC0, $ACC0 + vpaddq 32*1-128(%rax), $ACC1, $ACC1 + vpaddq 32*2-128(%rax), $ACC2, $ACC2 + vpaddq 32*3-128(%rax), $ACC3, $ACC3 + vpaddq 32*4-128(%rax), $ACC4, $ACC4 + vpaddq 32*5-128(%rax), $ACC5, $ACC5 + vpaddq 32*6-128(%rax), $ACC6, $ACC6 + vpaddq 32*7-128(%rax), $ACC7, $ACC7 + vpaddq 32*8-128(%rax), $ACC8, $ACC8 + + vpsubq 32*0-128(%rdx), $ACC0, $ACC0 + vpsubq 32*1-128(%rdx), $ACC1, $ACC1 + vpsubq 32*2-128(%rdx), $ACC2, $ACC2 + vpsubq 32*3-128(%rdx), $ACC3, $ACC3 + vpsubq 32*4-128(%rdx), $ACC4, $ACC4 + vpsubq 32*5-128(%rdx), $ACC5, $ACC5 + vpsubq 32*6-128(%rdx), $ACC6, $ACC6 + vpsubq 32*7-128(%rdx), $ACC7, $ACC7 + vpsubq 32*8-128(%rdx), $ACC8, $ACC8 + + vpsubq 32*0-128(%rcx), $ACC0, $ACC0 + vpsubq 32*1-128(%rcx), $ACC1, $ACC1 + vpsubq 32*2-128(%rcx), $ACC2, $ACC2 + vpsubq 32*3-128(%rcx), $ACC3, $ACC3 + vpsubq 32*4-128(%rcx), $ACC4, $ACC4 + vpsubq 32*5-128(%rcx), $ACC5, $ACC5 + vpsubq 32*6-128(%rcx), $ACC6, $ACC6 + vpsubq 32*7-128(%rcx), $ACC7, $ACC7 + vpsubq 32*8-128(%rcx), $ACC8, $ACC8 + call avx2_normalize + + lea 32*0($b_ptr), %rsi + lea 32*0($a_ptr), %rdx + call avx2_select_n_store + + # H = U2 - X3 + lea `32*9*0`(%rsp), %rsi + lea `32*9*0`($r_ptr), %rdx + lea `32*9*3`(%rsp), %rdi + call avx2_sub_x4 + call avx2_normalize_n_store + + # H = H*R + lea `32*9*3`(%rsp), %rsi + lea `32*9*4`(%rsp), %rdx + lea `32*9*3`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # S2 = S1 * H^3 + lea `32*9*7`(%rsp), %rsi + lea `32*9*1`($a_ptr), %rdx + lea `32*9*1`(%rsp), %rdi + call avx2_mul_x4 + call avx2_normalize_n_store + + # + lea `32*9*3`(%rsp), %rsi + lea `32*9*1`(%rsp), %rdx + lea `32*9*1`($r_ptr), %rdi + call avx2_sub_x4 + call avx2_normalize + + lea 32*9($b_ptr), %rsi + lea 32*9($a_ptr), %rdx + call avx2_select_n_store + + #lea 32*9*0($r_ptr), %rsi + #lea 32*9*0($r_ptr), %rdi + #call avx2_mul_by1_x4 + #NORMALIZE + #STORE + + 
lea `32*9*1`($r_ptr), %rsi + lea `32*9*1`($r_ptr), %rdi + call avx2_mul_by1_x4 + call avx2_normalize_n_store + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps %xmm6, -16*10(%rbp) + movaps %xmm7, -16*9(%rbp) + movaps %xmm8, -16*8(%rbp) + movaps %xmm9, -16*7(%rbp) + movaps %xmm10, -16*6(%rbp) + movaps %xmm11, -16*5(%rbp) + movaps %xmm12, -16*4(%rbp) + movaps %xmm13, -16*3(%rbp) + movaps %xmm14, -16*2(%rbp) + movaps %xmm15, -16*1(%rbp) +___ +$code.=<<___; + mov %rbp, %rsp + pop %rbp + ret +.size ecp_nistz256_avx2_point_add_affines_x4,.-ecp_nistz256_avx2_point_add_affines_x4 + +################################################################################ +# void ecp_nistz256_avx2_to_mont(void* RESULTx4, void *Ax4); +.globl ecp_nistz256_avx2_to_mont +.type ecp_nistz256_avx2_to_mont,\@function,2 +.align 32 +ecp_nistz256_avx2_to_mont: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -8-16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK + lea .LTO_MONT_AVX2(%rip), %rdx + call avx2_mul_x4 + call avx2_normalize_n_store + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*0(%rsp), %xmm6 + movaps 16*1(%rsp), %xmm7 + movaps 16*2(%rsp), %xmm8 + movaps 16*3(%rsp), %xmm9 + movaps 16*4(%rsp), %xmm10 + movaps 16*5(%rsp), %xmm11 + movaps 16*6(%rsp), %xmm12 + movaps 16*7(%rsp), %xmm13 + movaps 16*8(%rsp), %xmm14 + movaps 16*9(%rsp), %xmm15 + lea 8+16*10(%rsp), %rsp +___ +$code.=<<___; + ret +.size ecp_nistz256_avx2_to_mont,.-ecp_nistz256_avx2_to_mont + +################################################################################ +# void ecp_nistz256_avx2_from_mont(void* RESULTx4, void *Ax4); +.globl ecp_nistz256_avx2_from_mont +.type ecp_nistz256_avx2_from_mont,\@function,2 +.align 32 +ecp_nistz256_avx2_from_mont: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -8-16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK + lea .LFROM_MONT_AVX2(%rip), %rdx + call avx2_mul_x4 + call avx2_normalize_n_store + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*0(%rsp), %xmm6 + movaps 16*1(%rsp), %xmm7 + movaps 16*2(%rsp), %xmm8 + movaps 16*3(%rsp), %xmm9 + movaps 16*4(%rsp), %xmm10 + movaps 16*5(%rsp), %xmm11 + movaps 16*6(%rsp), %xmm12 + movaps 16*7(%rsp), %xmm13 + movaps 16*8(%rsp), %xmm14 + movaps 16*9(%rsp), %xmm15 + lea 8+16*10(%rsp), %rsp +___ +$code.=<<___; + ret +.size ecp_nistz256_avx2_from_mont,.-ecp_nistz256_avx2_from_mont + +################################################################################ +# void ecp_nistz256_avx2_set1(void* RESULTx4); +.globl ecp_nistz256_avx2_set1 +.type ecp_nistz256_avx2_set1,\@function,1 +.align 32 +ecp_nistz256_avx2_set1: + lea .LONE+128(%rip), %rax + lea 128(%rdi), %rdi + vzeroupper + vmovdqa 32*0-128(%rax), %ymm0 + vmovdqa 32*1-128(%rax), %ymm1 + vmovdqa 32*2-128(%rax), %ymm2 + vmovdqa 32*3-128(%rax), %ymm3 + vmovdqa 32*4-128(%rax), %ymm4 + vmovdqa 
32*5-128(%rax), %ymm5 + vmovdqa %ymm0, 32*0-128(%rdi) + vmovdqa 32*6-128(%rax), %ymm0 + vmovdqa %ymm1, 32*1-128(%rdi) + vmovdqa 32*7-128(%rax), %ymm1 + vmovdqa %ymm2, 32*2-128(%rdi) + vmovdqa 32*8-128(%rax), %ymm2 + vmovdqa %ymm3, 32*3-128(%rdi) + vmovdqa %ymm4, 32*4-128(%rdi) + vmovdqa %ymm5, 32*5-128(%rdi) + vmovdqa %ymm0, 32*6-128(%rdi) + vmovdqa %ymm1, 32*7-128(%rdi) + vmovdqa %ymm2, 32*8-128(%rdi) + + vzeroupper + ret +.size ecp_nistz256_avx2_set1,.-ecp_nistz256_avx2_set1 +___ +} +{ +################################################################################ +# void ecp_nistz256_avx2_multi_select_w7(void* RESULT, void *in, +# int index0, int index1, int index2, int index3); +################################################################################ + +my ($val,$in_t,$index0,$index1,$index2,$index3)=("%rdi","%rsi","%edx","%ecx","%r8d","%r9d"); +my ($INDEX0,$INDEX1,$INDEX2,$INDEX3)=map("%ymm$_",(0..3)); +my ($R0a,$R0b,$R1a,$R1b,$R2a,$R2b,$R3a,$R3b)=map("%ymm$_",(4..11)); +my ($M0,$T0,$T1,$TMP0)=map("%ymm$_",(12..15)); + +$code.=<<___; +.globl ecp_nistz256_avx2_multi_select_w7 +.type ecp_nistz256_avx2_multi_select_w7,\@function,6 +.align 32 +ecp_nistz256_avx2_multi_select_w7: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -8-16*10(%rsp), %rsp + vmovaps %xmm6, -8-16*10(%rax) + vmovaps %xmm7, -8-16*9(%rax) + vmovaps %xmm8, -8-16*8(%rax) + vmovaps %xmm9, -8-16*7(%rax) + vmovaps %xmm10, -8-16*6(%rax) + vmovaps %xmm11, -8-16*5(%rax) + vmovaps %xmm12, -8-16*4(%rax) + vmovaps %xmm13, -8-16*3(%rax) + vmovaps %xmm14, -8-16*2(%rax) + vmovaps %xmm15, -8-16*1(%rax) +___ +$code.=<<___; + lea .LIntOne(%rip), %rax + + vmovd $index0, %xmm0 + vmovd $index1, %xmm1 + vmovd $index2, %xmm2 + vmovd $index3, %xmm3 + + vpxor $R0a, $R0a, $R0a + vpxor $R0b, $R0b, $R0b + vpxor $R1a, $R1a, $R1a + vpxor $R1b, $R1b, $R1b + vpxor $R2a, $R2a, $R2a + vpxor $R2b, $R2b, $R2b + vpxor $R3a, $R3a, $R3a + vpxor $R3b, $R3b, $R3b + vmovdqa (%rax), $M0 + + vpermd $INDEX0, $R0a, $INDEX0 + vpermd $INDEX1, $R0a, $INDEX1 + vpermd $INDEX2, $R0a, $INDEX2 + vpermd $INDEX3, $R0a, $INDEX3 + + mov \$64, %ecx + lea 112($val), $val # size optimization + jmp .Lmulti_select_loop_avx2 + +# INDEX=0, corresponds to the point at infty (0,0) +.align 32 +.Lmulti_select_loop_avx2: + vpcmpeqd $INDEX0, $M0, $TMP0 + + vmovdqa `32*0+32*64*2*0`($in_t), $T0 + vmovdqa `32*1+32*64*2*0`($in_t), $T1 + vpand $TMP0, $T0, $T0 + vpand $TMP0, $T1, $T1 + vpxor $T0, $R0a, $R0a + vpxor $T1, $R0b, $R0b + + vpcmpeqd $INDEX1, $M0, $TMP0 + + vmovdqa `32*0+32*64*2*1`($in_t), $T0 + vmovdqa `32*1+32*64*2*1`($in_t), $T1 + vpand $TMP0, $T0, $T0 + vpand $TMP0, $T1, $T1 + vpxor $T0, $R1a, $R1a + vpxor $T1, $R1b, $R1b + + vpcmpeqd $INDEX2, $M0, $TMP0 + + vmovdqa `32*0+32*64*2*2`($in_t), $T0 + vmovdqa `32*1+32*64*2*2`($in_t), $T1 + vpand $TMP0, $T0, $T0 + vpand $TMP0, $T1, $T1 + vpxor $T0, $R2a, $R2a + vpxor $T1, $R2b, $R2b + + vpcmpeqd $INDEX3, $M0, $TMP0 + + vmovdqa `32*0+32*64*2*3`($in_t), $T0 + vmovdqa `32*1+32*64*2*3`($in_t), $T1 + vpand $TMP0, $T0, $T0 + vpand $TMP0, $T1, $T1 + vpxor $T0, $R3a, $R3a + vpxor $T1, $R3b, $R3b + + vpaddd (%rax), $M0, $M0 # increment + lea 32*2($in_t), $in_t + + dec %ecx + jnz .Lmulti_select_loop_avx2 + + vmovdqu $R0a, 32*0-112($val) + vmovdqu $R0b, 32*1-112($val) + vmovdqu $R1a, 32*2-112($val) + vmovdqu $R1b, 32*3-112($val) + vmovdqu $R2a, 32*4-112($val) + vmovdqu $R2b, 32*5-112($val) + vmovdqu $R3a, 32*6-112($val) + vmovdqu $R3b, 32*7-112($val) + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*0(%rsp), %xmm6 + movaps 
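ecp_nistz256_avx2_multi_select_w7 never uses its indices to address memory: it walks all 64 table entries, compares a running counter (seeded from .LIntOne and incremented each pass) against the four requested indices with vpcmpeqd, and accumulates the single matching entry with AND/XOR, so the memory access pattern is independent of the index values, which are typically secret window digits of a scalar multiplication. Index 0 matches nothing and therefore yields zero, the encoding of the point at infinity, as the comment notes. A one-lane sketch of the same access pattern (helper name mine, illustration only):

    def ct_table_lookup(table, index):
        # scan every entry; only the one whose 1-based position equals `index`
        # contributes, selected by an all-ones / all-zero mask
        ones = (1 << 256) - 1
        acc = 0
        for position, entry in enumerate(table, start=1):
            mask = ones if position == index else 0
            acc ^= entry & mask
        return acc                     # index 0 -> 0, the point at infinity

    table = [(i * 0x0123456789abcdef1122334455667788) % 2**256
             for i in range(1, 65)]
    assert ct_table_lookup(table, 7) == table[6]
    assert ct_table_lookup(table, 0) == 0
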
16*1(%rsp), %xmm7 + movaps 16*2(%rsp), %xmm8 + movaps 16*3(%rsp), %xmm9 + movaps 16*4(%rsp), %xmm10 + movaps 16*5(%rsp), %xmm11 + movaps 16*6(%rsp), %xmm12 + movaps 16*7(%rsp), %xmm13 + movaps 16*8(%rsp), %xmm14 + movaps 16*9(%rsp), %xmm15 + lea 8+16*10(%rsp), %rsp +___ +$code.=<<___; + ret +.size ecp_nistz256_avx2_multi_select_w7,.-ecp_nistz256_avx2_multi_select_w7 + +.extern OPENSSL_ia32cap_P +.globl ecp_nistz_avx2_eligible +.type ecp_nistz_avx2_eligible,\@abi-omnipotent +.align 32 +ecp_nistz_avx2_eligible: + mov OPENSSL_ia32cap_P+8(%rip),%eax + shr \$5,%eax + and \$1,%eax + ret +.size ecp_nistz_avx2_eligible,.-ecp_nistz_avx2_eligible +___ +} +}} else {{ # assembler is too old +$code.=<<___; +.text + +.globl ecp_nistz256_avx2_transpose_convert +.globl ecp_nistz256_avx2_convert_transpose_back +.globl ecp_nistz256_avx2_point_add_affine_x4 +.globl ecp_nistz256_avx2_point_add_affines_x4 +.globl ecp_nistz256_avx2_to_mont +.globl ecp_nistz256_avx2_from_mont +.globl ecp_nistz256_avx2_set1 +.globl ecp_nistz256_avx2_multi_select_w7 +.type ecp_nistz256_avx2_multi_select_w7,\@abi-omnipotent +ecp_nistz256_avx2_transpose_convert: +ecp_nistz256_avx2_convert_transpose_back: +ecp_nistz256_avx2_point_add_affine_x4: +ecp_nistz256_avx2_point_add_affines_x4: +ecp_nistz256_avx2_to_mont: +ecp_nistz256_avx2_from_mont: +ecp_nistz256_avx2_set1: +ecp_nistz256_avx2_multi_select_w7: + .byte 0x0f,0x0b # ud2 + ret +.size ecp_nistz256_avx2_multi_select_w7,.-ecp_nistz256_avx2_multi_select_w7 + +.globl ecp_nistz_avx2_eligible +.type ecp_nistz_avx2_eligible,\@abi-omnipotent +ecp_nistz_avx2_eligible: + xor %eax,%eax + ret +.size ecp_nistz_avx2_eligible,.-ecp_nistz_avx2_eligible +___ +}} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl new file mode 100755 index 00000000000000..5b21574a2de414 --- /dev/null +++ b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -0,0 +1,2993 @@ +#!/usr/bin/env perl + +############################################################################## +# # +# Copyright 2014 Intel Corporation # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); # +# you may not use this file except in compliance with the License. # +# You may obtain a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. 
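Everything in these generators is gated on the assembler, not on the CPU: the version probes near the top of each script try the GNU assembler, nasm, ml64 and clang in turn, and the resulting $avx level (0 none, 1 AVX, 2 AVX2) and $addx flag decide what gets emitted. Only $avx>=2 produces the AVX2 body; otherwise the ud2 stubs just above are generated, and the runtime gate ecp_nistz_avx2_eligible, which in the full build tests the AVX2 bit of OPENSSL_ia32cap_P, simply returns zero. A table-driven restatement of the probed thresholds (sketch only; the float encoding mirrors the perl, including clang's minor/100 trick):

    def toolchain_caps(kind, version):
        # ((avx >= 1 threshold, avx >= 2 threshold), addx threshold)
        thresholds = {
            'gas':   ((2.19, 2.22), 2.23),
            'nasm':  ((2.09, 2.10), 2.10),
            'ml64':  ((10,   11),   12),
            'clang': ((3.00, 3.01), 3.03),   # e.g. clang 3.4 is encoded as 3.04
        }
        (avx1, avx2), addx = thresholds[kind]
        avx = (version >= avx1) + (version >= avx2)
        return avx, version >= addx

    assert toolchain_caps('gas', 2.23) == (2, True)    # full AVX2 + ADX output
    assert toolchain_caps('gas', 2.20) == (1, False)   # too old: stubs only
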
# +# # +############################################################################## +# # +# Developers and authors: # +# Shay Gueron (1, 2), and Vlad Krasnov (1) # +# (1) Intel Corporation, Israel Development Center # +# (2) University of Haifa # +# Reference: # +# S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with# +# 256 Bit Primes" # +# # +############################################################################## + +# Further optimization by : +# +# this/original +# Opteron +12-49% +# Bulldozer +14-45% +# P4 +18-46% +# Westmere +12-34% +# Sandy Bridge +9-35% +# Ivy Bridge +9-35% +# Haswell +8-37% +# Broadwell +18-58% +# Atom +15-50% +# VIA Nano +43-160% +# +# Ranges denote minimum and maximum improvement coefficients depending +# on benchmark. + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); + $addx = ($1>=2.23); +} + +if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); + $addx = ($1>=2.10); +} + +if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); + $addx = ($1>=12); +} + +if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 + $avx = ($ver>=3.0) + ($ver>=3.01); + $addx = ($ver>=3.03); +} + +$code.=<<___; +.text +.extern OPENSSL_ia32cap_P + +# The polynomial +.align 64 +.Lpoly: +.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 + +# 2^512 mod P precomputed for NIST P256 polynomial +.LRR: +.quad 0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd + +.LOne: +.long 1,1,1,1,1,1,1,1 +.LTwo: +.long 2,2,2,2,2,2,2,2 +.LThree: +.long 3,3,3,3,3,3,3,3 +.LONE_mont: +.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe +___ + +{ +################################################################################ +# void ecp_nistz256_mul_by_2(uint64_t res[4], uint64_t a[4]); + +my ($a0,$a1,$a2,$a3)=map("%r$_",(8..11)); +my ($t0,$t1,$t2,$t3,$t4)=("%rax","%rdx","%rcx","%r12","%r13"); +my ($r_ptr,$a_ptr,$b_ptr)=("%rdi","%rsi","%rdx"); + +$code.=<<___; + +.globl ecp_nistz256_mul_by_2 +.type ecp_nistz256_mul_by_2,\@function,2 +.align 64 +ecp_nistz256_mul_by_2: + push %r12 + push %r13 + + mov 8*0($a_ptr), $a0 + mov 8*1($a_ptr), $a1 + add $a0, $a0 # a0:a3+a0:a3 + mov 8*2($a_ptr), $a2 + adc $a1, $a1 + mov 8*3($a_ptr), $a3 + lea .Lpoly(%rip), $a_ptr + mov $a0, $t0 + adc $a2, $a2 + adc $a3, $a3 + mov $a1, $t1 + sbb $t4, $t4 + + sub 8*0($a_ptr), $a0 + mov $a2, $t2 + sbb 8*1($a_ptr), $a1 + sbb 8*2($a_ptr), $a2 + mov $a3, $t3 + sbb 8*3($a_ptr), $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size 
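The 64-bit generator keeps its constants as plain base-2^64 limbs, least significant quadword first. The sketch below (Python 3.8+ for the modular-inverse pow; illustration only, not part of the patch) reassembles .Lpoly and .LONE_mont and shows why a single Montgomery multiplication by .LRR, which the comment identifies as 2^512 mod p, is exactly the to-Montgomery conversion performed by ecp_nistz256_to_mont:

    def join64(words):                 # four little-endian 64-bit limbs
        return sum(w << (64 * i) for i, w in enumerate(words))

    p = join64([0xffffffffffffffff, 0x00000000ffffffff,
                0x0000000000000000, 0xffffffff00000001])           # .Lpoly
    assert p == 2**256 - 2**224 + 2**192 + 2**96 - 1

    one_mont = join64([0x0000000000000001, 0xffffffff00000000,
                       0xffffffffffffffff, 0x00000000fffffffe])    # .LONE_mont
    assert one_mont == pow(2, 256, p)      # 1 in the Montgomery domain

    RR = pow(2, 512, p)                    # the value tabulated at .LRR
    a  = 0xcafebabe12345678
    # mul_mont(a, RR) = a * RR * 2^-256 = a * 2^256 (mod p)
    assert a * RR * pow(2**256, -1, p) % p == a * pow(2, 256, p) % p
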
ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2 + +################################################################################ +# void ecp_nistz256_div_by_2(uint64_t res[4], uint64_t a[4]); +.globl ecp_nistz256_div_by_2 +.type ecp_nistz256_div_by_2,\@function,2 +.align 32 +ecp_nistz256_div_by_2: + push %r12 + push %r13 + + mov 8*0($a_ptr), $a0 + mov 8*1($a_ptr), $a1 + mov 8*2($a_ptr), $a2 + mov $a0, $t0 + mov 8*3($a_ptr), $a3 + lea .Lpoly(%rip), $a_ptr + + mov $a1, $t1 + xor $t4, $t4 + add 8*0($a_ptr), $a0 + mov $a2, $t2 + adc 8*1($a_ptr), $a1 + adc 8*2($a_ptr), $a2 + mov $a3, $t3 + adc 8*3($a_ptr), $a3 + adc \$0, $t4 + xor $a_ptr, $a_ptr # borrow $a_ptr + test \$1, $t0 + + cmovz $t0, $a0 + cmovz $t1, $a1 + cmovz $t2, $a2 + cmovz $t3, $a3 + cmovz $a_ptr, $t4 + + mov $a1, $t0 # a0:a3>>1 + shr \$1, $a0 + shl \$63, $t0 + mov $a2, $t1 + shr \$1, $a1 + or $t0, $a0 + shl \$63, $t1 + mov $a3, $t2 + shr \$1, $a2 + or $t1, $a1 + shl \$63, $t2 + shr \$1, $a3 + shl \$63, $t4 + or $t2, $a2 + or $t4, $a3 + + mov $a0, 8*0($r_ptr) + mov $a1, 8*1($r_ptr) + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2 + +################################################################################ +# void ecp_nistz256_mul_by_3(uint64_t res[4], uint64_t a[4]); +.globl ecp_nistz256_mul_by_3 +.type ecp_nistz256_mul_by_3,\@function,2 +.align 32 +ecp_nistz256_mul_by_3: + push %r12 + push %r13 + + mov 8*0($a_ptr), $a0 + xor $t4, $t4 + mov 8*1($a_ptr), $a1 + add $a0, $a0 # a0:a3+a0:a3 + mov 8*2($a_ptr), $a2 + adc $a1, $a1 + mov 8*3($a_ptr), $a3 + mov $a0, $t0 + adc $a2, $a2 + adc $a3, $a3 + mov $a1, $t1 + adc \$0, $t4 + + sub \$-1, $a0 + mov $a2, $t2 + sbb .Lpoly+8*1(%rip), $a1 + sbb \$0, $a2 + mov $a3, $t3 + sbb .Lpoly+8*3(%rip), $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + cmovz $t2, $a2 + cmovz $t3, $a3 + + xor $t4, $t4 + add 8*0($a_ptr), $a0 # a0:a3+=a_ptr[0:3] + adc 8*1($a_ptr), $a1 + mov $a0, $t0 + adc 8*2($a_ptr), $a2 + adc 8*3($a_ptr), $a3 + mov $a1, $t1 + adc \$0, $t4 + + sub \$-1, $a0 + mov $a2, $t2 + sbb .Lpoly+8*1(%rip), $a1 + sbb \$0, $a2 + mov $a3, $t3 + sbb .Lpoly+8*3(%rip), $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3 + +################################################################################ +# void ecp_nistz256_add(uint64_t res[4], uint64_t a[4], uint64_t b[4]); +.globl ecp_nistz256_add +.type ecp_nistz256_add,\@function,3 +.align 32 +ecp_nistz256_add: + push %r12 + push %r13 + + mov 8*0($a_ptr), $a0 + xor $t4, $t4 + mov 8*1($a_ptr), $a1 + mov 8*2($a_ptr), $a2 + mov 8*3($a_ptr), $a3 + lea .Lpoly(%rip), $a_ptr + + add 8*0($b_ptr), $a0 + adc 8*1($b_ptr), $a1 + mov $a0, $t0 + adc 8*2($b_ptr), $a2 + adc 8*3($b_ptr), $a3 + mov $a1, $t1 + adc \$0, $t4 + + sub 8*0($a_ptr), $a0 + mov $a2, $t2 + sbb 8*1($a_ptr), $a1 + sbb 8*2($a_ptr), $a2 + mov $a3, $t3 + sbb 8*3($a_ptr), $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size ecp_nistz256_add,.-ecp_nistz256_add + +################################################################################ +# void ecp_nistz256_sub(uint64_t res[4], uint64_t a[4], uint64_t b[4]); +.globl 
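The small field helpers above share one pattern: do the unreduced add or double, speculatively subtract p (or, for subtraction and negation, add it back), and let the carry drive cmovz so that the choice never becomes a branch. ecp_nistz256_div_by_2 adds a further trick for halving: p is odd, so adding p to an odd value makes it even without changing its residue, after which a plain right shift (with the saved carry becoming bit 255) is the modular division by two. A scalar restatement of that halving step (helper name mine; a sketch of the arithmetic, not of the carry juggling):

    p = 2**256 - 2**224 + 2**192 + 2**96 - 1

    def half_mod_p(x):
        if x & 1:
            x += p            # now even, and still the same residue mod p
        return x >> 1

    x = 0x1d2c3b4a5968778695a4b3c2d1e0ff01
    assert half_mod_p(x) * 2 % p == x % p
    assert half_mod_p(x) < p      # inputs below p stay below p
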
ecp_nistz256_sub +.type ecp_nistz256_sub,\@function,3 +.align 32 +ecp_nistz256_sub: + push %r12 + push %r13 + + mov 8*0($a_ptr), $a0 + xor $t4, $t4 + mov 8*1($a_ptr), $a1 + mov 8*2($a_ptr), $a2 + mov 8*3($a_ptr), $a3 + lea .Lpoly(%rip), $a_ptr + + sub 8*0($b_ptr), $a0 + sbb 8*1($b_ptr), $a1 + mov $a0, $t0 + sbb 8*2($b_ptr), $a2 + sbb 8*3($b_ptr), $a3 + mov $a1, $t1 + sbb \$0, $t4 + + add 8*0($a_ptr), $a0 + mov $a2, $t2 + adc 8*1($a_ptr), $a1 + adc 8*2($a_ptr), $a2 + mov $a3, $t3 + adc 8*3($a_ptr), $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size ecp_nistz256_sub,.-ecp_nistz256_sub + +################################################################################ +# void ecp_nistz256_neg(uint64_t res[4], uint64_t a[4]); +.globl ecp_nistz256_neg +.type ecp_nistz256_neg,\@function,2 +.align 32 +ecp_nistz256_neg: + push %r12 + push %r13 + + xor $a0, $a0 + xor $a1, $a1 + xor $a2, $a2 + xor $a3, $a3 + xor $t4, $t4 + + sub 8*0($a_ptr), $a0 + sbb 8*1($a_ptr), $a1 + sbb 8*2($a_ptr), $a2 + mov $a0, $t0 + sbb 8*3($a_ptr), $a3 + lea .Lpoly(%rip), $a_ptr + mov $a1, $t1 + sbb \$0, $t4 + + add 8*0($a_ptr), $a0 + mov $a2, $t2 + adc 8*1($a_ptr), $a1 + adc 8*2($a_ptr), $a2 + mov $a3, $t3 + adc 8*3($a_ptr), $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size ecp_nistz256_neg,.-ecp_nistz256_neg +___ +} +{ +my ($r_ptr,$a_ptr,$b_org,$b_ptr)=("%rdi","%rsi","%rdx","%rbx"); +my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("%r$_",(8..15)); +my ($t0,$t1,$t2,$t3,$t4)=("%rcx","%rbp","%rbx","%rdx","%rax"); +my ($poly1,$poly3)=($acc6,$acc7); + +$code.=<<___; +################################################################################ +# void ecp_nistz256_to_mont( +# uint64_t res[4], +# uint64_t in[4]); +.globl ecp_nistz256_to_mont +.type ecp_nistz256_to_mont,\@function,2 +.align 32 +ecp_nistz256_to_mont: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx +___ +$code.=<<___; + lea .LRR(%rip), $b_org + jmp .Lmul_mont +.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont + +################################################################################ +# void ecp_nistz256_mul_mont( +# uint64_t res[4], +# uint64_t a[4], +# uint64_t b[4]); + +.globl ecp_nistz256_mul_mont +.type ecp_nistz256_mul_mont,\@function,3 +.align 32 +ecp_nistz256_mul_mont: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx +___ +$code.=<<___; +.Lmul_mont: + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($addx); + cmp \$0x80100, %ecx + je .Lmul_montx +___ +$code.=<<___; + mov $b_org, $b_ptr + mov 8*0($b_org), %rax + mov 8*0($a_ptr), $acc1 + mov 8*1($a_ptr), $acc2 + mov 8*2($a_ptr), $acc3 + mov 8*3($a_ptr), $acc4 + + call __ecp_nistz256_mul_montq +___ +$code.=<<___ if ($addx); + jmp .Lmul_mont_done + +.align 32 +.Lmul_montx: + mov $b_org, $b_ptr + mov 8*0($b_org), %rdx + mov 8*0($a_ptr), $acc1 + mov 8*1($a_ptr), $acc2 + mov 8*2($a_ptr), $acc3 + mov 8*3($a_ptr), $acc4 + lea -128($a_ptr), $a_ptr # control u-op density + + call __ecp_nistz256_mul_montx +___ +$code.=<<___; +.Lmul_mont_done: + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size 
ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont + +.type __ecp_nistz256_mul_montq,\@abi-omnipotent +.align 32 +__ecp_nistz256_mul_montq: + ######################################################################## + # Multiply a by b[0] + mov %rax, $t1 + mulq $acc1 + mov .Lpoly+8*1(%rip),$poly1 + mov %rax, $acc0 + mov $t1, %rax + mov %rdx, $acc1 + + mulq $acc2 + mov .Lpoly+8*3(%rip),$poly3 + add %rax, $acc1 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $acc2 + + mulq $acc3 + add %rax, $acc2 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $acc3 + + mulq $acc4 + add %rax, $acc3 + mov $acc0, %rax + adc \$0, %rdx + xor $acc5, $acc5 + mov %rdx, $acc4 + + ######################################################################## + # First reduction step + # Basically now we want to multiply acc[0] by p256, + # and add the result to the acc. + # Due to the special form of p256 we do some optimizations + # + # acc[0] x p256[0..1] = acc[0] x 2^96 - acc[0] + # then we add acc[0] and get acc[0] x 2^96 + + mov $acc0, $t1 + shl \$32, $acc0 + mulq $poly3 + shr \$32, $t1 + add $acc0, $acc1 # +=acc[0]<<96 + adc $t1, $acc2 + adc %rax, $acc3 + mov 8*1($b_ptr), %rax + adc %rdx, $acc4 + adc \$0, $acc5 + xor $acc0, $acc0 + + ######################################################################## + # Multiply by b[1] + mov %rax, $t1 + mulq 8*0($a_ptr) + add %rax, $acc1 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*1($a_ptr) + add $t0, $acc2 + adc \$0, %rdx + add %rax, $acc2 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*2($a_ptr) + add $t0, $acc3 + adc \$0, %rdx + add %rax, $acc3 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*3($a_ptr) + add $t0, $acc4 + adc \$0, %rdx + add %rax, $acc4 + mov $acc1, %rax + adc %rdx, $acc5 + adc \$0, $acc0 + + ######################################################################## + # Second reduction step + mov $acc1, $t1 + shl \$32, $acc1 + mulq $poly3 + shr \$32, $t1 + add $acc1, $acc2 + adc $t1, $acc3 + adc %rax, $acc4 + mov 8*2($b_ptr), %rax + adc %rdx, $acc5 + adc \$0, $acc0 + xor $acc1, $acc1 + + ######################################################################## + # Multiply by b[2] + mov %rax, $t1 + mulq 8*0($a_ptr) + add %rax, $acc2 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*1($a_ptr) + add $t0, $acc3 + adc \$0, %rdx + add %rax, $acc3 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*2($a_ptr) + add $t0, $acc4 + adc \$0, %rdx + add %rax, $acc4 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*3($a_ptr) + add $t0, $acc5 + adc \$0, %rdx + add %rax, $acc5 + mov $acc2, %rax + adc %rdx, $acc0 + adc \$0, $acc1 + + ######################################################################## + # Third reduction step + mov $acc2, $t1 + shl \$32, $acc2 + mulq $poly3 + shr \$32, $t1 + add $acc2, $acc3 + adc $t1, $acc4 + adc %rax, $acc5 + mov 8*3($b_ptr), %rax + adc %rdx, $acc0 + adc \$0, $acc1 + xor $acc2, $acc2 + + ######################################################################## + # Multiply by b[3] + mov %rax, $t1 + mulq 8*0($a_ptr) + add %rax, $acc3 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*1($a_ptr) + add $t0, $acc4 + adc \$0, %rdx + add %rax, $acc4 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*2($a_ptr) + add $t0, $acc5 + adc \$0, %rdx + add %rax, $acc5 + mov $t1, %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq 8*3($a_ptr) + add $t0, $acc0 + adc \$0, %rdx + add %rax, $acc0 + mov $acc3, %rax + adc %rdx, $acc1 + adc \$0, $acc2 + + 
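+	# For reference, the identity behind every reduction step (including
+	# the final one below): the low half of the modulus is
+	# p[1]:p[0] = 0x00000000ffffffff:0xffffffffffffffff, i.e.
+	# p mod 2^128 = 2^96-1, and p[2] = 0, so
+	#
+	#	acc[0]*p = acc[0]*2^96 - acc[0] + acc[0]*p[3]*2^192
+	#
+	# Hence one mulq by p[3] plus a 32-bit shift pair is enough per step;
+	# the subtracted acc[0] clears the low word exactly as Montgomery
+	# reduction requires, since -p^-1 mod 2^64 = 1 for this prime.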
######################################################################## + # Final reduction step + mov $acc3, $t1 + shl \$32, $acc3 + mulq $poly3 + shr \$32, $t1 + add $acc3, $acc4 + adc $t1, $acc5 + mov $acc4, $t0 + adc %rax, $acc0 + adc %rdx, $acc1 + mov $acc5, $t1 + adc \$0, $acc2 + + ######################################################################## + # Branch-less conditional subtraction of P + sub \$-1, $acc4 # .Lpoly[0] + mov $acc0, $t2 + sbb $poly1, $acc5 # .Lpoly[1] + sbb \$0, $acc0 # .Lpoly[2] + mov $acc1, $t3 + sbb $poly3, $acc1 # .Lpoly[3] + sbb \$0, $acc2 + + cmovc $t0, $acc4 + cmovc $t1, $acc5 + mov $acc4, 8*0($r_ptr) + cmovc $t2, $acc0 + mov $acc5, 8*1($r_ptr) + cmovc $t3, $acc1 + mov $acc0, 8*2($r_ptr) + mov $acc1, 8*3($r_ptr) + + ret +.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq + +################################################################################ +# void ecp_nistz256_sqr_mont( +# uint64_t res[4], +# uint64_t a[4]); + +# we optimize the square according to S.Gueron and V.Krasnov, +# "Speeding up Big-Number Squaring" +.globl ecp_nistz256_sqr_mont +.type ecp_nistz256_sqr_mont,\@function,2 +.align 32 +ecp_nistz256_sqr_mont: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx +___ +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($addx); + cmp \$0x80100, %ecx + je .Lsqr_montx +___ +$code.=<<___; + mov 8*0($a_ptr), %rax + mov 8*1($a_ptr), $acc6 + mov 8*2($a_ptr), $acc7 + mov 8*3($a_ptr), $acc0 + + call __ecp_nistz256_sqr_montq +___ +$code.=<<___ if ($addx); + jmp .Lsqr_mont_done + +.align 32 +.Lsqr_montx: + mov 8*0($a_ptr), %rdx + mov 8*1($a_ptr), $acc6 + mov 8*2($a_ptr), $acc7 + mov 8*3($a_ptr), $acc0 + lea -128($a_ptr), $a_ptr # control u-op density + + call __ecp_nistz256_sqr_montx +___ +$code.=<<___; +.Lsqr_mont_done: + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont + +.type __ecp_nistz256_sqr_montq,\@abi-omnipotent +.align 32 +__ecp_nistz256_sqr_montq: + mov %rax, $acc5 + mulq $acc6 # a[1]*a[0] + mov %rax, $acc1 + mov $acc7, %rax + mov %rdx, $acc2 + + mulq $acc5 # a[0]*a[2] + add %rax, $acc2 + mov $acc0, %rax + adc \$0, %rdx + mov %rdx, $acc3 + + mulq $acc5 # a[0]*a[3] + add %rax, $acc3 + mov $acc7, %rax + adc \$0, %rdx + mov %rdx, $acc4 + + ################################# + mulq $acc6 # a[1]*a[2] + add %rax, $acc3 + mov $acc0, %rax + adc \$0, %rdx + mov %rdx, $t1 + + mulq $acc6 # a[1]*a[3] + add %rax, $acc4 + mov $acc0, %rax + adc \$0, %rdx + add $t1, $acc4 + mov %rdx, $acc5 + adc \$0, $acc5 + + ################################# + mulq $acc7 # a[2]*a[3] + xor $acc7, $acc7 + add %rax, $acc5 + mov 8*0($a_ptr), %rax + mov %rdx, $acc6 + adc \$0, $acc6 + + add $acc1, $acc1 # acc1:6<<1 + adc $acc2, $acc2 + adc $acc3, $acc3 + adc $acc4, $acc4 + adc $acc5, $acc5 + adc $acc6, $acc6 + adc \$0, $acc7 + + mulq %rax + mov %rax, $acc0 + mov 8*1($a_ptr), %rax + mov %rdx, $t0 + + mulq %rax + add $t0, $acc1 + adc %rax, $acc2 + mov 8*2($a_ptr), %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq %rax + add $t0, $acc3 + adc %rax, $acc4 + mov 8*3($a_ptr), %rax + adc \$0, %rdx + mov %rdx, $t0 + + mulq %rax + add $t0, $acc5 + adc %rax, $acc6 + mov $acc0, %rax + adc %rdx, $acc7 + + mov .Lpoly+8*1(%rip), $a_ptr + mov .Lpoly+8*3(%rip), $t1 + + ########################################## + # Now the reduction + # First iteration + mov $acc0, $t0 + shl \$32, $acc0 + mulq $t1 + shr \$32, $t0 + add 
$acc0, $acc1 # +=acc[0]<<96 + adc $t0, $acc2 + adc %rax, $acc3 + mov $acc1, %rax + adc \$0, %rdx + + ########################################## + # Second iteration + mov $acc1, $t0 + shl \$32, $acc1 + mov %rdx, $acc0 + mulq $t1 + shr \$32, $t0 + add $acc1, $acc2 + adc $t0, $acc3 + adc %rax, $acc0 + mov $acc2, %rax + adc \$0, %rdx + + ########################################## + # Third iteration + mov $acc2, $t0 + shl \$32, $acc2 + mov %rdx, $acc1 + mulq $t1 + shr \$32, $t0 + add $acc2, $acc3 + adc $t0, $acc0 + adc %rax, $acc1 + mov $acc3, %rax + adc \$0, %rdx + + ########################################### + # Last iteration + mov $acc3, $t0 + shl \$32, $acc3 + mov %rdx, $acc2 + mulq $t1 + shr \$32, $t0 + add $acc3, $acc0 + adc $t0, $acc1 + adc %rax, $acc2 + adc \$0, %rdx + xor $acc3, $acc3 + + ############################################ + # Add the rest of the acc + add $acc0, $acc4 + adc $acc1, $acc5 + mov $acc4, $acc0 + adc $acc2, $acc6 + adc %rdx, $acc7 + mov $acc5, $acc1 + adc \$0, $acc3 + + sub \$-1, $acc4 # .Lpoly[0] + mov $acc6, $acc2 + sbb $a_ptr, $acc5 # .Lpoly[1] + sbb \$0, $acc6 # .Lpoly[2] + mov $acc7, $t0 + sbb $t1, $acc7 # .Lpoly[3] + sbb \$0, $acc3 + + cmovc $acc0, $acc4 + cmovc $acc1, $acc5 + mov $acc4, 8*0($r_ptr) + cmovc $acc2, $acc6 + mov $acc5, 8*1($r_ptr) + cmovc $t0, $acc7 + mov $acc6, 8*2($r_ptr) + mov $acc7, 8*3($r_ptr) + + ret +.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq +___ + +if ($addx) { +$code.=<<___; +.type __ecp_nistz256_mul_montx,\@abi-omnipotent +.align 32 +__ecp_nistz256_mul_montx: + ######################################################################## + # Multiply by b[0] + mulx $acc1, $acc0, $acc1 + mulx $acc2, $t0, $acc2 + mov \$32, $poly1 + xor $acc5, $acc5 # cf=0 + mulx $acc3, $t1, $acc3 + mov .Lpoly+8*3(%rip), $poly3 + adc $t0, $acc1 + mulx $acc4, $t0, $acc4 + mov $acc0, %rdx + adc $t1, $acc2 + shlx $poly1,$acc0,$t1 + adc $t0, $acc3 + shrx $poly1,$acc0,$t0 + adc \$0, $acc4 + + ######################################################################## + # First reduction step + add $t1, $acc1 + adc $t0, $acc2 + + mulx $poly3, $t0, $t1 + mov 8*1($b_ptr), %rdx + adc $t0, $acc3 + adc $t1, $acc4 + adc \$0, $acc5 + xor $acc0, $acc0 # $acc0=0,cf=0,of=0 + + ######################################################################## + # Multiply by b[1] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc1 + adox $t1, $acc2 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc1, %rdx + adcx $t0, $acc4 + shlx $poly1, $acc1, $t0 + adox $t1, $acc5 + shrx $poly1, $acc1, $t1 + + adcx $acc0, $acc5 + adox $acc0, $acc0 + adc \$0, $acc0 + + ######################################################################## + # Second reduction step + add $t0, $acc2 + adc $t1, $acc3 + + mulx $poly3, $t0, $t1 + mov 8*2($b_ptr), %rdx + adc $t0, $acc4 + adc $t1, $acc5 + adc \$0, $acc0 + xor $acc1 ,$acc1 # $acc1=0,cf=0,of=0 + + ######################################################################## + # Multiply by b[2] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc2 + adox $t1, $acc3 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc2, %rdx + adcx $t0, $acc5 + shlx $poly1, $acc2, $t0 + adox $t1, $acc0 + shrx $poly1, $acc2, $t1 + + adcx $acc1, $acc0 + adox $acc1, $acc1 + adc \$0, $acc1 + + 
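+	# A note on the *x code path: mulx takes one operand implicitly from
+	# %rdx and does not touch the flags, adcx updates only CF and adox
+	# only OF, so the two carry chains of the column additions can be
+	# interleaved without serialising on a single flags register, and
+	# shlx/shrx produce the two words of acc<<96 without disturbing
+	# either chain.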
######################################################################## + # Third reduction step + add $t0, $acc3 + adc $t1, $acc4 + + mulx $poly3, $t0, $t1 + mov 8*3($b_ptr), %rdx + adc $t0, $acc5 + adc $t1, $acc0 + adc \$0, $acc1 + xor $acc2, $acc2 # $acc2=0,cf=0,of=0 + + ######################################################################## + # Multiply by b[3] + mulx 8*0+128($a_ptr), $t0, $t1 + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx 8*1+128($a_ptr), $t0, $t1 + adcx $t0, $acc4 + adox $t1, $acc5 + + mulx 8*2+128($a_ptr), $t0, $t1 + adcx $t0, $acc5 + adox $t1, $acc0 + + mulx 8*3+128($a_ptr), $t0, $t1 + mov $acc3, %rdx + adcx $t0, $acc0 + shlx $poly1, $acc3, $t0 + adox $t1, $acc1 + shrx $poly1, $acc3, $t1 + + adcx $acc2, $acc1 + adox $acc2, $acc2 + adc \$0, $acc2 + + ######################################################################## + # Fourth reduction step + add $t0, $acc4 + adc $t1, $acc5 + + mulx $poly3, $t0, $t1 + mov $acc4, $t2 + mov .Lpoly+8*1(%rip), $poly1 + adc $t0, $acc0 + mov $acc5, $t3 + adc $t1, $acc1 + adc \$0, $acc2 + + ######################################################################## + # Branch-less conditional subtraction of P + xor %eax, %eax + mov $acc0, $t0 + sbb \$-1, $acc4 # .Lpoly[0] + sbb $poly1, $acc5 # .Lpoly[1] + sbb \$0, $acc0 # .Lpoly[2] + mov $acc1, $t1 + sbb $poly3, $acc1 # .Lpoly[3] + sbb \$0, $acc2 + + cmovc $t2, $acc4 + cmovc $t3, $acc5 + mov $acc4, 8*0($r_ptr) + cmovc $t0, $acc0 + mov $acc5, 8*1($r_ptr) + cmovc $t1, $acc1 + mov $acc0, 8*2($r_ptr) + mov $acc1, 8*3($r_ptr) + + ret +.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx + +.type __ecp_nistz256_sqr_montx,\@abi-omnipotent +.align 32 +__ecp_nistz256_sqr_montx: + mulx $acc6, $acc1, $acc2 # a[0]*a[1] + mulx $acc7, $t0, $acc3 # a[0]*a[2] + xor %eax, %eax + adc $t0, $acc2 + mulx $acc0, $t1, $acc4 # a[0]*a[3] + mov $acc6, %rdx + adc $t1, $acc3 + adc \$0, $acc4 + xor $acc5, $acc5 # $acc5=0,cf=0,of=0 + + ################################# + mulx $acc7, $t0, $t1 # a[1]*a[2] + adcx $t0, $acc3 + adox $t1, $acc4 + + mulx $acc0, $t0, $t1 # a[1]*a[3] + mov $acc7, %rdx + adcx $t0, $acc4 + adox $t1, $acc5 + adc \$0, $acc5 + + ################################# + mulx $acc0, $t0, $acc6 # a[2]*a[3] + mov 8*0+128($a_ptr), %rdx + xor $acc7, $acc7 # $acc7=0,cf=0,of=0 + adcx $acc1, $acc1 # acc1:6<<1 + adox $t0, $acc5 + adcx $acc2, $acc2 + adox $acc7, $acc6 # of=0 + + mulx %rdx, $acc0, $t1 + mov 8*1+128($a_ptr), %rdx + adcx $acc3, $acc3 + adox $t1, $acc1 + adcx $acc4, $acc4 + mulx %rdx, $t0, $t4 + mov 8*2+128($a_ptr), %rdx + adcx $acc5, $acc5 + adox $t0, $acc2 + adcx $acc6, $acc6 + .byte 0x67 + mulx %rdx, $t0, $t1 + mov 8*3+128($a_ptr), %rdx + adox $t4, $acc3 + adcx $acc7, $acc7 + adox $t0, $acc4 + mov \$32, $a_ptr + adox $t1, $acc5 + .byte 0x67,0x67 + mulx %rdx, $t0, $t4 + mov $acc0, %rdx + adox $t0, $acc6 + shlx $a_ptr, $acc0, $t0 + adox $t4, $acc7 + shrx $a_ptr, $acc0, $t4 + mov .Lpoly+8*3(%rip), $t1 + + # reduction step 1 + add $t0, $acc1 + adc $t4, $acc2 + + mulx $t1, $t0, $acc0 + mov $acc1, %rdx + adc $t0, $acc3 + shlx $a_ptr, $acc1, $t0 + adc \$0, $acc0 + shrx $a_ptr, $acc1, $t4 + + # reduction step 2 + add $t0, $acc2 + adc $t4, $acc3 + + mulx $t1, $t0, $acc1 + mov $acc2, %rdx + adc $t0, $acc0 + shlx $a_ptr, $acc2, $t0 + adc \$0, $acc1 + shrx $a_ptr, $acc2, $t4 + + # reduction step 3 + add $t0, $acc3 + adc $t4, $acc0 + + mulx $t1, $t0, $acc2 + mov $acc3, %rdx + adc $t0, $acc1 + shlx $a_ptr, $acc3, $t0 + adc \$0, $acc2 + shrx $a_ptr, $acc3, $t4 + + # reduction step 4 + add $t0, $acc0 + adc 
$t4, $acc1 + + mulx $t1, $t0, $acc3 + adc $t0, $acc2 + adc \$0, $acc3 + + xor $t3, $t3 # cf=0 + adc $acc0, $acc4 # accumulate upper half + mov .Lpoly+8*1(%rip), $a_ptr + adc $acc1, $acc5 + mov $acc4, $acc0 + adc $acc2, $acc6 + adc $acc3, $acc7 + mov $acc5, $acc1 + adc \$0, $t3 + + xor %eax, %eax # cf=0 + sbb \$-1, $acc4 # .Lpoly[0] + mov $acc6, $acc2 + sbb $a_ptr, $acc5 # .Lpoly[1] + sbb \$0, $acc6 # .Lpoly[2] + mov $acc7, $acc3 + sbb $t1, $acc7 # .Lpoly[3] + sbb \$0, $t3 + + cmovc $acc0, $acc4 + cmovc $acc1, $acc5 + mov $acc4, 8*0($r_ptr) + cmovc $acc2, $acc6 + mov $acc5, 8*1($r_ptr) + cmovc $acc3, $acc7 + mov $acc6, 8*2($r_ptr) + mov $acc7, 8*3($r_ptr) + + ret +.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx +___ +} +} +{ +my ($r_ptr,$in_ptr)=("%rdi","%rsi"); +my ($acc0,$acc1,$acc2,$acc3)=map("%r$_",(8..11)); +my ($t0,$t1,$t2)=("%rcx","%r12","%r13"); + +$code.=<<___; +################################################################################ +# void ecp_nistz256_from_mont( +# uint64_t res[4], +# uint64_t in[4]); +# This one performs Montgomery multiplication by 1, so we only need the reduction + +.globl ecp_nistz256_from_mont +.type ecp_nistz256_from_mont,\@function,2 +.align 32 +ecp_nistz256_from_mont: + push %r12 + push %r13 + + mov 8*0($in_ptr), %rax + mov .Lpoly+8*3(%rip), $t2 + mov 8*1($in_ptr), $acc1 + mov 8*2($in_ptr), $acc2 + mov 8*3($in_ptr), $acc3 + mov %rax, $acc0 + mov .Lpoly+8*1(%rip), $t1 + + ######################################### + # First iteration + mov %rax, $t0 + shl \$32, $acc0 + mulq $t2 + shr \$32, $t0 + add $acc0, $acc1 + adc $t0, $acc2 + adc %rax, $acc3 + mov $acc1, %rax + adc \$0, %rdx + + ######################################### + # Second iteration + mov $acc1, $t0 + shl \$32, $acc1 + mov %rdx, $acc0 + mulq $t2 + shr \$32, $t0 + add $acc1, $acc2 + adc $t0, $acc3 + adc %rax, $acc0 + mov $acc2, %rax + adc \$0, %rdx + + ########################################## + # Third iteration + mov $acc2, $t0 + shl \$32, $acc2 + mov %rdx, $acc1 + mulq $t2 + shr \$32, $t0 + add $acc2, $acc3 + adc $t0, $acc0 + adc %rax, $acc1 + mov $acc3, %rax + adc \$0, %rdx + + ########################################### + # Last iteration + mov $acc3, $t0 + shl \$32, $acc3 + mov %rdx, $acc2 + mulq $t2 + shr \$32, $t0 + add $acc3, $acc0 + adc $t0, $acc1 + mov $acc0, $t0 + adc %rax, $acc2 + mov $acc1, $in_ptr + adc \$0, %rdx + + ########################################### + # Branch-less conditional subtraction + sub \$-1, $acc0 + mov $acc2, %rax + sbb $t1, $acc1 + sbb \$0, $acc2 + mov %rdx, $acc3 + sbb $t2, %rdx + sbb $t2, $t2 + + cmovnz $t0, $acc0 + cmovnz $in_ptr, $acc1 + mov $acc0, 8*0($r_ptr) + cmovnz %rax, $acc2 + mov $acc1, 8*1($r_ptr) + cmovz %rdx, $acc3 + mov $acc2, 8*2($r_ptr) + mov $acc3, 8*3($r_ptr) + + pop %r13 + pop %r12 + ret +.size ecp_nistz256_from_mont,.-ecp_nistz256_from_mont +___ +} +{ +my ($val,$in_t,$index)=$win64?("%rcx","%rdx","%r8d"):("%rdi","%rsi","%edx"); +my ($ONE,$INDEX,$Ra,$Rb,$Rc,$Rd,$Re,$Rf)=map("%xmm$_",(0..7)); +my ($M0,$T0a,$T0b,$T0c,$T0d,$T0e,$T0f,$TMP0)=map("%xmm$_",(8..15)); +my ($M1,$T2a,$T2b,$TMP2,$M2,$T2a,$T2b,$TMP2)=map("%xmm$_",(8..15)); + +$code.=<<___; +################################################################################ +# void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_select_w5 +.type ecp_nistz256_select_w5,\@abi-omnipotent +.align 32 +ecp_nistz256_select_w5: +___ +$code.=<<___ if ($avx>1); + mov OPENSSL_ia32cap_P+8(%rip), %eax + test \$`1<<5`, %eax + jnz 
.Lavx2_select_w5 +___ +$code.=<<___ if ($win64); + lea -0x88(%rsp), %rax +.LSEH_begin_ecp_nistz256_select_w5: + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp + .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax) + .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax) + .byte 0x44,0x0f,0x29,0x00 #movaps %xmm8, 0(%rax) + .byte 0x44,0x0f,0x29,0x48,0x10 #movaps %xmm9, 0x10(%rax) + .byte 0x44,0x0f,0x29,0x50,0x20 #movaps %xmm10, 0x20(%rax) + .byte 0x44,0x0f,0x29,0x58,0x30 #movaps %xmm11, 0x30(%rax) + .byte 0x44,0x0f,0x29,0x60,0x40 #movaps %xmm12, 0x40(%rax) + .byte 0x44,0x0f,0x29,0x68,0x50 #movaps %xmm13, 0x50(%rax) + .byte 0x44,0x0f,0x29,0x70,0x60 #movaps %xmm14, 0x60(%rax) + .byte 0x44,0x0f,0x29,0x78,0x70 #movaps %xmm15, 0x70(%rax) +___ +$code.=<<___; + movdqa .LOne(%rip), $ONE + movd $index, $INDEX + + pxor $Ra, $Ra + pxor $Rb, $Rb + pxor $Rc, $Rc + pxor $Rd, $Rd + pxor $Re, $Re + pxor $Rf, $Rf + + movdqa $ONE, $M0 + pshufd \$0, $INDEX, $INDEX + + mov \$16, %rax +.Lselect_loop_sse_w5: + + movdqa $M0, $TMP0 + paddd $ONE, $M0 + pcmpeqd $INDEX, $TMP0 + + movdqa 16*0($in_t), $T0a + movdqa 16*1($in_t), $T0b + movdqa 16*2($in_t), $T0c + movdqa 16*3($in_t), $T0d + movdqa 16*4($in_t), $T0e + movdqa 16*5($in_t), $T0f + lea 16*6($in_t), $in_t + + pand $TMP0, $T0a + pand $TMP0, $T0b + por $T0a, $Ra + pand $TMP0, $T0c + por $T0b, $Rb + pand $TMP0, $T0d + por $T0c, $Rc + pand $TMP0, $T0e + por $T0d, $Rd + pand $TMP0, $T0f + por $T0e, $Re + por $T0f, $Rf + + dec %rax + jnz .Lselect_loop_sse_w5 + + movdqu $Ra, 16*0($val) + movdqu $Rb, 16*1($val) + movdqu $Rc, 16*2($val) + movdqu $Rd, 16*3($val) + movdqu $Re, 16*4($val) + movdqu $Rf, 16*5($val) +___ +$code.=<<___ if ($win64); + movaps (%rsp), %xmm6 + movaps 0x10(%rsp), %xmm7 + movaps 0x20(%rsp), %xmm8 + movaps 0x30(%rsp), %xmm9 + movaps 0x40(%rsp), %xmm10 + movaps 0x50(%rsp), %xmm11 + movaps 0x60(%rsp), %xmm12 + movaps 0x70(%rsp), %xmm13 + movaps 0x80(%rsp), %xmm14 + movaps 0x90(%rsp), %xmm15 + lea 0xa8(%rsp), %rsp +.LSEH_end_ecp_nistz256_select_w5: +___ +$code.=<<___; + ret +.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5 + +################################################################################ +# void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_select_w7 +.type ecp_nistz256_select_w7,\@abi-omnipotent +.align 32 +ecp_nistz256_select_w7: +___ +$code.=<<___ if ($avx>1); + mov OPENSSL_ia32cap_P+8(%rip), %eax + test \$`1<<5`, %eax + jnz .Lavx2_select_w7 +___ +$code.=<<___ if ($win64); + lea -0x88(%rsp), %rax +.LSEH_begin_ecp_nistz256_select_w7: + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp + .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax) + .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax) + .byte 0x44,0x0f,0x29,0x00 #movaps %xmm8, 0(%rax) + .byte 0x44,0x0f,0x29,0x48,0x10 #movaps %xmm9, 0x10(%rax) + .byte 0x44,0x0f,0x29,0x50,0x20 #movaps %xmm10, 0x20(%rax) + .byte 0x44,0x0f,0x29,0x58,0x30 #movaps %xmm11, 0x30(%rax) + .byte 0x44,0x0f,0x29,0x60,0x40 #movaps %xmm12, 0x40(%rax) + .byte 0x44,0x0f,0x29,0x68,0x50 #movaps %xmm13, 0x50(%rax) + .byte 0x44,0x0f,0x29,0x70,0x60 #movaps %xmm14, 0x60(%rax) + .byte 0x44,0x0f,0x29,0x78,0x70 #movaps %xmm15, 0x70(%rax) +___ +$code.=<<___; + movdqa .LOne(%rip), $M0 + movd $index, $INDEX + + pxor $Ra, $Ra + pxor $Rb, $Rb + pxor $Rc, $Rc + pxor $Rd, $Rd + + movdqa $M0, $ONE + pshufd \$0, $INDEX, $INDEX + mov \$64, %rax + +.Lselect_loop_sse_w7: + movdqa $M0, $TMP0 + paddd $ONE, $M0 + movdqa 16*0($in_t), $T0a + movdqa 16*1($in_t), $T0b + pcmpeqd $INDEX, $TMP0 + 
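+	# $TMP0 is now an all-ones mask in the lanes where the running
+	# counter equals the requested index and zero elsewhere, so the
+	# pand/por below fold in a table entry only on a match; every one
+	# of the 64 entries is still read, keeping the access pattern
+	# independent of the (secret) index.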
movdqa 16*2($in_t), $T0c + movdqa 16*3($in_t), $T0d + lea 16*4($in_t), $in_t + + pand $TMP0, $T0a + pand $TMP0, $T0b + por $T0a, $Ra + pand $TMP0, $T0c + por $T0b, $Rb + pand $TMP0, $T0d + por $T0c, $Rc + prefetcht0 255($in_t) + por $T0d, $Rd + + dec %rax + jnz .Lselect_loop_sse_w7 + + movdqu $Ra, 16*0($val) + movdqu $Rb, 16*1($val) + movdqu $Rc, 16*2($val) + movdqu $Rd, 16*3($val) +___ +$code.=<<___ if ($win64); + movaps (%rsp), %xmm6 + movaps 0x10(%rsp), %xmm7 + movaps 0x20(%rsp), %xmm8 + movaps 0x30(%rsp), %xmm9 + movaps 0x40(%rsp), %xmm10 + movaps 0x50(%rsp), %xmm11 + movaps 0x60(%rsp), %xmm12 + movaps 0x70(%rsp), %xmm13 + movaps 0x80(%rsp), %xmm14 + movaps 0x90(%rsp), %xmm15 + lea 0xa8(%rsp), %rsp +.LSEH_end_ecp_nistz256_select_w7: +___ +$code.=<<___; + ret +.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7 +___ +} +if ($avx>1) { +my ($val,$in_t,$index)=$win64?("%rcx","%rdx","%r8d"):("%rdi","%rsi","%edx"); +my ($TWO,$INDEX,$Ra,$Rb,$Rc)=map("%ymm$_",(0..4)); +my ($M0,$T0a,$T0b,$T0c,$TMP0)=map("%ymm$_",(5..9)); +my ($M1,$T1a,$T1b,$T1c,$TMP1)=map("%ymm$_",(10..14)); + +$code.=<<___; +################################################################################ +# void ecp_nistz256_avx2_select_w5(uint64_t *val, uint64_t *in_t, int index); +.type ecp_nistz256_avx2_select_w5,\@abi-omnipotent +.align 32 +ecp_nistz256_avx2_select_w5: +.Lavx2_select_w5: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -0x88(%rsp), %rax +.LSEH_begin_ecp_nistz256_avx2_select_w5: + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp + .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6, -0x20(%rax) + .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7, -0x10(%rax) + .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8, 8(%rax) + .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9, 0x10(%rax) + .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10, 0x20(%rax) + .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11, 0x30(%rax) + .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12, 0x40(%rax) + .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13, 0x50(%rax) + .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14, 0x60(%rax) + .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15, 0x70(%rax) +___ +$code.=<<___; + vmovdqa .LTwo(%rip), $TWO + + vpxor $Ra, $Ra, $Ra + vpxor $Rb, $Rb, $Rb + vpxor $Rc, $Rc, $Rc + + vmovdqa .LOne(%rip), $M0 + vmovdqa .LTwo(%rip), $M1 + + vmovd $index, %xmm1 + vpermd $INDEX, $Ra, $INDEX + + mov \$8, %rax +.Lselect_loop_avx2_w5: + + vmovdqa 32*0($in_t), $T0a + vmovdqa 32*1($in_t), $T0b + vmovdqa 32*2($in_t), $T0c + + vmovdqa 32*3($in_t), $T1a + vmovdqa 32*4($in_t), $T1b + vmovdqa 32*5($in_t), $T1c + + vpcmpeqd $INDEX, $M0, $TMP0 + vpcmpeqd $INDEX, $M1, $TMP1 + + vpaddd $TWO, $M0, $M0 + vpaddd $TWO, $M1, $M1 + lea 32*6($in_t), $in_t + + vpand $TMP0, $T0a, $T0a + vpand $TMP0, $T0b, $T0b + vpand $TMP0, $T0c, $T0c + vpand $TMP1, $T1a, $T1a + vpand $TMP1, $T1b, $T1b + vpand $TMP1, $T1c, $T1c + + vpxor $T0a, $Ra, $Ra + vpxor $T0b, $Rb, $Rb + vpxor $T0c, $Rc, $Rc + vpxor $T1a, $Ra, $Ra + vpxor $T1b, $Rb, $Rb + vpxor $T1c, $Rc, $Rc + + dec %rax + jnz .Lselect_loop_avx2_w5 + + vmovdqu $Ra, 32*0($val) + vmovdqu $Rb, 32*1($val) + vmovdqu $Rc, 32*2($val) + vzeroupper +___ +$code.=<<___ if ($win64); + movaps (%rsp), %xmm6 + movaps 0x10(%rsp), %xmm7 + movaps 0x20(%rsp), %xmm8 + movaps 0x30(%rsp), %xmm9 + movaps 0x40(%rsp), %xmm10 + movaps 0x50(%rsp), %xmm11 + movaps 0x60(%rsp), %xmm12 + movaps 0x70(%rsp), %xmm13 + movaps 0x80(%rsp), %xmm14 + movaps 0x90(%rsp), %xmm15 + lea 0xa8(%rsp), %rsp +.LSEH_end_ecp_nistz256_avx2_select_w5: +___ +$code.=<<___; + ret +.size 
ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5 +___ +} +if ($avx>1) { +my ($val,$in_t,$index)=$win64?("%rcx","%rdx","%r8d"):("%rdi","%rsi","%edx"); +my ($THREE,$INDEX,$Ra,$Rb)=map("%ymm$_",(0..3)); +my ($M0,$T0a,$T0b,$TMP0)=map("%ymm$_",(4..7)); +my ($M1,$T1a,$T1b,$TMP1)=map("%ymm$_",(8..11)); +my ($M2,$T2a,$T2b,$TMP2)=map("%ymm$_",(12..15)); + +$code.=<<___; + +################################################################################ +# void ecp_nistz256_avx2_select_w7(uint64_t *val, uint64_t *in_t, int index); +.globl ecp_nistz256_avx2_select_w7 +.type ecp_nistz256_avx2_select_w7,\@abi-omnipotent +.align 32 +ecp_nistz256_avx2_select_w7: +.Lavx2_select_w7: + vzeroupper +___ +$code.=<<___ if ($win64); + lea -0x88(%rsp), %rax +.LSEH_begin_ecp_nistz256_avx2_select_w7: + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp + .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6, -0x20(%rax) + .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7, -0x10(%rax) + .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8, 8(%rax) + .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9, 0x10(%rax) + .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10, 0x20(%rax) + .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11, 0x30(%rax) + .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12, 0x40(%rax) + .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13, 0x50(%rax) + .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14, 0x60(%rax) + .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15, 0x70(%rax) +___ +$code.=<<___; + vmovdqa .LThree(%rip), $THREE + + vpxor $Ra, $Ra, $Ra + vpxor $Rb, $Rb, $Rb + + vmovdqa .LOne(%rip), $M0 + vmovdqa .LTwo(%rip), $M1 + vmovdqa .LThree(%rip), $M2 + + vmovd $index, %xmm1 + vpermd $INDEX, $Ra, $INDEX + # Skip index = 0, because it is implicitly the point at infinity + + mov \$21, %rax +.Lselect_loop_avx2_w7: + + vmovdqa 32*0($in_t), $T0a + vmovdqa 32*1($in_t), $T0b + + vmovdqa 32*2($in_t), $T1a + vmovdqa 32*3($in_t), $T1b + + vmovdqa 32*4($in_t), $T2a + vmovdqa 32*5($in_t), $T2b + + vpcmpeqd $INDEX, $M0, $TMP0 + vpcmpeqd $INDEX, $M1, $TMP1 + vpcmpeqd $INDEX, $M2, $TMP2 + + vpaddd $THREE, $M0, $M0 + vpaddd $THREE, $M1, $M1 + vpaddd $THREE, $M2, $M2 + lea 32*6($in_t), $in_t + + vpand $TMP0, $T0a, $T0a + vpand $TMP0, $T0b, $T0b + vpand $TMP1, $T1a, $T1a + vpand $TMP1, $T1b, $T1b + vpand $TMP2, $T2a, $T2a + vpand $TMP2, $T2b, $T2b + + vpxor $T0a, $Ra, $Ra + vpxor $T0b, $Rb, $Rb + vpxor $T1a, $Ra, $Ra + vpxor $T1b, $Rb, $Rb + vpxor $T2a, $Ra, $Ra + vpxor $T2b, $Rb, $Rb + + dec %rax + jnz .Lselect_loop_avx2_w7 + + + vmovdqa 32*0($in_t), $T0a + vmovdqa 32*1($in_t), $T0b + + vpcmpeqd $INDEX, $M0, $TMP0 + + vpand $TMP0, $T0a, $T0a + vpand $TMP0, $T0b, $T0b + + vpxor $T0a, $Ra, $Ra + vpxor $T0b, $Rb, $Rb + + vmovdqu $Ra, 32*0($val) + vmovdqu $Rb, 32*1($val) + vzeroupper +___ +$code.=<<___ if ($win64); + movaps (%rsp), %xmm6 + movaps 0x10(%rsp), %xmm7 + movaps 0x20(%rsp), %xmm8 + movaps 0x30(%rsp), %xmm9 + movaps 0x40(%rsp), %xmm10 + movaps 0x50(%rsp), %xmm11 + movaps 0x60(%rsp), %xmm12 + movaps 0x70(%rsp), %xmm13 + movaps 0x80(%rsp), %xmm14 + movaps 0x90(%rsp), %xmm15 + lea 0xa8(%rsp), %rsp +.LSEH_end_ecp_nistz256_avx2_select_w7: +___ +$code.=<<___; + ret +.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +___ +} else { +$code.=<<___; +.globl ecp_nistz256_avx2_select_w7 +.type ecp_nistz256_avx2_select_w7,\@function,3 +.align 32 +ecp_nistz256_avx2_select_w7: + .byte 0x0f,0x0b # ud2 + ret +.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7 +___ +} +{{{ 
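+# Note that on builds where the assembler cannot produce AVX2 code
+# ($avx<2) ecp_nistz256_avx2_select_w7 above is only a ud2 stub: the
+# symbol stays defined for the linker, but reaching it raises #UD
+# instead of silently running unsupported code.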
+######################################################################## +# This block implements higher level point_double, point_add and +# point_add_affine. The key to performance in this case is to allow +# out-of-order execution logic to overlap computations from next step +# with tail processing from current step. By using tailored calling +# sequence we minimize inter-step overhead to give processor better +# shot at overlapping operations... +# +# You will notice that input data is copied to stack. Trouble is that +# there are no registers to spare for holding original pointers and +# reloading them, pointers, would create undesired dependencies on +# effective addresses calculation paths. In other words it's too done +# to favour out-of-order execution logic. +# + +my ($r_ptr,$a_ptr,$b_org,$b_ptr)=("%rdi","%rsi","%rdx","%rbx"); +my ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7)=map("%r$_",(8..15)); +my ($t0,$t1,$t2,$t3,$t4)=("%rax","%rbp","%rcx",$acc4,$acc4); +my ($poly1,$poly3)=($acc6,$acc7); + +sub load_for_mul () { +my ($a,$b,$src0) = @_; +my $bias = $src0 eq "%rax" ? 0 : -128; + +" mov $b, $src0 + lea $b, $b_ptr + mov 8*0+$a, $acc1 + mov 8*1+$a, $acc2 + lea $bias+$a, $a_ptr + mov 8*2+$a, $acc3 + mov 8*3+$a, $acc4" +} + +sub load_for_sqr () { +my ($a,$src0) = @_; +my $bias = $src0 eq "%rax" ? 0 : -128; + +" mov 8*0+$a, $src0 + mov 8*1+$a, $acc6 + lea $bias+$a, $a_ptr + mov 8*2+$a, $acc7 + mov 8*3+$a, $acc0" +} + + { +######################################################################## +# operate in 4-5-0-1 "name space" that matches multiplication output +# +my ($a0,$a1,$a2,$a3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3); + +$code.=<<___; +.type __ecp_nistz256_add_toq,\@abi-omnipotent +.align 32 +__ecp_nistz256_add_toq: + add 8*0($b_ptr), $a0 + adc 8*1($b_ptr), $a1 + mov $a0, $t0 + adc 8*2($b_ptr), $a2 + adc 8*3($b_ptr), $a3 + mov $a1, $t1 + sbb $t4, $t4 + + sub \$-1, $a0 + mov $a2, $t2 + sbb $poly1, $a1 + sbb \$0, $a2 + mov $a3, $t3 + sbb $poly3, $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + ret +.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq + +.type __ecp_nistz256_sub_fromq,\@abi-omnipotent +.align 32 +__ecp_nistz256_sub_fromq: + sub 8*0($b_ptr), $a0 + sbb 8*1($b_ptr), $a1 + mov $a0, $t0 + sbb 8*2($b_ptr), $a2 + sbb 8*3($b_ptr), $a3 + mov $a1, $t1 + sbb $t4, $t4 + + add \$-1, $a0 + mov $a2, $t2 + adc $poly1, $a1 + adc \$0, $a2 + mov $a3, $t3 + adc $poly3, $a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + ret +.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq + +.type __ecp_nistz256_subq,\@abi-omnipotent +.align 32 +__ecp_nistz256_subq: + sub $a0, $t0 + sbb $a1, $t1 + mov $t0, $a0 + sbb $a2, $t2 + sbb $a3, $t3 + mov $t1, $a1 + sbb $t4, $t4 + + add \$-1, $t0 + mov $t2, $a2 + adc $poly1, $t1 + adc \$0, $t2 + mov $t3, $a3 + adc $poly3, $t3 + test $t4, $t4 + + cmovnz $t0, $a0 + cmovnz $t1, $a1 + cmovnz $t2, $a2 + cmovnz $t3, $a3 + + ret +.size __ecp_nistz256_subq,.-__ecp_nistz256_subq + +.type __ecp_nistz256_mul_by_2q,\@abi-omnipotent +.align 32 +__ecp_nistz256_mul_by_2q: + add $a0, $a0 # a0:a3+a0:a3 + adc $a1, $a1 + mov $a0, $t0 + adc $a2, $a2 + adc $a3, $a3 + mov $a1, $t1 + sbb $t4, $t4 + + sub \$-1, $a0 + mov $a2, $t2 + sbb $poly1, $a1 + sbb \$0, $a2 + mov $a3, $t3 + sbb $poly3, 
$a3 + test $t4, $t4 + + cmovz $t0, $a0 + cmovz $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovz $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovz $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + ret +.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q +___ + } +sub gen_double () { + my $x = shift; + my ($src0,$sfx,$bias); + my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4)); + + if ($x ne "x") { + $src0 = "%rax"; + $sfx = ""; + $bias = 0; + +$code.=<<___; +.globl ecp_nistz256_point_double +.type ecp_nistz256_point_double,\@function,2 +.align 32 +ecp_nistz256_point_double: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je .Lpoint_doublex +___ + } else { + $src0 = "%rdx"; + $sfx = "x"; + $bias = 128; + +$code.=<<___; +.type ecp_nistz256_point_doublex,\@function,2 +.align 32 +ecp_nistz256_point_doublex: +.Lpoint_doublex: +___ + } +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + sub \$32*5+8, %rsp + + movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x + mov $a_ptr, $b_ptr # backup copy + movdqu 0x10($a_ptr), %xmm1 + mov 0x20+8*0($a_ptr), $acc4 # load in_y in "5-4-0-1" order + mov 0x20+8*1($a_ptr), $acc5 + mov 0x20+8*2($a_ptr), $acc0 + mov 0x20+8*3($a_ptr), $acc1 + mov .Lpoly+8*1(%rip), $poly1 + mov .Lpoly+8*3(%rip), $poly3 + movdqa %xmm0, $in_x(%rsp) + movdqa %xmm1, $in_x+0x10(%rsp) + lea 0x20($r_ptr), $acc2 + lea 0x40($r_ptr), $acc3 + movq $r_ptr, %xmm0 + movq $acc2, %xmm1 + movq $acc3, %xmm2 + + lea $S(%rsp), $r_ptr + call __ecp_nistz256_mul_by_2$x # p256_mul_by_2(S, in_y); + + mov 0x40+8*0($a_ptr), $src0 + mov 0x40+8*1($a_ptr), $acc6 + mov 0x40+8*2($a_ptr), $acc7 + mov 0x40+8*3($a_ptr), $acc0 + lea 0x40-$bias($a_ptr), $a_ptr + lea $Zsqr(%rsp), $r_ptr + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Zsqr, in_z); + + `&load_for_sqr("$S(%rsp)", "$src0")` + lea $S(%rsp), $r_ptr + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(S, S); + + mov 0x20($b_ptr), $src0 # $b_ptr is still valid + mov 0x40+8*0($b_ptr), $acc1 + mov 0x40+8*1($b_ptr), $acc2 + mov 0x40+8*2($b_ptr), $acc3 + mov 0x40+8*3($b_ptr), $acc4 + lea 0x40-$bias($b_ptr), $a_ptr + lea 0x20($b_ptr), $b_ptr + movq %xmm2, $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(res_z, in_z, in_y); + call __ecp_nistz256_mul_by_2$x # p256_mul_by_2(res_z, res_z); + + mov $in_x+8*0(%rsp), $acc4 # "5-4-0-1" order + mov $in_x+8*1(%rsp), $acc5 + lea $Zsqr(%rsp), $b_ptr + mov $in_x+8*2(%rsp), $acc0 + mov $in_x+8*3(%rsp), $acc1 + lea $M(%rsp), $r_ptr + call __ecp_nistz256_add_to$x # p256_add(M, in_x, Zsqr); + + mov $in_x+8*0(%rsp), $acc4 # "5-4-0-1" order + mov $in_x+8*1(%rsp), $acc5 + lea $Zsqr(%rsp), $b_ptr + mov $in_x+8*2(%rsp), $acc0 + mov $in_x+8*3(%rsp), $acc1 + lea $Zsqr(%rsp), $r_ptr + call __ecp_nistz256_sub_from$x # p256_sub(Zsqr, in_x, Zsqr); + + `&load_for_sqr("$S(%rsp)", "$src0")` + movq %xmm1, $r_ptr + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(res_y, S); +___ +{ +######## ecp_nistz256_div_by_2(res_y, res_y); ########################## +# operate in 4-5-6-7 "name space" that matches squaring output +# +my ($poly1,$poly3)=($a_ptr,$t1); +my ($a0,$a1,$a2,$a3,$t3,$t4,$t1)=($acc4,$acc5,$acc6,$acc7,$acc0,$acc1,$acc2); + +$code.=<<___; + xor $t4, $t4 + mov $a0, $t0 + add \$-1, $a0 + mov $a1, $t1 + adc $poly1, $a1 + mov $a2, $t2 + adc \$0, $a2 + mov $a3, $t3 + adc $poly3, $a3 + adc \$0, $t4 + xor $a_ptr, $a_ptr # borrow $a_ptr + test \$1, $t0 + + cmovz $t0, $a0 + cmovz $t1, $a1 + cmovz $t2, $a2 + cmovz $t3, $a3 + cmovz $a_ptr, $t4 + + 
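+	# The registers now hold whichever of y and y+p is even (p is odd,
+	# so exactly one of them is), with $t4 carrying bit 256 of the sum
+	# or zero. The cascade below shifts that value right by one: each
+	# shl \$63 moves a limb's low bit into the top bit of the limb
+	# beneath it, and $t4 supplies the top bit of the result.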
mov $a1, $t0 # a0:a3>>1 + shr \$1, $a0 + shl \$63, $t0 + mov $a2, $t1 + shr \$1, $a1 + or $t0, $a0 + shl \$63, $t1 + mov $a3, $t2 + shr \$1, $a2 + or $t1, $a1 + shl \$63, $t2 + mov $a0, 8*0($r_ptr) + shr \$1, $a3 + mov $a1, 8*1($r_ptr) + shl \$63, $t4 + or $t2, $a2 + or $t4, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) +___ +} +$code.=<<___; + `&load_for_mul("$M(%rsp)", "$Zsqr(%rsp)", "$src0")` + lea $M(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(M, M, Zsqr); + + lea $tmp0(%rsp), $r_ptr + call __ecp_nistz256_mul_by_2$x + + lea $M(%rsp), $b_ptr + lea $M(%rsp), $r_ptr + call __ecp_nistz256_add_to$x # p256_mul_by_3(M, M); + + `&load_for_mul("$S(%rsp)", "$in_x(%rsp)", "$src0")` + lea $S(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S, S, in_x); + + lea $tmp0(%rsp), $r_ptr + call __ecp_nistz256_mul_by_2$x # p256_mul_by_2(tmp0, S); + + `&load_for_sqr("$M(%rsp)", "$src0")` + movq %xmm0, $r_ptr + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(res_x, M); + + lea $tmp0(%rsp), $b_ptr + mov $acc6, $acc0 # harmonize sqr output and sub input + mov $acc7, $acc1 + mov $a_ptr, $poly1 + mov $t1, $poly3 + call __ecp_nistz256_sub_from$x # p256_sub(res_x, res_x, tmp0); + + mov $S+8*0(%rsp), $t0 + mov $S+8*1(%rsp), $t1 + mov $S+8*2(%rsp), $t2 + mov $S+8*3(%rsp), $acc2 # "4-5-0-1" order + lea $S(%rsp), $r_ptr + call __ecp_nistz256_sub$x # p256_sub(S, S, res_x); + + mov $M(%rsp), $src0 + lea $M(%rsp), $b_ptr + mov $acc4, $acc6 # harmonize sub output and mul input + xor %ecx, %ecx + mov $acc4, $S+8*0(%rsp) # have to save:-( + mov $acc5, $acc2 + mov $acc5, $S+8*1(%rsp) + cmovz $acc0, $acc3 + mov $acc0, $S+8*2(%rsp) + lea $S-$bias(%rsp), $a_ptr + cmovz $acc1, $acc4 + mov $acc1, $S+8*3(%rsp) + mov $acc6, $acc1 + lea $S(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S, S, M); + + movq %xmm1, $b_ptr + movq %xmm1, $r_ptr + call __ecp_nistz256_sub_from$x # p256_sub(res_y, S, res_y); + + add \$32*5+8, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx +___ +} +&gen_double("q"); + +sub gen_add () { + my $x = shift; + my ($src0,$sfx,$bias); + my ($H,$Hsqr,$R,$Rsqr,$Hcub, + $U1,$U2,$S1,$S2, + $res_x,$res_y,$res_z, + $in1_x,$in1_y,$in1_z, + $in2_x,$in2_y,$in2_z)=map(32*$_,(0..17)); + my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr); + + if ($x ne "x") { + $src0 = "%rax"; + $sfx = ""; + $bias = 0; + +$code.=<<___; +.globl ecp_nistz256_point_add +.type ecp_nistz256_point_add,\@function,3 +.align 32 +ecp_nistz256_point_add: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je .Lpoint_addx +___ + } else { + $src0 = "%rdx"; + $sfx = "x"; + $bias = 128; + +$code.=<<___; +.type ecp_nistz256_point_addx,\@function,3 +.align 32 +ecp_nistz256_point_addx: +.Lpoint_addx: +___ + } +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + sub \$32*18+8, %rsp + + movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr + movdqu 0x10($a_ptr), %xmm1 + movdqu 0x20($a_ptr), %xmm2 + movdqu 0x30($a_ptr), %xmm3 + movdqu 0x40($a_ptr), %xmm4 + movdqu 0x50($a_ptr), %xmm5 + mov $a_ptr, $b_ptr # reassign + mov $b_org, $a_ptr # reassign + movdqa %xmm0, $in1_x(%rsp) + movdqa %xmm1, $in1_x+0x10(%rsp) + por %xmm0, %xmm1 + movdqa %xmm2, $in1_y(%rsp) + movdqa %xmm3, $in1_y+0x10(%rsp) + por %xmm2, %xmm3 + movdqa %xmm4, $in1_z(%rsp) + movdqa %xmm5, $in1_z+0x10(%rsp) + por %xmm1, %xmm3 + + movdqu 0x00($a_ptr), %xmm0 # copy 
*(P256_POINT *)$b_ptr + pshufd \$0xb1, %xmm3, %xmm5 + movdqu 0x10($a_ptr), %xmm1 + movdqu 0x20($a_ptr), %xmm2 + por %xmm3, %xmm5 + movdqu 0x30($a_ptr), %xmm3 + mov 0x40+8*0($a_ptr), $src0 # load original in2_z + mov 0x40+8*1($a_ptr), $acc6 + mov 0x40+8*2($a_ptr), $acc7 + mov 0x40+8*3($a_ptr), $acc0 + movdqa %xmm0, $in2_x(%rsp) + pshufd \$0x1e, %xmm5, %xmm4 + movdqa %xmm1, $in2_x+0x10(%rsp) + por %xmm0, %xmm1 + movq $r_ptr, %xmm0 # save $r_ptr + movdqa %xmm2, $in2_y(%rsp) + movdqa %xmm3, $in2_y+0x10(%rsp) + por %xmm2, %xmm3 + por %xmm4, %xmm5 + pxor %xmm4, %xmm4 + por %xmm1, %xmm3 + + lea 0x40-$bias($a_ptr), $a_ptr # $a_ptr is still valid + mov $src0, $in2_z+8*0(%rsp) # make in2_z copy + mov $acc6, $in2_z+8*1(%rsp) + mov $acc7, $in2_z+8*2(%rsp) + mov $acc0, $in2_z+8*3(%rsp) + lea $Z2sqr(%rsp), $r_ptr # Z2^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Z2sqr, in2_z); + + pcmpeqd %xmm4, %xmm5 + pshufd \$0xb1, %xmm3, %xmm4 + por %xmm3, %xmm4 + pshufd \$0, %xmm5, %xmm5 # in1infty + pshufd \$0x1e, %xmm4, %xmm3 + por %xmm3, %xmm4 + pxor %xmm3, %xmm3 + pcmpeqd %xmm3, %xmm4 + pshufd \$0, %xmm4, %xmm4 # in2infty + mov 0x40+8*0($b_ptr), $src0 # load original in1_z + mov 0x40+8*1($b_ptr), $acc6 + mov 0x40+8*2($b_ptr), $acc7 + mov 0x40+8*3($b_ptr), $acc0 + + lea 0x40-$bias($b_ptr), $a_ptr + lea $Z1sqr(%rsp), $r_ptr # Z1^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Z1sqr, in1_z); + + `&load_for_mul("$Z2sqr(%rsp)", "$in2_z(%rsp)", "$src0")` + lea $S1(%rsp), $r_ptr # S1 = Z2^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S1, Z2sqr, in2_z); + + `&load_for_mul("$Z1sqr(%rsp)", "$in1_z(%rsp)", "$src0")` + lea $S2(%rsp), $r_ptr # S2 = Z1^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S2, Z1sqr, in1_z); + + `&load_for_mul("$S1(%rsp)", "$in1_y(%rsp)", "$src0")` + lea $S1(%rsp), $r_ptr # S1 = Y1*Z2^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S1, S1, in1_y); + + `&load_for_mul("$S2(%rsp)", "$in2_y(%rsp)", "$src0")` + lea $S2(%rsp), $r_ptr # S2 = Y2*Z1^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S2, S2, in2_y); + + lea $S1(%rsp), $b_ptr + lea $R(%rsp), $r_ptr # R = S2 - S1 + call __ecp_nistz256_sub_from$x # p256_sub(R, S2, S1); + + or $acc5, $acc4 # see if result is zero + movdqa %xmm4, %xmm2 + or $acc0, $acc4 + or $acc1, $acc4 + por %xmm5, %xmm2 # in1infty || in2infty + movq $acc4, %xmm3 + + `&load_for_mul("$Z2sqr(%rsp)", "$in1_x(%rsp)", "$src0")` + lea $U1(%rsp), $r_ptr # U1 = X1*Z2^2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(U1, in1_x, Z2sqr); + + `&load_for_mul("$Z1sqr(%rsp)", "$in2_x(%rsp)", "$src0")` + lea $U2(%rsp), $r_ptr # U2 = X2*Z1^2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(U2, in2_x, Z1sqr); + + lea $U1(%rsp), $b_ptr + lea $H(%rsp), $r_ptr # H = U2 - U1 + call __ecp_nistz256_sub_from$x # p256_sub(H, U2, U1); + + or $acc5, $acc4 # see if result is zero + or $acc0, $acc4 + or $acc1, $acc4 + + .byte 0x3e # predict taken + jnz .Ladd_proceed$x # is_equal(U1,U2)? + movq %xmm2, $acc0 + movq %xmm3, $acc1 + test $acc0, $acc0 + jnz .Ladd_proceed$x # (in1infty || in2infty)? + test $acc1, $acc1 + jz .Ladd_proceed$x # is_equal(S1,S2)? 
+ + movq %xmm0, $r_ptr # restore $r_ptr + pxor %xmm0, %xmm0 + movdqu %xmm0, 0x00($r_ptr) + movdqu %xmm0, 0x10($r_ptr) + movdqu %xmm0, 0x20($r_ptr) + movdqu %xmm0, 0x30($r_ptr) + movdqu %xmm0, 0x40($r_ptr) + movdqu %xmm0, 0x50($r_ptr) + jmp .Ladd_done$x + +.align 32 +.Ladd_proceed$x: + `&load_for_sqr("$R(%rsp)", "$src0")` + lea $Rsqr(%rsp), $r_ptr # R^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Rsqr, R); + + `&load_for_mul("$H(%rsp)", "$in1_z(%rsp)", "$src0")` + lea $res_z(%rsp), $r_ptr # Z3 = H*Z1*Z2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(res_z, H, in1_z); + + `&load_for_sqr("$H(%rsp)", "$src0")` + lea $Hsqr(%rsp), $r_ptr # H^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Hsqr, H); + + `&load_for_mul("$res_z(%rsp)", "$in2_z(%rsp)", "$src0")` + lea $res_z(%rsp), $r_ptr # Z3 = H*Z1*Z2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(res_z, res_z, in2_z); + + `&load_for_mul("$Hsqr(%rsp)", "$H(%rsp)", "$src0")` + lea $Hcub(%rsp), $r_ptr # H^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(Hcub, Hsqr, H); + + `&load_for_mul("$Hsqr(%rsp)", "$U1(%rsp)", "$src0")` + lea $U2(%rsp), $r_ptr # U1*H^2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(U2, U1, Hsqr); +___ +{ +####################################################################### +# operate in 4-5-0-1 "name space" that matches multiplication output +# +my ($acc0,$acc1,$acc2,$acc3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3); +my ($poly1, $poly3)=($acc6,$acc7); + +$code.=<<___; + #lea $U2(%rsp), $a_ptr + #lea $Hsqr(%rsp), $r_ptr # 2*U1*H^2 + #call __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2); + + add $acc0, $acc0 # a0:a3+a0:a3 + lea $Rsqr(%rsp), $a_ptr + adc $acc1, $acc1 + mov $acc0, $t0 + adc $acc2, $acc2 + adc $acc3, $acc3 + mov $acc1, $t1 + sbb $t4, $t4 + + sub \$-1, $acc0 + mov $acc2, $t2 + sbb $poly1, $acc1 + sbb \$0, $acc2 + mov $acc3, $t3 + sbb $poly3, $acc3 + test $t4, $t4 + + cmovz $t0, $acc0 + mov 8*0($a_ptr), $t0 + cmovz $t1, $acc1 + mov 8*1($a_ptr), $t1 + cmovz $t2, $acc2 + mov 8*2($a_ptr), $t2 + cmovz $t3, $acc3 + mov 8*3($a_ptr), $t3 + + call __ecp_nistz256_sub$x # p256_sub(res_x, Rsqr, Hsqr); + + lea $Hcub(%rsp), $b_ptr + lea $res_x(%rsp), $r_ptr + call __ecp_nistz256_sub_from$x # p256_sub(res_x, res_x, Hcub); + + mov $U2+8*0(%rsp), $t0 + mov $U2+8*1(%rsp), $t1 + mov $U2+8*2(%rsp), $t2 + mov $U2+8*3(%rsp), $t3 + lea $res_y(%rsp), $r_ptr + + call __ecp_nistz256_sub$x # p256_sub(res_y, U2, res_x); + + mov $acc0, 8*0($r_ptr) # save the result, as + mov $acc1, 8*1($r_ptr) # __ecp_nistz256_sub doesn't + mov $acc2, 8*2($r_ptr) + mov $acc3, 8*3($r_ptr) +___ +} +$code.=<<___; + `&load_for_mul("$S1(%rsp)", "$Hcub(%rsp)", "$src0")` + lea $S2(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S2, S1, Hcub); + + `&load_for_mul("$R(%rsp)", "$res_y(%rsp)", "$src0")` + lea $res_y(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(res_y, R, res_y); + + lea $S2(%rsp), $b_ptr + lea $res_y(%rsp), $r_ptr + call __ecp_nistz256_sub_from$x # p256_sub(res_y, res_y, S2); + + movq %xmm0, $r_ptr # restore $r_ptr + + movdqa %xmm5, %xmm0 # copy_conditional(res_z, in2_z, in1infty); + movdqa %xmm5, %xmm1 + pandn $res_z(%rsp), %xmm0 + movdqa %xmm5, %xmm2 + pandn $res_z+0x10(%rsp), %xmm1 + movdqa %xmm5, %xmm3 + pand $in2_z(%rsp), %xmm2 + pand $in2_z+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + + movdqa %xmm4, %xmm0 # copy_conditional(res_z, in1_z, in2infty); + movdqa %xmm4, %xmm1 + pandn %xmm2, %xmm0 + movdqa %xmm4, %xmm2 + pandn %xmm3, %xmm1 + movdqa %xmm4, %xmm3 + pand 
$in1_z(%rsp), %xmm2 + pand $in1_z+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + movdqu %xmm2, 0x40($r_ptr) + movdqu %xmm3, 0x50($r_ptr) + + movdqa %xmm5, %xmm0 # copy_conditional(res_x, in2_x, in1infty); + movdqa %xmm5, %xmm1 + pandn $res_x(%rsp), %xmm0 + movdqa %xmm5, %xmm2 + pandn $res_x+0x10(%rsp), %xmm1 + movdqa %xmm5, %xmm3 + pand $in2_x(%rsp), %xmm2 + pand $in2_x+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + + movdqa %xmm4, %xmm0 # copy_conditional(res_x, in1_x, in2infty); + movdqa %xmm4, %xmm1 + pandn %xmm2, %xmm0 + movdqa %xmm4, %xmm2 + pandn %xmm3, %xmm1 + movdqa %xmm4, %xmm3 + pand $in1_x(%rsp), %xmm2 + pand $in1_x+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + movdqu %xmm2, 0x00($r_ptr) + movdqu %xmm3, 0x10($r_ptr) + + movdqa %xmm5, %xmm0 # copy_conditional(res_y, in2_y, in1infty); + movdqa %xmm5, %xmm1 + pandn $res_y(%rsp), %xmm0 + movdqa %xmm5, %xmm2 + pandn $res_y+0x10(%rsp), %xmm1 + movdqa %xmm5, %xmm3 + pand $in2_y(%rsp), %xmm2 + pand $in2_y+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + + movdqa %xmm4, %xmm0 # copy_conditional(res_y, in1_y, in2infty); + movdqa %xmm4, %xmm1 + pandn %xmm2, %xmm0 + movdqa %xmm4, %xmm2 + pandn %xmm3, %xmm1 + movdqa %xmm4, %xmm3 + pand $in1_y(%rsp), %xmm2 + pand $in1_y+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + movdqu %xmm2, 0x20($r_ptr) + movdqu %xmm3, 0x30($r_ptr) + +.Ladd_done$x: + add \$32*18+8, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx +___ +} +&gen_add("q"); + +sub gen_add_affine () { + my $x = shift; + my ($src0,$sfx,$bias); + my ($U2,$S2,$H,$R,$Hsqr,$Hcub,$Rsqr, + $res_x,$res_y,$res_z, + $in1_x,$in1_y,$in1_z, + $in2_x,$in2_y)=map(32*$_,(0..14)); + my $Z1sqr = $S2; + + if ($x ne "x") { + $src0 = "%rax"; + $sfx = ""; + $bias = 0; + +$code.=<<___; +.globl ecp_nistz256_point_add_affine +.type ecp_nistz256_point_add_affine,\@function,3 +.align 32 +ecp_nistz256_point_add_affine: +___ +$code.=<<___ if ($addx); + mov \$0x80100, %ecx + and OPENSSL_ia32cap_P+8(%rip), %ecx + cmp \$0x80100, %ecx + je .Lpoint_add_affinex +___ + } else { + $src0 = "%rdx"; + $sfx = "x"; + $bias = 128; + +$code.=<<___; +.type ecp_nistz256_point_add_affinex,\@function,3 +.align 32 +ecp_nistz256_point_add_affinex: +.Lpoint_add_affinex: +___ + } +$code.=<<___; + push %rbp + push %rbx + push %r12 + push %r13 + push %r14 + push %r15 + sub \$32*15+8, %rsp + + movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr + mov $b_org, $b_ptr # reassign + movdqu 0x10($a_ptr), %xmm1 + movdqu 0x20($a_ptr), %xmm2 + movdqu 0x30($a_ptr), %xmm3 + movdqu 0x40($a_ptr), %xmm4 + movdqu 0x50($a_ptr), %xmm5 + mov 0x40+8*0($a_ptr), $src0 # load original in1_z + mov 0x40+8*1($a_ptr), $acc6 + mov 0x40+8*2($a_ptr), $acc7 + mov 0x40+8*3($a_ptr), $acc0 + movdqa %xmm0, $in1_x(%rsp) + movdqa %xmm1, $in1_x+0x10(%rsp) + por %xmm0, %xmm1 + movdqa %xmm2, $in1_y(%rsp) + movdqa %xmm3, $in1_y+0x10(%rsp) + por %xmm2, %xmm3 + movdqa %xmm4, $in1_z(%rsp) + movdqa %xmm5, $in1_z+0x10(%rsp) + por %xmm1, %xmm3 + + movdqu 0x00($b_ptr), %xmm0 # copy *(P256_POINT_AFFINE *)$b_ptr + pshufd \$0xb1, %xmm3, %xmm5 + movdqu 0x10($b_ptr), %xmm1 + movdqu 0x20($b_ptr), %xmm2 + por %xmm3, %xmm5 + movdqu 0x30($b_ptr), %xmm3 + movdqa %xmm0, $in2_x(%rsp) + pshufd \$0x1e, %xmm5, %xmm4 + movdqa %xmm1, $in2_x+0x10(%rsp) + por %xmm0, %xmm1 + movq $r_ptr, %xmm0 # save $r_ptr + movdqa %xmm2, $in2_y(%rsp) + movdqa %xmm3, $in2_y+0x10(%rsp) + por %xmm2, %xmm3 + por %xmm4, %xmm5 + pxor 
%xmm4, %xmm4 + por %xmm1, %xmm3 + + lea 0x40-$bias($a_ptr), $a_ptr # $a_ptr is still valid + lea $Z1sqr(%rsp), $r_ptr # Z1^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Z1sqr, in1_z); + + pcmpeqd %xmm4, %xmm5 + pshufd \$0xb1, %xmm3, %xmm4 + mov 0x00($b_ptr), $src0 # $b_ptr is still valid + #lea 0x00($b_ptr), $b_ptr + mov $acc4, $acc1 # harmonize sqr output and mul input + por %xmm3, %xmm4 + pshufd \$0, %xmm5, %xmm5 # in1infty + pshufd \$0x1e, %xmm4, %xmm3 + mov $acc5, $acc2 + por %xmm3, %xmm4 + pxor %xmm3, %xmm3 + mov $acc6, $acc3 + pcmpeqd %xmm3, %xmm4 + pshufd \$0, %xmm4, %xmm4 # in2infty + + lea $Z1sqr-$bias(%rsp), $a_ptr + mov $acc7, $acc4 + lea $U2(%rsp), $r_ptr # U2 = X2*Z1^2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(U2, Z1sqr, in2_x); + + lea $in1_x(%rsp), $b_ptr + lea $H(%rsp), $r_ptr # H = U2 - U1 + call __ecp_nistz256_sub_from$x # p256_sub(H, U2, in1_x); + + `&load_for_mul("$Z1sqr(%rsp)", "$in1_z(%rsp)", "$src0")` + lea $S2(%rsp), $r_ptr # S2 = Z1^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S2, Z1sqr, in1_z); + + `&load_for_mul("$H(%rsp)", "$in1_z(%rsp)", "$src0")` + lea $res_z(%rsp), $r_ptr # Z3 = H*Z1*Z2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(res_z, H, in1_z); + + `&load_for_mul("$S2(%rsp)", "$in2_y(%rsp)", "$src0")` + lea $S2(%rsp), $r_ptr # S2 = Y2*Z1^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S2, S2, in2_y); + + lea $in1_y(%rsp), $b_ptr + lea $R(%rsp), $r_ptr # R = S2 - S1 + call __ecp_nistz256_sub_from$x # p256_sub(R, S2, in1_y); + + `&load_for_sqr("$H(%rsp)", "$src0")` + lea $Hsqr(%rsp), $r_ptr # H^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Hsqr, H); + + `&load_for_sqr("$R(%rsp)", "$src0")` + lea $Rsqr(%rsp), $r_ptr # R^2 + call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Rsqr, R); + + `&load_for_mul("$H(%rsp)", "$Hsqr(%rsp)", "$src0")` + lea $Hcub(%rsp), $r_ptr # H^3 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(Hcub, Hsqr, H); + + `&load_for_mul("$Hsqr(%rsp)", "$in1_x(%rsp)", "$src0")` + lea $U2(%rsp), $r_ptr # U1*H^2 + call __ecp_nistz256_mul_mont$x # p256_mul_mont(U2, in1_x, Hsqr); +___ +{ +####################################################################### +# operate in 4-5-0-1 "name space" that matches multiplication output +# +my ($acc0,$acc1,$acc2,$acc3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3); +my ($poly1, $poly3)=($acc6,$acc7); + +$code.=<<___; + #lea $U2(%rsp), $a_ptr + #lea $Hsqr(%rsp), $r_ptr # 2*U1*H^2 + #call __ecp_nistz256_mul_by_2 # ecp_nistz256_mul_by_2(Hsqr, U2); + + add $acc0, $acc0 # a0:a3+a0:a3 + lea $Rsqr(%rsp), $a_ptr + adc $acc1, $acc1 + mov $acc0, $t0 + adc $acc2, $acc2 + adc $acc3, $acc3 + mov $acc1, $t1 + sbb $t4, $t4 + + sub \$-1, $acc0 + mov $acc2, $t2 + sbb $poly1, $acc1 + sbb \$0, $acc2 + mov $acc3, $t3 + sbb $poly3, $acc3 + test $t4, $t4 + + cmovz $t0, $acc0 + mov 8*0($a_ptr), $t0 + cmovz $t1, $acc1 + mov 8*1($a_ptr), $t1 + cmovz $t2, $acc2 + mov 8*2($a_ptr), $t2 + cmovz $t3, $acc3 + mov 8*3($a_ptr), $t3 + + call __ecp_nistz256_sub$x # p256_sub(res_x, Rsqr, Hsqr); + + lea $Hcub(%rsp), $b_ptr + lea $res_x(%rsp), $r_ptr + call __ecp_nistz256_sub_from$x # p256_sub(res_x, res_x, Hcub); + + mov $U2+8*0(%rsp), $t0 + mov $U2+8*1(%rsp), $t1 + mov $U2+8*2(%rsp), $t2 + mov $U2+8*3(%rsp), $t3 + lea $H(%rsp), $r_ptr + + call __ecp_nistz256_sub$x # p256_sub(H, U2, res_x); + + mov $acc0, 8*0($r_ptr) # save the result, as + mov $acc1, 8*1($r_ptr) # __ecp_nistz256_sub doesn't + mov $acc2, 8*2($r_ptr) + mov $acc3, 8*3($r_ptr) +___ +} +$code.=<<___; + `&load_for_mul("$Hcub(%rsp)", "$in1_y(%rsp)", 
"$src0")` + lea $S2(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(S2, Hcub, in1_y); + + `&load_for_mul("$H(%rsp)", "$R(%rsp)", "$src0")` + lea $H(%rsp), $r_ptr + call __ecp_nistz256_mul_mont$x # p256_mul_mont(H, H, R); + + lea $S2(%rsp), $b_ptr + lea $res_y(%rsp), $r_ptr + call __ecp_nistz256_sub_from$x # p256_sub(res_y, H, S2); + + movq %xmm0, $r_ptr # restore $r_ptr + + movdqa %xmm5, %xmm0 # copy_conditional(res_z, ONE, in1infty); + movdqa %xmm5, %xmm1 + pandn $res_z(%rsp), %xmm0 + movdqa %xmm5, %xmm2 + pandn $res_z+0x10(%rsp), %xmm1 + movdqa %xmm5, %xmm3 + pand .LONE_mont(%rip), %xmm2 + pand .LONE_mont+0x10(%rip), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + + movdqa %xmm4, %xmm0 # copy_conditional(res_z, in1_z, in2infty); + movdqa %xmm4, %xmm1 + pandn %xmm2, %xmm0 + movdqa %xmm4, %xmm2 + pandn %xmm3, %xmm1 + movdqa %xmm4, %xmm3 + pand $in1_z(%rsp), %xmm2 + pand $in1_z+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + movdqu %xmm2, 0x40($r_ptr) + movdqu %xmm3, 0x50($r_ptr) + + movdqa %xmm5, %xmm0 # copy_conditional(res_x, in2_x, in1infty); + movdqa %xmm5, %xmm1 + pandn $res_x(%rsp), %xmm0 + movdqa %xmm5, %xmm2 + pandn $res_x+0x10(%rsp), %xmm1 + movdqa %xmm5, %xmm3 + pand $in2_x(%rsp), %xmm2 + pand $in2_x+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + + movdqa %xmm4, %xmm0 # copy_conditional(res_x, in1_x, in2infty); + movdqa %xmm4, %xmm1 + pandn %xmm2, %xmm0 + movdqa %xmm4, %xmm2 + pandn %xmm3, %xmm1 + movdqa %xmm4, %xmm3 + pand $in1_x(%rsp), %xmm2 + pand $in1_x+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + movdqu %xmm2, 0x00($r_ptr) + movdqu %xmm3, 0x10($r_ptr) + + movdqa %xmm5, %xmm0 # copy_conditional(res_y, in2_y, in1infty); + movdqa %xmm5, %xmm1 + pandn $res_y(%rsp), %xmm0 + movdqa %xmm5, %xmm2 + pandn $res_y+0x10(%rsp), %xmm1 + movdqa %xmm5, %xmm3 + pand $in2_y(%rsp), %xmm2 + pand $in2_y+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + + movdqa %xmm4, %xmm0 # copy_conditional(res_y, in1_y, in2infty); + movdqa %xmm4, %xmm1 + pandn %xmm2, %xmm0 + movdqa %xmm4, %xmm2 + pandn %xmm3, %xmm1 + movdqa %xmm4, %xmm3 + pand $in1_y(%rsp), %xmm2 + pand $in1_y+0x10(%rsp), %xmm3 + por %xmm0, %xmm2 + por %xmm1, %xmm3 + movdqu %xmm2, 0x20($r_ptr) + movdqu %xmm3, 0x30($r_ptr) + + add \$32*15+8, %rsp + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbx + pop %rbp + ret +.size ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx +___ +} +&gen_add_affine("q"); + +######################################################################## +# AD*X magic +# +if ($addx) { { +######################################################################## +# operate in 4-5-0-1 "name space" that matches multiplication output +# +my ($a0,$a1,$a2,$a3,$t3,$t4)=($acc4,$acc5,$acc0,$acc1,$acc2,$acc3); + +$code.=<<___; +.type __ecp_nistz256_add_tox,\@abi-omnipotent +.align 32 +__ecp_nistz256_add_tox: + xor $t4, $t4 + adc 8*0($b_ptr), $a0 + adc 8*1($b_ptr), $a1 + mov $a0, $t0 + adc 8*2($b_ptr), $a2 + adc 8*3($b_ptr), $a3 + mov $a1, $t1 + adc \$0, $t4 + + xor $t3, $t3 + sbb \$-1, $a0 + mov $a2, $t2 + sbb $poly1, $a1 + sbb \$0, $a2 + mov $a3, $t3 + sbb $poly3, $a3 + + bt \$0, $t4 + cmovnc $t0, $a0 + cmovnc $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovnc $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovnc $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + ret +.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox + +.type __ecp_nistz256_sub_fromx,\@abi-omnipotent +.align 32 +__ecp_nistz256_sub_fromx: + xor $t4, $t4 + sbb 8*0($b_ptr), $a0 + sbb 8*1($b_ptr), $a1 + 
mov $a0, $t0 + sbb 8*2($b_ptr), $a2 + sbb 8*3($b_ptr), $a3 + mov $a1, $t1 + sbb \$0, $t4 + + xor $t3, $t3 + adc \$-1, $a0 + mov $a2, $t2 + adc $poly1, $a1 + adc \$0, $a2 + mov $a3, $t3 + adc $poly3, $a3 + + bt \$0, $t4 + cmovnc $t0, $a0 + cmovnc $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovnc $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovnc $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + ret +.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx + +.type __ecp_nistz256_subx,\@abi-omnipotent +.align 32 +__ecp_nistz256_subx: + xor $t4, $t4 + sbb $a0, $t0 + sbb $a1, $t1 + mov $t0, $a0 + sbb $a2, $t2 + sbb $a3, $t3 + mov $t1, $a1 + sbb \$0, $t4 + + xor $a3 ,$a3 + adc \$-1, $t0 + mov $t2, $a2 + adc $poly1, $t1 + adc \$0, $t2 + mov $t3, $a3 + adc $poly3, $t3 + + bt \$0, $t4 + cmovc $t0, $a0 + cmovc $t1, $a1 + cmovc $t2, $a2 + cmovc $t3, $a3 + + ret +.size __ecp_nistz256_subx,.-__ecp_nistz256_subx + +.type __ecp_nistz256_mul_by_2x,\@abi-omnipotent +.align 32 +__ecp_nistz256_mul_by_2x: + xor $t4, $t4 + adc $a0, $a0 # a0:a3+a0:a3 + adc $a1, $a1 + mov $a0, $t0 + adc $a2, $a2 + adc $a3, $a3 + mov $a1, $t1 + adc \$0, $t4 + + xor $t3, $t3 + sbb \$-1, $a0 + mov $a2, $t2 + sbb $poly1, $a1 + sbb \$0, $a2 + mov $a3, $t3 + sbb $poly3, $a3 + + bt \$0, $t4 + cmovnc $t0, $a0 + cmovnc $t1, $a1 + mov $a0, 8*0($r_ptr) + cmovnc $t2, $a2 + mov $a1, 8*1($r_ptr) + cmovnc $t3, $a3 + mov $a2, 8*2($r_ptr) + mov $a3, 8*3($r_ptr) + + ret +.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x +___ + } +&gen_double("x"); +&gen_add("x"); +&gen_add_affine("x"); +} +}}} + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/ec/ec.h b/deps/openssl/openssl/crypto/ec/ec.h index c4e7aea938c23f..98edfdf8bc93cb 100644 --- a/deps/openssl/openssl/crypto/ec/ec.h +++ b/deps/openssl/openssl/crypto/ec/ec.h @@ -240,6 +240,12 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, */ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group); +/** Returns the montgomery data for order(Generator) + * \param group EC_GROUP object + * \return the currently used generator (possibly NULL). 
+*/ +BN_MONT_CTX *EC_GROUP_get_mont_data(const EC_GROUP *group); + /** Gets the order of a EC_GROUP * \param group EC_GROUP object * \param order BIGNUM to which the order is copied @@ -404,6 +410,9 @@ typedef struct { */ size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems); +const char *EC_curve_nid2nist(int nid); +int EC_curve_nist2nid(const char *name); + /********************************************************************/ /* EC_POINT functions */ /********************************************************************/ @@ -986,10 +995,78 @@ int EC_KEY_print_fp(FILE *fp, const EC_KEY *key, int off); # endif # define EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid) \ - EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, EVP_PKEY_OP_PARAMGEN, \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_PARAMGEN|EVP_PKEY_OP_KEYGEN, \ EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID, nid, NULL) +# define EVP_PKEY_CTX_set_ec_param_enc(ctx, flag) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_PARAMGEN|EVP_PKEY_OP_KEYGEN, \ + EVP_PKEY_CTRL_EC_PARAM_ENC, flag, NULL) + +# define EVP_PKEY_CTX_set_ecdh_cofactor_mode(ctx, flag) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_ECDH_COFACTOR, flag, NULL) + +# define EVP_PKEY_CTX_get_ecdh_cofactor_mode(ctx) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_ECDH_COFACTOR, -2, NULL) + +# define EVP_PKEY_CTX_set_ecdh_kdf_type(ctx, kdf) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_KDF_TYPE, kdf, NULL) + +# define EVP_PKEY_CTX_get_ecdh_kdf_type(ctx) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_KDF_TYPE, -2, NULL) + +# define EVP_PKEY_CTX_set_ecdh_kdf_md(ctx, md) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_KDF_MD, 0, (void *)md) + +# define EVP_PKEY_CTX_get_ecdh_kdf_md(ctx, pmd) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_EC_KDF_MD, 0, (void *)pmd) + +# define EVP_PKEY_CTX_set_ecdh_kdf_outlen(ctx, len) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_KDF_OUTLEN, len, NULL) + +# define EVP_PKEY_CTX_get_ecdh_kdf_outlen(ctx, plen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN, 0, (void *)plen) + +# define EVP_PKEY_CTX_set0_ecdh_kdf_ukm(ctx, p, plen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_EC_KDF_UKM, plen, (void *)p) + +# define EVP_PKEY_CTX_get0_ecdh_kdf_ukm(ctx, p) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, \ + EVP_PKEY_OP_DERIVE, \ + EVP_PKEY_CTRL_GET_EC_KDF_UKM, 0, (void *)p) + # define EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID (EVP_PKEY_ALG_CTRL + 1) +# define EVP_PKEY_CTRL_EC_PARAM_ENC (EVP_PKEY_ALG_CTRL + 2) +# define EVP_PKEY_CTRL_EC_ECDH_COFACTOR (EVP_PKEY_ALG_CTRL + 3) +# define EVP_PKEY_CTRL_EC_KDF_TYPE (EVP_PKEY_ALG_CTRL + 4) +# define EVP_PKEY_CTRL_EC_KDF_MD (EVP_PKEY_ALG_CTRL + 5) +# define EVP_PKEY_CTRL_GET_EC_KDF_MD (EVP_PKEY_ALG_CTRL + 6) +# define EVP_PKEY_CTRL_EC_KDF_OUTLEN (EVP_PKEY_ALG_CTRL + 7) +# define EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN (EVP_PKEY_ALG_CTRL + 8) +# define EVP_PKEY_CTRL_EC_KDF_UKM (EVP_PKEY_ALG_CTRL + 9) +# define EVP_PKEY_CTRL_GET_EC_KDF_UKM (EVP_PKEY_ALG_CTRL + 10) +/* KDF types */ +# define EVP_PKEY_ECDH_KDF_NONE 1 +# define EVP_PKEY_ECDH_KDF_X9_62 2 /* BEGIN ERROR CODES */ /* @@ -1007,6 +1084,8 @@ void ERR_load_EC_strings(void); # define EC_F_D2I_ECPKPARAMETERS 145 # define 
EC_F_D2I_ECPRIVATEKEY 146 # define EC_F_DO_EC_KEY_PRINT 221 +# define EC_F_ECDH_CMS_DECRYPT 238 +# define EC_F_ECDH_CMS_SET_SHARED_INFO 239 # define EC_F_ECKEY_PARAM2TYPE 223 # define EC_F_ECKEY_PARAM_DECODE 212 # define EC_F_ECKEY_PRIV_DECODE 213 @@ -1129,6 +1208,11 @@ void ERR_load_EC_strings(void); # define EC_F_NISTP224_PRE_COMP_NEW 227 # define EC_F_NISTP256_PRE_COMP_NEW 236 # define EC_F_NISTP521_PRE_COMP_NEW 237 +# define EC_F_ECP_NISTZ256_GET_AFFINE 240 +# define EC_F_ECP_NISTZ256_POINTS_MUL 241 +# define EC_F_ECP_NISTZ256_WINDOWED_MUL 242 +# define EC_F_ECP_NISTZ256_MULT_PRECOMPUTE 243 +# define EC_F_ECP_NISTZ256_PRE_COMP_NEW 244 # define EC_F_O2I_ECPUBLICKEY 152 # define EC_F_OLD_EC_PRIV_DECODE 222 # define EC_F_PKEY_EC_CTRL 197 @@ -1157,6 +1241,7 @@ void ERR_load_EC_strings(void); # define EC_R_INVALID_COMPRESSED_POINT 110 # define EC_R_INVALID_COMPRESSION_BIT 109 # define EC_R_INVALID_CURVE 141 +# define EC_R_INVALID_DIGEST 151 # define EC_R_INVALID_DIGEST_TYPE 138 # define EC_R_INVALID_ENCODING 102 # define EC_R_INVALID_FIELD 103 @@ -1165,6 +1250,7 @@ void ERR_load_EC_strings(void); # define EC_R_INVALID_PENTANOMIAL_BASIS 132 # define EC_R_INVALID_PRIVATE_KEY 123 # define EC_R_INVALID_TRINOMIAL_BASIS 137 +# define EC_R_KDF_PARAMETER_ERROR 148 # define EC_R_KEYS_NOT_SET 140 # define EC_R_MISSING_PARAMETERS 124 # define EC_R_MISSING_PRIVATE_KEY 125 @@ -1175,9 +1261,11 @@ void ERR_load_EC_strings(void); # define EC_R_NO_FIELD_MOD 133 # define EC_R_NO_PARAMETERS_SET 139 # define EC_R_PASSED_NULL_PARAMETER 134 +# define EC_R_PEER_KEY_ERROR 149 # define EC_R_PKPARAMETERS2GROUP_FAILURE 127 # define EC_R_POINT_AT_INFINITY 106 # define EC_R_POINT_IS_NOT_ON_CURVE 107 +# define EC_R_SHARED_INFO_ERROR 150 # define EC_R_SLOT_FULL 108 # define EC_R_UNDEFINED_GENERATOR 113 # define EC_R_UNDEFINED_ORDER 128 diff --git a/deps/openssl/openssl/crypto/ec/ec_ameth.c b/deps/openssl/openssl/crypto/ec/ec_ameth.c index 5cefb5ad303a6f..83e208cfe491c2 100644 --- a/deps/openssl/openssl/crypto/ec/ec_ameth.c +++ b/deps/openssl/openssl/crypto/ec/ec_ameth.c @@ -64,8 +64,12 @@ #ifndef OPENSSL_NO_CMS # include #endif +#include #include "asn1_locl.h" +static int ecdh_cms_decrypt(CMS_RecipientInfo *ri); +static int ecdh_cms_encrypt(CMS_RecipientInfo *ri); + static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) { const EC_GROUP *group; @@ -580,10 +584,21 @@ static int ec_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) X509_ALGOR_set0(alg2, OBJ_nid2obj(snid), V_ASN1_UNDEF, 0); } return 1; + + case ASN1_PKEY_CTRL_CMS_ENVELOPE: + if (arg1 == 1) + return ecdh_cms_decrypt(arg2); + else if (arg1 == 0) + return ecdh_cms_encrypt(arg2); + return -2; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_AGREE; + return 1; #endif case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 2; default: @@ -625,3 +640,326 @@ const EVP_PKEY_ASN1_METHOD eckey_asn1_meth = { old_ec_priv_decode, old_ec_priv_encode }; + +#ifndef OPENSSL_NO_CMS + +static int ecdh_cms_set_peerkey(EVP_PKEY_CTX *pctx, + X509_ALGOR *alg, ASN1_BIT_STRING *pubkey) +{ + ASN1_OBJECT *aoid; + int atype; + void *aval; + int rv = 0; + EVP_PKEY *pkpeer = NULL; + EC_KEY *ecpeer = NULL; + const unsigned char *p; + int plen; + X509_ALGOR_get0(&aoid, &atype, &aval, alg); + if (OBJ_obj2nid(aoid) != NID_X9_62_id_ecPublicKey) + goto err; + /* If absent parameters get group from main key */ + if (atype == V_ASN1_UNDEF || atype == V_ASN1_NULL) { + const EC_GROUP *grp; + EVP_PKEY *pk; + pk = 
EVP_PKEY_CTX_get0_pkey(pctx); + if (!pk) + goto err; + grp = EC_KEY_get0_group(pk->pkey.ec); + ecpeer = EC_KEY_new(); + if (!ecpeer) + goto err; + if (!EC_KEY_set_group(ecpeer, grp)) + goto err; + } else { + ecpeer = eckey_type2param(atype, aval); + if (!ecpeer) + goto err; + } + /* We have parameters now set public key */ + plen = ASN1_STRING_length(pubkey); + p = ASN1_STRING_data(pubkey); + if (!p || !plen) + goto err; + if (!o2i_ECPublicKey(&ecpeer, &p, plen)) + goto err; + pkpeer = EVP_PKEY_new(); + if (!pkpeer) + goto err; + EVP_PKEY_set1_EC_KEY(pkpeer, ecpeer); + if (EVP_PKEY_derive_set_peer(pctx, pkpeer) > 0) + rv = 1; + err: + if (ecpeer) + EC_KEY_free(ecpeer); + if (pkpeer) + EVP_PKEY_free(pkpeer); + return rv; +} + +/* Set KDF parameters based on KDF NID */ +static int ecdh_cms_set_kdf_param(EVP_PKEY_CTX *pctx, int eckdf_nid) +{ + int kdf_nid, kdfmd_nid, cofactor; + const EVP_MD *kdf_md; + if (eckdf_nid == NID_undef) + return 0; + + /* Lookup KDF type, cofactor mode and digest */ + if (!OBJ_find_sigid_algs(eckdf_nid, &kdfmd_nid, &kdf_nid)) + return 0; + + if (kdf_nid == NID_dh_std_kdf) + cofactor = 0; + else if (kdf_nid == NID_dh_cofactor_kdf) + cofactor = 1; + else + return 0; + + if (EVP_PKEY_CTX_set_ecdh_cofactor_mode(pctx, cofactor) <= 0) + return 0; + + if (EVP_PKEY_CTX_set_ecdh_kdf_type(pctx, EVP_PKEY_ECDH_KDF_X9_62) <= 0) + return 0; + + kdf_md = EVP_get_digestbynid(kdfmd_nid); + if (!kdf_md) + return 0; + + if (EVP_PKEY_CTX_set_ecdh_kdf_md(pctx, kdf_md) <= 0) + return 0; + return 1; +} + +static int ecdh_cms_set_shared_info(EVP_PKEY_CTX *pctx, CMS_RecipientInfo *ri) +{ + int rv = 0; + + X509_ALGOR *alg, *kekalg = NULL; + ASN1_OCTET_STRING *ukm; + const unsigned char *p; + unsigned char *der = NULL; + int plen, keylen; + const EVP_CIPHER *kekcipher; + EVP_CIPHER_CTX *kekctx; + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &alg, &ukm)) + return 0; + + if (!ecdh_cms_set_kdf_param(pctx, OBJ_obj2nid(alg->algorithm))) { + ECerr(EC_F_ECDH_CMS_SET_SHARED_INFO, EC_R_KDF_PARAMETER_ERROR); + return 0; + } + + if (alg->parameter->type != V_ASN1_SEQUENCE) + return 0; + + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + kekalg = d2i_X509_ALGOR(NULL, &p, plen); + if (!kekalg) + goto err; + kekctx = CMS_RecipientInfo_kari_get0_ctx(ri); + if (!kekctx) + goto err; + kekcipher = EVP_get_cipherbyobj(kekalg->algorithm); + if (!kekcipher || EVP_CIPHER_mode(kekcipher) != EVP_CIPH_WRAP_MODE) + goto err; + if (!EVP_EncryptInit_ex(kekctx, kekcipher, NULL, NULL, NULL)) + goto err; + if (EVP_CIPHER_asn1_to_param(kekctx, kekalg->parameter) <= 0) + goto err; + + keylen = EVP_CIPHER_CTX_key_length(kekctx); + if (EVP_PKEY_CTX_set_ecdh_kdf_outlen(pctx, keylen) <= 0) + goto err; + + plen = CMS_SharedInfo_encode(&der, kekalg, ukm, keylen); + + if (!plen) + goto err; + + if (EVP_PKEY_CTX_set0_ecdh_kdf_ukm(pctx, der, plen) <= 0) + goto err; + der = NULL; + + rv = 1; + err: + if (kekalg) + X509_ALGOR_free(kekalg); + if (der) + OPENSSL_free(der); + return rv; +} + +static int ecdh_cms_decrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* See if we need to set peer key */ + if (!EVP_PKEY_CTX_get0_peerkey(pctx)) { + X509_ALGOR *alg; + ASN1_BIT_STRING *pubkey; + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &alg, &pubkey, + NULL, NULL, NULL)) + return 0; + if (!alg || !pubkey) + return 0; + if (!ecdh_cms_set_peerkey(pctx, alg, pubkey)) { + ECerr(EC_F_ECDH_CMS_DECRYPT, EC_R_PEER_KEY_ERROR); + 
return 0; + } + } + /* Set ECDH derivation parameters and initialise unwrap context */ + if (!ecdh_cms_set_shared_info(pctx, ri)) { + ECerr(EC_F_ECDH_CMS_DECRYPT, EC_R_SHARED_INFO_ERROR); + return 0; + } + return 1; +} + +static int ecdh_cms_encrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pctx; + EVP_PKEY *pkey; + EVP_CIPHER_CTX *ctx; + int keylen; + X509_ALGOR *talg, *wrap_alg = NULL; + ASN1_OBJECT *aoid; + ASN1_BIT_STRING *pubkey; + ASN1_STRING *wrap_str; + ASN1_OCTET_STRING *ukm; + unsigned char *penc = NULL; + int penclen; + int rv = 0; + int ecdh_nid, kdf_type, kdf_nid, wrap_nid; + const EVP_MD *kdf_md; + pctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pctx) + return 0; + /* Get ephemeral key */ + pkey = EVP_PKEY_CTX_get0_pkey(pctx); + if (!CMS_RecipientInfo_kari_get0_orig_id(ri, &talg, &pubkey, + NULL, NULL, NULL)) + goto err; + X509_ALGOR_get0(&aoid, NULL, NULL, talg); + /* Is everything uninitialised? */ + if (aoid == OBJ_nid2obj(NID_undef)) { + + EC_KEY *eckey = pkey->pkey.ec; + /* Set the key */ + unsigned char *p; + + penclen = i2o_ECPublicKey(eckey, NULL); + if (penclen <= 0) + goto err; + penc = OPENSSL_malloc(penclen); + if (!penc) + goto err; + p = penc; + penclen = i2o_ECPublicKey(eckey, &p); + if (penclen <= 0) + goto err; + ASN1_STRING_set0(pubkey, penc, penclen); + pubkey->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07); + pubkey->flags |= ASN1_STRING_FLAG_BITS_LEFT; + + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(NID_X9_62_id_ecPublicKey), + V_ASN1_UNDEF, NULL); + } + + /* See if custom paraneters set */ + kdf_type = EVP_PKEY_CTX_get_ecdh_kdf_type(pctx); + if (kdf_type <= 0) + goto err; + if (!EVP_PKEY_CTX_get_ecdh_kdf_md(pctx, &kdf_md)) + goto err; + ecdh_nid = EVP_PKEY_CTX_get_ecdh_cofactor_mode(pctx); + if (ecdh_nid < 0) + goto err; + else if (ecdh_nid == 0) + ecdh_nid = NID_dh_std_kdf; + else if (ecdh_nid == 1) + ecdh_nid = NID_dh_cofactor_kdf; + + if (kdf_type == EVP_PKEY_ECDH_KDF_NONE) { + kdf_type = EVP_PKEY_ECDH_KDF_X9_62; + if (EVP_PKEY_CTX_set_ecdh_kdf_type(pctx, kdf_type) <= 0) + goto err; + } else + /* Uknown KDF */ + goto err; + if (kdf_md == NULL) { + /* Fixme later for better MD */ + kdf_md = EVP_sha1(); + if (EVP_PKEY_CTX_set_ecdh_kdf_md(pctx, kdf_md) <= 0) + goto err; + } + + if (!CMS_RecipientInfo_kari_get0_alg(ri, &talg, &ukm)) + goto err; + + /* Lookup NID for KDF+cofactor+digest */ + + if (!OBJ_find_sigid_by_algs(&kdf_nid, EVP_MD_type(kdf_md), ecdh_nid)) + goto err; + /* Get wrap NID */ + ctx = CMS_RecipientInfo_kari_get0_ctx(ri); + wrap_nid = EVP_CIPHER_CTX_type(ctx); + keylen = EVP_CIPHER_CTX_key_length(ctx); + + /* Package wrap algorithm in an AlgorithmIdentifier */ + + wrap_alg = X509_ALGOR_new(); + if (!wrap_alg) + goto err; + wrap_alg->algorithm = OBJ_nid2obj(wrap_nid); + wrap_alg->parameter = ASN1_TYPE_new(); + if (!wrap_alg->parameter) + goto err; + if (EVP_CIPHER_param_to_asn1(ctx, wrap_alg->parameter) <= 0) + goto err; + if (ASN1_TYPE_get(wrap_alg->parameter) == NID_undef) { + ASN1_TYPE_free(wrap_alg->parameter); + wrap_alg->parameter = NULL; + } + + if (EVP_PKEY_CTX_set_ecdh_kdf_outlen(pctx, keylen) <= 0) + goto err; + + penclen = CMS_SharedInfo_encode(&penc, wrap_alg, ukm, keylen); + + if (!penclen) + goto err; + + if (EVP_PKEY_CTX_set0_ecdh_kdf_ukm(pctx, penc, penclen) <= 0) + goto err; + penc = NULL; + + /* + * Now need to wrap encoding of wrap AlgorithmIdentifier into parameter + * of another AlgorithmIdentifier. 
+ */ + penclen = i2d_X509_ALGOR(wrap_alg, &penc); + if (!penc || !penclen) + goto err; + wrap_str = ASN1_STRING_new(); + if (!wrap_str) + goto err; + ASN1_STRING_set0(wrap_str, penc, penclen); + penc = NULL; + X509_ALGOR_set0(talg, OBJ_nid2obj(kdf_nid), V_ASN1_SEQUENCE, wrap_str); + + rv = 1; + + err: + if (penc) + OPENSSL_free(penc); + if (wrap_alg) + X509_ALGOR_free(wrap_alg); + return rv; +} + +#endif diff --git a/deps/openssl/openssl/crypto/ec/ec_curve.c b/deps/openssl/openssl/crypto/ec/ec_curve.c index e4bcb5dd81dc71..6dbe9d8258de43 100644 --- a/deps/openssl/openssl/crypto/ec/ec_curve.c +++ b/deps/openssl/openssl/crypto/ec/ec_curve.c @@ -69,11 +69,16 @@ * */ +#include #include "ec_lcl.h" #include #include #include +#ifdef OPENSSL_FIPS +# include +#endif + typedef struct { int field_type, /* either NID_X9_62_prime_field or * NID_X9_62_characteristic_two_field */ @@ -2282,6 +2287,554 @@ static const struct { #endif +/* + * These curves were added by Annie Yousar + * For the definition of RFC 5639 curves see + * http://www.ietf.org/rfc/rfc5639.txt These curves are generated verifiable + * at random, nevertheless the seed is omitted as parameter because the + * generation mechanism is different from those defined in ANSI X9.62. + */ + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 20 * 6]; +} _EC_brainpoolP160r1 = { + { + NID_X9_62_prime_field, 0, 20, 1 + }, + { + /* no seed */ + /* p */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD, + 0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0F, + /* a */ + 0x34, 0x0E, 0x7B, 0xE2, 0xA2, 0x80, 0xEB, 0x74, 0xE2, 0xBE, 0x61, 0xBA, + 0xDA, 0x74, 0x5D, 0x97, 0xE8, 0xF7, 0xC3, 0x00, + /* b */ + 0x1E, 0x58, 0x9A, 0x85, 0x95, 0x42, 0x34, 0x12, 0x13, 0x4F, 0xAA, 0x2D, + 0xBD, 0xEC, 0x95, 0xC8, 0xD8, 0x67, 0x5E, 0x58, + /* x */ + 0xBE, 0xD5, 0xAF, 0x16, 0xEA, 0x3F, 0x6A, 0x4F, 0x62, 0x93, 0x8C, 0x46, + 0x31, 0xEB, 0x5A, 0xF7, 0xBD, 0xBC, 0xDB, 0xC3, + /* y */ + 0x16, 0x67, 0xCB, 0x47, 0x7A, 0x1A, 0x8E, 0xC3, 0x38, 0xF9, 0x47, 0x41, + 0x66, 0x9C, 0x97, 0x63, 0x16, 0xDA, 0x63, 0x21, + /* order */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0x59, 0x91, + 0xD4, 0x50, 0x29, 0x40, 0x9E, 0x60, 0xFC, 0x09 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 20 * 6]; +} _EC_brainpoolP160t1 = { + { + NID_X9_62_prime_field, 0, 20, 1 + }, + { + /* no seed */ + /* p */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD, + 0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0F, + /* a */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0xC7, 0xAD, + 0x95, 0xB3, 0xD8, 0x13, 0x95, 0x15, 0x62, 0x0C, + /* b */ + 0x7A, 0x55, 0x6B, 0x6D, 0xAE, 0x53, 0x5B, 0x7B, 0x51, 0xED, 0x2C, 0x4D, + 0x7D, 0xAA, 0x7A, 0x0B, 0x5C, 0x55, 0xF3, 0x80, + /* x */ + 0xB1, 0x99, 0xB1, 0x3B, 0x9B, 0x34, 0xEF, 0xC1, 0x39, 0x7E, 0x64, 0xBA, + 0xEB, 0x05, 0xAC, 0xC2, 0x65, 0xFF, 0x23, 0x78, + /* y */ + 0xAD, 0xD6, 0x71, 0x8B, 0x7C, 0x7C, 0x19, 0x61, 0xF0, 0x99, 0x1B, 0x84, + 0x24, 0x43, 0x77, 0x21, 0x52, 0xC9, 0xE0, 0xAD, + /* order */ + 0xE9, 0x5E, 0x4A, 0x5F, 0x73, 0x70, 0x59, 0xDC, 0x60, 0xDF, 0x59, 0x91, + 0xD4, 0x50, 0x29, 0x40, 0x9E, 0x60, 0xFC, 0x09 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 24 * 6]; +} _EC_brainpoolP192r1 = { + { + NID_X9_62_prime_field, 0, 24, 1 + }, + { + /* no seed */ + /* p */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30, + 0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x97, + /* a 
*/ + 0x6A, 0x91, 0x17, 0x40, 0x76, 0xB1, 0xE0, 0xE1, 0x9C, 0x39, 0xC0, 0x31, + 0xFE, 0x86, 0x85, 0xC1, 0xCA, 0xE0, 0x40, 0xE5, 0xC6, 0x9A, 0x28, 0xEF, + /* b */ + 0x46, 0x9A, 0x28, 0xEF, 0x7C, 0x28, 0xCC, 0xA3, 0xDC, 0x72, 0x1D, 0x04, + 0x4F, 0x44, 0x96, 0xBC, 0xCA, 0x7E, 0xF4, 0x14, 0x6F, 0xBF, 0x25, 0xC9, + /* x */ + 0xC0, 0xA0, 0x64, 0x7E, 0xAA, 0xB6, 0xA4, 0x87, 0x53, 0xB0, 0x33, 0xC5, + 0x6C, 0xB0, 0xF0, 0x90, 0x0A, 0x2F, 0x5C, 0x48, 0x53, 0x37, 0x5F, 0xD6, + /* y */ + 0x14, 0xB6, 0x90, 0x86, 0x6A, 0xBD, 0x5B, 0xB8, 0x8B, 0x5F, 0x48, 0x28, + 0xC1, 0x49, 0x00, 0x02, 0xE6, 0x77, 0x3F, 0xA2, 0xFA, 0x29, 0x9B, 0x8F, + /* order */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x2F, + 0x9E, 0x9E, 0x91, 0x6B, 0x5B, 0xE8, 0xF1, 0x02, 0x9A, 0xC4, 0xAC, 0xC1 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 24 * 6]; +} _EC_brainpoolP192t1 = { + { + NID_X9_62_prime_field, 0, 24, 1 + }, + { + /* no seed */ + /* p */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30, + 0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x97, + /* a */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x30, + 0x93, 0xD1, 0x8D, 0xB7, 0x8F, 0xCE, 0x47, 0x6D, 0xE1, 0xA8, 0x62, 0x94, + /* b */ + 0x13, 0xD5, 0x6F, 0xFA, 0xEC, 0x78, 0x68, 0x1E, 0x68, 0xF9, 0xDE, 0xB4, + 0x3B, 0x35, 0xBE, 0xC2, 0xFB, 0x68, 0x54, 0x2E, 0x27, 0x89, 0x7B, 0x79, + /* x */ + 0x3A, 0xE9, 0xE5, 0x8C, 0x82, 0xF6, 0x3C, 0x30, 0x28, 0x2E, 0x1F, 0xE7, + 0xBB, 0xF4, 0x3F, 0xA7, 0x2C, 0x44, 0x6A, 0xF6, 0xF4, 0x61, 0x81, 0x29, + /* y */ + 0x09, 0x7E, 0x2C, 0x56, 0x67, 0xC2, 0x22, 0x3A, 0x90, 0x2A, 0xB5, 0xCA, + 0x44, 0x9D, 0x00, 0x84, 0xB7, 0xE5, 0xB3, 0xDE, 0x7C, 0xCC, 0x01, 0xC9, + /* order */ + 0xC3, 0x02, 0xF4, 0x1D, 0x93, 0x2A, 0x36, 0xCD, 0xA7, 0xA3, 0x46, 0x2F, + 0x9E, 0x9E, 0x91, 0x6B, 0x5B, 0xE8, 0xF1, 0x02, 0x9A, 0xC4, 0xAC, 0xC1 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 28 * 6]; +} _EC_brainpoolP224r1 = { + { + NID_X9_62_prime_field, 0, 28, 1 + }, + { + /* no seed */ + /* p */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5, + 0x7E, 0xC8, 0xC0, 0xFF, + /* a */ + 0x68, 0xA5, 0xE6, 0x2C, 0xA9, 0xCE, 0x6C, 0x1C, 0x29, 0x98, 0x03, 0xA6, + 0xC1, 0x53, 0x0B, 0x51, 0x4E, 0x18, 0x2A, 0xD8, 0xB0, 0x04, 0x2A, 0x59, + 0xCA, 0xD2, 0x9F, 0x43, + /* b */ + 0x25, 0x80, 0xF6, 0x3C, 0xCF, 0xE4, 0x41, 0x38, 0x87, 0x07, 0x13, 0xB1, + 0xA9, 0x23, 0x69, 0xE3, 0x3E, 0x21, 0x35, 0xD2, 0x66, 0xDB, 0xB3, 0x72, + 0x38, 0x6C, 0x40, 0x0B, + /* x */ + 0x0D, 0x90, 0x29, 0xAD, 0x2C, 0x7E, 0x5C, 0xF4, 0x34, 0x08, 0x23, 0xB2, + 0xA8, 0x7D, 0xC6, 0x8C, 0x9E, 0x4C, 0xE3, 0x17, 0x4C, 0x1E, 0x6E, 0xFD, + 0xEE, 0x12, 0xC0, 0x7D, + /* y */ + 0x58, 0xAA, 0x56, 0xF7, 0x72, 0xC0, 0x72, 0x6F, 0x24, 0xC6, 0xB8, 0x9E, + 0x4E, 0xCD, 0xAC, 0x24, 0x35, 0x4B, 0x9E, 0x99, 0xCA, 0xA3, 0xF6, 0xD3, + 0x76, 0x14, 0x02, 0xCD, + /* order */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD0, 0xFB, 0x98, 0xD1, 0x16, 0xBC, 0x4B, 0x6D, 0xDE, 0xBC, 0xA3, + 0xA5, 0xA7, 0x93, 0x9F + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 28 * 6]; +} _EC_brainpoolP224t1 = { + { + NID_X9_62_prime_field, 0, 28, 1 + }, + { + /* no seed */ + /* p */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5, + 0x7E, 0xC8, 0xC0, 
0xFF, + /* a */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD1, 0xD7, 0x87, 0xB0, 0x9F, 0x07, 0x57, 0x97, 0xDA, 0x89, 0xF5, + 0x7E, 0xC8, 0xC0, 0xFC, + /* b */ + 0x4B, 0x33, 0x7D, 0x93, 0x41, 0x04, 0xCD, 0x7B, 0xEF, 0x27, 0x1B, 0xF6, + 0x0C, 0xED, 0x1E, 0xD2, 0x0D, 0xA1, 0x4C, 0x08, 0xB3, 0xBB, 0x64, 0xF1, + 0x8A, 0x60, 0x88, 0x8D, + /* x */ + 0x6A, 0xB1, 0xE3, 0x44, 0xCE, 0x25, 0xFF, 0x38, 0x96, 0x42, 0x4E, 0x7F, + 0xFE, 0x14, 0x76, 0x2E, 0xCB, 0x49, 0xF8, 0x92, 0x8A, 0xC0, 0xC7, 0x60, + 0x29, 0xB4, 0xD5, 0x80, + /* y */ + 0x03, 0x74, 0xE9, 0xF5, 0x14, 0x3E, 0x56, 0x8C, 0xD2, 0x3F, 0x3F, 0x4D, + 0x7C, 0x0D, 0x4B, 0x1E, 0x41, 0xC8, 0xCC, 0x0D, 0x1C, 0x6A, 0xBD, 0x5F, + 0x1A, 0x46, 0xDB, 0x4C, + /* order */ + 0xD7, 0xC1, 0x34, 0xAA, 0x26, 0x43, 0x66, 0x86, 0x2A, 0x18, 0x30, 0x25, + 0x75, 0xD0, 0xFB, 0x98, 0xD1, 0x16, 0xBC, 0x4B, 0x6D, 0xDE, 0xBC, 0xA3, + 0xA5, 0xA7, 0x93, 0x9F + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 32 * 6]; +} _EC_brainpoolP256r1 = { + { + NID_X9_62_prime_field, 0, 32, 1 + }, + { + /* no seed */ + /* p */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28, + 0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x77, + /* a */ + 0x7D, 0x5A, 0x09, 0x75, 0xFC, 0x2C, 0x30, 0x57, 0xEE, 0xF6, 0x75, 0x30, + 0x41, 0x7A, 0xFF, 0xE7, 0xFB, 0x80, 0x55, 0xC1, 0x26, 0xDC, 0x5C, 0x6C, + 0xE9, 0x4A, 0x4B, 0x44, 0xF3, 0x30, 0xB5, 0xD9, + /* b */ + 0x26, 0xDC, 0x5C, 0x6C, 0xE9, 0x4A, 0x4B, 0x44, 0xF3, 0x30, 0xB5, 0xD9, + 0xBB, 0xD7, 0x7C, 0xBF, 0x95, 0x84, 0x16, 0x29, 0x5C, 0xF7, 0xE1, 0xCE, + 0x6B, 0xCC, 0xDC, 0x18, 0xFF, 0x8C, 0x07, 0xB6, + /* x */ + 0x8B, 0xD2, 0xAE, 0xB9, 0xCB, 0x7E, 0x57, 0xCB, 0x2C, 0x4B, 0x48, 0x2F, + 0xFC, 0x81, 0xB7, 0xAF, 0xB9, 0xDE, 0x27, 0xE1, 0xE3, 0xBD, 0x23, 0xC2, + 0x3A, 0x44, 0x53, 0xBD, 0x9A, 0xCE, 0x32, 0x62, + /* y */ + 0x54, 0x7E, 0xF8, 0x35, 0xC3, 0xDA, 0xC4, 0xFD, 0x97, 0xF8, 0x46, 0x1A, + 0x14, 0x61, 0x1D, 0xC9, 0xC2, 0x77, 0x45, 0x13, 0x2D, 0xED, 0x8E, 0x54, + 0x5C, 0x1D, 0x54, 0xC7, 0x2F, 0x04, 0x69, 0x97, + /* order */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x71, 0x8C, 0x39, 0x7A, 0xA3, 0xB5, 0x61, 0xA6, 0xF7, + 0x90, 0x1E, 0x0E, 0x82, 0x97, 0x48, 0x56, 0xA7 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 32 * 6]; +} _EC_brainpoolP256t1 = { + { + NID_X9_62_prime_field, 0, 32, 1 + }, + { + /* no seed */ + /* p */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28, + 0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x77, + /* a */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x72, 0x6E, 0x3B, 0xF6, 0x23, 0xD5, 0x26, 0x20, 0x28, + 0x20, 0x13, 0x48, 0x1D, 0x1F, 0x6E, 0x53, 0x74, + /* b */ + 0x66, 0x2C, 0x61, 0xC4, 0x30, 0xD8, 0x4E, 0xA4, 0xFE, 0x66, 0xA7, 0x73, + 0x3D, 0x0B, 0x76, 0xB7, 0xBF, 0x93, 0xEB, 0xC4, 0xAF, 0x2F, 0x49, 0x25, + 0x6A, 0xE5, 0x81, 0x01, 0xFE, 0xE9, 0x2B, 0x04, + /* x */ + 0xA3, 0xE8, 0xEB, 0x3C, 0xC1, 0xCF, 0xE7, 0xB7, 0x73, 0x22, 0x13, 0xB2, + 0x3A, 0x65, 0x61, 0x49, 0xAF, 0xA1, 0x42, 0xC4, 0x7A, 0xAF, 0xBC, 0x2B, + 0x79, 0xA1, 0x91, 0x56, 0x2E, 0x13, 0x05, 0xF4, + /* y */ + 0x2D, 0x99, 0x6C, 0x82, 0x34, 0x39, 0xC5, 0x6D, 0x7F, 0x7B, 0x22, 0xE1, + 0x46, 0x44, 0x41, 0x7E, 0x69, 0xBC, 0xB6, 0xDE, 0x39, 0xD0, 0x27, 0x00, + 0x1D, 0xAB, 0xE8, 0xF3, 0x5B, 
0x25, 0xC9, 0xBE, + /* order */ + 0xA9, 0xFB, 0x57, 0xDB, 0xA1, 0xEE, 0xA9, 0xBC, 0x3E, 0x66, 0x0A, 0x90, + 0x9D, 0x83, 0x8D, 0x71, 0x8C, 0x39, 0x7A, 0xA3, 0xB5, 0x61, 0xA6, 0xF7, + 0x90, 0x1E, 0x0E, 0x82, 0x97, 0x48, 0x56, 0xA7 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 40 * 6]; +} _EC_brainpoolP320r1 = { + { + NID_X9_62_prime_field, 0, 40, 1 + }, + { + /* no seed */ + /* p */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF, + 0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1, + 0xF1, 0xB3, 0x2E, 0x27, + /* a */ + 0x3E, 0xE3, 0x0B, 0x56, 0x8F, 0xBA, 0xB0, 0xF8, 0x83, 0xCC, 0xEB, 0xD4, + 0x6D, 0x3F, 0x3B, 0xB8, 0xA2, 0xA7, 0x35, 0x13, 0xF5, 0xEB, 0x79, 0xDA, + 0x66, 0x19, 0x0E, 0xB0, 0x85, 0xFF, 0xA9, 0xF4, 0x92, 0xF3, 0x75, 0xA9, + 0x7D, 0x86, 0x0E, 0xB4, + /* b */ + 0x52, 0x08, 0x83, 0x94, 0x9D, 0xFD, 0xBC, 0x42, 0xD3, 0xAD, 0x19, 0x86, + 0x40, 0x68, 0x8A, 0x6F, 0xE1, 0x3F, 0x41, 0x34, 0x95, 0x54, 0xB4, 0x9A, + 0xCC, 0x31, 0xDC, 0xCD, 0x88, 0x45, 0x39, 0x81, 0x6F, 0x5E, 0xB4, 0xAC, + 0x8F, 0xB1, 0xF1, 0xA6, + /* x */ + 0x43, 0xBD, 0x7E, 0x9A, 0xFB, 0x53, 0xD8, 0xB8, 0x52, 0x89, 0xBC, 0xC4, + 0x8E, 0xE5, 0xBF, 0xE6, 0xF2, 0x01, 0x37, 0xD1, 0x0A, 0x08, 0x7E, 0xB6, + 0xE7, 0x87, 0x1E, 0x2A, 0x10, 0xA5, 0x99, 0xC7, 0x10, 0xAF, 0x8D, 0x0D, + 0x39, 0xE2, 0x06, 0x11, + /* y */ + 0x14, 0xFD, 0xD0, 0x55, 0x45, 0xEC, 0x1C, 0xC8, 0xAB, 0x40, 0x93, 0x24, + 0x7F, 0x77, 0x27, 0x5E, 0x07, 0x43, 0xFF, 0xED, 0x11, 0x71, 0x82, 0xEA, + 0xA9, 0xC7, 0x78, 0x77, 0xAA, 0xAC, 0x6A, 0xC7, 0xD3, 0x52, 0x45, 0xD1, + 0x69, 0x2E, 0x8E, 0xE1, + /* order */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA5, 0xB6, 0x8F, 0x12, 0xA3, + 0x2D, 0x48, 0x2E, 0xC7, 0xEE, 0x86, 0x58, 0xE9, 0x86, 0x91, 0x55, 0x5B, + 0x44, 0xC5, 0x93, 0x11 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 40 * 6]; +} _EC_brainpoolP320t1 = { + { + NID_X9_62_prime_field, 0, 40, 1 + }, + { + /* no seed */ + /* p */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF, + 0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1, + 0xF1, 0xB3, 0x2E, 0x27, + /* a */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 0xA6, 0xF6, 0xF4, 0x0D, 0xEF, + 0x4F, 0x92, 0xB9, 0xEC, 0x78, 0x93, 0xEC, 0x28, 0xFC, 0xD4, 0x12, 0xB1, + 0xF1, 0xB3, 0x2E, 0x24, + /* b */ + 0xA7, 0xF5, 0x61, 0xE0, 0x38, 0xEB, 0x1E, 0xD5, 0x60, 0xB3, 0xD1, 0x47, + 0xDB, 0x78, 0x20, 0x13, 0x06, 0x4C, 0x19, 0xF2, 0x7E, 0xD2, 0x7C, 0x67, + 0x80, 0xAA, 0xF7, 0x7F, 0xB8, 0xA5, 0x47, 0xCE, 0xB5, 0xB4, 0xFE, 0xF4, + 0x22, 0x34, 0x03, 0x53, + /* x */ + 0x92, 0x5B, 0xE9, 0xFB, 0x01, 0xAF, 0xC6, 0xFB, 0x4D, 0x3E, 0x7D, 0x49, + 0x90, 0x01, 0x0F, 0x81, 0x34, 0x08, 0xAB, 0x10, 0x6C, 0x4F, 0x09, 0xCB, + 0x7E, 0xE0, 0x78, 0x68, 0xCC, 0x13, 0x6F, 0xFF, 0x33, 0x57, 0xF6, 0x24, + 0xA2, 0x1B, 0xED, 0x52, + /* y */ + 0x63, 0xBA, 0x3A, 0x7A, 0x27, 0x48, 0x3E, 0xBF, 0x66, 0x71, 0xDB, 0xEF, + 0x7A, 0xBB, 0x30, 0xEB, 0xEE, 0x08, 0x4E, 0x58, 0xA0, 0xB0, 0x77, 0xAD, + 0x42, 0xA5, 0xA0, 0x98, 0x9D, 0x1E, 0xE7, 0x1B, 0x1B, 0x9B, 0xC0, 0x45, + 0x5F, 0xB0, 0xD2, 0xC3, + /* order */ + 0xD3, 0x5E, 0x47, 0x20, 0x36, 0xBC, 0x4F, 0xB7, 0xE1, 0x3C, 0x78, 0x5E, + 0xD2, 0x01, 0xE0, 0x65, 0xF9, 0x8F, 0xCF, 
0xA5, 0xB6, 0x8F, 0x12, 0xA3, + 0x2D, 0x48, 0x2E, 0xC7, 0xEE, 0x86, 0x58, 0xE9, 0x86, 0x91, 0x55, 0x5B, + 0x44, 0xC5, 0x93, 0x11 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 48 * 6]; +} _EC_brainpoolP384r1 = { + { + NID_X9_62_prime_field, 0, 48, 1 + }, + { + /* no seed */ + /* p */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4, + 0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29, + 0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x53, + /* a */ + 0x7B, 0xC3, 0x82, 0xC6, 0x3D, 0x8C, 0x15, 0x0C, 0x3C, 0x72, 0x08, 0x0A, + 0xCE, 0x05, 0xAF, 0xA0, 0xC2, 0xBE, 0xA2, 0x8E, 0x4F, 0xB2, 0x27, 0x87, + 0x13, 0x91, 0x65, 0xEF, 0xBA, 0x91, 0xF9, 0x0F, 0x8A, 0xA5, 0x81, 0x4A, + 0x50, 0x3A, 0xD4, 0xEB, 0x04, 0xA8, 0xC7, 0xDD, 0x22, 0xCE, 0x28, 0x26, + /* b */ + 0x04, 0xA8, 0xC7, 0xDD, 0x22, 0xCE, 0x28, 0x26, 0x8B, 0x39, 0xB5, 0x54, + 0x16, 0xF0, 0x44, 0x7C, 0x2F, 0xB7, 0x7D, 0xE1, 0x07, 0xDC, 0xD2, 0xA6, + 0x2E, 0x88, 0x0E, 0xA5, 0x3E, 0xEB, 0x62, 0xD5, 0x7C, 0xB4, 0x39, 0x02, + 0x95, 0xDB, 0xC9, 0x94, 0x3A, 0xB7, 0x86, 0x96, 0xFA, 0x50, 0x4C, 0x11, + /* x */ + 0x1D, 0x1C, 0x64, 0xF0, 0x68, 0xCF, 0x45, 0xFF, 0xA2, 0xA6, 0x3A, 0x81, + 0xB7, 0xC1, 0x3F, 0x6B, 0x88, 0x47, 0xA3, 0xE7, 0x7E, 0xF1, 0x4F, 0xE3, + 0xDB, 0x7F, 0xCA, 0xFE, 0x0C, 0xBD, 0x10, 0xE8, 0xE8, 0x26, 0xE0, 0x34, + 0x36, 0xD6, 0x46, 0xAA, 0xEF, 0x87, 0xB2, 0xE2, 0x47, 0xD4, 0xAF, 0x1E, + /* y */ + 0x8A, 0xBE, 0x1D, 0x75, 0x20, 0xF9, 0xC2, 0xA4, 0x5C, 0xB1, 0xEB, 0x8E, + 0x95, 0xCF, 0xD5, 0x52, 0x62, 0xB7, 0x0B, 0x29, 0xFE, 0xEC, 0x58, 0x64, + 0xE1, 0x9C, 0x05, 0x4F, 0xF9, 0x91, 0x29, 0x28, 0x0E, 0x46, 0x46, 0x21, + 0x77, 0x91, 0x81, 0x11, 0x42, 0x82, 0x03, 0x41, 0x26, 0x3C, 0x53, 0x15, + /* order */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB3, + 0x1F, 0x16, 0x6E, 0x6C, 0xAC, 0x04, 0x25, 0xA7, 0xCF, 0x3A, 0xB6, 0xAF, + 0x6B, 0x7F, 0xC3, 0x10, 0x3B, 0x88, 0x32, 0x02, 0xE9, 0x04, 0x65, 0x65 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 48 * 6]; +} _EC_brainpoolP384t1 = { + { + NID_X9_62_prime_field, 0, 48, 1 + }, + { + /* no seed */ + /* p */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4, + 0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29, + 0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x53, + /* a */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB4, + 0x12, 0xB1, 0xDA, 0x19, 0x7F, 0xB7, 0x11, 0x23, 0xAC, 0xD3, 0xA7, 0x29, + 0x90, 0x1D, 0x1A, 0x71, 0x87, 0x47, 0x00, 0x13, 0x31, 0x07, 0xEC, 0x50, + /* b */ + 0x7F, 0x51, 0x9E, 0xAD, 0xA7, 0xBD, 0xA8, 0x1B, 0xD8, 0x26, 0xDB, 0xA6, + 0x47, 0x91, 0x0F, 0x8C, 0x4B, 0x93, 0x46, 0xED, 0x8C, 0xCD, 0xC6, 0x4E, + 0x4B, 0x1A, 0xBD, 0x11, 0x75, 0x6D, 0xCE, 0x1D, 0x20, 0x74, 0xAA, 0x26, + 0x3B, 0x88, 0x80, 0x5C, 0xED, 0x70, 0x35, 0x5A, 0x33, 0xB4, 0x71, 0xEE, + /* x */ + 0x18, 0xDE, 0x98, 0xB0, 0x2D, 0xB9, 0xA3, 0x06, 0xF2, 0xAF, 0xCD, 0x72, + 0x35, 0xF7, 0x2A, 0x81, 0x9B, 0x80, 0xAB, 0x12, 0xEB, 0xD6, 0x53, 0x17, + 0x24, 0x76, 0xFE, 0xCD, 0x46, 0x2A, 0xAB, 0xFF, 0xC4, 0xFF, 0x19, 0x1B, + 0x94, 0x6A, 0x5F, 0x54, 0xD8, 0xD0, 0xAA, 0x2F, 0x41, 0x88, 0x08, 0xCC, + /* y */ 
+ 0x25, 0xAB, 0x05, 0x69, 0x62, 0xD3, 0x06, 0x51, 0xA1, 0x14, 0xAF, 0xD2, + 0x75, 0x5A, 0xD3, 0x36, 0x74, 0x7F, 0x93, 0x47, 0x5B, 0x7A, 0x1F, 0xCA, + 0x3B, 0x88, 0xF2, 0xB6, 0xA2, 0x08, 0xCC, 0xFE, 0x46, 0x94, 0x08, 0x58, + 0x4D, 0xC2, 0xB2, 0x91, 0x26, 0x75, 0xBF, 0x5B, 0x9E, 0x58, 0x29, 0x28, + /* order */ + 0x8C, 0xB9, 0x1E, 0x82, 0xA3, 0x38, 0x6D, 0x28, 0x0F, 0x5D, 0x6F, 0x7E, + 0x50, 0xE6, 0x41, 0xDF, 0x15, 0x2F, 0x71, 0x09, 0xED, 0x54, 0x56, 0xB3, + 0x1F, 0x16, 0x6E, 0x6C, 0xAC, 0x04, 0x25, 0xA7, 0xCF, 0x3A, 0xB6, 0xAF, + 0x6B, 0x7F, 0xC3, 0x10, 0x3B, 0x88, 0x32, 0x02, 0xE9, 0x04, 0x65, 0x65 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 64 * 6]; +} _EC_brainpoolP512r1 = { + { + NID_X9_62_prime_field, 0, 64, 1 + }, + { + /* no seed */ + /* p */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00, + 0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6, + 0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56, + 0x58, 0x3A, 0x48, 0xF3, + /* a */ + 0x78, 0x30, 0xA3, 0x31, 0x8B, 0x60, 0x3B, 0x89, 0xE2, 0x32, 0x71, 0x45, + 0xAC, 0x23, 0x4C, 0xC5, 0x94, 0xCB, 0xDD, 0x8D, 0x3D, 0xF9, 0x16, 0x10, + 0xA8, 0x34, 0x41, 0xCA, 0xEA, 0x98, 0x63, 0xBC, 0x2D, 0xED, 0x5D, 0x5A, + 0xA8, 0x25, 0x3A, 0xA1, 0x0A, 0x2E, 0xF1, 0xC9, 0x8B, 0x9A, 0xC8, 0xB5, + 0x7F, 0x11, 0x17, 0xA7, 0x2B, 0xF2, 0xC7, 0xB9, 0xE7, 0xC1, 0xAC, 0x4D, + 0x77, 0xFC, 0x94, 0xCA, + /* b */ + 0x3D, 0xF9, 0x16, 0x10, 0xA8, 0x34, 0x41, 0xCA, 0xEA, 0x98, 0x63, 0xBC, + 0x2D, 0xED, 0x5D, 0x5A, 0xA8, 0x25, 0x3A, 0xA1, 0x0A, 0x2E, 0xF1, 0xC9, + 0x8B, 0x9A, 0xC8, 0xB5, 0x7F, 0x11, 0x17, 0xA7, 0x2B, 0xF2, 0xC7, 0xB9, + 0xE7, 0xC1, 0xAC, 0x4D, 0x77, 0xFC, 0x94, 0xCA, 0xDC, 0x08, 0x3E, 0x67, + 0x98, 0x40, 0x50, 0xB7, 0x5E, 0xBA, 0xE5, 0xDD, 0x28, 0x09, 0xBD, 0x63, + 0x80, 0x16, 0xF7, 0x23, + /* x */ + 0x81, 0xAE, 0xE4, 0xBD, 0xD8, 0x2E, 0xD9, 0x64, 0x5A, 0x21, 0x32, 0x2E, + 0x9C, 0x4C, 0x6A, 0x93, 0x85, 0xED, 0x9F, 0x70, 0xB5, 0xD9, 0x16, 0xC1, + 0xB4, 0x3B, 0x62, 0xEE, 0xF4, 0xD0, 0x09, 0x8E, 0xFF, 0x3B, 0x1F, 0x78, + 0xE2, 0xD0, 0xD4, 0x8D, 0x50, 0xD1, 0x68, 0x7B, 0x93, 0xB9, 0x7D, 0x5F, + 0x7C, 0x6D, 0x50, 0x47, 0x40, 0x6A, 0x5E, 0x68, 0x8B, 0x35, 0x22, 0x09, + 0xBC, 0xB9, 0xF8, 0x22, + /* y */ + 0x7D, 0xDE, 0x38, 0x5D, 0x56, 0x63, 0x32, 0xEC, 0xC0, 0xEA, 0xBF, 0xA9, + 0xCF, 0x78, 0x22, 0xFD, 0xF2, 0x09, 0xF7, 0x00, 0x24, 0xA5, 0x7B, 0x1A, + 0xA0, 0x00, 0xC5, 0x5B, 0x88, 0x1F, 0x81, 0x11, 0xB2, 0xDC, 0xDE, 0x49, + 0x4A, 0x5F, 0x48, 0x5E, 0x5B, 0xCA, 0x4B, 0xD8, 0x8A, 0x27, 0x63, 0xAE, + 0xD1, 0xCA, 0x2B, 0x2F, 0xA8, 0xF0, 0x54, 0x06, 0x78, 0xCD, 0x1E, 0x0F, + 0x3A, 0xD8, 0x08, 0x92, + /* order */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x70, 0x55, 0x3E, 0x5C, 0x41, + 0x4C, 0xA9, 0x26, 0x19, 0x41, 0x86, 0x61, 0x19, 0x7F, 0xAC, 0x10, 0x47, + 0x1D, 0xB1, 0xD3, 0x81, 0x08, 0x5D, 0xDA, 0xDD, 0xB5, 0x87, 0x96, 0x82, + 0x9C, 0xA9, 0x00, 0x69 + } +}; + +static const struct { + EC_CURVE_DATA h; + unsigned char data[0 + 64 * 6]; +} _EC_brainpoolP512t1 = { + { + NID_X9_62_prime_field, 0, 64, 1 + }, + { + /* no seed */ + /* p */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, 
+ 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00, + 0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6, + 0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56, + 0x58, 0x3A, 0x48, 0xF3, + /* a */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x71, 0x7D, 0x4D, 0x9B, 0x00, + 0x9B, 0xC6, 0x68, 0x42, 0xAE, 0xCD, 0xA1, 0x2A, 0xE6, 0xA3, 0x80, 0xE6, + 0x28, 0x81, 0xFF, 0x2F, 0x2D, 0x82, 0xC6, 0x85, 0x28, 0xAA, 0x60, 0x56, + 0x58, 0x3A, 0x48, 0xF0, + /* b */ + 0x7C, 0xBB, 0xBC, 0xF9, 0x44, 0x1C, 0xFA, 0xB7, 0x6E, 0x18, 0x90, 0xE4, + 0x68, 0x84, 0xEA, 0xE3, 0x21, 0xF7, 0x0C, 0x0B, 0xCB, 0x49, 0x81, 0x52, + 0x78, 0x97, 0x50, 0x4B, 0xEC, 0x3E, 0x36, 0xA6, 0x2B, 0xCD, 0xFA, 0x23, + 0x04, 0x97, 0x65, 0x40, 0xF6, 0x45, 0x00, 0x85, 0xF2, 0xDA, 0xE1, 0x45, + 0xC2, 0x25, 0x53, 0xB4, 0x65, 0x76, 0x36, 0x89, 0x18, 0x0E, 0xA2, 0x57, + 0x18, 0x67, 0x42, 0x3E, + /* x */ + 0x64, 0x0E, 0xCE, 0x5C, 0x12, 0x78, 0x87, 0x17, 0xB9, 0xC1, 0xBA, 0x06, + 0xCB, 0xC2, 0xA6, 0xFE, 0xBA, 0x85, 0x84, 0x24, 0x58, 0xC5, 0x6D, 0xDE, + 0x9D, 0xB1, 0x75, 0x8D, 0x39, 0xC0, 0x31, 0x3D, 0x82, 0xBA, 0x51, 0x73, + 0x5C, 0xDB, 0x3E, 0xA4, 0x99, 0xAA, 0x77, 0xA7, 0xD6, 0x94, 0x3A, 0x64, + 0xF7, 0xA3, 0xF2, 0x5F, 0xE2, 0x6F, 0x06, 0xB5, 0x1B, 0xAA, 0x26, 0x96, + 0xFA, 0x90, 0x35, 0xDA, + /* y */ + 0x5B, 0x53, 0x4B, 0xD5, 0x95, 0xF5, 0xAF, 0x0F, 0xA2, 0xC8, 0x92, 0x37, + 0x6C, 0x84, 0xAC, 0xE1, 0xBB, 0x4E, 0x30, 0x19, 0xB7, 0x16, 0x34, 0xC0, + 0x11, 0x31, 0x15, 0x9C, 0xAE, 0x03, 0xCE, 0xE9, 0xD9, 0x93, 0x21, 0x84, + 0xBE, 0xEF, 0x21, 0x6B, 0xD7, 0x1D, 0xF2, 0xDA, 0xDF, 0x86, 0xA6, 0x27, + 0x30, 0x6E, 0xCF, 0xF9, 0x6D, 0xBB, 0x8B, 0xAC, 0xE1, 0x98, 0xB6, 0x1E, + 0x00, 0xF8, 0xB3, 0x32, + /* order */ + 0xAA, 0xDD, 0x9D, 0xB8, 0xDB, 0xE9, 0xC4, 0x8B, 0x3F, 0xD4, 0xE6, 0xAE, + 0x33, 0xC9, 0xFC, 0x07, 0xCB, 0x30, 0x8D, 0xB3, 0xB3, 0xC9, 0xD2, 0x0E, + 0xD6, 0x63, 0x9C, 0xCA, 0x70, 0x33, 0x08, 0x70, 0x55, 0x3E, 0x5C, 0x41, + 0x4C, 0xA9, 0x26, 0x19, 0x41, 0x86, 0x61, 0x19, 0x7F, 0xAC, 0x10, 0x47, + 0x1D, 0xB1, 0xD3, 0x81, 0x08, 0x5D, 0xDA, 0xDD, 0xB5, 0x87, 0x96, 0x82, + 0x9C, 0xA9, 0x00, 0x69 + } +}; + typedef struct _ec_list_element_st { int nid; const EC_CURVE_DATA *data; @@ -2343,13 +2896,15 @@ static const ec_list_element curve_list[] = { "X9.62 curve over a 239 bit prime field"}, {NID_X9_62_prime239v3, &_EC_X9_62_PRIME_239V3.h, 0, "X9.62 curve over a 239 bit prime field"}, -#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 - {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, EC_GFp_nistp256_method, - "X9.62/SECG curve over a 256 bit prime field"}, + {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, +#if defined(ECP_NISTZ256_ASM) + EC_GFp_nistz256_method, +#elif !defined(OPENSSL_NO_EC_NISTP_64_GCC_128) + EC_GFp_nistp256_method, #else - {NID_X9_62_prime256v1, &_EC_X9_62_PRIME_256V1.h, 0, - "X9.62/SECG curve over a 256 bit prime field"}, + 0, #endif + "X9.62/SECG curve over a 256 bit prime field"}, #ifndef OPENSSL_NO_EC2M /* characteristic two field curves */ /* NIST/SECG curves */ @@ -2460,6 +3015,35 @@ static const ec_list_element curve_list[] = { "\n\tIPSec/IKE/Oakley curve #4 over a 185 bit binary field.\n" "\tNot suitable for ECDSA.\n\tQuestionable extension field!"}, #endif + /* brainpool curves */ + {NID_brainpoolP160r1, &_EC_brainpoolP160r1.h, 0, + "RFC 5639 curve over a 160 bit prime field"}, + {NID_brainpoolP160t1, &_EC_brainpoolP160t1.h, 0, + 
"RFC 5639 curve over a 160 bit prime field"}, + {NID_brainpoolP192r1, &_EC_brainpoolP192r1.h, 0, + "RFC 5639 curve over a 192 bit prime field"}, + {NID_brainpoolP192t1, &_EC_brainpoolP192t1.h, 0, + "RFC 5639 curve over a 192 bit prime field"}, + {NID_brainpoolP224r1, &_EC_brainpoolP224r1.h, 0, + "RFC 5639 curve over a 224 bit prime field"}, + {NID_brainpoolP224t1, &_EC_brainpoolP224t1.h, 0, + "RFC 5639 curve over a 224 bit prime field"}, + {NID_brainpoolP256r1, &_EC_brainpoolP256r1.h, 0, + "RFC 5639 curve over a 256 bit prime field"}, + {NID_brainpoolP256t1, &_EC_brainpoolP256t1.h, 0, + "RFC 5639 curve over a 256 bit prime field"}, + {NID_brainpoolP320r1, &_EC_brainpoolP320r1.h, 0, + "RFC 5639 curve over a 320 bit prime field"}, + {NID_brainpoolP320t1, &_EC_brainpoolP320t1.h, 0, + "RFC 5639 curve over a 320 bit prime field"}, + {NID_brainpoolP384r1, &_EC_brainpoolP384r1.h, 0, + "RFC 5639 curve over a 384 bit prime field"}, + {NID_brainpoolP384t1, &_EC_brainpoolP384t1.h, 0, + "RFC 5639 curve over a 384 bit prime field"}, + {NID_brainpoolP512r1, &_EC_brainpoolP512r1.h, 0, + "RFC 5639 curve over a 512 bit prime field"}, + {NID_brainpoolP512t1, &_EC_brainpoolP512t1.h, 0, + "RFC 5639 curve over a 512 bit prime field"}, }; #define curve_list_length (sizeof(curve_list)/sizeof(ec_list_element)) @@ -2578,6 +3162,10 @@ EC_GROUP *EC_GROUP_new_by_curve_name(int nid) size_t i; EC_GROUP *ret = NULL; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ec_group_new_by_curve_name(nid); +#endif if (nid <= 0) return NULL; @@ -2613,3 +3201,48 @@ size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems) return curve_list_length; } + +/* Functions to translate between common NIST curve names and NIDs */ + +typedef struct { + const char *name; /* NIST Name of curve */ + int nid; /* Curve NID */ +} EC_NIST_NAME; + +static EC_NIST_NAME nist_curves[] = { + {"B-163", NID_sect163r2}, + {"B-233", NID_sect233r1}, + {"B-283", NID_sect283r1}, + {"B-409", NID_sect409r1}, + {"B-571", NID_sect571r1}, + {"K-163", NID_sect163k1}, + {"K-233", NID_sect233k1}, + {"K-283", NID_sect283k1}, + {"K-409", NID_sect409k1}, + {"K-571", NID_sect571k1}, + {"P-192", NID_X9_62_prime192v1}, + {"P-224", NID_secp224r1}, + {"P-256", NID_X9_62_prime256v1}, + {"P-384", NID_secp384r1}, + {"P-521", NID_secp521r1} +}; + +const char *EC_curve_nid2nist(int nid) +{ + size_t i; + for (i = 0; i < sizeof(nist_curves) / sizeof(EC_NIST_NAME); i++) { + if (nist_curves[i].nid == nid) + return nist_curves[i].name; + } + return NULL; +} + +int EC_curve_nist2nid(const char *name) +{ + size_t i; + for (i = 0; i < sizeof(nist_curves) / sizeof(EC_NIST_NAME); i++) { + if (!strcmp(nist_curves[i].name, name)) + return nist_curves[i].nid; + } + return NID_undef; +} diff --git a/deps/openssl/openssl/crypto/ec/ec_cvt.c b/deps/openssl/openssl/crypto/ec/ec_cvt.c index 487d727407e042..5a832ba1cfa4a1 100644 --- a/deps/openssl/openssl/crypto/ec/ec_cvt.c +++ b/deps/openssl/openssl/crypto/ec/ec_cvt.c @@ -72,12 +72,20 @@ #include #include "ec_lcl.h" +#ifdef OPENSSL_FIPS +# include +#endif + EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { const EC_METHOD *meth; EC_GROUP *ret; +#ifdef OPENSSL_FIPS + if (FIPS_mode()) + return FIPS_ec_group_new_curve_gfp(p, a, b, ctx); +#endif #if defined(OPENSSL_BN_ASM_MONT) /* * This might appear controversial, but the fact is that generic @@ -152,6 +160,10 @@ EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const EC_METHOD *meth; EC_GROUP *ret; +# ifdef OPENSSL_FIPS + 
if (FIPS_mode()) + return FIPS_ec_group_new_curve_gf2m(p, a, b, ctx); +# endif meth = EC_GF2m_simple_method(); ret = EC_GROUP_new(meth); diff --git a/deps/openssl/openssl/crypto/ec/ec_err.c b/deps/openssl/openssl/crypto/ec/ec_err.c index 58eae7c6668e3f..13b32c78ac7444 100644 --- a/deps/openssl/openssl/crypto/ec/ec_err.c +++ b/deps/openssl/openssl/crypto/ec/ec_err.c @@ -1,6 +1,6 @@ /* crypto/ec/ec_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -76,6 +76,8 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_D2I_ECPKPARAMETERS), "d2i_ECPKParameters"}, {ERR_FUNC(EC_F_D2I_ECPRIVATEKEY), "d2i_ECPrivateKey"}, {ERR_FUNC(EC_F_DO_EC_KEY_PRINT), "DO_EC_KEY_PRINT"}, + {ERR_FUNC(EC_F_ECDH_CMS_DECRYPT), "ECDH_CMS_DECRYPT"}, + {ERR_FUNC(EC_F_ECDH_CMS_SET_SHARED_INFO), "ECDH_CMS_SET_SHARED_INFO"}, {ERR_FUNC(EC_F_ECKEY_PARAM2TYPE), "ECKEY_PARAM2TYPE"}, {ERR_FUNC(EC_F_ECKEY_PARAM_DECODE), "ECKEY_PARAM_DECODE"}, {ERR_FUNC(EC_F_ECKEY_PRIV_DECODE), "ECKEY_PRIV_DECODE"}, @@ -237,6 +239,12 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_NISTP224_PRE_COMP_NEW), "NISTP224_PRE_COMP_NEW"}, {ERR_FUNC(EC_F_NISTP256_PRE_COMP_NEW), "NISTP256_PRE_COMP_NEW"}, {ERR_FUNC(EC_F_NISTP521_PRE_COMP_NEW), "NISTP521_PRE_COMP_NEW"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_GET_AFFINE), "ecp_nistz256_get_affine"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_POINTS_MUL), "ecp_nistz256_points_mul"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_WINDOWED_MUL), "ecp_nistz256_windowed_mul"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE), + "ecp_nistz256_mult_precompute"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_PRE_COMP_NEW), "ecp_nistz256_pre_comp_new"}, {ERR_FUNC(EC_F_O2I_ECPUBLICKEY), "o2i_ECPublicKey"}, {ERR_FUNC(EC_F_OLD_EC_PRIV_DECODE), "OLD_EC_PRIV_DECODE"}, {ERR_FUNC(EC_F_PKEY_EC_CTRL), "PKEY_EC_CTRL"}, @@ -271,6 +279,7 @@ static ERR_STRING_DATA EC_str_reasons[] = { {ERR_REASON(EC_R_INVALID_COMPRESSED_POINT), "invalid compressed point"}, {ERR_REASON(EC_R_INVALID_COMPRESSION_BIT), "invalid compression bit"}, {ERR_REASON(EC_R_INVALID_CURVE), "invalid curve"}, + {ERR_REASON(EC_R_INVALID_DIGEST), "invalid digest"}, {ERR_REASON(EC_R_INVALID_DIGEST_TYPE), "invalid digest type"}, {ERR_REASON(EC_R_INVALID_ENCODING), "invalid encoding"}, {ERR_REASON(EC_R_INVALID_FIELD), "invalid field"}, @@ -279,6 +288,7 @@ static ERR_STRING_DATA EC_str_reasons[] = { {ERR_REASON(EC_R_INVALID_PENTANOMIAL_BASIS), "invalid pentanomial basis"}, {ERR_REASON(EC_R_INVALID_PRIVATE_KEY), "invalid private key"}, {ERR_REASON(EC_R_INVALID_TRINOMIAL_BASIS), "invalid trinomial basis"}, + {ERR_REASON(EC_R_KDF_PARAMETER_ERROR), "kdf parameter error"}, {ERR_REASON(EC_R_KEYS_NOT_SET), "keys not set"}, {ERR_REASON(EC_R_MISSING_PARAMETERS), "missing parameters"}, {ERR_REASON(EC_R_MISSING_PRIVATE_KEY), "missing private key"}, @@ -290,10 +300,12 @@ static ERR_STRING_DATA EC_str_reasons[] = { {ERR_REASON(EC_R_NO_FIELD_MOD), "no field mod"}, {ERR_REASON(EC_R_NO_PARAMETERS_SET), "no parameters set"}, {ERR_REASON(EC_R_PASSED_NULL_PARAMETER), "passed null parameter"}, + {ERR_REASON(EC_R_PEER_KEY_ERROR), "peer key error"}, {ERR_REASON(EC_R_PKPARAMETERS2GROUP_FAILURE), "pkparameters2group failure"}, {ERR_REASON(EC_R_POINT_AT_INFINITY), "point at infinity"}, {ERR_REASON(EC_R_POINT_IS_NOT_ON_CURVE), 
"point is not on curve"}, + {ERR_REASON(EC_R_SHARED_INFO_ERROR), "shared info error"}, {ERR_REASON(EC_R_SLOT_FULL), "slot full"}, {ERR_REASON(EC_R_UNDEFINED_GENERATOR), "undefined generator"}, {ERR_REASON(EC_R_UNDEFINED_ORDER), "undefined order"}, diff --git a/deps/openssl/openssl/crypto/ec/ec_lcl.h b/deps/openssl/openssl/crypto/ec/ec_lcl.h index 319e651f67ffdf..697eeb528ca944 100644 --- a/deps/openssl/openssl/crypto/ec/ec_lcl.h +++ b/deps/openssl/openssl/crypto/ec/ec_lcl.h @@ -212,6 +212,13 @@ struct ec_group_st { BIGNUM order, cofactor; int curve_name; /* optional NID for named curve */ int asn1_flag; /* flag to control the asn1 encoding */ + /* + * Kludge: upper bit of ans1_flag is used to denote structure + * version. Is set, then last field is present. This is done + * for interoperation with FIPS code. + */ +#define EC_GROUP_ASN1_FLAG_MASK 0x7fffffff +#define EC_GROUP_VERSION(p) (p->asn1_flag&~EC_GROUP_ASN1_FLAG_MASK) point_conversion_form_t asn1_form; unsigned char *seed; /* optional seed for parameters (appears in * ASN1) */ @@ -252,6 +259,7 @@ struct ec_group_st { /* method-specific */ int (*field_mod_func) (BIGNUM *, const BIGNUM *, const BIGNUM *, BN_CTX *); + BN_MONT_CTX *mont_data; /* data for ECDSA inverse */ } /* EC_GROUP */ ; struct ec_key_st { @@ -549,3 +557,20 @@ void ec_GFp_nistp_points_make_affine_internal(size_t num, void *point_array, void ec_GFp_nistp_recode_scalar_bits(unsigned char *sign, unsigned char *digit, unsigned char in); #endif +int ec_precompute_mont_data(EC_GROUP *); + +#ifdef ECP_NISTZ256_ASM +/** Returns GFp methods using montgomery multiplication, with x86-64 optimized + * P256. See http://eprint.iacr.org/2013/816. + * \return EC_METHOD object + */ +const EC_METHOD *EC_GFp_nistz256_method(void); +#endif + +#ifdef OPENSSL_FIPS +EC_GROUP *FIPS_ec_group_new_curve_gfp(const BIGNUM *p, const BIGNUM *a, + const BIGNUM *b, BN_CTX *ctx); +EC_GROUP *FIPS_ec_group_new_curve_gf2m(const BIGNUM *p, const BIGNUM *a, + const BIGNUM *b, BN_CTX *ctx); +EC_GROUP *FIPS_ec_group_new_by_curve_name(int nid); +#endif diff --git a/deps/openssl/openssl/crypto/ec/ec_lib.c b/deps/openssl/openssl/crypto/ec/ec_lib.c index 9a54f41e4e15c9..6ffd9fc16583aa 100644 --- a/deps/openssl/openssl/crypto/ec/ec_lib.c +++ b/deps/openssl/openssl/crypto/ec/ec_lib.c @@ -94,13 +94,14 @@ EC_GROUP *EC_GROUP_new(const EC_METHOD *meth) ret->meth = meth; ret->extra_data = NULL; + ret->mont_data = NULL; ret->generator = NULL; BN_init(&ret->order); BN_init(&ret->cofactor); ret->curve_name = 0; - ret->asn1_flag = 0; + ret->asn1_flag = ~EC_GROUP_ASN1_FLAG_MASK; ret->asn1_form = POINT_CONVERSION_UNCOMPRESSED; ret->seed = NULL; @@ -124,6 +125,9 @@ void EC_GROUP_free(EC_GROUP *group) EC_EX_DATA_free_all_data(&group->extra_data); + if (EC_GROUP_VERSION(group) && group->mont_data) + BN_MONT_CTX_free(group->mont_data); + if (group->generator != NULL) EC_POINT_free(group->generator); BN_free(&group->order); @@ -147,6 +151,9 @@ void EC_GROUP_clear_free(EC_GROUP *group) EC_EX_DATA_clear_free_all_data(&group->extra_data); + if (EC_GROUP_VERSION(group) && group->mont_data) + BN_MONT_CTX_free(group->mont_data); + if (group->generator != NULL) EC_POINT_clear_free(group->generator); BN_clear_free(&group->order); @@ -189,6 +196,22 @@ int EC_GROUP_copy(EC_GROUP *dest, const EC_GROUP *src) return 0; } + if (EC_GROUP_VERSION(src) && src->mont_data != NULL) { + if (dest->mont_data == NULL) { + dest->mont_data = BN_MONT_CTX_new(); + if (dest->mont_data == NULL) + return 0; + } + if (!BN_MONT_CTX_copy(dest->mont_data, 
src->mont_data)) + return 0; + } else { + /* src->generator == NULL */ + if (EC_GROUP_VERSION(dest) && dest->mont_data != NULL) { + BN_MONT_CTX_free(dest->mont_data); + dest->mont_data = NULL; + } + } + if (src->generator != NULL) { if (dest->generator == NULL) { dest->generator = EC_POINT_new(dest); @@ -295,6 +318,13 @@ int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, } else BN_zero(&group->cofactor); + /* + * We ignore the return value because some groups have an order with + * factors of two, which makes the Montgomery setup fail. + * |group->mont_data| will be NULL in this case. + */ + ec_precompute_mont_data(group); + return 1; } @@ -303,6 +333,11 @@ const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group) return group->generator; } +BN_MONT_CTX *EC_GROUP_get_mont_data(const EC_GROUP *group) +{ + return EC_GROUP_VERSION(group) ? group->mont_data : NULL; +} + int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx) { if (!BN_copy(order, &group->order)) @@ -332,12 +367,13 @@ int EC_GROUP_get_curve_name(const EC_GROUP *group) void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag) { - group->asn1_flag = flag; + group->asn1_flag &= ~EC_GROUP_ASN1_FLAG_MASK; + group->asn1_flag |= flag & EC_GROUP_ASN1_FLAG_MASK; } int EC_GROUP_get_asn1_flag(const EC_GROUP *group) { - return group->asn1_flag; + return group->asn1_flag & EC_GROUP_ASN1_FLAG_MASK; } void EC_GROUP_set_point_conversion_form(EC_GROUP *group, @@ -1050,3 +1086,42 @@ int EC_GROUP_have_precompute_mult(const EC_GROUP *group) return 0; /* cannot tell whether precomputation has * been performed */ } + +/* + * ec_precompute_mont_data sets |group->mont_data| from |group->order| and + * returns one on success. On error it returns zero. + */ +int ec_precompute_mont_data(EC_GROUP *group) +{ + BN_CTX *ctx = BN_CTX_new(); + int ret = 0; + + if (!EC_GROUP_VERSION(group)) + goto err; + + if (group->mont_data) { + BN_MONT_CTX_free(group->mont_data); + group->mont_data = NULL; + } + + if (ctx == NULL) + goto err; + + group->mont_data = BN_MONT_CTX_new(); + if (!group->mont_data) + goto err; + + if (!BN_MONT_CTX_set(group->mont_data, &group->order, ctx)) { + BN_MONT_CTX_free(group->mont_data); + group->mont_data = NULL; + goto err; + } + + ret = 1; + + err: + + if (ctx) + BN_CTX_free(ctx); + return ret; +} diff --git a/deps/openssl/openssl/crypto/ec/ec_pmeth.c b/deps/openssl/openssl/crypto/ec/ec_pmeth.c index c189d3ffbf2a12..b76749010c9304 100644 --- a/deps/openssl/openssl/crypto/ec/ec_pmeth.c +++ b/deps/openssl/openssl/crypto/ec/ec_pmeth.c @@ -61,6 +61,7 @@ #include #include #include +#include "ec_lcl.h" #include #include #include "evp_locl.h" @@ -72,6 +73,19 @@ typedef struct { EC_GROUP *gen_group; /* message digest */ const EVP_MD *md; + /* Duplicate key if custom cofactor needed */ + EC_KEY *co_key; + /* Cofactor mode */ + signed char cofactor_mode; + /* KDF (if any) to use for ECDH */ + char kdf_type; + /* Message digest to use for key derivation */ + const EVP_MD *kdf_md; + /* User key material */ + unsigned char *kdf_ukm; + size_t kdf_ukmlen; + /* KDF output length */ + size_t kdf_outlen; } EC_PKEY_CTX; static int pkey_ec_init(EVP_PKEY_CTX *ctx) @@ -83,6 +97,14 @@ static int pkey_ec_init(EVP_PKEY_CTX *ctx) dctx->gen_group = NULL; dctx->md = NULL; + dctx->cofactor_mode = -1; + dctx->co_key = NULL; + dctx->kdf_type = EVP_PKEY_ECDH_KDF_NONE; + dctx->kdf_md = NULL; + dctx->kdf_outlen = 0; + dctx->kdf_ukm = NULL; + dctx->kdf_ukmlen = 0; + ctx->data = dctx; return 1; @@ -101,6 +123,22 @@ static int 
pkey_ec_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) return 0; } dctx->md = sctx->md; + + if (sctx->co_key) { + dctx->co_key = EC_KEY_dup(sctx->co_key); + if (!dctx->co_key) + return 0; + } + dctx->kdf_type = sctx->kdf_type; + dctx->kdf_md = sctx->kdf_md; + dctx->kdf_outlen = sctx->kdf_outlen; + if (sctx->kdf_ukm) { + dctx->kdf_ukm = BUF_memdup(sctx->kdf_ukm, sctx->kdf_ukmlen); + if (!dctx->kdf_ukm) + return 0; + } else + dctx->kdf_ukm = NULL; + dctx->kdf_ukmlen = sctx->kdf_ukmlen; return 1; } @@ -110,6 +148,10 @@ static void pkey_ec_cleanup(EVP_PKEY_CTX *ctx) if (dctx) { if (dctx->gen_group) EC_GROUP_free(dctx->gen_group); + if (dctx->co_key) + EC_KEY_free(dctx->co_key); + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); OPENSSL_free(dctx); } } @@ -168,18 +210,21 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, int ret; size_t outlen; const EC_POINT *pubkey = NULL; + EC_KEY *eckey; + EC_PKEY_CTX *dctx = ctx->data; if (!ctx->pkey || !ctx->peerkey) { ECerr(EC_F_PKEY_EC_DERIVE, EC_R_KEYS_NOT_SET); return 0; } + eckey = dctx->co_key ? dctx->co_key : ctx->pkey->pkey.ec; + if (!key) { const EC_GROUP *group; - group = EC_KEY_get0_group(ctx->pkey->pkey.ec); + group = EC_KEY_get0_group(eckey); *keylen = (EC_GROUP_get_degree(group) + 7) / 8; return 1; } - pubkey = EC_KEY_get0_public_key(ctx->peerkey->pkey.ec); /* @@ -189,12 +234,48 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, outlen = *keylen; - ret = ECDH_compute_key(key, outlen, pubkey, ctx->pkey->pkey.ec, 0); - if (ret < 0) - return ret; + ret = ECDH_compute_key(key, outlen, pubkey, eckey, 0); + if (ret <= 0) + return 0; *keylen = ret; return 1; } + +static int pkey_ec_kdf_derive(EVP_PKEY_CTX *ctx, + unsigned char *key, size_t *keylen) +{ + EC_PKEY_CTX *dctx = ctx->data; + unsigned char *ktmp = NULL; + size_t ktmplen; + int rv = 0; + if (dctx->kdf_type == EVP_PKEY_ECDH_KDF_NONE) + return pkey_ec_derive(ctx, key, keylen); + if (!key) { + *keylen = dctx->kdf_outlen; + return 1; + } + if (*keylen != dctx->kdf_outlen) + return 0; + if (!pkey_ec_derive(ctx, NULL, &ktmplen)) + return 0; + ktmp = OPENSSL_malloc(ktmplen); + if (!ktmp) + return 0; + if (!pkey_ec_derive(ctx, ktmp, &ktmplen)) + goto err; + /* Do KDF stuff */ + if (!ECDH_KDF_X9_62(key, *keylen, ktmp, ktmplen, + dctx->kdf_ukm, dctx->kdf_ukmlen, dctx->kdf_md)) + goto err; + rv = 1; + + err: + if (ktmp) { + OPENSSL_cleanse(ktmp, ktmplen); + OPENSSL_free(ktmp); + } + return rv; +} #endif static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) @@ -213,6 +294,90 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->gen_group = group; return 1; + case EVP_PKEY_CTRL_EC_PARAM_ENC: + if (!dctx->gen_group) { + ECerr(EC_F_PKEY_EC_CTRL, EC_R_NO_PARAMETERS_SET); + return 0; + } + EC_GROUP_set_asn1_flag(dctx->gen_group, p1); + return 1; + +#ifndef OPENSSL_NO_ECDH + case EVP_PKEY_CTRL_EC_ECDH_COFACTOR: + if (p1 == -2) { + if (dctx->cofactor_mode != -1) + return dctx->cofactor_mode; + else { + EC_KEY *ec_key = ctx->pkey->pkey.ec; + return EC_KEY_get_flags(ec_key) & EC_FLAG_COFACTOR_ECDH ? 
1 : + 0; + } + } else if (p1 < -1 || p1 > 1) + return -2; + dctx->cofactor_mode = p1; + if (p1 != -1) { + EC_KEY *ec_key = ctx->pkey->pkey.ec; + if (!ec_key->group) + return -2; + /* If cofactor is 1 cofactor mode does nothing */ + if (BN_is_one(&ec_key->group->cofactor)) + return 1; + if (!dctx->co_key) { + dctx->co_key = EC_KEY_dup(ec_key); + if (!dctx->co_key) + return 0; + } + if (p1) + EC_KEY_set_flags(dctx->co_key, EC_FLAG_COFACTOR_ECDH); + else + EC_KEY_clear_flags(dctx->co_key, EC_FLAG_COFACTOR_ECDH); + } else if (dctx->co_key) { + EC_KEY_free(dctx->co_key); + dctx->co_key = NULL; + } + return 1; +#endif + + case EVP_PKEY_CTRL_EC_KDF_TYPE: + if (p1 == -2) + return dctx->kdf_type; + if (p1 != EVP_PKEY_ECDH_KDF_NONE && p1 != EVP_PKEY_ECDH_KDF_X9_62) + return -2; + dctx->kdf_type = p1; + return 1; + + case EVP_PKEY_CTRL_EC_KDF_MD: + dctx->kdf_md = p2; + return 1; + + case EVP_PKEY_CTRL_GET_EC_KDF_MD: + *(const EVP_MD **)p2 = dctx->kdf_md; + return 1; + + case EVP_PKEY_CTRL_EC_KDF_OUTLEN: + if (p1 <= 0) + return -2; + dctx->kdf_outlen = (size_t)p1; + return 1; + + case EVP_PKEY_CTRL_GET_EC_KDF_OUTLEN: + *(int *)p2 = dctx->kdf_outlen; + return 1; + + case EVP_PKEY_CTRL_EC_KDF_UKM: + if (dctx->kdf_ukm) + OPENSSL_free(dctx->kdf_ukm); + dctx->kdf_ukm = p2; + if (p2) + dctx->kdf_ukmlen = p1; + else + dctx->kdf_ukmlen = 0; + return 1; + + case EVP_PKEY_CTRL_GET_EC_KDF_UKM: + *(unsigned char **)p2 = dctx->kdf_ukm; + return dctx->kdf_ukmlen; + case EVP_PKEY_CTRL_MD: if (EVP_MD_type((const EVP_MD *)p2) != NID_sha1 && EVP_MD_type((const EVP_MD *)p2) != NID_ecdsa_with_SHA1 && @@ -226,6 +391,10 @@ static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) dctx->md = p2; return 1; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = dctx->md; + return 1; + case EVP_PKEY_CTRL_PEER_KEY: /* Default behaviour is OK */ case EVP_PKEY_CTRL_DIGESTINIT: @@ -244,7 +413,9 @@ static int pkey_ec_ctrl_str(EVP_PKEY_CTX *ctx, { if (!strcmp(type, "ec_paramgen_curve")) { int nid; - nid = OBJ_sn2nid(value); + nid = EC_curve_nist2nid(value); + if (nid == NID_undef) + nid = OBJ_sn2nid(value); if (nid == NID_undef) nid = OBJ_ln2nid(value); if (nid == NID_undef) { @@ -252,7 +423,28 @@ static int pkey_ec_ctrl_str(EVP_PKEY_CTX *ctx, return 0; } return EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid); + } else if (!strcmp(type, "ec_param_enc")) { + int param_enc; + if (!strcmp(value, "explicit")) + param_enc = 0; + else if (!strcmp(value, "named_curve")) + param_enc = OPENSSL_EC_NAMED_CURVE; + else + return -2; + return EVP_PKEY_CTX_set_ec_param_enc(ctx, param_enc); + } else if (!strcmp(type, "ecdh_kdf_md")) { + const EVP_MD *md; + if (!(md = EVP_get_digestbyname(value))) { + ECerr(EC_F_PKEY_EC_CTRL_STR, EC_R_INVALID_DIGEST); + return 0; + } + return EVP_PKEY_CTX_set_ecdh_kdf_md(ctx, md); + } else if (!strcmp(type, "ecdh_cofactor_mode")) { + int co_mode; + co_mode = atoi(value); + return EVP_PKEY_CTX_set_ecdh_cofactor_mode(ctx, co_mode); } + return -2; } @@ -279,7 +471,8 @@ static int pkey_ec_paramgen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) static int pkey_ec_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { EC_KEY *ec = NULL; - if (ctx->pkey == NULL) { + EC_PKEY_CTX *dctx = ctx->data; + if (ctx->pkey == NULL && dctx->gen_group == NULL) { ECerr(EC_F_PKEY_EC_KEYGEN, EC_R_NO_PARAMETERS_SET); return 0; } @@ -287,9 +480,14 @@ static int pkey_ec_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) if (!ec) return 0; EVP_PKEY_assign_EC_KEY(pkey, ec); - /* Note: if error return, pkey is freed by parent routine */ - if 
(!EVP_PKEY_copy_parameters(pkey, ctx->pkey)) - return 0; + if (ctx->pkey) { + /* Note: if error return, pkey is freed by parent routine */ + if (!EVP_PKEY_copy_parameters(pkey, ctx->pkey)) + return 0; + } else { + if (!EC_KEY_set_group(ec, dctx->gen_group)) + return 0; + } return EC_KEY_generate_key(pkey->pkey.ec); } @@ -322,7 +520,7 @@ const EVP_PKEY_METHOD ec_pkey_meth = { 0, #ifndef OPENSSL_NO_ECDH - pkey_ec_derive, + pkey_ec_kdf_derive, #else 0, #endif diff --git a/deps/openssl/openssl/crypto/ec/eck_prn.c b/deps/openssl/openssl/crypto/ec/eck_prn.c index a911a0ac40805d..515b262387df0f 100644 --- a/deps/openssl/openssl/crypto/ec/eck_prn.c +++ b/deps/openssl/openssl/crypto/ec/eck_prn.c @@ -171,6 +171,7 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off) if (EC_GROUP_get_asn1_flag(x)) { /* the curve parameter are given by an asn1 OID */ int nid; + const char *nname; if (!BIO_indent(bp, off, 128)) goto err; @@ -183,6 +184,13 @@ int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off) goto err; if (BIO_printf(bp, "\n") <= 0) goto err; + nname = EC_curve_nid2nist(nid); + if (nname) { + if (!BIO_indent(bp, off, 128)) + goto err; + if (BIO_printf(bp, "NIST CURVE: %s\n", nname) <= 0) + goto err; + } } else { /* explicit parameters */ int is_char_two = 0; diff --git a/deps/openssl/openssl/crypto/ec/ecp_nistp521.c b/deps/openssl/openssl/crypto/ec/ecp_nistp521.c index cc7634512d442b..360b9a3516f611 100644 --- a/deps/openssl/openssl/crypto/ec/ecp_nistp521.c +++ b/deps/openssl/openssl/crypto/ec/ecp_nistp521.c @@ -1282,11 +1282,11 @@ static void point_add(felem x3, felem y3, felem z3, felem_scalar128(tmp2, 2); /* tmp2[i] < 17*2^121 */ felem_diff128(tmp, tmp2); - /*- - * tmp[i] < 2^127 - 2^69 + 17*2^122 - * = 2^126 - 2^122 - 2^6 - 2^2 - 1 - * < 2^127 - */ + /*- + * tmp[i] < 2^127 - 2^69 + 17*2^122 + * = 2^126 - 2^122 - 2^6 - 2^2 - 1 + * < 2^127 + */ felem_reduce(y_out, tmp); copy_conditional(x_out, x2, z1_is_zero); diff --git a/deps/openssl/openssl/crypto/ec/ecp_nistz256.c b/deps/openssl/openssl/crypto/ec/ecp_nistz256.c new file mode 100644 index 00000000000000..2cd6599d8557e1 --- /dev/null +++ b/deps/openssl/openssl/crypto/ec/ecp_nistz256.c @@ -0,0 +1,1486 @@ +/****************************************************************************** + * * + * Copyright 2014 Intel Corporation * + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * + * See the License for the specific language governing permissions and * + * limitations under the License. 
* + * * + ****************************************************************************** + * * + * Developers and authors: * + * Shay Gueron (1, 2), and Vlad Krasnov (1) * + * (1) Intel Corporation, Israel Development Center * + * (2) University of Haifa * + * Reference: * + * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with * + * 256 Bit Primes" * + * * + ******************************************************************************/ + +#include + +#include +#include +#include +#include "cryptlib.h" + +#include "ec_lcl.h" + +#if BN_BITS2 != 64 +# define TOBN(hi,lo) lo,hi +#else +# define TOBN(hi,lo) ((BN_ULONG)hi<<32|lo) +#endif + +#if defined(__GNUC__) +# define ALIGN32 __attribute((aligned(32))) +#elif defined(_MSC_VER) +# define ALIGN32 __declspec(align(32)) +#else +# define ALIGN32 +#endif + +#define ALIGNPTR(p,N) ((unsigned char *)p+N-(size_t)p%N) +#define P256_LIMBS (256/BN_BITS2) + +typedef unsigned short u16; + +typedef struct { + BN_ULONG X[P256_LIMBS]; + BN_ULONG Y[P256_LIMBS]; + BN_ULONG Z[P256_LIMBS]; +} P256_POINT; + +typedef struct { + BN_ULONG X[P256_LIMBS]; + BN_ULONG Y[P256_LIMBS]; +} P256_POINT_AFFINE; + +typedef P256_POINT_AFFINE PRECOMP256_ROW[64]; + +/* structure for precomputed multiples of the generator */ +typedef struct ec_pre_comp_st { + const EC_GROUP *group; /* Parent EC_GROUP object */ + size_t w; /* Window size */ + /* + * Constant time access to the X and Y coordinates of the pre-computed, + * generator multiplies, in the Montgomery domain. Pre-calculated + * multiplies are stored in affine form. + */ + PRECOMP256_ROW *precomp; + void *precomp_storage; + int references; +} EC_PRE_COMP; + +/* Functions implemented in assembly */ +/* Modular mul by 2: res = 2*a mod P */ +void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Modular div by 2: res = a/2 mod P */ +void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Modular mul by 3: res = 3*a mod P */ +void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Modular add: res = a+b mod P */ +void ecp_nistz256_add(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +/* Modular sub: res = a-b mod P */ +void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +/* Modular neg: res = -a mod P */ +void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); +/* Montgomery mul: res = a*b*2^-256 mod P */ +void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]); +/* Montgomery sqr: res = a*a*2^-256 mod P */ +void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG a[P256_LIMBS]); +/* Convert a number from Montgomery domain, by multiplying with 1 */ +void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]); +/* Convert a number to Montgomery domain, by multiplying with 2^512 mod P*/ +void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]); +/* Functions that perform constant time access to the precomputed tables */ +void ecp_nistz256_select_w5(P256_POINT * val, + const P256_POINT * in_t, int index); +void ecp_nistz256_select_w7(P256_POINT_AFFINE * val, + const P256_POINT_AFFINE * in_t, int index); + +/* One converted into the Montgomery domain */ +static const BN_ULONG ONE[P256_LIMBS] = { + TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000), + 
TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe) +}; + +static void *ecp_nistz256_pre_comp_dup(void *); +static void ecp_nistz256_pre_comp_free(void *); +static void ecp_nistz256_pre_comp_clear_free(void *); +static EC_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group); + +/* Precomputed tables for the default generator */ +#include "ecp_nistz256_table.c" + +/* Recode window to a signed digit, see ecp_nistputil.c for details */ +static unsigned int _booth_recode_w5(unsigned int in) +{ + unsigned int s, d; + + s = ~((in >> 5) - 1); + d = (1 << 6) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + return (d << 1) + (s & 1); +} + +static unsigned int _booth_recode_w7(unsigned int in) +{ + unsigned int s, d; + + s = ~((in >> 7) - 1); + d = (1 << 8) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + return (d << 1) + (s & 1); +} + +static void copy_conditional(BN_ULONG dst[P256_LIMBS], + const BN_ULONG src[P256_LIMBS], BN_ULONG move) +{ + BN_ULONG mask1 = -move; + BN_ULONG mask2 = ~mask1; + + dst[0] = (src[0] & mask1) ^ (dst[0] & mask2); + dst[1] = (src[1] & mask1) ^ (dst[1] & mask2); + dst[2] = (src[2] & mask1) ^ (dst[2] & mask2); + dst[3] = (src[3] & mask1) ^ (dst[3] & mask2); + if (P256_LIMBS == 8) { + dst[4] = (src[4] & mask1) ^ (dst[4] & mask2); + dst[5] = (src[5] & mask1) ^ (dst[5] & mask2); + dst[6] = (src[6] & mask1) ^ (dst[6] & mask2); + dst[7] = (src[7] & mask1) ^ (dst[7] & mask2); + } +} + +static BN_ULONG is_zero(BN_ULONG in) +{ + in |= (0 - in); + in = ~in; + in &= BN_MASK2; + in >>= BN_BITS2 - 1; + return in; +} + +static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS], + const BN_ULONG b[P256_LIMBS]) +{ + BN_ULONG res; + + res = a[0] ^ b[0]; + res |= a[1] ^ b[1]; + res |= a[2] ^ b[2]; + res |= a[3] ^ b[3]; + if (P256_LIMBS == 8) { + res |= a[4] ^ b[4]; + res |= a[5] ^ b[5]; + res |= a[6] ^ b[6]; + res |= a[7] ^ b[7]; + } + + return is_zero(res); +} + +static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS]) +{ + BN_ULONG res; + + res = a[0] ^ ONE[0]; + res |= a[1] ^ ONE[1]; + res |= a[2] ^ ONE[2]; + res |= a[3] ^ ONE[3]; + if (P256_LIMBS == 8) { + res |= a[4] ^ ONE[4]; + res |= a[5] ^ ONE[5]; + res |= a[6] ^ ONE[6]; + } + + return is_zero(res); +} + +#ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION +void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a); +void ecp_nistz256_point_add(P256_POINT *r, + const P256_POINT *a, const P256_POINT *b); +void ecp_nistz256_point_add_affine(P256_POINT *r, + const P256_POINT *a, + const P256_POINT_AFFINE *b); +#else +/* Point double: r = 2*a */ +static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a) +{ + BN_ULONG S[P256_LIMBS]; + BN_ULONG M[P256_LIMBS]; + BN_ULONG Zsqr[P256_LIMBS]; + BN_ULONG tmp0[P256_LIMBS]; + + const BN_ULONG *in_x = a->X; + const BN_ULONG *in_y = a->Y; + const BN_ULONG *in_z = a->Z; + + BN_ULONG *res_x = r->X; + BN_ULONG *res_y = r->Y; + BN_ULONG *res_z = r->Z; + + ecp_nistz256_mul_by_2(S, in_y); + + ecp_nistz256_sqr_mont(Zsqr, in_z); + + ecp_nistz256_sqr_mont(S, S); + + ecp_nistz256_mul_mont(res_z, in_z, in_y); + ecp_nistz256_mul_by_2(res_z, res_z); + + ecp_nistz256_add(M, in_x, Zsqr); + ecp_nistz256_sub(Zsqr, in_x, Zsqr); + + ecp_nistz256_sqr_mont(res_y, S); + ecp_nistz256_div_by_2(res_y, res_y); + + ecp_nistz256_mul_mont(M, M, Zsqr); + ecp_nistz256_mul_by_3(M, M); + + ecp_nistz256_mul_mont(S, S, in_x); + ecp_nistz256_mul_by_2(tmp0, S); + + ecp_nistz256_sqr_mont(res_x, M); + + ecp_nistz256_sub(res_x, res_x, tmp0); + ecp_nistz256_sub(S, S, res_x); + + 
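+    /*
+     * At this point S = 4*X*Y^2 - X3 and res_y = 8*Y^4 (all in the
+     * Montgomery domain); the two remaining steps complete the Jacobian
+     * doubling with Y3 = M*(4*X*Y^2 - X3) - 8*Y^4.
+     */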
ecp_nistz256_mul_mont(S, S, M); + ecp_nistz256_sub(res_y, S, res_y); +} + +/* Point addition: r = a+b */ +static void ecp_nistz256_point_add(P256_POINT *r, + const P256_POINT *a, const P256_POINT *b) +{ + BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS]; + BN_ULONG U1[P256_LIMBS], S1[P256_LIMBS]; + BN_ULONG Z1sqr[P256_LIMBS]; + BN_ULONG Z2sqr[P256_LIMBS]; + BN_ULONG H[P256_LIMBS], R[P256_LIMBS]; + BN_ULONG Hsqr[P256_LIMBS]; + BN_ULONG Rsqr[P256_LIMBS]; + BN_ULONG Hcub[P256_LIMBS]; + + BN_ULONG res_x[P256_LIMBS]; + BN_ULONG res_y[P256_LIMBS]; + BN_ULONG res_z[P256_LIMBS]; + + BN_ULONG in1infty, in2infty; + + const BN_ULONG *in1_x = a->X; + const BN_ULONG *in1_y = a->Y; + const BN_ULONG *in1_z = a->Z; + + const BN_ULONG *in2_x = b->X; + const BN_ULONG *in2_y = b->Y; + const BN_ULONG *in2_z = b->Z; + + /* We encode infinity as (0,0), which is not on the curve, + * so it is OK. */ + in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] | + in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]); + if (P256_LIMBS == 8) + in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] | + in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]); + + in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | + in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); + if (P256_LIMBS == 8) + in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] | + in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]); + + in1infty = is_zero(in1infty); + in2infty = is_zero(in2infty); + + ecp_nistz256_sqr_mont(Z2sqr, in2_z); /* Z2^2 */ + ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */ + + ecp_nistz256_mul_mont(S1, Z2sqr, in2_z); /* S1 = Z2^3 */ + ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */ + + ecp_nistz256_mul_mont(S1, S1, in1_y); /* S1 = Y1*Z2^3 */ + ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */ + ecp_nistz256_sub(R, S2, S1); /* R = S2 - S1 */ + + ecp_nistz256_mul_mont(U1, in1_x, Z2sqr); /* U1 = X1*Z2^2 */ + ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */ + ecp_nistz256_sub(H, U2, U1); /* H = U2 - U1 */ + + /* + * This should not happen during sign/ecdh, so no constant time violation + */ + if (is_equal(U1, U2) && !in1infty && !in2infty) { + if (is_equal(S1, S2)) { + ecp_nistz256_point_double(r, a); + return; + } else { + memset(r, 0, sizeof(*r)); + return; + } + } + + ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */ + ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */ + ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */ + ecp_nistz256_mul_mont(res_z, res_z, in2_z); /* Z3 = H*Z1*Z2 */ + ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */ + + ecp_nistz256_mul_mont(U2, U1, Hsqr); /* U1*H^2 */ + ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */ + + ecp_nistz256_sub(res_x, Rsqr, Hsqr); + ecp_nistz256_sub(res_x, res_x, Hcub); + + ecp_nistz256_sub(res_y, U2, res_x); + + ecp_nistz256_mul_mont(S2, S1, Hcub); + ecp_nistz256_mul_mont(res_y, R, res_y); + ecp_nistz256_sub(res_y, res_y, S2); + + copy_conditional(res_x, in2_x, in1infty); + copy_conditional(res_y, in2_y, in1infty); + copy_conditional(res_z, in2_z, in1infty); + + copy_conditional(res_x, in1_x, in2infty); + copy_conditional(res_y, in1_y, in2infty); + copy_conditional(res_z, in1_z, in2infty); + + memcpy(r->X, res_x, sizeof(res_x)); + memcpy(r->Y, res_y, sizeof(res_y)); + memcpy(r->Z, res_z, sizeof(res_z)); +} + +/* Point addition when b is known to be affine: r = a+b */ +static void ecp_nistz256_point_add_affine(P256_POINT *r, + const P256_POINT *a, + const P256_POINT_AFFINE *b) +{ + BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS]; + BN_ULONG Z1sqr[P256_LIMBS]; + BN_ULONG H[P256_LIMBS], R[P256_LIMBS]; + BN_ULONG 
Hsqr[P256_LIMBS]; + BN_ULONG Rsqr[P256_LIMBS]; + BN_ULONG Hcub[P256_LIMBS]; + + BN_ULONG res_x[P256_LIMBS]; + BN_ULONG res_y[P256_LIMBS]; + BN_ULONG res_z[P256_LIMBS]; + + BN_ULONG in1infty, in2infty; + + const BN_ULONG *in1_x = a->X; + const BN_ULONG *in1_y = a->Y; + const BN_ULONG *in1_z = a->Z; + + const BN_ULONG *in2_x = b->X; + const BN_ULONG *in2_y = b->Y; + + /* + * In affine representation we encode infty as (0,0), which is not on the + * curve, so it is OK + */ + in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] | + in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]); + if (P256_LIMBS == 8) + in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] | + in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]); + + in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | + in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); + if (P256_LIMBS == 8) + in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] | + in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]); + + in1infty = is_zero(in1infty); + in2infty = is_zero(in2infty); + + ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */ + + ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */ + ecp_nistz256_sub(H, U2, in1_x); /* H = U2 - U1 */ + + ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */ + + ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */ + + ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */ + ecp_nistz256_sub(R, S2, in1_y); /* R = S2 - S1 */ + + ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */ + ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */ + ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */ + + ecp_nistz256_mul_mont(U2, in1_x, Hsqr); /* U1*H^2 */ + ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */ + + ecp_nistz256_sub(res_x, Rsqr, Hsqr); + ecp_nistz256_sub(res_x, res_x, Hcub); + ecp_nistz256_sub(H, U2, res_x); + + ecp_nistz256_mul_mont(S2, in1_y, Hcub); + ecp_nistz256_mul_mont(H, H, R); + ecp_nistz256_sub(res_y, H, S2); + + copy_conditional(res_x, in2_x, in1infty); + copy_conditional(res_x, in1_x, in2infty); + + copy_conditional(res_y, in2_y, in1infty); + copy_conditional(res_y, in1_y, in2infty); + + copy_conditional(res_z, ONE, in1infty); + copy_conditional(res_z, in1_z, in2infty); + + memcpy(r->X, res_x, sizeof(res_x)); + memcpy(r->Y, res_y, sizeof(res_y)); + memcpy(r->Z, res_z, sizeof(res_z)); +} +#endif + +/* r = in^-1 mod p */ +static void ecp_nistz256_mod_inverse(BN_ULONG r[P256_LIMBS], + const BN_ULONG in[P256_LIMBS]) +{ + /* + * The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff + * ffffffff ffffffff We use FLT and used poly-2 as exponent + */ + BN_ULONG p2[P256_LIMBS]; + BN_ULONG p4[P256_LIMBS]; + BN_ULONG p8[P256_LIMBS]; + BN_ULONG p16[P256_LIMBS]; + BN_ULONG p32[P256_LIMBS]; + BN_ULONG res[P256_LIMBS]; + int i; + + ecp_nistz256_sqr_mont(res, in); + ecp_nistz256_mul_mont(p2, res, in); /* 3*p */ + + ecp_nistz256_sqr_mont(res, p2); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p4, res, p2); /* f*p */ + + ecp_nistz256_sqr_mont(res, p4); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */ + + ecp_nistz256_sqr_mont(res, p8); + for (i = 0; i < 7; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */ + + ecp_nistz256_sqr_mont(res, p16); + for (i = 0; i < 15; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */ + + ecp_nistz256_sqr_mont(res, p32); + for (i = 0; i < 31; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, in); 
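+    /*
+     * Here res = in^(2^64 - 2^32 + 1), i.e. the exponent accumulated so far
+     * equals the top 64 bits (0xffffffff00000001) of the field prime.  The
+     * remaining squarings and multiplications extend the addition chain to
+     * the full exponent p - 2 required by Fermat's little theorem.
+     */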
+ + for (i = 0; i < 32 * 4; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p32); + + for (i = 0; i < 32; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p32); + + for (i = 0; i < 16; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p16); + + for (i = 0; i < 8; i++) + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p8); + + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p4); + + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, p2); + + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_sqr_mont(res, res); + ecp_nistz256_mul_mont(res, res, in); + + memcpy(r, res, sizeof(res)); +} + +/* + * ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and + * returns one if it fits. Otherwise it returns zero. + */ +static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS], + const BIGNUM *in) +{ + if (in->top > P256_LIMBS) + return 0; + + memset(out, 0, sizeof(BN_ULONG) * P256_LIMBS); + memcpy(out, in->d, sizeof(BN_ULONG) * in->top); + return 1; +} + +/* r = sum(scalar[i]*point[i]) */ +static void ecp_nistz256_windowed_mul(const EC_GROUP *group, + P256_POINT *r, + const BIGNUM **scalar, + const EC_POINT **point, + int num, BN_CTX *ctx) +{ + int i, j; + unsigned int index; + unsigned char (*p_str)[33] = NULL; + const unsigned int window_size = 5; + const unsigned int mask = (1 << (window_size + 1)) - 1; + unsigned int wvalue; + BN_ULONG tmp[P256_LIMBS]; + ALIGN32 P256_POINT h; + const BIGNUM **scalars = NULL; + P256_POINT (*table)[16] = NULL; + void *table_storage = NULL; + + if ((table_storage = + OPENSSL_malloc(num * 16 * sizeof(P256_POINT) + 64)) == NULL + || (p_str = + OPENSSL_malloc(num * 33 * sizeof(unsigned char))) == NULL + || (scalars = OPENSSL_malloc(num * sizeof(BIGNUM *))) == NULL) { + ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_MALLOC_FAILURE); + goto err; + } else { + table = (void *)ALIGNPTR(table_storage, 64); + } + + for (i = 0; i < num; i++) { + P256_POINT *row = table[i]; + + if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) { + BIGNUM *mod; + + if ((mod = BN_CTX_get(ctx)) == NULL) + goto err; + if (!BN_nnmod(mod, scalar[i], &group->order, ctx)) { + ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_BN_LIB); + goto err; + } + scalars[i] = mod; + } else + scalars[i] = scalar[i]; + + for (j = 0; j < scalars[i]->top * BN_BYTES; j += BN_BYTES) { + BN_ULONG d = scalars[i]->d[j / BN_BYTES]; + + p_str[i][j + 0] = d & 0xff; + p_str[i][j + 1] = (d >> 8) & 0xff; + p_str[i][j + 2] = (d >> 16) & 0xff; + p_str[i][j + 3] = (d >>= 24) & 0xff; + if (BN_BYTES == 8) { + d >>= 8; + p_str[i][j + 4] = d & 0xff; + p_str[i][j + 5] = (d >> 8) & 0xff; + p_str[i][j + 6] = (d >> 16) & 0xff; + p_str[i][j + 7] = (d >> 24) & 0xff; + } + } + for (; j < 33; j++) + p_str[i][j] = 0; + + /* table[0] is implicitly (0,0,0) (the point at infinity), + * therefore it is not stored. All other values are actually + * stored with an offset of -1 in table. 
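+         * In other words, row[k - 1] holds the multiple k*P for k = 1..16,
+         * which the double/add ladder below fills in.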
+ */ + + if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &point[i]->X) + || !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &point[i]->Y) + || !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &point[i]->Z)) { + ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, EC_R_COORDINATES_OUT_OF_RANGE); + goto err; + } + + ecp_nistz256_point_double(&row[ 2 - 1], &row[ 1 - 1]); + ecp_nistz256_point_add (&row[ 3 - 1], &row[ 2 - 1], &row[1 - 1]); + ecp_nistz256_point_double(&row[ 4 - 1], &row[ 2 - 1]); + ecp_nistz256_point_double(&row[ 6 - 1], &row[ 3 - 1]); + ecp_nistz256_point_double(&row[ 8 - 1], &row[ 4 - 1]); + ecp_nistz256_point_double(&row[12 - 1], &row[ 6 - 1]); + ecp_nistz256_point_add (&row[ 5 - 1], &row[ 4 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[ 7 - 1], &row[ 6 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[ 9 - 1], &row[ 8 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[13 - 1], &row[12 - 1], &row[1 - 1]); + ecp_nistz256_point_double(&row[14 - 1], &row[ 7 - 1]); + ecp_nistz256_point_double(&row[10 - 1], &row[ 5 - 1]); + ecp_nistz256_point_add (&row[15 - 1], &row[14 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[11 - 1], &row[10 - 1], &row[1 - 1]); + ecp_nistz256_point_add (&row[16 - 1], &row[15 - 1], &row[1 - 1]); + } + + index = 255; + + wvalue = p_str[0][(index - 1) / 8]; + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + + ecp_nistz256_select_w5(r, table[0], _booth_recode_w5(wvalue) >> 1); + + while (index >= 5) { + for (i = (index == 255 ? 1 : 0); i < num; i++) { + unsigned int off = (index - 1) / 8; + + wvalue = p_str[i][off] | p_str[i][off + 1] << 8; + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + + wvalue = _booth_recode_w5(wvalue); + + ecp_nistz256_select_w5(&h, table[i], wvalue >> 1); + + ecp_nistz256_neg(tmp, h.Y); + copy_conditional(h.Y, tmp, (wvalue & 1)); + + ecp_nistz256_point_add(r, r, &h); + } + + index -= window_size; + + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + ecp_nistz256_point_double(r, r); + } + + /* Final window */ + for (i = 0; i < num; i++) { + wvalue = p_str[i][0]; + wvalue = (wvalue << 1) & mask; + + wvalue = _booth_recode_w5(wvalue); + + ecp_nistz256_select_w5(&h, table[i], wvalue >> 1); + + ecp_nistz256_neg(tmp, h.Y); + copy_conditional(h.Y, tmp, wvalue & 1); + + ecp_nistz256_point_add(r, r, &h); + } + + err: + if (table_storage) + OPENSSL_free(table_storage); + if (p_str) + OPENSSL_free(p_str); + if (scalars) + OPENSSL_free(scalars); +} + +/* Coordinates of G, for which we have precomputed tables */ +const static BN_ULONG def_xG[P256_LIMBS] = { + TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601), + TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6) +}; + +const static BN_ULONG def_yG[P256_LIMBS] = { + TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c), + TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85) +}; + +/* + * ecp_nistz256_is_affine_G returns one if |generator| is the standard, P-256 + * generator. + */ +static int ecp_nistz256_is_affine_G(const EC_POINT *generator) +{ + return (generator->X.top == P256_LIMBS) && + (generator->Y.top == P256_LIMBS) && + (generator->Z.top == (P256_LIMBS - P256_LIMBS / 8)) && + is_equal(generator->X.d, def_xG) && + is_equal(generator->Y.d, def_yG) && is_one(generator->Z.d); +} + +static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) +{ + /* + * We precompute a table for a Booth encoded exponent (wNAF) based + * computation. 
Each table holds 64 values for safe access, with an + * implicit value of infinity at index zero. We use window of size 7, and + * therefore require ceil(256/7) = 37 tables. + */ + BIGNUM *order; + EC_POINT *P = NULL, *T = NULL; + const EC_POINT *generator; + EC_PRE_COMP *pre_comp; + int i, j, k, ret = 0; + size_t w; + + PRECOMP256_ROW *preComputedTable = NULL; + unsigned char *precomp_storage = NULL; + + /* if there is an old EC_PRE_COMP object, throw it away */ + EC_EX_DATA_free_data(&group->extra_data, ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free); + + generator = EC_GROUP_get0_generator(group); + if (generator == NULL) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNDEFINED_GENERATOR); + return 0; + } + + if (ecp_nistz256_is_affine_G(generator)) { + /* + * No need to calculate tables for the standard generator because we + * have them statically. + */ + return 1; + } + + if ((pre_comp = ecp_nistz256_pre_comp_new(group)) == NULL) + return 0; + + if (ctx == NULL) { + ctx = BN_CTX_new(); + if (ctx == NULL) + goto err; + } + + BN_CTX_start(ctx); + order = BN_CTX_get(ctx); + + if (order == NULL) + goto err; + + if (!EC_GROUP_get_order(group, order, ctx)) + goto err; + + if (BN_is_zero(order)) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNKNOWN_ORDER); + goto err; + } + + w = 7; + + if ((precomp_storage = + OPENSSL_malloc(37 * 64 * sizeof(P256_POINT_AFFINE) + 64)) == NULL) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, ERR_R_MALLOC_FAILURE); + goto err; + } else { + preComputedTable = (void *)ALIGNPTR(precomp_storage, 64); + } + + P = EC_POINT_new(group); + T = EC_POINT_new(group); + + /* + * The zero entry is implicitly infinity, and we skip it, storing other + * values with -1 offset. + */ + EC_POINT_copy(T, generator); + + for (k = 0; k < 64; k++) { + EC_POINT_copy(P, T); + for (j = 0; j < 37; j++) { + /* + * It would be faster to use + * ec_GFp_simple_points_make_affine and make multiple + * points affine at the same time. + */ + ec_GFp_simple_make_affine(group, P, ctx); + ecp_nistz256_bignum_to_field_elem(preComputedTable[j] + [k].X, &P->X); + ecp_nistz256_bignum_to_field_elem(preComputedTable[j] + [k].Y, &P->Y); + for (i = 0; i < 7; i++) + ec_GFp_simple_dbl(group, P, P, ctx); + } + ec_GFp_simple_add(group, T, T, generator, ctx); + } + + pre_comp->group = group; + pre_comp->w = w; + pre_comp->precomp = preComputedTable; + pre_comp->precomp_storage = precomp_storage; + + precomp_storage = NULL; + + if (!EC_EX_DATA_set_data(&group->extra_data, pre_comp, + ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free)) { + goto err; + } + + pre_comp = NULL; + + ret = 1; + + err: + if (ctx != NULL) + BN_CTX_end(ctx); + if (pre_comp) + ecp_nistz256_pre_comp_free(pre_comp); + if (precomp_storage) + OPENSSL_free(precomp_storage); + if (P) + EC_POINT_free(P); + if (T) + EC_POINT_free(T); + return ret; +} + +/* + * Note that by default ECP_NISTZ256_AVX2 is undefined. While it's great + * code processing 4 points in parallel, corresponding serial operation + * is several times slower, because it uses 29x29=58-bit multiplication + * as opposite to 64x64=128-bit in integer-only scalar case. As result + * it doesn't provide *significant* performance improvement. Note that + * just defining ECP_NISTZ256_AVX2 is not sufficient to make it work, + * you'd need to compile even asm/ecp_nistz256-avx.pl module. 
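+ * (That is, the corresponding AVX2 assembly module must be generated,
+ * assembled and linked in as well.)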
+ */ +#if defined(ECP_NISTZ256_AVX2) +# if !(defined(__x86_64) || defined(__x86_64__)) || \ + defined(_M_AMD64) || defined(_MX64)) || \ + !(defined(__GNUC__) || defined(_MSC_VER)) /* this is for ALIGN32 */ +# undef ECP_NISTZ256_AVX2 +# else +/* Constant time access, loading four values, from four consecutive tables */ +void ecp_nistz256_avx2_select_w7(P256_POINT_AFFINE * val, + const P256_POINT_AFFINE * in_t, int index); +void ecp_nistz256_avx2_multi_select_w7(void *result, const void *in, int index0, + int index1, int index2, int index3); +void ecp_nistz256_avx2_transpose_convert(void *RESULTx4, const void *in); +void ecp_nistz256_avx2_convert_transpose_back(void *result, const void *Ax4); +void ecp_nistz256_avx2_point_add_affine_x4(void *RESULTx4, const void *Ax4, + const void *Bx4); +void ecp_nistz256_avx2_point_add_affines_x4(void *RESULTx4, const void *Ax4, + const void *Bx4); +void ecp_nistz256_avx2_to_mont(void *RESULTx4, const void *Ax4); +void ecp_nistz256_avx2_from_mont(void *RESULTx4, const void *Ax4); +void ecp_nistz256_avx2_set1(void *RESULTx4); +int ecp_nistz_avx2_eligible(void); + +static void booth_recode_w7(unsigned char *sign, + unsigned char *digit, unsigned char in) +{ + unsigned char s, d; + + s = ~((in >> 7) - 1); + d = (1 << 8) - in - 1; + d = (d & s) | (in & ~s); + d = (d >> 1) + (d & 1); + + *sign = s & 1; + *digit = d; +} + +/* + * ecp_nistz256_avx2_mul_g performs multiplication by G, using only the + * precomputed table. It does 4 affine point additions in parallel, + * significantly speeding up point multiplication for a fixed value. + */ +static void ecp_nistz256_avx2_mul_g(P256_POINT *r, + unsigned char p_str[33], + const P256_POINT_AFFINE(*preComputedTable)[64]) +{ + const unsigned int window_size = 7; + const unsigned int mask = (1 << (window_size + 1)) - 1; + unsigned int wvalue; + /* Using 4 windows at a time */ + unsigned char sign0, digit0; + unsigned char sign1, digit1; + unsigned char sign2, digit2; + unsigned char sign3, digit3; + unsigned int index = 0; + BN_ULONG tmp[P256_LIMBS]; + int i; + + ALIGN32 BN_ULONG aX4[4 * 9 * 3] = { 0 }; + ALIGN32 BN_ULONG bX4[4 * 9 * 2] = { 0 }; + ALIGN32 P256_POINT_AFFINE point_arr[P256_LIMBS]; + ALIGN32 P256_POINT res_point_arr[P256_LIMBS]; + + /* Initial four windows */ + wvalue = *((u16 *) & p_str[0]); + wvalue = (wvalue << 1) & mask; + index += window_size; + booth_recode_w7(&sign0, &digit0, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign1, &digit1, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign2, &digit2, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign3, &digit3, wvalue); + + ecp_nistz256_avx2_multi_select_w7(point_arr, preComputedTable[0], + digit0, digit1, digit2, digit3); + + ecp_nistz256_neg(tmp, point_arr[0].Y); + copy_conditional(point_arr[0].Y, tmp, sign0); + ecp_nistz256_neg(tmp, point_arr[1].Y); + copy_conditional(point_arr[1].Y, tmp, sign1); + ecp_nistz256_neg(tmp, point_arr[2].Y); + copy_conditional(point_arr[2].Y, tmp, sign2); + ecp_nistz256_neg(tmp, point_arr[3].Y); + copy_conditional(point_arr[3].Y, tmp, sign3); + + ecp_nistz256_avx2_transpose_convert(aX4, point_arr); + ecp_nistz256_avx2_to_mont(aX4, aX4); + ecp_nistz256_avx2_to_mont(&aX4[4 * 9], &aX4[4 * 9]); + 
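+    /*
+     * The selected table entries are affine, so the third 4*9 block of aX4
+     * (the Z coordinates of the four accumulators) is presumably set to one
+     * by the call below.
+     */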
ecp_nistz256_avx2_set1(&aX4[4 * 9 * 2]); + + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign0, &digit0, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign1, &digit1, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign2, &digit2, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign3, &digit3, wvalue); + + ecp_nistz256_avx2_multi_select_w7(point_arr, preComputedTable[4 * 1], + digit0, digit1, digit2, digit3); + + ecp_nistz256_neg(tmp, point_arr[0].Y); + copy_conditional(point_arr[0].Y, tmp, sign0); + ecp_nistz256_neg(tmp, point_arr[1].Y); + copy_conditional(point_arr[1].Y, tmp, sign1); + ecp_nistz256_neg(tmp, point_arr[2].Y); + copy_conditional(point_arr[2].Y, tmp, sign2); + ecp_nistz256_neg(tmp, point_arr[3].Y); + copy_conditional(point_arr[3].Y, tmp, sign3); + + ecp_nistz256_avx2_transpose_convert(bX4, point_arr); + ecp_nistz256_avx2_to_mont(bX4, bX4); + ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]); + /* Optimized when both inputs are affine */ + ecp_nistz256_avx2_point_add_affines_x4(aX4, aX4, bX4); + + for (i = 2; i < 9; i++) { + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign0, &digit0, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign1, &digit1, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign2, &digit2, wvalue); + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + booth_recode_w7(&sign3, &digit3, wvalue); + + ecp_nistz256_avx2_multi_select_w7(point_arr, + preComputedTable[4 * i], + digit0, digit1, digit2, digit3); + + ecp_nistz256_neg(tmp, point_arr[0].Y); + copy_conditional(point_arr[0].Y, tmp, sign0); + ecp_nistz256_neg(tmp, point_arr[1].Y); + copy_conditional(point_arr[1].Y, tmp, sign1); + ecp_nistz256_neg(tmp, point_arr[2].Y); + copy_conditional(point_arr[2].Y, tmp, sign2); + ecp_nistz256_neg(tmp, point_arr[3].Y); + copy_conditional(point_arr[3].Y, tmp, sign3); + + ecp_nistz256_avx2_transpose_convert(bX4, point_arr); + ecp_nistz256_avx2_to_mont(bX4, bX4); + ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]); + + ecp_nistz256_avx2_point_add_affine_x4(aX4, aX4, bX4); + } + + ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 0], &aX4[4 * 9 * 0]); + ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 1], &aX4[4 * 9 * 1]); + ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 2], &aX4[4 * 9 * 2]); + + ecp_nistz256_avx2_convert_transpose_back(res_point_arr, aX4); + /* Last window is performed serially */ + wvalue = *((u16 *) & p_str[(index - 1) / 8]); + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + booth_recode_w7(&sign0, &digit0, wvalue); + ecp_nistz256_avx2_select_w7((P256_POINT_AFFINE *) r, + preComputedTable[36], digit0); + ecp_nistz256_neg(tmp, r->Y); + copy_conditional(r->Y, tmp, sign0); + memcpy(r->Z, ONE, sizeof(ONE)); + /* Sum the four windows */ + ecp_nistz256_point_add(r, r, &res_point_arr[0]); + ecp_nistz256_point_add(r, r, 
&res_point_arr[1]); + ecp_nistz256_point_add(r, r, &res_point_arr[2]); + ecp_nistz256_point_add(r, r, &res_point_arr[3]); +} +# endif +#endif + +static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group, + const P256_POINT_AFFINE *in, + BN_CTX *ctx) +{ + BIGNUM x, y; + BN_ULONG d_x[P256_LIMBS], d_y[P256_LIMBS]; + int ret = 0; + + memcpy(d_x, in->X, sizeof(d_x)); + x.d = d_x; + x.dmax = x.top = P256_LIMBS; + x.neg = 0; + x.flags = BN_FLG_STATIC_DATA; + + memcpy(d_y, in->Y, sizeof(d_y)); + y.d = d_y; + y.dmax = y.top = P256_LIMBS; + y.neg = 0; + y.flags = BN_FLG_STATIC_DATA; + + ret = EC_POINT_set_affine_coordinates_GFp(group, out, &x, &y, ctx); + + return ret; +} + +/* r = scalar*G + sum(scalars[i]*points[i]) */ +static int ecp_nistz256_points_mul(const EC_GROUP *group, + EC_POINT *r, + const BIGNUM *scalar, + size_t num, + const EC_POINT *points[], + const BIGNUM *scalars[], BN_CTX *ctx) +{ + int i = 0, ret = 0, no_precomp_for_generator = 0, p_is_infinity = 0; + size_t j; + unsigned char p_str[33] = { 0 }; + const PRECOMP256_ROW *preComputedTable = NULL; + const EC_PRE_COMP *pre_comp = NULL; + const EC_POINT *generator = NULL; + unsigned int index = 0; + const unsigned int window_size = 7; + const unsigned int mask = (1 << (window_size + 1)) - 1; + unsigned int wvalue; + ALIGN32 union { + P256_POINT p; + P256_POINT_AFFINE a; + } t, p; + BIGNUM *tmp_scalar; + + if (group->meth != r->meth) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + if ((scalar == NULL) && (num == 0)) + return EC_POINT_set_to_infinity(group, r); + + for (j = 0; j < num; j++) { + if (group->meth != points[j]->meth) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS); + return 0; + } + } + + /* Need 256 bits for space for all coordinates. */ + bn_wexpand(&r->X, P256_LIMBS); + bn_wexpand(&r->Y, P256_LIMBS); + bn_wexpand(&r->Z, P256_LIMBS); + r->X.top = P256_LIMBS; + r->Y.top = P256_LIMBS; + r->Z.top = P256_LIMBS; + + if (scalar) { + generator = EC_GROUP_get0_generator(group); + if (generator == NULL) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_UNDEFINED_GENERATOR); + goto err; + } + + /* look if we can use precomputed multiples of generator */ + pre_comp = + EC_EX_DATA_get_data(group->extra_data, ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free); + + if (pre_comp) { + /* + * If there is a precomputed table for the generator, check that + * it was generated with the same generator. + */ + EC_POINT *pre_comp_generator = EC_POINT_new(group); + if (pre_comp_generator == NULL) + goto err; + + if (!ecp_nistz256_set_from_affine + (pre_comp_generator, group, pre_comp->precomp[0], ctx)) + goto err; + + if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx)) + preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp; + + EC_POINT_free(pre_comp_generator); + } + + if (preComputedTable == NULL && ecp_nistz256_is_affine_G(generator)) { + /* + * If there is no precomputed data, but the generator + * is the default, a hardcoded table of precomputed + * data is used. This is because applications, such as + * Apache, do not use EC_KEY_precompute_mult. 
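+                 * The static table from ecp_nistz256_table.c is used
+                 * instead.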
+ */ + preComputedTable = (const PRECOMP256_ROW *)ecp_nistz256_precomputed; + } + + if (preComputedTable) { + if ((BN_num_bits(scalar) > 256) + || BN_is_negative(scalar)) { + if ((tmp_scalar = BN_CTX_get(ctx)) == NULL) + goto err; + + if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_BN_LIB); + goto err; + } + scalar = tmp_scalar; + } + + for (i = 0; i < scalar->top * BN_BYTES; i += BN_BYTES) { + BN_ULONG d = scalar->d[i / BN_BYTES]; + + p_str[i + 0] = d & 0xff; + p_str[i + 1] = (d >> 8) & 0xff; + p_str[i + 2] = (d >> 16) & 0xff; + p_str[i + 3] = (d >>= 24) & 0xff; + if (BN_BYTES == 8) { + d >>= 8; + p_str[i + 4] = d & 0xff; + p_str[i + 5] = (d >> 8) & 0xff; + p_str[i + 6] = (d >> 16) & 0xff; + p_str[i + 7] = (d >> 24) & 0xff; + } + } + + for (; i < 33; i++) + p_str[i] = 0; + +#if defined(ECP_NISTZ256_AVX2) + if (ecp_nistz_avx2_eligible()) { + ecp_nistz256_avx2_mul_g(&p.p, p_str, preComputedTable); + } else +#endif + { + /* First window */ + wvalue = (p_str[0] << 1) & mask; + index += window_size; + + wvalue = _booth_recode_w7(wvalue); + + ecp_nistz256_select_w7(&p.a, preComputedTable[0], wvalue >> 1); + + ecp_nistz256_neg(p.p.Z, p.p.Y); + copy_conditional(p.p.Y, p.p.Z, wvalue & 1); + + memcpy(p.p.Z, ONE, sizeof(ONE)); + + for (i = 1; i < 37; i++) { + unsigned int off = (index - 1) / 8; + wvalue = p_str[off] | p_str[off + 1] << 8; + wvalue = (wvalue >> ((index - 1) % 8)) & mask; + index += window_size; + + wvalue = _booth_recode_w7(wvalue); + + ecp_nistz256_select_w7(&t.a, + preComputedTable[i], wvalue >> 1); + + ecp_nistz256_neg(t.p.Z, t.a.Y); + copy_conditional(t.a.Y, t.p.Z, wvalue & 1); + + ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); + } + } + } else { + p_is_infinity = 1; + no_precomp_for_generator = 1; + } + } else + p_is_infinity = 1; + + if (no_precomp_for_generator) { + /* + * Without a precomputed table for the generator, it has to be + * handled like a normal point. 
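+         * The generator and its scalar are simply appended to the points
+         * and scalars arrays below.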
+ */ + const BIGNUM **new_scalars; + const EC_POINT **new_points; + + new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *)); + if (!new_scalars) { + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE); + return 0; + } + + new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *)); + if (!new_points) { + OPENSSL_free(new_scalars); + ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE); + return 0; + } + + memcpy(new_scalars, scalars, num * sizeof(BIGNUM *)); + new_scalars[num] = scalar; + memcpy(new_points, points, num * sizeof(EC_POINT *)); + new_points[num] = generator; + + scalars = new_scalars; + points = new_points; + num++; + } + + if (num) { + P256_POINT *out = &t.p; + if (p_is_infinity) + out = &p.p; + + ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx); + + if (!p_is_infinity) + ecp_nistz256_point_add(&p.p, &p.p, out); + } + + if (no_precomp_for_generator) { + OPENSSL_free(points); + OPENSSL_free(scalars); + } + + memcpy(r->X.d, p.p.X, sizeof(p.p.X)); + memcpy(r->Y.d, p.p.Y, sizeof(p.p.Y)); + memcpy(r->Z.d, p.p.Z, sizeof(p.p.Z)); + bn_correct_top(&r->X); + bn_correct_top(&r->Y); + bn_correct_top(&r->Z); + + ret = 1; + + err: + return ret; +} + +static int ecp_nistz256_get_affine(const EC_GROUP *group, + const EC_POINT *point, + BIGNUM *x, BIGNUM *y, BN_CTX *ctx) +{ + BN_ULONG z_inv2[P256_LIMBS]; + BN_ULONG z_inv3[P256_LIMBS]; + BN_ULONG x_aff[P256_LIMBS]; + BN_ULONG y_aff[P256_LIMBS]; + BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS]; + + if (EC_POINT_is_at_infinity(group, point)) { + ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_POINT_AT_INFINITY); + return 0; + } + + if (!ecp_nistz256_bignum_to_field_elem(point_x, &point->X) || + !ecp_nistz256_bignum_to_field_elem(point_y, &point->Y) || + !ecp_nistz256_bignum_to_field_elem(point_z, &point->Z)) { + ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_COORDINATES_OUT_OF_RANGE); + return 0; + } + + ecp_nistz256_mod_inverse(z_inv3, point_z); + ecp_nistz256_sqr_mont(z_inv2, z_inv3); + ecp_nistz256_mul_mont(x_aff, z_inv2, point_x); + + if (x != NULL) { + bn_wexpand(x, P256_LIMBS); + x->top = P256_LIMBS; + ecp_nistz256_from_mont(x->d, x_aff); + bn_correct_top(x); + } + + if (y != NULL) { + ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2); + ecp_nistz256_mul_mont(y_aff, z_inv3, point_y); + bn_wexpand(y, P256_LIMBS); + y->top = P256_LIMBS; + ecp_nistz256_from_mont(y->d, y_aff); + bn_correct_top(y); + } + + return 1; +} + +static EC_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group) +{ + EC_PRE_COMP *ret = NULL; + + if (!group) + return NULL; + + ret = (EC_PRE_COMP *)OPENSSL_malloc(sizeof(EC_PRE_COMP)); + + if (!ret) { + ECerr(EC_F_ECP_NISTZ256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); + return ret; + } + + ret->group = group; + ret->w = 6; /* default */ + ret->precomp = NULL; + ret->precomp_storage = NULL; + ret->references = 1; + return ret; +} + +static void *ecp_nistz256_pre_comp_dup(void *src_) +{ + EC_PRE_COMP *src = src_; + + /* no need to actually copy, these objects never change! 
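+     * only the reference count is bumped.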
*/ + CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); + + return src_; +} + +static void ecp_nistz256_pre_comp_free(void *pre_) +{ + int i; + EC_PRE_COMP *pre = pre_; + + if (!pre) + return; + + i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); + if (i > 0) + return; + + if (pre->precomp_storage) + OPENSSL_free(pre->precomp_storage); + + OPENSSL_free(pre); +} + +static void ecp_nistz256_pre_comp_clear_free(void *pre_) +{ + int i; + EC_PRE_COMP *pre = pre_; + + if (!pre) + return; + + i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); + if (i > 0) + return; + + if (pre->precomp_storage) { + OPENSSL_cleanse(pre->precomp, + 32 * sizeof(unsigned char) * (1 << pre->w) * 2 * 37); + OPENSSL_free(pre->precomp_storage); + } + OPENSSL_cleanse(pre, sizeof *pre); + OPENSSL_free(pre); +} + +static int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group) +{ + /* There is a hard-coded table for the default generator. */ + const EC_POINT *generator = EC_GROUP_get0_generator(group); + if (generator != NULL && ecp_nistz256_is_affine_G(generator)) { + /* There is a hard-coded table for the default generator. */ + return 1; + } + + return EC_EX_DATA_get_data(group->extra_data, ecp_nistz256_pre_comp_dup, + ecp_nistz256_pre_comp_free, + ecp_nistz256_pre_comp_clear_free) != NULL; +} + +const EC_METHOD *EC_GFp_nistz256_method(void) +{ + static const EC_METHOD ret = { + EC_FLAGS_DEFAULT_OCT, + NID_X9_62_prime_field, + ec_GFp_mont_group_init, + ec_GFp_mont_group_finish, + ec_GFp_mont_group_clear_finish, + ec_GFp_mont_group_copy, + ec_GFp_mont_group_set_curve, + ec_GFp_simple_group_get_curve, + ec_GFp_simple_group_get_degree, + ec_GFp_simple_group_check_discriminant, + ec_GFp_simple_point_init, + ec_GFp_simple_point_finish, + ec_GFp_simple_point_clear_finish, + ec_GFp_simple_point_copy, + ec_GFp_simple_point_set_to_infinity, + ec_GFp_simple_set_Jprojective_coordinates_GFp, + ec_GFp_simple_get_Jprojective_coordinates_GFp, + ec_GFp_simple_point_set_affine_coordinates, + ecp_nistz256_get_affine, + 0, 0, 0, + ec_GFp_simple_add, + ec_GFp_simple_dbl, + ec_GFp_simple_invert, + ec_GFp_simple_is_at_infinity, + ec_GFp_simple_is_on_curve, + ec_GFp_simple_cmp, + ec_GFp_simple_make_affine, + ec_GFp_simple_points_make_affine, + ecp_nistz256_points_mul, /* mul */ + ecp_nistz256_mult_precompute, /* precompute_mult */ + ecp_nistz256_window_have_precompute_mult, /* have_precompute_mult */ + ec_GFp_mont_field_mul, + ec_GFp_mont_field_sqr, + 0, /* field_div */ + ec_GFp_mont_field_encode, + ec_GFp_mont_field_decode, + ec_GFp_mont_field_set_to_one + }; + + return &ret; +} diff --git a/deps/openssl/openssl/crypto/ec/ecp_nistz256_table.c b/deps/openssl/openssl/crypto/ec/ecp_nistz256_table.c new file mode 100644 index 00000000000000..216d024e01203c --- /dev/null +++ b/deps/openssl/openssl/crypto/ec/ecp_nistz256_table.c @@ -0,0 +1,9533 @@ +/* + * This is the precomputed constant time access table for the code in + * ecp_montp256.c, for the default generator. The table consists of 37 + * subtables, each subtable contains 64 affine points. The affine points are + * encoded as eight uint64's, four for the x coordinate and four for the y. + * Both values are in little-endian order. There are 37 tables because a + * signed, 6-bit wNAF form of the scalar is used and ceil(256/(6 + 1)) = 37. + * Within each table there are 64 values because the 6-bit wNAF value can + * take 64 values, ignoring the sign bit, which is implemented by performing + * a negation of the affine point when required. 
We would like to align it + * to 2MB in order to increase the chances of using a large page but that + * appears to lead to invalid ELF files being produced. + */ + +#if defined(__GNUC__) +__attribute((aligned(4096))) +#elif defined(_MSC_VER) +__declspec(align(4096)) +#elif defined(__SUNPRO_C) +# pragma align 4096(ecp_nistz256_precomputed) +#endif +static const BN_ULONG ecp_nistz256_precomputed[37][64 * + sizeof(P256_POINT_AFFINE) / + sizeof(BN_ULONG)] = { + {TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601), + TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6), + TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c), + TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85), + TOBN(0x850046d4, 0x10ddd64d), TOBN(0xaa6ae3c1, 0xa433827d), + TOBN(0x73220503, 0x8d1490d9), TOBN(0xf6bb32e4, 0x3dcf3a3b), + TOBN(0x2f3648d3, 0x61bee1a5), TOBN(0x152cd7cb, 0xeb236ff8), + TOBN(0x19a8fb0e, 0x92042dbe), TOBN(0x78c57751, 0x0a5b8a3b), + TOBN(0xffac3f90, 0x4eebc127), TOBN(0xb027f84a, 0x087d81fb), + TOBN(0x66ad77dd, 0x87cbbc98), TOBN(0x26936a3f, 0xb6ff747e), + TOBN(0xb04c5c1f, 0xc983a7eb), TOBN(0x583e47ad, 0x0861fe1a), + TOBN(0x78820831, 0x1a2ee98e), TOBN(0xd5f06a29, 0xe587cc07), + TOBN(0x74b0b50d, 0x46918dcc), TOBN(0x4650a6ed, 0xc623c173), + TOBN(0x0cdaacac, 0xe8100af2), TOBN(0x577362f5, 0x41b0176b), + TOBN(0x2d96f24c, 0xe4cbaba6), TOBN(0x17628471, 0xfad6f447), + TOBN(0x6b6c36de, 0xe5ddd22e), TOBN(0x84b14c39, 0x4c5ab863), + TOBN(0xbe1b8aae, 0xc45c61f5), TOBN(0x90ec649a, 0x94b9537d), + TOBN(0x941cb5aa, 0xd076c20c), TOBN(0xc9079605, 0x890523c8), + TOBN(0xeb309b4a, 0xe7ba4f10), TOBN(0x73c568ef, 0xe5eb882b), + TOBN(0x3540a987, 0x7e7a1f68), TOBN(0x73a076bb, 0x2dd1e916), + TOBN(0x40394737, 0x3e77664a), TOBN(0x55ae744f, 0x346cee3e), + TOBN(0xd50a961a, 0x5b17a3ad), TOBN(0x13074b59, 0x54213673), + TOBN(0x93d36220, 0xd377e44b), TOBN(0x299c2b53, 0xadff14b5), + TOBN(0xf424d44c, 0xef639f11), TOBN(0xa4c9916d, 0x4a07f75f), + TOBN(0x0746354e, 0xa0173b4f), TOBN(0x2bd20213, 0xd23c00f7), + TOBN(0xf43eaab5, 0x0c23bb08), TOBN(0x13ba5119, 0xc3123e03), + TOBN(0x2847d030, 0x3f5b9d4d), TOBN(0x6742f2f2, 0x5da67bdd), + TOBN(0xef933bdc, 0x77c94195), TOBN(0xeaedd915, 0x6e240867), + TOBN(0x27f14cd1, 0x9499a78f), TOBN(0x462ab5c5, 0x6f9b3455), + TOBN(0x8f90f02a, 0xf02cfc6b), TOBN(0xb763891e, 0xb265230d), + TOBN(0xf59da3a9, 0x532d4977), TOBN(0x21e3327d, 0xcf9eba15), + TOBN(0x123c7b84, 0xbe60bbf0), TOBN(0x56ec12f2, 0x7706df76), + TOBN(0x75c96e8f, 0x264e20e8), TOBN(0xabe6bfed, 0x59a7a841), + TOBN(0x2cc09c04, 0x44c8eb00), TOBN(0xe05b3080, 0xf0c4e16b), + TOBN(0x1eb7777a, 0xa45f3314), TOBN(0x56af7bed, 0xce5d45e3), + TOBN(0x2b6e019a, 0x88b12f1a), TOBN(0x086659cd, 0xfd835f9b), + TOBN(0x2c18dbd1, 0x9dc21ec8), TOBN(0x98f9868a, 0x0fcf8139), + TOBN(0x737d2cd6, 0x48250b49), TOBN(0xcc61c947, 0x24b3428f), + TOBN(0x0c2b4078, 0x80dd9e76), TOBN(0xc43a8991, 0x383fbe08), + TOBN(0x5f7d2d65, 0x779be5d2), TOBN(0x78719a54, 0xeb3b4ab5), + TOBN(0xea7d260a, 0x6245e404), TOBN(0x9de40795, 0x6e7fdfe0), + TOBN(0x1ff3a415, 0x8dac1ab5), TOBN(0x3e7090f1, 0x649c9073), + TOBN(0x1a768561, 0x2b944e88), TOBN(0x250f939e, 0xe57f61c8), + TOBN(0x0c0daa89, 0x1ead643d), TOBN(0x68930023, 0xe125b88e), + TOBN(0x04b71aa7, 0xd2697768), TOBN(0xabdedef5, 0xca345a33), + TOBN(0x2409d29d, 0xee37385e), TOBN(0x4ee1df77, 0xcb83e156), + TOBN(0x0cac12d9, 0x1cbb5b43), TOBN(0x170ed2f6, 0xca895637), + TOBN(0x28228cfa, 0x8ade6d66), TOBN(0x7ff57c95, 0x53238aca), + TOBN(0xccc42563, 0x4b2ed709), TOBN(0x0e356769, 0x856fd30d), + TOBN(0xbcbcd43f, 0x559e9811), TOBN(0x738477ac, 
0x5395b759), + TOBN(0x35752b90, 0xc00ee17f), TOBN(0x68748390, 0x742ed2e3), + TOBN(0x7cd06422, 0xbd1f5bc1), TOBN(0xfbc08769, 0xc9e7b797), + TOBN(0xa242a35b, 0xb0cf664a), TOBN(0x126e48f7, 0x7f9707e3), + TOBN(0x1717bf54, 0xc6832660), TOBN(0xfaae7332, 0xfd12c72e), + TOBN(0x27b52db7, 0x995d586b), TOBN(0xbe29569e, 0x832237c2), + TOBN(0xe8e4193e, 0x2a65e7db), TOBN(0x152706dc, 0x2eaa1bbb), + TOBN(0x72bcd8b7, 0xbc60055b), TOBN(0x03cc23ee, 0x56e27e4b), + TOBN(0xee337424, 0xe4819370), TOBN(0xe2aa0e43, 0x0ad3da09), + TOBN(0x40b8524f, 0x6383c45d), TOBN(0xd7663554, 0x42a41b25), + TOBN(0x64efa6de, 0x778a4797), TOBN(0x2042170a, 0x7079adf4), + TOBN(0x808b0b65, 0x0bc6fb80), TOBN(0x5882e075, 0x3ffe2e6b), + TOBN(0xd5ef2f7c, 0x2c83f549), TOBN(0x54d63c80, 0x9103b723), + TOBN(0xf2f11bd6, 0x52a23f9b), TOBN(0x3670c319, 0x4b0b6587), + TOBN(0x55c4623b, 0xb1580e9e), TOBN(0x64edf7b2, 0x01efe220), + TOBN(0x97091dcb, 0xd53c5c9d), TOBN(0xf17624b6, 0xac0a177b), + TOBN(0xb0f13975, 0x2cfe2dff), TOBN(0xc1a35c0a, 0x6c7a574e), + TOBN(0x227d3146, 0x93e79987), TOBN(0x0575bf30, 0xe89cb80e), + TOBN(0x2f4e247f, 0x0d1883bb), TOBN(0xebd51226, 0x3274c3d0), + TOBN(0x5f3e51c8, 0x56ada97a), TOBN(0x4afc964d, 0x8f8b403e), + TOBN(0xa6f247ab, 0x412e2979), TOBN(0x675abd1b, 0x6f80ebda), + TOBN(0x66a2bd72, 0x5e485a1d), TOBN(0x4b2a5caf, 0x8f4f0b3c), + TOBN(0x2626927f, 0x1b847bba), TOBN(0x6c6fc7d9, 0x0502394d), + TOBN(0xfea912ba, 0xa5659ae8), TOBN(0x68363aba, 0x25e1a16e), + TOBN(0xb8842277, 0x752c41ac), TOBN(0xfe545c28, 0x2897c3fc), + TOBN(0x2d36e9e7, 0xdc4c696b), TOBN(0x5806244a, 0xfba977c5), + TOBN(0x85665e9b, 0xe39508c1), TOBN(0xf720ee25, 0x6d12597b), + TOBN(0x8a979129, 0xd2337a31), TOBN(0x5916868f, 0x0f862bdc), + TOBN(0x048099d9, 0x5dd283ba), TOBN(0xe2d1eeb6, 0xfe5bfb4e), + TOBN(0x82ef1c41, 0x7884005d), TOBN(0xa2d4ec17, 0xffffcbae), + TOBN(0x9161c53f, 0x8aa95e66), TOBN(0x5ee104e1, 0xc5fee0d0), + TOBN(0x562e4cec, 0xc135b208), TOBN(0x74e1b265, 0x4783f47d), + TOBN(0x6d2a506c, 0x5a3f3b30), TOBN(0xecead9f4, 0xc16762fc), + TOBN(0xf29dd4b2, 0xe286e5b9), TOBN(0x1b0fadc0, 0x83bb3c61), + TOBN(0x7a75023e, 0x7fac29a4), TOBN(0xc086d5f1, 0xc9477fa3), + TOBN(0x0fc61135, 0x2f6f3076), TOBN(0xc99ffa23, 0xe3912a9a), + TOBN(0x6a0b0685, 0xd2f8ba3d), TOBN(0xfdc777e8, 0xe93358a4), + TOBN(0x94a787bb, 0x35415f04), TOBN(0x640c2d6a, 0x4d23fea4), + TOBN(0x9de917da, 0x153a35b5), TOBN(0x793e8d07, 0x5d5cd074), + TOBN(0xf4f87653, 0x2de45068), TOBN(0x37c7a7e8, 0x9e2e1f6e), + TOBN(0xd0825fa2, 0xa3584069), TOBN(0xaf2cea7c, 0x1727bf42), + TOBN(0x0360a4fb, 0x9e4785a9), TOBN(0xe5fda49c, 0x27299f4a), + TOBN(0x48068e13, 0x71ac2f71), TOBN(0x83d0687b, 0x9077666f), + TOBN(0x6d3883b2, 0x15d02819), TOBN(0x6d0d7550, 0x40dd9a35), + TOBN(0x61d7cbf9, 0x1d2b469f), TOBN(0xf97b232f, 0x2efc3115), + TOBN(0xa551d750, 0xb24bcbc7), TOBN(0x11ea4949, 0x88a1e356), + TOBN(0x7669f031, 0x93cb7501), TOBN(0x595dc55e, 0xca737b8a), + TOBN(0xa4a319ac, 0xd837879f), TOBN(0x6fc1b49e, 0xed6b67b0), + TOBN(0xe3959933, 0x32f1f3af), TOBN(0x966742eb, 0x65432a2e), + TOBN(0x4b8dc9fe, 0xb4966228), TOBN(0x96cc6312, 0x43f43950), + TOBN(0x12068859, 0xc9b731ee), TOBN(0x7b948dc3, 0x56f79968), + TOBN(0x61e4ad32, 0xed1f8008), TOBN(0xe6c9267a, 0xd8b17538), + TOBN(0x1ac7c5eb, 0x857ff6fb), TOBN(0x994baaa8, 0x55f2fb10), + TOBN(0x84cf14e1, 0x1d248018), TOBN(0x5a39898b, 0x628ac508), + TOBN(0x14fde97b, 0x5fa944f5), TOBN(0xed178030, 0xd12e5ac7), + TOBN(0x042c2af4, 0x97e2feb4), TOBN(0xd36a42d7, 0xaebf7313), + TOBN(0x49d2c9eb, 0x084ffdd7), TOBN(0x9f8aa54b, 0x2ef7c76a), + TOBN(0x9200b7ba, 0x09895e70), TOBN(0x3bd0c66f, 0xddb7fb58), + 
TOBN(0x2d97d108, 0x78eb4cbb), TOBN(0x2d431068, 0xd84bde31), + TOBN(0x4b523eb7, 0x172ccd1f), TOBN(0x7323cb28, 0x30a6a892), + TOBN(0x97082ec0, 0xcfe153eb), TOBN(0xe97f6b6a, 0xf2aadb97), + TOBN(0x1d3d393e, 0xd1a83da1), TOBN(0xa6a7f9c7, 0x804b2a68), + TOBN(0x4a688b48, 0x2d0cb71e), TOBN(0xa9b4cc5f, 0x40585278), + TOBN(0x5e5db46a, 0xcb66e132), TOBN(0xf1be963a, 0x0d925880), + TOBN(0x944a7027, 0x0317b9e2), TOBN(0xe266f959, 0x48603d48), + TOBN(0x98db6673, 0x5c208899), TOBN(0x90472447, 0xa2fb18a3), + TOBN(0x8a966939, 0x777c619f), TOBN(0x3798142a, 0x2a3be21b), + TOBN(0xb4241cb1, 0x3298b343), TOBN(0xa3a14e49, 0xb44f65a1), + TOBN(0xc5f4d6cd, 0x3ac77acd), TOBN(0xd0288cb5, 0x52b6fc3c), + TOBN(0xd5cc8c2f, 0x1c040abc), TOBN(0xb675511e, 0x06bf9b4a), + TOBN(0xd667da37, 0x9b3aa441), TOBN(0x460d45ce, 0x51601f72), + TOBN(0xe2f73c69, 0x6755ff89), TOBN(0xdd3cf7e7, 0x473017e6), + TOBN(0x8ef5689d, 0x3cf7600d), TOBN(0x948dc4f8, 0xb1fc87b4), + TOBN(0xd9e9fe81, 0x4ea53299), TOBN(0x2d921ca2, 0x98eb6028), + TOBN(0xfaecedfd, 0x0c9803fc), TOBN(0xf38ae891, 0x4d7b4745), + TOBN(0xd8c5fccf, 0xc5e3a3d8), TOBN(0xbefd904c, 0x4079dfbf), + TOBN(0xbc6d6a58, 0xfead0197), TOBN(0x39227077, 0x695532a4), + TOBN(0x09e23e6d, 0xdbef42f5), TOBN(0x7e449b64, 0x480a9908), + TOBN(0x7b969c1a, 0xad9a2e40), TOBN(0x6231d792, 0x9591c2a4), + TOBN(0x87151456, 0x0f664534), TOBN(0x85ceae7c, 0x4b68f103), + TOBN(0xac09c4ae, 0x65578ab9), TOBN(0x33ec6868, 0xf044b10c), + TOBN(0x6ac4832b, 0x3a8ec1f1), TOBN(0x5509d128, 0x5847d5ef), + TOBN(0xf909604f, 0x763f1574), TOBN(0xb16c4303, 0xc32f63c4), + TOBN(0xb6ab2014, 0x7ca23cd3), TOBN(0xcaa7a5c6, 0xa391849d), + TOBN(0x5b0673a3, 0x75678d94), TOBN(0xc982ddd4, 0xdd303e64), + TOBN(0xfd7b000b, 0x5db6f971), TOBN(0xbba2cb1f, 0x6f876f92), + TOBN(0xc77332a3, 0x3c569426), TOBN(0xa159100c, 0x570d74f8), + TOBN(0xfd16847f, 0xdec67ef5), TOBN(0x742ee464, 0x233e76b7), + TOBN(0x0b8e4134, 0xefc2b4c8), TOBN(0xca640b86, 0x42a3e521), + TOBN(0x653a0190, 0x8ceb6aa9), TOBN(0x313c300c, 0x547852d5), + TOBN(0x24e4ab12, 0x6b237af7), TOBN(0x2ba90162, 0x8bb47af8), + TOBN(0x3d5e58d6, 0xa8219bb7), TOBN(0xc691d0bd, 0x1b06c57f), + TOBN(0x0ae4cb10, 0xd257576e), TOBN(0x3569656c, 0xd54a3dc3), + TOBN(0xe5ebaebd, 0x94cda03a), TOBN(0x934e82d3, 0x162bfe13), + TOBN(0x450ac0ba, 0xe251a0c6), TOBN(0x480b9e11, 0xdd6da526), + TOBN(0x00467bc5, 0x8cce08b5), TOBN(0xb636458c, 0x7f178d55), + TOBN(0xc5748bae, 0xa677d806), TOBN(0x2763a387, 0xdfa394eb), + TOBN(0xa12b448a, 0x7d3cebb6), TOBN(0xe7adda3e, 0x6f20d850), + TOBN(0xf63ebce5, 0x1558462c), TOBN(0x58b36143, 0x620088a8), + TOBN(0x8a2cc3ca, 0x4d63c0ee), TOBN(0x51233117, 0x0fe948ce), + TOBN(0x7463fd85, 0x222ef33b), TOBN(0xadf0c7dc, 0x7c603d6c), + TOBN(0x0ec32d3b, 0xfe7765e5), TOBN(0xccaab359, 0xbf380409), + TOBN(0xbdaa84d6, 0x8e59319c), TOBN(0xd9a4c280, 0x9c80c34d), + TOBN(0xa9d89488, 0xa059c142), TOBN(0x6f5ae714, 0xff0b9346), + TOBN(0x068f237d, 0x16fb3664), TOBN(0x5853e4c4, 0x363186ac), + TOBN(0xe2d87d23, 0x63c52f98), TOBN(0x2ec4a766, 0x81828876), + TOBN(0x47b864fa, 0xe14e7b1c), TOBN(0x0c0bc0e5, 0x69192408), + TOBN(0xe4d7681d, 0xb82e9f3e), TOBN(0x83200f0b, 0xdf25e13c), + TOBN(0x8909984c, 0x66f27280), TOBN(0x462d7b00, 0x75f73227), + TOBN(0xd90ba188, 0xf2651798), TOBN(0x74c6e18c, 0x36ab1c34), + TOBN(0xab256ea3, 0x5ef54359), TOBN(0x03466612, 0xd1aa702f), + TOBN(0x624d6049, 0x2ed22e91), TOBN(0x6fdfe0b5, 0x6f072822), + TOBN(0xeeca1115, 0x39ce2271), TOBN(0x98100a4f, 0xdb01614f), + TOBN(0xb6b0daa2, 0xa35c628f), TOBN(0xb6f94d2e, 0xc87e9a47), + TOBN(0xc6773259, 0x1d57d9ce), TOBN(0xf70bfeec, 0x03884a7b), + TOBN(0x5fb35ccf, 
0xed2bad01), TOBN(0xa155cbe3, 0x1da6a5c7), + TOBN(0xc2e2594c, 0x30a92f8f), TOBN(0x649c89ce, 0x5bfafe43), + TOBN(0xd158667d, 0xe9ff257a), TOBN(0x9b359611, 0xf32c50ae), + TOBN(0x4b00b20b, 0x906014cf), TOBN(0xf3a8cfe3, 0x89bc7d3d), + TOBN(0x4ff23ffd, 0x248a7d06), TOBN(0x80c5bfb4, 0x878873fa), + TOBN(0xb7d9ad90, 0x05745981), TOBN(0x179c85db, 0x3db01994), + TOBN(0xba41b062, 0x61a6966c), TOBN(0x4d82d052, 0xeadce5a8), + TOBN(0x9e91cd3b, 0xa5e6a318), TOBN(0x47795f4f, 0x95b2dda0), + TOBN(0xecfd7c1f, 0xd55a897c), TOBN(0x009194ab, 0xb29110fb), + TOBN(0x5f0e2046, 0xe381d3b0), TOBN(0x5f3425f6, 0xa98dd291), + TOBN(0xbfa06687, 0x730d50da), TOBN(0x0423446c, 0x4b083b7f), + TOBN(0x397a247d, 0xd69d3417), TOBN(0xeb629f90, 0x387ba42a), + TOBN(0x1ee426cc, 0xd5cd79bf), TOBN(0x0032940b, 0x946c6e18), + TOBN(0x1b1e8ae0, 0x57477f58), TOBN(0xe94f7d34, 0x6d823278), + TOBN(0xc747cb96, 0x782ba21a), TOBN(0xc5254469, 0xf72b33a5), + TOBN(0x772ef6de, 0xc7f80c81), TOBN(0xd73acbfe, 0x2cd9e6b5), + TOBN(0x4075b5b1, 0x49ee90d9), TOBN(0x785c339a, 0xa06e9eba), + TOBN(0xa1030d5b, 0xabf825e0), TOBN(0xcec684c3, 0xa42931dc), + TOBN(0x42ab62c9, 0xc1586e63), TOBN(0x45431d66, 0x5ab43f2b), + TOBN(0x57c8b2c0, 0x55f7835d), TOBN(0x033da338, 0xc1b7f865), + TOBN(0x283c7513, 0xcaa76097), TOBN(0x0a624fa9, 0x36c83906), + TOBN(0x6b20afec, 0x715af2c7), TOBN(0x4b969974, 0xeba78bfd), + TOBN(0x220755cc, 0xd921d60e), TOBN(0x9b944e10, 0x7baeca13), + TOBN(0x04819d51, 0x5ded93d4), TOBN(0x9bbff86e, 0x6dddfd27), + TOBN(0x6b344130, 0x77adc612), TOBN(0xa7496529, 0xbbd803a0), + TOBN(0x1a1baaa7, 0x6d8805bd), TOBN(0xc8403902, 0x470343ad), + TOBN(0x39f59f66, 0x175adff1), TOBN(0x0b26d7fb, 0xb7d8c5b7), + TOBN(0xa875f5ce, 0x529d75e3), TOBN(0x85efc7e9, 0x41325cc2), + TOBN(0x21950b42, 0x1ff6acd3), TOBN(0xffe70484, 0x53dc6909), + TOBN(0xff4cd0b2, 0x28766127), TOBN(0xabdbe608, 0x4fb7db2b), + TOBN(0x837c9228, 0x5e1109e8), TOBN(0x26147d27, 0xf4645b5a), + TOBN(0x4d78f592, 0xf7818ed8), TOBN(0xd394077e, 0xf247fa36), + TOBN(0x0fb9c2d0, 0x488c171a), TOBN(0xa78bfbaa, 0x13685278), + TOBN(0xedfbe268, 0xd5b1fa6a), TOBN(0x0dceb8db, 0x2b7eaba7), + TOBN(0xbf9e8089, 0x9ae2b710), TOBN(0xefde7ae6, 0xa4449c96), + TOBN(0x43b7716b, 0xcc143a46), TOBN(0xd7d34194, 0xc3628c13), + TOBN(0x508cec1c, 0x3b3f64c9), TOBN(0xe20bc0ba, 0x1e5edf3f), + TOBN(0xda1deb85, 0x2f4318d4), TOBN(0xd20ebe0d, 0x5c3fa443), + TOBN(0x370b4ea7, 0x73241ea3), TOBN(0x61f1511c, 0x5e1a5f65), + TOBN(0x99a5e23d, 0x82681c62), TOBN(0xd731e383, 0xa2f54c2d), + TOBN(0x2692f36e, 0x83445904), TOBN(0x2e0ec469, 0xaf45f9c0), + TOBN(0x905a3201, 0xc67528b7), TOBN(0x88f77f34, 0xd0e5e542), + TOBN(0xf67a8d29, 0x5864687c), TOBN(0x23b92eae, 0x22df3562), + TOBN(0x5c27014b, 0x9bbec39e), TOBN(0x7ef2f226, 0x9c0f0f8d), + TOBN(0x97359638, 0x546c4d8d), TOBN(0x5f9c3fc4, 0x92f24679), + TOBN(0x912e8bed, 0xa8c8acd9), TOBN(0xec3a318d, 0x306634b0), + TOBN(0x80167f41, 0xc31cb264), TOBN(0x3db82f6f, 0x522113f2), + TOBN(0xb155bcd2, 0xdcafe197), TOBN(0xfba1da59, 0x43465283), + TOBN(0xa0425b8e, 0xb212cf53), TOBN(0x4f2e512e, 0xf8557c5f), + TOBN(0xc1286ff9, 0x25c4d56c), TOBN(0xbb8a0fea, 0xee26c851), + TOBN(0xc28f70d2, 0xe7d6107e), TOBN(0x7ee0c444, 0xe76265aa), + TOBN(0x3df277a4, 0x1d1936b1), TOBN(0x1a556e3f, 0xea9595eb), + TOBN(0x258bbbf9, 0xe7305683), TOBN(0x31eea5bf, 0x07ef5be6), + TOBN(0x0deb0e4a, 0x46c814c1), TOBN(0x5cee8449, 0xa7b730dd), + TOBN(0xeab495c5, 0xa0182bde), TOBN(0xee759f87, 0x9e27a6b4), + TOBN(0xc2cf6a68, 0x80e518ca), TOBN(0x25e8013f, 0xf14cf3f4), + TOBN(0x8fc44140, 0x7e8d7a14), TOBN(0xbb1ff3ca, 0x9556f36a), + TOBN(0x6a844385, 0x14600044), 
TOBN(0xba3f0c4a, 0x7451ae63), + TOBN(0xdfcac25b, 0x1f9af32a), TOBN(0x01e0db86, 0xb1f2214b), + TOBN(0x4e9a5bc2, 0xa4b596ac), TOBN(0x83927681, 0x026c2c08), + TOBN(0x3ec832e7, 0x7acaca28), TOBN(0x1bfeea57, 0xc7385b29), + TOBN(0x068212e3, 0xfd1eaf38), TOBN(0xc1329830, 0x6acf8ccc), + TOBN(0xb909f2db, 0x2aac9e59), TOBN(0x5748060d, 0xb661782a), + TOBN(0xc5ab2632, 0xc79b7a01), TOBN(0xda44c6c6, 0x00017626), + TOBN(0xf26c00e8, 0xa7ea82f0), TOBN(0x99cac80d, 0xe4299aaf), + TOBN(0xd66fe3b6, 0x7ed78be1), TOBN(0x305f725f, 0x648d02cd), + TOBN(0x33ed1bc4, 0x623fb21b), TOBN(0xfa70533e, 0x7a6319ad), + TOBN(0x17ab562d, 0xbe5ffb3e), TOBN(0x06374994, 0x56674741), + TOBN(0x69d44ed6, 0x5c46aa8e), TOBN(0x2100d5d3, 0xa8d063d1), + TOBN(0xcb9727ea, 0xa2d17c36), TOBN(0x4c2bab1b, 0x8add53b7), + TOBN(0xa084e90c, 0x15426704), TOBN(0x778afcd3, 0xa837ebea), + TOBN(0x6651f701, 0x7ce477f8), TOBN(0xa0624998, 0x46fb7a8b), + TOBN(0xdc1e6828, 0xed8a6e19), TOBN(0x33fc2336, 0x4189d9c7), + TOBN(0x026f8fe2, 0x671c39bc), TOBN(0xd40c4ccd, 0xbc6f9915), + TOBN(0xafa135bb, 0xf80e75ca), TOBN(0x12c651a0, 0x22adff2c), + TOBN(0xc40a04bd, 0x4f51ad96), TOBN(0x04820109, 0xbbe4e832), + TOBN(0x3667eb1a, 0x7f4c04cc), TOBN(0x59556621, 0xa9404f84), + TOBN(0x71cdf653, 0x7eceb50a), TOBN(0x994a44a6, 0x9b8335fa), + TOBN(0xd7faf819, 0xdbeb9b69), TOBN(0x473c5680, 0xeed4350d), + TOBN(0xb6658466, 0xda44bba2), TOBN(0x0d1bc780, 0x872bdbf3), + TOBN(0xe535f175, 0xa1962f91), TOBN(0x6ed7e061, 0xed58f5a7), + TOBN(0x177aa4c0, 0x2089a233), TOBN(0x0dbcb03a, 0xe539b413), + TOBN(0xe3dc424e, 0xbb32e38e), TOBN(0x6472e5ef, 0x6806701e), + TOBN(0xdd47ff98, 0x814be9ee), TOBN(0x6b60cfff, 0x35ace009), + TOBN(0xb8d3d931, 0x9ff91fe5), TOBN(0x039c4800, 0xf0518eed), + TOBN(0x95c37632, 0x9182cb26), TOBN(0x0763a434, 0x82fc568d), + TOBN(0x707c04d5, 0x383e76ba), TOBN(0xac98b930, 0x824e8197), + TOBN(0x92bf7c8f, 0x91230de0), TOBN(0x90876a01, 0x40959b70), + TOBN(0xdb6d96f3, 0x05968b80), TOBN(0x380a0913, 0x089f73b9), + TOBN(0x7da70b83, 0xc2c61e01), TOBN(0x95fb8394, 0x569b38c7), + TOBN(0x9a3c6512, 0x80edfe2f), TOBN(0x8f726bb9, 0x8faeaf82), + TOBN(0x8010a4a0, 0x78424bf8), TOBN(0x29672044, 0x0e844970)} + , + {TOBN(0x63c5cb81, 0x7a2ad62a), TOBN(0x7ef2b6b9, 0xac62ff54), + TOBN(0x3749bba4, 0xb3ad9db5), TOBN(0xad311f2c, 0x46d5a617), + TOBN(0xb77a8087, 0xc2ff3b6d), TOBN(0xb46feaf3, 0x367834ff), + TOBN(0xf8aa266d, 0x75d6b138), TOBN(0xfa38d320, 0xec008188), + TOBN(0x486d8ffa, 0x696946fc), TOBN(0x50fbc6d8, 0xb9cba56d), + TOBN(0x7e3d423e, 0x90f35a15), TOBN(0x7c3da195, 0xc0dd962c), + TOBN(0xe673fdb0, 0x3cfd5d8b), TOBN(0x0704b7c2, 0x889dfca5), + TOBN(0xf6ce581f, 0xf52305aa), TOBN(0x399d49eb, 0x914d5e53), + TOBN(0x380a496d, 0x6ec293cd), TOBN(0x733dbda7, 0x8e7051f5), + TOBN(0x037e388d, 0xb849140a), TOBN(0xee4b32b0, 0x5946dbf6), + TOBN(0xb1c4fda9, 0xcae368d1), TOBN(0x5001a7b0, 0xfdb0b2f3), + TOBN(0x6df59374, 0x2e3ac46e), TOBN(0x4af675f2, 0x39b3e656), + TOBN(0x44e38110, 0x39949296), TOBN(0x5b63827b, 0x361db1b5), + TOBN(0x3e5323ed, 0x206eaff5), TOBN(0x942370d2, 0xc21f4290), + TOBN(0xf2caaf2e, 0xe0d985a1), TOBN(0x192cc64b, 0x7239846d), + TOBN(0x7c0b8f47, 0xae6312f8), TOBN(0x7dc61f91, 0x96620108), + TOBN(0xb830fb5b, 0xc2da7de9), TOBN(0xd0e643df, 0x0ff8d3be), + TOBN(0x31ee77ba, 0x188a9641), TOBN(0x4e8aa3aa, 0xbcf6d502), + TOBN(0xf9fb6532, 0x9a49110f), TOBN(0xd18317f6, 0x2dd6b220), + TOBN(0x7e3ced41, 0x52c3ea5a), TOBN(0x0d296a14, 0x7d579c4a), + TOBN(0x35d6a53e, 0xed4c3717), TOBN(0x9f8240cf, 0x3d0ed2a3), + TOBN(0x8c0d4d05, 0xe5543aa5), TOBN(0x45d5bbfb, 0xdd33b4b4), + TOBN(0xfa04cc73, 0x137fd28e), 
TOBN(0x862ac6ef, 0xc73b3ffd), + TOBN(0x403ff9f5, 0x31f51ef2), TOBN(0x34d5e0fc, 0xbc73f5a2), + TOBN(0xf2526820, 0x08913f4f), TOBN(0xea20ed61, 0xeac93d95), + TOBN(0x51ed38b4, 0x6ca6b26c), TOBN(0x8662dcbc, 0xea4327b0), + TOBN(0x6daf295c, 0x725d2aaa), TOBN(0xbad2752f, 0x8e52dcda), + TOBN(0x2210e721, 0x0b17dacc), TOBN(0xa37f7912, 0xd51e8232), + TOBN(0x4f7081e1, 0x44cc3add), TOBN(0xd5ffa1d6, 0x87be82cf), + TOBN(0x89890b6c, 0x0edd6472), TOBN(0xada26e1a, 0x3ed17863), + TOBN(0x276f2715, 0x63483caa), TOBN(0xe6924cd9, 0x2f6077fd), + TOBN(0x05a7fe98, 0x0a466e3c), TOBN(0xf1c794b0, 0xb1902d1f), + TOBN(0xe5213688, 0x82a8042c), TOBN(0xd931cfaf, 0xcd278298), + TOBN(0x069a0ae0, 0xf597a740), TOBN(0x0adbb3f3, 0xeb59107c), + TOBN(0x983e951e, 0x5eaa8eb8), TOBN(0xe663a8b5, 0x11b48e78), + TOBN(0x1631cc0d, 0x8a03f2c5), TOBN(0x7577c11e, 0x11e271e2), + TOBN(0x33b2385c, 0x08369a90), TOBN(0x2990c59b, 0x190eb4f8), + TOBN(0x819a6145, 0xc68eac80), TOBN(0x7a786d62, 0x2ec4a014), + TOBN(0x33faadbe, 0x20ac3a8d), TOBN(0x31a21781, 0x5aba2d30), + TOBN(0x209d2742, 0xdba4f565), TOBN(0xdb2ce9e3, 0x55aa0fbb), + TOBN(0x8cef334b, 0x168984df), TOBN(0xe81dce17, 0x33879638), + TOBN(0xf6e6949c, 0x263720f0), TOBN(0x5c56feaf, 0xf593cbec), + TOBN(0x8bff5601, 0xfde58c84), TOBN(0x74e24117, 0x2eccb314), + TOBN(0xbcf01b61, 0x4c9a8a78), TOBN(0xa233e35e, 0x544c9868), + TOBN(0xb3156bf3, 0x8bd7aff1), TOBN(0x1b5ee4cb, 0x1d81b146), + TOBN(0x7ba1ac41, 0xd628a915), TOBN(0x8f3a8f9c, 0xfd89699e), + TOBN(0x7329b9c9, 0xa0748be7), TOBN(0x1d391c95, 0xa92e621f), + TOBN(0xe51e6b21, 0x4d10a837), TOBN(0xd255f53a, 0x4947b435), + TOBN(0x07669e04, 0xf1788ee3), TOBN(0xc14f27af, 0xa86938a2), + TOBN(0x8b47a334, 0xe93a01c0), TOBN(0xff627438, 0xd9366808), + TOBN(0x7a0985d8, 0xca2a5965), TOBN(0x3d9a5542, 0xd6e9b9b3), + TOBN(0xc23eb80b, 0x4cf972e8), TOBN(0x5c1c33bb, 0x4fdf72fd), + TOBN(0x0c4a58d4, 0x74a86108), TOBN(0xf8048a8f, 0xee4c5d90), + TOBN(0xe3c7c924, 0xe86d4c80), TOBN(0x28c889de, 0x056a1e60), + TOBN(0x57e2662e, 0xb214a040), TOBN(0xe8c48e98, 0x37e10347), + TOBN(0x87742862, 0x80ac748a), TOBN(0xf1c24022, 0x186b06f2), + TOBN(0xac2dd4c3, 0x5f74040a), TOBN(0x409aeb71, 0xfceac957), + TOBN(0x4fbad782, 0x55c4ec23), TOBN(0xb359ed61, 0x8a7b76ec), + TOBN(0x12744926, 0xed6f4a60), TOBN(0xe21e8d7f, 0x4b912de3), + TOBN(0xe2575a59, 0xfc705a59), TOBN(0x72f1d4de, 0xed2dbc0e), + TOBN(0x3d2b24b9, 0xeb7926b8), TOBN(0xbff88cb3, 0xcdbe5509), + TOBN(0xd0f399af, 0xe4dd640b), TOBN(0x3c5fe130, 0x2f76ed45), + TOBN(0x6f3562f4, 0x3764fb3d), TOBN(0x7b5af318, 0x3151b62d), + TOBN(0xd5bd0bc7, 0xd79ce5f3), TOBN(0xfdaf6b20, 0xec66890f), + TOBN(0x735c67ec, 0x6063540c), TOBN(0x50b259c2, 0xe5f9cb8f), + TOBN(0xb8734f9a, 0x3f99c6ab), TOBN(0xf8cc13d5, 0xa3a7bc85), + TOBN(0x80c1b305, 0xc5217659), TOBN(0xfe5364d4, 0x4ec12a54), + TOBN(0xbd87045e, 0x681345fe), TOBN(0x7f8efeb1, 0x582f897f), + TOBN(0xe8cbf1e5, 0xd5923359), TOBN(0xdb0cea9d, 0x539b9fb0), + TOBN(0x0c5b34cf, 0x49859b98), TOBN(0x5e583c56, 0xa4403cc6), + TOBN(0x11fc1a2d, 0xd48185b7), TOBN(0xc93fbc7e, 0x6e521787), + TOBN(0x47e7a058, 0x05105b8b), TOBN(0x7b4d4d58, 0xdb8260c8), + TOBN(0xe33930b0, 0x46eb842a), TOBN(0x8e844a9a, 0x7bdae56d), + TOBN(0x34ef3a9e, 0x13f7fdfc), TOBN(0xb3768f82, 0x636ca176), + TOBN(0x2821f4e0, 0x4e09e61c), TOBN(0x414dc3a1, 0xa0c7cddc), + TOBN(0xd5379437, 0x54945fcd), TOBN(0x151b6eef, 0xb3555ff1), + TOBN(0xb31bd613, 0x6339c083), TOBN(0x39ff8155, 0xdfb64701), + TOBN(0x7c3388d2, 0xe29604ab), TOBN(0x1e19084b, 0xa6b10442), + TOBN(0x17cf54c0, 0xeccd47ef), TOBN(0x89693385, 0x4a5dfb30), + TOBN(0x69d023fb, 0x47daf9f6), TOBN(0x9222840b, 
0x7d91d959), + TOBN(0x439108f5, 0x803bac62), TOBN(0x0b7dd91d, 0x379bd45f), + TOBN(0xd651e827, 0xca63c581), TOBN(0x5c5d75f6, 0x509c104f), + TOBN(0x7d5fc738, 0x1f2dc308), TOBN(0x20faa7bf, 0xd98454be), + TOBN(0x95374bee, 0xa517b031), TOBN(0xf036b9b1, 0x642692ac), + TOBN(0xc5106109, 0x39842194), TOBN(0xb7e2353e, 0x49d05295), + TOBN(0xfc8c1d5c, 0xefb42ee0), TOBN(0xe04884eb, 0x08ce811c), + TOBN(0xf1f75d81, 0x7419f40e), TOBN(0x5b0ac162, 0xa995c241), + TOBN(0x120921bb, 0xc4c55646), TOBN(0x713520c2, 0x8d33cf97), + TOBN(0xb4a65a5c, 0xe98c5100), TOBN(0x6cec871d, 0x2ddd0f5a), + TOBN(0x251f0b7f, 0x9ba2e78b), TOBN(0x224a8434, 0xce3a2a5f), + TOBN(0x26827f61, 0x25f5c46f), TOBN(0x6a22bedc, 0x48545ec0), + TOBN(0x25ae5fa0, 0xb1bb5cdc), TOBN(0xd693682f, 0xfcb9b98f), + TOBN(0x32027fe8, 0x91e5d7d3), TOBN(0xf14b7d17, 0x73a07678), + TOBN(0xf88497b3, 0xc0dfdd61), TOBN(0xf7c2eec0, 0x2a8c4f48), + TOBN(0xaa5573f4, 0x3756e621), TOBN(0xc013a240, 0x1825b948), + TOBN(0x1c03b345, 0x63878572), TOBN(0xa0472bea, 0x653a4184), + TOBN(0xf4222e27, 0x0ac69a80), TOBN(0x34096d25, 0xf51e54f6), + TOBN(0x00a648cb, 0x8fffa591), TOBN(0x4e87acdc, 0x69b6527f), + TOBN(0x0575e037, 0xe285ccb4), TOBN(0x188089e4, 0x50ddcf52), + TOBN(0xaa96c9a8, 0x870ff719), TOBN(0x74a56cd8, 0x1fc7e369), + TOBN(0x41d04ee2, 0x1726931a), TOBN(0x0bbbb2c8, 0x3660ecfd), + TOBN(0xa6ef6de5, 0x24818e18), TOBN(0xe421cc51, 0xe7d57887), + TOBN(0xf127d208, 0xbea87be6), TOBN(0x16a475d3, 0xb1cdd682), + TOBN(0x9db1b684, 0x439b63f7), TOBN(0x5359b3db, 0xf0f113b6), + TOBN(0xdfccf1de, 0x8bf06e31), TOBN(0x1fdf8f44, 0xdd383901), + TOBN(0x10775cad, 0x5017e7d2), TOBN(0xdfc3a597, 0x58d11eef), + TOBN(0x6ec9c8a0, 0xb1ecff10), TOBN(0xee6ed6cc, 0x28400549), + TOBN(0xb5ad7bae, 0x1b4f8d73), TOBN(0x61b4f11d, 0xe00aaab9), + TOBN(0x7b32d69b, 0xd4eff2d7), TOBN(0x88ae6771, 0x4288b60f), + TOBN(0x159461b4, 0x37a1e723), TOBN(0x1f3d4789, 0x570aae8c), + TOBN(0x869118c0, 0x7f9871da), TOBN(0x35fbda78, 0xf635e278), + TOBN(0x738f3641, 0xe1541dac), TOBN(0x6794b13a, 0xc0dae45f), + TOBN(0x065064ac, 0x09cc0917), TOBN(0x27c53729, 0xc68540fd), + TOBN(0x0d2d4c8e, 0xef227671), TOBN(0xd23a9f80, 0xa1785a04), + TOBN(0x98c59528, 0x52650359), TOBN(0xfa09ad01, 0x74a1acad), + TOBN(0x082d5a29, 0x0b55bf5c), TOBN(0xa40f1c67, 0x419b8084), + TOBN(0x3a5c752e, 0xdcc18770), TOBN(0x4baf1f2f, 0x8825c3a5), + TOBN(0xebd63f74, 0x21b153ed), TOBN(0xa2383e47, 0xb2f64723), + TOBN(0xe7bf620a, 0x2646d19a), TOBN(0x56cb44ec, 0x03c83ffd), + TOBN(0xaf7267c9, 0x4f6be9f1), TOBN(0x8b2dfd7b, 0xc06bb5e9), + TOBN(0xb87072f2, 0xa672c5c7), TOBN(0xeacb11c8, 0x0d53c5e2), + TOBN(0x22dac29d, 0xff435932), TOBN(0x37bdb99d, 0x4408693c), + TOBN(0xf6e62fb6, 0x2899c20f), TOBN(0x3535d512, 0x447ece24), + TOBN(0xfbdc6b88, 0xff577ce3), TOBN(0x726693bd, 0x190575f2), + TOBN(0x6772b0e5, 0xab4b35a2), TOBN(0x1d8b6001, 0xf5eeaacf), + TOBN(0x728f7ce4, 0x795b9580), TOBN(0x4a20ed2a, 0x41fb81da), + TOBN(0x9f685cd4, 0x4fec01e6), TOBN(0x3ed7ddcc, 0xa7ff50ad), + TOBN(0x460fd264, 0x0c2d97fd), TOBN(0x3a241426, 0xeb82f4f9), + TOBN(0x17d1df2c, 0x6a8ea820), TOBN(0xb2b50d3b, 0xf22cc254), + TOBN(0x03856cba, 0xb7291426), TOBN(0x87fd26ae, 0x04f5ee39), + TOBN(0x9cb696cc, 0x02bee4ba), TOBN(0x53121804, 0x06820fd6), + TOBN(0xa5dfc269, 0x0212e985), TOBN(0x666f7ffa, 0x160f9a09), + TOBN(0xc503cd33, 0xbccd9617), TOBN(0x365dede4, 0xba7730a3), + TOBN(0x798c6355, 0x5ddb0786), TOBN(0xa6c3200e, 0xfc9cd3bc), + TOBN(0x060ffb2c, 0xe5e35efd), TOBN(0x99a4e25b, 0x5555a1c1), + TOBN(0x11d95375, 0xf70b3751), TOBN(0x0a57354a, 0x160e1bf6), + TOBN(0xecb3ae4b, 0xf8e4b065), TOBN(0x07a834c4, 0x2e53022b), + 
TOBN(0x1cd300b3, 0x8692ed96), TOBN(0x16a6f792, 0x61ee14ec), + TOBN(0x8f1063c6, 0x6a8649ed), TOBN(0xfbcdfcfe, 0x869f3e14), + TOBN(0x2cfb97c1, 0x00a7b3ec), TOBN(0xcea49b3c, 0x7130c2f1), + TOBN(0x462d044f, 0xe9d96488), TOBN(0x4b53d52e, 0x8182a0c1), + TOBN(0x84b6ddd3, 0x0391e9e9), TOBN(0x80ab7b48, 0xb1741a09), + TOBN(0xec0e15d4, 0x27d3317f), TOBN(0x8dfc1ddb, 0x1a64671e), + TOBN(0x93cc5d5f, 0xd49c5b92), TOBN(0xc995d53d, 0x3674a331), + TOBN(0x302e41ec, 0x090090ae), TOBN(0x2278a0cc, 0xedb06830), + TOBN(0x1d025932, 0xfbc99690), TOBN(0x0c32fbd2, 0xb80d68da), + TOBN(0xd79146da, 0xf341a6c1), TOBN(0xae0ba139, 0x1bef68a0), + TOBN(0xc6b8a563, 0x8d774b3a), TOBN(0x1cf307bd, 0x880ba4d7), + TOBN(0xc033bdc7, 0x19803511), TOBN(0xa9f97b3b, 0x8888c3be), + TOBN(0x3d68aebc, 0x85c6d05e), TOBN(0xc3b88a9d, 0x193919eb), + TOBN(0x2d300748, 0xc48b0ee3), TOBN(0x7506bc7c, 0x07a746c1), + TOBN(0xfc48437c, 0x6e6d57f3), TOBN(0x5bd71587, 0xcfeaa91a), + TOBN(0xa4ed0408, 0xc1bc5225), TOBN(0xd0b946db, 0x2719226d), + TOBN(0x109ecd62, 0x758d2d43), TOBN(0x75c8485a, 0x2751759b), + TOBN(0xb0b75f49, 0x9ce4177a), TOBN(0x4fa61a1e, 0x79c10c3d), + TOBN(0xc062d300, 0xa167fcd7), TOBN(0x4df3874c, 0x750f0fa8), + TOBN(0x29ae2cf9, 0x83dfedc9), TOBN(0xf8437134, 0x8d87631a), + TOBN(0xaf571711, 0x7429c8d2), TOBN(0x18d15867, 0x146d9272), + TOBN(0x83053ecf, 0x69769bb7), TOBN(0xc55eb856, 0xc479ab82), + TOBN(0x5ef7791c, 0x21b0f4b2), TOBN(0xaa5956ba, 0x3d491525), + TOBN(0x407a96c2, 0x9fe20eba), TOBN(0xf27168bb, 0xe52a5ad3), + TOBN(0x43b60ab3, 0xbf1d9d89), TOBN(0xe45c51ef, 0x710e727a), + TOBN(0xdfca5276, 0x099b4221), TOBN(0x8dc6407c, 0x2557a159), + TOBN(0x0ead8335, 0x91035895), TOBN(0x0a9db957, 0x9c55dc32), + TOBN(0xe40736d3, 0xdf61bc76), TOBN(0x13a619c0, 0x3f778cdb), + TOBN(0x6dd921a4, 0xc56ea28f), TOBN(0x76a52433, 0x2fa647b4), + TOBN(0x23591891, 0xac5bdc5d), TOBN(0xff4a1a72, 0xbac7dc01), + TOBN(0x9905e261, 0x62df8453), TOBN(0x3ac045df, 0xe63b265f), + TOBN(0x8a3f341b, 0xad53dba7), TOBN(0x8ec269cc, 0x837b625a), + TOBN(0xd71a2782, 0x3ae31189), TOBN(0x8fb4f9a3, 0x55e96120), + TOBN(0x804af823, 0xff9875cf), TOBN(0x23224f57, 0x5d442a9b), + TOBN(0x1c4d3b9e, 0xecc62679), TOBN(0x91da22fb, 0xa0e7ddb1), + TOBN(0xa370324d, 0x6c04a661), TOBN(0x9710d3b6, 0x5e376d17), + TOBN(0xed8c98f0, 0x3044e357), TOBN(0xc364ebbe, 0x6422701c), + TOBN(0x347f5d51, 0x7733d61c), TOBN(0xd55644b9, 0xcea826c3), + TOBN(0x80c6e0ad, 0x55a25548), TOBN(0x0aa7641d, 0x844220a7), + TOBN(0x1438ec81, 0x31810660), TOBN(0x9dfa6507, 0xde4b4043), + TOBN(0x10b515d8, 0xcc3e0273), TOBN(0x1b6066dd, 0x28d8cfb2), + TOBN(0xd3b04591, 0x9c9efebd), TOBN(0x425d4bdf, 0xa21c1ff4), + TOBN(0x5fe5af19, 0xd57607d3), TOBN(0xbbf773f7, 0x54481084), + TOBN(0x8435bd69, 0x94b03ed1), TOBN(0xd9ad1de3, 0x634cc546), + TOBN(0x2cf423fc, 0x00e420ca), TOBN(0xeed26d80, 0xa03096dd), + TOBN(0xd7f60be7, 0xa4db09d2), TOBN(0xf47f569d, 0x960622f7), + TOBN(0xe5925fd7, 0x7296c729), TOBN(0xeff2db26, 0x26ca2715), + TOBN(0xa6fcd014, 0xb913e759), TOBN(0x53da4786, 0x8ff4de93), + TOBN(0x14616d79, 0xc32068e1), TOBN(0xb187d664, 0xccdf352e), + TOBN(0xf7afb650, 0x1dc90b59), TOBN(0x8170e943, 0x7daa1b26), + TOBN(0xc8e3bdd8, 0x700c0a84), TOBN(0x6e8d345f, 0x6482bdfa), + TOBN(0x84cfbfa1, 0xc5c5ea50), TOBN(0xd3baf14c, 0x67960681), + TOBN(0x26398403, 0x0dd50942), TOBN(0xe4b7839c, 0x4716a663), + TOBN(0xd5f1f794, 0xe7de6dc0), TOBN(0x5cd0f4d4, 0x622aa7ce), + TOBN(0x5295f3f1, 0x59acfeec), TOBN(0x8d933552, 0x953e0607), + TOBN(0xc7db8ec5, 0x776c5722), TOBN(0xdc467e62, 0x2b5f290c), + TOBN(0xd4297e70, 0x4ff425a9), TOBN(0x4be924c1, 0x0cf7bb72), + TOBN(0x0d5dc5ae, 
0xa1892131), TOBN(0x8bf8a8e3, 0xa705c992), + TOBN(0x73a0b064, 0x7a305ac5), TOBN(0x00c9ca4e, 0x9a8c77a8), + TOBN(0x5dfee80f, 0x83774bdd), TOBN(0x63131602, 0x85734485), + TOBN(0xa1b524ae, 0x914a69a9), TOBN(0xebc2ffaf, 0xd4e300d7), + TOBN(0x52c93db7, 0x7cfa46a5), TOBN(0x71e6161f, 0x21653b50), + TOBN(0x3574fc57, 0xa4bc580a), TOBN(0xc09015dd, 0xe1bc1253), + TOBN(0x4b7b47b2, 0xd174d7aa), TOBN(0x4072d8e8, 0xf3a15d04), + TOBN(0xeeb7d47f, 0xd6fa07ed), TOBN(0x6f2b9ff9, 0xedbdafb1), + TOBN(0x18c51615, 0x3760fe8a), TOBN(0x7a96e6bf, 0xf06c6c13), + TOBN(0x4d7a0410, 0x0ea2d071), TOBN(0xa1914e9b, 0x0be2a5ce), + TOBN(0x5726e357, 0xd8a3c5cf), TOBN(0x1197ecc3, 0x2abb2b13), + TOBN(0x6c0d7f7f, 0x31ae88dd), TOBN(0x15b20d1a, 0xfdbb3efe), + TOBN(0xcd06aa26, 0x70584039), TOBN(0x2277c969, 0xa7dc9747), + TOBN(0xbca69587, 0x7855d815), TOBN(0x899ea238, 0x5188b32a), + TOBN(0x37d9228b, 0x760c1c9d), TOBN(0xc7efbb11, 0x9b5c18da), + TOBN(0x7f0d1bc8, 0x19f6dbc5), TOBN(0x4875384b, 0x07e6905b), + TOBN(0xc7c50baa, 0x3ba8cd86), TOBN(0xb0ce40fb, 0xc2905de0), + TOBN(0x70840673, 0x7a231952), TOBN(0xa912a262, 0xcf43de26), + TOBN(0x9c38ddcc, 0xeb5b76c1), TOBN(0x746f5285, 0x26fc0ab4), + TOBN(0x52a63a50, 0xd62c269f), TOBN(0x60049c55, 0x99458621), + TOBN(0xe7f48f82, 0x3c2f7c9e), TOBN(0x6bd99043, 0x917d5cf3), + TOBN(0xeb1317a8, 0x8701f469), TOBN(0xbd3fe2ed, 0x9a449fe0), + TOBN(0x421e79ca, 0x12ef3d36), TOBN(0x9ee3c36c, 0x3e7ea5de), + TOBN(0xe48198b5, 0xcdff36f7), TOBN(0xaff4f967, 0xc6b82228), + TOBN(0x15e19dd0, 0xc47adb7e), TOBN(0x45699b23, 0x032e7dfa), + TOBN(0x40680c8b, 0x1fae026a), TOBN(0x5a347a48, 0x550dbf4d), + TOBN(0xe652533b, 0x3cef0d7d), TOBN(0xd94f7b18, 0x2bbb4381), + TOBN(0x838752be, 0x0e80f500), TOBN(0x8e6e2488, 0x9e9c9bfb), + TOBN(0xc9751697, 0x16caca6a), TOBN(0x866c49d8, 0x38531ad9), + TOBN(0xc917e239, 0x7151ade1), TOBN(0x2d016ec1, 0x6037c407), + TOBN(0xa407ccc9, 0x00eac3f9), TOBN(0x835f6280, 0xe2ed4748), + TOBN(0xcc54c347, 0x1cc98e0d), TOBN(0x0e969937, 0xdcb572eb), + TOBN(0x1b16c8e8, 0x8f30c9cb), TOBN(0xa606ae75, 0x373c4661), + TOBN(0x47aa689b, 0x35502cab), TOBN(0xf89014ae, 0x4d9bb64f), + TOBN(0x202f6a9c, 0x31c71f7b), TOBN(0x01f95aa3, 0x296ffe5c), + TOBN(0x5fc06014, 0x53cec3a3), TOBN(0xeb991237, 0x5f498a45), + TOBN(0xae9a935e, 0x5d91ba87), TOBN(0xc6ac6281, 0x0b564a19), + TOBN(0x8a8fe81c, 0x3bd44e69), TOBN(0x7c8b467f, 0x9dd11d45), + TOBN(0xf772251f, 0xea5b8e69), TOBN(0xaeecb3bd, 0xc5b75fbc), + TOBN(0x1aca3331, 0x887ff0e5), TOBN(0xbe5d49ff, 0x19f0a131), + TOBN(0x582c13aa, 0xe5c8646f), TOBN(0xdbaa12e8, 0x20e19980), + TOBN(0x8f40f31a, 0xf7abbd94), TOBN(0x1f13f5a8, 0x1dfc7663), + TOBN(0x5d81f1ee, 0xaceb4fc0), TOBN(0x36256002, 0x5e6f0f42), + TOBN(0x4b67d6d7, 0x751370c8), TOBN(0x2608b698, 0x03e80589), + TOBN(0xcfc0d2fc, 0x05268301), TOBN(0xa6943d39, 0x40309212), + TOBN(0x192a90c2, 0x1fd0e1c2), TOBN(0xb209f113, 0x37f1dc76), + TOBN(0xefcc5e06, 0x97bf1298), TOBN(0xcbdb6730, 0x219d639e), + TOBN(0xd009c116, 0xb81e8c6f), TOBN(0xa3ffdde3, 0x1a7ce2e5), + TOBN(0xc53fbaaa, 0xa914d3ba), TOBN(0x836d500f, 0x88df85ee), + TOBN(0xd98dc71b, 0x66ee0751), TOBN(0x5a3d7005, 0x714516fd), + TOBN(0x21d3634d, 0x39eedbba), TOBN(0x35cd2e68, 0x0455a46d), + TOBN(0xc8cafe65, 0xf9d7eb0c), TOBN(0xbda3ce9e, 0x00cefb3e), + TOBN(0xddc17a60, 0x2c9cf7a4), TOBN(0x01572ee4, 0x7bcb8773), + TOBN(0xa92b2b01, 0x8c7548df), TOBN(0x732fd309, 0xa84600e3), + TOBN(0xe22109c7, 0x16543a40), TOBN(0x9acafd36, 0xfede3c6c), + TOBN(0xfb206852, 0x6824e614), TOBN(0x2a4544a9, 0xda25dca0), + TOBN(0x25985262, 0x91d60b06), TOBN(0x281b7be9, 0x28753545), + TOBN(0xec667b1a, 0x90f13b27), 
TOBN(0x33a83aff, 0x940e2eb4), + TOBN(0x80009862, 0xd5d721d5), TOBN(0x0c3357a3, 0x5bd3a182), + TOBN(0x27f3a83b, 0x7aa2cda4), TOBN(0xb58ae74e, 0xf6f83085), + TOBN(0x2a911a81, 0x2e6dad6b), TOBN(0xde286051, 0xf43d6c5b), + TOBN(0x4bdccc41, 0xf996c4d8), TOBN(0xe7312ec0, 0x0ae1e24e)} + , + {TOBN(0xf8d112e7, 0x6e6485b3), TOBN(0x4d3e24db, 0x771c52f8), + TOBN(0x48e3ee41, 0x684a2f6d), TOBN(0x7161957d, 0x21d95551), + TOBN(0x19631283, 0xcdb12a6c), TOBN(0xbf3fa882, 0x2e50e164), + TOBN(0xf6254b63, 0x3166cc73), TOBN(0x3aefa7ae, 0xaee8cc38), + TOBN(0x79b0fe62, 0x3b36f9fd), TOBN(0x26543b23, 0xfde19fc0), + TOBN(0x136e64a0, 0x958482ef), TOBN(0x23f63771, 0x9b095825), + TOBN(0x14cfd596, 0xb6a1142e), TOBN(0x5ea6aac6, 0x335aac0b), + TOBN(0x86a0e8bd, 0xf3081dd5), TOBN(0x5fb89d79, 0x003dc12a), + TOBN(0xf615c33a, 0xf72e34d4), TOBN(0x0bd9ea40, 0x110eec35), + TOBN(0x1c12bc5b, 0xc1dea34e), TOBN(0x686584c9, 0x49ae4699), + TOBN(0x13ad95d3, 0x8c97b942), TOBN(0x4609561a, 0x4e5c7562), + TOBN(0x9e94a4ae, 0xf2737f89), TOBN(0xf57594c6, 0x371c78b6), + TOBN(0x0f0165fc, 0xe3779ee3), TOBN(0xe00e7f9d, 0xbd495d9e), + TOBN(0x1fa4efa2, 0x20284e7a), TOBN(0x4564bade, 0x47ac6219), + TOBN(0x90e6312a, 0xc4708e8e), TOBN(0x4f5725fb, 0xa71e9adf), + TOBN(0xe95f55ae, 0x3d684b9f), TOBN(0x47f7ccb1, 0x1e94b415), + TOBN(0x7322851b, 0x8d946581), TOBN(0xf0d13133, 0xbdf4a012), + TOBN(0xa3510f69, 0x6584dae0), TOBN(0x03a7c171, 0x3c9f6c6d), + TOBN(0x5be97f38, 0xe475381a), TOBN(0xca1ba422, 0x85823334), + TOBN(0xf83cc5c7, 0x0be17dda), TOBN(0x158b1494, 0x0b918c0f), + TOBN(0xda3a77e5, 0x522e6b69), TOBN(0x69c908c3, 0xbbcd6c18), + TOBN(0x1f1b9e48, 0xd924fd56), TOBN(0x37c64e36, 0xaa4bb3f7), + TOBN(0x5a4fdbdf, 0xee478d7d), TOBN(0xba75c8bc, 0x0193f7a0), + TOBN(0x84bc1e84, 0x56cd16df), TOBN(0x1fb08f08, 0x46fad151), + TOBN(0x8a7cabf9, 0x842e9f30), TOBN(0xa331d4bf, 0x5eab83af), + TOBN(0xd272cfba, 0x017f2a6a), TOBN(0x27560abc, 0x83aba0e3), + TOBN(0x94b83387, 0x0e3a6b75), TOBN(0x25c6aea2, 0x6b9f50f5), + TOBN(0x803d691d, 0xb5fdf6d0), TOBN(0x03b77509, 0xe6333514), + TOBN(0x36178903, 0x61a341c1), TOBN(0x3604dc60, 0x0cfd6142), + TOBN(0x022295eb, 0x8533316c), TOBN(0x3dbde4ac, 0x44af2922), + TOBN(0x898afc5d, 0x1c7eef69), TOBN(0x58896805, 0xd14f4fa1), + TOBN(0x05002160, 0x203c21ca), TOBN(0x6f0d1f30, 0x40ef730b), + TOBN(0x8e8c44d4, 0x196224f8), TOBN(0x75a4ab95, 0x374d079d), + TOBN(0x79085ecc, 0x7d48f123), TOBN(0x56f04d31, 0x1bf65ad8), + TOBN(0xe220bf1c, 0xbda602b2), TOBN(0x73ee1742, 0xf9612c69), + TOBN(0x76008fc8, 0x084fd06b), TOBN(0x4000ef9f, 0xf11380d1), + TOBN(0x48201b4b, 0x12cfe297), TOBN(0x3eee129c, 0x292f74e5), + TOBN(0xe1fe114e, 0xc9e874e8), TOBN(0x899b055c, 0x92c5fc41), + TOBN(0x4e477a64, 0x3a39c8cf), TOBN(0x82f09efe, 0x78963cc9), + TOBN(0x6fd3fd8f, 0xd333f863), TOBN(0x85132b2a, 0xdc949c63), + TOBN(0x7e06a3ab, 0x516eb17b), TOBN(0x73bec06f, 0xd2c7372b), + TOBN(0xe4f74f55, 0xba896da6), TOBN(0xbb4afef8, 0x8e9eb40f), + TOBN(0x2d75bec8, 0xe61d66b0), TOBN(0x02bda4b4, 0xef29300b), + TOBN(0x8bbaa8de, 0x026baa5a), TOBN(0xff54befd, 0xa07f4440), + TOBN(0xbd9b8b1d, 0xbe7a2af3), TOBN(0xec51caa9, 0x4fb74a72), + TOBN(0xb9937a4b, 0x63879697), TOBN(0x7c9a9d20, 0xec2687d5), + TOBN(0x1773e44f, 0x6ef5f014), TOBN(0x8abcf412, 0xe90c6900), + TOBN(0x387bd022, 0x8142161e), TOBN(0x50393755, 0xfcb6ff2a), + TOBN(0x9813fd56, 0xed6def63), TOBN(0x53cf6482, 0x7d53106c), + TOBN(0x991a35bd, 0x431f7ac1), TOBN(0xf1e274dd, 0x63e65faf), + TOBN(0xf63ffa3c, 0x44cc7880), TOBN(0x411a426b, 0x7c256981), + TOBN(0xb698b9fd, 0x93a420e0), TOBN(0x89fdddc0, 0xae53f8fe), + TOBN(0x766e0722, 0x32398baa), 
TOBN(0x205fee42, 0x5cfca031), + TOBN(0xa49f5341, 0x7a029cf2), TOBN(0xa88c68b8, 0x4023890d), + TOBN(0xbc275041, 0x7337aaa8), TOBN(0x9ed364ad, 0x0eb384f4), + TOBN(0xe0816f85, 0x29aba92f), TOBN(0x2e9e1941, 0x04e38a88), + TOBN(0x57eef44a, 0x3dafd2d5), TOBN(0x35d1fae5, 0x97ed98d8), + TOBN(0x50628c09, 0x2307f9b1), TOBN(0x09d84aae, 0xd6cba5c6), + TOBN(0x67071bc7, 0x88aaa691), TOBN(0x2dea57a9, 0xafe6cb03), + TOBN(0xdfe11bb4, 0x3d78ac01), TOBN(0x7286418c, 0x7fd7aa51), + TOBN(0xfabf7709, 0x77f7195a), TOBN(0x8ec86167, 0xadeb838f), + TOBN(0xea1285a8, 0xbb4f012d), TOBN(0xd6883503, 0x9a3eab3f), + TOBN(0xee5d24f8, 0x309004c2), TOBN(0xa96e4b76, 0x13ffe95e), + TOBN(0x0cdffe12, 0xbd223ea4), TOBN(0x8f5c2ee5, 0xb6739a53), + TOBN(0x5cb4aaa5, 0xdd968198), TOBN(0xfa131c52, 0x72413a6c), + TOBN(0x53d46a90, 0x9536d903), TOBN(0xb270f0d3, 0x48606d8e), + TOBN(0x518c7564, 0xa053a3bc), TOBN(0x088254b7, 0x1a86caef), + TOBN(0xb3ba8cb4, 0x0ab5efd0), TOBN(0x5c59900e, 0x4605945d), + TOBN(0xecace1dd, 0xa1887395), TOBN(0x40960f36, 0x932a65de), + TOBN(0x9611ff5c, 0x3aa95529), TOBN(0xc58215b0, 0x7c1e5a36), + TOBN(0xd48c9b58, 0xf0e1a524), TOBN(0xb406856b, 0xf590dfb8), + TOBN(0xc7605e04, 0x9cd95662), TOBN(0x0dd036ee, 0xa33ecf82), + TOBN(0xa50171ac, 0xc33156b3), TOBN(0xf09d24ea, 0x4a80172e), + TOBN(0x4e1f72c6, 0x76dc8eef), TOBN(0xe60caadc, 0x5e3d44ee), + TOBN(0x006ef8a6, 0x979b1d8f), TOBN(0x60908a1c, 0x97788d26), + TOBN(0x6e08f95b, 0x266feec0), TOBN(0x618427c2, 0x22e8c94e), + TOBN(0x3d613339, 0x59145a65), TOBN(0xcd9bc368, 0xfa406337), + TOBN(0x82d11be3, 0x2d8a52a0), TOBN(0xf6877b27, 0x97a1c590), + TOBN(0x837a819b, 0xf5cbdb25), TOBN(0x2a4fd1d8, 0xde090249), + TOBN(0x622a7de7, 0x74990e5f), TOBN(0x840fa5a0, 0x7945511b), + TOBN(0x30b974be, 0x6558842d), TOBN(0x70df8c64, 0x17f3d0a6), + TOBN(0x7c803520, 0x7542e46d), TOBN(0x7251fe7f, 0xe4ecc823), + TOBN(0xe59134cb, 0x5e9aac9a), TOBN(0x11bb0934, 0xf0045d71), + TOBN(0x53e5d9b5, 0xdbcb1d4e), TOBN(0x8d97a905, 0x92defc91), + TOBN(0xfe289327, 0x7946d3f9), TOBN(0xe132bd24, 0x07472273), + TOBN(0xeeeb510c, 0x1eb6ae86), TOBN(0x777708c5, 0xf0595067), + TOBN(0x18e2c8cd, 0x1297029e), TOBN(0x2c61095c, 0xbbf9305e), + TOBN(0xe466c258, 0x6b85d6d9), TOBN(0x8ac06c36, 0xda1ea530), + TOBN(0xa365dc39, 0xa1304668), TOBN(0xe4a9c885, 0x07f89606), + TOBN(0x65a4898f, 0xacc7228d), TOBN(0x3e2347ff, 0x84ca8303), + TOBN(0xa5f6fb77, 0xea7d23a3), TOBN(0x2fac257d, 0x672a71cd), + TOBN(0x6908bef8, 0x7e6a44d3), TOBN(0x8ff87566, 0x891d3d7a), + TOBN(0xe58e90b3, 0x6b0cf82e), TOBN(0x6438d246, 0x2615b5e7), + TOBN(0x07b1f8fc, 0x669c145a), TOBN(0xb0d8b2da, 0x36f1e1cb), + TOBN(0x54d5dadb, 0xd9184c4d), TOBN(0x3dbb18d5, 0xf93d9976), + TOBN(0x0a3e0f56, 0xd1147d47), TOBN(0x2afa8c8d, 0xa0a48609), + TOBN(0x275353e8, 0xbc36742c), TOBN(0x898f427e, 0xeea0ed90), + TOBN(0x26f4947e, 0x3e477b00), TOBN(0x8ad8848a, 0x308741e3), + TOBN(0x6c703c38, 0xd74a2a46), TOBN(0x5e3e05a9, 0x9ba17ba2), + TOBN(0xc1fa6f66, 0x4ab9a9e4), TOBN(0x474a2d9a, 0x3841d6ec), + TOBN(0x871239ad, 0x653ae326), TOBN(0x14bcf72a, 0xa74cbb43), + TOBN(0x8737650e, 0x20d4c083), TOBN(0x3df86536, 0x110ed4af), + TOBN(0xd2d86fe7, 0xb53ca555), TOBN(0x688cb00d, 0xabd5d538), + TOBN(0xcf81bda3, 0x1ad38468), TOBN(0x7ccfe3cc, 0xf01167b6), + TOBN(0xcf4f47e0, 0x6c4c1fe6), TOBN(0x557e1f1a, 0x298bbb79), + TOBN(0xf93b974f, 0x30d45a14), TOBN(0x174a1d2d, 0x0baf97c4), + TOBN(0x7a003b30, 0xc51fbf53), TOBN(0xd8940991, 0xee68b225), + TOBN(0x5b0aa7b7, 0x1c0f4173), TOBN(0x975797c9, 0xa20a7153), + TOBN(0x26e08c07, 0xe3533d77), TOBN(0xd7222e6a, 0x2e341c99), + TOBN(0x9d60ec3d, 0x8d2dc4ed), TOBN(0xbdfe0d8f, 
0x7c476cf8), + TOBN(0x1fe59ab6, 0x1d056605), TOBN(0xa9ea9df6, 0x86a8551f), + TOBN(0x8489941e, 0x47fb8d8c), TOBN(0xfeb874eb, 0x4a7f1b10), + TOBN(0xfe5fea86, 0x7ee0d98f), TOBN(0x201ad34b, 0xdbf61864), + TOBN(0x45d8fe47, 0x37c031d4), TOBN(0xd5f49fae, 0x795f0822), + TOBN(0xdb0fb291, 0xc7f4a40c), TOBN(0x2e69d9c1, 0x730ddd92), + TOBN(0x754e1054, 0x49d76987), TOBN(0x8a24911d, 0x7662db87), + TOBN(0x61fc1810, 0x60a71676), TOBN(0xe852d1a8, 0xf66a8ad1), + TOBN(0x172bbd65, 0x6417231e), TOBN(0x0d6de7bd, 0x3babb11f), + TOBN(0x6fde6f88, 0xc8e347f8), TOBN(0x1c587547, 0x9bd99cc3), + TOBN(0x78e54ed0, 0x34076950), TOBN(0x97f0f334, 0x796e83ba), + TOBN(0xe4dbe1ce, 0x4924867a), TOBN(0xbd5f51b0, 0x60b84917), + TOBN(0x37530040, 0x3cb09a79), TOBN(0xdb3fe0f8, 0xff1743d8), + TOBN(0xed7894d8, 0x556fa9db), TOBN(0xfa262169, 0x23412fbf), + TOBN(0x563be0db, 0xba7b9291), TOBN(0x6ca8b8c0, 0x0c9fb234), + TOBN(0xed406aa9, 0xbd763802), TOBN(0xc21486a0, 0x65303da1), + TOBN(0x61ae291e, 0xc7e62ec4), TOBN(0x622a0492, 0xdf99333e), + TOBN(0x7fd80c9d, 0xbb7a8ee0), TOBN(0xdc2ed3bc, 0x6c01aedb), + TOBN(0x35c35a12, 0x08be74ec), TOBN(0xd540cb1a, 0x469f671f), + TOBN(0xd16ced4e, 0xcf84f6c7), TOBN(0x8561fb9c, 0x2d090f43), + TOBN(0x7e693d79, 0x6f239db4), TOBN(0xa736f928, 0x77bd0d94), + TOBN(0x07b4d929, 0x2c1950ee), TOBN(0xda177543, 0x56dc11b3), + TOBN(0xa5dfbbaa, 0x7a6a878e), TOBN(0x1c70cb29, 0x4decb08a), + TOBN(0xfba28c8b, 0x6f0f7c50), TOBN(0xa8eba2b8, 0x854dcc6d), + TOBN(0x5ff8e89a, 0x36b78642), TOBN(0x070c1c8e, 0xf6873adf), + TOBN(0xbbd3c371, 0x6484d2e4), TOBN(0xfb78318f, 0x0d414129), + TOBN(0x2621a39c, 0x6ad93b0b), TOBN(0x979d74c2, 0xa9e917f7), + TOBN(0xfc195647, 0x61fb0428), TOBN(0x4d78954a, 0xbee624d4), + TOBN(0xb94896e0, 0xb8ae86fd), TOBN(0x6667ac0c, 0xc91c8b13), + TOBN(0x9f180512, 0x43bcf832), TOBN(0xfbadf8b7, 0xa0010137), + TOBN(0xc69b4089, 0xb3ba8aa7), TOBN(0xfac4bacd, 0xe687ce85), + TOBN(0x9164088d, 0x977eab40), TOBN(0x51f4c5b6, 0x2760b390), + TOBN(0xd238238f, 0x340dd553), TOBN(0x358566c3, 0xdb1d31c9), + TOBN(0x3a5ad69e, 0x5068f5ff), TOBN(0xf31435fc, 0xdaff6b06), + TOBN(0xae549a5b, 0xd6debff0), TOBN(0x59e5f0b7, 0x75e01331), + TOBN(0x5d492fb8, 0x98559acf), TOBN(0x96018c2e, 0x4db79b50), + TOBN(0x55f4a48f, 0x609f66aa), TOBN(0x1943b3af, 0x4900a14f), + TOBN(0xc22496df, 0x15a40d39), TOBN(0xb2a44684, 0x4c20f7c5), + TOBN(0x76a35afa, 0x3b98404c), TOBN(0xbec75725, 0xff5d1b77), + TOBN(0xb67aa163, 0xbea06444), TOBN(0x27e95bb2, 0xf724b6f2), + TOBN(0x3c20e3e9, 0xd238c8ab), TOBN(0x1213754e, 0xddd6ae17), + TOBN(0x8c431020, 0x716e0f74), TOBN(0x6679c82e, 0xffc095c2), + TOBN(0x2eb3adf4, 0xd0ac2932), TOBN(0x2cc970d3, 0x01bb7a76), + TOBN(0x70c71f2f, 0x740f0e66), TOBN(0x545c616b, 0x2b6b23cc), + TOBN(0x4528cfcb, 0xb40a8bd7), TOBN(0xff839633, 0x2ab27722), + TOBN(0x049127d9, 0x025ac99a), TOBN(0xd314d4a0, 0x2b63e33b), + TOBN(0xc8c310e7, 0x28d84519), TOBN(0x0fcb8983, 0xb3bc84ba), + TOBN(0x2cc52261, 0x38634818), TOBN(0x501814f4, 0xb44c2e0b), + TOBN(0xf7e181aa, 0x54dfdba3), TOBN(0xcfd58ff0, 0xe759718c), + TOBN(0xf90cdb14, 0xd3b507a8), TOBN(0x57bd478e, 0xc50bdad8), + TOBN(0x29c197e2, 0x50e5f9aa), TOBN(0x4db6eef8, 0xe40bc855), + TOBN(0x2cc8f21a, 0xd1fc0654), TOBN(0xc71cc963, 0x81269d73), + TOBN(0xecfbb204, 0x077f49f9), TOBN(0xdde92571, 0xca56b793), + TOBN(0x9abed6a3, 0xf97ad8f7), TOBN(0xe6c19d3f, 0x924de3bd), + TOBN(0x8dce92f4, 0xa140a800), TOBN(0x85f44d1e, 0x1337af07), + TOBN(0x5953c08b, 0x09d64c52), TOBN(0xa1b5e49f, 0xf5df9749), + TOBN(0x336a8fb8, 0x52735f7d), TOBN(0xb332b6db, 0x9add676b), + TOBN(0x558b88a0, 0xb4511aa4), TOBN(0x09788752, 0xdbd5cc55), + 
TOBN(0x16b43b9c, 0xd8cd52bd), TOBN(0x7f0bc5a0, 0xc2a2696b), + TOBN(0x146e12d4, 0xc11f61ef), TOBN(0x9ce10754, 0x3a83e79e), + TOBN(0x08ec73d9, 0x6cbfca15), TOBN(0x09ff29ad, 0x5b49653f), + TOBN(0xe31b72bd, 0xe7da946e), TOBN(0xebf9eb3b, 0xee80a4f2), + TOBN(0xd1aabd08, 0x17598ce4), TOBN(0x18b5fef4, 0x53f37e80), + TOBN(0xd5d5cdd3, 0x5958cd79), TOBN(0x3580a1b5, 0x1d373114), + TOBN(0xa36e4c91, 0xfa935726), TOBN(0xa38c534d, 0xef20d760), + TOBN(0x7088e40a, 0x2ff5845b), TOBN(0xe5bb40bd, 0xbd78177f), + TOBN(0x4f06a7a8, 0x857f9920), TOBN(0xe3cc3e50, 0xe968f05d), + TOBN(0x1d68b7fe, 0xe5682d26), TOBN(0x5206f76f, 0xaec7f87c), + TOBN(0x41110530, 0x041951ab), TOBN(0x58ec52c1, 0xd4b5a71a), + TOBN(0xf3488f99, 0x0f75cf9a), TOBN(0xf411951f, 0xba82d0d5), + TOBN(0x27ee75be, 0x618895ab), TOBN(0xeae060d4, 0x6d8aab14), + TOBN(0x9ae1df73, 0x7fb54dc2), TOBN(0x1f3e391b, 0x25963649), + TOBN(0x242ec32a, 0xfe055081), TOBN(0x5bd450ef, 0x8491c9bd), + TOBN(0x367efc67, 0x981eb389), TOBN(0xed7e1928, 0x3a0550d5), + TOBN(0x362e776b, 0xab3ce75c), TOBN(0xe890e308, 0x1f24c523), + TOBN(0xb961b682, 0xfeccef76), TOBN(0x8b8e11f5, 0x8bba6d92), + TOBN(0x8f2ccc4c, 0x2b2375c4), TOBN(0x0d7f7a52, 0xe2f86cfa), + TOBN(0xfd94d30a, 0x9efe5633), TOBN(0x2d8d246b, 0x5451f934), + TOBN(0x2234c6e3, 0x244e6a00), TOBN(0xde2b5b0d, 0xddec8c50), + TOBN(0x2ce53c5a, 0xbf776f5b), TOBN(0x6f724071, 0x60357b05), + TOBN(0xb2593717, 0x71bf3f7a), TOBN(0x87d2501c, 0x440c4a9f), + TOBN(0x440552e1, 0x87b05340), TOBN(0xb7bf7cc8, 0x21624c32), + TOBN(0x4155a6ce, 0x22facddb), TOBN(0x5a4228cb, 0x889837ef), + TOBN(0xef87d6d6, 0xfd4fd671), TOBN(0xa233687e, 0xc2daa10e), + TOBN(0x75622244, 0x03c0eb96), TOBN(0x7632d184, 0x8bf19be6), + TOBN(0x05d0f8e9, 0x40735ff4), TOBN(0x3a3e6e13, 0xc00931f1), + TOBN(0x31ccde6a, 0xdafe3f18), TOBN(0xf381366a, 0xcfe51207), + TOBN(0x24c222a9, 0x60167d92), TOBN(0x62f9d6f8, 0x7529f18c), + TOBN(0x412397c0, 0x0353b114), TOBN(0x334d89dc, 0xef808043), + TOBN(0xd9ec63ba, 0x2a4383ce), TOBN(0xcec8e937, 0x5cf92ba0), + TOBN(0xfb8b4288, 0xc8be74c0), TOBN(0x67d6912f, 0x105d4391), + TOBN(0x7b996c46, 0x1b913149), TOBN(0x36aae2ef, 0x3a4e02da), + TOBN(0xb68aa003, 0x972de594), TOBN(0x284ec70d, 0x4ec6d545), + TOBN(0xf3d2b2d0, 0x61391d54), TOBN(0x69c5d5d6, 0xfe114e92), + TOBN(0xbe0f00b5, 0xb4482dff), TOBN(0xe1596fa5, 0xf5bf33c5), + TOBN(0x10595b56, 0x96a71cba), TOBN(0x944938b2, 0xfdcadeb7), + TOBN(0xa282da4c, 0xfccd8471), TOBN(0x98ec05f3, 0x0d37bfe1), + TOBN(0xe171ce1b, 0x0698304a), TOBN(0x2d691444, 0x21bdf79b), + TOBN(0xd0cd3b74, 0x1b21dec1), TOBN(0x712ecd8b, 0x16a15f71), + TOBN(0x8d4c00a7, 0x00fd56e1), TOBN(0x02ec9692, 0xf9527c18), + TOBN(0x21c44937, 0x4a3e42e1), TOBN(0x9176fbab, 0x1392ae0a), + TOBN(0x8726f1ba, 0x44b7b618), TOBN(0xb4d7aae9, 0xf1de491c), + TOBN(0xf91df7b9, 0x07b582c0), TOBN(0x7e116c30, 0xef60aa3a), + TOBN(0x99270f81, 0x466265d7), TOBN(0xb15b6fe2, 0x4df7adf0), + TOBN(0xfe33b2d3, 0xf9738f7f), TOBN(0x48553ab9, 0xd6d70f95), + TOBN(0x2cc72ac8, 0xc21e94db), TOBN(0x795ac38d, 0xbdc0bbee), + TOBN(0x0a1be449, 0x2e40478f), TOBN(0x81bd3394, 0x052bde55), + TOBN(0x63c8dbe9, 0x56b3c4f2), TOBN(0x017a99cf, 0x904177cc), + TOBN(0x947bbddb, 0x4d010fc1), TOBN(0xacf9b00b, 0xbb2c9b21), + TOBN(0x2970bc8d, 0x47173611), TOBN(0x1a4cbe08, 0xac7d756f), + TOBN(0x06d9f4aa, 0x67d541a2), TOBN(0xa3e8b689, 0x59c2cf44), + TOBN(0xaad066da, 0x4d88f1dd), TOBN(0xc604f165, 0x7ad35dea), + TOBN(0x7edc0720, 0x4478ca67), TOBN(0xa10dfae0, 0xba02ce06), + TOBN(0xeceb1c76, 0xaf36f4e4), TOBN(0x994b2292, 0xaf3f8f48), + TOBN(0xbf9ed77b, 0x77c8a68c), TOBN(0x74f544ea, 0x51744c9d), + TOBN(0x82d05bb9, 
0x8113a757), TOBN(0x4ef2d2b4, 0x8a9885e4), + TOBN(0x1e332be5, 0x1aa7865f), TOBN(0x22b76b18, 0x290d1a52), + TOBN(0x308a2310, 0x44351683), TOBN(0x9d861896, 0xa3f22840), + TOBN(0x5959ddcd, 0x841ed947), TOBN(0x0def0c94, 0x154b73bf), + TOBN(0xf0105417, 0x4c7c15e0), TOBN(0x539bfb02, 0x3a277c32), + TOBN(0xe699268e, 0xf9dccf5f), TOBN(0x9f5796a5, 0x0247a3bd), + TOBN(0x8b839de8, 0x4f157269), TOBN(0xc825c1e5, 0x7a30196b), + TOBN(0x6ef0aabc, 0xdc8a5a91), TOBN(0xf4a8ce6c, 0x498b7fe6), + TOBN(0x1cce35a7, 0x70cbac78), TOBN(0x83488e9b, 0xf6b23958), + TOBN(0x0341a070, 0xd76cb011), TOBN(0xda6c9d06, 0xae1b2658), + TOBN(0xb701fb30, 0xdd648c52), TOBN(0x994ca02c, 0x52fb9fd1), + TOBN(0x06933117, 0x6f563086), TOBN(0x3d2b8100, 0x17856bab), + TOBN(0xe89f48c8, 0x5963a46e), TOBN(0x658ab875, 0xa99e61c7), + TOBN(0x6e296f87, 0x4b8517b4), TOBN(0x36c4fcdc, 0xfc1bc656), + TOBN(0xde5227a1, 0xa3906def), TOBN(0x9fe95f57, 0x62418945), + TOBN(0x20c91e81, 0xfdd96cde), TOBN(0x5adbe47e, 0xda4480de), + TOBN(0xa009370f, 0x396de2b6), TOBN(0x98583d4b, 0xf0ecc7bd), + TOBN(0xf44f6b57, 0xe51d0672), TOBN(0x03d6b078, 0x556b1984), + TOBN(0x27dbdd93, 0xb0b64912), TOBN(0x9b3a3434, 0x15687b09), + TOBN(0x0dba6461, 0x51ec20a9), TOBN(0xec93db7f, 0xff28187c), + TOBN(0x00ff8c24, 0x66e48bdd), TOBN(0x2514f2f9, 0x11ccd78e), + TOBN(0xeba11f4f, 0xe1250603), TOBN(0x8a22cd41, 0x243fa156), + TOBN(0xa4e58df4, 0xb283e4c6), TOBN(0x78c29859, 0x8b39783f), + TOBN(0x5235aee2, 0xa5259809), TOBN(0xc16284b5, 0x0e0227dd), + TOBN(0xa5f57916, 0x1338830d), TOBN(0x6d4b8a6b, 0xd2123fca), + TOBN(0x236ea68a, 0xf9c546f8), TOBN(0xc1d36873, 0xfa608d36), + TOBN(0xcd76e495, 0x8d436d13), TOBN(0xd4d9c221, 0x8fb080af), + TOBN(0x665c1728, 0xe8ad3fb5), TOBN(0xcf1ebe4d, 0xb3d572e0), + TOBN(0xa7a8746a, 0x584c5e20), TOBN(0x267e4ea1, 0xb9dc7035), + TOBN(0x593a15cf, 0xb9548c9b), TOBN(0x5e6e2135, 0x4bd012f3), + TOBN(0xdf31cc6a, 0x8c8f936e), TOBN(0x8af84d04, 0xb5c241dc), + TOBN(0x63990a6f, 0x345efb86), TOBN(0x6fef4e61, 0xb9b962cb)} + , + {TOBN(0xf6368f09, 0x25722608), TOBN(0x131260db, 0x131cf5c6), + TOBN(0x40eb353b, 0xfab4f7ac), TOBN(0x85c78880, 0x37eee829), + TOBN(0x4c1581ff, 0xc3bdf24e), TOBN(0x5bff75cb, 0xf5c3c5a8), + TOBN(0x35e8c83f, 0xa14e6f40), TOBN(0xb81d1c0f, 0x0295e0ca), + TOBN(0xfcde7cc8, 0xf43a730f), TOBN(0xe89b6f3c, 0x33ab590e), + TOBN(0xc823f529, 0xad03240b), TOBN(0x82b79afe, 0x98bea5db), + TOBN(0x568f2856, 0x962fe5de), TOBN(0x0c590adb, 0x60c591f3), + TOBN(0x1fc74a14, 0x4a28a858), TOBN(0x3b662498, 0xb3203f4c), + TOBN(0x91e3cf0d, 0x6c39765a), TOBN(0xa2db3acd, 0xac3cca0b), + TOBN(0x288f2f08, 0xcb953b50), TOBN(0x2414582c, 0xcf43cf1a), + TOBN(0x8dec8bbc, 0x60eee9a8), TOBN(0x54c79f02, 0x729aa042), + TOBN(0xd81cd5ec, 0x6532f5d5), TOBN(0xa672303a, 0xcf82e15f), + TOBN(0x376aafa8, 0x719c0563), TOBN(0xcd8ad2dc, 0xbc5fc79f), + TOBN(0x303fdb9f, 0xcb750cd3), TOBN(0x14ff052f, 0x4418b08e), + TOBN(0xf75084cf, 0x3e2d6520), TOBN(0x7ebdf0f8, 0x144ed509), + TOBN(0xf43bf0f2, 0xd3f25b98), TOBN(0x86ad71cf, 0xa354d837), + TOBN(0xb827fe92, 0x26f43572), TOBN(0xdfd3ab5b, 0x5d824758), + TOBN(0x315dd23a, 0x539094c1), TOBN(0x85c0e37a, 0x66623d68), + TOBN(0x575c7972, 0x7be19ae0), TOBN(0x616a3396, 0xdf0d36b5), + TOBN(0xa1ebb3c8, 0x26b1ff7e), TOBN(0x635b9485, 0x140ad453), + TOBN(0x92bf3cda, 0xda430c0b), TOBN(0x4702850e, 0x3a96dac6), + TOBN(0xc91cf0a5, 0x15ac326a), TOBN(0x95de4f49, 0xab8c25e4), + TOBN(0xb01bad09, 0xe265c17c), TOBN(0x24e45464, 0x087b3881), + TOBN(0xd43e583c, 0xe1fac5ca), TOBN(0xe17cb318, 0x6ead97a6), + TOBN(0x6cc39243, 0x74dcec46), TOBN(0x33cfc02d, 0x54c2b73f), + TOBN(0x82917844, 0xf26cd99c), 
TOBN(0x8819dd95, 0xd1773f89), + TOBN(0x09572aa6, 0x0871f427), TOBN(0x8e0cf365, 0xf6f01c34), + TOBN(0x7fa52988, 0xbff1f5af), TOBN(0x4eb357ea, 0xe75e8e50), + TOBN(0xd9d0c8c4, 0x868af75d), TOBN(0xd7325cff, 0x45c8c7ea), + TOBN(0xab471996, 0xcc81ecb0), TOBN(0xff5d55f3, 0x611824ed), + TOBN(0xbe314541, 0x1977a0ee), TOBN(0x5085c4c5, 0x722038c6), + TOBN(0x2d5335bf, 0xf94bb495), TOBN(0x894ad8a6, 0xc8e2a082), + TOBN(0x5c3e2341, 0xada35438), TOBN(0xf4a9fc89, 0x049b8c4e), + TOBN(0xbeeb355a, 0x9f17cf34), TOBN(0x3f311e0e, 0x6c91fe10), + TOBN(0xc2d20038, 0x92ab9891), TOBN(0x257bdcc1, 0x3e8ce9a9), + TOBN(0x1b2d9789, 0x88c53bee), TOBN(0x927ce89a, 0xcdba143a), + TOBN(0xb0a32cca, 0x523db280), TOBN(0x5c889f8a, 0x50d43783), + TOBN(0x503e04b3, 0x4897d16f), TOBN(0x8cdb6e78, 0x08f5f2e8), + TOBN(0x6ab91cf0, 0x179c8e74), TOBN(0xd8874e52, 0x48211d60), + TOBN(0xf948d4d5, 0xea851200), TOBN(0x4076d41e, 0xe6f9840a), + TOBN(0xc20e263c, 0x47b517ea), TOBN(0x79a448fd, 0x30685e5e), + TOBN(0xe55f6f78, 0xf90631a0), TOBN(0x88a790b1, 0xa79e6346), + TOBN(0x62160c7d, 0x80969fe8), TOBN(0x54f92fd4, 0x41491bb9), + TOBN(0xa6645c23, 0x5c957526), TOBN(0xf44cc5ae, 0xbea3ce7b), + TOBN(0xf7628327, 0x8b1e68b7), TOBN(0xc731ad7a, 0x303f29d3), + TOBN(0xfe5a9ca9, 0x57d03ecb), TOBN(0x96c0d50c, 0x41bc97a7), + TOBN(0xc4669fe7, 0x9b4f7f24), TOBN(0xfdd781d8, 0x3d9967ef), + TOBN(0x7892c7c3, 0x5d2c208d), TOBN(0x8bf64f7c, 0xae545cb3), + TOBN(0xc01f862c, 0x467be912), TOBN(0xf4c85ee9, 0xc73d30cc), + TOBN(0x1fa6f4be, 0x6ab83ec7), TOBN(0xa07a3c1c, 0x4e3e3cf9), + TOBN(0x87f8ef45, 0x0c00beb3), TOBN(0x30e2c2b3, 0x000d4c3e), + TOBN(0x1aa00b94, 0xfe08bf5b), TOBN(0x32c133aa, 0x9224ef52), + TOBN(0x38df16bb, 0x32e5685d), TOBN(0x68a9e069, 0x58e6f544), + TOBN(0x495aaff7, 0xcdc5ebc6), TOBN(0xf894a645, 0x378b135f), + TOBN(0xf316350a, 0x09e27ecf), TOBN(0xeced201e, 0x58f7179d), + TOBN(0x2eec273c, 0xe97861ba), TOBN(0x47ec2cae, 0xd693be2e), + TOBN(0xfa4c97c4, 0xf68367ce), TOBN(0xe4f47d0b, 0xbe5a5755), + TOBN(0x17de815d, 0xb298a979), TOBN(0xd7eca659, 0xc177dc7d), + TOBN(0x20fdbb71, 0x49ded0a3), TOBN(0x4cb2aad4, 0xfb34d3c5), + TOBN(0x2cf31d28, 0x60858a33), TOBN(0x3b6873ef, 0xa24aa40f), + TOBN(0x540234b2, 0x2c11bb37), TOBN(0x2d0366dd, 0xed4c74a3), + TOBN(0xf9a968da, 0xeec5f25d), TOBN(0x36601068, 0x67b63142), + TOBN(0x07cd6d2c, 0x68d7b6d4), TOBN(0xa8f74f09, 0x0c842942), + TOBN(0xe2751404, 0x7768b1ee), TOBN(0x4b5f7e89, 0xfe62aee4), + TOBN(0xc6a77177, 0x89070d26), TOBN(0xa1f28e4e, 0xdd1c8bc7), + TOBN(0xea5f4f06, 0x469e1f17), TOBN(0x78fc242a, 0xfbdb78e0), + TOBN(0xc9c7c592, 0x8b0588f1), TOBN(0xb6b7a0fd, 0x1535921e), + TOBN(0xcc5bdb91, 0xbde5ae35), TOBN(0xb42c485e, 0x12ff1864), + TOBN(0xa1113e13, 0xdbab98aa), TOBN(0xde9d469b, 0xa17b1024), + TOBN(0x23f48b37, 0xc0462d3a), TOBN(0x3752e537, 0x7c5c078d), + TOBN(0xe3a86add, 0x15544eb9), TOBN(0xf013aea7, 0x80fba279), + TOBN(0x8b5bb76c, 0xf22001b5), TOBN(0xe617ba14, 0xf02891ab), + TOBN(0xd39182a6, 0x936219d3), TOBN(0x5ce1f194, 0xae51cb19), + TOBN(0xc78f8598, 0xbf07a74c), TOBN(0x6d7158f2, 0x22cbf1bc), + TOBN(0x3b846b21, 0xe300ce18), TOBN(0x35fba630, 0x2d11275d), + TOBN(0x5fe25c36, 0xa0239b9b), TOBN(0xd8beb35d, 0xdf05d940), + TOBN(0x4db02bb0, 0x1f7e320d), TOBN(0x0641c364, 0x6da320ea), + TOBN(0x6d95fa5d, 0x821389a3), TOBN(0x92699748, 0x8fcd8e3d), + TOBN(0x316fef17, 0xceb6c143), TOBN(0x67fcb841, 0xd933762b), + TOBN(0xbb837e35, 0x118b17f8), TOBN(0x4b92552f, 0x9fd24821), + TOBN(0xae6bc70e, 0x46aca793), TOBN(0x1cf0b0e4, 0xe579311b), + TOBN(0x8dc631be, 0x5802f716), TOBN(0x099bdc6f, 0xbddbee4d), + TOBN(0xcc352bb2, 0x0caf8b05), TOBN(0xf74d505a, 
0x72d63df2), + TOBN(0xb9876d4b, 0x91c4f408), TOBN(0x1ce18473, 0x9e229b2d), + TOBN(0x49507597, 0x83abdb4a), TOBN(0x850fbcb6, 0xdee84b18), + TOBN(0x6325236e, 0x609e67dc), TOBN(0x04d831d9, 0x9336c6d8), + TOBN(0x8deaae3b, 0xfa12d45d), TOBN(0xe425f8ce, 0x4746e246), + TOBN(0x8004c175, 0x24f5f31e), TOBN(0xaca16d8f, 0xad62c3b7), + TOBN(0x0dc15a6a, 0x9152f934), TOBN(0xf1235e5d, 0xed0e12c1), + TOBN(0xc33c06ec, 0xda477dac), TOBN(0x76be8732, 0xb2ea0006), + TOBN(0xcf3f7831, 0x0c0cd313), TOBN(0x3c524553, 0xa614260d), + TOBN(0x31a756f8, 0xcab22d15), TOBN(0x03ee10d1, 0x77827a20), + TOBN(0xd1e059b2, 0x1994ef20), TOBN(0x2a653b69, 0x638ae318), + TOBN(0x70d5eb58, 0x2f699010), TOBN(0x279739f7, 0x09f5f84a), + TOBN(0x5da4663c, 0x8b799336), TOBN(0xfdfdf14d, 0x203c37eb), + TOBN(0x32d8a9dc, 0xa1dbfb2d), TOBN(0xab40cff0, 0x77d48f9b), + TOBN(0xc018b383, 0xd20b42d5), TOBN(0xf9a810ef, 0x9f78845f), + TOBN(0x40af3753, 0xbdba9df0), TOBN(0xb90bdcfc, 0x131dfdf9), + TOBN(0x18720591, 0xf01ab782), TOBN(0xc823f211, 0x6af12a88), + TOBN(0xa51b80f3, 0x0dc14401), TOBN(0xde248f77, 0xfb2dfbe3), + TOBN(0xef5a44e5, 0x0cafe751), TOBN(0x73997c9c, 0xd4dcd221), + TOBN(0x32fd86d1, 0xde854024), TOBN(0xd5b53adc, 0xa09b84bb), + TOBN(0x008d7a11, 0xdcedd8d1), TOBN(0x406bd1c8, 0x74b32c84), + TOBN(0x5d4472ff, 0x05dde8b1), TOBN(0x2e25f2cd, 0xfce2b32f), + TOBN(0xbec0dd5e, 0x29dfc254), TOBN(0x4455fcf6, 0x2b98b267), + TOBN(0x0b4d43a5, 0xc72df2ad), TOBN(0xea70e6be, 0x48a75397), + TOBN(0x2aad6169, 0x5820f3bf), TOBN(0xf410d2dd, 0x9e37f68f), + TOBN(0x70fb7dba, 0x7be5ac83), TOBN(0x636bb645, 0x36ec3eec), + TOBN(0x27104ea3, 0x9754e21c), TOBN(0xbc87a3e6, 0x8d63c373), + TOBN(0x483351d7, 0x4109db9a), TOBN(0x0fa724e3, 0x60134da7), + TOBN(0x9ff44c29, 0xb0720b16), TOBN(0x2dd0cf13, 0x06aceead), + TOBN(0x5942758c, 0xe26929a6), TOBN(0x96c5db92, 0xb766a92b), + TOBN(0xcec7d4c0, 0x5f18395e), TOBN(0xd3f22744, 0x1f80d032), + TOBN(0x7a68b37a, 0xcb86075b), TOBN(0x074764dd, 0xafef92db), + TOBN(0xded1e950, 0x7bc7f389), TOBN(0xc580c850, 0xb9756460), + TOBN(0xaeeec2a4, 0x7da48157), TOBN(0x3f0b4e7f, 0x82c587b3), + TOBN(0x231c6de8, 0xa9f19c53), TOBN(0x5717bd73, 0x6974e34e), + TOBN(0xd9e1d216, 0xf1508fa9), TOBN(0x9f112361, 0xdadaa124), + TOBN(0x80145e31, 0x823b7348), TOBN(0x4dd8f0d5, 0xac634069), + TOBN(0xe3d82fc7, 0x2297c258), TOBN(0x276fcfee, 0x9cee7431), + TOBN(0x8eb61b5e, 0x2bc0aea9), TOBN(0x4f668fd5, 0xde329431), + TOBN(0x03a32ab1, 0x38e4b87e), TOBN(0xe1374517, 0x73d0ef0b), + TOBN(0x1a46f7e6, 0x853ac983), TOBN(0xc3bdf42e, 0x68e78a57), + TOBN(0xacf20785, 0x2ea96dd1), TOBN(0xa10649b9, 0xf1638460), + TOBN(0xf2369f0b, 0x879fbbed), TOBN(0x0ff0ae86, 0xda9d1869), + TOBN(0x5251d759, 0x56766f45), TOBN(0x4984d8c0, 0x2be8d0fc), + TOBN(0x7ecc95a6, 0xd21008f0), TOBN(0x29bd54a0, 0x3a1a1c49), + TOBN(0xab9828c5, 0xd26c50f3), TOBN(0x32c0087c, 0x51d0d251), + TOBN(0x9bac3ce6, 0x0c1cdb26), TOBN(0xcd94d947, 0x557ca205), + TOBN(0x1b1bd598, 0x9db1fdcd), TOBN(0x0eda0108, 0xa3d8b149), + TOBN(0x95066610, 0x56152fcc), TOBN(0xc2f037e6, 0xe7192b33), + TOBN(0xdeffb41a, 0xc92e05a4), TOBN(0x1105f6c2, 0xc2f6c62e), + TOBN(0x68e73500, 0x8733913c), TOBN(0xcce86163, 0x3f3adc40), + TOBN(0xf407a942, 0x38a278e9), TOBN(0xd13c1b9d, 0x2ab21292), + TOBN(0x93ed7ec7, 0x1c74cf5c), TOBN(0x8887dc48, 0xf1a4c1b4), + TOBN(0x3830ff30, 0x4b3a11f1), TOBN(0x358c5a3c, 0x58937cb6), + TOBN(0x027dc404, 0x89022829), TOBN(0x40e93977, 0x3b798f79), + TOBN(0x90ad3337, 0x38be6ead), TOBN(0x9c23f6bc, 0xf34c0a5d), + TOBN(0xd1711a35, 0xfbffd8bb), TOBN(0x60fcfb49, 0x1949d3dd), + TOBN(0x09c8ef4b, 0x7825d93a), TOBN(0x24233cff, 0xa0a8c968), + 
TOBN(0x67ade46c, 0xe6d982af), TOBN(0xebb6bf3e, 0xe7544d7c), + TOBN(0xd6b9ba76, 0x3d8bd087), TOBN(0x46fe382d, 0x4dc61280), + TOBN(0xbd39a7e8, 0xb5bdbd75), TOBN(0xab381331, 0xb8f228fe), + TOBN(0x0709a77c, 0xce1c4300), TOBN(0x6a247e56, 0xf337ceac), + TOBN(0x8f34f21b, 0x636288be), TOBN(0x9dfdca74, 0xc8a7c305), + TOBN(0x6decfd1b, 0xea919e04), TOBN(0xcdf2688d, 0x8e1991f8), + TOBN(0xe607df44, 0xd0f8a67e), TOBN(0xd985df4b, 0x0b58d010), + TOBN(0x57f834c5, 0x0c24f8f4), TOBN(0xe976ef56, 0xa0bf01ae), + TOBN(0x536395ac, 0xa1c32373), TOBN(0x351027aa, 0x734c0a13), + TOBN(0xd2f1b5d6, 0x5e6bd5bc), TOBN(0x2b539e24, 0x223debed), + TOBN(0xd4994cec, 0x0eaa1d71), TOBN(0x2a83381d, 0x661dcf65), + TOBN(0x5f1aed2f, 0x7b54c740), TOBN(0x0bea3fa5, 0xd6dda5ee), + TOBN(0x9d4fb684, 0x36cc6134), TOBN(0x8eb9bbf3, 0xc0a443dd), + TOBN(0xfc500e2e, 0x383b7d2a), TOBN(0x7aad621c, 0x5b775257), + TOBN(0x69284d74, 0x0a8f7cc0), TOBN(0xe820c2ce, 0x07562d65), + TOBN(0xbf9531b9, 0x499758ee), TOBN(0x73e95ca5, 0x6ee0cc2d), + TOBN(0xf61790ab, 0xfbaf50a5), TOBN(0xdf55e76b, 0x684e0750), + TOBN(0xec516da7, 0xf176b005), TOBN(0x575553bb, 0x7a2dddc7), + TOBN(0x37c87ca3, 0x553afa73), TOBN(0x315f3ffc, 0x4d55c251), + TOBN(0xe846442a, 0xaf3e5d35), TOBN(0x61b91149, 0x6495ff28), + TOBN(0x23cc95d3, 0xfa326dc3), TOBN(0x1df4da1f, 0x18fc2cea), + TOBN(0x24bf9adc, 0xd0a37d59), TOBN(0xb6710053, 0x320d6e1e), + TOBN(0x96f9667e, 0x618344d1), TOBN(0xcc7ce042, 0xa06445af), + TOBN(0xa02d8514, 0xd68dbc3a), TOBN(0x4ea109e4, 0x280b5a5b), + TOBN(0x5741a7ac, 0xb40961bf), TOBN(0x4ada5937, 0x6aa56bfa), + TOBN(0x7feb9145, 0x02b765d1), TOBN(0x561e97be, 0xe6ad1582), + TOBN(0xbbc4a5b6, 0xda3982f5), TOBN(0x0c2659ed, 0xb546f468), + TOBN(0xb8e7e6aa, 0x59612d20), TOBN(0xd83dfe20, 0xac19e8e0), + TOBN(0x8530c45f, 0xb835398c), TOBN(0x6106a8bf, 0xb38a41c2), + TOBN(0x21e8f9a6, 0x35f5dcdb), TOBN(0x39707137, 0xcae498ed), + TOBN(0x70c23834, 0xd8249f00), TOBN(0x9f14b58f, 0xab2537a0), + TOBN(0xd043c365, 0x5f61c0c2), TOBN(0xdc5926d6, 0x09a194a7), + TOBN(0xddec0339, 0x8e77738a), TOBN(0xd07a63ef, 0xfba46426), + TOBN(0x2e58e79c, 0xee7f6e86), TOBN(0xe59b0459, 0xff32d241), + TOBN(0xc5ec84e5, 0x20fa0338), TOBN(0x97939ac8, 0xeaff5ace), + TOBN(0x0310a4e3, 0xb4a38313), TOBN(0x9115fba2, 0x8f9d9885), + TOBN(0x8dd710c2, 0x5fadf8c3), TOBN(0x66be38a2, 0xce19c0e2), + TOBN(0xd42a279c, 0x4cfe5022), TOBN(0x597bb530, 0x0e24e1b8), + TOBN(0x3cde86b7, 0xc153ca7f), TOBN(0xa8d30fb3, 0x707d63bd), + TOBN(0xac905f92, 0xbd60d21e), TOBN(0x98e7ffb6, 0x7b9a54ab), + TOBN(0xd7147df8, 0xe9726a30), TOBN(0xb5e216ff, 0xafce3533), + TOBN(0xb550b799, 0x2ff1ec40), TOBN(0x6b613b87, 0xa1e953fd), + TOBN(0x87b88dba, 0x792d5610), TOBN(0x2ee1270a, 0xa190fbe1), + TOBN(0x02f4e2dc, 0x2ef581da), TOBN(0x016530e4, 0xeff82a95), + TOBN(0xcbb93dfd, 0x8fd6ee89), TOBN(0x16d3d986, 0x46848fff), + TOBN(0x600eff24, 0x1da47adf), TOBN(0x1b9754a0, 0x0ad47a71), + TOBN(0x8f9266df, 0x70c33b98), TOBN(0xaadc87ae, 0xdf34186e), + TOBN(0x0d2ce8e1, 0x4ad24132), TOBN(0x8a47cbfc, 0x19946eba), + TOBN(0x47feeb66, 0x62b5f3af), TOBN(0xcefab561, 0x0abb3734), + TOBN(0x449de60e, 0x19f35cb1), TOBN(0x39f8db14, 0x157f0eb9), + TOBN(0xffaecc5b, 0x3c61bfd6), TOBN(0xa5a4d41d, 0x41216703), + TOBN(0x7f8fabed, 0x224e1cc2), TOBN(0x0d5a8186, 0x871ad953), + TOBN(0xf10774f7, 0xd22da9a9), TOBN(0x45b8a678, 0xcc8a9b0d), + TOBN(0xd9c2e722, 0xbdc32cff), TOBN(0xbf71b5f5, 0x337202a5), + TOBN(0x95c57f2f, 0x69fc4db9), TOBN(0xb6dad34c, 0x765d01e1), + TOBN(0x7e0bd13f, 0xcb904635), TOBN(0x61751253, 0x763a588c), + TOBN(0xd85c2997, 0x81af2c2d), TOBN(0xc0f7d9c4, 0x81b9d7da), + TOBN(0x838a34ae, 
0x08533e8d), TOBN(0x15c4cb08, 0x311d8311), + TOBN(0x97f83285, 0x8e121e14), TOBN(0xeea7dc1e, 0x85000a5f), + TOBN(0x0c6059b6, 0x5d256274), TOBN(0xec9beace, 0xb95075c0), + TOBN(0x173daad7, 0x1df97828), TOBN(0xbf851cb5, 0xa8937877), + TOBN(0xb083c594, 0x01646f3c), TOBN(0x3bad30cf, 0x50c6d352), + TOBN(0xfeb2b202, 0x496bbcea), TOBN(0x3cf9fd4f, 0x18a1e8ba), + TOBN(0xd26de7ff, 0x1c066029), TOBN(0x39c81e9e, 0x4e9ed4f8), + TOBN(0xd8be0cb9, 0x7b390d35), TOBN(0x01df2bbd, 0x964aab27), + TOBN(0x3e8c1a65, 0xc3ef64f8), TOBN(0x567291d1, 0x716ed1dd), + TOBN(0x95499c6c, 0x5f5406d3), TOBN(0x71fdda39, 0x5ba8e23f), + TOBN(0xcfeb320e, 0xd5096ece), TOBN(0xbe7ba92b, 0xca66dd16), + TOBN(0x4608d36b, 0xc6fb5a7d), TOBN(0xe3eea15a, 0x6d2dd0e0), + TOBN(0x75b0a3eb, 0x8f97a36a), TOBN(0xf59814cc, 0x1c83de1e), + TOBN(0x56c9c5b0, 0x1c33c23f), TOBN(0xa96c1da4, 0x6faa4136), + TOBN(0x46bf2074, 0xde316551), TOBN(0x3b866e7b, 0x1f756c8f), + TOBN(0x727727d8, 0x1495ed6b), TOBN(0xb2394243, 0xb682dce7), + TOBN(0x8ab8454e, 0x758610f3), TOBN(0xc243ce84, 0x857d72a4), + TOBN(0x7b320d71, 0xdbbf370f), TOBN(0xff9afa37, 0x78e0f7ca), + TOBN(0x0119d1e0, 0xea7b523f), TOBN(0xb997f8cb, 0x058c7d42), + TOBN(0x285bcd2a, 0x37bbb184), TOBN(0x51dcec49, 0xa45d1fa6), + TOBN(0x6ade3b64, 0xe29634cb), TOBN(0x080c94a7, 0x26b86ef1), + TOBN(0xba583db1, 0x2283fbe3), TOBN(0x902bddc8, 0x5a9315ed), + TOBN(0x07c1ccb3, 0x86964bec), TOBN(0x78f4eacf, 0xb6258301), + TOBN(0x4bdf3a49, 0x56f90823), TOBN(0xba0f5080, 0x741d777b), + TOBN(0x091d71c3, 0xf38bf760), TOBN(0x9633d50f, 0x9b625b02), + TOBN(0x03ecb743, 0xb8c9de61), TOBN(0xb4751254, 0x5de74720), + TOBN(0x9f9defc9, 0x74ce1cb2), TOBN(0x774a4f6a, 0x00bd32ef), + TOBN(0xaca385f7, 0x73848f22), TOBN(0x53dad716, 0xf3f8558e), + TOBN(0xab7b34b0, 0x93c471f9), TOBN(0xf530e069, 0x19644bc7), + TOBN(0x3d9fb1ff, 0xdd59d31a), TOBN(0x4382e0df, 0x08daa795), + TOBN(0x165c6f4b, 0xd5cc88d7), TOBN(0xeaa392d5, 0x4a18c900), + TOBN(0x94203c67, 0x648024ee), TOBN(0x188763f2, 0x8c2fabcd), + TOBN(0xa80f87ac, 0xbbaec835), TOBN(0x632c96e0, 0xf29d8d54), + TOBN(0x29b0a60e, 0x4c00a95e), TOBN(0x2ef17f40, 0xe011e9fa), + TOBN(0xf6c0e1d1, 0x15b77223), TOBN(0xaaec2c62, 0x14b04e32), + TOBN(0xd35688d8, 0x3d84e58c), TOBN(0x2af5094c, 0x958571db), + TOBN(0x4fff7e19, 0x760682a6), TOBN(0x4cb27077, 0xe39a407c), + TOBN(0x0f59c547, 0x4ff0e321), TOBN(0x169f34a6, 0x1b34c8ff), + TOBN(0x2bff1096, 0x52bc1ba7), TOBN(0xa25423b7, 0x83583544), + TOBN(0x5d55d5d5, 0x0ac8b782), TOBN(0xff6622ec, 0x2db3c892), + TOBN(0x48fce741, 0x6b8bb642), TOBN(0x31d6998c, 0x69d7e3dc), + TOBN(0xdbaf8004, 0xcadcaed0), TOBN(0x801b0142, 0xd81d053c), + TOBN(0x94b189fc, 0x59630ec6), TOBN(0x120e9934, 0xaf762c8e), + TOBN(0x53a29aa4, 0xfdc6a404), TOBN(0x19d8e01e, 0xa1909948), + TOBN(0x3cfcabf1, 0xd7e89681), TOBN(0x3321a50d, 0x4e132d37), + TOBN(0xd0496863, 0xe9a86111), TOBN(0x8c0cde61, 0x06a3bc65), + TOBN(0xaf866c49, 0xfc9f8eef), TOBN(0x2066350e, 0xff7f5141), + TOBN(0x4f8a4689, 0xe56ddfbd), TOBN(0xea1b0c07, 0xfe32983a), + TOBN(0x2b317462, 0x873cb8cb), TOBN(0x658deddc, 0x2d93229f), + TOBN(0x65efaf4d, 0x0f64ef58), TOBN(0xfe43287d, 0x730cc7a8), + TOBN(0xaebc0c72, 0x3d047d70), TOBN(0x92efa539, 0xd92d26c9), + TOBN(0x06e78457, 0x94b56526), TOBN(0x415cb80f, 0x0961002d), + TOBN(0x89e5c565, 0x76dcb10f), TOBN(0x8bbb6982, 0xff9259fe), + TOBN(0x4fe8795b, 0x9abc2668), TOBN(0xb5d4f534, 0x1e678fb1), + TOBN(0x6601f3be, 0x7b7da2b9), TOBN(0x98da59e2, 0xa13d6805), + TOBN(0x190d8ea6, 0x01799a52), TOBN(0xa20cec41, 0xb86d2952), + TOBN(0x3062ffb2, 0x7fff2a7c), TOBN(0x741b32e5, 0x79f19d37), + TOBN(0xf80d8181, 0x4eb57d47), 
TOBN(0x7a2d0ed4, 0x16aef06b), + TOBN(0x09735fb0, 0x1cecb588), TOBN(0x1641caaa, 0xc6061f5b)} + , + {TOBN(0x7f99824f, 0x20151427), TOBN(0x206828b6, 0x92430206), + TOBN(0xaa9097d7, 0xe1112357), TOBN(0xacf9a2f2, 0x09e414ec), + TOBN(0xdbdac9da, 0x27915356), TOBN(0x7e0734b7, 0x001efee3), + TOBN(0x54fab5bb, 0xd2b288e2), TOBN(0x4c630fc4, 0xf62dd09c), + TOBN(0x8537107a, 0x1ac2703b), TOBN(0xb49258d8, 0x6bc857b5), + TOBN(0x57df14de, 0xbcdaccd1), TOBN(0x24ab68d7, 0xc4ae8529), + TOBN(0x7ed8b5d4, 0x734e59d0), TOBN(0x5f8740c8, 0xc495cc80), + TOBN(0x84aedd5a, 0x291db9b3), TOBN(0x80b360f8, 0x4fb995be), + TOBN(0xae915f5d, 0x5fa067d1), TOBN(0x4134b57f, 0x9668960c), + TOBN(0xbd3656d6, 0xa48edaac), TOBN(0xdac1e3e4, 0xfc1d7436), + TOBN(0x674ff869, 0xd81fbb26), TOBN(0x449ed3ec, 0xb26c33d4), + TOBN(0x85138705, 0xd94203e8), TOBN(0xccde538b, 0xbeeb6f4a), + TOBN(0x55d5c68d, 0xa61a76fa), TOBN(0x598b441d, 0xca1554dc), + TOBN(0xd39923b9, 0x773b279c), TOBN(0x33331d3c, 0x36bf9efc), + TOBN(0x2d4c848e, 0x298de399), TOBN(0xcfdb8e77, 0xa1a27f56), + TOBN(0x94c855ea, 0x57b8ab70), TOBN(0xdcdb9dae, 0x6f7879ba), + TOBN(0x7bdff8c2, 0x019f2a59), TOBN(0xb3ce5bb3, 0xcb4fbc74), + TOBN(0xea907f68, 0x8a9173dd), TOBN(0x6cd3d0d3, 0x95a75439), + TOBN(0x92ecc4d6, 0xefed021c), TOBN(0x09a9f9b0, 0x6a77339a), + TOBN(0x87ca6b15, 0x7188c64a), TOBN(0x10c29968, 0x44899158), + TOBN(0x5859a229, 0xed6e82ef), TOBN(0x16f338e3, 0x65ebaf4e), + TOBN(0x0cd31387, 0x5ead67ae), TOBN(0x1c73d228, 0x54ef0bb4), + TOBN(0x4cb55131, 0x74a5c8c7), TOBN(0x01cd2970, 0x7f69ad6a), + TOBN(0xa04d00dd, 0xe966f87e), TOBN(0xd96fe447, 0x0b7b0321), + TOBN(0x342ac06e, 0x88fbd381), TOBN(0x02cd4a84, 0x5c35a493), + TOBN(0xe8fa89de, 0x54f1bbcd), TOBN(0x341d6367, 0x2575ed4c), + TOBN(0xebe357fb, 0xd238202b), TOBN(0x600b4d1a, 0xa984ead9), + TOBN(0xc35c9f44, 0x52436ea0), TOBN(0x96fe0a39, 0xa370751b), + TOBN(0x4c4f0736, 0x7f636a38), TOBN(0x9f943fb7, 0x0e76d5cb), + TOBN(0xb03510ba, 0xa8b68b8b), TOBN(0xc246780a, 0x9ed07a1f), + TOBN(0x3c051415, 0x6d549fc2), TOBN(0xc2953f31, 0x607781ca), + TOBN(0x955e2c69, 0xd8d95413), TOBN(0xb300fadc, 0x7bd282e3), + TOBN(0x81fe7b50, 0x87e9189f), TOBN(0xdb17375c, 0xf42dda27), + TOBN(0x22f7d896, 0xcf0a5904), TOBN(0xa0e57c5a, 0xebe348e6), + TOBN(0xa61011d3, 0xf40e3c80), TOBN(0xb1189321, 0x8db705c5), + TOBN(0x4ed9309e, 0x50fedec3), TOBN(0xdcf14a10, 0x4d6d5c1d), + TOBN(0x056c265b, 0x55691342), TOBN(0xe8e08504, 0x91049dc7), + TOBN(0x131329f5, 0xc9bae20a), TOBN(0x96c8b3e8, 0xd9dccdb4), + TOBN(0x8c5ff838, 0xfb4ee6b4), TOBN(0xfc5a9aeb, 0x41e8ccf0), + TOBN(0x7417b764, 0xfae050c6), TOBN(0x0953c3d7, 0x00452080), + TOBN(0x21372682, 0x38dfe7e8), TOBN(0xea417e15, 0x2bb79d4b), + TOBN(0x59641f1c, 0x76e7cf2d), TOBN(0x271e3059, 0xea0bcfcc), + TOBN(0x624c7dfd, 0x7253ecbd), TOBN(0x2f552e25, 0x4fca6186), + TOBN(0xcbf84ecd, 0x4d866e9c), TOBN(0x73967709, 0xf68d4610), + TOBN(0xa14b1163, 0xc27901b4), TOBN(0xfd9236e0, 0x899b8bf3), + TOBN(0x42b091ec, 0xcbc6da0a), TOBN(0xbb1dac6f, 0x5ad1d297), + TOBN(0x80e61d53, 0xa91cf76e), TOBN(0x4110a412, 0xd31f1ee7), + TOBN(0x2d87c3ba, 0x13efcf77), TOBN(0x1f374bb4, 0xdf450d76), + TOBN(0x5e78e2f2, 0x0d188dab), TOBN(0xe3968ed0, 0xf4b885ef), + TOBN(0x46c0568e, 0x7314570f), TOBN(0x31616338, 0x01170521), + TOBN(0x18e1e7e2, 0x4f0c8afe), TOBN(0x4caa75ff, 0xdeea78da), + TOBN(0x82db67f2, 0x7c5d8a51), TOBN(0x36a44d86, 0x6f505370), + TOBN(0xd72c5bda, 0x0333974f), TOBN(0x5db516ae, 0x27a70146), + TOBN(0x34705281, 0x210ef921), TOBN(0xbff17a8f, 0x0c9c38e5), + TOBN(0x78f4814e, 0x12476da1), TOBN(0xc1e16613, 0x33c16980), + TOBN(0x9e5b386f, 0x424d4bca), 
TOBN(0x4c274e87, 0xc85740de), + TOBN(0xb6a9b88d, 0x6c2f5226), TOBN(0x14d1b944, 0x550d7ca8), + TOBN(0x580c85fc, 0x1fc41709), TOBN(0xc1da368b, 0x54c6d519), + TOBN(0x2b0785ce, 0xd5113cf7), TOBN(0x0670f633, 0x5a34708f), + TOBN(0x46e23767, 0x15cc3f88), TOBN(0x1b480cfa, 0x50c72c8f), + TOBN(0x20288602, 0x4147519a), TOBN(0xd0981eac, 0x26b372f0), + TOBN(0xa9d4a7ca, 0xa785ebc8), TOBN(0xd953c50d, 0xdbdf58e9), + TOBN(0x9d6361cc, 0xfd590f8f), TOBN(0x72e9626b, 0x44e6c917), + TOBN(0x7fd96110, 0x22eb64cf), TOBN(0x863ebb7e, 0x9eb288f3), + TOBN(0x6e6ab761, 0x6aca8ee7), TOBN(0x97d10b39, 0xd7b40358), + TOBN(0x1687d377, 0x1e5feb0d), TOBN(0xc83e50e4, 0x8265a27a), + TOBN(0x8f75a9fe, 0xc954b313), TOBN(0xcc2e8f47, 0x310d1f61), + TOBN(0xf5ba81c5, 0x6557d0e0), TOBN(0x25f9680c, 0x3eaf6207), + TOBN(0xf95c6609, 0x4354080b), TOBN(0x5225bfa5, 0x7bf2fe1c), + TOBN(0xc5c004e2, 0x5c7d98fa), TOBN(0x3561bf1c, 0x019aaf60), + TOBN(0x5e6f9f17, 0xba151474), TOBN(0xdec2f934, 0xb04f6eca), + TOBN(0x64e368a1, 0x269acb1e), TOBN(0x1332d9e4, 0x0cdda493), + TOBN(0x60d6cf69, 0xdf23de05), TOBN(0x66d17da2, 0x009339a0), + TOBN(0x9fcac985, 0x0a693923), TOBN(0xbcf057fc, 0xed7c6a6d), + TOBN(0xc3c5c8c5, 0xf0b5662c), TOBN(0x25318dd8, 0xdcba4f24), + TOBN(0x60e8cb75, 0x082b69ff), TOBN(0x7c23b3ee, 0x1e728c01), + TOBN(0x15e10a0a, 0x097e4403), TOBN(0xcb3d0a86, 0x19854665), + TOBN(0x88d8e211, 0xd67d4826), TOBN(0xb39af66e, 0x0b9d2839), + TOBN(0xa5f94588, 0xbd475ca8), TOBN(0xe06b7966, 0xc077b80b), + TOBN(0xfedb1485, 0xda27c26c), TOBN(0xd290d33a, 0xfe0fd5e0), + TOBN(0xa40bcc47, 0xf34fb0fa), TOBN(0xb4760cc8, 0x1fb1ab09), + TOBN(0x8fca0993, 0xa273bfe3), TOBN(0x13e4fe07, 0xf70b213c), + TOBN(0x3bcdb992, 0xfdb05163), TOBN(0x8c484b11, 0x0c2b19b6), + TOBN(0x1acb815f, 0xaaf2e3e2), TOBN(0xc6905935, 0xb89ff1b4), + TOBN(0xb2ad6f9d, 0x586e74e1), TOBN(0x488883ad, 0x67b80484), + TOBN(0x758aa2c7, 0x369c3ddb), TOBN(0x8ab74e69, 0x9f9afd31), + TOBN(0x10fc2d28, 0x5e21beb1), TOBN(0x3484518a, 0x318c42f9), + TOBN(0x377427dc, 0x53cf40c3), TOBN(0x9de0781a, 0x391bc1d9), + TOBN(0x8faee858, 0x693807e1), TOBN(0xa3865327, 0x4e81ccc7), + TOBN(0x02c30ff2, 0x6f835b84), TOBN(0xb604437b, 0x0d3d38d4), + TOBN(0xb3fc8a98, 0x5ca1823d), TOBN(0xb82f7ec9, 0x03be0324), + TOBN(0xee36d761, 0xcf684a33), TOBN(0x5a01df0e, 0x9f29bf7d), + TOBN(0x686202f3, 0x1306583d), TOBN(0x05b10da0, 0x437c622e), + TOBN(0xbf9aaa0f, 0x076a7bc8), TOBN(0x25e94efb, 0x8f8f4e43), + TOBN(0x8a35c9b7, 0xfa3dc26d), TOBN(0xe0e5fb93, 0x96ff03c5), + TOBN(0xa77e3843, 0xebc394ce), TOBN(0xcede6595, 0x8361de60), + TOBN(0xd27c22f6, 0xa1993545), TOBN(0xab01cc36, 0x24d671ba), + TOBN(0x63fa2877, 0xa169c28e), TOBN(0x925ef904, 0x2eb08376), + TOBN(0x3b2fa3cf, 0x53aa0b32), TOBN(0xb27beb5b, 0x71c49d7a), + TOBN(0xb60e1834, 0xd105e27f), TOBN(0xd6089788, 0x4f68570d), + TOBN(0x23094ce0, 0xd6fbc2ac), TOBN(0x738037a1, 0x815ff551), + TOBN(0xda73b1bb, 0x6bef119c), TOBN(0xdcf6c430, 0xeef506ba), + TOBN(0x00e4fe7b, 0xe3ef104a), TOBN(0xebdd9a2c, 0x0a065628), + TOBN(0x853a81c3, 0x8792043e), TOBN(0x22ad6ece, 0xb3b59108), + TOBN(0x9fb813c0, 0x39cd297d), TOBN(0x8ec7e16e, 0x05bda5d9), + TOBN(0x2834797c, 0x0d104b96), TOBN(0xcc11a2e7, 0x7c511510), + TOBN(0x96ca5a53, 0x96ee6380), TOBN(0x054c8655, 0xcea38742), + TOBN(0xb5946852, 0xd54dfa7d), TOBN(0x97c422e7, 0x1f4ab207), + TOBN(0xbf907509, 0x0c22b540), TOBN(0x2cde42aa, 0xb7c267d4), + TOBN(0xba18f9ed, 0x5ab0d693), TOBN(0x3ba62aa6, 0x6e4660d9), + TOBN(0xb24bf97b, 0xab9ea96a), TOBN(0x5d039642, 0xe3b60e32), + TOBN(0x4e6a4506, 0x7c4d9bd5), TOBN(0x666c5b9e, 0x7ed4a6a4), + TOBN(0xfa3fdcd9, 0x8edbd7cc), TOBN(0x4660bb87, 
0xc6ccd753), + TOBN(0x9ae90820, 0x21e6b64f), TOBN(0x8a56a713, 0xb36bfb3f), + TOBN(0xabfce096, 0x5726d47f), TOBN(0x9eed01b2, 0x0b1a9a7f), + TOBN(0x30e9cad4, 0x4eb74a37), TOBN(0x7b2524cc, 0x53e9666d), + TOBN(0x6a29683b, 0x8f4b002f), TOBN(0xc2200d7a, 0x41f4fc20), + TOBN(0xcf3af47a, 0x3a338acc), TOBN(0x6539a4fb, 0xe7128975), + TOBN(0xcec31c14, 0xc33c7fcf), TOBN(0x7eb6799b, 0xc7be322b), + TOBN(0x119ef4e9, 0x6646f623), TOBN(0x7b7a26a5, 0x54d7299b), + TOBN(0xcb37f08d, 0x403f46f2), TOBN(0x94b8fc43, 0x1a0ec0c7), + TOBN(0xbb8514e3, 0xc332142f), TOBN(0xf3ed2c33, 0xe80d2a7a), + TOBN(0x8d2080af, 0xb639126c), TOBN(0xf7b6be60, 0xe3553ade), + TOBN(0x3950aa9f, 0x1c7e2b09), TOBN(0x847ff958, 0x6410f02b), + TOBN(0x877b7cf5, 0x678a31b0), TOBN(0xd50301ae, 0x3998b620), + TOBN(0x734257c5, 0xc00fb396), TOBN(0xf9fb18a0, 0x04e672a6), + TOBN(0xff8bd8eb, 0xe8758851), TOBN(0x1e64e4c6, 0x5d99ba44), + TOBN(0x4b8eaedf, 0x7dfd93b7), TOBN(0xba2f2a98, 0x04e76b8c), + TOBN(0x7d790cba, 0xe8053433), TOBN(0xc8e725a0, 0x3d2c9585), + TOBN(0x58c5c476, 0xcdd8f5ed), TOBN(0xd106b952, 0xefa9fe1d), + TOBN(0x3c5c775b, 0x0eff13a9), TOBN(0x242442ba, 0xe057b930), + TOBN(0xe9f458d4, 0xc9b70cbd), TOBN(0x69b71448, 0xa3cdb89a), + TOBN(0x41ee46f6, 0x0e2ed742), TOBN(0x573f1045, 0x40067493), + TOBN(0xb1e154ff, 0x9d54c304), TOBN(0x2ad0436a, 0x8d3a7502), + TOBN(0xee4aaa2d, 0x431a8121), TOBN(0xcd38b3ab, 0x886f11ed), + TOBN(0x57d49ea6, 0x034a0eb7), TOBN(0xd2b773bd, 0xf7e85e58), + TOBN(0x4a559ac4, 0x9b5c1f14), TOBN(0xc444be1a, 0x3e54df2b), + TOBN(0x13aad704, 0xeda41891), TOBN(0xcd927bec, 0x5eb5c788), + TOBN(0xeb3c8516, 0xe48c8a34), TOBN(0x1b7ac812, 0x4b546669), + TOBN(0x1815f896, 0x594df8ec), TOBN(0x87c6a79c, 0x79227865), + TOBN(0xae02a2f0, 0x9b56ddbd), TOBN(0x1339b5ac, 0x8a2f1cf3), + TOBN(0xf2b569c7, 0x839dff0d), TOBN(0xb0b9e864, 0xfee9a43d), + TOBN(0x4ff8ca41, 0x77bb064e), TOBN(0x145a2812, 0xfd249f63), + TOBN(0x3ab7beac, 0xf86f689a), TOBN(0x9bafec27, 0x01d35f5e), + TOBN(0x28054c65, 0x4265aa91), TOBN(0xa4b18304, 0x035efe42), + TOBN(0x6887b0e6, 0x9639dec7), TOBN(0xf4b8f6ad, 0x3d52aea5), + TOBN(0xfb9293cc, 0x971a8a13), TOBN(0x3f159e5d, 0x4c934d07), + TOBN(0x2c50e9b1, 0x09acbc29), TOBN(0x08eb65e6, 0x7154d129), + TOBN(0x4feff589, 0x30b75c3e), TOBN(0x0bb82fe2, 0x94491c93), + TOBN(0xd8ac377a, 0x89af62bb), TOBN(0xd7b51490, 0x9685e49f), + TOBN(0xabca9a7b, 0x04497f19), TOBN(0x1b35ed0a, 0x1a7ad13f), + TOBN(0x6b601e21, 0x3ec86ed6), TOBN(0xda91fcb9, 0xce0c76f1), + TOBN(0x9e28507b, 0xd7ab27e1), TOBN(0x7c19a555, 0x63945b7b), + TOBN(0x6b43f0a1, 0xaafc9827), TOBN(0x443b4fbd, 0x3aa55b91), + TOBN(0x962b2e65, 0x6962c88f), TOBN(0x139da8d4, 0xce0db0ca), + TOBN(0xb93f05dd, 0x1b8d6c4f), TOBN(0x779cdff7, 0x180b9824), + TOBN(0xbba23fdd, 0xae57c7b7), TOBN(0x345342f2, 0x1b932522), + TOBN(0xfd9c80fe, 0x556d4aa3), TOBN(0xa03907ba, 0x6525bb61), + TOBN(0x38b010e1, 0xff218933), TOBN(0xc066b654, 0xaa52117b), + TOBN(0x8e141920, 0x94f2e6ea), TOBN(0x66a27dca, 0x0d32f2b2), + TOBN(0x69c7f993, 0x048b3717), TOBN(0xbf5a989a, 0xb178ae1c), + TOBN(0x49fa9058, 0x564f1d6b), TOBN(0x27ec6e15, 0xd31fde4e), + TOBN(0x4cce0373, 0x7276e7fc), TOBN(0x64086d79, 0x89d6bf02), + TOBN(0x5a72f046, 0x4ccdd979), TOBN(0x909c3566, 0x47775631), + TOBN(0x1c07bc6b, 0x75dd7125), TOBN(0xb4c6bc97, 0x87a0428d), + TOBN(0x507ece52, 0xfdeb6b9d), TOBN(0xfca56512, 0xb2c95432), + TOBN(0x15d97181, 0xd0e8bd06), TOBN(0x384dd317, 0xc6bb46ea), + TOBN(0x5441ea20, 0x3952b624), TOBN(0xbcf70dee, 0x4e7dc2fb), + TOBN(0x372b016e, 0x6628e8c3), TOBN(0x07a0d667, 0xb60a7522), + TOBN(0xcf05751b, 0x0a344ee2), TOBN(0x0ec09a48, 0x118bdeec), + 
TOBN(0x6e4b3d4e, 0xd83dce46), TOBN(0x43a6316d, 0x99d2fc6e), + TOBN(0xa99d8989, 0x56cf044c), TOBN(0x7c7f4454, 0xae3e5fb7), + TOBN(0xb2e6b121, 0xfbabbe92), TOBN(0x281850fb, 0xe1330076), + TOBN(0x093581ec, 0x97890015), TOBN(0x69b1dded, 0x75ff77f5), + TOBN(0x7cf0b18f, 0xab105105), TOBN(0x953ced31, 0xa89ccfef), + TOBN(0x3151f85f, 0xeb914009), TOBN(0x3c9f1b87, 0x88ed48ad), + TOBN(0xc9aba1a1, 0x4a7eadcb), TOBN(0x928e7501, 0x522e71cf), + TOBN(0xeaede727, 0x3a2e4f83), TOBN(0x467e10d1, 0x1ce3bbd3), + TOBN(0xf3442ac3, 0xb955dcf0), TOBN(0xba96307d, 0xd3d5e527), + TOBN(0xf763a10e, 0xfd77f474), TOBN(0x5d744bd0, 0x6a6e1ff0), + TOBN(0xd287282a, 0xa777899e), TOBN(0xe20eda8f, 0xd03f3cde), + TOBN(0x6a7e75bb, 0x50b07d31), TOBN(0x0b7e2a94, 0x6f379de4), + TOBN(0x31cb64ad, 0x19f593cf), TOBN(0x7b1a9e4f, 0x1e76ef1d), + TOBN(0xe18c9c9d, 0xb62d609c), TOBN(0x439bad6d, 0xe779a650), + TOBN(0x219d9066, 0xe032f144), TOBN(0x1db632b8, 0xe8b2ec6a), + TOBN(0xff0d0fd4, 0xfda12f78), TOBN(0x56fb4c2d, 0x2a25d265), + TOBN(0x5f4e2ee1, 0x255a03f1), TOBN(0x61cd6af2, 0xe96af176), + TOBN(0xe0317ba8, 0xd068bc97), TOBN(0x927d6bab, 0x264b988e), + TOBN(0xa18f07e0, 0xe90fb21e), TOBN(0x00fd2b80, 0xbba7fca1), + TOBN(0x20387f27, 0x95cd67b5), TOBN(0x5b89a4e7, 0xd39707f7), + TOBN(0x8f83ad3f, 0x894407ce), TOBN(0xa0025b94, 0x6c226132), + TOBN(0xc79563c7, 0xf906c13b), TOBN(0x5f548f31, 0x4e7bb025), + TOBN(0x2b4c6b8f, 0xeac6d113), TOBN(0xa67e3f9c, 0x0e813c76), + TOBN(0x3982717c, 0x3fe1f4b9), TOBN(0x58865819, 0x26d8050e), + TOBN(0x99f3640c, 0xf7f06f20), TOBN(0xdc610216, 0x2a66ebc2), + TOBN(0x52f2c175, 0x767a1e08), TOBN(0x05660e1a, 0x5999871b), + TOBN(0x6b0f1762, 0x6d3c4693), TOBN(0xf0e7d627, 0x37ed7bea), + TOBN(0xc51758c7, 0xb75b226d), TOBN(0x40a88628, 0x1f91613b), + TOBN(0x889dbaa7, 0xbbb38ce0), TOBN(0xe0404b65, 0xbddcad81), + TOBN(0xfebccd3a, 0x8bc9671f), TOBN(0xfbf9a357, 0xee1f5375), + TOBN(0x5dc169b0, 0x28f33398), TOBN(0xb07ec11d, 0x72e90f65), + TOBN(0xae7f3b4a, 0xfaab1eb1), TOBN(0xd970195e, 0x5f17538a), + TOBN(0x52b05cbe, 0x0181e640), TOBN(0xf5debd62, 0x2643313d), + TOBN(0x76148154, 0x5df31f82), TOBN(0x23e03b33, 0x3a9e13c5), + TOBN(0xff758949, 0x4fde0c1f), TOBN(0xbf8a1abe, 0xe5b6ec20), + TOBN(0x702278fb, 0x87e1db6c), TOBN(0xc447ad7a, 0x35ed658f), + TOBN(0x48d4aa38, 0x03d0ccf2), TOBN(0x80acb338, 0x819a7c03), + TOBN(0x9bc7c89e, 0x6e17cecc), TOBN(0x46736b8b, 0x03be1d82), + TOBN(0xd65d7b60, 0xc0432f96), TOBN(0xddebe7a3, 0xdeb5442f), + TOBN(0x79a25307, 0x7dff69a2), TOBN(0x37a56d94, 0x02cf3122), + TOBN(0x8bab8aed, 0xf2350d0a), TOBN(0x13c3f276, 0x037b0d9a), + TOBN(0xc664957c, 0x44c65cae), TOBN(0x88b44089, 0xc2e71a88), + TOBN(0xdb88e5a3, 0x5cb02664), TOBN(0x5d4c0bf1, 0x8686c72e), + TOBN(0xea3d9b62, 0xa682d53e), TOBN(0x9b605ef4, 0x0b2ad431), + TOBN(0x71bac202, 0xc69645d0), TOBN(0xa115f03a, 0x6a1b66e7), + TOBN(0xfe2c563a, 0x158f4dc4), TOBN(0xf715b3a0, 0x4d12a78c), + TOBN(0x8f7f0a48, 0xd413213a), TOBN(0x2035806d, 0xc04becdb), + TOBN(0xecd34a99, 0x5d8587f5), TOBN(0x4d8c3079, 0x9f6d3a71), + TOBN(0x1b2a2a67, 0x8d95a8f6), TOBN(0xc58c9d7d, 0xf2110d0d), + TOBN(0xdeee81d5, 0xcf8fba3f), TOBN(0xa42be3c0, 0x0c7cdf68), + TOBN(0x2126f742, 0xd43b5eaa), TOBN(0x054a0766, 0xdfa59b85), + TOBN(0x9d0d5e36, 0x126bfd45), TOBN(0xa1f8fbd7, 0x384f8a8f), + TOBN(0x317680f5, 0xd563fccc), TOBN(0x48ca5055, 0xf280a928), + TOBN(0xe00b81b2, 0x27b578cf), TOBN(0x10aad918, 0x2994a514), + TOBN(0xd9e07b62, 0xb7bdc953), TOBN(0x9f0f6ff2, 0x5bc086dd), + TOBN(0x09d1ccff, 0x655eee77), TOBN(0x45475f79, 0x5bef7df1), + TOBN(0x3faa28fa, 0x86f702cc), TOBN(0x92e60905, 0x0f021f07), + TOBN(0xe9e62968, 
0x7f8fa8c6), TOBN(0xbd71419a, 0xf036ea2c), + TOBN(0x171ee1cc, 0x6028da9a), TOBN(0x5352fe1a, 0xc251f573), + TOBN(0xf8ff236e, 0x3fa997f4), TOBN(0xd831b6c9, 0xa5749d5f), + TOBN(0x7c872e1d, 0xe350e2c2), TOBN(0xc56240d9, 0x1e0ce403), + TOBN(0xf9deb077, 0x6974f5cb), TOBN(0x7d50ba87, 0x961c3728), + TOBN(0xd6f89426, 0x5a3a2518), TOBN(0xcf817799, 0xc6303d43), + TOBN(0x510a0471, 0x619e5696), TOBN(0xab049ff6, 0x3a5e307b), + TOBN(0xe4cdf9b0, 0xfeb13ec7), TOBN(0xd5e97117, 0x9d8ff90c), + TOBN(0xf6f64d06, 0x9afa96af), TOBN(0x00d0bf5e, 0x9d2012a2), + TOBN(0xe63f301f, 0x358bcdc0), TOBN(0x07689e99, 0x0a9d47f8), + TOBN(0x1f689e2f, 0x4f43d43a), TOBN(0x4d542a16, 0x90920904), + TOBN(0xaea293d5, 0x9ca0a707), TOBN(0xd061fe45, 0x8ac68065), + TOBN(0x1033bf1b, 0x0090008c), TOBN(0x29749558, 0xc08a6db6), + TOBN(0x74b5fc59, 0xc1d5d034), TOBN(0xf712e9f6, 0x67e215e0), + TOBN(0xfd520cbd, 0x860200e6), TOBN(0x0229acb4, 0x3ea22588), + TOBN(0x9cd1e14c, 0xfff0c82e), TOBN(0x87684b62, 0x59c69e73), + TOBN(0xda85e61c, 0x96ccb989), TOBN(0x2d5dbb02, 0xa3d06493), + TOBN(0xf22ad33a, 0xe86b173c), TOBN(0xe8e41ea5, 0xa79ff0e3), + TOBN(0x01d2d725, 0xdd0d0c10), TOBN(0x31f39088, 0x032d28f9), + TOBN(0x7b3f71e1, 0x7829839e), TOBN(0x0cf691b4, 0x4502ae58), + TOBN(0xef658dbd, 0xbefc6115), TOBN(0xa5cd6ee5, 0xb3ab5314), + TOBN(0x206c8d7b, 0x5f1d2347), TOBN(0x794645ba, 0x4cc2253a), + TOBN(0xd517d8ff, 0x58389e08), TOBN(0x4fa20dee, 0x9f847288), + TOBN(0xeba072d8, 0xd797770a), TOBN(0x7360c91d, 0xbf429e26), + TOBN(0x7200a3b3, 0x80af8279), TOBN(0x6a1c9150, 0x82dadce3), + TOBN(0x0ee6d3a7, 0xc35d8794), TOBN(0x042e6558, 0x0356bae5), + TOBN(0x9f59698d, 0x643322fd), TOBN(0x9379ae15, 0x50a61967), + TOBN(0x64b9ae62, 0xfcc9981e), TOBN(0xaed3d631, 0x6d2934c6), + TOBN(0x2454b302, 0x5e4e65eb), TOBN(0xab09f647, 0xf9950428)} + , + {TOBN(0xb2083a12, 0x22248acc), TOBN(0x1f6ec0ef, 0x3264e366), + TOBN(0x5659b704, 0x5afdee28), TOBN(0x7a823a40, 0xe6430bb5), + TOBN(0x24592a04, 0xe1900a79), TOBN(0xcde09d4a, 0xc9ee6576), + TOBN(0x52b6463f, 0x4b5ea54a), TOBN(0x1efe9ed3, 0xd3ca65a7), + TOBN(0xe27a6dbe, 0x305406dd), TOBN(0x8eb7dc7f, 0xdd5d1957), + TOBN(0xf54a6876, 0x387d4d8f), TOBN(0x9c479409, 0xc7762de4), + TOBN(0xbe4d5b5d, 0x99b30778), TOBN(0x25380c56, 0x6e793682), + TOBN(0x602d37f3, 0xdac740e3), TOBN(0x140deabe, 0x1566e4ae), + TOBN(0x4481d067, 0xafd32acf), TOBN(0xd8f0fcca, 0xe1f71ccf), + TOBN(0xd208dd0c, 0xb596f2da), TOBN(0xd049d730, 0x9aad93f9), + TOBN(0xc79f263d, 0x42ab580e), TOBN(0x09411bb1, 0x23f707b4), + TOBN(0x8cfde1ff, 0x835e0eda), TOBN(0x72707490, 0x90f03402), + TOBN(0xeaee6126, 0xc49a861e), TOBN(0x024f3b65, 0xe14f0d06), + TOBN(0x51a3f1e8, 0xc69bfc17), TOBN(0xc3c3a8e9, 0xa7686381), + TOBN(0x3400752c, 0xb103d4c8), TOBN(0x02bc4613, 0x9218b36b), + TOBN(0xc67f75eb, 0x7651504a), TOBN(0xd6848b56, 0xd02aebfa), + TOBN(0xbd9802e6, 0xc30fa92b), TOBN(0x5a70d96d, 0x9a552784), + TOBN(0x9085c4ea, 0x3f83169b), TOBN(0xfa9423bb, 0x06908228), + TOBN(0x2ffebe12, 0xfe97a5b9), TOBN(0x85da6049, 0x71b99118), + TOBN(0x9cbc2f7f, 0x63178846), TOBN(0xfd96bc70, 0x9153218e), + TOBN(0x958381db, 0x1782269b), TOBN(0xae34bf79, 0x2597e550), + TOBN(0xbb5c6064, 0x5f385153), TOBN(0x6f0e96af, 0xe3088048), + TOBN(0xbf6a0215, 0x77884456), TOBN(0xb3b5688c, 0x69310ea7), + TOBN(0x17c94295, 0x04fad2de), TOBN(0xe020f0e5, 0x17896d4d), + TOBN(0x730ba0ab, 0x0976505f), TOBN(0x567f6813, 0x095e2ec5), + TOBN(0x47062010, 0x6331ab71), TOBN(0x72cfa977, 0x41d22b9f), + TOBN(0x33e55ead, 0x8a2373da), TOBN(0xa8d0d5f4, 0x7ba45a68), + TOBN(0xba1d8f9c, 0x03029d15), TOBN(0x8f34f1cc, 0xfc55b9f3), + TOBN(0xcca4428d, 0xbbe5a1a9), 
TOBN(0x8187fd5f, 0x3126bd67), + TOBN(0x0036973a, 0x48105826), TOBN(0xa39b6663, 0xb8bd61a0), + TOBN(0x6d42deef, 0x2d65a808), TOBN(0x4969044f, 0x94636b19), + TOBN(0xf611ee47, 0xdd5d564c), TOBN(0x7b2f3a49, 0xd2873077), + TOBN(0x94157d45, 0x300eb294), TOBN(0x2b2a656e, 0x169c1494), + TOBN(0xc000dd76, 0xd3a47aa9), TOBN(0xa2864e4f, 0xa6243ea4), + TOBN(0x82716c47, 0xdb89842e), TOBN(0x12dfd7d7, 0x61479fb7), + TOBN(0x3b9a2c56, 0xe0b2f6dc), TOBN(0x46be862a, 0xd7f85d67), + TOBN(0x03b0d8dd, 0x0f82b214), TOBN(0x460c34f9, 0xf103cbc6), + TOBN(0xf32e5c03, 0x18d79e19), TOBN(0x8b8888ba, 0xa84117f8), + TOBN(0x8f3c37dc, 0xc0722677), TOBN(0x10d21be9, 0x1c1c0f27), + TOBN(0xd47c8468, 0xe0f7a0c6), TOBN(0x9bf02213, 0xadecc0e0), + TOBN(0x0baa7d12, 0x42b48b99), TOBN(0x1bcb665d, 0x48424096), + TOBN(0x8b847cd6, 0xebfb5cfb), TOBN(0x87c2ae56, 0x9ad4d10d), + TOBN(0xf1cbb122, 0x0de36726), TOBN(0xe7043c68, 0x3fdfbd21), + TOBN(0x4bd0826a, 0x4e79d460), TOBN(0x11f5e598, 0x4bd1a2cb), + TOBN(0x97554160, 0xb7fe7b6e), TOBN(0x7d16189a, 0x400a3fb2), + TOBN(0xd73e9bea, 0xe328ca1e), TOBN(0x0dd04b97, 0xe793d8cc), + TOBN(0xa9c83c9b, 0x506db8cc), TOBN(0x5cd47aae, 0xcf38814c), + TOBN(0x26fc430d, 0xb64b45e6), TOBN(0x079b5499, 0xd818ea84), + TOBN(0xebb01102, 0xc1c24a3b), TOBN(0xca24e568, 0x1c161c1a), + TOBN(0x103eea69, 0x36f00a4a), TOBN(0x9ad76ee8, 0x76176c7b), + TOBN(0x97451fc2, 0x538e0ff7), TOBN(0x94f89809, 0x6604b3b0), + TOBN(0x6311436e, 0x3249cfd7), TOBN(0x27b4a7bd, 0x41224f69), + TOBN(0x03b5d21a, 0xe0ac2941), TOBN(0x279b0254, 0xc2d31937), + TOBN(0x3307c052, 0xcac992d0), TOBN(0x6aa7cb92, 0xefa8b1f3), + TOBN(0x5a182580, 0x0d37c7a5), TOBN(0x13380c37, 0x342d5422), + TOBN(0x92ac2d66, 0xd5d2ef92), TOBN(0x035a70c9, 0x030c63c6), + TOBN(0xc16025dd, 0x4ce4f152), TOBN(0x1f419a71, 0xf9df7c06), + TOBN(0x6d5b2214, 0x91e4bb14), TOBN(0xfc43c6cc, 0x839fb4ce), + TOBN(0x49f06591, 0x925d6b2d), TOBN(0x4b37d9d3, 0x62186598), + TOBN(0x8c54a971, 0xd01b1629), TOBN(0xe1a9c29f, 0x51d50e05), + TOBN(0x5109b785, 0x71ba1861), TOBN(0x48b22d5c, 0xd0c8f93d), + TOBN(0xe8fa84a7, 0x8633bb93), TOBN(0x53fba6ba, 0x5aebbd08), + TOBN(0x7ff27df3, 0xe5eea7d8), TOBN(0x521c8796, 0x68ca7158), + TOBN(0xb9d5133b, 0xce6f1a05), TOBN(0x2d50cd53, 0xfd0ebee4), + TOBN(0xc82115d6, 0xc5a3ef16), TOBN(0x993eff9d, 0xba079221), + TOBN(0xe4da2c5e, 0x4b5da81c), TOBN(0x9a89dbdb, 0x8033fd85), + TOBN(0x60819ebf, 0x2b892891), TOBN(0x53902b21, 0x5d14a4d5), + TOBN(0x6ac35051, 0xd7fda421), TOBN(0xcc6ab885, 0x61c83284), + TOBN(0x14eba133, 0xf74cff17), TOBN(0x240aaa03, 0xecb813f2), + TOBN(0xcfbb6540, 0x6f665bee), TOBN(0x084b1fe4, 0xa425ad73), + TOBN(0x009d5d16, 0xd081f6a6), TOBN(0x35304fe8, 0xeef82c90), + TOBN(0xf20346d5, 0xaa9eaa22), TOBN(0x0ada9f07, 0xac1c91e3), + TOBN(0xa6e21678, 0x968a6144), TOBN(0x54c1f77c, 0x07b31a1e), + TOBN(0xd6bb787e, 0x5781fbe1), TOBN(0x61bd2ee0, 0xe31f1c4a), + TOBN(0xf25aa1e9, 0x781105fc), TOBN(0x9cf2971f, 0x7b2f8e80), + TOBN(0x26d15412, 0xcdff919b), TOBN(0x01db4ebe, 0x34bc896e), + TOBN(0x7d9b3e23, 0xb40df1cf), TOBN(0x59337373, 0x94e971b4), + TOBN(0xbf57bd14, 0x669cf921), TOBN(0x865daedf, 0x0c1a1064), + TOBN(0x3eb70bd3, 0x83279125), TOBN(0xbc3d5b9f, 0x34ecdaab), + TOBN(0x91e3ed7e, 0x5f755caf), TOBN(0x49699f54, 0xd41e6f02), + TOBN(0x185770e1, 0xd4a7a15b), TOBN(0x08f3587a, 0xeaac87e7), + TOBN(0x352018db, 0x473133ea), TOBN(0x674ce719, 0x04fd30fc), + TOBN(0x7b8d9835, 0x088b3e0e), TOBN(0x7a0356a9, 0x5d0d47a1), + TOBN(0x9d9e7659, 0x6474a3c4), TOBN(0x61ea48a7, 0xff66966c), + TOBN(0x30417758, 0x0f3e4834), TOBN(0xfdbb21c2, 0x17a9afcb), + TOBN(0x756fa17f, 0x2f9a67b3), TOBN(0x2a6b2421, 
0xa245c1a8), + TOBN(0x64be2794, 0x4af02291), TOBN(0xade465c6, 0x2a5804fe), + TOBN(0x8dffbd39, 0xa6f08fd7), TOBN(0xc4efa84c, 0xaa14403b), + TOBN(0xa1b91b2a, 0x442b0f5c), TOBN(0xb748e317, 0xcf997736), + TOBN(0x8d1b62bf, 0xcee90e16), TOBN(0x907ae271, 0x0b2078c0), + TOBN(0xdf31534b, 0x0c9bcddd), TOBN(0x043fb054, 0x39adce83), + TOBN(0x99031043, 0xd826846a), TOBN(0x61a9c0d6, 0xb144f393), + TOBN(0xdab48046, 0x47718427), TOBN(0xdf17ff9b, 0x6e830f8b), + TOBN(0x408d7ee8, 0xe49a1347), TOBN(0x6ac71e23, 0x91c1d4ae), + TOBN(0xc8cbb9fd, 0x1defd73c), TOBN(0x19840657, 0xbbbbfec5), + TOBN(0x39db1cb5, 0x9e7ef8ea), TOBN(0x78aa8296, 0x64105f30), + TOBN(0xa3d9b7f0, 0xa3738c29), TOBN(0x0a2f235a, 0xbc3250a3), + TOBN(0x55e506f6, 0x445e4caf), TOBN(0x0974f73d, 0x33475f7a), + TOBN(0xd37dbba3, 0x5ba2f5a8), TOBN(0x542c6e63, 0x6af40066), + TOBN(0x26d99b53, 0xc5d73e2c), TOBN(0x06060d7d, 0x6c3ca33e), + TOBN(0xcdbef1c2, 0x065fef4a), TOBN(0x77e60f7d, 0xfd5b92e3), + TOBN(0xd7c549f0, 0x26708350), TOBN(0x201b3ad0, 0x34f121bf), + TOBN(0x5fcac2a1, 0x0334fc14), TOBN(0x8a9a9e09, 0x344552f6), + TOBN(0x7dd8a1d3, 0x97653082), TOBN(0x5fc0738f, 0x79d4f289), + TOBN(0x787d244d, 0x17d2d8c3), TOBN(0xeffc6345, 0x70830684), + TOBN(0x5ddb96dd, 0xe4f73ae5), TOBN(0x8efb14b1, 0x172549a5), + TOBN(0x6eb73eee, 0x2245ae7a), TOBN(0xbca4061e, 0xea11f13e), + TOBN(0xb577421d, 0x30b01f5d), TOBN(0xaa688b24, 0x782e152c), + TOBN(0x67608e71, 0xbd3502ba), TOBN(0x4ef41f24, 0xb4de75a0), + TOBN(0xb08dde5e, 0xfd6125e5), TOBN(0xde484825, 0xa409543f), + TOBN(0x1f198d98, 0x65cc2295), TOBN(0x428a3771, 0x6e0edfa2), + TOBN(0x4f9697a2, 0xadf35fc7), TOBN(0x01a43c79, 0xf7cac3c7), + TOBN(0xb05d7059, 0x0fd3659a), TOBN(0x8927f30c, 0xbb7f2d9a), + TOBN(0x4023d1ac, 0x8cf984d3), TOBN(0x32125ed3, 0x02897a45), + TOBN(0xfb572dad, 0x3d414205), TOBN(0x73000ef2, 0xe3fa82a9), + TOBN(0x4c0868e9, 0xf10a5581), TOBN(0x5b61fc67, 0x6b0b3ca5), + TOBN(0xc1258d5b, 0x7cae440c), TOBN(0x21c08b41, 0x402b7531), + TOBN(0xf61a8955, 0xde932321), TOBN(0x3568faf8, 0x2d1408af), + TOBN(0x71b15e99, 0x9ecf965b), TOBN(0xf14ed248, 0xe917276f), + TOBN(0xc6f4caa1, 0x820cf9e2), TOBN(0x681b20b2, 0x18d83c7e), + TOBN(0x6cde738d, 0xc6c01120), TOBN(0x71db0813, 0xae70e0db), + TOBN(0x95fc0644, 0x74afe18c), TOBN(0x34619053, 0x129e2be7), + TOBN(0x80615cea, 0xdb2a3b15), TOBN(0x0a49a19e, 0xdb4c7073), + TOBN(0x0e1b84c8, 0x8fd2d367), TOBN(0xd74bf462, 0x033fb8aa), + TOBN(0x889f6d65, 0x533ef217), TOBN(0x7158c7e4, 0xc3ca2e87), + TOBN(0xfb670dfb, 0xdc2b4167), TOBN(0x75910a01, 0x844c257f), + TOBN(0xf336bf07, 0xcf88577d), TOBN(0x22245250, 0xe45e2ace), + TOBN(0x2ed92e8d, 0x7ca23d85), TOBN(0x29f8be4c, 0x2b812f58), + TOBN(0xdd9ebaa7, 0x076fe12b), TOBN(0x3f2400cb, 0xae1537f9), + TOBN(0x1aa93528, 0x17bdfb46), TOBN(0xc0f98430, 0x67883b41), + TOBN(0x5590ede1, 0x0170911d), TOBN(0x7562f5bb, 0x34d4b17f), + TOBN(0xe1fa1df2, 0x1826b8d2), TOBN(0xb40b796a, 0x6bd80d59), + TOBN(0xd65bf197, 0x3467ba92), TOBN(0x8c9b46db, 0xf70954b0), + TOBN(0x97c8a0f3, 0x0e78f15d), TOBN(0xa8f3a69a, 0x85a4c961), + TOBN(0x4242660f, 0x61e4ce9b), TOBN(0xbf06aab3, 0x6ea6790c), + TOBN(0xc6706f8e, 0xec986416), TOBN(0x9e56dec1, 0x9a9fc225), + TOBN(0x527c46f4, 0x9a9898d9), TOBN(0xd799e77b, 0x5633cdef), + TOBN(0x24eacc16, 0x7d9e4297), TOBN(0xabb61cea, 0x6b1cb734), + TOBN(0xbee2e8a7, 0xf778443c), TOBN(0x3bb42bf1, 0x29de2fe6), + TOBN(0xcbed86a1, 0x3003bb6f), TOBN(0xd3918e6c, 0xd781cdf6), + TOBN(0x4bee3271, 0x9a5103f1), TOBN(0x5243efc6, 0xf50eac06), + TOBN(0xb8e122cb, 0x6adcc119), TOBN(0x1b7faa84, 0xc0b80a08), + TOBN(0x32c3d1bd, 0x6dfcd08c), TOBN(0x129dec4e, 0x0be427de), + 
TOBN(0x98ab679c, 0x1d263c83), TOBN(0xafc83cb7, 0xcef64eff), + TOBN(0x85eb6088, 0x2fa6be76), TOBN(0x892585fb, 0x1328cbfe), + TOBN(0xc154d3ed, 0xcf618dda), TOBN(0xc44f601b, 0x3abaf26e), + TOBN(0x7bf57d0b, 0x2be1fdfd), TOBN(0xa833bd2d, 0x21137fee), + TOBN(0x9353af36, 0x2db591a8), TOBN(0xc76f26dc, 0x5562a056), + TOBN(0x1d87e47d, 0x3fdf5a51), TOBN(0x7afb5f93, 0x55c9cab0), + TOBN(0x91bbf58f, 0x89e0586e), TOBN(0x7c72c018, 0x0d843709), + TOBN(0xa9a5aafb, 0x99b5c3dc), TOBN(0xa48a0f1d, 0x3844aeb0), + TOBN(0x7178b7dd, 0xb667e482), TOBN(0x453985e9, 0x6e23a59a), + TOBN(0x4a54c860, 0x01b25dd8), TOBN(0x0dd37f48, 0xfb897c8a), + TOBN(0x5f8aa610, 0x0ea90cd9), TOBN(0xc8892c68, 0x16d5830d), + TOBN(0xeb4befc0, 0xef514ca5), TOBN(0x478eb679, 0xe72c9ee6), + TOBN(0x9bca20da, 0xdbc40d5f), TOBN(0xf015de21, 0xdde4f64a), + TOBN(0xaa6a4de0, 0xeaf4b8a5), TOBN(0x68cfd9ca, 0x4bc60e32), + TOBN(0x668a4b01, 0x7fd15e70), TOBN(0xd9f0694a, 0xf27dc09d), + TOBN(0xf6c3cad5, 0xba708bcd), TOBN(0x5cd2ba69, 0x5bb95c2a), + TOBN(0xaa28c1d3, 0x33c0a58f), TOBN(0x23e274e3, 0xabc77870), + TOBN(0x44c3692d, 0xdfd20a4a), TOBN(0x091c5fd3, 0x81a66653), + TOBN(0x6c0bb691, 0x09a0757d), TOBN(0x9072e8b9, 0x667343ea), + TOBN(0x31d40eb0, 0x80848bec), TOBN(0x95bd480a, 0x79fd36cc), + TOBN(0x01a77c61, 0x65ed43f5), TOBN(0xafccd127, 0x2e0d40bf), + TOBN(0xeccfc82d, 0x1cc1884b), TOBN(0xc85ac201, 0x5d4753b4), + TOBN(0xc7a6caac, 0x658e099f), TOBN(0xcf46369e, 0x04b27390), + TOBN(0xe2e7d049, 0x506467ea), TOBN(0x481b63a2, 0x37cdeccc), + TOBN(0x4029abd8, 0xed80143a), TOBN(0x28bfe3c7, 0xbcb00b88), + TOBN(0x3bec1009, 0x0643d84a), TOBN(0x885f3668, 0xabd11041), + TOBN(0xdb02432c, 0xf83a34d6), TOBN(0x32f7b360, 0x719ceebe), + TOBN(0xf06c7837, 0xdad1fe7a), TOBN(0x60a157a9, 0x5441a0b0), + TOBN(0x704970e9, 0xe2d47550), TOBN(0xcd2bd553, 0x271b9020), + TOBN(0xff57f82f, 0x33e24a0b), TOBN(0x9cbee23f, 0xf2565079), + TOBN(0x16353427, 0xeb5f5825), TOBN(0x276feec4, 0xe948d662), + TOBN(0xd1b62bc6, 0xda10032b), TOBN(0x718351dd, 0xf0e72a53), + TOBN(0x93452076, 0x2420e7ba), TOBN(0x96368fff, 0x3a00118d), + TOBN(0x00ce2d26, 0x150a49e4), TOBN(0x0c28b636, 0x3f04706b), + TOBN(0xbad65a46, 0x58b196d0), TOBN(0x6c8455fc, 0xec9f8b7c), + TOBN(0xe90c895f, 0x2d71867e), TOBN(0x5c0be31b, 0xedf9f38c), + TOBN(0x2a37a15e, 0xd8f6ec04), TOBN(0x239639e7, 0x8cd85251), + TOBN(0xd8975315, 0x9c7c4c6b), TOBN(0x603aa3c0, 0xd7409af7), + TOBN(0xb8d53d0c, 0x007132fb), TOBN(0x68d12af7, 0xa6849238), + TOBN(0xbe0607e7, 0xbf5d9279), TOBN(0x9aa50055, 0xaada74ce), + TOBN(0xe81079cb, 0xba7e8ccb), TOBN(0x610c71d1, 0xa5f4ff5e), + TOBN(0x9e2ee1a7, 0x5aa07093), TOBN(0xca84004b, 0xa75da47c), + TOBN(0x074d3951, 0x3de75401), TOBN(0xf938f756, 0xbb311592), + TOBN(0x96197618, 0x00a43421), TOBN(0x39a25362, 0x07bc78c8), + TOBN(0x278f710a, 0x0a171276), TOBN(0xb28446ea, 0x8d1a8f08), + TOBN(0x184781bf, 0xe3b6a661), TOBN(0x7751cb1d, 0xe6d279f7), + TOBN(0xf8ff95d6, 0xc59eb662), TOBN(0x186d90b7, 0x58d3dea7), + TOBN(0x0e4bb6c1, 0xdfb4f754), TOBN(0x5c5cf56b, 0x2b2801dc), + TOBN(0xc561e452, 0x1f54564d), TOBN(0xb4fb8c60, 0xf0dd7f13), + TOBN(0xf8849630, 0x33ff98c7), TOBN(0x9619fffa, 0xcf17769c), + TOBN(0xf8090bf6, 0x1bfdd80a), TOBN(0x14d9a149, 0x422cfe63), + TOBN(0xb354c360, 0x6f6df9ea), TOBN(0xdbcf770d, 0x218f17ea), + TOBN(0x207db7c8, 0x79eb3480), TOBN(0x213dbda8, 0x559b6a26), + TOBN(0xac4c200b, 0x29fc81b3), TOBN(0xebc3e09f, 0x171d87c1), + TOBN(0x91799530, 0x1481aa9e), TOBN(0x051b92e1, 0x92e114fa), + TOBN(0xdf8f92e9, 0xecb5537f), TOBN(0x44b1b2cc, 0x290c7483), + TOBN(0xa711455a, 0x2adeb016), TOBN(0x964b6856, 0x81a10c2c), + TOBN(0x4f159d99, 
0xcec03623), TOBN(0x05532225, 0xef3271ea), + TOBN(0xb231bea3, 0xc5ee4849), TOBN(0x57a54f50, 0x7094f103), + TOBN(0x3e2d421d, 0x9598b352), TOBN(0xe865a49c, 0x67412ab4), + TOBN(0xd2998a25, 0x1cc3a912), TOBN(0x5d092808, 0x0c74d65d), + TOBN(0x73f45908, 0x4088567a), TOBN(0xeb6b280e, 0x1f214a61), + TOBN(0x8c9adc34, 0xcaf0c13d), TOBN(0x39d12938, 0xf561fb80), + TOBN(0xb2dc3a5e, 0xbc6edfb4), TOBN(0x7485b1b1, 0xfe4d210e), + TOBN(0x062e0400, 0xe186ae72), TOBN(0x91e32d5c, 0x6eeb3b88), + TOBN(0x6df574d7, 0x4be59224), TOBN(0xebc88ccc, 0x716d55f3), + TOBN(0x26c2e6d0, 0xcad6ed33), TOBN(0xc6e21e7d, 0x0d3e8b10), + TOBN(0x2cc5840e, 0x5bcc36bb), TOBN(0x9292445e, 0x7da74f69), + TOBN(0x8be8d321, 0x4e5193a8), TOBN(0x3ec23629, 0x8df06413), + TOBN(0xc7e9ae85, 0xb134defa), TOBN(0x6073b1d0, 0x1bb2d475), + TOBN(0xb9ad615e, 0x2863c00d), TOBN(0x9e29493d, 0x525f4ac4), + TOBN(0xc32b1dea, 0x4e9acf4f), TOBN(0x3e1f01c8, 0xa50db88d), + TOBN(0xb05d70ea, 0x04da916c), TOBN(0x714b0d0a, 0xd865803e), + TOBN(0x4bd493fc, 0x9920cb5e), TOBN(0x5b44b1f7, 0x92c7a3ac), + TOBN(0xa2a77293, 0xbcec9235), TOBN(0x5ee06e87, 0xcd378553), + TOBN(0xceff8173, 0xda621607), TOBN(0x2bb03e4c, 0x99f5d290), + TOBN(0x2945106a, 0xa6f734ac), TOBN(0xb5056604, 0xd25c4732), + TOBN(0x5945920c, 0xe079afee), TOBN(0x686e17a0, 0x6789831f), + TOBN(0x5966bee8, 0xb74a5ae5), TOBN(0x38a673a2, 0x1e258d46), + TOBN(0xbd1cc1f2, 0x83141c95), TOBN(0x3b2ecf4f, 0x0e96e486), + TOBN(0xcd3aa896, 0x74e5fc78), TOBN(0x415ec10c, 0x2482fa7a), + TOBN(0x15234419, 0x80503380), TOBN(0x513d917a, 0xd314b392), + TOBN(0xb0b52f4e, 0x63caecae), TOBN(0x07bf22ad, 0x2dc7780b), + TOBN(0xe761e8a1, 0xe4306839), TOBN(0x1b3be962, 0x5dd7feaa), + TOBN(0x4fe728de, 0x74c778f1), TOBN(0xf1fa0bda, 0x5e0070f6), + TOBN(0x85205a31, 0x6ec3f510), TOBN(0x2c7e4a14, 0xd2980475), + TOBN(0xde3c19c0, 0x6f30ebfd), TOBN(0xdb1c1f38, 0xd4b7e644), + TOBN(0xfe291a75, 0x5dce364a), TOBN(0xb7b22a3c, 0x058f5be3), + TOBN(0x2cd2c302, 0x37fea38c), TOBN(0x2930967a, 0x2e17be17), + TOBN(0x87f009de, 0x0c061c65), TOBN(0xcb014aac, 0xedc6ed44), + TOBN(0x49bd1cb4, 0x3bafb1eb), TOBN(0x81bd8b5c, 0x282d3688), + TOBN(0x1cdab87e, 0xf01a17af), TOBN(0x21f37ac4, 0xe710063b), + TOBN(0x5a6c5676, 0x42fc8193), TOBN(0xf4753e70, 0x56a6015c), + TOBN(0x020f795e, 0xa15b0a44), TOBN(0x8f37c8d7, 0x8958a958), + TOBN(0x63b7e89b, 0xa4b675b5), TOBN(0xb4fb0c0c, 0x0fc31aea), + TOBN(0xed95e639, 0xa7ff1f2e), TOBN(0x9880f5a3, 0x619614fb), + TOBN(0xdeb6ff02, 0x947151ab), TOBN(0x5bc5118c, 0xa868dcdb), + TOBN(0xd8da2055, 0x4c20cea5), TOBN(0xcac2776e, 0x14c4d69a), + TOBN(0xcccb22c1, 0x622d599b), TOBN(0xa4ddb653, 0x68a9bb50), + TOBN(0x2c4ff151, 0x1b4941b4), TOBN(0xe1ff19b4, 0x6efba588), + TOBN(0x35034363, 0xc48345e0), TOBN(0x45542e3d, 0x1e29dfc4), + TOBN(0xf197cb91, 0x349f7aed), TOBN(0x3b2b5a00, 0x8fca8420), + TOBN(0x7c175ee8, 0x23aaf6d8), TOBN(0x54dcf421, 0x35af32b6), + TOBN(0x0ba14307, 0x27d6561e), TOBN(0x879d5ee4, 0xd175b1e2), + TOBN(0xc7c43673, 0x99807db5), TOBN(0x77a54455, 0x9cd55bcd), + TOBN(0xe6c2ff13, 0x0105c072), TOBN(0x18f7a99f, 0x8dda7da4), + TOBN(0x4c301820, 0x0e2d35c1), TOBN(0x06a53ca0, 0xd9cc6c82), + TOBN(0xaa21cc1e, 0xf1aa1d9e), TOBN(0x32414334, 0x4a75b1e8), + TOBN(0x2a6d1328, 0x0ebe9fdc), TOBN(0x16bd173f, 0x98a4755a), + TOBN(0xfbb9b245, 0x2133ffd9), TOBN(0x39a8b2f1, 0x830f1a20), + TOBN(0x484bc97d, 0xd5a1f52a), TOBN(0xd6aebf56, 0xa40eddf8), + TOBN(0x32257acb, 0x76ccdac6), TOBN(0xaf4d36ec, 0x1586ff27), + TOBN(0x8eaa8863, 0xf8de7dd1), TOBN(0x0045d5cf, 0x88647c16)} + , + {TOBN(0xa6f3d574, 0xc005979d), TOBN(0xc2072b42, 0x6a40e350), + TOBN(0xfca5c156, 0x8de2ecf9), 
TOBN(0xa8c8bf5b, 0xa515344e), + TOBN(0x97aee555, 0x114df14a), TOBN(0xd4374a4d, 0xfdc5ec6b), + TOBN(0x754cc28f, 0x2ca85418), TOBN(0x71cb9e27, 0xd3c41f78), + TOBN(0x89105079, 0x03605c39), TOBN(0xf0843d9e, 0xa142c96c), + TOBN(0xf3744934, 0x16923684), TOBN(0x732caa2f, 0xfa0a2893), + TOBN(0xb2e8c270, 0x61160170), TOBN(0xc32788cc, 0x437fbaa3), + TOBN(0x39cd818e, 0xa6eda3ac), TOBN(0xe2e94239, 0x9e2b2e07), + TOBN(0x6967d39b, 0x0260e52a), TOBN(0xd42585cc, 0x90653325), + TOBN(0x0d9bd605, 0x21ca7954), TOBN(0x4fa20877, 0x81ed57b3), + TOBN(0x60c1eff8, 0xe34a0bbe), TOBN(0x56b0040c, 0x84f6ef64), + TOBN(0x28be2b24, 0xb1af8483), TOBN(0xb2278163, 0xf5531614), + TOBN(0x8df27545, 0x5922ac1c), TOBN(0xa7b3ef5c, 0xa52b3f63), + TOBN(0x8e77b214, 0x71de57c4), TOBN(0x31682c10, 0x834c008b), + TOBN(0xc76824f0, 0x4bd55d31), TOBN(0xb6d1c086, 0x17b61c71), + TOBN(0x31db0903, 0xc2a5089d), TOBN(0x9c092172, 0x184e5d3f), + TOBN(0xdd7ced5b, 0xc00cc638), TOBN(0x1a2015eb, 0x61278fc2), + TOBN(0x2e8e5288, 0x6a37f8d6), TOBN(0xc457786f, 0xe79933ad), + TOBN(0xb3fe4cce, 0x2c51211a), TOBN(0xad9b10b2, 0x24c20498), + TOBN(0x90d87a4f, 0xd28db5e5), TOBN(0x698cd105, 0x3aca2fc3), + TOBN(0x4f112d07, 0xe91b536d), TOBN(0xceb982f2, 0x9eba09d6), + TOBN(0x3c157b2c, 0x197c396f), TOBN(0xe23c2d41, 0x7b66eb24), + TOBN(0x480c57d9, 0x3f330d37), TOBN(0xb3a4c8a1, 0x79108deb), + TOBN(0x702388de, 0xcb199ce5), TOBN(0x0b019211, 0xb944a8d4), + TOBN(0x24f2a692, 0x840bb336), TOBN(0x7c353bdc, 0xa669fa7b), + TOBN(0xda20d6fc, 0xdec9c300), TOBN(0x625fbe2f, 0xa13a4f17), + TOBN(0xa2b1b61a, 0xdbc17328), TOBN(0x008965bf, 0xa9515621), + TOBN(0x49690939, 0xc620ff46), TOBN(0x182dd27d, 0x8717e91c), + TOBN(0x5ace5035, 0xea6c3997), TOBN(0x54259aaa, 0xc2610bef), + TOBN(0xef18bb3f, 0x3c80dd39), TOBN(0x6910b95b, 0x5fc3fa39), + TOBN(0xfce2f510, 0x43e09aee), TOBN(0xced56c9f, 0xa7675665), + TOBN(0x10e265ac, 0xd872db61), TOBN(0x6982812e, 0xae9fce69), + TOBN(0x29be11c6, 0xce800998), TOBN(0x72bb1752, 0xb90360d9), + TOBN(0x2c193197, 0x5a4ad590), TOBN(0x2ba2f548, 0x9fc1dbc0), + TOBN(0x7fe4eebb, 0xe490ebe0), TOBN(0x12a0a4cd, 0x7fae11c0), + TOBN(0x7197cf81, 0xe903ba37), TOBN(0xcf7d4aa8, 0xde1c6dd8), + TOBN(0x92af6bf4, 0x3fd5684c), TOBN(0x2b26eecf, 0x80360aa1), + TOBN(0xbd960f30, 0x00546a82), TOBN(0x407b3c43, 0xf59ad8fe), + TOBN(0x86cae5fe, 0x249c82ba), TOBN(0x9e0faec7, 0x2463744c), + TOBN(0x87f551e8, 0x94916272), TOBN(0x033f9344, 0x6ceb0615), + TOBN(0x1e5eb0d1, 0x8be82e84), TOBN(0x89967f0e, 0x7a582fef), + TOBN(0xbcf687d5, 0xa6e921fa), TOBN(0xdfee4cf3, 0xd37a09ba), + TOBN(0x94f06965, 0xb493c465), TOBN(0x638b9a1c, 0x7635c030), + TOBN(0x76667864, 0x66f05e9f), TOBN(0xccaf6808, 0xc04da725), + TOBN(0xca2eb690, 0x768fccfc), TOBN(0xf402d37d, 0xb835b362), + TOBN(0x0efac0d0, 0xe2fdfcce), TOBN(0xefc9cdef, 0xb638d990), + TOBN(0x2af12b72, 0xd1669a8b), TOBN(0x33c536bc, 0x5774ccbd), + TOBN(0x30b21909, 0xfb34870e), TOBN(0xc38fa2f7, 0x7df25aca), + TOBN(0x74c5f02b, 0xbf81f3f5), TOBN(0x0525a5ae, 0xaf7e4581), + TOBN(0x88d2aaba, 0x433c54ae), TOBN(0xed9775db, 0x806a56c5), + TOBN(0xd320738a, 0xc0edb37d), TOBN(0x25fdb6ee, 0x66cc1f51), + TOBN(0xac661d17, 0x10600d76), TOBN(0x931ec1f3, 0xbdd1ed76), + TOBN(0x65c11d62, 0x19ee43f1), TOBN(0x5cd57c3e, 0x60829d97), + TOBN(0xd26c91a3, 0x984be6e8), TOBN(0xf08d9309, 0x8b0c53bd), + TOBN(0x94bc9e5b, 0xc016e4ea), TOBN(0xd3916839, 0x11d43d2b), + TOBN(0x886c5ad7, 0x73701155), TOBN(0xe0377626, 0x20b00715), + TOBN(0x7f01c9ec, 0xaa80ba59), TOBN(0x3083411a, 0x68538e51), + TOBN(0x970370f1, 0xe88128af), TOBN(0x625cc3db, 0x91dec14b), + TOBN(0xfef9666c, 0x01ac3107), TOBN(0xb2a8d577, 
0xd5057ac3), + TOBN(0xb0f26299, 0x92be5df7), TOBN(0xf579c8e5, 0x00353924), + TOBN(0xb8fa3d93, 0x1341ed7a), TOBN(0x4223272c, 0xa7b59d49), + TOBN(0x3dcb1947, 0x83b8c4a4), TOBN(0x4e413c01, 0xed1302e4), + TOBN(0x6d999127, 0xe17e44ce), TOBN(0xee86bf75, 0x33b3adfb), + TOBN(0xf6902fe6, 0x25aa96ca), TOBN(0xb73540e4, 0xe5aae47d), + TOBN(0x32801d7b, 0x1b4a158c), TOBN(0xe571c99e, 0x27e2a369), + TOBN(0x40cb76c0, 0x10d9f197), TOBN(0xc308c289, 0x3167c0ae), + TOBN(0xa6ef9dd3, 0xeb7958f2), TOBN(0xa7226dfc, 0x300879b1), + TOBN(0x6cd0b362, 0x7edf0636), TOBN(0x4efbce6c, 0x7bc37eed), + TOBN(0x75f92a05, 0x8d699021), TOBN(0x586d4c79, 0x772566e3), + TOBN(0x378ca5f1, 0x761ad23a), TOBN(0x650d86fc, 0x1465a8ac), + TOBN(0x7a4ed457, 0x842ba251), TOBN(0x6b65e3e6, 0x42234933), + TOBN(0xaf1543b7, 0x31aad657), TOBN(0xa4cefe98, 0xcbfec369), + TOBN(0xb587da90, 0x9f47befb), TOBN(0x6562e9fb, 0x41312d13), + TOBN(0xa691ea59, 0xeff1cefe), TOBN(0xcc30477a, 0x05fc4cf6), + TOBN(0xa1632461, 0x0b0ffd3d), TOBN(0xa1f16f3b, 0x5b355956), + TOBN(0x5b148d53, 0x4224ec24), TOBN(0xdc834e7b, 0xf977012a), + TOBN(0x7bfc5e75, 0xb2c69dbc), TOBN(0x3aa77a29, 0x03c3da6c), + TOBN(0xde0df03c, 0xca910271), TOBN(0xcbd5ca4a, 0x7806dc55), + TOBN(0xe1ca5807, 0x6db476cb), TOBN(0xfde15d62, 0x5f37a31e), + TOBN(0xf49af520, 0xf41af416), TOBN(0x96c5c5b1, 0x7d342db5), + TOBN(0x155c43b7, 0xeb4ceb9b), TOBN(0x2e993010, 0x4e77371a), + TOBN(0x1d2987da, 0x675d43af), TOBN(0xef2bc1c0, 0x8599fd72), + TOBN(0x96894b7b, 0x9342f6b2), TOBN(0x201eadf2, 0x7c8e71f0), + TOBN(0xf3479d9f, 0x4a1f3efc), TOBN(0xe0f8a742, 0x702a9704), + TOBN(0xeafd44b6, 0xb3eba40c), TOBN(0xf9739f29, 0xc1c1e0d0), + TOBN(0x0091471a, 0x619d505e), TOBN(0xc15f9c96, 0x9d7c263e), + TOBN(0x5be47285, 0x83afbe33), TOBN(0xa3b6d6af, 0x04f1e092), + TOBN(0xe76526b9, 0x751a9d11), TOBN(0x2ec5b26d, 0x9a4ae4d2), + TOBN(0xeb66f4d9, 0x02f6fb8d), TOBN(0x4063c561, 0x96912164), + TOBN(0xeb7050c1, 0x80ef3000), TOBN(0x288d1c33, 0xeaa5b3f0), + TOBN(0xe87c68d6, 0x07806fd8), TOBN(0xb2f7f9d5, 0x4bbbf50f), + TOBN(0x25972f3a, 0xac8d6627), TOBN(0xf8547774, 0x10e8c13b), + TOBN(0xcc50ef6c, 0x872b4a60), TOBN(0xab2a34a4, 0x4613521b), + TOBN(0x39c5c190, 0x983e15d1), TOBN(0x61dde5df, 0x59905512), + TOBN(0xe417f621, 0x9f2275f3), TOBN(0x0750c8b6, 0x451d894b), + TOBN(0x75b04ab9, 0x78b0bdaa), TOBN(0x3bfd9fd4, 0x458589bd), + TOBN(0xf1013e30, 0xee9120b6), TOBN(0x2b51af93, 0x23a4743e), + TOBN(0xea96ffae, 0x48d14d9e), TOBN(0x71dc0dbe, 0x698a1d32), + TOBN(0x914962d2, 0x0180cca4), TOBN(0x1ae60677, 0xc3568963), + TOBN(0x8cf227b1, 0x437bc444), TOBN(0xc650c83b, 0xc9962c7a), + TOBN(0x23c2c7dd, 0xfe7ccfc4), TOBN(0xf925c89d, 0x1b929d48), + TOBN(0x4460f74b, 0x06783c33), TOBN(0xac2c8d49, 0xa590475a), + TOBN(0xfb40b407, 0xb807bba0), TOBN(0x9d1e362d, 0x69ff8f3a), + TOBN(0xa33e9681, 0xcbef64a4), TOBN(0x67ece5fa, 0x332fb4b2), + TOBN(0x6900a99b, 0x739f10e3), TOBN(0xc3341ca9, 0xff525925), + TOBN(0xee18a626, 0xa9e2d041), TOBN(0xa5a83685, 0x29580ddd), + TOBN(0xf3470c81, 0x9d7de3cd), TOBN(0xedf02586, 0x2062cf9c), + TOBN(0xf43522fa, 0xc010edb0), TOBN(0x30314135, 0x13a4b1ae), + TOBN(0xc792e02a, 0xdb22b94b), TOBN(0x993d8ae9, 0xa1eaa45b), + TOBN(0x8aad6cd3, 0xcd1e1c63), TOBN(0x89529ca7, 0xc5ce688a), + TOBN(0x2ccee3aa, 0xe572a253), TOBN(0xe02b6438, 0x02a21efb), + TOBN(0xa7091b6e, 0xc9430358), TOBN(0x06d1b1fa, 0x9d7db504), + TOBN(0x58846d32, 0xc4744733), TOBN(0x40517c71, 0x379f9e34), + TOBN(0x2f65655f, 0x130ef6ca), TOBN(0x526e4488, 0xf1f3503f), + TOBN(0x8467bd17, 0x7ee4a976), TOBN(0x1d9dc913, 0x921363d1), + TOBN(0xd8d24c33, 0xb069e041), TOBN(0x5eb5da0a, 0x2cdf7f51), + 
TOBN(0x1c0f3cb1, 0x197b994f), TOBN(0x3c95a6c5, 0x2843eae9), + TOBN(0x7766ffc9, 0xa6097ea5), TOBN(0x7bea4093, 0xd723b867), + TOBN(0xb48e1f73, 0x4db378f9), TOBN(0x70025b00, 0xe37b77ac), + TOBN(0x943dc8e7, 0xaf24ad46), TOBN(0xb98a15ac, 0x16d00a85), + TOBN(0x3adc38ba, 0x2743b004), TOBN(0xb1c7f4f7, 0x334415ee), + TOBN(0xea43df8f, 0x1e62d05a), TOBN(0x32618905, 0x9d76a3b6), + TOBN(0x2fbd0bb5, 0xa23a0f46), TOBN(0x5bc971db, 0x6a01918c), + TOBN(0x7801d94a, 0xb4743f94), TOBN(0xb94df65e, 0x676ae22b), + TOBN(0xaafcbfab, 0xaf95894c), TOBN(0x7b9bdc07, 0x276b2241), + TOBN(0xeaf98362, 0x5bdda48b), TOBN(0x5977faf2, 0xa3fcb4df), + TOBN(0xbed042ef, 0x052c4b5b), TOBN(0x9fe87f71, 0x067591f0), + TOBN(0xc89c73ca, 0x22f24ec7), TOBN(0x7d37fa9e, 0xe64a9f1b), + TOBN(0x2710841a, 0x15562627), TOBN(0x2c01a613, 0xc243b034), + TOBN(0x1d135c56, 0x2bc68609), TOBN(0xc2ca1715, 0x8b03f1f6), + TOBN(0xc9966c2d, 0x3eb81d82), TOBN(0xc02abf4a, 0x8f6df13e), + TOBN(0x77b34bd7, 0x8f72b43b), TOBN(0xaff6218f, 0x360c82b0), + TOBN(0x0aa5726c, 0x8d55b9d2), TOBN(0xdc0adbe9, 0x99e9bffb), + TOBN(0x9097549c, 0xefb9e72a), TOBN(0x16755712, 0x9dfb3111), + TOBN(0xdd8bf984, 0xf26847f9), TOBN(0xbcb8e387, 0xdfb30cb7), + TOBN(0xc1fd32a7, 0x5171ef9c), TOBN(0x977f3fc7, 0x389b363f), + TOBN(0x116eaf2b, 0xf4babda0), TOBN(0xfeab68bd, 0xf7113c8e), + TOBN(0xd1e3f064, 0xb7def526), TOBN(0x1ac30885, 0xe0b3fa02), + TOBN(0x1c5a6e7b, 0x40142d9d), TOBN(0x839b5603, 0x30921c0b), + TOBN(0x48f301fa, 0x36a116a3), TOBN(0x380e1107, 0xcfd9ee6d), + TOBN(0x7945ead8, 0x58854be1), TOBN(0x4111c12e, 0xcbd4d49d), + TOBN(0xece3b1ec, 0x3a29c2ef), TOBN(0x6356d404, 0x8d3616f5), + TOBN(0x9f0d6a8f, 0x594d320e), TOBN(0x0989316d, 0xf651ccd2), + TOBN(0x6c32117a, 0x0f8fdde4), TOBN(0x9abe5cc5, 0xa26a9bbc), + TOBN(0xcff560fb, 0x9723f671), TOBN(0x21b2a12d, 0x7f3d593c), + TOBN(0xe4cb18da, 0x24ba0696), TOBN(0x186e2220, 0xc3543384), + TOBN(0x722f64e0, 0x88312c29), TOBN(0x94282a99, 0x17dc7752), + TOBN(0x62467bbf, 0x5a85ee89), TOBN(0xf435c650, 0xf10076a0), + TOBN(0xc9ff1539, 0x43b3a50b), TOBN(0x7132130c, 0x1a53efbc), + TOBN(0x31bfe063, 0xf7b0c5b7), TOBN(0xb0179a7d, 0x4ea994cc), + TOBN(0x12d064b3, 0xc85f455b), TOBN(0x47259328, 0x8f6e0062), + TOBN(0xf64e590b, 0xb875d6d9), TOBN(0x22dd6225, 0xad92bcc7), + TOBN(0xb658038e, 0xb9c3bd6d), TOBN(0x00cdb0d6, 0xfbba27c8), + TOBN(0x0c681337, 0x1062c45d), TOBN(0xd8515b8c, 0x2d33407d), + TOBN(0xcb8f699e, 0x8cbb5ecf), TOBN(0x8c4347f8, 0xc608d7d8), + TOBN(0x2c11850a, 0xbb3e00db), TOBN(0x20a8dafd, 0xecb49d19), + TOBN(0xbd781480, 0x45ee2f40), TOBN(0x75e354af, 0x416b60cf), + TOBN(0xde0b58a1, 0x8d49a8c4), TOBN(0xe40e94e2, 0xfa359536), + TOBN(0xbd4fa59f, 0x62accd76), TOBN(0x05cf466a, 0x8c762837), + TOBN(0xb5abda99, 0x448c277b), TOBN(0x5a9e01bf, 0x48b13740), + TOBN(0x9d457798, 0x326aad8d), TOBN(0xbdef4954, 0xc396f7e7), + TOBN(0x6fb274a2, 0xc253e292), TOBN(0x2800bf0a, 0x1cfe53e7), + TOBN(0x22426d31, 0x44438fd4), TOBN(0xef233923, 0x5e259f9a), + TOBN(0x4188503c, 0x03f66264), TOBN(0x9e5e7f13, 0x7f9fdfab), + TOBN(0x565eb76c, 0x5fcc1aba), TOBN(0xea632548, 0x59b5bff8), + TOBN(0x5587c087, 0xaab6d3fa), TOBN(0x92b639ea, 0x6ce39c1b), + TOBN(0x0706e782, 0x953b135c), TOBN(0x7308912e, 0x425268ef), + TOBN(0x599e92c7, 0x090e7469), TOBN(0x83b90f52, 0x9bc35e75), + TOBN(0x4750b3d0, 0x244975b3), TOBN(0xf3a44358, 0x11965d72), + TOBN(0x179c6774, 0x9c8dc751), TOBN(0xff18cdfe, 0xd23d9ff0), + TOBN(0xc4013833, 0x2028e247), TOBN(0x96e280e2, 0xf3bfbc79), + TOBN(0xf60417bd, 0xd0880a84), TOBN(0x263c9f3d, 0x2a568151), + TOBN(0x36be15b3, 0x2d2ce811), TOBN(0x846dc0c2, 0xf8291d21), + TOBN(0x5cfa0ecb, 
0x789fcfdb), TOBN(0x45a0beed, 0xd7535b9a), + TOBN(0xec8e9f07, 0x96d69af1), TOBN(0x31a7c5b8, 0x599ab6dc), + TOBN(0xd36d45ef, 0xf9e2e09f), TOBN(0x3cf49ef1, 0xdcee954b), + TOBN(0x6be34cf3, 0x086cff9b), TOBN(0x88dbd491, 0x39a3360f), + TOBN(0x1e96b8cc, 0x0dbfbd1d), TOBN(0xc1e5f7bf, 0xcb7e2552), + TOBN(0x0547b214, 0x28819d98), TOBN(0xc770dd9c, 0x7aea9dcb), + TOBN(0xaef0d4c7, 0x041d68c8), TOBN(0xcc2b9818, 0x13cb9ba8), + TOBN(0x7fc7bc76, 0xfe86c607), TOBN(0x6b7b9337, 0x502a9a95), + TOBN(0x1948dc27, 0xd14dab63), TOBN(0x249dd198, 0xdae047be), + TOBN(0xe8356584, 0xa981a202), TOBN(0x3531dd18, 0x3a893387), + TOBN(0x1be11f90, 0xc85c7209), TOBN(0x93d2fe1e, 0xe2a52b5a), + TOBN(0x8225bfe2, 0xec6d6b97), TOBN(0x9cf6d6f4, 0xbd0aa5de), + TOBN(0x911459cb, 0x54779f5f), TOBN(0x5649cddb, 0x86aeb1f3), + TOBN(0x32133579, 0x3f26ce5a), TOBN(0xc289a102, 0x550f431e), + TOBN(0x559dcfda, 0x73b84c6f), TOBN(0x84973819, 0xee3ac4d7), + TOBN(0xb51e55e6, 0xf2606a82), TOBN(0xe25f7061, 0x90f2fb57), + TOBN(0xacef6c2a, 0xb1a4e37c), TOBN(0x864e359d, 0x5dcf2706), + TOBN(0x479e6b18, 0x7ce57316), TOBN(0x2cab2500, 0x3a96b23d), + TOBN(0xed489862, 0x8ef16df7), TOBN(0x2056538c, 0xef3758b5), + TOBN(0xa7df865e, 0xf15d3101), TOBN(0x80c5533a, 0x61b553d7), + TOBN(0x366e1997, 0x4ed14294), TOBN(0x6620741f, 0xb3c0bcd6), + TOBN(0x21d1d9c4, 0xedc45418), TOBN(0x005b859e, 0xc1cc4a9d), + TOBN(0xdf01f630, 0xa1c462f0), TOBN(0x15d06cf3, 0xf26820c7), + TOBN(0x9f7f24ee, 0x3484be47), TOBN(0x2ff33e96, 0x4a0c902f), + TOBN(0x00bdf457, 0x5a0bc453), TOBN(0x2378dfaf, 0x1aa238db), + TOBN(0x272420ec, 0x856720f2), TOBN(0x2ad9d95b, 0x96797291), + TOBN(0xd1242cc6, 0x768a1558), TOBN(0x2e287f8b, 0x5cc86aa8), + TOBN(0x796873d0, 0x990cecaa), TOBN(0xade55f81, 0x675d4080), + TOBN(0x2645eea3, 0x21f0cd84), TOBN(0x7a1efa0f, 0xb4e17d02), + TOBN(0xf6858420, 0x037cc061), TOBN(0x682e05f0, 0xd5d43e12), + TOBN(0x59c36994, 0x27218710), TOBN(0x85cbba4d, 0x3f7cd2fc), + TOBN(0x726f9729, 0x7a3cd22a), TOBN(0x9f8cd5dc, 0x4a628397), + TOBN(0x17b93ab9, 0xc23165ed), TOBN(0xff5f5dbf, 0x122823d4), + TOBN(0xc1e4e4b5, 0x654a446d), TOBN(0xd1a9496f, 0x677257ba), + TOBN(0x6387ba94, 0xde766a56), TOBN(0x23608bc8, 0x521ec74a), + TOBN(0x16a522d7, 0x6688c4d4), TOBN(0x9d6b4282, 0x07373abd), + TOBN(0xa62f07ac, 0xb42efaa3), TOBN(0xf73e00f7, 0xe3b90180), + TOBN(0x36175fec, 0x49421c3e), TOBN(0xc4e44f9b, 0x3dcf2678), + TOBN(0x76df436b, 0x7220f09f), TOBN(0x172755fb, 0x3aa8b6cf), + TOBN(0xbab89d57, 0x446139cc), TOBN(0x0a0a6e02, 0x5fe0208f), + TOBN(0xcdbb63e2, 0x11e5d399), TOBN(0x33ecaa12, 0xa8977f0b), + TOBN(0x59598b21, 0xf7c42664), TOBN(0xb3e91b32, 0xab65d08a), + TOBN(0x035822ee, 0xf4502526), TOBN(0x1dcf0176, 0x720a82a9), + TOBN(0x50f8598f, 0x3d589e02), TOBN(0xdf0478ff, 0xb1d63d2c), + TOBN(0x8b8068bd, 0x1571cd07), TOBN(0x30c3aa4f, 0xd79670cd), + TOBN(0x25e8fd4b, 0x941ade7f), TOBN(0x3d1debdc, 0x32790011), + TOBN(0x65b6dcbd, 0x3a3f9ff0), TOBN(0x282736a4, 0x793de69c), + TOBN(0xef69a0c3, 0xd41d3bd3), TOBN(0xb533b8c9, 0x07a26bde), + TOBN(0xe2801d97, 0xdb2edf9f), TOBN(0xdc4a8269, 0xe1877af0), + TOBN(0x6c1c5851, 0x3d590dbe), TOBN(0x84632f6b, 0xee4e9357), + TOBN(0xd36d36b7, 0x79b33374), TOBN(0xb46833e3, 0x9bbca2e6), + TOBN(0x37893913, 0xf7fc0586), TOBN(0x385315f7, 0x66bf4719), + TOBN(0x72c56293, 0xb31855dc), TOBN(0xd1416d4e, 0x849061fe), + TOBN(0xbeb3ab78, 0x51047213), TOBN(0x447f6e61, 0xf040c996), + TOBN(0xd06d310d, 0x638b1d0c), TOBN(0xe28a413f, 0xbad1522e), + TOBN(0x685a76cb, 0x82003f86), TOBN(0x610d07f7, 0x0bcdbca3), + TOBN(0x6ff66021, 0x9ca4c455), TOBN(0x7df39b87, 0xcea10eec), + TOBN(0xb9255f96, 0xe22db218), 
TOBN(0x8cc6d9eb, 0x08a34c44), + TOBN(0xcd4ffb86, 0x859f9276), TOBN(0x8fa15eb2, 0x50d07335), + TOBN(0xdf553845, 0xcf2c24b5), TOBN(0x89f66a9f, 0x52f9c3ba), + TOBN(0x8f22b5b9, 0xe4a7ceb3), TOBN(0xaffef809, 0x0e134686), + TOBN(0x3e53e1c6, 0x8eb8fac2), TOBN(0x93c1e4eb, 0x28aec98e), + TOBN(0xb6b91ec5, 0x32a43bcb), TOBN(0x2dbfa947, 0xb2d74a51), + TOBN(0xe065d190, 0xca84bad7), TOBN(0xfb13919f, 0xad58e65c), + TOBN(0x3c41718b, 0xf1cb6e31), TOBN(0x688969f0, 0x06d05c3f), + TOBN(0xd4f94ce7, 0x21264d45), TOBN(0xfdfb65e9, 0x7367532b), + TOBN(0x5b1be8b1, 0x0945a39d), TOBN(0x229f789c, 0x2b8baf3b), + TOBN(0xd8f41f3e, 0x6f49f15d), TOBN(0x678ce828, 0x907f0792), + TOBN(0xc69ace82, 0xfca6e867), TOBN(0x106451ae, 0xd01dcc89), + TOBN(0x1bb4f7f0, 0x19fc32d2), TOBN(0x64633dfc, 0xb00c52d2), + TOBN(0x8f13549a, 0xad9ea445), TOBN(0x99a3bf50, 0xfb323705), + TOBN(0x0c9625a2, 0x534d4dbc), TOBN(0x45b8f1d1, 0xc2a2fea3), + TOBN(0x76ec21a1, 0xa530fc1a), TOBN(0x4bac9c2a, 0x9e5bd734), + TOBN(0x5996d76a, 0x7b4e3587), TOBN(0x0045cdee, 0x1182d9e3), + TOBN(0x1aee24b9, 0x1207f13d), TOBN(0x66452e97, 0x97345a41), + TOBN(0x16e5b054, 0x9f950cd0), TOBN(0x9cc72fb1, 0xd7fdd075), + TOBN(0x6edd61e7, 0x66249663), TOBN(0xde4caa4d, 0xf043cccb), + TOBN(0x11b1f57a, 0x55c7ac17), TOBN(0x779cbd44, 0x1a85e24d), + TOBN(0x78030f86, 0xe46081e7), TOBN(0xfd4a6032, 0x8e20f643), + TOBN(0xcc7a6488, 0x0a750c0f), TOBN(0x39bacfe3, 0x4e548e83), + TOBN(0x3d418c76, 0x0c110f05), TOBN(0x3e4daa4c, 0xb1f11588), + TOBN(0x2733e7b5, 0x5ffc69ff), TOBN(0x46f147bc, 0x92053127), + TOBN(0x885b2434, 0xd722df94), TOBN(0x6a444f65, 0xe6fc6b7c)} + , + {TOBN(0x7a1a465a, 0xc3f16ea8), TOBN(0x115a461d, 0xb2f1d11c), + TOBN(0x4767dd95, 0x6c68a172), TOBN(0x3392f2eb, 0xd13a4698), + TOBN(0xc7a99ccd, 0xe526cdc7), TOBN(0x8e537fdc, 0x22292b81), + TOBN(0x76d8cf69, 0xa6d39198), TOBN(0xffc5ff43, 0x2446852d), + TOBN(0x97b14f7e, 0xa90567e6), TOBN(0x513257b7, 0xb6ae5cb7), + TOBN(0x85454a3c, 0x9f10903d), TOBN(0xd8d2c9ad, 0x69bc3724), + TOBN(0x38da9324, 0x6b29cb44), TOBN(0xb540a21d, 0x77c8cbac), + TOBN(0x9bbfe435, 0x01918e42), TOBN(0xfffa707a, 0x56c3614e), + TOBN(0x0ce4e3f1, 0xd4e353b7), TOBN(0x062d8a14, 0xef46b0a0), + TOBN(0x6408d5ab, 0x574b73fd), TOBN(0xbc41d1c9, 0xd3273ffd), + TOBN(0x3538e1e7, 0x6be77800), TOBN(0x71fe8b37, 0xc5655031), + TOBN(0x1cd91621, 0x6b9b331a), TOBN(0xad825d0b, 0xbb388f73), + TOBN(0x56c2e05b, 0x1cb76219), TOBN(0x0ec0bf91, 0x71567e7e), + TOBN(0xe7076f86, 0x61c4c910), TOBN(0xd67b085b, 0xbabc04d9), + TOBN(0x9fb90459, 0x5e93a96a), TOBN(0x7526c1ea, 0xfbdc249a), + TOBN(0x0d44d367, 0xecdd0bb7), TOBN(0x95399917, 0x9dc0d695), + TOBN(0x61360ee9, 0x9e240d18), TOBN(0x057cdcac, 0xb4b94466), + TOBN(0xe7667cd1, 0x2fe5325c), TOBN(0x1fa297b5, 0x21974e3b), + TOBN(0xfa4081e7, 0xdb083d76), TOBN(0x31993be6, 0xf206bd15), + TOBN(0x8949269b, 0x14c19f8c), TOBN(0x21468d72, 0xa9d92357), + TOBN(0x2ccbc583, 0xa4c506ec), TOBN(0x957ed188, 0xd1acfe97), + TOBN(0x8baed833, 0x12f1aea2), TOBN(0xef2a6cb4, 0x8325362d), + TOBN(0x130dde42, 0x8e195c43), TOBN(0xc842025a, 0x0e6050c6), + TOBN(0x2da972a7, 0x08686a5d), TOBN(0xb52999a1, 0xe508b4a8), + TOBN(0xd9f090b9, 0x10a5a8bd), TOBN(0xca91d249, 0x096864da), + TOBN(0x8e6a93be, 0x3f67dbc1), TOBN(0xacae6fba, 0xf5f4764c), + TOBN(0x1563c6e0, 0xd21411a0), TOBN(0x28fa787f, 0xda0a4ad8), + TOBN(0xd524491c, 0x908c8030), TOBN(0x1257ba0e, 0x4c795f07), + TOBN(0x83f49167, 0xceca9754), TOBN(0x426d2cf6, 0x4b7939a0), + TOBN(0x2555e355, 0x723fd0bf), TOBN(0xa96e6d06, 0xc4f144e2), + TOBN(0x4768a8dd, 0x87880e61), TOBN(0x15543815, 0xe508e4d5), + TOBN(0x09d7e772, 0xb1b65e15), 
TOBN(0x63439dd6, 0xac302fa0), + TOBN(0xb93f802f, 0xc14e35c2), TOBN(0x71735b7c, 0x4341333c), + TOBN(0x03a25104, 0x16d4f362), TOBN(0x3f4d069b, 0xbf433c8e), + TOBN(0x0d83ae01, 0xf78f5a7c), TOBN(0x50a8ffbe, 0x7c4eed07), + TOBN(0xc74f8906, 0x76e10f83), TOBN(0x7d080966, 0x9ddaf8e1), + TOBN(0xb11df8e1, 0x698e04cc), TOBN(0x877be203, 0x169005c8), + TOBN(0x32749e8c, 0x4f3c6179), TOBN(0x2dbc9d0a, 0x7853fc05), + TOBN(0x187d4f93, 0x9454d937), TOBN(0xe682ce9d, 0xb4800e1b), + TOBN(0xa9129ad8, 0x165e68e8), TOBN(0x0fe29735, 0xbe7f785b), + TOBN(0x5303f40c, 0x5b9e02b7), TOBN(0xa37c9692, 0x35ee04e8), + TOBN(0x5f46cc20, 0x34d6632b), TOBN(0x55ef72b2, 0x96ac545b), + TOBN(0xabec5c1f, 0x7b91b062), TOBN(0x0a79e1c7, 0xbb33e821), + TOBN(0xbb04b428, 0x3a9f4117), TOBN(0x0de1f28f, 0xfd2a475a), + TOBN(0x31019ccf, 0x3a4434b4), TOBN(0xa3458111, 0x1a7954dc), + TOBN(0xa9dac80d, 0xe34972a7), TOBN(0xb043d054, 0x74f6b8dd), + TOBN(0x021c319e, 0x11137b1a), TOBN(0x00a754ce, 0xed5cc03f), + TOBN(0x0aa2c794, 0xcbea5ad4), TOBN(0x093e67f4, 0x70c015b6), + TOBN(0x72cdfee9, 0xc97e3f6b), TOBN(0xc10bcab4, 0xb6da7461), + TOBN(0x3b02d2fc, 0xb59806b9), TOBN(0x85185e89, 0xa1de6f47), + TOBN(0x39e6931f, 0x0eb6c4d4), TOBN(0x4d4440bd, 0xd4fa5b04), + TOBN(0x5418786e, 0x34be7eb8), TOBN(0x6380e521, 0x9d7259bc), + TOBN(0x20ac0351, 0xd598d710), TOBN(0x272c4166, 0xcb3a4da4), + TOBN(0xdb82fe1a, 0xca71de1f), TOBN(0x746e79f2, 0xd8f54b0f), + TOBN(0x6e7fc736, 0x4b573e9b), TOBN(0x75d03f46, 0xfd4b5040), + TOBN(0x5c1cc36d, 0x0b98d87b), TOBN(0x513ba3f1, 0x1f472da1), + TOBN(0x79d0af26, 0xabb177dd), TOBN(0xf82ab568, 0x7891d564), + TOBN(0x2b6768a9, 0x72232173), TOBN(0xefbb3bb0, 0x8c1f6619), + TOBN(0xb29c11db, 0xa6d18358), TOBN(0x519e2797, 0xb0916d3a), + TOBN(0xd4dc18f0, 0x9188e290), TOBN(0x648e86e3, 0x98b0ca7f), + TOBN(0x859d3145, 0x983c38b5), TOBN(0xb14f176c, 0x637abc8b), + TOBN(0x2793fb9d, 0xcaff7be6), TOBN(0xebe5a55f, 0x35a66a5a), + TOBN(0x7cec1dcd, 0x9f87dc59), TOBN(0x7c595cd3, 0xfbdbf560), + TOBN(0x5b543b22, 0x26eb3257), TOBN(0x69080646, 0xc4c935fd), + TOBN(0x7f2e4403, 0x81e9ede3), TOBN(0x243c3894, 0xcaf6df0a), + TOBN(0x7c605bb1, 0x1c073b11), TOBN(0xcd06a541, 0xba6a4a62), + TOBN(0x29168949, 0x49d4e2e5), TOBN(0x33649d07, 0x4af66880), + TOBN(0xbfc0c885, 0xe9a85035), TOBN(0xb4e52113, 0xfc410f4b), + TOBN(0xdca3b706, 0x78a6513b), TOBN(0x92ea4a2a, 0x9edb1943), + TOBN(0x02642216, 0xdb6e2dd8), TOBN(0x9b45d0b4, 0x9fd57894), + TOBN(0x114e70db, 0xc69d11ae), TOBN(0x1477dd19, 0x4c57595f), + TOBN(0xbc2208b4, 0xec77c272), TOBN(0x95c5b4d7, 0xdb68f59c), + TOBN(0xb8c4fc63, 0x42e532b7), TOBN(0x386ba422, 0x9ae35290), + TOBN(0xfb5dda42, 0xd201ecbc), TOBN(0x2353dc8b, 0xa0e38fd6), + TOBN(0x9a0b85ea, 0x68f7e978), TOBN(0x96ec5682, 0x2ad6d11f), + TOBN(0x5e279d6c, 0xe5f6886d), TOBN(0xd3fe03cd, 0x3cb1914d), + TOBN(0xfe541fa4, 0x7ea67c77), TOBN(0x952bd2af, 0xe3ea810c), + TOBN(0x791fef56, 0x8d01d374), TOBN(0xa3a1c621, 0x0f11336e), + TOBN(0x5ad0d5a9, 0xc7ec6d79), TOBN(0xff7038af, 0x3225c342), + TOBN(0x003c6689, 0xbc69601b), TOBN(0x25059bc7, 0x45e8747d), + TOBN(0xfa4965b2, 0xf2086fbf), TOBN(0xf6840ea6, 0x86916078), + TOBN(0xd7ac7620, 0x70081d6c), TOBN(0xe600da31, 0xb5328645), + TOBN(0x01916f63, 0x529b8a80), TOBN(0xe80e4858, 0x2d7d6f3e), + TOBN(0x29eb0fe8, 0xd664ca7c), TOBN(0xf017637b, 0xe7b43b0c), + TOBN(0x9a75c806, 0x76cb2566), TOBN(0x8f76acb1, 0xb24892d9), + TOBN(0x7ae7b9cc, 0x1f08fe45), TOBN(0x19ef7329, 0x6a4907d8), + TOBN(0x2db4ab71, 0x5f228bf0), TOBN(0xf3cdea39, 0x817032d7), + TOBN(0x0b1f482e, 0xdcabe3c0), TOBN(0x3baf76b4, 0xbb86325c), + TOBN(0xd49065e0, 0x10089465), TOBN(0x3bab5d29, 
0x8e77c596), + TOBN(0x7636c3a6, 0x193dbd95), TOBN(0xdef5d294, 0xb246e499), + TOBN(0xb22c58b9, 0x286b2475), TOBN(0xa0b93939, 0xcd80862b), + TOBN(0x3002c83a, 0xf0992388), TOBN(0x6de01f9b, 0xeacbe14c), + TOBN(0x6aac688e, 0xadd70482), TOBN(0x708de92a, 0x7b4a4e8a), + TOBN(0x75b6dd73, 0x758a6eef), TOBN(0xea4bf352, 0x725b3c43), + TOBN(0x10041f2c, 0x87912868), TOBN(0xb1b1be95, 0xef09297a), + TOBN(0x19ae23c5, 0xa9f3860a), TOBN(0xc4f0f839, 0x515dcf4b), + TOBN(0x3c7ecca3, 0x97f6306a), TOBN(0x744c44ae, 0x68a3a4b0), + TOBN(0x69cd13a0, 0xb3a1d8a2), TOBN(0x7cad0a1e, 0x5256b578), + TOBN(0xea653fcd, 0x33791d9e), TOBN(0x9cc2a05d, 0x74b2e05f), + TOBN(0x73b391dc, 0xfd7affa2), TOBN(0xddb7091e, 0xb6b05442), + TOBN(0xc71e27bf, 0x8538a5c6), TOBN(0x195c63dd, 0x89abff17), + TOBN(0xfd315285, 0x1b71e3da), TOBN(0x9cbdfda7, 0xfa680fa0), + TOBN(0x9db876ca, 0x849d7eab), TOBN(0xebe2764b, 0x3c273271), + TOBN(0x663357e3, 0xf208dcea), TOBN(0x8c5bd833, 0x565b1b70), + TOBN(0xccc3b4f5, 0x9837fc0d), TOBN(0x9b641ba8, 0xa79cf00f), + TOBN(0x7428243d, 0xdfdf3990), TOBN(0x83a594c4, 0x020786b1), + TOBN(0xb712451a, 0x526c4502), TOBN(0x9d39438e, 0x6adb3f93), + TOBN(0xfdb261e3, 0xe9ff0ccd), TOBN(0x80344e3c, 0xe07af4c3), + TOBN(0x75900d7c, 0x2fa4f126), TOBN(0x08a3b865, 0x5c99a232), + TOBN(0x2478b6bf, 0xdb25e0c3), TOBN(0x482cc2c2, 0x71db2edf), + TOBN(0x37df7e64, 0x5f321bb8), TOBN(0x8a93821b, 0x9a8005b4), + TOBN(0x3fa2f10c, 0xcc8c1958), TOBN(0x0d332218, 0x2c269d0a), + TOBN(0x20ab8119, 0xe246b0e6), TOBN(0xb39781e4, 0xd349fd17), + TOBN(0xd293231e, 0xb31aa100), TOBN(0x4b779c97, 0xbb032168), + TOBN(0x4b3f19e1, 0xc8470500), TOBN(0x45b7efe9, 0x0c4c869d), + TOBN(0xdb84f38a, 0xa1a6bbcc), TOBN(0x3b59cb15, 0xb2fddbc1), + TOBN(0xba5514df, 0x3fd165e8), TOBN(0x499fd6a9, 0x061f8811), + TOBN(0x72cd1fe0, 0xbfef9f00), TOBN(0x120a4bb9, 0x79ad7e8a), + TOBN(0xf2ffd095, 0x5f4a5ac5), TOBN(0xcfd174f1, 0x95a7a2f0), + TOBN(0xd42301ba, 0x9d17baf1), TOBN(0xd2fa487a, 0x77f22089), + TOBN(0x9cb09efe, 0xb1dc77e1), TOBN(0xe9566939, 0x21c99682), + TOBN(0x8c546901, 0x6c6067bb), TOBN(0xfd378574, 0x61c24456), + TOBN(0x2b6a6cbe, 0x81796b33), TOBN(0x62d550f6, 0x58e87f8b), + TOBN(0x1b763e1c, 0x7f1b01b4), TOBN(0x4b93cfea, 0x1b1b5e12), + TOBN(0xb9345238, 0x1d531696), TOBN(0x57201c00, 0x88cdde69), + TOBN(0xdde92251, 0x9a86afc7), TOBN(0xe3043895, 0xbd35cea8), + TOBN(0x7608c1e1, 0x8555970d), TOBN(0x8267dfa9, 0x2535935e), + TOBN(0xd4c60a57, 0x322ea38b), TOBN(0xe0bf7977, 0x804ef8b5), + TOBN(0x1a0dab28, 0xc06fece4), TOBN(0xd405991e, 0x94e7b49d), + TOBN(0xc542b6d2, 0x706dab28), TOBN(0xcb228da3, 0xa91618fb), + TOBN(0x224e4164, 0x107d1cea), TOBN(0xeb9fdab3, 0xd0f5d8f1), + TOBN(0xc02ba386, 0x0d6e41cd), TOBN(0x676a72c5, 0x9b1f7146), + TOBN(0xffd6dd98, 0x4d6cb00b), TOBN(0xcef9c5ca, 0xde2e8d7c), + TOBN(0xa1bbf5d7, 0x641c7936), TOBN(0x1b95b230, 0xee8f772e), + TOBN(0xf765a92e, 0xe8ac25b1), TOBN(0xceb04cfc, 0x3a18b7c6), + TOBN(0x27944cef, 0x0acc8966), TOBN(0xcbb3c957, 0x434c1004), + TOBN(0x9c9971a1, 0xa43ff93c), TOBN(0x5bc2db17, 0xa1e358a9), + TOBN(0x45b4862e, 0xa8d9bc82), TOBN(0x70ebfbfb, 0x2201e052), + TOBN(0xafdf64c7, 0x92871591), TOBN(0xea5bcae6, 0xb42d0219), + TOBN(0xde536c55, 0x2ad8f03c), TOBN(0xcd6c3f4d, 0xa76aa33c), + TOBN(0xbeb5f623, 0x0bca6de3), TOBN(0xdd20dd99, 0xb1e706fd), + TOBN(0x90b3ff9d, 0xac9059d4), TOBN(0x2d7b2902, 0x7ccccc4e), + TOBN(0x8a090a59, 0xce98840f), TOBN(0xa5d947e0, 0x8410680a), + TOBN(0x49ae346a, 0x923379a5), TOBN(0x7dbc84f9, 0xb28a3156), + TOBN(0xfd40d916, 0x54a1aff2), TOBN(0xabf318ba, 0x3a78fb9b), + TOBN(0x50152ed8, 0x3029f95e), TOBN(0x9fc1dd77, 0xc58ad7fa), + 
TOBN(0x5fa57915, 0x13595c17), TOBN(0xb9504668, 0x8f62b3a9), + TOBN(0x907b5b24, 0xff3055b0), TOBN(0x2e995e35, 0x9a84f125), + TOBN(0x87dacf69, 0x7e9bbcfb), TOBN(0x95d0c1d6, 0xe86d96e3), + TOBN(0x65726e3c, 0x2d95a75c), TOBN(0x2c3c9001, 0xacd27f21), + TOBN(0x1deab561, 0x6c973f57), TOBN(0x108b7e2c, 0xa5221643), + TOBN(0x5fee9859, 0xc4ef79d4), TOBN(0xbd62b88a, 0x40d4b8c6), + TOBN(0xb4dd29c4, 0x197c75d6), TOBN(0x266a6df2, 0xb7076feb), + TOBN(0x9512d0ea, 0x4bf2df11), TOBN(0x1320c24f, 0x6b0cc9ec), + TOBN(0x6bb1e0e1, 0x01a59596), TOBN(0x8317c5bb, 0xeff9aaac), + TOBN(0x65bb405e, 0x385aa6c9), TOBN(0x613439c1, 0x8f07988f), + TOBN(0xd730049f, 0x16a66e91), TOBN(0xe97f2820, 0xfa1b0e0d), + TOBN(0x4131e003, 0x304c28ea), TOBN(0x820ab732, 0x526bac62), + TOBN(0xb2ac9ef9, 0x28714423), TOBN(0x54ecfffa, 0xadb10cb2), + TOBN(0x8781476e, 0xf886a4cc), TOBN(0x4b2c87b5, 0xdb2f8d49), + TOBN(0xe857cd20, 0x0a44295d), TOBN(0x707d7d21, 0x58c6b044), + TOBN(0xae8521f9, 0xf596757c), TOBN(0x87448f03, 0x67b2b714), + TOBN(0x13a9bc45, 0x5ebcd58d), TOBN(0x79bcced9, 0x9122d3c1), + TOBN(0x3c644247, 0x9e076642), TOBN(0x0cf22778, 0x2df4767d), + TOBN(0x5e61aee4, 0x71d444b6), TOBN(0x211236bf, 0xc5084a1d), + TOBN(0x7e15bc9a, 0x4fd3eaf6), TOBN(0x68df2c34, 0xab622bf5), + TOBN(0x9e674f0f, 0x59bf4f36), TOBN(0xf883669b, 0xd7f34d73), + TOBN(0xc48ac1b8, 0x31497b1d), TOBN(0x323b925d, 0x5106703b), + TOBN(0x22156f42, 0x74082008), TOBN(0xeffc521a, 0xc8482bcb), + TOBN(0x5c6831bf, 0x12173479), TOBN(0xcaa2528f, 0xc4739490), + TOBN(0x84d2102a, 0x8f1b3c4d), TOBN(0xcf64dfc1, 0x2d9bec0d), + TOBN(0x433febad, 0x78a546ef), TOBN(0x1f621ec3, 0x7b73cef1), + TOBN(0x6aecd627, 0x37338615), TOBN(0x162082ab, 0x01d8edf6), + TOBN(0x833a8119, 0x19e86b66), TOBN(0x6023a251, 0xd299b5db), + TOBN(0xf5bb0c3a, 0xbbf04b89), TOBN(0x6735eb69, 0xae749a44), + TOBN(0xd0e058c5, 0x4713de3b), TOBN(0xfdf2593e, 0x2c3d4ccd), + TOBN(0x1b8f414e, 0xfdd23667), TOBN(0xdd52aaca, 0xfa2015ee), + TOBN(0x3e31b517, 0xbd9625ff), TOBN(0x5ec9322d, 0x8db5918c), + TOBN(0xbc73ac85, 0xa96f5294), TOBN(0x82aa5bf3, 0x61a0666a), + TOBN(0x49755810, 0xbf08ac42), TOBN(0xd21cdfd5, 0x891cedfc), + TOBN(0x918cb57b, 0x67f8be10), TOBN(0x365d1a7c, 0x56ffa726), + TOBN(0x2435c504, 0x6532de93), TOBN(0xc0fc5e10, 0x2674cd02), + TOBN(0x6e51fcf8, 0x9cbbb142), TOBN(0x1d436e5a, 0xafc50692), + TOBN(0x766bffff, 0x3fbcae22), TOBN(0x3148c2fd, 0xfd55d3b8), + TOBN(0x52c7fdc9, 0x233222fa), TOBN(0x89ff1092, 0xe419fb6b), + TOBN(0x3cd6db99, 0x25254977), TOBN(0x2e85a161, 0x1cf12ca7), + TOBN(0xadd2547c, 0xdc810bc9), TOBN(0xea3f458f, 0x9d257c22), + TOBN(0x642c1fbe, 0x27d6b19b), TOBN(0xed07e6b5, 0x140481a6), + TOBN(0x6ada1d42, 0x86d2e0f8), TOBN(0xe5920122, 0x0e8a9fd5), + TOBN(0x02c936af, 0x708c1b49), TOBN(0x60f30fee, 0x2b4bfaff), + TOBN(0x6637ad06, 0x858e6a61), TOBN(0xce4c7767, 0x3fd374d0), + TOBN(0x39d54b2d, 0x7188defb), TOBN(0xa8c9d250, 0xf56a6b66), + TOBN(0x58fc0f5e, 0xb24fe1dc), TOBN(0x9eaf9dee, 0x6b73f24c), + TOBN(0xa90d588b, 0x33650705), TOBN(0xde5b62c5, 0xaf2ec729), + TOBN(0x5c72cfae, 0xd3c2b36e), TOBN(0x868c19d5, 0x034435da), + TOBN(0x88605f93, 0xe17ee145), TOBN(0xaa60c4ee, 0x77a5d5b1), + TOBN(0xbcf5bfd2, 0x3b60c472), TOBN(0xaf4ef13c, 0xeb1d3049), + TOBN(0x373f44fc, 0xe13895c9), TOBN(0xf29b382f, 0x0cbc9822), + TOBN(0x1bfcb853, 0x73efaef6), TOBN(0xcf56ac9c, 0xa8c96f40), + TOBN(0xd7adf109, 0x7a191e24), TOBN(0x98035f44, 0xbf8a8dc2), + TOBN(0xf40a71b9, 0x1e750c84), TOBN(0xc57f7b0c, 0x5dc6c469), + TOBN(0x49a0e79c, 0x6fbc19c1), TOBN(0x6b0f5889, 0xa48ebdb8), + TOBN(0x5d3fd084, 0xa07c4e9f), TOBN(0xc3830111, 0xab27de14), + TOBN(0x0e4929fe, 
0x33e08dcc), TOBN(0xf4a5ad24, 0x40bb73a3), + TOBN(0xde86c2bf, 0x490f97ca), TOBN(0x288f09c6, 0x67a1ce18), + TOBN(0x364bb886, 0x1844478d), TOBN(0x7840fa42, 0xceedb040), + TOBN(0x1269fdd2, 0x5a631b37), TOBN(0x94761f1e, 0xa47c8b7d), + TOBN(0xfc0c2e17, 0x481c6266), TOBN(0x85e16ea2, 0x3daa5fa7), + TOBN(0xccd86033, 0x92491048), TOBN(0x0c2f6963, 0xf4d402d7), + TOBN(0x6336f7df, 0xdf6a865c), TOBN(0x0a2a463c, 0xb5c02a87), + TOBN(0xb0e29be7, 0xbf2f12ee), TOBN(0xf0a22002, 0x66bad988), + TOBN(0x27f87e03, 0x9123c1d7), TOBN(0x21669c55, 0x328a8c98), + TOBN(0x186b9803, 0x92f14529), TOBN(0xd3d056cc, 0x63954df3), + TOBN(0x2f03fd58, 0x175a46f6), TOBN(0x63e34ebe, 0x11558558), + TOBN(0xe13fedee, 0x5b80cfa5), TOBN(0xe872a120, 0xd401dbd1), + TOBN(0x52657616, 0xe8a9d667), TOBN(0xbc8da4b6, 0xe08d6693), + TOBN(0x370fb9bb, 0x1b703e75), TOBN(0x6773b186, 0xd4338363), + TOBN(0x18dad378, 0xecef7bff), TOBN(0xaac787ed, 0x995677da), + TOBN(0x4801ea8b, 0x0437164b), TOBN(0xf430ad20, 0x73fe795e), + TOBN(0xb164154d, 0x8ee5eb73), TOBN(0x0884ecd8, 0x108f7c0e), + TOBN(0x0e6ec096, 0x5f520698), TOBN(0x640631fe, 0x44f7b8d9), + TOBN(0x92fd34fc, 0xa35a68b9), TOBN(0x9c5a4b66, 0x4d40cf4e), + TOBN(0x949454bf, 0x80b6783d), TOBN(0x80e701fe, 0x3a320a10), + TOBN(0x8d1a564a, 0x1a0a39b2), TOBN(0x1436d53d, 0x320587db), + TOBN(0xf5096e6d, 0x6556c362), TOBN(0xbc23a3c0, 0xe2455d7e), + TOBN(0x3a7aee54, 0x807230f9), TOBN(0x9ba1cfa6, 0x22ae82fd), + TOBN(0x833a057a, 0x99c5d706), TOBN(0x8be85f4b, 0x842315c9), + TOBN(0xd083179a, 0x66a72f12), TOBN(0x2fc77d5d, 0xcdcc73cd), + TOBN(0x22b88a80, 0x5616ee30), TOBN(0xfb09548f, 0xe7ab1083), + TOBN(0x8ad6ab0d, 0x511270cd), TOBN(0x61f6c57a, 0x6924d9ab), + TOBN(0xa0f7bf72, 0x90aecb08), TOBN(0x849f87c9, 0x0df784a4), + TOBN(0x27c79c15, 0xcfaf1d03), TOBN(0xbbf9f675, 0xc463face), + TOBN(0x91502c65, 0x765ba543), TOBN(0x18ce3cac, 0x42ea60dd), + TOBN(0xe5cee6ac, 0x6e43ecb3), TOBN(0x63e4e910, 0x68f2aeeb), + TOBN(0x26234fa3, 0xc85932ee), TOBN(0x96883e8b, 0x4c90c44d), + TOBN(0x29b9e738, 0xa18a50f6), TOBN(0xbfc62b2a, 0x3f0420df), + TOBN(0xd22a7d90, 0x6d3e1fa9), TOBN(0x17115618, 0xfe05b8a3), + TOBN(0x2a0c9926, 0xbb2b9c01), TOBN(0xc739fcc6, 0xe07e76a2), + TOBN(0x540e9157, 0x165e439a), TOBN(0x06353a62, 0x6a9063d8), + TOBN(0x84d95594, 0x61e927a3), TOBN(0x013b9b26, 0xe2e0be7f), + TOBN(0x4feaec3b, 0x973497f1), TOBN(0x15c0f94e, 0x093ebc2d), + TOBN(0x6af5f227, 0x33af0583), TOBN(0x0c2af206, 0xc61f3340), + TOBN(0xd25dbdf1, 0x4457397c), TOBN(0x2e8ed017, 0xcabcbae0), + TOBN(0xe3010938, 0xc2815306), TOBN(0xbaa99337, 0xe8c6cd68), + TOBN(0x08513182, 0x3b0ec7de), TOBN(0x1e1b822b, 0x58df05df), + TOBN(0x5c14842f, 0xa5c3b683), TOBN(0x98fe977e, 0x3eba34ce), + TOBN(0xfd2316c2, 0x0d5e8873), TOBN(0xe48d839a, 0xbd0d427d), + TOBN(0x495b2218, 0x623fc961), TOBN(0x24ee56e7, 0xb46fba5e), + TOBN(0x9184a55b, 0x91e4de58), TOBN(0xa7488ca5, 0xdfdea288), + TOBN(0xa723862e, 0xa8dcc943), TOBN(0x92d762b2, 0x849dc0fc), + TOBN(0x3c444a12, 0x091ff4a9), TOBN(0x581113fa, 0x0cada274), + TOBN(0xb9de0a45, 0x30d8eae2), TOBN(0x5e0fcd85, 0xdf6b41ea), + TOBN(0x6233ea68, 0xc094dbb5), TOBN(0xb77d062e, 0xd968d410), + TOBN(0x3e719bbc, 0x58b3002d), TOBN(0x68e7dd3d, 0x3dc49d58), + TOBN(0x8d825740, 0x013a5e58), TOBN(0x21311747, 0x3c9e3c1b), + TOBN(0x0cb0a2a7, 0x7c99b6ab), TOBN(0x5c48a3b3, 0xc2f888f2)} + , + {TOBN(0xc7913e91, 0x991724f3), TOBN(0x5eda799c, 0x39cbd686), + TOBN(0xddb595c7, 0x63d4fc1e), TOBN(0x6b63b80b, 0xac4fed54), + TOBN(0x6ea0fc69, 0x7e5fb516), TOBN(0x737708ba, 0xd0f1c964), + TOBN(0x9628745f, 0x11a92ca5), TOBN(0x61f37958, 0x9a86967a), + TOBN(0x9af39b2c, 0xaa665072), 
TOBN(0x78322fa4, 0xefd324ef), + TOBN(0x3d153394, 0xc327bd31), TOBN(0x81d5f271, 0x3129dab0), + TOBN(0xc72e0c42, 0xf48027f5), TOBN(0xaa40cdbc, 0x8536e717), + TOBN(0xf45a657a, 0x2d369d0f), TOBN(0xb03bbfc4, 0xea7f74e6), + TOBN(0x46a8c418, 0x0d738ded), TOBN(0x6f1a5bb0, 0xe0de5729), + TOBN(0xf10230b9, 0x8ba81675), TOBN(0x32c6f30c, 0x112b33d4), + TOBN(0x7559129d, 0xd8fffb62), TOBN(0x6a281b47, 0xb459bf05), + TOBN(0x77c1bd3a, 0xfa3b6776), TOBN(0x0709b380, 0x7829973a), + TOBN(0x8c26b232, 0xa3326505), TOBN(0x38d69272, 0xee1d41bf), + TOBN(0x0459453e, 0xffe32afa), TOBN(0xce8143ad, 0x7cb3ea87), + TOBN(0x932ec1fa, 0x7e6ab666), TOBN(0x6cd2d230, 0x22286264), + TOBN(0x459a46fe, 0x6736f8ed), TOBN(0x50bf0d00, 0x9eca85bb), + TOBN(0x0b825852, 0x877a21ec), TOBN(0x300414a7, 0x0f537a94), + TOBN(0x3f1cba40, 0x21a9a6a2), TOBN(0x50824eee, 0x76943c00), + TOBN(0xa0dbfcec, 0xf83cba5d), TOBN(0xf9538148, 0x93b4f3c0), + TOBN(0x61744162, 0x48f24dd7), TOBN(0x5322d64d, 0xe4fb09dd), + TOBN(0x57447384, 0x3d9325f3), TOBN(0xa9bef2d0, 0xf371cb84), + TOBN(0x77d2188b, 0xa61e36c5), TOBN(0xbbd6a7d7, 0xc602df72), + TOBN(0xba3aa902, 0x8f61bc0b), TOBN(0xf49085ed, 0x6ed0b6a1), + TOBN(0x8bc625d6, 0xae6e8298), TOBN(0x832b0b1d, 0xa2e9c01d), + TOBN(0xa337c447, 0xf1f0ced1), TOBN(0x800cc793, 0x9492dd2b), + TOBN(0x4b93151d, 0xbea08efa), TOBN(0x820cf3f8, 0xde0a741e), + TOBN(0xff1982dc, 0x1c0f7d13), TOBN(0xef921960, 0x84dde6ca), + TOBN(0x1ad7d972, 0x45f96ee3), TOBN(0x319c8dbe, 0x29dea0c7), + TOBN(0xd3ea3871, 0x7b82b99b), TOBN(0x75922d4d, 0x470eb624), + TOBN(0x8f66ec54, 0x3b95d466), TOBN(0x66e673cc, 0xbee1e346), + TOBN(0x6afe67c4, 0xb5f2b89a), TOBN(0x3de9c1e6, 0x290e5cd3), + TOBN(0x8c278bb6, 0x310a2ada), TOBN(0x420fa384, 0x0bdb323b), + TOBN(0x0ae1d63b, 0x0eb919b0), TOBN(0xd74ee51d, 0xa74b9620), + TOBN(0x395458d0, 0xa674290c), TOBN(0x324c930f, 0x4620a510), + TOBN(0x2d1f4d19, 0xfbac27d4), TOBN(0x4086e8ca, 0x9bedeeac), + TOBN(0x0cdd211b, 0x9b679ab8), TOBN(0x5970167d, 0x7090fec4), + TOBN(0x3420f2c9, 0xfaf1fc63), TOBN(0x616d333a, 0x328c8bb4), + TOBN(0x7d65364c, 0x57f1fe4a), TOBN(0x9343e877, 0x55e5c73a), + TOBN(0x5795176b, 0xe970e78c), TOBN(0xa36ccebf, 0x60533627), + TOBN(0xfc7c7380, 0x09cdfc1b), TOBN(0xb39a2afe, 0xb3fec326), + TOBN(0xb7ff1ba1, 0x6224408a), TOBN(0xcc856e92, 0x247cfc5e), + TOBN(0x01f102e7, 0xc18bc493), TOBN(0x4613ab74, 0x2091c727), + TOBN(0xaa25e89c, 0xc420bf2b), TOBN(0x00a53176, 0x90337ec2), + TOBN(0xd2be9f43, 0x7d025fc7), TOBN(0x3316fb85, 0x6e6fe3dc), + TOBN(0x27520af5, 0x9ac50814), TOBN(0xfdf95e78, 0x9a8e4223), + TOBN(0xb7e7df2a, 0x56bec5a0), TOBN(0xf7022f7d, 0xdf159e5d), + TOBN(0x93eeeab1, 0xcac1fe8f), TOBN(0x8040188c, 0x37451168), + TOBN(0x7ee8aa8a, 0xd967dce6), TOBN(0xfa0e79e7, 0x3abc9299), + TOBN(0x67332cfc, 0x2064cfd1), TOBN(0x339c31de, 0xb0651934), + TOBN(0x719b28d5, 0x2a3bcbea), TOBN(0xee74c82b, 0x9d6ae5c6), + TOBN(0x0927d05e, 0xbaf28ee6), TOBN(0x82cecf2c, 0x9d719028), + TOBN(0x0b0d353e, 0xddb30289), TOBN(0xfe4bb977, 0xfddb2e29), + TOBN(0xbb5bb990, 0x640bfd9e), TOBN(0xd226e277, 0x82f62108), + TOBN(0x4bf00985, 0x02ffdd56), TOBN(0x7756758a, 0x2ca1b1b5), + TOBN(0xc32b62a3, 0x5285fe91), TOBN(0xedbc546a, 0x8c9cd140), + TOBN(0x1e47a013, 0xaf5cb008), TOBN(0xbca7e720, 0x073ce8f2), + TOBN(0xe10b2ab8, 0x17a91cae), TOBN(0xb89aab65, 0x08e27f63), + TOBN(0x7b3074a7, 0xdba3ddf9), TOBN(0x1c20ce09, 0x330c2972), + TOBN(0x6b9917b4, 0x5fcf7e33), TOBN(0xe6793743, 0x945ceb42), + TOBN(0x18fc2215, 0x5c633d19), TOBN(0xad1adb3c, 0xc7485474), + TOBN(0x646f9679, 0x6424c49b), TOBN(0xf888dfe8, 0x67c241c9), + TOBN(0xe12d4b93, 0x24f68b49), TOBN(0x9a6b62d8, 
0xa571df20), + TOBN(0x81b4b26d, 0x179483cb), TOBN(0x666f9632, 0x9511fae2), + TOBN(0xd281b3e4, 0xd53aa51f), TOBN(0x7f96a765, 0x7f3dbd16), + TOBN(0xa7f8b5bf, 0x074a30ce), TOBN(0xd7f52107, 0x005a32e6), + TOBN(0x6f9e0907, 0x50237ed4), TOBN(0x2f21da47, 0x8096fa2b), + TOBN(0xf3e19cb4, 0xeec863a0), TOBN(0xd18f77fd, 0x9527620a), + TOBN(0x9505c81c, 0x407c1cf8), TOBN(0x9998db4e, 0x1b6ec284), + TOBN(0x7e3389e5, 0xc247d44d), TOBN(0x12507141, 0x3f4f3d80), + TOBN(0xd4ba0110, 0x4a78a6c7), TOBN(0x312874a0, 0x767720be), + TOBN(0xded059a6, 0x75944370), TOBN(0xd6123d90, 0x3b2c0bdd), + TOBN(0xa56b717b, 0x51c108e3), TOBN(0x9bb7940e, 0x070623e9), + TOBN(0x794e2d59, 0x84ac066c), TOBN(0xf5954a92, 0xe68c69a0), + TOBN(0x28c52458, 0x4fd99dcc), TOBN(0x60e639fc, 0xb1012517), + TOBN(0xc2e60125, 0x7de79248), TOBN(0xe9ef6404, 0xf12fc6d7), + TOBN(0x4c4f2808, 0x2a3b5d32), TOBN(0x865ad32e, 0xc768eb8a), + TOBN(0xac02331b, 0x13fb70b6), TOBN(0x037b44c1, 0x95599b27), + TOBN(0x1a860fc4, 0x60bd082c), TOBN(0xa2e25745, 0xc980cd01), + TOBN(0xee3387a8, 0x1da0263e), TOBN(0x931bfb95, 0x2d10f3d6), + TOBN(0x5b687270, 0xa1f24a32), TOBN(0xf140e65d, 0xca494b86), + TOBN(0x4f4ddf91, 0xb2f1ac7a), TOBN(0xf99eaabb, 0x760fee27), + TOBN(0x57f4008a, 0x49c228e5), TOBN(0x090be440, 0x1cf713bb), + TOBN(0xac91fbe4, 0x5004f022), TOBN(0xd838c2c2, 0x569e1af6), + TOBN(0xd6c7d20b, 0x0f1daaa5), TOBN(0xaa063ac1, 0x1bbb02c0), + TOBN(0x0938a422, 0x59558a78), TOBN(0x5343c669, 0x8435da2f), + TOBN(0x96f67b18, 0x034410dc), TOBN(0x7cc1e424, 0x84510804), + TOBN(0x86a1543f, 0x16dfbb7d), TOBN(0x921fa942, 0x5b5bd592), + TOBN(0x9dcccb6e, 0xb33dd03c), TOBN(0x8581ddd9, 0xb843f51e), + TOBN(0x54935fcb, 0x81d73c9e), TOBN(0x6d07e979, 0x0a5e97ab), + TOBN(0x4dc7b30a, 0xcf3a6bab), TOBN(0x147ab1f3, 0x170bee11), + TOBN(0x0aaf8e3d, 0x9fafdee4), TOBN(0xfab3dbcb, 0x538a8b95), + TOBN(0x405df4b3, 0x6ef13871), TOBN(0xf1f4e9cb, 0x088d5a49), + TOBN(0x9bcd24d3, 0x66b33f1d), TOBN(0x3b97b820, 0x5ce445c0), + TOBN(0xe2926549, 0xba93ff61), TOBN(0xd9c341ce, 0x4dafe616), + TOBN(0xfb30a76e, 0x16efb6f3), TOBN(0xdf24b8ca, 0x605b953c), + TOBN(0x8bd52afe, 0xc2fffb9f), TOBN(0xbbac5ff7, 0xe19d0b96), + TOBN(0x43c01b87, 0x459afccd), TOBN(0x6bd45143, 0xb7432652), + TOBN(0x84734530, 0x55b5d78e), TOBN(0x81088fdb, 0x1554ba7d), + TOBN(0xada0a52c, 0x1e269375), TOBN(0xf9f037c4, 0x2dc5ec10), + TOBN(0xc0660607, 0x94bfbc11), TOBN(0xc0a630bb, 0xc9c40d2f), + TOBN(0x5efc797e, 0xab64c31e), TOBN(0xffdb1dab, 0x74507144), + TOBN(0xf6124287, 0x1ca6790c), TOBN(0xe9609d81, 0xe69bf1bf), + TOBN(0xdb898595, 0x00d24fc9), TOBN(0x9c750333, 0xe51fb417), + TOBN(0x51830a91, 0xfef7bbde), TOBN(0x0ce67dc8, 0x945f585c), + TOBN(0x9a730ed4, 0x4763eb50), TOBN(0x24a0e221, 0xc1ab0d66), + TOBN(0x643b6393, 0x648748f3), TOBN(0x1982daa1, 0x6d3c6291), + TOBN(0x6f00a9f7, 0x8bbc5549), TOBN(0x7a1783e1, 0x7f36384e), + TOBN(0xe8346323, 0xde977f50), TOBN(0x91ab688d, 0xb245502a), + TOBN(0x331ab6b5, 0x6d0bdd66), TOBN(0x0a6ef32e, 0x64b71229), + TOBN(0x1028150e, 0xfe7c352f), TOBN(0x27e04350, 0xce7b39d3), + TOBN(0x2a3c8acd, 0xc1070c82), TOBN(0xfb2034d3, 0x80c9feef), + TOBN(0x2d729621, 0x709f3729), TOBN(0x8df290bf, 0x62cb4549), + TOBN(0x02f99f33, 0xfc2e4326), TOBN(0x3b30076d, 0x5eddf032), + TOBN(0xbb21f8cf, 0x0c652fb5), TOBN(0x314fb49e, 0xed91cf7b), + TOBN(0xa013eca5, 0x2f700750), TOBN(0x2b9e3c23, 0x712a4575), + TOBN(0xe5355557, 0xaf30fbb0), TOBN(0x1ada3516, 0x7c77e771), + TOBN(0x45f6ecb2, 0x7b135670), TOBN(0xe85d19df, 0x7cfc202e), + TOBN(0x0f1b50c7, 0x58d1be9f), TOBN(0x5ebf2c0a, 0xead2e344), + TOBN(0x1531fe4e, 0xabc199c9), TOBN(0xc7032592, 0x56bab0ae), + 
TOBN(0x16ab2e48, 0x6c1fec54), TOBN(0x0f87fda8, 0x04280188), + TOBN(0xdc9f46fc, 0x609e4a74), TOBN(0x2a44a143, 0xba667f91), + TOBN(0xbc3d8b95, 0xb4d83436), TOBN(0xa01e4bd0, 0xc7bd2958), + TOBN(0x7b182932, 0x73483c90), TOBN(0xa79c6aa1, 0xa7c7b598), + TOBN(0xbf3983c6, 0xeaaac07e), TOBN(0x8f18181e, 0x96e0d4e6), + TOBN(0x8553d37c, 0x051af62b), TOBN(0xe9a998eb, 0x0bf94496), + TOBN(0xe0844f9f, 0xb0d59aa1), TOBN(0x983fd558, 0xe6afb813), + TOBN(0x9670c0ca, 0x65d69804), TOBN(0x732b22de, 0x6ea5ff2d), + TOBN(0xd7640ba9, 0x5fd8623b), TOBN(0x9f619163, 0xa6351782), + TOBN(0x0bfc27ee, 0xacee5043), TOBN(0xae419e73, 0x2eb10f02), + TOBN(0x19c028d1, 0x8943fb05), TOBN(0x71f01cf7, 0xff13aa2a), + TOBN(0x7790737e, 0x8887a132), TOBN(0x67513309, 0x66318410), + TOBN(0x9819e8a3, 0x7ddb795e), TOBN(0xfecb8ef5, 0xdad100b2), + TOBN(0x59f74a22, 0x3021926a), TOBN(0xb7c28a49, 0x6f9b4c1c), + TOBN(0xed1a733f, 0x912ad0ab), TOBN(0x42a910af, 0x01a5659c), + TOBN(0x3842c6e0, 0x7bd68cab), TOBN(0x2b57fa38, 0x76d70ac8), + TOBN(0x8a6707a8, 0x3c53aaeb), TOBN(0x62c1c510, 0x65b4db18), + TOBN(0x8de2c1fb, 0xb2d09dc7), TOBN(0xc3dfed12, 0x266bd23b), + TOBN(0x927d039b, 0xd5b27db6), TOBN(0x2fb2f0f1, 0x103243da), + TOBN(0xf855a07b, 0x80be7399), TOBN(0xed9327ce, 0x1f9f27a8), + TOBN(0xa0bd99c7, 0x729bdef7), TOBN(0x2b67125e, 0x28250d88), + TOBN(0x784b26e8, 0x8670ced7), TOBN(0xe3dfe41f, 0xc31bd3b4), + TOBN(0x9e353a06, 0xbcc85cbc), TOBN(0x302e2909, 0x60178a9d), + TOBN(0x860abf11, 0xa6eac16e), TOBN(0x76447000, 0xaa2b3aac), + TOBN(0x46ff9d19, 0x850afdab), TOBN(0x35bdd6a5, 0xfdb2d4c1), + TOBN(0xe82594b0, 0x7e5c9ce9), TOBN(0x0f379e53, 0x20af346e), + TOBN(0x608b31e3, 0xbc65ad4a), TOBN(0x710c6b12, 0x267c4826), + TOBN(0x51c966f9, 0x71954cf1), TOBN(0xb1cec793, 0x0d0aa215), + TOBN(0x1f155989, 0x86bd23a8), TOBN(0xae2ff99c, 0xf9452e86), + TOBN(0xd8dd953c, 0x340ceaa2), TOBN(0x26355275, 0x2e2e9333), + TOBN(0x15d4e5f9, 0x8586f06d), TOBN(0xd6bf94a8, 0xf7cab546), + TOBN(0x33c59a0a, 0xb76a9af0), TOBN(0x52740ab3, 0xba095af7), + TOBN(0xc444de8a, 0x24389ca0), TOBN(0xcc6f9863, 0x706da0cb), + TOBN(0xb5a741a7, 0x6b2515cf), TOBN(0x71c41601, 0x9585c749), + TOBN(0x78350d4f, 0xe683de97), TOBN(0x31d61524, 0x63d0b5f5), + TOBN(0x7a0cc5e1, 0xfbce090b), TOBN(0xaac927ed, 0xfbcb2a5b), + TOBN(0xe920de49, 0x20d84c35), TOBN(0x8c06a0b6, 0x22b4de26), + TOBN(0xd34dd58b, 0xafe7ddf3), TOBN(0x55851fed, 0xc1e6e55b), + TOBN(0xd1395616, 0x960696e7), TOBN(0x940304b2, 0x5f22705f), + TOBN(0x6f43f861, 0xb0a2a860), TOBN(0xcf121282, 0x0e7cc981), + TOBN(0x12186212, 0x0ab64a96), TOBN(0x09215b9a, 0xb789383c), + TOBN(0x311eb305, 0x37387c09), TOBN(0xc5832fce, 0xf03ee760), + TOBN(0x30358f58, 0x32f7ea19), TOBN(0xe01d3c34, 0x91d53551), + TOBN(0x1ca5ee41, 0xda48ea80), TOBN(0x34e71e8e, 0xcf4fa4c1), + TOBN(0x312abd25, 0x7af1e1c7), TOBN(0xe3afcdeb, 0x2153f4a5), + TOBN(0x9d5c84d7, 0x00235e9a), TOBN(0x0308d3f4, 0x8c4c836f), + TOBN(0xc0a66b04, 0x89332de5), TOBN(0x610dd399, 0x89e566ef), + TOBN(0xf8eea460, 0xd1ac1635), TOBN(0x84cbb3fb, 0x20a2c0df), + TOBN(0x40afb488, 0xe74a48c5), TOBN(0x29738198, 0xd326b150), + TOBN(0x2a17747f, 0xa6d74081), TOBN(0x60ea4c05, 0x55a26214), + TOBN(0x53514bb4, 0x1f88c5fe), TOBN(0xedd64567, 0x7e83426c), + TOBN(0xd5d6cbec, 0x96460b25), TOBN(0xa12fd0ce, 0x68dc115e), + TOBN(0xc5bc3ed2, 0x697840ea), TOBN(0x969876a8, 0xa6331e31), + TOBN(0x60c36217, 0x472ff580), TOBN(0xf4229705, 0x4ad41393), + TOBN(0x4bd99ef0, 0xa03b8b92), TOBN(0x501c7317, 0xc144f4f6), + TOBN(0x159009b3, 0x18464945), TOBN(0x6d5e594c, 0x74c5c6be), + TOBN(0x2d587011, 0x321a3660), TOBN(0xd1e184b1, 0x3898d022), + TOBN(0x5ba04752, 
0x4c6a7e04), TOBN(0x47fa1e2b, 0x45550b65), + TOBN(0x9419daf0, 0x48c0a9a5), TOBN(0x66362953, 0x7c243236), + TOBN(0xcd0744b1, 0x5cb12a88), TOBN(0x561b6f9a, 0x2b646188), + TOBN(0x599415a5, 0x66c2c0c0), TOBN(0xbe3f0859, 0x0f83f09a), + TOBN(0x9141c5be, 0xb92041b8), TOBN(0x01ae38c7, 0x26477d0d), + TOBN(0xca8b71f3, 0xd12c7a94), TOBN(0xfab5b31f, 0x765c70db), + TOBN(0x76ae7492, 0x487443e9), TOBN(0x8595a310, 0x990d1349), + TOBN(0xf8dbeda8, 0x7d460a37), TOBN(0x7f7ad082, 0x1e45a38f), + TOBN(0xed1d4db6, 0x1059705a), TOBN(0xa3dd492a, 0xe6b9c697), + TOBN(0x4b92ee3a, 0x6eb38bd5), TOBN(0xbab2609d, 0x67cc0bb7), + TOBN(0x7fc4fe89, 0x6e70ee82), TOBN(0xeff2c56e, 0x13e6b7e3), + TOBN(0x9b18959e, 0x34d26fca), TOBN(0x2517ab66, 0x889d6b45), + TOBN(0xf167b4e0, 0xbdefdd4f), TOBN(0x69958465, 0xf366e401), + TOBN(0x5aa368ab, 0xa73bbec0), TOBN(0x12148709, 0x7b240c21), + TOBN(0x378c3233, 0x18969006), TOBN(0xcb4d73ce, 0xe1fe53d1), + TOBN(0x5f50a80e, 0x130c4361), TOBN(0xd67f5951, 0x7ef5212b), + TOBN(0xf145e21e, 0x9e70c72e), TOBN(0xb2e52e29, 0x5566d2fb), + TOBN(0x44eaba4a, 0x032397f5), TOBN(0x5e56937b, 0x7e31a7de), + TOBN(0x68dcf517, 0x456c61e1), TOBN(0xbc2e954a, 0xa8b0a388), + TOBN(0xe3552fa7, 0x60a8b755), TOBN(0x03442dae, 0x73ad0cde), + TOBN(0x37ffe747, 0xceb26210), TOBN(0x983545e8, 0x787baef9), + TOBN(0x8b8c8535, 0x86a3de31), TOBN(0xc621dbcb, 0xfacd46db), + TOBN(0x82e442e9, 0x59266fbb), TOBN(0xa3514c37, 0x339d471c), + TOBN(0x3a11b771, 0x62cdad96), TOBN(0xf0cb3b3c, 0xecf9bdf0), + TOBN(0x3fcbdbce, 0x478e2135), TOBN(0x7547b5cf, 0xbda35342), + TOBN(0xa97e81f1, 0x8a677af6), TOBN(0xc8c2bf83, 0x28817987), + TOBN(0xdf07eaaf, 0x45580985), TOBN(0xc68d1f05, 0xc93b45cb), + TOBN(0x106aa2fe, 0xc77b4cac), TOBN(0x4c1d8afc, 0x04a7ae86), + TOBN(0xdb41c3fd, 0x9eb45ab2), TOBN(0x5b234b5b, 0xd4b22e74), + TOBN(0xda253dec, 0xf215958a), TOBN(0x67e0606e, 0xa04edfa0), + TOBN(0xabbbf070, 0xef751b11), TOBN(0xf352f175, 0xf6f06dce), + TOBN(0xdfc4b6af, 0x6839f6b4), TOBN(0x53ddf9a8, 0x9959848e), + TOBN(0xda49c379, 0xc21520b0), TOBN(0x90864ff0, 0xdbd5d1b6), + TOBN(0x2f055d23, 0x5f49c7f7), TOBN(0xe51e4e6a, 0xa796b2d8), + TOBN(0xc361a67f, 0x5c9dc340), TOBN(0x5ad53c37, 0xbca7c620), + TOBN(0xda1d6588, 0x32c756d0), TOBN(0xad60d911, 0x8bb67e13), + TOBN(0xd6c47bdf, 0x0eeec8c6), TOBN(0x4a27fec1, 0x078a1821), + TOBN(0x081f7415, 0xc3099524), TOBN(0x8effdf0b, 0x82cd8060), + TOBN(0xdb70ec1c, 0x65842df8), TOBN(0x8821b358, 0xd319a901), + TOBN(0x72ee56ee, 0xde42b529), TOBN(0x5bb39592, 0x236e4286), + TOBN(0xd1183316, 0xfd6f7140), TOBN(0xf9fadb5b, 0xbd8e81f7), + TOBN(0x701d5e0c, 0x5a02d962), TOBN(0xfdee4dbf, 0x1b601324), + TOBN(0xbed17407, 0x35d7620e), TOBN(0x04e3c2c3, 0xf48c0012), + TOBN(0x9ee29da7, 0x3455449a), TOBN(0x562cdef4, 0x91a836c4), + TOBN(0x8f682a5f, 0x47701097), TOBN(0x617125d8, 0xff88d0c2), + TOBN(0x948fda24, 0x57bb86dd), TOBN(0x348abb8f, 0x289f7286), + TOBN(0xeb10eab5, 0x99d94bbd), TOBN(0xd51ba28e, 0x4684d160), + TOBN(0xabe0e51c, 0x30c8f41a), TOBN(0x66588b45, 0x13254f4a), + TOBN(0x147ebf01, 0xfad097a5), TOBN(0x49883ea8, 0x610e815d), + TOBN(0xe44d60ba, 0x8a11de56), TOBN(0xa970de6e, 0x827a7a6d), + TOBN(0x2be41424, 0x5e17fc19), TOBN(0xd833c657, 0x01214057), + TOBN(0x1375813b, 0x363e723f), TOBN(0x6820bb88, 0xe6a52e9b), + TOBN(0x7e7f6970, 0xd875d56a), TOBN(0xd6a0a9ac, 0x51fbf6bf), + TOBN(0x54ba8790, 0xa3083c12), TOBN(0xebaeb23d, 0x6ae7eb64), + TOBN(0xa8685c3a, 0xb99a907a), TOBN(0xf1e74550, 0x026bf40b), + TOBN(0x7b73a027, 0xc802cd9e), TOBN(0x9a8a927c, 0x4fef4635), + TOBN(0xe1b6f60c, 0x08191224), TOBN(0xc4126ebb, 0xde4ec091), + TOBN(0xe1dff4dc, 0x4ae38d84), 
TOBN(0xde3f57db, 0x4f2ef985), + TOBN(0x34964337, 0xd446a1dd), TOBN(0x7bf217a0, 0x859e77f6), + TOBN(0x8ff10527, 0x8e1d13f5), TOBN(0xa304ef03, 0x74eeae27), + TOBN(0xfc6f5e47, 0xd19dfa5a), TOBN(0xdb007de3, 0x7fad982b), + TOBN(0x28205ad1, 0x613715f5), TOBN(0x251e6729, 0x7889529e), + TOBN(0x72705184, 0x1ae98e78), TOBN(0xf818537d, 0x271cac32), + TOBN(0xc8a15b7e, 0xb7f410f5), TOBN(0xc474356f, 0x81f62393), + TOBN(0x92dbdc5a, 0xc242316b), TOBN(0xabe060ac, 0xdbf4aff5), + TOBN(0x6e8c38fe, 0x909a8ec6), TOBN(0x43e514e5, 0x6116cb94), + TOBN(0x2078fa38, 0x07d784f9), TOBN(0x1161a880, 0xf4b5b357), + TOBN(0x5283ce79, 0x13adea3d), TOBN(0x0756c3e6, 0xcc6a910b), + TOBN(0x60bcfe01, 0xaaa79697), TOBN(0x04a73b29, 0x56391db1), + TOBN(0xdd8dad47, 0x189b45a0), TOBN(0xbfac0dd0, 0x48d5b8d9), + TOBN(0x34ab3af5, 0x7d3d2ec2), TOBN(0x6fa2fc2d, 0x207bd3af), + TOBN(0x9ff40092, 0x66550ded), TOBN(0x719b3e87, 0x1fd5b913), + TOBN(0xa573a496, 0x6d17fbc7), TOBN(0x0cd1a70a, 0x73d2b24e), + TOBN(0x34e2c5ca, 0xb2676937), TOBN(0xe7050b06, 0xbf669f21), + TOBN(0xfbe948b6, 0x1ede9046), TOBN(0xa0530051, 0x97662659), + TOBN(0x58cbd4ed, 0xf10124c5), TOBN(0xde2646e4, 0xdd6c06c8), + TOBN(0x332f8108, 0x8cad38c0), TOBN(0x471b7e90, 0x6bd68ae2), + TOBN(0x56ac3fb2, 0x0d8e27a3), TOBN(0xb54660db, 0x136b4b0d), + TOBN(0x123a1e11, 0xa6fd8de4), TOBN(0x44dbffea, 0xa37799ef), + TOBN(0x4540b977, 0xce6ac17c), TOBN(0x495173a8, 0xaf60acef)} + , + {TOBN(0x9ebb284d, 0x391c2a82), TOBN(0xbcdd4863, 0x158308e8), + TOBN(0x006f16ec, 0x83f1edca), TOBN(0xa13e2c37, 0x695dc6c8), + TOBN(0x2ab756f0, 0x4a057a87), TOBN(0xa8765500, 0xa6b48f98), + TOBN(0x4252face, 0x68651c44), TOBN(0xa52b540b, 0xe1765e02), + TOBN(0x4f922fc5, 0x16a0d2bb), TOBN(0x0d5cc16c, 0x1a623499), + TOBN(0x9241cf3a, 0x57c62c8b), TOBN(0x2f5e6961, 0xfd1b667f), + TOBN(0x5c15c70b, 0xf5a01797), TOBN(0x3d20b44d, 0x60956192), + TOBN(0x04911b37, 0x071fdb52), TOBN(0xf648f916, 0x8d6f0f7b), + TOBN(0x6dc1acaf, 0xe60b7cf7), TOBN(0x25860a50, 0x84a9d869), + TOBN(0x56fc6f09, 0xe7ba8ac4), TOBN(0x828c5bd0, 0x6148d29e), + TOBN(0xac6b435e, 0xdc55ae5f), TOBN(0xa527f56c, 0xc0117411), + TOBN(0x94d5045e, 0xfd24342c), TOBN(0x2c4c0a35, 0x70b67c0d), + TOBN(0x027cc8b8, 0xfac61d9a), TOBN(0x7d25e062, 0xe3c6fe8a), + TOBN(0xe08805bf, 0xe5bff503), TOBN(0x13271e6c, 0x6ff632f7), + TOBN(0x55dca6c0, 0x232f76a5), TOBN(0x8957c32d, 0x701ef426), + TOBN(0xee728bcb, 0xa10a5178), TOBN(0x5ea60411, 0xb62c5173), + TOBN(0xfc4e964e, 0xd0b8892b), TOBN(0x9ea17683, 0x9301bb74), + TOBN(0x6265c5ae, 0xfcc48626), TOBN(0xe60cf82e, 0xbb3e9102), + TOBN(0x57adf797, 0xd4df5531), TOBN(0x235b59a1, 0x8deeefe2), + TOBN(0x60adcf58, 0x3f306eb1), TOBN(0x105c2753, 0x3d09492d), + TOBN(0x4090914b, 0xb5def996), TOBN(0x1cb69c83, 0x233dd1e7), + TOBN(0xc1e9c1d3, 0x9b3d5e76), TOBN(0x1f3338ed, 0xfccf6012), + TOBN(0xb1e95d0d, 0x2f5378a8), TOBN(0xacf4c2c7, 0x2f00cd21), + TOBN(0x6e984240, 0xeb5fe290), TOBN(0xd66c038d, 0x248088ae), + TOBN(0x804d264a, 0xf94d70cf), TOBN(0xbdb802ef, 0x7314bf7e), + TOBN(0x8fb54de2, 0x4333ed02), TOBN(0x740461e0, 0x285635d9), + TOBN(0x4113b2c8, 0x365e9383), TOBN(0xea762c83, 0x3fdef652), + TOBN(0x4eec6e2e, 0x47b956c1), TOBN(0xa3d814be, 0x65620fa4), + TOBN(0x9ad5462b, 0xb4d8bc50), TOBN(0x181c0b16, 0xa9195770), + TOBN(0xebd4fe1c, 0x78412a68), TOBN(0xae0341bc, 0xc0dff48c), + TOBN(0xb6bc45cf, 0x7003e866), TOBN(0xf11a6dea, 0x8a24a41b), + TOBN(0x5407151a, 0xd04c24c2), TOBN(0x62c9d27d, 0xda5b7b68), + TOBN(0x2e964235, 0x88cceff6), TOBN(0x8594c54f, 0x8b07ed69), + TOBN(0x1578e73c, 0xc84d0d0d), TOBN(0x7b4e1055, 0xff532868), + TOBN(0xa348c0d5, 0xb5ec995a), 
TOBN(0xbf4b9d55, 0x14289a54), + TOBN(0x9ba155a6, 0x58fbd777), TOBN(0x186ed7a8, 0x1a84491d), + TOBN(0xd4992b30, 0x614c0900), TOBN(0xda98d121, 0xbd00c24b), + TOBN(0x7f534dc8, 0x7ec4bfa1), TOBN(0x4a5ff674, 0x37dc34bc), + TOBN(0x68c196b8, 0x1d7ea1d7), TOBN(0x38cf2893, 0x80a6d208), + TOBN(0xfd56cd09, 0xe3cbbd6e), TOBN(0xec72e27e, 0x4205a5b6), + TOBN(0x15ea68f5, 0xa44f77f7), TOBN(0x7aa5f9fd, 0xb43c52bc), + TOBN(0x86ff676f, 0x94f0e609), TOBN(0xa4cde963, 0x2e2d432b), + TOBN(0x8cafa0c0, 0xeee470af), TOBN(0x84137d0e, 0x8a3f5ec8), + TOBN(0xebb40411, 0xfaa31231), TOBN(0xa239c13f, 0x6f7f7ccf), + TOBN(0x32865719, 0xa8afd30b), TOBN(0x86798328, 0x8a826dce), + TOBN(0xdf04e891, 0xc4a8fbe0), TOBN(0xbb6b6e1b, 0xebf56ad3), + TOBN(0x0a695b11, 0x471f1ff0), TOBN(0xd76c3389, 0xbe15baf0), + TOBN(0x018edb95, 0xbe96c43e), TOBN(0xf2beaaf4, 0x90794158), + TOBN(0x152db09e, 0xc3076a27), TOBN(0x5e82908e, 0xe416545d), + TOBN(0xa2c41272, 0x356d6f2e), TOBN(0xdc9c9642, 0x31fd74e1), + TOBN(0x66ceb88d, 0x519bf615), TOBN(0xe29ecd76, 0x05a2274e), + TOBN(0x3a0473c4, 0xbf5e2fa0), TOBN(0x6b6eb671, 0x64284e67), + TOBN(0xe8b97932, 0xb88756dd), TOBN(0xed4e8652, 0xf17e3e61), + TOBN(0xc2dd1499, 0x3ee1c4a4), TOBN(0xc0aaee17, 0x597f8c0e), + TOBN(0x15c4edb9, 0x6c168af3), TOBN(0x6563c7bf, 0xb39ae875), + TOBN(0xadfadb6f, 0x20adb436), TOBN(0xad55e8c9, 0x9a042ac0), + TOBN(0x975a1ed8, 0xb76da1f5), TOBN(0x10dfa466, 0xa58acb94), + TOBN(0x8dd7f7e3, 0xac060282), TOBN(0x6813e66a, 0x572a051e), + TOBN(0xb4ccae1e, 0x350cb901), TOBN(0xb653d656, 0x50cb7822), + TOBN(0x42484710, 0xdfab3b87), TOBN(0xcd7ee537, 0x9b670fd0), + TOBN(0x0a50b12e, 0x523b8bf6), TOBN(0x8009eb5b, 0x8f910c1b), + TOBN(0xf535af82, 0x4a167588), TOBN(0x0f835f9c, 0xfb2a2abd), + TOBN(0xf59b2931, 0x2afceb62), TOBN(0xc797df2a, 0x169d383f), + TOBN(0xeb3f5fb0, 0x66ac02b0), TOBN(0x029d4c6f, 0xdaa2d0ca), + TOBN(0xd4059bc1, 0xafab4bc5), TOBN(0x833f5c6f, 0x56783247), + TOBN(0xb5346630, 0x8d2d3605), TOBN(0x83387891, 0xd34d8433), + TOBN(0xd973b30f, 0xadd9419a), TOBN(0xbcca1099, 0xafe3fce8), + TOBN(0x08178315, 0x0809aac6), TOBN(0x01b7f21a, 0x540f0f11), + TOBN(0x65c29219, 0x909523c8), TOBN(0xa62f648f, 0xa3a1c741), + TOBN(0x88598d4f, 0x60c9e55a), TOBN(0xbce9141b, 0x0e4f347a), + TOBN(0x9af97d84, 0x35f9b988), TOBN(0x0210da62, 0x320475b6), + TOBN(0x3c076e22, 0x9191476c), TOBN(0x7520dbd9, 0x44fc7834), + TOBN(0x6a6b2cfe, 0xc1ab1bbd), TOBN(0xef8a65be, 0xdc650938), + TOBN(0x72855540, 0x805d7bc4), TOBN(0xda389396, 0xed11fdfd), + TOBN(0xa9d5bd36, 0x74660876), TOBN(0x11d67c54, 0xb45dff35), + TOBN(0x6af7d148, 0xa4f5da94), TOBN(0xbb8d4c3f, 0xc0bbeb31), + TOBN(0x87a7ebd1, 0xe0a1b12a), TOBN(0x1e4ef88d, 0x770ba95f), + TOBN(0x8c33345c, 0xdc2ae9cb), TOBN(0xcecf1276, 0x01cc8403), + TOBN(0x687c012e, 0x1b39b80f), TOBN(0xfd90d0ad, 0x35c33ba4), + TOBN(0xa3ef5a67, 0x5c9661c2), TOBN(0x368fc88e, 0xe017429e), + TOBN(0xd30c6761, 0x196a2fa2), TOBN(0x931b9817, 0xbd5b312e), + TOBN(0xba01000c, 0x72f54a31), TOBN(0xa203d2c8, 0x66eaa541), + TOBN(0xf2abdee0, 0x98939db3), TOBN(0xe37d6c2c, 0x3e606c02), + TOBN(0xf2921574, 0x521ff643), TOBN(0x2781b3c4, 0xd7e2fca3), + TOBN(0x664300b0, 0x7850ec06), TOBN(0xac5a38b9, 0x7d3a10cf), + TOBN(0x9233188d, 0xe34ab39d), TOBN(0xe77057e4, 0x5072cbb9), + TOBN(0xbcf0c042, 0xb59e78df), TOBN(0x4cfc91e8, 0x1d97de52), + TOBN(0x4661a26c, 0x3ee0ca4a), TOBN(0x5620a4c1, 0xfb8507bc), + TOBN(0x4b44d4aa, 0x049f842c), TOBN(0xceabc5d5, 0x1540e82b), + TOBN(0x306710fd, 0x15c6f156), TOBN(0xbe5ae52b, 0x63db1d72), + TOBN(0x06f1e7e6, 0x334957f1), TOBN(0x57e388f0, 0x31144a70), + TOBN(0xfb69bb2f, 0xdf96447b), TOBN(0x0f78ebd3, 
0x73e38a12), + TOBN(0xb8222605, 0x2b7ce542), TOBN(0xe6d4ce99, 0x7472bde1), + TOBN(0x53e16ebe, 0x09d2f4da), TOBN(0x180ff42e, 0x53b92b2e), + TOBN(0xc59bcc02, 0x2c34a1c6), TOBN(0x3803d6f9, 0x422c46c2), + TOBN(0x18aff74f, 0x5c14a8a2), TOBN(0x55aebf80, 0x10a08b28), + TOBN(0x66097d58, 0x7135593f), TOBN(0x32e6eff7, 0x2be570cd), + TOBN(0x584e6a10, 0x2a8c860d), TOBN(0xcd185890, 0xa2eb4163), + TOBN(0x7ceae99d, 0x6d97e134), TOBN(0xd42c6b70, 0xdd8447ce), + TOBN(0x59ddbb4a, 0xb8c50273), TOBN(0x03c612df, 0x3cf34e1e), + TOBN(0x84b9ca15, 0x04b6c5a0), TOBN(0x35216f39, 0x18f0e3a3), + TOBN(0x3ec2d2bc, 0xbd986c00), TOBN(0x8bf546d9, 0xd19228fe), + TOBN(0xd1c655a4, 0x4cd623c3), TOBN(0x366ce718, 0x502b8e5a), + TOBN(0x2cfc84b4, 0xeea0bfe7), TOBN(0xe01d5cee, 0xcf443e8e), + TOBN(0x8ec045d9, 0x036520f8), TOBN(0xdfb3c3d1, 0x92d40e98), + TOBN(0x0bac4cce, 0xcc559a04), TOBN(0x35eccae5, 0x240ea6b1), + TOBN(0x180b32db, 0xf8a5a0ac), TOBN(0x547972a5, 0xeb699700), + TOBN(0xa3765801, 0xca26bca0), TOBN(0x57e09d0e, 0xa647f25a), + TOBN(0xb956970e, 0x2fdd23cc), TOBN(0xb80288bc, 0x5682e971), + TOBN(0xe6e6d91e, 0x9ae86ebc), TOBN(0x0564c83f, 0x8c9f1939), + TOBN(0x551932a2, 0x39560368), TOBN(0xe893752b, 0x049c28e2), + TOBN(0x0b03cee5, 0xa6a158c3), TOBN(0xe12d656b, 0x04964263), + TOBN(0x4b47554e, 0x63e3bc1d), TOBN(0xc719b6a2, 0x45044ff7), + TOBN(0x4f24d30a, 0xe48daa07), TOBN(0xa3f37556, 0xc8c1edc3), + TOBN(0x9a47bf76, 0x0700d360), TOBN(0xbb1a1824, 0x822ae4e2), + TOBN(0x22e275a3, 0x89f1fb4c), TOBN(0x72b1aa23, 0x9968c5f5), + TOBN(0xa75feaca, 0xbe063f64), TOBN(0x9b392f43, 0xbce47a09), + TOBN(0xd4241509, 0x1ad07aca), TOBN(0x4b0c591b, 0x8d26cd0f), + TOBN(0x2d42ddfd, 0x92f1169a), TOBN(0x63aeb1ac, 0x4cbf2392), + TOBN(0x1de9e877, 0x0691a2af), TOBN(0xebe79af7, 0xd98021da), + TOBN(0xcfdf2a4e, 0x40e50acf), TOBN(0xf0a98ad7, 0xaf01d665), + TOBN(0xefb640bf, 0x1831be1f), TOBN(0x6fe8bd2f, 0x80e9ada0), + TOBN(0x94c103a1, 0x6cafbc91), TOBN(0x170f8759, 0x8308e08c), + TOBN(0x5de2d2ab, 0x9780ff4f), TOBN(0x666466bc, 0x45b201f2), + TOBN(0x58af2010, 0xf5b343bc), TOBN(0x0f2e400a, 0xf2f142fe), + TOBN(0x3483bfde, 0xa85f4bdf), TOBN(0xf0b1d093, 0x03bfeaa9), + TOBN(0x2ea01b95, 0xc7081603), TOBN(0xe943e4c9, 0x3dba1097), + TOBN(0x47be92ad, 0xb438f3a6), TOBN(0x00bb7742, 0xe5bf6636), + TOBN(0x136b7083, 0x824297b4), TOBN(0x9d0e5580, 0x5584455f), + TOBN(0xab48cedc, 0xf1c7d69e), TOBN(0x53a9e481, 0x2a256e76), + TOBN(0x0402b0e0, 0x65eb2413), TOBN(0xdadbbb84, 0x8fc407a7), + TOBN(0xa65cd5a4, 0x8d7f5492), TOBN(0x21d44293, 0x74bae294), + TOBN(0x66917ce6, 0x3b5f1cc4), TOBN(0x37ae52ea, 0xce872e62), + TOBN(0xbb087b72, 0x2905f244), TOBN(0x12077086, 0x1e6af74f), + TOBN(0x4b644e49, 0x1058edea), TOBN(0x827510e3, 0xb638ca1d), + TOBN(0x8cf2b704, 0x6038591c), TOBN(0xffc8b47a, 0xfe635063), + TOBN(0x3ae220e6, 0x1b4d5e63), TOBN(0xbd864742, 0x9d961b4b), + TOBN(0x610c107e, 0x9bd16bed), TOBN(0x4270352a, 0x1127147b), + TOBN(0x7d17ffe6, 0x64cfc50e), TOBN(0x50dee01a, 0x1e36cb42), + TOBN(0x068a7622, 0x35dc5f9a), TOBN(0x9a08d536, 0xdf53f62c), + TOBN(0x4ed71457, 0x6be5f7de), TOBN(0xd93006f8, 0xc2263c9e), + TOBN(0xe073694c, 0xcacacb36), TOBN(0x2ff7a5b4, 0x3ae118ab), + TOBN(0x3cce53f1, 0xcd871236), TOBN(0xf156a39d, 0xc2aa6d52), + TOBN(0x9cc5f271, 0xb198d76d), TOBN(0xbc615b6f, 0x81383d39), + TOBN(0xa54538e8, 0xde3eee6b), TOBN(0x58c77538, 0xab910d91), + TOBN(0x31e5bdbc, 0x58d278bd), TOBN(0x3cde4adf, 0xb963acae), + TOBN(0xb1881fd2, 0x5302169c), TOBN(0x8ca60fa0, 0xa989ed8b), + TOBN(0xa1999458, 0xff96a0ee), TOBN(0xc1141f03, 0xac6c283d), + TOBN(0x7677408d, 0x6dfafed3), TOBN(0x33a01653, 0x39661588), + 
TOBN(0x3c9c15ec, 0x0b726fa0), TOBN(0x090cfd93, 0x6c9b56da), + TOBN(0xe34f4bae, 0xa3c40af5), TOBN(0x3469eadb, 0xd21129f1), + TOBN(0xcc51674a, 0x1e207ce8), TOBN(0x1e293b24, 0xc83b1ef9), + TOBN(0x17173d13, 0x1e6c0bb4), TOBN(0x19004695, 0x90776d35), + TOBN(0xe7980e34, 0x6de6f922), TOBN(0x873554cb, 0xf4dd9a22), + TOBN(0x0316c627, 0xcbf18a51), TOBN(0x4d93651b, 0x3032c081), + TOBN(0x207f2771, 0x3946834d), TOBN(0x2c08d7b4, 0x30cdbf80), + TOBN(0x137a4fb4, 0x86df2a61), TOBN(0xa1ed9c07, 0xecf7b4a2), + TOBN(0xb2e460e2, 0x7bd042ff), TOBN(0xb7f5e2fa, 0x5f62f5ec), + TOBN(0x7aa6ec6b, 0xcc2423b7), TOBN(0x75ce0a7f, 0xba63eea7), + TOBN(0x67a45fb1, 0xf250a6e1), TOBN(0x93bc919c, 0xe53cdc9f), + TOBN(0x9271f56f, 0x871942df), TOBN(0x2372ff6f, 0x7859ad66), + TOBN(0x5f4c2b96, 0x33cb1a78), TOBN(0xe3e29101, 0x5838aa83), + TOBN(0xa7ed1611, 0xe4e8110c), TOBN(0x2a2d70d5, 0x330198ce), + TOBN(0xbdf132e8, 0x6720efe0), TOBN(0xe61a8962, 0x66a471bf), + TOBN(0x796d3a85, 0x825808bd), TOBN(0x51dc3cb7, 0x3fd6e902), + TOBN(0x643c768a, 0x916219d1), TOBN(0x36cd7685, 0xa2ad7d32), + TOBN(0xe3db9d05, 0xb22922a4), TOBN(0x6494c87e, 0xdba29660), + TOBN(0xf0ac91df, 0xbcd2ebc7), TOBN(0x4deb57a0, 0x45107f8d), + TOBN(0x42271f59, 0xc3d12a73), TOBN(0x5f71687c, 0xa5c2c51d), + TOBN(0xcb1f50c6, 0x05797bcb), TOBN(0x29ed0ed9, 0xd6d34eb0), + TOBN(0xe5fe5b47, 0x4683c2eb), TOBN(0x4956eeb5, 0x97447c46), + TOBN(0x5b163a43, 0x71207167), TOBN(0x93fa2fed, 0x0248c5ef), + TOBN(0x67930af2, 0x31f63950), TOBN(0xa77797c1, 0x14caa2c9), + TOBN(0x526e80ee, 0x27ac7e62), TOBN(0xe1e6e626, 0x58b28aec), + TOBN(0x636178b0, 0xb3c9fef0), TOBN(0xaf7752e0, 0x6d5f90be), + TOBN(0x94ecaf18, 0xeece51cf), TOBN(0x2864d0ed, 0xca806e1f), + TOBN(0x6de2e383, 0x97c69134), TOBN(0x5a42c316, 0xeb291293), + TOBN(0xc7779219, 0x6a60bae0), TOBN(0xa24de346, 0x6b7599d1), + TOBN(0x49d374aa, 0xb75d4941), TOBN(0x98900586, 0x2d501ff0), + TOBN(0x9f16d40e, 0xeb7974cf), TOBN(0x1033860b, 0xcdd8c115), + TOBN(0xb6c69ac8, 0x2094cec3), TOBN(0x9976fb88, 0x403b770c), + TOBN(0x1dea026c, 0x4859590d), TOBN(0xb6acbb46, 0x8562d1fd), + TOBN(0x7cd6c461, 0x44569d85), TOBN(0xc3190a36, 0x97f0891d), + TOBN(0xc6f53195, 0x48d5a17d), TOBN(0x7d919966, 0xd749abc8), + TOBN(0x65104837, 0xdd1c8a20), TOBN(0x7e5410c8, 0x2f683419), + TOBN(0x958c3ca8, 0xbe94022e), TOBN(0x605c3197, 0x6145dac2), + TOBN(0x3fc07501, 0x01683d54), TOBN(0x1d7127c5, 0x595b1234), + TOBN(0x10b8f87c, 0x9481277f), TOBN(0x677db2a8, 0xe65a1adb), + TOBN(0xec2fccaa, 0xddce3345), TOBN(0x2a6811b7, 0x012a4350), + TOBN(0x96760ff1, 0xac598bdc), TOBN(0x054d652a, 0xd1bf4128), + TOBN(0x0a1151d4, 0x92a21005), TOBN(0xad7f3971, 0x33110fdf), + TOBN(0x8c95928c, 0x1960100f), TOBN(0x6c91c825, 0x7bf03362), + TOBN(0xc8c8b2a2, 0xce309f06), TOBN(0xfdb27b59, 0xca27204b), + TOBN(0xd223eaa5, 0x0848e32e), TOBN(0xb93e4b2e, 0xe7bfaf1e), + TOBN(0xc5308ae6, 0x44aa3ded), TOBN(0x317a666a, 0xc015d573), + TOBN(0xc888ce23, 0x1a979707), TOBN(0xf141c1e6, 0x0d5c4958), + TOBN(0xb53b7de5, 0x61906373), TOBN(0x858dbade, 0xeb999595), + TOBN(0x8cbb47b2, 0xa59e5c36), TOBN(0x660318b3, 0xdcf4e842), + TOBN(0xbd161ccd, 0x12ba4b7a), TOBN(0xf399daab, 0xf8c8282a), + TOBN(0x1587633a, 0xeeb2130d), TOBN(0xa465311a, 0xda38dd7d), + TOBN(0x5f75eec8, 0x64d3779b), TOBN(0x3c5d0476, 0xad64c171), + TOBN(0x87410371, 0x2a914428), TOBN(0x8096a891, 0x90e2fc29), + TOBN(0xd3d2ae9d, 0x23b3ebc2), TOBN(0x90bdd6db, 0xa580cfd6), + TOBN(0x52dbb7f3, 0xc5b01f6c), TOBN(0xe68eded4, 0xe102a2dc), + TOBN(0x17785b77, 0x99eb6df0), TOBN(0x26c3cc51, 0x7386b779), + TOBN(0x345ed988, 0x6417a48e), TOBN(0xe990b4e4, 0x07d6ef31), + TOBN(0x0f456b7e, 
0x2586abba), TOBN(0x239ca6a5, 0x59c96e9a), + TOBN(0xe327459c, 0xe2eb4206), TOBN(0x3a4c3313, 0xa002b90a), + TOBN(0x2a114806, 0xf6a3f6fb), TOBN(0xad5cad2f, 0x85c251dd), + TOBN(0x92c1f613, 0xf5a784d3), TOBN(0xec7bfacf, 0x349766d5), + TOBN(0x04b3cd33, 0x3e23cb3b), TOBN(0x3979fe84, 0xc5a64b2d), + TOBN(0x192e2720, 0x7e589106), TOBN(0xa60c43d1, 0xa15b527f), + TOBN(0x2dae9082, 0xbe7cf3a6), TOBN(0xcc86ba92, 0xbc967274), + TOBN(0xf28a2ce8, 0xaea0a8a9), TOBN(0x404ca6d9, 0x6ee988b3), + TOBN(0xfd7e9c5d, 0x005921b8), TOBN(0xf56297f1, 0x44e79bf9), + TOBN(0xa163b460, 0x0d75ddc2), TOBN(0x30b23616, 0xa1f2be87), + TOBN(0x4b070d21, 0xbfe50e2b), TOBN(0x7ef8cfd0, 0xe1bfede1), + TOBN(0xadba0011, 0x2aac4ae0), TOBN(0x2a3e7d01, 0xb9ebd033), + TOBN(0x995277ec, 0xe38d9d1c), TOBN(0xb500249e, 0x9c5d2de3), + TOBN(0x8912b820, 0xf13ca8c9), TOBN(0xc8798114, 0x877793af), + TOBN(0x19e6125d, 0xec3f1dec), TOBN(0x07b1f040, 0x911178da), + TOBN(0xd93ededa, 0x904a6738), TOBN(0x55187a5a, 0x0bebedcd), + TOBN(0xf7d04722, 0xeb329d41), TOBN(0xf449099e, 0xf170b391), + TOBN(0xfd317a69, 0xca99f828), TOBN(0x50c3db2b, 0x34a4976d), + TOBN(0xe9ba7784, 0x3757b392), TOBN(0x326caefd, 0xaa3ca05a), + TOBN(0x78e5293b, 0xf1e593d4), TOBN(0x7842a937, 0x0d98fd13), + TOBN(0xe694bf96, 0x5f96b10d), TOBN(0x373a9df6, 0x06a8cd05), + TOBN(0x997d1e51, 0xe8f0c7fc), TOBN(0x1d019790, 0x63fd972e), + TOBN(0x0064d858, 0x5499fb32), TOBN(0x7b67bad9, 0x77a8aeb7), + TOBN(0x1d3eb977, 0x2d08eec5), TOBN(0x5fc047a6, 0xcbabae1d), + TOBN(0x0577d159, 0xe54a64bb), TOBN(0x8862201b, 0xc43497e4), + TOBN(0xad6b4e28, 0x2ce0608d), TOBN(0x8b687b7d, 0x0b167aac), + TOBN(0x6ed4d367, 0x8b2ecfa9), TOBN(0x24dfe62d, 0xa90c3c38), + TOBN(0xa1862e10, 0x3fe5c42b), TOBN(0x1ca73dca, 0xd5732a9f), + TOBN(0x35f038b7, 0x76bb87ad), TOBN(0x674976ab, 0xf242b81f), + TOBN(0x4f2bde7e, 0xb0fd90cd), TOBN(0x6efc172e, 0xa7fdf092), + TOBN(0x3806b69b, 0x92222f1f), TOBN(0x5a2459ca, 0x6cf7ae70), + TOBN(0x6789f69c, 0xa85217ee), TOBN(0x5f232b5e, 0xe3dc85ac), + TOBN(0x660e3ec5, 0x48e9e516), TOBN(0x124b4e47, 0x3197eb31), + TOBN(0x10a0cb13, 0xaafcca23), TOBN(0x7bd63ba4, 0x8213224f), + TOBN(0xaffad7cc, 0x290a7f4f), TOBN(0x6b409c9e, 0x0286b461), + TOBN(0x58ab809f, 0xffa407af), TOBN(0xc3122eed, 0xc68ac073), + TOBN(0x17bf9e50, 0x4ef24d7e), TOBN(0x5d929794, 0x3e2a5811), + TOBN(0x519bc867, 0x02902e01), TOBN(0x76bba5da, 0x39c8a851), + TOBN(0xe9f9669c, 0xda94951e), TOBN(0x4b6af58d, 0x66b8d418), + TOBN(0xfa321074, 0x17d426a4), TOBN(0xc78e66a9, 0x9dde6027), + TOBN(0x0516c083, 0x4a53b964), TOBN(0xfc659d38, 0xff602330), + TOBN(0x0ab55e5c, 0x58c5c897), TOBN(0x985099b2, 0x838bc5df), + TOBN(0x061d9efc, 0xc52fc238), TOBN(0x712b2728, 0x6ac1da3f), + TOBN(0xfb658149, 0x9283fe08), TOBN(0x4954ac94, 0xb8aaa2f7), + TOBN(0x85c0ada4, 0x7fb2e74f), TOBN(0xee8ba98e, 0xb89926b0), + TOBN(0xe4f9d37d, 0x23d1af5b), TOBN(0x14ccdbf9, 0xba9b015e), + TOBN(0xb674481b, 0x7bfe7178), TOBN(0x4e1debae, 0x65405868), + TOBN(0x061b2821, 0xc48c867d), TOBN(0x69c15b35, 0x513b30ea), + TOBN(0x3b4a1666, 0x36871088), TOBN(0xe5e29f5d, 0x1220b1ff), + TOBN(0x4b82bb35, 0x233d9f4d), TOBN(0x4e076333, 0x18cdc675)} + , + {TOBN(0x0d53f5c7, 0xa3e6fced), TOBN(0xe8cbbdd5, 0xf45fbdeb), + TOBN(0xf85c01df, 0x13339a70), TOBN(0x0ff71880, 0x142ceb81), + TOBN(0x4c4e8774, 0xbd70437a), TOBN(0x5fb32891, 0xba0bda6a), + TOBN(0x1cdbebd2, 0xf18bd26e), TOBN(0x2f9526f1, 0x03a9d522), + TOBN(0x40ce3051, 0x92c4d684), TOBN(0x8b04d725, 0x7612efcd), + TOBN(0xb9dcda36, 0x6f9cae20), TOBN(0x0edc4d24, 0xf058856c), + TOBN(0x64f2e6bf, 0x85427900), TOBN(0x3de81295, 0xdc09dfea), + TOBN(0xd41b4487, 0x379bf26c), 
TOBN(0x50b62c6d, 0x6df135a9), + TOBN(0xd4f8e3b4, 0xc72dfe67), TOBN(0xc416b0f6, 0x90e19fdf), + TOBN(0x18b9098d, 0x4c13bd35), TOBN(0xac11118a, 0x15b8cb9e), + TOBN(0xf598a318, 0xf0062841), TOBN(0xbfe0602f, 0x89f356f4), + TOBN(0x7ae3637e, 0x30177a0c), TOBN(0x34097747, 0x61136537), + TOBN(0x0db2fb5e, 0xd005832a), TOBN(0x5f5efd3b, 0x91042e4f), + TOBN(0x8c4ffdc6, 0xed70f8ca), TOBN(0xe4645d0b, 0xb52da9cc), + TOBN(0x9596f58b, 0xc9001d1f), TOBN(0x52c8f0bc, 0x4e117205), + TOBN(0xfd4aa0d2, 0xe398a084), TOBN(0x815bfe3a, 0x104f49de), + TOBN(0x97e5443f, 0x23885e5f), TOBN(0xf72f8f99, 0xe8433aab), + TOBN(0xbd00b154, 0xe4d4e604), TOBN(0xd0b35e6a, 0xe5e173ff), + TOBN(0x57b2a048, 0x9164722d), TOBN(0x3e3c665b, 0x88761ec8), + TOBN(0x6bdd1397, 0x3da83832), TOBN(0x3c8b1a1e, 0x73dafe3b), + TOBN(0x4497ace6, 0x54317cac), TOBN(0xbe600ab9, 0x521771b3), + TOBN(0xb42e409e, 0xb0dfe8b8), TOBN(0x386a67d7, 0x3942310f), + TOBN(0x25548d8d, 0x4431cc28), TOBN(0xa7cff142, 0x985dc524), + TOBN(0x4d60f5a1, 0x93c4be32), TOBN(0x83ebd5c8, 0xd071c6e1), + TOBN(0xba3a80a7, 0xb1fd2b0b), TOBN(0x9b3ad396, 0x5bec33e8), + TOBN(0xb3868d61, 0x79743fb3), TOBN(0xcfd169fc, 0xfdb462fa), + TOBN(0xd3b499d7, 0x9ce0a6af), TOBN(0x55dc1cf1, 0xe42d3ff8), + TOBN(0x04fb9e6c, 0xc6c3e1b2), TOBN(0x47e6961d, 0x6f69a474), + TOBN(0x54eb3acc, 0xe548b37b), TOBN(0xb38e7542, 0x84d40549), + TOBN(0x8c3daa51, 0x7b341b4f), TOBN(0x2f6928ec, 0x690bf7fa), + TOBN(0x0496b323, 0x86ce6c41), TOBN(0x01be1c55, 0x10adadcd), + TOBN(0xc04e67e7, 0x4bb5faf9), TOBN(0x3cbaf678, 0xe15c9985), + TOBN(0x8cd12145, 0x50ca4247), TOBN(0xba1aa47a, 0xe7dd30aa), + TOBN(0x2f81ddf1, 0xe58fee24), TOBN(0x03452936, 0xeec9b0e8), + TOBN(0x8bdc3b81, 0x243aea96), TOBN(0x9a2919af, 0x15c3d0e5), + TOBN(0x9ea640ec, 0x10948361), TOBN(0x5ac86d5b, 0x6e0bcccf), + TOBN(0xf892d918, 0xc36cf440), TOBN(0xaed3e837, 0xc939719c), + TOBN(0xb07b08d2, 0xc0218b64), TOBN(0x6f1bcbba, 0xce9790dd), + TOBN(0x4a84d6ed, 0x60919b8e), TOBN(0xd8900791, 0x8ac1f9eb), + TOBN(0xf84941aa, 0x0dd5daef), TOBN(0xb22fe40a, 0x67fd62c5), + TOBN(0x97e15ba2, 0x157f2db3), TOBN(0xbda2fc8f, 0x8e28ca9c), + TOBN(0x5d050da4, 0x37b9f454), TOBN(0x3d57eb57, 0x2379d72e), + TOBN(0xe9b5eba2, 0xfb5ee997), TOBN(0x01648ca2, 0xe11538ca), + TOBN(0x32bb76f6, 0xf6327974), TOBN(0x338f14b8, 0xff3f4bb7), + TOBN(0x524d226a, 0xd7ab9a2d), TOBN(0x9c00090d, 0x7dfae958), + TOBN(0x0ba5f539, 0x8751d8c2), TOBN(0x8afcbcdd, 0x3ab8262d), + TOBN(0x57392729, 0xe99d043b), TOBN(0xef51263b, 0xaebc943a), + TOBN(0x9feace93, 0x20862935), TOBN(0x639efc03, 0xb06c817b), + TOBN(0x1fe054b3, 0x66b4be7a), TOBN(0x3f25a9de, 0x84a37a1e), + TOBN(0xf39ef1ad, 0x78d75cd9), TOBN(0xd7b58f49, 0x5062c1b5), + TOBN(0x6f74f9a9, 0xff563436), TOBN(0xf718ff29, 0xe8af51e7), + TOBN(0x5234d313, 0x15e97fec), TOBN(0xb6a8e2b1, 0x292f1c0a), + TOBN(0xa7f53aa8, 0x327720c1), TOBN(0x956ca322, 0xba092cc8), + TOBN(0x8f03d64a, 0x28746c4d), TOBN(0x51fe1782, 0x66d0d392), + TOBN(0xd19b34db, 0x3c832c80), TOBN(0x60dccc5c, 0x6da2e3b4), + TOBN(0x245dd62e, 0x0a104ccc), TOBN(0xa7ab1de1, 0x620b21fd), + TOBN(0xb293ae0b, 0x3893d123), TOBN(0xf7b75783, 0xb15ee71c), + TOBN(0x5aa3c614, 0x42a9468b), TOBN(0xd686123c, 0xdb15d744), + TOBN(0x8c616891, 0xa7ab4116), TOBN(0x6fcd72c8, 0xa4e6a459), + TOBN(0xac219110, 0x77e5fad7), TOBN(0xfb6a20e7, 0x704fa46b), + TOBN(0xe839be7d, 0x341d81dc), TOBN(0xcddb6889, 0x32148379), + TOBN(0xda6211a1, 0xf7026ead), TOBN(0xf3b2575f, 0xf4d1cc5e), + TOBN(0x40cfc8f6, 0xa7a73ae6), TOBN(0x83879a5e, 0x61d5b483), + TOBN(0xc5acb1ed, 0x41a50ebc), TOBN(0x59a60cc8, 0x3c07d8fa), + TOBN(0x1b73bdce, 0xb1876262), TOBN(0x2b0d79f0, 
0x12af4ee9), + TOBN(0x8bcf3b0b, 0xd46e1d07), TOBN(0x17d6af9d, 0xe45d152f), + TOBN(0x73520461, 0x6d736451), TOBN(0x43cbbd97, 0x56b0bf5a), + TOBN(0xb0833a5b, 0xd5999b9d), TOBN(0x702614f0, 0xeb72e398), + TOBN(0x0aadf01a, 0x59c3e9f8), TOBN(0x40200e77, 0xce6b3d16), + TOBN(0xda22bdd3, 0xdeddafad), TOBN(0x76dedaf4, 0x310d72e1), + TOBN(0x49ef807c, 0x4bc2e88f), TOBN(0x6ba81291, 0x146dd5a5), + TOBN(0xa1a4077a, 0x7d8d59e9), TOBN(0x87b6a2e7, 0x802db349), + TOBN(0xd5679997, 0x1b4e598e), TOBN(0xf499ef1f, 0x06fe4b1d), + TOBN(0x3978d3ae, 0xfcb267c5), TOBN(0xb582b557, 0x235786d0), + TOBN(0x32b3b2ca, 0x1715cb07), TOBN(0x4c3de6a2, 0x8480241d), + TOBN(0x63b5ffed, 0xcb571ecd), TOBN(0xeaf53900, 0xed2fe9a9), + TOBN(0xdec98d4a, 0xc3b81990), TOBN(0x1cb83722, 0x9e0cc8fe), + TOBN(0xfe0b0491, 0xd2b427b9), TOBN(0x0f2386ac, 0xe983a66c), + TOBN(0x930c4d1e, 0xb3291213), TOBN(0xa2f82b2e, 0x59a62ae4), + TOBN(0x77233853, 0xf93e89e3), TOBN(0x7f8063ac, 0x11777c7f), + TOBN(0xff0eb567, 0x59ad2877), TOBN(0x6f454642, 0x9865c754), + TOBN(0xe6fe701a, 0x236e9a84), TOBN(0xc586ef16, 0x06e40fc3), + TOBN(0x3f62b6e0, 0x24bafad9), TOBN(0xc8b42bd2, 0x64da906a), + TOBN(0xc98e1eb4, 0xda3276a0), TOBN(0x30d0e5fc, 0x06cbf852), + TOBN(0x1b6b2ae1, 0xe8b4dfd4), TOBN(0xd754d5c7, 0x8301cbac), + TOBN(0x66097629, 0x112a39ac), TOBN(0xf86b5999, 0x93ba4ab9), + TOBN(0x26c9dea7, 0x99f9d581), TOBN(0x0473b1a8, 0xc2fafeaa), + TOBN(0x1469af55, 0x3b2505a5), TOBN(0x227d16d7, 0xd6a43323), + TOBN(0x3316f73c, 0xad3d97f9), TOBN(0x52bf3bb5, 0x1f137455), + TOBN(0x953eafeb, 0x09954e7c), TOBN(0xa721dfed, 0xdd732411), + TOBN(0xb4929821, 0x141d4579), TOBN(0x3411321c, 0xaa3bd435), + TOBN(0xafb355aa, 0x17fa6015), TOBN(0xb4e7ef4a, 0x18e42f0e), + TOBN(0x604ac97c, 0x59371000), TOBN(0xe1c48c70, 0x7f759c18), + TOBN(0x3f62ecc5, 0xa5db6b65), TOBN(0x0a78b173, 0x38a21495), + TOBN(0x6be1819d, 0xbcc8ad94), TOBN(0x70dc04f6, 0xd89c3400), + TOBN(0x462557b4, 0xa6b4840a), TOBN(0x544c6ade, 0x60bd21c0), + TOBN(0x6a00f24e, 0x907a544b), TOBN(0xa7520dcb, 0x313da210), + TOBN(0xfe939b75, 0x11e4994b), TOBN(0x918b6ba6, 0xbc275d70), + TOBN(0xd3e5e0fc, 0x644be892), TOBN(0x707a9816, 0xfdaf6c42), + TOBN(0x60145567, 0xf15c13fe), TOBN(0x4818ebaa, 0xe130a54a), + TOBN(0x28aad3ad, 0x58d2f767), TOBN(0xdc5267fd, 0xd7e7c773), + TOBN(0x4919cc88, 0xc3afcc98), TOBN(0xaa2e6ab0, 0x2db8cd4b), + TOBN(0xd46fec04, 0xd0c63eaa), TOBN(0xa1cb92c5, 0x19ffa832), + TOBN(0x678dd178, 0xe43a631f), TOBN(0xfb5ae1cd, 0x3dc788b3), + TOBN(0x68b4fb90, 0x6e77de04), TOBN(0x7992bcf0, 0xf06dbb97), + TOBN(0x896e6a13, 0xc417c01d), TOBN(0x8d96332c, 0xb956be01), + TOBN(0x902fc93a, 0x413aa2b9), TOBN(0x99a4d915, 0xfc98c8a5), + TOBN(0x52c29407, 0x565f1137), TOBN(0x4072690f, 0x21e4f281), + TOBN(0x36e607cf, 0x02ff6072), TOBN(0xa47d2ca9, 0x8ad98cdc), + TOBN(0xbf471d1e, 0xf5f56609), TOBN(0xbcf86623, 0xf264ada0), + TOBN(0xb70c0687, 0xaa9e5cb6), TOBN(0xc98124f2, 0x17401c6c), + TOBN(0x8189635f, 0xd4a61435), TOBN(0xd28fb8af, 0xa9d98ea6), + TOBN(0xb9a67c2a, 0x40c251f8), TOBN(0x88cd5d87, 0xa2da44be), + TOBN(0x437deb96, 0xe09b5423), TOBN(0x150467db, 0x64287dc1), + TOBN(0xe161debb, 0xcdabb839), TOBN(0xa79e9742, 0xf1839a3e), + TOBN(0xbb8dd3c2, 0x652d202b), TOBN(0x7b3e67f7, 0xe9f97d96), + TOBN(0x5aa5d78f, 0xb1cb6ac9), TOBN(0xffa13e8e, 0xca1d0d45), + TOBN(0x369295dd, 0x2ba5bf95), TOBN(0xd68bd1f8, 0x39aff05e), + TOBN(0xaf0d86f9, 0x26d783f2), TOBN(0x543a59b3, 0xfc3aafc1), + TOBN(0x3fcf81d2, 0x7b7da97c), TOBN(0xc990a056, 0xd25dee46), + TOBN(0x3e6775b8, 0x519cce2c), TOBN(0xfc9af71f, 0xae13d863), + TOBN(0x774a4a6f, 0x47c1605c), TOBN(0x46ba4245, 0x2fd205e8), + 
TOBN(0xa06feea4, 0xd3fd524d), TOBN(0x1e724641, 0x6de1acc2), + TOBN(0xf53816f1, 0x334e2b42), TOBN(0x49e5918e, 0x922f0024), + TOBN(0x439530b6, 0x65c7322d), TOBN(0xcf12cc01, 0xb3c1b3fb), + TOBN(0xc70b0186, 0x0172f685), TOBN(0xb915ee22, 0x1b58391d), + TOBN(0x9afdf03b, 0xa317db24), TOBN(0x87dec659, 0x17b8ffc4), + TOBN(0x7f46597b, 0xe4d3d050), TOBN(0x80a1c1ed, 0x006500e7), + TOBN(0x84902a96, 0x78bf030e), TOBN(0xfb5e9c9a, 0x50560148), + TOBN(0x6dae0a92, 0x63362426), TOBN(0xdcaeecf4, 0xa9e30c40), + TOBN(0xc0d887bb, 0x518d0c6b), TOBN(0x99181152, 0xcb985b9d), + TOBN(0xad186898, 0xef7bc381), TOBN(0x18168ffb, 0x9ee46201), + TOBN(0x9a04cdaa, 0x2502753c), TOBN(0xbb279e26, 0x51407c41), + TOBN(0xeacb03aa, 0xf23564e5), TOBN(0x18336582, 0x71e61016), + TOBN(0x8684b8c4, 0xeb809877), TOBN(0xb336e18d, 0xea0e672e), + TOBN(0xefb601f0, 0x34ee5867), TOBN(0x2733edbe, 0x1341cfd1), + TOBN(0xb15e809a, 0x26025c3c), TOBN(0xe6e981a6, 0x9350df88), + TOBN(0x92376237, 0x8502fd8e), TOBN(0x4791f216, 0x0c12be9b), + TOBN(0xb7256789, 0x25f02425), TOBN(0xec863194, 0x7a974443), + TOBN(0x7c0ce882, 0xfb41cc52), TOBN(0xc266ff7e, 0xf25c07f2), + TOBN(0x3d4da8c3, 0x017025f3), TOBN(0xefcf628c, 0xfb9579b4), + TOBN(0x5c4d0016, 0x1f3716ec), TOBN(0x9c27ebc4, 0x6801116e), + TOBN(0x5eba0ea1, 0x1da1767e), TOBN(0xfe151452, 0x47004c57), + TOBN(0x3ace6df6, 0x8c2373b7), TOBN(0x75c3dffe, 0x5dbc37ac), + TOBN(0x3dc32a73, 0xddc925fc), TOBN(0xb679c841, 0x2f65ee0b), + TOBN(0x715a3295, 0x451cbfeb), TOBN(0xd9889768, 0xf76e9a29), + TOBN(0xec20ce7f, 0xb28ad247), TOBN(0xe99146c4, 0x00894d79), + TOBN(0x71457d7c, 0x9f5e3ea7), TOBN(0x097b2662, 0x38030031), + TOBN(0xdb7f6ae6, 0xcf9f82a8), TOBN(0x319decb9, 0x438f473a), + TOBN(0xa63ab386, 0x283856c3), TOBN(0x13e3172f, 0xb06a361b), + TOBN(0x2959f8dc, 0x7d5a006c), TOBN(0x2dbc27c6, 0x75fba752), + TOBN(0xc1227ab2, 0x87c22c9e), TOBN(0x06f61f75, 0x71a268b2), + TOBN(0x1b6bb971, 0x04779ce2), TOBN(0xaca83812, 0x0aadcb1d), + TOBN(0x297ae0bc, 0xaeaab2d5), TOBN(0xa5c14ee7, 0x5bfb9f13), + TOBN(0xaa00c583, 0xf17a62c7), TOBN(0x39eb962c, 0x173759f6), + TOBN(0x1eeba1d4, 0x86c9a88f), TOBN(0x0ab6c37a, 0xdf016c5e), + TOBN(0xa2a147db, 0xa28a0749), TOBN(0x246c20d6, 0xee519165), + TOBN(0x5068d1b1, 0xd3810715), TOBN(0xb1e7018c, 0x748160b9), + TOBN(0x03f5b1fa, 0xf380ff62), TOBN(0xef7fb1dd, 0xf3cb2c1e), + TOBN(0xeab539a8, 0xfc91a7da), TOBN(0x83ddb707, 0xf3f9b561), + TOBN(0xc550e211, 0xfe7df7a4), TOBN(0xa7cd07f2, 0x063f6f40), + TOBN(0xb0de3635, 0x2976879c), TOBN(0xb5f83f85, 0xe55741da), + TOBN(0x4ea9d25e, 0xf3d8ac3d), TOBN(0x6fe2066f, 0x62819f02), + TOBN(0x4ab2b9c2, 0xcef4a564), TOBN(0x1e155d96, 0x5ffa2de3), + TOBN(0x0eb0a19b, 0xc3a72d00), TOBN(0x4037665b, 0x8513c31b), + TOBN(0x2fb2b6bf, 0x04c64637), TOBN(0x45c34d6e, 0x08cdc639), + TOBN(0x56f1e10f, 0xf01fd796), TOBN(0x4dfb8101, 0xfe3667b8), + TOBN(0xe0eda253, 0x9021d0c0), TOBN(0x7a94e9ff, 0x8a06c6ab), + TOBN(0x2d3bb0d9, 0xbb9aa882), TOBN(0xea20e4e5, 0xec05fd10), + TOBN(0xed7eeb5f, 0x1a1ca64e), TOBN(0x2fa6b43c, 0xc6327cbd), + TOBN(0xb577e3cf, 0x3aa91121), TOBN(0x8c6bd5ea, 0x3a34079b), + TOBN(0xd7e5ba39, 0x60e02fc0), TOBN(0xf16dd2c3, 0x90141bf8), + TOBN(0xb57276d9, 0x80101b98), TOBN(0x760883fd, 0xb82f0f66), + TOBN(0x89d7de75, 0x4bc3eff3), TOBN(0x03b60643, 0x5dc2ab40), + TOBN(0xcd6e53df, 0xe05beeac), TOBN(0xf2f1e862, 0xbc3325cd), + TOBN(0xdd0f7921, 0x774f03c3), TOBN(0x97ca7221, 0x4552cc1b), + TOBN(0x5a0d6afe, 0x1cd19f72), TOBN(0xa20915dc, 0xf183fbeb), + TOBN(0x9fda4b40, 0x832c403c), TOBN(0x32738edd, 0xbe425442), + TOBN(0x469a1df6, 0xb5eccf1a), TOBN(0x4b5aff42, 0x28bbe1f0), + TOBN(0x31359d7f, 
0x570dfc93), TOBN(0xa18be235, 0xf0088628), + TOBN(0xa5b30fba, 0xb00ed3a9), TOBN(0x34c61374, 0x73cdf8be), + TOBN(0x2c5c5f46, 0xabc56797), TOBN(0x5cecf93d, 0xb82a8ae2), + TOBN(0x7d3dbe41, 0xa968fbf0), TOBN(0xd23d4583, 0x1a5c7f3d), + TOBN(0xf28f69a0, 0xc087a9c7), TOBN(0xc2d75471, 0x474471ca), + TOBN(0x36ec9f4a, 0x4eb732ec), TOBN(0x6c943bbd, 0xb1ca6bed), + TOBN(0xd64535e1, 0xf2457892), TOBN(0x8b84a8ea, 0xf7e2ac06), + TOBN(0xe0936cd3, 0x2499dd5f), TOBN(0x12053d7e, 0x0ed04e57), + TOBN(0x4bdd0076, 0xe4305d9d), TOBN(0x34a527b9, 0x1f67f0a2), + TOBN(0xe79a4af0, 0x9cec46ea), TOBN(0xb15347a1, 0x658b9bc7), + TOBN(0x6bd2796f, 0x35af2f75), TOBN(0xac957990, 0x4051c435), + TOBN(0x2669dda3, 0xc33a655d), TOBN(0x5d503c2e, 0x88514aa3), + TOBN(0xdfa11337, 0x3753dd41), TOBN(0x3f054673, 0x0b754f78), + TOBN(0xbf185677, 0x496125bd), TOBN(0xfb0023c8, 0x3775006c), + TOBN(0xfa0f072f, 0x3a037899), TOBN(0x4222b6eb, 0x0e4aea57), + TOBN(0x3dde5e76, 0x7866d25a), TOBN(0xb6eb04f8, 0x4837aa6f), + TOBN(0x5315591a, 0x2cf1cdb8), TOBN(0x6dfb4f41, 0x2d4e683c), + TOBN(0x7e923ea4, 0x48ee1f3a), TOBN(0x9604d9f7, 0x05a2afd5), + TOBN(0xbe1d4a33, 0x40ea4948), TOBN(0x5b45f1f4, 0xb44cbd2f), + TOBN(0x5faf8376, 0x4acc757e), TOBN(0xa7cf9ab8, 0x63d68ff7), + TOBN(0x8ad62f69, 0xdf0e404b), TOBN(0xd65f33c2, 0x12bdafdf), + TOBN(0xc365de15, 0xa377b14e), TOBN(0x6bf5463b, 0x8e39f60c), + TOBN(0x62030d2d, 0x2ce68148), TOBN(0xd95867ef, 0xe6f843a8), + TOBN(0xd39a0244, 0xef5ab017), TOBN(0x0bd2d8c1, 0x4ab55d12), + TOBN(0xc9503db3, 0x41639169), TOBN(0x2d4e25b0, 0xf7660c8a), + TOBN(0x760cb3b5, 0xe224c5d7), TOBN(0xfa3baf8c, 0x68616919), + TOBN(0x9fbca113, 0x8d142552), TOBN(0x1ab18bf1, 0x7669ebf5), + TOBN(0x55e6f53e, 0x9bdf25dd), TOBN(0x04cc0bf3, 0xcb6cd154), + TOBN(0x595bef49, 0x95e89080), TOBN(0xfe9459a8, 0x104a9ac1), + TOBN(0xad2d89ca, 0xcce9bb32), TOBN(0xddea65e1, 0xf7de8285), + TOBN(0x62ed8c35, 0xb351bd4b), TOBN(0x4150ff36, 0x0c0e19a7), + TOBN(0x86e3c801, 0x345f4e47), TOBN(0x3bf21f71, 0x203a266c), + TOBN(0x7ae110d4, 0x855b1f13), TOBN(0x5d6aaf6a, 0x07262517), + TOBN(0x1e0f12e1, 0x813d28f1), TOBN(0x6000e11d, 0x7ad7a523), + TOBN(0xc7d8deef, 0xc744a17b), TOBN(0x1e990b48, 0x14c05a00), + TOBN(0x68fddaee, 0x93e976d5), TOBN(0x696241d1, 0x46610d63), + TOBN(0xb204e7c3, 0x893dda88), TOBN(0x8bccfa65, 0x6a3a6946), + TOBN(0xb59425b4, 0xc5cd1411), TOBN(0x701b4042, 0xff3658b1), + TOBN(0xe3e56bca, 0x4784cf93), TOBN(0x27de5f15, 0x8fe68d60), + TOBN(0x4ab9cfce, 0xf8d53f19), TOBN(0xddb10311, 0xa40a730d), + TOBN(0x6fa73cd1, 0x4eee0a8a), TOBN(0xfd548748, 0x5249719d), + TOBN(0x49d66316, 0xa8123ef0), TOBN(0x73c32db4, 0xe7f95438), + TOBN(0x2e2ed209, 0x0d9e7854), TOBN(0xf98a9329, 0x9d9f0507), + TOBN(0xc5d33cf6, 0x0c6aa20a), TOBN(0x9a32ba14, 0x75279bb2), + TOBN(0x7e3202cb, 0x774a7307), TOBN(0x64ed4bc4, 0xe8c42dbd), + TOBN(0xc20f1a06, 0xd4caed0d), TOBN(0xb8021407, 0x171d22b3), + TOBN(0xd426ca04, 0xd13268d7), TOBN(0x92377007, 0x25f4d126), + TOBN(0x4204cbc3, 0x71f21a85), TOBN(0x18461b7a, 0xf82369ba), + TOBN(0xc0c07d31, 0x3fc858f9), TOBN(0x5deb5a50, 0xe2bab569), + TOBN(0xd5959d46, 0xd5eea89e), TOBN(0xfdff8424, 0x08437f4b), + TOBN(0xf21071e4, 0x3cfe254f), TOBN(0x72417696, 0x95468321), + TOBN(0x5d8288b9, 0x102cae3e), TOBN(0x2d143e3d, 0xf1965dff), + TOBN(0x00c9a376, 0xa078d847), TOBN(0x6fc0da31, 0x26028731), + TOBN(0xa2baeadf, 0xe45083a2), TOBN(0x66bc7218, 0x5e5b4bcd), + TOBN(0x2c826442, 0xd04b8e7f), TOBN(0xc19f5451, 0x6c4b586b), + TOBN(0x60182c49, 0x5b7eeed5), TOBN(0xd9954ecd, 0x7aa9dfa1), + TOBN(0xa403a8ec, 0xc73884ad), TOBN(0x7fb17de2, 0x9bb39041), + TOBN(0x694b64c5, 0xabb020e8), 
TOBN(0x3d18c184, 0x19c4eec7), + TOBN(0x9c4673ef, 0x1c4793e5), TOBN(0xc7b8aeb5, 0x056092e6), + TOBN(0x3aa1ca43, 0xf0f8c16b), TOBN(0x224ed5ec, 0xd679b2f6), + TOBN(0x0d56eeaf, 0x55a205c9), TOBN(0xbfe115ba, 0x4b8e028b), + TOBN(0x97e60849, 0x3927f4fe), TOBN(0xf91fbf94, 0x759aa7c5), + TOBN(0x985af769, 0x6be90a51), TOBN(0xc1277b78, 0x78ccb823), + TOBN(0x395b656e, 0xe7a75952), TOBN(0x00df7de0, 0x928da5f5), + TOBN(0x09c23175, 0x4ca4454f), TOBN(0x4ec971f4, 0x7aa2d3c1), + TOBN(0x45c3c507, 0xe75d9ccc), TOBN(0x63b7be8a, 0x3dc90306), + TOBN(0x37e09c66, 0x5db44bdc), TOBN(0x50d60da1, 0x6841c6a2), + TOBN(0x6f9b65ee, 0x08df1b12), TOBN(0x38734879, 0x7ff089df), + TOBN(0x9c331a66, 0x3fe8013d), TOBN(0x017f5de9, 0x5f42fcc8), + TOBN(0x43077866, 0xe8e57567), TOBN(0xc9f781ce, 0xf9fcdb18), + TOBN(0x38131dda, 0x9b12e174), TOBN(0x25d84aa3, 0x8a03752a), + TOBN(0x45e09e09, 0x4d0c0ce2), TOBN(0x1564008b, 0x92bebba5), + TOBN(0xf7e8ad31, 0xa87284c7), TOBN(0xb7c4b46c, 0x97e7bbaa), + TOBN(0x3e22a7b3, 0x97acf4ec), TOBN(0x0426c400, 0x5ea8b640), + TOBN(0x5e3295a6, 0x4e969285), TOBN(0x22aabc59, 0xa6a45670), + TOBN(0xb929714c, 0x5f5942bc), TOBN(0x9a6168bd, 0xfa3182ed), + TOBN(0x2216a665, 0x104152ba), TOBN(0x46908d03, 0xb6926368)} + , + {TOBN(0xa9f5d874, 0x5a1251fb), TOBN(0x967747a8, 0xc72725c7), + TOBN(0x195c33e5, 0x31ffe89e), TOBN(0x609d210f, 0xe964935e), + TOBN(0xcafd6ca8, 0x2fe12227), TOBN(0xaf9b5b96, 0x0426469d), + TOBN(0x2e9ee04c, 0x5693183c), TOBN(0x1084a333, 0xc8146fef), + TOBN(0x96649933, 0xaed1d1f7), TOBN(0x566eaff3, 0x50563090), + TOBN(0x345057f0, 0xad2e39cf), TOBN(0x148ff65b, 0x1f832124), + TOBN(0x042e89d4, 0xcf94cf0d), TOBN(0x319bec84, 0x520c58b3), + TOBN(0x2a267626, 0x5361aa0d), TOBN(0xc86fa302, 0x8fbc87ad), + TOBN(0xfc83d2ab, 0x5c8b06d5), TOBN(0xb1a785a2, 0xfe4eac46), + TOBN(0xb99315bc, 0x846f7779), TOBN(0xcf31d816, 0xef9ea505), + TOBN(0x2391fe6a, 0x15d7dc85), TOBN(0x2f132b04, 0xb4016b33), + TOBN(0x29547fe3, 0x181cb4c7), TOBN(0xdb66d8a6, 0x650155a1), + TOBN(0x6b66d7e1, 0xadc1696f), TOBN(0x98ebe593, 0x0acd72d0), + TOBN(0x65f24550, 0xcc1b7435), TOBN(0xce231393, 0xb4b9a5ec), + TOBN(0x234a22d4, 0xdb067df9), TOBN(0x98dda095, 0xcaff9b00), + TOBN(0x1bbc75a0, 0x6100c9c1), TOBN(0x1560a9c8, 0x939cf695), + TOBN(0xcf006d3e, 0x99e0925f), TOBN(0x2dd74a96, 0x6322375a), + TOBN(0xc58b446a, 0xb56af5ba), TOBN(0x50292683, 0xe0b9b4f1), + TOBN(0xe2c34cb4, 0x1aeaffa3), TOBN(0x8b17203f, 0x9b9587c1), + TOBN(0x6d559207, 0xead1350c), TOBN(0x2b66a215, 0xfb7f9604), + TOBN(0x0850325e, 0xfe51bf74), TOBN(0x9c4f579e, 0x5e460094), + TOBN(0x5c87b92a, 0x76da2f25), TOBN(0x889de4e0, 0x6febef33), + TOBN(0x6900ec06, 0x646083ce), TOBN(0xbe2a0335, 0xbfe12773), + TOBN(0xadd1da35, 0xc5344110), TOBN(0x757568b7, 0xb802cd20), + TOBN(0x75559779, 0x00f7e6c8), TOBN(0x38e8b94f, 0x0facd2f0), + TOBN(0xfea1f3af, 0x03fde375), TOBN(0x5e11a1d8, 0x75881dfc), + TOBN(0xb3a6b02e, 0xc1e2f2ef), TOBN(0x193d2bbb, 0xc605a6c5), + TOBN(0x325ffeee, 0x339a0b2d), TOBN(0x27b6a724, 0x9e0c8846), + TOBN(0xe4050f1c, 0xf1c367ca), TOBN(0x9bc85a9b, 0xc90fbc7d), + TOBN(0xa373c4a2, 0xe1a11032), TOBN(0xb64232b7, 0xad0393a9), + TOBN(0xf5577eb0, 0x167dad29), TOBN(0x1604f301, 0x94b78ab2), + TOBN(0x0baa94af, 0xe829348b), TOBN(0x77fbd8dd, 0x41654342), + TOBN(0xdab50ea5, 0xb964e39a), TOBN(0xd4c29e3c, 0xd0d3c76e), + TOBN(0x80dae67c, 0x56d11964), TOBN(0x7307a8bf, 0xe5ffcc2f), + TOBN(0x65bbc1aa, 0x91708c3b), TOBN(0xa151e62c, 0x28bf0eeb), + TOBN(0x6cb53381, 0x6fa34db7), TOBN(0x5139e05c, 0xa29403a8), + TOBN(0x6ff651b4, 0x94a7cd2e), TOBN(0x5671ffd1, 0x0699336c), + TOBN(0x6f5fd2cc, 0x979a896a), 
TOBN(0x11e893a8, 0xd8148cef), + TOBN(0x988906a1, 0x65cf7b10), TOBN(0x81b67178, 0xc50d8485), + TOBN(0x7c0deb35, 0x8a35b3de), TOBN(0x423ac855, 0xc1d29799), + TOBN(0xaf580d87, 0xdac50b74), TOBN(0x28b2b89f, 0x5869734c), + TOBN(0x99a3b936, 0x874e28fb), TOBN(0xbb2c9190, 0x25f3f73a), + TOBN(0x199f6918, 0x84a9d5b7), TOBN(0x7ebe2325, 0x7e770374), + TOBN(0xf442e107, 0x0738efe2), TOBN(0xcf9f3f56, 0xcf9082d2), + TOBN(0x719f69e1, 0x09618708), TOBN(0xcc9e8364, 0xc183f9b1), + TOBN(0xec203a95, 0x366a21af), TOBN(0x6aec5d6d, 0x068b141f), + TOBN(0xee2df78a, 0x994f04e9), TOBN(0xb39ccae8, 0x271245b0), + TOBN(0xb875a4a9, 0x97e43f4f), TOBN(0x507dfe11, 0xdb2cea98), + TOBN(0x4fbf81cb, 0x489b03e9), TOBN(0xdb86ec5b, 0x6ec414fa), + TOBN(0xfad444f9, 0xf51b3ae5), TOBN(0xca7d33d6, 0x1914e3fe), + TOBN(0xa9c32f5c, 0x0ae6c4d0), TOBN(0xa9ca1d1e, 0x73969568), + TOBN(0x98043c31, 0x1aa7467e), TOBN(0xe832e75c, 0xe21b5ac6), + TOBN(0x314b7aea, 0x5232123d), TOBN(0x08307c8c, 0x65ae86db), + TOBN(0x06e7165c, 0xaa4668ed), TOBN(0xb170458b, 0xb4d3ec39), + TOBN(0x4d2e3ec6, 0xc19bb986), TOBN(0xc5f34846, 0xae0304ed), + TOBN(0x917695a0, 0x6c9f9722), TOBN(0x6c7f7317, 0x4cab1c0a), + TOBN(0x6295940e, 0x9d6d2e8b), TOBN(0xd318b8c1, 0x549f7c97), + TOBN(0x22453204, 0x97713885), TOBN(0x468d834b, 0xa8a440fe), + TOBN(0xd81fe5b2, 0xbfba796e), TOBN(0x152364db, 0x6d71f116), + TOBN(0xbb8c7c59, 0xb5b66e53), TOBN(0x0b12c61b, 0x2641a192), + TOBN(0x31f14802, 0xfcf0a7fd), TOBN(0x42fd0789, 0x5488b01e), + TOBN(0x71d78d6d, 0x9952b498), TOBN(0x8eb572d9, 0x07ac5201), + TOBN(0xe0a2a44c, 0x4d194a88), TOBN(0xd2b63fd9, 0xba017e66), + TOBN(0x78efc6c8, 0xf888aefc), TOBN(0xb76f6bda, 0x4a881a11), + TOBN(0x187f314b, 0xb46c2397), TOBN(0x004cf566, 0x5ded2819), + TOBN(0xa9ea5704, 0x38764d34), TOBN(0xbba45217, 0x78084709), + TOBN(0x06474571, 0x1171121e), TOBN(0xad7b7eb1, 0xe7c9b671), + TOBN(0xdacfbc40, 0x730f7507), TOBN(0x178cd8c6, 0xc7ad7bd1), + TOBN(0xbf0be101, 0xb2a67238), TOBN(0x3556d367, 0xaf9c14f2), + TOBN(0x104b7831, 0xa5662075), TOBN(0x58ca59bb, 0x79d9e60a), + TOBN(0x4bc45392, 0xa569a73b), TOBN(0x517a52e8, 0x5698f6c9), + TOBN(0x85643da5, 0xaeadd755), TOBN(0x1aed0cd5, 0x2a581b84), + TOBN(0xb9b4ff84, 0x80af1372), TOBN(0x244c3113, 0xf1ba5d1f), + TOBN(0x2a5dacbe, 0xf5f98d31), TOBN(0x2c3323e8, 0x4375bc2a), + TOBN(0x17a3ab4a, 0x5594b1dd), TOBN(0xa1928bfb, 0xceb4797e), + TOBN(0xe83af245, 0xe4886a19), TOBN(0x8979d546, 0x72b5a74a), + TOBN(0xa0f726bc, 0x19f9e967), TOBN(0xd9d03152, 0xe8fbbf4e), + TOBN(0xcfd6f51d, 0xb7707d40), TOBN(0x633084d9, 0x63f6e6e0), + TOBN(0xedcd9cdc, 0x55667eaf), TOBN(0x73b7f92b, 0x2e44d56f), + TOBN(0xfb2e39b6, 0x4e962b14), TOBN(0x7d408f6e, 0xf671fcbf), + TOBN(0xcc634ddc, 0x164a89bb), TOBN(0x74a42bb2, 0x3ef3bd05), + TOBN(0x1280dbb2, 0x428decbb), TOBN(0x6103f6bb, 0x402c8596), + TOBN(0xfa2bf581, 0x355a5752), TOBN(0x562f96a8, 0x00946674), + TOBN(0x4e4ca16d, 0x6da0223b), TOBN(0xfe47819f, 0x28d3aa25), + TOBN(0x9eea3075, 0xf8dfcf8a), TOBN(0xa284f0aa, 0x95669825), + TOBN(0xb3fca250, 0x867d3fd8), TOBN(0x20757b5f, 0x269d691e), + TOBN(0xf2c24020, 0x93b8a5de), TOBN(0xd3f93359, 0xebc06da6), + TOBN(0x1178293e, 0xb2739c33), TOBN(0xd2a3e770, 0xbcd686e5), + TOBN(0xa76f49f4, 0xcd941534), TOBN(0x0d37406b, 0xe3c71c0e), + TOBN(0x172d9397, 0x3b97f7e3), TOBN(0xec17e239, 0xbd7fd0de), + TOBN(0xe3290551, 0x6f496ba2), TOBN(0x6a693172, 0x36ad50e7), + TOBN(0xc4e539a2, 0x83e7eff5), TOBN(0x752737e7, 0x18e1b4cf), + TOBN(0xa2f7932c, 0x68af43ee), TOBN(0x5502468e, 0x703d00bd), + TOBN(0xe5dc978f, 0x2fb061f5), TOBN(0xc9a1904a, 0x28c815ad), + TOBN(0xd3af538d, 0x470c56a4), TOBN(0x159abc5f, 
0x193d8ced), + TOBN(0x2a37245f, 0x20108ef3), TOBN(0xfa17081e, 0x223f7178), + TOBN(0x27b0fb2b, 0x10c8c0f5), TOBN(0x2102c3ea, 0x40650547), + TOBN(0x594564df, 0x8ac3bfa7), TOBN(0x98102033, 0x509dad96), + TOBN(0x6989643f, 0xf1d18a13), TOBN(0x35eebd91, 0xd7fc5af0), + TOBN(0x078d096a, 0xfaeaafd8), TOBN(0xb7a89341, 0xdef3de98), + TOBN(0x2a206e8d, 0xecf2a73a), TOBN(0x066a6397, 0x8e551994), + TOBN(0x3a6a088a, 0xb98d53a2), TOBN(0x0ce7c67c, 0x2d1124aa), + TOBN(0x48cec671, 0x759a113c), TOBN(0xe3b373d3, 0x4f6f67fa), + TOBN(0x5455d479, 0xfd36727b), TOBN(0xe5a428ee, 0xa13c0d81), + TOBN(0xb853dbc8, 0x1c86682b), TOBN(0xb78d2727, 0xb8d02b2a), + TOBN(0xaaf69bed, 0x8ebc329a), TOBN(0xdb6b40b3, 0x293b2148), + TOBN(0xe42ea77d, 0xb8c4961f), TOBN(0xb1a12f7c, 0x20e5e0ab), + TOBN(0xa0ec5274, 0x79e8b05e), TOBN(0x68027391, 0xfab60a80), + TOBN(0x6bfeea5f, 0x16b1bd5e), TOBN(0xf957e420, 0x4de30ad3), + TOBN(0xcbaf664e, 0x6a353b9e), TOBN(0x5c873312, 0x26d14feb), + TOBN(0x4e87f98c, 0xb65f57cb), TOBN(0xdb60a621, 0x5e0cdd41), + TOBN(0x67c16865, 0xa6881440), TOBN(0x1093ef1a, 0x46ab52aa), + TOBN(0xc095afb5, 0x3f4ece64), TOBN(0x6a6bb02e, 0x7604551a), + TOBN(0x55d44b4e, 0x0b26b8cd), TOBN(0xe5f9a999, 0xf971268a), + TOBN(0xc08ec425, 0x11a7de84), TOBN(0x83568095, 0xfda469dd), + TOBN(0x737bfba1, 0x6c6c90a2), TOBN(0x1cb9c4a0, 0xbe229831), + TOBN(0x93bccbba, 0xbb2eec64), TOBN(0xa0c23b64, 0xda03adbe), + TOBN(0x5f7aa00a, 0xe0e86ac4), TOBN(0x470b941e, 0xfc1401e6), + TOBN(0x5ad8d679, 0x9df43574), TOBN(0x4ccfb8a9, 0x0f65d810), + TOBN(0x1bce80e3, 0xaa7fbd81), TOBN(0x273291ad, 0x9508d20a), + TOBN(0xf5c4b46b, 0x42a92806), TOBN(0x810684ec, 0xa86ab44a), + TOBN(0x4591640b, 0xca0bc9f8), TOBN(0xb5efcdfc, 0x5c4b6054), + TOBN(0x16fc8907, 0x6e9edd12), TOBN(0xe29d0b50, 0xd4d792f9), + TOBN(0xa45fd01c, 0x9b03116d), TOBN(0x85035235, 0xc81765a4), + TOBN(0x1fe2a9b2, 0xb4b4b67c), TOBN(0xc1d10df0, 0xe8020604), + TOBN(0x9d64abfc, 0xbc8058d8), TOBN(0x8943b9b2, 0x712a0fbb), + TOBN(0x90eed914, 0x3b3def04), TOBN(0x85ab3aa2, 0x4ce775ff), + TOBN(0x605fd4ca, 0x7bbc9040), TOBN(0x8b34a564, 0xe2c75dfb), + TOBN(0x41ffc94a, 0x10358560), TOBN(0x2d8a5072, 0x9e5c28aa), + TOBN(0xe915a0fc, 0x4cc7eb15), TOBN(0xe9efab05, 0x8f6d0f5d), + TOBN(0xdbab47a9, 0xd19e9b91), TOBN(0x8cfed745, 0x0276154c), + TOBN(0x154357ae, 0x2cfede0d), TOBN(0x520630df, 0x19f5a4ef), + TOBN(0x25759f7c, 0xe382360f), TOBN(0xb6db05c9, 0x88bf5857), + TOBN(0x2917d61d, 0x6c58d46c), TOBN(0x14f8e491, 0xfd20cb7a), + TOBN(0xb68a727a, 0x11c20340), TOBN(0x0386f86f, 0xaf7ccbb6), + TOBN(0x5c8bc6cc, 0xfee09a20), TOBN(0x7d76ff4a, 0xbb7eea35), + TOBN(0xa7bdebe7, 0xdb15be7a), TOBN(0x67a08054, 0xd89f0302), + TOBN(0x56bf0ea9, 0xc1193364), TOBN(0xc8244467, 0x62837ebe), + TOBN(0x32bd8e8b, 0x20d841b8), TOBN(0x127a0548, 0xdbb8a54f), + TOBN(0x83dd4ca6, 0x63b20236), TOBN(0x87714718, 0x203491fa), + TOBN(0x4dabcaaa, 0xaa8a5288), TOBN(0x91cc0c8a, 0xaf23a1c9), + TOBN(0x34c72c6a, 0x3f220e0c), TOBN(0xbcc20bdf, 0x1232144a), + TOBN(0x6e2f42da, 0xa20ede1b), TOBN(0xc441f00c, 0x74a00515), + TOBN(0xbf46a5b6, 0x734b8c4b), TOBN(0x57409503, 0x7b56c9a4), + TOBN(0x9f735261, 0xe4585d45), TOBN(0x9231faed, 0x6734e642), + TOBN(0x1158a176, 0xbe70ee6c), TOBN(0x35f1068d, 0x7c3501bf), + TOBN(0x6beef900, 0xa2d26115), TOBN(0x649406f2, 0xef0afee3), + TOBN(0x3f43a60a, 0xbc2420a1), TOBN(0x509002a7, 0xd5aee4ac), + TOBN(0xb46836a5, 0x3ff3571b), TOBN(0x24f98b78, 0x837927c1), + TOBN(0x6254256a, 0x4533c716), TOBN(0xf27abb0b, 0xd07ee196), + TOBN(0xd7cf64fc, 0x5c6d5bfd), TOBN(0x6915c751, 0xf0cd7a77), + TOBN(0xd9f59012, 0x8798f534), TOBN(0x772b0da8, 0xf81d8b5f), + 
TOBN(0x1244260c, 0x2e03fa69), TOBN(0x36cf0e3a, 0x3be1a374), + TOBN(0x6e7c1633, 0xef06b960), TOBN(0xa71a4c55, 0x671f90f6), + TOBN(0x7a941251, 0x33c673db), TOBN(0xc0bea510, 0x73e8c131), + TOBN(0x61a8a699, 0xd4f6c734), TOBN(0x25e78c88, 0x341ed001), + TOBN(0x5c18acf8, 0x8e2f7d90), TOBN(0xfdbf33d7, 0x77be32cd), + TOBN(0x0a085cd7, 0xd2eb5ee9), TOBN(0x2d702cfb, 0xb3201115), + TOBN(0xb6e0ebdb, 0x85c88ce8), TOBN(0x23a3ce3c, 0x1e01d617), + TOBN(0x3041618e, 0x567333ac), TOBN(0x9dd0fd8f, 0x157edb6b), + TOBN(0x27f74702, 0xb57872b8), TOBN(0x2ef26b4f, 0x657d5fe1), + TOBN(0x95426f0a, 0x57cf3d40), TOBN(0x847e2ad1, 0x65a6067a), + TOBN(0xd474d9a0, 0x09996a74), TOBN(0x16a56acd, 0x2a26115c), + TOBN(0x02a615c3, 0xd16f4d43), TOBN(0xcc3fc965, 0xaadb85b7), + TOBN(0x386bda73, 0xce07d1b0), TOBN(0xd82910c2, 0x58ad4178), + TOBN(0x124f82cf, 0xcd2617f4), TOBN(0xcc2f5e8d, 0xef691770), + TOBN(0x82702550, 0xb8c30ccc), TOBN(0x7b856aea, 0x1a8e575a), + TOBN(0xbb822fef, 0xb1ab9459), TOBN(0x085928bc, 0xec24e38e), + TOBN(0x5d0402ec, 0xba8f4b4d), TOBN(0xc07cd4ba, 0x00b4d58b), + TOBN(0x5d8dffd5, 0x29227e7a), TOBN(0x61d44d0c, 0x31bf386f), + TOBN(0xe486dc2b, 0x135e6f4d), TOBN(0x680962eb, 0xe79410ef), + TOBN(0xa61bd343, 0xf10088b5), TOBN(0x6aa76076, 0xe2e28686), + TOBN(0x80463d11, 0x8fb98871), TOBN(0xcb26f5c3, 0xbbc76aff), + TOBN(0xd4ab8edd, 0xfbe03614), TOBN(0xc8eb579b, 0xc0cf2dee), + TOBN(0xcc004c15, 0xc93bae41), TOBN(0x46fbae5d, 0x3aeca3b2), + TOBN(0x671235cf, 0x0f1e9ab1), TOBN(0xadfba934, 0x9ec285c1), + TOBN(0x88ded013, 0xf216c980), TOBN(0xc8ac4fb8, 0xf79e0bc1), + TOBN(0xa29b89c6, 0xfb97a237), TOBN(0xb697b780, 0x9922d8e7), + TOBN(0x3142c639, 0xddb945b5), TOBN(0x447b06c7, 0xe094c3a9), + TOBN(0xcdcb3642, 0x72266c90), TOBN(0x633aad08, 0xa9385046), + TOBN(0xa36c936b, 0xb57c6477), TOBN(0x871f8b64, 0xe94dbcc6), + TOBN(0x28d0fb62, 0xa591a67b), TOBN(0x9d40e081, 0xc1d926f5), + TOBN(0x3111eaf6, 0xf2d84b5a), TOBN(0x228993f9, 0xa565b644), + TOBN(0x0ccbf592, 0x2c83188b), TOBN(0xf87b30ab, 0x3df3e197), + TOBN(0xb8658b31, 0x7642bca8), TOBN(0x1a032d7f, 0x52800f17), + TOBN(0x051dcae5, 0x79bf9445), TOBN(0xeba6b8ee, 0x54a2e253), + TOBN(0x5c8b9cad, 0xd4485692), TOBN(0x84bda40e, 0x8986e9be), + TOBN(0xd16d16a4, 0x2f0db448), TOBN(0x8ec80050, 0xa14d4188), + TOBN(0xb2b26107, 0x98fa7aaa), TOBN(0x41209ee4, 0xf073aa4e), + TOBN(0xf1570359, 0xf2d6b19b), TOBN(0xcbe6868c, 0xfc577caf), + TOBN(0x186c4bdc, 0x32c04dd3), TOBN(0xa6c35fae, 0xcfeee397), + TOBN(0xb4a1b312, 0xf086c0cf), TOBN(0xe0a5ccc6, 0xd9461fe2), + TOBN(0xc32278aa, 0x1536189f), TOBN(0x1126c55f, 0xba6df571), + TOBN(0x0f71a602, 0xb194560e), TOBN(0x8b2d7405, 0x324bd6e1), + TOBN(0x8481939e, 0x3738be71), TOBN(0xb5090b1a, 0x1a4d97a9), + TOBN(0x116c65a3, 0xf05ba915), TOBN(0x21863ad3, 0xaae448aa), + TOBN(0xd24e2679, 0xa7aae5d3), TOBN(0x7076013d, 0x0de5c1c4), + TOBN(0x2d50f8ba, 0xbb05b629), TOBN(0x73c1abe2, 0x6e66efbb), + TOBN(0xefd4b422, 0xf2488af7), TOBN(0xe4105d02, 0x663ba575), + TOBN(0x7eb60a8b, 0x53a69457), TOBN(0x62210008, 0xc945973b), + TOBN(0xfb255478, 0x77a50ec6), TOBN(0xbf0392f7, 0x0a37a72c), + TOBN(0xa0a7a19c, 0x4be18e7a), TOBN(0x90d8ea16, 0x25b1e0af), + TOBN(0x7582a293, 0xef953f57), TOBN(0x90a64d05, 0xbdc5465a), + TOBN(0xca79c497, 0xe2510717), TOBN(0x560dbb7c, 0x18cb641f), + TOBN(0x1d8e3286, 0x4b66abfb), TOBN(0xd26f52e5, 0x59030900), + TOBN(0x1ee3f643, 0x5584941a), TOBN(0x6d3b3730, 0x569f5958), + TOBN(0x9ff2a62f, 0x4789dba5), TOBN(0x91fcb815, 0x72b5c9b7), + TOBN(0xf446cb7d, 0x6c8f9a0e), TOBN(0x48f625c1, 0x39b7ecb5), + TOBN(0xbabae801, 0x1c6219b8), TOBN(0xe7a562d9, 0x28ac2f23), + TOBN(0xe1b48732, 
0x26e20588), TOBN(0x06ee1cad, 0x775af051), + TOBN(0xda29ae43, 0xfaff79f7), TOBN(0xc141a412, 0x652ee9e0), + TOBN(0x1e127f6f, 0x195f4bd0), TOBN(0x29c6ab4f, 0x072f34f8), + TOBN(0x7b7c1477, 0x30448112), TOBN(0x82b51af1, 0xe4a38656), + TOBN(0x2bf2028a, 0x2f315010), TOBN(0xc9a4a01f, 0x6ea88cd4), + TOBN(0xf63e95d8, 0x257e5818), TOBN(0xdd8efa10, 0xb4519b16), + TOBN(0xed8973e0, 0x0da910bf), TOBN(0xed49d077, 0x5c0fe4a9), + TOBN(0xac3aac5e, 0xb7caee1e), TOBN(0x1033898d, 0xa7f4da57), + TOBN(0x42145c0e, 0x5c6669b9), TOBN(0x42daa688, 0xc1aa2aa0), + TOBN(0x629cc15c, 0x1a1d885a), TOBN(0x25572ec0, 0xf4b76817), + TOBN(0x8312e435, 0x9c8f8f28), TOBN(0x8107f8cd, 0x81965490), + TOBN(0x516ff3a3, 0x6fa6110c), TOBN(0x74fb1eb1, 0xfb93561f), + TOBN(0x6c0c9047, 0x8457522b), TOBN(0xcfd32104, 0x6bb8bdc6), + TOBN(0x2d6884a2, 0xcc80ad57), TOBN(0x7c27fc35, 0x86a9b637), + TOBN(0x3461baed, 0xadf4e8cd), TOBN(0x1d56251a, 0x617242f0), + TOBN(0x0b80d209, 0xc955bef4), TOBN(0xdf02cad2, 0x06adb047), + TOBN(0xf0d7cb91, 0x5ec74fee), TOBN(0xd2503375, 0x1111ba44), + TOBN(0x9671755e, 0xdf53cb36), TOBN(0x54dcb612, 0x3368551b), + TOBN(0x66d69aac, 0xc8a025a4), TOBN(0x6be946c6, 0xe77ef445), + TOBN(0x719946d1, 0xa995e094), TOBN(0x65e848f6, 0xe51e04d8), + TOBN(0xe62f3300, 0x6a1e3113), TOBN(0x1541c7c1, 0x501de503), + TOBN(0x4daac9fa, 0xf4acfade), TOBN(0x0e585897, 0x44cd0b71), + TOBN(0x544fd869, 0x0a51cd77), TOBN(0x60fc20ed, 0x0031016d), + TOBN(0x58b404ec, 0xa4276867), TOBN(0x46f6c3cc, 0x34f34993), + TOBN(0x477ca007, 0xc636e5bd), TOBN(0x8018f5e5, 0x7c458b47), + TOBN(0xa1202270, 0xe47b668f), TOBN(0xcef48ccd, 0xee14f203), + TOBN(0x23f98bae, 0x62ff9b4d), TOBN(0x55acc035, 0xc589eddd), + TOBN(0x3fe712af, 0x64db4444), TOBN(0x19e9d634, 0xbecdd480), + TOBN(0xe08bc047, 0xa930978a), TOBN(0x2dbf24ec, 0xa1280733), + TOBN(0x3c0ae38c, 0x2cd706b2), TOBN(0x5b012a5b, 0x359017b9), + TOBN(0x3943c38c, 0x72e0f5ae), TOBN(0x786167ea, 0x57176fa3), + TOBN(0xe5f9897d, 0x594881dc), TOBN(0x6b5efad8, 0xcfb820c1), + TOBN(0xb2179093, 0xd55018de), TOBN(0x39ad7d32, 0x0bac56ce), + TOBN(0xb55122e0, 0x2cfc0e81), TOBN(0x117c4661, 0xf6d89daa), + TOBN(0x362d01e1, 0xcb64fa09), TOBN(0x6a309b4e, 0x3e9c4ddd), + TOBN(0xfa979fb7, 0xabea49b1), TOBN(0xb4b1d27d, 0x10e2c6c5), + TOBN(0xbd61c2c4, 0x23afde7a), TOBN(0xeb6614f8, 0x9786d358), + TOBN(0x4a5d816b, 0x7f6f7459), TOBN(0xe431a44f, 0x09360e7b), + TOBN(0x8c27a032, 0xc309914c), TOBN(0xcea5d68a, 0xcaede3d8), + TOBN(0x3668f665, 0x3a0a3f95), TOBN(0x89369416, 0x7ceba27b), + TOBN(0x89981fad, 0xe4728fe9), TOBN(0x7102c8a0, 0x8a093562), + TOBN(0xbb80310e, 0x235d21c8), TOBN(0x505e55d1, 0xbefb7f7b), + TOBN(0xa0a90811, 0x12958a67), TOBN(0xd67e106a, 0x4d851fef), + TOBN(0xb84011a9, 0x431dd80e), TOBN(0xeb7c7cca, 0x73306cd9), + TOBN(0x20fadd29, 0xd1b3b730), TOBN(0x83858b5b, 0xfe37b3d3), + TOBN(0xbf4cd193, 0xb6251d5c), TOBN(0x1cca1fd3, 0x1352d952), + TOBN(0xc66157a4, 0x90fbc051), TOBN(0x7990a638, 0x89b98636),} + , + {TOBN(0xe5aa692a, 0x87dec0e1), TOBN(0x010ded8d, 0xf7b39d00), + TOBN(0x7b1b80c8, 0x54cfa0b5), TOBN(0x66beb876, 0xa0f8ea28), + TOBN(0x50d7f531, 0x3476cd0e), TOBN(0xa63d0e65, 0xb08d3949), + TOBN(0x1a09eea9, 0x53479fc6), TOBN(0x82ae9891, 0xf499e742), + TOBN(0xab58b910, 0x5ca7d866), TOBN(0x582967e2, 0x3adb3b34), + TOBN(0x89ae4447, 0xcceac0bc), TOBN(0x919c667c, 0x7bf56af5), + TOBN(0x9aec17b1, 0x60f5dcd7), TOBN(0xec697b9f, 0xddcaadbc), + TOBN(0x0b98f341, 0x463467f5), TOBN(0xb187f1f7, 0xa967132f), + TOBN(0x90fe7a1d, 0x214aeb18), TOBN(0x1506af3c, 0x741432f7), + TOBN(0xbb5565f9, 0xe591a0c4), TOBN(0x10d41a77, 0xb44f1bc3), + TOBN(0xa09d65e4, 0xa84bde96), 
TOBN(0x42f060d8, 0xf20a6a1c), + TOBN(0x652a3bfd, 0xf27f9ce7), TOBN(0xb6bdb65c, 0x3b3d739f), + TOBN(0xeb5ddcb6, 0xec7fae9f), TOBN(0x995f2714, 0xefb66e5a), + TOBN(0xdee95d8e, 0x69445d52), TOBN(0x1b6c2d46, 0x09e27620), + TOBN(0x32621c31, 0x8129d716), TOBN(0xb03909f1, 0x0958c1aa), + TOBN(0x8c468ef9, 0x1af4af63), TOBN(0x162c429f, 0xfba5cdf6), + TOBN(0x2f682343, 0x753b9371), TOBN(0x29cab45a, 0x5f1f9cd7), + TOBN(0x571623ab, 0xb245db96), TOBN(0xc507db09, 0x3fd79999), + TOBN(0x4e2ef652, 0xaf036c32), TOBN(0x86f0cc78, 0x05018e5c), + TOBN(0xc10a73d4, 0xab8be350), TOBN(0x6519b397, 0x7e826327), + TOBN(0xe8cb5eef, 0x9c053df7), TOBN(0x8de25b37, 0xb300ea6f), + TOBN(0xdb03fa92, 0xc849cffb), TOBN(0x242e43a7, 0xe84169bb), + TOBN(0xe4fa51f4, 0xdd6f958e), TOBN(0x6925a77f, 0xf4445a8d), + TOBN(0xe6e72a50, 0xe90d8949), TOBN(0xc66648e3, 0x2b1f6390), + TOBN(0xb2ab1957, 0x173e460c), TOBN(0x1bbbce75, 0x30704590), + TOBN(0xc0a90dbd, 0xdb1c7162), TOBN(0x505e399e, 0x15cdd65d), + TOBN(0x68434dcb, 0x57797ab7), TOBN(0x60ad35ba, 0x6a2ca8e8), + TOBN(0x4bfdb1e0, 0xde3336c1), TOBN(0xbbef99eb, 0xd8b39015), + TOBN(0x6c3b96f3, 0x1711ebec), TOBN(0x2da40f1f, 0xce98fdc4), + TOBN(0xb99774d3, 0x57b4411f), TOBN(0x87c8bdf4, 0x15b65bb6), + TOBN(0xda3a89e3, 0xc2eef12d), TOBN(0xde95bb9b, 0x3c7471f3), + TOBN(0x600f225b, 0xd812c594), TOBN(0x54907c5d, 0x2b75a56b), + TOBN(0xa93cc5f0, 0x8db60e35), TOBN(0x743e3cd6, 0xfa833319), + TOBN(0x7dad5c41, 0xf81683c9), TOBN(0x70c1e7d9, 0x9c34107e), + TOBN(0x0edc4a39, 0xa6be0907), TOBN(0x36d47035, 0x86d0b7d3), + TOBN(0x8c76da03, 0x272bfa60), TOBN(0x0b4a07ea, 0x0f08a414), + TOBN(0x699e4d29, 0x45c1dd53), TOBN(0xcadc5898, 0x231debb5), + TOBN(0xdf49fcc7, 0xa77f00e0), TOBN(0x93057bbf, 0xa73e5a0e), + TOBN(0x2f8b7ecd, 0x027a4cd1), TOBN(0x114734b3, 0xc614011a), + TOBN(0xe7a01db7, 0x67677c68), TOBN(0x89d9be5e, 0x7e273f4f), + TOBN(0xd225cb2e, 0x089808ef), TOBN(0xf1f7a27d, 0xd59e4107), + TOBN(0x53afc761, 0x8211b9c9), TOBN(0x0361bc67, 0xe6819159), + TOBN(0x2a865d0b, 0x7f071426), TOBN(0x6a3c1810, 0xe7072567), + TOBN(0x3e3bca1e, 0x0d6bcabd), TOBN(0xa1b02bc1, 0x408591bc), + TOBN(0xe0deee59, 0x31fba239), TOBN(0xf47424d3, 0x98bd91d1), + TOBN(0x0f8886f4, 0x071a3c1d), TOBN(0x3f7d41e8, 0xa819233b), + TOBN(0x708623c2, 0xcf6eb998), TOBN(0x86bb49af, 0x609a287f), + TOBN(0x942bb249, 0x63c90762), TOBN(0x0ef6eea5, 0x55a9654b), + TOBN(0x5f6d2d72, 0x36f5defe), TOBN(0xfa9922dc, 0x56f99176), + TOBN(0x6c8c5ece, 0xf78ce0c7), TOBN(0x7b44589d, 0xbe09b55e), + TOBN(0xe11b3bca, 0x9ea83770), TOBN(0xd7fa2c7f, 0x2ab71547), + TOBN(0x2a3dd6fa, 0x2a1ddcc0), TOBN(0x09acb430, 0x5a7b7707), + TOBN(0x4add4a2e, 0x649d4e57), TOBN(0xcd53a2b0, 0x1917526e), + TOBN(0xc5262330, 0x20b44ac4), TOBN(0x4028746a, 0xbaa2c31d), + TOBN(0x51318390, 0x64291d4c), TOBN(0xbf48f151, 0xee5ad909), + TOBN(0xcce57f59, 0x7b185681), TOBN(0x7c3ac1b0, 0x4854d442), + TOBN(0x65587dc3, 0xc093c171), TOBN(0xae7acb24, 0x24f42b65), + TOBN(0x5a338adb, 0x955996cb), TOBN(0xc8e65675, 0x6051f91b), + TOBN(0x66711fba, 0x28b8d0b1), TOBN(0x15d74137, 0xb6c10a90), + TOBN(0x70cdd7eb, 0x3a232a80), TOBN(0xc9e2f07f, 0x6191ed24), + TOBN(0xa80d1db6, 0xf79588c0), TOBN(0xfa52fc69, 0xb55768cc), + TOBN(0x0b4df1ae, 0x7f54438a), TOBN(0x0cadd1a7, 0xf9b46a4f), + TOBN(0xb40ea6b3, 0x1803dd6f), TOBN(0x488e4fa5, 0x55eaae35), + TOBN(0x9f047d55, 0x382e4e16), TOBN(0xc9b5b7e0, 0x2f6e0c98), + TOBN(0x6b1bd2d3, 0x95762649), TOBN(0xa9604ee7, 0xc7aea3f6), + TOBN(0x3646ff27, 0x6dc6f896), TOBN(0x9bf0e7f5, 0x2860bad1), + TOBN(0x2d92c821, 0x7cb44b92), TOBN(0xa2f5ce63, 0xaea9c182), + TOBN(0xd0a2afb1, 0x9154a5fd), TOBN(0x482e474c, 
0x95801da6), + TOBN(0xc19972d0, 0xb611c24b), TOBN(0x1d468e65, 0x60a8f351), + TOBN(0xeb758069, 0x7bcf6421), TOBN(0xec9dd0ee, 0x88fbc491), + TOBN(0x5b59d2bf, 0x956c2e32), TOBN(0x73dc6864, 0xdcddf94e), + TOBN(0xfd5e2321, 0xbcee7665), TOBN(0xa7b4f8ef, 0x5e9a06c4), + TOBN(0xfba918dd, 0x7280f855), TOBN(0xbbaac260, 0x8baec688), + TOBN(0xa3b3f00f, 0x33400f42), TOBN(0x3d2dba29, 0x66f2e6e4), + TOBN(0xb6f71a94, 0x98509375), TOBN(0x8f33031f, 0xcea423cc), + TOBN(0x009b8dd0, 0x4807e6fb), TOBN(0x5163cfe5, 0x5cdb954c), + TOBN(0x03cc8f17, 0xcf41c6e8), TOBN(0xf1f03c2a, 0x037b925c), + TOBN(0xc39c19cc, 0x66d2427c), TOBN(0x823d24ba, 0x7b6c18e4), + TOBN(0x32ef9013, 0x901f0b4f), TOBN(0x684360f1, 0xf8941c2e), + TOBN(0x0ebaff52, 0x2c28092e), TOBN(0x7891e4e3, 0x256c932f), + TOBN(0x51264319, 0xac445e3d), TOBN(0x553432e7, 0x8ea74381), + TOBN(0xe6eeaa69, 0x67e9c50a), TOBN(0x27ced284, 0x62e628c7), + TOBN(0x3f96d375, 0x7a4afa57), TOBN(0xde0a14c3, 0xe484c150), + TOBN(0x364a24eb, 0x38bd9923), TOBN(0x1df18da0, 0xe5177422), + TOBN(0x174e8f82, 0xd8d38a9b), TOBN(0x2e97c600, 0xe7de1391), + TOBN(0xc5709850, 0xa1c175dd), TOBN(0x969041a0, 0x32ae5035), + TOBN(0xcbfd533b, 0x76a2086b), TOBN(0xd6bba71b, 0xd7c2e8fe), + TOBN(0xb2d58ee6, 0x099dfb67), TOBN(0x3a8b342d, 0x064a85d9), + TOBN(0x3bc07649, 0x522f9be3), TOBN(0x690c075b, 0xdf1f49a8), + TOBN(0x80e1aee8, 0x3854ec42), TOBN(0x2a7dbf44, 0x17689dc7), + TOBN(0xc004fc0e, 0x3faf4078), TOBN(0xb2f02e9e, 0xdf11862c), + TOBN(0xf10a5e0f, 0xa0a1b7b3), TOBN(0x30aca623, 0x8936ec80), + TOBN(0xf83cbf05, 0x02f40d9a), TOBN(0x4681c468, 0x2c318a4d), + TOBN(0x98575618, 0x0e9c2674), TOBN(0xbe79d046, 0x1847092e), + TOBN(0xaf1e480a, 0x78bd01e0), TOBN(0x6dd359e4, 0x72a51db9), + TOBN(0x62ce3821, 0xe3afbab6), TOBN(0xc5cee5b6, 0x17733199), + TOBN(0xe08b30d4, 0x6ffd9fbb), TOBN(0x6e5bc699, 0x36c610b7), + TOBN(0xf343cff2, 0x9ce262cf), TOBN(0xca2e4e35, 0x68b914c1), + TOBN(0x011d64c0, 0x16de36c5), TOBN(0xe0b10fdd, 0x42e2b829), + TOBN(0x78942981, 0x6685aaf8), TOBN(0xe7511708, 0x230ede97), + TOBN(0x671ed8fc, 0x3b922bf8), TOBN(0xe4d8c0a0, 0x4c29b133), + TOBN(0x87eb1239, 0x3b6e99c4), TOBN(0xaff3974c, 0x8793beba), + TOBN(0x03749405, 0x2c18df9b), TOBN(0xc5c3a293, 0x91007139), + TOBN(0x6a77234f, 0xe37a0b95), TOBN(0x02c29a21, 0xb661c96b), + TOBN(0xc3aaf1d6, 0x141ecf61), TOBN(0x9195509e, 0x3bb22f53), + TOBN(0x29597404, 0x22d51357), TOBN(0x1b083822, 0x537bed60), + TOBN(0xcd7d6e35, 0xe07289f0), TOBN(0x1f94c48c, 0x6dd86eff), + TOBN(0xc8bb1f82, 0xeb0f9cfa), TOBN(0x9ee0b7e6, 0x1b2eb97d), + TOBN(0x5a52fe2e, 0x34d74e31), TOBN(0xa352c310, 0x3bf79ab6), + TOBN(0x97ff6c5a, 0xabfeeb8f), TOBN(0xbfbe8fef, 0xf5c97305), + TOBN(0xd6081ce6, 0xa7904608), TOBN(0x1f812f3a, 0xc4fca249), + TOBN(0x9b24bc9a, 0xb9e5e200), TOBN(0x91022c67, 0x38012ee8), + TOBN(0xe83d9c5d, 0x30a713a1), TOBN(0x4876e3f0, 0x84ef0f93), + TOBN(0xc9777029, 0xc1fbf928), TOBN(0xef7a6bb3, 0xbce7d2a4), + TOBN(0xb8067228, 0xdfa2a659), TOBN(0xd5cd3398, 0xd877a48f), + TOBN(0xbea4fd8f, 0x025d0f3f), TOBN(0xd67d2e35, 0x2eae7c2b), + TOBN(0x184de7d7, 0xcc5f4394), TOBN(0xb5551b5c, 0x4536e142), + TOBN(0x2e89b212, 0xd34aa60a), TOBN(0x14a96fea, 0xf50051d5), + TOBN(0x4e21ef74, 0x0d12bb0b), TOBN(0xc522f020, 0x60b9677e), + TOBN(0x8b12e467, 0x2df7731d), TOBN(0x39f80382, 0x7b326d31), + TOBN(0xdfb8630c, 0x39024a94), TOBN(0xaacb96a8, 0x97319452), + TOBN(0xd68a3961, 0xeda3867c), TOBN(0x0c58e2b0, 0x77c4ffca), + TOBN(0x3d545d63, 0x4da919fa), TOBN(0xef79b69a, 0xf15e2289), + TOBN(0x54bc3d3d, 0x808bab10), TOBN(0xc8ab3007, 0x45f82c37), + TOBN(0xc12738b6, 0x7c4a658a), TOBN(0xb3c47639, 0x40e72182), + 
TOBN(0x3b77be46, 0x8798e44f), TOBN(0xdc047df2, 0x17a7f85f), + TOBN(0x2439d4c5, 0x5e59d92d), TOBN(0xcedca475, 0xe8e64d8d), + TOBN(0xa724cd0d, 0x87ca9b16), TOBN(0x35e4fd59, 0xa5540dfe), + TOBN(0xf8c1ff18, 0xe4bcf6b1), TOBN(0x856d6285, 0x295018fa), + TOBN(0x433f665c, 0x3263c949), TOBN(0xa6a76dd6, 0xa1f21409), + TOBN(0x17d32334, 0xcc7b4f79), TOBN(0xa1d03122, 0x06720e4a), + TOBN(0xadb6661d, 0x81d9bed5), TOBN(0xf0d6fb02, 0x11db15d1), + TOBN(0x7fd11ad5, 0x1fb747d2), TOBN(0xab50f959, 0x3033762b), + TOBN(0x2a7e711b, 0xfbefaf5a), TOBN(0xc7393278, 0x3fef2bbf), + TOBN(0xe29fa244, 0x0df6f9be), TOBN(0x9092757b, 0x71efd215), + TOBN(0xee60e311, 0x4f3d6fd9), TOBN(0x338542d4, 0x0acfb78b), + TOBN(0x44a23f08, 0x38961a0f), TOBN(0x1426eade, 0x986987ca), + TOBN(0x36e6ee2e, 0x4a863cc6), TOBN(0x48059420, 0x628b8b79), + TOBN(0x30303ad8, 0x7396e1de), TOBN(0x5c8bdc48, 0x38c5aad1), + TOBN(0x3e40e11f, 0x5c8f5066), TOBN(0xabd6e768, 0x8d246bbd), + TOBN(0x68aa40bb, 0x23330a01), TOBN(0xd23f5ee4, 0xc34eafa0), + TOBN(0x3bbee315, 0x5de02c21), TOBN(0x18dd4397, 0xd1d8dd06), + TOBN(0x3ba1939a, 0x122d7b44), TOBN(0xe6d3b40a, 0xa33870d6), + TOBN(0x8e620f70, 0x1c4fe3f8), TOBN(0xf6bba1a5, 0xd3a50cbf), + TOBN(0x4a78bde5, 0xcfc0aee0), TOBN(0x847edc46, 0xc08c50bd), + TOBN(0xbaa2439c, 0xad63c9b2), TOBN(0xceb4a728, 0x10fc2acb), + TOBN(0xa419e40e, 0x26da033d), TOBN(0x6cc3889d, 0x03e02683), + TOBN(0x1cd28559, 0xfdccf725), TOBN(0x0fd7e0f1, 0x8d13d208), + TOBN(0x01b9733b, 0x1f0df9d4), TOBN(0x8cc2c5f3, 0xa2b5e4f3), + TOBN(0x43053bfa, 0x3a304fd4), TOBN(0x8e87665c, 0x0a9f1aa7), + TOBN(0x087f29ec, 0xd73dc965), TOBN(0x15ace455, 0x3e9023db), + TOBN(0x2370e309, 0x2bce28b4), TOBN(0xf9723442, 0xb6b1e84a), + TOBN(0xbeee662e, 0xb72d9f26), TOBN(0xb19396de, 0xf0e47109), + TOBN(0x85b1fa73, 0xe13289d0), TOBN(0x436cf77e, 0x54e58e32), + TOBN(0x0ec833b3, 0xe990ef77), TOBN(0x7373e3ed, 0x1b11fc25), + TOBN(0xbe0eda87, 0x0fc332ce), TOBN(0xced04970, 0x8d7ea856), + TOBN(0xf85ff785, 0x7e977ca0), TOBN(0xb66ee8da, 0xdfdd5d2b), + TOBN(0xf5e37950, 0x905af461), TOBN(0x587b9090, 0x966d487c), + TOBN(0x6a198a1b, 0x32ba0127), TOBN(0xa7720e07, 0x141615ac), + TOBN(0xa23f3499, 0x996ef2f2), TOBN(0xef5f64b4, 0x470bcb3d), + TOBN(0xa526a962, 0x92b8c559), TOBN(0x0c14aac0, 0x69740a0f), + TOBN(0x0d41a9e3, 0xa6bdc0a5), TOBN(0x97d52106, 0x9c48aef4), + TOBN(0xcf16bd30, 0x3e7c253b), TOBN(0xcc834b1a, 0x47fdedc1), + TOBN(0x7362c6e5, 0x373aab2e), TOBN(0x264ed85e, 0xc5f590ff), + TOBN(0x7a46d9c0, 0x66d41870), TOBN(0xa50c20b1, 0x4787ba09), + TOBN(0x185e7e51, 0xe3d44635), TOBN(0xb3b3e080, 0x31e2d8dc), + TOBN(0xbed1e558, 0xa179e9d9), TOBN(0x2daa3f79, 0x74a76781), + TOBN(0x4372baf2, 0x3a40864f), TOBN(0x46900c54, 0x4fe75cb5), + TOBN(0xb95f171e, 0xf76765d0), TOBN(0x4ad726d2, 0x95c87502), + TOBN(0x2ec769da, 0x4d7c99bd), TOBN(0x5e2ddd19, 0xc36cdfa8), + TOBN(0xc22117fc, 0xa93e6dea), TOBN(0xe8a2583b, 0x93771123), + TOBN(0xbe2f6089, 0xfa08a3a2), TOBN(0x4809d5ed, 0x8f0e1112), + TOBN(0x3b414aa3, 0xda7a095e), TOBN(0x9049acf1, 0x26f5aadd), + TOBN(0x78d46a4d, 0x6be8b84a), TOBN(0xd66b1963, 0xb732b9b3), + TOBN(0x5c2ac2a0, 0xde6e9555), TOBN(0xcf52d098, 0xb5bd8770), + TOBN(0x15a15fa6, 0x0fd28921), TOBN(0x56ccb81e, 0x8b27536d), + TOBN(0x0f0d8ab8, 0x9f4ccbb8), TOBN(0xed5f44d2, 0xdb221729), + TOBN(0x43141988, 0x00bed10c), TOBN(0xc94348a4, 0x1d735b8b), + TOBN(0x79f3e9c4, 0x29ef8479), TOBN(0x4c13a4e3, 0x614c693f), + TOBN(0x32c9af56, 0x8e143a14), TOBN(0xbc517799, 0xe29ac5c4), + TOBN(0x05e17992, 0x2774856f), TOBN(0x6e52fb05, 0x6c1bf55f), + TOBN(0xaeda4225, 0xe4f19e16), TOBN(0x70f4728a, 0xaf5ccb26), + TOBN(0x5d2118d1, 
0xb2947f22), TOBN(0xc827ea16, 0x281d6fb9), + TOBN(0x8412328d, 0x8cf0eabd), TOBN(0x45ee9fb2, 0x03ef9dcf), + TOBN(0x8e700421, 0xbb937d63), TOBN(0xdf8ff2d5, 0xcc4b37a6), + TOBN(0xa4c0d5b2, 0x5ced7b68), TOBN(0x6537c1ef, 0xc7308f59), + TOBN(0x25ce6a26, 0x3b37f8e8), TOBN(0x170e9a9b, 0xdeebc6ce), + TOBN(0xdd037952, 0x8728d72c), TOBN(0x445b0e55, 0x850154bc), + TOBN(0x4b7d0e06, 0x83a7337b), TOBN(0x1e3416d4, 0xffecf249), + TOBN(0x24840eff, 0x66a2b71f), TOBN(0xd0d9a50a, 0xb37cc26d), + TOBN(0xe2198150, 0x6fe28ef7), TOBN(0x3cc5ef16, 0x23324c7f), + TOBN(0x220f3455, 0x769b5263), TOBN(0xe2ade2f1, 0xa10bf475), + TOBN(0x28cd20fa, 0x458d3671), TOBN(0x1549722c, 0x2dc4847b), + TOBN(0x6dd01e55, 0x591941e3), TOBN(0x0e6fbcea, 0x27128ccb), + TOBN(0xae1a1e6b, 0x3bef0262), TOBN(0xfa8c472c, 0x8f54e103), + TOBN(0x7539c0a8, 0x72c052ec), TOBN(0xd7b27369, 0x5a3490e9), + TOBN(0x143fe1f1, 0x71684349), TOBN(0x36b4722e, 0x32e19b97), + TOBN(0xdc059227, 0x90980aff), TOBN(0x175c9c88, 0x9e13d674), + TOBN(0xa7de5b22, 0x6e6bfdb1), TOBN(0x5ea5b7b2, 0xbedb4b46), + TOBN(0xd5570191, 0xd34a6e44), TOBN(0xfcf60d2e, 0xa24ff7e6), + TOBN(0x614a392d, 0x677819e1), TOBN(0x7be74c7e, 0xaa5a29e8), + TOBN(0xab50fece, 0x63c85f3f), TOBN(0xaca2e2a9, 0x46cab337), + TOBN(0x7f700388, 0x122a6fe3), TOBN(0xdb69f703, 0x882a04a8), + TOBN(0x9a77935d, 0xcf7aed57), TOBN(0xdf16207c, 0x8d91c86f), + TOBN(0x2fca49ab, 0x63ed9998), TOBN(0xa3125c44, 0xa77ddf96), + TOBN(0x05dd8a86, 0x24344072), TOBN(0xa023dda2, 0xfec3fb56), + TOBN(0x421b41fc, 0x0c743032), TOBN(0x4f2120c1, 0x5e438639), + TOBN(0xfb7cae51, 0xc83c1b07), TOBN(0xb2370caa, 0xcac2171a), + TOBN(0x2eb2d962, 0x6cc820fb), TOBN(0x59feee5c, 0xb85a44bf), + TOBN(0x94620fca, 0x5b6598f0), TOBN(0x6b922cae, 0x7e314051), + TOBN(0xff8745ad, 0x106bed4e), TOBN(0x546e71f5, 0xdfa1e9ab), + TOBN(0x935c1e48, 0x1ec29487), TOBN(0x9509216c, 0x4d936530), + TOBN(0xc7ca3067, 0x85c9a2db), TOBN(0xd6ae5152, 0x6be8606f), + TOBN(0x09dbcae6, 0xe14c651d), TOBN(0xc9536e23, 0x9bc32f96), + TOBN(0xa90535a9, 0x34521b03), TOBN(0xf39c526c, 0x878756ff), + TOBN(0x383172ec, 0x8aedf03c), TOBN(0x20a8075e, 0xefe0c034), + TOBN(0xf22f9c62, 0x64026422), TOBN(0x8dd10780, 0x24b9d076), + TOBN(0x944c742a, 0x3bef2950), TOBN(0x55b9502e, 0x88a2b00b), + TOBN(0xa59e14b4, 0x86a09817), TOBN(0xa39dd3ac, 0x47bb4071), + TOBN(0x55137f66, 0x3be0592f), TOBN(0x07fcafd4, 0xc9e63f5b), + TOBN(0x963652ee, 0x346eb226), TOBN(0x7dfab085, 0xec2facb7), + TOBN(0x273bf2b8, 0x691add26), TOBN(0x30d74540, 0xf2b46c44), + TOBN(0x05e8e73e, 0xf2c2d065), TOBN(0xff9b8a00, 0xd42eeac9), + TOBN(0x2fcbd205, 0x97209d22), TOBN(0xeb740ffa, 0xde14ea2c), + TOBN(0xc71ff913, 0xa8aef518), TOBN(0x7bfc74bb, 0xfff4cfa2), + TOBN(0x1716680c, 0xb6b36048), TOBN(0x121b2cce, 0x9ef79af1), + TOBN(0xbff3c836, 0xa01eb3d3), TOBN(0x50eb1c6a, 0x5f79077b), + TOBN(0xa48c32d6, 0xa004bbcf), TOBN(0x47a59316, 0x7d64f61d), + TOBN(0x6068147f, 0x93102016), TOBN(0x12c5f654, 0x94d12576), + TOBN(0xefb071a7, 0xc9bc6b91), TOBN(0x7c2da0c5, 0x6e23ea95), + TOBN(0xf4fd45b6, 0xd4a1dd5d), TOBN(0x3e7ad9b6, 0x9122b13c), + TOBN(0x342ca118, 0xe6f57a48), TOBN(0x1c2e94a7, 0x06f8288f), + TOBN(0x99e68f07, 0x5a97d231), TOBN(0x7c80de97, 0x4d838758), + TOBN(0xbce0f5d0, 0x05872727), TOBN(0xbe5d95c2, 0x19c4d016), + TOBN(0x921d5cb1, 0x9c2492ee), TOBN(0x42192dc1, 0x404d6fb3), + TOBN(0x4c84dcd1, 0x32f988d3), TOBN(0xde26d61f, 0xa17b8e85), + TOBN(0xc466dcb6, 0x137c7408), TOBN(0x9a38d7b6, 0x36a266da), + TOBN(0x7ef5cb06, 0x83bebf1b), TOBN(0xe5cdcbbf, 0x0fd014e3), + TOBN(0x30aa376d, 0xf65965a0), TOBN(0x60fe88c2, 0xebb3e95e), + TOBN(0x33fd0b61, 0x66ee6f20), 
TOBN(0x8827dcdb, 0x3f41f0a0), + TOBN(0xbf8a9d24, 0x0c56c690), TOBN(0x40265dad, 0xddb7641d), + TOBN(0x522b05bf, 0x3a6b662b), TOBN(0x466d1dfe, 0xb1478c9b), + TOBN(0xaa616962, 0x1484469b), TOBN(0x0db60549, 0x02df8f9f), + TOBN(0xc37bca02, 0x3cb8bf51), TOBN(0x5effe346, 0x21371ce8), + TOBN(0xe8f65264, 0xff112c32), TOBN(0x8a9c736d, 0x7b971fb2), + TOBN(0xa4f19470, 0x7b75080d), TOBN(0xfc3f2c5a, 0x8839c59b), + TOBN(0x1d6c777e, 0x5aeb49c2), TOBN(0xf3db034d, 0xda1addfe), + TOBN(0xd76fee5a, 0x5535affc), TOBN(0x0853ac70, 0xb92251fd), + TOBN(0x37e3d594, 0x8b2a29d5), TOBN(0x28f1f457, 0x4de00ddb), + TOBN(0x8083c1b5, 0xf42c328b), TOBN(0xd8ef1d8f, 0xe493c73b), + TOBN(0x96fb6260, 0x41dc61bd), TOBN(0xf74e8a9d, 0x27ee2f8a), + TOBN(0x7c605a80, 0x2c946a5d), TOBN(0xeed48d65, 0x3839ccfd), + TOBN(0x9894344f, 0x3a29467a), TOBN(0xde81e949, 0xc51eba6d), + TOBN(0xdaea066b, 0xa5e5c2f2), TOBN(0x3fc8a614, 0x08c8c7b3), + TOBN(0x7adff88f, 0x06d0de9f), TOBN(0xbbc11cf5, 0x3b75ce0a), + TOBN(0x9fbb7acc, 0xfbbc87d5), TOBN(0xa1458e26, 0x7badfde2)} + , + {TOBN(0x1cb43668, 0xe039c256), TOBN(0x5f26fb8b, 0x7c17fd5d), + TOBN(0xeee426af, 0x79aa062b), TOBN(0x072002d0, 0xd78fbf04), + TOBN(0x4c9ca237, 0xe84fb7e3), TOBN(0xb401d8a1, 0x0c82133d), + TOBN(0xaaa52592, 0x6d7e4181), TOBN(0xe9430833, 0x73dbb152), + TOBN(0xf92dda31, 0xbe24319a), TOBN(0x03f7d28b, 0xe095a8e7), + TOBN(0xa52fe840, 0x98782185), TOBN(0x276ddafe, 0x29c24dbc), + TOBN(0x80cd5496, 0x1d7a64eb), TOBN(0xe4360889, 0x7f1dbe42), + TOBN(0x2f81a877, 0x8438d2d5), TOBN(0x7e4d52a8, 0x85169036), + TOBN(0x19e3d5b1, 0x1d59715d), TOBN(0xc7eaa762, 0xd788983e), + TOBN(0xe5a730b0, 0xabf1f248), TOBN(0xfbab8084, 0xfae3fd83), + TOBN(0x65e50d21, 0x53765b2f), TOBN(0xbdd4e083, 0xfa127f3d), + TOBN(0x9cf3c074, 0x397b1b10), TOBN(0x59f8090c, 0xb1b59fd3), + TOBN(0x7b15fd9d, 0x615faa8f), TOBN(0x8fa1eb40, 0x968554ed), + TOBN(0x7bb4447e, 0x7aa44882), TOBN(0x2bb2d0d1, 0x029fff32), + TOBN(0x075e2a64, 0x6caa6d2f), TOBN(0x8eb879de, 0x22e7351b), + TOBN(0xbcd5624e, 0x9a506c62), TOBN(0x218eaef0, 0xa87e24dc), + TOBN(0x37e56847, 0x44ddfa35), TOBN(0x9ccfc5c5, 0xdab3f747), + TOBN(0x9ac1df3f, 0x1ee96cf4), TOBN(0x0c0571a1, 0x3b480b8f), + TOBN(0x2fbeb3d5, 0x4b3a7b3c), TOBN(0x35c03669, 0x5dcdbb99), + TOBN(0x52a0f5dc, 0xb2415b3a), TOBN(0xd57759b4, 0x4413ed9a), + TOBN(0x1fe647d8, 0x3d30a2c5), TOBN(0x0857f77e, 0xf78a81dc), + TOBN(0x11d5a334, 0x131a4a9b), TOBN(0xc0a94af9, 0x29d393f5), + TOBN(0xbc3a5c0b, 0xdaa6ec1a), TOBN(0xba9fe493, 0x88d2d7ed), + TOBN(0xbb4335b4, 0xbb614797), TOBN(0x991c4d68, 0x72f83533), + TOBN(0x53258c28, 0xd2f01cb3), TOBN(0x93d6eaa3, 0xd75db0b1), + TOBN(0x419a2b0d, 0xe87d0db4), TOBN(0xa1e48f03, 0xd8fe8493), + TOBN(0xf747faf6, 0xc508b23a), TOBN(0xf137571a, 0x35d53549), + TOBN(0x9f5e58e2, 0xfcf9b838), TOBN(0xc7186cee, 0xa7fd3cf5), + TOBN(0x77b868ce, 0xe978a1d3), TOBN(0xe3a68b33, 0x7ab92d04), + TOBN(0x51029794, 0x87a5b862), TOBN(0x5f0606c3, 0x3a61d41d), + TOBN(0x2814be27, 0x6f9326f1), TOBN(0x2f521c14, 0xc6fe3c2e), + TOBN(0x17464d7d, 0xacdf7351), TOBN(0x10f5f9d3, 0x777f7e44), + TOBN(0xce8e616b, 0x269fb37d), TOBN(0xaaf73804, 0x7de62de5), + TOBN(0xaba11175, 0x4fdd4153), TOBN(0x515759ba, 0x3770b49b), + TOBN(0x8b09ebf8, 0xaa423a61), TOBN(0x592245a1, 0xcd41fb92), + TOBN(0x1cba8ec1, 0x9b4c8936), TOBN(0xa87e91e3, 0xaf36710e), + TOBN(0x1fd84ce4, 0x3d34a2e3), TOBN(0xee3759ce, 0xb43b5d61), + TOBN(0x895bc78c, 0x619186c7), TOBN(0xf19c3809, 0xcbb9725a), + TOBN(0xc0be21aa, 0xde744b1f), TOBN(0xa7d222b0, 0x60f8056b), + TOBN(0x74be6157, 0xb23efe11), TOBN(0x6fab2b4f, 0x0cd68253), + TOBN(0xad33ea5f, 0x4bf1d725), 
TOBN(0x9c1d8ee2, 0x4f6c950f), + TOBN(0x544ee78a, 0xa377af06), TOBN(0x54f489bb, 0x94a113e1), + TOBN(0x8f11d634, 0x992fb7e8), TOBN(0x0169a7aa, 0xa2a44347), + TOBN(0x1d49d4af, 0x95020e00), TOBN(0x95945722, 0xe08e120b), + TOBN(0xb6e33878, 0xa4d32282), TOBN(0xe36e029d, 0x48020ae7), + TOBN(0xe05847fb, 0x37a9b750), TOBN(0xf876812c, 0xb29e3819), + TOBN(0x84ad138e, 0xd23a17f0), TOBN(0x6d7b4480, 0xf0b3950e), + TOBN(0xdfa8aef4, 0x2fd67ae0), TOBN(0x8d3eea24, 0x52333af6), + TOBN(0x0d052075, 0xb15d5acc), TOBN(0xc6d9c79f, 0xbd815bc4), + TOBN(0x8dcafd88, 0xdfa36cf2), TOBN(0x908ccbe2, 0x38aa9070), + TOBN(0x638722c4, 0xba35afce), TOBN(0x5a3da8b0, 0xfd6abf0b), + TOBN(0x2dce252c, 0xc9c335c1), TOBN(0x84e7f0de, 0x65aa799b), + TOBN(0x2101a522, 0xb99a72cb), TOBN(0x06de6e67, 0x87618016), + TOBN(0x5ff8c7cd, 0xe6f3653e), TOBN(0x0a821ab5, 0xc7a6754a), + TOBN(0x7e3fa52b, 0x7cb0b5a2), TOBN(0xa7fb121c, 0xc9048790), + TOBN(0x1a725020, 0x06ce053a), TOBN(0xb490a31f, 0x04e929b0), + TOBN(0xe17be47d, 0x62dd61ad), TOBN(0x781a961c, 0x6be01371), + TOBN(0x1063bfd3, 0xdae3cbba), TOBN(0x35647406, 0x7f73c9ba), + TOBN(0xf50e957b, 0x2736a129), TOBN(0xa6313702, 0xed13f256), + TOBN(0x9436ee65, 0x3a19fcc5), TOBN(0xcf2bdb29, 0xe7a4c8b6), + TOBN(0xb06b1244, 0xc5f95cd8), TOBN(0xda8c8af0, 0xf4ab95f4), + TOBN(0x1bae59c2, 0xb9e5836d), TOBN(0x07d51e7e, 0x3acffffc), + TOBN(0x01e15e6a, 0xc2ccbcda), TOBN(0x3bc1923f, 0x8528c3e0), + TOBN(0x43324577, 0xa49fead4), TOBN(0x61a1b884, 0x2aa7a711), + TOBN(0xf9a86e08, 0x700230ef), TOBN(0x0af585a1, 0xbd19adf8), + TOBN(0x7645f361, 0xf55ad8f2), TOBN(0x6e676223, 0x46c3614c), + TOBN(0x23cb257c, 0x4e774d3f), TOBN(0x82a38513, 0xac102d1b), + TOBN(0x9bcddd88, 0x7b126aa5), TOBN(0xe716998b, 0xeefd3ee4), + TOBN(0x4239d571, 0xfb167583), TOBN(0xdd011c78, 0xd16c8f8a), + TOBN(0x271c2895, 0x69a27519), TOBN(0x9ce0a3b7, 0xd2d64b6a), + TOBN(0x8c977289, 0xd5ec6738), TOBN(0xa3b49f9a, 0x8840ef6b), + TOBN(0x808c14c9, 0x9a453419), TOBN(0x5c00295b, 0x0cf0a2d5), + TOBN(0x524414fb, 0x1d4bcc76), TOBN(0xb07691d2, 0x459a88f1), + TOBN(0x77f43263, 0xf70d110f), TOBN(0x64ada5e0, 0xb7abf9f3), + TOBN(0xafd0f94e, 0x5b544cf5), TOBN(0xb4a13a15, 0xfd2713fe), + TOBN(0xb99b7d6e, 0x250c74f4), TOBN(0x097f2f73, 0x20324e45), + TOBN(0x994b37d8, 0xaffa8208), TOBN(0xc3c31b0b, 0xdc29aafc), + TOBN(0x3da74651, 0x7a3a607f), TOBN(0xd8e1b8c1, 0xfe6955d6), + TOBN(0x716e1815, 0xc8418682), TOBN(0x541d487f, 0x7dc91d97), + TOBN(0x48a04669, 0xc6996982), TOBN(0xf39cab15, 0x83a6502e), + TOBN(0x025801a0, 0xe68db055), TOBN(0xf3569758, 0xba3338d5), + TOBN(0xb0c8c0aa, 0xee2afa84), TOBN(0x4f6985d3, 0xfb6562d1), + TOBN(0x351f1f15, 0x132ed17a), TOBN(0x510ed0b4, 0xc04365fe), + TOBN(0xa3f98138, 0xe5b1f066), TOBN(0xbc9d95d6, 0x32df03dc), + TOBN(0xa83ccf6e, 0x19abd09e), TOBN(0x0b4097c1, 0x4ff17edb), + TOBN(0x58a5c478, 0xd64a06ce), TOBN(0x2ddcc3fd, 0x544a58fd), + TOBN(0xd449503d, 0x9e8153b8), TOBN(0x3324fd02, 0x7774179b), + TOBN(0xaf5d47c8, 0xdbd9120c), TOBN(0xeb860162, 0x34fa94db), + TOBN(0x5817bdd1, 0x972f07f4), TOBN(0xe5579e2e, 0xd27bbceb), + TOBN(0x86847a1f, 0x5f11e5a6), TOBN(0xb39ed255, 0x7c3cf048), + TOBN(0xe1076417, 0xa2f62e55), TOBN(0x6b9ab38f, 0x1bcf82a2), + TOBN(0x4bb7c319, 0x7aeb29f9), TOBN(0xf6d17da3, 0x17227a46), + TOBN(0xab53ddbd, 0x0f968c00), TOBN(0xa03da7ec, 0x000c880b), + TOBN(0x7b239624, 0x6a9ad24d), TOBN(0x612c0401, 0x01ec60d0), + TOBN(0x70d10493, 0x109f5df1), TOBN(0xfbda4030, 0x80af7550), + TOBN(0x30b93f95, 0xc6b9a9b3), TOBN(0x0c74ec71, 0x007d9418), + TOBN(0x94175564, 0x6edb951f), TOBN(0x5f4a9d78, 0x7f22c282), + TOBN(0xb7870895, 0xb38d1196), TOBN(0xbc593df3, 
0xa228ce7c), + TOBN(0xc78c5bd4, 0x6af3641a), TOBN(0x7802200b, 0x3d9b3dcc), + TOBN(0x0dc73f32, 0x8be33304), TOBN(0x847ed87d, 0x61ffb79a), + TOBN(0xf85c974e, 0x6d671192), TOBN(0x1e14100a, 0xde16f60f), + TOBN(0x45cb0d5a, 0x95c38797), TOBN(0x18923bba, 0x9b022da4), + TOBN(0xef2be899, 0xbbe7e86e), TOBN(0x4a1510ee, 0x216067bf), + TOBN(0xd98c8154, 0x84d5ce3e), TOBN(0x1af777f0, 0xf92a2b90), + TOBN(0x9fbcb400, 0x4ef65724), TOBN(0x3e04a4c9, 0x3c0ca6fe), + TOBN(0xfb3e2cb5, 0x55002994), TOBN(0x1f3a93c5, 0x5363ecab), + TOBN(0x1fe00efe, 0x3923555b), TOBN(0x744bedd9, 0x1e1751ea), + TOBN(0x3fb2db59, 0x6ab69357), TOBN(0x8dbd7365, 0xf5e6618b), + TOBN(0x99d53099, 0xdf1ea40e), TOBN(0xb3f24a0b, 0x57d61e64), + TOBN(0xd088a198, 0x596eb812), TOBN(0x22c8361b, 0x5762940b), + TOBN(0x66f01f97, 0xf9c0d95c), TOBN(0x88461172, 0x8e43cdae), + TOBN(0x11599a7f, 0xb72b15c3), TOBN(0x135a7536, 0x420d95cc), + TOBN(0x2dcdf0f7, 0x5f7ae2f6), TOBN(0x15fc6e1d, 0xd7fa6da2), + TOBN(0x81ca829a, 0xd1d441b6), TOBN(0x84c10cf8, 0x04a106b6), + TOBN(0xa9b26c95, 0xa73fbbd0), TOBN(0x7f24e0cb, 0x4d8f6ee8), + TOBN(0x48b45937, 0x1e25a043), TOBN(0xf8a74fca, 0x036f3dfe), + TOBN(0x1ed46585, 0xc9f84296), TOBN(0x7fbaa8fb, 0x3bc278b0), + TOBN(0xa8e96cd4, 0x6c4fcbd0), TOBN(0x940a1202, 0x73b60a5f), + TOBN(0x34aae120, 0x55a4aec8), TOBN(0x550e9a74, 0xdbd742f0), + TOBN(0x794456d7, 0x228c68ab), TOBN(0x492f8868, 0xa4e25ec6), + TOBN(0x682915ad, 0xb2d8f398), TOBN(0xf13b51cc, 0x5b84c953), + TOBN(0xcda90ab8, 0x5bb917d6), TOBN(0x4b615560, 0x4ea3dee1), + TOBN(0x578b4e85, 0x0a52c1c8), TOBN(0xeab1a695, 0x20b75fc4), + TOBN(0x60c14f3c, 0xaa0bb3c6), TOBN(0x220f448a, 0xb8216094), + TOBN(0x4fe7ee31, 0xb0e63d34), TOBN(0xf4600572, 0xa9e54fab), + TOBN(0xc0493334, 0xd5e7b5a4), TOBN(0x8589fb92, 0x06d54831), + TOBN(0xaa70f5cc, 0x6583553a), TOBN(0x0879094a, 0xe25649e5), + TOBN(0xcc904507, 0x10044652), TOBN(0xebb0696d, 0x02541c4f), + TOBN(0x5a171fde, 0xb9718710), TOBN(0x38f1bed8, 0xf374a9f5), + TOBN(0xc8c582e1, 0xba39bdc1), TOBN(0xfc457b0a, 0x908cc0ce), + TOBN(0x9a187fd4, 0x883841e2), TOBN(0x8ec25b39, 0x38725381), + TOBN(0x2553ed05, 0x96f84395), TOBN(0x095c7661, 0x6f6c6897), + TOBN(0x917ac85c, 0x4bdc5610), TOBN(0xb2885fe4, 0x179eb301), + TOBN(0x5fc65547, 0x8b78bdcc), TOBN(0x4a9fc893, 0xe59e4699), + TOBN(0xbb7ff0cd, 0x3ce299af), TOBN(0x195be9b3, 0xadf38b20), + TOBN(0x6a929c87, 0xd38ddb8f), TOBN(0x55fcc99c, 0xb21a51b9), + TOBN(0x2b695b4c, 0x721a4593), TOBN(0xed1e9a15, 0x768eaac2), + TOBN(0xfb63d71c, 0x7489f914), TOBN(0xf98ba31c, 0x78118910), + TOBN(0x80291373, 0x9b128eb4), TOBN(0x7801214e, 0xd448af4a), + TOBN(0xdbd2e22b, 0x55418dd3), TOBN(0xeffb3c0d, 0xd3998242), + TOBN(0xdfa6077c, 0xc7bf3827), TOBN(0xf2165bcb, 0x47f8238f), + TOBN(0xfe37cf68, 0x8564d554), TOBN(0xe5f825c4, 0x0a81fb98), + TOBN(0x43cc4f67, 0xffed4d6f), TOBN(0xbc609578, 0xb50a34b0), + TOBN(0x8aa8fcf9, 0x5041faf1), TOBN(0x5659f053, 0x651773b6), + TOBN(0xe87582c3, 0x6044d63b), TOBN(0xa6089409, 0x0cdb0ca0), + TOBN(0x8c993e0f, 0xbfb2bcf6), TOBN(0xfc64a719, 0x45985cfc), + TOBN(0x15c4da80, 0x83dbedba), TOBN(0x804ae112, 0x2be67df7), + TOBN(0xda4c9658, 0xa23defde), TOBN(0x12002ddd, 0x5156e0d3), + TOBN(0xe68eae89, 0x5dd21b96), TOBN(0x8b99f28b, 0xcf44624d), + TOBN(0x0ae00808, 0x1ec8897a), TOBN(0xdd0a9303, 0x6712f76e), + TOBN(0x96237522, 0x4e233de4), TOBN(0x192445b1, 0x2b36a8a5), + TOBN(0xabf9ff74, 0x023993d9), TOBN(0x21f37bf4, 0x2aad4a8f), + TOBN(0x340a4349, 0xf8bd2bbd), TOBN(0x1d902cd9, 0x4868195d), + TOBN(0x3d27bbf1, 0xe5fdb6f1), TOBN(0x7a5ab088, 0x124f9f1c), + TOBN(0xc466ab06, 0xf7a09e03), TOBN(0x2f8a1977, 0x31f2c123), + 
TOBN(0xda355dc7, 0x041b6657), TOBN(0xcb840d12, 0x8ece2a7c), + TOBN(0xb600ad9f, 0x7db32675), TOBN(0x78fea133, 0x07a06f1b), + TOBN(0x5d032269, 0xb31f6094), TOBN(0x07753ef5, 0x83ec37aa), + TOBN(0x03485aed, 0x9c0bea78), TOBN(0x41bb3989, 0xbc3f4524), + TOBN(0x09403761, 0x697f726d), TOBN(0x6109beb3, 0xdf394820), + TOBN(0x804111ea, 0x3b6d1145), TOBN(0xb6271ea9, 0xa8582654), + TOBN(0x619615e6, 0x24e66562), TOBN(0xa2554945, 0xd7b6ad9c), + TOBN(0xd9c4985e, 0x99bfe35f), TOBN(0x9770ccc0, 0x7b51cdf6), + TOBN(0x7c327013, 0x92881832), TOBN(0x8777d45f, 0x286b26d1), + TOBN(0x9bbeda22, 0xd847999d), TOBN(0x03aa33b6, 0xc3525d32), + TOBN(0x4b7b96d4, 0x28a959a1), TOBN(0xbb3786e5, 0x31e5d234), + TOBN(0xaeb5d3ce, 0x6961f247), TOBN(0x20aa85af, 0x02f93d3f), + TOBN(0x9cd1ad3d, 0xd7a7ae4f), TOBN(0xbf6688f0, 0x781adaa8), + TOBN(0xb1b40e86, 0x7469cead), TOBN(0x1904c524, 0x309fca48), + TOBN(0x9b7312af, 0x4b54bbc7), TOBN(0xbe24bf8f, 0x593affa2), + TOBN(0xbe5e0790, 0xbd98764b), TOBN(0xa0f45f17, 0xa26e299e), + TOBN(0x4af0d2c2, 0x6b8fe4c7), TOBN(0xef170db1, 0x8ae8a3e6), + TOBN(0x0e8d61a0, 0x29e0ccc1), TOBN(0xcd53e87e, 0x60ad36ca), + TOBN(0x328c6623, 0xc8173822), TOBN(0x7ee1767d, 0xa496be55), + TOBN(0x89f13259, 0x648945af), TOBN(0x9e45a5fd, 0x25c8009c), + TOBN(0xaf2febd9, 0x1f61ab8c), TOBN(0x43f6bc86, 0x8a275385), + TOBN(0x87792348, 0xf2142e79), TOBN(0x17d89259, 0xc6e6238a), + TOBN(0x7536d2f6, 0x4a839d9b), TOBN(0x1f428fce, 0x76a1fbdc), + TOBN(0x1c109601, 0x0db06dfe), TOBN(0xbfc16bc1, 0x50a3a3cc), + TOBN(0xf9cbd9ec, 0x9b30f41b), TOBN(0x5b5da0d6, 0x00138cce), + TOBN(0xec1d0a48, 0x56ef96a7), TOBN(0xb47eb848, 0x982bf842), + TOBN(0x66deae32, 0xec3f700d), TOBN(0x4e43c42c, 0xaa1181e0), + TOBN(0xa1d72a31, 0xd1a4aa2a), TOBN(0x440d4668, 0xc004f3ce), + TOBN(0x0d6a2d3b, 0x45fe8a7a), TOBN(0x820e52e2, 0xfb128365), + TOBN(0x29ac5fcf, 0x25e51b09), TOBN(0x180cd2bf, 0x2023d159), + TOBN(0xa9892171, 0xa1ebf90e), TOBN(0xf97c4c87, 0x7c132181), + TOBN(0x9f1dc724, 0xc03dbb7e), TOBN(0xae043765, 0x018cbbe4), + TOBN(0xfb0b2a36, 0x0767d153), TOBN(0xa8e2f4d6, 0x249cbaeb), + TOBN(0x172a5247, 0xd95ea168), TOBN(0x1758fada, 0x2970764a), + TOBN(0xac803a51, 0x1d978169), TOBN(0x299cfe2e, 0xde77e01b), + TOBN(0x652a1e17, 0xb0a98927), TOBN(0x2e26e1d1, 0x20014495), + TOBN(0x7ae0af9f, 0x7175b56a), TOBN(0xc2e22a80, 0xd64b9f95), + TOBN(0x4d0ff9fb, 0xd90a060a), TOBN(0x496a27db, 0xbaf38085), + TOBN(0x32305401, 0xda776bcf), TOBN(0xb8cdcef6, 0x725f209e), + TOBN(0x61ba0f37, 0x436a0bba), TOBN(0x263fa108, 0x76860049), + TOBN(0x92beb98e, 0xda3542cf), TOBN(0xa2d4d14a, 0xd5849538), + TOBN(0x989b9d68, 0x12e9a1bc), TOBN(0x61d9075c, 0x5f6e3268), + TOBN(0x352c6aa9, 0x99ace638), TOBN(0xde4e4a55, 0x920f43ff), + TOBN(0xe5e4144a, 0xd673c017), TOBN(0x667417ae, 0x6f6e05ea), + TOBN(0x613416ae, 0xdcd1bd56), TOBN(0x5eb36201, 0x86693711), + TOBN(0x2d7bc504, 0x3a1aa914), TOBN(0x175a1299, 0x76dc5975), + TOBN(0xe900e0f2, 0x3fc8125c), TOBN(0x569ef68c, 0x11198875), + TOBN(0x9012db63, 0x63a113b4), TOBN(0xe3bd3f56, 0x98835766), + TOBN(0xa5c94a52, 0x76412dea), TOBN(0xad9e2a09, 0xaa735e5c), + TOBN(0x405a984c, 0x508b65e9), TOBN(0xbde4a1d1, 0x6df1a0d1), + TOBN(0x1a9433a1, 0xdfba80da), TOBN(0xe9192ff9, 0x9440ad2e), + TOBN(0x9f649696, 0x5099fe92), TOBN(0x25ddb65c, 0x0b27a54a), + TOBN(0x178279dd, 0xc590da61), TOBN(0x5479a999, 0xfbde681a), + TOBN(0xd0e84e05, 0x013fe162), TOBN(0xbe11dc92, 0x632d471b), + TOBN(0xdf0b0c45, 0xfc0e089f), TOBN(0x04fb15b0, 0x4c144025), + TOBN(0xa61d5fc2, 0x13c99927), TOBN(0xa033e9e0, 0x3de2eb35), + TOBN(0xf8185d5c, 0xb8dacbb4), TOBN(0x9a88e265, 0x8644549d), + TOBN(0xf717af62, 
0x54671ff6), TOBN(0x4bd4241b, 0x5fa58603), + TOBN(0x06fba40b, 0xe67773c0), TOBN(0xc1d933d2, 0x6a2847e9), + TOBN(0xf4f5acf3, 0x689e2c70), TOBN(0x92aab0e7, 0x46bafd31), + TOBN(0x798d76aa, 0x3473f6e5), TOBN(0xcc6641db, 0x93141934), + TOBN(0xcae27757, 0xd31e535e), TOBN(0x04cc43b6, 0x87c2ee11), + TOBN(0x8d1f9675, 0x2e029ffa), TOBN(0xc2150672, 0xe4cc7a2c), + TOBN(0x3b03c1e0, 0x8d68b013), TOBN(0xa9d6816f, 0xedf298f3), + TOBN(0x1bfbb529, 0xa2804464), TOBN(0x95a52fae, 0x5db22125), + TOBN(0x55b32160, 0x0e1cb64e), TOBN(0x004828f6, 0x7e7fc9fe), + TOBN(0x13394b82, 0x1bb0fb93), TOBN(0xb6293a2d, 0x35f1a920), + TOBN(0xde35ef21, 0xd145d2d9), TOBN(0xbe6225b3, 0xbb8fa603), + TOBN(0x00fc8f6b, 0x32cf252d), TOBN(0xa28e52e6, 0x117cf8c2), + TOBN(0x9d1dc89b, 0x4c371e6d), TOBN(0xcebe0675, 0x36ef0f28), + TOBN(0x5de05d09, 0xa4292f81), TOBN(0xa8303593, 0x353e3083), + TOBN(0xa1715b0a, 0x7e37a9bb), TOBN(0x8c56f61e, 0x2b8faec3), + TOBN(0x52507431, 0x33c9b102), TOBN(0x0130cefc, 0xa44431f0), + TOBN(0x56039fa0, 0xbd865cfb), TOBN(0x4b03e578, 0xbc5f1dd7), + TOBN(0x40edf2e4, 0xbabe7224), TOBN(0xc752496d, 0x3a1988f6), + TOBN(0xd1572d3b, 0x564beb6b), TOBN(0x0db1d110, 0x39a1c608), + TOBN(0x568d1934, 0x16f60126), TOBN(0x05ae9668, 0xf354af33), + TOBN(0x19de6d37, 0xc92544f2), TOBN(0xcc084353, 0xa35837d5), + TOBN(0xcbb6869c, 0x1a514ece), TOBN(0xb633e728, 0x2e1d1066), + TOBN(0xf15dd69f, 0x936c581c), TOBN(0x96e7b8ce, 0x7439c4f9), + TOBN(0x5e676f48, 0x2e448a5b), TOBN(0xb2ca7d5b, 0xfd916bbb), + TOBN(0xd55a2541, 0xf5024025), TOBN(0x47bc5769, 0xe4c2d937), + TOBN(0x7d31b92a, 0x0362189f), TOBN(0x83f3086e, 0xef7816f9), + TOBN(0xf9f46d94, 0xb587579a), TOBN(0xec2d22d8, 0x30e76c5f), + TOBN(0x27d57461, 0xb000ffcf), TOBN(0xbb7e65f9, 0x364ffc2c), + TOBN(0x7c7c9477, 0x6652a220), TOBN(0x61618f89, 0xd696c981), + TOBN(0x5021701d, 0x89effff3), TOBN(0xf2c8ff8e, 0x7c314163), + TOBN(0x2da413ad, 0x8efb4d3e), TOBN(0x937b5adf, 0xce176d95), + TOBN(0x22867d34, 0x2a67d51c), TOBN(0x262b9b10, 0x18eb3ac9), + TOBN(0x4e314fe4, 0xc43ff28b), TOBN(0x76476627, 0x6a664e7a), + TOBN(0x3e90e40b, 0xb7a565c2), TOBN(0x8588993a, 0xc1acf831), + TOBN(0xd7b501d6, 0x8f938829), TOBN(0x996627ee, 0x3edd7d4c), + TOBN(0x37d44a62, 0x90cd34c7), TOBN(0xa8327499, 0xf3833e8d), + TOBN(0x2e18917d, 0x4bf50353), TOBN(0x85dd726b, 0x556765fb), + TOBN(0x54fe65d6, 0x93d5ab66), TOBN(0x3ddbaced, 0x915c25fe), + TOBN(0xa799d9a4, 0x12f22e85), TOBN(0xe2a24867, 0x6d06f6bc), + TOBN(0xf4f1ee56, 0x43ca1637), TOBN(0xfda2828b, 0x61ece30a), + TOBN(0x758c1a3e, 0xa2dee7a6), TOBN(0xdcde2f3c, 0x734b2284), + TOBN(0xaba445d2, 0x4eaba6ad), TOBN(0x35aaf668, 0x76cee0a7), + TOBN(0x7e0b04a9, 0xe5aa049a), TOBN(0xe74083ad, 0x91103e84), + TOBN(0xbeb183ce, 0x40afecc3), TOBN(0x6b89de9f, 0xea043f7a),} + , + {TOBN(0x0e299d23, 0xfe67ba66), TOBN(0x91450760, 0x93cf2f34), + TOBN(0xf45b5ea9, 0x97fcf913), TOBN(0x5be00843, 0x8bd7ddda), + TOBN(0x358c3e05, 0xd53ff04d), TOBN(0xbf7ccdc3, 0x5de91ef7), + TOBN(0xad684dbf, 0xb69ec1a0), TOBN(0x367e7cf2, 0x801fd997), + TOBN(0x0ca1f3b7, 0xb0dc8595), TOBN(0x27de4608, 0x9f1d9f2e), + TOBN(0x1af3bf39, 0xbadd82a7), TOBN(0x79356a79, 0x65862448), + TOBN(0xc0602345, 0xf5f9a052), TOBN(0x1a8b0f89, 0x139a42f9), + TOBN(0xb53eee42, 0x844d40fc), TOBN(0x93b0bfe5, 0x4e5b6368), + TOBN(0x5434dd02, 0xc024789c), TOBN(0x90dca9ea, 0x41b57bfc), + TOBN(0x8aa898e2, 0x243398df), TOBN(0xf607c834, 0x894a94bb), + TOBN(0xbb07be97, 0xc2c99b76), TOBN(0x6576ba67, 0x18c29302), + TOBN(0x3d79efcc, 0xe703a88c), TOBN(0xf259ced7, 0xb6a0d106), + TOBN(0x0f893a5d, 0xc8de610b), TOBN(0xe8c515fb, 0x67e223ce), + TOBN(0x7774bfa6, 0x4ead6dc5), 
TOBN(0x89d20f95, 0x925c728f), + TOBN(0x7a1e0966, 0x098583ce), TOBN(0xa2eedb94, 0x93f2a7d7), + TOBN(0x1b282097, 0x4c304d4a), TOBN(0x0842e3da, 0xc077282d), + TOBN(0xe4d972a3, 0x3b9e2d7b), TOBN(0x7cc60b27, 0xc48218ff), + TOBN(0x8fc70838, 0x84149d91), TOBN(0x5c04346f, 0x2f461ecc), + TOBN(0xebe9fdf2, 0x614650a9), TOBN(0x5e35b537, 0xc1f666ac), + TOBN(0x645613d1, 0x88babc83), TOBN(0x88cace3a, 0xc5e1c93e), + TOBN(0x209ca375, 0x3de92e23), TOBN(0xccb03cc8, 0x5fbbb6e3), + TOBN(0xccb90f03, 0xd7b1487e), TOBN(0xfa9c2a38, 0xc710941f), + TOBN(0x756c3823, 0x6724ceed), TOBN(0x3a902258, 0x192d0323), + TOBN(0xb150e519, 0xea5e038e), TOBN(0xdcba2865, 0xc7427591), + TOBN(0xe549237f, 0x78890732), TOBN(0xc443bef9, 0x53fcb4d9), + TOBN(0x9884d8a6, 0xeb3480d6), TOBN(0x8a35b6a1, 0x3048b186), + TOBN(0xb4e44716, 0x65e9a90a), TOBN(0x45bf380d, 0x653006c0), + TOBN(0x8f3f820d, 0x4fe9ae3b), TOBN(0x244a35a0, 0x979a3b71), + TOBN(0xa1010e9d, 0x74cd06ff), TOBN(0x9c17c7df, 0xaca3eeac), + TOBN(0x74c86cd3, 0x8063aa2b), TOBN(0x8595c4b3, 0x734614ff), + TOBN(0xa3de00ca, 0x990f62cc), TOBN(0xd9bed213, 0xca0c3be5), + TOBN(0x7886078a, 0xdf8ce9f5), TOBN(0xddb27ce3, 0x5cd44444), + TOBN(0xed374a66, 0x58926ddd), TOBN(0x138b2d49, 0x908015b8), + TOBN(0x886c6579, 0xde1f7ab8), TOBN(0x888b9aa0, 0xc3020b7a), + TOBN(0xd3ec034e, 0x3a96e355), TOBN(0xba65b0b8, 0xf30fbe9a), + TOBN(0x064c8e50, 0xff21367a), TOBN(0x1f508ea4, 0x0b04b46e), + TOBN(0x98561a49, 0x747c866c), TOBN(0xbbb1e5fe, 0x0518a062), + TOBN(0x20ff4e8b, 0xecdc3608), TOBN(0x7f55cded, 0x20184027), + TOBN(0x8d73ec95, 0xf38c85f0), TOBN(0x5b589fdf, 0x8bc3b8c3), + TOBN(0xbe95dd98, 0x0f12b66f), TOBN(0xf5bd1a09, 0x0e338e01), + TOBN(0x65163ae5, 0x5e915918), TOBN(0x6158d6d9, 0x86f8a46b), + TOBN(0x8466b538, 0xeeebf99c), TOBN(0xca8761f6, 0xbca477ef), + TOBN(0xaf3449c2, 0x9ebbc601), TOBN(0xef3b0f41, 0xe0c3ae2f), + TOBN(0xaa6c577d, 0x5de63752), TOBN(0xe9166601, 0x64682a51), + TOBN(0x5a3097be, 0xfc15aa1e), TOBN(0x40d12548, 0xb54b0745), + TOBN(0x5bad4706, 0x519a5f12), TOBN(0xed03f717, 0xa439dee6), + TOBN(0x0794bb6c, 0x4a02c499), TOBN(0xf725083d, 0xcffe71d2), + TOBN(0x2cad7519, 0x0f3adcaf), TOBN(0x7f68ea1c, 0x43729310), + TOBN(0xe747c8c7, 0xb7ffd977), TOBN(0xec104c35, 0x80761a22), + TOBN(0x8395ebaf, 0x5a3ffb83), TOBN(0xfb3261f4, 0xe4b63db7), + TOBN(0x53544960, 0xd883e544), TOBN(0x13520d70, 0x8cc2eeb8), + TOBN(0x08f6337b, 0xd3d65f99), TOBN(0x83997db2, 0x781cf95b), + TOBN(0xce6ff106, 0x0dbd2c01), TOBN(0x4f8eea6b, 0x1f9ce934), + TOBN(0x546f7c4b, 0x0e993921), TOBN(0x6236a324, 0x5e753fc7), + TOBN(0x65a41f84, 0xa16022e9), TOBN(0x0c18d878, 0x43d1dbb2), + TOBN(0x73c55640, 0x2d4cef9c), TOBN(0xa0428108, 0x70444c74), + TOBN(0x68e4f15e, 0x9afdfb3c), TOBN(0x49a56143, 0x5bdfb6df), + TOBN(0xa9bc1bd4, 0x5f823d97), TOBN(0xbceb5970, 0xea111c2a), + TOBN(0x366b455f, 0xb269bbc4), TOBN(0x7cd85e1e, 0xe9bc5d62), + TOBN(0xc743c41c, 0x4f18b086), TOBN(0xa4b40990, 0x95294fb9), + TOBN(0x9c7c581d, 0x26ee8382), TOBN(0xcf17dcc5, 0x359d638e), + TOBN(0xee8273ab, 0xb728ae3d), TOBN(0x1d112926, 0xf821f047), + TOBN(0x11498477, 0x50491a74), TOBN(0x687fa761, 0xfde0dfb9), + TOBN(0x2c258022, 0x7ea435ab), TOBN(0x6b8bdb94, 0x91ce7e3f), + TOBN(0x4c5b5dc9, 0x3bf834aa), TOBN(0x04371819, 0x4f6c7e4b), + TOBN(0xc284e00a, 0x3736bcad), TOBN(0x0d881118, 0x21ae8f8d), + TOBN(0xf9cf0f82, 0xf48c8e33), TOBN(0xa11fd075, 0xa1bf40db), + TOBN(0xdceab0de, 0xdc2733e5), TOBN(0xc560a8b5, 0x8e986bd7), + TOBN(0x48dd1fe2, 0x3929d097), TOBN(0x3885b290, 0x92f188f1), + TOBN(0x0f2ae613, 0xda6fcdac), TOBN(0x9054303e, 0xb662a46c), + TOBN(0xb6871e44, 0x0738042a), TOBN(0x98e6a977, 
0xbdaf6449), + TOBN(0xd8bc0650, 0xd1c9df1b), TOBN(0xef3d6451, 0x36e098f9), + TOBN(0x03fbae82, 0xb6d72d28), TOBN(0x77ca9db1, 0xf5d84080), + TOBN(0x8a112cff, 0xa58efc1c), TOBN(0x518d761c, 0xc564cb4a), + TOBN(0x69b5740e, 0xf0d1b5ce), TOBN(0x717039cc, 0xe9eb1785), + TOBN(0x3fe29f90, 0x22f53382), TOBN(0x8e54ba56, 0x6bc7c95c), + TOBN(0x9c806d8a, 0xf7f91d0f), TOBN(0x3b61b0f1, 0xa82a5728), + TOBN(0x4640032d, 0x94d76754), TOBN(0x273eb5de, 0x47d834c6), + TOBN(0x2988abf7, 0x7b4e4d53), TOBN(0xb7ce66bf, 0xde401777), + TOBN(0x9fba6b32, 0x715071b3), TOBN(0x82413c24, 0xad3a1a98), + TOBN(0x5b7fc8c4, 0xe0e8ad93), TOBN(0xb5679aee, 0x5fab868d), + TOBN(0xb1f9d2fa, 0x2b3946f3), TOBN(0x458897dc, 0x5685b50a), + TOBN(0x1e98c930, 0x89d0caf3), TOBN(0x39564c5f, 0x78642e92), + TOBN(0x1b77729a, 0x0dbdaf18), TOBN(0xf9170722, 0x579e82e6), + TOBN(0x680c0317, 0xe4515fa5), TOBN(0xf85cff84, 0xfb0c790f), + TOBN(0xc7a82aab, 0x6d2e0765), TOBN(0x7446bca9, 0x35c82b32), + TOBN(0x5de607aa, 0x6d63184f), TOBN(0x7c1a46a8, 0x262803a6), + TOBN(0xd218313d, 0xaebe8035), TOBN(0x92113ffd, 0xc73c51f8), + TOBN(0x4b38e083, 0x12e7e46c), TOBN(0x69d0a37a, 0x56126bd5), + TOBN(0xfb3f324b, 0x73c07e04), TOBN(0xa0c22f67, 0x8fda7267), + TOBN(0x8f2c0051, 0x4d2c7d8f), TOBN(0xbc45ced3, 0xcbe2cae5), + TOBN(0xe1c6cf07, 0xa8f0f277), TOBN(0xbc392312, 0x1eb99a98), + TOBN(0x75537b7e, 0x3cc8ac85), TOBN(0x8d725f57, 0xdd02753b), + TOBN(0xfd05ff64, 0xb737df2f), TOBN(0x55fe8712, 0xf6d2531d), + TOBN(0x57ce04a9, 0x6ab6b01c), TOBN(0x69a02a89, 0x7cd93724), + TOBN(0x4f82ac35, 0xcf86699b), TOBN(0x8242d3ad, 0x9cb4b232), + TOBN(0x713d0f65, 0xd62105e5), TOBN(0xbb222bfa, 0x2d29be61), + TOBN(0xf2f9a79e, 0x6cfbef09), TOBN(0xfc24d8d3, 0xd5d6782f), + TOBN(0x5db77085, 0xd4129967), TOBN(0xdb81c3cc, 0xdc3c2a43), + TOBN(0x9d655fc0, 0x05d8d9a3), TOBN(0x3f5d057a, 0x54298026), + TOBN(0x1157f56d, 0x88c54694), TOBN(0xb26baba5, 0x9b09573e), + TOBN(0x2cab03b0, 0x22adffd1), TOBN(0x60a412c8, 0xdd69f383), + TOBN(0xed76e98b, 0x54b25039), TOBN(0xd4ee67d3, 0x687e714d), + TOBN(0x87739648, 0x7b00b594), TOBN(0xce419775, 0xc9ef709b), + TOBN(0x40f76f85, 0x1c203a40), TOBN(0x30d352d6, 0xeafd8f91), + TOBN(0xaf196d3d, 0x95578dd2), TOBN(0xea4bb3d7, 0x77cc3f3d), + TOBN(0x42a5bd03, 0xb98e782b), TOBN(0xac958c40, 0x0624920d), + TOBN(0xb838134c, 0xfc56fcc8), TOBN(0x86ec4ccf, 0x89572e5e), + TOBN(0x69c43526, 0x9be47be0), TOBN(0x323b7dd8, 0xcb28fea1), + TOBN(0xfa5538ba, 0x3a6c67e5), TOBN(0xef921d70, 0x1d378e46), + TOBN(0xf92961fc, 0x3c4b880e), TOBN(0x3f6f914e, 0x98940a67), + TOBN(0xa990eb0a, 0xfef0ff39), TOBN(0xa6c2920f, 0xf0eeff9c), + TOBN(0xca804166, 0x51b8d9a3), TOBN(0x42531bc9, 0x0ffb0db1), + TOBN(0x72ce4718, 0xaa82e7ce), TOBN(0x6e199913, 0xdf574741), + TOBN(0xd5f1b13d, 0xd5d36946), TOBN(0x8255dc65, 0xf68f0194), + TOBN(0xdc9df4cd, 0x8710d230), TOBN(0x3453c20f, 0x138c1988), + TOBN(0x9af98dc0, 0x89a6ef01), TOBN(0x4dbcc3f0, 0x9857df85), + TOBN(0x34805601, 0x5c1ad924), TOBN(0x40448da5, 0xd0493046), + TOBN(0xf629926d, 0x4ee343e2), TOBN(0x6343f1bd, 0x90e8a301), + TOBN(0xefc93491, 0x40815b3f), TOBN(0xf882a423, 0xde8f66fb), + TOBN(0x3a12d5f4, 0xe7db9f57), TOBN(0x7dfba38a, 0x3c384c27), + TOBN(0x7a904bfd, 0x6fc660b1), TOBN(0xeb6c5db3, 0x2773b21c), + TOBN(0xc350ee66, 0x1cdfe049), TOBN(0x9baac0ce, 0x44540f29), + TOBN(0xbc57b6ab, 0xa5ec6aad), TOBN(0x167ce8c3, 0x0a7c1baa), + TOBN(0xb23a03a5, 0x53fb2b56), TOBN(0x6ce141e7, 0x4e057f78), + TOBN(0x796525c3, 0x89e490d9), TOBN(0x0bc95725, 0xa31a7e75), + TOBN(0x1ec56791, 0x1220fd06), TOBN(0x716e3a3c, 0x408b0bd6), + TOBN(0x31cd6bf7, 0xe8ebeba9), TOBN(0xa7326ca6, 0xbee6b670), + 
TOBN(0x3d9f851c, 0xcd090c43), TOBN(0x561e8f13, 0xf12c3988), + TOBN(0x50490b6a, 0x904b7be4), TOBN(0x61690ce1, 0x0410737b), + TOBN(0x299e9a37, 0x0f009052), TOBN(0x258758f0, 0xf026092e), + TOBN(0x9fa255f3, 0xfdfcdc0f), TOBN(0xdbc9fb1f, 0xc0e1bcd2), + TOBN(0x35f9dd6e, 0x24651840), TOBN(0xdca45a84, 0xa5c59abc), + TOBN(0x103d396f, 0xecca4938), TOBN(0x4532da0a, 0xb97b3f29), + TOBN(0xc4135ea5, 0x1999a6bf), TOBN(0x3aa9505a, 0x5e6bf2ee), + TOBN(0xf77cef06, 0x3f5be093), TOBN(0x97d1a0f8, 0xa943152e), + TOBN(0x2cb0ebba, 0x2e1c21dd), TOBN(0xf41b29fc, 0x2c6797c4), + TOBN(0xc6e17321, 0xb300101f), TOBN(0x4422b0e9, 0xd0d79a89), + TOBN(0x49e4901c, 0x92f1bfc4), TOBN(0x06ab1f8f, 0xe1e10ed9), + TOBN(0x84d35577, 0xdb2926b8), TOBN(0xca349d39, 0x356e8ec2), + TOBN(0x70b63d32, 0x343bf1a9), TOBN(0x8fd3bd28, 0x37d1a6b1), + TOBN(0x0454879c, 0x316865b4), TOBN(0xee959ff6, 0xc458efa2), + TOBN(0x0461dcf8, 0x9706dc3f), TOBN(0x737db0e2, 0x164e4b2e), + TOBN(0x09262680, 0x2f8843c8), TOBN(0x54498bbc, 0x7745e6f6), + TOBN(0x359473fa, 0xa29e24af), TOBN(0xfcc3c454, 0x70aa87a1), + TOBN(0xfd2c4bf5, 0x00573ace), TOBN(0xb65b514e, 0x28dd1965), + TOBN(0xe46ae7cf, 0x2193e393), TOBN(0x60e9a4e1, 0xf5444d97), + TOBN(0xe7594e96, 0x00ff38ed), TOBN(0x43d84d2f, 0x0a0e0f02), + TOBN(0x8b6db141, 0xee398a21), TOBN(0xb88a56ae, 0xe3bcc5be), + TOBN(0x0a1aa52f, 0x373460ea), TOBN(0x20da1a56, 0x160bb19b), + TOBN(0xfb54999d, 0x65bf0384), TOBN(0x71a14d24, 0x5d5a180e), + TOBN(0xbc44db7b, 0x21737b04), TOBN(0xd84fcb18, 0x01dd8e92), + TOBN(0x80de937b, 0xfa44b479), TOBN(0x53505499, 0x5c98fd4f), + TOBN(0x1edb12ab, 0x28f08727), TOBN(0x4c58b582, 0xa5f3ef53), + TOBN(0xbfb236d8, 0x8327f246), TOBN(0xc3a3bfaa, 0x4d7df320), + TOBN(0xecd96c59, 0xb96024f2), TOBN(0xfc293a53, 0x7f4e0433), + TOBN(0x5341352b, 0x5acf6e10), TOBN(0xc50343fd, 0xafe652c3), + TOBN(0x4af3792d, 0x18577a7f), TOBN(0xe1a4c617, 0xaf16823d), + TOBN(0x9b26d0cd, 0x33425d0a), TOBN(0x306399ed, 0x9b7bc47f), + TOBN(0x2a792f33, 0x706bb20b), TOBN(0x31219614, 0x98111055), + TOBN(0x864ec064, 0x87f5d28b), TOBN(0x11392d91, 0x962277fd), + TOBN(0xb5aa7942, 0xbb6aed5f), TOBN(0x080094dc, 0x47e799d9), + TOBN(0x4afa588c, 0x208ba19b), TOBN(0xd3e7570f, 0x8512f284), + TOBN(0xcbae64e6, 0x02f5799a), TOBN(0xdeebe7ef, 0x514b9492), + TOBN(0x30300f98, 0xe5c298ff), TOBN(0x17f561be, 0x3678361f), + TOBN(0xf52ff312, 0x98cb9a16), TOBN(0x6233c3bc, 0x5562d490), + TOBN(0x7bfa15a1, 0x92e3a2cb), TOBN(0x961bcfd1, 0xe6365119), + TOBN(0x3bdd29bf, 0x2c8c53b1), TOBN(0x739704df, 0x822844ba), + TOBN(0x7dacfb58, 0x7e7b754b), TOBN(0x23360791, 0xa806c9b9), + TOBN(0xe7eb88c9, 0x23504452), TOBN(0x2983e996, 0x852c1783), + TOBN(0xdd4ae529, 0x958d881d), TOBN(0x026bae03, 0x262c7b3c), + TOBN(0x3a6f9193, 0x960b52d1), TOBN(0xd0980f90, 0x92696cfb), + TOBN(0x4c1f428c, 0xd5f30851), TOBN(0x94dfed27, 0x2a4f6630), + TOBN(0x4df53772, 0xfc5d48a4), TOBN(0xdd2d5a2f, 0x933260ce), + TOBN(0x574115bd, 0xd44cc7a5), TOBN(0x4ba6b20d, 0xbd12533a), + TOBN(0x30e93cb8, 0x243057c9), TOBN(0x794c486a, 0x14de320e), + TOBN(0xe925d4ce, 0xf21496e4), TOBN(0xf951d198, 0xec696331), + TOBN(0x9810e2de, 0x3e8d812f), TOBN(0xd0a47259, 0x389294ab), + TOBN(0x513ba2b5, 0x0e3bab66), TOBN(0x462caff5, 0xabad306f), + TOBN(0xe2dc6d59, 0xaf04c49e), TOBN(0x1aeb8750, 0xe0b84b0b), + TOBN(0xc034f12f, 0x2f7d0ca2), TOBN(0x6d2e8128, 0xe06acf2f), + TOBN(0x801f4f83, 0x21facc2f), TOBN(0xa1170c03, 0xf40ef607), + TOBN(0xfe0a1d4f, 0x7805a99c), TOBN(0xbde56a36, 0xcc26aba5), + TOBN(0x5b1629d0, 0x35531f40), TOBN(0xac212c2b, 0x9afa6108), + TOBN(0x30a06bf3, 0x15697be5), TOBN(0x6f0545dc, 0x2c63c7c1), + TOBN(0x5d8cb842, 
0x7ccdadaf), TOBN(0xd52e379b, 0xac7015bb), + TOBN(0xc4f56147, 0xf462c23e), TOBN(0xd44a4298, 0x46bc24b0), + TOBN(0xbc73d23a, 0xe2856d4f), TOBN(0x61cedd8c, 0x0832bcdf), + TOBN(0x60953556, 0x99f241d7), TOBN(0xee4adbd7, 0x001a349d), + TOBN(0x0b35bf6a, 0xaa89e491), TOBN(0x7f0076f4, 0x136f7546), + TOBN(0xd19a18ba, 0x9264da3d), TOBN(0x6eb2d2cd, 0x62a7a28b), + TOBN(0xcdba941f, 0x8761c971), TOBN(0x1550518b, 0xa3be4a5d), + TOBN(0xd0e8e2f0, 0x57d0b70c), TOBN(0xeea8612e, 0xcd133ba3), + TOBN(0x814670f0, 0x44416aec), TOBN(0x424db6c3, 0x30775061), + TOBN(0xd96039d1, 0x16213fd1), TOBN(0xc61e7fa5, 0x18a3478f), + TOBN(0xa805bdcc, 0xcb0c5021), TOBN(0xbdd6f3a8, 0x0cc616dd), + TOBN(0x06009667, 0x5d97f7e2), TOBN(0x31db0fc1, 0xaf0bf4b6), + TOBN(0x23680ed4, 0x5491627a), TOBN(0xb99a3c66, 0x7d741fb1), + TOBN(0xe9bb5f55, 0x36b1ff92), TOBN(0x29738577, 0x512b388d), + TOBN(0xdb8a2ce7, 0x50fcf263), TOBN(0x385346d4, 0x6c4f7b47), + TOBN(0xbe86c5ef, 0x31631f9e), TOBN(0xbf91da21, 0x03a57a29), + TOBN(0xc3b1f796, 0x7b23f821), TOBN(0x0f7d00d2, 0x770db354), + TOBN(0x8ffc6c3b, 0xd8fe79da), TOBN(0xcc5e8c40, 0xd525c996), + TOBN(0x4640991d, 0xcfff632a), TOBN(0x64d97e8c, 0x67112528), + TOBN(0xc232d973, 0x02f1cd1e), TOBN(0xce87eacb, 0x1dd212a4), + TOBN(0x6e4c8c73, 0xe69802f7), TOBN(0x12ef0290, 0x1fffddbd), + TOBN(0x941ec74e, 0x1bcea6e2), TOBN(0xd0b54024, 0x3cb92cbb), + TOBN(0x809fb9d4, 0x7e8f9d05), TOBN(0x3bf16159, 0xf2992aae), + TOBN(0xad40f279, 0xf8a7a838), TOBN(0x11aea631, 0x05615660), + TOBN(0xbf52e6f1, 0xa01f6fa1), TOBN(0xef046995, 0x3dc2aec9), + TOBN(0x785dbec9, 0xd8080711), TOBN(0xe1aec60a, 0x9fdedf76), + TOBN(0xece797b5, 0xfa21c126), TOBN(0xc66e898f, 0x05e52732), + TOBN(0x39bb69c4, 0x08811fdb), TOBN(0x8bfe1ef8, 0x2fc7f082), + TOBN(0xc8e7a393, 0x174f4138), TOBN(0xfba8ad1d, 0xd58d1f98), + TOBN(0xbc21d0ce, 0xbfd2fd5b), TOBN(0x0b839a82, 0x6ee60d61), + TOBN(0xaacf7658, 0xafd22253), TOBN(0xb526bed8, 0xaae396b3), + TOBN(0xccc1bbc2, 0x38564464), TOBN(0x9e3ff947, 0x8c45bc73), + TOBN(0xcde9bca3, 0x58188a78), TOBN(0x138b8ee0, 0xd73bf8f7), + TOBN(0x5c7e234c, 0x4123c489), TOBN(0x66e69368, 0xfa643297), + TOBN(0x0629eeee, 0x39a15fa3), TOBN(0x95fab881, 0xa9e2a927), + TOBN(0xb2497007, 0xeafbb1e1), TOBN(0xd75c9ce6, 0xe75b7a93), + TOBN(0x3558352d, 0xefb68d78), TOBN(0xa2f26699, 0x223f6396), + TOBN(0xeb911ecf, 0xe469b17a), TOBN(0x62545779, 0xe72d3ec2), + TOBN(0x8ea47de7, 0x82cb113f), TOBN(0xebe4b086, 0x4e1fa98d), + TOBN(0xec2d5ed7, 0x8cdfedb1), TOBN(0xa535c077, 0xfe211a74), + TOBN(0x9678109b, 0x11d244c5), TOBN(0xf17c8bfb, 0xbe299a76), + TOBN(0xb651412e, 0xfb11fbc4), TOBN(0xea0b5482, 0x94ab3f65), + TOBN(0xd8dffd95, 0x0cf78243), TOBN(0x2e719e57, 0xce0361d4), + TOBN(0x9007f085, 0x304ddc5b), TOBN(0x095e8c6d, 0x4daba2ea), + TOBN(0x5a33cdb4, 0x3f9d28a9), TOBN(0x85b95cd8, 0xe2283003), + TOBN(0xbcd6c819, 0xb9744733), TOBN(0x29c5f538, 0xfc7f5783), + TOBN(0x6c49b2fa, 0xd59038e4), TOBN(0x68349cc1, 0x3bbe1018), + TOBN(0xcc490c1d, 0x21830ee5), TOBN(0x36f9c4ee, 0xe9bfa297), + TOBN(0x58fd7294, 0x48de1a94), TOBN(0xaadb13a8, 0x4e8f2cdc), + TOBN(0x515eaaa0, 0x81313dba), TOBN(0xc76bb468, 0xc2152dd8), + TOBN(0x357f8d75, 0xa653dbf8), TOBN(0xe4d8c4d1, 0xb14ac143), + TOBN(0xbdb8e675, 0xb055cb40), TOBN(0x898f8e7b, 0x977b5167), + TOBN(0xecc65651, 0xb82fb863), TOBN(0x56544814, 0x6d88f01f), + TOBN(0xb0928e95, 0x263a75a9), TOBN(0xcfb6836f, 0x1a22fcda), + TOBN(0x651d14db, 0x3f3bd37c), TOBN(0x1d3837fb, 0xb6ad4664), + TOBN(0x7c5fb538, 0xff4f94ab), TOBN(0x7243c712, 0x6d7fb8f2), + TOBN(0xef13d60c, 0xa85c5287), TOBN(0x18cfb7c7, 0x4bb8dd1b), + TOBN(0x82f9bfe6, 0x72908219), 
TOBN(0x35c4592b, 0x9d5144ab), + TOBN(0x52734f37, 0x9cf4b42f), TOBN(0x6bac55e7, 0x8c60ddc4), + TOBN(0xb5cd811e, 0x94dea0f6), TOBN(0x259ecae4, 0xe18cc1a3), + TOBN(0x6a0e836e, 0x15e660f8), TOBN(0x6c639ea6, 0x0e02bff2), + TOBN(0x8721b8cb, 0x7e1026fd), TOBN(0x9e73b50b, 0x63261942), + TOBN(0xb8c70974, 0x77f01da3), TOBN(0x1839e6a6, 0x8268f57f), + TOBN(0x571b9415, 0x5150b805), TOBN(0x1892389e, 0xf92c7097), + TOBN(0x8d69c18e, 0x4a084b95), TOBN(0x7014c512, 0xbe5b495c), + TOBN(0x4780db36, 0x1b07523c), TOBN(0x2f6219ce, 0x2c1c64fa), + TOBN(0xc38b81b0, 0x602c105a), TOBN(0xab4f4f20, 0x5dc8e360), + TOBN(0x20d3c982, 0xcf7d62d2), TOBN(0x1f36e29d, 0x23ba8150), + TOBN(0x48ae0bf0, 0x92763f9e), TOBN(0x7a527e6b, 0x1d3a7007), + TOBN(0xb4a89097, 0x581a85e3), TOBN(0x1f1a520f, 0xdc158be5), + TOBN(0xf98db37d, 0x167d726e), TOBN(0x8802786e, 0x1113e862)} + , + {TOBN(0xefb2149e, 0x36f09ab0), TOBN(0x03f163ca, 0x4a10bb5b), + TOBN(0xd0297045, 0x06e20998), TOBN(0x56f0af00, 0x1b5a3bab), + TOBN(0x7af4cfec, 0x70880e0d), TOBN(0x7332a66f, 0xbe3d913f), + TOBN(0x32e6c84a, 0x7eceb4bd), TOBN(0xedc4a79a, 0x9c228f55), + TOBN(0xc37c7dd0, 0xc55c4496), TOBN(0xa6a96357, 0x25bbabd2), + TOBN(0x5b7e63f2, 0xadd7f363), TOBN(0x9dce3782, 0x2e73f1df), + TOBN(0xe1e5a16a, 0xb2b91f71), TOBN(0xe4489823, 0x5ba0163c), + TOBN(0xf2759c32, 0xf6e515ad), TOBN(0xa5e2f1f8, 0x8615eecf), + TOBN(0x74519be7, 0xabded551), TOBN(0x03d358b8, 0xc8b74410), + TOBN(0x4d00b10b, 0x0e10d9a9), TOBN(0x6392b0b1, 0x28da52b7), + TOBN(0x6744a298, 0x0b75c904), TOBN(0xc305b0ae, 0xa8f7f96c), + TOBN(0x042e421d, 0x182cf932), TOBN(0xf6fc5d50, 0x9e4636ca), + TOBN(0x795847c9, 0xd64cc78c), TOBN(0x6c50621b, 0x9b6cb27b), + TOBN(0x07099bf8, 0xdf8022ab), TOBN(0x48f862eb, 0xc04eda1d), + TOBN(0xd12732ed, 0xe1603c16), TOBN(0x19a80e0f, 0x5c9a9450), + TOBN(0xe2257f54, 0xb429b4fc), TOBN(0x66d3b2c6, 0x45460515), + TOBN(0x6ca4f87e, 0x822e37be), TOBN(0x73f237b4, 0x253bda4e), + TOBN(0xf747f3a2, 0x41190aeb), TOBN(0xf06fa36f, 0x804cf284), + TOBN(0x0a6bbb6e, 0xfc621c12), TOBN(0x5d624b64, 0x40b80ec6), + TOBN(0x4b072425, 0x7ba556f3), TOBN(0x7fa0c354, 0x3e2d20a8), + TOBN(0xe921fa31, 0xe3229d41), TOBN(0xa929c652, 0x94531bd4), + TOBN(0x84156027, 0xa6d38209), TOBN(0xf3d69f73, 0x6bdb97bd), + TOBN(0x8906d19a, 0x16833631), TOBN(0x68a34c2e, 0x03d51be3), + TOBN(0xcb59583b, 0x0e511cd8), TOBN(0x99ce6bfd, 0xfdc132a8), + TOBN(0x3facdaaa, 0xffcdb463), TOBN(0x658bbc1a, 0x34a38b08), + TOBN(0x12a801f8, 0xf1a9078d), TOBN(0x1567bcf9, 0x6ab855de), + TOBN(0xe08498e0, 0x3572359b), TOBN(0xcf0353e5, 0x8659e68b), + TOBN(0xbb86e9c8, 0x7d23807c), TOBN(0xbc08728d, 0x2198e8a2), + TOBN(0x8de2b7bc, 0x453cadd6), TOBN(0x203900a7, 0xbc0bc1f8), + TOBN(0xbcd86e47, 0xa6abd3af), TOBN(0x911cac12, 0x8502effb), + TOBN(0x2d550242, 0xec965469), TOBN(0x0e9f7692, 0x29e0017e), + TOBN(0x633f078f, 0x65979885), TOBN(0xfb87d449, 0x4cf751ef), + TOBN(0xe1790e4b, 0xfc25419a), TOBN(0x36467203, 0x4bff3cfd), + TOBN(0xc8db6386, 0x25b6e83f), TOBN(0x6cc69f23, 0x6cad6fd2), + TOBN(0x0219e45a, 0x6bc68bb9), TOBN(0xe43d79b6, 0x297f7334), + TOBN(0x7d445368, 0x465dc97c), TOBN(0x4b9eea32, 0x2a0b949a), + TOBN(0x1b96c6ba, 0x6102d021), TOBN(0xeaafac78, 0x2f4461ea), + TOBN(0xd4b85c41, 0xc49f19a8), TOBN(0x275c28e4, 0xcf538875), + TOBN(0x35451a9d, 0xdd2e54e0), TOBN(0x6991adb5, 0x0605618b), + TOBN(0x5b8b4bcd, 0x7b36cd24), TOBN(0x372a4f8c, 0x56f37216), + TOBN(0xc890bd73, 0xa6a5da60), TOBN(0x6f083da0, 0xdc4c9ff0), + TOBN(0xf4e14d94, 0xf0536e57), TOBN(0xf9ee1eda, 0xaaec8243), + TOBN(0x571241ec, 0x8bdcf8e7), TOBN(0xa5db8271, 0x0b041e26), + TOBN(0x9a0b9a99, 0xe3fff040), 
TOBN(0xcaaf21dd, 0x7c271202), + TOBN(0xb4e2b2e1, 0x4f0dd2e8), TOBN(0xe77e7c4f, 0x0a377ac7), + TOBN(0x69202c3f, 0x0d7a2198), TOBN(0xf759b7ff, 0x28200eb8), + TOBN(0xc87526ed, 0xdcfe314e), TOBN(0xeb84c524, 0x53d5cf99), + TOBN(0xb1b52ace, 0x515138b6), TOBN(0x5aa7ff8c, 0x23fca3f4), + TOBN(0xff0b13c3, 0xb9791a26), TOBN(0x960022da, 0xcdd58b16), + TOBN(0xdbd55c92, 0x57aad2de), TOBN(0x3baaaaa3, 0xf30fe619), + TOBN(0x9a4b2346, 0x0d881efd), TOBN(0x506416c0, 0x46325e2a), + TOBN(0x91381e76, 0x035c18d4), TOBN(0xb3bb68be, 0xf27817b0), + TOBN(0x15bfb8bf, 0x5116f937), TOBN(0x7c64a586, 0xc1268943), + TOBN(0x71e25cc3, 0x8419a2c8), TOBN(0x9fd6b0c4, 0x8335f463), + TOBN(0x4bf0ba3c, 0xe8ee0e0e), TOBN(0x6f6fba60, 0x298c21fa), + TOBN(0x57d57b39, 0xae66bee0), TOBN(0x292d5130, 0x22672544), + TOBN(0xf451105d, 0xbab093b3), TOBN(0x012f59b9, 0x02839986), + TOBN(0x8a915802, 0x3474a89c), TOBN(0x048c919c, 0x2de03e97), + TOBN(0xc476a2b5, 0x91071cd5), TOBN(0x791ed89a, 0x034970a5), + TOBN(0x89bd9042, 0xe1b7994b), TOBN(0x8eaf5179, 0xa1057ffd), + TOBN(0x6066e2a2, 0xd551ee10), TOBN(0x87a8f1d8, 0x727e09a6), + TOBN(0x00d08bab, 0x2c01148d), TOBN(0x6da8e4f1, 0x424f33fe), + TOBN(0x466d17f0, 0xcf9a4e71), TOBN(0xff502010, 0x3bf5cb19), + TOBN(0xdccf97d8, 0xd062ecc0), TOBN(0x80c0d9af, 0x81d80ac4), + TOBN(0xe87771d8, 0x033f2876), TOBN(0xb0186ec6, 0x7d5cc3db), + TOBN(0x58e8bb80, 0x3bc9bc1d), TOBN(0x4d1395cc, 0x6f6ef60e), + TOBN(0xa73c62d6, 0x186244a0), TOBN(0x918e5f23, 0x110a5b53), + TOBN(0xed4878ca, 0x741b7eab), TOBN(0x3038d71a, 0xdbe03e51), + TOBN(0x840204b7, 0xa93c3246), TOBN(0x21ab6069, 0xa0b9b4cd), + TOBN(0xf5fa6e2b, 0xb1d64218), TOBN(0x1de6ad0e, 0xf3d56191), + TOBN(0x570aaa88, 0xff1929c7), TOBN(0xc6df4c6b, 0x640e87b5), + TOBN(0xde8a74f2, 0xc65f0ccc), TOBN(0x8b972fd5, 0xe6f6cc01), + TOBN(0x3fff36b6, 0x0b846531), TOBN(0xba7e45e6, 0x10a5e475), + TOBN(0x84a1d10e, 0x4145b6c5), TOBN(0xf1f7f91a, 0x5e046d9d), + TOBN(0x0317a692, 0x44de90d7), TOBN(0x951a1d4a, 0xf199c15e), + TOBN(0x91f78046, 0xc9d73deb), TOBN(0x74c82828, 0xfab8224f), + TOBN(0xaa6778fc, 0xe7560b90), TOBN(0xb4073e61, 0xa7e824ce), + TOBN(0xff0d693c, 0xd642eba8), TOBN(0x7ce2e57a, 0x5dccef38), + TOBN(0x89c2c789, 0x1df1ad46), TOBN(0x83a06922, 0x098346fd), + TOBN(0x2d715d72, 0xda2fc177), TOBN(0x7b6dd71d, 0x85b6cf1d), + TOBN(0xc60a6d0a, 0x73fa9cb0), TOBN(0xedd3992e, 0x328bf5a9), + TOBN(0xc380ddd0, 0x832c8c82), TOBN(0xd182d410, 0xa2a0bf50), + TOBN(0x7d9d7438, 0xd9a528db), TOBN(0xe8b1a0e9, 0xcaf53994), + TOBN(0xddd6e5fe, 0x0e19987c), TOBN(0xacb8df03, 0x190b059d), + TOBN(0x53703a32, 0x8300129f), TOBN(0x1f637662, 0x68c43bfd), + TOBN(0xbcbd1913, 0x00e54051), TOBN(0x812fcc62, 0x7bf5a8c5), + TOBN(0x3f969d5f, 0x29fb85da), TOBN(0x72f4e00a, 0x694759e8), + TOBN(0x426b6e52, 0x790726b7), TOBN(0x617bbc87, 0x3bdbb209), + TOBN(0x511f8bb9, 0x97aee317), TOBN(0x812a4096, 0xe81536a8), + TOBN(0x137dfe59, 0x3ac09b9b), TOBN(0x0682238f, 0xba8c9a7a), + TOBN(0x7072ead6, 0xaeccb4bd), TOBN(0x6a34e9aa, 0x692ba633), + TOBN(0xc82eaec2, 0x6fff9d33), TOBN(0xfb753512, 0x1d4d2b62), + TOBN(0x1a0445ff, 0x1d7aadab), TOBN(0x65d38260, 0xd5f6a67c), + TOBN(0x6e62fb08, 0x91cfb26f), TOBN(0xef1e0fa5, 0x5c7d91d6), + TOBN(0x47e7c7ba, 0x33db72cd), TOBN(0x017cbc09, 0xfa7c74b2), + TOBN(0x3c931590, 0xf50a503c), TOBN(0xcac54f60, 0x616baa42), + TOBN(0x9b6cd380, 0xb2369f0f), TOBN(0x97d3a70d, 0x23c76151), + TOBN(0x5f9dd6fc, 0x9862a9c6), TOBN(0x044c4ab2, 0x12312f51), + TOBN(0x035ea0fd, 0x834a2ddc), TOBN(0x49e6b862, 0xcc7b826d), + TOBN(0xb03d6883, 0x62fce490), TOBN(0x62f2497a, 0xb37e36e9), + TOBN(0x04b005b6, 0xc6458293), TOBN(0x36bb5276, 
0xe8d10af7), + TOBN(0xacf2dc13, 0x8ee617b8), TOBN(0x470d2d35, 0xb004b3d4), + TOBN(0x06790832, 0xfeeb1b77), TOBN(0x2bb75c39, 0x85657f9c), + TOBN(0xd70bd4ed, 0xc0f60004), TOBN(0xfe797ecc, 0x219b018b), + TOBN(0x9b5bec2a, 0x753aebcc), TOBN(0xdaf9f3dc, 0xc939eca5), + TOBN(0xd6bc6833, 0xd095ad09), TOBN(0x98abdd51, 0xdaa4d2fc), + TOBN(0xd9840a31, 0x8d168be5), TOBN(0xcf7c10e0, 0x2325a23c), + TOBN(0xa5c02aa0, 0x7e6ecfaf), TOBN(0x2462e7e6, 0xb5bfdf18), + TOBN(0xab2d8a8b, 0xa0cc3f12), TOBN(0x68dd485d, 0xbc672a29), + TOBN(0x72039752, 0x596f2cd3), TOBN(0x5d3eea67, 0xa0cf3d8d), + TOBN(0x810a1a81, 0xe6602671), TOBN(0x8f144a40, 0x14026c0c), + TOBN(0xbc753a6d, 0x76b50f85), TOBN(0xc4dc21e8, 0x645cd4a4), + TOBN(0xc5262dea, 0x521d0378), TOBN(0x802b8e0e, 0x05011c6f), + TOBN(0x1ba19cbb, 0x0b4c19ea), TOBN(0x21db64b5, 0xebf0aaec), + TOBN(0x1f394ee9, 0x70342f9d), TOBN(0x93a10aee, 0x1bc44a14), + TOBN(0xa7eed31b, 0x3efd0baa), TOBN(0x6e7c824e, 0x1d154e65), + TOBN(0xee23fa81, 0x9966e7ee), TOBN(0x64ec4aa8, 0x05b7920d), + TOBN(0x2d44462d, 0x2d90aad4), TOBN(0xf44dd195, 0xdf277ad5), + TOBN(0x8d6471f1, 0xbb46b6a1), TOBN(0x1e65d313, 0xfd885090), + TOBN(0x33a800f5, 0x13a977b4), TOBN(0xaca9d721, 0x0797e1ef), + TOBN(0x9a5a85a0, 0xfcff6a17), TOBN(0x9970a3f3, 0x1eca7cee), + TOBN(0xbb9f0d6b, 0xc9504be3), TOBN(0xe0c504be, 0xadd24ee2), + TOBN(0x7e09d956, 0x77fcc2f4), TOBN(0xef1a5227, 0x65bb5fc4), + TOBN(0x145d4fb1, 0x8b9286aa), TOBN(0x66fd0c5d, 0x6649028b), + TOBN(0x98857ceb, 0x1bf4581c), TOBN(0xe635e186, 0xaca7b166), + TOBN(0x278ddd22, 0x659722ac), TOBN(0xa0903c4c, 0x1db68007), + TOBN(0x366e4589, 0x48f21402), TOBN(0x31b49c14, 0xb96abda2), + TOBN(0x329c4b09, 0xe0403190), TOBN(0x97197ca3, 0xd29f43fe), + TOBN(0x8073dd1e, 0x274983d8), TOBN(0xda1a3bde, 0x55717c8f), + TOBN(0xfd3d4da2, 0x0361f9d1), TOBN(0x1332d081, 0x4c7de1ce), + TOBN(0x9b7ef7a3, 0xaa6d0e10), TOBN(0x17db2e73, 0xf54f1c4a), + TOBN(0xaf3dffae, 0x4cd35567), TOBN(0xaaa2f406, 0xe56f4e71), + TOBN(0x8966759e, 0x7ace3fc7), TOBN(0x9594eacf, 0x45a8d8c6), + TOBN(0x8de3bd8b, 0x91834e0e), TOBN(0xafe4ca53, 0x548c0421), + TOBN(0xfdd7e856, 0xe6ee81c6), TOBN(0x8f671beb, 0x6b891a3a), + TOBN(0xf7a58f2b, 0xfae63829), TOBN(0x9ab186fb, 0x9c11ac9f), + TOBN(0x8d6eb369, 0x10b5be76), TOBN(0x046b7739, 0xfb040bcd), + TOBN(0xccb4529f, 0xcb73de88), TOBN(0x1df0fefc, 0xcf26be03), + TOBN(0xad7757a6, 0xbcfcd027), TOBN(0xa8786c75, 0xbb3165ca), + TOBN(0xe9db1e34, 0x7e99a4d9), TOBN(0x99ee86df, 0xb06c504b), + TOBN(0x5b7c2ddd, 0xc15c9f0a), TOBN(0xdf87a734, 0x4295989e), + TOBN(0x59ece47c, 0x03d08fda), TOBN(0xb074d3dd, 0xad5fc702), + TOBN(0x20407903, 0x51a03776), TOBN(0x2bb1f77b, 0x2a608007), + TOBN(0x25c58f4f, 0xe1153185), TOBN(0xe6df62f6, 0x766e6447), + TOBN(0xefb3d1be, 0xed51275a), TOBN(0x5de47dc7, 0x2f0f483f), + TOBN(0x7932d98e, 0x97c2bedf), TOBN(0xd5c11927, 0x0219f8a1), + TOBN(0x9d751200, 0xa73a294e), TOBN(0x5f88434a, 0x9dc20172), + TOBN(0xd28d9fd3, 0xa26f506a), TOBN(0xa890cd31, 0x9d1dcd48), + TOBN(0x0aebaec1, 0x70f4d3b4), TOBN(0xfd1a1369, 0x0ffc8d00), + TOBN(0xb9d9c240, 0x57d57838), TOBN(0x45929d26, 0x68bac361), + TOBN(0x5a2cd060, 0x25b15ca6), TOBN(0x4b3c83e1, 0x6e474446), + TOBN(0x1aac7578, 0xee1e5134), TOBN(0xa418f5d6, 0xc91e2f41), + TOBN(0x6936fc8a, 0x213ed68b), TOBN(0x860ae7ed, 0x510a5224), + TOBN(0x63660335, 0xdef09b53), TOBN(0x641b2897, 0xcd79c98d), + TOBN(0x29bd38e1, 0x01110f35), TOBN(0x79c26f42, 0x648b1937), + TOBN(0x64dae519, 0x9d9164f4), TOBN(0xd85a2310, 0x0265c273), + TOBN(0x7173dd5d, 0x4b07e2b1), TOBN(0xd144c4cb, 0x8d9ea221), + TOBN(0xe8b04ea4, 0x1105ab14), TOBN(0x92dda542, 0xfe80d8f1), + 
TOBN(0xe9982fa8, 0xcf03dce6), TOBN(0x8b5ea965, 0x1a22cffc), + TOBN(0xf7f4ea7f, 0x3fad88c4), TOBN(0x62db773e, 0x6a5ba95c), + TOBN(0xd20f02fb, 0x93f24567), TOBN(0xfd46c69a, 0x315257ca), + TOBN(0x0ac74cc7, 0x8bcab987), TOBN(0x46f31c01, 0x5ceca2f5), + TOBN(0x40aedb59, 0x888b219e), TOBN(0xe50ecc37, 0xe1fccd02), + TOBN(0x1bcd9dad, 0x911f816c), TOBN(0x583cc1ec, 0x8db9b00c), + TOBN(0xf3cd2e66, 0xa483bf11), TOBN(0xfa08a6f5, 0xb1b2c169), + TOBN(0xf375e245, 0x4be9fa28), TOBN(0x99a7ffec, 0x5b6d011f), + TOBN(0x6a3ebddb, 0xc4ae62da), TOBN(0x6cea00ae, 0x374aef5d), + TOBN(0xab5fb98d, 0x9d4d05bc), TOBN(0x7cba1423, 0xd560f252), + TOBN(0x49b2cc21, 0x208490de), TOBN(0x1ca66ec3, 0xbcfb2879), + TOBN(0x7f1166b7, 0x1b6fb16f), TOBN(0xfff63e08, 0x65fe5db3), + TOBN(0xb8345abe, 0x8b2610be), TOBN(0xb732ed80, 0x39de3df4), + TOBN(0x0e24ed50, 0x211c32b4), TOBN(0xd10d8a69, 0x848ff27d), + TOBN(0xc1074398, 0xed4de248), TOBN(0xd7cedace, 0x10488927), + TOBN(0xa4aa6bf8, 0x85673e13), TOBN(0xb46bae91, 0x6daf30af), + TOBN(0x07088472, 0xfcef7ad8), TOBN(0x61151608, 0xd4b35e97), + TOBN(0xbcfe8f26, 0xdde29986), TOBN(0xeb84c4c7, 0xd5a34c79), + TOBN(0xc1eec55c, 0x164e1214), TOBN(0x891be86d, 0xa147bb03), + TOBN(0x9fab4d10, 0x0ba96835), TOBN(0xbf01e9b8, 0xa5c1ae9f), + TOBN(0x6b4de139, 0xb186ebc0), TOBN(0xd5c74c26, 0x85b91bca), + TOBN(0x5086a99c, 0xc2d93854), TOBN(0xeed62a7b, 0xa7a9dfbc), + TOBN(0x8778ed6f, 0x76b7618a), TOBN(0xbff750a5, 0x03b66062), + TOBN(0x4cb7be22, 0xb65186db), TOBN(0x369dfbf0, 0xcc3a6d13), + TOBN(0xc7dab26c, 0x7191a321), TOBN(0x9edac3f9, 0x40ed718e), + TOBN(0xbc142b36, 0xd0cfd183), TOBN(0xc8af82f6, 0x7c991693), + TOBN(0xb3d1e4d8, 0x97ce0b2a), TOBN(0xe6d7c87f, 0xc3a55cdf), + TOBN(0x35846b95, 0x68b81afe), TOBN(0x018d12af, 0xd3c239d8), + TOBN(0x2b2c6208, 0x01206e15), TOBN(0xe0e42453, 0xa3b882c6), + TOBN(0x854470a3, 0xa50162d5), TOBN(0x08157478, 0x7017a62a), + TOBN(0x18bd3fb4, 0x820357c7), TOBN(0x992039ae, 0x6f1458ad), + TOBN(0x9a1df3c5, 0x25b44aa1), TOBN(0x2d780357, 0xed3d5281), + TOBN(0x58cf7e4d, 0xc77ad4d4), TOBN(0xd49a7998, 0xf9df4fc4), + TOBN(0x4465a8b5, 0x1d71205e), TOBN(0xa0ee0ea6, 0x649254aa), + TOBN(0x4b5eeecf, 0xab7bd771), TOBN(0x6c873073, 0x35c262b9), + TOBN(0xdc5bd648, 0x3c9d61e7), TOBN(0x233d6d54, 0x321460d2), + TOBN(0xd20c5626, 0xfc195bcc), TOBN(0x25445958, 0x04d78b63), + TOBN(0xe03fcb3d, 0x17ec8ef3), TOBN(0x54b690d1, 0x46b8f781), + TOBN(0x82fa2c8a, 0x21230646), TOBN(0xf51aabb9, 0x084f418c), + TOBN(0xff4fbec1, 0x1a30ba43), TOBN(0x6a5acf73, 0x743c9df7), + TOBN(0x1da2b357, 0xd635b4d5), TOBN(0xc3de68dd, 0xecd5c1da), + TOBN(0xa689080b, 0xd61af0dd), TOBN(0xdea5938a, 0xd665bf99), + TOBN(0x0231d71a, 0xfe637294), TOBN(0x01968aa6, 0xa5a81cd8), + TOBN(0x11252d50, 0x048e63b5), TOBN(0xc446bc52, 0x6ca007e9), + TOBN(0xef8c50a6, 0x96d6134b), TOBN(0x9361fbf5, 0x9e09a05c), + TOBN(0xf17f85a6, 0xdca3291a), TOBN(0xb178d548, 0xff251a21), + TOBN(0x87f6374b, 0xa4df3915), TOBN(0x566ce1bf, 0x2fd5d608), + TOBN(0x425cba4d, 0x7de35102), TOBN(0x6b745f8f, 0x58c5d5e2), + TOBN(0x88402af6, 0x63122edf), TOBN(0x3190f9ed, 0x3b989a89), + TOBN(0x4ad3d387, 0xebba3156), TOBN(0xef385ad9, 0xc7c469a5), + TOBN(0xb08281de, 0x3f642c29), TOBN(0x20be0888, 0x910ffb88), + TOBN(0xf353dd4a, 0xd5292546), TOBN(0x3f1627de, 0x8377a262), + TOBN(0xa5faa013, 0xeefcd638), TOBN(0x8f3bf626, 0x74cc77c3), + TOBN(0x32618f65, 0xa348f55e), TOBN(0x5787c0dc, 0x9fefeb9e), + TOBN(0xf1673aa2, 0xd9a23e44), TOBN(0x88dfa993, 0x4e10690d), + TOBN(0x1ced1b36, 0x2bf91108), TOBN(0x9193ceca, 0x3af48649), + TOBN(0xfb34327d, 0x2d738fc5), TOBN(0x6697b037, 0x975fee6c), + TOBN(0x2f485da0, 
0xc04079a5), TOBN(0x2cdf5735, 0x2feaa1ac), + TOBN(0x76944420, 0xbd55659e), TOBN(0x7973e32b, 0x4376090c), + TOBN(0x86bb4fe1, 0x163b591a), TOBN(0x10441aed, 0xc196f0ca), + TOBN(0x3b431f4a, 0x045ad915), TOBN(0x6c11b437, 0xa4afacb1), + TOBN(0x30b0c7db, 0x71fdbbd8), TOBN(0xb642931f, 0xeda65acd), + TOBN(0x4baae6e8, 0x9c92b235), TOBN(0xa73bbd0e, 0x6b3993a1), + TOBN(0xd06d60ec, 0x693dd031), TOBN(0x03cab91b, 0x7156881c), + TOBN(0xd615862f, 0x1db3574b), TOBN(0x485b0185, 0x64bb061a), + TOBN(0x27434988, 0xa0181e06), TOBN(0x2cd61ad4, 0xc1c0c757), + TOBN(0x3effed5a, 0x2ff9f403), TOBN(0x8dc98d8b, 0x62239029), + TOBN(0x2206021e, 0x1f17b70d), TOBN(0xafbec0ca, 0xbf510015), + TOBN(0x9fed7164, 0x80130dfa), TOBN(0x306dc2b5, 0x8a02dcf5), + TOBN(0x48f06620, 0xfeb10fc0), TOBN(0x78d1e1d5, 0x5a57cf51), + TOBN(0xadef8c5a, 0x192ef710), TOBN(0x88afbd4b, 0x3b7431f9), + TOBN(0x7e1f7407, 0x64250c9e), TOBN(0x6e31318d, 0xb58bec07), + TOBN(0xfd4fc4b8, 0x24f89b4e), TOBN(0x65a5dd88, 0x48c36a2a), + TOBN(0x4f1eccff, 0xf024baa7), TOBN(0x22a21cf2, 0xcba94650), + TOBN(0x95d29dee, 0x42a554f7), TOBN(0x828983a5, 0x002ec4ba), + TOBN(0x8112a1f7, 0x8badb73d), TOBN(0x79ea8897, 0xa27c1839), + TOBN(0x8969a5a7, 0xd065fd83), TOBN(0xf49af791, 0xb262a0bc), + TOBN(0xfcdea8b6, 0xaf2b5127), TOBN(0x10e913e1, 0x564c2dbc), + TOBN(0x51239d14, 0xbc21ef51), TOBN(0xe51c3ceb, 0x4ce57292), + TOBN(0x795ff068, 0x47bbcc3b), TOBN(0x86b46e1e, 0xbd7e11e6), + TOBN(0x0ea6ba23, 0x80041ef4), TOBN(0xd72fe505, 0x6262342e), + TOBN(0x8abc6dfd, 0x31d294d4), TOBN(0xbbe017a2, 0x1278c2c9), + TOBN(0xb1fcfa09, 0xb389328a), TOBN(0x322fbc62, 0xd01771b5), + TOBN(0x04c0d063, 0x60b045bf), TOBN(0xdb652edc, 0x10e52d01), + TOBN(0x50ef932c, 0x03ec6627), TOBN(0xde1b3b2d, 0xc1ee50e3), + TOBN(0x5ab7bdc5, 0xdc37a90d), TOBN(0xfea67213, 0x31e33a96), + TOBN(0x6482b5cb, 0x4f2999aa), TOBN(0x38476cc6, 0xb8cbf0dd), + TOBN(0x93ebfacb, 0x173405bb), TOBN(0x15cdafe7, 0xe52369ec), + TOBN(0xd42d5ba4, 0xd935b7db), TOBN(0x648b6004, 0x1c99a4cd), + TOBN(0x785101bd, 0xa3b5545b), TOBN(0x4bf2c38a, 0x9dd67faf), + TOBN(0xb1aadc63, 0x4442449c), TOBN(0xe0e9921a, 0x33ad4fb8), + TOBN(0x5c552313, 0xaa686d82), TOBN(0xdee635fa, 0x465d866c), + TOBN(0xbc3c224a, 0x18ee6e8a), TOBN(0xeed748a6, 0xed42e02f), + TOBN(0xe70f930a, 0xd474cd08), TOBN(0x774ea6ec, 0xfff24adf), + TOBN(0x03e2de1c, 0xf3480d4a), TOBN(0xf0d8edc7, 0xbc8acf1a), + TOBN(0xf23e3303, 0x68295a9c), TOBN(0xfadd5f68, 0xc546a97d), + TOBN(0x895597ad, 0x96f8acb1), TOBN(0xbddd49d5, 0x671bdae2), + TOBN(0x16fcd528, 0x21dd43f4), TOBN(0xa5a45412, 0x6619141a)} + , + {TOBN(0x8ce9b6bf, 0xc360e25a), TOBN(0xe6425195, 0x075a1a78), + TOBN(0x9dc756a8, 0x481732f4), TOBN(0x83c0440f, 0x5432b57a), + TOBN(0xc670b3f1, 0xd720281f), TOBN(0x2205910e, 0xd135e051), + TOBN(0xded14b0e, 0xdb052be7), TOBN(0x697b3d27, 0xc568ea39), + TOBN(0x2e599b9a, 0xfb3ff9ed), TOBN(0x28c2e0ab, 0x17f6515c), + TOBN(0x1cbee4fd, 0x474da449), TOBN(0x071279a4, 0x4f364452), + TOBN(0x97abff66, 0x01fbe855), TOBN(0x3ee394e8, 0x5fda51c4), + TOBN(0x190385f6, 0x67597c0b), TOBN(0x6e9fccc6, 0xa27ee34b), + TOBN(0x0b89de93, 0x14092ebb), TOBN(0xf17256bd, 0x428e240c), + TOBN(0xcf89a7f3, 0x93d2f064), TOBN(0x4f57841e, 0xe1ed3b14), + TOBN(0x4ee14405, 0xe708d855), TOBN(0x856aae72, 0x03f1c3d0), + TOBN(0xc8e5424f, 0xbdd7eed5), TOBN(0x3333e4ef, 0x73ab4270), + TOBN(0x3bc77ade, 0xdda492f8), TOBN(0xc11a3aea, 0x78297205), + TOBN(0x5e89a3e7, 0x34931b4c), TOBN(0x17512e2e, 0x9f5694bb), + TOBN(0x5dc349f3, 0x177bf8b6), TOBN(0x232ea4ba, 0x08c7ff3e), + TOBN(0x9c4f9d16, 0xf511145d), TOBN(0xccf109a3, 0x33b379c3), + TOBN(0xe75e7a88, 0xa1f25897), 
TOBN(0x7ac6961f, 0xa1b5d4d8), + TOBN(0xe3e10773, 0x08f3ed5c), TOBN(0x208a54ec, 0x0a892dfb), + TOBN(0xbe826e19, 0x78660710), TOBN(0x0cf70a97, 0x237df2c8), + TOBN(0x418a7340, 0xed704da5), TOBN(0xa3eeb9a9, 0x08ca33fd), + TOBN(0x49d96233, 0x169bca96), TOBN(0x04d286d4, 0x2da6aafb), + TOBN(0xc09606ec, 0xa0c2fa94), TOBN(0x8869d0d5, 0x23ff0fb3), + TOBN(0xa99937e5, 0xd0150d65), TOBN(0xa92e2503, 0x240c14c9), + TOBN(0x656bf945, 0x108e2d49), TOBN(0x152a733a, 0xa2f59e2b), + TOBN(0xb4323d58, 0x8434a920), TOBN(0xc0af8e93, 0x622103c5), + TOBN(0x667518ef, 0x938dbf9a), TOBN(0xa1843073, 0x83a9cdf2), + TOBN(0x350a94aa, 0x5447ab80), TOBN(0xe5e5a325, 0xc75a3d61), + TOBN(0x74ba507f, 0x68411a9e), TOBN(0x10581fc1, 0x594f70c5), + TOBN(0x60e28570, 0x80eb24a9), TOBN(0x7bedfb4d, 0x488e0cfd), + TOBN(0x721ebbd7, 0xc259cdb8), TOBN(0x0b0da855, 0xbc6390a9), + TOBN(0x2b4d04db, 0xde314c70), TOBN(0xcdbf1fbc, 0x6c32e846), + TOBN(0x33833eab, 0xb162fc9e), TOBN(0x9939b48b, 0xb0dd3ab7), + TOBN(0x5aaa98a7, 0xcb0c9c8c), TOBN(0x75105f30, 0x81c4375c), + TOBN(0xceee5057, 0x5ef1c90f), TOBN(0xb31e065f, 0xc23a17bf), + TOBN(0x5364d275, 0xd4b6d45a), TOBN(0xd363f3ad, 0x62ec8996), + TOBN(0xb5d21239, 0x4391c65b), TOBN(0x84564765, 0xebb41b47), + TOBN(0x20d18ecc, 0x37107c78), TOBN(0xacff3b6b, 0x570c2a66), + TOBN(0x22f975d9, 0x9bd0d845), TOBN(0xef0a0c46, 0xba178fa0), + TOBN(0x1a419651, 0x76b6028e), TOBN(0xc49ec674, 0x248612d4), + TOBN(0x5b6ac4f2, 0x7338af55), TOBN(0x06145e62, 0x7bee5a36), + TOBN(0x33e95d07, 0xe75746b5), TOBN(0x1c1e1f6d, 0xc40c78be), + TOBN(0x967833ef, 0x222ff8e2), TOBN(0x4bedcf6a, 0xb49180ad), + TOBN(0x6b37e9c1, 0x3d7a4c8a), TOBN(0x2748887c, 0x6ddfe760), + TOBN(0xf7055123, 0xaa3a5bbc), TOBN(0x954ff225, 0x7bbb8e74), + TOBN(0xc42b8ab1, 0x97c3dfb9), TOBN(0x55a549b0, 0xcf168154), + TOBN(0xad6748e7, 0xc1b50692), TOBN(0x2775780f, 0x6fc5cbcb), + TOBN(0x4eab80b8, 0xe1c9d7c8), TOBN(0x8c69dae1, 0x3fdbcd56), + TOBN(0x47e6b4fb, 0x9969eace), TOBN(0x002f1085, 0xa705cb5a), + TOBN(0x4e23ca44, 0x6d3fea55), TOBN(0xb4ae9c86, 0xf4810568), + TOBN(0x47bfb91b, 0x2a62f27d), TOBN(0x60deb4c9, 0xd9bac28c), + TOBN(0xa892d894, 0x7de6c34c), TOBN(0x4ee68259, 0x4494587d), + TOBN(0x914ee14e, 0x1a3f8a5b), TOBN(0xbb113eaa, 0x28700385), + TOBN(0x81ca03b9, 0x2115b4c9), TOBN(0x7c163d38, 0x8908cad1), + TOBN(0xc912a118, 0xaa18179a), TOBN(0xe09ed750, 0x886e3081), + TOBN(0xa676e3fa, 0x26f516ca), TOBN(0x753cacf7, 0x8e732f91), + TOBN(0x51592aea, 0x833da8b4), TOBN(0xc626f42f, 0x4cbea8aa), + TOBN(0xef9dc899, 0xa7b56eaf), TOBN(0x00c0e52c, 0x34ef7316), + TOBN(0x5b1e4e24, 0xfe818a86), TOBN(0x9d31e20d, 0xc538be47), + TOBN(0x22eb932d, 0x3ed68974), TOBN(0xe44bbc08, 0x7c4e87c4), + TOBN(0x4121086e, 0x0dde9aef), TOBN(0x8e6b9cff, 0x134f4345), + TOBN(0x96892c1f, 0x711b0eb9), TOBN(0xb905f2c8, 0x780ab954), + TOBN(0xace26309, 0xa20792db), TOBN(0xec8ac9b3, 0x0684e126), + TOBN(0x486ad8b6, 0xb40a2447), TOBN(0x60121fc1, 0x9fe3fb24), + TOBN(0x5626fccf, 0x1a8e3b3f), TOBN(0x4e568622, 0x6ad1f394), + TOBN(0xda7aae0d, 0x196aa5a1), TOBN(0xe0df8c77, 0x1041b5fb), + TOBN(0x451465d9, 0x26b318b7), TOBN(0xc29b6e55, 0x7ab136e9), + TOBN(0x2c2ab48b, 0x71148463), TOBN(0xb5738de3, 0x64454a76), + TOBN(0x54ccf9a0, 0x5a03abe4), TOBN(0x377c0296, 0x0427d58e), + TOBN(0x73f5f0b9, 0x2bb39c1f), TOBN(0x14373f2c, 0xe608d8c5), + TOBN(0xdcbfd314, 0x00fbb805), TOBN(0xdf18fb20, 0x83afdcfb), + TOBN(0x81a57f42, 0x42b3523f), TOBN(0xe958532d, 0x87f650fb), + TOBN(0xaa8dc8b6, 0x8b0a7d7c), TOBN(0x1b75dfb7, 0x150166be), + TOBN(0x90e4f7c9, 0x2d7d1413), TOBN(0x67e2d6b5, 0x9834f597), + TOBN(0x4fd4f4f9, 0xa808c3e8), TOBN(0xaf8237e0, 
0xd5281ec1), + TOBN(0x25ab5fdc, 0x84687cee), TOBN(0xc5ded6b1, 0xa5b26c09), + TOBN(0x8e4a5aec, 0xc8ea7650), TOBN(0x23b73e5c, 0x14cc417f), + TOBN(0x2bfb4318, 0x3037bf52), TOBN(0xb61e6db5, 0x78c725d7), + TOBN(0x8efd4060, 0xbbb3e5d7), TOBN(0x2e014701, 0xdbac488e), + TOBN(0xac75cf9a, 0x360aa449), TOBN(0xb70cfd05, 0x79634d08), + TOBN(0xa591536d, 0xfffb15ef), TOBN(0xb2c37582, 0xd07c106c), + TOBN(0xb4293fdc, 0xf50225f9), TOBN(0xc52e175c, 0xb0e12b03), + TOBN(0xf649c3ba, 0xd0a8bf64), TOBN(0x745a8fef, 0xeb8ae3c6), + TOBN(0x30d7e5a3, 0x58321bc3), TOBN(0xb1732be7, 0x0bc4df48), + TOBN(0x1f217993, 0xe9ea5058), TOBN(0xf7a71cde, 0x3e4fd745), + TOBN(0x86cc533e, 0x894c5bbb), TOBN(0x6915c7d9, 0x69d83082), + TOBN(0xa6aa2d05, 0x5815c244), TOBN(0xaeeee592, 0x49b22ce5), + TOBN(0x89e39d13, 0x78135486), TOBN(0x3a275c1f, 0x16b76f2f), + TOBN(0xdb6bcc1b, 0xe036e8f5), TOBN(0x4df69b21, 0x5e4709f5), + TOBN(0xa188b250, 0x2d0f39aa), TOBN(0x622118bb, 0x15a85947), + TOBN(0x2ebf520f, 0xfde0f4fa), TOBN(0xa40e9f29, 0x4860e539), + TOBN(0x7b6a51eb, 0x22b57f0f), TOBN(0x849a33b9, 0x7e80644a), + TOBN(0x50e5d16f, 0x1cf095fe), TOBN(0xd754b54e, 0xec55f002), + TOBN(0x5cfbbb22, 0x236f4a98), TOBN(0x0b0c59e9, 0x066800bb), + TOBN(0x4ac69a8f, 0x5a9a7774), TOBN(0x2b33f804, 0xd6bec948), + TOBN(0xb3729295, 0x32e6c466), TOBN(0x68956d0f, 0x4e599c73), + TOBN(0xa47a249f, 0x155c31cc), TOBN(0x24d80f0d, 0xe1ce284e), + TOBN(0xcd821dfb, 0x988baf01), TOBN(0xe6331a7d, 0xdbb16647), + TOBN(0x1eb8ad33, 0x094cb960), TOBN(0x593cca38, 0xc91bbca5), + TOBN(0x384aac8d, 0x26567456), TOBN(0x40fa0309, 0xc04b6490), + TOBN(0x97834cd6, 0xdab6c8f6), TOBN(0x68a7318d, 0x3f91e55f), + TOBN(0xa00fd04e, 0xfc4d3157), TOBN(0xb56f8ab2, 0x2bf3bdea), + TOBN(0x014f5648, 0x4fa57172), TOBN(0x948c5860, 0x450abdb3), + TOBN(0x342b5df0, 0x0ebd4f08), TOBN(0x3e5168cd, 0x0e82938e), + TOBN(0x7aedc1ce, 0xb0df5dd0), TOBN(0x6bbbc6d9, 0xe5732516), + TOBN(0xc7bfd486, 0x605daaa6), TOBN(0x46fd72b7, 0xbb9a6c9e), + TOBN(0xe4847fb1, 0xa124fb89), TOBN(0x75959cbd, 0xa2d8ffbc), + TOBN(0x42579f65, 0xc8a588ee), TOBN(0x368c92e6, 0xb80b499d), + TOBN(0xea4ef6cd, 0x999a5df1), TOBN(0xaa73bb7f, 0x936fe604), + TOBN(0xf347a70d, 0x6457d188), TOBN(0x86eda86b, 0x8b7a388b), + TOBN(0xb7cdff06, 0x0ccd6013), TOBN(0xbeb1b6c7, 0xd0053fb2), + TOBN(0x0b022387, 0x99240a9f), TOBN(0x1bbb384f, 0x776189b2), + TOBN(0x8695e71e, 0x9066193a), TOBN(0x2eb50097, 0x06ffac7e), + TOBN(0x0654a9c0, 0x4a7d2caa), TOBN(0x6f3fb3d1, 0xa5aaa290), + TOBN(0x835db041, 0xff476e8f), TOBN(0x540b8b0b, 0xc42295e4), + TOBN(0xa5c73ac9, 0x05e214f5), TOBN(0x9a74075a, 0x56a0b638), + TOBN(0x2e4b1090, 0xce9e680b), TOBN(0x57a5b479, 0x6b8d9afa), + TOBN(0x0dca48e7, 0x26bfe65c), TOBN(0x097e391c, 0x7290c307), + TOBN(0x683c462e, 0x6669e72e), TOBN(0xf505be1e, 0x062559ac), + TOBN(0x5fbe3ea1, 0xe3a3035a), TOBN(0x6431ebf6, 0x9cd50da8), + TOBN(0xfd169d5c, 0x1f6407f2), TOBN(0x8d838a95, 0x60fce6b8), + TOBN(0x2a2bfa7f, 0x650006f0), TOBN(0xdfd7dad3, 0x50c0fbb2), + TOBN(0x92452495, 0xccf9ad96), TOBN(0x183bf494, 0xd95635f9), + TOBN(0x02d5df43, 0x4a7bd989), TOBN(0x505385cc, 0xa5431095), + TOBN(0xdd98e67d, 0xfd43f53e), TOBN(0xd61e1a6c, 0x500c34a9), + TOBN(0x5a4b46c6, 0x4a8a3d62), TOBN(0x8469c4d0, 0x247743d2), + TOBN(0x2bb3a13d, 0x88f7e433), TOBN(0x62b23a10, 0x01be5849), + TOBN(0xe83596b4, 0xa63d1a4c), TOBN(0x454e7fea, 0x7d183f3e), + TOBN(0x643fce61, 0x17afb01c), TOBN(0x4e65e5e6, 0x1c4c3638), + TOBN(0x41d85ea1, 0xef74c45b), TOBN(0x2cfbfa66, 0xae328506), + TOBN(0x98b078f5, 0x3ada7da9), TOBN(0xd985fe37, 0xec752fbb), + TOBN(0xeece68fe, 0x5a0148b4), TOBN(0x6f9a55c7, 0x2d78136d), + 
TOBN(0x232dccc4, 0xd2b729ce), TOBN(0xa27e0dfd, 0x90aafbc4), + TOBN(0x96474452, 0x12b4603e), TOBN(0xa876c551, 0x6b706d14), + TOBN(0xdf145fcf, 0x69a9d412), TOBN(0xe2ab75b7, 0x2d479c34), + TOBN(0x12df9a76, 0x1a23ff97), TOBN(0xc6138992, 0x5d359d10), + TOBN(0x6e51c7ae, 0xfa835f22), TOBN(0x69a79cb1, 0xc0fcc4d9), + TOBN(0xf57f350d, 0x594cc7e1), TOBN(0x3079ca63, 0x3350ab79), + TOBN(0x226fb614, 0x9aff594a), TOBN(0x35afec02, 0x6d59a62b), + TOBN(0x9bee46f4, 0x06ed2c6e), TOBN(0x58da1735, 0x7d939a57), + TOBN(0x44c50402, 0x8fd1797e), TOBN(0xd8853e7c, 0x5ccea6ca), + TOBN(0x4065508d, 0xa35fcd5f), TOBN(0x8965df8c, 0x495ccaeb), + TOBN(0x0f2da850, 0x12e1a962), TOBN(0xee471b94, 0xc1cf1cc4), + TOBN(0xcef19bc8, 0x0a08fb75), TOBN(0x704958f5, 0x81de3591), + TOBN(0x2867f8b2, 0x3aef4f88), TOBN(0x8d749384, 0xea9f9a5f), + TOBN(0x1b385537, 0x8c9049f4), TOBN(0x5be948f3, 0x7b92d8b6), + TOBN(0xd96f725d, 0xb6e2bd6b), TOBN(0x37a222bc, 0x958c454d), + TOBN(0xe7c61abb, 0x8809bf61), TOBN(0x46f07fbc, 0x1346f18d), + TOBN(0xfb567a7a, 0xe87c0d1c), TOBN(0x84a461c8, 0x7ef3d07a), + TOBN(0x0a5adce6, 0xd9278d98), TOBN(0x24d94813, 0x9dfc73e1), + TOBN(0x4f3528b6, 0x054321c3), TOBN(0x2e03fdde, 0x692ea706), + TOBN(0x10e60619, 0x47b533c0), TOBN(0x1a8bc73f, 0x2ca3c055), + TOBN(0xae58d4b2, 0x1bb62b8f), TOBN(0xb2045a73, 0x584a24e3), + TOBN(0x3ab3d5af, 0xbd76e195), TOBN(0x478dd1ad, 0x6938a810), + TOBN(0x6ffab393, 0x6ee3d5cb), TOBN(0xdfb693db, 0x22b361e4), + TOBN(0xf9694496, 0x51dbf1a7), TOBN(0xcab4b4ef, 0x08a2e762), + TOBN(0xe8c92f25, 0xd39bba9a), TOBN(0x850e61bc, 0xf1464d96), + TOBN(0xb7e830e3, 0xdc09508b), TOBN(0xfaf6d2cf, 0x74317655), + TOBN(0x72606ceb, 0xdf690355), TOBN(0x48bb92b3, 0xd0c3ded6), + TOBN(0x65b75484, 0x5c7cf892), TOBN(0xf6cd7ac9, 0xd5d5f01f), + TOBN(0xc2c30a59, 0x96401d69), TOBN(0x91268650, 0xed921878), + TOBN(0x380bf913, 0xb78c558f), TOBN(0x43c0baeb, 0xc8afdaa9), + TOBN(0x377f61d5, 0x54f169d3), TOBN(0xf8da07e3, 0xae5ff20b), + TOBN(0xb676c49d, 0xa8a90ea8), TOBN(0x81c1ff2b, 0x83a29b21), + TOBN(0x383297ac, 0x2ad8d276), TOBN(0x3001122f, 0xba89f982), + TOBN(0xe1d794be, 0x6718e448), TOBN(0x246c1482, 0x7c3e6e13), + TOBN(0x56646ef8, 0x5d26b5ef), TOBN(0x80f5091e, 0x88069cdd), + TOBN(0xc5992e2f, 0x724bdd38), TOBN(0x02e915b4, 0x8471e8c7), + TOBN(0x96ff320a, 0x0d0ff2a9), TOBN(0xbf886487, 0x4384d1a0), + TOBN(0xbbe1e6a6, 0xc93f72d6), TOBN(0xd5f75d12, 0xcad800ea), + TOBN(0xfa40a09f, 0xe7acf117), TOBN(0x32c8cdd5, 0x7581a355), + TOBN(0x74221992, 0x7023c499), TOBN(0xa8afe5d7, 0x38ec3901), + TOBN(0x5691afcb, 0xa90e83f0), TOBN(0x41bcaa03, 0x0b8f8eac), + TOBN(0xe38b5ff9, 0x8d2668d5), TOBN(0x0715281a, 0x7ad81965), + TOBN(0x1bc8fc7c, 0x03c6ce11), TOBN(0xcbbee6e2, 0x8b650436), + TOBN(0x06b00fe8, 0x0cdb9808), TOBN(0x17d6e066, 0xfe3ed315), + TOBN(0x2e9d38c6, 0x4d0b5018), TOBN(0xab8bfd56, 0x844dcaef), + TOBN(0x42894a59, 0x513aed8b), TOBN(0xf77f3b6d, 0x314bd07a), + TOBN(0xbbdecb8f, 0x8e42b582), TOBN(0xf10e2fa8, 0xd2390fe6), + TOBN(0xefb95022, 0x62a2f201), TOBN(0x4d59ea50, 0x50ee32b0), + TOBN(0xd87f7728, 0x6da789a8), TOBN(0xcf98a2cf, 0xf79492c4), + TOBN(0xf9577239, 0x720943c2), TOBN(0xba044cf5, 0x3990b9d0), + TOBN(0x5aa8e823, 0x95f2884a), TOBN(0x834de6ed, 0x0278a0af), + TOBN(0xc8e1ee9a, 0x5f25bd12), TOBN(0x9259ceaa, 0x6f7ab271), + TOBN(0x7e6d97a2, 0x77d00b76), TOBN(0x5c0c6eea, 0xa437832a), + TOBN(0x5232c20f, 0x5606b81d), TOBN(0xabd7b375, 0x0d991ee5), + TOBN(0x4d2bfe35, 0x8632d951), TOBN(0x78f85146, 0x98ed9364), + TOBN(0x951873f0, 0xf30c3282), TOBN(0x0da8ac80, 0xa789230b), + TOBN(0x3ac7789c, 0x5398967f), TOBN(0xa69b8f7f, 0xbdda0fb5), + TOBN(0xe5db7717, 
0x6add8545), TOBN(0x1b71cb66, 0x72c49b66), + TOBN(0xd8560739, 0x68421d77), TOBN(0x03840fe8, 0x83e3afea), + TOBN(0xb391dad5, 0x1ec69977), TOBN(0xae243fb9, 0x307f6726), + TOBN(0xc88ac87b, 0xe8ca160c), TOBN(0x5174cced, 0x4ce355f4), + TOBN(0x98a35966, 0xe58ba37d), TOBN(0xfdcc8da2, 0x7817335d), + TOBN(0x5b752830, 0x83fbc7bf), TOBN(0x68e419d4, 0xd9c96984), + TOBN(0x409a39f4, 0x02a40380), TOBN(0x88940faf, 0x1fe977bc), + TOBN(0xc640a94b, 0x8f8edea6), TOBN(0x1e22cd17, 0xed11547d), + TOBN(0xe28568ce, 0x59ffc3e2), TOBN(0x60aa1b55, 0xc1dee4e7), + TOBN(0xc67497c8, 0x837cb363), TOBN(0x06fb438a, 0x105a2bf2), + TOBN(0x30357ec4, 0x500d8e20), TOBN(0x1ad9095d, 0x0670db10), + TOBN(0x7f589a05, 0xc73b7cfd), TOBN(0xf544607d, 0x880d6d28), + TOBN(0x17ba93b1, 0xa20ef103), TOBN(0xad859130, 0x6ba6577b), + TOBN(0x65c91cf6, 0x6fa214a0), TOBN(0xd7d49c6c, 0x27990da5), + TOBN(0xecd9ec8d, 0x20bb569d), TOBN(0xbd4b2502, 0xeeffbc33), + TOBN(0x2056ca5a, 0x6bed0467), TOBN(0x7916a1f7, 0x5b63728c), + TOBN(0xd4f9497d, 0x53a4f566), TOBN(0x89734664, 0x97b56810), + TOBN(0xf8e1da74, 0x0494a621), TOBN(0x82546a93, 0x8d011c68), + TOBN(0x1f3acb19, 0xc61ac162), TOBN(0x52f8fa9c, 0xabad0d3e), + TOBN(0x15356523, 0xb4b7ea43), TOBN(0x5a16ad61, 0xae608125), + TOBN(0xb0bcb87f, 0x4faed184), TOBN(0x5f236b1d, 0x5029f45f), + TOBN(0xd42c7607, 0x0bc6b1fc), TOBN(0xc644324e, 0x68aefce3), + TOBN(0x8e191d59, 0x5c5d8446), TOBN(0xc0208077, 0x13ae1979), + TOBN(0xadcaee55, 0x3ba59cc7), TOBN(0x20ed6d6b, 0xa2cb81ba), + TOBN(0x0952ba19, 0xb6efcffc), TOBN(0x60f12d68, 0x97c0b87c), + TOBN(0x4ee2c7c4, 0x9caa30bc), TOBN(0x767238b7, 0x97fbff4e), + TOBN(0xebc73921, 0x501b5d92), TOBN(0x3279e3df, 0xc2a37737), + TOBN(0x9fc12bc8, 0x6d197543), TOBN(0xfa94dc6f, 0x0a40db4e), + TOBN(0x7392b41a, 0x530ccbbd), TOBN(0x87c82146, 0xea823525), + TOBN(0xa52f984c, 0x05d98d0c), TOBN(0x2ae57d73, 0x5ef6974c), + TOBN(0x9377f7bf, 0x3042a6dd), TOBN(0xb1a007c0, 0x19647a64), + TOBN(0xfaa9079a, 0x0cca9767), TOBN(0x3d81a25b, 0xf68f72d5), + TOBN(0x752067f8, 0xff81578e), TOBN(0x78622150, 0x9045447d), + TOBN(0xc0c22fcf, 0x0505aa6f), TOBN(0x1030f0a6, 0x6bed1c77), + TOBN(0x31f29f15, 0x1f0bd739), TOBN(0x2d7989c7, 0xe6debe85), + TOBN(0x5c070e72, 0x8e677e98), TOBN(0x0a817bd3, 0x06e81fd5), + TOBN(0xc110d830, 0xb0f2ac95), TOBN(0x48d0995a, 0xab20e64e), + TOBN(0x0f3e00e1, 0x7729cd9a), TOBN(0x2a570c20, 0xdd556946), + TOBN(0x912dbcfd, 0x4e86214d), TOBN(0x2d014ee2, 0xcf615498), + TOBN(0x55e2b1e6, 0x3530d76e), TOBN(0xc5135ae4, 0xfd0fd6d1), + TOBN(0x0066273a, 0xd4f3049f), TOBN(0xbb8e9893, 0xe7087477), + TOBN(0x2dba1ddb, 0x14c6e5fd), TOBN(0xdba37886, 0x51f57e6c), + TOBN(0x5aaee0a6, 0x5a72f2cf), TOBN(0x1208bfbf, 0x7bea5642), + TOBN(0xf5c6aa3b, 0x67872c37), TOBN(0xd726e083, 0x43f93224), + TOBN(0x1854daa5, 0x061f1658), TOBN(0xc0016df1, 0xdf0cd2b3), + TOBN(0xc2a3f23e, 0x833d50de), TOBN(0x73b681d2, 0xbbbd3017), + TOBN(0x2f046dc4, 0x3ac343c0), TOBN(0x9c847e7d, 0x85716421), + TOBN(0xe1e13c91, 0x0917eed4), TOBN(0x3fc9eebd, 0x63a1b9c6), + TOBN(0x0f816a72, 0x7fe02299), TOBN(0x6335ccc2, 0x294f3319), + TOBN(0x3820179f, 0x4745c5be), TOBN(0xe647b782, 0x922f066e), + TOBN(0xc22e49de, 0x02cafb8a), TOBN(0x299bc2ff, 0xfcc2eccc), + TOBN(0x9a8feea2, 0x6e0e8282), TOBN(0xa627278b, 0xfe893205), + TOBN(0xa7e19733, 0x7933e47b), TOBN(0xf4ff6b13, 0x2e766402), + TOBN(0xa4d8be0a, 0x98440d9f), TOBN(0x658f5c2f, 0x38938808), + TOBN(0x90b75677, 0xc95b3b3e), TOBN(0xfa044269, 0x3137b6ff), + TOBN(0x077b039b, 0x43c47c29), TOBN(0xcca95dd3, 0x8a6445b2), + TOBN(0x0b498ba4, 0x2333fc4c), TOBN(0x274f8e68, 0xf736a1b1), + TOBN(0x6ca348fd, 0x5f1d4b2e), 
TOBN(0x24d3be78, 0xa8f10199), + TOBN(0x8535f858, 0xca14f530), TOBN(0xa6e7f163, 0x5b982e51), + TOBN(0x847c8512, 0x36e1bf62), TOBN(0xf6a7c58e, 0x03448418), + TOBN(0x583f3703, 0xf9374ab6), TOBN(0x864f9195, 0x6e564145), + TOBN(0x33bc3f48, 0x22526d50), TOBN(0x9f323c80, 0x1262a496), + TOBN(0xaa97a7ae, 0x3f046a9a), TOBN(0x70da183e, 0xdf8a039a), + TOBN(0x5b68f71c, 0x52aa0ba6), TOBN(0x9be0fe51, 0x21459c2d), + TOBN(0xc1e17eb6, 0xcbc613e5), TOBN(0x33131d55, 0x497ea61c), + TOBN(0x2f69d39e, 0xaf7eded5), TOBN(0x73c2f434, 0xde6af11b), + TOBN(0x4ca52493, 0xa4a375fa), TOBN(0x5f06787c, 0xb833c5c2), + TOBN(0x814e091f, 0x3e6e71cf), TOBN(0x76451f57, 0x8b746666)} + , + {TOBN(0x80f9bdef, 0x694db7e0), TOBN(0xedca8787, 0xb9fcddc6), + TOBN(0x51981c34, 0x03b8dce1), TOBN(0x4274dcf1, 0x70e10ba1), + TOBN(0xf72743b8, 0x6def6d1a), TOBN(0xd25b1670, 0xebdb1866), + TOBN(0xc4491e8c, 0x050c6f58), TOBN(0x2be2b2ab, 0x87fbd7f5), + TOBN(0x3e0e5c9d, 0xd111f8ec), TOBN(0xbcc33f8d, 0xb7c4e760), + TOBN(0x702f9a91, 0xbd392a51), TOBN(0x7da4a795, 0xc132e92d), + TOBN(0x1a0b0ae3, 0x0bb1151b), TOBN(0x54febac8, 0x02e32251), + TOBN(0xea3a5082, 0x694e9e78), TOBN(0xe58ffec1, 0xe4fe40b8), + TOBN(0xf85592fc, 0xd1e0cf9e), TOBN(0xdea75f0d, 0xc0e7b2e8), + TOBN(0xc04215cf, 0xc135584e), TOBN(0x174fc727, 0x2f57092a), + TOBN(0xe7277877, 0xeb930bea), TOBN(0x504caccb, 0x5eb02a5a), + TOBN(0xf9fe08f7, 0xf5241b9b), TOBN(0xe7fb62f4, 0x8d5ca954), + TOBN(0xfbb8349d, 0x29c4120b), TOBN(0x9f94391f, 0xc0d0d915), + TOBN(0xc4074fa7, 0x5410ba51), TOBN(0xa66adbf6, 0x150a5911), + TOBN(0xc164543c, 0x34bfca38), TOBN(0xe0f27560, 0xb9e1ccfc), + TOBN(0x99da0f53, 0xe820219c), TOBN(0xe8234498, 0xc6b4997a), + TOBN(0xcfb88b76, 0x9d4c5423), TOBN(0x9e56eb10, 0xb0521c49), + TOBN(0x418e0b5e, 0xbe8700a1), TOBN(0x00cbaad6, 0xf93cb58a), + TOBN(0xe923fbde, 0xd92a5e67), TOBN(0xca4979ac, 0x1f347f11), + TOBN(0x89162d85, 0x6bc0585b), TOBN(0xdd6254af, 0xac3c70e3), + TOBN(0x7b23c513, 0x516e19e4), TOBN(0x56e2e847, 0xc5c4d593), + TOBN(0x9f727d73, 0x5ce71ef6), TOBN(0x5b6304a6, 0xf79a44c5), + TOBN(0x6638a736, 0x3ab7e433), TOBN(0x1adea470, 0xfe742f83), + TOBN(0xe054b854, 0x5b7fc19f), TOBN(0xf935381a, 0xba1d0698), + TOBN(0x546eab2d, 0x799e9a74), TOBN(0x96239e0e, 0xa949f729), + TOBN(0xca274c6b, 0x7090055a), TOBN(0x835142c3, 0x9020c9b0), + TOBN(0xa405667a, 0xa2e8807f), TOBN(0x29f2c085, 0x1aa3d39e), + TOBN(0xcc555d64, 0x42fc72f5), TOBN(0xe856e0e7, 0xfbeacb3c), + TOBN(0xb5504f9d, 0x918e4936), TOBN(0x65035ef6, 0xb2513982), + TOBN(0x0553a0c2, 0x6f4d9cb9), TOBN(0x6cb10d56, 0xbea85509), + TOBN(0x48d957b7, 0xa242da11), TOBN(0x16a4d3dd, 0x672b7268), + TOBN(0x3d7e637c, 0x8502a96b), TOBN(0x27c7032b, 0x730d463b), + TOBN(0xbdc02b18, 0xe4136a14), TOBN(0xbacf969d, 0x678e32bf), + TOBN(0xc98d89a3, 0xdd9c3c03), TOBN(0x7b92420a, 0x23becc4f), + TOBN(0xd4b41f78, 0xc64d565c), TOBN(0x9f969d00, 0x10f28295), + TOBN(0xec7f7f76, 0xb13d051a), TOBN(0x08945e1e, 0xa92da585), + TOBN(0x55366b7d, 0x5846426f), TOBN(0xe7d09e89, 0x247d441d), + TOBN(0x510b404d, 0x736fbf48), TOBN(0x7fa003d0, 0xe784bd7d), + TOBN(0x25f7614f, 0x17fd9596), TOBN(0x49e0e0a1, 0x35cb98db), + TOBN(0x2c65957b, 0x2e83a76a), TOBN(0x5d40da8d, 0xcddbe0f8), + TOBN(0xf2b8c405, 0x050bad24), TOBN(0x8918426d, 0xc2aa4823), + TOBN(0x2aeab3dd, 0xa38365a7), TOBN(0x72031717, 0x7c91b690), + TOBN(0x8b00d699, 0x60a94120), TOBN(0x478a255d, 0xe99eaeec), + TOBN(0xbf656a5f, 0x6f60aafd), TOBN(0xdfd7cb75, 0x5dee77b3), + TOBN(0x37f68bb4, 0xa595939d), TOBN(0x03556479, 0x28740217), + TOBN(0x8e740e7c, 0x84ad7612), TOBN(0xd89bc843, 0x9044695f), + TOBN(0xf7f3da5d, 0x85a9184d), 
TOBN(0x562563bb, 0x9fc0b074), + TOBN(0x06d2e6aa, 0xf88a888e), TOBN(0x612d8643, 0x161fbe7c), + TOBN(0x465edba7, 0xf64085e7), TOBN(0xb230f304, 0x29aa8511), + TOBN(0x53388426, 0xcda2d188), TOBN(0x90885735, 0x4b666649), + TOBN(0x6f02ff9a, 0x652f54f6), TOBN(0x65c82294, 0x5fae2bf0), + TOBN(0x7816ade0, 0x62f5eee3), TOBN(0xdcdbdf43, 0xfcc56d70), + TOBN(0x9fb3bba3, 0x54530bb2), TOBN(0xbde3ef77, 0xcb0869ea), + TOBN(0x89bc9046, 0x0b431163), TOBN(0x4d03d7d2, 0xe4819a35), + TOBN(0x33ae4f9e, 0x43b6a782), TOBN(0x216db307, 0x9c88a686), + TOBN(0x91dd88e0, 0x00ffedd9), TOBN(0xb280da9f, 0x12bd4840), + TOBN(0x32a7cb8a, 0x1635e741), TOBN(0xfe14008a, 0x78be02a7), + TOBN(0x3fafb334, 0x1b7ae030), TOBN(0x7fd508e7, 0x5add0ce9), + TOBN(0x72c83219, 0xd607ad51), TOBN(0x0f229c0a, 0x8d40964a), + TOBN(0x1be2c336, 0x1c878da2), TOBN(0xe0c96742, 0xeab2ab86), + TOBN(0x458f8691, 0x3e538cd7), TOBN(0xa7001f6c, 0x8e08ad53), + TOBN(0x52b8c6e6, 0xbf5d15ff), TOBN(0x548234a4, 0x011215dd), + TOBN(0xff5a9d2d, 0x3d5b4045), TOBN(0xb0ffeeb6, 0x4a904190), + TOBN(0x55a3aca4, 0x48607f8b), TOBN(0x8cbd665c, 0x30a0672a), + TOBN(0x87f834e0, 0x42583068), TOBN(0x02da2aeb, 0xf3f6e683), + TOBN(0x6b763e5d, 0x05c12248), TOBN(0x7230378f, 0x65a8aefc), + TOBN(0x93bd80b5, 0x71e8e5ca), TOBN(0x53ab041c, 0xb3b62524), + TOBN(0x1b860513, 0x6c9c552e), TOBN(0xe84d402c, 0xd5524e66), + TOBN(0xa37f3573, 0xf37f5937), TOBN(0xeb0f6c7d, 0xd1e4fca5), + TOBN(0x2965a554, 0xac8ab0fc), TOBN(0x17fbf56c, 0x274676ac), + TOBN(0x2e2f6bd9, 0xacf7d720), TOBN(0x41fc8f88, 0x10224766), + TOBN(0x517a14b3, 0x85d53bef), TOBN(0xdae327a5, 0x7d76a7d1), + TOBN(0x6ad0a065, 0xc4818267), TOBN(0x33aa189b, 0x37c1bbc1), + TOBN(0x64970b52, 0x27392a92), TOBN(0x21699a1c, 0x2d1535ea), + TOBN(0xcd20779c, 0xc2d7a7fd), TOBN(0xe3186059, 0x99c83cf2), + TOBN(0x9b69440b, 0x72c0b8c7), TOBN(0xa81497d7, 0x7b9e0e4d), + TOBN(0x515d5c89, 0x1f5f82dc), TOBN(0x9a7f67d7, 0x6361079e), + TOBN(0xa8da81e3, 0x11a35330), TOBN(0xe44990c4, 0x4b18be1b), + TOBN(0xc7d5ed95, 0xaf103e59), TOBN(0xece8aba7, 0x8dac9261), + TOBN(0xbe82b099, 0x9394b8d3), TOBN(0x6830f09a, 0x16adfe83), + TOBN(0x250a29b4, 0x88172d01), TOBN(0x8b20bd65, 0xcaff9e02), + TOBN(0xb8a7661e, 0xe8a6329a), TOBN(0x4520304d, 0xd3fce920), + TOBN(0xae45da1f, 0x2b47f7ef), TOBN(0xe07f5288, 0x5bffc540), + TOBN(0xf7997009, 0x3464f874), TOBN(0x2244c2cd, 0xa6fa1f38), + TOBN(0x43c41ac1, 0x94d7d9b1), TOBN(0x5bafdd82, 0xc82e7f17), + TOBN(0xdf0614c1, 0x5fda0fca), TOBN(0x74b043a7, 0xa8ae37ad), + TOBN(0x3ba6afa1, 0x9e71734c), TOBN(0x15d5437e, 0x9c450f2e), + TOBN(0x4a5883fe, 0x67e242b1), TOBN(0x5143bdc2, 0x2c1953c2), + TOBN(0x542b8b53, 0xfc5e8920), TOBN(0x363bf9a8, 0x9a9cee08), + TOBN(0x02375f10, 0xc3486e08), TOBN(0x2037543b, 0x8c5e70d2), + TOBN(0x7109bccc, 0x625640b4), TOBN(0xcbc1051e, 0x8bc62c3b), + TOBN(0xf8455fed, 0x803f26ea), TOBN(0x6badceab, 0xeb372424), + TOBN(0xa2a9ce7c, 0x6b53f5f9), TOBN(0x64246595, 0x1b176d99), + TOBN(0xb1298d36, 0xb95c081b), TOBN(0x53505bb8, 0x1d9a9ee6), + TOBN(0x3f6f9e61, 0xf2ba70b0), TOBN(0xd07e16c9, 0x8afad453), + TOBN(0x9f1694bb, 0xe7eb4a6a), TOBN(0xdfebced9, 0x3cb0bc8e), + TOBN(0x92d3dcdc, 0x53868c8b), TOBN(0x174311a2, 0x386107a6), + TOBN(0x4109e07c, 0x689b4e64), TOBN(0x30e4587f, 0x2df3dcb6), + TOBN(0x841aea31, 0x0811b3b2), TOBN(0x6144d41d, 0x0cce43ea), + TOBN(0x464c4581, 0x2a9a7803), TOBN(0xd03d371f, 0x3e158930), + TOBN(0xc676d7f2, 0xb1f3390b), TOBN(0x9f7a1b8c, 0xa5b61272), + TOBN(0x4ebebfc9, 0xc2e127a9), TOBN(0x4602500c, 0x5dd997bf), + TOBN(0x7f09771c, 0x4711230f), TOBN(0x058eb37c, 0x020f09c1), + TOBN(0xab693d4b, 0xfee5e38b), TOBN(0x9289eb1f, 
0x4653cbc0), + TOBN(0xbecf46ab, 0xd51b9cf5), TOBN(0xd2aa9c02, 0x9f0121af), + TOBN(0x36aaf7d2, 0xe90dc274), TOBN(0x909e4ea0, 0x48b95a3c), + TOBN(0xe6b70496, 0x6f32dbdb), TOBN(0x672188a0, 0x8b030b3e), + TOBN(0xeeffe5b3, 0xcfb617e2), TOBN(0x87e947de, 0x7c82709e), + TOBN(0xa44d2b39, 0x1770f5a7), TOBN(0xe4d4d791, 0x0e44eb82), + TOBN(0x42e69d1e, 0x3f69712a), TOBN(0xbf11c4d6, 0xac6a820e), + TOBN(0xb5e7f3e5, 0x42c4224c), TOBN(0xd6b4e81c, 0x449d941c), + TOBN(0x5d72bd16, 0x5450e878), TOBN(0x6a61e28a, 0xee25ac54), + TOBN(0x33272094, 0xe6f1cd95), TOBN(0x7512f30d, 0x0d18673f), + TOBN(0x32f7a4ca, 0x5afc1464), TOBN(0x2f095656, 0x6bbb977b), + TOBN(0x586f47ca, 0xa8226200), TOBN(0x02c868ad, 0x1ac07369), + TOBN(0x4ef2b845, 0xc613acbe), TOBN(0x43d7563e, 0x0386054c), + TOBN(0x54da9dc7, 0xab952578), TOBN(0xb5423df2, 0x26e84d0b), + TOBN(0xa8b64eeb, 0x9b872042), TOBN(0xac205782, 0x5990f6df), + TOBN(0x4ff696eb, 0x21f4c77a), TOBN(0x1a79c3e4, 0xaab273af), + TOBN(0x29bc922e, 0x9436b3f1), TOBN(0xff807ef8, 0xd6d9a27a), + TOBN(0x82acea3d, 0x778f22a0), TOBN(0xfb10b2e8, 0x5b5e7469), + TOBN(0xc0b16980, 0x2818ee7d), TOBN(0x011afff4, 0xc91c1a2f), + TOBN(0x95a6d126, 0xad124418), TOBN(0x31c081a5, 0xe72e295f), + TOBN(0x36bb283a, 0xf2f4db75), TOBN(0xd115540f, 0x7acef462), + TOBN(0xc7f3a8f8, 0x33f6746c), TOBN(0x21e46f65, 0xfea990ca), + TOBN(0x915fd5c5, 0xcaddb0a9), TOBN(0xbd41f016, 0x78614555), + TOBN(0x346f4434, 0x426ffb58), TOBN(0x80559436, 0x14dbc204), + TOBN(0xf3dd20fe, 0x5a969b7f), TOBN(0x9d59e956, 0xe899a39a), + TOBN(0xf1b0971c, 0x8ad4cf4b), TOBN(0x03448860, 0x2ffb8fb8), + TOBN(0xf071ac3c, 0x65340ba4), TOBN(0x408d0596, 0xb27fd758), + TOBN(0xe7c78ea4, 0x98c364b0), TOBN(0xa4aac4a5, 0x051e8ab5), + TOBN(0xb9e1d560, 0x485d9002), TOBN(0x9acd518a, 0x88844455), + TOBN(0xe4ca688f, 0xd06f56c0), TOBN(0xa48af70d, 0xdf027972), + TOBN(0x691f0f04, 0x5e9a609d), TOBN(0xa9dd82cd, 0xee61270e), + TOBN(0x8903ca63, 0xa0ef18d3), TOBN(0x9fb7ee35, 0x3d6ca3bd), + TOBN(0xa7b4a09c, 0xabf47d03), TOBN(0x4cdada01, 0x1c67de8e), + TOBN(0x52003749, 0x9355a244), TOBN(0xe77fd2b6, 0x4f2151a9), + TOBN(0x695d6cf6, 0x66b4efcb), TOBN(0xc5a0cacf, 0xda2cfe25), + TOBN(0x104efe5c, 0xef811865), TOBN(0xf52813e8, 0x9ea5cc3d), + TOBN(0x855683dc, 0x40b58dbc), TOBN(0x0338ecde, 0x175fcb11), + TOBN(0xf9a05637, 0x74921592), TOBN(0xb4f1261d, 0xb9bb9d31), + TOBN(0x551429b7, 0x4e9c5459), TOBN(0xbe182e6f, 0x6ea71f53), + TOBN(0xd3a3b07c, 0xdfc50573), TOBN(0x9ba1afda, 0x62be8d44), + TOBN(0x9bcfd2cb, 0x52ab65d3), TOBN(0xdf11d547, 0xa9571802), + TOBN(0x099403ee, 0x02a2404a), TOBN(0x497406f4, 0x21088a71), + TOBN(0x99479409, 0x5004ae71), TOBN(0xbdb42078, 0xa812c362), + TOBN(0x2b72a30f, 0xd8828442), TOBN(0x283add27, 0xfcb5ed1c), + TOBN(0xf7c0e200, 0x66a40015), TOBN(0x3e3be641, 0x08b295ef), + TOBN(0xac127dc1, 0xe038a675), TOBN(0x729deff3, 0x8c5c6320), + TOBN(0xb7df8fd4, 0xa90d2c53), TOBN(0x9b74b0ec, 0x681e7cd3), + TOBN(0x5cb5a623, 0xdab407e5), TOBN(0xcdbd3615, 0x76b340c6), + TOBN(0xa184415a, 0x7d28392c), TOBN(0xc184c1d8, 0xe96f7830), + TOBN(0xc3204f19, 0x81d3a80f), TOBN(0xfde0c841, 0xc8e02432), + TOBN(0x78203b3e, 0x8149e0c1), TOBN(0x5904bdbb, 0x08053a73), + TOBN(0x30fc1dd1, 0x101b6805), TOBN(0x43c223bc, 0x49aa6d49), + TOBN(0x9ed67141, 0x7a174087), TOBN(0x311469a0, 0xd5997008), + TOBN(0xb189b684, 0x5e43fc61), TOBN(0xf3282375, 0xe0d3ab57), + TOBN(0x4fa34b67, 0xb1181da8), TOBN(0x621ed0b2, 0x99ee52b8), + TOBN(0x9b178de1, 0xad990676), TOBN(0xd51de67b, 0x56d54065), + TOBN(0x2a2c27c4, 0x7538c201), TOBN(0x33856ec8, 0x38a40f5c), + TOBN(0x2522fc15, 0xbe6cdcde), TOBN(0x1e603f33, 0x9f0c6f89), + 
TOBN(0x7994edc3, 0x103e30a6), TOBN(0x033a00db, 0x220c853e), + TOBN(0xd3cfa409, 0xf7bb7fd7), TOBN(0x70f8781e, 0x462d18f6), + TOBN(0xbbd82980, 0x687fe295), TOBN(0x6eef4c32, 0x595669f3), + TOBN(0x86a9303b, 0x2f7e85c3), TOBN(0x5fce4621, 0x71988f9b), + TOBN(0x5b935bf6, 0xc138acb5), TOBN(0x30ea7d67, 0x25661212), + TOBN(0xef1eb5f4, 0xe51ab9a2), TOBN(0x0587c98a, 0xae067c78), + TOBN(0xb3ce1b3c, 0x77ca9ca6), TOBN(0x2a553d4d, 0x54b5f057), + TOBN(0xc7898236, 0x4da29ec2), TOBN(0xdbdd5d13, 0xb9c57316), + TOBN(0xc57d6e6b, 0x2cd80d47), TOBN(0x80b460cf, 0xfe9e7391), + TOBN(0x98648cab, 0xf963c31e), TOBN(0x67f9f633, 0xcc4d32fd), + TOBN(0x0af42a9d, 0xfdf7c687), TOBN(0x55f292a3, 0x0b015ea7), + TOBN(0x89e468b2, 0xcd21ab3d), TOBN(0xe504f022, 0xc393d392), + TOBN(0xab21e1d4, 0xa5013af9), TOBN(0xe3283f78, 0xc2c28acb), + TOBN(0xf38b35f6, 0x226bf99f), TOBN(0xe8354274, 0x0e291e69), + TOBN(0x61673a15, 0xb20c162d), TOBN(0xc101dc75, 0xb04fbdbe), + TOBN(0x8323b4c2, 0x255bd617), TOBN(0x6c969693, 0x6c2a9154), + TOBN(0xc6e65860, 0x62679387), TOBN(0x8e01db0c, 0xb8c88e23), + TOBN(0x33c42873, 0x893a5559), TOBN(0x7630f04b, 0x47a3e149), + TOBN(0xb5d80805, 0xddcf35f8), TOBN(0x582ca080, 0x77dfe732), + TOBN(0x2c7156e1, 0x0b1894a0), TOBN(0x92034001, 0xd81c68c0), + TOBN(0xed225d00, 0xc8b115b5), TOBN(0x237f9c22, 0x83b907f2), + TOBN(0x0ea2f32f, 0x4470e2c0), TOBN(0xb725f7c1, 0x58be4e95), + TOBN(0x0f1dcafa, 0xb1ae5463), TOBN(0x59ed5187, 0x1ba2fc04), + TOBN(0xf6e0f316, 0xd0115d4d), TOBN(0x5180b12f, 0xd3691599), + TOBN(0x157e32c9, 0x527f0a41), TOBN(0x7b0b081d, 0xa8e0ecc0), + TOBN(0x6dbaaa8a, 0xbf4f0dd0), TOBN(0x99b289c7, 0x4d252696), + TOBN(0x79b7755e, 0xdbf864fe), TOBN(0x6974e2b1, 0x76cad3ab), + TOBN(0x35dbbee2, 0x06ddd657), TOBN(0xe7cbdd11, 0x2ff3a96d), + TOBN(0x88381968, 0x076be758), TOBN(0x2d737e72, 0x08c91f5d), + TOBN(0x5f83ab62, 0x86ec3776), TOBN(0x98aa649d, 0x945fa7a1), + TOBN(0xf477ec37, 0x72ef0933), TOBN(0x66f52b1e, 0x098c17b1), + TOBN(0x9eec58fb, 0xd803738b), TOBN(0x91aaade7, 0xe4e86aa4), + TOBN(0x6b1ae617, 0xa5b51492), TOBN(0x63272121, 0xbbc45974), + TOBN(0x7e0e28f0, 0x862c5129), TOBN(0x0a8f79a9, 0x3321a4a0), + TOBN(0xe26d1664, 0x5041c88f), TOBN(0x0571b805, 0x53233e3a), + TOBN(0xd1b0ccde, 0xc9520711), TOBN(0x55a9e4ed, 0x3c8b84bf), + TOBN(0x9426bd39, 0xa1fef314), TOBN(0x4f5f638e, 0x6eb93f2b), + TOBN(0xba2a1ed3, 0x2bf9341b), TOBN(0xd63c1321, 0x4d42d5a9), + TOBN(0xd2964a89, 0x316dc7c5), TOBN(0xd1759606, 0xca511851), + TOBN(0xd8a9201f, 0xf9e6ed35), TOBN(0xb7b5ee45, 0x6736925a), + TOBN(0x0a83fbbc, 0x99581af7), TOBN(0x3076bc40, 0x64eeb051), + TOBN(0x5511c98c, 0x02dec312), TOBN(0x270de898, 0x238dcb78), + TOBN(0x2cf4cf9c, 0x539c08c9), TOBN(0xa70cb65e, 0x38d3b06e), + TOBN(0xb12ec10e, 0xcfe57bbd), TOBN(0x82c7b656, 0x35a0c2b5), + TOBN(0xddc7d5cd, 0x161c67bd), TOBN(0xe32e8985, 0xae3a32cc), + TOBN(0x7aba9444, 0xd11a5529), TOBN(0xe964ed02, 0x2427fa1a), + TOBN(0x1528392d, 0x24a1770a), TOBN(0xa152ce2c, 0x12c72fcd), + TOBN(0x714553a4, 0x8ec07649), TOBN(0x18b4c290, 0x459dd453), + TOBN(0xea32b714, 0x7b64b110), TOBN(0xb871bfa5, 0x2e6f07a2), + TOBN(0xb67112e5, 0x9e2e3c9b), TOBN(0xfbf250e5, 0x44aa90f6), + TOBN(0xf77aedb8, 0xbd539006), TOBN(0x3b0cdf9a, 0xd172a66f), + TOBN(0xedf69fea, 0xf8c51187), TOBN(0x05bb67ec, 0x741e4da7), + TOBN(0x47df0f32, 0x08114345), TOBN(0x56facb07, 0xbb9792b1), + TOBN(0xf3e007e9, 0x8f6229e4), TOBN(0x62d103f4, 0x526fba0f), + TOBN(0x4f33bef7, 0xb0339d79), TOBN(0x9841357b, 0xb59bfec1), + TOBN(0xfa8dbb59, 0xc34e6705), TOBN(0xc3c7180b, 0x7fdaa84c), + TOBN(0xf95872fc, 0xa4108537), TOBN(0x8750cc3b, 0x932a3e5a), + TOBN(0xb61cc69d, 
0xb7275d7d), TOBN(0xffa0168b, 0x2e59b2e9), + TOBN(0xca032abc, 0x6ecbb493), TOBN(0x1d86dbd3, 0x2c9082d8), + TOBN(0xae1e0b67, 0xe28ef5ba), TOBN(0x2c9a4699, 0xcb18e169), + TOBN(0x0ecd0e33, 0x1e6bbd20), TOBN(0x571b360e, 0xaf5e81d2), + TOBN(0xcd9fea58, 0x101c1d45), TOBN(0x6651788e, 0x18880452), + TOBN(0xa9972635, 0x1f8dd446), TOBN(0x44bed022, 0xe37281d0), + TOBN(0x094b2b2d, 0x33da525d), TOBN(0xf193678e, 0x13144fd8), + TOBN(0xb8ab5ba4, 0xf4c1061d), TOBN(0x4343b5fa, 0xdccbe0f4), + TOBN(0xa8702371, 0x63812713), TOBN(0x47bf6d2d, 0xf7611d93), + TOBN(0x46729b8c, 0xbd21e1d7), TOBN(0x7484d4e0, 0xd629e77d), + TOBN(0x830e6eea, 0x60dbac1f), TOBN(0x23d8c484, 0xda06a2f7), + TOBN(0x896714b0, 0x50ca535b), TOBN(0xdc8d3644, 0xebd97a9b), + TOBN(0x106ef9fa, 0xb12177b4), TOBN(0xf79bf464, 0x534d5d9c), + TOBN(0x2537a349, 0xa6ab360b), TOBN(0xc7c54253, 0xa00c744f), + TOBN(0xb3c7a047, 0xe5911a76), TOBN(0x61ffa5c8, 0x647f1ee7), + TOBN(0x15aed36f, 0x8f56ab42), TOBN(0x6a0d41b0, 0xa3ff9ac9), + TOBN(0x68f469f5, 0xcc30d357), TOBN(0xbe9adf81, 0x6b72be96), + TOBN(0x1cd926fe, 0x903ad461), TOBN(0x7e89e38f, 0xcaca441b), + TOBN(0xf0f82de5, 0xfacf69d4), TOBN(0x363b7e76, 0x4775344c), + TOBN(0x6894f312, 0xb2e36d04), TOBN(0x3c6cb4fe, 0x11d1c9a5), + TOBN(0x85d9c339, 0x4008e1f2), TOBN(0x5e9a85ea, 0x249f326c), + TOBN(0xdc35c60a, 0x678c5e06), TOBN(0xc08b944f, 0x9f86fba9), + TOBN(0xde40c02c, 0x89f71f0f), TOBN(0xad8f3e31, 0xff3da3c0), + TOBN(0x3ea5096b, 0x42125ded), TOBN(0x13879cbf, 0xa7379183), + TOBN(0x6f4714a5, 0x6b306a0b), TOBN(0x359c2ea6, 0x67646c5e), + TOBN(0xfacf8943, 0x07726368), TOBN(0x07a58935, 0x65ff431e), + TOBN(0x24d661d1, 0x68754ab0), TOBN(0x801fce1d, 0x6f429a76), + TOBN(0xc068a85f, 0xa58ce769), TOBN(0xedc35c54, 0x5d5eca2b), + TOBN(0xea31276f, 0xa3f660d1), TOBN(0xa0184ebe, 0xb8fc7167), + TOBN(0x0f20f21a, 0x1d8db0ae), TOBN(0xd96d095f, 0x56c35e12), + TOBN(0xedf402b5, 0xf8c2a25b), TOBN(0x1bb772b9, 0x059204b6), + TOBN(0x50cbeae2, 0x19b4e34c), TOBN(0x93109d80, 0x3fa0845a), + TOBN(0x54f7ccf7, 0x8ef59fb5), TOBN(0x3b438fe2, 0x88070963), + TOBN(0x9e28c659, 0x31f3ba9b), TOBN(0x9cc31b46, 0xead9da92), + TOBN(0x3c2f0ba9, 0xb733aa5f), TOBN(0xdece47cb, 0xf05af235), + TOBN(0xf8e3f715, 0xa2ac82a5), TOBN(0xc97ba641, 0x2203f18a), + TOBN(0xc3af5504, 0x09c11060), TOBN(0x56ea2c05, 0x46af512d), + TOBN(0xfac28daf, 0xf3f28146), TOBN(0x87fab43a, 0x959ef494),} + , + {TOBN(0x09891641, 0xd4c5105f), TOBN(0x1ae80f8e, 0x6d7fbd65), + TOBN(0x9d67225f, 0xbee6bdb0), TOBN(0x3b433b59, 0x7fc4d860), + TOBN(0x44e66db6, 0x93e85638), TOBN(0xf7b59252, 0xe3e9862f), + TOBN(0xdb785157, 0x665c32ec), TOBN(0x702fefd7, 0xae362f50), + TOBN(0x3754475d, 0x0fefb0c3), TOBN(0xd48fb56b, 0x46d7c35d), + TOBN(0xa070b633, 0x363798a4), TOBN(0xae89f3d2, 0x8fdb98e6), + TOBN(0x970b89c8, 0x6363d14c), TOBN(0x89817521, 0x67abd27d), + TOBN(0x9bf7d474, 0x44d5a021), TOBN(0xb3083baf, 0xcac72aee), + TOBN(0x389741de, 0xbe949a44), TOBN(0x638e9388, 0x546a4fa5), + TOBN(0x3fe6419c, 0xa0047bdc), TOBN(0x7047f648, 0xaaea57ca), + TOBN(0x54e48a90, 0x41fbab17), TOBN(0xda8e0b28, 0x576bdba2), + TOBN(0xe807eebc, 0xc72afddc), TOBN(0x07d3336d, 0xf42577bf), + TOBN(0x62a8c244, 0xbfe20925), TOBN(0x91c19ac3, 0x8fdce867), + TOBN(0x5a96a5d5, 0xdd387063), TOBN(0x61d587d4, 0x21d324f6), + TOBN(0xe87673a2, 0xa37173ea), TOBN(0x23848008, 0x53778b65), + TOBN(0x10f8441e, 0x05bab43e), TOBN(0xfa11fe12, 0x4621efbe), + TOBN(0x047b772e, 0x81685d7b), TOBN(0x23f27d81, 0xbf34a976), + TOBN(0xc27608e2, 0x915f48ef), TOBN(0x3b0b43fa, 0xa521d5c3), + TOBN(0x7613fb26, 0x63ca7284), TOBN(0x7f5729b4, 0x1d4db837), + TOBN(0x87b14898, 0x583b526b), 
TOBN(0x00b732a6, 0xbbadd3d1), + TOBN(0x8e02f426, 0x2048e396), TOBN(0x436b50b6, 0x383d9de4), + TOBN(0xf78d3481, 0x471e85ad), TOBN(0x8b01ea6a, 0xd005c8d6), + TOBN(0xd3c7afee, 0x97015c07), TOBN(0x46cdf1a9, 0x4e3ba2ae), + TOBN(0x7a42e501, 0x83d3a1d2), TOBN(0xd54b5268, 0xb541dff4), + TOBN(0x3f24cf30, 0x4e23e9bc), TOBN(0x4387f816, 0x126e3624), + TOBN(0x26a46a03, 0x3b0b6d61), TOBN(0xaf1bc845, 0x8b2d777c), + TOBN(0x25c401ba, 0x527de79c), TOBN(0x0e1346d4, 0x4261bbb6), + TOBN(0x4b96c44b, 0x287b4bc7), TOBN(0x658493c7, 0x5254562f), + TOBN(0x23f949fe, 0xb8a24a20), TOBN(0x17ebfed1, 0xf52ca53f), + TOBN(0x9b691bbe, 0xbcfb4853), TOBN(0x5617ff6b, 0x6278a05d), + TOBN(0x241b34c5, 0xe3c99ebd), TOBN(0xfc64242e, 0x1784156a), + TOBN(0x4206482f, 0x695d67df), TOBN(0xb967ce0e, 0xee27c011), + TOBN(0x65db3751, 0x21c80b5d), TOBN(0x2e7a563c, 0xa31ecca0), + TOBN(0xe56ffc4e, 0x5238a07e), TOBN(0x3d6c2966, 0x32ced854), + TOBN(0xe99d7d1a, 0xaf70b885), TOBN(0xafc3bad9, 0x2d686459), + TOBN(0x9c78bf46, 0x0cc8ba5b), TOBN(0x5a439519, 0x18955aa3), + TOBN(0xf8b517a8, 0x5fe4e314), TOBN(0xe60234d0, 0xfcb8906f), + TOBN(0xffe542ac, 0xf2061b23), TOBN(0x287e191f, 0x6b4cb59c), + TOBN(0x21857ddc, 0x09d877d8), TOBN(0x1c23478c, 0x14678941), + TOBN(0xbbf0c056, 0xb6e05ea4), TOBN(0x82da4b53, 0xb01594fe), + TOBN(0xf7526791, 0xfadb8608), TOBN(0x049e832d, 0x7b74cdf6), + TOBN(0xa43581cc, 0xc2b90a34), TOBN(0x73639eb8, 0x9360b10c), + TOBN(0x4fba331f, 0xe1e4a71b), TOBN(0x6ffd6b93, 0x8072f919), + TOBN(0x6e53271c, 0x65679032), TOBN(0x67206444, 0xf14272ce), + TOBN(0xc0f734a3, 0xb2335834), TOBN(0x9526205a, 0x90ef6860), + TOBN(0xcb8be717, 0x04e2bb0d), TOBN(0x2418871e, 0x02f383fa), + TOBN(0xd7177681, 0x4082c157), TOBN(0xcc914ad0, 0x29c20073), + TOBN(0xf186c1eb, 0xe587e728), TOBN(0x6fdb3c22, 0x61bcd5fd), + TOBN(0x30d014a6, 0xf2f9f8e9), TOBN(0x963ece23, 0x4fec49d2), + TOBN(0x862025c5, 0x9605a8d9), TOBN(0x39874445, 0x19f8929a), + TOBN(0x01b6ff65, 0x12bf476a), TOBN(0x598a64d8, 0x09cf7d91), + TOBN(0xd7ec7749, 0x93be56ca), TOBN(0x10899785, 0xcbb33615), + TOBN(0xb8a092fd, 0x02eee3ad), TOBN(0xa86b3d35, 0x30145270), + TOBN(0x323d98c6, 0x8512b675), TOBN(0x4b8bc785, 0x62ebb40f), + TOBN(0x7d301f54, 0x413f9cde), TOBN(0xa5e4fb4f, 0x2bab5664), + TOBN(0x1d2b252d, 0x1cbfec23), TOBN(0xfcd576bb, 0xe177120d), + TOBN(0x04427d3e, 0x83731a34), TOBN(0x2bb9028e, 0xed836e8e), + TOBN(0xb36acff8, 0xb612ca7c), TOBN(0xb88fe5ef, 0xd3d9c73a), + TOBN(0xbe2a6bc6, 0xedea4eb3), TOBN(0x43b93133, 0x488eec77), + TOBN(0xf41ff566, 0xb17106e1), TOBN(0x469e9172, 0x654efa32), + TOBN(0xb4480f04, 0x41c23fa3), TOBN(0xb4712eb0, 0xc1989a2e), + TOBN(0x3ccbba0f, 0x93a29ca7), TOBN(0x6e205c14, 0xd619428c), + TOBN(0x90db7957, 0xb3641686), TOBN(0x0432691d, 0x45ac8b4e), + TOBN(0x07a759ac, 0xf64e0350), TOBN(0x0514d89c, 0x9c972517), + TOBN(0x1701147f, 0xa8e67fc3), TOBN(0x9e2e0b8b, 0xab2085be), + TOBN(0xd5651824, 0xac284e57), TOBN(0x890d4325, 0x74893664), + TOBN(0x8a7c5e6e, 0xc55e68a3), TOBN(0xbf12e90b, 0x4339c85a), + TOBN(0x31846b85, 0xf922b655), TOBN(0x9a54ce4d, 0x0bf4d700), + TOBN(0xd7f4e83a, 0xf1a14295), TOBN(0x916f955c, 0xb285d4f9), + TOBN(0xe57bb0e0, 0x99ffdaba), TOBN(0x28a43034, 0xeab0d152), + TOBN(0x0a36ffa2, 0xb8a9cef8), TOBN(0x5517407e, 0xb9ec051a), + TOBN(0x9c796096, 0xea68e672), TOBN(0x853db5fb, 0xfb3c77fb), + TOBN(0x21474ba9, 0xe864a51a), TOBN(0x6c267699, 0x6e8a1b8b), + TOBN(0x7c823626, 0x94120a28), TOBN(0xe61e9a48, 0x8383a5db), + TOBN(0x7dd75003, 0x9f84216d), TOBN(0xab020d07, 0xad43cd85), + TOBN(0x9437ae48, 0xda12c659), TOBN(0x6449c2eb, 0xe65452ad), + TOBN(0xcc7c4c1c, 0x2cf9d7c1), TOBN(0x1320886a, 
0xee95e5ab), + TOBN(0xbb7b9056, 0xbeae170c), TOBN(0xc8a5b250, 0xdbc0d662), + TOBN(0x4ed81432, 0xc11d2303), TOBN(0x7da66912, 0x1f03769f), + TOBN(0x3ac7a5fd, 0x84539828), TOBN(0x14dada94, 0x3bccdd02), + TOBN(0x8b84c321, 0x7ef6b0d1), TOBN(0x52a9477a, 0x7c933f22), + TOBN(0x5ef6728a, 0xfd440b82), TOBN(0x5c3bd859, 0x6ce4bd5e), + TOBN(0x918b80f5, 0xf22c2d3e), TOBN(0x368d5040, 0xb7bb6cc5), + TOBN(0xb66142a1, 0x2695a11c), TOBN(0x60ac583a, 0xeb19ea70), + TOBN(0x317cbb98, 0x0eab2437), TOBN(0x8cc08c55, 0x5e2654c8), + TOBN(0xfe2d6520, 0xe6d8307f), TOBN(0xe9f147f3, 0x57428993), + TOBN(0x5f9c7d14, 0xd2fd6cf1), TOBN(0xa3ecd064, 0x2d4fcbb0), + TOBN(0xad83fef0, 0x8e7341f7), TOBN(0x643f23a0, 0x3a63115c), + TOBN(0xd38a78ab, 0xe65ab743), TOBN(0xbf7c75b1, 0x35edc89c), + TOBN(0x3dd8752e, 0x530df568), TOBN(0xf85c4a76, 0xe308c682), + TOBN(0x4c9955b2, 0xe68acf37), TOBN(0xa544df3d, 0xab32af85), + TOBN(0x4b8ec3f5, 0xa25cf493), TOBN(0x4d8f2764, 0x1a622feb), + TOBN(0x7bb4f7aa, 0xf0dcbc49), TOBN(0x7de551f9, 0x70bbb45b), + TOBN(0xcfd0f3e4, 0x9f2ca2e5), TOBN(0xece58709, 0x1f5c76ef), + TOBN(0x32920edd, 0x167d79ae), TOBN(0x039df8a2, 0xfa7d7ec1), + TOBN(0xf46206c0, 0xbb30af91), TOBN(0x1ff5e2f5, 0x22676b59), + TOBN(0x11f4a039, 0x6ea51d66), TOBN(0x506c1445, 0x807d7a26), + TOBN(0x60da5705, 0x755a9b24), TOBN(0x8fc8cc32, 0x1f1a319e), + TOBN(0x83642d4d, 0x9433d67d), TOBN(0x7fa5cb8f, 0x6a7dd296), + TOBN(0x576591db, 0x9b7bde07), TOBN(0x13173d25, 0x419716fb), + TOBN(0xea30599d, 0xd5b340ff), TOBN(0xfc6b5297, 0xb0fe76c5), + TOBN(0x1c6968c8, 0xab8f5adc), TOBN(0xf723c7f5, 0x901c928d), + TOBN(0x4203c321, 0x9773d402), TOBN(0xdf7c6aa3, 0x1b51dd47), + TOBN(0x3d49e37a, 0x552be23c), TOBN(0x57febee8, 0x0b5a6e87), + TOBN(0xc5ecbee4, 0x7bd8e739), TOBN(0x79d44994, 0xae63bf75), + TOBN(0x168bd00f, 0x38fb8923), TOBN(0x75d48ee4, 0xd0533130), + TOBN(0x554f77aa, 0xdb5cdf33), TOBN(0x3396e896, 0x3c696769), + TOBN(0x2fdddbf2, 0xd3fd674e), TOBN(0xbbb8f6ee, 0x99d0e3e5), + TOBN(0x51b90651, 0xcbae2f70), TOBN(0xefc4bc05, 0x93aaa8eb), + TOBN(0x8ecd8689, 0xdd1df499), TOBN(0x1aee99a8, 0x22f367a5), + TOBN(0x95d485b9, 0xae8274c5), TOBN(0x6c14d445, 0x7d30b39c), + TOBN(0xbafea90b, 0xbcc1ef81), TOBN(0x7c5f317a, 0xa459a2ed), + TOBN(0x01211075, 0x4ef44227), TOBN(0xa17bed6e, 0xdc20f496), + TOBN(0x0cdfe424, 0x819853cd), TOBN(0x13793298, 0xf71e2ce7), + TOBN(0x3c1f3078, 0xdbbe307b), TOBN(0x6dd1c20e, 0x76ee9936), + TOBN(0x23ee4b57, 0x423caa20), TOBN(0x4ac3793b, 0x8efb840e), + TOBN(0x934438eb, 0xed1f8ca0), TOBN(0x3e546658, 0x4ebb25a2), + TOBN(0xc415af0e, 0xc069896f), TOBN(0xc13eddb0, 0x9a5aa43d), + TOBN(0x7a04204f, 0xd49eb8f6), TOBN(0xd0d5bdfc, 0xd74f1670), + TOBN(0x3697e286, 0x56fc0558), TOBN(0x10207371, 0x01cebade), + TOBN(0x5f87e690, 0x0647a82b), TOBN(0x908e0ed4, 0x8f40054f), + TOBN(0xa9f633d4, 0x79853803), TOBN(0x8ed13c9a, 0x4a28b252), + TOBN(0x3e2ef676, 0x1f460f64), TOBN(0x53930b9b, 0x36d06336), + TOBN(0x347073ac, 0x8fc4979b), TOBN(0x84380e0e, 0x5ecd5597), + TOBN(0xe3b22c6b, 0xc4fe3c39), TOBN(0xba4a8153, 0x6c7bebdf), + TOBN(0xf23ab6b7, 0x25693459), TOBN(0x53bc3770, 0x14922b11), + TOBN(0x4645c8ab, 0x5afc60db), TOBN(0xaa022355, 0x20b9f2a3), + TOBN(0x52a2954c, 0xce0fc507), TOBN(0x8c2731bb, 0x7ce1c2e7), + TOBN(0xf39608ab, 0x18a0339d), TOBN(0xac7a658d, 0x3735436c), + TOBN(0xb22c2b07, 0xcd992b4f), TOBN(0x4e83daec, 0xf40dcfd4), + TOBN(0x8a34c7be, 0x2f39ea3e), TOBN(0xef0c005f, 0xb0a56d2e), + TOBN(0x62731f6a, 0x6edd8038), TOBN(0x5721d740, 0x4e3cb075), + TOBN(0x1ea41511, 0xfbeeee1b), TOBN(0xd1ef5e73, 0xef1d0c05), + TOBN(0x42feefd1, 0x73c07d35), TOBN(0xe530a00a, 0x8a329493), + 
TOBN(0x5d55b7fe, 0xf15ebfb0), TOBN(0x549de03c, 0xd322491a), + TOBN(0xf7b5f602, 0x745b3237), TOBN(0x3632a3a2, 0x1ab6e2b6), + TOBN(0x0d3bba89, 0x0ef59f78), TOBN(0x0dfc6443, 0xc9e52b9a), + TOBN(0x1dc79699, 0x72631447), TOBN(0xef033917, 0xb3be20b1), + TOBN(0x0c92735d, 0xb1383948), TOBN(0xc1fc29a2, 0xc0dd7d7d), + TOBN(0x6485b697, 0x403ed068), TOBN(0x13bfaab3, 0xaac93bdc), + TOBN(0x410dc6a9, 0x0deeaf52), TOBN(0xb003fb02, 0x4c641c15), + TOBN(0x1384978c, 0x5bc504c4), TOBN(0x37640487, 0x864a6a77), + TOBN(0x05991bc6, 0x222a77da), TOBN(0x62260a57, 0x5e47eb11), + TOBN(0xc7af6613, 0xf21b432c), TOBN(0x22f3acc9, 0xab4953e9), + TOBN(0x52934922, 0x8e41d155), TOBN(0x4d024568, 0x3ac059ef), + TOBN(0xb0201755, 0x4d884411), TOBN(0xce8055cf, 0xa59a178f), + TOBN(0xcd77d1af, 0xf6204549), TOBN(0xa0a00a3e, 0xc7066759), + TOBN(0x471071ef, 0x0272c229), TOBN(0x009bcf6b, 0xd3c4b6b0), + TOBN(0x2a2638a8, 0x22305177), TOBN(0xd51d59df, 0x41645bbf), + TOBN(0xa81142fd, 0xc0a7a3c0), TOBN(0xa17eca6d, 0x4c7063ee), + TOBN(0x0bb887ed, 0x60d9dcec), TOBN(0xd6d28e51, 0x20ad2455), + TOBN(0xebed6308, 0xa67102ba), TOBN(0x042c3114, 0x8bffa408), + TOBN(0xfd099ac5, 0x8aa68e30), TOBN(0x7a6a3d7c, 0x1483513e), + TOBN(0xffcc6b75, 0xba2d8f0c), TOBN(0x54dacf96, 0x1e78b954), + TOBN(0xf645696f, 0xa4a9af89), TOBN(0x3a411940, 0x06ac98ec), + TOBN(0x41b8b3f6, 0x22a67a20), TOBN(0x2d0b1e0f, 0x99dec626), + TOBN(0x27c89192, 0x40be34e8), TOBN(0xc7162b37, 0x91907f35), + TOBN(0x90188ec1, 0xa956702b), TOBN(0xca132f7d, 0xdf93769c), + TOBN(0x3ece44f9, 0x0e2025b4), TOBN(0x67aaec69, 0x0c62f14c), + TOBN(0xad741418, 0x22e3cc11), TOBN(0xcf9b75c3, 0x7ff9a50e), + TOBN(0x02fa2b16, 0x4d348272), TOBN(0xbd99d61a, 0x9959d56d), + TOBN(0xbc4f19db, 0x18762916), TOBN(0xcc7cce50, 0x49c1ac80), + TOBN(0x4d59ebaa, 0xd846bd83), TOBN(0x8775a9dc, 0xa9202849), + TOBN(0x07ec4ae1, 0x6e1f4ca9), TOBN(0x27eb5875, 0xba893f11), + TOBN(0x00284d51, 0x662cc565), TOBN(0x82353a6b, 0x0db4138d), + TOBN(0xd9c7aaaa, 0xaa32a594), TOBN(0xf5528b5e, 0xa5669c47), + TOBN(0xf3220231, 0x2f23c5ff), TOBN(0xe3e8147a, 0x6affa3a1), + TOBN(0xfb423d5c, 0x202ddda0), TOBN(0x3d6414ac, 0x6b871bd4), + TOBN(0x586f82e1, 0xa51a168a), TOBN(0xb712c671, 0x48ae5448), + TOBN(0x9a2e4bd1, 0x76233eb8), TOBN(0x0188223a, 0x78811ca9), + TOBN(0x553c5e21, 0xf7c18de1), TOBN(0x7682e451, 0xb27bb286), + TOBN(0x3ed036b3, 0x0e51e929), TOBN(0xf487211b, 0xec9cb34f), + TOBN(0x0d094277, 0x0c24efc8), TOBN(0x0349fd04, 0xbef737a4), + TOBN(0x6d1c9dd2, 0x514cdd28), TOBN(0x29c135ff, 0x30da9521), + TOBN(0xea6e4508, 0xf78b0b6f), TOBN(0x176f5dd2, 0x678c143c), + TOBN(0x08148418, 0x4be21e65), TOBN(0x27f7525c, 0xe7df38c4), + TOBN(0x1fb70e09, 0x748ab1a4), TOBN(0x9cba50a0, 0x5efe4433), + TOBN(0x7846c7a6, 0x15f75af2), TOBN(0x2a7c2c57, 0x5ee73ea8), + TOBN(0x42e566a4, 0x3f0a449a), TOBN(0x45474c3b, 0xad90fc3d), + TOBN(0x7447be3d, 0x8b61d057), TOBN(0x3e9d1cf1, 0x3a4ec092), + TOBN(0x1603e453, 0xf380a6e6), TOBN(0x0b86e431, 0x9b1437c2), + TOBN(0x7a4173f2, 0xef29610a), TOBN(0x8fa729a7, 0xf03d57f7), + TOBN(0x3e186f6e, 0x6c9c217e), TOBN(0xbe1d3079, 0x91919524), + TOBN(0x92a62a70, 0x153d4fb1), TOBN(0x32ed3e34, 0xd68c2f71), + TOBN(0xd785027f, 0x9eb1a8b7), TOBN(0xbc37eb77, 0xc5b22fe8), + TOBN(0x466b34f0, 0xb9d6a191), TOBN(0x008a89af, 0x9a05f816), + TOBN(0x19b028fb, 0x7d42c10a), TOBN(0x7fe8c92f, 0x49b3f6b8), + TOBN(0x58907cc0, 0xa5a0ade3), TOBN(0xb3154f51, 0x559d1a7c), + TOBN(0x5066efb6, 0xd9790ed6), TOBN(0xa77a0cbc, 0xa6aa793b), + TOBN(0x1a915f3c, 0x223e042e), TOBN(0x1c5def04, 0x69c5874b), + TOBN(0x0e830078, 0x73b6c1da), TOBN(0x55cf85d2, 0xfcd8557a), + TOBN(0x0f7c7c76, 
0x0460f3b1), TOBN(0x87052acb, 0x46e58063), + TOBN(0x09212b80, 0x907eae66), TOBN(0x3cb068e0, 0x4d721c89), + TOBN(0xa87941ae, 0xdd45ac1c), TOBN(0xde8d5c0d, 0x0daa0dbb), + TOBN(0xda421fdc, 0xe3502e6e), TOBN(0xc8944201, 0x4d89a084), + TOBN(0x7307ba5e, 0xf0c24bfb), TOBN(0xda212beb, 0x20bde0ef), + TOBN(0xea2da24b, 0xf82ce682), TOBN(0x058d3816, 0x07f71fe4), + TOBN(0x35a02462, 0x5ffad8de), TOBN(0xcd7b05dc, 0xaadcefab), + TOBN(0xd442f8ed, 0x1d9f54ec), TOBN(0x8be3d618, 0xb2d3b5ca), + TOBN(0xe2220ed0, 0xe06b2ce2), TOBN(0x82699a5f, 0x1b0da4c0), + TOBN(0x3ff106f5, 0x71c0c3a7), TOBN(0x8f580f5a, 0x0d34180c), + TOBN(0x4ebb120e, 0x22d7d375), TOBN(0x5e5782cc, 0xe9513675), + TOBN(0x2275580c, 0x99c82a70), TOBN(0xe8359fbf, 0x15ea8c4c), + TOBN(0x53b48db8, 0x7b415e70), TOBN(0xaacf2240, 0x100c6014), + TOBN(0x9faaccf5, 0xe4652f1d), TOBN(0xbd6fdd2a, 0xd56157b2), + TOBN(0xa4f4fb1f, 0x6261ec50), TOBN(0x244e55ad, 0x476bcd52), + TOBN(0x881c9305, 0x047d320b), TOBN(0x1ca983d5, 0x6181263f), + TOBN(0x354e9a44, 0x278fb8ee), TOBN(0xad2dbc0f, 0x396e4964), + TOBN(0x723f3aa2, 0x9268b3de), TOBN(0x0d1ca29a, 0xe6e0609a), + TOBN(0x794866aa, 0x6cf44252), TOBN(0x0b59f3e3, 0x01af87ed), + TOBN(0xe234e5ff, 0x7f4a6c51), TOBN(0xa8768fd2, 0x61dc2f7e), + TOBN(0xdafc7332, 0x0a94d81f), TOBN(0xd7f84282, 0x06938ce1), + TOBN(0xae0b3c0e, 0x0546063e), TOBN(0x7fbadcb2, 0x5d61abc6), + TOBN(0xd5d7a2c9, 0x369ac400), TOBN(0xa5978d09, 0xae67d10c), + TOBN(0x290f211e, 0x4f85eaac), TOBN(0xe61e2ad1, 0xfacac681), + TOBN(0xae125225, 0x388384cd), TOBN(0xa7fb68e9, 0xccfde30f), + TOBN(0x7a59b936, 0x3daed4c2), TOBN(0x80a9aa40, 0x2606f789), + TOBN(0xb40c1ea5, 0xf6a6d90a), TOBN(0x948364d3, 0x514d5885), + TOBN(0x062ebc60, 0x70985182), TOBN(0xa6db5b0e, 0x33310895), + TOBN(0x64a12175, 0xe329c2f5), TOBN(0xc5f25bd2, 0x90ea237e), + TOBN(0x7915c524, 0x2d0a4c23), TOBN(0xeb5d26e4, 0x6bb3cc52), + TOBN(0x369a9116, 0xc09e2c92), TOBN(0x0c527f92, 0xcf182cf8), + TOBN(0x9e591938, 0x2aede0ac), TOBN(0xb2922208, 0x6cc34939), + TOBN(0x3c9d8962, 0x99a34361), TOBN(0x3c81836d, 0xc1905fe6), + TOBN(0x4bfeb57f, 0xa001ec5a), TOBN(0xe993f5bb, 0xa0dc5dba), + TOBN(0x47884109, 0x724a1380), TOBN(0x8a0369ab, 0x32fe9a04), + TOBN(0xea068d60, 0x8c927db8), TOBN(0xbf5f37cf, 0x94655741), + TOBN(0x47d402a2, 0x04b6c7ea), TOBN(0x4551c295, 0x6af259cb), + TOBN(0x698b71e7, 0xed77ee8b), TOBN(0xbddf7bd0, 0xf309d5c7), + TOBN(0x6201c22c, 0x34e780ca), TOBN(0xab04f7d8, 0x4c295ef4), + TOBN(0x1c947294, 0x4313a8ce), TOBN(0xe532e4ac, 0x92ca4cfe), + TOBN(0x89738f80, 0xd0a7a97a), TOBN(0xec088c88, 0xa580fd5b), + TOBN(0x612b1ecc, 0x42ce9e51), TOBN(0x8f9840fd, 0xb25fdd2a), + TOBN(0x3cda78c0, 0x01e7f839), TOBN(0x546b3d3a, 0xece05480), + TOBN(0x271719a9, 0x80d30916), TOBN(0x45497107, 0x584c20c4), + TOBN(0xaf8f9478, 0x5bc78608), TOBN(0x28c7d484, 0x277e2a4c), + TOBN(0xfce01767, 0x88a2ffe4), TOBN(0xdc506a35, 0x28e169a5), + TOBN(0x0ea10861, 0x7af9c93a), TOBN(0x1ed24361, 0x03fa0e08), + TOBN(0x96eaaa92, 0xa3d694e7), TOBN(0xc0f43b4d, 0xef50bc74), + TOBN(0xce6aa58c, 0x64114db4), TOBN(0x8218e8ea, 0x7c000fd4), + TOBN(0xac815dfb, 0x185f8844), TOBN(0xcd7e90cb, 0x1557abfb), + TOBN(0x23d16655, 0xafbfecdf), TOBN(0x80f3271f, 0x085cac4a), + TOBN(0x7fc39aa7, 0xd0e62f47), TOBN(0x88d519d1, 0x460a48e5), + TOBN(0x59559ac4, 0xd28f101e), TOBN(0x7981d9e9, 0xca9ae816), + TOBN(0x5c38652c, 0x9ac38203), TOBN(0x86eaf87f, 0x57657fe5), + TOBN(0x568fc472, 0xe21f5416), TOBN(0x2afff39c, 0xe7e597b5), + TOBN(0x3adbbb07, 0x256d4eab), TOBN(0x22598692, 0x8285ab89), + TOBN(0x35f8112a, 0x041caefe), TOBN(0x95df02e3, 0xa5064c8b), + TOBN(0x4d63356e, 0xc7004bf3), 
TOBN(0x230a08f4, 0xdb83c7de), + TOBN(0xca27b270, 0x8709a7b7), TOBN(0x0d1c4cc4, 0xcb9abd2d), + TOBN(0x8a0bc66e, 0x7550fee8), TOBN(0x369cd4c7, 0x9cf7247e), + TOBN(0x75562e84, 0x92b5b7e7), TOBN(0x8fed0da0, 0x5802af7b), + TOBN(0x6a7091c2, 0xe48fb889), TOBN(0x26882c13, 0x7b8a9d06), + TOBN(0xa2498663, 0x1b82a0e2), TOBN(0x844ed736, 0x3518152d), + TOBN(0x282f476f, 0xd86e27c7), TOBN(0xa04edaca, 0x04afefdc), + TOBN(0x8b256ebc, 0x6119e34d), TOBN(0x56a413e9, 0x0787d78b),} + , + {TOBN(0x82ee061d, 0x5a74be50), TOBN(0xe41781c4, 0xdea16ff5), + TOBN(0xe0b0c81e, 0x99bfc8a2), TOBN(0x624f4d69, 0x0b547e2d), + TOBN(0x3a83545d, 0xbdcc9ae4), TOBN(0x2573dbb6, 0x409b1e8e), + TOBN(0x482960c4, 0xa6c93539), TOBN(0xf01059ad, 0x5ae18798), + TOBN(0x715c9f97, 0x3112795f), TOBN(0xe8244437, 0x984e6ee1), + TOBN(0x55cb4858, 0xecb66bcd), TOBN(0x7c136735, 0xabaffbee), + TOBN(0x54661595, 0x5dbec38e), TOBN(0x51c0782c, 0x388ad153), + TOBN(0x9ba4c53a, 0xc6e0952f), TOBN(0x27e6782a, 0x1b21dfa8), + TOBN(0x682f903d, 0x4ed2dbc2), TOBN(0x0eba59c8, 0x7c3b2d83), + TOBN(0x8e9dc84d, 0x9c7e9335), TOBN(0x5f9b21b0, 0x0eb226d7), + TOBN(0xe33bd394, 0xaf267bae), TOBN(0xaa86cc25, 0xbe2e15ae), + TOBN(0x4f0bf67d, 0x6a8ec500), TOBN(0x5846aa44, 0xf9630658), + TOBN(0xfeb09740, 0xe2c2bf15), TOBN(0x627a2205, 0xa9e99704), + TOBN(0xec8d73d0, 0xc2fbc565), TOBN(0x223eed8f, 0xc20c8de8), + TOBN(0x1ee32583, 0xa8363b49), TOBN(0x1a0b6cb9, 0xc9c2b0a6), + TOBN(0x49f7c3d2, 0x90dbc85c), TOBN(0xa8dfbb97, 0x1ef4c1ac), + TOBN(0xafb34d4c, 0x65c7c2ab), TOBN(0x1d4610e7, 0xe2c5ea84), + TOBN(0x893f6d1b, 0x973c4ab5), TOBN(0xa3cdd7e9, 0x945ba5c4), + TOBN(0x60514983, 0x064417ee), TOBN(0x1459b23c, 0xad6bdf2b), + TOBN(0x23b2c341, 0x5cf726c3), TOBN(0x3a829635, 0x32d6354a), + TOBN(0x294f901f, 0xab192c18), TOBN(0xec5fcbfe, 0x7030164f), + TOBN(0xe2e2fcb7, 0xe2246ba6), TOBN(0x1e7c88b3, 0x221a1a0c), + TOBN(0x72c7dd93, 0xc92d88c5), TOBN(0x41c2148e, 0x1106fb59), + TOBN(0x547dd4f5, 0xa0f60f14), TOBN(0xed9b52b2, 0x63960f31), + TOBN(0x6c8349eb, 0xb0a5b358), TOBN(0xb154c5c2, 0x9e7e2ed6), + TOBN(0xcad5eccf, 0xeda462db), TOBN(0xf2d6dbe4, 0x2de66b69), + TOBN(0x426aedf3, 0x8665e5b2), TOBN(0x488a8513, 0x7b7f5723), + TOBN(0x15cc43b3, 0x8bcbb386), TOBN(0x27ad0af3, 0xd791d879), + TOBN(0xc16c236e, 0x846e364f), TOBN(0x7f33527c, 0xdea50ca0), + TOBN(0xc4810775, 0x0926b86d), TOBN(0x6c2a3609, 0x0598e70c), + TOBN(0xa6755e52, 0xf024e924), TOBN(0xe0fa07a4, 0x9db4afca), + TOBN(0x15c3ce7d, 0x66831790), TOBN(0x5b4ef350, 0xa6cbb0d6), + TOBN(0x2c4aafc4, 0xb6205969), TOBN(0x42563f02, 0xf6c7854f), + TOBN(0x016aced5, 0x1d983b48), TOBN(0xfeb356d8, 0x99949755), + TOBN(0x8c2a2c81, 0xd1a39bd7), TOBN(0x8f44340f, 0xe6934ae9), + TOBN(0x148cf91c, 0x447904da), TOBN(0x7340185f, 0x0f51a926), + TOBN(0x2f8f00fb, 0x7409ab46), TOBN(0x057e78e6, 0x80e289b2), + TOBN(0x03e5022c, 0xa888e5d1), TOBN(0x3c87111a, 0x9dede4e2), + TOBN(0x5b9b0e1c, 0x7809460b), TOBN(0xe751c852, 0x71c9abc7), + TOBN(0x8b944e28, 0xc7cc1dc9), TOBN(0x4f201ffa, 0x1d3cfa08), + TOBN(0x02fc905c, 0x3e6721ce), TOBN(0xd52d70da, 0xd0b3674c), + TOBN(0x5dc2e5ca, 0x18810da4), TOBN(0xa984b273, 0x5c69dd99), + TOBN(0x63b92527, 0x84de5ca4), TOBN(0x2f1c9872, 0xc852dec4), + TOBN(0x18b03593, 0xc2e3de09), TOBN(0x19d70b01, 0x9813dc2f), + TOBN(0x42806b2d, 0xa6dc1d29), TOBN(0xd3030009, 0xf871e144), + TOBN(0xa1feb333, 0xaaf49276), TOBN(0xb5583b9e, 0xc70bc04b), + TOBN(0x1db0be78, 0x95695f20), TOBN(0xfc841811, 0x89d012b5), + TOBN(0x6409f272, 0x05f61643), TOBN(0x40d34174, 0xd5883128), + TOBN(0xd79196f5, 0x67419833), TOBN(0x6059e252, 0x863b7b08), + TOBN(0x84da1817, 0x1c56700c), 
TOBN(0x5758ee56, 0xb28d3ec4), + TOBN(0x7da2771d, 0x013b0ea6), TOBN(0xfddf524b, 0x54c5e9b9), + TOBN(0x7df4faf8, 0x24305d80), TOBN(0x58f5c1bf, 0x3a97763f), + TOBN(0xa5af37f1, 0x7c696042), TOBN(0xd4cba22c, 0x4a2538de), + TOBN(0x211cb995, 0x9ea42600), TOBN(0xcd105f41, 0x7b069889), + TOBN(0xb1e1cf19, 0xddb81e74), TOBN(0x472f2d89, 0x5157b8ca), + TOBN(0x086fb008, 0xee9db885), TOBN(0x365cd570, 0x0f26d131), + TOBN(0x284b02bb, 0xa2be7053), TOBN(0xdcbbf7c6, 0x7ab9a6d6), + TOBN(0x4425559c, 0x20f7a530), TOBN(0x961f2dfa, 0x188767c8), + TOBN(0xe2fd9435, 0x70dc80c4), TOBN(0x104d6b63, 0xf0784120), + TOBN(0x7f592bc1, 0x53567122), TOBN(0xf6bc1246, 0xf688ad77), + TOBN(0x05214c05, 0x0f15dde9), TOBN(0xa47a76a8, 0x0d5f2b82), + TOBN(0xbb254d30, 0x62e82b62), TOBN(0x11a05fe0, 0x3ec955ee), + TOBN(0x7eaff46e, 0x9d529b36), TOBN(0x55ab1301, 0x8f9e3df6), + TOBN(0xc463e371, 0x99317698), TOBN(0xfd251438, 0xccda47ad), + TOBN(0xca9c3547, 0x23d695ea), TOBN(0x48ce626e, 0x16e589b5), + TOBN(0x6b5b64c7, 0xb187d086), TOBN(0xd02e1794, 0xb2207948), + TOBN(0x8b58e98f, 0x7198111d), TOBN(0x90ca6305, 0xdcf9c3cc), + TOBN(0x5691fe72, 0xf34089b0), TOBN(0x60941af1, 0xfc7c80ff), + TOBN(0xa09bc0a2, 0x22eb51e5), TOBN(0xc0bb7244, 0xaa9cf09a), + TOBN(0x36a8077f, 0x80159f06), TOBN(0x8b5c989e, 0xdddc560e), + TOBN(0x19d2f316, 0x512e1f43), TOBN(0x02eac554, 0xad08ff62), + TOBN(0x012ab84c, 0x07d20b4e), TOBN(0x37d1e115, 0xd6d4e4e1), + TOBN(0xb6443e1a, 0xab7b19a8), TOBN(0xf08d067e, 0xdef8cd45), + TOBN(0x63adf3e9, 0x685e03da), TOBN(0xcf15a10e, 0x4792b916), + TOBN(0xf44bcce5, 0xb738a425), TOBN(0xebe131d5, 0x9636b2fd), + TOBN(0x94068841, 0x7850d605), TOBN(0x09684eaa, 0xb40d749d), + TOBN(0x8c3c669c, 0x72ba075b), TOBN(0x89f78b55, 0xba469015), + TOBN(0x5706aade, 0x3e9f8ba8), TOBN(0x6d8bd565, 0xb32d7ed7), + TOBN(0x25f4e63b, 0x805f08d6), TOBN(0x7f48200d, 0xc3bcc1b5), + TOBN(0x4e801968, 0xb025d847), TOBN(0x74afac04, 0x87cbe0a8), + TOBN(0x43ed2c2b, 0x7e63d690), TOBN(0xefb6bbf0, 0x0223cdb8), + TOBN(0x4fec3cae, 0x2884d3fe), TOBN(0x065ecce6, 0xd75e25a4), + TOBN(0x6c2294ce, 0x69f79071), TOBN(0x0d9a8e5f, 0x044b8666), + TOBN(0x5009f238, 0x17b69d8f), TOBN(0x3c29f8fe, 0xc5dfdaf7), + TOBN(0x9067528f, 0xebae68c4), TOBN(0x5b385632, 0x30c5ba21), + TOBN(0x540df119, 0x1fdd1aec), TOBN(0xcf37825b, 0xcfba4c78), + TOBN(0x77eff980, 0xbeb11454), TOBN(0x40a1a991, 0x60c1b066), + TOBN(0xe8018980, 0xf889a1c7), TOBN(0xb9c52ae9, 0x76c24be0), + TOBN(0x05fbbcce, 0x45650ef4), TOBN(0xae000f10, 0x8aa29ac7), + TOBN(0x884b7172, 0x4f04c470), TOBN(0x7cd4fde2, 0x19bb5c25), + TOBN(0x6477b22a, 0xe8840869), TOBN(0xa8868859, 0x5fbd0686), + TOBN(0xf23cc02e, 0x1116dfba), TOBN(0x76cd563f, 0xd87d7776), + TOBN(0xe2a37598, 0xa9d82abf), TOBN(0x5f188ccb, 0xe6c170f5), + TOBN(0x81682200, 0x5066b087), TOBN(0xda22c212, 0xc7155ada), + TOBN(0x151e5d3a, 0xfbddb479), TOBN(0x4b606b84, 0x6d715b99), + TOBN(0x4a73b54b, 0xf997cb2e), TOBN(0x9a1bfe43, 0x3ecd8b66), + TOBN(0x1c312809, 0x2a67d48a), TOBN(0xcd6a671e, 0x031fa9e2), + TOBN(0xbec3312a, 0x0e43a34a), TOBN(0x1d935639, 0x55ef47d3), + TOBN(0x5ea02489, 0x8fea73ea), TOBN(0x8247b364, 0xa035afb2), + TOBN(0xb58300a6, 0x5265b54c), TOBN(0x3286662f, 0x722c7148), + TOBN(0xb77fd76b, 0xb4ec4c20), TOBN(0xf0a12fa7, 0x0f3fe3fd), + TOBN(0xf845bbf5, 0x41d8c7e8), TOBN(0xe4d969ca, 0x5ec10aa8), + TOBN(0x4c0053b7, 0x43e232a3), TOBN(0xdc7a3fac, 0x37f8a45a), + TOBN(0x3c4261c5, 0x20d81c8f), TOBN(0xfd4b3453, 0xb00eab00), + TOBN(0x76d48f86, 0xd36e3062), TOBN(0x626c5277, 0xa143ff02), + TOBN(0x538174de, 0xaf76f42e), TOBN(0x2267aa86, 0x6407ceac), + TOBN(0xfad76351, 0x72e572d5), TOBN(0xab861af7, 
0xba7330eb), + TOBN(0xa0a1c8c7, 0x418d8657), TOBN(0x988821cb, 0x20289a52), + TOBN(0x79732522, 0xcccc18ad), TOBN(0xaadf3f8d, 0xf1a6e027), + TOBN(0xf7382c93, 0x17c2354d), TOBN(0x5ce1680c, 0xd818b689), + TOBN(0x359ebbfc, 0xd9ecbee9), TOBN(0x4330689c, 0x1cae62ac), + TOBN(0xb55ce5b4, 0xc51ac38a), TOBN(0x7921dfea, 0xfe238ee8), + TOBN(0x3972bef8, 0x271d1ca5), TOBN(0x3e423bc7, 0xe8aabd18), + TOBN(0x57b09f3f, 0x44a3e5e3), TOBN(0x5da886ae, 0x7b444d66), + TOBN(0x68206634, 0xa9964375), TOBN(0x356a2fa3, 0x699cd0ff), + TOBN(0xaf0faa24, 0xdba515e9), TOBN(0x536e1f5c, 0xb321d79a), + TOBN(0xd3b9913a, 0x5c04e4ea), TOBN(0xd549dcfe, 0xd6f11513), + TOBN(0xee227bf5, 0x79fd1d94), TOBN(0x9f35afee, 0xb43f2c67), + TOBN(0xd2638d24, 0xf1314f53), TOBN(0x62baf948, 0xcabcd822), + TOBN(0x5542de29, 0x4ef48db0), TOBN(0xb3eb6a04, 0xfc5f6bb2), + TOBN(0x23c110ae, 0x1208e16a), TOBN(0x1a4d15b5, 0xf8363e24), + TOBN(0x30716844, 0x164be00b), TOBN(0xa8e24824, 0xf6f4690d), + TOBN(0x548773a2, 0x90b170cf), TOBN(0xa1bef331, 0x42f191f4), + TOBN(0x70f418d0, 0x9247aa97), TOBN(0xea06028e, 0x48be9147), + TOBN(0xe13122f3, 0xdbfb894e), TOBN(0xbe9b79f6, 0xce274b18), + TOBN(0x85a49de5, 0xca58aadf), TOBN(0x24957758, 0x11487351), + TOBN(0x111def61, 0xbb939099), TOBN(0x1d6a974a, 0x26d13694), + TOBN(0x4474b4ce, 0xd3fc253b), TOBN(0x3a1485e6, 0x4c5db15e), + TOBN(0xe79667b4, 0x147c15b4), TOBN(0xe34f553b, 0x7bc61301), + TOBN(0x032b80f8, 0x17094381), TOBN(0x55d8bafd, 0x723eaa21), + TOBN(0x5a987995, 0xf1c0e74e), TOBN(0x5a9b292e, 0xebba289c), + TOBN(0x413cd4b2, 0xeb4c8251), TOBN(0x98b5d243, 0xd162db0a), + TOBN(0xbb47bf66, 0x68342520), TOBN(0x08d68949, 0xbaa862d1), + TOBN(0x11f349c7, 0xe906abcd), TOBN(0x454ce985, 0xed7bf00e), + TOBN(0xacab5c9e, 0xb55b803b), TOBN(0xb03468ea, 0x31e3c16d), + TOBN(0x5c24213d, 0xd273bf12), TOBN(0x211538eb, 0x71587887), + TOBN(0x198e4a2f, 0x731dea2d), TOBN(0xd5856cf2, 0x74ed7b2a), + TOBN(0x86a632eb, 0x13a664fe), TOBN(0x932cd909, 0xbda41291), + TOBN(0x850e95d4, 0xc0c4ddc0), TOBN(0xc0f422f8, 0x347fc2c9), + TOBN(0xe68cbec4, 0x86076bcb), TOBN(0xf9e7c0c0, 0xcd6cd286), + TOBN(0x65994ddb, 0x0f5f27ca), TOBN(0xe85461fb, 0xa80d59ff), + TOBN(0xff05481a, 0x66601023), TOBN(0xc665427a, 0xfc9ebbfb), + TOBN(0xb0571a69, 0x7587fd52), TOBN(0x935289f8, 0x8d49efce), + TOBN(0x61becc60, 0xea420688), TOBN(0xb22639d9, 0x13a786af), + TOBN(0x1a8e6220, 0x361ecf90), TOBN(0x001f23e0, 0x25506463), + TOBN(0xe4ae9b5d, 0x0a5c2b79), TOBN(0xebc9cdad, 0xd8149db5), + TOBN(0xb33164a1, 0x934aa728), TOBN(0x750eb00e, 0xae9b60f3), + TOBN(0x5a91615b, 0x9b9cfbfd), TOBN(0x97015cbf, 0xef45f7f6), + TOBN(0xb462c4a5, 0xbf5151df), TOBN(0x21adcc41, 0xb07118f2), + TOBN(0xd60c545b, 0x043fa42c), TOBN(0xfc21aa54, 0xe96be1ab), + TOBN(0xe84bc32f, 0x4e51ea80), TOBN(0x3dae45f0, 0x259b5d8d), + TOBN(0xbb73c7eb, 0xc38f1b5e), TOBN(0xe405a74a, 0xe8ae617d), + TOBN(0xbb1ae9c6, 0x9f1c56bd), TOBN(0x8c176b98, 0x49f196a4), + TOBN(0xc448f311, 0x6875092b), TOBN(0xb5afe3de, 0x9f976033), + TOBN(0xa8dafd49, 0x145813e5), TOBN(0x687fc4d9, 0xe2b34226), + TOBN(0xf2dfc92d, 0x4c7ff57f), TOBN(0x004e3fc1, 0x401f1b46), + TOBN(0x5afddab6, 0x1430c9ab), TOBN(0x0bdd41d3, 0x2238e997), + TOBN(0xf0947430, 0x418042ae), TOBN(0x71f9adda, 0xcdddc4cb), + TOBN(0x7090c016, 0xc52dd907), TOBN(0xd9bdf44d, 0x29e2047f), + TOBN(0xe6f1fe80, 0x1b1011a6), TOBN(0xb63accbc, 0xd9acdc78), + TOBN(0xcfc7e235, 0x1272a95b), TOBN(0x0c667717, 0xa6276ac8), + TOBN(0x3c0d3709, 0xe2d7eef7), TOBN(0x5add2b06, 0x9a685b3e), + TOBN(0x363ad32d, 0x14ea5d65), TOBN(0xf8e01f06, 0x8d7dd506), + TOBN(0xc9ea2213, 0x75b4aac6), TOBN(0xed2a2bf9, 0x0d353466), + 
TOBN(0x439d79b5, 0xe9d3a7c3), TOBN(0x8e0ee5a6, 0x81b7f34b), + TOBN(0xcf3dacf5, 0x1dc4ba75), TOBN(0x1d3d1773, 0xeb3310c7), + TOBN(0xa8e67112, 0x7747ae83), TOBN(0x31f43160, 0x197d6b40), + TOBN(0x0521ccee, 0xcd961400), TOBN(0x67246f11, 0xf6535768), + TOBN(0x702fcc5a, 0xef0c3133), TOBN(0x247cc45d, 0x7e16693b), + TOBN(0xfd484e49, 0xc729b749), TOBN(0x522cef7d, 0xb218320f), + TOBN(0xe56ef405, 0x59ab93b3), TOBN(0x225fba11, 0x9f181071), + TOBN(0x33bd6595, 0x15330ed0), TOBN(0xc4be69d5, 0x1ddb32f7), + TOBN(0x264c7668, 0x0448087c), TOBN(0xac30903f, 0x71432dae), + TOBN(0x3851b266, 0x00f9bf47), TOBN(0x400ed311, 0x6cdd6d03), + TOBN(0x045e79fe, 0xf8fd2424), TOBN(0xfdfd974a, 0xfa6da98b), + TOBN(0x45c9f641, 0x0c1e673a), TOBN(0x76f2e733, 0x5b2c5168), + TOBN(0x1adaebb5, 0x2a601753), TOBN(0xb286514c, 0xc57c2d49), + TOBN(0xd8769670, 0x1e0bfd24), TOBN(0x950c547e, 0x04478922), + TOBN(0xd1d41969, 0xe5d32bfe), TOBN(0x30bc1472, 0x750d6c3e), + TOBN(0x8f3679fe, 0xe0e27f3a), TOBN(0x8f64a7dc, 0xa4a6ee0c), + TOBN(0x2fe59937, 0x633dfb1f), TOBN(0xea82c395, 0x977f2547), + TOBN(0xcbdfdf1a, 0x661ea646), TOBN(0xc7ccc591, 0xb9085451), + TOBN(0x82177962, 0x81761e13), TOBN(0xda57596f, 0x9196885c), + TOBN(0xbc17e849, 0x28ffbd70), TOBN(0x1e6e0a41, 0x2671d36f), + TOBN(0x61ae872c, 0x4152fcf5), TOBN(0x441c87b0, 0x9e77e754), + TOBN(0xd0799dd5, 0xa34dff09), TOBN(0x766b4e44, 0x88a6b171), + TOBN(0xdc06a512, 0x11f1c792), TOBN(0xea02ae93, 0x4be35c3e), + TOBN(0xe5ca4d6d, 0xe90c469e), TOBN(0x4df4368e, 0x56e4ff5c), + TOBN(0x7817acab, 0x4baef62e), TOBN(0x9f5a2202, 0xa85b91e8), + TOBN(0x9666ebe6, 0x6ce57610), TOBN(0x32ad31f3, 0xf73bfe03), + TOBN(0x628330a4, 0x25bcf4d6), TOBN(0xea950593, 0x515056e6), + TOBN(0x59811c89, 0xe1332156), TOBN(0xc89cf1fe, 0x8c11b2d7), + TOBN(0x75b63913, 0x04e60cc0), TOBN(0xce811e8d, 0x4625d375), + TOBN(0x030e43fc, 0x2d26e562), TOBN(0xfbb30b4b, 0x608d36a0), + TOBN(0x634ff82c, 0x48528118), TOBN(0x7c6fe085, 0xcd285911), + TOBN(0x7f2830c0, 0x99358f28), TOBN(0x2e60a95e, 0x665e6c09), + TOBN(0x08407d3d, 0x9b785dbf), TOBN(0x530889ab, 0xa759bce7), + TOBN(0xf228e0e6, 0x52f61239), TOBN(0x2b6d1461, 0x6879be3c), + TOBN(0xe6902c04, 0x51a7bbf7), TOBN(0x30ad99f0, 0x76f24a64), + TOBN(0x66d9317a, 0x98bc6da0), TOBN(0xf4f877f3, 0xcb596ac0), + TOBN(0xb05ff62d, 0x4c44f119), TOBN(0x4555f536, 0xe9b77416), + TOBN(0xc7c0d059, 0x8caed63b), TOBN(0x0cd2b7ce, 0xc358b2a9), + TOBN(0x3f33287b, 0x46945fa3), TOBN(0xf8785b20, 0xd67c8791), + TOBN(0xc54a7a61, 0x9637bd08), TOBN(0x54d4598c, 0x18be79d7), + TOBN(0x889e5acb, 0xc46d7ce1), TOBN(0x9a515bb7, 0x8b085877), + TOBN(0xfac1a03d, 0x0b7a5050), TOBN(0x7d3e738a, 0xf2926035), + TOBN(0x861cc2ce, 0x2a6cb0eb), TOBN(0x6f2e2955, 0x8f7adc79), + TOBN(0x61c4d451, 0x33016376), TOBN(0xd9fd2c80, 0x5ad59090), + TOBN(0xe5a83738, 0xb2b836a1), TOBN(0x855b41a0, 0x7c0d6622), + TOBN(0x186fe317, 0x7cc19af1), TOBN(0x6465c1ff, 0xfdd99acb), + TOBN(0x46e5c23f, 0x6974b99e), TOBN(0x75a7cf8b, 0xa2717cbe), + TOBN(0x4d2ebc3f, 0x062be658), TOBN(0x094b4447, 0x5f209c98), + TOBN(0x4af285ed, 0xb940cb5a), TOBN(0x6706d792, 0x7cc82f10), + TOBN(0xc8c8776c, 0x030526fa), TOBN(0xfa8e6f76, 0xa0da9140), + TOBN(0x77ea9d34, 0x591ee4f0), TOBN(0x5f46e337, 0x40274166), + TOBN(0x1bdf98bb, 0xea671457), TOBN(0xd7c08b46, 0x862a1fe2), + TOBN(0x46cc303c, 0x1c08ad63), TOBN(0x99543440, 0x4c845e7b), + TOBN(0x1b8fbdb5, 0x48f36bf7), TOBN(0x5b82c392, 0x8c8273a7), + TOBN(0x08f712c4, 0x928435d5), TOBN(0x071cf0f1, 0x79330380), + TOBN(0xc74c2d24, 0xa8da054a), TOBN(0xcb0e7201, 0x43c46b5c), + TOBN(0x0ad7337a, 0xc0b7eff3), TOBN(0x8552225e, 0xc5e48b3c), + TOBN(0xe6f78b0c, 
0x73f13a5f), TOBN(0x5e70062e, 0x82349cbe), + TOBN(0x6b8d5048, 0xe7073969), TOBN(0x392d2a29, 0xc33cb3d2), + TOBN(0xee4f727c, 0x4ecaa20f), TOBN(0xa068c99e, 0x2ccde707), + TOBN(0xfcd5651f, 0xb87a2913), TOBN(0xea3e3c15, 0x3cc252f0), + TOBN(0x777d92df, 0x3b6cd3e4), TOBN(0x7a414143, 0xc5a732e7), + TOBN(0xa895951a, 0xa71ff493), TOBN(0xfe980c92, 0xbbd37cf6), + TOBN(0x45bd5e64, 0xdecfeeff), TOBN(0x910dc2a9, 0xa44c43e9), + TOBN(0xcb403f26, 0xcca9f54d), TOBN(0x928bbdfb, 0x9303f6db), + TOBN(0x3c37951e, 0xa9eee67c), TOBN(0x3bd61a52, 0xf79961c3), + TOBN(0x09a238e6, 0x395c9a79), TOBN(0x6940ca2d, 0x61eb352d), + TOBN(0x7d1e5c5e, 0xc1875631), TOBN(0x1e19742c, 0x1e1b20d1), + TOBN(0x4633d908, 0x23fc2e6e), TOBN(0xa76e29a9, 0x08959149), + TOBN(0x61069d9c, 0x84ed7da5), TOBN(0x0baa11cf, 0x5dbcad51), + TOBN(0xd01eec64, 0x961849da), TOBN(0x93b75f1f, 0xaf3d8c28), + TOBN(0x57bc4f9f, 0x1ca2ee44), TOBN(0x5a26322d, 0x00e00558), + TOBN(0x1888d658, 0x61a023ef), TOBN(0x1d72aab4, 0xb9e5246e), + TOBN(0xa9a26348, 0xe5563ec0), TOBN(0xa0971963, 0xc3439a43), + TOBN(0x567dd54b, 0xadb9b5b7), TOBN(0x73fac1a1, 0xc45a524b), + TOBN(0x8fe97ef7, 0xfe38e608), TOBN(0x608748d2, 0x3f384f48), + TOBN(0xb0571794, 0xc486094f), TOBN(0x869254a3, 0x8bf3a8d6), + TOBN(0x148a8dd1, 0x310b0e25), TOBN(0x99ab9f3f, 0x9aa3f7d8), + TOBN(0x0927c68a, 0x6706c02e), TOBN(0x22b5e76c, 0x69790e6c), + TOBN(0x6c325260, 0x6c71376c), TOBN(0x53a57690, 0x09ef6657), + TOBN(0x8d63f852, 0xedffcf3a), TOBN(0xb4d2ed04, 0x3c0a6f55), + TOBN(0xdb3aa8de, 0x12519b9e), TOBN(0x5d38e9c4, 0x1e0a569a), + TOBN(0x871528bf, 0x303747e2), TOBN(0xa208e77c, 0xf5b5c18d), + TOBN(0x9d129c88, 0xca6bf923), TOBN(0xbcbf197f, 0xbf02839f), + TOBN(0x9b9bf030, 0x27323194), TOBN(0x3b055a8b, 0x339ca59d), + TOBN(0xb46b2312, 0x0f669520), TOBN(0x19789f1f, 0x497e5f24), + TOBN(0x9c499468, 0xaaf01801), TOBN(0x72ee1190, 0x8b69d59c), + TOBN(0x8bd39595, 0xacf4c079), TOBN(0x3ee11ece, 0x8e0cd048), + TOBN(0xebde86ec, 0x1ed66f18), TOBN(0x225d906b, 0xd61fce43), + TOBN(0x5cab07d6, 0xe8bed74d), TOBN(0x16e4617f, 0x27855ab7), + TOBN(0x6568aadd, 0xb2fbc3dd), TOBN(0xedb5484f, 0x8aeddf5b), + TOBN(0x878f20e8, 0x6dcf2fad), TOBN(0x3516497c, 0x615f5699),} + , + {TOBN(0xef0a3fec, 0xfa181e69), TOBN(0x9ea02f81, 0x30d69a98), + TOBN(0xb2e9cf8e, 0x66eab95d), TOBN(0x520f2beb, 0x24720021), + TOBN(0x621c540a, 0x1df84361), TOBN(0x12037721, 0x71fa6d5d), + TOBN(0x6e3c7b51, 0x0ff5f6ff), TOBN(0x817a069b, 0xabb2bef3), + TOBN(0x83572fb6, 0xb294cda6), TOBN(0x6ce9bf75, 0xb9039f34), + TOBN(0x20e012f0, 0x095cbb21), TOBN(0xa0aecc1b, 0xd063f0da), + TOBN(0x57c21c3a, 0xf02909e5), TOBN(0xc7d59ecf, 0x48ce9cdc), + TOBN(0x2732b844, 0x8ae336f8), TOBN(0x056e3723, 0x3f4f85f4), + TOBN(0x8a10b531, 0x89e800ca), TOBN(0x50fe0c17, 0x145208fd), + TOBN(0x9e43c0d3, 0xb714ba37), TOBN(0x427d200e, 0x34189acc), + TOBN(0x05dee24f, 0xe616e2c0), TOBN(0x9c25f4c8, 0xee1854c1), + TOBN(0x4d3222a5, 0x8f342a73), TOBN(0x0807804f, 0xa027c952), + TOBN(0xc222653a, 0x4f0d56f3), TOBN(0x961e4047, 0xca28b805), + TOBN(0x2c03f8b0, 0x4a73434b), TOBN(0x4c966787, 0xab712a19), + TOBN(0xcc196c42, 0x864fee42), TOBN(0xc1be93da, 0x5b0ece5c), + TOBN(0xa87d9f22, 0xc131c159), TOBN(0x2bb6d593, 0xdce45655), + TOBN(0x22c49ec9, 0xb809b7ce), TOBN(0x8a41486b, 0xe2c72c2c), + TOBN(0x813b9420, 0xfea0bf36), TOBN(0xb3d36ee9, 0xa66dac69), + TOBN(0x6fddc08a, 0x328cc987), TOBN(0x0a3bcd2c, 0x3a326461), + TOBN(0x7103c49d, 0xd810dbba), TOBN(0xf9d81a28, 0x4b78a4c4), + TOBN(0x3de865ad, 0xe4d55941), TOBN(0xdedafa5e, 0x30384087), + TOBN(0x6f414abb, 0x4ef18b9b), TOBN(0x9ee9ea42, 0xfaee5268), + TOBN(0x260faa16, 0x37a55a4a), 
TOBN(0xeb19a514, 0x015f93b9), + TOBN(0x51d7ebd2, 0x9e9c3598), TOBN(0x523fc56d, 0x1932178e), + TOBN(0x501d070c, 0xb98fe684), TOBN(0xd60fbe9a, 0x124a1458), + TOBN(0xa45761c8, 0x92bc6b3f), TOBN(0xf5384858, 0xfe6f27cb), + TOBN(0x4b0271f7, 0xb59e763b), TOBN(0x3d4606a9, 0x5b5a8e5e), + TOBN(0x1eda5d9b, 0x05a48292), TOBN(0xda7731d0, 0xe6fec446), + TOBN(0xa3e33693, 0x90d45871), TOBN(0xe9764040, 0x06166d8d), + TOBN(0xb5c33682, 0x89a90403), TOBN(0x4bd17983, 0x72f1d637), + TOBN(0xa616679e, 0xd5d2c53a), TOBN(0x5ec4bcd8, 0xfdcf3b87), + TOBN(0xae6d7613, 0xb66a694e), TOBN(0x7460fc76, 0xe3fc27e5), + TOBN(0x70469b82, 0x95caabee), TOBN(0xde024ca5, 0x889501e3), + TOBN(0x6bdadc06, 0x076ed265), TOBN(0x0cb1236b, 0x5a0ef8b2), + TOBN(0x4065ddbf, 0x0972ebf9), TOBN(0xf1dd3875, 0x22aca432), + TOBN(0xa88b97cf, 0x744aff76), TOBN(0xd1359afd, 0xfe8e3d24), + TOBN(0x52a3ba2b, 0x91502cf3), TOBN(0x2c3832a8, 0x084db75d), + TOBN(0x04a12ddd, 0xde30b1c9), TOBN(0x7802eabc, 0xe31fd60c), + TOBN(0x33707327, 0xa37fddab), TOBN(0x65d6f2ab, 0xfaafa973), + TOBN(0x3525c5b8, 0x11e6f91a), TOBN(0x76aeb0c9, 0x5f46530b), + TOBN(0xe8815ff6, 0x2f93a675), TOBN(0xa6ec9684, 0x05f48679), + TOBN(0x6dcbb556, 0x358ae884), TOBN(0x0af61472, 0xe19e3873), + TOBN(0x72334372, 0xa5f696be), TOBN(0xc65e57ea, 0x6f22fb70), + TOBN(0x268da30c, 0x946cea90), TOBN(0x136a8a87, 0x65681b2a), + TOBN(0xad5e81dc, 0x0f9f44d4), TOBN(0xf09a6960, 0x2c46585a), + TOBN(0xd1649164, 0xc447d1b1), TOBN(0x3b4b36c8, 0x879dc8b1), + TOBN(0x20d4177b, 0x3b6b234c), TOBN(0x096a2505, 0x1730d9d0), + TOBN(0x0611b9b8, 0xef80531d), TOBN(0xba904b3b, 0x64bb495d), + TOBN(0x1192d9d4, 0x93a3147a), TOBN(0x9f30a5dc, 0x9a565545), + TOBN(0x90b1f9cb, 0x6ef07212), TOBN(0x29958546, 0x0d87fc13), + TOBN(0xd3323eff, 0xc17db9ba), TOBN(0xcb18548c, 0xcb1644a8), + TOBN(0x18a306d4, 0x4f49ffbc), TOBN(0x28d658f1, 0x4c2e8684), + TOBN(0x44ba60cd, 0xa99f8c71), TOBN(0x67b7abdb, 0x4bf742ff), + TOBN(0x66310f9c, 0x914b3f99), TOBN(0xae430a32, 0xf412c161), + TOBN(0x1e6776d3, 0x88ace52f), TOBN(0x4bc0fa24, 0x52d7067d), + TOBN(0x03c286aa, 0x8f07cd1b), TOBN(0x4cb8f38c, 0xa985b2c1), + TOBN(0x83ccbe80, 0x8c3bff36), TOBN(0x005a0bd2, 0x5263e575), + TOBN(0x460d7dda, 0x259bdcd1), TOBN(0x4a1c5642, 0xfa5cab6b), + TOBN(0x2b7bdbb9, 0x9fe4fc88), TOBN(0x09418e28, 0xcc97bbb5), + TOBN(0xd8274fb4, 0xa12321ae), TOBN(0xb137007d, 0x5c87b64e), + TOBN(0x80531fe1, 0xc63c4962), TOBN(0x50541e89, 0x981fdb25), + TOBN(0xdc1291a1, 0xfd4c2b6b), TOBN(0xc0693a17, 0xa6df4fca), + TOBN(0xb2c4604e, 0x0117f203), TOBN(0x245f1963, 0x0a99b8d0), + TOBN(0xaedc20aa, 0xc6212c44), TOBN(0xb1ed4e56, 0x520f52a8), + TOBN(0xfe48f575, 0xf8547be3), TOBN(0x0a7033cd, 0xa9e45f98), + TOBN(0x4b45d3a9, 0x18c50100), TOBN(0xb2a6cd6a, 0xa61d41da), + TOBN(0x60bbb4f5, 0x57933c6b), TOBN(0xa7538ebd, 0x2b0d7ffc), + TOBN(0x9ea3ab8d, 0x8cd626b6), TOBN(0x8273a484, 0x3601625a), + TOBN(0x88859845, 0x0168e508), TOBN(0x8cbc9bb2, 0x99a94abd), + TOBN(0x713ac792, 0xfab0a671), TOBN(0xa3995b19, 0x6c9ebffc), + TOBN(0xe711668e, 0x1239e152), TOBN(0x56892558, 0xbbb8dff4), + TOBN(0x8bfc7dab, 0xdbf17963), TOBN(0x5b59fe5a, 0xb3de1253), + TOBN(0x7e3320eb, 0x34a9f7ae), TOBN(0xe5e8cf72, 0xd751efe4), + TOBN(0x7ea003bc, 0xd9be2f37), TOBN(0xc0f551a0, 0xb6c08ef7), + TOBN(0x56606268, 0x038f6725), TOBN(0x1dd38e35, 0x6d92d3b6), + TOBN(0x07dfce7c, 0xc3cbd686), TOBN(0x4e549e04, 0x651c5da8), + TOBN(0x4058f93b, 0x08b19340), TOBN(0xc2fae6f4, 0xcac6d89d), + TOBN(0x4bad8a8c, 0x8f159cc7), TOBN(0x0ddba4b3, 0xcb0b601c), + TOBN(0xda4fc7b5, 0x1dd95f8c), TOBN(0x1d163cd7, 0xcea5c255), + TOBN(0x30707d06, 0x274a8c4c), TOBN(0x79d9e008, 
0x2802e9ce), + TOBN(0x02a29ebf, 0xe6ddd505), TOBN(0x37064e74, 0xb50bed1a), + TOBN(0x3f6bae65, 0xa7327d57), TOBN(0x3846f5f1, 0xf83920bc), + TOBN(0x87c37491, 0x60df1b9b), TOBN(0x4cfb2895, 0x2d1da29f), + TOBN(0x10a478ca, 0x4ed1743c), TOBN(0x390c6030, 0x3edd47c6), + TOBN(0x8f3e5312, 0x8c0a78de), TOBN(0xccd02bda, 0x1e85df70), + TOBN(0xd6c75c03, 0xa61b6582), TOBN(0x0762921c, 0xfc0eebd1), + TOBN(0xd34d0823, 0xd85010c0), TOBN(0xd73aaacb, 0x0044cf1f), + TOBN(0xfb4159bb, 0xa3b5e78a), TOBN(0x2287c7f7, 0xe5826f3f), + TOBN(0x4aeaf742, 0x580b1a01), TOBN(0xf080415d, 0x60423b79), + TOBN(0xe12622cd, 0xa7dea144), TOBN(0x49ea4996, 0x59d62472), + TOBN(0xb42991ef, 0x571f3913), TOBN(0x0610f214, 0xf5b25a8a), + TOBN(0x47adc585, 0x30b79e8f), TOBN(0xf90e3df6, 0x07a065a2), + TOBN(0x5d0a5deb, 0x43e2e034), TOBN(0x53fb5a34, 0x444024aa), + TOBN(0xa8628c68, 0x6b0c9f7f), TOBN(0x9c69c29c, 0xac563656), + TOBN(0x5a231feb, 0xbace47b6), TOBN(0xbdce0289, 0x9ea5a2ec), + TOBN(0x05da1fac, 0x9463853e), TOBN(0x96812c52, 0x509e78aa), + TOBN(0xd3fb5771, 0x57151692), TOBN(0xeb2721f8, 0xd98e1c44), + TOBN(0xc0506087, 0x32399be1), TOBN(0xda5a5511, 0xd979d8b8), + TOBN(0x737ed55d, 0xc6f56780), TOBN(0xe20d3004, 0x0dc7a7f4), + TOBN(0x02ce7301, 0xf5941a03), TOBN(0x91ef5215, 0xed30f83a), + TOBN(0x28727fc1, 0x4092d85f), TOBN(0x72d223c6, 0x5c49e41a), + TOBN(0xa7cf30a2, 0xba6a4d81), TOBN(0x7c086209, 0xb030d87d), + TOBN(0x04844c7d, 0xfc588b09), TOBN(0x728cd499, 0x5874bbb0), + TOBN(0xcc1281ee, 0xe84c0495), TOBN(0x0769b5ba, 0xec31958f), + TOBN(0x665c228b, 0xf99c2471), TOBN(0xf2d8a11b, 0x191eb110), + TOBN(0x4594f494, 0xd36d7024), TOBN(0x482ded8b, 0xcdcb25a1), + TOBN(0xc958a9d8, 0xdadd4885), TOBN(0x7004477e, 0xf1d2b547), + TOBN(0x0a45f6ef, 0x2a0af550), TOBN(0x4fc739d6, 0x2f8d6351), + TOBN(0x75cdaf27, 0x786f08a9), TOBN(0x8700bb26, 0x42c2737f), + TOBN(0x855a7141, 0x1c4e2670), TOBN(0x810188c1, 0x15076fef), + TOBN(0xc251d0c9, 0xabcd3297), TOBN(0xae4c8967, 0xf48108eb), + TOBN(0xbd146de7, 0x18ceed30), TOBN(0xf9d4f07a, 0xc986bced), + TOBN(0x5ad98ed5, 0x83fa1e08), TOBN(0x7780d33e, 0xbeabd1fb), + TOBN(0xe330513c, 0x903b1196), TOBN(0xba11de9e, 0xa47bc8c4), + TOBN(0x684334da, 0x02c2d064), TOBN(0x7ecf360d, 0xa48de23b), + TOBN(0x57a1b474, 0x0a9089d8), TOBN(0xf28fa439, 0xff36734c), + TOBN(0xf2a482cb, 0xea4570b3), TOBN(0xee65d68b, 0xa5ebcee9), + TOBN(0x988d0036, 0xb9694cd5), TOBN(0x53edd0e9, 0x37885d32), + TOBN(0xe37e3307, 0xbeb9bc6d), TOBN(0xe9abb907, 0x9f5c6768), + TOBN(0x4396ccd5, 0x51f2160f), TOBN(0x2500888c, 0x47336da6), + TOBN(0x383f9ed9, 0x926fce43), TOBN(0x809dd1c7, 0x04da2930), + TOBN(0x30f6f596, 0x8a4cb227), TOBN(0x0d700c7f, 0x73a56b38), + TOBN(0x1825ea33, 0xab64a065), TOBN(0xaab9b735, 0x1338df80), + TOBN(0x1516100d, 0x9b63f57f), TOBN(0x2574395a, 0x27a6a634), + TOBN(0xb5560fb6, 0x700a1acd), TOBN(0xe823fd73, 0xfd999681), + TOBN(0xda915d1f, 0x6cb4e1ba), TOBN(0x0d030118, 0x6ebe00a3), + TOBN(0x744fb0c9, 0x89fca8cd), TOBN(0x970d01db, 0xf9da0e0b), + TOBN(0x0ad8c564, 0x7931d76f), TOBN(0xb15737bf, 0xf659b96a), + TOBN(0xdc9933e8, 0xa8b484e7), TOBN(0xb2fdbdf9, 0x7a26dec7), + TOBN(0x2349e9a4, 0x9f1f0136), TOBN(0x7860368e, 0x70fddddb), + TOBN(0xd93d2c1c, 0xf9ad3e18), TOBN(0x6d6c5f17, 0x689f4e79), + TOBN(0x7a544d91, 0xb24ff1b6), TOBN(0x3e12a5eb, 0xfe16cd8c), + TOBN(0x543574e9, 0xa56b872f), TOBN(0xa1ad550c, 0xfcf68ea2), + TOBN(0x689e37d2, 0x3f560ef7), TOBN(0x8c54b9ca, 0xc9d47a8b), + TOBN(0x46d40a4a, 0x088ac342), TOBN(0xec450c7c, 0x1576c6d0), + TOBN(0xb589e31c, 0x1f9689e9), TOBN(0xdacf2602, 0xb8781718), + TOBN(0xa89237c6, 0xc8cb6b42), TOBN(0x1326fc93, 0xb96ef381), + 
TOBN(0x55d56c6d, 0xb5f07825), TOBN(0xacba2eea, 0x7449e22d), + TOBN(0x74e0887a, 0x633c3000), TOBN(0xcb6cd172, 0xd7cbcf71), + TOBN(0x309e81de, 0xc36cf1be), TOBN(0x07a18a6d, 0x60ae399b), + TOBN(0xb36c2679, 0x9edce57e), TOBN(0x52b892f4, 0xdf001d41), + TOBN(0xd884ae5d, 0x16a1f2c6), TOBN(0x9b329424, 0xefcc370a), + TOBN(0x3120daf2, 0xbd2e21df), TOBN(0x55298d2d, 0x02470a99), + TOBN(0x0b78af6c, 0xa05db32e), TOBN(0x5c76a331, 0x601f5636), + TOBN(0xaae861ff, 0xf8a4f29c), TOBN(0x70dc9240, 0xd68f8d49), + TOBN(0x960e649f, 0x81b1321c), TOBN(0x3d2c801b, 0x8792e4ce), + TOBN(0xf479f772, 0x42521876), TOBN(0x0bed93bc, 0x416c79b1), + TOBN(0xa67fbc05, 0x263e5bc9), TOBN(0x01e8e630, 0x521db049), + TOBN(0x76f26738, 0xc6f3431e), TOBN(0xe609cb02, 0xe3267541), + TOBN(0xb10cff2d, 0x818c877c), TOBN(0x1f0e75ce, 0x786a13cb), + TOBN(0xf4fdca64, 0x1158544d), TOBN(0x5d777e89, 0x6cb71ed0), + TOBN(0x3c233737, 0xa9aa4755), TOBN(0x7b453192, 0xe527ab40), + TOBN(0xdb59f688, 0x39f05ffe), TOBN(0x8f4f4be0, 0x6d82574e), + TOBN(0xcce3450c, 0xee292d1b), TOBN(0xaa448a12, 0x61ccd086), + TOBN(0xabce91b3, 0xf7914967), TOBN(0x4537f09b, 0x1908a5ed), + TOBN(0xa812421e, 0xf51042e7), TOBN(0xfaf5cebc, 0xec0b3a34), + TOBN(0x730ffd87, 0x4ca6b39a), TOBN(0x70fb72ed, 0x02efd342), + TOBN(0xeb4735f9, 0xd75c8edb), TOBN(0xc11f2157, 0xc278aa51), + TOBN(0xc459f635, 0xbf3bfebf), TOBN(0x3a1ff0b4, 0x6bd9601f), + TOBN(0xc9d12823, 0xc420cb73), TOBN(0x3e9af3e2, 0x3c2915a3), + TOBN(0xe0c82c72, 0xb41c3440), TOBN(0x175239e5, 0xe3039a5f), + TOBN(0xe1084b8a, 0x558795a3), TOBN(0x328d0a1d, 0xd01e5c60), + TOBN(0x0a495f2e, 0xd3788a04), TOBN(0x25d8ff16, 0x66c11a9f), + TOBN(0xf5155f05, 0x9ed692d6), TOBN(0x954fa107, 0x4f425fe4), + TOBN(0xd16aabf2, 0xe98aaa99), TOBN(0x90cd8ba0, 0x96b0f88a), + TOBN(0x957f4782, 0xc154026a), TOBN(0x54ee0734, 0x52af56d2), + TOBN(0xbcf89e54, 0x45b4147a), TOBN(0x3d102f21, 0x9a52816c), + TOBN(0x6808517e, 0x39b62e77), TOBN(0x92e25421, 0x69169ad8), + TOBN(0xd721d871, 0xbb608558), TOBN(0x60e4ebae, 0xf6d4ff9b), + TOBN(0x0ba10819, 0x41f2763e), TOBN(0xca2e45be, 0x51ee3247), + TOBN(0x66d172ec, 0x2bfd7a5f), TOBN(0x528a8f2f, 0x74d0b12d), + TOBN(0xe17f1e38, 0xdabe70dc), TOBN(0x1d5d7316, 0x9f93983c), + TOBN(0x51b2184a, 0xdf423e31), TOBN(0xcb417291, 0xaedb1a10), + TOBN(0x2054ca93, 0x625bcab9), TOBN(0x54396860, 0xa98998f0), + TOBN(0x4e53f6c4, 0xa54ae57e), TOBN(0x0ffeb590, 0xee648e9d), + TOBN(0xfbbdaadc, 0x6afaf6bc), TOBN(0xf88ae796, 0xaa3bfb8a), + TOBN(0x209f1d44, 0xd2359ed9), TOBN(0xac68dd03, 0xf3544ce2), + TOBN(0xf378da47, 0xfd51e569), TOBN(0xe1abd860, 0x2cc80097), + TOBN(0x23ca18d9, 0x343b6e3a), TOBN(0x480797e8, 0xb40a1bae), + TOBN(0xd1f0c717, 0x533f3e67), TOBN(0x44896970, 0x06e6cdfc), + TOBN(0x8ca21055, 0x52a82e8d), TOBN(0xb2caf785, 0x78460cdc), + TOBN(0x4c1b7b62, 0xe9037178), TOBN(0xefc09d2c, 0xdb514b58), + TOBN(0x5f2df9ee, 0x9113be5c), TOBN(0x2fbda78f, 0xb3f9271c), + TOBN(0xe09a81af, 0x8f83fc54), TOBN(0x06b13866, 0x8afb5141), + TOBN(0x38f6480f, 0x43e3865d), TOBN(0x72dd77a8, 0x1ddf47d9), + TOBN(0xf2a8e971, 0x4c205ff7), TOBN(0x46d449d8, 0x9d088ad8), + TOBN(0x926619ea, 0x185d706f), TOBN(0xe47e02eb, 0xc7dd7f62), + TOBN(0xe7f120a7, 0x8cbc2031), TOBN(0xc18bef00, 0x998d4ac9), + TOBN(0x18f37a9c, 0x6bdf22da), TOBN(0xefbc432f, 0x90dc82df), + TOBN(0xc52cef8e, 0x5d703651), TOBN(0x82887ba0, 0xd99881a5), + TOBN(0x7cec9dda, 0xb920ec1d), TOBN(0xd0d7e8c3, 0xec3e8d3b), + TOBN(0x445bc395, 0x4ca88747), TOBN(0xedeaa2e0, 0x9fd53535), + TOBN(0x461b1d93, 0x6cc87475), TOBN(0xd92a52e2, 0x6d2383bd), + TOBN(0xfabccb59, 0xd7903546), TOBN(0x6111a761, 0x3d14b112), + TOBN(0x0ae584fe, 
0xb3d5f612), TOBN(0x5ea69b8d, 0x60e828ec), + TOBN(0x6c078985, 0x54087030), TOBN(0x649cab04, 0xac4821fe), + TOBN(0x25ecedcf, 0x8bdce214), TOBN(0xb5622f72, 0x86af7361), + TOBN(0x0e1227aa, 0x7038b9e2), TOBN(0xd0efb273, 0xac20fa77), + TOBN(0x817ff88b, 0x79df975b), TOBN(0x856bf286, 0x1999503e), + TOBN(0xb4d5351f, 0x5038ec46), TOBN(0x740a52c5, 0xfc42af6e), + TOBN(0x2e38bb15, 0x2cbb1a3f), TOBN(0xc3eb99fe, 0x17a83429), + TOBN(0xca4fcbf1, 0xdd66bb74), TOBN(0x880784d6, 0xcde5e8fc), + TOBN(0xddc84c1c, 0xb4e7a0be), TOBN(0x8780510d, 0xbd15a72f), + TOBN(0x44bcf1af, 0x81ec30e1), TOBN(0x141e50a8, 0x0a61073e), + TOBN(0x0d955718, 0x47be87ae), TOBN(0x68a61417, 0xf76a4372), + TOBN(0xf57e7e87, 0xc607c3d3), TOBN(0x043afaf8, 0x5252f332), + TOBN(0xcc14e121, 0x1552a4d2), TOBN(0xb6dee692, 0xbb4d4ab4), + TOBN(0xb6ab74c8, 0xa03816a4), TOBN(0x84001ae4, 0x6f394a29), + TOBN(0x5bed8344, 0xd795fb45), TOBN(0x57326e7d, 0xb79f55a5), + TOBN(0xc9533ce0, 0x4accdffc), TOBN(0x53473caf, 0x3993fa04), + TOBN(0x7906eb93, 0xa13df4c8), TOBN(0xa73e51f6, 0x97cbe46f), + TOBN(0xd1ab3ae1, 0x0ae4ccf8), TOBN(0x25614508, 0x8a5b3dbc), + TOBN(0x61eff962, 0x11a71b27), TOBN(0xdf71412b, 0x6bb7fa39), + TOBN(0xb31ba6b8, 0x2bd7f3ef), TOBN(0xb0b9c415, 0x69180d29), + TOBN(0xeec14552, 0x014cdde5), TOBN(0x702c624b, 0x227b4bbb), + TOBN(0x2b15e8c2, 0xd3e988f3), TOBN(0xee3bcc6d, 0xa4f7fd04), + TOBN(0x9d00822a, 0x42ac6c85), TOBN(0x2db0cea6, 0x1df9f2b7), + TOBN(0xd7cad2ab, 0x42de1e58), TOBN(0x346ed526, 0x2d6fbb61), + TOBN(0xb3962995, 0x1a2faf09), TOBN(0x2fa8a580, 0x7c25612e), + TOBN(0x30ae04da, 0x7cf56490), TOBN(0x75662908, 0x0eea3961), + TOBN(0x3609f5c5, 0x3d080847), TOBN(0xcb081d39, 0x5241d4f6), + TOBN(0xb4fb3810, 0x77961a63), TOBN(0xc20c5984, 0x2abb66fc), + TOBN(0x3d40aa7c, 0xf902f245), TOBN(0x9cb12736, 0x4e536b1e), + TOBN(0x5eda24da, 0x99b3134f), TOBN(0xafbd9c69, 0x5cd011af), + TOBN(0x9a16e30a, 0xc7088c7d), TOBN(0x5ab65710, 0x3207389f), + TOBN(0x1b09547f, 0xe7407a53), TOBN(0x2322f9d7, 0x4fdc6eab), + TOBN(0xc0f2f22d, 0x7430de4d), TOBN(0x19382696, 0xe68ca9a9), + TOBN(0x17f1eff1, 0x918e5868), TOBN(0xe3b5b635, 0x586f4204), + TOBN(0x146ef980, 0x3fbc4341), TOBN(0x359f2c80, 0x5b5eed4e), + TOBN(0x9f35744e, 0x7482e41d), TOBN(0x9a9ac3ec, 0xf3b224c2), + TOBN(0x9161a6fe, 0x91fc50ae), TOBN(0x89ccc66b, 0xc613fa7c), + TOBN(0x89268b14, 0xc732f15a), TOBN(0x7cd6f4e2, 0xb467ed03), + TOBN(0xfbf79869, 0xce56b40e), TOBN(0xf93e094c, 0xc02dde98), + TOBN(0xefe0c3a8, 0xedee2cd7), TOBN(0x90f3ffc0, 0xb268fd42), + TOBN(0x81a7fd56, 0x08241aed), TOBN(0x95ab7ad8, 0x00b1afe8), + TOBN(0x40127056, 0x3e310d52), TOBN(0xd3ffdeb1, 0x09d9fc43), + TOBN(0xc8f85c91, 0xd11a8594), TOBN(0x2e74d258, 0x31cf6db8), + TOBN(0x829c7ca3, 0x02b5dfd0), TOBN(0xe389cfbe, 0x69143c86), + TOBN(0xd01b6405, 0x941768d8), TOBN(0x45103995, 0x03bf825d), + TOBN(0xcc4ee166, 0x56cd17e2), TOBN(0xbea3c283, 0xba037e79), + TOBN(0x4e1ac06e, 0xd9a47520), TOBN(0xfbfe18aa, 0xaf852404), + TOBN(0x5615f8e2, 0x8087648a), TOBN(0x7301e47e, 0xb9d150d9), + TOBN(0x79f9f9dd, 0xb299b977), TOBN(0x76697a7b, 0xa5b78314), + TOBN(0x10d67468, 0x7d7c90e7), TOBN(0x7afffe03, 0x937210b5), + TOBN(0x5aef3e4b, 0x28c22cee), TOBN(0xefb0ecd8, 0x09fd55ae), + TOBN(0x4cea7132, 0x0d2a5d6a), TOBN(0x9cfb5fa1, 0x01db6357), + TOBN(0x395e0b57, 0xf36e1ac5), TOBN(0x008fa9ad, 0x36cafb7d), + TOBN(0x8f6cdf70, 0x5308c4db), TOBN(0x51527a37, 0x95ed2477), + TOBN(0xba0dee30, 0x5bd21311), TOBN(0x6ed41b22, 0x909c90d7), + TOBN(0xc5f6b758, 0x7c8696d3), TOBN(0x0db8eaa8, 0x3ce83a80), + TOBN(0xd297fe37, 0xb24b4b6f), TOBN(0xfe58afe8, 0x522d1f0d), + TOBN(0x97358736, 0x8c98dbd9), 
TOBN(0x6bc226ca, 0x9454a527), + TOBN(0xa12b384e, 0xce53c2d0), TOBN(0x779d897d, 0x5e4606da), + TOBN(0xa53e47b0, 0x73ec12b0), TOBN(0x462dbbba, 0x5756f1ad), + TOBN(0x69fe09f2, 0xcafe37b6), TOBN(0x273d1ebf, 0xecce2e17), + TOBN(0x8ac1d538, 0x3cf607fd), TOBN(0x8035f7ff, 0x12e10c25),} + , + {TOBN(0x854d34c7, 0x7e6c5520), TOBN(0xc27df9ef, 0xdcb9ea58), + TOBN(0x405f2369, 0xd686666d), TOBN(0x29d1febf, 0x0417aa85), + TOBN(0x9846819e, 0x93470afe), TOBN(0x3e6a9669, 0xe2a27f9e), + TOBN(0x24d008a2, 0xe31e6504), TOBN(0xdba7cecf, 0x9cb7680a), + TOBN(0xecaff541, 0x338d6e43), TOBN(0x56f7dd73, 0x4541d5cc), + TOBN(0xb5d426de, 0x96bc88ca), TOBN(0x48d94f6b, 0x9ed3a2c3), + TOBN(0x6354a3bb, 0x2ef8279c), TOBN(0xd575465b, 0x0b1867f2), + TOBN(0xef99b0ff, 0x95225151), TOBN(0xf3e19d88, 0xf94500d8), + TOBN(0x92a83268, 0xe32dd620), TOBN(0x913ec99f, 0x627849a2), + TOBN(0xedd8fdfa, 0x2c378882), TOBN(0xaf96f33e, 0xee6f8cfe), + TOBN(0xc06737e5, 0xdc3fa8a5), TOBN(0x236bb531, 0xb0b03a1d), + TOBN(0x33e59f29, 0x89f037b0), TOBN(0x13f9b5a7, 0xd9a12a53), + TOBN(0x0d0df6ce, 0x51efb310), TOBN(0xcb5b2eb4, 0x958df5be), + TOBN(0xd6459e29, 0x36158e59), TOBN(0x82aae2b9, 0x1466e336), + TOBN(0xfb658a39, 0x411aa636), TOBN(0x7152ecc5, 0xd4c0a933), + TOBN(0xf10c758a, 0x49f026b7), TOBN(0xf4837f97, 0xcb09311f), + TOBN(0xddfb02c4, 0xc753c45f), TOBN(0x18ca81b6, 0xf9c840fe), + TOBN(0x846fd09a, 0xb0f8a3e6), TOBN(0xb1162add, 0xe7733dbc), + TOBN(0x7070ad20, 0x236e3ab6), TOBN(0xf88cdaf5, 0xb2a56326), + TOBN(0x05fc8719, 0x997cbc7a), TOBN(0x442cd452, 0x4b665272), + TOBN(0x7807f364, 0xb71698f5), TOBN(0x6ba418d2, 0x9f7b605e), + TOBN(0xfd20b00f, 0xa03b2cbb), TOBN(0x883eca37, 0xda54386f), + TOBN(0xff0be43f, 0xf3437f24), TOBN(0xe910b432, 0xa48bb33c), + TOBN(0x4963a128, 0x329df765), TOBN(0xac1dd556, 0xbe2fe6f7), + TOBN(0x557610f9, 0x24a0a3fc), TOBN(0x38e17bf4, 0xe881c3f9), + TOBN(0x6ba84faf, 0xed0dac99), TOBN(0xd4a222c3, 0x59eeb918), + TOBN(0xc79c1dbe, 0x13f542b6), TOBN(0x1fc65e0d, 0xe425d457), + TOBN(0xeffb754f, 0x1debb779), TOBN(0x638d8fd0, 0x9e08af60), + TOBN(0x994f523a, 0x626332d5), TOBN(0x7bc38833, 0x5561bb44), + TOBN(0x005ed4b0, 0x3d845ea2), TOBN(0xd39d3ee1, 0xc2a1f08a), + TOBN(0x6561fdd3, 0xe7676b0d), TOBN(0x620e35ff, 0xfb706017), + TOBN(0x36ce424f, 0xf264f9a8), TOBN(0xc4c3419f, 0xda2681f7), + TOBN(0xfb6afd2f, 0x69beb6e8), TOBN(0x3a50b993, 0x6d700d03), + TOBN(0xc840b2ad, 0x0c83a14f), TOBN(0x573207be, 0x54085bef), + TOBN(0x5af882e3, 0x09fe7e5b), TOBN(0x957678a4, 0x3b40a7e1), + TOBN(0x172d4bdd, 0x543056e2), TOBN(0x9c1b26b4, 0x0df13c0a), + TOBN(0x1c30861c, 0xf405ff06), TOBN(0xebac86bd, 0x486e828b), + TOBN(0xe791a971, 0x636933fc), TOBN(0x50e7c2be, 0x7aeee947), + TOBN(0xc3d4a095, 0xfa90d767), TOBN(0xae60eb7b, 0xe670ab7b), + TOBN(0x17633a64, 0x397b056d), TOBN(0x93a21f33, 0x105012aa), + TOBN(0x663c370b, 0xabb88643), TOBN(0x91df36d7, 0x22e21599), + TOBN(0x183ba835, 0x8b761671), TOBN(0x381eea1d, 0x728f3bf1), + TOBN(0xb9b2f1ba, 0x39966e6c), TOBN(0x7c464a28, 0xe7295492), + TOBN(0x0fd5f70a, 0x09b26b7f), TOBN(0xa9aba1f9, 0xfbe009df), + TOBN(0x857c1f22, 0x369b87ad), TOBN(0x3c00e5d9, 0x32fca556), + TOBN(0x1ad74cab, 0x90b06466), TOBN(0xa7112386, 0x550faaf2), + TOBN(0x7435e198, 0x6d9bd5f5), TOBN(0x2dcc7e38, 0x59c3463f), + TOBN(0xdc7df748, 0xca7bd4b2), TOBN(0x13cd4c08, 0x9dec2f31), + TOBN(0x0d3b5df8, 0xe3237710), TOBN(0x0dadb26e, 0xcbd2f7b0), + TOBN(0x9f5966ab, 0xe4aa082b), TOBN(0x666ec8de, 0x350e966e), + TOBN(0x1bfd1ed5, 0xee524216), TOBN(0xcd93c59b, 0x41dab0b6), + TOBN(0x658a8435, 0xd186d6ba), TOBN(0x1b7d34d2, 0x159d1195), + TOBN(0x5936e460, 0x22caf46b), 
TOBN(0x6a45dd8f, 0x9a96fe4f), + TOBN(0xf7925434, 0xb98f474e), TOBN(0x41410412, 0x0053ef15), + TOBN(0x71cf8d12, 0x41de97bf), TOBN(0xb8547b61, 0xbd80bef4), + TOBN(0xb47d3970, 0xc4db0037), TOBN(0xf1bcd328, 0xfef20dff), + TOBN(0x31a92e09, 0x10caad67), TOBN(0x1f591960, 0x5531a1e1), + TOBN(0x3bb852e0, 0x5f4fc840), TOBN(0x63e297ca, 0x93a72c6c), + TOBN(0x3c2b0b2e, 0x49abad67), TOBN(0x6ec405fc, 0xed3db0d9), + TOBN(0xdc14a530, 0x7fef1d40), TOBN(0xccd19846, 0x280896fc), + TOBN(0x00f83176, 0x9bb81648), TOBN(0xd69eb485, 0x653120d0), + TOBN(0xd17d75f4, 0x4ccabc62), TOBN(0x34a07f82, 0xb749fcb1), + TOBN(0x2c3af787, 0xbbfb5554), TOBN(0xb06ed4d0, 0x62e283f8), + TOBN(0x5722889f, 0xa19213a0), TOBN(0x162b085e, 0xdcf3c7b4), + TOBN(0xbcaecb31, 0xe0dd3eca), TOBN(0xc6237fbc, 0xe52f13a5), + TOBN(0xcc2b6b03, 0x27bac297), TOBN(0x2ae1cac5, 0xb917f54a), + TOBN(0x474807d4, 0x7845ae4f), TOBN(0xfec7dd92, 0xce5972e0), + TOBN(0xc3bd2541, 0x1d7915bb), TOBN(0x66f85dc4, 0xd94907ca), + TOBN(0xd981b888, 0xbdbcf0ca), TOBN(0xd75f5da6, 0xdf279e9f), + TOBN(0x128bbf24, 0x7054e934), TOBN(0x3c6ff6e5, 0x81db134b), + TOBN(0x795b7cf4, 0x047d26e4), TOBN(0xf370f7b8, 0x5049ec37), + TOBN(0xc6712d4d, 0xced945af), TOBN(0xdf30b5ec, 0x095642bc), + TOBN(0x9b034c62, 0x4896246e), TOBN(0x5652c016, 0xee90bbd1), + TOBN(0xeb38636f, 0x87fedb73), TOBN(0x5e32f847, 0x0135a613), + TOBN(0x0703b312, 0xcf933c83), TOBN(0xd05bb76e, 0x1a7f47e6), + TOBN(0x825e4f0c, 0x949c2415), TOBN(0x569e5622, 0x7250d6f8), + TOBN(0xbbe9eb3a, 0x6568013e), TOBN(0x8dbd203f, 0x22f243fc), + TOBN(0x9dbd7694, 0xb342734a), TOBN(0x8f6d12f8, 0x46afa984), + TOBN(0xb98610a2, 0xc9eade29), TOBN(0xbab4f323, 0x47dd0f18), + TOBN(0x5779737b, 0x671c0d46), TOBN(0x10b6a7c6, 0xd3e0a42a), + TOBN(0xfb19ddf3, 0x3035b41c), TOBN(0xd336343f, 0x99c45895), + TOBN(0x61fe4938, 0x54c857e5), TOBN(0xc4d506be, 0xae4e57d5), + TOBN(0x3cd8c8cb, 0xbbc33f75), TOBN(0x7281f08a, 0x9262c77d), + TOBN(0x083f4ea6, 0xf11a2823), TOBN(0x8895041e, 0x9fba2e33), + TOBN(0xfcdfea49, 0x9c438edf), TOBN(0x7678dcc3, 0x91edba44), + TOBN(0xf07b3b87, 0xe2ba50f0), TOBN(0xc13888ef, 0x43948c1b), + TOBN(0xc2135ad4, 0x1140af42), TOBN(0x8e5104f3, 0x926ed1a7), + TOBN(0xf24430cb, 0x88f6695f), TOBN(0x0ce0637b, 0x6d73c120), + TOBN(0xb2db01e6, 0xfe631e8f), TOBN(0x1c5563d7, 0xd7bdd24b), + TOBN(0x8daea3ba, 0x369ad44f), TOBN(0x000c81b6, 0x8187a9f9), + TOBN(0x5f48a951, 0xaae1fd9a), TOBN(0xe35626c7, 0x8d5aed8a), + TOBN(0x20952763, 0x0498c622), TOBN(0x76d17634, 0x773aa504), + TOBN(0x36d90dda, 0xeb300f7a), TOBN(0x9dcf7dfc, 0xedb5e801), + TOBN(0x645cb268, 0x74d5244c), TOBN(0xa127ee79, 0x348e3aa2), + TOBN(0x488acc53, 0x575f1dbb), TOBN(0x95037e85, 0x80e6161e), + TOBN(0x57e59283, 0x292650d0), TOBN(0xabe67d99, 0x14938216), + TOBN(0x3c7f944b, 0x3f8e1065), TOBN(0xed908cb6, 0x330e8924), + TOBN(0x08ee8fd5, 0x6f530136), TOBN(0x2227b7d5, 0xd7ffc169), + TOBN(0x4f55c893, 0xb5cd6dd5), TOBN(0x82225e11, 0xa62796e8), + TOBN(0x5c6cead1, 0xcb18e12c), TOBN(0x4381ae0c, 0x84f5a51a), + TOBN(0x345913d3, 0x7fafa4c8), TOBN(0x3d918082, 0x0491aac0), + TOBN(0x9347871f, 0x3e69264c), TOBN(0xbea9dd3c, 0xb4f4f0cd), + TOBN(0xbda5d067, 0x3eadd3e7), TOBN(0x0033c1b8, 0x0573bcd8), + TOBN(0x25589379, 0x5da2486c), TOBN(0xcb89ee5b, 0x86abbee7), + TOBN(0x8fe0a8f3, 0x22532e5d), TOBN(0xb6410ff0, 0x727dfc4c), + TOBN(0x619b9d58, 0x226726db), TOBN(0x5ec25669, 0x7a2b2dc7), + TOBN(0xaf4d2e06, 0x4c3beb01), TOBN(0x852123d0, 0x7acea556), + TOBN(0x0e9470fa, 0xf783487a), TOBN(0x75a7ea04, 0x5664b3eb), + TOBN(0x4ad78f35, 0x6798e4ba), TOBN(0x9214e6e5, 0xc7d0e091), + TOBN(0xc420b488, 0xb1290403), TOBN(0x64049e0a, 
0xfc295749), + TOBN(0x03ef5af1, 0x3ae9841f), TOBN(0xdbe4ca19, 0xb0b662a6), + TOBN(0x46845c5f, 0xfa453458), TOBN(0xf8dabf19, 0x10b66722), + TOBN(0xb650f0aa, 0xcce2793b), TOBN(0x71db851e, 0xc5ec47c1), + TOBN(0x3eb78f3e, 0x3b234fa9), TOBN(0xb0c60f35, 0xfc0106ce), + TOBN(0x05427121, 0x774eadbd), TOBN(0x25367faf, 0xce323863), + TOBN(0x7541b5c9, 0xcd086976), TOBN(0x4ff069e2, 0xdc507ad1), + TOBN(0x74145256, 0x8776e667), TOBN(0x6e76142c, 0xb23c6bb5), + TOBN(0xdbf30712, 0x1b3a8a87), TOBN(0x60e7363e, 0x98450836), + TOBN(0x5741450e, 0xb7366d80), TOBN(0xe4ee14ca, 0x4837dbdf), + TOBN(0xa765eb9b, 0x69d4316f), TOBN(0x04548dca, 0x8ef43825), + TOBN(0x9c9f4e4c, 0x5ae888eb), TOBN(0x733abb51, 0x56e9ac99), + TOBN(0xdaad3c20, 0xba6ac029), TOBN(0x9b8dd3d3, 0x2ba3e38e), + TOBN(0xa9bb4c92, 0x0bc5d11a), TOBN(0xf20127a7, 0x9c5f88a3), + TOBN(0x4f52b06e, 0x161d3cb8), TOBN(0x26c1ff09, 0x6afaf0a6), + TOBN(0x32670d2f, 0x7189e71f), TOBN(0xc6438748, 0x5ecf91e7), + TOBN(0x15758e57, 0xdb757a21), TOBN(0x427d09f8, 0x290a9ce5), + TOBN(0x846a308f, 0x38384a7a), TOBN(0xaac3acb4, 0xb0732b99), + TOBN(0x9e941009, 0x17845819), TOBN(0x95cba111, 0xa7ce5e03), + TOBN(0x6f3d4f7f, 0xb00009c4), TOBN(0xb8396c27, 0x8ff28b5f), + TOBN(0xb1a9ae43, 0x1c97975d), TOBN(0x9d7ba8af, 0xe5d9fed5), + TOBN(0x338cf09f, 0x34f485b6), TOBN(0xbc0ddacc, 0x64122516), + TOBN(0xa450da12, 0x05d471fe), TOBN(0x4c3a6250, 0x628dd8c9), + TOBN(0x69c7d103, 0xd1295837), TOBN(0xa2893e50, 0x3807eb2f), + TOBN(0xd6e1e1de, 0xbdb41491), TOBN(0xc630745b, 0x5e138235), + TOBN(0xc892109e, 0x48661ae1), TOBN(0x8d17e7eb, 0xea2b2674), + TOBN(0x00ec0f87, 0xc328d6b5), TOBN(0x6d858645, 0xf079ff9e), + TOBN(0x6cdf243e, 0x19115ead), TOBN(0x1ce1393e, 0x4bac4fcf), + TOBN(0x2c960ed0, 0x9c29f25b), TOBN(0x59be4d8e, 0x9d388a05), + TOBN(0x0d46e06c, 0xd0def72b), TOBN(0xb923db5d, 0xe0342748), + TOBN(0xf7d3aacd, 0x936d4a3d), TOBN(0x558519cc, 0x0b0b099e), + TOBN(0x3ea8ebf8, 0x827097ef), TOBN(0x259353db, 0xd054f55d), + TOBN(0x84c89abc, 0x6d2ed089), TOBN(0x5c548b69, 0x8e096a7c), + TOBN(0xd587f616, 0x994b995d), TOBN(0x4d1531f6, 0xa5845601), + TOBN(0x792ab31e, 0x451fd9f0), TOBN(0xc8b57bb2, 0x65adf6ca), + TOBN(0x68440fcb, 0x1cd5ad73), TOBN(0xb9c860e6, 0x6144da4f), + TOBN(0x2ab286aa, 0x8462beb8), TOBN(0xcc6b8fff, 0xef46797f), + TOBN(0xac820da4, 0x20c8a471), TOBN(0x69ae05a1, 0x77ff7faf), + TOBN(0xb9163f39, 0xbfb5da77), TOBN(0xbd03e590, 0x2c73ab7a), + TOBN(0x7e862b5e, 0xb2940d9e), TOBN(0x3c663d86, 0x4b9af564), + TOBN(0xd8309031, 0xbde3033d), TOBN(0x298231b2, 0xd42c5bc6), + TOBN(0x42090d2c, 0x552ad093), TOBN(0xa4799d1c, 0xff854695), + TOBN(0x0a88b5d6, 0xd31f0d00), TOBN(0xf8b40825, 0xa2f26b46), + TOBN(0xec29b1ed, 0xf1bd7218), TOBN(0xd491c53b, 0x4b24c86e), + TOBN(0xd2fe588f, 0x3395ea65), TOBN(0x6f3764f7, 0x4456ef15), + TOBN(0xdb43116d, 0xcdc34800), TOBN(0xcdbcd456, 0xc1e33955), + TOBN(0xefdb5540, 0x74ab286b), TOBN(0x948c7a51, 0xd18c5d7c), + TOBN(0xeb81aa37, 0x7378058e), TOBN(0x41c746a1, 0x04411154), + TOBN(0xa10c73bc, 0xfb828ac7), TOBN(0x6439be91, 0x9d972b29), + TOBN(0x4bf3b4b0, 0x43a2fbad), TOBN(0x39e6dadf, 0x82b5e840), + TOBN(0x4f716408, 0x6397bd4c), TOBN(0x0f7de568, 0x7f1eeccb), + TOBN(0x5865c5a1, 0xd2ffbfc1), TOBN(0xf74211fa, 0x4ccb6451), + TOBN(0x66368a88, 0xc0b32558), TOBN(0x5b539dc2, 0x9ad7812e), + TOBN(0x579483d0, 0x2f3af6f6), TOBN(0x52132078, 0x99934ece), + TOBN(0x50b9650f, 0xdcc9e983), TOBN(0xca989ec9, 0xaee42b8a), + TOBN(0x6a44c829, 0xd6f62f99), TOBN(0x8f06a309, 0x4c2a7c0c), + TOBN(0x4ea2b3a0, 0x98a0cb0a), TOBN(0x5c547b70, 0xbeee8364), + TOBN(0x461d40e1, 0x682afe11), TOBN(0x9e0fc77a, 0x7b41c0a8), + 
TOBN(0x79e4aefd, 0xe20d5d36), TOBN(0x2916e520, 0x32dd9f63), + TOBN(0xf59e52e8, 0x3f883faf), TOBN(0x396f9639, 0x2b868d35), + TOBN(0xc902a9df, 0x4ca19881), TOBN(0x0fc96822, 0xdb2401a6), + TOBN(0x41237587, 0x66f1c68d), TOBN(0x10fc6de3, 0xfb476c0d), + TOBN(0xf8b6b579, 0x841f5d90), TOBN(0x2ba8446c, 0xfa24f44a), + TOBN(0xa237b920, 0xef4a9975), TOBN(0x60bb6004, 0x2330435f), + TOBN(0xd6f4ab5a, 0xcfb7e7b5), TOBN(0xb2ac5097, 0x83435391), + TOBN(0xf036ee2f, 0xb0d1ea67), TOBN(0xae779a6a, 0x74c56230), + TOBN(0x59bff8c8, 0xab838ae6), TOBN(0xcd83ca99, 0x9b38e6f0), + TOBN(0xbb27bef5, 0xe33deed3), TOBN(0xe6356f6f, 0x001892a8), + TOBN(0xbf3be6cc, 0x7adfbd3e), TOBN(0xaecbc81c, 0x33d1ac9d), + TOBN(0xe4feb909, 0xe6e861dc), TOBN(0x90a247a4, 0x53f5f801), + TOBN(0x01c50acb, 0x27346e57), TOBN(0xce29242e, 0x461acc1b), + TOBN(0x04dd214a, 0x2f998a91), TOBN(0x271ee9b1, 0xd4baf27b), + TOBN(0x7e3027d1, 0xe8c26722), TOBN(0x21d1645c, 0x1820dce5), + TOBN(0x086f242c, 0x7501779c), TOBN(0xf0061407, 0xfa0e8009), + TOBN(0xf23ce477, 0x60187129), TOBN(0x05bbdedb, 0x0fde9bd0), + TOBN(0x682f4832, 0x25d98473), TOBN(0xf207fe85, 0x5c658427), + TOBN(0xb6fdd7ba, 0x4166ffa1), TOBN(0x0c314056, 0x9eed799d), + TOBN(0x0db8048f, 0x4107e28f), TOBN(0x74ed3871, 0x41216840), + TOBN(0x74489f8f, 0x56a3c06e), TOBN(0x1e1c005b, 0x12777134), + TOBN(0xdb332a73, 0xf37ec3c3), TOBN(0xc65259bd, 0xdd59eba0), + TOBN(0x2291709c, 0xdb4d3257), TOBN(0x9a793b25, 0xbd389390), + TOBN(0xf39fe34b, 0xe43756f0), TOBN(0x2f76bdce, 0x9afb56c9), + TOBN(0x9f37867a, 0x61208b27), TOBN(0xea1d4307, 0x089972c3), + TOBN(0x8c595330, 0x8bdf623a), TOBN(0x5f5accda, 0x8441fb7d), + TOBN(0xfafa9418, 0x32ddfd95), TOBN(0x6ad40c5a, 0x0fde9be7), + TOBN(0x43faba89, 0xaeca8709), TOBN(0xc64a7cf1, 0x2c248a9d), + TOBN(0x16620252, 0x72637a76), TOBN(0xaee1c791, 0x22b8d1bb), + TOBN(0xf0f798fd, 0x21a843b2), TOBN(0x56e4ed4d, 0x8d005cb1), + TOBN(0x355f7780, 0x1f0d8abe), TOBN(0x197b04cf, 0x34522326), + TOBN(0x41f9b31f, 0xfd42c13f), TOBN(0x5ef7feb2, 0xb40f933d), + TOBN(0x27326f42, 0x5d60bad4), TOBN(0x027ecdb2, 0x8c92cf89), + TOBN(0x04aae4d1, 0x4e3352fe), TOBN(0x08414d2f, 0x73591b90), + TOBN(0x5ed6124e, 0xb7da7d60), TOBN(0xb985b931, 0x4d13d4ec), + TOBN(0xa592d3ab, 0x96bf36f9), TOBN(0x012dbed5, 0xbbdf51df), + TOBN(0xa57963c0, 0xdf6c177d), TOBN(0x010ec869, 0x87ca29cf), + TOBN(0xba1700f6, 0xbf926dff), TOBN(0x7c9fdbd1, 0xf4bf6bc2), + TOBN(0xdc18dc8f, 0x64da11f5), TOBN(0xa6074b7a, 0xd938ae75), + TOBN(0x14270066, 0xe84f44a4), TOBN(0x99998d38, 0xd27b954e), + TOBN(0xc1be8ab2, 0xb4f38e9a), TOBN(0x8bb55bbf, 0x15c01016), + TOBN(0xf73472b4, 0x0ea2ab30), TOBN(0xd365a340, 0xf73d68dd), + TOBN(0xc01a7168, 0x19c2e1eb), TOBN(0x32f49e37, 0x34061719), + TOBN(0xb73c57f1, 0x01d8b4d6), TOBN(0x03c8423c, 0x26b47700), + TOBN(0x321d0bc8, 0xa4d8826a), TOBN(0x6004213c, 0x4bc0e638), + TOBN(0xf78c64a1, 0xc1c06681), TOBN(0x16e0a16f, 0xef018e50), + TOBN(0x31cbdf91, 0xdb42b2b3), TOBN(0xf8f4ffce, 0xe0d36f58), + TOBN(0xcdcc71cd, 0x4cc5e3e0), TOBN(0xd55c7cfa, 0xa129e3e0), + TOBN(0xccdb6ba0, 0x0fb2cbf1), TOBN(0x6aba0005, 0xc4bce3cb), + TOBN(0x501cdb30, 0xd232cfc4), TOBN(0x9ddcf12e, 0xd58a3cef), + TOBN(0x02d2cf9c, 0x87e09149), TOBN(0xdc5d7ec7, 0x2c976257), + TOBN(0x6447986e, 0x0b50d7dd), TOBN(0x88fdbaf7, 0x807f112a), + TOBN(0x58c9822a, 0xb00ae9f6), TOBN(0x6abfb950, 0x6d3d27e0), + TOBN(0xd0a74487, 0x8a429f4f), TOBN(0x0649712b, 0xdb516609), + TOBN(0xb826ba57, 0xe769b5df), TOBN(0x82335df2, 0x1fc7aaf2), + TOBN(0x2389f067, 0x5c93d995), TOBN(0x59ac367a, 0x68677be6), + TOBN(0xa77985ff, 0x21d9951b), TOBN(0x038956fb, 0x85011cce), + TOBN(0x608e48cb, 
0xbb734e37), TOBN(0xc08c0bf2, 0x2be5b26f), + TOBN(0x17bbdd3b, 0xf9b1a0d9), TOBN(0xeac7d898, 0x10483319), + TOBN(0xc95c4baf, 0xbc1a6dea), TOBN(0xfdd0e2bf, 0x172aafdb), + TOBN(0x40373cbc, 0x8235c41a), TOBN(0x14303f21, 0xfb6f41d5), + TOBN(0xba063621, 0x0408f237), TOBN(0xcad3b09a, 0xecd2d1ed), + TOBN(0x4667855a, 0x52abb6a2), TOBN(0xba9157dc, 0xaa8b417b), + TOBN(0xfe7f3507, 0x4f013efb), TOBN(0x1b112c4b, 0xaa38c4a2), + TOBN(0xa1406a60, 0x9ba64345), TOBN(0xe53cba33, 0x6993c80b), + TOBN(0x45466063, 0xded40d23), TOBN(0x3d5f1f4d, 0x54908e25), + TOBN(0x9ebefe62, 0x403c3c31), TOBN(0x274ea0b5, 0x0672a624), + TOBN(0xff818d99, 0x451d1b71), TOBN(0x80e82643, 0x8f79cf79), + TOBN(0xa165df13, 0x73ce37f5), TOBN(0xa744ef4f, 0xfe3a21fd), + TOBN(0x73f1e7f5, 0xcf551396), TOBN(0xc616898e, 0x868c676b), + TOBN(0x671c28c7, 0x8c442c36), TOBN(0xcfe5e558, 0x5e0a317d), + TOBN(0x1242d818, 0x7051f476), TOBN(0x56fad2a6, 0x14f03442), + TOBN(0x262068bc, 0x0a44d0f6), TOBN(0xdfa2cd6e, 0xce6edf4e), + TOBN(0x0f43813a, 0xd15d1517), TOBN(0x61214cb2, 0x377d44f5), + TOBN(0xd399aa29, 0xc639b35f), TOBN(0x42136d71, 0x54c51c19), + TOBN(0x9774711b, 0x08417221), TOBN(0x0a5546b3, 0x52545a57), + TOBN(0x80624c41, 0x1150582d), TOBN(0x9ec5c418, 0xfbc555bc), + TOBN(0x2c87dcad, 0x771849f1), TOBN(0xb0c932c5, 0x01d7bf6f), + TOBN(0x6aa5cd3e, 0x89116eb2), TOBN(0xd378c25a, 0x51ca7bd3), + TOBN(0xc612a0da, 0x9e6e3e31), TOBN(0x0417a54d, 0xb68ad5d0), + TOBN(0x00451e4a, 0x22c6edb8), TOBN(0x9fbfe019, 0xb42827ce), + TOBN(0x2fa92505, 0xba9384a2), TOBN(0x21b8596e, 0x64ad69c1), + TOBN(0x8f4fcc49, 0x983b35a6), TOBN(0xde093760, 0x72754672), + TOBN(0x2f14ccc8, 0xf7bffe6d), TOBN(0x27566bff, 0x5d94263d), + TOBN(0xb5b4e9c6, 0x2df3ec30), TOBN(0x94f1d7d5, 0x3e6ea6ba), + TOBN(0x97b7851a, 0xaaca5e9b), TOBN(0x518aa521, 0x56713b97), + TOBN(0x3357e8c7, 0x150a61f6), TOBN(0x7842e7e2, 0xec2c2b69), + TOBN(0x8dffaf65, 0x6868a548), TOBN(0xd963bd82, 0xe068fc81), + TOBN(0x64da5c8b, 0x65917733), TOBN(0x927090ff, 0x7b247328),} + , + {TOBN(0x214bc9a7, 0xd298c241), TOBN(0xe3b697ba, 0x56807cfd), + TOBN(0xef1c7802, 0x4564eadb), TOBN(0xdde8cdcf, 0xb48149c5), + TOBN(0x946bf0a7, 0x5a4d2604), TOBN(0x27154d7f, 0x6c1538af), + TOBN(0x95cc9230, 0xde5b1fcc), TOBN(0xd88519e9, 0x66864f82), + TOBN(0xb828dd1a, 0x7cb1282c), TOBN(0xa08d7626, 0xbe46973a), + TOBN(0x6baf8d40, 0xe708d6b2), TOBN(0x72571fa1, 0x4daeb3f3), + TOBN(0x85b1732f, 0xf22dfd98), TOBN(0x87ab01a7, 0x0087108d), + TOBN(0xaaaafea8, 0x5988207a), TOBN(0xccc832f8, 0x69f00755), + TOBN(0x964d950e, 0x36ff3bf0), TOBN(0x8ad20f6f, 0xf0b34638), + TOBN(0x4d9177b3, 0xb5d7585f), TOBN(0xcf839760, 0xef3f019f), + TOBN(0x582fc5b3, 0x8288c545), TOBN(0x2f8e4e9b, 0x13116bd1), + TOBN(0xf91e1b2f, 0x332120ef), TOBN(0xcf568724, 0x2a17dd23), + TOBN(0x488f1185, 0xca8d9d1a), TOBN(0xadf2c77d, 0xd987ded2), + TOBN(0x5f3039f0, 0x60c46124), TOBN(0xe5d70b75, 0x71e095f4), + TOBN(0x82d58650, 0x6260e70f), TOBN(0x39d75ea7, 0xf750d105), + TOBN(0x8cf3d0b1, 0x75bac364), TOBN(0xf3a7564d, 0x21d01329), + TOBN(0x182f04cd, 0x2f52d2a7), TOBN(0x4fde149a, 0xe2df565a), + TOBN(0xb80c5eec, 0xa79fb2f7), TOBN(0xab491d7b, 0x22ddc897), + TOBN(0x99d76c18, 0xc6312c7f), TOBN(0xca0d5f3d, 0x6aa41a57), + TOBN(0x71207325, 0xd15363a0), TOBN(0xe82aa265, 0xbeb252c2), + TOBN(0x94ab4700, 0xec3128c2), TOBN(0x6c76d862, 0x8e383f49), + TOBN(0xdc36b150, 0xc03024eb), TOBN(0xfb439477, 0x53daac69), + TOBN(0xfc68764a, 0x8dc79623), TOBN(0x5b86995d, 0xb440fbb2), + TOBN(0xd66879bf, 0xccc5ee0d), TOBN(0x05228942, 0x95aa8bd3), + TOBN(0xb51a40a5, 0x1e6a75c1), TOBN(0x24327c76, 0x0ea7d817), + TOBN(0x06630182, 0x07774597), 
TOBN(0xd6fdbec3, 0x97fa7164), + TOBN(0x20c99dfb, 0x13c90f48), TOBN(0xd6ac5273, 0x686ef263), + TOBN(0xc6a50bdc, 0xfef64eeb), TOBN(0xcd87b281, 0x86fdfc32), + TOBN(0xb24aa43e, 0x3fcd3efc), TOBN(0xdd26c034, 0xb8088e9a), + TOBN(0xa5ef4dc9, 0xbd3d46ea), TOBN(0xa2f99d58, 0x8a4c6a6f), + TOBN(0xddabd355, 0x2f1da46c), TOBN(0x72c3f8ce, 0x1afacdd1), + TOBN(0xd90c4eee, 0x92d40578), TOBN(0xd28bb41f, 0xca623b94), + TOBN(0x50fc0711, 0x745edc11), TOBN(0x9dd9ad7d, 0x3dc87558), + TOBN(0xce6931fb, 0xb49d1e64), TOBN(0x6c77a0a2, 0xc98bd0f9), + TOBN(0x62b9a629, 0x6baf7cb1), TOBN(0xcf065f91, 0xccf72d22), + TOBN(0x7203cce9, 0x79639071), TOBN(0x09ae4885, 0xf9cb732f), + TOBN(0x5e7c3bec, 0xee8314f3), TOBN(0x1c068aed, 0xdbea298f), + TOBN(0x08d381f1, 0x7c80acec), TOBN(0x03b56be8, 0xe330495b), + TOBN(0xaeffb8f2, 0x9222882d), TOBN(0x95ff38f6, 0xc4af8bf7), + TOBN(0x50e32d35, 0x1fc57d8c), TOBN(0x6635be52, 0x17b444f0), + TOBN(0x04d15276, 0xa5177900), TOBN(0x4e1dbb47, 0xf6858752), + TOBN(0x5b475622, 0xc615796c), TOBN(0xa6fa0387, 0x691867bf), + TOBN(0xed7f5d56, 0x2844c6d0), TOBN(0xc633cf9b, 0x03a2477d), + TOBN(0xf6be5c40, 0x2d3721d6), TOBN(0xaf312eb7, 0xe9fd68e6), + TOBN(0x242792d2, 0xe7417ce1), TOBN(0xff42bc71, 0x970ee7f5), + TOBN(0x1ff4dc6d, 0x5c67a41e), TOBN(0x77709b7b, 0x20882a58), + TOBN(0x3554731d, 0xbe217f2c), TOBN(0x2af2a8cd, 0x5bb72177), + TOBN(0x58eee769, 0x591dd059), TOBN(0xbb2930c9, 0x4bba6477), + TOBN(0x863ee047, 0x7d930cfc), TOBN(0x4c262ad1, 0x396fd1f4), + TOBN(0xf4765bc8, 0x039af7e1), TOBN(0x2519834b, 0x5ba104f6), + TOBN(0x7cd61b4c, 0xd105f961), TOBN(0xa5415da5, 0xd63bca54), + TOBN(0x778280a0, 0x88a1f17c), TOBN(0xc4968949, 0x2329512c), + TOBN(0x174a9126, 0xcecdaa7a), TOBN(0xfc8c7e0e, 0x0b13247b), + TOBN(0x29c110d2, 0x3484c1c4), TOBN(0xf8eb8757, 0x831dfc3b), + TOBN(0x022f0212, 0xc0067452), TOBN(0x3f6f69ee, 0x7b9b926c), + TOBN(0x09032da0, 0xef42daf4), TOBN(0x79f00ade, 0x83f80de4), + TOBN(0x6210db71, 0x81236c97), TOBN(0x74f7685b, 0x3ee0781f), + TOBN(0x4df7da7b, 0xa3e41372), TOBN(0x2aae38b1, 0xb1a1553e), + TOBN(0x1688e222, 0xf6dd9d1b), TOBN(0x57695448, 0x5b8b6487), + TOBN(0x478d2127, 0x4b2edeaa), TOBN(0xb2818fa5, 0x1e85956a), + TOBN(0x1e6addda, 0xf176f2c0), TOBN(0x01ca4604, 0xe2572658), + TOBN(0x0a404ded, 0x85342ffb), TOBN(0x8cf60f96, 0x441838d6), + TOBN(0x9bbc691c, 0xc9071c4a), TOBN(0xfd588744, 0x34442803), + TOBN(0x97101c85, 0x809c0d81), TOBN(0xa7fb754c, 0x8c456f7f), + TOBN(0xc95f3c5c, 0xd51805e1), TOBN(0xab4ccd39, 0xb299dca8), + TOBN(0x3e03d20b, 0x47eaf500), TOBN(0xfa3165c1, 0xd7b80893), + TOBN(0x005e8b54, 0xe160e552), TOBN(0xdc4972ba, 0x9019d11f), + TOBN(0x21a6972e, 0x0c9a4a7a), TOBN(0xa52c258f, 0x37840fd7), + TOBN(0xf8559ff4, 0xc1e99d81), TOBN(0x08e1a7d6, 0xa3c617c0), + TOBN(0xb398fd43, 0x248c6ba7), TOBN(0x6ffedd91, 0xd1283794), + TOBN(0x8a6a59d2, 0xd629d208), TOBN(0xa9d141d5, 0x3490530e), + TOBN(0x42f6fc18, 0x38505989), TOBN(0x09bf250d, 0x479d94ee), + TOBN(0x223ad3b1, 0xb3822790), TOBN(0x6c5926c0, 0x93b8971c), + TOBN(0x609efc7e, 0x75f7fa62), TOBN(0x45d66a6d, 0x1ec2d989), + TOBN(0x4422d663, 0x987d2792), TOBN(0x4a73caad, 0x3eb31d2b), + TOBN(0xf06c2ac1, 0xa32cb9e6), TOBN(0xd9445c5f, 0x91aeba84), + TOBN(0x6af7a1d5, 0xaf71013f), TOBN(0xe68216e5, 0x0bedc946), + TOBN(0xf4cba30b, 0xd27370a0), TOBN(0x7981afbf, 0x870421cc), + TOBN(0x02496a67, 0x9449f0e1), TOBN(0x86cfc4be, 0x0a47edae), + TOBN(0x3073c936, 0xb1feca22), TOBN(0xf5694612, 0x03f8f8fb), + TOBN(0xd063b723, 0x901515ea), TOBN(0x4c6c77a5, 0x749cf038), + TOBN(0x6361e360, 0xab9e5059), TOBN(0x596cf171, 0xa76a37c0), + TOBN(0x800f53fa, 0x6530ae7a), TOBN(0x0f5e631e, 
0x0792a7a6), + TOBN(0x5cc29c24, 0xefdb81c9), TOBN(0xa269e868, 0x3f9c40ba), + TOBN(0xec14f9e1, 0x2cb7191e), TOBN(0x78ea1bd8, 0xe5b08ea6), + TOBN(0x3c65aa9b, 0x46332bb9), TOBN(0x84cc22b3, 0xbf80ce25), + TOBN(0x0098e9e9, 0xd49d5bf1), TOBN(0xcd4ec1c6, 0x19087da4), + TOBN(0x3c9d07c5, 0xaef6e357), TOBN(0x839a0268, 0x9f8f64b8), + TOBN(0xc5e9eb62, 0xc6d8607f), TOBN(0x759689f5, 0x6aa995e4), + TOBN(0x70464669, 0xbbb48317), TOBN(0x921474bf, 0xe402417d), + TOBN(0xcabe135b, 0x2a354c8c), TOBN(0xd51e52d2, 0x812fa4b5), + TOBN(0xec741096, 0x53311fe8), TOBN(0x4f774535, 0xb864514b), + TOBN(0xbcadd671, 0x5bde48f8), TOBN(0xc9703873, 0x2189bc7d), + TOBN(0x5d45299e, 0xc709ee8a), TOBN(0xd1287ee2, 0x845aaff8), + TOBN(0x7d1f8874, 0xdb1dbf1f), TOBN(0xea46588b, 0x990c88d6), + TOBN(0x60ba649a, 0x84368313), TOBN(0xd5fdcbce, 0x60d543ae), + TOBN(0x90b46d43, 0x810d5ab0), TOBN(0x6739d8f9, 0x04d7e5cc), + TOBN(0x021c1a58, 0x0d337c33), TOBN(0x00a61162, 0x68e67c40), + TOBN(0x95ef413b, 0x379f0a1f), TOBN(0xfe126605, 0xe9e2ab95), + TOBN(0x67578b85, 0x2f5f199c), TOBN(0xf5c00329, 0x2cb84913), + TOBN(0xf7956430, 0x37577dd8), TOBN(0x83b82af4, 0x29c5fe88), + TOBN(0x9c1bea26, 0xcdbdc132), TOBN(0x589fa086, 0x9c04339e), + TOBN(0x033e9538, 0xb13799df), TOBN(0x85fa8b21, 0xd295d034), + TOBN(0xdf17f73f, 0xbd9ddcca), TOBN(0xf32bd122, 0xddb66334), + TOBN(0x55ef88a7, 0x858b044c), TOBN(0x1f0d69c2, 0x5aa9e397), + TOBN(0x55fd9cc3, 0x40d85559), TOBN(0xc774df72, 0x7785ddb2), + TOBN(0x5dcce9f6, 0xd3bd2e1c), TOBN(0xeb30da20, 0xa85dfed0), + TOBN(0x5ed7f5bb, 0xd3ed09c4), TOBN(0x7d42a35c, 0x82a9c1bd), + TOBN(0xcf3de995, 0x9890272d), TOBN(0x75f3432a, 0x3e713a10), + TOBN(0x5e13479f, 0xe28227b8), TOBN(0xb8561ea9, 0xfefacdc8), + TOBN(0xa6a297a0, 0x8332aafd), TOBN(0x9b0d8bb5, 0x73809b62), + TOBN(0xd2fa1cfd, 0x0c63036f), TOBN(0x7a16eb55, 0xbd64bda8), + TOBN(0x3f5cf5f6, 0x78e62ddc), TOBN(0x2267c454, 0x07fd752b), + TOBN(0x5e361b6b, 0x5e437bbe), TOBN(0x95c59501, 0x8354e075), + TOBN(0xec725f85, 0xf2b254d9), TOBN(0x844b617d, 0x2cb52b4e), + TOBN(0xed8554f5, 0xcf425fb5), TOBN(0xab67703e, 0x2af9f312), + TOBN(0x4cc34ec1, 0x3cf48283), TOBN(0xb09daa25, 0x9c8a705e), + TOBN(0xd1e9d0d0, 0x5b7d4f84), TOBN(0x4df6ef64, 0xdb38929d), + TOBN(0xe16b0763, 0xaa21ba46), TOBN(0xc6b1d178, 0xa293f8fb), + TOBN(0x0ff5b602, 0xd520aabf), TOBN(0x94d671bd, 0xc339397a), + TOBN(0x7c7d98cf, 0x4f5792fa), TOBN(0x7c5e0d67, 0x11215261), + TOBN(0x9b19a631, 0xa7c5a6d4), TOBN(0xc8511a62, 0x7a45274d), + TOBN(0x0c16621c, 0xa5a60d99), TOBN(0xf7fbab88, 0xcf5e48cb), + TOBN(0xab1e6ca2, 0xf7ddee08), TOBN(0x83bd08ce, 0xe7867f3c), + TOBN(0xf7e48e8a, 0x2ac13e27), TOBN(0x4494f6df, 0x4eb1a9f5), + TOBN(0xedbf84eb, 0x981f0a62), TOBN(0x49badc32, 0x536438f0), + TOBN(0x50bea541, 0x004f7571), TOBN(0xbac67d10, 0xdf1c94ee), + TOBN(0x253d73a1, 0xb727bc31), TOBN(0xb3d01cf2, 0x30686e28), + TOBN(0x51b77b1b, 0x55fd0b8b), TOBN(0xa099d183, 0xfeec3173), + TOBN(0x202b1fb7, 0x670e72b7), TOBN(0xadc88b33, 0xa8e1635f), + TOBN(0x34e8216a, 0xf989d905), TOBN(0xc2e68d20, 0x29b58d01), + TOBN(0x11f81c92, 0x6fe55a93), TOBN(0x15f1462a, 0x8f296f40), + TOBN(0x1915d375, 0xea3d62f2), TOBN(0xa17765a3, 0x01c8977d), + TOBN(0x7559710a, 0xe47b26f6), TOBN(0xe0bd29c8, 0x535077a5), + TOBN(0x615f976d, 0x08d84858), TOBN(0x370dfe85, 0x69ced5c1), + TOBN(0xbbc7503c, 0xa734fa56), TOBN(0xfbb9f1ec, 0x91ac4574), + TOBN(0x95d7ec53, 0x060dd7ef), TOBN(0xeef2dacd, 0x6e657979), + TOBN(0x54511af3, 0xe2a08235), TOBN(0x1e324aa4, 0x1f4aea3d), + TOBN(0x550e7e71, 0xe6e67671), TOBN(0xbccd5190, 0xbf52faf7), + TOBN(0xf880d316, 0x223cc62a), TOBN(0x0d402c7e, 0x2b32eb5d), + 
TOBN(0xa40bc039, 0x306a5a3b), TOBN(0x4e0a41fd, 0x96783a1b), + TOBN(0xa1e8d39a, 0x0253cdd4), TOBN(0x6480be26, 0xc7388638), + TOBN(0xee365e1d, 0x2285f382), TOBN(0x188d8d8f, 0xec0b5c36), + TOBN(0x34ef1a48, 0x1f0f4d82), TOBN(0x1a8f43e1, 0xa487d29a), + TOBN(0x8168226d, 0x77aefb3a), TOBN(0xf69a751e, 0x1e72c253), + TOBN(0x8e04359a, 0xe9594df1), TOBN(0x475ffd7d, 0xd14c0467), + TOBN(0xb5a2c2b1, 0x3844e95c), TOBN(0x85caf647, 0xdd12ef94), + TOBN(0x1ecd2a9f, 0xf1063d00), TOBN(0x1dd2e229, 0x23843311), + TOBN(0x38f0e09d, 0x73d17244), TOBN(0x3ede7746, 0x8fc653f1), + TOBN(0xae4459f5, 0xdc20e21c), TOBN(0x00db2ffa, 0x6a8599ea), + TOBN(0x11682c39, 0x30cfd905), TOBN(0x4934d074, 0xa5c112a6), + TOBN(0xbdf063c5, 0x568bfe95), TOBN(0x779a440a, 0x016c441a), + TOBN(0x0c23f218, 0x97d6fbdc), TOBN(0xd3a5cd87, 0xe0776aac), + TOBN(0xcee37f72, 0xd712e8db), TOBN(0xfb28c70d, 0x26f74e8d), + TOBN(0xffe0c728, 0xb61301a0), TOBN(0xa6282168, 0xd3724354), + TOBN(0x7ff4cb00, 0x768ffedc), TOBN(0xc51b3088, 0x03b02de9), + TOBN(0xa5a8147c, 0x3902dda5), TOBN(0x35d2f706, 0xfe6973b4), + TOBN(0x5ac2efcf, 0xc257457e), TOBN(0x933f48d4, 0x8700611b), + TOBN(0xc365af88, 0x4912beb2), TOBN(0x7f5a4de6, 0x162edf94), + TOBN(0xc646ba7c, 0x0c32f34b), TOBN(0x632c6af3, 0xb2091074), + TOBN(0x58d4f2e3, 0x753e43a9), TOBN(0x70e1d217, 0x24d4e23f), + TOBN(0xb24bf729, 0xafede6a6), TOBN(0x7f4a94d8, 0x710c8b60), + TOBN(0xaad90a96, 0x8d4faa6a), TOBN(0xd9ed0b32, 0xb066b690), + TOBN(0x52fcd37b, 0x78b6dbfd), TOBN(0x0b64615e, 0x8bd2b431), + TOBN(0x228e2048, 0xcfb9fad5), TOBN(0xbeaa386d, 0x240b76bd), + TOBN(0x2d6681c8, 0x90dad7bc), TOBN(0x3e553fc3, 0x06d38f5e), + TOBN(0xf27cdb9b, 0x9d5f9750), TOBN(0x3e85c52a, 0xd28c5b0e), + TOBN(0x190795af, 0x5247c39b), TOBN(0x547831eb, 0xbddd6828), + TOBN(0xf327a227, 0x4a82f424), TOBN(0x36919c78, 0x7e47f89d), + TOBN(0xe4783919, 0x43c7392c), TOBN(0xf101b9aa, 0x2316fefe), + TOBN(0xbcdc9e9c, 0x1c5009d2), TOBN(0xfb55ea13, 0x9cd18345), + TOBN(0xf5b5e231, 0xa3ce77c7), TOBN(0xde6b4527, 0xd2f2cb3d), + TOBN(0x10f6a333, 0x9bb26f5f), TOBN(0x1e85db8e, 0x044d85b6), + TOBN(0xc3697a08, 0x94197e54), TOBN(0x65e18cc0, 0xa7cb4ea8), + TOBN(0xa38c4f50, 0xa471fe6e), TOBN(0xf031747a, 0x2f13439c), + TOBN(0x53c4a6ba, 0xc007318b), TOBN(0xa8da3ee5, 0x1deccb3d), + TOBN(0x0555b31c, 0x558216b1), TOBN(0x90c7810c, 0x2f79e6c2), + TOBN(0x9b669f4d, 0xfe8eed3c), TOBN(0x70398ec8, 0xe0fac126), + TOBN(0xa96a449e, 0xf701b235), TOBN(0x0ceecdb3, 0xeb94f395), + TOBN(0x285fc368, 0xd0cb7431), TOBN(0x0d37bb52, 0x16a18c64), + TOBN(0x05110d38, 0xb880d2dd), TOBN(0xa60f177b, 0x65930d57), + TOBN(0x7da34a67, 0xf36235f5), TOBN(0x47f5e17c, 0x183816b9), + TOBN(0xc7664b57, 0xdb394af4), TOBN(0x39ba215d, 0x7036f789), + TOBN(0x46d2ca0e, 0x2f27b472), TOBN(0xc42647ee, 0xf73a84b7), + TOBN(0x44bc7545, 0x64488f1d), TOBN(0xaa922708, 0xf4cf85d5), + TOBN(0x721a01d5, 0x53e4df63), TOBN(0x649c0c51, 0x5db46ced), + TOBN(0x6bf0d64e, 0x3cffcb6c), TOBN(0xe3bf93fe, 0x50f71d96), + TOBN(0x75044558, 0xbcc194a0), TOBN(0x16ae3372, 0x6afdc554), + TOBN(0xbfc01adf, 0x5ca48f3f), TOBN(0x64352f06, 0xe22a9b84), + TOBN(0xcee54da1, 0xc1099e4a), TOBN(0xbbda54e8, 0xfa1b89c0), + TOBN(0x166a3df5, 0x6f6e55fb), TOBN(0x1ca44a24, 0x20176f88), + TOBN(0x936afd88, 0xdfb7b5ff), TOBN(0xe34c2437, 0x8611d4a0), + TOBN(0x7effbb75, 0x86142103), TOBN(0x6704ba1b, 0x1f34fc4d), + TOBN(0x7c2a468f, 0x10c1b122), TOBN(0x36b3a610, 0x8c6aace9), + TOBN(0xabfcc0a7, 0x75a0d050), TOBN(0x066f9197, 0x3ce33e32), + TOBN(0xce905ef4, 0x29fe09be), TOBN(0x89ee25ba, 0xa8376351), + TOBN(0x2a3ede22, 0xfd29dc76), TOBN(0x7fd32ed9, 0x36f17260), + TOBN(0x0cadcf68, 
0x284b4126), TOBN(0x63422f08, 0xa7951fc8), + TOBN(0x562b24f4, 0x0807e199), TOBN(0xfe9ce5d1, 0x22ad4490), + TOBN(0xc2f51b10, 0x0db2b1b4), TOBN(0xeb3613ff, 0xe4541d0d), + TOBN(0xbd2c4a05, 0x2680813b), TOBN(0x527aa55d, 0x561b08d6), + TOBN(0xa9f8a40e, 0xa7205558), TOBN(0xe3eea56f, 0x243d0bec), + TOBN(0x7b853817, 0xa0ff58b3), TOBN(0xb67d3f65, 0x1a69e627), + TOBN(0x0b76bbb9, 0xa869b5d6), TOBN(0xa3afeb82, 0x546723ed), + TOBN(0x5f24416d, 0x3e554892), TOBN(0x8413b53d, 0x430e2a45), + TOBN(0x99c56aee, 0x9032a2a0), TOBN(0x09432bf6, 0xeec367b1), + TOBN(0x552850c6, 0xdaf0ecc1), TOBN(0x49ebce55, 0x5bc92048), + TOBN(0xdfb66ba6, 0x54811307), TOBN(0x1b84f797, 0x6f298597), + TOBN(0x79590481, 0x8d1d7a0d), TOBN(0xd9fabe03, 0x3a6fa556), + TOBN(0xa40f9c59, 0xba9e5d35), TOBN(0xcb1771c1, 0xf6247577), + TOBN(0x542a47ca, 0xe9a6312b), TOBN(0xa34b3560, 0x552dd8c5), + TOBN(0xfdf94de0, 0x0d794716), TOBN(0xd46124a9, 0x9c623094), + TOBN(0x56b7435d, 0x68afe8b4), TOBN(0x27f20540, 0x6c0d8ea1), + TOBN(0x12b77e14, 0x73186898), TOBN(0xdbc3dd46, 0x7479490f), + TOBN(0x951a9842, 0xc03b0c05), TOBN(0x8b1b3bb3, 0x7921bc96), + TOBN(0xa573b346, 0x2b202e0a), TOBN(0x77e4665d, 0x47254d56), + TOBN(0x08b70dfc, 0xd23e3984), TOBN(0xab86e8bc, 0xebd14236), + TOBN(0xaa3e07f8, 0x57114ba7), TOBN(0x5ac71689, 0xab0ef4f2), + TOBN(0x88fca384, 0x0139d9af), TOBN(0x72733f88, 0x76644af0), + TOBN(0xf122f72a, 0x65d74f4a), TOBN(0x13931577, 0xa5626c7a), + TOBN(0xd5b5d9eb, 0x70f8d5a4), TOBN(0x375adde7, 0xd7bbb228), + TOBN(0x31e88b86, 0x0c1c0b32), TOBN(0xd1f568c4, 0x173edbaa), + TOBN(0x1592fc83, 0x5459df02), TOBN(0x2beac0fb, 0x0fcd9a7e), + TOBN(0xb0a6fdb8, 0x1b473b0a), TOBN(0xe3224c6f, 0x0fe8fc48), + TOBN(0x680bd00e, 0xe87edf5b), TOBN(0x30385f02, 0x20e77cf5), + TOBN(0xe9ab98c0, 0x4d42d1b2), TOBN(0x72d191d2, 0xd3816d77), + TOBN(0x1564daca, 0x0917d9e5), TOBN(0x394eab59, 0x1f8fed7f), + TOBN(0xa209aa8d, 0x7fbb3896), TOBN(0x5564f3b9, 0xbe6ac98e), + TOBN(0xead21d05, 0xd73654ef), TOBN(0x68d1a9c4, 0x13d78d74), + TOBN(0x61e01708, 0x6d4973a0), TOBN(0x83da3500, 0x46e6d32a), + TOBN(0x6a3dfca4, 0x68ae0118), TOBN(0xa1b9a4c9, 0xd02da069), + TOBN(0x0b2ff9c7, 0xebab8302), TOBN(0x98af07c3, 0x944ba436), + TOBN(0x85997326, 0x995f0f9f), TOBN(0x467fade0, 0x71b58bc6), + TOBN(0x47e4495a, 0xbd625a2b), TOBN(0xfdd2d01d, 0x33c3b8cd), + TOBN(0x2c38ae28, 0xc693f9fa), TOBN(0x48622329, 0x348f7999), + TOBN(0x97bf738e, 0x2161f583), TOBN(0x15ee2fa7, 0x565e8cc9), + TOBN(0xa1a5c845, 0x5777e189), TOBN(0xcc10bee0, 0x456f2829), + TOBN(0x8ad95c56, 0xda762bd5), TOBN(0x152e2214, 0xe9d91da8), + TOBN(0x975b0e72, 0x7cb23c74), TOBN(0xfd5d7670, 0xa90c66df), + TOBN(0xb5b5b8ad, 0x225ffc53), TOBN(0xab6dff73, 0xfaded2ae), + TOBN(0xebd56781, 0x6f4cbe9d), TOBN(0x0ed8b249, 0x6a574bd7), + TOBN(0x41c246fe, 0x81a881fa), TOBN(0x91564805, 0xc3db9c70), + TOBN(0xd7c12b08, 0x5b862809), TOBN(0x1facd1f1, 0x55858d7b), + TOBN(0x7693747c, 0xaf09e92a), TOBN(0x3b69dcba, 0x189a425f), + TOBN(0x0be28e9f, 0x967365ef), TOBN(0x57300eb2, 0xe801f5c9), + TOBN(0x93b8ac6a, 0xd583352f), TOBN(0xa2cf1f89, 0xcd05b2b7), + TOBN(0x7c0c9b74, 0x4dcc40cc), TOBN(0xfee38c45, 0xada523fb), + TOBN(0xb49a4dec, 0x1099cc4d), TOBN(0x325c377f, 0x69f069c6), + TOBN(0xe12458ce, 0x476cc9ff), TOBN(0x580e0b6c, 0xc6d4cb63), + TOBN(0xd561c8b7, 0x9072289b), TOBN(0x0377f264, 0xa619e6da), + TOBN(0x26685362, 0x88e591a5), TOBN(0xa453a7bd, 0x7523ca2b), + TOBN(0x8a9536d2, 0xc1df4533), TOBN(0xc8e50f2f, 0xbe972f79), + TOBN(0xd433e50f, 0x6d3549cf), TOBN(0x6f33696f, 0xfacd665e), + TOBN(0x695bfdac, 0xce11fcb4), TOBN(0x810ee252, 0xaf7c9860), + TOBN(0x65450fe1, 0x7159bb2c), 
TOBN(0xf7dfbebe, 0x758b357b), + TOBN(0x2b057e74, 0xd69fea72), TOBN(0xd485717a, 0x92731745),} + , + {TOBN(0x896c42e8, 0xee36860c), TOBN(0xdaf04dfd, 0x4113c22d), + TOBN(0x1adbb7b7, 0x44104213), TOBN(0xe5fd5fa1, 0x1fd394ea), + TOBN(0x68235d94, 0x1a4e0551), TOBN(0x6772cfbe, 0x18d10151), + TOBN(0x276071e3, 0x09984523), TOBN(0xe4e879de, 0x5a56ba98), + TOBN(0xaaafafb0, 0x285b9491), TOBN(0x01a0be88, 0x1e4c705e), + TOBN(0xff1d4f5d, 0x2ad9caab), TOBN(0x6e349a4a, 0xc37a233f), + TOBN(0xcf1c1246, 0x4a1c6a16), TOBN(0xd99e6b66, 0x29383260), + TOBN(0xea3d4366, 0x5f6d5471), TOBN(0x36974d04, 0xff8cc89b), + TOBN(0xc26c49a1, 0xcfe89d80), TOBN(0xb42c026d, 0xda9c8371), + TOBN(0xca6c013a, 0xdad066d2), TOBN(0xfb8f7228, 0x56a4f3ee), + TOBN(0x08b579ec, 0xd850935b), TOBN(0x34c1a74c, 0xd631e1b3), + TOBN(0xcb5fe596, 0xac198534), TOBN(0x39ff21f6, 0xe1f24f25), + TOBN(0x27f29e14, 0x8f929057), TOBN(0x7a64ae06, 0xc0c853df), + TOBN(0x256cd183, 0x58e9c5ce), TOBN(0x9d9cce82, 0xded092a5), + TOBN(0xcc6e5979, 0x6e93b7c7), TOBN(0xe1e47092, 0x31bb9e27), + TOBN(0xb70b3083, 0xaa9e29a0), TOBN(0xbf181a75, 0x3785e644), + TOBN(0xf53f2c65, 0x8ead09f7), TOBN(0x1335e1d5, 0x9780d14d), + TOBN(0x69cc20e0, 0xcd1b66bc), TOBN(0x9b670a37, 0xbbe0bfc8), + TOBN(0xce53dc81, 0x28efbeed), TOBN(0x0c74e77c, 0x8326a6e5), + TOBN(0x3604e0d2, 0xb88e9a63), TOBN(0xbab38fca, 0x13dc2248), + TOBN(0x8ed6e8c8, 0x5c0a3f1e), TOBN(0xbcad2492, 0x7c87c37f), + TOBN(0xfdfb62bb, 0x9ee3b78d), TOBN(0xeba8e477, 0xcbceba46), + TOBN(0x37d38cb0, 0xeeaede4b), TOBN(0x0bc498e8, 0x7976deb6), + TOBN(0xb2944c04, 0x6b6147fb), TOBN(0x8b123f35, 0xf71f9609), + TOBN(0xa155dcc7, 0xde79dc24), TOBN(0xf1168a32, 0x558f69cd), + TOBN(0xbac21595, 0x0d1850df), TOBN(0x15c8295b, 0xb204c848), + TOBN(0xf661aa36, 0x7d8184ff), TOBN(0xc396228e, 0x30447bdb), + TOBN(0x11cd5143, 0xbde4a59e), TOBN(0xe3a26e3b, 0x6beab5e6), + TOBN(0xd3b3a13f, 0x1402b9d0), TOBN(0x573441c3, 0x2c7bc863), + TOBN(0x4b301ec4, 0x578c3e6e), TOBN(0xc26fc9c4, 0x0adaf57e), + TOBN(0x96e71bfd, 0x7493cea3), TOBN(0xd05d4b3f, 0x1af81456), + TOBN(0xdaca2a8a, 0x6a8c608f), TOBN(0x53ef07f6, 0x0725b276), + TOBN(0x07a5fbd2, 0x7824fc56), TOBN(0x34675218, 0x13289077), + TOBN(0x5bf69fd5, 0xe0c48349), TOBN(0xa613ddd3, 0xb6aa7875), + TOBN(0x7f78c19c, 0x5450d866), TOBN(0x46f4409c, 0x8f84a481), + TOBN(0x9f1d1928, 0x90fce239), TOBN(0x016c4168, 0xb2ce44b9), + TOBN(0xbae023f0, 0xc7435978), TOBN(0xb152c888, 0x20e30e19), + TOBN(0x9c241645, 0xe3fa6faf), TOBN(0x735d95c1, 0x84823e60), + TOBN(0x03197573, 0x03955317), TOBN(0x0b4b02a9, 0xf03b4995), + TOBN(0x076bf559, 0x70274600), TOBN(0x32c5cc53, 0xaaf57508), + TOBN(0xe8af6d1f, 0x60624129), TOBN(0xb7bc5d64, 0x9a5e2b5e), + TOBN(0x3814b048, 0x5f082d72), TOBN(0x76f267f2, 0xce19677a), + TOBN(0x626c630f, 0xb36eed93), TOBN(0x55230cd7, 0x3bf56803), + TOBN(0x78837949, 0xce2736a0), TOBN(0x0d792d60, 0xaa6c55f1), + TOBN(0x0318dbfd, 0xd5c7c5d2), TOBN(0xb38f8da7, 0x072b342d), + TOBN(0x3569bddc, 0x7b8de38a), TOBN(0xf25b5887, 0xa1c94842), + TOBN(0xb2d5b284, 0x2946ad60), TOBN(0x854f29ad, 0xe9d1707e), + TOBN(0xaa5159dc, 0x2c6a4509), TOBN(0x899f94c0, 0x57189837), + TOBN(0xcf6adc51, 0xf4a55b03), TOBN(0x261762de, 0x35e3b2d5), + TOBN(0x4cc43012, 0x04827b51), TOBN(0xcd22a113, 0xc6021442), + TOBN(0xce2fd61a, 0x247c9569), TOBN(0x59a50973, 0xd152beca), + TOBN(0x6c835a11, 0x63a716d4), TOBN(0xc26455ed, 0x187dedcf), + TOBN(0x27f536e0, 0x49ce89e7), TOBN(0x18908539, 0xcc890cb5), + TOBN(0x308909ab, 0xd83c2aa1), TOBN(0xecd3142b, 0x1ab73bd3), + TOBN(0x6a85bf59, 0xb3f5ab84), TOBN(0x3c320a68, 0xf2bea4c6), + TOBN(0xad8dc538, 0x6da4541f), 
TOBN(0xeaf34eb0, 0xb7c41186), + TOBN(0x1c780129, 0x977c97c4), TOBN(0x5ff9beeb, 0xc57eb9fa), + TOBN(0xa24d0524, 0xc822c478), TOBN(0xfd8eec2a, 0x461cd415), + TOBN(0xfbde194e, 0xf027458c), TOBN(0xb4ff5319, 0x1d1be115), + TOBN(0x63f874d9, 0x4866d6f4), TOBN(0x35c75015, 0xb21ad0c9), + TOBN(0xa6b5c9d6, 0x46ac49d2), TOBN(0x42c77c0b, 0x83137aa9), + TOBN(0x24d000fc, 0x68225a38), TOBN(0x0f63cfc8, 0x2fe1e907), + TOBN(0x22d1b01b, 0xc6441f95), TOBN(0x7d38f719, 0xec8e448f), + TOBN(0x9b33fa5f, 0x787fb1ba), TOBN(0x94dcfda1, 0x190158df), + TOBN(0xc47cb339, 0x5f6d4a09), TOBN(0x6b4f355c, 0xee52b826), + TOBN(0x3d100f5d, 0xf51b930a), TOBN(0xf4512fac, 0x9f668f69), + TOBN(0x546781d5, 0x206c4c74), TOBN(0xd021d4d4, 0xcb4d2e48), + TOBN(0x494a54c2, 0xca085c2d), TOBN(0xf1dbaca4, 0x520850a8), + TOBN(0x63c79326, 0x490a1aca), TOBN(0xcb64dd9c, 0x41526b02), + TOBN(0xbb772591, 0xa2979258), TOBN(0x3f582970, 0x48d97846), + TOBN(0xd66b70d1, 0x7c213ba7), TOBN(0xc28febb5, 0xe8a0ced4), + TOBN(0x6b911831, 0xc10338c1), TOBN(0x0d54e389, 0xbf0126f3), + TOBN(0x7048d460, 0x4af206ee), TOBN(0x786c88f6, 0x77e97cb9), + TOBN(0xd4375ae1, 0xac64802e), TOBN(0x469bcfe1, 0xd53ec11c), + TOBN(0xfc9b340d, 0x47062230), TOBN(0xe743bb57, 0xc5b4a3ac), + TOBN(0xfe00b4aa, 0x59ef45ac), TOBN(0x29a4ef23, 0x59edf188), + TOBN(0x40242efe, 0xb483689b), TOBN(0x2575d3f6, 0x513ac262), + TOBN(0xf30037c8, 0x0ca6db72), TOBN(0xc9fcce82, 0x98864be2), + TOBN(0x84a112ff, 0x0149362d), TOBN(0x95e57582, 0x1c4ae971), + TOBN(0x1fa4b1a8, 0x945cf86c), TOBN(0x4525a734, 0x0b024a2f), + TOBN(0xe76c8b62, 0x8f338360), TOBN(0x483ff593, 0x28edf32b), + TOBN(0x67e8e90a, 0x298b1aec), TOBN(0x9caab338, 0x736d9a21), + TOBN(0x5c09d2fd, 0x66892709), TOBN(0x2496b4dc, 0xb55a1d41), + TOBN(0x93f5fb1a, 0xe24a4394), TOBN(0x08c75049, 0x6fa8f6c1), + TOBN(0xcaead1c2, 0xc905d85f), TOBN(0xe9d7f790, 0x0733ae57), + TOBN(0x24c9a65c, 0xf07cdd94), TOBN(0x7389359c, 0xa4b55931), + TOBN(0xf58709b7, 0x367e45f7), TOBN(0x1f203067, 0xcb7e7adc), + TOBN(0x82444bff, 0xc7b72818), TOBN(0x07303b35, 0xbaac8033), + TOBN(0x1e1ee4e4, 0xd13b7ea1), TOBN(0xe6489b24, 0xe0e74180), + TOBN(0xa5f2c610, 0x7e70ef70), TOBN(0xa1655412, 0xbdd10894), + TOBN(0x555ebefb, 0x7af4194e), TOBN(0x533c1c3c, 0x8e89bd9c), + TOBN(0x735b9b57, 0x89895856), TOBN(0x15fb3cd2, 0x567f5c15), + TOBN(0x057fed45, 0x526f09fd), TOBN(0xe8a4f10c, 0x8128240a), + TOBN(0x9332efc4, 0xff2bfd8d), TOBN(0x214e77a0, 0xbd35aa31), + TOBN(0x32896d73, 0x14faa40e), TOBN(0x767867ec, 0x01e5f186), + TOBN(0xc9adf8f1, 0x17a1813e), TOBN(0xcb6cda78, 0x54741795), + TOBN(0xb7521b6d, 0x349d51aa), TOBN(0xf56b5a9e, 0xe3c7b8e9), + TOBN(0xc6f1e5c9, 0x32a096df), TOBN(0x083667c4, 0xa3635024), + TOBN(0x365ea135, 0x18087f2f), TOBN(0xf1b8eaac, 0xd136e45d), + TOBN(0xc8a0e484, 0x73aec989), TOBN(0xd75a324b, 0x142c9259), + TOBN(0xb7b4d001, 0x01dae185), TOBN(0x45434e0b, 0x9b7a94bc), + TOBN(0xf54339af, 0xfbd8cb0b), TOBN(0xdcc4569e, 0xe98ef49e), + TOBN(0x7789318a, 0x09a51299), TOBN(0x81b4d206, 0xb2b025d8), + TOBN(0xf64aa418, 0xfae85792), TOBN(0x3e50258f, 0xacd7baf7), + TOBN(0xdce84cdb, 0x2996864b), TOBN(0xa2e67089, 0x1f485fa4), + TOBN(0xb28b2bb6, 0x534c6a5a), TOBN(0x31a7ec6b, 0xc94b9d39), + TOBN(0x1d217766, 0xd6bc20da), TOBN(0x4acdb5ec, 0x86761190), + TOBN(0x68726328, 0x73701063), TOBN(0x4d24ee7c, 0x2128c29b), + TOBN(0xc072ebd3, 0xa19fd868), TOBN(0x612e481c, 0xdb8ddd3b), + TOBN(0xb4e1d754, 0x1a64d852), TOBN(0x00ef95ac, 0xc4c6c4ab), + TOBN(0x1536d2ed, 0xaa0a6c46), TOBN(0x61294086, 0x43774790), + TOBN(0x54af25e8, 0x343fda10), TOBN(0x9ff9d98d, 0xfd25d6f2), + TOBN(0x0746af7c, 0x468b8835), TOBN(0x977a31cb, 
0x730ecea7), + TOBN(0xa5096b80, 0xc2cf4a81), TOBN(0xaa986833, 0x6458c37a), + TOBN(0x6af29bf3, 0xa6bd9d34), TOBN(0x6a62fe9b, 0x33c5d854), + TOBN(0x50e6c304, 0xb7133b5e), TOBN(0x04b60159, 0x7d6e6848), + TOBN(0x4cd296df, 0x5579bea4), TOBN(0x10e35ac8, 0x5ceedaf1), + TOBN(0x04c4c5fd, 0xe3bcc5b1), TOBN(0x95f9ee8a, 0x89412cf9), + TOBN(0x2c9459ee, 0x82b6eb0f), TOBN(0x2e845765, 0x95c2aadd), + TOBN(0x774a84ae, 0xd327fcfe), TOBN(0xd8c93722, 0x0368d476), + TOBN(0x0dbd5748, 0xf83e8a3b), TOBN(0xa579aa96, 0x8d2495f3), + TOBN(0x535996a0, 0xae496e9b), TOBN(0x07afbfe9, 0xb7f9bcc2), + TOBN(0x3ac1dc6d, 0x5b7bd293), TOBN(0x3b592cff, 0x7022323d), + TOBN(0xba0deb98, 0x9c0a3e76), TOBN(0x18e78e9f, 0x4b197acb), + TOBN(0x211cde10, 0x296c36ef), TOBN(0x7ee89672, 0x82c4da77), + TOBN(0xb617d270, 0xa57836da), TOBN(0xf0cd9c31, 0x9cb7560b), + TOBN(0x01fdcbf7, 0xe455fe90), TOBN(0x3fb53cbb, 0x7e7334f3), + TOBN(0x781e2ea4, 0x4e7de4ec), TOBN(0x8adab3ad, 0x0b384fd0), + TOBN(0x129eee2f, 0x53d64829), TOBN(0x7a471e17, 0xa261492b), + TOBN(0xe4f9adb9, 0xe4cb4a2c), TOBN(0x3d359f6f, 0x97ba2c2d), + TOBN(0x346c6786, 0x0aacd697), TOBN(0x92b444c3, 0x75c2f8a8), + TOBN(0xc79fa117, 0xd85df44e), TOBN(0x56782372, 0x398ddf31), + TOBN(0x60e690f2, 0xbbbab3b8), TOBN(0x4851f8ae, 0x8b04816b), + TOBN(0xc72046ab, 0x9c92e4d2), TOBN(0x518c74a1, 0x7cf3136b), + TOBN(0xff4eb50a, 0xf9877d4c), TOBN(0x14578d90, 0xa919cabb), + TOBN(0x8218f8c4, 0xac5eb2b6), TOBN(0xa3ccc547, 0x542016e4), + TOBN(0x025bf48e, 0x327f8349), TOBN(0xf3e97346, 0xf43cb641), + TOBN(0xdc2bafdf, 0x500f1085), TOBN(0x57167876, 0x2f063055), + TOBN(0x5bd914b9, 0x411925a6), TOBN(0x7c078d48, 0xa1123de5), + TOBN(0xee6bf835, 0x182b165d), TOBN(0xb11b5e5b, 0xba519727), + TOBN(0xe33ea76c, 0x1eea7b85), TOBN(0x2352b461, 0x92d4f85e), + TOBN(0xf101d334, 0xafe115bb), TOBN(0xfabc1294, 0x889175a3), + TOBN(0x7f6bcdc0, 0x5233f925), TOBN(0xe0a802db, 0xe77fec55), + TOBN(0xbdb47b75, 0x8069b659), TOBN(0x1c5e12de, 0xf98fbd74), + TOBN(0x869c58c6, 0x4b8457ee), TOBN(0xa5360f69, 0x4f7ea9f7), + TOBN(0xe576c09f, 0xf460b38f), TOBN(0x6b70d548, 0x22b7fb36), + TOBN(0x3fd237f1, 0x3bfae315), TOBN(0x33797852, 0xcbdff369), + TOBN(0x97df25f5, 0x25b516f9), TOBN(0x46f388f2, 0xba38ad2d), + TOBN(0x656c4658, 0x89d8ddbb), TOBN(0x8830b26e, 0x70f38ee8), + TOBN(0x4320fd5c, 0xde1212b0), TOBN(0xc34f30cf, 0xe4a2edb2), + TOBN(0xabb131a3, 0x56ab64b8), TOBN(0x7f77f0cc, 0xd99c5d26), + TOBN(0x66856a37, 0xbf981d94), TOBN(0x19e76d09, 0x738bd76e), + TOBN(0xe76c8ac3, 0x96238f39), TOBN(0xc0a482be, 0xa830b366), + TOBN(0xb7b8eaff, 0x0b4eb499), TOBN(0x8ecd83bc, 0x4bfb4865), + TOBN(0x971b2cb7, 0xa2f3776f), TOBN(0xb42176a4, 0xf4b88adf), + TOBN(0xb9617df5, 0xbe1fa446), TOBN(0x8b32d508, 0xcd031bd2), + TOBN(0x1c6bd47d, 0x53b618c0), TOBN(0xc424f46c, 0x6a227923), + TOBN(0x7303ffde, 0xdd92d964), TOBN(0xe9712878, 0x71b5abf2), + TOBN(0x8f48a632, 0xf815561d), TOBN(0x85f48ff5, 0xd3c055d1), + TOBN(0x222a1427, 0x7525684f), TOBN(0xd0d841a0, 0x67360cc3), + TOBN(0x4245a926, 0x0b9267c6), TOBN(0xc78913f1, 0xcf07f863), + TOBN(0xaa844c8e, 0x4d0d9e24), TOBN(0xa42ad522, 0x3d5f9017), + TOBN(0xbd371749, 0xa2c989d5), TOBN(0x928292df, 0xe1f5e78e), + TOBN(0x493b383e, 0x0a1ea6da), TOBN(0x5136fd8d, 0x13aee529), + TOBN(0x860c44b1, 0xf2c34a99), TOBN(0x3b00aca4, 0xbf5855ac), + TOBN(0xabf6aaa0, 0xfaaf37be), TOBN(0x65f43682, 0x2a53ec08), + TOBN(0x1d9a5801, 0xa11b12e1), TOBN(0x78a7ab2c, 0xe20ed475), + TOBN(0x0de1067e, 0x9a41e0d5), TOBN(0x30473f5f, 0x305023ea), + TOBN(0xdd3ae09d, 0x169c7d97), TOBN(0x5cd5baa4, 0xcfaef9cd), + TOBN(0x5cd7440b, 0x65a44803), TOBN(0xdc13966a, 0x47f364de), + 
TOBN(0x077b2be8, 0x2b8357c1), TOBN(0x0cb1b4c5, 0xe9d57c2a), + TOBN(0x7a4ceb32, 0x05ff363e), TOBN(0xf310fa4d, 0xca35a9ef), + TOBN(0xdbb7b352, 0xf97f68c6), TOBN(0x0c773b50, 0x0b02cf58), + TOBN(0xea2e4821, 0x3c1f96d9), TOBN(0xffb357b0, 0xeee01815), + TOBN(0xb9c924cd, 0xe0f28039), TOBN(0x0b36c95a, 0x46a3fbe4), + TOBN(0x1faaaea4, 0x5e46db6c), TOBN(0xcae575c3, 0x1928aaff), + TOBN(0x7f671302, 0xa70dab86), TOBN(0xfcbd12a9, 0x71c58cfc), + TOBN(0xcbef9acf, 0xbee0cb92), TOBN(0x573da0b9, 0xf8c1b583), + TOBN(0x4752fcfe, 0x0d41d550), TOBN(0xe7eec0e3, 0x2155cffe), + TOBN(0x0fc39fcb, 0x545ae248), TOBN(0x522cb8d1, 0x8065f44e), + TOBN(0x263c962a, 0x70cbb96c), TOBN(0xe034362a, 0xbcd124a9), + TOBN(0xf120db28, 0x3c2ae58d), TOBN(0xb9a38d49, 0xfef6d507), + TOBN(0xb1fd2a82, 0x1ff140fd), TOBN(0xbd162f30, 0x20aee7e0), + TOBN(0x4e17a5d4, 0xcb251949), TOBN(0x2aebcb83, 0x4f7e1c3d), + TOBN(0x608eb25f, 0x937b0527), TOBN(0xf42e1e47, 0xeb7d9997), + TOBN(0xeba699c4, 0xb8a53a29), TOBN(0x1f921c71, 0xe091b536), + TOBN(0xcce29e7b, 0x5b26bbd5), TOBN(0x7a8ef5ed, 0x3b61a680), + TOBN(0xe5ef8043, 0xba1f1c7e), TOBN(0x16ea8217, 0x18158dda), + TOBN(0x01778a2b, 0x599ff0f9), TOBN(0x68a923d7, 0x8104fc6b), + TOBN(0x5bfa44df, 0xda694ff3), TOBN(0x4f7199db, 0xf7667f12), + TOBN(0xc06d8ff6, 0xe46f2a79), TOBN(0x08b5dead, 0xe9f8131d), + TOBN(0x02519a59, 0xabb4ce7c), TOBN(0xc4f710bc, 0xb42aec3e), + TOBN(0x3d77b057, 0x78bde41a), TOBN(0x6474bf80, 0xb4186b5a), + TOBN(0x048b3f67, 0x88c65741), TOBN(0xc64519de, 0x03c7c154), + TOBN(0xdf073846, 0x0edfcc4f), TOBN(0x319aa737, 0x48f1aa6b), + TOBN(0x8b9f8a02, 0xca909f77), TOBN(0x90258139, 0x7580bfef), + TOBN(0xd8bfd3ca, 0xc0c22719), TOBN(0xc60209e4, 0xc9ca151e), + TOBN(0x7a744ab5, 0xd9a1a69c), TOBN(0x6de5048b, 0x14937f8f), + TOBN(0x171938d8, 0xe115ac04), TOBN(0x7df70940, 0x1c6b16d2), + TOBN(0xa6aeb663, 0x7f8e94e7), TOBN(0xc130388e, 0x2a2cf094), + TOBN(0x1850be84, 0x77f54e6e), TOBN(0x9f258a72, 0x65d60fe5), + TOBN(0xff7ff0c0, 0x6c9146d6), TOBN(0x039aaf90, 0xe63a830b), + TOBN(0x38f27a73, 0x9460342f), TOBN(0x4703148c, 0x3f795f8a), + TOBN(0x1bb5467b, 0x9681a97e), TOBN(0x00931ba5, 0xecaeb594), + TOBN(0xcdb6719d, 0x786f337c), TOBN(0xd9c01cd2, 0xe704397d), + TOBN(0x0f4a3f20, 0x555c2fef), TOBN(0x00452509, 0x7c0af223), + TOBN(0x54a58047, 0x84db8e76), TOBN(0x3bacf1aa, 0x93c8aa06), + TOBN(0x11ca957c, 0xf7919422), TOBN(0x50641053, 0x78cdaa40), + TOBN(0x7a303874, 0x9f7144ae), TOBN(0x170c963f, 0x43d4acfd), + TOBN(0x5e148149, 0x58ddd3ef), TOBN(0xa7bde582, 0x9e72dba8), + TOBN(0x0769da8b, 0x6fa68750), TOBN(0xfa64e532, 0x572e0249), + TOBN(0xfcaadf9d, 0x2619ad31), TOBN(0x87882daa, 0xa7b349cd), + TOBN(0x9f6eb731, 0x6c67a775), TOBN(0xcb10471a, 0xefc5d0b1), + TOBN(0xb433750c, 0xe1b806b2), TOBN(0x19c5714d, 0x57b1ae7e), + TOBN(0xc0dc8b7b, 0xed03fd3f), TOBN(0xdd03344f, 0x31bc194e), + TOBN(0xa66c52a7, 0x8c6320b5), TOBN(0x8bc82ce3, 0xd0b6fd93), + TOBN(0xf8e13501, 0xb35f1341), TOBN(0xe53156dd, 0x25a43e42), + TOBN(0xd3adf27e, 0x4daeb85c), TOBN(0xb81d8379, 0xbbeddeb5), + TOBN(0x1b0b546e, 0x2e435867), TOBN(0x9020eb94, 0xeba5dd60), + TOBN(0x37d91161, 0x8210cb9d), TOBN(0x4c596b31, 0x5c91f1cf), + TOBN(0xb228a90f, 0x0e0b040d), TOBN(0xbaf02d82, 0x45ff897f), + TOBN(0x2aac79e6, 0x00fa6122), TOBN(0x24828817, 0x8e36f557), + TOBN(0xb9521d31, 0x113ec356), TOBN(0x9e48861e, 0x15eff1f8), + TOBN(0x2aa1d412, 0xe0d41715), TOBN(0x71f86203, 0x53f131b8), + TOBN(0xf60da8da, 0x3fd19408), TOBN(0x4aa716dc, 0x278d9d99), + TOBN(0x394531f7, 0xa8c51c90), TOBN(0xb560b0e8, 0xf59db51c), + TOBN(0xa28fc992, 0xfa34bdad), TOBN(0xf024fa14, 0x9cd4f8bd), + TOBN(0x5cf530f7, 
0x23a9d0d3), TOBN(0x615ca193, 0xe28c9b56), + TOBN(0x6d2a483d, 0x6f73c51e), TOBN(0xa4cb2412, 0xea0dc2dd), + TOBN(0x50663c41, 0x1eb917ff), TOBN(0x3d3a74cf, 0xeade299e), + TOBN(0x29b3990f, 0x4a7a9202), TOBN(0xa9bccf59, 0xa7b15c3d), + TOBN(0x66a3ccdc, 0xa5df9208), TOBN(0x48027c14, 0x43f2f929), + TOBN(0xd385377c, 0x40b557f0), TOBN(0xe001c366, 0xcd684660), + TOBN(0x1b18ed6b, 0xe2183a27), TOBN(0x879738d8, 0x63210329), + TOBN(0xa687c74b, 0xbda94882), TOBN(0xd1bbcc48, 0xa684b299), + TOBN(0xaf6f1112, 0x863b3724), TOBN(0x6943d1b4, 0x2c8ce9f8), + TOBN(0xe044a3bb, 0x098cafb4), TOBN(0x27ed2310, 0x60d48caf), + TOBN(0x542b5675, 0x3a31b84d), TOBN(0xcbf3dd50, 0xfcddbed7), + TOBN(0x25031f16, 0x41b1d830), TOBN(0xa7ec851d, 0xcb0c1e27), + TOBN(0xac1c8fe0, 0xb5ae75db), TOBN(0xb24c7557, 0x08c52120), + TOBN(0x57f811dc, 0x1d4636c3), TOBN(0xf8436526, 0x681a9939), + TOBN(0x1f6bc6d9, 0x9c81adb3), TOBN(0x840f8ac3, 0x5b7d80d4), + TOBN(0x731a9811, 0xf4387f1a), TOBN(0x7c501cd3, 0xb5156880), + TOBN(0xa5ca4a07, 0xdfe68867), TOBN(0xf123d8f0, 0x5fcea120), + TOBN(0x1fbb0e71, 0xd607039e), TOBN(0x2b70e215, 0xcd3a4546), + TOBN(0x32d2f01d, 0x53324091), TOBN(0xb796ff08, 0x180ab19b), + TOBN(0x32d87a86, 0x3c57c4aa), TOBN(0x2aed9caf, 0xb7c49a27), + TOBN(0x9fb35eac, 0x31630d98), TOBN(0x338e8cdf, 0x5c3e20a3), + TOBN(0x80f16182, 0x66cde8db), TOBN(0x4e159980, 0x2d72fd36), + TOBN(0xd7b8f13b, 0x9b6e5072), TOBN(0xf5213907, 0x3b7b5dc1), + TOBN(0x4d431f1d, 0x8ce4396e), TOBN(0x37a1a680, 0xa7ed2142), + TOBN(0xbf375696, 0xd01aaf6b), TOBN(0xaa1c0c54, 0xe63aab66), + TOBN(0x3014368b, 0x4ed80940), TOBN(0x67e6d056, 0x7a6fcedd), + TOBN(0x7c208c49, 0xca97579f), TOBN(0xfe3d7a81, 0xa23597f6), + TOBN(0x5e203202, 0x7e096ae2), TOBN(0xb1f3e1e7, 0x24b39366), + TOBN(0x26da26f3, 0x2fdcdffc), TOBN(0x79422f1d, 0x6097be83),} + , + {TOBN(0x263a2cfb, 0x9db3b381), TOBN(0x9c3a2dee, 0xd4df0a4b), + TOBN(0x728d06e9, 0x7d04e61f), TOBN(0x8b1adfbc, 0x42449325), + TOBN(0x6ec1d939, 0x7e053a1b), TOBN(0xee2be5c7, 0x66daf707), + TOBN(0x80ba1e14, 0x810ac7ab), TOBN(0xdd2ae778, 0xf530f174), + TOBN(0x0435d97a, 0x205b9d8b), TOBN(0x6eb8f064, 0x056756d4), + TOBN(0xd5e88a8b, 0xb6f8210e), TOBN(0x070ef12d, 0xec9fd9ea), + TOBN(0x4d849505, 0x3bcc876a), TOBN(0x12a75338, 0xa7404ce3), + TOBN(0xd22b49e1, 0xb8a1db5e), TOBN(0xec1f2051, 0x14bfa5ad), + TOBN(0xadbaeb79, 0xb6828f36), TOBN(0x9d7a0258, 0x01bd5b9e), + TOBN(0xeda01e0d, 0x1e844b0c), TOBN(0x4b625175, 0x887edfc9), + TOBN(0x14109fdd, 0x9669b621), TOBN(0x88a2ca56, 0xf6f87b98), + TOBN(0xfe2eb788, 0x170df6bc), TOBN(0x0cea06f4, 0xffa473f9), + TOBN(0x43ed81b5, 0xc4e83d33), TOBN(0xd9f35879, 0x5efd488b), + TOBN(0x164a620f, 0x9deb4d0f), TOBN(0xc6927bdb, 0xac6a7394), + TOBN(0x45c28df7, 0x9f9e0f03), TOBN(0x2868661e, 0xfcd7e1a9), + TOBN(0x7cf4e8d0, 0xffa348f1), TOBN(0x6bd4c284, 0x398538e0), + TOBN(0x2618a091, 0x289a8619), TOBN(0xef796e60, 0x6671b173), + TOBN(0x664e46e5, 0x9090c632), TOBN(0xa38062d4, 0x1e66f8fb), + TOBN(0x6c744a20, 0x0573274e), TOBN(0xd07b67e4, 0xa9271394), + TOBN(0x391223b2, 0x6bdc0e20), TOBN(0xbe2d93f1, 0xeb0a05a7), + TOBN(0xf23e2e53, 0x3f36d141), TOBN(0xe84bb3d4, 0x4dfca442), + TOBN(0xb804a48d, 0x6b7c023a), TOBN(0x1e16a8fa, 0x76431c3b), + TOBN(0x1b5452ad, 0xddd472e0), TOBN(0x7d405ee7, 0x0d1ee127), + TOBN(0x50fc6f1d, 0xffa27599), TOBN(0x351ac53c, 0xbf391b35), + TOBN(0x7efa14b8, 0x4444896b), TOBN(0x64974d2f, 0xf94027fb), + TOBN(0xefdcd0e8, 0xde84487d), TOBN(0x8c45b260, 0x2b48989b), + TOBN(0xa8fcbbc2, 0xd8463487), TOBN(0xd1b2b3f7, 0x3fbc476c), + TOBN(0x21d005b7, 0xc8f443c0), TOBN(0x518f2e67, 0x40c0139c), + TOBN(0x56036e8c, 0x06d75fc1), 
TOBN(0x2dcf7bb7, 0x3249a89f), + TOBN(0x81dd1d3d, 0xe245e7dd), TOBN(0xf578dc4b, 0xebd6e2a7), + TOBN(0x4c028903, 0xdf2ce7a0), TOBN(0xaee36288, 0x9c39afac), + TOBN(0xdc847c31, 0x146404ab), TOBN(0x6304c0d8, 0xa4e97818), + TOBN(0xae51dca2, 0xa91f6791), TOBN(0x2abe4190, 0x9baa9efc), + TOBN(0xd9d2e2f4, 0x559c7ac1), TOBN(0xe82f4b51, 0xfc9f773a), + TOBN(0xa7713027, 0x4073e81c), TOBN(0xc0276fac, 0xfbb596fc), + TOBN(0x1d819fc9, 0xa684f70c), TOBN(0x29b47fdd, 0xc9f7b1e0), + TOBN(0x358de103, 0x459b1940), TOBN(0xec881c59, 0x5b013e93), + TOBN(0x51574c93, 0x49532ad3), TOBN(0x2db1d445, 0xb37b46de), + TOBN(0xc6445b87, 0xdf239fd8), TOBN(0xc718af75, 0x151d24ee), + TOBN(0xaea1c4a4, 0xf43c6259), TOBN(0x40c0e5d7, 0x70be02f7), + TOBN(0x6a4590f4, 0x721b33f2), TOBN(0x2124f1fb, 0xfedf04ea), + TOBN(0xf8e53cde, 0x9745efe7), TOBN(0xe7e10432, 0x65f046d9), + TOBN(0xc3fca28e, 0xe4d0c7e6), TOBN(0x847e339a, 0x87253b1b), + TOBN(0x9b595348, 0x3743e643), TOBN(0xcb6a0a0b, 0x4fd12fc5), + TOBN(0xfb6836c3, 0x27d02dcc), TOBN(0x5ad00982, 0x7a68bcc2), + TOBN(0x1b24b44c, 0x005e912d), TOBN(0xcc83d20f, 0x811fdcfe), + TOBN(0x36527ec1, 0x666fba0c), TOBN(0x69948197, 0x14754635), + TOBN(0xfcdcb1a8, 0x556da9c2), TOBN(0xa5934267, 0x81a732b2), + TOBN(0xec1214ed, 0xa714181d), TOBN(0x609ac13b, 0x6067b341), + TOBN(0xff4b4c97, 0xa545df1f), TOBN(0xa1240501, 0x34d2076b), + TOBN(0x6efa0c23, 0x1409ca97), TOBN(0x254cc1a8, 0x20638c43), + TOBN(0xd4e363af, 0xdcfb46cd), TOBN(0x62c2adc3, 0x03942a27), + TOBN(0xc67b9df0, 0x56e46483), TOBN(0xa55abb20, 0x63736356), + TOBN(0xab93c098, 0xc551bc52), TOBN(0x382b49f9, 0xb15fe64b), + TOBN(0x9ec221ad, 0x4dff8d47), TOBN(0x79caf615, 0x437df4d6), + TOBN(0x5f13dc64, 0xbb456509), TOBN(0xe4c589d9, 0x191f0714), + TOBN(0x27b6a8ab, 0x3fd40e09), TOBN(0xe455842e, 0x77313ea9), + TOBN(0x8b51d1e2, 0x1f55988b), TOBN(0x5716dd73, 0x062bbbfc), + TOBN(0x633c11e5, 0x4e8bf3de), TOBN(0x9a0e77b6, 0x1b85be3b), + TOBN(0x56510729, 0x0911cca6), TOBN(0x27e76495, 0xefa6590f), + TOBN(0xe4ac8b33, 0x070d3aab), TOBN(0x2643672b, 0x9a2cd5e5), + TOBN(0x52eff79b, 0x1cfc9173), TOBN(0x665ca49b, 0x90a7c13f), + TOBN(0x5a8dda59, 0xb3efb998), TOBN(0x8a5b922d, 0x052f1341), + TOBN(0xae9ebbab, 0x3cf9a530), TOBN(0x35986e7b, 0xf56da4d7), + TOBN(0x3a636b5c, 0xff3513cc), TOBN(0xbb0cf8ba, 0x3198f7dd), + TOBN(0xb8d40522, 0x41f16f86), TOBN(0x760575d8, 0xde13a7bf), + TOBN(0x36f74e16, 0x9f7aa181), TOBN(0x163a3ecf, 0xf509ed1c), + TOBN(0x6aead61f, 0x3c40a491), TOBN(0x158c95fc, 0xdfe8fcaa), + TOBN(0xa3991b6e, 0x13cda46f), TOBN(0x79482415, 0x342faed0), + TOBN(0xf3ba5bde, 0x666b5970), TOBN(0x1d52e6bc, 0xb26ab6dd), + TOBN(0x768ba1e7, 0x8608dd3d), TOBN(0x4930db2a, 0xea076586), + TOBN(0xd9575714, 0xe7dc1afa), TOBN(0x1fc7bf7d, 0xf7c58817), + TOBN(0x6b47accd, 0xd9eee96c), TOBN(0x0ca277fb, 0xe58cec37), + TOBN(0x113fe413, 0xe702c42a), TOBN(0xdd1764ee, 0xc47cbe51), + TOBN(0x041e7cde, 0x7b3ed739), TOBN(0x50cb7459, 0x5ce9e1c0), + TOBN(0x35568513, 0x2925b212), TOBN(0x7cff95c4, 0x001b081c), + TOBN(0x63ee4cbd, 0x8088b454), TOBN(0xdb7f32f7, 0x9a9e0c8a), + TOBN(0xb377d418, 0x6b2447cb), TOBN(0xe3e982aa, 0xd370219b), + TOBN(0x06ccc1e4, 0xc2a2a593), TOBN(0x72c36865, 0x0773f24f), + TOBN(0xa13b4da7, 0x95859423), TOBN(0x8bbf1d33, 0x75040c8f), + TOBN(0x726f0973, 0xda50c991), TOBN(0x48afcd5b, 0x822d6ee2), + TOBN(0xe5fc718b, 0x20fd7771), TOBN(0xb9e8e77d, 0xfd0807a1), + TOBN(0x7f5e0f44, 0x99a7703d), TOBN(0x6972930e, 0x618e36f3), + TOBN(0x2b7c77b8, 0x23807bbe), TOBN(0xe5b82405, 0xcb27ff50), + TOBN(0xba8b8be3, 0xbd379062), TOBN(0xd64b7a1d, 0x2dce4a92), + TOBN(0x040a73c5, 0xb2952e37), TOBN(0x0a9e252e, 
0xd438aeca), + TOBN(0xdd43956b, 0xc39d3bcb), TOBN(0x1a31ca00, 0xb32b2d63), + TOBN(0xd67133b8, 0x5c417a18), TOBN(0xd08e4790, 0x2ef442c8), + TOBN(0x98cb1ae9, 0x255c0980), TOBN(0x4bd86381, 0x2b4a739f), + TOBN(0x5a5c31e1, 0x1e4a45a1), TOBN(0x1e5d55fe, 0x9cb0db2f), + TOBN(0x74661b06, 0x8ff5cc29), TOBN(0x026b389f, 0x0eb8a4f4), + TOBN(0x536b21a4, 0x58848c24), TOBN(0x2e5bf8ec, 0x81dc72b0), + TOBN(0x03c187d0, 0xad886aac), TOBN(0x5c16878a, 0xb771b645), + TOBN(0xb07dfc6f, 0xc74045ab), TOBN(0x2c6360bf, 0x7800caed), + TOBN(0x24295bb5, 0xb9c972a3), TOBN(0xc9e6f88e, 0x7c9a6dba), + TOBN(0x90ffbf24, 0x92a79aa6), TOBN(0xde29d50a, 0x41c26ac2), + TOBN(0x9f0af483, 0xd309cbe6), TOBN(0x5b020d8a, 0xe0bced4f), + TOBN(0x606e986d, 0xb38023e3), TOBN(0xad8f2c9d, 0x1abc6933), + TOBN(0x19292e1d, 0xe7400e93), TOBN(0xfe3e18a9, 0x52be5e4d), + TOBN(0xe8e9771d, 0x2e0680bf), TOBN(0x8c5bec98, 0xc54db063), + TOBN(0x2af9662a, 0x74a55d1f), TOBN(0xe3fbf28f, 0x046f66d8), + TOBN(0xa3a72ab4, 0xd4dc4794), TOBN(0x09779f45, 0x5c7c2dd8), + TOBN(0xd893bdaf, 0xc3d19d8d), TOBN(0xd5a75094, 0x57d6a6df), + TOBN(0x8cf8fef9, 0x952e6255), TOBN(0x3da67cfb, 0xda9a8aff), + TOBN(0x4c23f62a, 0x2c160dcd), TOBN(0x34e6c5e3, 0x8f90eaef), + TOBN(0x35865519, 0xa9a65d5a), TOBN(0x07c48aae, 0x8fd38a3d), + TOBN(0xb7e7aeda, 0x50068527), TOBN(0x2c09ef23, 0x1c90936a), + TOBN(0x31ecfeb6, 0xe879324c), TOBN(0xa0871f6b, 0xfb0ec938), + TOBN(0xb1f0fb68, 0xd84d835d), TOBN(0xc90caf39, 0x861dc1e6), + TOBN(0x12e5b046, 0x7594f8d7), TOBN(0x26897ae2, 0x65012b92), + TOBN(0xbcf68a08, 0xa4d6755d), TOBN(0x403ee41c, 0x0991fbda), + TOBN(0x733e343e, 0x3bbf17e8), TOBN(0xd2c7980d, 0x679b3d65), + TOBN(0x33056232, 0xd2e11305), TOBN(0x966be492, 0xf3c07a6f), + TOBN(0x6a8878ff, 0xbb15509d), TOBN(0xff221101, 0x0a9b59a4), + TOBN(0x6c9f564a, 0xabe30129), TOBN(0xc6f2c940, 0x336e64cf), + TOBN(0x0fe75262, 0x8b0c8022), TOBN(0xbe0267e9, 0x6ae8db87), + TOBN(0x22e192f1, 0x93bc042b), TOBN(0xf085b534, 0xb237c458), + TOBN(0xa0d192bd, 0x832c4168), TOBN(0x7a76e9e3, 0xbdf6271d), + TOBN(0x52a882fa, 0xb88911b5), TOBN(0xc85345e4, 0xb4db0eb5), + TOBN(0xa3be02a6, 0x81a7c3ff), TOBN(0x51889c8c, 0xf0ec0469), + TOBN(0x9d031369, 0xa5e829e5), TOBN(0xcbb4c6fc, 0x1607aa41), + TOBN(0x75ac59a6, 0x241d84c1), TOBN(0xc043f2bf, 0x8829e0ee), + TOBN(0x82a38f75, 0x8ea5e185), TOBN(0x8bda40b9, 0xd87cbd9f), + TOBN(0x9e65e75e, 0x2d8fc601), TOBN(0x3d515f74, 0xa35690b3), + TOBN(0x534acf4f, 0xda79e5ac), TOBN(0x68b83b3a, 0x8630215f), + TOBN(0x5c748b2e, 0xd085756e), TOBN(0xb0317258, 0xe5d37cb2), + TOBN(0x6735841a, 0xc5ccc2c4), TOBN(0x7d7dc96b, 0x3d9d5069), + TOBN(0xa147e410, 0xfd1754bd), TOBN(0x65296e94, 0xd399ddd5), + TOBN(0xf6b5b2d0, 0xbc8fa5bc), TOBN(0x8a5ead67, 0x500c277b), + TOBN(0x214625e6, 0xdfa08a5d), TOBN(0x51fdfedc, 0x959cf047), + TOBN(0x6bc9430b, 0x289fca32), TOBN(0xe36ff0cf, 0x9d9bdc3f), + TOBN(0x2fe187cb, 0x58ea0ede), TOBN(0xed66af20, 0x5a900b3f), + TOBN(0x00e0968b, 0x5fa9f4d6), TOBN(0x2d4066ce, 0x37a362e7), + TOBN(0xa99a9748, 0xbd07e772), TOBN(0x710989c0, 0x06a4f1d0), + TOBN(0xd5dedf35, 0xce40cbd8), TOBN(0xab55c5f0, 0x1743293d), + TOBN(0x766f1144, 0x8aa24e2c), TOBN(0x94d874f8, 0x605fbcb4), + TOBN(0xa365f0e8, 0xa518001b), TOBN(0xee605eb6, 0x9d04ef0f), + TOBN(0x5a3915cd, 0xba8d4d25), TOBN(0x44c0e1b8, 0xb5113472), + TOBN(0xcbb024e8, 0x8b6740dc), TOBN(0x89087a53, 0xee1d4f0c), + TOBN(0xa88fa05c, 0x1fc4e372), TOBN(0x8bf395cb, 0xaf8b3af2), + TOBN(0x1e71c9a1, 0xdeb8568b), TOBN(0xa35daea0, 0x80fb3d32), + TOBN(0xe8b6f266, 0x2cf8fb81), TOBN(0x6d51afe8, 0x9490696a), + TOBN(0x81beac6e, 0x51803a19), TOBN(0xe3d24b7f, 0x86219080), + 
TOBN(0x727cfd9d, 0xdf6f463c), TOBN(0x8c6865ca, 0x72284ee8), + TOBN(0x32c88b7d, 0xb743f4ef), TOBN(0x3793909b, 0xe7d11dce), + TOBN(0xd398f922, 0x2ff2ebe8), TOBN(0x2c70ca44, 0xe5e49796), + TOBN(0xdf4d9929, 0xcb1131b1), TOBN(0x7826f298, 0x25888e79), + TOBN(0x4d3a112c, 0xf1d8740a), TOBN(0x00384cb6, 0x270afa8b), + TOBN(0xcb64125b, 0x3ab48095), TOBN(0x3451c256, 0x62d05106), + TOBN(0xd73d577d, 0xa4955845), TOBN(0x39570c16, 0xbf9f4433), + TOBN(0xd7dfaad3, 0xadecf263), TOBN(0xf1c3d8d1, 0xdc76e102), + TOBN(0x5e774a58, 0x54c6a836), TOBN(0xdad4b672, 0x3e92d47b), + TOBN(0xbe7e990f, 0xf0d796a0), TOBN(0x5fc62478, 0xdf0e8b02), + TOBN(0x8aae8bf4, 0x030c00ad), TOBN(0x3d2db93b, 0x9004ba0f), + TOBN(0xe48c8a79, 0xd85d5ddc), TOBN(0xe907caa7, 0x6bb07f34), + TOBN(0x58db343a, 0xa39eaed5), TOBN(0x0ea6e007, 0xadaf5724), + TOBN(0xe00df169, 0xd23233f3), TOBN(0x3e322796, 0x77cb637f), + TOBN(0x1f897c0e, 0x1da0cf6c), TOBN(0xa651f5d8, 0x31d6bbdd), + TOBN(0xdd61af19, 0x1a230c76), TOBN(0xbd527272, 0xcdaa5e4a), + TOBN(0xca753636, 0xd0abcd7e), TOBN(0x78bdd37c, 0x370bd8dc), + TOBN(0xc23916c2, 0x17cd93fe), TOBN(0x65b97a4d, 0xdadce6e2), + TOBN(0xe04ed4eb, 0x174e42f8), TOBN(0x1491ccaa, 0xbb21480a), + TOBN(0x145a8280, 0x23196332), TOBN(0x3c3862d7, 0x587b479a), + TOBN(0x9f4a88a3, 0x01dcd0ed), TOBN(0x4da2b7ef, 0x3ea12f1f), + TOBN(0xf8e7ae33, 0xb126e48e), TOBN(0x404a0b32, 0xf494e237), + TOBN(0x9beac474, 0xc55acadb), TOBN(0x4ee5cf3b, 0xcbec9fd9), + TOBN(0x336b33b9, 0x7df3c8c3), TOBN(0xbd905fe3, 0xb76808fd), + TOBN(0x8f436981, 0xaa45c16a), TOBN(0x255c5bfa, 0x3dd27b62), + TOBN(0x71965cbf, 0xc3dd9b4d), TOBN(0xce23edbf, 0xfc068a87), + TOBN(0xb78d4725, 0x745b029b), TOBN(0x74610713, 0xcefdd9bd), + TOBN(0x7116f75f, 0x1266bf52), TOBN(0x02046722, 0x18e49bb6), + TOBN(0xdf43df9f, 0x3d6f19e3), TOBN(0xef1bc7d0, 0xe685cb2f), + TOBN(0xcddb27c1, 0x7078c432), TOBN(0xe1961b9c, 0xb77fedb7), + TOBN(0x1edc2f5c, 0xc2290570), TOBN(0x2c3fefca, 0x19cbd886), + TOBN(0xcf880a36, 0xc2af389a), TOBN(0x96c610fd, 0xbda71cea), + TOBN(0xf03977a9, 0x32aa8463), TOBN(0x8eb7763f, 0x8586d90a), + TOBN(0x3f342454, 0x2a296e77), TOBN(0xc8718683, 0x42837a35), + TOBN(0x7dc71090, 0x6a09c731), TOBN(0x54778ffb, 0x51b816db), + TOBN(0x6b33bfec, 0xaf06defd), TOBN(0xfe3c105f, 0x8592b70b), + TOBN(0xf937fda4, 0x61da6114), TOBN(0x3c13e651, 0x4c266ad7), + TOBN(0xe363a829, 0x855938e8), TOBN(0x2eeb5d9e, 0x9de54b72), + TOBN(0xbeb93b0e, 0x20ccfab9), TOBN(0x3dffbb5f, 0x25e61a25), + TOBN(0x7f655e43, 0x1acc093d), TOBN(0x0cb6cc3d, 0x3964ce61), + TOBN(0x6ab283a1, 0xe5e9b460), TOBN(0x55d787c5, 0xa1c7e72d), + TOBN(0x4d2efd47, 0xdeadbf02), TOBN(0x11e80219, 0xac459068), + TOBN(0x810c7626, 0x71f311f0), TOBN(0xfa17ef8d, 0x4ab6ef53), + TOBN(0xaf47fd25, 0x93e43bff), TOBN(0x5cb5ff3f, 0x0be40632), + TOBN(0x54687106, 0x8ee61da3), TOBN(0x7764196e, 0xb08afd0f), + TOBN(0x831ab3ed, 0xf0290a8f), TOBN(0xcae81966, 0xcb47c387), + TOBN(0xaad7dece, 0x184efb4f), TOBN(0xdcfc53b3, 0x4749110e), + TOBN(0x6698f23c, 0x4cb632f9), TOBN(0xc42a1ad6, 0xb91f8067), + TOBN(0xb116a81d, 0x6284180a), TOBN(0xebedf5f8, 0xe901326f), + TOBN(0xf2274c9f, 0x97e3e044), TOBN(0x42018520, 0x11d09fc9), + TOBN(0x56a65f17, 0xd18e6e23), TOBN(0x2ea61e2a, 0x352b683c), + TOBN(0x27d291bc, 0x575eaa94), TOBN(0x9e7bc721, 0xb8ff522d), + TOBN(0x5f7268bf, 0xa7f04d6f), TOBN(0x5868c73f, 0xaba41748), + TOBN(0x9f85c2db, 0x7be0eead), TOBN(0x511e7842, 0xff719135), + TOBN(0x5a06b1e9, 0xc5ea90d7), TOBN(0x0c19e283, 0x26fab631), + TOBN(0x8af8f0cf, 0xe9206c55), TOBN(0x89389cb4, 0x3553c06a), + TOBN(0x39dbed97, 0xf65f8004), TOBN(0x0621b037, 0xc508991d), + TOBN(0x1c52e635, 
0x96e78cc4), TOBN(0x5385c8b2, 0x0c06b4a8), + TOBN(0xd84ddfdb, 0xb0e87d03), TOBN(0xc49dfb66, 0x934bafad), + TOBN(0x7071e170, 0x59f70772), TOBN(0x3a073a84, 0x3a1db56b), + TOBN(0x03494903, 0x3b8af190), TOBN(0x7d882de3, 0xd32920f0), + TOBN(0x91633f0a, 0xb2cf8940), TOBN(0x72b0b178, 0x6f948f51), + TOBN(0x2d28dc30, 0x782653c8), TOBN(0x88829849, 0xdb903a05), + TOBN(0xb8095d0c, 0x6a19d2bb), TOBN(0x4b9e7f0c, 0x86f782cb), + TOBN(0x7af73988, 0x2d907064), TOBN(0xd12be0fe, 0x8b32643c), + TOBN(0x358ed23d, 0x0e165dc3), TOBN(0x3d47ce62, 0x4e2378ce), + TOBN(0x7e2bb0b9, 0xfeb8a087), TOBN(0x3246e8ae, 0xe29e10b9), + TOBN(0x459f4ec7, 0x03ce2b4d), TOBN(0xe9b4ca1b, 0xbbc077cf), + TOBN(0x2613b4f2, 0x0e9940c1), TOBN(0xfc598bb9, 0x047d1eb1), + TOBN(0x9744c62b, 0x45036099), TOBN(0xa9dee742, 0x167c65d8), + TOBN(0x0c511525, 0xdabe1943), TOBN(0xda110554, 0x93c6c624), + TOBN(0xae00a52c, 0x651a3be2), TOBN(0xcda5111d, 0x884449a6), + TOBN(0x063c06f4, 0xff33bed1), TOBN(0x73baaf9a, 0x0d3d76b4), + TOBN(0x52fb0c9d, 0x7fc63668), TOBN(0x6886c9dd, 0x0c039cde), + TOBN(0x602bd599, 0x55b22351), TOBN(0xb00cab02, 0x360c7c13), + TOBN(0x8cb616bc, 0x81b69442), TOBN(0x41486700, 0xb55c3cee), + TOBN(0x71093281, 0xf49ba278), TOBN(0xad956d9c, 0x64a50710), + TOBN(0x9561f28b, 0x638a7e81), TOBN(0x54155cdf, 0x5980ddc3), + TOBN(0xb2db4a96, 0xd26f247a), TOBN(0x9d774e4e, 0x4787d100), + TOBN(0x1a9e6e2e, 0x078637d2), TOBN(0x1c363e2d, 0x5e0ae06a), + TOBN(0x7493483e, 0xe9cfa354), TOBN(0x76843cb3, 0x7f74b98d), + TOBN(0xbaca6591, 0xd4b66947), TOBN(0xb452ce98, 0x04460a8c), + TOBN(0x6830d246, 0x43768f55), TOBN(0xf4197ed8, 0x7dff12df), + TOBN(0x6521b472, 0x400dd0f7), TOBN(0x59f5ca8f, 0x4b1e7093), + TOBN(0x6feff11b, 0x080338ae), TOBN(0x0ada31f6, 0xa29ca3c6), + TOBN(0x24794eb6, 0x94a2c215), TOBN(0xd83a43ab, 0x05a57ab4), + TOBN(0x264a543a, 0x2a6f89fe), TOBN(0x2c2a3868, 0xdd5ec7c2), + TOBN(0xd3373940, 0x8439d9b2), TOBN(0x715ea672, 0x0acd1f11), + TOBN(0x42c1d235, 0xe7e6cc19), TOBN(0x81ce6e96, 0xb990585c), + TOBN(0x04e5dfe0, 0xd809c7bd), TOBN(0xd7b2580c, 0x8f1050ab), + TOBN(0x6d91ad78, 0xd8a4176f), TOBN(0x0af556ee, 0x4e2e897c), + TOBN(0x162a8b73, 0x921de0ac), TOBN(0x52ac9c22, 0x7ea78400), + TOBN(0xee2a4eea, 0xefce2174), TOBN(0xbe61844e, 0x6d637f79), + TOBN(0x0491f1bc, 0x789a283b), TOBN(0x72d3ac3d, 0x880836f4), + TOBN(0xaa1c5ea3, 0x88e5402d), TOBN(0x1b192421, 0xd5cc473d), + TOBN(0x5c0b9998, 0x9dc84cac), TOBN(0xb0a8482d, 0x9c6e75b8), + TOBN(0x639961d0, 0x3a191ce2), TOBN(0xda3bc865, 0x6d837930), + TOBN(0xca990653, 0x056e6f8f), TOBN(0x84861c41, 0x64d133a7), + TOBN(0x8b403276, 0x746abe40), TOBN(0xb7b4d51a, 0xebf8e303), + TOBN(0x05b43211, 0x220a255d), TOBN(0xc997152c, 0x02419e6e), + TOBN(0x76ff47b6, 0x630c2fea), TOBN(0x50518677, 0x281fdade), + TOBN(0x3283b8ba, 0xcf902b0b), TOBN(0x8d4b4eb5, 0x37db303b), + TOBN(0xcc89f42d, 0x755011bc), TOBN(0xb43d74bb, 0xdd09d19b), + TOBN(0x65746bc9, 0x8adba350), TOBN(0x364eaf8c, 0xb51c1927), + TOBN(0x13c76596, 0x10ad72ec), TOBN(0x30045121, 0xf8d40c20), + TOBN(0x6d2d99b7, 0xea7b979b), TOBN(0xcd78cd74, 0xe6fb3bcd), + TOBN(0x11e45a9e, 0x86cffbfe), TOBN(0x78a61cf4, 0x637024f6), + TOBN(0xd06bc872, 0x3d502295), TOBN(0xf1376854, 0x458cb288), + TOBN(0xb9db26a1, 0x342f8586), TOBN(0xf33effcf, 0x4beee09e), + TOBN(0xd7e0c4cd, 0xb30cfb3a), TOBN(0x6d09b8c1, 0x6c9db4c8), + TOBN(0x40ba1a42, 0x07c8d9df), TOBN(0x6fd495f7, 0x1c52c66d), + TOBN(0xfb0e169f, 0x275264da), TOBN(0x80c2b746, 0xe57d8362), + TOBN(0xedd987f7, 0x49ad7222), TOBN(0xfdc229af, 0x4398ec7b),} + , + {TOBN(0xb0d1ed84, 0x52666a58), TOBN(0x4bcb6e00, 0xe6a9c3c2), + TOBN(0x3c57411c, 0x26906408), 
TOBN(0xcfc20755, 0x13556400), + TOBN(0xa08b1c50, 0x5294dba3), TOBN(0xa30ba286, 0x8b7dd31e), + TOBN(0xd70ba90e, 0x991eca74), TOBN(0x094e142c, 0xe762c2b9), + TOBN(0xb81d783e, 0x979f3925), TOBN(0x1efd130a, 0xaf4c89a7), + TOBN(0x525c2144, 0xfd1bf7fa), TOBN(0x4b296904, 0x1b265a9e), + TOBN(0xed8e9634, 0xb9db65b6), TOBN(0x35c82e32, 0x03599d8a), + TOBN(0xdaa7a54f, 0x403563f3), TOBN(0x9df088ad, 0x022c38ab), + TOBN(0xe5cfb066, 0xbb3fd30a), TOBN(0x429169da, 0xeff0354e), + TOBN(0x809cf852, 0x3524e36c), TOBN(0x136f4fb3, 0x0155be1d), + TOBN(0x4826af01, 0x1fbba712), TOBN(0x6ef0f0b4, 0x506ba1a1), + TOBN(0xd9928b31, 0x77aea73e), TOBN(0xe2bf6af2, 0x5eaa244e), + TOBN(0x8d084f12, 0x4237b64b), TOBN(0x688ebe99, 0xe3ecfd07), + TOBN(0x57b8a70c, 0xf6845dd8), TOBN(0x808fc59c, 0x5da4a325), + TOBN(0xa9032b2b, 0xa3585862), TOBN(0xb66825d5, 0xedf29386), + TOBN(0xb5a5a8db, 0x431ec29b), TOBN(0xbb143a98, 0x3a1e8dc8), + TOBN(0x35ee94ce, 0x12ae381b), TOBN(0x3a7f176c, 0x86ccda90), + TOBN(0xc63a657e, 0x4606eaca), TOBN(0x9ae5a380, 0x43cd04df), + TOBN(0x9bec8d15, 0xed251b46), TOBN(0x1f5d6d30, 0xcaca5e64), + TOBN(0x347b3b35, 0x9ff20f07), TOBN(0x4d65f034, 0xf7e4b286), + TOBN(0x9e93ba24, 0xf111661e), TOBN(0xedced484, 0xb105eb04), + TOBN(0x96dc9ba1, 0xf424b578), TOBN(0xbf8f66b7, 0xe83e9069), + TOBN(0x872d4df4, 0xd7ed8216), TOBN(0xbf07f377, 0x8e2cbecf), + TOBN(0x4281d899, 0x98e73754), TOBN(0xfec85fbb, 0x8aab8708), + TOBN(0x9a3c0dee, 0xa5ba5b0b), TOBN(0xe6a116ce, 0x42d05299), + TOBN(0xae9775fe, 0xe9b02d42), TOBN(0x72b05200, 0xa1545cb6), + TOBN(0xbc506f7d, 0x31a3b4ea), TOBN(0xe5893078, 0x8bbd9b32), + TOBN(0xc8bc5f37, 0xe4b12a97), TOBN(0x6b000c06, 0x4a73b671), + TOBN(0x13b5bf22, 0x765fa7d0), TOBN(0x59805bf0, 0x1d6a5370), + TOBN(0x67a5e29d, 0x4280db98), TOBN(0x4f53916f, 0x776b1ce3), + TOBN(0x714ff61f, 0x33ddf626), TOBN(0x4206238e, 0xa085d103), + TOBN(0x1c50d4b7, 0xe5809ee3), TOBN(0x999f450d, 0x85f8eb1d), + TOBN(0x658a6051, 0xe4c79e9b), TOBN(0x1394cb73, 0xc66a9fea), + TOBN(0x27f31ed5, 0xc6be7b23), TOBN(0xf4c88f36, 0x5aa6f8fe), + TOBN(0x0fb0721f, 0x4aaa499e), TOBN(0x68b3a7d5, 0xe3fb2a6b), + TOBN(0xa788097d, 0x3a92851d), TOBN(0x060e7f8a, 0xe96f4913), + TOBN(0x82eebe73, 0x1a3a93bc), TOBN(0x42bbf465, 0xa21adc1a), + TOBN(0xc10b6fa4, 0xef030efd), TOBN(0x247aa4c7, 0x87b097bb), + TOBN(0x8b8dc632, 0xf60c77da), TOBN(0x6ffbc26a, 0xc223523e), + TOBN(0xa4f6ff11, 0x344579cf), TOBN(0x5825653c, 0x980250f6), + TOBN(0xb2dd097e, 0xbc1aa2b9), TOBN(0x07889393, 0x37a0333a), + TOBN(0x1cf55e71, 0x37a0db38), TOBN(0x2648487f, 0x792c1613), + TOBN(0xdad01336, 0x3fcef261), TOBN(0x6239c81d, 0x0eabf129), + TOBN(0x8ee761de, 0x9d276be2), TOBN(0x406a7a34, 0x1eda6ad3), + TOBN(0x4bf367ba, 0x4a493b31), TOBN(0x54f20a52, 0x9bf7f026), + TOBN(0xb696e062, 0x9795914b), TOBN(0xcddab96d, 0x8bf236ac), + TOBN(0x4ff2c70a, 0xed25ea13), TOBN(0xfa1d09eb, 0x81cbbbe7), + TOBN(0x88fc8c87, 0x468544c5), TOBN(0x847a670d, 0x696b3317), + TOBN(0xf133421e, 0x64bcb626), TOBN(0xaea638c8, 0x26dee0b5), + TOBN(0xd6e7680b, 0xb310346c), TOBN(0xe06f4097, 0xd5d4ced3), + TOBN(0x09961452, 0x7512a30b), TOBN(0xf3d867fd, 0xe589a59a), + TOBN(0x2e73254f, 0x52d0c180), TOBN(0x9063d8a3, 0x333c74ac), + TOBN(0xeda6c595, 0xd314e7bc), TOBN(0x2ee7464b, 0x467899ed), + TOBN(0x1cef423c, 0x0a1ed5d3), TOBN(0x217e76ea, 0x69cc7613), + TOBN(0x27ccce1f, 0xe7cda917), TOBN(0x12d8016b, 0x8a893f16), + TOBN(0xbcd6de84, 0x9fc74f6b), TOBN(0xfa5817e2, 0xf3144e61), + TOBN(0x1f354164, 0x0821ee4c), TOBN(0x1583eab4, 0x0bc61992), + TOBN(0x7490caf6, 0x1d72879f), TOBN(0x998ad9f3, 0xf76ae7b2), + TOBN(0x1e181950, 0xa41157f7), TOBN(0xa9d7e1e6, 
0xe8da3a7e), + TOBN(0x963784eb, 0x8426b95f), TOBN(0x0ee4ed6e, 0x542e2a10), + TOBN(0xb79d4cc5, 0xac751e7b), TOBN(0x93f96472, 0xfd4211bd), + TOBN(0x8c72d3d2, 0xc8de4fc6), TOBN(0x7b69cbf5, 0xdf44f064), + TOBN(0x3da90ca2, 0xf4bf94e1), TOBN(0x1a5325f8, 0xf12894e2), + TOBN(0x0a437f6c, 0x7917d60b), TOBN(0x9be70486, 0x96c9cb5d), + TOBN(0xb4d880bf, 0xe1dc5c05), TOBN(0xd738adda, 0xeebeeb57), + TOBN(0x6f0119d3, 0xdf0fe6a3), TOBN(0x5c686e55, 0x66eaaf5a), + TOBN(0x9cb10b50, 0xdfd0b7ec), TOBN(0xbdd0264b, 0x6a497c21), + TOBN(0xfc093514, 0x8c546c96), TOBN(0x58a947fa, 0x79dbf42a), + TOBN(0xc0b48d4e, 0x49ccd6d7), TOBN(0xff8fb02c, 0x88bd5580), + TOBN(0xc75235e9, 0x07d473b2), TOBN(0x4fab1ac5, 0xa2188af3), + TOBN(0x030fa3bc, 0x97576ec0), TOBN(0xe8c946e8, 0x0b7e7d2f), + TOBN(0x40a5c9cc, 0x70305600), TOBN(0x6d8260a9, 0xc8b013b4), + TOBN(0x0368304f, 0x70bba85c), TOBN(0xad090da1, 0xa4a0d311), + TOBN(0x7170e870, 0x2415eec1), TOBN(0xbfba35fe, 0x8461ea47), + TOBN(0x6279019a, 0xc1e91938), TOBN(0xa47638f3, 0x1afc415f), + TOBN(0x36c65cbb, 0xbcba0e0f), TOBN(0x02160efb, 0x034e2c48), + TOBN(0xe6c51073, 0x615cd9e4), TOBN(0x498ec047, 0xf1243c06), + TOBN(0x3e5a8809, 0xb17b3d8c), TOBN(0x5cd99e61, 0x0cc565f1), + TOBN(0x81e312df, 0x7851dafe), TOBN(0xf156f5ba, 0xa79061e2), + TOBN(0x80d62b71, 0x880c590e), TOBN(0xbec9746f, 0x0a39faa1), + TOBN(0x1d98a9c1, 0xc8ed1f7a), TOBN(0x09e43bb5, 0xa81d5ff2), + TOBN(0xd5f00f68, 0x0da0794a), TOBN(0x412050d9, 0x661aa836), + TOBN(0xa89f7c4e, 0x90747e40), TOBN(0x6dc05ebb, 0xb62a3686), + TOBN(0xdf4de847, 0x308e3353), TOBN(0x53868fbb, 0x9fb53bb9), + TOBN(0x2b09d2c3, 0xcfdcf7dd), TOBN(0x41a9fce3, 0x723fcab4), + TOBN(0x73d905f7, 0x07f57ca3), TOBN(0x080f9fb1, 0xac8e1555), + TOBN(0x7c088e84, 0x9ba7a531), TOBN(0x07d35586, 0xed9a147f), + TOBN(0x602846ab, 0xaf48c336), TOBN(0x7320fd32, 0x0ccf0e79), + TOBN(0xaa780798, 0xb18bd1ff), TOBN(0x52c2e300, 0xafdd2905), + TOBN(0xf27ea3d6, 0x434267cd), TOBN(0x8b96d16d, 0x15605b5f), + TOBN(0x7bb31049, 0x4b45706b), TOBN(0xe7f58b8e, 0x743d25f8), + TOBN(0xe9b5e45b, 0x87f30076), TOBN(0xd19448d6, 0x5d053d5a), + TOBN(0x1ecc8cb9, 0xd3210a04), TOBN(0x6bc7d463, 0xdafb5269), + TOBN(0x3e59b10a, 0x67c3489f), TOBN(0x1769788c, 0x65641e1b), + TOBN(0x8a53b82d, 0xbd6cb838), TOBN(0x7066d6e6, 0x236d5f22), + TOBN(0x03aa1c61, 0x6908536e), TOBN(0xc971da0d, 0x66ae9809), + TOBN(0x01b3a86b, 0xc49a2fac), TOBN(0x3b8420c0, 0x3092e77a), + TOBN(0x02057300, 0x7d6fb556), TOBN(0x6941b2a1, 0xbff40a87), + TOBN(0x140b6308, 0x0658ff2a), TOBN(0x87804363, 0x3424ab36), + TOBN(0x0253bd51, 0x5751e299), TOBN(0xc75bcd76, 0x449c3e3a), + TOBN(0x92eb4090, 0x7f8f875d), TOBN(0x9c9d754e, 0x56c26bbf), + TOBN(0x158cea61, 0x8110bbe7), TOBN(0x62a6b802, 0x745f91ea), + TOBN(0xa79c41aa, 0xc6e7394b), TOBN(0x445b6a83, 0xad57ef10), + TOBN(0x0c5277eb, 0x6ea6f40c), TOBN(0x319fe96b, 0x88633365), + TOBN(0x0b0fc61f, 0x385f63cb), TOBN(0x41250c84, 0x22bdd127), + TOBN(0x67d153f1, 0x09e942c2), TOBN(0x60920d08, 0xc021ad5d), + TOBN(0x229f5746, 0x724d81a5), TOBN(0xb7ffb892, 0x5bba3299), + TOBN(0x518c51a1, 0xde413032), TOBN(0x2a9bfe77, 0x3c2fd94c), + TOBN(0xcbcde239, 0x3191f4fd), TOBN(0x43093e16, 0xd3d6ada1), + TOBN(0x184579f3, 0x58769606), TOBN(0x2c94a8b3, 0xd236625c), + TOBN(0x6922b9c0, 0x5c437d8e), TOBN(0x3d4ae423, 0xd8d9f3c8), + TOBN(0xf72c31c1, 0x2e7090a2), TOBN(0x4ac3f5f3, 0xd76a55bd), + TOBN(0x342508fc, 0x6b6af991), TOBN(0x0d527100, 0x1b5cebbd), + TOBN(0xb84740d0, 0xdd440dd7), TOBN(0x748ef841, 0x780162fd), + TOBN(0xa8dbfe0e, 0xdfc6fafb), TOBN(0xeadfdf05, 0xf7300f27), + TOBN(0x7d06555f, 0xfeba4ec9), TOBN(0x12c56f83, 0x9e25fa97), + 
TOBN(0x77f84203, 0xd39b8c34), TOBN(0xed8b1be6, 0x3125eddb), + TOBN(0x5bbf2441, 0xf6e39dc5), TOBN(0xb00f6ee6, 0x6a5d678a), + TOBN(0xba456ecf, 0x57d0ea99), TOBN(0xdcae0f58, 0x17e06c43), + TOBN(0x01643de4, 0x0f5b4baa), TOBN(0x2c324341, 0xd161b9be), + TOBN(0x80177f55, 0xe126d468), TOBN(0xed325f1f, 0x76748e09), + TOBN(0x6116004a, 0xcfa9bdc2), TOBN(0x2d8607e6, 0x3a9fb468), + TOBN(0x0e573e27, 0x6009d660), TOBN(0x3a525d2e, 0x8d10c5a1), + TOBN(0xd26cb45c, 0x3b9009a0), TOBN(0xb6b0cdc0, 0xde9d7448), + TOBN(0x949c9976, 0xe1337c26), TOBN(0x6faadebd, 0xd73d68e5), + TOBN(0x9e158614, 0xf1b768d9), TOBN(0x22dfa557, 0x9cc4f069), + TOBN(0xccd6da17, 0xbe93c6d6), TOBN(0x24866c61, 0xa504f5b9), + TOBN(0x2121353c, 0x8d694da1), TOBN(0x1c6ca580, 0x0140b8c6), + TOBN(0xc245ad8c, 0xe964021e), TOBN(0xb83bffba, 0x032b82b3), + TOBN(0xfaa220c6, 0x47ef9898), TOBN(0x7e8d3ac6, 0x982c948a), + TOBN(0x1faa2091, 0xbc2d124a), TOBN(0xbd54c3dd, 0x05b15ff4), + TOBN(0x386bf3ab, 0xc87c6fb7), TOBN(0xfb2b0563, 0xfdeb6f66), + TOBN(0x4e77c557, 0x5b45afb4), TOBN(0xe9ded649, 0xefb8912d), + TOBN(0x7ec9bbf5, 0x42f6e557), TOBN(0x2570dfff, 0x62671f00), + TOBN(0x2b3bfb78, 0x88e084bd), TOBN(0xa024b238, 0xf37fe5b4), + TOBN(0x44e7dc04, 0x95649aee), TOBN(0x498ca255, 0x5e7ec1d8), + TOBN(0x3bc766ea, 0xaaa07e86), TOBN(0x0db6facb, 0xf3608586), + TOBN(0xbadd2549, 0xbdc259c8), TOBN(0x95af3c6e, 0x041c649f), + TOBN(0xb36a928c, 0x02e30afb), TOBN(0x9b5356ad, 0x008a88b8), + TOBN(0x4b67a5f1, 0xcf1d9e9d), TOBN(0xc6542e47, 0xa5d8d8ce), + TOBN(0x73061fe8, 0x7adfb6cc), TOBN(0xcc826fd3, 0x98678141), + TOBN(0x00e758b1, 0x3c80515a), TOBN(0x6afe3247, 0x41485083), + TOBN(0x0fcb08b9, 0xb6ae8a75), TOBN(0xb8cf388d, 0x4acf51e1), + TOBN(0x344a5560, 0x6961b9d6), TOBN(0x1a6778b8, 0x6a97fd0c), + TOBN(0xd840fdc1, 0xecc4c7e3), TOBN(0xde9fe47d, 0x16db68cc), + TOBN(0xe95f89de, 0xa3e216aa), TOBN(0x84f1a6a4, 0x9594a8be), + TOBN(0x7ddc7d72, 0x5a7b162b), TOBN(0xc5cfda19, 0xadc817a3), + TOBN(0x80a5d350, 0x78b58d46), TOBN(0x93365b13, 0x82978f19), + TOBN(0x2e44d225, 0x26a1fc90), TOBN(0x0d6d10d2, 0x4d70705d), + TOBN(0xd94b6b10, 0xd70c45f4), TOBN(0x0f201022, 0xb216c079), + TOBN(0xcec966c5, 0x658fde41), TOBN(0xa8d2bc7d, 0x7e27601d), + TOBN(0xbfcce3e1, 0xff230be7), TOBN(0x3394ff6b, 0x0033ffb5), + TOBN(0xd890c509, 0x8132c9af), TOBN(0xaac4b0eb, 0x361e7868), + TOBN(0x5194ded3, 0xe82d15aa), TOBN(0x4550bd2e, 0x23ae6b7d), + TOBN(0x3fda318e, 0xea5399d4), TOBN(0xd989bffa, 0x91638b80), + TOBN(0x5ea124d0, 0xa14aa12d), TOBN(0x1fb1b899, 0x3667b944), + TOBN(0x95ec7969, 0x44c44d6a), TOBN(0x91df144a, 0x57e86137), + TOBN(0x915fd620, 0x73adac44), TOBN(0x8f01732d, 0x59a83801), + TOBN(0xec579d25, 0x3aa0a633), TOBN(0x06de5e7c, 0xc9d6d59c), + TOBN(0xc132f958, 0xb1ef8010), TOBN(0x29476f96, 0xe65c1a02), + TOBN(0x336a77c0, 0xd34c3565), TOBN(0xef1105b2, 0x1b9f1e9e), + TOBN(0x63e6d08b, 0xf9e08002), TOBN(0x9aff2f21, 0xc613809e), + TOBN(0xb5754f85, 0x3a80e75d), TOBN(0xde71853e, 0x6bbda681), + TOBN(0x86f041df, 0x8197fd7a), TOBN(0x8b332e08, 0x127817fa), + TOBN(0x05d99be8, 0xb9c20cda), TOBN(0x89f7aad5, 0xd5cd0c98), + TOBN(0x7ef936fe, 0x5bb94183), TOBN(0x92ca0753, 0xb05cd7f2), + TOBN(0x9d65db11, 0x74a1e035), TOBN(0x02628cc8, 0x13eaea92), + TOBN(0xf2d9e242, 0x49e4fbf2), TOBN(0x94fdfd9b, 0xe384f8b7), + TOBN(0x65f56054, 0x63428c6b), TOBN(0x2f7205b2, 0x90b409a5), + TOBN(0xf778bb78, 0xff45ae11), TOBN(0xa13045be, 0xc5ee53b2), + TOBN(0xe00a14ff, 0x03ef77fe), TOBN(0x689cd59f, 0xffef8bef), + TOBN(0x3578f0ed, 0x1e9ade22), TOBN(0xe99f3ec0, 0x6268b6a8), + TOBN(0xa2057d91, 0xea1b3c3e), TOBN(0x2d1a7053, 0xb8823a4a), + TOBN(0xabbb336a, 
0x2cca451e), TOBN(0xcd2466e3, 0x2218bb5d), + TOBN(0x3ac1f42f, 0xc8cb762d), TOBN(0x7e312aae, 0x7690211f), + TOBN(0xebb9bd73, 0x45d07450), TOBN(0x207c4b82, 0x46c2213f), + TOBN(0x99d425c1, 0x375913ec), TOBN(0x94e45e96, 0x67908220), + TOBN(0xc08f3087, 0xcd67dbf6), TOBN(0xa5670fbe, 0xc0887056), + TOBN(0x6717b64a, 0x66f5b8fc), TOBN(0xd5a56aea, 0x786fec28), + TOBN(0xa8c3f55f, 0xc0ff4952), TOBN(0xa77fefae, 0x457ac49b), + TOBN(0x29882d7c, 0x98379d44), TOBN(0xd000bdfb, 0x509edc8a), + TOBN(0xc6f95979, 0xe66fe464), TOBN(0x504a6115, 0xfa61bde0), + TOBN(0x56b3b871, 0xeffea31a), TOBN(0x2d3de26d, 0xf0c21a54), + TOBN(0x21dbff31, 0x834753bf), TOBN(0xe67ecf49, 0x69269d86), + TOBN(0x7a176952, 0x151fe690), TOBN(0x03515804, 0x7f2adb5f), + TOBN(0xee794b15, 0xd1b62a8d), TOBN(0xf004ceec, 0xaae454e6), + TOBN(0x0897ea7c, 0xf0386fac), TOBN(0x3b62ff12, 0xd1fca751), + TOBN(0x154181df, 0x1b7a04ec), TOBN(0x2008e04a, 0xfb5847ec), + TOBN(0xd147148e, 0x41dbd772), TOBN(0x2b419f73, 0x22942654), + TOBN(0x669f30d3, 0xe9c544f7), TOBN(0x52a2c223, 0xc8540149), + TOBN(0x5da9ee14, 0x634dfb02), TOBN(0x5f074ff0, 0xf47869f3), + TOBN(0x74ee878d, 0xa3933acc), TOBN(0xe6510651, 0x4fe35ed1), + TOBN(0xb3eb9482, 0xf1012e7a), TOBN(0x51013cc0, 0xa8a566ae), + TOBN(0xdd5e9243, 0x47c00d3b), TOBN(0x7fde089d, 0x946bb0e5), + TOBN(0x030754fe, 0xc731b4b3), TOBN(0x12a136a4, 0x99fda062), + TOBN(0x7c1064b8, 0x5a1a35bc), TOBN(0xbf1f5763, 0x446c84ef), + TOBN(0xed29a56d, 0xa16d4b34), TOBN(0x7fba9d09, 0xdca21c4f), + TOBN(0x66d7ac00, 0x6d8de486), TOBN(0x60061987, 0x73a2a5e1), + TOBN(0x8b400f86, 0x9da28ff0), TOBN(0x3133f708, 0x43c4599c), + TOBN(0x9911c9b8, 0xee28cb0d), TOBN(0xcd7e2874, 0x8e0af61d), + TOBN(0x5a85f0f2, 0x72ed91fc), TOBN(0x85214f31, 0x9cd4a373), + TOBN(0x881fe5be, 0x1925253c), TOBN(0xd8dc98e0, 0x91e8bc76), + TOBN(0x7120affe, 0x585cc3a2), TOBN(0x724952ed, 0x735bf97a), + TOBN(0x5581e7dc, 0x3eb34581), TOBN(0x5cbff4f2, 0xe52ee57d), + TOBN(0x8d320a0e, 0x87d8cc7b), TOBN(0x9beaa7f3, 0xf1d280d0), + TOBN(0x7a0b9571, 0x9beec704), TOBN(0x9126332e, 0x5b7f0057), + TOBN(0x01fbc1b4, 0x8ed3bd6d), TOBN(0x35bb2c12, 0xd945eb24), + TOBN(0x6404694e, 0x9a8ae255), TOBN(0xb6092eec, 0x8d6abfb3), + TOBN(0x4d76143f, 0xcc058865), TOBN(0x7b0a5af2, 0x6e249922), + TOBN(0x8aef9440, 0x6a50d353), TOBN(0xe11e4bcc, 0x64f0e07a), + TOBN(0x4472993a, 0xa14a90fa), TOBN(0x7706e20c, 0xba0c51d4), + TOBN(0xf403292f, 0x1532672d), TOBN(0x52573bfa, 0x21829382), + TOBN(0x6a7bb6a9, 0x3b5bdb83), TOBN(0x08da65c0, 0xa4a72318), + TOBN(0xc58d22aa, 0x63eb065f), TOBN(0x1717596c, 0x1b15d685), + TOBN(0x112df0d0, 0xb266d88b), TOBN(0xf688ae97, 0x5941945a), + TOBN(0x487386e3, 0x7c292cac), TOBN(0x42f3b50d, 0x57d6985c), + TOBN(0x6da4f998, 0x6a90fc34), TOBN(0xc8f257d3, 0x65ca8a8d), + TOBN(0xc2feabca, 0x6951f762), TOBN(0xe1bc81d0, 0x74c323ac), + TOBN(0x1bc68f67, 0x251a2a12), TOBN(0x10d86587, 0xbe8a70dc), + TOBN(0xd648af7f, 0xf0f84d2e), TOBN(0xf0aa9ebc, 0x6a43ac92), + TOBN(0x69e3be04, 0x27596893), TOBN(0xb6bb02a6, 0x45bf452b), + TOBN(0x0875c11a, 0xf4c698c8), TOBN(0x6652b5c7, 0xbece3794), + TOBN(0x7b3755fd, 0x4f5c0499), TOBN(0x6ea16558, 0xb5532b38), + TOBN(0xd1c69889, 0xa2e96ef7), TOBN(0x9c773c3a, 0x61ed8f48), + TOBN(0x2b653a40, 0x9b323abc), TOBN(0xe26605e1, 0xf0e1d791), + TOBN(0x45d41064, 0x4a87157a), TOBN(0x8f9a78b7, 0xcbbce616), + TOBN(0xcf1e44aa, 0xc407eddd), TOBN(0x81ddd1d8, 0xa35b964f), + TOBN(0x473e339e, 0xfd083999), TOBN(0x6c94bdde, 0x8e796802), + TOBN(0x5a304ada, 0x8545d185), TOBN(0x82ae44ea, 0x738bb8cb), + TOBN(0x628a35e3, 0xdf87e10e), TOBN(0xd3624f3d, 0xa15b9fe3), + TOBN(0xcc44209b, 0x14be4254), 
TOBN(0x7d0efcbc, 0xbdbc2ea5), + TOBN(0x1f603362, 0x04c37bbe), TOBN(0x21f363f5, 0x56a5852c), + TOBN(0xa1503d1c, 0xa8501550), TOBN(0x2251e0e1, 0xd8ab10bb), + TOBN(0xde129c96, 0x6961c51c), TOBN(0x1f7246a4, 0x81910f68), + TOBN(0x2eb744ee, 0x5f2591f2), TOBN(0x3c47d33f, 0x5e627157), + TOBN(0x4d6d62c9, 0x22f3bd68), TOBN(0x6120a64b, 0xcb8df856), + TOBN(0x3a9ac6c0, 0x7b5d07df), TOBN(0xa92b9558, 0x7ef39783), + TOBN(0xe128a134, 0xab3a9b4f), TOBN(0x41c18807, 0xb1252f05), + TOBN(0xfc7ed089, 0x80ba9b1c), TOBN(0xac8dc6de, 0xc532a9dd), + TOBN(0xbf829cef, 0x55246809), TOBN(0x101b784f, 0x5b4ee80f), + TOBN(0xc09945bb, 0xb6f11603), TOBN(0x57b09dbe, 0x41d2801e), + TOBN(0xfba5202f, 0xa97534a8), TOBN(0x7fd8ae5f, 0xc17b9614), + TOBN(0xa50ba666, 0x78308435), TOBN(0x9572f77c, 0xd3868c4d), + TOBN(0x0cef7bfd, 0x2dd7aab0), TOBN(0xe7958e08, 0x2c7c79ff), + TOBN(0x81262e42, 0x25346689), TOBN(0x716da290, 0xb07c7004), + TOBN(0x35f911ea, 0xb7950ee3), TOBN(0x6fd72969, 0x261d21b5), + TOBN(0x52389803, 0x08b640d3), TOBN(0x5b0026ee, 0x887f12a1), + TOBN(0x20e21660, 0x742e9311), TOBN(0x0ef6d541, 0x5ff77ff7), + TOBN(0x969127f0, 0xf9c41135), TOBN(0xf21d60c9, 0x68a64993), + TOBN(0x656e5d0c, 0xe541875c), TOBN(0xf1e0f84e, 0xa1d3c233), + TOBN(0x9bcca359, 0x06002d60), TOBN(0xbe2da60c, 0x06191552), + TOBN(0x5da8bbae, 0x61181ec3), TOBN(0x9f04b823, 0x65806f19), + TOBN(0xf1604a7d, 0xd4b79bb8), TOBN(0xaee806fb, 0x52c878c8), + TOBN(0x34144f11, 0x8d47b8e8), TOBN(0x72edf52b, 0x949f9054), + TOBN(0xebfca84e, 0x2127015a), TOBN(0x9051d0c0, 0x9cb7cef3), + TOBN(0x86e8fe58, 0x296deec8), TOBN(0x33b28188, 0x41010d74),} + , + {TOBN(0x01079383, 0x171b445f), TOBN(0x9bcf21e3, 0x8131ad4c), + TOBN(0x8cdfe205, 0xc93987e8), TOBN(0xe63f4152, 0xc92e8c8f), + TOBN(0x729462a9, 0x30add43d), TOBN(0x62ebb143, 0xc980f05a), + TOBN(0x4f3954e5, 0x3b06e968), TOBN(0xfe1d75ad, 0x242cf6b1), + TOBN(0x5f95c6c7, 0xaf8685c8), TOBN(0xd4c1c8ce, 0x2f8f01aa), + TOBN(0xc44bbe32, 0x2574692a), TOBN(0xb8003478, 0xd4a4a068), + TOBN(0x7c8fc6e5, 0x2eca3cdb), TOBN(0xea1db16b, 0xec04d399), + TOBN(0xb05bc82e, 0x8f2bc5cf), TOBN(0x763d517f, 0xf44793d2), + TOBN(0x4451c1b8, 0x08bd98d0), TOBN(0x644b1cd4, 0x6575f240), + TOBN(0x6907eb33, 0x7375d270), TOBN(0x56c8bebd, 0xfa2286bd), + TOBN(0xc713d2ac, 0xc4632b46), TOBN(0x17da427a, 0xafd60242), + TOBN(0x313065b7, 0xc95c7546), TOBN(0xf8239898, 0xbf17a3de), + TOBN(0xf3b7963f, 0x4c830320), TOBN(0x842c7aa0, 0x903203e3), + TOBN(0xaf22ca0a, 0xe7327afb), TOBN(0x38e13092, 0x967609b6), + TOBN(0x73b8fb62, 0x757558f1), TOBN(0x3cc3e831, 0xf7eca8c1), + TOBN(0xe4174474, 0xf6331627), TOBN(0xa77989ca, 0xc3c40234), + TOBN(0xe5fd17a1, 0x44a081e0), TOBN(0xd797fb7d, 0xb70e296a), + TOBN(0x2b472b30, 0x481f719c), TOBN(0x0e632a98, 0xfe6f8c52), + TOBN(0x89ccd116, 0xc5f0c284), TOBN(0xf51088af, 0x2d987c62), + TOBN(0x2a2bccda, 0x4c2de6cf), TOBN(0x810f9efe, 0xf679f0f9), + TOBN(0xb0f394b9, 0x7ffe4b3e), TOBN(0x0b691d21, 0xe5fa5d21), + TOBN(0xb0bd7747, 0x9dfbbc75), TOBN(0xd2830fda, 0xfaf78b00), + TOBN(0xf78c249c, 0x52434f57), TOBN(0x4b1f7545, 0x98096dab), + TOBN(0x73bf6f94, 0x8ff8c0b3), TOBN(0x34aef03d, 0x454e134c), + TOBN(0xf8d151f4, 0xb7ac7ec5), TOBN(0xd6ceb95a, 0xe50da7d5), + TOBN(0xa1b492b0, 0xdc3a0eb8), TOBN(0x75157b69, 0xb3dd2863), + TOBN(0xe2c4c74e, 0xc5413d62), TOBN(0xbe329ff7, 0xbc5fc4c7), + TOBN(0x835a2aea, 0x60fa9dda), TOBN(0xf117f5ad, 0x7445cb87), + TOBN(0xae8317f4, 0xb0166f7a), TOBN(0xfbd3e3f7, 0xceec74e6), + TOBN(0xfdb516ac, 0xe0874bfd), TOBN(0x3d846019, 0xc681f3a3), + TOBN(0x0b12ee5c, 0x7c1620b0), TOBN(0xba68b4dd, 0x2b63c501), + TOBN(0xac03cd32, 0x6668c51e), 
TOBN(0x2a6279f7, 0x4e0bcb5b), + TOBN(0x17bd69b0, 0x6ae85c10), TOBN(0x72946979, 0x1dfdd3a6), + TOBN(0xd9a03268, 0x2c078bec), TOBN(0x41c6a658, 0xbfd68a52), + TOBN(0xcdea1024, 0x0e023900), TOBN(0xbaeec121, 0xb10d144d), + TOBN(0x5a600e74, 0x058ab8dc), TOBN(0x1333af21, 0xbb89ccdd), + TOBN(0xdf25eae0, 0x3aaba1f1), TOBN(0x2cada16e, 0x3b7144cf), + TOBN(0x657ee27d, 0x71ab98bc), TOBN(0x99088b4c, 0x7a6fc96e), + TOBN(0x05d5c0a0, 0x3549dbd4), TOBN(0x42cbdf8f, 0xf158c3ac), + TOBN(0x3fb6b3b0, 0x87edd685), TOBN(0x22071cf6, 0x86f064d0), + TOBN(0xd2d6721f, 0xff2811e5), TOBN(0xdb81b703, 0xfe7fae8c), + TOBN(0x3cfb74ef, 0xd3f1f7bb), TOBN(0x0cdbcd76, 0x16cdeb5d), + TOBN(0x4f39642a, 0x566a808c), TOBN(0x02b74454, 0x340064d6), + TOBN(0xfabbadca, 0x0528fa6f), TOBN(0xe4c3074c, 0xd3fc0bb6), + TOBN(0xb32cb8b0, 0xb796d219), TOBN(0xc3e95f4f, 0x34741dd9), + TOBN(0x87212125, 0x68edf6f5), TOBN(0x7a03aee4, 0xa2b9cb8e), + TOBN(0x0cd3c376, 0xf53a89aa), TOBN(0x0d8af9b1, 0x948a28dc), + TOBN(0xcf86a3f4, 0x902ab04f), TOBN(0x8aacb62a, 0x7f42002d), + TOBN(0x106985eb, 0xf62ffd52), TOBN(0xe670b54e, 0x5797bf10), + TOBN(0x4b405209, 0xc5e30aef), TOBN(0x12c97a20, 0x4365b5e9), + TOBN(0x104646ce, 0x1fe32093), TOBN(0x13cb4ff6, 0x3907a8c9), + TOBN(0x8b9f30d1, 0xd46e726b), TOBN(0xe1985e21, 0xaba0f499), + TOBN(0xc573dea9, 0x10a230cd), TOBN(0x24f46a93, 0xcd30f947), + TOBN(0xf2623fcf, 0xabe2010a), TOBN(0x3f278cb2, 0x73f00e4f), + TOBN(0xed55c67d, 0x50b920eb), TOBN(0xf1cb9a2d, 0x8e760571), + TOBN(0x7c50d109, 0x0895b709), TOBN(0x4207cf07, 0x190d4369), + TOBN(0x3b027e81, 0xc4127fe1), TOBN(0xa9f8b9ad, 0x3ae9c566), + TOBN(0x5ab10851, 0xacbfbba5), TOBN(0xa747d648, 0x569556f5), + TOBN(0xcc172b5c, 0x2ba97bf7), TOBN(0x15e0f77d, 0xbcfa3324), + TOBN(0xa345b797, 0x7686279d), TOBN(0x5a723480, 0xe38003d3), + TOBN(0xfd8e139f, 0x8f5fcda8), TOBN(0xf3e558c4, 0xbdee5bfd), + TOBN(0xd76cbaf4, 0xe33f9f77), TOBN(0x3a4c97a4, 0x71771969), + TOBN(0xda27e84b, 0xf6dce6a7), TOBN(0xff373d96, 0x13e6c2d1), + TOBN(0xf115193c, 0xd759a6e9), TOBN(0x3f9b7025, 0x63d2262c), + TOBN(0xd9764a31, 0x317cd062), TOBN(0x30779d8e, 0x199f8332), + TOBN(0xd8074106, 0x16b11b0b), TOBN(0x7917ab9f, 0x78aeaed8), + TOBN(0xb67a9cbe, 0x28fb1d8e), TOBN(0x2e313563, 0x136eda33), + TOBN(0x010b7069, 0xa371a86c), TOBN(0x44d90fa2, 0x6744e6b7), + TOBN(0x68190867, 0xd6b3e243), TOBN(0x9fe6cd9d, 0x59048c48), + TOBN(0xb900b028, 0x95731538), TOBN(0xa012062f, 0x32cae04f), + TOBN(0x8107c8bc, 0x9399d082), TOBN(0x47e8c54a, 0x41df12e2), + TOBN(0x14ba5117, 0xb6ef3f73), TOBN(0x22260bea, 0x81362f0b), + TOBN(0x90ea261e, 0x1a18cc20), TOBN(0x2192999f, 0x2321d636), + TOBN(0xef64d314, 0xe311b6a0), TOBN(0xd7401e4c, 0x3b54a1f5), + TOBN(0x19019983, 0x6fbca2ba), TOBN(0x46ad3293, 0x8fbffc4b), + TOBN(0xa142d3f6, 0x3786bf40), TOBN(0xeb5cbc26, 0xb67039fc), + TOBN(0x9cb0ae6c, 0x252bd479), TOBN(0x05e0f88a, 0x12b5848f), + TOBN(0x78f6d2b2, 0xa5c97663), TOBN(0x6f6e149b, 0xc162225c), + TOBN(0xe602235c, 0xde601a89), TOBN(0xd17bbe98, 0xf373be1f), + TOBN(0xcaf49a5b, 0xa8471827), TOBN(0x7e1a0a85, 0x18aaa116), + TOBN(0x6c833196, 0x270580c3), TOBN(0x1e233839, 0xf1c98a14), + TOBN(0x67b2f7b4, 0xae34e0a5), TOBN(0x47ac8745, 0xd8ce7289), + TOBN(0x2b74779a, 0x100dd467), TOBN(0x274a4337, 0x4ee50d09), + TOBN(0x603dcf13, 0x83608bc9), TOBN(0xcd9da6c3, 0xc89e8388), + TOBN(0x2660199f, 0x355116ac), TOBN(0xcc38bb59, 0xb6d18eed), + TOBN(0x3075f31f, 0x2f4bc071), TOBN(0x9774457f, 0x265dc57e), + TOBN(0x06a6a9c8, 0xc6db88bb), TOBN(0x6429d07f, 0x4ec98e04), + TOBN(0x8d05e57b, 0x05ecaa8b), TOBN(0x20f140b1, 0x7872ea7b), + TOBN(0xdf8c0f09, 0xca494693), TOBN(0x48d3a020, 
0xf252e909), + TOBN(0x4c5c29af, 0x57b14b12), TOBN(0x7e6fa37d, 0xbf47ad1c), + TOBN(0x66e7b506, 0x49a0c938), TOBN(0xb72c0d48, 0x6be5f41f), + TOBN(0x6a6242b8, 0xb2359412), TOBN(0xcd35c774, 0x8e859480), + TOBN(0x12536fea, 0x87baa627), TOBN(0x58c1fec1, 0xf72aa680), + TOBN(0x6c29b637, 0x601e5dc9), TOBN(0x9e3c3c1c, 0xde9e01b9), + TOBN(0xefc8127b, 0x2bcfe0b0), TOBN(0x35107102, 0x2a12f50d), + TOBN(0x6ccd6cb1, 0x4879b397), TOBN(0xf792f804, 0xf8a82f21), + TOBN(0x509d4804, 0xa9b46402), TOBN(0xedddf85d, 0xc10f0850), + TOBN(0x928410dc, 0x4b6208aa), TOBN(0xf6229c46, 0x391012dc), + TOBN(0xc5a7c41e, 0x7727b9b6), TOBN(0x289e4e4b, 0xaa444842), + TOBN(0x049ba1d9, 0xe9a947ea), TOBN(0x44f9e47f, 0x83c8debc), + TOBN(0xfa77a1fe, 0x611f8b8e), TOBN(0xfd2e416a, 0xf518f427), + TOBN(0xc5fffa70, 0x114ebac3), TOBN(0xfe57c4e9, 0x5d89697b), + TOBN(0xfdd053ac, 0xb1aaf613), TOBN(0x31df210f, 0xea585a45), + TOBN(0x318cc10e, 0x24985034), TOBN(0x1a38efd1, 0x5f1d6130), + TOBN(0xbf86f237, 0x0b1e9e21), TOBN(0xb258514d, 0x1dbe88aa), + TOBN(0x1e38a588, 0x90c1baf9), TOBN(0x2936a01e, 0xbdb9b692), + TOBN(0xd576de98, 0x6dd5b20c), TOBN(0xb586bf71, 0x70f98ecf), + TOBN(0xcccf0f12, 0xc42d2fd7), TOBN(0x8717e61c, 0xfb35bd7b), + TOBN(0x8b1e5722, 0x35e6fc06), TOBN(0x3477728f, 0x0b3e13d5), + TOBN(0x150c294d, 0xaa8a7372), TOBN(0xc0291d43, 0x3bfa528a), + TOBN(0xc6c8bc67, 0xcec5a196), TOBN(0xdeeb31e4, 0x5c2e8a7c), + TOBN(0xba93e244, 0xfb6e1c51), TOBN(0xb9f8b71b, 0x2e28e156), + TOBN(0xce65a287, 0x968a2ab9), TOBN(0xe3c5ce69, 0x46bbcb1f), + TOBN(0xf8c835b9, 0xe7ae3f30), TOBN(0x16bbee26, 0xff72b82b), + TOBN(0x665e2017, 0xfd42cd22), TOBN(0x1e139970, 0xf8b1d2a0), + TOBN(0x125cda29, 0x79204932), TOBN(0x7aee94a5, 0x49c3bee5), + TOBN(0x68c70160, 0x89821a66), TOBN(0xf7c37678, 0x8f981669), + TOBN(0xd90829fc, 0x48cc3645), TOBN(0x346af049, 0xd70addfc), + TOBN(0x2057b232, 0x370bf29c), TOBN(0xf90c73ce, 0x42e650ee), + TOBN(0xe03386ea, 0xa126ab90), TOBN(0x0e266e7e, 0x975a087b), + TOBN(0x80578eb9, 0x0fca65d9), TOBN(0x7e2989ea, 0x16af45b8), + TOBN(0x7438212d, 0xcac75a4e), TOBN(0x38c7ca39, 0x4fef36b8), + TOBN(0x8650c494, 0xd402676a), TOBN(0x26ab5a66, 0xf72c7c48), + TOBN(0x4e6cb426, 0xce3a464e), TOBN(0xf8f99896, 0x2b72f841), + TOBN(0x8c318491, 0x1a335cc8), TOBN(0x563459ba, 0x6a5913e4), + TOBN(0x1b920d61, 0xc7b32919), TOBN(0x805ab8b6, 0xa02425ad), + TOBN(0x2ac512da, 0x8d006086), TOBN(0x6ca4846a, 0xbcf5c0fd), + TOBN(0xafea51d8, 0xac2138d7), TOBN(0xcb647545, 0x344cd443), + TOBN(0x0429ee8f, 0xbd7d9040), TOBN(0xee66a2de, 0x819b9c96), + TOBN(0x54f9ec25, 0xdea7d744), TOBN(0x2ffea642, 0x671721bb), + TOBN(0x4f19dbd1, 0x114344ea), TOBN(0x04304536, 0xfd0dbc8b), + TOBN(0x014b50aa, 0x29ec7f91), TOBN(0xb5fc22fe, 0xbb06014d), + TOBN(0x60d963a9, 0x1ee682e0), TOBN(0xdf48abc0, 0xfe85c727), + TOBN(0x0cadba13, 0x2e707c2d), TOBN(0xde608d3a, 0xa645aeff), + TOBN(0x05f1c28b, 0xedafd883), TOBN(0x3c362ede, 0xbd94de1f), + TOBN(0x8dd0629d, 0x13593e41), TOBN(0x0a5e736f, 0x766d6eaf), + TOBN(0xbfa92311, 0xf68cf9d1), TOBN(0xa4f9ef87, 0xc1797556), + TOBN(0x10d75a1f, 0x5601c209), TOBN(0x651c374c, 0x09b07361), + TOBN(0x49950b58, 0x88b5cead), TOBN(0x0ef00058, 0x6fa9dbaa), + TOBN(0xf51ddc26, 0x4e15f33a), TOBN(0x1f8b5ca6, 0x2ef46140), + TOBN(0x343ac0a3, 0xee9523f0), TOBN(0xbb75eab2, 0x975ea978), + TOBN(0x1bccf332, 0x107387f4), TOBN(0x790f9259, 0x9ab0062e), + TOBN(0xf1a363ad, 0x1e4f6a5f), TOBN(0x06e08b84, 0x62519a50), + TOBN(0x60915187, 0x7265f1ee), TOBN(0x6a80ca34, 0x93ae985e), + TOBN(0x81b29768, 0xaaba4864), TOBN(0xb13cabf2, 0x8d52a7d6), + TOBN(0xb5c36348, 0x8ead03f1), TOBN(0xc932ad95, 0x81c7c1c0), + 
TOBN(0x5452708e, 0xcae1e27b), TOBN(0x9dac4269, 0x1b0df648), + TOBN(0x233e3f0c, 0xdfcdb8bc), TOBN(0xe6ceccdf, 0xec540174), + TOBN(0xbd0d845e, 0x95081181), TOBN(0xcc8a7920, 0x699355d5), + TOBN(0x111c0f6d, 0xc3b375a8), TOBN(0xfd95bc6b, 0xfd51e0dc), + TOBN(0x4a106a26, 0x6888523a), TOBN(0x4d142bd6, 0xcb01a06d), + TOBN(0x79bfd289, 0xadb9b397), TOBN(0x0bdbfb94, 0xe9863914), + TOBN(0x29d8a229, 0x1660f6a6), TOBN(0x7f6abcd6, 0x551c042d), + TOBN(0x13039deb, 0x0ac3ffe8), TOBN(0xa01be628, 0xec8523fb), + TOBN(0x6ea34103, 0x0ca1c328), TOBN(0xc74114bd, 0xb903928e), + TOBN(0x8aa4ff4e, 0x9e9144b0), TOBN(0x7064091f, 0x7f9a4b17), + TOBN(0xa3f4f521, 0xe447f2c4), TOBN(0x81b8da7a, 0x604291f0), + TOBN(0xd680bc46, 0x7d5926de), TOBN(0x84f21fd5, 0x34a1202f), + TOBN(0x1d1e3181, 0x4e9df3d8), TOBN(0x1ca4861a, 0x39ab8d34), + TOBN(0x809ddeec, 0x5b19aa4a), TOBN(0x59f72f7e, 0x4d329366), + TOBN(0xa2f93f41, 0x386d5087), TOBN(0x40bf739c, 0xdd67d64f), + TOBN(0xb4494205, 0x66702158), TOBN(0xc33c65be, 0x73b1e178), + TOBN(0xcdcd657c, 0x38ca6153), TOBN(0x97f4519a, 0xdc791976), + TOBN(0xcc7c7f29, 0xcd6e1f39), TOBN(0x38de9cfb, 0x7e3c3932), + TOBN(0xe448eba3, 0x7b793f85), TOBN(0xe9f8dbf9, 0xf067e914), + TOBN(0xc0390266, 0xf114ae87), TOBN(0x39ed75a7, 0xcd6a8e2a), + TOBN(0xadb14848, 0x7ffba390), TOBN(0x67f8cb8b, 0x6af9bc09), + TOBN(0x322c3848, 0x9c7476db), TOBN(0xa320fecf, 0x52a538d6), + TOBN(0xe0493002, 0xb2aced2b), TOBN(0xdfba1809, 0x616bd430), + TOBN(0x531c4644, 0xc331be70), TOBN(0xbc04d32e, 0x90d2e450), + TOBN(0x1805a0d1, 0x0f9f142d), TOBN(0x2c44a0c5, 0x47ee5a23), + TOBN(0x31875a43, 0x3989b4e3), TOBN(0x6b1949fd, 0x0c063481), + TOBN(0x2dfb9e08, 0xbe0f4492), TOBN(0x3ff0da03, 0xe9d5e517), + TOBN(0x03dbe9a1, 0xf79466a8), TOBN(0x0b87bcd0, 0x15ea9932), + TOBN(0xeb64fc83, 0xab1f58ab), TOBN(0x6d9598da, 0x817edc8a), + TOBN(0x699cff66, 0x1d3b67e5), TOBN(0x645c0f29, 0x92635853), + TOBN(0x253cdd82, 0xeabaf21c), TOBN(0x82b9602a, 0x2241659e), + TOBN(0x2cae07ec, 0x2d9f7091), TOBN(0xbe4c720c, 0x8b48cd9b), + TOBN(0x6ce5bc03, 0x6f08d6c9), TOBN(0x36e8a997, 0xaf10bf40), + TOBN(0x83422d21, 0x3e10ff12), TOBN(0x7b26d3eb, 0xbcc12494), + TOBN(0xb240d2d0, 0xc9469ad6), TOBN(0xc4a11b4d, 0x30afa05b), + TOBN(0x4b604ace, 0xdd6ba286), TOBN(0x18486600, 0x3ee2864c), + TOBN(0x5869d6ba, 0x8d9ce5be), TOBN(0x0d8f68c5, 0xff4bfb0d), + TOBN(0xb69f210b, 0x5700cf73), TOBN(0x61f6653a, 0x6d37c135), + TOBN(0xff3d432b, 0x5aff5a48), TOBN(0x0d81c4b9, 0x72ba3a69), + TOBN(0xee879ae9, 0xfa1899ef), TOBN(0xbac7e2a0, 0x2d6acafd), + TOBN(0xd6d93f6c, 0x1c664399), TOBN(0x4c288de1, 0x5bcb135d), + TOBN(0x83031dab, 0x9dab7cbf), TOBN(0xfe23feb0, 0x3abbf5f0), + TOBN(0x9f1b2466, 0xcdedca85), TOBN(0x140bb710, 0x1a09538c), + TOBN(0xac8ae851, 0x5e11115d), TOBN(0x0d63ff67, 0x6f03f59e), + TOBN(0x755e5551, 0x7d234afb), TOBN(0x61c2db4e, 0x7e208fc1), + TOBN(0xaa9859ce, 0xf28a4b5d), TOBN(0xbdd6d4fc, 0x34af030f), + TOBN(0xd1c4a26d, 0x3be01cb1), TOBN(0x9ba14ffc, 0x243aa07c), + TOBN(0xf95cd3a9, 0xb2503502), TOBN(0xe379bc06, 0x7d2a93ab), + TOBN(0x3efc18e9, 0xd4ca8d68), TOBN(0x083558ec, 0x80bb412a), + TOBN(0xd903b940, 0x9645a968), TOBN(0xa499f0b6, 0x9ba6054f), + TOBN(0x208b573c, 0xb8349abe), TOBN(0x3baab3e5, 0x30b4fc1c), + TOBN(0x87e978ba, 0xcb524990), TOBN(0x3524194e, 0xccdf0e80), + TOBN(0x62711725, 0x7d4bcc42), TOBN(0xe90a3d9b, 0xb90109ba), + TOBN(0x3b1bdd57, 0x1323e1e0), TOBN(0xb78e9bd5, 0x5eae1599), + TOBN(0x0794b746, 0x9e03d278), TOBN(0x80178605, 0xd70e6297), + TOBN(0x171792f8, 0x99c97855), TOBN(0x11b393ee, 0xf5a86b5c), + TOBN(0x48ef6582, 0xd8884f27), TOBN(0xbd44737a, 0xbf19ba5f), + TOBN(0x8698de4c, 
0xa42062c6), TOBN(0x8975eb80, 0x61ce9c54), + TOBN(0xd50e57c7, 0xd7fe71f3), TOBN(0x15342190, 0xbc97ce38), + TOBN(0x51bda2de, 0x4df07b63), TOBN(0xba12aeae, 0x200eb87d), + TOBN(0xabe135d2, 0xa9b4f8f6), TOBN(0x04619d65, 0xfad6d99c), + TOBN(0x4a6683a7, 0x7994937c), TOBN(0x7a778c8b, 0x6f94f09a), + TOBN(0x8c508623, 0x20a71b89), TOBN(0x241a2aed, 0x1c229165), + TOBN(0x352be595, 0xaaf83a99), TOBN(0x9fbfee7f, 0x1562bac8), + TOBN(0xeaf658b9, 0x5c4017e3), TOBN(0x1dc7f9e0, 0x15120b86), + TOBN(0xd84f13dd, 0x4c034d6f), TOBN(0x283dd737, 0xeaea3038), + TOBN(0x197f2609, 0xcd85d6a2), TOBN(0x6ebbc345, 0xfae60177), + TOBN(0xb80f031b, 0x4e12fede), TOBN(0xde55d0c2, 0x07a2186b), + TOBN(0x1fb3e37f, 0x24dcdd5a), TOBN(0x8d602da5, 0x7ed191fb), + TOBN(0x108fb056, 0x76023e0d), TOBN(0x70178c71, 0x459c20c0), + TOBN(0xfad5a386, 0x3fe54cf0), TOBN(0xa4a3ec4f, 0x02bbb475), + TOBN(0x1aa5ec20, 0x919d94d7), TOBN(0x5d3b63b5, 0xa81e4ab3), + TOBN(0x7fa733d8, 0x5ad3d2af), TOBN(0xfbc586dd, 0xd1ac7a37), + TOBN(0x282925de, 0x40779614), TOBN(0xfe0ffffb, 0xe74a242a), + TOBN(0x3f39e67f, 0x906151e5), TOBN(0xcea27f5f, 0x55e10649), + TOBN(0xdca1d4e1, 0xc17cf7b7), TOBN(0x0c326d12, 0x2fe2362d), + TOBN(0x05f7ac33, 0x7dd35df3), TOBN(0x0c3b7639, 0xc396dbdf), + TOBN(0x0912f5ac, 0x03b7db1c), TOBN(0x9dea4b70, 0x5c9ed4a9), + TOBN(0x475e6e53, 0xaae3f639), TOBN(0xfaba0e7c, 0xfc278bac), + TOBN(0x16f9e221, 0x9490375f), TOBN(0xaebf9746, 0xa5a7ed0a), + TOBN(0x45f9af3f, 0xf41ad5d6), TOBN(0x03c4623c, 0xb2e99224), + TOBN(0x82c5bb5c, 0xb3cf56aa), TOBN(0x64311819, 0x34567ed3), + TOBN(0xec57f211, 0x8be489ac), TOBN(0x2821895d, 0xb9a1104b), + TOBN(0x610dc875, 0x6064e007), TOBN(0x8e526f3f, 0x5b20d0fe), + TOBN(0x6e71ca77, 0x5b645aee), TOBN(0x3d1dcb9f, 0x800e10ff), + TOBN(0x36b51162, 0x189cf6de), TOBN(0x2c5a3e30, 0x6bb17353), + TOBN(0xc186cd3e, 0x2a6c6fbf), TOBN(0xa74516fa, 0x4bf97906), + TOBN(0x5b4b8f4b, 0x279d6901), TOBN(0x0c4e57b4, 0x2b573743), + TOBN(0x75fdb229, 0xb6e386b6), TOBN(0xb46793fd, 0x99deac27), + TOBN(0xeeec47ea, 0xcf712629), TOBN(0xe965f3c4, 0xcbc3b2dd), + TOBN(0x8dd1fb83, 0x425c6559), TOBN(0x7fc00ee6, 0x0af06fda), + TOBN(0xe98c9225, 0x33d956df), TOBN(0x0f1ef335, 0x4fbdc8a2), + TOBN(0x2abb5145, 0xb79b8ea2), TOBN(0x40fd2945, 0xbdbff288), + TOBN(0x6a814ac4, 0xd7185db7), TOBN(0xc4329d6f, 0xc084609a), + TOBN(0xc9ba7b52, 0xed1be45d), TOBN(0x891dd20d, 0xe4cd2c74), + TOBN(0x5a4d4a7f, 0x824139b1), TOBN(0x66c17716, 0xb873c710), + TOBN(0x5e5bc141, 0x2843c4e0), TOBN(0xd5ac4817, 0xb97eb5bf), + TOBN(0xc0f8af54, 0x450c95c7), TOBN(0xc91b3fa0, 0x318406c5), + TOBN(0x360c340a, 0xab9d97f8), TOBN(0xfb57bd07, 0x90a2d611), + TOBN(0x4339ae3c, 0xa6a6f7e5), TOBN(0x9c1fcd2a, 0x2feb8a10), + TOBN(0x972bcca9, 0xc7ea7432), TOBN(0x1b0b924c, 0x308076f6), + TOBN(0x80b2814a, 0x2a5b4ca5), TOBN(0x2f78f55b, 0x61ef3b29), + TOBN(0xf838744a, 0xc18a414f), TOBN(0xc611eaae, 0x903d0a86), + TOBN(0x94dabc16, 0x2a453f55), TOBN(0xe6f2e3da, 0x14efb279), + TOBN(0x5b7a6017, 0x9320dc3c), TOBN(0x692e382f, 0x8df6b5a4), + TOBN(0x3f5e15e0, 0x2d40fa90), TOBN(0xc87883ae, 0x643dd318), + TOBN(0x511053e4, 0x53544774), TOBN(0x834d0ecc, 0x3adba2bc), + TOBN(0x4215d7f7, 0xbae371f5), TOBN(0xfcfd57bf, 0x6c8663bc), + TOBN(0xded2383d, 0xd6901b1d), TOBN(0x3b49fbb4, 0xb5587dc3), + TOBN(0xfd44a08d, 0x07625f62), TOBN(0x3ee4d65b, 0x9de9b762),} + , + {TOBN(0x64e5137d, 0x0d63d1fa), TOBN(0x658fc052, 0x02a9d89f), + TOBN(0x48894874, 0x50436309), TOBN(0xe9ae30f8, 0xd598da61), + TOBN(0x2ed710d1, 0x818baf91), TOBN(0xe27e9e06, 0x8b6a0c20), + TOBN(0x1e28dcfb, 0x1c1a6b44), TOBN(0x883acb64, 0xd6ac57dc), + TOBN(0x8735728d, 0xc2c6ff70), 
TOBN(0x79d6122f, 0xc5dc2235), + TOBN(0x23f5d003, 0x19e277f9), TOBN(0x7ee84e25, 0xdded8cc7), + TOBN(0x91a8afb0, 0x63cd880a), TOBN(0x3f3ea7c6, 0x3574af60), + TOBN(0x0cfcdc84, 0x02de7f42), TOBN(0x62d0792f, 0xb31aa152), + TOBN(0x8e1b4e43, 0x8a5807ce), TOBN(0xad283893, 0xe4109a7e), + TOBN(0xc30cc9cb, 0xafd59dda), TOBN(0xf65f36c6, 0x3d8d8093), + TOBN(0xdf31469e, 0xa60d32b2), TOBN(0xee93df4b, 0x3e8191c8), + TOBN(0x9c1017c5, 0x355bdeb5), TOBN(0xd2623185, 0x8616aa28), + TOBN(0xb02c83f9, 0xdec31a21), TOBN(0x988c8b23, 0x6ad9d573), + TOBN(0x53e983ae, 0xa57be365), TOBN(0xe968734d, 0x646f834e), + TOBN(0x9137ea8f, 0x5da6309b), TOBN(0x10f3a624, 0xc1f1ce16), + TOBN(0x782a9ea2, 0xca440921), TOBN(0xdf94739e, 0x5b46f1b5), + TOBN(0x9f9be006, 0xcce85c9b), TOBN(0x360e70d6, 0xa4c7c2d3), + TOBN(0x2cd5beea, 0xaefa1e60), TOBN(0x64cf63c0, 0x8c3d2b6d), + TOBN(0xfb107fa3, 0xe1cf6f90), TOBN(0xb7e937c6, 0xd5e044e6), + TOBN(0x74e8ca78, 0xce34db9f), TOBN(0x4f8b36c1, 0x3e210bd0), + TOBN(0x1df165a4, 0x34a35ea8), TOBN(0x3418e0f7, 0x4d4412f6), + TOBN(0x5af1f8af, 0x518836c3), TOBN(0x42ceef4d, 0x130e1965), + TOBN(0x5560ca0b, 0x543a1957), TOBN(0xc33761e5, 0x886cb123), + TOBN(0x66624b1f, 0xfe98ed30), TOBN(0xf772f4bf, 0x1090997d), + TOBN(0xf4e540bb, 0x4885d410), TOBN(0x7287f810, 0x9ba5f8d7), + TOBN(0x22d0d865, 0xde98dfb1), TOBN(0x49ff51a1, 0xbcfbb8a3), + TOBN(0xb6b6fa53, 0x6bc3012e), TOBN(0x3d31fd72, 0x170d541d), + TOBN(0x8018724f, 0x4b0f4966), TOBN(0x79e7399f, 0x87dbde07), + TOBN(0x56f8410e, 0xf4f8b16a), TOBN(0x97241afe, 0xc47b266a), + TOBN(0x0a406b8e, 0x6d9c87c1), TOBN(0x803f3e02, 0xcd42ab1b), + TOBN(0x7f0309a8, 0x04dbec69), TOBN(0xa83b85f7, 0x3bbad05f), + TOBN(0xc6097273, 0xad8e197f), TOBN(0xc097440e, 0x5067adc1), + TOBN(0x730eafb6, 0x3524ff16), TOBN(0xd7f9b51e, 0x823fc6ce), + TOBN(0x27bd0d32, 0x443e4ac0), TOBN(0x40c59ad9, 0x4d66f217), + TOBN(0x6c33136f, 0x17c387a4), TOBN(0x5043b8d5, 0xeb86804d), + TOBN(0x74970312, 0x675a73c9), TOBN(0x838fdb31, 0xf16669b6), + TOBN(0xc507b6dd, 0x418e7ddd), TOBN(0x39888d93, 0x472f19d6), + TOBN(0x7eae26be, 0x0c27eb4d), TOBN(0x17b53ed3, 0xfbabb884), + TOBN(0xfc27021b, 0x2b01ae4f), TOBN(0x88462e87, 0xcf488682), + TOBN(0xbee096ec, 0x215e2d87), TOBN(0xeb2fea9a, 0xd242e29b), + TOBN(0x5d985b5f, 0xb821fc28), TOBN(0x89d2e197, 0xdc1e2ad2), + TOBN(0x55b566b8, 0x9030ba62), TOBN(0xe3fd41b5, 0x4f41b1c6), + TOBN(0xb738ac2e, 0xb9a96d61), TOBN(0x7f8567ca, 0x369443f4), + TOBN(0x8698622d, 0xf803a440), TOBN(0x2b586236, 0x8fe2f4dc), + TOBN(0xbbcc00c7, 0x56b95bce), TOBN(0x5ec03906, 0x616da680), + TOBN(0x79162ee6, 0x72214252), TOBN(0x43132b63, 0x86a892d2), + TOBN(0x4bdd3ff2, 0x2f3263bf), TOBN(0xd5b3733c, 0x9cd0a142), + TOBN(0x592eaa82, 0x44415ccb), TOBN(0x663e8924, 0x8d5474ea), + TOBN(0x8058a25e, 0x5236344e), TOBN(0x82e8df9d, 0xbda76ee6), + TOBN(0xdcf6efd8, 0x11cc3d22), TOBN(0x00089cda, 0x3b4ab529), + TOBN(0x91d3a071, 0xbd38a3db), TOBN(0x4ea97fc0, 0xef72b925), + TOBN(0x0c9fc15b, 0xea3edf75), TOBN(0x5a6297cd, 0xa4348ed3), + TOBN(0x0d38ab35, 0xce7c42d4), TOBN(0x9fd493ef, 0x82feab10), + TOBN(0x46056b6d, 0x82111b45), TOBN(0xda11dae1, 0x73efc5c3), + TOBN(0xdc740278, 0x5545a7fb), TOBN(0xbdb2601c, 0x40d507e6), + TOBN(0x121dfeeb, 0x7066fa58), TOBN(0x214369a8, 0x39ae8c2a), + TOBN(0x195709cb, 0x06e0956c), TOBN(0x4c9d254f, 0x010cd34b), + TOBN(0xf51e13f7, 0x0471a532), TOBN(0xe19d6791, 0x1e73054d), + TOBN(0xf702a628, 0xdb5c7be3), TOBN(0xc7141218, 0xb24dde05), + TOBN(0xdc18233c, 0xf29b2e2e), TOBN(0x3a6bd1e8, 0x85342dba), + TOBN(0x3f747fa0, 0xb311898c), TOBN(0xe2a272e4, 0xcd0eac65), + TOBN(0x4bba5851, 0xf914d0bc), TOBN(0x7a1a9660, 
0xc4a43ee3), + TOBN(0xe5a367ce, 0xa1c8cde9), TOBN(0x9d958ba9, 0x7271abe3), + TOBN(0xf3ff7eb6, 0x3d1615cd), TOBN(0xa2280dce, 0xf5ae20b0), + TOBN(0x56dba5c1, 0xcf640147), TOBN(0xea5a2e3d, 0x5e83d118), + TOBN(0x04cd6b6d, 0xda24c511), TOBN(0x1c0f4671, 0xe854d214), + TOBN(0x91a6b7a9, 0x69565381), TOBN(0xdc966240, 0xdecf1f5b), + TOBN(0x1b22d21c, 0xfcf5d009), TOBN(0x2a05f641, 0x9021dbd5), + TOBN(0x8c0ed566, 0xd4312483), TOBN(0x5179a95d, 0x643e216f), + TOBN(0xcc185fec, 0x17044493), TOBN(0xb3063339, 0x54991a21), + TOBN(0xd801ecdb, 0x0081a726), TOBN(0x0149b0c6, 0x4fa89bbb), + TOBN(0xafe9065a, 0x4391b6b9), TOBN(0xedc92786, 0xd633f3a3), + TOBN(0xe408c24a, 0xae6a8e13), TOBN(0x85833fde, 0x9f3897ab), + TOBN(0x43800e7e, 0xd81a0715), TOBN(0xde08e346, 0xb44ffc5f), + TOBN(0x7094184c, 0xcdeff2e0), TOBN(0x49f9387b, 0x165eaed1), + TOBN(0x635d6129, 0x777c468a), TOBN(0x8c0dcfd1, 0x538c2dd8), + TOBN(0xd6d9d9e3, 0x7a6a308b), TOBN(0x62375830, 0x4c2767d3), + TOBN(0x874a8bc6, 0xf38cbeb6), TOBN(0xd94d3f1a, 0xccb6fd9e), + TOBN(0x92a9735b, 0xba21f248), TOBN(0x272ad0e5, 0x6cd1efb0), + TOBN(0x7437b69c, 0x05b03284), TOBN(0xe7f04702, 0x6948c225), + TOBN(0x8a56c04a, 0xcba2ecec), TOBN(0x0c181270, 0xe3a73e41), + TOBN(0x6cb34e9d, 0x03e93725), TOBN(0xf77c8713, 0x496521a9), + TOBN(0x94569183, 0xfa7f9f90), TOBN(0xf2e7aa4c, 0x8c9707ad), + TOBN(0xced2c9ba, 0x26c1c9a3), TOBN(0x9109fe96, 0x40197507), + TOBN(0x9ae868a9, 0xe9adfe1c), TOBN(0x3984403d, 0x314e39bb), + TOBN(0xb5875720, 0xf2fe378f), TOBN(0x33f901e0, 0xba44a628), + TOBN(0xea1125fe, 0x3652438c), TOBN(0xae9ec4e6, 0x9dd1f20b), + TOBN(0x1e740d9e, 0xbebf7fbd), TOBN(0x6dbd3ddc, 0x42dbe79c), + TOBN(0x62082aec, 0xedd36776), TOBN(0xf612c478, 0xe9859039), + TOBN(0xa493b201, 0x032f7065), TOBN(0xebd4d8f2, 0x4ff9b211), + TOBN(0x3f23a0aa, 0xaac4cb32), TOBN(0xea3aadb7, 0x15ed4005), + TOBN(0xacf17ea4, 0xafa27e63), TOBN(0x56125c1a, 0xc11fd66c), + TOBN(0x266344a4, 0x3794f8dc), TOBN(0xdcca923a, 0x483c5c36), + TOBN(0x2d6b6bbf, 0x3f9d10a0), TOBN(0xb320c5ca, 0x81d9bdf3), + TOBN(0x620e28ff, 0x47b50a95), TOBN(0x933e3b01, 0xcef03371), + TOBN(0xf081bf85, 0x99100153), TOBN(0x183be9a0, 0xc3a8c8d6), + TOBN(0x4e3ddc5a, 0xd6bbe24d), TOBN(0xc6c74630, 0x53843795), + TOBN(0x78193dd7, 0x65ec2d4c), TOBN(0xb8df26cc, 0xcd3c89b2), + TOBN(0x98dbe399, 0x5a483f8d), TOBN(0x72d8a957, 0x7dd3313a), + TOBN(0x65087294, 0xab0bd375), TOBN(0xfcd89248, 0x7c259d16), + TOBN(0x8a9443d7, 0x7613aa81), TOBN(0x80100800, 0x85fe6584), + TOBN(0x70fc4dbc, 0x7fb10288), TOBN(0xf58280d3, 0xe86beee8), + TOBN(0x14fdd82f, 0x7c978c38), TOBN(0xdf1204c1, 0x0de44d7b), + TOBN(0xa08a1c84, 0x4160252f), TOBN(0x591554ca, 0xc17646a5), + TOBN(0x214a37d6, 0xa05bd525), TOBN(0x48d5f09b, 0x07957b3c), + TOBN(0x0247cdcb, 0xd7109bc9), TOBN(0x40f9e4bb, 0x30599ce7), + TOBN(0xc325fa03, 0xf46ad2ec), TOBN(0x00f766cf, 0xc3e3f9ee), + TOBN(0xab556668, 0xd43a4577), TOBN(0x68d30a61, 0x3ee03b93), + TOBN(0x7ddc81ea, 0x77b46a08), TOBN(0xcf5a6477, 0xc7480699), + TOBN(0x43a8cb34, 0x6633f683), TOBN(0x1b867e6b, 0x92363c60), + TOBN(0x43921114, 0x1f60558e), TOBN(0xcdbcdd63, 0x2f41450e), + TOBN(0x7fc04601, 0xcc630e8b), TOBN(0xea7c66d5, 0x97038b43), + TOBN(0x7259b8a5, 0x04e99fd8), TOBN(0x98a8dd12, 0x4785549a), + TOBN(0x0e459a7c, 0x840552e1), TOBN(0xcdfcf4d0, 0x4bb0909e), + TOBN(0x34a86db2, 0x53758da7), TOBN(0xe643bb83, 0xeac997e1), + TOBN(0x96400bd7, 0x530c5b7e), TOBN(0x9f97af87, 0xb41c8b52), + TOBN(0x34fc8820, 0xfbeee3f9), TOBN(0x93e53490, 0x49091afd), + TOBN(0x764b9be5, 0x9a31f35c), TOBN(0x71f37864, 0x57e3d924), + TOBN(0x02fb34e0, 0x943aa75e), TOBN(0xa18c9c58, 0xab8ff6e4), + 
TOBN(0x080f31b1, 0x33cf0d19), TOBN(0x5c9682db, 0x083518a7), + TOBN(0x873d4ca6, 0xb709c3de), TOBN(0x64a84262, 0x3575b8f0), + TOBN(0x6275da1f, 0x020154bb), TOBN(0x97678caa, 0xd17cf1ab), + TOBN(0x8779795f, 0x951a95c3), TOBN(0xdd35b163, 0x50fccc08), + TOBN(0x32709627, 0x33d8f031), TOBN(0x3c5ab10a, 0x498dd85c), + TOBN(0xb6c185c3, 0x41dca566), TOBN(0x7de7feda, 0xd8622aa3), + TOBN(0x99e84d92, 0x901b6dfb), TOBN(0x30a02b0e, 0x7c4ad288), + TOBN(0xc7c81daa, 0x2fd3cf36), TOBN(0xd1319547, 0xdf89e59f), + TOBN(0xb2be8184, 0xcd496733), TOBN(0xd5f449eb, 0x93d3412b), + TOBN(0x7ea41b1b, 0x25fe531d), TOBN(0xf9797432, 0x6a1d5646), + TOBN(0x86067f72, 0x2bde501a), TOBN(0xf91481c0, 0x0c85e89c), + TOBN(0xca8ee465, 0xf8b05bc6), TOBN(0x1844e1cf, 0x02e83cda), + TOBN(0xca82114a, 0xb4dbe33b), TOBN(0x0f9f8769, 0x4eabfde2), + TOBN(0x4936b1c0, 0x38b27fe2), TOBN(0x63b6359b, 0xaba402df), + TOBN(0x40c0ea2f, 0x656bdbab), TOBN(0x9c992a89, 0x6580c39c), + TOBN(0x600e8f15, 0x2a60aed1), TOBN(0xeb089ca4, 0xe0bf49df), + TOBN(0x9c233d7d, 0x2d42d99a), TOBN(0x648d3f95, 0x4c6bc2fa), + TOBN(0xdcc383a8, 0xe1add3f3), TOBN(0xf42c0c6a, 0x4f64a348), + TOBN(0x2abd176f, 0x0030dbdb), TOBN(0x4de501a3, 0x7d6c215e), + TOBN(0x4a107c1f, 0x4b9a64bc), TOBN(0xa77f0ad3, 0x2496cd59), + TOBN(0xfb78ac62, 0x7688dffb), TOBN(0x7025a2ca, 0x67937d8e), + TOBN(0xfde8b2d1, 0xd1a8f4e7), TOBN(0xf5b3da47, 0x7354927c), + TOBN(0xe48606a3, 0xd9205735), TOBN(0xac477cc6, 0xe177b917), + TOBN(0xfb1f73d2, 0xa883239a), TOBN(0xe12572f6, 0xcc8b8357), + TOBN(0x9d355e9c, 0xfb1f4f86), TOBN(0x89b795f8, 0xd9f3ec6e), + TOBN(0x27be56f1, 0xb54398dc), TOBN(0x1890efd7, 0x3fedeed5), + TOBN(0x62f77f1f, 0x9c6d0140), TOBN(0x7ef0e314, 0x596f0ee4), + TOBN(0x50ca6631, 0xcc61dab3), TOBN(0x4a39801d, 0xf4866e4f), + TOBN(0x66c8d032, 0xae363b39), TOBN(0x22c591e5, 0x2ead66aa), + TOBN(0x954ba308, 0xde02a53e), TOBN(0x2a6c060f, 0xd389f357), + TOBN(0xe6cfcde8, 0xfbf40b66), TOBN(0x8e02fc56, 0xc6340ce1), + TOBN(0xe4957795, 0x73adb4ba), TOBN(0x7b86122c, 0xa7b03805), + TOBN(0x63f83512, 0x0c8e6fa6), TOBN(0x83660ea0, 0x057d7804), + TOBN(0xbad79105, 0x21ba473c), TOBN(0xb6c50bee, 0xded5389d), + TOBN(0xee2caf4d, 0xaa7c9bc0), TOBN(0xd97b8de4, 0x8c4e98a7), + TOBN(0xa9f63e70, 0xab3bbddb), TOBN(0x3898aabf, 0x2597815a), + TOBN(0x7659af89, 0xac15b3d9), TOBN(0xedf7725b, 0x703ce784), + TOBN(0x25470fab, 0xe085116b), TOBN(0x04a43375, 0x87285310), + TOBN(0x4e39187e, 0xe2bfd52f), TOBN(0x36166b44, 0x7d9ebc74), + TOBN(0x92ad433c, 0xfd4b322c), TOBN(0x726aa817, 0xba79ab51), + TOBN(0xf96eacd8, 0xc1db15eb), TOBN(0xfaf71e91, 0x0476be63), + TOBN(0xdd69a640, 0x641fad98), TOBN(0xb7995918, 0x29622559), + TOBN(0x03c6daa5, 0xde4199dc), TOBN(0x92cadc97, 0xad545eb4), + TOBN(0x1028238b, 0x256534e4), TOBN(0x73e80ce6, 0x8595409a), + TOBN(0x690d4c66, 0xd05dc59b), TOBN(0xc95f7b8f, 0x981dee80), + TOBN(0xf4337014, 0xd856ac25), TOBN(0x441bd9dd, 0xac524dca), + TOBN(0x640b3d85, 0x5f0499f5), TOBN(0x39cf84a9, 0xd5fda182), + TOBN(0x04e7b055, 0xb2aa95a0), TOBN(0x29e33f0a, 0x0ddf1860), + TOBN(0x082e74b5, 0x423f6b43), TOBN(0x217edeb9, 0x0aaa2b0f), + TOBN(0x58b83f35, 0x83cbea55), TOBN(0xc485ee4d, 0xbc185d70), + TOBN(0x833ff03b, 0x1e5f6992), TOBN(0xb5b9b9cc, 0xcf0c0dd5), + TOBN(0x7caaee8e, 0x4e9e8a50), TOBN(0x462e907b, 0x6269dafd), + TOBN(0x6ed5cee9, 0xfbe791c6), TOBN(0x68ca3259, 0xed430790), + TOBN(0x2b72bdf2, 0x13b5ba88), TOBN(0x60294c8a, 0x35ef0ac4), + TOBN(0x9c3230ed, 0x19b99b08), TOBN(0x560fff17, 0x6c2589aa), + TOBN(0x552b8487, 0xd6770374), TOBN(0xa373202d, 0x9a56f685), + TOBN(0xd3e7f907, 0x45f175d9), TOBN(0x3c2f315f, 0xd080d810), + TOBN(0x1130e9dd, 
0x7b9520e8), TOBN(0xc078f9e2, 0x0af037b5), + TOBN(0x38cd2ec7, 0x1e9c104c), TOBN(0x0f684368, 0xc472fe92), + TOBN(0xd3f1b5ed, 0x6247e7ef), TOBN(0xb32d33a9, 0x396dfe21), + TOBN(0x46f59cf4, 0x4a9aa2c2), TOBN(0x69cd5168, 0xff0f7e41), + TOBN(0x3f59da0f, 0x4b3234da), TOBN(0xcf0b0235, 0xb4579ebe), + TOBN(0x6d1cbb25, 0x6d2476c7), TOBN(0x4f0837e6, 0x9dc30f08), + TOBN(0x9a4075bb, 0x906f6e98), TOBN(0x253bb434, 0xc761e7d1), + TOBN(0xde2e645f, 0x6e73af10), TOBN(0xb89a4060, 0x0c5f131c), + TOBN(0xd12840c5, 0xb8cc037f), TOBN(0x3d093a5b, 0x7405bb47), + TOBN(0x6202c253, 0x206348b8), TOBN(0xbf5d57fc, 0xc55a3ca7), + TOBN(0x89f6c90c, 0x8c3bef48), TOBN(0x23ac7623, 0x5a0a960a), + TOBN(0xdfbd3d6b, 0x552b42ab), TOBN(0x3ef22458, 0x132061f6), + TOBN(0xd74e9bda, 0xc97e6516), TOBN(0x88779360, 0xc230f49e), + TOBN(0xa6ec1de3, 0x1e74ea49), TOBN(0x581dcee5, 0x3fb645a2), + TOBN(0xbaef2391, 0x8f483f14), TOBN(0x6d2dddfc, 0xd137d13b), + TOBN(0x54cde50e, 0xd2743a42), TOBN(0x89a34fc5, 0xe4d97e67), + TOBN(0x13f1f5b3, 0x12e08ce5), TOBN(0xa80540b8, 0xa7f0b2ca), + TOBN(0x854bcf77, 0x01982805), TOBN(0xb8653ffd, 0x233bea04), + TOBN(0x8e7b8787, 0x02b0b4c9), TOBN(0x2675261f, 0x9acb170a), + TOBN(0x061a9d90, 0x930c14e5), TOBN(0xb59b30e0, 0xdef0abea), + TOBN(0x1dc19ea6, 0x0200ec7d), TOBN(0xb6f4a3f9, 0x0bce132b), + TOBN(0xb8d5de90, 0xf13e27e0), TOBN(0xbaee5ef0, 0x1fade16f), + TOBN(0x6f406aaa, 0xe4c6cf38), TOBN(0xab4cfe06, 0xd1369815), + TOBN(0x0dcffe87, 0xefd550c6), TOBN(0x9d4f59c7, 0x75ff7d39), + TOBN(0xb02553b1, 0x51deb6ad), TOBN(0x812399a4, 0xb1877749), + TOBN(0xce90f71f, 0xca6006e1), TOBN(0xc32363a6, 0xb02b6e77), + TOBN(0x02284fbe, 0xdc36c64d), TOBN(0x86c81e31, 0xa7e1ae61), + TOBN(0x2576c7e5, 0xb909d94a), TOBN(0x8b6f7d02, 0x818b2bb0), + TOBN(0xeca3ed07, 0x56faa38a), TOBN(0xa3790e6c, 0x9305bb54), + TOBN(0xd784eeda, 0x7bc73061), TOBN(0xbd56d369, 0x6dd50614), + TOBN(0xd6575949, 0x229a8aa9), TOBN(0xdcca8f47, 0x4595ec28), + TOBN(0x814305c1, 0x06ab4fe6), TOBN(0xc8c39768, 0x24f43f16), + TOBN(0xe2a45f36, 0x523f2b36), TOBN(0x995c6493, 0x920d93bb), + TOBN(0xf8afdab7, 0x90f1632b), TOBN(0x79ebbecd, 0x1c295954), + TOBN(0xc7bb3ddb, 0x79592f48), TOBN(0x67216a7b, 0x5f88e998), + TOBN(0xd91f098b, 0xbc01193e), TOBN(0xf7d928a5, 0xb1db83fc), + TOBN(0x55e38417, 0xe991f600), TOBN(0x2a91113e, 0x2981a934), + TOBN(0xcbc9d648, 0x06b13bde), TOBN(0xb011b6ac, 0x0755ff44), + TOBN(0x6f4cb518, 0x045ec613), TOBN(0x522d2d31, 0xc2f5930a), + TOBN(0x5acae1af, 0x382e65de), TOBN(0x57643067, 0x27bc966f), + TOBN(0x5e12705d, 0x1c7193f0), TOBN(0xf0f32f47, 0x3be8858e), + TOBN(0x785c3d7d, 0x96c6dfc7), TOBN(0xd75b4a20, 0xbf31795d), + TOBN(0x91acf17b, 0x342659d4), TOBN(0xe596ea34, 0x44f0378f), + TOBN(0x4515708f, 0xce52129d), TOBN(0x17387e1e, 0x79f2f585), + TOBN(0x72cfd2e9, 0x49dee168), TOBN(0x1ae05223, 0x3e2af239), + TOBN(0x009e75be, 0x1d94066a), TOBN(0x6cca31c7, 0x38abf413), + TOBN(0xb50bd61d, 0x9bc49908), TOBN(0x4a9b4a8c, 0xf5e2bc1e), + TOBN(0xeb6cc5f7, 0x946f83ac), TOBN(0x27da93fc, 0xebffab28), + TOBN(0xea314c96, 0x4821c8c5), TOBN(0x8de49ded, 0xa83c15f4), + TOBN(0x7a64cf20, 0x7af33004), TOBN(0x45f1bfeb, 0xc9627e10), + TOBN(0x878b0626, 0x54b9df60), TOBN(0x5e4fdc3c, 0xa95c0b33), + TOBN(0xe54a37ca, 0xc2035d8e), TOBN(0x9087cda9, 0x80f20b8c), + TOBN(0x36f61c23, 0x8319ade4), TOBN(0x766f287a, 0xde8cfdf8), + TOBN(0x48821948, 0x346f3705), TOBN(0x49a7b853, 0x16e4f4a2), + TOBN(0xb9b3f8a7, 0x5cedadfd), TOBN(0x8f562815, 0x8db2a815), + TOBN(0xc0b7d554, 0x01f68f95), TOBN(0x12971e27, 0x688a208e), + TOBN(0xc9f8b696, 0xd0ff34fc), TOBN(0x20824de2, 0x1222718c), + TOBN(0x7213cf9f, 0x0c95284d), 
TOBN(0xe2ad741b, 0xdc158240), + TOBN(0x0ee3a6df, 0x54043ccf), TOBN(0x16ff479b, 0xd84412b3), + TOBN(0xf6c74ee0, 0xdfc98af0), TOBN(0xa78a169f, 0x52fcd2fb), + TOBN(0xd8ae8746, 0x99c930e9), TOBN(0x1d33e858, 0x49e117a5), + TOBN(0x7581fcb4, 0x6624759f), TOBN(0xde50644f, 0x5bedc01d), + TOBN(0xbeec5d00, 0xcaf3155e), TOBN(0x672d66ac, 0xbc73e75f), + TOBN(0x86b9d8c6, 0x270b01db), TOBN(0xd249ef83, 0x50f55b79), + TOBN(0x6131d6d4, 0x73978fe3), TOBN(0xcc4e4542, 0x754b00a1), + TOBN(0x4e05df05, 0x57dfcfe9), TOBN(0x94b29cdd, 0x51ef6bf0), + TOBN(0xe4530cff, 0x9bc7edf2), TOBN(0x8ac236fd, 0xd3da65f3), + TOBN(0x0faf7d5f, 0xc8eb0b48), TOBN(0x4d2de14c, 0x660eb039), + TOBN(0xc006bba7, 0x60430e54), TOBN(0x10a2d0d6, 0xda3289ab), + TOBN(0x9c037a5d, 0xd7979c59), TOBN(0x04d1f3d3, 0xa116d944), + TOBN(0x9ff22473, 0x8a0983cd), TOBN(0x28e25b38, 0xc883cabb), + TOBN(0xe968dba5, 0x47a58995), TOBN(0x2c80b505, 0x774eebdf), + TOBN(0xee763b71, 0x4a953beb), TOBN(0x502e223f, 0x1642e7f6), + TOBN(0x6fe4b641, 0x61d5e722), TOBN(0x9d37c5b0, 0xdbef5316), + TOBN(0x0115ed70, 0xf8330bc7), TOBN(0x139850e6, 0x75a72789), + TOBN(0x27d7faec, 0xffceccc2), TOBN(0x3016a860, 0x4fd9f7f6), + TOBN(0xc492ec64, 0x4cd8f64c), TOBN(0x58a2d790, 0x279d7b51), + TOBN(0x0ced1fc5, 0x1fc75256), TOBN(0x3e658aed, 0x8f433017), + TOBN(0x0b61942e, 0x05da59eb), TOBN(0xba3d60a3, 0x0ddc3722), + TOBN(0x7c311cd1, 0x742e7f87), TOBN(0x6473ffee, 0xf6b01b6e),} + , + {TOBN(0x8303604f, 0x692ac542), TOBN(0xf079ffe1, 0x227b91d3), + TOBN(0x19f63e63, 0x15aaf9bd), TOBN(0xf99ee565, 0xf1f344fb), + TOBN(0x8a1d661f, 0xd6219199), TOBN(0x8c883bc6, 0xd48ce41c), + TOBN(0x1065118f, 0x3c74d904), TOBN(0x713889ee, 0x0faf8b1b), + TOBN(0x972b3f8f, 0x81a1b3be), TOBN(0x4f3ce145, 0xce2764a0), + TOBN(0xe2d0f1cc, 0x28c4f5f7), TOBN(0xdeee0c0d, 0xc7f3985b), + TOBN(0x7df4adc0, 0xd39e25c3), TOBN(0x40619820, 0xc467a080), + TOBN(0x440ebc93, 0x61cf5a58), TOBN(0x527729a6, 0x422ad600), + TOBN(0xca6c0937, 0xb1b76ba6), TOBN(0x1a2eab85, 0x4d2026dc), + TOBN(0xb1715e15, 0x19d9ae0a), TOBN(0xf1ad9199, 0xbac4a026), + TOBN(0x35b3dfb8, 0x07ea7b0e), TOBN(0xedf5496f, 0x3ed9eb89), + TOBN(0x8932e5ff, 0x2d6d08ab), TOBN(0xf314874e, 0x25bd2731), + TOBN(0xefb26a75, 0x3f73f449), TOBN(0x1d1c94f8, 0x8d44fc79), + TOBN(0x49f0fbc5, 0x3bc0dc4d), TOBN(0xb747ea0b, 0x3698a0d0), + TOBN(0x5218c3fe, 0x228d291e), TOBN(0x35b804b5, 0x43c129d6), + TOBN(0xfac859b8, 0xd1acc516), TOBN(0x6c10697d, 0x95d6e668), + TOBN(0xc38e438f, 0x0876fd4e), TOBN(0x45f0c307, 0x83d2f383), + TOBN(0x203cc2ec, 0xb10934cb), TOBN(0x6a8f2439, 0x2c9d46ee), + TOBN(0xf16b431b, 0x65ccde7b), TOBN(0x41e2cd18, 0x27e76a6f), + TOBN(0xb9c8cf8f, 0x4e3484d7), TOBN(0x64426efd, 0x8315244a), + TOBN(0x1c0a8e44, 0xfc94dea3), TOBN(0x34c8cdbf, 0xdad6a0b0), + TOBN(0x919c3840, 0x04113cef), TOBN(0xfd32fba4, 0x15490ffa), + TOBN(0x58d190f6, 0x795dcfb7), TOBN(0xfef01b03, 0x83588baf), + TOBN(0x9e6d1d63, 0xca1fc1c0), TOBN(0x53173f96, 0xf0a41ac9), + TOBN(0x2b1d402a, 0xba16f73b), TOBN(0x2fb31014, 0x8cf9b9fc), + TOBN(0x2d51e60e, 0x446ef7bf), TOBN(0xc731021b, 0xb91e1745), + TOBN(0x9d3b4724, 0x4fee99d4), TOBN(0x4bca48b6, 0xfac5c1ea), + TOBN(0x70f5f514, 0xbbea9af7), TOBN(0x751f55a5, 0x974c283a), + TOBN(0x6e30251a, 0xcb452fdb), TOBN(0x31ee6965, 0x50f30650), + TOBN(0xb0b3e508, 0x933548d9), TOBN(0xb8949a4f, 0xf4b0ef5b), + TOBN(0x208b8326, 0x3c88f3bd), TOBN(0xab147c30, 0xdb1d9989), + TOBN(0xed6515fd, 0x44d4df03), TOBN(0x17a12f75, 0xe72eb0c5), + TOBN(0x3b59796d, 0x36cf69db), TOBN(0x1219eee9, 0x56670c18), + TOBN(0xfe3341f7, 0x7a070d8e), TOBN(0x9b70130b, 0xa327f90c), + TOBN(0x36a32462, 0x0ae18e0e), 
TOBN(0x2021a623, 0x46c0a638), + TOBN(0x251b5817, 0xc62eb0d4), TOBN(0x87bfbcdf, 0x4c762293), + TOBN(0xf78ab505, 0xcdd61d64), TOBN(0x8c7a53fc, 0xc8c18857), + TOBN(0xa653ce6f, 0x16147515), TOBN(0x9c923aa5, 0xea7d52d5), + TOBN(0xc24709cb, 0x5c18871f), TOBN(0x7d53bec8, 0x73b3cc74), + TOBN(0x59264aff, 0xfdd1d4c4), TOBN(0x5555917e, 0x240da582), + TOBN(0xcae8bbda, 0x548f5a0e), TOBN(0x1910eaba, 0x3bbfbbe1), + TOBN(0xae579685, 0x7677afc3), TOBN(0x49ea61f1, 0x73ff0b5c), + TOBN(0x78655478, 0x4f7c3922), TOBN(0x95d337cd, 0x20c68eef), + TOBN(0x68f1e1e5, 0xdf779ab9), TOBN(0x14b491b0, 0xb5cf69a8), + TOBN(0x7a6cbbe0, 0x28e3fe89), TOBN(0xe7e1fee4, 0xc5aac0eb), + TOBN(0x7f47eda5, 0x697e5140), TOBN(0x4f450137, 0xb454921f), + TOBN(0xdb625f84, 0x95cd8185), TOBN(0x74be0ba1, 0xcdb2e583), + TOBN(0xaee4fd7c, 0xdd5e6de4), TOBN(0x4251437d, 0xe8101739), + TOBN(0x686d72a0, 0xac620366), TOBN(0x4be3fb9c, 0xb6d59344), + TOBN(0x6e8b44e7, 0xa1eb75b9), TOBN(0x84e39da3, 0x91a5c10c), + TOBN(0x37cc1490, 0xb38f0409), TOBN(0x02951943, 0x2c2ade82), + TOBN(0x9b688783, 0x1190a2d8), TOBN(0x25627d14, 0x231182ba), + TOBN(0x6eb550aa, 0x658a6d87), TOBN(0x1405aaa7, 0xcf9c7325), + TOBN(0xd147142e, 0x5c8748c9), TOBN(0x7f637e4f, 0x53ede0e0), + TOBN(0xf8ca2776, 0x14ffad2c), TOBN(0xe58fb1bd, 0xbafb6791), + TOBN(0x17158c23, 0xbf8f93fc), TOBN(0x7f15b373, 0x0a4a4655), + TOBN(0x39d4add2, 0xd842ca72), TOBN(0xa71e4391, 0x3ed96305), + TOBN(0x5bb09cbe, 0x6700be14), TOBN(0x68d69d54, 0xd8befcf6), + TOBN(0xa45f5367, 0x37183bcf), TOBN(0x7152b7bb, 0x3370dff7), + TOBN(0xcf887baa, 0xbf12525b), TOBN(0xe7ac7bdd, 0xd6d1e3cd), + TOBN(0x25914f78, 0x81fdad90), TOBN(0xcf638f56, 0x0d2cf6ab), + TOBN(0xb90bc03f, 0xcc054de5), TOBN(0x932811a7, 0x18b06350), + TOBN(0x2f00b330, 0x9bbd11ff), TOBN(0x76108a6f, 0xb4044974), + TOBN(0x801bb9e0, 0xa851d266), TOBN(0x0dd099be, 0xbf8990c1), + TOBN(0x58c5aaaa, 0xabe32986), TOBN(0x0fe9dd2a, 0x50d59c27), + TOBN(0x84951ff4, 0x8d307305), TOBN(0x6c23f829, 0x86529b78), + TOBN(0x50bb2218, 0x0b136a79), TOBN(0x7e2174de, 0x77a20996), + TOBN(0x6f00a4b9, 0xc0bb4da6), TOBN(0x89a25a17, 0xefdde8da), + TOBN(0xf728a27e, 0xc11ee01d), TOBN(0xf900553a, 0xe5f10dfb), + TOBN(0x189a83c8, 0x02ec893c), TOBN(0x3ca5bdc1, 0x23f66d77), + TOBN(0x98781537, 0x97eada9f), TOBN(0x59c50ab3, 0x10256230), + TOBN(0x346042d9, 0x323c69b3), TOBN(0x1b715a6d, 0x2c460449), + TOBN(0xa41dd476, 0x6ae06e0b), TOBN(0xcdd7888e, 0x9d42e25f), + TOBN(0x0f395f74, 0x56b25a20), TOBN(0xeadfe0ae, 0x8700e27e), + TOBN(0xb09d52a9, 0x69950093), TOBN(0x3525d9cb, 0x327f8d40), + TOBN(0xb8235a94, 0x67df886a), TOBN(0x77e4b0dd, 0x035faec2), + TOBN(0x115eb20a, 0x517d7061), TOBN(0x77fe3433, 0x6c2df683), + TOBN(0x6870ddc7, 0xcdc6fc67), TOBN(0xb1610588, 0x0b87de83), + TOBN(0x343584ca, 0xd9c4ddbe), TOBN(0xb3164f1c, 0x3d754be2), + TOBN(0x0731ed3a, 0xc1e6c894), TOBN(0x26327dec, 0x4f6b904c), + TOBN(0x9d49c6de, 0x97b5cd32), TOBN(0x40835dae, 0xb5eceecd), + TOBN(0xc66350ed, 0xd9ded7fe), TOBN(0x8aeebb5c, 0x7a678804), + TOBN(0x51d42fb7, 0x5b8ee9ec), TOBN(0xd7a17bdd, 0x8e3ca118), + TOBN(0x40d7511a, 0x2ef4400e), TOBN(0xc48990ac, 0x875a66f4), + TOBN(0x8de07d2a, 0x2199e347), TOBN(0xbee75556, 0x2a39e051), + TOBN(0x56918786, 0x916e51dc), TOBN(0xeb191313, 0x4a2d89ec), + TOBN(0x6679610d, 0x37d341ed), TOBN(0x434fbb41, 0x56d51c2b), + TOBN(0xe54b7ee7, 0xd7492dba), TOBN(0xaa33a79a, 0x59021493), + TOBN(0x49fc5054, 0xe4bd6d3d), TOBN(0x09540f04, 0x5ab551d0), + TOBN(0x8acc9085, 0x4942d3a6), TOBN(0x231af02f, 0x2d28323b), + TOBN(0x93458cac, 0x0992c163), TOBN(0x1fef8e71, 0x888e3bb4), + TOBN(0x27578da5, 0xbe8c268c), TOBN(0xcc8be792, 
0xe805ec00), + TOBN(0x29267bae, 0xc61c3855), TOBN(0xebff429d, 0x58c1fd3b), + TOBN(0x22d886c0, 0x8c0b93b8), TOBN(0xca5e00b2, 0x2ddb8953), + TOBN(0xcf330117, 0xc3fed8b7), TOBN(0xd49ac6fa, 0x819c01f6), + TOBN(0x6ddaa6bd, 0x3c0fbd54), TOBN(0x91743068, 0x8049a2cf), + TOBN(0xd67f981e, 0xaff2ef81), TOBN(0xc3654d35, 0x2818ae80), + TOBN(0x81d05044, 0x1b2aa892), TOBN(0x2db067bf, 0x3d099328), + TOBN(0xe7c79e86, 0x703dcc97), TOBN(0xe66f9b37, 0xe133e215), + TOBN(0xcdf119a6, 0xe39a7a5c), TOBN(0x47c60de3, 0x876f1b61), + TOBN(0x6e405939, 0xd860f1b2), TOBN(0x3e9a1dbc, 0xf5ed4d4a), + TOBN(0x3f23619e, 0xc9b6bcbd), TOBN(0x5ee790cf, 0x734e4497), + TOBN(0xf0a834b1, 0x5bdaf9bb), TOBN(0x02cedda7, 0x4ca295f0), + TOBN(0x4619aa2b, 0xcb8e378c), TOBN(0xe5613244, 0xcc987ea4), + TOBN(0x0bc022cc, 0x76b23a50), TOBN(0x4a2793ad, 0x0a6c21ce), + TOBN(0x38328780, 0x89cac3f5), TOBN(0x29176f1b, 0xcba26d56), + TOBN(0x06296187, 0x4f6f59eb), TOBN(0x86e9bca9, 0x8bdc658e), + TOBN(0x2ca9c4d3, 0x57e30402), TOBN(0x5438b216, 0x516a09bb), + TOBN(0x0a6a063c, 0x7672765a), TOBN(0x37a3ce64, 0x0547b9bf), + TOBN(0x42c099c8, 0x98b1a633), TOBN(0xb5ab800d, 0x05ee6961), + TOBN(0xf1963f59, 0x11a5acd6), TOBN(0xbaee6157, 0x46201063), + TOBN(0x36d9a649, 0xa596210a), TOBN(0xaed04363, 0x1ba7138c), + TOBN(0xcf817d1c, 0xa4a82b76), TOBN(0x5586960e, 0xf3806be9), + TOBN(0x7ab67c89, 0x09dc6bb5), TOBN(0x52ace7a0, 0x114fe7eb), + TOBN(0xcd987618, 0xcbbc9b70), TOBN(0x4f06fd5a, 0x604ca5e1), + TOBN(0x90af14ca, 0x6dbde133), TOBN(0x1afe4322, 0x948a3264), + TOBN(0xa70d2ca6, 0xc44b2c6c), TOBN(0xab726799, 0x0ef87dfe), + TOBN(0x310f64dc, 0x2e696377), TOBN(0x49b42e68, 0x4c8126a0), + TOBN(0x0ea444c3, 0xcea0b176), TOBN(0x53a8ddf7, 0xcb269182), + TOBN(0xf3e674eb, 0xbbba9dcb), TOBN(0x0d2878a8, 0xd8669d33), + TOBN(0x04b935d5, 0xd019b6a3), TOBN(0xbb5cf88e, 0x406f1e46), + TOBN(0xa1912d16, 0x5b57c111), TOBN(0x9803fc21, 0x19ebfd78), + TOBN(0x4f231c9e, 0xc07764a9), TOBN(0xd93286ee, 0xb75bd055), + TOBN(0x83a9457d, 0x8ee6c9de), TOBN(0x04695915, 0x6087ec90), + TOBN(0x14c6dd8a, 0x58d6cd46), TOBN(0x9cb633b5, 0x8e6634d2), + TOBN(0xc1305047, 0xf81bc328), TOBN(0x12ede0e2, 0x26a177e5), + TOBN(0x332cca62, 0x065a6f4f), TOBN(0xc3a47ecd, 0x67be487b), + TOBN(0x741eb187, 0x0f47ed1c), TOBN(0x99e66e58, 0xe7598b14), + TOBN(0x6f0544ca, 0x63d0ff12), TOBN(0xe5efc784, 0xb610a05f), + TOBN(0xf72917b1, 0x7cad7b47), TOBN(0x3ff6ea20, 0xf2cac0c0), + TOBN(0xcc23791b, 0xf21db8b7), TOBN(0x7dac70b1, 0xd7d93565), + TOBN(0x682cda1d, 0x694bdaad), TOBN(0xeb88bb8c, 0x1023516d), + TOBN(0xc4c634b4, 0xdfdbeb1b), TOBN(0x22f5ca72, 0xb4ee4dea), + TOBN(0x1045a368, 0xe6524821), TOBN(0xed9e8a3f, 0x052b18b2), + TOBN(0x9b7f2cb1, 0xb961f49a), TOBN(0x7fee2ec1, 0x7b009670), + TOBN(0x350d8754, 0x22507a6d), TOBN(0x561bd711, 0x4db55f1d), + TOBN(0x4c189ccc, 0x320bbcaf), TOBN(0x568434cf, 0xdf1de48c), + TOBN(0x6af1b00e, 0x0fa8f128), TOBN(0xf0ba9d02, 0x8907583c), + TOBN(0x735a4004, 0x32ff9f60), TOBN(0x3dd8e4b6, 0xc25dcf33), + TOBN(0xf2230f16, 0x42c74cef), TOBN(0xd8117623, 0x013fa8ad), + TOBN(0x36822876, 0xf51fe76e), TOBN(0x8a6811cc, 0x11d62589), + TOBN(0xc3fc7e65, 0x46225718), TOBN(0xb7df2c9f, 0xc82fdbcd), + TOBN(0x3b1d4e52, 0xdd7b205b), TOBN(0xb6959478, 0x47a2e414), + TOBN(0x05e4d793, 0xefa91148), TOBN(0xb47ed446, 0xfd2e9675), + TOBN(0x1a7098b9, 0x04c9d9bf), TOBN(0x661e2881, 0x1b793048), + TOBN(0xb1a16966, 0xb01ee461), TOBN(0xbc521308, 0x2954746f), + TOBN(0xc909a0fc, 0x2477de50), TOBN(0xd80bb41c, 0x7dbd51ef), + TOBN(0xa85be7ec, 0x53294905), TOBN(0x6d465b18, 0x83958f97), + TOBN(0x16f6f330, 0xfb6840fd), TOBN(0xfaaeb214, 0x3401e6c8), + 
TOBN(0xaf83d30f, 0xccb5b4f8), TOBN(0x22885739, 0x266dec4b), + TOBN(0x51b4367c, 0x7bc467df), TOBN(0x926562e3, 0xd842d27a), + TOBN(0xdfcb6614, 0x0fea14a6), TOBN(0xeb394dae, 0xf2734cd9), + TOBN(0x3eeae5d2, 0x11c0be98), TOBN(0xb1e6ed11, 0x814e8165), + TOBN(0x191086bc, 0xe52bce1c), TOBN(0x14b74cc6, 0xa75a04da), + TOBN(0x63cf1186, 0x8c060985), TOBN(0x071047de, 0x2dbd7f7c), + TOBN(0x4e433b8b, 0xce0942ca), TOBN(0xecbac447, 0xd8fec61d), + TOBN(0x8f0ed0e2, 0xebf3232f), TOBN(0xfff80f9e, 0xc52a2edd), + TOBN(0xad9ab433, 0x75b55fdb), TOBN(0x73ca7820, 0xe42e0c11), + TOBN(0x6dace0a0, 0xe6251b46), TOBN(0x89bc6b5c, 0x4c0d932d), + TOBN(0x3438cd77, 0x095da19a), TOBN(0x2f24a939, 0x8d48bdfb), + TOBN(0x99b47e46, 0x766561b7), TOBN(0x736600e6, 0x0ed0322a), + TOBN(0x06a47cb1, 0x638e1865), TOBN(0x927c1c2d, 0xcb136000), + TOBN(0x29542337, 0x0cc5df69), TOBN(0x99b37c02, 0x09d649a9), + TOBN(0xc5f0043c, 0x6aefdb27), TOBN(0x6cdd9987, 0x1be95c27), + TOBN(0x69850931, 0x390420d2), TOBN(0x299c40ac, 0x0983efa4), + TOBN(0x3a05e778, 0xaf39aead), TOBN(0x84274408, 0x43a45193), + TOBN(0x6bcd0fb9, 0x91a711a0), TOBN(0x461592c8, 0x9f52ab17), + TOBN(0xb49302b4, 0xda3c6ed6), TOBN(0xc51fddc7, 0x330d7067), + TOBN(0x94babeb6, 0xda50d531), TOBN(0x521b840d, 0xa6a7b9da), + TOBN(0x5305151e, 0x404bdc89), TOBN(0x1bcde201, 0xd0d07449), + TOBN(0xf427a78b, 0x3b76a59a), TOBN(0xf84841ce, 0x07791a1b), + TOBN(0xebd314be, 0xbf91ed1c), TOBN(0x8e61d34c, 0xbf172943), + TOBN(0x1d5dc451, 0x5541b892), TOBN(0xb186ee41, 0xfc9d9e54), + TOBN(0x9d9f345e, 0xd5bf610d), TOBN(0x3e7ba65d, 0xf6acca9f), + TOBN(0x9dda787a, 0xa8369486), TOBN(0x09f9dab7, 0x8eb5ba53), + TOBN(0x5afb2033, 0xd6481bc3), TOBN(0x76f4ce30, 0xafa62104), + TOBN(0xa8fa00cf, 0xf4f066b5), TOBN(0x89ab5143, 0x461dafc2), + TOBN(0x44339ed7, 0xa3389998), TOBN(0x2ff862f1, 0xbc214903), + TOBN(0x2c88f985, 0xb05556e3), TOBN(0xcd96058e, 0x3467081e), + TOBN(0x7d6a4176, 0xedc637ea), TOBN(0xe1743d09, 0x36a5acdc), + TOBN(0x66fd72e2, 0x7eb37726), TOBN(0xf7fa264e, 0x1481a037), + TOBN(0x9fbd3bde, 0x45f4aa79), TOBN(0xed1e0147, 0x767c3e22), + TOBN(0x7621f979, 0x82e7abe2), TOBN(0x19eedc72, 0x45f633f8), + TOBN(0xe69b155e, 0x6137bf3a), TOBN(0xa0ad13ce, 0x414ee94e), + TOBN(0x93e3d524, 0x1c0e651a), TOBN(0xab1a6e2a, 0x02ce227e), + TOBN(0xe7af1797, 0x4ab27eca), TOBN(0x245446de, 0xbd444f39), + TOBN(0x59e22a21, 0x56c07613), TOBN(0x43deafce, 0xf4275498), + TOBN(0x10834ccb, 0x67fd0946), TOBN(0xa75841e5, 0x47406edf), + TOBN(0xebd6a677, 0x7b0ac93d), TOBN(0xa6e37b0d, 0x78f5e0d7), + TOBN(0x2516c096, 0x76f5492b), TOBN(0x1e4bf888, 0x9ac05f3a), + TOBN(0xcdb42ce0, 0x4df0ba2b), TOBN(0x935d5cfd, 0x5062341b), + TOBN(0x8a303333, 0x82acac20), TOBN(0x429438c4, 0x5198b00e), + TOBN(0x1d083bc9, 0x049d33fa), TOBN(0x58b82dda, 0x946f67ff), + TOBN(0xac3e2db8, 0x67a1d6a3), TOBN(0x62e6bead, 0x1798aac8), + TOBN(0xfc85980f, 0xde46c58c), TOBN(0xa7f69379, 0x69c8d7be), + TOBN(0x23557927, 0x837b35ec), TOBN(0x06a933d8, 0xe0790c0c), + TOBN(0x827c0e9b, 0x077ff55d), TOBN(0x53977798, 0xbb26e680), + TOBN(0x59530874, 0x1d9cb54f), TOBN(0xcca3f449, 0x4aac53ef), + TOBN(0x11dc5c87, 0xa07eda0f), TOBN(0xc138bccf, 0xfd6400c8), + TOBN(0x549680d3, 0x13e5da72), TOBN(0xc93eed82, 0x4540617e), + TOBN(0xfd3db157, 0x4d0b75c0), TOBN(0x9716eb42, 0x6386075b), + TOBN(0x0639605c, 0x817b2c16), TOBN(0x09915109, 0xf1e4f201), + TOBN(0x35c9a928, 0x5cca6c3b), TOBN(0xb25f7d1a, 0x3505c900), + TOBN(0xeb9f7d20, 0x630480c4), TOBN(0xc3c7b8c6, 0x2a1a501c), + TOBN(0x3f99183c, 0x5a1f8e24), TOBN(0xfdb118fa, 0x9dd255f0), + TOBN(0xb9b18b90, 0xc27f62a6), TOBN(0xe8f732f7, 0x396ec191), + TOBN(0x524a2d91, 
0x0be786ab), TOBN(0x5d32adef, 0x0ac5a0f5), + TOBN(0x9b53d4d6, 0x9725f694), TOBN(0x032a76c6, 0x0510ba89), + TOBN(0x840391a3, 0xebeb1544), TOBN(0x44b7b88c, 0x3ed73ac3), + TOBN(0xd24bae7a, 0x256cb8b3), TOBN(0x7ceb151a, 0xe394cb12), + TOBN(0xbd6b66d0, 0x5bc1e6a8), TOBN(0xec70cecb, 0x090f07bf), + TOBN(0x270644ed, 0x7d937589), TOBN(0xee9e1a3d, 0x5f1dccfe), + TOBN(0xb0d40a84, 0x745b98d2), TOBN(0xda429a21, 0x2556ed40), + TOBN(0xf676eced, 0x85148cb9), TOBN(0x5a22d40c, 0xded18936), + TOBN(0x3bc4b9e5, 0x70e8a4ce), TOBN(0xbfd1445b, 0x9eae0379), + TOBN(0xf23f2c0c, 0x1a0bd47e), TOBN(0xa9c0bb31, 0xe1845531), + TOBN(0x9ddc4d60, 0x0a4c3f6b), TOBN(0xbdfaad79, 0x2c15ef44), + TOBN(0xce55a236, 0x7f484acc), TOBN(0x08653ca7, 0x055b1f15), + TOBN(0x2efa8724, 0x538873a3), TOBN(0x09299e5d, 0xace1c7e7), + TOBN(0x07afab66, 0xade332ba), TOBN(0x9be1fdf6, 0x92dd71b7), + TOBN(0xa49b5d59, 0x5758b11c), TOBN(0x0b852893, 0xc8654f40), + TOBN(0xb63ef6f4, 0x52379447), TOBN(0xd4957d29, 0x105e690c), + TOBN(0x7d484363, 0x646559b0), TOBN(0xf4a8273c, 0x49788a8e), + TOBN(0xee406cb8, 0x34ce54a9), TOBN(0x1e1c260f, 0xf86fda9b), + TOBN(0xe150e228, 0xcf6a4a81), TOBN(0x1fa3b6a3, 0x1b488772), + TOBN(0x1e6ff110, 0xc5a9c15b), TOBN(0xc6133b91, 0x8ad6aa47), + TOBN(0x8ac5d55c, 0x9dffa978), TOBN(0xba1d1c1d, 0x5f3965f2), + TOBN(0xf969f4e0, 0x7732b52f), TOBN(0xfceecdb5, 0xa5172a07), + TOBN(0xb0120a5f, 0x10f2b8f5), TOBN(0xc83a6cdf, 0x5c4c2f63), + TOBN(0x4d47a491, 0xf8f9c213), TOBN(0xd9e1cce5, 0xd3f1bbd5), + TOBN(0x0d91bc7c, 0xaba7e372), TOBN(0xfcdc74c8, 0xdfd1a2db), + TOBN(0x05efa800, 0x374618e5), TOBN(0x11216969, 0x15a7925e), + TOBN(0xd4c89823, 0xf6021c5d), TOBN(0x880d5e84, 0xeff14423), + TOBN(0x6523bc5a, 0x6dcd1396), TOBN(0xd1acfdfc, 0x113c978b), + TOBN(0xb0c164e8, 0xbbb66840), TOBN(0xf7f4301e, 0x72b58459), + TOBN(0xc29ad4a6, 0xa638e8ec), TOBN(0xf5ab8961, 0x46b78699), + TOBN(0x9dbd7974, 0x0e954750), TOBN(0x0121de88, 0x64f9d2c6), + TOBN(0x2e597b42, 0xd985232e), TOBN(0x55b6c3c5, 0x53451777), + TOBN(0xbb53e547, 0x519cb9fb), TOBN(0xf134019f, 0x8428600d), + TOBN(0x5a473176, 0xe081791a), TOBN(0x2f3e2263, 0x35fb0c08), + TOBN(0xb28c3017, 0x73d273b0), TOBN(0xccd21076, 0x7721ef9a), + TOBN(0x054cc292, 0xb650dc39), TOBN(0x662246de, 0x6188045e), + TOBN(0x904b52fa, 0x6b83c0d1), TOBN(0xa72df267, 0x97e9cd46), + TOBN(0x886b43cd, 0x899725e4), TOBN(0x2b651688, 0xd849ff22), + TOBN(0x60479b79, 0x02f34533), TOBN(0x5e354c14, 0x0c77c148), + TOBN(0xb4bb7581, 0xa8537c78), TOBN(0x188043d7, 0xefe1495f), + TOBN(0x9ba12f42, 0x8c1d5026), TOBN(0x2e0c8a26, 0x93d4aaab), + TOBN(0xbdba7b8b, 0xaa57c450), TOBN(0x140c9ad6, 0x9bbdafef), + TOBN(0x2067aa42, 0x25ac0f18), TOBN(0xf7b1295b, 0x04d1fbf3), + TOBN(0x14829111, 0xa4b04824), TOBN(0x2ce3f192, 0x33bd5e91), + TOBN(0x9c7a1d55, 0x8f2e1b72), TOBN(0xfe932286, 0x302aa243), + TOBN(0x497ca7b4, 0xd4be9554), TOBN(0xb8e821b8, 0xe0547a6e), + TOBN(0xfb2838be, 0x67e573e0), TOBN(0x05891db9, 0x4084c44b), + TOBN(0x91311373, 0x96c1c2c5), TOBN(0x6aebfa3f, 0xd958444b), + TOBN(0xac9cdce9, 0xe56e55c1), TOBN(0x7148ced3, 0x2caa46d0), + TOBN(0x2e10c7ef, 0xb61fe8eb), TOBN(0x9fd835da, 0xff97cf4d),} + , + {TOBN(0xa36da109, 0x081e9387), TOBN(0xfb9780d7, 0x8c935828), + TOBN(0xd5940332, 0xe540b015), TOBN(0xc9d7b51b, 0xe0f466fa), + TOBN(0xfaadcd41, 0xd6d9f671), TOBN(0xba6c1e28, 0xb1a2ac17), + TOBN(0x066a7833, 0xed201e5f), TOBN(0x19d99719, 0xf90f462b), + TOBN(0xf431f462, 0x060b5f61), TOBN(0xa56f46b4, 0x7bd057c2), + TOBN(0x348dca6c, 0x47e1bf65), TOBN(0x9a38783e, 0x41bcf1ff), + TOBN(0x7a5d33a9, 0xda710718), TOBN(0x5a779987, 0x2e0aeaf6), + TOBN(0xca87314d, 0x2d29d187), 
TOBN(0xfa0edc3e, 0xc687d733), + TOBN(0x9df33621, 0x6a31e09b), TOBN(0xde89e44d, 0xc1350e35), + TOBN(0x29214871, 0x4ca0cf52), TOBN(0xdf379672, 0x0b88a538), + TOBN(0xc92a510a, 0x2591d61b), TOBN(0x79aa87d7, 0x585b447b), + TOBN(0xf67db604, 0xe5287f77), TOBN(0x1697c8bf, 0x5efe7a80), + TOBN(0x1c894849, 0xcb198ac7), TOBN(0xa884a93d, 0x0f264665), + TOBN(0x2da964ef, 0x9b200678), TOBN(0x3c351b87, 0x009834e6), + TOBN(0xafb2ef9f, 0xe2c4b44b), TOBN(0x580f6c47, 0x3326790c), + TOBN(0xb8480521, 0x0b02264a), TOBN(0x8ba6f9e2, 0x42a194e2), + TOBN(0xfc87975f, 0x8fb54738), TOBN(0x35160788, 0x27c3ead3), + TOBN(0x834116d2, 0xb74a085a), TOBN(0x53c99a73, 0xa62fe996), + TOBN(0x87585be0, 0x5b81c51b), TOBN(0x925bafa8, 0xbe0852b7), + TOBN(0x76a4fafd, 0xa84d19a7), TOBN(0x39a45982, 0x585206d4), + TOBN(0x499b6ab6, 0x5eb03c0e), TOBN(0xf19b7954, 0x72bc3fde), + TOBN(0xa86b5b9c, 0x6e3a80d2), TOBN(0xe4377508, 0x6d42819f), + TOBN(0xc1663650, 0xbb3ee8a3), TOBN(0x75eb14fc, 0xb132075f), + TOBN(0xa8ccc906, 0x7ad834f6), TOBN(0xea6a2474, 0xe6e92ffd), + TOBN(0x9d72fd95, 0x0f8d6758), TOBN(0xcb84e101, 0x408c07dd), + TOBN(0xb9114bfd, 0xa5e23221), TOBN(0x358b5fe2, 0xe94e742c), + TOBN(0x1c0577ec, 0x95f40e75), TOBN(0xf0155451, 0x3d73f3d6), + TOBN(0x9d55cd67, 0xbd1b9b66), TOBN(0x63e86e78, 0xaf8d63c7), + TOBN(0x39d934ab, 0xd3c095f1), TOBN(0x04b261be, 0xe4b76d71), + TOBN(0x1d2e6970, 0xe73e6984), TOBN(0x879fb23b, 0x5e5fcb11), + TOBN(0x11506c72, 0xdfd75490), TOBN(0x3a97d085, 0x61bcf1c1), + TOBN(0x43201d82, 0xbf5e7007), TOBN(0x7f0ac52f, 0x798232a7), + TOBN(0x2715cbc4, 0x6eb564d4), TOBN(0x8d6c752c, 0x9e570e29), + TOBN(0xf80247c8, 0x9ef5fd5d), TOBN(0xc3c66b46, 0xd53eb514), + TOBN(0x9666b401, 0x0f87de56), TOBN(0xce62c06f, 0xc6c603b5), + TOBN(0xae7b4c60, 0x7e4fc942), TOBN(0x38ac0b77, 0x663a9c19), + TOBN(0xcb4d20ee, 0x4b049136), TOBN(0x8b63bf12, 0x356a4613), + TOBN(0x1221aef6, 0x70e08128), TOBN(0xe62d8c51, 0x4acb6b16), + TOBN(0x71f64a67, 0x379e7896), TOBN(0xb25237a2, 0xcafd7fa5), + TOBN(0xf077bd98, 0x3841ba6a), TOBN(0xc4ac0244, 0x3cd16e7e), + TOBN(0x548ba869, 0x21fea4ca), TOBN(0xd36d0817, 0xf3dfdac1), + TOBN(0x09d8d71f, 0xf4685faf), TOBN(0x8eff66be, 0xc52c459a), + TOBN(0x182faee7, 0x0b57235e), TOBN(0xee3c39b1, 0x0106712b), + TOBN(0x5107331f, 0xc0fcdcb0), TOBN(0x669fb9dc, 0xa51054ba), + TOBN(0xb25101fb, 0x319d7682), TOBN(0xb0293129, 0x0a982fee), + TOBN(0x51c1c9b9, 0x0261b344), TOBN(0x0e008c5b, 0xbfd371fa), + TOBN(0xd866dd1c, 0x0278ca33), TOBN(0x666f76a6, 0xe5aa53b1), + TOBN(0xe5cfb779, 0x6013a2cf), TOBN(0x1d3a1aad, 0xa3521836), + TOBN(0xcedd2531, 0x73faa485), TOBN(0xc8ee6c4f, 0xc0a76878), + TOBN(0xddbccfc9, 0x2a11667d), TOBN(0x1a418ea9, 0x1c2f695a), + TOBN(0xdb11bd92, 0x51f73971), TOBN(0x3e4b3c82, 0xda2ed89f), + TOBN(0x9a44f3f4, 0xe73e0319), TOBN(0xd1e3de0f, 0x303431af), + TOBN(0x3c5604ff, 0x50f75f9c), TOBN(0x1d8eddf3, 0x7e752b22), + TOBN(0x0ef074dd, 0x3c9a1118), TOBN(0xd0ffc172, 0xccb86d7b), + TOBN(0xabd1ece3, 0x037d90f2), TOBN(0xe3f307d6, 0x6055856c), + TOBN(0x422f9328, 0x7e4c6daf), TOBN(0x902aac66, 0x334879a0), + TOBN(0xb6a1e7bf, 0x94cdfade), TOBN(0x6c97e1ed, 0x7fc6d634), + TOBN(0x662ad24d, 0xa2fb63f8), TOBN(0xf81be1b9, 0xa5928405), + TOBN(0x86d765e4, 0xd14b4206), TOBN(0xbecc2e0e, 0x8fa0db65), + TOBN(0xa28838e0, 0xb17fc76c), TOBN(0xe49a602a, 0xe37cf24e), + TOBN(0x76b4131a, 0x567193ec), TOBN(0xaf3c305a, 0xe5f6e70b), + TOBN(0x9587bd39, 0x031eebdd), TOBN(0x5709def8, 0x71bbe831), + TOBN(0x57059983, 0x0eb2b669), TOBN(0x4d80ce1b, 0x875b7029), + TOBN(0x838a7da8, 0x0364ac16), TOBN(0x2f431d23, 0xbe1c83ab), + TOBN(0xe56812a6, 0xf9294dd3), TOBN(0xb448d01f, 
0x9b4b0d77), + TOBN(0xf3ae6061, 0x04e8305c), TOBN(0x2bead645, 0x94d8c63e), + TOBN(0x0a85434d, 0x84fd8b07), TOBN(0x537b983f, 0xf7a9dee5), + TOBN(0xedcc5f18, 0xef55bd85), TOBN(0x2041af62, 0x21c6cf8b), + TOBN(0x8e52874c, 0xb940c71e), TOBN(0x211935a9, 0xdb5f4b3a), + TOBN(0x94350492, 0x301b1dc3), TOBN(0x33d2646d, 0x29958620), + TOBN(0x16b0d64b, 0xef911404), TOBN(0x9d1f25ea, 0x9a3c5ef4), + TOBN(0x20f200eb, 0x4a352c78), TOBN(0x43929f2c, 0x4bd0b428), + TOBN(0xa5656667, 0xc7196e29), TOBN(0x7992c2f0, 0x9391be48), + TOBN(0xaaa97cbd, 0x9ee0cd6e), TOBN(0x51b0310c, 0x3dc8c9bf), + TOBN(0x237f8acf, 0xdd9f22cb), TOBN(0xbb1d81a1, 0xb585d584), + TOBN(0x8d5d85f5, 0x8c416388), TOBN(0x0d6e5a5a, 0x42fe474f), + TOBN(0xe7812766, 0x38235d4e), TOBN(0x1c62bd67, 0x496e3298), + TOBN(0x8378660c, 0x3f175bc8), TOBN(0x4d04e189, 0x17afdd4d), + TOBN(0x32a81601, 0x85a8068c), TOBN(0xdb58e4e1, 0x92b29a85), + TOBN(0xe8a65b86, 0xc70d8a3b), TOBN(0x5f0e6f4e, 0x98a0403b), + TOBN(0x08129684, 0x69ed2370), TOBN(0x34dc30bd, 0x0871ee26), + TOBN(0x3a5ce948, 0x7c9c5b05), TOBN(0x7d487b80, 0x43a90c87), + TOBN(0x4089ba37, 0xdd0e7179), TOBN(0x45f80191, 0xb4041811), + TOBN(0x1c3e1058, 0x98747ba5), TOBN(0x98c4e13a, 0x6e1ae592), + TOBN(0xd44636e6, 0xe82c9f9e), TOBN(0x711db87c, 0xc33a1043), + TOBN(0x6f431263, 0xaa8aec05), TOBN(0x43ff120d, 0x2744a4aa), + TOBN(0xd3bd892f, 0xae77779b), TOBN(0xf0fe0cc9, 0x8cdc9f82), + TOBN(0xca5f7fe6, 0xf1c5b1bc), TOBN(0xcc63a682, 0x44929a72), + TOBN(0xc7eaba0c, 0x09dbe19a), TOBN(0x2f3585ad, 0x6b5c73c2), + TOBN(0x8ab8924b, 0x0ae50c30), TOBN(0x17fcd27a, 0x638b30ba), + TOBN(0xaf414d34, 0x10b3d5a5), TOBN(0x09c107d2, 0x2a9accf1), + TOBN(0x15dac49f, 0x946a6242), TOBN(0xaec3df2a, 0xd707d642), + TOBN(0x2c2492b7, 0x3f894ae0), TOBN(0xf59df3e5, 0xb75f18ce), + TOBN(0x7cb740d2, 0x8f53cad0), TOBN(0x3eb585fb, 0xc4f01294), + TOBN(0x17da0c86, 0x32c7f717), TOBN(0xeb8c795b, 0xaf943f4c), + TOBN(0x4ee23fb5, 0xf67c51d2), TOBN(0xef187575, 0x68889949), + TOBN(0xa6b4bdb2, 0x0389168b), TOBN(0xc4ecd258, 0xea577d03), + TOBN(0x3a63782b, 0x55743082), TOBN(0x6f678f4c, 0xc72f08cd), + TOBN(0x553511cf, 0x65e58dd8), TOBN(0xd53b4e3e, 0xd402c0cd), + TOBN(0x37de3e29, 0xa037c14c), TOBN(0x86b6c516, 0xc05712aa), + TOBN(0x2834da3e, 0xb38dff6f), TOBN(0xbe012c52, 0xea636be8), + TOBN(0x292d238c, 0x61dd37f8), TOBN(0x0e54523f, 0x8f8142db), + TOBN(0xe31eb436, 0x036a05d8), TOBN(0x83e3cdff, 0x1e93c0ff), + TOBN(0x3fd2fe0f, 0x50821ddf), TOBN(0xc8e19b0d, 0xff9eb33b), + TOBN(0xc8cc943f, 0xb569a5fe), TOBN(0xad0090d4, 0xd4342d75), + TOBN(0x82090b4b, 0xcaeca000), TOBN(0xca39687f, 0x1bd410eb), + TOBN(0xe7bb0df7, 0x65959d77), TOBN(0x39d78218, 0x9c964999), + TOBN(0xd87f62e8, 0xb2415451), TOBN(0xe5efb774, 0xbed76108), + TOBN(0x3ea011a4, 0xe822f0d0), TOBN(0xbc647ad1, 0x5a8704f8), + TOBN(0xbb315b35, 0x50c6820f), TOBN(0x863dec3d, 0xb7e76bec), + TOBN(0x01ff5d3a, 0xf017bfc7), TOBN(0x20054439, 0x976b8229), + TOBN(0x067fca37, 0x0bbd0d3b), TOBN(0xf63dde64, 0x7f5e3d0f), + TOBN(0x22dbefb3, 0x2a4c94e9), TOBN(0xafbff0fe, 0x96f8278a), + TOBN(0x80aea0b1, 0x3503793d), TOBN(0xb2238029, 0x5f06cd29), + TOBN(0x65703e57, 0x8ec3feca), TOBN(0x06c38314, 0x393e7053), + TOBN(0xa0b751eb, 0x7c6734c4), TOBN(0xd2e8a435, 0xc59f0f1e), + TOBN(0x147d9052, 0x5e9ca895), TOBN(0x2f4dd31e, 0x972072df), + TOBN(0xa16fda8e, 0xe6c6755c), TOBN(0xc66826ff, 0xcf196558), + TOBN(0x1f1a76a3, 0x0cf43895), TOBN(0xa9d604e0, 0x83c3097b), + TOBN(0xe1908309, 0x66390e0e), TOBN(0xa50bf753, 0xb3c85eff), + TOBN(0x0696bdde, 0xf6a70251), TOBN(0x548b801b, 0x3c6ab16a), + TOBN(0x37fcf704, 0xa4d08762), TOBN(0x090b3def, 0xdff76c4e), + 
TOBN(0x87e8cb89, 0x69cb9158), TOBN(0x44a90744, 0x995ece43), + TOBN(0xf85395f4, 0x0ad9fbf5), TOBN(0x49b0f6c5, 0x4fb0c82d), + TOBN(0x75d9bc15, 0xadf7cccf), TOBN(0x81a3e5d6, 0xdfa1e1b0), + TOBN(0x8c39e444, 0x249bc17e), TOBN(0xf37dccb2, 0x8ea7fd43), + TOBN(0xda654873, 0x907fba12), TOBN(0x35daa6da, 0x4a372904), + TOBN(0x0564cfc6, 0x6283a6c5), TOBN(0xd09fa4f6, 0x4a9395bf), + TOBN(0x688e9ec9, 0xaeb19a36), TOBN(0xd913f1ce, 0xc7bfbfb4), + TOBN(0x797b9a3c, 0x61c2faa6), TOBN(0x2f979bec, 0x6a0a9c12), + TOBN(0xb5969d0f, 0x359679ec), TOBN(0xebcf523d, 0x079b0460), + TOBN(0xfd6b0008, 0x10fab870), TOBN(0x3f2edcda, 0x9373a39c), + TOBN(0x0d64f9a7, 0x6f568431), TOBN(0xf848c27c, 0x02f8898c), + TOBN(0xf418ade1, 0x260b5bd5), TOBN(0xc1f3e323, 0x6973dee8), + TOBN(0x46e9319c, 0x26c185dd), TOBN(0x6d85b7d8, 0x546f0ac4), + TOBN(0x427965f2, 0x247f9d57), TOBN(0xb519b636, 0xb0035f48), + TOBN(0x6b6163a9, 0xab87d59c), TOBN(0xff9f58c3, 0x39caaa11), + TOBN(0x4ac39cde, 0x3177387b), TOBN(0x5f6557c2, 0x873e77f9), + TOBN(0x67504006, 0x36a83041), TOBN(0x9b1c96ca, 0x75ef196c), + TOBN(0xf34283de, 0xb08c7940), TOBN(0x7ea09644, 0x1128c316), + TOBN(0xb510b3b5, 0x6aa39dff), TOBN(0x59b43da2, 0x9f8e4d8c), + TOBN(0xa8ce31fd, 0x9e4c4b9f), TOBN(0x0e20be26, 0xc1303c01), + TOBN(0x18187182, 0xe8ee47c9), TOBN(0xd9687cdb, 0x7db98101), + TOBN(0x7a520e4d, 0xa1e14ff6), TOBN(0x429808ba, 0x8836d572), + TOBN(0xa37ca60d, 0x4944b663), TOBN(0xf901f7a9, 0xa3f91ae5), + TOBN(0xe4e3e76e, 0x9e36e3b1), TOBN(0x9aa219cf, 0x29d93250), + TOBN(0x347fe275, 0x056a2512), TOBN(0xa4d643d9, 0xde65d95c), + TOBN(0x9669d396, 0x699fc3ed), TOBN(0xb598dee2, 0xcf8c6bbe), + TOBN(0x682ac1e5, 0xdda9e5c6), TOBN(0x4e0d3c72, 0xcaa9fc95), + TOBN(0x17faaade, 0x772bea44), TOBN(0x5ef8428c, 0xab0009c8), + TOBN(0xcc4ce47a, 0x460ff016), TOBN(0xda6d12bf, 0x725281cb), + TOBN(0x44c67848, 0x0223aad2), TOBN(0x6e342afa, 0x36256e28), + TOBN(0x1400bb0b, 0x93a37c04), TOBN(0x62b1bc9b, 0xdd10bd96), + TOBN(0x7251adeb, 0x0dac46b7), TOBN(0x7d33b92e, 0x7be4ef51), + TOBN(0x28b2a94b, 0xe61fa29a), TOBN(0x4b2be13f, 0x06422233), + TOBN(0x36d6d062, 0x330d8d37), TOBN(0x5ef80e1e, 0xb28ca005), + TOBN(0x174d4699, 0x6d16768e), TOBN(0x9fc4ff6a, 0x628bf217), + TOBN(0x77705a94, 0x154e490d), TOBN(0x9d96dd28, 0x8d2d997a), + TOBN(0x77e2d9d8, 0xce5d72c4), TOBN(0x9d06c5a4, 0xc11c714f), + TOBN(0x02aa5136, 0x79e4a03e), TOBN(0x1386b3c2, 0x030ff28b), + TOBN(0xfe82e8a6, 0xfb283f61), TOBN(0x7df203e5, 0xf3abc3fb), + TOBN(0xeec7c351, 0x3a4d3622), TOBN(0xf7d17dbf, 0xdf762761), + TOBN(0xc3956e44, 0x522055f0), TOBN(0xde3012db, 0x8fa748db), + TOBN(0xca9fcb63, 0xbf1dcc14), TOBN(0xa56d9dcf, 0xbe4e2f3a), + TOBN(0xb86186b6, 0x8bcec9c2), TOBN(0x7cf24df9, 0x680b9f06), + TOBN(0xc46b45ea, 0xc0d29281), TOBN(0xfff42bc5, 0x07b10e12), + TOBN(0x12263c40, 0x4d289427), TOBN(0x3d5f1899, 0xb4848ec4), + TOBN(0x11f97010, 0xd040800c), TOBN(0xb4c5f529, 0x300feb20), + TOBN(0xcc543f8f, 0xde94fdcb), TOBN(0xe96af739, 0xc7c2f05e), + TOBN(0xaa5e0036, 0x882692e1), TOBN(0x09c75b68, 0x950d4ae9), + TOBN(0x62f63df2, 0xb5932a7a), TOBN(0x2658252e, 0xde0979ad), + TOBN(0x2a19343f, 0xb5e69631), TOBN(0x718c7501, 0x525b666b), + TOBN(0x26a42d69, 0xea40dc3a), TOBN(0xdc84ad22, 0xaecc018f), + TOBN(0x25c36c7b, 0x3270f04a), TOBN(0x46ba6d47, 0x50fa72ed), + TOBN(0x6c37d1c5, 0x93e58a8e), TOBN(0xa2394731, 0x120c088c), + TOBN(0xc3be4263, 0xcb6e86da), TOBN(0x2c417d36, 0x7126d038), + TOBN(0x5b70f9c5, 0x8b6f8efa), TOBN(0x671a2faa, 0x37718536), + TOBN(0xd3ced3c6, 0xb539c92b), TOBN(0xe56f1bd9, 0xa31203c2), + TOBN(0x8b096ec4, 0x9ff3c8eb), TOBN(0x2deae432, 0x43491cea), + TOBN(0x2465c6eb, 
0x17943794), TOBN(0x5d267e66, 0x20586843), + TOBN(0x9d3d116d, 0xb07159d0), TOBN(0xae07a67f, 0xc1896210), + TOBN(0x8fc84d87, 0xbb961579), TOBN(0x30009e49, 0x1c1f8dd6), + TOBN(0x8a8caf22, 0xe3132819), TOBN(0xcffa197c, 0xf23ab4ff), + TOBN(0x58103a44, 0x205dd687), TOBN(0x57b796c3, 0x0ded67a2), + TOBN(0x0b9c3a6c, 0xa1779ad7), TOBN(0xa33cfe2e, 0x357c09c5), + TOBN(0x2ea29315, 0x3db4a57e), TOBN(0x91959695, 0x8ebeb52e), + TOBN(0x118db9a6, 0xe546c879), TOBN(0x8e996df4, 0x6295c8d6), + TOBN(0xdd990484, 0x55ec806b), TOBN(0x24f291ca, 0x165c1035), + TOBN(0xcca523bb, 0x440e2229), TOBN(0x324673a2, 0x73ef4d04), + TOBN(0xaf3adf34, 0x3e11ec39), TOBN(0x6136d7f1, 0xdc5968d3), + TOBN(0x7a7b2899, 0xb053a927), TOBN(0x3eaa2661, 0xae067ecd), + TOBN(0x8549b9c8, 0x02779cd9), TOBN(0x061d7940, 0xc53385ea), + TOBN(0x3e0ba883, 0xf06d18bd), TOBN(0x4ba6de53, 0xb2700843), + TOBN(0xb966b668, 0x591a9e4d), TOBN(0x93f67567, 0x7f4fa0ed), + TOBN(0x5a02711b, 0x4347237b), TOBN(0xbc041e2f, 0xe794608e), + TOBN(0x55af10f5, 0x70f73d8c), TOBN(0xd2d4d4f7, 0xbb7564f7), + TOBN(0xd7d27a89, 0xb3e93ce7), TOBN(0xf7b5a875, 0x5d3a2c1b), + TOBN(0xb29e68a0, 0x255b218a), TOBN(0xb533837e, 0x8af76754), + TOBN(0xd1b05a73, 0x579fab2e), TOBN(0xb41055a1, 0xecd74385), + TOBN(0xb2369274, 0x445e9115), TOBN(0x2972a7c4, 0xf520274e), + TOBN(0x6c08334e, 0xf678e68a), TOBN(0x4e4160f0, 0x99b057ed), + TOBN(0x3cfe11b8, 0x52ccb69a), TOBN(0x2fd1823a, 0x21c8f772), + TOBN(0xdf7f072f, 0x3298f055), TOBN(0x8c0566f9, 0xfec74a6e), + TOBN(0xe549e019, 0x5bb4d041), TOBN(0x7c3930ba, 0x9208d850), + TOBN(0xe07141fc, 0xaaa2902b), TOBN(0x539ad799, 0xe4f69ad3), + TOBN(0xa6453f94, 0x813f9ffd), TOBN(0xc58d3c48, 0x375bc2f7), + TOBN(0xb3326fad, 0x5dc64e96), TOBN(0x3aafcaa9, 0xb240e354), + TOBN(0x1d1b0903, 0xaca1e7a9), TOBN(0x4ceb9767, 0x1211b8a0), + TOBN(0xeca83e49, 0xe32a858e), TOBN(0x4c32892e, 0xae907bad), + TOBN(0xd5b42ab6, 0x2eb9b494), TOBN(0x7fde3ee2, 0x1eabae1b), + TOBN(0x13b5ab09, 0xcaf54957), TOBN(0xbfb028be, 0xe5f5d5d5), + TOBN(0x928a0650, 0x2003e2c0), TOBN(0x90793aac, 0x67476843), + TOBN(0x5e942e79, 0xc81710a0), TOBN(0x557e4a36, 0x27ccadd4), + TOBN(0x72a2bc56, 0x4bcf6d0c), TOBN(0x09ee5f43, 0x26d7b80c), + TOBN(0x6b70dbe9, 0xd4292f19), TOBN(0x56f74c26, 0x63f16b18), + TOBN(0xc23db0f7, 0x35fbb42a), TOBN(0xb606bdf6, 0x6ae10040), + TOBN(0x1eb15d4d, 0x044573ac), TOBN(0x7dc3cf86, 0x556b0ba4), + TOBN(0x97af9a33, 0xc60df6f7), TOBN(0x0b1ef85c, 0xa716ce8c), + TOBN(0x2922f884, 0xc96958be), TOBN(0x7c32fa94, 0x35690963), + TOBN(0x2d7f667c, 0xeaa00061), TOBN(0xeaaf7c17, 0x3547365c), + TOBN(0x1eb4de46, 0x87032d58), TOBN(0xc54f3d83, 0x5e2c79e0), + TOBN(0x07818df4, 0x5d04ef23), TOBN(0x55faa9c8, 0x673d41b4), + TOBN(0xced64f6f, 0x89b95355), TOBN(0x4860d2ea, 0xb7415c84), + TOBN(0x5fdb9bd2, 0x050ebad3), TOBN(0xdb53e0cc, 0x6685a5bf), + TOBN(0xb830c031, 0x9feb6593), TOBN(0xdd87f310, 0x6accff17), + TOBN(0x2303ebab, 0x9f555c10), TOBN(0x94603695, 0x287e7065), + TOBN(0xf88311c3, 0x2e83358c), TOBN(0x508dd9b4, 0xeefb0178), + TOBN(0x7ca23706, 0x2dba8652), TOBN(0x62aac5a3, 0x0047abe5), + TOBN(0x9a61d2a0, 0x8b1ea7b3), TOBN(0xd495ab63, 0xae8b1485), + TOBN(0x38740f84, 0x87052f99), TOBN(0x178ebe5b, 0xb2974eea), + TOBN(0x030bbcca, 0x5b36d17f), TOBN(0xb5e4cce3, 0xaaf86eea), + TOBN(0xb51a0220, 0x68f8e9e0), TOBN(0xa4348796, 0x09eb3e75), + TOBN(0xbe592309, 0xeef1a752), TOBN(0x5d7162d7, 0x6f2aa1ed), + TOBN(0xaebfb5ed, 0x0f007dd2), TOBN(0x255e14b2, 0xc89edd22), + TOBN(0xba85e072, 0x0303b697), TOBN(0xc5d17e25, 0xf05720ff), + TOBN(0x02b58d6e, 0x5128ebb6), TOBN(0x2c80242d, 0xd754e113), + TOBN(0x919fca5f, 0xabfae1ca), 
TOBN(0x937afaac, 0x1a21459b), + TOBN(0x9e0ca91c, 0x1f66a4d2), TOBN(0x194cc7f3, 0x23ec1331), + TOBN(0xad25143a, 0x8aa11690), TOBN(0xbe40ad8d, 0x09b59e08), + TOBN(0x37d60d9b, 0xe750860a), TOBN(0x6c53b008, 0xc6bf434c), + TOBN(0xb572415d, 0x1356eb80), TOBN(0xb8bf9da3, 0x9578ded8), + TOBN(0x22658e36, 0x5e8fb38b), TOBN(0x9b70ce22, 0x5af8cb22), + TOBN(0x7c00018a, 0x829a8180), TOBN(0x84329f93, 0xb81ed295), + TOBN(0x7c343ea2, 0x5f3cea83), TOBN(0x38f8655f, 0x67586536), + TOBN(0xa661a0d0, 0x1d3ec517), TOBN(0x98744652, 0x512321ae), + TOBN(0x084ca591, 0xeca92598), TOBN(0xa9bb9dc9, 0x1dcb3feb), + TOBN(0x14c54355, 0x78b4c240), TOBN(0x5ed62a3b, 0x610cafdc), + TOBN(0x07512f37, 0x1b38846b), TOBN(0x571bb70a, 0xb0e38161), + TOBN(0xb556b95b, 0x2da705d2), TOBN(0x3ef8ada6, 0xb1a08f98), + TOBN(0x85302ca7, 0xddecfbe5), TOBN(0x0e530573, 0x943105cd), + TOBN(0x60554d55, 0x21a9255d), TOBN(0x63a32fa1, 0xf2f3802a), + TOBN(0x35c8c5b0, 0xcd477875), TOBN(0x97f458ea, 0x6ad42da1), + TOBN(0x832d7080, 0xeb6b242d), TOBN(0xd30bd023, 0x3b71e246), + TOBN(0x7027991b, 0xbe31139d), TOBN(0x68797e91, 0x462e4e53), + TOBN(0x423fe20a, 0x6b4e185a), TOBN(0x82f2c67e, 0x42d9b707), + TOBN(0x25c81768, 0x4cf7811b), TOBN(0xbd53005e, 0x045bb95d),} + , + {TOBN(0xe5f649be, 0x9d8e68fd), TOBN(0xdb0f0533, 0x1b044320), + TOBN(0xf6fde9b3, 0xe0c33398), TOBN(0x92f4209b, 0x66c8cfae), + TOBN(0xe9d1afcc, 0x1a739d4b), TOBN(0x09aea75f, 0xa28ab8de), + TOBN(0x14375fb5, 0xeac6f1d0), TOBN(0x6420b560, 0x708f7aa5), + TOBN(0x9eae499c, 0x6254dc41), TOBN(0x7e293924, 0x7a837e7e), + TOBN(0x74aec08c, 0x090524a7), TOBN(0xf82b9219, 0x8d6f55f2), + TOBN(0x493c962e, 0x1402cec5), TOBN(0x9f17ca17, 0xfa2f30e7), + TOBN(0xbcd783e8, 0xe9b879cb), TOBN(0xea3d8c14, 0x5a6f145f), + TOBN(0xdede15e7, 0x5e0dee6e), TOBN(0x74f24872, 0xdc628aa2), + TOBN(0xd3e9c4fe, 0x7861bb93), TOBN(0x56d4822a, 0x6187b2e0), + TOBN(0xb66417cf, 0xc59826f9), TOBN(0xca260969, 0x2408169e), + TOBN(0xedf69d06, 0xc79ef885), TOBN(0x00031f8a, 0xdc7d138f), + TOBN(0x103c46e6, 0x0ebcf726), TOBN(0x4482b831, 0x6231470e), + TOBN(0x6f6dfaca, 0x487c2109), TOBN(0x2e0ace97, 0x62e666ef), + TOBN(0x3246a9d3, 0x1f8d1f42), TOBN(0x1b1e83f1, 0x574944d2), + TOBN(0x13dfa63a, 0xa57f334b), TOBN(0x0cf8daed, 0x9f025d81), + TOBN(0x30d78ea8, 0x00ee11c1), TOBN(0xeb053cd4, 0xb5e3dd75), + TOBN(0x9b65b13e, 0xd58c43c5), TOBN(0xc3ad49bd, 0xbd151663), + TOBN(0x99fd8e41, 0xb6427990), TOBN(0x12cf15bd, 0x707eae1e), + TOBN(0x29ad4f1b, 0x1aabb71e), TOBN(0x5143e74d, 0x07545d0e), + TOBN(0x30266336, 0xc88bdee1), TOBN(0x25f29306, 0x5876767c), + TOBN(0x9c078571, 0xc6731996), TOBN(0xc88690b2, 0xed552951), + TOBN(0x274f2c2d, 0x852705b4), TOBN(0xb0bf8d44, 0x4e09552d), + TOBN(0x7628beeb, 0x986575d1), TOBN(0x407be238, 0x7f864651), + TOBN(0x0e5e3049, 0xa639fc6b), TOBN(0xe75c35d9, 0x86003625), + TOBN(0x0cf35bd8, 0x5dcc1646), TOBN(0x8bcaced2, 0x6c26273a), + TOBN(0xe22ecf1d, 0xb5536742), TOBN(0x013dd897, 0x1a9e068b), + TOBN(0x17f411cb, 0x8a7909c5), TOBN(0x5757ac98, 0x861dd506), + TOBN(0x85de1f0d, 0x1e935abb), TOBN(0xdefd10b4, 0x154de37a), + TOBN(0xb8d9e392, 0x369cebb5), TOBN(0x54d5ef9b, 0x761324be), + TOBN(0x4d6341ba, 0x74f17e26), TOBN(0xc0a0e3c8, 0x78c1dde4), + TOBN(0xa6d77581, 0x87d918fd), TOBN(0x66876015, 0x02ca3a13), + TOBN(0xc7313e9c, 0xf36658f0), TOBN(0xc433ef1c, 0x71f8057e), + TOBN(0x85326246, 0x1b6a835a), TOBN(0xc8f05398, 0x7c86394c), + TOBN(0xff398cdf, 0xe983c4a1), TOBN(0xbf5e8162, 0x03b7b931), + TOBN(0x93193c46, 0xb7b9045b), TOBN(0x1e4ebf5d, 0xa4a6e46b), + TOBN(0xf9942a60, 0x43a24fe7), TOBN(0x29c1191e, 0xffb3492b), + TOBN(0x9f662449, 0x902fde05), 
TOBN(0xc792a7ac, 0x6713c32d), + TOBN(0x2fd88ad8, 0xb737982c), TOBN(0x7e3a0319, 0xa21e60e3), + TOBN(0x09b0de44, 0x7383591a), TOBN(0x6df141ee, 0x8310a456), + TOBN(0xaec1a039, 0xe6d6f471), TOBN(0x14b2ba0f, 0x1198d12e), + TOBN(0xebc1a160, 0x3aeee5ac), TOBN(0x401f4836, 0xe0b964ce), + TOBN(0x2ee43796, 0x4fd03f66), TOBN(0x3fdb4e49, 0xdd8f3f12), + TOBN(0x6ef267f6, 0x29380f18), TOBN(0x3e8e9670, 0x8da64d16), + TOBN(0xbc19180c, 0x207674f1), TOBN(0x112e09a7, 0x33ae8fdb), + TOBN(0x99667554, 0x6aaeb71e), TOBN(0x79432af1, 0xe101b1c7), + TOBN(0xd5eb558f, 0xde2ddec6), TOBN(0x81392d1f, 0x5357753f), + TOBN(0xa7a76b97, 0x3ae1158a), TOBN(0x416fbbff, 0x4a899991), + TOBN(0x9e65fdfd, 0x0d4a9dcf), TOBN(0x7bc29e48, 0x944ddf12), + TOBN(0xbc1a92d9, 0x3c856866), TOBN(0x273c6905, 0x6e98dfe2), + TOBN(0x69fce418, 0xcdfaa6b8), TOBN(0x606bd823, 0x5061c69f), + TOBN(0x42d495a0, 0x6af75e27), TOBN(0x8ed3d505, 0x6d873a1f), + TOBN(0xaf552841, 0x6ab25b6a), TOBN(0xc6c0ffc7, 0x2b1a4523), + TOBN(0xab18827b, 0x21c99e03), TOBN(0x060e8648, 0x9034691b), + TOBN(0x5207f90f, 0x93c7f398), TOBN(0x9f4a96cb, 0x82f8d10b), + TOBN(0xdd71cd79, 0x3ad0f9e3), TOBN(0x84f435d2, 0xfc3a54f5), + TOBN(0x4b03c55b, 0x8e33787f), TOBN(0xef42f975, 0xa6384673), + TOBN(0xff7304f7, 0x5051b9f0), TOBN(0x18aca1dc, 0x741c87c2), + TOBN(0x56f120a7, 0x2d4bfe80), TOBN(0xfd823b3d, 0x053e732c), + TOBN(0x11bccfe4, 0x7537ca16), TOBN(0xdf6c9c74, 0x1b5a996b), + TOBN(0xee7332c7, 0x904fc3fa), TOBN(0x14a23f45, 0xc7e3636a), + TOBN(0xc38659c3, 0xf091d9aa), TOBN(0x4a995e5d, 0xb12d8540), + TOBN(0x20a53bec, 0xf3a5598a), TOBN(0x56534b17, 0xb1eaa995), + TOBN(0x9ed3dca4, 0xbf04e03c), TOBN(0x716c563a, 0xd8d56268), + TOBN(0x27ba77a4, 0x1d6178e7), TOBN(0xe4c80c40, 0x68a1ff8e), + TOBN(0x75011099, 0x0a13f63d), TOBN(0x7bf33521, 0xa61d46f3), + TOBN(0x0aff218e, 0x10b365bb), TOBN(0x81021804, 0x0fd7ea75), + TOBN(0x05a3fd8a, 0xa4b3a925), TOBN(0xb829e75f, 0x9b3db4e6), + TOBN(0x6bdc75a5, 0x4d53e5fb), TOBN(0x04a5dc02, 0xd52717e3), + TOBN(0x86af502f, 0xe9a42ec2), TOBN(0x8867e8fb, 0x2630e382), + TOBN(0xbf845c6e, 0xbec9889b), TOBN(0x54f491f2, 0xcb47c98d), + TOBN(0xa3091fba, 0x790c2a12), TOBN(0xd7f6fd78, 0xc20f708b), + TOBN(0xa569ac30, 0xacde5e17), TOBN(0xd0f996d0, 0x6852b4d7), + TOBN(0xe51d4bb5, 0x4609ae54), TOBN(0x3fa37d17, 0x0daed061), + TOBN(0x62a88684, 0x34b8fb41), TOBN(0x99a2acbd, 0x9efb64f1), + TOBN(0xb75c1a5e, 0x6448e1f2), TOBN(0xfa99951a, 0x42b5a069), + TOBN(0x6d956e89, 0x2f3b26e7), TOBN(0xf4709860, 0xda875247), + TOBN(0x3ad15179, 0x2482dda3), TOBN(0xd64110e3, 0x017d82f0), + TOBN(0x14928d2c, 0xfad414e4), TOBN(0x2b155f58, 0x2ed02b24), + TOBN(0x481a141b, 0xcb821bf1), TOBN(0x12e3c770, 0x4f81f5da), + TOBN(0xe49c5de5, 0x9fff8381), TOBN(0x11053232, 0x5bbec894), + TOBN(0xa0d051cc, 0x454d88c4), TOBN(0x4f6db89c, 0x1f8e531b), + TOBN(0x34fe3fd6, 0xca563a44), TOBN(0x7f5c2215, 0x58da8ab9), + TOBN(0x8445016d, 0x9474f0a1), TOBN(0x17d34d61, 0xcb7d8a0a), + TOBN(0x8e9d3910, 0x1c474019), TOBN(0xcaff2629, 0xd52ceefb), + TOBN(0xf9cf3e32, 0xc1622c2b), TOBN(0xd4b95e3c, 0xe9071a05), + TOBN(0xfbbca61f, 0x1594438c), TOBN(0x1eb6e6a6, 0x04aadedf), + TOBN(0x853027f4, 0x68e14940), TOBN(0x221d322a, 0xdfabda9c), + TOBN(0xed8ea9f6, 0xb7cb179a), TOBN(0xdc7b764d, 0xb7934dcc), + TOBN(0xfcb13940, 0x5e09180d), TOBN(0x6629a6bf, 0xb47dc2dd), + TOBN(0xbfc55e4e, 0x9f5a915e), TOBN(0xb1db9d37, 0x6204441e), + TOBN(0xf82d68cf, 0x930c5f53), TOBN(0x17d3a142, 0xcbb605b1), + TOBN(0xdd5944ea, 0x308780f2), TOBN(0xdc8de761, 0x3845f5e4), + TOBN(0x6beaba7d, 0x7624d7a3), TOBN(0x1e709afd, 0x304df11e), + TOBN(0x95364376, 0x02170456), TOBN(0xbf204b3a, 
0xc8f94b64), + TOBN(0x4e53af7c, 0x5680ca68), TOBN(0x0526074a, 0xe0c67574), + TOBN(0x95d8cef8, 0xecd92af6), TOBN(0xe6b9fa7a, 0x6cd1745a), + TOBN(0x3d546d3d, 0xa325c3e4), TOBN(0x1f57691d, 0x9ae93aae), + TOBN(0xe891f3fe, 0x9d2e1a33), TOBN(0xd430093f, 0xac063d35), + TOBN(0xeda59b12, 0x5513a327), TOBN(0xdc2134f3, 0x5536f18f), + TOBN(0xaa51fe2c, 0x5c210286), TOBN(0x3f68aaee, 0x1cab658c), + TOBN(0x5a23a00b, 0xf9357292), TOBN(0x9a626f39, 0x7efdabed), + TOBN(0xfe2b3bf3, 0x199d78e3), TOBN(0xb7a2af77, 0x71bbc345), + TOBN(0x3d19827a, 0x1e59802c), TOBN(0x823bbc15, 0xb487a51c), + TOBN(0x856139f2, 0x99d0a422), TOBN(0x9ac3df65, 0xf456c6fb), + TOBN(0xaddf65c6, 0x701f8bd6), TOBN(0x149f321e, 0x3758df87), + TOBN(0xb1ecf714, 0x721b7eba), TOBN(0xe17df098, 0x31a3312a), + TOBN(0xdb2fd6ec, 0xd5c4d581), TOBN(0xfd02996f, 0x8fcea1b3), + TOBN(0xe29fa63e, 0x7882f14f), TOBN(0xc9f6dc35, 0x07c6cadc), + TOBN(0x46f22d6f, 0xb882bed0), TOBN(0x1a45755b, 0xd118e52c), + TOBN(0x9f2c7c27, 0x7c4608cf), TOBN(0x7ccbdf32, 0x568012c2), + TOBN(0xfcb0aedd, 0x61729b0e), TOBN(0x7ca2ca9e, 0xf7d75dbf), + TOBN(0xf58fecb1, 0x6f640f62), TOBN(0xe274b92b, 0x39f51946), + TOBN(0x7f4dfc04, 0x6288af44), TOBN(0x0a91f32a, 0xeac329e5), + TOBN(0x43ad274b, 0xd6aaba31), TOBN(0x719a1640, 0x0f6884f9), + TOBN(0x685d29f6, 0xdaf91e20), TOBN(0x5ec1cc33, 0x27e49d52), + TOBN(0x38f4de96, 0x3b54a059), TOBN(0x0e0015e5, 0xefbcfdb3), + TOBN(0x177d23d9, 0x4dbb8da6), TOBN(0x98724aa2, 0x97a617ad), + TOBN(0x30f0885b, 0xfdb6558e), TOBN(0xf9f7a28a, 0xc7899a96), + TOBN(0xd2ae8ac8, 0x872dc112), TOBN(0xfa0642ca, 0x73c3c459), + TOBN(0x15296981, 0xe7dfc8d6), TOBN(0x67cd4450, 0x1fb5b94a), + TOBN(0x0ec71cf1, 0x0eddfd37), TOBN(0xc7e5eeb3, 0x9a8eddc7), + TOBN(0x02ac8e3d, 0x81d95028), TOBN(0x0088f172, 0x70b0e35d), + TOBN(0xec041fab, 0xe1881fe3), TOBN(0x62cf71b8, 0xd99e7faa), + TOBN(0x5043dea7, 0xe0f222c2), TOBN(0x309d42ac, 0x72e65142), + TOBN(0x94fe9ddd, 0x9216cd30), TOBN(0xd6539c7d, 0x0f87feec), + TOBN(0x03c5a57c, 0x432ac7d7), TOBN(0x72692cf0, 0x327fda10), + TOBN(0xec28c85f, 0x280698de), TOBN(0x2331fb46, 0x7ec283b1), + TOBN(0xd34bfa32, 0x2867e633), TOBN(0x78709a82, 0x0a9cc815), + TOBN(0xb7fe6964, 0x875e2fa5), TOBN(0x25cc064f, 0x9e98bfb5), + TOBN(0x9eb0151c, 0x493a65c5), TOBN(0x5fb5d941, 0x53182464), + TOBN(0x69e6f130, 0xf04618e2), TOBN(0xa8ecec22, 0xf89c8ab6), + TOBN(0xcd6ac88b, 0xb96209bd), TOBN(0x65fa8cdb, 0xb3e1c9e0), + TOBN(0xa47d22f5, 0x4a8d8eac), TOBN(0x83895cdf, 0x8d33f963), + TOBN(0xa8adca59, 0xb56cd3d1), TOBN(0x10c8350b, 0xdaf38232), + TOBN(0x2b161fb3, 0xa5080a9f), TOBN(0xbe7f5c64, 0x3af65b3a), + TOBN(0x2c754039, 0x97403a11), TOBN(0x94626cf7, 0x121b96af), + TOBN(0x431de7c4, 0x6a983ec2), TOBN(0x3780dd3a, 0x52cc3df7), + TOBN(0xe28a0e46, 0x2baf8e3b), TOBN(0xabe68aad, 0x51d299ae), + TOBN(0x603eb8f9, 0x647a2408), TOBN(0x14c61ed6, 0x5c750981), + TOBN(0x88b34414, 0xc53352e7), TOBN(0x5a34889c, 0x1337d46e), + TOBN(0x612c1560, 0xf95f2bc8), TOBN(0x8a3f8441, 0xd4807a3a), + TOBN(0x680d9e97, 0x5224da68), TOBN(0x60cd6e88, 0xc3eb00e9), + TOBN(0x3875a98e, 0x9a6bc375), TOBN(0xdc80f924, 0x4fd554c2), + TOBN(0x6c4b3415, 0x6ac77407), TOBN(0xa1e5ea8f, 0x25420681), + TOBN(0x541bfa14, 0x4607a458), TOBN(0x5dbc7e7a, 0x96d7fbf9), + TOBN(0x646a851b, 0x31590a47), TOBN(0x039e85ba, 0x15ee6df8), + TOBN(0xd19fa231, 0xd7b43fc0), TOBN(0x84bc8be8, 0x299a0e04), + TOBN(0x2b9d2936, 0xf20df03a), TOBN(0x24054382, 0x8608d472), + TOBN(0x76b6ba04, 0x9149202a), TOBN(0xb21c3831, 0x3670e7b7), + TOBN(0xddd93059, 0xd6fdee10), TOBN(0x9da47ad3, 0x78488e71), + TOBN(0x99cc1dfd, 0xa0fcfb25), TOBN(0x42abde10, 0x64696954), + 
TOBN(0x14cc15fc, 0x17eab9fe), TOBN(0xd6e863e4, 0xd3e70972), + TOBN(0x29a7765c, 0x6432112c), TOBN(0x88660001, 0x5b0774d8), + TOBN(0x3729175a, 0x2c088eae), TOBN(0x13afbcae, 0x8230b8d4), + TOBN(0x44768151, 0x915f4379), TOBN(0xf086431a, 0xd8d22812), + TOBN(0x37461955, 0xc298b974), TOBN(0x905fb5f0, 0xf8711e04), + TOBN(0x787abf3a, 0xfe969d18), TOBN(0x392167c2, 0x6f6a494e), + TOBN(0xfc7a0d2d, 0x28c511da), TOBN(0xf127c7dc, 0xb66a262d), + TOBN(0xf9c4bb95, 0xfd63fdf0), TOBN(0x90016589, 0x3913ef46), + TOBN(0x74d2a73c, 0x11aa600d), TOBN(0x2f5379bd, 0x9fb5ab52), + TOBN(0xe49e53a4, 0x7fb70068), TOBN(0x68dd39e5, 0x404aa9a7), + TOBN(0xb9b0cf57, 0x2ecaa9c3), TOBN(0xba0e103b, 0xe824826b), + TOBN(0x60c2198b, 0x4631a3c4), TOBN(0xc5ff84ab, 0xfa8966a2), + TOBN(0x2d6ebe22, 0xac95aff8), TOBN(0x1c9bb6db, 0xb5a46d09), + TOBN(0x419062da, 0x53ee4f8d), TOBN(0x7b9042d0, 0xbb97efef), + TOBN(0x0f87f080, 0x830cf6bd), TOBN(0x4861d19a, 0x6ec8a6c6), + TOBN(0xd3a0daa1, 0x202f01aa), TOBN(0xb0111674, 0xf25afbd5), + TOBN(0x6d00d6cf, 0x1afb20d9), TOBN(0x13695000, 0x40671bc5), + TOBN(0x913ab0dc, 0x2485ea9b), TOBN(0x1f2bed06, 0x9eef61ac), + TOBN(0x850c8217, 0x6d799e20), TOBN(0x93415f37, 0x3271c2de), + TOBN(0x5afb06e9, 0x6c4f5910), TOBN(0x688a52df, 0xc4e9e421), + TOBN(0x30495ba3, 0xe2a9a6db), TOBN(0x4601303d, 0x58f9268b), + TOBN(0xbe3b0dad, 0x7eb0f04f), TOBN(0x4ea47250, 0x4456936d), + TOBN(0x8caf8798, 0xd33fd3e7), TOBN(0x1ccd8a89, 0xeb433708), + TOBN(0x9effe3e8, 0x87fd50ad), TOBN(0xbe240a56, 0x6b29c4df), + TOBN(0xec4ffd98, 0xca0e7ebd), TOBN(0xf586783a, 0xe748616e), + TOBN(0xa5b00d8f, 0xc77baa99), TOBN(0x0acada29, 0xb4f34c9c), + TOBN(0x36dad67d, 0x0fe723ac), TOBN(0x1d8e53a5, 0x39c36c1e), + TOBN(0xe4dd342d, 0x1f4bea41), TOBN(0x64fd5e35, 0xebc9e4e0), + TOBN(0x96f01f90, 0x57908805), TOBN(0xb5b9ea3d, 0x5ed480dd), + TOBN(0x366c5dc2, 0x3efd2dd0), TOBN(0xed2fe305, 0x6e9dfa27), + TOBN(0x4575e892, 0x6e9197e2), TOBN(0x11719c09, 0xab502a5d), + TOBN(0x264c7bec, 0xe81f213f), TOBN(0x741b9241, 0x55f5c457), + TOBN(0x78ac7b68, 0x49a5f4f4), TOBN(0xf91d70a2, 0x9fc45b7d), + TOBN(0x39b05544, 0xb0f5f355), TOBN(0x11f06bce, 0xeef930d9), + TOBN(0xdb84d25d, 0x038d05e1), TOBN(0x04838ee5, 0xbacc1d51), + TOBN(0x9da3ce86, 0x9e8ee00b), TOBN(0xc3412057, 0xc36eda1f), + TOBN(0xae80b913, 0x64d9c2f4), TOBN(0x7468bac3, 0xa010a8ff), + TOBN(0xdfd20037, 0x37359d41), TOBN(0x1a0f5ab8, 0x15efeacc), + TOBN(0x7c25ad2f, 0x659d0ce0), TOBN(0x4011bcbb, 0x6785cff1), + TOBN(0x128b9912, 0x7e2192c7), TOBN(0xa549d8e1, 0x13ccb0e8), + TOBN(0x805588d8, 0xc85438b1), TOBN(0x5680332d, 0xbc25cb27), + TOBN(0xdcd1bc96, 0x1a4bfdf4), TOBN(0x779ff428, 0x706f6566), + TOBN(0x8bbee998, 0xf059987a), TOBN(0xf6ce8cf2, 0xcc686de7), + TOBN(0xf8ad3c4a, 0x953cfdb2), TOBN(0xd1d426d9, 0x2205da36), + TOBN(0xb3c0f13f, 0xc781a241), TOBN(0x3e89360e, 0xd75362a8), + TOBN(0xccd05863, 0xc8a91184), TOBN(0x9bd0c9b7, 0xefa8a7f4), + TOBN(0x97ee4d53, 0x8a912a4b), TOBN(0xde5e15f8, 0xbcf518fd), + TOBN(0x6a055bf8, 0xc467e1e0), TOBN(0x10be4b4b, 0x1587e256), + TOBN(0xd90c14f2, 0x668621c9), TOBN(0xd5518f51, 0xab9c92c1), + TOBN(0x8e6a0100, 0xd6d47b3c), TOBN(0xcbe980dd, 0x66716175), + TOBN(0x500d3f10, 0xddd83683), TOBN(0x3b6cb35d, 0x99cac73c), + TOBN(0x53730c8b, 0x6083d550), TOBN(0xcf159767, 0xdf0a1987), + TOBN(0x84bfcf53, 0x43ad73b3), TOBN(0x1b528c20, 0x4f035a94), + TOBN(0x4294edf7, 0x33eeac69), TOBN(0xb6283e83, 0x817f3240), + TOBN(0xc3fdc959, 0x0a5f25b1), TOBN(0xefaf8aa5, 0x5844ee22), + TOBN(0xde269ba5, 0xdbdde4de), TOBN(0xe3347160, 0xc56133bf), + TOBN(0xc1184219, 0x8d9ea9f8), TOBN(0x090de5db, 0xf3fc1ab5), + TOBN(0x404c37b1, 
0x0bf22cda), TOBN(0x7de20ec8, 0xf5618894), + TOBN(0x754c588e, 0xecdaecab), TOBN(0x6ca4b0ed, 0x88342743), + TOBN(0x76f08bdd, 0xf4a938ec), TOBN(0xd182de89, 0x91493ccb), + TOBN(0xd652c53e, 0xc8a4186a), TOBN(0xb3e878db, 0x946d8e33), + TOBN(0x088453c0, 0x5f37663c), TOBN(0x5cd9daaa, 0xb407748b), + TOBN(0xa1f5197f, 0x586d5e72), TOBN(0x47500be8, 0xc443ca59), + TOBN(0x78ef35b2, 0xe2652424), TOBN(0x09c5d26f, 0x6dd7767d), + TOBN(0x7175a79a, 0xa74d3f7b), TOBN(0x0428fd8d, 0xcf5ea459), + TOBN(0x511cb97c, 0xa5d1746d), TOBN(0x36363939, 0xe71d1278), + TOBN(0xcf2df955, 0x10350bf4), TOBN(0xb3817439, 0x60aae782), + TOBN(0xa748c0e4, 0x3e688809), TOBN(0x98021fbf, 0xd7a5a006), + TOBN(0x9076a70c, 0x0e367a98), TOBN(0xbea1bc15, 0x0f62b7c2), + TOBN(0x2645a68c, 0x30fe0343), TOBN(0xacaffa78, 0x699dc14f), + TOBN(0xf4469964, 0x457bf9c4), TOBN(0x0db6407b, 0x0d2ead83), + TOBN(0x68d56cad, 0xb2c6f3eb), TOBN(0x3b512e73, 0xf376356c), + TOBN(0xe43b0e1f, 0xfce10408), TOBN(0x89ddc003, 0x5a5e257d), + TOBN(0xb0ae0d12, 0x0362e5b3), TOBN(0x07f983c7, 0xb0519161), + TOBN(0xc2e94d15, 0x5d5231e7), TOBN(0xcff22aed, 0x0b4f9513), + TOBN(0xb02588dd, 0x6ad0b0b5), TOBN(0xb967d1ac, 0x11d0dcd5), + TOBN(0x8dac6bc6, 0xcf777b6c), TOBN(0x0062bdbd, 0x4c6d1959), + TOBN(0x53da71b5, 0x0ef5cc85), TOBN(0x07012c7d, 0x4006f14f), + TOBN(0x4617f962, 0xac47800d), TOBN(0x53365f2b, 0xc102ed75), + TOBN(0xb422efcb, 0x4ab8c9d3), TOBN(0x195cb26b, 0x34af31c9), + TOBN(0x3a926e29, 0x05f2c4ce), TOBN(0xbd2bdecb, 0x9856966c), + TOBN(0x5d16ab3a, 0x85527015), TOBN(0x9f81609e, 0x4486c231), + TOBN(0xd8b96b2c, 0xda350002), TOBN(0xbd054690, 0xfa1b7d36), + TOBN(0xdc90ebf5, 0xe71d79bc), TOBN(0xf241b6f9, 0x08964e4e), + TOBN(0x7c838643, 0x2fe3cd4c), TOBN(0xe0f33acb, 0xb4bc633c), + TOBN(0xb4a9ecec, 0x3d139f1f), TOBN(0x05ce69cd, 0xdc4a1f49), + TOBN(0xa19d1b16, 0xf5f98aaf), TOBN(0x45bb71d6, 0x6f23e0ef), + TOBN(0x33789fcd, 0x46cdfdd3), TOBN(0x9b8e2978, 0xcee040ca), + TOBN(0x9c69b246, 0xae0a6828), TOBN(0xba533d24, 0x7078d5aa), + TOBN(0x7a2e42c0, 0x7bb4fbdb), TOBN(0xcfb4879a, 0x7035385c), + TOBN(0x8c3dd30b, 0x3281705b), TOBN(0x7e361c6c, 0x404fe081), + TOBN(0x7b21649c, 0x3f604edf), TOBN(0x5dbf6a3f, 0xe52ffe47), + TOBN(0xc41b7c23, 0x4b54d9bf), TOBN(0x1374e681, 0x3511c3d9), + TOBN(0x1863bf16, 0xc1b2b758), TOBN(0x90e78507, 0x1e9e6a96), + TOBN(0xab4bf98d, 0x5d86f174), TOBN(0xd74e0bd3, 0x85e96fe4), + TOBN(0x8afde39f, 0xcac5d344), TOBN(0x90946dbc, 0xbd91b847), + TOBN(0xf5b42358, 0xfe1a838c), TOBN(0x05aae6c5, 0x620ac9d8), + TOBN(0x8e193bd8, 0xa1ce5a0b), TOBN(0x8f710571, 0x4dabfd72), + TOBN(0x8d8fdd48, 0x182caaac), TOBN(0x8c4aeefa, 0x040745cf), + TOBN(0x73c6c30a, 0xf3b93e6d), TOBN(0x991241f3, 0x16f42011), + TOBN(0xa0158eea, 0xe457a477), TOBN(0xd19857db, 0xee6ddc05), + TOBN(0xb3265224, 0x18c41671), TOBN(0x3ffdfc7e, 0x3c2c0d58), + TOBN(0x3a3a5254, 0x26ee7cda), TOBN(0x341b0869, 0xdf02c3a8), + TOBN(0xa023bf42, 0x723bbfc8), TOBN(0x3d15002a, 0x14452691),} + , + {TOBN(0x5ef7324c, 0x85edfa30), TOBN(0x25976554, 0x87d4f3da), + TOBN(0x352f5bc0, 0xdcb50c86), TOBN(0x8f6927b0, 0x4832a96c), + TOBN(0xd08ee1ba, 0x55f2f94c), TOBN(0x6a996f99, 0x344b45fa), + TOBN(0xe133cb8d, 0xa8aa455d), TOBN(0x5d0721ec, 0x758dc1f7), + TOBN(0x6ba7a920, 0x79e5fb67), TOBN(0xe1331feb, 0x70aa725e), + TOBN(0x5080ccf5, 0x7df5d837), TOBN(0xe4cae01d, 0x7ff72e21), + TOBN(0xd9243ee6, 0x0412a77d), TOBN(0x06ff7cac, 0xdf449025), + TOBN(0xbe75f7cd, 0x23ef5a31), TOBN(0xbc957822, 0x0ddef7a8), + TOBN(0x8cf7230c, 0xb0ce1c55), TOBN(0x5b534d05, 0x0bbfb607), + TOBN(0xee1ef113, 0x0e16363b), TOBN(0x27e0aa7a, 0xb4999e82), + TOBN(0xce1dac2d, 0x79362c41), 
TOBN(0x67920c90, 0x91bb6cb0), + TOBN(0x1e648d63, 0x2223df24), TOBN(0x0f7d9eef, 0xe32e8f28), + TOBN(0x6943f39a, 0xfa833834), TOBN(0x22951722, 0xa6328562), + TOBN(0x81d63dd5, 0x4170fc10), TOBN(0x9f5fa58f, 0xaecc2e6d), + TOBN(0xb66c8725, 0xe77d9a3b), TOBN(0x11235cea, 0x6384ebe0), + TOBN(0x06a8c118, 0x5845e24a), TOBN(0x0137b286, 0xebd093b1), + TOBN(0xc589e1ce, 0x44ace150), TOBN(0xe0f8d3d9, 0x4381e97c), + TOBN(0x59e99b11, 0x62c5a4b8), TOBN(0x90d262f7, 0xfd0ec9f9), + TOBN(0xfbc854c9, 0x283e13c9), TOBN(0x2d04fde7, 0xaedc7085), + TOBN(0x057d7765, 0x47dcbecb), TOBN(0x8dbdf591, 0x9a76fa5f), + TOBN(0xd0150695, 0x0de1e578), TOBN(0x2e1463e7, 0xe9f72bc6), + TOBN(0xffa68441, 0x1b39eca5), TOBN(0x673c8530, 0x7c037f2f), + TOBN(0xd0d6a600, 0x747f91da), TOBN(0xb08d43e1, 0xc9cb78e9), + TOBN(0x0fc0c644, 0x27b5cef5), TOBN(0x5c1d160a, 0xa60a2fd6), + TOBN(0xf98cae53, 0x28c8e13b), TOBN(0x375f10c4, 0xb2eddcd1), + TOBN(0xd4eb8b7f, 0x5cce06ad), TOBN(0xb4669f45, 0x80a2e1ef), + TOBN(0xd593f9d0, 0x5bbd8699), TOBN(0x5528a4c9, 0xe7976d13), + TOBN(0x3923e095, 0x1c7e28d3), TOBN(0xb9293790, 0x3f6bb577), + TOBN(0xdb567d6a, 0xc42bd6d2), TOBN(0x6df86468, 0xbb1f96ae), + TOBN(0x0efe5b1a, 0x4843b28e), TOBN(0x961bbb05, 0x6379b240), + TOBN(0xb6caf5f0, 0x70a6a26b), TOBN(0x70686c0d, 0x328e6e39), + TOBN(0x80da06cf, 0x895fc8d3), TOBN(0x804d8810, 0xb363fdc9), + TOBN(0xbe22877b, 0x207f1670), TOBN(0x9b0dd188, 0x4e615291), + TOBN(0x625ae8dc, 0x97a3c2bf), TOBN(0x08584ef7, 0x439b86e8), + TOBN(0xde7190a5, 0xdcd898ff), TOBN(0x26286c40, 0x2058ee3d), + TOBN(0x3db0b217, 0x5f87b1c1), TOBN(0xcc334771, 0x102a6db5), + TOBN(0xd99de954, 0x2f770fb1), TOBN(0x97c1c620, 0x4cd7535e), + TOBN(0xd3b6c448, 0x3f09cefc), TOBN(0xd725af15, 0x5a63b4f8), + TOBN(0x0c95d24f, 0xc01e20ec), TOBN(0xdfd37494, 0x9ae7121f), + TOBN(0x7d6ddb72, 0xec77b7ec), TOBN(0xfe079d3b, 0x0353a4ae), + TOBN(0x3066e70a, 0x2e6ac8d2), TOBN(0x9c6b5a43, 0x106e5c05), + TOBN(0x52d3c6f5, 0xede59b8c), TOBN(0x30d6a5c3, 0xfccec9ae), + TOBN(0xedec7c22, 0x4fc0a9ef), TOBN(0x190ff083, 0x95c16ced), + TOBN(0xbe12ec8f, 0x94de0fde), TOBN(0x0d131ab8, 0x852d3433), + TOBN(0x42ace07e, 0x85701291), TOBN(0x94793ed9, 0x194061a8), + TOBN(0x30e83ed6, 0xd7f4a485), TOBN(0x9eec7269, 0xf9eeff4d), + TOBN(0x90acba59, 0x0c9d8005), TOBN(0x5feca458, 0x1e79b9d1), + TOBN(0x8fbe5427, 0x1d506a1e), TOBN(0xa32b2c8e, 0x2439cfa7), + TOBN(0x1671c173, 0x73dd0b4e), TOBN(0x37a28214, 0x44a054c6), + TOBN(0x81760a1b, 0x4e8b53f1), TOBN(0xa6c04224, 0xf9f93b9e), + TOBN(0x18784b34, 0xcf671e3c), TOBN(0x81bbecd2, 0xcda9b994), + TOBN(0x38831979, 0xb2ab3848), TOBN(0xef54feb7, 0xf2e03c2d), + TOBN(0xcf197ca7, 0xfb8088fa), TOBN(0x01427247, 0x4ddc96c5), + TOBN(0xa2d2550a, 0x30777176), TOBN(0x53469898, 0x4d0cf71d), + TOBN(0x6ce937b8, 0x3a2aaac6), TOBN(0xe9f91dc3, 0x5af38d9b), + TOBN(0x2598ad83, 0xc8bf2899), TOBN(0x8e706ac9, 0xb5536c16), + TOBN(0x40dc7495, 0xf688dc98), TOBN(0x26490cd7, 0x124c4afc), + TOBN(0xe651ec84, 0x1f18775c), TOBN(0x393ea6c3, 0xb4fdaf4a), + TOBN(0x1e1f3343, 0x7f338e0d), TOBN(0x39fb832b, 0x6053e7b5), + TOBN(0x46e702da, 0x619e14d5), TOBN(0x859cacd1, 0xcdeef6e0), + TOBN(0x63b99ce7, 0x4462007d), TOBN(0xb8ab48a5, 0x4cb5f5b7), + TOBN(0x9ec673d2, 0xf55edde7), TOBN(0xd1567f74, 0x8cfaefda), + TOBN(0x46381b6b, 0x0887bcec), TOBN(0x694497ce, 0xe178f3c2), + TOBN(0x5e6525e3, 0x1e6266cb), TOBN(0x5931de26, 0x697d6413), + TOBN(0x87f8df7c, 0x0e58d493), TOBN(0xb1ae5ed0, 0x58b73f12), + TOBN(0xc368f784, 0xdea0c34d), TOBN(0x9bd0a120, 0x859a91a0), + TOBN(0xb00d88b7, 0xcc863c68), TOBN(0x3a1cc11e, 0x3d1f4d65), + TOBN(0xea38e0e7, 0x0aa85593), TOBN(0x37f13e98, 
0x7dc4aee8), + TOBN(0x10d38667, 0xbc947bad), TOBN(0x738e07ce, 0x2a36ee2e), + TOBN(0xc93470cd, 0xc577fcac), TOBN(0xdee1b616, 0x2782470d), + TOBN(0x36a25e67, 0x2e793d12), TOBN(0xd6aa6cae, 0xe0f186da), + TOBN(0x474d0fd9, 0x80e07af7), TOBN(0xf7cdc47d, 0xba8a5cd4), + TOBN(0x28af6d9d, 0xab15247f), TOBN(0x7c789c10, 0x493a537f), + TOBN(0x7ac9b110, 0x23a334e7), TOBN(0x0236ac09, 0x12c9c277), + TOBN(0xa7e5bd25, 0x1d7a5144), TOBN(0x098b9c2a, 0xf13ec4ec), + TOBN(0x3639daca, 0xd3f0abca), TOBN(0x642da81a, 0xa23960f9), + TOBN(0x7d2e5c05, 0x4f7269b1), TOBN(0xfcf30777, 0xe287c385), + TOBN(0x10edc84f, 0xf2a46f21), TOBN(0x35441757, 0x4f43fa36), + TOBN(0xf1327899, 0xfd703431), TOBN(0xa438d7a6, 0x16dd587a), + TOBN(0x65c34c57, 0xe9c8352d), TOBN(0xa728edab, 0x5cc5a24e), + TOBN(0xaed78abc, 0x42531689), TOBN(0x0a51a0e8, 0x010963ef), + TOBN(0x5776fa0a, 0xd717d9b3), TOBN(0xf356c239, 0x7dd3428b), + TOBN(0x29903fff, 0x8d3a3dac), TOBN(0x409597fa, 0x3d94491f), + TOBN(0x4cd7a5ff, 0xbf4a56a4), TOBN(0xe5096474, 0x8adab462), + TOBN(0xa97b5126, 0x5c3427b0), TOBN(0x6401405c, 0xd282c9bd), + TOBN(0x3629f8d7, 0x222c5c45), TOBN(0xb1c02c16, 0xe8d50aed), + TOBN(0xbea2ed75, 0xd9635bc9), TOBN(0x226790c7, 0x6e24552f), + TOBN(0x3c33f2a3, 0x65f1d066), TOBN(0x2a43463e, 0x6dfccc2e), + TOBN(0x8cc3453a, 0xdb483761), TOBN(0xe7cc6085, 0x65d5672b), + TOBN(0x277ed6cb, 0xde3efc87), TOBN(0x19f2f368, 0x69234eaf), + TOBN(0x9aaf4317, 0x5c0b800b), TOBN(0x1f1e7c89, 0x8b6da6e2), + TOBN(0x6cfb4715, 0xb94ec75e), TOBN(0xd590dd5f, 0x453118c2), + TOBN(0x14e49da1, 0x1f17a34c), TOBN(0x5420ab39, 0x235a1456), + TOBN(0xb7637241, 0x2f50363b), TOBN(0x7b15d623, 0xc3fabb6e), + TOBN(0xa0ef40b1, 0xe274e49c), TOBN(0x5cf50744, 0x96b1860a), + TOBN(0xd6583fbf, 0x66afe5a4), TOBN(0x44240510, 0xf47e3e9a), + TOBN(0x99254343, 0x11b2d595), TOBN(0xf1367499, 0xeec8df57), + TOBN(0x3cb12c61, 0x3e73dd05), TOBN(0xd248c033, 0x7dac102a), + TOBN(0xcf154f13, 0xa77739f5), TOBN(0xbf4288cb, 0x23d2af42), + TOBN(0xaa64c9b6, 0x32e4a1cf), TOBN(0xee8c07a8, 0xc8a208f3), + TOBN(0xe10d4999, 0x6fe8393f), TOBN(0x0f809a3f, 0xe91f3a32), + TOBN(0x61096d1c, 0x802f63c8), TOBN(0x289e1462, 0x57750d3d), + TOBN(0xed06167e, 0x9889feea), TOBN(0xd5c9c0e2, 0xe0993909), + TOBN(0x46fca0d8, 0x56508ac6), TOBN(0x91826047, 0x4f1b8e83), + TOBN(0x4f2c877a, 0x9a4a2751), TOBN(0x71bd0072, 0xcae6fead), + TOBN(0x38df8dcc, 0x06aa1941), TOBN(0x5a074b4c, 0x63beeaa8), + TOBN(0xd6d65934, 0xc1cec8ed), TOBN(0xa6ecb49e, 0xaabc03bd), + TOBN(0xaade91c2, 0xde8a8415), TOBN(0xcfb0efdf, 0x691136e0), + TOBN(0x11af45ee, 0x23ab3495), TOBN(0xa132df88, 0x0b77463d), + TOBN(0x8923c15c, 0x815d06f4), TOBN(0xc3ceb3f5, 0x0d61a436), + TOBN(0xaf52291d, 0xe88fb1da), TOBN(0xea057974, 0x1da12179), + TOBN(0xb0d7218c, 0xd2fef720), TOBN(0x6c0899c9, 0x8e1d8845), + TOBN(0x98157504, 0x752ddad7), TOBN(0xd60bd74f, 0xa1a68a97), + TOBN(0x7047a3a9, 0xf658fb99), TOBN(0x1f5d86d6, 0x5f8511e4), + TOBN(0xb8a4bc42, 0x4b5a6d88), TOBN(0x69eb2c33, 0x1abefa7d), + TOBN(0x95bf39e8, 0x13c9c510), TOBN(0xf571960a, 0xd48aab43), + TOBN(0x7e8cfbcf, 0x704e23c6), TOBN(0xc71b7d22, 0x28aaa65b), + TOBN(0xa041b2bd, 0x245e3c83), TOBN(0x69b98834, 0xd21854ff), + TOBN(0x89d227a3, 0x963bfeec), TOBN(0x99947aaa, 0xde7da7cb), + TOBN(0x1d9ee9db, 0xee68a9b1), TOBN(0x0a08f003, 0x698ec368), + TOBN(0xe9ea4094, 0x78ef2487), TOBN(0xc8d2d415, 0x02cfec26), + TOBN(0xc52f9a6e, 0xb7dcf328), TOBN(0x0ed489e3, 0x85b6a937), + TOBN(0x9b94986b, 0xbef3366e), TOBN(0x0de59c70, 0xedddddb8), + TOBN(0xffdb748c, 0xeadddbe2), TOBN(0x9b9784bb, 0x8266ea40), + TOBN(0x142b5502, 0x1a93507a), TOBN(0xb4cd1187, 0x8d3c06cf), + 
TOBN(0xdf70e76a, 0x91ec3f40), TOBN(0x484e81ad, 0x4e7553c2), + TOBN(0x830f87b5, 0x272e9d6e), TOBN(0xea1c93e5, 0xc6ff514a), + TOBN(0x67cc2adc, 0xc4192a8e), TOBN(0xc77e27e2, 0x42f4535a), + TOBN(0x9cdbab36, 0xd2b713c5), TOBN(0x86274ea0, 0xcf7b0cd3), + TOBN(0x784680f3, 0x09af826b), TOBN(0xbfcc837a, 0x0c72dea3), + TOBN(0xa8bdfe9d, 0xd6529b73), TOBN(0x708aa228, 0x63a88002), + TOBN(0x6c7a9a54, 0xc91d45b9), TOBN(0xdf1a38bb, 0xfd004f56), + TOBN(0x2e8c9a26, 0xb8bad853), TOBN(0x2d52cea3, 0x3723eae7), + TOBN(0x054d6d81, 0x56ca2830), TOBN(0xa3317d14, 0x9a8dc411), + TOBN(0xa08662fe, 0xfd4ddeda), TOBN(0xed2a153a, 0xb55d792b), + TOBN(0x7035c16a, 0xbfc6e944), TOBN(0xb6bc5834, 0x00171cf3), + TOBN(0xe27152b3, 0x83d102b6), TOBN(0xfe695a47, 0x0646b848), + TOBN(0xa5bb09d8, 0x916e6d37), TOBN(0xb4269d64, 0x0d17015e), + TOBN(0x8d8156a1, 0x0a1d2285), TOBN(0xfeef6c51, 0x46d26d72), + TOBN(0x9dac57c8, 0x4c5434a7), TOBN(0x0282e5be, 0x59d39e31), + TOBN(0xedfff181, 0x721c486d), TOBN(0x301baf10, 0xbc58824e), + TOBN(0x8136a6aa, 0x00570031), TOBN(0x55aaf78c, 0x1cddde68), + TOBN(0x26829371, 0x59c63952), TOBN(0x3a3bd274, 0x8bc25baf), + TOBN(0xecdf8657, 0xb7e52dc3), TOBN(0x2dd8c087, 0xfd78e6c8), + TOBN(0x20553274, 0xf5531461), TOBN(0x8b4a1281, 0x5d95499b), + TOBN(0xe2c8763a, 0x1a80f9d2), TOBN(0xd1dbe32b, 0x4ddec758), + TOBN(0xaf12210d, 0x30c34169), TOBN(0xba74a953, 0x78baa533), + TOBN(0x3d133c6e, 0xa438f254), TOBN(0xa431531a, 0x201bef5b), + TOBN(0x15295e22, 0xf669d7ec), TOBN(0xca374f64, 0x357fb515), + TOBN(0x8a8406ff, 0xeaa3fdb3), TOBN(0x106ae448, 0xdf3f2da8), + TOBN(0x8f9b0a90, 0x33c8e9a1), TOBN(0x234645e2, 0x71ad5885), + TOBN(0x3d083224, 0x1c0aed14), TOBN(0xf10a7d3e, 0x7a942d46), + TOBN(0x7c11deee, 0x40d5c9be), TOBN(0xb2bae7ff, 0xba84ed98), + TOBN(0x93e97139, 0xaad58ddd), TOBN(0x3d872796, 0x3f6d1fa3), + TOBN(0x483aca81, 0x8569ff13), TOBN(0x8b89a5fb, 0x9a600f72), + TOBN(0x4cbc27c3, 0xc06f2b86), TOBN(0x22130713, 0x63ad9c0b), + TOBN(0xb5358b1e, 0x48ac2840), TOBN(0x18311294, 0xecba9477), + TOBN(0xda58f990, 0xa6946b43), TOBN(0x3098baf9, 0x9ab41819), + TOBN(0x66c4c158, 0x4198da52), TOBN(0xab4fc17c, 0x146bfd1b), + TOBN(0x2f0a4c3c, 0xbf36a908), TOBN(0x2ae9e34b, 0x58cf7838), + TOBN(0xf411529e, 0x3fa11b1f), TOBN(0x21e43677, 0x974af2b4), + TOBN(0x7c20958e, 0xc230793b), TOBN(0x710ea885, 0x16e840f3), + TOBN(0xfc0b21fc, 0xc5dc67cf), TOBN(0x08d51647, 0x88405718), + TOBN(0xd955c21f, 0xcfe49eb7), TOBN(0x9722a5d5, 0x56dd4a1f), + TOBN(0xc9ef50e2, 0xc861baa5), TOBN(0xc0c21a5d, 0x9505ac3e), + TOBN(0xaf6b9a33, 0x8b7c063f), TOBN(0xc6370339, 0x2f4779c1), + TOBN(0x22df99c7, 0x638167c3), TOBN(0xfe6ffe76, 0x795db30c), + TOBN(0x2b822d33, 0xa4854989), TOBN(0xfef031dd, 0x30563aa5), + TOBN(0x16b09f82, 0xd57c667f), TOBN(0xc70312ce, 0xcc0b76f1), + TOBN(0xbf04a9e6, 0xc9118aec), TOBN(0x82fcb419, 0x3409d133), + TOBN(0x1a8ab385, 0xab45d44d), TOBN(0xfba07222, 0x617b83a3), + TOBN(0xb05f50dd, 0x58e81b52), TOBN(0x1d8db553, 0x21ce5aff), + TOBN(0x3097b8d4, 0xe344a873), TOBN(0x7d8d116d, 0xfe36d53e), + TOBN(0x6db22f58, 0x7875e750), TOBN(0x2dc5e373, 0x43e144ea), + TOBN(0xc05f32e6, 0xe799eb95), TOBN(0xe9e5f4df, 0x6899e6ec), + TOBN(0xbdc3bd68, 0x1fab23d5), TOBN(0xb72b8ab7, 0x73af60e6), + TOBN(0x8db27ae0, 0x2cecc84a), TOBN(0x600016d8, 0x7bdb871c), + TOBN(0x42a44b13, 0xd7c46f58), TOBN(0xb8919727, 0xc3a77d39), + TOBN(0xcfc6bbbd, 0xdafd6088), TOBN(0x1a740146, 0x6bd20d39), + TOBN(0x8c747abd, 0x98c41072), TOBN(0x4c91e765, 0xbdf68ea1), + TOBN(0x7c95e5ca, 0x08819a78), TOBN(0xcf48b729, 0xc9587921), + TOBN(0x091c7c5f, 0xdebbcc7d), TOBN(0x6f287404, 0xf0e05149), + TOBN(0xf83b5ac2, 
0x26cd44ec), TOBN(0x88ae32a6, 0xcfea250e), + TOBN(0x6ac5047a, 0x1d06ebc5), TOBN(0xc7e550b4, 0xd434f781), + TOBN(0x61ab1cf2, 0x5c727bd2), TOBN(0x2e4badb1, 0x1cf915b0), + TOBN(0x1b4dadec, 0xf69d3920), TOBN(0xe61b1ca6, 0xf14c1dfe), + TOBN(0x90b479cc, 0xbd6bd51f), TOBN(0x8024e401, 0x8045ec30), + TOBN(0xcab29ca3, 0x25ef0e62), TOBN(0x4f2e9416, 0x49e4ebc0), + TOBN(0x45eb40ec, 0x0ccced58), TOBN(0x25cd4b9c, 0x0da44f98), + TOBN(0x43e06458, 0x871812c6), TOBN(0x99f80d55, 0x16cef651), + TOBN(0x571340c9, 0xce6dc153), TOBN(0x138d5117, 0xd8665521), + TOBN(0xacdb45bc, 0x4e07014d), TOBN(0x2f34bb38, 0x84b60b91), + TOBN(0xf44a4fd2, 0x2ae8921e), TOBN(0xb039288e, 0x892ba1e2), + TOBN(0x9da50174, 0xb1c180b2), TOBN(0x6b70ab66, 0x1693dc87), + TOBN(0x7e9babc9, 0xe7057481), TOBN(0x4581ddef, 0x9c80dc41), + TOBN(0x0c890da9, 0x51294682), TOBN(0x0b5629d3, 0x3f4736e5), + TOBN(0x2340c79e, 0xb06f5b41), TOBN(0xa42e84ce, 0x4e243469), + TOBN(0xf9a20135, 0x045a71a9), TOBN(0xefbfb415, 0xd27b6fb6), + TOBN(0x25ebea23, 0x9d33cd6f), TOBN(0x9caedb88, 0xaa6c0af8), + TOBN(0x53dc7e9a, 0xd9ce6f96), TOBN(0x3897f9fd, 0x51e0b15a), + TOBN(0xf51cb1f8, 0x8e5d788e), TOBN(0x1aec7ba8, 0xe1d490ee), + TOBN(0x265991e0, 0xcc58cb3c), TOBN(0x9f306e8c, 0x9fc3ad31), + TOBN(0x5fed006e, 0x5040a0ac), TOBN(0xca9d5043, 0xfb476f2e), + TOBN(0xa19c06e8, 0xbeea7a23), TOBN(0xd2865801, 0x0edabb63), + TOBN(0xdb92293f, 0x6967469a), TOBN(0x2894d839, 0x8d8a8ed8), + TOBN(0x87c9e406, 0xbbc77122), TOBN(0x8671c6f1, 0x2ea3a26a), + TOBN(0xe42df8d6, 0xd7de9853), TOBN(0x2e3ce346, 0xb1f2bcc7), + TOBN(0xda601dfc, 0x899d50cf), TOBN(0xbfc913de, 0xfb1b598f), + TOBN(0x81c4909f, 0xe61f7908), TOBN(0x192e304f, 0x9bbc7b29), + TOBN(0xc3ed8738, 0xc104b338), TOBN(0xedbe9e47, 0x783f5d61), + TOBN(0x0c06e9be, 0x2db30660), TOBN(0xda3e613f, 0xc0eb7d8e), + TOBN(0xd8fa3e97, 0x322e096e), TOBN(0xfebd91e8, 0xd336e247), + TOBN(0x8f13ccc4, 0xdf655a49), TOBN(0xa9e00dfc, 0x5eb20210), + TOBN(0x84631d0f, 0xc656b6ea), TOBN(0x93a058cd, 0xd8c0d947), + TOBN(0x6846904a, 0x67bd3448), TOBN(0x4a3d4e1a, 0xf394fd5c), + TOBN(0xc102c1a5, 0xdb225f52), TOBN(0xe3455bba, 0xfc4f5e9a), + TOBN(0x6b36985b, 0x4b9ad1ce), TOBN(0xa9818536, 0x5bb7f793), + TOBN(0x6c25e1d0, 0x48b1a416), TOBN(0x1381dd53, 0x3c81bee7), + TOBN(0xd2a30d61, 0x7a4a7620), TOBN(0xc8412926, 0x39b8944c), + TOBN(0x3c1c6fbe, 0x7a97c33a), TOBN(0x941e541d, 0x938664e7), + TOBN(0x417499e8, 0x4a34f239), TOBN(0x15fdb83c, 0xb90402d5), + TOBN(0xb75f46bf, 0x433aa832), TOBN(0xb61e15af, 0x63215db1), + TOBN(0xaabe59d4, 0xa127f89a), TOBN(0x5d541e0c, 0x07e816da), + TOBN(0xaaba0659, 0xa618b692), TOBN(0x55327733, 0x17266026), + TOBN(0xaf53a0fc, 0x95f57552), TOBN(0x32947650, 0x6cacb0c9), + TOBN(0x253ff58d, 0xc821be01), TOBN(0xb0309531, 0xa06f1146), + TOBN(0x59bbbdf5, 0x05c2e54d), TOBN(0x158f27ad, 0x26e8dd22), + TOBN(0xcc5b7ffb, 0x397e1e53), TOBN(0xae03f65b, 0x7fc1e50d), + TOBN(0xa9784ebd, 0x9c95f0f9), TOBN(0x5ed9deb2, 0x24640771), + TOBN(0x31244af7, 0x035561c4), TOBN(0x87332f3a, 0x7ee857de), + TOBN(0x09e16e9e, 0x2b9e0d88), TOBN(0x52d910f4, 0x56a06049), + TOBN(0x507ed477, 0xa9592f48), TOBN(0x85cb917b, 0x2365d678), + TOBN(0xf8511c93, 0x4c8998d1), TOBN(0x2186a3f1, 0x730ea58f), + TOBN(0x50189626, 0xb2029db0), TOBN(0x9137a6d9, 0x02ceb75a), + TOBN(0x2fe17f37, 0x748bc82c), TOBN(0x87c2e931, 0x80469f8c), + TOBN(0x850f71cd, 0xbf891aa2), TOBN(0x0ca1b89b, 0x75ec3d8d), + TOBN(0x516c43aa, 0x5e1cd3cd), TOBN(0x89397808, 0x9a887c28), + TOBN(0x0059c699, 0xddea1f9f), TOBN(0x7737d6fa, 0x8e6868f7), + TOBN(0x6d93746a, 0x60f1524b), TOBN(0x36985e55, 0xba052aa7), + TOBN(0x41b1d322, 0xed923ea5), 
TOBN(0x3429759f, 0x25852a11), + TOBN(0xbeca6ec3, 0x092e9f41), TOBN(0x3a238c66, 0x62256bbd), + TOBN(0xd82958ea, 0x70ad487d), TOBN(0x4ac8aaf9, 0x65610d93), + TOBN(0x3fa101b1, 0x5e4ccab0), TOBN(0x9bf430f2, 0x9de14bfb), + TOBN(0xa10f5cc6, 0x6531899d), TOBN(0x590005fb, 0xea8ce17d), + TOBN(0xc437912f, 0x24544cb6), TOBN(0x9987b71a, 0xd79ac2e3), + TOBN(0x13e3d9dd, 0xc058a212), TOBN(0x00075aac, 0xd2de9606), + TOBN(0x80ab508b, 0x6cac8369), TOBN(0x87842be7, 0xf54f6c89), + TOBN(0xa7ad663d, 0x6bc532a4), TOBN(0x67813de7, 0x78a91bc8), + TOBN(0x5dcb61ce, 0xc3427239), TOBN(0x5f3c7cf0, 0xc56934d9), + TOBN(0xc079e0fb, 0xe3191591), TOBN(0xe40896bd, 0xb01aada7), + TOBN(0x8d466791, 0x0492d25f), TOBN(0x8aeb30c9, 0xe7408276), + TOBN(0xe9437495, 0x9287aacc), TOBN(0x23d4708d, 0x79fe03d4), + TOBN(0x8cda9cf2, 0xd0c05199), TOBN(0x502fbc22, 0xfae78454), + TOBN(0xc0bda9df, 0xf572a182), TOBN(0x5f9b71b8, 0x6158b372), + TOBN(0xe0f33a59, 0x2b82dd07), TOBN(0x76302735, 0x9523032e), + TOBN(0x7fe1a721, 0xc4505a32), TOBN(0x7b6e3e82, 0xf796409f),} + , + {TOBN(0xe3417bc0, 0x35d0b34a), TOBN(0x440b386b, 0x8327c0a7), + TOBN(0x8fb7262d, 0xac0362d1), TOBN(0x2c41114c, 0xe0cdf943), + TOBN(0x2ba5cef1, 0xad95a0b1), TOBN(0xc09b37a8, 0x67d54362), + TOBN(0x26d6cdd2, 0x01e486c9), TOBN(0x20477abf, 0x42ff9297), + TOBN(0xa004dcb3, 0x292a9287), TOBN(0xddc15cf6, 0x77b092c7), + TOBN(0x083a8464, 0x806c0605), TOBN(0x4a68df70, 0x3db997b0), + TOBN(0x9c134e45, 0x05bf7dd0), TOBN(0xa4e63d39, 0x8ccf7f8c), + TOBN(0xa6e6517f, 0x41b5f8af), TOBN(0xaa8b9342, 0xad7bc1cc), + TOBN(0x126f35b5, 0x1e706ad9), TOBN(0xb99cebb4, 0xc3a9ebdf), + TOBN(0xa75389af, 0xbf608d90), TOBN(0x76113c4f, 0xc6c89858), + TOBN(0x80de8eb0, 0x97e2b5aa), TOBN(0x7e1022cc, 0x63b91304), + TOBN(0x3bdab605, 0x6ccc066c), TOBN(0x33cbb144, 0xb2edf900), + TOBN(0xc4176471, 0x7af715d2), TOBN(0xe2f7f594, 0xd0134a96), + TOBN(0x2c1873ef, 0xa41ec956), TOBN(0xe4e7b4f6, 0x77821304), + TOBN(0xe5c8ff97, 0x88d5374a), TOBN(0x2b915e63, 0x80823d5b), + TOBN(0xea6bc755, 0xb2ee8fe2), TOBN(0x6657624c, 0xe7112651), + TOBN(0x157af101, 0xdace5aca), TOBN(0xc4fdbcf2, 0x11a6a267), + TOBN(0xdaddf340, 0xc49c8609), TOBN(0x97e49f52, 0xe9604a65), + TOBN(0x9be8e790, 0x937e2ad5), TOBN(0x846e2508, 0x326e17f1), + TOBN(0x3f38007a, 0x0bbbc0dc), TOBN(0xcf03603f, 0xb11e16d6), + TOBN(0xd6f800e0, 0x7442f1d5), TOBN(0x475607d1, 0x66e0e3ab), + TOBN(0x82807f16, 0xb7c64047), TOBN(0x8858e1e3, 0xa749883d), + TOBN(0x5859120b, 0x8231ee10), TOBN(0x1b80e7eb, 0x638a1ece), + TOBN(0xcb72525a, 0xc6aa73a4), TOBN(0xa7cdea3d, 0x844423ac), + TOBN(0x5ed0c007, 0xf8ae7c38), TOBN(0x6db07a5c, 0x3d740192), + TOBN(0xbe5e9c2a, 0x5fe36db3), TOBN(0xd5b9d57a, 0x76e95046), + TOBN(0x54ac32e7, 0x8eba20f2), TOBN(0xef11ca8f, 0x71b9a352), + TOBN(0x305e373e, 0xff98a658), TOBN(0xffe5a100, 0x823eb667), + TOBN(0x57477b11, 0xe51732d2), TOBN(0xdfd6eb28, 0x2538fc0e), + TOBN(0x5c43b0cc, 0x3b39eec5), TOBN(0x6af12778, 0xcb36cc57), + TOBN(0x70b0852d, 0x06c425ae), TOBN(0x6df92f8c, 0x5c221b9b), + TOBN(0x6c8d4f9e, 0xce826d9c), TOBN(0xf59aba7b, 0xb49359c3), + TOBN(0x5c8ed8d5, 0xda64309d), TOBN(0x61a6de56, 0x91b30704), + TOBN(0xd6b52f6a, 0x2f9b5808), TOBN(0x0eee4194, 0x98c958a7), + TOBN(0xcddd9aab, 0x771e4caa), TOBN(0x83965dfd, 0x78bc21be), + TOBN(0x02affce3, 0xb3b504f5), TOBN(0x30847a21, 0x561c8291), + TOBN(0xd2eb2cf1, 0x52bfda05), TOBN(0xe0e4c4e9, 0x6197b98c), + TOBN(0x1d35076c, 0xf8a1726f), TOBN(0x6c06085b, 0x2db11e3d), + TOBN(0x15c0c4d7, 0x4463ba14), TOBN(0x9d292f83, 0x0030238c), + TOBN(0x1311ee8b, 0x3727536d), TOBN(0xfeea86ef, 0xbeaedc1e), + TOBN(0xb9d18cd3, 0x66131e2e), 
TOBN(0xf31d974f, 0x80fe2682), + TOBN(0xb6e49e0f, 0xe4160289), TOBN(0x7c48ec0b, 0x08e92799), + TOBN(0x818111d8, 0xd1989aa7), TOBN(0xb34fa0aa, 0xebf926f9), + TOBN(0xdb5fe2f5, 0xa245474a), TOBN(0xf80a6ebb, 0x3c7ca756), + TOBN(0xa7f96054, 0xafa05dd8), TOBN(0x26dfcf21, 0xfcaf119e), + TOBN(0xe20ef2e3, 0x0564bb59), TOBN(0xef4dca50, 0x61cb02b8), + TOBN(0xcda7838a, 0x65d30672), TOBN(0x8b08d534, 0xfd657e86), + TOBN(0x4c5b4395, 0x46d595c8), TOBN(0x39b58725, 0x425cb836), + TOBN(0x8ea61059, 0x3de9abe3), TOBN(0x40434881, 0x9cdc03be), + TOBN(0x9b261245, 0xcfedce8c), TOBN(0x78c318b4, 0xcf5234a1), + TOBN(0x510bcf16, 0xfde24c99), TOBN(0x2a77cb75, 0xa2c2ff5d), + TOBN(0x9c895c2b, 0x27960fb4), TOBN(0xd30ce975, 0xb0eda42b), + TOBN(0xfda85393, 0x1a62cc26), TOBN(0x23c69b96, 0x50c0e052), + TOBN(0xa227df15, 0xbfc633f3), TOBN(0x2ac78848, 0x1bae7d48), + TOBN(0x487878f9, 0x187d073d), TOBN(0x6c2be919, 0x967f807d), + TOBN(0x765861d8, 0x336e6d8f), TOBN(0x88b8974c, 0xce528a43), + TOBN(0x09521177, 0xff57d051), TOBN(0x2ff38037, 0xfb6a1961), + TOBN(0xfc0aba74, 0xa3d76ad4), TOBN(0x7c764803, 0x25a7ec17), + TOBN(0x7532d75f, 0x48879bc8), TOBN(0xea7eacc0, 0x58ce6bc1), + TOBN(0xc82176b4, 0x8e896c16), TOBN(0x9a30e0b2, 0x2c750fed), + TOBN(0xc37e2c2e, 0x421d3aa4), TOBN(0xf926407c, 0xe84fa840), + TOBN(0x18abc03d, 0x1454e41c), TOBN(0x26605ecd, 0x3f7af644), + TOBN(0x242341a6, 0xd6a5eabf), TOBN(0x1edb84f4, 0x216b668e), + TOBN(0xd836edb8, 0x04010102), TOBN(0x5b337ce7, 0x945e1d8c), + TOBN(0xd2075c77, 0xc055dc14), TOBN(0x2a0ffa25, 0x81d89cdf), + TOBN(0x8ce815ea, 0x6ffdcbaf), TOBN(0xa3428878, 0xfb648867), + TOBN(0x277699cf, 0x884655fb), TOBN(0xfa5b5bd6, 0x364d3e41), + TOBN(0x01f680c6, 0x441e1cb7), TOBN(0x3fd61e66, 0xb70a7d67), + TOBN(0x666ba2dc, 0xcc78cf66), TOBN(0xb3018174, 0x6fdbff77), + TOBN(0x8d4dd0db, 0x168d4668), TOBN(0x259455d0, 0x1dab3a2a), + TOBN(0xf58564c5, 0xcde3acec), TOBN(0x77141925, 0x13adb276), + TOBN(0x527d725d, 0x8a303f65), TOBN(0x55deb6c9, 0xe6f38f7b), + TOBN(0xfd5bb657, 0xb1fa70fb), TOBN(0xfa07f50f, 0xd8073a00), + TOBN(0xf72e3aa7, 0xbca02500), TOBN(0xf68f895d, 0x9975740d), + TOBN(0x30112060, 0x5cae2a6a), TOBN(0x01bd7218, 0x02874842), + TOBN(0x3d423891, 0x7ce47bd3), TOBN(0xa66663c1, 0x789544f6), + TOBN(0x864d05d7, 0x3272d838), TOBN(0xe22924f9, 0xfa6295c5), + TOBN(0x8189593f, 0x6c2fda32), TOBN(0x330d7189, 0xb184b544), + TOBN(0x79efa62c, 0xbde1f714), TOBN(0x35771c94, 0xe5cb1a63), + TOBN(0x2f4826b8, 0x641c8332), TOBN(0x00a894fb, 0xc8cee854), + TOBN(0xb4b9a39b, 0x36194d40), TOBN(0xe857a7c5, 0x77612601), + TOBN(0xf4209dd2, 0x4ecf2f58), TOBN(0x82b9e66d, 0x5a033487), + TOBN(0xc1e36934, 0xe4e8b9dd), TOBN(0xd2372c9d, 0xa42377d7), + TOBN(0x51dc94c7, 0x0e3ae43b), TOBN(0x4c57761e, 0x04474f6f), + TOBN(0xdcdacd0a, 0x1058a318), TOBN(0x369cf3f5, 0x78053a9a), + TOBN(0xc6c3de50, 0x31c68de2), TOBN(0x4653a576, 0x3c4b6d9f), + TOBN(0x1688dd5a, 0xaa4e5c97), TOBN(0x5be80aa1, 0xb7ab3c74), + TOBN(0x70cefe7c, 0xbc65c283), TOBN(0x57f95f13, 0x06867091), + TOBN(0xa39114e2, 0x4415503b), TOBN(0xc08ff7c6, 0x4cbb17e9), + TOBN(0x1eff674d, 0xd7dec966), TOBN(0x6d4690af, 0x53376f63), + TOBN(0xff6fe32e, 0xea74237b), TOBN(0xc436d17e, 0xcd57508e), + TOBN(0x15aa28e1, 0xedcc40fe), TOBN(0x0d769c04, 0x581bbb44), + TOBN(0xc240b6de, 0x34eaacda), TOBN(0xd9e116e8, 0x2ba0f1de), + TOBN(0xcbe45ec7, 0x79438e55), TOBN(0x91787c9d, 0x96f752d7), + TOBN(0x897f532b, 0xf129ac2f), TOBN(0xd307b7c8, 0x5a36e22c), + TOBN(0x91940675, 0x749fb8f3), TOBN(0xd14f95d0, 0x157fdb28), + TOBN(0xfe51d029, 0x6ae55043), TOBN(0x8931e98f, 0x44a87de1), + TOBN(0xe57f1cc6, 0x09e4fee2), TOBN(0x0d063b67, 
0x4e072d92), + TOBN(0x70a998b9, 0xed0e4316), TOBN(0xe74a736b, 0x306aca46), + TOBN(0xecf0fbf2, 0x4fda97c7), TOBN(0xa40f65cb, 0x3e178d93), + TOBN(0x16253604, 0x16df4285), TOBN(0xb0c9babb, 0xd0c56ae2), + TOBN(0x73032b19, 0xcfc5cfc3), TOBN(0xe497e5c3, 0x09752056), + TOBN(0x12096bb4, 0x164bda96), TOBN(0x1ee42419, 0xa0b74da1), + TOBN(0x8fc36243, 0x403826ba), TOBN(0x0c8f0069, 0xdc09e660), + TOBN(0x8667e981, 0xc27253c9), TOBN(0x05a6aefb, 0x92b36a45), + TOBN(0xa62c4b36, 0x9cb7bb46), TOBN(0x8394f375, 0x11f7027b), + TOBN(0x747bc79c, 0x5f109d0f), TOBN(0xcad88a76, 0x5b8cc60a), + TOBN(0x80c5a66b, 0x58f09e68), TOBN(0xe753d451, 0xf6127eac), + TOBN(0xc44b74a1, 0x5b0ec6f5), TOBN(0x47989fe4, 0x5289b2b8), + TOBN(0x745f8484, 0x58d6fc73), TOBN(0xec362a6f, 0xf61c70ab), + TOBN(0x070c98a7, 0xb3a8ad41), TOBN(0x73a20fc0, 0x7b63db51), + TOBN(0xed2c2173, 0xf44c35f4), TOBN(0x8a56149d, 0x9acc9dca), + TOBN(0x98f17881, 0x9ac6e0f4), TOBN(0x360fdeaf, 0xa413b5ed), + TOBN(0x0625b8f4, 0xa300b0fd), TOBN(0xf1f4d76a, 0x5b3222d3), + TOBN(0x9d6f5109, 0x587f76b8), TOBN(0x8b4ee08d, 0x2317fdb5), + TOBN(0x88089bb7, 0x8c68b095), TOBN(0x95570e9a, 0x5808d9b9), + TOBN(0xa395c36f, 0x35d33ae7), TOBN(0x200ea123, 0x50bb5a94), + TOBN(0x20c789bd, 0x0bafe84b), TOBN(0x243ef52d, 0x0919276a), + TOBN(0x3934c577, 0xe23ae233), TOBN(0xb93807af, 0xa460d1ec), + TOBN(0xb72a53b1, 0xf8fa76a4), TOBN(0xd8914cb0, 0xc3ca4491), + TOBN(0x2e128494, 0x3fb42622), TOBN(0x3b2700ac, 0x500907d5), + TOBN(0xf370fb09, 0x1a95ec63), TOBN(0xf8f30be2, 0x31b6dfbd), + TOBN(0xf2b2f8d2, 0x69e55f15), TOBN(0x1fead851, 0xcc1323e9), + TOBN(0xfa366010, 0xd9e5eef6), TOBN(0x64d487b0, 0xe316107e), + TOBN(0x4c076b86, 0xd23ddc82), TOBN(0x03fd344c, 0x7e0143f0), + TOBN(0xa95362ff, 0x317af2c5), TOBN(0x0add3db7, 0xe18b7a4f), + TOBN(0x9c673e3f, 0x8260e01b), TOBN(0xfbeb49e5, 0x54a1cc91), + TOBN(0x91351bf2, 0x92f2e433), TOBN(0xc755e7ec, 0x851141eb), + TOBN(0xc9a95139, 0x29607745), TOBN(0x0ca07420, 0xa26f2b28), + TOBN(0xcb2790e7, 0x4bc6f9dd), TOBN(0x345bbb58, 0xadcaffc0), + TOBN(0xc65ea38c, 0xbe0f27a2), TOBN(0x67c24d7c, 0x641fcb56), + TOBN(0x2c25f0a7, 0xa9e2c757), TOBN(0x93f5cdb0, 0x16f16c49), + TOBN(0x2ca5a9d7, 0xc5ee30a1), TOBN(0xd1593635, 0xb909b729), + TOBN(0x804ce9f3, 0xdadeff48), TOBN(0xec464751, 0xb07c30c3), + TOBN(0x89d65ff3, 0x9e49af6a), TOBN(0xf2d6238a, 0x6f3d01bc), + TOBN(0x1095561e, 0x0bced843), TOBN(0x51789e12, 0xc8a13fd8), + TOBN(0xd633f929, 0x763231df), TOBN(0x46df9f7d, 0xe7cbddef), + TOBN(0x01c889c0, 0xcb265da8), TOBN(0xfce1ad10, 0xaf4336d2), + TOBN(0x8d110df6, 0xfc6a0a7e), TOBN(0xdd431b98, 0x6da425dc), + TOBN(0xcdc4aeab, 0x1834aabe), TOBN(0x84deb124, 0x8439b7fc), + TOBN(0x8796f169, 0x3c2a5998), TOBN(0x9b9247b4, 0x7947190d), + TOBN(0x55b9d9a5, 0x11597014), TOBN(0x7e9dd70d, 0x7b1566ee), + TOBN(0x94ad78f7, 0xcbcd5e64), TOBN(0x0359ac17, 0x9bd4c032), + TOBN(0x3b11baaf, 0x7cc222ae), TOBN(0xa6a6e284, 0xba78e812), + TOBN(0x8392053f, 0x24cea1a0), TOBN(0xc97bce4a, 0x33621491), + TOBN(0x7eb1db34, 0x35399ee9), TOBN(0x473f78ef, 0xece81ad1), + TOBN(0x41d72fe0, 0xf63d3d0d), TOBN(0xe620b880, 0xafab62fc), + TOBN(0x92096bc9, 0x93158383), TOBN(0x41a21357, 0x8f896f6c), + TOBN(0x1b5ee2fa, 0xc7dcfcab), TOBN(0x650acfde, 0x9546e007), + TOBN(0xc081b749, 0xb1b02e07), TOBN(0xda9e41a0, 0xf9eca03d), + TOBN(0x013ba727, 0x175a54ab), TOBN(0xca0cd190, 0xea5d8d10), + TOBN(0x85ea52c0, 0x95fd96a9), TOBN(0x2c591b9f, 0xbc5c3940), + TOBN(0x6fb4d4e4, 0x2bad4d5f), TOBN(0xfa4c3590, 0xfef0059b), + TOBN(0x6a10218a, 0xf5122294), TOBN(0x9a78a81a, 0xa85751d1), + TOBN(0x04f20579, 0xa98e84e7), TOBN(0xfe1242c0, 0x4997e5b5), + 
TOBN(0xe77a273b, 0xca21e1e4), TOBN(0xfcc8b1ef, 0x9411939d), + TOBN(0xe20ea302, 0x92d0487a), TOBN(0x1442dbec, 0x294b91fe), + TOBN(0x1f7a4afe, 0xbb6b0e8f), TOBN(0x1700ef74, 0x6889c318), + TOBN(0xf5bbffc3, 0x70f1fc62), TOBN(0x3b31d4b6, 0x69c79cca), + TOBN(0xe8bc2aab, 0xa7f6340d), TOBN(0xb0b08ab4, 0xa725e10a), + TOBN(0x44f05701, 0xae340050), TOBN(0xba4b3016, 0x1cf0c569), + TOBN(0x5aa29f83, 0xfbe19a51), TOBN(0x1b9ed428, 0xb71d752e), + TOBN(0x1666e54e, 0xeb4819f5), TOBN(0x616cdfed, 0x9e18b75b), + TOBN(0x112ed5be, 0x3ee27b0b), TOBN(0xfbf28319, 0x44c7de4d), + TOBN(0xd685ec85, 0xe0e60d84), TOBN(0x68037e30, 0x1db7ee78), + TOBN(0x5b65bdcd, 0x003c4d6e), TOBN(0x33e7363a, 0x93e29a6a), + TOBN(0x995b3a61, 0x08d0756c), TOBN(0xd727f85c, 0x2faf134b), + TOBN(0xfac6edf7, 0x1d337823), TOBN(0x99b9aa50, 0x0439b8b4), + TOBN(0x722eb104, 0xe2b4e075), TOBN(0x49987295, 0x437c4926), + TOBN(0xb1e4c0e4, 0x46a9b82d), TOBN(0xd0cb3197, 0x57a006f5), + TOBN(0xf3de0f7d, 0xd7808c56), TOBN(0xb5c54d8f, 0x51f89772), + TOBN(0x500a114a, 0xadbd31aa), TOBN(0x9afaaaa6, 0x295f6cab), + TOBN(0x94705e21, 0x04cf667a), TOBN(0xfc2a811b, 0x9d3935d7), + TOBN(0x560b0280, 0x6d09267c), TOBN(0xf19ed119, 0xf780e53b), + TOBN(0xf0227c09, 0x067b6269), TOBN(0x967b8533, 0x5caef599), + TOBN(0x155b9243, 0x68efeebc), TOBN(0xcd6d34f5, 0xc497bae6), + TOBN(0x1dd8d5d3, 0x6cceb370), TOBN(0x2aeac579, 0xa78d7bf9), + TOBN(0x5d65017d, 0x70b67a62), TOBN(0x70c8e44f, 0x17c53f67), + TOBN(0xd1fc0950, 0x86a34d09), TOBN(0xe0fca256, 0xe7134907), + TOBN(0xe24fa29c, 0x80fdd315), TOBN(0x2c4acd03, 0xd87499ad), + TOBN(0xbaaf7517, 0x3b5a9ba6), TOBN(0xb9cbe1f6, 0x12e51a51), + TOBN(0xd88edae3, 0x5e154897), TOBN(0xe4309c3c, 0x77b66ca0), + TOBN(0xf5555805, 0xf67f3746), TOBN(0x85fc37ba, 0xa36401ff), + TOBN(0xdf86e2ca, 0xd9499a53), TOBN(0x6270b2a3, 0xecbc955b), + TOBN(0xafae64f5, 0x974ad33b), TOBN(0x04d85977, 0xfe7b2df1), + TOBN(0x2a3db3ff, 0x4ab03f73), TOBN(0x0b87878a, 0x8702740a), + TOBN(0x6d263f01, 0x5a061732), TOBN(0xc25430ce, 0xa32a1901), + TOBN(0xf7ebab3d, 0xdb155018), TOBN(0x3a86f693, 0x63a9b78e), + TOBN(0x349ae368, 0xda9f3804), TOBN(0x470f07fe, 0xa164349c), + TOBN(0xd52f4cc9, 0x8562baa5), TOBN(0xc74a9e86, 0x2b290df3), + TOBN(0xd3a1aa35, 0x43471a24), TOBN(0x239446be, 0xb8194511), + TOBN(0xbec2dd00, 0x81dcd44d), TOBN(0xca3d7f0f, 0xc42ac82d), + TOBN(0x1f3db085, 0xfdaf4520), TOBN(0xbb6d3e80, 0x4549daf2), + TOBN(0xf5969d8a, 0x19ad5c42), TOBN(0x7052b13d, 0xdbfd1511), + TOBN(0x11890d1b, 0x682b9060), TOBN(0xa71d3883, 0xac34452c), + TOBN(0xa438055b, 0x783805b4), TOBN(0x43241277, 0x4725b23e), + TOBN(0xf20cf96e, 0x4901bbed), TOBN(0x6419c710, 0xf432a2bb), + TOBN(0x57a0fbb9, 0xdfa9cd7d), TOBN(0x589111e4, 0x00daa249), + TOBN(0x19809a33, 0x7b60554e), TOBN(0xea5f8887, 0xede283a4), + TOBN(0x2d713802, 0x503bfd35), TOBN(0x151bb0af, 0x585d2a53), + TOBN(0x40b08f74, 0x43b30ca8), TOBN(0xe10b5bba, 0xd9934583), + TOBN(0xe8a546d6, 0xb51110ad), TOBN(0x1dd50e66, 0x28e0b6c5), + TOBN(0x292e9d54, 0xcff2b821), TOBN(0x3882555d, 0x47281760), + TOBN(0x134838f8, 0x3724d6e3), TOBN(0xf2c679e0, 0x22ddcda1), + TOBN(0x40ee8815, 0x6d2a5768), TOBN(0x7f227bd2, 0x1c1e7e2d), + TOBN(0x487ba134, 0xd04ff443), TOBN(0x76e2ff3d, 0xc614e54b), + TOBN(0x36b88d6f, 0xa3177ec7), TOBN(0xbf731d51, 0x2328fff5), + TOBN(0x758caea2, 0x49ba158e), TOBN(0x5ab8ff4c, 0x02938188), + TOBN(0x33e16056, 0x35edc56d), TOBN(0x5a69d349, 0x7e940d79), + TOBN(0x6c4fd001, 0x03866dcb), TOBN(0x20a38f57, 0x4893cdef), + TOBN(0xfbf3e790, 0xfac3a15b), TOBN(0x6ed7ea2e, 0x7a4f8e6b), + TOBN(0xa663eb4f, 0xbc3aca86), TOBN(0x22061ea5, 0x080d53f7), + TOBN(0x2480dfe6, 
0xf546783f), TOBN(0xd38bc6da, 0x5a0a641e), + TOBN(0xfb093cd1, 0x2ede8965), TOBN(0x89654db4, 0xacb455cf), + TOBN(0x413cbf9a, 0x26e1adee), TOBN(0x291f3764, 0x373294d4), + TOBN(0x00797257, 0x648083fe), TOBN(0x25f504d3, 0x208cc341), + TOBN(0x635a8e5e, 0xc3a0ee43), TOBN(0x70aaebca, 0x679898ff), + TOBN(0x9ee9f547, 0x5dc63d56), TOBN(0xce987966, 0xffb34d00), + TOBN(0xf9f86b19, 0x5e26310a), TOBN(0x9e435484, 0x382a8ca8), + TOBN(0x253bcb81, 0xc2352fe4), TOBN(0xa4eac8b0, 0x4474b571), + TOBN(0xc1b97512, 0xc1ad8cf8), TOBN(0x193b4e9e, 0x99e0b697), + TOBN(0x939d2716, 0x01e85df0), TOBN(0x4fb265b3, 0xcd44eafd), + TOBN(0x321e7dcd, 0xe51e1ae2), TOBN(0x8e3a8ca6, 0xe3d8b096), + TOBN(0x8de46cb0, 0x52604998), TOBN(0x91099ad8, 0x39072aa7), + TOBN(0x2617f91c, 0x93aa96b8), TOBN(0x0fc8716b, 0x7fca2e13), + TOBN(0xa7106f5e, 0x95328723), TOBN(0xd1c9c40b, 0x262e6522), + TOBN(0xb9bafe86, 0x42b7c094), TOBN(0x1873439d, 0x1543c021), + TOBN(0xe1baa5de, 0x5cbefd5d), TOBN(0xa363fc5e, 0x521e8aff), + TOBN(0xefe6320d, 0xf862eaac), TOBN(0x14419c63, 0x22c647dc), + TOBN(0x0e06707c, 0x4e46d428), TOBN(0xcb6c834f, 0x4a178f8f), + TOBN(0x0f993a45, 0xd30f917c), TOBN(0xd4c4b049, 0x9879afee), + TOBN(0xb6142a1e, 0x70500063), TOBN(0x7c9b41c3, 0xa5d9d605), + TOBN(0xbc00fc2f, 0x2f8ba2c7), TOBN(0x0966eb2f, 0x7c67aa28), + TOBN(0x13f7b516, 0x5a786972), TOBN(0x3bfb7557, 0x8a2fbba0), + TOBN(0x131c4f23, 0x5a2b9620), TOBN(0xbff3ed27, 0x6faf46be), + TOBN(0x9b4473d1, 0x7e172323), TOBN(0x421e8878, 0x339f6246), + TOBN(0x0fa8587a, 0x25a41632), TOBN(0xc0814124, 0xa35b6c93), + TOBN(0x2b18a9f5, 0x59ebb8db), TOBN(0x264e3357, 0x76edb29c), + TOBN(0xaf245ccd, 0xc87c51e2), TOBN(0x16b3015b, 0x501e6214), + TOBN(0xbb31c560, 0x0a3882ce), TOBN(0x6961bb94, 0xfec11e04), + TOBN(0x3b825b8d, 0xeff7a3a0), TOBN(0xbec33738, 0xb1df7326), + TOBN(0x68ad747c, 0x99604a1f), TOBN(0xd154c934, 0x9a3bd499), + TOBN(0xac33506f, 0x1cc7a906), TOBN(0x73bb5392, 0x6c560e8f), + TOBN(0x6428fcbe, 0x263e3944), TOBN(0xc11828d5, 0x1c387434), + TOBN(0x3cd04be1, 0x3e4b12ff), TOBN(0xc3aad9f9, 0x2d88667c), + TOBN(0xc52ddcf8, 0x248120cf), TOBN(0x985a892e, 0x2a389532), + TOBN(0xfbb4b21b, 0x3bb85fa0), TOBN(0xf95375e0, 0x8dfc6269), + TOBN(0xfb4fb06c, 0x7ee2acea), TOBN(0x6785426e, 0x309c4d1f), + TOBN(0x659b17c8, 0xd8ceb147), TOBN(0x9b649eee, 0xb70a5554), + TOBN(0x6b7fa0b5, 0xac6bc634), TOBN(0xd99fe2c7, 0x1d6e732f), + TOBN(0x30e6e762, 0x8d3abba2), TOBN(0x18fee6e7, 0xa797b799), + TOBN(0x5c9d360d, 0xc696464d), TOBN(0xe3baeb48, 0x27bfde12), + TOBN(0x2bf5db47, 0xf23206d5), TOBN(0x2f6d3420, 0x1d260152), + TOBN(0x17b87653, 0x3f8ff89a), TOBN(0x5157c30c, 0x378fa458), + TOBN(0x7517c5c5, 0x2d4fb936), TOBN(0xef22f7ac, 0xe6518cdc), + TOBN(0xdeb483e6, 0xbf847a64), TOBN(0xf5084558, 0x92e0fa89),} + , + {TOBN(0xab9659d8, 0xdf7304d4), TOBN(0xb71bcf1b, 0xff210e8e), + TOBN(0xa9a2438b, 0xd73fbd60), TOBN(0x4595cd1f, 0x5d11b4de), + TOBN(0x9c0d329a, 0x4835859d), TOBN(0x4a0f0d2d, 0x7dbb6e56), + TOBN(0xc6038e5e, 0xdf928a4e), TOBN(0xc9429621, 0x8f5ad154), + TOBN(0x91213462, 0xf23f2d92), TOBN(0x6cab71bd, 0x60b94078), + TOBN(0x6bdd0a63, 0x176cde20), TOBN(0x54c9b20c, 0xee4d54bc), + TOBN(0x3cd2d8aa, 0x9f2ac02f), TOBN(0x03f8e617, 0x206eedb0), + TOBN(0xc7f68e16, 0x93086434), TOBN(0x831469c5, 0x92dd3db9), + TOBN(0x8521df24, 0x8f981354), TOBN(0x587e23ec, 0x3588a259), + TOBN(0xcbedf281, 0xd7a0992c), TOBN(0x06930a55, 0x38961407), + TOBN(0x09320deb, 0xbe5bbe21), TOBN(0xa7ffa5b5, 0x2491817f), + TOBN(0xe6c8b4d9, 0x09065160), TOBN(0xac4f3992, 0xfff6d2a9), + TOBN(0x7aa7a158, 0x3ae9c1bd), TOBN(0xe0af6d98, 0xe37ce240), + TOBN(0xe54342d9, 0x28ab38b4), 
TOBN(0xe8b75007, 0x0a1c98ca), + TOBN(0xefce86af, 0xe02358f2), TOBN(0x31b8b856, 0xea921228), + TOBN(0x052a1912, 0x0a1c67fc), TOBN(0xb4069ea4, 0xe3aead59), + TOBN(0x3232d6e2, 0x7fa03cb3), TOBN(0xdb938e5b, 0x0fdd7d88), + TOBN(0x04c1d2cd, 0x2ccbfc5d), TOBN(0xd2f45c12, 0xaf3a580f), + TOBN(0x592620b5, 0x7883e614), TOBN(0x5fd27e68, 0xbe7c5f26), + TOBN(0x139e45a9, 0x1567e1e3), TOBN(0x2cc71d2d, 0x44d8aaaf), + TOBN(0x4a9090cd, 0xe36d0757), TOBN(0xf722d7b1, 0xd9a29382), + TOBN(0xfb7fb04c, 0x04b48ddf), TOBN(0x628ad2a7, 0xebe16f43), + TOBN(0xcd3fbfb5, 0x20226040), TOBN(0x6c34ecb1, 0x5104b6c4), + TOBN(0x30c0754e, 0xc903c188), TOBN(0xec336b08, 0x2d23cab0), + TOBN(0x473d62a2, 0x1e206ee5), TOBN(0xf1e27480, 0x8c49a633), + TOBN(0x87ab956c, 0xe9f6b2c3), TOBN(0x61830b48, 0x62b606ea), + TOBN(0x67cd6846, 0xe78e815f), TOBN(0xfe40139f, 0x4c02082a), + TOBN(0x52bbbfcb, 0x952ec365), TOBN(0x74c11642, 0x6b9836ab), + TOBN(0x9f51439e, 0x558df019), TOBN(0x230da4ba, 0xac712b27), + TOBN(0x518919e3, 0x55185a24), TOBN(0x4dcefcdd, 0x84b78f50), + TOBN(0xa7d90fb2, 0xa47d4c5a), TOBN(0x55ac9abf, 0xb30e009e), + TOBN(0xfd2fc359, 0x74eed273), TOBN(0xb72d824c, 0xdbea8faf), + TOBN(0xce721a74, 0x4513e2ca), TOBN(0x0b418612, 0x38240b2c), + TOBN(0x05199968, 0xd5baa450), TOBN(0xeb1757ed, 0x2b0e8c25), + TOBN(0x6ebc3e28, 0x3dfac6d5), TOBN(0xb2431e2e, 0x48a237f5), + TOBN(0x2acb5e23, 0x52f61499), TOBN(0x5558a2a7, 0xe06c936b), + TOBN(0xd213f923, 0xcbb13d1b), TOBN(0x98799f42, 0x5bfb9bfe), + TOBN(0x1ae8ddc9, 0x701144a9), TOBN(0x0b8b3bb6, 0x4c5595ee), + TOBN(0x0ea9ef2e, 0x3ecebb21), TOBN(0x17cb6c4b, 0x3671f9a7), + TOBN(0x47ef464f, 0x726f1d1f), TOBN(0x171b9484, 0x6943a276), + TOBN(0x51a4ae2d, 0x7ef0329c), TOBN(0x08509222, 0x91c4402a), + TOBN(0x64a61d35, 0xafd45bbc), TOBN(0x38f096fe, 0x3035a851), + TOBN(0xc7468b74, 0xa1dec027), TOBN(0xe8cf10e7, 0x4fc7dcba), + TOBN(0xea35ff40, 0xf4a06353), TOBN(0x0b4c0dfa, 0x8b77dd66), + TOBN(0x779b8552, 0xde7e5c19), TOBN(0xfab28609, 0xc1c0256c), + TOBN(0x64f58eee, 0xabd4743d), TOBN(0x4e8ef838, 0x7b6cc93b), + TOBN(0xee650d26, 0x4cb1bf3d), TOBN(0x4c1f9d09, 0x73dedf61), + TOBN(0xaef7c9d7, 0xbfb70ced), TOBN(0x1ec0507e, 0x1641de1e), + TOBN(0xcd7e5cc7, 0xcde45079), TOBN(0xde173c9a, 0x516ac9e4), + TOBN(0x517a8494, 0xc170315c), TOBN(0x438fd905, 0x91d8e8fb), + TOBN(0x5145c506, 0xc7d9630b), TOBN(0x6457a87b, 0xf47d4d75), + TOBN(0xd31646bf, 0x0d9a80e8), TOBN(0x453add2b, 0xcef3aabe), + TOBN(0xc9941109, 0xa607419d), TOBN(0xfaa71e62, 0xbb6bca80), + TOBN(0x34158c13, 0x07c431f3), TOBN(0x594abebc, 0x992bc47a), + TOBN(0x6dfea691, 0xeb78399f), TOBN(0x48aafb35, 0x3f42cba4), + TOBN(0xedcd65af, 0x077c04f0), TOBN(0x1a29a366, 0xe884491a), + TOBN(0x023a40e5, 0x1c21f2bf), TOBN(0xf99a513c, 0xa5057aee), + TOBN(0xa3fe7e25, 0xbcab072e), TOBN(0x8568d2e1, 0x40e32bcf), + TOBN(0x904594eb, 0xd3f69d9f), TOBN(0x181a9733, 0x07affab1), + TOBN(0xe4d68d76, 0xb6e330f4), TOBN(0x87a6dafb, 0xc75a7fc1), + TOBN(0x549db2b5, 0xef7d9289), TOBN(0x2480d4a8, 0x197f015a), + TOBN(0x61d5590b, 0xc40493b6), TOBN(0x3a55b52e, 0x6f780331), + TOBN(0x40eb8115, 0x309eadb0), TOBN(0xdea7de5a, 0x92e5c625), + TOBN(0x64d631f0, 0xcc6a3d5a), TOBN(0x9d5e9d7c, 0x93e8dd61), + TOBN(0xf297bef5, 0x206d3ffc), TOBN(0x23d5e033, 0x7d808bd4), + TOBN(0x4a4f6912, 0xd24cf5ba), TOBN(0xe4d8163b, 0x09cdaa8a), + TOBN(0x0e0de9ef, 0xd3082e8e), TOBN(0x4fe1246c, 0x0192f360), + TOBN(0x1f900150, 0x4b8eee0a), TOBN(0x5219da81, 0xf1da391b), + TOBN(0x7bf6a5c1, 0xf7ea25aa), TOBN(0xd165e6bf, 0xfbb07d5f), + TOBN(0xe3539361, 0x89e78671), TOBN(0xa3fcac89, 0x2bac4219), + TOBN(0xdfab6fd4, 0xf0baa8ab), TOBN(0x5a4adac1, 
0xe2c1c2e5), + TOBN(0x6cd75e31, 0x40d85849), TOBN(0xce263fea, 0x19b39181), + TOBN(0xcb6803d3, 0x07032c72), TOBN(0x7f40d5ce, 0x790968c8), + TOBN(0xa6de86bd, 0xdce978f0), TOBN(0x25547c4f, 0x368f751c), + TOBN(0xb1e685fd, 0x65fb2a9e), TOBN(0xce69336f, 0x1eb9179c), + TOBN(0xb15d1c27, 0x12504442), TOBN(0xb7df465c, 0xb911a06b), + TOBN(0xb8d804a3, 0x315980cd), TOBN(0x693bc492, 0xfa3bebf7), + TOBN(0x3578aeee, 0x2253c504), TOBN(0x158de498, 0xcd2474a2), + TOBN(0x1331f5c7, 0xcfda8368), TOBN(0xd2d7bbb3, 0x78d7177e), + TOBN(0xdf61133a, 0xf3c1e46e), TOBN(0x5836ce7d, 0xd30e7be8), + TOBN(0x83084f19, 0x94f834cb), TOBN(0xd35653d4, 0x429ed782), + TOBN(0xa542f16f, 0x59e58243), TOBN(0xc2b52f65, 0x0470a22d), + TOBN(0xe3b6221b, 0x18f23d96), TOBN(0xcb05abac, 0x3f5252b4), + TOBN(0xca00938b, 0x87d61402), TOBN(0x2f186cdd, 0x411933e4), + TOBN(0xe042ece5, 0x9a29a5c5), TOBN(0xb19b3c07, 0x3b6c8402), + TOBN(0xc97667c7, 0x19d92684), TOBN(0xb5624622, 0xebc66372), + TOBN(0x0cb96e65, 0x3c04fa02), TOBN(0x83a7176c, 0x8eaa39aa), + TOBN(0x2033561d, 0xeaa1633f), TOBN(0x45a9d086, 0x4533df73), + TOBN(0xe0542c1d, 0x3dc090bc), TOBN(0x82c996ef, 0xaa59c167), + TOBN(0xe3f735e8, 0x0ee7fc4d), TOBN(0x7b179393, 0x7c35db79), + TOBN(0xb6419e25, 0xf8c5dbfd), TOBN(0x4d9d7a1e, 0x1f327b04), + TOBN(0x979f6f9b, 0x298dfca8), TOBN(0xc7c5dff1, 0x8de9366a), + TOBN(0x1b7a588d, 0x04c82bdd), TOBN(0x68005534, 0xf8319dfd), + TOBN(0xde8a55b5, 0xd8eb9580), TOBN(0x5ea886da, 0x8d5bca81), + TOBN(0xe8530a01, 0x252a0b4d), TOBN(0x1bffb4fe, 0x35eaa0a1), + TOBN(0x2ad828b1, 0xd8e99563), TOBN(0x7de96ef5, 0x95f9cd87), + TOBN(0x4abb2d0c, 0xd77d970c), TOBN(0x03cfb933, 0xd33ef9cb), + TOBN(0xb0547c01, 0x8b211fe9), TOBN(0x2fe64809, 0xa56ed1c6), + TOBN(0xcb7d5624, 0xc2ac98cc), TOBN(0x2a1372c0, 0x1a393e33), + TOBN(0xc8d1ec1c, 0x29660521), TOBN(0xf3d31b04, 0xb37ac3e9), + TOBN(0xa29ae9df, 0x5ece6e7c), TOBN(0x0603ac8f, 0x0facfb55), + TOBN(0xcfe85b7a, 0xdda233a5), TOBN(0xe618919f, 0xbd75f0b8), + TOBN(0xf555a3d2, 0x99bf1603), TOBN(0x1f43afc9, 0xf184255a), + TOBN(0xdcdaf341, 0x319a3e02), TOBN(0xd3b117ef, 0x03903a39), + TOBN(0xe095da13, 0x65d1d131), TOBN(0x86f16367, 0xc37ad03e), + TOBN(0x5f37389e, 0x462cd8dd), TOBN(0xc103fa04, 0xd67a60e6), + TOBN(0x57c34344, 0xf4b478f0), TOBN(0xce91edd8, 0xe117c98d), + TOBN(0x001777b0, 0x231fc12e), TOBN(0x11ae47f2, 0xb207bccb), + TOBN(0xd983cf8d, 0x20f8a242), TOBN(0x7aff5b1d, 0xf22e1ad8), + TOBN(0x68fd11d0, 0x7fc4feb3), TOBN(0x5d53ae90, 0xb0f1c3e1), + TOBN(0x50fb7905, 0xec041803), TOBN(0x85e3c977, 0x14404888), + TOBN(0x0e67faed, 0xac628d8f), TOBN(0x2e865150, 0x6668532c), + TOBN(0x15acaaa4, 0x6a67a6b0), TOBN(0xf4cdee25, 0xb25cec41), + TOBN(0x49ee565a, 0xe4c6701e), TOBN(0x2a04ca66, 0xfc7d63d8), + TOBN(0xeb105018, 0xef0543fb), TOBN(0xf709a4f5, 0xd1b0d81d), + TOBN(0x5b906ee6, 0x2915d333), TOBN(0xf4a87412, 0x96f1f0ab), + TOBN(0xb6b82fa7, 0x4d82f4c2), TOBN(0x90725a60, 0x6804efb3), + TOBN(0xbc82ec46, 0xadc3425e), TOBN(0xb7b80581, 0x2787843e), + TOBN(0xdf46d91c, 0xdd1fc74c), TOBN(0xdc1c62cb, 0xe783a6c4), + TOBN(0x59d1b9f3, 0x1a04cbba), TOBN(0xd87f6f72, 0x95e40764), + TOBN(0x02b4cfc1, 0x317f4a76), TOBN(0x8d2703eb, 0x91036bce), + TOBN(0x98206cc6, 0xa5e72a56), TOBN(0x57be9ed1, 0xcf53fb0f), + TOBN(0x09374571, 0xef0b17ac), TOBN(0x74b2655e, 0xd9181b38), + TOBN(0xc8f80ea8, 0x89935d0e), TOBN(0xc0d9e942, 0x91529936), + TOBN(0x19686041, 0x1e84e0e5), TOBN(0xa5db84d3, 0xaea34c93), + TOBN(0xf9d5bb19, 0x7073a732), TOBN(0xb8d2fe56, 0x6bcfd7c0), + TOBN(0x45775f36, 0xf3eb82fa), TOBN(0x8cb20ccc, 0xfdff8b58), + TOBN(0x1659b65f, 0x8374c110), TOBN(0xb8b4a422, 0x330c789a), + 
TOBN(0x75e3c3ea, 0x6fe8208b), TOBN(0xbd74b9e4, 0x286e78fe), + TOBN(0x0be2e81b, 0xd7d93a1a), TOBN(0x7ed06e27, 0xdd0a5aae), + TOBN(0x721f5a58, 0x6be8b800), TOBN(0x428299d1, 0xd846db28), + TOBN(0x95cb8e6b, 0x5be88ed3), TOBN(0xc3186b23, 0x1c034e11), + TOBN(0xa6312c9e, 0x8977d99b), TOBN(0xbe944331, 0x83f531e7), + TOBN(0x8232c0c2, 0x18d3b1d4), TOBN(0x617aae8b, 0xe1247b73), + TOBN(0x40153fc4, 0x282aec3b), TOBN(0xc6063d2f, 0xf7b8f823), + TOBN(0x68f10e58, 0x3304f94c), TOBN(0x31efae74, 0xee676346), + TOBN(0xbadb6c6d, 0x40a9b97c), TOBN(0x14702c63, 0x4f666256), + TOBN(0xdeb954f1, 0x5184b2e3), TOBN(0x5184a526, 0x94b6ca40), + TOBN(0xfff05337, 0x003c32ea), TOBN(0x5aa374dd, 0x205974c7), + TOBN(0x9a763854, 0x4b0dd71a), TOBN(0x459cd27f, 0xdeb947ec), + TOBN(0xa6e28161, 0x459c2b92), TOBN(0x2f020fa8, 0x75ee8ef5), + TOBN(0xb132ec2d, 0x30b06310), TOBN(0xc3e15899, 0xbc6a4530), + TOBN(0xdc5f53fe, 0xaa3f451a), TOBN(0x3a3c7f23, 0xc2d9acac), + TOBN(0x2ec2f892, 0x6b27e58b), TOBN(0x68466ee7, 0xd742799f), + TOBN(0x98324dd4, 0x1fa26613), TOBN(0xa2dc6dab, 0xbdc29d63), + TOBN(0xf9675faa, 0xd712d657), TOBN(0x813994be, 0x21fd8d15), + TOBN(0x5ccbb722, 0xfd4f7553), TOBN(0x5135ff8b, 0xf3a36b20), + TOBN(0x44be28af, 0x69559df5), TOBN(0x40b65bed, 0x9d41bf30), + TOBN(0xd98bf2a4, 0x3734e520), TOBN(0x5e3abbe3, 0x209bdcba), + TOBN(0x77c76553, 0xbc945b35), TOBN(0x5331c093, 0xc6ef14aa), + TOBN(0x518ffe29, 0x76b60c80), TOBN(0x2285593b, 0x7ace16f8), + TOBN(0xab1f64cc, 0xbe2b9784), TOBN(0xe8f2c0d9, 0xab2421b6), + TOBN(0x617d7174, 0xc1df065c), TOBN(0xafeeb5ab, 0x5f6578fa), + TOBN(0x16ff1329, 0x263b54a8), TOBN(0x45c55808, 0xc990dce3), + TOBN(0x42eab6c0, 0xecc8c177), TOBN(0x799ea9b5, 0x5982ecaa), + TOBN(0xf65da244, 0xb607ef8e), TOBN(0x8ab226ce, 0x32a3fc2c), + TOBN(0x745741e5, 0x7ea973dc), TOBN(0x5c00ca70, 0x20888f2e), + TOBN(0x7cdce3cf, 0x45fd9cf1), TOBN(0x8a741ef1, 0x5507f872), + TOBN(0x47c51c2f, 0x196b4cec), TOBN(0x70d08e43, 0xc97ea618), + TOBN(0x930da15c, 0x15b18a2b), TOBN(0x33b6c678, 0x2f610514), + TOBN(0xc662e4f8, 0x07ac9794), TOBN(0x1eccf050, 0xba06cb79), + TOBN(0x1ff08623, 0xe7d954e5), TOBN(0x6ef2c5fb, 0x24cf71c3), + TOBN(0xb2c063d2, 0x67978453), TOBN(0xa0cf3796, 0x1d654af8), + TOBN(0x7cb242ea, 0x7ebdaa37), TOBN(0x206e0b10, 0xb86747e0), + TOBN(0x481dae5f, 0xd5ecfefc), TOBN(0x07084fd8, 0xc2bff8fc), + TOBN(0x8040a01a, 0xea324596), TOBN(0x4c646980, 0xd4de4036), + TOBN(0x9eb8ab4e, 0xd65abfc3), TOBN(0xe01cb91f, 0x13541ec7), + TOBN(0x8f029adb, 0xfd695012), TOBN(0x9ae28483, 0x3c7569ec), + TOBN(0xa5614c9e, 0xa66d80a1), TOBN(0x680a3e44, 0x75f5f911), + TOBN(0x0c07b14d, 0xceba4fc1), TOBN(0x891c285b, 0xa13071c1), + TOBN(0xcac67ceb, 0x799ece3c), TOBN(0x29b910a9, 0x41e07e27), + TOBN(0x66bdb409, 0xf2e43123), TOBN(0x06f8b137, 0x7ac9ecbe), + TOBN(0x5981fafd, 0x38547090), TOBN(0x19ab8b9f, 0x85e3415d), + TOBN(0xfc28c194, 0xc7e31b27), TOBN(0x843be0aa, 0x6fbcbb42), + TOBN(0xf3b1ed43, 0xa6db836c), TOBN(0x2a1330e4, 0x01a45c05), + TOBN(0x4f19f3c5, 0x95c1a377), TOBN(0xa85f39d0, 0x44b5ee33), + TOBN(0x3da18e6d, 0x4ae52834), TOBN(0x5a403b39, 0x7423dcb0), + TOBN(0xbb555e0a, 0xf2374aef), TOBN(0x2ad599c4, 0x1e8ca111), + TOBN(0x1b3a2fb9, 0x014b3bf8), TOBN(0x73092684, 0xf66d5007), + TOBN(0x079f1426, 0xc4340102), TOBN(0x1827cf81, 0x8fddf4de), + TOBN(0xc83605f6, 0xf10ff927), TOBN(0xd3871451, 0x23739fc6), + TOBN(0x6d163450, 0xcac1c2cc), TOBN(0x6b521296, 0xa2ec1ac5), + TOBN(0x0606c4f9, 0x6e3cb4a5), TOBN(0xe47d3f41, 0x778abff7), + TOBN(0x425a8d5e, 0xbe8e3a45), TOBN(0x53ea9e97, 0xa6102160), + TOBN(0x477a106e, 0x39cbb688), TOBN(0x532401d2, 0xf3386d32), + TOBN(0x8e564f64, 
0xb1b9b421), TOBN(0xca9b8388, 0x81dad33f), + TOBN(0xb1422b4e, 0x2093913e), TOBN(0x533d2f92, 0x69bc8112), + TOBN(0x3fa017be, 0xebe7b2c7), TOBN(0xb2767c4a, 0xcaf197c6), + TOBN(0xc925ff87, 0xaedbae9f), TOBN(0x7daf0eb9, 0x36880a54), + TOBN(0x9284ddf5, 0x9c4d0e71), TOBN(0x1581cf93, 0x316f8cf5), + TOBN(0x3eeca887, 0x3ac1f452), TOBN(0xb417fce9, 0xfb6aeffe), + TOBN(0xa5918046, 0xeefb8dc3), TOBN(0x73d318ac, 0x02209400), + TOBN(0xe800400f, 0x728693e5), TOBN(0xe87d814b, 0x339927ed), + TOBN(0x93e94d3b, 0x57ea9910), TOBN(0xff8a35b6, 0x2245fb69), + TOBN(0x043853d7, 0x7f200d34), TOBN(0x470f1e68, 0x0f653ce1), + TOBN(0x81ac05bd, 0x59a06379), TOBN(0xa14052c2, 0x03930c29), + TOBN(0x6b72fab5, 0x26bc2797), TOBN(0x13670d16, 0x99f16771), + TOBN(0x00170052, 0x1e3e48d1), TOBN(0x978fe401, 0xb7adf678), + TOBN(0x55ecfb92, 0xd41c5dd4), TOBN(0x5ff8e247, 0xc7b27da5), + TOBN(0xe7518272, 0x013fb606), TOBN(0x5768d7e5, 0x2f547a3c), + TOBN(0xbb24eaa3, 0x60017a5f), TOBN(0x6b18e6e4, 0x9c64ce9b), + TOBN(0xc225c655, 0x103dde07), TOBN(0xfc3672ae, 0x7592f7ea), + TOBN(0x9606ad77, 0xd06283a1), TOBN(0x542fc650, 0xe4d59d99), + TOBN(0xabb57c49, 0x2a40e7c2), TOBN(0xac948f13, 0xa8db9f55), + TOBN(0x6d4c9682, 0xb04465c3), TOBN(0xe3d062fa, 0x6468bd15), + TOBN(0xa51729ac, 0x5f318d7e), TOBN(0x1fc87df6, 0x9eb6fc95), + TOBN(0x63d146a8, 0x0591f652), TOBN(0xa861b8f7, 0x589621aa), + TOBN(0x59f5f15a, 0xce31348c), TOBN(0x8f663391, 0x440da6da), + TOBN(0xcfa778ac, 0xb591ffa3), TOBN(0x027ca9c5, 0x4cdfebce), + TOBN(0xbe8e05a5, 0x444ea6b3), TOBN(0x8aab4e69, 0xa78d8254), + TOBN(0x2437f04f, 0xb474d6b8), TOBN(0x6597ffd4, 0x045b3855), + TOBN(0xbb0aea4e, 0xca47ecaa), TOBN(0x568aae83, 0x85c7ebfc), + TOBN(0x0e966e64, 0xc73b2383), TOBN(0x49eb3447, 0xd17d8762), + TOBN(0xde107821, 0x8da05dab), TOBN(0x443d8baa, 0x016b7236), + TOBN(0x163b63a5, 0xea7610d6), TOBN(0xe47e4185, 0xce1ca979), + TOBN(0xae648b65, 0x80baa132), TOBN(0xebf53de2, 0x0e0d5b64), + TOBN(0x8d3bfcb4, 0xd3c8c1ca), TOBN(0x0d914ef3, 0x5d04b309), + TOBN(0x55ef6415, 0x3de7d395), TOBN(0xbde1666f, 0x26b850e8), + TOBN(0xdbe1ca6e, 0xd449ab19), TOBN(0x8902b322, 0xe89a2672), + TOBN(0xb1674b7e, 0xdacb7a53), TOBN(0x8e9faf6e, 0xf52523ff), + TOBN(0x6ba535da, 0x9a85788b), TOBN(0xd21f03ae, 0xbd0626d4), + TOBN(0x099f8c47, 0xe873dc64), TOBN(0xcda8564d, 0x018ec97e), + TOBN(0x3e8d7a5c, 0xde92c68c), TOBN(0x78e035a1, 0x73323cc4), + TOBN(0x3ef26275, 0xf880ff7c), TOBN(0xa4ee3dff, 0x273eedaa), + TOBN(0x58823507, 0xaf4e18f8), TOBN(0x967ec9b5, 0x0672f328), + TOBN(0x9ded19d9, 0x559d3186), TOBN(0x5e2ab3de, 0x6cdce39c), + TOBN(0xabad6e4d, 0x11c226df), TOBN(0xf9783f43, 0x87723014), + TOBN(0x9a49a0cf, 0x1a885719), TOBN(0xfc0c1a5a, 0x90da9dbf), + TOBN(0x8bbaec49, 0x571d92ac), TOBN(0x569e85fe, 0x4692517f), + TOBN(0x8333b014, 0xa14ea4af), TOBN(0x32f2a62f, 0x12e5c5ad), + TOBN(0x98c2ce3a, 0x06d89b85), TOBN(0xb90741aa, 0x2ff77a08), + TOBN(0x2530defc, 0x01f795a2), TOBN(0xd6e5ba0b, 0x84b3c199), + TOBN(0x7d8e8451, 0x12e4c936), TOBN(0xae419f7d, 0xbd0be17b), + TOBN(0xa583fc8c, 0x22262bc9), TOBN(0x6b842ac7, 0x91bfe2bd), + TOBN(0x33cef4e9, 0x440d6827), TOBN(0x5f69f4de, 0xef81fb14), + TOBN(0xf16cf6f6, 0x234fbb92), TOBN(0x76ae3fc3, 0xd9e7e158), + TOBN(0x4e89f6c2, 0xe9740b33), TOBN(0x677bc85d, 0x4962d6a1), + TOBN(0x6c6d8a7f, 0x68d10d15), TOBN(0x5f9a7224, 0x0257b1cd), + TOBN(0x7096b916, 0x4ad85961), TOBN(0x5f8c47f7, 0xe657ab4a), + TOBN(0xde57d7d0, 0xf7461d7e), TOBN(0x7eb6094d, 0x80ce5ee2), + TOBN(0x0b1e1dfd, 0x34190547), TOBN(0x8a394f43, 0xf05dd150), + TOBN(0x0a9eb24d, 0x97df44e6), TOBN(0x78ca06bf, 0x87675719), + TOBN(0x6f0b3462, 0x6ffeec22), 
TOBN(0x9d91bcea, 0x36cdd8fb), + TOBN(0xac83363c, 0xa105be47), TOBN(0x81ba76c1, 0x069710e3), + TOBN(0x3d1b24cb, 0x28c682c6), TOBN(0x27f25228, 0x8612575b), + TOBN(0xb587c779, 0xe8e66e98), TOBN(0x7b0c03e9, 0x405eb1fe), + TOBN(0xfdf0d030, 0x15b548e7), TOBN(0xa8be76e0, 0x38b36af7), + TOBN(0x4cdab04a, 0x4f310c40), TOBN(0x6287223e, 0xf47ecaec), + TOBN(0x678e6055, 0x8b399320), TOBN(0x61fe3fa6, 0xc01e4646), + TOBN(0xc482866b, 0x03261a5e), TOBN(0xdfcf45b8, 0x5c2f244a), + TOBN(0x8fab9a51, 0x2f684b43), TOBN(0xf796c654, 0xc7220a66), + TOBN(0x1d90707e, 0xf5afa58f), TOBN(0x2c421d97, 0x4fdbe0de), + TOBN(0xc4f4cda3, 0xaf2ebc2f), TOBN(0xa0af843d, 0xcb4efe24), + TOBN(0x53b857c1, 0x9ccd10b1), TOBN(0xddc9d1eb, 0x914d3e04), + TOBN(0x7bdec8bb, 0x62771deb), TOBN(0x829277aa, 0x91c5aa81), + TOBN(0x7af18dd6, 0x832391ae), TOBN(0x1740f316, 0xc71a84ca),} + , + {TOBN(0x8928e99a, 0xeeaf8c49), TOBN(0xee7aa73d, 0x6e24d728), + TOBN(0x4c5007c2, 0xe72b156c), TOBN(0x5fcf57c5, 0xed408a1d), + TOBN(0x9f719e39, 0xb6057604), TOBN(0x7d343c01, 0xc2868bbf), + TOBN(0x2cca254b, 0x7e103e2d), TOBN(0xe6eb38a9, 0xf131bea2), + TOBN(0xb33e624f, 0x8be762b4), TOBN(0x2a9ee4d1, 0x058e3413), + TOBN(0x968e6369, 0x67d805fa), TOBN(0x9848949b, 0x7db8bfd7), + TOBN(0x5308d7e5, 0xd23a8417), TOBN(0x892f3b1d, 0xf3e29da5), + TOBN(0xc95c139e, 0x3dee471f), TOBN(0x8631594d, 0xd757e089), + TOBN(0xe0c82a3c, 0xde918dcc), TOBN(0x2e7b5994, 0x26fdcf4b), + TOBN(0x82c50249, 0x32cb1b2d), TOBN(0xea613a9d, 0x7657ae07), + TOBN(0xc2eb5f6c, 0xf1fdc9f7), TOBN(0xb6eae8b8, 0x879fe682), + TOBN(0x253dfee0, 0x591cbc7f), TOBN(0x000da713, 0x3e1290e6), + TOBN(0x1083e2ea, 0x1f095615), TOBN(0x0a28ad77, 0x14e68c33), + TOBN(0x6bfc0252, 0x3d8818be), TOBN(0xb585113a, 0xf35850cd), + TOBN(0x7d935f0b, 0x30df8aa1), TOBN(0xaddda07c, 0x4ab7e3ac), + TOBN(0x92c34299, 0x552f00cb), TOBN(0xc33ed1de, 0x2909df6c), + TOBN(0x22c2195d, 0x80e87766), TOBN(0x9e99e6d8, 0x9ddf4ac0), + TOBN(0x09642e4e, 0x65e74934), TOBN(0x2610ffa2, 0xff1ff241), + TOBN(0x4d1d47d4, 0x751c8159), TOBN(0x697b4985, 0xaf3a9363), + TOBN(0x0318ca46, 0x87477c33), TOBN(0xa90cb565, 0x9441eff3), + TOBN(0x58bb3848, 0x36f024cb), TOBN(0x85be1f77, 0x36016168), + TOBN(0x6c59587c, 0xdc7e07f1), TOBN(0x191be071, 0xaf1d8f02), + TOBN(0xbf169fa5, 0xcca5e55c), TOBN(0x3864ba3c, 0xf7d04eac), + TOBN(0x915e367f, 0x8d7d05db), TOBN(0xb48a876d, 0xa6549e5d), + TOBN(0xef89c656, 0x580e40a2), TOBN(0xf194ed8c, 0x728068bc), + TOBN(0x74528045, 0xa47990c9), TOBN(0xf53fc7d7, 0x5e1a4649), + TOBN(0xbec5ae9b, 0x78593e7d), TOBN(0x2cac4ee3, 0x41db65d7), + TOBN(0xa8c1eb24, 0x04a3d39b), TOBN(0x53b7d634, 0x03f8f3ef), + TOBN(0x2dc40d48, 0x3e07113c), TOBN(0x6e4a5d39, 0x7d8b63ae), + TOBN(0x5582a94b, 0x79684c2b), TOBN(0x932b33d4, 0x622da26c), + TOBN(0xf534f651, 0x0dbbf08d), TOBN(0x211d07c9, 0x64c23a52), + TOBN(0x0eeece0f, 0xee5bdc9b), TOBN(0xdf178168, 0xf7015558), + TOBN(0xd4294635, 0x0a712229), TOBN(0x93cbe448, 0x09273f8c), + TOBN(0x00b095ef, 0x8f13bc83), TOBN(0xbb741972, 0x8798978c), + TOBN(0x9d7309a2, 0x56dbe6e7), TOBN(0xe578ec56, 0x5a5d39ec), + TOBN(0x3961151b, 0x851f9a31), TOBN(0x2da7715d, 0xe5709eb4), + TOBN(0x867f3017, 0x53dfabf0), TOBN(0x728d2078, 0xb8e39259), + TOBN(0x5c75a0cd, 0x815d9958), TOBN(0xf84867a6, 0x16603be1), + TOBN(0xc865b13d, 0x70e35b1c), TOBN(0x02414468, 0x19b03e2c), + TOBN(0xe46041da, 0xac1f3121), TOBN(0x7c9017ad, 0x6f028a7c), + TOBN(0xabc96de9, 0x0a482873), TOBN(0x4265d6b1, 0xb77e54d4), + TOBN(0x68c38e79, 0xa57d88e7), TOBN(0xd461d766, 0x9ce82de3), + TOBN(0x817a9ec5, 0x64a7e489), TOBN(0xcc5675cd, 0xa0def5f2), + TOBN(0x9a00e785, 0x985d494e), 
TOBN(0xc626833f, 0x1b03514a), + TOBN(0xabe7905a, 0x83cdd60e), TOBN(0x50602fb5, 0xa1170184), + TOBN(0x689886cd, 0xb023642a), TOBN(0xd568d090, 0xa6e1fb00), + TOBN(0x5b1922c7, 0x0259217f), TOBN(0x93831cd9, 0xc43141e4), + TOBN(0xdfca3587, 0x0c95f86e), TOBN(0xdec2057a, 0x568ae828), + TOBN(0xc44ea599, 0xf98a759a), TOBN(0x55a0a7a2, 0xf7c23c1d), + TOBN(0xd5ffb6e6, 0x94c4f687), TOBN(0x3563cce2, 0x12848478), + TOBN(0x812b3517, 0xe7b1fbe1), TOBN(0x8a7dc979, 0x4f7338e0), + TOBN(0x211ecee9, 0x52d048db), TOBN(0x2eea4056, 0xc86ea3b8), + TOBN(0xd8cb68a7, 0xba772b34), TOBN(0xe16ed341, 0x5f4e2541), + TOBN(0x9b32f6a6, 0x0fec14db), TOBN(0xeee376f7, 0x391698be), + TOBN(0xe9a7aa17, 0x83674c02), TOBN(0x65832f97, 0x5843022a), + TOBN(0x29f3a8da, 0x5ba4990f), TOBN(0x79a59c3a, 0xfb8e3216), + TOBN(0x9cdc4d2e, 0xbd19bb16), TOBN(0xc6c7cfd0, 0xb3262d86), + TOBN(0xd4ce14d0, 0x969c0b47), TOBN(0x1fa352b7, 0x13e56128), + TOBN(0x383d55b8, 0x973db6d3), TOBN(0x71836850, 0xe8e5b7bf), + TOBN(0xc7714596, 0xe6bb571f), TOBN(0x259df31f, 0x2d5b2dd2), + TOBN(0x568f8925, 0x913cc16d), TOBN(0x18bc5b6d, 0xe1a26f5a), + TOBN(0xdfa413be, 0xf5f499ae), TOBN(0xf8835dec, 0xc3f0ae84), + TOBN(0xb6e60bd8, 0x65a40ab0), TOBN(0x65596439, 0x194b377e), + TOBN(0xbcd85625, 0x92084a69), TOBN(0x5ce433b9, 0x4f23ede0), + TOBN(0xe8e8f04f, 0x6ad65143), TOBN(0x11511827, 0xd6e14af6), + TOBN(0x3d390a10, 0x8295c0c7), TOBN(0x71e29ee4, 0x621eba16), + TOBN(0xa588fc09, 0x63717b46), TOBN(0x02be02fe, 0xe06ad4a2), + TOBN(0x931558c6, 0x04c22b22), TOBN(0xbb4d4bd6, 0x12f3c849), + TOBN(0x54a4f496, 0x20efd662), TOBN(0x92ba6d20, 0xc5952d14), + TOBN(0x2db8ea1e, 0xcc9784c2), TOBN(0x81cc10ca, 0x4b353644), + TOBN(0x40b570ad, 0x4b4d7f6c), TOBN(0x5c9f1d96, 0x84a1dcd2), + TOBN(0x01379f81, 0x3147e797), TOBN(0xe5c6097b, 0x2bd499f5), + TOBN(0x40dcafa6, 0x328e5e20), TOBN(0xf7b5244a, 0x54815550), + TOBN(0xb9a4f118, 0x47bfc978), TOBN(0x0ea0e79f, 0xd25825b1), + TOBN(0xa50f96eb, 0x646c7ecf), TOBN(0xeb811493, 0x446dea9d), + TOBN(0x2af04677, 0xdfabcf69), TOBN(0xbe3a068f, 0xc713f6e8), + TOBN(0x860d523d, 0x42e06189), TOBN(0xbf077941, 0x4e3aff13), + TOBN(0x0b616dca, 0xc1b20650), TOBN(0xe66dd6d1, 0x2131300d), + TOBN(0xd4a0fd67, 0xff99abde), TOBN(0xc9903550, 0xc7aac50d), + TOBN(0x022ecf8b, 0x7c46b2d7), TOBN(0x3333b1e8, 0x3abf92af), + TOBN(0x11cc113c, 0x6c491c14), TOBN(0x05976688, 0x80dd3f88), + TOBN(0xf5b4d9e7, 0x29d932ed), TOBN(0xe982aad8, 0xa2c38b6d), + TOBN(0x6f925347, 0x8be0dcf0), TOBN(0x700080ae, 0x65ca53f2), + TOBN(0xd8131156, 0x443ca77f), TOBN(0xe92d6942, 0xec51f984), + TOBN(0xd2a08af8, 0x85dfe9ae), TOBN(0xd825d9a5, 0x4d2a86ca), + TOBN(0x2c53988d, 0x39dff020), TOBN(0xf38b135a, 0x430cdc40), + TOBN(0x0c918ae0, 0x62a7150b), TOBN(0xf31fd8de, 0x0c340e9b), + TOBN(0xafa0e7ae, 0x4dbbf02e), TOBN(0x5847fb2a, 0x5eba6239), + TOBN(0x6b1647dc, 0xdccbac8b), TOBN(0xb642aa78, 0x06f485c8), + TOBN(0x873f3765, 0x7038ecdf), TOBN(0x2ce5e865, 0xfa49d3fe), + TOBN(0xea223788, 0xc98c4400), TOBN(0x8104a8cd, 0xf1fa5279), + TOBN(0xbcf7cc7a, 0x06becfd7), TOBN(0x49424316, 0xc8f974ae), + TOBN(0xc0da65e7, 0x84d6365d), TOBN(0xbcb7443f, 0x8f759fb8), + TOBN(0x35c712b1, 0x7ae81930), TOBN(0x80428dff, 0x4c6e08ab), + TOBN(0xf19dafef, 0xa4faf843), TOBN(0xced8538d, 0xffa9855f), + TOBN(0x20ac409c, 0xbe3ac7ce), TOBN(0x358c1fb6, 0x882da71e), + TOBN(0xafa9c0e5, 0xfd349961), TOBN(0x2b2cfa51, 0x8421c2fc), + TOBN(0x2a80db17, 0xf3a28d38), TOBN(0xa8aba539, 0x5d138e7e), + TOBN(0x52012d1d, 0x6e96eb8d), TOBN(0x65d8dea0, 0xcbaf9622), + TOBN(0x57735447, 0xb264f56c), TOBN(0xbeebef3f, 0x1b6c8da2), + TOBN(0xfc346d98, 0xce785254), TOBN(0xd50e8d72, 
0xbb64a161), + TOBN(0xc03567c7, 0x49794add), TOBN(0x15a76065, 0x752c7ef6), + TOBN(0x59f3a222, 0x961f23d6), TOBN(0x378e4438, 0x73ecc0b0), + TOBN(0xc74be434, 0x5a82fde4), TOBN(0xae509af2, 0xd8b9cf34), + TOBN(0x4a61ee46, 0x577f44a1), TOBN(0xe09b748c, 0xb611deeb), + TOBN(0xc0481b2c, 0xf5f7b884), TOBN(0x35626678, 0x61acfa6b), + TOBN(0x37f4c518, 0xbf8d21e6), TOBN(0x22d96531, 0xb205a76d), + TOBN(0x37fb85e1, 0x954073c0), TOBN(0xbceafe4f, 0x65b3a567), + TOBN(0xefecdef7, 0xbe42a582), TOBN(0xd3fc6080, 0x65046be6), + TOBN(0xc9af13c8, 0x09e8dba9), TOBN(0x1e6c9847, 0x641491ff), + TOBN(0x3b574925, 0xd30c31f7), TOBN(0xb7eb72ba, 0xac2a2122), + TOBN(0x776a0dac, 0xef0859e7), TOBN(0x06fec314, 0x21900942), + TOBN(0x2464bc10, 0xf8c22049), TOBN(0x9bfbcce7, 0x875ebf69), + TOBN(0xd7a88e2a, 0x4336326b), TOBN(0xda05261c, 0x5bc2acfa), + TOBN(0xc29f5bdc, 0xeba7efc8), TOBN(0x471237ca, 0x25dbbf2e), + TOBN(0xa72773f2, 0x2975f127), TOBN(0xdc744e8e, 0x04d0b326), + TOBN(0x38a7ed16, 0xa56edb73), TOBN(0x64357e37, 0x2c007e70), + TOBN(0xa167d15b, 0x5080b400), TOBN(0x07b41164, 0x23de4be1), + TOBN(0xb2d91e32, 0x74c89883), TOBN(0x3c162821, 0x2882e7ed), + TOBN(0xad6b36ba, 0x7503e482), TOBN(0x48434e8e, 0x0ea34331), + TOBN(0x79f4f24f, 0x2c7ae0b9), TOBN(0xc46fbf81, 0x1939b44a), + TOBN(0x76fefae8, 0x56595eb1), TOBN(0x417b66ab, 0xcd5f29c7), + TOBN(0x5f2332b2, 0xc5ceec20), TOBN(0xd69661ff, 0xe1a1cae2), + TOBN(0x5ede7e52, 0x9b0286e6), TOBN(0x9d062529, 0xe276b993), + TOBN(0x324794b0, 0x7e50122b), TOBN(0xdd744f8b, 0x4af07ca5), + TOBN(0x30a12f08, 0xd63fc97b), TOBN(0x39650f1a, 0x76626d9d), + TOBN(0x101b47f7, 0x1fa38477), TOBN(0x3d815f19, 0xd4dc124f), + TOBN(0x1569ae95, 0xb26eb58a), TOBN(0xc3cde188, 0x95fb1887), + TOBN(0x54e9f37b, 0xf9539a48), TOBN(0xb0100e06, 0x7408c1a5), + TOBN(0x821d9811, 0xea580cbb), TOBN(0x8af52d35, 0x86e50c56), + TOBN(0xdfbd9d47, 0xdbbf698b), TOBN(0x2961a1ea, 0x03dc1c73), + TOBN(0x203d38f8, 0xe76a5df8), TOBN(0x08a53a68, 0x6def707a), + TOBN(0x26eefb48, 0x1bee45d4), TOBN(0xb3cee346, 0x3c688036), + TOBN(0x463c5315, 0xc42f2469), TOBN(0x19d84d2e, 0x81378162), + TOBN(0x22d7c3c5, 0x1c4d349f), TOBN(0x65965844, 0x163d59c5), + TOBN(0xcf198c56, 0xb8abceae), TOBN(0x6fb1fb1b, 0x628559d5), + TOBN(0x8bbffd06, 0x07bf8fe3), TOBN(0x46259c58, 0x3467734b), + TOBN(0xd8953cea, 0x35f7f0d3), TOBN(0x1f0bece2, 0xd65b0ff1), + TOBN(0xf7d5b4b3, 0xf3c72914), TOBN(0x29e8ea95, 0x3cb53389), + TOBN(0x4a365626, 0x836b6d46), TOBN(0xe849f910, 0xea174fde), + TOBN(0x7ec62fbb, 0xf4737f21), TOBN(0xd8dba5ab, 0x6209f5ac), + TOBN(0x24b5d7a9, 0xa5f9adbe), TOBN(0x707d28f7, 0xa61dc768), + TOBN(0x7711460b, 0xcaa999ea), TOBN(0xba7b174d, 0x1c92e4cc), + TOBN(0x3c4bab66, 0x18d4bf2d), TOBN(0xb8f0c980, 0xeb8bd279), + TOBN(0x024bea9a, 0x324b4737), TOBN(0xfba9e423, 0x32a83bca), + TOBN(0x6e635643, 0xa232dced), TOBN(0x99619367, 0x2571c8ba), + TOBN(0xe8c9f357, 0x54b7032b), TOBN(0xf936b3ba, 0x2442d54a), + TOBN(0x2263f0f0, 0x8290c65a), TOBN(0x48989780, 0xee2c7fdb), + TOBN(0xadc5d55a, 0x13d4f95e), TOBN(0x737cff85, 0xad9b8500), + TOBN(0x271c557b, 0x8a73f43d), TOBN(0xbed617a4, 0xe18bc476), + TOBN(0x66245401, 0x7dfd8ab2), TOBN(0xae7b89ae, 0x3a2870aa), + TOBN(0x1b555f53, 0x23a7e545), TOBN(0x6791e247, 0xbe057e4c), + TOBN(0x860136ad, 0x324fa34d), TOBN(0xea111447, 0x4cbeae28), + TOBN(0x023a4270, 0xbedd3299), TOBN(0x3d5c3a7f, 0xc1c35c34), + TOBN(0xb0f6db67, 0x8d0412d2), TOBN(0xd92625e2, 0xfcdc6b9a), + TOBN(0x92ae5ccc, 0x4e28a982), TOBN(0xea251c36, 0x47a3ce7e), + TOBN(0x9d658932, 0x790691bf), TOBN(0xed610589, 0x06b736ae), + TOBN(0x712c2f04, 0xc0d63b6e), TOBN(0x5cf06fd5, 0xc63d488f), + 
TOBN(0x97363fac, 0xd9588e41), TOBN(0x1f9bf762, 0x2b93257e), + TOBN(0xa9d1ffc4, 0x667acace), TOBN(0x1cf4a1aa, 0x0a061ecf), + TOBN(0x40e48a49, 0xdc1818d0), TOBN(0x0643ff39, 0xa3621ab0), + TOBN(0x5768640c, 0xe39ef639), TOBN(0x1fc099ea, 0x04d86854), + TOBN(0x9130b9c3, 0xeccd28fd), TOBN(0xd743cbd2, 0x7eec54ab), + TOBN(0x052b146f, 0xe5b475b6), TOBN(0x058d9a82, 0x900a7d1f), + TOBN(0x65e02292, 0x91262b72), TOBN(0x96f924f9, 0xbb0edf03), + TOBN(0x5cfa59c8, 0xfe206842), TOBN(0xf6037004, 0x5eafa720), + TOBN(0x5f30699e, 0x18d7dd96), TOBN(0x381e8782, 0xcbab2495), + TOBN(0x91669b46, 0xdd8be949), TOBN(0xb40606f5, 0x26aae8ef), + TOBN(0x2812b839, 0xfc6751a4), TOBN(0x16196214, 0xfba800ef), + TOBN(0x4398d5ca, 0x4c1a2875), TOBN(0x720c00ee, 0x653d8349), + TOBN(0xc2699eb0, 0xd820007c), TOBN(0x880ee660, 0xa39b5825), + TOBN(0x70694694, 0x471f6984), TOBN(0xf7d16ea8, 0xe3dda99a), + TOBN(0x28d675b2, 0xc0519a23), TOBN(0x9ebf94fe, 0x4f6952e3), + TOBN(0xf28bb767, 0xa2294a8a), TOBN(0x85512b4d, 0xfe0af3f5), + TOBN(0x18958ba8, 0x99b16a0d), TOBN(0x95c2430c, 0xba7548a7), + TOBN(0xb30d1b10, 0xa16be615), TOBN(0xe3ebbb97, 0x85bfb74c), + TOBN(0xa3273cfe, 0x18549fdb), TOBN(0xf6e200bf, 0x4fcdb792), + TOBN(0x54a76e18, 0x83aba56c), TOBN(0x73ec66f6, 0x89ef6aa2), + TOBN(0x8d17add7, 0xd1b9a305), TOBN(0xa959c5b9, 0xb7ae1b9d), + TOBN(0x88643522, 0x6bcc094a), TOBN(0xcc5616c4, 0xd7d429b9), + TOBN(0xa6dada01, 0xe6a33f7c), TOBN(0xc6217a07, 0x9d4e70ad), + TOBN(0xd619a818, 0x09c15b7c), TOBN(0xea06b329, 0x0e80c854), + TOBN(0x174811ce, 0xa5f5e7b9), TOBN(0x66dfc310, 0x787c65f4), + TOBN(0x4ea7bd69, 0x3316ab54), TOBN(0xc12c4acb, 0x1dcc0f70), + TOBN(0xe4308d1a, 0x1e407dd9), TOBN(0xe8a3587c, 0x91afa997), + TOBN(0xea296c12, 0xab77b7a5), TOBN(0xb5ad49e4, 0x673c0d52), + TOBN(0x40f9b2b2, 0x7006085a), TOBN(0xa88ff340, 0x87bf6ec2), + TOBN(0x978603b1, 0x4e3066a6), TOBN(0xb3f99fc2, 0xb5e486e2), + TOBN(0x07b53f5e, 0xb2e63645), TOBN(0xbe57e547, 0x84c84232), + TOBN(0xd779c216, 0x7214d5cf), TOBN(0x617969cd, 0x029a3aca), + TOBN(0xd17668cd, 0x8a7017a0), TOBN(0x77b4d19a, 0xbe9b7ee8), + TOBN(0x58fd0e93, 0x9c161776), TOBN(0xa8c4f4ef, 0xd5968a72), + TOBN(0x296071cc, 0x67b3de77), TOBN(0xae3c0b8e, 0x634f7905), + TOBN(0x67e440c2, 0x8a7100c9), TOBN(0xbb8c3c1b, 0xeb4b9b42), + TOBN(0x6d71e8ea, 0xc51b3583), TOBN(0x7591f5af, 0x9525e642), + TOBN(0xf73a2f7b, 0x13f509f3), TOBN(0x618487aa, 0x5619ac9b), + TOBN(0x3a72e5f7, 0x9d61718a), TOBN(0x00413bcc, 0x7592d28c), + TOBN(0x7d9b11d3, 0x963c35cf), TOBN(0x77623bcf, 0xb90a46ed), + TOBN(0xdeef273b, 0xdcdd2a50), TOBN(0x4a741f9b, 0x0601846e), + TOBN(0x33b89e51, 0x0ec6e929), TOBN(0xcb02319f, 0x8b7f22cd), + TOBN(0xbbe1500d, 0x084bae24), TOBN(0x2f0ae8d7, 0x343d2693), + TOBN(0xacffb5f2, 0x7cdef811), TOBN(0xaa0c030a, 0x263fb94f), + TOBN(0x6eef0d61, 0xa0f442de), TOBN(0xf92e1817, 0x27b139d3), + TOBN(0x1ae6deb7, 0x0ad8bc28), TOBN(0xa89e38dc, 0xc0514130), + TOBN(0x81eeb865, 0xd2fdca23), TOBN(0x5a15ee08, 0xcc8ef895), + TOBN(0x768fa10a, 0x01905614), TOBN(0xeff5b8ef, 0x880ee19b), + TOBN(0xf0c0cabb, 0xcb1c8a0e), TOBN(0x2e1ee9cd, 0xb8c838f9), + TOBN(0x0587d8b8, 0x8a4a14c0), TOBN(0xf6f27896, 0x2ff698e5), + TOBN(0xed38ef1c, 0x89ee6256), TOBN(0xf44ee1fe, 0x6b353b45), + TOBN(0x9115c0c7, 0x70e903b3), TOBN(0xc78ec0a1, 0x818f31df), + TOBN(0x6c003324, 0xb7dccbc6), TOBN(0xd96dd1f3, 0x163bbc25), + TOBN(0x33aa82dd, 0x5cedd805), TOBN(0x123aae4f, 0x7f7eb2f1), + TOBN(0x1723fcf5, 0xa26262cd), TOBN(0x1f7f4d5d, 0x0060ebd5), + TOBN(0xf19c5c01, 0xb2eaa3af), TOBN(0x2ccb9b14, 0x9790accf), + TOBN(0x1f9c1cad, 0x52324aa6), TOBN(0x63200526, 0x7247df54), + TOBN(0x5732fe42, 
0xbac96f82), TOBN(0x52fe771f, 0x01a1c384), + TOBN(0x546ca13d, 0xb1001684), TOBN(0xb56b4eee, 0xa1709f75), + TOBN(0x266545a9, 0xd5db8672), TOBN(0xed971c90, 0x1e8f3cfb), + TOBN(0x4e7d8691, 0xe3a07b29), TOBN(0x7570d9ec, 0xe4b696b9), + TOBN(0xdc5fa067, 0x7bc7e9ae), TOBN(0x68b44caf, 0xc82c4844), + TOBN(0x519d34b3, 0xbf44da80), TOBN(0x283834f9, 0x5ab32e66), + TOBN(0x6e608797, 0x6278a000), TOBN(0x1e62960e, 0x627312f6), + TOBN(0x9b87b27b, 0xe6901c55), TOBN(0x80e78538, 0x24fdbc1f), + TOBN(0xbbbc0951, 0x2facc27d), TOBN(0x06394239, 0xac143b5a), + TOBN(0x35bb4a40, 0x376c1944), TOBN(0x7cb62694, 0x63da1511), + TOBN(0xafd29161, 0xb7148a3b), TOBN(0xa6f9d9ed, 0x4e2ea2ee), + TOBN(0x15dc2ca2, 0x880dd212), TOBN(0x903c3813, 0xa61139a9), + TOBN(0x2aa7b46d, 0x6c0f8785), TOBN(0x36ce2871, 0x901c60ff), + TOBN(0xc683b028, 0xe10d9c12), TOBN(0x7573baa2, 0x032f33d3), + TOBN(0x87a9b1f6, 0x67a31b58), TOBN(0xfd3ed11a, 0xf4ffae12), + TOBN(0x83dcaa9a, 0x0cb2748e), TOBN(0x8239f018, 0x5d6fdf16), + TOBN(0xba67b49c, 0x72753941), TOBN(0x2beec455, 0xc321cb36), + TOBN(0x88015606, 0x3f8b84ce), TOBN(0x76417083, 0x8d38c86f), + TOBN(0x054f1ca7, 0x598953dd), TOBN(0xc939e110, 0x4e8e7429), + TOBN(0x9b1ac2b3, 0x5a914f2f), TOBN(0x39e35ed3, 0xe74b8f9c), + TOBN(0xd0debdb2, 0x781b2fb0), TOBN(0x1585638f, 0x2d997ba2), + TOBN(0x9c4b646e, 0x9e2fce99), TOBN(0x68a21081, 0x1e80857f), + TOBN(0x06d54e44, 0x3643b52a), TOBN(0xde8d6d63, 0x0d8eb843), + TOBN(0x70321563, 0x42146a0a), TOBN(0x8ba826f2, 0x5eaa3622), + TOBN(0x227a58bd, 0x86138787), TOBN(0x43b6c03c, 0x10281d37), + TOBN(0x6326afbb, 0xb54dde39), TOBN(0x744e5e8a, 0xdb6f2d5f), + TOBN(0x48b2a99a, 0xcff158e1), TOBN(0xa93c8fa0, 0xef87918f), + TOBN(0x2182f956, 0xde058c5c), TOBN(0x216235d2, 0x936f9e7a), + TOBN(0xace0c0db, 0xd2e31e67), TOBN(0xc96449bf, 0xf23ac3e7), + TOBN(0x7e9a2874, 0x170693bd), TOBN(0xa28e14fd, 0xa45e6335), + TOBN(0x5757f6b3, 0x56427344), TOBN(0x822e4556, 0xacf8edf9), + TOBN(0x2b7a6ee2, 0xe6a285cd), TOBN(0x5866f211, 0xa9df3af0), + TOBN(0x40dde2dd, 0xf845b844), TOBN(0x986c3726, 0x110e5e49), + TOBN(0x73680c2a, 0xf7172277), TOBN(0x57b94f0f, 0x0cccb244), + TOBN(0xbdff7267, 0x2d438ca7), TOBN(0xbad1ce11, 0xcf4663fd), + TOBN(0x9813ed9d, 0xd8f71cae), TOBN(0xf43272a6, 0x961fdaa6), + TOBN(0xbeff0119, 0xbd6d1637), TOBN(0xfebc4f91, 0x30361978), + TOBN(0x02b37a95, 0x2f41deff), TOBN(0x0e44a59a, 0xe63b89b7), + TOBN(0x673257dc, 0x143ff951), TOBN(0x19c02205, 0xd752baf4), + TOBN(0x46c23069, 0xc4b7d692), TOBN(0x2e6392c3, 0xfd1502ac), + TOBN(0x6057b1a2, 0x1b220846), TOBN(0xe51ff946, 0x0c1b5b63),} + , + {TOBN(0x6e85cb51, 0x566c5c43), TOBN(0xcff9c919, 0x3597f046), + TOBN(0x9354e90c, 0x4994d94a), TOBN(0xe0a39332, 0x2147927d), + TOBN(0x8427fac1, 0x0dc1eb2b), TOBN(0x88cfd8c2, 0x2ff319fa), + TOBN(0xe2d4e684, 0x01965274), TOBN(0xfa2e067d, 0x67aaa746), + TOBN(0xb6d92a7f, 0x3e5f9f11), TOBN(0x9afe153a, 0xd6cb3b8e), + TOBN(0x4d1a6dd7, 0xddf800bd), TOBN(0xf6c13cc0, 0xcaf17e19), + TOBN(0x15f6c58e, 0x325fc3ee), TOBN(0x71095400, 0xa31dc3b2), + TOBN(0x168e7c07, 0xafa3d3e7), TOBN(0x3f8417a1, 0x94c7ae2d), + TOBN(0xec234772, 0x813b230d), TOBN(0x634d0f5f, 0x17344427), + TOBN(0x11548ab1, 0xd77fc56a), TOBN(0x7fab1750, 0xce06af77), + TOBN(0xb62c10a7, 0x4f7c4f83), TOBN(0xa7d2edc4, 0x220a67d9), + TOBN(0x1c404170, 0x921209a0), TOBN(0x0b9815a0, 0xface59f0), + TOBN(0x2842589b, 0x319540c3), TOBN(0x18490f59, 0xa283d6f8), + TOBN(0xa2731f84, 0xdaae9fcb), TOBN(0x3db6d960, 0xc3683ba0), + TOBN(0xc85c63bb, 0x14611069), TOBN(0xb19436af, 0x0788bf05), + TOBN(0x905459df, 0x347460d2), TOBN(0x73f6e094, 0xe11a7db1), + TOBN(0xdc7f938e, 0xb6357f37), 
TOBN(0xc5d00f79, 0x2bd8aa62), + TOBN(0xc878dcb9, 0x2ca979fc), TOBN(0x37e83ed9, 0xeb023a99), + TOBN(0x6b23e273, 0x1560bf3d), TOBN(0x1086e459, 0x1d0fae61), + TOBN(0x78248316, 0x9a9414bd), TOBN(0x1b956bc0, 0xf0ea9ea1), + TOBN(0x7b85bb91, 0xc31b9c38), TOBN(0x0c5aa90b, 0x48ef57b5), + TOBN(0xdedeb169, 0xaf3bab6f), TOBN(0xe610ad73, 0x2d373685), + TOBN(0xf13870df, 0x02ba8e15), TOBN(0x0337edb6, 0x8ca7f771), + TOBN(0xe4acf747, 0xb62c036c), TOBN(0xd921d576, 0xb6b94e81), + TOBN(0xdbc86439, 0x2c422f7a), TOBN(0xfb635362, 0xed348898), + TOBN(0x83084668, 0xc45bfcd1), TOBN(0xc357c9e3, 0x2b315e11), + TOBN(0xb173b540, 0x5b2e5b8c), TOBN(0x7e946931, 0xe102b9a4), + TOBN(0x17c890eb, 0x7b0fb199), TOBN(0xec225a83, 0xd61b662b), + TOBN(0xf306a3c8, 0xee3c76cb), TOBN(0x3cf11623, 0xd32a1f6e), + TOBN(0xe6d5ab64, 0x6863e956), TOBN(0x3b8a4cbe, 0x5c005c26), + TOBN(0xdcd529a5, 0x9ce6bb27), TOBN(0xc4afaa52, 0x04d4b16f), + TOBN(0xb0624a26, 0x7923798d), TOBN(0x85e56df6, 0x6b307fab), + TOBN(0x0281893c, 0x2bf29698), TOBN(0x91fc19a4, 0xd7ce7603), + TOBN(0x75a5dca3, 0xad9a558f), TOBN(0x40ceb3fa, 0x4d50bf77), + TOBN(0x1baf6060, 0xbc9ba369), TOBN(0x927e1037, 0x597888c2), + TOBN(0xd936bf19, 0x86a34c07), TOBN(0xd4cf10c1, 0xc34ae980), + TOBN(0x3a3e5334, 0x859dd614), TOBN(0x9c475b5b, 0x18d0c8ee), + TOBN(0x63080d1f, 0x07cd51d5), TOBN(0xc9c0d0a6, 0xb88b4326), + TOBN(0x1ac98691, 0xc234296f), TOBN(0x2a0a83a4, 0x94887fb6), + TOBN(0x56511427, 0x0cea9cf2), TOBN(0x5230a6e8, 0xa24802f5), + TOBN(0xf7a2bf0f, 0x72e3d5c1), TOBN(0x37717446, 0x4f21439e), + TOBN(0xfedcbf25, 0x9ce30334), TOBN(0xe0030a78, 0x7ce202f9), + TOBN(0x6f2d9ebf, 0x1202e9ca), TOBN(0xe79dde6c, 0x75e6e591), + TOBN(0xf52072af, 0xf1dac4f8), TOBN(0x6c8d087e, 0xbb9b404d), + TOBN(0xad0fc73d, 0xbce913af), TOBN(0x909e587b, 0x458a07cb), + TOBN(0x1300da84, 0xd4f00c8a), TOBN(0x425cd048, 0xb54466ac), + TOBN(0xb59cb9be, 0x90e9d8bf), TOBN(0x991616db, 0x3e431b0e), + TOBN(0xd3aa117a, 0x531aecff), TOBN(0x91af92d3, 0x59f4dc3b), + TOBN(0x9b1ec292, 0xe93fda29), TOBN(0x76bb6c17, 0xe97d91bc), + TOBN(0x7509d95f, 0xaface1e6), TOBN(0x3653fe47, 0xbe855ae3), + TOBN(0x73180b28, 0x0f680e75), TOBN(0x75eefd1b, 0xeeb6c26c), + TOBN(0xa4cdf29f, 0xb66d4236), TOBN(0x2d70a997, 0x6b5821d8), + TOBN(0x7a3ee207, 0x20445c36), TOBN(0x71d1ac82, 0x59877174), + TOBN(0x0fc539f7, 0x949f73e9), TOBN(0xd05cf3d7, 0x982e3081), + TOBN(0x8758e20b, 0x7b1c7129), TOBN(0xffadcc20, 0x569e61f2), + TOBN(0xb05d3a2f, 0x59544c2d), TOBN(0xbe16f5c1, 0x9fff5e53), + TOBN(0x73cf65b8, 0xaad58135), TOBN(0x622c2119, 0x037aa5be), + TOBN(0x79373b3f, 0x646fd6a0), TOBN(0x0e029db5, 0x0d3978cf), + TOBN(0x8bdfc437, 0x94fba037), TOBN(0xaefbd687, 0x620797a6), + TOBN(0x3fa5382b, 0xbd30d38e), TOBN(0x7627cfbf, 0x585d7464), + TOBN(0xb2330fef, 0x4e4ca463), TOBN(0xbcef7287, 0x3566cc63), + TOBN(0xd161d2ca, 0xcf780900), TOBN(0x135dc539, 0x5b54827d), + TOBN(0x638f052e, 0x27bf1bc6), TOBN(0x10a224f0, 0x07dfa06c), + TOBN(0xe973586d, 0x6d3321da), TOBN(0x8b0c5738, 0x26152c8f), + TOBN(0x07ef4f2a, 0x34606074), TOBN(0x80fe7fe8, 0xa0f7047a), + TOBN(0x3d1a8152, 0xe1a0e306), TOBN(0x32cf43d8, 0x88da5222), + TOBN(0xbf89a95f, 0x5f02ffe6), TOBN(0x3d9eb9a4, 0x806ad3ea), + TOBN(0x012c17bb, 0x79c8e55e), TOBN(0xfdcd1a74, 0x99c81dac), + TOBN(0x7043178b, 0xb9556098), TOBN(0x4090a1df, 0x801c3886), + TOBN(0x759800ff, 0x9b67b912), TOBN(0x3e5c0304, 0x232620c8), + TOBN(0x4b9d3c4b, 0x70dceeca), TOBN(0xbb2d3c15, 0x181f648e), + TOBN(0xf981d837, 0x6e33345c), TOBN(0xb626289b, 0x0cf2297a), + TOBN(0x766ac659, 0x8baebdcf), TOBN(0x1a28ae09, 0x75df01e5), + TOBN(0xb71283da, 0x375876d8), TOBN(0x4865a96d, 
0x607b9800), + TOBN(0x25dd1bcd, 0x237936b2), TOBN(0x332f4f4b, 0x60417494), + TOBN(0xd0923d68, 0x370a2147), TOBN(0x497f5dfb, 0xdc842203), + TOBN(0x9dc74cbd, 0x32be5e0f), TOBN(0x7475bcb7, 0x17a01375), + TOBN(0x438477c9, 0x50d872b1), TOBN(0xcec67879, 0xffe1d63d), + TOBN(0x9b006014, 0xd8578c70), TOBN(0xc9ad99a8, 0x78bb6b8b), + TOBN(0x6799008e, 0x11fb3806), TOBN(0xcfe81435, 0xcd44cab3), + TOBN(0xa2ee1582, 0x2f4fb344), TOBN(0xb8823450, 0x483fa6eb), + TOBN(0x622d323d, 0x652c7749), TOBN(0xd8474a98, 0xbeb0a15b), + TOBN(0xe43c154d, 0x5d1c00d0), TOBN(0x7fd581d9, 0x0e3e7aac), + TOBN(0x2b44c619, 0x2525ddf8), TOBN(0x67a033eb, 0xb8ae9739), + TOBN(0x113ffec1, 0x9ef2d2e4), TOBN(0x1bf6767e, 0xd5a0ea7f), + TOBN(0x57fff75e, 0x03714c0a), TOBN(0xa23c422e, 0x0a23e9ee), + TOBN(0xdd5f6b2d, 0x540f83af), TOBN(0xc2c2c27e, 0x55ea46a7), + TOBN(0xeb6b4246, 0x672a1208), TOBN(0xd13599f7, 0xae634f7a), + TOBN(0xcf914b5c, 0xd7b32c6e), TOBN(0x61a5a640, 0xeaf61814), + TOBN(0x8dc3df8b, 0x208a1bbb), TOBN(0xef627fd6, 0xb6d79aa5), + TOBN(0x44232ffc, 0xc4c86bc8), TOBN(0xe6f9231b, 0x061539fe), + TOBN(0x1d04f25a, 0x958b9533), TOBN(0x180cf934, 0x49e8c885), + TOBN(0x89689595, 0x9884aaf7), TOBN(0xb1959be3, 0x07b348a6), + TOBN(0x96250e57, 0x3c147c87), TOBN(0xae0efb3a, 0xdd0c61f8), + TOBN(0xed00745e, 0xca8c325e), TOBN(0x3c911696, 0xecff3f70), + TOBN(0x73acbc65, 0x319ad41d), TOBN(0x7b01a020, 0xf0b1c7ef), + TOBN(0xea32b293, 0x63a1483f), TOBN(0x89eabe71, 0x7a248f96), + TOBN(0x9c6231d3, 0x343157e5), TOBN(0x93a375e5, 0xdf3c546d), + TOBN(0xe76e9343, 0x6a2afe69), TOBN(0xc4f89100, 0xe166c88e), + TOBN(0x248efd0d, 0x4f872093), TOBN(0xae0eb3ea, 0x8fe0ea61), + TOBN(0xaf89790d, 0x9d79046e), TOBN(0x4d650f2d, 0x6cee0976), + TOBN(0xa3935d9a, 0x43071eca), TOBN(0x66fcd2c9, 0x283b0bfe), + TOBN(0x0e665eb5, 0x696605f1), TOBN(0xe77e5d07, 0xa54cd38d), + TOBN(0x90ee050a, 0x43d950cf), TOBN(0x86ddebda, 0xd32e69b5), + TOBN(0x6ad94a3d, 0xfddf7415), TOBN(0xf7fa1309, 0x3f6e8d5a), + TOBN(0xc4831d1d, 0xe9957f75), TOBN(0x7de28501, 0xd5817447), + TOBN(0x6f1d7078, 0x9e2aeb6b), TOBN(0xba2b9ff4, 0xf67a53c2), + TOBN(0x36963767, 0xdf9defc3), TOBN(0x479deed3, 0x0d38022c), + TOBN(0xd2edb89b, 0x3a8631e8), TOBN(0x8de855de, 0x7a213746), + TOBN(0xb2056cb7, 0xb00c5f11), TOBN(0xdeaefbd0, 0x2c9b85e4), + TOBN(0x03f39a8d, 0xd150892d), TOBN(0x37b84686, 0x218b7985), + TOBN(0x36296dd8, 0xb7375f1a), TOBN(0x472cd4b1, 0xb78e898e), + TOBN(0x15dff651, 0xe9f05de9), TOBN(0xd4045069, 0x2ce98ba9), + TOBN(0x8466a7ae, 0x9b38024c), TOBN(0xb910e700, 0xe5a6b5ef), + TOBN(0xae1c56ea, 0xb3aa8f0d), TOBN(0xbab2a507, 0x7eee74a6), + TOBN(0x0dca11e2, 0x4b4c4620), TOBN(0xfd896e2e, 0x4c47d1f4), + TOBN(0xeb45ae53, 0x308fbd93), TOBN(0x46cd5a2e, 0x02c36fda), + TOBN(0x6a3d4e90, 0xbaa48385), TOBN(0xdd55e62e, 0x9dbe9960), + TOBN(0xa1406aa0, 0x2a81ede7), TOBN(0x6860dd14, 0xf9274ea7), + TOBN(0xcfdcb0c2, 0x80414f86), TOBN(0xff410b10, 0x22f94327), + TOBN(0x5a33cc38, 0x49ad467b), TOBN(0xefb48b6c, 0x0a7335f1), + TOBN(0x14fb54a4, 0xb153a360), TOBN(0x604aa9d2, 0xb52469cc), + TOBN(0x5e9dc486, 0x754e48e9), TOBN(0x693cb455, 0x37471e8e), + TOBN(0xfb2fd7cd, 0x8d3b37b6), TOBN(0x63345e16, 0xcf09ff07), + TOBN(0x9910ba6b, 0x23a5d896), TOBN(0x1fe19e35, 0x7fe4364e), + TOBN(0x6e1da8c3, 0x9a33c677), TOBN(0x15b4488b, 0x29fd9fd0), + TOBN(0x1f439254, 0x1a1f22bf), TOBN(0x920a8a70, 0xab8163e8), + TOBN(0x3fd1b249, 0x07e5658e), TOBN(0xf2c4f79c, 0xb6ec839b), + TOBN(0x1abbc3d0, 0x4aa38d1b), TOBN(0x3b0db35c, 0xb5d9510e), + TOBN(0x1754ac78, 0x3e60dec0), TOBN(0x53272fd7, 0xea099b33), + TOBN(0x5fb0494f, 0x07a8e107), TOBN(0x4a89e137, 0x6a8191fa), + 
TOBN(0xa113b7f6, 0x3c4ad544), TOBN(0x88a2e909, 0x6cb9897b), + TOBN(0x17d55de3, 0xb44a3f84), TOBN(0xacb2f344, 0x17c6c690), + TOBN(0x32088168, 0x10232390), TOBN(0xf2e8a61f, 0x6c733bf7), + TOBN(0xa774aab6, 0x9c2d7652), TOBN(0xfb5307e3, 0xed95c5bc), + TOBN(0xa05c73c2, 0x4981f110), TOBN(0x1baae31c, 0xa39458c9), + TOBN(0x1def185b, 0xcbea62e7), TOBN(0xe8ac9eae, 0xeaf63059), + TOBN(0x098a8cfd, 0x9921851c), TOBN(0xd959c3f1, 0x3abe2f5b), + TOBN(0xa4f19525, 0x20e40ae5), TOBN(0x320789e3, 0x07a24aa1), + TOBN(0x259e6927, 0x7392b2bc), TOBN(0x58f6c667, 0x1918668b), + TOBN(0xce1db2bb, 0xc55d2d8b), TOBN(0x41d58bb7, 0xf4f6ca56), + TOBN(0x7650b680, 0x8f877614), TOBN(0x905e16ba, 0xf4c349ed), + TOBN(0xed415140, 0xf661acac), TOBN(0x3b8784f0, 0xcb2270af), + TOBN(0x3bc280ac, 0x8a402cba), TOBN(0xd53f7146, 0x0937921a), + TOBN(0xc03c8ee5, 0xe5681e83), TOBN(0x62126105, 0xf6ac9e4a), + TOBN(0x9503a53f, 0x936b1a38), TOBN(0x3d45e2d4, 0x782fecbd), + TOBN(0x69a5c439, 0x76e8ae98), TOBN(0xb53b2eeb, 0xbfb4b00e), + TOBN(0xf1674712, 0x72386c89), TOBN(0x30ca34a2, 0x4268bce4), + TOBN(0x7f1ed86c, 0x78341730), TOBN(0x8ef5beb8, 0xb525e248), + TOBN(0xbbc489fd, 0xb74fbf38), TOBN(0x38a92a0e, 0x91a0b382), + TOBN(0x7a77ba3f, 0x22433ccf), TOBN(0xde8362d6, 0xa29f05a9), + TOBN(0x7f6a30ea, 0x61189afc), TOBN(0x693b5505, 0x59ef114f), + TOBN(0x50266bc0, 0xcd1797a1), TOBN(0xea17b47e, 0xf4b7af2d), + TOBN(0xd6c4025c, 0x3df9483e), TOBN(0x8cbb9d9f, 0xa37b18c9), + TOBN(0x91cbfd9c, 0x4d8424cf), TOBN(0xdb7048f1, 0xab1c3506), + TOBN(0x9eaf641f, 0x028206a3), TOBN(0xf986f3f9, 0x25bdf6ce), + TOBN(0x262143b5, 0x224c08dc), TOBN(0x2bbb09b4, 0x81b50c91), + TOBN(0xc16ed709, 0xaca8c84f), TOBN(0xa6210d9d, 0xb2850ca8), + TOBN(0x6d8df67a, 0x09cb54d6), TOBN(0x91eef6e0, 0x500919a4), + TOBN(0x90f61381, 0x0f132857), TOBN(0x9acede47, 0xf8d5028b), + TOBN(0x844d1b71, 0x90b771c3), TOBN(0x563b71e4, 0xba6426be), + TOBN(0x2efa2e83, 0xbdb802ff), TOBN(0x3410cbab, 0xab5b4a41), + TOBN(0x555b2d26, 0x30da84dd), TOBN(0xd0711ae9, 0xee1cc29a), + TOBN(0xcf3e8c60, 0x2f547792), TOBN(0x03d7d5de, 0xdc678b35), + TOBN(0x071a2fa8, 0xced806b8), TOBN(0x222e6134, 0x697f1478), + TOBN(0xdc16fd5d, 0xabfcdbbf), TOBN(0x44912ebf, 0x121b53b8), + TOBN(0xac943674, 0x2496c27c), TOBN(0x8ea3176c, 0x1ffc26b0), + TOBN(0xb6e224ac, 0x13debf2c), TOBN(0x524cc235, 0xf372a832), + TOBN(0xd706e1d8, 0x9f6f1b18), TOBN(0x2552f005, 0x44cce35b), + TOBN(0x8c8326c2, 0xa88e31fc), TOBN(0xb5468b2c, 0xf9552047), + TOBN(0xce683e88, 0x3ff90f2b), TOBN(0x77947bdf, 0x2f0a5423), + TOBN(0xd0a1b28b, 0xed56e328), TOBN(0xaee35253, 0xc20134ac), + TOBN(0x7e98367d, 0x3567962f), TOBN(0x379ed61f, 0x8188bffb), + TOBN(0x73bba348, 0xfaf130a1), TOBN(0x6c1f75e1, 0x904ed734), + TOBN(0x18956642, 0x3b4a79fc), TOBN(0xf20bc83d, 0x54ef4493), + TOBN(0x836d425d, 0x9111eca1), TOBN(0xe5b5c318, 0x009a8dcf), + TOBN(0x3360b25d, 0x13221bc5), TOBN(0x707baad2, 0x6b3eeaf7), + TOBN(0xd7279ed8, 0x743a95a1), TOBN(0x7450a875, 0x969e809f), + TOBN(0x32b6bd53, 0xe5d0338f), TOBN(0x1e77f7af, 0x2b883bbc), + TOBN(0x90da12cc, 0x1063ecd0), TOBN(0xe2697b58, 0xc315be47), + TOBN(0x2771a5bd, 0xda85d534), TOBN(0x53e78c1f, 0xff980eea), + TOBN(0xadf1cf84, 0x900385e7), TOBN(0x7d3b14f6, 0xc9387b62), + TOBN(0x170e74b0, 0xcb8f2bd2), TOBN(0x2d50b486, 0x827fa993), + TOBN(0xcdbe8c9a, 0xf6f32bab), TOBN(0x55e906b0, 0xc3b93ab8), + TOBN(0x747f22fc, 0x8fe280d1), TOBN(0xcd8e0de5, 0xb2e114ab), + TOBN(0x5ab7dbeb, 0xe10b68b0), TOBN(0x9dc63a9c, 0xa480d4b2), + TOBN(0x78d4bc3b, 0x4be1495f), TOBN(0x25eb3db8, 0x9359122d), + TOBN(0x3f8ac05b, 0x0809cbdc), TOBN(0xbf4187bb, 0xd37c702f), + TOBN(0x84cea069, 
0x1416a6a5), TOBN(0x8f860c79, 0x43ef881c), + TOBN(0x41311f8a, 0x38038a5d), TOBN(0xe78c2ec0, 0xfc612067), + TOBN(0x494d2e81, 0x5ad73581), TOBN(0xb4cc9e00, 0x59604097), + TOBN(0xff558aec, 0xf3612cba), TOBN(0x35beef7a, 0x9e36c39e), + TOBN(0x1845c7cf, 0xdbcf41b9), TOBN(0x5703662a, 0xaea997c0), + TOBN(0x8b925afe, 0xe402f6d8), TOBN(0xd0a1b1ae, 0x4dd72162), + TOBN(0x9f47b375, 0x03c41c4b), TOBN(0xa023829b, 0x0391d042), + TOBN(0x5f5045c3, 0x503b8b0a), TOBN(0x123c2688, 0x98c010e5), + TOBN(0x324ec0cc, 0x36ba06ee), TOBN(0xface3115, 0x3dd2cc0c), + TOBN(0xb364f3be, 0xf333e91f), TOBN(0xef8aff73, 0x28e832b0), + TOBN(0x1e9bad04, 0x2d05841b), TOBN(0x42f0e3df, 0x356a21e2), + TOBN(0xa3270bcb, 0x4add627e), TOBN(0xb09a8158, 0xd322e711), + TOBN(0x86e326a1, 0x0fee104a), TOBN(0xad7788f8, 0x3703f65d), + TOBN(0x7e765430, 0x47bc4833), TOBN(0x6cee582b, 0x2b9b893a), + TOBN(0x9cd2a167, 0xe8f55a7b), TOBN(0xefbee3c6, 0xd9e4190d), + TOBN(0x33ee7185, 0xd40c2e9d), TOBN(0x844cc9c5, 0xa380b548), + TOBN(0x323f8ecd, 0x66926e04), TOBN(0x0001e38f, 0x8110c1ba), + TOBN(0x8dbcac12, 0xfc6a7f07), TOBN(0xd65e1d58, 0x0cec0827), + TOBN(0xd2cd4141, 0xbe76ca2d), TOBN(0x7895cf5c, 0xe892f33a), + TOBN(0x956d230d, 0x367139d2), TOBN(0xa91abd3e, 0xd012c4c1), + TOBN(0x34fa4883, 0x87eb36bf), TOBN(0xc5f07102, 0x914b8fb4), + TOBN(0x90f0e579, 0xadb9c95f), TOBN(0xfe6ea8cb, 0x28888195), + TOBN(0x7b9b5065, 0xedfa9284), TOBN(0x6c510bd2, 0x2b8c8d65), + TOBN(0xd7b8ebef, 0xcbe8aafd), TOBN(0xedb3af98, 0x96b1da07), + TOBN(0x28ff779d, 0x6295d426), TOBN(0x0c4f6ac7, 0x3fa3ad7b), + TOBN(0xec44d054, 0x8b8e2604), TOBN(0x9b32a66d, 0x8b0050e1), + TOBN(0x1f943366, 0xf0476ce2), TOBN(0x7554d953, 0xa602c7b4), + TOBN(0xbe35aca6, 0x524f2809), TOBN(0xb6881229, 0xfd4edbea), + TOBN(0xe8cd0c8f, 0x508efb63), TOBN(0x9eb5b5c8, 0x6abcefc7), + TOBN(0xf5621f5f, 0xb441ab4f), TOBN(0x79e6c046, 0xb76a2b22), + TOBN(0x74a4792c, 0xe37a1f69), TOBN(0xcbd252cb, 0x03542b60), + TOBN(0x785f65d5, 0xb3c20bd3), TOBN(0x8dea6143, 0x4fabc60c), + TOBN(0x45e21446, 0xde673629), TOBN(0x57f7aa1e, 0x703c2d21), + TOBN(0xa0e99b7f, 0x98c868c7), TOBN(0x4e42f66d, 0x8b641676), + TOBN(0x602884dc, 0x91077896), TOBN(0xa0d690cf, 0xc2c9885b), + TOBN(0xfeb4da33, 0x3b9a5187), TOBN(0x5f789598, 0x153c87ee), + TOBN(0x2192dd47, 0x52b16dba), TOBN(0xdeefc0e6, 0x3524c1b1), + TOBN(0x465ea76e, 0xe4383693), TOBN(0x79401711, 0x361b8d98), + TOBN(0xa5f9ace9, 0xf21a15cb), TOBN(0x73d26163, 0xefee9aeb), + TOBN(0xcca844b3, 0xe677016c), TOBN(0x6c122b07, 0x57eaee06), + TOBN(0xb782dce7, 0x15f09690), TOBN(0x508b9b12, 0x2dfc0fc9), + TOBN(0x9015ab4b, 0x65d89fc6), TOBN(0x5e79dab7, 0xd6d5bb0f), + TOBN(0x64f021f0, 0x6c775aa2), TOBN(0xdf09d8cc, 0x37c7eca1), + TOBN(0x9a761367, 0xef2fa506), TOBN(0xed4ca476, 0x5b81eec6), + TOBN(0x262ede36, 0x10bbb8b5), TOBN(0x0737ce83, 0x0641ada3), + TOBN(0x4c94288a, 0xe9831ccc), TOBN(0x487fc1ce, 0x8065e635), + TOBN(0xb13d7ab3, 0xb8bb3659), TOBN(0xdea5df3e, 0x855e4120), + TOBN(0xb9a18573, 0x85eb0244), TOBN(0x1a1b8ea3, 0xa7cfe0a3), + TOBN(0x3b837119, 0x67b0867c), TOBN(0x8d5e0d08, 0x9d364520), + TOBN(0x52dccc1e, 0xd930f0e3), TOBN(0xefbbcec7, 0xbf20bbaf), + TOBN(0x99cffcab, 0x0263ad10), TOBN(0xd8199e6d, 0xfcd18f8a), + TOBN(0x64e2773f, 0xe9f10617), TOBN(0x0079e8e1, 0x08704848), + TOBN(0x1169989f, 0x8a342283), TOBN(0x8097799c, 0xa83012e6), + TOBN(0xece966cb, 0x8a6a9001), TOBN(0x93b3afef, 0x072ac7fc), + TOBN(0xe6893a2a, 0x2db3d5ba), TOBN(0x263dc462, 0x89bf4fdc), + TOBN(0x8852dfc9, 0xe0396673), TOBN(0x7ac70895, 0x3af362b6), + TOBN(0xbb9cce4d, 0x5c2f342b), TOBN(0xbf80907a, 0xb52d7aae), + TOBN(0x97f3d3cd, 0x2161bcd0), 
TOBN(0xb25b0834, 0x0962744d), + TOBN(0xc5b18ea5, 0x6c3a1dda), TOBN(0xfe4ec7eb, 0x06c92317), + TOBN(0xb787b890, 0xad1c4afe), TOBN(0xdccd9a92, 0x0ede801a), + TOBN(0x9ac6ddda, 0xdb58da1f), TOBN(0x22bbc12f, 0xb8cae6ee), + TOBN(0xc6f8bced, 0x815c4a43), TOBN(0x8105a92c, 0xf96480c7), + TOBN(0x0dc3dbf3, 0x7a859d51), TOBN(0xe3ec7ce6, 0x3041196b), + TOBN(0xd9f64b25, 0x0d1067c9), TOBN(0xf2321321, 0x3d1f8dd8), + TOBN(0x8b5c619c, 0x76497ee8), TOBN(0x5d2b0ac6, 0xc717370e), + TOBN(0x98204cb6, 0x4fcf68e1), TOBN(0x0bdec211, 0x62bc6792), + TOBN(0x6973ccef, 0xa63b1011), TOBN(0xf9e3fa97, 0xe0de1ac5), + TOBN(0x5efb693e, 0x3d0e0c8b), TOBN(0x037248e9, 0xd2d4fcb4),} + , + {TOBN(0x80802dc9, 0x1ec34f9e), TOBN(0xd8772d35, 0x33810603), + TOBN(0x3f06d66c, 0x530cb4f3), TOBN(0x7be5ed0d, 0xc475c129), + TOBN(0xcb9e3c19, 0x31e82b10), TOBN(0xc63d2857, 0xc9ff6b4c), + TOBN(0xb92118c6, 0x92a1b45e), TOBN(0x0aec4414, 0x7285bbca), + TOBN(0xfc189ae7, 0x1e29a3ef), TOBN(0xcbe906f0, 0x4c93302e), + TOBN(0xd0107914, 0xceaae10e), TOBN(0xb7a23f34, 0xb68e19f8), + TOBN(0xe9d875c2, 0xefd2119d), TOBN(0x03198c6e, 0xfcadc9c8), + TOBN(0x65591bf6, 0x4da17113), TOBN(0x3cf0bbf8, 0x3d443038), + TOBN(0xae485bb7, 0x2b724759), TOBN(0x945353e1, 0xb2d4c63a), + TOBN(0x82159d07, 0xde7d6f2c), TOBN(0x389caef3, 0x4ec5b109), + TOBN(0x4a8ebb53, 0xdb65ef14), TOBN(0x2dc2cb7e, 0xdd99de43), + TOBN(0x816fa3ed, 0x83f2405f), TOBN(0x73429bb9, 0xc14208a3), + TOBN(0xb618d590, 0xb01e6e27), TOBN(0x047e2ccd, 0xe180b2dc), + TOBN(0xd1b299b5, 0x04aea4a9), TOBN(0x412c9e1e, 0x9fa403a4), + TOBN(0x88d28a36, 0x79407552), TOBN(0x49c50136, 0xf332b8e3), + TOBN(0x3a1b6fcc, 0xe668de19), TOBN(0x178851bc, 0x75122b97), + TOBN(0xb1e13752, 0xfb85fa4c), TOBN(0xd61257ce, 0x383c8ce9), + TOBN(0xd43da670, 0xd2f74dae), TOBN(0xa35aa23f, 0xbf846bbb), + TOBN(0x5e74235d, 0x4421fc83), TOBN(0xf6df8ee0, 0xc363473b), + TOBN(0x34d7f52a, 0x3c4aa158), TOBN(0x50d05aab, 0x9bc6d22e), + TOBN(0x8c56e735, 0xa64785f4), TOBN(0xbc56637b, 0x5f29cd07), + TOBN(0x53b2bb80, 0x3ee35067), TOBN(0x50235a0f, 0xdc919270), + TOBN(0x191ab6d8, 0xf2c4aa65), TOBN(0xc3475831, 0x8396023b), + TOBN(0x80400ba5, 0xf0f805ba), TOBN(0x8881065b, 0x5ec0f80f), + TOBN(0xc370e522, 0xcc1b5e83), TOBN(0xde2d4ad1, 0x860b8bfb), + TOBN(0xad364df0, 0x67b256df), TOBN(0x8f12502e, 0xe0138997), + TOBN(0x503fa0dc, 0x7783920a), TOBN(0xe80014ad, 0xc0bc866a), + TOBN(0x3f89b744, 0xd3064ba6), TOBN(0x03511dcd, 0xcba5dba5), + TOBN(0x197dd46d, 0x95a7b1a2), TOBN(0x9c4e7ad6, 0x3c6341fb), + TOBN(0x426eca29, 0x484c2ece), TOBN(0x9211e489, 0xde7f4f8a), + TOBN(0x14997f6e, 0xc78ef1f4), TOBN(0x2b2c0910, 0x06574586), + TOBN(0x17286a6e, 0x1c3eede8), TOBN(0x25f92e47, 0x0f60e018), + TOBN(0x805c5646, 0x31890a36), TOBN(0x703ef600, 0x57feea5b), + TOBN(0x389f747c, 0xaf3c3030), TOBN(0xe0e5daeb, 0x54dd3739), + TOBN(0xfe24a4c3, 0xc9c9f155), TOBN(0x7e4bf176, 0xb5393962), + TOBN(0x37183de2, 0xaf20bf29), TOBN(0x4a1bd7b5, 0xf95a8c3b), + TOBN(0xa83b9699, 0x46191d3d), TOBN(0x281fc8dd, 0x7b87f257), + TOBN(0xb18e2c13, 0x54107588), TOBN(0x6372def7, 0x9b2bafe8), + TOBN(0xdaf4bb48, 0x0d8972ca), TOBN(0x3f2dd4b7, 0x56167a3f), + TOBN(0x1eace32d, 0x84310cf4), TOBN(0xe3bcefaf, 0xe42700aa), + TOBN(0x5fe5691e, 0xd785e73d), TOBN(0xa5db5ab6, 0x2ea60467), + TOBN(0x02e23d41, 0xdfc6514a), TOBN(0x35e8048e, 0xe03c3665), + TOBN(0x3f8b118f, 0x1adaa0f8), TOBN(0x28ec3b45, 0x84ce1a5a), + TOBN(0xe8cacc6e, 0x2c6646b8), TOBN(0x1343d185, 0xdbd0e40f), + TOBN(0xe5d7f844, 0xcaaa358c), TOBN(0x1a1db7e4, 0x9924182a), + TOBN(0xd64cd42d, 0x9c875d9a), TOBN(0xb37b515f, 0x042eeec8), + TOBN(0x4d4dd409, 0x7b165fbe), 
TOBN(0xfc322ed9, 0xe206eff3), + TOBN(0x7dee4102, 0x59b7e17e), TOBN(0x55a481c0, 0x8236ca00), + TOBN(0x8c885312, 0xc23fc975), TOBN(0x15715806, 0x05d6297b), + TOBN(0xa078868e, 0xf78edd39), TOBN(0x956b31e0, 0x03c45e52), + TOBN(0x470275d5, 0xff7b33a6), TOBN(0xc8d5dc3a, 0x0c7e673f), + TOBN(0x419227b4, 0x7e2f2598), TOBN(0x8b37b634, 0x4c14a975), + TOBN(0xd0667ed6, 0x8b11888c), TOBN(0x5e0e8c3e, 0x803e25dc), + TOBN(0x34e5d0dc, 0xb987a24a), TOBN(0x9f40ac3b, 0xae920323), + TOBN(0x5463de95, 0x34e0f63a), TOBN(0xa128bf92, 0x6b6328f9), + TOBN(0x491ccd7c, 0xda64f1b7), TOBN(0x7ef1ec27, 0xc47bde35), + TOBN(0xa857240f, 0xa36a2737), TOBN(0x35dc1366, 0x63621bc1), + TOBN(0x7a3a6453, 0xd4fb6897), TOBN(0x80f1a439, 0xc929319d), + TOBN(0xfc18274b, 0xf8cb0ba0), TOBN(0xb0b53766, 0x8078c5eb), + TOBN(0xfb0d4924, 0x1e01d0ef), TOBN(0x50d7c67d, 0x372ab09c), + TOBN(0xb4e370af, 0x3aeac968), TOBN(0xe4f7fee9, 0xc4b63266), + TOBN(0xb4acd4c2, 0xe3ac5664), TOBN(0xf8910bd2, 0xceb38cbf), + TOBN(0x1c3ae50c, 0xc9c0726e), TOBN(0x15309569, 0xd97b40bf), + TOBN(0x70884b7f, 0xfd5a5a1b), TOBN(0x3890896a, 0xef8314cd), + TOBN(0x58e1515c, 0xa5618c93), TOBN(0xe665432b, 0x77d942d1), + TOBN(0xb32181bf, 0xb6f767a8), TOBN(0x753794e8, 0x3a604110), + TOBN(0x09afeb7c, 0xe8c0dbcc), TOBN(0x31e02613, 0x598673a3), + TOBN(0x5d98e557, 0x7d46db00), TOBN(0xfc21fb8c, 0x9d985b28), + TOBN(0xc9040116, 0xb0843e0b), TOBN(0x53b1b3a8, 0x69b04531), + TOBN(0xdd1649f0, 0x85d7d830), TOBN(0xbb3bcc87, 0xcb7427e8), + TOBN(0x77261100, 0xc93dce83), TOBN(0x7e79da61, 0xa1922a2a), + TOBN(0x587a2b02, 0xf3149ce8), TOBN(0x147e1384, 0xde92ec83), + TOBN(0x484c83d3, 0xaf077f30), TOBN(0xea78f844, 0x0658b53a), + TOBN(0x912076c2, 0x027aec53), TOBN(0xf34714e3, 0x93c8177d), + TOBN(0x37ef5d15, 0xc2376c84), TOBN(0x8315b659, 0x3d1aa783), + TOBN(0x3a75c484, 0xef852a90), TOBN(0x0ba0c58a, 0x16086bd4), + TOBN(0x29688d7a, 0x529a6d48), TOBN(0x9c7f250d, 0xc2f19203), + TOBN(0x123042fb, 0x682e2df9), TOBN(0x2b7587e7, 0xad8121bc), + TOBN(0x30fc0233, 0xe0182a65), TOBN(0xb82ecf87, 0xe3e1128a), + TOBN(0x71682861, 0x93fb098f), TOBN(0x043e21ae, 0x85e9e6a7), + TOBN(0xab5b49d6, 0x66c834ea), TOBN(0x3be43e18, 0x47414287), + TOBN(0xf40fb859, 0x219a2a47), TOBN(0x0e6559e9, 0xcc58df3c), + TOBN(0xfe1dfe8e, 0x0c6615b4), TOBN(0x14abc8fd, 0x56459d70), + TOBN(0x7be0fa8e, 0x05de0386), TOBN(0x8e63ef68, 0xe9035c7c), + TOBN(0x116401b4, 0x53b31e91), TOBN(0x0cba7ad4, 0x4436b4d8), + TOBN(0x9151f9a0, 0x107afd66), TOBN(0xafaca8d0, 0x1f0ee4c4), + TOBN(0x75fe5c1d, 0x9ee9761c), TOBN(0x3497a16b, 0xf0c0588f), + TOBN(0x3ee2bebd, 0x0304804c), TOBN(0xa8fb9a60, 0xc2c990b9), + TOBN(0xd14d32fe, 0x39251114), TOBN(0x36bf25bc, 0xcac73366), + TOBN(0xc9562c66, 0xdba7495c), TOBN(0x324d301b, 0x46ad348b), + TOBN(0x9f46620c, 0xd670407e), TOBN(0x0ea8d4f1, 0xe3733a01), + TOBN(0xd396d532, 0xb0c324e0), TOBN(0x5b211a0e, 0x03c317cd), + TOBN(0x090d7d20, 0x5ffe7b37), TOBN(0x3b7f3efb, 0x1747d2da), + TOBN(0xa2cb525f, 0xb54fc519), TOBN(0x6e220932, 0xf66a971e), + TOBN(0xddc160df, 0xb486d440), TOBN(0x7fcfec46, 0x3fe13465), + TOBN(0x83da7e4e, 0x76e4c151), TOBN(0xd6fa48a1, 0xd8d302b5), + TOBN(0xc6304f26, 0x5872cd88), TOBN(0x806c1d3c, 0x278b90a1), + TOBN(0x3553e725, 0xcaf0bc1c), TOBN(0xff59e603, 0xbb9d8d5c), + TOBN(0xa4550f32, 0x7a0b85dd), TOBN(0xdec5720a, 0x93ecc217), + TOBN(0x0b88b741, 0x69d62213), TOBN(0x7212f245, 0x5b365955), + TOBN(0x20764111, 0xb5cae787), TOBN(0x13cb7f58, 0x1dfd3124), + TOBN(0x2dca77da, 0x1175aefb), TOBN(0xeb75466b, 0xffaae775), + TOBN(0x74d76f3b, 0xdb6cff32), TOBN(0x7440f37a, 0x61fcda9a), + TOBN(0x1bb3ac92, 0xb525028b), TOBN(0x20fbf8f7, 
0xa1975f29), + TOBN(0x982692e1, 0xdf83097f), TOBN(0x28738f6c, 0x554b0800), + TOBN(0xdc703717, 0xa2ce2f2f), TOBN(0x7913b93c, 0x40814194), + TOBN(0x04924593, 0x1fe89636), TOBN(0x7b98443f, 0xf78834a6), + TOBN(0x11c6ab01, 0x5114a5a1), TOBN(0x60deb383, 0xffba5f4c), + TOBN(0x4caa54c6, 0x01a982e6), TOBN(0x1dd35e11, 0x3491cd26), + TOBN(0x973c315f, 0x7cbd6b05), TOBN(0xcab00775, 0x52494724), + TOBN(0x04659b1f, 0x6565e15a), TOBN(0xbf30f529, 0x8c8fb026), + TOBN(0xfc21641b, 0xa8a0de37), TOBN(0xe9c7a366, 0xfa5e5114), + TOBN(0xdb849ca5, 0x52f03ad8), TOBN(0xc7e8dbe9, 0x024e35c0), + TOBN(0xa1a2bbac, 0xcfc3c789), TOBN(0xbf733e7d, 0x9c26f262), + TOBN(0x882ffbf5, 0xb8444823), TOBN(0xb7224e88, 0x6bf8483b), + TOBN(0x53023b8b, 0x65bef640), TOBN(0xaabfec91, 0xd4d5f8cd), + TOBN(0xa40e1510, 0x079ea1bd), TOBN(0x1ad9addc, 0xd05d5d26), + TOBN(0xdb3f2eab, 0x13e68d4f), TOBN(0x1cff1ae2, 0x640f803f), + TOBN(0xe0e7b749, 0xd4cee117), TOBN(0x8e9f275b, 0x4036d909), + TOBN(0xce34e31d, 0x8f4d4c38), TOBN(0x22b37f69, 0xd75130fc), + TOBN(0x83e0f1fd, 0xb4014604), TOBN(0xa8ce9919, 0x89415078), + TOBN(0x82375b75, 0x41792efe), TOBN(0x4f59bf5c, 0x97d4515b), + TOBN(0xac4f324f, 0x923a277d), TOBN(0xd9bc9b7d, 0x650f3406), + TOBN(0xc6fa87d1, 0x8a39bc51), TOBN(0x82588530, 0x5ccc108f), + TOBN(0x5ced3c9f, 0x82e4c634), TOBN(0x8efb8314, 0x3a4464f8), + TOBN(0xe706381b, 0x7a1dca25), TOBN(0x6cd15a3c, 0x5a2a412b), + TOBN(0x9347a8fd, 0xbfcd8fb5), TOBN(0x31db2eef, 0x6e54cd22), + TOBN(0xc4aeb11e, 0xf8d8932f), TOBN(0x11e7c1ed, 0x344411af), + TOBN(0x2653050c, 0xdc9a151e), TOBN(0x9edbfc08, 0x3bb0a859), + TOBN(0x926c81c7, 0xfd5691e7), TOBN(0x9c1b2342, 0x6f39019a), + TOBN(0x64a81c8b, 0x7f8474b9), TOBN(0x90657c07, 0x01761819), + TOBN(0x390b3331, 0x55e0375a), TOBN(0xc676c626, 0xb6ebc47d), + TOBN(0x51623247, 0xb7d6dee8), TOBN(0x0948d927, 0x79659313), + TOBN(0x99700161, 0xe9ab35ed), TOBN(0x06cc32b4, 0x8ddde408), + TOBN(0x6f2fd664, 0x061ef338), TOBN(0x1606fa02, 0xc202e9ed), + TOBN(0x55388bc1, 0x929ba99b), TOBN(0xc4428c5e, 0x1e81df69), + TOBN(0xce2028ae, 0xf91b0b2a), TOBN(0xce870a23, 0xf03dfd3f), + TOBN(0x66ec2c87, 0x0affe8ed), TOBN(0xb205fb46, 0x284d0c00), + TOBN(0xbf5dffe7, 0x44cefa48), TOBN(0xb6fc37a8, 0xa19876d7), + TOBN(0xbecfa84c, 0x08b72863), TOBN(0xd7205ff5, 0x2576374f), + TOBN(0x80330d32, 0x8887de41), TOBN(0x5de0df0c, 0x869ea534), + TOBN(0x13f42753, 0x3c56ea17), TOBN(0xeb1f6069, 0x452b1a78), + TOBN(0x50474396, 0xe30ea15c), TOBN(0x575816a1, 0xc1494125), + TOBN(0xbe1ce55b, 0xfe6bb38f), TOBN(0xb901a948, 0x96ae30f7), + TOBN(0xe5af0f08, 0xd8fc3548), TOBN(0x5010b5d0, 0xd73bfd08), + TOBN(0x993d2880, 0x53fe655a), TOBN(0x99f2630b, 0x1c1309fd), + TOBN(0xd8677baf, 0xb4e3b76f), TOBN(0x14e51ddc, 0xb840784b), + TOBN(0x326c750c, 0xbf0092ce), TOBN(0xc83d306b, 0xf528320f), + TOBN(0xc4456715, 0x77d4715c), TOBN(0xd30019f9, 0x6b703235), + TOBN(0x207ccb2e, 0xd669e986), TOBN(0x57c824af, 0xf6dbfc28), + TOBN(0xf0eb532f, 0xd8f92a23), TOBN(0x4a557fd4, 0x9bb98fd2), + TOBN(0xa57acea7, 0xc1e6199a), TOBN(0x0c663820, 0x8b94b1ed), + TOBN(0x9b42be8f, 0xf83a9266), TOBN(0xc7741c97, 0x0101bd45), + TOBN(0x95770c11, 0x07bd9ceb), TOBN(0x1f50250a, 0x8b2e0744), + TOBN(0xf762eec8, 0x1477b654), TOBN(0xc65b900e, 0x15efe59a), + TOBN(0x88c96148, 0x9546a897), TOBN(0x7e8025b3, 0xc30b4d7c), + TOBN(0xae4065ef, 0x12045cf9), TOBN(0x6fcb2caf, 0x9ccce8bd), + TOBN(0x1fa0ba4e, 0xf2cf6525), TOBN(0xf683125d, 0xcb72c312), + TOBN(0xa01da4ea, 0xe312410e), TOBN(0x67e28677, 0x6cd8e830), + TOBN(0xabd95752, 0x98fb3f07), TOBN(0x05f11e11, 0xeef649a5), + TOBN(0xba47faef, 0x9d3472c2), TOBN(0x3adff697, 0xc77d1345), + 
TOBN(0x4761fa04, 0xdd15afee), TOBN(0x64f1f61a, 0xb9e69462), + TOBN(0xfa691fab, 0x9bfb9093), TOBN(0x3df8ae8f, 0xa1133dfe), + TOBN(0xcd5f8967, 0x58cc710d), TOBN(0xfbb88d50, 0x16c7fe79), + TOBN(0x8e011b4c, 0xe88c50d1), TOBN(0x7532e807, 0xa8771c4f), + TOBN(0x64c78a48, 0xe2278ee4), TOBN(0x0b283e83, 0x3845072a), + TOBN(0x98a6f291, 0x49e69274), TOBN(0xb96e9668, 0x1868b21c), + TOBN(0x38f0adc2, 0xb1a8908e), TOBN(0x90afcff7, 0x1feb829d), + TOBN(0x9915a383, 0x210b0856), TOBN(0xa5a80602, 0xdef04889), + TOBN(0x800e9af9, 0x7c64d509), TOBN(0x81382d0b, 0xb8996f6f), + TOBN(0x490eba53, 0x81927e27), TOBN(0x46c63b32, 0x4af50182), + TOBN(0x784c5fd9, 0xd3ad62ce), TOBN(0xe4fa1870, 0xf8ae8736), + TOBN(0x4ec9d0bc, 0xd7466b25), TOBN(0x84ddbe1a, 0xdb235c65), + TOBN(0x5e2645ee, 0x163c1688), TOBN(0x570bd00e, 0x00eba747), + TOBN(0xfa51b629, 0x128bfa0f), TOBN(0x92fce1bd, 0x6c1d3b68), + TOBN(0x3e7361dc, 0xb66778b1), TOBN(0x9c7d249d, 0x5561d2bb), + TOBN(0xa40b28bf, 0x0bbc6229), TOBN(0x1c83c05e, 0xdfd91497), + TOBN(0x5f9f5154, 0xf083df05), TOBN(0xbac38b3c, 0xeee66c9d), + TOBN(0xf71db7e3, 0xec0dfcfd), TOBN(0xf2ecda8e, 0x8b0a8416), + TOBN(0x52fddd86, 0x7812aa66), TOBN(0x2896ef10, 0x4e6f4272), + TOBN(0xff27186a, 0x0fe9a745), TOBN(0x08249fcd, 0x49ca70db), + TOBN(0x7425a2e6, 0x441cac49), TOBN(0xf4a0885a, 0xece5ff57), + TOBN(0x6e2cb731, 0x7d7ead58), TOBN(0xf96cf7d6, 0x1898d104), + TOBN(0xafe67c9d, 0x4f2c9a89), TOBN(0x89895a50, 0x1c7bf5bc), + TOBN(0xdc7cb8e5, 0x573cecfa), TOBN(0x66497eae, 0xd15f03e6), + TOBN(0x6bc0de69, 0x3f084420), TOBN(0x323b9b36, 0xacd532b0), + TOBN(0xcfed390a, 0x0115a3c1), TOBN(0x9414c40b, 0x2d65ca0e), + TOBN(0x641406bd, 0x2f530c78), TOBN(0x29369a44, 0x833438f2), + TOBN(0x996884f5, 0x903fa271), TOBN(0xe6da0fd2, 0xb9da921e), + TOBN(0xa6f2f269, 0x5db01e54), TOBN(0x1ee3e9bd, 0x6876214e), + TOBN(0xa26e181c, 0xe27a9497), TOBN(0x36d254e4, 0x8e215e04), + TOBN(0x42f32a6c, 0x252cabca), TOBN(0x99481487, 0x80b57614), + TOBN(0x4c4dfe69, 0x40d9cae1), TOBN(0x05869580, 0x11a10f09), + TOBN(0xca287b57, 0x3491b64b), TOBN(0x77862d5d, 0x3fd4a53b), + TOBN(0xbf94856e, 0x50349126), TOBN(0x2be30bd1, 0x71c5268f), + TOBN(0x10393f19, 0xcbb650a6), TOBN(0x639531fe, 0x778cf9fd), + TOBN(0x02556a11, 0xb2935359), TOBN(0xda38aa96, 0xaf8c126e), + TOBN(0x47dbe6c2, 0x0960167f), TOBN(0x37bbabb6, 0x501901cd), + TOBN(0xb6e979e0, 0x2c947778), TOBN(0xd69a5175, 0x7a1a1dc6), + TOBN(0xc3ed5095, 0x9d9faf0c), TOBN(0x4dd9c096, 0x1d5fa5f0), + TOBN(0xa0c4304d, 0x64f16ea8), TOBN(0x8b1cac16, 0x7e718623), + TOBN(0x0b576546, 0x7c67f03e), TOBN(0x559cf5ad, 0xcbd88c01), + TOBN(0x074877bb, 0x0e2af19a), TOBN(0x1f717ec1, 0xa1228c92), + TOBN(0x70bcb800, 0x326e8920), TOBN(0xec6e2c5c, 0x4f312804), + TOBN(0x426aea7d, 0x3fca4752), TOBN(0xf12c0949, 0x2211f62a), + TOBN(0x24beecd8, 0x7be7b6b5), TOBN(0xb77eaf4c, 0x36d7a27d), + TOBN(0x154c2781, 0xfda78fd3), TOBN(0x848a83b0, 0x264eeabe), + TOBN(0x81287ef0, 0x4ffe2bc4), TOBN(0x7b6d88c6, 0xb6b6fc2a), + TOBN(0x805fb947, 0xce417d99), TOBN(0x4b93dcc3, 0x8b916cc4), + TOBN(0x72e65bb3, 0x21273323), TOBN(0xbcc1badd, 0x6ea9886e), + TOBN(0x0e223011, 0x4bc5ee85), TOBN(0xa561be74, 0xc18ee1e4), + TOBN(0x762fd2d4, 0xa6bcf1f1), TOBN(0x50e6a5a4, 0x95231489), + TOBN(0xca96001f, 0xa00b500b), TOBN(0x5c098cfc, 0x5d7dcdf5), + TOBN(0xa64e2d2e, 0x8c446a85), TOBN(0xbae9bcf1, 0x971f3c62), + TOBN(0x4ec22683, 0x8435a2c5), TOBN(0x8ceaed6c, 0x4bad4643), + TOBN(0xe9f8fb47, 0xccccf4e3), TOBN(0xbd4f3fa4, 0x1ce3b21e), + TOBN(0xd79fb110, 0xa3db3292), TOBN(0xe28a37da, 0xb536c66a), + TOBN(0x279ce87b, 0x8e49e6a9), TOBN(0x70ccfe8d, 0xfdcec8e3), + TOBN(0x2193e4e0, 
0x3ba464b2), TOBN(0x0f39d60e, 0xaca9a398), + TOBN(0x7d7932af, 0xf82c12ab), TOBN(0xd8ff50ed, 0x91e7e0f7), + TOBN(0xea961058, 0xfa28a7e0), TOBN(0xc726cf25, 0x0bf5ec74), + TOBN(0xe74d55c8, 0xdb229666), TOBN(0x0bd9abbf, 0xa57f5799), + TOBN(0x7479ef07, 0x4dfc47b3), TOBN(0xd9c65fc3, 0x0c52f91d), + TOBN(0x8e0283fe, 0x36a8bde2), TOBN(0xa32a8b5e, 0x7d4b7280), + TOBN(0x6a677c61, 0x12e83233), TOBN(0x0fbb3512, 0xdcc9bf28), + TOBN(0x562e8ea5, 0x0d780f61), TOBN(0x0db8b22b, 0x1dc4e89c), + TOBN(0x0a6fd1fb, 0x89be0144), TOBN(0x8c77d246, 0xca57113b), + TOBN(0x4639075d, 0xff09c91c), TOBN(0x5b47b17f, 0x5060824c), + TOBN(0x58aea2b0, 0x16287b52), TOBN(0xa1343520, 0xd0cd8eb0), + TOBN(0x6148b4d0, 0xc5d58573), TOBN(0xdd2b6170, 0x291c68ae), + TOBN(0xa61b3929, 0x1da3b3b7), TOBN(0x5f946d79, 0x08c4ac10), + TOBN(0x4105d4a5, 0x7217d583), TOBN(0x5061da3d, 0x25e6de5e), + TOBN(0x3113940d, 0xec1b4991), TOBN(0xf12195e1, 0x36f485ae), + TOBN(0xa7507fb2, 0x731a2ee0), TOBN(0x95057a8e, 0x6e9e196e), + TOBN(0xa3c2c911, 0x2e130136), TOBN(0x97dfbb36, 0x33c60d15), + TOBN(0xcaf3c581, 0xb300ee2b), TOBN(0x77f25d90, 0xf4bac8b8), + TOBN(0xdb1c4f98, 0x6d840cd6), TOBN(0x471d62c0, 0xe634288c), + TOBN(0x8ec2f85e, 0xcec8a161), TOBN(0x41f37cbc, 0xfa6f4ae2), + TOBN(0x6793a20f, 0x4b709985), TOBN(0x7a7bd33b, 0xefa8985b), + TOBN(0x2c6a3fbd, 0x938e6446), TOBN(0x19042619, 0x2a8d47c1), + TOBN(0x16848667, 0xcc36975f), TOBN(0x02acf168, 0x9d5f1dfb), + TOBN(0x62d41ad4, 0x613baa94), TOBN(0xb56fbb92, 0x9f684670), + TOBN(0xce610d0d, 0xe9e40569), TOBN(0x7b99c65f, 0x35489fef), + TOBN(0x0c88ad1b, 0x3df18b97), TOBN(0x81b7d9be, 0x5d0e9edb), + TOBN(0xd85218c0, 0xc716cc0a), TOBN(0xf4b5ff90, 0x85691c49), + TOBN(0xa4fd666b, 0xce356ac6), TOBN(0x17c72895, 0x4b327a7a), + TOBN(0xf93d5085, 0xda6be7de), TOBN(0xff71530e, 0x3301d34e), + TOBN(0x4cd96442, 0xd8f448e8), TOBN(0x9283d331, 0x2ed18ffa), + TOBN(0x4d33dd99, 0x2a849870), TOBN(0xa716964b, 0x41576335), + TOBN(0xff5e3a9b, 0x179be0e5), TOBN(0x5b9d6b1b, 0x83b13632), + TOBN(0x3b8bd7d4, 0xa52f313b), TOBN(0xc9dd95a0, 0x637a4660), + TOBN(0x30035962, 0x0b3e218f), TOBN(0xce1481a3, 0xc7b28a3c), + TOBN(0xab41b43a, 0x43228d83), TOBN(0x24ae1c30, 0x4ad63f99), + TOBN(0x8e525f1a, 0x46a51229), TOBN(0x14af860f, 0xcd26d2b4), + TOBN(0xd6baef61, 0x3f714aa1), TOBN(0xf51865ad, 0xeb78795e), + TOBN(0xd3e21fce, 0xe6a9d694), TOBN(0x82ceb1dd, 0x8a37b527)} +}; diff --git a/deps/openssl/openssl/crypto/ecdh/Makefile b/deps/openssl/openssl/crypto/ecdh/Makefile index ba05fea05ca04e..df1b03adb1dd05 100644 --- a/deps/openssl/openssl/crypto/ecdh/Makefile +++ b/deps/openssl/openssl/crypto/ecdh/Makefile @@ -17,9 +17,9 @@ TEST=ecdhtest.c APPS= LIB=$(TOP)/libcrypto.a -LIBSRC= ech_lib.c ech_ossl.c ech_key.c ech_err.c +LIBSRC= ech_lib.c ech_ossl.c ech_key.c ech_err.c ech_kdf.c -LIBOBJ= ech_lib.o ech_ossl.o ech_key.o ech_err.o +LIBOBJ= ech_lib.o ech_ossl.o ech_key.o ech_err.o ech_kdf.o SRC= $(LIBSRC) @@ -83,6 +83,14 @@ ech_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h ech_err.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h ech_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h ech_err.o: ech_err.c +ech_kdf.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h +ech_kdf.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +ech_kdf.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h +ech_kdf.o: ../../include/openssl/evp.h ../../include/openssl/obj_mac.h +ech_kdf.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h +ech_kdf.o: 
../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +ech_kdf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +ech_kdf.o: ../../include/openssl/symhacks.h ech_kdf.c ech_key.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h ech_key.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h ech_key.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h diff --git a/deps/openssl/openssl/crypto/ecdh/ecdh.h b/deps/openssl/openssl/crypto/ecdh/ecdh.h index a9b811abd050e5..25348b30fe7cf4 100644 --- a/deps/openssl/openssl/crypto/ecdh/ecdh.h +++ b/deps/openssl/openssl/crypto/ecdh/ecdh.h @@ -85,6 +85,8 @@ extern "C" { #endif +# define EC_FLAG_COFACTOR_ECDH 0x1000 + const ECDH_METHOD *ECDH_OpenSSL(void); void ECDH_set_default_method(const ECDH_METHOD *); @@ -101,6 +103,11 @@ int ECDH_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new int ECDH_set_ex_data(EC_KEY *d, int idx, void *arg); void *ECDH_get_ex_data(EC_KEY *d, int idx); +int ECDH_KDF_X9_62(unsigned char *out, size_t outlen, + const unsigned char *Z, size_t Zlen, + const unsigned char *sinfo, size_t sinfolen, + const EVP_MD *md); + /* BEGIN ERROR CODES */ /* * The following lines are auto generated by the script mkerr.pl. Any changes diff --git a/deps/openssl/openssl/crypto/ecdh/ecdhtest.c b/deps/openssl/openssl/crypto/ecdh/ecdhtest.c index 996321d1ee430b..2fe2c66443d0af 100644 --- a/deps/openssl/openssl/crypto/ecdh/ecdhtest.c +++ b/deps/openssl/openssl/crypto/ecdh/ecdhtest.c @@ -312,6 +312,170 @@ static int test_ecdh_curve(int nid, const char *text, BN_CTX *ctx, BIO *out) return (ret); } +/* Keys and shared secrets from RFC 7027 */ + +static const unsigned char bp256_da[] = { + 0x81, 0xDB, 0x1E, 0xE1, 0x00, 0x15, 0x0F, 0xF2, 0xEA, 0x33, 0x8D, 0x70, + 0x82, 0x71, 0xBE, 0x38, 0x30, 0x0C, 0xB5, 0x42, 0x41, 0xD7, 0x99, 0x50, + 0xF7, 0x7B, 0x06, 0x30, 0x39, 0x80, 0x4F, 0x1D +}; + +static const unsigned char bp256_db[] = { + 0x55, 0xE4, 0x0B, 0xC4, 0x1E, 0x37, 0xE3, 0xE2, 0xAD, 0x25, 0xC3, 0xC6, + 0x65, 0x45, 0x11, 0xFF, 0xA8, 0x47, 0x4A, 0x91, 0xA0, 0x03, 0x20, 0x87, + 0x59, 0x38, 0x52, 0xD3, 0xE7, 0xD7, 0x6B, 0xD3 +}; + +static const unsigned char bp256_Z[] = { + 0x89, 0xAF, 0xC3, 0x9D, 0x41, 0xD3, 0xB3, 0x27, 0x81, 0x4B, 0x80, 0x94, + 0x0B, 0x04, 0x25, 0x90, 0xF9, 0x65, 0x56, 0xEC, 0x91, 0xE6, 0xAE, 0x79, + 0x39, 0xBC, 0xE3, 0x1F, 0x3A, 0x18, 0xBF, 0x2B +}; + +static const unsigned char bp384_da[] = { + 0x1E, 0x20, 0xF5, 0xE0, 0x48, 0xA5, 0x88, 0x6F, 0x1F, 0x15, 0x7C, 0x74, + 0xE9, 0x1B, 0xDE, 0x2B, 0x98, 0xC8, 0xB5, 0x2D, 0x58, 0xE5, 0x00, 0x3D, + 0x57, 0x05, 0x3F, 0xC4, 0xB0, 0xBD, 0x65, 0xD6, 0xF1, 0x5E, 0xB5, 0xD1, + 0xEE, 0x16, 0x10, 0xDF, 0x87, 0x07, 0x95, 0x14, 0x36, 0x27, 0xD0, 0x42 +}; + +static const unsigned char bp384_db[] = { + 0x03, 0x26, 0x40, 0xBC, 0x60, 0x03, 0xC5, 0x92, 0x60, 0xF7, 0x25, 0x0C, + 0x3D, 0xB5, 0x8C, 0xE6, 0x47, 0xF9, 0x8E, 0x12, 0x60, 0xAC, 0xCE, 0x4A, + 0xCD, 0xA3, 0xDD, 0x86, 0x9F, 0x74, 0xE0, 0x1F, 0x8B, 0xA5, 0xE0, 0x32, + 0x43, 0x09, 0xDB, 0x6A, 0x98, 0x31, 0x49, 0x7A, 0xBA, 0xC9, 0x66, 0x70 +}; + +static const unsigned char bp384_Z[] = { + 0x0B, 0xD9, 0xD3, 0xA7, 0xEA, 0x0B, 0x3D, 0x51, 0x9D, 0x09, 0xD8, 0xE4, + 0x8D, 0x07, 0x85, 0xFB, 0x74, 0x4A, 0x6B, 0x35, 0x5E, 0x63, 0x04, 0xBC, + 0x51, 0xC2, 0x29, 0xFB, 0xBC, 0xE2, 0x39, 0xBB, 0xAD, 0xF6, 0x40, 0x37, + 0x15, 0xC3, 0x5D, 0x4F, 0xB2, 0xA5, 0x44, 0x4F, 0x57, 0x5D, 0x4F, 0x42 +}; + +static const unsigned char bp512_da[] = { + 0x16, 0x30, 0x2F, 0xF0, 0xDB, 0xBB, 0x5A, 0x8D, 0x73, 0x3D, 0xAB, 0x71, + 0x41, 
0xC1, 0xB4, 0x5A, 0xCB, 0xC8, 0x71, 0x59, 0x39, 0x67, 0x7F, 0x6A, + 0x56, 0x85, 0x0A, 0x38, 0xBD, 0x87, 0xBD, 0x59, 0xB0, 0x9E, 0x80, 0x27, + 0x96, 0x09, 0xFF, 0x33, 0x3E, 0xB9, 0xD4, 0xC0, 0x61, 0x23, 0x1F, 0xB2, + 0x6F, 0x92, 0xEE, 0xB0, 0x49, 0x82, 0xA5, 0xF1, 0xD1, 0x76, 0x4C, 0xAD, + 0x57, 0x66, 0x54, 0x22 +}; + +static const unsigned char bp512_db[] = { + 0x23, 0x0E, 0x18, 0xE1, 0xBC, 0xC8, 0x8A, 0x36, 0x2F, 0xA5, 0x4E, 0x4E, + 0xA3, 0x90, 0x20, 0x09, 0x29, 0x2F, 0x7F, 0x80, 0x33, 0x62, 0x4F, 0xD4, + 0x71, 0xB5, 0xD8, 0xAC, 0xE4, 0x9D, 0x12, 0xCF, 0xAB, 0xBC, 0x19, 0x96, + 0x3D, 0xAB, 0x8E, 0x2F, 0x1E, 0xBA, 0x00, 0xBF, 0xFB, 0x29, 0xE4, 0xD7, + 0x2D, 0x13, 0xF2, 0x22, 0x45, 0x62, 0xF4, 0x05, 0xCB, 0x80, 0x50, 0x36, + 0x66, 0xB2, 0x54, 0x29 +}; + +static const unsigned char bp512_Z[] = { + 0xA7, 0x92, 0x70, 0x98, 0x65, 0x5F, 0x1F, 0x99, 0x76, 0xFA, 0x50, 0xA9, + 0xD5, 0x66, 0x86, 0x5D, 0xC5, 0x30, 0x33, 0x18, 0x46, 0x38, 0x1C, 0x87, + 0x25, 0x6B, 0xAF, 0x32, 0x26, 0x24, 0x4B, 0x76, 0xD3, 0x64, 0x03, 0xC0, + 0x24, 0xD7, 0xBB, 0xF0, 0xAA, 0x08, 0x03, 0xEA, 0xFF, 0x40, 0x5D, 0x3D, + 0x24, 0xF1, 0x1A, 0x9B, 0x5C, 0x0B, 0xEF, 0x67, 0x9F, 0xE1, 0x45, 0x4B, + 0x21, 0xC4, 0xCD, 0x1F +}; + +/* Given private value and NID, create EC_KEY structure */ + +static EC_KEY *mk_eckey(int nid, const unsigned char *p, size_t plen) +{ + int ok = 0; + EC_KEY *k = NULL; + BIGNUM *priv = NULL; + EC_POINT *pub = NULL; + const EC_GROUP *grp; + k = EC_KEY_new_by_curve_name(nid); + if (!k) + goto err; + priv = BN_bin2bn(p, plen, NULL); + if (!priv) + goto err; + if (!EC_KEY_set_private_key(k, priv)) + goto err; + grp = EC_KEY_get0_group(k); + pub = EC_POINT_new(grp); + if (!pub) + goto err; + if (!EC_POINT_mul(grp, pub, priv, NULL, NULL, NULL)) + goto err; + if (!EC_KEY_set_public_key(k, pub)) + goto err; + ok = 1; + err: + if (priv) + BN_clear_free(priv); + if (pub) + EC_POINT_free(pub); + if (ok) + return k; + else if (k) + EC_KEY_free(k); + return NULL; +} + +/* + * Known answer test: compute shared secret and check it matches expected + * value. 
+ */ + +static int ecdh_kat(BIO *out, const char *cname, int nid, + const unsigned char *k1, size_t k1_len, + const unsigned char *k2, size_t k2_len, + const unsigned char *Z, size_t Zlen) +{ + int rv = 0; + EC_KEY *key1 = NULL, *key2 = NULL; + unsigned char *Ztmp = NULL; + size_t Ztmplen; + BIO_puts(out, "Testing ECDH shared secret with "); + BIO_puts(out, cname); + key1 = mk_eckey(nid, k1, k1_len); + key2 = mk_eckey(nid, k2, k2_len); + if (!key1 || !key2) + goto err; + Ztmplen = (EC_GROUP_get_degree(EC_KEY_get0_group(key1)) + 7) / 8; + if (Ztmplen != Zlen) + goto err; + Ztmp = OPENSSL_malloc(Ztmplen); + if (!ECDH_compute_key(Ztmp, Ztmplen, + EC_KEY_get0_public_key(key2), key1, 0)) + goto err; + if (memcmp(Ztmp, Z, Zlen)) + goto err; + memset(Ztmp, 0, Zlen); + if (!ECDH_compute_key(Ztmp, Ztmplen, + EC_KEY_get0_public_key(key1), key2, 0)) + goto err; + if (memcmp(Ztmp, Z, Zlen)) + goto err; + rv = 1; + err: + if (key1) + EC_KEY_free(key1); + if (key2) + EC_KEY_free(key2); + if (Ztmp) + OPENSSL_free(Ztmp); + if (rv) + BIO_puts(out, " ok\n"); + else { + fprintf(stderr, "Error in ECDH routines\n"); + ERR_print_errors_fp(stderr); + } + return rv; +} + +# define test_ecdh_kat(bio, curve, bits) \ + ecdh_kat(bio, curve, NID_brainpoolP##bits##r1, \ + bp##bits##_da, sizeof(bp##bits##_da), \ + bp##bits##_db, sizeof(bp##bits##_db), \ + bp##bits##_Z, sizeof(bp##bits##_Z)) + int main(int argc, char *argv[]) { BN_CTX *ctx = NULL; @@ -372,6 +536,12 @@ int main(int argc, char *argv[]) if (!test_ecdh_curve(NID_sect571r1, "NIST Binary-Curve B-571", ctx, out)) goto err; # endif + if (!test_ecdh_kat(out, "Brainpool Prime-Curve brainpoolP256r1", 256)) + goto err; + if (!test_ecdh_kat(out, "Brainpool Prime-Curve brainpoolP384r1", 384)) + goto err; + if (!test_ecdh_kat(out, "Brainpool Prime-Curve brainpoolP512r1", 512)) + goto err; ret = 0; diff --git a/deps/openssl/openssl/crypto/ecdh/ech_kdf.c b/deps/openssl/openssl/crypto/ecdh/ech_kdf.c new file mode 100644 index 00000000000000..ac722ac9ee6952 --- /dev/null +++ b/deps/openssl/openssl/crypto/ecdh/ech_kdf.c @@ -0,0 +1,111 @@ +/* crypto/ecdh/ec_kdf.c */ +/* + * Written by Stephen Henson for the OpenSSL project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. 
Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#define OPENSSL_FIPSAPI + +#include +#include +#include + +/* Key derivation function from X9.62/SECG */ +/* Way more than we will ever need */ +#define ECDH_KDF_MAX (1 << 30) + +int ECDH_KDF_X9_62(unsigned char *out, size_t outlen, + const unsigned char *Z, size_t Zlen, + const unsigned char *sinfo, size_t sinfolen, + const EVP_MD *md) +{ + EVP_MD_CTX mctx; + int rv = 0; + unsigned int i; + size_t mdlen; + unsigned char ctr[4]; + if (sinfolen > ECDH_KDF_MAX || outlen > ECDH_KDF_MAX + || Zlen > ECDH_KDF_MAX) + return 0; + mdlen = EVP_MD_size(md); + EVP_MD_CTX_init(&mctx); + for (i = 1;; i++) { + unsigned char mtmp[EVP_MAX_MD_SIZE]; + EVP_DigestInit_ex(&mctx, md, NULL); + ctr[3] = i & 0xFF; + ctr[2] = (i >> 8) & 0xFF; + ctr[1] = (i >> 16) & 0xFF; + ctr[0] = (i >> 24) & 0xFF; + if (!EVP_DigestUpdate(&mctx, Z, Zlen)) + goto err; + if (!EVP_DigestUpdate(&mctx, ctr, sizeof(ctr))) + goto err; + if (!EVP_DigestUpdate(&mctx, sinfo, sinfolen)) + goto err; + if (outlen >= mdlen) { + if (!EVP_DigestFinal(&mctx, out, NULL)) + goto err; + outlen -= mdlen; + if (outlen == 0) + break; + out += mdlen; + } else { + if (!EVP_DigestFinal(&mctx, mtmp, NULL)) + goto err; + memcpy(out, mtmp, outlen); + OPENSSL_cleanse(mtmp, mdlen); + break; + } + } + rv = 1; + err: + EVP_MD_CTX_cleanup(&mctx); + return rv; +} diff --git a/deps/openssl/openssl/crypto/ecdh/ech_ossl.c b/deps/openssl/openssl/crypto/ecdh/ech_ossl.c index d448b19a528f29..df115cc262e519 100644 --- a/deps/openssl/openssl/crypto/ecdh/ech_ossl.c +++ b/deps/openssl/openssl/crypto/ecdh/ech_ossl.c @@ -138,6 +138,16 @@ static int ecdh_compute_key(void *out, size_t outlen, const EC_POINT *pub_key, } group = EC_KEY_get0_group(ecdh); + + if (EC_KEY_get_flags(ecdh) & EC_FLAG_COFACTOR_ECDH) { + if (!EC_GROUP_get_cofactor(group, x, ctx) || + !BN_mul(x, x, priv_key, ctx)) { + ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE); + goto err; + } + priv_key = x; + } + if ((tmp = EC_POINT_new(group)) == NULL) { ECDHerr(ECDH_F_ECDH_COMPUTE_KEY, ERR_R_MALLOC_FAILURE); goto err; diff --git a/deps/openssl/openssl/crypto/ecdsa/ecdsa.h b/deps/openssl/openssl/crypto/ecdsa/ecdsa.h index faf76b1104fcba..c4016ac3e19b3f 100644 --- a/deps/openssl/openssl/crypto/ecdsa/ecdsa.h +++ b/deps/openssl/openssl/crypto/ecdsa/ecdsa.h @@ -228,6 +228,80 @@ int ECDSA_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new int ECDSA_set_ex_data(EC_KEY *d, int idx, void *arg); void *ECDSA_get_ex_data(EC_KEY *d, int idx); 
+/** Allocates and initialize a ECDSA_METHOD structure + * \param ecdsa_method pointer to ECDSA_METHOD to copy. (May be NULL) + * \return pointer to a ECDSA_METHOD structure or NULL if an error occurred + */ + +ECDSA_METHOD *ECDSA_METHOD_new(ECDSA_METHOD *ecdsa_method); + +/** frees a ECDSA_METHOD structure + * \param ecdsa_method pointer to the ECDSA_METHOD structure + */ +void ECDSA_METHOD_free(ECDSA_METHOD *ecdsa_method); + +/** Sets application specific data in the ECDSA_METHOD + * \param ecdsa_method pointer to existing ECDSA_METHOD + * \param app application specific data to set + */ + +void ECDSA_METHOD_set_app_data(ECDSA_METHOD *ecdsa_method, void *app); + +/** Returns application specific data from a ECDSA_METHOD structure + * \param ecdsa_method pointer to ECDSA_METHOD structure + * \return pointer to application specific data. + */ + +void *ECDSA_METHOD_get_app_data(ECDSA_METHOD *ecdsa_method); + +/** Set the ECDSA_do_sign function in the ECDSA_METHOD + * \param ecdsa_method pointer to existing ECDSA_METHOD + * \param ecdsa_do_sign a funtion of type ECDSA_do_sign + */ + +void ECDSA_METHOD_set_sign(ECDSA_METHOD *ecdsa_method, + ECDSA_SIG *(*ecdsa_do_sign) (const unsigned char + *dgst, int dgst_len, + const BIGNUM *inv, + const BIGNUM *rp, + EC_KEY *eckey)); + +/** Set the ECDSA_sign_setup function in the ECDSA_METHOD + * \param ecdsa_method pointer to existing ECDSA_METHOD + * \param ecdsa_sign_setup a funtion of type ECDSA_sign_setup + */ + +void ECDSA_METHOD_set_sign_setup(ECDSA_METHOD *ecdsa_method, + int (*ecdsa_sign_setup) (EC_KEY *eckey, + BN_CTX *ctx, + BIGNUM **kinv, + BIGNUM **r)); + +/** Set the ECDSA_do_verify function in the ECDSA_METHOD + * \param ecdsa_method pointer to existing ECDSA_METHOD + * \param ecdsa_do_verify a funtion of type ECDSA_do_verify + */ + +void ECDSA_METHOD_set_verify(ECDSA_METHOD *ecdsa_method, + int (*ecdsa_do_verify) (const unsigned char + *dgst, int dgst_len, + const ECDSA_SIG *sig, + EC_KEY *eckey)); + +void ECDSA_METHOD_set_flags(ECDSA_METHOD *ecdsa_method, int flags); + +/** Set the flags field in the ECDSA_METHOD + * \param ecdsa_method pointer to existing ECDSA_METHOD + * \param flags flags value to set + */ + +void ECDSA_METHOD_set_name(ECDSA_METHOD *ecdsa_method, char *name); + +/** Set the name field in the ECDSA_METHOD + * \param ecdsa_method pointer to existing ECDSA_METHOD + * \param name name to set + */ + /* BEGIN ERROR CODES */ /* * The following lines are auto generated by the script mkerr.pl. Any changes @@ -242,6 +316,7 @@ void ERR_load_ECDSA_strings(void); # define ECDSA_F_ECDSA_DATA_NEW_METHOD 100 # define ECDSA_F_ECDSA_DO_SIGN 101 # define ECDSA_F_ECDSA_DO_VERIFY 102 +# define ECDSA_F_ECDSA_METHOD_NEW 105 # define ECDSA_F_ECDSA_SIGN_SETUP 103 /* Reason codes. 
*/ diff --git a/deps/openssl/openssl/crypto/ecdsa/ecs_err.c b/deps/openssl/openssl/crypto/ecdsa/ecs_err.c index 6fc64a0077aca2..f1fa7b55f97712 100644 --- a/deps/openssl/openssl/crypto/ecdsa/ecs_err.c +++ b/deps/openssl/openssl/crypto/ecdsa/ecs_err.c @@ -74,6 +74,7 @@ static ERR_STRING_DATA ECDSA_str_functs[] = { {ERR_FUNC(ECDSA_F_ECDSA_DATA_NEW_METHOD), "ECDSA_DATA_NEW_METHOD"}, {ERR_FUNC(ECDSA_F_ECDSA_DO_SIGN), "ECDSA_do_sign"}, {ERR_FUNC(ECDSA_F_ECDSA_DO_VERIFY), "ECDSA_do_verify"}, + {ERR_FUNC(ECDSA_F_ECDSA_METHOD_NEW), "ECDSA_METHOD_new"}, {ERR_FUNC(ECDSA_F_ECDSA_SIGN_SETUP), "ECDSA_sign_setup"}, {0, NULL} }; diff --git a/deps/openssl/openssl/crypto/ecdsa/ecs_lib.c b/deps/openssl/openssl/crypto/ecdsa/ecs_lib.c index 0f2d3432f6663a..1c02310318500b 100644 --- a/deps/openssl/openssl/crypto/ecdsa/ecs_lib.c +++ b/deps/openssl/openssl/crypto/ecdsa/ecs_lib.c @@ -275,3 +275,80 @@ void *ECDSA_get_ex_data(EC_KEY *d, int idx) return NULL; return (CRYPTO_get_ex_data(&ecdsa->ex_data, idx)); } + +ECDSA_METHOD *ECDSA_METHOD_new(ECDSA_METHOD *ecdsa_meth) +{ + ECDSA_METHOD *ret; + + ret = OPENSSL_malloc(sizeof(ECDSA_METHOD)); + if (ret == NULL) { + ECDSAerr(ECDSA_F_ECDSA_METHOD_NEW, ERR_R_MALLOC_FAILURE); + return NULL; + } + + if (ecdsa_meth) + *ret = *ecdsa_meth; + else { + ret->ecdsa_sign_setup = 0; + ret->ecdsa_do_sign = 0; + ret->ecdsa_do_verify = 0; + ret->name = NULL; + ret->flags = 0; + } + ret->flags |= ECDSA_METHOD_FLAG_ALLOCATED; + return ret; +} + +void ECDSA_METHOD_set_sign(ECDSA_METHOD *ecdsa_method, + ECDSA_SIG *(*ecdsa_do_sign) (const unsigned char + *dgst, int dgst_len, + const BIGNUM *inv, + const BIGNUM *rp, + EC_KEY *eckey)) +{ + ecdsa_method->ecdsa_do_sign = ecdsa_do_sign; +} + +void ECDSA_METHOD_set_sign_setup(ECDSA_METHOD *ecdsa_method, + int (*ecdsa_sign_setup) (EC_KEY *eckey, + BN_CTX *ctx, + BIGNUM **kinv, + BIGNUM **r)) +{ + ecdsa_method->ecdsa_sign_setup = ecdsa_sign_setup; +} + +void ECDSA_METHOD_set_verify(ECDSA_METHOD *ecdsa_method, + int (*ecdsa_do_verify) (const unsigned char + *dgst, int dgst_len, + const ECDSA_SIG *sig, + EC_KEY *eckey)) +{ + ecdsa_method->ecdsa_do_verify = ecdsa_do_verify; +} + +void ECDSA_METHOD_set_flags(ECDSA_METHOD *ecdsa_method, int flags) +{ + ecdsa_method->flags = flags | ECDSA_METHOD_FLAG_ALLOCATED; +} + +void ECDSA_METHOD_set_name(ECDSA_METHOD *ecdsa_method, char *name) +{ + ecdsa_method->name = name; +} + +void ECDSA_METHOD_free(ECDSA_METHOD *ecdsa_method) +{ + if (ecdsa_method->flags & ECDSA_METHOD_FLAG_ALLOCATED) + OPENSSL_free(ecdsa_method); +} + +void ECDSA_METHOD_set_app_data(ECDSA_METHOD *ecdsa_method, void *app) +{ + ecdsa_method->app_data = app; +} + +void *ECDSA_METHOD_get_app_data(ECDSA_METHOD *ecdsa_method) +{ + return ecdsa_method->app_data; +} diff --git a/deps/openssl/openssl/crypto/ecdsa/ecs_locl.h b/deps/openssl/openssl/crypto/ecdsa/ecs_locl.h index 76b2caf1f4a9fd..d3a5efc5473898 100644 --- a/deps/openssl/openssl/crypto/ecdsa/ecs_locl.h +++ b/deps/openssl/openssl/crypto/ecdsa/ecs_locl.h @@ -79,9 +79,13 @@ struct ecdsa_method { int (*finish) (EC_KEY *eckey); # endif int flags; - char *app_data; + void *app_data; }; +/* The ECDSA_METHOD was allocated and can be freed */ + +# define ECDSA_METHOD_FLAG_ALLOCATED 0x2 + /* * If this flag is set the ECDSA method is FIPS compliant and can be used in * FIPS mode. This is set in the validated module method. 
If an application diff --git a/deps/openssl/openssl/crypto/ecdsa/ecs_ossl.c b/deps/openssl/openssl/crypto/ecdsa/ecs_ossl.c index 4c5fa6b926e440..dd769609be4c08 100644 --- a/deps/openssl/openssl/crypto/ecdsa/ecs_ossl.c +++ b/deps/openssl/openssl/crypto/ecdsa/ecs_ossl.c @@ -179,10 +179,32 @@ static int ecdsa_sign_setup(EC_KEY *eckey, BN_CTX *ctx_in, BIGNUM **kinvp, while (BN_is_zero(r)); /* compute the inverse of k */ - if (!BN_mod_inverse(k, k, order, ctx)) { - ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); - goto err; + if (EC_GROUP_get_mont_data(group) != NULL) { + /* + * We want inverse in constant time, therefore we utilize the fact + * order must be prime and use Fermats Little Theorem instead. + */ + if (!BN_set_word(X, 2)) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + if (!BN_mod_sub(X, order, X, order, ctx)) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + BN_set_flags(X, BN_FLG_CONSTTIME); + if (!BN_mod_exp_mont_consttime + (k, k, X, order, ctx, EC_GROUP_get_mont_data(group))) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } + } else { + if (!BN_mod_inverse(k, k, order, ctx)) { + ECDSAerr(ECDSA_F_ECDSA_SIGN_SETUP, ERR_R_BN_LIB); + goto err; + } } + /* clear old values if necessary */ if (*rp != NULL) BN_clear_free(*rp); diff --git a/deps/openssl/openssl/crypto/engine/Makefile b/deps/openssl/openssl/crypto/engine/Makefile index d29bdd09a09673..2ee6c72362d197 100644 --- a/deps/openssl/openssl/crypto/engine/Makefile +++ b/deps/openssl/openssl/crypto/engine/Makefile @@ -22,13 +22,13 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c eng_init.c eng_ctrl.c \ tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \ tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \ - eng_rsax.c eng_rdrand.c + eng_rdrand.c LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \ eng_table.o eng_pkey.o eng_fat.o eng_all.o \ tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \ tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \ eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \ - eng_rsax.o eng_rdrand.o + eng_rdrand.o SRC= $(LIBSRC) @@ -265,20 +265,6 @@ eng_rdrand.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h eng_rdrand.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h eng_rdrand.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h eng_rdrand.o: eng_rdrand.c -eng_rsax.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h -eng_rsax.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h -eng_rsax.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -eng_rsax.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -eng_rsax.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h -eng_rsax.o: ../../include/openssl/err.h ../../include/openssl/evp.h -eng_rsax.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h -eng_rsax.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h -eng_rsax.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -eng_rsax.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h -eng_rsax.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h -eng_rsax.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h -eng_rsax.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h -eng_rsax.o: eng_rsax.c eng_table.o: ../../e_os.h ../../include/openssl/asn1.h eng_table.o: 
../../include/openssl/bio.h ../../include/openssl/buffer.h eng_table.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h diff --git a/deps/openssl/openssl/crypto/engine/eng_all.c b/deps/openssl/openssl/crypto/engine/eng_all.c index 7edf12e7e174d7..195a3a95542a7a 100644 --- a/deps/openssl/openssl/crypto/engine/eng_all.c +++ b/deps/openssl/openssl/crypto/engine/eng_all.c @@ -76,9 +76,6 @@ void ENGINE_load_builtin_engines(void) #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV)) ENGINE_load_cryptodev(); #endif -#ifndef OPENSSL_NO_RSAX - ENGINE_load_rsax(); -#endif #ifndef OPENSSL_NO_RDRAND ENGINE_load_rdrand(); #endif diff --git a/deps/openssl/openssl/crypto/engine/eng_cryptodev.c b/deps/openssl/openssl/crypto/engine/eng_cryptodev.c index bcb936dfa75447..926d95c0d7fc5d 100644 --- a/deps/openssl/openssl/crypto/engine/eng_cryptodev.c +++ b/deps/openssl/openssl/crypto/engine/eng_cryptodev.c @@ -54,10 +54,10 @@ void ENGINE_load_cryptodev(void) # include # include -# include -# include -# include -# include +# include +# include +# include +# include # include # include # include @@ -160,6 +160,17 @@ static struct { { CRYPTO_AES_CBC, NID_aes_256_cbc, 16, 32, }, +# ifdef CRYPTO_AES_CTR + { + CRYPTO_AES_CTR, NID_aes_128_ctr, 14, 16, + }, + { + CRYPTO_AES_CTR, NID_aes_192_ctr, 14, 24, + }, + { + CRYPTO_AES_CTR, NID_aes_256_ctr, 14, 32, + }, +# endif { CRYPTO_BLF_CBC, NID_bf_cbc, 8, 16, }, @@ -630,6 +641,46 @@ const EVP_CIPHER cryptodev_aes_256_cbc = { NULL }; +# ifdef CRYPTO_AES_CTR +const EVP_CIPHER cryptodev_aes_ctr = { + NID_aes_128_ctr, + 16, 16, 14, + EVP_CIPH_CTR_MODE, + cryptodev_init_key, + cryptodev_cipher, + cryptodev_cleanup, + sizeof(struct dev_crypto_state), + EVP_CIPHER_set_asn1_iv, + EVP_CIPHER_get_asn1_iv, + NULL +}; + +const EVP_CIPHER cryptodev_aes_ctr_192 = { + NID_aes_192_ctr, + 16, 24, 14, + EVP_CIPH_CTR_MODE, + cryptodev_init_key, + cryptodev_cipher, + cryptodev_cleanup, + sizeof(struct dev_crypto_state), + EVP_CIPHER_set_asn1_iv, + EVP_CIPHER_get_asn1_iv, + NULL +}; + +const EVP_CIPHER cryptodev_aes_ctr_256 = { + NID_aes_256_ctr, + 16, 32, 14, + EVP_CIPH_CTR_MODE, + cryptodev_init_key, + cryptodev_cipher, + cryptodev_cleanup, + sizeof(struct dev_crypto_state), + EVP_CIPHER_set_asn1_iv, + EVP_CIPHER_get_asn1_iv, + NULL +}; +# endif /* * Registered by the ENGINE when used to find out how to deal with * a particular NID in the ENGINE. this says what we'll do at the @@ -667,6 +718,17 @@ cryptodev_engine_ciphers(ENGINE *e, const EVP_CIPHER **cipher, case NID_aes_256_cbc: *cipher = &cryptodev_aes_256_cbc; break; +# ifdef CRYPTO_AES_CTR + case NID_aes_128_ctr: + *cipher = &cryptodev_aes_ctr; + break; + case NID_aes_192_ctr: + *cipher = &cryptodev_aes_ctr_192; + break; + case NID_aes_256_ctr: + *cipher = &cryptodev_aes_ctr_256; + break; +# endif default: *cipher = NULL; break; diff --git a/deps/openssl/openssl/crypto/engine/eng_rsax.c b/deps/openssl/openssl/crypto/engine/eng_rsax.c deleted file mode 100644 index 86ee9d89399544..00000000000000 --- a/deps/openssl/openssl/crypto/engine/eng_rsax.c +++ /dev/null @@ -1,701 +0,0 @@ -/* crypto/engine/eng_rsax.c */ -/* Copyright (c) 2010-2010 Intel Corp. 
- * Author: Vinodh.Gopal@intel.com - * Jim Guilford - * Erdinc.Ozturk@intel.com - * Maxim.Perminov@intel.com - * Ying.Huang@intel.com - * - * More information about algorithm used can be found at: - * http://www.cse.buffalo.edu/srds2009/escs2009_submission_Gopal.pdf - */ -/* ==================================================================== - * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - */ - -#include - -#include -#include -#include -#include -#include -#ifndef OPENSSL_NO_RSA -# include -#endif -#include -#include - -/* RSAX is available **ONLY* on x86_64 CPUs */ -#undef COMPILE_RSAX - -#if (defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_AMD64) || defined (_M_X64)) && !defined(OPENSSL_NO_ASM) -# define COMPILE_RSAX -static ENGINE *ENGINE_rsax(void); -#endif - -void ENGINE_load_rsax(void) -{ -/* On non-x86 CPUs it just returns. 
*/ -#ifdef COMPILE_RSAX - ENGINE *toadd = ENGINE_rsax(); - if (!toadd) - return; - ENGINE_add(toadd); - ENGINE_free(toadd); - ERR_clear_error(); -#endif -} - -#ifdef COMPILE_RSAX -# define E_RSAX_LIB_NAME "rsax engine" - -static int e_rsax_destroy(ENGINE *e); -static int e_rsax_init(ENGINE *e); -static int e_rsax_finish(ENGINE *e); -static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f) (void)); - -# ifndef OPENSSL_NO_RSA -/* RSA stuff */ -static int e_rsax_rsa_mod_exp(BIGNUM *r, const BIGNUM *I, RSA *rsa, - BN_CTX *ctx); -static int e_rsax_rsa_finish(RSA *r); -# endif - -static const ENGINE_CMD_DEFN e_rsax_cmd_defns[] = { - {0, NULL, NULL, 0} -}; - -# ifndef OPENSSL_NO_RSA -/* Our internal RSA_METHOD that we provide pointers to */ -static RSA_METHOD e_rsax_rsa = { - "Intel RSA-X method", - NULL, - NULL, - NULL, - NULL, - e_rsax_rsa_mod_exp, - NULL, - NULL, - e_rsax_rsa_finish, - RSA_FLAG_CACHE_PUBLIC | RSA_FLAG_CACHE_PRIVATE, - NULL, - NULL, - NULL -}; -# endif - -/* Constants used when creating the ENGINE */ -static const char *engine_e_rsax_id = "rsax"; -static const char *engine_e_rsax_name = "RSAX engine support"; - -/* This internal function is used by ENGINE_rsax() */ -static int bind_helper(ENGINE *e) -{ -# ifndef OPENSSL_NO_RSA - const RSA_METHOD *meth1; -# endif - if (!ENGINE_set_id(e, engine_e_rsax_id) || - !ENGINE_set_name(e, engine_e_rsax_name) || -# ifndef OPENSSL_NO_RSA - !ENGINE_set_RSA(e, &e_rsax_rsa) || -# endif - !ENGINE_set_destroy_function(e, e_rsax_destroy) || - !ENGINE_set_init_function(e, e_rsax_init) || - !ENGINE_set_finish_function(e, e_rsax_finish) || - !ENGINE_set_ctrl_function(e, e_rsax_ctrl) || - !ENGINE_set_cmd_defns(e, e_rsax_cmd_defns)) - return 0; - -# ifndef OPENSSL_NO_RSA - meth1 = RSA_PKCS1_SSLeay(); - e_rsax_rsa.rsa_pub_enc = meth1->rsa_pub_enc; - e_rsax_rsa.rsa_pub_dec = meth1->rsa_pub_dec; - e_rsax_rsa.rsa_priv_enc = meth1->rsa_priv_enc; - e_rsax_rsa.rsa_priv_dec = meth1->rsa_priv_dec; - e_rsax_rsa.bn_mod_exp = meth1->bn_mod_exp; -# endif - return 1; -} - -static ENGINE *ENGINE_rsax(void) -{ - ENGINE *ret = ENGINE_new(); - if (!ret) - return NULL; - if (!bind_helper(ret)) { - ENGINE_free(ret); - return NULL; - } - return ret; -} - -# ifndef OPENSSL_NO_RSA -/* Used to attach our own key-data to an RSA structure */ -static int rsax_ex_data_idx = -1; -# endif - -static int e_rsax_destroy(ENGINE *e) -{ - return 1; -} - -/* (de)initialisation functions. */ -static int e_rsax_init(ENGINE *e) -{ -# ifndef OPENSSL_NO_RSA - if (rsax_ex_data_idx == -1) - rsax_ex_data_idx = RSA_get_ex_new_index(0, NULL, NULL, NULL, NULL); -# endif - if (rsax_ex_data_idx == -1) - return 0; - return 1; -} - -static int e_rsax_finish(ENGINE *e) -{ - return 1; -} - -static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f) (void)) -{ - int to_return = 1; - - switch (cmd) { - /* The command isn't understood by this engine */ - default: - to_return = 0; - break; - } - - return to_return; -} - -# ifndef OPENSSL_NO_RSA - -# ifdef _WIN32 -typedef unsigned __int64 UINT64; -# else -typedef unsigned long long UINT64; -# endif -typedef unsigned short UINT16; - -/* - * Table t is interleaved in the following manner: The order in memory is - * t[0][0], t[0][1], ..., t[0][7], t[1][0], ... A particular 512-bit value is - * stored in t[][index] rather than the more normal t[index][]; i.e. 
the - * qwords of a particular entry in t are not adjacent in memory - */ - -/* Init BIGNUM b from the interleaved UINT64 array */ -static int interleaved_array_to_bn_512(BIGNUM *b, UINT64 *array); - -/* - * Extract array elements from BIGNUM b To set the whole array from b, call - * with n=8 - */ -static int bn_extract_to_array_512(const BIGNUM *b, unsigned int n, - UINT64 *array); - -struct mod_ctx_512 { - UINT64 t[8][8]; - UINT64 m[8]; - UINT64 m1[8]; /* 2^278 % m */ - UINT64 m2[8]; /* 2^640 % m */ - UINT64 k1[2]; /* (- 1/m) % 2^128 */ -}; - -static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data); - -void mod_exp_512(UINT64 *result, /* 512 bits, 8 qwords */ - UINT64 *g, /* 512 bits, 8 qwords */ - UINT64 *exp, /* 512 bits, 8 qwords */ - struct mod_ctx_512 *data); - -typedef struct st_e_rsax_mod_ctx { - UINT64 type; - union { - struct mod_ctx_512 b512; - } ctx; - -} E_RSAX_MOD_CTX; - -static E_RSAX_MOD_CTX *e_rsax_get_ctx(RSA *rsa, int idx, BIGNUM *m) -{ - E_RSAX_MOD_CTX *hptr; - - if (idx < 0 || idx > 2) - return NULL; - - hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx); - if (!hptr) { - hptr = OPENSSL_malloc(3 * sizeof(E_RSAX_MOD_CTX)); - if (!hptr) - return NULL; - hptr[2].type = hptr[1].type = hptr[0].type = 0; - RSA_set_ex_data(rsa, rsax_ex_data_idx, hptr); - } - - if (hptr[idx].type == (UINT64)BN_num_bits(m)) - return hptr + idx; - - if (BN_num_bits(m) == 512) { - UINT64 _m[8]; - bn_extract_to_array_512(m, 8, _m); - memset(&hptr[idx].ctx.b512, 0, sizeof(struct mod_ctx_512)); - mod_exp_pre_compute_data_512(_m, &hptr[idx].ctx.b512); - } - - hptr[idx].type = BN_num_bits(m); - return hptr + idx; -} - -static int e_rsax_rsa_finish(RSA *rsa) -{ - E_RSAX_MOD_CTX *hptr = RSA_get_ex_data(rsa, rsax_ex_data_idx); - if (hptr) { - OPENSSL_free(hptr); - RSA_set_ex_data(rsa, rsax_ex_data_idx, NULL); - } - if (rsa->_method_mod_n) - BN_MONT_CTX_free(rsa->_method_mod_n); - if (rsa->_method_mod_p) - BN_MONT_CTX_free(rsa->_method_mod_p); - if (rsa->_method_mod_q) - BN_MONT_CTX_free(rsa->_method_mod_q); - return 1; -} - -static int e_rsax_bn_mod_exp(BIGNUM *r, const BIGNUM *g, const BIGNUM *e, - const BIGNUM *m, BN_CTX *ctx, - BN_MONT_CTX *in_mont, - E_RSAX_MOD_CTX *rsax_mod_ctx) -{ - if (rsax_mod_ctx && BN_get_flags(e, BN_FLG_CONSTTIME) != 0) { - if (BN_num_bits(m) == 512) { - UINT64 _r[8]; - UINT64 _g[8]; - UINT64 _e[8]; - - /* Init the arrays from the BIGNUMs */ - bn_extract_to_array_512(g, 8, _g); - bn_extract_to_array_512(e, 8, _e); - - mod_exp_512(_r, _g, _e, &rsax_mod_ctx->ctx.b512); - /* Return the result in the BIGNUM */ - interleaved_array_to_bn_512(r, _r); - return 1; - } - } - - return BN_mod_exp_mont(r, g, e, m, ctx, in_mont); -} - -/* - * Declares for the Intel CIAP 512-bit / CRT / 1024 bit RSA modular - * exponentiation routine precalculations and a structure to hold the - * necessary values. These files are meant to live in crypto/rsa/ in the - * target openssl. - */ - -/* - * Local method: extracts a piece from a BIGNUM, to fit it into - * an array. 
Call with n=8 to extract an entire 512-bit BIGNUM - */ -static int bn_extract_to_array_512(const BIGNUM *b, unsigned int n, - UINT64 *array) -{ - int i; - UINT64 tmp; - unsigned char bn_buff[64]; - memset(bn_buff, 0, 64); - if (BN_num_bytes(b) > 64) { - printf("Can't support this byte size\n"); - return 0; - } - if (BN_num_bytes(b) != 0) { - if (!BN_bn2bin(b, bn_buff + (64 - BN_num_bytes(b)))) { - printf("Error's in bn2bin\n"); - /* We have to error, here */ - return 0; - } - } - while (n-- > 0) { - array[n] = 0; - for (i = 7; i >= 0; i--) { - tmp = bn_buff[63 - (n * 8 + i)]; - array[n] |= tmp << (8 * i); - } - } - return 1; -} - -/* Init a 512-bit BIGNUM from the UINT64*_ (8 * 64) interleaved array */ -static int interleaved_array_to_bn_512(BIGNUM *b, UINT64 *array) -{ - unsigned char tmp[64]; - int n = 8; - int i; - while (n-- > 0) { - for (i = 7; i >= 0; i--) { - tmp[63 - (n * 8 + i)] = (unsigned char)(array[n] >> (8 * i)); - }} - BN_bin2bn(tmp, 64, b); - return 0; -} - -/* The main 512bit precompute call */ -static int mod_exp_pre_compute_data_512(UINT64 *m, struct mod_ctx_512 *data) -{ - BIGNUM two_768, two_640, two_128, two_512, tmp, _m, tmp2; - - /* We need a BN_CTX for the modulo functions */ - BN_CTX *ctx; - /* Some tmps */ - UINT64 _t[8]; - int i, j, ret = 0; - - /* Init _m with m */ - BN_init(&_m); - interleaved_array_to_bn_512(&_m, m); - memset(_t, 0, 64); - - /* Inits */ - BN_init(&two_768); - BN_init(&two_640); - BN_init(&two_128); - BN_init(&two_512); - BN_init(&tmp); - BN_init(&tmp2); - - /* Create our context */ - if ((ctx = BN_CTX_new()) == NULL) { - goto err; - } - BN_CTX_start(ctx); - - /* - * For production, if you care, these only need to be set once, - * and may be made constants. - */ - BN_lshift(&two_768, BN_value_one(), 768); - BN_lshift(&two_640, BN_value_one(), 640); - BN_lshift(&two_128, BN_value_one(), 128); - BN_lshift(&two_512, BN_value_one(), 512); - - if (0 == (m[7] & 0x8000000000000000)) { - goto err; - } - if (0 == (m[0] & 0x1)) { /* Odd modulus required for Mont */ - goto err; - } - - /* Precompute m1 */ - BN_mod(&tmp, &two_768, &_m, ctx); - if (!bn_extract_to_array_512(&tmp, 8, &data->m1[0])) { - goto err; - } - - /* Precompute m2 */ - BN_mod(&tmp, &two_640, &_m, ctx); - if (!bn_extract_to_array_512(&tmp, 8, &data->m2[0])) { - goto err; - } - - /* - * Precompute k1, a 128b number = ((-1)* m-1 ) mod 2128; k1 should - * be non-negative. 
- */ - BN_mod_inverse(&tmp, &_m, &two_128, ctx); - if (!BN_is_zero(&tmp)) { - BN_sub(&tmp, &two_128, &tmp); - } - if (!bn_extract_to_array_512(&tmp, 2, &data->k1[0])) { - goto err; - } - - /* Precompute t */ - for (i = 0; i < 8; i++) { - BN_zero(&tmp); - if (i & 1) { - BN_add(&tmp, &two_512, &tmp); - } - if (i & 2) { - BN_add(&tmp, &two_512, &tmp); - } - if (i & 4) { - BN_add(&tmp, &two_640, &tmp); - } - - BN_nnmod(&tmp2, &tmp, &_m, ctx); - if (!bn_extract_to_array_512(&tmp2, 8, _t)) { - goto err; - } - for (j = 0; j < 8; j++) - data->t[j][i] = _t[j]; - } - - /* Precompute m */ - for (i = 0; i < 8; i++) { - data->m[i] = m[i]; - } - - ret = 1; - - err: - /* Cleanup */ - if (ctx != NULL) { - BN_CTX_end(ctx); - BN_CTX_free(ctx); - } - BN_free(&two_768); - BN_free(&two_640); - BN_free(&two_128); - BN_free(&two_512); - BN_free(&tmp); - BN_free(&tmp2); - BN_free(&_m); - - return ret; -} - -static int e_rsax_rsa_mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, - BN_CTX *ctx) -{ - BIGNUM *r1, *m1, *vrfy; - BIGNUM local_dmp1, local_dmq1, local_c, local_r1; - BIGNUM *dmp1, *dmq1, *c, *pr1; - int ret = 0; - - BN_CTX_start(ctx); - r1 = BN_CTX_get(ctx); - m1 = BN_CTX_get(ctx); - vrfy = BN_CTX_get(ctx); - - { - BIGNUM local_p, local_q; - BIGNUM *p = NULL, *q = NULL; - int error = 0; - - /* - * Make sure BN_mod_inverse in Montgomery intialization uses the - * BN_FLG_CONSTTIME flag (unless RSA_FLAG_NO_CONSTTIME is set) - */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - BN_init(&local_p); - p = &local_p; - BN_with_flags(p, rsa->p, BN_FLG_CONSTTIME); - - BN_init(&local_q); - q = &local_q; - BN_with_flags(q, rsa->q, BN_FLG_CONSTTIME); - } else { - p = rsa->p; - q = rsa->q; - } - - if (rsa->flags & RSA_FLAG_CACHE_PRIVATE) { - if (!BN_MONT_CTX_set_locked - (&rsa->_method_mod_p, CRYPTO_LOCK_RSA, p, ctx)) - error = 1; - if (!BN_MONT_CTX_set_locked - (&rsa->_method_mod_q, CRYPTO_LOCK_RSA, q, ctx)) - error = 1; - } - - /* clean up */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - BN_free(&local_p); - BN_free(&local_q); - } - if (error) - goto err; - } - - if (rsa->flags & RSA_FLAG_CACHE_PUBLIC) - if (!BN_MONT_CTX_set_locked - (&rsa->_method_mod_n, CRYPTO_LOCK_RSA, rsa->n, ctx)) - goto err; - - /* compute I mod q */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - c = &local_c; - BN_with_flags(c, I, BN_FLG_CONSTTIME); - if (!BN_mod(r1, c, rsa->q, ctx)) - goto err; - } else { - if (!BN_mod(r1, I, rsa->q, ctx)) - goto err; - } - - /* compute r1^dmq1 mod q */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - dmq1 = &local_dmq1; - BN_with_flags(dmq1, rsa->dmq1, BN_FLG_CONSTTIME); - } else - dmq1 = rsa->dmq1; - - if (!e_rsax_bn_mod_exp(m1, r1, dmq1, rsa->q, ctx, - rsa->_method_mod_q, e_rsax_get_ctx(rsa, 0, - rsa->q))) - goto err; - - /* compute I mod p */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - c = &local_c; - BN_with_flags(c, I, BN_FLG_CONSTTIME); - if (!BN_mod(r1, c, rsa->p, ctx)) - goto err; - } else { - if (!BN_mod(r1, I, rsa->p, ctx)) - goto err; - } - - /* compute r1^dmp1 mod p */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - dmp1 = &local_dmp1; - BN_with_flags(dmp1, rsa->dmp1, BN_FLG_CONSTTIME); - } else - dmp1 = rsa->dmp1; - - if (!e_rsax_bn_mod_exp(r0, r1, dmp1, rsa->p, ctx, - rsa->_method_mod_p, e_rsax_get_ctx(rsa, 1, - rsa->p))) - goto err; - - if (!BN_sub(r0, r0, m1)) - goto err; - /* - * This will help stop the size of r0 increasing, which does affect the - * multiply if it optimised for a power of 2 size - */ - if (BN_is_negative(r0)) - if (!BN_add(r0, r0, rsa->p)) - goto err; - - if 
(!BN_mul(r1, r0, rsa->iqmp, ctx)) - goto err; - - /* Turn BN_FLG_CONSTTIME flag on before division operation */ - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - pr1 = &local_r1; - BN_with_flags(pr1, r1, BN_FLG_CONSTTIME); - } else - pr1 = r1; - if (!BN_mod(r0, pr1, rsa->p, ctx)) - goto err; - - /* - * If p < q it is occasionally possible for the correction of adding 'p' - * if r0 is negative above to leave the result still negative. This can - * break the private key operations: the following second correction - * should *always* correct this rare occurrence. This will *never* happen - * with OpenSSL generated keys because they ensure p > q [steve] - */ - if (BN_is_negative(r0)) - if (!BN_add(r0, r0, rsa->p)) - goto err; - if (!BN_mul(r1, r0, rsa->q, ctx)) - goto err; - if (!BN_add(r0, r1, m1)) - goto err; - - if (rsa->e && rsa->n) { - if (!e_rsax_bn_mod_exp - (vrfy, r0, rsa->e, rsa->n, ctx, rsa->_method_mod_n, - e_rsax_get_ctx(rsa, 2, rsa->n))) - goto err; - - /* - * If 'I' was greater than (or equal to) rsa->n, the operation will - * be equivalent to using 'I mod n'. However, the result of the - * verify will *always* be less than 'n' so we don't check for - * absolute equality, just congruency. - */ - if (!BN_sub(vrfy, vrfy, I)) - goto err; - if (!BN_mod(vrfy, vrfy, rsa->n, ctx)) - goto err; - if (BN_is_negative(vrfy)) - if (!BN_add(vrfy, vrfy, rsa->n)) - goto err; - if (!BN_is_zero(vrfy)) { - /* - * 'I' and 'vrfy' aren't congruent mod n. Don't leak - * miscalculated CRT output, just do a raw (slower) mod_exp and - * return that instead. - */ - - BIGNUM local_d; - BIGNUM *d = NULL; - - if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) { - d = &local_d; - BN_with_flags(d, rsa->d, BN_FLG_CONSTTIME); - } else - d = rsa->d; - if (!e_rsax_bn_mod_exp(r0, I, d, rsa->n, ctx, - rsa->_method_mod_n, e_rsax_get_ctx(rsa, 2, - rsa->n))) - goto err; - } - } - ret = 1; - - err: - BN_CTX_end(ctx); - - return ret; -} -# endif /* !OPENSSL_NO_RSA */ -#endif /* !COMPILE_RSAX */ diff --git a/deps/openssl/openssl/crypto/engine/eng_table.c b/deps/openssl/openssl/crypto/engine/eng_table.c index 2e1a7e855ae22a..27d31f70c85544 100644 --- a/deps/openssl/openssl/crypto/engine/eng_table.c +++ b/deps/openssl/openssl/crypto/engine/eng_table.c @@ -351,6 +351,8 @@ void engine_table_doall(ENGINE_TABLE *table, engine_table_doall_cb *cb, ENGINE_PILE_DOALL dall; dall.cb = cb; dall.arg = arg; - lh_ENGINE_PILE_doall_arg(&table->piles, LHASH_DOALL_ARG_FN(int_cb), - ENGINE_PILE_DOALL, &dall); + if (table) + lh_ENGINE_PILE_doall_arg(&table->piles, + LHASH_DOALL_ARG_FN(int_cb), + ENGINE_PILE_DOALL, &dall); } diff --git a/deps/openssl/openssl/crypto/engine/engine.h b/deps/openssl/openssl/crypto/engine/engine.h index e81096ae02a662..bd7b591447dd42 100644 --- a/deps/openssl/openssl/crypto/engine/engine.h +++ b/deps/openssl/openssl/crypto/engine/engine.h @@ -413,7 +413,6 @@ void ENGINE_load_gost(void); # endif # endif void ENGINE_load_cryptodev(void); -void ENGINE_load_rsax(void); void ENGINE_load_rdrand(void); void ENGINE_load_builtin_engines(void); diff --git a/deps/openssl/openssl/crypto/err/openssl.ec b/deps/openssl/openssl/crypto/err/openssl.ec index bafbc35d3065ea..139afe3234f537 100644 --- a/deps/openssl/openssl/crypto/err/openssl.ec +++ b/deps/openssl/openssl/crypto/err/openssl.ec @@ -41,6 +41,7 @@ L NONE crypto/x509/x509_vfy.h NONE L NONE crypto/ec/ec_lcl.h NONE L NONE crypto/asn1/asn_lcl.h NONE L NONE crypto/cms/cms_lcl.h NONE +L NONE ssl/ssl_locl.h NONE F RSAREF_F_RSA_BN2BIN diff --git a/deps/openssl/openssl/crypto/evp/Makefile 
b/deps/openssl/openssl/crypto/evp/Makefile index ef75678ad84d10..c9afca7cb2d917 100644 --- a/deps/openssl/openssl/crypto/evp/Makefile +++ b/deps/openssl/openssl/crypto/evp/Makefile @@ -28,8 +28,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_cnf.c \ bio_md.c bio_b64.c bio_enc.c evp_err.c e_null.c \ c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ - e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ - e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c + e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c \ + e_aes_cbc_hmac_sha1.c e_aes_cbc_hmac_sha256.c e_rc4_hmac_md5.c LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ @@ -41,8 +41,8 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ bio_md.o bio_b64.o bio_enc.o evp_err.o e_null.o \ c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ - e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ - e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o + e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o \ + e_aes_cbc_hmac_sha1.o e_aes_cbc_hmac_sha256.o e_rc4_hmac_md5.o SRC= $(LIBSRC) @@ -202,16 +202,36 @@ e_aes_cbc_hmac_sha1.o: ../../include/openssl/bio.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/crypto.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/e_os2.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/evp.h +e_aes_cbc_hmac_sha1.o: ../../include/openssl/modes.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/obj_mac.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/objects.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/opensslconf.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/opensslv.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/ossl_typ.h +e_aes_cbc_hmac_sha1.o: ../../include/openssl/rand.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/safestack.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/sha.h e_aes_cbc_hmac_sha1.o: ../../include/openssl/stack.h -e_aes_cbc_hmac_sha1.o: ../../include/openssl/symhacks.h e_aes_cbc_hmac_sha1.c -e_aes_cbc_hmac_sha1.o: evp_locl.h +e_aes_cbc_hmac_sha1.o: ../../include/openssl/symhacks.h ../modes/modes_lcl.h +e_aes_cbc_hmac_sha1.o: e_aes_cbc_hmac_sha1.c +e_aes_cbc_hmac_sha256.o: ../../include/openssl/aes.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/asn1.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/bio.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/crypto.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/e_os2.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/evp.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/modes.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/obj_mac.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/objects.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/opensslconf.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/opensslv.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/ossl_typ.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/rand.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/safestack.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/sha.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/stack.h +e_aes_cbc_hmac_sha256.o: ../../include/openssl/symhacks.h ../modes/modes_lcl.h +e_aes_cbc_hmac_sha256.o: e_aes_cbc_hmac_sha256.c e_bf.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h e_bf.o: ../../include/openssl/blowfish.h ../../include/openssl/buffer.h e_bf.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h @@ -225,11 +245,13 @@ e_camellia.o: 
../../include/openssl/asn1.h ../../include/openssl/bio.h e_camellia.o: ../../include/openssl/camellia.h ../../include/openssl/crypto.h e_camellia.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h e_camellia.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h -e_camellia.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +e_camellia.o: ../../include/openssl/modes.h ../../include/openssl/obj_mac.h +e_camellia.o: ../../include/openssl/objects.h e_camellia.o: ../../include/openssl/opensslconf.h e_camellia.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h e_camellia.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -e_camellia.o: ../../include/openssl/symhacks.h e_camellia.c evp_locl.h +e_camellia.o: ../../include/openssl/symhacks.h ../modes/modes_lcl.h +e_camellia.o: e_camellia.c evp_locl.h e_cast.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h e_cast.o: ../../include/openssl/buffer.h ../../include/openssl/cast.h e_cast.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h @@ -258,9 +280,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h -e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h -e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h +e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h +e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h +e_des3.o: ../cryptlib.h e_des3.c evp_locl.h e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -392,13 +415,6 @@ evp_err.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h evp_err.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h evp_err.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h evp_err.o: ../../include/openssl/symhacks.h evp_err.c -evp_fips.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h -evp_fips.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -evp_fips.o: ../../include/openssl/evp.h ../../include/openssl/obj_mac.h -evp_fips.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h -evp_fips.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -evp_fips.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -evp_fips.o: ../../include/openssl/symhacks.h evp_fips.c evp_key.o: ../../e_os.h ../../include/openssl/asn1.h evp_key.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h evp_key.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h diff --git a/deps/openssl/openssl/crypto/evp/c_allc.c b/deps/openssl/openssl/crypto/evp/c_allc.c index 3097c2112efcdf..280e58408f2bbc 100644 --- a/deps/openssl/openssl/crypto/evp/c_allc.c +++ b/deps/openssl/openssl/crypto/evp/c_allc.c @@ -93,6 +93,7 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_des_ecb()); EVP_add_cipher(EVP_des_ede()); EVP_add_cipher(EVP_des_ede3()); + EVP_add_cipher(EVP_des_ede3_wrap()); #endif 
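Note: with this c_allc.c hunk (and the AES additions that follow), OpenSSL_add_all_ciphers() also registers the new key-wrap, CCM and AES-CBC-HMAC-SHA256 ciphers, so they become reachable through the normal EVP lookup tables. A minimal sketch, assuming the patched tree above where EVP_des_ede3_wrap() is available:

/* Hedged sketch: verify the newly registered 3DES key-wrap cipher can be
 * found through the standard EVP lookup after registration. */
#include <stdio.h>
#include <openssl/evp.h>
#include <openssl/objects.h>

int main(void)
{
    OpenSSL_add_all_ciphers();

    int nid = EVP_CIPHER_nid(EVP_des_ede3_wrap());
    const EVP_CIPHER *looked_up = EVP_get_cipherbynid(nid);

    printf("%s registered: %s\n", OBJ_nid2sn(nid),
           looked_up != NULL ? "yes" : "no");
    return 0;
}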
#ifndef OPENSSL_NO_RC4 @@ -172,6 +173,8 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_128_ctr()); EVP_add_cipher(EVP_aes_128_gcm()); EVP_add_cipher(EVP_aes_128_xts()); + EVP_add_cipher(EVP_aes_128_ccm()); + EVP_add_cipher(EVP_aes_128_wrap()); EVP_add_cipher_alias(SN_aes_128_cbc, "AES128"); EVP_add_cipher_alias(SN_aes_128_cbc, "aes128"); EVP_add_cipher(EVP_aes_192_ecb()); @@ -182,6 +185,8 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_192_ofb()); EVP_add_cipher(EVP_aes_192_ctr()); EVP_add_cipher(EVP_aes_192_gcm()); + EVP_add_cipher(EVP_aes_192_ccm()); + EVP_add_cipher(EVP_aes_192_wrap()); EVP_add_cipher_alias(SN_aes_192_cbc, "AES192"); EVP_add_cipher_alias(SN_aes_192_cbc, "aes192"); EVP_add_cipher(EVP_aes_256_ecb()); @@ -193,12 +198,18 @@ void OpenSSL_add_all_ciphers(void) EVP_add_cipher(EVP_aes_256_ctr()); EVP_add_cipher(EVP_aes_256_gcm()); EVP_add_cipher(EVP_aes_256_xts()); + EVP_add_cipher(EVP_aes_256_ccm()); + EVP_add_cipher(EVP_aes_256_wrap()); EVP_add_cipher_alias(SN_aes_256_cbc, "AES256"); EVP_add_cipher_alias(SN_aes_256_cbc, "aes256"); # if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1()); EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1()); # endif +# if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256) + EVP_add_cipher(EVP_aes_128_cbc_hmac_sha256()); + EVP_add_cipher(EVP_aes_256_cbc_hmac_sha256()); +# endif #endif #ifndef OPENSSL_NO_CAMELLIA diff --git a/deps/openssl/openssl/crypto/evp/digest.c b/deps/openssl/openssl/crypto/evp/digest.c index 2e202c8fe031a6..f2643f32486ad2 100644 --- a/deps/openssl/openssl/crypto/evp/digest.c +++ b/deps/openssl/openssl/crypto/evp/digest.c @@ -119,6 +119,7 @@ #ifdef OPENSSL_FIPS # include +# include "evp_locl.h" #endif void EVP_MD_CTX_init(EVP_MD_CTX *ctx) @@ -145,6 +146,17 @@ int EVP_DigestInit(EVP_MD_CTX *ctx, const EVP_MD *type) int EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *type, ENGINE *impl) { EVP_MD_CTX_clear_flags(ctx, EVP_MD_CTX_FLAG_CLEANED); +#ifdef OPENSSL_FIPS + /* If FIPS mode switch to approved implementation if possible */ + if (FIPS_mode()) { + const EVP_MD *fipsmd; + if (type) { + fipsmd = evp_get_fips_md(type); + if (fipsmd) + type = fipsmd; + } + } +#endif #ifndef OPENSSL_NO_ENGINE /* * Whether it's nice or not, "Inits" can be used on "Final"'d contexts so diff --git a/deps/openssl/openssl/crypto/evp/e_aes.c b/deps/openssl/openssl/crypto/evp/e_aes.c index 245c18a69392c1..8161b26325745e 100644 --- a/deps/openssl/openssl/crypto/evp/e_aes.c +++ b/deps/openssl/openssl/crypto/evp/e_aes.c @@ -56,12 +56,17 @@ # include # include # include "evp_locl.h" -# ifndef OPENSSL_FIPS -# include "modes_lcl.h" -# include +# include "modes_lcl.h" +# include + +# undef EVP_CIPH_FLAG_FIPS +# define EVP_CIPH_FLAG_FIPS 0 typedef struct { - AES_KEY ks; + union { + double align; + AES_KEY ks; + } ks; block128_f block; union { cbc128_f cbc; @@ -70,7 +75,10 @@ typedef struct { } EVP_AES_KEY; typedef struct { - AES_KEY ks; /* AES key schedule to use */ + union { + double align; + AES_KEY ks; + } ks; /* AES key schedule to use */ int key_set; /* Set if key initialised */ int iv_set; /* Set if an iv is set */ GCM128_CONTEXT gcm; @@ -83,7 +91,10 @@ typedef struct { } EVP_AES_GCM_CTX; typedef struct { - AES_KEY ks1, ks2; /* AES key schedules to use */ + union { + double align; + AES_KEY ks; + } ks1, ks2; /* AES key schedules to use */ XTS128_CONTEXT xts; void (*stream) (const unsigned char *in, unsigned char *out, size_t length, @@ -92,7 +103,10 @@ typedef struct { } 
EVP_AES_XTS_CTX; typedef struct { - AES_KEY ks; /* AES key schedule to use */ + union { + double align; + AES_KEY ks; + } ks; /* AES key schedule to use */ int key_set; /* Set if key initialised */ int iv_set; /* Set if an iv is set */ int tag_set; /* Set if tag is valid */ @@ -102,9 +116,9 @@ typedef struct { ccm128_f str; } EVP_AES_CCM_CTX; -# define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) +# define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) -# ifdef VPAES_ASM +# ifdef VPAES_ASM int vpaes_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); int vpaes_set_decrypt_key(const unsigned char *userKey, int bits, @@ -119,8 +133,8 @@ void vpaes_cbc_encrypt(const unsigned char *in, unsigned char *out, size_t length, const AES_KEY *key, unsigned char *ivec, int enc); -# endif -# ifdef BSAES_ASM +# endif +# ifdef BSAES_ASM void bsaes_cbc_encrypt(const unsigned char *in, unsigned char *out, size_t length, const AES_KEY *key, unsigned char ivec[16], int enc); @@ -133,40 +147,54 @@ void bsaes_xts_encrypt(const unsigned char *inp, unsigned char *out, void bsaes_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); -# endif -# ifdef AES_CTR_ASM +# endif +# ifdef AES_CTR_ASM void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, size_t blocks, const AES_KEY *key, const unsigned char ivec[AES_BLOCK_SIZE]); -# endif -# ifdef AES_XTS_ASM +# endif +# ifdef AES_XTS_ASM void AES_xts_encrypt(const char *inp, char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); void AES_xts_decrypt(const char *inp, char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); +# endif + +# if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# include "ppc_arch.h" +# ifdef VPAES_ASM +# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC) # endif +# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207) +# define HWAES_set_encrypt_key aes_p8_set_encrypt_key +# define HWAES_set_decrypt_key aes_p8_set_decrypt_key +# define HWAES_encrypt aes_p8_encrypt +# define HWAES_decrypt aes_p8_decrypt +# define HWAES_cbc_encrypt aes_p8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks +# endif -# if defined(AES_ASM) && !defined(I386_ONLY) && ( \ +# if defined(AES_ASM) && !defined(I386_ONLY) && ( \ ((defined(__i386) || defined(__i386__) || \ defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[]; -# ifdef VPAES_ASM -# define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) -# endif -# ifdef BSAES_ASM -# define BSAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) -# endif +# ifdef VPAES_ASM +# define VPAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) +# endif +# ifdef BSAES_ASM +# define BSAES_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(41-32))) +# endif /* * AES-NI section */ -# define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) +# define AESNI_CAPABLE (OPENSSL_ia32cap_P[1]&(1<<(57-32))) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key); @@ -217,6 +245,26 @@ void aesni_ccm64_decrypt_blocks(const unsigned char *in, const unsigned char ivec[16], unsigned char cmac[16]); +# if defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) +size_t 
aesni_gcm_encrypt(const unsigned char *in, + unsigned char *out, + size_t len, + const void *key, unsigned char ivec[16], u64 *Xi); +# define AES_gcm_encrypt aesni_gcm_encrypt +size_t aesni_gcm_decrypt(const unsigned char *in, + unsigned char *out, + size_t len, + const void *key, unsigned char ivec[16], u64 *Xi); +# define AES_gcm_decrypt aesni_gcm_decrypt +void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *in, + size_t len); +# define AES_GCM_ASM(gctx) (gctx->ctr==aesni_ctr32_encrypt_blocks && \ + gctx->gcm.ghash==gcm_ghash_avx) +# define AES_GCM_ASM2(gctx) (gctx->gcm.block==(block128_f)aesni_encrypt && \ + gctx->gcm.ghash==gcm_ghash_avx) +# undef AES_GCM_ASM2 /* minor size optimization */ +# endif + static int aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { @@ -270,23 +318,23 @@ static int aesni_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -# define aesni_ofb_cipher aes_ofb_cipher +# define aesni_ofb_cipher aes_ofb_cipher static int aesni_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_cfb_cipher aes_cfb_cipher +# define aesni_cfb_cipher aes_cfb_cipher static int aesni_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_cfb8_cipher aes_cfb8_cipher +# define aesni_cfb8_cipher aes_cfb8_cipher static int aesni_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_cfb1_cipher aes_cfb1_cipher +# define aesni_cfb1_cipher aes_cfb1_cipher static int aesni_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define aesni_ctr_cipher aes_ctr_cipher +# define aesni_ctr_cipher aes_ctr_cipher static int aesni_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); @@ -297,7 +345,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (!iv && !key) return 1; if (key) { - aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) aesni_encrypt); gctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; /* @@ -322,7 +370,7 @@ static int aesni_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } -# define aesni_gcm_cipher aes_gcm_cipher +# define aesni_gcm_cipher aes_gcm_cipher static int aesni_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); @@ -336,17 +384,17 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (key) { /* key_len is two AES keys */ if (enc) { - aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + aesni_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) aesni_encrypt; xctx->stream = aesni_xts_encrypt; } else { - aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + aesni_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) aesni_decrypt; xctx->stream = aesni_xts_decrypt; } aesni_set_encrypt_key(key + ctx->key_len / 2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f) aesni_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -360,7 +408,7 @@ static int aesni_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } -# define aesni_xts_cipher aes_xts_cipher +# define aesni_xts_cipher aes_xts_cipher static int 
aesni_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); @@ -371,7 +419,7 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (!iv && !key) return 1; if (key) { - aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + aesni_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f) aesni_encrypt); cctx->str = enc ? (ccm128_f) aesni_ccm64_encrypt_blocks : @@ -385,11 +433,11 @@ static int aesni_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; } -# define aesni_ccm_cipher aes_ccm_cipher +# define aesni_ccm_cipher aes_ccm_cipher static int aesni_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len); -# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ static const EVP_CIPHER aesni_##keylen##_##mode = { \ nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ flags|EVP_CIPH_##MODE##_MODE, \ @@ -410,7 +458,7 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } -# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ static const EVP_CIPHER aesni_##keylen##_##mode = { \ nid##_##keylen##_##mode,blocksize, \ (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ @@ -432,9 +480,378 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return AESNI_CAPABLE?&aesni_##keylen##_##mode:&aes_##keylen##_##mode; } +# elif defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) + +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_AES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_AES) + +void aes_t4_set_encrypt_key(const unsigned char *key, int bits, AES_KEY *ks); +void aes_t4_set_decrypt_key(const unsigned char *key, int bits, AES_KEY *ks); +void aes_t4_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void aes_t4_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +/* + * Key-length specific subroutines were chosen for following reason. + * Each SPARC T4 core can execute up to 8 threads which share core's + * resources. Loading as much key material to registers allows to + * minimize references to shared memory interface, as well as amount + * of instructions in inner loops [much needed on T4]. But then having + * non-key-length specific routines would require conditional branches + * either in inner loops or on subroutines' entries. Former is hardly + * acceptable, while latter means code size increase to size occupied + * by multiple key-length specfic subroutines, so why fight? 
+ */ +void aes128_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes128_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes192_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes192_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes256_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes256_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + unsigned char *ivec); +void aes128_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + unsigned char *ivec); +void aes192_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + unsigned char *ivec); +void aes256_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key, + unsigned char *ivec); +void aes128_t4_xts_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); +void aes128_t4_xts_decrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); +void aes256_t4_xts_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); +void aes256_t4_xts_decrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const AES_KEY *key1, + const AES_KEY *key2, const unsigned char *ivec); + +static int aes_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + int ret, mode, bits; + EVP_AES_KEY *dat = (EVP_AES_KEY *) ctx->cipher_data; + + mode = ctx->cipher->flags & EVP_CIPH_MODE; + bits = ctx->key_len * 8; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + ret = 0; + aes_t4_set_decrypt_key(key, bits, ctx->cipher_data); + dat->block = (block128_f) aes_t4_decrypt; + switch (bits) { + case 128: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) aes128_t4_cbc_decrypt : NULL; + break; + case 192: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) aes192_t4_cbc_decrypt : NULL; + break; + case 256: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? 
+ (cbc128_f) aes256_t4_cbc_decrypt : NULL; + break; + default: + ret = -1; + } + } else { + ret = 0; + aes_t4_set_encrypt_key(key, bits, ctx->cipher_data); + dat->block = (block128_f) aes_t4_encrypt; + switch (bits) { + case 128: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) aes128_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) aes128_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + case 192: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) aes192_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) aes192_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + case 256: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) aes256_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) aes256_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + default: + ret = -1; + } + } + + if (ret < 0) { + EVPerr(EVP_F_AES_T4_INIT_KEY, EVP_R_AES_KEY_SETUP_FAILED); + return 0; + } + + return 1; +} + +# define aes_t4_cbc_cipher aes_cbc_cipher +static int aes_t4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_ecb_cipher aes_ecb_cipher +static int aes_t4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_ofb_cipher aes_ofb_cipher +static int aes_t4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_cfb_cipher aes_cfb_cipher +static int aes_t4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_cfb8_cipher aes_cfb8_cipher +static int aes_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_cfb1_cipher aes_cfb1_cipher +static int aes_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define aes_t4_ctr_cipher aes_ctr_cipher +static int aes_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_t4_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_GCM_CTX *gctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) { + int bits = ctx->key_len * 8; + aes_t4_set_encrypt_key(key, bits, &gctx->ks.ks); + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, + (block128_f) aes_t4_encrypt); + switch (bits) { + case 128: + gctx->ctr = (ctr128_f) aes128_t4_ctr32_encrypt; + break; + case 192: + gctx->ctr = (ctr128_f) aes192_t4_ctr32_encrypt; + break; + case 256: + gctx->ctr = (ctr128_f) aes256_t4_ctr32_encrypt; + break; + default: + return 0; + } + /* + * If we have an iv can set it directly, otherwise use saved IV. 
+ */ + if (iv == NULL && gctx->iv_set) + iv = gctx->iv; + if (iv) { + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + gctx->iv_set = 1; + } + gctx->key_set = 1; + } else { + /* If key set use IV, otherwise copy */ + if (gctx->key_set) + CRYPTO_gcm128_setiv(&gctx->gcm, iv, gctx->ivlen); + else + memcpy(gctx->iv, iv, gctx->ivlen); + gctx->iv_set = 1; + gctx->iv_gen = 0; + } + return 1; +} + +# define aes_t4_gcm_cipher aes_gcm_cipher +static int aes_t4_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_t4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_XTS_CTX *xctx = ctx->cipher_data; + if (!iv && !key) + return 1; + + if (key) { + int bits = ctx->key_len * 4; + xctx->stream = NULL; + /* key_len is two AES keys */ + if (enc) { + aes_t4_set_encrypt_key(key, bits, &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) aes_t4_encrypt; + switch (bits) { + case 128: + xctx->stream = aes128_t4_xts_encrypt; + break; +# if 0 /* not yet */ + case 192: + xctx->stream = aes192_t4_xts_encrypt; + break; +# endif + case 256: + xctx->stream = aes256_t4_xts_encrypt; + break; + default: + return 0; + } + } else { + aes_t4_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) aes_t4_decrypt; + switch (bits) { + case 128: + xctx->stream = aes128_t4_xts_decrypt; + break; +# if 0 /* not yet */ + case 192: + xctx->stream = aes192_t4_xts_decrypt; + break; +# endif + case 256: + xctx->stream = aes256_t4_xts_decrypt; + break; + default: + return 0; + } + } + + aes_t4_set_encrypt_key(key + ctx->key_len / 2, + ctx->key_len * 4, &xctx->ks2.ks); + xctx->xts.block2 = (block128_f) aes_t4_encrypt; + + xctx->xts.key1 = &xctx->ks1; + } + + if (iv) { + xctx->xts.key2 = &xctx->ks2; + memcpy(ctx->iv, iv, 16); + } + + return 1; +} + +# define aes_t4_xts_cipher aes_xts_cipher +static int aes_t4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +static int aes_t4_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_CCM_CTX *cctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) { + int bits = ctx->key_len * 8; + aes_t4_set_encrypt_key(key, bits, &cctx->ks.ks); + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f) aes_t4_encrypt); +# if 0 /* not yet */ + switch (bits) { + case 128: + cctx->str = enc ? (ccm128_f) aes128_t4_ccm64_encrypt : + (ccm128_f) ae128_t4_ccm64_decrypt; + break; + case 192: + cctx->str = enc ? (ccm128_f) aes192_t4_ccm64_encrypt : + (ccm128_f) ae192_t4_ccm64_decrypt; + break; + case 256: + cctx->str = enc ? 
(ccm128_f) aes256_t4_ccm64_encrypt : + (ccm128_f) ae256_t4_ccm64_decrypt; + break; + default: + return 0; + } # else + cctx->str = NULL; +# endif + cctx->key_set = 1; + } + if (iv) { + memcpy(ctx->iv, iv, 15 - cctx->L); + cctx->iv_set = 1; + } + return 1; +} -# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +# define aes_t4_ccm_cipher aes_ccm_cipher +static int aes_t4_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_t4_init_key, \ + aes_t4_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize, \ + keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_init_key, \ + aes_##mode##_cipher, \ + NULL, \ + sizeof(EVP_AES_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } + +# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +static const EVP_CIPHER aes_t4_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_t4_##mode##_init_key, \ + aes_t4_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +static const EVP_CIPHER aes_##keylen##_##mode = { \ + nid##_##keylen##_##mode,blocksize, \ + (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + aes_##mode##_init_key, \ + aes_##mode##_cipher, \ + aes_##mode##_cleanup, \ + sizeof(EVP_AES_##MODE##_CTX), \ + NULL,NULL,aes_##mode##_ctrl,NULL }; \ +const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ +{ return SPARC_AES_CAPABLE?&aes_t4_##keylen##_##mode:&aes_##keylen##_##mode; } + +# else + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ static const EVP_CIPHER aes_##keylen##_##mode = { \ nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ flags|EVP_CIPH_##MODE##_MODE, \ @@ -446,7 +863,7 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } -# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ +# define BLOCK_CIPHER_custom(nid,keylen,blocksize,ivlen,mode,MODE,flags) \ static const EVP_CIPHER aes_##keylen##_##mode = { \ nid##_##keylen##_##mode,blocksize, \ (EVP_CIPH_##MODE##_MODE==EVP_CIPH_XTS_MODE?2:1)*keylen/8, ivlen, \ @@ -458,9 +875,42 @@ static const EVP_CIPHER aes_##keylen##_##mode = { \ NULL,NULL,aes_##mode##_ctrl,NULL }; \ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ { return &aes_##keylen##_##mode; } +# endif + +# if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) +# include "arm_arch.h" +# if __ARM_MAX_ARCH__>=7 +# if defined(BSAES_ASM) +# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif +# define HWAES_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) +# define HWAES_set_encrypt_key aes_v8_set_encrypt_key +# define HWAES_set_decrypt_key aes_v8_set_decrypt_key +# define HWAES_encrypt aes_v8_encrypt +# define HWAES_decrypt aes_v8_decrypt +# define HWAES_cbc_encrypt 
aes_v8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_v8_ctr32_encrypt_blocks # endif +# endif + +# if defined(HWAES_CAPABLE) +int HWAES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int HWAES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +void HWAES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t length, const AES_KEY *key, + unsigned char *ivec, const int enc); +void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t len, const AES_KEY *key, + const unsigned char ivec[16]); +# endif -# define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ +# define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ @@ -478,51 +928,80 @@ static int aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, mode = ctx->cipher->flags & EVP_CIPH_MODE; if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + ret = HWAES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); + dat->block = (block128_f) HWAES_decrypt; + dat->stream.cbc = NULL; +# ifdef HWAES_cbc_encrypt + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) HWAES_cbc_encrypt; +# endif + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode == EVP_CIPH_CBC_MODE) { - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_decrypt; dat->stream.cbc = (cbc128_f) bsaes_cbc_encrypt; } else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = vpaes_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) vpaes_decrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) vpaes_cbc_encrypt : NULL; } else -# endif +# endif { - ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_decrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_decrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? 
(cbc128_f) AES_cbc_encrypt : NULL; } else -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + ret = HWAES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); + dat->block = (block128_f) HWAES_encrypt; + dat->stream.cbc = NULL; +# ifdef HWAES_cbc_encrypt + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) HWAES_cbc_encrypt; + else +# endif +# ifdef HWAES_ctr32_encrypt_blocks + if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) HWAES_ctr32_encrypt_blocks; + else +# endif + (void)0; /* terminate potentially open 'else' */ + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE && mode == EVP_CIPH_CTR_MODE) { - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_encrypt; dat->stream.ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; } else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = vpaes_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) vpaes_encrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) vpaes_cbc_encrypt : NULL; } else -# endif +# endif { - ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks); + ret = AES_set_encrypt_key(key, ctx->key_len * 8, &dat->ks.ks); dat->block = (block128_f) AES_encrypt; dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? (cbc128_f) AES_cbc_encrypt : NULL; -# ifdef AES_CTR_ASM +# ifdef AES_CTR_ASM if (mode == EVP_CIPH_CTR_MODE) dat->stream.ctr = (ctr128_f) AES_ctr32_encrypt; -# endif +# endif } if (ret < 0) { @@ -543,7 +1022,7 @@ static int aes_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, else if (ctx->encrypt) CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block); else - CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block); + CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, ctx->iv, dat->block); return 1; } @@ -679,11 +1158,6 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) case EVP_CTRL_GCM_SET_IVLEN: if (arg <= 0) return 0; -# ifdef OPENSSL_FIPS - if (FIPS_module_mode() && !(c->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) - && arg < 12) - return 0; -# endif /* Allocate memory for IV if needed */ if ((arg > EVP_MAX_IV_LENGTH) && (arg > gctx->ivlen)) { if (gctx->iv != c->iv) @@ -804,34 +1278,47 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) { do { -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + HWAES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); + CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, + (block128_f) HWAES_encrypt); +# ifdef HWAES_ctr32_encrypt_blocks + gctx->ctr = (ctr128_f) HWAES_ctr32_encrypt_blocks; +# else + gctx->ctr = NULL; +# endif + break; + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE) { - AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) AES_encrypt); gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks; break; } else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) vpaes_encrypt); gctx->ctr = NULL; break; } else -# endif +# endif (void)0; /* terminate potentially open 'else' */ - 
AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); + AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks); CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f) AES_encrypt); -# ifdef AES_CTR_ASM +# ifdef AES_CTR_ASM gctx->ctr = (ctr128_f) AES_ctr32_encrypt; -# else +# else gctx->ctr = NULL; -# endif +# endif } while (0); /* @@ -890,11 +1377,38 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, if (ctx->encrypt) { /* Encrypt payload */ if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 32 && AES_GCM_ASM(gctx)) { + if (CRYPTO_gcm128_encrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_encrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) goto err; } else { - if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 32 && AES_GCM_ASM2(gctx)) { + if (CRYPTO_gcm128_encrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_encrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif + if (CRYPTO_gcm128_encrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) goto err; } out += len; @@ -904,11 +1418,38 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } else { /* Decrypt */ if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 16 && AES_GCM_ASM(gctx)) { + if (CRYPTO_gcm128_decrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_decrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) goto err; } else { - if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 16 && AES_GCM_ASM2(gctx)) { + if (CRYPTO_gcm128_decrypt(&gctx->gcm, NULL, NULL, 0)) + return -1; + + bulk = AES_gcm_decrypt(in, out, len, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + } +# endif + if (CRYPTO_gcm128_decrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) goto err; } /* Retrieve tag */ @@ -946,20 +1487,90 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return -1; } else if (ctx->encrypt) { if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 32 && AES_GCM_ASM(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_encrypt(in + res, + out + res, len - res, + gctx->gcm.key, gctx->gcm.Yi.c, + gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif if (CRYPTO_gcm128_encrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) return -1; } else { - if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 32 && AES_GCM_ASM2(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_encrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_encrypt(in + res, + out + res, len - res, + gctx->gcm.key, gctx->gcm.Yi.c, + gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif + if (CRYPTO_gcm128_encrypt(&gctx->gcm, + in + bulk, out + bulk, 
len - bulk)) return -1; } } else { if (gctx->ctr) { + size_t bulk = 0; +# if defined(AES_GCM_ASM) + if (len >= 16 && AES_GCM_ASM(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_decrypt(in + res, + out + res, len - res, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif if (CRYPTO_gcm128_decrypt_ctr32(&gctx->gcm, - in, out, len, gctx->ctr)) + in + bulk, + out + bulk, + len - bulk, gctx->ctr)) return -1; } else { - if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, len)) + size_t bulk = 0; +# if defined(AES_GCM_ASM2) + if (len >= 16 && AES_GCM_ASM2(gctx)) { + size_t res = (16 - gctx->gcm.mres) % 16; + + if (CRYPTO_gcm128_decrypt(&gctx->gcm, in, out, res)) + return -1; + + bulk = AES_gcm_decrypt(in + res, + out + res, len - res, + gctx->gcm.key, + gctx->gcm.Yi.c, gctx->gcm.Xi.u); + gctx->gcm.len.u[1] += bulk; + bulk += res; + } +# endif + if (CRYPTO_gcm128_decrypt(&gctx->gcm, + in + bulk, out + bulk, len - bulk)) return -1; } } @@ -982,7 +1593,7 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } -# define CUSTOM_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 \ +# define CUSTOM_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 \ | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ | EVP_CIPH_CUSTOM_COPY) @@ -1031,47 +1642,69 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, if (key) do { -# ifdef AES_XTS_ASM +# ifdef AES_XTS_ASM xctx->stream = enc ? AES_xts_encrypt : AES_xts_decrypt; -# else +# else xctx->stream = NULL; -# endif +# endif /* key_len is two AES keys */ -# ifdef BSAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + if (enc) { + HWAES_set_encrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) HWAES_encrypt; + } else { + HWAES_set_decrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); + xctx->xts.block1 = (block128_f) HWAES_decrypt; + } + + HWAES_set_encrypt_key(key + ctx->key_len / 2, + ctx->key_len * 4, &xctx->ks2.ks); + xctx->xts.block2 = (block128_f) HWAES_encrypt; + + xctx->xts.key1 = &xctx->ks1; + break; + } else +# endif +# ifdef BSAES_CAPABLE if (BSAES_CAPABLE) xctx->stream = enc ? 
bsaes_xts_encrypt : bsaes_xts_decrypt; else -# endif -# ifdef VPAES_CAPABLE +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { if (enc) { - vpaes_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + vpaes_set_encrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); xctx->xts.block1 = (block128_f) vpaes_encrypt; } else { - vpaes_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + vpaes_set_decrypt_key(key, ctx->key_len * 4, + &xctx->ks1.ks); xctx->xts.block1 = (block128_f) vpaes_decrypt; } vpaes_set_encrypt_key(key + ctx->key_len / 2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f) vpaes_encrypt; xctx->xts.key1 = &xctx->ks1; break; } else -# endif +# endif (void)0; /* terminate potentially open 'else' */ if (enc) { - AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1); + AES_set_encrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) AES_encrypt; } else { - AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1); + AES_set_decrypt_key(key, ctx->key_len * 4, &xctx->ks1.ks); xctx->xts.block1 = (block128_f) AES_decrypt; } AES_set_encrypt_key(key + ctx->key_len / 2, - ctx->key_len * 4, &xctx->ks2); + ctx->key_len * 4, &xctx->ks2.ks); xctx->xts.block2 = (block128_f) AES_encrypt; xctx->xts.key1 = &xctx->ks1; @@ -1093,14 +1726,6 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 0; if (!out || !in || len < AES_BLOCK_SIZE) return 0; -# ifdef OPENSSL_FIPS - /* Requirement of SP800-38E */ - if (FIPS_module_mode() && !(ctx->flags & EVP_CIPH_FLAG_NON_FIPS_ALLOW) && - (len > (1UL << 20) * 16)) { - EVPerr(EVP_F_AES_XTS_CIPHER, EVP_R_TOO_LARGE); - return 0; - } -# endif if (xctx->stream) (*xctx->stream) (in, out, len, xctx->xts.key1, xctx->xts.key2, ctx->iv); @@ -1110,9 +1735,9 @@ static int aes_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -# define aes_xts_cleanup NULL +# define aes_xts_cleanup NULL -# define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ +# define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ | EVP_CIPH_CUSTOM_COPY) @@ -1190,17 +1815,28 @@ static int aes_ccm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, return 1; if (key) do { -# ifdef VPAES_CAPABLE +# ifdef HWAES_CAPABLE + if (HWAES_CAPABLE) { + HWAES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); + + CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, + &cctx->ks, (block128_f) HWAES_encrypt); + cctx->str = NULL; + cctx->key_set = 1; + break; + } else +# endif +# ifdef VPAES_CAPABLE if (VPAES_CAPABLE) { - vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); + vpaes_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f) vpaes_encrypt); cctx->str = NULL; cctx->key_set = 1; break; } -# endif - AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks); +# endif + AES_set_encrypt_key(key, ctx->key_len * 8, &cctx->ks.ks); CRYPTO_ccm128_init(&cctx->ccm, cctx->M, cctx->L, &cctx->ks, (block128_f) AES_encrypt); cctx->str = NULL; @@ -1273,7 +1909,7 @@ static int aes_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } -# define aes_ccm_cleanup NULL +# define aes_ccm_cleanup NULL BLOCK_CIPHER_custom(NID_aes, 128, 1, 12, ccm, CCM, EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) @@ -1281,5 +1917,107 @@ BLOCK_CIPHER_custom(NID_aes, 128, 1, 12, ccm, CCM, EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) BLOCK_CIPHER_custom(NID_aes, 256, 1, 12, ccm, CCM, EVP_CIPH_FLAG_FIPS | CUSTOM_FLAGS) 
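/*
 * Editor's note, not part of the upstream patch: the hunk that follows adds
 * RFC 3394 AES key wrap as EVP ciphers (EVP_aes_128_wrap and the 192/256
 * variants) on top of the CCM ciphers defined above. Below is a minimal usage
 * sketch, assuming the OpenSSL 1.0.2 EVP API and its
 * EVP_CIPHER_CTX_FLAG_WRAP_ALLOW opt-in flag; the helper name wrap_example()
 * and the fixed 16-byte payload are illustrative only.
 */
#if 0 /* illustration only, not applied by this patch */
# include <openssl/evp.h>

/* Wrap a 16-byte payload under a 128-bit KEK; returns ciphertext length or -1. */
static int wrap_example(const unsigned char kek[16],
                        const unsigned char payload[16],
                        unsigned char out[24])
{
    EVP_CIPHER_CTX ctx;
    int outl = 0, ok;

    EVP_CIPHER_CTX_init(&ctx);
    /* Wrap-mode ciphers are refused by EVP_EncryptInit_ex unless the caller opts in. */
    EVP_CIPHER_CTX_set_flags(&ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
    ok = EVP_EncryptInit_ex(&ctx, EVP_aes_128_wrap(), NULL, kek, NULL) &&
         EVP_EncryptUpdate(&ctx, out, &outl, payload, 16);
    EVP_CIPHER_CTX_cleanup(&ctx);
    return ok ? outl : -1;   /* 16-byte input wraps to 16 bytes + 8-byte ICV */
}
#endif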
-# endif #endif +typedef struct { + union { + double align; + AES_KEY ks; + } ks; + /* Indicates if IV has been set */ + unsigned char *iv; +} EVP_AES_WRAP_CTX; + +static int aes_wrap_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; + if (!iv && !key) + return 1; + if (key) { + if (ctx->encrypt) + AES_set_encrypt_key(key, ctx->key_len * 8, &wctx->ks.ks); + else + AES_set_decrypt_key(key, ctx->key_len * 8, &wctx->ks.ks); + if (!iv) + wctx->iv = NULL; + } + if (iv) { + memcpy(ctx->iv, iv, 8); + wctx->iv = ctx->iv; + } + return 1; +} + +static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inlen) +{ + EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; + size_t rv; + if (!in) + return 0; + if (inlen % 8) + return -1; + if (ctx->encrypt && inlen < 8) + return -1; + if (!ctx->encrypt && inlen < 16) + return -1; + if (!out) { + if (ctx->encrypt) + return inlen + 8; + else + return inlen - 8; + } + if (ctx->encrypt) + rv = CRYPTO_128_wrap(&wctx->ks.ks, wctx->iv, out, in, inlen, + (block128_f) AES_encrypt); + else + rv = CRYPTO_128_unwrap(&wctx->ks.ks, wctx->iv, out, in, inlen, + (block128_f) AES_decrypt); + return rv ? (int)rv : -1; +} + +#define WRAP_FLAGS (EVP_CIPH_WRAP_MODE \ + | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ + | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_FLAG_DEFAULT_ASN1) + +static const EVP_CIPHER aes_128_wrap = { + NID_id_aes128_wrap, + 8, 16, 8, WRAP_FLAGS, + aes_wrap_init_key, aes_wrap_cipher, + NULL, + sizeof(EVP_AES_WRAP_CTX), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_aes_128_wrap(void) +{ + return &aes_128_wrap; +} + +static const EVP_CIPHER aes_192_wrap = { + NID_id_aes192_wrap, + 8, 24, 8, WRAP_FLAGS, + aes_wrap_init_key, aes_wrap_cipher, + NULL, + sizeof(EVP_AES_WRAP_CTX), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_aes_192_wrap(void) +{ + return &aes_192_wrap; +} + +static const EVP_CIPHER aes_256_wrap = { + NID_id_aes256_wrap, + 8, 32, 8, WRAP_FLAGS, + aes_wrap_init_key, aes_wrap_cipher, + NULL, + sizeof(EVP_AES_WRAP_CTX), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_aes_256_wrap(void) +{ + return &aes_256_wrap; +} diff --git a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c index 3f8a5ae391291f..e0127a9bb2e7ac 100644 --- a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c +++ b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -58,7 +58,8 @@ # include # include # include -# include "evp_locl.h" +# include +# include "modes_lcl.h" # ifndef EVP_CIPH_FLAG_AEAD_CIPHER # define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 @@ -70,6 +71,10 @@ # define EVP_CIPH_FLAG_DEFAULT_ASN1 0 # endif +# if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 +# endif + # define TLS1_1_VERSION 0x0302 typedef struct { @@ -89,11 +94,7 @@ typedef struct { defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -# if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC) -# define BSWAP(x) ({ unsigned int r=(x); asm ("bswapl %0":"=r"(r):"0"(r)); r; }) -# endif - -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[3]; # define AESNI_CAPABLE (1<<(57-32)) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, @@ -110,6 +111,10 @@ void aesni_cbc_sha1_enc(const void *inp, void *out, size_t blocks, const AES_KEY *key, unsigned char iv[16], SHA_CTX *ctx, const void *in0); +void 
aesni256_cbc_sha1_dec(const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA_CTX *ctx, const void *in0); + # define data(ctx) ((EVP_AES_HMAC_SHA1 *)(ctx)->cipher_data) static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, @@ -134,6 +139,7 @@ static int aesni_cbc_hmac_sha1_init_key(EVP_CIPHER_CTX *ctx, } # define STITCHED_CALL +# undef STITCHED_DECRYPT_CALL # if !defined(STITCHED_CALL) # define aes_off 0 @@ -177,6 +183,275 @@ static void sha1_update(SHA_CTX *c, const void *data, size_t len) # endif # define SHA1_Update sha1_update +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + +typedef struct { + unsigned int A[8], B[8], C[8], D[8], E[8]; +} SHA1_MB_CTX; +typedef struct { + const unsigned char *ptr; + int blocks; +} HASH_DESC; + +void sha1_multi_block(SHA1_MB_CTX *, const HASH_DESC *, int); + +typedef struct { + const unsigned char *inp; + unsigned char *out; + int blocks; + u64 iv[2]; +} CIPH_DESC; + +void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int); + +static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA1 *key, + unsigned char *out, + const unsigned char *inp, + size_t inp_len, int n4x) +{ /* n4x is 1 or 2 */ + HASH_DESC hash_d[8], edges[8]; + CIPH_DESC ciph_d[8]; + unsigned char storage[sizeof(SHA1_MB_CTX) + 32]; + union { + u64 q[16]; + u32 d[32]; + u8 c[128]; + } blocks[8]; + SHA1_MB_CTX *ctx; + unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed = + 0; + size_t ret = 0; + u8 *IVs; +# if defined(BSWAP8) + u64 seqnum; +# endif + + /* ask for IVs in bulk */ + if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0) + return 0; + + ctx = (SHA1_MB_CTX *) (storage + 32 - ((size_t)storage % 32)); /* align */ + + frag = (unsigned int)inp_len >> (1 + n4x); + last = (unsigned int)inp_len + frag - (frag << (1 + n4x)); + if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 20 + 16) & -16); + + /* populate descriptors with pointers and IVs */ + hash_d[0].ptr = inp; + ciph_d[0].inp = inp; + /* 5+16 is place for header and explicit IV */ + ciph_d[0].out = out + 5 + 16; + memcpy(ciph_d[0].out - 16, IVs, 16); + memcpy(ciph_d[0].iv, IVs, 16); + IVs += 16; + + for (i = 1; i < x4; i++) { + ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag; + ciph_d[i].out = ciph_d[i - 1].out + packlen; + memcpy(ciph_d[i].out - 16, IVs, 16); + memcpy(ciph_d[i].iv, IVs, 16); + IVs += 16; + } + +# if defined(BSWAP8) + memcpy(blocks[0].c, key->md.data, 8); + seqnum = BSWAP8(blocks[0].q[0]); +# endif + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? 
last : frag); +# if !defined(BSWAP8) + unsigned int carry, j; +# endif + + ctx->A[i] = key->md.h0; + ctx->B[i] = key->md.h1; + ctx->C[i] = key->md.h2; + ctx->D[i] = key->md.h3; + ctx->E[i] = key->md.h4; + + /* fix seqnum */ +# if defined(BSWAP8) + blocks[i].q[0] = BSWAP8(seqnum + i); +# else + for (carry = i, j = 8; j--;) { + blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry; + carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1); + } +# endif + blocks[i].c[8] = ((u8 *)key->md.data)[8]; + blocks[i].c[9] = ((u8 *)key->md.data)[9]; + blocks[i].c[10] = ((u8 *)key->md.data)[10]; + /* fix length */ + blocks[i].c[11] = (u8)(len >> 8); + blocks[i].c[12] = (u8)(len); + + memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13); + hash_d[i].ptr += 64 - 13; + hash_d[i].blocks = (len - (64 - 13)) / 64; + + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* hash 13-byte headers and first 64-13 bytes of inputs */ + sha1_multi_block(ctx, edges, n4x); + /* hash bulk inputs */ +# define MAXCHUNKSIZE 2048 +# if MAXCHUNKSIZE%64 +# error "MAXCHUNKSIZE is not divisible by 64" +# elif MAXCHUNKSIZE + /* + * goal is to minimize pressure on L1 cache by moving in shorter steps, + * so that hashed data is still in the cache by the time we encrypt it + */ + minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64; + if (minblocks > MAXCHUNKSIZE / 64) { + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + } + do { + sha1_multi_block(ctx, edges, n4x); + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE; + hash_d[i].blocks -= MAXCHUNKSIZE / 64; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].inp += MAXCHUNKSIZE; + ciph_d[i].out += MAXCHUNKSIZE; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16); + } + processed += MAXCHUNKSIZE; + minblocks -= MAXCHUNKSIZE / 64; + } while (minblocks > MAXCHUNKSIZE / 64); + } +# endif +# undef MAXCHUNKSIZE + sha1_multi_block(ctx, hash_d, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? 
last : frag), + off = hash_d[i].blocks * 64; + const unsigned char *ptr = hash_d[i].ptr + off; + + off = (len - processed) - (64 - 13) - off; /* remainder actually */ + memcpy(blocks[i].c, ptr, off); + blocks[i].c[off] = 0x80; + len += 64 + 13; /* 64 is HMAC header */ + len *= 8; /* convert to bits */ + if (off < (64 - 8)) { +# ifdef BSWAP4 + blocks[i].d[15] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 60, len); +# endif + edges[i].blocks = 1; + } else { +# ifdef BSWAP4 + blocks[i].d[31] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 124, len); +# endif + edges[i].blocks = 2; + } + edges[i].ptr = blocks[i].c; + } + + /* hash input tails and finalize */ + sha1_multi_block(ctx, edges, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { +# ifdef BSWAP4 + blocks[i].d[0] = BSWAP4(ctx->A[i]); + ctx->A[i] = key->tail.h0; + blocks[i].d[1] = BSWAP4(ctx->B[i]); + ctx->B[i] = key->tail.h1; + blocks[i].d[2] = BSWAP4(ctx->C[i]); + ctx->C[i] = key->tail.h2; + blocks[i].d[3] = BSWAP4(ctx->D[i]); + ctx->D[i] = key->tail.h3; + blocks[i].d[4] = BSWAP4(ctx->E[i]); + ctx->E[i] = key->tail.h4; + blocks[i].c[20] = 0x80; + blocks[i].d[15] = BSWAP4((64 + 20) * 8); +# else + PUTU32(blocks[i].c + 0, ctx->A[i]); + ctx->A[i] = key->tail.h0; + PUTU32(blocks[i].c + 4, ctx->B[i]); + ctx->B[i] = key->tail.h1; + PUTU32(blocks[i].c + 8, ctx->C[i]); + ctx->C[i] = key->tail.h2; + PUTU32(blocks[i].c + 12, ctx->D[i]); + ctx->D[i] = key->tail.h3; + PUTU32(blocks[i].c + 16, ctx->E[i]); + ctx->E[i] = key->tail.h4; + blocks[i].c[20] = 0x80; + PUTU32(blocks[i].c + 60, (64 + 20) * 8); +# endif + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* finalize MACs */ + sha1_multi_block(ctx, edges, n4x); + + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? 
last : frag), pad, j; + unsigned char *out0 = out; + + memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed); + ciph_d[i].inp = ciph_d[i].out; + + out += 5 + 16 + len; + + /* write MAC */ + PUTU32(out + 0, ctx->A[i]); + PUTU32(out + 4, ctx->B[i]); + PUTU32(out + 8, ctx->C[i]); + PUTU32(out + 12, ctx->D[i]); + PUTU32(out + 16, ctx->E[i]); + out += 20; + len += 20; + + /* pad */ + pad = 15 - len % 16; + for (j = 0; j <= pad; j++) + *(out++) = pad; + len += pad + 1; + + ciph_d[i].blocks = (len - processed) / 16; + len += 16; /* account for explicit iv */ + + /* arrange header */ + out0[0] = ((u8 *)key->md.data)[8]; + out0[1] = ((u8 *)key->md.data)[9]; + out0[2] = ((u8 *)key->md.data)[10]; + out0[3] = (u8)(len >> 8); + out0[4] = (u8)(len); + + ret += len + 5; + inp += frag; + } + + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + OPENSSL_cleanse(blocks, sizeof(blocks)); + OPENSSL_cleanse(ctx, sizeof(*ctx)); + + return ret; +} +# endif + static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t len) { @@ -257,10 +532,7 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* arrange cache line alignment */ pmac = (void *)(((size_t)mac.c + 31) & ((size_t)0 - 32)); - /* decrypt HMAC|padding at once */ - aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); - - if (plen) { /* "TLS" mode of operation */ + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ size_t inp_len, mask, j, i; unsigned int res, maxpad, pad, bitlen; int ret = 1; @@ -268,17 +540,37 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, unsigned int u[SHA_LBLOCK]; unsigned char c[SHA_CBLOCK]; } *data = (void *)key->md.data; +# if defined(STITCHED_DECRYPT_CALL) + unsigned char tail_iv[AES_BLOCK_SIZE]; + int stitch = 0; +# endif if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3]) - >= TLS1_1_VERSION) - iv = AES_BLOCK_SIZE; - - if (len < (iv + SHA_DIGEST_LENGTH + 1)) + >= TLS1_1_VERSION) { + if (len < (AES_BLOCK_SIZE + SHA_DIGEST_LENGTH + 1)) + return 0; + + /* omit explicit iv */ + memcpy(ctx->iv, in, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + len -= AES_BLOCK_SIZE; + } else if (len < (SHA_DIGEST_LENGTH + 1)) return 0; - /* omit explicit iv */ - out += iv; - len -= iv; +# if defined(STITCHED_DECRYPT_CALL) + if (len >= 1024 && ctx->key_len == 32) { + /* decrypt last block */ + memcpy(tail_iv, in + len - 2 * AES_BLOCK_SIZE, + AES_BLOCK_SIZE); + aesni_cbc_encrypt(in + len - AES_BLOCK_SIZE, + out + len - AES_BLOCK_SIZE, AES_BLOCK_SIZE, + &key->ks, tail_iv, 0); + stitch = 1; + } else +# endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); /* figure out payload length */ pad = out[len - 1]; @@ -298,6 +590,29 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, key->md = key->head; SHA1_Update(&key->md, key->aux.tls_aad, plen); +# if defined(STITCHED_DECRYPT_CALL) + if (stitch) { + blocks = (len - (256 + 32 + SHA_CBLOCK)) / SHA_CBLOCK; + aes_off = len - AES_BLOCK_SIZE - blocks * SHA_CBLOCK; + sha_off = SHA_CBLOCK - plen; + + aesni_cbc_encrypt(in, out, aes_off, &key->ks, ctx->iv, 0); + + SHA1_Update(&key->md, out, sha_off); + aesni256_cbc_sha1_dec(in + aes_off, + out + aes_off, blocks, &key->ks, + ctx->iv, &key->md, out + sha_off); + + sha_off += blocks *= SHA_CBLOCK; + out += sha_off; + len -= sha_off; + inp_len -= sha_off; + + key->md.Nl += (blocks << 3); /* at most 18 bits */ + memcpy(ctx->iv, 
tail_iv, AES_BLOCK_SIZE); + } +# endif + # if 1 len -= SHA_DIGEST_LENGTH; /* amend mac */ if (len >= (256 + SHA_CBLOCK)) { @@ -311,8 +626,8 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* but pretend as if we hashed padded payload */ bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */ -# ifdef BSWAP - bitlen = BSWAP(bitlen); +# ifdef BSWAP4 + bitlen = BSWAP4(bitlen); # else mac.c[0] = 0; mac.c[1] = (unsigned char)(bitlen >> 16); @@ -376,12 +691,12 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, pmac->u[3] |= key->md.h3 & mask; pmac->u[4] |= key->md.h4 & mask; -# ifdef BSWAP - pmac->u[0] = BSWAP(pmac->u[0]); - pmac->u[1] = BSWAP(pmac->u[1]); - pmac->u[2] = BSWAP(pmac->u[2]); - pmac->u[3] = BSWAP(pmac->u[3]); - pmac->u[4] = BSWAP(pmac->u[4]); +# ifdef BSWAP4 + pmac->u[0] = BSWAP4(pmac->u[0]); + pmac->u[1] = BSWAP4(pmac->u[1]); + pmac->u[2] = BSWAP4(pmac->u[2]); + pmac->u[3] = BSWAP4(pmac->u[3]); + pmac->u[4] = BSWAP4(pmac->u[4]); # else for (i = 0; i < 5; i++) { res = pmac->u[i]; @@ -458,6 +773,33 @@ static int aesni_cbc_hmac_sha1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, # endif return ret; } else { +# if defined(STITCHED_DECRYPT_CALL) + if (len >= 1024 && ctx->key_len == 32) { + if (sha_off %= SHA_CBLOCK) + blocks = (len - 3 * SHA_CBLOCK) / SHA_CBLOCK; + else + blocks = (len - 2 * SHA_CBLOCK) / SHA_CBLOCK; + aes_off = len - blocks * SHA_CBLOCK; + + aesni_cbc_encrypt(in, out, aes_off, &key->ks, ctx->iv, 0); + SHA1_Update(&key->md, out, sha_off); + aesni256_cbc_sha1_dec(in + aes_off, + out + aes_off, blocks, &key->ks, + ctx->iv, &key->md, out + sha_off); + + sha_off += blocks *= SHA_CBLOCK; + out += sha_off; + len -= sha_off; + + key->md.Nh += blocks >> 29; + key->md.Nl += blocks <<= 3; + if (key->md.Nl < (unsigned int)blocks) + key->md.Nh++; + } else +# endif + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); + SHA1_Update(&key->md, out, len); } } @@ -528,6 +870,70 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, return SHA_DIGEST_LENGTH; } } +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5 + 16 + ((arg + 20 + 16) & -16)); + case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + unsigned int n4x = 1, x4; + unsigned int frag, last, packlen, inp_len; + + if (arg < (int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) + return -1; + + inp_len = param->inp[11] << 8 | param->inp[12]; + + if (ctx->encrypt) { + if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION) + return -1; + + if (inp_len) { + if (inp_len < 4096) + return 0; /* too short */ + + if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5)) + n4x = 2; /* AVX2 */ + } else if ((n4x = param->interleave / 4) && n4x <= 2) + inp_len = param->len; + else + return -1; + + key->md = key->head; + SHA1_Update(&key->md, param->inp, 13); + + x4 = 4 * n4x; + n4x += 1; + + frag = inp_len >> n4x; + last = inp_len + frag - (frag << n4x); + if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 20 + 16) & -16); + packlen = (packlen << n4x) - packlen; + packlen += 5 + 16 + ((last + 20 + 16) & -16); + + param->interleave = x4; + + return (int)packlen; + } else + return -1; /* not yet */ + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM 
*param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + + return (int)tls1_1_multi_block_encrypt(key, param->out, + param->inp, param->len, + param->interleave / 4); + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT: +# endif default: return -1; } @@ -541,7 +947,7 @@ static EVP_CIPHER aesni_128_cbc_hmac_sha1_cipher = { # endif 16, 16, 16, EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | - EVP_CIPH_FLAG_AEAD_CIPHER, + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, aesni_cbc_hmac_sha1_init_key, aesni_cbc_hmac_sha1_cipher, NULL, @@ -560,7 +966,7 @@ static EVP_CIPHER aesni_256_cbc_hmac_sha1_cipher = { # endif 16, 32, 16, EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | - EVP_CIPH_FLAG_AEAD_CIPHER, + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, aesni_cbc_hmac_sha1_init_key, aesni_cbc_hmac_sha1_cipher, NULL, diff --git a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c new file mode 100644 index 00000000000000..30398c7ca43bd7 --- /dev/null +++ b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c @@ -0,0 +1,970 @@ +/* ==================================================================== + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * ==================================================================== + */ + +#include + +#include +#include + +#if !defined(OPENSSL_NO_AES) && !defined(OPENSSL_NO_SHA256) + +# include +# include +# include +# include +# include +# include "modes_lcl.h" + +# ifndef EVP_CIPH_FLAG_AEAD_CIPHER +# define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 +# define EVP_CTRL_AEAD_TLS1_AAD 0x16 +# define EVP_CTRL_AEAD_SET_MAC_KEY 0x17 +# endif + +# if !defined(EVP_CIPH_FLAG_DEFAULT_ASN1) +# define EVP_CIPH_FLAG_DEFAULT_ASN1 0 +# endif + +# if !defined(EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 +# endif + +# define TLS1_1_VERSION 0x0302 + +typedef struct { + AES_KEY ks; + SHA256_CTX head, tail, md; + size_t payload_length; /* AAD length in decrypt case */ + union { + unsigned int tls_ver; + unsigned char tls_aad[16]; /* 13 used */ + } aux; +} EVP_AES_HMAC_SHA256; + +# define NO_PAYLOAD_LENGTH ((size_t)-1) + +# if defined(AES_ASM) && ( \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__INTEL__) ) + +extern unsigned int OPENSSL_ia32cap_P[3]; +# define AESNI_CAPABLE (1<<(57-32)) + +int aesni_set_encrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); +int aesni_set_decrypt_key(const unsigned char *userKey, int bits, + AES_KEY *key); + +void aesni_cbc_encrypt(const unsigned char *in, + unsigned char *out, + size_t length, + const AES_KEY *key, unsigned char *ivec, int enc); + +int aesni_cbc_sha256_enc(const void *inp, void *out, size_t blocks, + const AES_KEY *key, unsigned char iv[16], + SHA256_CTX *ctx, const void *in0); + +# define data(ctx) ((EVP_AES_HMAC_SHA256 *)(ctx)->cipher_data) + +static int aesni_cbc_hmac_sha256_init_key(EVP_CIPHER_CTX *ctx, + const unsigned char *inkey, + const unsigned char *iv, int enc) +{ + EVP_AES_HMAC_SHA256 *key = data(ctx); + int ret; + + if (enc) + memset(&key->ks, 0, sizeof(key->ks.rd_key)), + ret = aesni_set_encrypt_key(inkey, ctx->key_len * 8, &key->ks); + else + ret = aesni_set_decrypt_key(inkey, ctx->key_len * 8, &key->ks); + + SHA256_Init(&key->head); /* handy when benchmarking */ + key->tail = key->head; + key->md = key->head; + + key->payload_length = NO_PAYLOAD_LENGTH; + + return ret < 0 ? 
0 : 1; +} + +# define STITCHED_CALL + +# if !defined(STITCHED_CALL) +# define aes_off 0 +# endif + +void sha256_block_data_order(void *c, const void *p, size_t len); + +static void sha256_update(SHA256_CTX *c, const void *data, size_t len) +{ + const unsigned char *ptr = data; + size_t res; + + if ((res = c->num)) { + res = SHA256_CBLOCK - res; + if (len < res) + res = len; + SHA256_Update(c, ptr, res); + ptr += res; + len -= res; + } + + res = len % SHA256_CBLOCK; + len -= res; + + if (len) { + sha256_block_data_order(c, ptr, len / SHA256_CBLOCK); + + ptr += len; + c->Nh += len >> 29; + c->Nl += len <<= 3; + if (c->Nl < (unsigned int)len) + c->Nh++; + } + + if (res) + SHA256_Update(c, ptr, res); +} + +# ifdef SHA256_Update +# undef SHA256_Update +# endif +# define SHA256_Update sha256_update + +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + +typedef struct { + unsigned int A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8]; +} SHA256_MB_CTX; +typedef struct { + const unsigned char *ptr; + int blocks; +} HASH_DESC; + +void sha256_multi_block(SHA256_MB_CTX *, const HASH_DESC *, int); + +typedef struct { + const unsigned char *inp; + unsigned char *out; + int blocks; + u64 iv[2]; +} CIPH_DESC; + +void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int); + +static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key, + unsigned char *out, + const unsigned char *inp, + size_t inp_len, int n4x) +{ /* n4x is 1 or 2 */ + HASH_DESC hash_d[8], edges[8]; + CIPH_DESC ciph_d[8]; + unsigned char storage[sizeof(SHA256_MB_CTX) + 32]; + union { + u64 q[16]; + u32 d[32]; + u8 c[128]; + } blocks[8]; + SHA256_MB_CTX *ctx; + unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed = + 0; + size_t ret = 0; + u8 *IVs; +# if defined(BSWAP8) + u64 seqnum; +# endif + + /* ask for IVs in bulk */ + if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0) + return 0; + + /* align */ + ctx = (SHA256_MB_CTX *) (storage + 32 - ((size_t)storage % 32)); + + frag = (unsigned int)inp_len >> (1 + n4x); + last = (unsigned int)inp_len + frag - (frag << (1 + n4x)); + if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 32 + 16) & -16); + + /* populate descriptors with pointers and IVs */ + hash_d[0].ptr = inp; + ciph_d[0].inp = inp; + /* 5+16 is place for header and explicit IV */ + ciph_d[0].out = out + 5 + 16; + memcpy(ciph_d[0].out - 16, IVs, 16); + memcpy(ciph_d[0].iv, IVs, 16); + IVs += 16; + + for (i = 1; i < x4; i++) { + ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag; + ciph_d[i].out = ciph_d[i - 1].out + packlen; + memcpy(ciph_d[i].out - 16, IVs, 16); + memcpy(ciph_d[i].iv, IVs, 16); + IVs += 16; + } + +# if defined(BSWAP8) + memcpy(blocks[0].c, key->md.data, 8); + seqnum = BSWAP8(blocks[0].q[0]); +# endif + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? 
last : frag); +# if !defined(BSWAP8) + unsigned int carry, j; +# endif + + ctx->A[i] = key->md.h[0]; + ctx->B[i] = key->md.h[1]; + ctx->C[i] = key->md.h[2]; + ctx->D[i] = key->md.h[3]; + ctx->E[i] = key->md.h[4]; + ctx->F[i] = key->md.h[5]; + ctx->G[i] = key->md.h[6]; + ctx->H[i] = key->md.h[7]; + + /* fix seqnum */ +# if defined(BSWAP8) + blocks[i].q[0] = BSWAP8(seqnum + i); +# else + for (carry = i, j = 8; j--;) { + blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry; + carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1); + } +# endif + blocks[i].c[8] = ((u8 *)key->md.data)[8]; + blocks[i].c[9] = ((u8 *)key->md.data)[9]; + blocks[i].c[10] = ((u8 *)key->md.data)[10]; + /* fix length */ + blocks[i].c[11] = (u8)(len >> 8); + blocks[i].c[12] = (u8)(len); + + memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13); + hash_d[i].ptr += 64 - 13; + hash_d[i].blocks = (len - (64 - 13)) / 64; + + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* hash 13-byte headers and first 64-13 bytes of inputs */ + sha256_multi_block(ctx, edges, n4x); + /* hash bulk inputs */ +# define MAXCHUNKSIZE 2048 +# if MAXCHUNKSIZE%64 +# error "MAXCHUNKSIZE is not divisible by 64" +# elif MAXCHUNKSIZE + /* + * goal is to minimize pressure on L1 cache by moving in shorter steps, + * so that hashed data is still in the cache by the time we encrypt it + */ + minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64; + if (minblocks > MAXCHUNKSIZE / 64) { + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + } + do { + sha256_multi_block(ctx, edges, n4x); + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + for (i = 0; i < x4; i++) { + edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE; + hash_d[i].blocks -= MAXCHUNKSIZE / 64; + edges[i].blocks = MAXCHUNKSIZE / 64; + ciph_d[i].inp += MAXCHUNKSIZE; + ciph_d[i].out += MAXCHUNKSIZE; + ciph_d[i].blocks = MAXCHUNKSIZE / 16; + memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16); + } + processed += MAXCHUNKSIZE; + minblocks -= MAXCHUNKSIZE / 64; + } while (minblocks > MAXCHUNKSIZE / 64); + } +# endif +# undef MAXCHUNKSIZE + sha256_multi_block(ctx, hash_d, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? 
last : frag), + off = hash_d[i].blocks * 64; + const unsigned char *ptr = hash_d[i].ptr + off; + + off = (len - processed) - (64 - 13) - off; /* remainder actually */ + memcpy(blocks[i].c, ptr, off); + blocks[i].c[off] = 0x80; + len += 64 + 13; /* 64 is HMAC header */ + len *= 8; /* convert to bits */ + if (off < (64 - 8)) { +# ifdef BSWAP4 + blocks[i].d[15] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 60, len); +# endif + edges[i].blocks = 1; + } else { +# ifdef BSWAP4 + blocks[i].d[31] = BSWAP4(len); +# else + PUTU32(blocks[i].c + 124, len); +# endif + edges[i].blocks = 2; + } + edges[i].ptr = blocks[i].c; + } + + /* hash input tails and finalize */ + sha256_multi_block(ctx, edges, n4x); + + memset(blocks, 0, sizeof(blocks)); + for (i = 0; i < x4; i++) { +# ifdef BSWAP4 + blocks[i].d[0] = BSWAP4(ctx->A[i]); + ctx->A[i] = key->tail.h[0]; + blocks[i].d[1] = BSWAP4(ctx->B[i]); + ctx->B[i] = key->tail.h[1]; + blocks[i].d[2] = BSWAP4(ctx->C[i]); + ctx->C[i] = key->tail.h[2]; + blocks[i].d[3] = BSWAP4(ctx->D[i]); + ctx->D[i] = key->tail.h[3]; + blocks[i].d[4] = BSWAP4(ctx->E[i]); + ctx->E[i] = key->tail.h[4]; + blocks[i].d[5] = BSWAP4(ctx->F[i]); + ctx->F[i] = key->tail.h[5]; + blocks[i].d[6] = BSWAP4(ctx->G[i]); + ctx->G[i] = key->tail.h[6]; + blocks[i].d[7] = BSWAP4(ctx->H[i]); + ctx->H[i] = key->tail.h[7]; + blocks[i].c[32] = 0x80; + blocks[i].d[15] = BSWAP4((64 + 32) * 8); +# else + PUTU32(blocks[i].c + 0, ctx->A[i]); + ctx->A[i] = key->tail.h[0]; + PUTU32(blocks[i].c + 4, ctx->B[i]); + ctx->B[i] = key->tail.h[1]; + PUTU32(blocks[i].c + 8, ctx->C[i]); + ctx->C[i] = key->tail.h[2]; + PUTU32(blocks[i].c + 12, ctx->D[i]); + ctx->D[i] = key->tail.h[3]; + PUTU32(blocks[i].c + 16, ctx->E[i]); + ctx->E[i] = key->tail.h[4]; + PUTU32(blocks[i].c + 20, ctx->F[i]); + ctx->F[i] = key->tail.h[5]; + PUTU32(blocks[i].c + 24, ctx->G[i]); + ctx->G[i] = key->tail.h[6]; + PUTU32(blocks[i].c + 28, ctx->H[i]); + ctx->H[i] = key->tail.h[7]; + blocks[i].c[32] = 0x80; + PUTU32(blocks[i].c + 60, (64 + 32) * 8); +# endif + edges[i].ptr = blocks[i].c; + edges[i].blocks = 1; + } + + /* finalize MACs */ + sha256_multi_block(ctx, edges, n4x); + + for (i = 0; i < x4; i++) { + unsigned int len = (i == (x4 - 1) ? 
last : frag), pad, j; + unsigned char *out0 = out; + + memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed); + ciph_d[i].inp = ciph_d[i].out; + + out += 5 + 16 + len; + + /* write MAC */ + PUTU32(out + 0, ctx->A[i]); + PUTU32(out + 4, ctx->B[i]); + PUTU32(out + 8, ctx->C[i]); + PUTU32(out + 12, ctx->D[i]); + PUTU32(out + 16, ctx->E[i]); + PUTU32(out + 20, ctx->F[i]); + PUTU32(out + 24, ctx->G[i]); + PUTU32(out + 28, ctx->H[i]); + out += 32; + len += 32; + + /* pad */ + pad = 15 - len % 16; + for (j = 0; j <= pad; j++) + *(out++) = pad; + len += pad + 1; + + ciph_d[i].blocks = (len - processed) / 16; + len += 16; /* account for explicit iv */ + + /* arrange header */ + out0[0] = ((u8 *)key->md.data)[8]; + out0[1] = ((u8 *)key->md.data)[9]; + out0[2] = ((u8 *)key->md.data)[10]; + out0[3] = (u8)(len >> 8); + out0[4] = (u8)(len); + + ret += len + 5; + inp += frag; + } + + aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x); + + OPENSSL_cleanse(blocks, sizeof(blocks)); + OPENSSL_cleanse(ctx, sizeof(*ctx)); + + return ret; +} +# endif + +static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx, + unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_AES_HMAC_SHA256 *key = data(ctx); + unsigned int l; + size_t plen = key->payload_length, iv = 0, /* explicit IV in TLS 1.1 and + * later */ + sha_off = 0; +# if defined(STITCHED_CALL) + size_t aes_off = 0, blocks; + + sha_off = SHA256_CBLOCK - key->md.num; +# endif + + key->payload_length = NO_PAYLOAD_LENGTH; + + if (len % AES_BLOCK_SIZE) + return 0; + + if (ctx->encrypt) { + if (plen == NO_PAYLOAD_LENGTH) + plen = len; + else if (len != + ((plen + SHA256_DIGEST_LENGTH + + AES_BLOCK_SIZE) & -AES_BLOCK_SIZE)) + return 0; + else if (key->aux.tls_ver >= TLS1_1_VERSION) + iv = AES_BLOCK_SIZE; + +# if defined(STITCHED_CALL) + if (OPENSSL_ia32cap_P[1] & (1 << (60 - 32)) && /* AVX? 
*/ + plen > (sha_off + iv) && + (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) { + SHA256_Update(&key->md, in + iv, sha_off); + + (void)aesni_cbc_sha256_enc(in, out, blocks, &key->ks, + ctx->iv, &key->md, in + iv + sha_off); + blocks *= SHA256_CBLOCK; + aes_off += blocks; + sha_off += blocks; + key->md.Nh += blocks >> 29; + key->md.Nl += blocks <<= 3; + if (key->md.Nl < (unsigned int)blocks) + key->md.Nh++; + } else { + sha_off = 0; + } +# endif + sha_off += iv; + SHA256_Update(&key->md, in + sha_off, plen - sha_off); + + if (plen != len) { /* "TLS" mode of operation */ + if (in != out) + memcpy(out + aes_off, in + aes_off, plen - aes_off); + + /* calculate HMAC and append it to payload */ + SHA256_Final(out + plen, &key->md); + key->md = key->tail; + SHA256_Update(&key->md, out + plen, SHA256_DIGEST_LENGTH); + SHA256_Final(out + plen, &key->md); + + /* pad the payload|hmac */ + plen += SHA256_DIGEST_LENGTH; + for (l = len - plen - 1; plen < len; plen++) + out[plen] = l; + /* encrypt HMAC|padding at once */ + aesni_cbc_encrypt(out + aes_off, out + aes_off, len - aes_off, + &key->ks, ctx->iv, 1); + } else { + aesni_cbc_encrypt(in + aes_off, out + aes_off, len - aes_off, + &key->ks, ctx->iv, 1); + } + } else { + union { + unsigned int u[SHA256_DIGEST_LENGTH / sizeof(unsigned int)]; + unsigned char c[64 + SHA256_DIGEST_LENGTH]; + } mac, *pmac; + + /* arrange cache line alignment */ + pmac = (void *)(((size_t)mac.c + 63) & ((size_t)0 - 64)); + + /* decrypt HMAC|padding at once */ + aesni_cbc_encrypt(in, out, len, &key->ks, ctx->iv, 0); + + if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */ + size_t inp_len, mask, j, i; + unsigned int res, maxpad, pad, bitlen; + int ret = 1; + union { + unsigned int u[SHA_LBLOCK]; + unsigned char c[SHA256_CBLOCK]; + } *data = (void *)key->md.data; + + if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3]) + >= TLS1_1_VERSION) + iv = AES_BLOCK_SIZE; + + if (len < (iv + SHA256_DIGEST_LENGTH + 1)) + return 0; + + /* omit explicit iv */ + out += iv; + len -= iv; + + /* figure out payload length */ + pad = out[len - 1]; + maxpad = len - (SHA256_DIGEST_LENGTH + 1); + maxpad |= (255 - maxpad) >> (sizeof(maxpad) * 8 - 8); + maxpad &= 255; + + inp_len = len - (SHA256_DIGEST_LENGTH + pad + 1); + mask = (0 - ((inp_len - len) >> (sizeof(inp_len) * 8 - 1))); + inp_len &= mask; + ret &= (int)mask; + + key->aux.tls_aad[plen - 2] = inp_len >> 8; + key->aux.tls_aad[plen - 1] = inp_len; + + /* calculate HMAC */ + key->md = key->head; + SHA256_Update(&key->md, key->aux.tls_aad, plen); + +# if 1 + len -= SHA256_DIGEST_LENGTH; /* amend mac */ + if (len >= (256 + SHA256_CBLOCK)) { + j = (len - (256 + SHA256_CBLOCK)) & (0 - SHA256_CBLOCK); + j += SHA256_CBLOCK - key->md.num; + SHA256_Update(&key->md, out, j); + out += j; + len -= j; + inp_len -= j; + } + + /* but pretend as if we hashed padded payload */ + bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */ +# ifdef BSWAP4 + bitlen = BSWAP4(bitlen); +# else + mac.c[0] = 0; + mac.c[1] = (unsigned char)(bitlen >> 16); + mac.c[2] = (unsigned char)(bitlen >> 8); + mac.c[3] = (unsigned char)bitlen; + bitlen = mac.u[0]; +# endif + + pmac->u[0] = 0; + pmac->u[1] = 0; + pmac->u[2] = 0; + pmac->u[3] = 0; + pmac->u[4] = 0; + pmac->u[5] = 0; + pmac->u[6] = 0; + pmac->u[7] = 0; + + for (res = key->md.num, j = 0; j < len; j++) { + size_t c = out[j]; + mask = (j - inp_len) >> (sizeof(j) * 8 - 8); + c &= mask; + c |= 0x80 & ~mask & ~((inp_len - j) >> (sizeof(j) * 8 - 8)); + data->c[res++] = (unsigned char)c; + + 
if (res != SHA256_CBLOCK) + continue; + + /* j is not incremented yet */ + mask = 0 - ((inp_len + 7 - j) >> (sizeof(j) * 8 - 1)); + data->u[SHA_LBLOCK - 1] |= bitlen & mask; + sha256_block_data_order(&key->md, data, 1); + mask &= 0 - ((j - inp_len - 72) >> (sizeof(j) * 8 - 1)); + pmac->u[0] |= key->md.h[0] & mask; + pmac->u[1] |= key->md.h[1] & mask; + pmac->u[2] |= key->md.h[2] & mask; + pmac->u[3] |= key->md.h[3] & mask; + pmac->u[4] |= key->md.h[4] & mask; + pmac->u[5] |= key->md.h[5] & mask; + pmac->u[6] |= key->md.h[6] & mask; + pmac->u[7] |= key->md.h[7] & mask; + res = 0; + } + + for (i = res; i < SHA256_CBLOCK; i++, j++) + data->c[i] = 0; + + if (res > SHA256_CBLOCK - 8) { + mask = 0 - ((inp_len + 8 - j) >> (sizeof(j) * 8 - 1)); + data->u[SHA_LBLOCK - 1] |= bitlen & mask; + sha256_block_data_order(&key->md, data, 1); + mask &= 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1)); + pmac->u[0] |= key->md.h[0] & mask; + pmac->u[1] |= key->md.h[1] & mask; + pmac->u[2] |= key->md.h[2] & mask; + pmac->u[3] |= key->md.h[3] & mask; + pmac->u[4] |= key->md.h[4] & mask; + pmac->u[5] |= key->md.h[5] & mask; + pmac->u[6] |= key->md.h[6] & mask; + pmac->u[7] |= key->md.h[7] & mask; + + memset(data, 0, SHA256_CBLOCK); + j += 64; + } + data->u[SHA_LBLOCK - 1] = bitlen; + sha256_block_data_order(&key->md, data, 1); + mask = 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1)); + pmac->u[0] |= key->md.h[0] & mask; + pmac->u[1] |= key->md.h[1] & mask; + pmac->u[2] |= key->md.h[2] & mask; + pmac->u[3] |= key->md.h[3] & mask; + pmac->u[4] |= key->md.h[4] & mask; + pmac->u[5] |= key->md.h[5] & mask; + pmac->u[6] |= key->md.h[6] & mask; + pmac->u[7] |= key->md.h[7] & mask; + +# ifdef BSWAP4 + pmac->u[0] = BSWAP4(pmac->u[0]); + pmac->u[1] = BSWAP4(pmac->u[1]); + pmac->u[2] = BSWAP4(pmac->u[2]); + pmac->u[3] = BSWAP4(pmac->u[3]); + pmac->u[4] = BSWAP4(pmac->u[4]); + pmac->u[5] = BSWAP4(pmac->u[5]); + pmac->u[6] = BSWAP4(pmac->u[6]); + pmac->u[7] = BSWAP4(pmac->u[7]); +# else + for (i = 0; i < 8; i++) { + res = pmac->u[i]; + pmac->c[4 * i + 0] = (unsigned char)(res >> 24); + pmac->c[4 * i + 1] = (unsigned char)(res >> 16); + pmac->c[4 * i + 2] = (unsigned char)(res >> 8); + pmac->c[4 * i + 3] = (unsigned char)res; + } +# endif + len += SHA256_DIGEST_LENGTH; +# else + SHA256_Update(&key->md, out, inp_len); + res = key->md.num; + SHA256_Final(pmac->c, &key->md); + + { + unsigned int inp_blocks, pad_blocks; + + /* but pretend as if we hashed padded payload */ + inp_blocks = + 1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1)); + res += (unsigned int)(len - inp_len); + pad_blocks = res / SHA256_CBLOCK; + res %= SHA256_CBLOCK; + pad_blocks += + 1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1)); + for (; inp_blocks < pad_blocks; inp_blocks++) + sha1_block_data_order(&key->md, data, 1); + } +# endif + key->md = key->tail; + SHA256_Update(&key->md, pmac->c, SHA256_DIGEST_LENGTH); + SHA256_Final(pmac->c, &key->md); + + /* verify HMAC */ + out += inp_len; + len -= inp_len; +# if 1 + { + unsigned char *p = + out + len - 1 - maxpad - SHA256_DIGEST_LENGTH; + size_t off = out - p; + unsigned int c, cmask; + + maxpad += SHA256_DIGEST_LENGTH; + for (res = 0, i = 0, j = 0; j < maxpad; j++) { + c = p[j]; + cmask = + ((int)(j - off - SHA256_DIGEST_LENGTH)) >> + (sizeof(int) * 8 - 1); + res |= (c ^ pad) & ~cmask; /* ... 
and padding */ + cmask &= ((int)(off - 1 - j)) >> (sizeof(int) * 8 - 1); + res |= (c ^ pmac->c[i]) & cmask; + i += 1 & cmask; + } + maxpad -= SHA256_DIGEST_LENGTH; + + res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1)); + ret &= (int)~res; + } +# else + for (res = 0, i = 0; i < SHA256_DIGEST_LENGTH; i++) + res |= out[i] ^ pmac->c[i]; + res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1)); + ret &= (int)~res; + + /* verify padding */ + pad = (pad & ~res) | (maxpad & res); + out = out + len - 1 - pad; + for (res = 0, i = 0; i < pad; i++) + res |= out[i] ^ pad; + + res = (0 - res) >> (sizeof(res) * 8 - 1); + ret &= (int)~res; +# endif + return ret; + } else { + SHA256_Update(&key->md, out, len); + } + } + + return 1; +} + +static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, + void *ptr) +{ + EVP_AES_HMAC_SHA256 *key = data(ctx); + + switch (type) { + case EVP_CTRL_AEAD_SET_MAC_KEY: + { + unsigned int i; + unsigned char hmac_key[64]; + + memset(hmac_key, 0, sizeof(hmac_key)); + + if (arg > (int)sizeof(hmac_key)) { + SHA256_Init(&key->head); + SHA256_Update(&key->head, ptr, arg); + SHA256_Final(hmac_key, &key->head); + } else { + memcpy(hmac_key, ptr, arg); + } + + for (i = 0; i < sizeof(hmac_key); i++) + hmac_key[i] ^= 0x36; /* ipad */ + SHA256_Init(&key->head); + SHA256_Update(&key->head, hmac_key, sizeof(hmac_key)); + + for (i = 0; i < sizeof(hmac_key); i++) + hmac_key[i] ^= 0x36 ^ 0x5c; /* opad */ + SHA256_Init(&key->tail); + SHA256_Update(&key->tail, hmac_key, sizeof(hmac_key)); + + OPENSSL_cleanse(hmac_key, sizeof(hmac_key)); + + return 1; + } + case EVP_CTRL_AEAD_TLS1_AAD: + { + unsigned char *p = ptr; + unsigned int len = p[arg - 2] << 8 | p[arg - 1]; + + if (ctx->encrypt) { + key->payload_length = len; + if ((key->aux.tls_ver = + p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) { + len -= AES_BLOCK_SIZE; + p[arg - 2] = len >> 8; + p[arg - 1] = len; + } + key->md = key->head; + SHA256_Update(&key->md, p, arg); + + return (int)(((len + SHA256_DIGEST_LENGTH + + AES_BLOCK_SIZE) & -AES_BLOCK_SIZE) + - len); + } else { + if (arg > 13) + arg = 13; + memcpy(key->aux.tls_aad, ptr, arg); + key->payload_length = arg; + + return SHA256_DIGEST_LENGTH; + } + } +# if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE: + return (int)(5 + 16 + ((arg + 32 + 16) & -16)); + case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + unsigned int n4x = 1, x4; + unsigned int frag, last, packlen, inp_len; + + if (arg < (int)sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM)) + return -1; + + inp_len = param->inp[11] << 8 | param->inp[12]; + + if (ctx->encrypt) { + if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION) + return -1; + + if (inp_len) { + if (inp_len < 4096) + return 0; /* too short */ + + if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5)) + n4x = 2; /* AVX2 */ + } else if ((n4x = param->interleave / 4) && n4x <= 2) + inp_len = param->len; + else + return -1; + + key->md = key->head; + SHA256_Update(&key->md, param->inp, 13); + + x4 = 4 * n4x; + n4x += 1; + + frag = inp_len >> n4x; + last = inp_len + frag - (frag << n4x); + if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) { + frag++; + last -= x4 - 1; + } + + packlen = 5 + 16 + ((frag + 32 + 16) & -16); + packlen = (packlen << n4x) - packlen; + packlen += 5 + 16 + ((last + 32 + 16) & -16); + + param->interleave = x4; + + return (int)packlen; + } else + return -1; /* not yet */ + } + case 
EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT: + { + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param = + (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr; + + return (int)tls1_1_multi_block_encrypt(key, param->out, + param->inp, param->len, + param->interleave / 4); + } + case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT: +# endif + default: + return -1; + } +} + +static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher = { +# ifdef NID_aes_128_cbc_hmac_sha256 + NID_aes_128_cbc_hmac_sha256, +# else + NID_undef, +# endif + 16, 16, 16, + EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, + aesni_cbc_hmac_sha256_init_key, + aesni_cbc_hmac_sha256_cipher, + NULL, + sizeof(EVP_AES_HMAC_SHA256), + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv, + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv, + aesni_cbc_hmac_sha256_ctrl, + NULL +}; + +static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher = { +# ifdef NID_aes_256_cbc_hmac_sha256 + NID_aes_256_cbc_hmac_sha256, +# else + NID_undef, +# endif + 16, 32, 16, + EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 | + EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK, + aesni_cbc_hmac_sha256_init_key, + aesni_cbc_hmac_sha256_cipher, + NULL, + sizeof(EVP_AES_HMAC_SHA256), + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv, + EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv, + aesni_cbc_hmac_sha256_ctrl, + NULL +}; + +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void) +{ + return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) && + aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ? + &aesni_128_cbc_hmac_sha256_cipher : NULL); +} + +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void) +{ + return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) && + aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ? 
+ &aesni_256_cbc_hmac_sha256_cipher : NULL); +} +# else +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void) +{ + return NULL; +} + +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void) +{ + return NULL; +} +# endif +#endif diff --git a/deps/openssl/openssl/crypto/evp/e_camellia.c b/deps/openssl/openssl/crypto/evp/e_camellia.c index 27bc4892fff834..f9c84013675d29 100644 --- a/deps/openssl/openssl/crypto/evp/e_camellia.c +++ b/deps/openssl/openssl/crypto/evp/e_camellia.c @@ -61,6 +61,7 @@ # include # include # include "evp_locl.h" +# include "modes_lcl.h" static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); @@ -68,48 +69,322 @@ static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, /* Camellia subkey Structure */ typedef struct { CAMELLIA_KEY ks; + block128_f block; + union { + cbc128_f cbc; + ctr128_f ctr; + } stream; } EVP_CAMELLIA_KEY; +# define MAXBITCHUNK ((size_t)1<<(sizeof(size_t)*8-4)) + /* Attribute operation for Camellia */ # define data(ctx) EVP_C_DATA(EVP_CAMELLIA_KEY,ctx) -IMPLEMENT_BLOCK_CIPHER(camellia_128, ks, Camellia, EVP_CAMELLIA_KEY, - NID_camellia_128, 16, 16, 16, 128, - 0, camellia_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL) - IMPLEMENT_BLOCK_CIPHER(camellia_192, ks, Camellia, EVP_CAMELLIA_KEY, - NID_camellia_192, 16, 24, 16, 128, - 0, camellia_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL) - IMPLEMENT_BLOCK_CIPHER(camellia_256, ks, Camellia, EVP_CAMELLIA_KEY, - NID_camellia_256, 16, 32, 16, 128, - 0, camellia_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL) -# define IMPLEMENT_CAMELLIA_CFBR(ksize,cbits) IMPLEMENT_CFBR(camellia,Camellia,EVP_CAMELLIA_KEY,ks,ksize,cbits,16) - IMPLEMENT_CAMELLIA_CFBR(128, 1) - IMPLEMENT_CAMELLIA_CFBR(192, 1) - IMPLEMENT_CAMELLIA_CFBR(256, 1) - - IMPLEMENT_CAMELLIA_CFBR(128, 8) - IMPLEMENT_CAMELLIA_CFBR(192, 8) - IMPLEMENT_CAMELLIA_CFBR(256, 8) +# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... 
*/ +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_CMLL_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_CAMELLIA) + +void cmll_t4_set_key(const unsigned char *key, int bits, CAMELLIA_KEY *ks); +void cmll_t4_encrypt(const unsigned char *in, unsigned char *out, + const CAMELLIA_KEY *key); +void cmll_t4_decrypt(const unsigned char *in, unsigned char *out, + const CAMELLIA_KEY *key); + +void cmll128_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll128_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll256_t4_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll256_t4_cbc_decrypt(const unsigned char *in, unsigned char *out, + size_t len, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll128_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const CAMELLIA_KEY *key, + unsigned char *ivec); +void cmll256_t4_ctr32_encrypt(const unsigned char *in, unsigned char *out, + size_t blocks, const CAMELLIA_KEY *key, + unsigned char *ivec); + +static int cmll_t4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ + int ret, mode, bits; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + mode = ctx->cipher->flags & EVP_CIPH_MODE; + bits = ctx->key_len * 8; + + cmll_t4_set_key(key, bits, &dat->ks); + + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + ret = 0; + dat->block = (block128_f) cmll_t4_decrypt; + switch (bits) { + case 128: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) cmll128_t4_cbc_decrypt : NULL; + break; + case 192: + case 256: + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? 
+ (cbc128_f) cmll256_t4_cbc_decrypt : NULL; + break; + default: + ret = -1; + } + } else { + ret = 0; + dat->block = (block128_f) cmll_t4_encrypt; + switch (bits) { + case 128: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) cmll128_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) cmll128_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + case 192: + case 256: + if (mode == EVP_CIPH_CBC_MODE) + dat->stream.cbc = (cbc128_f) cmll256_t4_cbc_encrypt; + else if (mode == EVP_CIPH_CTR_MODE) + dat->stream.ctr = (ctr128_f) cmll256_t4_ctr32_encrypt; + else + dat->stream.cbc = NULL; + break; + default: + ret = -1; + } + } + + if (ret < 0) { + EVPerr(EVP_F_CMLL_T4_INIT_KEY, EVP_R_CAMELLIA_KEY_SETUP_FAILED); + return 0; + } + + return 1; +} + +# define cmll_t4_cbc_cipher camellia_cbc_cipher +static int cmll_t4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_ecb_cipher camellia_ecb_cipher +static int cmll_t4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); +# define cmll_t4_ofb_cipher camellia_ofb_cipher +static int cmll_t4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_cfb_cipher camellia_cfb_cipher +static int cmll_t4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_cfb8_cipher camellia_cfb8_cipher +static int cmll_t4_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_cfb1_cipher camellia_cfb1_cipher +static int cmll_t4_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define cmll_t4_ctr_cipher camellia_ctr_cipher +static int cmll_t4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len); + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER cmll_t4_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + cmll_t4_init_key, \ + cmll_t4_##mode##_cipher, \ + NULL, \ + sizeof(EVP_CAMELLIA_KEY), \ + NULL,NULL,NULL,NULL }; \ +static const EVP_CIPHER camellia_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize, \ + keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + camellia_init_key, \ + camellia_##mode##_cipher, \ + NULL, \ + sizeof(EVP_CAMELLIA_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_camellia_##keylen##_##mode(void) \ +{ return SPARC_CMLL_CAPABLE?&cmll_t4_##keylen##_##mode:&camellia_##keylen##_##mode; } + +# else + +# define BLOCK_CIPHER_generic(nid,keylen,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER camellia_##keylen##_##mode = { \ + nid##_##keylen##_##nmode,blocksize,keylen/8,ivlen, \ + flags|EVP_CIPH_##MODE##_MODE, \ + camellia_init_key, \ + camellia_##mode##_cipher, \ + NULL, \ + sizeof(EVP_CAMELLIA_KEY), \ + NULL,NULL,NULL,NULL }; \ +const EVP_CIPHER *EVP_camellia_##keylen##_##mode(void) \ +{ return &camellia_##keylen##_##mode; } + +# endif + +# define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ + BLOCK_CIPHER_generic(nid,keylen,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + 
BLOCK_CIPHER_generic(nid,keylen,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb1,cfb1,CFB,flags) \ + BLOCK_CIPHER_generic(nid,keylen,1,16,cfb8,cfb8,CFB,flags) +# if 0 /* not yet, missing NID */ +BLOCK_CIPHER_generic(nid, keylen, 1, 16, ctr, ctr, CTR, flags) +# endif /* The subkey for Camellia is generated. */ static int camellia_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { - int ret; - - ret = Camellia_set_key(key, ctx->key_len * 8, ctx->cipher_data); + int ret, mode; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + ret = Camellia_set_key(key, ctx->key_len * 8, &dat->ks); if (ret < 0) { EVPerr(EVP_F_CAMELLIA_INIT_KEY, EVP_R_CAMELLIA_KEY_SETUP_FAILED); return 0; } + mode = ctx->cipher->flags & EVP_CIPH_MODE; + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) + && !enc) { + dat->block = (block128_f) Camellia_decrypt; + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) Camellia_cbc_encrypt : NULL; + } else { + dat->block = (block128_f) Camellia_encrypt; + dat->stream.cbc = mode == EVP_CIPH_CBC_MODE ? + (cbc128_f) Camellia_cbc_encrypt : NULL; + } + return 1; } +static int camellia_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (dat->stream.cbc) + (*dat->stream.cbc) (in, out, len, &dat->ks, ctx->iv, ctx->encrypt); + else if (ctx->encrypt) + CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, dat->block); + else + CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, ctx->iv, dat->block); + + return 1; +} + +static int camellia_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + size_t bl = ctx->cipher->block_size; + size_t i; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (len < bl) + return 1; + + for (i = 0, len -= bl; i <= len; i += bl) + (*dat->block) (in + i, out + i, &dat->ks); + + return 1; +} + +static int camellia_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + CRYPTO_ofb128_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, dat->block); + return 1; +} + +static int camellia_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + CRYPTO_cfb128_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + return 1; +} + +static int camellia_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + CRYPTO_cfb128_8_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + return 1; +} + +static int camellia_cfb1_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (ctx->flags & EVP_CIPH_FLAG_LENGTH_BITS) { + CRYPTO_cfb128_1_encrypt(in, out, len, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + return 1; + } + + while (len >= MAXBITCHUNK) { + CRYPTO_cfb128_1_encrypt(in, out, MAXBITCHUNK * 8, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); + len -= MAXBITCHUNK; + } + if (len) + CRYPTO_cfb128_1_encrypt(in, out, len * 8, &dat->ks, + ctx->iv, &ctx->num, ctx->encrypt, dat->block); 
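    /*
     * Descriptive note: CRYPTO_cfb128_1_encrypt takes its length argument in
     * bits, hence the len * 8 above. MAXBITCHUNK, defined earlier in this
     * file as ((size_t)1<<(sizeof(size_t)*8-4)), bounds each chunk so that
     * converting the byte count to a bit count cannot overflow size_t.
     */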
+ + return 1; +} + +# if 0 /* not yet, missing NID */ +static int camellia_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) +{ + unsigned int num = ctx->num; + EVP_CAMELLIA_KEY *dat = (EVP_CAMELLIA_KEY *) ctx->cipher_data; + + if (dat->stream.ctr) + CRYPTO_ctr128_encrypt_ctr32(in, out, len, &dat->ks, + ctx->iv, ctx->buf, &num, dat->stream.ctr); + else + CRYPTO_ctr128_encrypt(in, out, len, &dat->ks, + ctx->iv, ctx->buf, &num, dat->block); + ctx->num = (size_t)num; + return 1; +} +# endif + +BLOCK_CIPHER_generic_pack(NID_camellia, 128, 0) + BLOCK_CIPHER_generic_pack(NID_camellia, 192, 0) + BLOCK_CIPHER_generic_pack(NID_camellia, 256, 0) #else # ifdef PEDANTIC diff --git a/deps/openssl/openssl/crypto/evp/e_des.c b/deps/openssl/openssl/crypto/evp/e_des.c index ea1a4c422879dd..aae13a675694b4 100644 --- a/deps/openssl/openssl/crypto/evp/e_des.c +++ b/deps/openssl/openssl/crypto/evp/e_des.c @@ -65,6 +65,32 @@ # include # include +typedef struct { + union { + double align; + DES_key_schedule ks; + } ks; + union { + void (*cbc) (const void *, void *, size_t, const void *, void *); + } stream; +} EVP_DES_KEY; + +# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) + +void des_t4_key_expand(const void *key, DES_key_schedule *ks); +void des_t4_cbc_encrypt(const void *inp, void *out, size_t len, + DES_key_schedule *ks, unsigned char iv[8]); +void des_t4_cbc_decrypt(const void *inp, void *out, size_t len, + DES_key_schedule *ks, unsigned char iv[8]); +# endif + static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); static int des_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); @@ -102,6 +128,12 @@ static int des_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { + EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data; + + if (dat->stream.cbc) { + (*dat->stream.cbc) (in, out, inl, &dat->ks.ks, ctx->iv); + return 1; + } while (inl >= EVP_MAXCHUNK) { DES_ncbc_encrypt(in, out, (long)EVP_MAXCHUNK, ctx->cipher_data, (DES_cblock *)ctx->iv, ctx->encrypt); @@ -179,16 +211,15 @@ static int des_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, return 1; } -BLOCK_CIPHER_defs(des, DES_key_schedule, NID_des, 8, 8, 8, 64, +BLOCK_CIPHER_defs(des, EVP_DES_KEY, NID_des, 8, 8, 8, 64, EVP_CIPH_RAND_KEY, des_init_key, NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) + BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 1, + EVP_CIPH_RAND_KEY, des_init_key, NULL, + EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 1, - EVP_CIPH_RAND_KEY, des_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) - -BLOCK_CIPHER_def_cfb(des, DES_key_schedule, NID_des, 8, 8, 8, + BLOCK_CIPHER_def_cfb(des, EVP_DES_KEY, NID_des, 8, 8, 8, EVP_CIPH_RAND_KEY, des_init_key, NULL, EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des_ctrl) @@ -196,8 +227,22 @@ static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; + EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data; + + dat->stream.cbc = 
NULL; +# if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) { + des_t4_key_expand(key, &dat->ks.ks); + dat->stream.cbc = enc ? des_t4_cbc_encrypt : des_t4_cbc_decrypt; + return 1; + } + } +# endif # ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(deskey, ctx->cipher_data) != 0) + if (DES_set_key_checked(deskey, dat->ks.ks) != 0) return 0; # else DES_set_key_unchecked(deskey, ctx->cipher_data); diff --git a/deps/openssl/openssl/crypto/evp/e_des3.c b/deps/openssl/openssl/crypto/evp/e_des3.c index 07a5aca606de0c..301d93e13dff46 100644 --- a/deps/openssl/openssl/crypto/evp/e_des3.c +++ b/deps/openssl/openssl/crypto/evp/e_des3.c @@ -65,7 +65,38 @@ # include # include -# ifndef OPENSSL_FIPS +/* Block use of implementations in FIPS mode */ +# undef EVP_CIPH_FLAG_FIPS +# define EVP_CIPH_FLAG_FIPS 0 + +typedef struct { + union { + double align; + DES_key_schedule ks[3]; + } ks; + union { + void (*cbc) (const void *, void *, size_t, const void *, void *); + } stream; +} DES_EDE_KEY; +# define ks1 ks.ks[0] +# define ks2 ks.ks[1] +# define ks3 ks.ks[2] + +# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__)) +/* ---------^^^ this is not a typo, just a way to detect that + * assembler support was in general requested... */ +# include "sparc_arch.h" + +extern unsigned int OPENSSL_sparcv9cap_P[]; + +# define SPARC_DES_CAPABLE (OPENSSL_sparcv9cap_P[1] & CFR_DES) + +void des_t4_key_expand(const void *key, DES_key_schedule *ks); +void des_t4_ede3_cbc_encrypt(const void *inp, void *out, size_t len, + DES_key_schedule *ks, unsigned char iv[8]); +void des_t4_ede3_cbc_decrypt(const void *inp, void *out, size_t len, + DES_key_schedule *ks, unsigned char iv[8]); +# endif static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); @@ -75,13 +106,7 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, static int des3_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr); -typedef struct { - DES_key_schedule ks1; /* key schedule */ - DES_key_schedule ks2; /* key schedule (for ede) */ - DES_key_schedule ks3; /* key schedule (for ede3) */ -} DES_EDE_KEY; - -# define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) +# define data(ctx) ((DES_EDE_KEY *)(ctx)->cipher_data) /* * Because of various casts and different args can't use @@ -123,7 +148,9 @@ static int des_ede_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, const unsigned char *in, size_t inl) { -# ifdef KSSL_DEBUG + DES_EDE_KEY *dat = data(ctx); + +# ifdef KSSL_DEBUG { int i; fprintf(stderr, "des_ede_cbc_cipher(ctx=%p, buflen=%d)\n", ctx, @@ -133,21 +160,24 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, fprintf(stderr, "%02X", ctx->iv[i]); fprintf(stderr, "\n"); } -# endif /* KSSL_DEBUG */ +# endif /* KSSL_DEBUG */ + if (dat->stream.cbc) { + (*dat->stream.cbc) (in, out, inl, &dat->ks, ctx->iv); + return 1; + } + while (inl >= EVP_MAXCHUNK) { DES_ede3_cbc_encrypt(in, out, (long)EVP_MAXCHUNK, - &data(ctx)->ks1, &data(ctx)->ks2, - &data(ctx)->ks3, (DES_cblock *)ctx->iv, - ctx->encrypt); + &dat->ks1, &dat->ks2, &dat->ks3, + (DES_cblock *)ctx->iv, ctx->encrypt); inl -= EVP_MAXCHUNK; in += EVP_MAXCHUNK; out += EVP_MAXCHUNK; } if (inl) DES_ede3_cbc_encrypt(in, out, (long)inl, - &data(ctx)->ks1, &data(ctx)->ks2, - &data(ctx)->ks3, (DES_cblock *)ctx->iv, - ctx->encrypt); + &dat->ks1, &dat->ks2, 
&dat->ks3, + (DES_cblock *)ctx->iv, ctx->encrypt); return 1; } @@ -215,39 +245,57 @@ static int des_ede3_cfb8_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, } BLOCK_CIPHER_defs(des_ede, DES_EDE_KEY, NID_des_ede, 8, 16, 8, 64, - EVP_CIPH_RAND_KEY, des_ede_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) -# define des_ede3_cfb64_cipher des_ede_cfb64_cipher -# define des_ede3_ofb_cipher des_ede_ofb_cipher -# define des_ede3_cbc_cipher des_ede_cbc_cipher -# define des_ede3_ecb_cipher des_ede_ecb_cipher + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_DEFAULT_ASN1, + des_ede_init_key, NULL, NULL, NULL, des3_ctrl) +# define des_ede3_cfb64_cipher des_ede_cfb64_cipher +# define des_ede3_ofb_cipher des_ede_ofb_cipher +# define des_ede3_cbc_cipher des_ede_cbc_cipher +# define des_ede3_ecb_cipher des_ede_ecb_cipher BLOCK_CIPHER_defs(des_ede3, DES_EDE_KEY, NID_des_ede3, 8, 24, 8, 64, - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, - EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, des3_ctrl) + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS | + EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, NULL, + des3_ctrl) BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 1, - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, des3_ctrl) + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS | + EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, + NULL, des3_ctrl) BLOCK_CIPHER_def_cfb(des_ede3, DES_EDE_KEY, NID_des_ede3, 24, 8, 8, - EVP_CIPH_RAND_KEY, des_ede3_init_key, NULL, - EVP_CIPHER_set_asn1_iv, - EVP_CIPHER_get_asn1_iv, des3_ctrl) + EVP_CIPH_RAND_KEY | EVP_CIPH_FLAG_FIPS | + EVP_CIPH_FLAG_DEFAULT_ASN1, des_ede3_init_key, NULL, NULL, + NULL, des3_ctrl) static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; -# ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) - ! !DES_set_key_checked(&deskey[1], &data(ctx)->ks2)) + DES_EDE_KEY *dat = data(ctx); + + dat->stream.cbc = NULL; +# if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) { + des_t4_key_expand(&deskey[0], &dat->ks1); + des_t4_key_expand(&deskey[1], &dat->ks2); + memcpy(&dat->ks3, &dat->ks1, sizeof(dat->ks1)); + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : + des_t4_ede3_cbc_decrypt; + return 1; + } + } +# endif +# ifdef EVP_CHECK_DES_KEY + if (DES_set_key_checked(&deskey[0], &dat->ks1) + ! 
!DES_set_key_checked(&deskey[1], &dat->ks2)) return 0; -# else - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); -# endif - memcpy(&data(ctx)->ks3, &data(ctx)->ks1, sizeof(data(ctx)->ks1)); +# else + DES_set_key_unchecked(&deskey[0], &dat->ks1); + DES_set_key_unchecked(&deskey[1], &dat->ks2); +# endif + memcpy(&dat->ks3, &dat->ks1, sizeof(dat->ks1)); return 1; } @@ -255,7 +303,9 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc) { DES_cblock *deskey = (DES_cblock *)key; -# ifdef KSSL_DEBUG + DES_EDE_KEY *dat = data(ctx); + +# ifdef KSSL_DEBUG { int i; fprintf(stderr, "des_ede3_init_key(ctx=%p)\n", ctx); @@ -270,18 +320,33 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, fprintf(stderr, "\n"); } } -# endif /* KSSL_DEBUG */ +# endif /* KSSL_DEBUG */ -# ifdef EVP_CHECK_DES_KEY - if (DES_set_key_checked(&deskey[0], &data(ctx)->ks1) - || DES_set_key_checked(&deskey[1], &data(ctx)->ks2) - || DES_set_key_checked(&deskey[2], &data(ctx)->ks3)) + dat->stream.cbc = NULL; +# if defined(SPARC_DES_CAPABLE) + if (SPARC_DES_CAPABLE) { + int mode = ctx->cipher->flags & EVP_CIPH_MODE; + + if (mode == EVP_CIPH_CBC_MODE) { + des_t4_key_expand(&deskey[0], &dat->ks1); + des_t4_key_expand(&deskey[1], &dat->ks2); + des_t4_key_expand(&deskey[2], &dat->ks3); + dat->stream.cbc = enc ? des_t4_ede3_cbc_encrypt : + des_t4_ede3_cbc_decrypt; + return 1; + } + } +# endif +# ifdef EVP_CHECK_DES_KEY + if (DES_set_key_checked(&deskey[0], &dat->ks1) + || DES_set_key_checked(&deskey[1], &dat->ks2) + || DES_set_key_checked(&deskey[2], &dat->ks3)) return 0; -# else - DES_set_key_unchecked(&deskey[0], &data(ctx)->ks1); - DES_set_key_unchecked(&deskey[1], &data(ctx)->ks2); - DES_set_key_unchecked(&deskey[2], &data(ctx)->ks3); -# endif +# else + DES_set_key_unchecked(&deskey[0], &dat->ks1); + DES_set_key_unchecked(&deskey[1], &dat->ks2); + DES_set_key_unchecked(&deskey[2], &dat->ks3); +# endif return 1; } @@ -315,5 +380,114 @@ const EVP_CIPHER *EVP_des_ede3(void) { return &des_ede3_ecb; } + +# ifndef OPENSSL_NO_SHA + +# include + +static const unsigned char wrap_iv[8] = + { 0x4a, 0xdd, 0xa2, 0x2c, 0x79, 0xe8, 0x21, 0x05 }; + +static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + unsigned char icv[8], iv[8], sha1tmp[SHA_DIGEST_LENGTH]; + int rv = -1; + if (inl < 24) + return -1; + if (!out) + return inl - 16; + memcpy(ctx->iv, wrap_iv, 8); + /* Decrypt first block which will end up as icv */ + des_ede_cbc_cipher(ctx, icv, in, 8); + /* Decrypt central blocks */ + /* + * If decrypting in place move whole output along a block so the next + * des_ede_cbc_cipher is in place. 
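 * (Descriptive note: after the memmove the remaining ciphertext begins at
 * out, so stepping in back by one block keeps in + 8 pointing at the moved
 * data and the CBC decrypt below then operates on the buffer in place.)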
+ */ + if (out == in) { + memmove(out, out + 8, inl - 8); + in -= 8; + } + des_ede_cbc_cipher(ctx, out, in + 8, inl - 16); + /* Decrypt final block which will be IV */ + des_ede_cbc_cipher(ctx, iv, in + inl - 8, 8); + /* Reverse order of everything */ + BUF_reverse(icv, NULL, 8); + BUF_reverse(out, NULL, inl - 16); + BUF_reverse(ctx->iv, iv, 8); + /* Decrypt again using new IV */ + des_ede_cbc_cipher(ctx, out, out, inl - 16); + des_ede_cbc_cipher(ctx, icv, icv, 8); + /* Work out SHA1 hash of first portion */ + SHA1(out, inl - 16, sha1tmp); + + if (!CRYPTO_memcmp(sha1tmp, icv, 8)) + rv = inl - 16; + OPENSSL_cleanse(icv, 8); + OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); + OPENSSL_cleanse(iv, 8); + OPENSSL_cleanse(ctx->iv, 8); + if (rv == -1) + OPENSSL_cleanse(out, inl - 16); + + return rv; +} + +static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + unsigned char sha1tmp[SHA_DIGEST_LENGTH]; + if (!out) + return inl + 16; + /* Copy input to output buffer + 8 so we have space for IV */ + memmove(out + 8, in, inl); + /* Work out ICV */ + SHA1(in, inl, sha1tmp); + memcpy(out + inl + 8, sha1tmp, 8); + OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); + /* Generate random IV */ + RAND_bytes(ctx->iv, 8); + memcpy(out, ctx->iv, 8); + /* Encrypt everything after IV in place */ + des_ede_cbc_cipher(ctx, out + 8, out + 8, inl + 8); + BUF_reverse(out, NULL, inl + 16); + memcpy(ctx->iv, wrap_iv, 8); + des_ede_cbc_cipher(ctx, out, out, inl + 16); + return inl + 16; +} + +static int des_ede3_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t inl) +{ + /* + * Sanity check input length: we typically only wrap keys so EVP_MAXCHUNK + * is more than will ever be needed. Also input length must be a multiple + * of 8 bits. 
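 * (Descriptive note: this is the CMS triple-DES key wrap of RFC 3217:
 * wrapping prepends an 8-byte random IV and appends the first 8 bytes of a
 * SHA-1 ICV, so wrapped output is inl + 16 bytes and unwrap rejects any
 * input shorter than 24 bytes.)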
+ */ + if (inl >= EVP_MAXCHUNK || inl % 8) + return -1; + if (ctx->encrypt) + return des_ede3_wrap(ctx, out, in, inl); + else + return des_ede3_unwrap(ctx, out, in, inl); +} + +static const EVP_CIPHER des3_wrap = { + NID_id_smime_alg_CMS3DESwrap, + 8, 24, 0, + EVP_CIPH_WRAP_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER + | EVP_CIPH_FLAG_DEFAULT_ASN1, + des_ede3_init_key, des_ede3_wrap_cipher, + NULL, + sizeof(DES_EDE_KEY), + NULL, NULL, NULL, NULL +}; + +const EVP_CIPHER *EVP_des_ede3_wrap(void) +{ + return &des3_wrap; +} + # endif #endif diff --git a/deps/openssl/openssl/crypto/evp/e_null.c b/deps/openssl/openssl/crypto/evp/e_null.c index af90ce326bd9b8..599fcb808d219b 100644 --- a/deps/openssl/openssl/crypto/evp/e_null.c +++ b/deps/openssl/openssl/crypto/evp/e_null.c @@ -61,8 +61,6 @@ #include #include -#ifndef OPENSSL_FIPS - static int null_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, const unsigned char *iv, int enc); static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, @@ -100,4 +98,3 @@ static int null_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, memcpy((char *)out, (const char *)in, inl); return 1; } -#endif diff --git a/deps/openssl/openssl/crypto/evp/evp.h b/deps/openssl/openssl/crypto/evp/evp.h index b00997b14958fe..47abbac4a24cfe 100644 --- a/deps/openssl/openssl/crypto/evp/evp.h +++ b/deps/openssl/openssl/crypto/evp/evp.h @@ -114,6 +114,7 @@ # define EVP_PKEY_DSA3 NID_dsaWithSHA1 # define EVP_PKEY_DSA4 NID_dsaWithSHA1_2 # define EVP_PKEY_DH NID_dhKeyAgreement +# define EVP_PKEY_DHX NID_dhpublicnumber # define EVP_PKEY_EC NID_X9_62_id_ecPublicKey # define EVP_PKEY_HMAC NID_hmac # define EVP_PKEY_CMAC NID_cmac @@ -346,6 +347,7 @@ struct evp_cipher_st { # define EVP_CIPH_GCM_MODE 0x6 # define EVP_CIPH_CCM_MODE 0x7 # define EVP_CIPH_XTS_MODE 0x10001 +# define EVP_CIPH_WRAP_MODE 0x10002 # define EVP_CIPH_MODE 0xF0007 /* Set if variable length cipher */ # define EVP_CIPH_VARIABLE_LENGTH 0x8 @@ -376,6 +378,14 @@ struct evp_cipher_st { */ # define EVP_CIPH_FLAG_CUSTOM_CIPHER 0x100000 # define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0x400000 + +/* + * Cipher context flag to indicate we can handle wrap mode: if allowed in + * older applications it could overflow buffers. 
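 * Illustrative sketch (assumed usage, mirroring the wrap-mode handling added
 * to evp_test.c in this patch; "kek" stands in for the caller's
 * key-encryption key):
 *
 *     EVP_CIPHER_CTX ctx;
 *     EVP_CIPHER_CTX_init(&ctx);
 *     EVP_CIPHER_CTX_set_flags(&ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
 *     EVP_EncryptInit_ex(&ctx, EVP_aes_128_wrap(), NULL, kek, NULL);
 *
 * Without the set_flags() call the init fails with
 * EVP_R_WRAP_MODE_NOT_ALLOWED once the wrap-mode check added to evp_enc.c
 * elsewhere in this patch is in effect.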
+ */ + +# define EVP_CIPHER_CTX_FLAG_WRAP_ALLOW 0x1 /* ctrl() values */ @@ -409,6 +419,18 @@ struct evp_cipher_st { /* Set the GCM invocation field, decrypt only */ # define EVP_CTRL_GCM_SET_IV_INV 0x18 +# define EVP_CTRL_TLS1_1_MULTIBLOCK_AAD 0x19 +# define EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT 0x1a +# define EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT 0x1b +# define EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE 0x1c + +typedef struct { + unsigned char *out; + const unsigned char *inp; + size_t len; + unsigned int interleave; +} EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM; + /* GCM TLS constants */ /* Length of fixed part of IV derived from PRF */ # define EVP_GCM_TLS_FIXED_IV_LEN 4 @@ -637,7 +659,8 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, int EVP_DigestVerifyInit(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx, const EVP_MD *type, ENGINE *e, EVP_PKEY *pkey); -int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen); +int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, + const unsigned char *sig, size_t siglen); int EVP_OpenInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *type, const unsigned char *ek, int ekl, const unsigned char *iv, @@ -742,6 +765,7 @@ const EVP_CIPHER *EVP_des_cbc(void); const EVP_CIPHER *EVP_des_ede_cbc(void); const EVP_CIPHER *EVP_des_ede3_cbc(void); const EVP_CIPHER *EVP_desx_cbc(void); +const EVP_CIPHER *EVP_des_ede3_wrap(void); /* * This should now be supported through the dev_crypto ENGINE. But also, why * are rc4 and md5 declarations made here inside a "NO_DES" precompiler @@ -811,6 +835,7 @@ const EVP_CIPHER *EVP_aes_128_ctr(void); const EVP_CIPHER *EVP_aes_128_ccm(void); const EVP_CIPHER *EVP_aes_128_gcm(void); const EVP_CIPHER *EVP_aes_128_xts(void); +const EVP_CIPHER *EVP_aes_128_wrap(void); const EVP_CIPHER *EVP_aes_192_ecb(void); const EVP_CIPHER *EVP_aes_192_cbc(void); const EVP_CIPHER *EVP_aes_192_cfb1(void); @@ -821,6 +846,7 @@ const EVP_CIPHER *EVP_aes_192_ofb(void); const EVP_CIPHER *EVP_aes_192_ctr(void); const EVP_CIPHER *EVP_aes_192_ccm(void); const EVP_CIPHER *EVP_aes_192_gcm(void); +const EVP_CIPHER *EVP_aes_192_wrap(void); const EVP_CIPHER *EVP_aes_256_ecb(void); const EVP_CIPHER *EVP_aes_256_cbc(void); const EVP_CIPHER *EVP_aes_256_cfb1(void); @@ -832,10 +858,15 @@ const EVP_CIPHER *EVP_aes_256_ctr(void); const EVP_CIPHER *EVP_aes_256_ccm(void); const EVP_CIPHER *EVP_aes_256_gcm(void); const EVP_CIPHER *EVP_aes_256_xts(void); +const EVP_CIPHER *EVP_aes_256_wrap(void); # if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void); const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void); # endif +# ifndef OPENSSL_NO_SHA256 +const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void); +const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void); +# endif # endif # ifndef OPENSSL_NO_CAMELLIA const EVP_CIPHER *EVP_camellia_128_ecb(void); @@ -1026,6 +1057,7 @@ void EVP_PBE_cleanup(void); # define ASN1_PKEY_CTRL_DEFAULT_MD_NID 0x3 # define ASN1_PKEY_CTRL_CMS_SIGN 0x5 # define ASN1_PKEY_CTRL_CMS_ENVELOPE 0x7 +# define ASN1_PKEY_CTRL_CMS_RI_TYPE 0x8 int EVP_PKEY_asn1_get_count(void); const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_get0(int idx); @@ -1119,6 +1151,10 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, EVP_PKEY_CTX_ctrl(ctx, -1, EVP_PKEY_OP_TYPE_SIG, \ EVP_PKEY_CTRL_MD, 0, (void *)md) +# define EVP_PKEY_CTX_get_signature_md(ctx, pmd) \ + EVP_PKEY_CTX_ctrl(ctx, -1, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTRL_GET_MD, 0, (void *)pmd) + # define EVP_PKEY_CTRL_MD 1 # define EVP_PKEY_CTRL_PEER_KEY 2 @@ -1140,6 +1176,8 @@ void 
EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, # define EVP_PKEY_CTRL_CIPHER 12 +# define EVP_PKEY_CTRL_GET_MD 13 + # define EVP_PKEY_ALG_CTRL 0x1000 # define EVP_PKEY_FLAG_AUTOARGLEN 2 @@ -1325,11 +1363,13 @@ void ERR_load_EVP_strings(void); # define EVP_F_AESNI_INIT_KEY 165 # define EVP_F_AESNI_XTS_CIPHER 176 # define EVP_F_AES_INIT_KEY 133 +# define EVP_F_AES_T4_INIT_KEY 178 # define EVP_F_AES_XTS 172 # define EVP_F_AES_XTS_CIPHER 175 # define EVP_F_ALG_MODULE_INIT 177 # define EVP_F_CAMELLIA_INIT_KEY 159 # define EVP_F_CMAC_INIT 173 +# define EVP_F_CMLL_T4_INIT_KEY 179 # define EVP_F_D2I_PKEY 100 # define EVP_F_DO_SIGVER_INIT 161 # define EVP_F_DSAPKEY2PKCS8 134 @@ -1469,6 +1509,7 @@ void ERR_load_EVP_strings(void); # define EVP_R_UNSUPPORTED_PRF 125 # define EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM 118 # define EVP_R_UNSUPPORTED_SALT_TYPE 126 +# define EVP_R_WRAP_MODE_NOT_ALLOWED 170 # define EVP_R_WRONG_FINAL_BLOCK_LENGTH 109 # define EVP_R_WRONG_PUBLIC_KEY_TYPE 110 diff --git a/deps/openssl/openssl/crypto/evp/evp_enc.c b/deps/openssl/openssl/crypto/evp/evp_enc.c index 4e983c4bda2379..65f0e0244dce49 100644 --- a/deps/openssl/openssl/crypto/evp/evp_enc.c +++ b/deps/openssl/openssl/crypto/evp/evp_enc.c @@ -169,8 +169,14 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, #endif #ifdef OPENSSL_FIPS - if (FIPS_mode()) + if (FIPS_mode()) { + const EVP_CIPHER *fcipher; + if (cipher) + fcipher = evp_get_fips_cipher(cipher); + if (fcipher) + cipher = fcipher; return FIPS_cipherinit(ctx, cipher, key, iv, enc); + } #endif ctx->cipher = cipher; if (ctx->cipher->ctx_size) { @@ -183,7 +189,8 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ctx->cipher_data = NULL; } ctx->key_len = cipher->key_len; - ctx->flags = 0; + /* Preserve wrap enable flag, zero everything else */ + ctx->flags &= EVP_CIPHER_CTX_FLAG_WRAP_ALLOW; if (ctx->cipher->flags & EVP_CIPH_CTRL_INIT) { if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) { EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_INITIALIZATION_ERROR); @@ -206,6 +213,12 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, || ctx->cipher->block_size == 8 || ctx->cipher->block_size == 16); + if (!(ctx->flags & EVP_CIPHER_CTX_FLAG_WRAP_ALLOW) + && EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_WRAP_MODE) { + EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_WRAP_MODE_NOT_ALLOWED); + return 0; + } + if (!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) { switch (EVP_CIPHER_CTX_mode(ctx)) { diff --git a/deps/openssl/openssl/crypto/evp/evp_err.c b/deps/openssl/openssl/crypto/evp/evp_err.c index 686a69958dd701..15cf5532b3820a 100644 --- a/deps/openssl/openssl/crypto/evp/evp_err.c +++ b/deps/openssl/openssl/crypto/evp/evp_err.c @@ -1,6 +1,6 @@ /* crypto/evp/evp_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -73,11 +73,13 @@ static ERR_STRING_DATA EVP_str_functs[] = { {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, + {ERR_FUNC(EVP_F_AES_T4_INIT_KEY), "AES_T4_INIT_KEY"}, {ERR_FUNC(EVP_F_AES_XTS), "AES_XTS"}, {ERR_FUNC(EVP_F_AES_XTS_CIPHER), "AES_XTS_CIPHER"}, {ERR_FUNC(EVP_F_ALG_MODULE_INIT), "ALG_MODULE_INIT"}, {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"}, {ERR_FUNC(EVP_F_CMAC_INIT), "CMAC_INIT"}, + {ERR_FUNC(EVP_F_CMLL_T4_INIT_KEY), "CMLL_T4_INIT_KEY"}, {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"}, {ERR_FUNC(EVP_F_DO_SIGVER_INIT), "DO_SIGVER_INIT"}, {ERR_FUNC(EVP_F_DSAPKEY2PKCS8), "DSAPKEY2PKCS8"}, @@ -232,6 +234,7 @@ static ERR_STRING_DATA EVP_str_reasons[] = { {ERR_REASON(EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM), "unsupported private key algorithm"}, {ERR_REASON(EVP_R_UNSUPPORTED_SALT_TYPE), "unsupported salt type"}, + {ERR_REASON(EVP_R_WRAP_MODE_NOT_ALLOWED), "wrap mode not allowed"}, {ERR_REASON(EVP_R_WRONG_FINAL_BLOCK_LENGTH), "wrong final block length"}, {ERR_REASON(EVP_R_WRONG_PUBLIC_KEY_TYPE), "wrong public key type"}, {0, NULL} diff --git a/deps/openssl/openssl/crypto/evp/evp_extra_test.c b/deps/openssl/openssl/crypto/evp/evp_extra_test.c index 21688b0cd38310..0f7b011ce85e06 100644 --- a/deps/openssl/openssl/crypto/evp/evp_extra_test.c +++ b/deps/openssl/openssl/crypto/evp/evp_extra_test.c @@ -345,7 +345,7 @@ static int test_EVP_DigestVerifyInit(void) if (pkey == NULL || !EVP_DigestVerifyInit(&md_ctx, NULL, EVP_sha256(), NULL, pkey) || !EVP_DigestVerifyUpdate(&md_ctx, kMsg, sizeof(kMsg)) || - !EVP_DigestVerifyFinal(&md_ctx, (unsigned char *)kSignature, sizeof(kSignature))) { + !EVP_DigestVerifyFinal(&md_ctx, kSignature, sizeof(kSignature))) { goto out; } ret = 1; diff --git a/deps/openssl/openssl/crypto/evp/evp_fips.c b/deps/openssl/openssl/crypto/evp/evp_fips.c deleted file mode 100644 index 71a32fca8d2ab2..00000000000000 --- a/deps/openssl/openssl/crypto/evp/evp_fips.c +++ /dev/null @@ -1,310 +0,0 @@ -/* crypto/evp/evp_fips.c */ -/* - * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project. - */ -/* ==================================================================== - * Copyright (c) 2011 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * licensing@OpenSSL.org. - * - * 5. 
Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - */ - -#include - -#ifdef OPENSSL_FIPS -# include - -const EVP_CIPHER *EVP_aes_128_cbc(void) -{ - return FIPS_evp_aes_128_cbc(); -} - -const EVP_CIPHER *EVP_aes_128_ccm(void) -{ - return FIPS_evp_aes_128_ccm(); -} - -const EVP_CIPHER *EVP_aes_128_cfb1(void) -{ - return FIPS_evp_aes_128_cfb1(); -} - -const EVP_CIPHER *EVP_aes_128_cfb128(void) -{ - return FIPS_evp_aes_128_cfb128(); -} - -const EVP_CIPHER *EVP_aes_128_cfb8(void) -{ - return FIPS_evp_aes_128_cfb8(); -} - -const EVP_CIPHER *EVP_aes_128_ctr(void) -{ - return FIPS_evp_aes_128_ctr(); -} - -const EVP_CIPHER *EVP_aes_128_ecb(void) -{ - return FIPS_evp_aes_128_ecb(); -} - -const EVP_CIPHER *EVP_aes_128_gcm(void) -{ - return FIPS_evp_aes_128_gcm(); -} - -const EVP_CIPHER *EVP_aes_128_ofb(void) -{ - return FIPS_evp_aes_128_ofb(); -} - -const EVP_CIPHER *EVP_aes_128_xts(void) -{ - return FIPS_evp_aes_128_xts(); -} - -const EVP_CIPHER *EVP_aes_192_cbc(void) -{ - return FIPS_evp_aes_192_cbc(); -} - -const EVP_CIPHER *EVP_aes_192_ccm(void) -{ - return FIPS_evp_aes_192_ccm(); -} - -const EVP_CIPHER *EVP_aes_192_cfb1(void) -{ - return FIPS_evp_aes_192_cfb1(); -} - -const EVP_CIPHER *EVP_aes_192_cfb128(void) -{ - return FIPS_evp_aes_192_cfb128(); -} - -const EVP_CIPHER *EVP_aes_192_cfb8(void) -{ - return FIPS_evp_aes_192_cfb8(); -} - -const EVP_CIPHER *EVP_aes_192_ctr(void) -{ - return FIPS_evp_aes_192_ctr(); -} - -const EVP_CIPHER *EVP_aes_192_ecb(void) -{ - return FIPS_evp_aes_192_ecb(); -} - -const EVP_CIPHER *EVP_aes_192_gcm(void) -{ - return FIPS_evp_aes_192_gcm(); -} - -const EVP_CIPHER *EVP_aes_192_ofb(void) -{ - return FIPS_evp_aes_192_ofb(); -} - -const EVP_CIPHER *EVP_aes_256_cbc(void) -{ - return FIPS_evp_aes_256_cbc(); -} - -const EVP_CIPHER *EVP_aes_256_ccm(void) -{ - return FIPS_evp_aes_256_ccm(); -} - -const EVP_CIPHER *EVP_aes_256_cfb1(void) -{ - return FIPS_evp_aes_256_cfb1(); -} - -const EVP_CIPHER *EVP_aes_256_cfb128(void) -{ - return FIPS_evp_aes_256_cfb128(); -} - -const EVP_CIPHER *EVP_aes_256_cfb8(void) -{ - return FIPS_evp_aes_256_cfb8(); -} - -const EVP_CIPHER *EVP_aes_256_ctr(void) -{ - return FIPS_evp_aes_256_ctr(); -} - -const EVP_CIPHER *EVP_aes_256_ecb(void) -{ - return FIPS_evp_aes_256_ecb(); -} - -const EVP_CIPHER *EVP_aes_256_gcm(void) -{ - return 
FIPS_evp_aes_256_gcm(); -} - -const EVP_CIPHER *EVP_aes_256_ofb(void) -{ - return FIPS_evp_aes_256_ofb(); -} - -const EVP_CIPHER *EVP_aes_256_xts(void) -{ - return FIPS_evp_aes_256_xts(); -} - -const EVP_CIPHER *EVP_des_ede(void) -{ - return FIPS_evp_des_ede(); -} - -const EVP_CIPHER *EVP_des_ede3(void) -{ - return FIPS_evp_des_ede3(); -} - -const EVP_CIPHER *EVP_des_ede3_cbc(void) -{ - return FIPS_evp_des_ede3_cbc(); -} - -const EVP_CIPHER *EVP_des_ede3_cfb1(void) -{ - return FIPS_evp_des_ede3_cfb1(); -} - -const EVP_CIPHER *EVP_des_ede3_cfb64(void) -{ - return FIPS_evp_des_ede3_cfb64(); -} - -const EVP_CIPHER *EVP_des_ede3_cfb8(void) -{ - return FIPS_evp_des_ede3_cfb8(); -} - -const EVP_CIPHER *EVP_des_ede3_ecb(void) -{ - return FIPS_evp_des_ede3_ecb(); -} - -const EVP_CIPHER *EVP_des_ede3_ofb(void) -{ - return FIPS_evp_des_ede3_ofb(); -} - -const EVP_CIPHER *EVP_des_ede_cbc(void) -{ - return FIPS_evp_des_ede_cbc(); -} - -const EVP_CIPHER *EVP_des_ede_cfb64(void) -{ - return FIPS_evp_des_ede_cfb64(); -} - -const EVP_CIPHER *EVP_des_ede_ecb(void) -{ - return FIPS_evp_des_ede_ecb(); -} - -const EVP_CIPHER *EVP_des_ede_ofb(void) -{ - return FIPS_evp_des_ede_ofb(); -} - -const EVP_CIPHER *EVP_enc_null(void) -{ - return FIPS_evp_enc_null(); -} - -const EVP_MD *EVP_sha1(void) -{ - return FIPS_evp_sha1(); -} - -const EVP_MD *EVP_sha224(void) -{ - return FIPS_evp_sha224(); -} - -const EVP_MD *EVP_sha256(void) -{ - return FIPS_evp_sha256(); -} - -const EVP_MD *EVP_sha384(void) -{ - return FIPS_evp_sha384(); -} - -const EVP_MD *EVP_sha512(void) -{ - return FIPS_evp_sha512(); -} - -const EVP_MD *EVP_dss(void) -{ - return FIPS_evp_dss(); -} - -const EVP_MD *EVP_dss1(void) -{ - return FIPS_evp_dss1(); -} - -const EVP_MD *EVP_ecdsa(void) -{ - return FIPS_evp_ecdsa(); -} - -#endif diff --git a/deps/openssl/openssl/crypto/evp/evp_lib.c b/deps/openssl/openssl/crypto/evp/evp_lib.c index d4d2b4b2808153..a53a27ca0c92e2 100644 --- a/deps/openssl/openssl/crypto/evp/evp_lib.c +++ b/deps/openssl/openssl/crypto/evp/evp_lib.c @@ -60,6 +60,10 @@ #include "cryptlib.h" #include #include +#ifdef OPENSSL_FIPS +# include +# include "evp_locl.h" +#endif int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type) { @@ -67,9 +71,13 @@ int EVP_CIPHER_param_to_asn1(EVP_CIPHER_CTX *c, ASN1_TYPE *type) if (c->cipher->set_asn1_parameters != NULL) ret = c->cipher->set_asn1_parameters(c, type); - else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) - ret = EVP_CIPHER_set_asn1_iv(c, type); - else + else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) { + if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE) { + ASN1_TYPE_set(type, V_ASN1_NULL, NULL); + ret = 1; + } else + ret = EVP_CIPHER_set_asn1_iv(c, type); + } else ret = -1; return (ret); } @@ -80,9 +88,11 @@ int EVP_CIPHER_asn1_to_param(EVP_CIPHER_CTX *c, ASN1_TYPE *type) if (c->cipher->get_asn1_parameters != NULL) ret = c->cipher->get_asn1_parameters(c, type); - else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) + else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) { + if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE) + return 1; ret = EVP_CIPHER_get_asn1_iv(c, type); - else + } else ret = -1; return (ret); } @@ -200,12 +210,22 @@ const EVP_CIPHER *EVP_CIPHER_CTX_cipher(const EVP_CIPHER_CTX *ctx) unsigned long EVP_CIPHER_flags(const EVP_CIPHER *cipher) { +#ifdef OPENSSL_FIPS + const EVP_CIPHER *fcipher; + fcipher = evp_get_fips_cipher(cipher); + if (fcipher && fcipher->flags & EVP_CIPH_FLAG_FIPS) + return cipher->flags | EVP_CIPH_FLAG_FIPS; +#endif return 
cipher->flags; } unsigned long EVP_CIPHER_CTX_flags(const EVP_CIPHER_CTX *ctx) { +#ifdef OPENSSL_FIPS + return EVP_CIPHER_flags(ctx->cipher); +#else return ctx->cipher->flags; +#endif } void *EVP_CIPHER_CTX_get_app_data(const EVP_CIPHER_CTX *ctx) @@ -272,8 +292,40 @@ int EVP_MD_size(const EVP_MD *md) return md->md_size; } +#ifdef OPENSSL_FIPS + +const EVP_MD *evp_get_fips_md(const EVP_MD *md) +{ + int nid = EVP_MD_type(md); + if (nid == NID_dsa) + return FIPS_evp_dss1(); + else if (nid == NID_dsaWithSHA) + return FIPS_evp_dss(); + else if (nid == NID_ecdsa_with_SHA1) + return FIPS_evp_ecdsa(); + else + return FIPS_get_digestbynid(nid); +} + +const EVP_CIPHER *evp_get_fips_cipher(const EVP_CIPHER *cipher) +{ + int nid = cipher->nid; + if (nid == NID_undef) + return FIPS_evp_enc_null(); + else + return FIPS_get_cipherbynid(nid); +} + +#endif + unsigned long EVP_MD_flags(const EVP_MD *md) { +#ifdef OPENSSL_FIPS + const EVP_MD *fmd; + fmd = evp_get_fips_md(md); + if (fmd && fmd->flags & EVP_MD_FLAG_FIPS) + return md->flags | EVP_MD_FLAG_FIPS; +#endif return md->flags; } diff --git a/deps/openssl/openssl/crypto/evp/evp_locl.h b/deps/openssl/openssl/crypto/evp/evp_locl.h index 980dadab23d46d..2bb709a065d5b5 100644 --- a/deps/openssl/openssl/crypto/evp/evp_locl.h +++ b/deps/openssl/openssl/crypto/evp/evp_locl.h @@ -333,6 +333,9 @@ int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const char *pass, const EVP_CIPHER *c, const EVP_MD *md, int en_de); +const EVP_MD *evp_get_fips_md(const EVP_MD *md); +const EVP_CIPHER *evp_get_fips_cipher(const EVP_CIPHER *cipher); + #ifdef OPENSSL_FIPS # ifdef OPENSSL_DOING_MAKEDEPEND diff --git a/deps/openssl/openssl/crypto/evp/evp_test.c b/deps/openssl/openssl/crypto/evp/evp_test.c index d06b4ee5091447..d7441ec7b70282 100644 --- a/deps/openssl/openssl/crypto/evp/evp_test.c +++ b/deps/openssl/openssl/crypto/evp/evp_test.c @@ -132,11 +132,13 @@ static int test1_exit(int ec) static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn, const unsigned char *iv, int in, const unsigned char *plaintext, int pn, - const unsigned char *ciphertext, int cn, int encdec) + const unsigned char *ciphertext, int cn, + const unsigned char *aad, int an, + const unsigned char *tag, int tn, int encdec) { EVP_CIPHER_CTX ctx; unsigned char out[4096]; - int outl, outl2; + int outl, outl2, mode; printf("Testing cipher %s%s\n", EVP_CIPHER_name(c), (encdec == @@ -147,15 +149,78 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn, hexdump(stdout, "IV", iv, in); hexdump(stdout, "Plaintext", plaintext, pn); hexdump(stdout, "Ciphertext", ciphertext, cn); - - if (kn != c->key_len) { + if (an) + hexdump(stdout, "AAD", aad, an); + if (tn) + hexdump(stdout, "Tag", tag, tn); + mode = EVP_CIPHER_mode(c); + if (kn != EVP_CIPHER_key_length(c)) { fprintf(stderr, "Key length doesn't match, got %d expected %lu\n", kn, - (unsigned long)c->key_len); + (unsigned long)EVP_CIPHER_key_length(c)); test1_exit(5); } EVP_CIPHER_CTX_init(&ctx); + EVP_CIPHER_CTX_set_flags(&ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW); if (encdec != 0) { - if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, iv)) { + if (mode == EVP_CIPH_GCM_MODE) { + if (!EVP_EncryptInit_ex(&ctx, c, NULL, NULL, NULL)) { + fprintf(stderr, "EncryptInit failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(10); + } + if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, in, NULL)) { + fprintf(stderr, "IV length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(11); + } + if (!EVP_EncryptInit_ex(&ctx, NULL, NULL, key, iv)) 
{ + fprintf(stderr, "Key/IV set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(12); + } + if (an && !EVP_EncryptUpdate(&ctx, NULL, &outl, aad, an)) { + fprintf(stderr, "AAD set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(13); + } + } else if (mode == EVP_CIPH_CCM_MODE) { + if (!EVP_EncryptInit_ex(&ctx, c, NULL, NULL, NULL)) { + fprintf(stderr, "EncryptInit failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(10); + } + if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_CCM_SET_IVLEN, in, NULL)) { + fprintf(stderr, "IV length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(11); + } + if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_CCM_SET_TAG, tn, NULL)) { + fprintf(stderr, "Tag length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(11); + } + if (!EVP_EncryptInit_ex(&ctx, NULL, NULL, key, iv)) { + fprintf(stderr, "Key/IV set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(12); + } + if (!EVP_EncryptUpdate(&ctx, NULL, &outl, NULL, pn)) { + fprintf(stderr, "Plaintext length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(12); + } + if (an && !EVP_EncryptUpdate(&ctx, NULL, &outl, aad, an)) { + fprintf(stderr, "AAD set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(13); + } + } else if (mode == EVP_CIPH_WRAP_MODE) { + if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, in ? iv : NULL)) { + fprintf(stderr, "EncryptInit failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(10); + } + } else if (!EVP_EncryptInit_ex(&ctx, c, NULL, key, iv)) { fprintf(stderr, "EncryptInit failed\n"); ERR_print_errors_fp(stderr); test1_exit(10); @@ -185,10 +250,93 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn, hexdump(stderr, "Expected", ciphertext, cn); test1_exit(9); } + if (mode == EVP_CIPH_GCM_MODE || mode == EVP_CIPH_CCM_MODE) { + unsigned char rtag[16]; + /* + * Note: EVP_CTRL_CCM_GET_TAG has same value as + * EVP_CTRL_GCM_GET_TAG + */ + if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, tn, rtag)) { + fprintf(stderr, "Get tag failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(14); + } + if (memcmp(rtag, tag, tn)) { + fprintf(stderr, "Tag mismatch\n"); + hexdump(stderr, "Got", rtag, tn); + hexdump(stderr, "Expected", tag, tn); + test1_exit(9); + } + } } if (encdec <= 0) { - if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, iv)) { + if (mode == EVP_CIPH_GCM_MODE) { + if (!EVP_DecryptInit_ex(&ctx, c, NULL, NULL, NULL)) { + fprintf(stderr, "EncryptInit failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(10); + } + if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, in, NULL)) { + fprintf(stderr, "IV length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(11); + } + if (!EVP_DecryptInit_ex(&ctx, NULL, NULL, key, iv)) { + fprintf(stderr, "Key/IV set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(12); + } + if (!EVP_CIPHER_CTX_ctrl + (&ctx, EVP_CTRL_GCM_SET_TAG, tn, (void *)tag)) { + fprintf(stderr, "Set tag failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(14); + } + if (an && !EVP_DecryptUpdate(&ctx, NULL, &outl, aad, an)) { + fprintf(stderr, "AAD set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(13); + } + } else if (mode == EVP_CIPH_CCM_MODE) { + if (!EVP_DecryptInit_ex(&ctx, c, NULL, NULL, NULL)) { + fprintf(stderr, "DecryptInit failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(10); + } + if (!EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_CCM_SET_IVLEN, in, NULL)) { + fprintf(stderr, "IV length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(11); + } + if (!EVP_CIPHER_CTX_ctrl + 
(&ctx, EVP_CTRL_CCM_SET_TAG, tn, (void *)tag)) { + fprintf(stderr, "Tag length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(11); + } + if (!EVP_DecryptInit_ex(&ctx, NULL, NULL, key, iv)) { + fprintf(stderr, "Key/Nonce set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(12); + } + if (!EVP_DecryptUpdate(&ctx, NULL, &outl, NULL, pn)) { + fprintf(stderr, "Plaintext length set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(12); + } + if (an && !EVP_EncryptUpdate(&ctx, NULL, &outl, aad, an)) { + fprintf(stderr, "AAD set failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(13); + } + } else if (mode == EVP_CIPH_WRAP_MODE) { + if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, in ? iv : NULL)) { + fprintf(stderr, "EncryptInit failed\n"); + ERR_print_errors_fp(stderr); + test1_exit(10); + } + } else if (!EVP_DecryptInit_ex(&ctx, c, NULL, key, iv)) { fprintf(stderr, "DecryptInit failed\n"); ERR_print_errors_fp(stderr); test1_exit(11); @@ -200,7 +348,8 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn, ERR_print_errors_fp(stderr); test1_exit(6); } - if (!EVP_DecryptFinal_ex(&ctx, out + outl, &outl2)) { + if (mode != EVP_CIPH_CCM_MODE + && !EVP_DecryptFinal_ex(&ctx, out + outl, &outl2)) { fprintf(stderr, "DecryptFinal failed\n"); ERR_print_errors_fp(stderr); test1_exit(7); @@ -228,7 +377,9 @@ static void test1(const EVP_CIPHER *c, const unsigned char *key, int kn, static int test_cipher(const char *cipher, const unsigned char *key, int kn, const unsigned char *iv, int in, const unsigned char *plaintext, int pn, - const unsigned char *ciphertext, int cn, int encdec) + const unsigned char *ciphertext, int cn, + const unsigned char *aad, int an, + const unsigned char *tag, int tn, int encdec) { const EVP_CIPHER *c; @@ -236,7 +387,8 @@ static int test_cipher(const char *cipher, const unsigned char *key, int kn, if (!c) return 0; - test1(c, key, kn, iv, in, plaintext, pn, ciphertext, cn, encdec); + test1(c, key, kn, iv, in, plaintext, pn, ciphertext, cn, aad, an, tag, tn, + encdec); return 1; } @@ -316,7 +468,7 @@ int main(int argc, char **argv) perror(szTestFile); EXIT(2); } - + ERR_load_crypto_strings(); /* Load up the software EVP_CIPHER and EVP_MD definitions */ OpenSSL_add_all_ciphers(); OpenSSL_add_all_digests(); @@ -346,9 +498,11 @@ int main(int argc, char **argv) char line[4096]; char *p; char *cipher; - unsigned char *iv, *key, *plaintext, *ciphertext; + unsigned char *iv, *key, *plaintext, *ciphertext, *aad, *tag; int encdec; int kn, in, pn, cn; + int an = 0; + int tn = 0; if (!fgets((char *)line, sizeof line, f)) break; @@ -361,19 +515,37 @@ int main(int argc, char **argv) plaintext = ustrsep(&p, ":"); ciphertext = ustrsep(&p, ":"); if (p[-1] == '\n') { - p[-1] = '\0'; encdec = -1; + p[-1] = '\0'; + tag = aad = NULL; + an = tn = 0; } else { - encdec = atoi(sstrsep(&p, "\n")); + aad = ustrsep(&p, ":"); + tag = ustrsep(&p, ":"); + if (tag == NULL) { + p = (char *)aad; + tag = aad = NULL; + an = tn = 0; + } + if (p[-1] == '\n') { + encdec = -1; + p[-1] = '\0'; + } else + encdec = atoi(sstrsep(&p, "\n")); } kn = convert(key); in = convert(iv); pn = convert(plaintext); cn = convert(ciphertext); + if (aad) { + an = convert(aad); + tn = convert(tag); + } if (!test_cipher - (cipher, key, kn, iv, in, plaintext, pn, ciphertext, cn, encdec) + (cipher, key, kn, iv, in, plaintext, pn, ciphertext, cn, aad, an, + tag, tn, encdec) && !test_digest(cipher, plaintext, pn, ciphertext, cn)) { #ifdef OPENSSL_NO_AES if (strstr(cipher, "AES") == cipher) { diff --git 
a/deps/openssl/openssl/crypto/evp/evptests.txt b/deps/openssl/openssl/crypto/evp/evptests.txt index c273707c1444f5..4e9958b3b5bc07 100644 --- a/deps/openssl/openssl/crypto/evp/evptests.txt +++ b/deps/openssl/openssl/crypto/evp/evptests.txt @@ -1,4 +1,5 @@ #cipher:key:iv:plaintext:ciphertext:0/1(decrypt/encrypt) +#aadcipher:key:iv:plaintext:ciphertext:aad:tag:0/1(decrypt/encrypt) #digest:::input:output # SHA(1) tests (from shatest.c) @@ -332,3 +333,69 @@ SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBA SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1 SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1 SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1 + +# AES CCM 256 bit key +aes-256-ccm:1bde3251d41a8b5ea013c195ae128b218b3e0306376357077ef1c1c78548b92e:5b8e40746f6b98e00f1d13ff41:53bd72a97089e312422bf72e242377b3c6ee3e2075389b999c4ef7f28bd2b80a:9a5fcccdb4cf04e7293d2775cc76a488f042382d949b43b7d6bb2b9864786726:c17a32514eb6103f3249e076d4c871dc97e04b286699e54491dc18f6d734d4c0:2024931d73bca480c24a24ece6b6c2bf + +# AES GCM test vectors from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000::::58e2fccefa7e3061367f1d57a4e7455a +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78::ab6e47d42cec13bdf53a67b21257bddf +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985::4d5c2af327cd64a62cf35abd2ba6fab4 +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091:feedfacedeadbeeffeedfacedeadbeefabaddad2:5bc94fbc3221a5db94fae95ae7121a47 +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598:feedfacedeadbeeffeedfacedeadbeefabaddad2:3612d2e79e3b0785561be14aaca2fccb +aes-128-gcm:feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5:feedfacedeadbeeffeedfacedeadbeefabaddad2:619cc5aefffe0bfa462af43c1699d050 +aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000::::cd33b28ac773f74ba00ed1f312572435 +aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:98e7247c07f0fe411c267e4384b0f600::2ff58d80033927ab8ef4d4587514f0fb 
+aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256::9924a7c8587336bfb118024db8674a14 +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710:feedfacedeadbeeffeedfacedeadbeefabaddad2:2519498e80f1478f37ba55bd6d27618c +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7:feedfacedeadbeeffeedfacedeadbeefabaddad2:65dcc57fcf623a24094fcca40d3533f8 +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b:feedfacedeadbeeffeedfacedeadbeefabaddad2:dcf566ff291c25bbb8568fc3d376a6d9 +aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000::::530f8afbc74536b9a963b4f1c4cb738b +aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:cea7403d4d606b6e074ec5d3baf39d18::d0d1c8a799996bf0265b98b5d48ab919 +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad::b094dac5d93471bdec1a502270e3cc6c +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662:feedfacedeadbeeffeedfacedeadbeefabaddad2:76fc6ece0f4e1768cddf8853bb2d551b +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f:feedfacedeadbeeffeedfacedeadbeefabaddad2:3a337dbf46a792c45e454913fe2ea8f2 
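The aadcipher entries above follow the extended key:iv:plaintext:ciphertext:aad:tag layout described in the comment at the top of this file. As a rough illustration of how one such GCM entry maps onto the public EVP calls (a sketch only: the function name, buffer arguments and the prior hex-decoding step are assumptions, not part of the patch):

#include <openssl/evp.h>

/*
 * Sketch: decrypt-and-verify a single aes-*-gcm vector whose fields
 * (key, iv, aad, ciphertext, tag) have already been hex-decoded.
 * Returns 1 only if the tag verifies.
 */
static int gcm_verify(const EVP_CIPHER *cipher,
                      const unsigned char *key,
                      const unsigned char *iv, int ivlen,
                      const unsigned char *aad, int aadlen,
                      const unsigned char *ct, int ctlen,
                      const unsigned char *tag, int taglen,
                      unsigned char *pt_out)
{
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
    int outl = 0, outl2 = 0, ok = 0;

    if (ctx == NULL)
        return 0;
    /* Select the cipher, set the IV length, then install key and IV. */
    if (!EVP_DecryptInit_ex(ctx, cipher, NULL, NULL, NULL)
        || !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_IVLEN, ivlen, NULL)
        || !EVP_DecryptInit_ex(ctx, NULL, NULL, key, iv))
        goto done;
    /* AAD is passed with a NULL output pointer. */
    if (aadlen > 0 && !EVP_DecryptUpdate(ctx, NULL, &outl, aad, aadlen))
        goto done;
    if (ctlen > 0 && !EVP_DecryptUpdate(ctx, pt_out, &outl, ct, ctlen))
        goto done;
    /* The expected tag must be supplied before the final call, which checks it. */
    if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_TAG, taglen, (void *)tag))
        goto done;
    ok = EVP_DecryptFinal_ex(ctx, pt_out + outl, &outl2) > 0;
done:
    EVP_CIPHER_CTX_free(ctx);
    return ok;
}

Fed the fields of the vector directly above (EVP_aes_256_gcm() with its 8-byte IV), such a helper should return 1, and should fail if any byte of the tag is altered.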
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f:feedfacedeadbeeffeedfacedeadbeefabaddad2:a44a8266ee1c8eb0c8b5d4cf5ae9f19a +# local add-ons, primarily streaming ghash tests +# 128 bytes aad +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:::d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad:5fea793a2d6f974d37e68e0cb8ff9492 +# 48 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0::9dd0a376b08e40eb00c35f29f9ea61a4 +# 80 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d5270291::98885a3a22bd4742fe7b72172193b163 +# 128 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d527029195b84d1b96c690ff2f2de30bf2ec89e00253786e126504f0dab90c48a30321de3345e6b0461e7c9e6c6b7afedde83f40::cac45f60e31efd3b5a43b98a22ce1aa1 +# 192 bytes plaintext, iv is chosen so that initial counter LSB is 0xFF +aes-128-gcm:00000000000000000000000000000000:ffffffff000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:56b3373ca9ef6e4a2b64fe1e9a17b61425f10d47a75a5fce13efc6bc784af24f4141bdd48cf7c770887afd573cca5418a9aeffcd7c5ceddfc6a78397b9a85b499da558257267caab2ad0b23ca476a53cb17fb41c4b8b475cb4f3f7165094c229c9e8c4dc0a2a5ff1903e501511221376a1cdb8364c5061a20cae74bc4acd76ceb0abc9fd3217ef9f8c90be402ddf6d8697f4f880dff15bfb7a6b28241ec8fe183c2d59e3f9dfff653c7126f0acb9e64211f42bae12af462b1070bef1ab5e3606::566f8ef683078bfdeeffa869d751a017 +# 80 bytes plaintext, submitted by Intel 
+aes-128-gcm:843ffcf5d2b72694d19ed01d01249412:dbcca32ebf9b804617c3aa9e:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f:6268c6fa2a80b2d137467f092f657ac04d89be2beaa623d61b5a868c8f03ff95d3dcee23ad2f1ab3a6c80eaf4b140eb05de3457f0fbc111a6b43d0763aa422a3013cf1dc37fe417d1fbfc449b75d4cc5:00000000000000000000000000000000101112131415161718191a1b1c1d1e1f:3b629ccfbc1119b7319e1dce2cd6fd6d + +# AES XTS test vectors from IEEE Std 1619-2007 +aes-128-xts:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000:917cf69ebd68b2ec9b9fe9a3eadda692cd43d2f59598ed858c02c2652fbf922e +aes-128-xts:1111111111111111111111111111111122222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:c454185e6a16936e39334038acef838bfb186fff7480adc4289382ecd6d394f0 +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:af85336b597afc1a900b2eb21ec949d292df4c047e0b21532186a5971a227a89 +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:00000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:01000000000000000000000000000000:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:02000000000000000000000000000000:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd:fa762a3680b76007928ed4a4f49a9456031b704782e65e16cecb54ed7d017b5e18abd67b338e81078f21edb7868d901ebe9c731a7c18b5e6dec1d6a72e078ac9a4262f860beefa14f4e821018272e411a951502b6e79066e84252c3346f3aa62344351a291d4bedc7a07618bdea2af63145cc7a4b8d4070691ae890cd65733e7946e9021a1dffc4c59f159425ee6d50ca9b135fa6162cea18a939838dc000fb386fad086acce5ac07cb2ece7fd580b00cfa5e98589631dc25e8e2a3daf2ffdec26531659912c9d8f7a15e5865ea8fb5816d6207052bd7128cd743c12c8118791a4736811935eb982a532349e31dd401e0b660a568cb1a4711f552f55ded59f1f15bf7196b3ca12a91e488ef59d64f3a02bf45239499ac6176ae321c4a211ec545365971c5d3f4f09d4eb139bfdf2073d33180b21002b65cc9865e76cb24cd92c874c24c18350399a936ab3637079295d76c417776b94efce3a0ef7206b15110519655c956cbd8b2489405ee2b09a6b6eebe0c53790a12a8998378b33a5b71159625f4ba49d2a2fdba59fbf0897bc7aabd8d707dc140a80f0f309f835d3da54ab584e501dfa0ee977fec543f74186a802b9a37adb3e8291eca04d66520d229e60401e7282bef486ae059aa70696e0e305d777140a7a883ecdcb69b9ff938e8a4231864c69ca2c2043bed007ff3e605e014bcf518138dc3a25c5e236171a2d01d6 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fd000000000000000000000000000000:8e41b78c390b5af9d758bb214a67e9f6bf7727b09ac6124084c37611398fa45daad94868600ed391fb1acd4857a95b466e62ef9f4b377244d1c152e7b30d731aad30c716d214b707aed99eb5b5e580b3e887cf7497465651d4b60e6042051da3693c3b78c14489543be8b6ad0ba629565bba202313ba7b0d0c94a3252b676f46cc02ce0f8a7d34c0ed229129673c1f61aed579d08a9203a25aac3a77e9db60267996db38df637356d9dcd1632e369939f2a29d89345c66e05066f1a3677aef18dea4113faeb629e46721a66d0a7e785d3e29af2594eb67dfa982affe0aac058f6e15864269b135418261fc3afb089472cf68c45dd7f231c6249ba0255e1e033833fc4d00a3fe02132d7bc3873614b8aee34273581ea0325c81f0270affa13641d052d36f0757d484014354d02d6883ca15c24d8c3956b1bd027bcf41f151fd8023c5340e5606f37e90fdb87c86fb4fa634b3718a30bace06a66eaf8f63c4aa3b637826a87fe8cfa44282e92cb1615af3a28e53bc74c7cba1a0977be9065d0c1a5dec6c54ae38d37f37aa35283e048e5530a85c4e7a29d7b92ec0c3169cdf2a805c7604bce60049b9fb7b8eaac10f51ae23794ceba68bb58112e293b9b692ca721b37c662f8574ed4dba6f88e170881c82cddc1034a0ca7e284bf0962b6b26292d836fa9f73c1ac770eef0f2d3a1eaf61d3e03555fd424eedd67e18a18094f888:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fe000000000000000000000000000000:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:ff000000000000000000000000000000:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a:3260ae8dad1f4a32c5cafe3ab0eb95549d461a67ceb9e5aa2d3afb62dece0553193ba50c75be251e08d1d08f1088576c7efdfaaf3f459559571e12511753b07af073f35da06af0ce0bbf6b8f5ccc5cea500ec1b211bd51f63b606bf6528796ca12173ba39b8935ee44ccce646f90a45bf9ccc567f0ace13dc2d53ebeedc81f58b2e41179dddf0d5a5c42f5d8506c1a5d2f8f59f3ea873cbcd0eec19acbf325423bd3dcb8c2b1bf1d1eaed0eba7f0698e4314fbeb2f1566d1b9253008cbccf45a2b0d9c5c9c21474f4076e02be26050b99dee4fd68a4cf890e496e4fcae7b70f94ea5a9062da0daeba1993d2ccd1dd3c244b8428801495a58b216547e7e847c46d1d756377b6242d2e5fb83bf752b54e0df71e889f3a2bb0f4c10805bf3c590376e3c24e22ff57f7fa965577375325cea5d920db94b9c336b455f6e894c01866fe9fbb8c8d3f70a2957285f6dfb5dcd8cbf54782f8fe7766d4723819913ac773421e3a31095866bad22c86a6036b2518b2059b4229d18c8c2ccbdf906c6cc6e82464ee57bddb0bebcb1dc645325bfb3e665ef7251082c88ebb1cf203bd779fdd38675713c8daadd17e1cabee432b09787b6ddf3304e38b731b45df5df51b78fcfb3d32466028d0ba36555e7e11ab0ee0666061d1645d962444bc47a38188930a84b4d561395c73c087021927ca638b7afc8a8679ccb84c26555440ec7f10445cd + 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ff000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:1c3b3a102f770386e4836c99e370cf9bea00803f5e482357a4ae12d414a3e63b5d31e276f8fe4a8d66b317f9ac683f44680a86ac35adfc3345befecb4bb188fd5776926c49a3095eb108fd1098baec70aaa66999a72a82f27d848b21d4a741b0c5cd4d5fff9dac89aeba122961d03a757123e9870f8acf1000020887891429ca2a3e7a7d7df7b10355165c8b9a6d0a7de8b062c4500dc4cd120c0f7418dae3d0b5781c34803fa75421c790dfe1de1834f280d7667b327f6c8cd7557e12ac3a0f93ec05c52e0493ef31a12d3d9260f79a289d6a379bc70c50841473d1a8cc81ec583e9645e07b8d9670655ba5bbcfecc6dc3966380ad8fecb17b6ba02469a020a84e18e8f84252070c13e9f1f289be54fbc481457778f616015e1327a02b140f1505eb309326d68378f8374595c849d84f4c333ec4423885143cb47bd71c5edae9be69a2ffeceb1bec9de244fbe15992b11b77c040f12bd8f6a975a44a0f90c29a9abc3d4d893927284c58754cce294529f8614dcd2aba991925fedc4ae74ffac6e333b93eb4aff0479da9a410e4450e0dd7ae4c6e2910900575da401fc07059f645e8b7e9bfdef33943054ff84011493c27b3429eaedb4ed5376441a77ed43851ad77f16f541dfd269d50d6a5f14fb0aab1cbb4c1550be97f7ab4066193c4caa773dad38014bd2092fa755c824bb5e54c4f36ffda9fcea70b9c6e693e148c151 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffff0000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:77a31251618a15e6b92d1d66dffe7b50b50bad552305ba0217a610688eff7e11e1d0225438e093242d6db274fde801d4cae06f2092c728b2478559df58e837c2469ee4a4fa794e4bbc7f39bc026e3cb72c33b0888f25b4acf56a2a9804f1ce6d3d6e1dc6ca181d4b546179d55544aa7760c40d06741539c7e3cd9d2f6650b2013fd0eeb8c2b8e3d8d240ccae2d4c98320a7442e1c8d75a42d6e6cfa4c2eca1798d158c7aecdf82490f24bb9b38e108bcda12c3faf9a21141c3613b58367f922aaa26cd22f23d708dae699ad7cb40a8ad0b6e2784973dcb605684c08b8d6998c69aac049921871ebb65301a4619ca80ecb485a31d744223ce8ddc2394828d6a80470c092f5ba413c3378fa6054255c6f9df4495862bbb3287681f931b687c888abf844dfc8fc28331e579928cd12bd2390ae123cf03818d14dedde5c0c24c8ab018bfca75ca096f2d531f3d1619e785f1ada437cab92e980558b3dce1474afb75bfedbf8ff54cb2618e0244c9ac0d3c66fb51598cd2db11f9be39791abe447c63094f7c453b7ff87cb5bb36b7c79efb0872d17058b83b15ab0866ad8a58656c5a7e20dbdf308b2461d97c0ec0024a2715055249cf3b478ddd4740de654f75ca686e0d7345c69ed50cdc2a8b332b1f8824108ac937eb050585608ee734097fc09054fbff89eeaeea791f4a7ab1f9868294a4f9e27b42af8100cb9d59cef9645803 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffff00000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:e387aaa58ba483afa7e8eb469778317ecf4cf573aa9d4eac23f2cdf914e4e200a8b490e42ee646802dc6ee2b471b278195d60918ececb44bf79966f83faba0499298ebc699c0c8634715a320bb4f075d622e74c8c932004f25b41e361025b5a87815391f6108fc4afa6a05d9303c6ba68a128a55705d415985832fdeaae6c8e19110e84d1b1f199a2692119edc96132658f09da7c623efcec712537a3d94c0bf5d7e352ec94ae5797fdb377dc1551150721adf15bd26a8efc2fcaad56881fa9e62462c28f30ae1ceaca93c345cf243b73f542e2074a705bd2643bb9f7cc79bb6e7091ea6e232df0f9ad0d6cf502327876d82207abf2115cdacf6d5a48f6c1879a65b115f0f8b3cb3c59d15dd8c769bc014795a1837f3901b5845eb491adfefe097b1fa30a12fc1f65ba22905031539971a10f2f36c321bb51331cdefb39e3964c7ef079994f5b69b2edd83a71ef549971ee93f44eac3938fcdd61d01fa71799da3a8091c4c48aa9ed263ff0749df95d44fef6a0bb578ec69456aa5408ae32c7af08ad7ba8921287e3bbee31b767be06a0e705c864a769137df28292283ea81a2480241b44d9921cdbec1bc28dc1fda114bd8e5217ac9d8ebafa720e9da4f9ace231cc949e5b96fe76ffc21063fddc83a6b8679c00d35e09576a875305bed5f36ed242c8900dd1fa965bc950dfce09b132263a1eef52dd6888c309f5a7d712826 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffff000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:bf53d2dade78e822a4d949a9bc6766b01b06a8ef70d26748c6a7fc36d80ae4c5520f7c4ab0ac8544424fa405162fef5a6b7f229498063618d39f0003cb5fb8d1c86b643497da1ff945c8d3bedeca4f479702a7a735f043ddb1d6aaade3c4a0ac7ca7f3fa5279bef56f82cd7a2f38672e824814e10700300a055e1630b8f1cb0e919f5e942010a416e2bf48cb46993d3cb6a51c19bacf864785a00bc2ecff15d350875b246ed53e68be6f55bd7e05cfc2b2ed6432198a6444b6d8c247fab941f569768b5c429366f1d3f00f0345b96123d56204c01c63b22ce78baf116e525ed90fdea39fa469494d3866c31e05f295ff21fea8d4e6e13d67e47ce722e9698a1c1048d68ebcde76b86fcf976eab8aa9790268b7068e017a8b9b749409514f1053027fd16c3786ea1bac5f15cb79711ee2abe82f5cf8b13ae73030ef5b9e4457e75d1304f988d62dd6fc4b94ed38ba831da4b7634971b6cd8ec325d9c61c00f1df73627ed3745a5e8489f3a95c69639c32cd6e1d537a85f75cc844726e8a72fc0077ad22000f1d5078f6b866318c668f1ad03d5a5fced5219f2eabbd0aa5c0f460d183f04404a0d6f469558e81fab24a167905ab4c7878502ad3e38fdbe62a41556cec37325759533ce8f25f367c87bb5578d667ae93f9e2fd99bcbc5f2fbba88cf6516139420fcff3b7361d86322c4bd84c82f335abb152c4a93411373aaa8220 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffffff0000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:64497e5a831e4a932c09be3e5393376daa599548b816031d224bbf50a818ed2350eae7e96087c8a0db51ad290bd00c1ac1620857635bf246c176ab463be30b808da548081ac847b158e1264be25bb0910bbc92647108089415d45fab1b3d2604e8a8eff1ae4020cfa39936b66827b23f371b92200be90251e6d73c5f86de5fd4a950781933d79a28272b782a2ec313efdfcc0628f43d744c2dc2ff3dcb66999b50c7ca895b0c64791eeaa5f29499fb1c026f84ce5b5c72ba1083cddb5ce45434631665c333b60b11593fb253c5179a2c8db813782a004856a1653011e93fb6d876c18366dd8683f53412c0c180f9c848592d593f8609ca736317d356e13e2bff3a9f59cd9aeb19cd482593d8c46128bb32423b37a9adfb482b99453fbe25a41bf6feb4aa0bef5ed24bf73c762978025482c13115e4015aac992e5613a3b5c2f685b84795cb6e9b2656d8c88157e52c42f978d8634c43d06fea928f2822e465aa6576e9bf419384506cc3ce3c54ac1a6f67dc66f3b30191e698380bc999b05abce19dc0c6dcc2dd001ec535ba18deb2df1a101023108318c75dc98611a09dc48a0acdec676fabdf222f07e026f059b672b56e5cbc8e1d21bbd867dd927212054681d70ea737134cdfce93b6f82ae22423274e58a0821cc5502e2d0ab4585e94de6975be5e0b4efce51cd3e70c25a1fbbbd609d273ad5b0d59631c531f6a0a57b9 + +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10:6c1625db4671522d3d7599601de7ca09ed +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f1011:d069444b7a7e0cab09e24447d24deb1fedbf +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f101112:e5df1351c0544ba1350b3363cd8ef4beedbf9d +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10111213:9d84c813f719aa2c7be3f66171c7c5c2edbf9dac 
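The four short aes-128-xts entries just above use 17, 18, 19 and 20 byte plaintexts, i.e. lengths that are not a multiple of the AES block size, so they exercise the ciphertext-stealing path of XTS. A minimal sketch of pushing such a vector through EVP (illustrative only; the helper name and the assumption that the whole data unit is handed over in a single update call are mine, not part of the patch):

#include <openssl/evp.h>

/*
 * Sketch: encrypt one XTS data unit.  For aes-128-xts the 32-byte key
 * is the data key followed by the tweak key, and the 16-byte IV holds
 * the tweak.  XTS output length always equals the input length.
 */
static int xts128_encrypt_unit(const unsigned char key[32],
                               const unsigned char tweak[16],
                               const unsigned char *in, int inlen,
                               unsigned char *out)
{
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
    int outl = 0, ok = 0;

    if (ctx == NULL)
        return 0;
    if (EVP_EncryptInit_ex(ctx, EVP_aes_128_xts(), NULL, key, tweak)
        && inlen >= 16                      /* XTS needs at least one block */
        && EVP_EncryptUpdate(ctx, out, &outl, in, inlen)
        && outl == inlen)
        ok = 1;
    EVP_CIPHER_CTX_free(ctx);
    return ok;
}

Against the 17-byte entry above this should reproduce the 17-byte ciphertext in that entry, with the trailing partial block produced by ciphertext stealing.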
+aes-128-xts:e0e1e2e3e4e5e6e7e8e9eaebecedeeefc0c1c2c3c4c5c6c7c8c9cacbcccdcecf:21436587a90000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:38b45812ef43a05bd957e545907e223b954ab4aaf088303ad910eadf14b42be68b2461149d8c8ba85f992be970bc621f1b06573f63e867bf5875acafa04e42ccbd7bd3c2a0fb1fff791ec5ec36c66ae4ac1e806d81fbf709dbe29e471fad38549c8e66f5345d7c1eb94f405d1ec785cc6f6a68f6254dd8339f9d84057e01a17741990482999516b5611a38f41bb6478e6f173f320805dd71b1932fc333cb9ee39936beea9ad96fa10fb4112b901734ddad40bc1878995f8e11aee7d141a2f5d48b7a4e1e7f0b2c04830e69a4fd1378411c2f287edf48c6c4e5c247a19680f7fe41cefbd49b582106e3616cbbe4dfb2344b2ae9519391f3e0fb4922254b1d6d2d19c6d4d537b3a26f3bcc51588b32f3eca0829b6a5ac72578fb814fb43cf80d64a233e3f997a3f02683342f2b33d25b492536b93becb2f5e1a8b82f5b883342729e8ae09d16938841a21a97fb543eea3bbff59f13c1a18449e398701c1ad51648346cbc04c27bb2da3b93a1372ccae548fb53bee476f9e9c91773b1bb19828394d55d3e1a20ed69113a860b6829ffa847224604435070221b257e8dff783615d2cae4803a93aa4334ab482a0afac9c0aeda70b45a481df5dec5df8cc0f423c77a5fd46cd312021d4b438862419a791be03bb4d97c0e59578542531ba466a83baf92cefc151b5cc1611a167893819b63fb8a6b18e86de60290fa72b797b0ce59f3 +# AES wrap tests from RFC3394 +id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5 +id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7 +id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2 +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1 +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21 diff --git a/deps/openssl/openssl/crypto/evp/m_dss.c b/deps/openssl/openssl/crypto/evp/m_dss.c index f22ba522c2f28b..147844862d171c 100644 --- a/deps/openssl/openssl/crypto/evp/m_dss.c +++ b/deps/openssl/openssl/crypto/evp/m_dss.c @@ -66,7 +66,6 @@ #endif #ifndef OPENSSL_NO_SHA -# ifndef OPENSSL_FIPS static int 
init(EVP_MD_CTX *ctx) { @@ -102,5 +101,4 @@ const EVP_MD *EVP_dss(void) { return (&dsa_md); } -# endif #endif diff --git a/deps/openssl/openssl/crypto/evp/m_dss1.c b/deps/openssl/openssl/crypto/evp/m_dss1.c index 976148e436a0c0..e36fabff700ec9 100644 --- a/deps/openssl/openssl/crypto/evp/m_dss1.c +++ b/deps/openssl/openssl/crypto/evp/m_dss1.c @@ -68,8 +68,6 @@ # include # endif -# ifndef OPENSSL_FIPS - static int init(EVP_MD_CTX *ctx) { return SHA1_Init(ctx->md_data); @@ -104,5 +102,4 @@ const EVP_MD *EVP_dss1(void) { return (&dss1_md); } -# endif #endif diff --git a/deps/openssl/openssl/crypto/evp/m_ecdsa.c b/deps/openssl/openssl/crypto/evp/m_ecdsa.c index d11a13a6d7a702..803d3149557757 100644 --- a/deps/openssl/openssl/crypto/evp/m_ecdsa.c +++ b/deps/openssl/openssl/crypto/evp/m_ecdsa.c @@ -116,7 +116,6 @@ #include #ifndef OPENSSL_NO_SHA -# ifndef OPENSSL_FIPS static int init(EVP_MD_CTX *ctx) { @@ -152,5 +151,4 @@ const EVP_MD *EVP_ecdsa(void) { return (&ecdsa_md); } -# endif #endif diff --git a/deps/openssl/openssl/crypto/evp/m_sha1.c b/deps/openssl/openssl/crypto/evp/m_sha1.c index 0cc63555ad48cc..a74e6b77948e42 100644 --- a/deps/openssl/openssl/crypto/evp/m_sha1.c +++ b/deps/openssl/openssl/crypto/evp/m_sha1.c @@ -59,16 +59,14 @@ #include #include "cryptlib.h" -#ifndef OPENSSL_FIPS +#ifndef OPENSSL_NO_SHA -# ifndef OPENSSL_NO_SHA - -# include -# include -# include -# ifndef OPENSSL_NO_RSA -# include -# endif +# include +# include +# include +# ifndef OPENSSL_NO_RSA +# include +# endif static int init(EVP_MD_CTX *ctx) { @@ -104,9 +102,9 @@ const EVP_MD *EVP_sha1(void) { return (&sha1_md); } -# endif +#endif -# ifndef OPENSSL_NO_SHA256 +#ifndef OPENSSL_NO_SHA256 static int init224(EVP_MD_CTX *ctx) { return SHA224_Init(ctx->md_data); @@ -171,9 +169,9 @@ const EVP_MD *EVP_sha256(void) { return (&sha256_md); } -# endif /* ifndef OPENSSL_NO_SHA256 */ +#endif /* ifndef OPENSSL_NO_SHA256 */ -# ifndef OPENSSL_NO_SHA512 +#ifndef OPENSSL_NO_SHA512 static int init384(EVP_MD_CTX *ctx) { return SHA384_Init(ctx->md_data); @@ -234,6 +232,4 @@ const EVP_MD *EVP_sha512(void) { return (&sha512_md); } -# endif /* ifndef OPENSSL_NO_SHA512 */ - -#endif +#endif /* ifndef OPENSSL_NO_SHA512 */ diff --git a/deps/openssl/openssl/crypto/evp/m_sigver.c b/deps/openssl/openssl/crypto/evp/m_sigver.c index e153a1833e075f..4492d207f28e5e 100644 --- a/deps/openssl/openssl/crypto/evp/m_sigver.c +++ b/deps/openssl/openssl/crypto/evp/m_sigver.c @@ -73,15 +73,18 @@ static int do_sigver_init(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx, if (ctx->pctx == NULL) return 0; - if (type == NULL) { - int def_nid; - if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0) - type = EVP_get_digestbynid(def_nid); - } + if (!(ctx->pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM)) { - if (type == NULL) { - EVPerr(EVP_F_DO_SIGVER_INIT, EVP_R_NO_DEFAULT_DIGEST); - return 0; + if (type == NULL) { + int def_nid; + if (EVP_PKEY_get_default_digest_nid(pkey, &def_nid) > 0) + type = EVP_get_digestbynid(def_nid); + } + + if (type == NULL) { + EVPerr(EVP_F_DO_SIGVER_INIT, EVP_R_NO_DEFAULT_DIGEST); + return 0; + } } if (ver) { @@ -103,6 +106,8 @@ static int do_sigver_init(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx, return 0; if (pctx) *pctx = ctx->pctx; + if (ctx->pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM) + return 1; if (!EVP_DigestInit_ex(ctx, type, e)) return 0; return 1; @@ -124,7 +129,19 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, unsigned char *sigret, size_t *siglen) { int sctx, r = 0; - if (ctx->pctx->pmeth->signctx) + EVP_PKEY_CTX *pctx = 
ctx->pctx; + if (pctx->pmeth->flags & EVP_PKEY_FLAG_SIGCTX_CUSTOM) { + EVP_PKEY_CTX *dctx; + if (!sigret) + return pctx->pmeth->signctx(pctx, sigret, siglen, ctx); + dctx = EVP_PKEY_CTX_dup(ctx->pctx); + if (!dctx) + return 0; + r = dctx->pmeth->signctx(dctx, sigret, siglen, ctx); + EVP_PKEY_CTX_free(dctx); + return r; + } + if (pctx->pmeth->signctx) sctx = 1; else sctx = 0; @@ -147,20 +164,19 @@ int EVP_DigestSignFinal(EVP_MD_CTX *ctx, unsigned char *sigret, return 0; } else { if (sctx) { - if (ctx->pctx->pmeth->signctx(ctx->pctx, sigret, siglen, ctx) <= - 0) + if (pctx->pmeth->signctx(pctx, sigret, siglen, ctx) <= 0) return 0; } else { int s = EVP_MD_size(ctx->digest); - if (s < 0 - || EVP_PKEY_sign(ctx->pctx, sigret, siglen, NULL, s) <= 0) + if (s < 0 || EVP_PKEY_sign(pctx, sigret, siglen, NULL, s) <= 0) return 0; } } return 1; } -int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen) +int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, const unsigned char *sig, + size_t siglen) { EVP_MD_CTX tmp_ctx; unsigned char md[EVP_MAX_MD_SIZE]; diff --git a/deps/openssl/openssl/crypto/evp/p_lib.c b/deps/openssl/openssl/crypto/evp/p_lib.c index 2b84dc75ec3a19..1171d3086d0b2d 100644 --- a/deps/openssl/openssl/crypto/evp/p_lib.c +++ b/deps/openssl/openssl/crypto/evp/p_lib.c @@ -337,7 +337,7 @@ int EVP_PKEY_set1_DH(EVP_PKEY *pkey, DH *key) DH *EVP_PKEY_get1_DH(EVP_PKEY *pkey) { - if (pkey->type != EVP_PKEY_DH) { + if (pkey->type != EVP_PKEY_DH && pkey->type != EVP_PKEY_DHX) { EVPerr(EVP_F_EVP_PKEY_GET1_DH, EVP_R_EXPECTING_A_DH_KEY); return NULL; } diff --git a/deps/openssl/openssl/crypto/evp/pmeth_lib.c b/deps/openssl/openssl/crypto/evp/pmeth_lib.c index ae8bccb0c6eb8f..9f81d10021a065 100644 --- a/deps/openssl/openssl/crypto/evp/pmeth_lib.c +++ b/deps/openssl/openssl/crypto/evp/pmeth_lib.c @@ -75,6 +75,7 @@ STACK_OF(EVP_PKEY_METHOD) *app_pkey_methods = NULL; extern const EVP_PKEY_METHOD rsa_pkey_meth, dh_pkey_meth, dsa_pkey_meth; extern const EVP_PKEY_METHOD ec_pkey_meth, hmac_pkey_meth, cmac_pkey_meth; +extern const EVP_PKEY_METHOD dhx_pkey_meth; static const EVP_PKEY_METHOD *standard_methods[] = { #ifndef OPENSSL_NO_RSA @@ -90,7 +91,10 @@ static const EVP_PKEY_METHOD *standard_methods[] = { &ec_pkey_meth, #endif &hmac_pkey_meth, - &cmac_pkey_meth + &cmac_pkey_meth, +#ifndef OPENSSL_NO_DH + &dhx_pkey_meth +#endif }; DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_METHOD *, const EVP_PKEY_METHOD *, diff --git a/deps/openssl/openssl/crypto/hmac/hm_ameth.c b/deps/openssl/openssl/crypto/hmac/hm_ameth.c index 641c797ef1d5c0..29b2b5dffcf7e4 100644 --- a/deps/openssl/openssl/crypto/hmac/hm_ameth.c +++ b/deps/openssl/openssl/crypto/hmac/hm_ameth.c @@ -87,7 +87,7 @@ static int hmac_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) { switch (op) { case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 1; default: diff --git a/deps/openssl/openssl/crypto/hmac/hmac.c b/deps/openssl/openssl/crypto/hmac/hmac.c index 54778165510e97..1fc9e2c3fa93ee 100644 --- a/deps/openssl/openssl/crypto/hmac/hmac.c +++ b/deps/openssl/openssl/crypto/hmac/hmac.c @@ -72,6 +72,16 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, unsigned char pad[HMAC_MAX_MD_CBLOCK]; #ifdef OPENSSL_FIPS + /* If FIPS mode switch to approved implementation if possible */ + if (FIPS_mode()) { + const EVP_MD *fipsmd; + if (md) { + fipsmd = FIPS_get_digestbynid(EVP_MD_type(md)); + if (fipsmd) + md = fipsmd; + } + } + if (FIPS_mode()) { /* If we have an ENGINE need to allow non FIPS */ 
if ((impl || ctx->i_ctx.engine) diff --git a/deps/openssl/openssl/crypto/install-crypto.com b/deps/openssl/openssl/crypto/install-crypto.com index d19081d4df094d..af1d75b526f031 100755 --- a/deps/openssl/openssl/crypto/install-crypto.com +++ b/deps/openssl/openssl/crypto/install-crypto.com @@ -81,7 +81,7 @@ $ sdirs := , - buffer, bio, stack, lhash, rand, err, - evp, asn1, pem, x509, x509v3, conf, txt_db, pkcs7, pkcs12, comp, ocsp, - ui, krb5, - - cms, pqueue, ts, jpake, srp, store, cmac + store, cms, pqueue, ts, jpake $! $ exheader_ := crypto.h, opensslv.h, ebcdic.h, symhacks.h, ossl_typ.h $ exheader_'archd' := opensslconf.h @@ -139,9 +139,6 @@ $ exheader_cms := cms.h $ exheader_pqueue := pqueue.h $ exheader_ts := ts.h $ exheader_jpake := jpake.h -$ exheader_srp := srp.h -$ exheader_store := store.h -$ exheader_cmac := cmac.h $ libs := ssl_libcrypto $! $ exe_dir := [-.'archd'.exe.crypto] diff --git a/deps/openssl/openssl/crypto/jpake/jpake.c b/deps/openssl/openssl/crypto/jpake/jpake.c index ed2e888eb4c0b7..8c38727e20fd31 100644 --- a/deps/openssl/openssl/crypto/jpake/jpake.c +++ b/deps/openssl/openssl/crypto/jpake/jpake.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * In the definition, (xa, xb, xc, xd) are Alice's (x1, x2, x3, x4) or diff --git a/deps/openssl/openssl/crypto/md32_common.h b/deps/openssl/openssl/crypto/md32_common.h index c1efb45f1c31de..96828d2693a1d2 100644 --- a/deps/openssl/openssl/crypto/md32_common.h +++ b/deps/openssl/openssl/crypto/md32_common.h @@ -142,8 +142,10 @@ */ #undef ROTATE #ifndef PEDANTIC -# if defined(_MSC_VER) || defined(__ICC) +# if defined(_MSC_VER) # define ROTATE(a,n) _lrotl(a,n) +# elif defined(__ICC) +# define ROTATE(a,n) _rotl(a,n) # elif defined(__MWERKS__) # if defined(__POWERPC__) # define ROTATE(a,n) __rlwinm(a,n,0,31) @@ -213,12 +215,30 @@ asm ("bswapl %0":"=r"(r):"0"(r)); \ *((unsigned int *)(c))=r; (c)+=4; r; }) # endif +# elif defined(__aarch64__) +# if defined(__BYTE_ORDER__) +# if defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ +# define HOST_c2l(c,l) ({ unsigned int r; \ + asm ("rev %w0,%w1" \ + :"=r"(r) \ + :"r"(*((const unsigned int *)(c))));\ + (c)+=4; (l)=r; }) +# define HOST_l2c(l,c) ({ unsigned int r; \ + asm ("rev %w0,%w1" \ + :"=r"(r) \ + :"r"((unsigned int)(l)));\ + *((unsigned int *)(c))=r; (c)+=4; r; }) +# elif defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l)) +# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l)) +# endif +# endif # endif # endif -# endif -# if defined(__s390__) || defined(__s390x__) -# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l)) -# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l)) +# if defined(__s390__) || defined(__s390x__) +# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l)) +# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l)) +# endif # endif # ifndef HOST_c2l @@ -248,12 +268,12 @@ (c)+=4; (l); }) # endif # endif -# endif -# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) -# ifndef B_ENDIAN - /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */ -# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l) -# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l) +# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) +# ifndef B_ENDIAN + /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. 
*/ +# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l) +# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l) +# endif # endif # endif diff --git a/deps/openssl/openssl/crypto/md5/Makefile b/deps/openssl/openssl/crypto/md5/Makefile index b9e2ce9a386af7..390e5f1c7dc7f1 100644 --- a/deps/openssl/openssl/crypto/md5/Makefile +++ b/deps/openssl/openssl/crypto/md5/Makefile @@ -52,6 +52,9 @@ md5-ia64.s: asm/md5-ia64.S $(CC) $(CFLAGS) -E asm/md5-ia64.S | \ $(PERL) -ne 's/;\s+/;\n/g; print;' > $@ +md5-sparcv9.S: asm/md5-sparcv9.pl + $(PERL) asm/md5-sparcv9.pl $@ $(CFLAGS) + files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO diff --git a/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl b/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl new file mode 100644 index 00000000000000..407da3c1b0df24 --- /dev/null +++ b/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl @@ -0,0 +1,430 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Hardware SPARC T4 support by David S. Miller . +# ==================================================================== + +# MD5 for SPARCv9, 6.9 cycles per byte on UltraSPARC, >40% faster than +# code generated by Sun C 5.2. + +# SPARC T4 MD5 hardware achieves 3.20 cycles per byte, which is 2.1x +# faster than software. Multi-process benchmark saturates at 12x +# single-process result on 8-core processor, or ~11GBps per 2.85GHz +# socket. + +$output=shift; +open STDOUT,">$output"; + +use integer; + +($ctx,$inp,$len)=("%i0","%i1","%i2"); # input arguments + +# 64-bit values +@X=("%o0","%o1","%o2","%o3","%o4","%o5","%o7","%g1","%g2"); +$tx="%g3"; +($AB,$CD)=("%g4","%g5"); + +# 32-bit values +@V=($A,$B,$C,$D)=map("%l$_",(0..3)); +($t1,$t2,$t3,$saved_asi)=map("%l$_",(4..7)); +($shr,$shl1,$shl2)=("%i3","%i4","%i5"); + +my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee, + 0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501, + 0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be, + 0x6b901122,0xfd987193,0xa679438e,0x49b40821, + + 0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa, + 0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8, + 0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed, + 0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a, + + 0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c, + 0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70, + 0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05, + 0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665, + + 0xf4292244,0x432aff97,0xab9423a7,0xfc93a039, + 0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1, + 0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1, + 0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391, 0 ); + +sub R0 { + my ($i,$a,$b,$c,$d) = @_; + my $rot = (7,12,17,22)[$i%4]; + my $j = ($i+1)/2; + + if ($i&1) { + $code.=<<___; + srlx @X[$j],$shr,@X[$j] ! align X[`$i+1`] + and $b,$t1,$t1 ! round $i + sllx @X[$j+1],$shl1,$tx + add $t2,$a,$a + sllx $tx,$shl2,$tx + xor $d,$t1,$t1 + or $tx,@X[$j],@X[$j] + sethi %hi(@K[$i+1]),$t2 + add $t1,$a,$a + or $t2,%lo(@K[$i+1]),$t2 + sll $a,$rot,$t3 + add @X[$j],$t2,$t2 ! X[`$i+1`]+K[`$i+1`] + srl $a,32-$rot,$a + add $b,$t3,$t3 + xor $b,$c,$t1 + add $t3,$a,$a +___ + } else { + $code.=<<___; + srlx @X[$j],32,$tx ! extract X[`2*$j+1`] + and $b,$t1,$t1 ! 
round $i + add $t2,$a,$a + xor $d,$t1,$t1 + sethi %hi(@K[$i+1]),$t2 + add $t1,$a,$a + or $t2,%lo(@K[$i+1]),$t2 + sll $a,$rot,$t3 + add $tx,$t2,$t2 ! X[`2*$j+1`]+K[`$i+1`] + srl $a,32-$rot,$a + add $b,$t3,$t3 + xor $b,$c,$t1 + add $t3,$a,$a +___ + } +} + +sub R0_1 { + my ($i,$a,$b,$c,$d) = @_; + my $rot = (7,12,17,22)[$i%4]; + +$code.=<<___; + srlx @X[0],32,$tx ! extract X[1] + and $b,$t1,$t1 ! round $i + add $t2,$a,$a + xor $d,$t1,$t1 + sethi %hi(@K[$i+1]),$t2 + add $t1,$a,$a + or $t2,%lo(@K[$i+1]),$t2 + sll $a,$rot,$t3 + add $tx,$t2,$t2 ! X[1]+K[`$i+1`] + srl $a,32-$rot,$a + add $b,$t3,$t3 + andn $b,$c,$t1 + add $t3,$a,$a +___ +} + +sub R1 { + my ($i,$a,$b,$c,$d) = @_; + my $rot = (5,9,14,20)[$i%4]; + my $j = $i<31 ? (1+5*($i+1))%16 : (5+3*($i+1))%16; + my $xi = @X[$j/2]; + +$code.=<<___ if ($j&1 && ($xi=$tx)); + srlx @X[$j/2],32,$xi ! extract X[$j] +___ +$code.=<<___; + and $b,$d,$t3 ! round $i + add $t2,$a,$a + or $t3,$t1,$t1 + sethi %hi(@K[$i+1]),$t2 + add $t1,$a,$a + or $t2,%lo(@K[$i+1]),$t2 + sll $a,$rot,$t3 + add $xi,$t2,$t2 ! X[$j]+K[`$i+1`] + srl $a,32-$rot,$a + add $b,$t3,$t3 + `$i<31?"andn":"xor"` $b,$c,$t1 + add $t3,$a,$a +___ +} + +sub R2 { + my ($i,$a,$b,$c,$d) = @_; + my $rot = (4,11,16,23)[$i%4]; + my $j = $i<47 ? (5+3*($i+1))%16 : (0+7*($i+1))%16; + my $xi = @X[$j/2]; + +$code.=<<___ if ($j&1 && ($xi=$tx)); + srlx @X[$j/2],32,$xi ! extract X[$j] +___ +$code.=<<___; + add $t2,$a,$a ! round $i + xor $b,$t1,$t1 + sethi %hi(@K[$i+1]),$t2 + add $t1,$a,$a + or $t2,%lo(@K[$i+1]),$t2 + sll $a,$rot,$t3 + add $xi,$t2,$t2 ! X[$j]+K[`$i+1`] + srl $a,32-$rot,$a + add $b,$t3,$t3 + xor $b,$c,$t1 + add $t3,$a,$a +___ +} + +sub R3 { + my ($i,$a,$b,$c,$d) = @_; + my $rot = (6,10,15,21)[$i%4]; + my $j = (0+7*($i+1))%16; + my $xi = @X[$j/2]; + +$code.=<<___; + add $t2,$a,$a ! round $i +___ +$code.=<<___ if ($j&1 && ($xi=$tx)); + srlx @X[$j/2],32,$xi ! extract X[$j] +___ +$code.=<<___; + orn $b,$d,$t1 + sethi %hi(@K[$i+1]),$t2 + xor $c,$t1,$t1 + or $t2,%lo(@K[$i+1]),$t2 + add $t1,$a,$a + sll $a,$rot,$t3 + add $xi,$t2,$t2 ! X[$j]+K[`$i+1`] + srl $a,32-$rot,$a + add $b,$t3,$t3 + add $t3,$a,$a +___ +} + +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ +.register %g2,#scratch +.register %g3,#scratch +#endif + +.section ".text",#alloc,#execinstr + +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif + +.globl md5_block_asm_data_order +.align 32 +md5_block_asm_data_order: + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] + + andcc %g1, CFR_MD5, %g0 + be .Lsoftware + nop + + mov 4, %g1 + andcc %o1, 0x7, %g0 + lda [%o0 + %g0]0x88, %f0 ! load context + lda [%o0 + %g1]0x88, %f1 + add %o0, 8, %o0 + lda [%o0 + %g0]0x88, %f2 + lda [%o0 + %g1]0x88, %f3 + bne,pn %icc, .Lhwunaligned + sub %o0, 8, %o0 + +.Lhw_loop: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x38], %f22 + add %o1, 0x40, %o1 + prefetch [%o1 + 63], 20 + + .word 0x81b02800 ! MD5 + + bne,pt SIZE_T_CC, .Lhw_loop + nop + +.Lhwfinish: + sta %f0, [%o0 + %g0]0x88 ! 
store context + sta %f1, [%o0 + %g1]0x88 + add %o0, 8, %o0 + sta %f2, [%o0 + %g0]0x88 + sta %f3, [%o0 + %g1]0x88 + retl + nop + +.align 8 +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +.Lhwunaligned_loop: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x40], %f26 + add %o1, 0x40, %o1 + prefetch [%o1 + 63], 20 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + .word 0x81b02800 ! MD5 + + bne,pt SIZE_T_CC, .Lhwunaligned_loop + for %f26, %f26, %f10 ! %f10=%f26 + + ba .Lhwfinish + nop + +.align 16 +.Lsoftware: + save %sp,-STACK_FRAME,%sp + + rd %asi,$saved_asi + wr %g0,0x88,%asi ! ASI_PRIMARY_LITTLE + and $inp,7,$shr + andn $inp,7,$inp + + sll $shr,3,$shr ! *=8 + mov 56,$shl2 + ld [$ctx+0],$A + sub $shl2,$shr,$shl2 + ld [$ctx+4],$B + and $shl2,32,$shl1 + add $shl2,8,$shl2 + ld [$ctx+8],$C + sub $shl2,$shl1,$shl2 ! shr+shl1+shl2==64 + ld [$ctx+12],$D + nop + +.Loop: + cmp $shr,0 ! was inp aligned? + ldxa [$inp+0]%asi,@X[0] ! load little-endian input + ldxa [$inp+8]%asi,@X[1] + ldxa [$inp+16]%asi,@X[2] + ldxa [$inp+24]%asi,@X[3] + ldxa [$inp+32]%asi,@X[4] + sllx $A,32,$AB ! pack A,B + ldxa [$inp+40]%asi,@X[5] + sllx $C,32,$CD ! pack C,D + ldxa [$inp+48]%asi,@X[6] + or $B,$AB,$AB + ldxa [$inp+56]%asi,@X[7] + or $D,$CD,$CD + bnz,a,pn %icc,.+8 + ldxa [$inp+64]%asi,@X[8] + + srlx @X[0],$shr,@X[0] ! align X[0] + sllx @X[1],$shl1,$tx + sethi %hi(@K[0]),$t2 + sllx $tx,$shl2,$tx + or $t2,%lo(@K[0]),$t2 + or $tx,@X[0],@X[0] + xor $C,$D,$t1 + add @X[0],$t2,$t2 ! X[0]+K[0] +___ + for ($i=0;$i<15;$i++) { &R0($i,@V); unshift(@V,pop(@V)); } + for (;$i<16;$i++) { &R0_1($i,@V); unshift(@V,pop(@V)); } + for (;$i<32;$i++) { &R1($i,@V); unshift(@V,pop(@V)); } + for (;$i<48;$i++) { &R2($i,@V); unshift(@V,pop(@V)); } + for (;$i<64;$i++) { &R3($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + srlx $AB,32,$t1 ! unpack A,B,C,D and accumulate + add $inp,64,$inp ! advance inp + srlx $CD,32,$t2 + add $t1,$A,$A + subcc $len,1,$len ! done yet? + add $AB,$B,$B + add $t2,$C,$C + add $CD,$D,$D + srl $B,0,$B ! clruw $B + bne SIZE_T_CC,.Loop + srl $D,0,$D ! clruw $D + + st $A,[$ctx+0] ! write out ctx + st $B,[$ctx+4] + st $C,[$ctx+8] + st $D,[$ctx+12] + + wr %g0,$saved_asi,%asi + ret + restore +.type md5_block_asm_data_order,#function +.size md5_block_asm_data_order,(.-md5_block_asm_data_order) + +.asciz "MD5 block transform for SPARCv9, CRYPTOGAMS by " +.align 4 +___ + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. 
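For reference, the unvis() and unalignaddr() helpers that follow hand-encode the VIS instructions as raw .word values, using the layout hard-coded in the script: 0x81b00000 | rd<<25 | rs1<<14 | opf<<5 | rs2, with opf 0x048 for faligndata and 0x07c for for. A stand-alone restatement of that packing, in C for clarity (the function and the example operands are illustrative, and the upper-bank re-encoding applied to %f32 and above is omitted):

#include <stdint.h>
#include <stdio.h>

/* Pack a two-source VIS op into the raw word the script emits. */
static uint32_t vis_word(unsigned opf, unsigned rd, unsigned rs1, unsigned rs2)
{
    return 0x81b00000u | (rd << 25) | (rs1 << 14) | (opf << 5) | rs2;
}

int main(void)
{
    /* faligndata %f10, %f12, %f8  (rs1=10, rs2=12, rd=8) */
    printf(".word\t0x%08x\n", (unsigned)vis_word(0x048, 8, 10, 12));
    /* for %f26, %f26, %f10 as used in the unaligned hardware loop above */
    printf(".word\t0x%08x\n", (unsigned)vis_word(0x07c, 10, 26, 26));
    return 0;
}

Emitting the instructions as data is what lets the module assemble even when the assembler has not been told to accept VIS, which is the rationale stated in the comment above.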
+sub unvis { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my $ref,$opf; +my %visopf = ( "faligndata" => 0x048, + "for" => 0x07c ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} +sub unalignaddr { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my $ref="$mnemonic\t$rs1,$rs2,$rd"; + + foreach ($rs1,$rs2,$rd) { + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } + else { return $ref; } + } + return sprintf ".word\t0x%08x !%s", + 0x81b00300|$rd<<25|$rs1<<14|$rs2, + $ref; +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unvis($1,$2,$3,$4) + /ge; + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unalignaddr($1,$2,$3,$4) + /ge; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/md5/md5_locl.h b/deps/openssl/openssl/crypto/md5/md5_locl.h index 5f6f2fdedd7845..82e69218dae19d 100644 --- a/deps/openssl/openssl/crypto/md5/md5_locl.h +++ b/deps/openssl/openssl/crypto/md5/md5_locl.h @@ -71,6 +71,8 @@ # define md5_block_data_order md5_block_asm_data_order # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) # define md5_block_data_order md5_block_asm_data_order +# elif defined(__sparc) || defined(__sparc__) +# define md5_block_data_order md5_block_asm_data_order # endif #endif diff --git a/deps/openssl/openssl/crypto/modes/Makefile b/deps/openssl/openssl/crypto/modes/Makefile index 3d8bafd5716c82..cbcbfad4b117ca 100644 --- a/deps/openssl/openssl/crypto/modes/Makefile +++ b/deps/openssl/openssl/crypto/modes/Makefile @@ -22,9 +22,9 @@ APPS= LIB=$(TOP)/libcrypto.a LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \ - ccm128.c xts128.c + ccm128.c xts128.c wrap128.c LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \ - ccm128.o xts128.o $(MODES_ASM_OBJ) + ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ) SRC= $(LIBSRC) @@ -50,20 +50,26 @@ ghash-x86.s: asm/ghash-x86.pl $(PERL) asm/ghash-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ ghash-x86_64.s: asm/ghash-x86_64.pl $(PERL) asm/ghash-x86_64.pl $(PERLASM_SCHEME) > $@ +aesni-gcm-x86_64.s: asm/aesni-gcm-x86_64.pl + $(PERL) asm/aesni-gcm-x86_64.pl $(PERLASM_SCHEME) > $@ ghash-sparcv9.s: asm/ghash-sparcv9.pl $(PERL) asm/ghash-sparcv9.pl $@ $(CFLAGS) ghash-alpha.s: asm/ghash-alpha.pl - (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \ + (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ $(PERL) asm/ghash-alpha.pl > $$preproc && \ - $(CC) -E $$preproc > $@ && rm $$preproc) - + $(CC) -E -P $$preproc > $@ && rm $$preproc) ghash-parisc.s: asm/ghash-parisc.pl $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ +ghashv8-armx.S: asm/ghashv8-armx.pl + $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@ +ghashp8-ppc.s: asm/ghashp8-ppc.pl + $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@ # GNU make "catch all" ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ ghash-armv4.o: ghash-armv4.S +ghashv8-armx.o: ghashv8-armx.S files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -137,6 +143,14 @@ ofb128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h ofb128.o: ../../include/openssl/opensslv.h 
../../include/openssl/ossl_typ.h ofb128.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h ofb128.o: ../../include/openssl/symhacks.h modes_lcl.h ofb128.c +wrap128.o: ../../e_os.h ../../include/openssl/bio.h +wrap128.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h +wrap128.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h +wrap128.o: ../../include/openssl/lhash.h ../../include/openssl/modes.h +wrap128.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +wrap128.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h +wrap128.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h +wrap128.o: ../cryptlib.h wrap128.c xts128.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h xts128.o: ../../include/openssl/modes.h ../../include/openssl/opensslconf.h xts128.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h diff --git a/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl new file mode 100644 index 00000000000000..7e4e04ea25300d --- /dev/null +++ b/deps/openssl/openssl/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -0,0 +1,1057 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# +# AES-NI-CTR+GHASH stitch. +# +# February 2013 +# +# OpenSSL GCM implementation is organized in such way that its +# performance is rather close to the sum of its streamed components, +# in the context parallelized AES-NI CTR and modulo-scheduled +# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation +# was observed to perform significantly better than the sum of the +# components on contemporary CPUs, the effort was deemed impossible to +# justify. This module is based on combination of Intel submissions, +# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max +# Locktyukhin of Intel Corp. who verified that it reduces shuffles +# pressure with notable relative improvement, achieving 1.0 cycle per +# byte processed with 128-bit key on Haswell processor, and 0.74 - +# on Broadwell. [Mentioned results are raw profiled measurements for +# favourable packet size, one divisible by 96. Applications using the +# EVP interface will observe a few percent worse performance.] 
+# +# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest +# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +if ($avx>1) {{{ + +($inp,$out,$len,$key,$ivp,$Xip)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9"); + +($Ii,$T1,$T2,$Hkey, + $Z0,$Z1,$Z2,$Z3,$Xi) = map("%xmm$_",(0..8)); + +($inout0,$inout1,$inout2,$inout3,$inout4,$inout5,$rndkey) = map("%xmm$_",(9..15)); + +($counter,$rounds,$ret,$const,$in0,$end0)=("%ebx","%ebp","%r10","%r11","%r14","%r15"); + +$code=<<___; +.text + +.type _aesni_ctr32_ghash_6x,\@abi-omnipotent +.align 32 +_aesni_ctr32_ghash_6x: + vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb + sub \$6,$len + vpxor $Z0,$Z0,$Z0 # $Z0 = 0 + vmovdqu 0x00-0x80($key),$rndkey + vpaddb $T2,$T1,$inout1 + vpaddb $T2,$inout1,$inout2 + vpaddb $T2,$inout2,$inout3 + vpaddb $T2,$inout3,$inout4 + vpaddb $T2,$inout4,$inout5 + vpxor $rndkey,$T1,$inout0 + vmovdqu $Z0,16+8(%rsp) # "$Z3" = 0 + jmp .Loop6x + +.align 32 +.Loop6x: + add \$`6<<24`,$counter + jc .Lhandle_ctr32 # discard $inout[1-5]? 
+ vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 + vpaddb $T2,$inout5,$T1 # next counter value + vpxor $rndkey,$inout1,$inout1 + vpxor $rndkey,$inout2,$inout2 + +.Lresume_ctr32: + vmovdqu $T1,($ivp) # save next counter value + vpclmulqdq \$0x10,$Hkey,$Z3,$Z1 + vpxor $rndkey,$inout3,$inout3 + vmovups 0x10-0x80($key),$T2 # borrow $T2 for $rndkey + vpclmulqdq \$0x01,$Hkey,$Z3,$Z2 + xor %r12,%r12 + cmp $in0,$end0 + + vaesenc $T2,$inout0,$inout0 + vmovdqu 0x30+8(%rsp),$Ii # I[4] + vpxor $rndkey,$inout4,$inout4 + vpclmulqdq \$0x00,$Hkey,$Z3,$T1 + vaesenc $T2,$inout1,$inout1 + vpxor $rndkey,$inout5,$inout5 + setnc %r12b + vpclmulqdq \$0x11,$Hkey,$Z3,$Z3 + vaesenc $T2,$inout2,$inout2 + vmovdqu 0x10-0x20($Xip),$Hkey # $Hkey^2 + neg %r12 + vaesenc $T2,$inout3,$inout3 + vpxor $Z1,$Z2,$Z2 + vpclmulqdq \$0x00,$Hkey,$Ii,$Z1 + vpxor $Z0,$Xi,$Xi # modulo-scheduled + vaesenc $T2,$inout4,$inout4 + vpxor $Z1,$T1,$Z0 + and \$0x60,%r12 + vmovups 0x20-0x80($key),$rndkey + vpclmulqdq \$0x10,$Hkey,$Ii,$T1 + vaesenc $T2,$inout5,$inout5 + + vpclmulqdq \$0x01,$Hkey,$Ii,$T2 + lea ($in0,%r12),$in0 + vaesenc $rndkey,$inout0,$inout0 + vpxor 16+8(%rsp),$Xi,$Xi # modulo-scheduled [vpxor $Z3,$Xi,$Xi] + vpclmulqdq \$0x11,$Hkey,$Ii,$Hkey + vmovdqu 0x40+8(%rsp),$Ii # I[3] + vaesenc $rndkey,$inout1,$inout1 + movbe 0x58($in0),%r13 + vaesenc $rndkey,$inout2,$inout2 + movbe 0x50($in0),%r12 + vaesenc $rndkey,$inout3,$inout3 + mov %r13,0x20+8(%rsp) + vaesenc $rndkey,$inout4,$inout4 + mov %r12,0x28+8(%rsp) + vmovdqu 0x30-0x20($Xip),$Z1 # borrow $Z1 for $Hkey^3 + vaesenc $rndkey,$inout5,$inout5 + + vmovups 0x30-0x80($key),$rndkey + vpxor $T1,$Z2,$Z2 + vpclmulqdq \$0x00,$Z1,$Ii,$T1 + vaesenc $rndkey,$inout0,$inout0 + vpxor $T2,$Z2,$Z2 + vpclmulqdq \$0x10,$Z1,$Ii,$T2 + vaesenc $rndkey,$inout1,$inout1 + vpxor $Hkey,$Z3,$Z3 + vpclmulqdq \$0x01,$Z1,$Ii,$Hkey + vaesenc $rndkey,$inout2,$inout2 + vpclmulqdq \$0x11,$Z1,$Ii,$Z1 + vmovdqu 0x50+8(%rsp),$Ii # I[2] + vaesenc $rndkey,$inout3,$inout3 + vaesenc $rndkey,$inout4,$inout4 + vpxor $T1,$Z0,$Z0 + vmovdqu 0x40-0x20($Xip),$T1 # borrow $T1 for $Hkey^4 + vaesenc $rndkey,$inout5,$inout5 + + vmovups 0x40-0x80($key),$rndkey + vpxor $T2,$Z2,$Z2 + vpclmulqdq \$0x00,$T1,$Ii,$T2 + vaesenc $rndkey,$inout0,$inout0 + vpxor $Hkey,$Z2,$Z2 + vpclmulqdq \$0x10,$T1,$Ii,$Hkey + vaesenc $rndkey,$inout1,$inout1 + movbe 0x48($in0),%r13 + vpxor $Z1,$Z3,$Z3 + vpclmulqdq \$0x01,$T1,$Ii,$Z1 + vaesenc $rndkey,$inout2,$inout2 + movbe 0x40($in0),%r12 + vpclmulqdq \$0x11,$T1,$Ii,$T1 + vmovdqu 0x60+8(%rsp),$Ii # I[1] + vaesenc $rndkey,$inout3,$inout3 + mov %r13,0x30+8(%rsp) + vaesenc $rndkey,$inout4,$inout4 + mov %r12,0x38+8(%rsp) + vpxor $T2,$Z0,$Z0 + vmovdqu 0x60-0x20($Xip),$T2 # borrow $T2 for $Hkey^5 + vaesenc $rndkey,$inout5,$inout5 + + vmovups 0x50-0x80($key),$rndkey + vpxor $Hkey,$Z2,$Z2 + vpclmulqdq \$0x00,$T2,$Ii,$Hkey + vaesenc $rndkey,$inout0,$inout0 + vpxor $Z1,$Z2,$Z2 + vpclmulqdq \$0x10,$T2,$Ii,$Z1 + vaesenc $rndkey,$inout1,$inout1 + movbe 0x38($in0),%r13 + vpxor $T1,$Z3,$Z3 + vpclmulqdq \$0x01,$T2,$Ii,$T1 + vpxor 0x70+8(%rsp),$Xi,$Xi # accumulate I[0] + vaesenc $rndkey,$inout2,$inout2 + movbe 0x30($in0),%r12 + vpclmulqdq \$0x11,$T2,$Ii,$T2 + vaesenc $rndkey,$inout3,$inout3 + mov %r13,0x40+8(%rsp) + vaesenc $rndkey,$inout4,$inout4 + mov %r12,0x48+8(%rsp) + vpxor $Hkey,$Z0,$Z0 + vmovdqu 0x70-0x20($Xip),$Hkey # $Hkey^6 + vaesenc $rndkey,$inout5,$inout5 + + vmovups 0x60-0x80($key),$rndkey + vpxor $Z1,$Z2,$Z2 + vpclmulqdq \$0x10,$Hkey,$Xi,$Z1 + vaesenc $rndkey,$inout0,$inout0 + vpxor $T1,$Z2,$Z2 + vpclmulqdq 
\$0x01,$Hkey,$Xi,$T1 + vaesenc $rndkey,$inout1,$inout1 + movbe 0x28($in0),%r13 + vpxor $T2,$Z3,$Z3 + vpclmulqdq \$0x00,$Hkey,$Xi,$T2 + vaesenc $rndkey,$inout2,$inout2 + movbe 0x20($in0),%r12 + vpclmulqdq \$0x11,$Hkey,$Xi,$Xi + vaesenc $rndkey,$inout3,$inout3 + mov %r13,0x50+8(%rsp) + vaesenc $rndkey,$inout4,$inout4 + mov %r12,0x58+8(%rsp) + vpxor $Z1,$Z2,$Z2 + vaesenc $rndkey,$inout5,$inout5 + vpxor $T1,$Z2,$Z2 + + vmovups 0x70-0x80($key),$rndkey + vpslldq \$8,$Z2,$Z1 + vpxor $T2,$Z0,$Z0 + vmovdqu 0x10($const),$Hkey # .Lpoly + + vaesenc $rndkey,$inout0,$inout0 + vpxor $Xi,$Z3,$Z3 + vaesenc $rndkey,$inout1,$inout1 + vpxor $Z1,$Z0,$Z0 + movbe 0x18($in0),%r13 + vaesenc $rndkey,$inout2,$inout2 + movbe 0x10($in0),%r12 + vpalignr \$8,$Z0,$Z0,$Ii # 1st phase + vpclmulqdq \$0x10,$Hkey,$Z0,$Z0 + mov %r13,0x60+8(%rsp) + vaesenc $rndkey,$inout3,$inout3 + mov %r12,0x68+8(%rsp) + vaesenc $rndkey,$inout4,$inout4 + vmovups 0x80-0x80($key),$T1 # borrow $T1 for $rndkey + vaesenc $rndkey,$inout5,$inout5 + + vaesenc $T1,$inout0,$inout0 + vmovups 0x90-0x80($key),$rndkey + vaesenc $T1,$inout1,$inout1 + vpsrldq \$8,$Z2,$Z2 + vaesenc $T1,$inout2,$inout2 + vpxor $Z2,$Z3,$Z3 + vaesenc $T1,$inout3,$inout3 + vpxor $Ii,$Z0,$Z0 + movbe 0x08($in0),%r13 + vaesenc $T1,$inout4,$inout4 + movbe 0x00($in0),%r12 + vaesenc $T1,$inout5,$inout5 + vmovups 0xa0-0x80($key),$T1 + cmp \$11,$rounds + jb .Lenc_tail # 128-bit key + + vaesenc $rndkey,$inout0,$inout0 + vaesenc $rndkey,$inout1,$inout1 + vaesenc $rndkey,$inout2,$inout2 + vaesenc $rndkey,$inout3,$inout3 + vaesenc $rndkey,$inout4,$inout4 + vaesenc $rndkey,$inout5,$inout5 + + vaesenc $T1,$inout0,$inout0 + vaesenc $T1,$inout1,$inout1 + vaesenc $T1,$inout2,$inout2 + vaesenc $T1,$inout3,$inout3 + vaesenc $T1,$inout4,$inout4 + vmovups 0xb0-0x80($key),$rndkey + vaesenc $T1,$inout5,$inout5 + vmovups 0xc0-0x80($key),$T1 + je .Lenc_tail # 192-bit key + + vaesenc $rndkey,$inout0,$inout0 + vaesenc $rndkey,$inout1,$inout1 + vaesenc $rndkey,$inout2,$inout2 + vaesenc $rndkey,$inout3,$inout3 + vaesenc $rndkey,$inout4,$inout4 + vaesenc $rndkey,$inout5,$inout5 + + vaesenc $T1,$inout0,$inout0 + vaesenc $T1,$inout1,$inout1 + vaesenc $T1,$inout2,$inout2 + vaesenc $T1,$inout3,$inout3 + vaesenc $T1,$inout4,$inout4 + vmovups 0xd0-0x80($key),$rndkey + vaesenc $T1,$inout5,$inout5 + vmovups 0xe0-0x80($key),$T1 + jmp .Lenc_tail # 256-bit key + +.align 32 +.Lhandle_ctr32: + vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask + vpshufb $Ii,$T1,$Z2 # byte-swap counter + vmovdqu 0x30($const),$Z1 # borrow $Z1, .Ltwo_lsb + vpaddd 0x40($const),$Z2,$inout1 # .Lone_lsb + vpaddd $Z1,$Z2,$inout2 + vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 + vpaddd $Z1,$inout1,$inout3 + vpshufb $Ii,$inout1,$inout1 + vpaddd $Z1,$inout2,$inout4 + vpshufb $Ii,$inout2,$inout2 + vpxor $rndkey,$inout1,$inout1 + vpaddd $Z1,$inout3,$inout5 + vpshufb $Ii,$inout3,$inout3 + vpxor $rndkey,$inout2,$inout2 + vpaddd $Z1,$inout4,$T1 # byte-swapped next counter value + vpshufb $Ii,$inout4,$inout4 + vpshufb $Ii,$inout5,$inout5 + vpshufb $Ii,$T1,$T1 # next counter value + jmp .Lresume_ctr32 + +.align 32 +.Lenc_tail: + vaesenc $rndkey,$inout0,$inout0 + vmovdqu $Z3,16+8(%rsp) # postpone vpxor $Z3,$Xi,$Xi + vpalignr \$8,$Z0,$Z0,$Xi # 2nd phase + vaesenc $rndkey,$inout1,$inout1 + vpclmulqdq \$0x10,$Hkey,$Z0,$Z0 + vpxor 0x00($inp),$T1,$T2 + vaesenc $rndkey,$inout2,$inout2 + vpxor 0x10($inp),$T1,$Ii + vaesenc $rndkey,$inout3,$inout3 + vpxor 0x20($inp),$T1,$Z1 + vaesenc $rndkey,$inout4,$inout4 + vpxor 0x30($inp),$T1,$Z2 + vaesenc $rndkey,$inout5,$inout5 + 
vpxor 0x40($inp),$T1,$Z3 + vpxor 0x50($inp),$T1,$Hkey + vmovdqu ($ivp),$T1 # load next counter value + + vaesenclast $T2,$inout0,$inout0 + vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb + vaesenclast $Ii,$inout1,$inout1 + vpaddb $T2,$T1,$Ii + mov %r13,0x70+8(%rsp) + lea 0x60($inp),$inp + vaesenclast $Z1,$inout2,$inout2 + vpaddb $T2,$Ii,$Z1 + mov %r12,0x78+8(%rsp) + lea 0x60($out),$out + vmovdqu 0x00-0x80($key),$rndkey + vaesenclast $Z2,$inout3,$inout3 + vpaddb $T2,$Z1,$Z2 + vaesenclast $Z3, $inout4,$inout4 + vpaddb $T2,$Z2,$Z3 + vaesenclast $Hkey,$inout5,$inout5 + vpaddb $T2,$Z3,$Hkey + + add \$0x60,$ret + sub \$0x6,$len + jc .L6x_done + + vmovups $inout0,-0x60($out) # save output + vpxor $rndkey,$T1,$inout0 + vmovups $inout1,-0x50($out) + vmovdqa $Ii,$inout1 # 0 latency + vmovups $inout2,-0x40($out) + vmovdqa $Z1,$inout2 # 0 latency + vmovups $inout3,-0x30($out) + vmovdqa $Z2,$inout3 # 0 latency + vmovups $inout4,-0x20($out) + vmovdqa $Z3,$inout4 # 0 latency + vmovups $inout5,-0x10($out) + vmovdqa $Hkey,$inout5 # 0 latency + vmovdqu 0x20+8(%rsp),$Z3 # I[5] + jmp .Loop6x + +.L6x_done: + vpxor 16+8(%rsp),$Xi,$Xi # modulo-scheduled + vpxor $Z0,$Xi,$Xi # modulo-scheduled + + ret +.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +___ +###################################################################### +# +# size_t aesni_gcm_[en|de]crypt(const void *inp, void *out, size_t len, +# const AES_KEY *key, unsigned char iv[16], +# struct { u128 Xi,H,Htbl[9]; } *Xip); +$code.=<<___; +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,\@function,6 +.align 32 +aesni_gcm_decrypt: + xor $ret,$ret + cmp \$0x60,$len # minimal accepted length + jb .Lgcm_dec_abort + + lea (%rsp),%rax # save stack pointer + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,-0xd8(%rax) + movaps %xmm7,-0xc8(%rax) + movaps %xmm8,-0xb8(%rax) + movaps %xmm9,-0xa8(%rax) + movaps %xmm10,-0x98(%rax) + movaps %xmm11,-0x88(%rax) + movaps %xmm12,-0x78(%rax) + movaps %xmm13,-0x68(%rax) + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +.Lgcm_dec_body: +___ +$code.=<<___; + vzeroupper + + vmovdqu ($ivp),$T1 # input counter value + add \$-128,%rsp + mov 12($ivp),$counter + lea .Lbswap_mask(%rip),$const + lea -0x80($key),$in0 # borrow $in0 + mov \$0xf80,$end0 # borrow $end0 + vmovdqu ($Xip),$Xi # load Xi + and \$-128,%rsp # ensure stack alignment + vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask + lea 0x80($key),$key # size optimization + lea 0x20+0x20($Xip),$Xip # size optimization + mov 0xf0-0x80($key),$rounds + vpshufb $Ii,$Xi,$Xi + + and $end0,$in0 + and %rsp,$end0 + sub $in0,$end0 + jc .Ldec_no_key_aliasing + cmp \$768,$end0 + jnc .Ldec_no_key_aliasing + sub $end0,%rsp # avoid aliasing with key +.Ldec_no_key_aliasing: + + vmovdqu 0x50($inp),$Z3 # I[5] + lea ($inp),$in0 + vmovdqu 0x40($inp),$Z0 + lea -0xc0($inp,$len),$end0 + vmovdqu 0x30($inp),$Z1 + shr \$4,$len + xor $ret,$ret + vmovdqu 0x20($inp),$Z2 + vpshufb $Ii,$Z3,$Z3 # passed to _aesni_ctr32_ghash_6x + vmovdqu 0x10($inp),$T2 + vpshufb $Ii,$Z0,$Z0 + vmovdqu ($inp),$Hkey + vpshufb $Ii,$Z1,$Z1 + vmovdqu $Z0,0x30(%rsp) + vpshufb $Ii,$Z2,$Z2 + vmovdqu $Z1,0x40(%rsp) + vpshufb $Ii,$T2,$T2 + vmovdqu $Z2,0x50(%rsp) + vpshufb $Ii,$Hkey,$Hkey + vmovdqu $T2,0x60(%rsp) + vmovdqu $Hkey,0x70(%rsp) + + call _aesni_ctr32_ghash_6x + + vmovups $inout0,-0x60($out) # save output + vmovups $inout1,-0x50($out) + vmovups $inout2,-0x40($out) + vmovups $inout3,-0x30($out) + vmovups $inout4,-0x20($out) 
+ vmovups $inout5,-0x10($out) + + vpshufb ($const),$Xi,$Xi # .Lbswap_mask + vmovdqu $Xi,-0x40($Xip) # output Xi + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xd8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp # restore %rsp +.Lgcm_dec_abort: + mov $ret,%rax # return value + ret +.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +___ + +$code.=<<___; +.type _aesni_ctr32_6x,\@abi-omnipotent +.align 32 +_aesni_ctr32_6x: + vmovdqu 0x00-0x80($key),$Z0 # borrow $Z0 for $rndkey + vmovdqu 0x20($const),$T2 # borrow $T2, .Lone_msb + lea -1($rounds),%r13 + vmovups 0x10-0x80($key),$rndkey + lea 0x20-0x80($key),%r12 + vpxor $Z0,$T1,$inout0 + add \$`6<<24`,$counter + jc .Lhandle_ctr32_2 + vpaddb $T2,$T1,$inout1 + vpaddb $T2,$inout1,$inout2 + vpxor $Z0,$inout1,$inout1 + vpaddb $T2,$inout2,$inout3 + vpxor $Z0,$inout2,$inout2 + vpaddb $T2,$inout3,$inout4 + vpxor $Z0,$inout3,$inout3 + vpaddb $T2,$inout4,$inout5 + vpxor $Z0,$inout4,$inout4 + vpaddb $T2,$inout5,$T1 + vpxor $Z0,$inout5,$inout5 + jmp .Loop_ctr32 + +.align 16 +.Loop_ctr32: + vaesenc $rndkey,$inout0,$inout0 + vaesenc $rndkey,$inout1,$inout1 + vaesenc $rndkey,$inout2,$inout2 + vaesenc $rndkey,$inout3,$inout3 + vaesenc $rndkey,$inout4,$inout4 + vaesenc $rndkey,$inout5,$inout5 + vmovups (%r12),$rndkey + lea 0x10(%r12),%r12 + dec %r13d + jnz .Loop_ctr32 + + vmovdqu (%r12),$Hkey # last round key + vaesenc $rndkey,$inout0,$inout0 + vpxor 0x00($inp),$Hkey,$Z0 + vaesenc $rndkey,$inout1,$inout1 + vpxor 0x10($inp),$Hkey,$Z1 + vaesenc $rndkey,$inout2,$inout2 + vpxor 0x20($inp),$Hkey,$Z2 + vaesenc $rndkey,$inout3,$inout3 + vpxor 0x30($inp),$Hkey,$Xi + vaesenc $rndkey,$inout4,$inout4 + vpxor 0x40($inp),$Hkey,$T2 + vaesenc $rndkey,$inout5,$inout5 + vpxor 0x50($inp),$Hkey,$Hkey + lea 0x60($inp),$inp + + vaesenclast $Z0,$inout0,$inout0 + vaesenclast $Z1,$inout1,$inout1 + vaesenclast $Z2,$inout2,$inout2 + vaesenclast $Xi,$inout3,$inout3 + vaesenclast $T2,$inout4,$inout4 + vaesenclast $Hkey,$inout5,$inout5 + vmovups $inout0,0x00($out) + vmovups $inout1,0x10($out) + vmovups $inout2,0x20($out) + vmovups $inout3,0x30($out) + vmovups $inout4,0x40($out) + vmovups $inout5,0x50($out) + lea 0x60($out),$out + + ret +.align 32 +.Lhandle_ctr32_2: + vpshufb $Ii,$T1,$Z2 # byte-swap counter + vmovdqu 0x30($const),$Z1 # borrow $Z1, .Ltwo_lsb + vpaddd 0x40($const),$Z2,$inout1 # .Lone_lsb + vpaddd $Z1,$Z2,$inout2 + vpaddd $Z1,$inout1,$inout3 + vpshufb $Ii,$inout1,$inout1 + vpaddd $Z1,$inout2,$inout4 + vpshufb $Ii,$inout2,$inout2 + vpxor $Z0,$inout1,$inout1 + vpaddd $Z1,$inout3,$inout5 + vpshufb $Ii,$inout3,$inout3 + vpxor $Z0,$inout2,$inout2 + vpaddd $Z1,$inout4,$T1 # byte-swapped next counter value + vpshufb $Ii,$inout4,$inout4 + vpxor $Z0,$inout3,$inout3 + vpshufb $Ii,$inout5,$inout5 + vpxor $Z0,$inout4,$inout4 + vpshufb $Ii,$T1,$T1 # next counter value + vpxor $Z0,$inout5,$inout5 + jmp .Loop_ctr32 +.size _aesni_ctr32_6x,.-_aesni_ctr32_6x + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,\@function,6 +.align 32 +aesni_gcm_encrypt: + xor $ret,$ret + cmp \$0x60*3,$len # minimal accepted length + jb .Lgcm_enc_abort + + lea (%rsp),%rax # save stack pointer + push %rbx + push %rbp + push %r12 + push %r13 
+ push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,-0xd8(%rax) + movaps %xmm7,-0xc8(%rax) + movaps %xmm8,-0xb8(%rax) + movaps %xmm9,-0xa8(%rax) + movaps %xmm10,-0x98(%rax) + movaps %xmm11,-0x88(%rax) + movaps %xmm12,-0x78(%rax) + movaps %xmm13,-0x68(%rax) + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +.Lgcm_enc_body: +___ +$code.=<<___; + vzeroupper + + vmovdqu ($ivp),$T1 # input counter value + add \$-128,%rsp + mov 12($ivp),$counter + lea .Lbswap_mask(%rip),$const + lea -0x80($key),$in0 # borrow $in0 + mov \$0xf80,$end0 # borrow $end0 + lea 0x80($key),$key # size optimization + vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask + and \$-128,%rsp # ensure stack alignment + mov 0xf0-0x80($key),$rounds + + and $end0,$in0 + and %rsp,$end0 + sub $in0,$end0 + jc .Lenc_no_key_aliasing + cmp \$768,$end0 + jnc .Lenc_no_key_aliasing + sub $end0,%rsp # avoid aliasing with key +.Lenc_no_key_aliasing: + + lea ($out),$in0 + lea -0xc0($out,$len),$end0 + shr \$4,$len + + call _aesni_ctr32_6x + vpshufb $Ii,$inout0,$Xi # save bswapped output on stack + vpshufb $Ii,$inout1,$T2 + vmovdqu $Xi,0x70(%rsp) + vpshufb $Ii,$inout2,$Z0 + vmovdqu $T2,0x60(%rsp) + vpshufb $Ii,$inout3,$Z1 + vmovdqu $Z0,0x50(%rsp) + vpshufb $Ii,$inout4,$Z2 + vmovdqu $Z1,0x40(%rsp) + vpshufb $Ii,$inout5,$Z3 # passed to _aesni_ctr32_ghash_6x + vmovdqu $Z2,0x30(%rsp) + + call _aesni_ctr32_6x + + vmovdqu ($Xip),$Xi # load Xi + lea 0x20+0x20($Xip),$Xip # size optimization + sub \$12,$len + mov \$0x60*2,$ret + vpshufb $Ii,$Xi,$Xi + + call _aesni_ctr32_ghash_6x + vmovdqu 0x20(%rsp),$Z3 # I[5] + vmovdqu ($const),$Ii # borrow $Ii for .Lbswap_mask + vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 + vpunpckhqdq $Z3,$Z3,$T1 + vmovdqu 0x20-0x20($Xip),$rndkey # borrow $rndkey for $HK + vmovups $inout0,-0x60($out) # save output + vpshufb $Ii,$inout0,$inout0 # but keep bswapped copy + vpxor $Z3,$T1,$T1 + vmovups $inout1,-0x50($out) + vpshufb $Ii,$inout1,$inout1 + vmovups $inout2,-0x40($out) + vpshufb $Ii,$inout2,$inout2 + vmovups $inout3,-0x30($out) + vpshufb $Ii,$inout3,$inout3 + vmovups $inout4,-0x20($out) + vpshufb $Ii,$inout4,$inout4 + vmovups $inout5,-0x10($out) + vpshufb $Ii,$inout5,$inout5 + vmovdqu $inout0,0x10(%rsp) # free $inout0 +___ +{ my ($HK,$T3)=($rndkey,$inout0); + +$code.=<<___; + vmovdqu 0x30(%rsp),$Z2 # I[4] + vmovdqu 0x10-0x20($Xip),$Ii # borrow $Ii for $Hkey^2 + vpunpckhqdq $Z2,$Z2,$T2 + vpclmulqdq \$0x00,$Hkey,$Z3,$Z1 + vpxor $Z2,$T2,$T2 + vpclmulqdq \$0x11,$Hkey,$Z3,$Z3 + vpclmulqdq \$0x00,$HK,$T1,$T1 + + vmovdqu 0x40(%rsp),$T3 # I[3] + vpclmulqdq \$0x00,$Ii,$Z2,$Z0 + vmovdqu 0x30-0x20($Xip),$Hkey # $Hkey^3 + vpxor $Z1,$Z0,$Z0 + vpunpckhqdq $T3,$T3,$Z1 + vpclmulqdq \$0x11,$Ii,$Z2,$Z2 + vpxor $T3,$Z1,$Z1 + vpxor $Z3,$Z2,$Z2 + vpclmulqdq \$0x10,$HK,$T2,$T2 + vmovdqu 0x50-0x20($Xip),$HK + vpxor $T1,$T2,$T2 + + vmovdqu 0x50(%rsp),$T1 # I[2] + vpclmulqdq \$0x00,$Hkey,$T3,$Z3 + vmovdqu 0x40-0x20($Xip),$Ii # borrow $Ii for $Hkey^4 + vpxor $Z0,$Z3,$Z3 + vpunpckhqdq $T1,$T1,$Z0 + vpclmulqdq \$0x11,$Hkey,$T3,$T3 + vpxor $T1,$Z0,$Z0 + vpxor $Z2,$T3,$T3 + vpclmulqdq \$0x00,$HK,$Z1,$Z1 + vpxor $T2,$Z1,$Z1 + + vmovdqu 0x60(%rsp),$T2 # I[1] + vpclmulqdq \$0x00,$Ii,$T1,$Z2 + vmovdqu 0x60-0x20($Xip),$Hkey # $Hkey^5 + vpxor $Z3,$Z2,$Z2 + vpunpckhqdq $T2,$T2,$Z3 + vpclmulqdq \$0x11,$Ii,$T1,$T1 + vpxor $T2,$Z3,$Z3 + vpxor $T3,$T1,$T1 + vpclmulqdq \$0x10,$HK,$Z0,$Z0 + vmovdqu 0x80-0x20($Xip),$HK + vpxor $Z1,$Z0,$Z0 + + vpxor 0x70(%rsp),$Xi,$Xi # accumulate I[0] + vpclmulqdq \$0x00,$Hkey,$T2,$Z1 + vmovdqu 
0x70-0x20($Xip),$Ii # borrow $Ii for $Hkey^6 + vpunpckhqdq $Xi,$Xi,$T3 + vpxor $Z2,$Z1,$Z1 + vpclmulqdq \$0x11,$Hkey,$T2,$T2 + vpxor $Xi,$T3,$T3 + vpxor $T1,$T2,$T2 + vpclmulqdq \$0x00,$HK,$Z3,$Z3 + vpxor $Z0,$Z3,$Z0 + + vpclmulqdq \$0x00,$Ii,$Xi,$Z2 + vmovdqu 0x00-0x20($Xip),$Hkey # $Hkey^1 + vpunpckhqdq $inout5,$inout5,$T1 + vpclmulqdq \$0x11,$Ii,$Xi,$Xi + vpxor $inout5,$T1,$T1 + vpxor $Z1,$Z2,$Z1 + vpclmulqdq \$0x10,$HK,$T3,$T3 + vmovdqu 0x20-0x20($Xip),$HK + vpxor $T2,$Xi,$Z3 + vpxor $Z0,$T3,$Z2 + + vmovdqu 0x10-0x20($Xip),$Ii # borrow $Ii for $Hkey^2 + vpxor $Z1,$Z3,$T3 # aggregated Karatsuba post-processing + vpclmulqdq \$0x00,$Hkey,$inout5,$Z0 + vpxor $T3,$Z2,$Z2 + vpunpckhqdq $inout4,$inout4,$T2 + vpclmulqdq \$0x11,$Hkey,$inout5,$inout5 + vpxor $inout4,$T2,$T2 + vpslldq \$8,$Z2,$T3 + vpclmulqdq \$0x00,$HK,$T1,$T1 + vpxor $T3,$Z1,$Xi + vpsrldq \$8,$Z2,$Z2 + vpxor $Z2,$Z3,$Z3 + + vpclmulqdq \$0x00,$Ii,$inout4,$Z1 + vmovdqu 0x30-0x20($Xip),$Hkey # $Hkey^3 + vpxor $Z0,$Z1,$Z1 + vpunpckhqdq $inout3,$inout3,$T3 + vpclmulqdq \$0x11,$Ii,$inout4,$inout4 + vpxor $inout3,$T3,$T3 + vpxor $inout5,$inout4,$inout4 + vpalignr \$8,$Xi,$Xi,$inout5 # 1st phase + vpclmulqdq \$0x10,$HK,$T2,$T2 + vmovdqu 0x50-0x20($Xip),$HK + vpxor $T1,$T2,$T2 + + vpclmulqdq \$0x00,$Hkey,$inout3,$Z0 + vmovdqu 0x40-0x20($Xip),$Ii # borrow $Ii for $Hkey^4 + vpxor $Z1,$Z0,$Z0 + vpunpckhqdq $inout2,$inout2,$T1 + vpclmulqdq \$0x11,$Hkey,$inout3,$inout3 + vpxor $inout2,$T1,$T1 + vpxor $inout4,$inout3,$inout3 + vxorps 0x10(%rsp),$Z3,$Z3 # accumulate $inout0 + vpclmulqdq \$0x00,$HK,$T3,$T3 + vpxor $T2,$T3,$T3 + + vpclmulqdq \$0x10,0x10($const),$Xi,$Xi + vxorps $inout5,$Xi,$Xi + + vpclmulqdq \$0x00,$Ii,$inout2,$Z1 + vmovdqu 0x60-0x20($Xip),$Hkey # $Hkey^5 + vpxor $Z0,$Z1,$Z1 + vpunpckhqdq $inout1,$inout1,$T2 + vpclmulqdq \$0x11,$Ii,$inout2,$inout2 + vpxor $inout1,$T2,$T2 + vpalignr \$8,$Xi,$Xi,$inout5 # 2nd phase + vpxor $inout3,$inout2,$inout2 + vpclmulqdq \$0x10,$HK,$T1,$T1 + vmovdqu 0x80-0x20($Xip),$HK + vpxor $T3,$T1,$T1 + + vxorps $Z3,$inout5,$inout5 + vpclmulqdq \$0x10,0x10($const),$Xi,$Xi + vxorps $inout5,$Xi,$Xi + + vpclmulqdq \$0x00,$Hkey,$inout1,$Z0 + vmovdqu 0x70-0x20($Xip),$Ii # borrow $Ii for $Hkey^6 + vpxor $Z1,$Z0,$Z0 + vpunpckhqdq $Xi,$Xi,$T3 + vpclmulqdq \$0x11,$Hkey,$inout1,$inout1 + vpxor $Xi,$T3,$T3 + vpxor $inout2,$inout1,$inout1 + vpclmulqdq \$0x00,$HK,$T2,$T2 + vpxor $T1,$T2,$T2 + + vpclmulqdq \$0x00,$Ii,$Xi,$Z1 + vpclmulqdq \$0x11,$Ii,$Xi,$Z3 + vpxor $Z0,$Z1,$Z1 + vpclmulqdq \$0x10,$HK,$T3,$Z2 + vpxor $inout1,$Z3,$Z3 + vpxor $T2,$Z2,$Z2 + + vpxor $Z1,$Z3,$Z0 # aggregated Karatsuba post-processing + vpxor $Z0,$Z2,$Z2 + vpslldq \$8,$Z2,$T1 + vmovdqu 0x10($const),$Hkey # .Lpoly + vpsrldq \$8,$Z2,$Z2 + vpxor $T1,$Z1,$Xi + vpxor $Z2,$Z3,$Z3 + + vpalignr \$8,$Xi,$Xi,$T2 # 1st phase + vpclmulqdq \$0x10,$Hkey,$Xi,$Xi + vpxor $T2,$Xi,$Xi + + vpalignr \$8,$Xi,$Xi,$T2 # 2nd phase + vpclmulqdq \$0x10,$Hkey,$Xi,$Xi + vpxor $Z3,$T2,$T2 + vpxor $T2,$Xi,$Xi +___ +} +$code.=<<___; + vpshufb ($const),$Xi,$Xi # .Lbswap_mask + vmovdqu $Xi,-0x40($Xip) # output Xi + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + 
lea (%rax),%rsp # restore %rsp +.Lgcm_enc_abort: + mov $ret,%rax # return value + ret +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +___ + +$code.=<<___; +.align 64 +.Lbswap_mask: + .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lpoly: + .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lone_msb: + .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Ltwo_lsb: + .byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.Lone_lsb: + .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +.asciz "AES-NI GCM module for x86_64, CRYPTOGAMS by " +.align 64 +___ +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___ +.extern __imp_RtlVirtualUnwind +.type gcm_se_handler,\@abi-omnipotent +.align 16 +gcm_se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lcommon_seh_tail + + mov 120($context),%rax # pull context->Rax + + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + mov %r15,240($context) + mov %r14,232($context) + mov %r13,224($context) + mov %r12,216($context) + mov %rbp,160($context) + mov %rbx,144($context) + + lea -0xd8(%rax),%rsi # %xmm save area + lea 512($context),%rdi # & context.Xmm6 + mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax) + .long 0xa548f3fc # cld; rep movsq + +.Lcommon_seh_tail: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size gcm_se_handler,.-gcm_se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_aesni_gcm_decrypt + .rva .LSEH_end_aesni_gcm_decrypt + .rva .LSEH_gcm_dec_info + + .rva .LSEH_begin_aesni_gcm_encrypt + .rva .LSEH_end_aesni_gcm_encrypt + .rva .LSEH_gcm_enc_info +.section .xdata +.align 8 +.LSEH_gcm_dec_info: + .byte 9,0,0,0 + .rva gcm_se_handler + .rva .Lgcm_dec_body,.Lgcm_dec_abort +.LSEH_gcm_enc_info: + .byte 9,0,0,0 + .rva gcm_se_handler + .rva .Lgcm_enc_body,.Lgcm_enc_abort +___ +} +}}} else {{{ +$code=<<___; # assembler is too old +.text + +.globl aesni_gcm_encrypt +.type aesni_gcm_encrypt,\@abi-omnipotent +aesni_gcm_encrypt: + xor %eax,%eax + ret +.size aesni_gcm_encrypt,.-aesni_gcm_encrypt + +.globl aesni_gcm_decrypt +.type aesni_gcm_decrypt,\@abi-omnipotent +aesni_gcm_decrypt: + xor %eax,%eax + ret 
+.size aesni_gcm_decrypt,.-aesni_gcm_decrypt +___ +}}} + +$code =~ s/\`([^\`]*)\`/eval($1)/gem; + +print $code; + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl index d91586ee2925bb..77fbf34465db48 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl @@ -35,6 +35,20 @@ # Add NEON implementation featuring polynomial multiplication, i.e. no # lookup tables involved. On Cortex A8 it was measured to process one # byte in 15 cycles or 55% faster than integer-only code. +# +# April 2014 +# +# Switch to multiplication algorithm suggested in paper referred +# below and combine it with reduction algorithm from x86 module. +# Performance improvement over previous version varies from 65% on +# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8 +# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 - +# in 9.33. +# +# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software +# Polynomial Multiplication on ARM Processors using the NEON Engine. +# +# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf # ==================================================================== # Note about "528B" variant. In ARM case it makes lesser sense to @@ -303,117 +317,161 @@ () .size gcm_gmult_4bit,.-gcm_gmult_4bit ___ { -my $cnt=$Htbl; # $Htbl is used once in the very beginning - -my ($Hhi, $Hlo, $Zo, $T, $xi, $mod) = map("d$_",(0..7)); -my ($Qhi, $Qlo, $Z, $R, $zero, $Qpost, $IN) = map("q$_",(8..15)); - -# Z:Zo keeps 128-bit result shifted by 1 to the right, with bottom bit -# in Zo. Or should I say "top bit", because GHASH is specified in -# reverse bit order? Otherwise straightforward 128-bt H by one input -# byte multiplication and modulo-reduction, times 16. 
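# Editor's aside -- a minimal reference sketch, not part of the patch: the
# rewritten NEON path below splits each 64-bit carry-less multiplication
# Karatsuba-style (see the "Karatsuba pre/post-processing" comments), i.e.
# one 2n-bit product from three n-bit ones:
#   lo = Al*Bl, hi = Ah*Bh, mid = (Al^Ah)*(Bl^Bh) ^ lo ^ hi.
# The toy code uses 16-bit operands and a schoolbook carry-less multiply
# purely for illustration; names are made up for the sketch.
sub _clmul_ref {                       # schoolbook carry-less multiply
    my ($a, $b) = @_;
    my $r = 0;
    for (my $i = 0; ($b >> $i); $i++) { $r ^= ($a << $i) if (($b >> $i) & 1); }
    return $r;
}
my ($A, $B) = (0xdead, 0xbeef);        # arbitrary example operands
my ($Al, $Ah, $Bl, $Bh) = ($A & 0xff, $A >> 8, $B & 0xff, $B >> 8);
my ($lo, $hi) = (_clmul_ref($Al, $Bl), _clmul_ref($Ah, $Bh));
my $mid = _clmul_ref($Al ^ $Ah, $Bl ^ $Bh) ^ $lo ^ $hi;
printf "karatsuba %x == direct %x\n",  # both values agree
       $lo ^ ($mid << 8) ^ ($hi << 16), _clmul_ref($A, $B);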
+my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); +my ($t0,$t1,$t2,$t3)=map("q$_",(8..12)); +my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31)); -sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -sub Q() { shift=~m|d([1-3]?[02468])|?"q".($1/2):""; } +sub clmul64x64 { +my ($r,$a,$b)=@_; +$code.=<<___; + vext.8 $t0#lo, $a, $a, #1 @ A1 + vmull.p8 $t0, $t0#lo, $b @ F = A1*B + vext.8 $r#lo, $b, $b, #1 @ B1 + vmull.p8 $r, $a, $r#lo @ E = A*B1 + vext.8 $t1#lo, $a, $a, #2 @ A2 + vmull.p8 $t1, $t1#lo, $b @ H = A2*B + vext.8 $t3#lo, $b, $b, #2 @ B2 + vmull.p8 $t3, $a, $t3#lo @ G = A*B2 + vext.8 $t2#lo, $a, $a, #3 @ A3 + veor $t0, $t0, $r @ L = E + F + vmull.p8 $t2, $t2#lo, $b @ J = A3*B + vext.8 $r#lo, $b, $b, #3 @ B3 + veor $t1, $t1, $t3 @ M = G + H + vmull.p8 $r, $a, $r#lo @ I = A*B3 + veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8 + vand $t0#hi, $t0#hi, $k48 + vext.8 $t3#lo, $b, $b, #4 @ B4 + veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16 + vand $t1#hi, $t1#hi, $k32 + vmull.p8 $t3, $a, $t3#lo @ K = A*B4 + veor $t2, $t2, $r @ N = I + J + veor $t0#lo, $t0#lo, $t0#hi + veor $t1#lo, $t1#lo, $t1#hi + veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24 + vand $t2#hi, $t2#hi, $k16 + vext.8 $t0, $t0, $t0, #15 + veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32 + vmov.i64 $t3#hi, #0 + vext.8 $t1, $t1, $t1, #14 + veor $t2#lo, $t2#lo, $t2#hi + vmull.p8 $r, $a, $b @ D = A*B + vext.8 $t3, $t3, $t3, #12 + vext.8 $t2, $t2, $t2, #13 + veor $t0, $t0, $t1 + veor $t2, $t2, $t3 + veor $r, $r, $t0 + veor $r, $r, $t2 +___ +} $code.=<<___; -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a .fpu neon +.global gcm_init_neon +.type gcm_init_neon,%function +.align 4 +gcm_init_neon: + vld1.64 $IN#hi,[r1,:64]! @ load H + vmov.i8 $t0,#0xe1 + vld1.64 $IN#lo,[r1,:64] + vshl.i64 $t0#hi,#57 + vshr.u64 $t0#lo,#63 @ t0=0xc2....01 + vdup.8 $t1,$IN#hi[7] + vshr.u64 $Hlo,$IN#lo,#63 + vshr.s8 $t1,#7 @ broadcast carry bit + vshl.i64 $IN,$IN,#1 + vand $t0,$t0,$t1 + vorr $IN#hi,$Hlo @ H<<<=1 + veor $IN,$IN,$t0 @ twisted H + vstmia r0,{$IN} + + ret @ bx lr +.size gcm_init_neon,.-gcm_init_neon + .global gcm_gmult_neon .type gcm_gmult_neon,%function .align 4 gcm_gmult_neon: - sub $Htbl,#16 @ point at H in GCM128_CTX - vld1.64 `&Dhi("$IN")`,[$Xi,:64]!@ load Xi - vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$IN")`,[$Xi,:64]! - vshr.u64 $mod,#32 - vldmia $Htbl,{$Hhi-$Hlo} @ load H - veor $zero,$zero + vld1.64 $IN#hi,[$Xi,:64]! @ load Xi + vld1.64 $IN#lo,[$Xi,:64]! + vmov.i64 $k48,#0x0000ffffffffffff + vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H + vmov.i64 $k32,#0x00000000ffffffff #ifdef __ARMEL__ vrev64.8 $IN,$IN #endif - veor $Qpost,$Qpost - veor $R,$R - mov $cnt,#16 - veor $Z,$Z + vmov.i64 $k16,#0x000000000000ffff + veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing mov $len,#16 - veor $Zo,$Zo - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte - b .Linner_neon + b .Lgmult_neon .size gcm_gmult_neon,.-gcm_gmult_neon .global gcm_ghash_neon .type gcm_ghash_neon,%function .align 4 gcm_ghash_neon: - vld1.64 `&Dhi("$Z")`,[$Xi,:64]! @ load Xi - vmov.i32 $mod,#0xe1 @ our irreducible polynomial - vld1.64 `&Dlo("$Z")`,[$Xi,:64]! - vshr.u64 $mod,#32 - vldmia $Xi,{$Hhi-$Hlo} @ load H - veor $zero,$zero - nop + vld1.64 $Xl#hi,[$Xi,:64]! @ load Xi + vld1.64 $Xl#lo,[$Xi,:64]! 
+ vmov.i64 $k48,#0x0000ffffffffffff + vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H + vmov.i64 $k32,#0x00000000ffffffff #ifdef __ARMEL__ - vrev64.8 $Z,$Z + vrev64.8 $Xl,$Xl #endif -.Louter_neon: - vld1.64 `&Dhi($IN)`,[$inp]! @ load inp - veor $Qpost,$Qpost - vld1.64 `&Dlo($IN)`,[$inp]! - veor $R,$R - mov $cnt,#16 + vmov.i64 $k16,#0x000000000000ffff + veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing + +.Loop_neon: + vld1.64 $IN#hi,[$inp]! @ load inp + vld1.64 $IN#lo,[$inp]! #ifdef __ARMEL__ vrev64.8 $IN,$IN #endif - veor $Zo,$Zo - veor $IN,$Z @ inp^=Xi - veor $Z,$Z - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte -.Linner_neon: - subs $cnt,$cnt,#1 - vmull.p8 $Qlo,$Hlo,$xi @ H.lo·Xi[i] - vmull.p8 $Qhi,$Hhi,$xi @ H.hi·Xi[i] - vext.8 $IN,$zero,#1 @ IN>>=8 - - veor $Z,$Qpost @ modulo-scheduled part - vshl.i64 `&Dlo("$R")`,#48 - vdup.8 $xi,`&Dlo("$IN")`[0] @ broadcast lowest byte - veor $T,`&Dlo("$Qlo")`,`&Dlo("$Z")` - - veor `&Dhi("$Z")`,`&Dlo("$R")` - vuzp.8 $Qlo,$Qhi - vsli.8 $Zo,$T,#1 @ compose the "carry" byte - vext.8 $Z,$zero,#1 @ Z>>=8 - - vmull.p8 $R,$Zo,$mod @ "carry"·0xe1 - vshr.u8 $Zo,$T,#7 @ save Z's bottom bit - vext.8 $Qpost,$Qlo,$zero,#1 @ Qlo>>=8 - veor $Z,$Qhi - bne .Linner_neon - - veor $Z,$Qpost @ modulo-scheduled artefact - vshl.i64 `&Dlo("$R")`,#48 - veor `&Dhi("$Z")`,`&Dlo("$R")` - - @ finalization, normalize Z:Zo - vand $Zo,$mod @ suffices to mask the bit - vshr.u64 `&Dhi(&Q("$Zo"))`,`&Dlo("$Z")`,#63 - vshl.i64 $Z,#1 + veor $IN,$Xl @ inp^=Xi +.Lgmult_neon: +___ + &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo +$code.=<<___; + veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing +___ + &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi) + &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi +$code.=<<___; + veor $Xm,$Xm,$Xl @ Karatsuba post-processing + veor $Xm,$Xm,$Xh + veor $Xl#hi,$Xl#hi,$Xm#lo + veor $Xh#lo,$Xh#lo,$Xm#hi @ Xh|Xl - 256-bit result + + @ equivalent of reduction_avx from ghash-x86_64.pl + vshl.i64 $t1,$Xl,#57 @ 1st phase + vshl.i64 $t2,$Xl,#62 + veor $t2,$t2,$t1 @ + vshl.i64 $t1,$Xl,#63 + veor $t2, $t2, $t1 @ + veor $Xl#hi,$Xl#hi,$t2#lo @ + veor $Xh#lo,$Xh#lo,$t2#hi + + vshr.u64 $t2,$Xl,#1 @ 2nd phase + veor $Xh,$Xh,$Xl + veor $Xl,$Xl,$t2 @ + vshr.u64 $t2,$t2,#6 + vshr.u64 $Xl,$Xl,#1 @ + veor $Xl,$Xl,$Xh @ + veor $Xl,$Xl,$t2 @ + subs $len,#16 - vorr $Z,`&Q("$Zo")` @ Z=Z:Zo<<1 - bne .Louter_neon + bne .Loop_neon #ifdef __ARMEL__ - vrev64.8 $Z,$Z + vrev64.8 $Xl,$Xl #endif sub $Xi,#16 - vst1.64 `&Dhi("$Z")`,[$Xi,:64]! @ write out Xi - vst1.64 `&Dlo("$Z")`,[$Xi,:64] + vst1.64 $Xl#hi,[$Xi,:64]! 
@ write out Xi + vst1.64 $Xl#lo,[$Xi,:64] - bx lr + ret @ bx lr .size gcm_ghash_neon,.-gcm_ghash_neon #endif ___ @@ -423,7 +481,13 @@ () .align 2 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl index 6a40d5d89c0cd2..39096b423ad805 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-s390x.pl @@ -186,13 +186,13 @@ sllg $rem1,$Zlo,3 xgr $Zlo,$tmp ngr $rem1,$x78 + sllg $tmp,$Zhi,60 j .Lghash_inner .align 16 .Lghash_inner: srlg $Zlo,$Zlo,4 - sllg $tmp,$Zhi,60 - xg $Zlo,8($nlo,$Htbl) srlg $Zhi,$Zhi,4 + xg $Zlo,8($nlo,$Htbl) llgc $xi,0($cnt,$Xi) xg $Zhi,0($nlo,$Htbl) sllg $nlo,$xi,4 @@ -213,9 +213,9 @@ sllg $rem1,$Zlo,3 xgr $Zlo,$tmp ngr $rem1,$x78 + sllg $tmp,$Zhi,60 brct $cnt,.Lghash_inner - sllg $tmp,$Zhi,60 srlg $Zlo,$Zlo,4 srlg $Zhi,$Zhi,4 xg $Zlo,8($nlo,$Htbl) diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-sparcv9.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-sparcv9.pl index 70e7b044a3ec1f..0365e0f1ff429e 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-sparcv9.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-sparcv9.pl @@ -36,6 +36,15 @@ # references to input data and Z.hi updates to achieve 12 cycles # timing. To anchor to something else, sha1-sparcv9.pl spends 11.6 # cycles to process one byte on UltraSPARC pre-Tx CPU and ~24 on T1. +# +# October 2012 +# +# Add VIS3 lookup-table-free implementation using polynomial +# multiplication xmulx[hi] and extended addition addxc[cc] +# instructions. 4.52/7.63x improvement on T3/T4 or in absolute +# terms 7.90/2.14 cycles per byte. On T4 multi-process benchmark +# saturates at ~15.5x single-process result on 8-core processor, +# or ~20.5GBps per 2.85GHz socket. $bits=32; for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } @@ -66,6 +75,10 @@ $inp="%i2"; $len="%i3"; +$code.=<<___ if ($bits==64); +.register %g2,#scratch +.register %g3,#scratch +___ $code.=<<___; .section ".text",#alloc,#execinstr @@ -321,10 +334,238 @@ restore .type gcm_gmult_4bit,#function .size gcm_gmult_4bit,(.-gcm_gmult_4bit) -.asciz "GHASH for SPARCv9, CRYPTOGAMS by " +___ + +{{{ +# Straightforward 128x128-bit multiplication using Karatsuba algorithm +# followed by pair of 64-bit reductions [with a shortcut in first one, +# which allowed to break dependency between reductions and remove one +# multiplication from critical path]. While it might be suboptimal +# with regard to sheer number of multiplications, other methods [such +# as aggregate reduction] would require more 64-bit registers, which +# we don't have in 32-bit application context. + +($Xip,$Htable,$inp,$len)=map("%i$_",(0..3)); + +($Hhl,$Hlo,$Hhi,$Xlo,$Xhi,$xE1,$sqr, $C0,$C1,$C2,$C3,$V)= + (map("%o$_",(0..5,7)),map("%g$_",(1..5))); + +($shl,$shr)=map("%l$_",(0..7)); + +# For details regarding "twisted H" see ghash-x86.pl. +$code.=<<___; +.globl gcm_init_vis3 +.align 32 +gcm_init_vis3: + save %sp,-$frame,%sp + + ldx [%i1+0],$Hhi + ldx [%i1+8],$Hlo + mov 0xE1,$Xhi + mov 1,$Xlo + sllx $Xhi,57,$Xhi + srax $Hhi,63,$C0 ! 
broadcast carry + addcc $Hlo,$Hlo,$Hlo ! H<<=1 + addxc $Hhi,$Hhi,$Hhi + and $C0,$Xlo,$Xlo + and $C0,$Xhi,$Xhi + xor $Xlo,$Hlo,$Hlo + xor $Xhi,$Hhi,$Hhi + stx $Hlo,[%i0+8] ! save twisted H + stx $Hhi,[%i0+0] + + sethi %hi(0xA0406080),$V + sethi %hi(0x20C0E000),%l0 + or $V,%lo(0xA0406080),$V + or %l0,%lo(0x20C0E000),%l0 + sllx $V,32,$V + or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000 + stx $V,[%i0+16] + + ret + restore +.type gcm_init_vis3,#function +.size gcm_init_vis3,.-gcm_init_vis3 + +.globl gcm_gmult_vis3 +.align 32 +gcm_gmult_vis3: + save %sp,-$frame,%sp + + ldx [$Xip+8],$Xlo ! load Xi + ldx [$Xip+0],$Xhi + ldx [$Htable+8],$Hlo ! load twisted H + ldx [$Htable+0],$Hhi + + mov 0xE1,%l7 + sllx %l7,57,$xE1 ! 57 is not a typo + ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000 + + xor $Hhi,$Hlo,$Hhl ! Karatsuba pre-processing + xmulx $Xlo,$Hlo,$C0 + xor $Xlo,$Xhi,$C2 ! Karatsuba pre-processing + xmulx $C2,$Hhl,$C1 + xmulxhi $Xlo,$Hlo,$Xlo + xmulxhi $C2,$Hhl,$C2 + xmulxhi $Xhi,$Hhi,$C3 + xmulx $Xhi,$Hhi,$Xhi + + sll $C0,3,$sqr + srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)] + xor $C0,$sqr,$sqr + sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f] + + xor $C0,$C1,$C1 ! Karatsuba post-processing + xor $Xlo,$C2,$C2 + xor $sqr,$Xlo,$Xlo ! real destination is $C1 + xor $C3,$C2,$C2 + xor $Xlo,$C1,$C1 + xor $Xhi,$C2,$C2 + xor $Xhi,$C1,$C1 + + xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56 + xor $C0,$C2,$C2 + xmulx $C1,$xE1,$C0 + xor $C1,$C3,$C3 + xmulxhi $C1,$xE1,$C1 + + xor $Xlo,$C2,$C2 + xor $C0,$C2,$C2 + xor $C1,$C3,$C3 + + stx $C2,[$Xip+8] ! save Xi + stx $C3,[$Xip+0] + + ret + restore +.type gcm_gmult_vis3,#function +.size gcm_gmult_vis3,.-gcm_gmult_vis3 + +.globl gcm_ghash_vis3 +.align 32 +gcm_ghash_vis3: + save %sp,-$frame,%sp + + ldx [$Xip+8],$C2 ! load Xi + ldx [$Xip+0],$C3 + ldx [$Htable+8],$Hlo ! load twisted H + ldx [$Htable+0],$Hhi + + mov 0xE1,%l7 + sllx %l7,57,$xE1 ! 57 is not a typo + ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000 + + and $inp,7,$shl + andn $inp,7,$inp + sll $shl,3,$shl + prefetch [$inp+63], 20 + sub %g0,$shl,$shr + + xor $Hhi,$Hlo,$Hhl ! Karatsuba pre-processing +.Loop: + ldx [$inp+8],$Xlo + brz,pt $shl,1f + ldx [$inp+0],$Xhi + + ldx [$inp+16],$C1 ! align data + srlx $Xlo,$shr,$C0 + sllx $Xlo,$shl,$Xlo + sllx $Xhi,$shl,$Xhi + srlx $C1,$shr,$C1 + or $C0,$Xhi,$Xhi + or $C1,$Xlo,$Xlo +1: + add $inp,16,$inp + sub $len,16,$len + xor $C2,$Xlo,$Xlo + xor $C3,$Xhi,$Xhi + prefetch [$inp+63], 20 + + xmulx $Xlo,$Hlo,$C0 + xor $Xlo,$Xhi,$C2 ! Karatsuba pre-processing + xmulx $C2,$Hhl,$C1 + xmulxhi $Xlo,$Hlo,$Xlo + xmulxhi $C2,$Hhl,$C2 + xmulxhi $Xhi,$Hhi,$C3 + xmulx $Xhi,$Hhi,$Xhi + + sll $C0,3,$sqr + srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)] + xor $C0,$sqr,$sqr + sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f] + + xor $C0,$C1,$C1 ! Karatsuba post-processing + xor $Xlo,$C2,$C2 + xor $sqr,$Xlo,$Xlo ! real destination is $C1 + xor $C3,$C2,$C2 + xor $Xlo,$C1,$C1 + xor $Xhi,$C2,$C2 + xor $Xhi,$C1,$C1 + + xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56 + xor $C0,$C2,$C2 + xmulx $C1,$xE1,$C0 + xor $C1,$C3,$C3 + xmulxhi $C1,$xE1,$C1 + + xor $Xlo,$C2,$C2 + xor $C0,$C2,$C2 + brnz,pt $len,.Loop + xor $C1,$C3,$C3 + + stx $C2,[$Xip+8] ! 
save Xi + stx $C3,[$Xip+0] + + ret + restore +.type gcm_ghash_vis3,#function +.size gcm_ghash_vis3,.-gcm_ghash_vis3 +___ +}}} +$code.=<<___; +.asciz "GHASH for SPARCv9/VIS3, CRYPTOGAMS by " .align 4 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. +sub unvis3 { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my ($ref,$opf); +my %visopf = ( "addxc" => 0x011, + "addxccc" => 0x013, + "xmulx" => 0x115, + "xmulxhi" => 0x116 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%([goli])([0-9])/); + $_=$bias{$1}+$2; + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(xmulx[hi]*|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unvis3($1,$2,$3,$4) + /ge; + + print $_,"\n"; +} + close STDOUT; diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl index 83c727e07f9517..23a5527b30af3b 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl @@ -12,25 +12,27 @@ # The module implements "4-bit" GCM GHASH function and underlying # single multiplication operation in GF(2^128). "4-bit" means that it # uses 256 bytes per-key table [+64/128 bytes fixed table]. It has two -# code paths: vanilla x86 and vanilla MMX. Former will be executed on -# 486 and Pentium, latter on all others. MMX GHASH features so called +# code paths: vanilla x86 and vanilla SSE. Former will be executed on +# 486 and Pentium, latter on all others. SSE GHASH features so called # "528B" variant of "4-bit" method utilizing additional 256+16 bytes # of per-key storage [+512 bytes shared table]. Performance results # are for streamed GHASH subroutine and are expressed in cycles per # processed byte, less is better: # -# gcc 2.95.3(*) MMX assembler x86 assembler +# gcc 2.95.3(*) SSE assembler x86 assembler # # Pentium 105/111(**) - 50 # PIII 68 /75 12.2 24 # P4 125/125 17.8 84(***) # Opteron 66 /70 10.1 30 # Core2 54 /67 8.4 18 +# Atom 105/105 16.8 53 +# VIA Nano 69 /71 13.0 27 # # (*) gcc 3.4.x was observed to generate few percent slower code, # which is one of reasons why 2.95.3 results were chosen, # another reason is lack of 3.4.x results for older CPUs; -# comparison with MMX results is not completely fair, because C +# comparison with SSE results is not completely fair, because C # results are for vanilla "256B" implementation, while # assembler results are for "528B";-) # (**) second number is result for code compiled with -fPIC flag, @@ -40,8 +42,8 @@ # # To summarize, it's >2-5 times faster than gcc-generated code. To # anchor it to something else SHA1 assembler processes one byte in -# 11-13 cycles on contemporary x86 cores. As for choice of MMX in -# particular, see comment at the end of the file... +# ~7 cycles on contemporary x86 cores. As for choice of MMX/SSE +# in particular, see comment at the end of the file... 
# May 2010 # @@ -113,6 +115,16 @@ # similar manner resulted in almost 20% degradation on Sandy Bridge, # where original 64-bit code processes one byte in 1.95 cycles. +##################################################################### +# For reference, AMD Bulldozer processes one byte in 1.98 cycles in +# 32-bit mode and 1.89 in 64-bit. + +# February 2013 +# +# Overhaul: aggregate Karatsuba post-processing, improve ILP in +# reduction_alg9. Resulting performance is 1.96 cycles per byte on +# Westmere, 1.95 - on Sandy/Ivy Bridge, 1.76 - on Bulldozer. + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; @@ -822,17 +834,18 @@ () &static_label("bswap"); sub clmul64x64_T2 { # minimal "register" pressure -my ($Xhi,$Xi,$Hkey)=@_; +my ($Xhi,$Xi,$Hkey,$HK)=@_; &movdqa ($Xhi,$Xi); # &pshufd ($T1,$Xi,0b01001110); - &pshufd ($T2,$Hkey,0b01001110); + &pshufd ($T2,$Hkey,0b01001110) if (!defined($HK)); &pxor ($T1,$Xi); # - &pxor ($T2,$Hkey); + &pxor ($T2,$Hkey) if (!defined($HK)); + $HK=$T2 if (!defined($HK)); &pclmulqdq ($Xi,$Hkey,0x00); ####### &pclmulqdq ($Xhi,$Hkey,0x11); ####### - &pclmulqdq ($T1,$T2,0x00); ####### + &pclmulqdq ($T1,$HK,0x00); ####### &xorps ($T1,$Xi); # &xorps ($T1,$Xhi); # @@ -879,31 +892,32 @@ sub clmul64x64_T3 { # below. Algorithm 9 was therefore chosen for # further optimization... -sub reduction_alg9 { # 17/13 times faster than Intel version +sub reduction_alg9 { # 17/11 times faster than Intel version my ($Xhi,$Xi) = @_; # 1st phase - &movdqa ($T1,$Xi); # + &movdqa ($T2,$Xi); # + &movdqa ($T1,$Xi); + &psllq ($Xi,5); + &pxor ($T1,$Xi); # &psllq ($Xi,1); &pxor ($Xi,$T1); # - &psllq ($Xi,5); # - &pxor ($Xi,$T1); # &psllq ($Xi,57); # - &movdqa ($T2,$Xi); # + &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T2,8); # - &pxor ($Xi,$T1); - &pxor ($Xhi,$T2); # + &psrldq ($T1,8); # + &pxor ($Xi,$T2); + &pxor ($Xhi,$T1); # # 2nd phase &movdqa ($T2,$Xi); + &psrlq ($Xi,1); + &pxor ($Xhi,$T2); # + &pxor ($T2,$Xi); &psrlq ($Xi,5); &pxor ($Xi,$T2); # &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # - &pxor ($T2,$Xhi); - &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # + &pxor ($Xi,$Xhi) # } &function_begin_B("gcm_init_clmul"); @@ -937,8 +951,14 @@ sub reduction_alg9 { # 17/13 times faster than Intel version &clmul64x64_T2 ($Xhi,$Xi,$Hkey); &reduction_alg9 ($Xhi,$Xi); + &pshufd ($T1,$Hkey,0b01001110); + &pshufd ($T2,$Xi,0b01001110); + &pxor ($T1,$Hkey); # Karatsuba pre-processing &movdqu (&QWP(0,$Htbl),$Hkey); # save H + &pxor ($T2,$Xi); # Karatsuba pre-processing &movdqu (&QWP(16,$Htbl),$Xi); # save H^2 + &palignr ($T2,$T1,8); # low part is H.lo^H.hi + &movdqu (&QWP(32,$Htbl),$T2); # save Karatsuba "salt" &ret (); &function_end_B("gcm_init_clmul"); @@ -956,8 +976,9 @@ sub reduction_alg9 { # 17/13 times faster than Intel version &movdqa ($T3,&QWP(0,$const)); &movups ($Hkey,&QWP(0,$Htbl)); &pshufb ($Xi,$T3); + &movups ($T2,&QWP(32,$Htbl)); - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); + &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$T2); &reduction_alg9 ($Xhi,$Xi); &pshufb ($Xi,$T3); @@ -994,79 +1015,109 @@ sub reduction_alg9 { # 17/13 times faster than Intel version &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 &pshufb ($T1,$T3); &pshufb ($Xn,$T3); + &movdqu ($T3,&QWP(32,$Htbl)); &pxor ($Xi,$T1); # Ii+Xi - &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 + &pshufd ($T1,$Xn,0b01001110); # H*Ii+1 + &movdqa ($Xhn,$Xn); + &pxor ($T1,$Xn); # + &lea ($inp,&DWP(32,$inp)); # i+=2 + + &pclmulqdq ($Xn,$Hkey,0x00); ####### + &pclmulqdq ($Xhn,$Hkey,0x11); ####### + &pclmulqdq ($T1,$T3,0x00); ####### &movups 
($Hkey,&QWP(16,$Htbl)); # load H^2 + &nop (); - &lea ($inp,&DWP(32,$inp)); # i+=2 &sub ($len,0x20); &jbe (&label("even_tail")); + &jmp (&label("mod_loop")); -&set_label("mod_loop"); - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) - &movdqu ($T1,&QWP(0,$inp)); # Ii - &movups ($Hkey,&QWP(0,$Htbl)); # load H +&set_label("mod_loop",32); + &pshufd ($T2,$Xi,0b01001110); # H^2*(Ii+Xi) + &movdqa ($Xhi,$Xi); + &pxor ($T2,$Xi); # + &nop (); - &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) - &pxor ($Xhi,$Xhn); + &pclmulqdq ($Xi,$Hkey,0x00); ####### + &pclmulqdq ($Xhi,$Hkey,0x11); ####### + &pclmulqdq ($T2,$T3,0x10); ####### + &movups ($Hkey,&QWP(0,$Htbl)); # load H - &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 - &pshufb ($T1,$T3); - &pshufb ($Xn,$T3); + &xorps ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) + &movdqa ($T3,&QWP(0,$const)); + &xorps ($Xhi,$Xhn); + &movdqu ($Xhn,&QWP(0,$inp)); # Ii + &pxor ($T1,$Xi); # aggregated Karatsuba post-processing + &movdqu ($Xn,&QWP(16,$inp)); # Ii+1 + &pxor ($T1,$Xhi); # - &movdqa ($T3,$Xn); #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1 - &movdqa ($Xhn,$Xn); - &pxor ($Xhi,$T1); # "Ii+Xi", consume early + &pshufb ($Xhn,$T3); + &pxor ($T2,$T1); # - &movdqa ($T1,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase + &movdqa ($T1,$T2); # + &psrldq ($T2,8); + &pslldq ($T1,8); # + &pxor ($Xhi,$T2); + &pxor ($Xi,$T1); # + &pshufb ($Xn,$T3); + &pxor ($Xhi,$Xhn); # "Ii+Xi", consume early + + &movdqa ($Xhn,$Xn); #&clmul64x64_TX ($Xhn,$Xn,$Hkey); H*Ii+1 + &movdqa ($T2,$Xi); #&reduction_alg9($Xhi,$Xi); 1st phase + &movdqa ($T1,$Xi); + &psllq ($Xi,5); + &pxor ($T1,$Xi); # &psllq ($Xi,1); &pxor ($Xi,$T1); # - &psllq ($Xi,5); # - &pxor ($Xi,$T1); # &pclmulqdq ($Xn,$Hkey,0x00); ####### + &movups ($T3,&QWP(32,$Htbl)); &psllq ($Xi,57); # - &movdqa ($T2,$Xi); # + &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T2,8); # - &pxor ($Xi,$T1); - &pshufd ($T1,$T3,0b01001110); + &psrldq ($T1,8); # + &pxor ($Xi,$T2); + &pxor ($Xhi,$T1); # + &pshufd ($T1,$Xhn,0b01001110); + &movdqa ($T2,$Xi); # 2nd phase + &psrlq ($Xi,1); + &pxor ($T1,$Xhn); &pxor ($Xhi,$T2); # - &pxor ($T1,$T3); - &pshufd ($T3,$Hkey,0b01001110); - &pxor ($T3,$Hkey); # - &pclmulqdq ($Xhn,$Hkey,0x11); ####### - &movdqa ($T2,$Xi); # 2nd phase + &movups ($Hkey,&QWP(16,$Htbl)); # load H^2 + &pxor ($T2,$Xi); &psrlq ($Xi,5); &pxor ($Xi,$T2); # &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # - &pxor ($T2,$Xhi); - &psrlq ($Xi,1); # - &pxor ($Xi,$T2); # - + &pxor ($Xi,$Xhi) # &pclmulqdq ($T1,$T3,0x00); ####### - &movups ($Hkey,&QWP(16,$Htbl)); # load H^2 - &xorps ($T1,$Xn); # - &xorps ($T1,$Xhn); # - - &movdqa ($T3,$T1); # - &psrldq ($T1,8); - &pslldq ($T3,8); # - &pxor ($Xhn,$T1); - &pxor ($Xn,$T3); # - &movdqa ($T3,&QWP(0,$const)); &lea ($inp,&DWP(32,$inp)); &sub ($len,0x20); &ja (&label("mod_loop")); &set_label("even_tail"); - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H^2*(Ii+Xi) + &pshufd ($T2,$Xi,0b01001110); # H^2*(Ii+Xi) + &movdqa ($Xhi,$Xi); + &pxor ($T2,$Xi); # - &pxor ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) - &pxor ($Xhi,$Xhn); + &pclmulqdq ($Xi,$Hkey,0x00); ####### + &pclmulqdq ($Xhi,$Hkey,0x11); ####### + &pclmulqdq ($T2,$T3,0x10); ####### + &movdqa ($T3,&QWP(0,$const)); + + &xorps ($Xi,$Xn); # (H*Ii+1) + H^2*(Ii+Xi) + &xorps ($Xhi,$Xhn); + &pxor ($T1,$Xi); # aggregated Karatsuba post-processing + &pxor ($T1,$Xhi); # + + &pxor ($T2,$T1); # + + &movdqa ($T1,$T2); # + &psrldq ($T2,8); + &pslldq ($T1,8); # + &pxor ($Xhi,$T2); + &pxor ($Xi,$T1); # &reduction_alg9 ($Xhi,$Xi); @@ -1273,13 +1324,6 @@ sub reduction_alg5 { # 19/16 times faster than Intel version 
&set_label("bswap",64); &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial -}} # $sse2 - -&set_label("rem_4bit",64); - &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); - &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); - &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S); - &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S); &set_label("rem_8bit",64); &data_short(0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E); &data_short(0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E); @@ -1313,6 +1357,13 @@ sub reduction_alg5 { # 19/16 times faster than Intel version &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); +}} # $sse2 + +&set_label("rem_4bit",64); + &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); + &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); + &data_word(0,0xE100<<$S,0,0xFD20<<$S,0,0xD940<<$S,0,0xC560<<$S); + &data_word(0,0x9180<<$S,0,0x8DA0<<$S,0,0xA9C0<<$S,0,0xB5E0<<$S); }}} # !$x86only &asciz("GHASH for x86, CRYPTOGAMS by "); diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl index 38d779edbcfde5..6e656ca13b8029 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl @@ -22,6 +22,8 @@ # P4 28.6 14.0 +100% # Opteron 19.3 7.7 +150% # Core2 17.8 8.1(**) +120% +# Atom 31.6 16.8 +88% +# VIA Nano 21.8 10.1 +115% # # (*) comparison is not completely fair, because C results are # for vanilla "256B" implementation, while assembler results @@ -39,6 +41,44 @@ # providing access to a Westmere-based system on behalf of Intel # Open Source Technology Centre. +# December 2012 +# +# Overhaul: aggregate Karatsuba post-processing, improve ILP in +# reduction_alg9, increase reduction aggregate factor to 4x. As for +# the latter. ghash-x86.pl discusses that it makes lesser sense to +# increase aggregate factor. Then why increase here? Critical path +# consists of 3 independent pclmulqdq instructions, Karatsuba post- +# processing and reduction. "On top" of this we lay down aggregated +# multiplication operations, triplets of independent pclmulqdq's. As +# issue rate for pclmulqdq is limited, it makes lesser sense to +# aggregate more multiplications than it takes to perform remaining +# non-multiplication operations. 2x is near-optimal coefficient for +# contemporary Intel CPUs (therefore modest improvement coefficient), +# but not for Bulldozer. Latter is because logical SIMD operations +# are twice as slow in comparison to Intel, so that critical path is +# longer. A CPU with higher pclmulqdq issue rate would also benefit +# from higher aggregate factor... +# +# Westmere 1.78(+13%) +# Sandy Bridge 1.80(+8%) +# Ivy Bridge 1.80(+7%) +# Haswell 0.55(+93%) (if system doesn't support AVX) +# Broadwell 0.45(+110%)(if system doesn't support AVX) +# Bulldozer 1.49(+27%) +# Silvermont 2.88(+13%) + +# March 2013 +# +# ... 8x aggregate factor AVX code path is using reduction algorithm +# suggested by Shay Gueron[1]. Even though contemporary AVX-capable +# CPUs such as Sandy and Ivy Bridge can execute it, the code performs +# sub-optimally in comparison to above mentioned version. But thanks +# to Ilya Albrekht and Max Locktyukhin of Intel Corp. 
we knew that +# it performs in 0.41 cycles per byte on Haswell processor, and in +# 0.29 on Broadwell. +# +# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest + $flavour = shift; $output = shift; if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } @@ -50,9 +90,30 @@ ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; +$do4xaggr=1; + # common register layout $nlo="%rax"; $nhi="%rbx"; @@ -160,6 +221,7 @@ () $code=<<___; .text +.extern OPENSSL_ia32cap_P .globl gcm_gmult_4bit .type gcm_gmult_4bit,\@function,2 @@ -352,19 +414,27 @@ () ($T1,$T2,$T3)=("%xmm3","%xmm4","%xmm5"); sub clmul64x64_T2 { # minimal register pressure -my ($Xhi,$Xi,$Hkey,$modulo)=@_; +my ($Xhi,$Xi,$Hkey,$HK)=@_; -$code.=<<___ if (!defined($modulo)); +if (!defined($HK)) { $HK = $T2; +$code.=<<___; movdqa $Xi,$Xhi # pshufd \$0b01001110,$Xi,$T1 pshufd \$0b01001110,$Hkey,$T2 pxor $Xi,$T1 # pxor $Hkey,$T2 ___ +} else { +$code.=<<___; + movdqa $Xi,$Xhi # + pshufd \$0b01001110,$Xi,$T1 + pxor $Xi,$T1 # +___ +} $code.=<<___; pclmulqdq \$0x00,$Hkey,$Xi ####### pclmulqdq \$0x11,$Hkey,$Xhi ####### - pclmulqdq \$0x00,$T2,$T1 ####### + pclmulqdq \$0x00,$HK,$T1 ####### pxor $Xi,$T1 # pxor $Xhi,$T1 # @@ -376,42 +446,53 @@ sub clmul64x64_T2 { # minimal register pressure ___ } -sub reduction_alg9 { # 17/13 times faster than Intel version +sub reduction_alg9 { # 17/11 times faster than Intel version my ($Xhi,$Xi) = @_; $code.=<<___; # 1st phase - movdqa $Xi,$T1 # + movdqa $Xi,$T2 # + movdqa $Xi,$T1 + psllq \$5,$Xi + pxor $Xi,$T1 # psllq \$1,$Xi pxor $T1,$Xi # - psllq \$5,$Xi # - pxor $T1,$Xi # psllq \$57,$Xi # - movdqa $Xi,$T2 # + movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T2 # - pxor $T1,$Xi - pxor $T2,$Xhi # + psrldq \$8,$T1 # + pxor $T2,$Xi + pxor $T1,$Xhi # # 2nd phase movdqa $Xi,$T2 + psrlq \$1,$Xi + pxor $T2,$Xhi # + pxor $Xi,$T2 psrlq \$5,$Xi pxor $T2,$Xi # psrlq \$1,$Xi # - pxor $T2,$Xi # - pxor $Xhi,$T2 - psrlq \$1,$Xi # - pxor $T2,$Xi # + pxor $Xhi,$Xi # ___ } { my ($Htbl,$Xip)=@_4args; + my $HK="%xmm6"; $code.=<<___; .globl gcm_init_clmul .type gcm_init_clmul,\@abi-omnipotent .align 16 gcm_init_clmul: +.L_init_clmul: +___ +$code.=<<___ if ($win64); +.LSEH_begin_gcm_init_clmul: + # I can't trust assembler to use specific encoding:-( + .byte 0x48,0x83,0xec,0x18 #sub $0x18,%rsp + .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) +___ +$code.=<<___; movdqu ($Xip),$Hkey pshufd \$0b01001110,$Hkey,$Hkey # dword swap @@ -430,13 +511,47 @@ sub reduction_alg9 { # 17/13 times faster than Intel version pxor $T3,$Hkey # if(carry) H^=0x1c2_polynomial # calculate H^2 + pshufd \$0b01001110,$Hkey,$HK movdqa $Hkey,$Xi + pxor $Hkey,$HK ___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); + &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$HK); &reduction_alg9 ($Xhi,$Xi); $code.=<<___; - movdqu $Hkey,($Htbl) # save H - movdqu $Xi,16($Htbl) # save H^2 + pshufd 
\$0b01001110,$Hkey,$T1 + pshufd \$0b01001110,$Xi,$T2 + pxor $Hkey,$T1 # Karatsuba pre-processing + movdqu $Hkey,0x00($Htbl) # save H + pxor $Xi,$T2 # Karatsuba pre-processing + movdqu $Xi,0x10($Htbl) # save H^2 + palignr \$8,$T1,$T2 # low part is H.lo^H.hi... + movdqu $T2,0x20($Htbl) # save Karatsuba "salt" +___ +if ($do4xaggr) { + &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$HK); # H^3 + &reduction_alg9 ($Xhi,$Xi); +$code.=<<___; + movdqa $Xi,$T3 +___ + &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$HK); # H^4 + &reduction_alg9 ($Xhi,$Xi); +$code.=<<___; + pshufd \$0b01001110,$T3,$T1 + pshufd \$0b01001110,$Xi,$T2 + pxor $T3,$T1 # Karatsuba pre-processing + movdqu $T3,0x30($Htbl) # save H^3 + pxor $Xi,$T2 # Karatsuba pre-processing + movdqu $Xi,0x40($Htbl) # save H^4 + palignr \$8,$T1,$T2 # low part is H^3.lo^H^3.hi... + movdqu $T2,0x50($Htbl) # save Karatsuba "salt" +___ +} +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + lea 0x18(%rsp),%rsp +.LSEH_end_gcm_init_clmul: +___ +$code.=<<___; ret .size gcm_init_clmul,.-gcm_init_clmul ___ @@ -449,13 +564,38 @@ sub reduction_alg9 { # 17/13 times faster than Intel version .type gcm_gmult_clmul,\@abi-omnipotent .align 16 gcm_gmult_clmul: +.L_gmult_clmul: movdqu ($Xip),$Xi movdqa .Lbswap_mask(%rip),$T3 movdqu ($Htbl),$Hkey + movdqu 0x20($Htbl),$T2 pshufb $T3,$Xi ___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); - &reduction_alg9 ($Xhi,$Xi); + &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$T2); +$code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0)); + # experimental alternative. special thing about is that there + # no dependency between the two multiplications... + mov \$`0xE1<<1`,%eax + mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff + mov \$0x07,%r11d + movq %rax,$T1 + movq %r10,$T2 + movq %r11,$T3 # borrow $T3 + pand $Xi,$T3 + pshufb $T3,$T2 # ($Xi&7)·0xE0 + movq %rax,$T3 + pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1) + pxor $Xi,$T2 + pslldq \$15,$T2 + paddd $T2,$T2 # <<(64+56+1) + pxor $T2,$Xi + pclmulqdq \$0x01,$T3,$Xi + movdqa .Lbswap_mask(%rip),$T3 # reload $T3 + psrldq \$1,$T1 + pxor $T1,$Xhi + pslldq \$7,$Xi + pxor $Xhi,$Xi +___ $code.=<<___; pshufb $T3,$Xi movdqu $Xi,($Xip) @@ -465,129 +605,327 @@ sub reduction_alg9 { # 17/13 times faster than Intel version } { my ($Xip,$Htbl,$inp,$len)=@_4args; - my $Xn="%xmm6"; - my $Xhn="%xmm7"; - my $Hkey2="%xmm8"; - my $T1n="%xmm9"; - my $T2n="%xmm10"; + my ($Xln,$Xmn,$Xhn,$Hkey2,$HK) = map("%xmm$_",(3..7)); + my ($T1,$T2,$T3)=map("%xmm$_",(8..10)); $code.=<<___; .globl gcm_ghash_clmul .type gcm_ghash_clmul,\@abi-omnipotent -.align 16 +.align 32 gcm_ghash_clmul: +.L_ghash_clmul: ___ $code.=<<___ if ($win64); + lea -0x88(%rsp),%rax .LSEH_begin_gcm_ghash_clmul: # I can't trust assembler to use specific encoding:-( - .byte 0x48,0x83,0xec,0x58 #sub \$0x58,%rsp - .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) - .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp) - .byte 0x44,0x0f,0x29,0x44,0x24,0x20 #movaps %xmm8,0x20(%rsp) - .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 #movaps %xmm9,0x30(%rsp) - .byte 0x44,0x0f,0x29,0x54,0x24,0x40 #movaps %xmm10,0x40(%rsp) + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax),%rsp + .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6,-0x20(%rax) + .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7,-0x10(%rax) + .byte 0x44,0x0f,0x29,0x00 #movaps %xmm8,0(%rax) + .byte 0x44,0x0f,0x29,0x48,0x10 #movaps %xmm9,0x10(%rax) + .byte 0x44,0x0f,0x29,0x50,0x20 #movaps %xmm10,0x20(%rax) + .byte 0x44,0x0f,0x29,0x58,0x30 #movaps %xmm11,0x30(%rax) + .byte 0x44,0x0f,0x29,0x60,0x40 #movaps %xmm12,0x40(%rax) + .byte 0x44,0x0f,0x29,0x68,0x50 #movaps %xmm13,0x50(%rax) + .byte 
0x44,0x0f,0x29,0x70,0x60 #movaps %xmm14,0x60(%rax) + .byte 0x44,0x0f,0x29,0x78,0x70 #movaps %xmm15,0x70(%rax) ___ $code.=<<___; movdqa .Lbswap_mask(%rip),$T3 movdqu ($Xip),$Xi movdqu ($Htbl),$Hkey + movdqu 0x20($Htbl),$HK pshufb $T3,$Xi sub \$0x10,$len jz .Lodd_tail - movdqu 16($Htbl),$Hkey2 + movdqu 0x10($Htbl),$Hkey2 +___ +if ($do4xaggr) { +my ($Xl,$Xm,$Xh,$Hkey3,$Hkey4)=map("%xmm$_",(11..15)); + +$code.=<<___; + mov OPENSSL_ia32cap_P+4(%rip),%eax + cmp \$0x30,$len + jb .Lskip4x + + and \$`1<<26|1<<22`,%eax # isolate MOVBE+XSAVE + cmp \$`1<<22`,%eax # check for MOVBE without XSAVE + je .Lskip4x + + sub \$0x30,$len + mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff + movdqu 0x30($Htbl),$Hkey3 + movdqu 0x40($Htbl),$Hkey4 + + ####### + # Xi+4 =[(H*Ii+3) + (H^2*Ii+2) + (H^3*Ii+1) + H^4*(Ii+Xi)] mod P + # + movdqu 0x30($inp),$Xln + movdqu 0x20($inp),$Xl + pshufb $T3,$Xln + pshufb $T3,$Xl + movdqa $Xln,$Xhn + pshufd \$0b01001110,$Xln,$Xmn + pxor $Xln,$Xmn + pclmulqdq \$0x00,$Hkey,$Xln + pclmulqdq \$0x11,$Hkey,$Xhn + pclmulqdq \$0x00,$HK,$Xmn + + movdqa $Xl,$Xh + pshufd \$0b01001110,$Xl,$Xm + pxor $Xl,$Xm + pclmulqdq \$0x00,$Hkey2,$Xl + pclmulqdq \$0x11,$Hkey2,$Xh + pclmulqdq \$0x10,$HK,$Xm + xorps $Xl,$Xln + xorps $Xh,$Xhn + movups 0x50($Htbl),$HK + xorps $Xm,$Xmn + + movdqu 0x10($inp),$Xl + movdqu 0($inp),$T1 + pshufb $T3,$Xl + pshufb $T3,$T1 + movdqa $Xl,$Xh + pshufd \$0b01001110,$Xl,$Xm + pxor $T1,$Xi + pxor $Xl,$Xm + pclmulqdq \$0x00,$Hkey3,$Xl + movdqa $Xi,$Xhi + pshufd \$0b01001110,$Xi,$T1 + pxor $Xi,$T1 + pclmulqdq \$0x11,$Hkey3,$Xh + pclmulqdq \$0x00,$HK,$Xm + xorps $Xl,$Xln + xorps $Xh,$Xhn + + lea 0x40($inp),$inp + sub \$0x40,$len + jc .Ltail4x + + jmp .Lmod4_loop +.align 32 +.Lmod4_loop: + pclmulqdq \$0x00,$Hkey4,$Xi + xorps $Xm,$Xmn + movdqu 0x30($inp),$Xl + pshufb $T3,$Xl + pclmulqdq \$0x11,$Hkey4,$Xhi + xorps $Xln,$Xi + movdqu 0x20($inp),$Xln + movdqa $Xl,$Xh + pclmulqdq \$0x10,$HK,$T1 + pshufd \$0b01001110,$Xl,$Xm + xorps $Xhn,$Xhi + pxor $Xl,$Xm + pshufb $T3,$Xln + movups 0x20($Htbl),$HK + xorps $Xmn,$T1 + pclmulqdq \$0x00,$Hkey,$Xl + pshufd \$0b01001110,$Xln,$Xmn + + pxor $Xi,$T1 # aggregated Karatsuba post-processing + movdqa $Xln,$Xhn + pxor $Xhi,$T1 # + pxor $Xln,$Xmn + movdqa $T1,$T2 # + pclmulqdq \$0x11,$Hkey,$Xh + pslldq \$8,$T1 + psrldq \$8,$T2 # + pxor $T1,$Xi + movdqa .L7_mask(%rip),$T1 + pxor $T2,$Xhi # + movq %rax,$T2 + + pand $Xi,$T1 # 1st phase + pshufb $T1,$T2 # + pxor $Xi,$T2 # + pclmulqdq \$0x00,$HK,$Xm + psllq \$57,$T2 # + movdqa $T2,$T1 # + pslldq \$8,$T2 + pclmulqdq \$0x00,$Hkey2,$Xln + psrldq \$8,$T1 # + pxor $T2,$Xi + pxor $T1,$Xhi # + movdqu 0($inp),$T1 + + movdqa $Xi,$T2 # 2nd phase + psrlq \$1,$Xi + pclmulqdq \$0x11,$Hkey2,$Xhn + xorps $Xl,$Xln + movdqu 0x10($inp),$Xl + pshufb $T3,$Xl + pclmulqdq \$0x10,$HK,$Xmn + xorps $Xh,$Xhn + movups 0x50($Htbl),$HK + pshufb $T3,$T1 + pxor $T2,$Xhi # + pxor $Xi,$T2 + psrlq \$5,$Xi + + movdqa $Xl,$Xh + pxor $Xm,$Xmn + pshufd \$0b01001110,$Xl,$Xm + pxor $T2,$Xi # + pxor $T1,$Xhi + pxor $Xl,$Xm + pclmulqdq \$0x00,$Hkey3,$Xl + psrlq \$1,$Xi # + pxor $Xhi,$Xi # + movdqa $Xi,$Xhi + pclmulqdq \$0x11,$Hkey3,$Xh + xorps $Xl,$Xln + pshufd \$0b01001110,$Xi,$T1 + pxor $Xi,$T1 + + pclmulqdq \$0x00,$HK,$Xm + xorps $Xh,$Xhn + + lea 0x40($inp),$inp + sub \$0x40,$len + jnc .Lmod4_loop + +.Ltail4x: + pclmulqdq \$0x00,$Hkey4,$Xi + pclmulqdq \$0x11,$Hkey4,$Xhi + pclmulqdq \$0x10,$HK,$T1 + xorps $Xm,$Xmn + xorps $Xln,$Xi + xorps $Xhn,$Xhi + pxor $Xi,$Xhi # aggregated Karatsuba post-processing + pxor $Xmn,$T1 + + pxor $Xhi,$T1 # + pxor 
$Xi,$Xhi + + movdqa $T1,$T2 # + psrldq \$8,$T1 + pslldq \$8,$T2 # + pxor $T1,$Xhi + pxor $T2,$Xi # +___ + &reduction_alg9($Xhi,$Xi); +$code.=<<___; + add \$0x40,$len + jz .Ldone + movdqu 0x20($Htbl),$HK + sub \$0x10,$len + jz .Lodd_tail +.Lskip4x: +___ +} +$code.=<<___; ####### # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = # [(H*Ii+1) + (H*Xi+1)] mod P = # [(H*Ii+1) + H^2*(Ii+Xi)] mod P # movdqu ($inp),$T1 # Ii - movdqu 16($inp),$Xn # Ii+1 + movdqu 16($inp),$Xln # Ii+1 pshufb $T3,$T1 - pshufb $T3,$Xn + pshufb $T3,$Xln pxor $T1,$Xi # Ii+Xi -___ - &clmul64x64_T2 ($Xhn,$Xn,$Hkey); # H*Ii+1 -$code.=<<___; - movdqa $Xi,$Xhi # - pshufd \$0b01001110,$Xi,$T1 - pshufd \$0b01001110,$Hkey2,$T2 - pxor $Xi,$T1 # - pxor $Hkey2,$T2 + + movdqa $Xln,$Xhn + pshufd \$0b01001110,$Xln,$Xmn + pxor $Xln,$Xmn + pclmulqdq \$0x00,$Hkey,$Xln + pclmulqdq \$0x11,$Hkey,$Xhn + pclmulqdq \$0x00,$HK,$Xmn lea 32($inp),$inp # i+=2 + nop sub \$0x20,$len jbe .Leven_tail + nop + jmp .Lmod_loop +.align 32 .Lmod_loop: -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) -$code.=<<___; - movdqu ($inp),$T1 # Ii - pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) - pxor $Xhn,$Xhi + movdqa $Xi,$Xhi + movdqa $Xmn,$T1 + pshufd \$0b01001110,$Xi,$Xmn # + pxor $Xi,$Xmn # - movdqu 16($inp),$Xn # Ii+1 - pshufb $T3,$T1 - pshufb $T3,$Xn + pclmulqdq \$0x00,$Hkey2,$Xi + pclmulqdq \$0x11,$Hkey2,$Xhi + pclmulqdq \$0x10,$HK,$Xmn - movdqa $Xn,$Xhn # - pshufd \$0b01001110,$Xn,$T1n - pshufd \$0b01001110,$Hkey,$T2n - pxor $Xn,$T1n # - pxor $Hkey,$T2n - pxor $T1,$Xhi # "Ii+Xi", consume early + pxor $Xln,$Xi # (H*Ii+1) + H^2*(Ii+Xi) + pxor $Xhn,$Xhi + movdqu ($inp),$T2 # Ii + pxor $Xi,$T1 # aggregated Karatsuba post-processing + pshufb $T3,$T2 + movdqu 16($inp),$Xln # Ii+1 + + pxor $Xhi,$T1 + pxor $T2,$Xhi # "Ii+Xi", consume early + pxor $T1,$Xmn + pshufb $T3,$Xln + movdqa $Xmn,$T1 # + psrldq \$8,$T1 + pslldq \$8,$Xmn # + pxor $T1,$Xhi + pxor $Xmn,$Xi # + + movdqa $Xln,$Xhn # - movdqa $Xi,$T1 # 1st phase + movdqa $Xi,$T2 # 1st phase + movdqa $Xi,$T1 + psllq \$5,$Xi + pxor $Xi,$T1 # + pclmulqdq \$0x00,$Hkey,$Xln ####### psllq \$1,$Xi pxor $T1,$Xi # - psllq \$5,$Xi # - pxor $T1,$Xi # - pclmulqdq \$0x00,$Hkey,$Xn ####### psllq \$57,$Xi # - movdqa $Xi,$T2 # + movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T2 # - pxor $T1,$Xi - pxor $T2,$Xhi # + psrldq \$8,$T1 # + pxor $T2,$Xi + pshufd \$0b01001110,$Xhn,$Xmn + pxor $T1,$Xhi # + pxor $Xhn,$Xmn # - pclmulqdq \$0x11,$Hkey,$Xhn ####### movdqa $Xi,$T2 # 2nd phase + psrlq \$1,$Xi + pclmulqdq \$0x11,$Hkey,$Xhn ####### + pxor $T2,$Xhi # + pxor $Xi,$T2 psrlq \$5,$Xi pxor $T2,$Xi # + lea 32($inp),$inp psrlq \$1,$Xi # - pxor $T2,$Xi # - pxor $Xhi,$T2 - psrlq \$1,$Xi # - pxor $T2,$Xi # + pclmulqdq \$0x00,$HK,$Xmn ####### + pxor $Xhi,$Xi # - pclmulqdq \$0x00,$T2n,$T1n ####### - movdqa $Xi,$Xhi # - pshufd \$0b01001110,$Xi,$T1 - pshufd \$0b01001110,$Hkey2,$T2 - pxor $Xi,$T1 # - pxor $Hkey2,$T2 - - pxor $Xn,$T1n # - pxor $Xhn,$T1n # - movdqa $T1n,$T2n # - psrldq \$8,$T1n - pslldq \$8,$T2n # - pxor $T1n,$Xhn - pxor $T2n,$Xn # - - lea 32($inp),$inp sub \$0x20,$len ja .Lmod_loop .Leven_tail: -___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey2,1); # H^2*(Ii+Xi) -$code.=<<___; - pxor $Xn,$Xi # (H*Ii+1) + H^2*(Ii+Xi) + movdqa $Xi,$Xhi + movdqa $Xmn,$T1 + pshufd \$0b01001110,$Xi,$Xmn # + pxor $Xi,$Xmn # + + pclmulqdq \$0x00,$Hkey2,$Xi + pclmulqdq \$0x11,$Hkey2,$Xhi + pclmulqdq \$0x10,$HK,$Xmn + + pxor $Xln,$Xi # (H*Ii+1) + H^2*(Ii+Xi) pxor $Xhn,$Xhi + pxor $Xi,$T1 + pxor $Xhi,$T1 + pxor $T1,$Xmn + movdqa $Xmn,$T1 # + psrldq \$8,$T1 + pslldq \$8,$Xmn # + pxor 
$T1,$Xhi + pxor $Xmn,$Xi # ___ &reduction_alg9 ($Xhi,$Xi); $code.=<<___; @@ -599,7 +937,7 @@ sub reduction_alg9 { # 17/13 times faster than Intel version pshufb $T3,$T1 pxor $T1,$Xi # Ii+Xi ___ - &clmul64x64_T2 ($Xhi,$Xi,$Hkey); # H*(Ii+Xi) + &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$HK); # H*(Ii+Xi) &reduction_alg9 ($Xhi,$Xi); $code.=<<___; .Ldone: @@ -612,21 +950,607 @@ sub reduction_alg9 { # 17/13 times faster than Intel version movaps 0x20(%rsp),%xmm8 movaps 0x30(%rsp),%xmm9 movaps 0x40(%rsp),%xmm10 - add \$0x58,%rsp + movaps 0x50(%rsp),%xmm11 + movaps 0x60(%rsp),%xmm12 + movaps 0x70(%rsp),%xmm13 + movaps 0x80(%rsp),%xmm14 + movaps 0x90(%rsp),%xmm15 + lea 0xa8(%rsp),%rsp +.LSEH_end_gcm_ghash_clmul: ___ $code.=<<___; ret -.LSEH_end_gcm_ghash_clmul: .size gcm_ghash_clmul,.-gcm_ghash_clmul ___ } + +$code.=<<___; +.globl gcm_init_avx +.type gcm_init_avx,\@abi-omnipotent +.align 32 +gcm_init_avx: +___ +if ($avx) { +my ($Htbl,$Xip)=@_4args; +my $HK="%xmm6"; + +$code.=<<___ if ($win64); +.LSEH_begin_gcm_init_avx: + # I can't trust assembler to use specific encoding:-( + .byte 0x48,0x83,0xec,0x18 #sub $0x18,%rsp + .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp) +___ +$code.=<<___; + vzeroupper + + vmovdqu ($Xip),$Hkey + vpshufd \$0b01001110,$Hkey,$Hkey # dword swap + + # <<1 twist + vpshufd \$0b11111111,$Hkey,$T2 # broadcast uppermost dword + vpsrlq \$63,$Hkey,$T1 + vpsllq \$1,$Hkey,$Hkey + vpxor $T3,$T3,$T3 # + vpcmpgtd $T2,$T3,$T3 # broadcast carry bit + vpslldq \$8,$T1,$T1 + vpor $T1,$Hkey,$Hkey # H<<=1 + + # magic reduction + vpand .L0x1c2_polynomial(%rip),$T3,$T3 + vpxor $T3,$Hkey,$Hkey # if(carry) H^=0x1c2_polynomial + + vpunpckhqdq $Hkey,$Hkey,$HK + vmovdqa $Hkey,$Xi + vpxor $Hkey,$HK,$HK + mov \$4,%r10 # up to H^8 + jmp .Linit_start_avx +___ + +sub clmul64x64_avx { +my ($Xhi,$Xi,$Hkey,$HK)=@_; + +if (!defined($HK)) { $HK = $T2; +$code.=<<___; + vpunpckhqdq $Xi,$Xi,$T1 + vpunpckhqdq $Hkey,$Hkey,$T2 + vpxor $Xi,$T1,$T1 # + vpxor $Hkey,$T2,$T2 +___ +} else { +$code.=<<___; + vpunpckhqdq $Xi,$Xi,$T1 + vpxor $Xi,$T1,$T1 # +___ +} +$code.=<<___; + vpclmulqdq \$0x11,$Hkey,$Xi,$Xhi ####### + vpclmulqdq \$0x00,$Hkey,$Xi,$Xi ####### + vpclmulqdq \$0x00,$HK,$T1,$T1 ####### + vpxor $Xi,$Xhi,$T2 # + vpxor $T2,$T1,$T1 # + + vpslldq \$8,$T1,$T2 # + vpsrldq \$8,$T1,$T1 + vpxor $T2,$Xi,$Xi # + vpxor $T1,$Xhi,$Xhi +___ +} + +sub reduction_avx { +my ($Xhi,$Xi) = @_; + +$code.=<<___; + vpsllq \$57,$Xi,$T1 # 1st phase + vpsllq \$62,$Xi,$T2 + vpxor $T1,$T2,$T2 # + vpsllq \$63,$Xi,$T1 + vpxor $T1,$T2,$T2 # + vpslldq \$8,$T2,$T1 # + vpsrldq \$8,$T2,$T2 + vpxor $T1,$Xi,$Xi # + vpxor $T2,$Xhi,$Xhi + + vpsrlq \$1,$Xi,$T2 # 2nd phase + vpxor $Xi,$Xhi,$Xhi + vpxor $T2,$Xi,$Xi # + vpsrlq \$5,$T2,$T2 + vpxor $T2,$Xi,$Xi # + vpsrlq \$1,$Xi,$Xi # + vpxor $Xhi,$Xi,$Xi # +___ +} +$code.=<<___; +.align 32 +.Linit_loop_avx: + vpalignr \$8,$T1,$T2,$T3 # low part is H.lo^H.hi... 
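The Karatsuba "salt" saved here is just H.lo^H.hi, which is what lets clmul64x64_T2 and the AVX path above produce a full 128x128-bit carry-less product with three pclmulqdq/vpclmulqdq instructions instead of four. A minimal portable sketch of that three-multiplication split, written with C intrinsics rather than the generated assembly (the function name and standalone form are illustrative only, not OpenSSL API; build with -mpclmul -msse2):

#include <emmintrin.h>   /* SSE2: xor, byte shifts, shuffles        */
#include <wmmintrin.h>   /* PCLMULQDQ: _mm_clmulepi64_si128         */

/* hi:lo = X * H over GF(2)[x], same 3-multiplication Karatsuba split
 * as clmul64x64_T2: X.lo*H.lo, X.hi*H.hi, (X.lo^X.hi)*(H.lo^H.hi). */
static void clmul128_karatsuba(__m128i X, __m128i H,
                               __m128i *lo, __m128i *hi)
{
    __m128i lo64 = _mm_clmulepi64_si128(X, H, 0x00);            /* X.lo*H.lo */
    __m128i hi64 = _mm_clmulepi64_si128(X, H, 0x11);            /* X.hi*H.hi */
    __m128i xk   = _mm_xor_si128(X, _mm_shuffle_epi32(X, 0x4e)); /* X.lo^X.hi */
    __m128i hk   = _mm_xor_si128(H, _mm_shuffle_epi32(H, 0x4e)); /* H.lo^H.hi, the "salt" */
    __m128i mid  = _mm_clmulepi64_si128(xk, hk, 0x00);

    mid = _mm_xor_si128(mid, lo64);           /* Karatsuba post-processing */
    mid = _mm_xor_si128(mid, hi64);
    *lo = _mm_xor_si128(lo64, _mm_slli_si128(mid, 8));          /* + mid<<64 */
    *hi = _mm_xor_si128(hi64, _mm_srli_si128(mid, 8));          /* + mid>>64 */
}

Precomputing the salt per power of H (as gcm_init_clmul/gcm_init_avx do above) removes the pshufd/pxor pair from the per-block critical path; the 256-bit result is then folded back modulo the GHASH polynomial by reduction_alg9.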
+ vmovdqu $T3,-0x10($Htbl) # save Karatsuba "salt" +___ + &clmul64x64_avx ($Xhi,$Xi,$Hkey,$HK); # calculate H^3,5,7 + &reduction_avx ($Xhi,$Xi); +$code.=<<___; +.Linit_start_avx: + vmovdqa $Xi,$T3 +___ + &clmul64x64_avx ($Xhi,$Xi,$Hkey,$HK); # calculate H^2,4,6,8 + &reduction_avx ($Xhi,$Xi); +$code.=<<___; + vpshufd \$0b01001110,$T3,$T1 + vpshufd \$0b01001110,$Xi,$T2 + vpxor $T3,$T1,$T1 # Karatsuba pre-processing + vmovdqu $T3,0x00($Htbl) # save H^1,3,5,7 + vpxor $Xi,$T2,$T2 # Karatsuba pre-processing + vmovdqu $Xi,0x10($Htbl) # save H^2,4,6,8 + lea 0x30($Htbl),$Htbl + sub \$1,%r10 + jnz .Linit_loop_avx + + vpalignr \$8,$T2,$T1,$T3 # last "salt" is flipped + vmovdqu $T3,-0x10($Htbl) + + vzeroupper +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + lea 0x18(%rsp),%rsp +.LSEH_end_gcm_init_avx: +___ +$code.=<<___; + ret +.size gcm_init_avx,.-gcm_init_avx +___ +} else { +$code.=<<___; + jmp .L_init_clmul +.size gcm_init_avx,.-gcm_init_avx +___ +} + +$code.=<<___; +.globl gcm_gmult_avx +.type gcm_gmult_avx,\@abi-omnipotent +.align 32 +gcm_gmult_avx: + jmp .L_gmult_clmul +.size gcm_gmult_avx,.-gcm_gmult_avx +___ + +$code.=<<___; +.globl gcm_ghash_avx +.type gcm_ghash_avx,\@abi-omnipotent +.align 32 +gcm_ghash_avx: +___ +if ($avx) { +my ($Xip,$Htbl,$inp,$len)=@_4args; +my ($Xlo,$Xhi,$Xmi, + $Zlo,$Zhi,$Zmi, + $Hkey,$HK,$T1,$T2, + $Xi,$Xo,$Tred,$bswap,$Ii,$Ij) = map("%xmm$_",(0..15)); + +$code.=<<___ if ($win64); + lea -0x88(%rsp),%rax +.LSEH_begin_gcm_ghash_avx: + # I can't trust assembler to use specific encoding:-( + .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax),%rsp + .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6,-0x20(%rax) + .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7,-0x10(%rax) + .byte 0x44,0x0f,0x29,0x00 #movaps %xmm8,0(%rax) + .byte 0x44,0x0f,0x29,0x48,0x10 #movaps %xmm9,0x10(%rax) + .byte 0x44,0x0f,0x29,0x50,0x20 #movaps %xmm10,0x20(%rax) + .byte 0x44,0x0f,0x29,0x58,0x30 #movaps %xmm11,0x30(%rax) + .byte 0x44,0x0f,0x29,0x60,0x40 #movaps %xmm12,0x40(%rax) + .byte 0x44,0x0f,0x29,0x68,0x50 #movaps %xmm13,0x50(%rax) + .byte 0x44,0x0f,0x29,0x70,0x60 #movaps %xmm14,0x60(%rax) + .byte 0x44,0x0f,0x29,0x78,0x70 #movaps %xmm15,0x70(%rax) +___ +$code.=<<___; + vzeroupper + + vmovdqu ($Xip),$Xi # load $Xi + lea .L0x1c2_polynomial(%rip),%r10 + lea 0x40($Htbl),$Htbl # size optimization + vmovdqu .Lbswap_mask(%rip),$bswap + vpshufb $bswap,$Xi,$Xi + cmp \$0x80,$len + jb .Lshort_avx + sub \$0x80,$len + + vmovdqu 0x70($inp),$Ii # I[7] + vmovdqu 0x00-0x40($Htbl),$Hkey # $Hkey^1 + vpshufb $bswap,$Ii,$Ii + vmovdqu 0x20-0x40($Htbl),$HK + + vpunpckhqdq $Ii,$Ii,$T2 + vmovdqu 0x60($inp),$Ij # I[6] + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpxor $Ii,$T2,$T2 + vpshufb $bswap,$Ij,$Ij + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0x10-0x40($Htbl),$Hkey # $Hkey^2 + vpunpckhqdq $Ij,$Ij,$T1 + vmovdqu 0x50($inp),$Ii # I[5] + vpclmulqdq \$0x00,$HK,$T2,$Xmi + vpxor $Ij,$T1,$T1 + + vpshufb $bswap,$Ii,$Ii + vpclmulqdq \$0x00,$Hkey,$Ij,$Zlo + vpunpckhqdq $Ii,$Ii,$T2 + vpclmulqdq \$0x11,$Hkey,$Ij,$Zhi + vmovdqu 0x30-0x40($Htbl),$Hkey # $Hkey^3 + vpxor $Ii,$T2,$T2 + vmovdqu 0x40($inp),$Ij # I[4] + vpclmulqdq \$0x10,$HK,$T1,$Zmi + vmovdqu 0x50-0x40($Htbl),$HK + + vpshufb $bswap,$Ij,$Ij + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpxor $Xhi,$Zhi,$Zhi + vpunpckhqdq $Ij,$Ij,$T1 + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0x40-0x40($Htbl),$Hkey # $Hkey^4 + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T2,$Xmi + vpxor $Ij,$T1,$T1 + + vmovdqu 0x30($inp),$Ii # I[3] + vpxor $Zlo,$Xlo,$Xlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Zlo + vpxor 
$Zhi,$Xhi,$Xhi + vpshufb $bswap,$Ii,$Ii + vpclmulqdq \$0x11,$Hkey,$Ij,$Zhi + vmovdqu 0x60-0x40($Htbl),$Hkey # $Hkey^5 + vpxor $Zmi,$Xmi,$Xmi + vpunpckhqdq $Ii,$Ii,$T2 + vpclmulqdq \$0x10,$HK,$T1,$Zmi + vmovdqu 0x80-0x40($Htbl),$HK + vpxor $Ii,$T2,$T2 + + vmovdqu 0x20($inp),$Ij # I[2] + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpxor $Xhi,$Zhi,$Zhi + vpshufb $bswap,$Ij,$Ij + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0x70-0x40($Htbl),$Hkey # $Hkey^6 + vpxor $Xmi,$Zmi,$Zmi + vpunpckhqdq $Ij,$Ij,$T1 + vpclmulqdq \$0x00,$HK,$T2,$Xmi + vpxor $Ij,$T1,$T1 + + vmovdqu 0x10($inp),$Ii # I[1] + vpxor $Zlo,$Xlo,$Xlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Zlo + vpxor $Zhi,$Xhi,$Xhi + vpshufb $bswap,$Ii,$Ii + vpclmulqdq \$0x11,$Hkey,$Ij,$Zhi + vmovdqu 0x90-0x40($Htbl),$Hkey # $Hkey^7 + vpxor $Zmi,$Xmi,$Xmi + vpunpckhqdq $Ii,$Ii,$T2 + vpclmulqdq \$0x10,$HK,$T1,$Zmi + vmovdqu 0xb0-0x40($Htbl),$HK + vpxor $Ii,$T2,$T2 + + vmovdqu ($inp),$Ij # I[0] + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpxor $Xhi,$Zhi,$Zhi + vpshufb $bswap,$Ij,$Ij + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0xa0-0x40($Htbl),$Hkey # $Hkey^8 + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x10,$HK,$T2,$Xmi + + lea 0x80($inp),$inp + cmp \$0x80,$len + jb .Ltail_avx + + vpxor $Xi,$Ij,$Ij # accumulate $Xi + sub \$0x80,$len + jmp .Loop8x_avx + +.align 32 +.Loop8x_avx: + vpunpckhqdq $Ij,$Ij,$T1 + vmovdqu 0x70($inp),$Ii # I[7] + vpxor $Xlo,$Zlo,$Zlo + vpxor $Ij,$T1,$T1 + vpclmulqdq \$0x00,$Hkey,$Ij,$Xi + vpshufb $bswap,$Ii,$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xo + vmovdqu 0x00-0x40($Htbl),$Hkey # $Hkey^1 + vpunpckhqdq $Ii,$Ii,$T2 + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Tred + vmovdqu 0x20-0x40($Htbl),$HK + vpxor $Ii,$T2,$T2 + + vmovdqu 0x60($inp),$Ij # I[6] + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpxor $Zlo,$Xi,$Xi # collect result + vpshufb $bswap,$Ij,$Ij + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vxorps $Zhi,$Xo,$Xo + vmovdqu 0x10-0x40($Htbl),$Hkey # $Hkey^2 + vpunpckhqdq $Ij,$Ij,$T1 + vpclmulqdq \$0x00,$HK, $T2,$Xmi + vpxor $Zmi,$Tred,$Tred + vxorps $Ij,$T1,$T1 + + vmovdqu 0x50($inp),$Ii # I[5] + vpxor $Xi,$Tred,$Tred # aggregated Karatsuba post-processing + vpclmulqdq \$0x00,$Hkey,$Ij,$Zlo + vpxor $Xo,$Tred,$Tred + vpslldq \$8,$Tred,$T2 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x11,$Hkey,$Ij,$Zhi + vpsrldq \$8,$Tred,$Tred + vpxor $T2, $Xi, $Xi + vmovdqu 0x30-0x40($Htbl),$Hkey # $Hkey^3 + vpshufb $bswap,$Ii,$Ii + vxorps $Tred,$Xo, $Xo + vpxor $Xhi,$Zhi,$Zhi + vpunpckhqdq $Ii,$Ii,$T2 + vpclmulqdq \$0x10,$HK, $T1,$Zmi + vmovdqu 0x50-0x40($Htbl),$HK + vpxor $Ii,$T2,$T2 + vpxor $Xmi,$Zmi,$Zmi + + vmovdqu 0x40($inp),$Ij # I[4] + vpalignr \$8,$Xi,$Xi,$Tred # 1st phase + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpshufb $bswap,$Ij,$Ij + vpxor $Zlo,$Xlo,$Xlo + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0x40-0x40($Htbl),$Hkey # $Hkey^4 + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Zhi,$Xhi,$Xhi + vpclmulqdq \$0x00,$HK, $T2,$Xmi + vxorps $Ij,$T1,$T1 + vpxor $Zmi,$Xmi,$Xmi + + vmovdqu 0x30($inp),$Ii # I[3] + vpclmulqdq \$0x10,(%r10),$Xi,$Xi + vpclmulqdq \$0x00,$Hkey,$Ij,$Zlo + vpshufb $bswap,$Ii,$Ii + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x11,$Hkey,$Ij,$Zhi + vmovdqu 0x60-0x40($Htbl),$Hkey # $Hkey^5 + vpunpckhqdq $Ii,$Ii,$T2 + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x10,$HK, $T1,$Zmi + vmovdqu 0x80-0x40($Htbl),$HK + vpxor $Ii,$T2,$T2 + vpxor $Xmi,$Zmi,$Zmi + + vmovdqu 0x20($inp),$Ij # I[2] + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpshufb $bswap,$Ij,$Ij + vpxor $Zlo,$Xlo,$Xlo + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0x70-0x40($Htbl),$Hkey 
# $Hkey^6 + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Zhi,$Xhi,$Xhi + vpclmulqdq \$0x00,$HK, $T2,$Xmi + vpxor $Ij,$T1,$T1 + vpxor $Zmi,$Xmi,$Xmi + vxorps $Tred,$Xi,$Xi + + vmovdqu 0x10($inp),$Ii # I[1] + vpalignr \$8,$Xi,$Xi,$Tred # 2nd phase + vpclmulqdq \$0x00,$Hkey,$Ij,$Zlo + vpshufb $bswap,$Ii,$Ii + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x11,$Hkey,$Ij,$Zhi + vmovdqu 0x90-0x40($Htbl),$Hkey # $Hkey^7 + vpclmulqdq \$0x10,(%r10),$Xi,$Xi + vxorps $Xo,$Tred,$Tred + vpunpckhqdq $Ii,$Ii,$T2 + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x10,$HK, $T1,$Zmi + vmovdqu 0xb0-0x40($Htbl),$HK + vpxor $Ii,$T2,$T2 + vpxor $Xmi,$Zmi,$Zmi + + vmovdqu ($inp),$Ij # I[0] + vpclmulqdq \$0x00,$Hkey,$Ii,$Xlo + vpshufb $bswap,$Ij,$Ij + vpclmulqdq \$0x11,$Hkey,$Ii,$Xhi + vmovdqu 0xa0-0x40($Htbl),$Hkey # $Hkey^8 + vpxor $Tred,$Ij,$Ij + vpclmulqdq \$0x10,$HK, $T2,$Xmi + vpxor $Xi,$Ij,$Ij # accumulate $Xi + + lea 0x80($inp),$inp + sub \$0x80,$len + jnc .Loop8x_avx + + add \$0x80,$len + jmp .Ltail_no_xor_avx + +.align 32 +.Lshort_avx: + vmovdqu -0x10($inp,$len),$Ii # very last word + lea ($inp,$len),$inp + vmovdqu 0x00-0x40($Htbl),$Hkey # $Hkey^1 + vmovdqu 0x20-0x40($Htbl),$HK + vpshufb $bswap,$Ii,$Ij + + vmovdqa $Xlo,$Zlo # subtle way to zero $Zlo, + vmovdqa $Xhi,$Zhi # $Zhi and + vmovdqa $Xmi,$Zmi # $Zmi + sub \$0x10,$len + jz .Ltail_avx + + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vmovdqu -0x20($inp),$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vmovdqu 0x10-0x40($Htbl),$Hkey # $Hkey^2 + vpshufb $bswap,$Ii,$Ij + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + vpsrldq \$8,$HK,$HK + sub \$0x10,$len + jz .Ltail_avx + + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vmovdqu -0x30($inp),$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vmovdqu 0x30-0x40($Htbl),$Hkey # $Hkey^3 + vpshufb $bswap,$Ii,$Ij + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + vmovdqu 0x50-0x40($Htbl),$HK + sub \$0x10,$len + jz .Ltail_avx + + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vmovdqu -0x40($inp),$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vmovdqu 0x40-0x40($Htbl),$Hkey # $Hkey^4 + vpshufb $bswap,$Ii,$Ij + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + vpsrldq \$8,$HK,$HK + sub \$0x10,$len + jz .Ltail_avx + + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vmovdqu -0x50($inp),$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vmovdqu 0x60-0x40($Htbl),$Hkey # $Hkey^5 + vpshufb $bswap,$Ii,$Ij + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + vmovdqu 0x80-0x40($Htbl),$HK + sub \$0x10,$len + jz .Ltail_avx + + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vmovdqu -0x60($inp),$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vmovdqu 0x70-0x40($Htbl),$Hkey # $Hkey^6 + vpshufb $bswap,$Ii,$Ij + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + vpsrldq \$8,$HK,$HK + sub \$0x10,$len + jz .Ltail_avx + + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vmovdqu -0x70($inp),$Ii + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vmovdqu 0x90-0x40($Htbl),$Hkey # $Hkey^7 + vpshufb $bswap,$Ii,$Ij + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + vmovq 0xb8-0x40($Htbl),$HK + sub \$0x10,$len + jmp 
.Ltail_avx + +.align 32 +.Ltail_avx: + vpxor $Xi,$Ij,$Ij # accumulate $Xi +.Ltail_no_xor_avx: + vpunpckhqdq $Ij,$Ij,$T1 + vpxor $Xlo,$Zlo,$Zlo + vpclmulqdq \$0x00,$Hkey,$Ij,$Xlo + vpxor $Ij,$T1,$T1 + vpxor $Xhi,$Zhi,$Zhi + vpclmulqdq \$0x11,$Hkey,$Ij,$Xhi + vpxor $Xmi,$Zmi,$Zmi + vpclmulqdq \$0x00,$HK,$T1,$Xmi + + vmovdqu (%r10),$Tred + + vpxor $Xlo,$Zlo,$Xi + vpxor $Xhi,$Zhi,$Xo + vpxor $Xmi,$Zmi,$Zmi + + vpxor $Xi, $Zmi,$Zmi # aggregated Karatsuba post-processing + vpxor $Xo, $Zmi,$Zmi + vpslldq \$8, $Zmi,$T2 + vpsrldq \$8, $Zmi,$Zmi + vpxor $T2, $Xi, $Xi + vpxor $Zmi,$Xo, $Xo + + vpclmulqdq \$0x10,$Tred,$Xi,$T2 # 1st phase + vpalignr \$8,$Xi,$Xi,$Xi + vpxor $T2,$Xi,$Xi + + vpclmulqdq \$0x10,$Tred,$Xi,$T2 # 2nd phase + vpalignr \$8,$Xi,$Xi,$Xi + vpxor $Xo,$Xi,$Xi + vpxor $T2,$Xi,$Xi + + cmp \$0,$len + jne .Lshort_avx + + vpshufb $bswap,$Xi,$Xi + vmovdqu $Xi,($Xip) + vzeroupper +___ +$code.=<<___ if ($win64); + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 + movaps 0x20(%rsp),%xmm8 + movaps 0x30(%rsp),%xmm9 + movaps 0x40(%rsp),%xmm10 + movaps 0x50(%rsp),%xmm11 + movaps 0x60(%rsp),%xmm12 + movaps 0x70(%rsp),%xmm13 + movaps 0x80(%rsp),%xmm14 + movaps 0x90(%rsp),%xmm15 + lea 0xa8(%rsp),%rsp +.LSEH_end_gcm_ghash_avx: +___ +$code.=<<___; + ret +.size gcm_ghash_avx,.-gcm_ghash_avx +___ +} else { +$code.=<<___; + jmp .L_ghash_clmul +.size gcm_ghash_avx,.-gcm_ghash_avx +___ +} + $code.=<<___; .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.L7_mask: + .long 7,0,7,0 +.L7_mask_poly: + .long 7,0,`0xE1<<1`,0 .align 64 .type .Lrem_4bit,\@object .Lrem_4bit: @@ -774,10 +1698,24 @@ sub reduction_alg9 { # 17/13 times faster than Intel version .rva .LSEH_end_gcm_ghash_4bit .rva .LSEH_info_gcm_ghash_4bit + .rva .LSEH_begin_gcm_init_clmul + .rva .LSEH_end_gcm_init_clmul + .rva .LSEH_info_gcm_init_clmul + .rva .LSEH_begin_gcm_ghash_clmul .rva .LSEH_end_gcm_ghash_clmul .rva .LSEH_info_gcm_ghash_clmul +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_gcm_init_avx + .rva .LSEH_end_gcm_init_avx + .rva .LSEH_info_gcm_init_clmul + .rva .LSEH_begin_gcm_ghash_avx + .rva .LSEH_end_gcm_ghash_avx + .rva .LSEH_info_gcm_ghash_clmul +___ +$code.=<<___; .section .xdata .align 8 .LSEH_info_gcm_gmult_4bit: @@ -788,14 +1726,23 @@ sub reduction_alg9 { # 17/13 times faster than Intel version .byte 9,0,0,0 .rva se_handler .rva .Lghash_prologue,.Lghash_epilogue # HandlerData +.LSEH_info_gcm_init_clmul: + .byte 0x01,0x08,0x03,0x00 + .byte 0x08,0x68,0x00,0x00 #movaps 0x00(rsp),xmm6 + .byte 0x04,0x22,0x00,0x00 #sub rsp,0x18 .LSEH_info_gcm_ghash_clmul: - .byte 0x01,0x1f,0x0b,0x00 - .byte 0x1f,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10 - .byte 0x19,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9 - .byte 0x13,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8 - .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 - .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6 - .byte 0x04,0xa2,0x00,0x00 #sub rsp,0x58 + .byte 0x01,0x33,0x16,0x00 + .byte 0x33,0xf8,0x09,0x00 #movaps 0x90(rsp),xmm15 + .byte 0x2e,0xe8,0x08,0x00 #movaps 0x80(rsp),xmm14 + .byte 0x29,0xd8,0x07,0x00 #movaps 0x70(rsp),xmm13 + .byte 0x24,0xc8,0x06,0x00 #movaps 0x60(rsp),xmm12 + .byte 0x1f,0xb8,0x05,0x00 #movaps 0x50(rsp),xmm11 + .byte 0x1a,0xa8,0x04,0x00 #movaps 0x40(rsp),xmm10 + .byte 0x15,0x98,0x03,0x00 #movaps 0x30(rsp),xmm9 + .byte 0x10,0x88,0x02,0x00 #movaps 0x20(rsp),xmm8 + .byte 0x0c,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7 + .byte 0x08,0x68,0x00,0x00 #movaps 0x00(rsp),xmm6 + .byte 0x04,0x01,0x15,0x00 #sub rsp,0xa8 ___ } diff 
--git a/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl b/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl new file mode 100755 index 00000000000000..e76a58c343c1c7 --- /dev/null +++ b/deps/openssl/openssl/crypto/modes/asm/ghashp8-ppc.pl @@ -0,0 +1,234 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# GHASH for for PowerISA v2.07. +# +# July 2014 +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This initial +# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x +# faster than "4-bit" integer-only compiler-generated 64-bit code. +# "Initial version" means that there is room for futher improvement. + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block + +my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); +my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); +my $vrsave="r12"; + +$code=<<___; +.machine "any" + +.text + +.globl .gcm_init_p8 +.align 5 +.gcm_init_p8: + lis r0,0xfff0 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $H,0,r4 # load H + + vspltisb $xC2,-16 # 0xf0 + vspltisb $t0,1 # one + vaddubm $xC2,$xC2,$xC2 # 0xe0 + vxor $zero,$zero,$zero + vor $xC2,$xC2,$t0 # 0xe1 + vsldoi $xC2,$xC2,$zero,15 # 0xe1... + vsldoi $t1,$zero,$t0,1 # ...1 + vaddubm $xC2,$xC2,$xC2 # 0xc2... + vspltisb $t2,7 + vor $xC2,$xC2,$t1 # 0xc2....01 + vspltb $t1,$H,0 # most significant byte + vsl $H,$H,$t0 # H<<=1 + vsrab $t1,$t1,$t2 # broadcast carry bit + vand $t1,$t1,$xC2 + vxor $H,$H,$t1 # twisted H + + vsldoi $H,$H,$H,8 # twist even more ... + vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 + vsldoi $Hl,$zero,$H,8 # ... 
and split + vsldoi $Hh,$H,$zero,8 + + stvx_u $xC2,0,r3 # save pre-computed table + stvx_u $Hl,r8,r3 + stvx_u $H, r9,r3 + stvx_u $Hh,r10,r3 + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_init_p8,.-.gcm_init_p8 + +.globl .gcm_gmult_p8 +.align 5 +.gcm_gmult_p8: + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $IN,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $IN,$IN,$IN,$lemask + vxor $zero,$zero,$zero + + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + + vpmsumd $t2,$Xl,$xC2 # 1st phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $Xl,$Xl,$t1 + + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_gmult_p8,.-.gcm_gmult_p8 + +.globl .gcm_ghash_p8 +.align 5 +.gcm_ghash_p8: + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $Xl,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $Xl,$Xl,$Xl,$lemask + vxor $zero,$zero,$zero + + lvx_u $IN,0,$inp + addi $inp,$inp,16 + subi $len,$len,16 + le?vperm $IN,$IN,$IN,$lemask + vxor $IN,$IN,$Xl + b Loop + +.align 5 +Loop: + subic $len,$len,16 + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + subfe. r0,r0,r0 # borrow?-1:0 + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + and r0,r0,$len + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + add $inp,$inp,r0 + + vpmsumd $t2,$Xl,$xC2 # 1st phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + lvx_u $IN,0,$inp + addi $inp,$inp,16 + + vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vpmsumd $Xl,$Xl,$xC2 + le?vperm $IN,$IN,$IN,$lemask + vxor $t1,$t1,$Xh + vxor $IN,$IN,$t1 + vxor $IN,$IN,$Xl + beq Loop # did $len-=16 borrow? + + vxor $Xl,$Xl,$t1 + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,4,0 + .long 0 +.size .gcm_ghash_p8,.-.gcm_ghash_p8 + +.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +foreach (split("\n",$code)) { + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o; + } else { + s/le\?/#le#/o or + s/be\?//o; + } + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl new file mode 100644 index 00000000000000..54a1ac4db8d3b2 --- /dev/null +++ b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl @@ -0,0 +1,241 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. 
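Both the POWER8 module above and the ARMv8 module introduced below accelerate the same field multiplication; the 0xc2.../0xe1... "magic reduction" constants they build are forms of the GCM polynomial x^128 + x^7 + x^2 + x + 1 in reflected bit order. For reference, a minimal bit-at-a-time C sketch of the product these vpmsumd/PMULL routines compute, in the spirit of gcm_gmult_1bit() in gcm128.c (all names here are illustrative, not OpenSSL API):

#include <stdint.h>

typedef struct { uint64_t hi, lo; } ghash_u128;    /* big-endian halves */

/* Z = X * H in GCM's reflected GF(2^128), one bit of X per iteration. */
static void ghash_mult_1bit(uint8_t Z_out[16],
                            const uint8_t X[16], const uint8_t H[16])
{
    ghash_u128 Z = { 0, 0 }, V = { 0, 0 };
    int i;

    for (i = 0; i < 8;  i++) V.hi = (V.hi << 8) | H[i];
    for (i = 8; i < 16; i++) V.lo = (V.lo << 8) | H[i];

    for (i = 0; i < 128; i++) {
        uint64_t carry = V.lo & 1;                 /* bit about to fall off */
        if (X[i >> 3] & (0x80 >> (i & 7))) {       /* bit i of X, MSB first */
            Z.hi ^= V.hi;
            Z.lo ^= V.lo;
        }
        V.lo = (V.lo >> 1) | (V.hi << 63);         /* V >>= 1 ...           */
        V.hi >>= 1;
        if (carry)
            V.hi ^= 0xe100000000000000ULL;         /* ... reduce by 0xe1||0 */
    }

    for (i = 7;  i >= 0; i--) { Z_out[i] = (uint8_t)Z.hi; Z.hi >>= 8; }
    for (i = 15; i >= 8; i--) { Z_out[i] = (uint8_t)Z.lo; Z.lo >>= 8; }
}

The 4-bit table code, the vpmsumd path and the PMULL path all compute this same product, just 4, 64 or 128 bits at a time with the reduction folded in algebraically.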
+# ==================================================================== +# +# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication. +# +# June 2014 +# +# Initial version was developed in tight cooperation with Ard +# Biesheuvel from bits-n-pieces from +# other assembly modules. Just like aesv8-armx.pl this module +# supports both AArch32 and AArch64 execution modes. +# +# Current performance in cycles per processed byte: +# +# PMULL[2] 32-bit NEON(*) +# Apple A7 1.76 5.62 +# Cortex-A53 1.45 8.39 +# Cortex-A57 2.22 7.61 +# +# (*) presented for reference/comparison purposes; + +$flavour = shift; +open STDOUT,">".shift; + +$Xi="x0"; # argument block +$Htbl="x1"; +$inp="x2"; +$len="x3"; + +$inc="x12"; + +{ +my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); +my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14)); + +$code=<<___; +#include "arm_arch.h" + +.text +___ +$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); +$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); + +$code.=<<___; +.global gcm_init_v8 +.type gcm_init_v8,%function +.align 4 +gcm_init_v8: + vld1.64 {$t1},[x1] @ load H + vmov.i8 $t0,#0xe1 + vext.8 $IN,$t1,$t1,#8 + vshl.i64 $t0,$t0,#57 + vshr.u64 $t2,$t0,#63 + vext.8 $t0,$t2,$t0,#8 @ t0=0xc2....01 + vdup.32 $t1,${t1}[1] + vshr.u64 $t3,$IN,#63 + vshr.s32 $t1,$t1,#31 @ broadcast carry bit + vand $t3,$t3,$t0 + vshl.i64 $IN,$IN,#1 + vext.8 $t3,$t3,$t3,#8 + vand $t0,$t0,$t1 + vorr $IN,$IN,$t3 @ H<<<=1 + veor $IN,$IN,$t0 @ twisted H + vst1.64 {$IN},[x0] + + ret +.size gcm_init_v8,.-gcm_init_v8 + +.global gcm_gmult_v8 +.type gcm_gmult_v8,%function +.align 4 +gcm_gmult_v8: + vld1.64 {$t1},[$Xi] @ load Xi + vmov.i8 $t3,#0xe1 + vld1.64 {$H},[$Htbl] @ load twisted H + vshl.u64 $t3,$t3,#57 +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + vext.8 $Hhl,$H,$H,#8 + mov $len,#0 + vext.8 $IN,$t1,$t1,#8 + mov $inc,#0 + veor $Hhl,$Hhl,$H @ Karatsuba pre-processing + mov $inp,$Xi + b .Lgmult_v8 +.size gcm_gmult_v8,.-gcm_gmult_v8 + +.global gcm_ghash_v8 +.type gcm_ghash_v8,%function +.align 4 +gcm_ghash_v8: + vld1.64 {$Xl},[$Xi] @ load [rotated] Xi + subs $len,$len,#16 + vmov.i8 $t3,#0xe1 + mov $inc,#16 + vld1.64 {$H},[$Htbl] @ load twisted H + cclr $inc,eq + vext.8 $Xl,$Xl,$Xl,#8 + vshl.u64 $t3,$t3,#57 + vld1.64 {$t1},[$inp],$inc @ load [rotated] inp + vext.8 $Hhl,$H,$H,#8 +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl + vrev64.8 $t1,$t1 +#endif + veor $Hhl,$Hhl,$H @ Karatsuba pre-processing + vext.8 $IN,$t1,$t1,#8 + b .Loop_v8 + +.align 4 +.Loop_v8: + vext.8 $t2,$Xl,$Xl,#8 + veor $IN,$IN,$Xl @ inp^=Xi + veor $t1,$t1,$t2 @ $t1 is rotated inp^Xi + +.Lgmult_v8: + vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo + veor $t1,$t1,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi + subs $len,$len,#16 + vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + cclr $inc,eq + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$t1},[$inp],$inc @ load [rotated] inp + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$t3 @ 1st phase + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + veor $Xl,$Xm,$t2 + vext.8 $IN,$t1,$t1,#8 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$t3 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + b.hs .Loop_v8 + +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vext.8 $Xl,$Xl,$Xl,#8 + vst1.64 {$Xl},[$Xi] @ write out Xi + + ret +.size gcm_ghash_v8,.-gcm_ghash_v8 +___ +} +$code.=<<___; +.asciz "GHASH for ARMv8, CRYPTOGAMS by " +.align 2 +___ + +if ($flavour =~ /64/) { 
######## 64-bit code + sub unvmov { + my $arg=shift; + + $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o && + sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1; + } + foreach(split("\n",$code)) { + s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or + s/vmov\.i8/movi/o or # fix up legacy mnemonics + s/vmov\s+(.*)/unvmov($1)/geo or + s/vext\.8/ext/o or + s/vshr\.s/sshr\.s/o or + s/vshr/ushr/o or + s/^(\s+)v/$1/o or # strip off v prefix + s/\bbx\s+lr\b/ret/o; + + s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo; # old->new registers + s/@\s/\/\//o; # old->new style commentary + + # fix up remainig legacy suffixes + s/\.[ui]?8(\s)/$1/o; + s/\.[uis]?32//o and s/\.16b/\.4s/go; + m/\.p64/o and s/\.16b/\.1q/o; # 1st pmull argument + m/l\.p64/o and s/\.16b/\.1d/go; # 2nd and 3rd pmull arguments + s/\.[uisp]?64//o and s/\.16b/\.2d/go; + s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o; + + print $_,"\n"; + } +} else { ######## 32-bit code + sub unvdup32 { + my $arg=shift; + + $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + } + sub unvpmullp64 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+),\s*q([0-9]+),\s*q([0-9]+)/o) { + my $word = 0xf2a00e00|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + $word |= 0x00010001 if ($mnemonic =~ "2"); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } + + foreach(split("\n",$code)) { + s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers + s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers + s/\/\/\s?/@ /o; # new->old style commentary + + # fix up remainig new-style suffixes + s/\],#[0-9]+/]!/o; + + s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o or + s/vdup\.32\s+(.*)/unvdup32($1)/geo or + s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo or + s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or + s/^(\s+)b\./$1b/o or + s/^(\s+)ret/$1bx\tlr/o; + + print $_,"\n"; + } +} + +close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/modes/cbc128.c b/deps/openssl/openssl/crypto/modes/cbc128.c index 1ed79672749af9..c13caea5355beb 100644 --- a/deps/openssl/openssl/crypto/modes/cbc128.c +++ b/deps/openssl/openssl/crypto/modes/cbc128.c @@ -59,7 +59,7 @@ #endif #include -#ifndef STRICT_ALIGNMENT +#if !defined(STRICT_ALIGNMENT) && !defined(PEDANTIC) # define STRICT_ALIGNMENT 0 #endif diff --git a/deps/openssl/openssl/crypto/modes/gcm128.c b/deps/openssl/openssl/crypto/modes/gcm128.c index f69f2c9db6beaf..24a84a7ae7ec14 100644 --- a/deps/openssl/openssl/crypto/modes/gcm128.c +++ b/deps/openssl/openssl/crypto/modes/gcm128.c @@ -687,7 +687,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) #endif -#if TABLE_BITS==4 && defined(GHASH_ASM) +#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ)) # if !defined(I386_ONLY) && \ (defined(__i386) || defined(__i386__) || \ defined(__x86_64) || defined(__x86_64__) || \ @@ -701,6 +701,17 @@ void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); +# if defined(__i386) || defined(__i386__) || defined(_M_IX86) +# define gcm_init_avx gcm_init_clmul +# define gcm_gmult_avx gcm_gmult_clmul +# define gcm_ghash_avx gcm_ghash_clmul +# else +void 
gcm_init_avx(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); +# endif + # if defined(__i386) || defined(__i386__) || defined(_M_IX86) # define GHASH_ASM_X86 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]); @@ -711,15 +722,41 @@ void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]); void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); # endif -# elif defined(__arm__) || defined(__arm) +# elif defined(__arm__) || defined(__arm) || defined(__aarch64__) # include "arm_arch.h" -# if __ARM_ARCH__>=7 +# if __ARM_MAX_ARCH__>=7 # define GHASH_ASM_ARM # define GCM_FUNCREF_4BIT +# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL) +# if defined(__arm__) || defined(__arm) +# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) +# endif +void gcm_init_neon(u128 Htable[16], const u64 Xi[2]); void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]); void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len); +void gcm_init_v8(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); # endif +# elif defined(__sparc__) || defined(__sparc) +# include "sparc_arch.h" +# define GHASH_ASM_SPARC +# define GCM_FUNCREF_4BIT +extern unsigned int OPENSSL_sparcv9cap_P[]; +void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); +# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# include "ppc_arch.h" +# define GHASH_ASM_PPC +# define GCM_FUNCREF_4BIT +void gcm_init_p8(u128 Htable[16], const u64 Xi[2]); +void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]); +void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp, + size_t len); # endif #endif @@ -768,9 +805,15 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2) if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */ OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */ - gcm_init_clmul(ctx->Htable, ctx->H.u); - ctx->gmult = gcm_gmult_clmul; - ctx->ghash = gcm_ghash_clmul; + if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */ + gcm_init_avx(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_avx; + ctx->ghash = gcm_ghash_avx; + } else { + gcm_init_clmul(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_clmul; + ctx->ghash = gcm_ghash_clmul; + } return; } # endif @@ -792,13 +835,52 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) ctx->ghash = gcm_ghash_4bit; # endif # elif defined(GHASH_ASM_ARM) - if (OPENSSL_armcap_P & ARMV7_NEON) { +# ifdef PMULL_CAPABLE + if (PMULL_CAPABLE) { + gcm_init_v8(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_v8; + ctx->ghash = gcm_ghash_v8; + } else +# endif +# ifdef NEON_CAPABLE + if (NEON_CAPABLE) { + gcm_init_neon(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_neon; ctx->ghash = gcm_ghash_neon; + } else +# endif + { + gcm_init_4bit(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_4bit; +# if defined(GHASH) + ctx->ghash = gcm_ghash_4bit; +# else + ctx->ghash = NULL; +# endif + } +# elif defined(GHASH_ASM_SPARC) + if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) { + gcm_init_vis3(ctx->Htable, ctx->H.u); + 
ctx->gmult = gcm_gmult_vis3; + ctx->ghash = gcm_ghash_vis3; + } else { + gcm_init_4bit(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } +# elif defined(GHASH_ASM_PPC) + if (OPENSSL_ppccap_P & PPC_CRYPTO207) { + gcm_init_p8(ctx->Htable, ctx->H.u); + ctx->gmult = gcm_gmult_p8; + ctx->ghash = gcm_ghash_p8; } else { gcm_init_4bit(ctx->Htable, ctx->H.u); ctx->gmult = gcm_gmult_4bit; +# if defined(GHASH) ctx->ghash = gcm_ghash_4bit; +# else + ctx->ghash = NULL; +# endif } # else gcm_init_4bit(ctx->Htable, ctx->H.u); diff --git a/deps/openssl/openssl/crypto/modes/modes.h b/deps/openssl/openssl/crypto/modes/modes.h index 880f020d58e2a3..fd488499a0b49f 100644 --- a/deps/openssl/openssl/crypto/modes/modes.h +++ b/deps/openssl/openssl/crypto/modes/modes.h @@ -148,6 +148,16 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char *inp, unsigned char *out, size_t len, int enc); +size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, size_t inlen, + block128_f block); + +size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, size_t inlen, + block128_f block); + #ifdef __cplusplus } #endif diff --git a/deps/openssl/openssl/crypto/modes/modes_lcl.h b/deps/openssl/openssl/crypto/modes/modes_lcl.h index 296849b8670c8a..900f54ca2b87ef 100644 --- a/deps/openssl/openssl/crypto/modes/modes_lcl.h +++ b/deps/openssl/openssl/crypto/modes/modes_lcl.h @@ -25,11 +25,14 @@ typedef unsigned int u32; typedef unsigned char u8; #define STRICT_ALIGNMENT 1 -#if defined(__i386) || defined(__i386__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ - defined(__s390__) || defined(__s390x__) -# undef STRICT_ALIGNMENT +#ifndef PEDANTIC +# if defined(__i386) || defined(__i386__) || \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \ + defined(__aarch64__) || \ + defined(__s390__) || defined(__s390x__) +# undef STRICT_ALIGNMENT +# endif #endif #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) @@ -49,8 +52,15 @@ typedef unsigned char u8; # define BSWAP4(x) ({ u32 ret=(x); \ asm ("bswapl %0" \ : "+r"(ret)); ret; }) +# elif defined(__aarch64__) +# define BSWAP8(x) ({ u64 ret; \ + asm ("rev %0,%1" \ + : "=r"(ret) : "r"(x)); ret; }) +# define BSWAP4(x) ({ u32 ret; \ + asm ("rev %w0,%w1" \ + : "=r"(ret) : "r"(x)); ret; }) # elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT) -# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ +# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ asm ("rev %0,%0; rev %1,%1" \ : "+r"(hi),"+r"(lo)); \ (u64)hi<<32|lo; }) diff --git a/deps/openssl/openssl/crypto/modes/wrap128.c b/deps/openssl/openssl/crypto/modes/wrap128.c new file mode 100644 index 00000000000000..4dcaf0326fa8da --- /dev/null +++ b/deps/openssl/openssl/crypto/modes/wrap128.c @@ -0,0 +1,138 @@ +/* crypto/modes/wrap128.c */ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + */ + +#include "cryptlib.h" +#include + +static const unsigned char default_iv[] = { + 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, +}; + +/* + * Input size limit: lower than maximum of standards but far larger than + * anything that will be used in practice. 
+ */ +#define CRYPTO128_WRAP_MAX (1UL << 31) + +size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, size_t inlen, + block128_f block) +{ + unsigned char *A, B[16], *R; + size_t i, j, t; + if ((inlen & 0x7) || (inlen < 8) || (inlen > CRYPTO128_WRAP_MAX)) + return 0; + A = B; + t = 1; + memcpy(out + 8, in, inlen); + if (!iv) + iv = default_iv; + + memcpy(A, iv, 8); + + for (j = 0; j < 6; j++) { + R = out + 8; + for (i = 0; i < inlen; i += 8, t++, R += 8) { + memcpy(B + 8, R, 8); + block(B, B, key); + A[7] ^= (unsigned char)(t & 0xff); + if (t > 0xff) { + A[6] ^= (unsigned char)((t >> 8) & 0xff); + A[5] ^= (unsigned char)((t >> 16) & 0xff); + A[4] ^= (unsigned char)((t >> 24) & 0xff); + } + memcpy(R, B + 8, 8); + } + } + memcpy(out, A, 8); + return inlen + 8; +} + +size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, size_t inlen, + block128_f block) +{ + unsigned char *A, B[16], *R; + size_t i, j, t; + inlen -= 8; + if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX)) + return 0; + A = B; + t = 6 * (inlen >> 3); + memcpy(A, in, 8); + memcpy(out, in + 8, inlen); + for (j = 0; j < 6; j++) { + R = out + inlen - 8; + for (i = 0; i < inlen; i += 8, t--, R -= 8) { + A[7] ^= (unsigned char)(t & 0xff); + if (t > 0xff) { + A[6] ^= (unsigned char)((t >> 8) & 0xff); + A[5] ^= (unsigned char)((t >> 16) & 0xff); + A[4] ^= (unsigned char)((t >> 24) & 0xff); + } + memcpy(B + 8, R, 8); + block(B, B, key); + memcpy(R, B + 8, 8); + } + } + if (!iv) + iv = default_iv; + if (memcmp(A, iv, 8)) { + OPENSSL_cleanse(out, inlen); + return 0; + } + return inlen; +} diff --git a/deps/openssl/openssl/crypto/o_str.c b/deps/openssl/openssl/crypto/o_str.c index b23ef323b12416..4e2d096704f000 100644 --- a/deps/openssl/openssl/crypto/o_str.c +++ b/deps/openssl/openssl/crypto/o_str.c @@ -62,7 +62,7 @@ #include "o_str.h" #if !defined(OPENSSL_IMPLEMENTS_strncasecmp) && \ - !defined(OPENSSL_SYSNAME_WIN32) && \ + !defined(OPENSSL_SYSNAME_WIN32) && !defined(OPENSSL_SYSNAME_WINCE) && \ !defined(NETWARE_CLIB) # include #endif diff --git a/deps/openssl/openssl/crypto/o_time.c b/deps/openssl/openssl/crypto/o_time.c index e18b71d484ff63..58413fe97d09d3 100644 --- a/deps/openssl/openssl/crypto/o_time.c +++ b/deps/openssl/openssl/crypto/o_time.c @@ -246,8 +246,72 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result) static long date_to_julian(int y, int m, int d); static void julian_to_date(long jd, int *y, int *m, int *d); +static int julian_adj(const struct tm *tm, int off_day, long offset_sec, + long *pday, int *psec); int OPENSSL_gmtime_adj(struct tm *tm, int off_day, long offset_sec) +{ + int time_sec, time_year, time_month, time_day; + long time_jd; + + /* Convert time and offset into julian day and seconds */ + if (!julian_adj(tm, off_day, offset_sec, &time_jd, &time_sec)) + return 0; + + /* Convert Julian day back to date */ + + julian_to_date(time_jd, &time_year, &time_month, &time_day); + + if (time_year < 1900 || time_year > 9999) + return 0; + + /* Update tm structure */ + + tm->tm_year = time_year - 1900; + tm->tm_mon = time_month - 1; + tm->tm_mday = time_day; + + tm->tm_hour = time_sec / 3600; + tm->tm_min = (time_sec / 60) % 60; + tm->tm_sec = time_sec % 60; + + return 1; + +} + +int OPENSSL_gmtime_diff(int *pday, int *psec, + const struct tm *from, const struct tm *to) +{ + int from_sec, to_sec, diff_sec; + long from_jd, to_jd, diff_day; + if (!julian_adj(from, 0, 0, &from_jd, 
&from_sec)) + return 0; + if (!julian_adj(to, 0, 0, &to_jd, &to_sec)) + return 0; + diff_day = to_jd - from_jd; + diff_sec = to_sec - from_sec; + /* Adjust differences so both positive or both negative */ + if (diff_day > 0 && diff_sec < 0) { + diff_day--; + diff_sec += SECS_PER_DAY; + } + if (diff_day < 0 && diff_sec > 0) { + diff_day++; + diff_sec -= SECS_PER_DAY; + } + + if (pday) + *pday = (int)diff_day; + if (psec) + *psec = diff_sec; + + return 1; + +} + +/* Convert tm structure and offset into julian day and seconds */ +static int julian_adj(const struct tm *tm, int off_day, long offset_sec, + long *pday, int *psec) { int offset_hms, offset_day; long time_jd; @@ -284,25 +348,9 @@ int OPENSSL_gmtime_adj(struct tm *tm, int off_day, long offset_sec) if (time_jd < 0) return 0; - /* Convert Julian day back to date */ - - julian_to_date(time_jd, &time_year, &time_month, &time_day); - - if (time_year < 1900 || time_year > 9999) - return 0; - - /* Update tm structure */ - - tm->tm_year = time_year - 1900; - tm->tm_mon = time_month - 1; - tm->tm_mday = time_day; - - tm->tm_hour = offset_hms / 3600; - tm->tm_min = (offset_hms / 60) % 60; - tm->tm_sec = offset_hms % 60; - + *pday = time_jd; + *psec = offset_hms; return 1; - } /* @@ -354,27 +402,39 @@ int main(int argc, char **argv) int check_time(long offset) { - struct tm tm1, tm2; + struct tm tm1, tm2, o1; + int off_day, off_sec; + long toffset; time_t t1, t2; time(&t1); t2 = t1 + offset; OPENSSL_gmtime(&t2, &tm2); OPENSSL_gmtime(&t1, &tm1); + o1 = tm1; OPENSSL_gmtime_adj(&tm1, 0, offset); - if ((tm1.tm_year == tm2.tm_year) && - (tm1.tm_mon == tm2.tm_mon) && - (tm1.tm_mday == tm2.tm_mday) && - (tm1.tm_hour == tm2.tm_hour) && - (tm1.tm_min == tm2.tm_min) && (tm1.tm_sec == tm2.tm_sec)) - return 1; - fprintf(stderr, "TIME ERROR!!\n"); - fprintf(stderr, "Time1: %d/%d/%d, %d:%02d:%02d\n", - tm2.tm_mday, tm2.tm_mon + 1, tm2.tm_year + 1900, - tm2.tm_hour, tm2.tm_min, tm2.tm_sec); - fprintf(stderr, "Time2: %d/%d/%d, %d:%02d:%02d\n", - tm1.tm_mday, tm1.tm_mon + 1, tm1.tm_year + 1900, - tm1.tm_hour, tm1.tm_min, tm1.tm_sec); - return 0; + if ((tm1.tm_year != tm2.tm_year) || + (tm1.tm_mon != tm2.tm_mon) || + (tm1.tm_mday != tm2.tm_mday) || + (tm1.tm_hour != tm2.tm_hour) || + (tm1.tm_min != tm2.tm_min) || (tm1.tm_sec != tm2.tm_sec)) { + fprintf(stderr, "TIME ERROR!!\n"); + fprintf(stderr, "Time1: %d/%d/%d, %d:%02d:%02d\n", + tm2.tm_mday, tm2.tm_mon + 1, tm2.tm_year + 1900, + tm2.tm_hour, tm2.tm_min, tm2.tm_sec); + fprintf(stderr, "Time2: %d/%d/%d, %d:%02d:%02d\n", + tm1.tm_mday, tm1.tm_mon + 1, tm1.tm_year + 1900, + tm1.tm_hour, tm1.tm_min, tm1.tm_sec); + return 0; + } + OPENSSL_gmtime_diff(&o1, &tm1, &off_day, &off_sec); + toffset = (long)off_day *SECS_PER_DAY + off_sec; + if (offset != toffset) { + fprintf(stderr, "TIME OFFSET ERROR!!\n"); + fprintf(stderr, "Expected %ld, Got %ld (%d:%d)\n", + offset, toffset, off_day, off_sec); + return 0; + } + return 1; } #endif diff --git a/deps/openssl/openssl/crypto/o_time.h b/deps/openssl/openssl/crypto/o_time.h index 901b2005677e6a..a83a3d247d41ee 100644 --- a/deps/openssl/openssl/crypto/o_time.h +++ b/deps/openssl/openssl/crypto/o_time.h @@ -64,5 +64,7 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result); int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec); +int OPENSSL_gmtime_diff(int *pday, int *psec, + const struct tm *from, const struct tm *to); #endif diff --git a/deps/openssl/openssl/crypto/objects/obj_dat.c b/deps/openssl/openssl/crypto/objects/obj_dat.c index 
e42a1c147de8e8..5cd755d77d63bd 100644 --- a/deps/openssl/openssl/crypto/objects/obj_dat.c +++ b/deps/openssl/openssl/crypto/objects/obj_dat.c @@ -142,7 +142,7 @@ static unsigned long added_obj_hash(const ADDED_OBJ *ca) return 0; } ret &= 0x3fffffffL; - ret |= ca->type << 30L; + ret |= ((unsigned long)ca->type) << 30L; return (ret); } diff --git a/deps/openssl/openssl/crypto/objects/obj_dat.h b/deps/openssl/openssl/crypto/objects/obj_dat.h index bc69665bc582be..b7e3cf280ee033 100644 --- a/deps/openssl/openssl/crypto/objects/obj_dat.h +++ b/deps/openssl/openssl/crypto/objects/obj_dat.h @@ -62,12 +62,12 @@ * [including the GNU Public Licence.] */ -#define NUM_NID 920 -#define NUM_SN 913 -#define NUM_LN 913 -#define NUM_OBJ 857 +#define NUM_NID 958 +#define NUM_SN 951 +#define NUM_LN 951 +#define NUM_OBJ 890 -static const unsigned char lvalues[5974]={ +static const unsigned char lvalues[6255]={ 0x2A,0x86,0x48,0x86,0xF7,0x0D, /* [ 0] OBJ_rsadsi */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01, /* [ 6] OBJ_pkcs */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x02,0x02, /* [ 13] OBJ_md2 */ @@ -919,6 +919,39 @@ static const unsigned char lvalues[5974]={ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x08,/* [5946] OBJ_mgf1 */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x0A,/* [5955] OBJ_rsassaPss */ 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x07,/* [5964] OBJ_rsaesOaep */ +0x2A,0x86,0x48,0xCE,0x3E,0x02,0x01, /* [5973] OBJ_dhpublicnumber */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x01,/* [5980] OBJ_brainpoolP160r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x02,/* [5989] OBJ_brainpoolP160t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x03,/* [5998] OBJ_brainpoolP192r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x04,/* [6007] OBJ_brainpoolP192t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x05,/* [6016] OBJ_brainpoolP224r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x06,/* [6025] OBJ_brainpoolP224t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x07,/* [6034] OBJ_brainpoolP256r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x08,/* [6043] OBJ_brainpoolP256t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x09,/* [6052] OBJ_brainpoolP320r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0A,/* [6061] OBJ_brainpoolP320t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0B,/* [6070] OBJ_brainpoolP384r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0C,/* [6079] OBJ_brainpoolP384t1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0D,/* [6088] OBJ_brainpoolP512r1 */ +0x2B,0x24,0x03,0x03,0x02,0x08,0x01,0x01,0x0E,/* [6097] OBJ_brainpoolP512t1 */ +0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01,0x01,0x09,/* [6106] OBJ_pSpecified */ +0x2B,0x81,0x05,0x10,0x86,0x48,0x3F,0x00,0x02,/* [6115] OBJ_dhSinglePass_stdDH_sha1kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x00, /* [6124] OBJ_dhSinglePass_stdDH_sha224kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x01, /* [6130] OBJ_dhSinglePass_stdDH_sha256kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x02, /* [6136] OBJ_dhSinglePass_stdDH_sha384kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0B,0x03, /* [6142] OBJ_dhSinglePass_stdDH_sha512kdf_scheme */ +0x2B,0x81,0x05,0x10,0x86,0x48,0x3F,0x00,0x03,/* [6148] OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x00, /* [6157] OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x01, /* [6163] OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x02, /* [6169] OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme */ +0x2B,0x81,0x04,0x01,0x0E,0x03, /* [6175] OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x02,/* 
[6181] OBJ_ct_precert_scts */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x03,/* [6191] OBJ_ct_precert_poison */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x04,/* [6201] OBJ_ct_precert_signer */ +0x2B,0x06,0x01,0x04,0x01,0xD6,0x79,0x02,0x04,0x05,/* [6211] OBJ_ct_cert_scts */ +0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x01,/* [6221] OBJ_jurisdictionLocalityName */ +0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x02,/* [6232] OBJ_jurisdictionStateOrProvinceName */ +0x2B,0x06,0x01,0x04,0x01,0x82,0x37,0x3C,0x02,0x01,0x03,/* [6243] OBJ_jurisdictionCountryName */ }; static const ASN1_OBJECT nid_objs[NUM_NID]={ @@ -2399,12 +2432,95 @@ static const ASN1_OBJECT nid_objs[NUM_NID]={ {"AES-256-CBC-HMAC-SHA1","aes-256-cbc-hmac-sha1", NID_aes_256_cbc_hmac_sha1,0,NULL,0}, {"RSAES-OAEP","rsaesOaep",NID_rsaesOaep,9,&(lvalues[5964]),0}, +{"dhpublicnumber","X9.42 DH",NID_dhpublicnumber,7,&(lvalues[5973]),0}, +{"brainpoolP160r1","brainpoolP160r1",NID_brainpoolP160r1,9, + &(lvalues[5980]),0}, +{"brainpoolP160t1","brainpoolP160t1",NID_brainpoolP160t1,9, + &(lvalues[5989]),0}, +{"brainpoolP192r1","brainpoolP192r1",NID_brainpoolP192r1,9, + &(lvalues[5998]),0}, +{"brainpoolP192t1","brainpoolP192t1",NID_brainpoolP192t1,9, + &(lvalues[6007]),0}, +{"brainpoolP224r1","brainpoolP224r1",NID_brainpoolP224r1,9, + &(lvalues[6016]),0}, +{"brainpoolP224t1","brainpoolP224t1",NID_brainpoolP224t1,9, + &(lvalues[6025]),0}, +{"brainpoolP256r1","brainpoolP256r1",NID_brainpoolP256r1,9, + &(lvalues[6034]),0}, +{"brainpoolP256t1","brainpoolP256t1",NID_brainpoolP256t1,9, + &(lvalues[6043]),0}, +{"brainpoolP320r1","brainpoolP320r1",NID_brainpoolP320r1,9, + &(lvalues[6052]),0}, +{"brainpoolP320t1","brainpoolP320t1",NID_brainpoolP320t1,9, + &(lvalues[6061]),0}, +{"brainpoolP384r1","brainpoolP384r1",NID_brainpoolP384r1,9, + &(lvalues[6070]),0}, +{"brainpoolP384t1","brainpoolP384t1",NID_brainpoolP384t1,9, + &(lvalues[6079]),0}, +{"brainpoolP512r1","brainpoolP512r1",NID_brainpoolP512r1,9, + &(lvalues[6088]),0}, +{"brainpoolP512t1","brainpoolP512t1",NID_brainpoolP512t1,9, + &(lvalues[6097]),0}, +{"PSPECIFIED","pSpecified",NID_pSpecified,9,&(lvalues[6106]),0}, +{"dhSinglePass-stdDH-sha1kdf-scheme", + "dhSinglePass-stdDH-sha1kdf-scheme", + NID_dhSinglePass_stdDH_sha1kdf_scheme,9,&(lvalues[6115]),0}, +{"dhSinglePass-stdDH-sha224kdf-scheme", + "dhSinglePass-stdDH-sha224kdf-scheme", + NID_dhSinglePass_stdDH_sha224kdf_scheme,6,&(lvalues[6124]),0}, +{"dhSinglePass-stdDH-sha256kdf-scheme", + "dhSinglePass-stdDH-sha256kdf-scheme", + NID_dhSinglePass_stdDH_sha256kdf_scheme,6,&(lvalues[6130]),0}, +{"dhSinglePass-stdDH-sha384kdf-scheme", + "dhSinglePass-stdDH-sha384kdf-scheme", + NID_dhSinglePass_stdDH_sha384kdf_scheme,6,&(lvalues[6136]),0}, +{"dhSinglePass-stdDH-sha512kdf-scheme", + "dhSinglePass-stdDH-sha512kdf-scheme", + NID_dhSinglePass_stdDH_sha512kdf_scheme,6,&(lvalues[6142]),0}, +{"dhSinglePass-cofactorDH-sha1kdf-scheme", + "dhSinglePass-cofactorDH-sha1kdf-scheme", + NID_dhSinglePass_cofactorDH_sha1kdf_scheme,9,&(lvalues[6148]),0}, +{"dhSinglePass-cofactorDH-sha224kdf-scheme", + "dhSinglePass-cofactorDH-sha224kdf-scheme", + NID_dhSinglePass_cofactorDH_sha224kdf_scheme,6,&(lvalues[6157]),0}, +{"dhSinglePass-cofactorDH-sha256kdf-scheme", + "dhSinglePass-cofactorDH-sha256kdf-scheme", + NID_dhSinglePass_cofactorDH_sha256kdf_scheme,6,&(lvalues[6163]),0}, +{"dhSinglePass-cofactorDH-sha384kdf-scheme", + "dhSinglePass-cofactorDH-sha384kdf-scheme", + NID_dhSinglePass_cofactorDH_sha384kdf_scheme,6,&(lvalues[6169]),0}, 
+{"dhSinglePass-cofactorDH-sha512kdf-scheme", + "dhSinglePass-cofactorDH-sha512kdf-scheme", + NID_dhSinglePass_cofactorDH_sha512kdf_scheme,6,&(lvalues[6175]),0}, +{"dh-std-kdf","dh-std-kdf",NID_dh_std_kdf,0,NULL,0}, +{"dh-cofactor-kdf","dh-cofactor-kdf",NID_dh_cofactor_kdf,0,NULL,0}, +{"AES-128-CBC-HMAC-SHA256","aes-128-cbc-hmac-sha256", + NID_aes_128_cbc_hmac_sha256,0,NULL,0}, +{"AES-192-CBC-HMAC-SHA256","aes-192-cbc-hmac-sha256", + NID_aes_192_cbc_hmac_sha256,0,NULL,0}, +{"AES-256-CBC-HMAC-SHA256","aes-256-cbc-hmac-sha256", + NID_aes_256_cbc_hmac_sha256,0,NULL,0}, +{"ct_precert_scts","CT Precertificate SCTs",NID_ct_precert_scts,10, + &(lvalues[6181]),0}, +{"ct_precert_poison","CT Precertificate Poison",NID_ct_precert_poison, + 10,&(lvalues[6191]),0}, +{"ct_precert_signer","CT Precertificate Signer",NID_ct_precert_signer, + 10,&(lvalues[6201]),0}, +{"ct_cert_scts","CT Certificate SCTs",NID_ct_cert_scts,10, + &(lvalues[6211]),0}, +{"jurisdictionL","jurisdictionLocalityName", + NID_jurisdictionLocalityName,11,&(lvalues[6221]),0}, +{"jurisdictionST","jurisdictionStateOrProvinceName", + NID_jurisdictionStateOrProvinceName,11,&(lvalues[6232]),0}, +{"jurisdictionC","jurisdictionCountryName", + NID_jurisdictionCountryName,11,&(lvalues[6243]),0}, }; static const unsigned int sn_objs[NUM_SN]={ 364, /* "AD_DVCS" */ 419, /* "AES-128-CBC" */ 916, /* "AES-128-CBC-HMAC-SHA1" */ +948, /* "AES-128-CBC-HMAC-SHA256" */ 421, /* "AES-128-CFB" */ 650, /* "AES-128-CFB1" */ 653, /* "AES-128-CFB8" */ @@ -2414,6 +2530,7 @@ static const unsigned int sn_objs[NUM_SN]={ 913, /* "AES-128-XTS" */ 423, /* "AES-192-CBC" */ 917, /* "AES-192-CBC-HMAC-SHA1" */ +949, /* "AES-192-CBC-HMAC-SHA256" */ 425, /* "AES-192-CFB" */ 651, /* "AES-192-CFB1" */ 654, /* "AES-192-CFB8" */ @@ -2422,6 +2539,7 @@ static const unsigned int sn_objs[NUM_SN]={ 424, /* "AES-192-OFB" */ 427, /* "AES-256-CBC" */ 918, /* "AES-256-CBC-HMAC-SHA1" */ +950, /* "AES-256-CBC-HMAC-SHA256" */ 429, /* "AES-256-CFB" */ 652, /* "AES-256-CFB1" */ 655, /* "AES-256-CFB8" */ @@ -2537,6 +2655,7 @@ static const unsigned int sn_objs[NUM_SN]={ 69, /* "PBKDF2" */ 162, /* "PBMAC1" */ 127, /* "PKIX" */ +935, /* "PSPECIFIED" */ 98, /* "RC2-40-CBC" */ 166, /* "RC2-64-CBC" */ 37, /* "RC2-CBC" */ @@ -2613,6 +2732,20 @@ static const unsigned int sn_objs[NUM_SN]={ 87, /* "basicConstraints" */ 365, /* "basicOCSPResponse" */ 285, /* "biometricInfo" */ +921, /* "brainpoolP160r1" */ +922, /* "brainpoolP160t1" */ +923, /* "brainpoolP192r1" */ +924, /* "brainpoolP192t1" */ +925, /* "brainpoolP224r1" */ +926, /* "brainpoolP224t1" */ +927, /* "brainpoolP256r1" */ +928, /* "brainpoolP256t1" */ +929, /* "brainpoolP320r1" */ +930, /* "brainpoolP320t1" */ +931, /* "brainpoolP384r1" */ +932, /* "brainpoolP384t1" */ +933, /* "brainpoolP512r1" */ +934, /* "brainpoolP512t1" */ 494, /* "buildingName" */ 860, /* "businessCategory" */ 691, /* "c2onb191v4" */ @@ -2658,6 +2791,10 @@ static const unsigned int sn_objs[NUM_SN]={ 884, /* "crossCertificatePair" */ 806, /* "cryptocom" */ 805, /* "cryptopro" */ +954, /* "ct_cert_scts" */ +952, /* "ct_precert_poison" */ +951, /* "ct_precert_scts" */ +953, /* "ct_precert_signer" */ 500, /* "dITRedirect" */ 451, /* "dNSDomain" */ 495, /* "dSAQuality" */ @@ -2667,7 +2804,20 @@ static const unsigned int sn_objs[NUM_SN]={ 891, /* "deltaRevocationList" */ 107, /* "description" */ 871, /* "destinationIndicator" */ +947, /* "dh-cofactor-kdf" */ +946, /* "dh-std-kdf" */ 28, /* "dhKeyAgreement" */ +941, /* "dhSinglePass-cofactorDH-sha1kdf-scheme" */ +942, /* 
"dhSinglePass-cofactorDH-sha224kdf-scheme" */ +943, /* "dhSinglePass-cofactorDH-sha256kdf-scheme" */ +944, /* "dhSinglePass-cofactorDH-sha384kdf-scheme" */ +945, /* "dhSinglePass-cofactorDH-sha512kdf-scheme" */ +936, /* "dhSinglePass-stdDH-sha1kdf-scheme" */ +937, /* "dhSinglePass-stdDH-sha224kdf-scheme" */ +938, /* "dhSinglePass-stdDH-sha256kdf-scheme" */ +939, /* "dhSinglePass-stdDH-sha384kdf-scheme" */ +940, /* "dhSinglePass-stdDH-sha512kdf-scheme" */ +920, /* "dhpublicnumber" */ 382, /* "directory" */ 887, /* "distinguishedName" */ 892, /* "dmdName" */ @@ -2978,6 +3128,9 @@ static const unsigned int sn_objs[NUM_SN]={ 86, /* "issuerAltName" */ 770, /* "issuingDistributionPoint" */ 492, /* "janetMailbox" */ +957, /* "jurisdictionC" */ +955, /* "jurisdictionL" */ +956, /* "jurisdictionST" */ 150, /* "keyBag" */ 83, /* "keyUsage" */ 477, /* "lastModifiedBy" */ @@ -3328,6 +3481,10 @@ static const unsigned int ln_objs[NUM_LN]={ 285, /* "Biometric Info" */ 179, /* "CA Issuers" */ 785, /* "CA Repository" */ +954, /* "CT Certificate SCTs" */ +952, /* "CT Precertificate Poison" */ +951, /* "CT Precertificate SCTs" */ +953, /* "CT Precertificate Signer" */ 131, /* "Code Signing" */ 783, /* "Diffie-Hellman based MAC" */ 382, /* "Directory" */ @@ -3451,6 +3608,7 @@ static const unsigned int ln_objs[NUM_LN]={ 85, /* "X509v3 Subject Alternative Name" */ 769, /* "X509v3 Subject Directory Attributes" */ 82, /* "X509v3 Subject Key Identifier" */ +920, /* "X9.42 DH" */ 184, /* "X9.57" */ 185, /* "X9.57 CM ?" */ 478, /* "aRecord" */ @@ -3463,6 +3621,7 @@ static const unsigned int ln_objs[NUM_LN]={ 606, /* "additional verification" */ 419, /* "aes-128-cbc" */ 916, /* "aes-128-cbc-hmac-sha1" */ +948, /* "aes-128-cbc-hmac-sha256" */ 896, /* "aes-128-ccm" */ 421, /* "aes-128-cfb" */ 650, /* "aes-128-cfb1" */ @@ -3474,6 +3633,7 @@ static const unsigned int ln_objs[NUM_LN]={ 913, /* "aes-128-xts" */ 423, /* "aes-192-cbc" */ 917, /* "aes-192-cbc-hmac-sha1" */ +949, /* "aes-192-cbc-hmac-sha256" */ 899, /* "aes-192-ccm" */ 425, /* "aes-192-cfb" */ 651, /* "aes-192-cfb1" */ @@ -3484,6 +3644,7 @@ static const unsigned int ln_objs[NUM_LN]={ 424, /* "aes-192-ofb" */ 427, /* "aes-256-cbc" */ 918, /* "aes-256-cbc-hmac-sha1" */ +950, /* "aes-256-cbc-hmac-sha256" */ 902, /* "aes-256-ccm" */ 429, /* "aes-256-cfb" */ 652, /* "aes-256-cfb1" */ @@ -3502,6 +3663,20 @@ static const unsigned int ln_objs[NUM_LN]={ 93, /* "bf-cfb" */ 92, /* "bf-ecb" */ 94, /* "bf-ofb" */ +921, /* "brainpoolP160r1" */ +922, /* "brainpoolP160t1" */ +923, /* "brainpoolP192r1" */ +924, /* "brainpoolP192t1" */ +925, /* "brainpoolP224r1" */ +926, /* "brainpoolP224t1" */ +927, /* "brainpoolP256r1" */ +928, /* "brainpoolP256t1" */ +929, /* "brainpoolP320r1" */ +930, /* "brainpoolP320t1" */ +931, /* "brainpoolP384r1" */ +932, /* "brainpoolP384t1" */ +933, /* "brainpoolP512r1" */ +934, /* "brainpoolP512t1" */ 494, /* "buildingName" */ 860, /* "businessCategory" */ 691, /* "c2onb191v4" */ @@ -3593,7 +3768,19 @@ static const unsigned int ln_objs[NUM_LN]={ 107, /* "description" */ 871, /* "destinationIndicator" */ 80, /* "desx-cbc" */ +947, /* "dh-cofactor-kdf" */ +946, /* "dh-std-kdf" */ 28, /* "dhKeyAgreement" */ +941, /* "dhSinglePass-cofactorDH-sha1kdf-scheme" */ +942, /* "dhSinglePass-cofactorDH-sha224kdf-scheme" */ +943, /* "dhSinglePass-cofactorDH-sha256kdf-scheme" */ +944, /* "dhSinglePass-cofactorDH-sha384kdf-scheme" */ +945, /* "dhSinglePass-cofactorDH-sha512kdf-scheme" */ +936, /* "dhSinglePass-stdDH-sha1kdf-scheme" */ +937, /* 
"dhSinglePass-stdDH-sha224kdf-scheme" */ +938, /* "dhSinglePass-stdDH-sha256kdf-scheme" */ +939, /* "dhSinglePass-stdDH-sha384kdf-scheme" */ +940, /* "dhSinglePass-stdDH-sha512kdf-scheme" */ 11, /* "directory services (X.500)" */ 378, /* "directory services - algorithms" */ 887, /* "distinguishedName" */ @@ -3881,6 +4068,9 @@ static const unsigned int ln_objs[NUM_LN]={ 645, /* "itu-t" */ 492, /* "janetMailbox" */ 646, /* "joint-iso-itu-t" */ +957, /* "jurisdictionCountryName" */ +955, /* "jurisdictionLocalityName" */ +956, /* "jurisdictionStateOrProvinceName" */ 150, /* "keyBag" */ 773, /* "kisa" */ 477, /* "lastModifiedBy" */ @@ -3917,6 +4107,7 @@ static const unsigned int ln_objs[NUM_LN]={ 18, /* "organizationalUnitName" */ 475, /* "otherMailbox" */ 876, /* "owner" */ +935, /* "pSpecified" */ 489, /* "pagerTelephoneNumber" */ 782, /* "password based MAC" */ 374, /* "path" */ @@ -4560,6 +4751,14 @@ static const unsigned int obj_objs[NUM_OBJ]={ 505, /* OBJ_mime_mhs_headings 1 3 6 1 7 1 1 */ 506, /* OBJ_mime_mhs_bodies 1 3 6 1 7 1 2 */ 119, /* OBJ_ripemd160WithRSA 1 3 36 3 3 1 2 */ +937, /* OBJ_dhSinglePass_stdDH_sha224kdf_scheme 1 3 132 1 11 0 */ +938, /* OBJ_dhSinglePass_stdDH_sha256kdf_scheme 1 3 132 1 11 1 */ +939, /* OBJ_dhSinglePass_stdDH_sha384kdf_scheme 1 3 132 1 11 2 */ +940, /* OBJ_dhSinglePass_stdDH_sha512kdf_scheme 1 3 132 1 11 3 */ +942, /* OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme 1 3 132 1 14 0 */ +943, /* OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme 1 3 132 1 14 1 */ +944, /* OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme 1 3 132 1 14 2 */ +945, /* OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme 1 3 132 1 14 3 */ 631, /* OBJ_setAttr_GenCryptgrm 2 23 42 3 3 3 1 */ 632, /* OBJ_setAttr_T2Enc 2 23 42 3 3 4 1 */ 633, /* OBJ_setAttr_T2cleartxt 2 23 42 3 3 4 2 */ @@ -4608,6 +4807,7 @@ static const unsigned int obj_objs[NUM_OBJ]={ 416, /* OBJ_ecdsa_with_SHA1 1 2 840 10045 4 1 */ 791, /* OBJ_ecdsa_with_Recommended 1 2 840 10045 4 2 */ 792, /* OBJ_ecdsa_with_Specified 1 2 840 10045 4 3 */ +920, /* OBJ_dhpublicnumber 1 2 840 10046 2 1 */ 258, /* OBJ_id_pkix_mod 1 3 6 1 5 5 7 0 */ 175, /* OBJ_id_pe 1 3 6 1 5 5 7 1 */ 259, /* OBJ_id_qt 1 3 6 1 5 5 7 2 */ @@ -4825,6 +5025,7 @@ static const unsigned int obj_objs[NUM_OBJ]={ 644, /* OBJ_rsaOAEPEncryptionSET 1 2 840 113549 1 1 6 */ 919, /* OBJ_rsaesOaep 1 2 840 113549 1 1 7 */ 911, /* OBJ_mgf1 1 2 840 113549 1 1 8 */ +935, /* OBJ_pSpecified 1 2 840 113549 1 1 9 */ 912, /* OBJ_rsassaPss 1 2 840 113549 1 1 10 */ 668, /* OBJ_sha256WithRSAEncryption 1 2 840 113549 1 1 11 */ 669, /* OBJ_sha384WithRSAEncryption 1 2 840 113549 1 1 12 */ @@ -4886,6 +5087,22 @@ static const unsigned int obj_objs[NUM_OBJ]={ 373, /* OBJ_id_pkix_OCSP_valid 1 3 6 1 5 5 7 48 1 9 */ 374, /* OBJ_id_pkix_OCSP_path 1 3 6 1 5 5 7 48 1 10 */ 375, /* OBJ_id_pkix_OCSP_trustRoot 1 3 6 1 5 5 7 48 1 11 */ +921, /* OBJ_brainpoolP160r1 1 3 36 3 3 2 8 1 1 1 */ +922, /* OBJ_brainpoolP160t1 1 3 36 3 3 2 8 1 1 2 */ +923, /* OBJ_brainpoolP192r1 1 3 36 3 3 2 8 1 1 3 */ +924, /* OBJ_brainpoolP192t1 1 3 36 3 3 2 8 1 1 4 */ +925, /* OBJ_brainpoolP224r1 1 3 36 3 3 2 8 1 1 5 */ +926, /* OBJ_brainpoolP224t1 1 3 36 3 3 2 8 1 1 6 */ +927, /* OBJ_brainpoolP256r1 1 3 36 3 3 2 8 1 1 7 */ +928, /* OBJ_brainpoolP256t1 1 3 36 3 3 2 8 1 1 8 */ +929, /* OBJ_brainpoolP320r1 1 3 36 3 3 2 8 1 1 9 */ +930, /* OBJ_brainpoolP320t1 1 3 36 3 3 2 8 1 1 10 */ +931, /* OBJ_brainpoolP384r1 1 3 36 3 3 2 8 1 1 11 */ +932, /* OBJ_brainpoolP384t1 1 3 36 3 3 2 8 1 1 12 */ +933, /* OBJ_brainpoolP512r1 1 3 36 3 3 2 8 1 1 13 */ 
+934, /* OBJ_brainpoolP512t1 1 3 36 3 3 2 8 1 1 14 */ +936, /* OBJ_dhSinglePass_stdDH_sha1kdf_scheme 1 3 133 16 840 63 0 2 */ +941, /* OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme 1 3 133 16 840 63 0 3 */ 418, /* OBJ_aes_128_ecb 2 16 840 1 101 3 4 1 1 */ 419, /* OBJ_aes_128_cbc 2 16 840 1 101 3 4 1 2 */ 420, /* OBJ_aes_128_ofb128 2 16 840 1 101 3 4 1 3 */ @@ -5013,6 +5230,10 @@ static const unsigned int obj_objs[NUM_OBJ]={ 138, /* OBJ_ms_efs 1 3 6 1 4 1 311 10 3 4 */ 648, /* OBJ_ms_smartcard_login 1 3 6 1 4 1 311 20 2 2 */ 649, /* OBJ_ms_upn 1 3 6 1 4 1 311 20 2 3 */ +951, /* OBJ_ct_precert_scts 1 3 6 1 4 1 11129 2 4 2 */ +952, /* OBJ_ct_precert_poison 1 3 6 1 4 1 11129 2 4 3 */ +953, /* OBJ_ct_precert_signer 1 3 6 1 4 1 11129 2 4 4 */ +954, /* OBJ_ct_cert_scts 1 3 6 1 4 1 11129 2 4 5 */ 751, /* OBJ_camellia_128_cbc 1 2 392 200011 61 1 1 1 2 */ 752, /* OBJ_camellia_192_cbc 1 2 392 200011 61 1 1 1 3 */ 753, /* OBJ_camellia_256_cbc 1 2 392 200011 61 1 1 1 4 */ @@ -5091,5 +5312,8 @@ static const unsigned int obj_objs[NUM_OBJ]={ 154, /* OBJ_secretBag 1 2 840 113549 1 12 10 1 5 */ 155, /* OBJ_safeContentsBag 1 2 840 113549 1 12 10 1 6 */ 34, /* OBJ_idea_cbc 1 3 6 1 4 1 188 7 1 1 2 */ +955, /* OBJ_jurisdictionLocalityName 1 3 6 1 4 1 311 60 2 1 1 */ +956, /* OBJ_jurisdictionStateOrProvinceName 1 3 6 1 4 1 311 60 2 1 2 */ +957, /* OBJ_jurisdictionCountryName 1 3 6 1 4 1 311 60 2 1 3 */ }; diff --git a/deps/openssl/openssl/crypto/objects/obj_mac.h b/deps/openssl/openssl/crypto/objects/obj_mac.h index f752aeff75b192..779c309b869b65 100644 --- a/deps/openssl/openssl/crypto/objects/obj_mac.h +++ b/deps/openssl/openssl/crypto/objects/obj_mac.h @@ -590,6 +590,11 @@ #define NID_mgf1 911 #define OBJ_mgf1 OBJ_pkcs1,8L +#define SN_pSpecified "PSPECIFIED" +#define LN_pSpecified "pSpecified" +#define NID_pSpecified 935 +#define OBJ_pSpecified OBJ_pkcs1,9L + #define SN_rsassaPss "RSASSA-PSS" #define LN_rsassaPss "rsassaPss" #define NID_rsassaPss 912 @@ -4029,3 +4034,161 @@ #define SN_aes_256_cbc_hmac_sha1 "AES-256-CBC-HMAC-SHA1" #define LN_aes_256_cbc_hmac_sha1 "aes-256-cbc-hmac-sha1" #define NID_aes_256_cbc_hmac_sha1 918 + +#define SN_aes_128_cbc_hmac_sha256 "AES-128-CBC-HMAC-SHA256" +#define LN_aes_128_cbc_hmac_sha256 "aes-128-cbc-hmac-sha256" +#define NID_aes_128_cbc_hmac_sha256 948 + +#define SN_aes_192_cbc_hmac_sha256 "AES-192-CBC-HMAC-SHA256" +#define LN_aes_192_cbc_hmac_sha256 "aes-192-cbc-hmac-sha256" +#define NID_aes_192_cbc_hmac_sha256 949 + +#define SN_aes_256_cbc_hmac_sha256 "AES-256-CBC-HMAC-SHA256" +#define LN_aes_256_cbc_hmac_sha256 "aes-256-cbc-hmac-sha256" +#define NID_aes_256_cbc_hmac_sha256 950 + +#define SN_dhpublicnumber "dhpublicnumber" +#define LN_dhpublicnumber "X9.42 DH" +#define NID_dhpublicnumber 920 +#define OBJ_dhpublicnumber OBJ_ISO_US,10046L,2L,1L + +#define SN_brainpoolP160r1 "brainpoolP160r1" +#define NID_brainpoolP160r1 921 +#define OBJ_brainpoolP160r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,1L + +#define SN_brainpoolP160t1 "brainpoolP160t1" +#define NID_brainpoolP160t1 922 +#define OBJ_brainpoolP160t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,2L + +#define SN_brainpoolP192r1 "brainpoolP192r1" +#define NID_brainpoolP192r1 923 +#define OBJ_brainpoolP192r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,3L + +#define SN_brainpoolP192t1 "brainpoolP192t1" +#define NID_brainpoolP192t1 924 +#define OBJ_brainpoolP192t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,4L + +#define SN_brainpoolP224r1 "brainpoolP224r1" +#define NID_brainpoolP224r1 925 +#define OBJ_brainpoolP224r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,5L + +#define SN_brainpoolP224t1 
"brainpoolP224t1" +#define NID_brainpoolP224t1 926 +#define OBJ_brainpoolP224t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,6L + +#define SN_brainpoolP256r1 "brainpoolP256r1" +#define NID_brainpoolP256r1 927 +#define OBJ_brainpoolP256r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,7L + +#define SN_brainpoolP256t1 "brainpoolP256t1" +#define NID_brainpoolP256t1 928 +#define OBJ_brainpoolP256t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,8L + +#define SN_brainpoolP320r1 "brainpoolP320r1" +#define NID_brainpoolP320r1 929 +#define OBJ_brainpoolP320r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,9L + +#define SN_brainpoolP320t1 "brainpoolP320t1" +#define NID_brainpoolP320t1 930 +#define OBJ_brainpoolP320t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,10L + +#define SN_brainpoolP384r1 "brainpoolP384r1" +#define NID_brainpoolP384r1 931 +#define OBJ_brainpoolP384r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,11L + +#define SN_brainpoolP384t1 "brainpoolP384t1" +#define NID_brainpoolP384t1 932 +#define OBJ_brainpoolP384t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,12L + +#define SN_brainpoolP512r1 "brainpoolP512r1" +#define NID_brainpoolP512r1 933 +#define OBJ_brainpoolP512r1 1L,3L,36L,3L,3L,2L,8L,1L,1L,13L + +#define SN_brainpoolP512t1 "brainpoolP512t1" +#define NID_brainpoolP512t1 934 +#define OBJ_brainpoolP512t1 1L,3L,36L,3L,3L,2L,8L,1L,1L,14L + +#define OBJ_x9_63_scheme 1L,3L,133L,16L,840L,63L,0L + +#define OBJ_secg_scheme OBJ_certicom_arc,1L + +#define SN_dhSinglePass_stdDH_sha1kdf_scheme "dhSinglePass-stdDH-sha1kdf-scheme" +#define NID_dhSinglePass_stdDH_sha1kdf_scheme 936 +#define OBJ_dhSinglePass_stdDH_sha1kdf_scheme OBJ_x9_63_scheme,2L + +#define SN_dhSinglePass_stdDH_sha224kdf_scheme "dhSinglePass-stdDH-sha224kdf-scheme" +#define NID_dhSinglePass_stdDH_sha224kdf_scheme 937 +#define OBJ_dhSinglePass_stdDH_sha224kdf_scheme OBJ_secg_scheme,11L,0L + +#define SN_dhSinglePass_stdDH_sha256kdf_scheme "dhSinglePass-stdDH-sha256kdf-scheme" +#define NID_dhSinglePass_stdDH_sha256kdf_scheme 938 +#define OBJ_dhSinglePass_stdDH_sha256kdf_scheme OBJ_secg_scheme,11L,1L + +#define SN_dhSinglePass_stdDH_sha384kdf_scheme "dhSinglePass-stdDH-sha384kdf-scheme" +#define NID_dhSinglePass_stdDH_sha384kdf_scheme 939 +#define OBJ_dhSinglePass_stdDH_sha384kdf_scheme OBJ_secg_scheme,11L,2L + +#define SN_dhSinglePass_stdDH_sha512kdf_scheme "dhSinglePass-stdDH-sha512kdf-scheme" +#define NID_dhSinglePass_stdDH_sha512kdf_scheme 940 +#define OBJ_dhSinglePass_stdDH_sha512kdf_scheme OBJ_secg_scheme,11L,3L + +#define SN_dhSinglePass_cofactorDH_sha1kdf_scheme "dhSinglePass-cofactorDH-sha1kdf-scheme" +#define NID_dhSinglePass_cofactorDH_sha1kdf_scheme 941 +#define OBJ_dhSinglePass_cofactorDH_sha1kdf_scheme OBJ_x9_63_scheme,3L + +#define SN_dhSinglePass_cofactorDH_sha224kdf_scheme "dhSinglePass-cofactorDH-sha224kdf-scheme" +#define NID_dhSinglePass_cofactorDH_sha224kdf_scheme 942 +#define OBJ_dhSinglePass_cofactorDH_sha224kdf_scheme OBJ_secg_scheme,14L,0L + +#define SN_dhSinglePass_cofactorDH_sha256kdf_scheme "dhSinglePass-cofactorDH-sha256kdf-scheme" +#define NID_dhSinglePass_cofactorDH_sha256kdf_scheme 943 +#define OBJ_dhSinglePass_cofactorDH_sha256kdf_scheme OBJ_secg_scheme,14L,1L + +#define SN_dhSinglePass_cofactorDH_sha384kdf_scheme "dhSinglePass-cofactorDH-sha384kdf-scheme" +#define NID_dhSinglePass_cofactorDH_sha384kdf_scheme 944 +#define OBJ_dhSinglePass_cofactorDH_sha384kdf_scheme OBJ_secg_scheme,14L,2L + +#define SN_dhSinglePass_cofactorDH_sha512kdf_scheme "dhSinglePass-cofactorDH-sha512kdf-scheme" +#define NID_dhSinglePass_cofactorDH_sha512kdf_scheme 945 +#define OBJ_dhSinglePass_cofactorDH_sha512kdf_scheme OBJ_secg_scheme,14L,3L 
+ +#define SN_dh_std_kdf "dh-std-kdf" +#define NID_dh_std_kdf 946 + +#define SN_dh_cofactor_kdf "dh-cofactor-kdf" +#define NID_dh_cofactor_kdf 947 + +#define SN_ct_precert_scts "ct_precert_scts" +#define LN_ct_precert_scts "CT Precertificate SCTs" +#define NID_ct_precert_scts 951 +#define OBJ_ct_precert_scts 1L,3L,6L,1L,4L,1L,11129L,2L,4L,2L + +#define SN_ct_precert_poison "ct_precert_poison" +#define LN_ct_precert_poison "CT Precertificate Poison" +#define NID_ct_precert_poison 952 +#define OBJ_ct_precert_poison 1L,3L,6L,1L,4L,1L,11129L,2L,4L,3L + +#define SN_ct_precert_signer "ct_precert_signer" +#define LN_ct_precert_signer "CT Precertificate Signer" +#define NID_ct_precert_signer 953 +#define OBJ_ct_precert_signer 1L,3L,6L,1L,4L,1L,11129L,2L,4L,4L + +#define SN_ct_cert_scts "ct_cert_scts" +#define LN_ct_cert_scts "CT Certificate SCTs" +#define NID_ct_cert_scts 954 +#define OBJ_ct_cert_scts 1L,3L,6L,1L,4L,1L,11129L,2L,4L,5L + +#define SN_jurisdictionLocalityName "jurisdictionL" +#define LN_jurisdictionLocalityName "jurisdictionLocalityName" +#define NID_jurisdictionLocalityName 955 +#define OBJ_jurisdictionLocalityName 1L,3L,6L,1L,4L,1L,311L,60L,2L,1L,1L + +#define SN_jurisdictionStateOrProvinceName "jurisdictionST" +#define LN_jurisdictionStateOrProvinceName "jurisdictionStateOrProvinceName" +#define NID_jurisdictionStateOrProvinceName 956 +#define OBJ_jurisdictionStateOrProvinceName 1L,3L,6L,1L,4L,1L,311L,60L,2L,1L,2L + +#define SN_jurisdictionCountryName "jurisdictionC" +#define LN_jurisdictionCountryName "jurisdictionCountryName" +#define NID_jurisdictionCountryName 957 +#define OBJ_jurisdictionCountryName 1L,3L,6L,1L,4L,1L,311L,60L,2L,1L,3L diff --git a/deps/openssl/openssl/crypto/objects/obj_mac.num b/deps/openssl/openssl/crypto/objects/obj_mac.num index 1d0a7c802daf10..8e5ea8336331ec 100644 --- a/deps/openssl/openssl/crypto/objects/obj_mac.num +++ b/deps/openssl/openssl/crypto/objects/obj_mac.num @@ -917,3 +917,41 @@ aes_128_cbc_hmac_sha1 916 aes_192_cbc_hmac_sha1 917 aes_256_cbc_hmac_sha1 918 rsaesOaep 919 +dhpublicnumber 920 +brainpoolP160r1 921 +brainpoolP160t1 922 +brainpoolP192r1 923 +brainpoolP192t1 924 +brainpoolP224r1 925 +brainpoolP224t1 926 +brainpoolP256r1 927 +brainpoolP256t1 928 +brainpoolP320r1 929 +brainpoolP320t1 930 +brainpoolP384r1 931 +brainpoolP384t1 932 +brainpoolP512r1 933 +brainpoolP512t1 934 +pSpecified 935 +dhSinglePass_stdDH_sha1kdf_scheme 936 +dhSinglePass_stdDH_sha224kdf_scheme 937 +dhSinglePass_stdDH_sha256kdf_scheme 938 +dhSinglePass_stdDH_sha384kdf_scheme 939 +dhSinglePass_stdDH_sha512kdf_scheme 940 +dhSinglePass_cofactorDH_sha1kdf_scheme 941 +dhSinglePass_cofactorDH_sha224kdf_scheme 942 +dhSinglePass_cofactorDH_sha256kdf_scheme 943 +dhSinglePass_cofactorDH_sha384kdf_scheme 944 +dhSinglePass_cofactorDH_sha512kdf_scheme 945 +dh_std_kdf 946 +dh_cofactor_kdf 947 +aes_128_cbc_hmac_sha256 948 +aes_192_cbc_hmac_sha256 949 +aes_256_cbc_hmac_sha256 950 +ct_precert_scts 951 +ct_precert_poison 952 +ct_precert_signer 953 +ct_cert_scts 954 +jurisdictionLocalityName 955 +jurisdictionStateOrProvinceName 956 +jurisdictionCountryName 957 diff --git a/deps/openssl/openssl/crypto/objects/obj_xref.h b/deps/openssl/openssl/crypto/objects/obj_xref.h index b8f7d341f78759..e453e99f833660 100644 --- a/deps/openssl/openssl/crypto/objects/obj_xref.h +++ b/deps/openssl/openssl/crypto/objects/obj_xref.h @@ -41,6 +41,21 @@ static const nid_triple sigoid_srt[] = { {NID_id_GostR3411_94_with_GostR3410_2001_cc, NID_id_GostR3411_94, NID_id_GostR3410_2001_cc}, {NID_rsassaPss, 
NID_undef, NID_rsaEncryption}, + {NID_dhSinglePass_stdDH_sha1kdf_scheme, NID_sha1, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha224kdf_scheme, NID_sha224, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha256kdf_scheme, NID_sha256, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha384kdf_scheme, NID_sha384, NID_dh_std_kdf}, + {NID_dhSinglePass_stdDH_sha512kdf_scheme, NID_sha512, NID_dh_std_kdf}, + {NID_dhSinglePass_cofactorDH_sha1kdf_scheme, NID_sha1, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha224kdf_scheme, NID_sha224, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha256kdf_scheme, NID_sha256, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha384kdf_scheme, NID_sha384, + NID_dh_cofactor_kdf}, + {NID_dhSinglePass_cofactorDH_sha512kdf_scheme, NID_sha512, + NID_dh_cofactor_kdf}, }; static const nid_triple *const sigoid_srt_xref[] = { @@ -54,19 +69,29 @@ static const nid_triple *const sigoid_srt_xref[] = { &sigoid_srt[5], &sigoid_srt[8], &sigoid_srt[12], + &sigoid_srt[30], + &sigoid_srt[35], &sigoid_srt[6], &sigoid_srt[10], &sigoid_srt[11], &sigoid_srt[13], &sigoid_srt[24], &sigoid_srt[20], + &sigoid_srt[32], + &sigoid_srt[37], &sigoid_srt[14], &sigoid_srt[21], + &sigoid_srt[33], + &sigoid_srt[38], &sigoid_srt[15], &sigoid_srt[22], + &sigoid_srt[34], + &sigoid_srt[39], &sigoid_srt[16], &sigoid_srt[23], &sigoid_srt[19], + &sigoid_srt[31], + &sigoid_srt[36], &sigoid_srt[25], &sigoid_srt[26], &sigoid_srt[27], diff --git a/deps/openssl/openssl/crypto/objects/obj_xref.txt b/deps/openssl/openssl/crypto/objects/obj_xref.txt index cb917182ee2f9e..19c94226b20c1f 100644 --- a/deps/openssl/openssl/crypto/objects/obj_xref.txt +++ b/deps/openssl/openssl/crypto/objects/obj_xref.txt @@ -44,3 +44,15 @@ id_GostR3411_94_with_GostR3410_2001 id_GostR3411_94 id_GostR3410_2001 id_GostR3411_94_with_GostR3410_94 id_GostR3411_94 id_GostR3410_94 id_GostR3411_94_with_GostR3410_94_cc id_GostR3411_94 id_GostR3410_94_cc id_GostR3411_94_with_GostR3410_2001_cc id_GostR3411_94 id_GostR3410_2001_cc +# ECDH KDFs and their corresponding message digests and schemes +dhSinglePass_stdDH_sha1kdf_scheme sha1 dh_std_kdf +dhSinglePass_stdDH_sha224kdf_scheme sha224 dh_std_kdf +dhSinglePass_stdDH_sha256kdf_scheme sha256 dh_std_kdf +dhSinglePass_stdDH_sha384kdf_scheme sha384 dh_std_kdf +dhSinglePass_stdDH_sha512kdf_scheme sha512 dh_std_kdf + +dhSinglePass_cofactorDH_sha1kdf_scheme sha1 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha224kdf_scheme sha224 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha256kdf_scheme sha256 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha384kdf_scheme sha384 dh_cofactor_kdf +dhSinglePass_cofactorDH_sha512kdf_scheme sha512 dh_cofactor_kdf diff --git a/deps/openssl/openssl/crypto/objects/objects.txt b/deps/openssl/openssl/crypto/objects/objects.txt index d3bfad72a2ac78..b57aabb226501b 100644 --- a/deps/openssl/openssl/crypto/objects/objects.txt +++ b/deps/openssl/openssl/crypto/objects/objects.txt @@ -168,6 +168,7 @@ pkcs1 5 : RSA-SHA1 : sha1WithRSAEncryption # According to PKCS #1 version 2.1 pkcs1 7 : RSAES-OAEP : rsaesOaep pkcs1 8 : MGF1 : mgf1 +pkcs1 9 : PSPECIFIED : pSpecified pkcs1 10 : RSASSA-PSS : rsassaPss pkcs1 11 : RSA-SHA256 : sha256WithRSAEncryption @@ -1290,3 +1291,60 @@ kisa 1 6 : SEED-OFB : seed-ofb : AES-128-CBC-HMAC-SHA1 : aes-128-cbc-hmac-sha1 : AES-192-CBC-HMAC-SHA1 : aes-192-cbc-hmac-sha1 : AES-256-CBC-HMAC-SHA1 : aes-256-cbc-hmac-sha1 + : AES-128-CBC-HMAC-SHA256 : aes-128-cbc-hmac-sha256 + : AES-192-CBC-HMAC-SHA256 : aes-192-cbc-hmac-sha256 + : AES-256-CBC-HMAC-SHA256 : 
aes-256-cbc-hmac-sha256 + +ISO-US 10046 2 1 : dhpublicnumber : X9.42 DH + +# RFC 5639 curve OIDs (see http://www.ietf.org/rfc/rfc5639.txt) +# versionOne OBJECT IDENTIFIER ::= { +# iso(1) identifified-organization(3) teletrust(36) algorithm(3) +# signature-algorithm(3) ecSign(2) ecStdCurvesAndGeneration(8) +# ellipticCurve(1) 1 } +1 3 36 3 3 2 8 1 1 1 : brainpoolP160r1 +1 3 36 3 3 2 8 1 1 2 : brainpoolP160t1 +1 3 36 3 3 2 8 1 1 3 : brainpoolP192r1 +1 3 36 3 3 2 8 1 1 4 : brainpoolP192t1 +1 3 36 3 3 2 8 1 1 5 : brainpoolP224r1 +1 3 36 3 3 2 8 1 1 6 : brainpoolP224t1 +1 3 36 3 3 2 8 1 1 7 : brainpoolP256r1 +1 3 36 3 3 2 8 1 1 8 : brainpoolP256t1 +1 3 36 3 3 2 8 1 1 9 : brainpoolP320r1 +1 3 36 3 3 2 8 1 1 10 : brainpoolP320t1 +1 3 36 3 3 2 8 1 1 11 : brainpoolP384r1 +1 3 36 3 3 2 8 1 1 12 : brainpoolP384t1 +1 3 36 3 3 2 8 1 1 13 : brainpoolP512r1 +1 3 36 3 3 2 8 1 1 14 : brainpoolP512t1 + +# ECDH schemes from RFC5753 +!Alias x9-63-scheme 1 3 133 16 840 63 0 +!Alias secg-scheme certicom-arc 1 + +x9-63-scheme 2 : dhSinglePass-stdDH-sha1kdf-scheme +secg-scheme 11 0 : dhSinglePass-stdDH-sha224kdf-scheme +secg-scheme 11 1 : dhSinglePass-stdDH-sha256kdf-scheme +secg-scheme 11 2 : dhSinglePass-stdDH-sha384kdf-scheme +secg-scheme 11 3 : dhSinglePass-stdDH-sha512kdf-scheme + +x9-63-scheme 3 : dhSinglePass-cofactorDH-sha1kdf-scheme +secg-scheme 14 0 : dhSinglePass-cofactorDH-sha224kdf-scheme +secg-scheme 14 1 : dhSinglePass-cofactorDH-sha256kdf-scheme +secg-scheme 14 2 : dhSinglePass-cofactorDH-sha384kdf-scheme +secg-scheme 14 3 : dhSinglePass-cofactorDH-sha512kdf-scheme +# NIDs for use with lookup tables. + : dh-std-kdf + : dh-cofactor-kdf + +# RFC 6962 Extension OIDs (see http://www.ietf.org/rfc/rfc6962.txt) +1 3 6 1 4 1 11129 2 4 2 : ct_precert_scts : CT Precertificate SCTs +1 3 6 1 4 1 11129 2 4 3 : ct_precert_poison : CT Precertificate Poison +1 3 6 1 4 1 11129 2 4 4 : ct_precert_signer : CT Precertificate Signer +1 3 6 1 4 1 11129 2 4 5 : ct_cert_scts : CT Certificate SCTs + +# CABForum EV SSL Certificate Guidelines +# (see https://cabforum.org/extended-validation/) +# OIDs for Subject Jurisdiction of Incorporation or Registration +1 3 6 1 4 1 311 60 2 1 1 : jurisdictionL : jurisdictionLocalityName +1 3 6 1 4 1 311 60 2 1 2 : jurisdictionST : jurisdictionStateOrProvinceName +1 3 6 1 4 1 311 60 2 1 3 : jurisdictionC : jurisdictionCountryName diff --git a/deps/openssl/openssl/crypto/objects/objxref.pl b/deps/openssl/openssl/crypto/objects/objxref.pl index 35c06514b93248..1913b9d133c24b 100644 --- a/deps/openssl/openssl/crypto/objects/objxref.pl +++ b/deps/openssl/openssl/crypto/objects/objxref.pl @@ -39,7 +39,8 @@ my @srt1 = sort { $oid_tbl{$a} <=> $oid_tbl{$b}} @xrkeys; -for(my $i = 0; $i <= $#srt1; $i++) +my $i; +for($i = 0; $i <= $#srt1; $i++) { $xref_tbl{$srt1[$i]}[2] = $i; } diff --git a/deps/openssl/openssl/crypto/ocsp/ocsp.h b/deps/openssl/openssl/crypto/ocsp/ocsp.h index 25ef01956be280..ca2ee76dce472f 100644 --- a/deps/openssl/openssl/crypto/ocsp/ocsp.h +++ b/deps/openssl/openssl/crypto/ocsp/ocsp.h @@ -394,11 +394,22 @@ typedef struct ocsp_service_locator_st { OCSP_CERTID *OCSP_CERTID_dup(OCSP_CERTID *id); -OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, char *path, OCSP_REQUEST *req); -OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, char *path, OCSP_REQUEST *req, +OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req); +OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, OCSP_REQUEST *req, int maxline); +int OCSP_REQ_CTX_nbio(OCSP_REQ_CTX *rctx); int 
OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx); +OCSP_REQ_CTX *OCSP_REQ_CTX_new(BIO *io, int maxline); void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx); +void OCSP_set_max_response_length(OCSP_REQ_CTX *rctx, unsigned long len); +int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it, + ASN1_VALUE *val); +int OCSP_REQ_CTX_nbio_d2i(OCSP_REQ_CTX *rctx, ASN1_VALUE **pval, + const ASN1_ITEM *it); +BIO *OCSP_REQ_CTX_get0_mem_bio(OCSP_REQ_CTX *rctx); +int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it, + ASN1_VALUE *val); +int OCSP_REQ_CTX_http(OCSP_REQ_CTX *rctx, const char *op, const char *path); int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req); int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx, const char *name, const char *value); @@ -447,7 +458,7 @@ int OCSP_check_validity(ASN1_GENERALIZEDTIME *thisupd, int OCSP_request_verify(OCSP_REQUEST *req, STACK_OF(X509) *certs, X509_STORE *store, unsigned long flags); -int OCSP_parse_url(char *url, char **phost, char **pport, char **ppath, +int OCSP_parse_url(const char *url, char **phost, char **pport, char **ppath, int *pssl); int OCSP_id_issuer_cmp(OCSP_CERTID *a, OCSP_CERTID *b); diff --git a/deps/openssl/openssl/crypto/ocsp/ocsp_ht.c b/deps/openssl/openssl/crypto/ocsp/ocsp_ht.c index 970fea4a70a899..88b26b38e8d999 100644 --- a/deps/openssl/openssl/crypto/ocsp/ocsp_ht.c +++ b/deps/openssl/openssl/crypto/ocsp/ocsp_ht.c @@ -81,9 +81,10 @@ struct ocsp_req_ctx_st { BIO *io; /* BIO to perform I/O with */ BIO *mem; /* Memory BIO response is built into */ unsigned long asn1_len; /* ASN1 length of response */ + unsigned long max_resp_len; /* Maximum length of response */ }; -#define OCSP_MAX_REQUEST_LENGTH (100 * 1024) +#define OCSP_MAX_RESP_LENGTH (100 * 1024) #define OCSP_MAX_LINE_LEN 4096; /* OCSP states */ @@ -100,15 +101,42 @@ struct ocsp_req_ctx_st { #define OHS_ASN1_HEADER 3 /* OCSP content octets being read */ #define OHS_ASN1_CONTENT 4 +/* First call: ready to start I/O */ +#define OHS_ASN1_WRITE_INIT (5 | OHS_NOREAD) /* Request being sent */ #define OHS_ASN1_WRITE (6 | OHS_NOREAD) /* Request being flushed */ #define OHS_ASN1_FLUSH (7 | OHS_NOREAD) /* Completed */ #define OHS_DONE (8 | OHS_NOREAD) +/* Headers set, no final \r\n included */ +#define OHS_HTTP_HEADER (9 | OHS_NOREAD) static int parse_http_line1(char *line); +OCSP_REQ_CTX *OCSP_REQ_CTX_new(BIO *io, int maxline) +{ + OCSP_REQ_CTX *rctx; + rctx = OPENSSL_malloc(sizeof(OCSP_REQ_CTX)); + if (!rctx) + return NULL; + rctx->state = OHS_ERROR; + rctx->max_resp_len = OCSP_MAX_RESP_LENGTH; + rctx->mem = BIO_new(BIO_s_mem()); + rctx->io = io; + rctx->asn1_len = 0; + if (maxline > 0) + rctx->iobuflen = maxline; + else + rctx->iobuflen = OCSP_MAX_LINE_LEN; + rctx->iobuf = OPENSSL_malloc(rctx->iobuflen); + if (!rctx->iobuf || !rctx->mem) { + OCSP_REQ_CTX_free(rctx); + return NULL; + } + return rctx; +} + void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx) { if (rctx->mem) @@ -118,20 +146,71 @@ void OCSP_REQ_CTX_free(OCSP_REQ_CTX *rctx) OPENSSL_free(rctx); } -int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req) +BIO *OCSP_REQ_CTX_get0_mem_bio(OCSP_REQ_CTX *rctx) +{ + return rctx->mem; +} + +void OCSP_set_max_response_length(OCSP_REQ_CTX *rctx, unsigned long len) +{ + if (len == 0) + rctx->max_resp_len = OCSP_MAX_RESP_LENGTH; + else + rctx->max_resp_len = len; +} + +int OCSP_REQ_CTX_i2d(OCSP_REQ_CTX *rctx, const ASN1_ITEM *it, ASN1_VALUE *val) { static const char req_hdr[] = "Content-Type: application/ocsp-request\r\n" "Content-Length: %d\r\n\r\n"; - if 
(BIO_printf(rctx->mem, req_hdr, i2d_OCSP_REQUEST(req, NULL)) <= 0) + int reqlen = ASN1_item_i2d(val, NULL, it); + if (BIO_printf(rctx->mem, req_hdr, reqlen) <= 0) + return 0; + if (ASN1_item_i2d_bio(it, rctx->mem, val) <= 0) + return 0; + rctx->state = OHS_ASN1_WRITE_INIT; + return 1; +} + +int OCSP_REQ_CTX_nbio_d2i(OCSP_REQ_CTX *rctx, + ASN1_VALUE **pval, const ASN1_ITEM *it) +{ + int rv, len; + const unsigned char *p; + + rv = OCSP_REQ_CTX_nbio(rctx); + if (rv != 1) + return rv; + + len = BIO_get_mem_data(rctx->mem, &p); + *pval = ASN1_item_d2i(NULL, &p, len, it); + if (*pval == NULL) { + rctx->state = OHS_ERROR; return 0; - if (i2d_OCSP_REQUEST_bio(rctx->mem, req) <= 0) + } + return 1; +} + +int OCSP_REQ_CTX_http(OCSP_REQ_CTX *rctx, const char *op, const char *path) +{ + static const char http_hdr[] = "%s %s HTTP/1.0\r\n"; + + if (!path) + path = "/"; + + if (BIO_printf(rctx->mem, http_hdr, op, path) <= 0) return 0; - rctx->state = OHS_ASN1_WRITE; - rctx->asn1_len = BIO_get_mem_data(rctx->mem, NULL); + rctx->state = OHS_HTTP_HEADER; return 1; } +int OCSP_REQ_CTX_set1_req(OCSP_REQ_CTX *rctx, OCSP_REQUEST *req) +{ + return OCSP_REQ_CTX_i2d(rctx, ASN1_ITEM_rptr(OCSP_REQUEST), + (ASN1_VALUE *)req); +} + int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx, const char *name, const char *value) { @@ -147,39 +226,27 @@ int OCSP_REQ_CTX_add1_header(OCSP_REQ_CTX *rctx, } if (BIO_write(rctx->mem, "\r\n", 2) != 2) return 0; + rctx->state = OHS_HTTP_HEADER; return 1; } -OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, char *path, OCSP_REQUEST *req, +OCSP_REQ_CTX *OCSP_sendreq_new(BIO *io, const char *path, OCSP_REQUEST *req, int maxline) { - static const char post_hdr[] = "POST %s HTTP/1.0\r\n"; - OCSP_REQ_CTX *rctx; - rctx = OPENSSL_malloc(sizeof(OCSP_REQ_CTX)); + OCSP_REQ_CTX *rctx = NULL; + rctx = OCSP_REQ_CTX_new(io, maxline); if (!rctx) return NULL; - rctx->state = OHS_ERROR; - rctx->mem = BIO_new(BIO_s_mem()); - rctx->io = io; - rctx->asn1_len = 0; - if (maxline > 0) - rctx->iobuflen = maxline; - else - rctx->iobuflen = OCSP_MAX_LINE_LEN; - rctx->iobuf = OPENSSL_malloc(rctx->iobuflen); - if (!rctx->mem || !rctx->iobuf) - goto err; - if (!path) - path = "/"; - if (BIO_printf(rctx->mem, post_hdr, path) <= 0) + if (!OCSP_REQ_CTX_http(rctx, "POST", path)) goto err; if (req && !OCSP_REQ_CTX_set1_req(rctx, req)) goto err; return rctx; + err: OCSP_REQ_CTX_free(rctx); return NULL; @@ -256,7 +323,7 @@ static int parse_http_line1(char *line) } -int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) +int OCSP_REQ_CTX_nbio(OCSP_REQ_CTX *rctx) { int i, n; const unsigned char *p; @@ -277,6 +344,17 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) } switch (rctx->state) { + case OHS_HTTP_HEADER: + /* Last operation was adding headers: need a final \r\n */ + if (BIO_write(rctx->mem, "\r\n", 2) != 2) { + rctx->state = OHS_ERROR; + return 0; + } + rctx->state = OHS_ASN1_WRITE_INIT; + + case OHS_ASN1_WRITE_INIT: + rctx->asn1_len = BIO_get_mem_data(rctx->mem, NULL); + rctx->state = OHS_ASN1_WRITE; case OHS_ASN1_WRITE: n = BIO_get_mem_data(rctx->mem, &p); @@ -412,7 +490,7 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) rctx->asn1_len |= *p++; } - if (rctx->asn1_len > OCSP_MAX_REQUEST_LENGTH) { + if (rctx->asn1_len > rctx->max_resp_len) { rctx->state = OHS_ERROR; return 0; } @@ -426,18 +504,12 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) /* Fall thru */ case OHS_ASN1_CONTENT: - n = BIO_get_mem_data(rctx->mem, &p); + n = BIO_get_mem_data(rctx->mem, NULL); if 
(n < (int)rctx->asn1_len) goto next_io; - *presp = d2i_OCSP_RESPONSE(NULL, &p, rctx->asn1_len); - if (*presp) { - rctx->state = OHS_DONE; - return 1; - } - - rctx->state = OHS_ERROR; - return 0; + rctx->state = OHS_DONE; + return 1; break; @@ -450,9 +522,16 @@ int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) } +int OCSP_sendreq_nbio(OCSP_RESPONSE **presp, OCSP_REQ_CTX *rctx) +{ + return OCSP_REQ_CTX_nbio_d2i(rctx, + (ASN1_VALUE **)presp, + ASN1_ITEM_rptr(OCSP_RESPONSE)); +} + /* Blocking OCSP request handler: now a special case of non-blocking I/O */ -OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, char *path, OCSP_REQUEST *req) +OCSP_RESPONSE *OCSP_sendreq_bio(BIO *b, const char *path, OCSP_REQUEST *req) { OCSP_RESPONSE *resp = NULL; OCSP_REQ_CTX *ctx; diff --git a/deps/openssl/openssl/crypto/ocsp/ocsp_lib.c b/deps/openssl/openssl/crypto/ocsp/ocsp_lib.c index d28d6b5c042e4e..442a5b63d4ba2d 100644 --- a/deps/openssl/openssl/crypto/ocsp/ocsp_lib.c +++ b/deps/openssl/openssl/crypto/ocsp/ocsp_lib.c @@ -175,7 +175,7 @@ int OCSP_id_cmp(OCSP_CERTID *a, OCSP_CERTID *b) * whether it is SSL. */ -int OCSP_parse_url(char *url, char **phost, char **pport, char **ppath, +int OCSP_parse_url(const char *url, char **phost, char **pport, char **ppath, int *pssl) { char *p, *buf; diff --git a/deps/openssl/openssl/crypto/opensslv.h b/deps/openssl/openssl/crypto/opensslv.h index daf3905dfba410..4f20b97a8f12f1 100644 --- a/deps/openssl/openssl/crypto/opensslv.h +++ b/deps/openssl/openssl/crypto/opensslv.h @@ -30,11 +30,11 @@ extern "C" { * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -# define OPENSSL_VERSION_NUMBER 0x100010dfL +# define OPENSSL_VERSION_NUMBER 0x1000201fL # ifdef OPENSSL_FIPS -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1m-fips 19 Mar 2015" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2a-fips 19 Mar 2015" # else -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1m 19 Mar 2015" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2a 19 Mar 2015" # endif # define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/deps/openssl/openssl/crypto/ossl_typ.h b/deps/openssl/openssl/crypto/ossl_typ.h index 0fcb0cea6c61c3..9144ea2cf60b3a 100644 --- a/deps/openssl/openssl/crypto/ossl_typ.h +++ b/deps/openssl/openssl/crypto/ossl_typ.h @@ -100,6 +100,8 @@ typedef int ASN1_BOOLEAN; typedef int ASN1_NULL; # endif +typedef struct asn1_object_st ASN1_OBJECT; + typedef struct ASN1_ITEM_st ASN1_ITEM; typedef struct asn1_pctx_st ASN1_PCTX; diff --git a/deps/openssl/openssl/crypto/pem/Makefile b/deps/openssl/openssl/crypto/pem/Makefile index 2cc7801529e0fa..7691f83f6e1d40 100644 --- a/deps/openssl/openssl/crypto/pem/Makefile +++ b/deps/openssl/openssl/crypto/pem/Makefile @@ -169,12 +169,13 @@ pem_pk8.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h pem_pk8.o: ../cryptlib.h pem_pk8.c pem_pkey.o: ../../e_os.h ../../include/openssl/asn1.h pem_pkey.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h -pem_pkey.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h -pem_pkey.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -pem_pkey.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h -pem_pkey.o: ../../include/openssl/err.h ../../include/openssl/evp.h -pem_pkey.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h -pem_pkey.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h +pem_pkey.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h +pem_pkey.o: 
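For context, and not part of the patch: the ocsp_ht.c rework above splits request building (OCSP_REQ_CTX_http, OCSP_REQ_CTX_i2d) from response parsing (OCSP_REQ_CTX_nbio_d2i), so the existing OCSP_sendreq_bio/OCSP_sendreq_nbio entry points keep their behavior while gaining a configurable response-size cap. A hedged usage sketch follows; cbio, path and req are assumed to be an already-connected BIO, the request path, and a prepared OCSP_REQUEST supplied by the caller.

#include <openssl/bio.h>
#include <openssl/ocsp.h>

/* Drive a non-blocking OCSP exchange to completion over an existing BIO. */
static OCSP_RESPONSE *send_ocsp(BIO *cbio, const char *path, OCSP_REQUEST *req)
{
    OCSP_RESPONSE *rsp = NULL;
    OCSP_REQ_CTX *ctx = OCSP_sendreq_new(cbio, path, req, -1 /* default line length */);
    int rv;

    if (ctx == NULL)
        return NULL;
    /* New in this version: cap how large a response we are willing to buffer. */
    OCSP_set_max_response_length(ctx, 64 * 1024);
    do {
        rv = OCSP_sendreq_nbio(&rsp, ctx);   /* -1 means "retry" on a non-blocking BIO */
    } while (rv == -1 && BIO_should_retry(cbio));
    OCSP_REQ_CTX_free(ctx);
    return rv == 1 ? rsp : NULL;
}

The diff continues below with the remainder of the PEM Makefile dependency update.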
../../include/openssl/e_os2.h ../../include/openssl/ec.h +pem_pkey.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +pem_pkey.o: ../../include/openssl/engine.h ../../include/openssl/err.h +pem_pkey.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h +pem_pkey.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +pem_pkey.o: ../../include/openssl/opensslconf.h pem_pkey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h pem_pkey.o: ../../include/openssl/pem.h ../../include/openssl/pem2.h pem_pkey.o: ../../include/openssl/pkcs12.h ../../include/openssl/pkcs7.h diff --git a/deps/openssl/openssl/crypto/pem/pem.h b/deps/openssl/openssl/crypto/pem/pem.h index 2cdad8ac26699d..d3b23fc997d61e 100644 --- a/deps/openssl/openssl/crypto/pem/pem.h +++ b/deps/openssl/openssl/crypto/pem/pem.h @@ -129,6 +129,7 @@ extern "C" { # define PEM_STRING_PKCS8 "ENCRYPTED PRIVATE KEY" # define PEM_STRING_PKCS8INF "PRIVATE KEY" # define PEM_STRING_DHPARAMS "DH PARAMETERS" +# define PEM_STRING_DHXPARAMS "X9.42 DH PARAMETERS" # define PEM_STRING_SSL_SESSION "SSL SESSION PARAMETERS" # define PEM_STRING_DSAPARAMS "DSA PARAMETERS" # define PEM_STRING_ECDSA_PUBLIC "ECDSA PUBLIC KEY" @@ -181,7 +182,6 @@ typedef struct pem_ctx_st { int num_recipient; PEM_USER **recipient; - /*- XXX(ben): don#t think this is used! STACK *x509_chain; / * certificate chain */ @@ -399,8 +399,8 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *len, # ifndef OPENSSL_NO_BIO int PEM_read_bio(BIO *bp, char **name, char **header, unsigned char **data, long *len); -int PEM_write_bio(BIO *bp, const char *name, char *hdr, unsigned char *data, - long len); +int PEM_write_bio(BIO *bp, const char *name, const char *hdr, + const unsigned char *data, long len); int PEM_bytes_read_bio(unsigned char **pdata, long *plen, char **pnm, const char *name, BIO *bp, pem_password_cb *cb, void *u); @@ -419,7 +419,8 @@ int PEM_X509_INFO_write_bio(BIO *bp, X509_INFO *xi, EVP_CIPHER *enc, int PEM_read(FILE *fp, char **name, char **header, unsigned char **data, long *len); -int PEM_write(FILE *fp, char *name, char *hdr, unsigned char *data, long len); +int PEM_write(FILE *fp, const char *name, const char *hdr, + const unsigned char *data, long len); void *PEM_ASN1_read(d2i_of_void *d2i, const char *name, FILE *fp, void **x, pem_password_cb *cb, void *u); int PEM_ASN1_write(i2d_of_void *i2d, const char *name, FILE *fp, @@ -474,6 +475,7 @@ DECLARE_PEM_rw(EC_PUBKEY, EC_KEY) # endif # ifndef OPENSSL_NO_DH DECLARE_PEM_rw_const(DHparams, DH) +DECLARE_PEM_write_const(DHxparams, DH) # endif DECLARE_PEM_rw_cb(PrivateKey, EVP_PKEY) DECLARE_PEM_rw(PUBKEY, EVP_PKEY) @@ -562,8 +564,10 @@ void ERR_load_PEM_strings(void); # define PEM_F_PEM_PK8PKEY 119 # define PEM_F_PEM_READ 108 # define PEM_F_PEM_READ_BIO 109 +# define PEM_F_PEM_READ_BIO_DHPARAMS 141 # define PEM_F_PEM_READ_BIO_PARAMETERS 140 # define PEM_F_PEM_READ_BIO_PRIVATEKEY 123 +# define PEM_F_PEM_READ_DHPARAMS 142 # define PEM_F_PEM_READ_PRIVATEKEY 124 # define PEM_F_PEM_SEALFINAL 110 # define PEM_F_PEM_SEALINIT 111 diff --git a/deps/openssl/openssl/crypto/pem/pem_all.c b/deps/openssl/openssl/crypto/pem/pem_all.c index 64b8ba7c256a5f..0e5be63ef08936 100644 --- a/deps/openssl/openssl/crypto/pem/pem_all.c +++ b/deps/openssl/openssl/crypto/pem/pem_all.c @@ -421,6 +421,7 @@ EC_KEY *PEM_read_ECPrivateKey(FILE *fp, EC_KEY **eckey, pem_password_cb *cb, #ifndef OPENSSL_NO_DH -IMPLEMENT_PEM_rw_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams) 
+IMPLEMENT_PEM_write_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams) + IMPLEMENT_PEM_write_const(DHxparams, DH, PEM_STRING_DHXPARAMS, DHxparams) #endif - IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY) +IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY) diff --git a/deps/openssl/openssl/crypto/pem/pem_err.c b/deps/openssl/openssl/crypto/pem/pem_err.c index 702c5adecb86b4..e1f4fdb432d03b 100644 --- a/deps/openssl/openssl/crypto/pem/pem_err.c +++ b/deps/openssl/openssl/crypto/pem/pem_err.c @@ -1,6 +1,6 @@ /* crypto/pem/pem_err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -99,8 +99,10 @@ static ERR_STRING_DATA PEM_str_functs[] = { {ERR_FUNC(PEM_F_PEM_PK8PKEY), "PEM_PK8PKEY"}, {ERR_FUNC(PEM_F_PEM_READ), "PEM_read"}, {ERR_FUNC(PEM_F_PEM_READ_BIO), "PEM_read_bio"}, + {ERR_FUNC(PEM_F_PEM_READ_BIO_DHPARAMS), "PEM_READ_BIO_DHPARAMS"}, {ERR_FUNC(PEM_F_PEM_READ_BIO_PARAMETERS), "PEM_read_bio_Parameters"}, {ERR_FUNC(PEM_F_PEM_READ_BIO_PRIVATEKEY), "PEM_READ_BIO_PRIVATEKEY"}, + {ERR_FUNC(PEM_F_PEM_READ_DHPARAMS), "PEM_READ_DHPARAMS"}, {ERR_FUNC(PEM_F_PEM_READ_PRIVATEKEY), "PEM_READ_PRIVATEKEY"}, {ERR_FUNC(PEM_F_PEM_SEALFINAL), "PEM_SealFinal"}, {ERR_FUNC(PEM_F_PEM_SEALINIT), "PEM_SealInit"}, diff --git a/deps/openssl/openssl/crypto/pem/pem_lib.c b/deps/openssl/openssl/crypto/pem/pem_lib.c index febb53ed0eeb0f..a29821aab2ebd5 100644 --- a/deps/openssl/openssl/crypto/pem/pem_lib.c +++ b/deps/openssl/openssl/crypto/pem/pem_lib.c @@ -229,6 +229,10 @@ static int check_pem(const char *nm, const char *name) } return 0; } + /* If reading DH parameters handle X9.42 DH format too */ + if (!strcmp(nm, PEM_STRING_DHXPARAMS) && + !strcmp(name, PEM_STRING_DHPARAMS)) + return 1; /* Permit older strings */ @@ -435,7 +439,7 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen, pem_password_cb *callback, void *u) { - int i, j, o, klen; + int i = 0, j, o, klen; long len; EVP_CIPHER_CTX ctx; unsigned char key[EVP_MAX_KEY_LENGTH]; @@ -472,8 +476,9 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen, EVP_CIPHER_CTX_cleanup(&ctx); OPENSSL_cleanse((char *)buf, sizeof(buf)); OPENSSL_cleanse((char *)key, sizeof(key)); - j += i; - if (!o) { + if (o) + j += i; + else { PEMerr(PEM_F_PEM_DO_HEADER, PEM_R_BAD_DECRYPT); return (0); } @@ -574,8 +579,8 @@ static int load_iv(char **fromp, unsigned char *to, int num) } #ifndef OPENSSL_NO_FP_API -int PEM_write(FILE *fp, char *name, char *header, unsigned char *data, - long len) +int PEM_write(FILE *fp, const char *name, const char *header, + const unsigned char *data, long len) { BIO *b; int ret; @@ -591,8 +596,8 @@ int PEM_write(FILE *fp, char *name, char *header, unsigned char *data, } #endif -int PEM_write_bio(BIO *bp, const char *name, char *header, - unsigned char *data, long len) +int PEM_write_bio(BIO *bp, const char *name, const char *header, + const unsigned char *data, long len) { int nlen, n, i, j, outl; unsigned char *buf = NULL; diff --git a/deps/openssl/openssl/crypto/pem/pem_pkey.c b/deps/openssl/openssl/crypto/pem/pem_pkey.c index 0b05e63fd7aa5a..04d6319a225be0 100644 --- a/deps/openssl/openssl/crypto/pem/pem_pkey.c +++ 
b/deps/openssl/openssl/crypto/pem/pem_pkey.c @@ -68,6 +68,9 @@ #ifndef OPENSSL_NO_ENGINE # include #endif +#ifndef OPENSSL_NO_DH +# include +#endif #include "asn1_locl.h" int pem_check_suffix(const char *pem_str, const char *suffix); @@ -241,3 +244,50 @@ int PEM_write_PrivateKey(FILE *fp, EVP_PKEY *x, const EVP_CIPHER *enc, } #endif + +#ifndef OPENSSL_NO_DH + +/* Transparently read in PKCS#3 or X9.42 DH parameters */ + +DH *PEM_read_bio_DHparams(BIO *bp, DH **x, pem_password_cb *cb, void *u) +{ + char *nm = NULL; + const unsigned char *p = NULL; + unsigned char *data = NULL; + long len; + DH *ret = NULL; + + if (!PEM_bytes_read_bio(&data, &len, &nm, PEM_STRING_DHPARAMS, bp, cb, u)) + return NULL; + p = data; + + if (!strcmp(nm, PEM_STRING_DHXPARAMS)) + ret = d2i_DHxparams(x, &p, len); + else + ret = d2i_DHparams(x, &p, len); + + if (ret == NULL) + PEMerr(PEM_F_PEM_READ_BIO_DHPARAMS, ERR_R_ASN1_LIB); + OPENSSL_free(nm); + OPENSSL_free(data); + return ret; +} + +# ifndef OPENSSL_NO_FP_API +DH *PEM_read_DHparams(FILE *fp, DH **x, pem_password_cb *cb, void *u) +{ + BIO *b; + DH *ret; + + if ((b = BIO_new(BIO_s_file())) == NULL) { + PEMerr(PEM_F_PEM_READ_DHPARAMS, ERR_R_BUF_LIB); + return (0); + } + BIO_set_fp(b, fp, BIO_NOCLOSE); + ret = PEM_read_bio_DHparams(b, x, cb, u); + BIO_free(b); + return (ret); +} +# endif + +#endif diff --git a/deps/openssl/openssl/crypto/perlasm/ppc-xlate.pl b/deps/openssl/openssl/crypto/perlasm/ppc-xlate.pl index a3edd982b66477..f89e8142993166 100755 --- a/deps/openssl/openssl/crypto/perlasm/ppc-xlate.pl +++ b/deps/openssl/openssl/crypto/perlasm/ppc-xlate.pl @@ -27,7 +27,8 @@ /osx/ && do { $name = "_$name"; last; }; - /linux.*32/ && do { $ret .= ".globl $name\n"; + /linux.*(32|64le)/ + && do { $ret .= ".globl $name\n"; $ret .= ".type $name,\@function"; last; }; @@ -37,7 +38,6 @@ $ret .= ".align 3\n"; $ret .= "$name:\n"; $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; - $ret .= ".size $name,24\n"; $ret .= ".previous\n"; $name = ".$name"; @@ -50,7 +50,9 @@ $ret; }; my $text = sub { - ($flavour =~ /aix/) ? ".csect" : ".text"; + my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; + $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); + $ret; }; my $machine = sub { my $junk = shift; @@ -62,9 +64,12 @@ ".machine $arch"; }; my $size = sub { - if ($flavour =~ /linux.*32/) + if ($flavour =~ /linux/) { shift; - ".size " . 
join(",",@_); + my $name = shift; $name =~ s|^[\.\_]||; + my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; + $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); + $ret; } else { ""; } @@ -77,6 +82,25 @@ else { ""; } }; +my $quad = sub { + shift; + my @ret; + my ($hi,$lo); + for (@_) { + if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) + { $hi=$1?"0x$1":"0"; $lo="0x$2"; } + elsif (/^([0-9]+)$/o) + { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl + else + { $hi=undef; $lo=$_; } + + if (defined($hi)) + { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } + else + { push(@ret,".quad $lo"); } + } + join("\n",@ret); +}; ################################################################ # simplified mnemonics not handled by at least one assembler @@ -122,6 +146,46 @@ $b = ($b+$n)&63; $n = 64-$n; " rldicl $ra,$rs,$b,$n"; }; +my $vmr = sub { + my ($f,$vx,$vy) = @_; + " vor $vx,$vy,$vy"; +}; + +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my $vpmsubh = sub { vcrypto_op(@_, 1096); }; +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +my $vaddudm = sub { vcrypto_op(@_, 192); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; while($line=<>) { @@ -138,7 +202,10 @@ { $line =~ s|(^[\.\w]+)\:\s*||; my $label = $1; - printf "%s:",($GLOBALS{$label} or $label) if ($label); + if ($label) { + printf "%s:",($GLOBALS{$label} or $label); + printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); + } } { @@ -147,7 +214,7 @@ my $mnemonic = $2; my $f = $3; my $opcode = eval("\$$mnemonic"); - $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); + $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } } diff --git a/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl b/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl new file mode 100644 index 00000000000000..eb267a57ed8151 --- /dev/null +++ b/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl @@ -0,0 +1,1687 @@ +#!/usr/bin/env perl + +# Specific modes implementations for SPARC Architecture 2011. There +# is T4 dependency though, an ASI value that is not specified in the +# Architecture Manual. 
But as SPARC universe is rather monocultural, +# we imply that processor capable of executing crypto instructions +# can handle the ASI in question as well. This means that we ought to +# keep eyes open when new processors emerge... +# +# As for above mentioned ASI. It's so called "block initializing +# store" which cancels "read" in "read-update-write" on cache lines. +# This is "cooperative" optimization, as it reduces overall pressure +# on memory interface. Benefits can't be observed/quantified with +# usual benchmarks, on the contrary you can notice that single-thread +# performance for parallelizable modes is ~1.5% worse for largest +# block sizes [though few percent better for not so long ones]. All +# this based on suggestions from David Miller. + +sub asm_init { # to be called with @ARGV as argument + for (@_) { $::abibits=64 if (/\-m64/ || /\-xarch\=v9/); } + if ($::abibits==64) { $::bias=2047; $::frame=192; $::size_t_cc="%xcc"; } + else { $::bias=0; $::frame=112; $::size_t_cc="%icc"; } +} + +# unified interface +my ($inp,$out,$len,$key,$ivec)=map("%i$_",(0..5)); +# local variables +my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7)); + +sub alg_cbc_encrypt_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl ${alg}${bits}_t4_cbc_encrypt +.align 32 +${alg}${bits}_t4_cbc_encrypt: + save %sp, -$::frame, %sp + cmp $len, 0 + be,pn $::size_t_cc, .L${bits}_cbc_enc_abort + sub $inp, $out, $blk_init ! $inp!=$out +___ +$::code.=<<___ if (!$::evp); + andcc $ivec, 7, $ivoff + alignaddr $ivec, %g0, $ivec + + ldd [$ivec + 0], %f0 ! load ivec + bz,pt %icc, 1f + ldd [$ivec + 8], %f2 + ldd [$ivec + 16], %f4 + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 +1: +___ +$::code.=<<___ if ($::evp); + ld [$ivec + 0], %f0 + ld [$ivec + 4], %f1 + ld [$ivec + 8], %f2 + ld [$ivec + 12], %f3 +___ +$::code.=<<___; + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_enckey + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 127 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<128 || + brnz,pn $blk_init, .L${bits}cbc_enc_blk ! $inp==$out) + srl $omask, $ooff, $omask + + alignaddrl $out, %g0, $out + srlx $len, 4, $len + prefetch [$out], 22 + +.L${bits}_cbc_enc_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f12 + movxtod %o1, %f14 + + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_encrypt_1x + add $inp, 16, $inp + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_cbc_enc_loop + add $out, 16, $out +___ +$::code.=<<___ if ($::evp); + st %f0, [$ivec + 0] + st %f1, [$ivec + 4] + st %f2, [$ivec + 8] + st %f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3f + nop + + std %f0, [$ivec + 0] ! write out ivec + std %f2, [$ivec + 8] +___ +$::code.=<<___; +.L${bits}_cbc_enc_abort: + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! 
handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_cbc_enc_loop+4 + orn %g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); + st %f0, [$ivec + 0] + st %f1, [$ivec + 4] + st %f2, [$ivec + 8] + st %f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3f + nop + + std %f0, [$ivec + 0] ! write out ivec + std %f2, [$ivec + 8] + ret + restore + +.align 16 +3: alignaddrl $ivec, $ivoff, %g0 ! handle unaligned ivec + mov 0xff, $omask + srl $omask, $ivoff, $omask + faligndata %f0, %f0, %f4 + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + stda %f4, [$ivec + $omask]0xc0 + std %f6, [$ivec + 8] + add $ivec, 16, $ivec + orn %g0, $omask, $omask + stda %f8, [$ivec + $omask]0xc0 +___ +$::code.=<<___; + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}cbc_enc_blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + +.L${bits}_cbc_enc_blk_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 5f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +5: + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f12 + movxtod %o1, %f14 + + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_encrypt_1x + add $inp, 16, $inp + sub $len, 1, $len + + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + brnz,pt $len, .L${bits}_cbc_enc_blk_loop + add $out, 8, $out + + membar #StoreLoad|#StoreStore + brnz,pt $blk_init, .L${bits}_cbc_enc_loop + mov $blk_init, $len +___ +$::code.=<<___ if ($::evp); + st %f0, [$ivec + 0] + st %f1, [$ivec + 4] + st %f2, [$ivec + 8] + st %f3, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3b + nop + + std %f0, [$ivec + 0] ! write out ivec + std %f2, [$ivec + 8] +___ +$::code.=<<___; + ret + restore +.type ${alg}${bits}_t4_cbc_encrypt,#function +.size ${alg}${bits}_t4_cbc_encrypt,.-${alg}${bits}_t4_cbc_encrypt +___ +} + +sub alg_cbc_decrypt_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl ${alg}${bits}_t4_cbc_decrypt +.align 32 +${alg}${bits}_t4_cbc_decrypt: + save %sp, -$::frame, %sp + cmp $len, 0 + be,pn $::size_t_cc, .L${bits}_cbc_dec_abort + sub $inp, $out, $blk_init ! $inp!=$out +___ +$::code.=<<___ if (!$::evp); + andcc $ivec, 7, $ivoff + alignaddr $ivec, %g0, $ivec + + ldd [$ivec + 0], %f12 ! load ivec + bz,pt %icc, 1f + ldd [$ivec + 8], %f14 + ldd [$ivec + 16], %f0 + faligndata %f12, %f14, %f12 + faligndata %f14, %f0, %f14 +1: +___ +$::code.=<<___ if ($::evp); + ld [$ivec + 0], %f12 ! load ivec + ld [$ivec + 4], %f13 + ld [$ivec + 8], %f14 + ld [$ivec + 12], %f15 +___ +$::code.=<<___; + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_deckey + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 255 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! 
$len<256 || + brnz,pn $blk_init, .L${bits}cbc_dec_blk ! $inp==$out) + srl $omask, $ooff, $omask + + andcc $len, 16, %g0 ! is number of blocks even? + srlx $len, 4, $len + alignaddrl $out, %g0, $out + bz %icc, .L${bits}_cbc_dec_loop2x + prefetch [$out], 22 +.L${bits}_cbc_dec_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + xor %g4, %o0, %o2 ! ^= rk[0] + xor %g5, %o1, %o3 + movxtod %o2, %f0 + movxtod %o3, %f2 + + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_decrypt_1x + add $inp, 16, $inp + + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + movxtod %o0, %f12 + movxtod %o1, %f14 + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_cbc_dec_loop2x + add $out, 16, $out +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; +.L${bits}_cbc_dec_abort: + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_cbc_dec_loop2x+4 + orn %g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_cbc_dec_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 4f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +4: + xor %g4, %o0, %o4 ! ^= rk[0] + xor %g5, %o1, %o5 + movxtod %o4, %f0 + movxtod %o5, %f2 + xor %g4, %o2, %o4 + xor %g5, %o3, %o5 + movxtod %o4, %f4 + movxtod %o5, %f6 + + prefetch [$out + 63], 22 + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_decrypt_2x + add $inp, 32, $inp + + movxtod %o0, %f8 + movxtod %o1, %f10 + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + movxtod %o2, %f12 + movxtod %o3, %f14 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + brnz,pn $ooff, 2f + sub $len, 2, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + std %f4, [$out + 16] + std %f6, [$out + 24] + brnz,pt $len, .L${bits}_cbc_dec_loop2x + add $out, 32, $out +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! 
write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f8 ! handle unaligned output + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + faligndata %f6, %f6, %f6 + stda %f8, [$out + $omask]0xc0 ! partial store + std %f0, [$out + 8] + std %f2, [$out + 16] + std %f4, [$out + 24] + add $out, 32, $out + orn %g0, $omask, $omask + stda %f6, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_cbc_dec_loop2x+4 + orn %g0, $omask, $omask +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, .L${bits}_cbc_dec_unaligned_ivec + nop + + std %f12, [$ivec + 0] ! write out ivec + std %f14, [$ivec + 8] + ret + restore + +.align 16 +.L${bits}_cbc_dec_unaligned_ivec: + alignaddrl $ivec, $ivoff, %g0 ! handle unaligned ivec + mov 0xff, $omask + srl $omask, $ivoff, $omask + faligndata %f12, %f12, %f0 + faligndata %f12, %f14, %f2 + faligndata %f14, %f14, %f4 + stda %f0, [$ivec + $omask]0xc0 + std %f2, [$ivec + 8] + add $ivec, 16, $ivec + orn %g0, $omask, $omask + stda %f4, [$ivec + $omask]0xc0 +___ +$::code.=<<___; + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}cbc_dec_blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + sub $len, 1, $len + add $blk_init, 1, $blk_init + +.L${bits}_cbc_dec_blk_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 5f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +5: + xor %g4, %o0, %o4 ! ^= rk[0] + xor %g5, %o1, %o5 + movxtod %o4, %f0 + movxtod %o5, %f2 + xor %g4, %o2, %o4 + xor %g5, %o3, %o5 + movxtod %o4, %f4 + movxtod %o5, %f6 + + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_decrypt_2x + add $inp, 32, $inp + subcc $len, 2, $len + + movxtod %o0, %f8 + movxtod %o1, %f10 + fxor %f12, %f0, %f0 ! ^= ivec + fxor %f14, %f2, %f2 + movxtod %o2, %f12 + movxtod %o3, %f14 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + bgu,pt $::size_t_cc, .L${bits}_cbc_dec_blk_loop2x + add $out, 8, $out + + add $blk_init, $len, $len + andcc $len, 1, %g0 ! is number of blocks even? + membar #StoreLoad|#StoreStore + bnz,pt %icc, .L${bits}_cbc_dec_loop + srl $len, 0, $len + brnz,pn $len, .L${bits}_cbc_dec_loop2x + nop +___ +$::code.=<<___ if ($::evp); + st %f12, [$ivec + 0] ! write out ivec + st %f13, [$ivec + 4] + st %f14, [$ivec + 8] + st %f15, [$ivec + 12] +___ +$::code.=<<___ if (!$::evp); + brnz,pn $ivoff, 3b + nop + + std %f12, [$ivec + 0] ! 
write out ivec + std %f14, [$ivec + 8] +___ +$::code.=<<___; + ret + restore +.type ${alg}${bits}_t4_cbc_decrypt,#function +.size ${alg}${bits}_t4_cbc_decrypt,.-${alg}${bits}_t4_cbc_decrypt +___ +} + +sub alg_ctr32_implement { +my ($alg,$bits) = @_; + +$::code.=<<___; +.globl ${alg}${bits}_t4_ctr32_encrypt +.align 32 +${alg}${bits}_t4_ctr32_encrypt: + save %sp, -$::frame, %sp + + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_enckey + sllx $len, 4, $len + + ld [$ivec + 0], %l4 ! counter + ld [$ivec + 4], %l5 + ld [$ivec + 8], %l6 + ld [$ivec + 12], %l7 + + sllx %l4, 32, %o5 + or %l5, %o5, %o5 + sllx %l6, 32, %g1 + xor %o5, %g4, %g4 ! ^= rk[0] + xor %g1, %g5, %g5 + movxtod %g4, %f14 ! most significant 64 bits + + sub $inp, $out, $blk_init ! $inp!=$out + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 255 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<256 || + brnz,pn $blk_init, .L${bits}_ctr32_blk ! $inp==$out) + srl $omask, $ooff, $omask + + andcc $len, 16, %g0 ! is number of blocks even? + alignaddrl $out, %g0, $out + bz %icc, .L${bits}_ctr32_loop2x + srlx $len, 4, $len +.L${bits}_ctr32_loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + xor %g5, %l7, %g1 ! ^= rk[0] + add %l7, 1, %l7 + movxtod %g1, %f2 + srl %l7, 0, %l7 ! clruw + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); + aes_eround01 %f16, %f14, %f2, %f4 + aes_eround23 %f18, %f14, %f2, %f2 +___ +$::code.=<<___ if ($alg eq "cmll"); + camellia_f %f16, %f2, %f14, %f2 + camellia_f %f18, %f14, %f2, %f0 +___ +$::code.=<<___; + call _${alg}${bits}_encrypt_1x+8 + add $inp, 16, $inp + + movxtod %o0, %f10 + movxtod %o1, %f12 + fxor %f10, %f0, %f0 ! ^= inp + fxor %f12, %f2, %f2 + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_ctr32_loop2x + add $out, 16, $out + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_ctr32_loop2x+4 + orn %g0, $omask, $omask + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_ctr32_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 4f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +4: + xor %g5, %l7, %g1 ! ^= rk[0] + add %l7, 1, %l7 + movxtod %g1, %f2 + srl %l7, 0, %l7 ! clruw + xor %g5, %l7, %g1 + add %l7, 1, %l7 + movxtod %g1, %f6 + srl %l7, 0, %l7 ! 
clruw + prefetch [$out + 63], 22 + prefetch [$inp + 32+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); + aes_eround01 %f16, %f14, %f2, %f8 + aes_eround23 %f18, %f14, %f2, %f2 + aes_eround01 %f16, %f14, %f6, %f10 + aes_eround23 %f18, %f14, %f6, %f6 +___ +$::code.=<<___ if ($alg eq "cmll"); + camellia_f %f16, %f2, %f14, %f2 + camellia_f %f16, %f6, %f14, %f6 + camellia_f %f18, %f14, %f2, %f0 + camellia_f %f18, %f14, %f6, %f4 +___ +$::code.=<<___; + call _${alg}${bits}_encrypt_2x+16 + add $inp, 32, $inp + + movxtod %o0, %f8 + movxtod %o1, %f10 + movxtod %o2, %f12 + fxor %f8, %f0, %f0 ! ^= inp + movxtod %o3, %f8 + fxor %f10, %f2, %f2 + fxor %f12, %f4, %f4 + fxor %f8, %f6, %f6 + + brnz,pn $ooff, 2f + sub $len, 2, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + std %f4, [$out + 16] + std %f6, [$out + 24] + brnz,pt $len, .L${bits}_ctr32_loop2x + add $out, 32, $out + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f8 ! handle unaligned output + faligndata %f0, %f2, %f0 + faligndata %f2, %f4, %f2 + faligndata %f4, %f6, %f4 + faligndata %f6, %f6, %f6 + + stda %f8, [$out + $omask]0xc0 ! partial store + std %f0, [$out + 8] + std %f2, [$out + 16] + std %f4, [$out + 24] + add $out, 32, $out + orn %g0, $omask, $omask + stda %f6, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_ctr32_loop2x+4 + orn %g0, $omask, $omask + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_ctr32_blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + sub $len, 1, $len + add $blk_init, 1, $blk_init + +.L${bits}_ctr32_blk_loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 5f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +5: + xor %g5, %l7, %g1 ! ^= rk[0] + add %l7, 1, %l7 + movxtod %g1, %f2 + srl %l7, 0, %l7 ! clruw + xor %g5, %l7, %g1 + add %l7, 1, %l7 + movxtod %g1, %f6 + srl %l7, 0, %l7 ! clruw + prefetch [$inp + 32+63], 20 +___ +$::code.=<<___ if ($alg eq "aes"); + aes_eround01 %f16, %f14, %f2, %f8 + aes_eround23 %f18, %f14, %f2, %f2 + aes_eround01 %f16, %f14, %f6, %f10 + aes_eround23 %f18, %f14, %f6, %f6 +___ +$::code.=<<___ if ($alg eq "cmll"); + camellia_f %f16, %f2, %f14, %f2 + camellia_f %f16, %f6, %f14, %f6 + camellia_f %f18, %f14, %f2, %f0 + camellia_f %f18, %f14, %f6, %f4 +___ +$::code.=<<___; + call _${alg}${bits}_encrypt_2x+16 + add $inp, 32, $inp + subcc $len, 2, $len + + movxtod %o0, %f8 + movxtod %o1, %f10 + movxtod %o2, %f12 + fxor %f8, %f0, %f0 ! ^= inp + movxtod %o3, %f8 + fxor %f10, %f2, %f2 + fxor %f12, %f4, %f4 + fxor %f8, %f6, %f6 + + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + bgu,pt $::size_t_cc, .L${bits}_ctr32_blk_loop2x + add $out, 8, $out + + add $blk_init, $len, $len + andcc $len, 1, %g0 ! is number of blocks even? 
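
Throughout these ctr32 routines the counter word lives in %l7: it is loaded from the last four bytes of the IV, bumped with `add %l7, 1, %l7`, and immediately truncated with `srl %l7, 0, %l7` ("clruw"), so only the low 32 bits of the counter block ever change and they wrap modulo 2^32. A minimal C sketch of that counter discipline follows; the helper name is invented for illustration, and handling of the 2^32-block wrap is assumed to stay with the generic CTR code that calls into routines like these.

#include <stdint.h>

/* Increment only the low (big-endian) 32 bits of a 16-byte counter
 * block, wrapping modulo 2^32 -- the "clruw" behaviour the T4 ctr32
 * loops get from `add %l7,1,%l7; srl %l7,0,%l7`. */
static void ctr32_inc(unsigned char counter[16])
{
    uint32_t c = ((uint32_t)counter[12] << 24) | ((uint32_t)counter[13] << 16) |
                 ((uint32_t)counter[14] << 8)  |  (uint32_t)counter[15];

    c += 1;                            /* wraps to 0 after 0xffffffff */
    counter[12] = (unsigned char)(c >> 24);
    counter[13] = (unsigned char)(c >> 16);
    counter[14] = (unsigned char)(c >> 8);
    counter[15] = (unsigned char)c;
}
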
+ membar #StoreLoad|#StoreStore + bnz,pt %icc, .L${bits}_ctr32_loop + srl $len, 0, $len + brnz,pn $len, .L${bits}_ctr32_loop2x + nop + + ret + restore +.type ${alg}${bits}_t4_ctr32_encrypt,#function +.size ${alg}${bits}_t4_ctr32_encrypt,.-${alg}${bits}_t4_ctr32_encrypt +___ +} + +sub alg_xts_implement { +my ($alg,$bits,$dir) = @_; +my ($inp,$out,$len,$key1,$key2,$ivec)=map("%i$_",(0..5)); +my $rem=$ivec; + +$::code.=<<___; +.globl ${alg}${bits}_t4_xts_${dir}crypt +.align 32 +${alg}${bits}_t4_xts_${dir}crypt: + save %sp, -$::frame-16, %sp + + mov $ivec, %o0 + add %fp, $::bias-16, %o1 + call ${alg}_t4_encrypt + mov $key2, %o2 + + add %fp, $::bias-16, %l7 + ldxa [%l7]0x88, %g2 + add %fp, $::bias-8, %l7 + ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak + + sethi %hi(0x76543210), %l7 + or %l7, %lo(0x76543210), %l7 + bmask %l7, %g0, %g0 ! byte swap mask + + prefetch [$inp], 20 + prefetch [$inp + 63], 20 + call _${alg}${bits}_load_${dir}ckey + and $len, 15, $rem + and $len, -16, $len +___ +$code.=<<___ if ($dir eq "de"); + mov 0, %l7 + movrnz $rem, 16, %l7 + sub $len, %l7, $len +___ +$code.=<<___; + + sub $inp, $out, $blk_init ! $inp!=$out + and $inp, 7, $ileft + andn $inp, 7, $inp + sll $ileft, 3, $ileft + mov 64, $iright + mov 0xff, $omask + sub $iright, $ileft, $iright + and $out, 7, $ooff + cmp $len, 255 + movrnz $ooff, 0, $blk_init ! if ( $out&7 || + movleu $::size_t_cc, 0, $blk_init ! $len<256 || + brnz,pn $blk_init, .L${bits}_xts_${dir}blk ! $inp==$out) + srl $omask, $ooff, $omask + + andcc $len, 16, %g0 ! is number of blocks even? +___ +$code.=<<___ if ($dir eq "de"); + brz,pn $len, .L${bits}_xts_${dir}steal +___ +$code.=<<___; + alignaddrl $out, %g0, $out + bz %icc, .L${bits}_xts_${dir}loop2x + srlx $len, 4, $len +.L${bits}_xts_${dir}loop: + ldx [$inp + 0], %o0 + brz,pt $ileft, 4f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +4: + movxtod %g2, %f12 + movxtod %g3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f0 + movxtod %o1, %f2 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + prefetch [$out + 63], 22 + prefetch [$inp + 16+63], 20 + call _${alg}${bits}_${dir}crypt_1x + add $inp, 16, $inp + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + brnz,pn $ooff, 2f + sub $len, 1, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + brnz,pt $len, .L${bits}_xts_${dir}loop2x + add $out, 16, $out + + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f4 ! handle unaligned output + faligndata %f0, %f2, %f6 + faligndata %f2, %f2, %f8 + stda %f4, [$out + $omask]0xc0 ! partial store + std %f6, [$out + 8] + add $out, 16, $out + orn %g0, $omask, $omask + stda %f8, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_xts_${dir}loop2x+4 + orn %g0, $omask, $omask + + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
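
The `srax %g3, 63` / `addcc %g2, %g2` / `and %l7, 0x87` / `addxc %g3, %g3` / `xor %l7, %g2` sequence above is the per-block XTS tweak update: the 128-bit tweak held in %g3:%g2 is multiplied by x in GF(2^128) and reduced by x^128 + x^7 + x^2 + x + 1, whose low byte is the 0x87 constant. The same arithmetic as a compact C sketch, assuming a little-endian host and the usual XTS byte order (byte-order fixups in the assembly are handled separately with `bshuffle`):

#include <stdint.h>
#include <string.h>

/* Multiply the 128-bit XTS tweak by x in GF(2^128), reducing by
 * x^128 + x^7 + x^2 + x + 1 (the 0x87 constant). */
static void xts_tweak_double(unsigned char tweak[16])
{
    uint64_t lo, hi, carry;

    memcpy(&lo, tweak, 8);                /* little-endian host assumed */
    memcpy(&hi, tweak + 8, 8);

    carry = hi >> 63;                     /* bit 127, shifted out below */
    hi = (hi << 1) | (lo >> 63);          /* 128-bit left shift by one  */
    lo = (lo << 1) ^ (carry ? 0x87 : 0);  /* conditional reduction      */

    memcpy(tweak, &lo, 8);
    memcpy(tweak + 8, &hi, 8);
}
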
+.align 32 +.L${bits}_xts_${dir}loop2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 4f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +4: + movxtod %g2, %f12 + movxtod %g3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + movxtod %g2, %f8 + movxtod %g3, %f10 + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + xor %g4, %o2, %o2 ! ^= rk[0] + xor %g5, %o3, %o3 + movxtod %o0, %f0 + movxtod %o1, %f2 + movxtod %o2, %f4 + movxtod %o3, %f6 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 ! ^= tweak[0] + fxor %f10, %f6, %f6 + + prefetch [$out + 63], 22 + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_${dir}crypt_2x + add $inp, 32, $inp + + movxtod %g2, %f8 + movxtod %g3, %f10 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + brnz,pn $ooff, 2f + sub $len, 2, $len + + std %f0, [$out + 0] + std %f2, [$out + 8] + std %f4, [$out + 16] + std %f6, [$out + 24] + brnz,pt $len, .L${bits}_xts_${dir}loop2x + add $out, 32, $out + + fsrc2 %f4, %f0 + fsrc2 %f6, %f2 + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +.align 16 +2: ldxa [$inp]0x82, %o0 ! avoid read-after-write hazard + ! and ~3x deterioration + ! in inp==out case + faligndata %f0, %f0, %f8 ! handle unaligned output + faligndata %f0, %f2, %f10 + faligndata %f2, %f4, %f12 + faligndata %f4, %f6, %f14 + faligndata %f6, %f6, %f0 + + stda %f8, [$out + $omask]0xc0 ! partial store + std %f10, [$out + 8] + std %f12, [$out + 16] + std %f14, [$out + 24] + add $out, 32, $out + orn %g0, $omask, $omask + stda %f0, [$out + $omask]0xc0 ! partial store + + brnz,pt $len, .L${bits}_xts_${dir}loop2x+4 + orn %g0, $omask, $omask + + fsrc2 %f4, %f0 + fsrc2 %f6, %f2 + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +.align 32 +.L${bits}_xts_${dir}blk: + add $out, $len, $blk_init + and $blk_init, 63, $blk_init ! tail + sub $len, $blk_init, $len + add $blk_init, 15, $blk_init ! round up to 16n + srlx $len, 4, $len + srl $blk_init, 4, $blk_init + sub $len, 1, $len + add $blk_init, 1, $blk_init + +.L${bits}_xts_${dir}blk2x: + ldx [$inp + 0], %o0 + ldx [$inp + 8], %o1 + ldx [$inp + 16], %o2 + brz,pt $ileft, 5f + ldx [$inp + 24], %o3 + + ldx [$inp + 32], %o4 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + or %g1, %o0, %o0 + sllx %o1, $ileft, %o1 + srlx %o2, $iright, %g1 + or %g1, %o1, %o1 + sllx %o2, $ileft, %o2 + srlx %o3, $iright, %g1 + or %g1, %o2, %o2 + sllx %o3, $ileft, %o3 + srlx %o4, $iright, %o4 + or %o4, %o3, %o3 +5: + movxtod %g2, %f12 + movxtod %g3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + srax %g3, 63, %l7 ! 
next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + movxtod %g2, %f8 + movxtod %g3, %f10 + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + xor %g4, %o2, %o2 ! ^= rk[0] + xor %g5, %o3, %o3 + movxtod %o0, %f0 + movxtod %o1, %f2 + movxtod %o2, %f4 + movxtod %o3, %f6 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 ! ^= tweak[0] + fxor %f10, %f6, %f6 + + prefetch [$inp + 32+63], 20 + call _${alg}${bits}_${dir}crypt_2x + add $inp, 32, $inp + + movxtod %g2, %f8 + movxtod %g3, %f10 + + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %g2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %g3 + xor %l7, %g2, %g2 + + bshuffle %f8, %f8, %f8 + bshuffle %f10, %f10, %f10 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + fxor %f8, %f4, %f4 + fxor %f10, %f6, %f6 + + subcc $len, 2, $len + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f4, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + add $out, 8, $out + stda %f6, [$out]0xe2 ! ASI_BLK_INIT, T4-specific + bgu,pt $::size_t_cc, .L${bits}_xts_${dir}blk2x + add $out, 8, $out + + add $blk_init, $len, $len + andcc $len, 1, %g0 ! is number of blocks even? + membar #StoreLoad|#StoreStore + bnz,pt %icc, .L${bits}_xts_${dir}loop + srl $len, 0, $len + brnz,pn $len, .L${bits}_xts_${dir}loop2x + nop + + fsrc2 %f4, %f0 + fsrc2 %f6, %f2 + brnz,pn $rem, .L${bits}_xts_${dir}steal + nop + + ret + restore +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +___ +$code.=<<___ if ($dir eq "en"); +.align 32 +.L${bits}_xts_${dir}steal: + std %f0, [%fp + $::bias-16] ! copy of output + std %f2, [%fp + $::bias-8] + + srl $ileft, 3, $ileft + add %fp, $::bias-16, %l7 + add $inp, $ileft, $inp ! original $inp+$len&-15 + add $out, $ooff, $out ! original $out+$len&-15 + mov 0, $ileft + nop ! align + +.L${bits}_xts_${dir}stealing: + ldub [$inp + $ileft], %o0 + ldub [%l7 + $ileft], %o1 + dec $rem + stb %o0, [%l7 + $ileft] + stb %o1, [$out + $ileft] + brnz $rem, .L${bits}_xts_${dir}stealing + inc $ileft + + mov %l7, $inp + sub $out, 16, $out + mov 0, $ileft + sub $out, $ooff, $out + ba .L${bits}_xts_${dir}loop ! one more time + mov 1, $len ! $rem is 0 +___ +$code.=<<___ if ($dir eq "de"); +.align 32 +.L${bits}_xts_${dir}steal: + ldx [$inp + 0], %o0 + brz,pt $ileft, 8f + ldx [$inp + 8], %o1 + + ldx [$inp + 16], %o2 + sllx %o0, $ileft, %o0 + srlx %o1, $iright, %g1 + sllx %o1, $ileft, %o1 + or %g1, %o0, %o0 + srlx %o2, $iright, %o2 + or %o2, %o1, %o1 +8: + srax %g3, 63, %l7 ! next tweak value + addcc %g2, %g2, %o2 + and %l7, 0x87, %l7 + addxc %g3, %g3, %o3 + xor %l7, %o2, %o2 + + movxtod %o2, %f12 + movxtod %o3, %f14 + bshuffle %f12, %f12, %f12 + bshuffle %f14, %f14, %f14 + + xor %g4, %o0, %o0 ! ^= rk[0] + xor %g5, %o1, %o1 + movxtod %o0, %f0 + movxtod %o1, %f2 + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + call _${alg}${bits}_${dir}crypt_1x + add $inp, 16, $inp + + fxor %f12, %f0, %f0 ! ^= tweak[0] + fxor %f14, %f2, %f2 + + std %f0, [%fp + $::bias-16] + std %f2, [%fp + $::bias-8] + + srl $ileft, 3, $ileft + add %fp, $::bias-16, %l7 + add $inp, $ileft, $inp ! original $inp+$len&-15 + add $out, $ooff, $out ! original $out+$len&-15 + mov 0, $ileft + add $out, 16, $out + nop ! 
align + +.L${bits}_xts_${dir}stealing: + ldub [$inp + $ileft], %o0 + ldub [%l7 + $ileft], %o1 + dec $rem + stb %o0, [%l7 + $ileft] + stb %o1, [$out + $ileft] + brnz $rem, .L${bits}_xts_${dir}stealing + inc $ileft + + mov %l7, $inp + sub $out, 16, $out + mov 0, $ileft + sub $out, $ooff, $out + ba .L${bits}_xts_${dir}loop ! one more time + mov 1, $len ! $rem is 0 +___ +$code.=<<___; + ret + restore +.type ${alg}${bits}_t4_xts_${dir}crypt,#function +.size ${alg}${bits}_t4_xts_${dir}crypt,.-${alg}${bits}_t4_xts_${dir}crypt +___ +} + +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. +sub unvis { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %visopf = ( "faligndata" => 0x048, + "bshuffle" => 0x04c, + "fnot2" => 0x066, + "fxor" => 0x06c, + "fsrc2" => 0x078 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unvis3 { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my ($ref,$opf); +my %visopf = ( "addxc" => 0x011, + "addxccc" => 0x013, + "umulxhi" => 0x016, + "alignaddr" => 0x018, + "bmask" => 0x019, + "alignaddrl" => 0x01a ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%([goli])([0-9])/); + $_=$bias{$1}+$2; + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unaes_round { # 4-argument instructions +my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_; +my ($ref,$opf); +my %aesopf = ( "aes_eround01" => 0, + "aes_eround23" => 1, + "aes_dround01" => 2, + "aes_dround23" => 3, + "aes_eround01_l"=> 4, + "aes_eround23_l"=> 5, + "aes_dround01_l"=> 6, + "aes_dround23_l"=> 7, + "aes_kexpand1" => 8 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd"; + + if (defined($opf=$aesopf{$mnemonic})) { + $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? 
(($1|$1>>5)&31) : $rs3; + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unaes_kexpand { # 3-argument instructions +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %aesopf = ( "aes_kexpand0" => 0x130, + "aes_kexpand2" => 0x131 ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if (defined($opf=$aesopf{$mnemonic})) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub uncamellia_f { # 4-argument instructions +my ($mnemonic,$rs1,$rs2,$rs3,$rd)=@_; +my ($ref,$opf); + + $ref = "$mnemonic\t$rs1,$rs2,$rs3,$rd"; + + if (1) { + $rs3 = ($rs3 =~ /%f([0-6]*[02468])/) ? (($1|$1>>5)&31) : $rs3; + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x19<<19|$rs1<<14|$rs3<<9|0xc<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub uncamellia3 { # 3-argument instructions +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my ($ref,$opf); +my %cmllopf = ( "camellia_fl" => 0x13c, + "camellia_fli" => 0x13d ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if (defined($opf=$cmllopf{$mnemonic})) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} + +sub unmovxtox { # 2-argument instructions +my ($mnemonic,$rs,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24, "f" => 0 ); +my ($ref,$opf); +my %movxopf = ( "movdtox" => 0x110, + "movstouw" => 0x111, + "movstosw" => 0x113, + "movxtod" => 0x118, + "movwtos" => 0x119 ); + + $ref = "$mnemonic\t$rs,$rd"; + + if (defined($opf=$movxopf{$mnemonic})) { + foreach ($rs,$rd) { + return $ref if (!/%([fgoli])([0-9]{1,2})/); + $_=$bias{$1}+$2; + if ($2>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($2|$2>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 2<<30|$rd<<25|0x36<<19|$opf<<5|$rs, + $ref; + } else { + return $ref; + } +} + +sub undes { +my ($mnemonic)=shift; +my @args=@_; +my ($ref,$opf); +my %desopf = ( "des_round" => 0b1001, + "des_ip" => 0b100110100, + "des_iip" => 0b100110101, + "des_kexpand" => 0b100110110 ); + + $ref = "$mnemonic\t".join(",",@_); + + if (defined($opf=$desopf{$mnemonic})) { # 4-arg + if ($mnemonic eq "des_round") { + foreach (@args[0..3]) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b011001<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<9|$args[3]<<25, + $ref; + } elsif ($mnemonic eq "des_kexpand") { # 3-arg + foreach (@args[0..2]) { + return $ref if (!/(%f)?([0-9]{1,2})/); + $_=$2; + if ($2>=32) { + return $ref if 
($2&1); + # re-encode for upper double register addressing + $_=($2|$2>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]|$args[2]<<25, + $ref; + } else { # 2-arg + foreach (@args[0..1]) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($2&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + return sprintf ".word\t0x%08x !%s", + 2<<30|0b110110<<19|$opf<<5|$args[0]<<14|$args[1]<<25, + $ref; + } + } else { + return $ref; + } +} + +sub emit_assembler { + foreach (split("\n",$::code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(f[a-z]+2[sd]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})\s*$/$1\t%f0,$2,$3/go; + + s/\b(aes_[edk][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/ + &unaes_round($1,$2,$3,$4,$5) + /geo or + s/\b(aes_kexpand[02])\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unaes_kexpand($1,$2,$3,$4) + /geo or + s/\b(camellia_f)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*([%fx0-9]+),\s*(%f[0-9]{1,2})/ + &uncamellia_f($1,$2,$3,$4,$5) + /geo or + s/\b(camellia_[^s]+)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &uncamellia3($1,$2,$3,$4) + /geo or + s/\b(des_\w+)\s+(%f[0-9]{1,2}),\s*([%fx0-9]+)(?:,\s*(%f[0-9]{1,2})(?:,\s*(%f[0-9]{1,2}))?)?/ + &undes($1,$2,$3,$4,$5) + /geo or + s/\b(mov[ds]to\w+)\s+(%f[0-9]{1,2}),\s*(%[goli][0-7])/ + &unmovxtox($1,$2,$3) + /geo or + s/\b(mov[xw]to[ds])\s+(%[goli][0-7]),\s*(%f[0-9]{1,2})/ + &unmovxtox($1,$2,$3) + /geo or + s/\b([fb][^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unvis($1,$2,$3,$4) + /geo or + s/\b(umulxhi|bmask|addxc[c]{0,2}|alignaddr[l]*)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unvis3($1,$2,$3,$4) + /geo; + + print $_,"\n"; + } +} + +1; diff --git a/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl b/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl index 56d9b64b6fb32d..9c70b8c2c6e9d7 100755 --- a/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl +++ b/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl @@ -121,7 +121,7 @@ $self->{sz} = ""; } elsif ($self->{op} =~ /^v/) { # VEX $self->{sz} = ""; - } elsif ($self->{op} =~ /movq/ && $line =~ /%xmm/) { + } elsif ($self->{op} =~ /mov[dq]/ && $line =~ /%xmm/) { $self->{sz} = ""; } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { $self->{op} = $1; @@ -250,8 +250,13 @@ # in $self->{label}, new gas requires sign extension... 
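
The comment above notes that newer gas wants sign extension of these constants; the substitution that follows implements it by rewriting every decimal literal through `$1<<32>>32` under `use integer`, which is plain 32-bit sign extension so that large displacements come out as small negative numbers. An illustrative C equivalent (not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* 32-bit sign extension, the C counterpart of perl's `$1<<32>>32`
 * under `use integer`: keep the low 32 bits, reinterpret as signed. */
static int64_t sign_extend_32(uint64_t v)
{
    uint32_t lo = (uint32_t)v;
    return (lo & 0x80000000u) ? (int64_t)lo - 0x100000000LL : (int64_t)lo;
}

int main(void)
{
    printf("%lld\n", (long long)sign_extend_32(0xfffffff0u)); /* -16        */
    printf("%lld\n", (long long)sign_extend_32(0x7fffffffu)); /* 2147483647 */
    return 0;
}
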
use integer; $self->{label} =~ s/(?{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; - $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg; + $self->{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg; + $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg; + + if (!$self->{label} && $self->{index} && $self->{scale}==1 && + $self->{base} =~ /(rbp|r13)/) { + $self->{base} = $self->{index}; $self->{index} = $1; + } if ($gas) { $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); @@ -265,14 +270,20 @@ sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; } } else { - %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", - q=>"QWORD$PTR",o=>"OWORD$PTR",x=>"XMMWORD$PTR" ); + %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", + l=>"DWORD$PTR", d=>"DWORD$PTR", + q=>"QWORD$PTR", o=>"OWORD$PTR", + x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" ); $self->{label} =~ s/\./\$/g; $self->{label} =~ s/(?{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); - $sz="q" if ($self->{asterisk} || opcode->mnemonic() eq "movq"); - $sz="l" if (opcode->mnemonic() eq "movd"); + + ($self->{asterisk}) && ($sz="q") || + (opcode->mnemonic() =~ /^v?mov([qd])$/) && ($sz=$1) || + (opcode->mnemonic() =~ /^v?pinsr([qdwb])$/) && ($sz=$1) || + (opcode->mnemonic() =~ /^vpbroadcast([qdwb])$/) && ($sz=$1) || + (opcode->mnemonic() =~ /^vinsert[fi]128$/) && ($sz="x"); if (defined($self->{index})) { sprintf "%s[%s%s*%d%s]",$szmap{$sz}, @@ -412,7 +423,7 @@ } sub out { my $self = shift; - if ($nasm && opcode->mnemonic()=~m/^j/) { + if ($nasm && opcode->mnemonic()=~m/^j(?![re]cxz)/) { "NEAR ".$self->{value}; } else { $self->{value}; @@ -530,7 +541,7 @@ $v="$current_segment\tENDS\n" if ($current_segment); $current_segment = ".text\$"; $v.="$current_segment\tSEGMENT "; - $v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE"; + $v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE"; $v.=" 'CODE'"; } $self->{value} = $v; @@ -772,10 +783,64 @@ sub rex { } }; +my $rdseed = sub { + if (shift =~ /%[er](\w+)/) { + my @opcode=(); + my $dst=$1; + if ($dst !~ /[0-9]+/) { $dst = $regrm{"%e$dst"}; } + rex(\@opcode,0,$1,8); + push @opcode,0x0f,0xc7,0xf8|($dst&7); + @opcode; + } else { + (); + } +}; + +sub rxb { + local *opcode=shift; + my ($dst,$src1,$src2,$rxb)=@_; + + $rxb|=0x7<<5; + $rxb&=~(0x04<<5) if($dst>=8); + $rxb&=~(0x01<<5) if($src1>=8); + $rxb&=~(0x02<<5) if($src2>=8); + push @opcode,$rxb; +} + +my $vprotd = sub { + if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x8f); + rxb(\@opcode,$3,$2,-1,0x08); + push @opcode,0x78,0xc2; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + @opcode; + } else { + (); + } +}; + +my $vprotq = sub { + if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x8f); + rxb(\@opcode,$3,$2,-1,0x08); + push @opcode,0x78,0xc3; + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + @opcode; + } else { + (); + } +}; + if ($nasm) { print <<___; default rel %define XMMWORD +%define YMMWORD +%define ZMMWORD ___ } elsif ($masm) { print <<___; @@ -789,6 +854,7 @@ sub rex { $line =~ s|[#!].*$||; # get rid of asm-style comments... $line =~ s|/\*.*\*/||; # ... and C-style comments... $line =~ s|^\s+||; # ... and skip white spaces in beginning + $line =~ s|\s+$||; # ... and at the end undef $label; undef $opcode; @@ -837,6 +903,8 @@ sub rex { my $arg = $_->out(); # $insn.=$sz compensates for movq, pinsrw, ... 
if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; } + if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; } + if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; } if ($arg =~ /^mm[0-9]+$/) { $insn.=$sz; $sz="q" if(!$sz); last; } } @args = reverse(@args); diff --git a/deps/openssl/openssl/crypto/perlasm/x86asm.pl b/deps/openssl/openssl/crypto/perlasm/x86asm.pl index eb543db2f66eb9..cae156ae63ce32 100644 --- a/deps/openssl/openssl/crypto/perlasm/x86asm.pl +++ b/deps/openssl/openssl/crypto/perlasm/x86asm.pl @@ -131,6 +131,40 @@ sub ::rdrand { &::generic("rdrand",@_); } } +sub ::rdseed +{ my ($dst)=@_; + if ($dst =~ /(e[a-dsd][ixp])/) + { &::data_byte(0x0f,0xc7,0xf8|$regrm{$dst}); } + else + { &::generic("rdrand",@_); } +} + +sub rxb { + local *opcode=shift; + my ($dst,$src1,$src2,$rxb)=@_; + + $rxb|=0x7<<5; + $rxb&=~(0x04<<5) if($dst>=8); + $rxb&=~(0x01<<5) if($src1>=8); + $rxb&=~(0x02<<5) if($src2>=8); + push @opcode,$rxb; +} + +sub ::vprotd +{ my $args=join(',',@_); + if ($args =~ /xmm([0-7]),xmm([0-7]),([x0-9a-f]+)/) + { my @opcode=(0x8f); + rxb(\@opcode,$1,$2,-1,0x08); + push @opcode,0x78,0xc2; + push @opcode,0xc0|($2&7)|(($1&7)<<3); # ModR/M + my $c=$3; + push @opcode,$c=~/^0/?oct($c):$c; + &::data_byte(@opcode); + } + else + { &::generic("vprotd",@_); } +} + # label management $lbdecor="L"; # local label decoration, set by package $label="000"; @@ -221,6 +255,8 @@ sub ::asm_init $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=$android=0; if (($type eq "elf")) { $elf=1; require "x86gas.pl"; } + elsif (($type eq "elf-1")) + { $elf=-1; require "x86gas.pl"; } elsif (($type eq "a\.out")) { $aout=1; require "x86gas.pl"; } elsif (($type eq "coff" or $type eq "gaswin")) @@ -257,4 +293,6 @@ sub ::asm_init &file($filename); } +sub ::hidden {} + 1; diff --git a/deps/openssl/openssl/crypto/perlasm/x86gas.pl b/deps/openssl/openssl/crypto/perlasm/x86gas.pl index 682a3a3163e256..63b2301fd1f096 100644 --- a/deps/openssl/openssl/crypto/perlasm/x86gas.pl +++ b/deps/openssl/openssl/crypto/perlasm/x86gas.pl @@ -70,6 +70,8 @@ sub ::DWP { my($addr,$reg1,$reg2,$idx)=@_; my $ret=""; + if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; } + $addr =~ s/^\s+//; # prepend global references with optional underscore $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; @@ -157,7 +159,7 @@ sub ::file_end } } if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,8"; + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16"; if ($::macosx) { push (@out,"$tmp,2\n"); } elsif ($::elf) { push (@out,"$tmp,4\n"); } else { push (@out,"$tmp\n"); } @@ -170,10 +172,9 @@ sub ::file_end sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } sub ::align -{ my $val=$_[0],$p2,$i; +{ my $val=$_[0]; if ($::aout) - { for ($p2=0;$val!=0;$val>>=1) { $p2++; } - $val=$p2-1; + { $val=int(log($val)/log(2)); $val.=",0x90"; } push(@out,".align\t$val\n"); @@ -195,6 +196,8 @@ sub ::picmeup &::mov($dst,&::DWP("$indirect-$reflabel",$base)); $non_lazy_ptr{"$nmdecor$sym"}=$indirect; } + elsif ($sym eq "OPENSSL_ia32cap_P" && $::elf>0) + { &::lea($dst,&::DWP("$sym-$reflabel",$base)); } else { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", $base)); @@ -250,4 +253,6 @@ sub ::initseg sub ::dataseg { push(@out,".data\n"); } +*::hidden = sub { push(@out,".hidden\t$nmdecor$_[0]\n"); } if ($::elf); + 1; diff --git a/deps/openssl/openssl/crypto/perlasm/x86masm.pl b/deps/openssl/openssl/crypto/perlasm/x86masm.pl index 
6b33b146f0f8ba..b7f49d1c41dc16 100644 --- a/deps/openssl/openssl/crypto/perlasm/x86masm.pl +++ b/deps/openssl/openssl/crypto/perlasm/x86masm.pl @@ -18,10 +18,10 @@ sub ::generic if ($opcode =~ /lea/ && @arg[1] =~ s/.*PTR\s+(\(.*\))$/OFFSET $1/) # no [] { $opcode="mov"; } - elsif ($opcode !~ /movq/) + elsif ($opcode !~ /mov[dq]$/) { # fix xmm references - $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[1]=~/\bxmm[0-7]\b/i); - $arg[1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i); + $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[-1]=~/\bxmm[0-7]\b/i); + $arg[-1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i); } &::emit($opcode,@arg); @@ -39,6 +39,8 @@ sub get_mem { my($size,$addr,$reg1,$reg2,$idx)=@_; my($post,$ret); + if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; } + $ret .= "$size PTR " if ($size ne ""); $addr =~ s/^\s+//; @@ -133,7 +135,7 @@ sub ::file_end if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $comm=<<___; .bss SEGMENT 'BSS' -COMM ${nmdecor}OPENSSL_ia32cap_P:QWORD +COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4 .bss ENDS ___ # comment out OPENSSL_ia32cap_P declarations @@ -158,13 +160,13 @@ sub ::public_label { push(@out,"PUBLIC\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } sub ::data_byte -{ push(@out,("DB\t").join(',',@_)."\n"); } +{ push(@out,("DB\t").join(',',splice(@_,0,16))."\n") while(@_); } sub ::data_short -{ push(@out,("DW\t").join(',',@_)."\n"); } +{ push(@out,("DW\t").join(',',splice(@_,0,8))."\n") while(@_); } sub ::data_word -{ push(@out,("DD\t").join(',',@_)."\n"); } +{ push(@out,("DD\t").join(',',splice(@_,0,4))."\n") while(@_); } sub ::align { push(@out,"ALIGN\t$_[0]\n"); } diff --git a/deps/openssl/openssl/crypto/perlasm/x86nasm.pl b/deps/openssl/openssl/crypto/perlasm/x86nasm.pl index ca2511c9eb9165..5d92f6092ac94e 100644 --- a/deps/openssl/openssl/crypto/perlasm/x86nasm.pl +++ b/deps/openssl/openssl/crypto/perlasm/x86nasm.pl @@ -36,6 +36,8 @@ sub get_mem { my($size,$addr,$reg1,$reg2,$idx)=@_; my($post,$ret); + if (!defined($idx) && 1*$reg2) { $idx=$reg2; $reg2=$reg1; undef $reg1; } + if ($size ne "") { $ret .= "$size"; $ret .= " PTR" if ($::mwerks); @@ -117,7 +119,7 @@ sub ::file_end { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { my $comm=<<___; ${drdecor}segment .bss -${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 8 +${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16 ___ # comment out OPENSSL_ia32cap_P declarations grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; diff --git a/deps/openssl/openssl/crypto/pkcs12/p12_decr.c b/deps/openssl/openssl/crypto/pkcs12/p12_decr.c index d46eae3c66a697..b40ea10ccbee23 100644 --- a/deps/openssl/openssl/crypto/pkcs12/p12_decr.c +++ b/deps/openssl/openssl/crypto/pkcs12/p12_decr.c @@ -171,28 +171,32 @@ ASN1_OCTET_STRING *PKCS12_item_i2d_encrypt(X509_ALGOR *algor, const char *pass, int passlen, void *obj, int zbuf) { - ASN1_OCTET_STRING *oct; + ASN1_OCTET_STRING *oct = NULL; unsigned char *in = NULL; int inlen; if (!(oct = M_ASN1_OCTET_STRING_new())) { PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, ERR_R_MALLOC_FAILURE); - return NULL; + goto err; } inlen = ASN1_item_i2d(obj, &in, it); if (!in) { PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, PKCS12_R_ENCODE_ERROR); - return NULL; + goto err; } if (!PKCS12_pbe_crypt(algor, pass, passlen, in, inlen, &oct->data, &oct->length, 1)) { PKCS12err(PKCS12_F_PKCS12_ITEM_I2D_ENCRYPT, PKCS12_R_ENCRYPT_ERROR); OPENSSL_free(in); - return NULL; + goto err; } if (zbuf) OPENSSL_cleanse(in, inlen); 
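
The PKCS12_item_i2d_encrypt changes above replace three early `return NULL`s with jumps to a single `err:` label (added just below), so the ASN1_OCTET_STRING allocated at the top of the function is freed on every failure path instead of leaking. The pattern, reduced to a minimal sketch with hypothetical names standing in for the OpenSSL calls:

#include <stdlib.h>
#include <string.h>

/* Single-exit cleanup: every failure funnels through `err`, so anything
 * allocated before the failure is always released.  make_blob/free_blob
 * and the memcpy stand in for the real i2d/encrypt steps. */
struct blob { unsigned char *data; size_t len; };

static struct blob *make_blob(void) { return calloc(1, sizeof(struct blob)); }
static void free_blob(struct blob *b) { if (b) { free(b->data); free(b); } }

static struct blob *encode_and_encrypt(const unsigned char *in, size_t inlen)
{
    struct blob *out = NULL;
    unsigned char *tmp = NULL;

    if ((out = make_blob()) == NULL)
        goto err;
    if ((tmp = malloc(inlen)) == NULL)   /* an early `return NULL` here  */
        goto err;                        /* would leak `out`             */
    memcpy(tmp, in, inlen);

    out->data = tmp;                     /* ownership moves on success   */
    out->len  = inlen;
    return out;

 err:
    free(tmp);
    free_blob(out);
    return NULL;
}
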
OPENSSL_free(in); return oct; + err: + if (oct) + ASN1_OCTET_STRING_free(oct); + return NULL; } IMPLEMENT_PKCS12_STACK_OF(PKCS7) diff --git a/deps/openssl/openssl/crypto/pkcs12/p12_p8e.c b/deps/openssl/openssl/crypto/pkcs12/p12_p8e.c index d970f0544c5e5b..861a087f80d7bb 100644 --- a/deps/openssl/openssl/crypto/pkcs12/p12_p8e.c +++ b/deps/openssl/openssl/crypto/pkcs12/p12_p8e.c @@ -76,8 +76,12 @@ X509_SIG *PKCS8_encrypt(int pbe_nid, const EVP_CIPHER *cipher, if (pbe_nid == -1) pbe = PKCS5_pbe2_set(cipher, iter, salt, saltlen); - else + else if (EVP_PBE_find(EVP_PBE_TYPE_PRF, pbe_nid, NULL, NULL, 0)) + pbe = PKCS5_pbe2_set_iv(cipher, iter, salt, saltlen, NULL, pbe_nid); + else { + ERR_clear_error(); pbe = PKCS5_pbe_set(pbe_nid, iter, salt, saltlen); + } if (!pbe) { PKCS12err(PKCS12_F_PKCS8_ENCRYPT, ERR_R_ASN1_LIB); goto err; diff --git a/deps/openssl/openssl/crypto/ppc_arch.h b/deps/openssl/openssl/crypto/ppc_arch.h new file mode 100644 index 00000000000000..b50ec996a5a3b4 --- /dev/null +++ b/deps/openssl/openssl/crypto/ppc_arch.h @@ -0,0 +1,10 @@ +#ifndef __PPC_ARCH_H__ +# define __PPC_ARCH_H__ + +extern unsigned int OPENSSL_ppccap_P; + +# define PPC_FPU64 (1<<0) +# define PPC_ALTIVEC (1<<1) +# define PPC_CRYPTO207 (1<<2) + +#endif diff --git a/deps/openssl/openssl/crypto/ppccap.c b/deps/openssl/openssl/crypto/ppccap.c index 5242294310b179..2b7f704cd82af2 100644 --- a/deps/openssl/openssl/crypto/ppccap.c +++ b/deps/openssl/openssl/crypto/ppccap.c @@ -4,13 +4,15 @@ #include #include #include +#if defined(__linux) || defined(_AIX) +# include +#endif #include #include -#define PPC_FPU64 (1<<0) -#define PPC_ALTIVEC (1<<1) +#include "ppc_arch.h" -static int OPENSSL_ppccap_P = 0; +unsigned int OPENSSL_ppccap_P = 0; static sigset_t all_masked; @@ -25,7 +27,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); if (sizeof(size_t) == 4) { -# if (defined(__APPLE__) && defined(__MACH__)) +# if 1 || (defined(__APPLE__) && defined(__MACH__)) if (num >= 8 && (num & 3) == 0 && (OPENSSL_ppccap_P & PPC_FPU64)) return bn_mul_mont_fpu64(rp, ap, bp, np, n0, num); # else @@ -55,6 +57,22 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, } #endif +void sha256_block_p8(void *ctx, const void *inp, size_t len); +void sha256_block_ppc(void *ctx, const void *inp, size_t len); +void sha256_block_data_order(void *ctx, const void *inp, size_t len) +{ + OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) : + sha256_block_ppc(ctx, inp, len); +} + +void sha512_block_p8(void *ctx, const void *inp, size_t len); +void sha512_block_ppc(void *ctx, const void *inp, size_t len); +void sha512_block_data_order(void *ctx, const void *inp, size_t len) +{ + OPENSSL_ppccap_P & PPC_CRYPTO207 ? 
sha512_block_p8(ctx, inp, len) : + sha512_block_ppc(ctx, inp, len); +} + static sigjmp_buf ill_jmp; static void ill_handler(int sig) { @@ -63,6 +81,7 @@ static void ill_handler(int sig) void OPENSSL_ppc64_probe(void); void OPENSSL_altivec_probe(void); +void OPENSSL_crypto207_probe(void); void OPENSSL_cpuid_setup(void) { @@ -93,12 +112,15 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ppccap_P = 0; #if defined(_AIX) - if (sizeof(size_t) == 4 + if (sizeof(size_t) == 4) { + struct utsname uts; # if defined(_SC_AIX_KERNEL_BITMODE) - && sysconf(_SC_AIX_KERNEL_BITMODE) != 64 + if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64) + return; # endif - ) - return; + if (uname(&uts) != 0 || atoi(uts.version) < 6) + return; + } #endif memset(&ill_act, 0, sizeof(ill_act)); @@ -109,10 +131,14 @@ void OPENSSL_cpuid_setup(void) sigaction(SIGILL, &ill_act, &ill_oact); if (sizeof(size_t) == 4) { - if (sigsetjmp(ill_jmp, 1) == 0) { - OPENSSL_ppc64_probe(); - OPENSSL_ppccap_P |= PPC_FPU64; - } +#ifdef __linux + struct utsname uts; + if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0) +#endif + if (sigsetjmp(ill_jmp, 1) == 0) { + OPENSSL_ppc64_probe(); + OPENSSL_ppccap_P |= PPC_FPU64; + } } else { /* * Wanted code detecting POWER6 CPU and setting PPC_FPU64 @@ -122,6 +148,10 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(ill_jmp, 1) == 0) { OPENSSL_altivec_probe(); OPENSSL_ppccap_P |= PPC_ALTIVEC; + if (sigsetjmp(ill_jmp, 1) == 0) { + OPENSSL_crypto207_probe(); + OPENSSL_ppccap_P |= PPC_CRYPTO207; + } } sigaction(SIGILL, &ill_oact, NULL); diff --git a/deps/openssl/openssl/crypto/ppccpuid.pl b/deps/openssl/openssl/crypto/ppccpuid.pl index 4ba736a1d1bd99..8d800fe7d36fa2 100755 --- a/deps/openssl/openssl/crypto/ppccpuid.pl +++ b/deps/openssl/openssl/crypto/ppccpuid.pl @@ -31,6 +31,7 @@ blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe .globl .OPENSSL_altivec_probe .align 4 @@ -39,6 +40,17 @@ blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_altivec_probe,.-..OPENSSL_altivec_probe + +.globl .OPENSSL_crypto207_probe +.align 4 +.OPENSSL_crypto207_probe: + lvx_u v0,0,r1 + vcipher v0,v0,v0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe .globl .OPENSSL_wipe_cpu .align 4 @@ -71,6 +83,7 @@ blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu .globl .OPENSSL_atomic_add .align 4 @@ -84,6 +97,7 @@ .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 +.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add .globl .OPENSSL_rdtsc .align 4 @@ -93,6 +107,7 @@ blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc .globl .OPENSSL_cleanse .align 4 @@ -125,6 +140,7 @@ .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 +.size .OPENSSL_cleanse,.-.OPENSSL_cleanse ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; diff --git a/deps/openssl/openssl/crypto/rand/rand_win.c b/deps/openssl/openssl/crypto/rand/rand_win.c index 0c616c4c57271d..06670ae017ad84 100644 --- a/deps/openssl/openssl/crypto/rand/rand_win.c +++ b/deps/openssl/openssl/crypto/rand/rand_win.c @@ -684,9 +684,7 @@ static void readscreen(void) { # if !defined(OPENSSL_SYS_WINCE) && !defined(OPENSSL_SYS_WIN32_CYGWIN) HDC hScrDC; /* screen DC */ - HDC hMemDC; /* memory DC */ HBITMAP hBitmap; /* handle for our bitmap */ - HBITMAP hOldBitmap; /* handle for previous bitmap */ BITMAP bm; /* bitmap properties */ unsigned int size; /* size of bitmap */ char *bmbits; /* contents of bitmap */ @@ -694,13 +692,13 @@ static void readscreen(void) int h; /* screen height */ int y; /* 
y-coordinate of screen lines to grab */ int n = 16; /* number of screen lines to grab at a time */ + BITMAPINFOHEADER bi; /* info about the bitmap */ if (check_winnt() && OPENSSL_isservice() > 0) return; - /* Create a screen DC and a memory DC compatible to screen DC */ - hScrDC = CreateDC(TEXT("DISPLAY"), NULL, NULL, NULL); - hMemDC = CreateCompatibleDC(hScrDC); + /* Get a reference to the screen DC */ + hScrDC = GetDC(NULL); /* Get screen resolution */ w = GetDeviceCaps(hScrDC, HORZRES); @@ -709,24 +707,31 @@ static void readscreen(void) /* Create a bitmap compatible with the screen DC */ hBitmap = CreateCompatibleBitmap(hScrDC, w, n); - /* Select new bitmap into memory DC */ - hOldBitmap = SelectObject(hMemDC, hBitmap); - /* Get bitmap properties */ GetObject(hBitmap, sizeof(BITMAP), (LPSTR) & bm); size = (unsigned int)bm.bmWidthBytes * bm.bmHeight * bm.bmPlanes; + bi.biSize = sizeof(BITMAPINFOHEADER); + bi.biWidth = bm.bmWidth; + bi.biHeight = bm.bmHeight; + bi.biPlanes = bm.bmPlanes; + bi.biBitCount = bm.bmBitsPixel; + bi.biCompression = BI_RGB; + bi.biSizeImage = 0; + bi.biXPelsPerMeter = 0; + bi.biYPelsPerMeter = 0; + bi.biClrUsed = 0; + bi.biClrImportant = 0; + bmbits = OPENSSL_malloc(size); if (bmbits) { /* Now go through the whole screen, repeatedly grabbing n lines */ for (y = 0; y < h - n; y += n) { unsigned char md[MD_DIGEST_LENGTH]; - /* Bitblt screen DC to memory DC */ - BitBlt(hMemDC, 0, 0, w, n, hScrDC, 0, y, SRCCOPY); - - /* Copy bitmap bits from memory DC to bmbits */ - GetBitmapBits(hBitmap, size, bmbits); + /* Copy the bits of the current line range into the buffer */ + GetDIBits(hScrDC, hBitmap, y, n, + bmbits, (BITMAPINFO *) & bi, DIB_RGB_COLORS); /* Get the hash of the bitmap */ MD(bmbits, size, md); @@ -738,13 +743,9 @@ static void readscreen(void) OPENSSL_free(bmbits); } - /* Select old bitmap back into memory DC */ - hBitmap = SelectObject(hMemDC, hOldBitmap); - /* Clean up */ DeleteObject(hBitmap); - DeleteDC(hMemDC); - DeleteDC(hScrDC); + ReleaseDC(NULL, hScrDC); # endif /* !OPENSSL_SYS_WINCE */ } diff --git a/deps/openssl/openssl/crypto/rc4/Makefile b/deps/openssl/openssl/crypto/rc4/Makefile index 1614d479619f10..76860aeb4bf369 100644 --- a/deps/openssl/openssl/crypto/rc4/Makefile +++ b/deps/openssl/openssl/crypto/rc4/Makefile @@ -42,7 +42,7 @@ lib: $(LIBOBJ) @touch lib rc4-586.s: asm/rc4-586.pl ../perlasm/x86asm.pl - $(PERL) asm/rc4-586.pl $(PERLASM_SCHEME) $(CFLAGS) > $@ + $(PERL) asm/rc4-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ rc4-x86_64.s: asm/rc4-x86_64.pl $(PERL) asm/rc4-x86_64.pl $(PERLASM_SCHEME) > $@ @@ -66,7 +66,7 @@ rc4-ia64.s: rc4-ia64.S rc4-%.s: asm/rc4-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO + $(PERL) $(TOP)/util/files.pl "RC4_ENC=$(RC4_ENC)" Makefile >> $(TOP)/MINFO links: @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) diff --git a/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl b/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl index 5c9ac6ad286e21..1d55d551e9bb0e 100644 --- a/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl +++ b/deps/openssl/openssl/crypto/rc4/asm/rc4-586.pl @@ -60,7 +60,7 @@ push(@INC,"${dir}","${dir}../../perlasm"); require "x86asm.pl"; -&asm_init($ARGV[0],"rc4-586.pl"); +&asm_init($ARGV[0],"rc4-586.pl",$x86only = $ARGV[$#ARGV] eq "386"); $xx="eax"; $yy="ebx"; @@ -184,8 +184,11 @@ sub RC4_loop { &and ($ty,-4); # how many 4-byte chunks? 
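
A side note on the ppccap.c changes earlier in this diff: the new OPENSSL_crypto207_probe is wired into the same SIGILL-probe idiom the file already used for the FPU64 and AltiVec checks. Reduced to a self-contained sketch (the probe argument here is a placeholder for a routine that executes one candidate instruction, not one of the real OPENSSL_*_probe symbols):

    #include <setjmp.h>
    #include <signal.h>
    #include <string.h>

    static sigjmp_buf ill_jmp;

    static void ill_handler(int sig)
    {
        (void)sig;
        siglongjmp(ill_jmp, 1);            /* unwind out of the faulting probe */
    }

    /* Returns 1 if probe() ran without raising SIGILL, 0 otherwise. */
    static int instruction_supported(void (*probe)(void))
    {
        struct sigaction act, oact;
        int ok = 0;

        memset(&act, 0, sizeof(act));
        act.sa_handler = ill_handler;
        sigaction(SIGILL, &act, &oact);    /* temporary SIGILL handler */

        if (sigsetjmp(ill_jmp, 1) == 0) {
            probe();                       /* e.g. a single vcipher instruction */
            ok = 1;
        }

        sigaction(SIGILL, &oact, NULL);    /* restore the previous handler */
        return ok;
    }

Note that OPENSSL_cpuid_setup() nests the crypto207 probe inside the AltiVec one, so PPC_CRYPTO207 can only be set on a CPU that already reported PPC_ALTIVEC.
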
&jz (&label("loop1")); - &test ($ty,-8); &mov (&wparam(3),$out); # $out as accumulator in these loops + if ($x86only) { + &jmp (&label("go4loop4")); + } else { + &test ($ty,-8); &jz (&label("go4loop4")); &picmeup($out,"OPENSSL_ia32cap_P"); @@ -228,6 +231,7 @@ sub RC4_loop { &cmp ($inp,&wparam(1)); # compare to input+len &je (&label("done")); &jmp (&label("loop1")); + } &set_label("go4loop4",16); &lea ($ty,&DWP(-4,$inp,$ty)); diff --git a/deps/openssl/openssl/crypto/rc4/rc4_enc.c b/deps/openssl/openssl/crypto/rc4/rc4_enc.c index 6ebd54d46cad6e..0f0a2487a7f729 100644 --- a/deps/openssl/openssl/crypto/rc4/rc4_enc.c +++ b/deps/openssl/openssl/crypto/rc4/rc4_enc.c @@ -79,7 +79,7 @@ void RC4(RC4_KEY *key, size_t len, const unsigned char *indata, y = key->y; d = key->data; -#if defined(RC4_CHUNK) +#if defined(RC4_CHUNK) && !defined(PEDANTIC) /*- * The original reason for implementing this(*) was the fact that * pre-21164a Alpha CPUs don't have byte load/store instructions diff --git a/deps/openssl/openssl/crypto/rc5/rc5_locl.h b/deps/openssl/openssl/crypto/rc5/rc5_locl.h index 1e83f19fecb740..ee757e647765a2 100644 --- a/deps/openssl/openssl/crypto/rc5/rc5_locl.h +++ b/deps/openssl/openssl/crypto/rc5/rc5_locl.h @@ -146,9 +146,12 @@ *((c)++)=(unsigned char)(((l)>> 8L)&0xff), \ *((c)++)=(unsigned char)(((l) )&0xff)) -#if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) || defined(__ICC) +#if (defined(OPENSSL_SYS_WIN32) && defined(_MSC_VER)) # define ROTATE_l32(a,n) _lrotl(a,n) # define ROTATE_r32(a,n) _lrotr(a,n) +#elif defined(__ICC) +# define ROTATE_l32(a,n) _rotl(a,n) +# define ROTATE_r32(a,n) _rotr(a,n) #elif defined(__GNUC__) && __GNUC__>=2 && !defined(__STRICT_ANSI__) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC) # if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) # define ROTATE_l32(a,n) ({ register unsigned int ret; \ diff --git a/deps/openssl/openssl/crypto/rsa/Makefile b/deps/openssl/openssl/crypto/rsa/Makefile index e8121a49675063..af487b6004fb3b 100644 --- a/deps/openssl/openssl/crypto/rsa/Makefile +++ b/deps/openssl/openssl/crypto/rsa/Makefile @@ -226,19 +226,20 @@ rsa_pk1.o: ../cryptlib.h rsa_pk1.c rsa_pmeth.o: ../../e_os.h ../../include/openssl/asn1.h rsa_pmeth.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h rsa_pmeth.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h -rsa_pmeth.o: ../../include/openssl/cms.h ../../include/openssl/crypto.h -rsa_pmeth.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h -rsa_pmeth.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h -rsa_pmeth.o: ../../include/openssl/err.h ../../include/openssl/evp.h -rsa_pmeth.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h -rsa_pmeth.o: ../../include/openssl/objects.h +rsa_pmeth.o: ../../include/openssl/cms.h ../../include/openssl/conf.h +rsa_pmeth.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +rsa_pmeth.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h +rsa_pmeth.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h +rsa_pmeth.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h +rsa_pmeth.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h rsa_pmeth.o: ../../include/openssl/opensslconf.h rsa_pmeth.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h rsa_pmeth.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h rsa_pmeth.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h rsa_pmeth.o: 
../../include/openssl/stack.h ../../include/openssl/symhacks.h rsa_pmeth.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h -rsa_pmeth.o: ../cryptlib.h ../evp/evp_locl.h rsa_locl.h rsa_pmeth.c +rsa_pmeth.o: ../../include/openssl/x509v3.h ../cryptlib.h ../evp/evp_locl.h +rsa_pmeth.o: rsa_locl.h rsa_pmeth.c rsa_prn.o: ../../e_os.h ../../include/openssl/asn1.h rsa_prn.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h rsa_prn.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h diff --git a/deps/openssl/openssl/crypto/rsa/rsa.h b/deps/openssl/openssl/crypto/rsa/rsa.h index a8b59a9559a91e..d2ee37406e3cc3 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa.h +++ b/deps/openssl/openssl/crypto/rsa/rsa.h @@ -262,13 +262,31 @@ struct rsa_st { EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP, 0, pubexp) # define EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md) \ - EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \ + EVP_PKEY_OP_TYPE_SIG | EVP_PKEY_OP_TYPE_CRYPT, \ EVP_PKEY_CTRL_RSA_MGF1_MD, 0, (void *)md) +# define EVP_PKEY_CTX_set_rsa_oaep_md(ctx, md) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT, \ + EVP_PKEY_CTRL_RSA_OAEP_MD, 0, (void *)md) + # define EVP_PKEY_CTX_get_rsa_mgf1_md(ctx, pmd) \ - EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_SIG, \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, \ + EVP_PKEY_OP_TYPE_SIG | EVP_PKEY_OP_TYPE_CRYPT, \ EVP_PKEY_CTRL_GET_RSA_MGF1_MD, 0, (void *)pmd) +# define EVP_PKEY_CTX_get_rsa_oaep_md(ctx, pmd) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT, \ + EVP_PKEY_CTRL_GET_RSA_OAEP_MD, 0, (void *)pmd) + +# define EVP_PKEY_CTX_set0_rsa_oaep_label(ctx, l, llen) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT, \ + EVP_PKEY_CTRL_RSA_OAEP_LABEL, llen, (void *)l) + +# define EVP_PKEY_CTX_get0_rsa_oaep_label(ctx, l) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_RSA, EVP_PKEY_OP_TYPE_CRYPT, \ + EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL, 0, (void *)l) + # define EVP_PKEY_CTRL_RSA_PADDING (EVP_PKEY_ALG_CTRL + 1) # define EVP_PKEY_CTRL_RSA_PSS_SALTLEN (EVP_PKEY_ALG_CTRL + 2) @@ -280,6 +298,12 @@ struct rsa_st { # define EVP_PKEY_CTRL_GET_RSA_PSS_SALTLEN (EVP_PKEY_ALG_CTRL + 7) # define EVP_PKEY_CTRL_GET_RSA_MGF1_MD (EVP_PKEY_ALG_CTRL + 8) +# define EVP_PKEY_CTRL_RSA_OAEP_MD (EVP_PKEY_ALG_CTRL + 9) +# define EVP_PKEY_CTRL_RSA_OAEP_LABEL (EVP_PKEY_ALG_CTRL + 10) + +# define EVP_PKEY_CTRL_GET_RSA_OAEP_MD (EVP_PKEY_ALG_CTRL + 11) +# define EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL (EVP_PKEY_ALG_CTRL + 12) + # define RSA_PKCS1_PADDING 1 # define RSA_SSLV23_PADDING 2 # define RSA_NO_PADDING 3 @@ -347,6 +371,14 @@ typedef struct rsa_pss_params_st { DECLARE_ASN1_FUNCTIONS(RSA_PSS_PARAMS) +typedef struct rsa_oaep_params_st { + X509_ALGOR *hashFunc; + X509_ALGOR *maskGenFunc; + X509_ALGOR *pSourceFunc; +} RSA_OAEP_PARAMS; + +DECLARE_ASN1_FUNCTIONS(RSA_OAEP_PARAMS) + # ifndef OPENSSL_NO_FP_API int RSA_print_fp(FILE *fp, const RSA *r, int offset); # endif @@ -414,6 +446,15 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, const unsigned char *f, int fl, int rsa_len, const unsigned char *p, int pl); +int RSA_padding_add_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, + const unsigned char *from, int flen, + const unsigned char *param, int plen, + const EVP_MD *md, const EVP_MD *mgf1md); +int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, + const unsigned char *from, int flen, + int num, const unsigned char *param, + int 
plen, const EVP_MD *md, + const EVP_MD *mgf1md); int RSA_padding_add_SSLv23(unsigned char *to, int tlen, const unsigned char *f, int fl); int RSA_padding_check_SSLv23(unsigned char *to, int tlen, @@ -494,8 +535,10 @@ void ERR_load_RSA_strings(void); # define RSA_F_PKEY_RSA_SIGN 142 # define RSA_F_PKEY_RSA_VERIFY 154 # define RSA_F_PKEY_RSA_VERIFYRECOVER 141 +# define RSA_F_RSA_ALGOR_TO_MD 157 # define RSA_F_RSA_BUILTIN_KEYGEN 129 # define RSA_F_RSA_CHECK_KEY 123 +# define RSA_F_RSA_CMS_DECRYPT 158 # define RSA_F_RSA_EAY_PRIVATE_DECRYPT 101 # define RSA_F_RSA_EAY_PRIVATE_ENCRYPT 102 # define RSA_F_RSA_EAY_PUBLIC_DECRYPT 103 @@ -504,6 +547,7 @@ void ERR_load_RSA_strings(void); # define RSA_F_RSA_GENERATE_KEY_EX 155 # define RSA_F_RSA_ITEM_VERIFY 156 # define RSA_F_RSA_MEMORY_LOCK 130 +# define RSA_F_RSA_MGF1_TO_MD 159 # define RSA_F_RSA_NEW_METHOD 106 # define RSA_F_RSA_NULL 124 # define RSA_F_RSA_NULL_MOD_EXP 131 @@ -513,6 +557,7 @@ void ERR_load_RSA_strings(void); # define RSA_F_RSA_NULL_PUBLIC_ENCRYPT 135 # define RSA_F_RSA_PADDING_ADD_NONE 107 # define RSA_F_RSA_PADDING_ADD_PKCS1_OAEP 121 +# define RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1 160 # define RSA_F_RSA_PADDING_ADD_PKCS1_PSS 125 # define RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1 148 # define RSA_F_RSA_PADDING_ADD_PKCS1_TYPE_1 108 @@ -521,6 +566,7 @@ void ERR_load_RSA_strings(void); # define RSA_F_RSA_PADDING_ADD_X931 127 # define RSA_F_RSA_PADDING_CHECK_NONE 111 # define RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP 122 +# define RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1 161 # define RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1 112 # define RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2 113 # define RSA_F_RSA_PADDING_CHECK_SSLV23 114 @@ -531,6 +577,7 @@ void ERR_load_RSA_strings(void); # define RSA_F_RSA_PRIVATE_ENCRYPT 151 # define RSA_F_RSA_PRIV_DECODE 137 # define RSA_F_RSA_PRIV_ENCODE 138 +# define RSA_F_RSA_PSS_TO_CTX 162 # define RSA_F_RSA_PUBLIC_DECRYPT 152 # define RSA_F_RSA_PUBLIC_ENCRYPT 153 # define RSA_F_RSA_PUB_DECODE 139 @@ -556,17 +603,21 @@ void ERR_load_RSA_strings(void); # define RSA_R_DATA_TOO_LARGE_FOR_MODULUS 132 # define RSA_R_DATA_TOO_SMALL 111 # define RSA_R_DATA_TOO_SMALL_FOR_KEY_SIZE 122 +# define RSA_R_DIGEST_DOES_NOT_MATCH 166 # define RSA_R_DIGEST_TOO_BIG_FOR_RSA_KEY 112 # define RSA_R_DMP1_NOT_CONGRUENT_TO_D 124 # define RSA_R_DMQ1_NOT_CONGRUENT_TO_D 125 # define RSA_R_D_E_NOT_CONGRUENT_TO_1 123 # define RSA_R_FIRST_OCTET_INVALID 133 # define RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE 144 +# define RSA_R_INVALID_DIGEST 160 # define RSA_R_INVALID_DIGEST_LENGTH 143 # define RSA_R_INVALID_HEADER 137 # define RSA_R_INVALID_KEYBITS 145 +# define RSA_R_INVALID_LABEL 161 # define RSA_R_INVALID_MESSAGE_LENGTH 131 # define RSA_R_INVALID_MGF1_MD 156 +# define RSA_R_INVALID_OAEP_PARAMETERS 162 # define RSA_R_INVALID_PADDING 138 # define RSA_R_INVALID_PADDING_MODE 141 # define RSA_R_INVALID_PSS_PARAMETERS 149 @@ -595,9 +646,12 @@ void ERR_load_RSA_strings(void); # define RSA_R_SSLV3_ROLLBACK_ATTACK 115 # define RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD 116 # define RSA_R_UNKNOWN_ALGORITHM_TYPE 117 +# define RSA_R_UNKNOWN_DIGEST 163 # define RSA_R_UNKNOWN_MASK_DIGEST 151 # define RSA_R_UNKNOWN_PADDING_TYPE 118 # define RSA_R_UNKNOWN_PSS_DIGEST 152 +# define RSA_R_UNSUPPORTED_ENCRYPTION_TYPE 164 +# define RSA_R_UNSUPPORTED_LABEL_SOURCE 165 # define RSA_R_UNSUPPORTED_MASK_ALGORITHM 153 # define RSA_R_UNSUPPORTED_MASK_PARAMETER 154 # define RSA_R_UNSUPPORTED_SIGNATURE_TYPE 155 diff --git a/deps/openssl/openssl/crypto/rsa/rsa_ameth.c 
b/deps/openssl/openssl/crypto/rsa/rsa_ameth.c index 93e071de75e2a8..ca3922e6c29831 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_ameth.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_ameth.c @@ -68,6 +68,11 @@ #endif #include "asn1_locl.h" +static int rsa_cms_sign(CMS_SignerInfo *si); +static int rsa_cms_verify(CMS_SignerInfo *si); +static int rsa_cms_decrypt(CMS_RecipientInfo *ri); +static int rsa_cms_encrypt(CMS_RecipientInfo *ri); + static int rsa_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) { unsigned char *penc = NULL; @@ -258,6 +263,23 @@ static int rsa_priv_print(BIO *bp, const EVP_PKEY *pkey, int indent, return do_rsa_print(bp, pkey->pkey.rsa, indent, 1); } +/* Given an MGF1 Algorithm ID decode to an Algorithm Identifier */ +static X509_ALGOR *rsa_mgf1_decode(X509_ALGOR *alg) +{ + const unsigned char *p; + int plen; + if (alg == NULL) + return NULL; + if (OBJ_obj2nid(alg->algorithm) != NID_mgf1) + return NULL; + if (alg->parameter->type != V_ASN1_SEQUENCE) + return NULL; + + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + return d2i_X509_ALGOR(NULL, &p, plen); +} + static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg, X509_ALGOR **pmaskHash) { @@ -276,15 +298,7 @@ static RSA_PSS_PARAMS *rsa_pss_decode(const X509_ALGOR *alg, if (!pss) return NULL; - if (pss->maskGenAlgorithm) { - ASN1_TYPE *param = pss->maskGenAlgorithm->parameter; - if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) == NID_mgf1 - && param->type == V_ASN1_SEQUENCE) { - p = param->value.sequence->data; - plen = param->value.sequence->length; - *pmaskHash = d2i_X509_ALGOR(NULL, &p, plen); - } - } + *pmaskHash = rsa_mgf1_decode(pss->maskGenAlgorithm); return pss; } @@ -401,17 +415,25 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) #ifndef OPENSSL_NO_CMS case ASN1_PKEY_CTRL_CMS_SIGN: if (arg1 == 0) - CMS_SignerInfo_get0_algs(arg2, NULL, NULL, NULL, &alg); + return rsa_cms_sign(arg2); + else if (arg1 == 1) + return rsa_cms_verify(arg2); break; case ASN1_PKEY_CTRL_CMS_ENVELOPE: if (arg1 == 0) - CMS_RecipientInfo_ktri_get0_algs(arg2, NULL, NULL, &alg); + return rsa_cms_encrypt(arg2); + else if (arg1 == 1) + return rsa_cms_decrypt(arg2); break; + + case ASN1_PKEY_CTRL_CMS_RI_TYPE: + *(int *)arg2 = CMS_RECIPINFO_TRANS; + return 1; #endif case ASN1_PKEY_CTRL_DEFAULT_MD_NID: - *(int *)arg2 = NID_sha1; + *(int *)arg2 = NID_sha256; return 1; default: @@ -426,59 +448,166 @@ static int rsa_pkey_ctrl(EVP_PKEY *pkey, int op, long arg1, void *arg2) } +/* allocate and set algorithm ID from EVP_MD, default SHA1 */ +static int rsa_md_to_algor(X509_ALGOR **palg, const EVP_MD *md) +{ + if (EVP_MD_type(md) == NID_sha1) + return 1; + *palg = X509_ALGOR_new(); + if (!*palg) + return 0; + X509_ALGOR_set_md(*palg, md); + return 1; +} + +/* Allocate and set MGF1 algorithm ID from EVP_MD */ +static int rsa_md_to_mgf1(X509_ALGOR **palg, const EVP_MD *mgf1md) +{ + X509_ALGOR *algtmp = NULL; + ASN1_STRING *stmp = NULL; + *palg = NULL; + if (EVP_MD_type(mgf1md) == NID_sha1) + return 1; + /* need to embed algorithm ID inside another */ + if (!rsa_md_to_algor(&algtmp, mgf1md)) + goto err; + if (!ASN1_item_pack(algtmp, ASN1_ITEM_rptr(X509_ALGOR), &stmp)) + goto err; + *palg = X509_ALGOR_new(); + if (!*palg) + goto err; + X509_ALGOR_set0(*palg, OBJ_nid2obj(NID_mgf1), V_ASN1_SEQUENCE, stmp); + stmp = NULL; + err: + if (stmp) + ASN1_STRING_free(stmp); + if (algtmp) + X509_ALGOR_free(algtmp); + if (*palg) + return 1; + return 0; +} + +/* convert algorithm ID to EVP_MD, 
default SHA1 */ +static const EVP_MD *rsa_algor_to_md(X509_ALGOR *alg) +{ + const EVP_MD *md; + if (!alg) + return EVP_sha1(); + md = EVP_get_digestbyobj(alg->algorithm); + if (md == NULL) + RSAerr(RSA_F_RSA_ALGOR_TO_MD, RSA_R_UNKNOWN_DIGEST); + return md; +} + +/* convert MGF1 algorithm ID to EVP_MD, default SHA1 */ +static const EVP_MD *rsa_mgf1_to_md(X509_ALGOR *alg, X509_ALGOR *maskHash) +{ + const EVP_MD *md; + if (!alg) + return EVP_sha1(); + /* Check mask and lookup mask hash algorithm */ + if (OBJ_obj2nid(alg->algorithm) != NID_mgf1) { + RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNSUPPORTED_MASK_ALGORITHM); + return NULL; + } + if (!maskHash) { + RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNSUPPORTED_MASK_PARAMETER); + return NULL; + } + md = EVP_get_digestbyobj(maskHash->algorithm); + if (md == NULL) { + RSAerr(RSA_F_RSA_MGF1_TO_MD, RSA_R_UNKNOWN_MASK_DIGEST); + return NULL; + } + return md; +} + /* - * Customised RSA item verification routine. This is called when a signature - * is encountered requiring special handling. We currently only handle PSS. + * Convert EVP_PKEY_CTX is PSS mode into corresponding algorithm parameter, + * suitable for setting an AlgorithmIdentifier. */ -static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, - X509_ALGOR *sigalg, ASN1_BIT_STRING *sig, - EVP_PKEY *pkey) +static ASN1_STRING *rsa_ctx_to_pss(EVP_PKEY_CTX *pkctx) +{ + const EVP_MD *sigmd, *mgf1md; + RSA_PSS_PARAMS *pss = NULL; + ASN1_STRING *os = NULL; + EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx); + int saltlen, rv = 0; + if (EVP_PKEY_CTX_get_signature_md(pkctx, &sigmd) <= 0) + goto err; + if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) + goto err; + if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen)) + goto err; + if (saltlen == -1) + saltlen = EVP_MD_size(sigmd); + else if (saltlen == -2) { + saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2; + if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0) + saltlen--; + } + pss = RSA_PSS_PARAMS_new(); + if (!pss) + goto err; + if (saltlen != 20) { + pss->saltLength = ASN1_INTEGER_new(); + if (!pss->saltLength) + goto err; + if (!ASN1_INTEGER_set(pss->saltLength, saltlen)) + goto err; + } + if (!rsa_md_to_algor(&pss->hashAlgorithm, sigmd)) + goto err; + if (!rsa_md_to_mgf1(&pss->maskGenAlgorithm, mgf1md)) + goto err; + /* Finally create string with pss parameter encoding. */ + if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os)) + goto err; + rv = 1; + err: + if (pss) + RSA_PSS_PARAMS_free(pss); + if (rv) + return os; + if (os) + ASN1_STRING_free(os); + return NULL; +} + +/* + * From PSS AlgorithmIdentifier set public key parameters. If pkey isn't NULL + * then the EVP_MD_CTX is setup and initalised. If it is NULL parameters are + * passed to pkctx instead. 
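
rsa_ctx_to_pss() above only reads values back out of an EVP_PKEY_CTX that the caller configured. For reference, the signing-side setup it expects looks roughly like this (a sketch; mdctx is assumed to come from EVP_MD_CTX_create() and pkey to be an RSA key loaded elsewhere, with error handling kept minimal):

    #include <openssl/evp.h>
    #include <openssl/rsa.h>

    /* Configure a PSS signing context; these are the values that
     * rsa_ctx_to_pss() later serialises into the RSASSA-PSS
     * AlgorithmIdentifier parameters. */
    static int init_pss_sign(EVP_MD_CTX *mdctx, EVP_PKEY *pkey)
    {
        EVP_PKEY_CTX *pkctx = NULL;        /* owned by mdctx, not freed here */

        if (EVP_DigestSignInit(mdctx, &pkctx, EVP_sha256(), NULL, pkey) <= 0)
            return 0;
        if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_PSS_PADDING) <= 0)
            return 0;
        if (EVP_PKEY_CTX_set_rsa_pss_saltlen(pkctx, -1) <= 0)
            return 0;                      /* -1: salt length = digest length */
        if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, EVP_sha256()) <= 0)
            return 0;
        return 1;
    }

The -1 and -2 salt-length conventions handled above (-1 for the digest size, -2 for the maximum the key allows) are the same ones EVP_PKEY_CTX_set_rsa_pss_saltlen() accepts.
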
+ */ + +static int rsa_pss_to_ctx(EVP_MD_CTX *ctx, EVP_PKEY_CTX *pkctx, + X509_ALGOR *sigalg, EVP_PKEY *pkey) { int rv = -1; int saltlen; const EVP_MD *mgf1md = NULL, *md = NULL; RSA_PSS_PARAMS *pss; X509_ALGOR *maskHash; - EVP_PKEY_CTX *pkctx; /* Sanity check: make sure it is PSS */ if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); return -1; } /* Decode PSS parameters */ pss = rsa_pss_decode(sigalg, &maskHash); if (pss == NULL) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_PSS_PARAMETERS); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_PSS_PARAMETERS); goto err; } - /* Check mask and lookup mask hash algorithm */ - if (pss->maskGenAlgorithm) { - if (OBJ_obj2nid(pss->maskGenAlgorithm->algorithm) != NID_mgf1) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_ALGORITHM); - goto err; - } - if (!maskHash) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_MASK_PARAMETER); - goto err; - } - mgf1md = EVP_get_digestbyobj(maskHash->algorithm); - if (mgf1md == NULL) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_MASK_DIGEST); - goto err; - } - } else - mgf1md = EVP_sha1(); - - if (pss->hashAlgorithm) { - md = EVP_get_digestbyobj(pss->hashAlgorithm->algorithm); - if (md == NULL) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNKNOWN_PSS_DIGEST); - goto err; - } - } else - md = EVP_sha1(); + mgf1md = rsa_mgf1_to_md(pss->maskGenAlgorithm, maskHash); + if (!mgf1md) + goto err; + md = rsa_algor_to_md(pss->hashAlgorithm); + if (!md) + goto err; if (pss->saltLength) { saltlen = ASN1_INTEGER_get(pss->saltLength); @@ -488,7 +617,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, * routines will trap other invalid values anyway. */ if (saltlen < 0) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_SALT_LENGTH); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_SALT_LENGTH); goto err; } } else @@ -499,14 +628,24 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, * PKCS#1 says we should reject any other value anyway. 
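
Worth spelling out once: when the optional RSASSA-PSS fields are absent, the values rsa_pss_to_ctx() falls back to are the PKCS#1 v2.1 / RFC 4055 defaults. A condensed restatement of those fallbacks (sketch only; unknown OIDs simply come back as NULL here rather than raising an error):

    #include <openssl/evp.h>
    #include <openssl/rsa.h>
    #include <openssl/x509.h>

    /* Effective values for an RSASSA-PSS AlgorithmIdentifier with optional
     * fields omitted: SHA-1 hash, MGF1 with SHA-1, saltLength 20 and
     * trailerField 1, mirroring rsa_algor_to_md()/rsa_mgf1_to_md() above. */
    static void pss_effective_params(const RSA_PSS_PARAMS *pss,
                                     const EVP_MD **pmd, int *psaltlen)
    {
        *pmd = pss->hashAlgorithm
            ? EVP_get_digestbyobj(pss->hashAlgorithm->algorithm)
            : EVP_sha1();
        *psaltlen = pss->saltLength
            ? (int)ASN1_INTEGER_get(pss->saltLength)
            : 20;
        /* maskGenAlgorithm defaults to MGF1/SHA-1 the same way, and a
         * present trailerField must be 1. */
    }
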
*/ if (pss->trailerField && ASN1_INTEGER_get(pss->trailerField) != 1) { - RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_INVALID_TRAILER); + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_INVALID_TRAILER); goto err; } /* We have all parameters now set up context */ - if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey)) - goto err; + if (pkey) { + if (!EVP_DigestVerifyInit(ctx, &pkctx, md, NULL, pkey)) + goto err; + } else { + const EVP_MD *checkmd; + if (EVP_PKEY_CTX_get_signature_md(pkctx, &checkmd) <= 0) + goto err; + if (EVP_MD_type(md) != EVP_MD_type(checkmd)) { + RSAerr(RSA_F_RSA_PSS_TO_CTX, RSA_R_DIGEST_DOES_NOT_MATCH); + goto err; + } + } if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_PSS_PADDING) <= 0) goto err; @@ -517,7 +656,7 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0) goto err; /* Carry on */ - rv = 2; + rv = 1; err: RSA_PSS_PARAMS_free(pss); @@ -526,6 +665,71 @@ static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, return rv; } +static int rsa_cms_verify(CMS_SignerInfo *si) +{ + int nid, nid2; + X509_ALGOR *alg; + EVP_PKEY_CTX *pkctx = CMS_SignerInfo_get0_pkey_ctx(si); + CMS_SignerInfo_get0_algs(si, NULL, NULL, NULL, &alg); + nid = OBJ_obj2nid(alg->algorithm); + if (nid == NID_rsaEncryption) + return 1; + if (nid == NID_rsassaPss) + return rsa_pss_to_ctx(NULL, pkctx, alg, NULL); + /* Workaround for some implementation that use a signature OID */ + if (OBJ_find_sigid_algs(nid, NULL, &nid2)) { + if (nid2 == NID_rsaEncryption) + return 1; + } + return 0; +} + +/* + * Customised RSA item verification routine. This is called when a signature + * is encountered requiring special handling. We currently only handle PSS. + */ + +static int rsa_item_verify(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, + X509_ALGOR *sigalg, ASN1_BIT_STRING *sig, + EVP_PKEY *pkey) +{ + /* Sanity check: make sure it is PSS */ + if (OBJ_obj2nid(sigalg->algorithm) != NID_rsassaPss) { + RSAerr(RSA_F_RSA_ITEM_VERIFY, RSA_R_UNSUPPORTED_SIGNATURE_TYPE); + return -1; + } + if (rsa_pss_to_ctx(ctx, NULL, sigalg, pkey) > 0) { + /* Carry on */ + return 2; + } + return -1; +} + +static int rsa_cms_sign(CMS_SignerInfo *si) +{ + int pad_mode = RSA_PKCS1_PADDING; + X509_ALGOR *alg; + EVP_PKEY_CTX *pkctx = CMS_SignerInfo_get0_pkey_ctx(si); + ASN1_STRING *os = NULL; + CMS_SignerInfo_get0_algs(si, NULL, NULL, NULL, &alg); + if (pkctx) { + if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0) + return 0; + } + if (pad_mode == RSA_PKCS1_PADDING) { + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaEncryption), V_ASN1_NULL, 0); + return 1; + } + /* We don't support it */ + if (pad_mode != RSA_PKCS1_PSS_PADDING) + return 0; + os = rsa_ctx_to_pss(pkctx); + if (!os) + return 0; + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os); + return 1; +} + static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, X509_ALGOR *alg1, X509_ALGOR *alg2, ASN1_BIT_STRING *sig) @@ -537,78 +741,184 @@ static int rsa_item_sign(EVP_MD_CTX *ctx, const ASN1_ITEM *it, void *asn, if (pad_mode == RSA_PKCS1_PADDING) return 2; if (pad_mode == RSA_PKCS1_PSS_PADDING) { - const EVP_MD *sigmd, *mgf1md; - RSA_PSS_PARAMS *pss = NULL; - X509_ALGOR *mgf1alg = NULL; - ASN1_STRING *os1 = NULL, *os2 = NULL; - EVP_PKEY *pk = EVP_PKEY_CTX_get0_pkey(pkctx); - int saltlen, rv = 0; - sigmd = EVP_MD_CTX_md(ctx); - if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) - goto err; - if (!EVP_PKEY_CTX_get_rsa_pss_saltlen(pkctx, &saltlen)) - goto err; 
- if (saltlen == -1) - saltlen = EVP_MD_size(sigmd); - else if (saltlen == -2) { - saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2; - if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0) - saltlen--; - } - pss = RSA_PSS_PARAMS_new(); - if (!pss) - goto err; - if (saltlen != 20) { - pss->saltLength = ASN1_INTEGER_new(); - if (!pss->saltLength) - goto err; - if (!ASN1_INTEGER_set(pss->saltLength, saltlen)) - goto err; - } - if (EVP_MD_type(sigmd) != NID_sha1) { - pss->hashAlgorithm = X509_ALGOR_new(); - if (!pss->hashAlgorithm) - goto err; - X509_ALGOR_set_md(pss->hashAlgorithm, sigmd); - } - if (EVP_MD_type(mgf1md) != NID_sha1) { - ASN1_STRING *stmp = NULL; - /* need to embed algorithm ID inside another */ - mgf1alg = X509_ALGOR_new(); - X509_ALGOR_set_md(mgf1alg, mgf1md); - if (!ASN1_item_pack(mgf1alg, ASN1_ITEM_rptr(X509_ALGOR), &stmp)) - goto err; - pss->maskGenAlgorithm = X509_ALGOR_new(); - if (!pss->maskGenAlgorithm) - goto err; - X509_ALGOR_set0(pss->maskGenAlgorithm, - OBJ_nid2obj(NID_mgf1), V_ASN1_SEQUENCE, stmp); - } - /* Finally create string with pss parameter encoding. */ - if (!ASN1_item_pack(pss, ASN1_ITEM_rptr(RSA_PSS_PARAMS), &os1)) - goto err; + ASN1_STRING *os1 = NULL; + os1 = rsa_ctx_to_pss(pkctx); + if (!os1) + return 0; + /* Duplicate parameters if we have to */ if (alg2) { - os2 = ASN1_STRING_dup(os1); - if (!os2) - goto err; + ASN1_STRING *os2 = ASN1_STRING_dup(os1); + if (!os2) { + ASN1_STRING_free(os1); + return 0; + } X509_ALGOR_set0(alg2, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os2); } X509_ALGOR_set0(alg1, OBJ_nid2obj(NID_rsassaPss), V_ASN1_SEQUENCE, os1); - os1 = os2 = NULL; - rv = 3; + return 3; + } + return 2; +} + +static RSA_OAEP_PARAMS *rsa_oaep_decode(const X509_ALGOR *alg, + X509_ALGOR **pmaskHash) +{ + const unsigned char *p; + int plen; + RSA_OAEP_PARAMS *pss; + + *pmaskHash = NULL; + + if (!alg->parameter || alg->parameter->type != V_ASN1_SEQUENCE) + return NULL; + p = alg->parameter->value.sequence->data; + plen = alg->parameter->value.sequence->length; + pss = d2i_RSA_OAEP_PARAMS(NULL, &p, plen); + + if (!pss) + return NULL; + + *pmaskHash = rsa_mgf1_decode(pss->maskGenFunc); + + return pss; +} + +static int rsa_cms_decrypt(CMS_RecipientInfo *ri) +{ + EVP_PKEY_CTX *pkctx; + X509_ALGOR *cmsalg; + int nid; + int rv = -1; + unsigned char *label = NULL; + int labellen = 0; + const EVP_MD *mgf1md = NULL, *md = NULL; + RSA_OAEP_PARAMS *oaep; + X509_ALGOR *maskHash; + pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + if (!pkctx) + return 0; + if (!CMS_RecipientInfo_ktri_get0_algs(ri, NULL, NULL, &cmsalg)) + return -1; + nid = OBJ_obj2nid(cmsalg->algorithm); + if (nid == NID_rsaEncryption) + return 1; + if (nid != NID_rsaesOaep) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_UNSUPPORTED_ENCRYPTION_TYPE); + return -1; + } + /* Decode OAEP parameters */ + oaep = rsa_oaep_decode(cmsalg, &maskHash); + + if (oaep == NULL) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_INVALID_OAEP_PARAMETERS); + goto err; + } + + mgf1md = rsa_mgf1_to_md(oaep->maskGenFunc, maskHash); + if (!mgf1md) + goto err; + md = rsa_algor_to_md(oaep->hashFunc); + if (!md) + goto err; + + if (oaep->pSourceFunc) { + X509_ALGOR *plab = oaep->pSourceFunc; + if (OBJ_obj2nid(plab->algorithm) != NID_pSpecified) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_UNSUPPORTED_LABEL_SOURCE); + goto err; + } + if (plab->parameter->type != V_ASN1_OCTET_STRING) { + RSAerr(RSA_F_RSA_CMS_DECRYPT, RSA_R_INVALID_LABEL); + goto err; + } + + label = plab->parameter->value.octet_string->data; + /* Stop label being freed when OAEP parameters 
are freed */ + plab->parameter->value.octet_string->data = NULL; + labellen = plab->parameter->value.octet_string->length; + } + + if (EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_OAEP_PADDING) <= 0) + goto err; + if (EVP_PKEY_CTX_set_rsa_oaep_md(pkctx, md) <= 0) + goto err; + if (EVP_PKEY_CTX_set_rsa_mgf1_md(pkctx, mgf1md) <= 0) + goto err; + if (EVP_PKEY_CTX_set0_rsa_oaep_label(pkctx, label, labellen) <= 0) + goto err; + /* Carry on */ + rv = 1; + err: - if (mgf1alg) - X509_ALGOR_free(mgf1alg); - if (pss) - RSA_PSS_PARAMS_free(pss); - if (os1) - ASN1_STRING_free(os1); - return rv; + RSA_OAEP_PARAMS_free(oaep); + if (maskHash) + X509_ALGOR_free(maskHash); + return rv; +} +static int rsa_cms_encrypt(CMS_RecipientInfo *ri) +{ + const EVP_MD *md, *mgf1md; + RSA_OAEP_PARAMS *oaep = NULL; + ASN1_STRING *os = NULL; + X509_ALGOR *alg; + EVP_PKEY_CTX *pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri); + int pad_mode = RSA_PKCS1_PADDING, rv = 0, labellen; + unsigned char *label; + CMS_RecipientInfo_ktri_get0_algs(ri, NULL, NULL, &alg); + if (pkctx) { + if (EVP_PKEY_CTX_get_rsa_padding(pkctx, &pad_mode) <= 0) + return 0; } - return 2; + if (pad_mode == RSA_PKCS1_PADDING) { + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaEncryption), V_ASN1_NULL, 0); + return 1; + } + /* Not supported */ + if (pad_mode != RSA_PKCS1_OAEP_PADDING) + return 0; + if (EVP_PKEY_CTX_get_rsa_oaep_md(pkctx, &md) <= 0) + goto err; + if (EVP_PKEY_CTX_get_rsa_mgf1_md(pkctx, &mgf1md) <= 0) + goto err; + labellen = EVP_PKEY_CTX_get0_rsa_oaep_label(pkctx, &label); + if (labellen < 0) + goto err; + oaep = RSA_OAEP_PARAMS_new(); + if (!oaep) + goto err; + if (!rsa_md_to_algor(&oaep->hashFunc, md)) + goto err; + if (!rsa_md_to_mgf1(&oaep->maskGenFunc, mgf1md)) + goto err; + if (labellen > 0) { + ASN1_OCTET_STRING *los = ASN1_OCTET_STRING_new(); + oaep->pSourceFunc = X509_ALGOR_new(); + if (!oaep->pSourceFunc) + goto err; + if (!los) + goto err; + if (!ASN1_OCTET_STRING_set(los, label, labellen)) { + ASN1_OCTET_STRING_free(los); + goto err; + } + X509_ALGOR_set0(oaep->pSourceFunc, OBJ_nid2obj(NID_pSpecified), + V_ASN1_OCTET_STRING, los); + } + /* create string with pss parameter encoding. 
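
These two hooks only translate between the EVP_PKEY_CTX controls and the RSAES-OAEP AlgorithmIdentifier carried in the RecipientInfo; the application still drives everything through the CMS API. As I read the new plumbing, the encrypt-side flow is roughly the sketch below (recip_cert and plaintext are placeholders, error and BIO handling are trimmed, and the CMS_KEY_PARAM flag reflects my understanding of how the per-recipient EVP_PKEY_CTX is kept settable until CMS_final()):

    #include <openssl/cms.h>
    #include <openssl/evp.h>
    #include <openssl/rsa.h>

    static CMS_ContentInfo *oaep_cms_encrypt(X509 *recip_cert, BIO *plaintext)
    {
        CMS_ContentInfo *cms;
        CMS_RecipientInfo *ri;
        EVP_PKEY_CTX *pkctx;

        /* CMS_PARTIAL: recipients are added and adjusted before finalising. */
        cms = CMS_encrypt(NULL, plaintext, EVP_aes_128_cbc(), CMS_PARTIAL);
        if (cms == NULL)
            return NULL;
        ri = CMS_add1_recipient_cert(cms, recip_cert, CMS_KEY_PARAM);
        if (ri == NULL)
            goto err;
        pkctx = CMS_RecipientInfo_get0_pkey_ctx(ri);
        if (pkctx == NULL
            || EVP_PKEY_CTX_set_rsa_padding(pkctx, RSA_PKCS1_OAEP_PADDING) <= 0
            || EVP_PKEY_CTX_set_rsa_oaep_md(pkctx, EVP_sha256()) <= 0)
            goto err;
        if (!CMS_final(cms, plaintext, NULL, 0))  /* wraps the CEK, emits params */
            goto err;
        return cms;
     err:
        CMS_ContentInfo_free(cms);
        return NULL;
    }

rsa_cms_encrypt() above then sees RSA_PKCS1_OAEP_PADDING on that context and writes the matching id-RSAES-OAEP parameters into the RecipientInfo.
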
*/ + if (!ASN1_item_pack(oaep, ASN1_ITEM_rptr(RSA_OAEP_PARAMS), &os)) + goto err; + X509_ALGOR_set0(alg, OBJ_nid2obj(NID_rsaesOaep), V_ASN1_SEQUENCE, os); + os = NULL; + rv = 1; + err: + if (oaep) + RSA_OAEP_PARAMS_free(oaep); + if (os) + ASN1_STRING_free(os); + return rv; } const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[] = { diff --git a/deps/openssl/openssl/crypto/rsa/rsa_asn1.c b/deps/openssl/openssl/crypto/rsa/rsa_asn1.c index 3d82c1d0c71557..aff8b583fa72b7 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_asn1.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_asn1.c @@ -108,6 +108,14 @@ ASN1_SEQUENCE(RSA_PSS_PARAMS) = { IMPLEMENT_ASN1_FUNCTIONS(RSA_PSS_PARAMS) +ASN1_SEQUENCE(RSA_OAEP_PARAMS) = { + ASN1_EXP_OPT(RSA_OAEP_PARAMS, hashFunc, X509_ALGOR, 0), + ASN1_EXP_OPT(RSA_OAEP_PARAMS, maskGenFunc, X509_ALGOR, 1), + ASN1_EXP_OPT(RSA_OAEP_PARAMS, pSourceFunc, X509_ALGOR, 2), +} ASN1_SEQUENCE_END(RSA_OAEP_PARAMS) + +IMPLEMENT_ASN1_FUNCTIONS(RSA_OAEP_PARAMS) + IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPrivateKey, RSAPrivateKey) IMPLEMENT_ASN1_ENCODE_FUNCTIONS_const_fname(RSA, RSAPublicKey, RSAPublicKey) diff --git a/deps/openssl/openssl/crypto/rsa/rsa_err.c b/deps/openssl/openssl/crypto/rsa/rsa_err.c index 25b3fa743d418f..0bab05efcfca52 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_err.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_err.c @@ -1,6 +1,6 @@ /* crypto/rsa/rsa_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -80,8 +80,10 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_PKEY_RSA_SIGN), "PKEY_RSA_SIGN"}, {ERR_FUNC(RSA_F_PKEY_RSA_VERIFY), "PKEY_RSA_VERIFY"}, {ERR_FUNC(RSA_F_PKEY_RSA_VERIFYRECOVER), "PKEY_RSA_VERIFYRECOVER"}, + {ERR_FUNC(RSA_F_RSA_ALGOR_TO_MD), "RSA_ALGOR_TO_MD"}, {ERR_FUNC(RSA_F_RSA_BUILTIN_KEYGEN), "RSA_BUILTIN_KEYGEN"}, {ERR_FUNC(RSA_F_RSA_CHECK_KEY), "RSA_check_key"}, + {ERR_FUNC(RSA_F_RSA_CMS_DECRYPT), "RSA_CMS_DECRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PRIVATE_DECRYPT), "RSA_EAY_PRIVATE_DECRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PRIVATE_ENCRYPT), "RSA_EAY_PRIVATE_ENCRYPT"}, {ERR_FUNC(RSA_F_RSA_EAY_PUBLIC_DECRYPT), "RSA_EAY_PUBLIC_DECRYPT"}, @@ -90,6 +92,7 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_GENERATE_KEY_EX), "RSA_generate_key_ex"}, {ERR_FUNC(RSA_F_RSA_ITEM_VERIFY), "RSA_ITEM_VERIFY"}, {ERR_FUNC(RSA_F_RSA_MEMORY_LOCK), "RSA_memory_lock"}, + {ERR_FUNC(RSA_F_RSA_MGF1_TO_MD), "RSA_MGF1_TO_MD"}, {ERR_FUNC(RSA_F_RSA_NEW_METHOD), "RSA_new_method"}, {ERR_FUNC(RSA_F_RSA_NULL), "RSA_NULL"}, {ERR_FUNC(RSA_F_RSA_NULL_MOD_EXP), "RSA_NULL_MOD_EXP"}, @@ -100,6 +103,8 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_PADDING_ADD_NONE), "RSA_padding_add_none"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP), "RSA_padding_add_PKCS1_OAEP"}, + {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1), + "RSA_padding_add_PKCS1_OAEP_mgf1"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS), "RSA_padding_add_PKCS1_PSS"}, {ERR_FUNC(RSA_F_RSA_PADDING_ADD_PKCS1_PSS_MGF1), "RSA_padding_add_PKCS1_PSS_mgf1"}, @@ -112,6 +117,8 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_NONE), "RSA_padding_check_none"}, {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP), "RSA_padding_check_PKCS1_OAEP"}, + 
{ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1), + "RSA_padding_check_PKCS1_OAEP_mgf1"}, {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_1), "RSA_padding_check_PKCS1_type_1"}, {ERR_FUNC(RSA_F_RSA_PADDING_CHECK_PKCS1_TYPE_2), @@ -124,6 +131,7 @@ static ERR_STRING_DATA RSA_str_functs[] = { {ERR_FUNC(RSA_F_RSA_PRIVATE_ENCRYPT), "RSA_private_encrypt"}, {ERR_FUNC(RSA_F_RSA_PRIV_DECODE), "RSA_PRIV_DECODE"}, {ERR_FUNC(RSA_F_RSA_PRIV_ENCODE), "RSA_PRIV_ENCODE"}, + {ERR_FUNC(RSA_F_RSA_PSS_TO_CTX), "RSA_PSS_TO_CTX"}, {ERR_FUNC(RSA_F_RSA_PUBLIC_DECRYPT), "RSA_public_decrypt"}, {ERR_FUNC(RSA_F_RSA_PUBLIC_ENCRYPT), "RSA_public_encrypt"}, {ERR_FUNC(RSA_F_RSA_PUB_DECODE), "RSA_PUB_DECODE"}, @@ -157,6 +165,7 @@ static ERR_STRING_DATA RSA_str_reasons[] = { {ERR_REASON(RSA_R_DATA_TOO_SMALL), "data too small"}, {ERR_REASON(RSA_R_DATA_TOO_SMALL_FOR_KEY_SIZE), "data too small for key size"}, + {ERR_REASON(RSA_R_DIGEST_DOES_NOT_MATCH), "digest does not match"}, {ERR_REASON(RSA_R_DIGEST_TOO_BIG_FOR_RSA_KEY), "digest too big for rsa key"}, {ERR_REASON(RSA_R_DMP1_NOT_CONGRUENT_TO_D), "dmp1 not congruent to d"}, @@ -165,11 +174,14 @@ static ERR_STRING_DATA RSA_str_reasons[] = { {ERR_REASON(RSA_R_FIRST_OCTET_INVALID), "first octet invalid"}, {ERR_REASON(RSA_R_ILLEGAL_OR_UNSUPPORTED_PADDING_MODE), "illegal or unsupported padding mode"}, + {ERR_REASON(RSA_R_INVALID_DIGEST), "invalid digest"}, {ERR_REASON(RSA_R_INVALID_DIGEST_LENGTH), "invalid digest length"}, {ERR_REASON(RSA_R_INVALID_HEADER), "invalid header"}, {ERR_REASON(RSA_R_INVALID_KEYBITS), "invalid keybits"}, + {ERR_REASON(RSA_R_INVALID_LABEL), "invalid label"}, {ERR_REASON(RSA_R_INVALID_MESSAGE_LENGTH), "invalid message length"}, {ERR_REASON(RSA_R_INVALID_MGF1_MD), "invalid mgf1 md"}, + {ERR_REASON(RSA_R_INVALID_OAEP_PARAMETERS), "invalid oaep parameters"}, {ERR_REASON(RSA_R_INVALID_PADDING), "invalid padding"}, {ERR_REASON(RSA_R_INVALID_PADDING_MODE), "invalid padding mode"}, {ERR_REASON(RSA_R_INVALID_PSS_PARAMETERS), "invalid pss parameters"}, @@ -203,9 +215,13 @@ static ERR_STRING_DATA RSA_str_reasons[] = { {ERR_REASON(RSA_R_THE_ASN1_OBJECT_IDENTIFIER_IS_NOT_KNOWN_FOR_THIS_MD), "the asn1 object identifier is not known for this md"}, {ERR_REASON(RSA_R_UNKNOWN_ALGORITHM_TYPE), "unknown algorithm type"}, + {ERR_REASON(RSA_R_UNKNOWN_DIGEST), "unknown digest"}, {ERR_REASON(RSA_R_UNKNOWN_MASK_DIGEST), "unknown mask digest"}, {ERR_REASON(RSA_R_UNKNOWN_PADDING_TYPE), "unknown padding type"}, {ERR_REASON(RSA_R_UNKNOWN_PSS_DIGEST), "unknown pss digest"}, + {ERR_REASON(RSA_R_UNSUPPORTED_ENCRYPTION_TYPE), + "unsupported encryption type"}, + {ERR_REASON(RSA_R_UNSUPPORTED_LABEL_SOURCE), "unsupported label source"}, {ERR_REASON(RSA_R_UNSUPPORTED_MASK_ALGORITHM), "unsupported mask algorithm"}, {ERR_REASON(RSA_R_UNSUPPORTED_MASK_PARAMETER), diff --git a/deps/openssl/openssl/crypto/rsa/rsa_oaep.c b/deps/openssl/openssl/crypto/rsa/rsa_oaep.c index 499835f8143d4e..9c2a943cf7786c 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_oaep.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_oaep.c @@ -28,39 +28,53 @@ # include # include -static int MGF1(unsigned char *mask, long len, - const unsigned char *seed, long seedlen); - int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, const unsigned char *from, int flen, const unsigned char *param, int plen) +{ + return RSA_padding_add_PKCS1_OAEP_mgf1(to, tlen, from, flen, + param, plen, NULL, NULL); +} + +int RSA_padding_add_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, + const unsigned char *from, int flen, + const unsigned char *param, 
int plen, + const EVP_MD *md, const EVP_MD *mgf1md) { int i, emlen = tlen - 1; unsigned char *db, *seed; - unsigned char *dbmask, seedmask[SHA_DIGEST_LENGTH]; + unsigned char *dbmask, seedmask[EVP_MAX_MD_SIZE]; + int mdlen; + + if (md == NULL) + md = EVP_sha1(); + if (mgf1md == NULL) + mgf1md = md; - if (flen > emlen - 2 * SHA_DIGEST_LENGTH - 1) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, + mdlen = EVP_MD_size(md); + + if (flen > emlen - 2 * mdlen - 1) { + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE); return 0; } - if (emlen < 2 * SHA_DIGEST_LENGTH + 1) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, RSA_R_KEY_SIZE_TOO_SMALL); + if (emlen < 2 * mdlen + 1) { + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, + RSA_R_KEY_SIZE_TOO_SMALL); return 0; } to[0] = 0; seed = to + 1; - db = to + SHA_DIGEST_LENGTH + 1; + db = to + mdlen + 1; - if (!EVP_Digest((void *)param, plen, db, NULL, EVP_sha1(), NULL)) + if (!EVP_Digest((void *)param, plen, db, NULL, md, NULL)) return 0; - memset(db + SHA_DIGEST_LENGTH, 0, - emlen - flen - 2 * SHA_DIGEST_LENGTH - 1); - db[emlen - flen - SHA_DIGEST_LENGTH - 1] = 0x01; - memcpy(db + emlen - flen - SHA_DIGEST_LENGTH, from, (unsigned int)flen); - if (RAND_bytes(seed, SHA_DIGEST_LENGTH) <= 0) + memset(db + mdlen, 0, emlen - flen - 2 * mdlen - 1); + db[emlen - flen - mdlen - 1] = 0x01; + memcpy(db + emlen - flen - mdlen, from, (unsigned int)flen); + if (RAND_bytes(seed, mdlen) <= 0) return 0; # ifdef PKCS_TESTVECT memcpy(seed, @@ -68,20 +82,20 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, 20); # endif - dbmask = OPENSSL_malloc(emlen - SHA_DIGEST_LENGTH); + dbmask = OPENSSL_malloc(emlen - mdlen); if (dbmask == NULL) { - RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); + RSAerr(RSA_F_RSA_PADDING_ADD_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE); return 0; } - if (MGF1(dbmask, emlen - SHA_DIGEST_LENGTH, seed, SHA_DIGEST_LENGTH) < 0) + if (PKCS1_MGF1(dbmask, emlen - mdlen, seed, mdlen, mgf1md) < 0) return 0; - for (i = 0; i < emlen - SHA_DIGEST_LENGTH; i++) + for (i = 0; i < emlen - mdlen; i++) db[i] ^= dbmask[i]; - if (MGF1(seedmask, SHA_DIGEST_LENGTH, db, emlen - SHA_DIGEST_LENGTH) < 0) + if (PKCS1_MGF1(seedmask, mdlen, db, emlen - mdlen, mgf1md) < 0) return 0; - for (i = 0; i < SHA_DIGEST_LENGTH; i++) + for (i = 0; i < mdlen; i++) seed[i] ^= seedmask[i]; OPENSSL_free(dbmask); @@ -91,6 +105,16 @@ int RSA_padding_add_PKCS1_OAEP(unsigned char *to, int tlen, int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, const unsigned char *from, int flen, int num, const unsigned char *param, int plen) +{ + return RSA_padding_check_PKCS1_OAEP_mgf1(to, tlen, from, flen, num, + param, plen, NULL, NULL); +} + +int RSA_padding_check_PKCS1_OAEP_mgf1(unsigned char *to, int tlen, + const unsigned char *from, int flen, + int num, const unsigned char *param, + int plen, const EVP_MD *md, + const EVP_MD *mgf1md) { int i, dblen, mlen = -1, one_index = 0, msg_index; unsigned int good, found_one_byte; @@ -101,26 +125,33 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, */ unsigned char *db = NULL, *em = NULL, seed[EVP_MAX_MD_SIZE], phash[EVP_MAX_MD_SIZE]; + int mdlen; + + if (md == NULL) + md = EVP_sha1(); + if (mgf1md == NULL) + mgf1md = md; + + mdlen = EVP_MD_size(md); if (tlen <= 0 || flen <= 0) return -1; - /* * |num| is the length of the modulus; |flen| is the length of the * encoded message. Therefore, for any |from| that was obtained by * decrypting a ciphertext, we must have |flen| <= |num|. 
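
The new *_mgf1 entry points, combined with RSA_NO_PADDING, let a caller choose the OAEP digest and MGF1 digest explicitly; the EVP layer changes in rsa_pmeth.c further down do exactly this internally. A minimal encrypt-side sketch (rsa is assumed to be a public key obtained elsewhere):

    #include <openssl/crypto.h>
    #include <openssl/evp.h>
    #include <openssl/rsa.h>

    /* OAEP-pad `in` with SHA-256 for both the hash and MGF1, then perform a
     * raw RSA operation.  Returns the ciphertext length or -1 on error. */
    static int oaep_sha256_encrypt(RSA *rsa, const unsigned char *in, int inlen,
                                   unsigned char *out /* RSA_size(rsa) bytes */)
    {
        int klen = RSA_size(rsa);
        unsigned char *em = OPENSSL_malloc(klen);
        int outlen = -1;

        if (em == NULL)
            return -1;
        if (RSA_padding_add_PKCS1_OAEP_mgf1(em, klen, in, inlen,
                                            NULL, 0,            /* empty label */
                                            EVP_sha256(), EVP_sha256()))
            outlen = RSA_public_encrypt(klen, em, out, rsa, RSA_NO_PADDING);
        OPENSSL_free(em);
        return outlen;
    }
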
Similarly, - * num < 2 * SHA_DIGEST_LENGTH + 2 must hold for the modulus - * irrespective of the ciphertext, see PKCS #1 v2.2, section 7.1.2. + * num < 2 * mdlen + 2 must hold for the modulus irrespective of + * the ciphertext, see PKCS #1 v2.2, section 7.1.2. * This does not leak any side-channel information. */ - if (num < flen || num < 2 * SHA_DIGEST_LENGTH + 2) + if (num < flen || num < 2 * mdlen + 2) goto decoding_err; - dblen = num - SHA_DIGEST_LENGTH - 1; + dblen = num - mdlen - 1; db = OPENSSL_malloc(dblen); em = OPENSSL_malloc(num); if (db == NULL || em == NULL) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, ERR_R_MALLOC_FAILURE); + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, ERR_R_MALLOC_FAILURE); goto cleanup; } @@ -143,26 +174,25 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, good = constant_time_is_zero(em[0]); maskedseed = em + 1; - maskeddb = em + 1 + SHA_DIGEST_LENGTH; + maskeddb = em + 1 + mdlen; - if (MGF1(seed, SHA_DIGEST_LENGTH, maskeddb, dblen)) + if (PKCS1_MGF1(seed, mdlen, maskeddb, dblen, mgf1md)) goto cleanup; - for (i = 0; i < SHA_DIGEST_LENGTH; i++) + for (i = 0; i < mdlen; i++) seed[i] ^= maskedseed[i]; - if (MGF1(db, dblen, seed, SHA_DIGEST_LENGTH)) + if (PKCS1_MGF1(db, dblen, seed, mdlen, mgf1md)) goto cleanup; for (i = 0; i < dblen; i++) db[i] ^= maskeddb[i]; - if (!EVP_Digest((void *)param, plen, phash, NULL, EVP_sha1(), NULL)) + if (!EVP_Digest((void *)param, plen, phash, NULL, md, NULL)) goto cleanup; - good &= - constant_time_is_zero(CRYPTO_memcmp(db, phash, SHA_DIGEST_LENGTH)); + good &= constant_time_is_zero(CRYPTO_memcmp(db, phash, mdlen)); found_one_byte = 0; - for (i = SHA_DIGEST_LENGTH; i < dblen; i++) { + for (i = mdlen; i < dblen; i++) { /* * Padding consists of a number of 0-bytes, followed by a 1. */ @@ -188,7 +218,7 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, mlen = dblen - msg_index; if (tlen < mlen) { - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_DATA_TOO_LARGE); + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, RSA_R_DATA_TOO_LARGE); mlen = -1; } else { memcpy(to, db + msg_index, mlen); @@ -200,7 +230,8 @@ int RSA_padding_check_PKCS1_OAEP(unsigned char *to, int tlen, * To avoid chosen ciphertext attacks, the error message should not * reveal which kind of decoding error happened. 
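
For most callers the raw padding functions stay out of sight; once the rsa_pmeth.c controls further down are in place, the whole OAEP configuration is reachable through EVP_PKEY_CTX. A sketch of that high-level path (pkey is an RSA EVP_PKEY from elsewhere, the "session-1" label is just an example value, and *outlen must hold the capacity of out on entry):

    #include <openssl/buffer.h>
    #include <openssl/crypto.h>
    #include <openssl/evp.h>
    #include <openssl/rsa.h>

    static int evp_oaep_encrypt(EVP_PKEY *pkey,
                                const unsigned char *in, size_t inlen,
                                unsigned char *out, size_t *outlen)
    {
        EVP_PKEY_CTX *ctx = EVP_PKEY_CTX_new(pkey, NULL);
        unsigned char *label;
        int ok = 0;

        if (ctx == NULL)
            return 0;
        if (EVP_PKEY_encrypt_init(ctx) <= 0
            || EVP_PKEY_CTX_set_rsa_padding(ctx, RSA_PKCS1_OAEP_PADDING) <= 0
            || EVP_PKEY_CTX_set_rsa_oaep_md(ctx, EVP_sha256()) <= 0
            || EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, EVP_sha256()) <= 0)
            goto err;
        /* set0: the context takes ownership of the label on success, so it
         * has to be OPENSSL_malloc()ed memory. */
        label = BUF_memdup("session-1", 9);
        if (label == NULL)
            goto err;
        if (EVP_PKEY_CTX_set0_rsa_oaep_label(ctx, label, 9) <= 0) {
            OPENSSL_free(label);
            goto err;
        }
        if (EVP_PKEY_encrypt(ctx, out, outlen, in, inlen) <= 0)
            goto err;
        ok = 1;
     err:
        EVP_PKEY_CTX_free(ctx);
        return ok;
    }

The rsa_oaep_md / rsa_oaep_label / rsa_mgf1_md control strings added to pkey_rsa_ctrl_str() below expose the same settings to `openssl pkeyutl -pkeyopt`.
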
*/ - RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP, RSA_R_OAEP_DECODING_ERROR); + RSAerr(RSA_F_RSA_PADDING_CHECK_PKCS1_OAEP_MGF1, + RSA_R_OAEP_DECODING_ERROR); cleanup: if (db != NULL) OPENSSL_free(db); @@ -249,9 +280,4 @@ int PKCS1_MGF1(unsigned char *mask, long len, return rv; } -static int MGF1(unsigned char *mask, long len, const unsigned char *seed, - long seedlen) -{ - return PKCS1_MGF1(mask, len, seed, seedlen, EVP_sha1()); -} #endif diff --git a/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c b/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c index d61d6e8b66ace2..ddda0ddc4fe5d3 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c @@ -64,6 +64,7 @@ #include #include #include +#include #ifndef OPENSSL_NO_CMS # include #endif @@ -87,10 +88,13 @@ typedef struct { const EVP_MD *md; /* message digest for MGF1 */ const EVP_MD *mgf1md; - /* PSS/OAEP salt length */ + /* PSS salt length */ int saltlen; /* Temp buffer */ unsigned char *tbuf; + /* OAEP label */ + unsigned char *oaep_label; + size_t oaep_labellen; } RSA_PKEY_CTX; static int pkey_rsa_init(EVP_PKEY_CTX *ctx) @@ -108,6 +112,9 @@ static int pkey_rsa_init(EVP_PKEY_CTX *ctx) rctx->saltlen = -2; + rctx->oaep_label = NULL; + rctx->oaep_labellen = 0; + ctx->data = rctx; ctx->keygen_info = rctx->gentmp; ctx->keygen_info_count = 2; @@ -130,6 +137,15 @@ static int pkey_rsa_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) } dctx->pad_mode = sctx->pad_mode; dctx->md = sctx->md; + dctx->mgf1md = sctx->mgf1md; + if (sctx->oaep_label) { + if (dctx->oaep_label) + OPENSSL_free(dctx->oaep_label); + dctx->oaep_label = BUF_memdup(sctx->oaep_label, sctx->oaep_labellen); + if (!dctx->oaep_label) + return 0; + dctx->oaep_labellen = sctx->oaep_labellen; + } return 1; } @@ -151,6 +167,8 @@ static void pkey_rsa_cleanup(EVP_PKEY_CTX *ctx) BN_free(rctx->pub_exp); if (rctx->tbuf) OPENSSL_free(rctx->tbuf); + if (rctx->oaep_label) + OPENSSL_free(rctx->oaep_label); OPENSSL_free(rctx); } } @@ -173,10 +191,18 @@ static int pkey_fips_check_ctx(EVP_PKEY_CTX *ctx) rv = 0; if (!(rsa->meth->flags & RSA_FLAG_FIPS_METHOD) && rv) return -1; - if (rctx->md && !(rctx->md->flags & EVP_MD_FLAG_FIPS)) - return rv; - if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS)) - return rv; + if (rctx->md) { + const EVP_MD *fmd; + fmd = FIPS_get_digestbynid(EVP_MD_type(rctx->md)); + if (!fmd || !(fmd->flags & EVP_MD_FLAG_FIPS)) + return rv; + } + if (rctx->mgf1md && !(rctx->mgf1md->flags & EVP_MD_FLAG_FIPS)) { + const EVP_MD *fmd; + fmd = FIPS_get_digestbynid(EVP_MD_type(rctx->mgf1md)); + if (!fmd || !(fmd->flags & EVP_MD_FLAG_FIPS)) + return rv; + } return 1; } #endif @@ -382,8 +408,21 @@ static int pkey_rsa_encrypt(EVP_PKEY_CTX *ctx, { int ret; RSA_PKEY_CTX *rctx = ctx->data; - ret = RSA_public_encrypt(inlen, in, out, ctx->pkey->pkey.rsa, - rctx->pad_mode); + if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) { + int klen = RSA_size(ctx->pkey->pkey.rsa); + if (!setup_tbuf(rctx, ctx)) + return -1; + if (!RSA_padding_add_PKCS1_OAEP_mgf1(rctx->tbuf, klen, + in, inlen, + rctx->oaep_label, + rctx->oaep_labellen, + rctx->md, rctx->mgf1md)) + return -1; + ret = RSA_public_encrypt(klen, rctx->tbuf, out, + ctx->pkey->pkey.rsa, RSA_NO_PADDING); + } else + ret = RSA_public_encrypt(inlen, in, out, ctx->pkey->pkey.rsa, + rctx->pad_mode); if (ret < 0) return ret; *outlen = ret; @@ -396,8 +435,26 @@ static int pkey_rsa_decrypt(EVP_PKEY_CTX *ctx, { int ret; RSA_PKEY_CTX *rctx = ctx->data; - ret = RSA_private_decrypt(inlen, in, out, ctx->pkey->pkey.rsa, - 
rctx->pad_mode); + if (rctx->pad_mode == RSA_PKCS1_OAEP_PADDING) { + int i; + if (!setup_tbuf(rctx, ctx)) + return -1; + ret = RSA_private_decrypt(inlen, in, rctx->tbuf, + ctx->pkey->pkey.rsa, RSA_NO_PADDING); + if (ret <= 0) + return ret; + for (i = 0; i < ret; i++) { + if (rctx->tbuf[i]) + break; + } + ret = RSA_padding_check_PKCS1_OAEP_mgf1(out, ret, rctx->tbuf + i, + ret - i, ret, + rctx->oaep_label, + rctx->oaep_labellen, + rctx->md, rctx->mgf1md); + } else + ret = RSA_private_decrypt(inlen, in, out, ctx->pkey->pkey.rsa, + rctx->pad_mode); if (ret < 0) return ret; *outlen = ret; @@ -484,18 +541,36 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) case EVP_PKEY_CTRL_RSA_KEYGEN_PUBEXP: if (!p2) return -2; + BN_free(rctx->pub_exp); rctx->pub_exp = p2; return 1; + case EVP_PKEY_CTRL_RSA_OAEP_MD: + case EVP_PKEY_CTRL_GET_RSA_OAEP_MD: + if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE); + return -2; + } + if (type == EVP_PKEY_CTRL_GET_RSA_OAEP_MD) + *(const EVP_MD **)p2 = rctx->md; + else + rctx->md = p2; + return 1; + case EVP_PKEY_CTRL_MD: if (!check_padding_md(p2, rctx->pad_mode)) return 0; rctx->md = p2; return 1; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = rctx->md; + return 1; + case EVP_PKEY_CTRL_RSA_MGF1_MD: case EVP_PKEY_CTRL_GET_RSA_MGF1_MD: - if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING) { + if (rctx->pad_mode != RSA_PKCS1_PSS_PADDING + && rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_MGF1_MD); return -2; } @@ -508,6 +583,30 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) rctx->mgf1md = p2; return 1; + case EVP_PKEY_CTRL_RSA_OAEP_LABEL: + if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE); + return -2; + } + if (rctx->oaep_label) + OPENSSL_free(rctx->oaep_label); + if (p2 && p1 > 0) { + rctx->oaep_label = p2; + rctx->oaep_labellen = p1; + } else { + rctx->oaep_label = NULL; + rctx->oaep_labellen = 0; + } + return 1; + + case EVP_PKEY_CTRL_GET_RSA_OAEP_LABEL: + if (rctx->pad_mode != RSA_PKCS1_OAEP_PADDING) { + RSAerr(RSA_F_PKEY_RSA_CTRL, RSA_R_INVALID_PADDING_MODE); + return -2; + } + *(unsigned char **)p2 = rctx->oaep_label; + return rctx->oaep_labellen; + case EVP_PKEY_CTRL_DIGESTINIT: case EVP_PKEY_CTRL_PKCS7_ENCRYPT: case EVP_PKEY_CTRL_PKCS7_DECRYPT: @@ -515,16 +614,6 @@ static int pkey_rsa_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) return 1; #ifndef OPENSSL_NO_CMS case EVP_PKEY_CTRL_CMS_DECRYPT: - { - X509_ALGOR *alg = NULL; - ASN1_OBJECT *encalg = NULL; - if (p2) - CMS_RecipientInfo_ktri_get0_algs(p2, NULL, NULL, &alg); - if (alg) - X509_ALGOR_get0(&encalg, NULL, NULL, alg); - if (encalg && OBJ_obj2nid(encalg) == NID_rsaesOaep) - rctx->pad_mode = RSA_PKCS1_OAEP_PADDING; - } case EVP_PKEY_CTRL_CMS_ENCRYPT: case EVP_PKEY_CTRL_CMS_SIGN: return 1; @@ -593,6 +682,36 @@ static int pkey_rsa_ctrl_str(EVP_PKEY_CTX *ctx, return ret; } + if (!strcmp(type, "rsa_mgf1_md")) { + const EVP_MD *md; + if (!(md = EVP_get_digestbyname(value))) { + RSAerr(RSA_F_PKEY_RSA_CTRL_STR, RSA_R_INVALID_DIGEST); + return 0; + } + return EVP_PKEY_CTX_set_rsa_mgf1_md(ctx, md); + } + + if (!strcmp(type, "rsa_oaep_md")) { + const EVP_MD *md; + if (!(md = EVP_get_digestbyname(value))) { + RSAerr(RSA_F_PKEY_RSA_CTRL_STR, RSA_R_INVALID_DIGEST); + return 0; + } + return EVP_PKEY_CTX_set_rsa_oaep_md(ctx, md); + } + if (!strcmp(type, "rsa_oaep_label")) { + unsigned char *lab; + long lablen; + int 
ret; + lab = string_to_hex(value, &lablen); + if (!lab) + return 0; + ret = EVP_PKEY_CTX_set0_rsa_oaep_label(ctx, lab, lablen); + if (ret <= 0) + OPENSSL_free(lab); + return ret; + } + return -2; } diff --git a/deps/openssl/openssl/crypto/rsa/rsa_sign.c b/deps/openssl/openssl/crypto/rsa/rsa_sign.c index bc91da2c1fe9cd..19461c6364d43e 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_sign.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_sign.c @@ -261,19 +261,8 @@ int int_rsa_verify(int dtype, const unsigned char *m, OBJ_nid2ln(dtype)); #endif if (sigtype != dtype) { - if (((dtype == NID_md5) && - (sigtype == NID_md5WithRSAEncryption)) || - ((dtype == NID_md2) && - (sigtype == NID_md2WithRSAEncryption))) { - /* ok, we will let it through */ -#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16) - fprintf(stderr, - "signature has problems, re-make with post SSLeay045\n"); -#endif - } else { - RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_ALGORITHM_MISMATCH); - goto err; - } + RSAerr(RSA_F_INT_RSA_VERIFY, RSA_R_ALGORITHM_MISMATCH); + goto err; } if (rm) { const EVP_MD *md; diff --git a/deps/openssl/openssl/crypto/sha/Makefile b/deps/openssl/openssl/crypto/sha/Makefile index 2eb2b7af992bf6..a8c0cf78509d58 100644 --- a/deps/openssl/openssl/crypto/sha/Makefile +++ b/deps/openssl/openssl/crypto/sha/Makefile @@ -60,21 +60,25 @@ sha256-armv4.S: asm/sha256-armv4.pl $(PERL) $< $(PERLASM_SCHEME) $@ sha1-alpha.s: asm/sha1-alpha.pl - (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \ + (preproc=$$$$.$@.S; trap "rm $$preproc" INT; \ $(PERL) asm/sha1-alpha.pl > $$preproc && \ - $(CC) -E $$preproc > $@ && rm $$preproc) + $(CC) -E -P $$preproc > $@ && rm $$preproc) # Solaris make has to be explicitly told sha1-x86_64.s: asm/sha1-x86_64.pl; $(PERL) asm/sha1-x86_64.pl $(PERLASM_SCHEME) > $@ +sha1-mb-x86_64.s: asm/sha1-mb-x86_64.pl; $(PERL) asm/sha1-mb-x86_64.pl $(PERLASM_SCHEME) > $@ sha256-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ +sha256-mb-x86_64.s: asm/sha256-mb-x86_64.pl; $(PERL) asm/sha256-mb-x86_64.pl $(PERLASM_SCHEME) > $@ sha512-x86_64.s:asm/sha512-x86_64.pl; $(PERL) asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ -sha1-sparcv9.s: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) -sha256-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) -sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) +sha1-sparcv9.S: asm/sha1-sparcv9.pl; $(PERL) asm/sha1-sparcv9.pl $@ $(CFLAGS) +sha256-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) +sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAGS) sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ +sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ +sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@ sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@ @@ -92,6 +96,9 @@ sha512-%.S: asm/sha512-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ sha1-armv4-large.o: sha1-armv4-large.S sha256-armv4.o: sha256-armv4.S sha512-armv4.o: sha512-armv4.S +sha1-armv8.o: sha1-armv8.S +sha256-armv8.o: sha256-armv8.S +sha512-armv8.o: sha512-armv8.S files: $(PERL) $(TOP)/util/files.pl Makefile 
>> $(TOP)/MINFO diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl index 1084d227fe0608..4895eb3ddf8579 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-586.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# [Re]written by Andy Polyakov for the OpenSSL +# [Re]written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -79,6 +79,10 @@ # strongly, it's probably more appropriate to discuss possibility of # using vector rotate XOP on AMD... +# March 2014. +# +# Add support for Intel SHA Extensions. + ###################################################################### # Current performance is summarized in following table. Numbers are # CPU clock cycles spent to process single byte (less is better). @@ -88,13 +92,20 @@ # PIII 11.5 - # P4 10.6 - # AMD K8 7.1 - -# Core2 7.3 6.1/+20% - -# Atom 12.5 9.5(*)/+32% - -# Westmere 7.3 5.6/+30% - -# Sandy Bridge 8.8 6.2/+40% 5.1(**)/+70% +# Core2 7.3 6.0/+22% - +# Westmere 7.3 5.5/+33% - +# Sandy Bridge 8.8 6.2/+40% 5.1(**)/+73% +# Ivy Bridge 7.2 4.8/+51% 4.7(**)/+53% +# Haswell 6.5 4.3/+51% 4.1(**)/+58% +# Bulldozer 11.6 6.0/+92% +# VIA Nano 10.6 7.5/+41% +# Atom 12.5 9.3(*)/+35% +# Silvermont 14.5 9.9(*)/+46% # # (*) Loop is 1056 instructions long and expected result is ~8.25. -# It remains mystery [to me] why ILP is limited to 1.7. +# The discrepancy is because of front-end limitations, so +# called MS-ROM penalties, and on Silvermont even rotate's +# limited parallelism. # # (**) As per above comment, the result is for AVX *plus* sh[rl]d. @@ -116,6 +127,15 @@ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && $1>=2.03); # first version supporting AVX +$ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32" && + `ml 2>&1` =~ /Version ([0-9]+)\./ && + $1>=10); # first version supporting AVX + +$ymm=1 if ($xmm && !$ymm && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/ && + $2>=3.0); # first version supporting AVX + +$shaext=$xmm; ### set to zero if compiling for 1.0.1 + &external_label("OPENSSL_ia32cap_P") if ($xmm); @@ -295,6 +315,7 @@ sub BODY_40_59 &function_begin("sha1_block_data_order"); if ($xmm) { + &static_label("shaext_shortcut") if ($shaext); &static_label("ssse3_shortcut"); &static_label("avx_shortcut") if ($ymm); &static_label("K_XX_XX"); @@ -309,8 +330,13 @@ sub BODY_40_59 &mov ($D,&DWP(4,$T)); &test ($D,1<<9); # check SSSE3 bit &jz (&label("x86")); + &mov ($C,&DWP(8,$T)); &test ($A,1<<24); # check FXSR bit &jz (&label("x86")); + if ($shaext) { + &test ($C,1<<29); # check SHA bit + &jnz (&label("shaext_shortcut")); + } if ($ymm) { &and ($D,1<<28); # mask AVX bit &and ($A,1<<30); # mask "Intel CPU" bit @@ -389,6 +415,117 @@ sub BODY_40_59 &function_end("sha1_block_data_order"); if ($xmm) { +if ($shaext) { +###################################################################### +# Intel SHA Extensions implementation of SHA1 update function. 
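
The shaext shortcut is gated on the SHA bit that `&test ($C,1<<29)` reads out of the cached CPUID data, i.e. CPUID.(EAX=7,ECX=0):EBX bit 29. A standalone equivalent of that check in C (GCC/Clang inline asm, x86/x86-64 only; on 32-bit PIC builds the %ebx operand may need extra care):

    #include <stdint.h>

    /* Detect the Intel SHA extensions the same way the assembly gate does:
     * CPUID leaf 7, sub-leaf 0, EBX bit 29. */
    static int cpu_has_sha_ext(void)
    {
        uint32_t eax, ebx, ecx, edx;

        /* The highest basic leaf must be at least 7 before leaf 7 is valid. */
        __asm__ volatile ("cpuid"
                          : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                          : "a"(0), "c"(0));
        if (eax < 7)
            return 0;
        __asm__ volatile ("cpuid"
                          : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
                          : "a"(7), "c"(0));
        return (ebx >> 29) & 1;
    }
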
+# +my ($ctx,$inp,$num)=("edi","esi","ecx"); +my ($ABCD,$E,$E_,$BSWAP)=map("xmm$_",(0..3)); +my @MSG=map("xmm$_",(4..7)); + +sub sha1rnds4 { + my ($dst,$src,$imm)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x0f,0x3a,0xcc,0xc0|($1<<3)|$2,$imm); } +} +sub sha1op38 { + my ($opcodelet,$dst,$src)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2); } +} +sub sha1nexte { sha1op38(0xc8,@_); } +sub sha1msg1 { sha1op38(0xc9,@_); } +sub sha1msg2 { sha1op38(0xca,@_); } + +&function_begin("_sha1_block_data_order_shaext"); + &call (&label("pic_point")); # make it PIC! + &set_label("pic_point"); + &blindpop($tmp1); + &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); +&set_label("shaext_shortcut"); + &mov ($ctx,&wparam(0)); + &mov ("ebx","esp"); + &mov ($inp,&wparam(1)); + &mov ($num,&wparam(2)); + &sub ("esp",32); + + &movdqu ($ABCD,&QWP(0,$ctx)); + &movd ($E,&DWP(16,$ctx)); + &and ("esp",-32); + &movdqa ($BSWAP,&QWP(0x50,$tmp1)); # byte-n-word swap + + &movdqu (@MSG[0],&QWP(0,$inp)); + &pshufd ($ABCD,$ABCD,0b00011011); # flip word order + &movdqu (@MSG[1],&QWP(0x10,$inp)); + &pshufd ($E,$E,0b00011011); # flip word order + &movdqu (@MSG[2],&QWP(0x20,$inp)); + &pshufb (@MSG[0],$BSWAP); + &movdqu (@MSG[3],&QWP(0x30,$inp)); + &pshufb (@MSG[1],$BSWAP); + &pshufb (@MSG[2],$BSWAP); + &pshufb (@MSG[3],$BSWAP); + &jmp (&label("loop_shaext")); + +&set_label("loop_shaext",16); + &dec ($num); + &lea ("eax",&DWP(0x40,$inp)); + &movdqa (&QWP(0,"esp"),$E); # offload $E + &paddd ($E,@MSG[0]); + &cmovne ($inp,"eax"); + &movdqa (&QWP(16,"esp"),$ABCD); # offload $ABCD + +for($i=0;$i<20-4;$i+=2) { + &sha1msg1 (@MSG[0],@MSG[1]); + &movdqa ($E_,$ABCD); + &sha1rnds4 ($ABCD,$E,int($i/5)); # 0-3... + &sha1nexte ($E_,@MSG[1]); + &pxor (@MSG[0],@MSG[2]); + &sha1msg1 (@MSG[1],@MSG[2]); + &sha1msg2 (@MSG[0],@MSG[3]); + + &movdqa ($E,$ABCD); + &sha1rnds4 ($ABCD,$E_,int(($i+1)/5)); + &sha1nexte ($E,@MSG[2]); + &pxor (@MSG[1],@MSG[3]); + &sha1msg2 (@MSG[1],@MSG[0]); + + push(@MSG,shift(@MSG)); push(@MSG,shift(@MSG)); +} + &movdqu (@MSG[0],&QWP(0,$inp)); + &movdqa ($E_,$ABCD); + &sha1rnds4 ($ABCD,$E,3); # 64-67 + &sha1nexte ($E_,@MSG[1]); + &movdqu (@MSG[1],&QWP(0x10,$inp)); + &pshufb (@MSG[0],$BSWAP); + + &movdqa ($E,$ABCD); + &sha1rnds4 ($ABCD,$E_,3); # 68-71 + &sha1nexte ($E,@MSG[2]); + &movdqu (@MSG[2],&QWP(0x20,$inp)); + &pshufb (@MSG[1],$BSWAP); + + &movdqa ($E_,$ABCD); + &sha1rnds4 ($ABCD,$E,3); # 72-75 + &sha1nexte ($E_,@MSG[3]); + &movdqu (@MSG[3],&QWP(0x30,$inp)); + &pshufb (@MSG[2],$BSWAP); + + &movdqa ($E,$ABCD); + &sha1rnds4 ($ABCD,$E_,3); # 76-79 + &movdqa ($E_,&QWP(0,"esp")); + &pshufb (@MSG[3],$BSWAP); + &sha1nexte ($E,$E_); + &paddd ($ABCD,&QWP(16,"esp")); + + &jnz (&label("loop_shaext")); + + &pshufd ($ABCD,$ABCD,0b00011011); + &pshufd ($E,$E,0b00011011); + &movdqu (&QWP(0,$ctx),$ABCD) + &movd (&DWP(16,$ctx),$E); + &mov ("esp","ebx"); +&function_end("_sha1_block_data_order_shaext"); +} ###################################################################### # The SSSE3 implementation. 
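The shaext_shortcut dispatch above is guarded by OPENSSL_ia32cap_P, whose third word carries the CPUID leaf 7, sub-leaf 0, EBX feature bits; bit 29 there is the SHA-extensions flag that the `&test ($C,1<<29)` check looks at. A minimal standalone probe for the same bit, using GCC/Clang's <cpuid.h> and independent of OpenSSL's own capability vector (the function name is illustrative), might look like this:

#include <cpuid.h>
#include <stdio.h>

/* Non-zero when CPUID.(EAX=7,ECX=0):EBX bit 29 (SHA extensions) is set. */
static int cpu_has_sha_ext(void)
{
    unsigned int eax, ebx, ecx, edx;

    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx) || eax < 7)
        return 0;                        /* CPUID leaf 7 not supported */
    __cpuid_count(7, 0, eax, ebx, ecx, edx);
    return (ebx >> 29) & 1;
}

int main(void)
{
    printf("SHA extensions: %s\n", cpu_has_sha_ext() ? "yes" : "no");
    return 0;
}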
# @@ -416,6 +553,7 @@ sub BODY_40_59 my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4 my @V=($A,$B,$C,$D,$E); my $j=0; # hash round +my $rx=0; my @T=($T,$tmp1); my $inp; @@ -501,8 +639,11 @@ sub BODY_40_59 &movdqa (&QWP(0+16,"esp"),@X[-3&7]); &psubd (@X[-3&7],@X[3]); &movdqa (&QWP(0+32,"esp"),@X[-2&7]); + &mov (@T[1],$C); &psubd (@X[-2&7],@X[3]); - &movdqa (@X[0],@X[-3&7]); + &xor (@T[1],$D); + &pshufd (@X[0],@X[-4&7],0xee); # was &movdqa (@X[0],@X[-3&7]); + &and (@T[0],@T[1]); &jmp (&label("loop")); ###################################################################### @@ -528,76 +669,77 @@ () my @insns = (&$body,&$body,&$body,&$body); # 40 instructions my ($a,$b,$c,$d,$e); + eval(shift(@insns)); # ror eval(shift(@insns)); eval(shift(@insns)); - &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" + &punpcklqdq(@X[0],@X[-3&7]); # compose "X[-14]" in "X[0]", was &palignr(@X[0],@X[-4&7],8); &movdqa (@X[2],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); &paddd (@X[3],@X[-1&7]); &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]);# save X[] to backtrace buffer - eval(shift(@insns)); + eval(shift(@insns)); # rol eval(shift(@insns)); &psrldq (@X[2],4); # "X[-3]", 3 dwords eval(shift(@insns)); eval(shift(@insns)); &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" eval(shift(@insns)); - eval(shift(@insns)); + eval(shift(@insns)); # ror &pxor (@X[2],@X[-2&7]); # "X[-3]"^"X[-8]" eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - eval(shift(@insns)); &pxor (@X[0],@X[2]); # "X[0]"^="X[-3]"^"X[-8]" eval(shift(@insns)); - eval(shift(@insns)); + eval(shift(@insns)); # rol &movdqa (&QWP(0+16*(($Xi-1)&3),"esp"),@X[3]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); &movdqa (@X[4],@X[0]); - &movdqa (@X[2],@X[0]); - eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); # ror + &movdqa (@X[2],@X[0]); eval(shift(@insns)); &pslldq (@X[4],12); # "X[0]"<<96, extract one dword &paddd (@X[0],@X[0]); eval(shift(@insns)); eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); &psrld (@X[2],31); eval(shift(@insns)); - eval(shift(@insns)); + eval(shift(@insns)); # rol &movdqa (@X[3],@X[4]); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); &psrld (@X[4],30); - &por (@X[0],@X[2]); # "X[0]"<<<=1 eval(shift(@insns)); + eval(shift(@insns)); # ror + &por (@X[0],@X[2]); # "X[0]"<<<=1 eval(shift(@insns)); &movdqa (@X[2],&QWP(64+16*(($Xi-6)%3),"esp")) if ($Xi>5); # restore X[] from backtrace buffer eval(shift(@insns)); eval(shift(@insns)); &pslld (@X[3],2); - &pxor (@X[0],@X[4]); - eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); # rol + &pxor (@X[0],@X[4]); &movdqa (@X[4],&QWP(112-16+16*(($Xi)/5),"esp")); # K_XX_XX eval(shift(@insns)); eval(shift(@insns)); &pxor (@X[0],@X[3]); # "X[0]"^=("X[0]"<<96)<<<2 - &movdqa (@X[1],@X[-2&7]) if ($Xi<7); + &pshufd (@X[1],@X[-3&7],0xee) if ($Xi<7); # was &movdqa (@X[1],@X[-2&7]) + &pshufd (@X[3],@X[-1&7],0xee) if ($Xi==7); eval(shift(@insns)); eval(shift(@insns)); @@ -609,13 +751,12 @@ () sub Xupdate_ssse3_32_79() { use integer; my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my @insns = (&$body,&$body,&$body,&$body); # 32 to 44 instructions my ($a,$b,$c,$d,$e); - &movdqa (@X[2],@X[-1&7]) if ($Xi==8); eval(shift(@insns)); # body_20_39 &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - &palignr(@X[2],@X[-2&7],8); # compose "X[-6]" + &punpcklqdq(@X[2],@X[-1&7]); # compose "X[-6]", was &palignr(@X[2],@X[-2&7],8) eval(shift(@insns)); 
eval(shift(@insns)); eval(shift(@insns)); # rol @@ -624,13 +765,14 @@ () &movdqa (&QWP(64+16*(($Xi-4)%3),"esp"),@X[-4&7]); # save X[] to backtrace buffer eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)) if (@insns[0] =~ /_rol/); if ($Xi%5) { &movdqa (@X[4],@X[3]); # "perpetuate" K_XX_XX... } else { # ... or load next one &movdqa (@X[4],&QWP(112-16+16*($Xi/5),"esp")); } - &paddd (@X[3],@X[-1&7]); eval(shift(@insns)); # ror + &paddd (@X[3],@X[-1&7]); eval(shift(@insns)); &pxor (@X[0],@X[2]); # "X[0]"^="X[-6]" @@ -645,6 +787,7 @@ () eval(shift(@insns)); eval(shift(@insns)); # ror eval(shift(@insns)); + eval(shift(@insns)) if (@insns[0] =~ /_rol/); &pslld (@X[0],2); eval(shift(@insns)); # body_20_39 @@ -656,6 +799,8 @@ () eval(shift(@insns)); eval(shift(@insns)); # ror eval(shift(@insns)); + eval(shift(@insns)) if (@insns[1] =~ /_rol/); + eval(shift(@insns)) if (@insns[0] =~ /_rol/); &por (@X[0],@X[2]); # "X[0]"<<<=2 eval(shift(@insns)); # body_20_39 @@ -666,7 +811,7 @@ () eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); # ror - &movdqa (@X[3],@X[0]) if ($Xi<19); + &pshufd (@X[3],@X[-1],0xee) if ($Xi<19); # was &movdqa (@X[3],@X[0]) eval(shift(@insns)); foreach (@insns) { eval; } # remaining instructions @@ -680,6 +825,12 @@ () my @insns = (&$body,&$body,&$body,&$body); # 32 instructions my ($a,$b,$c,$d,$e); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); eval(shift(@insns)); &paddd (@X[3],@X[-1&7]); eval(shift(@insns)); @@ -717,9 +868,16 @@ () eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); &pshufb (@X[($Xi-3)&7],@X[2]); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); &paddd (@X[($Xi-4)&7],@X[3]); eval(shift(@insns)); eval(shift(@insns)); @@ -728,6 +886,8 @@ () &movdqa (&QWP(0+16*$Xi,"esp"),@X[($Xi-4)&7]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); &psubd (@X[($Xi-4)&7],@X[3]); foreach (@insns) { eval; } @@ -743,51 +903,124 @@ () foreach (@insns) { eval; } } -sub body_00_19 () { +sub body_00_19 () { # ((c^d)&b)^d + # on start @T[0]=(c^d)&b + return &body_20_39() if ($rx==19); $rx++; ( '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer - '&xor ($c,$d);', + '&$_ror ($b,$j?7:2);', # $b>>>2 + '&xor (@T[0],$d);', '&mov (@T[1],$a);', # $b in next round + + '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer + '&xor ($b,$c);', # $c^$d for next round + '&$_rol ($a,5);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&xor ($c,$d);', # restore $c - '&xor (@T[0],$d);', - '&add ($e,$a);', - '&$_ror ($b,$j?7:2);', # $b>>>2 - '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&add ($e,@T[0]);', + '&and (@T[1],$b);', # ($b&($c^$d)) for next round + + '&xor ($b,$c);', # restore $b + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); } -sub body_20_39 () { +sub body_20_39 () { # b^d^c + # on entry @T[0]=b^d + return &body_40_59() if ($rx==39); $rx++; ( '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer - '&xor (@T[0],$d);', # ($b^$d) + '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer + '&xor (@T[0],$d) if($j==19);'. 
+ '&xor (@T[0],$c) if($j> 19);', # ($b^$d^$c) '&mov (@T[1],$a);', # $b in next round + '&$_rol ($a,5);', - '&xor (@T[0],$c);', # ($b^$d^$c) - '&add ($e,$a);', + '&add ($e,@T[0]);', + '&xor (@T[1],$c) if ($j< 79);', # $b^$d for next round + '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); } -sub body_40_59 () { +sub body_40_59 () { # ((b^c)&(c^d))^c + # on entry @T[0]=(b^c), (c^=d) + $rx++; ( '($a,$b,$c,$d,$e)=@V;'. - '&mov (@T[1],$c);', - '&xor ($c,$d);', - '&add ($e,&DWP(4*($j++&15),"esp"));', # X[]+K xfer - '&and (@T[1],$d);', - '&and (@T[0],$c);', # ($b&($c^$d)) + '&add ($e,&DWP(4*($j&15),"esp"));', # X[]+K xfer + '&and (@T[0],$c) if ($j>=40);', # (b^c)&(c^d) + '&xor ($c,$d) if ($j>=40);', # restore $c + '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[1]);', - '&mov (@T[1],$a);', # $b in next round + '&mov (@T[1],$a);', # $b for next round + '&xor (@T[0],$c);', + '&$_rol ($a,5);', '&add ($e,@T[0]);', - '&xor ($c,$d);', # restore $c - '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&xor (@T[1],$c) if ($j==59);'. + '&xor (@T[1],$b) if ($j< 59);', # b^c for next round + + '&xor ($b,$c) if ($j< 59);', # c^d for next round + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} +###### +sub bodyx_00_19 () { # ((c^d)&b)^d + # on start @T[0]=(b&c)^(~b&d), $e+=X[]+K + return &bodyx_20_39() if ($rx==19); $rx++; + ( + '($a,$b,$c,$d,$e)=@V;'. + + '&rorx ($b,$b,2) if ($j==0);'. # $b>>>2 + '&rorx ($b,@T[1],7) if ($j!=0);', # $b>>>2 + '&lea ($e,&DWP(0,$e,@T[0]));', + '&rorx (@T[0],$a,5);', + + '&andn (@T[1],$a,$c);', + '&and ($a,$b)', + '&add ($d,&DWP(4*(($j+1)&15),"esp"));', # X[]+K xfer + + '&xor (@T[1],$a)', + '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +sub bodyx_20_39 () { # b^d^c + # on start $b=b^c^d + return &bodyx_40_59() if ($rx==39); $rx++; + ( + '($a,$b,$c,$d,$e)=@V;'. + + '&add ($e,($j==19?@T[0]:$b))', + '&rorx ($b,@T[1],7);', # $b>>>2 + '&rorx (@T[0],$a,5);', + + '&xor ($a,$b) if ($j<79);', + '&add ($d,&DWP(4*(($j+1)&15),"esp")) if ($j<79);', # X[]+K xfer + '&xor ($a,$c) if ($j<79);', + '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + ); +} + +sub bodyx_40_59 () { # ((b^c)&(c^d))^c + # on start $b=((b^c)&(c^d))^c + return &bodyx_20_39() if ($rx==59); $rx++; + ( + '($a,$b,$c,$d,$e)=@V;'. 
+ + '&rorx (@T[0],$a,5)', + '&lea ($e,&DWP(0,$e,$b))', + '&rorx ($b,@T[1],7)', # $b>>>2 + '&add ($d,&DWP(4*(($j+1)&15),"esp"))', # X[]+K xfer + + '&mov (@T[1],$c)', + '&xor ($a,$b)', # b^c for next round + '&xor (@T[1],$b)', # c^d for next round + + '&and ($a,@T[1])', + '&add ($e,@T[0])', + '&xor ($a,$b)' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); } @@ -825,10 +1058,14 @@ () &mov (&DWP(4,@T[1]),@T[0]); &add ($E,&DWP(16,@T[1])); &mov (&DWP(8,@T[1]),$C); - &mov ($B,@T[0]); + &mov ($B,$C); &mov (&DWP(12,@T[1]),$D); + &xor ($B,$D); &mov (&DWP(16,@T[1]),$E); - &movdqa (@X[0],@X[-3&7]); + &mov (@T[1],@T[0]); + &pshufd (@X[0],@X[-4&7],0xee); # was &movdqa (@X[0],@X[-3&7]); + &and (@T[0],$B); + &mov ($B,$T[1]); &jmp (&label("loop")); @@ -853,6 +1090,8 @@ () &function_end("_sha1_block_data_order_ssse3"); +$rx=0; # reset + if ($ymm) { my $Xi=4; # 4xSIMD Xupdate round, start pre-seeded my @X=map("xmm$_",(4..7,0..3)); # pre-seeded for $Xi=4 @@ -940,8 +1179,11 @@ () &vpaddd (@X[1],@X[-3&7],@X[3]); &vpaddd (@X[2],@X[-2&7],@X[3]); &vmovdqa(&QWP(0,"esp"),@X[0]); # X[]+K xfer to IALU + &mov (@T[1],$C); &vmovdqa(&QWP(0+16,"esp"),@X[1]); + &xor (@T[1],$D); &vmovdqa(&QWP(0+32,"esp"),@X[2]); + &and (@T[0],@T[1]); &jmp (&label("loop")); sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4 @@ -1025,7 +1267,7 @@ () sub Xupdate_avx_32_79() { use integer; my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my @insns = (&$body,&$body,&$body,&$body); # 32 to 44 instructions my ($a,$b,$c,$d,$e); &vpalignr(@X[2],@X[-1&7],@X[-2&7],8); # compose "X[-6]" @@ -1188,10 +1430,14 @@ () &add ($D,&DWP(12,@T[1])); &mov (&DWP(4,@T[1]),@T[0]); &add ($E,&DWP(16,@T[1])); + &mov ($B,$C); &mov (&DWP(8,@T[1]),$C); - &mov ($B,@T[0]); + &xor ($B,$D); &mov (&DWP(12,@T[1]),$D); &mov (&DWP(16,@T[1]),$E); + &mov (@T[1],@T[0]); + &and (@T[0],$B); + &mov ($B,@T[1]); &jmp (&label("loop")); @@ -1223,6 +1469,7 @@ () &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask +&data_byte(0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0); } &asciz("SHA1 block transform for x86, CRYPTOGAMS by "); diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl index 33da3e0e3c0d50..b2c30322c3515f 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-armv4-large.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -52,6 +52,20 @@ # Profiler-assisted and platform-specific optimization resulted in 10% # improvement on Cortex A8 core and 12.2 cycles per byte. +# September 2013. +# +# Add NEON implementation (see sha1-586.pl for background info). On +# Cortex A8 it was measured to process one byte in 6.7 cycles or >80% +# faster than integer-only code. Because [fully unrolled] NEON code +# is ~2.5x larger and there are some redundant instructions executed +# when processing last block, improvement is not as big for smallest +# blocks, only ~30%. 
Snapdragon S4 is a tad faster, 6.4 cycles per +# byte, which is also >80% faster than integer-only code. + +# May 2014. +# +# Add ARMv8 code path performing at 2.35 cpb on Apple A7. + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -153,12 +167,22 @@ sub BODY_40_59 { #include "arm_arch.h" .text +.code 32 .global sha1_block_data_order .type sha1_block_data_order,%function -.align 2 +.align 5 sha1_block_data_order: +#if __ARM_MAX_ARCH__>=7 + sub r3,pc,#8 @ sha1_block_data_order + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA1 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{r4-r12,lr} add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp ldmia $ctx,{$a,$b,$c,$d,$e} @@ -233,16 +257,427 @@ sub BODY_40_59 { moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.align 2 +.size sha1_block_data_order,.-sha1_block_data_order + +.align 5 .LK_00_19: .word 0x5a827999 .LK_20_39: .word 0x6ed9eba1 .LK_40_59: .word 0x8f1bbcdc .LK_60_79: .word 0xca62c1d6 -.size sha1_block_data_order,.-sha1_block_data_order -.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by " -.align 2 +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha1_block_data_order +#endif +.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " +.align 5 +___ +##################################################################### +# NEON stuff +# +{{{ +my @V=($a,$b,$c,$d,$e); +my ($K_XX_XX,$Ki,$t0,$t1,$Xfer,$saved_sp)=map("r$_",(8..12,14)); +my $Xi=4; +my @X=map("q$_",(8..11,0..3)); +my @Tx=("q12","q13"); +my ($K,$zero)=("q14","q15"); +my $j=0; + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub body_00_19 () { + ( + '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. + '&bic ($t0,$d,$b)', + '&add ($e,$e,$Ki)', # e+=X[i]+K + '&and ($t1,$c,$b)', + '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', + '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) + '&eor ($t1,$t1,$t0)', # F_00_19 + '&mov ($b,$b,"ror#2")', # b=ROR(b,2) + '&add ($e,$e,$t1);'. # e+=F_00_19 + '$j++; unshift(@V,pop(@V));' + ) +} +sub body_20_39 () { + ( + '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. + '&eor ($t0,$b,$d)', + '&add ($e,$e,$Ki)', # e+=X[i]+K + '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15)) if ($j<79)', + '&eor ($t1,$t0,$c)', # F_20_39 + '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) + '&mov ($b,$b,"ror#2")', # b=ROR(b,2) + '&add ($e,$e,$t1);'. # e+=F_20_39 + '$j++; unshift(@V,pop(@V));' + ) +} +sub body_40_59 () { + ( + '($a,$b,$c,$d,$e)=@V;'. # '$code.="@ $j\n";'. + '&add ($e,$e,$Ki)', # e+=X[i]+K + '&and ($t0,$c,$d)', + '&ldr ($Ki,sprintf "[sp,#%d]",4*(($j+1)&15))', + '&add ($e,$e,$a,"ror#27")', # e+=ROR(A,27) + '&eor ($t1,$c,$d)', + '&add ($e,$e,$t0)', + '&and ($t1,$t1,$b)', + '&mov ($b,$b,"ror#2")', # b=ROR(b,2) + '&add ($e,$e,$t1);'. 
# e+=F_40_59 + '$j++; unshift(@V,pop(@V));' + ) +} + +sub Xupdate_16_31 () +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vext_8 (@X[0],@X[-4&7],@X[-3&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@Tx[1],@X[-1&7],$K); + eval(shift(@insns)); + &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0); + eval(shift(@insns)); + &vext_8 (@Tx[0],@X[-1&7],$zero,4); # "X[-3]", 3 words + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@Tx[0],@Tx[0],@X[0]); # "X[0]"^="X[-3]"^"X[-8] + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer + &sub ($Xfer,$Xfer,64) if ($Xi%4==0); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 (@Tx[1],$zero,@Tx[0],4); # "X[0]"<<96, extract one dword + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@Tx[0],@Tx[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsri_32 (@X[0],@Tx[0],31); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 (@Tx[0],@Tx[1],30); + eval(shift(@insns)); + eval(shift(@insns)); + &vshl_u32 (@Tx[1],@Tx[1],2); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@Tx[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xupdate_32_79 () +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vext_8 (@Tx[0],@X[-2&7],@X[-1&7],8); # compose "X[-6]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + &veor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@Tx[1],@X[-1&7],$K); + eval(shift(@insns)); + &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!") if ($Xi%5==0); + eval(shift(@insns)); + &veor (@Tx[0],@Tx[0],@X[0]); # "X[-6]"^="X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 (@X[0],@Tx[0],30); + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); # X[]+K xfer + &sub ($Xfer,$Xfer,64) if ($Xi%4==0); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 (@X[0],@Tx[0],2); # "X[0]"="X[-6]"<<<2 + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; push(@X,shift(@X)); # "rotate" X[] +} + +sub Xuplast_80 () +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vadd_i32 (@Tx[1],@X[-1&7],$K); + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@Tx[1]}","[$Xfer,:128]!"); + &sub ($Xfer,$Xfer,64); + + &teq ($inp,$len); + &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX + &subeq ($inp,$inp,64); # reload last block to avoid SEGV + &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_8 ("{@X[-2&7]-@X[-1&7]}","[$inp]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$K\[]}","[$K_XX_XX,:32]!"); # load K_00_19 + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[-4&7],@X[-4&7]); + + foreach (@insns) { eval; } # 
remaining instructions + + $Xi=0; +} + +sub Xloop() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e); + + &vrev32_8 (@X[($Xi-3)&7],@X[($Xi-3)&7]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[$Xi&7],@X[($Xi-4)&7],$K); + eval(shift(@insns)); + eval(shift(@insns)); + &vst1_32 ("{@X[$Xi&7]}","[$Xfer,:128]!");# X[]+K xfer to IALU + + foreach (@insns) { eval; } + + $Xi++; +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha1_block_data_order_neon,%function +.align 4 +sha1_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + add $len,$inp,$len,lsl#6 @ $len to point at the end of $inp + @ dmb @ errata #451034 on early Cortex A8 + @ vstmdb sp!,{d8-d15} @ ABI specification says so + mov $saved_sp,sp + sub sp,sp,#64 @ alloca + adr $K_XX_XX,.LK_00_19 + bic sp,sp,#15 @ align for 128-bit stores + + ldmia $ctx,{$a,$b,$c,$d,$e} @ load context + mov $Xfer,sp + + vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned + veor $zero,$zero,$zero + vld1.8 {@X[-2&7]-@X[-1&7]},[$inp]! + vld1.32 {${K}\[]},[$K_XX_XX,:32]! @ load K_00_19 + vrev32.8 @X[-4&7],@X[-4&7] @ yes, even on + vrev32.8 @X[-3&7],@X[-3&7] @ big-endian... + vrev32.8 @X[-2&7],@X[-2&7] + vadd.i32 @X[0],@X[-4&7],$K + vrev32.8 @X[-1&7],@X[-1&7] + vadd.i32 @X[1],@X[-3&7],$K + vst1.32 {@X[0]},[$Xfer,:128]! + vadd.i32 @X[2],@X[-2&7],$K + vst1.32 {@X[1]},[$Xfer,:128]! + vst1.32 {@X[2]},[$Xfer,:128]! + ldr $Ki,[sp] @ big RAW stall + +.Loop_neon: +___ + &Xupdate_16_31(\&body_00_19); + &Xupdate_16_31(\&body_00_19); + &Xupdate_16_31(\&body_00_19); + &Xupdate_16_31(\&body_00_19); + &Xupdate_32_79(\&body_00_19); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_20_39); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_40_59); + &Xupdate_32_79(\&body_20_39); + &Xuplast_80(\&body_20_39); + &Xloop(\&body_20_39); + &Xloop(\&body_20_39); + &Xloop(\&body_20_39); +$code.=<<___; + ldmia $ctx,{$Ki,$t0,$t1,$Xfer} @ accumulate context + add $a,$a,$Ki + ldr $Ki,[$ctx,#16] + add $b,$b,$t0 + add $c,$c,$t1 + add $d,$d,$Xfer + moveq sp,$saved_sp + add $e,$e,$Ki + ldrne $Ki,[sp] + stmia $ctx,{$a,$b,$c,$d,$e} + addne $Xfer,sp,#3*16 + bne .Loop_neon + + @ vldmia sp!,{d8-d15} + ldmia sp!,{r4-r12,pc} +.size sha1_block_data_order_neon,.-sha1_block_data_order_neon +#endif +___ +}}} +##################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3)); +my @MSG=map("q$_",(4..7)); +my @Kxx=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.type sha1_block_data_order_armv8,%function +.align 5 +sha1_block_data_order_armv8: +.LARMv8: + vstmdb sp!,{d8-d15} @ ABI specification says so + + veor $E,$E,$E + adr r3,.LK_00_19 + vld1.32 {$ABCD},[$ctx]! + vld1.32 {$E\[0]},[$ctx] + sub $ctx,$ctx,#16 + vld1.32 {@Kxx[0]\[]},[r3,:32]! + vld1.32 {@Kxx[1]\[]},[r3,:32]! + vld1.32 {@Kxx[2]\[]},[r3,:32]! + vld1.32 {@Kxx[3]\[]},[r3,:32] + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! 
+ vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + + vadd.i32 $W0,@Kxx[0],@MSG[0] + vrev32.8 @MSG[2],@MSG[2] + vmov $ABCD_SAVE,$ABCD @ offload + subs $len,$len,#1 + + vadd.i32 $W1,@Kxx[0],@MSG[1] + vrev32.8 @MSG[3],@MSG[3] + sha1h $E1,$ABCD @ 0 + sha1c $ABCD,$E,$W0 + vadd.i32 $W0,@Kxx[$j],@MSG[2] + sha1su0 @MSG[0],@MSG[1],@MSG[2] +___ +for ($j=0,$i=1;$i<20-3;$i++) { +my $f=("c","p","m","p")[$i/5]; +$code.=<<___; + sha1h $E0,$ABCD @ $i + sha1$f $ABCD,$E1,$W1 + vadd.i32 $W1,@Kxx[$j],@MSG[3] + sha1su1 @MSG[0],@MSG[3] +___ +$code.=<<___ if ($i<20-4); + sha1su0 @MSG[1],@MSG[2],@MSG[3] ___ + ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); + push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); +} +$code.=<<___; + sha1h $E0,$ABCD @ $i + sha1p $ABCD,$E1,$W1 + vadd.i32 $W1,@Kxx[$j],@MSG[3] + + sha1h $E1,$ABCD @ 18 + sha1p $ABCD,$E0,$W0 + + sha1h $E0,$ABCD @ 19 + sha1p $ABCD,$E1,$W1 + + vadd.i32 $E,$E,$E0 + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + bne .Loop_v8 + + vst1.32 {$ABCD},[$ctx]! + vst1.32 {$E\[0]},[$ctx] + + vldmia sp!,{d8-d15} + ret @ bx lr +.size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +{ my %opcode = ( + "sha1c" => 0xf2000c40, "sha1p" => 0xf2100c40, + "sha1m" => 0xf2200c40, "sha1su0" => 0xf2300c40, + "sha1h" => 0xf3b902c0, "sha1su1" => 0xf3ba0380 ); + + sub unsha1 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + s/{q([0-9]+)\[\]}/sprintf "{d%d[],d%d[]}",2*$1,2*$1+1/eo or + s/{q([0-9]+)\[0\]}/sprintf "{d%d[0]}",2*$1/eo; + + s/\b(sha1\w+)\s+(q.*)/unsha1($1,$2)/geo; + + s/\bret\b/bx lr/o or + s/\bbx\s+lr\b/.word\t0xe12fff1e/o; # make it possible to compile with -march=armv4 + + print $_,$/; +} -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl new file mode 100644 index 00000000000000..deb1238d361e45 --- /dev/null +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl @@ -0,0 +1,334 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA1 for ARMv8. +# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# hardware-assisted software(*) +# Apple A7 2.31 4.13 (+14%) +# Cortex-A53 2.19 8.73 (+108%) +# Cortex-A57 2.35 7.88 (+74%) +# +# (*) Software results are presented mostly for reference purposes. 
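The BODY_00_19/BODY_20_39/BODY_40_59 generators that follow each emit one SHA-1 round, and their per-line comments ("e+=rot(a,5)", "e+=F(b,c,d)", the ror #27 / ror #2 idiom) map directly onto the specification. As a reference point, here is a plain C sketch of a single round; the names s, k, w and t are illustrative rather than taken from the script, and the xor-based forms of F shown in the comments are exactly equivalent to the and/or/bic forms used in the AArch64 code below and to the rewrites quoted in the x86 round bodies earlier in this diff:

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, unsigned n)
{
    return (x << n) | (x >> (32 - n));   /* rol(x,5) == ror(x,27), rol(x,30) == ror(x,2) */
}

/* One SHA-1 round; s[] holds {a,b,c,d,e}, k is the round constant, w = W[t]. */
static void sha1_round(uint32_t s[5], int t, uint32_t k, uint32_t w)
{
    uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f;

    if (t < 20)
        f = ((c ^ d) & b) ^ d;           /* Ch(b,c,d)  == (b&c)|(~b&d)      */
    else if (t >= 40 && t < 60)
        f = ((b ^ c) & (c ^ d)) ^ c;     /* Maj(b,c,d) == (b&c)|(b&d)|(c&d) */
    else
        f = b ^ c ^ d;                   /* Parity, rounds 20-39 and 60-79  */

    e += rol32(a, 5) + f + k + w;        /* "e+=rot(a,5)", "e+=F(b,c,d)"    */
    b  = rol32(b, 30);                   /* the "ror #2" step               */

    s[0] = e; s[1] = a; s[2] = b; s[3] = c; s[4] = d;
}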
+ +$flavour = shift; +open STDOUT,">".shift; + +($ctx,$inp,$num)=("x0","x1","x2"); +@Xw=map("w$_",(3..17,19)); +@Xx=map("x$_",(3..17,19)); +@V=($A,$B,$C,$D,$E)=map("w$_",(20..24)); +($t0,$t1,$t2,$K)=map("w$_",(25..28)); + + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i<15 && !($i&1)); + lsr @Xx[$i+1],@Xx[$i],#32 +___ +$code.=<<___ if ($i<14 && !($i&1)); + ldr @Xx[$i+2],[$inp,#`($i+2)*4-64`] +___ +$code.=<<___ if ($i<14 && ($i&1)); +#ifdef __ARMEB__ + ror @Xx[$i+1],@Xx[$i+1],#32 +#else + rev32 @Xx[$i+1],@Xx[$i+1] +#endif +___ +$code.=<<___ if ($i<14); + bic $t0,$d,$b + and $t1,$c,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + orr $t0,$t0,$t1 + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) +___ +$code.=<<___ if ($i==19); + movz $K,#0xeba1 + movk $K,#0x6ed9,lsl#16 +___ +$code.=<<___ if ($i>=14); + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + bic $t0,$d,$b + and $t1,$c,$b + ror $t2,$a,#27 + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + add $d,$d,$K // future e+=K + orr $t0,$t0,$t1 + add $e,$e,$t2 // e+=rot(a,5) + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i==59); + movz $K,#0xc1d6 + movk $K,#0xca62,lsl#16 +___ +$code.=<<___; + orr $t0,$b,$c + and $t1,$b,$c + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + ror $t2,$a,#27 + and $t0,$t0,$d + add $d,$d,$K // future e+=K + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + add $e,$e,$t2 // e+=rot(a,5) + orr $t0,$t0,$t1 + ror $b,$b,#2 + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=($i+2)&15; + +$code.=<<___ if ($i==39); + movz $K,#0xbcdc + movk $K,#0x8f1b,lsl#16 +___ +$code.=<<___ if ($i<78); + eor @Xw[$j],@Xw[$j],@Xw[($j+2)&15] + eor $t0,$d,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + eor @Xw[$j],@Xw[$j],@Xw[($j+8)&15] + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + eor @Xw[$j],@Xw[$j],@Xw[($j+13)&15] + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) + ror @Xw[$j],@Xw[$j],#31 +___ +$code.=<<___ if ($i==78); + ldp @Xw[1],@Xw[2],[$ctx] + eor $t0,$d,$b + ror $t2,$a,#27 + add $d,$d,$K // future e+=K + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + add $d,$d,@Xw[($i+1)&15] // future e+=X[i] + add $e,$e,$t0 // e+=F(b,c,d) +___ +$code.=<<___ if ($i==79); + ldp @Xw[3],@Xw[4],[$ctx,#8] + eor $t0,$d,$b + ror $t2,$a,#27 + eor $t0,$t0,$c + add $e,$e,$t2 // e+=rot(a,5) + ror $b,$b,#2 + ldr @Xw[5],[$ctx,#16] + add $e,$e,$t0 // e+=F(b,c,d) +___ +} + +$code.=<<___; +#include "arm_arch.h" + +.text + +.globl sha1_block_data_order +.type sha1_block_data_order,%function +.align 6 +sha1_block_data_order: + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA1 + b.ne .Lv8_entry + + stp x29,x30,[sp,#-96]! 
+ add x29,sp,#0 + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + + ldp $A,$B,[$ctx] + ldp $C,$D,[$ctx,#8] + ldr $E,[$ctx,#16] + +.Loop: + ldr @Xx[0],[$inp],#64 + movz $K,#0x7999 + sub $num,$num,#1 + movk $K,#0x5a82,lsl#16 +#ifdef __ARMEB__ + ror $Xx[0],@Xx[0],#32 +#else + rev32 @Xx[0],@Xx[0] +#endif + add $E,$E,$K // warm it up + add $E,$E,@Xw[0] +___ +for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + add $B,$B,@Xw[2] + add $C,$C,@Xw[3] + add $A,$A,@Xw[1] + add $D,$D,@Xw[4] + add $E,$E,@Xw[5] + stp $A,$B,[$ctx] + stp $C,$D,[$ctx,#8] + str $E,[$ctx,#16] + cbnz $num,.Loop + + ldp x19,x20,[sp,#16] + ldp x21,x22,[sp,#32] + ldp x23,x24,[sp,#48] + ldp x25,x26,[sp,#64] + ldp x27,x28,[sp,#80] + ldr x29,[sp],#96 + ret +.size sha1_block_data_order,.-sha1_block_data_order +___ +{{{ +my ($ABCD,$E,$E0,$E1)=map("v$_.16b",(0..3)); +my @MSG=map("v$_.16b",(4..7)); +my @Kxx=map("v$_.4s",(16..19)); +my ($W0,$W1)=("v20.4s","v21.4s"); +my $ABCD_SAVE="v22.16b"; + +$code.=<<___; +.type sha1_block_armv8,%function +.align 6 +sha1_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + adr x4,.Lconst + eor $E,$E,$E + ld1.32 {$ABCD},[$ctx],#16 + ld1.32 {$E}[0],[$ctx] + sub $ctx,$ctx,#16 + ld1.32 {@Kxx[0]-@Kxx[3]},[x4] + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + + add.i32 $W0,@Kxx[0],@MSG[0] + rev32 @MSG[2],@MSG[2] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + + add.i32 $W1,@Kxx[0],@MSG[1] + rev32 @MSG[3],@MSG[3] + sha1h $E1,$ABCD + sha1c $ABCD,$E,$W0 // 0 + add.i32 $W0,@Kxx[$j],@MSG[2] + sha1su0 @MSG[0],@MSG[1],@MSG[2] +___ +for ($j=0,$i=1;$i<20-3;$i++) { +my $f=("c","p","m","p")[$i/5]; +$code.=<<___; + sha1h $E0,$ABCD // $i + sha1$f $ABCD,$E1,$W1 + add.i32 $W1,@Kxx[$j],@MSG[3] + sha1su1 @MSG[0],@MSG[3] +___ +$code.=<<___ if ($i<20-4); + sha1su0 @MSG[1],@MSG[2],@MSG[3] +___ + ($E0,$E1)=($E1,$E0); ($W0,$W1)=($W1,$W0); + push(@MSG,shift(@MSG)); $j++ if ((($i+3)%5)==0); +} +$code.=<<___; + sha1h $E0,$ABCD // $i + sha1p $ABCD,$E1,$W1 + add.i32 $W1,@Kxx[$j],@MSG[3] + + sha1h $E1,$ABCD // 18 + sha1p $ABCD,$E0,$W0 + + sha1h $E0,$ABCD // 19 + sha1p $ABCD,$E1,$W1 + + add.i32 $E,$E,$E0 + add.i32 $ABCD,$ABCD,$ABCD_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD},[$ctx],#16 + st1.32 {$E}[0],[$ctx] + + ldr x29,[sp],#16 + ret +.size sha1_block_armv8,.-sha1_block_armv8 +.align 6 +.Lconst: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 +.LOPENSSL_armcap_P: +.quad OPENSSL_armcap_P-. 
+.asciz "SHA1 block transform for ARMv8, CRYPTOGAMS by " +.align 2 +.comm OPENSSL_armcap_P,4,4 +___ +}}} + +{ my %opcode = ( + "sha1c" => 0x5e000000, "sha1p" => 0x5e001000, + "sha1m" => 0x5e002000, "sha1su0" => 0x5e003000, + "sha1h" => 0x5e280800, "sha1su1" => 0x5e281800 ); + + sub unsha1 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b(sha1\w+)\s+([qv].*)/unsha1($1,$2)/geo; + + s/\.\w?32\b//o and s/\.16b/\.4s/go; + m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl new file mode 100644 index 00000000000000..a8ee075eaaa0a0 --- /dev/null +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl @@ -0,0 +1,1574 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# Multi-buffer SHA1 procedure processes n buffers in parallel by +# placing buffer data to designated lane of SIMD register. n is +# naturally limited to 4 on pre-AVX2 processors and to 8 on +# AVX2-capable processors such as Haswell. +# +# this +aesni(i) sha1 aesni-sha1 gain(iv) +# ------------------------------------------------------------------- +# Westmere(ii) 10.7/n +1.28=3.96(n=4) 5.30 6.66 +68% +# Atom(ii) 18.1/n +3.93=8.46(n=4) 9.37 12.8 +51% +# Sandy Bridge (8.16 +5.15=13.3)/n 4.99 5.98 +80% +# Ivy Bridge (8.08 +5.14=13.2)/n 4.60 5.54 +68% +# Haswell(iii) (8.96 +5.00=14.0)/n 3.57 4.55 +160% +# Bulldozer (9.76 +5.76=15.5)/n 5.95 6.37 +64% +# +# (i) multi-block CBC encrypt with 128-bit key; +# (ii) (HASH+AES)/n does not apply to Westmere for n>3 and Atom, +# because of lower AES-NI instruction throughput; +# (iii) "this" is for n=8, when we gather twice as much data, result +# for n=4 is 8.00+4.44=12.4; +# (iv) presented improvement coefficients are asymptotic limits and +# in real-life application are somewhat lower, e.g. 
for 2KB +# fragments they range from 30% to 100% (on Haswell); + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +$avx=0; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +# void sha1_multi_block ( +# struct { unsigned int A[8]; +# unsigned int B[8]; +# unsigned int C[8]; +# unsigned int D[8]; +# unsigned int E[8]; } *ctx, +# struct { void *ptr; int blocks; } inp[8], +# int num); /* 1 or 2 */ +# +$ctx="%rdi"; # 1st arg +$inp="%rsi"; # 2nd arg +$num="%edx"; +@ptr=map("%r$_",(8..11)); +$Tbl="%rbp"; + +@V=($A,$B,$C,$D,$E)=map("%xmm$_",(0..4)); +($t0,$t1,$t2,$t3,$tx)=map("%xmm$_",(5..9)); +@Xi=map("%xmm$_",(10..14)); +$K="%xmm15"; + +if (1) { + # Atom-specific optimization aiming to eliminate pshufb with high + # registers [and thus get rid of 48 cycles accumulated penalty] + @Xi=map("%xmm$_",(0..4)); + ($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9)); + @V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14)); +} + +$REG_SZ=16; + +sub Xi_off { +my $off = shift; + + $off %= 16; $off *= $REG_SZ; + $off<256 ? "$off-128(%rax)" : "$off-256-128(%rbx)"; +} + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +my $k=$i+2; + +# Loads are performed 2+3/4 iterations in advance. 3/4 means that out +# of 4 words you would expect to be loaded per given iteration one is +# spilled to next iteration. In other words indices in four input +# streams are distributed as following: +# +# $i==0: 0,0,0,0,1,1,1,1,2,2,2, +# $i==1: 2,3,3,3, +# $i==2: 3,4,4,4, +# ... +# $i==13: 14,15,15,15, +# $i==14: 15 +# +# Then at $i==15 Xupdate is applied one iteration in advance... 
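The prototype comment earlier in this file pins down how sha1_multi_block is driven: the context stores each SHA-1 state word for up to eight lanes side by side, and every input descriptor is a data pointer plus a count of whole 64-byte blocks. The routine is internal to OpenSSL rather than a public API, but a minimal caller sketch built from that comment may make the register layout concrete. The type and helper names here are illustrative; as the .Loop_grande code below appears to consume inp[0..3] when num == 1, the sketch hashes four equal-length buffers, and padding/finalisation of any partial block is left to the caller:

#include <stdint.h>

/* Layout lifted from the prototype comment above; names are illustrative. */
typedef struct {
    unsigned int A[8], B[8], C[8], D[8], E[8];
} SHA1_MB_CTX;

typedef struct {
    void *ptr;        /* start of this lane's data                  */
    int   blocks;     /* number of whole 64-byte blocks in the lane */
} SHA1_HASH_DESC;

void sha1_multi_block(SHA1_MB_CTX *ctx, SHA1_HASH_DESC *inp, int num);

/* Hash four block-aligned buffers in parallel; out[i] receives lane i's
 * A..E state words (big-endian serialization is up to the caller). */
static void sha1_x4(uint8_t *data[4], int blocks, uint32_t out[4][5])
{
    SHA1_MB_CTX ctx;
    SHA1_HASH_DESC desc[4];
    int i;

    for (i = 0; i < 4; i++) {
        ctx.A[i] = 0x67452301; ctx.B[i] = 0xEFCDAB89;  /* SHA-1 IV, lane i */
        ctx.C[i] = 0x98BADCFE; ctx.D[i] = 0x10325476;
        ctx.E[i] = 0xC3D2E1F0;
        desc[i].ptr    = data[i];
        desc[i].blocks = blocks;
    }
    sha1_multi_block(&ctx, desc, 1);                   /* num==1: 4 lanes */
    for (i = 0; i < 4; i++) {
        out[i][0] = ctx.A[i]; out[i][1] = ctx.B[i]; out[i][2] = ctx.C[i];
        out[i][3] = ctx.D[i]; out[i][4] = ctx.E[i];
    }
}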
+$code.=<<___ if ($i==0); + movd (@ptr[0]),@Xi[0] + lea `16*4`(@ptr[0]),@ptr[0] + movd (@ptr[1]),@Xi[2] # borrow @Xi[2] + lea `16*4`(@ptr[1]),@ptr[1] + movd (@ptr[2]),@Xi[3] # borrow @Xi[3] + lea `16*4`(@ptr[2]),@ptr[2] + movd (@ptr[3]),@Xi[4] # borrow @Xi[4] + lea `16*4`(@ptr[3]),@ptr[3] + punpckldq @Xi[3],@Xi[0] + movd `4*$j-16*4`(@ptr[0]),@Xi[1] + punpckldq @Xi[4],@Xi[2] + movd `4*$j-16*4`(@ptr[1]),$t3 + punpckldq @Xi[2],@Xi[0] + movd `4*$j-16*4`(@ptr[2]),$t2 + pshufb $tx,@Xi[0] +___ +$code.=<<___ if ($i<14); # just load input + movd `4*$j-16*4`(@ptr[3]),$t1 + punpckldq $t2,@Xi[1] + movdqa $a,$t2 + paddd $K,$e # e+=K_00_19 + punpckldq $t1,$t3 + movdqa $b,$t1 + movdqa $b,$t0 + pslld \$5,$t2 + pandn $d,$t1 + pand $c,$t0 + punpckldq $t3,@Xi[1] + movdqa $a,$t3 + + movdqa @Xi[0],`&Xi_off($i)` + paddd @Xi[0],$e # e+=X[i] + movd `4*$k-16*4`(@ptr[0]),@Xi[2] + psrld \$27,$t3 + pxor $t1,$t0 # Ch(b,c,d) + movdqa $b,$t1 + + por $t3,$t2 # rol(a,5) + movd `4*$k-16*4`(@ptr[1]),$t3 + pslld \$30,$t1 + paddd $t0,$e # e+=Ch(b,c,d) + + psrld \$2,$b + paddd $t2,$e # e+=rol(a,5) + pshufb $tx,@Xi[1] + movd `4*$k-16*4`(@ptr[2]),$t2 + por $t1,$b # b=rol(b,30) +___ +$code.=<<___ if ($i==14); # just load input + movd `4*$j-16*4`(@ptr[3]),$t1 + punpckldq $t2,@Xi[1] + movdqa $a,$t2 + paddd $K,$e # e+=K_00_19 + punpckldq $t1,$t3 + movdqa $b,$t1 + movdqa $b,$t0 + pslld \$5,$t2 + prefetcht0 63(@ptr[0]) + pandn $d,$t1 + pand $c,$t0 + punpckldq $t3,@Xi[1] + movdqa $a,$t3 + + movdqa @Xi[0],`&Xi_off($i)` + paddd @Xi[0],$e # e+=X[i] + psrld \$27,$t3 + pxor $t1,$t0 # Ch(b,c,d) + movdqa $b,$t1 + prefetcht0 63(@ptr[1]) + + por $t3,$t2 # rol(a,5) + pslld \$30,$t1 + paddd $t0,$e # e+=Ch(b,c,d) + prefetcht0 63(@ptr[2]) + + psrld \$2,$b + paddd $t2,$e # e+=rol(a,5) + pshufb $tx,@Xi[1] + prefetcht0 63(@ptr[3]) + por $t1,$b # b=rol(b,30) +___ +$code.=<<___ if ($i>=13 && $i<15); + movdqa `&Xi_off($j+2)`,@Xi[3] # preload "X[2]" +___ +$code.=<<___ if ($i>=15); # apply Xupdate + pxor @Xi[-2],@Xi[1] # "X[13]" + movdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" + + movdqa $a,$t2 + pxor `&Xi_off($j+8)`,@Xi[1] + paddd $K,$e # e+=K_00_19 + movdqa $b,$t1 + pslld \$5,$t2 + pxor @Xi[3],@Xi[1] + movdqa $b,$t0 + pandn $d,$t1 + movdqa @Xi[1],$tx + pand $c,$t0 + movdqa $a,$t3 + psrld \$31,$tx + paddd @Xi[1],@Xi[1] + + movdqa @Xi[0],`&Xi_off($i)` + paddd @Xi[0],$e # e+=X[i] + psrld \$27,$t3 + pxor $t1,$t0 # Ch(b,c,d) + + movdqa $b,$t1 + por $t3,$t2 # rol(a,5) + pslld \$30,$t1 + paddd $t0,$e # e+=Ch(b,c,d) + + psrld \$2,$b + paddd $t2,$e # e+=rol(a,5) + por $tx,@Xi[1] # rol \$1,@Xi[1] + por $t1,$b # b=rol(b,30) +___ +push(@Xi,shift(@Xi)); +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; + +$code.=<<___ if ($i<79); + pxor @Xi[-2],@Xi[1] # "X[13]" + movdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" + + movdqa $a,$t2 + movdqa $d,$t0 + pxor `&Xi_off($j+8)`,@Xi[1] + paddd $K,$e # e+=K_20_39 + pslld \$5,$t2 + pxor $b,$t0 + + movdqa $a,$t3 +___ +$code.=<<___ if ($i<72); + movdqa @Xi[0],`&Xi_off($i)` +___ +$code.=<<___ if ($i<79); + paddd @Xi[0],$e # e+=X[i] + pxor @Xi[3],@Xi[1] + psrld \$27,$t3 + pxor $c,$t0 # Parity(b,c,d) + movdqa $b,$t1 + + pslld \$30,$t1 + movdqa @Xi[1],$tx + por $t3,$t2 # rol(a,5) + psrld \$31,$tx + paddd $t0,$e # e+=Parity(b,c,d) + paddd @Xi[1],@Xi[1] + + psrld \$2,$b + paddd $t2,$e # e+=rol(a,5) + por $tx,@Xi[1] # rol(@Xi[1],1) + por $t1,$b # b=rol(b,30) +___ +$code.=<<___ if ($i==79); + movdqa $a,$t2 + paddd $K,$e # e+=K_20_39 + movdqa $d,$t0 + pslld \$5,$t2 + pxor $b,$t0 + + movdqa $a,$t3 + paddd @Xi[0],$e # e+=X[i] + psrld \$27,$t3 + 
movdqa $b,$t1 + pxor $c,$t0 # Parity(b,c,d) + + pslld \$30,$t1 + por $t3,$t2 # rol(a,5) + paddd $t0,$e # e+=Parity(b,c,d) + + psrld \$2,$b + paddd $t2,$e # e+=rol(a,5) + por $t1,$b # b=rol(b,30) +___ +push(@Xi,shift(@Xi)); +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; + +$code.=<<___; + pxor @Xi[-2],@Xi[1] # "X[13]" + movdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" + + movdqa $a,$t2 + movdqa $d,$t1 + pxor `&Xi_off($j+8)`,@Xi[1] + pxor @Xi[3],@Xi[1] + paddd $K,$e # e+=K_40_59 + pslld \$5,$t2 + movdqa $a,$t3 + pand $c,$t1 + + movdqa $d,$t0 + movdqa @Xi[1],$tx + psrld \$27,$t3 + paddd $t1,$e + pxor $c,$t0 + + movdqa @Xi[0],`&Xi_off($i)` + paddd @Xi[0],$e # e+=X[i] + por $t3,$t2 # rol(a,5) + psrld \$31,$tx + pand $b,$t0 + movdqa $b,$t1 + + pslld \$30,$t1 + paddd @Xi[1],@Xi[1] + paddd $t0,$e # e+=Maj(b,d,c) + + psrld \$2,$b + paddd $t2,$e # e+=rol(a,5) + por $tx,@Xi[1] # rol(@X[1],1) + por $t1,$b # b=rol(b,30) +___ +push(@Xi,shift(@Xi)); +} + +$code.=<<___; +.text + +.extern OPENSSL_ia32cap_P + +.globl sha1_multi_block +.type sha1_multi_block,\@function,3 +.align 32 +sha1_multi_block: + mov OPENSSL_ia32cap_P+4(%rip),%rcx + bt \$61,%rcx # check SHA bit + jc _shaext_shortcut +___ +$code.=<<___ if ($avx); + test \$`1<<28`,%ecx + jnz _avx_shortcut +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,-0x78(%rax) + movaps %xmm11,-0x68(%rax) + movaps %xmm12,-0x58(%rax) + movaps %xmm13,-0x48(%rax) + movaps %xmm14,-0x38(%rax) + movaps %xmm15,-0x28(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`,%rsp + and \$-256,%rsp + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody: + lea K_XX_XX(%rip),$Tbl + lea `$REG_SZ*16`(%rsp),%rbx + +.Loop_grande: + mov $num,`$REG_SZ*17+8`(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldone + + movdqu 0x00($ctx),$A # load context + lea 128(%rsp),%rax + movdqu 0x20($ctx),$B + movdqu 0x40($ctx),$C + movdqu 0x60($ctx),$D + movdqu 0x80($ctx),$E + movdqa 0x60($Tbl),$tx # pbswap_mask + movdqa -0x20($Tbl),$K # K_00_19 + jmp .Loop + +.align 32 +.Loop: +___ +for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +$code.=" movdqa 0x00($Tbl),$K\n"; # K_20_39 +for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=" movdqa 0x20($Tbl),$K\n"; # K_40_59 +for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +$code.=" movdqa 0x40($Tbl),$K\n"; # K_60_79 +for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + movdqa (%rbx),@Xi[0] # pull counters + mov \$1,%ecx + cmp 4*0(%rbx),%ecx # examinte counters + pxor $t2,$t2 + cmovge $Tbl,@ptr[0] # cancel input + cmp 4*1(%rbx),%ecx + movdqa @Xi[0],@Xi[1] + cmovge $Tbl,@ptr[1] + cmp 4*2(%rbx),%ecx + pcmpgtd $t2,@Xi[1] # mask value + cmovge $Tbl,@ptr[2] + cmp 4*3(%rbx),%ecx + paddd @Xi[1],@Xi[0] # counters-- + cmovge $Tbl,@ptr[3] + + movdqu 0x00($ctx),$t0 + pand @Xi[1],$A + movdqu 0x20($ctx),$t1 + pand @Xi[1],$B + paddd $t0,$A + movdqu 0x40($ctx),$t2 + pand @Xi[1],$C + paddd $t1,$B + movdqu 0x60($ctx),$t3 + pand @Xi[1],$D + paddd $t2,$C + movdqu 0x80($ctx),$tx + pand @Xi[1],$E + movdqu 
$A,0x00($ctx) + paddd $t3,$D + movdqu $B,0x20($ctx) + paddd $tx,$E + movdqu $C,0x40($ctx) + movdqu $D,0x60($ctx) + movdqu $E,0x80($ctx) + + movdqa @Xi[0],(%rbx) # save counters + movdqa 0x60($Tbl),$tx # pbswap_mask + movdqa -0x20($Tbl),$K # K_00_19 + dec $num + jnz .Loop + + mov `$REG_SZ*17+8`(%rsp),$num + lea $REG_SZ($ctx),$ctx + lea `16*$REG_SZ/4`($inp),$inp + dec $num + jnz .Loop_grande + +.Ldone: + mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp +___ +$code.=<<___ if ($win64); + movaps -0xb8(%rax),%xmm6 + movaps -0xa8(%rax),%xmm7 + movaps -0x98(%rax),%xmm8 + movaps -0x88(%rax),%xmm9 + movaps -0x78(%rax),%xmm10 + movaps -0x68(%rax),%xmm11 + movaps -0x58(%rax),%xmm12 + movaps -0x48(%rax),%xmm13 + movaps -0x38(%rax),%xmm14 + movaps -0x28(%rax),%xmm15 +___ +$code.=<<___; + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue: + ret +.size sha1_multi_block,.-sha1_multi_block +___ + {{{ +my ($ABCD0,$E0,$E0_,$BSWAP,$ABCD1,$E1,$E1_)=map("%xmm$_",(0..3,8..10)); +my @MSG0=map("%xmm$_",(4..7)); +my @MSG1=map("%xmm$_",(11..14)); + +$code.=<<___; +.type sha1_multi_block_shaext,\@function,3 +.align 32 +sha1_multi_block_shaext: +_shaext_shortcut: + mov %rsp,%rax + push %rbx + push %rbp +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,-0x78(%rax) + movaps %xmm11,-0x68(%rax) + movaps %xmm12,-0x58(%rax) + movaps %xmm13,-0x48(%rax) + movaps %xmm14,-0x38(%rax) + movaps %xmm15,-0x28(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`,%rsp + shl \$1,$num # we process pair at a time + and \$-256,%rsp + lea 0x40($ctx),$ctx # size optimization + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody_shaext: + lea `$REG_SZ*16`(%rsp),%rbx + movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap + +.Loop_grande_shaext: + mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num + xor $num,$num +___ +for($i=0;$i<2;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle %rsp,@ptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldone_shaext + + movq 0x00-0x40($ctx),$ABCD0 # a1.a0 + movq 0x20-0x40($ctx),@MSG0[0]# b1.b0 + movq 0x40-0x40($ctx),@MSG0[1]# c1.c0 + movq 0x60-0x40($ctx),@MSG0[2]# d1.d0 + movq 0x80-0x40($ctx),@MSG0[3]# e1.e0 + + punpckldq @MSG0[0],$ABCD0 # b1.a1.b0.a0 + punpckldq @MSG0[2],@MSG0[1] # d1.c1.d0.c0 + + movdqa $ABCD0,$ABCD1 + punpcklqdq @MSG0[1],$ABCD0 # d0.c0.b0.a0 + punpckhqdq @MSG0[1],$ABCD1 # d1.c1.b1.a1 + + pshufd \$0b00111111,@MSG0[3],$E0 + pshufd \$0b01111111,@MSG0[3],$E1 + pshufd \$0b00011011,$ABCD0,$ABCD0 + pshufd \$0b00011011,$ABCD1,$ABCD1 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0x00(@ptr[0]),@MSG0[0] + movdqu 0x00(@ptr[1]),@MSG1[0] + movdqu 0x10(@ptr[0]),@MSG0[1] + movdqu 0x10(@ptr[1]),@MSG1[1] + movdqu 0x20(@ptr[0]),@MSG0[2] + pshufb $BSWAP,@MSG0[0] + movdqu 0x20(@ptr[1]),@MSG1[2] + pshufb $BSWAP,@MSG1[0] + movdqu 0x30(@ptr[0]),@MSG0[3] + lea 0x40(@ptr[0]),@ptr[0] + pshufb $BSWAP,@MSG0[1] + movdqu 0x30(@ptr[1]),@MSG1[3] + lea 0x40(@ptr[1]),@ptr[1] + pshufb $BSWAP,@MSG1[1] + + movdqa $E0,0x50(%rsp) # offload + paddd @MSG0[0],$E0 + movdqa $E1,0x70(%rsp) + paddd @MSG1[0],$E1 + movdqa $ABCD0,0x40(%rsp) # offload + movdqa $ABCD0,$E0_ + movdqa $ABCD1,0x60(%rsp) + movdqa $ABCD1,$E1_ + sha1rnds4 \$0,$E0,$ABCD0 # 0-3 + sha1nexte @MSG0[1],$E0_ + sha1rnds4 \$0,$E1,$ABCD1 # 0-3 
+ sha1nexte @MSG1[1],$E1_ + pshufb $BSWAP,@MSG0[2] + prefetcht0 127(@ptr[0]) + sha1msg1 @MSG0[1],@MSG0[0] + pshufb $BSWAP,@MSG1[2] + prefetcht0 127(@ptr[1]) + sha1msg1 @MSG1[1],@MSG1[0] + + pshufb $BSWAP,@MSG0[3] + movdqa $ABCD0,$E0 + pshufb $BSWAP,@MSG1[3] + movdqa $ABCD1,$E1 + sha1rnds4 \$0,$E0_,$ABCD0 # 4-7 + sha1nexte @MSG0[2],$E0 + sha1rnds4 \$0,$E1_,$ABCD1 # 4-7 + sha1nexte @MSG1[2],$E1 + pxor @MSG0[2],@MSG0[0] + sha1msg1 @MSG0[2],@MSG0[1] + pxor @MSG1[2],@MSG1[0] + sha1msg1 @MSG1[2],@MSG1[1] +___ +for($i=2;$i<20-4;$i++) { +$code.=<<___; + movdqa $ABCD0,$E0_ + movdqa $ABCD1,$E1_ + sha1rnds4 \$`int($i/5)`,$E0,$ABCD0 # 8-11 + sha1nexte @MSG0[3],$E0_ + sha1rnds4 \$`int($i/5)`,$E1,$ABCD1 # 8-11 + sha1nexte @MSG1[3],$E1_ + sha1msg2 @MSG0[3],@MSG0[0] + sha1msg2 @MSG1[3],@MSG1[0] + pxor @MSG0[3],@MSG0[1] + sha1msg1 @MSG0[3],@MSG0[2] + pxor @MSG1[3],@MSG1[1] + sha1msg1 @MSG1[3],@MSG1[2] +___ + ($E0,$E0_)=($E0_,$E0); ($E1,$E1_)=($E1_,$E1); + push(@MSG0,shift(@MSG0)); push(@MSG1,shift(@MSG1)); +} +$code.=<<___; + movdqa $ABCD0,$E0_ + movdqa $ABCD1,$E1_ + sha1rnds4 \$3,$E0,$ABCD0 # 64-67 + sha1nexte @MSG0[3],$E0_ + sha1rnds4 \$3,$E1,$ABCD1 # 64-67 + sha1nexte @MSG1[3],$E1_ + sha1msg2 @MSG0[3],@MSG0[0] + sha1msg2 @MSG1[3],@MSG1[0] + pxor @MSG0[3],@MSG0[1] + pxor @MSG1[3],@MSG1[1] + + mov \$1,%ecx + pxor @MSG0[2],@MSG0[2] # zero + cmp 4*0(%rbx),%ecx # examine counters + cmovge %rsp,@ptr[0] # cancel input + + movdqa $ABCD0,$E0 + movdqa $ABCD1,$E1 + sha1rnds4 \$3,$E0_,$ABCD0 # 68-71 + sha1nexte @MSG0[0],$E0 + sha1rnds4 \$3,$E1_,$ABCD1 # 68-71 + sha1nexte @MSG1[0],$E1 + sha1msg2 @MSG0[0],@MSG0[1] + sha1msg2 @MSG1[0],@MSG1[1] + + cmp 4*1(%rbx),%ecx + cmovge %rsp,@ptr[1] + movq (%rbx),@MSG0[0] # pull counters + + movdqa $ABCD0,$E0_ + movdqa $ABCD1,$E1_ + sha1rnds4 \$3,$E0,$ABCD0 # 72-75 + sha1nexte @MSG0[1],$E0_ + sha1rnds4 \$3,$E1,$ABCD1 # 72-75 + sha1nexte @MSG1[1],$E1_ + + pshufd \$0x00,@MSG0[0],@MSG1[2] + pshufd \$0x55,@MSG0[0],@MSG1[3] + movdqa @MSG0[0],@MSG0[1] + pcmpgtd @MSG0[2],@MSG1[2] + pcmpgtd @MSG0[2],@MSG1[3] + + movdqa $ABCD0,$E0 + movdqa $ABCD1,$E1 + sha1rnds4 \$3,$E0_,$ABCD0 # 76-79 + sha1nexte $MSG0[2],$E0 + sha1rnds4 \$3,$E1_,$ABCD1 # 76-79 + sha1nexte $MSG0[2],$E1 + + pcmpgtd @MSG0[2],@MSG0[1] # counter mask + pand @MSG1[2],$ABCD0 + pand @MSG1[2],$E0 + pand @MSG1[3],$ABCD1 + pand @MSG1[3],$E1 + paddd @MSG0[1],@MSG0[0] # counters-- + + paddd 0x40(%rsp),$ABCD0 + paddd 0x50(%rsp),$E0 + paddd 0x60(%rsp),$ABCD1 + paddd 0x70(%rsp),$E1 + + movq @MSG0[0],(%rbx) # save counters + dec $num + jnz .Loop_shaext + + mov `$REG_SZ*17+8`(%rsp),$num + + pshufd \$0b00011011,$ABCD0,$ABCD0 + pshufd \$0b00011011,$ABCD1,$ABCD1 + + movdqa $ABCD0,@MSG0[0] + punpckldq $ABCD1,$ABCD0 # b1.b0.a1.a0 + punpckhdq $ABCD1,@MSG0[0] # d1.d0.c1.c0 + punpckhdq $E1,$E0 # e1.e0.xx.xx + movq $ABCD0,0x00-0x40($ctx) # a1.a0 + psrldq \$8,$ABCD0 + movq @MSG0[0],0x40-0x40($ctx)# c1.c0 + psrldq \$8,@MSG0[0] + movq $ABCD0,0x20-0x40($ctx) # b1.b0 + psrldq \$8,$E0 + movq @MSG0[0],0x60-0x40($ctx)# d1.d0 + movq $E0,0x80-0x40($ctx) # e1.e0 + + lea `$REG_SZ/2`($ctx),$ctx + lea `16*2`($inp),$inp + dec $num + jnz .Loop_grande_shaext + +.Ldone_shaext: + #mov `$REG_SZ*17`(%rsp),%rax # original %rsp +___ +$code.=<<___ if ($win64); + movaps -0xb8(%rax),%xmm6 + movaps -0xa8(%rax),%xmm7 + movaps -0x98(%rax),%xmm8 + movaps -0x88(%rax),%xmm9 + movaps -0x78(%rax),%xmm10 + movaps -0x68(%rax),%xmm11 + movaps -0x58(%rax),%xmm12 + movaps -0x48(%rax),%xmm13 + movaps -0x38(%rax),%xmm14 + movaps -0x28(%rax),%xmm15 +___ +$code.=<<___; + mov 
-16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue_shaext: + ret +.size sha1_multi_block_shaext,.-sha1_multi_block_shaext +___ + }}} + + if ($avx) {{{ +sub BODY_00_19_avx { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +my $k=$i+2; +my $vpack = $REG_SZ==16 ? "vpunpckldq" : "vinserti128"; +my $ptr_n = $REG_SZ==16 ? @ptr[1] : @ptr[4]; + +$code.=<<___ if ($i==0 && $REG_SZ==16); + vmovd (@ptr[0]),@Xi[0] + lea `16*4`(@ptr[0]),@ptr[0] + vmovd (@ptr[1]),@Xi[2] # borrow Xi[2] + lea `16*4`(@ptr[1]),@ptr[1] + vpinsrd \$1,(@ptr[2]),@Xi[0],@Xi[0] + lea `16*4`(@ptr[2]),@ptr[2] + vpinsrd \$1,(@ptr[3]),@Xi[2],@Xi[2] + lea `16*4`(@ptr[3]),@ptr[3] + vmovd `4*$j-16*4`(@ptr[0]),@Xi[1] + vpunpckldq @Xi[2],@Xi[0],@Xi[0] + vmovd `4*$j-16*4`($ptr_n),$t3 + vpshufb $tx,@Xi[0],@Xi[0] +___ +$code.=<<___ if ($i<15 && $REG_SZ==16); # just load input + vpinsrd \$1,`4*$j-16*4`(@ptr[2]),@Xi[1],@Xi[1] + vpinsrd \$1,`4*$j-16*4`(@ptr[3]),$t3,$t3 +___ +$code.=<<___ if ($i==0 && $REG_SZ==32); + vmovd (@ptr[0]),@Xi[0] + lea `16*4`(@ptr[0]),@ptr[0] + vmovd (@ptr[4]),@Xi[2] # borrow Xi[2] + lea `16*4`(@ptr[4]),@ptr[4] + vmovd (@ptr[1]),$t2 + lea `16*4`(@ptr[1]),@ptr[1] + vmovd (@ptr[5]),$t1 + lea `16*4`(@ptr[5]),@ptr[5] + vpinsrd \$1,(@ptr[2]),@Xi[0],@Xi[0] + lea `16*4`(@ptr[2]),@ptr[2] + vpinsrd \$1,(@ptr[6]),@Xi[2],@Xi[2] + lea `16*4`(@ptr[6]),@ptr[6] + vpinsrd \$1,(@ptr[3]),$t2,$t2 + lea `16*4`(@ptr[3]),@ptr[3] + vpunpckldq $t2,@Xi[0],@Xi[0] + vpinsrd \$1,(@ptr[7]),$t1,$t1 + lea `16*4`(@ptr[7]),@ptr[7] + vpunpckldq $t1,@Xi[2],@Xi[2] + vmovd `4*$j-16*4`(@ptr[0]),@Xi[1] + vinserti128 @Xi[2],@Xi[0],@Xi[0] + vmovd `4*$j-16*4`($ptr_n),$t3 + vpshufb $tx,@Xi[0],@Xi[0] +___ +$code.=<<___ if ($i<15 && $REG_SZ==32); # just load input + vmovd `4*$j-16*4`(@ptr[1]),$t2 + vmovd `4*$j-16*4`(@ptr[5]),$t1 + vpinsrd \$1,`4*$j-16*4`(@ptr[2]),@Xi[1],@Xi[1] + vpinsrd \$1,`4*$j-16*4`(@ptr[6]),$t3,$t3 + vpinsrd \$1,`4*$j-16*4`(@ptr[3]),$t2,$t2 + vpunpckldq $t2,@Xi[1],@Xi[1] + vpinsrd \$1,`4*$j-16*4`(@ptr[7]),$t1,$t1 + vpunpckldq $t1,$t3,$t3 +___ +$code.=<<___ if ($i<14); + vpaddd $K,$e,$e # e+=K_00_19 + vpslld \$5,$a,$t2 + vpandn $d,$b,$t1 + vpand $c,$b,$t0 + + vmovdqa @Xi[0],`&Xi_off($i)` + vpaddd @Xi[0],$e,$e # e+=X[i] + $vpack $t3,@Xi[1],@Xi[1] + vpsrld \$27,$a,$t3 + vpxor $t1,$t0,$t0 # Ch(b,c,d) + vmovd `4*$k-16*4`(@ptr[0]),@Xi[2] + + vpslld \$30,$b,$t1 + vpor $t3,$t2,$t2 # rol(a,5) + vmovd `4*$k-16*4`($ptr_n),$t3 + vpaddd $t0,$e,$e # e+=Ch(b,c,d) + + vpsrld \$2,$b,$b + vpaddd $t2,$e,$e # e+=rol(a,5) + vpshufb $tx,@Xi[1],@Xi[1] + vpor $t1,$b,$b # b=rol(b,30) +___ +$code.=<<___ if ($i==14); + vpaddd $K,$e,$e # e+=K_00_19 + prefetcht0 63(@ptr[0]) + vpslld \$5,$a,$t2 + vpandn $d,$b,$t1 + vpand $c,$b,$t0 + + vmovdqa @Xi[0],`&Xi_off($i)` + vpaddd @Xi[0],$e,$e # e+=X[i] + $vpack $t3,@Xi[1],@Xi[1] + vpsrld \$27,$a,$t3 + prefetcht0 63(@ptr[1]) + vpxor $t1,$t0,$t0 # Ch(b,c,d) + + vpslld \$30,$b,$t1 + vpor $t3,$t2,$t2 # rol(a,5) + prefetcht0 63(@ptr[2]) + vpaddd $t0,$e,$e # e+=Ch(b,c,d) + + vpsrld \$2,$b,$b + vpaddd $t2,$e,$e # e+=rol(a,5) + prefetcht0 63(@ptr[3]) + vpshufb $tx,@Xi[1],@Xi[1] + vpor $t1,$b,$b # b=rol(b,30) +___ +$code.=<<___ if ($i>=13 && $i<15); + vmovdqa `&Xi_off($j+2)`,@Xi[3] # preload "X[2]" +___ +$code.=<<___ if ($i>=15); # apply Xupdate + vpxor @Xi[-2],@Xi[1],@Xi[1] # "X[13]" + vmovdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" + + vpaddd $K,$e,$e # e+=K_00_19 + vpslld \$5,$a,$t2 + vpandn $d,$b,$t1 + `"prefetcht0 63(@ptr[4])" if ($i==15 && $REG_SZ==32)` + vpand $c,$b,$t0 + + vmovdqa @Xi[0],`&Xi_off($i)` + vpaddd @Xi[0],$e,$e # e+=X[i] 
+ vpxor `&Xi_off($j+8)`,@Xi[1],@Xi[1] + vpsrld \$27,$a,$t3 + vpxor $t1,$t0,$t0 # Ch(b,c,d) + vpxor @Xi[3],@Xi[1],@Xi[1] + `"prefetcht0 63(@ptr[5])" if ($i==15 && $REG_SZ==32)` + + vpslld \$30,$b,$t1 + vpor $t3,$t2,$t2 # rol(a,5) + vpaddd $t0,$e,$e # e+=Ch(b,c,d) + `"prefetcht0 63(@ptr[6])" if ($i==15 && $REG_SZ==32)` + vpsrld \$31,@Xi[1],$tx + vpaddd @Xi[1],@Xi[1],@Xi[1] + + vpsrld \$2,$b,$b + `"prefetcht0 63(@ptr[7])" if ($i==15 && $REG_SZ==32)` + vpaddd $t2,$e,$e # e+=rol(a,5) + vpor $tx,@Xi[1],@Xi[1] # rol \$1,@Xi[1] + vpor $t1,$b,$b # b=rol(b,30) +___ +push(@Xi,shift(@Xi)); +} + +sub BODY_20_39_avx { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; + +$code.=<<___ if ($i<79); + vpxor @Xi[-2],@Xi[1],@Xi[1] # "X[13]" + vmovdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" + + vpslld \$5,$a,$t2 + vpaddd $K,$e,$e # e+=K_20_39 + vpxor $b,$d,$t0 +___ +$code.=<<___ if ($i<72); + vmovdqa @Xi[0],`&Xi_off($i)` +___ +$code.=<<___ if ($i<79); + vpaddd @Xi[0],$e,$e # e+=X[i] + vpxor `&Xi_off($j+8)`,@Xi[1],@Xi[1] + vpsrld \$27,$a,$t3 + vpxor $c,$t0,$t0 # Parity(b,c,d) + vpxor @Xi[3],@Xi[1],@Xi[1] + + vpslld \$30,$b,$t1 + vpor $t3,$t2,$t2 # rol(a,5) + vpaddd $t0,$e,$e # e+=Parity(b,c,d) + vpsrld \$31,@Xi[1],$tx + vpaddd @Xi[1],@Xi[1],@Xi[1] + + vpsrld \$2,$b,$b + vpaddd $t2,$e,$e # e+=rol(a,5) + vpor $tx,@Xi[1],@Xi[1] # rol(@Xi[1],1) + vpor $t1,$b,$b # b=rol(b,30) +___ +$code.=<<___ if ($i==79); + vpslld \$5,$a,$t2 + vpaddd $K,$e,$e # e+=K_20_39 + vpxor $b,$d,$t0 + + vpsrld \$27,$a,$t3 + vpaddd @Xi[0],$e,$e # e+=X[i] + vpxor $c,$t0,$t0 # Parity(b,c,d) + + vpslld \$30,$b,$t1 + vpor $t3,$t2,$t2 # rol(a,5) + vpaddd $t0,$e,$e # e+=Parity(b,c,d) + + vpsrld \$2,$b,$b + vpaddd $t2,$e,$e # e+=rol(a,5) + vpor $t1,$b,$b # b=rol(b,30) +___ +push(@Xi,shift(@Xi)); +} + +sub BODY_40_59_avx { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; + +$code.=<<___; + vpxor @Xi[-2],@Xi[1],@Xi[1] # "X[13]" + vmovdqa `&Xi_off($j+2)`,@Xi[3] # "X[2]" + + vpaddd $K,$e,$e # e+=K_40_59 + vpslld \$5,$a,$t2 + vpand $c,$d,$t1 + vpxor `&Xi_off($j+8)`,@Xi[1],@Xi[1] + + vpaddd $t1,$e,$e + vpsrld \$27,$a,$t3 + vpxor $c,$d,$t0 + vpxor @Xi[3],@Xi[1],@Xi[1] + + vmovdqu @Xi[0],`&Xi_off($i)` + vpaddd @Xi[0],$e,$e # e+=X[i] + vpor $t3,$t2,$t2 # rol(a,5) + vpsrld \$31,@Xi[1],$tx + vpand $b,$t0,$t0 + vpaddd @Xi[1],@Xi[1],@Xi[1] + + vpslld \$30,$b,$t1 + vpaddd $t0,$e,$e # e+=Maj(b,d,c) + + vpsrld \$2,$b,$b + vpaddd $t2,$e,$e # e+=rol(a,5) + vpor $tx,@Xi[1],@Xi[1] # rol(@X[1],1) + vpor $t1,$b,$b # b=rol(b,30) +___ +push(@Xi,shift(@Xi)); +} + +$code.=<<___; +.type sha1_multi_block_avx,\@function,3 +.align 32 +sha1_multi_block_avx: +_avx_shortcut: +___ +$code.=<<___ if ($avx>1); + shr \$32,%rcx + cmp \$2,$num + jb .Lavx + test \$`1<<5`,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,-0x78(%rax) + movaps %xmm11,-0x68(%rax) + movaps %xmm12,-0x58(%rax) + movaps %xmm13,-0x48(%rax) + movaps %xmm14,-0x38(%rax) + movaps %xmm15,-0x28(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`, %rsp + and \$-256,%rsp + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody_avx: + lea K_XX_XX(%rip),$Tbl + lea `$REG_SZ*16`(%rsp),%rbx + + vzeroupper +.Loop_grande_avx: + mov $num,`$REG_SZ*17+8`(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp 
$num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldone_avx + + vmovdqu 0x00($ctx),$A # load context + lea 128(%rsp),%rax + vmovdqu 0x20($ctx),$B + vmovdqu 0x40($ctx),$C + vmovdqu 0x60($ctx),$D + vmovdqu 0x80($ctx),$E + vmovdqu 0x60($Tbl),$tx # pbswap_mask + jmp .Loop_avx + +.align 32 +.Loop_avx: +___ +$code.=" vmovdqa -0x20($Tbl),$K\n"; # K_00_19 +for($i=0;$i<20;$i++) { &BODY_00_19_avx($i,@V); unshift(@V,pop(@V)); } +$code.=" vmovdqa 0x00($Tbl),$K\n"; # K_20_39 +for(;$i<40;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } +$code.=" vmovdqa 0x20($Tbl),$K\n"; # K_40_59 +for(;$i<60;$i++) { &BODY_40_59_avx($i,@V); unshift(@V,pop(@V)); } +$code.=" vmovdqa 0x40($Tbl),$K\n"; # K_60_79 +for(;$i<80;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mov \$1,%ecx +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + cmp `4*$i`(%rbx),%ecx # examine counters + cmovge $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + vmovdqu (%rbx),$t0 # pull counters + vpxor $t2,$t2,$t2 + vmovdqa $t0,$t1 + vpcmpgtd $t2,$t1,$t1 # mask value + vpaddd $t1,$t0,$t0 # counters-- + + vpand $t1,$A,$A + vpand $t1,$B,$B + vpaddd 0x00($ctx),$A,$A + vpand $t1,$C,$C + vpaddd 0x20($ctx),$B,$B + vpand $t1,$D,$D + vpaddd 0x40($ctx),$C,$C + vpand $t1,$E,$E + vpaddd 0x60($ctx),$D,$D + vpaddd 0x80($ctx),$E,$E + vmovdqu $A,0x00($ctx) + vmovdqu $B,0x20($ctx) + vmovdqu $C,0x40($ctx) + vmovdqu $D,0x60($ctx) + vmovdqu $E,0x80($ctx) + + vmovdqu $t0,(%rbx) # save counters + vmovdqu 0x60($Tbl),$tx # pbswap_mask + dec $num + jnz .Loop_avx + + mov `$REG_SZ*17+8`(%rsp),$num + lea $REG_SZ($ctx),$ctx + lea `16*$REG_SZ/4`($inp),$inp + dec $num + jnz .Loop_grande_avx + +.Ldone_avx: + mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xb8(%rax),%xmm6 + movaps -0xa8(%rax),%xmm7 + movaps -0x98(%rax),%xmm8 + movaps -0x88(%rax),%xmm9 + movaps -0x78(%rax),%xmm10 + movaps -0x68(%rax),%xmm11 + movaps -0x58(%rax),%xmm12 + movaps -0x48(%rax),%xmm13 + movaps -0x38(%rax),%xmm14 + movaps -0x28(%rax),%xmm15 +___ +$code.=<<___; + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue_avx: + ret +.size sha1_multi_block_avx,.-sha1_multi_block_avx +___ + + if ($avx>1) { +$code =~ s/\`([^\`]*)\`/eval $1/gem; + +$REG_SZ=32; + +@ptr=map("%r$_",(12..15,8..11)); + +@V=($A,$B,$C,$D,$E)=map("%ymm$_",(0..4)); +($t0,$t1,$t2,$t3,$tx)=map("%ymm$_",(5..9)); +@Xi=map("%ymm$_",(10..14)); +$K="%ymm15"; + +$code.=<<___; +.type sha1_multi_block_avx2,\@function,3 +.align 32 +sha1_multi_block_avx2: +_avx2_shortcut: + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,0x40(%rsp) + movaps %xmm11,0x50(%rsp) + movaps %xmm12,-0x78(%rax) + movaps %xmm13,-0x68(%rax) + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`, %rsp + and \$-256,%rsp + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody_avx2: + lea K_XX_XX(%rip),$Tbl + shr \$1,$num + + vzeroupper +.Loop_grande_avx2: + mov $num,`$REG_SZ*17+8`(%rsp) # original $num + xor $num,$num + lea `$REG_SZ*16`(%rsp),%rbx +___ +for($i=0;$i<8;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg 
%ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + vmovdqu 0x00($ctx),$A # load context + lea 128(%rsp),%rax + vmovdqu 0x20($ctx),$B + lea 256+128(%rsp),%rbx + vmovdqu 0x40($ctx),$C + vmovdqu 0x60($ctx),$D + vmovdqu 0x80($ctx),$E + vmovdqu 0x60($Tbl),$tx # pbswap_mask + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: +___ +$code.=" vmovdqa -0x20($Tbl),$K\n"; # K_00_19 +for($i=0;$i<20;$i++) { &BODY_00_19_avx($i,@V); unshift(@V,pop(@V)); } +$code.=" vmovdqa 0x00($Tbl),$K\n"; # K_20_39 +for(;$i<40;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } +$code.=" vmovdqa 0x20($Tbl),$K\n"; # K_40_59 +for(;$i<60;$i++) { &BODY_40_59_avx($i,@V); unshift(@V,pop(@V)); } +$code.=" vmovdqa 0x40($Tbl),$K\n"; # K_60_79 +for(;$i<80;$i++) { &BODY_20_39_avx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mov \$1,%ecx + lea `$REG_SZ*16`(%rsp),%rbx +___ +for($i=0;$i<8;$i++) { + $code.=<<___; + cmp `4*$i`(%rbx),%ecx # examine counters + cmovge $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + vmovdqu (%rbx),$t0 # pull counters + vpxor $t2,$t2,$t2 + vmovdqa $t0,$t1 + vpcmpgtd $t2,$t1,$t1 # mask value + vpaddd $t1,$t0,$t0 # counters-- + + vpand $t1,$A,$A + vpand $t1,$B,$B + vpaddd 0x00($ctx),$A,$A + vpand $t1,$C,$C + vpaddd 0x20($ctx),$B,$B + vpand $t1,$D,$D + vpaddd 0x40($ctx),$C,$C + vpand $t1,$E,$E + vpaddd 0x60($ctx),$D,$D + vpaddd 0x80($ctx),$E,$E + vmovdqu $A,0x00($ctx) + vmovdqu $B,0x20($ctx) + vmovdqu $C,0x40($ctx) + vmovdqu $D,0x60($ctx) + vmovdqu $E,0x80($ctx) + + vmovdqu $t0,(%rbx) # save counters + lea 256+128(%rsp),%rbx + vmovdqu 0x60($Tbl),$tx # pbswap_mask + dec $num + jnz .Loop_avx2 + + #mov `$REG_SZ*17+8`(%rsp),$num + #lea $REG_SZ($ctx),$ctx + #lea `16*$REG_SZ/4`($inp),$inp + #dec $num + #jnz .Loop_grande_avx2 + +.Ldone_avx2: + mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue_avx2: + ret +.size sha1_multi_block_avx2,.-sha1_multi_block_avx2 +___ + } }}} +$code.=<<___; + +.align 256 + .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 + .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 +K_XX_XX: + .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 + .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 + .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 + .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 + .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 + .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap + .byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 + .asciz "SHA1 multi-block transform for x86_64, CRYPTOGAMS by " +___ + +if ($win64) { +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 
+se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->Rip<.Lbody + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov `16*17`(%rax),%rax # pull saved stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + + lea -24-10*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler +___ +$code.=<<___ if ($avx>1); +.type avx2_handler,\@abi-omnipotent +.align 16 +avx2_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue + + mov `32*17`($context),%rax # pull saved stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore cotnext->R12 + mov %r13,224($context) # restore cotnext->R13 + mov %r14,232($context) # restore cotnext->R14 + mov %r15,240($context) # restore cotnext->R15 + + lea -56-10*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + + jmp .Lin_prologue +.size avx2_handler,.-avx2_handler +___ +$code.=<<___; +.section .pdata +.align 4 + .rva .LSEH_begin_sha1_multi_block + .rva .LSEH_end_sha1_multi_block + .rva .LSEH_info_sha1_multi_block + .rva .LSEH_begin_sha1_multi_block_shaext + .rva .LSEH_end_sha1_multi_block_shaext + .rva .LSEH_info_sha1_multi_block_shaext 
+___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_sha1_multi_block_avx + .rva .LSEH_end_sha1_multi_block_avx + .rva .LSEH_info_sha1_multi_block_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_sha1_multi_block_avx2 + .rva .LSEH_end_sha1_multi_block_avx2 + .rva .LSEH_info_sha1_multi_block_avx2 +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_sha1_multi_block: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbody,.Lepilogue # HandlerData[] +.LSEH_info_sha1_multi_block_shaext: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbody_shaext,.Lepilogue_shaext # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_sha1_multi_block_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbody_avx,.Lepilogue_avx # HandlerData[] +___ +$code.=<<___ if ($avx>1); +.LSEH_info_sha1_multi_block_avx2: + .byte 9,0,0,0 + .rva avx2_handler + .rva .Lbody_avx2,.Lepilogue_avx2 # HandlerData[] +___ +} +#################################################################### + +sub rex { + local *opcode=shift; + my ($dst,$src)=@_; + my $rex=0; + + $rex|=0x04 if ($dst>=8); + $rex|=0x01 if ($src>=8); + unshift @opcode,$rex|0x40 if ($rex); +} + +sub sha1rnds4 { + if (@_[0] =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x3a,0xcc); + rex(\@opcode,$3,$2); + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + return ".byte\t".join(',',@opcode); + } else { + return "sha1rnds4\t".@_[0]; + } +} + +sub sha1op38 { + my $instr = shift; + my %opcodelet = ( + "sha1nexte" => 0xc8, + "sha1msg1" => 0xc9, + "sha1msg2" => 0xca ); + + if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x38); + rex(\@opcode,$2,$1); + push @opcode,$opcodelet{$instr}; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } else { + return $instr."\t".@_[0]; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + + s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo or + s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo or + + s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+),%ymm([0-9]+)/$1$2%xmm$3,%xmm$4/go or + s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vinserti128)\b(\s+)%ymm/$1$2\$1,%xmm/go or + s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl index 197bc6b50e95f5..34084938999337 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-mips.pl @@ -15,6 +15,10 @@ # compatible subroutine. There is room for minor optimization on # little-endian platforms... +# September 2012. +# +# Add MIPS32r2 code (>25% less instructions). + ###################################################################### # There is a number of MIPS ABI in use, O32 and N32/64 are most # widely used. Then there is a new contender: NUBI. 
It appears that if @@ -42,7 +46,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 @@ -95,6 +99,10 @@ sub BODY_00_14 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___ if (!$big_endian); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + wsbh @X[$i],@X[$i] # byte swap($i) + rotr @X[$i],@X[$i],16 +#else srl $t0,@X[$i],24 # byte swap($i) srl $t1,@X[$i],8 andi $t2,@X[$i],0xFF00 @@ -104,8 +112,22 @@ sub BODY_00_14 { or @X[$i],$t0 or $t1,$t2 or @X[$i],$t1 +#endif ___ $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + addu $e,$K # $i + xor $t0,$c,$d + rotr $t1,$a,27 + lwl @X[$j],$j*4+$MSB($inp) + and $t0,$b + addu $e,$t1 + lwr @X[$j],$j*4+$LSB($inp) + xor $t0,$d + addu $e,@X[$i] + rotr $b,$b,2 + addu $e,$t0 +#else lwl @X[$j],$j*4+$MSB($inp) sll $t0,$a,5 # $i addu $e,$K @@ -121,6 +143,7 @@ sub BODY_00_14 { addu $e,@X[$i] or $b,$t2 addu $e,$t0 +#endif ___ } @@ -129,6 +152,10 @@ sub BODY_15_19 { my $j=$i+1; $code.=<<___ if (!$big_endian && $i==15); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + wsbh @X[$i],@X[$i] # byte swap($i) + rotr @X[$i],@X[$i],16 +#else srl $t0,@X[$i],24 # byte swap($i) srl $t1,@X[$i],8 andi $t2,@X[$i],0xFF00 @@ -138,8 +165,24 @@ sub BODY_15_19 { or @X[$i],$t0 or @X[$i],$t1 or @X[$i],$t2 +#endif ___ $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + addu $e,$K # $i + xor @X[$j%16],@X[($j+2)%16] + xor $t0,$c,$d + rotr $t1,$a,27 + xor @X[$j%16],@X[($j+8)%16] + and $t0,$b + addu $e,$t1 + xor @X[$j%16],@X[($j+13)%16] + xor $t0,$d + addu $e,@X[$i%16] + rotr @X[$j%16],@X[$j%16],31 + rotr $b,$b,2 + addu $e,$t0 +#else xor @X[$j%16],@X[($j+2)%16] sll $t0,$a,5 # $i addu $e,$K @@ -159,6 +202,7 @@ sub BODY_15_19 { addu $e,@X[$i%16] or $b,$t2 addu $e,$t0 +#endif ___ } @@ -166,6 +210,20 @@ sub BODY_20_39 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___ if ($i<79); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + xor @X[$j%16],@X[($j+2)%16] + addu $e,$K # $i + rotr $t1,$a,27 + xor @X[$j%16],@X[($j+8)%16] + xor $t0,$c,$d + addu $e,$t1 + xor @X[$j%16],@X[($j+13)%16] + xor $t0,$b + addu $e,@X[$i%16] + rotr @X[$j%16],@X[$j%16],31 + rotr $b,$b,2 + addu $e,$t0 +#else xor @X[$j%16],@X[($j+2)%16] sll $t0,$a,5 # $i addu $e,$K @@ -184,8 +242,24 @@ sub BODY_20_39 { or @X[$j%16],$t1 or $b,$t2 addu $e,$t0 +#endif ___ $code.=<<___ if ($i==79); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + lw @X[0],0($ctx) + addu $e,$K # $i + lw @X[1],4($ctx) + rotr $t1,$a,27 + lw @X[2],8($ctx) + xor $t0,$c,$d + addu $e,$t1 + lw @X[3],12($ctx) + xor $t0,$b + addu $e,@X[$i%16] + lw @X[4],16($ctx) + rotr $b,$b,2 + addu $e,$t0 +#else lw @X[0],0($ctx) sll $t0,$a,5 # $i addu $e,$K @@ -203,6 +277,7 @@ sub BODY_20_39 { addu $e,@X[$i%16] or $b,$t2 addu $e,$t0 +#endif ___ } @@ -210,6 +285,22 @@ sub BODY_40_59 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___ if ($i<79); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + addu $e,$K # $i + and $t0,$c,$d + xor @X[$j%16],@X[($j+2)%16] + rotr $t1,$a,27 + addu $e,$t0 + xor @X[$j%16],@X[($j+8)%16] + xor $t0,$c,$d + addu $e,$t1 + xor @X[$j%16],@X[($j+13)%16] + and $t0,$b + addu $e,@X[$i%16] + rotr @X[$j%16],@X[$j%16],31 + rotr $b,$b,2 + addu 
$e,$t0 +#else xor @X[$j%16],@X[($j+2)%16] sll $t0,$a,5 # $i addu $e,$K @@ -230,6 +321,7 @@ sub BODY_40_59 { addu $e,@X[$i%16] or $b,$t2 addu $e,$t0 +#endif ___ } @@ -241,6 +333,10 @@ sub BODY_40_59 { # include #endif +#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) +#define _MIPS_ARCH_MIPS32R2 +#endif + .text .set noat diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl index 2140dd2f8dd6fc..df5989610c4c70 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-ppc.pl @@ -9,8 +9,7 @@ # I let hardware handle unaligned input(*), except on page boundaries # (see below for details). Otherwise straightforward implementation -# with X vector in register bank. The module is big-endian [which is -# not big deal as there're no little-endian targets left around]. +# with X vector in register bank. # # (*) this means that this module is inappropriate for PPC403? Does # anybody know if pre-POWER3 can sustain unaligned load? @@ -38,6 +37,10 @@ $PUSH ="stw"; } else { die "nonsense $flavour"; } +# Define endianess based on flavour +# i.e.: linux64le +$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or @@ -68,14 +71,28 @@ @X=("r16","r17","r18","r19","r20","r21","r22","r23", "r24","r25","r26","r27","r28","r29","r30","r31"); +sub loadbe { +my ($dst, $src, $temp_reg) = @_; +$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $dst,$src +___ +$code.=<<___ if ($LITTLE_ENDIAN); + lwz $temp_reg,$src + rotlwi $dst,$temp_reg,8 + rlwimi $dst,$temp_reg,24,0,7 + rlwimi $dst,$temp_reg,24,16,23 +___ +} + sub BODY_00_19 { my ($i,$a,$b,$c,$d,$e,$f)=@_; my $j=$i+1; -$code.=<<___ if ($i==0); - lwz @X[$i],`$i*4`($inp) -___ + + # Since the last value of $f is discarded, we can use + # it as a temp reg to swap byte-order when needed. + loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0); + loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15); $code.=<<___ if ($i<15); - lwz @X[$j],`$j*4`($inp) add $f,$K,$e rotlwi $e,$a,5 add $f,$f,@X[$i] @@ -108,31 +125,31 @@ sub BODY_20_39 { my $j=$i+1; $code.=<<___ if ($i<79); add $f,$K,$e + xor $t0,$b,$d rotlwi $e,$a,5 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] add $f,$f,@X[$i%16] - xor $t0,$b,$c + xor $t0,$t0,$c xor @X[$j%16],@X[$j%16],@X[($j+8)%16] - add $f,$f,$e + add $f,$f,$t0 rotlwi $b,$b,30 - xor $t0,$t0,$d xor @X[$j%16],@X[$j%16],@X[($j+13)%16] - add $f,$f,$t0 + add $f,$f,$e rotlwi @X[$j%16],@X[$j%16],1 ___ $code.=<<___ if ($i==79); add $f,$K,$e + xor $t0,$b,$d rotlwi $e,$a,5 lwz r16,0($ctx) add $f,$f,@X[$i%16] - xor $t0,$b,$c + xor $t0,$t0,$c lwz r17,4($ctx) - add $f,$f,$e + add $f,$f,$t0 rotlwi $b,$b,30 lwz r18,8($ctx) - xor $t0,$t0,$d lwz r19,12($ctx) - add $f,$f,$t0 + add $f,$f,$e lwz r20,16($ctx) ___ } @@ -316,6 +333,7 @@ sub BODY_40_59 { blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size .sha1_block_data_order,.-.sha1_block_data_order ___ $code.=<<___; .asciz "SHA1 block transform for PPC, CRYPTOGAMS by " diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl index 5c161cecd696c5..b5efcde5c13962 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl @@ -5,6 +5,8 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. 
For further # details see http://www.openssl.org/~appro/cryptogams/. +# +# Hardware SPARC T4 support by David S. Miller . # ==================================================================== # Performance improvement is not really impressive on pre-T1 CPU: +8% @@ -18,10 +20,10 @@ # ensure scalability on UltraSPARC T1, or rather to avoid decay when # amount of active threads exceeds the number of physical cores. -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } +# SPARC T4 SHA1 hardware achieves 3.72 cycles per byte, which is 3.1x +# faster than software. Multi-process benchmark saturates at 11x +# single-process result on 8-core processor, or ~9GBps per 2.85GHz +# socket. $output=shift; open STDOUT,">$output"; @@ -178,17 +180,102 @@ sub BODY_40_59 { ___ } -$code.=<<___ if ($bits==64); +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch -___ -$code.=<<___; +#endif + .section ".text",#alloc,#execinstr +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif + .align 32 .globl sha1_block_data_order sha1_block_data_order: - save %sp,-$frame,%sp + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] + + andcc %g1, CFR_SHA1, %g0 + be .Lsoftware + nop + + ld [%o0 + 0x00], %f0 ! load context + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x0c], %f3 + bne,pn %icc, .Lhwunaligned + ld [%o0 + 0x10], %f4 + +.Lhw_loop: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x38], %f22 + add %o1, 0x40, %o1 + prefetch [%o1 + 63], 20 + + .word 0x81b02820 ! SHA1 + + bne,pt SIZE_T_CC, .Lhw_loop + nop + +.Lhwfinish: + st %f0, [%o0 + 0x00] ! store context + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + st %f4, [%o0 + 0x10] + +.align 8 +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +.Lhwunaligned_loop: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x40], %f26 + add %o1, 0x40, %o1 + prefetch [%o1 + 63], 20 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + .word 0x81b02820 ! SHA1 + + bne,pt SIZE_T_CC, .Lhwunaligned_loop + for %f26, %f26, %f10 ! %f10=%f26 + + ba .Lhwfinish + nop + +.align 16 +.Lsoftware: + save %sp,-STACK_FRAME,%sp sllx $len,6,$len add $inp,$len,$len @@ -268,7 +355,7 @@ sub BODY_40_59 { add $E,@X[4],$E st $E,[$ctx+16] - bne `$bits==64?"%xcc":"%icc"`,.Lloop + bne SIZE_T_CC,.Lloop andn $inp,7,$tmp0 ret @@ -279,6 +366,62 @@ sub BODY_40_59 { .align 4 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. 
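For reference, the .word constants these helpers emit are just the VIS instruction fields packed by hand. Below is a small C sketch of the same packing that unvis() performs, shown only for the lower float registers (the re-encoding unvis() applies for %f32..%f62 is left out); the helper name and the printed examples are illustrative and not part of the patch:

    #include <stdio.h>

    /* Pack a VIS FPop word: 0x81b00000 | rd<<25 | rs1<<14 | opf<<5 | rs2,
     * mirroring the formula in unvis().  Valid here only for %f0..%f31. */
    static unsigned vis_word(unsigned opf, unsigned rs1, unsigned rs2, unsigned rd)
    {
        return 0x81b00000u | (rd << 25) | (rs1 << 14) | (opf << 5) | rs2;
    }

    int main(void)
    {
        /* opf values taken from the %visopf table in unvis() */
        printf(".word 0x%08x ! faligndata %%f10,%%f12,%%f8\n",
               vis_word(0x048, 10, 12, 8));
        printf(".word 0x%08x ! for %%f26,%%f26,%%f10\n",
               vis_word(0x07c, 26, 26, 10));
        return 0;
    }

alignaddr is handled the same way by unalignaddr(), just with integer registers and base opcode 0x81b00300.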
+sub unvis { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my $ref,$opf; +my %visopf = ( "faligndata" => 0x048, + "for" => 0x07c ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} +sub unalignaddr { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my $ref="$mnemonic\t$rs1,$rs2,$rd"; + + foreach ($rs1,$rs2,$rd) { + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } + else { return $ref; } + } + return sprintf ".word\t0x%08x !%s", + 0x81b00300|$rd<<25|$rs1<<14|$rs2, + $ref; +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unvis($1,$2,$3,$4) + /ge; + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unalignaddr($1,$2,$3,$4) + /ge; + + print $_,"\n"; +} + close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl index f15c7ec39b20d9..9bb6b498190fdf 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -49,17 +49,37 @@ # # Add AVX code path. See sha1-586.pl for further information. +# May 2013. +# +# Add AVX2+BMI code path. Initial attempt (utilizing BMI instructions +# and loading pair of consecutive blocks to 256-bit %ymm registers) +# did not provide impressive performance improvement till a crucial +# hint regarding the number of Xupdate iterations to pre-compute in +# advance was provided by Ilya Albrekht of Intel Corp. + +# March 2014. +# +# Add support for Intel SHA Extensions. + ###################################################################### # Current performance is summarized in following table. Numbers are # CPU clock cycles spent to process single byte (less is better). 
# -# x86_64 SSSE3 AVX -# P4 9.8 - -# Opteron 6.6 - -# Core2 6.7 6.1/+10% - -# Atom 11.0 9.7/+13% - -# Westmere 7.1 5.6/+27% - -# Sandy Bridge 7.9 6.3/+25% 5.2/+51% +# x86_64 SSSE3 AVX[2] +# P4 9.05 - +# Opteron 6.26 - +# Core2 6.55 6.05/+8% - +# Westmere 6.73 5.30/+27% - +# Sandy Bridge 7.70 6.10/+26% 4.99/+54% +# Ivy Bridge 6.06 4.67/+30% 4.60/+32% +# Haswell 5.45 4.15/+31% 3.57/+53% +# Bulldozer 9.11 5.95/+53% +# VIA Nano 9.32 7.15/+30% +# Atom 10.3 9.17/+12% +# Silvermont 13.1(*) 9.37/+40% +# +# (*) obviously suboptimal result, nothing was done about it, +# because SSSE3 code is compiled unconditionally; $flavour = shift; $output = shift; @@ -72,15 +92,27 @@ ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -$avx=1 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` - =~ /GNU assembler version ([2-9]\.[0-9]+)/ && - $1>=2.19); -$avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && - `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/ && - $1>=2.09); -$avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && - `ml64 2>&1` =~ /Version ([0-9]+)\./ && - $1>=10); +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([2-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +$shaext=1; ### set to zero if compiling for 1.0.1 +$avx=1 if (!$shaext && $avx); open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; @@ -97,7 +129,7 @@ $t0="%eax"; $t1="%ebx"; $t2="%ecx"; -@xi=("%edx","%ebp"); +@xi=("%edx","%ebp","%r14d"); $A="%esi"; $B="%edi"; $C="%r11d"; @@ -112,42 +144,40 @@ sub BODY_00_19 { $code.=<<___ if ($i==0); mov `4*$i`($inp),$xi[0] bswap $xi[0] - mov $xi[0],`4*$i`(%rsp) ___ $code.=<<___ if ($i<15); - mov $c,$t0 mov `4*$j`($inp),$xi[1] + mov $d,$t0 + mov $xi[0],`4*$i`(%rsp) mov $a,$t2 - xor $d,$t0 bswap $xi[1] + xor $c,$t0 rol \$5,$t2 - lea 0x5a827999($xi[0],$e),$e and $b,$t0 - mov $xi[1],`4*$j`(%rsp) + lea 0x5a827999($xi[0],$e),$e add $t2,$e xor $d,$t0 rol \$30,$b add $t0,$e ___ $code.=<<___ if ($i>=15); - mov `4*($j%16)`(%rsp),$xi[1] - mov $c,$t0 + xor `4*($j%16)`(%rsp),$xi[1] + mov $d,$t0 + mov $xi[0],`4*($i%16)`(%rsp) mov $a,$t2 xor `4*(($j+2)%16)`(%rsp),$xi[1] - xor $d,$t0 + xor $c,$t0 rol \$5,$t2 xor `4*(($j+8)%16)`(%rsp),$xi[1] and $b,$t0 lea 0x5a827999($xi[0],$e),$e - xor `4*(($j+13)%16)`(%rsp),$xi[1] + rol \$30,$b xor $d,$t0 - rol \$1,$xi[1] add $t2,$e - rol \$30,$b - mov $xi[1],`4*($j%16)`(%rsp) + rol \$1,$xi[1] add $t0,$e ___ -unshift(@xi,pop(@xi)); +push(@xi,shift(@xi)); } sub BODY_20_39 { @@ -155,62 +185,58 @@ sub BODY_20_39 { my $j=$i+1; my $K=($i<40)?0x6ed9eba1:0xca62c1d6; $code.=<<___ if ($i<79); - mov `4*($j%16)`(%rsp),$xi[1] - mov $c,$t0 + xor `4*($j%16)`(%rsp),$xi[1] + mov $b,$t0 + `"mov $xi[0],".4*($i%16)."(%rsp)" if ($i<72)` mov $a,$t2 xor `4*(($j+2)%16)`(%rsp),$xi[1] - xor $b,$t0 + xor $d,$t0 rol \$5,$t2 - lea $K($xi[0],$e),$e xor `4*(($j+8)%16)`(%rsp),$xi[1] - xor $d,$t0 + lea $K($xi[0],$e),$e + xor $c,$t0 add $t2,$e - xor `4*(($j+13)%16)`(%rsp),$xi[1] rol \$30,$b add $t0,$e rol \$1,$xi[1] ___ 
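The reshuffled integer rounds above (and the lane-parallel AVX bodies in the multi-block module earlier in this diff) still compute the standard SHA-1 step; only the scheduling changes so that part of the next round's boolean function is ready a round early. A minimal C sketch of one round, using the same ((c^d)&b)^d / b^c^d / ((b^c)&(c^d))^c forms the round comments refer to (the names here are illustrative, not taken from the patch):

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* Round-dependent boolean function, in the xor/and forms the assembly uses:
     *   rounds  0-19:        ((c^d)&b)^d      == Ch(b,c,d)
     *   rounds 20-39, 60-79: b^c^d            == Parity(b,c,d)
     *   rounds 40-59:        ((b^c)&(c^d))^c  == Maj(b,c,d)            */
    static uint32_t sha1_f(int i, uint32_t b, uint32_t c, uint32_t d)
    {
        if (i < 20) return ((c ^ d) & b) ^ d;
        if (i < 40) return b ^ c ^ d;
        if (i < 60) return ((b ^ c) & (c ^ d)) ^ c;
        return b ^ c ^ d;
    }

    static const uint32_t K[4] = { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };

    /* One round: e += rol(a,5) + f(b,c,d) + K + X[i]; b = rol(b,30).
     * The caller then rotates the (a,b,c,d,e) roles, which the perlasm
     * does by rotating its @V register list.                           */
    static void sha1_round(int i, uint32_t a, uint32_t *b, uint32_t c,
                           uint32_t d, uint32_t *e, uint32_t xi)
    {
        *e += rol32(a, 5) + sha1_f(i, *b, c, d) + K[i / 20] + xi;
        *b  = rol32(*b, 30);
    }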
-$code.=<<___ if ($i<76); - mov $xi[1],`4*($j%16)`(%rsp) -___ $code.=<<___ if ($i==79); - mov $c,$t0 + mov $b,$t0 mov $a,$t2 - xor $b,$t0 + xor $d,$t0 lea $K($xi[0],$e),$e rol \$5,$t2 - xor $d,$t0 + xor $c,$t0 add $t2,$e rol \$30,$b add $t0,$e ___ -unshift(@xi,pop(@xi)); +push(@xi,shift(@xi)); } sub BODY_40_59 { my ($i,$a,$b,$c,$d,$e)=@_; my $j=$i+1; $code.=<<___; - mov `4*($j%16)`(%rsp),$xi[1] - mov $c,$t0 - mov $c,$t1 + xor `4*($j%16)`(%rsp),$xi[1] + mov $d,$t0 + mov $xi[0],`4*($i%16)`(%rsp) + mov $d,$t1 xor `4*(($j+2)%16)`(%rsp),$xi[1] - and $d,$t0 + and $c,$t0 mov $a,$t2 xor `4*(($j+8)%16)`(%rsp),$xi[1] - xor $d,$t1 lea 0x8f1bbcdc($xi[0],$e),$e + xor $c,$t1 rol \$5,$t2 - xor `4*(($j+13)%16)`(%rsp),$xi[1] add $t0,$e - and $b,$t1 rol \$1,$xi[1] - add $t1,$e - rol \$30,$b - mov $xi[1],`4*($j%16)`(%rsp) + and $b,$t1 add $t2,$e + rol \$30,$b + add $t1,$e ___ -unshift(@xi,pop(@xi)); +push(@xi,shift(@xi)); } $code.=<<___; @@ -223,9 +249,19 @@ sub BODY_40_59 { sha1_block_data_order: mov OPENSSL_ia32cap_P+0(%rip),%r9d mov OPENSSL_ia32cap_P+4(%rip),%r8d + mov OPENSSL_ia32cap_P+8(%rip),%r10d test \$`1<<9`,%r8d # check SSSE3 bit jz .Lialu ___ +$code.=<<___ if ($shaext); + test \$`1<<29`,%r10d # check SHA bit + jnz _shaext_shortcut +___ +$code.=<<___ if ($avx>1); + and \$`1<<3|1<<5|1<<8`,%r10d # check AVX2+BMI1+BMI2 + cmp \$`1<<3|1<<5|1<<8`,%r10d + je _avx2_shortcut +___ $code.=<<___ if ($avx); and \$`1<<28`,%r8d # mask AVX bit and \$`1<<30`,%r9d # mask "Intel CPU" bit @@ -238,17 +274,18 @@ sub BODY_40_59 { .align 16 .Lialu: + mov %rsp,%rax push %rbx push %rbp push %r12 push %r13 - mov %rsp,%r11 + push %r14 mov %rdi,$ctx # reassigned argument sub \$`8+16*4`,%rsp mov %rsi,$inp # reassigned argument and \$-64,%rsp mov %rdx,$num # reassigned argument - mov %r11,`16*4`(%rsp) + mov %rax,`16*4`(%rsp) .Lprologue: mov 0($ctx),$A @@ -282,53 +319,187 @@ sub BODY_40_59 { jnz .Lloop mov `16*4`(%rsp),%rsi - mov (%rsi),%r13 - mov 8(%rsi),%r12 - mov 16(%rsi),%rbp - mov 24(%rsi),%rbx - lea 32(%rsi),%rsp + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp .Lepilogue: ret .size sha1_block_data_order,.-sha1_block_data_order ___ +if ($shaext) {{{ +###################################################################### +# Intel SHA Extensions implementation of SHA1 update function. 
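Before the SHAEXT body itself, a quick summary of the dispatch added to sha1_block_data_order above: it reads three OPENSSL_ia32cap_P words and picks one of the scalar, SSSE3, AVX, AVX2+BMI or SHA-extension paths. The following C sketch restates that selection with the bit positions tested above; the enum, function name and the exact combination on the AVX path (the AVX and "Intel CPU" masks are shown above, but not their full comparison) are illustrative assumptions, not OpenSSL API:

    #include <stdint.h>

    typedef enum { SHA1_IALU, SHA1_SSSE3, SHA1_AVX, SHA1_AVX2, SHA1_SHAEXT } sha1_path;

    /* w0/w1/w2 stand in for the OPENSSL_ia32cap_P words loaded into %r9d/%r8d/%r10d. */
    static sha1_path pick_sha1_path(uint32_t w0, uint32_t w1, uint32_t w2)
    {
        if (!(w1 & (1u << 9)))                        /* no SSSE3: fall back to .Lialu */
            return SHA1_IALU;
        if (w2 & (1u << 29))                          /* SHA extensions */
            return SHA1_SHAEXT;
        if ((w2 & ((1u << 3) | (1u << 5) | (1u << 8)))
                == ((1u << 3) | (1u << 5) | (1u << 8)))
            return SHA1_AVX2;                         /* AVX2 + BMI1 + BMI2 */
        if ((w1 & (1u << 28)) && (w0 & (1u << 30)))   /* AVX plus the "Intel CPU" bit */
            return SHA1_AVX;
        return SHA1_SSSE3;
    }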
+# +my ($ctx,$inp,$num)=("%rdi","%rsi","%rdx"); +my ($ABCD,$E,$E_,$BSWAP,$ABCD_SAVE,$E_SAVE)=map("%xmm$_",(0..3,8,9)); +my @MSG=map("%xmm$_",(4..7)); + +$code.=<<___; +.type sha1_block_data_order_shaext,\@function,3 +.align 32 +sha1_block_data_order_shaext: +_shaext_shortcut: +___ +$code.=<<___ if ($win64); + lea `-8-4*16`(%rsp),%rsp + movaps %xmm6,-8-4*16(%rax) + movaps %xmm7,-8-3*16(%rax) + movaps %xmm8,-8-2*16(%rax) + movaps %xmm9,-8-1*16(%rax) +.Lprologue_shaext: +___ +$code.=<<___; + movdqu ($ctx),$ABCD + movd 16($ctx),$E + movdqa K_XX_XX+0xa0(%rip),$BSWAP # byte-n-word swap + + movdqu ($inp),@MSG[0] + pshufd \$0b00011011,$ABCD,$ABCD # flip word order + movdqu 0x10($inp),@MSG[1] + pshufd \$0b00011011,$E,$E # flip word order + movdqu 0x20($inp),@MSG[2] + pshufb $BSWAP,@MSG[0] + movdqu 0x30($inp),@MSG[3] + pshufb $BSWAP,@MSG[1] + pshufb $BSWAP,@MSG[2] + movdqa $E,$E_SAVE # offload $E + pshufb $BSWAP,@MSG[3] + jmp .Loop_shaext + +.align 16 +.Loop_shaext: + dec $num + lea 0x40($inp),%rax # next input block + paddd @MSG[0],$E + cmovne %rax,$inp + movdqa $ABCD,$ABCD_SAVE # offload $ABCD +___ +for($i=0;$i<20-4;$i+=2) { +$code.=<<___; + sha1msg1 @MSG[1],@MSG[0] + movdqa $ABCD,$E_ + sha1rnds4 \$`int($i/5)`,$E,$ABCD # 0-3... + sha1nexte @MSG[1],$E_ + pxor @MSG[2],@MSG[0] + sha1msg1 @MSG[2],@MSG[1] + sha1msg2 @MSG[3],@MSG[0] + + movdqa $ABCD,$E + sha1rnds4 \$`int(($i+1)/5)`,$E_,$ABCD + sha1nexte @MSG[2],$E + pxor @MSG[3],@MSG[1] + sha1msg2 @MSG[0],@MSG[1] +___ + push(@MSG,shift(@MSG)); push(@MSG,shift(@MSG)); +} +$code.=<<___; + movdqu ($inp),@MSG[0] + movdqa $ABCD,$E_ + sha1rnds4 \$3,$E,$ABCD # 64-67 + sha1nexte @MSG[1],$E_ + movdqu 0x10($inp),@MSG[1] + pshufb $BSWAP,@MSG[0] + + movdqa $ABCD,$E + sha1rnds4 \$3,$E_,$ABCD # 68-71 + sha1nexte @MSG[2],$E + movdqu 0x20($inp),@MSG[2] + pshufb $BSWAP,@MSG[1] + + movdqa $ABCD,$E_ + sha1rnds4 \$3,$E,$ABCD # 72-75 + sha1nexte @MSG[3],$E_ + movdqu 0x30($inp),@MSG[3] + pshufb $BSWAP,@MSG[2] + + movdqa $ABCD,$E + sha1rnds4 \$3,$E_,$ABCD # 76-79 + sha1nexte $E_SAVE,$E + pshufb $BSWAP,@MSG[3] + + paddd $ABCD_SAVE,$ABCD + movdqa $E,$E_SAVE # offload $E + + jnz .Loop_shaext + + pshufd \$0b00011011,$ABCD,$ABCD + pshufd \$0b00011011,$E,$E + movdqu $ABCD,($ctx) + movd $E,16($ctx) +___ +$code.=<<___ if ($win64); + movaps -8-4*16(%rax),%xmm6 + movaps -8-3*16(%rax),%xmm7 + movaps -8-2*16(%rax),%xmm8 + movaps -8-1*16(%rax),%xmm9 + mov %rax,%rsp +.Lepilogue_shaext: +___ +$code.=<<___; + ret +.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext +___ +}}} {{{ my $Xi=4; my @X=map("%xmm$_",(4..7,0..3)); my @Tx=map("%xmm$_",(8..10)); +my $Kx="%xmm11"; my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization my @T=("%esi","%edi"); my $j=0; +my $rx=0; my $K_XX_XX="%r11"; my $_rol=sub { &rol(@_) }; my $_ror=sub { &ror(@_) }; +{ my $sn; +sub align32() { + ++$sn; +$code.=<<___; + jmp .Lalign32_$sn # see "Decoded ICache" in manual +.align 32 +.Lalign32_$sn: +___ +} +} + $code.=<<___; .type sha1_block_data_order_ssse3,\@function,3 .align 16 sha1_block_data_order_ssse3: _ssse3_shortcut: + mov %rsp,%rax push %rbx push %rbp push %r12 - lea `-64-($win64?5*16:0)`(%rsp),%rsp + push %r13 # redundant, done to share Win64 SE handler + push %r14 + lea `-64-($win64?6*16:0)`(%rsp),%rsp ___ $code.=<<___ if ($win64); - movaps %xmm6,64+0(%rsp) - movaps %xmm7,64+16(%rsp) - movaps %xmm8,64+32(%rsp) - movaps %xmm9,64+48(%rsp) - movaps %xmm10,64+64(%rsp) + movaps %xmm6,-40-6*16(%rax) + movaps %xmm7,-40-5*16(%rax) + movaps %xmm8,-40-4*16(%rax) + movaps 
%xmm9,-40-3*16(%rax) + movaps %xmm10,-40-2*16(%rax) + movaps %xmm11,-40-1*16(%rax) .Lprologue_ssse3: ___ $code.=<<___; + mov %rax,%r14 # original %rsp + and \$-64,%rsp mov %rdi,$ctx # reassigned argument mov %rsi,$inp # reassigned argument mov %rdx,$num # reassigned argument shl \$6,$num add $inp,$num - lea K_XX_XX(%rip),$K_XX_XX + lea K_XX_XX+64(%rip),$K_XX_XX mov 0($ctx),$A # load context mov 4($ctx),$B @@ -336,19 +507,22 @@ sub BODY_40_59 { mov 12($ctx),$D mov $B,@T[0] # magic seed mov 16($ctx),$E + mov $C,@T[1] + xor $D,@T[1] + and @T[1],@T[0] movdqa 64($K_XX_XX),@X[2] # pbswap mask - movdqa 0($K_XX_XX),@Tx[1] # K_00_19 + movdqa -64($K_XX_XX),@Tx[1] # K_00_19 movdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] movdqu 16($inp),@X[-3&7] movdqu 32($inp),@X[-2&7] movdqu 48($inp),@X[-1&7] pshufb @X[2],@X[-4&7] # byte swap - add \$64,$inp pshufb @X[2],@X[-3&7] pshufb @X[2],@X[-2&7] - pshufb @X[2],@X[-1&7] + add \$64,$inp paddd @Tx[1],@X[-4&7] # add K_00_19 + pshufb @X[2],@X[-1&7] paddd @Tx[1],@X[-3&7] paddd @Tx[1],@X[-2&7] movdqa @X[-4&7],0(%rsp) # X[]+K xfer to IALU @@ -373,61 +547,61 @@ () my @insns = (&$body,&$body,&$body,&$body); # 40 instructions my ($a,$b,$c,$d,$e); - &movdqa (@X[0],@X[-3&7]); - eval(shift(@insns)); + eval(shift(@insns)); # ror + &pshufd (@X[0],@X[-4&7],0xee); # was &movdqa (@X[0],@X[-3&7]); eval(shift(@insns)); &movdqa (@Tx[0],@X[-1&7]); - &palignr(@X[0],@X[-4&7],8); # compose "X[-14]" in "X[0]" + &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); - &paddd (@Tx[1],@X[-1&7]); + &punpcklqdq(@X[0],@X[-3&7]); # compose "X[-14]" in "X[0]", was &palignr(@X[0],@X[-4&7],8); eval(shift(@insns)); + eval(shift(@insns)); # rol eval(shift(@insns)); &psrldq (@Tx[0],4); # "X[-3]", 3 dwords eval(shift(@insns)); eval(shift(@insns)); + &pxor (@X[0],@X[-4&7]); # "X[0]"^="X[-16]" eval(shift(@insns)); - eval(shift(@insns)); - + eval(shift(@insns)); # ror &pxor (@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - eval(shift(@insns)); &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" eval(shift(@insns)); - eval(shift(@insns)); + eval(shift(@insns)); # rol &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); &movdqa (@Tx[2],@X[0]); - &movdqa (@Tx[0],@X[0]); - eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); # ror + &movdqa (@Tx[0],@X[0]); eval(shift(@insns)); &pslldq (@Tx[2],12); # "X[0]"<<96, extract one dword &paddd (@X[0],@X[0]); eval(shift(@insns)); eval(shift(@insns)); - eval(shift(@insns)); - eval(shift(@insns)); &psrld (@Tx[0],31); eval(shift(@insns)); + eval(shift(@insns)); # rol eval(shift(@insns)); &movdqa (@Tx[1],@Tx[2]); eval(shift(@insns)); eval(shift(@insns)); &psrld (@Tx[2],30); - &por (@X[0],@Tx[0]); # "X[0]"<<<=1 eval(shift(@insns)); + eval(shift(@insns)); # ror + &por (@X[0],@Tx[0]); # "X[0]"<<<=1 eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); @@ -435,12 +609,13 @@ () &pslld (@Tx[1],2); &pxor (@X[0],@Tx[2]); eval(shift(@insns)); - eval(shift(@insns)); - &movdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + &movdqa (@Tx[2],eval(2*16*(($Xi)/5)-64)."($K_XX_XX)"); # K_XX_XX + eval(shift(@insns)); # rol eval(shift(@insns)); eval(shift(@insns)); &pxor (@X[0],@Tx[1]); # "X[0]"^=("X[0]">>96)<<<2 + &pshufd (@Tx[1],@X[-1&7],0xee) if ($Xi==7); # was &movdqa (@Tx[0],@X[-1&7]) in Xupdate_ssse3_32_79 foreach (@insns) { eval; } # remaining instructions [if any] @@ -451,27 +626,30 @@ () sub Xupdate_ssse3_32_79() 
{ use integer; my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my @insns = (&$body,&$body,&$body,&$body); # 32 to 44 instructions my ($a,$b,$c,$d,$e); - &movdqa (@Tx[0],@X[-1&7]) if ($Xi==8); - eval(shift(@insns)); # body_20_39 + eval(shift(@insns)) if ($Xi==8); &pxor (@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" - &palignr(@Tx[0],@X[-2&7],8); # compose "X[-6]" + eval(shift(@insns)) if ($Xi==8); + eval(shift(@insns)); # body_20_39 eval(shift(@insns)); + eval(shift(@insns)) if (@insns[1] =~ /_ror/); + eval(shift(@insns)) if (@insns[0] =~ /_ror/); + &punpcklqdq(@Tx[0],@X[-1&7]); # compose "X[-6]", was &palignr(@Tx[0],@X[-2&7],8); eval(shift(@insns)); eval(shift(@insns)); # rol &pxor (@X[0],@X[-7&7]); # "X[0]"^="X[-28]" eval(shift(@insns)); - eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); + eval(shift(@insns)); if ($Xi%5) { &movdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... } else { # ... or load next one - &movdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); + &movdqa (@Tx[2],eval(2*16*($Xi/5)-64)."($K_XX_XX)"); } - &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); # ror + &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); &pxor (@X[0],@Tx[0]); # "X[0]"^="X[-6]" @@ -479,29 +657,31 @@ () eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); # rol + eval(shift(@insns)) if (@insns[0] =~ /_ror/); &movdqa (@Tx[0],@X[0]); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); + &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU eval(shift(@insns)); # ror eval(shift(@insns)); + eval(shift(@insns)); # body_20_39 &pslld (@X[0],2); - eval(shift(@insns)); # body_20_39 eval(shift(@insns)); - &psrld (@Tx[0],30); eval(shift(@insns)); - eval(shift(@insns)); # rol + &psrld (@Tx[0],30); + eval(shift(@insns)) if (@insns[0] =~ /_rol/);# rol eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); # ror - eval(shift(@insns)); &por (@X[0],@Tx[0]); # "X[0]"<<<=2 - eval(shift(@insns)); # body_20_39 eval(shift(@insns)); - &movdqa (@Tx[1],@X[0]) if ($Xi<19); + eval(shift(@insns)); # body_20_39 + eval(shift(@insns)) if (@insns[1] =~ /_rol/); + eval(shift(@insns)) if (@insns[0] =~ /_rol/); + &pshufd(@Tx[1],@X[-1&7],0xee) if ($Xi<19); # was &movdqa (@Tx[1],@X[0]) eval(shift(@insns)); eval(shift(@insns)); # rol eval(shift(@insns)); @@ -522,9 +702,10 @@ () my ($a,$b,$c,$d,$e); eval(shift(@insns)); - &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@Tx[1],@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); @@ -538,7 +719,7 @@ () unshift(@Tx,pop(@Tx)); &movdqa (@X[2],"64($K_XX_XX)"); # pbswap mask - &movdqa (@Tx[1],"0($K_XX_XX)"); # K_00_19 + &movdqa (@Tx[1],"-64($K_XX_XX)"); # K_00_19 &movdqu (@X[-4&7],"0($inp)"); # load input &movdqu (@X[-3&7],"16($inp)"); &movdqu (@X[-2&7],"32($inp)"); @@ -557,9 +738,12 @@ () eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); &pshufb (@X[($Xi-3)&7],@X[2]); eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); &paddd (@X[($Xi-4)&7],@Tx[1]); eval(shift(@insns)); eval(shift(@insns)); @@ -568,6 +752,8 @@ () &movdqa (eval(16*$Xi)."(%rsp)",@X[($Xi-4)&7]); # X[]+K xfer to IALU eval(shift(@insns)); eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); &psubd (@X[($Xi-4)&7],@Tx[1]); foreach (@insns) { eval; } @@ -583,51 +769,66 @@ () foreach (@insns) { eval; } } -sub body_00_19 () { +sub body_00_19 () { # ((c^d)&b)^d + # on start @T[0]=(c^d)&b + return &body_20_39() 
if ($rx==19); $rx++; ( '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j&15))."(%rsp)");', # X[]+K xfer - '&xor ($c,$d);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&xor ($c,$d);', # restore $c - '&xor (@T[0],$d);', - '&add ($e,$a);', - '&$_ror ($b,$j?7:2);', # $b>>>2 - '&add ($e,@T[0]);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&$_ror ($b,$j?7:2)', # $b>>>2 + '&xor (@T[0],$d)', + '&mov (@T[1],$a)', # $b for next round + + '&add ($e,eval(4*($j&15))."(%rsp)")', # X[]+K xfer + '&xor ($b,$c)', # $c^$d for next round + + '&$_rol ($a,5)', + '&add ($e,@T[0])', + '&and (@T[1],$b)', # ($b&($c^$d)) for next round + + '&xor ($b,$c)', # restore $b + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); } -sub body_20_39 () { +sub body_20_39 () { # b^d^c + # on entry @T[0]=b^d + return &body_40_59() if ($rx==39); $rx++; ( '($a,$b,$c,$d,$e)=@V;'. - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&xor (@T[0],$d);', # ($b^$d) - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&xor (@T[0],$c);', # ($b^$d^$c) - '&add ($e,$a);', - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[0]);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&add ($e,eval(4*($j&15))."(%rsp)")', # X[]+K xfer + '&xor (@T[0],$d) if($j==19);'. + '&xor (@T[0],$c) if($j> 19)', # ($b^$d^$c) + '&mov (@T[1],$a)', # $b for next round + + '&$_rol ($a,5)', + '&add ($e,@T[0])', + '&xor (@T[1],$c) if ($j< 79)', # $b^$d for next round + + '&$_ror ($b,7)', # $b>>>2 + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); } -sub body_40_59 () { +sub body_40_59 () { # ((b^c)&(c^d))^c + # on entry @T[0]=(b^c), (c^=d) + $rx++; ( '($a,$b,$c,$d,$e)=@V;'. - '&mov (@T[1],$c);', - '&xor ($c,$d);', - '&add ($e,eval(4*($j++&15))."(%rsp)");', # X[]+K xfer - '&and (@T[1],$d);', - '&and (@T[0],$c);', # ($b&($c^$d)) - '&$_ror ($b,7);', # $b>>>2 - '&add ($e,@T[1]);', - '&mov (@T[1],$a);', # $b in next round - '&$_rol ($a,5);', - '&add ($e,@T[0]);', - '&xor ($c,$d);', # restore $c - '&add ($e,$a);' .'unshift(@V,pop(@V)); unshift(@T,pop(@T));' + '&add ($e,eval(4*($j&15))."(%rsp)")', # X[]+K xfer + '&and (@T[0],$c) if ($j>=40)', # (b^c)&(c^d) + '&xor ($c,$d) if ($j>=40)', # restore $c + + '&$_ror ($b,7)', # $b>>>2 + '&mov (@T[1],$a)', # $b for next round + '&xor (@T[0],$c)', + + '&$_rol ($a,5)', + '&add ($e,@T[0])', + '&xor (@T[1],$c) if ($j==59);'. 
+ '&xor (@T[1],$b) if ($j< 59)', # b^c for next round + + '&xor ($b,$c) if ($j< 59)', # c^d for next round + '&add ($e,$a);' .'$j++; unshift(@V,pop(@V)); unshift(@T,pop(@T));' ); } $code.=<<___; @@ -668,8 +869,11 @@ () mov @T[0],4($ctx) mov @T[0],$B # magic seed mov $C,8($ctx) + mov $C,@T[1] mov $D,12($ctx) + xor $D,@T[1] mov $E,16($ctx) + and @T[1],@T[0] jmp .Loop_ssse3 .align 16 @@ -694,31 +898,34 @@ () mov $E,16($ctx) ___ $code.=<<___ if ($win64); - movaps 64+0(%rsp),%xmm6 - movaps 64+16(%rsp),%xmm7 - movaps 64+32(%rsp),%xmm8 - movaps 64+48(%rsp),%xmm9 - movaps 64+64(%rsp),%xmm10 + movaps -40-6*16(%r14),%xmm6 + movaps -40-5*16(%r14),%xmm7 + movaps -40-4*16(%r14),%xmm8 + movaps -40-3*16(%r14),%xmm9 + movaps -40-2*16(%r14),%xmm10 + movaps -40-1*16(%r14),%xmm11 ___ $code.=<<___; - lea `64+($win64?5*16:0)`(%rsp),%rsi - mov 0(%rsi),%r12 - mov 8(%rsi),%rbp - mov 16(%rsi),%rbx - lea 24(%rsi),%rsp + lea (%r14),%rsi + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp .Lepilogue_ssse3: ret .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 ___ if ($avx) { -my $Xi=4; -my @X=map("%xmm$_",(4..7,0..3)); -my @Tx=map("%xmm$_",(8..10)); -my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp"); # size optimization -my @T=("%esi","%edi"); -my $j=0; -my $K_XX_XX="%r11"; +$Xi=4; # reset variables +@X=map("%xmm$_",(4..7,0..3)); +@Tx=map("%xmm$_",(8..10)); +$j=0; +$rx=0; + +my $done_avx_label=".Ldone_avx"; my $_rol=sub { &shld(@_[0],@_) }; my $_ror=sub { &shrd(@_[0],@_) }; @@ -728,28 +935,34 @@ () .align 16 sha1_block_data_order_avx: _avx_shortcut: + mov %rsp,%rax push %rbx push %rbp push %r12 - lea `-64-($win64?5*16:0)`(%rsp),%rsp + push %r13 # redundant, done to share Win64 SE handler + push %r14 + lea `-64-($win64?6*16:0)`(%rsp),%rsp + vzeroupper ___ $code.=<<___ if ($win64); - movaps %xmm6,64+0(%rsp) - movaps %xmm7,64+16(%rsp) - movaps %xmm8,64+32(%rsp) - movaps %xmm9,64+48(%rsp) - movaps %xmm10,64+64(%rsp) + vmovaps %xmm6,-40-6*16(%rax) + vmovaps %xmm7,-40-5*16(%rax) + vmovaps %xmm8,-40-4*16(%rax) + vmovaps %xmm9,-40-3*16(%rax) + vmovaps %xmm10,-40-2*16(%rax) + vmovaps %xmm11,-40-1*16(%rax) .Lprologue_avx: ___ $code.=<<___; + mov %rax,%r14 # original %rsp + and \$-64,%rsp mov %rdi,$ctx # reassigned argument mov %rsi,$inp # reassigned argument mov %rdx,$num # reassigned argument - vzeroupper shl \$6,$num add $inp,$num - lea K_XX_XX(%rip),$K_XX_XX + lea K_XX_XX+64(%rip),$K_XX_XX mov 0($ctx),$A # load context mov 4($ctx),$B @@ -757,9 +970,12 @@ () mov 12($ctx),$D mov $B,@T[0] # magic seed mov 16($ctx),$E + mov $C,@T[1] + xor $D,@T[1] + and @T[1],@T[0] vmovdqa 64($K_XX_XX),@X[2] # pbswap mask - vmovdqa 0($K_XX_XX),@Tx[1] # K_00_19 + vmovdqa -64($K_XX_XX),$Kx # K_00_19 vmovdqu 0($inp),@X[-4&7] # load input to %xmm[0-3] vmovdqu 16($inp),@X[-3&7] vmovdqu 32($inp),@X[-2&7] @@ -769,9 +985,9 @@ () vpshufb @X[2],@X[-3&7],@X[-3&7] vpshufb @X[2],@X[-2&7],@X[-2&7] vpshufb @X[2],@X[-1&7],@X[-1&7] - vpaddd @Tx[1],@X[-4&7],@X[0] # add K_00_19 - vpaddd @Tx[1],@X[-3&7],@X[1] - vpaddd @Tx[1],@X[-2&7],@X[2] + vpaddd $Kx,@X[-4&7],@X[0] # add K_00_19 + vpaddd $Kx,@X[-3&7],@X[1] + vpaddd $Kx,@X[-2&7],@X[2] vmovdqa @X[0],0(%rsp) # X[]+K xfer to IALU vmovdqa @X[1],16(%rsp) vmovdqa @X[2],32(%rsp) @@ -790,10 +1006,10 @@ () eval(shift(@insns)); eval(shift(@insns)); - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + &vpaddd (@Tx[1],$Kx,@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); - &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords + 
&vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords eval(shift(@insns)); eval(shift(@insns)); &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" @@ -843,7 +1059,7 @@ () &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 eval(shift(@insns)); eval(shift(@insns)); - &vmovdqa (@Tx[2],eval(16*(($Xi)/5))."($K_XX_XX)"); # K_XX_XX + &vmovdqa ($Kx,eval(2*16*(($Xi)/5)-64)."($K_XX_XX)") if ($Xi%5==0); # K_XX_XX eval(shift(@insns)); eval(shift(@insns)); @@ -851,13 +1067,12 @@ () foreach (@insns) { eval; } # remaining instructions [if any] $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); } sub Xupdate_avx_32_79() { use integer; my $body = shift; - my @insns = (&$body,&$body,&$body,&$body); # 32 to 48 instructions + my @insns = (&$body,&$body,&$body,&$body); # 32 to 44 instructions my ($a,$b,$c,$d,$e); &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]" @@ -870,12 +1085,8 @@ () &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" eval(shift(@insns)); eval(shift(@insns)) if (@insns[0] !~ /&ro[rl]/); - if ($Xi%5) { - &vmovdqa (@Tx[2],@Tx[1]);# "perpetuate" K_XX_XX... - } else { # ... or load next one - &vmovdqa (@Tx[2],eval(16*($Xi/5))."($K_XX_XX)"); - } - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + &vpaddd (@Tx[1],$Kx,@X[-1&7]); + &vmovdqa ($Kx,eval(2*16*($Xi/5)-64)."($K_XX_XX)") if ($Xi%5==0); eval(shift(@insns)); # ror eval(shift(@insns)); @@ -905,7 +1116,6 @@ () &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 eval(shift(@insns)); # body_20_39 eval(shift(@insns)); - &vmovdqa (@Tx[1],@X[0]) if ($Xi<19); eval(shift(@insns)); eval(shift(@insns)); # rol eval(shift(@insns)); @@ -916,7 +1126,6 @@ () foreach (@insns) { eval; } # remaining instructions $Xi++; push(@X,shift(@X)); # "rotate" X[] - push(@Tx,shift(@Tx)); } sub Xuplast_avx_80() @@ -926,23 +1135,21 @@ () my ($a,$b,$c,$d,$e); eval(shift(@insns)); - &vpaddd (@Tx[1],@Tx[1],@X[-1&7]); + &vpaddd (@Tx[1],$Kx,@X[-1&7]); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); - &movdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU + &vmovdqa (eval(16*(($Xi-1)&3))."(%rsp)",@Tx[1]); # X[]+K xfer IALU foreach (@insns) { eval; } # remaining instructions &cmp ($inp,$num); - &je (".Ldone_avx"); - - unshift(@Tx,pop(@Tx)); + &je ($done_avx_label); &vmovdqa(@X[2],"64($K_XX_XX)"); # pbswap mask - &vmovdqa(@Tx[1],"0($K_XX_XX)"); # K_00_19 + &vmovdqa($Kx,"-64($K_XX_XX)"); # K_00_19 &vmovdqu(@X[-4&7],"0($inp)"); # load input &vmovdqu(@X[-3&7],"16($inp)"); &vmovdqu(@X[-2&7],"32($inp)"); @@ -964,7 +1171,7 @@ () &vpshufb(@X[($Xi-3)&7],@X[($Xi-3)&7],@X[2]); eval(shift(@insns)); eval(shift(@insns)); - &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],@Tx[1]); + &vpaddd (@X[$Xi&7],@X[($Xi-4)&7],$Kx); eval(shift(@insns)); eval(shift(@insns)); eval(shift(@insns)); @@ -1024,12 +1231,15 @@ () mov @T[0],4($ctx) mov @T[0],$B # magic seed mov $C,8($ctx) + mov $C,@T[1] mov $D,12($ctx) + xor $D,@T[1] mov $E,16($ctx) + and @T[1],@T[0] jmp .Loop_avx .align 16 -.Ldone_avx: +$done_avx_label: ___ $j=$saved_j; @V=@saved_V; @@ -1052,31 +1262,520 @@ () mov $E,16($ctx) ___ $code.=<<___ if ($win64); - movaps 64+0(%rsp),%xmm6 - movaps 64+16(%rsp),%xmm7 - movaps 64+32(%rsp),%xmm8 - movaps 64+48(%rsp),%xmm9 - movaps 64+64(%rsp),%xmm10 + movaps -40-6*16(%r14),%xmm6 + movaps -40-5*16(%r14),%xmm7 + movaps -40-4*16(%r14),%xmm8 + movaps -40-3*16(%r14),%xmm9 + movaps -40-2*16(%r14),%xmm10 + movaps -40-1*16(%r14),%xmm11 ___ $code.=<<___; - lea `64+($win64?5*16:0)`(%rsp),%rsi - mov 0(%rsi),%r12 - mov 8(%rsi),%rbp - mov 16(%rsi),%rbx - lea 24(%rsi),%rsp + lea (%r14),%rsi + mov 
-40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp .Lepilogue_avx: ret .size sha1_block_data_order_avx,.-sha1_block_data_order_avx ___ + +if ($avx>1) { +use integer; +$Xi=4; # reset variables +@X=map("%ymm$_",(4..7,0..3)); +@Tx=map("%ymm$_",(8..10)); +$Kx="%ymm11"; +$j=0; + +my @ROTX=("%eax","%ebp","%ebx","%ecx","%edx","%esi"); +my ($a5,$t0)=("%r12d","%edi"); + +my ($A,$F,$B,$C,$D,$E)=@ROTX; +my $rx=0; +my $frame="%r13"; + +$code.=<<___; +.type sha1_block_data_order_avx2,\@function,3 +.align 16 +sha1_block_data_order_avx2: +_avx2_shortcut: + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + vzeroupper +___ +$code.=<<___ if ($win64); + lea -6*16(%rsp),%rsp + vmovaps %xmm6,-40-6*16(%rax) + vmovaps %xmm7,-40-5*16(%rax) + vmovaps %xmm8,-40-4*16(%rax) + vmovaps %xmm9,-40-3*16(%rax) + vmovaps %xmm10,-40-2*16(%rax) + vmovaps %xmm11,-40-1*16(%rax) +.Lprologue_avx2: +___ +$code.=<<___; + mov %rax,%r14 # original %rsp + mov %rdi,$ctx # reassigned argument + mov %rsi,$inp # reassigned argument + mov %rdx,$num # reassigned argument + + lea -640(%rsp),%rsp + shl \$6,$num + lea 64($inp),$frame + and \$-128,%rsp + add $inp,$num + lea K_XX_XX+64(%rip),$K_XX_XX + + mov 0($ctx),$A # load context + cmp $num,$frame + cmovae $inp,$frame # next or same block + mov 4($ctx),$F + mov 8($ctx),$C + mov 12($ctx),$D + mov 16($ctx),$E + vmovdqu 64($K_XX_XX),@X[2] # pbswap mask + + vmovdqu ($inp),%xmm0 + vmovdqu 16($inp),%xmm1 + vmovdqu 32($inp),%xmm2 + vmovdqu 48($inp),%xmm3 + lea 64($inp),$inp + vinserti128 \$1,($frame),@X[-4&7],@X[-4&7] + vinserti128 \$1,16($frame),@X[-3&7],@X[-3&7] + vpshufb @X[2],@X[-4&7],@X[-4&7] + vinserti128 \$1,32($frame),@X[-2&7],@X[-2&7] + vpshufb @X[2],@X[-3&7],@X[-3&7] + vinserti128 \$1,48($frame),@X[-1&7],@X[-1&7] + vpshufb @X[2],@X[-2&7],@X[-2&7] + vmovdqu -64($K_XX_XX),$Kx # K_00_19 + vpshufb @X[2],@X[-1&7],@X[-1&7] + + vpaddd $Kx,@X[-4&7],@X[0] # add K_00_19 + vpaddd $Kx,@X[-3&7],@X[1] + vmovdqu @X[0],0(%rsp) # X[]+K xfer to IALU + vpaddd $Kx,@X[-2&7],@X[2] + vmovdqu @X[1],32(%rsp) + vpaddd $Kx,@X[-1&7],@X[3] + vmovdqu @X[2],64(%rsp) + vmovdqu @X[3],96(%rsp) +___ +for (;$Xi<8;$Xi++) { # Xupdate_avx2_16_31 + use integer; + + &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" + &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" + &vpsrld (@Tx[0],@X[0],31); + &vmovdqu($Kx,eval(2*16*(($Xi)/5)-64)."($K_XX_XX)") if ($Xi%5==0); # K_XX_XX + &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword + &vpaddd (@X[0],@X[0],@X[0]); + &vpsrld (@Tx[1],@Tx[2],30); + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 + &vpslld (@Tx[2],@Tx[2],2); + &vpxor (@X[0],@X[0],@Tx[1]); + &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 + &vpaddd (@Tx[1],@X[0],$Kx); + &vmovdqu("32*$Xi(%rsp)",@Tx[1]); # X[]+K xfer to IALU + + push(@X,shift(@X)); # "rotate" X[] +} +$code.=<<___; + lea 128(%rsp),$frame + jmp .Loop_avx2 +.align 32 +.Loop_avx2: + rorx \$2,$F,$B + andn $D,$F,$t0 + and $C,$F + xor $t0,$F +___ +sub bodyx_00_19 () { # 8 instructions, 3 cycles critical path + # at start $f=(b&c)^(~b&d), $b>>>=2 + return &bodyx_20_39() if ($rx==19); $rx++; + ( + '($a,$f,$b,$c,$d,$e)=@ROTX;'. + + '&add ($e,((32*($j/4)+4*($j%4))%256-128)."($frame)");'. 
# e+=X[i]+K + '&lea ($frame,"256($frame)") if ($j%32==31);', + '&andn ($t0,$a,$c)', # ~b&d for next round + + '&add ($e,$f)', # e+=(b&c)^(~b&d) + '&rorx ($a5,$a,27)', # a<<<5 + '&rorx ($f,$a,2)', # b>>>2 for next round + '&and ($a,$b)', # b&c for next round + + '&add ($e,$a5)', # e+=a<<<5 + '&xor ($a,$t0);'. # f=(b&c)^(~b&d) for next round + + 'unshift(@ROTX,pop(@ROTX)); $j++;' + ) +} + +sub bodyx_20_39 () { # 7 instructions, 2 cycles critical path + # on entry $f=b^c^d, $b>>>=2 + return &bodyx_40_59() if ($rx==39); $rx++; + ( + '($a,$f,$b,$c,$d,$e)=@ROTX;'. + + '&add ($e,((32*($j/4)+4*($j%4))%256-128)."($frame)");'. # e+=X[i]+K + '&lea ($frame,"256($frame)") if ($j%32==31);', + + '&lea ($e,"($e,$f)")', # e+=b^c^d + '&rorx ($a5,$a,27)', # a<<<5 + '&rorx ($f,$a,2) if ($j<79)', # b>>>2 in next round + '&xor ($a,$b) if ($j<79)', # b^c for next round + + '&add ($e,$a5)', # e+=a<<<5 + '&xor ($a,$c) if ($j<79);'. # f=b^c^d for next round + + 'unshift(@ROTX,pop(@ROTX)); $j++;' + ) +} + +sub bodyx_40_59 () { # 10 instructions, 3 cycles critical path + # on entry $f=((b^c)&(c^d)), $b>>>=2 + $rx++; + ( + '($a,$f,$b,$c,$d,$e)=@ROTX;'. + + '&add ($e,((32*($j/4)+4*($j%4))%256-128)."($frame)");'. # e+=X[i]+K + '&lea ($frame,"256($frame)") if ($j%32==31);', + '&xor ($f,$c) if ($j>39)', # (b^c)&(c^d)^c + '&mov ($t0,$b) if ($j<59)', # count on zero latency + '&xor ($t0,$c) if ($j<59)', # c^d for next round + + '&lea ($e,"($e,$f)")', # e+=(b^c)&(c^d)^c + '&rorx ($a5,$a,27)', # a<<<5 + '&rorx ($f,$a,2)', # b>>>2 in next round + '&xor ($a,$b)', # b^c for next round + + '&add ($e,$a5)', # e+=a<<<5 + '&and ($a,$t0) if ($j< 59);'. # f=(b^c)&(c^d) for next round + '&xor ($a,$c) if ($j==59);'. # f=b^c^d for next round + + 'unshift(@ROTX,pop(@ROTX)); $j++;' + ) +} + +sub Xupdate_avx2_16_31() # recall that $Xi starts wtih 4 +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body,&$body); # 35 instructions + my ($a,$b,$c,$d,$e); + + &vpalignr(@X[0],@X[-3&7],@X[-4&7],8); # compose "X[-14]" in "X[0]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrldq(@Tx[0],@X[-1&7],4); # "X[-3]", 3 dwords + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"^="X[-16]" + &vpxor (@Tx[0],@Tx[0],@X[-2&7]); # "X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-3]"^"X[-8]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[0],@X[0],31); + &vmovdqu($Kx,eval(2*16*(($Xi)/5)-64)."($K_XX_XX)") if ($Xi%5==0); # K_XX_XX + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslldq(@Tx[2],@X[0],12); # "X[0]"<<96, extract one dword + &vpaddd (@X[0],@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[1],@Tx[2],30); + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=1 + eval(shift(@insns)); + eval(shift(@insns)); + + &vpslld (@Tx[2],@Tx[2],2); + &vpxor (@X[0],@X[0],@Tx[1]); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[2]); # "X[0]"^=("X[0]">>96)<<<2 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpaddd (@Tx[1],@X[0],$Kx); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vmovdqu(eval(32*($Xi))."(%rsp)",@Tx[1]); # X[]+K xfer to IALU + + foreach (@insns) { eval; } # remaining instructions [if any] + + $Xi++; + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xupdate_avx2_32_79() +{ use integer; + my 
$body = shift; + my @insns = (&$body,&$body,&$body,&$body,&$body); # 35 to 50 instructions + my ($a,$b,$c,$d,$e); + + &vpalignr(@Tx[0],@X[-1&7],@X[-2&7],8); # compose "X[-6]" + &vpxor (@X[0],@X[0],@X[-4&7]); # "X[0]"="X[-32]"^"X[-16]" + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@X[-7&7]); # "X[0]"^="X[-28]" + &vmovdqu($Kx,eval(2*16*($Xi/5)-64)."($K_XX_XX)") if ($Xi%5==0); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpxor (@X[0],@X[0],@Tx[0]); # "X[0]"^="X[-6]" + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpsrld (@Tx[0],@X[0],30); + &vpslld (@X[0],@X[0],2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + #&vpslld (@X[0],@X[0],2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpor (@X[0],@X[0],@Tx[0]); # "X[0]"<<<=2 + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vpaddd (@Tx[1],@X[0],$Kx); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + + &vmovdqu("32*$Xi(%rsp)",@Tx[1]); # X[]+K xfer to IALU + + foreach (@insns) { eval; } # remaining instructions + + $Xi++; + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xloop_avx2() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body,&$body); # 32 instructions + my ($a,$b,$c,$d,$e); + + foreach (@insns) { eval; } +} + + &align32(); + &Xupdate_avx2_32_79(\&bodyx_00_19); + &Xupdate_avx2_32_79(\&bodyx_00_19); + &Xupdate_avx2_32_79(\&bodyx_00_19); + &Xupdate_avx2_32_79(\&bodyx_00_19); + + &Xupdate_avx2_32_79(\&bodyx_20_39); + &Xupdate_avx2_32_79(\&bodyx_20_39); + &Xupdate_avx2_32_79(\&bodyx_20_39); + &Xupdate_avx2_32_79(\&bodyx_20_39); + + &align32(); + &Xupdate_avx2_32_79(\&bodyx_40_59); + &Xupdate_avx2_32_79(\&bodyx_40_59); + &Xupdate_avx2_32_79(\&bodyx_40_59); + &Xupdate_avx2_32_79(\&bodyx_40_59); + + &Xloop_avx2(\&bodyx_20_39); + &Xloop_avx2(\&bodyx_20_39); + &Xloop_avx2(\&bodyx_20_39); + &Xloop_avx2(\&bodyx_20_39); + +$code.=<<___; + lea 128($inp),$frame + lea 128($inp),%rdi # borrow $t0 + cmp $num,$frame + cmovae $inp,$frame # next or previous block + + # output is d-e-[a]-f-b-c => A=d,F=e,C=f,D=b,E=c + add 0($ctx),@ROTX[0] # update context + add 4($ctx),@ROTX[1] + add 8($ctx),@ROTX[3] + mov @ROTX[0],0($ctx) + add 12($ctx),@ROTX[4] + mov @ROTX[1],4($ctx) + mov @ROTX[0],$A # A=d + add 16($ctx),@ROTX[5] + mov @ROTX[3],$a5 + mov @ROTX[3],8($ctx) + mov @ROTX[4],$D # D=b + #xchg @ROTX[5],$F # F=c, C=f + mov @ROTX[4],12($ctx) + mov @ROTX[1],$F # F=e + mov @ROTX[5],16($ctx) + #mov $F,16($ctx) + mov @ROTX[5],$E # E=c + mov $a5,$C # C=f + #xchg $F,$E # E=c, F=e + + cmp $num,$inp + je .Ldone_avx2 +___ + +$Xi=4; # reset variables +@X=map("%ymm$_",(4..7,0..3)); + +$code.=<<___; + vmovdqu 64($K_XX_XX),@X[2] # pbswap mask + cmp $num,%rdi # borrowed $t0 + ja .Last_avx2 + + vmovdqu -64(%rdi),%xmm0 # low part of @X[-4&7] + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 \$1,0($frame),@X[-4&7],@X[-4&7] + vinserti128 \$1,16($frame),@X[-3&7],@X[-3&7] + vinserti128 \$1,32($frame),@X[-2&7],@X[-2&7] + vinserti128 \$1,48($frame),@X[-1&7],@X[-1&7] + jmp .Last_avx2 + +.align 32 +.Last_avx2: + lea 128+16(%rsp),$frame + rorx \$2,$F,$B + andn $D,$F,$t0 + and $C,$F + xor $t0,$F + sub \$-128,$inp +___ + $rx=$j=0; @ROTX=($A,$F,$B,$C,$D,$E); + + &Xloop_avx2 (\&bodyx_00_19); + &Xloop_avx2 (\&bodyx_00_19); + &Xloop_avx2 (\&bodyx_00_19); + &Xloop_avx2 (\&bodyx_00_19); + + &Xloop_avx2 
(\&bodyx_20_39); + &vmovdqu ($Kx,"-64($K_XX_XX)"); # K_00_19 + &vpshufb (@X[-4&7],@X[-4&7],@X[2]); # byte swap + &Xloop_avx2 (\&bodyx_20_39); + &vpshufb (@X[-3&7],@X[-3&7],@X[2]); + &vpaddd (@Tx[0],@X[-4&7],$Kx); # add K_00_19 + &Xloop_avx2 (\&bodyx_20_39); + &vmovdqu ("0(%rsp)",@Tx[0]); + &vpshufb (@X[-2&7],@X[-2&7],@X[2]); + &vpaddd (@Tx[1],@X[-3&7],$Kx); + &Xloop_avx2 (\&bodyx_20_39); + &vmovdqu ("32(%rsp)",@Tx[1]); + &vpshufb (@X[-1&7],@X[-1&7],@X[2]); + &vpaddd (@X[2],@X[-2&7],$Kx); + + &Xloop_avx2 (\&bodyx_40_59); + &align32 (); + &vmovdqu ("64(%rsp)",@X[2]); + &vpaddd (@X[3],@X[-1&7],$Kx); + &Xloop_avx2 (\&bodyx_40_59); + &vmovdqu ("96(%rsp)",@X[3]); + &Xloop_avx2 (\&bodyx_40_59); + &Xupdate_avx2_16_31(\&bodyx_40_59); + + &Xupdate_avx2_16_31(\&bodyx_20_39); + &Xupdate_avx2_16_31(\&bodyx_20_39); + &Xupdate_avx2_16_31(\&bodyx_20_39); + &Xloop_avx2 (\&bodyx_20_39); + +$code.=<<___; + lea 128(%rsp),$frame + + # output is d-e-[a]-f-b-c => A=d,F=e,C=f,D=b,E=c + add 0($ctx),@ROTX[0] # update context + add 4($ctx),@ROTX[1] + add 8($ctx),@ROTX[3] + mov @ROTX[0],0($ctx) + add 12($ctx),@ROTX[4] + mov @ROTX[1],4($ctx) + mov @ROTX[0],$A # A=d + add 16($ctx),@ROTX[5] + mov @ROTX[3],$a5 + mov @ROTX[3],8($ctx) + mov @ROTX[4],$D # D=b + #xchg @ROTX[5],$F # F=c, C=f + mov @ROTX[4],12($ctx) + mov @ROTX[1],$F # F=e + mov @ROTX[5],16($ctx) + #mov $F,16($ctx) + mov @ROTX[5],$E # E=c + mov $a5,$C # C=f + #xchg $F,$E # E=c, F=e + + cmp $num,$inp + jbe .Loop_avx2 + +.Ldone_avx2: + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -40-6*16(%r14),%xmm6 + movaps -40-5*16(%r14),%xmm7 + movaps -40-4*16(%r14),%xmm8 + movaps -40-3*16(%r14),%xmm9 + movaps -40-2*16(%r14),%xmm10 + movaps -40-1*16(%r14),%xmm11 +___ +$code.=<<___; + lea (%r14),%rsi + mov -40(%rsi),%r14 + mov -32(%rsi),%r13 + mov -24(%rsi),%r12 + mov -16(%rsi),%rbp + mov -8(%rsi),%rbx + lea (%rsi),%rsp +.Lepilogue_avx2: + ret +.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2 +___ +} } $code.=<<___; .align 64 K_XX_XX: .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 # K_00_19 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 .long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 # K_20_39 .long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc # K_40_59 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 .long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 # K_60_79 .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap mask +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 ___ }}} $code.=<<___; @@ -1122,20 +1821,58 @@ () jae .Lcommon_seh_tail mov `16*4`(%rax),%rax # pull saved stack pointer - lea 32(%rax),%rax mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 mov -32(%rax),%r13 + mov -40(%rax),%r14 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore context->R12 mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 jmp .Lcommon_seh_tail .size se_handler,.-se_handler +___ + +$code.=<<___ if ($shaext); +.type shaext_handler,\@abi-omnipotent +.align 16 +shaext_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea 
.Lprologue_shaext(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lcommon_seh_tail + + lea .Lepilogue_shaext(%rip),%r10 + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lcommon_seh_tail + + lea -8-4*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$8,%ecx + .long 0xa548f3fc # cld; rep movsq + jmp .Lcommon_seh_tail +.size shaext_handler,.-shaext_handler +___ + +$code.=<<___; .type ssse3_handler,\@abi-omnipotent .align 16 ssse3_handler: @@ -1168,18 +1905,23 @@ () cmp %r10,%rbx # context->Rip>=epilogue label jae .Lcommon_seh_tail - lea 64(%rax),%rsi + mov 232($context),%rax # pull context->R14 + + lea -40-6*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 - mov \$10,%ecx + mov \$12,%ecx .long 0xa548f3fc # cld; rep movsq - lea `24+64+5*16`(%rax),%rax # adjust stack pointer mov -8(%rax),%rbx mov -16(%rax),%rbp mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp mov %r12,216($context) # restore cotnext->R12 + mov %r13,224($context) # restore cotnext->R13 + mov %r14,232($context) # restore cotnext->R14 .Lcommon_seh_tail: mov 8(%rax),%rdi @@ -1226,6 +1968,13 @@ () .rva .LSEH_begin_sha1_block_data_order .rva .LSEH_end_sha1_block_data_order .rva .LSEH_info_sha1_block_data_order +___ +$code.=<<___ if ($shaext); + .rva .LSEH_begin_sha1_block_data_order_shaext + .rva .LSEH_end_sha1_block_data_order_shaext + .rva .LSEH_info_sha1_block_data_order_shaext +___ +$code.=<<___; .rva .LSEH_begin_sha1_block_data_order_ssse3 .rva .LSEH_end_sha1_block_data_order_ssse3 .rva .LSEH_info_sha1_block_data_order_ssse3 @@ -1235,12 +1984,24 @@ () .rva .LSEH_end_sha1_block_data_order_avx .rva .LSEH_info_sha1_block_data_order_avx ___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_sha1_block_data_order_avx2 + .rva .LSEH_end_sha1_block_data_order_avx2 + .rva .LSEH_info_sha1_block_data_order_avx2 +___ $code.=<<___; .section .xdata .align 8 .LSEH_info_sha1_block_data_order: .byte 9,0,0,0 .rva se_handler +___ +$code.=<<___ if ($shaext); +.LSEH_info_sha1_block_data_order_shaext: + .byte 9,0,0,0 + .rva shaext_handler +___ +$code.=<<___; .LSEH_info_sha1_block_data_order_ssse3: .byte 9,0,0,0 .rva ssse3_handler @@ -1252,10 +2013,55 @@ () .rva ssse3_handler .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] ___ +$code.=<<___ if ($avx>1); +.LSEH_info_sha1_block_data_order_avx2: + .byte 9,0,0,0 + .rva ssse3_handler + .rva .Lprologue_avx2,.Lepilogue_avx2 # HandlerData[] +___ } #################################################################### -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; +sub sha1rnds4 { + if (@_[0] =~ /\$([x0-9a-f]+),\s*%xmm([0-7]),\s*%xmm([0-7])/) { + my @opcode=(0x0f,0x3a,0xcc); + push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M + my $c=$1; + push @opcode,$c=~/^0/?oct($c):$c; + return ".byte\t".join(',',@opcode); + } else { + return "sha1rnds4\t".@_[0]; + } +} + +sub sha1op38 { + my $instr = shift; + my %opcodelet = ( + "sha1nexte" => 0xc8, + "sha1msg1" => 0xc9, + "sha1msg2" => 0xca ); + + if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x38); + my $rex=0; + $rex|=0x04 if ($2>=8); + $rex|=0x01 if ($1>=8); + unshift @opcode,0x40|$rex if ($rex); + push @opcode,$opcodelet{$instr}; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } else { + return $instr."\t".@_[0]; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha1rnds4)\s+(.*)/sha1rnds4($2)/geo or + 
s/\b(sha1[^\s]*)\s+(.*)/sha1op38($1,$2)/geo; + + print $_,"\n"; +} close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl index 928ec53123bfd1..6462e45ba75bee 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -9,20 +9,55 @@ # # SHA256 block transform for x86. September 2007. # -# Performance in clock cycles per processed byte (less is better): +# Performance improvement over compiler generated code varies from +# 10% to 40% [see below]. Not very impressive on some µ-archs, but +# it's 5 times smaller and optimizies amount of writes. # -# Pentium PIII P4 AMD K8 Core2 -# gcc 46 36 41 27 26 -# icc 57 33 38 25 23 -# x86 asm 40 30 33 20 18 -# x86_64 asm(*) - - 21 16 16 +# May 2012. # -# (*) x86_64 assembler performance is presented for reference -# purposes. +# Optimization including two of Pavel Semjanov's ideas, alternative +# Maj and full unroll, resulted in ~20-25% improvement on most CPUs, +# ~7% on Pentium, ~40% on Atom. As fully unrolled loop body is almost +# 15x larger, 8KB vs. 560B, it's fired only for longer inputs. But not +# on P4, where it kills performance, nor Sandy Bridge, where folded +# loop is approximately as fast... # -# Performance improvement over compiler generated code varies from -# 10% to 40% [see above]. Not very impressive on some µ-archs, but -# it's 5 times smaller and optimizies amount of writes. +# June 2012. +# +# Add AMD XOP-specific code path, >30% improvement on Bulldozer over +# May version, >60% over original. Add AVX+shrd code path, >25% +# improvement on Sandy Bridge over May version, 60% over original. +# +# May 2013. +# +# Replace AMD XOP code path with SSSE3 to cover more processors. +# (Biggest improvement coefficient is on upcoming Atom Silvermont, +# not shown.) Add AVX+BMI code path. +# +# March 2014. +# +# Add support for Intel SHA Extensions. 
+# +# Performance in clock cycles per processed byte (less is better): +# +# gcc icc x86 asm(*) SIMD x86_64 asm(**) +# Pentium 46 57 40/38 - - +# PIII 36 33 27/24 - - +# P4 41 38 28 - 17.3 +# AMD K8 27 25 19/15.5 - 14.9 +# Core2 26 23 18/15.6 14.3 13.8 +# Westmere 27 - 19/15.7 13.4 12.3 +# Sandy Bridge 25 - 15.9 12.4 11.6 +# Ivy Bridge 24 - 15.0 11.4 10.3 +# Haswell 22 - 13.9 9.46 7.80 +# Bulldozer 36 - 27/22 17.0 13.6 +# VIA Nano 36 - 25/22 16.8 16.5 +# Atom 50 - 30/25 21.9 18.9 +# Silvermont 40 - 34/31 22.9 20.6 +# +# (*) numbers after slash are for unrolled loop, where applicable; +# (**) x86_64 assembly performance is presented for reference +# purposes, results are best-available; $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; push(@INC,"${dir}","${dir}../../perlasm"); @@ -30,72 +65,122 @@ &asm_init($ARGV[0],"sha512-586.pl",$ARGV[$#ARGV] eq "386"); +$xmm=$avx=0; +for (@ARGV) { $xmm=1 if (/-DOPENSSL_IA32_SSE2/); } + +if ($xmm && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if ($xmm && !$avx && $ARGV[0] eq "win32n" && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.03) + ($1>=2.10); +} + +if ($xmm && !$avx && $ARGV[0] eq "win32" && + `ml 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +$shaext=$xmm; ### set to zero if compiling for 1.0.1 + +$unroll_after = 64*4; # If pre-evicted from L1P cache first spin of + # fully unrolled loop was measured to run about + # 3-4x slower. If slowdown coefficient is N and + # unrolled loop is m times faster, then you break + # even at (N-1)/(m-1) blocks. Then it needs to be + # adjusted for probability of code being evicted, + # code size/cache size=1/4. Typical m is 1.15... 
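The break-even reasoning in the comment above can be made concrete with the numbers it quotes. A minimal sketch, assuming the cold-cache slowdown N is about 3.5 (the quoted 3-4x), the unrolled speedup m is 1.15, and reading the code-size/cache-size ratio of 1/4 as the probability that the unrolled body has been evicted; illustration only, not code the patch adds:

#!/usr/bin/env perl
# Back-of-the-envelope check of the $unroll_after = 64*4 threshold.
use strict; use warnings;

my $N = 3.5;    # first unrolled spin runs ~3-4x slower when not in L1P
my $m = 1.15;   # typical speedup of the fully unrolled loop
my $p = 0.25;   # assumed eviction probability ~ code size / cache size

my $blocks = $p * ($N - 1) / ($m - 1);        # break-even, in 64-byte blocks
printf "break-even at ~%.1f blocks (~%.0f bytes)\n", $blocks, 64 * $blocks;
# prints ~4.2 blocks (~267 bytes), i.e. in the neighbourhood of 64*4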
+ $A="eax"; $E="edx"; $T="ebx"; -$Aoff=&DWP(0,"esp"); -$Boff=&DWP(4,"esp"); -$Coff=&DWP(8,"esp"); -$Doff=&DWP(12,"esp"); -$Eoff=&DWP(16,"esp"); -$Foff=&DWP(20,"esp"); -$Goff=&DWP(24,"esp"); -$Hoff=&DWP(28,"esp"); -$Xoff=&DWP(32,"esp"); +$Aoff=&DWP(4,"esp"); +$Boff=&DWP(8,"esp"); +$Coff=&DWP(12,"esp"); +$Doff=&DWP(16,"esp"); +$Eoff=&DWP(20,"esp"); +$Foff=&DWP(24,"esp"); +$Goff=&DWP(28,"esp"); +$Hoff=&DWP(32,"esp"); +$Xoff=&DWP(36,"esp"); $K256="ebp"; +sub BODY_16_63() { + &mov ($T,"ecx"); # "ecx" is preloaded + &mov ("esi",&DWP(4*(9+15+16-14),"esp")); + &ror ("ecx",18-7); + &mov ("edi","esi"); + &ror ("esi",19-17); + &xor ("ecx",$T); + &shr ($T,3); + &ror ("ecx",7); + &xor ("esi","edi"); + &xor ($T,"ecx"); # T = sigma0(X[-15]) + &ror ("esi",17); + &add ($T,&DWP(4*(9+15+16),"esp")); # T += X[-16] + &shr ("edi",10); + &add ($T,&DWP(4*(9+15+16-9),"esp")); # T += X[-7] + #&xor ("edi","esi") # sigma1(X[-2]) + # &add ($T,"edi"); # T += sigma1(X[-2]) + # &mov (&DWP(4*(9+15),"esp"),$T); # save X[0] + + &BODY_00_15(1); +} sub BODY_00_15() { my $in_16_63=shift; &mov ("ecx",$E); - &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2]) - &ror ("ecx",25-11); + &xor ("edi","esi") if ($in_16_63); # sigma1(X[-2]) &mov ("esi",$Foff); + &ror ("ecx",25-11); + &add ($T,"edi") if ($in_16_63); # T += sigma1(X[-2]) + &mov ("edi",$Goff); &xor ("ecx",$E); + &xor ("esi","edi"); + &mov ($T,&DWP(4*(9+15),"esp")) if (!$in_16_63); + &mov (&DWP(4*(9+15),"esp"),$T) if ($in_16_63); # save X[0] &ror ("ecx",11-6); - &mov (&DWP(4*(8+15),"esp"),$T) if ($in_16_63); # save X[0] - &xor ("ecx",$E); - &ror ("ecx",6); # Sigma1(e) - &mov ("edi",$Goff); - &add ($T,"ecx"); # T += Sigma1(e) - - &xor ("esi","edi"); - &mov ($Eoff,$E); # modulo-scheduled + &and ("esi",$E); + &mov ($Eoff,$E); # modulo-scheduled + &xor ($E,"ecx"); + &add ($T,$Hoff); # T += h + &xor ("esi","edi"); # Ch(e,f,g) + &ror ($E,6); # Sigma1(e) &mov ("ecx",$A); - &and ("esi",$E); - &mov ($E,$Doff); # e becomes d, which is e in next iteration - &xor ("esi","edi"); # Ch(e,f,g) - &mov ("edi",$A); - &add ($T,"esi"); # T += Ch(e,f,g) + &add ($T,"esi"); # T += Ch(e,f,g) &ror ("ecx",22-13); - &add ($T,$Hoff); # T += h + &add ($T,$E); # T += Sigma1(e) + &mov ("edi",$Boff); &xor ("ecx",$A); + &mov ($Aoff,$A); # modulo-scheduled + &lea ("esp",&DWP(-4,"esp")); &ror ("ecx",13-2); - &mov ("esi",$Boff); - &xor ("ecx",$A); - &ror ("ecx",2); # Sigma0(a) - &add ($E,$T); # d += T - &mov ("edi",$Coff); - - &add ($T,"ecx"); # T += Sigma0(a) - &mov ($Aoff,$A); # modulo-scheduled - - &mov ("ecx",$A); - &sub ("esp",4); - &or ($A,"esi"); # a becomes h, which is a in next iteration - &and ("ecx","esi"); - &and ($A,"edi"); &mov ("esi",&DWP(0,$K256)); - &or ($A,"ecx"); # h=Maj(a,b,c) + &xor ("ecx",$A); + &mov ($E,$Eoff); # e in next iteration, d in this one + &xor ($A,"edi"); # a ^= b + &ror ("ecx",2); # Sigma0(a) + &add ($T,"esi"); # T+= K[i] + &mov (&DWP(0,"esp"),$A); # (b^c) in next round + &add ($E,$T); # d += T + &and ($A,&DWP(4,"esp")); # a &= (b^c) + &add ($T,"ecx"); # T += Sigma0(a) + &xor ($A,"edi"); # h = Maj(a,b,c) = Ch(a^b,c,b) + &mov ("ecx",&DWP(4*(9+15+16-1),"esp")) if ($in_16_63); # preload T &add ($K256,4); - &add ($A,$T); # h += T - &mov ($T,&DWP(4*(8+15+16-1),"esp")) if ($in_16_63); # preload T - &add ($E,"esi"); # d += K256[i] - &add ($A,"esi"); # h += K256[i] + &add ($A,$T); # h += T } +&external_label("OPENSSL_ia32cap_P") if (!$i386); + &function_begin("sha256_block_data_order"); &mov ("esi",wparam(0)); # ctx &mov ("edi",wparam(1)); # inp @@ -116,26 +201,59 @@ () &mov 
(&DWP(4,"esp"),"edi"); # inp &mov (&DWP(8,"esp"),"eax"); # inp+num*128 &mov (&DWP(12,"esp"),"ebx"); # saved sp + if (!$i386 && $xmm) { + &picmeup("edx","OPENSSL_ia32cap_P",$K256,&label("K256")); + &mov ("ecx",&DWP(0,"edx")); + &mov ("ebx",&DWP(4,"edx")); + &test ("ecx",1<<20); # check for P4 + &jnz (&label("loop")); + &mov ("edx",&DWP(8,"edx")) if ($xmm); + &test ("ecx",1<<24); # check for FXSR + &jz ($unroll_after?&label("no_xmm"):&label("loop")); + &and ("ecx",1<<30); # mask "Intel CPU" bit + &and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits + &test ("edx",1<<29) if ($shaext); # check for SHA + &jnz (&label("shaext")) if ($shaext); + &or ("ecx","ebx"); + &and ("ecx",1<<28|1<<30); + &cmp ("ecx",1<<28|1<<30); + if ($xmm) { + &je (&label("AVX")) if ($avx); + &test ("ebx",1<<9); # check for SSSE3 + &jnz (&label("SSSE3")); + } else { + &je (&label("loop_shrd")); + } + if ($unroll_after) { +&set_label("no_xmm"); + &sub ("eax","edi"); + &cmp ("eax",$unroll_after); + &jae (&label("unrolled")); + } } + &jmp (&label("loop")); + +sub COMPACT_LOOP() { +my $suffix=shift; -&set_label("loop",16); +&set_label("loop$suffix",$suffix?32:16); # copy input block to stack reversing byte and dword order for($i=0;$i<4;$i++) { &mov ("eax",&DWP($i*16+0,"edi")); &mov ("ebx",&DWP($i*16+4,"edi")); &mov ("ecx",&DWP($i*16+8,"edi")); - &mov ("edx",&DWP($i*16+12,"edi")); &bswap ("eax"); + &mov ("edx",&DWP($i*16+12,"edi")); &bswap ("ebx"); - &bswap ("ecx"); - &bswap ("edx"); &push ("eax"); + &bswap ("ecx"); &push ("ebx"); + &bswap ("edx"); &push ("ecx"); &push ("edx"); } &add ("edi",64); - &sub ("esp",4*8); # place for A,B,C,D,E,F,G,H - &mov (&DWP(4*(8+16)+4,"esp"),"edi"); + &lea ("esp",&DWP(-4*9,"esp"));# place for A,B,C,D,E,F,G,H + &mov (&DWP(4*(9+16)+4,"esp"),"edi"); # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack &mov ($A,&DWP(0,"esi")); @@ -144,8 +262,10 @@ () &mov ("edi",&DWP(12,"esi")); # &mov ($Aoff,$A); &mov ($Boff,"ebx"); + &xor ("ebx","ecx"); &mov ($Coff,"ecx"); &mov ($Doff,"edi"); + &mov (&DWP(0,"esp"),"ebx"); # magic &mov ($E,&DWP(16,"esi")); &mov ("ebx",&DWP(20,"esi")); &mov ("ecx",&DWP(24,"esi")); @@ -155,59 +275,41 @@ () &mov ($Goff,"ecx"); &mov ($Hoff,"edi"); -&set_label("00_15",16); - &mov ($T,&DWP(4*(8+15),"esp")); +&set_label("00_15$suffix",16); &BODY_00_15(); &cmp ("esi",0xc19bf174); - &jne (&label("00_15")); - - &mov ($T,&DWP(4*(8+15+16-1),"esp")); # preloaded in BODY_00_15(1) -&set_label("16_63",16); - &mov ("esi",$T); - &mov ("ecx",&DWP(4*(8+15+16-14),"esp")); - &ror ("esi",18-7); - &mov ("edi","ecx"); - &xor ("esi",$T); - &ror ("esi",7); - &shr ($T,3); - - &ror ("edi",19-17); - &xor ($T,"esi"); # T = sigma0(X[-15]) - &xor ("edi","ecx"); - &ror ("edi",17); - &shr ("ecx",10); - &add ($T,&DWP(4*(8+15+16),"esp")); # T += X[-16] - &xor ("edi","ecx"); # sigma1(X[-2]) - - &add ($T,&DWP(4*(8+15+16-9),"esp")); # T += X[-7] - # &add ($T,"edi"); # T += sigma1(X[-2]) - # &mov (&DWP(4*(8+15),"esp"),$T); # save X[0] + &jne (&label("00_15$suffix")); - &BODY_00_15(1); + &mov ("ecx",&DWP(4*(9+15+16-1),"esp")); # preloaded in BODY_00_15(1) + &jmp (&label("16_63$suffix")); + +&set_label("16_63$suffix",16); + + &BODY_16_63(); &cmp ("esi",0xc67178f2); - &jne (&label("16_63")); + &jne (&label("16_63$suffix")); - &mov ("esi",&DWP(4*(8+16+64)+0,"esp"));#ctx + &mov ("esi",&DWP(4*(9+16+64)+0,"esp"));#ctx # &mov ($A,$Aoff); &mov ("ebx",$Boff); - &mov ("ecx",$Coff); - &mov ("edi",$Doff); + # &mov ("edi",$Coff); + &mov ("ecx",$Doff); &add ($A,&DWP(0,"esi")); &add ("ebx",&DWP(4,"esi")); - &add ("ecx",&DWP(8,"esi")); - &add 
("edi",&DWP(12,"esi")); + &add ("edi",&DWP(8,"esi")); + &add ("ecx",&DWP(12,"esi")); &mov (&DWP(0,"esi"),$A); &mov (&DWP(4,"esi"),"ebx"); - &mov (&DWP(8,"esi"),"ecx"); - &mov (&DWP(12,"esi"),"edi"); + &mov (&DWP(8,"esi"),"edi"); + &mov (&DWP(12,"esi"),"ecx"); # &mov ($E,$Eoff); &mov ("eax",$Foff); &mov ("ebx",$Goff); &mov ("ecx",$Hoff); - &mov ("edi",&DWP(4*(8+16+64)+4,"esp"));#inp + &mov ("edi",&DWP(4*(9+16+64)+4,"esp"));#inp &add ($E,&DWP(16,"esi")); &add ("eax",&DWP(20,"esi")); &add ("ebx",&DWP(24,"esi")); @@ -217,33 +319,963 @@ () &mov (&DWP(24,"esi"),"ebx"); &mov (&DWP(28,"esi"),"ecx"); - &add ("esp",4*(8+16+64)); # destroy frame + &lea ("esp",&DWP(4*(9+16+64),"esp"));# destroy frame &sub ($K256,4*64); # rewind K &cmp ("edi",&DWP(8,"esp")); # are we done yet? - &jb (&label("loop")); - + &jb (&label("loop$suffix")); +} + &COMPACT_LOOP(); + &mov ("esp",&DWP(12,"esp")); # restore sp +&function_end_A(); + if (!$i386 && !$xmm) { + # ~20% improvement on Sandy Bridge + local *ror = sub { &shrd(@_[0],@_) }; + &COMPACT_LOOP("_shrd"); &mov ("esp",&DWP(12,"esp")); # restore sp &function_end_A(); + } &set_label("K256",64); # Yes! I keep it in the code segment! - &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); - &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); - &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); - &data_word(0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174); - &data_word(0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc); - &data_word(0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da); - &data_word(0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7); - &data_word(0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967); - &data_word(0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13); - &data_word(0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85); - &data_word(0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3); - &data_word(0xd192e819,0xd6990624,0xf40e3585,0x106aa070); - &data_word(0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5); - &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); - &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); - &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); -&function_end_B("sha256_block_data_order"); +@K256=( 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5, + 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3, + 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc, + 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7, + 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, + 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13, + 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3, + 0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5, + 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208, + 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ); +&data_word(@K256); +&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # byte swap mask &asciz("SHA256 block transform for x86, CRYPTOGAMS by "); +($a,$b,$c,$d,$e,$f,$g,$h)=(0..7); # offsets +sub off { &DWP(4*(((shift)-$i)&7),"esp"); } + +if (!$i386 && $unroll_after) { +my @AH=($A,$K256); + +&set_label("unrolled",16); + &lea ("esp",&DWP(-96,"esp")); + # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack + &mov ($AH[0],&DWP(0,"esi")); + &mov ($AH[1],&DWP(4,"esi")); + &mov ("ecx",&DWP(8,"esi")); + &mov ("ebx",&DWP(12,"esi")); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"ecx"); # magic + &mov 
(&DWP(8,"esp"),"ecx"); + &mov (&DWP(12,"esp"),"ebx"); + &mov ($E,&DWP(16,"esi")); + &mov ("ebx",&DWP(20,"esi")); + &mov ("ecx",&DWP(24,"esi")); + &mov ("esi",&DWP(28,"esi")); + #&mov (&DWP(16,"esp"),$E); + &mov (&DWP(20,"esp"),"ebx"); + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esp"),"esi"); + &jmp (&label("grand_loop")); + +&set_label("grand_loop",16); + # copy input block to stack reversing byte order + for($i=0;$i<5;$i++) { + &mov ("ebx",&DWP(12*$i+0,"edi")); + &mov ("ecx",&DWP(12*$i+4,"edi")); + &bswap ("ebx"); + &mov ("esi",&DWP(12*$i+8,"edi")); + &bswap ("ecx"); + &mov (&DWP(32+12*$i+0,"esp"),"ebx"); + &bswap ("esi"); + &mov (&DWP(32+12*$i+4,"esp"),"ecx"); + &mov (&DWP(32+12*$i+8,"esp"),"esi"); + } + &mov ("ebx",&DWP($i*12,"edi")); + &add ("edi",64); + &bswap ("ebx"); + &mov (&DWP(96+4,"esp"),"edi"); + &mov (&DWP(32+12*$i,"esp"),"ebx"); + + my ($t1,$t2) = ("ecx","esi"); + + for ($i=0;$i<64;$i++) { + + if ($i>=16) { + &mov ($T,$t1); # $t1 is preloaded + # &mov ($t2,&DWP(32+4*(($i+14)&15),"esp")); + &ror ($t1,18-7); + &mov ("edi",$t2); + &ror ($t2,19-17); + &xor ($t1,$T); + &shr ($T,3); + &ror ($t1,7); + &xor ($t2,"edi"); + &xor ($T,$t1); # T = sigma0(X[-15]) + &ror ($t2,17); + &add ($T,&DWP(32+4*($i&15),"esp")); # T += X[-16] + &shr ("edi",10); + &add ($T,&DWP(32+4*(($i+9)&15),"esp")); # T += X[-7] + #&xor ("edi",$t2) # sigma1(X[-2]) + # &add ($T,"edi"); # T += sigma1(X[-2]) + # &mov (&DWP(4*(9+15),"esp"),$T); # save X[0] + } + &mov ($t1,$E); + &xor ("edi",$t2) if ($i>=16); # sigma1(X[-2]) + &mov ($t2,&off($f)); + &ror ($E,25-11); + &add ($T,"edi") if ($i>=16); # T += sigma1(X[-2]) + &mov ("edi",&off($g)); + &xor ($E,$t1); + &mov ($T,&DWP(32+4*($i&15),"esp")) if ($i<16); # X[i] + &mov (&DWP(32+4*($i&15),"esp"),$T) if ($i>=16 && $i<62); # save X[0] + &xor ($t2,"edi"); + &ror ($E,11-6); + &and ($t2,$t1); + &mov (&off($e),$t1); # save $E, modulo-scheduled + &xor ($E,$t1); + &add ($T,&off($h)); # T += h + &xor ("edi",$t2); # Ch(e,f,g) + &ror ($E,6); # Sigma1(e) + &mov ($t1,$AH[0]); + &add ($T,"edi"); # T += Ch(e,f,g) + + &ror ($t1,22-13); + &mov ($t2,$AH[0]); + &mov ("edi",&off($b)); + &xor ($t1,$AH[0]); + &mov (&off($a),$AH[0]); # save $A, modulo-scheduled + &xor ($AH[0],"edi"); # a ^= b, (b^c) in next round + &ror ($t1,13-2); + &and ($AH[1],$AH[0]); # (b^c) &= (a^b) + &lea ($E,&DWP(@K256[$i],$T,$E)); # T += Sigma1(1)+K[i] + &xor ($t1,$t2); + &xor ($AH[1],"edi"); # h = Maj(a,b,c) = Ch(a^b,c,b) + &mov ($t2,&DWP(32+4*(($i+2)&15),"esp")) if ($i>=15 && $i<63); + &ror ($t1,2); # Sigma0(a) + + &add ($AH[1],$E); # h += T + &add ($E,&off($d)); # d += T + &add ($AH[1],$t1); # h += Sigma0(a) + &mov ($t1,&DWP(32+4*(($i+15)&15),"esp")) if ($i>=15 && $i<63); + + @AH = reverse(@AH); # rotate(a,h) + ($t1,$t2) = ($t2,$t1); # rotate(t1,t2) + } + &mov ("esi",&DWP(96,"esp")); #ctx + #&mov ($AH[0],&DWP(0,"esp")); + &xor ($AH[1],"edi"); #&mov ($AH[1],&DWP(4,"esp")); + #&mov ("edi", &DWP(8,"esp")); + &mov ("ecx",&DWP(12,"esp")); + &add ($AH[0],&DWP(0,"esi")); + &add ($AH[1],&DWP(4,"esi")); + &add ("edi",&DWP(8,"esi")); + &add ("ecx",&DWP(12,"esi")); + &mov (&DWP(0,"esi"),$AH[0]); + &mov (&DWP(4,"esi"),$AH[1]); + &mov (&DWP(8,"esi"),"edi"); + &mov (&DWP(12,"esi"),"ecx"); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"edi"); # magic + &mov (&DWP(8,"esp"),"edi"); + &mov (&DWP(12,"esp"),"ecx"); + #&mov ($E,&DWP(16,"esp")); + &mov ("edi",&DWP(20,"esp")); + &mov ("ebx",&DWP(24,"esp")); + &mov ("ecx",&DWP(28,"esp")); + &add ($E,&DWP(16,"esi")); + &add ("edi",&DWP(20,"esi")); + 
&add ("ebx",&DWP(24,"esi")); + &add ("ecx",&DWP(28,"esi")); + &mov (&DWP(16,"esi"),$E); + &mov (&DWP(20,"esi"),"edi"); + &mov (&DWP(24,"esi"),"ebx"); + &mov (&DWP(28,"esi"),"ecx"); + #&mov (&DWP(16,"esp"),$E); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + &mov (&DWP(24,"esp"),"ebx"); + &mov (&DWP(28,"esp"),"ecx"); + + &cmp ("edi",&DWP(96+8,"esp")); # are we done yet? + &jb (&label("grand_loop")); + + &mov ("esp",&DWP(96+12,"esp")); # restore sp +&function_end_A(); +} + if (!$i386 && $xmm) {{{ +if ($shaext) { +###################################################################### +# Intel SHA Extensions implementation of SHA256 update function. +# +my ($ctx,$inp,$end)=("esi","edi","eax"); +my ($Wi,$ABEF,$CDGH,$TMP)=map("xmm$_",(0..2,7)); +my @MSG=map("xmm$_",(3..6)); + +sub sha256op38 { + my ($opcodelet,$dst,$src)=@_; + if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/) + { &data_byte(0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2); } +} +sub sha256rnds2 { sha256op38(0xcb,@_); } +sub sha256msg1 { sha256op38(0xcc,@_); } +sub sha256msg2 { sha256op38(0xcd,@_); } + +&set_label("shaext",32); + &sub ("esp",32); + + &movdqu ($ABEF,&QWP(0,$ctx)); # DCBA + &lea ($K256,&DWP(0x80,$K256)); + &movdqu ($CDGH,&QWP(16,$ctx)); # HGFE + &movdqa ($TMP,&QWP(0x100-0x80,$K256)); # byte swap mask + + &pshufd ($Wi,$ABEF,0x1b); # ABCD + &pshufd ($ABEF,$ABEF,0xb1); # CDAB + &pshufd ($CDGH,$CDGH,0x1b); # EFGH + &palignr ($ABEF,$CDGH,8); # ABEF + &punpcklqdq ($CDGH,$Wi); # CDGH + &jmp (&label("loop_shaext")); + +&set_label("loop_shaext",16); + &movdqu (@MSG[0],&QWP(0,$inp)); + &movdqu (@MSG[1],&QWP(0x10,$inp)); + &movdqu (@MSG[2],&QWP(0x20,$inp)); + &pshufb (@MSG[0],$TMP); + &movdqu (@MSG[3],&QWP(0x30,$inp)); + &movdqa (&QWP(16,"esp"),$CDGH); # offload + + &movdqa ($Wi,&QWP(0*16-0x80,$K256)); + &paddd ($Wi,@MSG[0]); + &pshufb (@MSG[1],$TMP); + &sha256rnds2 ($CDGH,$ABEF); # 0-3 + &pshufd ($Wi,$Wi,0x0e); + &nop (); + &movdqa (&QWP(0,"esp"),$ABEF); # offload + &sha256rnds2 ($ABEF,$CDGH); + + &movdqa ($Wi,&QWP(1*16-0x80,$K256)); + &paddd ($Wi,@MSG[1]); + &pshufb (@MSG[2],$TMP); + &sha256rnds2 ($CDGH,$ABEF); # 4-7 + &pshufd ($Wi,$Wi,0x0e); + &lea ($inp,&DWP(0x40,$inp)); + &sha256msg1 (@MSG[0],@MSG[1]); + &sha256rnds2 ($ABEF,$CDGH); + + &movdqa ($Wi,&QWP(2*16-0x80,$K256)); + &paddd ($Wi,@MSG[2]); + &pshufb (@MSG[3],$TMP); + &sha256rnds2 ($CDGH,$ABEF); # 8-11 + &pshufd ($Wi,$Wi,0x0e); + &movdqa ($TMP,@MSG[3]); + &palignr ($TMP,@MSG[2],4); + &nop (); + &paddd (@MSG[0],$TMP); + &sha256msg1 (@MSG[1],@MSG[2]); + &sha256rnds2 ($ABEF,$CDGH); + + &movdqa ($Wi,&QWP(3*16-0x80,$K256)); + &paddd ($Wi,@MSG[3]); + &sha256msg2 (@MSG[0],@MSG[3]); + &sha256rnds2 ($CDGH,$ABEF); # 12-15 + &pshufd ($Wi,$Wi,0x0e); + &movdqa ($TMP,@MSG[0]); + &palignr ($TMP,@MSG[3],4); + &nop (); + &paddd (@MSG[1],$TMP); + &sha256msg1 (@MSG[2],@MSG[3]); + &sha256rnds2 ($ABEF,$CDGH); + +for($i=4;$i<16-3;$i++) { + &movdqa ($Wi,&QWP($i*16-0x80,$K256)); + &paddd ($Wi,@MSG[0]); + &sha256msg2 (@MSG[1],@MSG[0]); + &sha256rnds2 ($CDGH,$ABEF); # 16-19... 
+ &pshufd ($Wi,$Wi,0x0e); + &movdqa ($TMP,@MSG[1]); + &palignr ($TMP,@MSG[0],4); + &nop (); + &paddd (@MSG[2],$TMP); + &sha256msg1 (@MSG[3],@MSG[0]); + &sha256rnds2 ($ABEF,$CDGH); + + push(@MSG,shift(@MSG)); +} + &movdqa ($Wi,&QWP(13*16-0x80,$K256)); + &paddd ($Wi,@MSG[0]); + &sha256msg2 (@MSG[1],@MSG[0]); + &sha256rnds2 ($CDGH,$ABEF); # 52-55 + &pshufd ($Wi,$Wi,0x0e); + &movdqa ($TMP,@MSG[1]) + &palignr ($TMP,@MSG[0],4); + &sha256rnds2 ($ABEF,$CDGH); + &paddd (@MSG[2],$TMP); + + &movdqa ($Wi,&QWP(14*16-0x80,$K256)); + &paddd ($Wi,@MSG[1]); + &sha256rnds2 ($CDGH,$ABEF); # 56-59 + &pshufd ($Wi,$Wi,0x0e); + &sha256msg2 (@MSG[2],@MSG[1]); + &movdqa ($TMP,&QWP(0x100-0x80,$K256)); # byte swap mask + &sha256rnds2 ($ABEF,$CDGH); + + &movdqa ($Wi,&QWP(15*16-0x80,$K256)); + &paddd ($Wi,@MSG[2]); + &nop (); + &sha256rnds2 ($CDGH,$ABEF); # 60-63 + &pshufd ($Wi,$Wi,0x0e); + &cmp ($end,$inp); + &nop (); + &sha256rnds2 ($ABEF,$CDGH); + + &paddd ($CDGH,&QWP(16,"esp")); + &paddd ($ABEF,&QWP(0,"esp")); + &jnz (&label("loop_shaext")); + + &pshufd ($CDGH,$CDGH,0xb1); # DCHG + &pshufd ($TMP,$ABEF,0x1b); # FEBA + &pshufd ($ABEF,$ABEF,0xb1); # BAFE + &punpckhqdq ($ABEF,$CDGH); # DCBA + &palignr ($CDGH,$TMP,8); # HGFE + + &mov ("esp",&DWP(32+12,"esp")); + &movdqu (&QWP(0,$ctx),$ABEF); + &movdqu (&QWP(16,$ctx),$CDGH); +&function_end_A(); +} + +my @X = map("xmm$_",(0..3)); +my ($t0,$t1,$t2,$t3) = map("xmm$_",(4..7)); +my @AH = ($A,$T); + +&set_label("SSSE3",32); + &lea ("esp",&DWP(-96,"esp")); + # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack + &mov ($AH[0],&DWP(0,"esi")); + &mov ($AH[1],&DWP(4,"esi")); + &mov ("ecx",&DWP(8,"esi")); + &mov ("edi",&DWP(12,"esi")); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"ecx"); # magic + &mov (&DWP(8,"esp"),"ecx"); + &mov (&DWP(12,"esp"),"edi"); + &mov ($E,&DWP(16,"esi")); + &mov ("edi",&DWP(20,"esi")); + &mov ("ecx",&DWP(24,"esi")); + &mov ("esi",&DWP(28,"esi")); + #&mov (&DWP(16,"esp"),$E); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esp"),"esi"); + &movdqa ($t3,&QWP(256,$K256)); + &jmp (&label("grand_ssse3")); + +&set_label("grand_ssse3",16); + # load input, reverse byte order, add K256[0..15], save to stack + &movdqu (@X[0],&QWP(0,"edi")); + &movdqu (@X[1],&QWP(16,"edi")); + &movdqu (@X[2],&QWP(32,"edi")); + &movdqu (@X[3],&QWP(48,"edi")); + &add ("edi",64); + &pshufb (@X[0],$t3); + &mov (&DWP(96+4,"esp"),"edi"); + &pshufb (@X[1],$t3); + &movdqa ($t0,&QWP(0,$K256)); + &pshufb (@X[2],$t3); + &movdqa ($t1,&QWP(16,$K256)); + &paddd ($t0,@X[0]); + &pshufb (@X[3],$t3); + &movdqa ($t2,&QWP(32,$K256)); + &paddd ($t1,@X[1]); + &movdqa ($t3,&QWP(48,$K256)); + &movdqa (&QWP(32+0,"esp"),$t0); + &paddd ($t2,@X[2]); + &movdqa (&QWP(32+16,"esp"),$t1); + &paddd ($t3,@X[3]); + &movdqa (&QWP(32+32,"esp"),$t2); + &movdqa (&QWP(32+48,"esp"),$t3); + &jmp (&label("ssse3_00_47")); + +&set_label("ssse3_00_47",16); + &add ($K256,64); + +sub SSSE3_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 120 instructions + + eval(shift(@insns)); + &movdqa ($t0,@X[1]); + eval(shift(@insns)); # @ + eval(shift(@insns)); + &movdqa ($t3,@X[3]); + eval(shift(@insns)); + eval(shift(@insns)); + &palignr ($t0,@X[0],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); # @ + eval(shift(@insns)); + &palignr ($t3,@X[2],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa ($t1,$t0); + eval(shift(@insns)); # @ + 
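For reference, the interleaved psrld/pslld/pxor sequences in SSSE3_00_47 (and the AVX and NEON Xupdate variants further down in this patch) compute the standard SHA-256 message schedule four words at a time. A scalar Perl sketch of that recurrence, for illustration only (not code the patch adds):

#!/usr/bin/env perl
# W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]  (mod 2^32)
use strict; use warnings;

sub ror32  { my ($x, $n) = @_; (($x >> $n) | ($x << (32 - $n))) & 0xffffffff }
sub sigma0 { my $x = shift; ror32($x,  7) ^ ror32($x, 18) ^ ($x >>  3) }
sub sigma1 { my $x = shift; ror32($x, 17) ^ ror32($x, 19) ^ ($x >> 10) }

sub expand {                 # takes the 16 input words, returns all 64
    my @W = @_;
    for my $i (16 .. 63) {
        $W[$i] = (sigma1($W[$i-2]) + $W[$i-7]
                + sigma0($W[$i-15]) + $W[$i-16]) & 0xffffffff;
    }
    return @W;
}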
eval(shift(@insns)); + &movdqa ($t2,$t0); + eval(shift(@insns)); + eval(shift(@insns)); + &psrld ($t0,3); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &paddd (@X[0],$t3); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &psrld ($t2,7); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + eval(shift(@insns)); + &pshufd ($t3,@X[3],0b11111010); # X[14..15] + eval(shift(@insns)); + eval(shift(@insns)); + &pslld ($t1,32-18); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t0,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + &psrld ($t2,18-7); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t0,$t1); + eval(shift(@insns)); + eval(shift(@insns)); + &pslld ($t1,18-7); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t0,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa ($t2,$t3); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t0,$t1); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &psrld ($t3,10); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &paddd (@X[0],$t0); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &psrlq ($t2,17); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + &psrlq ($t2,19-17); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + &pshufd ($t3,$t3,0b10000000); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + eval(shift(@insns)); + &psrldq ($t3,8); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[0],$t3); # X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); # @ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + eval(shift(@insns)); + &pshufd ($t3,@X[0],0b01010000); # X[16..17] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa ($t2,$t3); + eval(shift(@insns)); # @ + &psrld ($t3,10); + eval(shift(@insns)); + &psrlq ($t2,17); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + &psrlq ($t2,19-17); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &pxor ($t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &pshufd ($t3,$t3,0b00001000); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &movdqa ($t2,&QWP(16*$j,$K256)); + eval(shift(@insns)); + eval(shift(@insns)); + &pslldq ($t3,8); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); # @ + &paddd (@X[0],$t3); # X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd ($t2,@X[0]); + eval(shift(@insns)); # @ + + foreach (@insns) { eval; } # remaining instructions + + &movdqa (&QWP(32+16*$j,"esp"),$t2); +} + +sub body_00_15 () { + ( + '&mov ("ecx",$E);', + '&ror ($E,25-11);', + '&mov ("esi",&off($f));', + '&xor ($E,"ecx");', + '&mov ("edi",&off($g));', + '&xor 
("esi","edi");', + '&ror ($E,11-6);', + '&and ("esi","ecx");', + '&mov (&off($e),"ecx");', # save $E, modulo-scheduled + '&xor ($E,"ecx");', + '&xor ("edi","esi");', # Ch(e,f,g) + '&ror ($E,6);', # T = Sigma1(e) + '&mov ("ecx",$AH[0]);', + '&add ($E,"edi");', # T += Ch(e,f,g) + '&mov ("edi",&off($b));', + '&mov ("esi",$AH[0]);', + + '&ror ("ecx",22-13);', + '&mov (&off($a),$AH[0]);', # save $A, modulo-scheduled + '&xor ("ecx",$AH[0]);', + '&xor ($AH[0],"edi");', # a ^= b, (b^c) in next round + '&add ($E,&off($h));', # T += h + '&ror ("ecx",13-2);', + '&and ($AH[1],$AH[0]);', # (b^c) &= (a^b) + '&xor ("ecx","esi");', + '&add ($E,&DWP(32+4*($i&15),"esp"));', # T += K[i]+X[i] + '&xor ($AH[1],"edi");', # h = Maj(a,b,c) = Ch(a^b,c,b) + '&ror ("ecx",2);', # Sigma0(a) + + '&add ($AH[1],$E);', # h += T + '&add ($E,&off($d));', # d += T + '&add ($AH[1],"ecx");'. # h += Sigma0(a) + + '@AH = reverse(@AH); $i++;' # rotate(a,h) + ); +} + + for ($i=0,$j=0; $j<4; $j++) { + &SSSE3_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmp (&DWP(16*$j,$K256),0x00010203); + &jne (&label("ssse3_00_47")); + + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } + + &mov ("esi",&DWP(96,"esp")); #ctx + #&mov ($AH[0],&DWP(0,"esp")); + &xor ($AH[1],"edi"); #&mov ($AH[1],&DWP(4,"esp")); + #&mov ("edi", &DWP(8,"esp")); + &mov ("ecx",&DWP(12,"esp")); + &add ($AH[0],&DWP(0,"esi")); + &add ($AH[1],&DWP(4,"esi")); + &add ("edi",&DWP(8,"esi")); + &add ("ecx",&DWP(12,"esi")); + &mov (&DWP(0,"esi"),$AH[0]); + &mov (&DWP(4,"esi"),$AH[1]); + &mov (&DWP(8,"esi"),"edi"); + &mov (&DWP(12,"esi"),"ecx"); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"edi"); # magic + &mov (&DWP(8,"esp"),"edi"); + &mov (&DWP(12,"esp"),"ecx"); + #&mov ($E,&DWP(16,"esp")); + &mov ("edi",&DWP(20,"esp")); + &mov ("ecx",&DWP(24,"esp")); + &add ($E,&DWP(16,"esi")); + &add ("edi",&DWP(20,"esi")); + &add ("ecx",&DWP(24,"esi")); + &mov (&DWP(16,"esi"),$E); + &mov (&DWP(20,"esi"),"edi"); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(28,"esp")); + &mov (&DWP(24,"esi"),"ecx"); + #&mov (&DWP(16,"esp"),$E); + &add ("edi",&DWP(28,"esi")); + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esi"),"edi"); + &mov (&DWP(28,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + + &movdqa ($t3,&QWP(64,$K256)); + &sub ($K256,3*64); # rewind K + &cmp ("edi",&DWP(96+8,"esp")); # are we done yet? 
+ &jb (&label("grand_ssse3")); + + &mov ("esp",&DWP(96+12,"esp")); # restore sp +&function_end_A(); + if ($avx) { +&set_label("AVX",32); + if ($avx>1) { + &and ("edx",1<<8|1<<3); # check for BMI2+BMI1 + &cmp ("edx",1<<8|1<<3); + &je (&label("AVX_BMI")); + } + &lea ("esp",&DWP(-96,"esp")); + &vzeroall (); + # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack + &mov ($AH[0],&DWP(0,"esi")); + &mov ($AH[1],&DWP(4,"esi")); + &mov ("ecx",&DWP(8,"esi")); + &mov ("edi",&DWP(12,"esi")); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"ecx"); # magic + &mov (&DWP(8,"esp"),"ecx"); + &mov (&DWP(12,"esp"),"edi"); + &mov ($E,&DWP(16,"esi")); + &mov ("edi",&DWP(20,"esi")); + &mov ("ecx",&DWP(24,"esi")); + &mov ("esi",&DWP(28,"esi")); + #&mov (&DWP(16,"esp"),$E); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esp"),"esi"); + &vmovdqa ($t3,&QWP(256,$K256)); + &jmp (&label("grand_avx")); + +&set_label("grand_avx",32); + # load input, reverse byte order, add K256[0..15], save to stack + &vmovdqu (@X[0],&QWP(0,"edi")); + &vmovdqu (@X[1],&QWP(16,"edi")); + &vmovdqu (@X[2],&QWP(32,"edi")); + &vmovdqu (@X[3],&QWP(48,"edi")); + &add ("edi",64); + &vpshufb (@X[0],@X[0],$t3); + &mov (&DWP(96+4,"esp"),"edi"); + &vpshufb (@X[1],@X[1],$t3); + &vpshufb (@X[2],@X[2],$t3); + &vpaddd ($t0,@X[0],&QWP(0,$K256)); + &vpshufb (@X[3],@X[3],$t3); + &vpaddd ($t1,@X[1],&QWP(16,$K256)); + &vpaddd ($t2,@X[2],&QWP(32,$K256)); + &vpaddd ($t3,@X[3],&QWP(48,$K256)); + &vmovdqa (&QWP(32+0,"esp"),$t0); + &vmovdqa (&QWP(32+16,"esp"),$t1); + &vmovdqa (&QWP(32+32,"esp"),$t2); + &vmovdqa (&QWP(32+48,"esp"),$t3); + &jmp (&label("avx_00_47")); + +&set_label("avx_00_47",16); + &add ($K256,64); + +sub Xupdate_AVX () { + ( + '&vpalignr ($t0,@X[1],@X[0],4);', # X[1..4] + '&vpalignr ($t3,@X[3],@X[2],4);', # X[9..12] + '&vpsrld ($t2,$t0,7);', + '&vpaddd (@X[0],@X[0],$t3);', # X[0..3] += X[9..16] + '&vpsrld ($t3,$t0,3);', + '&vpslld ($t1,$t0,14);', + '&vpxor ($t0,$t3,$t2);', + '&vpshufd ($t3,@X[3],0b11111010)',# X[14..15] + '&vpsrld ($t2,$t2,18-7);', + '&vpxor ($t0,$t0,$t1);', + '&vpslld ($t1,$t1,25-14);', + '&vpxor ($t0,$t0,$t2);', + '&vpsrld ($t2,$t3,10);', + '&vpxor ($t0,$t0,$t1);', # sigma0(X[1..4]) + '&vpsrlq ($t1,$t3,17);', + '&vpaddd (@X[0],@X[0],$t0);', # X[0..3] += sigma0(X[1..4]) + '&vpxor ($t2,$t2,$t1);', + '&vpsrlq ($t3,$t3,19);', + '&vpxor ($t2,$t2,$t3);', # sigma1(X[14..15] + '&vpshufd ($t3,$t2,0b10000100);', + '&vpsrldq ($t3,$t3,8);', + '&vpaddd (@X[0],@X[0],$t3);', # X[0..1] += sigma1(X[14..15]) + '&vpshufd ($t3,@X[0],0b01010000)',# X[16..17] + '&vpsrld ($t2,$t3,10);', + '&vpsrlq ($t1,$t3,17);', + '&vpxor ($t2,$t2,$t1);', + '&vpsrlq ($t3,$t3,19);', + '&vpxor ($t2,$t2,$t3);', # sigma1(X[16..17] + '&vpshufd ($t3,$t2,0b11101000);', + '&vpslldq ($t3,$t3,8);', + '&vpaddd (@X[0],@X[0],$t3);' # X[2..3] += sigma1(X[16..17]) + ); +} + +local *ror = sub { &shrd(@_[0],@_) }; +sub AVX_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 120 instructions +my $insn; + + foreach (Xupdate_AVX()) { # 31 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + eval($insn = shift(@insns)); + eval(shift(@insns)) if ($insn =~ /rorx/ && @insns[0] =~ /rorx/); + } + &vpaddd ($t2,@X[0],&QWP(16*$j,$K256)); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (&QWP(32+16*$j,"esp"),$t2); +} + + for ($i=0,$j=0; $j<4; $j++) { + &AVX_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + 
} + &cmp (&DWP(16*$j,$K256),0x00010203); + &jne (&label("avx_00_47")); + + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } + + &mov ("esi",&DWP(96,"esp")); #ctx + #&mov ($AH[0],&DWP(0,"esp")); + &xor ($AH[1],"edi"); #&mov ($AH[1],&DWP(4,"esp")); + #&mov ("edi", &DWP(8,"esp")); + &mov ("ecx",&DWP(12,"esp")); + &add ($AH[0],&DWP(0,"esi")); + &add ($AH[1],&DWP(4,"esi")); + &add ("edi",&DWP(8,"esi")); + &add ("ecx",&DWP(12,"esi")); + &mov (&DWP(0,"esi"),$AH[0]); + &mov (&DWP(4,"esi"),$AH[1]); + &mov (&DWP(8,"esi"),"edi"); + &mov (&DWP(12,"esi"),"ecx"); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"edi"); # magic + &mov (&DWP(8,"esp"),"edi"); + &mov (&DWP(12,"esp"),"ecx"); + #&mov ($E,&DWP(16,"esp")); + &mov ("edi",&DWP(20,"esp")); + &mov ("ecx",&DWP(24,"esp")); + &add ($E,&DWP(16,"esi")); + &add ("edi",&DWP(20,"esi")); + &add ("ecx",&DWP(24,"esi")); + &mov (&DWP(16,"esi"),$E); + &mov (&DWP(20,"esi"),"edi"); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(28,"esp")); + &mov (&DWP(24,"esi"),"ecx"); + #&mov (&DWP(16,"esp"),$E); + &add ("edi",&DWP(28,"esi")); + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esi"),"edi"); + &mov (&DWP(28,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + + &vmovdqa ($t3,&QWP(64,$K256)); + &sub ($K256,3*64); # rewind K + &cmp ("edi",&DWP(96+8,"esp")); # are we done yet? + &jb (&label("grand_avx")); + + &mov ("esp",&DWP(96+12,"esp")); # restore sp + &vzeroall (); +&function_end_A(); + if ($avx>1) { +sub bodyx_00_15 () { # +10% + ( + '&rorx ("ecx",$E,6)', + '&rorx ("esi",$E,11)', + '&mov (&off($e),$E)', # save $E, modulo-scheduled + '&rorx ("edi",$E,25)', + '&xor ("ecx","esi")', + '&andn ("esi",$E,&off($g))', + '&xor ("ecx","edi")', # Sigma1(e) + '&and ($E,&off($f))', + '&mov (&off($a),$AH[0]);', # save $A, modulo-scheduled + '&or ($E,"esi")', # T = Ch(e,f,g) + + '&rorx ("edi",$AH[0],2)', + '&rorx ("esi",$AH[0],13)', + '&lea ($E,&DWP(0,$E,"ecx"))', # T += Sigma1(e) + '&rorx ("ecx",$AH[0],22)', + '&xor ("esi","edi")', + '&mov ("edi",&off($b))', + '&xor ("ecx","esi")', # Sigma0(a) + + '&xor ($AH[0],"edi")', # a ^= b, (b^c) in next round + '&add ($E,&off($h))', # T += h + '&and ($AH[1],$AH[0])', # (b^c) &= (a^b) + '&add ($E,&DWP(32+4*($i&15),"esp"))', # T += K[i]+X[i] + '&xor ($AH[1],"edi")', # h = Maj(a,b,c) = Ch(a^b,c,b) + + '&add ("ecx",$E)', # h += T + '&add ($E,&off($d))', # d += T + '&lea ($AH[1],&DWP(0,$AH[1],"ecx"));'. 
# h += Sigma0(a) + + '@AH = reverse(@AH); $i++;' # rotate(a,h) + ); +} + +&set_label("AVX_BMI",32); + &lea ("esp",&DWP(-96,"esp")); + &vzeroall (); + # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack + &mov ($AH[0],&DWP(0,"esi")); + &mov ($AH[1],&DWP(4,"esi")); + &mov ("ecx",&DWP(8,"esi")); + &mov ("edi",&DWP(12,"esi")); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"ecx"); # magic + &mov (&DWP(8,"esp"),"ecx"); + &mov (&DWP(12,"esp"),"edi"); + &mov ($E,&DWP(16,"esi")); + &mov ("edi",&DWP(20,"esi")); + &mov ("ecx",&DWP(24,"esi")); + &mov ("esi",&DWP(28,"esi")); + #&mov (&DWP(16,"esp"),$E); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esp"),"esi"); + &vmovdqa ($t3,&QWP(256,$K256)); + &jmp (&label("grand_avx_bmi")); + +&set_label("grand_avx_bmi",32); + # load input, reverse byte order, add K256[0..15], save to stack + &vmovdqu (@X[0],&QWP(0,"edi")); + &vmovdqu (@X[1],&QWP(16,"edi")); + &vmovdqu (@X[2],&QWP(32,"edi")); + &vmovdqu (@X[3],&QWP(48,"edi")); + &add ("edi",64); + &vpshufb (@X[0],@X[0],$t3); + &mov (&DWP(96+4,"esp"),"edi"); + &vpshufb (@X[1],@X[1],$t3); + &vpshufb (@X[2],@X[2],$t3); + &vpaddd ($t0,@X[0],&QWP(0,$K256)); + &vpshufb (@X[3],@X[3],$t3); + &vpaddd ($t1,@X[1],&QWP(16,$K256)); + &vpaddd ($t2,@X[2],&QWP(32,$K256)); + &vpaddd ($t3,@X[3],&QWP(48,$K256)); + &vmovdqa (&QWP(32+0,"esp"),$t0); + &vmovdqa (&QWP(32+16,"esp"),$t1); + &vmovdqa (&QWP(32+32,"esp"),$t2); + &vmovdqa (&QWP(32+48,"esp"),$t3); + &jmp (&label("avx_bmi_00_47")); + +&set_label("avx_bmi_00_47",16); + &add ($K256,64); + + for ($i=0,$j=0; $j<4; $j++) { + &AVX_00_47($j,\&bodyx_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmp (&DWP(16*$j,$K256),0x00010203); + &jne (&label("avx_bmi_00_47")); + + for ($i=0; $i<16; ) { + foreach(bodyx_00_15()) { eval; } + } + + &mov ("esi",&DWP(96,"esp")); #ctx + #&mov ($AH[0],&DWP(0,"esp")); + &xor ($AH[1],"edi"); #&mov ($AH[1],&DWP(4,"esp")); + #&mov ("edi", &DWP(8,"esp")); + &mov ("ecx",&DWP(12,"esp")); + &add ($AH[0],&DWP(0,"esi")); + &add ($AH[1],&DWP(4,"esi")); + &add ("edi",&DWP(8,"esi")); + &add ("ecx",&DWP(12,"esi")); + &mov (&DWP(0,"esi"),$AH[0]); + &mov (&DWP(4,"esi"),$AH[1]); + &mov (&DWP(8,"esi"),"edi"); + &mov (&DWP(12,"esi"),"ecx"); + #&mov (&DWP(0,"esp"),$AH[0]); + &mov (&DWP(4,"esp"),$AH[1]); + &xor ($AH[1],"edi"); # magic + &mov (&DWP(8,"esp"),"edi"); + &mov (&DWP(12,"esp"),"ecx"); + #&mov ($E,&DWP(16,"esp")); + &mov ("edi",&DWP(20,"esp")); + &mov ("ecx",&DWP(24,"esp")); + &add ($E,&DWP(16,"esi")); + &add ("edi",&DWP(20,"esi")); + &add ("ecx",&DWP(24,"esi")); + &mov (&DWP(16,"esi"),$E); + &mov (&DWP(20,"esi"),"edi"); + &mov (&DWP(20,"esp"),"edi"); + &mov ("edi",&DWP(28,"esp")); + &mov (&DWP(24,"esi"),"ecx"); + #&mov (&DWP(16,"esp"),$E); + &add ("edi",&DWP(28,"esi")); + &mov (&DWP(24,"esp"),"ecx"); + &mov (&DWP(28,"esi"),"edi"); + &mov (&DWP(28,"esp"),"edi"); + &mov ("edi",&DWP(96+4,"esp")); # inp + + &vmovdqa ($t3,&QWP(64,$K256)); + &sub ($K256,3*64); # rewind K + &cmp ("edi",&DWP(96+8,"esp")); # are we done yet? 
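The "# magic" xor of b^c that the new x86 paths carry from one round into the next relies on the identity Maj(a,b,c) == Ch(a^b,c,b): with b^c already in a register, Maj needs only an and/xor pair on values that are reused in the next round, which is what the "(b^c) &= (a^b)" / "h = Maj(a,b,c) = Ch(a^b,c,b)" comment pairs are pointing at. A quick randomized check of the identity, for illustration only (not code the patch adds):

#!/usr/bin/env perl
# Maj(a,b,c) = (a&b)^(a&c)^(b&c);  Ch(x,y,z) = (x&y)^(~x&z)
# The round code computes Maj as ((b^c) & (a^b)) ^ b, i.e. Ch(a^b,c,b).
use strict; use warnings;

sub Maj { my ($a, $b, $c) = @_; ($a & $b) ^ ($a & $c) ^ ($b & $c) }
sub Ch  { my ($x, $y, $z) = @_; ($x & $y) ^ (~$x & $z) }

for (1 .. 1000) {
    my ($a, $b, $c) = map { int(rand(2**32)) } 1 .. 3;
    die "identity does not hold" if (Maj($a, $b, $c) & 0xffffffff)
                                 != (Ch($a ^ $b, $c, $b) & 0xffffffff);
}
print "Maj(a,b,c) == Ch(a^b,c,b) for all tested inputs\n";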
+ &jb (&label("grand_avx_bmi")); + + &mov ("esp",&DWP(96+12,"esp")); # restore sp + &vzeroall (); +&function_end_A(); + } + } + }}} +&function_end_B("sha256_block_data_order"); + &asm_finish(); diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl index 9c84e8d93c301a..f14c9c3cb5a190 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -21,15 +21,27 @@ # February 2011. # # Profiler-assisted and platform-specific optimization resulted in 16% -# improvement on Cortex A8 core and ~17 cycles per processed byte. +# improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +# September 2013. +# +# Add NEON implementation. On Cortex A8 it was measured to process one +# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +# code (meaning that latter performs sub-optimally, nothing was done +# about it). + +# May 2014. +# +# Add ARMv8 code path performing at 2.0 cpb on Apple A7. while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $ctx="r0"; $t0="r0"; -$inp="r1"; $t3="r1"; +$inp="r1"; $t4="r1"; $len="r2"; $t1="r2"; -$T1="r3"; +$T1="r3"; $t3="r3"; $A="r4"; $B="r5"; $C="r6"; @@ -52,71 +64,88 @@ sub BODY_00_15 { $code.=<<___ if ($i<16); #if __ARM_ARCH__>=7 - ldr $T1,[$inp],#4 + @ ldr $t1,[$inp],#4 @ $i +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + rev $t1,$t1 #else - ldrb $T1,[$inp,#3] @ $i + @ ldrb $t1,[$inp,#3] @ $i + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past ldrb $t2,[$inp,#2] - ldrb $t1,[$inp,#1] - ldrb $t0,[$inp],#4 - orr $T1,$T1,$t2,lsl#8 - orr $T1,$T1,$t1,lsl#16 - orr $T1,$T1,$t0,lsl#24 + ldrb $t0,[$inp,#1] + orr $t1,$t1,$t2,lsl#8 + ldrb $t2,[$inp],#4 + orr $t1,$t1,$t0,lsl#16 +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + orr $t1,$t1,$t2,lsl#24 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) #endif ___ $code.=<<___; - mov $t0,$e,ror#$Sigma1[0] ldr $t2,[$Ktbl],#4 @ *K256++ - eor $t0,$t0,$e,ror#$Sigma1[1] + add $h,$h,$t1 @ h+=X[i] + str $t1,[sp,#`$i%16`*4] eor $t1,$f,$g -#if $i>=16 - add $T1,$T1,$t3 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev $T1,$T1 -#endif -#if $i==15 - str $inp,[sp,#17*4] @ leave room for $t3 -#endif - eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) + add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) and $t1,$t1,$e - str $T1,[sp,#`$i%16`*4] - add $T1,$T1,$t0 + add $h,$h,$t2 @ h+=K256[i] eor $t1,$t1,$g @ Ch(e,f,g) - add $T1,$T1,$h - mov $h,$a,ror#$Sigma0[0] - add $T1,$T1,$t1 - eor $h,$h,$a,ror#$Sigma0[1] - add $T1,$T1,$t2 - eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) -#if $i>=15 - ldr $t3,[sp,#`($i+2)%16`*4] @ from BODY_16_xx + eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` + add $h,$h,$t1 @ h+=Ch(e,f,g) +#if $i==31 + and $t2,$t2,#0xff + cmp $t2,#0xf2 @ done? 
#endif - orr $t0,$a,$b - and $t1,$a,$b - and $t0,$t0,$c - add $h,$h,$T1 - orr $t0,$t0,$t1 @ Maj(a,b,c) - add $d,$d,$T1 - add $h,$h,$t0 +#if $i<15 +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 @ prefetch +# else + ldrb $t1,[$inp,#3] +# endif + eor $t2,$a,$b @ a^b, b^c in next round +#else + ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx + eor $t2,$a,$b @ a^b, b^c in next round + ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx +#endif + eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) + and $t3,$t3,$t2 @ (b^c)&=(a^b) + add $d,$d,$h @ d+=h + eor $t3,$t3,$b @ Maj(a,b,c) + add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) + @ add $h,$h,$t3 @ h+=Maj(a,b,c) ___ + ($t2,$t3)=($t3,$t2); } sub BODY_16_XX { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - @ ldr $t3,[sp,#`($i+1)%16`*4] @ $i - ldr $t2,[sp,#`($i+14)%16`*4] - mov $t0,$t3,ror#$sigma0[0] - ldr $T1,[sp,#`($i+0)%16`*4] - eor $t0,$t0,$t3,ror#$sigma0[1] - ldr $t1,[sp,#`($i+9)%16`*4] - eor $t0,$t0,$t3,lsr#$sigma0[2] @ sigma0(X[i+1]) - mov $t3,$t2,ror#$sigma1[0] - add $T1,$T1,$t0 - eor $t3,$t3,$t2,ror#$sigma1[1] - add $T1,$T1,$t1 - eor $t3,$t3,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) - @ add $T1,$T1,$t3 + @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t4,[sp,#`($i+14)%16`*4] + mov $t0,$t1,ror#$sigma0[0] + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + mov $t2,$t4,ror#$sigma1[0] + eor $t0,$t0,$t1,ror#$sigma0[1] + eor $t2,$t2,$t4,ror#$sigma1[1] + eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) + ldr $t1,[sp,#`($i+0)%16`*4] + eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) + ldr $t4,[sp,#`($i+9)%16`*4] + + add $t2,$t2,$t0 + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 + add $t1,$t1,$t2 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + add $t1,$t1,$t4 @ X[i] ___ &BODY_00_15(@_); } @@ -147,46 +176,66 @@ sub BODY_16_XX { .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add $len,$inp,$len,lsl#6 @ len to point at the end of inp +#if __ARM_MAX_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} - sub $Ktbl,r3,#256 @ K256 + sub $Ktbl,r3,#256+32 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 +# else + ldrb $t1,[$inp,#3] +# endif + eor $t3,$B,$C @ magic + eor $t2,$t2,$t2 ___ for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } $code.=".Lrounds_16_xx:\n"; for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - and $t2,$t2,#0xff - cmp $t2,#0xf2 + ldreq $t3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx - ldr $T1,[sp,#16*4] @ pull ctx - ldr $t0,[$T1,#0] - ldr $t1,[$T1,#4] - ldr $t2,[$T1,#8] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t0,[$t3,#0] + ldr $t1,[$t3,#4] + ldr $t2,[$t3,#8] add $A,$A,$t0 - ldr $t0,[$T1,#12] + ldr $t0,[$t3,#12] add $B,$B,$t1 - ldr $t1,[$T1,#16] + ldr $t1,[$t3,#16] add $C,$C,$t2 - ldr $t2,[$T1,#20] + ldr $t2,[$t3,#20] add $D,$D,$t0 - ldr $t0,[$T1,#24] + ldr $t0,[$t3,#24] add $E,$E,$t1 - ldr $t1,[$T1,#28] + ldr $t1,[$t3,#28] add $F,$F,$t2 ldr $inp,[sp,#17*4] @ pull inp ldr $t2,[sp,#18*4] @ pull inp+len add $G,$G,$t0 add 
$H,$H,$t1 - stmia $T1,{$A,$B,$C,$D,$E,$F,$G,$H} + stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} cmp $inp,$t2 sub $Ktbl,$Ktbl,#256 @ rewind Ktbl bne .Loop @@ -200,12 +249,413 @@ sub BODY_16_XX { moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order -.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by " +.size sha256_block_data_order,.-sha256_block_data_order +___ +###################################################################### +# NEON stuff +# +{{{ +my @X=map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); +my $Xfer=$t4; +my $j=0; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 
(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + while($#insns>=2) { eval(shift(@insns)); } + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&eor ($t1,$f,$g)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&and ($t1,$t1,$e)', + '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&eor ($t1,$t1,$g)', # Ch(e,f,g) + '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&ldr ($t1,"[sp,#64]") if ($j==31)', + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&add ($d,$d,$h)', # d+=h + '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + mov $t2,sp + sub sp,sp,#16*4+16 @ alloca + sub $Ktbl,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {@X[0]},[$inp]! + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + vld1.32 {$T0},[$Ktbl,:128]! + vld1.32 {$T1},[$Ktbl,:128]! + vld1.32 {$T2},[$Ktbl,:128]! + vld1.32 {$T3},[$Ktbl,:128]! + vrev32.8 @X[0],@X[0] @ yes, even on + str $ctx,[sp,#64] + vrev32.8 @X[1],@X[1] @ big-endian + str $inp,[sp,#68] + mov $Xfer,sp + vrev32.8 @X[2],@X[2] + str $len,[sp,#72] + vrev32.8 @X[3],@X[3] + str $t2,[sp,#76] @ save original sp + vadd.i32 $T0,$T0,@X[0] + vadd.i32 $T1,$T1,@X[1] + vst1.32 {$T0},[$Xfer,:128]! + vadd.i32 $T2,$T2,@X[2] + vst1.32 {$T1},[$Xfer,:128]! + vadd.i32 $T3,$T3,@X[3] + vst1.32 {$T2},[$Xfer,:128]! + vst1.32 {$T3},[$Xfer,:128]! + + ldmia $ctx,{$A-$H} + sub $Xfer,$Xfer,#64 + ldr $t1,[sp,#0] + eor $t2,$t2,$t2 + eor $t3,$B,$C + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + teq $t1,#0 @ check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + ldr $inp,[sp,#68] + ldr $t0,[sp,#72] + sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl + teq $inp,$t0 + subeq $inp,$inp,#64 @ avoid SEGV + vld1.8 {@X[0]},[$inp]! @ load next input block + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! 
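# For reference (not part of the patch): Xupdate() above produces four message
# schedule words per call, with each vshr/vsli pair standing in for one 32-bit
# rotate.  The scalar recurrence it vectorizes is sketched below; sub names are
# the editor's, the rotate/shift counts are the @sigma0/@sigma1 constants used
# by these scripts.
use strict; use warnings;

sub ror32  { my ($x, $n) = @_; (($x >> $n) | ($x << (32 - $n))) & 0xffffffff }
sub sigma0 { my $x = shift; ror32($x, 7)  ^ ror32($x, 18) ^ ($x >> 3)  }
sub sigma1 { my $x = shift; ror32($x, 17) ^ ror32($x, 19) ^ ($x >> 10) }

# Scalar form of the message expansion that Xupdate() performs four at a time.
sub expand_schedule {
    my @W = @_;                       # W[0..15], the 16 big-endian input words
    for my $t (16 .. 63) {
        $W[$t] = (sigma1($W[$t-2]) + $W[$t-7] +
                  sigma0($W[$t-15]) + $W[$t-16]) & 0xffffffff;
    }
    return @W;                        # full 64-entry schedule
}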
+ strne $inp,[sp,#68] + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + ldr $t0,[$t1,#0] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t2,[$t1,#4] + ldr $t3,[$t1,#8] + ldr $t4,[$t1,#12] + add $A,$A,$t0 @ accumulate + ldr $t0,[$t1,#16] + add $B,$B,$t2 + ldr $t2,[$t1,#20] + add $C,$C,$t3 + ldr $t3,[$t1,#24] + add $D,$D,$t4 + ldr $t4,[$t1,#28] + add $E,$E,$t0 + str $A,[$t1],#4 + add $F,$F,$t2 + str $B,[$t1],#4 + add $G,$G,$t3 + str $C,[$t1],#4 + add $H,$H,$t4 + str $D,[$t1],#4 + stmia $t1,{$E-$H} + + movne $Xfer,sp + ldrne $t1,[sp,#0] + eorne $t2,$t2,$t2 + ldreq sp,[sp,#76] @ restore original sp + eorne $t3,$B,$C + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +___ +}}} +###################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); +my @MSG=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); +my $Ktbl="r3"; + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {$ABCD,$EFGH},[$ctx] + sub $Ktbl,r3,#sha256_block_data_order-K256 + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vld1.32 {$W0},[$Ktbl]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + vrev32.8 @MSG[2],@MSG[2] + vrev32.8 @MSG[3],@MSG[3] + vmov $ABCD_SAVE,$ABCD @ offload + vmov $EFGH_SAVE,$EFGH + teq $inp,$len +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vld1.32 {$W0},[$Ktbl]! + vadd.i32 $W1,$W1,@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vld1.32 {$W1},[$Ktbl] + vadd.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#256-16 @ rewind + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vadd.i32 $W1,$W1,@MSG[3] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + bne .Loop_v8 + + vst1.32 {$ABCD,$EFGH},[$ctx] + + ret @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 2 +#if __ARM_MAX_ARCH__>=7 +.comm OPENSSL_armcap_P,4,4 +#endif ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 -print $code; +{ my %opcode = ( + "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, + "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. 
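# For reference (not part of the patch): the %opcode table above holds the base
# encodings with all register fields zero; unsha256() ORs the q-register
# numbers into those fields and must emit the resulting 32-bit word least
# significant byte first, since these ARM SIMD encodings are stored
# little-endian.  A standalone sketch of just that emission step (helper name
# is illustrative only):
#
#   sub emit_word {                     # 32-bit instruction word -> ".byte" line
#       my ($word, $comment) = @_;
#       sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t\@ %s",
#               $word & 0xff, ($word >> 8) & 0xff,
#               ($word >> 16) & 0xff, ($word >> 24) & 0xff, $comment;
#   }
#
#   # e.g. emit_word(0xf3000c40, "sha256h q0,q0,q0") returns
#   #      ".byte  0x40,0x0c,0x00,0xf3  @ sha256h q0,q0,q0"
#   # (with all register fields zero, the table entry is the complete word)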
+ # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; + + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl new file mode 100644 index 00000000000000..adf2ddccd18b0c --- /dev/null +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl @@ -0,0 +1,1560 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# Multi-buffer SHA256 procedure processes n buffers in parallel by +# placing buffer data to designated lane of SIMD register. n is +# naturally limited to 4 on pre-AVX2 processors and to 8 on +# AVX2-capable processors such as Haswell. +# +# this +aesni(i) sha256 aesni-sha256 gain(iv) +# ------------------------------------------------------------------- +# Westmere(ii) 23.3/n +1.28=7.11(n=4) 12.3 +3.75=16.1 +126% +# Atom(ii) 38.7/n +3.93=13.6(n=4) 20.8 +5.69=26.5 +95% +# Sandy Bridge (20.5 +5.15=25.7)/n 11.6 13.0 +103% +# Ivy Bridge (20.4 +5.14=25.5)/n 10.3 11.6 +82% +# Haswell(iii) (21.0 +5.00=26.0)/n 7.80 8.79 +170% +# Bulldozer (21.6 +5.76=27.4)/n 13.6 13.7 +100% +# +# (i) multi-block CBC encrypt with 128-bit key; +# (ii) (HASH+AES)/n does not apply to Westmere for n>3 and Atom, +# because of lower AES-NI instruction throughput, nor is there +# AES-NI-SHA256 stitch for these processors; +# (iii) "this" is for n=8, when we gather twice as much data, result +# for n=4 is 20.3+4.44=24.7; +# (iv) presented improvement coefficients are asymptotic limits and +# in real-life application are somewhat lower, e.g. 
for 2KB +# fragments they range from 75% to 130% (on Haswell); + +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or +die "can't locate x86_64-xlate.pl"; + +$avx=0; + +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; + +# void sha256_multi_block ( +# struct { unsigned int A[8]; +# unsigned int B[8]; +# unsigned int C[8]; +# unsigned int D[8]; +# unsigned int E[8]; +# unsigned int F[8]; +# unsigned int G[8]; +# unsigned int H[8]; } *ctx, +# struct { void *ptr; int blocks; } inp[8], +# int num); /* 1 or 2 */ +# +$ctx="%rdi"; # 1st arg +$inp="%rsi"; # 2nd arg +$num="%edx"; # 3rd arg +@ptr=map("%r$_",(8..11)); +$Tbl="%rbp"; + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%xmm$_",(8..15)); +($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%xmm$_",(0..7)); + +$REG_SZ=16; + +sub Xi_off { +my $off = shift; + + $off %= 16; $off *= $REG_SZ; + $off<256 ? "$off-128(%rax)" : "$off-256-128(%rbx)"; +} + +sub ROUND_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; + +$code.=<<___ if ($i<15); + movd `4*$i`(@ptr[0]),$Xi + movd `4*$i`(@ptr[1]),$t1 + movd `4*$i`(@ptr[2]),$t2 + movd `4*$i`(@ptr[3]),$t3 + punpckldq $t2,$Xi + punpckldq $t3,$t1 + punpckldq $t1,$Xi +___ +$code.=<<___ if ($i==15); + movd `4*$i`(@ptr[0]),$Xi + lea `16*4`(@ptr[0]),@ptr[0] + movd `4*$i`(@ptr[1]),$t1 + lea `16*4`(@ptr[1]),@ptr[1] + movd `4*$i`(@ptr[2]),$t2 + lea `16*4`(@ptr[2]),@ptr[2] + movd `4*$i`(@ptr[3]),$t3 + lea `16*4`(@ptr[3]),@ptr[3] + punpckldq $t2,$Xi + punpckldq $t3,$t1 + punpckldq $t1,$Xi +___ +$code.=<<___; + movdqa $e,$sigma + `"pshufb $Xn,$Xi" if ($i<=15 && ($i&1)==0)` + movdqa $e,$t3 + `"pshufb $Xn,$Xi" if ($i<=15 && ($i&1)==1)` + psrld \$6,$sigma + movdqa $e,$t2 + pslld \$7,$t3 + movdqa $Xi,`&Xi_off($i)` + paddd $h,$Xi # Xi+=h + + psrld \$11,$t2 + pxor $t3,$sigma + pslld \$21-7,$t3 + paddd `32*($i%8)-128`($Tbl),$Xi # Xi+=K[round] + pxor $t2,$sigma + + psrld \$25-11,$t2 + movdqa $e,$t1 + `"prefetcht0 63(@ptr[0])" if ($i==15)` + pxor $t3,$sigma + movdqa $e,$axb # borrow $axb + pslld \$26-21,$t3 + pandn $g,$t1 + pand $f,$axb + pxor $t2,$sigma + + `"prefetcht0 63(@ptr[1])" if ($i==15)` + movdqa $a,$t2 + pxor $t3,$sigma # Sigma1(e) + movdqa $a,$t3 + psrld \$2,$t2 + paddd $sigma,$Xi # Xi+=Sigma1(e) + pxor $axb,$t1 # Ch(e,f,g) + movdqa $b,$axb + movdqa $a,$sigma + pslld \$10,$t3 + pxor $a,$axb # a^b, b^c in next round + + `"prefetcht0 63(@ptr[2])" if ($i==15)` + psrld \$13,$sigma + pxor $t3,$t2 + paddd $t1,$Xi # Xi+=Ch(e,f,g) + pslld \$19-10,$t3 + pand $axb,$bxc + pxor $sigma,$t2 + + `"prefetcht0 63(@ptr[3])" if ($i==15)` + psrld \$22-13,$sigma + pxor $t3,$t2 + movdqa $b,$h + pslld \$30-19,$t3 + pxor $t2,$sigma + pxor $bxc,$h # h=Maj(a,b,c)=Ch(a^b,c,b) + paddd $Xi,$d # 
d+=Xi + pxor $t3,$sigma # Sigma0(a) + + paddd $Xi,$h # h+=Xi + paddd $sigma,$h # h+=Sigma0(a) +___ +$code.=<<___ if (($i%8)==7); + lea `32*8`($Tbl),$Tbl +___ + ($axb,$bxc)=($bxc,$axb); +} + +sub ROUND_16_XX { +my $i=shift; + +$code.=<<___; + movdqa `&Xi_off($i+1)`,$Xn + paddd `&Xi_off($i+9)`,$Xi # Xi+=X[i+9] + + movdqa $Xn,$sigma + movdqa $Xn,$t2 + psrld \$3,$sigma + movdqa $Xn,$t3 + + psrld \$7,$t2 + movdqa `&Xi_off($i+14)`,$t1 + pslld \$14,$t3 + pxor $t2,$sigma + psrld \$18-7,$t2 + movdqa $t1,$axb # borrow $axb + pxor $t3,$sigma + pslld \$25-14,$t3 + pxor $t2,$sigma + psrld \$10,$t1 + movdqa $axb,$t2 + + psrld \$17,$axb + pxor $t3,$sigma # sigma0(X[i+1]) + pslld \$13,$t2 + paddd $sigma,$Xi # Xi+=sigma0(e) + pxor $axb,$t1 + psrld \$19-17,$axb + pxor $t2,$t1 + pslld \$15-13,$t2 + pxor $axb,$t1 + pxor $t2,$t1 # sigma0(X[i+14]) + paddd $t1,$Xi # Xi+=sigma1(X[i+14]) +___ + &ROUND_00_15($i,@_); + ($Xi,$Xn)=($Xn,$Xi); +} + +$code.=<<___; +.text + +.extern OPENSSL_ia32cap_P + +.globl sha256_multi_block +.type sha256_multi_block,\@function,3 +.align 32 +sha256_multi_block: + mov OPENSSL_ia32cap_P+4(%rip),%rcx + bt \$61,%rcx # check SHA bit + jc _shaext_shortcut +___ +$code.=<<___ if ($avx); + test \$`1<<28`,%ecx + jnz _avx_shortcut +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,-0x78(%rax) + movaps %xmm11,-0x68(%rax) + movaps %xmm12,-0x58(%rax) + movaps %xmm13,-0x48(%rax) + movaps %xmm14,-0x38(%rax) + movaps %xmm15,-0x28(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`, %rsp + and \$-256,%rsp + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody: + lea K256+128(%rip),$Tbl + lea `$REG_SZ*16`(%rsp),%rbx + lea 0x80($ctx),$ctx # size optimization + +.Loop_grande: + mov $num,`$REG_SZ*17+8`(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldone + + movdqu 0x00-0x80($ctx),$A # load context + lea 128(%rsp),%rax + movdqu 0x20-0x80($ctx),$B + movdqu 0x40-0x80($ctx),$C + movdqu 0x60-0x80($ctx),$D + movdqu 0x80-0x80($ctx),$E + movdqu 0xa0-0x80($ctx),$F + movdqu 0xc0-0x80($ctx),$G + movdqu 0xe0-0x80($ctx),$H + movdqu .Lpbswap(%rip),$Xn + jmp .Loop + +.align 32 +.Loop: + movdqa $C,$bxc + pxor $B,$bxc # magic seed +___ +for($i=0;$i<16;$i++) { &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + movdqu `&Xi_off($i)`,$Xi + mov \$3,%ecx + jmp .Loop_16_xx +.align 32 +.Loop_16_xx: +___ +for(;$i<32;$i++) { &ROUND_16_XX($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + dec %ecx + jnz .Loop_16_xx + + mov \$1,%ecx + lea K256+128(%rip),$Tbl + + movdqa (%rbx),$sigma # pull counters + cmp 4*0(%rbx),%ecx # examine counters + pxor $t1,$t1 + cmovge $Tbl,@ptr[0] # cancel input + cmp 4*1(%rbx),%ecx + movdqa $sigma,$Xn + cmovge $Tbl,@ptr[1] + cmp 4*2(%rbx),%ecx + pcmpgtd $t1,$Xn # mask value + cmovge $Tbl,@ptr[2] + cmp 4*3(%rbx),%ecx + paddd $Xn,$sigma # counters-- + cmovge $Tbl,@ptr[3] + + movdqu 0x00-0x80($ctx),$t1 + pand $Xn,$A + movdqu 0x20-0x80($ctx),$t2 + pand $Xn,$B + movdqu 0x40-0x80($ctx),$t3 + pand $Xn,$C + movdqu 0x60-0x80($ctx),$Xi + pand $Xn,$D + paddd $t1,$A + movdqu 0x80-0x80($ctx),$t1 + pand $Xn,$E + paddd $t2,$B 
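# For reference (not part of the patch): the cmp/cmovge and pcmpgtd/pand/paddd
# sequences around this point (repeated in the _shaext and _avx tails) keep
# finished lanes from corrupting their context: every lane still executes the
# rounds, but a lane whose block counter has reached zero has its working state
# masked off before the feed-forward add and its input pointer parked on a
# harmless table.  A scalar model of that bookkeeping (names are the editor's):
use strict; use warnings;

sub feed_forward_lanes {
    my ($ctx, $work, $counters) = @_;          # array-refs, one entry per lane
    for my $lane (0 .. $#$counters) {
        my $mask = $counters->[$lane] > 0 ? 0xffffffff : 0;     # pcmpgtd
        for my $w (0 .. 7) {                                    # words A..H
            $ctx->[$lane][$w] =
                ($ctx->[$lane][$w] + ($work->[$lane][$w] & $mask)) & 0xffffffff;
        }
        $counters->[$lane]-- if $counters->[$lane] > 0;         # paddd of mask
    }
}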
+ movdqu 0xa0-0x80($ctx),$t2 + pand $Xn,$F + paddd $t3,$C + movdqu 0xc0-0x80($ctx),$t3 + pand $Xn,$G + paddd $Xi,$D + movdqu 0xe0-0x80($ctx),$Xi + pand $Xn,$H + paddd $t1,$E + paddd $t2,$F + movdqu $A,0x00-0x80($ctx) + paddd $t3,$G + movdqu $B,0x20-0x80($ctx) + paddd $Xi,$H + movdqu $C,0x40-0x80($ctx) + movdqu $D,0x60-0x80($ctx) + movdqu $E,0x80-0x80($ctx) + movdqu $F,0xa0-0x80($ctx) + movdqu $G,0xc0-0x80($ctx) + movdqu $H,0xe0-0x80($ctx) + + movdqa $sigma,(%rbx) # save counters + movdqa .Lpbswap(%rip),$Xn + dec $num + jnz .Loop + + mov `$REG_SZ*17+8`(%rsp),$num + lea $REG_SZ($ctx),$ctx + lea `16*$REG_SZ/4`($inp),$inp + dec $num + jnz .Loop_grande + +.Ldone: + mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp +___ +$code.=<<___ if ($win64); + movaps -0xb8(%rax),%xmm6 + movaps -0xa8(%rax),%xmm7 + movaps -0x98(%rax),%xmm8 + movaps -0x88(%rax),%xmm9 + movaps -0x78(%rax),%xmm10 + movaps -0x68(%rax),%xmm11 + movaps -0x58(%rax),%xmm12 + movaps -0x48(%rax),%xmm13 + movaps -0x38(%rax),%xmm14 + movaps -0x28(%rax),%xmm15 +___ +$code.=<<___; + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue: + ret +.size sha256_multi_block,.-sha256_multi_block +___ + {{{ +my ($Wi,$TMP0,$TMP1,$TMPx,$ABEF0,$CDGH0,$ABEF1,$CDGH1)=map("%xmm$_",(0..3,12..15)); +my @MSG0=map("%xmm$_",(4..7)); +my @MSG1=map("%xmm$_",(8..11)); + +$code.=<<___; +.type sha256_multi_block_shaext,\@function,3 +.align 32 +sha256_multi_block_shaext: +_shaext_shortcut: + mov %rsp,%rax + push %rbx + push %rbp +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,-0x78(%rax) + movaps %xmm11,-0x68(%rax) + movaps %xmm12,-0x58(%rax) + movaps %xmm13,-0x48(%rax) + movaps %xmm14,-0x38(%rax) + movaps %xmm15,-0x28(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`,%rsp + shl \$1,$num # we process pair at a time + and \$-256,%rsp + lea 0x80($ctx),$ctx # size optimization + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody_shaext: + lea `$REG_SZ*16`(%rsp),%rbx + lea K256_shaext+0x80(%rip),$Tbl + +.Loop_grande_shaext: + mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num + xor $num,$num +___ +for($i=0;$i<2;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle %rsp,@ptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldone_shaext + + movq 0x00-0x80($ctx),$ABEF0 # A1.A0 + movq 0x20-0x80($ctx),@MSG0[0] # B1.B0 + movq 0x40-0x80($ctx),$CDGH0 # C1.C0 + movq 0x60-0x80($ctx),@MSG0[1] # D1.D0 + movq 0x80-0x80($ctx),@MSG1[0] # E1.E0 + movq 0xa0-0x80($ctx),@MSG1[1] # F1.F0 + movq 0xc0-0x80($ctx),@MSG1[2] # G1.G0 + movq 0xe0-0x80($ctx),@MSG1[3] # H1.H0 + + punpckldq @MSG0[0],$ABEF0 # B1.A1.B0.A0 + punpckldq @MSG0[1],$CDGH0 # D1.C1.D0.C0 + punpckldq @MSG1[1],@MSG1[0] # F1.E1.F0.E0 + punpckldq @MSG1[3],@MSG1[2] # H1.G1.H0.G0 + movdqa K256_shaext-0x10(%rip),$TMPx # byte swap + + movdqa $ABEF0,$ABEF1 + movdqa $CDGH0,$CDGH1 + punpcklqdq @MSG1[0],$ABEF0 # F0.E0.B0.A0 + punpcklqdq @MSG1[2],$CDGH0 # H0.G0.D0.C0 + punpckhqdq @MSG1[0],$ABEF1 # F1.E1.B1.A1 + punpckhqdq @MSG1[2],$CDGH1 # H1.G1.D1.C1 + + pshufd \$0b00011011,$ABEF0,$ABEF0 + pshufd \$0b00011011,$CDGH0,$CDGH0 + pshufd \$0b00011011,$ABEF1,$ABEF1 + pshufd \$0b00011011,$CDGH1,$CDGH1 + jmp .Loop_shaext + +.align 32 +.Loop_shaext: + movdqu 0x00(@ptr[0]),@MSG0[0] + movdqu 0x00(@ptr[1]),@MSG1[0] + movdqu 
0x10(@ptr[0]),@MSG0[1] + movdqu 0x10(@ptr[1]),@MSG1[1] + movdqu 0x20(@ptr[0]),@MSG0[2] + pshufb $TMPx,@MSG0[0] + movdqu 0x20(@ptr[1]),@MSG1[2] + pshufb $TMPx,@MSG1[0] + movdqu 0x30(@ptr[0]),@MSG0[3] + lea 0x40(@ptr[0]),@ptr[0] + movdqu 0x30(@ptr[1]),@MSG1[3] + lea 0x40(@ptr[1]),@ptr[1] + + movdqa 0*16-0x80($Tbl),$Wi + pshufb $TMPx,@MSG0[1] + paddd @MSG0[0],$Wi + pxor $ABEF0,@MSG0[0] # black magic + movdqa $Wi,$TMP0 + movdqa 0*16-0x80($Tbl),$TMP1 + pshufb $TMPx,@MSG1[1] + paddd @MSG1[0],$TMP1 + movdqa $CDGH0,0x50(%rsp) # offload + sha256rnds2 $ABEF0,$CDGH0 # 0-3 + pxor $ABEF1,@MSG1[0] # black magic + movdqa $TMP1,$Wi + movdqa $CDGH1,0x70(%rsp) + sha256rnds2 $ABEF1,$CDGH1 # 0-3 + pshufd \$0x0e,$TMP0,$Wi + pxor $ABEF0,@MSG0[0] # black magic + movdqa $ABEF0,0x40(%rsp) # offload + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + pxor $ABEF1,@MSG1[0] # black magic + movdqa $ABEF1,0x60(%rsp) + movdqa 1*16-0x80($Tbl),$TMP0 + paddd @MSG0[1],$TMP0 + pshufb $TMPx,@MSG0[2] + sha256rnds2 $CDGH1,$ABEF1 + + movdqa $TMP0,$Wi + movdqa 1*16-0x80($Tbl),$TMP1 + paddd @MSG1[1],$TMP1 + sha256rnds2 $ABEF0,$CDGH0 # 4-7 + movdqa $TMP1,$Wi + prefetcht0 127(@ptr[0]) + pshufb $TMPx,@MSG0[3] + pshufb $TMPx,@MSG1[2] + prefetcht0 127(@ptr[1]) + sha256rnds2 $ABEF1,$CDGH1 # 4-7 + pshufd \$0x0e,$TMP0,$Wi + pshufb $TMPx,@MSG1[3] + sha256msg1 @MSG0[1],@MSG0[0] + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + movdqa 2*16-0x80($Tbl),$TMP0 + paddd @MSG0[2],$TMP0 + sha256rnds2 $CDGH1,$ABEF1 + + movdqa $TMP0,$Wi + movdqa 2*16-0x80($Tbl),$TMP1 + paddd @MSG1[2],$TMP1 + sha256rnds2 $ABEF0,$CDGH0 # 8-11 + sha256msg1 @MSG1[1],@MSG1[0] + movdqa $TMP1,$Wi + movdqa @MSG0[3],$TMPx + sha256rnds2 $ABEF1,$CDGH1 # 8-11 + pshufd \$0x0e,$TMP0,$Wi + palignr \$4,@MSG0[2],$TMPx + paddd $TMPx,@MSG0[0] + movdqa @MSG1[3],$TMPx + palignr \$4,@MSG1[2],$TMPx + sha256msg1 @MSG0[2],@MSG0[1] + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + movdqa 3*16-0x80($Tbl),$TMP0 + paddd @MSG0[3],$TMP0 + sha256rnds2 $CDGH1,$ABEF1 + sha256msg1 @MSG1[2],@MSG1[1] + + movdqa $TMP0,$Wi + movdqa 3*16-0x80($Tbl),$TMP1 + paddd $TMPx,@MSG1[0] + paddd @MSG1[3],$TMP1 + sha256msg2 @MSG0[3],@MSG0[0] + sha256rnds2 $ABEF0,$CDGH0 # 12-15 + movdqa $TMP1,$Wi + movdqa @MSG0[0],$TMPx + palignr \$4,@MSG0[3],$TMPx + sha256rnds2 $ABEF1,$CDGH1 # 12-15 + sha256msg2 @MSG1[3],@MSG1[0] + pshufd \$0x0e,$TMP0,$Wi + paddd $TMPx,@MSG0[1] + movdqa @MSG1[0],$TMPx + palignr \$4,@MSG1[3],$TMPx + sha256msg1 @MSG0[3],@MSG0[2] + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + movdqa 4*16-0x80($Tbl),$TMP0 + paddd @MSG0[0],$TMP0 + sha256rnds2 $CDGH1,$ABEF1 + sha256msg1 @MSG1[3],@MSG1[2] +___ +for($i=4;$i<16-3;$i++) { +$code.=<<___; + movdqa $TMP0,$Wi + movdqa $i*16-0x80($Tbl),$TMP1 + paddd $TMPx,@MSG1[1] + paddd @MSG1[0],$TMP1 + sha256msg2 @MSG0[0],@MSG0[1] + sha256rnds2 $ABEF0,$CDGH0 # 16-19... + movdqa $TMP1,$Wi + movdqa @MSG0[1],$TMPx + palignr \$4,@MSG0[0],$TMPx + sha256rnds2 $ABEF1,$CDGH1 # 16-19... 
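# For reference (not part of the patch): the context this file operates on is
# lane-interleaved, as the signature comment near the top of the file
# describes -- word j of lane i lives at ctx[j*8 + i], which is why the loads
# above can pull "A1.A0" (word A of lanes 1 and 0) with a single movq.  A
# hypothetical packer that builds that layout from ordinary per-message hash
# states, assuming a little-endian host:
use strict; use warnings;

sub pack_multi_ctx {
    my @lanes = @_;                    # up to 8 array-refs of [h0..h7]
    my @ctx   = (0) x 64;              # unsigned int A[8],B[8],...,H[8]
    for my $i (0 .. $#lanes) {
        for my $j (0 .. 7) {
            $ctx[$j * 8 + $i] = $lanes[$i][$j];
        }
    }
    return pack "V64", @ctx;           # 64 little-endian 32-bit words
}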
+ sha256msg2 @MSG1[0],@MSG1[1] + pshufd \$0x0e,$TMP0,$Wi + paddd $TMPx,@MSG0[2] + movdqa @MSG1[1],$TMPx + palignr \$4,@MSG1[0],$TMPx + sha256msg1 @MSG0[0],@MSG0[3] + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + movdqa `($i+1)*16`-0x80($Tbl),$TMP0 + paddd @MSG0[1],$TMP0 + sha256rnds2 $CDGH1,$ABEF1 + sha256msg1 @MSG1[0],@MSG1[3] +___ + push(@MSG0,shift(@MSG0)); push(@MSG1,shift(@MSG1)); +} +$code.=<<___; + movdqa $TMP0,$Wi + movdqa 13*16-0x80($Tbl),$TMP1 + paddd $TMPx,@MSG1[1] + paddd @MSG1[0],$TMP1 + sha256msg2 @MSG0[0],@MSG0[1] + sha256rnds2 $ABEF0,$CDGH0 # 52-55 + movdqa $TMP1,$Wi + movdqa @MSG0[1],$TMPx + palignr \$4,@MSG0[0],$TMPx + sha256rnds2 $ABEF1,$CDGH1 # 52-55 + sha256msg2 @MSG1[0],@MSG1[1] + pshufd \$0x0e,$TMP0,$Wi + paddd $TMPx,@MSG0[2] + movdqa @MSG1[1],$TMPx + palignr \$4,@MSG1[0],$TMPx + nop + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + movdqa 14*16-0x80($Tbl),$TMP0 + paddd @MSG0[1],$TMP0 + sha256rnds2 $CDGH1,$ABEF1 + + movdqa $TMP0,$Wi + movdqa 14*16-0x80($Tbl),$TMP1 + paddd $TMPx,@MSG1[2] + paddd @MSG1[1],$TMP1 + sha256msg2 @MSG0[1],@MSG0[2] + nop + sha256rnds2 $ABEF0,$CDGH0 # 56-59 + movdqa $TMP1,$Wi + mov \$1,%ecx + pxor @MSG0[1],@MSG0[1] # zero + sha256rnds2 $ABEF1,$CDGH1 # 56-59 + sha256msg2 @MSG1[1],@MSG1[2] + pshufd \$0x0e,$TMP0,$Wi + movdqa 15*16-0x80($Tbl),$TMP0 + paddd @MSG0[2],$TMP0 + movq (%rbx),@MSG0[2] # pull counters + nop + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + movdqa 15*16-0x80($Tbl),$TMP1 + paddd @MSG1[2],$TMP1 + sha256rnds2 $CDGH1,$ABEF1 + + movdqa $TMP0,$Wi + cmp 4*0(%rbx),%ecx # examine counters + cmovge %rsp,@ptr[0] # cancel input + cmp 4*1(%rbx),%ecx + cmovge %rsp,@ptr[1] + pshufd \$0x00,@MSG0[2],@MSG1[0] + sha256rnds2 $ABEF0,$CDGH0 # 60-63 + movdqa $TMP1,$Wi + pshufd \$0x55,@MSG0[2],@MSG1[1] + movdqa @MSG0[2],@MSG1[2] + sha256rnds2 $ABEF1,$CDGH1 # 60-63 + pshufd \$0x0e,$TMP0,$Wi + pcmpgtd @MSG0[1],@MSG1[0] + pcmpgtd @MSG0[1],@MSG1[1] + sha256rnds2 $CDGH0,$ABEF0 + pshufd \$0x0e,$TMP1,$Wi + pcmpgtd @MSG0[1],@MSG1[2] # counter mask + movdqa K256_shaext-0x10(%rip),$TMPx + sha256rnds2 $CDGH1,$ABEF1 + + pand @MSG1[0],$CDGH0 + pand @MSG1[1],$CDGH1 + pand @MSG1[0],$ABEF0 + pand @MSG1[1],$ABEF1 + paddd @MSG0[2],@MSG1[2] # counters-- + + paddd 0x50(%rsp),$CDGH0 + paddd 0x70(%rsp),$CDGH1 + paddd 0x40(%rsp),$ABEF0 + paddd 0x60(%rsp),$ABEF1 + + movq @MSG1[2],(%rbx) # save counters + dec $num + jnz .Loop_shaext + + mov `$REG_SZ*17+8`(%rsp),$num + + pshufd \$0b00011011,$ABEF0,$ABEF0 + pshufd \$0b00011011,$CDGH0,$CDGH0 + pshufd \$0b00011011,$ABEF1,$ABEF1 + pshufd \$0b00011011,$CDGH1,$CDGH1 + + movdqa $ABEF0,@MSG0[0] + movdqa $CDGH0,@MSG0[1] + punpckldq $ABEF1,$ABEF0 # B1.B0.A1.A0 + punpckhdq $ABEF1,@MSG0[0] # F1.F0.E1.E0 + punpckldq $CDGH1,$CDGH0 # D1.D0.C1.C0 + punpckhdq $CDGH1,@MSG0[1] # H1.H0.G1.G0 + + movq $ABEF0,0x00-0x80($ctx) # A1.A0 + psrldq \$8,$ABEF0 + movq @MSG0[0],0x80-0x80($ctx) # E1.E0 + psrldq \$8,@MSG0[0] + movq $ABEF0,0x20-0x80($ctx) # B1.B0 + movq @MSG0[0],0xa0-0x80($ctx) # F1.F0 + + movq $CDGH0,0x40-0x80($ctx) # C1.C0 + psrldq \$8,$CDGH0 + movq @MSG0[1],0xc0-0x80($ctx) # G1.G0 + psrldq \$8,@MSG0[1] + movq $CDGH0,0x60-0x80($ctx) # D1.D0 + movq @MSG0[1],0xe0-0x80($ctx) # H1.H0 + + lea `$REG_SZ/2`($ctx),$ctx + lea `16*2`($inp),$inp + dec $num + jnz .Loop_grande_shaext + +.Ldone_shaext: + #mov `$REG_SZ*17`(%rsp),%rax # original %rsp +___ +$code.=<<___ if ($win64); + movaps -0xb8(%rax),%xmm6 + movaps -0xa8(%rax),%xmm7 + movaps -0x98(%rax),%xmm8 + movaps -0x88(%rax),%xmm9 + movaps -0x78(%rax),%xmm10 + movaps 
-0x68(%rax),%xmm11 + movaps -0x58(%rax),%xmm12 + movaps -0x48(%rax),%xmm13 + movaps -0x38(%rax),%xmm14 + movaps -0x28(%rax),%xmm15 +___ +$code.=<<___; + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue_shaext: + ret +.size sha256_multi_block_shaext,.-sha256_multi_block_shaext +___ + }}} + if ($avx) {{{ +sub ROUND_00_15_avx { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; + +$code.=<<___ if ($i<15 && $REG_SZ==16); + vmovd `4*$i`(@ptr[0]),$Xi + vmovd `4*$i`(@ptr[1]),$t1 + vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi + vpinsrd \$1,`4*$i`(@ptr[3]),$t1,$t1 + vpunpckldq $t1,$Xi,$Xi + vpshufb $Xn,$Xi,$Xi +___ +$code.=<<___ if ($i==15 && $REG_SZ==16); + vmovd `4*$i`(@ptr[0]),$Xi + lea `16*4`(@ptr[0]),@ptr[0] + vmovd `4*$i`(@ptr[1]),$t1 + lea `16*4`(@ptr[1]),@ptr[1] + vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi + lea `16*4`(@ptr[2]),@ptr[2] + vpinsrd \$1,`4*$i`(@ptr[3]),$t1,$t1 + lea `16*4`(@ptr[3]),@ptr[3] + vpunpckldq $t1,$Xi,$Xi + vpshufb $Xn,$Xi,$Xi +___ +$code.=<<___ if ($i<15 && $REG_SZ==32); + vmovd `4*$i`(@ptr[0]),$Xi + vmovd `4*$i`(@ptr[4]),$t1 + vmovd `4*$i`(@ptr[1]),$t2 + vmovd `4*$i`(@ptr[5]),$t3 + vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi + vpinsrd \$1,`4*$i`(@ptr[6]),$t1,$t1 + vpinsrd \$1,`4*$i`(@ptr[3]),$t2,$t2 + vpunpckldq $t2,$Xi,$Xi + vpinsrd \$1,`4*$i`(@ptr[7]),$t3,$t3 + vpunpckldq $t3,$t1,$t1 + vinserti128 $t1,$Xi,$Xi + vpshufb $Xn,$Xi,$Xi +___ +$code.=<<___ if ($i==15 && $REG_SZ==32); + vmovd `4*$i`(@ptr[0]),$Xi + lea `16*4`(@ptr[0]),@ptr[0] + vmovd `4*$i`(@ptr[4]),$t1 + lea `16*4`(@ptr[4]),@ptr[4] + vmovd `4*$i`(@ptr[1]),$t2 + lea `16*4`(@ptr[1]),@ptr[1] + vmovd `4*$i`(@ptr[5]),$t3 + lea `16*4`(@ptr[5]),@ptr[5] + vpinsrd \$1,`4*$i`(@ptr[2]),$Xi,$Xi + lea `16*4`(@ptr[2]),@ptr[2] + vpinsrd \$1,`4*$i`(@ptr[6]),$t1,$t1 + lea `16*4`(@ptr[6]),@ptr[6] + vpinsrd \$1,`4*$i`(@ptr[3]),$t2,$t2 + lea `16*4`(@ptr[3]),@ptr[3] + vpunpckldq $t2,$Xi,$Xi + vpinsrd \$1,`4*$i`(@ptr[7]),$t3,$t3 + lea `16*4`(@ptr[7]),@ptr[7] + vpunpckldq $t3,$t1,$t1 + vinserti128 $t1,$Xi,$Xi + vpshufb $Xn,$Xi,$Xi +___ +$code.=<<___; + vpsrld \$6,$e,$sigma + vpslld \$26,$e,$t3 + vmovdqu $Xi,`&Xi_off($i)` + vpaddd $h,$Xi,$Xi # Xi+=h + + vpsrld \$11,$e,$t2 + vpxor $t3,$sigma,$sigma + vpslld \$21,$e,$t3 + vpaddd `32*($i%8)-128`($Tbl),$Xi,$Xi # Xi+=K[round] + vpxor $t2,$sigma,$sigma + + vpsrld \$25,$e,$t2 + vpxor $t3,$sigma,$sigma + `"prefetcht0 63(@ptr[0])" if ($i==15)` + vpslld \$7,$e,$t3 + vpandn $g,$e,$t1 + vpand $f,$e,$axb # borrow $axb + `"prefetcht0 63(@ptr[1])" if ($i==15)` + vpxor $t2,$sigma,$sigma + + vpsrld \$2,$a,$h # borrow $h + vpxor $t3,$sigma,$sigma # Sigma1(e) + `"prefetcht0 63(@ptr[2])" if ($i==15)` + vpslld \$30,$a,$t2 + vpxor $axb,$t1,$t1 # Ch(e,f,g) + vpxor $a,$b,$axb # a^b, b^c in next round + `"prefetcht0 63(@ptr[3])" if ($i==15)` + vpxor $t2,$h,$h + vpaddd $sigma,$Xi,$Xi # Xi+=Sigma1(e) + + vpsrld \$13,$a,$t2 + `"prefetcht0 63(@ptr[4])" if ($i==15 && $REG_SZ==32)` + vpslld \$19,$a,$t3 + vpaddd $t1,$Xi,$Xi # Xi+=Ch(e,f,g) + vpand $axb,$bxc,$bxc + `"prefetcht0 63(@ptr[5])" if ($i==15 && $REG_SZ==32)` + vpxor $t2,$h,$sigma + + vpsrld \$22,$a,$t2 + vpxor $t3,$sigma,$sigma + `"prefetcht0 63(@ptr[6])" if ($i==15 && $REG_SZ==32)` + vpslld \$10,$a,$t3 + vpxor $bxc,$b,$h # h=Maj(a,b,c)=Ch(a^b,c,b) + vpaddd $Xi,$d,$d # d+=Xi + `"prefetcht0 63(@ptr[7])" if ($i==15 && $REG_SZ==32)` + vpxor $t2,$sigma,$sigma + vpxor $t3,$sigma,$sigma # Sigma0(a) + + vpaddd $Xi,$h,$h # h+=Xi + vpaddd $sigma,$h,$h # h+=Sigma0(a) +___ +$code.=<<___ if (($i%8)==7); + add \$`32*8`,$Tbl +___ + ($axb,$bxc)=($bxc,$axb); +} + +sub ROUND_16_XX_avx { 
+my $i=shift; + +$code.=<<___; + vmovdqu `&Xi_off($i+1)`,$Xn + vpaddd `&Xi_off($i+9)`,$Xi,$Xi # Xi+=X[i+9] + + vpsrld \$3,$Xn,$sigma + vpsrld \$7,$Xn,$t2 + vpslld \$25,$Xn,$t3 + vpxor $t2,$sigma,$sigma + vpsrld \$18,$Xn,$t2 + vpxor $t3,$sigma,$sigma + vpslld \$14,$Xn,$t3 + vmovdqu `&Xi_off($i+14)`,$t1 + vpsrld \$10,$t1,$axb # borrow $axb + + vpxor $t2,$sigma,$sigma + vpsrld \$17,$t1,$t2 + vpxor $t3,$sigma,$sigma # sigma0(X[i+1]) + vpslld \$15,$t1,$t3 + vpaddd $sigma,$Xi,$Xi # Xi+=sigma0(e) + vpxor $t2,$axb,$sigma + vpsrld \$19,$t1,$t2 + vpxor $t3,$sigma,$sigma + vpslld \$13,$t1,$t3 + vpxor $t2,$sigma,$sigma + vpxor $t3,$sigma,$sigma # sigma0(X[i+14]) + vpaddd $sigma,$Xi,$Xi # Xi+=sigma1(X[i+14]) +___ + &ROUND_00_15_avx($i,@_); + ($Xi,$Xn)=($Xn,$Xi); +} + +$code.=<<___; +.type sha256_multi_block_avx,\@function,3 +.align 32 +sha256_multi_block_avx: +_avx_shortcut: +___ +$code.=<<___ if ($avx>1); + shr \$32,%rcx + cmp \$2,$num + jb .Lavx + test \$`1<<5`,%ecx + jnz _avx2_shortcut + jmp .Lavx +.align 32 +.Lavx: +___ +$code.=<<___; + mov %rsp,%rax + push %rbx + push %rbp +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,-0x78(%rax) + movaps %xmm11,-0x68(%rax) + movaps %xmm12,-0x58(%rax) + movaps %xmm13,-0x48(%rax) + movaps %xmm14,-0x38(%rax) + movaps %xmm15,-0x28(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`, %rsp + and \$-256,%rsp + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody_avx: + lea K256+128(%rip),$Tbl + lea `$REG_SZ*16`(%rsp),%rbx + lea 0x80($ctx),$ctx # size optimization + +.Loop_grande_avx: + mov $num,`$REG_SZ*17+8`(%rsp) # original $num + xor $num,$num +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + test $num,$num + jz .Ldone_avx + + vmovdqu 0x00-0x80($ctx),$A # load context + lea 128(%rsp),%rax + vmovdqu 0x20-0x80($ctx),$B + vmovdqu 0x40-0x80($ctx),$C + vmovdqu 0x60-0x80($ctx),$D + vmovdqu 0x80-0x80($ctx),$E + vmovdqu 0xa0-0x80($ctx),$F + vmovdqu 0xc0-0x80($ctx),$G + vmovdqu 0xe0-0x80($ctx),$H + vmovdqu .Lpbswap(%rip),$Xn + jmp .Loop_avx + +.align 32 +.Loop_avx: + vpxor $B,$C,$bxc # magic seed +___ +for($i=0;$i<16;$i++) { &ROUND_00_15_avx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + vmovdqu `&Xi_off($i)`,$Xi + mov \$3,%ecx + jmp .Loop_16_xx_avx +.align 32 +.Loop_16_xx_avx: +___ +for(;$i<32;$i++) { &ROUND_16_XX_avx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + dec %ecx + jnz .Loop_16_xx_avx + + mov \$1,%ecx + lea K256+128(%rip),$Tbl +___ +for($i=0;$i<4;$i++) { + $code.=<<___; + cmp `4*$i`(%rbx),%ecx # examine counters + cmovge $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + vmovdqa (%rbx),$sigma # pull counters + vpxor $t1,$t1,$t1 + vmovdqa $sigma,$Xn + vpcmpgtd $t1,$Xn,$Xn # mask value + vpaddd $Xn,$sigma,$sigma # counters-- + + vmovdqu 0x00-0x80($ctx),$t1 + vpand $Xn,$A,$A + vmovdqu 0x20-0x80($ctx),$t2 + vpand $Xn,$B,$B + vmovdqu 0x40-0x80($ctx),$t3 + vpand $Xn,$C,$C + vmovdqu 0x60-0x80($ctx),$Xi + vpand $Xn,$D,$D + vpaddd $t1,$A,$A + vmovdqu 0x80-0x80($ctx),$t1 + vpand $Xn,$E,$E + vpaddd $t2,$B,$B + vmovdqu 0xa0-0x80($ctx),$t2 + vpand $Xn,$F,$F + vpaddd $t3,$C,$C + vmovdqu 0xc0-0x80($ctx),$t3 + vpand $Xn,$G,$G + vpaddd $Xi,$D,$D + vmovdqu 0xe0-0x80($ctx),$Xi + vpand $Xn,$H,$H + vpaddd 
$t1,$E,$E + vpaddd $t2,$F,$F + vmovdqu $A,0x00-0x80($ctx) + vpaddd $t3,$G,$G + vmovdqu $B,0x20-0x80($ctx) + vpaddd $Xi,$H,$H + vmovdqu $C,0x40-0x80($ctx) + vmovdqu $D,0x60-0x80($ctx) + vmovdqu $E,0x80-0x80($ctx) + vmovdqu $F,0xa0-0x80($ctx) + vmovdqu $G,0xc0-0x80($ctx) + vmovdqu $H,0xe0-0x80($ctx) + + vmovdqu $sigma,(%rbx) # save counters + vmovdqu .Lpbswap(%rip),$Xn + dec $num + jnz .Loop_avx + + mov `$REG_SZ*17+8`(%rsp),$num + lea $REG_SZ($ctx),$ctx + lea `16*$REG_SZ/4`($inp),$inp + dec $num + jnz .Loop_grande_avx + +.Ldone_avx: + mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xb8(%rax),%xmm6 + movaps -0xa8(%rax),%xmm7 + movaps -0x98(%rax),%xmm8 + movaps -0x88(%rax),%xmm9 + movaps -0x78(%rax),%xmm10 + movaps -0x68(%rax),%xmm11 + movaps -0x58(%rax),%xmm12 + movaps -0x48(%rax),%xmm13 + movaps -0x38(%rax),%xmm14 + movaps -0x28(%rax),%xmm15 +___ +$code.=<<___; + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue_avx: + ret +.size sha256_multi_block_avx,.-sha256_multi_block_avx +___ + if ($avx>1) { +$code =~ s/\`([^\`]*)\`/eval $1/gem; + +$REG_SZ=32; +@ptr=map("%r$_",(12..15,8..11)); + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%ymm$_",(8..15)); +($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%ymm$_",(0..7)); + +$code.=<<___; +.type sha256_multi_block_avx2,\@function,3 +.align 32 +sha256_multi_block_avx2: +_avx2_shortcut: + mov %rsp,%rax + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 +___ +$code.=<<___ if ($win64); + lea -0xa8(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,0x10(%rsp) + movaps %xmm8,0x20(%rsp) + movaps %xmm9,0x30(%rsp) + movaps %xmm10,0x40(%rsp) + movaps %xmm11,0x50(%rsp) + movaps %xmm12,-0x78(%rax) + movaps %xmm13,-0x68(%rax) + movaps %xmm14,-0x58(%rax) + movaps %xmm15,-0x48(%rax) +___ +$code.=<<___; + sub \$`$REG_SZ*18`, %rsp + and \$-256,%rsp + mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.Lbody_avx2: + lea K256+128(%rip),$Tbl + lea 0x80($ctx),$ctx # size optimization + +.Loop_grande_avx2: + mov $num,`$REG_SZ*17+8`(%rsp) # original $num + xor $num,$num + lea `$REG_SZ*16`(%rsp),%rbx +___ +for($i=0;$i<8;$i++) { + $code.=<<___; + mov `16*$i+0`($inp),@ptr[$i] # input pointer + mov `16*$i+8`($inp),%ecx # number of blocks + cmp $num,%ecx + cmovg %ecx,$num # find maximum + test %ecx,%ecx + mov %ecx,`4*$i`(%rbx) # initialize counters + cmovle $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + vmovdqu 0x00-0x80($ctx),$A # load context + lea 128(%rsp),%rax + vmovdqu 0x20-0x80($ctx),$B + lea 256+128(%rsp),%rbx + vmovdqu 0x40-0x80($ctx),$C + vmovdqu 0x60-0x80($ctx),$D + vmovdqu 0x80-0x80($ctx),$E + vmovdqu 0xa0-0x80($ctx),$F + vmovdqu 0xc0-0x80($ctx),$G + vmovdqu 0xe0-0x80($ctx),$H + vmovdqu .Lpbswap(%rip),$Xn + jmp .Loop_avx2 + +.align 32 +.Loop_avx2: + vpxor $B,$C,$bxc # magic seed +___ +for($i=0;$i<16;$i++) { &ROUND_00_15_avx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + vmovdqu `&Xi_off($i)`,$Xi + mov \$3,%ecx + jmp .Loop_16_xx_avx2 +.align 32 +.Loop_16_xx_avx2: +___ +for(;$i<32;$i++) { &ROUND_16_XX_avx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + dec %ecx + jnz .Loop_16_xx_avx2 + + mov \$1,%ecx + lea `$REG_SZ*16`(%rsp),%rbx + lea K256+128(%rip),$Tbl +___ +for($i=0;$i<8;$i++) { + $code.=<<___; + cmp `4*$i`(%rbx),%ecx # examine counters + cmovge $Tbl,@ptr[$i] # cancel input +___ +} +$code.=<<___; + vmovdqa (%rbx),$sigma # pull counters + vpxor $t1,$t1,$t1 + vmovdqa $sigma,$Xn + vpcmpgtd $t1,$Xn,$Xn # mask value + vpaddd $Xn,$sigma,$sigma # counters-- + + vmovdqu 0x00-0x80($ctx),$t1 + vpand 
$Xn,$A,$A + vmovdqu 0x20-0x80($ctx),$t2 + vpand $Xn,$B,$B + vmovdqu 0x40-0x80($ctx),$t3 + vpand $Xn,$C,$C + vmovdqu 0x60-0x80($ctx),$Xi + vpand $Xn,$D,$D + vpaddd $t1,$A,$A + vmovdqu 0x80-0x80($ctx),$t1 + vpand $Xn,$E,$E + vpaddd $t2,$B,$B + vmovdqu 0xa0-0x80($ctx),$t2 + vpand $Xn,$F,$F + vpaddd $t3,$C,$C + vmovdqu 0xc0-0x80($ctx),$t3 + vpand $Xn,$G,$G + vpaddd $Xi,$D,$D + vmovdqu 0xe0-0x80($ctx),$Xi + vpand $Xn,$H,$H + vpaddd $t1,$E,$E + vpaddd $t2,$F,$F + vmovdqu $A,0x00-0x80($ctx) + vpaddd $t3,$G,$G + vmovdqu $B,0x20-0x80($ctx) + vpaddd $Xi,$H,$H + vmovdqu $C,0x40-0x80($ctx) + vmovdqu $D,0x60-0x80($ctx) + vmovdqu $E,0x80-0x80($ctx) + vmovdqu $F,0xa0-0x80($ctx) + vmovdqu $G,0xc0-0x80($ctx) + vmovdqu $H,0xe0-0x80($ctx) + + vmovdqu $sigma,(%rbx) # save counters + lea 256+128(%rsp),%rbx + vmovdqu .Lpbswap(%rip),$Xn + dec $num + jnz .Loop_avx2 + + #mov `$REG_SZ*17+8`(%rsp),$num + #lea $REG_SZ($ctx),$ctx + #lea `16*$REG_SZ/4`($inp),$inp + #dec $num + #jnz .Loop_grande_avx2 + +.Ldone_avx2: + mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + vzeroupper +___ +$code.=<<___ if ($win64); + movaps -0xd8(%rax),%xmm6 + movaps -0xc8(%rax),%xmm7 + movaps -0xb8(%rax),%xmm8 + movaps -0xa8(%rax),%xmm9 + movaps -0x98(%rax),%xmm10 + movaps -0x88(%rax),%xmm11 + movaps -0x78(%rax),%xmm12 + movaps -0x68(%rax),%xmm13 + movaps -0x58(%rax),%xmm14 + movaps -0x48(%rax),%xmm15 +___ +$code.=<<___; + mov -48(%rax),%r15 + mov -40(%rax),%r14 + mov -32(%rax),%r13 + mov -24(%rax),%r12 + mov -16(%rax),%rbp + mov -8(%rax),%rbx + lea (%rax),%rsp +.Lepilogue_avx2: + ret +.size sha256_multi_block_avx2,.-sha256_multi_block_avx2 +___ + } }}} +$code.=<<___; +.align 256 +K256: +___ +sub TABLE { + foreach (@_) { + $code.=<<___; + .long $_,$_,$_,$_ + .long $_,$_,$_,$_ +___ + } +} +&TABLE( 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5, + 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3, + 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc, + 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7, + 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, + 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13, + 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3, + 0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5, + 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208, + 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ); +$code.=<<___; +.Lpbswap: + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f # pbswap +K256_shaext: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .asciz "SHA256 multi-block transform 
for x86_64, CRYPTOGAMS by " +___ + +if ($win64) { +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->Rip<.Lbody + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov `16*17`(%rax),%rax # pull saved stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + + lea -24-10*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler +___ +$code.=<<___ if ($avx>1); +.type avx2_handler,\@abi-omnipotent +.align 16 +avx2_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HandlerData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # end of prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue + + mov `32*17`($context),%rax # pull saved stack pointer + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore cotnext->R12 + mov %r13,224($context) # restore cotnext->R13 + mov %r14,232($context) # restore cotnext->R14 + mov %r15,240($context) # restore cotnext->R15 + + lea -56-10*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$20,%ecx + .long 0xa548f3fc # cld; rep movsq + + jmp 
.Lin_prologue +.size avx2_handler,.-avx2_handler +___ +$code.=<<___; +.section .pdata +.align 4 + .rva .LSEH_begin_sha256_multi_block + .rva .LSEH_end_sha256_multi_block + .rva .LSEH_info_sha256_multi_block + .rva .LSEH_begin_sha256_multi_block_shaext + .rva .LSEH_end_sha256_multi_block_shaext + .rva .LSEH_info_sha256_multi_block_shaext +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_sha256_multi_block_avx + .rva .LSEH_end_sha256_multi_block_avx + .rva .LSEH_info_sha256_multi_block_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_sha256_multi_block_avx2 + .rva .LSEH_end_sha256_multi_block_avx2 + .rva .LSEH_info_sha256_multi_block_avx2 +___ +$code.=<<___; +.section .xdata +.align 8 +.LSEH_info_sha256_multi_block: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbody,.Lepilogue # HandlerData[] +.LSEH_info_sha256_multi_block_shaext: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbody_shaext,.Lepilogue_shaext # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_sha256_multi_block_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Lbody_avx,.Lepilogue_avx # HandlerData[] +___ +$code.=<<___ if ($avx>1); +.LSEH_info_sha256_multi_block_avx2: + .byte 9,0,0,0 + .rva avx2_handler + .rva .Lbody_avx2,.Lepilogue_avx2 # HandlerData[] +___ +} +#################################################################### + +sub rex { + local *opcode=shift; + my ($dst,$src)=@_; + my $rex=0; + + $rex|=0x04 if ($dst>=8); + $rex|=0x01 if ($src>=8); + unshift @opcode,$rex|0x40 if ($rex); +} + +sub sha256op38 { + my $instr = shift; + my %opcodelet = ( + "sha256rnds2" => 0xcb, + "sha256msg1" => 0xcc, + "sha256msg2" => 0xcd ); + + if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { + my @opcode=(0x0f,0x38); + rex(\@opcode,$2,$1); + push @opcode,$opcodelet{$instr}; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } else { + return $instr."\t".@_[0]; + } +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + + s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/geo or + + s/\b(vmov[dq])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vmovdqu)\b(.+)%x%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vpinsr[qd])\b(.+)%ymm([0-9]+),%ymm([0-9]+)/$1$2%xmm$3,%xmm$4/go or + s/\b(vpextr[qd])\b(.+)%ymm([0-9]+)/$1$2%xmm$3/go or + s/\b(vinserti128)\b(\s+)%ymm/$1$2\$1,%xmm/go or + s/\b(vpbroadcast[qd]\s+)%ymm([0-9]+)/$1%xmm$2/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-x86_64.pl deleted file mode 100755 index 205a8e6f9a04f1..00000000000000 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-x86_64.pl +++ /dev/null @@ -1,438 +0,0 @@ -#!/usr/bin/env perl -# -# ==================================================================== -# Written by Andy Polyakov for the OpenSSL -# project. Rights for redistribution and usage in source and binary -# forms are granted according to the OpenSSL license. -# ==================================================================== -# -# sha256/512_block procedure for x86_64. -# -# 40% improvement over compiler-generated code on Opteron. On EM64T -# sha256 was observed to run >80% faster and sha512 - >40%. No magical -# tricks, just straight implementation... I really wonder why gcc -# [being armed with inline assembler] fails to generate as fast code. -# The only thing which is cool about this module is that it's very -# same instruction sequence used for both SHA-256 and SHA-512. 
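# For reference (not part of the patch): sha256op38()/rex() earlier in this
# hunk hand-assemble the three SHA-NI mnemonics for assemblers that predate
# them: an optional REX prefix (when either register is %xmm8..15), the
# 0x0f,0x38 escape, the opcodelet, then a register-register ModR/M byte.  The
# snippet below re-derives the bytes for one concrete, editor-chosen case
# using the same arithmetic as sha256op38():
use strict; use warnings;

# Fallback bytes for: sha256rnds2 %xmm4,%xmm10
# (src=%xmm4 in the ModR/M r/m field, dst=%xmm10 in the reg field,
#  REX.R set because the destination is xmm8..15, xmm0 stays implicit)
my ($src, $dst, $opcodelet) = (4, 10, 0xcb);         # 0xcb = sha256rnds2
my @bytes = (0x0f, 0x38, $opcodelet,
             0xc0 | ($src & 7) | (($dst & 7) << 3)); # ModR/M, register form
unshift @bytes, 0x40 | ($dst >= 8 ? 0x04 : 0) | ($src >= 8 ? 0x01 : 0)
    if $dst >= 8 || $src >= 8;
printf ".byte\t%s\t# sha256rnds2 %%xmm4,%%xmm10\n",
       join(',', map { sprintf "0x%02x", $_ } @bytes);
# prints (tab-separated): .byte 0x44,0x0f,0x38,0xcb,0xd4 # sha256rnds2 %xmm4,%xmm10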
In -# former case the instructions operate on 32-bit operands, while in -# latter - on 64-bit ones. All I had to do is to get one flavor right, -# the other one passed the test right away:-) -# -# sha256_block runs in ~1005 cycles on Opteron, which gives you -# asymptotic performance of 64*1000/1005=63.7MBps times CPU clock -# frequency in GHz. sha512_block runs in ~1275 cycles, which results -# in 128*1000/1275=100MBps per GHz. Is there room for improvement? -# Well, if you compare it to IA-64 implementation, which maintains -# X[16] in register bank[!], tends to 4 instructions per CPU clock -# cycle and runs in 1003 cycles, 1275 is very good result for 3-way -# issue Opteron pipeline and X[16] maintained in memory. So that *if* -# there is a way to improve it, *then* the only way would be to try to -# offload X[16] updates to SSE unit, but that would require "deeper" -# loop unroll, which in turn would naturally cause size blow-up, not -# to mention increased complexity! And once again, only *if* it's -# actually possible to noticeably improve overall ILP, instruction -# level parallelism, on a given CPU implementation in this case. -# -# Special note on Intel EM64T. While Opteron CPU exhibits perfect -# perfromance ratio of 1.5 between 64- and 32-bit flavors [see above], -# [currently available] EM64T CPUs apparently are far from it. On the -# contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit -# sha256_block:-( This is presumably because 64-bit shifts/rotates -# apparently are not atomic instructions, but implemented in microcode. - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -$func="sha256_block_data_order"; -$TABLE="K256"; -$SZ=4; -@ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx", - "%r8d","%r9d","%r10d","%r11d"); -($T1,$a0,$a1,$a2)=("%r12d","%r13d","%r14d","%r15d"); -@Sigma0=( 2,13,22); -@Sigma1=( 6,11,25); -@sigma0=( 7,18, 3); -@sigma1=(17,19,10); -$rounds=64; - - -$ctx="%rdi"; # 1st arg -$round="%rdi"; # zaps $ctx -$inp="%rsi"; # 2nd arg -$Tbl="%rbp"; - -$_ctx="16*$SZ+0*8(%rsp)"; -$_inp="16*$SZ+1*8(%rsp)"; -$_end="16*$SZ+2*8(%rsp)"; -$_rsp="16*$SZ+3*8(%rsp)"; -$framesz="16*$SZ+4*8"; - - -sub ROUND_00_15() -{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___; - ror \$`$Sigma1[2]-$Sigma1[1]`,$a0 - mov $f,$a2 - mov $T1,`$SZ*($i&0xf)`(%rsp) - - ror \$`$Sigma0[2]-$Sigma0[1]`,$a1 - xor $e,$a0 - xor $g,$a2 # f^g - - ror \$`$Sigma1[1]-$Sigma1[0]`,$a0 - add $h,$T1 # T1+=h - xor $a,$a1 - - add ($Tbl,$round,$SZ),$T1 # T1+=K[round] - and $e,$a2 # (f^g)&e - mov $b,$h - - ror \$`$Sigma0[1]-$Sigma0[0]`,$a1 - xor $e,$a0 - xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g - - xor $c,$h # b^c - xor $a,$a1 - add $a2,$T1 # T1+=Ch(e,f,g) - mov $b,$a2 - - ror \$$Sigma1[0],$a0 # Sigma1(e) - and $a,$h # h=(b^c)&a - and $c,$a2 # b&c - - ror \$$Sigma0[0],$a1 # Sigma0(a) - add $a0,$T1 # T1+=Sigma1(e) - add $a2,$h # h+=b&c (completes +=Maj(a,b,c) - - add $T1,$d # d+=T1 - add $T1,$h # h+=T1 - lea 1($round),$round # round++ - add $a1,$h # h+=Sigma0(a) - -___ -} - -sub ROUND_16_XX() -{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; - -$code.=<<___; - mov `$SZ*(($i+1)&0xf)`(%rsp),$a0 - mov 
`$SZ*(($i+14)&0xf)`(%rsp),$a1 - mov $a0,$T1 - mov $a1,$a2 - - ror \$`$sigma0[1]-$sigma0[0]`,$T1 - xor $a0,$T1 - shr \$$sigma0[2],$a0 - - ror \$$sigma0[0],$T1 - xor $T1,$a0 # sigma0(X[(i+1)&0xf]) - mov `$SZ*(($i+9)&0xf)`(%rsp),$T1 - - ror \$`$sigma1[1]-$sigma1[0]`,$a2 - xor $a1,$a2 - shr \$$sigma1[2],$a1 - - ror \$$sigma1[0],$a2 - add $a0,$T1 - xor $a2,$a1 # sigma1(X[(i+14)&0xf]) - - add `$SZ*($i&0xf)`(%rsp),$T1 - mov $e,$a0 - add $a1,$T1 - mov $a,$a1 -___ - &ROUND_00_15(@_); -} - -$code=<<___; -.text - -.globl $func -.type $func,\@function,4 -.align 16 -$func: - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - mov %rsp,%r11 # copy %rsp - shl \$4,%rdx # num*16 - sub \$$framesz,%rsp - lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ - and \$-64,%rsp # align stack frame - mov $ctx,$_ctx # save ctx, 1st arg - mov $inp,$_inp # save inp, 2nd arh - mov %rdx,$_end # save end pointer, "3rd" arg - mov %r11,$_rsp # save copy of %rsp -.Lprologue: - - lea $TABLE(%rip),$Tbl - - mov $SZ*0($ctx),$A - mov $SZ*1($ctx),$B - mov $SZ*2($ctx),$C - mov $SZ*3($ctx),$D - mov $SZ*4($ctx),$E - mov $SZ*5($ctx),$F - mov $SZ*6($ctx),$G - mov $SZ*7($ctx),$H - jmp .Lloop - -.align 16 -.Lloop: - xor $round,$round -___ - for($i=0;$i<16;$i++) { - $code.=" mov $SZ*$i($inp),$T1\n"; - $code.=" mov @ROT[4],$a0\n"; - $code.=" mov @ROT[0],$a1\n"; - $code.=" bswap $T1\n"; - &ROUND_00_15($i,@ROT); - unshift(@ROT,pop(@ROT)); - } -$code.=<<___; - jmp .Lrounds_16_xx -.align 16 -.Lrounds_16_xx: -___ - for(;$i<32;$i++) { - &ROUND_16_XX($i,@ROT); - unshift(@ROT,pop(@ROT)); - } - -$code.=<<___; - cmp \$$rounds,$round - jb .Lrounds_16_xx - - mov $_ctx,$ctx - lea 16*$SZ($inp),$inp - - add $SZ*0($ctx),$A - add $SZ*1($ctx),$B - add $SZ*2($ctx),$C - add $SZ*3($ctx),$D - add $SZ*4($ctx),$E - add $SZ*5($ctx),$F - add $SZ*6($ctx),$G - add $SZ*7($ctx),$H - - cmp $_end,$inp - - mov $A,$SZ*0($ctx) - mov $B,$SZ*1($ctx) - mov $C,$SZ*2($ctx) - mov $D,$SZ*3($ctx) - mov $E,$SZ*4($ctx) - mov $F,$SZ*5($ctx) - mov $G,$SZ*6($ctx) - mov $H,$SZ*7($ctx) - jb .Lloop - - mov $_rsp,%rsi - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp -.Lepilogue: - ret -.size $func,.-$func -___ - -if ($SZ==4) { -$code.=<<___; -.align 64 -.type $TABLE,\@object -$TABLE: - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -___ -} else { -$code.=<<___; -.align 64 -.type $TABLE,\@object -$TABLE: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - 
.quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -___ -} - -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; - -$code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent -.align 16 -se_handler: - push %rsi - push %rdi - push %rbx - push %rbp - push %r12 - push %r13 - push %r14 - push %r15 - pushfq - sub \$64,%rsp - - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip - - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue - - mov 152($context),%rax # pull context->Rsp - - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue - - mov 16*$SZ+3*8(%rax),%rax # pull $_rsp - lea 48(%rax),%rax - - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 - -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi - - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq - - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData - lea 24(%rsi),%r12 # &disp->EstablisherFrame - mov %r10,32(%rsp) # arg5 - mov %r11,40(%rsp) # arg6 - mov %r12,48(%rsp) # arg7 - mov %rcx,56(%rsp) # arg8, (NULL) - call 
*__imp_RtlVirtualUnwind(%rip) - - mov \$1,%eax # ExceptionContinueSearch - add \$64,%rsp - popfq - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - pop %rdi - pop %rsi - ret -.size se_handler,.-se_handler - -.section .pdata -.align 4 - .rva .LSEH_begin_$func - .rva .LSEH_end_$func - .rva .LSEH_info_$func - -.section .xdata -.align 8 -.LSEH_info_$func: - .byte 9,0,0,0 - .rva se_handler -___ -} - -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; -close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl index 7eab6a5b88b245..e96ec00314a486 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -9,17 +9,31 @@ # # SHA512 block transform for x86. September 2007. # +# May 2013. +# +# Add SSSE3 code path, 20-25% improvement [over original SSE2 code]. +# # Performance in clock cycles per processed byte (less is better): # -# Pentium PIII P4 AMD K8 Core2 -# gcc 100 75 116 54 66 -# icc 97 77 95 55 57 -# x86 asm 61 56 82 36 40 -# SSE2 asm - - 38 24 20 -# x86_64 asm(*) - - 30 10.0 10.5 +# gcc icc x86 asm SIMD(*) x86_64(**) +# Pentium 100 97 61 - - +# PIII 75 77 56 - - +# P4 116 95 82 34.6 30.8 +# AMD K8 54 55 36 20.7 9.57 +# Core2 66 57 40 15.9 9.97 +# Westmere 70 - 38 12.2 9.58 +# Sandy Bridge 58 - 35 11.9 11.2 +# Ivy Bridge 50 - 33 11.5 8.17 +# Haswell 46 - 29 11.3 7.66 +# Bulldozer 121 - 50 14.0 13.5 +# VIA Nano 91 - 52 33 14.7 +# Atom 126 - 68 48(***) 14.7 +# Silvermont 97 - 58 42(***) 17.5 # -# (*) x86_64 assembler performance is presented for reference -# purposes. +# (*) whichever best applicable. +# (**) x86_64 assembler performance is presented for reference +# purposes, the results are for integer-only code. +# (***) paddq is increadibly slow on Atom. # # IALU code-path is optimized for elder Pentiums. On vanilla Pentium # performance improvement over compiler generated code reaches ~60%, @@ -66,72 +80,77 @@ $A="mm0"; # B-D and $E="mm4"; # F-H are commonly loaded to respectively mm1-mm3 and # mm5-mm7, but it's done on on-demand basis... +$BxC="mm2"; # ... 
except for B^C sub BODY_00_15_sse2 { - my $prefetch=shift; + my $phase=shift; - &movq ("mm5",$Fsse2); # load f - &movq ("mm6",$Gsse2); # load g - &movq ("mm7",$Hsse2); # load h + #&movq ("mm5",$Fsse2); # load f + #&movq ("mm6",$Gsse2); # load g &movq ("mm1",$E); # %mm1 is sliding right - &movq ("mm2",$E); # %mm2 is sliding left + &pxor ("mm5","mm6"); # f^=g &psrlq ("mm1",14); - &movq ($Esse2,$E); # modulo-scheduled save e - &psllq ("mm2",23); + &movq ($Esse2,$E); # modulo-scheduled save e + &pand ("mm5",$E); # f&=e + &psllq ($E,23); # $E is sliding left + &movq ($A,"mm3") if ($phase<2); + &movq (&QWP(8*9,"esp"),"mm7") # save X[i] &movq ("mm3","mm1"); # %mm3 is T1 - &psrlq ("mm1",4); - &pxor ("mm3","mm2"); - &psllq ("mm2",23); + &psrlq ("mm1",4); + &pxor ("mm5","mm6"); # Ch(e,f,g) + &pxor ("mm3",$E); + &psllq ($E,23); &pxor ("mm3","mm1"); - &psrlq ("mm1",23); - &pxor ("mm3","mm2"); - &psllq ("mm2",4); + &movq ($Asse2,$A); # modulo-scheduled save a + &paddq ("mm7","mm5"); # X[i]+=Ch(e,f,g) + &pxor ("mm3",$E); + &psrlq ("mm1",23); + &paddq ("mm7",$Hsse2); # X[i]+=h &pxor ("mm3","mm1"); - &paddq ("mm7",QWP(0,$K512)); # h+=K512[i] - &pxor ("mm3","mm2"); # T1=Sigma1_512(e) - - &pxor ("mm5","mm6"); # f^=g + &psllq ($E,4); + &paddq ("mm7",QWP(0,$K512)); # X[i]+=K512[i] + &pxor ("mm3",$E); # T1=Sigma1_512(e) + + &movq ($E,$Dsse2); # e = load d, e in next round + &paddq ("mm3","mm7"); # T1+=X[i] + &movq ("mm5",$A); # %mm5 is sliding right + &psrlq ("mm5",28); + &paddq ($E,"mm3"); # d += T1 + &movq ("mm6",$A); # %mm6 is sliding left + &movq ("mm7","mm5"); + &psllq ("mm6",25); &movq ("mm1",$Bsse2); # load b - &pand ("mm5",$E); # f&=e - &movq ("mm2",$Csse2); # load c - &pxor ("mm5","mm6"); # f^=g - &movq ($E,$Dsse2); # e = load d - &paddq ("mm3","mm5"); # T1+=Ch(e,f,g) - &movq (&QWP(0,"esp"),$A); # modulo-scheduled save a - &paddq ("mm3","mm7"); # T1+=h - - &movq ("mm5",$A); # %mm5 is sliding right - &movq ("mm6",$A); # %mm6 is sliding left - &paddq ("mm3",&QWP(8*9,"esp")); # T1+=X[0] - &psrlq ("mm5",28); - &paddq ($E,"mm3"); # e += T1 - &psllq ("mm6",25); - &movq ("mm7","mm5"); # %mm7 is T2 - &psrlq ("mm5",6); - &pxor ("mm7","mm6"); - &psllq ("mm6",5); - &pxor ("mm7","mm5"); - &psrlq ("mm5",5); - &pxor ("mm7","mm6"); - &psllq ("mm6",6); - &pxor ("mm7","mm5"); + &psrlq ("mm5",6); + &pxor ("mm7","mm6"); &sub ("esp",8); - &pxor ("mm7","mm6"); # T2=Sigma0_512(a) - - &movq ("mm5",$A); # %mm5=a - &por ($A,"mm2"); # a=a|c - &movq ("mm6",&QWP(8*(9+16-14),"esp")) if ($prefetch); - &pand ("mm5","mm2"); # %mm5=a&c - &pand ($A,"mm1"); # a=(a|c)&b - &movq ("mm2",&QWP(8*(9+16-1),"esp")) if ($prefetch); - &por ("mm5",$A); # %mm5=(a&c)|((a|c)&b) - &paddq ("mm7","mm5"); # T2+=Maj(a,b,c) - &movq ($A,"mm3"); # a=T1 - - &mov (&LB("edx"),&BP(0,$K512)); - &paddq ($A,"mm7"); # a+=T2 - &add ($K512,8); + &psllq ("mm6",5); + &pxor ("mm7","mm5"); + &pxor ($A,"mm1"); # a^b, b^c in next round + &psrlq ("mm5",5); + &pxor ("mm7","mm6"); + &pand ($BxC,$A); # (b^c)&(a^b) + &psllq ("mm6",6); + &pxor ("mm7","mm5"); + &pxor ($BxC,"mm1"); # [h=]Maj(a,b,c) + &pxor ("mm6","mm7"); # Sigma0_512(a) + &movq ("mm7",&QWP(8*(9+16-1),"esp")) if ($phase!=0); # pre-fetch + &movq ("mm5",$Fsse2) if ($phase==0); # load f + + if ($phase>1) { + &paddq ($BxC,"mm6"); # h+=Sigma0(a) + &add ($K512,8); + #&paddq ($BxC,"mm3"); # h+=T1 + + ($A,$BxC) = ($BxC,$A); # rotate registers + } else { + &paddq ("mm3",$BxC); # T1+=Maj(a,b,c) + &movq ($BxC,$A); + &add ($K512,8); + &paddq ("mm3","mm6"); # T1+=Sigma0(a) + &movq ("mm6",$Gsse2) if ($phase==0); # load g + 
#&movq ($A,"mm3"); # h=T1 + } } sub BODY_00_15_x86 { @@ -284,110 +303,357 @@ sub BODY_00_15_x86 { if ($sse2) { &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); - &bt (&DWP(0,"edx"),26); - &jnc (&label("loop_x86")); + &mov ("ecx",&DWP(0,"edx")); + &test ("ecx",1<<26); + &jz (&label("loop_x86")); + + &mov ("edx",&DWP(4,"edx")); # load ctx->h[0-7] &movq ($A,&QWP(0,"esi")); + &and ("ecx",1<<24); # XMM registers availability &movq ("mm1",&QWP(8,"esi")); - &movq ("mm2",&QWP(16,"esi")); + &and ("edx",1<<9); # SSSE3 bit + &movq ($BxC,&QWP(16,"esi")); + &or ("ecx","edx"); &movq ("mm3",&QWP(24,"esi")); &movq ($E,&QWP(32,"esi")); &movq ("mm5",&QWP(40,"esi")); &movq ("mm6",&QWP(48,"esi")); &movq ("mm7",&QWP(56,"esi")); + &cmp ("ecx",1<<24|1<<9); + &je (&label("SSSE3")); &sub ("esp",8*10); + &jmp (&label("loop_sse2")); &set_label("loop_sse2",16); - # &movq ($Asse2,$A); + #&movq ($Asse2,$A); &movq ($Bsse2,"mm1"); - &movq ($Csse2,"mm2"); + &movq ($Csse2,$BxC); &movq ($Dsse2,"mm3"); - # &movq ($Esse2,$E); + #&movq ($Esse2,$E); &movq ($Fsse2,"mm5"); &movq ($Gsse2,"mm6"); + &pxor ($BxC,"mm1"); # magic &movq ($Hsse2,"mm7"); + &movq ("mm3",$A); # magic - &mov ("ecx",&DWP(0,"edi")); - &mov ("edx",&DWP(4,"edi")); + &mov ("eax",&DWP(0,"edi")); + &mov ("ebx",&DWP(4,"edi")); &add ("edi",8); - &bswap ("ecx"); - &bswap ("edx"); - &mov (&DWP(8*9+4,"esp"),"ecx"); - &mov (&DWP(8*9+0,"esp"),"edx"); + &mov ("edx",15); # counter + &bswap ("eax"); + &bswap ("ebx"); + &jmp (&label("00_14_sse2")); &set_label("00_14_sse2",16); + &movd ("mm1","eax"); &mov ("eax",&DWP(0,"edi")); + &movd ("mm7","ebx"); &mov ("ebx",&DWP(4,"edi")); &add ("edi",8); &bswap ("eax"); &bswap ("ebx"); - &mov (&DWP(8*8+4,"esp"),"eax"); - &mov (&DWP(8*8+0,"esp"),"ebx"); + &punpckldq("mm7","mm1"); &BODY_00_15_sse2(); - &cmp (&LB("edx"),0x35); - &jne (&label("00_14_sse2")); + &dec ("edx"); + &jnz (&label("00_14_sse2")); + + &movd ("mm1","eax"); + &movd ("mm7","ebx"); + &punpckldq("mm7","mm1"); &BODY_00_15_sse2(1); + &pxor ($A,$A); # A is in %mm3 + &mov ("edx",32); # counter + &jmp (&label("16_79_sse2")); + &set_label("16_79_sse2",16); - #&movq ("mm2",&QWP(8*(9+16-1),"esp")); #prefetched in BODY_00_15 - #&movq ("mm6",&QWP(8*(9+16-14),"esp")); - &movq ("mm1","mm2"); + for ($j=0;$j<2;$j++) { # 2x unroll + #&movq ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15 + &movq ("mm5",&QWP(8*(9+16-14),"esp")); + &movq ("mm1","mm7"); + &psrlq ("mm7",1); + &movq ("mm6","mm5"); + &psrlq ("mm5",6); + &psllq ("mm1",56); + &paddq ($A,"mm3"); # from BODY_00_15 + &movq ("mm3","mm7"); + &psrlq ("mm7",7-1); + &pxor ("mm3","mm1"); + &psllq ("mm1",63-56); + &pxor ("mm3","mm7"); + &psrlq ("mm7",8-7); + &pxor ("mm3","mm1"); + &movq ("mm1","mm5"); + &psrlq ("mm5",19-6); + &pxor ("mm7","mm3"); # sigma0 + + &psllq ("mm6",3); + &pxor ("mm1","mm5"); + &paddq ("mm7",&QWP(8*(9+16),"esp")); + &pxor ("mm1","mm6"); + &psrlq ("mm5",61-19); + &paddq ("mm7",&QWP(8*(9+16-9),"esp")); + &pxor ("mm1","mm5"); + &psllq ("mm6",45-3); + &movq ("mm5",$Fsse2); # load f + &pxor ("mm1","mm6"); # sigma1 + &movq ("mm6",$Gsse2); # load g - &psrlq ("mm2",1); - &movq ("mm7","mm6"); - &psrlq ("mm6",6); - &movq ("mm3","mm2"); + &paddq ("mm7","mm1"); # X[i] + #&movq (&QWP(8*9,"esp"),"mm7"); # moved to BODY_00_15 - &psrlq ("mm2",7-1); - &movq ("mm5","mm6"); - &psrlq ("mm6",19-6); - &pxor ("mm3","mm2"); + &BODY_00_15_sse2(2); + } + &dec ("edx"); + &jnz (&label("16_79_sse2")); - &psrlq ("mm2",8-7); - &pxor ("mm5","mm6"); - &psrlq ("mm6",61-19); - &pxor ("mm3","mm2"); + #&movq ($A,$Asse2); + &paddq 
($A,"mm3"); # from BODY_00_15 + &movq ("mm1",$Bsse2); + #&movq ($BxC,$Csse2); + &movq ("mm3",$Dsse2); + #&movq ($E,$Esse2); + &movq ("mm5",$Fsse2); + &movq ("mm6",$Gsse2); + &movq ("mm7",$Hsse2); - &movq ("mm2",&QWP(8*(9+16),"esp")); + &pxor ($BxC,"mm1"); # de-magic + &paddq ($A,&QWP(0,"esi")); + &paddq ("mm1",&QWP(8,"esi")); + &paddq ($BxC,&QWP(16,"esi")); + &paddq ("mm3",&QWP(24,"esi")); + &paddq ($E,&QWP(32,"esi")); + &paddq ("mm5",&QWP(40,"esi")); + &paddq ("mm6",&QWP(48,"esi")); + &paddq ("mm7",&QWP(56,"esi")); - &psllq ("mm1",56); - &pxor ("mm5","mm6"); - &psllq ("mm7",3); - &pxor ("mm3","mm1"); + &mov ("eax",8*80); + &movq (&QWP(0,"esi"),$A); + &movq (&QWP(8,"esi"),"mm1"); + &movq (&QWP(16,"esi"),$BxC); + &movq (&QWP(24,"esi"),"mm3"); + &movq (&QWP(32,"esi"),$E); + &movq (&QWP(40,"esi"),"mm5"); + &movq (&QWP(48,"esi"),"mm6"); + &movq (&QWP(56,"esi"),"mm7"); - &paddq ("mm2",&QWP(8*(9+16-9),"esp")); + &lea ("esp",&DWP(0,"esp","eax")); # destroy frame + &sub ($K512,"eax"); # rewind K - &psllq ("mm1",63-56); - &pxor ("mm5","mm7"); - &psllq ("mm7",45-3); - &pxor ("mm3","mm1"); - &pxor ("mm5","mm7"); + &cmp ("edi",&DWP(8*10+8,"esp")); # are we done yet? + &jb (&label("loop_sse2")); - &paddq ("mm3","mm5"); - &paddq ("mm3","mm2"); - &movq (&QWP(8*9,"esp"),"mm3"); + &mov ("esp",&DWP(8*10+12,"esp")); # restore sp + &emms (); +&function_end_A(); - &BODY_00_15_sse2(1); +&set_label("SSSE3",32); +{ my ($cnt,$frame)=("ecx","edx"); + my @X=map("xmm$_",(0..7)); + my $j; + my $i=0; + + &lea ($frame,&DWP(-64,"esp")); + &sub ("esp",256); + + # fixed stack frame layout + # + # +0 A B C D E F G H # backing store + # +64 X[0]+K[i] .. X[15]+K[i] # XMM->MM xfer area + # +192 # XMM off-load ring buffer + # +256 # saved parameters + + &movdqa (@X[1],&QWP(80*8,$K512)); # byte swap mask + &movdqu (@X[0],&QWP(0,"edi")); + &pshufb (@X[0],@X[1]); + for ($j=0;$j<8;$j++) { + &movdqa (&QWP(16*(($j-1)%4),$frame),@X[3]) if ($j>4); # off-load + &movdqa (@X[3],&QWP(16*($j%8),$K512)); + &movdqa (@X[2],@X[1]) if ($j<7); # perpetuate byte swap mask + &movdqu (@X[1],&QWP(16*($j+1),"edi")) if ($j<7); # next input + &movdqa (@X[1],&QWP(16*(($j+1)%4),$frame)) if ($j==7);# restore @X[0] + &paddq (@X[3],@X[0]); + &pshufb (@X[1],@X[2]) if ($j<7); + &movdqa (&QWP(16*($j%8)-128,$frame),@X[3]); # xfer X[i]+K[i] + + push(@X,shift(@X)); # rotate(@X) + } + #&jmp (&label("loop_ssse3")); + &nop (); - &cmp (&LB("edx"),0x17); - &jne (&label("16_79_sse2")); +&set_label("loop_ssse3",32); + &movdqa (@X[2],&QWP(16*(($j+1)%4),$frame)); # pre-restore @X[1] + &movdqa (&QWP(16*(($j-1)%4),$frame),@X[3]); # off-load @X[3] + &lea ($K512,&DWP(16*8,$K512)); + + #&movq ($Asse2,$A); # off-load A-H + &movq ($Bsse2,"mm1"); + &mov ("ebx","edi"); + &movq ($Csse2,$BxC); + &lea ("edi",&DWP(128,"edi")); # advance input + &movq ($Dsse2,"mm3"); + &cmp ("edi","eax"); + #&movq ($Esse2,$E); + &movq ($Fsse2,"mm5"); + &cmovb ("ebx","edi"); + &movq ($Gsse2,"mm6"); + &mov ("ecx",4); # loop counter + &pxor ($BxC,"mm1"); # magic + &movq ($Hsse2,"mm7"); + &pxor ("mm3","mm3"); # magic + + &jmp (&label("00_47_ssse3")); + +sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2 + ( + '&movq ("mm1",$E)', # %mm1 is sliding right + '&movq ("mm7",&QWP(((-8*$i)%128)-128,$frame))',# X[i]+K[i] + '&pxor ("mm5","mm6")', # f^=g + '&psrlq ("mm1",14)', + '&movq (&QWP(8*($i+4)%64,"esp"),$E)', # modulo-scheduled save e + '&pand ("mm5",$E)', # f&=e + '&psllq ($E,23)', # $E is sliding left + '&paddq ($A,"mm3")', # [h+=Maj(a,b,c)] + '&movq ("mm3","mm1")', # %mm3 is T1 + 
'&psrlq("mm1",4)', + '&pxor ("mm5","mm6")', # Ch(e,f,g) + '&pxor ("mm3",$E)', + '&psllq($E,23)', + '&pxor ("mm3","mm1")', + '&movq (&QWP(8*$i%64,"esp"),$A)', # modulo-scheduled save a + '&paddq("mm7","mm5")', # X[i]+=Ch(e,f,g) + '&pxor ("mm3",$E)', + '&psrlq("mm1",23)', + '&paddq("mm7",&QWP(8*($i+7)%64,"esp"))', # X[i]+=h + '&pxor ("mm3","mm1")', + '&psllq($E,4)', + '&pxor ("mm3",$E)', # T1=Sigma1_512(e) + + '&movq ($E,&QWP(8*($i+3)%64,"esp"))', # e = load d, e in next round + '&paddq ("mm3","mm7")', # T1+=X[i] + '&movq ("mm5",$A)', # %mm5 is sliding right + '&psrlq("mm5",28)', + '&paddq ($E,"mm3")', # d += T1 + '&movq ("mm6",$A)', # %mm6 is sliding left + '&movq ("mm7","mm5")', + '&psllq("mm6",25)', + '&movq ("mm1",&QWP(8*($i+1)%64,"esp"))', # load b + '&psrlq("mm5",6)', + '&pxor ("mm7","mm6")', + '&psllq("mm6",5)', + '&pxor ("mm7","mm5")', + '&pxor ($A,"mm1")', # a^b, b^c in next round + '&psrlq("mm5",5)', + '&pxor ("mm7","mm6")', + '&pand ($BxC,$A)', # (b^c)&(a^b) + '&psllq("mm6",6)', + '&pxor ("mm7","mm5")', + '&pxor ($BxC,"mm1")', # [h=]Maj(a,b,c) + '&pxor ("mm6","mm7")', # Sigma0_512(a) + '&movq ("mm5",&QWP(8*($i+5-1)%64,"esp"))', # pre-load f + '&paddq ($BxC,"mm6")', # h+=Sigma0(a) + '&movq ("mm6",&QWP(8*($i+6-1)%64,"esp"))', # pre-load g + + '($A,$BxC) = ($BxC,$A); $i--;' + ); +} - # &movq ($A,$Asse2); +&set_label("00_47_ssse3",32); + + for(;$j<16;$j++) { + my ($t0,$t2,$t1)=@X[2..4]; + my @insns = (&BODY_00_15_ssse3(),&BODY_00_15_ssse3()); + + &movdqa ($t2,@X[5]); + &movdqa (@X[1],$t0); # restore @X[1] + &palignr ($t0,@X[0],8); # X[1..2] + &movdqa (&QWP(16*($j%4),$frame),@X[4]); # off-load @X[4] + &palignr ($t2,@X[4],8); # X[9..10] + + &movdqa ($t1,$t0); + &psrlq ($t0,7); + &paddq (@X[0],$t2); # X[0..1] += X[9..10] + &movdqa ($t2,$t1); + &psrlq ($t1,1); + &psllq ($t2,64-8); + &pxor ($t0,$t1); + &psrlq ($t1,8-1); + &pxor ($t0,$t2); + &psllq ($t2,8-1); + &pxor ($t0,$t1); + &movdqa ($t1,@X[7]); + &pxor ($t0,$t2); # sigma0(X[1..2]) + &movdqa ($t2,@X[7]); + &psrlq ($t1,6); + &paddq (@X[0],$t0); # X[0..1] += sigma0(X[1..2]) + + &movdqa ($t0,@X[7]); + &psrlq ($t2,19); + &psllq ($t0,64-61); + &pxor ($t1,$t2); + &psrlq ($t2,61-19); + &pxor ($t1,$t0); + &psllq ($t0,61-19); + &pxor ($t1,$t2); + &movdqa ($t2,&QWP(16*(($j+2)%4),$frame));# pre-restore @X[1] + &pxor ($t1,$t0); # sigma0(X[1..2]) + &movdqa ($t0,&QWP(16*($j%8),$K512)); + eval(shift(@insns)); + &paddq (@X[0],$t1); # X[0..1] += sigma0(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &paddq ($t0,@X[0]); + foreach(@insns) { eval; } + &movdqa (&QWP(16*($j%8)-128,$frame),$t0);# xfer X[i]+K[i] + + push(@X,shift(@X)); # rotate(@X) + } + &lea ($K512,&DWP(16*8,$K512)); + &dec ("ecx"); + &jnz (&label("00_47_ssse3")); + + &movdqa (@X[1],&QWP(0,$K512)); # byte swap mask + &lea ($K512,&DWP(-80*8,$K512)); # rewind + &movdqu (@X[0],&QWP(0,"ebx")); + &pshufb (@X[0],@X[1]); + + for ($j=0;$j<8;$j++) { # load next or same block + my @insns = (&BODY_00_15_ssse3(),&BODY_00_15_ssse3()); + + &movdqa (&QWP(16*(($j-1)%4),$frame),@X[3]) if ($j>4); # off-load + &movdqa (@X[3],&QWP(16*($j%8),$K512)); + &movdqa (@X[2],@X[1]) if ($j<7); # perpetuate byte swap mask + &movdqu (@X[1],&QWP(16*($j+1),"ebx")) if ($j<7); # next input + &movdqa (@X[1],&QWP(16*(($j+1)%4),$frame)) if ($j==7);# restore @X[0] + &paddq (@X[3],@X[0]); + &pshufb (@X[1],@X[2]) if ($j<7); + foreach(@insns) { eval; } + &movdqa (&QWP(16*($j%8)-128,$frame),@X[3]);# xfer X[i]+K[i] + + push(@X,shift(@X)); # rotate(@X) + } + + #&movq ($A,$Asse2); 
# load A-H &movq ("mm1",$Bsse2); - &movq ("mm2",$Csse2); + &paddq ($A,"mm3"); # from BODY_00_15 + #&movq ($BxC,$Csse2); &movq ("mm3",$Dsse2); - # &movq ($E,$Esse2); - &movq ("mm5",$Fsse2); - &movq ("mm6",$Gsse2); + #&movq ($E,$Esse2); + #&movq ("mm5",$Fsse2); + #&movq ("mm6",$Gsse2); &movq ("mm7",$Hsse2); + &pxor ($BxC,"mm1"); # de-magic &paddq ($A,&QWP(0,"esi")); &paddq ("mm1",&QWP(8,"esi")); - &paddq ("mm2",&QWP(16,"esi")); + &paddq ($BxC,&QWP(16,"esi")); &paddq ("mm3",&QWP(24,"esi")); &paddq ($E,&QWP(32,"esi")); &paddq ("mm5",&QWP(40,"esi")); @@ -396,21 +662,19 @@ sub BODY_00_15_x86 { &movq (&QWP(0,"esi"),$A); &movq (&QWP(8,"esi"),"mm1"); - &movq (&QWP(16,"esi"),"mm2"); + &movq (&QWP(16,"esi"),$BxC); &movq (&QWP(24,"esi"),"mm3"); &movq (&QWP(32,"esi"),$E); &movq (&QWP(40,"esi"),"mm5"); &movq (&QWP(48,"esi"),"mm6"); &movq (&QWP(56,"esi"),"mm7"); - &add ("esp",8*80); # destroy frame - &sub ($K512,8*80); # rewind K - - &cmp ("edi",&DWP(8*10+8,"esp")); # are we done yet? - &jb (&label("loop_sse2")); + &cmp ("edi","eax") # are we done yet? + &jb (&label("loop_ssse3")); + &mov ("esp",&DWP(64+12,$frame)); # restore sp &emms (); - &mov ("esp",&DWP(8*10+12,"esp")); # restore sp +} &function_end_A(); } &set_label("loop_x86",16); @@ -638,6 +902,9 @@ sub BODY_00_15_x86 { &data_word(0xfc657e2a,0x597f299c); # u64 &data_word(0x3ad6faec,0x5fcb6fab); # u64 &data_word(0x4a475817,0x6c44198c); # u64 + + &data_word(0x04050607,0x00010203); # byte swap + &data_word(0x0c0d0e0f,0x08090a0b); # mask &function_end_B("sha512_block_data_order"); &asciz("SHA512 block transform for x86, CRYPTOGAMS by "); diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl index 7faf37b1479029..fb7dc506aca15c 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-armv4.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -26,7 +26,24 @@ # March 2011. # # Add NEON implementation. On Cortex A8 it was measured to process -# one byte in 25.5 cycles or 47% faster than integer-only code. +# one byte in 23.3 cycles or ~60% faster than integer-only code. + +# August 2012. +# +# Improve NEON performance by 12% on Snapdragon S4. In absolute +# terms it's 22.6 cycles per byte, which is disappointing result. +# Technical writers asserted that 3-way S4 pipeline can sustain +# multiple NEON instructions per cycle, but dual NEON issue could +# not be observed, and for NEON-only sequences IPC(*) was found to +# be limited by 1:-( 0.33 and 0.66 were measured for sequences with +# ILPs(*) of 1 and 2 respectively. This in turn means that you can +# even find yourself striving, as I did here, for achieving IPC +# adequate to one delivered by Cortex A8 [for reference, it's +# 0.5 for ILP of 1, and 1 for higher ILPs]. +# +# (*) ILP, instruction-level parallelism, how many instructions +# *can* execute at the same time. IPC, instructions per cycle, +# indicates how many instructions actually execute. # Byte order [in]dependence. 
========================================= # @@ -220,16 +237,20 @@ () WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 .LOPENSSL_armcap: .word OPENSSL_armcap_P-sha512_block_data_order .skip 32-4 +#else +.skip 32 +#endif .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: sub r3,pc,#8 @ sha512_block_data_order add $len,$inp,$len,lsl#7 @ len to point at the end of inp -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#1 @@ -457,40 +478,40 @@ () vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned #endif vshr.u64 $t1,$e,#@Sigma1[1] +#if $i>0 + vadd.i64 $a,$Maj @ h+=Maj from the past +#endif vshr.u64 $t2,$e,#@Sigma1[2] ___ $code.=<<___; vld1.64 {$K},[$Ktbl,:64]! @ K[i++] vsli.64 $t0,$e,#`64-@Sigma1[0]` vsli.64 $t1,$e,#`64-@Sigma1[1]` + vmov $Ch,$e vsli.64 $t2,$e,#`64-@Sigma1[2]` #if $i<16 && defined(__ARMEL__) vrev64.8 @X[$i],@X[$i] #endif - vadd.i64 $T1,$K,$h - veor $Ch,$f,$g - veor $t0,$t1 - vand $Ch,$e - veor $t0,$t2 @ Sigma1(e) - veor $Ch,$g @ Ch(e,f,g) - vadd.i64 $T1,$t0 + veor $t1,$t0 + vbsl $Ch,$f,$g @ Ch(e,f,g) vshr.u64 $t0,$a,#@Sigma0[0] - vadd.i64 $T1,$Ch + veor $t2,$t1 @ Sigma1(e) + vadd.i64 $T1,$Ch,$h vshr.u64 $t1,$a,#@Sigma0[1] - vshr.u64 $t2,$a,#@Sigma0[2] vsli.64 $t0,$a,#`64-@Sigma0[0]` + vadd.i64 $T1,$t2 + vshr.u64 $t2,$a,#@Sigma0[2] + vadd.i64 $K,@X[$i%16] vsli.64 $t1,$a,#`64-@Sigma0[1]` + veor $Maj,$a,$b vsli.64 $t2,$a,#`64-@Sigma0[2]` - vadd.i64 $T1,@X[$i%16] - vorr $Maj,$a,$c - vand $Ch,$a,$c veor $h,$t0,$t1 - vand $Maj,$b + vadd.i64 $T1,$K + vbsl $Maj,$c,$b @ Maj(a,b,c) veor $h,$t2 @ Sigma0(a) - vorr $Maj,$Ch @ Maj(a,b,c) - vadd.i64 $h,$T1 vadd.i64 $d,$T1 - vadd.i64 $h,$Maj + vadd.i64 $Maj,$T1 + @ vadd.i64 $h,$Maj ___ } @@ -508,6 +529,7 @@ () $code.=<<___; vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0] vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1] + vadd.i64 @_[0],d30 @ h+=Maj from the past vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2] vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]` vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1] @@ -533,7 +555,8 @@ () } $code.=<<___; -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a .fpu neon .align 4 @@ -554,6 +577,7 @@ () $code.=<<___; bne .L16_79_neon + vadd.i64 $A,d30 @ h+=Maj from the past vldmia $ctx,{d24-d31} @ load context to temp vadd.i64 q8,q12 @ vectorized accumulate vadd.i64 q9,q13 @@ -565,7 +589,7 @@ () bne .Loop_neon vldmia sp!,{d8-d15} @ epilogue - bx lr + ret @ bx lr #endif ___ } @@ -573,10 +597,13 @@ () .size sha512_block_data_order,.-sha512_block_data_order .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " .align 2 +#if __ARM_MAX_ARCH__>=7 .comm OPENSSL_armcap_P,4,4 +#endif ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx lr/gm; print $code; close STDOUT; # enforce flush diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl new file mode 100644 index 00000000000000..bd7a0a5662e0a2 --- /dev/null +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl @@ -0,0 +1,420 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. 
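The NEON rounds above fold the earlier veor/vand/vorr sequences for Ch and Maj into single vbsl bit-selects: vbsl d,x,y leaves (x & d) | (y & ~d) in d, so selecting with e gives Ch(e,f,g), and selecting c-versus-b with a^b gives Maj(a,b,c). A scalar sanity check of those identities (plain Perl, helper names mine; 32-bit lanes used only to keep the arithmetic portable):

use strict;
use warnings;

my $M = 0xFFFFFFFF;

sub bsl { my ($d,$x,$y) = @_; (($x & $d) | ($y & ~$d)) & $M }       # vbsl d,x,y
sub Ch  { my ($e,$f,$g) = @_; (($e & $f) ^ (~$e & $g)) & $M }
sub Maj { my ($a,$b,$c) = @_; (($a & $b) ^ ($a & $c) ^ ($b & $c)) & $M }

for (1..1000) {
    my ($a,$b,$c,$e,$f,$g) = map { int(rand(0xFFFFFFFF)) } 1..6;
    die "Ch mismatch"  unless Ch($e,$f,$g)  == bsl($e, $f, $g);
    die "Maj mismatch" unless Maj($a,$b,$c) == bsl($a ^ $b, $c, $b);
}
print "vbsl identities hold for Ch and Maj\n";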
For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# SHA256/512 for ARMv8. +# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# SHA256-hw SHA256(*) SHA512 +# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +# Cortex-A53 2.38 15.6 (+110%) 10.1 (+190%(***)) +# Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +# +# (*) Software SHA256 results are of lesser relevance, presented +# mostly for informational purposes. +# (**) The result is a trade-off: it's possible to improve it by +# 10% (or by 1 cycle per round), but at the cost of 20% loss +# on Cortex-A53 (or by 4 cycles per round). +# (***) Super-impressive coefficients over gcc-generated code are +# indication of some compiler "pathology", most notably code +# generated with -mgeneral-regs-only is significanty faster +# and lags behind assembly only by 50-90%. + +$flavour=shift; +$output=shift; +open STDOUT,">$output"; + +if ($output =~ /512/) { + $BITS=512; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=(1, 8, 7); + @sigma1=(19,61, 6); + $rounds=80; + $reg_t="x"; +} else { + $BITS=256; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 7,18, 3); + @sigma1=(17,19,10); + $rounds=64; + $reg_t="w"; +} + +$func="sha${BITS}_block_data_order"; + +($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); + +@X=map("$reg_t$_",(3..15,0..2)); +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27)); +($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28)); + +sub BODY_00_xx { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)&15; +my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); + $T0=@X[$i+3] if ($i<11); + +$code.=<<___ if ($i<16); +#ifndef __ARMEB__ + rev @X[$i],@X[$i] // $i +#endif +___ +$code.=<<___ if ($i<13 && ($i&1)); + ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ +___ +$code.=<<___ if ($i==13); + ldp @X[14],@X[15],[$inp] +___ +$code.=<<___ if ($i>=14); + ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`] +___ +$code.=<<___ if ($i>0 && $i<16); + add $a,$a,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=11); + str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`] +___ +# While ARMv8 specifies merged rotate-n-logical operation such as +# 'eor x,y,z,ror#n', it was found to negatively affect performance +# on Apple A7. The reason seems to be that it requires even 'y' to +# be available earlier. This means that such merged instruction is +# not necessarily best choice on critical path... On the other hand +# Cortex-A5x handles merged instructions much better than disjoint +# rotate and logical... See (**) footnote above. 
+$code.=<<___ if ($i<15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]` + and $t1,$f,$e + bic $t2,$g,$e + add $h,$h,@X[$i&15] // h+=X[i] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e) + ror $T0,$a,#$Sigma0[0] + add $h,$h,$t1 // h+=Ch(e,f,g) + eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]` + add $h,$h,$t0 // h+=Sigma1(e) + and $t3,$t3,$t2 // (b^c)&=(a^b) + add $d,$d,$h // d+=h + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a) + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + //add $h,$h,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + ror $T1,@X[($j+1)&15],#$sigma0[0] + and $t1,$f,$e + ror $T2,@X[($j+14)&15],#$sigma1[0] + bic $t2,$g,$e + ror $T0,$a,#$Sigma0[0] + add $h,$h,@X[$i&15] // h+=X[i] + eor $t0,$t0,$e,ror#$Sigma1[1] + eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e) + eor $T0,$T0,$a,ror#$Sigma0[1] + add $h,$h,$t1 // h+=Ch(e,f,g) + and $t3,$t3,$t2 // (b^c)&=(a^b) + eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1] + eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1]) + add $h,$h,$t0 // h+=Sigma1(e) + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a) + eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14]) + add @X[$j],@X[$j],@X[($j+9)&15] + add $d,$d,$h // d+=h + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + add @X[$j],@X[$j],$T1 + add $h,$h,$t1 // h+=Sigma0(a) + add @X[$j],@X[$j],$T2 +___ + ($t2,$t3)=($t3,$t2); +} + +$code.=<<___; +#include "arm_arch.h" + +.text + +.globl $func +.type $func,%function +.align 6 +$func: +___ +$code.=<<___ if ($SZ==4); + ldr x16,.LOPENSSL_armcap_P + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry +___ +$code.=<<___; + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*$SZ + + ldp $A,$B,[$ctx] // load context + ldp $C,$D,[$ctx,#2*$SZ] + ldp $E,$F,[$ctx,#4*$SZ] + add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input + ldp $G,$H,[$ctx,#6*$SZ] + adr $Ktbl,K$BITS + stp $ctx,$num,[x29,#96] + +.Loop: + ldp @X[0],@X[1],[$inp],#2*$SZ + ldr $t2,[$Ktbl],#$SZ // *K++ + eor $t3,$B,$C // magic seed + str $inp,[x29,#112] +___ +for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=".Loop_16_xx:\n"; +for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + cbnz $t2,.Loop_16_xx + + ldp $ctx,$num,[x29,#96] + ldr $inp,[x29,#112] + sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind + + ldp @X[0],@X[1],[$ctx] + ldp @X[2],@X[3],[$ctx,#2*$SZ] + add $inp,$inp,#14*$SZ // advance input pointer + ldp @X[4],@X[5],[$ctx,#4*$SZ] + add $A,$A,@X[0] + ldp @X[6],@X[7],[$ctx,#6*$SZ] + add $B,$B,@X[1] + add $C,$C,@X[2] + add $D,$D,@X[3] + stp $A,$B,[$ctx] + add $E,$E,@X[4] + add $F,$F,@X[5] + stp $C,$D,[$ctx,#2*$SZ] + add $G,$G,@X[6] + add $H,$H,@X[7] + cmp $inp,$num + stp $E,$F,[$ctx,#4*$SZ] + stp $G,$H,[$ctx,#6*$SZ] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*$SZ + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size $func,.-$func + +.align 6 +.type K$BITS,%object +K$BITS: +___ +$code.=<<___ if ($SZ==8); + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +___ +$code.=<<___ if ($SZ==4); + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +___ +$code.=<<___; +.size K$BITS,.-K$BITS +.align 3 +.LOPENSSL_armcap_P: + .quad OPENSSL_armcap_P-. +.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by " +.align 2 +___ + +if ($SZ==4) { +my $Ktbl="x3"; + +my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2)); +my @MSG=map("v$_.16b",(4..7)); +my ($W0,$W1)=("v16.4s","v17.4s"); +my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b"); + +$code.=<<___; +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1.32 {$ABCD,$EFGH},[$ctx] + adr $Ktbl,K256 + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + ld1.32 {$W0},[$Ktbl],#16 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + rev32 @MSG[2],@MSG[2] + rev32 @MSG[3],@MSG[3] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + orr $EFGH_SAVE,$EFGH,$EFGH +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + ld1.32 {$W0},[$Ktbl],#16 + add.i32 $W1,$W1,@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + ld1.32 {$W1},[$Ktbl] + add.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + add.i32 $W1,$W1,@MSG[3] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + add.i32 $ABCD,$ABCD,$ABCD_SAVE + add.i32 $EFGH,$EFGH,$EFGH_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD,$EFGH},[$ctx] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +___ +} + +$code.=<<___; +.comm OPENSSL_armcap_P,4,4 +___ + +{ my %opcode = ( + "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000, + "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/geo; + + s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo; + + s/\.\w?32\b//o and s/\.16b/\.4s/go; + m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go; + + print $_,"\n"; +} + +close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl index 1c6ce56522ed2d..59f889a095941a 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-ia64.pl @@ -1,7 
+1,7 @@ #!/usr/bin/env perl # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -48,16 +48,22 @@ # because on Itanium 1 stall on MM result is accompanied by # pipeline flush, which takes 6 cycles:-( # -# Resulting performance numbers for 900MHz Itanium 2 system: +# June 2012 # -# The 'numbers' are in 1000s of bytes per second processed. -# type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes -# sha1(*) 6210.14k 20376.30k 52447.83k 85870.05k 105478.12k -# sha256 7476.45k 20572.05k 41538.34k 56062.29k 62093.18k -# sha512 4996.56k 20026.28k 47597.20k 85278.79k 111501.31k +# Improve performance by 15-20%. Note about "rules of engagement" +# above. Contemporary cores are equipped with additional shifter, +# so that they should perform even better than below, presumably +# by ~10%. # -# (*) SHA1 numbers are for HP-UX compiler and are presented purely -# for reference purposes. I bet it can improved too... +###################################################################### +# Current performance in cycles per processed byte for Itanium 2 +# pre-9000 series [little-endian] system: +# +# SHA1(*) 5.7 +# SHA256 12.6 +# SHA512 6.7 +# +# (*) SHA1 result is presented purely for reference purposes. # # To generate code, pass the file name with either 256 or 512 in its # name and compiler flags. @@ -106,8 +112,8 @@ { $big_endian=(unpack('L',pack('N',1))==1); } $code=<<___; -.ident \"$output, version 1.1\" -.ident \"IA-64 ISA artwork by Andy Polyakov \" +.ident \"$output, version 2.0\" +.ident \"IA-64 ISA artwork by Andy Polyakov \" .explicit .text @@ -115,26 +121,25 @@ lcsave=r3; prsave=r14; K=r15; -A=r16; B=r17; C=r18; D=r19; -E=r20; F=r21; G=r22; H=r23; +A_=r16; B_=r17; C_=r18; D_=r19; +E_=r20; F_=r21; G_=r22; H_=r23; T1=r24; T2=r25; s0=r26; s1=r27; t0=r28; t1=r29; Ktbl=r30; ctx=r31; // 1st arg -input=r48; // 2nd arg -num=r49; // 3rd arg -sgm0=r50; sgm1=r51; // small constants -A_=r54; B_=r55; C_=r56; D_=r57; -E_=r58; F_=r59; G_=r60; H_=r61; +input=r56; // 2nd arg +num=r57; // 3rd arg +sgm0=r58; sgm1=r59; // small constants // void $func (SHA_CTX *ctx, const void *in,size_t num[,int host]) .global $func# .proc $func# .align 32 +.skip 16 $func: .prologue .save ar.pfs,pfssave -{ .mmi; alloc pfssave=ar.pfs,3,27,0,16 +{ .mmi; alloc pfssave=ar.pfs,3,25,0,24 $ADDP ctx=0,r32 // 1st arg .save ar.lc,lcsave mov lcsave=ar.lc } @@ -145,11 +150,9 @@ .body { .mib; add r8=0*$SZ,ctx - add r9=1*$SZ,ctx - brp.loop.imp .L_first16,.L_first16_end-16 } + add r9=1*$SZ,ctx } { .mib; add r10=2*$SZ,ctx - add r11=3*$SZ,ctx - brp.loop.imp .L_rest,.L_rest_end-16 };; + add r11=3*$SZ,ctx };; // load A-H .Lpic_point: @@ -164,7 +167,7 @@ add Ktbl=($TABLE#-.Lpic_point),Ktbl } { .mmi; $LDW G_=[r10] $LDW H_=[r11] - cmp.ne p0,p16=0,r0 };; // used in sha256_block + cmp.ne p0,p16=0,r0 };; ___ $code.=<<___ if ($BITS==64); { .mii; and r8=7,input @@ -179,50 +182,26 @@ ___ $code.=<<___; .L_outer: -.rotr X[16] -{ .mmi; mov A=A_ - mov B=B_ +.rotr R[8],X[16] +A=R[0]; B=R[1]; C=R[2]; D=R[3]; E=R[4]; F=R[5]; G=R[6]; H=R[7] +{ .mmi; ld1 X[15]=[input],$SZ // eliminated in sha512 + mov A=A_ mov ar.lc=14 } -{ .mmi; mov C=C_ - mov D=D_ - mov E=E_ } -{ .mmi; mov F=F_ - mov G=G_ - mov ar.ec=2 } -{ .mmi; ld1 X[15]=[input],$SZ // eliminated in 64-bit +{ 
.mmi; mov B=B_ + mov C=C_ + mov D=D_ } +{ .mmi; mov E=E_ + mov F=F_ + mov ar.ec=2 };; +{ .mmi; mov G=G_ mov H=H_ - mov sgm1=$sigma1[2] };; - -___ -$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32); -.align 32 -.L_first16: -{ .mmi; add r9=1-$SZ,input - add r10=2-$SZ,input - add r11=3-$SZ,input };; -{ .mmi; ld1 r9=[r9] - ld1 r10=[r10] - dep.z $t1=E,32,32 } -{ .mmi; $LDW K=[Ktbl],$SZ - ld1 r11=[r11] - zxt4 E=E };; -{ .mii; or $t1=$t1,E - dep X[15]=X[15],r9,8,8 - dep r11=r10,r11,8,8 };; -{ .mmi; and T1=F,E - and T2=A,B - dep X[15]=X[15],r11,16,16 } -{ .mmi; andcm r8=G,E - and r9=A,C - mux2 $t0=A,0x44 };; // copy lower half to upper -{ .mmi; (p16) ld1 X[15-1]=[input],$SZ // prefetch - xor T1=T1,r8 // T1=((e & f) ^ (~e & g)) - _rotr r11=$t1,$Sigma1[0] } // ROTR(e,14) -{ .mib; and r10=B,C - xor T2=T2,r9 };; + mov sgm1=$sigma1[2] } +{ .mib; mov r8=0 + add r9=1-$SZ,input + brp.loop.imp .L_first16,.L_first16_end-16 };; ___ $t0="A", $t1="E", $code.=<<___ if ($BITS==64); -// in 64-bit mode I load whole X[16] at once and take care of alignment... +// in sha512 case I load whole X[16] at once and take care of alignment... { .mmi; add r8=1*$SZ,input add r9=2*$SZ,input add r10=3*$SZ,input };; @@ -248,7 +227,9 @@ $LDW X[ 2]=[r8],4*$SZ (p15) br.cond.dpnt.many .L7byte };; { .mmb; $LDW X[ 1]=[r9],4*$SZ - $LDW X[ 0]=[r10],4*$SZ + $LDW X[ 0]=[r10],4*$SZ } +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian br.many .L_first16 };; .L1byte: { .mmi; $LDW X[13]=[r9],4*$SZ @@ -281,7 +262,9 @@ shrp X[ 3]=X[ 3],X[ 2],56 } { .mii; shrp X[ 2]=X[ 2],X[ 1],56 shrp X[ 1]=X[ 1],X[ 0],56 } -{ .mib; shrp X[ 0]=X[ 0],T1,56 +{ .mib; shrp X[ 0]=X[ 0],T1,56 } +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian br.many .L_first16 };; .L2byte: { .mmi; $LDW X[11]=[input],4*$SZ @@ -313,7 +296,9 @@ shrp X[ 2]=X[ 2],X[ 1],48 } { .mii; shrp X[ 1]=X[ 1],X[ 0],48 shrp X[ 0]=X[ 0],T1,48 } -{ .mfb; br.many .L_first16 };; +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian + br.many .L_first16 };; .L3byte: { .mmi; $LDW X[ 9]=[r9],4*$SZ $LDW X[ 8]=[r10],4*$SZ @@ -341,7 +326,9 @@ shrp X[ 3]=X[ 3],X[ 2],40 } { .mii; shrp X[ 2]=X[ 2],X[ 1],40 shrp X[ 1]=X[ 1],X[ 0],40 } -{ .mib; shrp X[ 0]=X[ 0],T1,40 +{ .mib; shrp X[ 0]=X[ 0],T1,40 } +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian br.many .L_first16 };; .L4byte: { .mmi; $LDW X[ 7]=[input],4*$SZ @@ -369,7 +356,9 @@ shrp X[ 2]=X[ 2],X[ 1],32 } { .mii; shrp X[ 1]=X[ 1],X[ 0],32 shrp X[ 0]=X[ 0],T1,32 } -{ .mfb; br.many .L_first16 };; +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian + br.many .L_first16 };; .L5byte: { .mmi; $LDW X[ 5]=[r9],4*$SZ $LDW X[ 4]=[r10],4*$SZ @@ -393,7 +382,9 @@ shrp X[ 3]=X[ 3],X[ 2],24 } { .mii; shrp X[ 2]=X[ 2],X[ 1],24 shrp X[ 1]=X[ 1],X[ 0],24 } -{ .mib; shrp X[ 0]=X[ 0],T1,24 +{ .mib; shrp X[ 0]=X[ 0],T1,24 } +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian br.many .L_first16 };; .L6byte: { .mmi; $LDW X[ 3]=[input],4*$SZ @@ -417,7 +408,9 @@ shrp X[ 2]=X[ 2],X[ 1],16 } { .mii; shrp X[ 1]=X[ 1],X[ 0],16 shrp X[ 0]=X[ 0],T1,16 } -{ .mfb; br.many .L_first16 };; +{ .mib; mov r8=0 + mux1 X[15]=X[15],\@rev // eliminated on big-endian + br.many .L_first16 };; .L7byte: { .mmi; $LDW X[ 1]=[r9],4*$SZ $LDW X[ 0]=[r10],4*$SZ @@ -437,128 +430,146 @@ shrp X[ 3]=X[ 3],X[ 2],8 } { .mii; shrp X[ 2]=X[ 2],X[ 1],8 shrp X[ 1]=X[ 1],X[ 0],8 } -{ .mib; shrp X[ 0]=X[ 0],T1,8 - br.many .L_first16 };; +{ .mib; shrp X[ 0]=X[ 0],T1,8 } +{ .mib; mov r8=0 + mux1 
X[15]=X[15],\@rev };; // eliminated on big-endian .align 32 .L_first16: { .mmi; $LDW K=[Ktbl],$SZ - and T1=F,E - and T2=A,B } -{ .mmi; //$LDW X[15]=[input],$SZ // X[i]=*input++ + add A=A,r8 // H+=Sigma(0) from the past + _rotr r10=$t1,$Sigma1[0] } // ROTR(e,14) +{ .mmi; and T1=F,E andcm r8=G,E - and r9=A,C };; -{ .mmi; xor T1=T1,r8 //T1=((e & f) ^ (~e & g)) - and r10=B,C - _rotr r11=$t1,$Sigma1[0] } // ROTR(e,14) -{ .mmi; xor T2=T2,r9 - mux1 X[15]=X[15],\@rev };; // eliminated in big-endian + (p16) mux1 X[14]=X[14],\@rev };; // eliminated on big-endian +{ .mmi; and T2=A,B + and r9=A,C + _rotr r11=$t1,$Sigma1[1] } // ROTR(e,41) +{ .mmi; xor T1=T1,r8 // T1=((e & f) ^ (~e & g)) + and r8=B,C };; +___ +$t0="t0", $t1="t1", $code.=<<___ if ($BITS==32); +.align 32 +.L_first16: +{ .mmi; add A=A,r8 // H+=Sigma(0) from the past + add r10=2-$SZ,input + add r11=3-$SZ,input };; +{ .mmi; ld1 r9=[r9] + ld1 r10=[r10] + dep.z $t1=E,32,32 } +{ .mmi; ld1 r11=[r11] + $LDW K=[Ktbl],$SZ + zxt4 E=E };; +{ .mii; or $t1=$t1,E + dep X[15]=X[15],r9,8,8 + mux2 $t0=A,0x44 };; // copy lower half to upper +{ .mmi; and T1=F,E + andcm r8=G,E + dep r11=r10,r11,8,8 };; +{ .mmi; and T2=A,B + and r9=A,C + dep X[15]=X[15],r11,16,16 };; +{ .mmi; (p16) ld1 X[15-1]=[input],$SZ // prefetch + xor T1=T1,r8 // T1=((e & f) ^ (~e & g)) + _rotr r10=$t1,$Sigma1[0] } // ROTR(e,14) +{ .mmi; and r8=B,C + _rotr r11=$t1,$Sigma1[1] };; // ROTR(e,18) ___ $code.=<<___; -{ .mib; add T1=T1,H // T1=Ch(e,f,g)+h - _rotr r8=$t1,$Sigma1[1] } // ROTR(e,18) -{ .mib; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c)) - mov H=G };; -{ .mib; xor r11=r8,r11 - _rotr r9=$t1,$Sigma1[2] } // ROTR(e,41) -{ .mib; mov G=F - mov F=E };; -{ .mib; xor r9=r9,r11 // r9=Sigma1(e) - _rotr r10=$t0,$Sigma0[0] } // ROTR(a,28) -{ .mib; add T1=T1,K // T1=Ch(e,f,g)+h+K512[i] - mov E=D };; -{ .mib; add T1=T1,r9 // T1+=Sigma1(e) - _rotr r11=$t0,$Sigma0[1] } // ROTR(a,34) -{ .mib; mov D=C - mov C=B };; -{ .mib; add T1=T1,X[15] // T1+=X[i] - _rotr r8=$t0,$Sigma0[2] } // ROTR(a,39) -{ .mib; xor r10=r10,r11 - mux2 X[15]=X[15],0x44 };; // eliminated in 64-bit -{ .mmi; xor r10=r8,r10 // r10=Sigma0(a) - mov B=A - add A=T1,T2 };; -{ .mib; add E=E,T1 - add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a) - br.ctop.sptk .L_first16 };; +{ .mmi; add T1=T1,H // T1=Ch(e,f,g)+h + xor r10=r10,r11 + _rotr r11=$t1,$Sigma1[2] } // ROTR(e,41) +{ .mmi; xor T2=T2,r9 + add K=K,X[15] };; +{ .mmi; add T1=T1,K // T1+=K[i]+X[i] + xor T2=T2,r8 // T2=((a & b) ^ (a & c) ^ (b & c)) + _rotr r8=$t0,$Sigma0[0] } // ROTR(a,28) +{ .mmi; xor r11=r11,r10 // Sigma1(e) + _rotr r9=$t0,$Sigma0[1] };; // ROTR(a,34) +{ .mmi; add T1=T1,r11 // T+=Sigma1(e) + xor r8=r8,r9 + _rotr r9=$t0,$Sigma0[2] };; // ROTR(a,39) +{ .mmi; xor r8=r8,r9 // Sigma0(a) + add D=D,T1 + mux2 H=X[15],0x44 } // mov H=X[15] in sha512 +{ .mib; (p16) add r9=1-$SZ,input // not used in sha512 + add X[15]=T1,T2 // H=T1+Maj(a,b,c) + br.ctop.sptk .L_first16 };; .L_first16_end: -{ .mii; mov ar.lc=$rounds-17 - mov ar.ec=1 };; +{ .mib; mov ar.lc=$rounds-17 + brp.loop.imp .L_rest,.L_rest_end-16 } +{ .mib; mov ar.ec=1 + br.many .L_rest };; .align 32 .L_rest: -.rotr X[16] -{ .mib; $LDW K=[Ktbl],$SZ +{ .mmi; $LDW K=[Ktbl],$SZ + add A=A,r8 // H+=Sigma0(a) from the past _rotr r8=X[15-1],$sigma0[0] } // ROTR(s0,1) -{ .mib; $ADD X[15]=X[15],X[15-9] // X[i&0xF]+=X[(i+9)&0xF] - $SHRU s0=X[15-1],sgm0 };; // s0=X[(i+1)&0xF]>>7 +{ .mmi; add X[15]=X[15],X[15-9] // X[i&0xF]+=X[(i+9)&0xF] + $SHRU s0=X[15-1],sgm0 };; // s0=X[(i+1)&0xF]>>7 { .mib; and T1=F,E _rotr r9=X[15-1],$sigma0[1] } // 
ROTR(s0,8) { .mib; andcm r10=G,E - $SHRU s1=X[15-14],sgm1 };; // s1=X[(i+14)&0xF]>>6 + $SHRU s1=X[15-14],sgm1 };; // s1=X[(i+14)&0xF]>>6 +// Pair of mmi; splits on Itanium 1 and prevents pipeline flush +// upon $SHRU output usage { .mmi; xor T1=T1,r10 // T1=((e & f) ^ (~e & g)) xor r9=r8,r9 - _rotr r10=X[15-14],$sigma1[0] };;// ROTR(s1,19) -{ .mib; and T2=A,B - _rotr r11=X[15-14],$sigma1[1] }// ROTR(s1,61) -{ .mib; and r8=A,C };; + _rotr r10=X[15-14],$sigma1[0] }// ROTR(s1,19) +{ .mmi; and T2=A,B + and r8=A,C + _rotr r11=X[15-14],$sigma1[1] };;// ROTR(s1,61) ___ $t0="t0", $t1="t1", $code.=<<___ if ($BITS==32); -// I adhere to mmi; in order to hold Itanium 1 back and avoid 6 cycle -// pipeline flush in last bundle. Note that even on Itanium2 the -// latter stalls for one clock cycle... -{ .mmi; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF]) - dep.z $t1=E,32,32 } -{ .mmi; xor r10=r11,r10 - zxt4 E=E };; -{ .mmi; or $t1=$t1,E - xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF]) - mux2 $t0=A,0x44 };; // copy lower half to upper +{ .mib; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF]) + dep.z $t1=E,32,32 } +{ .mib; xor r10=r11,r10 + zxt4 E=E };; +{ .mii; xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF]) + shrp r9=E,$t1,32+$Sigma1[0] // ROTR(e,14) + mux2 $t0=A,0x44 };; // copy lower half to upper +// Pair of mmi; splits on Itanium 1 and prevents pipeline flush +// upon mux2 output usage { .mmi; xor T2=T2,r8 - _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14) + shrp r8=E,$t1,32+$Sigma1[1]} // ROTR(e,18) { .mmi; and r10=B,C add T1=T1,H // T1=Ch(e,f,g)+h - $ADD X[15]=X[15],s0 };; // X[i&0xF]+=sigma0(X[(i+1)&0xF]) + or $t1=$t1,E };; ___ $t0="A", $t1="E", $code.=<<___ if ($BITS==64); { .mib; xor s0=s0,r9 // s0=sigma0(X[(i+1)&0xF]) - _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14) + _rotr r9=$t1,$Sigma1[0] } // ROTR(e,14) { .mib; xor r10=r11,r10 - xor T2=T2,r8 };; + xor T2=T2,r8 };; { .mib; xor s1=s1,r10 // s1=sigma1(X[(i+14)&0xF]) - add T1=T1,H } + _rotr r8=$t1,$Sigma1[1] } // ROTR(e,18) { .mib; and r10=B,C - $ADD X[15]=X[15],s0 };; // X[i&0xF]+=sigma0(X[(i+1)&0xF]) + add T1=T1,H };; // T1+=H ___ $code.=<<___; -{ .mmi; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c)) - mov H=G - _rotr r8=$t1,$Sigma1[1] };; // ROTR(e,18) -{ .mmi; xor r11=r8,r9 - $ADD X[15]=X[15],s1 // X[i&0xF]+=sigma1(X[(i+14)&0xF]) - _rotr r9=$t1,$Sigma1[2] } // ROTR(e,41) -{ .mmi; mov G=F - mov F=E };; -{ .mib; xor r9=r9,r11 // r9=Sigma1(e) - _rotr r10=$t0,$Sigma0[0] } // ROTR(a,28) -{ .mib; add T1=T1,K // T1=Ch(e,f,g)+h+K512[i] - mov E=D };; -{ .mib; add T1=T1,r9 // T1+=Sigma1(e) - _rotr r11=$t0,$Sigma0[1] } // ROTR(a,34) -{ .mib; mov D=C - mov C=B };; -{ .mmi; add T1=T1,X[15] // T1+=X[i] - xor r10=r10,r11 - _rotr r8=$t0,$Sigma0[2] };; // ROTR(a,39) -{ .mmi; xor r10=r8,r10 // r10=Sigma0(a) - mov B=A - add A=T1,T2 };; -{ .mib; add E=E,T1 - add A=A,r10 // T2=Maj(a,b,c)+Sigma0(a) - br.ctop.sptk .L_rest };; +{ .mib; xor r9=r9,r8 + _rotr r8=$t1,$Sigma1[2] } // ROTR(e,41) +{ .mib; xor T2=T2,r10 // T2=((a & b) ^ (a & c) ^ (b & c)) + add X[15]=X[15],s0 };; // X[i]+=sigma0(X[i+1]) +{ .mmi; xor r9=r9,r8 // Sigma1(e) + add X[15]=X[15],s1 // X[i]+=sigma0(X[i+14]) + _rotr r8=$t0,$Sigma0[0] };; // ROTR(a,28) +{ .mmi; add K=K,X[15] + add T1=T1,r9 // T1+=Sigma1(e) + _rotr r9=$t0,$Sigma0[1] };; // ROTR(a,34) +{ .mmi; add T1=T1,K // T1+=K[i]+X[i] + xor r8=r8,r9 + _rotr r9=$t0,$Sigma0[2] };; // ROTR(a,39) +{ .mib; add D=D,T1 + mux2 H=X[15],0x44 } // mov H=X[15] in sha512 +{ .mib; xor r8=r8,r9 // Sigma0(a) + add X[15]=T1,T2 // H=T1+Maj(a,b,c) + br.ctop.sptk .L_rest };; .L_rest_end: +{ .mmi; add 
A=A,r8 };; // H+=Sigma0(a) from the past { .mmi; add A_=A_,A add B_=B_,B add C_=C_,C } @@ -590,17 +601,19 @@ .endp $func# ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -$code =~ s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm; -if ($BITS==64) { - $code =~ s/mux2(\s+)\S+/nop.i$1 0x0/gm; - $code =~ s/mux1(\s+)\S+/nop.i$1 0x0/gm if ($big_endian); - $code =~ s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm +foreach(split($/,$code)) { + s/\`([^\`]*)\`/eval $1/gem; + s/_rotr(\s+)([^=]+)=([^,]+),([0-9]+)/shrp$1$2=$3,$3,$4/gm; + if ($BITS==64) { + s/mux2(\s+)([^=]+)=([^,]+),\S+/mov$1 $2=$3/gm; + s/mux1(\s+)\S+/nop.i$1 0x0/gm if ($big_endian); + s/(shrp\s+X\[[^=]+)=([^,]+),([^,]+),([1-9]+)/$1=$3,$2,64-$4/gm if (!$big_endian); - $code =~ s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm; -} + s/ld1(\s+)X\[\S+/nop.m$1 0x0/gm; + } -print $code; + print $_,"\n"; +} print<<___ if ($BITS==32); .align 64 diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl index 6807a2c7220b44..b468cfb4569e96 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-mips.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -17,6 +17,10 @@ # ~17%, but it comes for free, because it's same instruction sequence. # Improvement coefficients are for aligned input. +# September 2012. +# +# Add MIPS[32|64]R2 code (>25% less instructions). + ###################################################################### # There is a number of MIPS ABI in use, O32 and N32/64 are most # widely used. Then there is a new contender: NUBI. 
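The post-processing loop above rewrites every _rotr d=s,n placeholder as shrp d=s,s,n: IA-64 expresses integer rotates through shift-right-pair, and a funnel shift of a value with itself is exactly a right rotate, which is the same operation the MIPS32R2/MIPS64R2 path below gets directly from rotr/drotr. A quick check of that identity (plain Perl sketch; assumes a perl built with 64-bit integers):

use strict;
use warnings;

# shrp r=x,y,n takes the 128-bit concatenation x:y, shifts it right by n and
# keeps the low 64 bits, so shrp r=x,x,n is a right rotate by n.  This is the
# identity the _rotr -> shrp substitution above relies on.
my $M = 0xFFFFFFFFFFFFFFFF;                     # needs a perl with 64-bit integers

sub shrp { my ($x,$y,$n) = @_; (($y >> $n) | ($x << (64 - $n))) & $M }
sub rotr { my ($x,$n)    = @_; (($x >> $n) | ($x << (64 - $n))) & $M }

my $x = 0x0123456789abcdef;
for my $n (14, 18, 41, 28, 34, 39, 1, 8, 19, 61) {  # the SHA-512 rotate counts used above
    die "mismatch at n=$n" unless shrp($x, $x, $n) == rotr($x, $n);
}
printf "rotr(%016x,n) == shrp(x,x,n) for all tested n\n", $x;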
It appears that if @@ -45,7 +49,7 @@ # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); # -$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64 +$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_ADD="dadd"; # incidentally works even on n32 @@ -83,6 +87,7 @@ $SLL="dsll"; # shift left logical $SRL="dsrl"; # shift right logical $ADDU="daddu"; + $ROTR="drotr"; @Sigma0=(28,34,39); @Sigma1=(14,18,41); @sigma0=( 7, 1, 8); # right shift first @@ -97,6 +102,7 @@ $SLL="sll"; # shift left logical $SRL="srl"; # shift right logical $ADDU="addu"; + $ROTR="rotr"; @Sigma0=( 2,13,22); @Sigma1=( 6,11,25); @sigma0=( 3, 7,18); # right shift first @@ -124,6 +130,10 @@ sub BODY_00_15 { ${LD}r @X[1],`($i+1)*$SZ+$LSB`($inp) ___ $code.=<<___ if (!$big_endian && $i<16 && $SZ==4); +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + wsbh @X[0],@X[0] # byte swap($i) + rotr @X[0],@X[0],16 +#else srl $tmp0,@X[0],24 # byte swap($i) srl $tmp1,@X[0],8 andi $tmp2,@X[0],0xFF00 @@ -133,8 +143,13 @@ sub BODY_00_15 { or @X[0],$tmp0 or $tmp1,$tmp2 or @X[0],$tmp1 +#endif ___ $code.=<<___ if (!$big_endian && $i<16 && $SZ==8); +#if defined(_MIPS_ARCH_MIPS64R2) + dsbh @X[0],@X[0] # byte swap($i) + dshd @X[0],@X[0] +#else ori $tmp0,$zero,0xFF dsll $tmp2,$tmp0,32 or $tmp0,$tmp2 # 0x000000FF000000FF @@ -153,8 +168,31 @@ sub BODY_00_15 { dsrl $tmp1,@X[0],32 dsll @X[0],32 or @X[0],$tmp1 +#endif ___ $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + xor $tmp2,$f,$g # $i + $ROTR $tmp0,$e,@Sigma1[0] + $ADDU $T1,$X[0],$h + $ROTR $tmp1,$e,@Sigma1[1] + and $tmp2,$e + $ROTR $h,$e,@Sigma1[2] + xor $tmp0,$tmp1 + $ROTR $tmp1,$a,@Sigma0[0] + xor $tmp2,$g # Ch(e,f,g) + xor $tmp0,$h # Sigma1(e) + + $ROTR $h,$a,@Sigma0[1] + $ADDU $T1,$tmp2 + $LD $tmp2,`$i*$SZ`($Ktbl) # K[$i] + xor $h,$tmp1 + $ROTR $tmp1,$a,@Sigma0[2] + $ADDU $T1,$tmp0 + and $tmp0,$b,$c + xor $h,$tmp1 # Sigma0(a) + xor $tmp1,$b,$c +#else $ADDU $T1,$X[0],$h # $i $SRL $h,$e,@Sigma1[0] xor $tmp2,$f,$g @@ -184,16 +222,15 @@ sub BODY_00_15 { xor $h,$tmp1 $SLL $tmp1,$a,`$SZ*8-@Sigma0[0]` xor $h,$tmp0 - $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + and $tmp0,$b,$c xor $h,$tmp1 # Sigma0(a) - - or $tmp0,$a,$b - and $tmp1,$a,$b - and $tmp0,$c - or $tmp1,$tmp0 # Maj(a,b,c) + xor $tmp1,$b,$c +#endif + $ST @X[0],`($i%16)*$SZ`($sp) # offload to ring buffer + $ADDU $h,$tmp0 + and $tmp1,$a $ADDU $T1,$tmp2 # +=K[$i] - $ADDU $h,$tmp1 - + $ADDU $h,$tmp1 # +=Maj(a,b,c) $ADDU $d,$T1 $ADDU $h,$T1 ___ @@ -207,6 +244,20 @@ sub BODY_16_XX { my ($tmp0,$tmp1,$tmp2,$tmp3)=(@X[4],@X[5],@X[6],@X[7]); $code.=<<___; +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) + $ROTR $tmp0,@X[1],@sigma0[1] + $ADDU @X[0],@X[9] # +=X[i+9] + xor $tmp2,$tmp0 + $ROTR $tmp0,@X[1],@sigma0[2] + + $SRL $tmp3,@X[14],@sigma1[0] + $ROTR $tmp1,@X[14],@sigma1[1] + xor $tmp2,$tmp0 # sigma0(X[i+1]) + $ROTR $tmp0,@X[14],@sigma1[2] + xor $tmp3,$tmp1 + $ADDU @X[0],$tmp2 +#else $SRL $tmp2,@X[1],@sigma0[0] # Xupdate($i) $ADDU @X[0],@X[9] # +=X[i+9] $SLL $tmp1,@X[1],`$SZ*8-@sigma0[2]` @@ -227,7 +278,7 @@ sub BODY_16_XX { xor $tmp3,$tmp0 $SRL $tmp0,@X[14],@sigma1[2] xor $tmp3,$tmp1 - +#endif xor $tmp3,$tmp0 # sigma1(X[i+14]) $ADDU @X[0],$tmp3 ___ @@ -242,9 +293,13 @@ sub BODY_16_XX { # include #endif +#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) +#define _MIPS_ARCH_MIPS32R2 +#endif + .text .set 
noat -#if !defined(__vxworks) || defined(__pic__) +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl index 6b44a68e599e5f..734f3c1ca0f092 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-ppc.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -9,8 +9,7 @@ # I let hardware handle unaligned input, except on page boundaries # (see below for details). Otherwise straightforward implementation -# with X vector in register bank. The module is big-endian [which is -# not big deal as there're no little-endian targets left around]. +# with X vector in register bank. # sha256 | sha512 # -m64 -m32 | -m64 -m32 @@ -56,6 +55,8 @@ $PUSH="stw"; } else { die "nonsense $flavour"; } +$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or @@ -64,7 +65,7 @@ open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; if ($output =~ /512/) { - $func="sha512_block_data_order"; + $func="sha512_block_ppc"; $SZ=8; @Sigma0=(28,34,39); @Sigma1=(14,18,41); @@ -76,7 +77,7 @@ $ROR="rotrdi"; $SHR="srdi"; } else { - $func="sha256_block_data_order"; + $func="sha256_block_ppc"; $SZ=4; @Sigma0=( 2,13,22); @Sigma1=( 6,11,25); @@ -110,7 +111,7 @@ $C ="r10"; $D ="r11"; $E ="r12"; -$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer +$F =$t1; $t1 = "r0"; # stay away from "r13"; $G ="r14"; $H ="r15"; @@ -118,24 +119,23 @@ @X=("r16","r17","r18","r19","r20","r21","r22","r23", "r24","r25","r26","r27","r28","r29","r30","r31"); -$inp="r31"; # reassigned $inp! aliases with @X[15] +$inp="r31" if($SZ==4 || $SIZE_T==8); # reassigned $inp! 
aliases with @X[15] sub ROUND_00_15 { my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; $code.=<<___; - $LD $T,`$i*$SZ`($Tbl) $ROR $a0,$e,$Sigma1[0] $ROR $a1,$e,$Sigma1[1] and $t0,$f,$e - andc $t1,$g,$e - add $T,$T,$h xor $a0,$a0,$a1 + add $h,$h,$t1 + andc $t1,$g,$e $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]` or $t0,$t0,$t1 ; Ch(e,f,g) - add $T,$T,@X[$i] + add $h,$h,@X[$i%16] xor $a0,$a0,$a1 ; Sigma1(e) - add $T,$T,$t0 - add $T,$T,$a0 + add $h,$h,$t0 + add $h,$h,$a0 $ROR $a0,$a,$Sigma0[0] $ROR $a1,$a,$Sigma0[1] @@ -146,9 +146,14 @@ sub ROUND_00_15 { xor $t0,$t0,$t1 and $t1,$b,$c xor $a0,$a0,$a1 ; Sigma0(a) - add $d,$d,$T + add $d,$d,$h xor $t0,$t0,$t1 ; Maj(a,b,c) - add $h,$T,$a0 +___ +$code.=<<___ if ($i<15); + $LD $t1,`($i+1)*$SZ`($Tbl) +___ +$code.=<<___; + add $h,$h,$a0 add $h,$h,$t0 ___ @@ -169,10 +174,11 @@ sub ROUND_16_xx { add @X[$i],@X[$i],@X[($i+9)%16] xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f]) xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f]) + $LD $t1,`$i*$SZ`($Tbl) add @X[$i],@X[$i],$a0 add @X[$i],@X[$i],$t0 ___ -&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h); +&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); } $code=<<___; @@ -188,8 +194,6 @@ sub ROUND_16_xx { $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) - $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) - $PUSH r13,`$FRAME-$SIZE_T*19`($sp) $PUSH r14,`$FRAME-$SIZE_T*18`($sp) $PUSH r15,`$FRAME-$SIZE_T*17`($sp) $PUSH r16,`$FRAME-$SIZE_T*16`($sp) @@ -209,7 +213,10 @@ sub ROUND_16_xx { $PUSH r30,`$FRAME-$SIZE_T*2`($sp) $PUSH r31,`$FRAME-$SIZE_T*1`($sp) $PUSH r0,`$FRAME+$LRSAVE`($sp) +___ +if ($SZ==4 || $SIZE_T==8) { +$code.=<<___; $LD $A,`0*$SZ`($ctx) mr $inp,r4 ; incarnate $inp $LD $B,`1*$SZ`($ctx) @@ -219,7 +226,16 @@ sub ROUND_16_xx { $LD $F,`5*$SZ`($ctx) $LD $G,`6*$SZ`($ctx) $LD $H,`7*$SZ`($ctx) +___ +} else { + for ($i=16;$i<32;$i++) { + $code.=<<___; + lwz r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx) +___ + } +} +$code.=<<___; bl LPICmeup LPICedup: andi. r0,$inp,3 @@ -255,6 +271,9 @@ sub ROUND_16_xx { Lcross_page: li $t1,`16*$SZ/4` mtctr $t1 +___ +if ($SZ==4 || $SIZE_T==8) { +$code.=<<___; addi r20,$sp,$LOCALS ; aligned spot below the frame Lmemcpy: lbz r16,0($inp) @@ -268,7 +287,26 @@ sub ROUND_16_xx { stb r19,3(r20) addi r20,r20,4 bdnz Lmemcpy +___ +} else { +$code.=<<___; + addi r12,$sp,$LOCALS ; aligned spot below the frame +Lmemcpy: + lbz r8,0($inp) + lbz r9,1($inp) + lbz r10,2($inp) + lbz r11,3($inp) + addi $inp,$inp,4 + stb r8,0(r12) + stb r9,1(r12) + stb r10,2(r12) + stb r11,3(r12) + addi r12,r12,4 + bdnz Lmemcpy +___ +} +$code.=<<___; $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer addi $inp,$sp,$LOCALS ; fictitious inp pointer @@ -283,8 +321,6 @@ sub ROUND_16_xx { Ldone: $POP r0,`$FRAME+$LRSAVE`($sp) - $POP $toc,`$FRAME-$SIZE_T*20`($sp) - $POP r13,`$FRAME-$SIZE_T*19`($sp) $POP r14,`$FRAME-$SIZE_T*18`($sp) $POP r15,`$FRAME-$SIZE_T*17`($sp) $POP r16,`$FRAME-$SIZE_T*16`($sp) @@ -309,27 +345,48 @@ sub ROUND_16_xx { .long 0 .byte 0,12,4,1,0x80,18,3,0 .long 0 +___ +if ($SZ==4 || $SIZE_T==8) { +$code.=<<___; .align 4 Lsha2_block_private: + $LD $t1,0($Tbl) ___ for($i=0;$i<16;$i++) { -$code.=<<___ if ($SZ==4); +$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN); lwz @X[$i],`$i*$SZ`($inp) ___ +$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN); + lwz $a0,`$i*$SZ`($inp) + rotlwi @X[$i],$a0,8 + rlwimi @X[$i],$a0,24,0,7 + rlwimi @X[$i],$a0,24,16,23 +___ # 64-bit loads are split to 2x32-bit ones, as CPU can't handle # unaligned 64-bit loads, only 32-bit ones... 
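
The little-endian load paths added to sha512-ppc.pl in this hunk byte-swap each 32-bit word with one `rotlwi` followed by two `rlwimi` inserts rather than relying on big-endian loads. A quick standalone check (illustration only; `rotl32` and `rlwimi` below merely model the instruction semantics, they are not from the patch) confirms that the three-instruction sequence is a full byte swap:

#!/usr/bin/env perl
# Verify: rotlwi t,x,8 ; rlwimi t,x,24,0,7 ; rlwimi t,x,24,16,23 == bswap32(x)
use strict; use warnings;

sub rotl32 { my ($x, $n) = @_; return (($x << $n) | ($x >> (32 - $n))) & 0xffffffff; }

# rlwimi ra,rs,sh,mb,me: rotate rs left by sh, then insert the bits selected
# by the IBM-numbered mask mb..me (bit 0 = MSB) into ra.
sub rlwimi {
    my ($ra, $rs, $sh, $mb, $me) = @_;
    my $mask = 0;
    $mask |= 1 << (31 - $_) for ($mb .. $me);
    return (rotl32($rs, $sh) & $mask) | ($ra & ~$mask & 0xffffffff);
}

my $x = 0x01020304;
my $t = rotl32($x, 8);            # rotlwi t,x,8
$t = rlwimi($t, $x, 24, 0, 7);    # rlwimi t,x,24,0,7
$t = rlwimi($t, $x, 24, 16, 23);  # rlwimi t,x,24,16,23
printf "%08x -> %08x\n", $x, $t;  # prints 01020304 -> 04030201

The SHA-512 case applies the same pattern to both halves of each word and then merges them with `insrdi`, as in the `$SZ==8 && $LITTLE_ENDIAN` block below.
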
-$code.=<<___ if ($SZ==8); +$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN); lwz $t0,`$i*$SZ`($inp) lwz @X[$i],`$i*$SZ+4`($inp) insrdi @X[$i],$t0,32,0 +___ +$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN); + lwz $a0,`$i*$SZ`($inp) + lwz $a1,`$i*$SZ+4`($inp) + rotlwi $t0,$a0,8 + rotlwi @X[$i],$a1,8 + rlwimi $t0,$a0,24,0,7 + rlwimi @X[$i],$a1,24,0,7 + rlwimi $t0,$a0,24,16,23 + rlwimi @X[$i],$a1,24,16,23 + insrdi @X[$i],$t0,32,0 ___ &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } $code.=<<___; - li $T,`$rounds/16-1` - mtctr $T + li $t0,`$rounds/16-1` + mtctr $t0 .align 4 Lrounds: addi $Tbl,$Tbl,`16*$SZ` @@ -377,7 +434,282 @@ sub ROUND_16_xx { blr .long 0 .byte 0,12,0x14,0,0,0,0,0 +.size $func,.-$func +___ +} else { +######################################################################## +# SHA512 for PPC32, X vector is off-loaded to stack... +# +# | sha512 +# | -m32 +# ----------------------+----------------------- +# PPC74x0,gcc-4.0.1 | +48% +# POWER6,gcc-4.4.6 | +124%(*) +# POWER7,gcc-4.4.6 | +79%(*) +# e300,gcc-4.1.0 | +167% +# +# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated +# by xlc-12.1] + +my $XOFF=$LOCALS; + +my @V=map("r$_",(16..31)); # A..H + +my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15)); +my ($x0,$x1)=("r3","r4"); # zaps $ctx and $inp + +sub ROUND_00_15_ppc32 { +my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, + $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; + +$code.=<<___; + lwz $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl) + xor $a0,$flo,$glo + lwz $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl) + xor $a1,$fhi,$ghi + addc $hlo,$hlo,$t0 ; h+=x[i] + stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i] + + srwi $s0,$elo,$Sigma1[0] + srwi $s1,$ehi,$Sigma1[0] + and $a0,$a0,$elo + adde $hhi,$hhi,$t1 + and $a1,$a1,$ehi + stw $t1,`$XOFF+4+$SZ*($i%16)`($sp) + srwi $t0,$elo,$Sigma1[1] + srwi $t1,$ehi,$Sigma1[1] + addc $hlo,$hlo,$t2 ; h+=K512[i] + insrwi $s0,$ehi,$Sigma1[0],0 + insrwi $s1,$elo,$Sigma1[0],0 + xor $a0,$a0,$glo ; Ch(e,f,g) + adde $hhi,$hhi,$t3 + xor $a1,$a1,$ghi + insrwi $t0,$ehi,$Sigma1[1],0 + insrwi $t1,$elo,$Sigma1[1],0 + addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g) + srwi $t2,$ehi,$Sigma1[2]-32 + srwi $t3,$elo,$Sigma1[2]-32 + xor $s0,$s0,$t0 + xor $s1,$s1,$t1 + insrwi $t2,$elo,$Sigma1[2]-32,0 + insrwi $t3,$ehi,$Sigma1[2]-32,0 + xor $a0,$alo,$blo ; a^b, b^c in next round + adde $hhi,$hhi,$a1 + xor $a1,$ahi,$bhi + xor $s0,$s0,$t2 ; Sigma1(e) + xor $s1,$s1,$t3 + + srwi $t0,$alo,$Sigma0[0] + and $a2,$a2,$a0 + addc $hlo,$hlo,$s0 ; h+=Sigma1(e) + and $a3,$a3,$a1 + srwi $t1,$ahi,$Sigma0[0] + srwi $s0,$ahi,$Sigma0[1]-32 + adde $hhi,$hhi,$s1 + srwi $s1,$alo,$Sigma0[1]-32 + insrwi $t0,$ahi,$Sigma0[0],0 + insrwi $t1,$alo,$Sigma0[0],0 + xor $a2,$a2,$blo ; Maj(a,b,c) + addc $dlo,$dlo,$hlo ; d+=h + xor $a3,$a3,$bhi + insrwi $s0,$alo,$Sigma0[1]-32,0 + insrwi $s1,$ahi,$Sigma0[1]-32,0 + adde $dhi,$dhi,$hhi + srwi $t2,$ahi,$Sigma0[2]-32 + srwi $t3,$alo,$Sigma0[2]-32 + xor $s0,$s0,$t0 + addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c) + xor $s1,$s1,$t1 + insrwi $t2,$alo,$Sigma0[2]-32,0 + insrwi $t3,$ahi,$Sigma0[2]-32,0 + adde $hhi,$hhi,$a3 +___ +$code.=<<___ if ($i>=15); + lwz $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp) + lwz $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp) +___ +$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN); + lwz $t1,`$SZ*($i+1)+0`($inp) + lwz $t0,`$SZ*($i+1)+4`($inp) ___ +$code.=<<___ if ($i<15 && $LITTLE_ENDIAN); + lwz $a2,`$SZ*($i+1)+0`($inp) + lwz $a3,`$SZ*($i+1)+4`($inp) + rotlwi $t1,$a2,8 + rotlwi $t0,$a3,8 + rlwimi $t1,$a2,24,0,7 + rlwimi $t0,$a3,24,0,7 + rlwimi $t1,$a2,24,16,23 + rlwimi 
$t0,$a3,24,16,23 +___ +$code.=<<___; + xor $s0,$s0,$t2 ; Sigma0(a) + xor $s1,$s1,$t3 + addc $hlo,$hlo,$s0 ; h+=Sigma0(a) + adde $hhi,$hhi,$s1 +___ +$code.=<<___ if ($i==15); + lwz $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp) + lwz $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp) +___ +} +sub ROUND_16_xx_ppc32 { +my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, + $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; + +$code.=<<___; + srwi $s0,$t0,$sigma0[0] + srwi $s1,$t1,$sigma0[0] + srwi $t2,$t0,$sigma0[1] + srwi $t3,$t1,$sigma0[1] + insrwi $s0,$t1,$sigma0[0],0 + insrwi $s1,$t0,$sigma0[0],0 + srwi $a0,$t0,$sigma0[2] + insrwi $t2,$t1,$sigma0[1],0 + insrwi $t3,$t0,$sigma0[1],0 + insrwi $a0,$t1,$sigma0[2],0 + xor $s0,$s0,$t2 + lwz $t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp) + srwi $a1,$t1,$sigma0[2] + xor $s1,$s1,$t3 + lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp) + xor $a0,$a0,$s0 + srwi $s0,$t2,$sigma1[0] + xor $a1,$a1,$s1 + srwi $s1,$t3,$sigma1[0] + addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1]) + srwi $a0,$t3,$sigma1[1]-32 + insrwi $s0,$t3,$sigma1[0],0 + insrwi $s1,$t2,$sigma1[0],0 + adde $x1,$x1,$a1 + srwi $a1,$t2,$sigma1[1]-32 + + insrwi $a0,$t2,$sigma1[1]-32,0 + srwi $t2,$t2,$sigma1[2] + insrwi $a1,$t3,$sigma1[1]-32,0 + insrwi $t2,$t3,$sigma1[2],0 + xor $s0,$s0,$a0 + lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp) + srwi $t3,$t3,$sigma1[2] + xor $s1,$s1,$a1 + lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp) + xor $s0,$s0,$t2 + addc $x0,$x0,$a0 ; x[i]+=x[i+9] + xor $s1,$s1,$t3 + adde $x1,$x1,$a1 + addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14]) + adde $x1,$x1,$s1 +___ + ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1); + &ROUND_00_15_ppc32(@_); +} + +$code.=<<___; +.align 4 +Lsha2_block_private: +___ +$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $t1,0($inp) + xor $a2,@V[3],@V[5] ; B^C, magic seed + lwz $t0,4($inp) + xor $a3,@V[2],@V[4] +___ +$code.=<<___ if ($LITTLE_ENDIAN); + lwz $a1,0($inp) + xor $a2,@V[3],@V[5] ; B^C, magic seed + lwz $a0,4($inp) + xor $a3,@V[2],@V[4] + rotlwi $t1,$a1,8 + rotlwi $t0,$a0,8 + rlwimi $t1,$a1,24,0,7 + rlwimi $t0,$a0,24,0,7 + rlwimi $t1,$a1,24,16,23 + rlwimi $t0,$a0,24,16,23 +___ +for($i=0;$i<16;$i++) { + &ROUND_00_15_ppc32($i,@V); + unshift(@V,pop(@V)); unshift(@V,pop(@V)); + ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1); +} +$code.=<<___; + li $a0,`$rounds/16-1` + mtctr $a0 +.align 4 +Lrounds: + addi $Tbl,$Tbl,`16*$SZ` +___ +for(;$i<32;$i++) { + &ROUND_16_xx_ppc32($i,@V); + unshift(@V,pop(@V)); unshift(@V,pop(@V)); + ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1); +} +$code.=<<___; + bdnz- Lrounds + + $POP $ctx,`$FRAME-$SIZE_T*22`($sp) + $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer + $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer + subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl + + lwz $t0,`$LITTLE_ENDIAN^0`($ctx) + lwz $t1,`$LITTLE_ENDIAN^4`($ctx) + lwz $t2,`$LITTLE_ENDIAN^8`($ctx) + lwz $t3,`$LITTLE_ENDIAN^12`($ctx) + lwz $a0,`$LITTLE_ENDIAN^16`($ctx) + lwz $a1,`$LITTLE_ENDIAN^20`($ctx) + lwz $a2,`$LITTLE_ENDIAN^24`($ctx) + addc @V[1],@V[1],$t1 + lwz $a3,`$LITTLE_ENDIAN^28`($ctx) + adde @V[0],@V[0],$t0 + lwz $t0,`$LITTLE_ENDIAN^32`($ctx) + addc @V[3],@V[3],$t3 + lwz $t1,`$LITTLE_ENDIAN^36`($ctx) + adde @V[2],@V[2],$t2 + lwz $t2,`$LITTLE_ENDIAN^40`($ctx) + addc @V[5],@V[5],$a1 + lwz $t3,`$LITTLE_ENDIAN^44`($ctx) + adde @V[4],@V[4],$a0 + lwz $a0,`$LITTLE_ENDIAN^48`($ctx) + addc @V[7],@V[7],$a3 + lwz $a1,`$LITTLE_ENDIAN^52`($ctx) + adde @V[6],@V[6],$a2 + lwz $a2,`$LITTLE_ENDIAN^56`($ctx) + addc @V[9],@V[9],$t1 + lwz $a3,`$LITTLE_ENDIAN^60`($ctx) + adde @V[8],@V[8],$t0 + stw @V[0],`$LITTLE_ENDIAN^0`($ctx) + stw @V[1],`$LITTLE_ENDIAN^4`($ctx) + addc 
@V[11],@V[11],$t3 + stw @V[2],`$LITTLE_ENDIAN^8`($ctx) + stw @V[3],`$LITTLE_ENDIAN^12`($ctx) + adde @V[10],@V[10],$t2 + stw @V[4],`$LITTLE_ENDIAN^16`($ctx) + stw @V[5],`$LITTLE_ENDIAN^20`($ctx) + addc @V[13],@V[13],$a1 + stw @V[6],`$LITTLE_ENDIAN^24`($ctx) + stw @V[7],`$LITTLE_ENDIAN^28`($ctx) + adde @V[12],@V[12],$a0 + stw @V[8],`$LITTLE_ENDIAN^32`($ctx) + stw @V[9],`$LITTLE_ENDIAN^36`($ctx) + addc @V[15],@V[15],$a3 + stw @V[10],`$LITTLE_ENDIAN^40`($ctx) + stw @V[11],`$LITTLE_ENDIAN^44`($ctx) + adde @V[14],@V[14],$a2 + stw @V[12],`$LITTLE_ENDIAN^48`($ctx) + stw @V[13],`$LITTLE_ENDIAN^52`($ctx) + stw @V[14],`$LITTLE_ENDIAN^56`($ctx) + stw @V[15],`$LITTLE_ENDIAN^60`($ctx) + + addi $inp,$inp,`16*$SZ` ; advance inp + $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) + $UCMP $inp,$num + bne Lsha2_block_private + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.size $func,.-$func +___ +} # Ugly hack here, because PPC assembler syntax seem to vary too # much from platforms to platform... @@ -395,46 +727,46 @@ sub ROUND_16_xx { .space `64-9*4` ___ $code.=<<___ if ($SZ==8); - .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd - .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc - .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 - .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 - .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe - .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 - .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 - .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 - .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 - .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 - .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 - .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 - .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 - .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 - .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 - .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 - .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 - .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df - .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 - .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b - .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 - .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 - .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 - .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 - .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 - .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 - .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb - .long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 - .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 - .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec - .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 - .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b - .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 - .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 - .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 - .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b - .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 - .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c - .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a - .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 
0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 ___ $code.=<<___ if ($SZ==4); .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl index 585740789e63e7..5a9c15d1d34abe 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-sparcv9.pl @@ -5,6 +5,8 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. +# +# Hardware SPARC T4 support by David S. Miller . # ==================================================================== # SHA256 performance improvement over compiler generated code varies @@ -41,11 +43,11 @@ # loads are always slower than one 64-bit load. Once again this # is unlike pre-T1 UltraSPARC, where, if scheduled appropriately, # 2x32-bit loads can be as fast as 1x64-bit ones. - -$bits=32; -for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } -if ($bits==64) { $bias=2047; $frame=192; } -else { $bias=0; $frame=112; } +# +# SPARC T4 SHA256/512 hardware achieves 3.17/2.01 cycles per byte, +# which is 9.3x/11.1x faster than software. Multi-process benchmark +# saturates at 11.5x single-process result on 8-core processor, or +# ~11/16GBps per 2.85GHz socket. $output=shift; open STDOUT,">$output"; @@ -170,6 +172,7 @@ ld [$inp+16],%l4 ld [$inp+20],%l5 ld [$inp+24],%l6 + cmp $tmp31,0 ld [$inp+28],%l7 ___ $code.=<<___ if ($i<15); @@ -182,29 +185,29 @@ or @pair[1],$tmp2,$tmp2 `"ld [$inp+".eval(32+4+$i*8)."],@pair[1]" if ($i<12)` add $h,$tmp2,$T1 - $ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`] + $ST $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`] ___ $code.=<<___ if ($i==12); - brnz,a $tmp31,.+8 + bnz,a,pn %icc,.+8 ld [$inp+128],%l0 ___ $code.=<<___ if ($i==15); - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2 sllx @pair[1],$tmp31,$tmp2 ! 
Xload($i) add $tmp31,32,$tmp0 - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3 sllx @pair[0],$tmp0,$tmp1 - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4 srlx @pair[2],$tmp32,@pair[1] or $tmp1,$tmp2,$tmp2 - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5 or @pair[1],$tmp2,$tmp2 - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6 add $h,$tmp2,$T1 - $ST $tmp2,[%sp+`$bias+$frame+$i*$SZ`] - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7 - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0 - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1 + $ST $tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`] + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1 ___ } if ($SZ==8); @@ -340,9 +343,9 @@ sub BODY_00_15 { or %l3,$tmp0,$tmp0 srlx $tmp0,@sigma0[0],$T1 - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2 sllx $tmp0,`64-@sigma0[2]`,$tmp1 - ld [%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3 srlx $tmp0,@sigma0[1],$tmp0 xor $tmp1,$T1,$T1 sllx $tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1 @@ -354,9 +357,9 @@ sub BODY_00_15 { or %l7,$tmp2,$tmp2 srlx $tmp2,@sigma1[0],$tmp1 - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6 sllx $tmp2,`64-@sigma1[2]`,$tmp0 - ld [%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7 srlx $tmp2,@sigma1[1],$tmp2 xor $tmp0,$tmp1,$tmp1 sllx $tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0 @@ -365,27 +368,30 @@ sub BODY_00_15 { xor $tmp0,$tmp1,$tmp1 sllx %l4,32,$tmp0 xor $tmp2,$tmp1,$tmp1 ! sigma1(X[$i+14]) - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4 or %l5,$tmp0,$tmp0 - ld [%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5 sllx %l0,32,$tmp2 add $tmp1,$T1,$T1 - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0 or %l1,$tmp2,$tmp2 add $tmp0,$T1,$T1 ! +=X[$i+9] - ld [%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1 + ld [%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1 add $tmp2,$T1,$T1 ! +=X[$i] - $ST $T1,[%sp+`$bias+$frame+($i%16)*$SZ`] + $ST $T1,[%sp+STACK_BIAS+STACK_FRAME+`($i%16)*$SZ`] ___ &BODY_00_15(@_); } if ($SZ==8); -$code.=<<___ if ($bits==64); +$code.=<<___; +#include "sparc_arch.h" + +#ifdef __arch64__ .register %g2,#scratch .register %g3,#scratch -___ -$code.=<<___; +#endif + .section ".text",#alloc,#execinstr .align 64 @@ -457,9 +463,203 @@ sub BODY_00_15 { } $code.=<<___; .size K${label},.-K${label} + +#ifdef __PIC__ +SPARC_PIC_THUNK(%g1) +#endif + .globl sha${label}_block_data_order +.align 32 sha${label}_block_data_order: - save %sp,`-$frame-$locals`,%sp + SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5) + ld [%g1+4],%g1 ! OPENSSL_sparcv9cap_P[1] + + andcc %g1, CFR_SHA${label}, %g0 + be .Lsoftware + nop +___ +$code.=<<___ if ($SZ==8); # SHA512 + ldd [%o0 + 0x00], %f0 ! 
load context + ldd [%o0 + 0x08], %f2 + ldd [%o0 + 0x10], %f4 + ldd [%o0 + 0x18], %f6 + ldd [%o0 + 0x20], %f8 + ldd [%o0 + 0x28], %f10 + andcc %o1, 0x7, %g0 + ldd [%o0 + 0x30], %f12 + bne,pn %icc, .Lhwunaligned + ldd [%o0 + 0x38], %f14 + +.Lhwaligned_loop: + ldd [%o1 + 0x00], %f16 + ldd [%o1 + 0x08], %f18 + ldd [%o1 + 0x10], %f20 + ldd [%o1 + 0x18], %f22 + ldd [%o1 + 0x20], %f24 + ldd [%o1 + 0x28], %f26 + ldd [%o1 + 0x30], %f28 + ldd [%o1 + 0x38], %f30 + ldd [%o1 + 0x40], %f32 + ldd [%o1 + 0x48], %f34 + ldd [%o1 + 0x50], %f36 + ldd [%o1 + 0x58], %f38 + ldd [%o1 + 0x60], %f40 + ldd [%o1 + 0x68], %f42 + ldd [%o1 + 0x70], %f44 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x78], %f46 + add %o1, 0x80, %o1 + prefetch [%o1 + 63], 20 + prefetch [%o1 + 64+63], 20 + + .word 0x81b02860 ! SHA512 + + bne,pt SIZE_T_CC, .Lhwaligned_loop + nop + +.Lhwfinish: + std %f0, [%o0 + 0x00] ! store context + std %f2, [%o0 + 0x08] + std %f4, [%o0 + 0x10] + std %f6, [%o0 + 0x18] + std %f8, [%o0 + 0x20] + std %f10, [%o0 + 0x28] + std %f12, [%o0 + 0x30] + retl + std %f14, [%o0 + 0x38] + +.align 16 +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f18 +.Lhwunaligned_loop: + ldd [%o1 + 0x08], %f20 + ldd [%o1 + 0x10], %f22 + ldd [%o1 + 0x18], %f24 + ldd [%o1 + 0x20], %f26 + ldd [%o1 + 0x28], %f28 + ldd [%o1 + 0x30], %f30 + ldd [%o1 + 0x38], %f32 + ldd [%o1 + 0x40], %f34 + ldd [%o1 + 0x48], %f36 + ldd [%o1 + 0x50], %f38 + ldd [%o1 + 0x58], %f40 + ldd [%o1 + 0x60], %f42 + ldd [%o1 + 0x68], %f44 + ldd [%o1 + 0x70], %f46 + ldd [%o1 + 0x78], %f48 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x80], %f50 + add %o1, 0x80, %o1 + prefetch [%o1 + 63], 20 + prefetch [%o1 + 64+63], 20 + + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + faligndata %f36, %f38, %f34 + faligndata %f38, %f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + .word 0x81b02860 ! SHA512 + + bne,pt SIZE_T_CC, .Lhwunaligned_loop + for %f50, %f50, %f18 ! %f18=%f50 + + ba .Lhwfinish + nop +___ +$code.=<<___ if ($SZ==4); # SHA256 + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + ld [%o0 + 0x08], %f2 + ld [%o0 + 0x0c], %f3 + ld [%o0 + 0x10], %f4 + ld [%o0 + 0x14], %f5 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x18], %f6 + bne,pn %icc, .Lhwunaligned + ld [%o0 + 0x1c], %f7 + +.Lhwloop: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + subcc %o2, 1, %o2 ! done yet? + ldd [%o1 + 0x38], %f22 + add %o1, 0x40, %o1 + prefetch [%o1 + 63], 20 + + .word 0x81b02840 ! SHA256 + + bne,pt SIZE_T_CC, .Lhwloop + nop + +.Lhwfinish: + st %f0, [%o0 + 0x00] ! store context + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + st %f4, [%o0 + 0x10] + st %f5, [%o0 + 0x14] + st %f6, [%o0 + 0x18] + retl + st %f7, [%o0 + 0x1c] + +.align 8 +.Lhwunaligned: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +.Lhwunaligned_loop: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + subcc %o2, 1, %o2 ! done yet? 
+ ldd [%o1 + 0x40], %f26 + add %o1, 0x40, %o1 + prefetch [%o1 + 63], 20 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + .word 0x81b02840 ! SHA256 + + bne,pt SIZE_T_CC, .Lhwunaligned_loop + for %f26, %f26, %f10 ! %f10=%f26 + + ba .Lhwfinish + nop +___ +$code.=<<___; +.align 16 +.Lsoftware: + save %sp,-STACK_FRAME-$locals,%sp and $inp,`$align-1`,$tmp31 sllx $len,`log(16*$SZ)/log(2)`,$len andn $inp,`$align-1`,$inp @@ -578,7 +778,7 @@ sub BODY_00_15 { $code.=<<___; add $inp,`16*$SZ`,$inp ! advance inp cmp $inp,$len - bne `$bits==64?"%xcc":"%icc"`,.Lloop + bne SIZE_T_CC,.Lloop sub $Ktbl,`($rounds-16)*$SZ`,$Ktbl ! rewind Ktbl ret @@ -589,6 +789,62 @@ sub BODY_00_15 { .align 4 ___ -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; +# Purpose of these subroutines is to explicitly encode VIS instructions, +# so that one can compile the module without having to specify VIS +# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a. +# Idea is to reserve for option to produce "universal" binary and let +# programmer detect if current CPU is VIS capable at run-time. +sub unvis { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my $ref,$opf; +my %visopf = ( "faligndata" => 0x048, + "for" => 0x07c ); + + $ref = "$mnemonic\t$rs1,$rs2,$rd"; + + if ($opf=$visopf{$mnemonic}) { + foreach ($rs1,$rs2,$rd) { + return $ref if (!/%f([0-9]{1,2})/); + $_=$1; + if ($1>=32) { + return $ref if ($1&1); + # re-encode for upper double register addressing + $_=($1|$1>>5)&31; + } + } + + return sprintf ".word\t0x%08x !%s", + 0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2, + $ref; + } else { + return $ref; + } +} +sub unalignaddr { +my ($mnemonic,$rs1,$rs2,$rd)=@_; +my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 ); +my $ref="$mnemonic\t$rs1,$rs2,$rd"; + + foreach ($rs1,$rs2,$rd) { + if (/%([goli])([0-7])/) { $_=$bias{$1}+$2; } + else { return $ref; } + } + return sprintf ".word\t0x%08x !%s", + 0x81b00300|$rd<<25|$rs1<<14|$rs2, + $ref; +} + +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/ge; + + s/\b(f[^\s]*)\s+(%f[0-9]{1,2}),\s*(%f[0-9]{1,2}),\s*(%f[0-9]{1,2})/ + &unvis($1,$2,$3,$4) + /ge; + s/\b(alignaddr)\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/ + &unalignaddr($1,$2,$3,$4) + /ge; + + print $_,"\n"; +} + close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl index 86c593a2c83e3e..b7b44b4411362d 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl @@ -1,7 +1,7 @@ #!/usr/bin/env perl # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. Rights for redistribution and usage in source and binary # forms are granted according to the OpenSSL license. # ==================================================================== @@ -39,6 +39,64 @@ # contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit # sha256_block:-( This is presumably because 64-bit shifts/rotates # apparently are not atomic instructions, but implemented in microcode. +# +# May 2012. 
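
The `unvis`/`unalignaddr` helpers added at the bottom of sha512-sparcv9.pl emit `faligndata`, `for` and `alignaddr` as raw `.word`s, so the module still assembles when the toolchain was not told about VIS (e.g. `-xarch=v9` instead of `-xarch=v9a`). A minimal sketch of the same encoding (not part of the patch; `encode_vis` and the sample operands are illustrative, the bit layout and opf values are taken from the `unvis()` routine above):

#!/usr/bin/env perl
# Hand-encode a VIS instruction as a raw .word, mirroring unvis() above.
use strict; use warnings;

my %visopf = ( 'faligndata' => 0x048, 'for' => 0x07c );

sub encode_vis {
    my ($mnemonic, @regs) = @_;            # register numbers: rs1, rs2, rd
    my ($rs1, $rs2, $rd) = @regs;
    my $opf = $visopf{$mnemonic} or die "unknown VIS mnemonic";
    for ($rs1, $rs2, $rd) {
        if ($_ >= 32) {                    # upper double bank: must be even,
            die "odd upper register" if $_ & 1;
            $_ = ($_ | $_ >> 5) & 31;      # re-encoded as in the patch
        }
    }
    return sprintf ".word 0x%08x ! %s %%f%d,%%f%d,%%f%d",
        0x81b00000 | $rd << 25 | $rs1 << 14 | $opf << 5 | $rs2,
        $mnemonic, @regs;
}

print encode_vis("faligndata", 18, 20, 16), "\n";

The `(r|r>>5)&31` step handles the upper double-precision bank (%f32 and above), which the 5-bit register field can only address after this re-encoding; the same trick is used verbatim in `unvis()`.
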
+# +# Optimization including one of Pavel Semjanov's ideas, alternative +# Maj, resulted in >=5% improvement on most CPUs, +20% SHA256 and +# unfortunately -2% SHA512 on P4 [which nobody should care about +# that much]. +# +# June 2012. +# +# Add SIMD code paths, see below for improvement coefficients. SSSE3 +# code path was not attempted for SHA512, because improvement is not +# estimated to be high enough, noticeably less than 9%, to justify +# the effort, not on pre-AVX processors. [Obviously with exclusion +# for VIA Nano, but it has SHA512 instruction that is faster and +# should be used instead.] For reference, corresponding estimated +# upper limit for improvement for SSSE3 SHA256 is 28%. The fact that +# higher coefficients are observed on VIA Nano and Bulldozer has more +# to do with specifics of their architecture [which is topic for +# separate discussion]. +# +# November 2012. +# +# Add AVX2 code path. Two consecutive input blocks are loaded to +# 256-bit %ymm registers, with data from first block to least +# significant 128-bit halves and data from second to most significant. +# The data is then processed with same SIMD instruction sequence as +# for AVX, but with %ymm as operands. Side effect is increased stack +# frame, 448 additional bytes in SHA256 and 1152 in SHA512, and 1.2KB +# code size increase. +# +# March 2014. +# +# Add support for Intel SHA Extensions. + +###################################################################### +# Current performance in cycles per processed byte (less is better): +# +# SHA256 SSSE3 AVX/XOP(*) SHA512 AVX/XOP(*) +# +# AMD K8 14.9 - - 9.57 - +# P4 17.3 - - 30.8 - +# Core 2 15.6 13.8(+13%) - 9.97 - +# Westmere 14.8 12.3(+19%) - 9.58 - +# Sandy Bridge 17.4 14.2(+23%) 11.6(+50%(**)) 11.2 8.10(+38%(**)) +# Ivy Bridge 12.6 10.5(+20%) 10.3(+22%) 8.17 7.22(+13%) +# Haswell 12.2 9.28(+31%) 7.80(+56%) 7.66 5.40(+42%) +# Bulldozer 21.1 13.6(+54%) 13.6(+54%(***)) 13.5 8.58(+57%) +# VIA Nano 23.0 16.5(+39%) - 14.7 - +# Atom 23.0 18.9(+22%) - 14.7 - +# Silvermont 27.4 20.6(+33%) - 17.5 - +# +# (*) whichever best applicable; +# (**) switch from ror to shrd stands for fair share of improvement; +# (***) execution time is fully determined by remaining integer-only +# part, body_00_15; reducing the amount of SIMD instructions +# below certain limit makes no difference/sense; to conserve +# space SHA256 XOP code path is therefore omitted; $flavour = shift; $output = shift; @@ -51,23 +109,58 @@ ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; +if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` + =~ /GNU assembler version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.19) + ($1>=2.22); +} + +if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && + `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { + $avx = ($1>=2.09) + ($1>=2.10); +} + +if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && + `ml64 2>&1` =~ /Version ([0-9]+)\./) { + $avx = ($1>=10) + ($1>=11); +} + +if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) { + $avx = ($2>=3.0) + ($2>3.0); +} + +$shaext=1; ### set to zero if compiling for 1.0.1 +$avx=1 if (!$shaext && $avx); + open OUT,"| \"$^X\" $xlate $flavour $output"; *STDOUT=*OUT; -$func="sha512_block_data_order"; -$TABLE="K512"; -$SZ=8; -@ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%rax","%rbx","%rcx","%rdx", - "%r8", "%r9", "%r10","%r11"); -($T1,$a0,$a1,$a2)=("%r12","%r13","%r14","%r15"); -@Sigma0=(28,34,39); -@Sigma1=(14,18,41); 
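
Because the AVX and AVX2 paths in sha512-x86_64.pl only assemble with a sufficiently new toolchain, the generator now probes the assembler and compiler before deciding which code paths to emit, as shown in the version checks above. A standalone sketch of that probe (assumes a gcc/clang-style driver reachable through `$ENV{CC}` on a Unix-like system; the nasm/ml64 branches from the patch are omitted here):

#!/usr/bin/env perl
# Probe assembler/compiler versions to pick the SIMD level, as the
# generator does before emitting AVX/AVX2 code.
use strict; use warnings;

my $cc  = $ENV{CC} || "cc";
my $avx = 0;

if (`$cc -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
        =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
    $avx = ($1 >= 2.19) + ($1 >= 2.22);   # 2.19 -> AVX, 2.22 -> AVX2
}
if (!$avx && `$cc -v 2>&1`
        =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
    $avx = ($2 >= 3.0) + ($2 > 3.0);
}
print "avx level: $avx\n";   # 0 = scalar/SSSE3 only, 1 = AVX, 2 = AVX2

A stale binutils therefore silently degrades the generated code to the scalar or SSSE3 paths rather than producing assembly the assembler cannot digest.
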
-@sigma0=(1, 8, 7); -@sigma1=(19,61, 6); -$rounds=80; - -$ctx="%rdi"; # 1st arg -$round="%rdi"; # zaps $ctx +if ($output =~ /512/) { + $func="sha512_block_data_order"; + $TABLE="K512"; + $SZ=8; + @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%rax","%rbx","%rcx","%rdx", + "%r8", "%r9", "%r10","%r11"); + ($T1,$a0,$a1,$a2,$a3)=("%r12","%r13","%r14","%r15","%rdi"); + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=(1, 8, 7); + @sigma1=(19,61, 6); + $rounds=80; +} else { + $func="sha256_block_data_order"; + $TABLE="K256"; + $SZ=4; + @ROT=($A,$B,$C,$D,$E,$F,$G,$H)=("%eax","%ebx","%ecx","%edx", + "%r8d","%r9d","%r10d","%r11d"); + ($T1,$a0,$a1,$a2,$a3)=("%r12d","%r13d","%r14d","%r15d","%edi"); + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 7,18, 3); + @sigma1=(17,19,10); + $rounds=64; +} + +$ctx="%rdi"; # 1st arg, zapped by $a3 $inp="%rsi"; # 2nd arg $Tbl="%rbp"; @@ -80,47 +173,51 @@ sub ROUND_00_15() { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + my $STRIDE=$SZ; + $STRIDE += 16 if ($i%(16/$SZ)==(16/$SZ-1)); $code.=<<___; ror \$`$Sigma1[2]-$Sigma1[1]`,$a0 mov $f,$a2 - mov $T1,`$SZ*($i&0xf)`(%rsp) - ror \$`$Sigma0[2]-$Sigma0[1]`,$a1 xor $e,$a0 + ror \$`$Sigma0[2]-$Sigma0[1]`,$a1 xor $g,$a2 # f^g - ror \$`$Sigma1[1]-$Sigma1[0]`,$a0 - add $h,$T1 # T1+=h + mov $T1,`$SZ*($i&0xf)`(%rsp) xor $a,$a1 - - add ($Tbl,$round,$SZ),$T1 # T1+=K[round] and $e,$a2 # (f^g)&e - mov $b,$h + + ror \$`$Sigma1[1]-$Sigma1[0]`,$a0 + add $h,$T1 # T1+=h + xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g ror \$`$Sigma0[1]-$Sigma0[0]`,$a1 xor $e,$a0 - xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g + add $a2,$T1 # T1+=Ch(e,f,g) - xor $c,$h # b^c + mov $a,$a2 + add ($Tbl),$T1 # T1+=K[round] xor $a,$a1 - add $a2,$T1 # T1+=Ch(e,f,g) - mov $b,$a2 + xor $b,$a2 # a^b, b^c in next round ror \$$Sigma1[0],$a0 # Sigma1(e) - and $a,$h # h=(b^c)&a - and $c,$a2 # b&c + mov $b,$h + and $a2,$a3 ror \$$Sigma0[0],$a1 # Sigma0(a) add $a0,$T1 # T1+=Sigma1(e) - add $a2,$h # h+=b&c (completes +=Maj(a,b,c) + xor $a3,$h # h=Maj(a,b,c)=Ch(a^b,c,b) add $T1,$d # d+=T1 add $T1,$h # h+=T1 - lea 1($round),$round # round++ - add $a1,$h # h+=Sigma0(a) + lea $STRIDE($Tbl),$Tbl # round++ +___ +$code.=<<___ if ($i<15); + add $a1,$h # h+=Sigma0(a) ___ + ($a2,$a3) = ($a3,$a2); } sub ROUND_16_XX() @@ -128,29 +225,28 @@ () $code.=<<___; mov `$SZ*(($i+1)&0xf)`(%rsp),$a0 - mov `$SZ*(($i+14)&0xf)`(%rsp),$a1 - mov $a0,$T1 - mov $a1,$a2 - - ror \$`$sigma0[1]-$sigma0[0]`,$T1 - xor $a0,$T1 - shr \$$sigma0[2],$a0 - - ror \$$sigma0[0],$T1 - xor $T1,$a0 # sigma0(X[(i+1)&0xf]) - mov `$SZ*(($i+9)&0xf)`(%rsp),$T1 + mov `$SZ*(($i+14)&0xf)`(%rsp),$a2 + mov $a0,$T1 + ror \$`$sigma0[1]-$sigma0[0]`,$a0 + add $a1,$a # modulo-scheduled h+=Sigma0(a) + mov $a2,$a1 ror \$`$sigma1[1]-$sigma1[0]`,$a2 + + xor $T1,$a0 + shr \$$sigma0[2],$T1 + ror \$$sigma0[0],$a0 xor $a1,$a2 shr \$$sigma1[2],$a1 ror \$$sigma1[0],$a2 - add $a0,$T1 - xor $a2,$a1 # sigma1(X[(i+14)&0xf]) + xor $a0,$T1 # sigma0(X[(i+1)&0xf]) + xor $a1,$a2 # sigma1(X[(i+14)&0xf]) + add `$SZ*(($i+9)&0xf)`(%rsp),$T1 add `$SZ*($i&0xf)`(%rsp),$T1 mov $e,$a0 - add $a1,$T1 + add $a2,$T1 mov $a,$a1 ___ &ROUND_00_15(@_); @@ -159,10 +255,43 @@ () $code=<<___; .text +.extern OPENSSL_ia32cap_P .globl $func -.type $func,\@function,4 +.type $func,\@function,3 .align 16 $func: +___ +$code.=<<___ if ($SZ==4 || $avx); + lea OPENSSL_ia32cap_P(%rip),%r11 + mov 0(%r11),%r9d + mov 4(%r11),%r10d + mov 8(%r11),%r11d +___ +$code.=<<___ if ($SZ==4 && $shaext); + test \$`1<<29`,%r11d # check for SHA + jnz _shaext_shortcut +___ +$code.=<<___ if ($avx && $SZ==8); + test \$`1<<11`,%r10d # 
check for XOP + jnz .Lxop_shortcut +___ +$code.=<<___ if ($avx>1); + and \$`1<<8|1<<5|1<<3`,%r11d # check for BMI2+AVX2+BMI1 + cmp \$`1<<8|1<<5|1<<3`,%r11d + je .Lavx2_shortcut +___ +$code.=<<___ if ($avx); + and \$`1<<30`,%r9d # mask "Intel CPU" bit + and \$`1<<28|1<<9`,%r10d # mask AVX and SSSE3 bits + or %r9d,%r10d + cmp \$`1<<28|1<<9|1<<30`,%r10d + je .Lavx_shortcut +___ +$code.=<<___ if ($SZ==4); + test \$`1<<9`,%r10d + jnz .Lssse3_shortcut +___ +$code.=<<___; push %rbx push %rbp push %r12 @@ -180,8 +309,6 @@ () mov %r11,$_rsp # save copy of %rsp .Lprologue: - lea $TABLE(%rip),$Tbl - mov $SZ*0($ctx),$A mov $SZ*1($ctx),$B mov $SZ*2($ctx),$C @@ -194,7 +321,9 @@ () .align 16 .Lloop: - xor $round,$round + mov $B,$a3 + lea $TABLE(%rip),$Tbl + xor $C,$a3 # magic ___ for($i=0;$i<16;$i++) { $code.=" mov $SZ*$i($inp),$T1\n"; @@ -215,10 +344,11 @@ () } $code.=<<___; - cmp \$$rounds,$round - jb .Lrounds_16_xx + cmpb \$0,`$SZ-1`($Tbl) + jnz .Lrounds_16_xx mov $_ctx,$ctx + add $a1,$A # modulo-scheduled h+=Sigma0(a) lea 16*$SZ($inp),$inp add $SZ*0($ctx),$A @@ -260,22 +390,46 @@ () .align 64 .type $TABLE,\@object $TABLE: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + .long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + .long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + .long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff + .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + .asciz "SHA256 block transform for x86_64, CRYPTOGAMS by " ___ } else { $code.=<<___; @@ -283,120 +437,1812 @@ () .type $TABLE,\@object $TABLE: .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 .quad 
0x923f82a4af194f9b,0xab1c5ed5da6d8118 .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 .quad 0x9bdc06a725c71235,0xc19bf174cf692694 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 .quad 0x06ca6351e003826f,0x142929670a0e6e70 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b .quad 0x81c2c92e47edaee6,0x92722c851482353b .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 .quad 0xf40e35855771202a,0x106aa07032bbd1b8 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b .quad 0x113f9804bef90dae,0x1b710b35131c471b .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + + .quad 0x0001020304050607,0x08090a0b0c0d0e0f + .quad 0x0001020304050607,0x08090a0b0c0d0e0f + .asciz "SHA512 block transform for x86_64, CRYPTOGAMS by " ___ } -# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -# CONTEXT *context,DISPATCHER_CONTEXT 
*disp) -if ($win64) { -$rec="%rcx"; -$frame="%rdx"; -$context="%r8"; -$disp="%r9"; +###################################################################### +# SIMD code paths +# +if ($SZ==4 && $shaext) {{{ +###################################################################### +# Intel SHA Extensions implementation of SHA256 update function. +# +my ($ctx,$inp,$num,$Tbl)=("%rdi","%rsi","%rdx","%rcx"); + +my ($Wi,$ABEF,$CDGH,$TMP,$BSWAP,$ABEF_SAVE,$CDGH_SAVE)=map("%xmm$_",(0..2,7..10)); +my @MSG=map("%xmm$_",(3..6)); $code.=<<___; -.extern __imp_RtlVirtualUnwind -.type se_handler,\@abi-omnipotent +.type sha256_block_data_order_shaext,\@function,3 +.align 64 +sha256_block_data_order_shaext: +_shaext_shortcut: +___ +$code.=<<___ if ($win64); + lea `-8-5*16`(%rsp),%rsp + movaps %xmm6,-8-5*16(%rax) + movaps %xmm7,-8-4*16(%rax) + movaps %xmm8,-8-3*16(%rax) + movaps %xmm9,-8-2*16(%rax) + movaps %xmm10,-8-1*16(%rax) +.Lprologue_shaext: +___ +$code.=<<___; + lea K256+0x80(%rip),$Tbl + movdqu ($ctx),$ABEF # DCBA + movdqu 16($ctx),$CDGH # HGFE + movdqa 0x200-0x80($Tbl),$TMP # byte swap mask + + pshufd \$0x1b,$ABEF,$Wi # ABCD + pshufd \$0xb1,$ABEF,$ABEF # CDAB + pshufd \$0x1b,$CDGH,$CDGH # EFGH + movdqa $TMP,$BSWAP # offload + palignr \$8,$CDGH,$ABEF # ABEF + punpcklqdq $Wi,$CDGH # CDGH + jmp .Loop_shaext + .align 16 -se_handler: - push %rsi - push %rdi +.Loop_shaext: + movdqu ($inp),@MSG[0] + movdqu 0x10($inp),@MSG[1] + movdqu 0x20($inp),@MSG[2] + pshufb $TMP,@MSG[0] + movdqu 0x30($inp),@MSG[3] + + movdqa 0*32-0x80($Tbl),$Wi + paddd @MSG[0],$Wi + pshufb $TMP,@MSG[1] + movdqa $CDGH,$CDGH_SAVE # offload + sha256rnds2 $ABEF,$CDGH # 0-3 + pshufd \$0x0e,$Wi,$Wi + nop + movdqa $ABEF,$ABEF_SAVE # offload + sha256rnds2 $CDGH,$ABEF + + movdqa 1*32-0x80($Tbl),$Wi + paddd @MSG[1],$Wi + pshufb $TMP,@MSG[2] + sha256rnds2 $ABEF,$CDGH # 4-7 + pshufd \$0x0e,$Wi,$Wi + lea 0x40($inp),$inp + sha256msg1 @MSG[1],@MSG[0] + sha256rnds2 $CDGH,$ABEF + + movdqa 2*32-0x80($Tbl),$Wi + paddd @MSG[2],$Wi + pshufb $TMP,@MSG[3] + sha256rnds2 $ABEF,$CDGH # 8-11 + pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[3],$TMP + palignr \$4,@MSG[2],$TMP + nop + paddd $TMP,@MSG[0] + sha256msg1 @MSG[2],@MSG[1] + sha256rnds2 $CDGH,$ABEF + + movdqa 3*32-0x80($Tbl),$Wi + paddd @MSG[3],$Wi + sha256msg2 @MSG[3],@MSG[0] + sha256rnds2 $ABEF,$CDGH # 12-15 + pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[0],$TMP + palignr \$4,@MSG[3],$TMP + nop + paddd $TMP,@MSG[1] + sha256msg1 @MSG[3],@MSG[2] + sha256rnds2 $CDGH,$ABEF +___ +for($i=4;$i<16-3;$i++) { +$code.=<<___; + movdqa $i*32-0x80($Tbl),$Wi + paddd @MSG[0],$Wi + sha256msg2 @MSG[0],@MSG[1] + sha256rnds2 $ABEF,$CDGH # 16-19... 
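
The long run of `sha256rnds2`/`sha256msg1` rounds in the SHA Extensions path is not written out by hand: a small Perl loop emits rounds 16 through 51 and rotates the `@MSG` register list with `push(@MSG,shift(@MSG))` so that `@MSG[0]` always names the message quadruple whose schedule update completes next. A stripped-down model of that idiom (illustration only; the printed text is a summary, not the real generated assembly):

#!/usr/bin/env perl
# Model of the register-rotation idiom used to unroll the middle
# sha256rnds2 rounds in the _shaext path.
use strict; use warnings;

my @MSG = map { "%xmm$_" } (3 .. 6);
for my $i (4 .. 12) {                      # rounds 16-19 ... 48-51
    printf "rounds %2d-%2d: \@MSG = (%s)\n",
        4*$i, 4*$i + 3, join(",", @MSG);
    push(@MSG, shift(@MSG));               # rotate the register assignment
}
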
+ pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[1],$TMP + palignr \$4,@MSG[0],$TMP + nop + paddd $TMP,@MSG[2] + sha256msg1 @MSG[0],@MSG[3] + sha256rnds2 $CDGH,$ABEF +___ + push(@MSG,shift(@MSG)); +} +$code.=<<___; + movdqa 13*32-0x80($Tbl),$Wi + paddd @MSG[0],$Wi + sha256msg2 @MSG[0],@MSG[1] + sha256rnds2 $ABEF,$CDGH # 52-55 + pshufd \$0x0e,$Wi,$Wi + movdqa @MSG[1],$TMP + palignr \$4,@MSG[0],$TMP + sha256rnds2 $CDGH,$ABEF + paddd $TMP,@MSG[2] + + movdqa 14*32-0x80($Tbl),$Wi + paddd @MSG[1],$Wi + sha256rnds2 $ABEF,$CDGH # 56-59 + pshufd \$0x0e,$Wi,$Wi + sha256msg2 @MSG[1],@MSG[2] + movdqa $BSWAP,$TMP + sha256rnds2 $CDGH,$ABEF + + movdqa 15*32-0x80($Tbl),$Wi + paddd @MSG[2],$Wi + nop + sha256rnds2 $ABEF,$CDGH # 60-63 + pshufd \$0x0e,$Wi,$Wi + dec $num + nop + sha256rnds2 $CDGH,$ABEF + + paddd $CDGH_SAVE,$CDGH + paddd $ABEF_SAVE,$ABEF + jnz .Loop_shaext + + pshufd \$0xb1,$CDGH,$CDGH # DCHG + pshufd \$0x1b,$ABEF,$TMP # FEBA + pshufd \$0xb1,$ABEF,$ABEF # BAFE + punpckhqdq $CDGH,$ABEF # DCBA + palignr \$8,$TMP,$CDGH # HGFE + + movdqu $ABEF,($ctx) + movdqu $CDGH,16($ctx) +___ +$code.=<<___ if ($win64); + movaps -8-5*16(%rax),%xmm6 + movaps -8-4*16(%rax),%xmm7 + movaps -8-3*16(%rax),%xmm8 + movaps -8-2*16(%rax),%xmm9 + movaps -8-1*16(%rax),%xmm10 + mov %rax,%rsp +.Lepilogue_shaext: +___ +$code.=<<___; + ret +.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext +___ +}}} +{{{ + +my $a4=$T1; +my ($a,$b,$c,$d,$e,$f,$g,$h); + +sub AUTOLOAD() # thunk [simplified] 32-bit style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; + my $arg = pop; + $arg = "\$$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',$arg,reverse @_)."\n"; +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'. + + '&ror ($a0,$Sigma1[2]-$Sigma1[1])', + '&mov ($a,$a1)', + '&mov ($a4,$f)', + + '&ror ($a1,$Sigma0[2]-$Sigma0[1])', + '&xor ($a0,$e)', + '&xor ($a4,$g)', # f^g + + '&ror ($a0,$Sigma1[1]-$Sigma1[0])', + '&xor ($a1,$a)', + '&and ($a4,$e)', # (f^g)&e + + '&xor ($a0,$e)', + '&add ($h,$SZ*($i&15)."(%rsp)")', # h+=X[i]+K[i] + '&mov ($a2,$a)', + + '&xor ($a4,$g)', # Ch(e,f,g)=((f^g)&e)^g + '&ror ($a1,$Sigma0[1]-$Sigma0[0])', + '&xor ($a2,$b)', # a^b, b^c in next round + + '&add ($h,$a4)', # h+=Ch(e,f,g) + '&ror ($a0,$Sigma1[0])', # Sigma1(e) + '&and ($a3,$a2)', # (b^c)&(a^b) + + '&xor ($a1,$a)', + '&add ($h,$a0)', # h+=Sigma1(e) + '&xor ($a3,$b)', # Maj(a,b,c)=Ch(a^b,c,b) + + '&ror ($a1,$Sigma0[0])', # Sigma0(a) + '&add ($d,$h)', # d+=h + '&add ($h,$a3)', # h+=Maj(a,b,c) + + '&mov ($a0,$d)', + '&add ($a1,$h);'. 
# h+=Sigma0(a) + '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;' + ); +} + +###################################################################### +# SSSE3 code path +# +if ($SZ==4) { # SHA256 only +my @X = map("%xmm$_",(0..3)); +my ($t0,$t1,$t2,$t3, $t4,$t5) = map("%xmm$_",(4..9)); + +$code.=<<___; +.type ${func}_ssse3,\@function,3 +.align 64 +${func}_ssse3: +.Lssse3_shortcut: push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 - pushfq - sub \$64,%rsp + mov %rsp,%r11 # copy %rsp + shl \$4,%rdx # num*16 + sub \$`$framesz+$win64*16*4`,%rsp + lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ + and \$-64,%rsp # align stack frame + mov $ctx,$_ctx # save ctx, 1st arg + mov $inp,$_inp # save inp, 2nd arh + mov %rdx,$_end # save end pointer, "3rd" arg + mov %r11,$_rsp # save copy of %rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,16*$SZ+32(%rsp) + movaps %xmm7,16*$SZ+48(%rsp) + movaps %xmm8,16*$SZ+64(%rsp) + movaps %xmm9,16*$SZ+80(%rsp) +___ +$code.=<<___; +.Lprologue_ssse3: - mov 120($context),%rax # pull context->Rax - mov 248($context),%rbx # pull context->Rip + mov $SZ*0($ctx),$A + mov $SZ*1($ctx),$B + mov $SZ*2($ctx),$C + mov $SZ*3($ctx),$D + mov $SZ*4($ctx),$E + mov $SZ*5($ctx),$F + mov $SZ*6($ctx),$G + mov $SZ*7($ctx),$H +___ - lea .Lprologue(%rip),%r10 - cmp %r10,%rbx # context->Rip<.Lprologue - jb .Lin_prologue +$code.=<<___; + #movdqa $TABLE+`$SZ*2*$rounds`+32(%rip),$t4 + #movdqa $TABLE+`$SZ*2*$rounds`+64(%rip),$t5 + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + movdqu 0x00($inp),@X[0] + movdqu 0x10($inp),@X[1] + movdqu 0x20($inp),@X[2] + pshufb $t3,@X[0] + movdqu 0x30($inp),@X[3] + lea $TABLE(%rip),$Tbl + pshufb $t3,@X[1] + movdqa 0x00($Tbl),$t0 + movdqa 0x20($Tbl),$t1 + pshufb $t3,@X[2] + paddd @X[0],$t0 + movdqa 0x40($Tbl),$t2 + pshufb $t3,@X[3] + movdqa 0x60($Tbl),$t3 + paddd @X[1],$t1 + paddd @X[2],$t2 + paddd @X[3],$t3 + movdqa $t0,0x00(%rsp) + mov $A,$a1 + movdqa $t1,0x10(%rsp) + mov $B,$a3 + movdqa $t2,0x20(%rsp) + xor $C,$a3 # magic + movdqa $t3,0x30(%rsp) + mov $E,$a0 + jmp .Lssse3_00_47 - mov 152($context),%rax # pull context->Rsp +.align 16 +.Lssse3_00_47: + sub \$`-16*2*$SZ`,$Tbl # size optimization +___ +sub Xupdate_256_SSSE3 () { + ( + '&movdqa ($t0,@X[1]);', + '&movdqa ($t3,@X[3])', + '&palignr ($t0,@X[0],$SZ)', # X[1..4] + '&palignr ($t3,@X[2],$SZ);', # X[9..12] + '&movdqa ($t1,$t0)', + '&movdqa ($t2,$t0);', + '&psrld ($t0,$sigma0[2])', + '&paddd (@X[0],$t3);', # X[0..3] += X[9..12] + '&psrld ($t2,$sigma0[0])', + '&pshufd ($t3,@X[3],0b11111010)',# X[14..15] + '&pslld ($t1,8*$SZ-$sigma0[1]);'. + '&pxor ($t0,$t2)', + '&psrld ($t2,$sigma0[1]-$sigma0[0]);'. + '&pxor ($t0,$t1)', + '&pslld ($t1,$sigma0[1]-$sigma0[0]);'. 
+ '&pxor ($t0,$t2);', + '&movdqa ($t2,$t3)', + '&pxor ($t0,$t1);', # sigma0(X[1..4]) + '&psrld ($t3,$sigma1[2])', + '&paddd (@X[0],$t0);', # X[0..3] += sigma0(X[1..4]) + '&psrlq ($t2,$sigma1[0])', + '&pxor ($t3,$t2);', + '&psrlq ($t2,$sigma1[1]-$sigma1[0])', + '&pxor ($t3,$t2)', + '&pshufb ($t3,$t4)', # sigma1(X[14..15]) + '&paddd (@X[0],$t3)', # X[0..1] += sigma1(X[14..15]) + '&pshufd ($t3,@X[0],0b01010000)',# X[16..17] + '&movdqa ($t2,$t3);', + '&psrld ($t3,$sigma1[2])', + '&psrlq ($t2,$sigma1[0])', + '&pxor ($t3,$t2);', + '&psrlq ($t2,$sigma1[1]-$sigma1[0])', + '&pxor ($t3,$t2);', + '&movdqa ($t2,16*2*$j."($Tbl)")', + '&pshufb ($t3,$t5)', + '&paddd (@X[0],$t3)' # X[2..3] += sigma1(X[16..17]) + ); +} - lea .Lepilogue(%rip),%r10 - cmp %r10,%rbx # context->Rip>=.Lepilogue - jae .Lin_prologue +sub SSSE3_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 104 instructions - mov 16*$SZ+3*8(%rax),%rax # pull $_rsp - lea 48(%rax),%rax + if (0) { + foreach (Xupdate_256_SSSE3()) { # 36 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + } + } else { # squeeze extra 4% on Westmere and 19% on Atom + eval(shift(@insns)); #@ + &movdqa ($t0,@X[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa ($t3,@X[3]); + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + eval(shift(@insns)); + &palignr ($t0,@X[0],$SZ); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + &palignr ($t3,@X[2],$SZ); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + &movdqa ($t1,$t0); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa ($t2,$t0); + eval(shift(@insns)); #@ + eval(shift(@insns)); + &psrld ($t0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[0],$t3); # X[0..3] += X[9..12] + eval(shift(@insns)); #@ + eval(shift(@insns)); + &psrld ($t2,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &pshufd ($t3,@X[3],0b11111010); # X[4..15] + eval(shift(@insns)); + eval(shift(@insns)); #@ + &pslld ($t1,8*$SZ-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t0,$t2); + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + &psrld ($t2,$sigma0[1]-$sigma0[0]); + eval(shift(@insns)); + &pxor ($t0,$t1); + eval(shift(@insns)); + eval(shift(@insns)); + &pslld ($t1,$sigma0[1]-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t0,$t2); + eval(shift(@insns)); + eval(shift(@insns)); #@ + &movdqa ($t2,$t3); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t0,$t1); # sigma0(X[1..4]) + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + &psrld ($t3,$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[0],$t0); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); #@ + eval(shift(@insns)); + &psrlq ($t2,$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t3,$t2); + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + &psrlq ($t2,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t3,$t2); + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + #&pshufb ($t3,$t4); # sigma1(X[14..15]) + &pshufd ($t3,$t3,0b10000000); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &psrldq ($t3,8); + eval(shift(@insns)); + 
eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + &paddd (@X[0],$t3); # X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &pshufd ($t3,@X[0],0b01010000); # X[16..17] + eval(shift(@insns)); + eval(shift(@insns)); #@ + eval(shift(@insns)); + &movdqa ($t2,$t3); + eval(shift(@insns)); + eval(shift(@insns)); + &psrld ($t3,$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); #@ + &psrlq ($t2,$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t3,$t2); + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + eval(shift(@insns)); + &psrlq ($t2,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &pxor ($t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); #@ + #&pshufb ($t3,$t5); + &pshufd ($t3,$t3,0b00001000); + eval(shift(@insns)); + eval(shift(@insns)); + &movdqa ($t2,16*2*$j."($Tbl)"); + eval(shift(@insns)); #@ + eval(shift(@insns)); + &pslldq ($t3,8); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &paddd (@X[0],$t3); # X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); #@ + eval(shift(@insns)); + eval(shift(@insns)); + } + &paddd ($t2,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &movdqa (16*$j."(%rsp)",$t2); +} - mov -8(%rax),%rbx - mov -16(%rax),%rbp - mov -24(%rax),%r12 - mov -32(%rax),%r13 - mov -40(%rax),%r14 - mov -48(%rax),%r15 - mov %rbx,144($context) # restore context->Rbx - mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore context->R12 - mov %r13,224($context) # restore context->R13 - mov %r14,232($context) # restore context->R14 - mov %r15,240($context) # restore context->R15 + for ($i=0,$j=0; $j<4; $j++) { + &SSSE3_256_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmpb ($SZ-1+16*2*$SZ."($Tbl)",0); + &jne (".Lssse3_00_47"); -.Lin_prologue: - mov 8(%rax),%rdi - mov 16(%rax),%rsi - mov %rax,152($context) # restore context->Rsp - mov %rsi,168($context) # restore context->Rsi - mov %rdi,176($context) # restore context->Rdi + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } +$code.=<<___; + mov $_ctx,$ctx + mov $a1,$A - mov 40($disp),%rdi # disp->ContextRecord - mov $context,%rsi # context - mov \$154,%ecx # sizeof(CONTEXT) - .long 0xa548f3fc # cld; rep movsq + add $SZ*0($ctx),$A + lea 16*$SZ($inp),$inp + add $SZ*1($ctx),$B + add $SZ*2($ctx),$C + add $SZ*3($ctx),$D + add $SZ*4($ctx),$E + add $SZ*5($ctx),$F + add $SZ*6($ctx),$G + add $SZ*7($ctx),$H - mov $disp,%rsi - xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER - mov 8(%rsi),%rdx # arg2, disp->ImageBase - mov 0(%rsi),%r8 # arg3, disp->ControlPc - mov 16(%rsi),%r9 # arg4, disp->FunctionEntry - mov 40(%rsi),%r10 # disp->ContextRecord - lea 56(%rsi),%r11 # &disp->HandlerData + cmp $_end,$inp + + mov $A,$SZ*0($ctx) + mov $B,$SZ*1($ctx) + mov $C,$SZ*2($ctx) + mov $D,$SZ*3($ctx) + mov $E,$SZ*4($ctx) + mov $F,$SZ*5($ctx) + mov $G,$SZ*6($ctx) + mov $H,$SZ*7($ctx) + jb .Lloop_ssse3 + + mov $_rsp,%rsi +___ +$code.=<<___ if ($win64); + movaps 16*$SZ+32(%rsp),%xmm6 + movaps 16*$SZ+48(%rsp),%xmm7 + movaps 16*$SZ+64(%rsp),%xmm8 + movaps 16*$SZ+80(%rsp),%xmm9 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_ssse3: + ret +.size ${func}_ssse3,.-${func}_ssse3 +___ +} + +if ($avx) {{ 
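The ${func}_ssse3 prologue above (and the XOP/AVX ones that follow) all set up the same frame shape before the main loop: copy %rsp, reserve 16*$SZ bytes of scratch for the pre-added X[i]+K[i] words plus four 8-byte slots, align to 64 bytes, and on Win64 park xmm6 and up above that. A rough C picture for the SHA-256 case ($SZ==4); the struct and field names are made up here, and the offsets are only inferred from the 16*$SZ+... operands visible above.

    #include <stdint.h>

    struct sha256_simd_frame {             /* hypothetical sketch, SHA-256 case only */
        uint32_t    wk[16];                /* 16*$SZ bytes: X[i]+K[i] staging area   */
        void       *ctx;                   /* $_ctx: hash context, 1st argument      */
        const void *inp;                   /* $_inp: input pointer, 2nd argument     */
        const void *end;                   /* $_end: inp + num*64, loop bound        */
        void       *saved_rsp;             /* $_rsp: caller %rsp, restored on exit   */
        /* Win64 only: xmm6..xmm9 (plus xmm10/xmm11 when $SZ==8) saved from here on  */
    };

The AVX2 entry point further down reserves a larger area (2*$SZ*$rounds bytes before the same four slots), so this picture applies only to the 128-bit paths.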
+###################################################################### +# XOP code path +# +if ($SZ==8) { # SHA512 only +$code.=<<___; +.type ${func}_xop,\@function,3 +.align 64 +${func}_xop: +.Lxop_shortcut: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%r11 # copy %rsp + shl \$4,%rdx # num*16 + sub \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp + lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ + and \$-64,%rsp # align stack frame + mov $ctx,$_ctx # save ctx, 1st arg + mov $inp,$_inp # save inp, 2nd arh + mov %rdx,$_end # save end pointer, "3rd" arg + mov %r11,$_rsp # save copy of %rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,16*$SZ+32(%rsp) + movaps %xmm7,16*$SZ+48(%rsp) + movaps %xmm8,16*$SZ+64(%rsp) + movaps %xmm9,16*$SZ+80(%rsp) +___ +$code.=<<___ if ($win64 && $SZ>4); + movaps %xmm10,16*$SZ+96(%rsp) + movaps %xmm11,16*$SZ+112(%rsp) +___ +$code.=<<___; +.Lprologue_xop: + + vzeroupper + mov $SZ*0($ctx),$A + mov $SZ*1($ctx),$B + mov $SZ*2($ctx),$C + mov $SZ*3($ctx),$D + mov $SZ*4($ctx),$E + mov $SZ*5($ctx),$F + mov $SZ*6($ctx),$G + mov $SZ*7($ctx),$H + jmp .Lloop_xop +___ + if ($SZ==4) { # SHA256 + my @X = map("%xmm$_",(0..3)); + my ($t0,$t1,$t2,$t3) = map("%xmm$_",(4..7)); + +$code.=<<___; +.align 16 +.Lloop_xop: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu 0x00($inp),@X[0] + vmovdqu 0x10($inp),@X[1] + vmovdqu 0x20($inp),@X[2] + vmovdqu 0x30($inp),@X[3] + vpshufb $t3,@X[0],@X[0] + lea $TABLE(%rip),$Tbl + vpshufb $t3,@X[1],@X[1] + vpshufb $t3,@X[2],@X[2] + vpaddd 0x00($Tbl),@X[0],$t0 + vpshufb $t3,@X[3],@X[3] + vpaddd 0x20($Tbl),@X[1],$t1 + vpaddd 0x40($Tbl),@X[2],$t2 + vpaddd 0x60($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + mov $A,$a1 + vmovdqa $t1,0x10(%rsp) + mov $B,$a3 + vmovdqa $t2,0x20(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x30(%rsp) + mov $E,$a0 + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + sub \$`-16*2*$SZ`,$Tbl # size optimization +___ +sub XOP_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 104 instructions + + &vpalignr ($t0,@X[1],@X[0],$SZ); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + &vpalignr ($t3,@X[3],@X[2],$SZ); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t1,$t0,8*$SZ-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrld ($t0,$t0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t3); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t2,$t1,$sigma0[1]-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t0,$t0,$t1); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t3,@X[3],8*$SZ-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t0,$t0,$t2); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrld ($t2,@X[3],$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t0); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t1,$t3,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t1); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrldq ($t3,$t3,8); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + 
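A note on the vprotd operands above (and vprotq in the 64-bit variant): the XOP rotate instructions rotate left for positive counts, so a right-rotate by n is written as a left-rotate by 8*$SZ-n, which is where the recurring `8*$SZ-$sigma0[1]`-style expressions come from; the SSSE3 path builds the same rotations out of pslld/psrld/pxor instead. A quick self-check of the identity, with illustrative helper names:

    #include <stdint.h>
    #include <assert.h>

    static uint32_t rotl32(uint32_t x, unsigned n) { return (x << n) | (x >> (32 - n)); }
    static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

    int main(void)
    {
        uint32_t x = 0x243f6a88u;                       /* arbitrary test word */
        assert(rotl32(x, 32 - 7)  == rotr32(x, 7));     /* sigma0's ROTR 7     */
        assert(rotl32(x, 32 - 18) == rotr32(x, 18));    /* sigma0's ROTR 18    */
        assert(rotl32(x, 32 - 17) == rotr32(x, 17));    /* sigma1's ROTR 17    */
        assert(rotl32(x, 32 - 19) == rotr32(x, 19));    /* sigma1's ROTR 19    */
        return 0;
    }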
eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t3); # X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t3,@X[0],8*$SZ-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrld ($t2,@X[0],$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotd ($t1,$t3,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t1); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpslldq ($t3,$t3,8); # 22 instructions + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd (@X[0],@X[0],$t3); # X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddd ($t2,@X[0],16*2*$j."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (16*$j."(%rsp)",$t2); +} + + for ($i=0,$j=0; $j<4; $j++) { + &XOP_256_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmpb ($SZ-1+16*2*$SZ."($Tbl)",0); + &jne (".Lxop_00_47"); + + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } + + } else { # SHA512 + my @X = map("%xmm$_",(0..7)); + my ($t0,$t1,$t2,$t3) = map("%xmm$_",(8..11)); + +$code.=<<___; +.align 16 +.Lloop_xop: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu 0x00($inp),@X[0] + lea $TABLE+0x80(%rip),$Tbl # size optimization + vmovdqu 0x10($inp),@X[1] + vmovdqu 0x20($inp),@X[2] + vpshufb $t3,@X[0],@X[0] + vmovdqu 0x30($inp),@X[3] + vpshufb $t3,@X[1],@X[1] + vmovdqu 0x40($inp),@X[4] + vpshufb $t3,@X[2],@X[2] + vmovdqu 0x50($inp),@X[5] + vpshufb $t3,@X[3],@X[3] + vmovdqu 0x60($inp),@X[6] + vpshufb $t3,@X[4],@X[4] + vmovdqu 0x70($inp),@X[7] + vpshufb $t3,@X[5],@X[5] + vpaddq -0x80($Tbl),@X[0],$t0 + vpshufb $t3,@X[6],@X[6] + vpaddq -0x60($Tbl),@X[1],$t1 + vpshufb $t3,@X[7],@X[7] + vpaddq -0x40($Tbl),@X[2],$t2 + vpaddq -0x20($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + vpaddq 0x00($Tbl),@X[4],$t0 + vmovdqa $t1,0x10(%rsp) + vpaddq 0x20($Tbl),@X[5],$t1 + vmovdqa $t2,0x20(%rsp) + vpaddq 0x40($Tbl),@X[6],$t2 + vmovdqa $t3,0x30(%rsp) + vpaddq 0x60($Tbl),@X[7],$t3 + vmovdqa $t0,0x40(%rsp) + mov $A,$a1 + vmovdqa $t1,0x50(%rsp) + mov $B,$a3 + vmovdqa $t2,0x60(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x70(%rsp) + mov $E,$a0 + jmp .Lxop_00_47 + +.align 16 +.Lxop_00_47: + add \$`16*2*$SZ`,$Tbl +___ +sub XOP_512_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body); # 52 instructions + + &vpalignr ($t0,@X[1],@X[0],$SZ); # X[1..2] + eval(shift(@insns)); + eval(shift(@insns)); + &vpalignr ($t3,@X[5],@X[4],$SZ); # X[9..10] + eval(shift(@insns)); + eval(shift(@insns)); + &vprotq ($t1,$t0,8*$SZ-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpsrlq ($t0,$t0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddq (@X[0],@X[0],$t3); # X[0..1] += X[9..10] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotq ($t2,$t1,$sigma0[1]-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t0,$t0,$t1); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vprotq ($t3,@X[7],8*$SZ-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t0,$t0,$t2); # sigma0(X[1..2]) + eval(shift(@insns)); + 
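The 64-bit update just above follows the same recurrence with the SHA-512 sigma constants; since an xmm register holds only two 64-bit lanes, each step advances X[0..1] (two schedule words) rather than four. A scalar reference in C, with illustrative function names:

    #include <stdint.h>

    static uint64_t rotr64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    static uint64_t sigma0_512(uint64_t x) { return rotr64(x, 1)  ^ rotr64(x, 8)  ^ (x >> 7); }
    static uint64_t sigma1_512(uint64_t x) { return rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6); }

    /* W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16], for 80 rounds */
    static void sha512_schedule_step(uint64_t W[80], int t)   /* 16 <= t <= 78, t even */
    {
        W[t]     = sigma1_512(W[t - 2]) + W[t - 7] + sigma0_512(W[t - 15]) + W[t - 16];
        W[t + 1] = sigma1_512(W[t - 1]) + W[t - 6] + sigma0_512(W[t - 14]) + W[t - 15];
    }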
eval(shift(@insns)); + &vpsrlq ($t2,@X[7],$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddq (@X[0],@X[0],$t0); # X[0..1] += sigma0(X[1..2]) + eval(shift(@insns)); + eval(shift(@insns)); + &vprotq ($t1,$t3,$sigma1[1]-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t2); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpxor ($t3,$t3,$t1); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddq (@X[0],@X[0],$t3); # X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vpaddq ($t2,@X[0],16*2*$j-0x80."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (16*$j."(%rsp)",$t2); +} + + for ($i=0,$j=0; $j<8; $j++) { + &XOP_512_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmpb ($SZ-1+16*2*$SZ-0x80."($Tbl)",0); + &jne (".Lxop_00_47"); + + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } +} +$code.=<<___; + mov $_ctx,$ctx + mov $a1,$A + + add $SZ*0($ctx),$A + lea 16*$SZ($inp),$inp + add $SZ*1($ctx),$B + add $SZ*2($ctx),$C + add $SZ*3($ctx),$D + add $SZ*4($ctx),$E + add $SZ*5($ctx),$F + add $SZ*6($ctx),$G + add $SZ*7($ctx),$H + + cmp $_end,$inp + + mov $A,$SZ*0($ctx) + mov $B,$SZ*1($ctx) + mov $C,$SZ*2($ctx) + mov $D,$SZ*3($ctx) + mov $E,$SZ*4($ctx) + mov $F,$SZ*5($ctx) + mov $G,$SZ*6($ctx) + mov $H,$SZ*7($ctx) + jb .Lloop_xop + + mov $_rsp,%rsi + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*$SZ+32(%rsp),%xmm6 + movaps 16*$SZ+48(%rsp),%xmm7 + movaps 16*$SZ+64(%rsp),%xmm8 + movaps 16*$SZ+80(%rsp),%xmm9 +___ +$code.=<<___ if ($win64 && $SZ>4); + movaps 16*$SZ+96(%rsp),%xmm10 + movaps 16*$SZ+112(%rsp),%xmm11 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_xop: + ret +.size ${func}_xop,.-${func}_xop +___ +} +###################################################################### +# AVX+shrd code path +# +local *ror = sub { &shrd(@_[0],@_) }; + +$code.=<<___; +.type ${func}_avx,\@function,3 +.align 64 +${func}_avx: +.Lavx_shortcut: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%r11 # copy %rsp + shl \$4,%rdx # num*16 + sub \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp + lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ + and \$-64,%rsp # align stack frame + mov $ctx,$_ctx # save ctx, 1st arg + mov $inp,$_inp # save inp, 2nd arh + mov %rdx,$_end # save end pointer, "3rd" arg + mov %r11,$_rsp # save copy of %rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,16*$SZ+32(%rsp) + movaps %xmm7,16*$SZ+48(%rsp) + movaps %xmm8,16*$SZ+64(%rsp) + movaps %xmm9,16*$SZ+80(%rsp) +___ +$code.=<<___ if ($win64 && $SZ>4); + movaps %xmm10,16*$SZ+96(%rsp) + movaps %xmm11,16*$SZ+112(%rsp) +___ +$code.=<<___; +.Lprologue_avx: + + vzeroupper + mov $SZ*0($ctx),$A + mov $SZ*1($ctx),$B + mov $SZ*2($ctx),$C + mov $SZ*3($ctx),$D + mov $SZ*4($ctx),$E + mov $SZ*5($ctx),$F + mov $SZ*6($ctx),$G + mov $SZ*7($ctx),$H +___ + if ($SZ==4) { # SHA256 + my @X = map("%xmm$_",(0..3)); + my ($t0,$t1,$t2,$t3, $t4,$t5) = map("%xmm$_",(4..9)); + +$code.=<<___; + vmovdqa $TABLE+`$SZ*2*$rounds`+32(%rip),$t4 + vmovdqa $TABLE+`$SZ*2*$rounds`+64(%rip),$t5 + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu 0x00($inp),@X[0] + vmovdqu 0x10($inp),@X[1] + vmovdqu 0x20($inp),@X[2] + 
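The AVX path above rebinds `ror` to `shrd` (`local *ror = sub { &shrd(@_[0],@_) };`), duplicating the first operand so the double-precision shift pulls its fill bits from the same register. With identical source and destination, SHRD by n is exactly a right rotate by n; a small model of the 32-bit case (the 64-bit case is analogous), with made-up helper names:

    #include <stdint.h>
    #include <assert.h>

    /* shrd $n,%reg,%reg == take the low 32 bits of (reg:reg) >> n */
    static uint32_t shrd_same_reg(uint32_t x, unsigned n)
    {
        return (uint32_t)((((uint64_t)x << 32) | x) >> n);
    }

    static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

    int main(void)
    {
        for (unsigned n = 1; n < 32; n++)
            assert(shrd_same_reg(0x6a09e667u, n) == rotr32(0x6a09e667u, n));
        return 0;
    }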
vmovdqu 0x30($inp),@X[3] + vpshufb $t3,@X[0],@X[0] + lea $TABLE(%rip),$Tbl + vpshufb $t3,@X[1],@X[1] + vpshufb $t3,@X[2],@X[2] + vpaddd 0x00($Tbl),@X[0],$t0 + vpshufb $t3,@X[3],@X[3] + vpaddd 0x20($Tbl),@X[1],$t1 + vpaddd 0x40($Tbl),@X[2],$t2 + vpaddd 0x60($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + mov $A,$a1 + vmovdqa $t1,0x10(%rsp) + mov $B,$a3 + vmovdqa $t2,0x20(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x30(%rsp) + mov $E,$a0 + jmp .Lavx_00_47 + +.align 16 +.Lavx_00_47: + sub \$`-16*2*$SZ`,$Tbl # size optimization +___ +sub Xupdate_256_AVX () { + ( + '&vpalignr ($t0,@X[1],@X[0],$SZ)', # X[1..4] + '&vpalignr ($t3,@X[3],@X[2],$SZ)', # X[9..12] + '&vpsrld ($t2,$t0,$sigma0[0]);', + '&vpaddd (@X[0],@X[0],$t3)', # X[0..3] += X[9..12] + '&vpsrld ($t3,$t0,$sigma0[2])', + '&vpslld ($t1,$t0,8*$SZ-$sigma0[1]);', + '&vpxor ($t0,$t3,$t2)', + '&vpshufd ($t3,@X[3],0b11111010)',# X[14..15] + '&vpsrld ($t2,$t2,$sigma0[1]-$sigma0[0]);', + '&vpxor ($t0,$t0,$t1)', + '&vpslld ($t1,$t1,$sigma0[1]-$sigma0[0]);', + '&vpxor ($t0,$t0,$t2)', + '&vpsrld ($t2,$t3,$sigma1[2]);', + '&vpxor ($t0,$t0,$t1)', # sigma0(X[1..4]) + '&vpsrlq ($t3,$t3,$sigma1[0]);', + '&vpaddd (@X[0],@X[0],$t0)', # X[0..3] += sigma0(X[1..4]) + '&vpxor ($t2,$t2,$t3);', + '&vpsrlq ($t3,$t3,$sigma1[1]-$sigma1[0])', + '&vpxor ($t2,$t2,$t3)', + '&vpshufb ($t2,$t2,$t4)', # sigma1(X[14..15]) + '&vpaddd (@X[0],@X[0],$t2)', # X[0..1] += sigma1(X[14..15]) + '&vpshufd ($t3,@X[0],0b01010000)',# X[16..17] + '&vpsrld ($t2,$t3,$sigma1[2])', + '&vpsrlq ($t3,$t3,$sigma1[0])', + '&vpxor ($t2,$t2,$t3);', + '&vpsrlq ($t3,$t3,$sigma1[1]-$sigma1[0])', + '&vpxor ($t2,$t2,$t3)', + '&vpshufb ($t2,$t2,$t5)', + '&vpaddd (@X[0],@X[0],$t2)' # X[2..3] += sigma1(X[16..17]) + ); +} + +sub AVX_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 104 instructions + + foreach (Xupdate_256_AVX()) { # 29 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + } + &vpaddd ($t2,@X[0],16*2*$j."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (16*$j."(%rsp)",$t2); +} + + for ($i=0,$j=0; $j<4; $j++) { + &AVX_256_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmpb ($SZ-1+16*2*$SZ."($Tbl)",0); + &jne (".Lavx_00_47"); + + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } + + } else { # SHA512 + my @X = map("%xmm$_",(0..7)); + my ($t0,$t1,$t2,$t3) = map("%xmm$_",(8..11)); + +$code.=<<___; + jmp .Lloop_avx +.align 16 +.Lloop_avx: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu 0x00($inp),@X[0] + lea $TABLE+0x80(%rip),$Tbl # size optimization + vmovdqu 0x10($inp),@X[1] + vmovdqu 0x20($inp),@X[2] + vpshufb $t3,@X[0],@X[0] + vmovdqu 0x30($inp),@X[3] + vpshufb $t3,@X[1],@X[1] + vmovdqu 0x40($inp),@X[4] + vpshufb $t3,@X[2],@X[2] + vmovdqu 0x50($inp),@X[5] + vpshufb $t3,@X[3],@X[3] + vmovdqu 0x60($inp),@X[6] + vpshufb $t3,@X[4],@X[4] + vmovdqu 0x70($inp),@X[7] + vpshufb $t3,@X[5],@X[5] + vpaddq -0x80($Tbl),@X[0],$t0 + vpshufb $t3,@X[6],@X[6] + vpaddq -0x60($Tbl),@X[1],$t1 + vpshufb $t3,@X[7],@X[7] + vpaddq -0x40($Tbl),@X[2],$t2 + vpaddq -0x20($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + vpaddq 0x00($Tbl),@X[4],$t0 + vmovdqa $t1,0x10(%rsp) + vpaddq 0x20($Tbl),@X[5],$t1 + vmovdqa $t2,0x20(%rsp) + vpaddq 0x40($Tbl),@X[6],$t2 + vmovdqa $t3,0x30(%rsp) + vpaddq 0x60($Tbl),@X[7],$t3 + vmovdqa $t0,0x40(%rsp) + mov $A,$a1 + vmovdqa $t1,0x50(%rsp) + mov $B,$a3 + vmovdqa $t2,0x60(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x70(%rsp) + mov $E,$a0 + jmp 
.Lavx_00_47 + +.align 16 +.Lavx_00_47: + add \$`16*2*$SZ`,$Tbl +___ +sub Xupdate_512_AVX () { + ( + '&vpalignr ($t0,@X[1],@X[0],$SZ)', # X[1..2] + '&vpalignr ($t3,@X[5],@X[4],$SZ)', # X[9..10] + '&vpsrlq ($t2,$t0,$sigma0[0])', + '&vpaddq (@X[0],@X[0],$t3);', # X[0..1] += X[9..10] + '&vpsrlq ($t3,$t0,$sigma0[2])', + '&vpsllq ($t1,$t0,8*$SZ-$sigma0[1]);', + '&vpxor ($t0,$t3,$t2)', + '&vpsrlq ($t2,$t2,$sigma0[1]-$sigma0[0]);', + '&vpxor ($t0,$t0,$t1)', + '&vpsllq ($t1,$t1,$sigma0[1]-$sigma0[0]);', + '&vpxor ($t0,$t0,$t2)', + '&vpsrlq ($t3,@X[7],$sigma1[2]);', + '&vpxor ($t0,$t0,$t1)', # sigma0(X[1..2]) + '&vpsllq ($t2,@X[7],8*$SZ-$sigma1[1]);', + '&vpaddq (@X[0],@X[0],$t0)', # X[0..1] += sigma0(X[1..2]) + '&vpsrlq ($t1,@X[7],$sigma1[0]);', + '&vpxor ($t3,$t3,$t2)', + '&vpsllq ($t2,$t2,$sigma1[1]-$sigma1[0]);', + '&vpxor ($t3,$t3,$t1)', + '&vpsrlq ($t1,$t1,$sigma1[1]-$sigma1[0]);', + '&vpxor ($t3,$t3,$t2)', + '&vpxor ($t3,$t3,$t1)', # sigma1(X[14..15]) + '&vpaddq (@X[0],@X[0],$t3)', # X[0..1] += sigma1(X[14..15]) + ); +} + +sub AVX_512_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body); # 52 instructions + + foreach (Xupdate_512_AVX()) { # 23 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + } + &vpaddq ($t2,@X[0],16*2*$j-0x80."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa (16*$j."(%rsp)",$t2); +} + + for ($i=0,$j=0; $j<8; $j++) { + &AVX_512_00_47($j,\&body_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &cmpb ($SZ-1+16*2*$SZ-0x80."($Tbl)",0); + &jne (".Lavx_00_47"); + + for ($i=0; $i<16; ) { + foreach(body_00_15()) { eval; } + } +} +$code.=<<___; + mov $_ctx,$ctx + mov $a1,$A + + add $SZ*0($ctx),$A + lea 16*$SZ($inp),$inp + add $SZ*1($ctx),$B + add $SZ*2($ctx),$C + add $SZ*3($ctx),$D + add $SZ*4($ctx),$E + add $SZ*5($ctx),$F + add $SZ*6($ctx),$G + add $SZ*7($ctx),$H + + cmp $_end,$inp + + mov $A,$SZ*0($ctx) + mov $B,$SZ*1($ctx) + mov $C,$SZ*2($ctx) + mov $D,$SZ*3($ctx) + mov $E,$SZ*4($ctx) + mov $F,$SZ*5($ctx) + mov $G,$SZ*6($ctx) + mov $H,$SZ*7($ctx) + jb .Lloop_avx + + mov $_rsp,%rsi + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*$SZ+32(%rsp),%xmm6 + movaps 16*$SZ+48(%rsp),%xmm7 + movaps 16*$SZ+64(%rsp),%xmm8 + movaps 16*$SZ+80(%rsp),%xmm9 +___ +$code.=<<___ if ($win64 && $SZ>4); + movaps 16*$SZ+96(%rsp),%xmm10 + movaps 16*$SZ+112(%rsp),%xmm11 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_avx: + ret +.size ${func}_avx,.-${func}_avx +___ + +if ($avx>1) {{ +###################################################################### +# AVX2+BMI code path +# +my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp +my $PUSH8=8*2*$SZ; +use integer; + +sub bodyx_00_15 () { + # at start $a1 should be zero, $a3 - $b^$c and $a4 copy of $f + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'. 
+ + '&add ($h,(32*($i/(16/$SZ))+$SZ*($i%(16/$SZ)))%$PUSH8.$base)', # h+=X[i]+K[i] + '&and ($a4,$e)', # f&e + '&rorx ($a0,$e,$Sigma1[2])', + '&rorx ($a2,$e,$Sigma1[1])', + + '&lea ($a,"($a,$a1)")', # h+=Sigma0(a) from the past + '&lea ($h,"($h,$a4)")', + '&andn ($a4,$e,$g)', # ~e&g + '&xor ($a0,$a2)', + + '&rorx ($a1,$e,$Sigma1[0])', + '&lea ($h,"($h,$a4)")', # h+=Ch(e,f,g)=(e&f)+(~e&g) + '&xor ($a0,$a1)', # Sigma1(e) + '&mov ($a2,$a)', + + '&rorx ($a4,$a,$Sigma0[2])', + '&lea ($h,"($h,$a0)")', # h+=Sigma1(e) + '&xor ($a2,$b)', # a^b, b^c in next round + '&rorx ($a1,$a,$Sigma0[1])', + + '&rorx ($a0,$a,$Sigma0[0])', + '&lea ($d,"($d,$h)")', # d+=h + '&and ($a3,$a2)', # (b^c)&(a^b) + '&xor ($a1,$a4)', + + '&xor ($a3,$b)', # Maj(a,b,c)=Ch(a^b,c,b) + '&xor ($a1,$a0)', # Sigma0(a) + '&lea ($h,"($h,$a3)");'. # h+=Maj(a,b,c) + '&mov ($a4,$e)', # copy of f in future + + '($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;' + ); + # and at the finish one has to $a+=$a1 +} + +$code.=<<___; +.type ${func}_avx2,\@function,3 +.align 64 +${func}_avx2: +.Lavx2_shortcut: + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + mov %rsp,%r11 # copy %rsp + sub \$`2*$SZ*$rounds+4*8+$win64*16*($SZ==4?4:6)`,%rsp + shl \$4,%rdx # num*16 + and \$-256*$SZ,%rsp # align stack frame + lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ + add \$`2*$SZ*($rounds-8)`,%rsp + mov $ctx,$_ctx # save ctx, 1st arg + mov $inp,$_inp # save inp, 2nd arh + mov %rdx,$_end # save end pointer, "3rd" arg + mov %r11,$_rsp # save copy of %rsp +___ +$code.=<<___ if ($win64); + movaps %xmm6,16*$SZ+32(%rsp) + movaps %xmm7,16*$SZ+48(%rsp) + movaps %xmm8,16*$SZ+64(%rsp) + movaps %xmm9,16*$SZ+80(%rsp) +___ +$code.=<<___ if ($win64 && $SZ>4); + movaps %xmm10,16*$SZ+96(%rsp) + movaps %xmm11,16*$SZ+112(%rsp) +___ +$code.=<<___; +.Lprologue_avx2: + + vzeroupper + sub \$-16*$SZ,$inp # inp++, size optimization + mov $SZ*0($ctx),$A + mov $inp,%r12 # borrow $T1 + mov $SZ*1($ctx),$B + cmp %rdx,$inp # $_end + mov $SZ*2($ctx),$C + cmove %rsp,%r12 # next block or random data + mov $SZ*3($ctx),$D + mov $SZ*4($ctx),$E + mov $SZ*5($ctx),$F + mov $SZ*6($ctx),$G + mov $SZ*7($ctx),$H +___ + if ($SZ==4) { # SHA256 + my @X = map("%ymm$_",(0..3)); + my ($t0,$t1,$t2,$t3, $t4,$t5) = map("%ymm$_",(4..9)); + +$code.=<<___; + vmovdqa $TABLE+`$SZ*2*$rounds`+32(%rip),$t4 + vmovdqa $TABLE+`$SZ*2*$rounds`+64(%rip),$t5 + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqa $TABLE+`$SZ*2*$rounds`(%rip),$t3 + vmovdqu -16*$SZ+0($inp),%xmm0 + vmovdqu -16*$SZ+16($inp),%xmm1 + vmovdqu -16*$SZ+32($inp),%xmm2 + vmovdqu -16*$SZ+48($inp),%xmm3 + #mov $inp,$_inp # offload $inp + vinserti128 \$1,(%r12),@X[0],@X[0] + vinserti128 \$1,16(%r12),@X[1],@X[1] + vpshufb $t3,@X[0],@X[0] + vinserti128 \$1,32(%r12),@X[2],@X[2] + vpshufb $t3,@X[1],@X[1] + vinserti128 \$1,48(%r12),@X[3],@X[3] + + lea $TABLE(%rip),$Tbl + vpshufb $t3,@X[2],@X[2] + vpaddd 0x00($Tbl),@X[0],$t0 + vpshufb $t3,@X[3],@X[3] + vpaddd 0x20($Tbl),@X[1],$t1 + vpaddd 0x40($Tbl),@X[2],$t2 + vpaddd 0x60($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + xor $a1,$a1 + vmovdqa $t1,0x20(%rsp) + lea -$PUSH8(%rsp),%rsp + mov $B,$a3 + vmovdqa $t2,0x00(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x20(%rsp) + mov $F,$a4 + sub \$-16*2*$SZ,$Tbl # size optimization + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: +___ + +sub AVX2_256_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body,&$body,&$body); # 96 instructions +my $base = "+2*$PUSH8(%rsp)"; + + &lea ("%rsp","-$PUSH8(%rsp)") if (($j%2)==0); + foreach 
(Xupdate_256_AVX()) { # 29 instructions + eval; + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + } + &vpaddd ($t2,@X[0],16*2*$j."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa ((32*$j)%$PUSH8."(%rsp)",$t2); +} + + for ($i=0,$j=0; $j<4; $j++) { + &AVX2_256_00_47($j,\&bodyx_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &lea ($Tbl,16*2*$SZ."($Tbl)"); + &cmpb (($SZ-1)."($Tbl)",0); + &jne (".Lavx2_00_47"); + + for ($i=0; $i<16; ) { + my $base=$i<8?"+$PUSH8(%rsp)":"(%rsp)"; + foreach(bodyx_00_15()) { eval; } + } + } else { # SHA512 + my @X = map("%ymm$_",(0..7)); + my ($t0,$t1,$t2,$t3) = map("%ymm$_",(8..11)); + +$code.=<<___; + jmp .Loop_avx2 +.align 16 +.Loop_avx2: + vmovdqu -16*$SZ($inp),%xmm0 + vmovdqu -16*$SZ+16($inp),%xmm1 + vmovdqu -16*$SZ+32($inp),%xmm2 + lea $TABLE+0x80(%rip),$Tbl # size optimization + vmovdqu -16*$SZ+48($inp),%xmm3 + vmovdqu -16*$SZ+64($inp),%xmm4 + vmovdqu -16*$SZ+80($inp),%xmm5 + vmovdqu -16*$SZ+96($inp),%xmm6 + vmovdqu -16*$SZ+112($inp),%xmm7 + #mov $inp,$_inp # offload $inp + vmovdqa `$SZ*2*$rounds-0x80`($Tbl),$t2 + vinserti128 \$1,(%r12),@X[0],@X[0] + vinserti128 \$1,16(%r12),@X[1],@X[1] + vpshufb $t2,@X[0],@X[0] + vinserti128 \$1,32(%r12),@X[2],@X[2] + vpshufb $t2,@X[1],@X[1] + vinserti128 \$1,48(%r12),@X[3],@X[3] + vpshufb $t2,@X[2],@X[2] + vinserti128 \$1,64(%r12),@X[4],@X[4] + vpshufb $t2,@X[3],@X[3] + vinserti128 \$1,80(%r12),@X[5],@X[5] + vpshufb $t2,@X[4],@X[4] + vinserti128 \$1,96(%r12),@X[6],@X[6] + vpshufb $t2,@X[5],@X[5] + vinserti128 \$1,112(%r12),@X[7],@X[7] + + vpaddq -0x80($Tbl),@X[0],$t0 + vpshufb $t2,@X[6],@X[6] + vpaddq -0x60($Tbl),@X[1],$t1 + vpshufb $t2,@X[7],@X[7] + vpaddq -0x40($Tbl),@X[2],$t2 + vpaddq -0x20($Tbl),@X[3],$t3 + vmovdqa $t0,0x00(%rsp) + vpaddq 0x00($Tbl),@X[4],$t0 + vmovdqa $t1,0x20(%rsp) + vpaddq 0x20($Tbl),@X[5],$t1 + vmovdqa $t2,0x40(%rsp) + vpaddq 0x40($Tbl),@X[6],$t2 + vmovdqa $t3,0x60(%rsp) + lea -$PUSH8(%rsp),%rsp + vpaddq 0x60($Tbl),@X[7],$t3 + vmovdqa $t0,0x00(%rsp) + xor $a1,$a1 + vmovdqa $t1,0x20(%rsp) + mov $B,$a3 + vmovdqa $t2,0x40(%rsp) + xor $C,$a3 # magic + vmovdqa $t3,0x60(%rsp) + mov $F,$a4 + add \$16*2*$SZ,$Tbl + jmp .Lavx2_00_47 + +.align 16 +.Lavx2_00_47: +___ + +sub AVX2_512_00_47 () { +my $j = shift; +my $body = shift; +my @X = @_; +my @insns = (&$body,&$body); # 48 instructions +my $base = "+2*$PUSH8(%rsp)"; + + &lea ("%rsp","-$PUSH8(%rsp)") if (($j%4)==0); + foreach (Xupdate_512_AVX()) { # 23 instructions + eval; + if ($_ !~ /\;$/) { + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + } + } + &vpaddq ($t2,@X[0],16*2*$j-0x80."($Tbl)"); + foreach (@insns) { eval; } # remaining instructions + &vmovdqa ((32*$j)%$PUSH8."(%rsp)",$t2); +} + + for ($i=0,$j=0; $j<8; $j++) { + &AVX2_512_00_47($j,\&bodyx_00_15,@X); + push(@X,shift(@X)); # rotate(@X) + } + &lea ($Tbl,16*2*$SZ."($Tbl)"); + &cmpb (($SZ-1-0x80)."($Tbl)",0); + &jne (".Lavx2_00_47"); + + for ($i=0; $i<16; ) { + my $base=$i<8?"+$PUSH8(%rsp)":"(%rsp)"; + foreach(bodyx_00_15()) { eval; } + } +} +$code.=<<___; + mov `2*$SZ*$rounds`(%rsp),$ctx # $_ctx + add $a1,$A + #mov `2*$SZ*$rounds+8`(%rsp),$inp # $_inp + lea `2*$SZ*($rounds-8)`(%rsp),$Tbl + + add $SZ*0($ctx),$A + add $SZ*1($ctx),$B + add $SZ*2($ctx),$C + add $SZ*3($ctx),$D + add $SZ*4($ctx),$E + add $SZ*5($ctx),$F + add $SZ*6($ctx),$G + add $SZ*7($ctx),$H + + mov $A,$SZ*0($ctx) + mov $B,$SZ*1($ctx) + mov $C,$SZ*2($ctx) + mov $D,$SZ*3($ctx) + mov $E,$SZ*4($ctx) + mov $F,$SZ*5($ctx) + mov $G,$SZ*6($ctx) + mov $H,$SZ*7($ctx) + + cmp 
`$PUSH8+2*8`($Tbl),$inp # $_end + je .Ldone_avx2 + + xor $a1,$a1 + mov $B,$a3 + xor $C,$a3 # magic + mov $F,$a4 + jmp .Lower_avx2 +.align 16 +.Lower_avx2: +___ + for ($i=0; $i<8; ) { + my $base="+16($Tbl)"; + foreach(bodyx_00_15()) { eval; } + } +$code.=<<___; + lea -$PUSH8($Tbl),$Tbl + cmp %rsp,$Tbl + jae .Lower_avx2 + + mov `2*$SZ*$rounds`(%rsp),$ctx # $_ctx + add $a1,$A + #mov `2*$SZ*$rounds+8`(%rsp),$inp # $_inp + lea `2*$SZ*($rounds-8)`(%rsp),%rsp + + add $SZ*0($ctx),$A + add $SZ*1($ctx),$B + add $SZ*2($ctx),$C + add $SZ*3($ctx),$D + add $SZ*4($ctx),$E + add $SZ*5($ctx),$F + lea `2*16*$SZ`($inp),$inp # inp+=2 + add $SZ*6($ctx),$G + mov $inp,%r12 + add $SZ*7($ctx),$H + cmp $_end,$inp + + mov $A,$SZ*0($ctx) + cmove %rsp,%r12 # next block or stale data + mov $B,$SZ*1($ctx) + mov $C,$SZ*2($ctx) + mov $D,$SZ*3($ctx) + mov $E,$SZ*4($ctx) + mov $F,$SZ*5($ctx) + mov $G,$SZ*6($ctx) + mov $H,$SZ*7($ctx) + + jbe .Loop_avx2 + lea (%rsp),$Tbl + +.Ldone_avx2: + lea ($Tbl),%rsp + mov $_rsp,%rsi + vzeroupper +___ +$code.=<<___ if ($win64); + movaps 16*$SZ+32(%rsp),%xmm6 + movaps 16*$SZ+48(%rsp),%xmm7 + movaps 16*$SZ+64(%rsp),%xmm8 + movaps 16*$SZ+80(%rsp),%xmm9 +___ +$code.=<<___ if ($win64 && $SZ>4); + movaps 16*$SZ+96(%rsp),%xmm10 + movaps 16*$SZ+112(%rsp),%xmm11 +___ +$code.=<<___; + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue_avx2: + ret +.size ${func}_avx2,.-${func}_avx2 +___ +}} +}}}}} + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + mov 8($disp),%rsi # disp->ImageBase + mov 56($disp),%r11 # disp->HanderlData + + mov 0(%r11),%r10d # HandlerData[0] + lea (%rsi,%r10),%r10 # prologue label + cmp %r10,%rbx # context->RipRsp + + mov 4(%r11),%r10d # HandlerData[1] + lea (%rsi,%r10),%r10 # epilogue label + cmp %r10,%rbx # context->Rip>=epilogue label + jae .Lin_prologue +___ +$code.=<<___ if ($avx>1); + lea .Lavx2_shortcut(%rip),%r10 + cmp %r10,%rbx # context->RipRbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + + lea .Lepilogue(%rip),%r10 + cmp %r10,%rbx + jb .Lin_prologue # non-AVX code + + lea 16*$SZ+4*8(%rsi),%rsi # Xmm6- save area + lea 512($context),%rdi # &context.Xmm6 + mov \$`$SZ==4?8:12`,%ecx + .long 0xa548f3fc # cld; rep movsq + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # 
&disp->HandlerData lea 24(%rsi),%r12 # &disp->EstablisherFrame mov %r10,32(%rsp) # arg5 mov %r11,40(%rsp) # arg6 @@ -417,21 +2263,136 @@ () pop %rsi ret .size se_handler,.-se_handler +___ + +$code.=<<___ if ($SZ==4 && $shaext); +.type shaext_handler,\@abi-omnipotent +.align 16 +shaext_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lprologue_shaext(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lin_prologue + + lea .Lepilogue_shaext(%rip),%r10 + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + lea -8-5*16(%rax),%rsi + lea 512($context),%rdi # &context.Xmm6 + mov \$10,%ecx + .long 0xa548f3fc # cld; rep movsq + + jmp .Lin_prologue +.size shaext_handler,.-shaext_handler +___ +$code.=<<___; .section .pdata .align 4 .rva .LSEH_begin_$func .rva .LSEH_end_$func .rva .LSEH_info_$func - +___ +$code.=<<___ if ($SZ==4 && $shaext); + .rva .LSEH_begin_${func}_shaext + .rva .LSEH_end_${func}_shaext + .rva .LSEH_info_${func}_shaext +___ +$code.=<<___ if ($SZ==4); + .rva .LSEH_begin_${func}_ssse3 + .rva .LSEH_end_${func}_ssse3 + .rva .LSEH_info_${func}_ssse3 +___ +$code.=<<___ if ($avx && $SZ==8); + .rva .LSEH_begin_${func}_xop + .rva .LSEH_end_${func}_xop + .rva .LSEH_info_${func}_xop +___ +$code.=<<___ if ($avx); + .rva .LSEH_begin_${func}_avx + .rva .LSEH_end_${func}_avx + .rva .LSEH_info_${func}_avx +___ +$code.=<<___ if ($avx>1); + .rva .LSEH_begin_${func}_avx2 + .rva .LSEH_end_${func}_avx2 + .rva .LSEH_info_${func}_avx2 +___ +$code.=<<___; .section .xdata .align 8 .LSEH_info_$func: .byte 9,0,0,0 .rva se_handler + .rva .Lprologue,.Lepilogue # HandlerData[] +___ +$code.=<<___ if ($SZ==4 && $shaext); +.LSEH_info_${func}_shaext: + .byte 9,0,0,0 + .rva shaext_handler ___ +$code.=<<___ if ($SZ==4); +.LSEH_info_${func}_ssse3: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_ssse3,.Lepilogue_ssse3 # HandlerData[] +___ +$code.=<<___ if ($avx && $SZ==8); +.LSEH_info_${func}_xop: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_xop,.Lepilogue_xop # HandlerData[] +___ +$code.=<<___ if ($avx); +.LSEH_info_${func}_avx: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_avx,.Lepilogue_avx # HandlerData[] +___ +$code.=<<___ if ($avx>1); +.LSEH_info_${func}_avx2: + .byte 9,0,0,0 + .rva se_handler + .rva .Lprologue_avx2,.Lepilogue_avx2 # HandlerData[] +___ +} + +sub sha256op38 { + my $instr = shift; + my %opcodelet = ( + "sha256rnds2" => 0xcb, + "sha256msg1" => 0xcc, + "sha256msg2" => 0xcd ); + + if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-7]),\s*%xmm([0-7])/) { + my @opcode=(0x0f,0x38); + push @opcode,$opcodelet{$instr}; + push @opcode,0xc0|($1&7)|(($2&7)<<3); # ModR/M + return ".byte\t".join(',',@opcode); + } else { + return $instr."\t".@_[0]; + } } -$code =~ s/\`([^\`]*)\`/eval $1/gem; -print $code; +foreach (split("\n",$code)) { + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256[^\s]*)\s+(.*)/sha256op38($1,$2)/geo; + + print $_,"\n"; +} close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl b/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl new file mode 100755 index 00000000000000..47189502c6cc95 --- /dev/null +++ b/deps/openssl/openssl/crypto/sha/asm/sha512p8-ppc.pl @@ -0,0 +1,424 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. 
The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA256/512 for PowerISA v2.07. +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This module is +# ~60% faster than integer-only sha512-ppc.pl. To anchor to something +# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than +# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than +# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting +# result is degree of computational resources' utilization. POWER8 is +# "massively multi-threaded chip" and difference between single- and +# maximum multi-process benchmark results tells that utlization is +# whooping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and +# for sha1-ppc.pl - 73%. 100% means that multi-process result equals +# to single-process one, given that all threads end up on the same +# physical core. + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$LENDIAN=($flavour=~/le/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +if ($output =~ /512/) { + $bits=512; + $SZ=8; + $sz="d"; + $rounds=80; +} else { + $bits=256; + $SZ=4; + $sz="w"; + $rounds=64; +} + +$func="sha${bits}_block_p8"; +$FRAME=8*$SIZE_T; + +$sp ="r1"; +$toc="r2"; +$ctx="r3"; +$inp="r4"; +$num="r5"; +$Tbl="r6"; +$idx="r7"; +$lrsave="r8"; +$offload="r11"; +$vrsave="r12"; +($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31)); + $x00=0 if ($flavour =~ /osx/); + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); +@X=map("v$_",(8..23)); +($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31)); + +sub ROUND { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)%16; + +$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); + lvx_u @X[$i+1],0,$inp ; load X[i] in advance + addi $inp,$inp,16 +___ +$code.=<<___ if ($i<16 && ($i%(16/$SZ))); + vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ +___ +$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); + vperm @X[$i],@X[$i],@X[$i],$lemask +___ +$code.=<<___; + `"vshasigma${sz} $s0,@X[($j+1)%16],0,0" if ($i>=15)` + vsel $Func,$g,$f,$e ; Ch(e,f,g) + vshasigma${sz} $S1,$e,1,15 ; Sigma1(e) + vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] + vshasigma${sz} $S0,$a,1,0 ; Sigma0(a) + `"vshasigma${sz} $s1,@X[($j+14)%16],0,15" if ($i>=15)` + vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) + vxor $Func,$a,$b + `"vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]" if ($i>=15)` + vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e) + vsel $Func,$b,$c,$Func ; Maj(a,b,c) + vaddu${sz}m $g,$g,$Ki ; future h+=K[i] + vaddu${sz}m $d,$d,$h ; d+=h + vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c) + `"vaddu${sz}m @X[$j],@X[$j],$s0" if ($i>=15)` + lvx $Ki,$idx,$Tbl ; load next K[i] + addi $idx,$idx,16 + vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c) + `"vaddu${sz}m @X[$j],@X[$j],$s1" if ($i>=15)` +___ +} + +$code=<<___; 
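ROUND() above maps one SHA-2 round onto the POWER8 vector unit: vsel produces Ch and Maj, vshasigma${sz} evaluates the Sigma/sigma functions in a single instruction, and the "future h+=K[i]" add folds the next round's constant into the register that becomes h after the variable rotation. For reference, the scalar round it implements, shown for the 32-bit (SHA-256) case with illustrative names:

    #include <stdint.h>

    static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }
    static uint32_t Ch (uint32_t e, uint32_t f, uint32_t g) { return (e & f) ^ (~e & g); }
    static uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }
    static uint32_t Sigma0(uint32_t a) { return rotr32(a, 2) ^ rotr32(a, 13) ^ rotr32(a, 22); }
    static uint32_t Sigma1(uint32_t e) { return rotr32(e, 6) ^ rotr32(e, 11) ^ rotr32(e, 25); }

    /* One round over S[] = {a,b,c,d,e,f,g,h}; Ki is the round constant and Wi
     * the schedule word (X[i] in the vector code above). */
    static void sha256_round(uint32_t S[8], uint32_t Ki, uint32_t Wi)
    {
        uint32_t T1 = S[7] + Sigma1(S[4]) + Ch(S[4], S[5], S[6]) + Ki + Wi;
        uint32_t T2 = Sigma0(S[0]) + Maj(S[0], S[1], S[2]);

        S[7] = S[6]; S[6] = S[5]; S[5] = S[4]; S[4] = S[3] + T1;
        S[3] = S[2]; S[2] = S[1]; S[1] = S[0]; S[0] = T1 + T2;
    }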
+.machine "any" +.text + +.globl $func +.align 6 +$func: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr $lrsave + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + mfspr $vrsave,256 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r11,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + mtspr 256,r11 + + bl LPICmeup + addi $offload,$sp,$FRAME+15 +___ +$code.=<<___ if ($LENDIAN); + li $idx,8 + lvsl $lemask,0,$idx + vspltisb $Ki,0x0f + vxor $lemask,$lemask,$Ki +___ +$code.=<<___ if ($SZ==4); + lvx_4w $A,$x00,$ctx + lvx_4w $E,$x10,$ctx + vsldoi $B,$A,$A,4 # unpack + vsldoi $C,$A,$A,8 + vsldoi $D,$A,$A,12 + vsldoi $F,$E,$E,4 + vsldoi $G,$E,$E,8 + vsldoi $H,$E,$E,12 +___ +$code.=<<___ if ($SZ==8); + lvx_u $A,$x00,$ctx + lvx_u $C,$x10,$ctx + lvx_u $E,$x20,$ctx + vsldoi $B,$A,$A,8 # unpack + lvx_u $G,$x30,$ctx + vsldoi $D,$C,$C,8 + vsldoi $F,$E,$E,8 + vsldoi $H,$G,$G,8 +___ +$code.=<<___; + li r0,`($rounds-16)/16` # inner loop counter + b Loop +.align 5 +Loop: + lvx $Ki,$x00,$Tbl + li $idx,16 + lvx_u @X[0],0,$inp + addi $inp,$inp,16 + stvx $A,$x00,$offload # offload $A-$H + stvx $B,$x10,$offload + stvx $C,$x20,$offload + stvx $D,$x30,$offload + stvx $E,$x40,$offload + stvx $F,$x50,$offload + stvx $G,$x60,$offload + stvx $H,$x70,$offload + vaddu${sz}m $H,$H,$Ki # h+K[i] + lvx $Ki,$idx,$Tbl + addi $idx,$idx,16 +___ +for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mtctr r0 + b L16_xx +.align 5 +L16_xx: +___ +for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bdnz L16_xx + + lvx @X[2],$x00,$offload + subic. 
$num,$num,1 + lvx @X[3],$x10,$offload + vaddu${sz}m $A,$A,@X[2] + lvx @X[4],$x20,$offload + vaddu${sz}m $B,$B,@X[3] + lvx @X[5],$x30,$offload + vaddu${sz}m $C,$C,@X[4] + lvx @X[6],$x40,$offload + vaddu${sz}m $D,$D,@X[5] + lvx @X[7],$x50,$offload + vaddu${sz}m $E,$E,@X[6] + lvx @X[8],$x60,$offload + vaddu${sz}m $F,$F,@X[7] + lvx @X[9],$x70,$offload + vaddu${sz}m $G,$G,@X[8] + vaddu${sz}m $H,$H,@X[9] + bne Loop +___ +$code.=<<___ if ($SZ==4); + lvx @X[0],$idx,$Tbl + addi $idx,$idx,16 + vperm $A,$A,$B,$Ki # pack the answer + lvx @X[1],$idx,$Tbl + vperm $E,$E,$F,$Ki + vperm $A,$A,$C,@X[0] + vperm $E,$E,$G,@X[0] + vperm $A,$A,$D,@X[1] + vperm $E,$E,$H,@X[1] + stvx_4w $A,$x00,$ctx + stvx_4w $E,$x10,$ctx +___ +$code.=<<___ if ($SZ==8); + vperm $A,$A,$B,$Ki # pack the answer + vperm $C,$C,$D,$Ki + vperm $E,$E,$F,$Ki + vperm $G,$G,$H,$Ki + stvx_u $A,$x00,$ctx + stvx_u $C,$x10,$ctx + stvx_u $E,$x20,$ctx + stvx_u $G,$x30,$ctx +___ +$code.=<<___; + li r10,`$FRAME+8*16+15` + mtlr $lrsave + li r11,`$FRAME+8*16+31` + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,4,1,0x80,6,3,0 + .long 0 +.size $func,.-$func +___ + +# Ugly hack here, because PPC assembler syntax seem to vary too +# much from platforms to platform... +$code.=<<___; +.align 6 +LPICmeup: + mflr r0 + bcl 20,31,\$+4 + mflr $Tbl ; vvvvvv "distance" between . 
and 1st data entry + addi $Tbl,$Tbl,`64-8` + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` +___ + +if ($SZ==8) { + local *table = sub { + foreach(@_) { $code.=".quad $_,$_\n"; } + }; + table( + "0x428a2f98d728ae22","0x7137449123ef65cd", + "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", + "0x3956c25bf348b538","0x59f111f1b605d019", + "0x923f82a4af194f9b","0xab1c5ed5da6d8118", + "0xd807aa98a3030242","0x12835b0145706fbe", + "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", + "0x72be5d74f27b896f","0x80deb1fe3b1696b1", + "0x9bdc06a725c71235","0xc19bf174cf692694", + "0xe49b69c19ef14ad2","0xefbe4786384f25e3", + "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", + "0x2de92c6f592b0275","0x4a7484aa6ea6e483", + "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", + "0x983e5152ee66dfab","0xa831c66d2db43210", + "0xb00327c898fb213f","0xbf597fc7beef0ee4", + "0xc6e00bf33da88fc2","0xd5a79147930aa725", + "0x06ca6351e003826f","0x142929670a0e6e70", + "0x27b70a8546d22ffc","0x2e1b21385c26c926", + "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", + "0x650a73548baf63de","0x766a0abb3c77b2a8", + "0x81c2c92e47edaee6","0x92722c851482353b", + "0xa2bfe8a14cf10364","0xa81a664bbc423001", + "0xc24b8b70d0f89791","0xc76c51a30654be30", + "0xd192e819d6ef5218","0xd69906245565a910", + "0xf40e35855771202a","0x106aa07032bbd1b8", + "0x19a4c116b8d2d0c8","0x1e376c085141ab53", + "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", + "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", + "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", + "0x748f82ee5defb2fc","0x78a5636f43172f60", + "0x84c87814a1f0ab72","0x8cc702081a6439ec", + "0x90befffa23631e28","0xa4506cebde82bde9", + "0xbef9a3f7b2c67915","0xc67178f2e372532b", + "0xca273eceea26619c","0xd186b8c721c0c207", + "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", + "0x06f067aa72176fba","0x0a637dc5a2c898a6", + "0x113f9804bef90dae","0x1b710b35131c471b", + "0x28db77f523047d84","0x32caab7b40c72493", + "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", + "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", + "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); +$code.=<<___ if (!$LENDIAN); +.quad 0x0001020304050607,0x1011121314151617 +___ +$code.=<<___ if ($LENDIAN); # quad-swapped +.quad 0x1011121314151617,0x0001020304050607 +___ +} else { + local *table = sub { + foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } + }; + table( + "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", + "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", + "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", + "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", + "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", + "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", + "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", + "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", + "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", + "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", + "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", + "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", + "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", + "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", + "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", + "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); +$code.=<<___ if (!$LENDIAN); +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +___ +$code.=<<___ if ($LENDIAN); # word-swapped +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 
0x10111213,0x08090a0b,0x04050607,0x00010203 +___ +} +$code.=<<___; +.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; diff --git a/deps/openssl/openssl/crypto/sha/sha512.c b/deps/openssl/openssl/crypto/sha/sha512.c index de0aad8c9dfa2d..3bf66ae1987ef2 100644 --- a/deps/openssl/openssl/crypto/sha/sha512.c +++ b/deps/openssl/openssl/crypto/sha/sha512.c @@ -55,6 +55,7 @@ const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT; # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \ defined(__s390__) || defined(__s390x__) || \ + defined(__aarch64__) || \ defined(SHA512_ASM) # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA # endif @@ -353,6 +354,18 @@ static const SHA_LONG64 K512[80] = { asm ("rotrdi %0,%1,%2" \ : "=r"(ret) \ : "r"(a),"K"(n)); ret; }) +# elif defined(__aarch64__) +# define ROTR(a,n) ({ SHA_LONG64 ret; \ + asm ("ror %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a),"I"(n)); ret; }) +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ +# define PULL64(x) ({ SHA_LONG64 ret; \ + asm ("rev %0,%1" \ + : "=r"(ret) \ + : "r"(*((const SHA_LONG64 *)(&(x))))); ret; }) +# endif # endif # elif defined(_MSC_VER) # if defined(_WIN64) /* applies to both IA-64 and AMD64 */ diff --git a/deps/openssl/openssl/crypto/sparc_arch.h b/deps/openssl/openssl/crypto/sparc_arch.h new file mode 100644 index 00000000000000..e30d322a4ae219 --- /dev/null +++ b/deps/openssl/openssl/crypto/sparc_arch.h @@ -0,0 +1,101 @@ +#ifndef __SPARC_ARCH_H__ +# define __SPARC_ARCH_H__ + +# define SPARCV9_TICK_PRIVILEGED (1<<0) +# define SPARCV9_PREFER_FPU (1<<1) +# define SPARCV9_VIS1 (1<<2) +# define SPARCV9_VIS2 (1<<3)/* reserved */ +# define SPARCV9_FMADD (1<<4)/* reserved for SPARC64 V */ +# define SPARCV9_BLK (1<<5)/* VIS1 block copy */ +# define SPARCV9_VIS3 (1<<6) +# define SPARCV9_RANDOM (1<<7) +# define SPARCV9_64BIT_STACK (1<<8) + +/* + * OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register, + * %asr26, SPARC-T4 and later. There is no SPARCV9_CFR bit in + * OPENSSL_sparcv9cap_P[0], as %cfr copy is sufficient... 
+ */ +# define CFR_AES 0x00000001/* Supports AES opcodes */ +# define CFR_DES 0x00000002/* Supports DES opcodes */ +# define CFR_KASUMI 0x00000004/* Supports KASUMI opcodes */ +# define CFR_CAMELLIA 0x00000008/* Supports CAMELLIA opcodes */ +# define CFR_MD5 0x00000010/* Supports MD5 opcodes */ +# define CFR_SHA1 0x00000020/* Supports SHA1 opcodes */ +# define CFR_SHA256 0x00000040/* Supports SHA256 opcodes */ +# define CFR_SHA512 0x00000080/* Supports SHA512 opcodes */ +# define CFR_MPMUL 0x00000100/* Supports MPMUL opcodes */ +# define CFR_MONTMUL 0x00000200/* Supports MONTMUL opcodes */ +# define CFR_MONTSQR 0x00000400/* Supports MONTSQR opcodes */ +# define CFR_CRC32C 0x00000800/* Supports CRC32C opcodes */ + +# if defined(OPENSSL_PIC) && !defined(__PIC__) +# define __PIC__ +# endif + +# if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__) +# define __arch64__ +# endif + +# define SPARC_PIC_THUNK(reg) \ + .align 32; \ +.Lpic_thunk: \ + jmp %o7 + 8; \ + add %o7, reg, reg; + +# define SPARC_PIC_THUNK_CALL(reg) \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ + call .Lpic_thunk; \ + or reg, %lo(_GLOBAL_OFFSET_TABLE_+4), reg; + +# if 1 +# define SPARC_SETUP_GOT_REG(reg) SPARC_PIC_THUNK_CALL(reg) +# else +# define SPARC_SETUP_GOT_REG(reg) \ + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), reg; \ + call .+8; \ + or reg,%lo(_GLOBAL_OFFSET_TABLE_+4), reg; \ + add %o7, reg, reg +# endif + +# if defined(__arch64__) + +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + setx SYM, %o7, reg; +# define LDPTR ldx +# define SIZE_T_CC %xcc +# define STACK_FRAME 192 +# define STACK_BIAS 2047 +# define STACK_7thARG (STACK_BIAS+176) + +# else + +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + set SYM, reg; +# define LDPTR ld +# define SIZE_T_CC %icc +# define STACK_FRAME 112 +# define STACK_BIAS 0 +# define STACK_7thARG 92 +# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg) + +# endif + +# ifdef __PIC__ +# undef SPARC_LOAD_ADDRESS +# undef SPARC_LOAD_ADDRESS_LEAF +# define SPARC_LOAD_ADDRESS(SYM, reg) \ + SPARC_SETUP_GOT_REG(reg); \ + sethi %hi(SYM), %o7; \ + or %o7, %lo(SYM), %o7; \ + LDPTR [reg + %o7], reg; +# endif + +# ifndef SPARC_LOAD_ADDRESS_LEAF +# define SPARC_LOAD_ADDRESS_LEAF(SYM, reg, tmp) \ + mov %o7, tmp; \ + SPARC_LOAD_ADDRESS(SYM, reg) \ + mov tmp, %o7; +# endif + +#endif /* __SPARC_ARCH_H__ */ diff --git a/deps/openssl/openssl/crypto/sparccpuid.S b/deps/openssl/openssl/crypto/sparccpuid.S index 0cc247e489719f..eea2006fba18f6 100644 --- a/deps/openssl/openssl/crypto/sparccpuid.S +++ b/deps/openssl/openssl/crypto/sparccpuid.S @@ -251,6 +251,11 @@ _sparcv9_vis1_probe: ! UltraSPARC IIe 7 ! UltraSPARC III 7 ! UltraSPARC T1 24 +! SPARC T4 65(*) +! +! (*) result has lesser to do with VIS instruction latencies, rdtick +! appears that slow, but it does the trick in sense that FP and +! VIS code paths are still slower than integer-only ones. ! ! Numbers for T2 and SPARC64 V-VII are more than welcomed. ! 
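The latency table above is what the VIS1 instrument probe feeds: OPENSSL_cpuid_setup() in sparcv9cap.c (further down in this patch) treats a reading of 12 or more as a sign that the FP/VIS code paths lose to integer code on this chip and clears the corresponding capability bits. A sketch of that consumer, assuming the new sparc_arch.h; the function name here is made up.

    #include "sparc_arch.h"

    unsigned long _sparcv9_vis1_instrument(void);
    extern unsigned int OPENSSL_sparcv9cap_P[2];

    /* Mirrors the UltraSPARC-Tx detection in OPENSSL_cpuid_setup() */
    static void drop_slow_fpu_paths(void)
    {
        if (_sparcv9_vis1_instrument() >= 12)
            OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU);
    }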
@@ -260,6 +265,8 @@ _sparcv9_vis1_probe: .global _sparcv9_vis1_instrument .align 8 _sparcv9_vis1_instrument: + .word 0x81b00d80 !fxor %f0,%f0,%f0 + .word 0x85b08d82 !fxor %f2,%f2,%f2 .word 0x91410000 !rd %tick,%o0 .word 0x81b00d80 !fxor %f0,%f0,%f0 .word 0x85b08d82 !fxor %f2,%f2,%f2 @@ -314,6 +321,30 @@ _sparcv9_fmadd_probe: .type _sparcv9_fmadd_probe,#function .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe +.global _sparcv9_rdcfr +.align 8 +_sparcv9_rdcfr: + retl + .word 0x91468000 !rd %asr26,%o0 +.type _sparcv9_rdcfr,#function +.size _sparcv9_rdcfr,.-_sparcv9_rdcfr + +.global _sparcv9_vis3_probe +.align 8 +_sparcv9_vis3_probe: + retl + .word 0x81b022a0 !xmulx %g0,%g0,%g0 +.type _sparcv9_vis3_probe,#function +.size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe + +.global _sparcv9_random +.align 8 +_sparcv9_random: + retl + .word 0x91b002a0 !random %o0 +.type _sparcv9_random,#function +.size _sparcv9_random,.-_sparcv9_vis3_probe + .global OPENSSL_cleanse .align 32 OPENSSL_cleanse: @@ -397,6 +428,102 @@ OPENSSL_cleanse: .type OPENSSL_cleanse,#function .size OPENSSL_cleanse,.-OPENSSL_cleanse +.global _sparcv9_vis1_instrument_bus +.align 8 +_sparcv9_vis1_instrument_bus: + mov %o1,%o3 ! save cnt + .word 0x99410000 !rd %tick,%o4 ! tick + mov %o4,%o5 ! lasttick = tick + set 0,%g4 ! diff + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + +.Loop: .word 0x99410000 !rd %tick,%o4 + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + subcc %o1,1,%o1 ! --$cnt + bnz .Loop + add %o0,4,%o0 ! ++$out + + retl + mov %o3,%o0 +.type _sparcv9_vis1_instrument_bus,#function +.size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus + +.global _sparcv9_vis1_instrument_bus2 +.align 8 +_sparcv9_vis1_instrument_bus2: + mov %o1,%o3 ! save cnt + sll %o1,2,%o1 ! cnt*=4 + + .word 0x99410000 !rd %tick,%o4 ! tick + mov %o4,%o5 ! lasttick = tick + set 0,%g4 ! diff + + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + + .word 0x99410000 !rd %tick,%o4 ! tick + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + mov %g4,%g5 ! lastdiff=diff +.Loop2: + andn %o0,63,%g1 + .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load + .word 0x8143e040 !membar #Sync + .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit + .word 0x8143e040 !membar #Sync + ld [%o0],%o4 + add %o4,%g4,%g4 + .word 0xc9e2100c !cas [%o0],%o4,%g4 + + subcc %o2,1,%o2 ! --max + bz .Ldone2 + nop + + .word 0x99410000 !rd %tick,%o4 ! tick + sub %o4,%o5,%g4 ! diff=tick-lasttick + mov %o4,%o5 ! lasttick=tick + cmp %g4,%g5 + mov %g4,%g5 ! lastdiff=diff + + .word 0x83408000 !rd %ccr,%g1 + and %g1,4,%g1 ! isolate zero flag + xor %g1,4,%g1 ! flip zero flag + + subcc %o1,%g1,%o1 ! conditional --$cnt + bnz .Loop2 + add %o0,%g1,%o0 ! 
conditional ++$out + +.Ldone2: + srl %o1,2,%o1 + retl + sub %o3,%o1,%o0 +.type _sparcv9_vis1_instrument_bus2,#function +.size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2 + .section ".init",#alloc,#execinstr call OPENSSL_cpuid_setup nop diff --git a/deps/openssl/openssl/crypto/sparcv9cap.c b/deps/openssl/openssl/crypto/sparcv9cap.c index d9f986f6b9684c..8bf2846929b177 100644 --- a/deps/openssl/openssl/crypto/sparcv9cap.c +++ b/deps/openssl/openssl/crypto/sparcv9cap.c @@ -4,30 +4,68 @@ #include #include #include +#include #include -#define SPARCV9_TICK_PRIVILEGED (1<<0) -#define SPARCV9_PREFER_FPU (1<<1) -#define SPARCV9_VIS1 (1<<2) -#define SPARCV9_VIS2 (1<<3) /* reserved */ -#define SPARCV9_FMADD (1<<4) /* reserved for SPARC64 V */ +#include "sparc_arch.h" -static int OPENSSL_sparcv9cap_P = SPARCV9_TICK_PRIVILEGED; +#if defined(__GNUC__) && defined(__linux) +__attribute__ ((visibility("hidden"))) +#endif +unsigned int OPENSSL_sparcv9cap_P[2] = { SPARCV9_TICK_PRIVILEGED, 0 }; int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { + int bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, + const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num); - if (num >= 8 && !(num & 1) && - (OPENSSL_sparcv9cap_P & (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) == - (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) - return bn_mul_mont_fpu(rp, ap, bp, np, n0, num); - else - return bn_mul_mont_int(rp, ap, bp, np, n0, num); + if (!(num & 1) && num >= 6) { + if ((num & 15) == 0 && num <= 64 && + (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) == + (CFR_MONTMUL | CFR_MONTSQR)) { + typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, + const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, const BN_ULONG *np, + const BN_ULONG *n0); + static const bn_mul_mont_f funcs[4] = { + bn_mul_mont_t4_8, bn_mul_mont_t4_16, + bn_mul_mont_t4_24, bn_mul_mont_t4_32 + }; + bn_mul_mont_f worker = funcs[num / 16 - 1]; + + if ((*worker) (rp, ap, bp, np, n0)) + return 1; + /* retry once and fall back */ + if ((*worker) (rp, ap, bp, np, n0)) + return 1; + return bn_mul_mont_vis3(rp, ap, bp, np, n0, num); + } + if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3)) + return bn_mul_mont_vis3(rp, ap, bp, np, n0, num); + else if (num >= 8 && + (OPENSSL_sparcv9cap_P[0] & + (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) == + (SPARCV9_PREFER_FPU | SPARCV9_VIS1)) + return bn_mul_mont_fpu(rp, ap, bp, np, n0, num); + } + return bn_mul_mont_int(rp, ap, bp, np, n0, num); } unsigned long _sparcv9_rdtick(void); @@ -35,10 +73,15 @@ void _sparcv9_vis1_probe(void); unsigned long _sparcv9_vis1_instrument(void); void _sparcv9_vis2_probe(void); void _sparcv9_fmadd_probe(void); +unsigned long _sparcv9_rdcfr(void); +void _sparcv9_vis3_probe(void); +unsigned long _sparcv9_random(void); 
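OPENSSL_sparcv9cap_P is now a two-word array: word 0 keeps the SPARCV9_* feature bits (including the new VIS3, BLK and 64BIT_STACK ones) and word 1 holds the copy of the SPARC-T4 Compatibility Feature Register read by _sparcv9_rdcfr(). The bn_mul_mont() dispatch above consults them roughly as sketched below; the helper names are made up, the macros come from the new sparc_arch.h in this patch.

    #include "sparc_arch.h"

    extern unsigned int OPENSSL_sparcv9cap_P[2];

    /* T4 Montgomery hardware: both MONTMUL and MONTSQR opcodes must be present */
    static int have_t4_mont(void)
    {
        return (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
               (CFR_MONTMUL | CFR_MONTSQR);
    }

    /* VIS3 integer extensions (xmulx and friends), probed via _sparcv9_vis3_probe() */
    static int have_vis3(void)
    {
        return (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) != 0;
    }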
+size_t _sparcv9_vis1_instrument_bus(unsigned int *, size_t); +size_t _sparcv9_vis1_instrument_bus2(unsigned int *, size_t, size_t); unsigned long OPENSSL_rdtsc(void) { - if (OPENSSL_sparcv9cap_P & SPARCV9_TICK_PRIVILEGED) + if (OPENSSL_sparcv9cap_P[0] & SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) return gethrtime(); #else @@ -48,6 +91,24 @@ unsigned long OPENSSL_rdtsc(void) return _sparcv9_rdtick(); } +size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt) +{ + if ((OPENSSL_sparcv9cap_P[0] & (SPARCV9_TICK_PRIVILEGED | SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus(out, cnt); + else + return 0; +} + +size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max) +{ + if ((OPENSSL_sparcv9cap_P[0] & (SPARCV9_TICK_PRIVILEGED | SPARCV9_BLK)) == + SPARCV9_BLK) + return _sparcv9_vis1_instrument_bus2(out, cnt, max); + else + return 0; +} + #if 0 && defined(__sun) && defined(__SVR4) /* * This code path is disabled, because of incompatibility of libdevinfo.so.1 @@ -74,17 +135,17 @@ static int walk_nodename(di_node_t node, di_node_name_t di_node_name) if (!strcmp(name, "SUNW,UltraSPARC") || /* covers II,III,IV */ !strncmp(name, "SUNW,UltraSPARC-I", 17)) { - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU | SPARCV9_VIS1; /* %tick is privileged only on UltraSPARC-I/II, but not IIe */ if (name[14] != '\0' && name[17] != '\0' && name[18] != '\0') - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } /* This is expected to catch remaining UltraSPARCs, such as T1 */ else if (!strncmp(name, "SUNW,UltraSPARC", 15)) { - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return DI_WALK_TERMINATE; } @@ -103,22 +164,22 @@ void OPENSSL_cpuid_setup(void) trigger = 1; if ((e = getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); return; } if (sysinfo(SI_MACHINE, si, sizeof(si)) > 0) { if (strcmp(si, "sun4v")) /* FPU is preferred for all CPUs, but US-T1/2 */ - OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_PREFER_FPU; } if (sysinfo(SI_ISALIST, si, sizeof(si)) > 0) { if (strstr(si, "+vis")) - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; if (strstr(si, "+vis2")) { - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; return; } } @@ -188,12 +249,14 @@ void OPENSSL_cpuid_setup(void) trigger = 1; if ((e = getenv("OPENSSL_sparcv9cap"))) { - OPENSSL_sparcv9cap_P = strtoul(e, NULL, 0); + OPENSSL_sparcv9cap_P[0] = strtoul(e, NULL, 0); + if ((e = strchr(e, ':'))) + OPENSSL_sparcv9cap_P[1] = strtoul(e + 1, NULL, 0); return; } /* Initial value, fits UltraSPARC-I&II... 
*/ - OPENSSL_sparcv9cap_P = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] = SPARCV9_PREFER_FPU | SPARCV9_TICK_PRIVILEGED; sigfillset(&all_masked); sigdelset(&all_masked, SIGILL); @@ -216,30 +279,68 @@ void OPENSSL_cpuid_setup(void) if (sigsetjmp(common_jmp, 1) == 0) { _sparcv9_rdtick(); - OPENSSL_sparcv9cap_P &= ~SPARCV9_TICK_PRIVILEGED; + OPENSSL_sparcv9cap_P[0] &= ~SPARCV9_TICK_PRIVILEGED; } if (sigsetjmp(common_jmp, 1) == 0) { _sparcv9_vis1_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS1; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS1 | SPARCV9_BLK; /* detect UltraSPARC-Tx, see sparccpud.S for details... */ if (_sparcv9_vis1_instrument() >= 12) - OPENSSL_sparcv9cap_P &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); + OPENSSL_sparcv9cap_P[0] &= ~(SPARCV9_VIS1 | SPARCV9_PREFER_FPU); else { _sparcv9_vis2_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_VIS2; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS2; } } if (sigsetjmp(common_jmp, 1) == 0) { _sparcv9_fmadd_probe(); - OPENSSL_sparcv9cap_P |= SPARCV9_FMADD; + OPENSSL_sparcv9cap_P[0] |= SPARCV9_FMADD; + } + + /* + * VIS3 flag is tested independently from VIS1, unlike VIS2 that is, + * because VIS3 defines even integer instructions. + */ + if (sigsetjmp(common_jmp, 1) == 0) { + _sparcv9_vis3_probe(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_VIS3; + } +# if 0 /* was planned at some point but never + * implemented in hardware */ + if (sigsetjmp(common_jmp, 1) == 0) { + (void)_sparcv9_random(); + OPENSSL_sparcv9cap_P[0] |= SPARCV9_RANDOM; + } +# endif + + /* + * In wait for better solution _sparcv9_rdcfr is masked by + * VIS3 flag, because it goes to uninterruptable endless + * loop on UltraSPARC II running Solaris. Things might be + * different on Linux... + */ + if ((OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) && + sigsetjmp(common_jmp, 1) == 0) { + OPENSSL_sparcv9cap_P[1] = (unsigned int)_sparcv9_rdcfr(); } sigaction(SIGBUS, &bus_oact, NULL); sigaction(SIGILL, &ill_oact, NULL); sigprocmask(SIG_SETMASK, &oset, NULL); + + if (sizeof(size_t) == 8) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; +# ifdef __linux + else { + int ret = syscall(340); + + if (ret >= 0 && ret & 1) + OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK; + } +# endif } #endif diff --git a/deps/openssl/openssl/crypto/srp/Makefile b/deps/openssl/openssl/crypto/srp/Makefile index 41859d46fa7f4c..ddf674864f98a8 100644 --- a/deps/openssl/openssl/crypto/srp/Makefile +++ b/deps/openssl/openssl/crypto/srp/Makefile @@ -37,6 +37,9 @@ lib: $(LIBOBJ) $(RANLIB) $(LIB) || echo Never mind. @touch lib +files: + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO + links: @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) diff --git a/deps/openssl/openssl/crypto/stack/safestack.h b/deps/openssl/openssl/crypto/stack/safestack.h index 519649b6ed0e02..1d4f87eab34dcb 100644 --- a/deps/openssl/openssl/crypto/stack/safestack.h +++ b/deps/openssl/openssl/crypto/stack/safestack.h @@ -75,12 +75,12 @@ extern "C" { # define CHECKED_STACK_OF(type, p) \ ((_STACK*) (1 ? p : (STACK_OF(type)*)0)) +# define CHECKED_SK_COPY_FUNC(type, p) \ + ((void *(*)(void *)) ((1 ? p : (type *(*)(const type *))0))) + # define CHECKED_SK_FREE_FUNC(type, p) \ ((void (*)(void *)) ((1 ? p : (void (*)(type *))0))) -# define CHECKED_SK_FREE_FUNC2(type, p) \ - ((void (*)(void *)) ((1 ? p : (void (*)(type))0))) - # define CHECKED_SK_CMP_FUNC(type, p) \ ((int (*)(const void *, const void *)) \ ((1 ? 
p : (int (*)(const type * const *, const type * const *))0))) @@ -177,6 +177,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) (STACK_OF(type) *)sk_dup(CHECKED_STACK_OF(type, st)) # define SKM_sk_pop_free(type, st, free_func) \ sk_pop_free(CHECKED_STACK_OF(type, st), CHECKED_SK_FREE_FUNC(type, free_func)) +# define SKM_sk_deep_copy(type, st, copy_func, free_func) \ + (STACK_OF(type) *)sk_deep_copy(CHECKED_STACK_OF(type, st), CHECKED_SK_COPY_FUNC(type, copy_func), CHECKED_SK_FREE_FUNC(type, free_func)) # define SKM_sk_shift(type, st) \ (type *)sk_shift(CHECKED_STACK_OF(type, st)) # define SKM_sk_pop(type, st) \ @@ -226,6 +228,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ACCESS_DESCRIPTION_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ACCESS_DESCRIPTION, (st), (cmp)) # define sk_ACCESS_DESCRIPTION_dup(st) SKM_sk_dup(ACCESS_DESCRIPTION, st) # define sk_ACCESS_DESCRIPTION_pop_free(st, free_func) SKM_sk_pop_free(ACCESS_DESCRIPTION, (st), (free_func)) +# define sk_ACCESS_DESCRIPTION_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ACCESS_DESCRIPTION, (st), (copy_func), (free_func)) # define sk_ACCESS_DESCRIPTION_shift(st) SKM_sk_shift(ACCESS_DESCRIPTION, (st)) # define sk_ACCESS_DESCRIPTION_pop(st) SKM_sk_pop(ACCESS_DESCRIPTION, (st)) # define sk_ACCESS_DESCRIPTION_sort(st) SKM_sk_sort(ACCESS_DESCRIPTION, (st)) @@ -247,6 +250,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASIdOrRange_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASIdOrRange, (st), (cmp)) # define sk_ASIdOrRange_dup(st) SKM_sk_dup(ASIdOrRange, st) # define sk_ASIdOrRange_pop_free(st, free_func) SKM_sk_pop_free(ASIdOrRange, (st), (free_func)) +# define sk_ASIdOrRange_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASIdOrRange, (st), (copy_func), (free_func)) # define sk_ASIdOrRange_shift(st) SKM_sk_shift(ASIdOrRange, (st)) # define sk_ASIdOrRange_pop(st) SKM_sk_pop(ASIdOrRange, (st)) # define sk_ASIdOrRange_sort(st) SKM_sk_sort(ASIdOrRange, (st)) @@ -268,6 +272,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_GENERALSTRING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_GENERALSTRING, (st), (cmp)) # define sk_ASN1_GENERALSTRING_dup(st) SKM_sk_dup(ASN1_GENERALSTRING, st) # define sk_ASN1_GENERALSTRING_pop_free(st, free_func) SKM_sk_pop_free(ASN1_GENERALSTRING, (st), (free_func)) +# define sk_ASN1_GENERALSTRING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_GENERALSTRING, (st), (copy_func), (free_func)) # define sk_ASN1_GENERALSTRING_shift(st) SKM_sk_shift(ASN1_GENERALSTRING, (st)) # define sk_ASN1_GENERALSTRING_pop(st) SKM_sk_pop(ASN1_GENERALSTRING, (st)) # define sk_ASN1_GENERALSTRING_sort(st) SKM_sk_sort(ASN1_GENERALSTRING, (st)) @@ -289,6 +294,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_INTEGER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_INTEGER, (st), (cmp)) # define sk_ASN1_INTEGER_dup(st) SKM_sk_dup(ASN1_INTEGER, st) # define sk_ASN1_INTEGER_pop_free(st, free_func) SKM_sk_pop_free(ASN1_INTEGER, (st), (free_func)) +# define sk_ASN1_INTEGER_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_INTEGER, (st), (copy_func), (free_func)) # define sk_ASN1_INTEGER_shift(st) SKM_sk_shift(ASN1_INTEGER, (st)) # define sk_ASN1_INTEGER_pop(st) SKM_sk_pop(ASN1_INTEGER, (st)) # define sk_ASN1_INTEGER_sort(st) SKM_sk_sort(ASN1_INTEGER, (st)) @@ -310,6 +316,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_OBJECT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_OBJECT, (st), (cmp)) # define sk_ASN1_OBJECT_dup(st) 
SKM_sk_dup(ASN1_OBJECT, st) # define sk_ASN1_OBJECT_pop_free(st, free_func) SKM_sk_pop_free(ASN1_OBJECT, (st), (free_func)) +# define sk_ASN1_OBJECT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_OBJECT, (st), (copy_func), (free_func)) # define sk_ASN1_OBJECT_shift(st) SKM_sk_shift(ASN1_OBJECT, (st)) # define sk_ASN1_OBJECT_pop(st) SKM_sk_pop(ASN1_OBJECT, (st)) # define sk_ASN1_OBJECT_sort(st) SKM_sk_sort(ASN1_OBJECT, (st)) @@ -331,6 +338,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_STRING_TABLE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_STRING_TABLE, (st), (cmp)) # define sk_ASN1_STRING_TABLE_dup(st) SKM_sk_dup(ASN1_STRING_TABLE, st) # define sk_ASN1_STRING_TABLE_pop_free(st, free_func) SKM_sk_pop_free(ASN1_STRING_TABLE, (st), (free_func)) +# define sk_ASN1_STRING_TABLE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_STRING_TABLE, (st), (copy_func), (free_func)) # define sk_ASN1_STRING_TABLE_shift(st) SKM_sk_shift(ASN1_STRING_TABLE, (st)) # define sk_ASN1_STRING_TABLE_pop(st) SKM_sk_pop(ASN1_STRING_TABLE, (st)) # define sk_ASN1_STRING_TABLE_sort(st) SKM_sk_sort(ASN1_STRING_TABLE, (st)) @@ -352,6 +360,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_TYPE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_TYPE, (st), (cmp)) # define sk_ASN1_TYPE_dup(st) SKM_sk_dup(ASN1_TYPE, st) # define sk_ASN1_TYPE_pop_free(st, free_func) SKM_sk_pop_free(ASN1_TYPE, (st), (free_func)) +# define sk_ASN1_TYPE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_TYPE, (st), (copy_func), (free_func)) # define sk_ASN1_TYPE_shift(st) SKM_sk_shift(ASN1_TYPE, (st)) # define sk_ASN1_TYPE_pop(st) SKM_sk_pop(ASN1_TYPE, (st)) # define sk_ASN1_TYPE_sort(st) SKM_sk_sort(ASN1_TYPE, (st)) @@ -373,6 +382,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_UTF8STRING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_UTF8STRING, (st), (cmp)) # define sk_ASN1_UTF8STRING_dup(st) SKM_sk_dup(ASN1_UTF8STRING, st) # define sk_ASN1_UTF8STRING_pop_free(st, free_func) SKM_sk_pop_free(ASN1_UTF8STRING, (st), (free_func)) +# define sk_ASN1_UTF8STRING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_UTF8STRING, (st), (copy_func), (free_func)) # define sk_ASN1_UTF8STRING_shift(st) SKM_sk_shift(ASN1_UTF8STRING, (st)) # define sk_ASN1_UTF8STRING_pop(st) SKM_sk_pop(ASN1_UTF8STRING, (st)) # define sk_ASN1_UTF8STRING_sort(st) SKM_sk_sort(ASN1_UTF8STRING, (st)) @@ -394,6 +404,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ASN1_VALUE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ASN1_VALUE, (st), (cmp)) # define sk_ASN1_VALUE_dup(st) SKM_sk_dup(ASN1_VALUE, st) # define sk_ASN1_VALUE_pop_free(st, free_func) SKM_sk_pop_free(ASN1_VALUE, (st), (free_func)) +# define sk_ASN1_VALUE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ASN1_VALUE, (st), (copy_func), (free_func)) # define sk_ASN1_VALUE_shift(st) SKM_sk_shift(ASN1_VALUE, (st)) # define sk_ASN1_VALUE_pop(st) SKM_sk_pop(ASN1_VALUE, (st)) # define sk_ASN1_VALUE_sort(st) SKM_sk_sort(ASN1_VALUE, (st)) @@ -415,6 +426,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_BIO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(BIO, (st), (cmp)) # define sk_BIO_dup(st) SKM_sk_dup(BIO, st) # define sk_BIO_pop_free(st, free_func) SKM_sk_pop_free(BIO, (st), (free_func)) +# define sk_BIO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(BIO, (st), (copy_func), (free_func)) # define sk_BIO_shift(st) SKM_sk_shift(BIO, (st)) # define sk_BIO_pop(st) SKM_sk_pop(BIO, (st)) # define sk_BIO_sort(st) 
SKM_sk_sort(BIO, (st)) @@ -436,6 +448,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_BY_DIR_ENTRY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(BY_DIR_ENTRY, (st), (cmp)) # define sk_BY_DIR_ENTRY_dup(st) SKM_sk_dup(BY_DIR_ENTRY, st) # define sk_BY_DIR_ENTRY_pop_free(st, free_func) SKM_sk_pop_free(BY_DIR_ENTRY, (st), (free_func)) +# define sk_BY_DIR_ENTRY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(BY_DIR_ENTRY, (st), (copy_func), (free_func)) # define sk_BY_DIR_ENTRY_shift(st) SKM_sk_shift(BY_DIR_ENTRY, (st)) # define sk_BY_DIR_ENTRY_pop(st) SKM_sk_pop(BY_DIR_ENTRY, (st)) # define sk_BY_DIR_ENTRY_sort(st) SKM_sk_sort(BY_DIR_ENTRY, (st)) @@ -457,6 +470,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_BY_DIR_HASH_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(BY_DIR_HASH, (st), (cmp)) # define sk_BY_DIR_HASH_dup(st) SKM_sk_dup(BY_DIR_HASH, st) # define sk_BY_DIR_HASH_pop_free(st, free_func) SKM_sk_pop_free(BY_DIR_HASH, (st), (free_func)) +# define sk_BY_DIR_HASH_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(BY_DIR_HASH, (st), (copy_func), (free_func)) # define sk_BY_DIR_HASH_shift(st) SKM_sk_shift(BY_DIR_HASH, (st)) # define sk_BY_DIR_HASH_pop(st) SKM_sk_pop(BY_DIR_HASH, (st)) # define sk_BY_DIR_HASH_sort(st) SKM_sk_sort(BY_DIR_HASH, (st)) @@ -478,10 +492,33 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CMS_CertificateChoices_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_CertificateChoices, (st), (cmp)) # define sk_CMS_CertificateChoices_dup(st) SKM_sk_dup(CMS_CertificateChoices, st) # define sk_CMS_CertificateChoices_pop_free(st, free_func) SKM_sk_pop_free(CMS_CertificateChoices, (st), (free_func)) +# define sk_CMS_CertificateChoices_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_CertificateChoices, (st), (copy_func), (free_func)) # define sk_CMS_CertificateChoices_shift(st) SKM_sk_shift(CMS_CertificateChoices, (st)) # define sk_CMS_CertificateChoices_pop(st) SKM_sk_pop(CMS_CertificateChoices, (st)) # define sk_CMS_CertificateChoices_sort(st) SKM_sk_sort(CMS_CertificateChoices, (st)) # define sk_CMS_CertificateChoices_is_sorted(st) SKM_sk_is_sorted(CMS_CertificateChoices, (st)) +# define sk_CMS_RecipientEncryptedKey_new(cmp) SKM_sk_new(CMS_RecipientEncryptedKey, (cmp)) +# define sk_CMS_RecipientEncryptedKey_new_null() SKM_sk_new_null(CMS_RecipientEncryptedKey) +# define sk_CMS_RecipientEncryptedKey_free(st) SKM_sk_free(CMS_RecipientEncryptedKey, (st)) +# define sk_CMS_RecipientEncryptedKey_num(st) SKM_sk_num(CMS_RecipientEncryptedKey, (st)) +# define sk_CMS_RecipientEncryptedKey_value(st, i) SKM_sk_value(CMS_RecipientEncryptedKey, (st), (i)) +# define sk_CMS_RecipientEncryptedKey_set(st, i, val) SKM_sk_set(CMS_RecipientEncryptedKey, (st), (i), (val)) +# define sk_CMS_RecipientEncryptedKey_zero(st) SKM_sk_zero(CMS_RecipientEncryptedKey, (st)) +# define sk_CMS_RecipientEncryptedKey_push(st, val) SKM_sk_push(CMS_RecipientEncryptedKey, (st), (val)) +# define sk_CMS_RecipientEncryptedKey_unshift(st, val) SKM_sk_unshift(CMS_RecipientEncryptedKey, (st), (val)) +# define sk_CMS_RecipientEncryptedKey_find(st, val) SKM_sk_find(CMS_RecipientEncryptedKey, (st), (val)) +# define sk_CMS_RecipientEncryptedKey_find_ex(st, val) SKM_sk_find_ex(CMS_RecipientEncryptedKey, (st), (val)) +# define sk_CMS_RecipientEncryptedKey_delete(st, i) SKM_sk_delete(CMS_RecipientEncryptedKey, (st), (i)) +# define sk_CMS_RecipientEncryptedKey_delete_ptr(st, ptr) SKM_sk_delete_ptr(CMS_RecipientEncryptedKey, (st), (ptr)) +# define 
sk_CMS_RecipientEncryptedKey_insert(st, val, i) SKM_sk_insert(CMS_RecipientEncryptedKey, (st), (val), (i)) +# define sk_CMS_RecipientEncryptedKey_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_RecipientEncryptedKey, (st), (cmp)) +# define sk_CMS_RecipientEncryptedKey_dup(st) SKM_sk_dup(CMS_RecipientEncryptedKey, st) +# define sk_CMS_RecipientEncryptedKey_pop_free(st, free_func) SKM_sk_pop_free(CMS_RecipientEncryptedKey, (st), (free_func)) +# define sk_CMS_RecipientEncryptedKey_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_RecipientEncryptedKey, (st), (copy_func), (free_func)) +# define sk_CMS_RecipientEncryptedKey_shift(st) SKM_sk_shift(CMS_RecipientEncryptedKey, (st)) +# define sk_CMS_RecipientEncryptedKey_pop(st) SKM_sk_pop(CMS_RecipientEncryptedKey, (st)) +# define sk_CMS_RecipientEncryptedKey_sort(st) SKM_sk_sort(CMS_RecipientEncryptedKey, (st)) +# define sk_CMS_RecipientEncryptedKey_is_sorted(st) SKM_sk_is_sorted(CMS_RecipientEncryptedKey, (st)) # define sk_CMS_RecipientInfo_new(cmp) SKM_sk_new(CMS_RecipientInfo, (cmp)) # define sk_CMS_RecipientInfo_new_null() SKM_sk_new_null(CMS_RecipientInfo) # define sk_CMS_RecipientInfo_free(st) SKM_sk_free(CMS_RecipientInfo, (st)) @@ -499,6 +536,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CMS_RecipientInfo_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_RecipientInfo, (st), (cmp)) # define sk_CMS_RecipientInfo_dup(st) SKM_sk_dup(CMS_RecipientInfo, st) # define sk_CMS_RecipientInfo_pop_free(st, free_func) SKM_sk_pop_free(CMS_RecipientInfo, (st), (free_func)) +# define sk_CMS_RecipientInfo_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_RecipientInfo, (st), (copy_func), (free_func)) # define sk_CMS_RecipientInfo_shift(st) SKM_sk_shift(CMS_RecipientInfo, (st)) # define sk_CMS_RecipientInfo_pop(st) SKM_sk_pop(CMS_RecipientInfo, (st)) # define sk_CMS_RecipientInfo_sort(st) SKM_sk_sort(CMS_RecipientInfo, (st)) @@ -520,6 +558,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CMS_RevocationInfoChoice_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_RevocationInfoChoice, (st), (cmp)) # define sk_CMS_RevocationInfoChoice_dup(st) SKM_sk_dup(CMS_RevocationInfoChoice, st) # define sk_CMS_RevocationInfoChoice_pop_free(st, free_func) SKM_sk_pop_free(CMS_RevocationInfoChoice, (st), (free_func)) +# define sk_CMS_RevocationInfoChoice_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_RevocationInfoChoice, (st), (copy_func), (free_func)) # define sk_CMS_RevocationInfoChoice_shift(st) SKM_sk_shift(CMS_RevocationInfoChoice, (st)) # define sk_CMS_RevocationInfoChoice_pop(st) SKM_sk_pop(CMS_RevocationInfoChoice, (st)) # define sk_CMS_RevocationInfoChoice_sort(st) SKM_sk_sort(CMS_RevocationInfoChoice, (st)) @@ -541,6 +580,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CMS_SignerInfo_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CMS_SignerInfo, (st), (cmp)) # define sk_CMS_SignerInfo_dup(st) SKM_sk_dup(CMS_SignerInfo, st) # define sk_CMS_SignerInfo_pop_free(st, free_func) SKM_sk_pop_free(CMS_SignerInfo, (st), (free_func)) +# define sk_CMS_SignerInfo_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CMS_SignerInfo, (st), (copy_func), (free_func)) # define sk_CMS_SignerInfo_shift(st) SKM_sk_shift(CMS_SignerInfo, (st)) # define sk_CMS_SignerInfo_pop(st) SKM_sk_pop(CMS_SignerInfo, (st)) # define sk_CMS_SignerInfo_sort(st) SKM_sk_sort(CMS_SignerInfo, (st)) @@ -562,6 +602,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CONF_IMODULE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CONF_IMODULE, (st), 
(cmp)) # define sk_CONF_IMODULE_dup(st) SKM_sk_dup(CONF_IMODULE, st) # define sk_CONF_IMODULE_pop_free(st, free_func) SKM_sk_pop_free(CONF_IMODULE, (st), (free_func)) +# define sk_CONF_IMODULE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CONF_IMODULE, (st), (copy_func), (free_func)) # define sk_CONF_IMODULE_shift(st) SKM_sk_shift(CONF_IMODULE, (st)) # define sk_CONF_IMODULE_pop(st) SKM_sk_pop(CONF_IMODULE, (st)) # define sk_CONF_IMODULE_sort(st) SKM_sk_sort(CONF_IMODULE, (st)) @@ -583,6 +624,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CONF_MODULE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CONF_MODULE, (st), (cmp)) # define sk_CONF_MODULE_dup(st) SKM_sk_dup(CONF_MODULE, st) # define sk_CONF_MODULE_pop_free(st, free_func) SKM_sk_pop_free(CONF_MODULE, (st), (free_func)) +# define sk_CONF_MODULE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CONF_MODULE, (st), (copy_func), (free_func)) # define sk_CONF_MODULE_shift(st) SKM_sk_shift(CONF_MODULE, (st)) # define sk_CONF_MODULE_pop(st) SKM_sk_pop(CONF_MODULE, (st)) # define sk_CONF_MODULE_sort(st) SKM_sk_sort(CONF_MODULE, (st)) @@ -604,6 +646,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CONF_VALUE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CONF_VALUE, (st), (cmp)) # define sk_CONF_VALUE_dup(st) SKM_sk_dup(CONF_VALUE, st) # define sk_CONF_VALUE_pop_free(st, free_func) SKM_sk_pop_free(CONF_VALUE, (st), (free_func)) +# define sk_CONF_VALUE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CONF_VALUE, (st), (copy_func), (free_func)) # define sk_CONF_VALUE_shift(st) SKM_sk_shift(CONF_VALUE, (st)) # define sk_CONF_VALUE_pop(st) SKM_sk_pop(CONF_VALUE, (st)) # define sk_CONF_VALUE_sort(st) SKM_sk_sort(CONF_VALUE, (st)) @@ -625,6 +668,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CRYPTO_EX_DATA_FUNCS_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CRYPTO_EX_DATA_FUNCS, (st), (cmp)) # define sk_CRYPTO_EX_DATA_FUNCS_dup(st) SKM_sk_dup(CRYPTO_EX_DATA_FUNCS, st) # define sk_CRYPTO_EX_DATA_FUNCS_pop_free(st, free_func) SKM_sk_pop_free(CRYPTO_EX_DATA_FUNCS, (st), (free_func)) +# define sk_CRYPTO_EX_DATA_FUNCS_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CRYPTO_EX_DATA_FUNCS, (st), (copy_func), (free_func)) # define sk_CRYPTO_EX_DATA_FUNCS_shift(st) SKM_sk_shift(CRYPTO_EX_DATA_FUNCS, (st)) # define sk_CRYPTO_EX_DATA_FUNCS_pop(st) SKM_sk_pop(CRYPTO_EX_DATA_FUNCS, (st)) # define sk_CRYPTO_EX_DATA_FUNCS_sort(st) SKM_sk_sort(CRYPTO_EX_DATA_FUNCS, (st)) @@ -646,6 +690,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_CRYPTO_dynlock_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(CRYPTO_dynlock, (st), (cmp)) # define sk_CRYPTO_dynlock_dup(st) SKM_sk_dup(CRYPTO_dynlock, st) # define sk_CRYPTO_dynlock_pop_free(st, free_func) SKM_sk_pop_free(CRYPTO_dynlock, (st), (free_func)) +# define sk_CRYPTO_dynlock_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(CRYPTO_dynlock, (st), (copy_func), (free_func)) # define sk_CRYPTO_dynlock_shift(st) SKM_sk_shift(CRYPTO_dynlock, (st)) # define sk_CRYPTO_dynlock_pop(st) SKM_sk_pop(CRYPTO_dynlock, (st)) # define sk_CRYPTO_dynlock_sort(st) SKM_sk_sort(CRYPTO_dynlock, (st)) @@ -667,6 +712,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_DIST_POINT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(DIST_POINT, (st), (cmp)) # define sk_DIST_POINT_dup(st) SKM_sk_dup(DIST_POINT, st) # define sk_DIST_POINT_pop_free(st, free_func) SKM_sk_pop_free(DIST_POINT, (st), (free_func)) +# define sk_DIST_POINT_deep_copy(st, copy_func, free_func) 
SKM_sk_deep_copy(DIST_POINT, (st), (copy_func), (free_func)) # define sk_DIST_POINT_shift(st) SKM_sk_shift(DIST_POINT, (st)) # define sk_DIST_POINT_pop(st) SKM_sk_pop(DIST_POINT, (st)) # define sk_DIST_POINT_sort(st) SKM_sk_sort(DIST_POINT, (st)) @@ -688,6 +734,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ENGINE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ENGINE, (st), (cmp)) # define sk_ENGINE_dup(st) SKM_sk_dup(ENGINE, st) # define sk_ENGINE_pop_free(st, free_func) SKM_sk_pop_free(ENGINE, (st), (free_func)) +# define sk_ENGINE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ENGINE, (st), (copy_func), (free_func)) # define sk_ENGINE_shift(st) SKM_sk_shift(ENGINE, (st)) # define sk_ENGINE_pop(st) SKM_sk_pop(ENGINE, (st)) # define sk_ENGINE_sort(st) SKM_sk_sort(ENGINE, (st)) @@ -709,6 +756,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ENGINE_CLEANUP_ITEM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ENGINE_CLEANUP_ITEM, (st), (cmp)) # define sk_ENGINE_CLEANUP_ITEM_dup(st) SKM_sk_dup(ENGINE_CLEANUP_ITEM, st) # define sk_ENGINE_CLEANUP_ITEM_pop_free(st, free_func) SKM_sk_pop_free(ENGINE_CLEANUP_ITEM, (st), (free_func)) +# define sk_ENGINE_CLEANUP_ITEM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ENGINE_CLEANUP_ITEM, (st), (copy_func), (free_func)) # define sk_ENGINE_CLEANUP_ITEM_shift(st) SKM_sk_shift(ENGINE_CLEANUP_ITEM, (st)) # define sk_ENGINE_CLEANUP_ITEM_pop(st) SKM_sk_pop(ENGINE_CLEANUP_ITEM, (st)) # define sk_ENGINE_CLEANUP_ITEM_sort(st) SKM_sk_sort(ENGINE_CLEANUP_ITEM, (st)) @@ -730,6 +778,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_ESS_CERT_ID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(ESS_CERT_ID, (st), (cmp)) # define sk_ESS_CERT_ID_dup(st) SKM_sk_dup(ESS_CERT_ID, st) # define sk_ESS_CERT_ID_pop_free(st, free_func) SKM_sk_pop_free(ESS_CERT_ID, (st), (free_func)) +# define sk_ESS_CERT_ID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(ESS_CERT_ID, (st), (copy_func), (free_func)) # define sk_ESS_CERT_ID_shift(st) SKM_sk_shift(ESS_CERT_ID, (st)) # define sk_ESS_CERT_ID_pop(st) SKM_sk_pop(ESS_CERT_ID, (st)) # define sk_ESS_CERT_ID_sort(st) SKM_sk_sort(ESS_CERT_ID, (st)) @@ -751,6 +800,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_EVP_MD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_MD, (st), (cmp)) # define sk_EVP_MD_dup(st) SKM_sk_dup(EVP_MD, st) # define sk_EVP_MD_pop_free(st, free_func) SKM_sk_pop_free(EVP_MD, (st), (free_func)) +# define sk_EVP_MD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_MD, (st), (copy_func), (free_func)) # define sk_EVP_MD_shift(st) SKM_sk_shift(EVP_MD, (st)) # define sk_EVP_MD_pop(st) SKM_sk_pop(EVP_MD, (st)) # define sk_EVP_MD_sort(st) SKM_sk_sort(EVP_MD, (st)) @@ -772,6 +822,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_EVP_PBE_CTL_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_PBE_CTL, (st), (cmp)) # define sk_EVP_PBE_CTL_dup(st) SKM_sk_dup(EVP_PBE_CTL, st) # define sk_EVP_PBE_CTL_pop_free(st, free_func) SKM_sk_pop_free(EVP_PBE_CTL, (st), (free_func)) +# define sk_EVP_PBE_CTL_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_PBE_CTL, (st), (copy_func), (free_func)) # define sk_EVP_PBE_CTL_shift(st) SKM_sk_shift(EVP_PBE_CTL, (st)) # define sk_EVP_PBE_CTL_pop(st) SKM_sk_pop(EVP_PBE_CTL, (st)) # define sk_EVP_PBE_CTL_sort(st) SKM_sk_sort(EVP_PBE_CTL, (st)) @@ -793,6 +844,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_EVP_PKEY_ASN1_METHOD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_PKEY_ASN1_METHOD, (st), (cmp)) # define 
sk_EVP_PKEY_ASN1_METHOD_dup(st) SKM_sk_dup(EVP_PKEY_ASN1_METHOD, st) # define sk_EVP_PKEY_ASN1_METHOD_pop_free(st, free_func) SKM_sk_pop_free(EVP_PKEY_ASN1_METHOD, (st), (free_func)) +# define sk_EVP_PKEY_ASN1_METHOD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_PKEY_ASN1_METHOD, (st), (copy_func), (free_func)) # define sk_EVP_PKEY_ASN1_METHOD_shift(st) SKM_sk_shift(EVP_PKEY_ASN1_METHOD, (st)) # define sk_EVP_PKEY_ASN1_METHOD_pop(st) SKM_sk_pop(EVP_PKEY_ASN1_METHOD, (st)) # define sk_EVP_PKEY_ASN1_METHOD_sort(st) SKM_sk_sort(EVP_PKEY_ASN1_METHOD, (st)) @@ -814,6 +866,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_EVP_PKEY_METHOD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(EVP_PKEY_METHOD, (st), (cmp)) # define sk_EVP_PKEY_METHOD_dup(st) SKM_sk_dup(EVP_PKEY_METHOD, st) # define sk_EVP_PKEY_METHOD_pop_free(st, free_func) SKM_sk_pop_free(EVP_PKEY_METHOD, (st), (free_func)) +# define sk_EVP_PKEY_METHOD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(EVP_PKEY_METHOD, (st), (copy_func), (free_func)) # define sk_EVP_PKEY_METHOD_shift(st) SKM_sk_shift(EVP_PKEY_METHOD, (st)) # define sk_EVP_PKEY_METHOD_pop(st) SKM_sk_pop(EVP_PKEY_METHOD, (st)) # define sk_EVP_PKEY_METHOD_sort(st) SKM_sk_sort(EVP_PKEY_METHOD, (st)) @@ -835,6 +888,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_GENERAL_NAME_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(GENERAL_NAME, (st), (cmp)) # define sk_GENERAL_NAME_dup(st) SKM_sk_dup(GENERAL_NAME, st) # define sk_GENERAL_NAME_pop_free(st, free_func) SKM_sk_pop_free(GENERAL_NAME, (st), (free_func)) +# define sk_GENERAL_NAME_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(GENERAL_NAME, (st), (copy_func), (free_func)) # define sk_GENERAL_NAME_shift(st) SKM_sk_shift(GENERAL_NAME, (st)) # define sk_GENERAL_NAME_pop(st) SKM_sk_pop(GENERAL_NAME, (st)) # define sk_GENERAL_NAME_sort(st) SKM_sk_sort(GENERAL_NAME, (st)) @@ -856,6 +910,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_GENERAL_NAMES_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(GENERAL_NAMES, (st), (cmp)) # define sk_GENERAL_NAMES_dup(st) SKM_sk_dup(GENERAL_NAMES, st) # define sk_GENERAL_NAMES_pop_free(st, free_func) SKM_sk_pop_free(GENERAL_NAMES, (st), (free_func)) +# define sk_GENERAL_NAMES_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(GENERAL_NAMES, (st), (copy_func), (free_func)) # define sk_GENERAL_NAMES_shift(st) SKM_sk_shift(GENERAL_NAMES, (st)) # define sk_GENERAL_NAMES_pop(st) SKM_sk_pop(GENERAL_NAMES, (st)) # define sk_GENERAL_NAMES_sort(st) SKM_sk_sort(GENERAL_NAMES, (st)) @@ -877,6 +932,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_GENERAL_SUBTREE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(GENERAL_SUBTREE, (st), (cmp)) # define sk_GENERAL_SUBTREE_dup(st) SKM_sk_dup(GENERAL_SUBTREE, st) # define sk_GENERAL_SUBTREE_pop_free(st, free_func) SKM_sk_pop_free(GENERAL_SUBTREE, (st), (free_func)) +# define sk_GENERAL_SUBTREE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(GENERAL_SUBTREE, (st), (copy_func), (free_func)) # define sk_GENERAL_SUBTREE_shift(st) SKM_sk_shift(GENERAL_SUBTREE, (st)) # define sk_GENERAL_SUBTREE_pop(st) SKM_sk_pop(GENERAL_SUBTREE, (st)) # define sk_GENERAL_SUBTREE_sort(st) SKM_sk_sort(GENERAL_SUBTREE, (st)) @@ -898,6 +954,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_IPAddressFamily_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(IPAddressFamily, (st), (cmp)) # define sk_IPAddressFamily_dup(st) SKM_sk_dup(IPAddressFamily, st) # define sk_IPAddressFamily_pop_free(st, free_func) 
SKM_sk_pop_free(IPAddressFamily, (st), (free_func)) +# define sk_IPAddressFamily_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(IPAddressFamily, (st), (copy_func), (free_func)) # define sk_IPAddressFamily_shift(st) SKM_sk_shift(IPAddressFamily, (st)) # define sk_IPAddressFamily_pop(st) SKM_sk_pop(IPAddressFamily, (st)) # define sk_IPAddressFamily_sort(st) SKM_sk_sort(IPAddressFamily, (st)) @@ -919,6 +976,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_IPAddressOrRange_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(IPAddressOrRange, (st), (cmp)) # define sk_IPAddressOrRange_dup(st) SKM_sk_dup(IPAddressOrRange, st) # define sk_IPAddressOrRange_pop_free(st, free_func) SKM_sk_pop_free(IPAddressOrRange, (st), (free_func)) +# define sk_IPAddressOrRange_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(IPAddressOrRange, (st), (copy_func), (free_func)) # define sk_IPAddressOrRange_shift(st) SKM_sk_shift(IPAddressOrRange, (st)) # define sk_IPAddressOrRange_pop(st) SKM_sk_pop(IPAddressOrRange, (st)) # define sk_IPAddressOrRange_sort(st) SKM_sk_sort(IPAddressOrRange, (st)) @@ -940,6 +998,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_APREQBODY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_APREQBODY, (st), (cmp)) # define sk_KRB5_APREQBODY_dup(st) SKM_sk_dup(KRB5_APREQBODY, st) # define sk_KRB5_APREQBODY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_APREQBODY, (st), (free_func)) +# define sk_KRB5_APREQBODY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_APREQBODY, (st), (copy_func), (free_func)) # define sk_KRB5_APREQBODY_shift(st) SKM_sk_shift(KRB5_APREQBODY, (st)) # define sk_KRB5_APREQBODY_pop(st) SKM_sk_pop(KRB5_APREQBODY, (st)) # define sk_KRB5_APREQBODY_sort(st) SKM_sk_sort(KRB5_APREQBODY, (st)) @@ -961,6 +1020,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_AUTHDATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_AUTHDATA, (st), (cmp)) # define sk_KRB5_AUTHDATA_dup(st) SKM_sk_dup(KRB5_AUTHDATA, st) # define sk_KRB5_AUTHDATA_pop_free(st, free_func) SKM_sk_pop_free(KRB5_AUTHDATA, (st), (free_func)) +# define sk_KRB5_AUTHDATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_AUTHDATA, (st), (copy_func), (free_func)) # define sk_KRB5_AUTHDATA_shift(st) SKM_sk_shift(KRB5_AUTHDATA, (st)) # define sk_KRB5_AUTHDATA_pop(st) SKM_sk_pop(KRB5_AUTHDATA, (st)) # define sk_KRB5_AUTHDATA_sort(st) SKM_sk_sort(KRB5_AUTHDATA, (st)) @@ -982,6 +1042,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_AUTHENTBODY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_AUTHENTBODY, (st), (cmp)) # define sk_KRB5_AUTHENTBODY_dup(st) SKM_sk_dup(KRB5_AUTHENTBODY, st) # define sk_KRB5_AUTHENTBODY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_AUTHENTBODY, (st), (free_func)) +# define sk_KRB5_AUTHENTBODY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_AUTHENTBODY, (st), (copy_func), (free_func)) # define sk_KRB5_AUTHENTBODY_shift(st) SKM_sk_shift(KRB5_AUTHENTBODY, (st)) # define sk_KRB5_AUTHENTBODY_pop(st) SKM_sk_pop(KRB5_AUTHENTBODY, (st)) # define sk_KRB5_AUTHENTBODY_sort(st) SKM_sk_sort(KRB5_AUTHENTBODY, (st)) @@ -1003,6 +1064,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_CHECKSUM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_CHECKSUM, (st), (cmp)) # define sk_KRB5_CHECKSUM_dup(st) SKM_sk_dup(KRB5_CHECKSUM, st) # define sk_KRB5_CHECKSUM_pop_free(st, free_func) SKM_sk_pop_free(KRB5_CHECKSUM, (st), (free_func)) +# define sk_KRB5_CHECKSUM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_CHECKSUM, 
(st), (copy_func), (free_func)) # define sk_KRB5_CHECKSUM_shift(st) SKM_sk_shift(KRB5_CHECKSUM, (st)) # define sk_KRB5_CHECKSUM_pop(st) SKM_sk_pop(KRB5_CHECKSUM, (st)) # define sk_KRB5_CHECKSUM_sort(st) SKM_sk_sort(KRB5_CHECKSUM, (st)) @@ -1024,6 +1086,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_ENCDATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_ENCDATA, (st), (cmp)) # define sk_KRB5_ENCDATA_dup(st) SKM_sk_dup(KRB5_ENCDATA, st) # define sk_KRB5_ENCDATA_pop_free(st, free_func) SKM_sk_pop_free(KRB5_ENCDATA, (st), (free_func)) +# define sk_KRB5_ENCDATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_ENCDATA, (st), (copy_func), (free_func)) # define sk_KRB5_ENCDATA_shift(st) SKM_sk_shift(KRB5_ENCDATA, (st)) # define sk_KRB5_ENCDATA_pop(st) SKM_sk_pop(KRB5_ENCDATA, (st)) # define sk_KRB5_ENCDATA_sort(st) SKM_sk_sort(KRB5_ENCDATA, (st)) @@ -1045,6 +1108,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_ENCKEY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_ENCKEY, (st), (cmp)) # define sk_KRB5_ENCKEY_dup(st) SKM_sk_dup(KRB5_ENCKEY, st) # define sk_KRB5_ENCKEY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_ENCKEY, (st), (free_func)) +# define sk_KRB5_ENCKEY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_ENCKEY, (st), (copy_func), (free_func)) # define sk_KRB5_ENCKEY_shift(st) SKM_sk_shift(KRB5_ENCKEY, (st)) # define sk_KRB5_ENCKEY_pop(st) SKM_sk_pop(KRB5_ENCKEY, (st)) # define sk_KRB5_ENCKEY_sort(st) SKM_sk_sort(KRB5_ENCKEY, (st)) @@ -1066,6 +1130,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_PRINCNAME_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_PRINCNAME, (st), (cmp)) # define sk_KRB5_PRINCNAME_dup(st) SKM_sk_dup(KRB5_PRINCNAME, st) # define sk_KRB5_PRINCNAME_pop_free(st, free_func) SKM_sk_pop_free(KRB5_PRINCNAME, (st), (free_func)) +# define sk_KRB5_PRINCNAME_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_PRINCNAME, (st), (copy_func), (free_func)) # define sk_KRB5_PRINCNAME_shift(st) SKM_sk_shift(KRB5_PRINCNAME, (st)) # define sk_KRB5_PRINCNAME_pop(st) SKM_sk_pop(KRB5_PRINCNAME, (st)) # define sk_KRB5_PRINCNAME_sort(st) SKM_sk_sort(KRB5_PRINCNAME, (st)) @@ -1087,6 +1152,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_KRB5_TKTBODY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(KRB5_TKTBODY, (st), (cmp)) # define sk_KRB5_TKTBODY_dup(st) SKM_sk_dup(KRB5_TKTBODY, st) # define sk_KRB5_TKTBODY_pop_free(st, free_func) SKM_sk_pop_free(KRB5_TKTBODY, (st), (free_func)) +# define sk_KRB5_TKTBODY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(KRB5_TKTBODY, (st), (copy_func), (free_func)) # define sk_KRB5_TKTBODY_shift(st) SKM_sk_shift(KRB5_TKTBODY, (st)) # define sk_KRB5_TKTBODY_pop(st) SKM_sk_pop(KRB5_TKTBODY, (st)) # define sk_KRB5_TKTBODY_sort(st) SKM_sk_sort(KRB5_TKTBODY, (st)) @@ -1108,6 +1174,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_MEM_OBJECT_DATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MEM_OBJECT_DATA, (st), (cmp)) # define sk_MEM_OBJECT_DATA_dup(st) SKM_sk_dup(MEM_OBJECT_DATA, st) # define sk_MEM_OBJECT_DATA_pop_free(st, free_func) SKM_sk_pop_free(MEM_OBJECT_DATA, (st), (free_func)) +# define sk_MEM_OBJECT_DATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(MEM_OBJECT_DATA, (st), (copy_func), (free_func)) # define sk_MEM_OBJECT_DATA_shift(st) SKM_sk_shift(MEM_OBJECT_DATA, (st)) # define sk_MEM_OBJECT_DATA_pop(st) SKM_sk_pop(MEM_OBJECT_DATA, (st)) # define sk_MEM_OBJECT_DATA_sort(st) SKM_sk_sort(MEM_OBJECT_DATA, (st)) @@ -1129,6 +1196,7 @@ 
DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_MIME_HEADER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MIME_HEADER, (st), (cmp)) # define sk_MIME_HEADER_dup(st) SKM_sk_dup(MIME_HEADER, st) # define sk_MIME_HEADER_pop_free(st, free_func) SKM_sk_pop_free(MIME_HEADER, (st), (free_func)) +# define sk_MIME_HEADER_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(MIME_HEADER, (st), (copy_func), (free_func)) # define sk_MIME_HEADER_shift(st) SKM_sk_shift(MIME_HEADER, (st)) # define sk_MIME_HEADER_pop(st) SKM_sk_pop(MIME_HEADER, (st)) # define sk_MIME_HEADER_sort(st) SKM_sk_sort(MIME_HEADER, (st)) @@ -1150,6 +1218,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_MIME_PARAM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(MIME_PARAM, (st), (cmp)) # define sk_MIME_PARAM_dup(st) SKM_sk_dup(MIME_PARAM, st) # define sk_MIME_PARAM_pop_free(st, free_func) SKM_sk_pop_free(MIME_PARAM, (st), (free_func)) +# define sk_MIME_PARAM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(MIME_PARAM, (st), (copy_func), (free_func)) # define sk_MIME_PARAM_shift(st) SKM_sk_shift(MIME_PARAM, (st)) # define sk_MIME_PARAM_pop(st) SKM_sk_pop(MIME_PARAM, (st)) # define sk_MIME_PARAM_sort(st) SKM_sk_sort(MIME_PARAM, (st)) @@ -1171,6 +1240,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_NAME_FUNCS_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(NAME_FUNCS, (st), (cmp)) # define sk_NAME_FUNCS_dup(st) SKM_sk_dup(NAME_FUNCS, st) # define sk_NAME_FUNCS_pop_free(st, free_func) SKM_sk_pop_free(NAME_FUNCS, (st), (free_func)) +# define sk_NAME_FUNCS_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(NAME_FUNCS, (st), (copy_func), (free_func)) # define sk_NAME_FUNCS_shift(st) SKM_sk_shift(NAME_FUNCS, (st)) # define sk_NAME_FUNCS_pop(st) SKM_sk_pop(NAME_FUNCS, (st)) # define sk_NAME_FUNCS_sort(st) SKM_sk_sort(NAME_FUNCS, (st)) @@ -1192,6 +1262,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OCSP_CERTID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_CERTID, (st), (cmp)) # define sk_OCSP_CERTID_dup(st) SKM_sk_dup(OCSP_CERTID, st) # define sk_OCSP_CERTID_pop_free(st, free_func) SKM_sk_pop_free(OCSP_CERTID, (st), (free_func)) +# define sk_OCSP_CERTID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_CERTID, (st), (copy_func), (free_func)) # define sk_OCSP_CERTID_shift(st) SKM_sk_shift(OCSP_CERTID, (st)) # define sk_OCSP_CERTID_pop(st) SKM_sk_pop(OCSP_CERTID, (st)) # define sk_OCSP_CERTID_sort(st) SKM_sk_sort(OCSP_CERTID, (st)) @@ -1213,6 +1284,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OCSP_ONEREQ_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_ONEREQ, (st), (cmp)) # define sk_OCSP_ONEREQ_dup(st) SKM_sk_dup(OCSP_ONEREQ, st) # define sk_OCSP_ONEREQ_pop_free(st, free_func) SKM_sk_pop_free(OCSP_ONEREQ, (st), (free_func)) +# define sk_OCSP_ONEREQ_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_ONEREQ, (st), (copy_func), (free_func)) # define sk_OCSP_ONEREQ_shift(st) SKM_sk_shift(OCSP_ONEREQ, (st)) # define sk_OCSP_ONEREQ_pop(st) SKM_sk_pop(OCSP_ONEREQ, (st)) # define sk_OCSP_ONEREQ_sort(st) SKM_sk_sort(OCSP_ONEREQ, (st)) @@ -1234,6 +1306,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OCSP_RESPID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_RESPID, (st), (cmp)) # define sk_OCSP_RESPID_dup(st) SKM_sk_dup(OCSP_RESPID, st) # define sk_OCSP_RESPID_pop_free(st, free_func) SKM_sk_pop_free(OCSP_RESPID, (st), (free_func)) +# define sk_OCSP_RESPID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_RESPID, (st), (copy_func), (free_func)) # define 
sk_OCSP_RESPID_shift(st) SKM_sk_shift(OCSP_RESPID, (st)) # define sk_OCSP_RESPID_pop(st) SKM_sk_pop(OCSP_RESPID, (st)) # define sk_OCSP_RESPID_sort(st) SKM_sk_sort(OCSP_RESPID, (st)) @@ -1255,6 +1328,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OCSP_SINGLERESP_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(OCSP_SINGLERESP, (st), (cmp)) # define sk_OCSP_SINGLERESP_dup(st) SKM_sk_dup(OCSP_SINGLERESP, st) # define sk_OCSP_SINGLERESP_pop_free(st, free_func) SKM_sk_pop_free(OCSP_SINGLERESP, (st), (free_func)) +# define sk_OCSP_SINGLERESP_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(OCSP_SINGLERESP, (st), (copy_func), (free_func)) # define sk_OCSP_SINGLERESP_shift(st) SKM_sk_shift(OCSP_SINGLERESP, (st)) # define sk_OCSP_SINGLERESP_pop(st) SKM_sk_pop(OCSP_SINGLERESP, (st)) # define sk_OCSP_SINGLERESP_sort(st) SKM_sk_sort(OCSP_SINGLERESP, (st)) @@ -1276,6 +1350,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_PKCS12_SAFEBAG_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS12_SAFEBAG, (st), (cmp)) # define sk_PKCS12_SAFEBAG_dup(st) SKM_sk_dup(PKCS12_SAFEBAG, st) # define sk_PKCS12_SAFEBAG_pop_free(st, free_func) SKM_sk_pop_free(PKCS12_SAFEBAG, (st), (free_func)) +# define sk_PKCS12_SAFEBAG_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS12_SAFEBAG, (st), (copy_func), (free_func)) # define sk_PKCS12_SAFEBAG_shift(st) SKM_sk_shift(PKCS12_SAFEBAG, (st)) # define sk_PKCS12_SAFEBAG_pop(st) SKM_sk_pop(PKCS12_SAFEBAG, (st)) # define sk_PKCS12_SAFEBAG_sort(st) SKM_sk_sort(PKCS12_SAFEBAG, (st)) @@ -1297,6 +1372,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_PKCS7_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS7, (st), (cmp)) # define sk_PKCS7_dup(st) SKM_sk_dup(PKCS7, st) # define sk_PKCS7_pop_free(st, free_func) SKM_sk_pop_free(PKCS7, (st), (free_func)) +# define sk_PKCS7_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS7, (st), (copy_func), (free_func)) # define sk_PKCS7_shift(st) SKM_sk_shift(PKCS7, (st)) # define sk_PKCS7_pop(st) SKM_sk_pop(PKCS7, (st)) # define sk_PKCS7_sort(st) SKM_sk_sort(PKCS7, (st)) @@ -1318,6 +1394,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_PKCS7_RECIP_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS7_RECIP_INFO, (st), (cmp)) # define sk_PKCS7_RECIP_INFO_dup(st) SKM_sk_dup(PKCS7_RECIP_INFO, st) # define sk_PKCS7_RECIP_INFO_pop_free(st, free_func) SKM_sk_pop_free(PKCS7_RECIP_INFO, (st), (free_func)) +# define sk_PKCS7_RECIP_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS7_RECIP_INFO, (st), (copy_func), (free_func)) # define sk_PKCS7_RECIP_INFO_shift(st) SKM_sk_shift(PKCS7_RECIP_INFO, (st)) # define sk_PKCS7_RECIP_INFO_pop(st) SKM_sk_pop(PKCS7_RECIP_INFO, (st)) # define sk_PKCS7_RECIP_INFO_sort(st) SKM_sk_sort(PKCS7_RECIP_INFO, (st)) @@ -1339,6 +1416,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_PKCS7_SIGNER_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(PKCS7_SIGNER_INFO, (st), (cmp)) # define sk_PKCS7_SIGNER_INFO_dup(st) SKM_sk_dup(PKCS7_SIGNER_INFO, st) # define sk_PKCS7_SIGNER_INFO_pop_free(st, free_func) SKM_sk_pop_free(PKCS7_SIGNER_INFO, (st), (free_func)) +# define sk_PKCS7_SIGNER_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(PKCS7_SIGNER_INFO, (st), (copy_func), (free_func)) # define sk_PKCS7_SIGNER_INFO_shift(st) SKM_sk_shift(PKCS7_SIGNER_INFO, (st)) # define sk_PKCS7_SIGNER_INFO_pop(st) SKM_sk_pop(PKCS7_SIGNER_INFO, (st)) # define sk_PKCS7_SIGNER_INFO_sort(st) SKM_sk_sort(PKCS7_SIGNER_INFO, (st)) @@ -1360,6 +1438,7 @@ 
DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_POLICYINFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(POLICYINFO, (st), (cmp)) # define sk_POLICYINFO_dup(st) SKM_sk_dup(POLICYINFO, st) # define sk_POLICYINFO_pop_free(st, free_func) SKM_sk_pop_free(POLICYINFO, (st), (free_func)) +# define sk_POLICYINFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(POLICYINFO, (st), (copy_func), (free_func)) # define sk_POLICYINFO_shift(st) SKM_sk_shift(POLICYINFO, (st)) # define sk_POLICYINFO_pop(st) SKM_sk_pop(POLICYINFO, (st)) # define sk_POLICYINFO_sort(st) SKM_sk_sort(POLICYINFO, (st)) @@ -1381,6 +1460,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_POLICYQUALINFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(POLICYQUALINFO, (st), (cmp)) # define sk_POLICYQUALINFO_dup(st) SKM_sk_dup(POLICYQUALINFO, st) # define sk_POLICYQUALINFO_pop_free(st, free_func) SKM_sk_pop_free(POLICYQUALINFO, (st), (free_func)) +# define sk_POLICYQUALINFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(POLICYQUALINFO, (st), (copy_func), (free_func)) # define sk_POLICYQUALINFO_shift(st) SKM_sk_shift(POLICYQUALINFO, (st)) # define sk_POLICYQUALINFO_pop(st) SKM_sk_pop(POLICYQUALINFO, (st)) # define sk_POLICYQUALINFO_sort(st) SKM_sk_sort(POLICYQUALINFO, (st)) @@ -1402,10 +1482,33 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_POLICY_MAPPING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(POLICY_MAPPING, (st), (cmp)) # define sk_POLICY_MAPPING_dup(st) SKM_sk_dup(POLICY_MAPPING, st) # define sk_POLICY_MAPPING_pop_free(st, free_func) SKM_sk_pop_free(POLICY_MAPPING, (st), (free_func)) +# define sk_POLICY_MAPPING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(POLICY_MAPPING, (st), (copy_func), (free_func)) # define sk_POLICY_MAPPING_shift(st) SKM_sk_shift(POLICY_MAPPING, (st)) # define sk_POLICY_MAPPING_pop(st) SKM_sk_pop(POLICY_MAPPING, (st)) # define sk_POLICY_MAPPING_sort(st) SKM_sk_sort(POLICY_MAPPING, (st)) # define sk_POLICY_MAPPING_is_sorted(st) SKM_sk_is_sorted(POLICY_MAPPING, (st)) +# define sk_SCT_new(cmp) SKM_sk_new(SCT, (cmp)) +# define sk_SCT_new_null() SKM_sk_new_null(SCT) +# define sk_SCT_free(st) SKM_sk_free(SCT, (st)) +# define sk_SCT_num(st) SKM_sk_num(SCT, (st)) +# define sk_SCT_value(st, i) SKM_sk_value(SCT, (st), (i)) +# define sk_SCT_set(st, i, val) SKM_sk_set(SCT, (st), (i), (val)) +# define sk_SCT_zero(st) SKM_sk_zero(SCT, (st)) +# define sk_SCT_push(st, val) SKM_sk_push(SCT, (st), (val)) +# define sk_SCT_unshift(st, val) SKM_sk_unshift(SCT, (st), (val)) +# define sk_SCT_find(st, val) SKM_sk_find(SCT, (st), (val)) +# define sk_SCT_find_ex(st, val) SKM_sk_find_ex(SCT, (st), (val)) +# define sk_SCT_delete(st, i) SKM_sk_delete(SCT, (st), (i)) +# define sk_SCT_delete_ptr(st, ptr) SKM_sk_delete_ptr(SCT, (st), (ptr)) +# define sk_SCT_insert(st, val, i) SKM_sk_insert(SCT, (st), (val), (i)) +# define sk_SCT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SCT, (st), (cmp)) +# define sk_SCT_dup(st) SKM_sk_dup(SCT, st) +# define sk_SCT_pop_free(st, free_func) SKM_sk_pop_free(SCT, (st), (free_func)) +# define sk_SCT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SCT, (st), (copy_func), (free_func)) +# define sk_SCT_shift(st) SKM_sk_shift(SCT, (st)) +# define sk_SCT_pop(st) SKM_sk_pop(SCT, (st)) +# define sk_SCT_sort(st) SKM_sk_sort(SCT, (st)) +# define sk_SCT_is_sorted(st) SKM_sk_is_sorted(SCT, (st)) # define sk_SRP_gN_new(cmp) SKM_sk_new(SRP_gN, (cmp)) # define sk_SRP_gN_new_null() SKM_sk_new_null(SRP_gN) # define sk_SRP_gN_free(st) SKM_sk_free(SRP_gN, (st)) @@ -1423,6 +1526,7 @@ 
DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SRP_gN_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN, (st), (cmp)) # define sk_SRP_gN_dup(st) SKM_sk_dup(SRP_gN, st) # define sk_SRP_gN_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN, (st), (free_func)) +# define sk_SRP_gN_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRP_gN, (st), (copy_func), (free_func)) # define sk_SRP_gN_shift(st) SKM_sk_shift(SRP_gN, (st)) # define sk_SRP_gN_pop(st) SKM_sk_pop(SRP_gN, (st)) # define sk_SRP_gN_sort(st) SKM_sk_sort(SRP_gN, (st)) @@ -1444,6 +1548,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SRP_gN_cache_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_gN_cache, (st), (cmp)) # define sk_SRP_gN_cache_dup(st) SKM_sk_dup(SRP_gN_cache, st) # define sk_SRP_gN_cache_pop_free(st, free_func) SKM_sk_pop_free(SRP_gN_cache, (st), (free_func)) +# define sk_SRP_gN_cache_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRP_gN_cache, (st), (copy_func), (free_func)) # define sk_SRP_gN_cache_shift(st) SKM_sk_shift(SRP_gN_cache, (st)) # define sk_SRP_gN_cache_pop(st) SKM_sk_pop(SRP_gN_cache, (st)) # define sk_SRP_gN_cache_sort(st) SKM_sk_sort(SRP_gN_cache, (st)) @@ -1465,6 +1570,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SRP_user_pwd_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRP_user_pwd, (st), (cmp)) # define sk_SRP_user_pwd_dup(st) SKM_sk_dup(SRP_user_pwd, st) # define sk_SRP_user_pwd_pop_free(st, free_func) SKM_sk_pop_free(SRP_user_pwd, (st), (free_func)) +# define sk_SRP_user_pwd_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRP_user_pwd, (st), (copy_func), (free_func)) # define sk_SRP_user_pwd_shift(st) SKM_sk_shift(SRP_user_pwd, (st)) # define sk_SRP_user_pwd_pop(st) SKM_sk_pop(SRP_user_pwd, (st)) # define sk_SRP_user_pwd_sort(st) SKM_sk_sort(SRP_user_pwd, (st)) @@ -1486,6 +1592,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SRTP_PROTECTION_PROFILE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SRTP_PROTECTION_PROFILE, (st), (cmp)) # define sk_SRTP_PROTECTION_PROFILE_dup(st) SKM_sk_dup(SRTP_PROTECTION_PROFILE, st) # define sk_SRTP_PROTECTION_PROFILE_pop_free(st, free_func) SKM_sk_pop_free(SRTP_PROTECTION_PROFILE, (st), (free_func)) +# define sk_SRTP_PROTECTION_PROFILE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SRTP_PROTECTION_PROFILE, (st), (copy_func), (free_func)) # define sk_SRTP_PROTECTION_PROFILE_shift(st) SKM_sk_shift(SRTP_PROTECTION_PROFILE, (st)) # define sk_SRTP_PROTECTION_PROFILE_pop(st) SKM_sk_pop(SRTP_PROTECTION_PROFILE, (st)) # define sk_SRTP_PROTECTION_PROFILE_sort(st) SKM_sk_sort(SRTP_PROTECTION_PROFILE, (st)) @@ -1507,6 +1614,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SSL_CIPHER_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SSL_CIPHER, (st), (cmp)) # define sk_SSL_CIPHER_dup(st) SKM_sk_dup(SSL_CIPHER, st) # define sk_SSL_CIPHER_pop_free(st, free_func) SKM_sk_pop_free(SSL_CIPHER, (st), (free_func)) +# define sk_SSL_CIPHER_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SSL_CIPHER, (st), (copy_func), (free_func)) # define sk_SSL_CIPHER_shift(st) SKM_sk_shift(SSL_CIPHER, (st)) # define sk_SSL_CIPHER_pop(st) SKM_sk_pop(SSL_CIPHER, (st)) # define sk_SSL_CIPHER_sort(st) SKM_sk_sort(SSL_CIPHER, (st)) @@ -1528,6 +1636,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SSL_COMP_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SSL_COMP, (st), (cmp)) # define sk_SSL_COMP_dup(st) SKM_sk_dup(SSL_COMP, st) # define sk_SSL_COMP_pop_free(st, free_func) SKM_sk_pop_free(SSL_COMP, (st), (free_func)) +# define 
sk_SSL_COMP_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SSL_COMP, (st), (copy_func), (free_func)) # define sk_SSL_COMP_shift(st) SKM_sk_shift(SSL_COMP, (st)) # define sk_SSL_COMP_pop(st) SKM_sk_pop(SSL_COMP, (st)) # define sk_SSL_COMP_sort(st) SKM_sk_sort(SSL_COMP, (st)) @@ -1549,6 +1658,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_STACK_OF_X509_NAME_ENTRY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(STACK_OF_X509_NAME_ENTRY, (st), (cmp)) # define sk_STACK_OF_X509_NAME_ENTRY_dup(st) SKM_sk_dup(STACK_OF_X509_NAME_ENTRY, st) # define sk_STACK_OF_X509_NAME_ENTRY_pop_free(st, free_func) SKM_sk_pop_free(STACK_OF_X509_NAME_ENTRY, (st), (free_func)) +# define sk_STACK_OF_X509_NAME_ENTRY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(STACK_OF_X509_NAME_ENTRY, (st), (copy_func), (free_func)) # define sk_STACK_OF_X509_NAME_ENTRY_shift(st) SKM_sk_shift(STACK_OF_X509_NAME_ENTRY, (st)) # define sk_STACK_OF_X509_NAME_ENTRY_pop(st) SKM_sk_pop(STACK_OF_X509_NAME_ENTRY, (st)) # define sk_STACK_OF_X509_NAME_ENTRY_sort(st) SKM_sk_sort(STACK_OF_X509_NAME_ENTRY, (st)) @@ -1570,6 +1680,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_STORE_ATTR_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(STORE_ATTR_INFO, (st), (cmp)) # define sk_STORE_ATTR_INFO_dup(st) SKM_sk_dup(STORE_ATTR_INFO, st) # define sk_STORE_ATTR_INFO_pop_free(st, free_func) SKM_sk_pop_free(STORE_ATTR_INFO, (st), (free_func)) +# define sk_STORE_ATTR_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(STORE_ATTR_INFO, (st), (copy_func), (free_func)) # define sk_STORE_ATTR_INFO_shift(st) SKM_sk_shift(STORE_ATTR_INFO, (st)) # define sk_STORE_ATTR_INFO_pop(st) SKM_sk_pop(STORE_ATTR_INFO, (st)) # define sk_STORE_ATTR_INFO_sort(st) SKM_sk_sort(STORE_ATTR_INFO, (st)) @@ -1591,6 +1702,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_STORE_OBJECT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(STORE_OBJECT, (st), (cmp)) # define sk_STORE_OBJECT_dup(st) SKM_sk_dup(STORE_OBJECT, st) # define sk_STORE_OBJECT_pop_free(st, free_func) SKM_sk_pop_free(STORE_OBJECT, (st), (free_func)) +# define sk_STORE_OBJECT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(STORE_OBJECT, (st), (copy_func), (free_func)) # define sk_STORE_OBJECT_shift(st) SKM_sk_shift(STORE_OBJECT, (st)) # define sk_STORE_OBJECT_pop(st) SKM_sk_pop(STORE_OBJECT, (st)) # define sk_STORE_OBJECT_sort(st) SKM_sk_sort(STORE_OBJECT, (st)) @@ -1612,6 +1724,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_SXNETID_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(SXNETID, (st), (cmp)) # define sk_SXNETID_dup(st) SKM_sk_dup(SXNETID, st) # define sk_SXNETID_pop_free(st, free_func) SKM_sk_pop_free(SXNETID, (st), (free_func)) +# define sk_SXNETID_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(SXNETID, (st), (copy_func), (free_func)) # define sk_SXNETID_shift(st) SKM_sk_shift(SXNETID, (st)) # define sk_SXNETID_pop(st) SKM_sk_pop(SXNETID, (st)) # define sk_SXNETID_sort(st) SKM_sk_sort(SXNETID, (st)) @@ -1633,6 +1746,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_UI_STRING_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(UI_STRING, (st), (cmp)) # define sk_UI_STRING_dup(st) SKM_sk_dup(UI_STRING, st) # define sk_UI_STRING_pop_free(st, free_func) SKM_sk_pop_free(UI_STRING, (st), (free_func)) +# define sk_UI_STRING_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(UI_STRING, (st), (copy_func), (free_func)) # define sk_UI_STRING_shift(st) SKM_sk_shift(UI_STRING, (st)) # define sk_UI_STRING_pop(st) SKM_sk_pop(UI_STRING, (st)) # define 
sk_UI_STRING_sort(st) SKM_sk_sort(UI_STRING, (st)) @@ -1654,6 +1768,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509, (st), (cmp)) # define sk_X509_dup(st) SKM_sk_dup(X509, st) # define sk_X509_pop_free(st, free_func) SKM_sk_pop_free(X509, (st), (free_func)) +# define sk_X509_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509, (st), (copy_func), (free_func)) # define sk_X509_shift(st) SKM_sk_shift(X509, (st)) # define sk_X509_pop(st) SKM_sk_pop(X509, (st)) # define sk_X509_sort(st) SKM_sk_sort(X509, (st)) @@ -1675,6 +1790,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509V3_EXT_METHOD_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509V3_EXT_METHOD, (st), (cmp)) # define sk_X509V3_EXT_METHOD_dup(st) SKM_sk_dup(X509V3_EXT_METHOD, st) # define sk_X509V3_EXT_METHOD_pop_free(st, free_func) SKM_sk_pop_free(X509V3_EXT_METHOD, (st), (free_func)) +# define sk_X509V3_EXT_METHOD_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509V3_EXT_METHOD, (st), (copy_func), (free_func)) # define sk_X509V3_EXT_METHOD_shift(st) SKM_sk_shift(X509V3_EXT_METHOD, (st)) # define sk_X509V3_EXT_METHOD_pop(st) SKM_sk_pop(X509V3_EXT_METHOD, (st)) # define sk_X509V3_EXT_METHOD_sort(st) SKM_sk_sort(X509V3_EXT_METHOD, (st)) @@ -1696,6 +1812,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_ALGOR_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_ALGOR, (st), (cmp)) # define sk_X509_ALGOR_dup(st) SKM_sk_dup(X509_ALGOR, st) # define sk_X509_ALGOR_pop_free(st, free_func) SKM_sk_pop_free(X509_ALGOR, (st), (free_func)) +# define sk_X509_ALGOR_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_ALGOR, (st), (copy_func), (free_func)) # define sk_X509_ALGOR_shift(st) SKM_sk_shift(X509_ALGOR, (st)) # define sk_X509_ALGOR_pop(st) SKM_sk_pop(X509_ALGOR, (st)) # define sk_X509_ALGOR_sort(st) SKM_sk_sort(X509_ALGOR, (st)) @@ -1717,6 +1834,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_ATTRIBUTE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_ATTRIBUTE, (st), (cmp)) # define sk_X509_ATTRIBUTE_dup(st) SKM_sk_dup(X509_ATTRIBUTE, st) # define sk_X509_ATTRIBUTE_pop_free(st, free_func) SKM_sk_pop_free(X509_ATTRIBUTE, (st), (free_func)) +# define sk_X509_ATTRIBUTE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_ATTRIBUTE, (st), (copy_func), (free_func)) # define sk_X509_ATTRIBUTE_shift(st) SKM_sk_shift(X509_ATTRIBUTE, (st)) # define sk_X509_ATTRIBUTE_pop(st) SKM_sk_pop(X509_ATTRIBUTE, (st)) # define sk_X509_ATTRIBUTE_sort(st) SKM_sk_sort(X509_ATTRIBUTE, (st)) @@ -1738,6 +1856,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_CRL_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_CRL, (st), (cmp)) # define sk_X509_CRL_dup(st) SKM_sk_dup(X509_CRL, st) # define sk_X509_CRL_pop_free(st, free_func) SKM_sk_pop_free(X509_CRL, (st), (free_func)) +# define sk_X509_CRL_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_CRL, (st), (copy_func), (free_func)) # define sk_X509_CRL_shift(st) SKM_sk_shift(X509_CRL, (st)) # define sk_X509_CRL_pop(st) SKM_sk_pop(X509_CRL, (st)) # define sk_X509_CRL_sort(st) SKM_sk_sort(X509_CRL, (st)) @@ -1759,6 +1878,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_EXTENSION_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_EXTENSION, (st), (cmp)) # define sk_X509_EXTENSION_dup(st) SKM_sk_dup(X509_EXTENSION, st) # define sk_X509_EXTENSION_pop_free(st, free_func) SKM_sk_pop_free(X509_EXTENSION, (st), (free_func)) +# define sk_X509_EXTENSION_deep_copy(st, 
copy_func, free_func) SKM_sk_deep_copy(X509_EXTENSION, (st), (copy_func), (free_func)) # define sk_X509_EXTENSION_shift(st) SKM_sk_shift(X509_EXTENSION, (st)) # define sk_X509_EXTENSION_pop(st) SKM_sk_pop(X509_EXTENSION, (st)) # define sk_X509_EXTENSION_sort(st) SKM_sk_sort(X509_EXTENSION, (st)) @@ -1780,6 +1900,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_INFO_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_INFO, (st), (cmp)) # define sk_X509_INFO_dup(st) SKM_sk_dup(X509_INFO, st) # define sk_X509_INFO_pop_free(st, free_func) SKM_sk_pop_free(X509_INFO, (st), (free_func)) +# define sk_X509_INFO_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_INFO, (st), (copy_func), (free_func)) # define sk_X509_INFO_shift(st) SKM_sk_shift(X509_INFO, (st)) # define sk_X509_INFO_pop(st) SKM_sk_pop(X509_INFO, (st)) # define sk_X509_INFO_sort(st) SKM_sk_sort(X509_INFO, (st)) @@ -1801,6 +1922,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_LOOKUP_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_LOOKUP, (st), (cmp)) # define sk_X509_LOOKUP_dup(st) SKM_sk_dup(X509_LOOKUP, st) # define sk_X509_LOOKUP_pop_free(st, free_func) SKM_sk_pop_free(X509_LOOKUP, (st), (free_func)) +# define sk_X509_LOOKUP_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_LOOKUP, (st), (copy_func), (free_func)) # define sk_X509_LOOKUP_shift(st) SKM_sk_shift(X509_LOOKUP, (st)) # define sk_X509_LOOKUP_pop(st) SKM_sk_pop(X509_LOOKUP, (st)) # define sk_X509_LOOKUP_sort(st) SKM_sk_sort(X509_LOOKUP, (st)) @@ -1822,6 +1944,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_NAME_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_NAME, (st), (cmp)) # define sk_X509_NAME_dup(st) SKM_sk_dup(X509_NAME, st) # define sk_X509_NAME_pop_free(st, free_func) SKM_sk_pop_free(X509_NAME, (st), (free_func)) +# define sk_X509_NAME_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_NAME, (st), (copy_func), (free_func)) # define sk_X509_NAME_shift(st) SKM_sk_shift(X509_NAME, (st)) # define sk_X509_NAME_pop(st) SKM_sk_pop(X509_NAME, (st)) # define sk_X509_NAME_sort(st) SKM_sk_sort(X509_NAME, (st)) @@ -1843,6 +1966,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_NAME_ENTRY_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_NAME_ENTRY, (st), (cmp)) # define sk_X509_NAME_ENTRY_dup(st) SKM_sk_dup(X509_NAME_ENTRY, st) # define sk_X509_NAME_ENTRY_pop_free(st, free_func) SKM_sk_pop_free(X509_NAME_ENTRY, (st), (free_func)) +# define sk_X509_NAME_ENTRY_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_NAME_ENTRY, (st), (copy_func), (free_func)) # define sk_X509_NAME_ENTRY_shift(st) SKM_sk_shift(X509_NAME_ENTRY, (st)) # define sk_X509_NAME_ENTRY_pop(st) SKM_sk_pop(X509_NAME_ENTRY, (st)) # define sk_X509_NAME_ENTRY_sort(st) SKM_sk_sort(X509_NAME_ENTRY, (st)) @@ -1864,6 +1988,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_OBJECT_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_OBJECT, (st), (cmp)) # define sk_X509_OBJECT_dup(st) SKM_sk_dup(X509_OBJECT, st) # define sk_X509_OBJECT_pop_free(st, free_func) SKM_sk_pop_free(X509_OBJECT, (st), (free_func)) +# define sk_X509_OBJECT_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_OBJECT, (st), (copy_func), (free_func)) # define sk_X509_OBJECT_shift(st) SKM_sk_shift(X509_OBJECT, (st)) # define sk_X509_OBJECT_pop(st) SKM_sk_pop(X509_OBJECT, (st)) # define sk_X509_OBJECT_sort(st) SKM_sk_sort(X509_OBJECT, (st)) @@ -1885,6 +2010,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define 
sk_X509_POLICY_DATA_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_POLICY_DATA, (st), (cmp)) # define sk_X509_POLICY_DATA_dup(st) SKM_sk_dup(X509_POLICY_DATA, st) # define sk_X509_POLICY_DATA_pop_free(st, free_func) SKM_sk_pop_free(X509_POLICY_DATA, (st), (free_func)) +# define sk_X509_POLICY_DATA_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_POLICY_DATA, (st), (copy_func), (free_func)) # define sk_X509_POLICY_DATA_shift(st) SKM_sk_shift(X509_POLICY_DATA, (st)) # define sk_X509_POLICY_DATA_pop(st) SKM_sk_pop(X509_POLICY_DATA, (st)) # define sk_X509_POLICY_DATA_sort(st) SKM_sk_sort(X509_POLICY_DATA, (st)) @@ -1906,6 +2032,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_POLICY_NODE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_POLICY_NODE, (st), (cmp)) # define sk_X509_POLICY_NODE_dup(st) SKM_sk_dup(X509_POLICY_NODE, st) # define sk_X509_POLICY_NODE_pop_free(st, free_func) SKM_sk_pop_free(X509_POLICY_NODE, (st), (free_func)) +# define sk_X509_POLICY_NODE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_POLICY_NODE, (st), (copy_func), (free_func)) # define sk_X509_POLICY_NODE_shift(st) SKM_sk_shift(X509_POLICY_NODE, (st)) # define sk_X509_POLICY_NODE_pop(st) SKM_sk_pop(X509_POLICY_NODE, (st)) # define sk_X509_POLICY_NODE_sort(st) SKM_sk_sort(X509_POLICY_NODE, (st)) @@ -1927,6 +2054,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_PURPOSE_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_PURPOSE, (st), (cmp)) # define sk_X509_PURPOSE_dup(st) SKM_sk_dup(X509_PURPOSE, st) # define sk_X509_PURPOSE_pop_free(st, free_func) SKM_sk_pop_free(X509_PURPOSE, (st), (free_func)) +# define sk_X509_PURPOSE_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_PURPOSE, (st), (copy_func), (free_func)) # define sk_X509_PURPOSE_shift(st) SKM_sk_shift(X509_PURPOSE, (st)) # define sk_X509_PURPOSE_pop(st) SKM_sk_pop(X509_PURPOSE, (st)) # define sk_X509_PURPOSE_sort(st) SKM_sk_sort(X509_PURPOSE, (st)) @@ -1948,6 +2076,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_REVOKED_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_REVOKED, (st), (cmp)) # define sk_X509_REVOKED_dup(st) SKM_sk_dup(X509_REVOKED, st) # define sk_X509_REVOKED_pop_free(st, free_func) SKM_sk_pop_free(X509_REVOKED, (st), (free_func)) +# define sk_X509_REVOKED_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_REVOKED, (st), (copy_func), (free_func)) # define sk_X509_REVOKED_shift(st) SKM_sk_shift(X509_REVOKED, (st)) # define sk_X509_REVOKED_pop(st) SKM_sk_pop(X509_REVOKED, (st)) # define sk_X509_REVOKED_sort(st) SKM_sk_sort(X509_REVOKED, (st)) @@ -1969,6 +2098,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_TRUST_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_TRUST, (st), (cmp)) # define sk_X509_TRUST_dup(st) SKM_sk_dup(X509_TRUST, st) # define sk_X509_TRUST_pop_free(st, free_func) SKM_sk_pop_free(X509_TRUST, (st), (free_func)) +# define sk_X509_TRUST_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_TRUST, (st), (copy_func), (free_func)) # define sk_X509_TRUST_shift(st) SKM_sk_shift(X509_TRUST, (st)) # define sk_X509_TRUST_pop(st) SKM_sk_pop(X509_TRUST, (st)) # define sk_X509_TRUST_sort(st) SKM_sk_sort(X509_TRUST, (st)) @@ -1990,6 +2120,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_X509_VERIFY_PARAM_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(X509_VERIFY_PARAM, (st), (cmp)) # define sk_X509_VERIFY_PARAM_dup(st) SKM_sk_dup(X509_VERIFY_PARAM, st) # define sk_X509_VERIFY_PARAM_pop_free(st, free_func) 
SKM_sk_pop_free(X509_VERIFY_PARAM, (st), (free_func)) +# define sk_X509_VERIFY_PARAM_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(X509_VERIFY_PARAM, (st), (copy_func), (free_func)) # define sk_X509_VERIFY_PARAM_shift(st) SKM_sk_shift(X509_VERIFY_PARAM, (st)) # define sk_X509_VERIFY_PARAM_pop(st) SKM_sk_pop(X509_VERIFY_PARAM, (st)) # define sk_X509_VERIFY_PARAM_sort(st) SKM_sk_sort(X509_VERIFY_PARAM, (st)) @@ -2011,6 +2142,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_nid_triple_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(nid_triple, (st), (cmp)) # define sk_nid_triple_dup(st) SKM_sk_dup(nid_triple, st) # define sk_nid_triple_pop_free(st, free_func) SKM_sk_pop_free(nid_triple, (st), (free_func)) +# define sk_nid_triple_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(nid_triple, (st), (copy_func), (free_func)) # define sk_nid_triple_shift(st) SKM_sk_shift(nid_triple, (st)) # define sk_nid_triple_pop(st) SKM_sk_pop(nid_triple, (st)) # define sk_nid_triple_sort(st) SKM_sk_sort(nid_triple, (st)) @@ -2032,6 +2164,7 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_void_set_cmp_func(st, cmp) SKM_sk_set_cmp_func(void, (st), (cmp)) # define sk_void_dup(st) SKM_sk_dup(void, st) # define sk_void_pop_free(st, free_func) SKM_sk_pop_free(void, (st), (free_func)) +# define sk_void_deep_copy(st, copy_func, free_func) SKM_sk_deep_copy(void, (st), (copy_func), (free_func)) # define sk_void_shift(st) SKM_sk_shift(void, (st)) # define sk_void_pop(st) SKM_sk_pop(void, (st)) # define sk_void_sort(st) SKM_sk_sort(void, (st)) @@ -2042,7 +2175,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OPENSSL_STRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_PTR_OF(char, val)) # define sk_OPENSSL_STRING_value(st, i) ((OPENSSL_STRING)sk_value(CHECKED_STACK_OF(OPENSSL_STRING, st), i)) # define sk_OPENSSL_STRING_num(st) SKM_sk_num(OPENSSL_STRING, st) -# define sk_OPENSSL_STRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_STRING, free_func)) +# define sk_OPENSSL_STRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_SK_FREE_FUNC(char, free_func)) +# define sk_OPENSSL_STRING_deep_copy(st, copy_func, free_func) ((STACK_OF(OPENSSL_STRING) *)sk_deep_copy(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_SK_COPY_FUNC(char, copy_func), CHECKED_SK_FREE_FUNC(char, free_func))) # define sk_OPENSSL_STRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_STRING, st), CHECKED_PTR_OF(char, val), i) # define sk_OPENSSL_STRING_free(st) SKM_sk_free(OPENSSL_STRING, st) # define sk_OPENSSL_STRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_STRING, st), i, CHECKED_PTR_OF(char, val)) @@ -2065,7 +2199,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OPENSSL_BLOCK_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_PTR_OF(void, val)) # define sk_OPENSSL_BLOCK_value(st, i) ((OPENSSL_BLOCK)sk_value(CHECKED_STACK_OF(OPENSSL_BLOCK, st), i)) # define sk_OPENSSL_BLOCK_num(st) SKM_sk_num(OPENSSL_BLOCK, st) -# define sk_OPENSSL_BLOCK_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_SK_FREE_FUNC2(OPENSSL_BLOCK, free_func)) +# define sk_OPENSSL_BLOCK_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_SK_FREE_FUNC(void, free_func)) +# define sk_OPENSSL_BLOCK_deep_copy(st, copy_func, free_func) ((STACK_OF(OPENSSL_BLOCK) *)sk_deep_copy(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_SK_COPY_FUNC(void, 
copy_func), CHECKED_SK_FREE_FUNC(void, free_func))) # define sk_OPENSSL_BLOCK_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_BLOCK, st), CHECKED_PTR_OF(void, val), i) # define sk_OPENSSL_BLOCK_free(st) SKM_sk_free(OPENSSL_BLOCK, st) # define sk_OPENSSL_BLOCK_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_BLOCK, st), i, CHECKED_PTR_OF(void, val)) @@ -2088,7 +2223,8 @@ DECLARE_SPECIAL_STACK_OF(OPENSSL_BLOCK, void) # define sk_OPENSSL_PSTRING_find(st, val) sk_find(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val)) # define sk_OPENSSL_PSTRING_value(st, i) ((OPENSSL_PSTRING)sk_value(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i)) # define sk_OPENSSL_PSTRING_num(st) SKM_sk_num(OPENSSL_PSTRING, st) -# define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC2(OPENSSL_PSTRING, free_func)) +# define sk_OPENSSL_PSTRING_pop_free(st, free_func) sk_pop_free(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_FREE_FUNC(OPENSSL_STRING, free_func)) +# define sk_OPENSSL_PSTRING_deep_copy(st, copy_func, free_func) ((STACK_OF(OPENSSL_PSTRING) *)sk_deep_copy(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_SK_COPY_FUNC(OPENSSL_STRING, copy_func), CHECKED_SK_FREE_FUNC(OPENSSL_STRING, free_func))) # define sk_OPENSSL_PSTRING_insert(st, val, i) sk_insert(CHECKED_STACK_OF(OPENSSL_PSTRING, st), CHECKED_PTR_OF(OPENSSL_STRING, val), i) # define sk_OPENSSL_PSTRING_free(st) SKM_sk_free(OPENSSL_PSTRING, st) # define sk_OPENSSL_PSTRING_set(st, i, val) sk_set(CHECKED_STACK_OF(OPENSSL_PSTRING, st), i, CHECKED_PTR_OF(OPENSSL_STRING, val)) diff --git a/deps/openssl/openssl/crypto/stack/stack.c b/deps/openssl/openssl/crypto/stack/stack.c index 331f907190f7b2..de437acf6a5cb6 100644 --- a/deps/openssl/openssl/crypto/stack/stack.c +++ b/deps/openssl/openssl/crypto/stack/stack.c @@ -115,6 +115,40 @@ _STACK *sk_dup(_STACK *sk) return (NULL); } +_STACK *sk_deep_copy(_STACK *sk, void *(*copy_func) (void *), + void (*free_func) (void *)) +{ + _STACK *ret; + int i; + + if ((ret = OPENSSL_malloc(sizeof(_STACK))) == NULL) + return ret; + ret->comp = sk->comp; + ret->sorted = sk->sorted; + ret->num = sk->num; + ret->num_alloc = sk->num > MIN_NODES ? 
sk->num : MIN_NODES; + ret->data = OPENSSL_malloc(sizeof(char *) * ret->num_alloc); + if (ret->data == NULL) { + OPENSSL_free(ret); + return NULL; + } + for (i = 0; i < ret->num_alloc; i++) + ret->data[i] = NULL; + + for (i = 0; i < ret->num; ++i) { + if (sk->data[i] == NULL) + continue; + if ((ret->data[i] = copy_func(sk->data[i])) == NULL) { + while (--i >= 0) + if (ret->data[i] != NULL) + free_func(ret->data[i]); + sk_free(ret); + return NULL; + } + } + return ret; +} + _STACK *sk_new_null(void) { return sk_new((int (*)(const void *, const void *))0); diff --git a/deps/openssl/openssl/crypto/stack/stack.h b/deps/openssl/openssl/crypto/stack/stack.h index 8d6e939a803147..eb07216659d0f3 100644 --- a/deps/openssl/openssl/crypto/stack/stack.h +++ b/deps/openssl/openssl/crypto/stack/stack.h @@ -83,6 +83,7 @@ _STACK *sk_new(int (*cmp) (const void *, const void *)); _STACK *sk_new_null(void); void sk_free(_STACK *); void sk_pop_free(_STACK *st, void (*func) (void *)); +_STACK *sk_deep_copy(_STACK *, void *(*)(void *), void (*)(void *)); int sk_insert(_STACK *sk, void *data, int where); void *sk_delete(_STACK *st, int loc); void *sk_delete_ptr(_STACK *st, void *p); diff --git a/deps/openssl/openssl/crypto/symhacks.h b/deps/openssl/openssl/crypto/symhacks.h index 2eadf7f3df2b5d..239fa4fb1b77e7 100644 --- a/deps/openssl/openssl/crypto/symhacks.h +++ b/deps/openssl/openssl/crypto/symhacks.h @@ -166,7 +166,7 @@ # undef CRYPTO_get_locked_mem_ex_functions # define CRYPTO_get_locked_mem_ex_functions CRYPTO_get_locked_mem_ex_funcs -/* Hack some long SSL names */ +/* Hack some long SSL/TLS names */ # undef SSL_CTX_set_default_verify_paths # define SSL_CTX_set_default_verify_paths SSL_CTX_set_def_verify_paths # undef SSL_get_ex_data_X509_STORE_CTX_idx @@ -183,6 +183,10 @@ # define SSL_CTX_set_default_passwd_cb_userdata SSL_CTX_set_def_passwd_cb_ud # undef SSL_COMP_get_compression_methods # define SSL_COMP_get_compression_methods SSL_COMP_get_compress_methods +# undef SSL_COMP_set0_compression_methods +# define SSL_COMP_set0_compression_methods SSL_COMP_set0_compress_methods +# undef SSL_COMP_free_compression_methods +# define SSL_COMP_free_compression_methods SSL_COMP_free_compress_methods # undef ssl_add_clienthello_renegotiate_ext # define ssl_add_clienthello_renegotiate_ext ssl_add_clienthello_reneg_ext # undef ssl_add_serverhello_renegotiate_ext @@ -211,6 +215,16 @@ # define SSL_CTX_set_next_protos_advertised_cb SSL_CTX_set_next_protos_adv_cb # undef SSL_CTX_set_next_proto_select_cb # define SSL_CTX_set_next_proto_select_cb SSL_CTX_set_next_proto_sel_cb + +# undef tls1_send_server_supplemental_data +# define tls1_send_server_supplemental_data tls1_send_server_suppl_data +# undef tls1_send_client_supplemental_data +# define tls1_send_client_supplemental_data tls1_send_client_suppl_data +# undef tls1_get_server_supplemental_data +# define tls1_get_server_supplemental_data tls1_get_server_suppl_data +# undef tls1_get_client_supplemental_data +# define tls1_get_client_supplemental_data tls1_get_client_suppl_data + # undef ssl3_cbc_record_digest_supported # define ssl3_cbc_record_digest_supported ssl3_cbc_record_digest_support # undef ssl_check_clienthello_tlsext_late @@ -218,7 +232,11 @@ # undef ssl_check_clienthello_tlsext_early # define ssl_check_clienthello_tlsext_early ssl_check_clihello_tlsext_early -/* Hack some long ENGINE names */ +/* Hack some RSA long names */ +# undef RSA_padding_check_PKCS1_OAEP_mgf1 +# define RSA_padding_check_PKCS1_OAEP_mgf1 RSA_pad_check_PKCS1_OAEP_mgf1 + +/* Hack some 
ENGINE long names */ # undef ENGINE_get_default_BN_mod_exp_crt # define ENGINE_get_default_BN_mod_exp_crt ENGINE_get_def_BN_mod_exp_crt # undef ENGINE_set_default_BN_mod_exp_crt @@ -427,6 +445,18 @@ # define CMS_OriginatorIdentifierOrKey_it CMS_OriginatorIdOrKey_it # undef cms_SignerIdentifier_get0_signer_id # define cms_SignerIdentifier_get0_signer_id cms_SignerId_get0_signer_id +# undef CMS_RecipientInfo_kari_get0_orig_id +# define CMS_RecipientInfo_kari_get0_orig_id CMS_RecipInfo_kari_get0_orig_id +# undef CMS_RecipientInfo_kari_get0_reks +# define CMS_RecipientInfo_kari_get0_reks CMS_RecipInfo_kari_get0_reks +# undef CMS_RecipientEncryptedKey_cert_cmp +# define CMS_RecipientEncryptedKey_cert_cmp CMS_RecipEncryptedKey_cert_cmp +# undef CMS_RecipientInfo_kari_set0_pkey +# define CMS_RecipientInfo_kari_set0_pkey CMS_RecipInfo_kari_set0_pkey +# undef CMS_RecipientEncryptedKey_get0_id +# define CMS_RecipientEncryptedKey_get0_id CMS_RecipEncryptedKey_get0_id +# undef CMS_RecipientInfo_kari_orig_id_cmp +# define CMS_RecipientInfo_kari_orig_id_cmp CMS_RecipInfo_kari_orig_id_cmp /* Hack some long DTLS1 names */ # undef dtls1_retransmit_buffered_messages diff --git a/deps/openssl/openssl/crypto/ts/ts_rsp_sign.c b/deps/openssl/openssl/crypto/ts/ts_rsp_sign.c index 031d872e2cf6ba..db6ce3241f73d0 100644 --- a/deps/openssl/openssl/crypto/ts/ts_rsp_sign.c +++ b/deps/openssl/openssl/crypto/ts/ts_rsp_sign.c @@ -238,7 +238,6 @@ int TS_RESP_CTX_set_def_policy(TS_RESP_CTX *ctx, ASN1_OBJECT *def_policy) int TS_RESP_CTX_set_certs(TS_RESP_CTX *ctx, STACK_OF(X509) *certs) { - int i; if (ctx->certs) { sk_X509_pop_free(ctx->certs, X509_free); @@ -246,14 +245,10 @@ int TS_RESP_CTX_set_certs(TS_RESP_CTX *ctx, STACK_OF(X509) *certs) } if (!certs) return 1; - if (!(ctx->certs = sk_X509_dup(certs))) { + if (!(ctx->certs = X509_chain_up_ref(certs))) { TSerr(TS_F_TS_RESP_CTX_SET_CERTS, ERR_R_MALLOC_FAILURE); return 0; } - for (i = 0; i < sk_X509_num(ctx->certs); ++i) { - X509 *cert = sk_X509_value(ctx->certs, i); - CRYPTO_add(&cert->references, +1, CRYPTO_LOCK_X509); - } return 1; } diff --git a/deps/openssl/openssl/crypto/ts/ts_rsp_verify.c b/deps/openssl/openssl/crypto/ts/ts_rsp_verify.c index 32b4d9923f46c0..3ce765dfa1b6a1 100644 --- a/deps/openssl/openssl/crypto/ts/ts_rsp_verify.c +++ b/deps/openssl/openssl/crypto/ts/ts_rsp_verify.c @@ -637,7 +637,7 @@ static int TS_compute_imprint(BIO *data, TS_TST_INFO *tst_info, X509_ALGOR_free(*md_alg); OPENSSL_free(*imprint); *imprint_len = 0; - *imprint = NULL; + *imprint = 0; return 0; } diff --git a/deps/openssl/openssl/crypto/ui/ui_openssl.c b/deps/openssl/openssl/crypto/ui/ui_openssl.c index 829ea8691eb579..5d66276418fc83 100644 --- a/deps/openssl/openssl/crypto/ui/ui_openssl.c +++ b/deps/openssl/openssl/crypto/ui/ui_openssl.c @@ -208,7 +208,7 @@ # elif !defined(OPENSSL_SYS_VMS) \ && !defined(OPENSSL_SYS_MSDOS) \ && !defined(OPENSSL_SYS_MACINTOSH_CLASSIC) \ - && !defined(MAC_OS_GUSI_SOURCE) \ + && !defined(MAC_OS_GUSI_SOURCE) \ && !defined(OPENSSL_SYS_VXWORKS) \ && !defined(OPENSSL_SYS_NETWARE) # define TERMIOS diff --git a/deps/openssl/openssl/crypto/whrlpool/asm/wp-mmx.pl b/deps/openssl/openssl/crypto/whrlpool/asm/wp-mmx.pl index cb2381c22ba12a..c584e5b92b251a 100644 --- a/deps/openssl/openssl/crypto/whrlpool/asm/wp-mmx.pl +++ b/deps/openssl/openssl/crypto/whrlpool/asm/wp-mmx.pl @@ -118,34 +118,36 @@ () &movq (@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8)); # rc[r] &mov ("eax",&DWP(0,"esp")); &mov ("ebx",&DWP(4,"esp")); + &movz ("ecx",&LB("eax")); + &movz ("edx",&HB("eax")); 
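A note on the stack additions earlier in this series: the safestack.h hunks generate a sk_<TYPE>_deep_copy() wrapper for every stack type, all funnelling into the new sk_deep_copy() primitive declared in stack.h and implemented in stack.c. Below is a minimal usage sketch, not part of the patch itself; it assumes the patched headers, dup_strings() and str_free() are illustrative names, and BUF_strdup() is the stock OpenSSL string duplicator.

    #include <openssl/safestack.h>
    #include <openssl/buffer.h>   /* BUF_strdup() */
    #include <openssl/crypto.h>   /* OPENSSL_free() */

    /* Element destructor with the void (*)(char *) shape that
     * CHECKED_SK_FREE_FUNC(char, ...) expects. */
    static void str_free(char *s)
    {
        OPENSSL_free(s);
    }

    /* Clone a stack of strings, duplicating every element. On any
     * allocation failure sk_deep_copy() releases the partial copy via
     * str_free() and returns NULL. */
    static STACK_OF(OPENSSL_STRING) *dup_strings(STACK_OF(OPENSSL_STRING) *in)
    {
        return sk_OPENSSL_STRING_deep_copy(in, BUF_strdup, str_free);
    }
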
for($i=0;$i<8;$i++) { my $func = ($i==0)? \&movq : \&pxor; - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); + &shr ("eax",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); - &shr ("eax",16); + &movz ("edx",&HB("eax")); &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); &$func (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); &mov ("eax",&DWP(($i+1)*8,"esp")); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); + &movz ("edx",&HB("ebx")); &$func (@mm[2],&QWP(&row(2),$tbl,"esi",8)); &$func (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); + &shr ("ebx",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); - &shr ("ebx",16); + &movz ("edx",&HB("ebx")); &$func (@mm[4],&QWP(&row(4),$tbl,"esi",8)); &$func (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); &mov ("ebx",&DWP(($i+1)*8+4,"esp")); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); + &movz ("edx",&HB("eax")); &$func (@mm[6],&QWP(&row(6),$tbl,"esi",8)); &$func (@mm[7],&QWP(&row(7),$tbl,"edi",8)); push(@mm,shift(@mm)); @@ -154,32 +156,32 @@ () for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); } # K=L for($i=0;$i<8;$i++) { - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); + &shr ("eax",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); - &shr ("eax",16); + &movz ("edx",&HB("eax")); &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); &pxor (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); &mov ("eax",&DWP(64+($i+1)*8,"esp")) if ($i<7); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); + &movz ("edx",&HB("ebx")); &pxor (@mm[2],&QWP(&row(2),$tbl,"esi",8)); &pxor (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); + &shr ("ebx",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); - &shr ("ebx",16); + &movz ("edx",&HB("ebx")); &pxor (@mm[4],&QWP(&row(4),$tbl,"esi",8)); &pxor (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); &mov ("ebx",&DWP(64+($i+1)*8+4,"esp")) if ($i<7); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); + &movz ("edx",&HB("eax")); &pxor (@mm[6],&QWP(&row(6),$tbl,"esi",8)); &pxor (@mm[7],&QWP(&row(7),$tbl,"edi",8)); push(@mm,shift(@mm)); diff --git a/deps/openssl/openssl/crypto/whrlpool/asm/wp-x86_64.pl b/deps/openssl/openssl/crypto/whrlpool/asm/wp-x86_64.pl index 24b2ff60c38b78..5a3bdbcf20d1a3 100644 --- a/deps/openssl/openssl/crypto/whrlpool/asm/wp-x86_64.pl +++ b/deps/openssl/openssl/crypto/whrlpool/asm/wp-x86_64.pl @@ -91,41 +91,44 @@ $code.=<<___; xor %rsi,%rsi mov %rsi,24(%rbx) # zero round counter + jmp .Lround .align 16 .Lround: mov 4096(%rbp,%rsi,8),@mm[0] # rc[r] mov 0(%rsp),%eax mov 4(%rsp),%ebx + movz %al,%ecx + movz %ah,%edx ___ for($i=0;$i<8;$i++) { my $func = ($i==0)? 
"mov" : "xor"; $code.=<<___; - mov %al,%cl - mov %ah,%dl + shr \$16,%eax lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%eax + movz %ah,%edx xor 0(%rbp,%rsi,8),@mm[0] $func 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl mov $i*8+8(%rsp),%eax # ($i+1)*8 lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi + movz %bh,%edx $func 6(%rbp,%rsi,8),@mm[2] $func 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl + shr \$16,%ebx lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%ebx + movz %bh,%edx $func 4(%rbp,%rsi,8),@mm[4] $func 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl mov $i*8+8+4(%rsp),%ebx # ($i+1)*8+4 lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi + movz %ah,%edx $func 2(%rbp,%rsi,8),@mm[6] $func 1(%rbp,%rdi,8),@mm[7] ___ @@ -134,32 +137,32 @@ for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; } # K=L for($i=0;$i<8;$i++) { $code.=<<___; - mov %al,%cl - mov %ah,%dl + shr \$16,%eax lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%eax + movz %ah,%edx xor 0(%rbp,%rsi,8),@mm[0] xor 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl `"mov 64+$i*8+8(%rsp),%eax" if($i<7);` # 64+($i+1)*8 lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi + movz %bh,%edx xor 6(%rbp,%rsi,8),@mm[2] xor 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl + shr \$16,%ebx lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%ebx + movz %bh,%edx xor 4(%rbp,%rsi,8),@mm[4] xor 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl `"mov 64+$i*8+8+4(%rsp),%ebx" if($i<7);` # 64+($i+1)*8+4 lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi + movz %ah,%edx xor 2(%rbp,%rsi,8),@mm[6] xor 1(%rbp,%rdi,8),@mm[7] ___ diff --git a/deps/openssl/openssl/crypto/x509/Makefile b/deps/openssl/openssl/crypto/x509/Makefile index 72c82278f43ddc..cfbb59c37d0e64 100644 --- a/deps/openssl/openssl/crypto/x509/Makefile +++ b/deps/openssl/openssl/crypto/x509/Makefile @@ -33,7 +33,7 @@ LIBOBJ= x509_def.o x509_d2.o x509_r2x.o x509_cmp.o \ SRC= $(LIBSRC) EXHEADER= x509.h x509_vfy.h -HEADER= $(EXHEADER) +HEADER= $(EXHEADER) vpm_int.h ALL= $(GENERAL) $(SRC) $(HEADER) @@ -312,7 +312,7 @@ x509_vfy.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h x509_vfy.o: ../../include/openssl/sha.h ../../include/openssl/stack.h x509_vfy.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h x509_vfy.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h -x509_vfy.o: ../cryptlib.h x509_vfy.c +x509_vfy.o: ../cryptlib.h vpm_int.h x509_vfy.c x509_vpm.o: ../../e_os.h ../../include/openssl/asn1.h x509_vpm.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h x509_vpm.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h @@ -326,7 +326,7 @@ x509_vpm.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h x509_vpm.o: ../../include/openssl/sha.h ../../include/openssl/stack.h x509_vpm.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h x509_vpm.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h -x509_vpm.o: ../cryptlib.h x509_vpm.c +x509_vpm.o: ../cryptlib.h vpm_int.h x509_vpm.c x509cset.o: ../../e_os.h ../../include/openssl/asn1.h x509cset.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h x509cset.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h @@ -393,15 +393,17 @@ x509type.o: ../../include/openssl/sha.h ../../include/openssl/stack.h x509type.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h x509type.o: 
../../include/openssl/x509_vfy.h ../cryptlib.h x509type.c x_all.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h -x_all.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -x_all.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h -x_all.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -x_all.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h -x_all.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h -x_all.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +x_all.o: ../../include/openssl/buffer.h ../../include/openssl/conf.h +x_all.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h +x_all.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +x_all.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +x_all.o: ../../include/openssl/err.h ../../include/openssl/evp.h +x_all.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +x_all.o: ../../include/openssl/objects.h ../../include/openssl/ocsp.h x_all.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h x_all.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h x_all.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h x_all.o: ../../include/openssl/sha.h ../../include/openssl/stack.h x_all.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h -x_all.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_all.c +x_all.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h +x_all.o: ../cryptlib.h x_all.c diff --git a/deps/openssl/openssl/crypto/x509/vpm_int.h b/deps/openssl/openssl/crypto/x509/vpm_int.h new file mode 100644 index 00000000000000..9c55defc512a7f --- /dev/null +++ b/deps/openssl/openssl/crypto/x509/vpm_int.h @@ -0,0 +1,70 @@ +/* vpm_int.h */ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project + * 2013. + */ +/* ==================================================================== + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. 
Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +/* internal only structure to hold additional X509_VERIFY_PARAM data */ + +struct X509_VERIFY_PARAM_ID_st { + STACK_OF(OPENSSL_STRING) *hosts; /* Set of acceptable names */ + unsigned int hostflags; /* Flags to control matching features */ + char *peername; /* Matching hostname in peer certificate */ + char *email; /* If not NULL email address to match */ + size_t emaillen; + unsigned char *ip; /* If not NULL IP address to match */ + size_t iplen; /* Length of IP address */ +}; diff --git a/deps/openssl/openssl/crypto/x509/x509.h b/deps/openssl/openssl/crypto/x509/x509.h index a4911741005e55..99337b849a520c 100644 --- a/deps/openssl/openssl/crypto/x509/x509.h +++ b/deps/openssl/openssl/crypto/x509/x509.h @@ -361,6 +361,7 @@ typedef struct x509_cert_pair_st { # define X509_FLAG_NO_SIGDUMP (1L << 9) # define X509_FLAG_NO_AUX (1L << 10) # define X509_FLAG_NO_ATTRIBUTES (1L << 11) +# define X509_FLAG_NO_IDS (1L << 12) /* Flags specific to X509_NAME_print_ex() */ @@ -645,10 +646,12 @@ int X509_signature_print(BIO *bp, X509_ALGOR *alg, ASN1_STRING *sig); int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md); int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx); +int X509_http_nbio(OCSP_REQ_CTX *rctx, X509 **pcert); int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md); int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx); int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md); int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx); +int X509_CRL_http_nbio(OCSP_REQ_CTX *rctx, X509_CRL **pcrl); int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md); int X509_pubkey_digest(const X509 *data, const EVP_MD *type, @@ -745,6 +748,7 @@ X509 *X509_dup(X509 *x509); X509_ATTRIBUTE *X509_ATTRIBUTE_dup(X509_ATTRIBUTE *xa); X509_EXTENSION *X509_EXTENSION_dup(X509_EXTENSION *ex); X509_CRL *X509_CRL_dup(X509_CRL *crl); +X509_REVOKED *X509_REVOKED_dup(X509_REVOKED *rev); X509_REQ *X509_REQ_dup(X509_REQ *req); X509_ALGOR *X509_ALGOR_dup(X509_ALGOR *xn); int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, @@ -828,6 +832,12 @@ void *X509_get_ex_data(X509 *r, int idx); int i2d_X509_AUX(X509 *a, unsigned char **pp); X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp, long length); +int i2d_re_X509_tbs(X509 *x, unsigned 
char **pp); + +void X509_get0_signature(ASN1_BIT_STRING **psig, X509_ALGOR **palg, + const X509 *x); +int X509_get_signature_nid(const X509 *x); + int X509_alias_set1(X509 *x, unsigned char *name, int len); int X509_keyid_set1(X509 *x, unsigned char *id, int len); unsigned char *X509_alias_get0(X509 *x, int *len); @@ -939,9 +949,17 @@ int X509_CRL_sort(X509_CRL *crl); int X509_REVOKED_set_serialNumber(X509_REVOKED *x, ASN1_INTEGER *serial); int X509_REVOKED_set_revocationDate(X509_REVOKED *r, ASN1_TIME *tm); +X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer, + EVP_PKEY *skey, const EVP_MD *md, unsigned int flags); + int X509_REQ_check_private_key(X509_REQ *x509, EVP_PKEY *pkey); int X509_check_private_key(X509 *x509, EVP_PKEY *pkey); +int X509_chain_check_suiteb(int *perror_depth, + X509 *x, STACK_OF(X509) *chain, + unsigned long flags); +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags); +STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain); int X509_issuer_and_serial_cmp(const X509 *a, const X509 *b); unsigned long X509_issuer_and_serial_hash(X509 *a); @@ -1236,6 +1254,7 @@ void ERR_load_X509_strings(void); # define X509_F_X509_ATTRIBUTE_GET0_DATA 139 # define X509_F_X509_ATTRIBUTE_SET1_DATA 138 # define X509_F_X509_CHECK_PRIVATE_KEY 128 +# define X509_F_X509_CRL_DIFF 105 # define X509_F_X509_CRL_PRINT_FP 147 # define X509_F_X509_EXTENSION_CREATE_BY_NID 108 # define X509_F_X509_EXTENSION_CREATE_BY_OBJ 109 @@ -1268,20 +1287,27 @@ void ERR_load_X509_strings(void); # define X509_F_X509_VERIFY_CERT 127 /* Reason codes. */ +# define X509_R_AKID_MISMATCH 110 # define X509_R_BAD_X509_FILETYPE 100 # define X509_R_BASE64_DECODE_ERROR 118 # define X509_R_CANT_CHECK_DH_KEY 114 # define X509_R_CERT_ALREADY_IN_HASH_TABLE 101 +# define X509_R_CRL_ALREADY_DELTA 127 +# define X509_R_CRL_VERIFY_FAILURE 131 # define X509_R_ERR_ASN1_LIB 102 +# define X509_R_IDP_MISMATCH 128 # define X509_R_INVALID_DIRECTORY 113 # define X509_R_INVALID_FIELD_NAME 119 # define X509_R_INVALID_TRUST 123 +# define X509_R_ISSUER_MISMATCH 129 # define X509_R_KEY_TYPE_MISMATCH 115 # define X509_R_KEY_VALUES_MISMATCH 116 # define X509_R_LOADING_CERT_DIR 103 # define X509_R_LOADING_DEFAULTS 104 # define X509_R_METHOD_NOT_SUPPORTED 124 +# define X509_R_NEWER_CRL_NOT_NEWER 132 # define X509_R_NO_CERT_SET_FOR_US_TO_VERIFY 105 +# define X509_R_NO_CRL_NUMBER 130 # define X509_R_PUBLIC_KEY_DECODE_ERROR 125 # define X509_R_PUBLIC_KEY_ENCODE_ERROR 126 # define X509_R_SHOULD_RETRY 106 diff --git a/deps/openssl/openssl/crypto/x509/x509_cmp.c b/deps/openssl/openssl/crypto/x509/x509_cmp.c index 3c5b717c15a355..49c71b91280e52 100644 --- a/deps/openssl/openssl/crypto/x509/x509_cmp.c +++ b/deps/openssl/openssl/crypto/x509/x509_cmp.c @@ -179,11 +179,23 @@ unsigned long X509_subject_name_hash_old(X509 *x) */ int X509_cmp(const X509 *a, const X509 *b) { + int rv; /* ensure hash is valid */ X509_check_purpose((X509 *)a, -1, 0); X509_check_purpose((X509 *)b, -1, 0); - return memcmp(a->sha1_hash, b->sha1_hash, SHA_DIGEST_LENGTH); + rv = memcmp(a->sha1_hash, b->sha1_hash, SHA_DIGEST_LENGTH); + if (rv) + return rv; + /* Check for match against stored encoding too */ + if (!a->cert_info->enc.modified && !b->cert_info->enc.modified) { + rv = (int)(a->cert_info->enc.len - b->cert_info->enc.len); + if (rv) + return rv; + return memcmp(a->cert_info->enc.enc, b->cert_info->enc.enc, + a->cert_info->enc.len); + } + return rv; } #endif @@ -339,3 +351,148 @@ int X509_check_private_key(X509 *x, EVP_PKEY *k) return 1; return 0; } 
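A note on X509_chain_up_ref(), declared just above: the ts_rsp_sign.c hunk earlier replaces a hand-rolled sk_X509_dup() plus per-certificate CRYPTO_add() loop with this one call, which duplicates the STACK and bumps the reference count of every X509 in it. The sketch below shows the same retain-a-chain pattern; it is illustrative only, and retain_chain()/ctx_certs are hypothetical names.

    #include <openssl/x509.h>

    /* Store a caller-supplied certificate chain in a context object,
     * dropping any chain held previously. Returns 1 on success,
     * 0 on allocation failure. */
    static int retain_chain(STACK_OF(X509) **ctx_certs, STACK_OF(X509) *certs)
    {
        if (*ctx_certs) {
            sk_X509_pop_free(*ctx_certs, X509_free);
            *ctx_certs = NULL;
        }
        if (certs == NULL)
            return 1;
        /* Dup the stack and up-ref each X509, as the patched
         * TS_RESP_CTX_set_certs() now does. */
        *ctx_certs = X509_chain_up_ref(certs);
        return *ctx_certs != NULL;
    }
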
+ +/* + * Check a suite B algorithm is permitted: pass in a public key and the NID + * of its signature (or 0 if no signature). The pflags is a pointer to a + * flags field which must contain the suite B verification flags. + */ + +#ifndef OPENSSL_NO_EC + +static int check_suite_b(EVP_PKEY *pkey, int sign_nid, unsigned long *pflags) +{ + const EC_GROUP *grp = NULL; + int curve_nid; + if (pkey && pkey->type == EVP_PKEY_EC) + grp = EC_KEY_get0_group(pkey->pkey.ec); + if (!grp) + return X509_V_ERR_SUITE_B_INVALID_ALGORITHM; + curve_nid = EC_GROUP_get_curve_name(grp); + /* Check curve is consistent with LOS */ + if (curve_nid == NID_secp384r1) { /* P-384 */ + /* + * Check signature algorithm is consistent with curve. + */ + if (sign_nid != -1 && sign_nid != NID_ecdsa_with_SHA384) + return X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM; + if (!(*pflags & X509_V_FLAG_SUITEB_192_LOS)) + return X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED; + /* If we encounter P-384 we cannot use P-256 later */ + *pflags &= ~X509_V_FLAG_SUITEB_128_LOS_ONLY; + } else if (curve_nid == NID_X9_62_prime256v1) { /* P-256 */ + if (sign_nid != -1 && sign_nid != NID_ecdsa_with_SHA256) + return X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM; + if (!(*pflags & X509_V_FLAG_SUITEB_128_LOS_ONLY)) + return X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED; + } else + return X509_V_ERR_SUITE_B_INVALID_CURVE; + + return X509_V_OK; +} + +int X509_chain_check_suiteb(int *perror_depth, X509 *x, STACK_OF(X509) *chain, + unsigned long flags) +{ + int rv, i, sign_nid; + EVP_PKEY *pk = NULL; + unsigned long tflags; + if (!(flags & X509_V_FLAG_SUITEB_128_LOS)) + return X509_V_OK; + tflags = flags; + /* If no EE certificate passed in must be first in chain */ + if (x == NULL) { + x = sk_X509_value(chain, 0); + i = 1; + } else + i = 0; + + if (X509_get_version(x) != 2) { + rv = X509_V_ERR_SUITE_B_INVALID_VERSION; + /* Correct error depth */ + i = 0; + goto end; + } + + pk = X509_get_pubkey(x); + /* Check EE key only */ + rv = check_suite_b(pk, -1, &tflags); + if (rv != X509_V_OK) { + /* Correct error depth */ + i = 0; + goto end; + } + for (; i < sk_X509_num(chain); i++) { + sign_nid = X509_get_signature_nid(x); + x = sk_X509_value(chain, i); + if (X509_get_version(x) != 2) { + rv = X509_V_ERR_SUITE_B_INVALID_VERSION; + goto end; + } + EVP_PKEY_free(pk); + pk = X509_get_pubkey(x); + rv = check_suite_b(pk, sign_nid, &tflags); + if (rv != X509_V_OK) + goto end; + } + + /* Final check: root CA signature */ + rv = check_suite_b(pk, X509_get_signature_nid(x), &tflags); + end: + if (pk) + EVP_PKEY_free(pk); + if (rv != X509_V_OK) { + /* Invalid signature or LOS errors are for previous cert */ + if ((rv == X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM + || rv == X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED) && i) + i--; + /* + * If we have LOS error and flags changed then we are signing P-384 + * with P-256. Use more meaninggul error. 
+ */ + if (rv == X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED && flags != tflags) + rv = X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256; + if (perror_depth) + *perror_depth = i; + } + return rv; +} + +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags) +{ + int sign_nid; + if (!(flags & X509_V_FLAG_SUITEB_128_LOS)) + return X509_V_OK; + sign_nid = OBJ_obj2nid(crl->crl->sig_alg->algorithm); + return check_suite_b(pk, sign_nid, &flags); +} + +#else +int X509_chain_check_suiteb(int *perror_depth, X509 *x, STACK_OF(X509) *chain, + unsigned long flags) +{ + return 0; +} + +int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk, unsigned long flags) +{ + return 0; +} + +#endif +/* + * Not strictly speaking an "up_ref" as a STACK doesn't have a reference + * count but it has the same effect by duping the STACK and upping the ref of + * each X509 structure. + */ +STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain) +{ + STACK_OF(X509) *ret; + int i; + ret = sk_X509_dup(chain); + for (i = 0; i < sk_X509_num(ret); i++) { + X509 *x = sk_X509_value(ret, i); + CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); + } + return ret; +} diff --git a/deps/openssl/openssl/crypto/x509/x509_err.c b/deps/openssl/openssl/crypto/x509/x509_err.c index 61a19f74109b0d..43cde18e49a7a2 100644 --- a/deps/openssl/openssl/crypto/x509/x509_err.c +++ b/deps/openssl/openssl/crypto/x509/x509_err.c @@ -1,6 +1,6 @@ /* crypto/x509/x509_err.c */ /* ==================================================================== - * Copyright (c) 1999-2006 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2012 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -88,6 +88,7 @@ static ERR_STRING_DATA X509_str_functs[] = { {ERR_FUNC(X509_F_X509_ATTRIBUTE_GET0_DATA), "X509_ATTRIBUTE_get0_data"}, {ERR_FUNC(X509_F_X509_ATTRIBUTE_SET1_DATA), "X509_ATTRIBUTE_set1_data"}, {ERR_FUNC(X509_F_X509_CHECK_PRIVATE_KEY), "X509_check_private_key"}, + {ERR_FUNC(X509_F_X509_CRL_DIFF), "X509_CRL_diff"}, {ERR_FUNC(X509_F_X509_CRL_PRINT_FP), "X509_CRL_print_fp"}, {ERR_FUNC(X509_F_X509_EXTENSION_CREATE_BY_NID), "X509_EXTENSION_create_by_NID"}, @@ -131,22 +132,29 @@ static ERR_STRING_DATA X509_str_functs[] = { }; static ERR_STRING_DATA X509_str_reasons[] = { + {ERR_REASON(X509_R_AKID_MISMATCH), "akid mismatch"}, {ERR_REASON(X509_R_BAD_X509_FILETYPE), "bad x509 filetype"}, {ERR_REASON(X509_R_BASE64_DECODE_ERROR), "base64 decode error"}, {ERR_REASON(X509_R_CANT_CHECK_DH_KEY), "cant check dh key"}, {ERR_REASON(X509_R_CERT_ALREADY_IN_HASH_TABLE), "cert already in hash table"}, + {ERR_REASON(X509_R_CRL_ALREADY_DELTA), "crl already delta"}, + {ERR_REASON(X509_R_CRL_VERIFY_FAILURE), "crl verify failure"}, {ERR_REASON(X509_R_ERR_ASN1_LIB), "err asn1 lib"}, + {ERR_REASON(X509_R_IDP_MISMATCH), "idp mismatch"}, {ERR_REASON(X509_R_INVALID_DIRECTORY), "invalid directory"}, {ERR_REASON(X509_R_INVALID_FIELD_NAME), "invalid field name"}, {ERR_REASON(X509_R_INVALID_TRUST), "invalid trust"}, + {ERR_REASON(X509_R_ISSUER_MISMATCH), "issuer mismatch"}, {ERR_REASON(X509_R_KEY_TYPE_MISMATCH), "key type mismatch"}, {ERR_REASON(X509_R_KEY_VALUES_MISMATCH), "key values mismatch"}, {ERR_REASON(X509_R_LOADING_CERT_DIR), "loading cert dir"}, {ERR_REASON(X509_R_LOADING_DEFAULTS), "loading defaults"}, {ERR_REASON(X509_R_METHOD_NOT_SUPPORTED), "method not supported"}, + {ERR_REASON(X509_R_NEWER_CRL_NOT_NEWER), "newer crl not newer"}, 
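A note on the Suite B checks added above: X509_chain_check_suiteb() and X509_CRL_check_suiteb() are applied by X509_verify_cert() (see the x509_vfy.c hunks further down) whenever the X509_V_FLAG_SUITEB_* verification flags are set, but they can also be driven directly. A minimal sketch follows, not part of the patch; chain_is_suiteb_128() is an illustrative name and the chain is assumed to be ordered leaf first.

    #include <openssl/x509.h>
    #include <openssl/x509_vfy.h>

    /* Returns X509_V_OK if the chain satisfies Suite B at the 128-bit
     * level of security, otherwise one of the X509_V_ERR_SUITE_B_*
     * codes; on failure *bad_depth receives the depth of the
     * offending certificate. */
    static int chain_is_suiteb_128(STACK_OF(X509) *chain, int *bad_depth)
    {
        /* Passing NULL for the end-entity cert makes the first element
         * of the chain the EE, exactly as X509_verify_cert() does. */
        return X509_chain_check_suiteb(bad_depth, NULL, chain,
                                       X509_V_FLAG_SUITEB_128_LOS);
    }
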
{ERR_REASON(X509_R_NO_CERT_SET_FOR_US_TO_VERIFY), "no cert set for us to verify"}, + {ERR_REASON(X509_R_NO_CRL_NUMBER), "no crl number"}, {ERR_REASON(X509_R_PUBLIC_KEY_DECODE_ERROR), "public key decode error"}, {ERR_REASON(X509_R_PUBLIC_KEY_ENCODE_ERROR), "public key encode error"}, {ERR_REASON(X509_R_SHOULD_RETRY), "should retry"}, diff --git a/deps/openssl/openssl/crypto/x509/x509_lu.c b/deps/openssl/openssl/crypto/x509/x509_lu.c index a910636f823d03..ff1fa975fd7c0b 100644 --- a/deps/openssl/openssl/crypto/x509/x509_lu.c +++ b/deps/openssl/openssl/crypto/x509/x509_lu.c @@ -236,6 +236,19 @@ void X509_STORE_free(X509_STORE *vfy) if (vfy == NULL) return; + i = CRYPTO_add(&vfy->references, -1, CRYPTO_LOCK_X509_STORE); +#ifdef REF_PRINT + REF_PRINT("X509_STORE", vfy); +#endif + if (i > 0) + return; +#ifdef REF_CHECK + if (i < 0) { + fprintf(stderr, "X509_STORE_free, bad reference count\n"); + abort(); /* ok */ + } +#endif + sk = vfy->get_cert_methods; for (i = 0; i < sk_X509_LOOKUP_num(sk); i++) { lu = sk_X509_LOOKUP_value(sk, i); @@ -679,6 +692,19 @@ void X509_STORE_set_verify_cb(X509_STORE *ctx, ctx->verify_cb = verify_cb; } +void X509_STORE_set_lookup_crls_cb(X509_STORE *ctx, + STACK_OF(X509_CRL) *(*cb) (X509_STORE_CTX + *ctx, + X509_NAME *nm)) +{ + ctx->lookup_crls = cb; +} + +X509_STORE *X509_STORE_CTX_get0_store(X509_STORE_CTX *ctx) +{ + return ctx->ctx; +} + IMPLEMENT_STACK_OF(X509_LOOKUP) IMPLEMENT_STACK_OF(X509_OBJECT) diff --git a/deps/openssl/openssl/crypto/x509/x509_set.c b/deps/openssl/openssl/crypto/x509/x509_set.c index 4645777634b756..5b802bd6c7549a 100644 --- a/deps/openssl/openssl/crypto/x509/x509_set.c +++ b/deps/openssl/openssl/crypto/x509/x509_set.c @@ -67,6 +67,11 @@ int X509_set_version(X509 *x, long version) { if (x == NULL) return (0); + if (version == 0) { + M_ASN1_INTEGER_free(x->cert_info->version); + x->cert_info->version = NULL; + return (1); + } if (x->cert_info->version == NULL) { if ((x->cert_info->version = M_ASN1_INTEGER_new()) == NULL) return (0); diff --git a/deps/openssl/openssl/crypto/x509/x509_trs.c b/deps/openssl/openssl/crypto/x509/x509_trs.c index 7e444795a5e77f..11e0763403aecc 100644 --- a/deps/openssl/openssl/crypto/x509/x509_trs.c +++ b/deps/openssl/openssl/crypto/x509/x509_trs.c @@ -119,6 +119,14 @@ int X509_check_trust(X509 *x, int id, int flags) int idx; if (id == -1) return 1; + /* We get this as a default value */ + if (id == 0) { + int rv; + rv = obj_trust(NID_anyExtendedKeyUsage, x, 0); + if (rv != X509_TRUST_UNTRUSTED) + return rv; + return trust_compat(NULL, x, 0); + } idx = X509_TRUST_get_by_id(id); if (idx == -1) return default_trust(id, x, flags); diff --git a/deps/openssl/openssl/crypto/x509/x509_txt.c b/deps/openssl/openssl/crypto/x509/x509_txt.c index d834180ec4430b..3d46d3ff8366e4 100644 --- a/deps/openssl/openssl/crypto/x509/x509_txt.c +++ b/deps/openssl/openssl/crypto/x509/x509_txt.c @@ -184,6 +184,26 @@ const char *X509_verify_cert_error_string(long n) case X509_V_ERR_CRL_PATH_VALIDATION_ERROR: return ("CRL path validation error"); + case X509_V_ERR_SUITE_B_INVALID_VERSION: + return ("Suite B: certificate version invalid"); + case X509_V_ERR_SUITE_B_INVALID_ALGORITHM: + return ("Suite B: invalid public key algorithm"); + case X509_V_ERR_SUITE_B_INVALID_CURVE: + return ("Suite B: invalid ECC curve"); + case X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM: + return ("Suite B: invalid signature algorithm"); + case X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED: + return ("Suite B: curve not allowed for this LOS"); + case 
X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256: + return ("Suite B: cannot sign P-384 with P-256"); + + case X509_V_ERR_HOSTNAME_MISMATCH: + return ("Hostname mismatch"); + case X509_V_ERR_EMAIL_MISMATCH: + return ("Email address mismatch"); + case X509_V_ERR_IP_ADDRESS_MISMATCH: + return ("IP address mismatch"); + default: BIO_snprintf(buf, sizeof buf, "error number %ld", n); return (buf); diff --git a/deps/openssl/openssl/crypto/x509/x509_vfy.c b/deps/openssl/openssl/crypto/x509/x509_vfy.c index 136bfbda617a1d..f3e9c56b091715 100644 --- a/deps/openssl/openssl/crypto/x509/x509_vfy.c +++ b/deps/openssl/openssl/crypto/x509/x509_vfy.c @@ -69,6 +69,7 @@ #include #include #include +#include "vpm_int.h" /* CRL score values */ @@ -113,6 +114,7 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer); static X509 *find_issuer(X509_STORE_CTX *ctx, STACK_OF(X509) *sk, X509 *x); static int check_chain_extensions(X509_STORE_CTX *ctx); static int check_name_constraints(X509_STORE_CTX *ctx); +static int check_id(X509_STORE_CTX *ctx); static int check_trust(X509_STORE_CTX *ctx); static int check_revocation(X509_STORE_CTX *ctx); static int check_cert(X509_STORE_CTX *ctx); @@ -148,14 +150,48 @@ static int x509_subject_cmp(X509 **a, X509 **b) return X509_subject_name_cmp(*a, *b); } #endif +/* Return 1 is a certificate is self signed */ +static int cert_self_signed(X509 *x) +{ + X509_check_purpose(x, -1, 0); + if (x->ex_flags & EXFLAG_SS) + return 1; + else + return 0; +} + +/* Given a certificate try and find an exact match in the store */ + +static X509 *lookup_cert_match(X509_STORE_CTX *ctx, X509 *x) +{ + STACK_OF(X509) *certs; + X509 *xtmp = NULL; + int i; + /* Lookup all certs with matching subject name */ + certs = ctx->lookup_certs(ctx, X509_get_subject_name(x)); + if (certs == NULL) + return NULL; + /* Look for exact match */ + for (i = 0; i < sk_X509_num(certs); i++) { + xtmp = sk_X509_value(certs, i); + if (!X509_cmp(xtmp, x)) + break; + } + if (i < sk_X509_num(certs)) + CRYPTO_add(&xtmp->references, 1, CRYPTO_LOCK_X509); + else + xtmp = NULL; + sk_X509_pop_free(certs, X509_free); + return xtmp; +} int X509_verify_cert(X509_STORE_CTX *ctx) { - X509 *x, *xtmp, *chain_ss = NULL; + X509 *x, *xtmp, *xtmp2, *chain_ss = NULL; int bad_chain = 0; X509_VERIFY_PARAM *param = ctx->param; int depth, i, ok = 0; - int num; + int num, j, retry; int (*cb) (int xok, X509_STORE_CTX *xctx); STACK_OF(X509) *sktmp = NULL; if (ctx->cert == NULL) { @@ -199,8 +235,24 @@ int X509_verify_cert(X509_STORE_CTX *ctx) * later. */ /* If we are self signed, we break */ - if (ctx->check_issued(ctx, x, x)) + if (cert_self_signed(x)) break; + /* + * If asked see if we can find issuer in trusted store first + */ + if (ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) { + ok = ctx->get_issuer(&xtmp, ctx, x); + if (ok < 0) + return ok; + /* + * If successful for now free up cert so it will be picked up + * again later. + */ + if (ok > 0) { + X509_free(xtmp); + break; + } + } /* If we were passed a cert chain, use it first */ if (ctx->untrusted != NULL) { @@ -224,88 +276,134 @@ int X509_verify_cert(X509_STORE_CTX *ctx) break; } + /* Remember how many untrusted certs we have */ + j = num; /* * at this point, chain should contain a list of untrusted certificates. * We now need to add at least one trusted one, if possible, otherwise we * complain. */ - /* - * Examine last certificate in chain and see if it is self signed. 
- */ - - i = sk_X509_num(ctx->chain); - x = sk_X509_value(ctx->chain, i - 1); - if (ctx->check_issued(ctx, x, x)) { - /* we have a self signed certificate */ - if (sk_X509_num(ctx->chain) == 1) { - /* - * We have a single self signed certificate: see if we can find - * it in the store. We must have an exact match to avoid possible - * impersonation. - */ - ok = ctx->get_issuer(&xtmp, ctx, x); - if ((ok <= 0) || X509_cmp(x, xtmp)) { - ctx->error = X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT; - ctx->current_cert = x; - ctx->error_depth = i - 1; - if (ok == 1) - X509_free(xtmp); - bad_chain = 1; - ok = cb(0, ctx); - if (!ok) - goto end; + do { + /* + * Examine last certificate in chain and see if it is self signed. + */ + i = sk_X509_num(ctx->chain); + x = sk_X509_value(ctx->chain, i - 1); + if (cert_self_signed(x)) { + /* we have a self signed certificate */ + if (sk_X509_num(ctx->chain) == 1) { + /* + * We have a single self signed certificate: see if we can + * find it in the store. We must have an exact match to avoid + * possible impersonation. + */ + ok = ctx->get_issuer(&xtmp, ctx, x); + if ((ok <= 0) || X509_cmp(x, xtmp)) { + ctx->error = X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT; + ctx->current_cert = x; + ctx->error_depth = i - 1; + if (ok == 1) + X509_free(xtmp); + bad_chain = 1; + ok = cb(0, ctx); + if (!ok) + goto end; + } else { + /* + * We have a match: replace certificate with store + * version so we get any trust settings. + */ + X509_free(x); + x = xtmp; + (void)sk_X509_set(ctx->chain, i - 1, x); + ctx->last_untrusted = 0; + } } else { /* - * We have a match: replace certificate with store version so - * we get any trust settings. + * extract and save self signed certificate for later use */ - X509_free(x); - x = xtmp; - (void)sk_X509_set(ctx->chain, i - 1, x); - ctx->last_untrusted = 0; + chain_ss = sk_X509_pop(ctx->chain); + ctx->last_untrusted--; + num--; + j--; + x = sk_X509_value(ctx->chain, num - 1); } - } else { - /* - * extract and save self signed certificate for later use - */ - chain_ss = sk_X509_pop(ctx->chain); - ctx->last_untrusted--; - num--; - x = sk_X509_value(ctx->chain, num - 1); } - } - - /* We now lookup certs from the certificate store */ - for (;;) { - /* If we have enough, we break */ - if (depth < num) - break; + /* We now lookup certs from the certificate store */ + for (;;) { + /* If we have enough, we break */ + if (depth < num) + break; + /* If we are self signed, we break */ + if (cert_self_signed(x)) + break; + ok = ctx->get_issuer(&xtmp, ctx, x); - /* If we are self signed, we break */ - if (ctx->check_issued(ctx, x, x)) - break; + if (ok < 0) + return ok; + if (ok == 0) + break; + x = xtmp; + if (!sk_X509_push(ctx->chain, x)) { + X509_free(xtmp); + X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); + return 0; + } + num++; + } - ok = ctx->get_issuer(&xtmp, ctx, x); + /* we now have our chain, lets check it... */ + i = check_trust(ctx); - if (ok < 0) - return ok; - if (ok == 0) - break; + /* If explicitly rejected error */ + if (i == X509_TRUST_REJECTED) + goto end; + /* + * If it's not explicitly trusted then check if there is an alternative + * chain that could be used. 
We only do this if we haven't already + * checked via TRUSTED_FIRST and the user hasn't switched off alternate + * chain checking + */ + retry = 0; + if (i != X509_TRUST_TRUSTED + && !(ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) + && !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) { + while (j-- > 1) { + xtmp2 = sk_X509_value(ctx->chain, j - 1); + ok = ctx->get_issuer(&xtmp, ctx, xtmp2); + if (ok < 0) + goto end; + /* Check if we found an alternate chain */ + if (ok > 0) { + /* + * Free up the found cert we'll add it again later + */ + X509_free(xtmp); - x = xtmp; - if (!sk_X509_push(ctx->chain, x)) { - X509_free(xtmp); - X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); - return 0; + /* + * Dump all the certs above this point - we've found an + * alternate chain + */ + while (num > j) { + xtmp = sk_X509_pop(ctx->chain); + X509_free(xtmp); + num--; + ctx->last_untrusted--; + } + retry = 1; + break; + } + } } - num++; - } - - /* we now have our chain, lets check it... */ + } while (retry); - /* Is last certificate looked up self signed? */ - if (!ctx->check_issued(ctx, x, x)) { + /* + * If not explicitly trusted then indicate error unless it's a single + * self signed certificate in which case we've indicated an error already + * and set bad_chain == 1 + */ + if (i != X509_TRUST_TRUSTED && !bad_chain) { if ((chain_ss == NULL) || !ctx->check_issued(ctx, x, chain_ss)) { if (ctx->last_untrusted >= num) ctx->error = X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY; @@ -342,10 +440,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (!ok) goto end; - /* The chain extensions are OK: check trust */ - - if (param->trust > 0) - ok = check_trust(ctx); + ok = check_id(ctx); if (!ok) goto end; @@ -362,6 +457,16 @@ int X509_verify_cert(X509_STORE_CTX *ctx) if (!ok) goto end; + i = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain, + ctx->param->flags); + if (i != X509_V_OK) { + ctx->error = i; + ctx->current_cert = sk_X509_value(ctx->chain, ctx->error_depth); + ok = cb(0, ctx); + if (!ok) + goto end; + } + /* At this point, we have a chain and need to verify it */ if (ctx->verify != NULL) ok = ctx->verify(ctx); @@ -428,7 +533,6 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer) ctx->current_cert = x; ctx->current_issuer = issuer; return ctx->verify_cb(0, ctx); - return 0; } /* Alternative lookup method: look from a STACK stored in other_ctx */ @@ -628,30 +732,97 @@ static int check_name_constraints(X509_STORE_CTX *ctx) return 1; } -static int check_trust(X509_STORE_CTX *ctx) +static int check_id_error(X509_STORE_CTX *ctx, int errcode) { -#ifdef OPENSSL_NO_CHAIN_VERIFY + ctx->error = errcode; + ctx->current_cert = ctx->cert; + ctx->error_depth = 0; + return ctx->verify_cb(0, ctx); +} + +static int check_hosts(X509 *x, X509_VERIFY_PARAM_ID *id) +{ + int i; + int n = sk_OPENSSL_STRING_num(id->hosts); + char *name; + + for (i = 0; i < n; ++i) { + name = sk_OPENSSL_STRING_value(id->hosts, i); + if (X509_check_host(x, name, 0, id->hostflags, &id->peername) > 0) + return 1; + } + return n == 0; +} + +static int check_id(X509_STORE_CTX *ctx) +{ + X509_VERIFY_PARAM *vpm = ctx->param; + X509_VERIFY_PARAM_ID *id = vpm->id; + X509 *x = ctx->cert; + if (id->hosts && check_hosts(x, id) <= 0) { + if (!check_id_error(ctx, X509_V_ERR_HOSTNAME_MISMATCH)) + return 0; + } + if (id->email && X509_check_email(x, id->email, id->emaillen, 0) <= 0) { + if (!check_id_error(ctx, X509_V_ERR_EMAIL_MISMATCH)) + return 0; + } + if (id->ip && X509_check_ip(x, id->ip, id->iplen, 0) <= 0) { + if 
(!check_id_error(ctx, X509_V_ERR_IP_ADDRESS_MISMATCH)) + return 0; + } return 1; -#else +} + +static int check_trust(X509_STORE_CTX *ctx) +{ int i, ok; - X509 *x; + X509 *x = NULL; int (*cb) (int xok, X509_STORE_CTX *xctx); cb = ctx->verify_cb; -/* For now just check the last certificate in the chain */ - i = sk_X509_num(ctx->chain) - 1; - x = sk_X509_value(ctx->chain, i); - ok = X509_check_trust(x, ctx->param->trust, 0); - if (ok == X509_TRUST_TRUSTED) - return 1; - ctx->error_depth = i; - ctx->current_cert = x; - if (ok == X509_TRUST_REJECTED) - ctx->error = X509_V_ERR_CERT_REJECTED; - else - ctx->error = X509_V_ERR_CERT_UNTRUSTED; - ok = cb(0, ctx); - return ok; -#endif + /* Check all trusted certificates in chain */ + for (i = ctx->last_untrusted; i < sk_X509_num(ctx->chain); i++) { + x = sk_X509_value(ctx->chain, i); + ok = X509_check_trust(x, ctx->param->trust, 0); + /* If explicitly trusted return trusted */ + if (ok == X509_TRUST_TRUSTED) + return X509_TRUST_TRUSTED; + /* + * If explicitly rejected notify callback and reject if not + * overridden. + */ + if (ok == X509_TRUST_REJECTED) { + ctx->error_depth = i; + ctx->current_cert = x; + ctx->error = X509_V_ERR_CERT_REJECTED; + ok = cb(0, ctx); + if (!ok) + return X509_TRUST_REJECTED; + } + } + /* + * If we accept partial chains and have at least one trusted certificate + * return success. + */ + if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) { + X509 *mx; + if (ctx->last_untrusted < sk_X509_num(ctx->chain)) + return X509_TRUST_TRUSTED; + x = sk_X509_value(ctx->chain, 0); + mx = lookup_cert_match(ctx, x); + if (mx) { + (void)sk_X509_set(ctx->chain, 0, mx); + X509_free(x); + ctx->last_untrusted = 0; + return X509_TRUST_TRUSTED; + } + } + + /* + * If no trusted certs in chain at all return untrusted and allow + * standard (no issuer cert) etc errors to be indicated. 
+ */ + return X509_TRUST_UNTRUSTED; } static int check_revocation(X509_STORE_CTX *ctx) @@ -1370,6 +1541,14 @@ static int check_crl(X509_STORE_CTX *ctx, X509_CRL *crl) if (!ok) goto err; } else { + int rv; + rv = X509_CRL_check_suiteb(crl, ikey, ctx->param->flags); + if (rv != X509_V_OK) { + ctx->error = rv; + ok = ctx->verify_cb(0, ctx); + if (!ok) + goto err; + } /* Verify CRL signature */ if (X509_CRL_verify(crl, ikey) <= 0) { ctx->error = X509_V_ERR_CRL_SIGNATURE_FAILURE; @@ -1526,6 +1705,10 @@ static int internal_verify(X509_STORE_CTX *ctx) if (ctx->check_issued(ctx, xi, xi)) xs = xi; else { + if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) { + xs = xi; + goto check_cert; + } if (n <= 0) { ctx->error = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE; ctx->current_cert = xi; @@ -1571,6 +1754,7 @@ static int internal_verify(X509_STORE_CTX *ctx) xs->valid = 1; + check_cert: ok = check_cert_time(ctx, xs); if (!ok) goto end; @@ -1748,6 +1932,114 @@ int X509_get_pubkey_parameters(EVP_PKEY *pkey, STACK_OF(X509) *chain) return 1; } +/* Make a delta CRL as the diff between two full CRLs */ + +X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer, + EVP_PKEY *skey, const EVP_MD *md, unsigned int flags) +{ + X509_CRL *crl = NULL; + int i; + STACK_OF(X509_REVOKED) *revs = NULL; + /* CRLs can't be delta already */ + if (base->base_crl_number || newer->base_crl_number) { + X509err(X509_F_X509_CRL_DIFF, X509_R_CRL_ALREADY_DELTA); + return NULL; + } + /* Base and new CRL must have a CRL number */ + if (!base->crl_number || !newer->crl_number) { + X509err(X509_F_X509_CRL_DIFF, X509_R_NO_CRL_NUMBER); + return NULL; + } + /* Issuer names must match */ + if (X509_NAME_cmp(X509_CRL_get_issuer(base), X509_CRL_get_issuer(newer))) { + X509err(X509_F_X509_CRL_DIFF, X509_R_ISSUER_MISMATCH); + return NULL; + } + /* AKID and IDP must match */ + if (!crl_extension_match(base, newer, NID_authority_key_identifier)) { + X509err(X509_F_X509_CRL_DIFF, X509_R_AKID_MISMATCH); + return NULL; + } + if (!crl_extension_match(base, newer, NID_issuing_distribution_point)) { + X509err(X509_F_X509_CRL_DIFF, X509_R_IDP_MISMATCH); + return NULL; + } + /* Newer CRL number must exceed full CRL number */ + if (ASN1_INTEGER_cmp(newer->crl_number, base->crl_number) <= 0) { + X509err(X509_F_X509_CRL_DIFF, X509_R_NEWER_CRL_NOT_NEWER); + return NULL; + } + /* CRLs must verify */ + if (skey && (X509_CRL_verify(base, skey) <= 0 || + X509_CRL_verify(newer, skey) <= 0)) { + X509err(X509_F_X509_CRL_DIFF, X509_R_CRL_VERIFY_FAILURE); + return NULL; + } + /* Create new CRL */ + crl = X509_CRL_new(); + if (!crl || !X509_CRL_set_version(crl, 1)) + goto memerr; + /* Set issuer name */ + if (!X509_CRL_set_issuer_name(crl, X509_CRL_get_issuer(newer))) + goto memerr; + + if (!X509_CRL_set_lastUpdate(crl, X509_CRL_get_lastUpdate(newer))) + goto memerr; + if (!X509_CRL_set_nextUpdate(crl, X509_CRL_get_nextUpdate(newer))) + goto memerr; + + /* Set base CRL number: must be critical */ + + if (!X509_CRL_add1_ext_i2d(crl, NID_delta_crl, base->crl_number, 1, 0)) + goto memerr; + + /* + * Copy extensions across from newest CRL to delta: this will set CRL + * number to correct value too. 
+ */ + + for (i = 0; i < X509_CRL_get_ext_count(newer); i++) { + X509_EXTENSION *ext; + ext = X509_CRL_get_ext(newer, i); + if (!X509_CRL_add_ext(crl, ext, -1)) + goto memerr; + } + + /* Go through revoked entries, copying as needed */ + + revs = X509_CRL_get_REVOKED(newer); + + for (i = 0; i < sk_X509_REVOKED_num(revs); i++) { + X509_REVOKED *rvn, *rvtmp; + rvn = sk_X509_REVOKED_value(revs, i); + /* + * Add only if not also in base. TODO: need something cleverer here + * for some more complex CRLs covering multiple CAs. + */ + if (!X509_CRL_get0_by_serial(base, &rvtmp, rvn->serialNumber)) { + rvtmp = X509_REVOKED_dup(rvn); + if (!rvtmp) + goto memerr; + if (!X509_CRL_add0_revoked(crl, rvtmp)) { + X509_REVOKED_free(rvtmp); + goto memerr; + } + } + } + /* TODO: optionally prune deleted entries */ + + if (skey && md && !X509_CRL_sign(crl, skey, md)) + goto memerr; + + return crl; + + memerr: + X509err(X509_F_X509_CRL_DIFF, ERR_R_MALLOC_FAILURE); + if (crl) + X509_CRL_free(crl); + return NULL; +} + int X509_STORE_CTX_get_ex_new_index(long argl, void *argp, CRYPTO_EX_new *new_func, CRYPTO_EX_dup *dup_func, @@ -1798,16 +2090,9 @@ STACK_OF(X509) *X509_STORE_CTX_get_chain(X509_STORE_CTX *ctx) STACK_OF(X509) *X509_STORE_CTX_get1_chain(X509_STORE_CTX *ctx) { - int i; - X509 *x; - STACK_OF(X509) *chain; - if (!ctx->chain || !(chain = sk_X509_dup(ctx->chain))) + if (!ctx->chain) return NULL; - for (i = 0; i < sk_X509_num(chain); i++) { - x = sk_X509_value(chain, i); - CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); - } - return chain; + return X509_chain_up_ref(ctx->chain); } X509 *X509_STORE_CTX_get0_current_issuer(X509_STORE_CTX *ctx) diff --git a/deps/openssl/openssl/crypto/x509/x509_vfy.h b/deps/openssl/openssl/crypto/x509/x509_vfy.h index 1f8c0eccbf3478..4b236c06143b69 100644 --- a/deps/openssl/openssl/crypto/x509/x509_vfy.h +++ b/deps/openssl/openssl/crypto/x509/x509_vfy.h @@ -156,6 +156,8 @@ typedef struct x509_lookup_method_st { X509_OBJECT *ret); } X509_LOOKUP_METHOD; +typedef struct X509_VERIFY_PARAM_ID_st X509_VERIFY_PARAM_ID; + /* * This structure hold all parameters associated with a verify operation by * including an X509_VERIFY_PARAM structure in related structures the @@ -171,6 +173,7 @@ typedef struct X509_VERIFY_PARAM_st { int trust; /* trust setting to check */ int depth; /* Verify depth */ STACK_OF(ASN1_OBJECT) *policies; /* Permissible policies */ + X509_VERIFY_PARAM_ID *id; /* opaque ID data */ } X509_VERIFY_PARAM; DECLARE_STACK_OF(X509_VERIFY_PARAM) @@ -370,6 +373,19 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); # define X509_V_ERR_UNSUPPORTED_NAME_SYNTAX 53 # define X509_V_ERR_CRL_PATH_VALIDATION_ERROR 54 +/* Suite B mode algorithm violation */ +# define X509_V_ERR_SUITE_B_INVALID_VERSION 56 +# define X509_V_ERR_SUITE_B_INVALID_ALGORITHM 57 +# define X509_V_ERR_SUITE_B_INVALID_CURVE 58 +# define X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM 59 +# define X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED 60 +# define X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256 61 + +/* Host, email and IP check errors */ +# define X509_V_ERR_HOSTNAME_MISMATCH 62 +# define X509_V_ERR_EMAIL_MISMATCH 63 +# define X509_V_ERR_IP_ADDRESS_MISMATCH 64 + /* The application is not happy */ # define X509_V_ERR_APPLICATION_VERIFICATION 50 @@ -405,6 +421,23 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); # define X509_V_FLAG_USE_DELTAS 0x2000 /* Check selfsigned CA signature */ # define X509_V_FLAG_CHECK_SS_SIGNATURE 0x4000 +/* Use trusted store first */ +# define 
X509_V_FLAG_TRUSTED_FIRST 0x8000 +/* Suite B 128 bit only mode: not normally used */ +# define X509_V_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define X509_V_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define X509_V_FLAG_SUITEB_128_LOS 0x30000 + +/* Allow partial chains if at least one certificate is in trusted store */ +# define X509_V_FLAG_PARTIAL_CHAIN 0x80000 +/* + * If the initial chain is not trusted, do not attempt to build an alternative + * chain. Alternate chain checking was introduced in 1.1.0. Setting this flag + * will force the behaviour to match that of previous versions. + */ +# define X509_V_FLAG_NO_ALT_CHAINS 0x100000 # define X509_VP_FLAG_DEFAULT 0x1 # define X509_VP_FLAG_OVERWRITE 0x2 @@ -439,6 +472,11 @@ int X509_STORE_set1_param(X509_STORE *ctx, X509_VERIFY_PARAM *pm); void X509_STORE_set_verify_cb(X509_STORE *ctx, int (*verify_cb) (int, X509_STORE_CTX *)); +void X509_STORE_set_lookup_crls_cb(X509_STORE *ctx, + STACK_OF(X509_CRL) *(*cb) (X509_STORE_CTX + *ctx, + X509_NAME *nm)); + X509_STORE_CTX *X509_STORE_CTX_new(void); int X509_STORE_CTX_get1_issuer(X509 **issuer, X509_STORE_CTX *ctx, X509 *x); @@ -449,6 +487,8 @@ int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *store, void X509_STORE_CTX_trusted_stack(X509_STORE_CTX *ctx, STACK_OF(X509) *sk); void X509_STORE_CTX_cleanup(X509_STORE_CTX *ctx); +X509_STORE *X509_STORE_CTX_get0_store(X509_STORE_CTX *ctx); + X509_LOOKUP *X509_STORE_add_lookup(X509_STORE *v, X509_LOOKUP_METHOD *m); X509_LOOKUP_METHOD *X509_LOOKUP_hash_dir(void); @@ -546,9 +586,27 @@ int X509_VERIFY_PARAM_add0_policy(X509_VERIFY_PARAM *param, ASN1_OBJECT *policy); int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param, STACK_OF(ASN1_OBJECT) *policies); + +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags); +char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *); +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen); +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen); +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, + const char *ipasc); + int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param); +const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param); int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param); +int X509_VERIFY_PARAM_get_count(void); +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id); const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name); void X509_VERIFY_PARAM_table_cleanup(void); diff --git a/deps/openssl/openssl/crypto/x509/x509_vpm.c b/deps/openssl/openssl/crypto/x509/x509_vpm.c index d0543662c08c92..322239401e004b 100644 --- a/deps/openssl/openssl/crypto/x509/x509_vpm.c +++ b/deps/openssl/openssl/crypto/x509/x509_vpm.c @@ -66,10 +66,73 @@ #include #include +#include "vpm_int.h" + /* X509_VERIFY_PARAM functions */ +#define SET_HOST 0 +#define ADD_HOST 1 + +static char *str_copy(const char *s) +{ + return OPENSSL_strdup(s); +} + +static void str_free(char *s) +{ + OPENSSL_free(s); +} + +#define string_stack_free(sk) sk_OPENSSL_STRING_pop_free(sk, str_free) + +static int int_x509_param_set_hosts(X509_VERIFY_PARAM_ID *id, int mode, + const char *name, size_t namelen) +{ + char *copy; + 
+ /* + * Refuse names with embedded NUL bytes, except perhaps as final byte. + * XXX: Do we need to push an error onto the error stack? + */ + if (namelen == 0) + namelen = name ? strlen(name) : 0; + else if (name && memchr(name, '\0', namelen > 1 ? namelen - 1 : namelen)) + return 0; + if (name && name[namelen - 1] == '\0') + --namelen; + + if (mode == SET_HOST && id->hosts) { + string_stack_free(id->hosts); + id->hosts = NULL; + } + if (name == NULL || namelen == 0) + return 1; + + copy = BUF_strndup(name, namelen); + if (copy == NULL) + return 0; + + if (id->hosts == NULL && + (id->hosts = sk_OPENSSL_STRING_new_null()) == NULL) { + OPENSSL_free(copy); + return 0; + } + + if (!sk_OPENSSL_STRING_push(id->hosts, copy)) { + OPENSSL_free(copy); + if (sk_OPENSSL_STRING_num(id->hosts) == 0) { + sk_OPENSSL_STRING_free(id->hosts); + id->hosts = NULL; + } + return 0; + } + + return 1; +} + static void x509_verify_param_zero(X509_VERIFY_PARAM *param) { + X509_VERIFY_PARAM_ID *paramid; if (!param) return; param->name = NULL; @@ -85,15 +148,41 @@ static void x509_verify_param_zero(X509_VERIFY_PARAM *param) sk_ASN1_OBJECT_pop_free(param->policies, ASN1_OBJECT_free); param->policies = NULL; } + paramid = param->id; + if (paramid->hosts) { + string_stack_free(paramid->hosts); + paramid->hosts = NULL; + } + if (paramid->peername) + OPENSSL_free(paramid->peername); + if (paramid->email) { + OPENSSL_free(paramid->email); + paramid->email = NULL; + paramid->emaillen = 0; + } + if (paramid->ip) { + OPENSSL_free(paramid->ip); + paramid->ip = NULL; + paramid->iplen = 0; + } + } X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void) { X509_VERIFY_PARAM *param; + X509_VERIFY_PARAM_ID *paramid; param = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM)); if (!param) return NULL; + paramid = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM)); + if (!paramid) { + OPENSSL_free(param); + return NULL; + } memset(param, 0, sizeof(X509_VERIFY_PARAM)); + memset(paramid, 0, sizeof(X509_VERIFY_PARAM_ID)); + param->id = paramid; x509_verify_param_zero(param); return param; } @@ -101,6 +190,7 @@ X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void) void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param) { x509_verify_param_zero(param); + OPENSSL_free(param->id); OPENSSL_free(param); } @@ -142,6 +232,11 @@ void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param) (to_overwrite || \ ((src->field != def) && (to_default || (dest->field == def)))) +/* As above but for ID fields */ + +#define test_x509_verify_param_copy_id(idf, def) \ + test_x509_verify_param_copy(id->idf, def) + /* Macro to test and copy a field if necessary */ #define x509_verify_param_copy(field, def) \ @@ -153,8 +248,10 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest, { unsigned long inh_flags; int to_default, to_overwrite; + X509_VERIFY_PARAM_ID *id; if (!src) return 1; + id = src->id; inh_flags = dest->inh_flags | src->inh_flags; if (inh_flags & X509_VP_FLAG_ONCE) @@ -195,6 +292,31 @@ int X509_VERIFY_PARAM_inherit(X509_VERIFY_PARAM *dest, return 0; } + /* Copy the host flags if and only if we're copying the host list */ + if (test_x509_verify_param_copy_id(hosts, NULL)) { + if (dest->id->hosts) { + string_stack_free(dest->id->hosts); + dest->id->hosts = NULL; + } + if (id->hosts) { + dest->id->hosts = + sk_OPENSSL_STRING_deep_copy(id->hosts, str_copy, str_free); + if (dest->id->hosts == NULL) + return 0; + dest->id->hostflags = id->hostflags; + } + } + + if (test_x509_verify_param_copy_id(email, NULL)) { + if (!X509_VERIFY_PARAM_set1_email(dest, id->email, id->emaillen)) + return 0; + } 
+ + if (test_x509_verify_param_copy_id(ip, NULL)) { + if (!X509_VERIFY_PARAM_set1_ip(dest, id->ip, id->iplen)) + return 0; + } + return 1; } @@ -209,6 +331,30 @@ int X509_VERIFY_PARAM_set1(X509_VERIFY_PARAM *to, return ret; } +static int int_x509_param_set1(char **pdest, size_t *pdestlen, + const char *src, size_t srclen) +{ + void *tmp; + if (src) { + if (srclen == 0) { + tmp = BUF_strdup(src); + srclen = strlen(src); + } else + tmp = BUF_memdup(src, srclen); + if (!tmp) + return 0; + } else { + tmp = NULL; + srclen = 0; + } + if (*pdest) + OPENSSL_free(*pdest); + *pdest = tmp; + if (pdestlen) + *pdestlen = srclen; + return 1; +} + int X509_VERIFY_PARAM_set1_name(X509_VERIFY_PARAM *param, const char *name) { if (param->name) @@ -306,11 +452,70 @@ int X509_VERIFY_PARAM_set1_policies(X509_VERIFY_PARAM *param, return 1; } +int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen) +{ + return int_x509_param_set_hosts(param->id, SET_HOST, name, namelen); +} + +int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen) +{ + return int_x509_param_set_hosts(param->id, ADD_HOST, name, namelen); +} + +void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags) +{ + param->id->hostflags = flags; +} + +char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *param) +{ + return param->id->peername; +} + +int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen) +{ + return int_x509_param_set1(¶m->id->email, ¶m->id->emaillen, + email, emaillen); +} + +int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen) +{ + if (iplen != 0 && iplen != 4 && iplen != 16) + return 0; + return int_x509_param_set1((char **)¶m->id->ip, ¶m->id->iplen, + (char *)ip, iplen); +} + +int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, const char *ipasc) +{ + unsigned char ipout[16]; + size_t iplen; + + iplen = (size_t)a2i_ipadd(ipout, ipasc); + if (iplen == 0) + return 0; + return X509_VERIFY_PARAM_set1_ip(param, ipout, iplen); +} + int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param) { return param->depth; } +const char *X509_VERIFY_PARAM_get0_name(const X509_VERIFY_PARAM *param) +{ + return param->name; +} + +static X509_VERIFY_PARAM_ID _empty_id = { NULL, 0U, NULL, NULL, 0, NULL, 0 }; + +#define vpm_empty_id (X509_VERIFY_PARAM_ID *)&_empty_id + /* * Default verify parameters: these are used for various applications and can * be overridden by the user specified table. 
NB: the 'name' field *must* be @@ -326,8 +531,8 @@ static const X509_VERIFY_PARAM default_table[] = { 0, /* purpose */ 0, /* trust */ 100, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "pkcs7", /* S/MIME sign parameters */ 0, /* Check time */ @@ -336,8 +541,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SMIME_SIGN, /* purpose */ X509_TRUST_EMAIL, /* trust */ -1, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "smime_sign", /* S/MIME sign parameters */ 0, /* Check time */ @@ -346,8 +551,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SMIME_SIGN, /* purpose */ X509_TRUST_EMAIL, /* trust */ -1, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "ssl_client", /* SSL/TLS client parameters */ 0, /* Check time */ @@ -356,8 +561,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SSL_CLIENT, /* purpose */ X509_TRUST_SSL_CLIENT, /* trust */ -1, /* depth */ - NULL /* policies */ - }, + NULL, /* policies */ + vpm_empty_id}, { "ssl_server", /* SSL/TLS server parameters */ 0, /* Check time */ @@ -366,8 +571,8 @@ static const X509_VERIFY_PARAM default_table[] = { X509_PURPOSE_SSL_SERVER, /* purpose */ X509_TRUST_SSL_SERVER, /* trust */ -1, /* depth */ - NULL /* policies */ - } + NULL, /* policies */ + vpm_empty_id} }; static STACK_OF(X509_VERIFY_PARAM) *param_table = NULL; @@ -407,6 +612,22 @@ int X509_VERIFY_PARAM_add0_table(X509_VERIFY_PARAM *param) return 1; } +int X509_VERIFY_PARAM_get_count(void) +{ + int num = sizeof(default_table) / sizeof(X509_VERIFY_PARAM); + if (param_table) + num += sk_X509_VERIFY_PARAM_num(param_table); + return num; +} + +const X509_VERIFY_PARAM *X509_VERIFY_PARAM_get0(int id) +{ + int num = sizeof(default_table) / sizeof(X509_VERIFY_PARAM); + if (id < num) + return default_table + id; + return sk_X509_VERIFY_PARAM_value(param_table, id - num); +} + const X509_VERIFY_PARAM *X509_VERIFY_PARAM_lookup(const char *name) { int idx; diff --git a/deps/openssl/openssl/crypto/x509/x_all.c b/deps/openssl/openssl/crypto/x509/x_all.c index 43152e933fa1b0..0f26c546d835fd 100644 --- a/deps/openssl/openssl/crypto/x509/x_all.c +++ b/deps/openssl/openssl/crypto/x509/x_all.c @@ -63,6 +63,7 @@ #include #include #include +#include #ifndef OPENSSL_NO_RSA # include #endif @@ -105,6 +106,12 @@ int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx) x->sig_alg, x->signature, x->cert_info, ctx); } +int X509_http_nbio(OCSP_REQ_CTX *rctx, X509 **pcert) +{ + return OCSP_REQ_CTX_nbio_d2i(rctx, + (ASN1_VALUE **)pcert, ASN1_ITEM_rptr(X509)); +} + int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md) { return (ASN1_item_sign(ASN1_ITEM_rptr(X509_REQ_INFO), x->sig_alg, NULL, @@ -133,6 +140,13 @@ int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx) x->crl, ctx); } +int X509_CRL_http_nbio(OCSP_REQ_CTX *rctx, X509_CRL **pcrl) +{ + return OCSP_REQ_CTX_nbio_d2i(rctx, + (ASN1_VALUE **)pcrl, + ASN1_ITEM_rptr(X509_CRL)); +} + int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey, const EVP_MD *md) { return (ASN1_item_sign(ASN1_ITEM_rptr(NETSCAPE_SPKAC), x->sig_algor, NULL, diff --git a/deps/openssl/openssl/crypto/x509v3/Makefile b/deps/openssl/openssl/crypto/x509v3/Makefile index 556ef351bf888c..cdbfd524039215 100644 --- a/deps/openssl/openssl/crypto/x509v3/Makefile +++ b/deps/openssl/openssl/crypto/x509v3/Makefile @@ -13,7 +13,7 @@ AR= ar r CFLAGS= $(INCLUDES) $(CFLAG) GENERAL=Makefile README -TEST= +TEST=v3nametest.c APPS= LIB=$(TOP)/libcrypto.a @@ -22,13 
+22,13 @@ v3_prn.c v3_utl.c v3err.c v3_genn.c v3_alt.c v3_skey.c v3_akey.c v3_pku.c \ v3_int.c v3_enum.c v3_sxnet.c v3_cpols.c v3_crld.c v3_purp.c v3_info.c \ v3_ocsp.c v3_akeya.c v3_pmaps.c v3_pcons.c v3_ncons.c v3_pcia.c v3_pci.c \ pcy_cache.c pcy_node.c pcy_data.c pcy_map.c pcy_tree.c pcy_lib.c \ -v3_asid.c v3_addr.c +v3_asid.c v3_addr.c v3_scts.c LIBOBJ= v3_bcons.o v3_bitst.o v3_conf.o v3_extku.o v3_ia5.o v3_lib.o \ v3_prn.o v3_utl.o v3err.o v3_genn.o v3_alt.o v3_skey.o v3_akey.o v3_pku.o \ v3_int.o v3_enum.o v3_sxnet.o v3_cpols.o v3_crld.o v3_purp.o v3_info.o \ v3_ocsp.o v3_akeya.o v3_pmaps.o v3_pcons.o v3_ncons.o v3_pcia.o v3_pci.o \ pcy_cache.o pcy_node.o pcy_data.o pcy_map.o pcy_tree.o pcy_lib.o \ -v3_asid.o v3_addr.o +v3_asid.o v3_addr.o v3_scts.o SRC= $(LIBSRC) @@ -533,6 +533,28 @@ v3_purp.o: ../../include/openssl/sha.h ../../include/openssl/stack.h v3_purp.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h v3_purp.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h v3_purp.o: ../cryptlib.h v3_purp.c +v3_scts.o: ../../e_os.h ../../include/openssl/asn1.h +v3_scts.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h +v3_scts.o: ../../include/openssl/comp.h ../../include/openssl/conf.h +v3_scts.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h +v3_scts.o: ../../include/openssl/dtls1.h ../../include/openssl/e_os2.h +v3_scts.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h +v3_scts.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h +v3_scts.o: ../../include/openssl/evp.h ../../include/openssl/hmac.h +v3_scts.o: ../../include/openssl/kssl.h ../../include/openssl/lhash.h +v3_scts.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +v3_scts.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h +v3_scts.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pem.h +v3_scts.o: ../../include/openssl/pem2.h ../../include/openssl/pkcs7.h +v3_scts.o: ../../include/openssl/pqueue.h ../../include/openssl/rsa.h +v3_scts.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h +v3_scts.o: ../../include/openssl/srtp.h ../../include/openssl/ssl.h +v3_scts.o: ../../include/openssl/ssl2.h ../../include/openssl/ssl23.h +v3_scts.o: ../../include/openssl/ssl3.h ../../include/openssl/stack.h +v3_scts.o: ../../include/openssl/symhacks.h ../../include/openssl/tls1.h +v3_scts.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h +v3_scts.o: ../../include/openssl/x509v3.h ../../ssl/ssl_locl.h ../cryptlib.h +v3_scts.o: v3_scts.c v3_skey.o: ../../e_os.h ../../include/openssl/asn1.h v3_skey.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h v3_skey.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h diff --git a/deps/openssl/openssl/crypto/x509v3/ext_dat.h b/deps/openssl/openssl/crypto/x509v3/ext_dat.h index 136b1f8ecfba64..c3a6fce7524f4c 100644 --- a/deps/openssl/openssl/crypto/x509v3/ext_dat.h +++ b/deps/openssl/openssl/crypto/x509v3/ext_dat.h @@ -69,6 +69,7 @@ extern X509V3_EXT_METHOD v3_crl_hold, v3_pci; extern X509V3_EXT_METHOD v3_policy_mappings, v3_policy_constraints; extern X509V3_EXT_METHOD v3_name_constraints, v3_inhibit_anyp, v3_idp; extern X509V3_EXT_METHOD v3_addr, v3_asid; +extern X509V3_EXT_METHOD v3_ct_scts[]; /* * This table will be searched using OBJ_bsearch so it *must* kept in order @@ -126,6 +127,8 @@ static const X509V3_EXT_METHOD *standard_exts[] = { &v3_idp, &v3_alt[2], &v3_freshest_crl, + &v3_ct_scts[0], + &v3_ct_scts[1], }; /* Number of 
standard extensions */ diff --git a/deps/openssl/openssl/crypto/x509v3/v3_lib.c b/deps/openssl/openssl/crypto/x509v3/v3_lib.c index b5598c9a3e9508..8350429aafbe31 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_lib.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_lib.c @@ -122,6 +122,28 @@ const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext) return X509V3_EXT_get_nid(nid); } +int X509V3_EXT_free(int nid, void *ext_data) +{ + const X509V3_EXT_METHOD *ext_method = X509V3_EXT_get_nid(nid); + if (ext_method == NULL) { + X509V3err(X509V3_F_X509V3_EXT_FREE, + X509V3_R_CANNOT_FIND_FREE_FUNCTION); + return 0; + } + + if (ext_method->it != NULL) + ASN1_item_free(ext_data, ASN1_ITEM_ptr(ext_method->it)); + else if (ext_method->ext_free != NULL) + ext_method->ext_free(ext_data); + else { + X509V3err(X509V3_F_X509V3_EXT_FREE, + X509V3_R_CANNOT_FIND_FREE_FUNCTION); + return 0; + } + + return 1; +} + int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist) { for (; extlist->ext_nid != -1; extlist++) diff --git a/deps/openssl/openssl/crypto/x509v3/v3_purp.c b/deps/openssl/openssl/crypto/x509v3/v3_purp.c index 0d9bc58ce75a5c..36b0d87a0d8bb5 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_purp.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_purp.c @@ -395,9 +395,6 @@ static void x509v3_cache_extensions(X509 *x) #ifndef OPENSSL_NO_SHA X509_digest(x, EVP_sha1(), x->sha1_hash, NULL); #endif - /* Does subject name match issuer ? */ - if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x))) - x->ex_flags |= EXFLAG_SI; /* V1 should mean no extensions ... */ if (!X509_get_version(x)) x->ex_flags |= EXFLAG_V1; @@ -479,6 +476,10 @@ static void x509v3_cache_extensions(X509 *x) case NID_dvcs: x->ex_xkusage |= XKU_DVCS; break; + + case NID_anyExtendedKeyUsage: + x->ex_xkusage |= XKU_ANYEKU; + break; } } sk_ASN1_OBJECT_pop_free(extusage, ASN1_OBJECT_free); @@ -494,6 +495,13 @@ static void x509v3_cache_extensions(X509 *x) } x->skid = X509_get_ext_d2i(x, NID_subject_key_identifier, NULL, NULL); x->akid = X509_get_ext_d2i(x, NID_authority_key_identifier, NULL, NULL); + /* Does subject name match issuer ? */ + if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x))) { + x->ex_flags |= EXFLAG_SI; + /* If SKID matches AKID also indicate self signed */ + if (X509_check_akid(x, x->akid) == X509_V_OK) + x->ex_flags |= EXFLAG_SS; + } x->altname = X509_get_ext_d2i(x, NID_subject_alt_name, NULL, NULL); x->nc = X509_get_ext_d2i(x, NID_name_constraints, &i, NULL); if (!x->nc && (i != -1)) @@ -598,8 +606,8 @@ static int check_purpose_ssl_client(const X509_PURPOSE *xp, const X509 *x, return 0; if (ca) return check_ssl_ca(x); - /* We need to do digital signatures with it */ - if (ku_reject(x, KU_DIGITAL_SIGNATURE)) + /* We need to do digital signatures or key agreement */ + if (ku_reject(x, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT)) return 0; /* nsCertType if present should allow SSL client use */ if (ns_reject(x, NS_SSL_CLIENT)) @@ -607,6 +615,14 @@ static int check_purpose_ssl_client(const X509_PURPOSE *xp, const X509 *x, return 1; } +/* + * Key usage needed for TLS/SSL server: digital signature, encipherment or + * key agreement. The ssl code can check this more thoroughly for individual + * key types. 
+ */ +#define KU_TLS \ + KU_DIGITAL_SIGNATURE|KU_KEY_ENCIPHERMENT|KU_KEY_AGREEMENT + static int check_purpose_ssl_server(const X509_PURPOSE *xp, const X509 *x, int ca) { @@ -617,8 +633,7 @@ static int check_purpose_ssl_server(const X509_PURPOSE *xp, const X509 *x, if (ns_reject(x, NS_SSL_SERVER)) return 0; - /* Now as for keyUsage: we'll at least need to sign OR encipher */ - if (ku_reject(x, KU_DIGITAL_SIGNATURE | KU_KEY_ENCIPHERMENT)) + if (ku_reject(x, KU_TLS)) return 0; return 1; diff --git a/deps/openssl/openssl/crypto/x509v3/v3_scts.c b/deps/openssl/openssl/crypto/x509v3/v3_scts.c new file mode 100644 index 00000000000000..9a4c3eba0bdd5f --- /dev/null +++ b/deps/openssl/openssl/crypto/x509v3/v3_scts.c @@ -0,0 +1,323 @@ +/* v3_scts.c */ +/* + * Written by Rob Stradling (rob@comodo.com) for the OpenSSL project 2014. + */ +/* ==================================================================== + * Copyright (c) 2014 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). 
+ * + */ + +#include +#include "cryptlib.h" +#include +#include +#include "../ssl/ssl_locl.h" + +#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) +# define SCT_TIMESTAMP unsigned __int64 +#elif defined(__arch64__) +# define SCT_TIMESTAMP unsigned long +#else +# define SCT_TIMESTAMP unsigned long long +#endif + +#define n2l8(c,l) (l =((SCT_TIMESTAMP)(*((c)++)))<<56, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<48, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<40, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<32, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<24, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<<16, \ + l|=((SCT_TIMESTAMP)(*((c)++)))<< 8, \ + l|=((SCT_TIMESTAMP)(*((c)++)))) + +typedef struct SCT_st { + /* The encoded SCT */ + unsigned char *sct; + unsigned short sctlen; + /* + * Components of the SCT. "logid", "ext" and "sig" point to addresses + * inside "sct". + */ + unsigned char version; + unsigned char *logid; + unsigned short logidlen; + SCT_TIMESTAMP timestamp; + unsigned char *ext; + unsigned short extlen; + unsigned char hash_alg; + unsigned char sig_alg; + unsigned char *sig; + unsigned short siglen; +} SCT; + +DECLARE_STACK_OF(SCT) + +static void SCT_LIST_free(STACK_OF(SCT) *a); +static STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, + const unsigned char **pp, long length); +static int i2r_SCT_LIST(X509V3_EXT_METHOD *method, STACK_OF(SCT) *sct_list, + BIO *out, int indent); + +const X509V3_EXT_METHOD v3_ct_scts[] = { + {NID_ct_precert_scts, 0, NULL, + 0, (X509V3_EXT_FREE)SCT_LIST_free, + (X509V3_EXT_D2I)d2i_SCT_LIST, 0, + 0, 0, 0, 0, + (X509V3_EXT_I2R)i2r_SCT_LIST, 0, + NULL}, + + {NID_ct_cert_scts, 0, NULL, + 0, (X509V3_EXT_FREE)SCT_LIST_free, + (X509V3_EXT_D2I)d2i_SCT_LIST, 0, + 0, 0, 0, 0, + (X509V3_EXT_I2R)i2r_SCT_LIST, 0, + NULL}, +}; + +static void tls12_signature_print(BIO *out, const unsigned char hash_alg, + const unsigned char sig_alg) +{ + int nid = NID_undef; + /* RFC6962 only permits two signature algorithms */ + if (hash_alg == TLSEXT_hash_sha256) { + if (sig_alg == TLSEXT_signature_rsa) + nid = NID_sha256WithRSAEncryption; + else if (sig_alg == TLSEXT_signature_ecdsa) + nid = NID_ecdsa_with_SHA256; + } + if (nid == NID_undef) + BIO_printf(out, "%02X%02X", hash_alg, sig_alg); + else + BIO_printf(out, "%s", OBJ_nid2ln(nid)); +} + +static void timestamp_print(BIO *out, SCT_TIMESTAMP timestamp) +{ + ASN1_GENERALIZEDTIME *gen; + char genstr[20]; + gen = ASN1_GENERALIZEDTIME_new(); + ASN1_GENERALIZEDTIME_adj(gen, (time_t)0, + (int)(timestamp / 86400000), + (timestamp % 86400000) / 1000); + /* + * Note GeneralizedTime from ASN1_GENERALIZETIME_adj is always 15 + * characters long with a final Z. Update it with fractional seconds. 
+ */ + BIO_snprintf(genstr, sizeof(genstr), "%.14s.%03dZ", + ASN1_STRING_data(gen), (unsigned int)(timestamp % 1000)); + ASN1_GENERALIZEDTIME_set_string(gen, genstr); + ASN1_GENERALIZEDTIME_print(out, gen); + ASN1_GENERALIZEDTIME_free(gen); +} + +static void SCT_free(SCT *sct) +{ + if (sct) { + if (sct->sct) + OPENSSL_free(sct->sct); + OPENSSL_free(sct); + } +} + +static void SCT_LIST_free(STACK_OF(SCT) *a) +{ + sk_SCT_pop_free(a, SCT_free); +} + +static STACK_OF(SCT) *d2i_SCT_LIST(STACK_OF(SCT) **a, + const unsigned char **pp, long length) +{ + ASN1_OCTET_STRING *oct = NULL; + STACK_OF(SCT) *sk = NULL; + SCT *sct; + unsigned char *p, *p2; + unsigned short listlen, sctlen = 0, fieldlen; + + if (d2i_ASN1_OCTET_STRING(&oct, pp, length) == NULL) + return NULL; + if (oct->length < 2) + goto done; + p = oct->data; + n2s(p, listlen); + if (listlen != oct->length - 2) + goto done; + + if ((sk = sk_SCT_new_null()) == NULL) + goto done; + + while (listlen > 0) { + if (listlen < 2) + goto err; + n2s(p, sctlen); + listlen -= 2; + + if ((sctlen < 1) || (sctlen > listlen)) + goto err; + listlen -= sctlen; + + sct = OPENSSL_malloc(sizeof(SCT)); + if (!sct) + goto err; + if (!sk_SCT_push(sk, sct)) { + OPENSSL_free(sct); + goto err; + } + + sct->sct = OPENSSL_malloc(sctlen); + if (!sct->sct) + goto err; + memcpy(sct->sct, p, sctlen); + sct->sctlen = sctlen; + p += sctlen; + p2 = sct->sct; + + sct->version = *p2++; + if (sct->version == 0) { /* SCT v1 */ + /*- + * Fixed-length header: + * struct { + * (1 byte) Version sct_version; + * (32 bytes) LogID id; + * (8 bytes) uint64 timestamp; + * (2 bytes + ?) CtExtensions extensions; + */ + if (sctlen < 43) + goto err; + sctlen -= 43; + + sct->logid = p2; + sct->logidlen = 32; + p2 += 32; + + n2l8(p2, sct->timestamp); + + n2s(p2, fieldlen); + if (sctlen < fieldlen) + goto err; + sct->ext = p2; + sct->extlen = fieldlen; + p2 += fieldlen; + sctlen -= fieldlen; + + /*- + * digitally-signed struct header: + * (1 byte) Hash algorithm + * (1 byte) Signature algorithm + * (2 bytes + ?) 
Signature + */ + if (sctlen < 4) + goto err; + sctlen -= 4; + + sct->hash_alg = *p2++; + sct->sig_alg = *p2++; + n2s(p2, fieldlen); + if (sctlen != fieldlen) + goto err; + sct->sig = p2; + sct->siglen = fieldlen; + } + } + + done: + ASN1_OCTET_STRING_free(oct); + return sk; + + err: + SCT_LIST_free(sk); + sk = NULL; + goto done; +} + +static int i2r_SCT_LIST(X509V3_EXT_METHOD *method, STACK_OF(SCT) *sct_list, + BIO *out, int indent) +{ + SCT *sct; + int i; + + for (i = 0; i < sk_SCT_num(sct_list);) { + sct = sk_SCT_value(sct_list, i); + + BIO_printf(out, "%*sSigned Certificate Timestamp:", indent, ""); + BIO_printf(out, "\n%*sVersion : ", indent + 4, ""); + + if (sct->version == 0) { /* SCT v1 */ + BIO_printf(out, "v1(0)"); + + BIO_printf(out, "\n%*sLog ID : ", indent + 4, ""); + BIO_hex_string(out, indent + 16, 16, sct->logid, sct->logidlen); + + BIO_printf(out, "\n%*sTimestamp : ", indent + 4, ""); + timestamp_print(out, sct->timestamp); + + BIO_printf(out, "\n%*sExtensions: ", indent + 4, ""); + if (sct->extlen == 0) + BIO_printf(out, "none"); + else + BIO_hex_string(out, indent + 16, 16, sct->ext, sct->extlen); + + BIO_printf(out, "\n%*sSignature : ", indent + 4, ""); + tls12_signature_print(out, sct->hash_alg, sct->sig_alg); + BIO_printf(out, "\n%*s ", indent + 4, ""); + BIO_hex_string(out, indent + 16, 16, sct->sig, sct->siglen); + } else { /* Unknown version */ + + BIO_printf(out, "unknown\n%*s", indent + 16, ""); + BIO_hex_string(out, indent + 16, 16, sct->sct, sct->sctlen); + } + + if (++i < sk_SCT_num(sct_list)) + BIO_printf(out, "\n"); + } + + return 1; +} diff --git a/deps/openssl/openssl/crypto/x509v3/v3_utl.c b/deps/openssl/openssl/crypto/x509v3/v3_utl.c index 65dd1e2fb35937..ed6099e120b261 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_utl.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_utl.c @@ -628,6 +628,433 @@ void X509_email_free(STACK_OF(OPENSSL_STRING) *sk) sk_OPENSSL_STRING_pop_free(sk, str_free); } +typedef int (*equal_fn) (const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags); + +/* Skip pattern prefix to match "wildcard" subject */ +static void skip_prefix(const unsigned char **p, size_t *plen, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + const unsigned char *pattern = *p; + size_t pattern_len = *plen; + + /* + * If subject starts with a leading '.' followed by more octets, and + * pattern is longer, compare just an equal-length suffix with the + * full subject (starting at the '.'), provided the prefix contains + * no NULs. + */ + if ((flags & _X509_CHECK_FLAG_DOT_SUBDOMAINS) == 0) + return; + + while (pattern_len > subject_len && *pattern) { + if ((flags & X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS) && + *pattern == '.') + break; + ++pattern; + --pattern_len; + } + + /* Skip if entire prefix acceptable */ + if (pattern_len == subject_len) { + *p = pattern; + *plen = pattern_len; + } +} + +/* Compare while ASCII ignoring case. */ +static int equal_nocase(const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + skip_prefix(&pattern, &pattern_len, subject, subject_len, flags); + if (pattern_len != subject_len) + return 0; + while (pattern_len) { + unsigned char l = *pattern; + unsigned char r = *subject; + /* The pattern must not contain NUL characters. 
*/ + if (l == 0) + return 0; + if (l != r) { + if ('A' <= l && l <= 'Z') + l = (l - 'A') + 'a'; + if ('A' <= r && r <= 'Z') + r = (r - 'A') + 'a'; + if (l != r) + return 0; + } + ++pattern; + ++subject; + --pattern_len; + } + return 1; +} + +/* Compare using memcmp. */ +static int equal_case(const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + skip_prefix(&pattern, &pattern_len, subject, subject_len, flags); + if (pattern_len != subject_len) + return 0; + return !memcmp(pattern, subject, pattern_len); +} + +/* + * RFC 5280, section 7.5, requires that only the domain is compared in a + * case-insensitive manner. + */ +static int equal_email(const unsigned char *a, size_t a_len, + const unsigned char *b, size_t b_len, + unsigned int unused_flags) +{ + size_t i = a_len; + if (a_len != b_len) + return 0; + /* + * We search backwards for the '@' character, so that we do not have to + * deal with quoted local-parts. The domain part is compared in a + * case-insensitive manner. + */ + while (i > 0) { + --i; + if (a[i] == '@' || b[i] == '@') { + if (!equal_nocase(a + i, a_len - i, b + i, a_len - i, 0)) + return 0; + break; + } + } + if (i == 0) + i = a_len; + return equal_case(a, i, b, i, 0); +} + +/* + * Compare the prefix and suffix with the subject, and check that the + * characters in-between are valid. + */ +static int wildcard_match(const unsigned char *prefix, size_t prefix_len, + const unsigned char *suffix, size_t suffix_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + const unsigned char *wildcard_start; + const unsigned char *wildcard_end; + const unsigned char *p; + int allow_multi = 0; + int allow_idna = 0; + + if (subject_len < prefix_len + suffix_len) + return 0; + if (!equal_nocase(prefix, prefix_len, subject, prefix_len, flags)) + return 0; + wildcard_start = subject + prefix_len; + wildcard_end = subject + (subject_len - suffix_len); + if (!equal_nocase(wildcard_end, suffix_len, suffix, suffix_len, flags)) + return 0; + /* + * If the wildcard makes up the entire first label, it must match at + * least one character. + */ + if (prefix_len == 0 && *suffix == '.') { + if (wildcard_start == wildcard_end) + return 0; + allow_idna = 1; + if (flags & X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS) + allow_multi = 1; + } + /* IDNA labels cannot match partial wildcards */ + if (!allow_idna && + subject_len >= 4 && strncasecmp((char *)subject, "xn--", 4) == 0) + return 0; + /* The wildcard may match a literal '*' */ + if (wildcard_end == wildcard_start + 1 && *wildcard_start == '*') + return 1; + /* + * Check that the part matched by the wildcard contains only + * permitted characters and only matches a single label unless + * allow_multi is set. + */ + for (p = wildcard_start; p != wildcard_end; ++p) + if (!(('0' <= *p && *p <= '9') || + ('A' <= *p && *p <= 'Z') || + ('a' <= *p && *p <= 'z') || + *p == '-' || (allow_multi && *p == '.'))) + return 0; + return 1; +} + +#define LABEL_START (1 << 0) +#define LABEL_END (1 << 1) +#define LABEL_HYPHEN (1 << 2) +#define LABEL_IDNA (1 << 3) + +static const unsigned char *valid_star(const unsigned char *p, size_t len, + unsigned int flags) +{ + const unsigned char *star = 0; + size_t i; + int state = LABEL_START; + int dots = 0; + for (i = 0; i < len; ++i) { + /* + * Locate first and only legal wildcard, either at the start + * or end of a non-IDNA first and not final label. 
+ */ + if (p[i] == '*') { + int atstart = (state & LABEL_START); + int atend = (i == len - 1 || p[i + i] == '.'); + /*- + * At most one wildcard per pattern. + * No wildcards in IDNA labels. + * No wildcards after the first label. + */ + if (star != NULL || (state & LABEL_IDNA) != 0 || dots) + return NULL; + /* Only full-label '*.example.com' wildcards? */ + if ((flags & X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS) + && (!atstart || !atend)) + return NULL; + /* No 'foo*bar' wildcards */ + if (!atstart && !atend) + return NULL; + star = &p[i]; + state &= ~LABEL_START; + } else if (('a' <= p[i] && p[i] <= 'z') + || ('A' <= p[i] && p[i] <= 'Z') + || ('0' <= p[i] && p[i] <= '9')) { + if ((state & LABEL_START) != 0 + && len - i >= 4 && strncasecmp((char *)&p[i], "xn--", 4) == 0) + state |= LABEL_IDNA; + state &= ~(LABEL_HYPHEN | LABEL_START); + } else if (p[i] == '.') { + if ((state & (LABEL_HYPHEN | LABEL_START)) != 0) + return NULL; + state = LABEL_START; + ++dots; + } else if (p[i] == '-') { + if ((state & LABEL_HYPHEN) != 0) + return NULL; + state |= LABEL_HYPHEN; + } else + return NULL; + } + + /* + * The final label must not end in a hyphen or ".", and + * there must be at least two dots after the star. + */ + if ((state & (LABEL_START | LABEL_HYPHEN)) != 0 || dots < 2) + return NULL; + return star; +} + +/* Compare using wildcards. */ +static int equal_wildcard(const unsigned char *pattern, size_t pattern_len, + const unsigned char *subject, size_t subject_len, + unsigned int flags) +{ + const unsigned char *star = NULL; + + /* + * Subject names starting with '.' can only match a wildcard pattern + * via a subject sub-domain pattern suffix match. + */ + if (!(subject_len > 1 && subject[0] == '.')) + star = valid_star(pattern, pattern_len, flags); + if (star == NULL) + return equal_nocase(pattern, pattern_len, + subject, subject_len, flags); + return wildcard_match(pattern, star - pattern, + star + 1, (pattern + pattern_len) - star - 1, + subject, subject_len, flags); +} + +/* + * Compare an ASN1_STRING to a supplied string. If they match return 1. If + * cmp_type > 0 only compare if string matches the type, otherwise convert it + * to UTF8. + */ + +static int do_check_string(ASN1_STRING *a, int cmp_type, equal_fn equal, + unsigned int flags, const char *b, size_t blen, + char **peername) +{ + int rv = 0; + + if (!a->data || !a->length) + return 0; + if (cmp_type > 0) { + if (cmp_type != a->type) + return 0; + if (cmp_type == V_ASN1_IA5STRING) + rv = equal(a->data, a->length, (unsigned char *)b, blen, flags); + else if (a->length == (int)blen && !memcmp(a->data, b, blen)) + rv = 1; + if (rv > 0 && peername) + *peername = BUF_strndup((char *)a->data, a->length); + } else { + int astrlen; + unsigned char *astr; + astrlen = ASN1_STRING_to_UTF8(&astr, a); + if (astrlen < 0) { + /* + * -1 could be an internal malloc failure or a decoding error from + * malformed input; we can't distinguish. 
+ */ + return -1; + } + rv = equal(astr, astrlen, (unsigned char *)b, blen, flags); + if (rv > 0 && peername) + *peername = BUF_strndup((char *)astr, astrlen); + OPENSSL_free(astr); + } + return rv; +} + +static int do_x509_check(X509 *x, const char *chk, size_t chklen, + unsigned int flags, int check_type, char **peername) +{ + GENERAL_NAMES *gens = NULL; + X509_NAME *name = NULL; + int i; + int cnid; + int alt_type; + int san_present = 0; + int rv = 0; + equal_fn equal; + + /* See below, this flag is internal-only */ + flags &= ~_X509_CHECK_FLAG_DOT_SUBDOMAINS; + if (check_type == GEN_EMAIL) { + cnid = NID_pkcs9_emailAddress; + alt_type = V_ASN1_IA5STRING; + equal = equal_email; + } else if (check_type == GEN_DNS) { + cnid = NID_commonName; + /* Implicit client-side DNS sub-domain pattern */ + if (chklen > 1 && chk[0] == '.') + flags |= _X509_CHECK_FLAG_DOT_SUBDOMAINS; + alt_type = V_ASN1_IA5STRING; + if (flags & X509_CHECK_FLAG_NO_WILDCARDS) + equal = equal_nocase; + else + equal = equal_wildcard; + } else { + cnid = 0; + alt_type = V_ASN1_OCTET_STRING; + equal = equal_case; + } + + if (chklen == 0) + chklen = strlen(chk); + + gens = X509_get_ext_d2i(x, NID_subject_alt_name, NULL, NULL); + if (gens) { + for (i = 0; i < sk_GENERAL_NAME_num(gens); i++) { + GENERAL_NAME *gen; + ASN1_STRING *cstr; + gen = sk_GENERAL_NAME_value(gens, i); + if (gen->type != check_type) + continue; + san_present = 1; + if (check_type == GEN_EMAIL) + cstr = gen->d.rfc822Name; + else if (check_type == GEN_DNS) + cstr = gen->d.dNSName; + else + cstr = gen->d.iPAddress; + /* Positive on success, negative on error! */ + if ((rv = do_check_string(cstr, alt_type, equal, flags, + chk, chklen, peername)) != 0) + break; + } + GENERAL_NAMES_free(gens); + if (rv != 0) + return rv; + if (!cnid + || (san_present + && !(flags & X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT))) + return 0; + } + i = -1; + name = X509_get_subject_name(x); + while ((i = X509_NAME_get_index_by_NID(name, cnid, i)) >= 0) { + X509_NAME_ENTRY *ne; + ASN1_STRING *str; + ne = X509_NAME_get_entry(name, i); + str = X509_NAME_ENTRY_get_data(ne); + /* Positive on success, negative on error! */ + if ((rv = do_check_string(str, -1, equal, flags, + chk, chklen, peername)) != 0) + return rv; + } + return 0; +} + +int X509_check_host(X509 *x, const char *chk, size_t chklen, + unsigned int flags, char **peername) +{ + if (chk == NULL) + return -2; + /* + * Embedded NULs are disallowed, except as the last character of a + * string of length 2 or more (tolerate caller including terminating + * NUL in string length). + */ + if (chklen == 0) + chklen = strlen(chk); + else if (memchr(chk, '\0', chklen > 1 ? chklen - 1 : chklen)) + return -2; + if (chklen > 1 && chk[chklen - 1] == '\0') + --chklen; + return do_x509_check(x, chk, chklen, flags, GEN_DNS, peername); +} + +int X509_check_email(X509 *x, const char *chk, size_t chklen, + unsigned int flags) +{ + if (chk == NULL) + return -2; + /* + * Embedded NULs are disallowed, except as the last character of a + * string of length 2 or more (tolerate caller including terminating + * NUL in string length). + */ + if (chklen == 0) + chklen = strlen((char *)chk); + else if (memchr(chk, '\0', chklen > 1 ? 
chklen - 1 : chklen)) + return -2; + if (chklen > 1 && chk[chklen - 1] == '\0') + --chklen; + return do_x509_check(x, chk, chklen, flags, GEN_EMAIL, NULL); +} + +int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen, + unsigned int flags) +{ + if (chk == NULL) + return -2; + return do_x509_check(x, (char *)chk, chklen, flags, GEN_IPADD, NULL); +} + +int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags) +{ + unsigned char ipout[16]; + size_t iplen; + + if (ipasc == NULL) + return -2; + iplen = (size_t)a2i_ipadd(ipout, ipasc); + if (iplen == 0) + return -2; + return do_x509_check(x, (char *)ipout, iplen, flags, GEN_IPADD, NULL); +} + /* * Convert IP addresses both IPv4 and IPv6 into an OCTET STRING compatible * with RFC3280. diff --git a/deps/openssl/openssl/crypto/x509v3/v3err.c b/deps/openssl/openssl/crypto/x509v3/v3err.c index 0138f7a8462aca..bcc1be722e4269 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3err.c +++ b/deps/openssl/openssl/crypto/x509v3/v3err.c @@ -1,6 +1,6 @@ /* crypto/x509v3/v3err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,7 +70,7 @@ # define ERR_REASON(reason) ERR_PACK(ERR_LIB_X509V3,0,reason) static ERR_STRING_DATA X509V3_str_functs[] = { - {ERR_FUNC(X509V3_F_A2I_GENERAL_NAME), "A2I_GENERAL_NAME"}, + {ERR_FUNC(X509V3_F_A2I_GENERAL_NAME), "a2i_GENERAL_NAME"}, {ERR_FUNC(X509V3_F_ASIDENTIFIERCHOICE_CANONIZE), "ASIDENTIFIERCHOICE_CANONIZE"}, {ERR_FUNC(X509V3_F_ASIDENTIFIERCHOICE_IS_CANONICAL), @@ -132,6 +132,7 @@ static ERR_STRING_DATA X509V3_str_functs[] = { {ERR_FUNC(X509V3_F_X509V3_EXT_ADD), "X509V3_EXT_add"}, {ERR_FUNC(X509V3_F_X509V3_EXT_ADD_ALIAS), "X509V3_EXT_add_alias"}, {ERR_FUNC(X509V3_F_X509V3_EXT_CONF), "X509V3_EXT_conf"}, + {ERR_FUNC(X509V3_F_X509V3_EXT_FREE), "X509V3_EXT_free"}, {ERR_FUNC(X509V3_F_X509V3_EXT_I2D), "X509V3_EXT_i2d"}, {ERR_FUNC(X509V3_F_X509V3_EXT_NCONF), "X509V3_EXT_nconf"}, {ERR_FUNC(X509V3_F_X509V3_GET_SECTION), "X509V3_get_section"}, @@ -149,6 +150,8 @@ static ERR_STRING_DATA X509V3_str_reasons[] = { {ERR_REASON(X509V3_R_BN_DEC2BN_ERROR), "bn dec2bn error"}, {ERR_REASON(X509V3_R_BN_TO_ASN1_INTEGER_ERROR), "bn to asn1 integer error"}, + {ERR_REASON(X509V3_R_CANNOT_FIND_FREE_FUNCTION), + "cannot find free function"}, {ERR_REASON(X509V3_R_DIRNAME_ERROR), "dirname error"}, {ERR_REASON(X509V3_R_DISTPOINT_ALREADY_SET), "distpoint already set"}, {ERR_REASON(X509V3_R_DUPLICATE_ZONE_ID), "duplicate zone id"}, @@ -167,7 +170,6 @@ static ERR_STRING_DATA X509V3_str_reasons[] = { {ERR_REASON(X509V3_R_ILLEGAL_HEX_DIGIT), "illegal hex digit"}, {ERR_REASON(X509V3_R_INCORRECT_POLICY_SYNTAX_TAG), "incorrect policy syntax tag"}, - {ERR_REASON(X509V3_R_INVALID_MULTIPLE_RDNS), "invalid multiple rdns"}, {ERR_REASON(X509V3_R_INVALID_ASNUMBER), "invalid asnumber"}, {ERR_REASON(X509V3_R_INVALID_ASRANGE), "invalid asrange"}, {ERR_REASON(X509V3_R_INVALID_BOOLEAN_STRING), "invalid boolean string"}, @@ -175,6 +177,7 @@ static ERR_STRING_DATA X509V3_str_reasons[] = { "invalid extension string"}, {ERR_REASON(X509V3_R_INVALID_INHERITANCE), "invalid inheritance"}, {ERR_REASON(X509V3_R_INVALID_IPADDRESS), "invalid ipaddress"}, + {ERR_REASON(X509V3_R_INVALID_MULTIPLE_RDNS), "invalid multiple rdns"}, 
{ERR_REASON(X509V3_R_INVALID_NAME), "invalid name"}, {ERR_REASON(X509V3_R_INVALID_NULL_ARGUMENT), "invalid null argument"}, {ERR_REASON(X509V3_R_INVALID_NULL_NAME), "invalid null name"}, diff --git a/deps/openssl/openssl/crypto/x509v3/v3nametest.c b/deps/openssl/openssl/crypto/x509v3/v3nametest.c new file mode 100644 index 00000000000000..7b5c1c8e51270f --- /dev/null +++ b/deps/openssl/openssl/crypto/x509v3/v3nametest.c @@ -0,0 +1,336 @@ +#include +#include +#include "../e_os.h" +#include + +static const char *const names[] = { + "a", "b", ".", "*", "@", + ".a", "a.", ".b", "b.", ".*", "*.", "*@", "@*", "a@", "@a", "b@", "..", + "@@", "**", "*.com", "*com", "*.*.com", "*com", "com*", "*example.com", + "*@example.com", "test@*.example.com", "example.com", "www.example.com", + "test.www.example.com", "*.example.com", "*.www.example.com", + "test.*.example.com", "www.*.com", + ".www.example.com", "*www.example.com", + "example.net", "xn--rger-koa.example.com", + "a.example.com", "b.example.com", + "postmaster@example.com", "Postmaster@example.com", + "postmaster@EXAMPLE.COM", + NULL +}; + +static const char *const exceptions[] = { + "set CN: host: [*.example.com] matches [a.example.com]", + "set CN: host: [*.example.com] matches [b.example.com]", + "set CN: host: [*.example.com] matches [www.example.com]", + "set CN: host: [*.example.com] matches [xn--rger-koa.example.com]", + "set CN: host: [*.www.example.com] matches [test.www.example.com]", + "set CN: host: [*.www.example.com] matches [.www.example.com]", + "set CN: host: [*www.example.com] matches [www.example.com]", + "set CN: host: [test.www.example.com] matches [.www.example.com]", + "set CN: host-no-wildcards: [*.www.example.com] matches [.www.example.com]", + "set CN: host-no-wildcards: [test.www.example.com] matches [.www.example.com]", + "set emailAddress: email: [postmaster@example.com] does not match [Postmaster@example.com]", + "set emailAddress: email: [postmaster@EXAMPLE.COM] does not match [Postmaster@example.com]", + "set emailAddress: email: [Postmaster@example.com] does not match [postmaster@example.com]", + "set emailAddress: email: [Postmaster@example.com] does not match [postmaster@EXAMPLE.COM]", + "set dnsName: host: [*.example.com] matches [www.example.com]", + "set dnsName: host: [*.example.com] matches [a.example.com]", + "set dnsName: host: [*.example.com] matches [b.example.com]", + "set dnsName: host: [*.example.com] matches [xn--rger-koa.example.com]", + "set dnsName: host: [*.www.example.com] matches [test.www.example.com]", + "set dnsName: host-no-wildcards: [*.www.example.com] matches [.www.example.com]", + "set dnsName: host-no-wildcards: [test.www.example.com] matches [.www.example.com]", + "set dnsName: host: [*.www.example.com] matches [.www.example.com]", + "set dnsName: host: [*www.example.com] matches [www.example.com]", + "set dnsName: host: [test.www.example.com] matches [.www.example.com]", + "set rfc822Name: email: [postmaster@example.com] does not match [Postmaster@example.com]", + "set rfc822Name: email: [Postmaster@example.com] does not match [postmaster@example.com]", + "set rfc822Name: email: [Postmaster@example.com] does not match [postmaster@EXAMPLE.COM]", + "set rfc822Name: email: [postmaster@EXAMPLE.COM] does not match [Postmaster@example.com]", + NULL +}; + +static int is_exception(const char *msg) +{ + const char *const *p; + for (p = exceptions; *p; ++p) + if (strcmp(msg, *p) == 0) + return 1; + return 0; +} + +static int set_cn(X509 *crt, ...) 
+{ + int ret = 0; + X509_NAME *n = NULL; + va_list ap; + va_start(ap, crt); + n = X509_NAME_new(); + if (n == NULL) + goto out; + while (1) { + int nid; + const char *name; + nid = va_arg(ap, int); + if (nid == 0) + break; + name = va_arg(ap, const char *); + if (!X509_NAME_add_entry_by_NID(n, nid, MBSTRING_ASC, + (unsigned char *)name, -1, -1, 1)) + goto out; + } + if (!X509_set_subject_name(crt, n)) + goto out; + ret = 1; + out: + X509_NAME_free(n); + va_end(ap); + return ret; +} + +/*- +int X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc); +X509_EXTENSION *X509_EXTENSION_create_by_NID(X509_EXTENSION **ex, + int nid, int crit, ASN1_OCTET_STRING *data); +int X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc); +*/ + +static int set_altname(X509 *crt, ...) +{ + int ret = 0; + GENERAL_NAMES *gens = NULL; + GENERAL_NAME *gen = NULL; + ASN1_IA5STRING *ia5 = NULL; + va_list ap; + va_start(ap, crt); + gens = sk_GENERAL_NAME_new_null(); + if (gens == NULL) + goto out; + while (1) { + int type; + const char *name; + type = va_arg(ap, int); + if (type == 0) + break; + name = va_arg(ap, const char *); + + gen = GENERAL_NAME_new(); + if (gen == NULL) + goto out; + ia5 = ASN1_IA5STRING_new(); + if (ia5 == NULL) + goto out; + if (!ASN1_STRING_set(ia5, name, -1)) + goto out; + switch (type) { + case GEN_EMAIL: + case GEN_DNS: + GENERAL_NAME_set0_value(gen, type, ia5); + ia5 = NULL; + break; + default: + abort(); + } + sk_GENERAL_NAME_push(gens, gen); + gen = NULL; + } + if (!X509_add1_ext_i2d(crt, NID_subject_alt_name, gens, 0, 0)) + goto out; + ret = 1; + out: + ASN1_IA5STRING_free(ia5); + GENERAL_NAME_free(gen); + GENERAL_NAMES_free(gens); + va_end(ap); + return ret; +} + +static int set_cn1(X509 *crt, const char *name) +{ + return set_cn(crt, NID_commonName, name, 0); +} + +static int set_cn_and_email(X509 *crt, const char *name) +{ + return set_cn(crt, NID_commonName, name, + NID_pkcs9_emailAddress, "dummy@example.com", 0); +} + +static int set_cn2(X509 *crt, const char *name) +{ + return set_cn(crt, NID_commonName, "dummy value", + NID_commonName, name, 0); +} + +static int set_cn3(X509 *crt, const char *name) +{ + return set_cn(crt, NID_commonName, name, + NID_commonName, "dummy value", 0); +} + +static int set_email1(X509 *crt, const char *name) +{ + return set_cn(crt, NID_pkcs9_emailAddress, name, 0); +} + +static int set_email2(X509 *crt, const char *name) +{ + return set_cn(crt, NID_pkcs9_emailAddress, "dummy@example.com", + NID_pkcs9_emailAddress, name, 0); +} + +static int set_email3(X509 *crt, const char *name) +{ + return set_cn(crt, NID_pkcs9_emailAddress, name, + NID_pkcs9_emailAddress, "dummy@example.com", 0); +} + +static int set_email_and_cn(X509 *crt, const char *name) +{ + return set_cn(crt, NID_pkcs9_emailAddress, name, + NID_commonName, "www.example.org", 0); +} + +static int set_altname_dns(X509 *crt, const char *name) +{ + return set_altname(crt, GEN_DNS, name, 0); +} + +static int set_altname_email(X509 *crt, const char *name) +{ + return set_altname(crt, GEN_EMAIL, name, 0); +} + +struct set_name_fn { + int (*fn) (X509 *, const char *); + const char *name; + int host; + int email; +}; + +static const struct set_name_fn name_fns[] = { + {set_cn1, "set CN", 1, 0}, + {set_cn2, "set CN", 1, 0}, + {set_cn3, "set CN", 1, 0}, + {set_cn_and_email, "set CN", 1, 0}, + {set_email1, "set emailAddress", 0, 1}, + {set_email2, "set emailAddress", 0, 1}, + {set_email3, "set emailAddress", 0, 1}, + {set_email_and_cn, "set emailAddress", 0, 1}, + {set_altname_dns, "set dnsName", 1, 0}, + 
{set_altname_email, "set rfc822Name", 0, 1}, + {NULL, NULL, 0} +}; + +static X509 *make_cert() +{ + X509 *ret = NULL; + X509 *crt = NULL; + X509_NAME *issuer = NULL; + crt = X509_new(); + if (crt == NULL) + goto out; + if (!X509_set_version(crt, 3)) + goto out; + ret = crt; + crt = NULL; + out: + X509_NAME_free(issuer); + return ret; +} + +static int errors; + +static void check_message(const struct set_name_fn *fn, const char *op, + const char *nameincert, int match, const char *name) +{ + char msg[1024]; + if (match < 0) + return; + BIO_snprintf(msg, sizeof(msg), "%s: %s: [%s] %s [%s]", + fn->name, op, nameincert, + match ? "matches" : "does not match", name); + if (is_exception(msg)) + return; + puts(msg); + ++errors; +} + +static void run_cert(X509 *crt, const char *nameincert, + const struct set_name_fn *fn) +{ + const char *const *pname = names; + while (*pname) { + int samename = strcasecmp(nameincert, *pname) == 0; + size_t namelen = strlen(*pname); + char *name = malloc(namelen); + int match, ret; + memcpy(name, *pname, namelen); + + ret = X509_check_host(crt, name, namelen, 0, NULL); + match = -1; + if (ret < 0) { + fprintf(stderr, "internal error in X509_check_host"); + ++errors; + } else if (fn->host) { + if (ret == 1 && !samename) + match = 1; + if (ret == 0 && samename) + match = 0; + } else if (ret == 1) + match = 1; + check_message(fn, "host", nameincert, match, *pname); + + ret = X509_check_host(crt, name, namelen, + X509_CHECK_FLAG_NO_WILDCARDS, NULL); + match = -1; + if (ret < 0) { + fprintf(stderr, "internal error in X509_check_host"); + ++errors; + } else if (fn->host) { + if (ret == 1 && !samename) + match = 1; + if (ret == 0 && samename) + match = 0; + } else if (ret == 1) + match = 1; + check_message(fn, "host-no-wildcards", nameincert, match, *pname); + + ret = X509_check_email(crt, name, namelen, 0); + match = -1; + if (fn->email) { + if (ret && !samename) + match = 1; + if (!ret && samename && strchr(nameincert, '@') != NULL) + match = 0; + } else if (ret) + match = 1; + check_message(fn, "email", nameincert, match, *pname); + ++pname; + free(name); + } +} + +int main(void) +{ + const struct set_name_fn *pfn = name_fns; + while (pfn->name) { + const char *const *pname = names; + while (*pname) { + X509 *crt = make_cert(); + if (crt == NULL) { + fprintf(stderr, "make_cert failed\n"); + return 1; + } + if (!pfn->fn(crt, *pname)) { + fprintf(stderr, "X509 name setting failed\n"); + return 1; + } + run_cert(crt, *pname, pfn); + X509_free(crt); + ++pname; + } + ++pfn; + } + return errors > 0 ? 
1 : 0; +} diff --git a/deps/openssl/openssl/crypto/x509v3/x509v3.h b/deps/openssl/openssl/crypto/x509v3/x509v3.h index db9c3e8bde6e35..f5c61560aa10ba 100644 --- a/deps/openssl/openssl/crypto/x509v3/x509v3.h +++ b/deps/openssl/openssl/crypto/x509v3/x509v3.h @@ -67,6 +67,13 @@ extern "C" { #endif +# ifdef OPENSSL_SYS_WIN32 +/* Under Win32 these are defined in wincrypt.h */ +# undef X509_NAME +# undef X509_CERT_PAIR +# undef X509_EXTENSIONS +# endif + /* Forward reference */ struct v3_ext_method; struct v3_ext_ctx; @@ -405,7 +412,6 @@ struct ISSUING_DIST_POINT_st { # define EXFLAG_CA 0x10 /* Really self issued not necessarily self signed */ # define EXFLAG_SI 0x20 -# define EXFLAG_SS 0x20 # define EXFLAG_V1 0x40 # define EXFLAG_INVALID 0x80 # define EXFLAG_SET 0x100 @@ -414,6 +420,8 @@ struct ISSUING_DIST_POINT_st { # define EXFLAG_INVALID_POLICY 0x800 # define EXFLAG_FRESHEST 0x1000 +/* Self signed */ +# define EXFLAG_SS 0x2000 # define KU_DIGITAL_SIGNATURE 0x0080 # define KU_NON_REPUDIATION 0x0040 @@ -442,6 +450,7 @@ struct ISSUING_DIST_POINT_st { # define XKU_OCSP_SIGN 0x20 # define XKU_TIMESTAMP 0x40 # define XKU_DVCS 0x80 +# define XKU_ANYEKU 0x100 # define X509_PURPOSE_DYNAMIC 0x1 # define X509_PURPOSE_DYNAMIC_NAME 0x2 @@ -665,6 +674,7 @@ STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line); void *X509V3_EXT_d2i(X509_EXTENSION *ext); void *X509V3_get_d2i(STACK_OF(X509_EXTENSION) *x, int nid, int *crit, int *idx); +int X509V3_EXT_free(int nid, void *ext_data); X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit, void *ext_struc); int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid, void *value, @@ -707,6 +717,34 @@ STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x); STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x); void X509_email_free(STACK_OF(OPENSSL_STRING) *sk); STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x); +/* Flags for X509_check_* functions */ + +/* + * Always check subject name for host match even if subject alt names present + */ +# define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0x1 +/* Disable wildcard matching for dnsName fields and common name. */ +# define X509_CHECK_FLAG_NO_WILDCARDS 0x2 +/* Wildcards must not match a partial label. */ +# define X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS 0x4 +/* Allow (non-partial) wildcards to match multiple labels. */ +# define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8 +/* Constraint verifier subdomain patterns to match a single labels. */ +# define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10 +/* + * Match reference identifiers starting with "." to any sub-domain. + * This is a non-public flag, turned on implicitly when the subject + * reference identity is a DNS name. 
+ */ +# define _X509_CHECK_FLAG_DOT_SUBDOMAINS 0x8000 + +int X509_check_host(X509 *x, const char *chk, size_t chklen, + unsigned int flags, char **peername); +int X509_check_email(X509 *x, const char *chk, size_t chklen, + unsigned int flags); +int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen, + unsigned int flags); +int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags); ASN1_OCTET_STRING *a2i_IPADDRESS(const char *ipasc); ASN1_OCTET_STRING *a2i_IPADDRESS_NC(const char *ipasc); @@ -930,6 +968,7 @@ void ERR_load_X509V3_strings(void); # define X509V3_F_X509V3_EXT_ADD 104 # define X509V3_F_X509V3_EXT_ADD_ALIAS 106 # define X509V3_F_X509V3_EXT_CONF 107 +# define X509V3_F_X509V3_EXT_FREE 165 # define X509V3_F_X509V3_EXT_I2D 136 # define X509V3_F_X509V3_EXT_NCONF 152 # define X509V3_F_X509V3_GET_SECTION 142 @@ -944,6 +983,7 @@ void ERR_load_X509V3_strings(void); # define X509V3_R_BAD_OBJECT 119 # define X509V3_R_BN_DEC2BN_ERROR 100 # define X509V3_R_BN_TO_ASN1_INTEGER_ERROR 101 +# define X509V3_R_CANNOT_FIND_FREE_FUNCTION 168 # define X509V3_R_DIRNAME_ERROR 149 # define X509V3_R_DISTPOINT_ALREADY_SET 160 # define X509V3_R_DUPLICATE_ZONE_ID 133 @@ -959,13 +999,13 @@ void ERR_load_X509V3_strings(void); # define X509V3_R_ILLEGAL_EMPTY_EXTENSION 151 # define X509V3_R_ILLEGAL_HEX_DIGIT 113 # define X509V3_R_INCORRECT_POLICY_SYNTAX_TAG 152 -# define X509V3_R_INVALID_MULTIPLE_RDNS 161 # define X509V3_R_INVALID_ASNUMBER 162 # define X509V3_R_INVALID_ASRANGE 163 # define X509V3_R_INVALID_BOOLEAN_STRING 104 # define X509V3_R_INVALID_EXTENSION_STRING 105 # define X509V3_R_INVALID_INHERITANCE 165 # define X509V3_R_INVALID_IPADDRESS 166 +# define X509V3_R_INVALID_MULTIPLE_RDNS 161 # define X509V3_R_INVALID_NAME 106 # define X509V3_R_INVALID_NULL_ARGUMENT 107 # define X509V3_R_INVALID_NULL_NAME 108 diff --git a/deps/openssl/openssl/crypto/x86_64cpuid.pl b/deps/openssl/openssl/crypto/x86_64cpuid.pl index 6ebfd017ea563e..d208d02392e9d2 100644 --- a/deps/openssl/openssl/crypto/x86_64cpuid.pl +++ b/deps/openssl/openssl/crypto/x86_64cpuid.pl @@ -24,7 +24,7 @@ call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 .text @@ -53,12 +53,13 @@ .size OPENSSL_rdtsc,.-OPENSSL_rdtsc .globl OPENSSL_ia32_cpuid -.type OPENSSL_ia32_cpuid,\@abi-omnipotent +.type OPENSSL_ia32_cpuid,\@function,1 .align 16 OPENSSL_ia32_cpuid: mov %rbx,%r8 # save %rbx xor %eax,%eax + mov %eax,8(%rdi) # clear 3rd word cpuid mov %eax,%r11d # max value for standard query level @@ -126,6 +127,14 @@ shr \$14,%r10d and \$0xfff,%r10d # number of cores -1 per L1D + cmp \$7,%r11d + jb .Lnocacheinfo + + mov \$7,%eax + xor %ecx,%ecx + cpuid + mov %ebx,8(%rdi) + .Lnocacheinfo: mov \$1,%eax cpuid @@ -165,6 +174,7 @@ .Lclear_avx: mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) and %eax,%r9d # clear AVX, FMA and AMD XOP bits + andl \$0xffffffdf,8(%rdi) # cleax AVX2, ~(1<<5) .Ldone: shl \$32,%r9 mov %r10d,%eax @@ -279,6 +289,21 @@ cmove %rcx,%rax ret .size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand + +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,\@abi-omnipotent +.align 16 +OPENSSL_ia32_rdseed: + mov \$8,%ecx +.Loop_rdseed: + rdseed %rax + jc .Lbreak_rdseed + loop .Loop_rdseed +.Lbreak_rdseed: + cmp \$0,%rax + cmove %rcx,%rax + ret +.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed ___ close STDOUT; # flush diff --git a/deps/openssl/openssl/crypto/x86cpuid.pl b/deps/openssl/openssl/crypto/x86cpuid.pl index b270b44337d140..e95f6274f5e063 100644 --- 
a/deps/openssl/openssl/crypto/x86cpuid.pl +++ b/deps/openssl/openssl/crypto/x86cpuid.pl @@ -22,6 +22,8 @@ &xor ("eax","eax"); &bt ("ecx",21); &jnc (&label("nocpuid")); + &mov ("esi",&wparam(0)); + &mov (&DWP(8,"esi"),"eax"); # clear 3rd word &cpuid (); &mov ("edi","eax"); # max value for standard query level @@ -79,6 +81,16 @@ &jmp (&label("generic")); &set_label("intel"); + &cmp ("edi",7); + &jb (&label("cacheinfo")); + + &mov ("esi",&wparam(0)); + &mov ("eax",7); + &xor ("ecx","ecx"); + &cpuid (); + &mov (&DWP(8,"esi"),"ebx"); + +&set_label("cacheinfo"); &cmp ("edi",4); &mov ("edi",-1); &jb (&label("nocacheinfo")); @@ -135,6 +147,8 @@ &and ("esi",0xfeffffff); # clear FXSR &set_label("clear_avx"); &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits + &mov ("edi",&wparam(0)); + &and (&DWP(8,"edi"),0xffffffdf); # clear AVX2 &set_label("done"); &mov ("eax","esi"); &mov ("edx","ebp"); @@ -198,7 +212,7 @@ &function_begin_B("OPENSSL_far_spin"); &pushf (); - &pop ("eax") + &pop ("eax"); &bt ("eax",9); &jnc (&label("nospin")); # interrupts are disabled @@ -353,6 +367,21 @@ &ret (); &function_end_B("OPENSSL_ia32_rdrand"); +&function_begin_B("OPENSSL_ia32_rdseed"); + &mov ("ecx",8); +&set_label("loop"); + &rdseed ("eax"); + &jc (&label("break")); + &loop (&label("loop")); +&set_label("break"); + &cmp ("eax",0); + &cmove ("eax","ecx"); + &ret (); +&function_end_B("OPENSSL_ia32_rdseed"); + &initseg("OPENSSL_cpuid_setup"); +&hidden("OPENSSL_cpuid_setup"); +&hidden("OPENSSL_ia32cap_P"); + &asm_finish(); diff --git a/deps/openssl/openssl/demos/bio/Makefile b/deps/openssl/openssl/demos/bio/Makefile index 43515405323685..f8c8f03517c4af 100644 --- a/deps/openssl/openssl/demos/bio/Makefile +++ b/deps/openssl/openssl/demos/bio/Makefile @@ -1,7 +1,7 @@ CC=cc CFLAGS= -g -I../../include -LIBS= -L../.. ../../libssl.a ../../libcrypto.a -EXAMPLES=saccept sconnect +LIBS= -L../.. ../../libssl.a ../../libcrypto.a -ldl +EXAMPLES=saccept sconnect client-arg client-conf all: $(EXAMPLES) @@ -11,6 +11,12 @@ saccept: saccept.o sconnect: sconnect.o $(CC) -o sconnect sconnect.o $(LIBS) +client-arg: client-arg.o + $(CC) -o client-arg client-arg.o $(LIBS) + +client-conf: client-conf.o + $(CC) -o client-conf client-conf.o $(LIBS) + clean: rm -f $(EXAMPLES) *.o diff --git a/deps/openssl/openssl/demos/bio/README b/deps/openssl/openssl/demos/bio/README index 0b24e5b80cc7a8..a36bb48a5df910 100644 --- a/deps/openssl/openssl/demos/bio/README +++ b/deps/openssl/openssl/demos/bio/README @@ -1,3 +1,7 @@ This directory contains some simple examples of the use of BIO's to simplify socket programming. +The client-conf, server-conf, client-arg and client-conf include examples +of how to use the SSL_CONF API for configuration file or command line +processing. + diff --git a/deps/openssl/openssl/demos/bio/accept.cnf b/deps/openssl/openssl/demos/bio/accept.cnf new file mode 100644 index 00000000000000..e4acea75f344bd --- /dev/null +++ b/deps/openssl/openssl/demos/bio/accept.cnf @@ -0,0 +1,13 @@ +# Example configuration file +# Port to listen on +Port = 4433 +# Disable TLS v1.2 for test. 
+# Protocol = ALL, -TLSv1.2 +# Only support 3 curves +Curves = P-521:P-384:P-256 +# Automatic curve selection +ECDHParameters = Automatic +# Restricted signature algorithms +SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 +Certificate=server.pem +PrivateKey=server.pem diff --git a/deps/openssl/openssl/demos/bio/client-arg.c b/deps/openssl/openssl/demos/bio/client-arg.c new file mode 100644 index 00000000000000..dc354cae06b50c --- /dev/null +++ b/deps/openssl/openssl/demos/bio/client-arg.c @@ -0,0 +1,111 @@ +#include +#include + +int main(int argc, char **argv) +{ + BIO *sbio = NULL, *out = NULL; + int len; + char tmpbuf[1024]; + SSL_CTX *ctx; + SSL_CONF_CTX *cctx; + SSL *ssl; + char **args = argv + 1; + const char *connect_str = "localhost:4433"; + int nargs = argc - 1; + + ERR_load_crypto_strings(); + ERR_load_SSL_strings(); + SSL_library_init(); + + ctx = SSL_CTX_new(SSLv23_client_method()); + cctx = SSL_CONF_CTX_new(); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT); + SSL_CONF_CTX_set_ssl_ctx(cctx, ctx); + while (*args && **args == '-') { + int rv; + /* Parse standard arguments */ + rv = SSL_CONF_cmd_argv(cctx, &nargs, &args); + if (rv == -3) { + fprintf(stderr, "Missing argument for %s\n", *args); + goto end; + } + if (rv < 0) { + fprintf(stderr, "Error in command %s\n", *args); + ERR_print_errors_fp(stderr); + goto end; + } + /* If rv > 0 we processed something so proceed to next arg */ + if (rv > 0) + continue; + /* Otherwise application specific argument processing */ + if (!strcmp(*args, "-connect")) { + connect_str = args[1]; + if (connect_str == NULL) { + fprintf(stderr, "Missing -connect argument\n"); + goto end; + } + args += 2; + nargs -= 2; + continue; + } else { + fprintf(stderr, "Unknown argument %s\n", *args); + goto end; + } + } + + if (!SSL_CONF_CTX_finish(cctx)) { + fprintf(stderr, "Finish error\n"); + ERR_print_errors_fp(stderr); + goto err; + } + + /* + * We'd normally set some stuff like the verify paths and * mode here + * because as things stand this will connect to * any server whose + * certificate is signed by any CA. 
+ */ + + sbio = BIO_new_ssl_connect(ctx); + + BIO_get_ssl(sbio, &ssl); + + if (!ssl) { + fprintf(stderr, "Can't locate SSL pointer\n"); + goto end; + } + + /* Don't want any retries */ + SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); + + /* We might want to do other things with ssl here */ + + BIO_set_conn_hostname(sbio, connect_str); + + out = BIO_new_fp(stdout, BIO_NOCLOSE); + if (BIO_do_connect(sbio) <= 0) { + fprintf(stderr, "Error connecting to server\n"); + ERR_print_errors_fp(stderr); + goto end; + } + + if (BIO_do_handshake(sbio) <= 0) { + fprintf(stderr, "Error establishing SSL connection\n"); + ERR_print_errors_fp(stderr); + goto end; + } + + /* Could examine ssl here to get connection info */ + + BIO_puts(sbio, "GET / HTTP/1.0\n\n"); + for (;;) { + len = BIO_read(sbio, tmpbuf, 1024); + if (len <= 0) + break; + BIO_write(out, tmpbuf, len); + } + end: + SSL_CONF_CTX_free(cctx); + BIO_free_all(sbio); + BIO_free(out); + return 0; +} diff --git a/deps/openssl/openssl/demos/bio/client-conf.c b/deps/openssl/openssl/demos/bio/client-conf.c new file mode 100644 index 00000000000000..150e7fcf835f4c --- /dev/null +++ b/deps/openssl/openssl/demos/bio/client-conf.c @@ -0,0 +1,120 @@ +#include +#include +#include + +int main(int argc, char **argv) +{ + BIO *sbio = NULL, *out = NULL; + int i, len, rv; + char tmpbuf[1024]; + SSL_CTX *ctx = NULL; + SSL_CONF_CTX *cctx = NULL; + SSL *ssl = NULL; + CONF *conf = NULL; + STACK_OF(CONF_VALUE) *sect = NULL; + CONF_VALUE *cnf; + const char *connect_str = "localhost:4433"; + long errline = -1; + + ERR_load_crypto_strings(); + ERR_load_SSL_strings(); + SSL_library_init(); + + conf = NCONF_new(NULL); + + if (NCONF_load(conf, "connect.cnf", &errline) <= 0) { + if (errline <= 0) + fprintf(stderr, "Error processing config file\n"); + else + fprintf(stderr, "Error on line %ld\n", errline); + goto end; + } + + sect = NCONF_get_section(conf, "default"); + + if (sect == NULL) { + fprintf(stderr, "Error retrieving default section\n"); + goto end; + } + + ctx = SSL_CTX_new(SSLv23_client_method()); + cctx = SSL_CONF_CTX_new(); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CLIENT); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_FILE); + SSL_CONF_CTX_set_ssl_ctx(cctx, ctx); + for (i = 0; i < sk_CONF_VALUE_num(sect); i++) { + cnf = sk_CONF_VALUE_value(sect, i); + rv = SSL_CONF_cmd(cctx, cnf->name, cnf->value); + if (rv > 0) + continue; + if (rv != -2) { + fprintf(stderr, "Error processing %s = %s\n", + cnf->name, cnf->value); + ERR_print_errors_fp(stderr); + goto end; + } + if (!strcmp(cnf->name, "Connect")) { + connect_str = cnf->value; + } else { + fprintf(stderr, "Unknown configuration option %s\n", cnf->name); + goto end; + } + } + + if (!SSL_CONF_CTX_finish(cctx)) { + fprintf(stderr, "Finish error\n"); + ERR_print_errors_fp(stderr); + goto err; + } + + /* + * We'd normally set some stuff like the verify paths and * mode here + * because as things stand this will connect to * any server whose + * certificate is signed by any CA. 
+ */ + + sbio = BIO_new_ssl_connect(ctx); + + BIO_get_ssl(sbio, &ssl); + + if (!ssl) { + fprintf(stderr, "Can't locate SSL pointer\n"); + goto end; + } + + /* Don't want any retries */ + SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY); + + /* We might want to do other things with ssl here */ + + BIO_set_conn_hostname(sbio, connect_str); + + out = BIO_new_fp(stdout, BIO_NOCLOSE); + if (BIO_do_connect(sbio) <= 0) { + fprintf(stderr, "Error connecting to server\n"); + ERR_print_errors_fp(stderr); + goto end; + } + + if (BIO_do_handshake(sbio) <= 0) { + fprintf(stderr, "Error establishing SSL connection\n"); + ERR_print_errors_fp(stderr); + goto end; + } + + /* Could examine ssl here to get connection info */ + + BIO_puts(sbio, "GET / HTTP/1.0\n\n"); + for (;;) { + len = BIO_read(sbio, tmpbuf, 1024); + if (len <= 0) + break; + BIO_write(out, tmpbuf, len); + } + end: + SSL_CONF_CTX_free(cctx); + BIO_free_all(sbio); + BIO_free(out); + NCONF_free(conf); + return 0; +} diff --git a/deps/openssl/openssl/demos/bio/connect.cnf b/deps/openssl/openssl/demos/bio/connect.cnf new file mode 100644 index 00000000000000..4dee03c373885e --- /dev/null +++ b/deps/openssl/openssl/demos/bio/connect.cnf @@ -0,0 +1,9 @@ +# Example configuration file +# Connects to the default port of s_server +Connect = localhost:4433 +# Disable TLS v1.2 for test. +# Protocol = ALL, -TLSv1.2 +# Only support 3 curves +Curves = P-521:P-384:P-256 +# Restricted signature algorithms +SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 diff --git a/deps/openssl/openssl/demos/bio/saccept.c b/deps/openssl/openssl/demos/bio/saccept.c index 8d02610cbf08b6..e79c8723290537 100644 --- a/deps/openssl/openssl/demos/bio/saccept.c +++ b/deps/openssl/openssl/demos/bio/saccept.c @@ -2,13 +2,13 @@ /* demos/bio/saccept.c */ /*- - * A minimal program to server an SSL connection. + * A minimal program to serve an SSL connection. * It uses blocking. * saccept host:port * host is the interface IP to use. If any interface, use *:port * The default it *:4433 * - * cc -I../../include saccept.c -L../.. -lssl -lcrypto + * cc -I../../include saccept.c -L../.. -lssl -lcrypto -ldl */ #include @@ -70,8 +70,8 @@ char *argv[]; goto err; /* - * This means that when a new connection is acceptede on 'in', The - * ssl_bio will be 'dupilcated' and have the new socket BIO push into it. + * This means that when a new connection is accepted on 'in', The ssl_bio + * will be 'duplicated' and have the new socket BIO push into it. * Basically it means the SSL BIO will be automatically setup */ BIO_set_accept_bios(in, ssl_bio); diff --git a/deps/openssl/openssl/demos/bio/server-arg.c b/deps/openssl/openssl/demos/bio/server-arg.c new file mode 100644 index 00000000000000..1d0e1db2343f9e --- /dev/null +++ b/deps/openssl/openssl/demos/bio/server-arg.c @@ -0,0 +1,144 @@ +/* NOCW */ +/* demos/bio/server-arg.c */ + +/* + * A minimal program to serve an SSL connection. It uses blocking. It use the + * SSL_CONF API with the command line. cc -I../../include server-arg.c + * -L../.. 
-lssl -lcrypto -ldl + */ + +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + char *port = "*:4433"; + BIO *ssl_bio, *tmp; + SSL_CTX *ctx; + SSL_CONF_CTX *cctx; + char buf[512]; + BIO *in = NULL; + int ret = 1, i; + char **args = argv + 1; + int nargs = argc - 1; + + SSL_load_error_strings(); + + /* Add ciphers and message digests */ + OpenSSL_add_ssl_algorithms(); + + ctx = SSL_CTX_new(SSLv23_server_method()); + + cctx = SSL_CONF_CTX_new(); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_SERVER); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CERTIFICATE); + SSL_CONF_CTX_set_ssl_ctx(cctx, ctx); + while (*args && **args == '-') { + int rv; + /* Parse standard arguments */ + rv = SSL_CONF_cmd_argv(cctx, &nargs, &args); + if (rv == -3) { + fprintf(stderr, "Missing argument for %s\n", *args); + goto err; + } + if (rv < 0) { + fprintf(stderr, "Error in command %s\n", *args); + ERR_print_errors_fp(stderr); + goto err; + } + /* If rv > 0 we processed something so proceed to next arg */ + if (rv > 0) + continue; + /* Otherwise application specific argument processing */ + if (!strcmp(*args, "-port")) { + port = args[1]; + if (port == NULL) { + fprintf(stderr, "Missing -port argument\n"); + goto err; + } + args += 2; + nargs -= 2; + continue; + } else { + fprintf(stderr, "Unknown argument %s\n", *args); + goto err; + } + } + + if (!SSL_CONF_CTX_finish(cctx)) { + fprintf(stderr, "Finish error\n"); + ERR_print_errors_fp(stderr); + goto err; + } +#if 0 + /* + * Demo of how to iterate over all certificates in an SSL_CTX structure. + */ + { + X509 *x; + int rv; + rv = SSL_CTX_set_current_cert(ctx, SSL_CERT_SET_FIRST); + while (rv) { + X509 *x = SSL_CTX_get0_certificate(ctx); + X509_NAME_print_ex_fp(stdout, X509_get_subject_name(x), 0, + XN_FLAG_ONELINE); + printf("\n"); + rv = SSL_CTX_set_current_cert(ctx, SSL_CERT_SET_NEXT); + } + fflush(stdout); + } +#endif + /* Setup server side SSL bio */ + ssl_bio = BIO_new_ssl(ctx, 0); + + if ((in = BIO_new_accept(port)) == NULL) + goto err; + + /* + * This means that when a new connection is accepted on 'in', The ssl_bio + * will be 'duplicated' and have the new socket BIO push into it. + * Basically it means the SSL BIO will be automatically setup + */ + BIO_set_accept_bios(in, ssl_bio); + + again: + /* + * The first call will setup the accept socket, and the second will get a + * socket. In this loop, the first actual accept will occur in the + * BIO_read() function. + */ + + if (BIO_do_accept(in) <= 0) + goto err; + + for (;;) { + i = BIO_read(in, buf, 512); + if (i == 0) { + /* + * If we have finished, remove the underlying BIO stack so the + * next time we call any function for this BIO, it will attempt + * to do an accept + */ + printf("Done\n"); + tmp = BIO_pop(in); + BIO_free_all(tmp); + goto again; + } + if (i < 0) + goto err; + fwrite(buf, 1, i, stdout); + fflush(stdout); + } + + ret = 0; + err: + if (ret) { + ERR_print_errors_fp(stderr); + } + if (in != NULL) + BIO_free(in); + exit(ret); + return (!ret); +} diff --git a/deps/openssl/openssl/demos/bio/server-conf.c b/deps/openssl/openssl/demos/bio/server-conf.c new file mode 100644 index 00000000000000..a09bc9320d1be0 --- /dev/null +++ b/deps/openssl/openssl/demos/bio/server-conf.c @@ -0,0 +1,138 @@ +/* NOCW */ +/* demos/bio/saccept-conf.c */ + +/* + * A minimal program to serve an SSL connection. It uses blocking. It uses + * the SSL_CONF API with a configuration file. cc -I../../include saccept.c + * -L../.. 
-lssl -lcrypto -ldl + */ + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + char *port = "*:4433"; + BIO *in = NULL; + BIO *ssl_bio, *tmp; + SSL_CTX *ctx; + SSL_CONF_CTX *cctx = NULL; + CONF *conf = NULL; + STACK_OF(CONF_VALUE) *sect = NULL; + CONF_VALUE *cnf; + long errline = -1; + char buf[512]; + int ret = 1, i; + + SSL_load_error_strings(); + + /* Add ciphers and message digests */ + OpenSSL_add_ssl_algorithms(); + + conf = NCONF_new(NULL); + + if (NCONF_load(conf, "accept.cnf", &errline) <= 0) { + if (errline <= 0) + fprintf(stderr, "Error processing config file\n"); + else + fprintf(stderr, "Error on line %ld\n", errline); + goto err; + } + + sect = NCONF_get_section(conf, "default"); + + if (sect == NULL) { + fprintf(stderr, "Error retrieving default section\n"); + goto err; + } + + ctx = SSL_CTX_new(SSLv23_server_method()); + cctx = SSL_CONF_CTX_new(); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_SERVER); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_CERTIFICATE); + SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_FILE); + SSL_CONF_CTX_set_ssl_ctx(cctx, ctx); + for (i = 0; i < sk_CONF_VALUE_num(sect); i++) { + int rv; + cnf = sk_CONF_VALUE_value(sect, i); + rv = SSL_CONF_cmd(cctx, cnf->name, cnf->value); + if (rv > 0) + continue; + if (rv != -2) { + fprintf(stderr, "Error processing %s = %s\n", + cnf->name, cnf->value); + ERR_print_errors_fp(stderr); + goto err; + } + if (!strcmp(cnf->name, "Port")) { + port = cnf->value; + } else { + fprintf(stderr, "Unknown configuration option %s\n", cnf->name); + goto err; + } + } + + if (!SSL_CONF_CTX_finish(cctx)) { + fprintf(stderr, "Finish error\n"); + ERR_print_errors_fp(stderr); + goto err; + } + + /* Setup server side SSL bio */ + ssl_bio = BIO_new_ssl(ctx, 0); + + if ((in = BIO_new_accept(port)) == NULL) + goto err; + + /* + * This means that when a new connection is accepted on 'in', The ssl_bio + * will be 'duplicated' and have the new socket BIO push into it. + * Basically it means the SSL BIO will be automatically setup + */ + BIO_set_accept_bios(in, ssl_bio); + + again: + /* + * The first call will setup the accept socket, and the second will get a + * socket. In this loop, the first actual accept will occur in the + * BIO_read() function. + */ + + if (BIO_do_accept(in) <= 0) + goto err; + + for (;;) { + i = BIO_read(in, buf, 512); + if (i == 0) { + /* + * If we have finished, remove the underlying BIO stack so the + * next time we call any function for this BIO, it will attempt + * to do an accept + */ + printf("Done\n"); + tmp = BIO_pop(in); + BIO_free_all(tmp); + goto again; + } + if (i < 0) { + if (BIO_should_retry(in)) + continue; + goto err; + } + fwrite(buf, 1, i, stdout); + fflush(stdout); + } + + ret = 0; + err: + if (ret) { + ERR_print_errors_fp(stderr); + } + if (in != NULL) + BIO_free(in); + exit(ret); + return (!ret); +} diff --git a/deps/openssl/openssl/demos/bio/server.pem b/deps/openssl/openssl/demos/bio/server.pem index 5cf1387d65d755..d0fc265f045528 100644 --- a/deps/openssl/openssl/demos/bio/server.pem +++ b/deps/openssl/openssl/demos/bio/server.pem @@ -1,30 +1,52 @@ -subject=/C=AU/SP=QLD/O=Mincom Pty. Ltd./OU=CS/CN=SSLeay demo server -issuer= /C=AU/SP=QLD/O=Mincom Pty. 
Ltd./OU=CS/CN=CA ------BEGIN X509 CERTIFICATE----- - -MIIBgjCCASwCAQQwDQYJKoZIhvcNAQEEBQAwODELMAkGA1UEBhMCQVUxDDAKBgNV -BAgTA1FMRDEbMBkGA1UEAxMSU1NMZWF5L3JzYSB0ZXN0IENBMB4XDTk1MTAwOTIz -MzIwNVoXDTk4MDcwNTIzMzIwNVowYDELMAkGA1UEBhMCQVUxDDAKBgNVBAgTA1FM -RDEZMBcGA1UEChMQTWluY29tIFB0eS4gTHRkLjELMAkGA1UECxMCQ1MxGzAZBgNV -BAMTElNTTGVheSBkZW1vIHNlcnZlcjBcMA0GCSqGSIb3DQEBAQUAA0sAMEgCQQC3 -LCXcScWua0PFLkHBLm2VejqpA1F4RQ8q0VjRiPafjx/Z/aWH3ipdMVvuJGa/wFXb -/nDFLDlfWp+oCPwhBtVPAgMBAAEwDQYJKoZIhvcNAQEEBQADQQArNFsihWIjBzb0 -DCsU0BvL2bvSwJrPEqFlkDq3F4M6EGutL9axEcANWgbbEdAvNJD1dmEmoWny27Pn -IMs6ZOZB ------END X509 CERTIFICATE----- +subject= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = Test Server Cert +issuer= C = UK, O = OpenSSL Group, OU = FOR TESTING PURPOSES ONLY, CN = OpenSSL Test Intermediate CA +-----BEGIN CERTIFICATE----- +MIID5zCCAs+gAwIBAgIJALnu1NlVpZ6zMA0GCSqGSIb3DQEBBQUAMHAxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMSIwIAYDVQQLDBlGT1IgVEVT +VElORyBQVVJQT1NFUyBPTkxZMSUwIwYDVQQDDBxPcGVuU1NMIFRlc3QgSW50ZXJt +ZWRpYXRlIENBMB4XDTExMTIwODE0MDE0OFoXDTIxMTAxNjE0MDE0OFowZDELMAkG +A1UEBhMCVUsxFjAUBgNVBAoMDU9wZW5TU0wgR3JvdXAxIjAgBgNVBAsMGUZPUiBU +RVNUSU5HIFBVUlBPU0VTIE9OTFkxGTAXBgNVBAMMEFRlc3QgU2VydmVyIENlcnQw +ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDzhPOSNtyyRspmeuUpxfNJ +KCLTuf7g3uQ4zu4iHOmRO5TQci+HhVlLZrHF9XqFXcIP0y4pWDbMSGuiorUmzmfi +R7bfSdI/+qIQt8KXRH6HNG1t8ou0VSvWId5TS5Dq/er5ODUr9OaaDva7EquHIcMv +vPQGuI+OEAcnleVCy9HVEIySrO4P3CNIicnGkwwiAud05yUAq/gPXBC1hTtmlPD7 +TVcGVSEiJdvzqqlgv02qedGrkki6GY4S7GjZxrrf7Foc2EP+51LJzwLQx3/JfrCU +41NEWAsu/Sl0tQabXESN+zJ1pDqoZ3uHMgpQjeGiE0olr+YcsSW/tJmiU9OiAr8R +AgMBAAGjgY8wgYwwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwLAYJYIZI +AYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1UdDgQW +BBSCvM8AABPR9zklmifnr9LvIBturDAfBgNVHSMEGDAWgBQ2w2yI55X+sL3szj49 +hqshgYfa2jANBgkqhkiG9w0BAQUFAAOCAQEAqb1NV0B0/pbpK9Z4/bNjzPQLTRLK +WnSNm/Jh5v0GEUOE/Beg7GNjNrmeNmqxAlpqWz9qoeoFZax+QBpIZYjROU3TS3fp +yLsrnlr0CDQ5R7kCCDGa8dkXxemmpZZLbUCpW2Uoy8sAA4JjN9OtsZY7dvUXFgJ7 +vVNTRnI01ghknbtD+2SxSQd3CWF6QhcRMAzZJ1z1cbbwGDDzfvGFPzJ+Sq+zEPds +xoVLLSetCiBc+40ZcDS5dV98h9XD7JMTQfxzA7mNGv73JoZJA6nFgj+ADSlJsY/t +JBv+z1iQRueoh9Qeee+ZbRifPouCB8FDx+AltvHTANdAq0t/K3o+pplMVA== +-----END CERTIFICATE----- -----BEGIN RSA PRIVATE KEY----- - -MIIBPAIBAAJBALcsJdxJxa5rQ8UuQcEubZV6OqkDUXhFDyrRWNGI9p+PH9n9pYfe -Kl0xW+4kZr/AVdv+cMUsOV9an6gI/CEG1U8CAwEAAQJAXJMBZ34ZXHd1vtgL/3hZ -hexKbVTx/djZO4imXO/dxPGRzG2ylYZpHmG32/T1kaHpZlCHoEPgHoSzmxYXfxjG -sQIhAPmZ/bQOjmRUHM/VM2X5zrjjM6z18R1P6l3ObFwt9FGdAiEAu943Yh9SqMRw -tL0xHGxKmM/YJueUw1gB6sLkETN71NsCIQCeT3RhoqXfrpXDoEcEU+gwzjI1bpxq -agiNTOLfqGoA5QIhAIQFYjgzONxex7FLrsKBm16N2SFl5pXsN9SpRqqL2n63AiEA -g9VNIQ3xwpw7og3IbONifeku+J9qGMGQJMKwSTwrFtI= +MIIEpAIBAAKCAQEA84TzkjbcskbKZnrlKcXzSSgi07n+4N7kOM7uIhzpkTuU0HIv +h4VZS2axxfV6hV3CD9MuKVg2zEhroqK1Js5n4ke230nSP/qiELfCl0R+hzRtbfKL +tFUr1iHeU0uQ6v3q+Tg1K/Tmmg72uxKrhyHDL7z0BriPjhAHJ5XlQsvR1RCMkqzu +D9wjSInJxpMMIgLndOclAKv4D1wQtYU7ZpTw+01XBlUhIiXb86qpYL9NqnnRq5JI +uhmOEuxo2ca63+xaHNhD/udSyc8C0Md/yX6wlONTRFgLLv0pdLUGm1xEjfsydaQ6 +qGd7hzIKUI3hohNKJa/mHLElv7SZolPTogK/EQIDAQABAoIBAADq9FwNtuE5IRQn +zGtO4q7Y5uCzZ8GDNYr9RKp+P2cbuWDbvVAecYq2NV9QoIiWJOAYZKklOvekIju3 +r0UZLA0PRiIrTg6NrESx3JrjWDK8QNlUO7CPTZ39/K+FrmMkV9lem9yxjJjyC34D +AQB+YRTx+l14HppjdxNwHjAVQpIx/uO2F5xAMuk32+3K+pq9CZUtrofe1q4Agj9R +5s8mSy9pbRo9kW9wl5xdEotz1LivFOEiqPUJTUq5J5PeMKao3vdK726XI4Z455Nm +W2/MA0YV0ug2FYinHcZdvKM6dimH8GLfa3X8xKRfzjGjTiMSwsdjgMa4awY3tEHH +674jhAECgYEA/zqMrc0zsbNk83sjgaYIug5kzEpN4ic020rSZsmQxSCerJTgNhmg 
+utKSCt0Re09Jt3LqG48msahX8ycqDsHNvlEGPQSbMu9IYeO3Wr3fAm75GEtFWePY +BhM73I7gkRt4s8bUiUepMG/wY45c5tRF23xi8foReHFFe9MDzh8fJFECgYEA9EFX +4qAik1pOJGNei9BMwmx0I0gfVEIgu0tzeVqT45vcxbxr7RkTEaDoAG6PlbWP6D9a +WQNLp4gsgRM90ZXOJ4up5DsAWDluvaF4/omabMA+MJJ5kGZ0gCj5rbZbKqUws7x8 +bp+6iBfUPJUbcqNqFmi/08Yt7vrDnMnyMw2A/sECgYEAiiuRMxnuzVm34hQcsbhH +6ymVqf7j0PW2qK0F4H1ocT9qhzWFd+RB3kHWrCjnqODQoI6GbGr/4JepHUpre1ex +4UEN5oSS3G0ru0rC3U4C59dZ5KwDHFm7ffZ1pr52ljfQDUsrjjIMRtuiwNK2OoRa +WSsqiaL+SDzSB+nBmpnAizECgYBdt/y6rerWUx4MhDwwtTnel7JwHyo2MDFS6/5g +n8qC2Lj6/fMDRE22w+CA2esp7EJNQJGv+b27iFpbJEDh+/Lf5YzIT4MwVskQ5bYB +JFcmRxUVmf4e09D7o705U/DjCgMH09iCsbLmqQ38ONIRSHZaJtMDtNTHD1yi+jF+ +OT43gQKBgQC/2OHZoko6iRlNOAQ/tMVFNq7fL81GivoQ9F1U0Qr+DH3ZfaH8eIkX +xT0ToMPJUzWAn8pZv0snA0um6SIgvkCuxO84OkANCVbttzXImIsL7pFzfcwV/ERK +UM6j0ZuSMFOCr/lGPAoOQU0fskidGEHi1/kW+suSr28TqsyYZpwBDQ== -----END RSA PRIVATE KEY----- - ------BEGIN DH PARAMETERS----- -MEYCQQDaWDwW2YUiidDkr3VvTMqS3UvlM7gE+w/tlO+cikQD7VdGUNNpmdsp13Yn -a6LT1BLiGPTdHghM9tgAPnxHdOgzAgEC ------END DH PARAMETERS----- - diff --git a/deps/openssl/openssl/doc/apps/c_rehash.pod b/deps/openssl/openssl/doc/apps/c_rehash.pod index c564e86315529a..ccce29e47b7e45 100644 --- a/deps/openssl/openssl/doc/apps/c_rehash.pod +++ b/deps/openssl/openssl/doc/apps/c_rehash.pod @@ -10,13 +10,19 @@ c_rehash - Create symbolic links to files named by the hash values =head1 SYNOPSIS B +B<[-old]> +B<[-h]> +B<[-n]> +B<[-v]> [ I...] =head1 DESCRIPTION -B scans directories and calculates a hash value of each C<.pem> +B scans directories and calculates a hash value of each +C<.pem>, C<.crt>, C<.cer>, or C<.crl> file in the specified directory list and creates symbolic links for each file, where the name of the link is the hash value. +(If the platform does not support symbolic links, a copy is made.) This utility is useful as many programs that use OpenSSL require directories to be set up like this in order to find certificates. @@ -34,6 +40,7 @@ is a hexadecimal character and B is a single decimal digit. When processing a directory, B will first remove all links that have a name in that syntax. If you have links in that format used for other purposes, they will be removed. +To skip the removal step, use the B<-n> flag. Hashes for CRL's look similar except the letter B appears after the period, like this: C. @@ -42,7 +49,7 @@ incrementing the B value. Duplicates are found by comparing the full SHA-1 fingerprint. A warning will be displayed if a duplicate is found. -A warning will also be displayed if there are B<.pem> files that +A warning will also be displayed if there are files that cannot be parsed as either a certificate or a CRL. The program uses the B program to compute the hashes and @@ -51,13 +58,39 @@ B environment variable to the full pathname. Any program can be used, it will be invoked as follows for either a certificate or CRL: - $OPENSSL x509 -hash -fingerprint -noout -in FFFFFF - $OPENSSL crl -hash -fingerprint -noout -in FFFFFF + $OPENSSL x509 -hash -fingerprint -noout -in FILENAME + $OPENSSL crl -hash -fingerprint -noout -in FILENAME -where B is the filename. It must output the hash of the +where B is the filename. It must output the hash of the file on the first line, and the fingerprint on the second, optionally prefixed with some text and an equals sign. +=head1 OPTIONS + +=over 4 + +=item B<-old> + +Use old-style hashing (MD5, as opposed to SHA-1) for generating +links for releases before 1.0.0. Note that current versions will +not use the old style. 
+ +=item B<-h> + +Display a brief usage message. + +=item B<-n> + +Do not remove existing links. +This is needed when keeping new and old-style links in the same directory. + +=item B<-v> + +Print messages about old links removed and new links created. +By default, B only lists each directory as it is processed. + +=back + =head1 ENVIRONMENT =over diff --git a/deps/openssl/openssl/doc/apps/ciphers.pod b/deps/openssl/openssl/doc/apps/ciphers.pod index 0aa1bad111c5d0..e9280bc5021c87 100644 --- a/deps/openssl/openssl/doc/apps/ciphers.pod +++ b/deps/openssl/openssl/doc/apps/ciphers.pod @@ -175,14 +175,14 @@ cipher suites using RSA key exchange. =item B, B, B cipher suites using DH key agreement and DH certificates signed by CAs with RSA -and DSS keys or either respectively. Not implemented. +and DSS keys or either respectively. -=item B +=item B, B cipher suites using ephemeral DH key agreement, including anonymous cipher suites. -=item B +=item B, B cipher suites using authenticated ephemeral DH key agreement. @@ -200,12 +200,12 @@ cipher suites using DH, including anonymous DH, ephemeral DH and fixed DH. cipher suites using fixed ECDH key agreement signed by CAs with RSA and ECDSA keys or either respectively. -=item B +=item B, B cipher suites using ephemeral ECDH key agreement, including anonymous cipher suites. -=item B +=item B, B cipher suites using authenticated ephemeral ECDH key agreement. @@ -229,7 +229,7 @@ cipher suites using DSS authentication, i.e. the certificates carry DSS keys. =item B cipher suites effectively using DH authentication, i.e. the certificates carry -DH keys. Not implemented. +DH keys. =item B @@ -331,6 +331,18 @@ cipher suites using GOST 28147-89 MAC B HMAC. cipher suites using pre-shared keys (PSK). +=item B, B, B + +enables suite B mode operation using 128 (permitting 192 bit mode by peer) +128 bit (not permitting 192 bit by peer) or 192 bit level of security +respectively. If used these cipherstrings should appear first in the cipher +list and anything after them is ignored. Setting Suite B mode has additional +consequences required to comply with RFC6460. In particular the supported +signature algorithms is reduced to support only ECDSA and SHA256 or SHA384, +only the elliptic curves P-256 and P-384 can be used and only the two suite B +compliant ciphersuites (ECDHE-ECDSA-AES128-GCM-SHA256 and +ECDHE-ECDSA-AES256-GCM-SHA384) are permissible. + =back =head1 CIPHER SUITE NAMES @@ -353,12 +365,12 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used. SSL_RSA_WITH_DES_CBC_SHA DES-CBC-SHA SSL_RSA_WITH_3DES_EDE_CBC_SHA DES-CBC3-SHA - SSL_DH_DSS_EXPORT_WITH_DES40_CBC_SHA Not implemented. - SSL_DH_DSS_WITH_DES_CBC_SHA Not implemented. - SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA Not implemented. - SSL_DH_RSA_EXPORT_WITH_DES40_CBC_SHA Not implemented. - SSL_DH_RSA_WITH_DES_CBC_SHA Not implemented. - SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA Not implemented. + SSL_DH_DSS_EXPORT_WITH_DES40_CBC_SHA EXP-DH-DSS-DES-CBC-SHA + SSL_DH_DSS_WITH_DES_CBC_SHA DH-DSS-DES-CBC-SHA + SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA DH-DSS-DES-CBC3-SHA + SSL_DH_RSA_EXPORT_WITH_DES40_CBC_SHA EXP-DH-RSA-DES-CBC-SHA + SSL_DH_RSA_WITH_DES_CBC_SHA DH-RSA-DES-CBC-SHA + SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA DH-RSA-DES-CBC3-SHA SSL_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA EXP-EDH-DSS-DES-CBC-SHA SSL_DHE_DSS_WITH_DES_CBC_SHA EDH-DSS-CBC-SHA SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA EDH-DSS-DES-CBC3-SHA @@ -413,10 +425,10 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used. 
TLS_RSA_WITH_AES_128_CBC_SHA AES128-SHA TLS_RSA_WITH_AES_256_CBC_SHA AES256-SHA - TLS_DH_DSS_WITH_AES_128_CBC_SHA Not implemented. - TLS_DH_DSS_WITH_AES_256_CBC_SHA Not implemented. - TLS_DH_RSA_WITH_AES_128_CBC_SHA Not implemented. - TLS_DH_RSA_WITH_AES_256_CBC_SHA Not implemented. + TLS_DH_DSS_WITH_AES_128_CBC_SHA DH-DSS-AES128-SHA + TLS_DH_DSS_WITH_AES_256_CBC_SHA DH-DSS-AES256-SHA + TLS_DH_RSA_WITH_AES_128_CBC_SHA DH-RSA-AES128-SHA + TLS_DH_RSA_WITH_AES_256_CBC_SHA DH-RSA-AES256-SHA TLS_DHE_DSS_WITH_AES_128_CBC_SHA DHE-DSS-AES128-SHA TLS_DHE_DSS_WITH_AES_256_CBC_SHA DHE-DSS-AES256-SHA @@ -431,10 +443,10 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used. TLS_RSA_WITH_CAMELLIA_128_CBC_SHA CAMELLIA128-SHA TLS_RSA_WITH_CAMELLIA_256_CBC_SHA CAMELLIA256-SHA - TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA Not implemented. - TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA Not implemented. - TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA Not implemented. - TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA Not implemented. + TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA DH-DSS-CAMELLIA128-SHA + TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA DH-DSS-CAMELLIA256-SHA + TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA DH-RSA-CAMELLIA128-SHA + TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA DH-RSA-CAMELLIA256-SHA TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA DHE-DSS-CAMELLIA128-SHA TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA DHE-DSS-CAMELLIA256-SHA @@ -448,8 +460,8 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used. TLS_RSA_WITH_SEED_CBC_SHA SEED-SHA - TLS_DH_DSS_WITH_SEED_CBC_SHA Not implemented. - TLS_DH_RSA_WITH_SEED_CBC_SHA Not implemented. + TLS_DH_DSS_WITH_SEED_CBC_SHA DH-DSS-SEED-SHA + TLS_DH_RSA_WITH_SEED_CBC_SHA DH-RSA-SEED-SHA TLS_DHE_DSS_WITH_SEED_CBC_SHA DHE-DSS-SEED-SHA TLS_DHE_RSA_WITH_SEED_CBC_SHA DHE-RSA-SEED-SHA @@ -517,15 +529,15 @@ Note: these ciphers can also be used in SSL v3. TLS_RSA_WITH_AES_128_GCM_SHA256 AES128-GCM-SHA256 TLS_RSA_WITH_AES_256_GCM_SHA384 AES256-GCM-SHA384 - TLS_DH_RSA_WITH_AES_128_CBC_SHA256 Not implemented. - TLS_DH_RSA_WITH_AES_256_CBC_SHA256 Not implemented. - TLS_DH_RSA_WITH_AES_128_GCM_SHA256 Not implemented. - TLS_DH_RSA_WITH_AES_256_GCM_SHA384 Not implemented. + TLS_DH_RSA_WITH_AES_128_CBC_SHA256 DH-RSA-AES128-SHA256 + TLS_DH_RSA_WITH_AES_256_CBC_SHA256 DH-RSA-AES256-SHA256 + TLS_DH_RSA_WITH_AES_128_GCM_SHA256 DH-RSA-AES128-GCM-SHA256 + TLS_DH_RSA_WITH_AES_256_GCM_SHA384 DH-RSA-AES256-GCM-SHA384 - TLS_DH_DSS_WITH_AES_128_CBC_SHA256 Not implemented. - TLS_DH_DSS_WITH_AES_256_CBC_SHA256 Not implemented. - TLS_DH_DSS_WITH_AES_128_GCM_SHA256 Not implemented. - TLS_DH_DSS_WITH_AES_256_GCM_SHA384 Not implemented. + TLS_DH_DSS_WITH_AES_128_CBC_SHA256 DH-DSS-AES128-SHA256 + TLS_DH_DSS_WITH_AES_256_CBC_SHA256 DH-DSS-AES256-SHA256 + TLS_DH_DSS_WITH_AES_128_GCM_SHA256 DH-DSS-AES128-GCM-SHA256 + TLS_DH_DSS_WITH_AES_256_GCM_SHA384 DH-DSS-AES256-GCM-SHA384 TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 DHE-RSA-AES128-SHA256 TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 DHE-RSA-AES256-SHA256 @@ -581,9 +593,6 @@ Note: these ciphers can also be used in SSL v3. =head1 NOTES -The non-ephemeral DH modes are currently unimplemented in OpenSSL -because there is no support for DH certificates. - Some compiled versions of OpenSSL may not include all the ciphers listed here because some ciphers were excluded at compile time. 
diff --git a/deps/openssl/openssl/doc/apps/cms.pod b/deps/openssl/openssl/doc/apps/cms.pod index 75b698834a2877..76dbf2ca36caec 100644 --- a/deps/openssl/openssl/doc/apps/cms.pod +++ b/deps/openssl/openssl/doc/apps/cms.pod @@ -57,6 +57,7 @@ B B [B<-secretkeyid id>] [B<-econtent_type type>] [B<-inkey file>] +[B<-keyopt name:parameter>] [B<-passin arg>] [B<-rand file(s)>] [B] @@ -321,8 +322,13 @@ verification was successful. =item B<-recip file> -the recipients certificate when decrypting a message. This certificate -must match one of the recipients of the message or an error occurs. +when decrypting a message this specifies the recipients certificate. The +certificate must match one of the recipients of the message or an error +occurs. + +When encrypting a message this option may be used multiple times to specify +each recipient. This form B be used if customised parameters are +required (for example to specify RSA-OAEP). =item B<-keyid> @@ -381,6 +387,13 @@ private key must be included in the certificate file specified with the B<-recip> or B<-signer> file. When signing this option can be used multiple times to specify successive keys. +=item B<-keyopt name:opt> + +for signing and encryption this option can be used multiple times to +set customised parameters for the preceding key or certificate. It can +currently be used to set RSA-PSS for signing, RSA-OAEP for encryption +or to modify default parameters for ECDH. + =item B<-passin arg> the private key password source. For more information about the format of B @@ -508,6 +521,10 @@ The B<-compress> option. The B<-secretkey> option when used with B<-encrypt>. +The use of PSS with B<-sign>. + +The use of OAEP or non-RSA keys with B<-encrypt>. + Additionally the B<-EncryptedData_create> and B<-data_create> type cannot be processed by the older B command. @@ -588,6 +605,21 @@ Add a signer to an existing message: openssl cms -resign -in mail.msg -signer newsign.pem -out mail2.msg +Sign mail using RSA-PSS: + + openssl cms -sign -in message.txt -text -out mail.msg \ + -signer mycert.pem -keyopt rsa_padding_mode:pss + +Create encrypted mail using RSA-OAEP: + + openssl cms -encrypt -in plain.txt -out mail.msg \ + -recip cert.pem -keyopt rsa_padding_mode:oaep + +Use SHA256 KDF with an ECDH certificate: + + openssl cms -encrypt -in plain.txt -out mail.msg \ + -recip ecdhcert.pem -keyopt ecdh_kdf_md:sha256 + =head1 BUGS The MIME parser isn't very clever: it seems to handle most messages that I've @@ -613,5 +645,14 @@ No revocation checking is done on the signer's certificate. The use of multiple B<-signer> options and the B<-resign> command were first added in OpenSSL 1.0.0 +The B option was first added in OpenSSL 1.1.0 + +The use of B<-recip> to specify the recipient when encrypting mail was first +added to OpenSSL 1.1.0 + +Support for RSA-OAEP and RSA-PSS was first added to OpenSSL 1.1.0. + +The use of non-RSA keys with B<-encrypt> and B<-decrypt> was first added +to OpenSSL 1.1.0. =cut diff --git a/deps/openssl/openssl/doc/apps/genpkey.pod b/deps/openssl/openssl/doc/apps/genpkey.pod index c74d097fb3d830..929edcd26ff00f 100644 --- a/deps/openssl/openssl/doc/apps/genpkey.pod +++ b/deps/openssl/openssl/doc/apps/genpkey.pod @@ -128,6 +128,15 @@ The number of bits in the prime parameter B

. The value to use for the generator B. +=item B + +If this option is set then the appropriate RFC5114 parameters are used +instead of generating new parameters. The value B can take the +values 1, 2 or 3 corresponding to RFC5114 DH parameters consisting of +1024 bit group with 160 bit subgroup, 2048 bit group with 224 bit subgroup +and 2048 bit group with 256 bit subgroup as mentioned in RFC5114 sections +2.1, 2.2 and 2.3 respectively. + =back =head1 EC PARAMETER GENERATION OPTIONS @@ -206,6 +215,10 @@ Generate 1024 bit DH parameters: openssl genpkey -genparam -algorithm DH -out dhp.pem \ -pkeyopt dh_paramgen_prime_len:1024 +Output RFC5114 2048 bit DH parameters with 224 bit subgroup: + + openssl genpkey -genparam -algorithm DH -out dhp.pem -pkeyopt dh_rfc5114:2 + Generate DH key from parameters: openssl genpkey -paramfile dhp.pem -out dhkey.pem diff --git a/deps/openssl/openssl/doc/apps/pkcs8.pod b/deps/openssl/openssl/doc/apps/pkcs8.pod index 84abee78f3ebbf..6901f1f3f2112e 100644 --- a/deps/openssl/openssl/doc/apps/pkcs8.pod +++ b/deps/openssl/openssl/doc/apps/pkcs8.pod @@ -20,6 +20,7 @@ B B [B<-embed>] [B<-nsdb>] [B<-v2 alg>] +[B<-v2prf alg>] [B<-v1 alg>] [B<-engine id>] @@ -118,6 +119,12 @@ private keys with OpenSSL then this doesn't matter. The B argument is the encryption algorithm to use, valid values include B, B and B. It is recommended that B is used. +=item B<-v2prf alg> + +This option sets the PRF algorithm to use with PKCS#5 v2.0. A typical value +values would be B. If this option isn't set then the default +for the cipher is used or B if there is no default. + =item B<-v1 alg> This option specifies a PKCS#5 v1.5 or PKCS#12 algorithm to use. A complete @@ -195,6 +202,11 @@ DES: openssl pkcs8 -in key.pem -topk8 -v2 des3 -out enckey.pem +Convert a private from traditional to PKCS#5 v2.0 format using AES with +256 bits in CBC mode and B PRF: + + openssl pkcs8 -in key.pem -topk8 -v2 aes-256-cbc -v2prf hmacWithSHA256 -out enckey.pem + Convert a private key to PKCS#8 using a PKCS#5 1.5 compatible algorithm (DES): diff --git a/deps/openssl/openssl/doc/apps/req.pod b/deps/openssl/openssl/doc/apps/req.pod index 0730d117b39cb3..df68cb0921fda3 100644 --- a/deps/openssl/openssl/doc/apps/req.pod +++ b/deps/openssl/openssl/doc/apps/req.pod @@ -235,8 +235,8 @@ this option outputs a self signed certificate instead of a certificate request. This is typically used to generate a test certificate or a self signed root CA. The extensions added to the certificate (if any) are specified in the configuration file. Unless specified -using the B option B<0> will be used for the serial -number. +using the B option, a large random number will be used for +the serial number. =item B<-days n> diff --git a/deps/openssl/openssl/doc/apps/s_client.pod b/deps/openssl/openssl/doc/apps/s_client.pod index b021c730c02ac7..aad59b181bfeaf 100644 --- a/deps/openssl/openssl/doc/apps/s_client.pod +++ b/deps/openssl/openssl/doc/apps/s_client.pod @@ -37,6 +37,9 @@ B B [B<-no_ssl2>] [B<-no_ssl3>] [B<-no_tls1>] +[B<-no_tls1_1>] +[B<-no_tls1_2>] +[B<-fallback_scsv>] [B<-bugs>] [B<-cipher cipherlist>] [B<-serverpref>] @@ -47,6 +50,7 @@ B B [B<-sess_out filename>] [B<-sess_in filename>] [B<-rand file(s)>] +[B<-serverinfo types>] [B<-status>] [B<-nextprotoneg protocols>] @@ -196,16 +200,19 @@ Use the PSK key B when using a PSK cipher suite. The key is given as a hexadecimal number without leading 0x, for example -psk 1a2b3c4d. 
-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1> +=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> these options disable the use of certain SSL or TLS protocols. By default the initial handshake uses a method which should be compatible with all servers and permit them to use SSL v3, SSL v2 or TLS as appropriate. -Unfortunately there are a lot of ancient and broken servers in use which +Unfortunately there are still ancient and broken servers in use which cannot handle this technique and will fail to connect. Some servers only -work if TLS is turned off with the B<-no_tls> option others will only -support SSL v2 and may need the B<-ssl2> option. +work if TLS is turned off. + +=item B<-fallback_scsv> + +Send TLS_FALLBACK_SCSV in the ClientHello. =item B<-bugs> @@ -261,6 +268,13 @@ Multiple files can be specified separated by a OS-dependent character. The separator is B<;> for MS-Windows, B<,> for OpenVMS, and B<:> for all others. +=item B<-serverinfo types> + +a list of comma-separated TLS Extension Types (numbers between 0 and +65535). Each type will be sent as an empty ClientHello TLS Extension. +The server's response (if any) will be encoded and displayed as a PEM +file. + =item B<-status> sends a certificate status request to the server (OCSP stapling). The server diff --git a/deps/openssl/openssl/doc/apps/s_server.pod b/deps/openssl/openssl/doc/apps/s_server.pod index 2105b603b7ba33..b37f410fb9ce61 100644 --- a/deps/openssl/openssl/doc/apps/s_server.pod +++ b/deps/openssl/openssl/doc/apps/s_server.pod @@ -45,7 +45,6 @@ B B [B<-no_ssl3>] [B<-no_tls1>] [B<-no_dhe>] -[B<-no_ecdhe>] [B<-bugs>] [B<-hack>] [B<-www>] @@ -56,6 +55,8 @@ B B [B<-no_ticket>] [B<-id_prefix arg>] [B<-rand file(s)>] +[B<-serverinfo file>] +[B<-no_resumption_on_reneg>] [B<-status>] [B<-status_verbose>] [B<-status_timeout nsec>] @@ -138,11 +139,6 @@ a static set of parameters hard coded into the s_server program will be used. if this option is set then no DH parameters will be loaded effectively disabling the ephemeral DH cipher suites. -=item B<-no_ecdhe> - -if this option is set then no ECDH parameters will be loaded effectively -disabling the ephemeral ECDH cipher suites. - =item B<-no_tmp_rsa> certain export cipher suites sometimes use a temporary RSA key, this option @@ -295,6 +291,18 @@ Multiple files can be specified separated by a OS-dependent character. The separator is B<;> for MS-Windows, B<,> for OpenVMS, and B<:> for all others. +=item B<-serverinfo file> + +a file containing one or more blocks of PEM data. Each PEM block +must encode a TLS ServerHello extension (2 bytes type, 2 bytes length, +followed by "length" bytes of extension data). If the client sends +an empty TLS ClientHello extension matching the type, the corresponding +ServerHello extension will be returned. + +=item B<-no_resumption_on_reneg> + +set SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION flag. + =item B<-status> enables certificate status request support (aka OCSP stapling). 
diff --git a/deps/openssl/openssl/doc/apps/verify.pod b/deps/openssl/openssl/doc/apps/verify.pod index 0c8e4926ccff18..df0153435629fd 100644 --- a/deps/openssl/openssl/doc/apps/verify.pod +++ b/deps/openssl/openssl/doc/apps/verify.pod @@ -12,6 +12,9 @@ B B [B<-purpose purpose>] [B<-policy arg>] [B<-ignore_critical>] +[B<-attime timestamp>] +[B<-check_ss_sig>] +[B<-crlfile file>] [B<-crl_check>] [B<-crl_check_all>] [B<-policy_check>] @@ -25,7 +28,6 @@ B B [B<-untrusted file>] [B<-help>] [B<-issuer_checks>] -[B<-attime timestamp>] [B<-verbose>] [B<->] [certificates] @@ -51,6 +53,26 @@ create symbolic links to a directory of certificates. A file of trusted certificates. The file should contain multiple certificates in PEM format concatenated together. +=item B<-attime timestamp> + +Perform validation checks using time specified by B and not +current system time. B is the number of seconds since +01.01.1970 (UNIX time). + +=item B<-check_ss_sig> + +Verify the signature on the self-signed root CA. This is disabled by default +because it doesn't add any security. + +=item B<-crlfile file> + +File containing one or more CRL's (in PEM format) to load. + +=item B<-crl_check> + +Checks end entity certificate validity by attempting to look up a valid CRL. +If a valid CRL cannot be found an error occurs. + =item B<-untrusted file> A file of untrusted certificates. The file should contain multiple certificates @@ -80,12 +102,6 @@ rejected. The presence of rejection messages does not itself imply that anything is wrong; during the normal verification process, several rejections may take place. -=item B<-attime timestamp> - -Perform validation checks using time specified by B and not -current system time. B is the number of seconds since -01.01.1970 (UNIX time). - =item B<-policy arg> Enable policy processing and add B to the user-initial-policy-set (see diff --git a/deps/openssl/openssl/doc/apps/x509.pod b/deps/openssl/openssl/doc/apps/x509.pod index 6109389e0bb7cc..a1326edeefb6f3 100644 --- a/deps/openssl/openssl/doc/apps/x509.pod +++ b/deps/openssl/openssl/doc/apps/x509.pod @@ -51,6 +51,7 @@ B B [B<-CAkey filename>] [B<-CAcreateserial>] [B<-CAserial filename>] +[B<-force_pubkey key>] [B<-text>] [B<-certopt option>] [B<-C>] @@ -418,6 +419,15 @@ specified then the extensions should either be contained in the unnamed L manual page for details of the extension section format. +=item B<-force_pubkey key> + +when a certificate is created set its public key to B instead of the +key in the certificate or certificate request. This option is useful for +creating certificates where the algorithm can't normally sign requests, for +example DH. + +The format or B can be specified using the B<-keyform> option. 
+ =back =head2 NAME OPTIONS diff --git a/deps/openssl/openssl/doc/crypto/ASN1_STRING_length.pod b/deps/openssl/openssl/doc/crypto/ASN1_STRING_length.pod index a08e9a0fa4754f..f651e4f2aee031 100644 --- a/deps/openssl/openssl/doc/crypto/ASN1_STRING_length.pod +++ b/deps/openssl/openssl/doc/crypto/ASN1_STRING_length.pod @@ -3,7 +3,7 @@ =head1 NAME ASN1_STRING_dup, ASN1_STRING_cmp, ASN1_STRING_set, ASN1_STRING_length, -ASN1_STRING_length_set, ASN1_STRING_type, ASN1_STRING_data - +ASN1_STRING_length_set, ASN1_STRING_type, ASN1_STRING_data, ASN1_STRING_to_UTF8 - ASN1_STRING utility functions =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/ASN1_STRING_print_ex.pod b/deps/openssl/openssl/doc/crypto/ASN1_STRING_print_ex.pod index 3891b88791dded..19c82ff1e44453 100644 --- a/deps/openssl/openssl/doc/crypto/ASN1_STRING_print_ex.pod +++ b/deps/openssl/openssl/doc/crypto/ASN1_STRING_print_ex.pod @@ -2,7 +2,7 @@ =head1 NAME -ASN1_STRING_print_ex, ASN1_STRING_print_ex_fp - ASN1_STRING output routines. +ASN1_STRING_print_ex, ASN1_STRING_print_ex_fp, ASN1_STRING_print - ASN1_STRING output routines. =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/ASN1_TIME_set.pod b/deps/openssl/openssl/doc/crypto/ASN1_TIME_set.pod new file mode 100644 index 00000000000000..ae2b53d3558423 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/ASN1_TIME_set.pod @@ -0,0 +1,129 @@ +=pod + +=head1 NAME + +ASN1_TIME_set, ASN1_TIME_adj, ASN1_TIME_check, ASN1_TIME_set_string, +ASN1_TIME_print, ASN1_TIME_diff - ASN.1 Time functions. + +=head1 SYNOPSIS + + ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t); + ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t, + int offset_day, long offset_sec); + int ASN1_TIME_set_string(ASN1_TIME *s, const char *str); + int ASN1_TIME_check(const ASN1_TIME *t); + int ASN1_TIME_print(BIO *b, const ASN1_TIME *s); + + int ASN1_TIME_diff(int *pday, int *psec, + const ASN1_TIME *from, const ASN1_TIME *to); + +=head1 DESCRIPTION + +The function ASN1_TIME_set() sets the ASN1_TIME structure B to the +time represented by the time_t value B. If B is NULL a new ASN1_TIME +structure is allocated and returned. + +ASN1_TIME_adj() sets the ASN1_TIME structure B to the time represented +by the time B and B after the time_t value B. +The values of B or B can be negative to set a +time before B. The B value can also exceed the number of +seconds in a day. If B is NULL a new ASN1_TIME structure is allocated +and returned. + +ASN1_TIME_set_string() sets ASN1_TIME structure B to the time +represented by string B which must be in appropriate ASN.1 time +format (for example YYMMDDHHMMSSZ or YYYYMMDDHHMMSSZ). + +ASN1_TIME_check() checks the syntax of ASN1_TIME structure B. + +ASN1_TIME_print() prints out the time B to BIO B in human readable +format. It will be of the format MMM DD HH:MM:SS YYYY [GMT], for example +"Feb 3 00:55:52 2015 GMT" it does not include a newline. If the time +structure has invalid format it prints out "Bad time value" and returns +an error. + +ASN1_TIME_diff() sets B<*pday> and B<*psec> to the time difference between +B and B. If B represents a time later than B then +one or both (depending on the time difference) of B<*pday> and B<*psec> +will be positive. If B represents a time earlier than B then +one or both of B<*pday> and B<*psec> will be negative. If B and B +represent the same time then B<*pday> and B<*psec> will both be zero. +If both B<*pday> and B<*psec> are non-zero they will always have the same +sign. 
The value of B<*psec> will always be less than the number of seconds +in a day. If B or B is NULL the current time is used. + +=head1 NOTES + +The ASN1_TIME structure corresponds to the ASN.1 structure B

is prime. The following tests are performed until one of them shows that
-B<p> is composite; if B<p> passes all these tests, it is considered
+B<p> is composite; if B<p>
passes all these tests, it is considered prime. -BN_is_prime_fasttest(), when called with B, +BN_is_prime_fasttest_ex(), when called with B, first attempts trial division by a number of small primes; -if no divisors are found by this test and B is not B, -B is called. +if no divisors are found by this test and B is not B, +B is called. If B, this test is skipped. -Both BN_is_prime() and BN_is_prime_fasttest() perform a Miller-Rabin -probabilistic primality test with B iterations. If -B, a number of iterations is used that +Both BN_is_prime_ex() and BN_is_prime_fasttest_ex() perform a Miller-Rabin +probabilistic primality test with B iterations. If +B, a number of iterations is used that yields a false positive rate of at most 2^-80 for random input. -If B is not B, B is called +If B is not B, B is called after the j-th iteration (j = 0, 1, ...). B is a pre-allocated B (to save the overhead of allocating and freeing the structure in a loop), or B. +BN_GENCB_call calls the callback function held in the B structure +and passes the ints B and B as arguments. There are two types of +B structure that are supported: "new" style and "old" style. New +programs should prefer the "new" style, whilst the "old" style is provided +for backwards compatibility purposes. + +For "new" style callbacks a BN_GENCB structure should be initialised with a +call to BN_GENCB_set, where B is a B, B is of +type B and B is a B. +"Old" style callbacks are the same except they are initialised with a call +to BN_GENCB_set_old and B is of type +B. + +A callback is invoked through a call to B. This will check +the type of the callback and will invoke B for new +style callbacks or B for old style. + +BN_generate_prime (deprecated) works in the same way as +BN_generate_prime_ex but expects an old style callback function +directly in the B parameter, and an argument to pass to it in +the B. Similarly BN_is_prime and BN_is_prime_fasttest are +deprecated and can be compared to BN_is_prime_ex and +BN_is_prime_fasttest_ex respectively. + =head1 RETURN VALUES -BN_generate_prime() returns the prime number on success, B otherwise. +BN_generate_prime_ex() return 1 on success or 0 on error. -BN_is_prime() returns 0 if the number is composite, 1 if it is -prime with an error probability of less than 0.25^B, and +BN_is_prime_ex(), BN_is_prime_fasttest_ex(), BN_is_prime() and +BN_is_prime_fasttest() return 0 if the number is composite, 1 if it is +prime with an error probability of less than 0.25^B, and -1 on error. +BN_generate_prime() returns the prime number on success, B otherwise. + +Callback functions should return 1 on success or 0 on error. + The error codes can be obtained by L. 
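To make the "new" style BN_GENCB flow above concrete, here is a small sketch that generates a 512-bit prime with a progress callback and then re-tests it with BN_is_prime_ex(). It assumes the OpenSSL 1.0.2 headers, where a BN_GENCB may still live on the stack, and skips most error handling:

    #include <stdio.h>
    #include <openssl/bn.h>
    #include <openssl/crypto.h>

    /* "New" style callback: invoked via BN_GENCB_call(cb, a, b) during generation */
    static int progress(int a, int b, BN_GENCB *cb)
    {
        (void)a; (void)b; (void)cb;
        fputc('.', stderr);
        return 1;               /* returning 0 aborts the operation */
    }

    int main(void)
    {
        BIGNUM *prime = BN_new();
        BN_CTX *ctx = BN_CTX_new();
        BN_GENCB cb;
        char *dec;

        BN_GENCB_set(&cb, progress, NULL);

        /* 512-bit prime, not "safe", no add/rem constraints */
        if (!BN_generate_prime_ex(prime, 512, 0, NULL, NULL, &cb))
            return 1;

        /* BN_prime_checks selects an iteration count for a 2^-80 error rate */
        printf("\nis prime: %d\n",
               BN_is_prime_ex(prime, BN_prime_checks, ctx, NULL));

        dec = BN_bn2dec(prime);
        printf("prime = %s\n", dec);

        OPENSSL_free(dec);
        BN_free(prime);
        BN_CTX_free(ctx);
        return 0;
    }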
=head1 SEE ALSO diff --git a/deps/openssl/openssl/doc/crypto/BN_rand.pod b/deps/openssl/openssl/doc/crypto/BN_rand.pod index 81f93c2eb3a4e5..d6b975ccf64385 100644 --- a/deps/openssl/openssl/doc/crypto/BN_rand.pod +++ b/deps/openssl/openssl/doc/crypto/BN_rand.pod @@ -2,7 +2,7 @@ =head1 NAME -BN_rand, BN_pseudo_rand - generate pseudo-random number +BN_rand, BN_pseudo_rand, BN_rand_range, BN_pseudo_rand_range - generate pseudo-random number =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/CMS_add0_cert.pod b/deps/openssl/openssl/doc/crypto/CMS_add0_cert.pod index 9c13f488f61a92..8678ca18a58605 100644 --- a/deps/openssl/openssl/doc/crypto/CMS_add0_cert.pod +++ b/deps/openssl/openssl/doc/crypto/CMS_add0_cert.pod @@ -2,7 +2,7 @@ =head1 NAME - CMS_add0_cert, CMS_add1_cert, CMS_get1_certs, CMS_add0_crl, CMS_get1_crls, - CMS certificate and CRL utility functions +CMS_add0_cert, CMS_add1_cert, CMS_get1_certs, CMS_add0_crl, CMS_add1_crl, CMS_get1_crls, - CMS certificate and CRL utility functions =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/CMS_get0_RecipientInfos.pod b/deps/openssl/openssl/doc/crypto/CMS_get0_RecipientInfos.pod index e0355423e6d6a3..fe49772a86a6bd 100644 --- a/deps/openssl/openssl/doc/crypto/CMS_get0_RecipientInfos.pod +++ b/deps/openssl/openssl/doc/crypto/CMS_get0_RecipientInfos.pod @@ -2,7 +2,7 @@ =head1 NAME - CMS_get0_RecipientInfos, CMS_RecipientInfo_type, CMS_RecipientInfo_ktri_get0_signer_id,CMS_RecipientInfo_ktri_cert_cmp, CMS_RecipientInfo_set0_pkey, CMS_RecipientInfo_kekri_get0_id, CMS_RecipientInfo_kekri_id_cmp, CMS_RecipientInfo_set0_key, CMS_RecipientInfo_decrypt - CMS envelopedData RecipientInfo routines +CMS_get0_RecipientInfos, CMS_RecipientInfo_type, CMS_RecipientInfo_ktri_get0_signer_id,CMS_RecipientInfo_ktri_cert_cmp, CMS_RecipientInfo_set0_pkey, CMS_RecipientInfo_kekri_get0_id, CMS_RecipientInfo_kekri_id_cmp, CMS_RecipientInfo_set0_key, CMS_RecipientInfo_decrypt, CMS_RecipientInfo_encrypt - CMS envelopedData RecipientInfo routines =head1 SYNOPSIS @@ -20,6 +20,7 @@ int CMS_RecipientInfo_set0_key(CMS_RecipientInfo *ri, unsigned char *key, size_t keylen); int CMS_RecipientInfo_decrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); + int CMS_RecipientInfo_encrypt(CMS_ContentInfo *cms, CMS_RecipientInfo *ri); =head1 DESCRIPTION @@ -66,6 +67,11 @@ CMS_RecipientInfo_decrypt() attempts to decrypt CMS_RecipientInfo structure B in structure B. A key must have been associated with the structure first. +CMS_RecipientInfo_encrypt() attempts to encrypt CMS_RecipientInfo structure +B in structure B. A key must have been associated with the structure +first and the content encryption key must be available: for example by a +previous call to CMS_RecipientInfo_decrypt(). + =head1 NOTES The main purpose of these functions is to enable an application to lookup @@ -81,6 +87,13 @@ any appropriate means it can then associated with the structure and CMS_RecpientInfo_decrypt() called. If successful CMS_decrypt() can be called with a NULL key to decrypt the enveloped content. +The CMS_RecipientInfo_encrypt() can be used to add a new recipient to an +existing enveloped data structure. Typically an application will first decrypt +an appropriate CMS_RecipientInfo structure to make the content encrypt key +available, it will then add a new recipient using a function such as +CMS_add1_recipient_cert() and finally encrypt the content encryption key +using CMS_RecipientInfo_encrypt(). 
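The "add a new recipient" sequence described in the NOTES above can be sketched roughly as follows. This is an outline only: it assumes the supplied private key matches the first (key transport) RecipientInfo, whereas real code would locate the correct entry, for example with CMS_RecipientInfo_ktri_cert_cmp(), and check every return value:

    #include <openssl/cms.h>
    #include <openssl/evp.h>
    #include <openssl/x509.h>

    /* Add "new_recip" to an existing envelopedData "cms", using "priv",
     * the private key of a recipient that is already present. */
    static int add_recipient(CMS_ContentInfo *cms, EVP_PKEY *priv, X509 *new_recip)
    {
        STACK_OF(CMS_RecipientInfo) *ris = CMS_get0_RecipientInfos(cms);
        CMS_RecipientInfo *ri = sk_CMS_RecipientInfo_value(ris, 0);

        if (ri == NULL || CMS_RecipientInfo_type(ri) != CMS_RECIPINFO_TRANS)
            return 0;

        /* Make the content encryption key available */
        CMS_RecipientInfo_set0_pkey(ri, priv);
        if (!CMS_RecipientInfo_decrypt(cms, ri)) {
            CMS_RecipientInfo_set0_pkey(ri, NULL);
            return 0;
        }
        CMS_RecipientInfo_set0_pkey(ri, NULL);   /* hand "priv" back to the caller */

        /* Add the new recipient, then wrap the content key for it */
        ri = CMS_add1_recipient_cert(cms, new_recip, 0);
        return ri != NULL && CMS_RecipientInfo_encrypt(cms, ri);
    }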
+ =head1 RETURN VALUES CMS_get0_RecipientInfos() returns all CMS_RecipientInfo structures, or NULL if @@ -89,6 +102,7 @@ an error occurs. CMS_RecipientInfo_ktri_get0_signer_id(), CMS_RecipientInfo_set0_pkey(), CMS_RecipientInfo_kekri_get0_id(), CMS_RecipientInfo_set0_key() and CMS_RecipientInfo_decrypt() return 1 for success or 0 if an error occurs. +CMS_RecipientInfo_encrypt() return 1 for success or 0 if an error occurs. CMS_RecipientInfo_ktri_cert_cmp() and CMS_RecipientInfo_kekri_cmp() return 0 for a successful comparison and non zero otherwise. diff --git a/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod b/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod index 47f6d2a04726bf..b46c0e07ab3dac 100644 --- a/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod +++ b/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod @@ -2,7 +2,7 @@ =head1 NAME - CMS_get0_SignerInfos, CMS_SignerInfo_get0_signer_id, CMS_SignerInfo_cert_cmp, CMS_set1_signer_certs - CMS signedData signer functions. +CMS_get0_SignerInfos, CMS_SignerInfo_get0_signer_id, CMS_SignerInfo_get0_signature, CMS_SignerInfo_cert_cmp, CMS_set1_signer_cert - CMS signedData signer functions. =head1 SYNOPSIS @@ -11,6 +11,7 @@ STACK_OF(CMS_SignerInfo) *CMS_get0_SignerInfos(CMS_ContentInfo *cms); int CMS_SignerInfo_get0_signer_id(CMS_SignerInfo *si, ASN1_OCTET_STRING **keyid, X509_NAME **issuer, ASN1_INTEGER **sno); + ASN1_OCTET_STRING *CMS_SignerInfo_get0_signature(CMS_SignerInfo *si); int CMS_SignerInfo_cert_cmp(CMS_SignerInfo *si, X509 *cert); void CMS_SignerInfo_set1_signer_cert(CMS_SignerInfo *si, X509 *signer); @@ -24,6 +25,11 @@ associated with a specific CMS_SignerInfo structure B. Either the keyidentifier will be set in B or B issuer name and serial number in B and B. +CMS_SignerInfo_get0_signature() retrieves the signature associated with +B in a pointer to an ASN1_OCTET_STRING structure. This pointer returned +corresponds to the internal signature value if B so it may be read or +modified. + CMS_SignerInfo_cert_cmp() compares the certificate B against the signer identifier B. It returns zero if the comparison is successful and non zero if not. 
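A short sketch of how the signer accessors above are typically used, matching a candidate certificate against the SignerInfo entries of a parsed signedData; error handling is omitted:

    #include <openssl/cms.h>
    #include <openssl/x509.h>

    /* Return the index of the SignerInfo in "cms" matching "cert", or -1 */
    static int find_signer(CMS_ContentInfo *cms, X509 *cert)
    {
        STACK_OF(CMS_SignerInfo) *sis = CMS_get0_SignerInfos(cms);
        int i;

        for (i = 0; i < sk_CMS_SignerInfo_num(sis); i++) {
            CMS_SignerInfo *si = sk_CMS_SignerInfo_value(sis, i);
            /* zero means the signer identifier matches the certificate */
            if (CMS_SignerInfo_cert_cmp(si, cert) == 0)
                return i;
        }
        return -1;
    }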
diff --git a/deps/openssl/openssl/doc/crypto/CMS_verify.pod b/deps/openssl/openssl/doc/crypto/CMS_verify.pod index 8f26fdab093be1..7a2c1ee2515491 100644 --- a/deps/openssl/openssl/doc/crypto/CMS_verify.pod +++ b/deps/openssl/openssl/doc/crypto/CMS_verify.pod @@ -2,7 +2,7 @@ =head1 NAME - CMS_verify - verify a CMS SignedData structure +CMS_verify, CMS_get0_signers - verify a CMS SignedData structure =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/DH_generate_parameters.pod b/deps/openssl/openssl/doc/crypto/DH_generate_parameters.pod index 9081e9ea7cf938..7f81a04d915e78 100644 --- a/deps/openssl/openssl/doc/crypto/DH_generate_parameters.pod +++ b/deps/openssl/openssl/doc/crypto/DH_generate_parameters.pod @@ -2,32 +2,39 @@ =head1 NAME -DH_generate_parameters, DH_check - generate and check Diffie-Hellman parameters + +DH_generate_parameters_ex, DH_generate_parameters, +DH_check - generate and check Diffie-Hellman parameters =head1 SYNOPSIS #include - DH *DH_generate_parameters(int prime_len, int generator, - void (*callback)(int, int, void *), void *cb_arg); + int DH_generate_parameters_ex(DH *dh, int prime_len,int generator, BN_GENCB *cb); int DH_check(DH *dh, int *codes); +Deprecated: + + DH *DH_generate_parameters(int prime_len, int generator, + void (*callback)(int, int, void *), void *cb_arg); + =head1 DESCRIPTION -DH_generate_parameters() generates Diffie-Hellman parameters that can -be shared among a group of users, and returns them in a newly -allocated B structure. The pseudo-random number generator must be +DH_generate_parameters_ex() generates Diffie-Hellman parameters that can +be shared among a group of users, and stores them in the provided B +structure. The pseudo-random number generator must be seeded prior to calling DH_generate_parameters(). B is the length in bits of the safe prime to be generated. B is a small number E 1, typically 2 or 5. A callback function may be used to provide feedback about the progress -of the key generation. If B is not B, it will be +of the key generation. If B is not B, it will be called as described in L while a random prime -number is generated, and when a prime has been found, B is called. +number is generated, and when a prime has been found, B +is called. See L for information on +the BN_GENCB_call() function. DH_check() validates Diffie-Hellman parameters. It checks that B

is a safe prime, and that B is a suitable generator. In the case of an @@ -38,19 +45,21 @@ checked, i.e. it does not equal 2 or 5. =head1 RETURN VALUES -DH_generate_parameters() returns a pointer to the DH structure, or -NULL if the parameter generation fails. The error codes can be -obtained by L. +DH_generate_parameters_ex() and DH_check() return 1 if the check could be +performed, 0 otherwise. + +DH_generate_parameters() (deprecated) returns a pointer to the DH structure, or +NULL if the parameter generation fails. -DH_check() returns 1 if the check could be performed, 0 otherwise. +The error codes can be obtained by L. =head1 NOTES -DH_generate_parameters() may run for several hours before finding a -suitable prime. +DH_generate_parameters_ex() and DH_generate_parameters() may run for several +hours before finding a suitable prime. -The parameters generated by DH_generate_parameters() are not to be -used in signature schemes. +The parameters generated by DH_generate_parameters_ex() and DH_generate_parameters() +are not to be used in signature schemes. =head1 BUGS diff --git a/deps/openssl/openssl/doc/crypto/DSA_generate_parameters.pod b/deps/openssl/openssl/doc/crypto/DSA_generate_parameters.pod index be7c924ff8f971..16a67f22b0c423 100644 --- a/deps/openssl/openssl/doc/crypto/DSA_generate_parameters.pod +++ b/deps/openssl/openssl/doc/crypto/DSA_generate_parameters.pod @@ -2,20 +2,26 @@ =head1 NAME -DSA_generate_parameters - generate DSA parameters +DSA_generate_parameters_ex, DSA_generate_parameters - generate DSA parameters =head1 SYNOPSIS #include + int DSA_generate_parameters_ex(DSA *dsa, int bits, + const unsigned char *seed,int seed_len, + int *counter_ret, unsigned long *h_ret, BN_GENCB *cb); + +Deprecated: + DSA *DSA_generate_parameters(int bits, unsigned char *seed, int seed_len, int *counter_ret, unsigned long *h_ret, void (*callback)(int, int, void *), void *cb_arg); =head1 DESCRIPTION -DSA_generate_parameters() generates primes p and q and a generator g -for use in the DSA. +DSA_generate_parameters_ex() generates primes p and q and a generator g +for use in the DSA and stores the result in B. B is the length of the prime to be generated; the DSS allows a maximum of 1024 bits. @@ -25,64 +31,74 @@ generated at random. Otherwise, the seed is used to generate them. If the given seed does not yield a prime q, a new random seed is chosen and placed at B. -DSA_generate_parameters() places the iteration count in +DSA_generate_parameters_ex() places the iteration count in *B and a counter used for finding a generator in *B, unless these are B. A callback function may be used to provide feedback about the progress -of the key generation. If B is not B, it will be -called as follows: +of the key generation. If B is not B, it will be +called as shown below. For information on the BN_GENCB structure and the +BN_GENCB_call function discussed below, refer to +L. =over 4 =item * -When a candidate for q is generated, B is called +When a candidate for q is generated, B is called (m is 0 for the first candidate). =item * When a candidate for q has passed a test by trial division, -B is called. +B is called. While a candidate for q is tested by Miller-Rabin primality tests, -B is called in the outer loop +B is called in the outer loop (once for each witness that confirms that the candidate may be prime); i is the loop counter (starting at 0). =item * -When a prime q has been found, B and -B are called. +When a prime q has been found, B and +B are called. 
=item * Before a candidate for p (other than the first) is generated and tested, -B is called. +B is called. =item * When a candidate for p has passed the test by trial division, -B is called. +B is called. While it is tested by the Miller-Rabin primality test, -B is called in the outer loop +B is called in the outer loop (once for each witness that confirms that the candidate may be prime). i is the loop counter (starting at 0). =item * -When p has been found, B is called. +When p has been found, B is called. =item * -When the generator has been found, B is called. +When the generator has been found, B is called. =back +DSA_generate_parameters() (deprecated) works in much the same way as for DSA_generate_parameters_ex, except that no B parameter is passed and +instead a newly allocated B structure is returned. Additionally "old +style" callbacks are used instead of the newer BN_GENCB based approach. +Refer to L for further information. + =head1 RETURN VALUE +DSA_generate_parameters_ex() returns a 1 on success, or 0 otherwise. + DSA_generate_parameters() returns a pointer to the DSA structure, or -B if the parameter generation fails. The error codes can be -obtained by L. +B if the parameter generation fails. + +The error codes can be obtained by L. =head1 BUGS @@ -91,7 +107,7 @@ Seed lengths E 20 are not supported. =head1 SEE ALSO L, L, L, -L +L, L =head1 HISTORY diff --git a/deps/openssl/openssl/doc/crypto/EC_GFp_simple_method.pod b/deps/openssl/openssl/doc/crypto/EC_GFp_simple_method.pod new file mode 100644 index 00000000000000..aff20ac175b76e --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/EC_GFp_simple_method.pod @@ -0,0 +1,60 @@ +=pod + +=head1 NAME + +EC_GFp_simple_method, EC_GFp_mont_method, EC_GFp_nist_method, EC_GFp_nistp224_method, EC_GFp_nistp256_method, EC_GFp_nistp521_method, EC_GF2m_simple_method, EC_METHOD_get_field_type - Functions for obtaining B objects. + +=head1 SYNOPSIS + + #include + + const EC_METHOD *EC_GFp_simple_method(void); + const EC_METHOD *EC_GFp_mont_method(void); + const EC_METHOD *EC_GFp_nist_method(void); + const EC_METHOD *EC_GFp_nistp224_method(void); + const EC_METHOD *EC_GFp_nistp256_method(void); + const EC_METHOD *EC_GFp_nistp521_method(void); + + const EC_METHOD *EC_GF2m_simple_method(void); + + int EC_METHOD_get_field_type(const EC_METHOD *meth); + +=head1 DESCRIPTION + +The Elliptic Curve library provides a number of different implementations through a single common interface. +When constructing a curve using EC_GROUP_new (see L) an +implementation method must be provided. The functions described here all return a const pointer to an +B structure that can be passed to EC_GROUP_NEW. It is important that the correct implementation +type for the form of curve selected is used. + +For F2^m curves there is only one implementation choice, i.e. EC_GF2_simple_method. + +For Fp curves the lowest common denominator implementation is the EC_GFp_simple_method implementation. All +other implementations are based on this one. EC_GFp_mont_method builds on EC_GFp_simple_method but adds the +use of montgomery multiplication (see L). EC_GFp_nist_method +offers an implementation optimised for use with NIST recommended curves (NIST curves are available through +EC_GROUP_new_by_curve_name as described in L). + +The functions EC_GFp_nistp224_method, EC_GFp_nistp256_method and EC_GFp_nistp521_method offer 64 bit +optimised implementations for the NIST P224, P256 and P521 curves respectively. 
Note, however, that these +implementations are not available on all platforms. + +EC_METHOD_get_field_type identifies what type of field the EC_METHOD structure supports, which will be either +F2^m or Fp. If the field type is Fp then the value B is returned. If the field type is +F2^m then the value B is returned. These values are defined in the +obj_mac.h header file. + +=head1 RETURN VALUES + +All EC_GFp* functions and EC_GF2m_simple_method always return a const pointer to an EC_METHOD structure. + +EC_METHOD_get_field_type returns an integer that identifies the type of field the EC_METHOD structure supports. + +=head1 SEE ALSO + +L, L, L, L, +L, L, L, +L, +L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod b/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod new file mode 100644 index 00000000000000..954af469d5aa8c --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod @@ -0,0 +1,174 @@ +=pod + +=head1 NAME + +EC_GROUP_copy, EC_GROUP_dup, EC_GROUP_method_of, EC_GROUP_set_generator, EC_GROUP_get0_generator, EC_GROUP_get_order, EC_GROUP_get_cofactor, EC_GROUP_set_curve_name, EC_GROUP_get_curve_name, EC_GROUP_set_asn1_flag, EC_GROUP_get_asn1_flag, EC_GROUP_set_point_conversion_form, EC_GROUP_get_point_conversion_form, EC_GROUP_get0_seed, EC_GROUP_get_seed_len, EC_GROUP_set_seed, EC_GROUP_get_degree, EC_GROUP_check, EC_GROUP_check_discriminant, EC_GROUP_cmp, EC_GROUP_get_basis_type, EC_GROUP_get_trinomial_basis, EC_GROUP_get_pentanomial_basis - Functions for manipulating B objects. + +=head1 SYNOPSIS + + #include + #include + + int EC_GROUP_copy(EC_GROUP *dst, const EC_GROUP *src); + EC_GROUP *EC_GROUP_dup(const EC_GROUP *src); + + const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group); + + int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor); + const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group); + + int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx); + int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx); + + void EC_GROUP_set_curve_name(EC_GROUP *group, int nid); + int EC_GROUP_get_curve_name(const EC_GROUP *group); + + void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag); + int EC_GROUP_get_asn1_flag(const EC_GROUP *group); + + void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form); + point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *); + + unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x); + size_t EC_GROUP_get_seed_len(const EC_GROUP *); + size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len); + + int EC_GROUP_get_degree(const EC_GROUP *group); + + int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx); + + int EC_GROUP_check_discriminant(const EC_GROUP *group, BN_CTX *ctx); + + int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx); + + int EC_GROUP_get_basis_type(const EC_GROUP *); + int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k); + int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, + unsigned int *k2, unsigned int *k3); + +=head1 DESCRIPTION + +EC_GROUP_copy copies the curve B into B. Both B and B must use the same EC_METHOD. + +EC_GROUP_dup creates a new EC_GROUP object and copies the content from B to the newly created +EC_GROUP object. + +EC_GROUP_method_of obtains the EC_METHOD of B. 
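As a quick illustration of EC_GROUP_method_of() and EC_METHOD_get_field_type() described above, the sketch below inspects the field type of the named curve prime256v1; it assumes the OpenSSL headers and -lcrypto, with error checks omitted:

    #include <stdio.h>
    #include <openssl/ec.h>
    #include <openssl/obj_mac.h>

    int main(void)
    {
        EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
        const EC_METHOD *meth = EC_GROUP_method_of(group);

        /* NID_X9_62_prime_field for Fp, NID_X9_62_characteristic_two_field for F2^m */
        if (EC_METHOD_get_field_type(meth) == NID_X9_62_prime_field)
            printf("prime256v1 is defined over a prime field\n");

        EC_GROUP_free(group);
        return 0;
    }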
+ +EC_GROUP_set_generator sets curve paramaters that must be agreed by all participants using the curve. These +paramaters include the B, the B and the B. The B is a well defined point on the +curve chosen for cryptographic operations. Integers used for point multiplications will be between 0 and +n-1 where n is the B. The B multipied by the B gives the number of points on the curve. + +EC_GROUP_get0_generator returns the generator for the identified B. + +The functions EC_GROUP_get_order and EC_GROUP_get_cofactor populate the provided B and B parameters +with the respective order and cofactors for the B. + +The functions EC_GROUP_set_curve_name and EC_GROUP_get_curve_name, set and get the NID for the curve respectively +(see L). If a curve does not have a NID associated with it, then EC_GROUP_get_curve_name +will return 0. + +The asn1_flag value on a curve is used to determine whether there is a specific ASN1 OID to describe the curve or not. +If the asn1_flag is 1 then this is a named curve with an associated ASN1 OID. If not then asn1_flag is 0. The functions +EC_GROUP_get_asn1_flag and EC_GROUP_set_asn1_flag get and set the status of the asn1_flag for the curve. If set then +the curve_name must also be set. + +The point_coversion_form for a curve controls how EC_POINT data is encoded as ASN1 as defined in X9.62 (ECDSA). +point_conversion_form_t is an enum defined as follows: + + typedef enum { + /** the point is encoded as z||x, where the octet z specifies + * which solution of the quadratic equation y is */ + POINT_CONVERSION_COMPRESSED = 2, + /** the point is encoded as z||x||y, where z is the octet 0x02 */ + POINT_CONVERSION_UNCOMPRESSED = 4, + /** the point is encoded as z||x||y, where the octet z specifies + * which solution of the quadratic equation y is */ + POINT_CONVERSION_HYBRID = 6 + } point_conversion_form_t; + + +For POINT_CONVERSION_UNCOMPRESSED the point is encoded as an octet signifying the UNCOMPRESSED form has been used followed by +the octets for x, followed by the octets for y. + +For any given x co-ordinate for a point on a curve it is possible to derive two possible y values. For +POINT_CONVERSION_COMPRESSED the point is encoded as an octet signifying that the COMPRESSED form has been used AND which of +the two possible solutions for y has been used, followed by the octets for x. + +For POINT_CONVERSION_HYBRID the point is encoded as an octet signifying the HYBRID form has been used AND which of the two +possible solutions for y has been used, followed by the octets for x, followed by the octets for y. + +The functions EC_GROUP_set_point_conversion_form and EC_GROUP_get_point_conversion_form set and get the point_conversion_form +for the curve respectively. + +ANSI X9.62 (ECDSA standard) defines a method of generating the curve parameter b from a random number. This provides advantages +in that a parameter obtained in this way is highly unlikely to be susceptible to special purpose attacks, or have any trapdoors in it. +If the seed is present for a curve then the b parameter was generated in a verifiable fashion using that seed. The OpenSSL EC library +does not use this seed value but does enable you to inspect it using EC_GROUP_get0_seed. This returns a pointer to a memory block +containing the seed that was used. The length of the memory block can be obtained using EC_GROUP_get_seed_len. A number of the +builtin curves within the library provide seed values that can be obtained. 
It is also possible to set a custom seed using +EC_GROUP_set_seed and passing a pointer to a memory block, along with the length of the seed. Again, the EC library will not use +this seed value, although it will be preserved in any ASN1 based communications. + +EC_GROUP_get_degree gets the degree of the field. For Fp fields this will be the number of bits in p. For F2^m fields this will be +the value m. + +The function EC_GROUP_check_discriminant calculates the discriminant for the curve and verifies that it is valid. +For a curve defined over Fp the discriminant is given by the formula 4*a^3 + 27*b^2 whilst for F2^m curves the discriminant is +simply b. In either case for the curve to be valid the discriminant must be non zero. + +The function EC_GROUP_check performs a number of checks on a curve to verify that it is valid. Checks performed include +verifying that the discriminant is non zero; that a generator has been defined; that the generator is on the curve and has +the correct order. + +EC_GROUP_cmp compares B and B to determine whether they represent the same curve or not. + +The functions EC_GROUP_get_basis_type, EC_GROUP_get_trinomial_basis and EC_GROUP_get_pentanomial_basis should only be called for curves +defined over an F2^m field. Addition and multiplication operations within an F2^m field are performed using an irreducible polynomial +function f(x). This function is either a trinomial of the form: + +f(x) = x^m + x^k + 1 with m > k >= 1 + +or a pentanomial of the form: + +f(x) = x^m + x^k3 + x^k2 + x^k1 + 1 with m > k3 > k2 > k1 >= 1 + +The function EC_GROUP_get_basis_type returns a NID identifying whether a trinomial or pentanomial is in use for the field. The +function EC_GROUP_get_trinomial_basis must only be called where f(x) is of the trinomial form, and returns the value of B. Similary +the function EC_GROUP_get_pentanomial_basis must only be called where f(x) is of the pentanomial form, and returns the values of B, +B and B respectively. + +=head1 RETURN VALUES + +The following functions return 1 on success or 0 on error: EC_GROUP_copy, EC_GROUP_set_generator, EC_GROUP_check, +EC_GROUP_check_discriminant, EC_GROUP_get_trinomial_basis and EC_GROUP_get_pentanomial_basis. + +EC_GROUP_dup returns a pointer to the duplicated curve, or NULL on error. + +EC_GROUP_method_of returns the EC_METHOD implementation in use for the given curve or NULL on error. + +EC_GROUP_get0_generator returns the generator for the given curve or NULL on error. + +EC_GROUP_get_order, EC_GROUP_get_cofactor, EC_GROUP_get_curve_name, EC_GROUP_get_asn1_flag, EC_GROUP_get_point_conversion_form +and EC_GROUP_get_degree return the order, cofactor, curve name (NID), ASN1 flag, point_conversion_form and degree for the +specified curve respectively. If there is no curve name associated with a curve then EC_GROUP_get_curve_name will return 0. + +EC_GROUP_get0_seed returns a pointer to the seed that was used to generate the parameter b, or NULL if the seed is not +specified. EC_GROUP_get_seed_len returns the length of the seed or 0 if the seed is not specified. + +EC_GROUP_set_seed returns the length of the seed that has been set. If the supplied seed is NULL, or the supplied seed length is +0, the the return value will be 1. On error 0 is returned. + +EC_GROUP_cmp returns 0 if the curves are equal, 1 if they are not equal, or -1 on error. + +EC_GROUP_get_basis_type returns the values NID_X9_62_tpBasis or NID_X9_62_ppBasis (as defined in ) for a +trinomial or pentanomial respectively. 
Alternatively in the event of an error a 0 is returned. + +=head1 SEE ALSO + +L, L, L, +L, L, L, +L, L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod b/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod new file mode 100644 index 00000000000000..ff55bf33a3c9cc --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod @@ -0,0 +1,95 @@ +=pod + +=head1 NAME + +EC_GROUP_new, EC_GROUP_free, EC_GROUP_clear_free, EC_GROUP_new_curve_GFp, EC_GROUP_new_curve_GF2m, EC_GROUP_new_by_curve_name, EC_GROUP_set_curve_GFp, EC_GROUP_get_curve_GFp, EC_GROUP_set_curve_GF2m, EC_GROUP_get_curve_GF2m, EC_get_builtin_curves - Functions for creating and destroying B objects. + +=head1 SYNOPSIS + + #include + #include + + EC_GROUP *EC_GROUP_new(const EC_METHOD *meth); + void EC_GROUP_free(EC_GROUP *group); + void EC_GROUP_clear_free(EC_GROUP *group); + + EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + EC_GROUP *EC_GROUP_new_by_curve_name(int nid); + + int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); + + size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems); + +=head1 DESCRIPTION + +Within the library there are two forms of elliptic curve that are of interest. The first form is those defined over the +prime field Fp. The elements of Fp are the integers 0 to p-1, where p is a prime number. This gives us a revised +elliptic curve equation as follows: + +y^2 mod p = x^3 +ax + b mod p + +The second form is those defined over a binary field F2^m where the elements of the field are integers of length at +most m bits. For this form the elliptic curve equation is modified to: + +y^2 + xy = x^3 + ax^2 + b (where b != 0) + +Operations in a binary field are performed relative to an B. All such curves with OpenSSL +use a trinomial or a pentanomial for this parameter. + +A new curve can be constructed by calling EC_GROUP_new, using the implementation provided by B (see +L). It is then necessary to call either EC_GROUP_set_curve_GFp or +EC_GROUP_set_curve_GF2m as appropriate to create a curve defined over Fp or over F2^m respectively. + +EC_GROUP_set_curve_GFp sets the curve parameters B

, B and B for a curve over Fp stored in B. +EC_group_get_curve_GFp obtains the previously set curve parameters. + +EC_GROUP_set_curve_GF2m sets the equivalent curve parameters for a curve over F2^m. In this case B

represents +the irreducible polybnomial - each bit represents a term in the polynomial. Therefore there will either be three +or five bits set dependant on whether the polynomial is a trinomial or a pentanomial. +EC_group_get_curve_GF2m obtains the previously set curve parameters. + +The functions EC_GROUP_new_curve_GFp and EC_GROUP_new_curve_GF2m are shortcuts for calling EC_GROUP_new and the +appropriate EC_group_set_curve function. An appropriate default implementation method will be used. + +Whilst the library can be used to create any curve using the functions described above, there are also a number of +predefined curves that are available. In order to obtain a list of all of the predefined curves, call the function +EC_get_builtin_curves. The parameter B should be an array of EC_builtin_curve structures of size B. The function +will populate the B array with information about the builtin curves. If B is less than the total number of +curves available, then the first B curves will be returned. Otherwise the total number of curves will be +provided. The return value is the total number of curves available (whether that number has been populated in B or +not). Passing a NULL B, or setting B to 0 will do nothing other than return the total number of curves available. +The EC_builtin_curve structure is defined as follows: + + typedef struct { + int nid; + const char *comment; + } EC_builtin_curve; + +Each EC_builtin_curve item has a unique integer id (B), and a human readable comment string describing the curve. + +In order to construct a builtin curve use the function EC_GROUP_new_by_curve_name and provide the B of the curve to +be constructed. + +EC_GROUP_free frees the memory associated with the EC_GROUP. + +EC_GROUP_clear_free destroys any sensitive data held within the EC_GROUP and then frees its memory. + +=head1 RETURN VALUES + +All EC_GROUP_new* functions return a pointer to the newly constructed group, or NULL on error. + +EC_get_builtin_curves returns the number of builtin curves that are available. + +EC_GROUP_set_curve_GFp, EC_GROUP_get_curve_GFp, EC_GROUP_set_curve_GF2m, EC_GROUP_get_curve_GF2m return 1 on success or 0 on error. + +=head1 SEE ALSO + +L, L, L, +L, L, L, +L, L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod b/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod new file mode 100644 index 00000000000000..e859689bcb5079 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod @@ -0,0 +1,108 @@ +=pod + +=head1 NAME + +EC_KEY_new, EC_KEY_get_flags, EC_KEY_set_flags, EC_KEY_clear_flags, EC_KEY_new_by_curve_name, EC_KEY_free, EC_KEY_copy, EC_KEY_dup, EC_KEY_up_ref, EC_KEY_get0_group, EC_KEY_set_group, EC_KEY_get0_private_key, EC_KEY_set_private_key, EC_KEY_get0_public_key, EC_KEY_set_public_key, EC_KEY_get_enc_flags, EC_KEY_set_enc_flags, EC_KEY_get_conv_form, EC_KEY_set_conv_form, EC_KEY_get_key_method_data, EC_KEY_insert_key_method_data, EC_KEY_set_asn1_flag, EC_KEY_precompute_mult, EC_KEY_generate_key, EC_KEY_check_key, EC_KEY_set_public_key_affine_coordinates - Functions for creating, destroying and manipulating B objects. 
+ +=head1 SYNOPSIS + + #include + #include + + EC_KEY *EC_KEY_new(void); + int EC_KEY_get_flags(const EC_KEY *key); + void EC_KEY_set_flags(EC_KEY *key, int flags); + void EC_KEY_clear_flags(EC_KEY *key, int flags); + EC_KEY *EC_KEY_new_by_curve_name(int nid); + void EC_KEY_free(EC_KEY *key); + EC_KEY *EC_KEY_copy(EC_KEY *dst, const EC_KEY *src); + EC_KEY *EC_KEY_dup(const EC_KEY *src); + int EC_KEY_up_ref(EC_KEY *key); + const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key); + int EC_KEY_set_group(EC_KEY *key, const EC_GROUP *group); + const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key); + int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *prv); + const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key); + int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub); + point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key); + void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform); + void *EC_KEY_get_key_method_data(EC_KEY *key, + void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); + void EC_KEY_insert_key_method_data(EC_KEY *key, void *data, + void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); + void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag); + int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx); + int EC_KEY_generate_key(EC_KEY *key); + int EC_KEY_check_key(const EC_KEY *key); + int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y); + +=head1 DESCRIPTION + +An EC_KEY represents a public key and (optionaly) an associated private key. A new EC_KEY (with no associated curve) can be constructed by calling EC_KEY_new. +The reference count for the newly created EC_KEY is initially set to 1. A curve can be associated with the EC_KEY by calling +EC_KEY_set_group. + +Alternatively a new EC_KEY can be constructed by calling EC_KEY_new_by_curve_name and supplying the nid of the associated curve. Refer to L for a description of curve names. This function simply wraps calls to EC_KEY_new and +EC_GROUP_new_by_curve_name. + +Calling EC_KEY_free decrements the reference count for the EC_KEY object, and if it has dropped to zero then frees the memory associated +with it. + +EC_KEY_copy copies the contents of the EC_KEY in B into B. + +EC_KEY_dup creates a new EC_KEY object and copies B into it. + +EC_KEY_up_ref increments the reference count associated with the EC_KEY object. + +EC_KEY_generate_key generates a new public and private key for the supplied B object. B must have an EC_GROUP object +associated with it before calling this function. The private key is a random integer (0 < priv_key < order, where order is the order +of the EC_GROUP object). The public key is an EC_POINT on the curve calculated by multiplying the generator for the curve by the +private key. + +EC_KEY_check_key performs various sanity checks on the EC_KEY object to confirm that it is valid. + +EC_KEY_set_public_key_affine_coordinates sets the public key for B based on its affine co-ordinates, i.e. it constructs an EC_POINT +object based on the supplied B and B values and sets the public key to be this EC_POINT. It will also performs certain sanity checks +on the key to confirm that it is valid. + +The functions EC_KEY_get0_group, EC_KEY_set_group, EC_KEY_get0_private_key, EC_KEY_set_private_key, EC_KEY_get0_public_key, and EC_KEY_set_public_key get and set the EC_GROUP object, the private key and the EC_POINT public key for the B respectively. 
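A minimal sketch pulling together EC_KEY_new_by_curve_name(), EC_KEY_generate_key(), EC_KEY_check_key() and the get0 accessors described above; error handling is trimmed and the private scalar is printed only for demonstration:

    #include <stdio.h>
    #include <openssl/bn.h>
    #include <openssl/crypto.h>
    #include <openssl/ec.h>
    #include <openssl/obj_mac.h>

    int main(void)
    {
        /* Wraps EC_KEY_new() plus EC_GROUP_new_by_curve_name() */
        EC_KEY *key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);
        char *hex;

        if (key == NULL || !EC_KEY_generate_key(key))
            return 1;

        /* 1 means the private/public pair passed the sanity checks */
        printf("key ok: %d\n", EC_KEY_check_key(key));

        hex = BN_bn2hex(EC_KEY_get0_private_key(key));
        printf("private scalar: %s\n", hex);

        OPENSSL_free(hex);
        EC_KEY_free(key);
        return 0;
    }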
+ +The functions EC_KEY_get_conv_form and EC_KEY_set_conv_form get and set the point_conversion_form for the B. For a description +of point_conversion_forms please refer to L. + +EC_KEY_insert_key_method_data and EC_KEY_get_key_method_data enable the caller to associate arbitary additional data specific to the +elliptic curve scheme being used with the EC_KEY object. This data is treated as a "black box" by the ec library. The data to be stored by EC_KEY_insert_key_method_data is provided in the B parameter, which must have have associated functions for duplicating, freeing and "clear_freeing" the data item. If a subsequent EC_KEY_get_key_method_data call is issued, the functions for duplicating, freeing and "clear_freeing" the data item must be provided again, and they must be the same as they were when the data item was inserted. + +EC_KEY_set_flags sets the flags in the B parameter on the EC_KEY object. Any flags that are already set are left set. The currently defined standard flags are EC_FLAG_NON_FIPS_ALLOW and EC_FLAG_FIPS_CHECKED. In addition there is the flag EC_FLAG_COFACTOR_ECDH which is specific to ECDH and is defined in ecdh.h. EC_KEY_get_flags returns the current flags that are set for this EC_KEY. EC_KEY_clear_flags clears the flags indicated by the B parameter. All other flags are left in their existing state. + +EC_KEY_set_asn1_flag sets the asn1_flag on the underlying EC_GROUP object (if set). Refer to L for further information on the asn1_flag. + +EC_KEY_precompute_mult stores multiples of the underlying EC_GROUP generator for faster point multiplication. See also L. + + +=head1 RETURN VALUES + +EC_KEY_new, EC_KEY_new_by_curve_name and EC_KEY_dup return a pointer to the newly created EC_KEY object, or NULL on error. + +EC_KEY_get_flags returns the flags associated with the EC_KEY object as an integer. + +EC_KEY_copy returns a pointer to the destination key, or NULL on error. + +EC_KEY_up_ref, EC_KEY_set_group, EC_KEY_set_private_key, EC_KEY_set_public_key, EC_KEY_precompute_mult, EC_KEY_generate_key, EC_KEY_check_key and EC_KEY_set_public_key_affine_coordinates return 1 on success or 0 on error. + +EC_KEY_get0_group returns the EC_GROUP associated with the EC_KEY. + +EC_KEY_get0_private_key returns the private key associated with the EC_KEY. + +EC_KEY_get_conv_form return the point_conversion_form for the EC_KEY. + + +=head1 SEE ALSO + +L, L, L, +L, L, +L, +L, +L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/EC_POINT_add.pod b/deps/openssl/openssl/doc/crypto/EC_POINT_add.pod new file mode 100644 index 00000000000000..ae926408432188 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/EC_POINT_add.pod @@ -0,0 +1,72 @@ +=pod + +=head1 NAME + +EC_POINT_add, EC_POINT_dbl, EC_POINT_invert, EC_POINT_is_at_infinity, EC_POINT_is_on_curve, EC_POINT_cmp, EC_POINT_make_affine, EC_POINTs_make_affine, EC_POINTs_mul, EC_POINT_mul, EC_GROUP_precompute_mult, EC_GROUP_have_precompute_mult - Functions for performing mathematical operations and tests on B objects. 
+ +=head1 SYNOPSIS + + #include + #include + + int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx); + int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx); + int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx); + int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *p); + int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx); + int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx); + int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx); + int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx); + int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, size_t num, const EC_POINT *p[], const BIGNUM *m[], BN_CTX *ctx); + int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, const EC_POINT *q, const BIGNUM *m, BN_CTX *ctx); + int EC_GROUP_precompute_mult(EC_GROUP *group, BN_CTX *ctx); + int EC_GROUP_have_precompute_mult(const EC_GROUP *group); + + +=head1 DESCRIPTION + +EC_POINT_add adds the two points B and B and places the result in B. Similarly EC_POINT_dbl doubles the point B and places the +result in B. In both cases it is valid for B to be one of B or B. + +EC_POINT_invert calculates the inverse of the supplied point B. The result is placed back in B. + +The function EC_POINT_is_at_infinity tests whether the supplied point is at infinity or not. + +EC_POINT_is_on_curve tests whether the supplied point is on the curve or not. + +EC_POINT_cmp compares the two supplied points and tests whether or not they are equal. + +The functions EC_POINT_make_affine and EC_POINTs_make_affine force the internal representation of the EC_POINT(s) into the affine +co-ordinate system. In the case of EC_POINTs_make_affine the value B provides the number of points in the array B to be +forced. + +EC_POINT_mul calculates the value generator * B + B * B and stores the result in B. The value B may be NULL in which case the result is just B * B. + +EC_POINTs_mul calculates the value generator * B + B * B + ... + B * B. As for EC_POINT_mul the value +B may be NULL. + +The function EC_GROUP_precompute_mult stores multiples of the generator for faster point multiplication, whilst +EC_GROUP_have_precompute_mult tests whether precomputation has already been done. See L for information +about the generator. + + +=head1 RETURN VALUES + +The following functions return 1 on success or 0 on error: EC_POINT_add, EC_POINT_dbl, EC_POINT_invert, EC_POINT_make_affine, +EC_POINTs_make_affine, EC_POINTs_make_affine, EC_POINT_mul, EC_POINTs_mul and EC_GROUP_precompute_mult. + +EC_POINT_is_at_infinity returns 1 if the point is at infinity, or 0 otherwise. + +EC_POINT_is_on_curve returns 1 if the point is on the curve, 0 if not, or -1 on error. + +EC_POINT_cmp returns 1 if the points are not equal, 0 if they are, or -1 on error. + +EC_GROUP_have_precompute_mult return 1 if a precomputation has been done, or 0 if not. 
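The point arithmetic above can be exercised in a few lines. The following sketch computes generator * 42 on prime256v1, verifies the result is on the curve and then doubles it; error checking is mostly omitted:

    #include <stdio.h>
    #include <openssl/bn.h>
    #include <openssl/ec.h>
    #include <openssl/obj_mac.h>

    int main(void)
    {
        BN_CTX *ctx = BN_CTX_new();
        EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
        EC_POINT *p = EC_POINT_new(group);
        BIGNUM *k = BN_new();

        BN_set_word(k, 42);

        /* p = generator * 42; the q/m pair is left NULL as described above */
        if (!EC_POINT_mul(group, p, k, NULL, NULL, ctx))
            return 1;
        printf("on curve: %d\n", EC_POINT_is_on_curve(group, p, ctx));

        /* p = 2 * p, i.e. generator * 84 */
        EC_POINT_dbl(group, p, p, ctx);
        printf("at infinity: %d\n", EC_POINT_is_at_infinity(group, p));

        EC_POINT_free(p);
        BN_free(k);
        EC_GROUP_free(group);
        BN_CTX_free(ctx);
        return 0;
    }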
+ +=head1 SEE ALSO + +L, L, L, L, +L, L, +L, L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/EC_POINT_new.pod b/deps/openssl/openssl/doc/crypto/EC_POINT_new.pod new file mode 100644 index 00000000000000..858baf4244601c --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/EC_POINT_new.pod @@ -0,0 +1,128 @@ +=pod + +=head1 NAME + +EC_POINT_new, EC_POINT_free, EC_POINT_clear_free, EC_POINT_copy, EC_POINT_dup, EC_POINT_method_of, EC_POINT_set_to_infinity, EC_POINT_set_Jprojective_coordinates, EC_POINT_get_Jprojective_coordinates_GFp, EC_POINT_set_affine_coordinates_GFp, EC_POINT_get_affine_coordinates_GFp, EC_POINT_set_compressed_coordinates_GFp, EC_POINT_set_affine_coordinates_GF2m, EC_POINT_get_affine_coordinates_GF2m, EC_POINT_set_compressed_coordinates_GF2m, EC_POINT_point2oct, EC_POINT_oct2point, EC_POINT_point2bn, EC_POINT_bn2point, EC_POINT_point2hex, EC_POINT_hex2point - Functions for creating, destroying and manipulating B objects. + +=head1 SYNOPSIS + + #include + #include + + EC_POINT *EC_POINT_new(const EC_GROUP *group); + void EC_POINT_free(EC_POINT *point); + void EC_POINT_clear_free(EC_POINT *point); + int EC_POINT_copy(EC_POINT *dst, const EC_POINT *src); + EC_POINT *EC_POINT_dup(const EC_POINT *src, const EC_GROUP *group); + const EC_METHOD *EC_POINT_method_of(const EC_POINT *point); + int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point); + int EC_POINT_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, const BIGNUM *y, const BIGNUM *z, BN_CTX *ctx); + int EC_POINT_get_Jprojective_coordinates_GFp(const EC_GROUP *group, + const EC_POINT *p, BIGNUM *x, BIGNUM *y, BIGNUM *z, BN_CTX *ctx); + int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx); + int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group, + const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); + int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, int y_bit, BN_CTX *ctx); + int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx); + int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group, + const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); + int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, int y_bit, BN_CTX *ctx); + size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *p, + point_conversion_form_t form, + unsigned char *buf, size_t len, BN_CTX *ctx); + int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *p, + const unsigned char *buf, size_t len, BN_CTX *ctx); + BIGNUM *EC_POINT_point2bn(const EC_GROUP *, const EC_POINT *, + point_conversion_form_t form, BIGNUM *, BN_CTX *); + EC_POINT *EC_POINT_bn2point(const EC_GROUP *, const BIGNUM *, + EC_POINT *, BN_CTX *); + char *EC_POINT_point2hex(const EC_GROUP *, const EC_POINT *, + point_conversion_form_t form, BN_CTX *); + EC_POINT *EC_POINT_hex2point(const EC_GROUP *, const char *, + EC_POINT *, BN_CTX *); + + +=head1 DESCRIPTION + +An EC_POINT represents a point on a curve. A new point is constructed by calling the function EC_POINT_new and providing the B +object that the point relates to. + +EC_POINT_free frees the memory associated with the EC_POINT. + +EC_POINT_clear_free destroys any sensitive data held within the EC_POINT and then frees its memory. + +EC_POINT_copy copies the point B into B. Both B and B must use the same EC_METHOD. 
+ +EC_POINT_dup creates a new EC_POINT object and copies the content from B to the newly created +EC_POINT object. + +EC_POINT_method_of obtains the EC_METHOD associated with B. + +A valid point on a curve is the special point at infinity. A point is set to be at infinity by calling EC_POINT_set_to_infinity. + +The affine co-ordinates for a point describe a point in terms of its x and y position. The functions +EC_POINT_set_affine_coordinates_GFp and EC_POINT_set_affine_coordinates_GF2m set the B and B co-ordinates for the point +B

defined over the curve given in B. + +As well as the affine co-ordinates, a point can alternatively be described in terms of its Jacobian +projective co-ordinates (for Fp curves only). Jacobian projective co-ordinates are expressed as three values x, y and z. Working in +this co-ordinate system provides more efficient point multiplication operations. +A mapping exists between Jacobian projective co-ordinates and affine co-ordinates. A Jacobian projective co-ordinate (x, y, z) can be written as an affine co-ordinate as (x/(z^2), y/(z^3)). Conversion to Jacobian projective to affine co-ordinates is simple. The co-ordinate (x, y) is +mapped to (x, y, 1). To set or get the projective co-ordinates use EC_POINT_set_Jprojective_coordinates_GFp and +EC_POINT_get_Jprojective_coordinates_GFp respectively. + +Points can also be described in terms of their compressed co-ordinates. For a point (x, y), for any given value for x such that the point is +on the curve there will only ever be two possible values for y. Therefore a point can be set using the EC_POINT_set_compressed_coordinates_GFp +and EC_POINT_set_compressed_coordinates_GF2m functions where B is the x co-ordinate and B is a value 0 or 1 to identify which of +the two possible values for y should be used. + +In addition EC_POINTs can be converted to and from various external +representations. Supported representations are octet strings, BIGNUMs and +hexadecimal. Octet strings are stored in a buffer along with an associated +buffer length. A point held in a BIGNUM is calculated by converting the point to +an octet string and then converting that octet string into a BIGNUM integer. +Points in hexadecimal format are stored in a NULL terminated character string +where each character is one of the printable values 0-9 or A-F (or a-f). + +The functions EC_POINT_point2oct, EC_POINT_oct2point, EC_POINT_point2bn, EC_POINT_bn2point, EC_POINT_point2hex and EC_POINT_hex2point convert +from and to EC_POINTs for the formats: octet string, BIGNUM and hexadecimal respectively. + +The function EC_POINT_point2oct must be supplied with a buffer long enough to store the octet string. The return value provides the number of +octets stored. Calling the function with a NULL buffer will not perform the conversion but will still return the required buffer length. + +The function EC_POINT_point2hex will allocate sufficient memory to store the hexadecimal string. It is the caller's responsibility to free +this memory with a subsequent call to OPENSSL_free(). + +=head1 RETURN VALUES + +EC_POINT_new and EC_POINT_dup return the newly allocated EC_POINT or NULL on error. + +The following functions return 1 on success or 0 on error: EC_POINT_copy, EC_POINT_set_to_infinity, EC_POINT_set_Jprojective_coordinates_GFp, +EC_POINT_get_Jprojective_coordinates_GFp, EC_POINT_set_affine_coordinates_GFp, EC_POINT_get_affine_coordinates_GFp, +EC_POINT_set_compressed_coordinates_GFp, EC_POINT_set_affine_coordinates_GF2m, EC_POINT_get_affine_coordinates_GF2m, +EC_POINT_set_compressed_coordinates_GF2m and EC_POINT_oct2point. + +EC_POINT_method_of returns the EC_METHOD associated with the supplied EC_POINT. + +EC_POINT_point2oct returns the length of the required buffer, or 0 on error. + +EC_POINT_point2bn returns the pointer to the BIGNUM supplied, or NULL on error. + +EC_POINT_bn2point returns the pointer to the EC_POINT supplied, or NULL on error. + +EC_POINT_point2hex returns a pointer to the hex string, or NULL on error. 
+ +EC_POINT_hex2point returns the pointer to the EC_POINT supplied, or NULL on error. + +=head1 SEE ALSO + +L, L, L, L, +L, L, +L, L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod b/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod index 72925fb9f474cf..a4d38c17fd6bbb 100644 --- a/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod +++ b/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod @@ -2,26 +2,35 @@ =head1 NAME -ERR_remove_state - free a thread's error queue +ERR_remove_thread_state, ERR_remove_state - free a thread's error queue =head1 SYNOPSIS #include + void ERR_remove_thread_state(const CRYPTO_THREADID *tid); + +Deprecated: + void ERR_remove_state(unsigned long pid); =head1 DESCRIPTION -ERR_remove_state() frees the error queue associated with thread B. -If B == 0, the current thread will have its error queue removed. +ERR_remove_thread_state() frees the error queue associated with thread B. +If B == B, the current thread will have its error queue removed. Since error queue data structures are allocated automatically for new threads, they must be freed when threads are terminated in order to avoid memory leaks. +ERR_remove_state is deprecated and has been replaced by +ERR_remove_thread_state. Since threads in OpenSSL are no longer identified +by unsigned long values any argument to this function is ignored. Calling +ERR_remove_state is equivalent to B. + =head1 RETURN VALUE -ERR_remove_state() returns no value. +ERR_remove_thread_state and ERR_remove_state() return no value. =head1 SEE ALSO @@ -29,6 +38,8 @@ L =head1 HISTORY -ERR_remove_state() is available in all versions of SSLeay and OpenSSL. +ERR_remove_state() is available in all versions of SSLeay and OpenSSL. It +was deprecated in OpenSSL 1.0.0 when ERR_remove_thread_state was introduced +and thread IDs were introduced to identify threads instead of 'unsigned long'. =cut diff --git a/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod b/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod index 0ea7d55c0f1f1e..5d6059528e6d43 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod @@ -36,8 +36,8 @@ If the total key and IV length is less than the digest length and B is used then the derivation algorithm is compatible with PKCS#5 v1.5 otherwise a non standard extension is used to derive the extra data. -Newer applications should use more standard algorithms such as PKCS#5 -v2.0 for key derivation. +Newer applications should use more standard algorithms such as PBKDF2 as +defined in PKCS#5v2.1 for key derivation. =head1 KEY DERIVATION ALGORITHM @@ -55,7 +55,10 @@ the IV. =head1 RETURN VALUES -EVP_BytesToKey() returns the size of the derived key in bytes. +If B is NULL, then EVP_BytesToKey() returns the number of bytes +needed to store the derived key. +Otherwise, EVP_BytesToKey() returns the size of the derived key in bytes, +or 0 on error. 
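For completeness, a small sketch of the EVP_BytesToKey() call whose return values are described above; the salt and iteration count are demonstration values only, and, as the text notes, PBKDF2 is the better choice for new designs:

    #include <stdio.h>
    #include <string.h>
    #include <openssl/evp.h>

    int main(void)
    {
        const char *pass = "secret";
        unsigned char salt[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        unsigned char key[EVP_MAX_KEY_LENGTH], iv[EVP_MAX_IV_LENGTH];

        /* Derive an AES-256-CBC key and IV from the password and salt,
         * using SHA-256 with a single iteration (count = 1). */
        int keylen = EVP_BytesToKey(EVP_aes_256_cbc(), EVP_sha256(), salt,
                                    (const unsigned char *)pass, strlen(pass),
                                    1, key, iv);

        printf("derived %d key bytes\n", keylen);
        return 0;
    }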
=head1 SEE ALSO diff --git a/deps/openssl/openssl/doc/crypto/EVP_DigestInit.pod b/deps/openssl/openssl/doc/crypto/EVP_DigestInit.pod index ac526bb6dbcb71..0895e8c392fa37 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_DigestInit.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_DigestInit.pod @@ -4,10 +4,10 @@ EVP_MD_CTX_init, EVP_MD_CTX_create, EVP_DigestInit_ex, EVP_DigestUpdate, EVP_DigestFinal_ex, EVP_MD_CTX_cleanup, EVP_MD_CTX_destroy, EVP_MAX_MD_SIZE, -EVP_MD_CTX_copy_ex, EVP_MD_CTX_copy, EVP_MD_type, EVP_MD_pkey_type, EVP_MD_size, -EVP_MD_block_size, EVP_MD_CTX_md, EVP_MD_CTX_size, EVP_MD_CTX_block_size, EVP_MD_CTX_type, -EVP_md_null, EVP_md2, EVP_md5, EVP_sha, EVP_sha1, EVP_sha224, EVP_sha256, -EVP_sha384, EVP_sha512, EVP_dss, EVP_dss1, EVP_mdc2, +EVP_MD_CTX_copy_ex, EVP_DigestInit, EVP_DigestFinal, EVP_MD_CTX_copy, EVP_MD_type, +EVP_MD_pkey_type, EVP_MD_size, EVP_MD_block_size, EVP_MD_CTX_md, EVP_MD_CTX_size, +EVP_MD_CTX_block_size, EVP_MD_CTX_type, EVP_md_null, EVP_md2, EVP_md5, EVP_sha, EVP_sha1, +EVP_sha224, EVP_sha256, EVP_sha384, EVP_sha512, EVP_dss, EVP_dss1, EVP_mdc2, EVP_ripemd160, EVP_get_digestbyname, EVP_get_digestbynid, EVP_get_digestbyobj - EVP digest routines @@ -270,7 +270,7 @@ and EVP_DigestFinal_ex() were added in OpenSSL 0.9.7. EVP_md_null(), EVP_md2(), EVP_md5(), EVP_sha(), EVP_sha1(), EVP_dss(), EVP_dss1(), EVP_mdc2() and EVP_ripemd160() were -changed to return truely const EVP_MD * in OpenSSL 0.9.7. +changed to return truly const EVP_MD * in OpenSSL 0.9.7. The link between digests and signing algorithms was fixed in OpenSSL 1.0 and later, so now EVP_sha1() can be used with RSA and DSA; there is no need to diff --git a/deps/openssl/openssl/doc/crypto/EVP_DigestVerifyInit.pod b/deps/openssl/openssl/doc/crypto/EVP_DigestVerifyInit.pod index cfeccd96effcfb..e0217e40cba8d2 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_DigestVerifyInit.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_DigestVerifyInit.pod @@ -11,7 +11,7 @@ EVP_DigestVerifyInit, EVP_DigestVerifyUpdate, EVP_DigestVerifyFinal - EVP signat int EVP_DigestVerifyInit(EVP_MD_CTX *ctx, EVP_PKEY_CTX **pctx, const EVP_MD *type, ENGINE *e, EVP_PKEY *pkey); int EVP_DigestVerifyUpdate(EVP_MD_CTX *ctx, const void *d, unsigned int cnt); - int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, unsigned char *sig, size_t siglen); + int EVP_DigestVerifyFinal(EVP_MD_CTX *ctx, const unsigned char *sig, size_t siglen); =head1 DESCRIPTION diff --git a/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod b/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod index ed027b387aea3c..fb6036f959ba0c 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod @@ -16,7 +16,17 @@ EVP_CIPHER_CTX_nid, EVP_CIPHER_CTX_block_size, EVP_CIPHER_CTX_key_length, EVP_CIPHER_CTX_iv_length, EVP_CIPHER_CTX_get_app_data, EVP_CIPHER_CTX_set_app_data, EVP_CIPHER_CTX_type, EVP_CIPHER_CTX_flags, EVP_CIPHER_CTX_mode, EVP_CIPHER_param_to_asn1, EVP_CIPHER_asn1_to_param, -EVP_CIPHER_CTX_set_padding - EVP cipher routines +EVP_CIPHER_CTX_set_padding, EVP_enc_null, EVP_des_cbc, EVP_des_ecb, +EVP_des_cfb, EVP_des_ofb, EVP_des_ede_cbc, EVP_des_ede, EVP_des_ede_ofb, +EVP_des_ede_cfb, EVP_des_ede3_cbc, EVP_des_ede3, EVP_des_ede3_ofb, +EVP_des_ede3_cfb, EVP_desx_cbc, EVP_rc4, EVP_rc4_40, EVP_idea_cbc, +EVP_idea_ecb, EVP_idea_cfb, EVP_idea_ofb, EVP_idea_cbc, EVP_rc2_cbc, +EVP_rc2_ecb, EVP_rc2_cfb, EVP_rc2_ofb, EVP_rc2_40_cbc, EVP_rc2_64_cbc, +EVP_bf_cbc, EVP_bf_ecb, EVP_bf_cfb, EVP_bf_ofb, EVP_cast5_cbc, 
+EVP_cast5_ecb, EVP_cast5_cfb, EVP_cast5_ofb, EVP_rc5_32_12_16_cbc, +EVP_rc5_32_12_16_ecb, EVP_rc5_32_12_16_cfb, EVP_rc5_32_12_16_ofb, +EVP_aes_128_gcm, EVP_aes_192_gcm, EVP_aes_256_gcm, EVP_aes_128_ccm, +EVP_aes_192_ccm, EVP_aes_256_ccm - EVP cipher routines =head1 SYNOPSIS @@ -231,8 +241,7 @@ or the parameters cannot be set (for example the RC2 effective key length is not supported. EVP_CIPHER_CTX_ctrl() allows various cipher specific parameters to be determined -and set. Currently only the RC2 effective key length and the number of rounds of -RC5 can be set. +and set. =head1 RETURN VALUES @@ -338,8 +347,88 @@ RC5 encryption algorithm in CBC, ECB, CFB and OFB modes respectively. This is a cipher with an additional "number of rounds" parameter. By default the key length is set to 128 bits and 12 rounds. +=item EVP_aes_128_gcm(void), EVP_aes_192_gcm(void), EVP_aes_256_gcm(void) + +AES Galois Counter Mode (GCM) for 128, 192 and 256 bit keys respectively. +These ciphers require additional control operations to function correctly: see +L section below for details. + +=item EVP_aes_128_ccm(void), EVP_aes_192_ccm(void), EVP_aes_256_ccm(void) + +AES Counter with CBC-MAC Mode (CCM) for 128, 192 and 256 bit keys respectively. +These ciphers require additional control operations to function correctly: see +CCM mode section below for details. + =back +=head1 GCM Mode + +For GCM mode ciphers the behaviour of the EVP interface is subtly altered and +several GCM specific ctrl operations are supported. + +To specify any additional authenticated data (AAD) a call to EVP_CipherUpdate(), +EVP_EncryptUpdate() or EVP_DecryptUpdate() should be made with the output +parameter B set to B. + +When decrypting the return value of EVP_DecryptFinal() or EVP_CipherFinal() +indicates if the operation was successful. If it does not indicate success +the authentication operation has failed and any output data B +be used as it is corrupted. + +The following ctrls are supported in GCM mode: + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_IVLEN, ivlen, NULL); + +Sets the GCM IV length: this call can only be made before specifying an IV. If +not called a default IV length is used (96 bits for AES). + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, taglen, tag); + +Writes B bytes of the tag value to the buffer indicated by B. +This call can only be made when encrypting data and B all data has been +processed (e.g. after an EVP_EncryptFinal() call). + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_TAG, taglen, tag); + +Sets the expected tag to B bytes from B. This call is only legal +when decrypting data and must be made B any data is processed (e.g. +before any EVP_DecryptUpdate() call). + +See L below for an example of the use of GCM mode. + +=head1 CCM Mode + +The behaviour of CCM mode ciphers is similar to CCM mode but with a few +additional requirements and different ctrl values. + +Like GCM mode any additional authenticated data (AAD) is passed by calling +EVP_CipherUpdate(), EVP_EncryptUpdate() or EVP_DecryptUpdate() with the output +parameter B set to B. Additionally the total plaintext or ciphertext +length B be passed to EVP_CipherUpdate(), EVP_EncryptUpdate() or +EVP_DecryptUpdate() with the output and input parameters (B and B) +set to B and the length passed in the B parameter. 
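Before the CCM-specific ctrls are listed, here is a minimal sketch of the GCM encryption sequence described above: set the IV length, pass the AAD with a NULL output buffer, and retrieve the tag after finalisation. The helper name, 256-bit key and 16-byte tag are illustrative, and error checking is omitted for brevity:

    #include <openssl/evp.h>

    int gcm_encrypt(const unsigned char *key,
                    const unsigned char *iv, int ivlen,
                    const unsigned char *aad, int aadlen,
                    const unsigned char *pt, int ptlen,
                    unsigned char *ct, unsigned char tag[16])
    {
        EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
        int len, ctlen;

        EVP_EncryptInit_ex(ctx, EVP_aes_256_gcm(), NULL, NULL, NULL);
        /* The IV length must be set before the IV itself is supplied. */
        EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_IVLEN, ivlen, NULL);
        EVP_EncryptInit_ex(ctx, NULL, NULL, key, iv);
        /* AAD is passed with the output pointer set to NULL. */
        EVP_EncryptUpdate(ctx, NULL, &len, aad, aadlen);
        EVP_EncryptUpdate(ctx, ct, &len, pt, ptlen);
        ctlen = len;
        EVP_EncryptFinal_ex(ctx, ct + len, &len);
        ctlen += len;
        /* The tag only becomes available after EVP_EncryptFinal_ex(). */
        EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, 16, tag);
        EVP_CIPHER_CTX_free(ctx);
        return ctlen;
    }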
+ +The following ctrls are supported in CCM mode: + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_TAG, taglen, tag); + +This call is made to set the expected B tag value when decrypting or +the length of the tag (with the B parameter set to NULL) when encrypting. +The tag length is often referred to as B. If not set a default value is +used (12 for AES). + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_L, ivlen, NULL); + +Sets the CCM B value. If not set a default is used (8 for AES). + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_IVLEN, ivlen, NULL); + +Sets the CCM nonce (IV) length: this call can only be made before specifying +an nonce value. The nonce length is given by B<15 - L> so it is 7 by default +for AES. + + + =head1 NOTES Where possible the B interface to symmetric ciphers should be used in diff --git a/deps/openssl/openssl/doc/crypto/EVP_PKEY_CTX_ctrl.pod b/deps/openssl/openssl/doc/crypto/EVP_PKEY_CTX_ctrl.pod index 13b91f1e6e578f..44b5fdb7f2ec91 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_PKEY_CTX_ctrl.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_PKEY_CTX_ctrl.pod @@ -2,7 +2,13 @@ =head1 NAME -EVP_PKEY_ctrl, EVP_PKEY_ctrl_str - algorithm specific control operations +EVP_PKEY_CTX_ctrl, EVP_PKEY_CTX_ctrl_str, EVP_PKEY_get_default_digest_nid, +EVP_PKEY_CTX_set_signature_md, EVP_PKEY_CTX_set_rsa_padding, +EVP_PKEY_CTX_set_rsa_pss_saltlen, EVP_PKEY_CTX_set_rsa_rsa_keygen_bits, +EVP_PKEY_CTX_set_rsa_keygen_pubexp, EVP_PKEY_CTX_set_dsa_paramgen_bits, +EVP_PKEY_CTX_set_dh_paramgen_prime_len, +EVP_PKEY_CTX_set_dh_paramgen_generator, +EVP_PKEY_CTX_set_ec_paramgen_curve_nid - algorithm specific control operations =head1 SYNOPSIS @@ -45,7 +51,7 @@ B and B. Applications will not normally call EVP_PKEY_CTX_ctrl() directly but will instead call one of the algorithm specific macros below. -The function EVP_PKEY_ctrl_str() allows an application to send an algorithm +The function EVP_PKEY_CTX_ctrl_str() allows an application to send an algorithm specific control operation to a context B in string form. This is intended to be used for options specified on the command line or in text files. The commands supported are documented in the openssl utility diff --git a/deps/openssl/openssl/doc/crypto/EVP_PKEY_cmp.pod b/deps/openssl/openssl/doc/crypto/EVP_PKEY_cmp.pod index 4f8185e36cdf95..0ff027c0d5f93b 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_PKEY_cmp.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_PKEY_cmp.pod @@ -23,10 +23,10 @@ doesn't use parameters. The function EVP_PKEY_copy_parameters() copies the parameters from key B to key B. -The funcion EVP_PKEY_cmp_parameters() compares the parameters of keys +The function EVP_PKEY_cmp_parameters() compares the parameters of keys B and B. -The funcion EVP_PKEY_cmp() compares the public key components and paramters +The function EVP_PKEY_cmp() compares the public key components and paramters (if present) of keys B and B. 
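A common use of EVP_PKEY_cmp() is checking that a private key corresponds to the public key in a certificate; a small sketch (the helper name is illustrative):

    #include <openssl/evp.h>
    #include <openssl/x509.h>

    /* Returns 1 if the keys match, 0 if they do not, and a negative
     * value if the key types differ or the comparison is unsupported. */
    int key_matches_cert(EVP_PKEY *privkey, X509 *cert)
    {
        EVP_PKEY *pub = X509_get_pubkey(cert);  /* takes a reference */
        int ret = (pub != NULL) ? EVP_PKEY_cmp(pub, privkey) : -2;

        EVP_PKEY_free(pub);
        return ret;
    }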
=head1 NOTES diff --git a/deps/openssl/openssl/doc/crypto/OPENSSL_VERSION_NUMBER.pod b/deps/openssl/openssl/doc/crypto/OPENSSL_VERSION_NUMBER.pod index c39ac35e78ae60..f7ca7cb7906671 100644 --- a/deps/openssl/openssl/doc/crypto/OPENSSL_VERSION_NUMBER.pod +++ b/deps/openssl/openssl/doc/crypto/OPENSSL_VERSION_NUMBER.pod @@ -17,7 +17,7 @@ OPENSSL_VERSION_NUMBER, SSLeay, SSLeay_version - get OpenSSL version number OPENSSL_VERSION_NUMBER is a numeric release version identifier: - MMNNFFPPS: major minor fix patch status + MNNFFPPS: major minor fix patch status The status nibble has one of the values 0 for development, 1 to e for betas 1 to 14, and f for release. diff --git a/deps/openssl/openssl/doc/crypto/OPENSSL_config.pod b/deps/openssl/openssl/doc/crypto/OPENSSL_config.pod index 5096faca04fc5a..2d25b266951207 100644 --- a/deps/openssl/openssl/doc/crypto/OPENSSL_config.pod +++ b/deps/openssl/openssl/doc/crypto/OPENSSL_config.pod @@ -48,17 +48,6 @@ configuration file. Applications should free up configuration at application closedown by calling CONF_modules_free(). -=head1 RESTRICTIONS - -The OPENSSL_config() function is designed to be a very simple "call it and -forget it" function. As a result its behaviour is somewhat limited. It ignores -all errors silently and it can only load from the standard configuration file -location for example. - -It is however B better than nothing. Applications which need finer -control over their configuration functionality should use the configuration -functions such as CONF_load_modules() directly. - =head1 RETURN VALUES Neither OPENSSL_config() nor OPENSSL_no_config() return a value. diff --git a/deps/openssl/openssl/doc/crypto/OPENSSL_ia32cap.pod b/deps/openssl/openssl/doc/crypto/OPENSSL_ia32cap.pod index 2e659d34a5c43b..90156d21901b69 100644 --- a/deps/openssl/openssl/doc/crypto/OPENSSL_ia32cap.pod +++ b/deps/openssl/openssl/doc/crypto/OPENSSL_ia32cap.pod @@ -2,42 +2,95 @@ =head1 NAME -OPENSSL_ia32cap - finding the IA-32 processor capabilities +OPENSSL_ia32cap, OPENSSL_ia32cap_loc - the IA-32 processor capabilities vector =head1 SYNOPSIS - unsigned long *OPENSSL_ia32cap_loc(void); - #define OPENSSL_ia32cap (*(OPENSSL_ia32cap_loc())) + unsigned int *OPENSSL_ia32cap_loc(void); + #define OPENSSL_ia32cap ((OPENSSL_ia32cap_loc())[0]) =head1 DESCRIPTION Value returned by OPENSSL_ia32cap_loc() is address of a variable -containing IA-32 processor capabilities bit vector as it appears in EDX -register after executing CPUID instruction with EAX=1 input value (see -Intel Application Note #241618). Naturally it's meaningful on IA-32[E] -platforms only. The variable is normally set up automatically upon -toolkit initialization, but can be manipulated afterwards to modify -crypto library behaviour. For the moment of this writing six bits are -significant, namely: - -1. bit #28 denoting Hyperthreading, which is used to distiguish - cores with shared cache; -2. bit #26 denoting SSE2 support; -3. bit #25 denoting SSE support; -4. bit #23 denoting MMX support; -5. bit #20, reserved by Intel, is used to choose between RC4 code - pathes; -6. bit #4 denoting presence of Time-Stamp Counter. +containing IA-32 processor capabilities bit vector as it appears in +EDX:ECX register pair after executing CPUID instruction with EAX=1 +input value (see Intel Application Note #241618). Naturally it's +meaningful on x86 and x86_64 platforms only. 
The variable is normally +set up automatically upon toolkit initialization, but can be +manipulated afterwards to modify crypto library behaviour. For the +moment of this writing following bits are significant: + +=over + +=item bit #4 denoting presence of Time-Stamp Counter. + +=item bit #19 denoting availability of CLFLUSH instruction; + +=item bit #20, reserved by Intel, is used to choose among RC4 code paths; + +=item bit #23 denoting MMX support; + +=item bit #24, FXSR bit, denoting availability of XMM registers; + +=item bit #25 denoting SSE support; + +=item bit #26 denoting SSE2 support; + +=item bit #28 denoting Hyperthreading, which is used to distinguish +cores with shared cache; + +=item bit #30, reserved by Intel, denotes specifically Intel CPUs; + +=item bit #33 denoting availability of PCLMULQDQ instruction; + +=item bit #41 denoting SSSE3, Supplemental SSE3, support; + +=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs); + +=item bit #57 denoting AES-NI instruction set extension; + +=item bit #59, OSXSAVE bit, denoting availability of YMM registers; + +=item bit #60 denoting AVX extension; + +=item bit #62 denoting availability of RDRAND instruction; + +=back For example, clearing bit #26 at run-time disables high-performance -SSE2 code present in the crypto library. You might have to do this if -target OpenSSL application is executed on SSE2 capable CPU, but under -control of OS which does not support SSE2 extentions. Even though you -can manipulate the value programmatically, you most likely will find it -more appropriate to set up an environment variable with the same name -prior starting target application, e.g. on Intel P4 processor 'env -OPENSSL_ia32cap=0x12900010 apps/openssl', to achieve same effect -without modifying the application source code. Alternatively you can -reconfigure the toolkit with no-sse2 option and recompile. - -=cut +SSE2 code present in the crypto library, while clearing bit #24 +disables SSE2 code operating on 128-bit XMM register bank. You might +have to do the latter if target OpenSSL application is executed on SSE2 +capable CPU, but under control of OS that does not enable XMM +registers. Even though you can manipulate the value programmatically, +you most likely will find it more appropriate to set up an environment +variable with the same name prior starting target application, e.g. on +Intel P4 processor 'env OPENSSL_ia32cap=0x16980010 apps/openssl', or +better yet 'env OPENSSL_ia32cap=~0x1000000 apps/openssl' to achieve same +effect without modifying the application source code. Alternatively you +can reconfigure the toolkit with no-sse2 option and recompile. + +Less intuitive is clearing bit #28. The truth is that it's not copied +from CPUID output verbatim, but is adjusted to reflect whether or not +the data cache is actually shared between logical cores. This in turn +affects the decision on whether or not expensive countermeasures +against cache-timing attacks are applied, most notably in AES assembler +module. + +The vector is further extended with EBX value returned by CPUID with +EAX=7 and ECX=0 as input. Following bits are significant: + +=over + +=item bit #64+3 denoting availability of BMI1 instructions, e.g. ANDN; + +=item bit #64+5 denoting availability of AVX2 instructions; + +=item bit #64+8 denoting availability of BMI2 instructions, e.g. 
MUXL +and RORX; + +=item bit #64+18 denoting availability of RDSEED instruction; + +=item bit #64+19 denoting availability of ADCX and ADOX instructions; + +=back diff --git a/deps/openssl/openssl/doc/crypto/OPENSSL_instrument_bus.pod b/deps/openssl/openssl/doc/crypto/OPENSSL_instrument_bus.pod new file mode 100644 index 00000000000000..4ed83e4950b710 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/OPENSSL_instrument_bus.pod @@ -0,0 +1,42 @@ +=pod + +=head1 NAME + +OPENSSL_instrument_bus, OPENSSL_instrument_bus2 - instrument references to memory bus + +=head1 SYNOPSIS + + #ifdef OPENSSL_CPUID_OBJ + size_t OPENSSL_instrument_bus (int *vector,size_t num); + size_t OPENSSL_instrument_bus2(int *vector,size_t num,size_t max); + #endif + +=head1 DESCRIPTION + +It was empirically found that timings of references to primary memory +are subject to irregular, apparently non-deterministic variations. The +subroutines in question instrument these references for purposes of +gathering entropy for random number generator. In order to make it +bus-bound a 'flush cache line' instruction is used between probes. In +addition probes are added to B elements in atomic or +interlocked manner, which should contribute additional noise on +multi-processor systems. This also means that B should be +zeroed upon invocation (if you want to retrieve actual probe values). + +OPENSSL_instrument_bus performs B probes and records the number of +oscillator cycles every probe took. + +OPENSSL_instrument_bus2 on the other hand B consecutive +probes with the same value, i.e. in a way it records duration of +periods when probe values appeared deterministic. The subroutine +performs at most B probes in attempt to fill the B, +with B value of 0 meaning "as many as it takes." + +=head1 RETURN VALUE + +Return value of 0 indicates that CPU is not capable of performing the +benchmark, either because oscillator counter or 'flush cache line' is +not available on current platform. For reference, on x86 'flush cache +line' was introduced with the SSE2 extensions. + +Otherwise number of recorded values is returned. 
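A short sketch of how these probes might be collected, using the prototype exactly as given in the SYNOPSIS above (the library must be built with OPENSSL_CPUID_OBJ for the symbol to exist, and the buffer size here is an arbitrary choice):

    #include <stdio.h>
    #include <stddef.h>
    #include <string.h>

    /* Prototype as shown in the SYNOPSIS above. */
    size_t OPENSSL_instrument_bus(int *vector, size_t num);

    int main(void)
    {
        int probes[256];
        size_t n;

        memset(probes, 0, sizeof(probes)); /* zero so actual probe values can be read back */
        n = OPENSSL_instrument_bus(probes, sizeof(probes) / sizeof(probes[0]));
        if (n == 0)
            printf("no cycle counter or 'flush cache line' on this CPU\n");
        else
            printf("recorded %u probe timings\n", (unsigned)n);
        return 0;
    }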
diff --git a/deps/openssl/openssl/doc/crypto/OPENSSL_load_builtin_modules.pod b/deps/openssl/openssl/doc/crypto/OPENSSL_load_builtin_modules.pod index f14dfaf005dd8e..de62912ff2530a 100644 --- a/deps/openssl/openssl/doc/crypto/OPENSSL_load_builtin_modules.pod +++ b/deps/openssl/openssl/doc/crypto/OPENSSL_load_builtin_modules.pod @@ -2,7 +2,7 @@ =head1 NAME -OPENSSL_load_builtin_modules - add standard configuration modules +OPENSSL_load_builtin_modules, ASN1_add_oid_module, ENGINE_add_conf_module - add standard configuration modules =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/OpenSSL_add_all_algorithms.pod b/deps/openssl/openssl/doc/crypto/OpenSSL_add_all_algorithms.pod index e63411b5bba0c1..bcb79e5f6b4510 100644 --- a/deps/openssl/openssl/doc/crypto/OpenSSL_add_all_algorithms.pod +++ b/deps/openssl/openssl/doc/crypto/OpenSSL_add_all_algorithms.pod @@ -2,7 +2,7 @@ =head1 NAME -OpenSSL_add_all_algorithms, OpenSSL_add_all_ciphers, OpenSSL_add_all_digests - +OpenSSL_add_all_algorithms, OpenSSL_add_all_ciphers, OpenSSL_add_all_digests, EVP_cleanup - add algorithms to internal table =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/PKCS7_verify.pod b/deps/openssl/openssl/doc/crypto/PKCS7_verify.pod index 7c10a4cc3c04f1..f083306b0dc34b 100644 --- a/deps/openssl/openssl/doc/crypto/PKCS7_verify.pod +++ b/deps/openssl/openssl/doc/crypto/PKCS7_verify.pod @@ -2,7 +2,7 @@ =head1 NAME -PKCS7_verify - verify a PKCS#7 signedData structure +PKCS7_verify, PKCS7_get0_signers - verify a PKCS#7 signedData structure =head1 SYNOPSIS @@ -91,8 +91,8 @@ timestamp). =head1 RETURN VALUES -PKCS7_verify() returns 1 for a successful verification and zero or a negative -value if an error occurs. +PKCS7_verify() returns one for a successful verification and zero +if an error occurs. PKCS7_get0_signers() returns all signers or B if an error occurred. diff --git a/deps/openssl/openssl/doc/crypto/RAND_egd.pod b/deps/openssl/openssl/doc/crypto/RAND_egd.pod index 8b8c61d161b1a8..80fa734d1865b6 100644 --- a/deps/openssl/openssl/doc/crypto/RAND_egd.pod +++ b/deps/openssl/openssl/doc/crypto/RAND_egd.pod @@ -2,7 +2,7 @@ =head1 NAME -RAND_egd - query entropy gathering daemon +RAND_egd, RAND_egd_bytes, RAND_query_egd_bytes - query entropy gathering daemon =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/RSA_generate_key.pod b/deps/openssl/openssl/doc/crypto/RSA_generate_key.pod index 52dbb14a537deb..881391a045047a 100644 --- a/deps/openssl/openssl/doc/crypto/RSA_generate_key.pod +++ b/deps/openssl/openssl/doc/crypto/RSA_generate_key.pod @@ -2,28 +2,33 @@ =head1 NAME -RSA_generate_key - generate RSA key pair +RSA_generate_key_ex, RSA_generate_key - generate RSA key pair =head1 SYNOPSIS #include + int RSA_generate_key_ex(RSA *rsa, int bits, BIGNUM *e, BN_GENCB *cb); + +Deprecated: + RSA *RSA_generate_key(int num, unsigned long e, void (*callback)(int,int,void *), void *cb_arg); =head1 DESCRIPTION -RSA_generate_key() generates a key pair and returns it in a newly -allocated B structure. The pseudo-random number generator must -be seeded prior to calling RSA_generate_key(). +RSA_generate_key_ex() generates a key pair and stores it in the B +structure provided in B. The pseudo-random number generator must +be seeded prior to calling RSA_generate_key_ex(). -The modulus size will be B bits, and the public exponent will be +The modulus size will be of length B, and the public exponent will be B. Key sizes with B E 1024 should be considered insecure. 
The exponent is an odd number, typically 3, 17 or 65537. A callback function may be used to provide feedback about the -progress of the key generation. If B is not B, it -will be called as follows: +progress of the key generation. If B is not B, it +will be called as follows using the BN_GENCB_call() function +described on the L page. =over 4 @@ -35,32 +40,38 @@ described in L. =item * When the n-th randomly generated prime is rejected as not -suitable for the key, B is called. +suitable for the key, B is called. =item * When a random p has been found with p-1 relatively prime to B, -it is called as B. +it is called as B. =back -The process is then repeated for prime q with B. +The process is then repeated for prime q with B. + +RSA_generate_key is deprecated (new applications should use +RSA_generate_key_ex instead). RSA_generate_key works in the same was as +RSA_generate_key_ex except it uses "old style" call backs. See +L for further details. =head1 RETURN VALUE -If key generation fails, RSA_generate_key() returns B; the -error codes can be obtained by L. +If key generation fails, RSA_generate_key() returns B. + +The error codes can be obtained by L. =head1 BUGS -B is used with two different meanings. +B is used with two different meanings. RSA_generate_key() goes into an infinite loop for illegal input values. =head1 SEE ALSO L, L, L, -L +L, L =head1 HISTORY diff --git a/deps/openssl/openssl/doc/crypto/SSLeay_version.pod b/deps/openssl/openssl/doc/crypto/SSLeay_version.pod new file mode 100644 index 00000000000000..1500c2af912664 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/SSLeay_version.pod @@ -0,0 +1,74 @@ +=pod + +=head1 NAME + +SSLeay_version - retrieve version/build information about OpenSSL library + +=head1 SYNOPSIS + + #include + + const char *SSLeay_version(int type); + +=head1 DESCRIPTION + +SSLeay_version() returns a pointer to a constant string describing the +version of the OpenSSL library or giving information about the library +build. + +The following B values are supported: + +=over 4 + +=item SSLEAY_VERSION + +The version of the OpenSSL library including the release date. + +=item SSLEAY_CFLAGS + +The compiler flags set for the compilation process in the form +"compiler: ..." if available or "compiler: information not available" +otherwise. + +=item SSLEAY_BUILT_ON + +The date of the build process in the form "built on: ..." if available +or "built on: date not available" otherwise. + +=item SSLEAY_PLATFORM + +The "Configure" target of the library build in the form "platform: ..." +if available or "platform: information not available" otherwise. + +=item SSLEAY_DIR + +The "OPENSSLDIR" setting of the library build in the form "OPENSSLDIR: "..."" +if available or "OPENSSLDIR: N/A" otherwise. + +=back + +=head1 RETURN VALUES + +The following return values can occur: + +=over 4 + +=item "not available" + +An invalid value for B was given. + +=item Pointer to constant string + +Textual description. + +=back + +=head1 SEE ALSO + +L + +=head1 HISTORY + +B was added in OpenSSL 0.9.7. + +=cut diff --git a/deps/openssl/openssl/doc/crypto/X509_NAME_add_entry_by_txt.pod b/deps/openssl/openssl/doc/crypto/X509_NAME_add_entry_by_txt.pod index 043766cc461bee..3bdc07fcfbea66 100644 --- a/deps/openssl/openssl/doc/crypto/X509_NAME_add_entry_by_txt.pod +++ b/deps/openssl/openssl/doc/crypto/X509_NAME_add_entry_by_txt.pod @@ -44,7 +44,7 @@ B. The deleted entry is returned and must be freed up. 
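Since this page covers X509_NAME_add_entry_by_txt(), a small sketch of building a subject name from string fields may help (the field values and helper name are illustrative):

    #include <openssl/x509.h>

    X509_NAME *build_subject(void)
    {
        X509_NAME *name = X509_NAME_new();

        if (name == NULL)
            return NULL;
        if (!X509_NAME_add_entry_by_txt(name, "C", MBSTRING_ASC,
                                        (const unsigned char *)"GB", -1, -1, 0) ||
            !X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC,
                                        (const unsigned char *)"example.com", -1, -1, 0)) {
            X509_NAME_free(name);
            return NULL;
        }
        return name;
    }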
=head1 NOTES The use of string types such as B or B -is strongly recommened for the B parameter. This allows the +is strongly recommended for the B parameter. This allows the internal code to correctly determine the type of the field and to apply length checks according to the relevant standards. This is done using ASN1_STRING_set_by_NID(). diff --git a/deps/openssl/openssl/doc/crypto/X509_STORE_CTX_get_error.pod b/deps/openssl/openssl/doc/crypto/X509_STORE_CTX_get_error.pod index 60e8332ae9ddf0..be00ff1fecf415 100644 --- a/deps/openssl/openssl/doc/crypto/X509_STORE_CTX_get_error.pod +++ b/deps/openssl/openssl/doc/crypto/X509_STORE_CTX_get_error.pod @@ -32,7 +32,7 @@ checks. X509_STORE_CTX_get_error_depth() returns the B of the error. This is a non-negative integer representing where in the certificate chain the error -occurred. If it is zero it occured in the end entity certificate, one if +occurred. If it is zero it occurred in the end entity certificate, one if it is the certificate which signed the end entity certificate and so on. X509_STORE_CTX_get_current_cert() returns the certificate in B which @@ -246,11 +246,11 @@ Some feature of a certificate extension is not supported. Unused. =item B -A name constraint violation occured in the permitted subtrees. +A name constraint violation occurred in the permitted subtrees. =item B -A name constraint violation occured in the excluded subtrees. +A name constraint violation occurred in the excluded subtrees. =item B @@ -270,7 +270,7 @@ a garbage extension or some new feature not currently supported. =item B -An error occured when attempting to verify the CRL path. This error can only +An error occurred when attempting to verify the CRL path. This error can only happen if extended CRL checking is enabled. 
=item B diff --git a/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod b/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod index 46cac2bea2beaa..347d48dfec0ab7 100644 --- a/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod +++ b/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod @@ -2,7 +2,7 @@ =head1 NAME -X509_VERIFY_PARAM_set_flags, X509_VERIFY_PARAM_clear_flags, X509_VERIFY_PARAM_get_flags, X509_VERIFY_PARAM_set_purpose, X509_VERIFY_PARAM_set_trust, X509_VERIFY_PARAM_set_depth, X509_VERIFY_PARAM_get_depth, X509_VERIFY_PARAM_set_time, X509_VERIFY_PARAM_add0_policy, X509_VERIFY_PARAM_set1_policies - X509 verification parameters +X509_VERIFY_PARAM_set_flags, X509_VERIFY_PARAM_clear_flags, X509_VERIFY_PARAM_get_flags, X509_VERIFY_PARAM_set_purpose, X509_VERIFY_PARAM_set_trust, X509_VERIFY_PARAM_set_depth, X509_VERIFY_PARAM_get_depth, X509_VERIFY_PARAM_set_time, X509_VERIFY_PARAM_add0_policy, X509_VERIFY_PARAM_set1_policies, X509_VERIFY_PARAM_set1_host, X509_VERIFY_PARAM_add1_host, X509_VERIFY_PARAM_set_hostflags, X509_VERIFY_PARAM_get0_peername, X509_VERIFY_PARAM_set1_email, X509_VERIFY_PARAM_set1_ip, X509_VERIFY_PARAM_set1_ip_asc - X509 verification parameters =head1 SYNOPSIS @@ -26,6 +26,19 @@ X509_VERIFY_PARAM_set_flags, X509_VERIFY_PARAM_clear_flags, X509_VERIFY_PARAM_ge void X509_VERIFY_PARAM_set_depth(X509_VERIFY_PARAM *param, int depth); int X509_VERIFY_PARAM_get_depth(const X509_VERIFY_PARAM *param); + int X509_VERIFY_PARAM_set1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); + int X509_VERIFY_PARAM_add1_host(X509_VERIFY_PARAM *param, + const char *name, size_t namelen); + void X509_VERIFY_PARAM_set_hostflags(X509_VERIFY_PARAM *param, + unsigned int flags); + char *X509_VERIFY_PARAM_get0_peername(X509_VERIFY_PARAM *param); + int X509_VERIFY_PARAM_set1_email(X509_VERIFY_PARAM *param, + const char *email, size_t emaillen); + int X509_VERIFY_PARAM_set1_ip(X509_VERIFY_PARAM *param, + const unsigned char *ip, size_t iplen); + int X509_VERIFY_PARAM_set1_ip_asc(X509_VERIFY_PARAM *param, const char *ipasc); + =head1 DESCRIPTION These functions manipulate the B structure associated with @@ -61,12 +74,63 @@ X509_VERIFY_PARAM_set_depth() sets the maximum verification depth to B. That is the maximum number of untrusted CA certificates that can appear in a chain. +X509_VERIFY_PARAM_set1_host() sets the expected DNS hostname to +B clearing any previously specified host name or names. If +B is NULL, or empty the list of hostnames is cleared, and +name checks are not performed on the peer certificate. If B +is NUL-terminated, B may be zero, otherwise B +must be set to the length of B. When a hostname is specified, +certificate verification automatically invokes L +with flags equal to the B argument given to +B (default zero). Applications +are strongly advised to use this interface in preference to explicitly +calling L, hostname checks are out of scope +with the DANE-EE(3) certificate usage, and the internal check will +be suppressed as appropriate when DANE support is added to OpenSSL. + +X509_VERIFY_PARAM_add1_host() adds B as an additional reference +identifer that can match the peer's certificate. Any previous names +set via X509_VERIFY_PARAM_set1_host() or X509_VERIFY_PARAM_add1_host() +are retained, no change is made if B is NULL or empty. When +multiple names are configured, the peer is considered verified when +any name matches. 
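In practice these host checks are usually enabled through the verification parameters of an SSL_CTX; a minimal sketch, assuming OpenSSL 1.0.2's SSL_CTX_get0_param() and an already configured context (the helper name and flag choice are illustrative):

    #include <openssl/ssl.h>
    #include <openssl/x509v3.h>

    int enable_host_check(SSL_CTX *ssl_ctx, const char *host)
    {
        X509_VERIFY_PARAM *param = SSL_CTX_get0_param(ssl_ctx);

        /* Optional: tighten wildcard handling before setting the name. */
        X509_VERIFY_PARAM_set_hostflags(param,
                                        X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS);
        return X509_VERIFY_PARAM_set1_host(param, host, 0);
    }

With a NUL-terminated host name the length argument may be zero, as described above.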
+ +X509_VERIFY_PARAM_get0_peername() returns the DNS hostname or subject +CommonName from the peer certificate that matched one of the reference +identifiers. When wildcard matching is not disabled, or when a +reference identifier specifies a parent domain (starts with ".") +rather than a hostname, the peer name may be a wildcard name or a +sub-domain of the reference identifier respectively. The return +string is allocated by the library and is no longer valid once the +associated B argument is freed. Applications must not free +the return value. + +X509_VERIFY_PARAM_set1_email() sets the expected RFC822 email address to +B. If B is NUL-terminated, B may be zero, otherwise +B must be set to the length of B. When an email address +is specified, certificate verification automatically invokes +L. + +X509_VERIFY_PARAM_set1_ip() sets the expected IP address to B. +The B argument is in binary format, in network byte-order and +B must be set to 4 for IPv4 and 16 for IPv6. When an IP +address is specified, certificate verification automatically invokes +L. + +X509_VERIFY_PARAM_set1_ip_asc() sets the expected IP address to +B. The B argument is a NUL-terminal ASCII string: +dotted decimal quad for IPv4 and colon-separated hexadecimal for +IPv6. The condensed "::" notation is supported for IPv6 addresses. + =head1 RETURN VALUES -X509_VERIFY_PARAM_set_flags(), X509_VERIFY_PARAM_clear_flags(), +X509_VERIFY_PARAM_set_flags(), X509_VERIFY_PARAM_clear_flags(), X509_VERIFY_PARAM_set_purpose(), X509_VERIFY_PARAM_set_trust(), -X509_VERIFY_PARAM_add0_policy() and X509_VERIFY_PARAM_set1_policies() return 1 -for success and 0 for failure. +X509_VERIFY_PARAM_add0_policy() X509_VERIFY_PARAM_set1_policies(), +X509_VERIFY_PARAM_set1_host(), X509_VERIFY_PARAM_set_hostflags(), +X509_VERIFY_PARAM_set1_email(), X509_VERIFY_PARAM_set1_ip() and +X509_VERIFY_PARAM_set1_ip_asc() return 1 for success and 0 for +failure. X509_VERIFY_PARAM_get_flags() returns the current verification flags. @@ -162,7 +226,10 @@ connections associated with an B structure B: =head1 SEE ALSO -L +L, +L, +L, +L =head1 HISTORY diff --git a/deps/openssl/openssl/doc/crypto/X509_check_host.pod b/deps/openssl/openssl/doc/crypto/X509_check_host.pod new file mode 100644 index 00000000000000..0def17aac1c54a --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/X509_check_host.pod @@ -0,0 +1,140 @@ +=pod + +=head1 NAME + +X509_check_host, X509_check_email, X509_check_ip, X509_check_ip_asc - X.509 certificate matching + +=head1 SYNOPSIS + + #include + + int X509_check_host(X509 *, const char *name, size_t namelen, + unsigned int flags, char **peername); + int X509_check_email(X509 *, const char *address, size_t addresslen, + unsigned int flags); + int X509_check_ip(X509 *, const unsigned char *address, size_t addresslen, + unsigned int flags); + int X509_check_ip_asc(X509 *, const char *address, unsigned int flags); + +=head1 DESCRIPTION + +The certificate matching functions are used to check whether a +certificate matches a given host name, email address, or IP address. +The validity of the certificate and its trust level has to be checked by +other means. + +X509_check_host() checks if the certificate Subject Alternative +Name (SAN) or Subject CommonName (CN) matches the specified host +name, which must be encoded in the preferred name syntax described +in section 3.5 of RFC 1034. By default, wildcards are supported +and they match only in the left-most label; but they may match +part of that label with an explicit prefix or suffix. 
For example, +by default, the host B "www.example.com" would match a +certificate with a SAN or CN value of "*.example.com", "w*.example.com" +or "*w.example.com". + +Per section 6.4.2 of RFC 6125, B values representing international +domain names must be given in A-label form. The B argument +must be the number of characters in the name string or zero in which +case the length is calculated with strlen(B). When B starts +with a dot (e.g ".example.com"), it will be matched by a certificate +valid for any sub-domain of B, (see also +B below). + +When the certificate is matched, and B is not NULL, a +pointer to a copy of the matching SAN or CN from the peer certificate +is stored at the address passed in B. The application +is responsible for freeing the peername via OPENSSL_free() when it +is no longer needed. + +X509_check_email() checks if the certificate matches the specified +email B

. Only the mailbox syntax of RFC 822 is supported, +comments are not allowed, and no attempt is made to normalize quoted +characters. The B argument must be the number of +characters in the address string or zero in which case the length +is calculated with strlen(B
). + +X509_check_ip() checks if the certificate matches a specified IPv4 or +IPv6 address. The B
array is in binary format, in network +byte order. The length is either 4 (IPv4) or 16 (IPv6). Only +explicitly marked addresses in the certificates are considered; IP +addresses stored in DNS names and Common Names are ignored. + +X509_check_ip_asc() is similar, except that the NUL-terminated +string B
is first converted to the internal representation. + +The B argument is usually 0. It can be the bitwise OR of the +flags: + +=over 4 + +=item B, + +=item B, + +=item B, + +=item B. + +=item B. + +=back + +The B flag causes the function +to consider the subject DN even if the certificate contains at least +one subject alternative name of the right type (DNS name or email +address as appropriate); the default is to ignore the subject DN +when at least one corresponding subject alternative names is present. + +If set, B disables wildcard +expansion; this only applies to B. + +If set, B suppresses support +for "*" as wildcard pattern in labels that have a prefix or suffix, +such as: "www*" or "*www"; this only aplies to B. + +If set, B allows a "*" that +constitutes the complete label of a DNS name (e.g. "*.example.com") +to match more than one label in B; this flag only applies +to B. + +If set, B restricts B +values which start with ".", that would otherwise match any sub-domain +in the peer certificate, to only match direct child sub-domains. +Thus, for instance, with this flag set a B of ".example.com" +would match a peer certificate with a DNS name of "www.example.com", +but would not match a peer certificate with a DNS name of +"www.sub.example.com"; this flag only applies to B. + +=head1 RETURN VALUES + +The functions return 1 for a successful match, 0 for a failed match +and -1 for an internal error: typically a memory allocation failure +or an ASN.1 decoding error. + +All functions can also return -2 if the input is malformed. For example, +X509_check_host() returns -2 if the provided B contains embedded +NULs. + +=head1 NOTES + +Applications are encouraged to use X509_VERIFY_PARAM_set1_host() +rather than explicitly calling L. Host name +checks are out of scope with the DANE-EE(3) certificate usage, +and the internal checks will be suppressed as appropriate when +DANE support is added to OpenSSL. + +=head1 SEE ALSO + +L, +L, +L, +L, +L, +L + +=head1 HISTORY + +These functions were added in OpenSSL 1.1.0. + +=cut diff --git a/deps/openssl/openssl/doc/crypto/crypto.pod b/deps/openssl/openssl/doc/crypto/crypto.pod index 7a527992bb5eb2..f18edfe3053b52 100644 --- a/deps/openssl/openssl/doc/crypto/crypto.pod +++ b/deps/openssl/openssl/doc/crypto/crypto.pod @@ -56,7 +56,7 @@ L, L =item INTERNAL FUNCTIONS -L, L, L, +L, L, L, L, L, L, L diff --git a/deps/openssl/openssl/doc/crypto/d2i_DSAPublicKey.pod b/deps/openssl/openssl/doc/crypto/d2i_DSAPublicKey.pod index 22c1b50f22888e..e999376492401b 100644 --- a/deps/openssl/openssl/doc/crypto/d2i_DSAPublicKey.pod +++ b/deps/openssl/openssl/doc/crypto/d2i_DSAPublicKey.pod @@ -3,7 +3,7 @@ =head1 NAME d2i_DSAPublicKey, i2d_DSAPublicKey, d2i_DSAPrivateKey, i2d_DSAPrivateKey, -d2i_DSA_PUBKEY, i2d_DSA_PUBKEY, d2i_DSA_SIG, i2d_DSA_SIG - DSA key encoding +d2i_DSA_PUBKEY, i2d_DSA_PUBKEY, d2i_DSAparams, i2d_DSAparams, d2i_DSA_SIG, i2d_DSA_SIG - DSA key encoding and parsing functions. 
=head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod b/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod new file mode 100644 index 00000000000000..704b4ab35286a8 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod @@ -0,0 +1,84 @@ +=pod + +=head1 NAME + +d2i_ECPKParameters, i2d_ECPKParameters, d2i_ECPKParameters_bio, i2d_ECPKParameters_bio, d2i_ECPKParameters_fp, i2d_ECPKParameters_fp, ECPKParameters_print, ECPKParameters_print_fp - Functions for decoding and encoding ASN1 representations of elliptic curve entities + +=head1 SYNOPSIS + + #include + + EC_GROUP *d2i_ECPKParameters(EC_GROUP **px, const unsigned char **in, long len); + int i2d_ECPKParameters(const EC_GROUP *x, unsigned char **out); + #define d2i_ECPKParameters_bio(bp,x) ASN1_d2i_bio_of(EC_GROUP,NULL,d2i_ECPKParameters,bp,x) + #define i2d_ECPKParameters_bio(bp,x) ASN1_i2d_bio_of_const(EC_GROUP,i2d_ECPKParameters,bp,x) + #define d2i_ECPKParameters_fp(fp,x) (EC_GROUP *)ASN1_d2i_fp(NULL, \ + (char *(*)())d2i_ECPKParameters,(fp),(unsigned char **)(x)) + #define i2d_ECPKParameters_fp(fp,x) ASN1_i2d_fp(i2d_ECPKParameters,(fp), \ + (unsigned char *)(x)) + int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off); + int ECPKParameters_print_fp(FILE *fp, const EC_GROUP *x, int off); + + +=head1 DESCRIPTION + +The ECPKParameters encode and decode routines encode and parse the public parameters for an +B structure, which represents a curve. + +d2i_ECPKParameters() attempts to decode B bytes at B<*in>. If +successful a pointer to the B structure is returned. If an error +occurred then B is returned. If B is not B then the +returned structure is written to B<*px>. If B<*px> is not B +then it is assumed that B<*px> contains a valid B +structure and an attempt is made to reuse it. If the call is +successful B<*in> is incremented to the byte following the +parsed data. + +i2d_ECPKParameters() encodes the structure pointed to by B into DER format. +If B is not B is writes the DER encoded data to the buffer +at B<*out>, and increments it to point after the data just written. +If the return value is negative an error occurred, otherwise it +returns the length of the encoded data. + +If B<*out> is B memory will be allocated for a buffer and the encoded +data written to it. In this case B<*out> is not incremented and it points to +the start of the data just written. + +d2i_ECPKParameters_bio() is similar to d2i_ECPKParameters() except it attempts +to parse data from BIO B. + +d2i_ECPKParameters_fp() is similar to d2i_ECPKParameters() except it attempts +to parse data from FILE pointer B. + +i2d_ECPKParameters_bio() is similar to i2d_ECPKParameters() except it writes +the encoding of the structure B to BIO B and it +returns 1 for success and 0 for failure. + +i2d_ECPKParameters_fp() is similar to i2d_ECPKParameters() except it writes +the encoding of the structure B to BIO B and it +returns 1 for success and 0 for failure. + +These functions are very similar to the X509 functions described in L, +where further notes and examples are available. + +The ECPKParameters_print and ECPKParameters_print_fp functions print a human-readable output +of the public parameters of the EC_GROUP to B or B. The output lines are indented by B spaces. + +=head1 RETURN VALUES + +d2i_ECPKParameters(), d2i_ECPKParameters_bio() and d2i_ECPKParameters_fp() return a valid B structure +or B if an error occurs. 
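To make the encode/decode pairing above concrete, the following sketch round-trips a named curve through its DER encoding (the curve choice and helper name are illustrative; error handling is abbreviated):

    #include <openssl/ec.h>
    #include <openssl/objects.h>
    #include <openssl/crypto.h>

    int roundtrip_params(void)
    {
        EC_GROUP *grp = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
        EC_GROUP *copy = NULL;
        unsigned char *der = NULL;
        const unsigned char *p;
        int len, ok = 0;

        if (grp == NULL)
            return 0;
        len = i2d_ECPKParameters(grp, &der);   /* der is allocated by the call */
        if (len > 0) {
            p = der;                           /* d2i advances this pointer */
            copy = d2i_ECPKParameters(NULL, &p, len);
            ok = (copy != NULL);
        }
        EC_GROUP_free(copy);
        if (der != NULL)
            OPENSSL_free(der);
        EC_GROUP_free(grp);
        return ok;
    }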
+ +i2d_ECPKParameters() returns the number of bytes successfully encoded or a negative +value if an error occurs. + +i2d_ECPKParameters_bio(), i2d_ECPKParameters_fp(), ECPKParameters_print and ECPKParameters_print_fp +return 1 for success and 0 if an error occurs. + +=head1 SEE ALSO + +L, L, L, L, +L, L, L, +L, L + +=cut diff --git a/deps/openssl/openssl/doc/crypto/d2i_X509.pod b/deps/openssl/openssl/doc/crypto/d2i_X509.pod index e3dc2381966b23..5b7c16fd0316bc 100644 --- a/deps/openssl/openssl/doc/crypto/d2i_X509.pod +++ b/deps/openssl/openssl/doc/crypto/d2i_X509.pod @@ -18,6 +18,8 @@ i2d_X509_fp - X509 encode and decode functions int i2d_X509_bio(BIO *bp, X509 *x); int i2d_X509_fp(FILE *fp, X509 *x); + int i2d_re_X509_tbs(X509 *x, unsigned char **out); + =head1 DESCRIPTION The X509 encode and decode routines encode and parse an @@ -60,11 +62,17 @@ i2d_X509_fp() is similar to i2d_X509() except it writes the encoding of the structure B to BIO B and it returns 1 for success and 0 for failure. +i2d_re_X509_tbs() is similar to i2d_X509() except it encodes +only the TBSCertificate portion of the certificate. + =head1 NOTES The letters B and B in for example B stand for -"internal" (that is an internal C structure) and "DER". So that -B converts from internal to DER. +"internal" (that is an internal C structure) and "DER". So +B converts from internal to DER. The "re" in +B stands for "re-encode", and ensures that a fresh +encoding is generated in case the object has been modified after +creation (see the BUGS section). The functions can also understand B forms. @@ -209,6 +217,21 @@ fields entirely and will not be parsed by d2i_X509(). This may be fixed in future so code should not assume that i2d_X509() will always succeed. +The encoding of the TBSCertificate portion of a certificate is cached +in the B structure internally to improve encoding performance +and to ensure certificate signatures are verified correctly in some +certificates with broken (non-DER) encodings. + +Any function which encodes an X509 structure such as i2d_X509(), +i2d_X509_fp() or i2d_X509_bio() may return a stale encoding if the +B structure has been modified after deserialization or previous +serialization. + +If, after modification, the B object is re-signed with X509_sign(), +the encoding is automatically renewed. Otherwise, the encoding of the +TBSCertificate portion of the B can be manually renewed by calling +i2d_re_X509_tbs(). + =head1 RETURN VALUES d2i_X509(), d2i_X509_bio() and d2i_X509_fp() return a valid B structure diff --git a/deps/openssl/openssl/doc/crypto/d2i_X509_CRL.pod b/deps/openssl/openssl/doc/crypto/d2i_X509_CRL.pod index 224f9e082b7aaf..675d38b3e5a578 100644 --- a/deps/openssl/openssl/doc/crypto/d2i_X509_CRL.pod +++ b/deps/openssl/openssl/doc/crypto/d2i_X509_CRL.pod @@ -2,7 +2,7 @@ =head1 NAME -d2i_X509_CRL, i2d_X509_CRL, d2i_X509_CRL_bio, d2i_509_CRL_fp, +d2i_X509_CRL, i2d_X509_CRL, d2i_X509_CRL_bio, d2i_X509_CRL_fp, i2d_X509_CRL_bio, i2d_X509_CRL_fp - PKCS#10 certificate request functions. 
=head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/ec.pod b/deps/openssl/openssl/doc/crypto/ec.pod new file mode 100644 index 00000000000000..7d57ba8ea07113 --- /dev/null +++ b/deps/openssl/openssl/doc/crypto/ec.pod @@ -0,0 +1,201 @@ +=pod + +=head1 NAME + +ec - Elliptic Curve functions + +=head1 SYNOPSIS + + #include + #include + + const EC_METHOD *EC_GFp_simple_method(void); + const EC_METHOD *EC_GFp_mont_method(void); + const EC_METHOD *EC_GFp_nist_method(void); + const EC_METHOD *EC_GFp_nistp224_method(void); + const EC_METHOD *EC_GFp_nistp256_method(void); + const EC_METHOD *EC_GFp_nistp521_method(void); + + const EC_METHOD *EC_GF2m_simple_method(void); + + EC_GROUP *EC_GROUP_new(const EC_METHOD *meth); + void EC_GROUP_free(EC_GROUP *group); + void EC_GROUP_clear_free(EC_GROUP *group); + int EC_GROUP_copy(EC_GROUP *dst, const EC_GROUP *src); + EC_GROUP *EC_GROUP_dup(const EC_GROUP *src); + const EC_METHOD *EC_GROUP_method_of(const EC_GROUP *group); + int EC_METHOD_get_field_type(const EC_METHOD *meth); + int EC_GROUP_set_generator(EC_GROUP *group, const EC_POINT *generator, const BIGNUM *order, const BIGNUM *cofactor); + const EC_POINT *EC_GROUP_get0_generator(const EC_GROUP *group); + int EC_GROUP_get_order(const EC_GROUP *group, BIGNUM *order, BN_CTX *ctx); + int EC_GROUP_get_cofactor(const EC_GROUP *group, BIGNUM *cofactor, BN_CTX *ctx); + void EC_GROUP_set_curve_name(EC_GROUP *group, int nid); + int EC_GROUP_get_curve_name(const EC_GROUP *group); + void EC_GROUP_set_asn1_flag(EC_GROUP *group, int flag); + int EC_GROUP_get_asn1_flag(const EC_GROUP *group); + void EC_GROUP_set_point_conversion_form(EC_GROUP *group, point_conversion_form_t form); + point_conversion_form_t EC_GROUP_get_point_conversion_form(const EC_GROUP *); + unsigned char *EC_GROUP_get0_seed(const EC_GROUP *x); + size_t EC_GROUP_get_seed_len(const EC_GROUP *); + size_t EC_GROUP_set_seed(EC_GROUP *, const unsigned char *, size_t len); + int EC_GROUP_set_curve_GFp(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_get_curve_GFp(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_set_curve_GF2m(EC_GROUP *group, const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_get_curve_GF2m(const EC_GROUP *group, BIGNUM *p, BIGNUM *a, BIGNUM *b, BN_CTX *ctx); + int EC_GROUP_get_degree(const EC_GROUP *group); + int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx); + int EC_GROUP_check_discriminant(const EC_GROUP *group, BN_CTX *ctx); + int EC_GROUP_cmp(const EC_GROUP *a, const EC_GROUP *b, BN_CTX *ctx); + EC_GROUP *EC_GROUP_new_curve_GFp(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + EC_GROUP *EC_GROUP_new_curve_GF2m(const BIGNUM *p, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx); + EC_GROUP *EC_GROUP_new_by_curve_name(int nid); + + size_t EC_get_builtin_curves(EC_builtin_curve *r, size_t nitems); + + EC_POINT *EC_POINT_new(const EC_GROUP *group); + void EC_POINT_free(EC_POINT *point); + void EC_POINT_clear_free(EC_POINT *point); + int EC_POINT_copy(EC_POINT *dst, const EC_POINT *src); + EC_POINT *EC_POINT_dup(const EC_POINT *src, const EC_GROUP *group); + const EC_METHOD *EC_POINT_method_of(const EC_POINT *point); + int EC_POINT_set_to_infinity(const EC_GROUP *group, EC_POINT *point); + int EC_POINT_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, const BIGNUM *y, const BIGNUM *z, BN_CTX *ctx); + int EC_POINT_get_Jprojective_coordinates_GFp(const EC_GROUP 
*group, + const EC_POINT *p, BIGNUM *x, BIGNUM *y, BIGNUM *z, BN_CTX *ctx); + int EC_POINT_set_affine_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx); + int EC_POINT_get_affine_coordinates_GFp(const EC_GROUP *group, + const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); + int EC_POINT_set_compressed_coordinates_GFp(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, int y_bit, BN_CTX *ctx); + int EC_POINT_set_affine_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx); + int EC_POINT_get_affine_coordinates_GF2m(const EC_GROUP *group, + const EC_POINT *p, BIGNUM *x, BIGNUM *y, BN_CTX *ctx); + int EC_POINT_set_compressed_coordinates_GF2m(const EC_GROUP *group, EC_POINT *p, + const BIGNUM *x, int y_bit, BN_CTX *ctx); + size_t EC_POINT_point2oct(const EC_GROUP *group, const EC_POINT *p, + point_conversion_form_t form, + unsigned char *buf, size_t len, BN_CTX *ctx); + int EC_POINT_oct2point(const EC_GROUP *group, EC_POINT *p, + const unsigned char *buf, size_t len, BN_CTX *ctx); + BIGNUM *EC_POINT_point2bn(const EC_GROUP *, const EC_POINT *, + point_conversion_form_t form, BIGNUM *, BN_CTX *); + EC_POINT *EC_POINT_bn2point(const EC_GROUP *, const BIGNUM *, + EC_POINT *, BN_CTX *); + char *EC_POINT_point2hex(const EC_GROUP *, const EC_POINT *, + point_conversion_form_t form, BN_CTX *); + EC_POINT *EC_POINT_hex2point(const EC_GROUP *, const char *, + EC_POINT *, BN_CTX *); + + int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx); + int EC_POINT_dbl(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a, BN_CTX *ctx); + int EC_POINT_invert(const EC_GROUP *group, EC_POINT *a, BN_CTX *ctx); + int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *p); + int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx); + int EC_POINT_cmp(const EC_GROUP *group, const EC_POINT *a, const EC_POINT *b, BN_CTX *ctx); + int EC_POINT_make_affine(const EC_GROUP *group, EC_POINT *point, BN_CTX *ctx); + int EC_POINTs_make_affine(const EC_GROUP *group, size_t num, EC_POINT *points[], BN_CTX *ctx); + int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, size_t num, const EC_POINT *p[], const BIGNUM *m[], BN_CTX *ctx); + int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *n, const EC_POINT *q, const BIGNUM *m, BN_CTX *ctx); + int EC_GROUP_precompute_mult(EC_GROUP *group, BN_CTX *ctx); + int EC_GROUP_have_precompute_mult(const EC_GROUP *group); + + int EC_GROUP_get_basis_type(const EC_GROUP *); + int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k); + int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, + unsigned int *k2, unsigned int *k3); + EC_GROUP *d2i_ECPKParameters(EC_GROUP **, const unsigned char **in, long len); + int i2d_ECPKParameters(const EC_GROUP *, unsigned char **out); + #define d2i_ECPKParameters_bio(bp,x) ASN1_d2i_bio_of(EC_GROUP,NULL,d2i_ECPKParameters,bp,x) + #define i2d_ECPKParameters_bio(bp,x) ASN1_i2d_bio_of_const(EC_GROUP,i2d_ECPKParameters,bp,x) + #define d2i_ECPKParameters_fp(fp,x) (EC_GROUP *)ASN1_d2i_fp(NULL, \ + (char *(*)())d2i_ECPKParameters,(fp),(unsigned char **)(x)) + #define i2d_ECPKParameters_fp(fp,x) ASN1_i2d_fp(i2d_ECPKParameters,(fp), \ + (unsigned char *)(x)) + int ECPKParameters_print(BIO *bp, const EC_GROUP *x, int off); + int ECPKParameters_print_fp(FILE *fp, const EC_GROUP *x, int off); + + EC_KEY *EC_KEY_new(void); + int 
EC_KEY_get_flags(const EC_KEY *key); + void EC_KEY_set_flags(EC_KEY *key, int flags); + void EC_KEY_clear_flags(EC_KEY *key, int flags); + EC_KEY *EC_KEY_new_by_curve_name(int nid); + void EC_KEY_free(EC_KEY *key); + EC_KEY *EC_KEY_copy(EC_KEY *dst, const EC_KEY *src); + EC_KEY *EC_KEY_dup(const EC_KEY *src); + int EC_KEY_up_ref(EC_KEY *key); + const EC_GROUP *EC_KEY_get0_group(const EC_KEY *key); + int EC_KEY_set_group(EC_KEY *key, const EC_GROUP *group); + const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key); + int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *prv); + const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key); + int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub); + unsigned EC_KEY_get_enc_flags(const EC_KEY *key); + void EC_KEY_set_enc_flags(EC_KEY *eckey, unsigned int flags); + point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key); + void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform); + void *EC_KEY_get_key_method_data(EC_KEY *key, + void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); + void EC_KEY_insert_key_method_data(EC_KEY *key, void *data, + void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); + void EC_KEY_set_asn1_flag(EC_KEY *eckey, int asn1_flag); + int EC_KEY_precompute_mult(EC_KEY *key, BN_CTX *ctx); + int EC_KEY_generate_key(EC_KEY *key); + int EC_KEY_check_key(const EC_KEY *key); + int EC_KEY_set_public_key_affine_coordinates(EC_KEY *key, BIGNUM *x, BIGNUM *y); + + EC_KEY *d2i_ECPrivateKey(EC_KEY **key, const unsigned char **in, long len); + int i2d_ECPrivateKey(EC_KEY *key, unsigned char **out); + + EC_KEY *d2i_ECParameters(EC_KEY **key, const unsigned char **in, long len); + int i2d_ECParameters(EC_KEY *key, unsigned char **out); + + EC_KEY *o2i_ECPublicKey(EC_KEY **key, const unsigned char **in, long len); + int i2o_ECPublicKey(EC_KEY *key, unsigned char **out); + int ECParameters_print(BIO *bp, const EC_KEY *key); + int EC_KEY_print(BIO *bp, const EC_KEY *key, int off); + int ECParameters_print_fp(FILE *fp, const EC_KEY *key); + int EC_KEY_print_fp(FILE *fp, const EC_KEY *key, int off); + #define ECParameters_dup(x) ASN1_dup_of(EC_KEY,i2d_ECParameters,d2i_ECParameters,x) + #define EVP_PKEY_CTX_set_ec_paramgen_curve_nid(ctx, nid) \ + EVP_PKEY_CTX_ctrl(ctx, EVP_PKEY_EC, EVP_PKEY_OP_PARAMGEN, \ + EVP_PKEY_CTRL_EC_PARAMGEN_CURVE_NID, nid, NULL) + + +=head1 DESCRIPTION + +This library provides an extensive set of functions for performing operations on elliptic curves over finite fields. +In general an elliptic curve is one with an equation of the form: + +y^2 = x^3 + ax + b + +An B structure is used to represent the definition of an elliptic curve. Points on a curve are stored using an +B structure. An B is used to hold a private/public key pair, where a private key is simply a BIGNUM and a +public key is a point on a curve (represented by an B). + +The library contains a number of alternative implementations of the different functions. Each implementation is optimised +for different scenarios. No matter which implementation is being used, the interface remains the same. The library +handles calling the correct implementation when an interface function is invoked. An implementation is represented by +an B structure. + +The creation and destruction of B objects is described in L. Functions for +manipulating B objects are described in L. 
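As a small illustration of how these objects fit together, the sketch below creates an EC_KEY on a named curve and generates a key pair (the curve and helper name are illustrative):

    #include <openssl/ec.h>
    #include <openssl/obj_mac.h>

    EC_KEY *make_p256_key(void)
    {
        EC_KEY *key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1);

        if (key == NULL)
            return NULL;
        if (!EC_KEY_generate_key(key)) {   /* fills in the private and public parts */
            EC_KEY_free(key);
            return NULL;
        }
        return key;
    }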
+ +Functions for creating, destroying and manipulating B objects are explained in L, +whilst functions for performing mathematical operations and tests on B are coverd in L. + +For working with private and public keys refer to L. Implementations are covered in +L. + +For information on encoding and decoding curve parameters to and from ASN1 see L. + +=head1 SEE ALSO + +L, L, L, +L, L, L, +L, L + + +=cut diff --git a/deps/openssl/openssl/doc/crypto/ecdsa.pod b/deps/openssl/openssl/doc/crypto/ecdsa.pod index 59a5916de1239d..46c071b7330878 100644 --- a/deps/openssl/openssl/doc/crypto/ecdsa.pod +++ b/deps/openssl/openssl/doc/crypto/ecdsa.pod @@ -2,7 +2,7 @@ =head1 NAME -ecdsa - Elliptic Curve Digital Signature Algorithm +ECDSA_SIG_new, ECDSA_SIG_free, i2d_ECDSA_SIG, d2i_ECDSA_SIG, ECDSA_size, ECDSA_sign_setup, ECDSA_sign, ECDSA_sign_ex, ECDSA_verify, ECDSA_do_sign, ECDSA_do_sign_ex, ECDSA_do_verify - Elliptic Curve Digital Signature Algorithm =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/evp.pod b/deps/openssl/openssl/doc/crypto/evp.pod index 9faa349243af46..29fab9fd517395 100644 --- a/deps/openssl/openssl/doc/crypto/evp.pod +++ b/deps/openssl/openssl/doc/crypto/evp.pod @@ -13,22 +13,58 @@ evp - high-level cryptographic functions The EVP library provides a high-level interface to cryptographic functions. -BI<...> and BI<...> provide public key encryption -and decryption to implement digital "envelopes". +LI<...>|EVP_SealInit(3)> and LI<...>|EVP_OpenInit(3)> +provide public key encryption and decryption to implement digital "envelopes". -The BI<...> and BI<...> functions implement -digital signatures. +The LI<...>|EVP_DigestSignInit(3)> and +LI<...>|EVP_DigestVerifyInit(3)> functions implement +digital signatures and Message Authentication Codes (MACs). Also see the older +LI<...>|EVP_SignInit(3)> and LI<...>|EVP_VerifyInit(3)> +functions. -Symmetric encryption is available with the BI<...> -functions. The BI<...> functions provide message digests. +Symmetric encryption is available with the LI<...>|EVP_EncryptInit(3)> +functions. The LI<...>|EVP_DigestInit(3)> functions provide message digests. The BI<...> functions provide a high level interface to -asymmetric algorithms. +asymmetric algorithms. To create a new EVP_PKEY see +L. EVP_PKEYs can be associated +with a private key of a particular algorithm by using the functions +described on the L page, or +new keys can be generated using L. +EVP_PKEYs can be compared using L, or printed using +L. + +The EVP_PKEY functions support the full range of asymmetric algorithm operations: + +=over + +=item For key agreement see L + +=item For signing and verifying see L, +L and L. +However, note that +these functions do not perform a digest of the data to be signed. Therefore +normally you would use the LI<...>|EVP_DigestSignInit(3)> +functions for this purpose. + +=item For encryption and decryption see L +and L respectively. However, note that +these functions perform encryption and decryption only. As public key +encryption is an expensive operation, normally you would wrap +an encrypted message in a "digital envelope" using the LI<...>|EVP_SealInit(3)> and +LI<...>|EVP_OpenInit(3)> functions. + +=back + +The L function provides some limited support for password +based encryption. Careful selection of the parameters will provide a PKCS#5 PBKDF1 compatible +implementation. However, new applications should not typically use this (preferring, for example, +PBKDF2 from PCKS#5). -Algorithms are loaded with OpenSSL_add_all_algorithms(3). 
+Algorithms are loaded with L. All the symmetric algorithms (ciphers), digests and asymmetric algorithms -(public key algorithms) can be replaced by ENGINE modules providing alternative +(public key algorithms) can be replaced by L modules providing alternative implementations. If ENGINE implementations of ciphers or digests are registered as defaults, then the various EVP functions will automatically use those implementations automatically in preference to built in software @@ -47,8 +83,20 @@ L, L, L, L, +L, L, L, +L, +L, +L, +L, +L, +L, +L, +L, +L, +L, +L, L, L diff --git a/deps/openssl/openssl/doc/crypto/hmac.pod b/deps/openssl/openssl/doc/crypto/hmac.pod index d92138d2731b31..58a57f47bb4fe4 100644 --- a/deps/openssl/openssl/doc/crypto/hmac.pod +++ b/deps/openssl/openssl/doc/crypto/hmac.pod @@ -2,8 +2,8 @@ =head1 NAME -HMAC, HMAC_Init, HMAC_Update, HMAC_Final, HMAC_cleanup - HMAC message -authentication code +HMAC, HMAC_CTX_init, HMAC_Init, HMAC_Init_ex, HMAC_Update, HMAC_Final, HMAC_CTX_cleanup, +HMAC_cleanup - HMAC message authentication code =head1 SYNOPSIS diff --git a/deps/openssl/openssl/doc/crypto/i2d_PKCS7_bio_stream.pod b/deps/openssl/openssl/doc/crypto/i2d_PKCS7_bio_stream.pod index dc4d884c597e81..a37231e267b934 100644 --- a/deps/openssl/openssl/doc/crypto/i2d_PKCS7_bio_stream.pod +++ b/deps/openssl/openssl/doc/crypto/i2d_PKCS7_bio_stream.pod @@ -23,7 +23,7 @@ streaming. =head1 BUGS -The prefix "d2i" is arguably wrong because the function outputs BER format. +The prefix "i2d" is arguably wrong because the function outputs BER format. =head1 RETURN VALUES diff --git a/deps/openssl/openssl/doc/crypto/rand.pod b/deps/openssl/openssl/doc/crypto/rand.pod index 1c068c85b34c14..d102df2eee8015 100644 --- a/deps/openssl/openssl/doc/crypto/rand.pod +++ b/deps/openssl/openssl/doc/crypto/rand.pod @@ -39,7 +39,7 @@ Since the introduction of the ENGINE API, the recommended way of controlling default implementations is by using the ENGINE API functions. The default B, as set by RAND_set_rand_method() and returned by RAND_get_rand_method(), is only used if no ENGINE has been set as the default -"rand" implementation. Hence, these two functions are no longer the recommened +"rand" implementation. Hence, these two functions are no longer the recommended way to control defaults. 
If an alternative B implementation is being used (either set diff --git a/deps/openssl/openssl/doc/crypto/sha.pod b/deps/openssl/openssl/doc/crypto/sha.pod index 94ab7bc7241660..0c9dbf2f3d2489 100644 --- a/deps/openssl/openssl/doc/crypto/sha.pod +++ b/deps/openssl/openssl/doc/crypto/sha.pod @@ -2,29 +2,58 @@ =head1 NAME -SHA1, SHA1_Init, SHA1_Update, SHA1_Final - Secure Hash Algorithm +SHA1, SHA1_Init, SHA1_Update, SHA1_Final, SHA224, SHA224_Init, SHA224_Update, +SHA224_Final, SHA256, SHA256_Init, SHA256_Update, SHA256_Final, SHA384, +SHA384_Init, SHA384_Update, SHA384_Final, SHA512, SHA512_Init, SHA512_Update, +SHA512_Final - Secure Hash Algorithm =head1 SYNOPSIS #include - unsigned char *SHA1(const unsigned char *d, unsigned long n, - unsigned char *md); - int SHA1_Init(SHA_CTX *c); - int SHA1_Update(SHA_CTX *c, const void *data, - unsigned long len); + int SHA1_Update(SHA_CTX *c, const void *data, size_t len); int SHA1_Final(unsigned char *md, SHA_CTX *c); + unsigned char *SHA1(const unsigned char *d, size_t n, + unsigned char *md); + + int SHA224_Init(SHA256_CTX *c); + int SHA224_Update(SHA256_CTX *c, const void *data, size_t len); + int SHA224_Final(unsigned char *md, SHA256_CTX *c); + unsigned char *SHA224(const unsigned char *d, size_t n, + unsigned char *md); + + int SHA256_Init(SHA256_CTX *c); + int SHA256_Update(SHA256_CTX *c, const void *data, size_t len); + int SHA256_Final(unsigned char *md, SHA256_CTX *c); + unsigned char *SHA256(const unsigned char *d, size_t n, + unsigned char *md); + + int SHA384_Init(SHA512_CTX *c); + int SHA384_Update(SHA512_CTX *c, const void *data, size_t len); + int SHA384_Final(unsigned char *md, SHA512_CTX *c); + unsigned char *SHA384(const unsigned char *d, size_t n, + unsigned char *md); + + int SHA512_Init(SHA512_CTX *c); + int SHA512_Update(SHA512_CTX *c, const void *data, size_t len); + int SHA512_Final(unsigned char *md, SHA512_CTX *c); + unsigned char *SHA512(const unsigned char *d, size_t n, + unsigned char *md); =head1 DESCRIPTION +Applications should use the higher level functions +L etc. instead of calling the hash +functions directly. + SHA-1 (Secure Hash Algorithm) is a cryptographic hash function with a 160 bit output. SHA1() computes the SHA-1 message digest of the B bytes at B and places it in B (which must have space for SHA_DIGEST_LENGTH == 20 bytes of output). If B is NULL, the digest -is placed in a static array. +is placed in a static array. Note: setting B to NULL is B. The following functions may be used if the message is not completely stored in memory: @@ -37,24 +66,29 @@ be hashed (B bytes at B). SHA1_Final() places the message digest in B, which must have space for SHA_DIGEST_LENGTH == 20 bytes of output, and erases the B. -Applications should use the higher level functions -L -etc. instead of calling the hash functions directly. +The SHA224, SHA256, SHA384 and SHA512 families of functions operate in the +same way as for the SHA1 functions. Note that SHA224 and SHA256 use a +B object instead of B. SHA384 and SHA512 use B. +The buffer B must have space for the output from the SHA variant being used +(defined by SHA224_DIGEST_LENGTH, SHA256_DIGEST_LENGTH, SHA384_DIGEST_LENGTH and +SHA512_DIGEST_LENGTH). Also note that, as for the SHA1() function above, the +SHA224(), SHA256(), SHA384() and SHA512() functions are not thread safe if +B is NULL. The predecessor of SHA-1, SHA, is also implemented, but it should be used only when backward compatibility is required. =head1 RETURN VALUES -SHA1() returns a pointer to the hash value. 
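A minimal sketch of the low-level interface described in the SHA page above. The page itself recommends the EVP digest functions for new code, so this only illustrates the SHA256_Init/SHA256_Update/SHA256_Final calling convention and the caller-supplied output buffer; the input string is an arbitrary example.

 #include <stdio.h>
 #include <string.h>
 #include <openssl/sha.h>

 int main(void)
 {
     const char *msg = "hello world";        /* example input */
     unsigned char md[SHA256_DIGEST_LENGTH];
     unsigned char md2[SHA256_DIGEST_LENGTH];
     SHA256_CTX ctx;
     size_t i;

     /* Incremental hashing: Init, one or more Updates, then Final. */
     if (!SHA256_Init(&ctx) ||
         !SHA256_Update(&ctx, msg, strlen(msg)) ||
         !SHA256_Final(md, &ctx))
         return 1;

     /* One-shot variant; passing a caller-owned buffer avoids the static
      * array (and the thread safety caveat) mentioned above. */
     SHA256((const unsigned char *)msg, strlen(msg), md2);

     for (i = 0; i < sizeof(md); i++)
         printf("%02x", md[i]);
     printf("\n");
     return memcmp(md, md2, sizeof(md)) != 0;
 }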
+SHA1(), SHA224(), SHA256(), SHA384() and SHA512() return a pointer to the hash +value. -SHA1_Init(), SHA1_Update() and SHA1_Final() return 1 for success, 0 otherwise. +SHA1_Init(), SHA1_Update() and SHA1_Final() and equivalent SHA224, SHA256, +SHA384 and SHA512 functions return 1 for success, 0 otherwise. =head1 CONFORMING TO -SHA: US Federal Information Processing Standard FIPS PUB 180 (Secure Hash -Standard), -SHA-1: US Federal Information Processing Standard FIPS PUB 180-1 (Secure Hash +US Federal Information Processing Standard FIPS PUB 180-4 (Secure Hash Standard), ANSI X9.30 diff --git a/deps/openssl/openssl/doc/ssl/SSL_CIPHER_get_name.pod b/deps/openssl/openssl/doc/ssl/SSL_CIPHER_get_name.pod index 2e113be6065cd3..c598f4d4ce2631 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CIPHER_get_name.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CIPHER_get_name.pod @@ -109,6 +109,16 @@ If SSL_CIPHER_description() cannot handle a built-in cipher, the according description of the cipher property is B. This case should not occur. +The standard terminology for ephemeral Diffie-Hellman schemes is DHE +(finite field) or ECDHE (elliptic curve). This version of OpenSSL +idiosyncratically reports these schemes as EDH and EECDH, even though +it also accepts the standard terminology. + +It is recommended to use the standard terminology (DHE and ECDHE) +during configuration (e.g. via SSL_CTX_set_cipher_list) for clarity of +configuration. OpenSSL versions after 1.0.2 will report the standard +terms via SSL_CIPHER_get_name and SSL_CIPHER_description. + =head1 RETURN VALUES See DESCRIPTION @@ -116,6 +126,7 @@ See DESCRIPTION =head1 SEE ALSO L, L, -L, L +L, L, +L =cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_new.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_new.pod new file mode 100644 index 00000000000000..a9ccb049f4f24a --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_new.pod @@ -0,0 +1,40 @@ +=pod + +=head1 NAME + +SSL_CONF_CTX_new, SSL_CONF_CTX_free - SSL configuration allocation functions + +=head1 SYNOPSIS + + #include + + SSL_CONF_CTX *SSL_CONF_CTX_new(void); + void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx); + +=head1 DESCRIPTION + +The function SSL_CONF_CTX_new() allocates and initialises an B +structure for use with the SSL_CONF functions. + +The function SSL_CONF_CTX_free() frees up the context B. + +=head1 RETURN VALUES + +SSL_CONF_CTX_new() returns either the newly allocated B structure +or B if an error occurs. + +SSL_CONF_CTX_free() does not return a value. + +=head1 SEE ALSO + +L, +L, +L, +L, +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2 + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod new file mode 100644 index 00000000000000..76990188d154a2 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod @@ -0,0 +1,49 @@ +=pod + +=head1 NAME + +SSL_CONF_CTX_set1_prefix - Set configuration context command prefix + +=head1 SYNOPSIS + + #include + + unsigned int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *prefix); + +=head1 DESCRIPTION + +The function SSL_CONF_CTX_set1_prefix() sets the command prefix of B +to B. If B is B it is restored to the default value. + +=head1 NOTES + +Command prefixes alter the commands recognised by subsequent SSL_CTX_cmd() +calls. For example for files, if the prefix "SSL" is set then command names +such as "SSLProtocol", "SSLOptions" etc. are recognised instead of "Protocol" +and "Options". 
Similarly for command lines if the prefix is "--ssl-" then +"--ssl-no_tls1_2" is recognised instead of "-no_tls1_2". + +If the B flag is set then prefix checks are case +sensitive and "-" is the default. In the unlikely even an application +explicitly wants to set no prefix it must be explicitly set to "". + +If the B flag is set then prefix checks are case +insensitive and no prefix is the default. + +=head1 RETURN VALUES + +SSL_CONF_CTX_set1_prefix() returns 1 for success and 0 for failure. + +=head1 SEE ALSO + +L, +L, +L, +L, +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2 + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set_flags.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set_flags.pod new file mode 100644 index 00000000000000..4e342804699649 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set_flags.pod @@ -0,0 +1,68 @@ +=pod + +=head1 NAME + +SSL_CONF_CTX_set_flags, SSL_CONF_CTX_clear_flags - Set of clear SSL configuration context flags + +=head1 SYNOPSIS + + #include + + unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags); + unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, unsigned int flags); + +=head1 DESCRIPTION + +The function SSL_CONF_CTX_set_flags() sets B in the context B. + +The function SSL_CONF_CTX_clear_flags() clears B in the context B. + +=head1 NOTES + +The flags set affect how subsequent calls to SSL_CONF_cmd() or +SSL_CONF_argv() behave. + +Currently the following B values are recognised: + +=over 4 + +=item SSL_CONF_FLAG_CMDLINE, SSL_CONF_FLAG_FILE + +recognise options intended for command line or configuration file use. At +least one of these flags must be set. + +=item SSL_CONF_FLAG_CLIENT, SSL_CONF_FLAG_SERVER + +recognise options intended for use in SSL/TLS clients or servers. One or +both of these flags must be set. + +=item SSL_CONF_FLAG_CERTIFICATE + +recognise certificate and private key options. + +=item SSL_CONF_FLAG_SHOW_ERRORS + +indicate errors relating to unrecognised options or missing arguments in +the error queue. If this option isn't set such errors are only reflected +in the return values of SSL_CONF_set_cmd() or SSL_CONF_set_argv() + +=back + +=head1 RETURN VALUES + +SSL_CONF_CTX_set_flags() and SSL_CONF_CTX_clear_flags() returns the new flags +value after setting or clearing flags. + +=head1 SEE ALSO + +L, +L, +L, +L, +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2 + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set_ssl_ctx.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set_ssl_ctx.pod new file mode 100644 index 00000000000000..2049a5336215a9 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set_ssl_ctx.pod @@ -0,0 +1,47 @@ +=pod + +=head1 NAME + +SSL_CONF_CTX_set_ssl_ctx, SSL_CONF_CTX_set_ssl - set context to configure + +=head1 SYNOPSIS + + #include + + void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx); + void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl); + +=head1 DESCRIPTION + +SSL_CONF_CTX_set_ssl_ctx() sets the context associated with B to the +B structure B. Any previous B or B associated with +B is cleared. Subsequent calls to SSL_CONF_cmd() will be sent to +B. + +SSL_CONF_CTX_set_ssl() sets the context associated with B to the +B structure B. Any previous B or B associated with +B is cleared. Subsequent calls to SSL_CONF_cmd() will be sent to +B. 
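Pulling the SSL_CONF_CTX pages above together, here is a minimal sketch, assuming an already created SSL_CTX, that sets configuration-file style flags, an "SSL" command prefix and a target context before issuing a command. The particular flags and the protocol command are illustrative choices.

 #include <openssl/ssl.h>

 static int apply_conf(SSL_CTX *ctx)
 {
     SSL_CONF_CTX *cctx = SSL_CONF_CTX_new();
     int ok = 0;

     if (cctx == NULL)
         return 0;

     /* Accept configuration file style commands for a server, allow
      * certificate/key commands and put errors on the error queue. */
     SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_FILE | SSL_CONF_FLAG_SERVER |
                                  SSL_CONF_FLAG_CERTIFICATE |
                                  SSL_CONF_FLAG_SHOW_ERRORS);

     /* With the "SSL" prefix, "SSLProtocol" is recognised instead of
      * "Protocol", as described above. */
     if (!SSL_CONF_CTX_set1_prefix(cctx, "SSL"))
         goto done;

     /* Route subsequent commands to ctx. */
     SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);

     if (SSL_CONF_cmd(cctx, "SSLProtocol", "ALL,-SSLv2,-SSLv3") <= 0)
         goto done;

     /* Finalise any pending operations once all commands have been sent. */
     ok = SSL_CONF_finish(cctx);
 done:
     SSL_CONF_CTX_free(cctx);
     return ok;
 }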
+ +=head1 NOTES + +The context need not be set or it can be set to B in which case only +syntax checking of commands is performed, where possible. + +=head1 RETURN VALUES + +SSL_CONF_CTX_set_ssl_ctx() and SSL_CTX_set_ssl() do not return a value. + +=head1 SEE ALSO + +L, +L, +L, +L, +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2 + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod new file mode 100644 index 00000000000000..2bf1a60e9013fd --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod @@ -0,0 +1,438 @@ +=pod + +=head1 NAME + +SSL_CONF_cmd - send configuration command + +=head1 SYNOPSIS + + #include + + int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value); + int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd); + int SSL_CONF_finish(SSL_CONF_CTX *cctx); + +=head1 DESCRIPTION + +The function SSL_CONF_cmd() performs configuration operation B with +optional parameter B on B. Its purpose is to simplify application +configuration of B or B structures by providing a common +framework for command line options or configuration files. + +SSL_CONF_cmd_value_type() returns the type of value that B refers to. + +The function SSL_CONF_finish() must be called after all configuration +operations have been completed. It is used to finalise any operations +or to process defaults. + +=head1 SUPPORTED COMMAND LINE COMMANDS + +Currently supported B names for command lines (i.e. when the +flag B is set) are listed below. Note: all B names +are case sensitive. Unless otherwise stated commands can be used by +both clients and servers and the B parameter is not used. The default +prefix for command line commands is B<-> and that is reflected below. + +=over 4 + +=item B<-sigalgs> + +This sets the supported signature algorithms for TLS v1.2. For clients this +value is used directly for the supported signature algorithms extension. For +servers it is used to determine which signature algorithms to support. + +The B argument should be a colon separated list of signature algorithms +in order of decreasing preference of the form B. B +is one of B, B or B and B is a supported algorithm +OID short name such as B, B, B, B of B. +Note: algorithm and hash names are case sensitive. + +If this option is not set then all signature algorithms supported by the +OpenSSL library are permissible. + +=item B<-client_sigalgs> + +This sets the supported signature algorithms associated with client +authentication for TLS v1.2. For servers the value is used in the supported +signature algorithms field of a certificate request. For clients it is +used to determine which signature algorithm to with the client certificate. +If a server does not request a certificate this option has no effect. + +The syntax of B is identical to B<-sigalgs>. If not set then +the value set for B<-sigalgs> will be used instead. + +=item B<-curves> + +This sets the supported elliptic curves. For clients the curves are +sent using the supported curves extension. For servers it is used +to determine which curve to use. This setting affects curves used for both +signatures and key exchange, if applicable. + +The B argument is a colon separated list of curves. The curve can be +either the B name (e.g. B) or an OpenSSL OID name (e.g +B). Curve names are case sensitive. + +=item B<-named_curve> + +This sets the temporary curve used for ephemeral ECDH modes. 
Only used by +servers + +The B argument is a curve name or the special value B which +picks an appropriate curve based on client and server preferences. The curve +can be either the B name (e.g. B) or an OpenSSL OID name +(e.g B). Curve names are case sensitive. + +=item B<-cipher> + +Sets the cipher suite list to B. Note: syntax checking of B is +currently not performed unless a B or B structure is +associated with B. + +=item B<-cert> + +Attempts to use the file B as the certificate for the appropriate +context. It currently uses SSL_CTX_use_certificate_chain_file() if an B +structure is set or SSL_use_certificate_file() with filetype PEM if an B +structure is set. This option is only supported if certificate operations +are permitted. + +=item B<-key> + +Attempts to use the file B as the private key for the appropriate +context. This option is only supported if certificate operations +are permitted. Note: if no B<-key> option is set then a private key is +not loaded: it does not currently use the B<-cert> file. + +=item B<-dhparam> + +Attempts to use the file B as the set of temporary DH parameters for +the appropriate context. This option is only supported if certificate +operations are permitted. + +=item B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> + +Disables protocol support for SSLv2, SSLv3, TLS 1.0, TLS 1.1 or TLS 1.2 +by setting the corresponding options B, B, +B, B and B respectively. + +=item B<-bugs> + +Various bug workarounds are set, same as setting B. + +=item B<-no_comp> + +Disables support for SSL/TLS compression, same as setting B. + +=item B<-no_ticket> + +Disables support for session tickets, same as setting B. + +=item B<-serverpref> + +Use server and not client preference order when determining which cipher suite, +signature algorithm or elliptic curve to use for an incoming connection. +Equivalent to B. Only used by servers. + +=item B<-no_resumption_on_reneg> + +set SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION flag. Only used by servers. + +=item B<-legacyrenegotiation> + +permits the use of unsafe legacy renegotiation. Equivalent to setting +B. + +=item B<-legacy_server_connect>, B<-no_legacy_server_connect> + +permits or prohibits the use of unsafe legacy renegotiation for OpenSSL +clients only. Equivalent to setting or clearing B. +Set by default. + +=item B<-strict> + +enables strict mode protocol handling. Equivalent to setting +B. + +=item B<-debug_broken_protocol> + +disables various checks and permits several kinds of broken protocol behaviour +for testing purposes: it should B be used in anything other than a test +environment. Only supported if OpenSSL is configured with +B<-DOPENSSL_SSL_DEBUG_BROKEN_PROTOCOL>. + +=back + +=head1 SUPPORTED CONFIGURATION FILE COMMANDS + +Currently supported B names for configuration files (i.e. when the +flag B is set) are listed below. All configuration file +B names and are case insensitive so B is recognised +as well as B. Unless otherwise stated the B names +are also case insensitive. + +Note: the command prefix (if set) alters the recognised B values. + +=over 4 + +=item B + +Sets the cipher suite list to B. Note: syntax checking of B is +currently not performed unless an B or B structure is +associated with B. + +=item B + +Attempts to use the file B as the certificate for the appropriate +context. It currently uses SSL_CTX_use_certificate_chain_file() if an B +structure is set or SSL_use_certificate_file() with filetype PEM if an B +structure is set. 
This option is only supported if certificate operations +are permitted. + +=item B + +Attempts to use the file B as the private key for the appropriate +context. This option is only supported if certificate operations +are permitted. Note: if no B<-key> option is set then a private key is +not loaded: it does not currently use the B file. + +=item B + +Attempts to use the file B in the "serverinfo" extension using the +function SSL_CTX_use_serverinfo_file. + +=item B + +Attempts to use the file B as the set of temporary DH parameters for +the appropriate context. This option is only supported if certificate +operations are permitted. + +=item B + +This sets the supported signature algorithms for TLS v1.2. For clients this +value is used directly for the supported signature algorithms extension. For +servers it is used to determine which signature algorithms to support. + +The B argument should be a colon separated list of signature algorithms +in order of decreasing preference of the form B. B +is one of B, B or B and B is a supported algorithm +OID short name such as B, B, B, B of B. +Note: algorithm and hash names are case sensitive. + +If this option is not set then all signature algorithms supported by the +OpenSSL library are permissible. + +=item B + +This sets the supported signature algorithms associated with client +authentication for TLS v1.2. For servers the value is used in the supported +signature algorithms field of a certificate request. For clients it is +used to determine which signature algorithm to with the client certificate. + +The syntax of B is identical to B. If not set then +the value set for B will be used instead. + +=item B + +This sets the supported elliptic curves. For clients the curves are +sent using the supported curves extension. For servers it is used +to determine which curve to use. This setting affects curves used for both +signatures and key exchange, if applicable. + +The B argument is a colon separated list of curves. The curve can be +either the B name (e.g. B) or an OpenSSL OID name (e.g +B). Curve names are case sensitive. + +=item B + +This sets the temporary curve used for ephemeral ECDH modes. Only used by +servers + +The B argument is a curve name or the special value B which +picks an appropriate curve based on client and server preferences. The curve +can be either the B name (e.g. B) or an OpenSSL OID name +(e.g B). Curve names are case sensitive. + +=item B + +The supported versions of the SSL or TLS protocol. + +The B argument is a comma separated list of supported protocols to +enable or disable. If an protocol is preceded by B<-> that version is disabled. +All versions are enabled by default, though applications may choose to +explicitly disable some. Currently supported protocol values are B, +B, B, B and B. The special value B refers +to all supported versions. + +=item B + +The B argument is a comma separated list of various flags to set. +If a flag string is preceded B<-> it is disabled. See the +B function for more details of individual options. + +Each option is listed below. Where an operation is enabled by default +the B<-flag> syntax is needed to disable it. + +B: session ticket support, enabled by default. Inverse of +B: that is B<-SessionTicket> is the same as setting +B. + +B: SSL/TLS compression support, enabled by default. Inverse +of B. + +B: use empty fragments as a countermeasure against a +SSL 3.0/TLS 1.0 protocol vulnerability affecting CBC ciphers. It +is set by default. Inverse of B. 
+ +B: enable various bug workarounds. Same as B. + +B: enable single use DH keys, set by default. Inverse of +B. Only used by servers. + +B enable single use ECDH keys, set by default. Inverse of +B. Only used by servers. + +B use server and not client preference order when +determining which cipher suite, signature algorithm or elliptic curve +to use for an incoming connection. Equivalent to +B. Only used by servers. + +B set +B flag. Only used by servers. + +B permits the use of unsafe legacy renegotiation. +Equivalent to B. + +B permits the use of unsafe legacy renegotiation +for OpenSSL clients only. Equivalent to B. +Set by default. + +=back + +=head1 SUPPORTED COMMAND TYPES + +The function SSL_CONF_cmd_value_type() currently returns one of the following +types: + +=over 4 + +=item B + +The B string is unrecognised, this return value can be use to flag +syntax errors. + +=item B + +The value is a string without any specific structure. + +=item B + +The value is a file name. + +=item B + +The value is a directory name. + +=back + +=head1 NOTES + +The order of operations is significant. This can be used to set either defaults +or values which cannot be overridden. For example if an application calls: + + SSL_CONF_cmd(ctx, "Protocol", "-SSLv2"); + SSL_CONF_cmd(ctx, userparam, uservalue); + +it will disable SSLv2 support by default but the user can override it. If +however the call sequence is: + + SSL_CONF_cmd(ctx, userparam, uservalue); + SSL_CONF_cmd(ctx, "Protocol", "-SSLv2"); + +SSLv2 is B disabled and attempt to override this by the user are +ignored. + +By checking the return code of SSL_CTX_cmd() it is possible to query if a +given B is recognised, this is useful is SSL_CTX_cmd() values are +mixed with additional application specific operations. + +For example an application might call SSL_CTX_cmd() and if it returns +-2 (unrecognised command) continue with processing of application specific +commands. + +Applications can also use SSL_CTX_cmd() to process command lines though the +utility function SSL_CTX_cmd_argv() is normally used instead. One way +to do this is to set the prefix to an appropriate value using +SSL_CONF_CTX_set1_prefix(), pass the current argument to B and the +following argument to B (which may be NULL). + +In this case if the return value is positive then it is used to skip that +number of arguments as they have been processed by SSL_CTX_cmd(). If -2 is +returned then B is not recognised and application specific arguments +can be checked instead. If -3 is returned a required argument is missing +and an error is indicated. If 0 is returned some other error occurred and +this can be reported back to the user. + +The function SSL_CONF_cmd_value_type() can be used by applications to +check for the existence of a command or to perform additional syntax +checking or translation of the command value. For example if the return +value is B an application could translate a relative +pathname to an absolute pathname. 
+ +=head1 EXAMPLES + +Set supported signature algorithms: + + SSL_CONF_cmd(ctx, "SignatureAlgorithms", "ECDSA+SHA256:RSA+SHA256:DSA+SHA256"); + +Enable all protocols except SSLv3 and SSLv2: + + SSL_CONF_cmd(ctx, "Protocol", "ALL,-SSLv3,-SSLv2"); + +Only enable TLSv1.2: + + SSL_CONF_cmd(ctx, "Protocol", "-ALL,TLSv1.2"); + +Disable TLS session tickets: + + SSL_CONF_cmd(ctx, "Options", "-SessionTicket"); + +Set supported curves to P-256, P-384: + + SSL_CONF_cmd(ctx, "Curves", "P-256:P-384"); + +Set automatic support for any elliptic curve for key exchange: + + SSL_CONF_cmd(ctx, "ECDHParameters", "Automatic"); + +=head1 RETURN VALUES + +SSL_CONF_cmd() returns 1 if the value of B is recognised and B is +B used and 2 if both B and B are used. In other words it +returns the number of arguments processed. This is useful when processing +command lines. + +A return value of -2 means B is not recognised. + +A return value of -3 means B is recognised and the command requires a +value but B is NULL. + +A return code of 0 indicates that both B and B are valid but an +error occurred attempting to perform the operation: for example due to an +error in the syntax of B in this case the error queue may provide +additional information. + +SSL_CONF_finish() returns 1 for success and 0 for failure. + +=head1 SEE ALSO + +L, +L, +L, +L, +L + +=head1 HISTORY + +SSL_CONF_cmd() was first added to OpenSSL 1.0.2 + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd_argv.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd_argv.pod new file mode 100644 index 00000000000000..6e66441cd1b6c8 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd_argv.pod @@ -0,0 +1,42 @@ +=pod + +=head1 NAME + +SSL_CONF_cmd_argv - SSL configuration command line processing. + +=head1 SYNOPSIS + + #include + + int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv); + +=head1 DESCRIPTION + +The function SSL_CONF_cmd_argv() processes at most two command line +arguments from B and B. The values of B and B +are updated to reflect the number of command options processed. The B +argument can be set to B is it is not used. + +=head1 RETURN VALUES + +SSL_CONF_cmd_argv() returns the number of command arguments processed: 0, 1, 2 +or a negative error code. + +If -2 is returned then an argument for a command is missing. + +If -1 is returned the command is recognised but couldn't be processed due +to an error: for example a syntax error in the argument. 
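The command line variant described above can be driven by a short loop. This sketch assumes the SSL_CONF_CTX has already been created with SSL_CONF_FLAG_CMDLINE set, and simply rejects anything SSL_CONF_cmd_argv() does not recognise instead of handling application specific options.

 #include <stdio.h>
 #include <openssl/ssl.h>

 static int parse_args(SSL_CONF_CTX *cctx, int argc, char **argv)
 {
     while (argc > 0) {
         int rv = SSL_CONF_cmd_argv(cctx, &argc, &argv);

         if (rv == -2) {
             fprintf(stderr, "missing argument for %s\n", argv[0]);
             return 0;
         }
         if (rv < 0) {
             fprintf(stderr, "error processing %s\n", argv[0]);
             return 0;
         }
         if (rv == 0) {
             /* Not an SSL_CONF option: an application would check its own
              * options here before giving up. */
             fprintf(stderr, "unknown option %s\n", argv[0]);
             return 0;
         }
         /* rv is 1 or 2: argc and argv were already advanced. */
     }
     return SSL_CONF_finish(cctx);
 }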
+ +=head1 SEE ALSO + +L, +L, +L, +L, +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2 + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_add1_chain_cert.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_add1_chain_cert.pod new file mode 100644 index 00000000000000..b999f0941f9cd1 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_add1_chain_cert.pod @@ -0,0 +1,150 @@ +=pod + +=head1 NAME + +SSL_CTX_set0_chain, SSL_CTX_set1_chain, SSL_CTX_add0_chain_cert, +SSL_CTX_add1_chain_cert, SSL_CTX_get0_chain_certs, SSL_CTX_clear_chain_certs, +SSL_set0_chain, SSL_set1_chain, SSL_add0_chain_cert, SSL_add1_chain_cert, +SSL_get0_chain_certs, SSL_clear_chain_certs, SSL_CTX_build_cert_chain, +SSL_build_cert_chain, SSL_CTX_select_current_cert, +SSL_select_current_cert, SSL_CTX_set_current_cert, SSL_set_current_cert - extra +chain certificate processing + +=head1 SYNOPSIS + + #include + + int SSL_CTX_set0_chain(SSL_CTX *ctx, STACK_OF(X509) *sk); + int SSL_CTX_set1_chain(SSL_CTX *ctx, STACK_OF(X509) *sk); + int SSL_CTX_add0_chain_cert(SSL_CTX *ctx, X509 *x509); + int SSL_CTX_add1_chain_cert(SSL_CTX *ctx, X509 *x509); + int SSL_CTX_get0_chain_certs(SSL_CTX *ctx, STACK_OF(X509) **sk); + int SSL_CTX_clear_chain_certs(SSL_CTX *ctx); + + int SSL_set0_chain(SSL *ssl, STACK_OF(X509) *sk); + int SSL_set1_chain(SSL *ssl, STACK_OF(X509) *sk); + int SSL_add0_chain_cert(SSL *ssl, X509 *x509); + int SSL_add1_chain_cert(SSL *ssl, X509 *x509); + int SSL_get0_chain_certs(SSL *ssl, STACK_OF(X509) **sk); + int SSL_clear_chain_certs(SSL *ssl); + + int SSL_CTX_build_cert_chain(SSL_CTX *ctx, flags); + int SSL_build_cert_chain(SSL *ssl, flags); + + int SSL_CTX_select_current_cert(SSL_CTX *ctx, X509 *x509); + int SSL_select_current_cert(SSL *ssl, X509 *x509); + int SSL_CTX_set_current_cert(SSL_CTX *ctx, long op); + int SSL_set_current_cert(SSL *ssl, long op); + +=head1 DESCRIPTION + +SSL_CTX_set0_chain() and SSL_CTX_set1_chain() set the certificate chain +associated with the current certificate of B to B. + +SSL_CTX_add0_chain_cert() and SSL_CTX_add1_chain_cert() append the single +certificate B to the chain associated with the current certificate of +B. + +SSL_CTX_get0_chain_certs() retrieves the chain associated with the current +certificate of B. + +SSL_CTX_clear_chain_certs() clears any existing chain associated with the +current certificate of B. (This is implemented by calling +SSL_CTX_set0_chain() with B set to B). + +SSL_CTX_build_cert_chain() builds the certificate chain for B normally +this uses the chain store or the verify store if the chain store is not set. +If the function is successful the built chain will replace any existing chain. +The B parameter can be set to B to use +existing chain certificates as untrusted CAs, B +to omit the root CA from the built chain, B to +use all existing chain certificates only to build the chain (effectively +sanity checking and rearranging them if necessary), the flag +B ignores any errors during verification: +if flag B is also set verification errors +are cleared from the error queue. + +Each of these functions operates on the I end entity +(i.e. server or client) certificate. This is the last certificate loaded or +selected on the corresponding B structure. + +SSL_CTX_select_current_cert() selects B as the current end entity +certificate, but only if B has already been loaded into B using a +function such as SSL_CTX_use_certificate(). 
+ +SSL_set0_chain(), SSL_set1_chain(), SSL_add0_chain_cert(), +SSL_add1_chain_cert(), SSL_get0_chain_certs(), SSL_clear_chain_certs(), +SSL_build_cert_chain(), SSL_select_current_cert() and SSL_set_current_cert() +are similar except they apply to SSL structure B. + +SSL_CTX_set_current_cert() changes the current certificate to a value based +on the B argument. Currently B can be B to use +the first valid certificate or B to set the next valid +certificate after the current certificate. These two operations can be +used to iterate over all certificates in an B structure. + +SSL_set_current_cert() also supports the option B. +If B is a server and has sent a certificate to a connected client +this option sets that certificate to the current certificate and returns 1. +If the negotiated ciphersuite is anonymous (and thus no certificate will +be sent) 2 is returned and the current certificate is unchanged. If B +is not a server or a certificate has not been sent 0 is returned and +the current certificate is unchanged. + +All these functions are implemented as macros. Those containing a B<1> +increment the reference count of the supplied certificate or chain so it must +be freed at some point after the operation. Those containing a B<0> do +not increment reference counts and the supplied certificate or chain +B be freed after the operation. + +=head1 NOTES + +The chains associate with an SSL_CTX structure are copied to any SSL +structures when SSL_new() is called. SSL structures will not be affected +by any chains subsequently changed in the parent SSL_CTX. + +One chain can be set for each key type supported by a server. So, for example, +an RSA and a DSA certificate can (and often will) have different chains. + +The functions SSL_CTX_build_cert_chain() and SSL_build_cert_chain() can +be used to check application configuration and to ensure any necessary +subordinate CAs are sent in the correct order. Misconfigured applications +sending incorrect certificate chains often cause problems with peers. + +For example an application can add any set of certificates using +SSL_CTX_use_certificate_chain_file() then call SSL_CTX_build_cert_chain() +with the option B to check and reorder them. + +Applications can issue non fatal warnings when checking chains by setting +the flag B and checking the return +value. + +Calling SSL_CTX_build_cert_chain() or SSL_build_cert_chain() is more +efficient than the automatic chain building as it is only performed once. +Automatic chain building is performed on each new session. + +If any certificates are added using these functions no certificates added +using SSL_CTX_add_extra_chain_cert() will be used. + +=head1 RETURN VALUES + +SSL_set_current_cert() with B return 1 for success, 2 if +no server certificate is used because the ciphersuites is anonymous and 0 +for failure. + +SSL_CTX_build_cert_chain() and SSL_build_cert_chain() return 1 for success +and 0 for failure. If the flag B and +a verification error occurs then 2 is returned. + +All other functions return 1 for success and 0 for failure. + + +=head1 SEE ALSO + +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2. 
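A minimal sketch of the chain handling described above, assuming the end entity certificate and one intermediate have already been loaded into X509 objects (for example with PEM_read_X509()). The build flags chosen here are one reasonable combination, not the only possible one.

 #include <openssl/ssl.h>

 static int load_chain(SSL_CTX *ctx, X509 *leaf, X509 *intermediate)
 {
     if (SSL_CTX_use_certificate(ctx, leaf) != 1)
         return 0;

     /* add1: the chain takes its own reference, so the caller still owns
      * 'intermediate' and must free it later. */
     if (SSL_CTX_add1_chain_cert(ctx, intermediate) != 1)
         return 0;

     /* Check and reorder the configured certificates, treating the chain
      * certificates as untrusted CAs and leaving the root out. */
     return SSL_CTX_build_cert_chain(ctx, SSL_BUILD_CHAIN_FLAG_UNTRUSTED |
                                          SSL_BUILD_CHAIN_FLAG_NO_ROOT) == 1;
 }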
+ +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_add_extra_chain_cert.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_add_extra_chain_cert.pod index 5955ee1cb41535..8e832a57eaace8 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_add_extra_chain_cert.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_add_extra_chain_cert.pod @@ -32,7 +32,8 @@ Only one set of extra chain certificates can be specified per SSL_CTX structure. Different chains for different certificates (for example if both RSA and DSA certificates are specified by the same server) or different SSL structures with the same parent SSL_CTX cannot be specified using this -function. +function. For more flexibility functions such as SSL_add1_chain_cert() should +be used instead. =head1 RETURN VALUES @@ -45,5 +46,15 @@ L, L, L, L +L +L +L +L +L +L +L +L +L +L =cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_sess_set_cache_size.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_sess_set_cache_size.pod index c8b99f4eef0934..4aeda096d66b12 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_sess_set_cache_size.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_sess_set_cache_size.pod @@ -15,6 +15,7 @@ SSL_CTX_sess_set_cache_size, SSL_CTX_sess_get_cache_size - manipulate session ca SSL_CTX_sess_set_cache_size() sets the size of the internal session cache of context B to B. +This value is a hint and not an absolute; see the notes below. SSL_CTX_sess_get_cache_size() returns the currently valid session cache size. @@ -25,8 +26,9 @@ currently 1024*20, so that up to 20000 sessions can be held. This size can be modified using the SSL_CTX_sess_set_cache_size() call. A special case is the size 0, which is used for unlimited size. -When the maximum number of sessions is reached, no more new sessions are -added to the cache. New space may be added by calling +If adding the session makes the cache exceed its size, then unused +sessions are dropped from the end of the cache. +Cache space may also be reclaimed by calling L to remove expired sessions. diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod new file mode 100644 index 00000000000000..18d0c9ac394e53 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod @@ -0,0 +1,103 @@ +=pod + +=head1 NAME + +SSL_CTX_set1_curves, SSL_CTX_set1_curves_list, SSL_set1_curves, +SSL_set1_curves_list, SSL_get1_curves, SSL_get_shared_curve, +SSL_CTX_set_ecdh_auto, SSL_set_ecdh_auto - EC supported curve functions + +=head1 SYNOPSIS + + #include + + int SSL_CTX_set1_curves(SSL_CTX *ctx, int *clist, int clistlen); + int SSL_CTX_set1_curves_list(SSL_CTX *ctx, char *list); + + int SSL_set1_curves(SSL *ssl, int *clist, int clistlen); + int SSL_set1_curves_list(SSL *ssl, char *list); + + int SSL_get1_curves(SSL *ssl, int *curves); + int SSL_get_shared_curve(SSL *s, int n); + + int SSL_CTX_set_ecdh_auto(SSL_CTX *ctx, int onoff); + int SSL_set_ecdh_auto(SSL *s, int onoff); + +=head1 DESCRIPTION + +SSL_CTX_set1_curves() sets the supported curves for B to B +curves in the array B. The array consist of all NIDs of curves in +preference order. For a TLS client the curves are used directly in the +supported curves extension. For a TLS server the curves are used to +determine the set of shared curves. + +SSL_CTX_set1_curves_list() sets the supported curves for B to +string B. The string is a colon separated list of curve NIDs or +names, for example "P-521:P-384:P-256". 
+ +SSL_set1_curves() and SSL_set1_curves_list() are similar except they set +supported curves for the SSL structure B. + +SSL_get1_curves() returns the set of supported curves sent by a client +in the supported curves extension. It returns the total number of +supported curves. The B parameter can be B to simply +return the number of curves for memory allocation purposes. The +B array is in the form of a set of curve NIDs in preference +order. It can return zero if the client did not send a supported curves +extension. + +SSL_get_shared_curve() returns shared curve B for a server-side +SSL B. If B is -1 then the total number of shared curves is +returned, which may be zero. Other than for diagnostic purposes, +most applications will only be interested in the first shared curve +so B is normally set to zero. If the value B is out of range, +NID_undef is returned. + +SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto() set automatic curve +selection for server B or B to B. If B is 1 then +the highest preference curve is automatically used for ECDH temporary +keys used during key exchange. + +All these functions are implemented as macros. + +=head1 NOTES + +If an application wishes to make use of several of these functions for +configuration purposes either on a command line or in a file it should +consider using the SSL_CONF interface instead of manually parsing options. + +The functions SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto() can be used to +make a server always choose the most appropriate curve for a client. If set +it will override any temporary ECDH parameters set by a server. Previous +versions of OpenSSL could effectively only use a single ECDH curve set +using a function such as SSL_CTX_set_ecdh_tmp(). Newer applications should +just call: + + SSL_CTX_set_ecdh_auto(ctx, 1); + +and they will automatically support ECDH using the most appropriate shared +curve. + +=head1 RETURN VALUES + +SSL_CTX_set1_curves(), SSL_CTX_set1_curves_list(), SSL_set1_curves(), +SSL_set1_curves_list(), SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto() +return 1 for success and 0 for failure. + +SSL_get1_curves() returns the number of curves, which may be zero. + +SSL_get_shared_curve() returns the NID of shared curve B or NID_undef if there +is no shared curve B; or the total number of shared curves if B +is -1. + +When called on a client B, SSL_get_shared_curve() has no meaning and +returns -1. + +=head1 SEE ALSO + +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2. 
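As a short usage sketch of the functions above: restrict the supported curves with SSL_CTX_set1_curves_list() and let a server pick the shared curve automatically. The curve list is an example, not a recommendation from this page.

 #include <openssl/ssl.h>

 static int configure_curves(SSL_CTX *ctx)
 {
     /* Colon separated curve names, highest preference first. */
     if (SSL_CTX_set1_curves_list(ctx, "P-256:P-384") != 1)
         return 0;

     /* Choose the most appropriate shared curve for ephemeral ECDH keys,
      * overriding any single curve set with SSL_CTX_set_ecdh_tmp(). */
     return SSL_CTX_set_ecdh_auto(ctx, 1) == 1;
 }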
+ +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod new file mode 100644 index 00000000000000..493cca48194008 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod @@ -0,0 +1,91 @@ +=pod + +=head1 NAME + +SSL_CTX_set0_verify_cert_store, SSL_CTX_set1_verify_cert_store, +SSL_CTX_set0_chain_cert_store, SSL_CTX_set1_chain_cert_store, +SSL_set0_verify_cert_store, SSL_set1_verify_cert_store, +SSL_set0_chain_cert_store, SSL_set1_chain_cert_store - set certificate +verification or chain store + +=head1 SYNOPSIS + + #include + + int SSL_CTX_set0_verify_cert_store(SSL_CTX *ctx, X509_STORE *st); + int SSL_CTX_set1_verify_cert_store(SSL_CTX *ctx, X509_STORE *st); + int SSL_CTX_set0_chain_cert_store(SSL_CTX *ctx, X509_STORE *st); + int SSL_CTX_set1_chain_cert_store(SSL_CTX *ctx, X509_STORE *st); + + int SSL_set0_verify_cert_store(SSL_CTX *ctx, X509_STORE *st); + int SSL_set1_verify_cert_store(SSL_CTX *ctx, X509_STORE *st); + int SSL_set0_chain_cert_store(SSL_CTX *ctx, X509_STORE *st); + int SSL_set1_chain_cert_store(SSL_CTX *ctx, X509_STORE *st); + +=head1 DESCRIPTION + +SSL_CTX_set0_verify_cert_store() and SSL_CTX_set1_verify_cert_store() +set the certificate store used for certificate verification to B. + +SSL_CTX_set0_chain_cert_store() and SSL_CTX_set1_chain_cert_store() +set the certificate store used for certificate chain building to B. + +SSL_set0_verify_cert_store(), SSL_set1_verify_cert_store(), +SSL_set0_chain_cert_store() and SSL_set1_chain_cert_store() are similar +except they apply to SSL structure B. + +All these functions are implemented as macros. Those containing a B<1> +increment the reference count of the supplied store so it must +be freed at some point after the operation. Those containing a B<0> do +not increment reference counts and the supplied store B be freed +after the operation. + +=head1 NOTES + +The stores pointers associated with an SSL_CTX structure are copied to any SSL +structures when SSL_new() is called. As a result SSL structures will not be +affected if the parent SSL_CTX store pointer is set to a new value. + +The verification store is used to verify the certificate chain sent by the +peer: that is an SSL/TLS client will use the verification store to verify +the server's certificate chain and a SSL/TLS server will use it to verify +any client certificate chain. + +The chain store is used to build the certificate chain. + +If the mode B is set or a certificate chain is +configured already (for example using the functions such as +L or +L) then +automatic chain building is disabled. + +If the mode B is set then automatic chain building +is disabled. + +If the chain or the verification store is not set then the store associated +with the parent SSL_CTX is used instead to retain compatibility with previous +versions of OpenSSL. + +=head1 RETURN VALUES + +All these functions return 1 for success and 0 for failure. + +=head1 SEE ALSO + +L +L +L +L +L +L +L +L +L +L +L + +=head1 HISTORY + +These functions were first added to OpenSSL 1.0.2. 
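A minimal sketch of attaching a dedicated verification store, assuming the CA certificates live in a single PEM file whose path is supplied by the caller. Because the "1" variant takes its own reference, the local X509_STORE pointer is still freed here.

 #include <openssl/ssl.h>
 #include <openssl/x509_vfy.h>

 static int set_verify_store(SSL_CTX *ctx, const char *ca_file)
 {
     X509_STORE *st = X509_STORE_new();
     int ok = 0;

     if (st == NULL)
         return 0;

     if (X509_STORE_load_locations(st, ca_file, NULL) == 1 &&
         SSL_CTX_set1_verify_cert_store(ctx, st) == 1)
         ok = 1;

     X509_STORE_free(st);    /* ctx keeps its own reference on success */
     return ok;
 }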
+ +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_cb.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_cb.pod new file mode 100644 index 00000000000000..141d828f5bbe5c --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_cb.pod @@ -0,0 +1,68 @@ +=pod + +=head1 NAME + +SSL_CTX_set_cert_cb, SSL_set_cert_cb - handle certificate callback function + +=head1 SYNOPSIS + + #include + + void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cert_cb)(SSL *ssl, void *arg), void *arg); + void SSL_set_cert_cb(SSL *s, int (*cert_cb)(SSL *ssl, void *arg), void *arg); + + int (*cert_cb)(SSL *ssl, void *arg); + +=head1 DESCRIPTION + +SSL_CTX_set_cert_cb() and SSL_set_cert_cb() sets the B callback, +B value is pointer which is passed to the application callback. + +When B is NULL, no callback function is used. + +cert_cb() is the application defined callback. It is called before a +certificate will be used by a client or server. The callback can then inspect +the passed B structure and set or clear any appropriate certificates. If +the callback is successful it B return 1 even if no certificates have +been set. A zero is returned on error which will abort the handshake with a +fatal internal error alert. A negative return value will suspend the handshake +and the handshake function will return immediately. +L will return SSL_ERROR_WANT_X509_LOOKUP to +indicate, that the handshake was suspended. The next call to the handshake +function will again lead to the call of cert_cb(). It is the job of the +cert_cb() to store information about the state of the last call, +if required to continue. + +=head1 NOTES + +An application will typically call SSL_use_certificate() and +SSL_use_PrivateKey() to set the end entity certificate and private key. +It can add intermediate and optionally the root CA certificates using +SSL_add1_chain_cert(). + +It might also call SSL_certs_clear() to delete any certificates associated +with the B object. + +The certificate callback functionality supercedes the (largely broken) +functionality provided by the old client certificate callback interface. +It is B called even is a certificate is already set so the callback +can modify or delete the existing certificate. + +A more advanced callback might examine the handshake parameters and set +whatever chain is appropriate. For example a legacy client supporting only +TLS v1.0 might receive a certificate chain signed using SHA1 whereas a +TLS v1.2 client which advertises support for SHA256 could receive a chain +using SHA256. + +Normal server sanity checks are performed on any certificates set +by the callback. So if an EC chain is set for a curve the client does not +support it will B be used. + +=head1 SEE ALSO + +L, L, +L, +L, +L, L + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod index 6acf0d9f9b1c16..846416e0694772 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod @@ -42,6 +42,13 @@ L family of functions. This document must therefore be updated when documentation about the X509_STORE object and its handling becomes available. +=head1 RESTRICTIONS + +The X509_STORE structure used by an SSL_CTX is used for verifying peer +certificates and building certificate chains, it is also shared by +every child SSL structure. Applications wanting finer control can use +functions such as SSL_CTX_set1_verify_cert_store() instead. 
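Referring back to the certificate callback page above, a minimal sketch of a cert_cb that installs a certificate/key pair passed through the arg pointer. The cert_pair structure is a hypothetical application helper, not part of the OpenSSL API.

 #include <openssl/ssl.h>

 struct cert_pair {               /* hypothetical application structure */
     X509 *cert;
     EVP_PKEY *key;
 };

 static int cert_cb(SSL *ssl, void *arg)
 {
     struct cert_pair *cp = arg;

     if (cp != NULL) {
         if (SSL_use_certificate(ssl, cp->cert) != 1 ||
             SSL_use_PrivateKey(ssl, cp->key) != 1)
             return 0;            /* abort with a fatal internal error alert */
     }
     return 1;                    /* continue, even if nothing was changed */
 }

 /* Registration, e.g.: SSL_CTX_set_cert_cb(ctx, cert_cb, &pair); */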
+ =head1 RETURN VALUES SSL_CTX_set_cert_store() does not return diagnostic output. diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cipher_list.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cipher_list.pod index bd4df4abd46155..c84a8314beec14 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cipher_list.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cipher_list.pod @@ -41,7 +41,7 @@ RSA export ciphers with a keylength of 512 bits for the RSA key require a temporary 512 bit RSA key, as typically the supplied key has a length of 1024 bit (see L). -RSA ciphers using EDH need a certificate and key and additional DH-parameters +RSA ciphers using DHE need a certificate and key and additional DH-parameters (see L). A DSA cipher can only be chosen, when a DSA certificate is available. diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod new file mode 100644 index 00000000000000..3fceef9258a310 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod @@ -0,0 +1,133 @@ +=pod + +=head1 NAME + +SSL_CTX_add_client_custom_ext, SSL_CTX_add_server_custom_ext - custom TLS extension handling + +=head1 SYNOPSIS + + #include + + int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + + int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + + int SSL_extension_supported(unsigned int ext_type); + + typedef int (*custom_ext_add_cb)(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, + void *add_arg); + + typedef void (*custom_ext_free_cb)(SSL *s, unsigned int ext_type, + const unsigned char *out, + void *add_arg); + + typedef int (*custom_ext_parse_cb)(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, + void *parse_arg); + + +=head1 DESCRIPTION + +SSL_CTX_add_client_custom_ext() adds a custom extension for a TLS client +with extension type B and callbacks B, B and +B. + +SSL_CTX_add_server_custom_ext() adds a custom extension for a TLS server +with extension type B and callbacks B, B and +B. + +In both cases the extension type must not be handled by OpenSSL internally +or an error occurs. + +SSL_extension_supported() returns 1 if the extension B is handled +internally by OpenSSL and 0 otherwise. + +=head1 EXTENSION CALLBACKS + +The callback B is called to send custom extension data to be +included in ClientHello for TLS clients or ServerHello for servers. The +B parameter is set to the extension type which will be added and +B to the value set when the extension handler was added. + +If the application wishes to include the extension B it should +set B<*out> to the extension data, set B<*outlen> to the length of the +extension data and return 1. + +If the B does not wish to include the extension it must return 0. + +If B returns -1 a fatal handshake error occurs using the TLS +alert value specified in B<*al>. + +For clients (but not servers) if B is set to NULL a zero length +extension is added for B. + +For clients every registered B is always called to see if the +application wishes to add an extension to ClientHello. 
+ +For servers every registered B is called once if and only if the +corresponding extension was received in ClientHello to see if the application +wishes to add the extension to ServerHello. That is, if no corresponding extension +was received in ClientHello then B will not be called. + +If an extension is added (that is B returns 1) B is called +(if it is set) with the value of B set by the add callback. It can be +used to free up any dynamic extension data set by B. Since B is +constant (to permit use of constant data in B) applications may need to +cast away const to free the data. + +The callback B receives data for TLS extensions. For TLS clients +the extension data will come from ServerHello and for TLS servers it will +come from ClientHello. + +The extension data consists of B bytes in the buffer B for the +extension B. + +If the B considers the extension data acceptable it must return +1. If it returns 0 or a negative value a fatal handshake error occurs +using the TLS alert value specified in B<*al>. + +The buffer B is a temporary internal buffer which will not be valid after +the callback returns. + +=head1 NOTES + +The B and B parameters can be set to arbitrary values +which will be passed to the corresponding callbacks. They can, for example, +be used to store the extension data received in a convenient structure or +pass the extension data to be added or freed when adding extensions. + +The B parameter corresponds to the B field of +RFC5246 et al. It is B a NID. + +If the same custom extension type is received multiple times a fatal +B alert is sent and the handshake aborts. If a custom extension +is received in ServerHello which was not sent in ClientHello a fatal +B alert is sent and the handshake is aborted. The +ServerHello B callback is only called if the corresponding extension +was received in ClientHello. This is compliant with the TLS specifications. +This behaviour ensures that each callback is called at most once and that +an application can never send unsolicited extensions. + +=head1 RETURN VALUES + +SSL_CTX_add_client_custom_ext() and SSL_CTX_add_server_custom_ext() return 1 for +success and 0 for failure. A failure can occur if an attempt is made to +add the same B more than once, if an attempt is made to use an +extension type handled internally by OpenSSL or if an internal error occurs +(for example a memory allocation failure). + +SSL_extension_supported() returns 1 if the extension B is handled +internally by OpenSSL and 0 otherwise. + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod index 8794eb7ac360bc..94c55b8045359f 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_rsa_callback.pod @@ -70,7 +70,7 @@ the TLS standard, when the RSA key can be used for signing only, that is for export ciphers. Using ephemeral RSA key exchange for other purposes violates the standard and can break interoperability with clients. It is therefore strongly recommended to not use ephemeral RSA key -exchange and use EDH (Ephemeral Diffie-Hellman) key exchange instead +exchange and use DHE (Ephemeral Diffie-Hellman) key exchange instead in order to achieve forward secrecy (see L). 
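Looking back at the custom extension callbacks documented above, a minimal client side sketch follows. The extension type 1000 and the payload are made up for illustration; a real application must pick a type that does not clash with extensions handled internally by OpenSSL.

 #include <string.h>
 #include <openssl/ssl.h>

 #define MY_EXT_TYPE 1000         /* hypothetical extension_type value */

 static int my_add_cb(SSL *s, unsigned int ext_type,
                      const unsigned char **out, size_t *outlen,
                      int *al, void *add_arg)
 {
     static const unsigned char payload[] = "example";

     *out = payload;
     *outlen = sizeof(payload) - 1;
     return 1;                    /* include the extension in ClientHello */
 }

 static int my_parse_cb(SSL *s, unsigned int ext_type,
                        const unsigned char *in, size_t inlen,
                        int *al, void *parse_arg)
 {
     /* A real callback would validate 'in' and set *al before returning
      * 0 or a negative value on failure. */
     return 1;
 }

 static int register_my_ext(SSL_CTX *ctx)
 {
     return SSL_CTX_add_client_custom_ext(ctx, MY_EXT_TYPE,
                                          my_add_cb, NULL, NULL,
                                          my_parse_cb, NULL);
 }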
diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod index 10be95fdb1099f..80321b8580e3f8 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod @@ -109,10 +109,9 @@ this B, the last item added into B will be checked. =head1 NOTES -The internal certificate store of OpenSSL can hold two private key/certificate -pairs at a time: one key/certificate of type RSA and one key/certificate -of type DSA. The certificate used depends on the cipher select, see -also L. +The internal certificate store of OpenSSL can hold several private +key/certificate pairs at a time. The certificate used depends on the +cipher selected, see also L. When reading certificates and private keys from file, files of type SSL_FILETYPE_ASN1 (also known as B, binary encoding) can only contain @@ -122,16 +121,13 @@ Files of type SSL_FILETYPE_PEM can contain more than one item. SSL_CTX_use_certificate_chain_file() adds the first certificate found in the file to the certificate store. The other certificates are added -to the store of chain certificates using -L. -There exists only one extra chain store, so that the same chain is appended -to both types of certificates, RSA and DSA! If it is not intended to use -both type of certificate at the same time, it is recommended to use the -SSL_CTX_use_certificate_chain_file() instead of the -SSL_CTX_use_certificate_file() function in order to allow the use of -complete certificate chains even when no trusted CA storage is used or -when the CA issuing the certificate shall not be added to the trusted -CA storage. +to the store of chain certificates using L. Note: versions of OpenSSL before 1.0.2 only had a single +certificate chain store for all certificate types, OpenSSL 1.0.2 and later +have a separate chain store for each type. SSL_CTX_use_certificate_chain_file() +should be used instead of the SSL_CTX_use_certificate_file() function in order +to allow the use of complete certificate chains even when no trusted CA +storage is used or when the CA issuing the certificate shall not be added to +the trusted CA storage. If additional certificates are needed to complete the chain during the TLS negotiation, CA certificates are additionally looked up in the diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_psk_identity_hint.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_psk_identity_hint.pod index 9da7201a99902e..12db0daa199f17 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_psk_identity_hint.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_psk_identity_hint.pod @@ -83,7 +83,12 @@ Return values from the server callback are interpreted as follows: =over 4 -=item > 0 +=item Z<>0 + +PSK identity was not found. An "unknown_psk_identity" alert message +will be sent and the connection setup fails. + +=item E0 PSK identity was found and the server callback has provided the PSK successfully in parameter B. Return value is the length of @@ -96,11 +101,6 @@ data to B and return the length of the random data, so the connection will fail with decryption_error before it will be finished completely. -=item Z<>0 - -PSK identity was not found. An "unknown_psk_identity" alert message -will be sent and the connection setup fails. 
- =back =cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod new file mode 100644 index 00000000000000..da7935c83d0a93 --- /dev/null +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod @@ -0,0 +1,46 @@ +=pod + +=head1 NAME + +SSL_CTX_use_serverinfo, SSL_CTX_use_serverinfo_file + +=head1 SYNOPSIS + + #include + + int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, + size_t serverinfo_length); + + int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file); + +=head1 DESCRIPTION + +These functions load "serverinfo" TLS ServerHello Extensions into the SSL_CTX. +A "serverinfo" extension is returned in response to an empty ClientHello +Extension. + +SSL_CTX_use_serverinfo() loads one or more serverinfo extensions from +a byte array into B. The extensions must be concatenated into a +sequence of bytes. Each extension must consist of a 2-byte Extension Type, +a 2-byte length, and then length bytes of extension_data. + +SSL_CTX_use_serverinfo_file() loads one or more serverinfo extensions from +B into B. The extensions must be in PEM format. Each extension +must consist of a 2-byte Extension Type, a 2-byte length, and then length +bytes of extension_data. Each PEM extension name must begin with the phrase +"BEGIN SERVERINFO FOR ". + +=head1 NOTES + +=head1 RETURN VALUES + +On success, the functions return 1. +On failure, the functions return 0. Check out the error stack to find out +the reason. + +=head1 SEE ALSO + +=head1 HISTORY + + +=cut diff --git a/deps/openssl/openssl/doc/ssl/SSL_accept.pod b/deps/openssl/openssl/doc/ssl/SSL_accept.pod index 22394441741f19..89ad6bd0baad4a 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_accept.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_accept.pod @@ -21,10 +21,7 @@ B by setting an underlying B. The behaviour of SSL_accept() depends on the underlying BIO. If the underlying BIO is B, SSL_accept() will only return once the -handshake has been finished or an error occurred, except for SGC (Server -Gated Cryptography). For SGC, SSL_accept() may return with -1, but -SSL_get_error() will yield B and SSL_accept() -should be called again. +handshake has been finished or an error occurred. If the underlying BIO is B, SSL_accept() will also return when the underlying BIO could not satisfy the needs of SSL_accept() diff --git a/deps/openssl/openssl/doc/ssl/SSL_do_handshake.pod b/deps/openssl/openssl/doc/ssl/SSL_do_handshake.pod index b35ddf5f14e149..8b590c9f16a615 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_do_handshake.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_do_handshake.pod @@ -23,10 +23,7 @@ L. The behaviour of SSL_do_handshake() depends on the underlying BIO. If the underlying BIO is B, SSL_do_handshake() will only return -once the handshake has been finished or an error occurred, except for SGC -(Server Gated Cryptography). For SGC, SSL_do_handshake() may return with -1, -but SSL_get_error() will yield B and -SSL_do_handshake() should be called again. +once the handshake has been finished or an error occurred. 
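For the non-blocking case continued just below, a handshake is typically driven by a small retry loop around SSL_do_handshake() and SSL_get_error(); waiting for the underlying BIO to become readable or writable is application specific and only sketched in a comment here.

 #include <openssl/ssl.h>

 static int do_handshake(SSL *ssl)
 {
     for (;;) {
         int rv = SSL_do_handshake(ssl);

         if (rv == 1)
             return 1;            /* handshake finished */

         switch (SSL_get_error(ssl, rv)) {
         case SSL_ERROR_WANT_READ:
         case SSL_ERROR_WANT_WRITE:
             /* Wait (select/poll/epoll) until the underlying BIO can be
              * read or written, then call SSL_do_handshake() again. */
             continue;
         default:
             return 0;            /* fatal error or connection failure */
         }
     }
 }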
If the underlying BIO is B, SSL_do_handshake() will also return when the underlying BIO could not satisfy the needs of SSL_do_handshake() diff --git a/deps/openssl/openssl/doc/ssl/SSL_shutdown.pod b/deps/openssl/openssl/doc/ssl/SSL_shutdown.pod index 85d4a64b03f9ed..efbff5a0a3230a 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_shutdown.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_shutdown.pod @@ -104,7 +104,7 @@ erroneous SSL_ERROR_SYSCALL may be flagged even though no error occurred. The shutdown was successfully completed. The "close notify" alert was sent and the peer's "close notify" alert was received. -=item -1 +=item E0 The shutdown was not successful because a fatal error occurred either at the protocol level or a connection failure occurred. It can also occur if diff --git a/deps/openssl/openssl/doc/ssl/ssl.pod b/deps/openssl/openssl/doc/ssl/ssl.pod index 660489a2224343..242087e691e35a 100644 --- a/deps/openssl/openssl/doc/ssl/ssl.pod +++ b/deps/openssl/openssl/doc/ssl/ssl.pod @@ -380,6 +380,10 @@ session instead of a context. =item int B(SSL_CTX *ctx, char *file, int type); +=item X509 *B(const SSL_CTX *ctx); + +=item EVP_PKEY *B(const SSL_CTX *ctx); + =item void B(SSL_CTX *ctx, unsigned int (*callback)(SSL *ssl, const char *hint, char *identity, unsigned int max_identity_len, unsigned char *psk, unsigned int max_psk_len)); =item int B(SSL_CTX *ctx, const char *hint); @@ -513,7 +517,7 @@ connection defined in the B structure. =item X509 *B(const SSL *ssl); -=item EVP_PKEY *B(SSL *ssl); +=item EVP_PKEY *B(const SSL *ssl); =item int B(const SSL *ssl); diff --git a/deps/openssl/openssl/doc/ssleay.txt b/deps/openssl/openssl/doc/ssleay.txt index 4d2e7148681dc4..c9b29bd97fc4ed 100644 --- a/deps/openssl/openssl/doc/ssleay.txt +++ b/deps/openssl/openssl/doc/ssleay.txt @@ -6026,7 +6026,7 @@ one at a time, or use 'aliases' to specify the preference and order for the ciphers. There are a large number of aliases, but the most importaint are -kRSA, kDHr, kDHd and kEDH for key exchange types. +kRSA, kDHr, kDHd and kDHE for key exchange types. aRSA, aDSS, aNULL and aDH for authentication DES, 3DES, RC4, RC2, IDEA and eNULL for ciphers diff --git a/deps/openssl/openssl/e_os.h b/deps/openssl/openssl/e_os.h index 758448596a1625..45fef691a09411 100644 --- a/deps/openssl/openssl/e_os.h +++ b/deps/openssl/openssl/e_os.h @@ -277,7 +277,7 @@ extern "C" { */ # define _WIN32_WINNT 0x0400 # endif -# if !defined(OPENSSL_NO_SOCK) && defined(_WIN32_WINNT) +# if !defined(OPENSSL_NO_SOCK) && (defined(_WIN32_WINNT) || defined(_WIN32_WCE)) /* * Just like defining _WIN32_WINNT including winsock2.h implies * certain "discipline" for maintaining [broad] binary compatibility. 
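Editor's note: the SSL_shutdown.pod hunk above widens the fatal-error case from the literal "-1" to "less than 0"; the other documented results remain 0 (our "close notify" sent, the peer's not yet received) and 1 (bidirectional shutdown complete). A common calling pattern, sketched under exactly those documented semantics:

    #include <stdio.h>
    #include <openssl/ssl.h>
    #include <openssl/err.h>

    static int shutdown_connection(SSL *ssl)
    {
        int rc = SSL_shutdown(ssl);

        if (rc == 0)
            rc = SSL_shutdown(ssl);       /* call again to wait for the peer's close_notify */
        if (rc < 0)
            ERR_print_errors_fp(stderr);  /* fatal: protocol error or connection failure */
        return rc;                        /* 1 = complete, 0 / <0 as documented above */
    }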
@@ -293,6 +293,9 @@ extern "C" { # include # include # include +# if defined(_WIN32_WCE) && !defined(EACCES) +# define EACCES 13 +# endif # include # ifdef _WIN64 # define strlen(s) _strlen31(s) @@ -314,8 +317,8 @@ static __inline unsigned int _strlen31(const char *str) # undef isupper # undef isxdigit # endif -# if defined(_MSC_VER) && !defined(_DLL) && defined(stdin) -# if _MSC_VER>=1300 +# if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(_DLL) && defined(stdin) +# if _MSC_VER>=1300 && _MSC_VER<1600 # undef stdin # undef stdout # undef stderr @@ -323,7 +326,7 @@ FILE *__iob_func(); # define stdin (&__iob_func()[0]) # define stdout (&__iob_func()[1]) # define stderr (&__iob_func()[2]) -# elif defined(I_CAN_LIVE_WITH_LNK4049) +# elif _MSC_VER<1300 && defined(I_CAN_LIVE_WITH_LNK4049) # undef stdin # undef stdout # undef stderr @@ -498,6 +501,10 @@ typedef unsigned long clock_t; /*************/ +# if defined(OPENSSL_NO_SOCK) && !defined(OPENSSL_NO_DGRAM) +# define OPENSSL_NO_DGRAM +# endif + # ifdef USE_SOCKETS # if defined(WINDOWS) || defined(MSDOS) /* windows world */ diff --git a/deps/openssl/openssl/e_os2.h b/deps/openssl/openssl/e_os2.h index d400ac77cf17d6..613607f8387e54 100644 --- a/deps/openssl/openssl/e_os2.h +++ b/deps/openssl/openssl/e_os2.h @@ -101,7 +101,7 @@ extern "C" { # undef OPENSSL_SYS_UNIX # define OPENSSL_SYS_WIN32_UWIN # else -# if defined(__CYGWIN32__) || defined(OPENSSL_SYSNAME_CYGWIN32) +# if defined(__CYGWIN__) || defined(OPENSSL_SYSNAME_CYGWIN) # undef OPENSSL_SYS_UNIX # define OPENSSL_SYS_WIN32_CYGWIN # else diff --git a/deps/openssl/openssl/engines/Makefile b/deps/openssl/openssl/engines/Makefile index 2fa9534401b1a1..da6c8750b5ae44 100644 --- a/deps/openssl/openssl/engines/Makefile +++ b/deps/openssl/openssl/engines/Makefile @@ -111,7 +111,10 @@ install: for l in $(LIBNAMES); do \ ( echo installing $$l; \ pfx=lib; \ - if [ "$(PLATFORM)" != "Cygwin" ]; then \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ + sfx=".so"; \ + cp cyg$$l.dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \ + else \ case "$(CFLAGS)" in \ *DSO_BEOS*) sfx=".so";; \ *DSO_DLFCN*) sfx=`expr "$(SHLIB_EXT)" : '.*\(\.[a-z][a-z]*\)' \| ".so"`;; \ @@ -120,9 +123,6 @@ install: *) sfx=".bad";; \ esac; \ cp $$pfx$$l$$sfx $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \ - else \ - sfx=".so"; \ - cp cyg$$l.dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \ fi; \ chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new; \ mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$$pfx$$l$$sfx ); \ @@ -213,17 +213,17 @@ e_atalla.o: ../include/openssl/symhacks.h ../include/openssl/x509.h e_atalla.o: ../include/openssl/x509_vfy.h e_atalla.c e_atalla_err.c e_atalla.o: e_atalla_err.h vendor_defns/atalla.h e_capi.o: ../include/openssl/asn1.h ../include/openssl/bio.h -e_capi.o: ../include/openssl/bn.h ../include/openssl/buffer.h -e_capi.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h -e_capi.o: ../include/openssl/ec.h ../include/openssl/ecdh.h -e_capi.o: ../include/openssl/ecdsa.h ../include/openssl/engine.h -e_capi.o: ../include/openssl/evp.h ../include/openssl/lhash.h -e_capi.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h -e_capi.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -e_capi.o: ../include/openssl/ossl_typ.h ../include/openssl/pkcs7.h -e_capi.o: ../include/openssl/safestack.h 
../include/openssl/sha.h -e_capi.o: ../include/openssl/stack.h ../include/openssl/symhacks.h -e_capi.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h e_capi.c +e_capi.o: ../include/openssl/buffer.h ../include/openssl/crypto.h +e_capi.o: ../include/openssl/e_os2.h ../include/openssl/ec.h +e_capi.o: ../include/openssl/ecdh.h ../include/openssl/ecdsa.h +e_capi.o: ../include/openssl/engine.h ../include/openssl/evp.h +e_capi.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h +e_capi.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h +e_capi.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h +e_capi.o: ../include/openssl/pkcs7.h ../include/openssl/safestack.h +e_capi.o: ../include/openssl/sha.h ../include/openssl/stack.h +e_capi.o: ../include/openssl/symhacks.h ../include/openssl/x509.h +e_capi.o: ../include/openssl/x509_vfy.h e_capi.c e_chil.o: ../include/openssl/asn1.h ../include/openssl/bio.h e_chil.o: ../include/openssl/bn.h ../include/openssl/buffer.h e_chil.o: ../include/openssl/crypto.h ../include/openssl/dh.h diff --git a/deps/openssl/openssl/engines/ccgost/Makefile b/deps/openssl/openssl/engines/ccgost/Makefile index d661c108285143..2f36580836e569 100644 --- a/deps/openssl/openssl/engines/ccgost/Makefile +++ b/deps/openssl/openssl/engines/ccgost/Makefile @@ -45,7 +45,10 @@ install: set -e; \ echo installing $(LIBNAME); \ pfx=lib; \ - if [ "$(PLATFORM)" != "Cygwin" ]; then \ + if expr "$(PLATFORM)" : "Cygwin" >/dev/null; then \ + sfx=".so"; \ + cp cyg$(LIBNAME).dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \ + else \ case "$(CFLAGS)" in \ *DSO_BEOS*) sfx=".so";; \ *DSO_DLFCN*) sfx=`expr "$(SHLIB_EXT)" : '.*\(\.[a-z][a-z]*\)' \| ".so"`;; \ @@ -54,9 +57,6 @@ install: *) sfx=".bad";; \ esac; \ cp $${pfx}$(LIBNAME)$$sfx $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \ - else \ - sfx=".so"; \ - cp cyg$(LIBNAME).dll $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \ fi; \ chmod 555 $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new; \ mv -f $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx.new $(INSTALL_PREFIX)$(INSTALLTOP)/$(LIBDIR)/engines/$${pfx}$(LIBNAME)$$sfx; \ @@ -74,8 +74,7 @@ depend: fi files: - - + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO lint: lint -DLINT $(INCLUDES) $(SRC)>fluff diff --git a/deps/openssl/openssl/engines/ccgost/gost89.c b/deps/openssl/openssl/engines/ccgost/gost89.c index 945509db809d8a..4ff4ddd7a56531 100644 --- a/deps/openssl/openssl/engines/ccgost/gost89.c +++ b/deps/openssl/openssl/engines/ccgost/gost89.c @@ -221,7 +221,7 @@ static void kboxinit(gost_ctx * c, const gost_subst_block * b) int i; for (i = 0; i < 256; i++) { - c->k87[i] = (b->k8[i >> 4] << 4 | b->k7[i & 15]) << 24; + c->k87[i] = (word32) (b->k8[i >> 4] << 4 | b->k7[i & 15]) << 24; c->k65[i] = (b->k6[i >> 4] << 4 | b->k5[i & 15]) << 16; c->k43[i] = (b->k4[i >> 4] << 4 | b->k3[i & 15]) << 8; c->k21[i] = b->k2[i >> 4] << 4 | b->k1[i & 15]; @@ -242,8 +242,8 @@ static word32 f(gost_ctx * c, word32 x) void gostcrypt(gost_ctx * c, const byte * in, byte * out) { register word32 n1, n2; /* As named in the GOST */ - n1 = in[0] | (in[1] << 8) | (in[2] << 16) | (in[3] << 24); - n2 = in[4] | (in[5] << 8) | (in[6] << 16) | (in[7] << 24); + n1 = in[0] | (in[1] << 8) | (in[2] << 16) | ((word32) in[3] << 24); + n2 = in[4] | (in[5] << 8) | (in[6] << 16) | ((word32) in[7] << 24); /* Instead of swapping halves, swap names 
each round */ n2 ^= f(c, n1 + c->k[0]); @@ -296,8 +296,8 @@ void gostcrypt(gost_ctx * c, const byte * in, byte * out) void gostdecrypt(gost_ctx * c, const byte * in, byte * out) { register word32 n1, n2; /* As named in the GOST */ - n1 = in[0] | (in[1] << 8) | (in[2] << 16) | (in[3] << 24); - n2 = in[4] | (in[5] << 8) | (in[6] << 16) | (in[7] << 24); + n1 = in[0] | (in[1] << 8) | (in[2] << 16) | ((word32) in[3] << 24); + n2 = in[4] | (in[5] << 8) | (in[6] << 16) | ((word32) in[7] << 24); n2 ^= f(c, n1 + c->k[0]); n1 ^= f(c, n2 + c->k[1]); @@ -417,7 +417,8 @@ void gost_key(gost_ctx * c, const byte * k) int i, j; for (i = 0, j = 0; i < 8; i++, j += 4) { c->k[i] = - k[j] | (k[j + 1] << 8) | (k[j + 2] << 16) | (k[j + 3] << 24); + k[j] | (k[j + 1] << 8) | (k[j + 2] << 16) | ((word32) k[j + 3] << + 24); } } @@ -462,8 +463,10 @@ void mac_block(gost_ctx * c, byte * buffer, const byte * block) for (i = 0; i < 8; i++) { buffer[i] ^= block[i]; } - n1 = buffer[0] | (buffer[1] << 8) | (buffer[2] << 16) | (buffer[3] << 24); - n2 = buffer[4] | (buffer[5] << 8) | (buffer[6] << 16) | (buffer[7] << 24); + n1 = buffer[0] | (buffer[1] << 8) | (buffer[2] << 16) | ((word32) + buffer[3] << 24); + n2 = buffer[4] | (buffer[5] << 8) | (buffer[6] << 16) | ((word32) + buffer[7] << 24); /* Instead of swapping halves, swap names each round */ n2 ^= f(c, n1 + c->k[0]); diff --git a/deps/openssl/openssl/engines/ccgost/gost_crypt.c b/deps/openssl/openssl/engines/ccgost/gost_crypt.c index aae941e54b9eb8..2bbdc6ccd67ab4 100644 --- a/deps/openssl/openssl/engines/ccgost/gost_crypt.c +++ b/deps/openssl/openssl/engines/ccgost/gost_crypt.c @@ -251,13 +251,13 @@ static void gost_cnt_next(void *ctx, unsigned char *iv, unsigned char *buf) } else { memcpy(buf1, iv, 8); } - g = buf1[0] | (buf1[1] << 8) | (buf1[2] << 16) | (buf1[3] << 24); + g = buf1[0] | (buf1[1] << 8) | (buf1[2] << 16) | ((word32) buf1[3] << 24); g += 0x01010101; buf1[0] = (unsigned char)(g & 0xff); buf1[1] = (unsigned char)((g >> 8) & 0xff); buf1[2] = (unsigned char)((g >> 16) & 0xff); buf1[3] = (unsigned char)((g >> 24) & 0xff); - g = buf1[4] | (buf1[5] << 8) | (buf1[6] << 16) | (buf1[7] << 24); + g = buf1[4] | (buf1[5] << 8) | (buf1[6] << 16) | ((word32) buf1[7] << 24); go = g; g += 0x01010104; if (go > g) /* overflow */ diff --git a/deps/openssl/openssl/engines/ccgost/gost_pmeth.c b/deps/openssl/openssl/engines/ccgost/gost_pmeth.c index 9af7b06d43aa73..a2c7cf27d8efef 100644 --- a/deps/openssl/openssl/engines/ccgost/gost_pmeth.c +++ b/deps/openssl/openssl/engines/ccgost/gost_pmeth.c @@ -87,6 +87,10 @@ static int pkey_gost_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) } break; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = pctx->md; + return 1; + case EVP_PKEY_CTRL_PKCS7_ENCRYPT: case EVP_PKEY_CTRL_PKCS7_DECRYPT: case EVP_PKEY_CTRL_PKCS7_SIGN: @@ -447,6 +451,10 @@ static int pkey_gost_mac_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) } break; + case EVP_PKEY_CTRL_GET_MD: + *(const EVP_MD **)p2 = data->md; + return 1; + case EVP_PKEY_CTRL_PKCS7_ENCRYPT: case EVP_PKEY_CTRL_PKCS7_DECRYPT: case EVP_PKEY_CTRL_PKCS7_SIGN: diff --git a/deps/openssl/openssl/engines/e_capi.c b/deps/openssl/openssl/engines/e_capi.c index 43dd3106c1cbde..f4cd2ffe7fa16d 100644 --- a/deps/openssl/openssl/engines/e_capi.c +++ b/deps/openssl/openssl/engines/e_capi.c @@ -54,22 +54,27 @@ #include #include +#include + #include -#include -#include #ifdef OPENSSL_SYS_WIN32 # ifndef OPENSSL_NO_CAPIENG +# include +# include # include -# include - # ifndef _WIN32_WINNT # define 
_WIN32_WINNT 0x0400 # endif +# include # include +# include +# ifndef alloca +# define alloca _alloca +# endif /* * This module uses several "new" interfaces, among which is @@ -832,7 +837,7 @@ int capi_rsa_sign(int dtype, const unsigned char *m, unsigned int m_len, /* Finally sign it */ slen = RSA_size(rsa); - if (!CryptSignHashA(hash, capi_key->keyspec, NULL, 0, sigret, &slen)) { + if (!CryptSignHash(hash, capi_key->keyspec, NULL, 0, sigret, &slen)) { CAPIerr(CAPI_F_CAPI_RSA_SIGN, CAPI_R_ERROR_SIGNING_HASH); capi_addlasterror(); goto err; @@ -956,7 +961,7 @@ static DSA_SIG *capi_dsa_do_sign(const unsigned char *digest, int dlen, /* Finally sign it */ slen = sizeof(csigbuf); - if (!CryptSignHashA(hash, capi_key->keyspec, NULL, 0, csigbuf, &slen)) { + if (!CryptSignHash(hash, capi_key->keyspec, NULL, 0, csigbuf, &slen)) { CAPIerr(CAPI_F_CAPI_DSA_DO_SIGN, CAPI_R_ERROR_SIGNING_HASH); capi_addlasterror(); goto err; @@ -1025,7 +1030,7 @@ static void capi_adderror(DWORD err) ERR_add_error_data(2, "Error code= 0x", errstr); } -static char *wide_to_asc(LPWSTR wstr) +static char *wide_to_asc(LPCWSTR wstr) { char *str; int len_0, sz; @@ -1054,10 +1059,10 @@ static char *wide_to_asc(LPWSTR wstr) static int capi_get_provname(CAPI_CTX * ctx, LPSTR * pname, DWORD * ptype, DWORD idx) { - LPSTR name; DWORD len, err; + LPTSTR name; CAPI_trace(ctx, "capi_get_provname, index=%d\n", idx); - if (!CryptEnumProvidersA(idx, NULL, 0, ptype, NULL, &len)) { + if (!CryptEnumProviders(idx, NULL, 0, ptype, NULL, &len)) { err = GetLastError(); if (err == ERROR_NO_MORE_ITEMS) return 2; @@ -1065,8 +1070,11 @@ static int capi_get_provname(CAPI_CTX * ctx, LPSTR * pname, DWORD * ptype, capi_adderror(err); return 0; } - name = OPENSSL_malloc(len); - if (!CryptEnumProvidersA(idx, NULL, 0, ptype, name, &len)) { + if (sizeof(TCHAR) != sizeof(char)) + name = alloca(len); + else + name = OPENSSL_malloc(len); + if (!CryptEnumProviders(idx, NULL, 0, ptype, name, &len)) { err = GetLastError(); if (err == ERROR_NO_MORE_ITEMS) return 2; @@ -1074,8 +1082,11 @@ static int capi_get_provname(CAPI_CTX * ctx, LPSTR * pname, DWORD * ptype, capi_adderror(err); return 0; } - *pname = name; - CAPI_trace(ctx, "capi_get_provname, returned name=%s, type=%d\n", name, + if (sizeof(TCHAR) != sizeof(char)) + *pname = wide_to_asc((WCHAR *)name); + else + *pname = (char *)name; + CAPI_trace(ctx, "capi_get_provname, returned name=%s, type=%d\n", *pname, *ptype); return 1; @@ -1106,10 +1117,26 @@ static int capi_list_containers(CAPI_CTX * ctx, BIO *out) HCRYPTPROV hprov; DWORD err, idx, flags, buflen = 0, clen; LPSTR cname; + LPTSTR cspname = NULL; + CAPI_trace(ctx, "Listing containers CSP=%s, type = %d\n", ctx->cspname, ctx->csptype); - if (!CryptAcquireContextA - (&hprov, NULL, ctx->cspname, ctx->csptype, CRYPT_VERIFYCONTEXT)) { + if (ctx->cspname && sizeof(TCHAR) != sizeof(char)) { + if ((clen = + MultiByteToWideChar(CP_ACP, 0, ctx->cspname, -1, NULL, 0))) { + cspname = alloca(clen * sizeof(WCHAR)); + MultiByteToWideChar(CP_ACP, 0, ctx->cspname, -1, (WCHAR *)cspname, + clen); + } + if (!cspname) { + CAPIerr(CAPI_F_CAPI_LIST_CONTAINERS, ERR_R_MALLOC_FAILURE); + capi_addlasterror(); + return 0; + } + } else + cspname = (TCHAR *)ctx->cspname; + if (!CryptAcquireContext + (&hprov, NULL, cspname, ctx->csptype, CRYPT_VERIFYCONTEXT)) { CAPIerr(CAPI_F_CAPI_LIST_CONTAINERS, CAPI_R_CRYPTACQUIRECONTEXT_ERROR); capi_addlasterror(); @@ -1139,7 +1166,8 @@ static int capi_list_containers(CAPI_CTX * ctx, BIO *out) flags = CRYPT_FIRST; else flags = 0; - if 
(!CryptGetProvParam(hprov, PP_ENUMCONTAINERS, cname, &clen, flags)) { + if (!CryptGetProvParam + (hprov, PP_ENUMCONTAINERS, (BYTE *) cname, &clen, flags)) { err = GetLastError(); if (err == ERROR_NO_MORE_ITEMS) goto done; @@ -1326,7 +1354,6 @@ int capi_list_certs(CAPI_CTX * ctx, BIO *out, char *id) CertFreeCertificateContext(cert); } else { for (idx = 0;; idx++) { - LPWSTR fname = NULL; cert = CertEnumCertificatesInStore(hstore, cert); if (!cert) break; @@ -1371,18 +1398,30 @@ static PCCERT_CONTEXT capi_find_cert(CAPI_CTX * ctx, const char *id, } } -static CAPI_KEY *capi_get_key(CAPI_CTX * ctx, const char *contname, - char *provname, DWORD ptype, DWORD keyspec) +static CAPI_KEY *capi_get_key(CAPI_CTX * ctx, const TCHAR *contname, + TCHAR *provname, DWORD ptype, DWORD keyspec) { CAPI_KEY *key; DWORD dwFlags = 0; key = OPENSSL_malloc(sizeof(CAPI_KEY)); - CAPI_trace(ctx, "capi_get_key, contname=%s, provname=%s, type=%d\n", - contname, provname, ptype); + if (sizeof(TCHAR) == sizeof(char)) + CAPI_trace(ctx, "capi_get_key, contname=%s, provname=%s, type=%d\n", + contname, provname, ptype); + else if (ctx && ctx->debug_level >= CAPI_DBG_TRACE && ctx->debug_file) { + /* above 'if' is optimization to minimize malloc-ations */ + char *_contname = wide_to_asc((WCHAR *)contname); + char *_provname = wide_to_asc((WCHAR *)provname); + + CAPI_trace(ctx, "capi_get_key, contname=%s, provname=%s, type=%d\n", + _contname, _provname, ptype); + if (_provname) + OPENSSL_free(_provname); + if (_contname) + OPENSSL_free(_contname); + } if (ctx->store_flags & CERT_SYSTEM_STORE_LOCAL_MACHINE) dwFlags = CRYPT_MACHINE_KEYSET; - if (!CryptAcquireContextA - (&key->hprov, contname, provname, ptype, dwFlags)) { + if (!CryptAcquireContext(&key->hprov, contname, provname, ptype, dwFlags)) { CAPIerr(CAPI_F_CAPI_GET_KEY, CAPI_R_CRYPTACQUIRECONTEXT_ERROR); capi_addlasterror(); goto err; @@ -1410,12 +1449,18 @@ static CAPI_KEY *capi_get_cert_key(CAPI_CTX * ctx, PCCERT_CONTEXT cert) pinfo = capi_get_prov_info(ctx, cert); if (!pinfo) goto err; - provname = wide_to_asc(pinfo->pwszProvName); - contname = wide_to_asc(pinfo->pwszContainerName); - if (!provname || !contname) - goto err; - key = capi_get_key(ctx, contname, provname, - pinfo->dwProvType, pinfo->dwKeySpec); + if (sizeof(TCHAR) != sizeof(char)) + key = capi_get_key(ctx, (TCHAR *)pinfo->pwszContainerName, + (TCHAR *)pinfo->pwszProvName, + pinfo->dwProvType, pinfo->dwKeySpec); + else { + provname = wide_to_asc(pinfo->pwszProvName); + contname = wide_to_asc(pinfo->pwszContainerName); + if (!provname || !contname) + goto err; + key = capi_get_key(ctx, (TCHAR *)contname, (TCHAR *)provname, + pinfo->dwProvType, pinfo->dwKeySpec); + } err: if (pinfo) @@ -1447,7 +1492,29 @@ CAPI_KEY *capi_find_key(CAPI_CTX * ctx, const char *id) break; case CAPI_LU_CONTNAME: - key = capi_get_key(ctx, id, ctx->cspname, ctx->csptype, ctx->keytype); + if (sizeof(TCHAR) != sizeof(char)) { + WCHAR *contname, *provname; + DWORD len; + + if ((len = MultiByteToWideChar(CP_ACP, 0, id, -1, NULL, 0)) && + (contname = alloca(len * sizeof(WCHAR)), + MultiByteToWideChar(CP_ACP, 0, id, -1, contname, len)) && + (len = + MultiByteToWideChar(CP_ACP, 0, ctx->cspname, -1, NULL, 0)) + && (provname = + alloca(len * sizeof(WCHAR)), MultiByteToWideChar(CP_ACP, + 0, + ctx->cspname, + -1, + provname, + len))) + key = + capi_get_key(ctx, (TCHAR *)contname, (TCHAR *)provname, + ctx->csptype, ctx->keytype); + } else + key = capi_get_key(ctx, (TCHAR *)id, + (TCHAR *)ctx->cspname, + ctx->csptype, ctx->keytype); break; } @@ 
-1512,8 +1579,19 @@ static int capi_ctx_set_provname(CAPI_CTX * ctx, LPSTR pname, DWORD type, CAPI_trace(ctx, "capi_ctx_set_provname, name=%s, type=%d\n", pname, type); if (check) { HCRYPTPROV hprov; - if (!CryptAcquireContextA(&hprov, NULL, pname, type, - CRYPT_VERIFYCONTEXT)) { + LPTSTR name = NULL; + + if (sizeof(TCHAR) != sizeof(char)) { + DWORD len; + if ((len = MultiByteToWideChar(CP_ACP, 0, pname, -1, NULL, 0))) { + name = alloca(len * sizeof(WCHAR)); + MultiByteToWideChar(CP_ACP, 0, pname, -1, (WCHAR *)name, len); + } + } else + name = (TCHAR *)pname; + + if (!name || !CryptAcquireContext(&hprov, NULL, name, type, + CRYPT_VERIFYCONTEXT)) { CAPIerr(CAPI_F_CAPI_CTX_SET_PROVNAME, CAPI_R_CRYPTACQUIRECONTEXT_ERROR); capi_addlasterror(); diff --git a/deps/openssl/openssl/engines/makeengines.com b/deps/openssl/openssl/engines/makeengines.com index 4838e389afa192..fd8bca978b806f 100644 --- a/deps/openssl/openssl/engines/makeengines.com +++ b/deps/openssl/openssl/engines/makeengines.com @@ -759,7 +759,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX" $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" $ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," +$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," $ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS $ ENDIF $! diff --git a/deps/openssl/openssl/engines/vendor_defns/hwcryptohook.h b/deps/openssl/openssl/engines/vendor_defns/hwcryptohook.h index 9fc7b89acbd139..fa06396a01a06d 100644 --- a/deps/openssl/openssl/engines/vendor_defns/hwcryptohook.h +++ b/deps/openssl/openssl/engines/vendor_defns/hwcryptohook.h @@ -214,12 +214,12 @@ typedef struct { int (*mutex_acquire) (HWCryptoHook_Mutex *); void (*mutex_release) (HWCryptoHook_Mutex *); void (*mutex_destroy) (HWCryptoHook_Mutex *); - /*- - * For greater efficiency, can use condition vars internally for - * synchronisation. In this case maxsimultaneous is ignored, but - * the other mutex stuff must be available. In singlethreaded - * programs, set everything to 0. - */ + /*- + * For greater efficiency, can use condition vars internally for + * synchronisation. In this case maxsimultaneous is ignored, but + * the other mutex stuff must be available. In singlethreaded + * programs, set everything to 0. + */ size_t condvarsize; int (*condvar_init) (HWCryptoHook_CondVar *, HWCryptoHook_CallerContext * cactx); @@ -227,103 +227,103 @@ typedef struct { void (*condvar_signal) (HWCryptoHook_CondVar *); void (*condvar_broadcast) (HWCryptoHook_CondVar *); void (*condvar_destroy) (HWCryptoHook_CondVar *); - /*- - * The semantics of acquiring and releasing mutexes and broadcasting - * and waiting on condition variables are expected to be those from - * POSIX threads (pthreads). The mutexes may be (in pthread-speak) - * fast mutexes, recursive mutexes, or nonrecursive ones. - * - * The _release/_signal/_broadcast and _destroy functions must - * always succeed when given a valid argument; if they are given an - * invalid argument then the program (crypto plugin + application) - * has an internal error, and they should abort the program. - */ + /*- + * The semantics of acquiring and releasing mutexes and broadcasting + * and waiting on condition variables are expected to be those from + * POSIX threads (pthreads). The mutexes may be (in pthread-speak) + * fast mutexes, recursive mutexes, or nonrecursive ones. 
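Editor's note: most of the e_capi.c churn above switches from the explicit ...A (ANSI) CryptoAPI entry points to the TCHAR-generic names and converts provider and container names with MultiByteToWideChar whenever sizeof(TCHAR) != sizeof(char). A stand-alone sketch of that narrow-to-wide step, using a heap buffer instead of the alloca() the engine uses; the helper name is mine, not from the patch.

    #include <stdlib.h>
    #include <windows.h>

    /* Convert a NUL-terminated ANSI string to a freshly allocated wide string.
       Returns NULL on failure; the caller frees the result. */
    static WCHAR *ansi_to_wide(const char *src)
    {
        WCHAR *dst;
        int len = MultiByteToWideChar(CP_ACP, 0, src, -1, NULL, 0);

        if (len <= 0)
            return NULL;
        dst = malloc((size_t)len * sizeof(WCHAR));
        if (dst != NULL)
            MultiByteToWideChar(CP_ACP, 0, src, -1, dst, len);
        return dst;
    }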
+ * + * The _release/_signal/_broadcast and _destroy functions must + * always succeed when given a valid argument; if they are given an + * invalid argument then the program (crypto plugin + application) + * has an internal error, and they should abort the program. + */ int (*getpassphrase) (const char *prompt_info, int *len_io, char *buf, HWCryptoHook_PassphraseContext * ppctx, HWCryptoHook_CallerContext * cactx); - /*- - * Passphrases and the prompt_info, if they contain high-bit-set - * characters, are UTF-8. The prompt_info may be a null pointer if - * no prompt information is available (it should not be an empty - * string). It will not contain text like `enter passphrase'; - * instead it might say something like `Operator Card for John - * Smith' or `SmartCard in nFast Module #1, Slot #1'. - * - * buf points to a buffer in which to return the passphrase; on - * entry *len_io is the length of the buffer. It should be updated - * by the callback. The returned passphrase should not be - * null-terminated by the callback. - */ + /*- + * Passphrases and the prompt_info, if they contain high-bit-set + * characters, are UTF-8. The prompt_info may be a null pointer if + * no prompt information is available (it should not be an empty + * string). It will not contain text like `enter passphrase'; + * instead it might say something like `Operator Card for John + * Smith' or `SmartCard in nFast Module #1, Slot #1'. + * + * buf points to a buffer in which to return the passphrase; on + * entry *len_io is the length of the buffer. It should be updated + * by the callback. The returned passphrase should not be + * null-terminated by the callback. + */ int (*getphystoken) (const char *prompt_info, const char *wrong_info, HWCryptoHook_PassphraseContext * ppctx, HWCryptoHook_CallerContext * cactx); - /*- - * Requests that the human user physically insert a different - * smartcard, DataKey, etc. The plugin should check whether the - * currently inserted token(s) are appropriate, and if they are it - * should not make this call. - * - * prompt_info is as before. wrong_info is a description of the - * currently inserted token(s) so that the user is told what - * something is. wrong_info, like prompt_info, may be null, but - * should not be an empty string. Its contents should be - * syntactically similar to that of prompt_info. - */ - /*- - * Note that a single LoadKey operation might cause several calls to - * getpassphrase and/or requestphystoken. If requestphystoken is - * not provided (ie, a null pointer is passed) then the plugin may - * not support loading keys for which authorisation by several cards - * is required. If getpassphrase is not provided then cards with - * passphrases may not be supported. - * - * getpassphrase and getphystoken do not need to check that the - * passphrase has been entered correctly or the correct token - * inserted; the crypto plugin will do that. If this is not the - * case then the crypto plugin is responsible for calling these - * routines again as appropriate until the correct token(s) and - * passphrase(s) are supplied as required, or until any retry limits - * implemented by the crypto plugin are reached. - * - * In either case, the application must allow the user to say `no' - * or `cancel' to indicate that they do not know the passphrase or - * have the appropriate token; this should cause the callback to - * return nonzero indicating error. - */ + /*- + * Requests that the human user physically insert a different + * smartcard, DataKey, etc. 
The plugin should check whether the + * currently inserted token(s) are appropriate, and if they are it + * should not make this call. + * + * prompt_info is as before. wrong_info is a description of the + * currently inserted token(s) so that the user is told what + * something is. wrong_info, like prompt_info, may be null, but + * should not be an empty string. Its contents should be + * syntactically similar to that of prompt_info. + */ + /*- + * Note that a single LoadKey operation might cause several calls to + * getpassphrase and/or requestphystoken. If requestphystoken is + * not provided (ie, a null pointer is passed) then the plugin may + * not support loading keys for which authorisation by several cards + * is required. If getpassphrase is not provided then cards with + * passphrases may not be supported. + * + * getpassphrase and getphystoken do not need to check that the + * passphrase has been entered correctly or the correct token + * inserted; the crypto plugin will do that. If this is not the + * case then the crypto plugin is responsible for calling these + * routines again as appropriate until the correct token(s) and + * passphrase(s) are supplied as required, or until any retry limits + * implemented by the crypto plugin are reached. + * + * In either case, the application must allow the user to say `no' + * or `cancel' to indicate that they do not know the passphrase or + * have the appropriate token; this should cause the callback to + * return nonzero indicating error. + */ void (*logmessage) (void *logstream, const char *message); - /*- - * A log message will be generated at least every time something goes - * wrong and an ErrMsgBuf is filled in (or would be if one was - * provided). Other diagnostic information may be written there too, - * including more detailed reasons for errors which are reported in an - * ErrMsgBuf. - * - * When a log message is generated, this callback is called. It - * should write a message to the relevant logging arrangements. - * - * The message string passed will be null-terminated and may be of arbitrary - * length. It will not be prefixed by the time and date, nor by the - * name of the library that is generating it - if this is required, - * the logmessage callback must do it. The message will not have a - * trailing newline (though it may contain internal newlines). - * - * If a null pointer is passed for logmessage a default function is - * used. The default function treats logstream as a FILE* which has - * been converted to a void*. If logstream is 0 it does nothing. - * Otherwise it prepends the date and time and library name and - * writes the message to logstream. Each line will be prefixed by a - * descriptive string containing the date, time and identity of the - * crypto plugin. Errors on the logstream are not reported - * anywhere, and the default function doesn't flush the stream, so - * the application must set the buffering how it wants it. - * - * The crypto plugin may also provide a facility to have copies of - * log messages sent elsewhere, and or for adjusting the verbosity - * of the log messages; any such facilities will be configured by - * external means. - */ + /*- + * A log message will be generated at least every time something goes + * wrong and an ErrMsgBuf is filled in (or would be if one was + * provided). Other diagnostic information may be written there too, + * including more detailed reasons for errors which are reported in an + * ErrMsgBuf. 
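Editor's note: the hwcryptohook.h hunks only re-indent these descriptive comments, but they do spell out the logmessage hook's contract: the string is NUL-terminated, carries no trailing newline, and has no timestamp or library-name prefix. A hedged sketch of a caller-supplied implementation that simply forwards to a FILE*; the default behaviour described further down in the same comment adds the date, time and plugin identity, which this one deliberately does not.

    #include <stdio.h>

    /* Matches the hook shown above: void (*logmessage)(void *logstream, const char *message); */
    static void app_logmessage(void *logstream, const char *message)
    {
        FILE *out = logstream != NULL ? (FILE *)logstream : stderr;

        fprintf(out, "%s\n", message);  /* message has no trailing newline */
        fflush(out);                    /* the default implementation does not flush */
    }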
+ * + * When a log message is generated, this callback is called. It + * should write a message to the relevant logging arrangements. + * + * The message string passed will be null-terminated and may be of arbitrary + * length. It will not be prefixed by the time and date, nor by the + * name of the library that is generating it - if this is required, + * the logmessage callback must do it. The message will not have a + * trailing newline (though it may contain internal newlines). + * + * If a null pointer is passed for logmessage a default function is + * used. The default function treats logstream as a FILE* which has + * been converted to a void*. If logstream is 0 it does nothing. + * Otherwise it prepends the date and time and library name and + * writes the message to logstream. Each line will be prefixed by a + * descriptive string containing the date, time and identity of the + * crypto plugin. Errors on the logstream are not reported + * anywhere, and the default function doesn't flush the stream, so + * the application must set the buffering how it wants it. + * + * The crypto plugin may also provide a facility to have copies of + * log messages sent elsewhere, and or for adjusting the verbosity + * of the log messages; any such facilities will be configured by + * external means. + */ } HWCryptoHook_InitInfo; typedef diff --git a/deps/openssl/openssl/makevms.com b/deps/openssl/openssl/makevms.com index 01ada3a57bf895..f6b3ff28e4d8a7 100755 --- a/deps/openssl/openssl/makevms.com +++ b/deps/openssl/openssl/makevms.com @@ -242,7 +242,7 @@ $ WRITE H_FILE "" $ WRITE H_FILE "#ifndef OPENSSL_SYS_VMS" $ WRITE H_FILE "# define OPENSSL_SYS_VMS" $ WRITE H_FILE "#endif" -$ +$! $! One of the best way to figure out what the list should be is to do $! the following on a Unix system: $! grep OPENSSL_NO_ crypto/*/*.h ssl/*.h engines/*.h engines/*/*.h|grep ':# *if'|sed -e 's/^.*def //'|sort|uniq @@ -274,6 +274,7 @@ $ CONFIG_LOGICALS := AES,- GMP,- GOST,- HASH_COMP,- + HEARTBEATS,- HMAC,- IDEA,- JPAKE,- @@ -292,6 +293,7 @@ $ CONFIG_LOGICALS := AES,- RFC3779,- RIPEMD,- RSA,- + SCTP,- SEED,- SHA,- SHA0,- @@ -302,6 +304,7 @@ $ CONFIG_LOGICALS := AES,- SRP,- SSL2,- SSL_INTERN,- + SSL_TRACE,- STACK,- STATIC_ENGINE,- STDIO,- @@ -346,7 +349,8 @@ $ CONFIG_DISABLE_RULES := RIJNDAEL/AES;- /MD2;- /RC5;- /RFC3779;- - /SCTP + /SCTP;- + /SSL_TRACE $ CONFIG_ENABLE_RULES := ZLIB_DYNAMIC/ZLIB;- /THREADS $ @@ -512,6 +516,7 @@ $ WRITE H_FILE "#define OPENSSL_NO_SETVBUF_IONBF" $ WRITE H_FILE "/* STCP support comes with TCPIP 5.7 ECO 2 " $ WRITE H_FILE " * enable on newer systems / 2012-02-24 arpadffy */" $ WRITE H_FILE "#define OPENSSL_NO_SCTP" +$ WRITE H_FILE "#define OPENSSL_NO_LIBUNBOUND" $ WRITE H_FILE "" $! $! Add in the common "crypto/opensslconf.h.in". @@ -825,7 +830,7 @@ $ @CRYPTO-LIB LIBRARY 'DEBUGGER' "''COMPILER'" "''TCPIP_TYPE'" - "''ISSEVEN'" "''BUILDPART'" "''POINTER_SIZE'" "''ZLIB'" $! $! Build The [.xxx.EXE.CRYPTO]*.EXE Test Applications. -$! +$! $!!! DISABLED, as these test programs lack any support $!!!$ @CRYPTO-LIB APPS 'DEBUGGER' "''COMPILER'" "''TCPIP_TYPE'" - $!!! "''ISSEVEN'" "''BUILDPART'" "''POINTER_SIZE'" "''ZLIB'" @@ -1017,9 +1022,9 @@ $! $! Tell The User We Don't Know What They Want. $! 
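Editor's note: among the configuration logicals added to makevms.com above is SSL_TRACE, the knob behind the new handshake-tracing code (t1_trce.c appears in the ssl Makefile further below). A sketch of enabling it from application code, assuming the SSL_trace()/SSL_CTX_set_msg_callback() pairing documented for 1.0.2; the BIO is intentionally left open for the lifetime of the context.

    #include <stdio.h>
    #include <openssl/ssl.h>
    #include <openssl/bio.h>

    /* Turn on handshake tracing when the build was not configured with no-ssl-trace. */
    static void enable_trace(SSL_CTX *ctx)
    {
    #ifndef OPENSSL_NO_SSL_TRACE
        BIO *err = BIO_new_fp(stderr, BIO_NOCLOSE);

        SSL_CTX_set_msg_callback(ctx, SSL_trace);
        SSL_CTX_set_msg_callback_arg(ctx, err);
    #else
        (void)ctx;   /* tracing compiled out */
    #endif
    }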
$ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "USAGE: @MAKEVMS.COM [Target] [Pointer size] [Debug option] " +$ WRITE SYS$OUTPUT "USAGE: @MAKEVMS.COM [Target] [Pointer size] [Debug option] " $ WRITE SYS$OUTPUT "" -$ WRITE SYS$OUTPUT "Example: @MAKEVMS.COM ALL """" NODEBUG " +$ WRITE SYS$OUTPUT "Example: @MAKEVMS.COM ALL """" NODEBUG DECC TCPIP" $ WRITE SYS$OUTPUT "" $ WRITE SYS$OUTPUT "The Target ",P1," Is Invalid. The Valid Target Options Are:" $ WRITE SYS$OUTPUT "" diff --git a/deps/openssl/openssl/openssl.spec b/deps/openssl/openssl/openssl.spec index 40ac462c7ffba2..909f2bfabf37ee 100644 --- a/deps/openssl/openssl/openssl.spec +++ b/deps/openssl/openssl/openssl.spec @@ -6,8 +6,7 @@ Release: 1 Summary: Secure Sockets Layer and cryptography libraries and tools Name: openssl -#Version: %{libmaj}.%{libmin}.%{librel} -Version: 1.0.1m +Version: 1.0.2a Source0: ftp://ftp.openssl.org/source/%{name}-%{version}.tar.gz License: OpenSSL Group: System Environment/Libraries diff --git a/deps/openssl/openssl/ssl/Makefile b/deps/openssl/openssl/ssl/Makefile index 8dd390e67f9903..a7bd4ee143c936 100644 --- a/deps/openssl/openssl/ssl/Makefile +++ b/deps/openssl/openssl/ssl/Makefile @@ -24,24 +24,24 @@ LIBSRC= \ s2_meth.c s2_srvr.c s2_clnt.c s2_lib.c s2_enc.c s2_pkt.c \ s3_meth.c s3_srvr.c s3_clnt.c s3_lib.c s3_enc.c s3_pkt.c s3_both.c s3_cbc.c \ s23_meth.c s23_srvr.c s23_clnt.c s23_lib.c s23_pkt.c \ - t1_meth.c t1_srvr.c t1_clnt.c t1_lib.c t1_enc.c \ + t1_meth.c t1_srvr.c t1_clnt.c t1_lib.c t1_enc.c t1_ext.c \ d1_meth.c d1_srvr.c d1_clnt.c d1_lib.c d1_pkt.c \ - d1_both.c d1_enc.c d1_srtp.c \ + d1_both.c d1_srtp.c \ ssl_lib.c ssl_err2.c ssl_cert.c ssl_sess.c \ ssl_ciph.c ssl_stat.c ssl_rsa.c \ - ssl_asn1.c ssl_txt.c ssl_algs.c \ - bio_ssl.c ssl_err.c kssl.c tls_srp.c t1_reneg.c ssl_utst.c + ssl_asn1.c ssl_txt.c ssl_algs.c ssl_conf.c \ + bio_ssl.c ssl_err.c kssl.c t1_reneg.c tls_srp.c t1_trce.c ssl_utst.c LIBOBJ= \ s2_meth.o s2_srvr.o s2_clnt.o s2_lib.o s2_enc.o s2_pkt.o \ s3_meth.o s3_srvr.o s3_clnt.o s3_lib.o s3_enc.o s3_pkt.o s3_both.o s3_cbc.o \ s23_meth.o s23_srvr.o s23_clnt.o s23_lib.o s23_pkt.o \ - t1_meth.o t1_srvr.o t1_clnt.o t1_lib.o t1_enc.o \ + t1_meth.o t1_srvr.o t1_clnt.o t1_lib.o t1_enc.o t1_ext.o \ d1_meth.o d1_srvr.o d1_clnt.o d1_lib.o d1_pkt.o \ - d1_both.o d1_enc.o d1_srtp.o\ + d1_both.o d1_srtp.o\ ssl_lib.o ssl_err2.o ssl_cert.o ssl_sess.o \ ssl_ciph.o ssl_stat.o ssl_rsa.o \ - ssl_asn1.o ssl_txt.o ssl_algs.o \ - bio_ssl.o ssl_err.o kssl.o tls_srp.o t1_reneg.o ssl_utst.o + ssl_asn1.o ssl_txt.o ssl_algs.o ssl_conf.o \ + bio_ssl.o ssl_err.o kssl.o t1_reneg.o tls_srp.o t1_trce.o ssl_utst.o SRC= $(LIBSRC) @@ -165,27 +165,6 @@ d1_clnt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h d1_clnt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h d1_clnt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h d1_clnt.c d1_clnt.o: kssl_lcl.h ssl_locl.h -d1_enc.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h -d1_enc.o: ../include/openssl/buffer.h ../include/openssl/comp.h -d1_enc.o: ../include/openssl/crypto.h ../include/openssl/dsa.h -d1_enc.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h -d1_enc.o: ../include/openssl/ec.h ../include/openssl/ecdh.h -d1_enc.o: ../include/openssl/ecdsa.h ../include/openssl/err.h -d1_enc.o: ../include/openssl/evp.h ../include/openssl/hmac.h -d1_enc.o: ../include/openssl/kssl.h ../include/openssl/lhash.h -d1_enc.o: ../include/openssl/md5.h ../include/openssl/obj_mac.h -d1_enc.o: ../include/openssl/objects.h 
../include/openssl/opensslconf.h -d1_enc.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h -d1_enc.o: ../include/openssl/pem.h ../include/openssl/pem2.h -d1_enc.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h -d1_enc.o: ../include/openssl/rand.h ../include/openssl/rsa.h -d1_enc.o: ../include/openssl/safestack.h ../include/openssl/sha.h -d1_enc.o: ../include/openssl/srtp.h ../include/openssl/ssl.h -d1_enc.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h -d1_enc.o: ../include/openssl/ssl3.h ../include/openssl/stack.h -d1_enc.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h -d1_enc.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h d1_enc.c -d1_enc.o: ssl_locl.h d1_lib.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h d1_lib.o: ../include/openssl/buffer.h ../include/openssl/comp.h d1_lib.o: ../include/openssl/crypto.h ../include/openssl/dsa.h @@ -610,8 +589,7 @@ s3_enc.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h s3_enc.o: ../include/openssl/stack.h ../include/openssl/symhacks.h s3_enc.o: ../include/openssl/tls1.h ../include/openssl/x509.h s3_enc.o: ../include/openssl/x509_vfy.h s3_enc.c ssl_locl.h -s3_lib.o: ../crypto/ec/ec_lcl.h ../e_os.h ../include/openssl/asn1.h -s3_lib.o: ../include/openssl/bio.h ../include/openssl/bn.h +s3_lib.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h s3_lib.o: ../include/openssl/buffer.h ../include/openssl/comp.h s3_lib.o: ../include/openssl/crypto.h ../include/openssl/dh.h s3_lib.o: ../include/openssl/dsa.h ../include/openssl/dtls1.h @@ -777,6 +755,27 @@ ssl_ciph.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h ssl_ciph.o: ../include/openssl/stack.h ../include/openssl/symhacks.h ssl_ciph.o: ../include/openssl/tls1.h ../include/openssl/x509.h ssl_ciph.o: ../include/openssl/x509_vfy.h ssl_ciph.c ssl_locl.h +ssl_conf.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h +ssl_conf.o: ../include/openssl/buffer.h ../include/openssl/comp.h +ssl_conf.o: ../include/openssl/conf.h ../include/openssl/crypto.h +ssl_conf.o: ../include/openssl/dh.h ../include/openssl/dsa.h +ssl_conf.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h +ssl_conf.o: ../include/openssl/ec.h ../include/openssl/ecdh.h +ssl_conf.o: ../include/openssl/ecdsa.h ../include/openssl/err.h +ssl_conf.o: ../include/openssl/evp.h ../include/openssl/hmac.h +ssl_conf.o: ../include/openssl/kssl.h ../include/openssl/lhash.h +ssl_conf.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h +ssl_conf.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h +ssl_conf.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h +ssl_conf.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h +ssl_conf.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h +ssl_conf.o: ../include/openssl/safestack.h ../include/openssl/sha.h +ssl_conf.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +ssl_conf.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +ssl_conf.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +ssl_conf.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +ssl_conf.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_conf.c +ssl_conf.o: ssl_locl.h ssl_err.o: ../include/openssl/asn1.h ../include/openssl/bio.h ssl_err.o: ../include/openssl/buffer.h ../include/openssl/comp.h ssl_err.o: ../include/openssl/crypto.h ../include/openssl/dtls1.h @@ -978,6 +977,26 @@ t1_enc.o: ../include/openssl/ssl3.h ../include/openssl/stack.h t1_enc.o: ../include/openssl/symhacks.h 
../include/openssl/tls1.h t1_enc.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_locl.h t1_enc.o: t1_enc.c +t1_ext.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h +t1_ext.o: ../include/openssl/buffer.h ../include/openssl/comp.h +t1_ext.o: ../include/openssl/crypto.h ../include/openssl/dsa.h +t1_ext.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h +t1_ext.o: ../include/openssl/ec.h ../include/openssl/ecdh.h +t1_ext.o: ../include/openssl/ecdsa.h ../include/openssl/err.h +t1_ext.o: ../include/openssl/evp.h ../include/openssl/hmac.h +t1_ext.o: ../include/openssl/kssl.h ../include/openssl/lhash.h +t1_ext.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h +t1_ext.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h +t1_ext.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h +t1_ext.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h +t1_ext.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h +t1_ext.o: ../include/openssl/safestack.h ../include/openssl/sha.h +t1_ext.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +t1_ext.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +t1_ext.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +t1_ext.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +t1_ext.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_locl.h +t1_ext.o: t1_ext.c t1_lib.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h t1_lib.o: ../include/openssl/buffer.h ../include/openssl/comp.h t1_lib.o: ../include/openssl/conf.h ../include/openssl/crypto.h @@ -1060,6 +1079,26 @@ t1_srvr.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h t1_srvr.o: ../include/openssl/stack.h ../include/openssl/symhacks.h t1_srvr.o: ../include/openssl/tls1.h ../include/openssl/x509.h t1_srvr.o: ../include/openssl/x509_vfy.h ssl_locl.h t1_srvr.c +t1_trce.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h +t1_trce.o: ../include/openssl/buffer.h ../include/openssl/comp.h +t1_trce.o: ../include/openssl/crypto.h ../include/openssl/dsa.h +t1_trce.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h +t1_trce.o: ../include/openssl/ec.h ../include/openssl/ecdh.h +t1_trce.o: ../include/openssl/ecdsa.h ../include/openssl/err.h +t1_trce.o: ../include/openssl/evp.h ../include/openssl/hmac.h +t1_trce.o: ../include/openssl/kssl.h ../include/openssl/lhash.h +t1_trce.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h +t1_trce.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h +t1_trce.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h +t1_trce.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h +t1_trce.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h +t1_trce.o: ../include/openssl/safestack.h ../include/openssl/sha.h +t1_trce.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +t1_trce.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +t1_trce.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +t1_trce.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +t1_trce.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssl_locl.h +t1_trce.o: t1_trce.c tls_srp.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h tls_srp.o: ../include/openssl/bn.h ../include/openssl/buffer.h tls_srp.o: ../include/openssl/comp.h ../include/openssl/crypto.h diff --git a/deps/openssl/openssl/ssl/d1_both.c b/deps/openssl/openssl/ssl/d1_both.c index c18ec03bd14b56..21048003bcd356 100644 --- a/deps/openssl/openssl/ssl/d1_both.c +++ 
b/deps/openssl/openssl/ssl/d1_both.c @@ -279,13 +279,17 @@ int dtls1_do_write(SSL *s, int type) (int)s->d1->w_msg_hdr.msg_len + DTLS1_HM_HEADER_LENGTH); - if (s->write_hash) - mac_size = EVP_MD_CTX_size(s->write_hash); - else + if (s->write_hash) { + if (s->enc_write_ctx + && EVP_CIPHER_CTX_mode(s->enc_write_ctx) == EVP_CIPH_GCM_MODE) + mac_size = 0; + else + mac_size = EVP_MD_CTX_size(s->write_hash); + } else mac_size = 0; if (s->enc_write_ctx && - (EVP_CIPHER_mode(s->enc_write_ctx->cipher) & EVP_CIPH_CBC_MODE)) + (EVP_CIPHER_CTX_mode(s->enc_write_ctx) == EVP_CIPH_CBC_MODE)) blocksize = 2 * EVP_CIPHER_block_size(s->enc_write_ctx->cipher); else blocksize = 0; @@ -962,59 +966,6 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) return (-1); } -int dtls1_send_finished(SSL *s, int a, int b, const char *sender, int slen) -{ - unsigned char *p, *d; - int i; - unsigned long l; - - if (s->state == a) { - d = (unsigned char *)s->init_buf->data; - p = &(d[DTLS1_HM_HEADER_LENGTH]); - - i = s->method->ssl3_enc->final_finish_mac(s, - sender, slen, - s->s3->tmp.finish_md); - s->s3->tmp.finish_md_len = i; - memcpy(p, s->s3->tmp.finish_md, i); - p += i; - l = i; - - /* - * Copy the finished so we can use it for renegotiation checks - */ - if (s->type == SSL_ST_CONNECT) { - OPENSSL_assert(i <= EVP_MAX_MD_SIZE); - memcpy(s->s3->previous_client_finished, s->s3->tmp.finish_md, i); - s->s3->previous_client_finished_len = i; - } else { - OPENSSL_assert(i <= EVP_MAX_MD_SIZE); - memcpy(s->s3->previous_server_finished, s->s3->tmp.finish_md, i); - s->s3->previous_server_finished_len = i; - } - -#ifdef OPENSSL_SYS_WIN16 - /* - * MSVC 1.5 does not clear the top bytes of the word unless I do - * this. - */ - l &= 0xffff; -#endif - - d = dtls1_set_message_header(s, d, SSL3_MT_FINISHED, l, 0, l); - s->init_num = (int)l + DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - - s->state = b; - } - - /* SSL3_ST_SEND_xxxxxx_HELLO_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} - /*- * for these 2 messages, we need to * ssl->enc_read_ctx re-init @@ -1055,77 +1006,6 @@ int dtls1_send_change_cipher_spec(SSL *s, int a, int b) return (dtls1_do_write(s, SSL3_RT_CHANGE_CIPHER_SPEC)); } -static int dtls1_add_cert_to_buf(BUF_MEM *buf, unsigned long *l, X509 *x) -{ - int n; - unsigned char *p; - - n = i2d_X509(x, NULL); - if (!BUF_MEM_grow_clean(buf, (int)(n + (*l) + 3))) { - SSLerr(SSL_F_DTLS1_ADD_CERT_TO_BUF, ERR_R_BUF_LIB); - return 0; - } - p = (unsigned char *)&(buf->data[*l]); - l2n3(n, p); - i2d_X509(x, &p); - *l += n + 3; - - return 1; -} - -unsigned long dtls1_output_cert_chain(SSL *s, X509 *x) -{ - unsigned char *p; - int i; - unsigned long l = 3 + DTLS1_HM_HEADER_LENGTH; - BUF_MEM *buf; - - /* TLSv1 sends a chain with nothing in it, instead of an alert */ - buf = s->init_buf; - if (!BUF_MEM_grow_clean(buf, 10)) { - SSLerr(SSL_F_DTLS1_OUTPUT_CERT_CHAIN, ERR_R_BUF_LIB); - return (0); - } - if (x != NULL) { - X509_STORE_CTX xs_ctx; - - if (!X509_STORE_CTX_init(&xs_ctx, s->ctx->cert_store, x, NULL)) { - SSLerr(SSL_F_DTLS1_OUTPUT_CERT_CHAIN, ERR_R_X509_LIB); - return (0); - } - - X509_verify_cert(&xs_ctx); - /* Don't leave errors in the queue */ - ERR_clear_error(); - for (i = 0; i < sk_X509_num(xs_ctx.chain); i++) { - x = sk_X509_value(xs_ctx.chain, i); - - if (!dtls1_add_cert_to_buf(buf, &l, x)) { - X509_STORE_CTX_cleanup(&xs_ctx); - return 0; - } - } - X509_STORE_CTX_cleanup(&xs_ctx); - } - /* Thawte special :-) */ - for (i 
= 0; i < sk_X509_num(s->ctx->extra_certs); i++) { - x = sk_X509_value(s->ctx->extra_certs, i); - if (!dtls1_add_cert_to_buf(buf, &l, x)) - return 0; - } - - l -= (3 + DTLS1_HM_HEADER_LENGTH); - - p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH]); - l2n3(l, p); - l += 3; - p = (unsigned char *)&(buf->data[0]); - p = dtls1_set_message_header(s, p, SSL3_MT_CERTIFICATE, l, 0, l); - - l += DTLS1_HM_HEADER_LENGTH; - return (l); -} - int dtls1_read_failed(SSL *s, int code) { if (code > 0) { @@ -1228,10 +1108,10 @@ int dtls1_buffer_message(SSL *s, int is_ccs) memcpy(frag->fragment, s->init_buf->data, s->init_num); if (is_ccs) { + /* For DTLS1_BAD_VER the header length is non-standard */ OPENSSL_assert(s->d1->w_msg_hdr.msg_len + - ((s->version == - DTLS1_VERSION) ? DTLS1_CCS_HEADER_LENGTH : 3) == - (unsigned int)s->init_num); + ((s->version==DTLS1_BAD_VER)?3:DTLS1_CCS_HEADER_LENGTH) + == (unsigned int)s->init_num); } else { OPENSSL_assert(s->d1->w_msg_hdr.msg_len + DTLS1_HM_HEADER_LENGTH == (unsigned int)s->init_num); diff --git a/deps/openssl/openssl/ssl/d1_clnt.c b/deps/openssl/openssl/ssl/d1_clnt.c index 1394781c047a8c..1858263e19c1ae 100644 --- a/deps/openssl/openssl/ssl/d1_clnt.c +++ b/deps/openssl/openssl/ssl/d1_clnt.c @@ -135,13 +135,29 @@ static const SSL_METHOD *dtls1_get_client_method(int ver) { if (ver == DTLS1_VERSION || ver == DTLS1_BAD_VER) return (DTLSv1_client_method()); + else if (ver == DTLS1_2_VERSION) + return (DTLSv1_2_client_method()); else return (NULL); } -IMPLEMENT_dtls1_meth_func(DTLSv1_client_method, +IMPLEMENT_dtls1_meth_func(DTLS1_VERSION, + DTLSv1_client_method, ssl_undefined_function, - dtls1_connect, dtls1_get_client_method) + dtls1_connect, + dtls1_get_client_method, DTLSv1_enc_data) + + IMPLEMENT_dtls1_meth_func(DTLS1_2_VERSION, + DTLSv1_2_client_method, + ssl_undefined_function, + dtls1_connect, + dtls1_get_client_method, DTLSv1_2_enc_data) + + IMPLEMENT_dtls1_meth_func(DTLS_ANY_VERSION, + DTLS_client_method, + ssl_undefined_function, + dtls1_connect, + dtls1_get_client_method, DTLSv1_2_enc_data) int dtls1_connect(SSL *s) { @@ -302,7 +318,7 @@ int dtls1_connect(SSL *s) ssl3_init_finished_mac(s); dtls1_start_timer(s); - ret = dtls1_client_hello(s); + ret = ssl3_client_hello(s); if (ret <= 0) goto end; @@ -456,7 +472,7 @@ int dtls1_connect(SSL *s) case SSL3_ST_CW_CERT_C: case SSL3_ST_CW_CERT_D: dtls1_start_timer(s); - ret = dtls1_send_client_certificate(s); + ret = ssl3_send_client_certificate(s); if (ret <= 0) goto end; s->state = SSL3_ST_CW_KEY_EXCH_A; @@ -466,7 +482,7 @@ int dtls1_connect(SSL *s) case SSL3_ST_CW_KEY_EXCH_A: case SSL3_ST_CW_KEY_EXCH_B: dtls1_start_timer(s); - ret = dtls1_send_client_key_exchange(s); + ret = ssl3_send_client_key_exchange(s); if (ret <= 0) goto end; @@ -511,7 +527,7 @@ int dtls1_connect(SSL *s) case SSL3_ST_CW_CERT_VRFY_A: case SSL3_ST_CW_CERT_VRFY_B: dtls1_start_timer(s); - ret = dtls1_send_client_verify(s); + ret = ssl3_send_client_verify(s); if (ret <= 0) goto end; #ifndef OPENSSL_NO_SCTP @@ -575,13 +591,13 @@ int dtls1_connect(SSL *s) case SSL3_ST_CW_FINISHED_B: if (!s->hit) dtls1_start_timer(s); - ret = dtls1_send_finished(s, - SSL3_ST_CW_FINISHED_A, - SSL3_ST_CW_FINISHED_B, - s->method-> - ssl3_enc->client_finished_label, - s->method-> - ssl3_enc->client_finished_label_len); + ret = ssl3_send_finished(s, + SSL3_ST_CW_FINISHED_A, + SSL3_ST_CW_FINISHED_B, + s->method-> + ssl3_enc->client_finished_label, + s->method-> + ssl3_enc->client_finished_label_len); if (ret <= 0) goto end; s->state = SSL3_ST_CW_FLUSH; @@ -777,140 
+793,18 @@ int dtls1_connect(SSL *s) return (ret); } -int dtls1_client_hello(SSL *s) -{ - unsigned char *buf; - unsigned char *p, *d; - unsigned int i, j; - unsigned long l; - SSL_COMP *comp; - - buf = (unsigned char *)s->init_buf->data; - if (s->state == SSL3_ST_CW_CLNT_HELLO_A) { - SSL_SESSION *sess = s->session; - if ((s->session == NULL) || (s->session->ssl_version != s->version) || -#ifdef OPENSSL_NO_TLSEXT - !sess->session_id_length || -#else - (!sess->session_id_length && !sess->tlsext_tick) || -#endif - (s->session->not_resumable)) { - if (!ssl_get_new_session(s, 0)) - goto err; - } - /* else use the pre-loaded session */ - - p = s->s3->client_random; - - /* - * if client_random is initialized, reuse it, we are required to use - * same upon reply to HelloVerify - */ - for (i = 0; p[i] == '\0' && i < sizeof(s->s3->client_random); i++) ; - if (i == sizeof(s->s3->client_random)) - ssl_fill_hello_random(s, 0, p, sizeof(s->s3->client_random)); - - /* Do the message type and length last */ - d = p = &(buf[DTLS1_HM_HEADER_LENGTH]); - - *(p++) = s->version >> 8; - *(p++) = s->version & 0xff; - s->client_version = s->version; - - /* Random stuff */ - memcpy(p, s->s3->client_random, SSL3_RANDOM_SIZE); - p += SSL3_RANDOM_SIZE; - - /* Session ID */ - if (s->new_session) - i = 0; - else - i = s->session->session_id_length; - *(p++) = i; - if (i != 0) { - if (i > sizeof s->session->session_id) { - SSLerr(SSL_F_DTLS1_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); - goto err; - } - memcpy(p, s->session->session_id, i); - p += i; - } - - /* cookie stuff */ - if (s->d1->cookie_len > sizeof(s->d1->cookie)) { - SSLerr(SSL_F_DTLS1_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); - goto err; - } - *(p++) = s->d1->cookie_len; - memcpy(p, s->d1->cookie, s->d1->cookie_len); - p += s->d1->cookie_len; - - /* Ciphers supported */ - i = ssl_cipher_list_to_bytes(s, SSL_get_ciphers(s), &(p[2]), 0); - if (i == 0) { - SSLerr(SSL_F_DTLS1_CLIENT_HELLO, SSL_R_NO_CIPHERS_AVAILABLE); - goto err; - } - s2n(i, p); - p += i; - - /* COMPRESSION */ - if (s->ctx->comp_methods == NULL) - j = 0; - else - j = sk_SSL_COMP_num(s->ctx->comp_methods); - *(p++) = 1 + j; - for (i = 0; i < j; i++) { - comp = sk_SSL_COMP_value(s->ctx->comp_methods, i); - *(p++) = comp->id; - } - *(p++) = 0; /* Add the NULL method */ - -#ifndef OPENSSL_NO_TLSEXT - /* TLS extensions */ - if (ssl_prepare_clienthello_tlsext(s) <= 0) { - SSLerr(SSL_F_DTLS1_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT); - goto err; - } - if ((p = - ssl_add_clienthello_tlsext(s, p, - buf + SSL3_RT_MAX_PLAIN_LENGTH)) == - NULL) { - SSLerr(SSL_F_DTLS1_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); - goto err; - } -#endif - - l = (p - d); - d = buf; - - d = dtls1_set_message_header(s, d, SSL3_MT_CLIENT_HELLO, l, 0, l); - - s->state = SSL3_ST_CW_CLNT_HELLO_B; - /* number of bytes to write */ - s->init_num = p - buf; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - /* SSL3_ST_CW_CLNT_HELLO_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); - err: - return (-1); -} - static int dtls1_get_hello_verify(SSL *s) { int n, al, ok = 0; unsigned char *data; unsigned int cookie_len; + s->first_packet = 1; n = s->method->ssl_get_message(s, DTLS1_ST_CR_HELLO_VERIFY_REQUEST_A, DTLS1_ST_CR_HELLO_VERIFY_REQUEST_B, -1, s->max_cert_list, &ok); + s->first_packet = 0; if (!ok) return ((int)n); @@ -922,13 +816,16 @@ static int dtls1_get_hello_verify(SSL *s) } data = (unsigned char *)s->init_msg; - - if ((data[0] != (s->version >> 8)) || (data[1] != (s->version & 0xff))) { +#if 0 
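Editor's note: the d1_clnt.c hunks above remove the duplicated dtls1_client_hello() and related helpers in favour of the shared ssl3_* handshake functions, and register DTLSv1_2_client_method() plus the version-flexible DTLS_client_method(). A minimal sketch of creating a client context with the new 1.0.2 entry points, error handling trimmed.

    #include <openssl/ssl.h>

    static SSL_CTX *new_dtls_client_ctx(void)
    {
        SSL_library_init();
        SSL_load_error_strings();

        /* DTLS_client_method() negotiates the highest shared DTLS version;
           DTLSv1_2_client_method() would pin DTLS 1.2 specifically. */
        return SSL_CTX_new(DTLS_client_method());
    }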
+ if (s->method->version != DTLS_ANY_VERSION && + ((data[0] != (s->version >> 8)) || (data[1] != (s->version & 0xff)))) + { SSLerr(SSL_F_DTLS1_GET_HELLO_VERIFY, SSL_R_WRONG_SSL_VERSION); s->version = (s->version & 0xff00) | data[1]; al = SSL_AD_PROTOCOL_VERSION; goto f_err; } +#endif data += 2; cookie_len = *(data++); @@ -947,746 +844,3 @@ static int dtls1_get_hello_verify(SSL *s) ssl3_send_alert(s, SSL3_AL_FATAL, al); return -1; } - -int dtls1_send_client_key_exchange(SSL *s) -{ - unsigned char *p, *d; - int n; - unsigned long alg_k; -#ifndef OPENSSL_NO_RSA - unsigned char *q; - EVP_PKEY *pkey = NULL; -#endif -#ifndef OPENSSL_NO_KRB5 - KSSL_ERR kssl_err; -#endif /* OPENSSL_NO_KRB5 */ -#ifndef OPENSSL_NO_ECDH - EC_KEY *clnt_ecdh = NULL; - const EC_POINT *srvr_ecpoint = NULL; - EVP_PKEY *srvr_pub_pkey = NULL; - unsigned char *encodedPoint = NULL; - int encoded_pt_len = 0; - BN_CTX *bn_ctx = NULL; -#endif - - if (s->state == SSL3_ST_CW_KEY_EXCH_A) { - d = (unsigned char *)s->init_buf->data; - p = &(d[DTLS1_HM_HEADER_LENGTH]); - - alg_k = s->s3->tmp.new_cipher->algorithm_mkey; - - /* Fool emacs indentation */ - if (0) { - } -#ifndef OPENSSL_NO_RSA - else if (alg_k & SSL_kRSA) { - RSA *rsa; - unsigned char tmp_buf[SSL_MAX_MASTER_KEY_LENGTH]; - - if (s->session->sess_cert == NULL) { - /* - * We should always have a server certificate with SSL_kRSA. - */ - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - - if (s->session->sess_cert->peer_rsa_tmp != NULL) - rsa = s->session->sess_cert->peer_rsa_tmp; - else { - pkey = - X509_get_pubkey(s->session-> - sess_cert->peer_pkeys[SSL_PKEY_RSA_ENC]. - x509); - if ((pkey == NULL) || (pkey->type != EVP_PKEY_RSA) - || (pkey->pkey.rsa == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - rsa = pkey->pkey.rsa; - EVP_PKEY_free(pkey); - } - - tmp_buf[0] = s->client_version >> 8; - tmp_buf[1] = s->client_version & 0xff; - if (RAND_bytes(&(tmp_buf[2]), sizeof tmp_buf - 2) <= 0) - goto err; - - s->session->master_key_length = sizeof tmp_buf; - - q = p; - /* Fix buf for TLS and [incidentally] DTLS */ - if (s->version > SSL3_VERSION) - p += 2; - n = RSA_public_encrypt(sizeof tmp_buf, - tmp_buf, p, rsa, RSA_PKCS1_PADDING); -# ifdef PKCS1_CHECK - if (s->options & SSL_OP_PKCS1_CHECK_1) - p[1]++; - if (s->options & SSL_OP_PKCS1_CHECK_2) - tmp_buf[0] = 0x70; -# endif - if (n <= 0) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_BAD_RSA_ENCRYPT); - goto err; - } - - /* Fix buf for TLS and [incidentally] DTLS */ - if (s->version > SSL3_VERSION) { - s2n(n, q); - n += 2; - } - - s->session->master_key_length = - s->method->ssl3_enc->generate_master_secret(s, - s-> - session->master_key, - tmp_buf, - sizeof tmp_buf); - OPENSSL_cleanse(tmp_buf, sizeof tmp_buf); - } -#endif -#ifndef OPENSSL_NO_KRB5 - else if (alg_k & SSL_kKRB5) { - krb5_error_code krb5rc; - KSSL_CTX *kssl_ctx = s->kssl_ctx; - /* krb5_data krb5_ap_req; */ - krb5_data *enc_ticket; - krb5_data authenticator, *authp = NULL; - EVP_CIPHER_CTX ciph_ctx; - const EVP_CIPHER *enc = NULL; - unsigned char iv[EVP_MAX_IV_LENGTH]; - unsigned char tmp_buf[SSL_MAX_MASTER_KEY_LENGTH]; - unsigned char epms[SSL_MAX_MASTER_KEY_LENGTH + EVP_MAX_IV_LENGTH]; - int padl, outl = sizeof(epms); - - EVP_CIPHER_CTX_init(&ciph_ctx); - -# ifdef KSSL_DEBUG - printf("ssl3_send_client_key_exchange(%lx & %lx)\n", - alg_k, SSL_kKRB5); -# endif /* KSSL_DEBUG */ - - authp = NULL; -# ifdef KRB5SENDAUTH - if (KRB5SENDAUTH) - authp = &authenticator; -# endif 
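Editor's note: the block being deleted here was DTLS's private copy of the client key exchange; the shared ssl3_send_client_key_exchange() now covers it. For the RSA case the removed code shows the premaster-secret construction it performed: two client_version bytes followed by 46 random bytes, encrypted with PKCS#1 v1.5 padding. A condensed sketch of just that construction; variable and function names are illustrative.

    #include <openssl/crypto.h>
    #include <openssl/rand.h>
    #include <openssl/rsa.h>

    /* Build and encrypt a 48-byte RSA premaster secret.
       Returns the ciphertext length, or -1 on error. */
    static int make_rsa_premaster(RSA *server_rsa, int client_version,
                                  unsigned char *out /* RSA_size(server_rsa) bytes */)
    {
        unsigned char pms[48];
        int n;

        pms[0] = (unsigned char)(client_version >> 8);
        pms[1] = (unsigned char)(client_version & 0xff);
        if (RAND_bytes(pms + 2, sizeof(pms) - 2) <= 0)
            return -1;

        n = RSA_public_encrypt(sizeof(pms), pms, out,
                               server_rsa, RSA_PKCS1_PADDING);
        OPENSSL_cleanse(pms, sizeof(pms));   /* as the removed code did */
        return n;
    }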
/* KRB5SENDAUTH */ - - krb5rc = kssl_cget_tkt(kssl_ctx, &enc_ticket, authp, &kssl_err); - enc = kssl_map_enc(kssl_ctx->enctype); - if (enc == NULL) - goto err; -# ifdef KSSL_DEBUG - { - printf("kssl_cget_tkt rtn %d\n", krb5rc); - if (krb5rc && kssl_err.text) - printf("kssl_cget_tkt kssl_err=%s\n", kssl_err.text); - } -# endif /* KSSL_DEBUG */ - - if (krb5rc) { - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, kssl_err.reason); - goto err; - } - - /*- - * 20010406 VRS - Earlier versions used KRB5 AP_REQ - ** in place of RFC 2712 KerberosWrapper, as in: - ** - ** Send ticket (copy to *p, set n = length) - ** n = krb5_ap_req.length; - ** memcpy(p, krb5_ap_req.data, krb5_ap_req.length); - ** if (krb5_ap_req.data) - ** kssl_krb5_free_data_contents(NULL,&krb5_ap_req); - ** - ** Now using real RFC 2712 KerberosWrapper - ** (Thanks to Simon Wilkinson ) - ** Note: 2712 "opaque" types are here replaced - ** with a 2-byte length followed by the value. - ** Example: - ** KerberosWrapper= xx xx asn1ticket 0 0 xx xx encpms - ** Where "xx xx" = length bytes. Shown here with - ** optional authenticator omitted. - */ - - /* KerberosWrapper.Ticket */ - s2n(enc_ticket->length, p); - memcpy(p, enc_ticket->data, enc_ticket->length); - p += enc_ticket->length; - n = enc_ticket->length + 2; - - /* KerberosWrapper.Authenticator */ - if (authp && authp->length) { - s2n(authp->length, p); - memcpy(p, authp->data, authp->length); - p += authp->length; - n += authp->length + 2; - - free(authp->data); - authp->data = NULL; - authp->length = 0; - } else { - s2n(0, p); /* null authenticator length */ - n += 2; - } - - if (RAND_bytes(tmp_buf, sizeof tmp_buf) <= 0) - goto err; - - /*- - * 20010420 VRS. Tried it this way; failed. 
- * EVP_EncryptInit_ex(&ciph_ctx,enc, NULL,NULL); - * EVP_CIPHER_CTX_set_key_length(&ciph_ctx, - * kssl_ctx->length); - * EVP_EncryptInit_ex(&ciph_ctx,NULL, key,iv); - */ - - memset(iv, 0, sizeof iv); /* per RFC 1510 */ - EVP_EncryptInit_ex(&ciph_ctx, enc, NULL, kssl_ctx->key, iv); - EVP_EncryptUpdate(&ciph_ctx, epms, &outl, tmp_buf, - sizeof tmp_buf); - EVP_EncryptFinal_ex(&ciph_ctx, &(epms[outl]), &padl); - outl += padl; - if (outl > (int)sizeof epms) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - EVP_CIPHER_CTX_cleanup(&ciph_ctx); - - /* KerberosWrapper.EncryptedPreMasterSecret */ - s2n(outl, p); - memcpy(p, epms, outl); - p += outl; - n += outl + 2; - - s->session->master_key_length = - s->method->ssl3_enc->generate_master_secret(s, - s-> - session->master_key, - tmp_buf, - sizeof tmp_buf); - - OPENSSL_cleanse(tmp_buf, sizeof tmp_buf); - OPENSSL_cleanse(epms, outl); - } -#endif -#ifndef OPENSSL_NO_DH - else if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) { - DH *dh_srvr, *dh_clnt; - - if (s->session->sess_cert == NULL) { - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE); - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_UNEXPECTED_MESSAGE); - goto err; - } - - if (s->session->sess_cert->peer_dh_tmp != NULL) - dh_srvr = s->session->sess_cert->peer_dh_tmp; - else { - /* we get them from the cert */ - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_UNABLE_TO_FIND_DH_PARAMETERS); - goto err; - } - - /* generate a new random key */ - if ((dh_clnt = DHparams_dup(dh_srvr)) == NULL) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); - goto err; - } - if (!DH_generate_key(dh_clnt)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); - goto err; - } - - /* - * use the 'p' output buffer for the DH key, but make sure to - * clear it out afterwards - */ - - n = DH_compute_key(p, dh_srvr->pub_key, dh_clnt); - - if (n <= 0) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); - goto err; - } - - /* generate master key from the result */ - s->session->master_key_length = - s->method->ssl3_enc->generate_master_secret(s, - s-> - session->master_key, - p, n); - /* clean up */ - memset(p, 0, n); - - /* send off the data */ - n = BN_num_bytes(dh_clnt->pub_key); - s2n(n, p); - BN_bn2bin(dh_clnt->pub_key, p); - n += 2; - - DH_free(dh_clnt); - - /* perhaps clean things up a bit EAY EAY EAY EAY */ - } -#endif -#ifndef OPENSSL_NO_ECDH - else if (alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe)) { - const EC_GROUP *srvr_group = NULL; - EC_KEY *tkey; - int ecdh_clnt_cert = 0; - int field_size = 0; - - if (s->session->sess_cert == NULL) { - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE); - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_UNEXPECTED_MESSAGE); - goto err; - } - - /* - * Did we send out the client's ECDH share for use in premaster - * computation as part of client certificate? If so, set - * ecdh_clnt_cert to 1. - */ - if ((alg_k & (SSL_kECDHr | SSL_kECDHe)) && (s->cert != NULL)) { - /* - * XXX: For now, we do not support client authentication - * using ECDH certificates. To add such support, one needs to - * add code that checks for appropriate conditions and sets - * ecdh_clnt_cert to 1. For example, the cert have an ECC key - * on the same curve as the server's and the key should be - * authorized for key agreement. One also needs to add code - * in ssl3_connect to skip sending the certificate verify - * message. 
if ((s->cert->key->privatekey != NULL) && - * (s->cert->key->privatekey->type == EVP_PKEY_EC) && ...) - * ecdh_clnt_cert = 1; - */ - } - - if (s->session->sess_cert->peer_ecdh_tmp != NULL) { - tkey = s->session->sess_cert->peer_ecdh_tmp; - } else { - /* Get the Server Public Key from Cert */ - srvr_pub_pkey = - X509_get_pubkey(s->session-> - sess_cert->peer_pkeys[SSL_PKEY_ECC].x509); - if ((srvr_pub_pkey == NULL) - || (srvr_pub_pkey->type != EVP_PKEY_EC) - || (srvr_pub_pkey->pkey.ec == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - - tkey = srvr_pub_pkey->pkey.ec; - } - - srvr_group = EC_KEY_get0_group(tkey); - srvr_ecpoint = EC_KEY_get0_public_key(tkey); - - if ((srvr_group == NULL) || (srvr_ecpoint == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - - if ((clnt_ecdh = EC_KEY_new()) == NULL) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto err; - } - - if (!EC_KEY_set_group(clnt_ecdh, srvr_group)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_EC_LIB); - goto err; - } - if (ecdh_clnt_cert) { - /* - * Reuse key info from our certificate We only need our - * private key to perform the ECDH computation. - */ - const BIGNUM *priv_key; - tkey = s->cert->key->privatekey->pkey.ec; - priv_key = EC_KEY_get0_private_key(tkey); - if (priv_key == NULL) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto err; - } - if (!EC_KEY_set_private_key(clnt_ecdh, priv_key)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_EC_LIB); - goto err; - } - } else { - /* Generate a new ECDH key pair */ - if (!(EC_KEY_generate_key(clnt_ecdh))) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_ECDH_LIB); - goto err; - } - } - - /* - * use the 'p' output buffer for the ECDH key, but make sure to - * clear it out afterwards - */ - - field_size = EC_GROUP_get_degree(srvr_group); - if (field_size <= 0) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } - n = ECDH_compute_key(p, (field_size + 7) / 8, srvr_ecpoint, - clnt_ecdh, NULL); - if (n <= 0) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } - - /* generate master key from the result */ - s->session->master_key_length = - s->method->ssl3_enc->generate_master_secret(s, - s-> - session->master_key, - p, n); - - memset(p, 0, n); /* clean up */ - - if (ecdh_clnt_cert) { - /* Send empty client key exch message */ - n = 0; - } else { - /* - * First check the size of encoding and allocate memory - * accordingly. 
- */ - encoded_pt_len = - EC_POINT_point2oct(srvr_group, - EC_KEY_get0_public_key(clnt_ecdh), - POINT_CONVERSION_UNCOMPRESSED, - NULL, 0, NULL); - - encodedPoint = (unsigned char *) - OPENSSL_malloc(encoded_pt_len * sizeof(unsigned char)); - bn_ctx = BN_CTX_new(); - if ((encodedPoint == NULL) || (bn_ctx == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto err; - } - - /* Encode the public key */ - n = EC_POINT_point2oct(srvr_group, - EC_KEY_get0_public_key(clnt_ecdh), - POINT_CONVERSION_UNCOMPRESSED, - encodedPoint, encoded_pt_len, bn_ctx); - - *p = n; /* length of encoded point */ - /* Encoded point will be copied here */ - p += 1; - /* copy the point */ - memcpy((unsigned char *)p, encodedPoint, n); - /* increment n to account for length field */ - n += 1; - } - - /* Free allocated memory */ - BN_CTX_free(bn_ctx); - if (encodedPoint != NULL) - OPENSSL_free(encodedPoint); - if (clnt_ecdh != NULL) - EC_KEY_free(clnt_ecdh); - EVP_PKEY_free(srvr_pub_pkey); - } -#endif /* !OPENSSL_NO_ECDH */ - -#ifndef OPENSSL_NO_PSK - else if (alg_k & SSL_kPSK) { - char identity[PSK_MAX_IDENTITY_LEN]; - unsigned char *t = NULL; - unsigned char psk_or_pre_ms[PSK_MAX_PSK_LEN * 2 + 4]; - unsigned int pre_ms_len = 0, psk_len = 0; - int psk_err = 1; - - n = 0; - if (s->psk_client_callback == NULL) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_PSK_NO_CLIENT_CB); - goto err; - } - - psk_len = s->psk_client_callback(s, s->ctx->psk_identity_hint, - identity, PSK_MAX_IDENTITY_LEN, - psk_or_pre_ms, - sizeof(psk_or_pre_ms)); - if (psk_len > PSK_MAX_PSK_LEN) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto psk_err; - } else if (psk_len == 0) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_PSK_IDENTITY_NOT_FOUND); - goto psk_err; - } - - /* create PSK pre_master_secret */ - pre_ms_len = 2 + psk_len + 2 + psk_len; - t = psk_or_pre_ms; - memmove(psk_or_pre_ms + psk_len + 4, psk_or_pre_ms, psk_len); - s2n(psk_len, t); - memset(t, 0, psk_len); - t += psk_len; - s2n(psk_len, t); - - if (s->session->psk_identity_hint != NULL) - OPENSSL_free(s->session->psk_identity_hint); - s->session->psk_identity_hint = - BUF_strdup(s->ctx->psk_identity_hint); - if (s->ctx->psk_identity_hint != NULL - && s->session->psk_identity_hint == NULL) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto psk_err; - } - - if (s->session->psk_identity != NULL) - OPENSSL_free(s->session->psk_identity); - s->session->psk_identity = BUF_strdup(identity); - if (s->session->psk_identity == NULL) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto psk_err; - } - - s->session->master_key_length = - s->method->ssl3_enc->generate_master_secret(s, - s-> - session->master_key, - psk_or_pre_ms, - pre_ms_len); - n = strlen(identity); - s2n(n, p); - memcpy(p, identity, n); - n += 2; - psk_err = 0; - psk_err: - OPENSSL_cleanse(identity, PSK_MAX_IDENTITY_LEN); - OPENSSL_cleanse(psk_or_pre_ms, sizeof(psk_or_pre_ms)); - if (psk_err != 0) { - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); - goto err; - } - } -#endif - else { - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); - SSLerr(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - - d = dtls1_set_message_header(s, d, - SSL3_MT_CLIENT_KEY_EXCHANGE, n, 0, n); - /*- - *(d++)=SSL3_MT_CLIENT_KEY_EXCHANGE; - l2n3(n,d); - l2n(s->d1->handshake_write_seq,d); - s->d1->handshake_write_seq++; - */ - - s->state = 
SSL3_ST_CW_KEY_EXCH_B; - /* number of bytes to write */ - s->init_num = n + DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - /* SSL3_ST_CW_KEY_EXCH_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); - err: -#ifndef OPENSSL_NO_ECDH - BN_CTX_free(bn_ctx); - if (encodedPoint != NULL) - OPENSSL_free(encodedPoint); - if (clnt_ecdh != NULL) - EC_KEY_free(clnt_ecdh); - EVP_PKEY_free(srvr_pub_pkey); -#endif - return (-1); -} - -int dtls1_send_client_verify(SSL *s) -{ - unsigned char *p, *d; - unsigned char data[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; - EVP_PKEY *pkey; -#ifndef OPENSSL_NO_RSA - unsigned u = 0; -#endif - unsigned long n; -#if !defined(OPENSSL_NO_DSA) || !defined(OPENSSL_NO_ECDSA) - int j; -#endif - - if (s->state == SSL3_ST_CW_CERT_VRFY_A) { - d = (unsigned char *)s->init_buf->data; - p = &(d[DTLS1_HM_HEADER_LENGTH]); - pkey = s->cert->key->privatekey; - - s->method->ssl3_enc->cert_verify_mac(s, - NID_sha1, - &(data[MD5_DIGEST_LENGTH])); - -#ifndef OPENSSL_NO_RSA - if (pkey->type == EVP_PKEY_RSA) { - s->method->ssl3_enc->cert_verify_mac(s, NID_md5, &(data[0])); - if (RSA_sign(NID_md5_sha1, data, - MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH, - &(p[2]), &u, pkey->pkey.rsa) <= 0) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_RSA_LIB); - goto err; - } - s2n(u, p); - n = u + 2; - } else -#endif -#ifndef OPENSSL_NO_DSA - if (pkey->type == EVP_PKEY_DSA) { - if (!DSA_sign(pkey->save_type, - &(data[MD5_DIGEST_LENGTH]), - SHA_DIGEST_LENGTH, &(p[2]), - (unsigned int *)&j, pkey->pkey.dsa)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_DSA_LIB); - goto err; - } - s2n(j, p); - n = j + 2; - } else -#endif -#ifndef OPENSSL_NO_ECDSA - if (pkey->type == EVP_PKEY_EC) { - if (!ECDSA_sign(pkey->save_type, - &(data[MD5_DIGEST_LENGTH]), - SHA_DIGEST_LENGTH, &(p[2]), - (unsigned int *)&j, pkey->pkey.ec)) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_ECDSA_LIB); - goto err; - } - s2n(j, p); - n = j + 2; - } else -#endif - { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_VERIFY, ERR_R_INTERNAL_ERROR); - goto err; - } - - d = dtls1_set_message_header(s, d, - SSL3_MT_CERTIFICATE_VERIFY, n, 0, n); - - s->init_num = (int)n + DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - - s->state = SSL3_ST_CW_CERT_VRFY_B; - } - - /* s->state = SSL3_ST_CW_CERT_VRFY_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); - err: - return (-1); -} - -int dtls1_send_client_certificate(SSL *s) -{ - X509 *x509 = NULL; - EVP_PKEY *pkey = NULL; - int i; - unsigned long l; - - if (s->state == SSL3_ST_CW_CERT_A) { - if ((s->cert == NULL) || - (s->cert->key->x509 == NULL) || - (s->cert->key->privatekey == NULL)) - s->state = SSL3_ST_CW_CERT_B; - else - s->state = SSL3_ST_CW_CERT_C; - } - - /* We need to get a client cert */ - if (s->state == SSL3_ST_CW_CERT_B) { - /* - * If we get an error, we need to ssl->rwstate=SSL_X509_LOOKUP; - * return(-1); We then get retied later - */ - i = 0; - i = ssl_do_client_cert_cb(s, &x509, &pkey); - if (i < 0) { - s->rwstate = SSL_X509_LOOKUP; - return (-1); - } - s->rwstate = SSL_NOTHING; - if ((i == 1) && (pkey != NULL) && (x509 != NULL)) { - s->state = SSL3_ST_CW_CERT_B; - if (!SSL_use_certificate(s, x509) || !SSL_use_PrivateKey(s, pkey)) - i = 0; - } else if (i == 1) { - i = 0; - SSLerr(SSL_F_DTLS1_SEND_CLIENT_CERTIFICATE, - SSL_R_BAD_DATA_RETURNED_BY_CALLBACK); - } - - if (x509 != NULL) - X509_free(x509); - if (pkey != NULL) - EVP_PKEY_free(pkey); - if (i 
== 0) { - if (s->version == SSL3_VERSION) { - s->s3->tmp.cert_req = 0; - ssl3_send_alert(s, SSL3_AL_WARNING, SSL_AD_NO_CERTIFICATE); - return (1); - } else { - s->s3->tmp.cert_req = 2; - } - } - - /* Ok, we have a cert */ - s->state = SSL3_ST_CW_CERT_C; - } - - if (s->state == SSL3_ST_CW_CERT_C) { - s->state = SSL3_ST_CW_CERT_D; - l = dtls1_output_cert_chain(s, - (s->s3->tmp.cert_req == - 2) ? NULL : s->cert->key->x509); - if (!l) { - SSLerr(SSL_F_DTLS1_SEND_CLIENT_CERTIFICATE, ERR_R_INTERNAL_ERROR); - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); - return 0; - } - s->init_num = (int)l; - s->init_off = 0; - - /* set header called by dtls1_output_cert_chain() */ - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - /* SSL3_ST_CW_CERT_D */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} diff --git a/deps/openssl/openssl/ssl/d1_enc.c b/deps/openssl/openssl/ssl/d1_enc.c deleted file mode 100644 index e876364f2c89df..00000000000000 --- a/deps/openssl/openssl/ssl/d1_enc.c +++ /dev/null @@ -1,251 +0,0 @@ -/* ssl/d1_enc.c */ -/* - * DTLS implementation written by Nagendra Modadugu - * (nagendra@cs.stanford.edu) for the OpenSSL project 2005. - */ -/* ==================================================================== - * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - -#include -#include "ssl_locl.h" -#ifndef OPENSSL_NO_COMP -# include -#endif -#include -#include -#include -#include -#ifdef KSSL_DEBUG -# include -#endif - -/*- - * dtls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectively. - * - * Returns: - * 0: (in non-constant time) if the record is publically invalid (i.e. too - * short etc). - * 1: if the record's padding is valid / the encryption was successful. - * -1: if the record's padding/AEAD-authenticator is invalid or, if sending, - * an internal error occured. - */ -int dtls1_enc(SSL *s, int send) -{ - SSL3_RECORD *rec; - EVP_CIPHER_CTX *ds; - unsigned long l; - int bs, i, j, k, mac_size = 0; - const EVP_CIPHER *enc; - - if (send) { - if (EVP_MD_CTX_md(s->write_hash)) { - mac_size = EVP_MD_CTX_size(s->write_hash); - if (mac_size < 0) - return -1; - } - ds = s->enc_write_ctx; - rec = &(s->s3->wrec); - if (s->enc_write_ctx == NULL) - enc = NULL; - else { - enc = EVP_CIPHER_CTX_cipher(s->enc_write_ctx); - if (rec->data != rec->input) - /* we can't write into the input stream */ - fprintf(stderr, "%s:%d: rec->data != rec->input\n", - __FILE__, __LINE__); - else if (EVP_CIPHER_block_size(ds->cipher) > 1) { - if (RAND_bytes(rec->input, EVP_CIPHER_block_size(ds->cipher)) - <= 0) - return -1; - } - } - } else { - if (EVP_MD_CTX_md(s->read_hash)) { - mac_size = EVP_MD_CTX_size(s->read_hash); - OPENSSL_assert(mac_size >= 0); - } - ds = s->enc_read_ctx; - rec = &(s->s3->rrec); - if (s->enc_read_ctx == NULL) - enc = NULL; - else - enc = EVP_CIPHER_CTX_cipher(s->enc_read_ctx); - } - -#ifdef KSSL_DEBUG - printf("dtls1_enc(%d)\n", send); -#endif /* KSSL_DEBUG */ - - if ((s->session == NULL) || (ds == NULL) || (enc == NULL)) { - memmove(rec->data, rec->input, rec->length); - rec->input = rec->data; - } else { - l = rec->length; - bs = EVP_CIPHER_block_size(ds->cipher); - - if ((bs != 1) && send) { - i = bs - ((int)l % bs); - - /* Add weird padding of upto 256 bytes */ - - /* we need to add 'i' padding bytes of value j */ - j = i - 1; - if (s->options & SSL_OP_TLS_BLOCK_PADDING_BUG) { - if (s->s3->flags & TLS1_FLAGS_TLS_PADDING_BUG) - j++; - } - for (k = (int)l; k < (int)(l + i); k++) - rec->input[k] = j; - l += i; - rec->length += i; - } -#ifdef KSSL_DEBUG - { - unsigned long ui; - printf("EVP_Cipher(ds=%p,rec->data=%p,rec->input=%p,l=%ld) ==>\n", - ds, rec->data, rec->input, l); - printf - ("\tEVP_CIPHER_CTX: %d buf_len, %d key_len [%d %d], %d iv_len\n", - ds->buf_len, ds->cipher->key_len, DES_KEY_SZ, - DES_SCHEDULE_SZ, ds->cipher->iv_len); - printf("\t\tIV: "); - for (i = 0; i < ds->cipher->iv_len; i++) - printf("%02X", ds->iv[i]); - printf("\n"); - printf("\trec->input="); - for (ui = 0; ui < l; ui++) - printf(" %02x", rec->input[ui]); - printf("\n"); 
- } -#endif /* KSSL_DEBUG */ - - if (!send) { - if (l == 0 || l % bs != 0) - return 0; - } - - if (EVP_Cipher(ds, rec->data, rec->input, l) < 1) - return -1; - -#ifdef KSSL_DEBUG - { - unsigned long i; - printf("\trec->data="); - for (i = 0; i < l; i++) - printf(" %02x", rec->data[i]); - printf("\n"); - } -#endif /* KSSL_DEBUG */ - - if ((bs != 1) && !send) - return tls1_cbc_remove_padding(s, rec, bs, mac_size); - } - return (1); -} diff --git a/deps/openssl/openssl/ssl/d1_lib.c b/deps/openssl/openssl/ssl/d1_lib.c index 94acb15de63898..ee78921ba8b0d3 100644 --- a/deps/openssl/openssl/ssl/d1_lib.c +++ b/deps/openssl/openssl/ssl/d1_lib.c @@ -62,16 +62,18 @@ #include #include "ssl_locl.h" -#if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_VMS) +#if defined(OPENSSL_SYS_VMS) # include #endif static void get_current_time(struct timeval *t); +static void dtls1_set_handshake_header(SSL *s, int type, unsigned long len); +static int dtls1_handshake_write(SSL *s); const char dtls1_version_str[] = "DTLSv1" OPENSSL_VERSION_PTEXT; int dtls1_listen(SSL *s, struct sockaddr *client); SSL3_ENC_METHOD DTLSv1_enc_data = { - dtls1_enc, + tls1_enc, tls1_mac, tls1_setup_key_block, tls1_generate_master_secret, @@ -83,6 +85,30 @@ SSL3_ENC_METHOD DTLSv1_enc_data = { TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE, tls1_alert_code, tls1_export_keying_material, + SSL_ENC_FLAG_DTLS | SSL_ENC_FLAG_EXPLICIT_IV, + DTLS1_HM_HEADER_LENGTH, + dtls1_set_handshake_header, + dtls1_handshake_write +}; + +SSL3_ENC_METHOD DTLSv1_2_enc_data = { + tls1_enc, + tls1_mac, + tls1_setup_key_block, + tls1_generate_master_secret, + tls1_change_cipher_state, + tls1_final_finish_mac, + TLS1_FINISH_MAC_LENGTH, + tls1_cert_verify_mac, + TLS_MD_CLIENT_FINISH_CONST, TLS_MD_CLIENT_FINISH_CONST_SIZE, + TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE, + tls1_alert_code, + tls1_export_keying_material, + SSL_ENC_FLAG_DTLS | SSL_ENC_FLAG_EXPLICIT_IV | SSL_ENC_FLAG_SIGALGS + | SSL_ENC_FLAG_SHA256_PRF | SSL_ENC_FLAG_TLS1_2_CIPHERS, + DTLS1_HM_HEADER_LENGTH, + dtls1_set_handshake_header, + dtls1_handshake_write }; long dtls1_default_timeout(void) @@ -244,9 +270,11 @@ void dtls1_clear(SSL *s) ssl3_clear(s); if (s->options & SSL_OP_CISCO_ANYCONNECT) - s->version = DTLS1_BAD_VER; + s->client_version = s->version = DTLS1_BAD_VER; + else if (s->method->version == DTLS_ANY_VERSION) + s->version = DTLS1_2_VERSION; else - s->version = DTLS1_VERSION; + s->version = s->method->version; } long dtls1_ctrl(SSL *s, int cmd, long larg, void *parg) @@ -271,14 +299,22 @@ long dtls1_ctrl(SSL *s, int cmd, long larg, void *parg) * highest enabled version (according to s->ctx->method, as version * negotiation may have changed s->method). */ -#if DTLS_MAX_VERSION != DTLS1_VERSION -# error Code needs update for DTLS_method() support beyond DTLS1_VERSION. -#endif + if (s->version == s->ctx->method->version) + return 1; /* - * Just one protocol version is supported so far; fail closed if the - * version is not as expected. + * Apparently we're using a version-flexible SSL_METHOD (not at its + * highest protocol version). */ - return s->version == DTLS_MAX_VERSION; + if (s->ctx->method->version == DTLS_method()->version) { +#if DTLS_MAX_VERSION != DTLS1_2_VERSION +# error Code needs update for DTLS_method() support beyond DTLS1_2_VERSION. 
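For reference, the dtls1_ctrl() change above makes the protocol-version check honour the SSL_OP_NO_DTLSv1 / SSL_OP_NO_DTLSv1_2 options when a version-flexible method is in use. A minimal sketch of how an application would drive that path, assuming the standard OpenSSL 1.0.2 API (make_dtls_ctx is a hypothetical helper, not part of this patch):

    #include <openssl/ssl.h>

    /* Create a DTLS context that negotiates the highest enabled version. */
    static SSL_CTX *make_dtls_ctx(void)
    {
        SSL_CTX *ctx = SSL_CTX_new(DTLS_method());  /* version-flexible method */
        if (ctx == NULL)
            return NULL;
        /* Disabling DTLS 1.0 leaves DTLS 1.2 as the only acceptable version,
         * which is what the dtls1_ctrl() checks above then verify. */
        SSL_CTX_set_options(ctx, SSL_OP_NO_DTLSv1);
        return ctx;
    }
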
+#endif + if (!(s->options & SSL_OP_NO_DTLSv1_2)) + return s->version == DTLS1_2_VERSION; + if (!(s->options & SSL_OP_NO_DTLSv1)) + return s->version == DTLS1_VERSION; + } + return 0; /* Unexpected state; fail closed. */ case DTLS_CTRL_SET_LINK_MTU: if (larg < (long)dtls1_link_min_mtu()) return 0; @@ -477,11 +513,22 @@ int dtls1_handle_timeout(SSL *s) static void get_current_time(struct timeval *t) { -#ifdef OPENSSL_SYS_WIN32 - struct _timeb tb; - _ftime(&tb); - t->tv_sec = (long)tb.time; - t->tv_usec = (long)tb.millitm * 1000; +#if defined(_WIN32) + SYSTEMTIME st; + union { + unsigned __int64 ul; + FILETIME ft; + } now; + + GetSystemTime(&st); + SystemTimeToFileTime(&st, &now.ft); +# ifdef __MINGW32__ + now.ul -= 116444736000000000ULL; +# else + now.ul -= 116444736000000000UI64; /* re-bias to 1/1/1970 */ +# endif + t->tv_sec = (long)(now.ul / 10000000); + t->tv_usec = ((int)(now.ul % 10000000)) / 10; #elif defined(OPENSSL_SYS_VMS) struct timeb tb; ftime(&tb); @@ -496,6 +543,9 @@ int dtls1_listen(SSL *s, struct sockaddr *client) { int ret; + /* Ensure there is no state left over from a previous invocation */ + SSL_clear(s); + SSL_set_options(s, SSL_OP_COOKIE_EXCHANGE); s->d1->listen = 1; @@ -506,3 +556,18 @@ int dtls1_listen(SSL *s, struct sockaddr *client) (void)BIO_dgram_get_peer(SSL_get_rbio(s), client); return 1; } + +static void dtls1_set_handshake_header(SSL *s, int htype, unsigned long len) +{ + unsigned char *p = (unsigned char *)s->init_buf->data; + dtls1_set_message_header(s, p, htype, len, 0, len); + s->init_num = (int)len + DTLS1_HM_HEADER_LENGTH; + s->init_off = 0; + /* Buffer the message to handle re-xmits */ + dtls1_buffer_message(s, 0); +} + +static int dtls1_handshake_write(SSL *s) +{ + return dtls1_do_write(s, SSL3_RT_HANDSHAKE); +} diff --git a/deps/openssl/openssl/ssl/d1_meth.c b/deps/openssl/openssl/ssl/d1_meth.c index aaa718c92718aa..734077493f843b 100644 --- a/deps/openssl/openssl/ssl/d1_meth.c +++ b/deps/openssl/openssl/ssl/d1_meth.c @@ -66,9 +66,23 @@ static const SSL_METHOD *dtls1_get_method(int ver) { if (ver == DTLS1_VERSION) return (DTLSv1_method()); + else if (ver == DTLS1_2_VERSION) + return (DTLSv1_2_method()); else return (NULL); } -IMPLEMENT_dtls1_meth_func(DTLSv1_method, - dtls1_accept, dtls1_connect, dtls1_get_method) +IMPLEMENT_dtls1_meth_func(DTLS1_VERSION, + DTLSv1_method, + dtls1_accept, + dtls1_connect, dtls1_get_method, DTLSv1_enc_data) + + IMPLEMENT_dtls1_meth_func(DTLS1_2_VERSION, + DTLSv1_2_method, + dtls1_accept, + dtls1_connect, dtls1_get_method, DTLSv1_2_enc_data) + + IMPLEMENT_dtls1_meth_func(DTLS_ANY_VERSION, + DTLS_method, + dtls1_accept, + dtls1_connect, dtls1_get_method, DTLSv1_2_enc_data) diff --git a/deps/openssl/openssl/ssl/d1_pkt.c b/deps/openssl/openssl/ssl/d1_pkt.c index 7b49a7dd7a6db9..940ca692790271 100644 --- a/deps/openssl/openssl/ssl/d1_pkt.c +++ b/deps/openssl/openssl/ssl/d1_pkt.c @@ -612,6 +612,10 @@ int dtls1_get_record(SSL *s) p = s->packet; + if (s->msg_callback) + s->msg_callback(0, 0, SSL3_RT_HEADER, p, DTLS1_RT_HEADER_LENGTH, + s, s->msg_callback_arg); + /* Pull apart the header into the DTLS1_RECORD */ rr->type = *(p++); ssl_major = *(p++); @@ -1488,10 +1492,10 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned char *p, *pseq; int i, mac_size, clear = 0; int prefix_len = 0; + int eivlen; SSL3_RECORD *wr; SSL3_BUFFER *wb; SSL_SESSION *sess; - int bs; /* * first check if there is a SSL3_BUFFER still being written out. 
This @@ -1570,27 +1574,41 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, *(p++) = type & 0xff; wr->type = type; - - *(p++) = (s->version >> 8); - *(p++) = s->version & 0xff; + /* + * Special case: for hello verify request, client version 1.0 and we + * haven't decided which version to use yet send back using version 1.0 + * header: otherwise some clients will ignore it. + */ + if (s->method->version == DTLS_ANY_VERSION) { + *(p++) = DTLS1_VERSION >> 8; + *(p++) = DTLS1_VERSION & 0xff; + } else { + *(p++) = s->version >> 8; + *(p++) = s->version & 0xff; + } /* field where we are to write out packet epoch, seq num and len */ pseq = p; p += 10; - /* lets setup the record stuff. */ - - /* - * Make space for the explicit IV in case of CBC. (this is a bit of a - * boundary violation, but what the heck). - */ - if (s->enc_write_ctx && - (EVP_CIPHER_mode(s->enc_write_ctx->cipher) & EVP_CIPH_CBC_MODE)) - bs = EVP_CIPHER_block_size(s->enc_write_ctx->cipher); - else - bs = 0; + /* Explicit IV length, block ciphers appropriate version flag */ + if (s->enc_write_ctx) { + int mode = EVP_CIPHER_CTX_mode(s->enc_write_ctx); + if (mode == EVP_CIPH_CBC_MODE) { + eivlen = EVP_CIPHER_CTX_iv_length(s->enc_write_ctx); + if (eivlen <= 1) + eivlen = 0; + } + /* Need explicit part of IV for GCM mode */ + else if (mode == EVP_CIPH_GCM_MODE) + eivlen = EVP_GCM_TLS_EXPLICIT_IV_LEN; + else + eivlen = 0; + } else + eivlen = 0; - wr->data = p + bs; /* make room for IV in case of CBC */ + /* lets setup the record stuff. */ + wr->data = p + eivlen; /* make room for IV in case of CBC */ wr->length = (int)len; wr->input = (unsigned char *)buf; @@ -1616,7 +1634,7 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, */ if (mac_size != 0) { - if (s->method->ssl3_enc->mac(s, &(p[wr->length + bs]), 1) < 0) + if (s->method->ssl3_enc->mac(s, &(p[wr->length + eivlen]), 1) < 0) goto err; wr->length += mac_size; } @@ -1625,14 +1643,8 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, wr->input = p; wr->data = p; - /* ssl3_enc can only have an error on read */ - if (bs) { /* bs != 0 in case of CBC */ - RAND_pseudo_bytes(p, bs); - /* - * master IV and last CBC residue stand for the rest of randomness - */ - wr->length += bs; - } + if (eivlen) + wr->length += eivlen; if (s->method->ssl3_enc->enc(s, 1) < 1) goto err; @@ -1656,6 +1668,10 @@ int do_dtls1_write(SSL *s, int type, const unsigned char *buf, pseq += 6; s2n(wr->length, pseq); + if (s->msg_callback) + s->msg_callback(1, 0, SSL3_RT_HEADER, pseq - DTLS1_RT_HEADER_LENGTH, + DTLS1_RT_HEADER_LENGTH, s, s->msg_callback_arg); + /* * we should now have wr->data pointing to the encrypted data, which is * wr->length long diff --git a/deps/openssl/openssl/ssl/d1_srtp.c b/deps/openssl/openssl/ssl/d1_srtp.c index 6c6e07ce902bd0..64d0634a381ead 100644 --- a/deps/openssl/openssl/ssl/d1_srtp.c +++ b/deps/openssl/openssl/ssl/d1_srtp.c @@ -117,11 +117,10 @@ #include #include #include "ssl_locl.h" +#include "srtp.h" #ifndef OPENSSL_NO_SRTP -# include "srtp.h" - static SRTP_PROTECTION_PROFILE srtp_known_profiles[] = { { "SRTP_AES128_CM_SHA1_80", diff --git a/deps/openssl/openssl/ssl/d1_srvr.c b/deps/openssl/openssl/ssl/d1_srvr.c index c4ec9fe1084518..eafa0127b7c758 100644 --- a/deps/openssl/openssl/ssl/d1_srvr.c +++ b/deps/openssl/openssl/ssl/d1_srvr.c @@ -133,13 +133,29 @@ static const SSL_METHOD *dtls1_get_server_method(int ver) { if (ver == DTLS1_VERSION) return (DTLSv1_server_method()); + else if (ver == DTLS1_2_VERSION) + return 
(DTLSv1_2_server_method()); else return (NULL); } -IMPLEMENT_dtls1_meth_func(DTLSv1_server_method, +IMPLEMENT_dtls1_meth_func(DTLS1_VERSION, + DTLSv1_server_method, dtls1_accept, - ssl_undefined_function, dtls1_get_server_method) + ssl_undefined_function, + dtls1_get_server_method, DTLSv1_enc_data) + + IMPLEMENT_dtls1_meth_func(DTLS1_2_VERSION, + DTLSv1_2_server_method, + dtls1_accept, + ssl_undefined_function, + dtls1_get_server_method, DTLSv1_2_enc_data) + + IMPLEMENT_dtls1_meth_func(DTLS_ANY_VERSION, + DTLS_server_method, + dtls1_accept, + ssl_undefined_function, + dtls1_get_server_method, DTLSv1_2_enc_data) int dtls1_accept(SSL *s) { @@ -280,7 +296,7 @@ int dtls1_accept(SSL *s) s->shutdown = 0; dtls1_clear_record_buffer(s); dtls1_start_timer(s); - ret = dtls1_send_hello_request(s); + ret = ssl3_send_hello_request(s); if (ret <= 0) goto end; s->s3->tmp.next_state = SSL3_ST_SR_CLNT_HELLO_A; @@ -388,7 +404,7 @@ int dtls1_accept(SSL *s) case SSL3_ST_SW_SRVR_HELLO_B: s->renegotiate = 2; dtls1_start_timer(s); - ret = dtls1_send_server_hello(s); + ret = ssl3_send_server_hello(s); if (ret <= 0) goto end; @@ -427,7 +443,7 @@ int dtls1_accept(SSL *s) if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) { dtls1_start_timer(s); - ret = dtls1_send_server_certificate(s); + ret = ssl3_send_server_certificate(s); if (ret <= 0) goto end; #ifndef OPENSSL_NO_TLSEXT @@ -483,7 +499,7 @@ int dtls1_accept(SSL *s) ) ) { dtls1_start_timer(s); - ret = dtls1_send_server_key_exchange(s); + ret = ssl3_send_server_key_exchange(s); if (ret <= 0) goto end; } else @@ -537,7 +553,7 @@ int dtls1_accept(SSL *s) } else { s->s3->tmp.cert_request = 1; dtls1_start_timer(s); - ret = dtls1_send_certificate_request(s); + ret = ssl3_send_certificate_request(s); if (ret <= 0) goto end; #ifndef NETSCAPE_HANG_BUG @@ -565,7 +581,7 @@ int dtls1_accept(SSL *s) case SSL3_ST_SW_SRVR_DONE_A: case SSL3_ST_SW_SRVR_DONE_B: dtls1_start_timer(s); - ret = dtls1_send_server_done(s); + ret = ssl3_send_server_done(s); if (ret <= 0) goto end; s->s3->tmp.next_state = SSL3_ST_SR_CERT_A; @@ -593,22 +609,13 @@ int dtls1_accept(SSL *s) case SSL3_ST_SR_CERT_A: case SSL3_ST_SR_CERT_B: - /* Check for second client hello (MS SGC) */ - ret = ssl3_check_client_hello(s); - if (ret <= 0) - goto end; - if (ret == 2) { - dtls1_stop_timer(s); - s->state = SSL3_ST_SR_CLNT_HELLO_C; - } else { - if (s->s3->tmp.cert_request) { - ret = ssl3_get_client_certificate(s); - if (ret <= 0) - goto end; - } - s->init_num = 0; - s->state = SSL3_ST_SR_KEY_EXCH_A; + if (s->s3->tmp.cert_request) { + ret = ssl3_get_client_certificate(s); + if (ret <= 0) + goto end; } + s->init_num = 0; + s->state = SSL3_ST_SR_KEY_EXCH_A; break; case SSL3_ST_SR_KEY_EXCH_A: @@ -643,6 +650,22 @@ int dtls1_accept(SSL *s) */ s->state = SSL3_ST_SR_FINISHED_A; s->init_num = 0; + } else if (SSL_USE_SIGALGS(s)) { + s->state = SSL3_ST_SR_CERT_VRFY_A; + s->init_num = 0; + if (!s->session->peer) + break; + /* + * For sigalgs freeze the handshake buffer at this point and + * digest cached records. 
+ */ + if (!s->s3->handshake_buffer) { + SSLerr(SSL_F_DTLS1_ACCEPT, ERR_R_INTERNAL_ERROR); + return -1; + } + s->s3->flags |= TLS1_FLAGS_KEEP_HANDSHAKE; + if (!ssl3_digest_cached_records(s)) + return -1; } else { s->state = SSL3_ST_SR_CERT_VRFY_A; s->init_num = 0; @@ -720,7 +743,7 @@ int dtls1_accept(SSL *s) #ifndef OPENSSL_NO_TLSEXT case SSL3_ST_SW_SESSION_TICKET_A: case SSL3_ST_SW_SESSION_TICKET_B: - ret = dtls1_send_newsession_ticket(s); + ret = ssl3_send_newsession_ticket(s); if (ret <= 0) goto end; s->state = SSL3_ST_SW_CHANGE_A; @@ -780,13 +803,13 @@ int dtls1_accept(SSL *s) case SSL3_ST_SW_FINISHED_A: case SSL3_ST_SW_FINISHED_B: - ret = dtls1_send_finished(s, - SSL3_ST_SW_FINISHED_A, - SSL3_ST_SW_FINISHED_B, - s->method-> - ssl3_enc->server_finished_label, - s->method-> - ssl3_enc->server_finished_label_len); + ret = ssl3_send_finished(s, + SSL3_ST_SW_FINISHED_A, + SSL3_ST_SW_FINISHED_B, + s->method-> + ssl3_enc->server_finished_label, + s->method-> + ssl3_enc->server_finished_label_len); if (ret <= 0) goto end; s->state = SSL3_ST_SW_FLUSH; @@ -892,29 +915,6 @@ int dtls1_accept(SSL *s) return (ret); } -int dtls1_send_hello_request(SSL *s) -{ - unsigned char *p; - - if (s->state == SSL3_ST_SW_HELLO_REQ_A) { - p = (unsigned char *)s->init_buf->data; - p = dtls1_set_message_header(s, p, SSL3_MT_HELLO_REQUEST, 0, 0, 0); - - s->state = SSL3_ST_SW_HELLO_REQ_B; - /* number of bytes to write */ - s->init_num = DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; - - /* - * no need to buffer this message, since there are no retransmit - * requests for it - */ - } - - /* SSL3_ST_SW_HELLO_REQ_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} - int dtls1_send_hello_verify_request(SSL *s) { unsigned int msg_len; @@ -924,8 +924,9 @@ int dtls1_send_hello_verify_request(SSL *s) buf = (unsigned char *)s->init_buf->data; msg = p = &(buf[DTLS1_HM_HEADER_LENGTH]); - *(p++) = s->version >> 8; - *(p++) = s->version & 0xFF; + /* Always use DTLS 1.0 version: see RFC 6347 */ + *(p++) = DTLS1_VERSION >> 8; + *(p++) = DTLS1_VERSION & 0xFF; if (s->ctx->app_gen_cookie_cb == NULL || s->ctx->app_gen_cookie_cb(s, s->d1->cookie, @@ -953,788 +954,3 @@ int dtls1_send_hello_verify_request(SSL *s) /* s->state = DTLS1_ST_SW_HELLO_VERIFY_REQUEST_B */ return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); } - -int dtls1_send_server_hello(SSL *s) -{ - unsigned char *buf; - unsigned char *p, *d; - int i; - unsigned int sl; - unsigned long l; - - if (s->state == SSL3_ST_SW_SRVR_HELLO_A) { - buf = (unsigned char *)s->init_buf->data; - p = s->s3->server_random; - ssl_fill_hello_random(s, 1, p, SSL3_RANDOM_SIZE); - /* Do the message type and length last */ - d = p = &(buf[DTLS1_HM_HEADER_LENGTH]); - - *(p++) = s->version >> 8; - *(p++) = s->version & 0xff; - - /* Random stuff */ - memcpy(p, s->s3->server_random, SSL3_RANDOM_SIZE); - p += SSL3_RANDOM_SIZE; - - /* - * now in theory we have 3 options to sending back the session id. - * If it is a re-use, we send back the old session-id, if it is a new - * session, we send back the new session-id or we send back a 0 - * length session-id if we want it to be single use. Currently I will - * not implement the '0' length session-id 12-Jan-98 - I'll now - * support the '0' length stuff. 
- */ - if (!(s->ctx->session_cache_mode & SSL_SESS_CACHE_SERVER)) - s->session->session_id_length = 0; - - sl = s->session->session_id_length; - if (sl > sizeof s->session->session_id) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR); - return -1; - } - *(p++) = sl; - memcpy(p, s->session->session_id, sl); - p += sl; - - /* put the cipher */ - if (s->s3->tmp.new_cipher == NULL) - return -1; - i = ssl3_put_cipher_by_char(s->s3->tmp.new_cipher, p); - p += i; - - /* put the compression method */ -#ifdef OPENSSL_NO_COMP - *(p++) = 0; -#else - if (s->s3->tmp.new_compression == NULL) - *(p++) = 0; - else - *(p++) = s->s3->tmp.new_compression->id; -#endif - -#ifndef OPENSSL_NO_TLSEXT - if (ssl_prepare_serverhello_tlsext(s) <= 0) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO, SSL_R_SERVERHELLO_TLSEXT); - return -1; - } - if ((p = - ssl_add_serverhello_tlsext(s, p, - buf + SSL3_RT_MAX_PLAIN_LENGTH)) == - NULL) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR); - return -1; - } -#endif - - /* do the header */ - l = (p - d); - d = buf; - - d = dtls1_set_message_header(s, d, SSL3_MT_SERVER_HELLO, l, 0, l); - - s->state = SSL3_ST_SW_SRVR_HELLO_B; - /* number of bytes to write */ - s->init_num = p - buf; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - /* SSL3_ST_SW_SRVR_HELLO_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} - -int dtls1_send_server_done(SSL *s) -{ - unsigned char *p; - - if (s->state == SSL3_ST_SW_SRVR_DONE_A) { - p = (unsigned char *)s->init_buf->data; - - /* do the header */ - p = dtls1_set_message_header(s, p, SSL3_MT_SERVER_DONE, 0, 0, 0); - - s->state = SSL3_ST_SW_SRVR_DONE_B; - /* number of bytes to write */ - s->init_num = DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - /* SSL3_ST_SW_SRVR_DONE_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} - -int dtls1_send_server_key_exchange(SSL *s) -{ -#ifndef OPENSSL_NO_RSA - unsigned char *q; - int j, num; - RSA *rsa; - unsigned char md_buf[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; - unsigned int u; -#endif -#ifndef OPENSSL_NO_DH - DH *dh = NULL, *dhp; -#endif -#ifndef OPENSSL_NO_ECDH - EC_KEY *ecdh = NULL, *ecdhp; - unsigned char *encodedPoint = NULL; - int encodedlen = 0; - int curve_id = 0; - BN_CTX *bn_ctx = NULL; -#endif - EVP_PKEY *pkey; - unsigned char *p, *d; - int al, i; - unsigned long type; - int n; - CERT *cert; - BIGNUM *r[4]; - int nr[4], kn; - BUF_MEM *buf; - EVP_MD_CTX md_ctx; - - EVP_MD_CTX_init(&md_ctx); - if (s->state == SSL3_ST_SW_KEY_EXCH_A) { - type = s->s3->tmp.new_cipher->algorithm_mkey; - cert = s->cert; - - buf = s->init_buf; - - r[0] = r[1] = r[2] = r[3] = NULL; - n = 0; -#ifndef OPENSSL_NO_RSA - if (type & SSL_kRSA) { - rsa = cert->rsa_tmp; - if ((rsa == NULL) && (s->cert->rsa_tmp_cb != NULL)) { - rsa = s->cert->rsa_tmp_cb(s, - SSL_C_IS_EXPORT(s->s3-> - tmp.new_cipher), - SSL_C_EXPORT_PKEYLENGTH(s->s3-> - tmp.new_cipher)); - if (rsa == NULL) { - al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_ERROR_GENERATING_TMP_RSA_KEY); - goto f_err; - } - RSA_up_ref(rsa); - cert->rsa_tmp = rsa; - } - if (rsa == NULL) { - al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_MISSING_TMP_RSA_KEY); - goto f_err; - } - r[0] = rsa->n; - r[1] = rsa->e; - s->s3->tmp.use_rsa_tmp = 1; - } else -#endif -#ifndef OPENSSL_NO_DH - if (type & SSL_kEDH) { - dhp = cert->dh_tmp; - if ((dhp == NULL) && 
(s->cert->dh_tmp_cb != NULL)) - dhp = s->cert->dh_tmp_cb(s, - SSL_C_IS_EXPORT(s->s3-> - tmp.new_cipher), - SSL_C_EXPORT_PKEYLENGTH(s->s3-> - tmp.new_cipher)); - if (dhp == NULL) { - al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_MISSING_TMP_DH_KEY); - goto f_err; - } - - if (s->s3->tmp.dh != NULL) { - DH_free(dh); - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - - if ((dh = DHparams_dup(dhp)) == NULL) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_DH_LIB); - goto err; - } - - s->s3->tmp.dh = dh; - if ((dhp->pub_key == NULL || - dhp->priv_key == NULL || - (s->options & SSL_OP_SINGLE_DH_USE))) { - if (!DH_generate_key(dh)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_R_DH_LIB); - goto err; - } - } else { - dh->pub_key = BN_dup(dhp->pub_key); - dh->priv_key = BN_dup(dhp->priv_key); - if ((dh->pub_key == NULL) || (dh->priv_key == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_R_DH_LIB); - goto err; - } - } - r[0] = dh->p; - r[1] = dh->g; - r[2] = dh->pub_key; - } else -#endif -#ifndef OPENSSL_NO_ECDH - if (type & SSL_kEECDH) { - const EC_GROUP *group; - - ecdhp = cert->ecdh_tmp; - if ((ecdhp == NULL) && (s->cert->ecdh_tmp_cb != NULL)) { - ecdhp = s->cert->ecdh_tmp_cb(s, - SSL_C_IS_EXPORT(s->s3-> - tmp.new_cipher), - SSL_C_EXPORT_PKEYLENGTH(s-> - s3->tmp.new_cipher)); - } - if (ecdhp == NULL) { - al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_MISSING_TMP_ECDH_KEY); - goto f_err; - } - - if (s->s3->tmp.ecdh != NULL) { - EC_KEY_free(s->s3->tmp.ecdh); - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); - goto err; - } - - /* Duplicate the ECDH structure. */ - if (ecdhp == NULL) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } - if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } - - s->s3->tmp.ecdh = ecdh; - if ((EC_KEY_get0_public_key(ecdh) == NULL) || - (EC_KEY_get0_private_key(ecdh) == NULL) || - (s->options & SSL_OP_SINGLE_ECDH_USE)) { - if (!EC_KEY_generate_key(ecdh)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_R_ECDH_LIB); - goto err; - } - } - - if (((group = EC_KEY_get0_group(ecdh)) == NULL) || - (EC_KEY_get0_public_key(ecdh) == NULL) || - (EC_KEY_get0_private_key(ecdh) == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } - - if (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && - (EC_GROUP_get_degree(group) > 163)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER); - goto err; - } - - /* - * XXX: For now, we only support ephemeral ECDH keys over named - * (not generic) curves. For supported named curves, curve_id is - * non-zero. - */ - if ((curve_id = - tls1_ec_nid2curve_id(EC_GROUP_get_curve_name(group))) - == 0) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_UNSUPPORTED_ELLIPTIC_CURVE); - goto err; - } - - /* - * Encode the public key. First check the size of encoding and - * allocate memory accordingly. 
- */ - encodedlen = EC_POINT_point2oct(group, - EC_KEY_get0_public_key(ecdh), - POINT_CONVERSION_UNCOMPRESSED, - NULL, 0, NULL); - - encodedPoint = (unsigned char *) - OPENSSL_malloc(encodedlen * sizeof(unsigned char)); - bn_ctx = BN_CTX_new(); - if ((encodedPoint == NULL) || (bn_ctx == NULL)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_R_MALLOC_FAILURE); - goto err; - } - - encodedlen = EC_POINT_point2oct(group, - EC_KEY_get0_public_key(ecdh), - POINT_CONVERSION_UNCOMPRESSED, - encodedPoint, encodedlen, bn_ctx); - - if (encodedlen == 0) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); - goto err; - } - - BN_CTX_free(bn_ctx); - bn_ctx = NULL; - - /* - * XXX: For now, we only support named (not generic) curves in - * ECDH ephemeral key exchanges. In this situation, we need four - * additional bytes to encode the entire ServerECDHParams - * structure. - */ - n = 4 + encodedlen; - - /* - * We'll generate the serverKeyExchange message explicitly so we - * can set these to NULLs - */ - r[0] = NULL; - r[1] = NULL; - r[2] = NULL; - r[3] = NULL; - } else -#endif /* !OPENSSL_NO_ECDH */ -#ifndef OPENSSL_NO_PSK - if (type & SSL_kPSK) { - /* - * reserve size for record length and PSK identity hint - */ - n += 2 + strlen(s->ctx->psk_identity_hint); - } else -#endif /* !OPENSSL_NO_PSK */ - { - al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE); - goto f_err; - } - for (i = 0; r[i] != NULL; i++) { - nr[i] = BN_num_bytes(r[i]); - n += 2 + nr[i]; - } - - if (!(s->s3->tmp.new_cipher->algorithm_auth & SSL_aNULL) - && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) { - if ((pkey = ssl_get_sign_pkey(s, s->s3->tmp.new_cipher, NULL)) - == NULL) { - al = SSL_AD_DECODE_ERROR; - goto f_err; - } - kn = EVP_PKEY_size(pkey); - } else { - pkey = NULL; - kn = 0; - } - - if (!BUF_MEM_grow_clean(buf, n + DTLS1_HM_HEADER_LENGTH + kn)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_BUF); - goto err; - } - d = (unsigned char *)s->init_buf->data; - p = &(d[DTLS1_HM_HEADER_LENGTH]); - - for (i = 0; r[i] != NULL; i++) { - s2n(nr[i], p); - BN_bn2bin(r[i], p); - p += nr[i]; - } - -#ifndef OPENSSL_NO_ECDH - if (type & SSL_kEECDH) { - /* - * XXX: For now, we only support named (not generic) curves. In - * this situation, the serverKeyExchange message has: [1 byte - * CurveType], [2 byte CurveName] [1 byte length of encoded - * point], followed by the actual encoded point itself - */ - *p = NAMED_CURVE_TYPE; - p += 1; - *p = 0; - p += 1; - *p = curve_id; - p += 1; - *p = encodedlen; - p += 1; - memcpy((unsigned char *)p, - (unsigned char *)encodedPoint, encodedlen); - OPENSSL_free(encodedPoint); - encodedPoint = NULL; - p += encodedlen; - } -#endif - -#ifndef OPENSSL_NO_PSK - if (type & SSL_kPSK) { - /* copy PSK identity hint */ - s2n(strlen(s->ctx->psk_identity_hint), p); - strncpy((char *)p, s->ctx->psk_identity_hint, - strlen(s->ctx->psk_identity_hint)); - p += strlen(s->ctx->psk_identity_hint); - } -#endif - - /* not anonymous */ - if (pkey != NULL) { - /* - * n is the length of the params, they start at - * &(d[DTLS1_HM_HEADER_LENGTH]) and p points to the space at the - * end. - */ -#ifndef OPENSSL_NO_RSA - if (pkey->type == EVP_PKEY_RSA) { - q = md_buf; - j = 0; - for (num = 2; num > 0; num--) { - EVP_DigestInit_ex(&md_ctx, (num == 2) - ? 
s->ctx->md5 : s->ctx->sha1, NULL); - EVP_DigestUpdate(&md_ctx, &(s->s3->client_random[0]), - SSL3_RANDOM_SIZE); - EVP_DigestUpdate(&md_ctx, &(s->s3->server_random[0]), - SSL3_RANDOM_SIZE); - EVP_DigestUpdate(&md_ctx, &(d[DTLS1_HM_HEADER_LENGTH]), - n); - EVP_DigestFinal_ex(&md_ctx, q, (unsigned int *)&i); - q += i; - j += i; - } - if (RSA_sign(NID_md5_sha1, md_buf, j, - &(p[2]), &u, pkey->pkey.rsa) <= 0) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_RSA); - goto err; - } - s2n(u, p); - n += u + 2; - } else -#endif -#if !defined(OPENSSL_NO_DSA) - if (pkey->type == EVP_PKEY_DSA) { - /* lets do DSS */ - EVP_SignInit_ex(&md_ctx, EVP_dss1(), NULL); - EVP_SignUpdate(&md_ctx, &(s->s3->client_random[0]), - SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx, &(s->s3->server_random[0]), - SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx, &(d[DTLS1_HM_HEADER_LENGTH]), n); - if (!EVP_SignFinal(&md_ctx, &(p[2]), - (unsigned int *)&i, pkey)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_DSA); - goto err; - } - s2n(i, p); - n += i + 2; - } else -#endif -#if !defined(OPENSSL_NO_ECDSA) - if (pkey->type == EVP_PKEY_EC) { - /* let's do ECDSA */ - EVP_SignInit_ex(&md_ctx, EVP_ecdsa(), NULL); - EVP_SignUpdate(&md_ctx, &(s->s3->client_random[0]), - SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx, &(s->s3->server_random[0]), - SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx, &(d[DTLS1_HM_HEADER_LENGTH]), n); - if (!EVP_SignFinal(&md_ctx, &(p[2]), - (unsigned int *)&i, pkey)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - ERR_LIB_ECDSA); - goto err; - } - s2n(i, p); - n += i + 2; - } else -#endif - { - /* Is this error check actually needed? */ - al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE, - SSL_R_UNKNOWN_PKEY_TYPE); - goto f_err; - } - } - - d = dtls1_set_message_header(s, d, - SSL3_MT_SERVER_KEY_EXCHANGE, n, 0, n); - - /* - * we should now have things packed up, so lets send it off - */ - s->init_num = n + DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - s->state = SSL3_ST_SW_KEY_EXCH_B; - EVP_MD_CTX_cleanup(&md_ctx); - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); - f_err: - ssl3_send_alert(s, SSL3_AL_FATAL, al); - err: -#ifndef OPENSSL_NO_ECDH - if (encodedPoint != NULL) - OPENSSL_free(encodedPoint); - BN_CTX_free(bn_ctx); -#endif - EVP_MD_CTX_cleanup(&md_ctx); - return (-1); -} - -int dtls1_send_certificate_request(SSL *s) -{ - unsigned char *p, *d; - int i, j, nl, off, n; - STACK_OF(X509_NAME) *sk = NULL; - X509_NAME *name; - BUF_MEM *buf; - unsigned int msg_len; - - if (s->state == SSL3_ST_SW_CERT_REQ_A) { - buf = s->init_buf; - - d = p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH]); - - /* get the list of acceptable cert types */ - p++; - n = ssl3_get_req_cert_type(s, p); - d[0] = n; - p += n; - n++; - - off = n; - p += 2; - n += 2; - - sk = SSL_get_client_CA_list(s); - nl = 0; - if (sk != NULL) { - for (i = 0; i < sk_X509_NAME_num(sk); i++) { - name = sk_X509_NAME_value(sk, i); - j = i2d_X509_NAME(name, NULL); - if (!BUF_MEM_grow_clean - (buf, DTLS1_HM_HEADER_LENGTH + n + j + 2)) { - SSLerr(SSL_F_DTLS1_SEND_CERTIFICATE_REQUEST, - ERR_R_BUF_LIB); - goto err; - } - p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH + n]); - if (!(s->options & SSL_OP_NETSCAPE_CA_DN_BUG)) { - s2n(j, p); - i2d_X509_NAME(name, &p); - n += 2 + j; - nl += 2 + j; - } else { - d = p; - i2d_X509_NAME(name, &p); - j -= 2; - s2n(j, d); - j += 2; - n += j; - nl += j; - } - } - } - /* else no CA names 
*/ - p = (unsigned char *)&(buf->data[DTLS1_HM_HEADER_LENGTH + off]); - s2n(nl, p); - - d = (unsigned char *)buf->data; - *(d++) = SSL3_MT_CERTIFICATE_REQUEST; - l2n3(n, d); - s2n(s->d1->handshake_write_seq, d); - s->d1->handshake_write_seq++; - - /* - * we should now have things packed up, so lets send it off - */ - - s->init_num = n + DTLS1_HM_HEADER_LENGTH; - s->init_off = 0; -#ifdef NETSCAPE_HANG_BUG -/* XXX: what to do about this? */ - p = (unsigned char *)s->init_buf->data + s->init_num; - - /* do the header */ - *(p++) = SSL3_MT_SERVER_DONE; - *(p++) = 0; - *(p++) = 0; - *(p++) = 0; - s->init_num += 4; -#endif - - /* XDTLS: set message header ? */ - msg_len = s->init_num - DTLS1_HM_HEADER_LENGTH; - dtls1_set_message_header(s, (void *)s->init_buf->data, - SSL3_MT_CERTIFICATE_REQUEST, msg_len, 0, - msg_len); - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - - s->state = SSL3_ST_SW_CERT_REQ_B; - } - - /* SSL3_ST_SW_CERT_REQ_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); - err: - return (-1); -} - -int dtls1_send_server_certificate(SSL *s) -{ - unsigned long l; - X509 *x; - - if (s->state == SSL3_ST_SW_CERT_A) { - x = ssl_get_server_send_cert(s); - if (x == NULL) { - /* VRS: allow null cert if auth == KRB5 */ - if ((s->s3->tmp.new_cipher->algorithm_mkey != SSL_kKRB5) || - (s->s3->tmp.new_cipher->algorithm_auth != SSL_aKRB5)) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_CERTIFICATE, - ERR_R_INTERNAL_ERROR); - return (0); - } - } - - l = dtls1_output_cert_chain(s, x); - if (!l) { - SSLerr(SSL_F_DTLS1_SEND_SERVER_CERTIFICATE, ERR_R_INTERNAL_ERROR); - return (0); - } - s->state = SSL3_ST_SW_CERT_B; - s->init_num = (int)l; - s->init_off = 0; - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - /* SSL3_ST_SW_CERT_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} - -#ifndef OPENSSL_NO_TLSEXT -int dtls1_send_newsession_ticket(SSL *s) -{ - if (s->state == SSL3_ST_SW_SESSION_TICKET_A) { - unsigned char *p, *senc, *macstart; - int len, slen; - unsigned int hlen, msg_len; - EVP_CIPHER_CTX ctx; - HMAC_CTX hctx; - SSL_CTX *tctx = s->initial_ctx; - unsigned char iv[EVP_MAX_IV_LENGTH]; - unsigned char key_name[16]; - - /* get session encoding length */ - slen = i2d_SSL_SESSION(s->session, NULL); - /* - * Some length values are 16 bits, so forget it if session is too - * long - */ - if (slen > 0xFF00) - return -1; - /* - * Grow buffer if need be: the length calculation is as follows 12 - * (DTLS handshake message header) + 4 (ticket lifetime hint) + 2 - * (ticket length) + 16 (key name) + max_iv_len (iv length) + - * session_length + max_enc_block_size (max encrypted session length) - * + max_md_size (HMAC). - */ - if (!BUF_MEM_grow(s->init_buf, - DTLS1_HM_HEADER_LENGTH + 22 + EVP_MAX_IV_LENGTH + - EVP_MAX_BLOCK_LENGTH + EVP_MAX_MD_SIZE + slen)) - return -1; - senc = OPENSSL_malloc(slen); - if (!senc) - return -1; - p = senc; - i2d_SSL_SESSION(s->session, &p); - - p = (unsigned char *)&(s->init_buf->data[DTLS1_HM_HEADER_LENGTH]); - EVP_CIPHER_CTX_init(&ctx); - HMAC_CTX_init(&hctx); - /* - * Initialize HMAC and cipher contexts. If callback present it does - * all the work otherwise use generated values from parent ctx. 
- */ - if (tctx->tlsext_ticket_key_cb) { - if (tctx->tlsext_ticket_key_cb(s, key_name, iv, &ctx, - &hctx, 1) < 0) { - OPENSSL_free(senc); - return -1; - } - } else { - RAND_pseudo_bytes(iv, 16); - EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), NULL, - tctx->tlsext_tick_aes_key, iv); - HMAC_Init_ex(&hctx, tctx->tlsext_tick_hmac_key, 16, - tlsext_tick_md(), NULL); - memcpy(key_name, tctx->tlsext_tick_key_name, 16); - } - l2n(s->session->tlsext_tick_lifetime_hint, p); - /* Skip ticket length for now */ - p += 2; - /* Output key name */ - macstart = p; - memcpy(p, key_name, 16); - p += 16; - /* output IV */ - memcpy(p, iv, EVP_CIPHER_CTX_iv_length(&ctx)); - p += EVP_CIPHER_CTX_iv_length(&ctx); - /* Encrypt session data */ - EVP_EncryptUpdate(&ctx, p, &len, senc, slen); - p += len; - EVP_EncryptFinal(&ctx, p, &len); - p += len; - EVP_CIPHER_CTX_cleanup(&ctx); - - HMAC_Update(&hctx, macstart, p - macstart); - HMAC_Final(&hctx, p, &hlen); - HMAC_CTX_cleanup(&hctx); - - p += hlen; - /* Now write out lengths: p points to end of data written */ - /* Total length */ - len = p - (unsigned char *)(s->init_buf->data); - /* Ticket length */ - p = (unsigned char *)&(s->init_buf->data[DTLS1_HM_HEADER_LENGTH]) + 4; - s2n(len - DTLS1_HM_HEADER_LENGTH - 6, p); - - /* number of bytes to write */ - s->init_num = len; - s->state = SSL3_ST_SW_SESSION_TICKET_B; - s->init_off = 0; - OPENSSL_free(senc); - - /* XDTLS: set message header ? */ - msg_len = s->init_num - DTLS1_HM_HEADER_LENGTH; - dtls1_set_message_header(s, (void *)s->init_buf->data, - SSL3_MT_NEWSESSION_TICKET, msg_len, 0, - msg_len); - - /* buffer the message to handle re-xmits */ - dtls1_buffer_message(s, 0); - } - - /* SSL3_ST_SW_SESSION_TICKET_B */ - return (dtls1_do_write(s, SSL3_RT_HANDSHAKE)); -} -#endif diff --git a/deps/openssl/openssl/ssl/dtls1.h b/deps/openssl/openssl/ssl/dtls1.h index 8deb299a7c8516..30bbcf278a4361 100644 --- a/deps/openssl/openssl/ssl/dtls1.h +++ b/deps/openssl/openssl/ssl/dtls1.h @@ -84,11 +84,15 @@ extern "C" { #endif # define DTLS1_VERSION 0xFEFF -# define DTLS_MAX_VERSION DTLS1_VERSION +# define DTLS1_2_VERSION 0xFEFD +# define DTLS_MAX_VERSION DTLS1_2_VERSION # define DTLS1_VERSION_MAJOR 0xFE # define DTLS1_BAD_VER 0x0100 +/* Special value for method supporting multiple versions */ +# define DTLS_ANY_VERSION 0x1FFFF + # if 0 /* this alert description is not specified anywhere... 
*/ # define DTLS1_AD_MISSING_HANDSHAKE_MESSAGE 110 diff --git a/deps/openssl/openssl/ssl/heartbeat_test.c b/deps/openssl/openssl/ssl/heartbeat_test.c index 3cec8b163fae6f..7623c36ccf1894 100644 --- a/deps/openssl/openssl/ssl/heartbeat_test.c +++ b/deps/openssl/openssl/ssl/heartbeat_test.c @@ -272,8 +272,8 @@ static int test_dtls1_not_bleeding() { SETUP_HEARTBEAT_TEST_FIXTURE(dtls); /* Three-byte pad at the beginning for type and payload length */ - unsigned char payload_buf[] = " Not bleeding, sixteen spaces of padding" - " "; + unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4] = + " Not bleeding, sixteen spaces of padding" " "; const int payload_buf_len = honest_payload_size(payload_buf); fixture.payload = &payload_buf[0]; @@ -294,9 +294,9 @@ static int test_dtls1_not_bleeding_empty_payload() * Three-byte pad at the beginning for type and payload length, plus a * NUL at the end */ - unsigned char payload_buf[4 + MIN_PADDING_SIZE]; - memset(payload_buf, ' ', sizeof(payload_buf)); - payload_buf[sizeof(payload_buf) - 1] = '\0'; + unsigned char payload_buf[4 + MAX_PRINTABLE_CHARACTERS]; + memset(payload_buf, ' ', MIN_PADDING_SIZE + 3); + payload_buf[MIN_PADDING_SIZE + 3] = '\0'; payload_buf_len = honest_payload_size(payload_buf); fixture.payload = &payload_buf[0]; @@ -311,7 +311,8 @@ static int test_dtls1_heartbleed() { SETUP_HEARTBEAT_TEST_FIXTURE(dtls); /* Three-byte pad at the beginning for type and payload length */ - unsigned char payload_buf[] = " HEARTBLEED "; + unsigned char payload_buf[4 + MAX_PRINTABLE_CHARACTERS] = + " HEARTBLEED "; fixture.payload = &payload_buf[0]; fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS; @@ -328,9 +329,9 @@ static int test_dtls1_heartbleed_empty_payload() * Excluding the NUL at the end, one byte short of type + payload length * + minimum padding */ - unsigned char payload_buf[MIN_PADDING_SIZE + 3]; - memset(payload_buf, ' ', sizeof(payload_buf)); - payload_buf[sizeof(payload_buf) - 1] = '\0'; + unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4]; + memset(payload_buf, ' ', MIN_PADDING_SIZE + 2); + payload_buf[MIN_PADDING_SIZE + 2] = '\0'; fixture.payload = &payload_buf[0]; fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS; @@ -363,8 +364,8 @@ static int test_tls1_not_bleeding() { SETUP_HEARTBEAT_TEST_FIXTURE(tls); /* Three-byte pad at the beginning for type and payload length */ - unsigned char payload_buf[] = " Not bleeding, sixteen spaces of padding" - " "; + unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4] = + " Not bleeding, sixteen spaces of padding" " "; const int payload_buf_len = honest_payload_size(payload_buf); fixture.payload = &payload_buf[0]; @@ -385,9 +386,9 @@ static int test_tls1_not_bleeding_empty_payload() * Three-byte pad at the beginning for type and payload length, plus a * NUL at the end */ - unsigned char payload_buf[4 + MIN_PADDING_SIZE]; - memset(payload_buf, ' ', sizeof(payload_buf)); - payload_buf[sizeof(payload_buf) - 1] = '\0'; + unsigned char payload_buf[4 + MAX_PRINTABLE_CHARACTERS]; + memset(payload_buf, ' ', MIN_PADDING_SIZE + 3); + payload_buf[MIN_PADDING_SIZE + 3] = '\0'; payload_buf_len = honest_payload_size(payload_buf); fixture.payload = &payload_buf[0]; @@ -402,7 +403,8 @@ static int test_tls1_heartbleed() { SETUP_HEARTBEAT_TEST_FIXTURE(tls); /* Three-byte pad at the beginning for type and payload length */ - unsigned char payload_buf[] = " HEARTBLEED "; + unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4] = + " HEARTBLEED "; fixture.payload = &payload_buf[0]; fixture.sent_payload_len = 
MAX_PRINTABLE_CHARACTERS; @@ -419,9 +421,9 @@ static int test_tls1_heartbleed_empty_payload() * Excluding the NUL at the end, one byte short of type + payload length * + minimum padding */ - unsigned char payload_buf[MIN_PADDING_SIZE + 3]; - memset(payload_buf, ' ', sizeof(payload_buf)); - payload_buf[sizeof(payload_buf) - 1] = '\0'; + unsigned char payload_buf[MAX_PRINTABLE_CHARACTERS + 4]; + memset(payload_buf, ' ', MIN_PADDING_SIZE + 2); + payload_buf[MIN_PADDING_SIZE + 2] = '\0'; fixture.payload = &payload_buf[0]; fixture.sent_payload_len = MAX_PRINTABLE_CHARACTERS; diff --git a/deps/openssl/openssl/ssl/s23_clnt.c b/deps/openssl/openssl/ssl/s23_clnt.c index 3766567c8f80a0..e4e707cf687e7b 100644 --- a/deps/openssl/openssl/ssl/s23_clnt.c +++ b/deps/openssl/openssl/ssl/s23_clnt.c @@ -279,7 +279,6 @@ static int ssl23_no_ssl2_ciphers(SSL *s) int ssl_fill_hello_random(SSL *s, int server, unsigned char *result, int len) { int send_time = 0; - if (len < 4) return 0; if (server) @@ -303,6 +302,7 @@ static int ssl23_client_hello(SSL *s) unsigned long l; int ssl2_compat; int version = 0, version_major, version_minor; + int al = 0; #ifndef OPENSSL_NO_COMP int j; SSL_COMP *comp; @@ -368,6 +368,8 @@ static int ssl23_client_hello(SSL *s) || s->tlsext_opaque_prf_input != NULL) ssl2_compat = 0; # endif + if (s->cert->cli_ext.meths_count != 0) + ssl2_compat = 0; } #endif @@ -387,6 +389,10 @@ static int ssl23_client_hello(SSL *s) if (version == TLS1_2_VERSION) { version_major = TLS1_2_VERSION_MAJOR; version_minor = TLS1_2_VERSION_MINOR; + } else if (tls1_suiteb(s)) { + SSLerr(SSL_F_SSL23_CLIENT_HELLO, + SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE); + return -1; } else if (version == TLS1_1_VERSION) { version_major = TLS1_1_VERSION_MAJOR; version_minor = TLS1_1_VERSION_MINOR; @@ -542,9 +548,9 @@ static int ssl23_client_hello(SSL *s) } if ((p = ssl_add_clienthello_tlsext(s, p, - buf + - SSL3_RT_MAX_PLAIN_LENGTH)) == - NULL) { + buf + SSL3_RT_MAX_PLAIN_LENGTH, + &al)) == NULL) { + ssl3_send_alert(s, SSL3_AL_FATAL, al); SSLerr(SSL_F_SSL23_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); return -1; } @@ -598,10 +604,13 @@ static int ssl23_client_hello(SSL *s) if (ssl2_compat) s->msg_callback(1, SSL2_VERSION, 0, s->init_buf->data + 2, ret - 2, s, s->msg_callback_arg); - else + else { + s->msg_callback(1, version, SSL3_RT_HEADER, s->init_buf->data, 5, + s, s->msg_callback_arg); s->msg_callback(1, version, SSL3_RT_HANDSHAKE, s->init_buf->data + 5, ret - 5, s, s->msg_callback_arg); + } } return ret; @@ -749,9 +758,12 @@ static int ssl23_get_server_hello(SSL *s) cb(s, SSL_CB_READ_ALERT, j); } - if (s->msg_callback) + if (s->msg_callback) { + s->msg_callback(0, s->version, SSL3_RT_HEADER, p, 5, s, + s->msg_callback_arg); s->msg_callback(0, s->version, SSL3_RT_ALERT, p + 5, 2, s, s->msg_callback_arg); + } s->rwstate = SSL_NOTHING; SSLerr(SSL_F_SSL23_GET_SERVER_HELLO, SSL_AD_REASON_OFFSET + p[6]); diff --git a/deps/openssl/openssl/ssl/s23_srvr.c b/deps/openssl/openssl/ssl/s23_srvr.c index 50f98dced44f5a..470bd3d94f29a4 100644 --- a/deps/openssl/openssl/ssl/s23_srvr.c +++ b/deps/openssl/openssl/ssl/s23_srvr.c @@ -402,6 +402,11 @@ int ssl23_get_client_hello(SSL *s) /* ensure that TLS_MAX_VERSION is up-to-date */ OPENSSL_assert(s->version <= TLS_MAX_VERSION); + if (s->version < TLS1_2_VERSION && tls1_suiteb(s)) { + SSLerr(SSL_F_SSL23_GET_CLIENT_HELLO, + SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE); + goto err; + } #ifdef OPENSSL_FIPS if (FIPS_mode() && (s->version < TLS1_VERSION)) { SSLerr(SSL_F_SSL23_GET_CLIENT_HELLO, diff --git 
a/deps/openssl/openssl/ssl/s2_clnt.c b/deps/openssl/openssl/ssl/s2_clnt.c index b23b083153fb9b..69da6b1421dfd4 100644 --- a/deps/openssl/openssl/ssl/s2_clnt.c +++ b/deps/openssl/openssl/ssl/s2_clnt.c @@ -418,19 +418,19 @@ static int get_server_hello(SSL *s) return (-1); } } else { -# ifdef undef +# if 0 /* very bad */ memset(s->session->session_id, 0, SSL_MAX_SSL_SESSION_ID_LENGTH_IN_BYTES); s->session->session_id_length = 0; - */ # endif - /* - * we need to do this in case we were trying to reuse a client - * session but others are already reusing it. If this was a new - * 'blank' session ID, the session-id length will still be 0 - */ - if (s->session->session_id_length > 0) { + + /* + * we need to do this in case we were trying to reuse a client + * session but others are already reusing it. If this was a new + * 'blank' session ID, the session-id length will still be 0 + */ + if (s->session->session_id_length > 0) { if (!ssl_get_new_session(s, 0)) { ssl2_return_error(s, SSL2_PE_UNDEFINED_ERROR); return (-1); diff --git a/deps/openssl/openssl/ssl/s2_lib.c b/deps/openssl/openssl/ssl/s2_lib.c index 7e3674a685fb91..d55b93f76bb7f8 100644 --- a/deps/openssl/openssl/ssl/s2_lib.c +++ b/deps/openssl/openssl/ssl/s2_lib.c @@ -435,10 +435,7 @@ const SSL_CIPHER *ssl2_get_cipher_by_char(const unsigned char *p) ((unsigned long)p[1] << 8L) | (unsigned long)p[2]; c.id = id; cp = OBJ_bsearch_ssl_cipher_id(&c, ssl2_ciphers, SSL2_NUM_CIPHERS); - if ((cp == NULL) || (cp->valid == 0)) - return NULL; - else - return cp; + return cp; } int ssl2_put_cipher_by_char(const SSL_CIPHER *c, unsigned char *p) diff --git a/deps/openssl/openssl/ssl/s3_both.c b/deps/openssl/openssl/ssl/s3_both.c index 77374f41eebfac..c92fd721e2aad6 100644 --- a/deps/openssl/openssl/ssl/s3_both.c +++ b/deps/openssl/openssl/ssl/s3_both.c @@ -158,13 +158,12 @@ int ssl3_do_write(SSL *s, int type) int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen) { - unsigned char *p, *d; + unsigned char *p; int i; unsigned long l; if (s->state == a) { - d = (unsigned char *)s->init_buf->data; - p = &(d[4]); + p = ssl_handshake_start(s); i = s->method->ssl3_enc->final_finish_mac(s, sender, slen, @@ -173,7 +172,6 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen) return 0; s->s3->tmp.finish_md_len = i; memcpy(p, s->s3->tmp.finish_md, i); - p += i; l = i; /* @@ -196,17 +194,12 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen) */ l &= 0xffff; #endif - - *(d++) = SSL3_MT_FINISHED; - l2n3(l, d); - s->init_num = (int)l + 4; - s->init_off = 0; - + ssl_set_handshake_header(s, SSL3_MT_FINISHED, l); s->state = b; } /* SSL3_ST_SEND_xxxxxx_HELLO_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); } #ifndef OPENSSL_NO_NEXTPROTONEG @@ -248,7 +241,7 @@ int ssl3_get_finished(SSL *s, int a, int b) #ifdef OPENSSL_NO_NEXTPROTONEG /* * the mac has already been generated when we received the change cipher - * spec message and is in s->s3->tmp.peer_finish_md. 
+ * spec message and is in s->s3->tmp.peer_finish_md */ #endif @@ -326,84 +319,20 @@ int ssl3_send_change_cipher_spec(SSL *s, int a, int b) return (ssl3_do_write(s, SSL3_RT_CHANGE_CIPHER_SPEC)); } -static int ssl3_add_cert_to_buf(BUF_MEM *buf, unsigned long *l, X509 *x) -{ - int n; - unsigned char *p; - - n = i2d_X509(x, NULL); - if (!BUF_MEM_grow_clean(buf, (int)(n + (*l) + 3))) { - SSLerr(SSL_F_SSL3_ADD_CERT_TO_BUF, ERR_R_BUF_LIB); - return (-1); - } - p = (unsigned char *)&(buf->data[*l]); - l2n3(n, p); - i2d_X509(x, &p); - *l += n + 3; - - return (0); -} - -unsigned long ssl3_output_cert_chain(SSL *s, X509 *x) +unsigned long ssl3_output_cert_chain(SSL *s, CERT_PKEY *cpk) { unsigned char *p; - int i; - unsigned long l = 7; - BUF_MEM *buf; - int no_chain; - - if ((s->mode & SSL_MODE_NO_AUTO_CHAIN) || s->ctx->extra_certs) - no_chain = 1; - else - no_chain = 0; + unsigned long l = 3 + SSL_HM_HEADER_LENGTH(s); - /* TLSv1 sends a chain with nothing in it, instead of an alert */ - buf = s->init_buf; - if (!BUF_MEM_grow_clean(buf, 10)) { - SSLerr(SSL_F_SSL3_OUTPUT_CERT_CHAIN, ERR_R_BUF_LIB); - return (0); - } - if (x != NULL) { - if (no_chain) { - if (ssl3_add_cert_to_buf(buf, &l, x)) - return (0); - } else { - X509_STORE_CTX xs_ctx; - - if (!X509_STORE_CTX_init(&xs_ctx, s->ctx->cert_store, x, NULL)) { - SSLerr(SSL_F_SSL3_OUTPUT_CERT_CHAIN, ERR_R_X509_LIB); - return (0); - } - X509_verify_cert(&xs_ctx); - /* Don't leave errors in the queue */ - ERR_clear_error(); - for (i = 0; i < sk_X509_num(xs_ctx.chain); i++) { - x = sk_X509_value(xs_ctx.chain, i); - - if (ssl3_add_cert_to_buf(buf, &l, x)) { - X509_STORE_CTX_cleanup(&xs_ctx); - return 0; - } - } - X509_STORE_CTX_cleanup(&xs_ctx); - } - } - /* Thawte special :-) */ - for (i = 0; i < sk_X509_num(s->ctx->extra_certs); i++) { - x = sk_X509_value(s->ctx->extra_certs, i); - if (ssl3_add_cert_to_buf(buf, &l, x)) - return (0); - } + if (!ssl_add_cert_chain(s, cpk, &l)) + return 0; - l -= 7; - p = (unsigned char *)&(buf->data[4]); + l -= 3 + SSL_HM_HEADER_LENGTH(s); + p = ssl_handshake_start(s); l2n3(l, p); l += 3; - p = (unsigned char *)&(buf->data[0]); - *(p++) = SSL3_MT_CERTIFICATE; - l2n3(l, p); - l += 4; - return (l); + ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE, l); + return l + SSL_HM_HEADER_LENGTH(s); } /* @@ -477,17 +406,6 @@ long ssl3_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) SSLerr(SSL_F_SSL3_GET_MESSAGE, SSL_R_UNEXPECTED_MESSAGE); goto f_err; } - if ((mt < 0) && (*p == SSL3_MT_CLIENT_HELLO) && - (st1 == SSL3_ST_SR_CERT_A) && (stn == SSL3_ST_SR_CERT_B)) { - /* - * At this point we have got an MS SGC second client hello (maybe - * we should always allow the client to start a new handshake?). - * We need to restart the mac. Don't increment - * {num,total}_renegotiations because we have not completed the - * handshake. - */ - ssl3_init_finished_mac(s); - } s->s3->tmp.message_type = *(p++); @@ -578,7 +496,18 @@ int ssl_cert_type(X509 *x, EVP_PKEY *pkey) ret = SSL_PKEY_GOST94; } else if (i == NID_id_GostR3410_2001 || i == NID_id_GostR3410_2001_cc) { ret = SSL_PKEY_GOST01; + } else if (x && (i == EVP_PKEY_DH || i == EVP_PKEY_DHX)) { + /* + * For DH two cases: DH certificate signed with RSA and DH + * certificate signed with DSA. 
+ */ + i = X509_certificate_type(x, pk); + if (i & EVP_PKS_RSA) + ret = SSL_PKEY_DH_RSA; + else if (i & EVP_PKS_DSA) + ret = SSL_PKEY_DH_DSA; } + err: if (!pkey) EVP_PKEY_free(pk); diff --git a/deps/openssl/openssl/ssl/s3_cbc.c b/deps/openssl/openssl/ssl/s3_cbc.c index 598d27edccb5aa..f31dc046f3bf15 100644 --- a/deps/openssl/openssl/ssl/s3_cbc.c +++ b/deps/openssl/openssl/ssl/s3_cbc.c @@ -125,7 +125,7 @@ int tls1_cbc_remove_padding(const SSL *s, unsigned padding_length, good, to_check, i; const unsigned overhead = 1 /* padding length byte */ + mac_size; /* Check if version requires explicit IV */ - if (s->version >= TLS1_1_VERSION || s->version == DTLS1_BAD_VER) { + if (SSL_USE_EXPLICIT_IV(s)) { /* * These lengths are all public so we can test them in non-constant * time. diff --git a/deps/openssl/openssl/ssl/s3_clnt.c b/deps/openssl/openssl/ssl/s3_clnt.c index 0e5acecabb0677..91053d59eab396 100644 --- a/deps/openssl/openssl/ssl/s3_clnt.c +++ b/deps/openssl/openssl/ssl/s3_clnt.c @@ -310,11 +310,11 @@ int ssl3_connect(SSL *s) s->state = SSL3_ST_CR_SESSION_TICKET_A; } #endif - } else + } else { s->state = SSL3_ST_CR_CERT_A; + } s->init_num = 0; break; - case SSL3_ST_CR_CERT_A: case SSL3_ST_CR_CERT_B: /* Check if it is anon DH/ECDH, SRP auth */ @@ -650,6 +650,7 @@ int ssl3_client_hello(SSL *s) unsigned char *p, *d; int i; unsigned long l; + int al = 0; #ifndef OPENSSL_NO_COMP int j; SSL_COMP *comp; @@ -664,15 +665,64 @@ int ssl3_client_hello(SSL *s) if (!ssl_get_new_session(s, 0)) goto err; } + if (s->method->version == DTLS_ANY_VERSION) { + /* Determine which DTLS version to use */ + int options = s->options; + /* If DTLS 1.2 disabled correct the version number */ + if (options & SSL_OP_NO_DTLSv1_2) { + if (tls1_suiteb(s)) { + SSLerr(SSL_F_SSL3_CLIENT_HELLO, + SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE); + goto err; + } + /* + * Disabling all versions is silly: return an error. + */ + if (options & SSL_OP_NO_DTLSv1) { + SSLerr(SSL_F_SSL3_CLIENT_HELLO, SSL_R_WRONG_SSL_VERSION); + goto err; + } + /* + * Update method so we don't use any DTLS 1.2 features. 
+ */ + s->method = DTLSv1_client_method(); + s->version = DTLS1_VERSION; + } else { + /* + * We only support one version: update method + */ + if (options & SSL_OP_NO_DTLSv1) + s->method = DTLSv1_2_client_method(); + s->version = DTLS1_2_VERSION; + } + s->client_version = s->version; + } /* else use the pre-loaded session */ p = s->s3->client_random; - if (ssl_fill_hello_random(s, 0, p, SSL3_RANDOM_SIZE) <= 0) + /* + * for DTLS if client_random is initialized, reuse it, we are + * required to use same upon reply to HelloVerify + */ + if (SSL_IS_DTLS(s)) { + size_t idx; + i = 1; + for (idx = 0; idx < sizeof(s->s3->client_random); idx++) { + if (p[idx]) { + i = 0; + break; + } + } + } else + i = 1; + + if (i && ssl_fill_hello_random(s, 0, p, + sizeof(s->s3->client_random)) <= 0) goto err; /* Do the message type and length last */ - d = p = &(buf[4]); + d = p = ssl_handshake_start(s); /*- * version indicates the negotiated version: for example from @@ -732,6 +782,17 @@ int ssl3_client_hello(SSL *s) p += i; } + /* cookie stuff for DTLS */ + if (SSL_IS_DTLS(s)) { + if (s->d1->cookie_len > sizeof(s->d1->cookie)) { + SSLerr(SSL_F_SSL3_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); + goto err; + } + *(p++) = s->d1->cookie_len; + memcpy(p, s->d1->cookie, s->d1->cookie_len); + p += s->d1->cookie_len; + } + /* Ciphers supported */ i = ssl_cipher_list_to_bytes(s, SSL_get_ciphers(s), &(p[2]), 0); if (i == 0) { @@ -776,27 +837,21 @@ int ssl3_client_hello(SSL *s) goto err; } if ((p = - ssl_add_clienthello_tlsext(s, p, - buf + SSL3_RT_MAX_PLAIN_LENGTH)) == - NULL) { + ssl_add_clienthello_tlsext(s, p, buf + SSL3_RT_MAX_PLAIN_LENGTH, + &al)) == NULL) { + ssl3_send_alert(s, SSL3_AL_FATAL, al); SSLerr(SSL_F_SSL3_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); goto err; } #endif - l = (p - d); - d = buf; - *(d++) = SSL3_MT_CLIENT_HELLO; - l2n3(l, d); - + l = p - d; + ssl_set_handshake_header(s, SSL3_MT_CLIENT_HELLO, l); s->state = SSL3_ST_CW_CLNT_HELLO_B; - /* number of bytes to write */ - s->init_num = p - buf; - s->init_off = 0; } /* SSL3_ST_CW_CLNT_HELLO_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); err: return (-1); } @@ -805,13 +860,20 @@ int ssl3_get_server_hello(SSL *s) { STACK_OF(SSL_CIPHER) *sk; const SSL_CIPHER *c; + CERT *ct = s->cert; unsigned char *p, *d; - int i, al, ok; + int i, al = SSL_AD_INTERNAL_ERROR, ok; unsigned int j; long n; #ifndef OPENSSL_NO_COMP SSL_COMP *comp; #endif + /* + * Hello verify request and/or server hello version may not match so set + * first packet if we're negotiating version. 
+ */ + if (SSL_IS_DTLS(s)) + s->first_packet = 1; n = s->method->ssl_get_message(s, SSL3_ST_CR_SRVR_HELLO_A, @@ -820,7 +882,8 @@ int ssl3_get_server_hello(SSL *s) if (!ok) return ((int)n); - if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(s)) { + s->first_packet = 0; if (s->s3->tmp.message_type == DTLS1_MT_HELLO_VERIFY_REQUEST) { if (s->d1->send_cookie == 0) { s->s3->tmp.reuse_message = 1; @@ -841,6 +904,28 @@ int ssl3_get_server_hello(SSL *s) } d = p = (unsigned char *)s->init_msg; + if (s->method->version == DTLS_ANY_VERSION) { + /* Work out correct protocol version to use */ + int hversion = (p[0] << 8) | p[1]; + int options = s->options; + if (hversion == DTLS1_2_VERSION && !(options & SSL_OP_NO_DTLSv1_2)) + s->method = DTLSv1_2_client_method(); + else if (tls1_suiteb(s)) { + SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, + SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE); + s->version = hversion; + al = SSL_AD_PROTOCOL_VERSION; + goto f_err; + } else if (hversion == DTLS1_VERSION && !(options & SSL_OP_NO_DTLSv1)) + s->method = DTLSv1_client_method(); + else { + SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_SSL_VERSION); + s->version = hversion; + al = SSL_AD_PROTOCOL_VERSION; + goto f_err; + } + s->version = s->method->version; + } if ((p[0] != (s->version >> 8)) || (p[1] != (s->version & 0xff))) { SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_SSL_VERSION); @@ -904,7 +989,6 @@ int ssl3_get_server_hello(SSL *s) */ if (s->session->session_id_length > 0) { if (!ssl_get_new_session(s, 0)) { - al = SSL_AD_INTERNAL_ERROR; goto f_err; } } @@ -919,21 +1003,16 @@ int ssl3_get_server_hello(SSL *s) SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_UNKNOWN_CIPHER_RETURNED); goto f_err; } - /* TLS v1.2 only ciphersuites require v1.2 or later */ - if ((c->algorithm_ssl & SSL_TLSV1_2) && - (TLS1_get_version(s) < TLS1_2_VERSION)) { - al = SSL_AD_ILLEGAL_PARAMETER; - SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_CIPHER_RETURNED); - goto f_err; - } -#ifndef OPENSSL_NO_SRP - if (((c->algorithm_mkey & SSL_kSRP) || (c->algorithm_auth & SSL_aSRP)) && - !(s->srp_ctx.srp_Mask & SSL_kSRP)) { + /* + * If it is a disabled cipher we didn't send it in client hello, so + * return an error. + */ + if (c->algorithm_ssl & ct->mask_ssl || + c->algorithm_mkey & ct->mask_k || c->algorithm_auth & ct->mask_a) { al = SSL_AD_ILLEGAL_PARAMETER; SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_WRONG_CIPHER_RETURNED); goto f_err; } -#endif /* OPENSSL_NO_SRP */ p += ssl_put_cipher_by_char(s, NULL, NULL); sk = ssl_get_ciphers_by_id(s); @@ -966,14 +1045,11 @@ int ssl3_get_server_hello(SSL *s) } s->s3->tmp.new_cipher = c; /* - * Don't digest cached records if TLS v1.2: we may need them for client + * Don't digest cached records if no sigalgs: we may need them for client * authentication. */ - if (TLS1_get_version(s) < TLS1_2_VERSION - && !ssl3_digest_cached_records(s)) { - al = SSL_AD_INTERNAL_ERROR; + if (!SSL_USE_SIGALGS(s) && !ssl3_digest_cached_records(s)) goto f_err; - } /* lets get the compression algorithm */ /* COMPRESSION */ #ifdef OPENSSL_NO_COMP @@ -988,7 +1064,6 @@ int ssl3_get_server_hello(SSL *s) * using compression. 
*/ if (s->session->compress_meth != 0) { - al = SSL_AD_INTERNAL_ERROR; SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_INCONSISTENT_COMPRESSION); goto f_err; } @@ -1021,16 +1096,9 @@ int ssl3_get_server_hello(SSL *s) #ifndef OPENSSL_NO_TLSEXT /* TLS extensions */ - if (s->version >= SSL3_VERSION) { - if (!ssl_parse_serverhello_tlsext(s, &p, d, n, &al)) { - /* 'al' set by ssl_parse_serverhello_tlsext */ - SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_PARSE_TLSEXT); - goto f_err; - } - if (ssl_check_serverhello_tlsext(s) <= 0) { - SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_SERVERHELLO_TLSEXT); - goto err; - } + if (!ssl_parse_serverhello_tlsext(s, &p, d, n)) { + SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, SSL_R_PARSE_TLSEXT); + goto err; } #endif @@ -1192,6 +1260,14 @@ int ssl3_get_server_certificate(SSL *s) } if (need_cert) { + int exp_idx = ssl_cipher_get_cert_index(s->s3->tmp.new_cipher); + if (exp_idx >= 0 && i != exp_idx) { + x = NULL; + al = SSL_AD_ILLEGAL_PARAMETER; + SSLerr(SSL_F_SSL3_GET_SERVER_CERTIFICATE, + SSL_R_WRONG_CERTIFICATE_TYPE); + goto f_err; + } sc->peer_cert_type = i; CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); /* @@ -1219,7 +1295,6 @@ int ssl3_get_server_certificate(SSL *s) x = NULL; ret = 1; - if (0) { f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); @@ -1276,7 +1351,7 @@ int ssl3_get_key_exchange(SSL *s) * Can't skip server key exchange if this is an ephemeral * ciphersuite. */ - if (alg_k & (SSL_kEDH | SSL_kEECDH)) { + if (alg_k & (SSL_kDHE | SSL_kECDHE)) { SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNEXPECTED_MESSAGE); al = SSL_AD_UNEXPECTED_MESSAGE; goto f_err; @@ -1672,9 +1747,16 @@ int ssl3_get_key_exchange(SSL *s) SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_LENGTH_TOO_SHORT); goto f_err; } + /* + * Check curve is one of our preferences, if not server has sent an + * invalid curve. ECParameters is 3 bytes. 
+ */ + if (!tls1_check_curve(s, p, 3)) { + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_WRONG_CURVE); + goto f_err; + } - if ((*p != NAMED_CURVE_TYPE) || - ((curve_nid = tls1_ec_curve_id2nid(*(p + 2))) == 0)) { + if ((curve_nid = tls1_ec_curve_id2nid(*(p + 2))) == 0) { al = SSL_AD_INTERNAL_ERROR; SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNABLE_TO_FIND_ECDH_PARAMETERS); @@ -1762,29 +1844,16 @@ int ssl3_get_key_exchange(SSL *s) /* if it was signed, check the signature */ if (pkey != NULL) { - if (TLS1_get_version(s) >= TLS1_2_VERSION) { - int sigalg; + if (SSL_USE_SIGALGS(s)) { + int rv; if (2 > n) { SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_LENGTH_TOO_SHORT); goto f_err; } - - sigalg = tls12_get_sigid(pkey); - /* Should never happen */ - if (sigalg == -1) { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR); + rv = tls12_check_peer_sigalg(&md, s, p, pkey); + if (rv == -1) goto err; - } - /* Check key type is consistent with signature */ - if (sigalg != (int)p[1]) { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, - SSL_R_WRONG_SIGNATURE_TYPE); - al = SSL_AD_DECODE_ERROR; - goto f_err; - } - md = tls12_get_hash(p[0]); - if (md == NULL) { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNKNOWN_DIGEST); + else if (rv == 0) { goto f_err; } #ifdef SSL_DEBUG @@ -1812,8 +1881,7 @@ int ssl3_get_key_exchange(SSL *s) goto f_err; } #ifndef OPENSSL_NO_RSA - if (pkey->type == EVP_PKEY_RSA - && TLS1_get_version(s) < TLS1_2_VERSION) { + if (pkey->type == EVP_PKEY_RSA && !SSL_USE_SIGALGS(s)) { int num; unsigned int size; @@ -1863,7 +1931,10 @@ int ssl3_get_key_exchange(SSL *s) } else { /* aNULL, aSRP or kPSK do not need public keys */ if (!(alg_a & (SSL_aNULL | SSL_aSRP)) && !(alg_k & SSL_kPSK)) { - SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR); + /* Might be wrong key type, check it */ + if (ssl3_check_cert_and_algorithm(s)) + /* Otherwise this shouldn't happen */ + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR); goto err; } /* still data left over */ @@ -1955,12 +2026,21 @@ int ssl3_get_certificate_request(SSL *s) /* get the certificate types */ ctype_num = *(p++); - if (ctype_num > SSL3_CT_NUMBER) + if (s->cert->ctypes) { + OPENSSL_free(s->cert->ctypes); + s->cert->ctypes = NULL; + } + if (ctype_num > SSL3_CT_NUMBER) { + /* If we exceed static buffer copy all to cert structure */ + s->cert->ctypes = OPENSSL_malloc(ctype_num); + memcpy(s->cert->ctypes, p, ctype_num); + s->cert->ctype_num = (size_t)ctype_num; ctype_num = SSL3_CT_NUMBER; + } for (i = 0; i < ctype_num; i++) s->s3->tmp.ctype[i] = p[i]; - p += ctype_num; - if (TLS1_get_version(s) >= TLS1_2_VERSION) { + p += p[-1]; + if (SSL_USE_SIGALGS(s)) { n2s(p, llen); /* * Check we have enough room for signature algorithms and following @@ -1972,12 +2052,22 @@ int ssl3_get_certificate_request(SSL *s) SSL_R_DATA_LENGTH_TOO_LONG); goto err; } - if ((llen & 1) || !tls1_process_sigalgs(s, p, llen)) { + /* Clear certificate digests and validity flags */ + for (i = 0; i < SSL_PKEY_NUM; i++) { + s->cert->pkeys[i].digest = NULL; + s->cert->pkeys[i].valid_flags = 0; + } + if ((llen & 1) || !tls1_save_sigalgs(s, p, llen)) { ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR); SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST, SSL_R_SIGNATURE_ALGORITHMS_ERROR); goto err; } + if (!tls1_process_sigalgs(s)) { + ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); + SSLerr(SSL_F_SSL3_GET_CERTIFICATE_REQUEST, ERR_R_MALLOC_FAILURE); + goto err; + } p += llen; } @@ -2216,7 +2306,7 @@ int ssl3_get_server_done(SSL *s) int ssl3_send_client_key_exchange(SSL *s) 
{ - unsigned char *p, *d; + unsigned char *p; int n; unsigned long alg_k; #ifndef OPENSSL_NO_RSA @@ -2236,8 +2326,7 @@ int ssl3_send_client_key_exchange(SSL *s) #endif if (s->state == SSL3_ST_CW_KEY_EXCH_A) { - d = (unsigned char *)s->init_buf->data; - p = &(d[4]); + p = ssl_handshake_start(s); alg_k = s->s3->tmp.new_cipher->algorithm_mkey; @@ -2448,33 +2537,56 @@ int ssl3_send_client_key_exchange(SSL *s) #ifndef OPENSSL_NO_DH else if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) { DH *dh_srvr, *dh_clnt; + SESS_CERT *scert = s->session->sess_cert; - if (s->session->sess_cert == NULL) { + if (scert == NULL) { ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE); SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, SSL_R_UNEXPECTED_MESSAGE); goto err; } - if (s->session->sess_cert->peer_dh_tmp != NULL) - dh_srvr = s->session->sess_cert->peer_dh_tmp; + if (scert->peer_dh_tmp != NULL) + dh_srvr = scert->peer_dh_tmp; else { /* we get them from the cert */ - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, - SSL_R_UNABLE_TO_FIND_DH_PARAMETERS); - goto err; - } - - /* generate a new random key */ - if ((dh_clnt = DHparams_dup(dh_srvr)) == NULL) { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); - goto err; + int idx = scert->peer_cert_type; + EVP_PKEY *spkey = NULL; + dh_srvr = NULL; + if (idx >= 0) + spkey = X509_get_pubkey(scert->peer_pkeys[idx].x509); + if (spkey) { + dh_srvr = EVP_PKEY_get1_DH(spkey); + EVP_PKEY_free(spkey); + } + if (dh_srvr == NULL) { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + ERR_R_INTERNAL_ERROR); + goto err; + } } - if (!DH_generate_key(dh_clnt)) { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); - DH_free(dh_clnt); - goto err; + if (s->s3->flags & TLS1_FLAGS_SKIP_CERT_VERIFY) { + /* Use client certificate key */ + EVP_PKEY *clkey = s->cert->key->privatekey; + dh_clnt = NULL; + if (clkey) + dh_clnt = EVP_PKEY_get1_DH(clkey); + if (dh_clnt == NULL) { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, + ERR_R_INTERNAL_ERROR); + goto err; + } + } else { + /* generate a new random key */ + if ((dh_clnt = DHparams_dup(dh_srvr)) == NULL) { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); + goto err; + } + if (!DH_generate_key(dh_clnt)) { + SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); + DH_free(dh_clnt); + goto err; + } } /* @@ -2483,6 +2595,8 @@ int ssl3_send_client_key_exchange(SSL *s) */ n = DH_compute_key(p, dh_srvr->pub_key, dh_clnt); + if (scert->peer_dh_tmp == NULL) + DH_free(dh_srvr); if (n <= 0) { SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, ERR_R_DH_LIB); @@ -2499,11 +2613,15 @@ int ssl3_send_client_key_exchange(SSL *s) /* clean up */ memset(p, 0, n); - /* send off the data */ - n = BN_num_bytes(dh_clnt->pub_key); - s2n(n, p); - BN_bn2bin(dh_clnt->pub_key, p); - n += 2; + if (s->s3->flags & TLS1_FLAGS_SKIP_CERT_VERIFY) + n = 0; + else { + /* send off the data */ + n = BN_num_bytes(dh_clnt->pub_key); + s2n(n, p); + BN_bn2bin(dh_clnt->pub_key, p); + n += 2; + } DH_free(dh_clnt); @@ -2928,17 +3046,12 @@ int ssl3_send_client_key_exchange(SSL *s) goto err; } - *(d++) = SSL3_MT_CLIENT_KEY_EXCHANGE; - l2n3(n, d); - + ssl_set_handshake_header(s, SSL3_MT_CLIENT_KEY_EXCHANGE, n); s->state = SSL3_ST_CW_KEY_EXCH_B; - /* number of bytes to write */ - s->init_num = n + 4; - s->init_off = 0; } /* SSL3_ST_CW_KEY_EXCH_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); err: #ifndef OPENSSL_NO_ECDH BN_CTX_free(bn_ctx); @@ -2953,7 +3066,7 @@ int 
ssl3_send_client_key_exchange(SSL *s) int ssl3_send_client_verify(SSL *s) { - unsigned char *p, *d; + unsigned char *p; unsigned char data[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; EVP_PKEY *pkey; EVP_PKEY_CTX *pctx = NULL; @@ -2965,14 +3078,13 @@ int ssl3_send_client_verify(SSL *s) EVP_MD_CTX_init(&mctx); if (s->state == SSL3_ST_CW_CERT_VRFY_A) { - d = (unsigned char *)s->init_buf->data; - p = &(d[4]); + p = ssl_handshake_start(s); pkey = s->cert->key->privatekey; /* Create context from key and test if sha1 is allowed as digest */ pctx = EVP_PKEY_CTX_new(pkey, NULL); EVP_PKEY_sign_init(pctx); if (EVP_PKEY_CTX_set_signature_md(pctx, EVP_sha1()) > 0) { - if (TLS1_get_version(s) < TLS1_2_VERSION) + if (!SSL_USE_SIGALGS(s)) s->method->ssl3_enc->cert_verify_mac(s, NID_sha1, &(data @@ -2984,7 +3096,7 @@ int ssl3_send_client_verify(SSL *s) * For TLS v1.2 send signature algorithm and signature using agreed * digest and cached handshake records. */ - if (TLS1_get_version(s) >= TLS1_2_VERSION) { + if (SSL_USE_SIGALGS(s)) { long hdatalen = 0; void *hdata; const EVP_MD *md = s->cert->key->digest; @@ -3068,36 +3180,86 @@ int ssl3_send_client_verify(SSL *s) SSLerr(SSL_F_SSL3_SEND_CLIENT_VERIFY, ERR_R_INTERNAL_ERROR); goto err; } - *(d++) = SSL3_MT_CERTIFICATE_VERIFY; - l2n3(n, d); - + ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE_VERIFY, n); s->state = SSL3_ST_CW_CERT_VRFY_B; - s->init_num = (int)n + 4; - s->init_off = 0; } EVP_MD_CTX_cleanup(&mctx); EVP_PKEY_CTX_free(pctx); - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); err: EVP_MD_CTX_cleanup(&mctx); EVP_PKEY_CTX_free(pctx); return (-1); } +/* + * Check a certificate can be used for client authentication. Currently check + * cert exists, if we have a suitable digest for TLS 1.2 if static DH client + * certificates can be used and optionally checks suitability for Suite B. + */ +static int ssl3_check_client_certificate(SSL *s) +{ + unsigned long alg_k; + if (!s->cert || !s->cert->key->x509 || !s->cert->key->privatekey) + return 0; + /* If no suitable signature algorithm can't use certificate */ + if (SSL_USE_SIGALGS(s) && !s->cert->key->digest) + return 0; + /* + * If strict mode check suitability of chain before using it. This also + * adjusts suite B digest if necessary. 
+ */ + if (s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT && + !tls1_check_chain(s, NULL, NULL, NULL, -2)) + return 0; + alg_k = s->s3->tmp.new_cipher->algorithm_mkey; + /* See if we can use client certificate for fixed DH */ + if (alg_k & (SSL_kDHr | SSL_kDHd)) { + SESS_CERT *scert = s->session->sess_cert; + int i = scert->peer_cert_type; + EVP_PKEY *clkey = NULL, *spkey = NULL; + clkey = s->cert->key->privatekey; + /* If client key not DH assume it can be used */ + if (EVP_PKEY_id(clkey) != EVP_PKEY_DH) + return 1; + if (i >= 0) + spkey = X509_get_pubkey(scert->peer_pkeys[i].x509); + if (spkey) { + /* Compare server and client parameters */ + i = EVP_PKEY_cmp_parameters(clkey, spkey); + EVP_PKEY_free(spkey); + if (i != 1) + return 0; + } + s->s3->flags |= TLS1_FLAGS_SKIP_CERT_VERIFY; + } + return 1; +} + int ssl3_send_client_certificate(SSL *s) { X509 *x509 = NULL; EVP_PKEY *pkey = NULL; int i; - unsigned long l; if (s->state == SSL3_ST_CW_CERT_A) { - if ((s->cert == NULL) || - (s->cert->key->x509 == NULL) || - (s->cert->key->privatekey == NULL)) - s->state = SSL3_ST_CW_CERT_B; - else + /* Let cert callback update client certificates if required */ + if (s->cert->cert_cb) { + i = s->cert->cert_cb(s, s->cert->cert_cb_arg); + if (i < 0) { + s->rwstate = SSL_X509_LOOKUP; + return -1; + } + if (i == 0) { + ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); + return 0; + } + s->rwstate = SSL_NOTHING; + } + if (ssl3_check_client_certificate(s)) s->state = SSL3_ST_CW_CERT_C; + else + s->state = SSL3_ST_CW_CERT_B; } /* We need to get a client cert */ @@ -3127,6 +3289,8 @@ int ssl3_send_client_certificate(SSL *s) X509_free(x509); if (pkey != NULL) EVP_PKEY_free(pkey); + if (i && !ssl3_check_client_certificate(s)) + i = 0; if (i == 0) { if (s->version == SSL3_VERSION) { s->s3->tmp.cert_req = 0; @@ -3143,19 +3307,16 @@ int ssl3_send_client_certificate(SSL *s) if (s->state == SSL3_ST_CW_CERT_C) { s->state = SSL3_ST_CW_CERT_D; - l = ssl3_output_cert_chain(s, - (s->s3->tmp.cert_req == - 2) ? NULL : s->cert->key->x509); - if (!l) { + if (!ssl3_output_cert_chain(s, + (s->s3->tmp.cert_req == + 2) ? 
NULL : s->cert->key)) { SSLerr(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE, ERR_R_INTERNAL_ERROR); ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); return 0; } - s->init_num = (int)l; - s->init_off = 0; } /* SSL3_ST_CW_CERT_D */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); } #define has_bits(i,m) (((i)&(m)) == (m)) @@ -3177,7 +3338,7 @@ int ssl3_check_cert_and_algorithm(SSL *s) alg_a = s->s3->tmp.new_cipher->algorithm_auth; /* we don't have a certificate */ - if ((alg_a & (SSL_aDH | SSL_aNULL | SSL_aKRB5)) || (alg_k & SSL_kPSK)) + if ((alg_a & (SSL_aNULL | SSL_aKRB5)) || (alg_k & SSL_kPSK)) return (1); sc = s->session->sess_cert; @@ -3204,6 +3365,13 @@ int ssl3_check_cert_and_algorithm(SSL *s) } else { return 1; } + } else if (alg_a & SSL_aECDSA) { + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, + SSL_R_MISSING_ECDSA_SIGNING_CERT); + goto f_err; + } else if (alg_k & (SSL_kECDHr | SSL_kECDHe)) { + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_ECDH_CERT); + goto f_err; } #endif pkey = X509_get_pubkey(sc->peer_pkeys[idx].x509); @@ -3236,13 +3404,15 @@ int ssl3_check_cert_and_algorithm(SSL *s) !(has_bits(i, EVP_PK_DH | EVP_PKT_EXCH) || (dh != NULL))) { SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_DH_KEY); goto f_err; - } else if ((alg_k & SSL_kDHr) && !has_bits(i, EVP_PK_DH | EVP_PKS_RSA)) { + } else if ((alg_k & SSL_kDHr) && !SSL_USE_SIGALGS(s) && + !has_bits(i, EVP_PK_DH | EVP_PKS_RSA)) { SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_DH_RSA_CERT); goto f_err; } # ifndef OPENSSL_NO_DSA - else if ((alg_k & SSL_kDHd) && !has_bits(i, EVP_PK_DH | EVP_PKS_DSA)) { + else if ((alg_k & SSL_kDHd) && !SSL_USE_SIGALGS(s) && + !has_bits(i, EVP_PK_DH | EVP_PKS_DSA)) { SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_DH_DSA_CERT); goto f_err; diff --git a/deps/openssl/openssl/ssl/s3_enc.c b/deps/openssl/openssl/ssl/s3_enc.c index 152f40db582b8b..cda2d8c77603c7 100644 --- a/deps/openssl/openssl/ssl/s3_enc.c +++ b/deps/openssl/openssl/ssl/s3_enc.c @@ -374,6 +374,23 @@ int ssl3_change_cipher_state(SSL *s, int which) EVP_CipherInit_ex(dd, c, NULL, key, iv, (which & SSL3_CC_WRITE)); +#ifdef OPENSSL_SSL_TRACE_CRYPTO + if (s->msg_callback) { + + int wh = which & SSL3_CC_WRITE ? 
+ TLS1_RT_CRYPTO_WRITE : TLS1_RT_CRYPTO_READ; + s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_MAC, + mac_secret, EVP_MD_size(m), s, s->msg_callback_arg); + if (c->key_len) + s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_KEY, + key, c->key_len, s, s->msg_callback_arg); + if (k) { + s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_IV, + iv, k, s, s->msg_callback_arg); + } + } +#endif + OPENSSL_cleanse(&(exp_key[0]), sizeof(exp_key)); OPENSSL_cleanse(&(exp_iv[0]), sizeof(exp_iv)); EVP_MD_CTX_cleanup(&md); @@ -823,6 +840,9 @@ int ssl3_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, EVP_MD_CTX ctx; int i, ret = 0; unsigned int n; +#ifdef OPENSSL_SSL_TRACE_CRYPTO + unsigned char *tmpout = out; +#endif EVP_MD_CTX_init(&ctx); for (i = 0; i < 3; i++) { @@ -841,6 +861,22 @@ int ssl3_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, ret += n; } EVP_MD_CTX_cleanup(&ctx); + +#ifdef OPENSSL_SSL_TRACE_CRYPTO + if (s->msg_callback) { + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_PREMASTER, + p, len, s, s->msg_callback_arg); + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_CLIENT_RANDOM, + s->s3->client_random, SSL3_RANDOM_SIZE, + s, s->msg_callback_arg); + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_SERVER_RANDOM, + s->s3->server_random, SSL3_RANDOM_SIZE, + s, s->msg_callback_arg); + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_MASTER, + tmpout, SSL3_MASTER_SECRET_SIZE, + s, s->msg_callback_arg); + } +#endif OPENSSL_cleanse(buf, sizeof buf); return (ret); } diff --git a/deps/openssl/openssl/ssl/s3_lib.c b/deps/openssl/openssl/ssl/s3_lib.c index de917d3f836ff2..28129f68d9e694 100644 --- a/deps/openssl/openssl/ssl/s3_lib.c +++ b/deps/openssl/openssl/ssl/s3_lib.c @@ -152,11 +152,6 @@ #include #include "ssl_locl.h" #include "kssl_lcl.h" -#ifndef OPENSSL_NO_TLSEXT -# ifndef OPENSSL_NO_EC -# include "../crypto/ec/ec_lcl.h" -# endif /* OPENSSL_NO_EC */ -#endif /* OPENSSL_NO_TLSEXT */ #include #ifndef OPENSSL_NO_DH # include @@ -335,7 +330,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* The DH ciphers */ /* Cipher 0B */ { - 0, + 1, SSL3_TXT_DH_DSS_DES_40_CBC_SHA, SSL3_CK_DH_DSS_DES_40_CBC_SHA, SSL_kDHd, @@ -351,7 +346,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 0C */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, SSL3_TXT_DH_DSS_DES_64_CBC_SHA, SSL3_CK_DH_DSS_DES_64_CBC_SHA, SSL_kDHd, @@ -367,7 +362,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 0D */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, SSL3_TXT_DH_DSS_DES_192_CBC3_SHA, SSL3_CK_DH_DSS_DES_192_CBC3_SHA, SSL_kDHd, @@ -383,7 +378,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 0E */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, SSL3_TXT_DH_RSA_DES_40_CBC_SHA, SSL3_CK_DH_RSA_DES_40_CBC_SHA, SSL_kDHr, @@ -399,7 +394,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 0F */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, SSL3_TXT_DH_RSA_DES_64_CBC_SHA, SSL3_CK_DH_RSA_DES_64_CBC_SHA, SSL_kDHr, @@ -415,7 +410,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 10 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, SSL3_TXT_DH_RSA_DES_192_CBC3_SHA, SSL3_CK_DH_RSA_DES_192_CBC3_SHA, SSL_kDHr, @@ -902,7 +897,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 30 */ { - 0, + 1, TLS1_TXT_DH_DSS_WITH_AES_128_SHA, TLS1_CK_DH_DSS_WITH_AES_128_SHA, SSL_kDHd, @@ -917,7 +912,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 31 */ { - 0, + 1, TLS1_TXT_DH_RSA_WITH_AES_128_SHA, 
TLS1_CK_DH_RSA_WITH_AES_128_SHA, SSL_kDHr, @@ -993,7 +988,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 36 */ { - 0, + 1, TLS1_TXT_DH_DSS_WITH_AES_256_SHA, TLS1_CK_DH_DSS_WITH_AES_256_SHA, SSL_kDHd, @@ -1009,7 +1004,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 37 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_RSA_WITH_AES_256_SHA, TLS1_CK_DH_RSA_WITH_AES_256_SHA, SSL_kDHr, @@ -1122,7 +1117,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 3E */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_DSS_WITH_AES_128_SHA256, TLS1_CK_DH_DSS_WITH_AES_128_SHA256, SSL_kDHd, @@ -1138,7 +1133,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 3F */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_RSA_WITH_AES_128_SHA256, TLS1_CK_DH_RSA_WITH_AES_128_SHA256, SSL_kDHr, @@ -1189,7 +1184,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 42 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_DSS_WITH_CAMELLIA_128_CBC_SHA, TLS1_CK_DH_DSS_WITH_CAMELLIA_128_CBC_SHA, SSL_kDHd, @@ -1205,7 +1200,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 43 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_RSA_WITH_CAMELLIA_128_CBC_SHA, TLS1_CK_DH_RSA_WITH_CAMELLIA_128_CBC_SHA, SSL_kDHr, @@ -1404,7 +1399,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 68 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_DSS_WITH_AES_256_SHA256, TLS1_CK_DH_DSS_WITH_AES_256_SHA256, SSL_kDHd, @@ -1420,7 +1415,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 69 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_RSA_WITH_AES_256_SHA256, TLS1_CK_DH_RSA_WITH_AES_256_SHA256, SSL_kDHr, @@ -1573,7 +1568,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { }, /* Cipher 85 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_DSS_WITH_CAMELLIA_256_CBC_SHA, TLS1_CK_DH_DSS_WITH_CAMELLIA_256_CBC_SHA, SSL_kDHd, @@ -1589,7 +1584,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 86 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_RSA_WITH_CAMELLIA_256_CBC_SHA, TLS1_CK_DH_RSA_WITH_CAMELLIA_256_CBC_SHA, SSL_kDHr, @@ -1739,7 +1734,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 97 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_DSS_WITH_SEED_SHA, TLS1_CK_DH_DSS_WITH_SEED_SHA, SSL_kDHd, @@ -1755,7 +1750,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 98 */ { - 0, /* not implemented (non-ephemeral DH) */ + 1, TLS1_TXT_DH_RSA_WITH_SEED_SHA, TLS1_CK_DH_RSA_WITH_SEED_SHA, SSL_kDHr, @@ -1887,7 +1882,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher A0 */ { - 0, + 1, TLS1_TXT_DH_RSA_WITH_AES_128_GCM_SHA256, TLS1_CK_DH_RSA_WITH_AES_128_GCM_SHA256, SSL_kDHr, @@ -1903,7 +1898,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher A1 */ { - 0, + 1, TLS1_TXT_DH_RSA_WITH_AES_256_GCM_SHA384, TLS1_CK_DH_RSA_WITH_AES_256_GCM_SHA384, SSL_kDHr, @@ -1951,7 +1946,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher A4 */ { - 0, + 1, TLS1_TXT_DH_DSS_WITH_AES_128_GCM_SHA256, TLS1_CK_DH_DSS_WITH_AES_128_GCM_SHA256, SSL_kDHd, @@ -1967,7 +1962,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher A5 */ { - 0, + 1, TLS1_TXT_DH_DSS_WITH_AES_256_GCM_SHA384, TLS1_CK_DH_DSS_WITH_AES_256_GCM_SHA384, SSL_kDHd, @@ -2012,6 +2007,21 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { 256, 256, }, +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + { + 1, + "SCSV", + SSL3_CK_SCSV, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0}, +#endif #ifndef OPENSSL_NO_ECDH /* Cipher C001 */ @@ -2899,6 +2909,10 @@ SSL3_ENC_METHOD SSLv3_enc_data = { (int (*)(SSL *, unsigned char *, size_t, const char *, size_t, const unsigned char *, size_t, int use_context))ssl_undefined_function, + 0, + SSL3_HM_HEADER_LENGTH, + ssl3_set_handshake_header, + ssl3_handshake_write }; long ssl3_default_timeout(void) @@ -2932,6 +2946,20 @@ int ssl3_pending(const SSL *s) SSL3_RT_APPLICATION_DATA) ? s->s3->rrec.length : 0; } +void ssl3_set_handshake_header(SSL *s, int htype, unsigned long len) +{ + unsigned char *p = (unsigned char *)s->init_buf->data; + *(p++) = htype; + l2n3(len, p); + s->init_num = (int)len + SSL3_HM_HEADER_LENGTH; + s->init_off = 0; +} + +int ssl3_handshake_write(SSL *s) +{ + return ssl3_do_write(s, SSL3_RT_HANDSHAKE); +} + int ssl3_new(SSL *s) { SSL3_STATE *s3; @@ -2988,6 +3016,11 @@ void ssl3_free(SSL *s) } if (s->s3->handshake_dgst) ssl3_free_digest_list(s); +#ifndef OPENSSL_NO_TLSEXT + if (s->s3->alpn_selected) + OPENSSL_free(s->s3->alpn_selected); +#endif + #ifndef OPENSSL_NO_SRP SSL_SRP_CTX_free(s); #endif @@ -3049,6 +3082,12 @@ void ssl3_clear(SSL *s) if (s->s3->handshake_dgst) { ssl3_free_digest_list(s); } +#if !defined(OPENSSL_NO_TLSEXT) + if (s->s3->alpn_selected) { + free(s->s3->alpn_selected); + s->s3->alpn_selected = NULL; + } +#endif memset(s->s3, 0, sizeof *s->s3); s->s3->rbuf.buf = rp; s->s3->wbuf.buf = wp; @@ -3081,6 +3120,9 @@ static char *MS_CALLBACK srp_password_from_info_cb(SSL *s, void *arg) } #endif +static int ssl3_set_req_cert_type(CERT *c, const unsigned char *p, + size_t len); + long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) { int ret = 0; @@ -3309,8 +3351,7 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) # ifndef OPENSSL_NO_HEARTBEATS case SSL_CTRL_TLS_EXT_SEND_HEARTBEAT: - if (SSL_version(s) == DTLS1_VERSION - || SSL_version(s) == DTLS1_BAD_VER) + if (SSL_IS_DTLS(s)) ret = dtls1_heartbeat(s); else ret = tls1_heartbeat(s); @@ -3331,6 +3372,196 @@ long ssl3_ctrl(SSL *s, int cmd, long larg, void *parg) #endif /* !OPENSSL_NO_TLSEXT */ + case SSL_CTRL_CHAIN: + if (larg) + return ssl_cert_set1_chain(s->cert, (STACK_OF(X509) *)parg); + else + return ssl_cert_set0_chain(s->cert, (STACK_OF(X509) *)parg); + + case SSL_CTRL_CHAIN_CERT: + if (larg) + return ssl_cert_add1_chain_cert(s->cert, (X509 *)parg); + else + return ssl_cert_add0_chain_cert(s->cert, (X509 *)parg); + + case SSL_CTRL_GET_CHAIN_CERTS: + *(STACK_OF(X509) **)parg = s->cert->key->chain; + break; + + case SSL_CTRL_SELECT_CURRENT_CERT: + return ssl_cert_select_current(s->cert, (X509 *)parg); + + case SSL_CTRL_SET_CURRENT_CERT: + if (larg == SSL_CERT_SET_SERVER) { + CERT_PKEY *cpk; + const SSL_CIPHER *cipher; + if (!s->server) + return 0; + cipher = s->s3->tmp.new_cipher; + if (!cipher) + return 0; + /* + * No certificate for unauthenticated ciphersuites or using SRP + * authentication + */ + if (cipher->algorithm_auth & (SSL_aNULL | SSL_aSRP)) + return 2; + cpk = ssl_get_server_send_pkey(s); + if (!cpk) + return 0; + s->cert->key = cpk; + return 1; + } + return ssl_cert_set_current(s->cert, larg); + +#ifndef OPENSSL_NO_EC + case SSL_CTRL_GET_CURVES: + { + unsigned char *clist; + size_t clistlen; + if (!s->session) + return 0; + clist = s->session->tlsext_ellipticcurvelist; + clistlen = s->session->tlsext_ellipticcurvelist_length / 2; + if (parg) { + size_t i; + int *cptr = parg; + unsigned int cid, nid; + for (i = 0; i < clistlen; i++) { + n2s(clist, cid); + nid = tls1_ec_curve_id2nid(cid); + 
if (nid != 0) + cptr[i] = nid; + else + cptr[i] = TLSEXT_nid_unknown | cid; + } + } + return (int)clistlen; + } + + case SSL_CTRL_SET_CURVES: + return tls1_set_curves(&s->tlsext_ellipticcurvelist, + &s->tlsext_ellipticcurvelist_length, + parg, larg); + + case SSL_CTRL_SET_CURVES_LIST: + return tls1_set_curves_list(&s->tlsext_ellipticcurvelist, + &s->tlsext_ellipticcurvelist_length, + parg); + + case SSL_CTRL_GET_SHARED_CURVE: + return tls1_shared_curve(s, larg); + +# ifndef OPENSSL_NO_ECDH + case SSL_CTRL_SET_ECDH_AUTO: + s->cert->ecdh_tmp_auto = larg; + return 1; +# endif +#endif + case SSL_CTRL_SET_SIGALGS: + return tls1_set_sigalgs(s->cert, parg, larg, 0); + + case SSL_CTRL_SET_SIGALGS_LIST: + return tls1_set_sigalgs_list(s->cert, parg, 0); + + case SSL_CTRL_SET_CLIENT_SIGALGS: + return tls1_set_sigalgs(s->cert, parg, larg, 1); + + case SSL_CTRL_SET_CLIENT_SIGALGS_LIST: + return tls1_set_sigalgs_list(s->cert, parg, 1); + + case SSL_CTRL_GET_CLIENT_CERT_TYPES: + { + const unsigned char **pctype = parg; + if (s->server || !s->s3->tmp.cert_req) + return 0; + if (s->cert->ctypes) { + if (pctype) + *pctype = s->cert->ctypes; + return (int)s->cert->ctype_num; + } + if (pctype) + *pctype = (unsigned char *)s->s3->tmp.ctype; + return s->s3->tmp.ctype_num; + } + + case SSL_CTRL_SET_CLIENT_CERT_TYPES: + if (!s->server) + return 0; + return ssl3_set_req_cert_type(s->cert, parg, larg); + + case SSL_CTRL_BUILD_CERT_CHAIN: + return ssl_build_cert_chain(s->cert, s->ctx->cert_store, larg); + + case SSL_CTRL_SET_VERIFY_CERT_STORE: + return ssl_cert_set_cert_store(s->cert, parg, 0, larg); + + case SSL_CTRL_SET_CHAIN_CERT_STORE: + return ssl_cert_set_cert_store(s->cert, parg, 1, larg); + + case SSL_CTRL_GET_PEER_SIGNATURE_NID: + if (SSL_USE_SIGALGS(s)) { + if (s->session && s->session->sess_cert) { + const EVP_MD *sig; + sig = s->session->sess_cert->peer_key->digest; + if (sig) { + *(int *)parg = EVP_MD_type(sig); + return 1; + } + } + return 0; + } + /* Might want to do something here for other versions */ + else + return 0; + + case SSL_CTRL_GET_SERVER_TMP_KEY: + if (s->server || !s->session || !s->session->sess_cert) + return 0; + else { + SESS_CERT *sc; + EVP_PKEY *ptmp; + int rv = 0; + sc = s->session->sess_cert; +#if !defined(OPENSSL_NO_RSA) && !defined(OPENSSL_NO_DH) && !defined(OPENSSL_NO_EC) && !defined(OPENSSL_NO_ECDH) + if (!sc->peer_rsa_tmp && !sc->peer_dh_tmp && !sc->peer_ecdh_tmp) + return 0; +#endif + ptmp = EVP_PKEY_new(); + if (!ptmp) + return 0; + if (0) ; +#ifndef OPENSSL_NO_RSA + else if (sc->peer_rsa_tmp) + rv = EVP_PKEY_set1_RSA(ptmp, sc->peer_rsa_tmp); +#endif +#ifndef OPENSSL_NO_DH + else if (sc->peer_dh_tmp) + rv = EVP_PKEY_set1_DH(ptmp, sc->peer_dh_tmp); +#endif +#ifndef OPENSSL_NO_ECDH + else if (sc->peer_ecdh_tmp) + rv = EVP_PKEY_set1_EC_KEY(ptmp, sc->peer_ecdh_tmp); +#endif + if (rv) { + *(EVP_PKEY **)parg = ptmp; + return 1; + } + EVP_PKEY_free(ptmp); + return 0; + } +#ifndef OPENSSL_NO_EC + case SSL_CTRL_GET_EC_POINT_FORMATS: + { + SSL_SESSION *sess = s->session; + const unsigned char **pformat = parg; + if (!sess || !sess->tlsext_ecpointformatlist) + return 0; + *pformat = sess->tlsext_ecpointformatlist; + return (int)sess->tlsext_ecpointformatlist_length; + } +#endif + case SSL_CTRL_CHECK_PROTO_VERSION: /* * For library-internal use; checks that the current protocol is the @@ -3606,6 +3837,47 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) ctx->srp_ctx.strength = larg; break; # endif + +# ifndef OPENSSL_NO_EC + case SSL_CTRL_SET_CURVES: + return 
tls1_set_curves(&ctx->tlsext_ellipticcurvelist, + &ctx->tlsext_ellipticcurvelist_length, + parg, larg); + + case SSL_CTRL_SET_CURVES_LIST: + return tls1_set_curves_list(&ctx->tlsext_ellipticcurvelist, + &ctx->tlsext_ellipticcurvelist_length, + parg); +# ifndef OPENSSL_NO_ECDH + case SSL_CTRL_SET_ECDH_AUTO: + ctx->cert->ecdh_tmp_auto = larg; + return 1; +# endif +# endif + case SSL_CTRL_SET_SIGALGS: + return tls1_set_sigalgs(ctx->cert, parg, larg, 0); + + case SSL_CTRL_SET_SIGALGS_LIST: + return tls1_set_sigalgs_list(ctx->cert, parg, 0); + + case SSL_CTRL_SET_CLIENT_SIGALGS: + return tls1_set_sigalgs(ctx->cert, parg, larg, 1); + + case SSL_CTRL_SET_CLIENT_SIGALGS_LIST: + return tls1_set_sigalgs_list(ctx->cert, parg, 1); + + case SSL_CTRL_SET_CLIENT_CERT_TYPES: + return ssl3_set_req_cert_type(ctx->cert, parg, larg); + + case SSL_CTRL_BUILD_CERT_CHAIN: + return ssl_build_cert_chain(ctx->cert, ctx->cert_store, larg); + + case SSL_CTRL_SET_VERIFY_CERT_STORE: + return ssl_cert_set_cert_store(ctx->cert, parg, 0, larg); + + case SSL_CTRL_SET_CHAIN_CERT_STORE: + return ssl_cert_set_cert_store(ctx->cert, parg, 1, larg); + #endif /* !OPENSSL_NO_TLSEXT */ /* A Thawte special :-) */ @@ -3618,7 +3890,10 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) break; case SSL_CTRL_GET_EXTRA_CHAIN_CERTS: - *(STACK_OF(X509) **)parg = ctx->extra_certs; + if (ctx->extra_certs == NULL && larg == 0) + *(STACK_OF(X509) **)parg = ctx->cert->key->chain; + else + *(STACK_OF(X509) **)parg = ctx->extra_certs; break; case SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS: @@ -3628,6 +3903,28 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) } break; + case SSL_CTRL_CHAIN: + if (larg) + return ssl_cert_set1_chain(ctx->cert, (STACK_OF(X509) *)parg); + else + return ssl_cert_set0_chain(ctx->cert, (STACK_OF(X509) *)parg); + + case SSL_CTRL_CHAIN_CERT: + if (larg) + return ssl_cert_add1_chain_cert(ctx->cert, (X509 *)parg); + else + return ssl_cert_add0_chain_cert(ctx->cert, (X509 *)parg); + + case SSL_CTRL_GET_CHAIN_CERTS: + *(STACK_OF(X509) **)parg = ctx->cert->key->chain; + break; + + case SSL_CTRL_SELECT_CURRENT_CERT: + return ssl_cert_select_current(ctx->cert, (X509 *)parg); + + case SSL_CTRL_SET_CURRENT_CERT: + return ssl_cert_set_current(ctx->cert, larg); + default: return (0); } @@ -3702,7 +3999,6 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx, int cmd, void (*fp) (void)) break; # endif #endif - default: return (0); } @@ -3726,10 +4022,7 @@ const SSL_CIPHER *ssl3_get_cipher_by_char(const unsigned char *p) if (cp == NULL) fprintf(stderr, "Unknown cipher ID %x\n", (p[0] << 8) | p[1]); #endif - if (cp == NULL || cp->valid == 0) - return NULL; - else - return cp; + return cp; } int ssl3_put_cipher_by_char(const SSL_CIPHER *c, unsigned char *p) @@ -3752,11 +4045,6 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, SSL_CIPHER *c, *ret = NULL; STACK_OF(SSL_CIPHER) *prio, *allow; int i, ii, ok; -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_EC) - unsigned int j; - int ec_ok, ec_nid; - unsigned char ec_search1 = 0, ec_search2 = 0; -#endif CERT *cert; unsigned long alg_k, alg_a, mask_k, mask_a, emask_k, emask_a; @@ -3789,7 +4077,7 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, } #endif - if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) { + if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || tls1_suiteb(s)) { prio = srvr; allow = clnt; } else { @@ -3797,12 +4085,13 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, allow = srvr; } + 
tls1_set_cert_validity(s); + for (i = 0; i < sk_SSL_CIPHER_num(prio); i++) { c = sk_SSL_CIPHER_value(prio, i); - /* Skip TLS v1.2 only ciphersuites if lower than v1.2 */ - if ((c->algorithm_ssl & SSL_TLSV1_2) && - (TLS1_get_version(s) < TLS1_2_VERSION)) + /* Skip TLS v1.2 only ciphersuites if not supported */ + if ((c->algorithm_ssl & SSL_TLSV1_2) && !SSL_USE_TLS1_2_CIPHERS(s)) continue; ssl_set_cert_masks(cert, c); @@ -3857,194 +4146,13 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, #ifndef OPENSSL_NO_TLSEXT # ifndef OPENSSL_NO_EC - if ( - /* - * if we are considering an ECC cipher suite that uses our - * certificate - */ - (alg_a & SSL_aECDSA || alg_a & SSL_aECDH) - /* and we have an ECC certificate */ - && (s->cert->pkeys[SSL_PKEY_ECC].x509 != NULL) - /* - * and the client specified a Supported Point Formats - * extension - */ - && ((s->session->tlsext_ecpointformatlist_length > 0) - && (s->session->tlsext_ecpointformatlist != NULL)) - /* and our certificate's point is compressed */ - && ((s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info != NULL) - && (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info->key != - NULL) - && (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info-> - key->public_key != NULL) - && (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info-> - key->public_key->data != NULL) - && - ((* - (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info-> - key->public_key->data) == POINT_CONVERSION_COMPRESSED) - || - (* - (s->cert->pkeys[SSL_PKEY_ECC].x509->cert_info-> - key->public_key->data) == - POINT_CONVERSION_COMPRESSED + 1) - ) - ) - ) { - ec_ok = 0; - /* - * if our certificate's curve is over a field type that the - * client does not support then do not allow this cipher suite to - * be negotiated - */ - if ((s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec != NULL) - && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->group != - NULL) - && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec-> - group->meth != NULL) - && - (EC_METHOD_get_field_type - (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec-> - group->meth) == NID_X9_62_prime_field) - ) { - for (j = 0; j < s->session->tlsext_ecpointformatlist_length; - j++) { - if (s->session->tlsext_ecpointformatlist[j] == - TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime) { - ec_ok = 1; - break; - } - } - } else - if (EC_METHOD_get_field_type - (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec-> - group->meth) == NID_X9_62_characteristic_two_field) { - for (j = 0; j < s->session->tlsext_ecpointformatlist_length; - j++) { - if (s->session->tlsext_ecpointformatlist[j] == - TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2) { - ec_ok = 1; - break; - } - } - } - ok = ok && ec_ok; - } - if ( - /* - * if we are considering an ECC cipher suite that uses our - * certificate - */ - (alg_a & SSL_aECDSA || alg_a & SSL_aECDH) - /* and we have an ECC certificate */ - && (s->cert->pkeys[SSL_PKEY_ECC].x509 != NULL) - /* - * and the client specified an EllipticCurves extension - */ - && ((s->session->tlsext_ellipticcurvelist_length > 0) - && (s->session->tlsext_ellipticcurvelist != NULL)) - ) { - ec_ok = 0; - if ((s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec != NULL) - && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey.ec->group != - NULL) - ) { - ec_nid = - EC_GROUP_get_curve_name(s->cert-> - pkeys[SSL_PKEY_ECC].privatekey-> - pkey.ec->group); - if ((ec_nid == 0) - && (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey. - ec->group->meth != NULL) - ) { - if (EC_METHOD_get_field_type - (s->cert->pkeys[SSL_PKEY_ECC].privatekey->pkey. 
- ec->group->meth) == NID_X9_62_prime_field) { - ec_search1 = 0xFF; - ec_search2 = 0x01; - } else - if (EC_METHOD_get_field_type - (s->cert->pkeys[SSL_PKEY_ECC].privatekey-> - pkey.ec->group->meth) == - NID_X9_62_characteristic_two_field) { - ec_search1 = 0xFF; - ec_search2 = 0x02; - } - } else { - ec_search1 = 0x00; - ec_search2 = tls1_ec_nid2curve_id(ec_nid); - } - if ((ec_search1 != 0) || (ec_search2 != 0)) { - for (j = 0; - j < s->session->tlsext_ellipticcurvelist_length / 2; - j++) { - if ((s->session->tlsext_ellipticcurvelist[2 * j] == - ec_search1) - && (s->session->tlsext_ellipticcurvelist[2 * j + - 1] == - ec_search2)) { - ec_ok = 1; - break; - } - } - } - } - ok = ok && ec_ok; - } # ifndef OPENSSL_NO_ECDH - if ( - /* - * if we are considering an ECC cipher suite that uses an - * ephemeral EC key - */ - (alg_k & SSL_kEECDH) - /* and we have an ephemeral EC key */ - && (s->cert->ecdh_tmp != NULL) - /* - * and the client specified an EllipticCurves extension - */ - && ((s->session->tlsext_ellipticcurvelist_length > 0) - && (s->session->tlsext_ellipticcurvelist != NULL)) - ) { - ec_ok = 0; - if (s->cert->ecdh_tmp->group != NULL) { - ec_nid = EC_GROUP_get_curve_name(s->cert->ecdh_tmp->group); - if ((ec_nid == 0) - && (s->cert->ecdh_tmp->group->meth != NULL) - ) { - if (EC_METHOD_get_field_type - (s->cert->ecdh_tmp->group->meth) == - NID_X9_62_prime_field) { - ec_search1 = 0xFF; - ec_search2 = 0x01; - } else - if (EC_METHOD_get_field_type - (s->cert->ecdh_tmp->group->meth) == - NID_X9_62_characteristic_two_field) { - ec_search1 = 0xFF; - ec_search2 = 0x02; - } - } else { - ec_search1 = 0x00; - ec_search2 = tls1_ec_nid2curve_id(ec_nid); - } - if ((ec_search1 != 0) || (ec_search2 != 0)) { - for (j = 0; - j < s->session->tlsext_ellipticcurvelist_length / 2; - j++) { - if ((s->session->tlsext_ellipticcurvelist[2 * j] == - ec_search1) - && (s->session->tlsext_ellipticcurvelist[2 * j + - 1] == - ec_search2)) { - ec_ok = 1; - break; - } - } - } - } - ok = ok && ec_ok; - } + /* + * if we are considering an ECC cipher suite that uses an ephemeral + * EC key check it + */ + if (alg_k & SSL_kEECDH) + ok = ok && tls1_check_ec_tmp_key(s, c->id); # endif /* OPENSSL_NO_ECDH */ # endif /* OPENSSL_NO_EC */ #endif /* OPENSSL_NO_TLSEXT */ @@ -4071,8 +4179,41 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, int ssl3_get_req_cert_type(SSL *s, unsigned char *p) { int ret = 0; + const unsigned char *sig; + size_t i, siglen; + int have_rsa_sign = 0, have_dsa_sign = 0; +#ifndef OPENSSL_NO_ECDSA + int have_ecdsa_sign = 0; +#endif + int nostrict = 1; unsigned long alg_k; + /* If we have custom certificate types set, use them */ + if (s->cert->ctypes) { + memcpy(p, s->cert->ctypes, s->cert->ctype_num); + return (int)s->cert->ctype_num; + } + /* get configured sigalgs */ + siglen = tls12_get_psigalgs(s, &sig); + if (s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT) + nostrict = 0; + for (i = 0; i < siglen; i += 2, sig += 2) { + switch (sig[1]) { + case TLSEXT_signature_rsa: + have_rsa_sign = 1; + break; + + case TLSEXT_signature_dsa: + have_dsa_sign = 1; + break; +#ifndef OPENSSL_NO_ECDSA + case TLSEXT_signature_ecdsa: + have_ecdsa_sign = 1; + break; +#endif + } + } + alg_k = s->s3->tmp.new_cipher->algorithm_mkey; #ifndef OPENSSL_NO_GOST @@ -4088,10 +4229,16 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p) #ifndef OPENSSL_NO_DH if (alg_k & (SSL_kDHr | SSL_kEDH)) { # ifndef OPENSSL_NO_RSA - p[ret++] = SSL3_CT_RSA_FIXED_DH; + /* + * Since this refers to a certificate signed with an 
RSA algorithm, + * only check for rsa signing in strict mode. + */ + if (nostrict || have_rsa_sign) + p[ret++] = SSL3_CT_RSA_FIXED_DH; # endif # ifndef OPENSSL_NO_DSA - p[ret++] = SSL3_CT_DSS_FIXED_DH; + if (nostrict || have_dsa_sign) + p[ret++] = SSL3_CT_DSS_FIXED_DH; # endif } if ((s->version == SSL3_VERSION) && @@ -4105,15 +4252,19 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p) } #endif /* !OPENSSL_NO_DH */ #ifndef OPENSSL_NO_RSA - p[ret++] = SSL3_CT_RSA_SIGN; + if (have_rsa_sign) + p[ret++] = SSL3_CT_RSA_SIGN; #endif #ifndef OPENSSL_NO_DSA - p[ret++] = SSL3_CT_DSS_SIGN; + if (have_dsa_sign) + p[ret++] = SSL3_CT_DSS_SIGN; #endif #ifndef OPENSSL_NO_ECDH if ((alg_k & (SSL_kECDHr | SSL_kECDHe)) && (s->version >= TLS1_VERSION)) { - p[ret++] = TLS_CT_RSA_FIXED_ECDH; - p[ret++] = TLS_CT_ECDSA_FIXED_ECDH; + if (nostrict || have_rsa_sign) + p[ret++] = TLS_CT_RSA_FIXED_ECDH; + if (nostrict || have_ecdsa_sign) + p[ret++] = TLS_CT_ECDSA_FIXED_ECDH; } #endif @@ -4123,12 +4274,31 @@ int ssl3_get_req_cert_type(SSL *s, unsigned char *p) * need to check for SSL_kECDH or SSL_kEECDH */ if (s->version >= TLS1_VERSION) { - p[ret++] = TLS_CT_ECDSA_SIGN; + if (have_ecdsa_sign) + p[ret++] = TLS_CT_ECDSA_SIGN; } #endif return (ret); } +static int ssl3_set_req_cert_type(CERT *c, const unsigned char *p, size_t len) +{ + if (c->ctypes) { + OPENSSL_free(c->ctypes); + c->ctypes = NULL; + } + if (!p || !len) + return 1; + if (len > 0xff) + return 0; + c->ctypes = OPENSSL_malloc(len); + if (!c->ctypes) + return 0; + memcpy(c->ctypes, p, len); + c->ctype_num = len; + return 1; +} + int ssl3_shutdown(SSL *s) { int ret; @@ -4310,14 +4480,14 @@ int ssl3_renegotiate_check(SSL *s) } /* - * If we are using TLS v1.2 or later and default SHA1+MD5 algorithms switch - * to new SHA256 PRF and handshake macs + * If we are using default SHA1+MD5 algorithms switch to new SHA256 PRF and + * handshake macs if required. */ long ssl_get_algorithm2(SSL *s) { long alg2 = s->s3->tmp.new_cipher->algorithm2; - if (s->method->version == TLS1_2_VERSION && - alg2 == (SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF)) + if (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_SHA256_PRF + && alg2 == (SSL_HANDSHAKE_MAC_DEFAULT | TLS1_PRF)) return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256; return alg2; } diff --git a/deps/openssl/openssl/ssl/s3_pkt.c b/deps/openssl/openssl/ssl/s3_pkt.c index 7c9f20c8f99e37..221ae039e99eae 100644 --- a/deps/openssl/openssl/ssl/s3_pkt.c +++ b/deps/openssl/openssl/ssl/s3_pkt.c @@ -118,6 +118,20 @@ #include #include +#ifndef EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 +#endif + +#if defined(OPENSSL_SMALL_FOOTPRINT) || \ + !( defined(AES_ASM) && ( \ + defined(__x86_64) || defined(__x86_64__) || \ + defined(_M_AMD64) || defined(_M_X64) || \ + defined(__INTEL__) ) \ + ) +# undef EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK +# define EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK 0 +#endif + static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, unsigned int len, int create_empty_fragment); static int ssl3_get_record(SSL *s); @@ -183,7 +197,7 @@ int ssl3_read_n(SSL *s, int n, int max, int extend) * operation returns the whole packet at once (as long as it fits into * the buffer). 
*/ - if (SSL_version(s) == DTLS1_VERSION || SSL_version(s) == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(s)) { if (left == 0 && extend) return 0; if (left > 0 && n > left) @@ -246,9 +260,7 @@ int ssl3_read_n(SSL *s, int n, int max, int extend) if (i <= 0) { rb->left = left; - if (s->mode & SSL_MODE_RELEASE_BUFFERS && - SSL_version(s) != DTLS1_VERSION - && SSL_version(s) != DTLS1_BAD_VER) + if (s->mode & SSL_MODE_RELEASE_BUFFERS && !SSL_IS_DTLS(s)) if (len + left == 0) ssl3_release_read_buffer(s); return (i); @@ -259,8 +271,7 @@ int ssl3_read_n(SSL *s, int n, int max, int extend) * underlying transport protocol is message oriented as opposed to * byte oriented as in the TLS case. */ - if (SSL_version(s) == DTLS1_VERSION - || SSL_version(s) == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(s)) { if (n > left) n = left; /* makes the while condition false */ } @@ -331,6 +342,9 @@ static int ssl3_get_record(SSL *s) s->rstate = SSL_ST_READ_BODY; p = s->packet; + if (s->msg_callback) + s->msg_callback(0, 0, SSL3_RT_HEADER, p, 5, s, + s->msg_callback_arg); /* Pull apart the header into the SSL3_RECORD */ rr->type = *(p++); @@ -610,8 +624,13 @@ int ssl3_do_compress(SSL *ssl) int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) { const unsigned char *buf = buf_; + int tot; unsigned int n, nw; - int i, tot; +#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + unsigned int max_send_fragment; +#endif + SSL3_BUFFER *wb = &(s->s3->wbuf); + int i; s->rwstate = SSL_NOTHING; OPENSSL_assert(s->s3->wnum <= INT_MAX); @@ -642,6 +661,154 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) return (-1); } + /* + * first check if there is a SSL3_BUFFER still being written out. This + * will happen with non blocking IO + */ + if (wb->left != 0) { + i = ssl3_write_pending(s, type, &buf[tot], s->s3->wpend_tot); + if (i <= 0) { + /* XXX should we ssl3_release_write_buffer if i<0? */ + s->s3->wnum = tot; + return i; + } + tot += i; /* this might be last fragment */ + } +#if !defined(OPENSSL_NO_MULTIBLOCK) && EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK + /* + * Depending on platform multi-block can deliver several *times* + * better performance. Downside is that it has to allocate + * jumbo buffer to accomodate up to 8 records, but the + * compromise is considered worthy. + */ + if (type == SSL3_RT_APPLICATION_DATA && + len >= 4 * (int)(max_send_fragment = s->max_send_fragment) && + s->compress == NULL && s->msg_callback == NULL && + SSL_USE_EXPLICIT_IV(s) && + EVP_CIPHER_flags(s->enc_write_ctx->cipher) & + EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK) { + unsigned char aad[13]; + EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb_param; + int packlen; + + /* minimize address aliasing conflicts */ + if ((max_send_fragment & 0xfff) == 0) + max_send_fragment -= 512; + + if (tot == 0 || wb->buf == NULL) { /* allocate jumbo buffer */ + ssl3_release_write_buffer(s); + + packlen = EVP_CIPHER_CTX_ctrl(s->enc_write_ctx, + EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE, + max_send_fragment, NULL); + + if (len >= 8 * (int)max_send_fragment) + packlen *= 8; + else + packlen *= 4; + + wb->buf = OPENSSL_malloc(packlen); + if(!wb->buf) { + SSLerr(SSL_F_SSL3_WRITE_BYTES, ERR_R_MALLOC_FAILURE); + return -1; + } + wb->len = packlen; + } else if (tot == len) { /* done? 
*/ + OPENSSL_free(wb->buf); /* free jumbo buffer */ + wb->buf = NULL; + return tot; + } + + n = (len - tot); + for (;;) { + if (n < 4 * max_send_fragment) { + OPENSSL_free(wb->buf); /* free jumbo buffer */ + wb->buf = NULL; + break; + } + + if (s->s3->alert_dispatch) { + i = s->method->ssl_dispatch_alert(s); + if (i <= 0) { + s->s3->wnum = tot; + return i; + } + } + + if (n >= 8 * max_send_fragment) + nw = max_send_fragment * (mb_param.interleave = 8); + else + nw = max_send_fragment * (mb_param.interleave = 4); + + memcpy(aad, s->s3->write_sequence, 8); + aad[8] = type; + aad[9] = (unsigned char)(s->version >> 8); + aad[10] = (unsigned char)(s->version); + aad[11] = 0; + aad[12] = 0; + mb_param.out = NULL; + mb_param.inp = aad; + mb_param.len = nw; + + packlen = EVP_CIPHER_CTX_ctrl(s->enc_write_ctx, + EVP_CTRL_TLS1_1_MULTIBLOCK_AAD, + sizeof(mb_param), &mb_param); + + if (packlen <= 0 || packlen > (int)wb->len) { /* never happens */ + OPENSSL_free(wb->buf); /* free jumbo buffer */ + wb->buf = NULL; + break; + } + + mb_param.out = wb->buf; + mb_param.inp = &buf[tot]; + mb_param.len = nw; + + if (EVP_CIPHER_CTX_ctrl(s->enc_write_ctx, + EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT, + sizeof(mb_param), &mb_param) <= 0) + return -1; + + s->s3->write_sequence[7] += mb_param.interleave; + if (s->s3->write_sequence[7] < mb_param.interleave) { + int j = 6; + while (j >= 0 && (++s->s3->write_sequence[j--]) == 0) ; + } + + wb->offset = 0; + wb->left = packlen; + + s->s3->wpend_tot = nw; + s->s3->wpend_buf = &buf[tot]; + s->s3->wpend_type = type; + s->s3->wpend_ret = nw; + + i = ssl3_write_pending(s, type, &buf[tot], nw); + if (i <= 0) { + if (i < 0 && (!s->wbio || !BIO_should_retry(s->wbio))) { + OPENSSL_free(wb->buf); + wb->buf = NULL; + } + s->s3->wnum = tot; + return i; + } + if (i == (int)n) { + OPENSSL_free(wb->buf); /* free jumbo buffer */ + wb->buf = NULL; + return tot + i; + } + n -= i; + tot += i; + } + } else +#endif + if (tot == len) { /* done? */ + if (s->mode & SSL_MODE_RELEASE_BUFFERS && !SSL_IS_DTLS(s)) + ssl3_release_write_buffer(s); + + return tot; + } + n = (len - tot); for (;;) { if (n > s->max_send_fragment) @@ -651,6 +818,7 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) i = do_ssl3_write(s, type, &(buf[tot]), nw, 0); if (i <= 0) { + /* XXX should we ssl3_release_write_buffer if i<0? 
*/ s->s3->wnum = tot; return i; } @@ -664,6 +832,10 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) */ s->s3->empty_fragment_done = 0; + if ((i == (int)n) && s->mode & SSL_MODE_RELEASE_BUFFERS && + !SSL_IS_DTLS(s)) + ssl3_release_write_buffer(s); + return tot + i; } @@ -798,8 +970,8 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, /* field where we are to write out packet length */ plen = p; p += 2; - /* Explicit IV length, block ciphers and TLS version 1.1 or later */ - if (s->enc_write_ctx && s->version >= TLS1_1_VERSION) { + /* Explicit IV length, block ciphers appropriate version flag */ + if (s->enc_write_ctx && SSL_USE_EXPLICIT_IV(s)) { int mode = EVP_CIPHER_CTX_mode(s->enc_write_ctx); if (mode == EVP_CIPH_CBC_MODE) { eivlen = EVP_CIPHER_CTX_iv_length(s->enc_write_ctx); @@ -862,6 +1034,10 @@ static int do_ssl3_write(SSL *s, int type, const unsigned char *buf, /* record length after mac and block padding */ s2n(wr->length, plen); + if (s->msg_callback) + s->msg_callback(1, 0, SSL3_RT_HEADER, plen - 5, 5, s, + s->msg_callback_arg); + /* * we should now have wr->data pointing to the encrypted data, which is * wr->length long @@ -925,10 +1101,6 @@ int ssl3_write_pending(SSL *s, int type, const unsigned char *buf, if (i == wb->left) { wb->left = 0; wb->offset += i; - if (s->mode & SSL_MODE_RELEASE_BUFFERS && - SSL_version(s) != DTLS1_VERSION - && SSL_version(s) != DTLS1_BAD_VER) - ssl3_release_write_buffer(s); s->rwstate = SSL_NOTHING; return (s->s3->wpend_ret); } else if (i <= 0) { diff --git a/deps/openssl/openssl/ssl/s3_srvr.c b/deps/openssl/openssl/ssl/s3_srvr.c index b8f91bc945500f..c016139b1ded86 100644 --- a/deps/openssl/openssl/ssl/s3_srvr.c +++ b/deps/openssl/openssl/ssl/s3_srvr.c @@ -289,7 +289,7 @@ int ssl3_accept(SSL *s) } s->init_num = 0; - s->s3->flags &= ~SSL3_FLAGS_SGC_RESTART_DONE; + s->s3->flags &= ~TLS1_FLAGS_SKIP_CERT_VERIFY; s->s3->flags &= ~SSL3_FLAGS_CCS_OK; /* * Should have been reset by ssl3_get_finished, too. 
@@ -354,12 +354,12 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CLNT_HELLO_C: s->shutdown = 0; - if (s->rwstate != SSL_X509_LOOKUP) { - ret = ssl3_get_client_hello(s); - if (ret <= 0) - goto end; - } + ret = ssl3_get_client_hello(s); + if (ret <= 0) + goto end; #ifndef OPENSSL_NO_SRP + s->state = SSL3_ST_SR_CLNT_HELLO_D; + case SSL3_ST_SR_CLNT_HELLO_D: { int al; if ((ret = ssl_check_srp_ext_ClientHello(s, &al)) < 0) { @@ -470,7 +470,7 @@ int ssl3_accept(SSL *s) /* SRP: send ServerKeyExchange */ || (alg_k & SSL_kSRP) #endif - || (alg_k & (SSL_kDHr | SSL_kDHd | SSL_kEDH)) + || (alg_k & SSL_kEDH) || (alg_k & SSL_kEECDH) || ((alg_k & SSL_kRSA) && (s->cert->pkeys[SSL_PKEY_RSA_ENC].privatekey == NULL @@ -580,21 +580,13 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CERT_A: case SSL3_ST_SR_CERT_B: - /* Check for second client hello (MS SGC) */ - ret = ssl3_check_client_hello(s); - if (ret <= 0) - goto end; - if (ret == 2) - s->state = SSL3_ST_SR_CLNT_HELLO_C; - else { - if (s->s3->tmp.cert_request) { - ret = ssl3_get_client_certificate(s); - if (ret <= 0) - goto end; - } - s->init_num = 0; - s->state = SSL3_ST_SR_KEY_EXCH_A; + if (s->s3->tmp.cert_request) { + ret = ssl3_get_client_certificate(s); + if (ret <= 0) + goto end; } + s->init_num = 0; + s->state = SSL3_ST_SR_KEY_EXCH_A; break; case SSL3_ST_SR_KEY_EXCH_A: @@ -618,13 +610,13 @@ int ssl3_accept(SSL *s) s->state = SSL3_ST_SR_FINISHED_A; #endif s->init_num = 0; - } else if (TLS1_get_version(s) >= TLS1_2_VERSION) { + } else if (SSL_USE_SIGALGS(s)) { s->state = SSL3_ST_SR_CERT_VRFY_A; s->init_num = 0; if (!s->session->peer) break; /* - * For TLS v1.2 freeze the handshake buffer at this point and + * For sigalgs freeze the handshake buffer at this point and * digest cached records. */ if (!s->s3->handshake_buffer) { @@ -890,86 +882,33 @@ int ssl3_accept(SSL *s) int ssl3_send_hello_request(SSL *s) { - unsigned char *p; if (s->state == SSL3_ST_SW_HELLO_REQ_A) { - p = (unsigned char *)s->init_buf->data; - *(p++) = SSL3_MT_HELLO_REQUEST; - *(p++) = 0; - *(p++) = 0; - *(p++) = 0; - + ssl_set_handshake_header(s, SSL3_MT_HELLO_REQUEST, 0); s->state = SSL3_ST_SW_HELLO_REQ_B; - /* number of bytes to write */ - s->init_num = 4; - s->init_off = 0; } /* SSL3_ST_SW_HELLO_REQ_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); -} - -int ssl3_check_client_hello(SSL *s) -{ - int ok; - long n; - - /* - * this function is called when we really expect a Certificate message, - * so permit appropriate message length - */ - n = s->method->ssl_get_message(s, - SSL3_ST_SR_CERT_A, - SSL3_ST_SR_CERT_B, - -1, s->max_cert_list, &ok); - if (!ok) - return ((int)n); - s->s3->tmp.reuse_message = 1; - if (s->s3->tmp.message_type == SSL3_MT_CLIENT_HELLO) { - /* - * We only allow the client to restart the handshake once per - * negotiation. - */ - if (s->s3->flags & SSL3_FLAGS_SGC_RESTART_DONE) { - SSLerr(SSL_F_SSL3_CHECK_CLIENT_HELLO, - SSL_R_MULTIPLE_SGC_RESTARTS); - return -1; - } - /* - * Throw away what we have done so far in the current handshake, - * which will now be aborted. (A full SSL_clear would be too much.) 
- */ -#ifndef OPENSSL_NO_DH - if (s->s3->tmp.dh != NULL) { - DH_free(s->s3->tmp.dh); - s->s3->tmp.dh = NULL; - } -#endif -#ifndef OPENSSL_NO_ECDH - if (s->s3->tmp.ecdh != NULL) { - EC_KEY_free(s->s3->tmp.ecdh); - s->s3->tmp.ecdh = NULL; - } -#endif - s->s3->flags |= SSL3_FLAGS_SGC_RESTART_DONE; - return 2; - } - return 1; + return ssl_do_write(s); } int ssl3_get_client_hello(SSL *s) { - int i, j, ok, al, ret = -1; + int i, j, ok, al = SSL_AD_INTERNAL_ERROR, ret = -1; unsigned int cookie_len; long n; unsigned long id; - unsigned char *p, *d, *q; + unsigned char *p, *d; SSL_CIPHER *c; #ifndef OPENSSL_NO_COMP + unsigned char *q; SSL_COMP *comp = NULL; #endif STACK_OF(SSL_CIPHER) *ciphers = NULL; + if (s->state == SSL3_ST_SR_CLNT_HELLO_C && !s->first_packet) + goto retry_cert; + /* * We do this so that we will respond with our native type. If we are * TLSv1 and we get SSLv3, we will respond with TLSv1, This down @@ -998,8 +937,9 @@ int ssl3_get_client_hello(SSL *s) s->client_version = (((int)p[0]) << 8) | (int)p[1]; p += 2; - if ((s->version == DTLS1_VERSION && s->client_version > s->version) || - (s->version != DTLS1_VERSION && s->client_version < s->version)) { + if (SSL_IS_DTLS(s) ? (s->client_version > s->version && + s->method->version != DTLS_ANY_VERSION) + : (s->client_version < s->version)) { SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_WRONG_VERSION_NUMBER); if ((s->client_version >> 8) == SSL3_VERSION_MAJOR && !s->enc_write_ctx && !s->write_hash) { @@ -1077,7 +1017,7 @@ int ssl3_get_client_hello(SSL *s) p += j; - if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(s)) { /* cookie stuff */ cookie_len = *(p++); @@ -1114,11 +1054,36 @@ int ssl3_get_client_hello(SSL *s) SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_COOKIE_MISMATCH); goto f_err; } - - ret = 2; + /* Set to -2 so if successful we return 2 */ + ret = -2; } p += cookie_len; + if (s->method->version == DTLS_ANY_VERSION) { + /* Select version to use */ + if (s->client_version <= DTLS1_2_VERSION && + !(s->options & SSL_OP_NO_DTLSv1_2)) { + s->version = DTLS1_2_VERSION; + s->method = DTLSv1_2_server_method(); + } else if (tls1_suiteb(s)) { + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, + SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE); + s->version = s->client_version; + al = SSL_AD_PROTOCOL_VERSION; + goto f_err; + } else if (s->client_version <= DTLS1_VERSION && + !(s->options & SSL_OP_NO_DTLSv1)) { + s->version = DTLS1_VERSION; + s->method = DTLSv1_server_method(); + } else { + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, + SSL_R_WRONG_VERSION_NUMBER); + s->version = s->client_version; + al = SSL_AD_PROTOCOL_VERSION; + goto f_err; + } + s->session->ssl_version = s->version; + } } n2s(p, i); @@ -1201,7 +1166,9 @@ int ssl3_get_client_hello(SSL *s) SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_MISMATCH); goto f_err; } +#ifndef OPENSSL_NO_COMP q = p; +#endif for (j = 0; j < i; j++) { if (p[j] == 0) break; @@ -1217,16 +1184,11 @@ int ssl3_get_client_hello(SSL *s) #ifndef OPENSSL_NO_TLSEXT /* TLS extensions */ if (s->version >= SSL3_VERSION) { - if (!ssl_parse_clienthello_tlsext(s, &p, d, n, &al)) { - /* 'al' set by ssl_parse_clienthello_tlsext */ + if (!ssl_parse_clienthello_tlsext(s, &p, d, n)) { SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_PARSE_TLSEXT); - goto f_err; + goto err; } } - if (ssl_check_clienthello_tlsext_early(s) <= 0) { - SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT); - goto err; - } /* * Check if we want to use external pre-shared secret for this handshake @@ -1238,7 +1200,6 @@ int 
ssl3_get_client_hello(SSL *s) unsigned char *pos; pos = s->s3->server_random; if (ssl_fill_hello_random(s, 1, pos, SSL3_RANDOM_SIZE) <= 0) { - al = SSL_AD_INTERNAL_ERROR; goto f_err; } } @@ -1297,7 +1258,6 @@ int ssl3_get_client_hello(SSL *s) /* Perform sanity checks on resumed compression algorithm */ /* Can't disable compression */ if (s->options & SSL_OP_NO_COMPRESSION) { - al = SSL_AD_INTERNAL_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_INCONSISTENT_COMPRESSION); goto f_err; @@ -1311,7 +1271,6 @@ int ssl3_get_client_hello(SSL *s) } } if (s->s3->tmp.new_compression == NULL) { - al = SSL_AD_INTERNAL_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_INVALID_COMPRESSION_ALGORITHM); goto f_err; @@ -1357,7 +1316,6 @@ int ssl3_get_client_hello(SSL *s) * using compression. */ if (s->session->compress_meth != 0) { - al = SSL_AD_INTERNAL_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_INCONSISTENT_COMPRESSION); goto f_err; } @@ -1382,6 +1340,25 @@ int ssl3_get_client_hello(SSL *s) goto f_err; } ciphers = NULL; + if (!tls1_set_server_sigalgs(s)) { + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CLIENTHELLO_TLSEXT); + goto err; + } + /* Let cert callback update server certificates if required */ + retry_cert: + if (s->cert->cert_cb) { + int rv = s->cert->cert_cb(s, s->cert->cert_cb_arg); + if (rv == 0) { + al = SSL_AD_INTERNAL_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_CERT_CB_ERROR); + goto f_err; + } + if (rv < 0) { + s->rwstate = SSL_X509_LOOKUP; + return -1; + } + s->rwstate = SSL_NOTHING; + } c = ssl3_choose_cipher(s, s->session->ciphers, SSL_get_ciphers(s)); if (c == NULL) { @@ -1417,16 +1394,13 @@ int ssl3_get_client_hello(SSL *s) s->s3->tmp.new_cipher = s->session->cipher; } - if (TLS1_get_version(s) < TLS1_2_VERSION - || !(s->verify_mode & SSL_VERIFY_PEER)) { - if (!ssl3_digest_cached_records(s)) { - al = SSL_AD_INTERNAL_ERROR; + if (!SSL_USE_SIGALGS(s) || !(s->verify_mode & SSL_VERIFY_PEER)) { + if (!ssl3_digest_cached_records(s)) goto f_err; - } } /*- - * we now have the following setup. + * we now have the following setup. * client_random * cipher_list - our prefered list of ciphers * ciphers - the clients prefered list of ciphers @@ -1446,7 +1420,7 @@ int ssl3_get_client_hello(SSL *s) } if (ret < 0) - ret = 1; + ret = -ret; if (0) { f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); @@ -1454,7 +1428,7 @@ int ssl3_get_client_hello(SSL *s) err: if (ciphers != NULL) sk_SSL_CIPHER_free(ciphers); - return (ret); + return ret < 0 ? 
-1 : ret; } int ssl3_send_server_hello(SSL *s) @@ -1462,6 +1436,7 @@ int ssl3_send_server_hello(SSL *s) unsigned char *buf; unsigned char *p, *d; int i, sl; + int al = 0; unsigned long l; if (s->state == SSL3_ST_SW_SRVR_HELLO_A) { @@ -1472,7 +1447,7 @@ int ssl3_send_server_hello(SSL *s) return -1; #endif /* Do the message type and length last */ - d = p = &(buf[4]); + d = p = ssl_handshake_start(s); *(p++) = s->version >> 8; *(p++) = s->version & 0xff; @@ -1529,50 +1504,33 @@ int ssl3_send_server_hello(SSL *s) return -1; } if ((p = - ssl_add_serverhello_tlsext(s, p, - buf + SSL3_RT_MAX_PLAIN_LENGTH)) == - NULL) { + ssl_add_serverhello_tlsext(s, p, buf + SSL3_RT_MAX_PLAIN_LENGTH, + &al)) == NULL) { + ssl3_send_alert(s, SSL3_AL_FATAL, al); SSLerr(SSL_F_SSL3_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR); return -1; } #endif /* do the header */ l = (p - d); - d = buf; - *(d++) = SSL3_MT_SERVER_HELLO; - l2n3(l, d); - + ssl_set_handshake_header(s, SSL3_MT_SERVER_HELLO, l); s->state = SSL3_ST_SW_SRVR_HELLO_B; - /* number of bytes to write */ - s->init_num = p - buf; - s->init_off = 0; } /* SSL3_ST_SW_SRVR_HELLO_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); } int ssl3_send_server_done(SSL *s) { - unsigned char *p; if (s->state == SSL3_ST_SW_SRVR_DONE_A) { - p = (unsigned char *)s->init_buf->data; - - /* do the header */ - *(p++) = SSL3_MT_SERVER_DONE; - *(p++) = 0; - *(p++) = 0; - *(p++) = 0; - + ssl_set_handshake_header(s, SSL3_MT_SERVER_DONE, 0); s->state = SSL3_ST_SW_SRVR_DONE_B; - /* number of bytes to write */ - s->init_num = 4; - s->init_off = 0; } /* SSL3_ST_SW_SRVR_DONE_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); } int ssl3_send_server_key_exchange(SSL *s) @@ -1697,7 +1655,12 @@ int ssl3_send_server_key_exchange(SSL *s) const EC_GROUP *group; ecdhp = cert->ecdh_tmp; - if ((ecdhp == NULL) && (s->cert->ecdh_tmp_cb != NULL)) { + if (s->cert->ecdh_tmp_auto) { + /* Get NID of appropriate shared curve */ + int nid = tls1_shared_curve(s, -2); + if (nid != NID_undef) + ecdhp = EC_KEY_new_by_curve_name(nid); + } else if ((ecdhp == NULL) && s->cert->ecdh_tmp_cb) { ecdhp = s->cert->ecdh_tmp_cb(s, SSL_C_IS_EXPORT(s->s3-> tmp.new_cipher), @@ -1722,7 +1685,9 @@ int ssl3_send_server_key_exchange(SSL *s) SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); goto err; } - if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) { + if (s->cert->ecdh_tmp_auto) + ecdh = ecdhp; + else if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) { SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_R_ECDH_LIB); goto err; } @@ -1866,12 +1831,11 @@ int ssl3_send_server_key_exchange(SSL *s) kn = 0; } - if (!BUF_MEM_grow_clean(buf, n + 4 + kn)) { + if (!BUF_MEM_grow_clean(buf, n + SSL_HM_HEADER_LENGTH(s) + kn)) { SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_BUF); goto err; } - d = (unsigned char *)s->init_buf->data; - p = &(d[4]); + d = p = ssl_handshake_start(s); for (i = 0; i < 4 && r[i] != NULL; i++) { #ifndef OPENSSL_NO_SRP @@ -1926,8 +1890,7 @@ int ssl3_send_server_key_exchange(SSL *s) * points to the space at the end. 
*/ #ifndef OPENSSL_NO_RSA - if (pkey->type == EVP_PKEY_RSA - && TLS1_get_version(s) < TLS1_2_VERSION) { + if (pkey->type == EVP_PKEY_RSA && !SSL_USE_SIGALGS(s)) { q = md_buf; j = 0; for (num = 2; num > 0; num--) { @@ -1939,7 +1902,7 @@ int ssl3_send_server_key_exchange(SSL *s) SSL3_RANDOM_SIZE); EVP_DigestUpdate(&md_ctx, &(s->s3->server_random[0]), SSL3_RANDOM_SIZE); - EVP_DigestUpdate(&md_ctx, &(d[4]), n); + EVP_DigestUpdate(&md_ctx, d, n); EVP_DigestFinal_ex(&md_ctx, q, (unsigned int *)&i); q += i; j += i; @@ -1954,10 +1917,8 @@ int ssl3_send_server_key_exchange(SSL *s) } else #endif if (md) { - /* - * For TLS1.2 and later send signature algorithm - */ - if (TLS1_get_version(s) >= TLS1_2_VERSION) { + /* send signature algorithm */ + if (SSL_USE_SIGALGS(s)) { if (!tls12_get_sigandhash(p, pkey, md)) { /* Should never happen */ al = SSL_AD_INTERNAL_ERROR; @@ -1975,7 +1936,7 @@ int ssl3_send_server_key_exchange(SSL *s) SSL3_RANDOM_SIZE); EVP_SignUpdate(&md_ctx, &(s->s3->server_random[0]), SSL3_RANDOM_SIZE); - EVP_SignUpdate(&md_ctx, &(d[4]), n); + EVP_SignUpdate(&md_ctx, d, n); if (!EVP_SignFinal(&md_ctx, &(p[2]), (unsigned int *)&i, pkey)) { SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE, ERR_LIB_EVP); @@ -1983,7 +1944,7 @@ int ssl3_send_server_key_exchange(SSL *s) } s2n(i, p); n += i + 2; - if (TLS1_get_version(s) >= TLS1_2_VERSION) + if (SSL_USE_SIGALGS(s)) n += 2; } else { /* Is this error check actually needed? */ @@ -1994,19 +1955,12 @@ int ssl3_send_server_key_exchange(SSL *s) } } - *(d++) = SSL3_MT_SERVER_KEY_EXCHANGE; - l2n3(n, d); - - /* - * we should now have things packed up, so lets send it off - */ - s->init_num = n + 4; - s->init_off = 0; + ssl_set_handshake_header(s, SSL3_MT_SERVER_KEY_EXCHANGE, n); } s->state = SSL3_ST_SW_KEY_EXCH_B; EVP_MD_CTX_cleanup(&md_ctx); - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); err: @@ -2030,7 +1984,7 @@ int ssl3_send_certificate_request(SSL *s) if (s->state == SSL3_ST_SW_CERT_REQ_A) { buf = s->init_buf; - d = p = (unsigned char *)&(buf->data[4]); + d = p = ssl_handshake_start(s); /* get the list of acceptable cert types */ p++; @@ -2039,10 +1993,12 @@ int ssl3_send_certificate_request(SSL *s) p += n; n++; - if (TLS1_get_version(s) >= TLS1_2_VERSION) { - nl = tls12_get_req_sig_algs(s, p + 2); + if (SSL_USE_SIGALGS(s)) { + const unsigned char *psigs; + nl = tls12_get_psigalgs(s, &psigs); s2n(nl, p); - p += nl + 2; + memcpy(p, psigs, nl); + p += nl; n += nl + 2; } @@ -2056,12 +2012,13 @@ int ssl3_send_certificate_request(SSL *s) for (i = 0; i < sk_X509_NAME_num(sk); i++) { name = sk_X509_NAME_value(sk, i); j = i2d_X509_NAME(name, NULL); - if (!BUF_MEM_grow_clean(buf, 4 + n + j + 2)) { + if (!BUF_MEM_grow_clean + (buf, SSL_HM_HEADER_LENGTH(s) + n + j + 2)) { SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST, ERR_R_BUF_LIB); goto err; } - p = (unsigned char *)&(buf->data[4 + n]); + p = ssl_handshake_start(s) + n; if (!(s->options & SSL_OP_NETSCAPE_CA_DN_BUG)) { s2n(j, p); i2d_X509_NAME(name, &p); @@ -2079,39 +2036,32 @@ int ssl3_send_certificate_request(SSL *s) } } /* else no CA names */ - p = (unsigned char *)&(buf->data[4 + off]); + p = ssl_handshake_start(s) + off; s2n(nl, p); - d = (unsigned char *)buf->data; - *(d++) = SSL3_MT_CERTIFICATE_REQUEST; - l2n3(n, d); + ssl_set_handshake_header(s, SSL3_MT_CERTIFICATE_REQUEST, n); - /* - * we should now have things packed up, so lets send it off - */ - - s->init_num = n + 4; - s->init_off = 0; #ifdef NETSCAPE_HANG_BUG - if 
(!BUF_MEM_grow_clean(buf, s->init_num + 4)) { - SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST, ERR_R_BUF_LIB); - goto err; + if (!SSL_IS_DTLS(s)) { + if (!BUF_MEM_grow_clean(buf, s->init_num + 4)) { + SSLerr(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST, ERR_R_BUF_LIB); + goto err; + } + p = (unsigned char *)s->init_buf->data + s->init_num; + /* do the header */ + *(p++) = SSL3_MT_SERVER_DONE; + *(p++) = 0; + *(p++) = 0; + *(p++) = 0; + s->init_num += 4; } - p = (unsigned char *)s->init_buf->data + s->init_num; - - /* do the header */ - *(p++) = SSL3_MT_SERVER_DONE; - *(p++) = 0; - *(p++) = 0; - *(p++) = 0; - s->init_num += 4; #endif s->state = SSL3_ST_SW_CERT_REQ_B; } /* SSL3_ST_SW_CERT_REQ_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); err: return (-1); } @@ -2128,7 +2078,7 @@ int ssl3_get_client_key_exchange(SSL *s) #endif #ifndef OPENSSL_NO_DH BIGNUM *pub = NULL; - DH *dh_srvr; + DH *dh_srvr, *dh_clnt = NULL; #endif #ifndef OPENSSL_NO_KRB5 KSSL_ERR kssl_err; @@ -2299,8 +2249,20 @@ int ssl3_get_client_key_exchange(SSL *s) #endif #ifndef OPENSSL_NO_DH if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) { - n2s(p, i); - if (n != i + 2) { + int idx = -1; + EVP_PKEY *skey = NULL; + if (n > 1) { + n2s(p, i); + } else { + if (alg_k & SSL_kDHE) { + al = SSL_AD_HANDSHAKE_FAILURE; + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG); + goto f_err; + } + i = 0; + } + if (n && n != i + 2) { if (!(s->options & SSL_OP_SSLEAY_080_CLIENT_DH_BUG)) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG); @@ -2310,23 +2272,45 @@ int ssl3_get_client_key_exchange(SSL *s) i = (int)n; } } - - if (n == 0L) { /* the parameters are in the cert */ + if (alg_k & SSL_kDHr) + idx = SSL_PKEY_DH_RSA; + else if (alg_k & SSL_kDHd) + idx = SSL_PKEY_DH_DSA; + if (idx >= 0) { + skey = s->cert->pkeys[idx].privatekey; + if ((skey == NULL) || + (skey->type != EVP_PKEY_DH) || (skey->pkey.dh == NULL)) { + al = SSL_AD_HANDSHAKE_FAILURE; + SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, + SSL_R_MISSING_RSA_CERTIFICATE); + goto f_err; + } + dh_srvr = skey->pkey.dh; + } else if (s->s3->tmp.dh == NULL) { al = SSL_AD_HANDSHAKE_FAILURE; SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, - SSL_R_UNABLE_TO_DECODE_DH_CERTS); + SSL_R_MISSING_TMP_DH_KEY); goto f_err; - } else { - if (s->s3->tmp.dh == NULL) { + } else + dh_srvr = s->s3->tmp.dh; + + if (n == 0L) { + /* Get pubkey from cert */ + EVP_PKEY *clkey = X509_get_pubkey(s->session->peer); + if (clkey) { + if (EVP_PKEY_cmp_parameters(clkey, skey) == 1) + dh_clnt = EVP_PKEY_get1_DH(clkey); + } + if (dh_clnt == NULL) { al = SSL_AD_HANDSHAKE_FAILURE; SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_MISSING_TMP_DH_KEY); goto f_err; - } else - dh_srvr = s->s3->tmp.dh; - } - - pub = BN_bin2bn(p, i, NULL); + } + EVP_PKEY_free(clkey); + pub = dh_clnt->pub_key; + } else + pub = BN_bin2bn(p, i, NULL); if (pub == NULL) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_BN_LIB); goto err; @@ -2342,8 +2326,10 @@ int ssl3_get_client_key_exchange(SSL *s) DH_free(s->s3->tmp.dh); s->s3->tmp.dh = NULL; - - BN_clear_free(pub); + if (dh_clnt) + DH_free(dh_clnt); + else + BN_clear_free(pub); pub = NULL; s->session->master_key_length = s->method->ssl3_enc->generate_master_secret(s, @@ -2351,6 +2337,8 @@ int ssl3_get_client_key_exchange(SSL *s) session->master_key, p, i); OPENSSL_cleanse(p, i); + if (dh_clnt) + return 2; } else #endif #ifndef OPENSSL_NO_KRB5 @@ -2952,24 +2940,12 @@ int ssl3_get_cert_verify(SSL *s) pkey->type == 
NID_id_GostR3410_2001)) { i = 64; } else { - if (TLS1_get_version(s) >= TLS1_2_VERSION) { - int sigalg = tls12_get_sigid(pkey); - /* Should never happen */ - if (sigalg == -1) { - SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, ERR_R_INTERNAL_ERROR); + if (SSL_USE_SIGALGS(s)) { + int rv = tls12_check_peer_sigalg(&md, s, p, pkey); + if (rv == -1) { al = SSL_AD_INTERNAL_ERROR; goto f_err; - } - /* Check key type is consistent with signature */ - if (sigalg != (int)p[1]) { - SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, - SSL_R_WRONG_SIGNATURE_TYPE); - al = SSL_AD_DECODE_ERROR; - goto f_err; - } - md = tls12_get_hash(p[0]); - if (md == NULL) { - SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, SSL_R_UNKNOWN_DIGEST); + } else if (rv == 0) { al = SSL_AD_DECODE_ERROR; goto f_err; } @@ -2994,7 +2970,7 @@ int ssl3_get_cert_verify(SSL *s) goto f_err; } - if (TLS1_get_version(s) >= TLS1_2_VERSION) { + if (SSL_USE_SIGALGS(s)) { long hdatalen = 0; void *hdata; hdatalen = BIO_get_mem_data(s->s3->handshake_buffer, &hdata); @@ -3216,7 +3192,7 @@ int ssl3_get_client_certificate(SSL *s) if (i <= 0) { al = ssl_verify_alarm_type(s->verify_result); SSLerr(SSL_F_SSL3_GET_CLIENT_CERTIFICATE, - SSL_R_NO_CERTIFICATE_RETURNED); + SSL_R_CERTIFICATE_VERIFY_FAILED); goto f_err; } } @@ -3262,12 +3238,11 @@ int ssl3_get_client_certificate(SSL *s) int ssl3_send_server_certificate(SSL *s) { - unsigned long l; - X509 *x; + CERT_PKEY *cpk; if (s->state == SSL3_ST_SW_CERT_A) { - x = ssl_get_server_send_cert(s); - if (x == NULL) { + cpk = ssl_get_server_send_pkey(s); + if (cpk == NULL) { /* VRS: allow null cert if auth == KRB5 */ if ((s->s3->tmp.new_cipher->algorithm_auth != SSL_aKRB5) || (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kKRB5)) { @@ -3277,18 +3252,15 @@ int ssl3_send_server_certificate(SSL *s) } } - l = ssl3_output_cert_chain(s, x); - if (!l) { + if (!ssl3_output_cert_chain(s, cpk)) { SSLerr(SSL_F_SSL3_SEND_SERVER_CERTIFICATE, ERR_R_INTERNAL_ERROR); return (0); } s->state = SSL3_ST_SW_CERT_B; - s->init_num = (int)l; - s->init_off = 0; } /* SSL3_ST_SW_CERT_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); } #ifndef OPENSSL_NO_TLSEXT @@ -3351,22 +3323,18 @@ int ssl3_send_newsession_ticket(SSL *s) /*- * Grow buffer if need be: the length calculation is as - * follows 1 (size of message name) + 3 (message length - * bytes) + 4 (ticket lifetime hint) + 2 (ticket length) + + * follows handshake_header_length + + * 4 (ticket lifetime hint) + 2 (ticket length) + * 16 (key name) + max_iv_len (iv length) + * session_length + max_enc_block_size (max encrypted session * length) + max_md_size (HMAC). */ if (!BUF_MEM_grow(s->init_buf, - 26 + EVP_MAX_IV_LENGTH + EVP_MAX_BLOCK_LENGTH + - EVP_MAX_MD_SIZE + slen)) + SSL_HM_HEADER_LENGTH(s) + 22 + EVP_MAX_IV_LENGTH + + EVP_MAX_BLOCK_LENGTH + EVP_MAX_MD_SIZE + slen)) goto err; - p = (unsigned char *)s->init_buf->data; - /* do the header */ - *(p++) = SSL3_MT_NEWSESSION_TICKET; - /* Skip message length for now */ - p += 3; + p = ssl_handshake_start(s); /* * Initialize HMAC and cipher contexts. If callback present it does * all the work otherwise use generated values from parent ctx. 
@@ -3422,21 +3390,17 @@ int ssl3_send_newsession_ticket(SSL *s) p += hlen; /* Now write out lengths: p points to end of data written */ /* Total length */ - len = p - (unsigned char *)s->init_buf->data; - p = (unsigned char *)s->init_buf->data + 1; - l2n3(len - 4, p); /* Message length */ - p += 4; - s2n(len - 10, p); /* Ticket length */ - - /* number of bytes to write */ - s->init_num = len; + len = p - ssl_handshake_start(s); + ssl_set_handshake_header(s, SSL3_MT_NEWSESSION_TICKET, len); + /* Skip ticket lifetime hint */ + p = ssl_handshake_start(s) + 4; + s2n(len - 6, p); s->state = SSL3_ST_SW_SESSION_TICKET_B; - s->init_off = 0; OPENSSL_free(senc); } /* SSL3_ST_SW_SESSION_TICKET_B */ - return (ssl3_do_write(s, SSL3_RT_HANDSHAKE)); + return ssl_do_write(s); err: if (senc) OPENSSL_free(senc); @@ -3551,4 +3515,5 @@ int ssl3_get_next_proto(SSL *s) return 1; } # endif + #endif diff --git a/deps/openssl/openssl/ssl/srtp.h b/deps/openssl/openssl/ssl/srtp.h index 10082c939abbd7..2279c32b895ac2 100644 --- a/deps/openssl/openssl/ssl/srtp.h +++ b/deps/openssl/openssl/ssl/srtp.h @@ -134,7 +134,6 @@ extern "C" { int SSL_CTX_set_tlsext_use_srtp(SSL_CTX *ctx, const char *profiles); int SSL_set_tlsext_use_srtp(SSL *ctx, const char *profiles); -SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s); STACK_OF(SRTP_PROTECTION_PROFILE) *SSL_get_srtp_profiles(SSL *ssl); SRTP_PROTECTION_PROFILE *SSL_get_selected_srtp_profile(SSL *s); diff --git a/deps/openssl/openssl/ssl/ssl-lib.com b/deps/openssl/openssl/ssl/ssl-lib.com index 7303bc4dd1ac47..43fea17541722d 100644 --- a/deps/openssl/openssl/ssl/ssl-lib.com +++ b/deps/openssl/openssl/ssl/ssl-lib.com @@ -216,13 +216,13 @@ $! $ LIB_SSL = "s2_meth, s2_srvr, s2_clnt, s2_lib, s2_enc, s2_pkt,"+ - "s3_meth, s3_srvr, s3_clnt, s3_lib, s3_enc, s3_pkt, s3_both, s3_cbc,"+ - "s23_meth,s23_srvr,s23_clnt,s23_lib, s23_pkt,"+ - - "t1_meth, t1_srvr, t1_clnt, t1_lib, t1_enc,"+ - + "t1_meth, t1_srvr, t1_clnt, t1_lib, t1_enc, t1_ext,"+ - "d1_meth, d1_srvr, d1_clnt, d1_lib, d1_pkt,"+ - - "d1_both,d1_enc,d1_srtp,"+ - + "d1_both,d1_srtp,"+ - "ssl_lib,ssl_err2,ssl_cert,ssl_sess,"+ - "ssl_ciph,ssl_stat,ssl_rsa,"+ - - "ssl_asn1,ssl_txt,ssl_algs,"+ - - "bio_ssl,ssl_err,kssl,tls_srp,t1_reneg,ssl_utst" + "ssl_asn1,ssl_txt,ssl_algs,ssl_conf,"+ - + "bio_ssl,ssl_err,kssl,t1_reneg,tls_srp,t1_trce,ssl_utst" $! $ COMPILEWITH_CC5 = "" $! @@ -860,7 +860,7 @@ $ IF F$TYPE(USER_CCFLAGS) .NES. "" THEN CCEXTRAFLAGS = USER_CCFLAGS $ CCDISABLEWARNINGS = "" !!! "MAYLOSEDATA3" !!! "LONGLONGTYPE,LONGLONGSUFX,FOUNDCR" $ IF F$TYPE(USER_CCDISABLEWARNINGS) .NES. "" $ THEN -$ IF CCDISABLEWARNINGS .NES. "" THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," +$ IF CCDISABLEWARNINGS .NES. THEN CCDISABLEWARNINGS = CCDISABLEWARNINGS + "," $ CCDISABLEWARNINGS = CCDISABLEWARNINGS + USER_CCDISABLEWARNINGS $ ENDIF $! diff --git a/deps/openssl/openssl/ssl/ssl.h b/deps/openssl/openssl/ssl/ssl.h index b93b67b14bb342..a6d845dc994fc1 100644 --- a/deps/openssl/openssl/ssl/ssl.h +++ b/deps/openssl/openssl/ssl/ssl.h @@ -242,22 +242,24 @@ extern "C" { # define SSL_TXT_NULL "NULL" # define SSL_TXT_kRSA "kRSA" -# define SSL_TXT_kDHr "kDHr"/* no such ciphersuites supported! */ -# define SSL_TXT_kDHd "kDHd"/* no such ciphersuites supported! */ -# define SSL_TXT_kDH "kDH"/* no such ciphersuites supported! 
*/ +# define SSL_TXT_kDHr "kDHr" +# define SSL_TXT_kDHd "kDHd" +# define SSL_TXT_kDH "kDH" # define SSL_TXT_kEDH "kEDH" +# define SSL_TXT_kDHE "kDHE"/* alias for kEDH */ # define SSL_TXT_kKRB5 "kKRB5" # define SSL_TXT_kECDHr "kECDHr" # define SSL_TXT_kECDHe "kECDHe" # define SSL_TXT_kECDH "kECDH" # define SSL_TXT_kEECDH "kEECDH" +# define SSL_TXT_kECDHE "kECDHE"/* alias for kEECDH */ # define SSL_TXT_kPSK "kPSK" # define SSL_TXT_kGOST "kGOST" # define SSL_TXT_kSRP "kSRP" # define SSL_TXT_aRSA "aRSA" # define SSL_TXT_aDSS "aDSS" -# define SSL_TXT_aDH "aDH"/* no such ciphersuites supported! */ +# define SSL_TXT_aDH "aDH" # define SSL_TXT_aECDH "aECDH" # define SSL_TXT_aKRB5 "aKRB5" # define SSL_TXT_aECDSA "aECDSA" @@ -270,10 +272,12 @@ extern "C" { # define SSL_TXT_DSS "DSS" # define SSL_TXT_DH "DH" # define SSL_TXT_EDH "EDH"/* same as "kEDH:-ADH" */ +# define SSL_TXT_DHE "DHE"/* alias for EDH */ # define SSL_TXT_ADH "ADH" # define SSL_TXT_RSA "RSA" # define SSL_TXT_ECDH "ECDH" # define SSL_TXT_EECDH "EECDH"/* same as "kEECDH:-AECDH" */ +# define SSL_TXT_ECDHE "ECDHE"/* alias for ECDHE" */ # define SSL_TXT_AECDH "AECDH" # define SSL_TXT_ECDSA "ECDSA" # define SSL_TXT_KRB5 "KRB5" @@ -370,6 +374,8 @@ typedef struct tls_session_ticket_ext_st TLS_SESSION_TICKET_EXT; typedef struct ssl_method_st SSL_METHOD; typedef struct ssl_cipher_st SSL_CIPHER; typedef struct ssl_session_st SSL_SESSION; +typedef struct tls_sigalgs_st TLS_SIGALGS; +typedef struct ssl_conf_ctx_st SSL_CONF_CTX; DECLARE_STACK_OF(SSL_CIPHER) @@ -389,6 +395,23 @@ typedef int (*tls_session_secret_cb_fn) (SSL *s, void *secret, STACK_OF(SSL_CIPHER) *peer_ciphers, SSL_CIPHER **cipher, void *arg); +# ifndef OPENSSL_NO_TLSEXT + +/* Typedefs for handling custom extensions */ + +typedef int (*custom_ext_add_cb) (SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *add_arg); + +typedef void (*custom_ext_free_cb) (SSL *s, unsigned int ext_type, + const unsigned char *out, void *add_arg); + +typedef int (*custom_ext_parse_cb) (SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *parse_arg); + +# endif + # ifndef OPENSSL_NO_SSL_INTERN /* used to hold info on the particular ciphers used */ @@ -624,6 +647,12 @@ struct ssl_session_st { # define SSL_OP_NO_TLSv1_2 0x08000000L # define SSL_OP_NO_TLSv1_1 0x10000000L +# define SSL_OP_NO_DTLSv1 0x04000000L +# define SSL_OP_NO_DTLSv1_2 0x08000000L + +# define SSL_OP_NO_SSL_MASK (SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|\ + SSL_OP_NO_TLSv1|SSL_OP_NO_TLSv1_1|SSL_OP_NO_TLSv1_2) + /* * These next two were never actually used for anything since SSLeay zap so * we have some more flags. @@ -685,6 +714,69 @@ struct ssl_session_st { */ # define SSL_MODE_SEND_FALLBACK_SCSV 0x00000080L +/* Cert related flags */ +/* + * Many implementations ignore some aspects of the TLS standards such as + * enforcing certifcate chain algorithms. When this is set we enforce them. 
+ */ +# define SSL_CERT_FLAG_TLS_STRICT 0x00000001L + +/* Suite B modes, takes same values as certificate verify flags */ +# define SSL_CERT_FLAG_SUITEB_128_LOS_ONLY 0x10000 +/* Suite B 192 bit only mode */ +# define SSL_CERT_FLAG_SUITEB_192_LOS 0x20000 +/* Suite B 128 bit mode allowing 192 bit algorithms */ +# define SSL_CERT_FLAG_SUITEB_128_LOS 0x30000 + +/* Perform all sorts of protocol violations for testing purposes */ +# define SSL_CERT_FLAG_BROKEN_PROTOCOL 0x10000000 + +/* Flags for building certificate chains */ +/* Treat any existing certificates as untrusted CAs */ +# define SSL_BUILD_CHAIN_FLAG_UNTRUSTED 0x1 +/* Don't include root CA in chain */ +# define SSL_BUILD_CHAIN_FLAG_NO_ROOT 0x2 +/* Just check certificates already there */ +# define SSL_BUILD_CHAIN_FLAG_CHECK 0x4 +/* Ignore verification errors */ +# define SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR 0x8 +/* Clear verification errors from queue */ +# define SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR 0x10 + +/* Flags returned by SSL_check_chain */ +/* Certificate can be used with this session */ +# define CERT_PKEY_VALID 0x1 +/* Certificate can also be used for signing */ +# define CERT_PKEY_SIGN 0x2 +/* EE certificate signing algorithm OK */ +# define CERT_PKEY_EE_SIGNATURE 0x10 +/* CA signature algorithms OK */ +# define CERT_PKEY_CA_SIGNATURE 0x20 +/* EE certificate parameters OK */ +# define CERT_PKEY_EE_PARAM 0x40 +/* CA certificate parameters OK */ +# define CERT_PKEY_CA_PARAM 0x80 +/* Signing explicitly allowed as opposed to SHA1 fallback */ +# define CERT_PKEY_EXPLICIT_SIGN 0x100 +/* Client CA issuer names match (always set for server cert) */ +# define CERT_PKEY_ISSUER_NAME 0x200 +/* Cert type matches client types (always set for server cert) */ +# define CERT_PKEY_CERT_TYPE 0x400 +/* Cert chain suitable to Suite B */ +# define CERT_PKEY_SUITEB 0x800 + +# define SSL_CONF_FLAG_CMDLINE 0x1 +# define SSL_CONF_FLAG_FILE 0x2 +# define SSL_CONF_FLAG_CLIENT 0x4 +# define SSL_CONF_FLAG_SERVER 0x8 +# define SSL_CONF_FLAG_SHOW_ERRORS 0x10 +# define SSL_CONF_FLAG_CERTIFICATE 0x20 +/* Configuration value types */ +# define SSL_CONF_TYPE_UNKNOWN 0x0 +# define SSL_CONF_TYPE_STRING 0x1 +# define SSL_CONF_TYPE_FILE 0x2 +# define SSL_CONF_TYPE_DIR 0x3 + /* * Note: SSL[_CTX]_set_{options,mode} use |= op on the previous value, they * cannot be used to clear bits. @@ -730,6 +822,15 @@ struct ssl_session_st { SSL_ctrl((ssl),SSL_CTRL_TLS_EXT_SEND_HEARTBEAT,0,NULL) # endif +# define SSL_CTX_set_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_set_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CERT_FLAGS,(op),NULL) +# define SSL_CTX_clear_cert_flags(ctx,op) \ + SSL_CTX_ctrl((ctx),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) +# define SSL_clear_cert_flags(s,op) \ + SSL_ctrl((s),SSL_CTRL_CLEAR_CERT_FLAGS,(op),NULL) + void SSL_CTX_set_msg_callback(SSL_CTX *ctx, void (*cb) (int write_p, int version, int content_type, const void *buf, @@ -1042,6 +1143,43 @@ struct ssl_ctx_st { # endif /* SRTP profiles we are willing to do from RFC 5764 */ STACK_OF(SRTP_PROTECTION_PROFILE) *srtp_profiles; + + /* + * ALPN information (we are in the process of transitioning from NPN to + * ALPN.) + */ + + /*- + * For a server, this contains a callback function that allows the + * server to select the protocol for the connection. + * out: on successful return, this must point to the raw protocol + * name (without the length prefix). + * outlen: on successful return, this contains the length of |*out|. 
+ * in: points to the client's list of supported protocols in + * wire-format. + * inlen: the length of |in|. + */ + int (*alpn_select_cb) (SSL *s, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, void *arg); + void *alpn_select_cb_arg; + + /* + * For a client, this contains the list of supported protocols in wire + * format. + */ + unsigned char *alpn_client_proto_list; + unsigned alpn_client_proto_list_len; + +# ifndef OPENSSL_NO_EC + /* EC extension values inherited by SSL structure */ + size_t tlsext_ecpointformatlist_length; + unsigned char *tlsext_ecpointformatlist; + size_t tlsext_ellipticcurvelist_length; + unsigned char *tlsext_ellipticcurvelist; +# endif /* OPENSSL_NO_EC */ # endif }; @@ -1144,19 +1282,35 @@ void SSL_CTX_set_next_proto_select_cb(SSL_CTX *s, const unsigned char *in, unsigned int inlen, void *arg), void *arg); +void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, + unsigned *len); +# endif +# ifndef OPENSSL_NO_TLSEXT int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, const unsigned char *in, unsigned int inlen, const unsigned char *client, unsigned int client_len); -void SSL_get0_next_proto_negotiated(const SSL *s, const unsigned char **data, - unsigned *len); - -# define OPENSSL_NPN_UNSUPPORTED 0 -# define OPENSSL_NPN_NEGOTIATED 1 -# define OPENSSL_NPN_NO_OVERLAP 2 # endif +# define OPENSSL_NPN_UNSUPPORTED 0 +# define OPENSSL_NPN_NEGOTIATED 1 +# define OPENSSL_NPN_NO_OVERLAP 2 + +int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos, + unsigned protos_len); +int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos, + unsigned protos_len); +void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx, + int (*cb) (SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg), void *arg); +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned *len); + # ifndef OPENSSL_NO_PSK /* * the maximum length of the buffer given to callbacks containing the @@ -1228,6 +1382,27 @@ const char *SSL_get_psk_identity_hint(const SSL *s); const char *SSL_get_psk_identity(const SSL *s); # endif +# ifndef OPENSSL_NO_TLSEXT +/* Register callbacks to handle custom TLS Extensions for client or server. */ + +int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg); + +int SSL_extension_supported(unsigned int ext_type); + +# endif + # define SSL_NOTHING 1 # define SSL_WRITING 2 # define SSL_READING 3 @@ -1502,6 +1677,14 @@ struct ssl_st { /* ctx for SRP authentication */ SRP_CTX srp_ctx; # endif +# ifndef OPENSSL_NO_TLSEXT + /* + * For a client, this contains the list of supported protocols in wire + * format. 
+ */ + unsigned char *alpn_client_proto_list; + unsigned alpn_client_proto_list_len; +# endif /* OPENSSL_NO_TLSEXT */ }; # endif @@ -1757,7 +1940,7 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) # define SSL_CTRL_GET_TLS_EXT_HEARTBEAT_PENDING 86 # define SSL_CTRL_SET_TLS_EXT_HEARTBEAT_NO_REQUESTS 87 # endif -# endif +# endif /* OPENSSL_NO_TLSEXT */ # define DTLS_CTRL_GET_TIMEOUT 73 # define DTLS_CTRL_HANDLE_TIMEOUT 74 # define DTLS_CTRL_LISTEN 75 @@ -1766,9 +1949,37 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) # define SSL_CTRL_CLEAR_MODE 78 # define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 # define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 +# define SSL_CTRL_CHAIN 88 +# define SSL_CTRL_CHAIN_CERT 89 +# define SSL_CTRL_GET_CURVES 90 +# define SSL_CTRL_SET_CURVES 91 +# define SSL_CTRL_SET_CURVES_LIST 92 +# define SSL_CTRL_GET_SHARED_CURVE 93 +# define SSL_CTRL_SET_ECDH_AUTO 94 +# define SSL_CTRL_SET_SIGALGS 97 +# define SSL_CTRL_SET_SIGALGS_LIST 98 +# define SSL_CTRL_CERT_FLAGS 99 +# define SSL_CTRL_CLEAR_CERT_FLAGS 100 +# define SSL_CTRL_SET_CLIENT_SIGALGS 101 +# define SSL_CTRL_SET_CLIENT_SIGALGS_LIST 102 +# define SSL_CTRL_GET_CLIENT_CERT_TYPES 103 +# define SSL_CTRL_SET_CLIENT_CERT_TYPES 104 +# define SSL_CTRL_BUILD_CERT_CHAIN 105 +# define SSL_CTRL_SET_VERIFY_CERT_STORE 106 +# define SSL_CTRL_SET_CHAIN_CERT_STORE 107 +# define SSL_CTRL_GET_PEER_SIGNATURE_NID 108 +# define SSL_CTRL_GET_SERVER_TMP_KEY 109 +# define SSL_CTRL_GET_RAW_CIPHERLIST 110 +# define SSL_CTRL_GET_EC_POINT_FORMATS 111 +# define SSL_CTRL_GET_CHAIN_CERTS 115 +# define SSL_CTRL_SELECT_CURRENT_CERT 116 +# define SSL_CTRL_SET_CURRENT_CERT 117 # define SSL_CTRL_CHECK_PROTO_VERSION 119 # define DTLS_CTRL_SET_LINK_MTU 120 # define DTLS_CTRL_GET_LINK_MIN_MTU 121 +# define SSL_CERT_SET_FIRST 1 +# define SSL_CERT_SET_NEXT 2 +# define SSL_CERT_SET_SERVER 3 # define DTLSv1_get_timeout(ssl, arg) \ SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)arg) # define DTLSv1_handle_timeout(ssl) \ @@ -1803,8 +2014,108 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) SSL_CTX_ctrl(ctx,SSL_CTRL_EXTRA_CHAIN_CERT,0,(char *)x509) # define SSL_CTX_get_extra_chain_certs(ctx,px509) \ SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,0,px509) +# define SSL_CTX_get_extra_chain_certs_only(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_EXTRA_CHAIN_CERTS,1,px509) # define SSL_CTX_clear_extra_chain_certs(ctx) \ SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) +# define SSL_CTX_set0_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)sk) +# define SSL_CTX_set1_chain(ctx,sk) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)sk) +# define SSL_CTX_add0_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)x509) +# define SSL_CTX_add1_chain_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)x509) +# define SSL_CTX_get0_chain_certs(ctx,px509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_CTX_clear_chain_certs(ctx) \ + SSL_CTX_set0_chain(ctx,NULL) +# define SSL_CTX_build_cert_chain(ctx, flags) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_CTX_select_current_cert(ctx,x509) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)x509) +# define SSL_CTX_set_current_cert(ctx, op) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_CTX_set0_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)st) +# define SSL_CTX_set1_verify_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)st) +# define 
SSL_CTX_set0_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)st) +# define SSL_CTX_set1_chain_cert_store(ctx,st) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)st) +# define SSL_set0_chain(ctx,sk) \ + SSL_ctrl(ctx,SSL_CTRL_CHAIN,0,(char *)sk) +# define SSL_set1_chain(ctx,sk) \ + SSL_ctrl(ctx,SSL_CTRL_CHAIN,1,(char *)sk) +# define SSL_add0_chain_cert(ctx,x509) \ + SSL_ctrl(ctx,SSL_CTRL_CHAIN_CERT,0,(char *)x509) +# define SSL_add1_chain_cert(ctx,x509) \ + SSL_ctrl(ctx,SSL_CTRL_CHAIN_CERT,1,(char *)x509) +# define SSL_get0_chain_certs(ctx,px509) \ + SSL_ctrl(ctx,SSL_CTRL_GET_CHAIN_CERTS,0,px509) +# define SSL_clear_chain_certs(ctx) \ + SSL_set0_chain(ctx,NULL) +# define SSL_build_cert_chain(s, flags) \ + SSL_ctrl(s,SSL_CTRL_BUILD_CERT_CHAIN, flags, NULL) +# define SSL_select_current_cert(ctx,x509) \ + SSL_ctrl(ctx,SSL_CTRL_SELECT_CURRENT_CERT,0,(char *)x509) +# define SSL_set_current_cert(ctx,op) \ + SSL_ctrl(ctx,SSL_CTRL_SET_CURRENT_CERT, op, NULL) +# define SSL_set0_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,0,(char *)st) +# define SSL_set1_verify_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_VERIFY_CERT_STORE,1,(char *)st) +# define SSL_set0_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,0,(char *)st) +# define SSL_set1_chain_cert_store(s,st) \ + SSL_ctrl(s,SSL_CTRL_SET_CHAIN_CERT_STORE,1,(char *)st) +# define SSL_get1_curves(ctx, s) \ + SSL_ctrl(ctx,SSL_CTRL_GET_CURVES,0,(char *)s) +# define SSL_CTX_set1_curves(ctx, clist, clistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURVES,clistlen,(char *)clist) +# define SSL_CTX_set1_curves_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CURVES_LIST,0,(char *)s) +# define SSL_set1_curves(ctx, clist, clistlen) \ + SSL_ctrl(ctx,SSL_CTRL_SET_CURVES,clistlen,(char *)clist) +# define SSL_set1_curves_list(ctx, s) \ + SSL_ctrl(ctx,SSL_CTRL_SET_CURVES_LIST,0,(char *)s) +# define SSL_get_shared_curve(s, n) \ + SSL_ctrl(s,SSL_CTRL_GET_SHARED_CURVE,n,NULL) +# define SSL_CTX_set_ecdh_auto(ctx, onoff) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL) +# define SSL_set_ecdh_auto(s, onoff) \ + SSL_ctrl(s,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL) +# define SSL_CTX_set1_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS,slistlen,(int *)slist) +# define SSL_CTX_set1_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)s) +# define SSL_set1_sigalgs(ctx, slist, slistlen) \ + SSL_ctrl(ctx,SSL_CTRL_SET_SIGALGS,clistlen,(int *)slist) +# define SSL_set1_sigalgs_list(ctx, s) \ + SSL_ctrl(ctx,SSL_CTRL_SET_SIGALGS_LIST,0,(char *)s) +# define SSL_CTX_set1_client_sigalgs(ctx, slist, slistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,slistlen,(int *)slist) +# define SSL_CTX_set1_client_sigalgs_list(ctx, s) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)s) +# define SSL_set1_client_sigalgs(ctx, slist, slistlen) \ + SSL_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS,clistlen,(int *)slist) +# define SSL_set1_client_sigalgs_list(ctx, s) \ + SSL_ctrl(ctx,SSL_CTRL_SET_CLIENT_SIGALGS_LIST,0,(char *)s) +# define SSL_get0_certificate_types(s, clist) \ + SSL_ctrl(s, SSL_CTRL_GET_CLIENT_CERT_TYPES, 0, (char *)clist) +# define SSL_CTX_set1_client_certificate_types(ctx, clist, clistlen) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)clist) +# define SSL_set1_client_certificate_types(s, clist, clistlen) \ + SSL_ctrl(s,SSL_CTRL_SET_CLIENT_CERT_TYPES,clistlen,(char *)clist) +# define SSL_get_peer_signature_nid(s, pn) \ + 
SSL_ctrl(s,SSL_CTRL_GET_PEER_SIGNATURE_NID,0,pn) +# define SSL_get_server_tmp_key(s, pk) \ + SSL_ctrl(s,SSL_CTRL_GET_SERVER_TMP_KEY,0,pk) +# define SSL_get0_raw_cipherlist(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_RAW_CIPHERLIST,0,(char *)plst) +# define SSL_get0_ec_point_formats(s, plst) \ + SSL_ctrl(s,SSL_CTRL_GET_EC_POINT_FORMATS,0,(char *)plst) # ifndef OPENSSL_NO_BIO BIO_METHOD *BIO_f_ssl(void); BIO *BIO_new_ssl(SSL_CTX *ctx, int client); @@ -1858,6 +2169,7 @@ int (*SSL_get_verify_callback(const SSL *s)) (int, X509_STORE_CTX *); void SSL_set_verify(SSL *s, int mode, int (*callback) (int ok, X509_STORE_CTX *ctx)); void SSL_set_verify_depth(SSL *s, int depth); +void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg); # ifndef OPENSSL_NO_RSA int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa); # endif @@ -1868,6 +2180,16 @@ int SSL_use_PrivateKey_ASN1(int pk, SSL *ssl, const unsigned char *d, int SSL_use_certificate(SSL *ssl, X509 *x); int SSL_use_certificate_ASN1(SSL *ssl, const unsigned char *d, int len); +# ifndef OPENSSL_NO_TLSEXT +/* Set serverinfo data for the current active cert. */ +int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, + size_t serverinfo_length); +# ifndef OPENSSL_NO_STDIO +int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file); +# endif /* NO_STDIO */ + +# endif + # ifndef OPENSSL_NO_STDIO int SSL_use_RSAPrivateKey_file(SSL *ssl, const char *file, int type); int SSL_use_PrivateKey_file(SSL *ssl, const char *file, int type); @@ -1942,6 +2264,8 @@ void SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth); void SSL_CTX_set_cert_verify_callback(SSL_CTX *ctx, int (*cb) (X509_STORE_CTX *, void *), void *arg); +void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg), + void *arg); # ifndef OPENSSL_NO_RSA int SSL_CTX_use_RSAPrivateKey(SSL_CTX *ctx, RSA *rsa); # endif @@ -1975,6 +2299,9 @@ int SSL_set_trust(SSL *s, int trust); int SSL_CTX_set1_param(SSL_CTX *ctx, X509_VERIFY_PARAM *vpm); int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm); +X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx); +X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl); + # ifndef OPENSSL_NO_SRP int SSL_CTX_set_srp_username(SSL_CTX *ctx, char *name); int SSL_CTX_set_srp_password(SSL_CTX *ctx, char *password); @@ -1999,6 +2326,7 @@ char *SSL_get_srp_username(SSL *s); char *SSL_get_srp_userinfo(SSL *s); # endif +void SSL_certs_clear(SSL *s); void SSL_free(SSL *ssl); int SSL_accept(SSL *ssl); int SSL_connect(SSL *ssl); @@ -2051,6 +2379,14 @@ const SSL_METHOD *DTLSv1_method(void); /* DTLSv1.0 */ const SSL_METHOD *DTLSv1_server_method(void); /* DTLSv1.0 */ const SSL_METHOD *DTLSv1_client_method(void); /* DTLSv1.0 */ +const SSL_METHOD *DTLSv1_2_method(void); /* DTLSv1.2 */ +const SSL_METHOD *DTLSv1_2_server_method(void); /* DTLSv1.2 */ +const SSL_METHOD *DTLSv1_2_client_method(void); /* DTLSv1.2 */ + +const SSL_METHOD *DTLS_method(void); /* DTLS 1.0 and 1.2 */ +const SSL_METHOD *DTLS_server_method(void); /* DTLS 1.0 and 1.2 */ +const SSL_METHOD *DTLS_client_method(void); /* DTLS 1.0 and 1.2 */ + STACK_OF(SSL_CIPHER) *SSL_get_ciphers(const SSL *s); int SSL_do_handshake(SSL *s); @@ -2059,6 +2395,7 @@ int SSL_renegotiate_abbreviated(SSL *s); int SSL_renegotiate_pending(SSL *s); int SSL_shutdown(SSL *s); +const SSL_METHOD *SSL_CTX_get_ssl_method(SSL_CTX *ctx); const SSL_METHOD *SSL_get_ssl_method(SSL *s); int SSL_set_ssl_method(SSL *s, const SSL_METHOD *method); const char *SSL_alert_type_string_long(int value); @@ -2088,7 +2425,10 @@ SSL *SSL_dup(SSL *ssl); X509 
*SSL_get_certificate(const SSL *ssl); /* * EVP_PKEY - */ struct evp_pkey_st *SSL_get_privatekey(SSL *ssl); + */ struct evp_pkey_st *SSL_get_privatekey(const SSL *ssl); + +X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx); +EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx); void SSL_CTX_set_quiet_shutdown(SSL_CTX *ctx, int mode); int SSL_CTX_get_quiet_shutdown(const SSL_CTX *ctx); @@ -2196,6 +2536,9 @@ const COMP_METHOD *SSL_get_current_compression(SSL *s); const COMP_METHOD *SSL_get_current_expansion(SSL *s); const char *SSL_COMP_get_name(const COMP_METHOD *comp); STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void); +STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) + *meths); +void SSL_COMP_free_compression_methods(void); int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm); # else const void *SSL_get_current_compression(SSL *s); @@ -2205,6 +2548,8 @@ void *SSL_COMP_get_compression_methods(void); int SSL_COMP_add_compression_method(int id, void *cm); # endif +const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr); + /* TLS extensions functions */ int SSL_set_session_ticket_ext(SSL *s, void *ext_data, int ext_len); @@ -2218,6 +2563,27 @@ int SSL_set_session_secret_cb(SSL *s, void SSL_set_debug(SSL *s, int debug); int SSL_cache_hit(SSL *s); +int SSL_is_server(SSL *s); + +SSL_CONF_CTX *SSL_CONF_CTX_new(void); +int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx); +void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx); +unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags); +unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, unsigned int flags); +int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre); + +void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl); +void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx); + +int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value); +int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv); +int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd); + +# ifndef OPENSSL_NO_SSL_TRACE +void SSL_trace(int write_p, int version, int content_type, + const void *buf, size_t len, SSL *ssl, void *arg); +const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c); +# endif # ifndef OPENSSL_NO_UNIT_TEST const struct openssl_ssl_test_functions *SSL_test_functions(void); @@ -2233,6 +2599,7 @@ void ERR_load_SSL_strings(void); /* Error codes for the SSL functions. */ /* Function codes. 
*/ +# define SSL_F_CHECK_SUITEB_CIPHER_LIST 331 # define SSL_F_CLIENT_CERTIFICATE 100 # define SSL_F_CLIENT_FINISHED 167 # define SSL_F_CLIENT_HELLO 101 @@ -2344,6 +2711,8 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL3_SETUP_WRITE_BUFFER 291 # define SSL_F_SSL3_WRITE_BYTES 158 # define SSL_F_SSL3_WRITE_PENDING 159 +# define SSL_F_SSL_ADD_CERT_CHAIN 318 +# define SSL_F_SSL_ADD_CERT_TO_BUF 319 # define SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT 298 # define SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT 277 # define SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT 307 @@ -2353,6 +2722,7 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL_ADD_SERVERHELLO_TLSEXT 278 # define SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT 308 # define SSL_F_SSL_BAD_METHOD 160 +# define SSL_F_SSL_BUILD_CERT_CHAIN 332 # define SSL_F_SSL_BYTES_TO_CIPHER_LIST 161 # define SSL_F_SSL_CERT_DUP 221 # define SSL_F_SSL_CERT_INST 222 @@ -2365,6 +2735,7 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL_CIPHER_STRENGTH_SORT 231 # define SSL_F_SSL_CLEAR 164 # define SSL_F_SSL_COMP_ADD_COMPRESSION_METHOD 165 +# define SSL_F_SSL_CONF_CMD 334 # define SSL_F_SSL_CREATE_CIPHER_LIST 166 # define SSL_F_SSL_CTRL 232 # define SSL_F_SSL_CTX_CHECK_PRIVATE_KEY 168 @@ -2387,9 +2758,12 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL_CTX_USE_RSAPRIVATEKEY 177 # define SSL_F_SSL_CTX_USE_RSAPRIVATEKEY_ASN1 178 # define SSL_F_SSL_CTX_USE_RSAPRIVATEKEY_FILE 179 +# define SSL_F_SSL_CTX_USE_SERVERINFO 336 +# define SSL_F_SSL_CTX_USE_SERVERINFO_FILE 337 # define SSL_F_SSL_DO_HANDSHAKE 180 # define SSL_F_SSL_GET_NEW_SESSION 181 # define SSL_F_SSL_GET_PREV_SESSION 217 +# define SSL_F_SSL_GET_SERVER_CERT_INDEX 322 # define SSL_F_SSL_GET_SERVER_SEND_CERT 182 # define SSL_F_SSL_GET_SERVER_SEND_PKEY 317 # define SSL_F_SSL_GET_SIGN_PKEY 183 @@ -2408,6 +2782,8 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL_READ 223 # define SSL_F_SSL_RSA_PRIVATE_DECRYPT 187 # define SSL_F_SSL_RSA_PUBLIC_ENCRYPT 188 +# define SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT 320 +# define SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT 321 # define SSL_F_SSL_SESSION_NEW 189 # define SSL_F_SSL_SESSION_PRINT_FP 190 # define SSL_F_SSL_SESSION_SET1_ID_CONTEXT 312 @@ -2440,16 +2816,19 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL_USE_RSAPRIVATEKEY_FILE 206 # define SSL_F_SSL_VERIFY_CERT_CHAIN 207 # define SSL_F_SSL_WRITE 208 +# define SSL_F_TLS12_CHECK_PEER_SIGALG 333 # define SSL_F_TLS1_CERT_VERIFY_MAC 286 # define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 # define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 # define SSL_F_TLS1_ENC 210 # define SSL_F_TLS1_EXPORT_KEYING_MATERIAL 314 +# define SSL_F_TLS1_GET_CURVELIST 338 # define SSL_F_TLS1_HEARTBEAT 315 # define SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT 275 # define SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT 276 # define SSL_F_TLS1_PRF 284 # define SSL_F_TLS1_SETUP_KEY_BLOCK 211 +# define SSL_F_TLS1_SET_SERVER_SIGALGS 335 # define SSL_F_WRITE_PENDING 212 /* Reason codes. 
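[Editor's note, not part of the patch] The block of ctrl macros added to ssl.h above (curves, ecdh_auto, sigalgs) is easiest to read through a short usage sketch. The macro names are copied from the header diff; the helper name tune_ctx and the curve/sigalg strings are illustrative assumptions, and return-value checks are trimmed.

#include <openssl/ssl.h>

/* Illustrative only: tune an SSL_CTX with the new ctrl macros. */
static void tune_ctx(SSL_CTX *ctx)
{
    /* Restrict ECDHE to P-256/P-384 and let the library choose per handshake. */
    SSL_CTX_set1_curves_list(ctx, "P-256:P-384");
    SSL_CTX_set_ecdh_auto(ctx, 1);
    /* Offer only SHA-256/SHA-384 based signature algorithms. */
    SSL_CTX_set1_sigalgs_list(ctx, "ECDSA+SHA256:RSA+SHA256:RSA+SHA384");
}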
*/ @@ -2459,6 +2838,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_BAD_AUTHENTICATION_TYPE 102 # define SSL_R_BAD_CHANGE_CIPHER_SPEC 103 # define SSL_R_BAD_CHECKSUM 104 +# define SSL_R_BAD_DATA 390 # define SSL_R_BAD_DATA_RETURNED_BY_CALLBACK 106 # define SSL_R_BAD_DECOMPRESSION 107 # define SSL_R_BAD_DH_G_LENGTH 108 @@ -2496,6 +2876,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_BAD_SSL_FILETYPE 124 # define SSL_R_BAD_SSL_SESSION_ID_LENGTH 125 # define SSL_R_BAD_STATE 126 +# define SSL_R_BAD_VALUE 384 # define SSL_R_BAD_WRITE_RETRY 127 # define SSL_R_BIO_NOT_SET 128 # define SSL_R_BLOCK_CIPHER_PAD_IS_WRONG 129 @@ -2504,6 +2885,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_CA_DN_TOO_LONG 132 # define SSL_R_CCS_RECEIVED_EARLY 133 # define SSL_R_CERTIFICATE_VERIFY_FAILED 134 +# define SSL_R_CERT_CB_ERROR 377 # define SSL_R_CERT_LENGTH_MISMATCH 135 # define SSL_R_CHALLENGE_IS_DIFFERENT 136 # define SSL_R_CIPHER_CODE_WRONG_LENGTH 137 @@ -2530,6 +2912,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_ECC_CERT_NOT_FOR_SIGNING 318 # define SSL_R_ECC_CERT_SHOULD_HAVE_RSA_SIGNATURE 322 # define SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE 323 +# define SSL_R_ECDH_REQUIRED_FOR_SUITEB_MODE 374 # define SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER 310 # define SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST 354 # define SSL_R_ENCRYPTED_LENGTH_TOO_LONG 150 @@ -2543,12 +2926,15 @@ void ERR_load_SSL_strings(void); # define SSL_R_HTTPS_PROXY_REQUEST 155 # define SSL_R_HTTP_REQUEST 156 # define SSL_R_ILLEGAL_PADDING 283 +# define SSL_R_ILLEGAL_SUITEB_DIGEST 380 # define SSL_R_INAPPROPRIATE_FALLBACK 373 # define SSL_R_INCONSISTENT_COMPRESSION 340 # define SSL_R_INVALID_CHALLENGE_LENGTH 158 # define SSL_R_INVALID_COMMAND 280 # define SSL_R_INVALID_COMPRESSION_ALGORITHM 341 +# define SSL_R_INVALID_NULL_CMD_NAME 385 # define SSL_R_INVALID_PURPOSE 278 +# define SSL_R_INVALID_SERVERINFO_DATA 388 # define SSL_R_INVALID_SRP_USERNAME 357 # define SSL_R_INVALID_STATUS_RESPONSE 328 # define SSL_R_INVALID_TICKET_KEYS_LENGTH 325 @@ -2574,6 +2960,8 @@ void ERR_load_SSL_strings(void); # define SSL_R_MISSING_DH_KEY 163 # define SSL_R_MISSING_DH_RSA_CERT 164 # define SSL_R_MISSING_DSA_SIGNING_CERT 165 +# define SSL_R_MISSING_ECDH_CERT 382 +# define SSL_R_MISSING_ECDSA_SIGNING_CERT 381 # define SSL_R_MISSING_EXPORT_TMP_DH_KEY 166 # define SSL_R_MISSING_EXPORT_TMP_RSA_KEY 167 # define SSL_R_MISSING_RSA_CERTIFICATE 168 @@ -2602,6 +2990,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_NO_COMPRESSION_SPECIFIED 187 # define SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER 330 # define SSL_R_NO_METHOD_SPECIFIED 188 +# define SSL_R_NO_PEM_EXTENSIONS 389 # define SSL_R_NO_PRIVATEKEY 189 # define SSL_R_NO_PRIVATE_KEY_ASSIGNED 190 # define SSL_R_NO_PROTOCOLS_AVAILABLE 191 @@ -2609,12 +2998,15 @@ void ERR_load_SSL_strings(void); # define SSL_R_NO_RENEGOTIATION 339 # define SSL_R_NO_REQUIRED_DIGEST 324 # define SSL_R_NO_SHARED_CIPHER 193 +# define SSL_R_NO_SHARED_SIGATURE_ALGORITHMS 376 # define SSL_R_NO_SRTP_PROFILES 359 # define SSL_R_NO_VERIFY_CALLBACK 194 # define SSL_R_NULL_SSL_CTX 195 # define SSL_R_NULL_SSL_METHOD_PASSED 196 # define SSL_R_OLD_SESSION_CIPHER_NOT_RETURNED 197 # define SSL_R_OLD_SESSION_COMPRESSION_ALGORITHM_NOT_RETURNED 344 +# define SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE 387 +# define SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE 379 # define SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE 297 # define SSL_R_OPAQUE_PRF_INPUT_TOO_LONG 327 # define SSL_R_PACKET_LENGTH_TOO_LONG 198 @@ -2626,6 +3018,8 @@ void 
ERR_load_SSL_strings(void); # define SSL_R_PEER_ERROR_NO_CERTIFICATE 202 # define SSL_R_PEER_ERROR_NO_CIPHER 203 # define SSL_R_PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE 204 +# define SSL_R_PEM_NAME_BAD_PREFIX 391 +# define SSL_R_PEM_NAME_TOO_SHORT 392 # define SSL_R_PRE_MAC_LENGTH_TOO_LONG 205 # define SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS 206 # define SSL_R_PROTOCOL_IS_SHUTDOWN 207 @@ -2728,6 +3122,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_UNKNOWN_CERTIFICATE_TYPE 247 # define SSL_R_UNKNOWN_CIPHER_RETURNED 248 # define SSL_R_UNKNOWN_CIPHER_TYPE 249 +# define SSL_R_UNKNOWN_CMD_NAME 386 # define SSL_R_UNKNOWN_DIGEST 368 # define SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE 250 # define SSL_R_UNKNOWN_PKEY_TYPE 251 @@ -2745,7 +3140,9 @@ void ERR_load_SSL_strings(void); # define SSL_R_UNSUPPORTED_STATUS_TYPE 329 # define SSL_R_USE_SRTP_NOT_NEGOTIATED 369 # define SSL_R_WRITE_BIO_NOT_SET 260 +# define SSL_R_WRONG_CERTIFICATE_TYPE 383 # define SSL_R_WRONG_CIPHER_RETURNED 261 +# define SSL_R_WRONG_CURVE 378 # define SSL_R_WRONG_MESSAGE_TYPE 262 # define SSL_R_WRONG_NUMBER_OF_KEY_BITS 263 # define SSL_R_WRONG_SIGNATURE_LENGTH 264 diff --git a/deps/openssl/openssl/ssl/ssl3.h b/deps/openssl/openssl/ssl/ssl3.h index 2dd54626588a24..e681d50a9e8643 100644 --- a/deps/openssl/openssl/ssl/ssl3.h +++ b/deps/openssl/openssl/ssl/ssl3.h @@ -159,11 +159,17 @@ extern "C" { # define SSL3_CK_DH_RSA_DES_192_CBC3_SHA 0x03000010 # define SSL3_CK_EDH_DSS_DES_40_CBC_SHA 0x03000011 +# define SSL3_CK_DHE_DSS_DES_40_CBC_SHA SSL3_CK_EDH_DSS_DES_40_CBC_SHA # define SSL3_CK_EDH_DSS_DES_64_CBC_SHA 0x03000012 +# define SSL3_CK_DHE_DSS_DES_64_CBC_SHA SSL3_CK_EDH_DSS_DES_64_CBC_SHA # define SSL3_CK_EDH_DSS_DES_192_CBC3_SHA 0x03000013 +# define SSL3_CK_DHE_DSS_DES_192_CBC3_SHA SSL3_CK_EDH_DSS_DES_192_CBC3_SHA # define SSL3_CK_EDH_RSA_DES_40_CBC_SHA 0x03000014 +# define SSL3_CK_DHE_RSA_DES_40_CBC_SHA SSL3_CK_EDH_RSA_DES_40_CBC_SHA # define SSL3_CK_EDH_RSA_DES_64_CBC_SHA 0x03000015 +# define SSL3_CK_DHE_RSA_DES_64_CBC_SHA SSL3_CK_EDH_RSA_DES_64_CBC_SHA # define SSL3_CK_EDH_RSA_DES_192_CBC3_SHA 0x03000016 +# define SSL3_CK_DHE_RSA_DES_192_CBC3_SHA SSL3_CK_EDH_RSA_DES_192_CBC3_SHA # define SSL3_CK_ADH_RC4_40_MD5 0x03000017 # define SSL3_CK_ADH_RC4_128_MD5 0x03000018 @@ -220,6 +226,18 @@ extern "C" { # define SSL3_TXT_DH_RSA_DES_64_CBC_SHA "DH-RSA-DES-CBC-SHA" # define SSL3_TXT_DH_RSA_DES_192_CBC3_SHA "DH-RSA-DES-CBC3-SHA" +# define SSL3_TXT_DHE_DSS_DES_40_CBC_SHA "EXP-DHE-DSS-DES-CBC-SHA" +# define SSL3_TXT_DHE_DSS_DES_64_CBC_SHA "DHE-DSS-DES-CBC-SHA" +# define SSL3_TXT_DHE_DSS_DES_192_CBC3_SHA "DHE-DSS-DES-CBC3-SHA" +# define SSL3_TXT_DHE_RSA_DES_40_CBC_SHA "EXP-DHE-RSA-DES-CBC-SHA" +# define SSL3_TXT_DHE_RSA_DES_64_CBC_SHA "DHE-RSA-DES-CBC-SHA" +# define SSL3_TXT_DHE_RSA_DES_192_CBC3_SHA "DHE-RSA-DES-CBC3-SHA" + +/* + * This next block of six "EDH" labels is for backward compatibility with + * older versions of OpenSSL. 
New code should use the six "DHE" labels above + * instead: + */ # define SSL3_TXT_EDH_DSS_DES_40_CBC_SHA "EXP-EDH-DSS-DES-CBC-SHA" # define SSL3_TXT_EDH_DSS_DES_64_CBC_SHA "EDH-DSS-DES-CBC-SHA" # define SSL3_TXT_EDH_DSS_DES_192_CBC3_SHA "EDH-DSS-DES-CBC3-SHA" @@ -263,6 +281,8 @@ extern "C" { # define SSL3_SESSION_ID_SIZE 32 # define SSL3_RT_HEADER_LENGTH 5 +# define SSL3_HM_HEADER_LENGTH 4 + # ifndef SSL3_ALIGN_PAYLOAD /* * Some will argue that this increases memory footprint, but it's not @@ -342,6 +362,23 @@ extern "C" { # define SSL3_RT_APPLICATION_DATA 23 # define TLS1_RT_HEARTBEAT 24 +/* Pseudo content types to indicate additional parameters */ +# define TLS1_RT_CRYPTO 0x1000 +# define TLS1_RT_CRYPTO_PREMASTER (TLS1_RT_CRYPTO | 0x1) +# define TLS1_RT_CRYPTO_CLIENT_RANDOM (TLS1_RT_CRYPTO | 0x2) +# define TLS1_RT_CRYPTO_SERVER_RANDOM (TLS1_RT_CRYPTO | 0x3) +# define TLS1_RT_CRYPTO_MASTER (TLS1_RT_CRYPTO | 0x4) + +# define TLS1_RT_CRYPTO_READ 0x0000 +# define TLS1_RT_CRYPTO_WRITE 0x0100 +# define TLS1_RT_CRYPTO_MAC (TLS1_RT_CRYPTO | 0x5) +# define TLS1_RT_CRYPTO_KEY (TLS1_RT_CRYPTO | 0x6) +# define TLS1_RT_CRYPTO_IV (TLS1_RT_CRYPTO | 0x7) +# define TLS1_RT_CRYPTO_FIXED_IV (TLS1_RT_CRYPTO | 0x8) + +/* Pseudo content type for SSL/TLS header info */ +# define SSL3_RT_HEADER 0x100 + # define SSL3_AL_WARNING 1 # define SSL3_AL_FATAL 2 @@ -436,14 +473,7 @@ typedef struct ssl3_buffer_st { */ # define SSL3_FLAGS_CCS_OK 0x0080 -/* - * SSL3_FLAGS_SGC_RESTART_DONE is set when we restart a handshake because of - * MS SGC and so prevents us from restarting the handshake in a loop. It's - * reset on a renegotiation, so effectively limits the client to one restart - * per negotiation. This limits the possibility of a DDoS attack where the - * client handshakes in a loop using SGC to restart. Servers which permit - * renegotiation can still be effected, but we can't prevent that. - */ +/* SSL3_FLAGS_SGC_RESTART_DONE is no longer used */ # define SSL3_FLAGS_SGC_RESTART_DONE 0x0040 # ifndef OPENSSL_NO_SSL_INTERN @@ -584,7 +614,20 @@ typedef struct ssl3_state_st { */ char is_probably_safari; # endif /* !OPENSSL_NO_EC */ -# endif /* !OPENSSL_NO_TLSEXT */ + + /* + * ALPN information (we are in the process of transitioning from NPN to + * ALPN.) + */ + + /* + * In a server these point to the selected ALPN protocol after the + * ClientHello has been processed. In a client these contain the protocol + * that the server selected once the ServerHello has been processed. 
+ */ + unsigned char *alpn_selected; + unsigned alpn_selected_len; +# endif /* OPENSSL_NO_TLSEXT */ } SSL3_STATE; # endif @@ -654,6 +697,7 @@ typedef struct ssl3_state_st { # define SSL3_ST_SR_CLNT_HELLO_A (0x110|SSL_ST_ACCEPT) # define SSL3_ST_SR_CLNT_HELLO_B (0x111|SSL_ST_ACCEPT) # define SSL3_ST_SR_CLNT_HELLO_C (0x112|SSL_ST_ACCEPT) +# define SSL3_ST_SR_CLNT_HELLO_D (0x115|SSL_ST_ACCEPT) /* write to client */ # define DTLS1_ST_SW_HELLO_VERIFY_REQUEST_A (0x113|SSL_ST_ACCEPT) # define DTLS1_ST_SW_HELLO_VERIFY_REQUEST_B (0x114|SSL_ST_ACCEPT) diff --git a/deps/openssl/openssl/ssl/ssl_algs.c b/deps/openssl/openssl/ssl/ssl_algs.c index 151422ae7742bd..e6f515ff62c00c 100644 --- a/deps/openssl/openssl/ssl/ssl_algs.c +++ b/deps/openssl/openssl/ssl/ssl_algs.c @@ -95,6 +95,10 @@ int SSL_library_init(void) EVP_add_cipher(EVP_aes_128_cbc_hmac_sha1()); EVP_add_cipher(EVP_aes_256_cbc_hmac_sha1()); # endif +# if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256) + EVP_add_cipher(EVP_aes_128_cbc_hmac_sha256()); + EVP_add_cipher(EVP_aes_256_cbc_hmac_sha256()); +# endif #endif #ifndef OPENSSL_NO_CAMELLIA diff --git a/deps/openssl/openssl/ssl/ssl_cert.c b/deps/openssl/openssl/ssl/ssl_cert.c index 5df2413f719080..93a1eb941ab942 100644 --- a/deps/openssl/openssl/ssl/ssl_cert.c +++ b/deps/openssl/openssl/ssl/ssl_cert.c @@ -139,29 +139,50 @@ int SSL_get_ex_data_X509_STORE_CTX_idx(void) static volatile int ssl_x509_store_ctx_idx = -1; int got_write_lock = 0; - CRYPTO_r_lock(CRYPTO_LOCK_SSL_CTX); + if (((size_t)&ssl_x509_store_ctx_idx & + (sizeof(ssl_x509_store_ctx_idx) - 1)) + == 0) { /* check alignment, practically always true */ + int ret; + + if ((ret = ssl_x509_store_ctx_idx) < 0) { + CRYPTO_w_lock(CRYPTO_LOCK_SSL_CTX); + if ((ret = ssl_x509_store_ctx_idx) < 0) { + ret = ssl_x509_store_ctx_idx = + X509_STORE_CTX_get_ex_new_index(0, + "SSL for verify callback", + NULL, NULL, NULL); + } + CRYPTO_w_unlock(CRYPTO_LOCK_SSL_CTX); + } + + return ret; + } else { /* commonly eliminated */ - if (ssl_x509_store_ctx_idx < 0) { - CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX); - CRYPTO_w_lock(CRYPTO_LOCK_SSL_CTX); - got_write_lock = 1; + CRYPTO_r_lock(CRYPTO_LOCK_SSL_CTX); if (ssl_x509_store_ctx_idx < 0) { - ssl_x509_store_ctx_idx = - X509_STORE_CTX_get_ex_new_index(0, "SSL for verify callback", - NULL, NULL, NULL); + CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX); + CRYPTO_w_lock(CRYPTO_LOCK_SSL_CTX); + got_write_lock = 1; + + if (ssl_x509_store_ctx_idx < 0) { + ssl_x509_store_ctx_idx = + X509_STORE_CTX_get_ex_new_index(0, + "SSL for verify callback", + NULL, NULL, NULL); + } } - } - if (got_write_lock) - CRYPTO_w_unlock(CRYPTO_LOCK_SSL_CTX); - else - CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX); + if (got_write_lock) + CRYPTO_w_unlock(CRYPTO_LOCK_SSL_CTX); + else + CRYPTO_r_unlock(CRYPTO_LOCK_SSL_CTX); - return ssl_x509_store_ctx_idx; + return ssl_x509_store_ctx_idx; + } } -static void ssl_cert_set_default_md(CERT *cert) +void ssl_cert_set_default_md(CERT *cert) { /* Set digest values to defaults */ #ifndef OPENSSL_NO_DSA @@ -262,25 +283,47 @@ CERT *ssl_cert_dup(CERT *cert) } } ret->ecdh_tmp_cb = cert->ecdh_tmp_cb; + ret->ecdh_tmp_auto = cert->ecdh_tmp_auto; #endif for (i = 0; i < SSL_PKEY_NUM; i++) { - if (cert->pkeys[i].x509 != NULL) { - ret->pkeys[i].x509 = cert->pkeys[i].x509; - CRYPTO_add(&ret->pkeys[i].x509->references, 1, CRYPTO_LOCK_X509); + CERT_PKEY *cpk = cert->pkeys + i; + CERT_PKEY *rpk = ret->pkeys + i; + if (cpk->x509 != NULL) { + rpk->x509 = cpk->x509; + CRYPTO_add(&rpk->x509->references, 1, CRYPTO_LOCK_X509); } - if 
(cert->pkeys[i].privatekey != NULL) { - ret->pkeys[i].privatekey = cert->pkeys[i].privatekey; - CRYPTO_add(&ret->pkeys[i].privatekey->references, 1, - CRYPTO_LOCK_EVP_PKEY); + if (cpk->privatekey != NULL) { + rpk->privatekey = cpk->privatekey; + CRYPTO_add(&cpk->privatekey->references, 1, CRYPTO_LOCK_EVP_PKEY); } - } - /* - * ret->extra_certs *should* exist, but currently the own certificate - * chain is held inside SSL_CTX - */ + if (cpk->chain) { + rpk->chain = X509_chain_up_ref(cpk->chain); + if (!rpk->chain) { + SSLerr(SSL_F_SSL_CERT_DUP, ERR_R_MALLOC_FAILURE); + goto err; + } + } + rpk->valid_flags = 0; +#ifndef OPENSSL_NO_TLSEXT + if (cert->pkeys[i].serverinfo != NULL) { + /* Just copy everything. */ + ret->pkeys[i].serverinfo = + OPENSSL_malloc(cert->pkeys[i].serverinfo_length); + if (ret->pkeys[i].serverinfo == NULL) { + SSLerr(SSL_F_SSL_CERT_DUP, ERR_R_MALLOC_FAILURE); + return NULL; + } + ret->pkeys[i].serverinfo_length = + cert->pkeys[i].serverinfo_length; + memcpy(ret->pkeys[i].serverinfo, + cert->pkeys[i].serverinfo, + cert->pkeys[i].serverinfo_length); + } +#endif + } ret->references = 1; /* @@ -288,6 +331,64 @@ CERT *ssl_cert_dup(CERT *cert) * will be set during handshake. */ ssl_cert_set_default_md(ret); + /* Peer sigalgs set to NULL as we get these from handshake too */ + ret->peer_sigalgs = NULL; + ret->peer_sigalgslen = 0; + /* Configured sigalgs however we copy across */ + + if (cert->conf_sigalgs) { + ret->conf_sigalgs = OPENSSL_malloc(cert->conf_sigalgslen); + if (!ret->conf_sigalgs) + goto err; + memcpy(ret->conf_sigalgs, cert->conf_sigalgs, cert->conf_sigalgslen); + ret->conf_sigalgslen = cert->conf_sigalgslen; + } else + ret->conf_sigalgs = NULL; + + if (cert->client_sigalgs) { + ret->client_sigalgs = OPENSSL_malloc(cert->client_sigalgslen); + if (!ret->client_sigalgs) + goto err; + memcpy(ret->client_sigalgs, cert->client_sigalgs, + cert->client_sigalgslen); + ret->client_sigalgslen = cert->client_sigalgslen; + } else + ret->client_sigalgs = NULL; + /* Shared sigalgs also NULL */ + ret->shared_sigalgs = NULL; + /* Copy any custom client certificate types */ + if (cert->ctypes) { + ret->ctypes = OPENSSL_malloc(cert->ctype_num); + if (!ret->ctypes) + goto err; + memcpy(ret->ctypes, cert->ctypes, cert->ctype_num); + ret->ctype_num = cert->ctype_num; + } + + ret->cert_flags = cert->cert_flags; + + ret->cert_cb = cert->cert_cb; + ret->cert_cb_arg = cert->cert_cb_arg; + + if (cert->verify_store) { + CRYPTO_add(&cert->verify_store->references, 1, + CRYPTO_LOCK_X509_STORE); + ret->verify_store = cert->verify_store; + } + + if (cert->chain_store) { + CRYPTO_add(&cert->chain_store->references, 1, CRYPTO_LOCK_X509_STORE); + ret->chain_store = cert->chain_store; + } + + ret->ciphers_raw = NULL; + +#ifndef OPENSSL_NO_TLSEXT + if (!custom_exts_copy(&ret->cli_ext, &cert->cli_ext)) + goto err; + if (!custom_exts_copy(&ret->srv_ext, &cert->srv_ext)) + goto err; +#endif return (ret); @@ -307,16 +408,49 @@ CERT *ssl_cert_dup(CERT *cert) EC_KEY_free(ret->ecdh_tmp); #endif - for (i = 0; i < SSL_PKEY_NUM; i++) { - if (ret->pkeys[i].x509 != NULL) - X509_free(ret->pkeys[i].x509); - if (ret->pkeys[i].privatekey != NULL) - EVP_PKEY_free(ret->pkeys[i].privatekey); - } +#ifndef OPENSSL_NO_TLSEXT + custom_exts_free(&ret->cli_ext); + custom_exts_free(&ret->srv_ext); +#endif + + ssl_cert_clear_certs(ret); return NULL; } +/* Free up and clear all certificates and chains */ + +void ssl_cert_clear_certs(CERT *c) +{ + int i; + if (c == NULL) + return; + for (i = 0; i < SSL_PKEY_NUM; i++) { + 
CERT_PKEY *cpk = c->pkeys + i; + if (cpk->x509) { + X509_free(cpk->x509); + cpk->x509 = NULL; + } + if (cpk->privatekey) { + EVP_PKEY_free(cpk->privatekey); + cpk->privatekey = NULL; + } + if (cpk->chain) { + sk_X509_pop_free(cpk->chain, X509_free); + cpk->chain = NULL; + } +#ifndef OPENSSL_NO_TLSEXT + if (cpk->serverinfo) { + OPENSSL_free(cpk->serverinfo); + cpk->serverinfo = NULL; + cpk->serverinfo_length = 0; + } +#endif + /* Clear all flags apart from explicit sign */ + cpk->valid_flags &= CERT_PKEY_EXPLICIT_SIGN; + } +} + void ssl_cert_free(CERT *c) { int i; @@ -350,16 +484,27 @@ void ssl_cert_free(CERT *c) EC_KEY_free(c->ecdh_tmp); #endif - for (i = 0; i < SSL_PKEY_NUM; i++) { - if (c->pkeys[i].x509 != NULL) - X509_free(c->pkeys[i].x509); - if (c->pkeys[i].privatekey != NULL) - EVP_PKEY_free(c->pkeys[i].privatekey); -#if 0 - if (c->pkeys[i].publickey != NULL) - EVP_PKEY_free(c->pkeys[i].publickey); + ssl_cert_clear_certs(c); + if (c->peer_sigalgs) + OPENSSL_free(c->peer_sigalgs); + if (c->conf_sigalgs) + OPENSSL_free(c->conf_sigalgs); + if (c->client_sigalgs) + OPENSSL_free(c->client_sigalgs); + if (c->shared_sigalgs) + OPENSSL_free(c->shared_sigalgs); + if (c->ctypes) + OPENSSL_free(c->ctypes); + if (c->verify_store) + X509_STORE_free(c->verify_store); + if (c->chain_store) + X509_STORE_free(c->chain_store); + if (c->ciphers_raw) + OPENSSL_free(c->ciphers_raw); +#ifndef OPENSSL_NO_TLSEXT + custom_exts_free(&c->cli_ext); + custom_exts_free(&c->srv_ext); #endif - } OPENSSL_free(c); } @@ -388,6 +533,104 @@ int ssl_cert_inst(CERT **o) return (1); } +int ssl_cert_set0_chain(CERT *c, STACK_OF(X509) *chain) +{ + CERT_PKEY *cpk = c->key; + if (!cpk) + return 0; + if (cpk->chain) + sk_X509_pop_free(cpk->chain, X509_free); + cpk->chain = chain; + return 1; +} + +int ssl_cert_set1_chain(CERT *c, STACK_OF(X509) *chain) +{ + STACK_OF(X509) *dchain; + if (!chain) + return ssl_cert_set0_chain(c, NULL); + dchain = X509_chain_up_ref(chain); + if (!dchain) + return 0; + if (!ssl_cert_set0_chain(c, dchain)) { + sk_X509_pop_free(dchain, X509_free); + return 0; + } + return 1; +} + +int ssl_cert_add0_chain_cert(CERT *c, X509 *x) +{ + CERT_PKEY *cpk = c->key; + if (!cpk) + return 0; + if (!cpk->chain) + cpk->chain = sk_X509_new_null(); + if (!cpk->chain || !sk_X509_push(cpk->chain, x)) + return 0; + return 1; +} + +int ssl_cert_add1_chain_cert(CERT *c, X509 *x) +{ + if (!ssl_cert_add0_chain_cert(c, x)) + return 0; + CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); + return 1; +} + +int ssl_cert_select_current(CERT *c, X509 *x) +{ + int i; + if (x == NULL) + return 0; + for (i = 0; i < SSL_PKEY_NUM; i++) { + CERT_PKEY *cpk = c->pkeys + i; + if (cpk->x509 == x && cpk->privatekey) { + c->key = cpk; + return 1; + } + } + + for (i = 0; i < SSL_PKEY_NUM; i++) { + CERT_PKEY *cpk = c->pkeys + i; + if (cpk->privatekey && cpk->x509 && !X509_cmp(cpk->x509, x)) { + c->key = cpk; + return 1; + } + } + return 0; +} + +int ssl_cert_set_current(CERT *c, long op) +{ + int i, idx; + if (!c) + return 0; + if (op == SSL_CERT_SET_FIRST) + idx = 0; + else if (op == SSL_CERT_SET_NEXT) { + idx = (int)(c->key - c->pkeys + 1); + if (idx >= SSL_PKEY_NUM) + return 0; + } else + return 0; + for (i = idx; i < SSL_PKEY_NUM; i++) { + CERT_PKEY *cpk = c->pkeys + i; + if (cpk->x509 && cpk->privatekey) { + c->key = cpk; + return 1; + } + } + return 0; +} + +void ssl_cert_set_cert_cb(CERT *c, int (*cb) (SSL *ssl, void *arg), void *arg) +{ + c->cert_cb = cb; + c->cert_cb_arg = arg; +} + SESS_CERT *ssl_sess_cert_new(void) { SESS_CERT *ret; 
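[Editor's note, not part of the patch] The ssl_cert.c helpers above back the per-certificate chain macros declared earlier in this diff (SSL_add1_chain_cert, SSL_set1_chain_cert_store, SSL_build_cert_chain). A minimal sketch under stated assumptions: the SSL object already has its end-entity certificate and private key loaded, and the intermediate certificate ica and trust store come from elsewhere; attach_chain is a hypothetical helper name and error handling is trimmed.

#include <openssl/ssl.h>

/* Illustrative only: attach a per-certificate chain and rebuild/verify it. */
static int attach_chain(SSL *ssl, X509 *ica, X509_STORE *trusted)
{
    /* Chain is stored per CERT_PKEY instead of the shared extra_certs list. */
    if (!SSL_add1_chain_cert(ssl, ica))
        return 0;
    /* Store consulted by ssl_build_cert_chain() when rebuilding the chain. */
    if (!SSL_set1_chain_cert_store(ssl, trusted))
        return 0;
    /* Returns 1 (or 2 when verify errors were ignored) on success, 0 on failure. */
    return SSL_build_cert_chain(ssl, SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR) > 0;
}

The design point the helpers above encode is that each CERT_PKEY slot now carries its own chain, verify store and chain store, so multi-certificate servers (for example RSA plus ECDSA) no longer share one extra_certs list.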
@@ -466,16 +709,24 @@ int ssl_verify_cert_chain(SSL *s, STACK_OF(X509) *sk) { X509 *x; int i; + X509_STORE *verify_store; X509_STORE_CTX ctx; + if (s->cert->verify_store) + verify_store = s->cert->verify_store; + else + verify_store = s->ctx->cert_store; + if ((sk == NULL) || (sk_X509_num(sk) == 0)) return (0); x = sk_X509_value(sk, 0); - if (!X509_STORE_CTX_init(&ctx, s->ctx->cert_store, x, sk)) { + if (!X509_STORE_CTX_init(&ctx, verify_store, x, sk)) { SSLerr(SSL_F_SSL_VERIFY_CERT_CHAIN, ERR_R_X509_LIB); return (0); } + /* Set suite B flags if needed */ + X509_STORE_CTX_set_flags(&ctx, tls1_suiteb(s)); #if 0 if (SSL_get_verify_depth(s) >= 0) X509_STORE_CTX_set_depth(&ctx, SSL_get_verify_depth(s)); @@ -797,3 +1048,210 @@ int SSL_add_dir_cert_subjects_to_stack(STACK_OF(X509_NAME) *stack, CRYPTO_w_unlock(CRYPTO_LOCK_READDIR); return ret; } + +/* Add a certificate to a BUF_MEM structure */ + +static int ssl_add_cert_to_buf(BUF_MEM *buf, unsigned long *l, X509 *x) +{ + int n; + unsigned char *p; + + n = i2d_X509(x, NULL); + if (!BUF_MEM_grow_clean(buf, (int)(n + (*l) + 3))) { + SSLerr(SSL_F_SSL_ADD_CERT_TO_BUF, ERR_R_BUF_LIB); + return 0; + } + p = (unsigned char *)&(buf->data[*l]); + l2n3(n, p); + i2d_X509(x, &p); + *l += n + 3; + + return 1; +} + +/* Add certificate chain to internal SSL BUF_MEM strcuture */ +int ssl_add_cert_chain(SSL *s, CERT_PKEY *cpk, unsigned long *l) +{ + BUF_MEM *buf = s->init_buf; + int no_chain; + int i; + + X509 *x; + STACK_OF(X509) *extra_certs; + X509_STORE *chain_store; + + if (cpk) + x = cpk->x509; + else + x = NULL; + + if (s->cert->chain_store) + chain_store = s->cert->chain_store; + else + chain_store = s->ctx->cert_store; + + /* + * If we have a certificate specific chain use it, else use parent ctx. + */ + if (cpk && cpk->chain) + extra_certs = cpk->chain; + else + extra_certs = s->ctx->extra_certs; + + if ((s->mode & SSL_MODE_NO_AUTO_CHAIN) || extra_certs) + no_chain = 1; + else + no_chain = 0; + + /* TLSv1 sends a chain with nothing in it, instead of an alert */ + if (!BUF_MEM_grow_clean(buf, 10)) { + SSLerr(SSL_F_SSL_ADD_CERT_CHAIN, ERR_R_BUF_LIB); + return 0; + } + if (x != NULL) { + if (no_chain) { + if (!ssl_add_cert_to_buf(buf, l, x)) + return 0; + } else { + X509_STORE_CTX xs_ctx; + + if (!X509_STORE_CTX_init(&xs_ctx, chain_store, x, NULL)) { + SSLerr(SSL_F_SSL_ADD_CERT_CHAIN, ERR_R_X509_LIB); + return (0); + } + X509_verify_cert(&xs_ctx); + /* Don't leave errors in the queue */ + ERR_clear_error(); + for (i = 0; i < sk_X509_num(xs_ctx.chain); i++) { + x = sk_X509_value(xs_ctx.chain, i); + + if (!ssl_add_cert_to_buf(buf, l, x)) { + X509_STORE_CTX_cleanup(&xs_ctx); + return 0; + } + } + X509_STORE_CTX_cleanup(&xs_ctx); + } + } + for (i = 0; i < sk_X509_num(extra_certs); i++) { + x = sk_X509_value(extra_certs, i); + if (!ssl_add_cert_to_buf(buf, l, x)) + return 0; + } + + return 1; +} + +/* Build a certificate chain for current certificate */ +int ssl_build_cert_chain(CERT *c, X509_STORE *chain_store, int flags) +{ + CERT_PKEY *cpk = c->key; + X509_STORE_CTX xs_ctx; + STACK_OF(X509) *chain = NULL, *untrusted = NULL; + X509 *x; + int i, rv = 0; + unsigned long error; + + if (!cpk->x509) { + SSLerr(SSL_F_SSL_BUILD_CERT_CHAIN, SSL_R_NO_CERTIFICATE_SET); + goto err; + } + /* Rearranging and check the chain: add everything to a store */ + if (flags & SSL_BUILD_CHAIN_FLAG_CHECK) { + chain_store = X509_STORE_new(); + if (!chain_store) + goto err; + for (i = 0; i < sk_X509_num(cpk->chain); i++) { + x = sk_X509_value(cpk->chain, i); + if 
(!X509_STORE_add_cert(chain_store, x)) { + error = ERR_peek_last_error(); + if (ERR_GET_LIB(error) != ERR_LIB_X509 || + ERR_GET_REASON(error) != + X509_R_CERT_ALREADY_IN_HASH_TABLE) + goto err; + ERR_clear_error(); + } + } + /* Add EE cert too: it might be self signed */ + if (!X509_STORE_add_cert(chain_store, cpk->x509)) { + error = ERR_peek_last_error(); + if (ERR_GET_LIB(error) != ERR_LIB_X509 || + ERR_GET_REASON(error) != X509_R_CERT_ALREADY_IN_HASH_TABLE) + goto err; + ERR_clear_error(); + } + } else { + if (c->chain_store) + chain_store = c->chain_store; + + if (flags & SSL_BUILD_CHAIN_FLAG_UNTRUSTED) + untrusted = cpk->chain; + } + + if (!X509_STORE_CTX_init(&xs_ctx, chain_store, cpk->x509, untrusted)) { + SSLerr(SSL_F_SSL_BUILD_CERT_CHAIN, ERR_R_X509_LIB); + goto err; + } + /* Set suite B flags if needed */ + X509_STORE_CTX_set_flags(&xs_ctx, + c->cert_flags & SSL_CERT_FLAG_SUITEB_128_LOS); + + i = X509_verify_cert(&xs_ctx); + if (i <= 0 && flags & SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR) { + if (flags & SSL_BUILD_CHAIN_FLAG_CLEAR_ERROR) + ERR_clear_error(); + i = 1; + rv = 2; + } + if (i > 0) + chain = X509_STORE_CTX_get1_chain(&xs_ctx); + if (i <= 0) { + SSLerr(SSL_F_SSL_BUILD_CERT_CHAIN, SSL_R_CERTIFICATE_VERIFY_FAILED); + i = X509_STORE_CTX_get_error(&xs_ctx); + ERR_add_error_data(2, "Verify error:", + X509_verify_cert_error_string(i)); + + X509_STORE_CTX_cleanup(&xs_ctx); + goto err; + } + X509_STORE_CTX_cleanup(&xs_ctx); + if (cpk->chain) + sk_X509_pop_free(cpk->chain, X509_free); + /* Remove EE certificate from chain */ + x = sk_X509_shift(chain); + X509_free(x); + if (flags & SSL_BUILD_CHAIN_FLAG_NO_ROOT) { + if (sk_X509_num(chain) > 0) { + /* See if last cert is self signed */ + x = sk_X509_value(chain, sk_X509_num(chain) - 1); + X509_check_purpose(x, -1, 0); + if (x->ex_flags & EXFLAG_SS) { + x = sk_X509_pop(chain); + X509_free(x); + } + } + } + cpk->chain = chain; + if (rv == 0) + rv = 1; + err: + if (flags & SSL_BUILD_CHAIN_FLAG_CHECK) + X509_STORE_free(chain_store); + + return rv; +} + +int ssl_cert_set_cert_store(CERT *c, X509_STORE *store, int chain, int ref) +{ + X509_STORE **pstore; + if (chain) + pstore = &c->chain_store; + else + pstore = &c->verify_store; + if (*pstore) + X509_STORE_free(*pstore); + *pstore = store; + if (ref && store) + CRYPTO_add(&store->references, 1, CRYPTO_LOCK_X509_STORE); + return 1; +} diff --git a/deps/openssl/openssl/ssl/ssl_ciph.c b/deps/openssl/openssl/ssl/ssl_ciph.c index cac525e86009c7..2cc9a4a21f75a6 100644 --- a/deps/openssl/openssl/ssl/ssl_ciph.c +++ b/deps/openssl/openssl/ssl/ssl_ciph.c @@ -245,13 +245,11 @@ static const SSL_CIPHER cipher_aliases[] = { */ {0, SSL_TXT_kRSA, 0, SSL_kRSA, 0, 0, 0, 0, 0, 0, 0, 0}, - /* no such ciphersuites supported! */ {0, SSL_TXT_kDHr, 0, SSL_kDHr, 0, 0, 0, 0, 0, 0, 0, 0}, - /* no such ciphersuites supported! */ {0, SSL_TXT_kDHd, 0, SSL_kDHd, 0, 0, 0, 0, 0, 0, 0, 0}, - /* no such ciphersuites supported! 
*/ {0, SSL_TXT_kDH, 0, SSL_kDHr | SSL_kDHd, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, SSL_TXT_kEDH, 0, SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, SSL_TXT_kEDH, 0, SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, SSL_TXT_kDHE, 0, SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_DH, 0, SSL_kDHr | SSL_kDHd | SSL_kEDH, 0, 0, 0, 0, 0, 0, 0, 0}, @@ -261,6 +259,7 @@ static const SSL_CIPHER cipher_aliases[] = { {0, SSL_TXT_kECDHe, 0, SSL_kECDHe, 0, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_kECDH, 0, SSL_kECDHr | SSL_kECDHe, 0, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_kEECDH, 0, SSL_kEECDH, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, SSL_TXT_kECDHE, 0, SSL_kEECDH, 0, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_ECDH, 0, SSL_kECDHr | SSL_kECDHe | SSL_kEECDH, 0, 0, 0, 0, 0, 0, 0, 0}, @@ -287,7 +286,9 @@ static const SSL_CIPHER cipher_aliases[] = { /* aliases combining key exchange and server authentication */ {0, SSL_TXT_EDH, 0, SSL_kEDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0}, + {0, SSL_TXT_DHE, 0, SSL_kEDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_EECDH, 0, SSL_kEECDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0}, + {0, SSL_TXT_ECDHE, 0, SSL_kEECDH, ~SSL_aNULL, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_NULL, 0, 0, 0, SSL_eNULL, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_KRB5, 0, SSL_kKRB5, SSL_aKRB5, 0, 0, 0, 0, 0, 0, 0}, {0, SSL_TXT_RSA, 0, SSL_kRSA, SSL_aRSA, 0, 0, 0, 0, 0, 0, 0}, @@ -343,6 +344,25 @@ static const SSL_CIPHER cipher_aliases[] = { {0, SSL_TXT_HIGH, 0, 0, 0, 0, 0, 0, SSL_HIGH, 0, 0, 0}, /* FIPS 140-2 approved ciphersuite */ {0, SSL_TXT_FIPS, 0, 0, 0, ~SSL_eNULL, 0, 0, SSL_FIPS, 0, 0, 0}, + /* "DHE-" aliases to "EDH-" labels (for forward compatibility) */ + {0, SSL3_TXT_DHE_DSS_DES_40_CBC_SHA, 0, + SSL_kDHE, SSL_aDSS, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_EXPORT | SSL_EXP40, + 0, 0, 0,}, + {0, SSL3_TXT_DHE_DSS_DES_64_CBC_SHA, 0, + SSL_kDHE, SSL_aDSS, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_NOT_EXP | SSL_LOW, + 0, 0, 0,}, + {0, SSL3_TXT_DHE_DSS_DES_192_CBC3_SHA, 0, + SSL_kDHE, SSL_aDSS, SSL_3DES, SSL_SHA1, SSL_SSLV3, + SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, 0, 0, 0,}, + {0, SSL3_TXT_DHE_RSA_DES_40_CBC_SHA, 0, + SSL_kDHE, SSL_aRSA, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_EXPORT | SSL_EXP40, + 0, 0, 0,}, + {0, SSL3_TXT_DHE_RSA_DES_64_CBC_SHA, 0, + SSL_kDHE, SSL_aRSA, SSL_DES, SSL_SHA1, SSL_SSLV3, SSL_NOT_EXP | SSL_LOW, + 0, 0, 0,}, + {0, SSL3_TXT_DHE_RSA_DES_192_CBC3_SHA, 0, + SSL_kDHE, SSL_aRSA, SSL_3DES, SSL_SHA1, SSL_SSLV3, + SSL_NOT_EXP | SSL_HIGH | SSL_FIPS, 0, 0, 0,}, }; /* @@ -638,6 +658,14 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, c->algorithm_mac == SSL_SHA1 && (evp = EVP_get_cipherbyname("AES-256-CBC-HMAC-SHA1"))) *enc = evp, *md = NULL; + else if (c->algorithm_enc == SSL_AES128 && + c->algorithm_mac == SSL_SHA256 && + (evp = EVP_get_cipherbyname("AES-128-CBC-HMAC-SHA256"))) + *enc = evp, *md = NULL; + else if (c->algorithm_enc == SSL_AES256 && + c->algorithm_mac == SSL_SHA256 && + (evp = EVP_get_cipherbyname("AES-256-CBC-HMAC-SHA256"))) + *enc = evp, *md = NULL; return (1); } else return (0); @@ -710,8 +738,6 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth, #ifdef OPENSSL_NO_DSA *auth |= SSL_aDSS; #endif - *mkey |= SSL_kDHr | SSL_kDHd; /* no such ciphersuites supported! 
*/ - *auth |= SSL_aDH; #ifdef OPENSSL_NO_DH *mkey |= SSL_kDHr | SSL_kDHd | SSL_kEDH; *auth |= SSL_aDH; @@ -996,6 +1022,10 @@ static void ssl_cipher_apply_rule(unsigned long cipher_id, cp->name, cp->algorithm_mkey, cp->algorithm_auth, cp->algorithm_enc, cp->algorithm_mac, cp->algorithm_ssl, cp->algo_strength); +#endif +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + if (cipher_id && cipher_id != cp->id) + continue; #endif if (algo_strength == SSL_EXP_MASK && SSL_C_IS_EXPORT(cp)) goto ok; @@ -1369,10 +1399,71 @@ static int ssl_cipher_process_rulestr(const char *rule_str, return (retval); } +#ifndef OPENSSL_NO_EC +static int check_suiteb_cipher_list(const SSL_METHOD *meth, CERT *c, + const char **prule_str) +{ + unsigned int suiteb_flags = 0, suiteb_comb2 = 0; + if (!strcmp(*prule_str, "SUITEB128")) + suiteb_flags = SSL_CERT_FLAG_SUITEB_128_LOS; + else if (!strcmp(*prule_str, "SUITEB128ONLY")) + suiteb_flags = SSL_CERT_FLAG_SUITEB_128_LOS_ONLY; + else if (!strcmp(*prule_str, "SUITEB128C2")) { + suiteb_comb2 = 1; + suiteb_flags = SSL_CERT_FLAG_SUITEB_128_LOS; + } else if (!strcmp(*prule_str, "SUITEB192")) + suiteb_flags = SSL_CERT_FLAG_SUITEB_192_LOS; + + if (suiteb_flags) { + c->cert_flags &= ~SSL_CERT_FLAG_SUITEB_128_LOS; + c->cert_flags |= suiteb_flags; + } else + suiteb_flags = c->cert_flags & SSL_CERT_FLAG_SUITEB_128_LOS; + + if (!suiteb_flags) + return 1; + /* Check version: if TLS 1.2 ciphers allowed we can use Suite B */ + + if (!(meth->ssl3_enc->enc_flags & SSL_ENC_FLAG_TLS1_2_CIPHERS)) { + if (meth->ssl3_enc->enc_flags & SSL_ENC_FLAG_DTLS) + SSLerr(SSL_F_CHECK_SUITEB_CIPHER_LIST, + SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE); + else + SSLerr(SSL_F_CHECK_SUITEB_CIPHER_LIST, + SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE); + return 0; + } +# ifndef OPENSSL_NO_ECDH + switch (suiteb_flags) { + case SSL_CERT_FLAG_SUITEB_128_LOS: + if (suiteb_comb2) + *prule_str = "ECDHE-ECDSA-AES256-GCM-SHA384"; + else + *prule_str = + "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384"; + break; + case SSL_CERT_FLAG_SUITEB_128_LOS_ONLY: + *prule_str = "ECDHE-ECDSA-AES128-GCM-SHA256"; + break; + case SSL_CERT_FLAG_SUITEB_192_LOS: + *prule_str = "ECDHE-ECDSA-AES256-GCM-SHA384"; + break; + } + /* Set auto ECDH parameter determination */ + c->ecdh_tmp_auto = 1; + return 1; +# else + SSLerr(SSL_F_CHECK_SUITEB_CIPHER_LIST, + SSL_R_ECDH_REQUIRED_FOR_SUITEB_MODE); + return 0; +# endif +} +#endif + STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, STACK_OF(SSL_CIPHER) **cipher_list, STACK_OF(SSL_CIPHER) **cipher_list_by_id, - const char *rule_str) + const char *rule_str, CERT *c) { int ok, num_of_ciphers, num_of_alias_max, num_of_group_aliases; unsigned long disabled_mkey, disabled_auth, disabled_enc, disabled_mac, @@ -1387,6 +1478,10 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *ssl_method, STACK */ if (rule_str == NULL || cipher_list == NULL || cipher_list_by_id == NULL) return NULL; +#ifndef OPENSSL_NO_EC + if (!check_suiteb_cipher_list(ssl_method, c, &rule_str)) + return NULL; +#endif /* * To reduce the work to do we only want to process the compiled @@ -1854,6 +1949,26 @@ STACK_OF(SSL_COMP) *SSL_COMP_get_compression_methods(void) return (ssl_comp_methods); } +STACK_OF(SSL_COMP) *SSL_COMP_set0_compression_methods(STACK_OF(SSL_COMP) + *meths) +{ + STACK_OF(SSL_COMP) *old_meths = ssl_comp_methods; + ssl_comp_methods = meths; + return old_meths; +} + +static void cmeth_free(SSL_COMP *cm) +{ + OPENSSL_free(cm); +} + +void SSL_COMP_free_compression_methods(void) +{ + 
STACK_OF(SSL_COMP) *old_meths = ssl_comp_methods; + ssl_comp_methods = NULL; + sk_SSL_COMP_pop_free(old_meths, cmeth_free); +} + int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm) { SSL_COMP *comp; @@ -1904,5 +2019,55 @@ const char *SSL_COMP_get_name(const COMP_METHOD *comp) return comp->name; return NULL; } - #endif +/* For a cipher return the index corresponding to the certificate type */ +int ssl_cipher_get_cert_index(const SSL_CIPHER *c) +{ + unsigned long alg_k, alg_a; + + alg_k = c->algorithm_mkey; + alg_a = c->algorithm_auth; + + if (alg_k & (SSL_kECDHr | SSL_kECDHe)) { + /* + * we don't need to look at SSL_kEECDH since no certificate is needed + * for anon ECDH and for authenticated EECDH, the check for the auth + * algorithm will set i correctly NOTE: For ECDH-RSA, we need an ECC + * not an RSA cert but for EECDH-RSA we need an RSA cert. Placing the + * checks for SSL_kECDH before RSA checks ensures the correct cert is + * chosen. + */ + return SSL_PKEY_ECC; + } else if (alg_a & SSL_aECDSA) + return SSL_PKEY_ECC; + else if (alg_k & SSL_kDHr) + return SSL_PKEY_DH_RSA; + else if (alg_k & SSL_kDHd) + return SSL_PKEY_DH_DSA; + else if (alg_a & SSL_aDSS) + return SSL_PKEY_DSA_SIGN; + else if (alg_a & SSL_aRSA) + return SSL_PKEY_RSA_ENC; + else if (alg_a & SSL_aKRB5) + /* VRS something else here? */ + return -1; + else if (alg_a & SSL_aGOST94) + return SSL_PKEY_GOST94; + else if (alg_a & SSL_aGOST01) + return SSL_PKEY_GOST01; + return -1; +} + +const SSL_CIPHER *ssl_get_cipher_by_char(SSL *ssl, const unsigned char *ptr) +{ + const SSL_CIPHER *c; + c = ssl->method->get_cipher_by_char(ptr); + if (c == NULL || c->valid == 0) + return NULL; + return c; +} + +const SSL_CIPHER *SSL_CIPHER_find(SSL *ssl, const unsigned char *ptr) +{ + return ssl->method->get_cipher_by_char(ptr); +} diff --git a/deps/openssl/openssl/ssl/ssl_conf.c b/deps/openssl/openssl/ssl/ssl_conf.c new file mode 100644 index 00000000000000..5478840deae994 --- /dev/null +++ b/deps/openssl/openssl/ssl/ssl_conf.c @@ -0,0 +1,683 @@ +/* + * ! \file ssl/ssl_conf.c \brief SSL configuration functions + */ +/* ==================================================================== + * Copyright (c) 2012 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. 
Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +#ifdef REF_CHECK +# include +#endif +#include +#include "ssl_locl.h" +#include +#include +#ifndef OPENSSL_NO_DH +# include +#endif + +/* + * structure holding name tables. This is used for pemitted elements in lists + * such as TLSv1 and single command line switches such as no_tls1 + */ + +typedef struct { + const char *name; + int namelen; + unsigned int name_flags; + unsigned long option_value; +} ssl_flag_tbl; + +/* Sense of name is inverted e.g. "TLSv1" will clear SSL_OP_NO_TLSv1 */ +#define SSL_TFLAG_INV 0x1 +/* Flags refers to cert_flags not options */ +#define SSL_TFLAG_CERT 0x2 +/* Option can only be used for clients */ +#define SSL_TFLAG_CLIENT SSL_CONF_FLAG_CLIENT +/* Option can only be used for servers */ +#define SSL_TFLAG_SERVER SSL_CONF_FLAG_SERVER +#define SSL_TFLAG_BOTH (SSL_TFLAG_CLIENT|SSL_TFLAG_SERVER) + +#define SSL_FLAG_TBL(str, flag) \ + {str, (int)(sizeof(str) - 1), SSL_TFLAG_BOTH, flag} +#define SSL_FLAG_TBL_SRV(str, flag) \ + {str, (int)(sizeof(str) - 1), SSL_TFLAG_SERVER, flag} +#define SSL_FLAG_TBL_CLI(str, flag) \ + {str, (int)(sizeof(str) - 1), SSL_TFLAG_CLIENT, flag} +#define SSL_FLAG_TBL_INV(str, flag) \ + {str, (int)(sizeof(str) - 1), SSL_TFLAG_INV|SSL_TFLAG_BOTH, flag} +#define SSL_FLAG_TBL_SRV_INV(str, flag) \ + {str, (int)(sizeof(str) - 1), SSL_TFLAG_INV|SSL_TFLAG_SERVER, flag} +#define SSL_FLAG_TBL_CERT(str, flag) \ + {str, (int)(sizeof(str) - 1), SSL_TFLAG_CERT|SSL_TFLAG_BOTH, flag} + +/* + * Opaque structure containing SSL configuration context. + */ + +struct ssl_conf_ctx_st { + /* + * Various flags indicating (among other things) which options we will + * recognise. 
+ */ + unsigned int flags; + /* Prefix and length of commands */ + char *prefix; + size_t prefixlen; + /* SSL_CTX or SSL structure to perform operations on */ + SSL_CTX *ctx; + SSL *ssl; + /* Pointer to SSL or SSL_CTX options field or NULL if none */ + unsigned long *poptions; + /* Pointer to SSL or SSL_CTX cert_flags or NULL if none */ + unsigned int *pcert_flags; + /* Current flag table being worked on */ + const ssl_flag_tbl *tbl; + /* Size of table */ + size_t ntbl; +}; + +static int ssl_match_option(SSL_CONF_CTX *cctx, const ssl_flag_tbl *tbl, + const char *name, int namelen, int onoff) +{ + /* If name not relevant for context skip */ + if (!(cctx->flags & tbl->name_flags & SSL_TFLAG_BOTH)) + return 0; + if (namelen == -1) { + if (strcmp(tbl->name, name)) + return 0; + } else if (tbl->namelen != namelen + || strncasecmp(tbl->name, name, namelen)) + return 0; + if (cctx->poptions) { + if (tbl->name_flags & SSL_TFLAG_INV) + onoff ^= 1; + if (tbl->name_flags & SSL_TFLAG_CERT) { + if (onoff) + *cctx->pcert_flags |= tbl->option_value; + else + *cctx->pcert_flags &= ~tbl->option_value; + } else { + if (onoff) + *cctx->poptions |= tbl->option_value; + else + *cctx->poptions &= ~tbl->option_value; + } + } + return 1; +} + +static int ssl_set_option_list(const char *elem, int len, void *usr) +{ + SSL_CONF_CTX *cctx = usr; + size_t i; + const ssl_flag_tbl *tbl; + int onoff = 1; + /* + * len == -1 indicates not being called in list context, just for single + * command line switches, so don't allow +, -. + */ + if (elem == NULL) + return 0; + if (len != -1) { + if (*elem == '+') { + elem++; + len--; + onoff = 1; + } else if (*elem == '-') { + elem++; + len--; + onoff = 0; + } + } + for (i = 0, tbl = cctx->tbl; i < cctx->ntbl; i++, tbl++) { + if (ssl_match_option(cctx, tbl, elem, len, onoff)) + return 1; + } + return 0; +} + +/* Single command line switches with no argument e.g. 
-no_ssl3 */ +static int ctrl_str_option(SSL_CONF_CTX *cctx, const char *cmd) +{ + static const ssl_flag_tbl ssl_option_single[] = { + SSL_FLAG_TBL("no_ssl2", SSL_OP_NO_SSLv2), + SSL_FLAG_TBL("no_ssl3", SSL_OP_NO_SSLv3), + SSL_FLAG_TBL("no_tls1", SSL_OP_NO_TLSv1), + SSL_FLAG_TBL("no_tls1_1", SSL_OP_NO_TLSv1_1), + SSL_FLAG_TBL("no_tls1_2", SSL_OP_NO_TLSv1_2), + SSL_FLAG_TBL("bugs", SSL_OP_ALL), + SSL_FLAG_TBL("no_comp", SSL_OP_NO_COMPRESSION), + SSL_FLAG_TBL_SRV("ecdh_single", SSL_OP_SINGLE_ECDH_USE), +#ifndef OPENSSL_NO_TLSEXT + SSL_FLAG_TBL("no_ticket", SSL_OP_NO_TICKET), +#endif + SSL_FLAG_TBL_SRV("serverpref", SSL_OP_CIPHER_SERVER_PREFERENCE), + SSL_FLAG_TBL("legacy_renegotiation", + SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION), + SSL_FLAG_TBL_SRV("legacy_server_connect", + SSL_OP_LEGACY_SERVER_CONNECT), + SSL_FLAG_TBL_SRV("no_resumption_on_reneg", + SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION), + SSL_FLAG_TBL_SRV_INV("no_legacy_server_connect", + SSL_OP_LEGACY_SERVER_CONNECT), + SSL_FLAG_TBL_CERT("strict", SSL_CERT_FLAG_TLS_STRICT), +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + SSL_FLAG_TBL_CERT("debug_broken_protocol", + SSL_CERT_FLAG_BROKEN_PROTOCOL), +#endif + }; + cctx->tbl = ssl_option_single; + cctx->ntbl = sizeof(ssl_option_single) / sizeof(ssl_flag_tbl); + return ssl_set_option_list(cmd, -1, cctx); +} + +/* Set supported signature algorithms */ +static int cmd_SignatureAlgorithms(SSL_CONF_CTX *cctx, const char *value) +{ + int rv; + if (cctx->ssl) + rv = SSL_set1_sigalgs_list(cctx->ssl, value); + /* NB: ctx == NULL performs syntax checking only */ + else + rv = SSL_CTX_set1_sigalgs_list(cctx->ctx, value); + return rv > 0; +} + +/* Set supported client signature algorithms */ +static int cmd_ClientSignatureAlgorithms(SSL_CONF_CTX *cctx, + const char *value) +{ + int rv; + if (cctx->ssl) + rv = SSL_set1_client_sigalgs_list(cctx->ssl, value); + /* NB: ctx == NULL performs syntax checking only */ + else + rv = SSL_CTX_set1_client_sigalgs_list(cctx->ctx, value); + return rv > 0; +} + +static int cmd_Curves(SSL_CONF_CTX *cctx, const char *value) +{ + int rv; + if (cctx->ssl) + rv = SSL_set1_curves_list(cctx->ssl, value); + /* NB: ctx == NULL performs syntax checking only */ + else + rv = SSL_CTX_set1_curves_list(cctx->ctx, value); + return rv > 0; +} + +#ifndef OPENSSL_NO_ECDH +/* ECDH temporary parameters */ +static int cmd_ECDHParameters(SSL_CONF_CTX *cctx, const char *value) +{ + int onoff = -1, rv = 1; + if (!(cctx->flags & SSL_CONF_FLAG_SERVER)) + return -2; + if (cctx->flags & SSL_CONF_FLAG_FILE) { + if (*value == '+') { + onoff = 1; + value++; + } + if (*value == '-') { + onoff = 0; + value++; + } + if (!strcasecmp(value, "automatic")) { + if (onoff == -1) + onoff = 1; + } else if (onoff != -1) + return 0; + } else if (cctx->flags & SSL_CONF_FLAG_CMDLINE) { + if (!strcmp(value, "auto")) + onoff = 1; + } + + if (onoff != -1) { + if (cctx->ctx) + rv = SSL_CTX_set_ecdh_auto(cctx->ctx, onoff); + else if (cctx->ssl) + rv = SSL_set_ecdh_auto(cctx->ssl, onoff); + } else { + EC_KEY *ecdh; + int nid; + nid = EC_curve_nist2nid(value); + if (nid == NID_undef) + nid = OBJ_sn2nid(value); + if (nid == 0) + return 0; + ecdh = EC_KEY_new_by_curve_name(nid); + if (!ecdh) + return 0; + if (cctx->ctx) + rv = SSL_CTX_set_tmp_ecdh(cctx->ctx, ecdh); + else if (cctx->ssl) + rv = SSL_set_tmp_ecdh(cctx->ssl, ecdh); + EC_KEY_free(ecdh); + } + + return rv > 0; +} +#endif +static int cmd_CipherString(SSL_CONF_CTX *cctx, const char *value) +{ + int rv = 1; + if (cctx->ctx) + rv = 
SSL_CTX_set_cipher_list(cctx->ctx, value); + if (cctx->ssl) + rv = SSL_set_cipher_list(cctx->ssl, value); + return rv > 0; +} + +static int cmd_Protocol(SSL_CONF_CTX *cctx, const char *value) +{ + static const ssl_flag_tbl ssl_protocol_list[] = { + SSL_FLAG_TBL_INV("ALL", SSL_OP_NO_SSL_MASK), + SSL_FLAG_TBL_INV("SSLv2", SSL_OP_NO_SSLv2), + SSL_FLAG_TBL_INV("SSLv3", SSL_OP_NO_SSLv3), + SSL_FLAG_TBL_INV("TLSv1", SSL_OP_NO_TLSv1), + SSL_FLAG_TBL_INV("TLSv1.1", SSL_OP_NO_TLSv1_1), + SSL_FLAG_TBL_INV("TLSv1.2", SSL_OP_NO_TLSv1_2) + }; + if (!(cctx->flags & SSL_CONF_FLAG_FILE)) + return -2; + cctx->tbl = ssl_protocol_list; + cctx->ntbl = sizeof(ssl_protocol_list) / sizeof(ssl_flag_tbl); + return CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx); +} + +static int cmd_Options(SSL_CONF_CTX *cctx, const char *value) +{ + static const ssl_flag_tbl ssl_option_list[] = { + SSL_FLAG_TBL_INV("SessionTicket", SSL_OP_NO_TICKET), + SSL_FLAG_TBL_INV("EmptyFragments", + SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS), + SSL_FLAG_TBL("Bugs", SSL_OP_ALL), + SSL_FLAG_TBL_INV("Compression", SSL_OP_NO_COMPRESSION), + SSL_FLAG_TBL_SRV("ServerPreference", SSL_OP_CIPHER_SERVER_PREFERENCE), + SSL_FLAG_TBL_SRV("NoResumptionOnRenegotiation", + SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION), + SSL_FLAG_TBL_SRV("DHSingle", SSL_OP_SINGLE_DH_USE), + SSL_FLAG_TBL_SRV("ECDHSingle", SSL_OP_SINGLE_ECDH_USE), + SSL_FLAG_TBL("UnsafeLegacyRenegotiation", + SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION), + }; + if (!(cctx->flags & SSL_CONF_FLAG_FILE)) + return -2; + if (value == NULL) + return -3; + cctx->tbl = ssl_option_list; + cctx->ntbl = sizeof(ssl_option_list) / sizeof(ssl_flag_tbl); + return CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx); +} + +static int cmd_Certificate(SSL_CONF_CTX *cctx, const char *value) +{ + int rv = 1; + if (!(cctx->flags & SSL_CONF_FLAG_CERTIFICATE)) + return -2; + if (cctx->ctx) + rv = SSL_CTX_use_certificate_chain_file(cctx->ctx, value); + if (cctx->ssl) + rv = SSL_use_certificate_file(cctx->ssl, value, SSL_FILETYPE_PEM); + return rv > 0; +} + +static int cmd_PrivateKey(SSL_CONF_CTX *cctx, const char *value) +{ + int rv = 1; + if (!(cctx->flags & SSL_CONF_FLAG_CERTIFICATE)) + return -2; + if (cctx->ctx) + rv = SSL_CTX_use_PrivateKey_file(cctx->ctx, value, SSL_FILETYPE_PEM); + if (cctx->ssl) + rv = SSL_use_PrivateKey_file(cctx->ssl, value, SSL_FILETYPE_PEM); + return rv > 0; +} + +static int cmd_ServerInfoFile(SSL_CONF_CTX *cctx, const char *value) +{ + int rv = 1; + if (!(cctx->flags & SSL_CONF_FLAG_CERTIFICATE)) + return -2; + if (!(cctx->flags & SSL_CONF_FLAG_SERVER)) + return -2; + if (cctx->ctx) + rv = SSL_CTX_use_serverinfo_file(cctx->ctx, value); + return rv > 0; +} + +#ifndef OPENSSL_NO_DH +static int cmd_DHParameters(SSL_CONF_CTX *cctx, const char *value) +{ + int rv = 0; + DH *dh = NULL; + BIO *in = NULL; + if (!(cctx->flags & SSL_CONF_FLAG_CERTIFICATE)) + return -2; + if (cctx->ctx || cctx->ssl) { + in = BIO_new(BIO_s_file_internal()); + if (!in) + goto end; + if (BIO_read_filename(in, value) <= 0) + goto end; + dh = PEM_read_bio_DHparams(in, NULL, NULL, NULL); + if (!dh) + goto end; + } else + return 1; + if (cctx->ctx) + rv = SSL_CTX_set_tmp_dh(cctx->ctx, dh); + if (cctx->ssl) + rv = SSL_set_tmp_dh(cctx->ssl, dh); + end: + if (dh) + DH_free(dh); + if (in) + BIO_free(in); + return rv > 0; +} +#endif +typedef struct { + int (*cmd) (SSL_CONF_CTX *cctx, const char *value); + const char *str_file; + const char *str_cmdline; + unsigned int value_type; +} ssl_conf_cmd_tbl; + +/* Table of 
supported parameters */ + +#define SSL_CONF_CMD(name, cmdopt, type) \ + {cmd_##name, #name, cmdopt, type} + +#define SSL_CONF_CMD_STRING(name, cmdopt) \ + SSL_CONF_CMD(name, cmdopt, SSL_CONF_TYPE_STRING) + +static const ssl_conf_cmd_tbl ssl_conf_cmds[] = { + SSL_CONF_CMD_STRING(SignatureAlgorithms, "sigalgs"), + SSL_CONF_CMD_STRING(ClientSignatureAlgorithms, "client_sigalgs"), + SSL_CONF_CMD_STRING(Curves, "curves"), +#ifndef OPENSSL_NO_ECDH + SSL_CONF_CMD_STRING(ECDHParameters, "named_curve"), +#endif + SSL_CONF_CMD_STRING(CipherString, "cipher"), + SSL_CONF_CMD_STRING(Protocol, NULL), + SSL_CONF_CMD_STRING(Options, NULL), + SSL_CONF_CMD(Certificate, "cert", SSL_CONF_TYPE_FILE), + SSL_CONF_CMD(PrivateKey, "key", SSL_CONF_TYPE_FILE), + SSL_CONF_CMD(ServerInfoFile, NULL, SSL_CONF_TYPE_FILE), +#ifndef OPENSSL_NO_DH + SSL_CONF_CMD(DHParameters, "dhparam", SSL_CONF_TYPE_FILE) +#endif +}; + +static int ssl_conf_cmd_skip_prefix(SSL_CONF_CTX *cctx, const char **pcmd) +{ + if (!pcmd || !*pcmd) + return 0; + /* If a prefix is set, check and skip */ + if (cctx->prefix) { + if (strlen(*pcmd) <= cctx->prefixlen) + return 0; + if (cctx->flags & SSL_CONF_FLAG_CMDLINE && + strncmp(*pcmd, cctx->prefix, cctx->prefixlen)) + return 0; + if (cctx->flags & SSL_CONF_FLAG_FILE && + strncasecmp(*pcmd, cctx->prefix, cctx->prefixlen)) + return 0; + *pcmd += cctx->prefixlen; + } else if (cctx->flags & SSL_CONF_FLAG_CMDLINE) { + if (**pcmd != '-' || !(*pcmd)[1]) + return 0; + *pcmd += 1; + } + return 1; +} + +static const ssl_conf_cmd_tbl *ssl_conf_cmd_lookup(SSL_CONF_CTX *cctx, + const char *cmd) +{ + const ssl_conf_cmd_tbl *t; + size_t i; + if (cmd == NULL) + return NULL; + + /* Look for matching parameter name in table */ + for (i = 0, t = ssl_conf_cmds; + i < sizeof(ssl_conf_cmds) / sizeof(ssl_conf_cmd_tbl); i++, t++) { + if (cctx->flags & SSL_CONF_FLAG_CMDLINE) { + if (t->str_cmdline && !strcmp(t->str_cmdline, cmd)) + return t; + } + if (cctx->flags & SSL_CONF_FLAG_FILE) { + if (t->str_file && !strcasecmp(t->str_file, cmd)) + return t; + } + } + return NULL; +} + +int SSL_CONF_cmd(SSL_CONF_CTX *cctx, const char *cmd, const char *value) +{ + const ssl_conf_cmd_tbl *runcmd; + if (cmd == NULL) { + SSLerr(SSL_F_SSL_CONF_CMD, SSL_R_INVALID_NULL_CMD_NAME); + return 0; + } + + if (!ssl_conf_cmd_skip_prefix(cctx, &cmd)) + return -2; + + runcmd = ssl_conf_cmd_lookup(cctx, cmd); + + if (runcmd) { + int rv; + if (value == NULL) + return -3; + rv = runcmd->cmd(cctx, value); + if (rv > 0) + return 2; + if (rv == -2) + return -2; + if (cctx->flags & SSL_CONF_FLAG_SHOW_ERRORS) { + SSLerr(SSL_F_SSL_CONF_CMD, SSL_R_BAD_VALUE); + ERR_add_error_data(4, "cmd=", cmd, ", value=", value); + } + return 0; + } + + if (cctx->flags & SSL_CONF_FLAG_CMDLINE) { + if (ctrl_str_option(cctx, cmd)) + return 1; + } + + if (cctx->flags & SSL_CONF_FLAG_SHOW_ERRORS) { + SSLerr(SSL_F_SSL_CONF_CMD, SSL_R_UNKNOWN_CMD_NAME); + ERR_add_error_data(2, "cmd=", cmd); + } + + return -2; +} + +int SSL_CONF_cmd_argv(SSL_CONF_CTX *cctx, int *pargc, char ***pargv) +{ + int rv; + const char *arg = NULL, *argn; + if (pargc && *pargc == 0) + return 0; + if (!pargc || *pargc > 0) + arg = **pargv; + if (arg == NULL) + return 0; + if (!pargc || *pargc > 1) + argn = (*pargv)[1]; + else + argn = NULL; + cctx->flags &= ~SSL_CONF_FLAG_FILE; + cctx->flags |= SSL_CONF_FLAG_CMDLINE; + rv = SSL_CONF_cmd(cctx, arg, argn); + if (rv > 0) { + /* Success: update pargc, pargv */ + (*pargv) += rv; + if (pargc) + (*pargc) -= rv; + return rv; + } + /* Unknown switch: indicate no 
arguments processed */ + if (rv == -2) + return 0; + /* Some error occurred processing command, return fatal error */ + if (rv == 0) + return -1; + return rv; +} + +int SSL_CONF_cmd_value_type(SSL_CONF_CTX *cctx, const char *cmd) +{ + if (ssl_conf_cmd_skip_prefix(cctx, &cmd)) { + const ssl_conf_cmd_tbl *runcmd; + runcmd = ssl_conf_cmd_lookup(cctx, cmd); + if (runcmd) + return runcmd->value_type; + } + return SSL_CONF_TYPE_UNKNOWN; +} + +SSL_CONF_CTX *SSL_CONF_CTX_new(void) +{ + SSL_CONF_CTX *ret; + ret = OPENSSL_malloc(sizeof(SSL_CONF_CTX)); + if (ret) { + ret->flags = 0; + ret->prefix = NULL; + ret->prefixlen = 0; + ret->ssl = NULL; + ret->ctx = NULL; + ret->poptions = NULL; + ret->pcert_flags = NULL; + ret->tbl = NULL; + ret->ntbl = 0; + } + return ret; +} + +int SSL_CONF_CTX_finish(SSL_CONF_CTX *cctx) +{ + return 1; +} + +void SSL_CONF_CTX_free(SSL_CONF_CTX *cctx) +{ + if (cctx) { + if (cctx->prefix) + OPENSSL_free(cctx->prefix); + OPENSSL_free(cctx); + } +} + +unsigned int SSL_CONF_CTX_set_flags(SSL_CONF_CTX *cctx, unsigned int flags) +{ + cctx->flags |= flags; + return cctx->flags; +} + +unsigned int SSL_CONF_CTX_clear_flags(SSL_CONF_CTX *cctx, unsigned int flags) +{ + cctx->flags &= ~flags; + return cctx->flags; +} + +int SSL_CONF_CTX_set1_prefix(SSL_CONF_CTX *cctx, const char *pre) +{ + char *tmp = NULL; + if (pre) { + tmp = BUF_strdup(pre); + if (tmp == NULL) + return 0; + } + if (cctx->prefix) + OPENSSL_free(cctx->prefix); + cctx->prefix = tmp; + if (tmp) + cctx->prefixlen = strlen(tmp); + else + cctx->prefixlen = 0; + return 1; +} + +void SSL_CONF_CTX_set_ssl(SSL_CONF_CTX *cctx, SSL *ssl) +{ + cctx->ssl = ssl; + cctx->ctx = NULL; + if (ssl) { + cctx->poptions = &ssl->options; + cctx->pcert_flags = &ssl->cert->cert_flags; + } else { + cctx->poptions = NULL; + cctx->pcert_flags = NULL; + } +} + +void SSL_CONF_CTX_set_ssl_ctx(SSL_CONF_CTX *cctx, SSL_CTX *ctx) +{ + cctx->ctx = ctx; + cctx->ssl = NULL; + if (ctx) { + cctx->poptions = &ctx->options; + cctx->pcert_flags = &ctx->cert->cert_flags; + } else { + cctx->poptions = NULL; + cctx->pcert_flags = NULL; + } +} diff --git a/deps/openssl/openssl/ssl/ssl_err.c b/deps/openssl/openssl/ssl/ssl_err.c index ac7312e31ba9e4..ab3aa233748d84 100644 --- a/deps/openssl/openssl/ssl/ssl_err.c +++ b/deps/openssl/openssl/ssl/ssl_err.c @@ -1,6 +1,6 @@ /* ssl/ssl_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. 
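The SSL_CONF_cmd() machinery above is driven through the public SSL_CONF_CTX API. A minimal sketch of how a server context might consume it in file-style mode follows; the command names come from the ssl_conf_cmds table, while the values and the surrounding error handling are illustrative only.

#include <stdio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>

/* Apply a few file-style configuration commands to an SSL_CTX.  The
 * command names ("Protocol", "Options", "CipherString") come from the
 * ssl_conf_cmds table; the values are examples only. */
static int apply_conf(SSL_CTX *ctx)
{
    static const char *cmds[][2] = {
        { "Protocol",     "ALL,-SSLv2,-SSLv3" },
        { "Options",      "-SessionTicket,ServerPreference" },
        { "CipherString", "HIGH:!aNULL" },
    };
    SSL_CONF_CTX *cctx = SSL_CONF_CTX_new();
    size_t i;
    int ok = 1;

    if (cctx == NULL)
        return 0;
    SSL_CONF_CTX_set_flags(cctx, SSL_CONF_FLAG_FILE |
                                 SSL_CONF_FLAG_SERVER |
                                 SSL_CONF_FLAG_SHOW_ERRORS);
    SSL_CONF_CTX_set_ssl_ctx(cctx, ctx);

    for (i = 0; i < sizeof(cmds) / sizeof(cmds[0]); i++) {
        /* SSL_CONF_cmd() returns 2 when both the command and its value
         * were consumed; 0 or -2 indicate a bad value or unknown command. */
        if (SSL_CONF_cmd(cctx, cmds[i][0], cmds[i][1]) != 2) {
            ERR_print_errors_fp(stderr);
            ok = 0;
            break;
        }
    }
    if (ok && !SSL_CONF_CTX_finish(cctx))
        ok = 0;
    SSL_CONF_CTX_free(cctx);
    return ok;
}

With SSL_CONF_FLAG_CMDLINE instead of SSL_CONF_FLAG_FILE the same table is reached through command-line style switches, which is the path that falls back to ctrl_str_option() for single options such as no_ssl3.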
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,50 +70,50 @@ # define ERR_REASON(reason) ERR_PACK(ERR_LIB_SSL,0,reason) static ERR_STRING_DATA SSL_str_functs[] = { + {ERR_FUNC(SSL_F_CHECK_SUITEB_CIPHER_LIST), "CHECK_SUITEB_CIPHER_LIST"}, {ERR_FUNC(SSL_F_CLIENT_CERTIFICATE), "CLIENT_CERTIFICATE"}, {ERR_FUNC(SSL_F_CLIENT_FINISHED), "CLIENT_FINISHED"}, {ERR_FUNC(SSL_F_CLIENT_HELLO), "CLIENT_HELLO"}, {ERR_FUNC(SSL_F_CLIENT_MASTER_KEY), "CLIENT_MASTER_KEY"}, {ERR_FUNC(SSL_F_D2I_SSL_SESSION), "d2i_SSL_SESSION"}, - {ERR_FUNC(SSL_F_DO_DTLS1_WRITE), "DO_DTLS1_WRITE"}, + {ERR_FUNC(SSL_F_DO_DTLS1_WRITE), "do_dtls1_write"}, {ERR_FUNC(SSL_F_DO_SSL3_WRITE), "DO_SSL3_WRITE"}, - {ERR_FUNC(SSL_F_DTLS1_ACCEPT), "DTLS1_ACCEPT"}, + {ERR_FUNC(SSL_F_DTLS1_ACCEPT), "dtls1_accept"}, {ERR_FUNC(SSL_F_DTLS1_ADD_CERT_TO_BUF), "DTLS1_ADD_CERT_TO_BUF"}, {ERR_FUNC(SSL_F_DTLS1_BUFFER_RECORD), "DTLS1_BUFFER_RECORD"}, - {ERR_FUNC(SSL_F_DTLS1_CHECK_TIMEOUT_NUM), "DTLS1_CHECK_TIMEOUT_NUM"}, - {ERR_FUNC(SSL_F_DTLS1_CLIENT_HELLO), "DTLS1_CLIENT_HELLO"}, - {ERR_FUNC(SSL_F_DTLS1_CONNECT), "DTLS1_CONNECT"}, - {ERR_FUNC(SSL_F_DTLS1_ENC), "DTLS1_ENC"}, + {ERR_FUNC(SSL_F_DTLS1_CHECK_TIMEOUT_NUM), "dtls1_check_timeout_num"}, + {ERR_FUNC(SSL_F_DTLS1_CLIENT_HELLO), "dtls1_client_hello"}, + {ERR_FUNC(SSL_F_DTLS1_CONNECT), "dtls1_connect"}, {ERR_FUNC(SSL_F_DTLS1_GET_HELLO_VERIFY), "DTLS1_GET_HELLO_VERIFY"}, - {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE), "DTLS1_GET_MESSAGE"}, + {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE), "dtls1_get_message"}, {ERR_FUNC(SSL_F_DTLS1_GET_MESSAGE_FRAGMENT), "DTLS1_GET_MESSAGE_FRAGMENT"}, - {ERR_FUNC(SSL_F_DTLS1_GET_RECORD), "DTLS1_GET_RECORD"}, - {ERR_FUNC(SSL_F_DTLS1_HANDLE_TIMEOUT), "DTLS1_HANDLE_TIMEOUT"}, - {ERR_FUNC(SSL_F_DTLS1_HEARTBEAT), "DTLS1_HEARTBEAT"}, - {ERR_FUNC(SSL_F_DTLS1_OUTPUT_CERT_CHAIN), "DTLS1_OUTPUT_CERT_CHAIN"}, + {ERR_FUNC(SSL_F_DTLS1_GET_RECORD), "dtls1_get_record"}, + {ERR_FUNC(SSL_F_DTLS1_HANDLE_TIMEOUT), "dtls1_handle_timeout"}, + {ERR_FUNC(SSL_F_DTLS1_HEARTBEAT), "dtls1_heartbeat"}, + {ERR_FUNC(SSL_F_DTLS1_OUTPUT_CERT_CHAIN), "dtls1_output_cert_chain"}, {ERR_FUNC(SSL_F_DTLS1_PREPROCESS_FRAGMENT), "DTLS1_PREPROCESS_FRAGMENT"}, {ERR_FUNC(SSL_F_DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE), "DTLS1_PROCESS_OUT_OF_SEQ_MESSAGE"}, {ERR_FUNC(SSL_F_DTLS1_PROCESS_RECORD), "DTLS1_PROCESS_RECORD"}, - {ERR_FUNC(SSL_F_DTLS1_READ_BYTES), "DTLS1_READ_BYTES"}, - {ERR_FUNC(SSL_F_DTLS1_READ_FAILED), "DTLS1_READ_FAILED"}, + {ERR_FUNC(SSL_F_DTLS1_READ_BYTES), "dtls1_read_bytes"}, + {ERR_FUNC(SSL_F_DTLS1_READ_FAILED), "dtls1_read_failed"}, {ERR_FUNC(SSL_F_DTLS1_SEND_CERTIFICATE_REQUEST), - "DTLS1_SEND_CERTIFICATE_REQUEST"}, + "dtls1_send_certificate_request"}, {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_CERTIFICATE), - "DTLS1_SEND_CLIENT_CERTIFICATE"}, + "dtls1_send_client_certificate"}, {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_KEY_EXCHANGE), - "DTLS1_SEND_CLIENT_KEY_EXCHANGE"}, - {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_VERIFY), "DTLS1_SEND_CLIENT_VERIFY"}, + "dtls1_send_client_key_exchange"}, + {ERR_FUNC(SSL_F_DTLS1_SEND_CLIENT_VERIFY), "dtls1_send_client_verify"}, {ERR_FUNC(SSL_F_DTLS1_SEND_HELLO_VERIFY_REQUEST), "DTLS1_SEND_HELLO_VERIFY_REQUEST"}, {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_CERTIFICATE), - "DTLS1_SEND_SERVER_CERTIFICATE"}, - {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_HELLO), "DTLS1_SEND_SERVER_HELLO"}, + "dtls1_send_server_certificate"}, + {ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_HELLO), "dtls1_send_server_hello"}, 
{ERR_FUNC(SSL_F_DTLS1_SEND_SERVER_KEY_EXCHANGE), - "DTLS1_SEND_SERVER_KEY_EXCHANGE"}, + "dtls1_send_server_key_exchange"}, {ERR_FUNC(SSL_F_DTLS1_WRITE_APP_DATA_BYTES), - "DTLS1_WRITE_APP_DATA_BYTES"}, + "dtls1_write_app_data_bytes"}, {ERR_FUNC(SSL_F_GET_CLIENT_FINISHED), "GET_CLIENT_FINISHED"}, {ERR_FUNC(SSL_F_GET_CLIENT_HELLO), "GET_CLIENT_HELLO"}, {ERR_FUNC(SSL_F_GET_CLIENT_MASTER_KEY), "GET_CLIENT_MASTER_KEY"}, @@ -126,118 +126,122 @@ static ERR_STRING_DATA SSL_str_functs[] = { {ERR_FUNC(SSL_F_SERVER_FINISH), "SERVER_FINISH"}, {ERR_FUNC(SSL_F_SERVER_HELLO), "SERVER_HELLO"}, {ERR_FUNC(SSL_F_SERVER_VERIFY), "SERVER_VERIFY"}, - {ERR_FUNC(SSL_F_SSL23_ACCEPT), "SSL23_ACCEPT"}, + {ERR_FUNC(SSL_F_SSL23_ACCEPT), "ssl23_accept"}, {ERR_FUNC(SSL_F_SSL23_CLIENT_HELLO), "SSL23_CLIENT_HELLO"}, - {ERR_FUNC(SSL_F_SSL23_CONNECT), "SSL23_CONNECT"}, + {ERR_FUNC(SSL_F_SSL23_CONNECT), "ssl23_connect"}, {ERR_FUNC(SSL_F_SSL23_GET_CLIENT_HELLO), "SSL23_GET_CLIENT_HELLO"}, {ERR_FUNC(SSL_F_SSL23_GET_SERVER_HELLO), "SSL23_GET_SERVER_HELLO"}, - {ERR_FUNC(SSL_F_SSL23_PEEK), "SSL23_PEEK"}, - {ERR_FUNC(SSL_F_SSL23_READ), "SSL23_READ"}, - {ERR_FUNC(SSL_F_SSL23_WRITE), "SSL23_WRITE"}, - {ERR_FUNC(SSL_F_SSL2_ACCEPT), "SSL2_ACCEPT"}, - {ERR_FUNC(SSL_F_SSL2_CONNECT), "SSL2_CONNECT"}, - {ERR_FUNC(SSL_F_SSL2_ENC_INIT), "SSL2_ENC_INIT"}, + {ERR_FUNC(SSL_F_SSL23_PEEK), "ssl23_peek"}, + {ERR_FUNC(SSL_F_SSL23_READ), "ssl23_read"}, + {ERR_FUNC(SSL_F_SSL23_WRITE), "ssl23_write"}, + {ERR_FUNC(SSL_F_SSL2_ACCEPT), "ssl2_accept"}, + {ERR_FUNC(SSL_F_SSL2_CONNECT), "ssl2_connect"}, + {ERR_FUNC(SSL_F_SSL2_ENC_INIT), "ssl2_enc_init"}, {ERR_FUNC(SSL_F_SSL2_GENERATE_KEY_MATERIAL), - "SSL2_GENERATE_KEY_MATERIAL"}, - {ERR_FUNC(SSL_F_SSL2_PEEK), "SSL2_PEEK"}, - {ERR_FUNC(SSL_F_SSL2_READ), "SSL2_READ"}, + "ssl2_generate_key_material"}, + {ERR_FUNC(SSL_F_SSL2_PEEK), "ssl2_peek"}, + {ERR_FUNC(SSL_F_SSL2_READ), "ssl2_read"}, {ERR_FUNC(SSL_F_SSL2_READ_INTERNAL), "SSL2_READ_INTERNAL"}, - {ERR_FUNC(SSL_F_SSL2_SET_CERTIFICATE), "SSL2_SET_CERTIFICATE"}, - {ERR_FUNC(SSL_F_SSL2_WRITE), "SSL2_WRITE"}, - {ERR_FUNC(SSL_F_SSL3_ACCEPT), "SSL3_ACCEPT"}, + {ERR_FUNC(SSL_F_SSL2_SET_CERTIFICATE), "ssl2_set_certificate"}, + {ERR_FUNC(SSL_F_SSL2_WRITE), "ssl2_write"}, + {ERR_FUNC(SSL_F_SSL3_ACCEPT), "ssl3_accept"}, {ERR_FUNC(SSL_F_SSL3_ADD_CERT_TO_BUF), "SSL3_ADD_CERT_TO_BUF"}, - {ERR_FUNC(SSL_F_SSL3_CALLBACK_CTRL), "SSL3_CALLBACK_CTRL"}, - {ERR_FUNC(SSL_F_SSL3_CHANGE_CIPHER_STATE), "SSL3_CHANGE_CIPHER_STATE"}, + {ERR_FUNC(SSL_F_SSL3_CALLBACK_CTRL), "ssl3_callback_ctrl"}, + {ERR_FUNC(SSL_F_SSL3_CHANGE_CIPHER_STATE), "ssl3_change_cipher_state"}, {ERR_FUNC(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM), - "SSL3_CHECK_CERT_AND_ALGORITHM"}, - {ERR_FUNC(SSL_F_SSL3_CHECK_CLIENT_HELLO), "SSL3_CHECK_CLIENT_HELLO"}, - {ERR_FUNC(SSL_F_SSL3_CLIENT_HELLO), "SSL3_CLIENT_HELLO"}, - {ERR_FUNC(SSL_F_SSL3_CONNECT), "SSL3_CONNECT"}, - {ERR_FUNC(SSL_F_SSL3_CTRL), "SSL3_CTRL"}, - {ERR_FUNC(SSL_F_SSL3_CTX_CTRL), "SSL3_CTX_CTRL"}, + "ssl3_check_cert_and_algorithm"}, + {ERR_FUNC(SSL_F_SSL3_CHECK_CLIENT_HELLO), "ssl3_check_client_hello"}, + {ERR_FUNC(SSL_F_SSL3_CLIENT_HELLO), "ssl3_client_hello"}, + {ERR_FUNC(SSL_F_SSL3_CONNECT), "ssl3_connect"}, + {ERR_FUNC(SSL_F_SSL3_CTRL), "ssl3_ctrl"}, + {ERR_FUNC(SSL_F_SSL3_CTX_CTRL), "ssl3_ctx_ctrl"}, {ERR_FUNC(SSL_F_SSL3_DIGEST_CACHED_RECORDS), - "SSL3_DIGEST_CACHED_RECORDS"}, + "ssl3_digest_cached_records"}, {ERR_FUNC(SSL_F_SSL3_DO_CHANGE_CIPHER_SPEC), - "SSL3_DO_CHANGE_CIPHER_SPEC"}, - {ERR_FUNC(SSL_F_SSL3_ENC), "SSL3_ENC"}, + 
"ssl3_do_change_cipher_spec"}, + {ERR_FUNC(SSL_F_SSL3_ENC), "ssl3_enc"}, {ERR_FUNC(SSL_F_SSL3_GENERATE_KEY_BLOCK), "SSL3_GENERATE_KEY_BLOCK"}, {ERR_FUNC(SSL_F_SSL3_GET_CERTIFICATE_REQUEST), - "SSL3_GET_CERTIFICATE_REQUEST"}, - {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "SSL3_GET_CERT_STATUS"}, - {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "SSL3_GET_CERT_VERIFY"}, + "ssl3_get_certificate_request"}, + {ERR_FUNC(SSL_F_SSL3_GET_CERT_STATUS), "ssl3_get_cert_status"}, + {ERR_FUNC(SSL_F_SSL3_GET_CERT_VERIFY), "ssl3_get_cert_verify"}, {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_CERTIFICATE), - "SSL3_GET_CLIENT_CERTIFICATE"}, - {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "SSL3_GET_CLIENT_HELLO"}, + "ssl3_get_client_certificate"}, + {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_HELLO), "ssl3_get_client_hello"}, {ERR_FUNC(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE), - "SSL3_GET_CLIENT_KEY_EXCHANGE"}, - {ERR_FUNC(SSL_F_SSL3_GET_FINISHED), "SSL3_GET_FINISHED"}, - {ERR_FUNC(SSL_F_SSL3_GET_KEY_EXCHANGE), "SSL3_GET_KEY_EXCHANGE"}, - {ERR_FUNC(SSL_F_SSL3_GET_MESSAGE), "SSL3_GET_MESSAGE"}, + "ssl3_get_client_key_exchange"}, + {ERR_FUNC(SSL_F_SSL3_GET_FINISHED), "ssl3_get_finished"}, + {ERR_FUNC(SSL_F_SSL3_GET_KEY_EXCHANGE), "ssl3_get_key_exchange"}, + {ERR_FUNC(SSL_F_SSL3_GET_MESSAGE), "ssl3_get_message"}, {ERR_FUNC(SSL_F_SSL3_GET_NEW_SESSION_TICKET), - "SSL3_GET_NEW_SESSION_TICKET"}, - {ERR_FUNC(SSL_F_SSL3_GET_NEXT_PROTO), "SSL3_GET_NEXT_PROTO"}, + "ssl3_get_new_session_ticket"}, + {ERR_FUNC(SSL_F_SSL3_GET_NEXT_PROTO), "ssl3_get_next_proto"}, {ERR_FUNC(SSL_F_SSL3_GET_RECORD), "SSL3_GET_RECORD"}, {ERR_FUNC(SSL_F_SSL3_GET_SERVER_CERTIFICATE), - "SSL3_GET_SERVER_CERTIFICATE"}, - {ERR_FUNC(SSL_F_SSL3_GET_SERVER_DONE), "SSL3_GET_SERVER_DONE"}, - {ERR_FUNC(SSL_F_SSL3_GET_SERVER_HELLO), "SSL3_GET_SERVER_HELLO"}, + "ssl3_get_server_certificate"}, + {ERR_FUNC(SSL_F_SSL3_GET_SERVER_DONE), "ssl3_get_server_done"}, + {ERR_FUNC(SSL_F_SSL3_GET_SERVER_HELLO), "ssl3_get_server_hello"}, {ERR_FUNC(SSL_F_SSL3_HANDSHAKE_MAC), "ssl3_handshake_mac"}, {ERR_FUNC(SSL_F_SSL3_NEW_SESSION_TICKET), "SSL3_NEW_SESSION_TICKET"}, - {ERR_FUNC(SSL_F_SSL3_OUTPUT_CERT_CHAIN), "SSL3_OUTPUT_CERT_CHAIN"}, - {ERR_FUNC(SSL_F_SSL3_PEEK), "SSL3_PEEK"}, - {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "SSL3_READ_BYTES"}, - {ERR_FUNC(SSL_F_SSL3_READ_N), "SSL3_READ_N"}, + {ERR_FUNC(SSL_F_SSL3_OUTPUT_CERT_CHAIN), "ssl3_output_cert_chain"}, + {ERR_FUNC(SSL_F_SSL3_PEEK), "ssl3_peek"}, + {ERR_FUNC(SSL_F_SSL3_READ_BYTES), "ssl3_read_bytes"}, + {ERR_FUNC(SSL_F_SSL3_READ_N), "ssl3_read_n"}, {ERR_FUNC(SSL_F_SSL3_SEND_CERTIFICATE_REQUEST), - "SSL3_SEND_CERTIFICATE_REQUEST"}, + "ssl3_send_certificate_request"}, {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE), - "SSL3_SEND_CLIENT_CERTIFICATE"}, + "ssl3_send_client_certificate"}, {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE), - "SSL3_SEND_CLIENT_KEY_EXCHANGE"}, - {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "SSL3_SEND_CLIENT_VERIFY"}, + "ssl3_send_client_key_exchange"}, + {ERR_FUNC(SSL_F_SSL3_SEND_CLIENT_VERIFY), "ssl3_send_client_verify"}, {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_CERTIFICATE), - "SSL3_SEND_SERVER_CERTIFICATE"}, - {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_HELLO), "SSL3_SEND_SERVER_HELLO"}, + "ssl3_send_server_certificate"}, + {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_HELLO), "ssl3_send_server_hello"}, {ERR_FUNC(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE), - "SSL3_SEND_SERVER_KEY_EXCHANGE"}, - {ERR_FUNC(SSL_F_SSL3_SETUP_KEY_BLOCK), "SSL3_SETUP_KEY_BLOCK"}, - {ERR_FUNC(SSL_F_SSL3_SETUP_READ_BUFFER), "SSL3_SETUP_READ_BUFFER"}, - {ERR_FUNC(SSL_F_SSL3_SETUP_WRITE_BUFFER), 
"SSL3_SETUP_WRITE_BUFFER"}, - {ERR_FUNC(SSL_F_SSL3_WRITE_BYTES), "SSL3_WRITE_BYTES"}, - {ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "SSL3_WRITE_PENDING"}, + "ssl3_send_server_key_exchange"}, + {ERR_FUNC(SSL_F_SSL3_SETUP_KEY_BLOCK), "ssl3_setup_key_block"}, + {ERR_FUNC(SSL_F_SSL3_SETUP_READ_BUFFER), "ssl3_setup_read_buffer"}, + {ERR_FUNC(SSL_F_SSL3_SETUP_WRITE_BUFFER), "ssl3_setup_write_buffer"}, + {ERR_FUNC(SSL_F_SSL3_WRITE_BYTES), "ssl3_write_bytes"}, + {ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "ssl3_write_pending"}, + {ERR_FUNC(SSL_F_SSL_ADD_CERT_CHAIN), "ssl_add_cert_chain"}, + {ERR_FUNC(SSL_F_SSL_ADD_CERT_TO_BUF), "SSL_ADD_CERT_TO_BUF"}, {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT), - "SSL_ADD_CLIENTHELLO_RENEGOTIATE_EXT"}, + "ssl_add_clienthello_renegotiate_ext"}, {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT), - "SSL_ADD_CLIENTHELLO_TLSEXT"}, + "ssl_add_clienthello_tlsext"}, {ERR_FUNC(SSL_F_SSL_ADD_CLIENTHELLO_USE_SRTP_EXT), - "SSL_ADD_CLIENTHELLO_USE_SRTP_EXT"}, + "ssl_add_clienthello_use_srtp_ext"}, {ERR_FUNC(SSL_F_SSL_ADD_DIR_CERT_SUBJECTS_TO_STACK), "SSL_add_dir_cert_subjects_to_stack"}, {ERR_FUNC(SSL_F_SSL_ADD_FILE_CERT_SUBJECTS_TO_STACK), "SSL_add_file_cert_subjects_to_stack"}, {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT), - "SSL_ADD_SERVERHELLO_RENEGOTIATE_EXT"}, + "ssl_add_serverhello_renegotiate_ext"}, {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT), - "SSL_ADD_SERVERHELLO_TLSEXT"}, + "ssl_add_serverhello_tlsext"}, {ERR_FUNC(SSL_F_SSL_ADD_SERVERHELLO_USE_SRTP_EXT), - "SSL_ADD_SERVERHELLO_USE_SRTP_EXT"}, - {ERR_FUNC(SSL_F_SSL_BAD_METHOD), "SSL_BAD_METHOD"}, - {ERR_FUNC(SSL_F_SSL_BYTES_TO_CIPHER_LIST), "SSL_BYTES_TO_CIPHER_LIST"}, - {ERR_FUNC(SSL_F_SSL_CERT_DUP), "SSL_CERT_DUP"}, - {ERR_FUNC(SSL_F_SSL_CERT_INST), "SSL_CERT_INST"}, + "ssl_add_serverhello_use_srtp_ext"}, + {ERR_FUNC(SSL_F_SSL_BAD_METHOD), "ssl_bad_method"}, + {ERR_FUNC(SSL_F_SSL_BUILD_CERT_CHAIN), "ssl_build_cert_chain"}, + {ERR_FUNC(SSL_F_SSL_BYTES_TO_CIPHER_LIST), "ssl_bytes_to_cipher_list"}, + {ERR_FUNC(SSL_F_SSL_CERT_DUP), "ssl_cert_dup"}, + {ERR_FUNC(SSL_F_SSL_CERT_INST), "ssl_cert_inst"}, {ERR_FUNC(SSL_F_SSL_CERT_INSTANTIATE), "SSL_CERT_INSTANTIATE"}, - {ERR_FUNC(SSL_F_SSL_CERT_NEW), "SSL_CERT_NEW"}, + {ERR_FUNC(SSL_F_SSL_CERT_NEW), "ssl_cert_new"}, {ERR_FUNC(SSL_F_SSL_CHECK_PRIVATE_KEY), "SSL_check_private_key"}, {ERR_FUNC(SSL_F_SSL_CHECK_SERVERHELLO_TLSEXT), "SSL_CHECK_SERVERHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_SSL_CHECK_SRVR_ECC_CERT_AND_ALG), - "SSL_CHECK_SRVR_ECC_CERT_AND_ALG"}, + "ssl_check_srvr_ecc_cert_and_alg"}, {ERR_FUNC(SSL_F_SSL_CIPHER_PROCESS_RULESTR), "SSL_CIPHER_PROCESS_RULESTR"}, {ERR_FUNC(SSL_F_SSL_CIPHER_STRENGTH_SORT), "SSL_CIPHER_STRENGTH_SORT"}, {ERR_FUNC(SSL_F_SSL_CLEAR), "SSL_clear"}, {ERR_FUNC(SSL_F_SSL_COMP_ADD_COMPRESSION_METHOD), "SSL_COMP_add_compression_method"}, - {ERR_FUNC(SSL_F_SSL_CREATE_CIPHER_LIST), "SSL_CREATE_CIPHER_LIST"}, + {ERR_FUNC(SSL_F_SSL_CONF_CMD), "SSL_CONF_cmd"}, + {ERR_FUNC(SSL_F_SSL_CREATE_CIPHER_LIST), "ssl_create_cipher_list"}, {ERR_FUNC(SSL_F_SSL_CTRL), "SSL_ctrl"}, {ERR_FUNC(SSL_F_SSL_CTX_CHECK_PRIVATE_KEY), "SSL_CTX_check_private_key"}, {ERR_FUNC(SSL_F_SSL_CTX_MAKE_PROFILES), "SSL_CTX_MAKE_PROFILES"}, @@ -269,40 +273,48 @@ static ERR_STRING_DATA SSL_str_functs[] = { "SSL_CTX_use_RSAPrivateKey_ASN1"}, {ERR_FUNC(SSL_F_SSL_CTX_USE_RSAPRIVATEKEY_FILE), "SSL_CTX_use_RSAPrivateKey_file"}, + {ERR_FUNC(SSL_F_SSL_CTX_USE_SERVERINFO), "SSL_CTX_use_serverinfo"}, + {ERR_FUNC(SSL_F_SSL_CTX_USE_SERVERINFO_FILE), + "SSL_CTX_use_serverinfo_file"}, 
{ERR_FUNC(SSL_F_SSL_DO_HANDSHAKE), "SSL_do_handshake"}, - {ERR_FUNC(SSL_F_SSL_GET_NEW_SESSION), "SSL_GET_NEW_SESSION"}, - {ERR_FUNC(SSL_F_SSL_GET_PREV_SESSION), "SSL_GET_PREV_SESSION"}, + {ERR_FUNC(SSL_F_SSL_GET_NEW_SESSION), "ssl_get_new_session"}, + {ERR_FUNC(SSL_F_SSL_GET_PREV_SESSION), "ssl_get_prev_session"}, + {ERR_FUNC(SSL_F_SSL_GET_SERVER_CERT_INDEX), "SSL_GET_SERVER_CERT_INDEX"}, {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_CERT), "SSL_GET_SERVER_SEND_CERT"}, - {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_PKEY), "SSL_GET_SERVER_SEND_PKEY"}, - {ERR_FUNC(SSL_F_SSL_GET_SIGN_PKEY), "SSL_GET_SIGN_PKEY"}, - {ERR_FUNC(SSL_F_SSL_INIT_WBIO_BUFFER), "SSL_INIT_WBIO_BUFFER"}, + {ERR_FUNC(SSL_F_SSL_GET_SERVER_SEND_PKEY), "ssl_get_server_send_pkey"}, + {ERR_FUNC(SSL_F_SSL_GET_SIGN_PKEY), "ssl_get_sign_pkey"}, + {ERR_FUNC(SSL_F_SSL_INIT_WBIO_BUFFER), "ssl_init_wbio_buffer"}, {ERR_FUNC(SSL_F_SSL_LOAD_CLIENT_CA_FILE), "SSL_load_client_CA_file"}, {ERR_FUNC(SSL_F_SSL_NEW), "SSL_new"}, {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT), - "SSL_PARSE_CLIENTHELLO_RENEGOTIATE_EXT"}, + "ssl_parse_clienthello_renegotiate_ext"}, {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT), - "SSL_PARSE_CLIENTHELLO_TLSEXT"}, + "ssl_parse_clienthello_tlsext"}, {ERR_FUNC(SSL_F_SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT), - "SSL_PARSE_CLIENTHELLO_USE_SRTP_EXT"}, + "ssl_parse_clienthello_use_srtp_ext"}, {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT), - "SSL_PARSE_SERVERHELLO_RENEGOTIATE_EXT"}, + "ssl_parse_serverhello_renegotiate_ext"}, {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT), - "SSL_PARSE_SERVERHELLO_TLSEXT"}, + "ssl_parse_serverhello_tlsext"}, {ERR_FUNC(SSL_F_SSL_PARSE_SERVERHELLO_USE_SRTP_EXT), - "SSL_PARSE_SERVERHELLO_USE_SRTP_EXT"}, + "ssl_parse_serverhello_use_srtp_ext"}, {ERR_FUNC(SSL_F_SSL_PEEK), "SSL_peek"}, {ERR_FUNC(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT), - "SSL_PREPARE_CLIENTHELLO_TLSEXT"}, + "ssl_prepare_clienthello_tlsext"}, {ERR_FUNC(SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT), - "SSL_PREPARE_SERVERHELLO_TLSEXT"}, + "ssl_prepare_serverhello_tlsext"}, {ERR_FUNC(SSL_F_SSL_READ), "SSL_read"}, {ERR_FUNC(SSL_F_SSL_RSA_PRIVATE_DECRYPT), "SSL_RSA_PRIVATE_DECRYPT"}, {ERR_FUNC(SSL_F_SSL_RSA_PUBLIC_ENCRYPT), "SSL_RSA_PUBLIC_ENCRYPT"}, + {ERR_FUNC(SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT), + "SSL_SCAN_CLIENTHELLO_TLSEXT"}, + {ERR_FUNC(SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT), + "SSL_SCAN_SERVERHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_SSL_SESSION_NEW), "SSL_SESSION_new"}, {ERR_FUNC(SSL_F_SSL_SESSION_PRINT_FP), "SSL_SESSION_print_fp"}, {ERR_FUNC(SSL_F_SSL_SESSION_SET1_ID_CONTEXT), "SSL_SESSION_set1_id_context"}, - {ERR_FUNC(SSL_F_SSL_SESS_CERT_NEW), "SSL_SESS_CERT_NEW"}, + {ERR_FUNC(SSL_F_SSL_SESS_CERT_NEW), "ssl_sess_cert_new"}, {ERR_FUNC(SSL_F_SSL_SET_CERT), "SSL_SET_CERT"}, {ERR_FUNC(SSL_F_SSL_SET_CIPHER_LIST), "SSL_set_cipher_list"}, {ERR_FUNC(SSL_F_SSL_SET_FD), "SSL_set_fd"}, @@ -319,10 +331,10 @@ static ERR_STRING_DATA SSL_str_functs[] = { {ERR_FUNC(SSL_F_SSL_SHUTDOWN), "SSL_shutdown"}, {ERR_FUNC(SSL_F_SSL_SRP_CTX_INIT), "SSL_SRP_CTX_init"}, {ERR_FUNC(SSL_F_SSL_UNDEFINED_CONST_FUNCTION), - "SSL_UNDEFINED_CONST_FUNCTION"}, - {ERR_FUNC(SSL_F_SSL_UNDEFINED_FUNCTION), "SSL_UNDEFINED_FUNCTION"}, + "ssl_undefined_const_function"}, + {ERR_FUNC(SSL_F_SSL_UNDEFINED_FUNCTION), "ssl_undefined_function"}, {ERR_FUNC(SSL_F_SSL_UNDEFINED_VOID_FUNCTION), - "SSL_UNDEFINED_VOID_FUNCTION"}, + "ssl_undefined_void_function"}, {ERR_FUNC(SSL_F_SSL_USE_CERTIFICATE), "SSL_use_certificate"}, {ERR_FUNC(SSL_F_SSL_USE_CERTIFICATE_ASN1), "SSL_use_certificate_ASN1"}, 
{ERR_FUNC(SSL_F_SSL_USE_CERTIFICATE_FILE), "SSL_use_certificate_file"}, @@ -335,22 +347,25 @@ static ERR_STRING_DATA SSL_str_functs[] = { "SSL_use_RSAPrivateKey_ASN1"}, {ERR_FUNC(SSL_F_SSL_USE_RSAPRIVATEKEY_FILE), "SSL_use_RSAPrivateKey_file"}, - {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, + {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "ssl_verify_cert_chain"}, {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, + {ERR_FUNC(SSL_F_TLS12_CHECK_PEER_SIGALG), "tls12_check_peer_sigalg"}, {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, - {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "TLS1_CHANGE_CIPHER_STATE"}, + {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "tls1_change_cipher_state"}, {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_TLSEXT"}, - {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, + {ERR_FUNC(SSL_F_TLS1_ENC), "tls1_enc"}, {ERR_FUNC(SSL_F_TLS1_EXPORT_KEYING_MATERIAL), - "TLS1_EXPORT_KEYING_MATERIAL"}, - {ERR_FUNC(SSL_F_TLS1_HEARTBEAT), "SSL_F_TLS1_HEARTBEAT"}, + "tls1_export_keying_material"}, + {ERR_FUNC(SSL_F_TLS1_GET_CURVELIST), "TLS1_GET_CURVELIST"}, + {ERR_FUNC(SSL_F_TLS1_HEARTBEAT), "tls1_heartbeat"}, {ERR_FUNC(SSL_F_TLS1_PREPARE_CLIENTHELLO_TLSEXT), "TLS1_PREPARE_CLIENTHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_TLS1_PREPARE_SERVERHELLO_TLSEXT), "TLS1_PREPARE_SERVERHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_TLS1_PRF), "tls1_prf"}, - {ERR_FUNC(SSL_F_TLS1_SETUP_KEY_BLOCK), "TLS1_SETUP_KEY_BLOCK"}, + {ERR_FUNC(SSL_F_TLS1_SETUP_KEY_BLOCK), "tls1_setup_key_block"}, + {ERR_FUNC(SSL_F_TLS1_SET_SERVER_SIGALGS), "tls1_set_server_sigalgs"}, {ERR_FUNC(SSL_F_WRITE_PENDING), "WRITE_PENDING"}, {0, NULL} }; @@ -363,6 +378,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_BAD_AUTHENTICATION_TYPE), "bad authentication type"}, {ERR_REASON(SSL_R_BAD_CHANGE_CIPHER_SPEC), "bad change cipher spec"}, {ERR_REASON(SSL_R_BAD_CHECKSUM), "bad checksum"}, + {ERR_REASON(SSL_R_BAD_DATA), "bad data"}, {ERR_REASON(SSL_R_BAD_DATA_RETURNED_BY_CALLBACK), "bad data returned by callback"}, {ERR_REASON(SSL_R_BAD_DECOMPRESSION), "bad decompression"}, @@ -405,6 +421,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_BAD_SSL_SESSION_ID_LENGTH), "bad ssl session id length"}, {ERR_REASON(SSL_R_BAD_STATE), "bad state"}, + {ERR_REASON(SSL_R_BAD_VALUE), "bad value"}, {ERR_REASON(SSL_R_BAD_WRITE_RETRY), "bad write retry"}, {ERR_REASON(SSL_R_BIO_NOT_SET), "bio not set"}, {ERR_REASON(SSL_R_BLOCK_CIPHER_PAD_IS_WRONG), @@ -415,6 +432,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_CCS_RECEIVED_EARLY), "ccs received early"}, {ERR_REASON(SSL_R_CERTIFICATE_VERIFY_FAILED), "certificate verify failed"}, + {ERR_REASON(SSL_R_CERT_CB_ERROR), "cert cb error"}, {ERR_REASON(SSL_R_CERT_LENGTH_MISMATCH), "cert length mismatch"}, {ERR_REASON(SSL_R_CHALLENGE_IS_DIFFERENT), "challenge is different"}, {ERR_REASON(SSL_R_CIPHER_CODE_WRONG_LENGTH), "cipher code wrong length"}, @@ -452,6 +470,8 @@ static ERR_STRING_DATA SSL_str_reasons[] = { "ecc cert should have rsa signature"}, {ERR_REASON(SSL_R_ECC_CERT_SHOULD_HAVE_SHA1_SIGNATURE), "ecc cert should have sha1 signature"}, + {ERR_REASON(SSL_R_ECDH_REQUIRED_FOR_SUITEB_MODE), + "ecdh required for suiteb mode"}, {ERR_REASON(SSL_R_ECGROUP_TOO_LARGE_FOR_CIPHER), "ecgroup too large for cipher"}, {ERR_REASON(SSL_R_EMPTY_SRTP_PROTECTION_PROFILE_LIST), @@ -472,13 +492,16 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST), "https proxy request"}, {ERR_REASON(SSL_R_HTTP_REQUEST), "http request"}, 
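These string tables only become visible to applications once SSL_load_error_strings() has been called; after that a failed call can be reported as formatted text or tested by its packed reason code. A small sketch, using SSL_R_BAD_VALUE, one of the reasons added above:

#include <stdio.h>
#include <openssl/err.h>
#include <openssl/ssl.h>

/* Report the most recent libssl error, assuming SSL_load_error_strings()
 * was called at start-up so the tables above are registered. */
static void report_ssl_error(void)
{
    unsigned long e = ERR_peek_last_error();
    char buf[256];

    if (e == 0)
        return;
    ERR_error_string_n(e, buf, sizeof(buf));
    fprintf(stderr, "libssl error: %s\n", buf);

    /* The reason code can also be tested programmatically. */
    if (ERR_GET_LIB(e) == ERR_LIB_SSL && ERR_GET_REASON(e) == SSL_R_BAD_VALUE)
        fprintf(stderr, "a configuration value was rejected\n");

    ERR_clear_error();
}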
{ERR_REASON(SSL_R_ILLEGAL_PADDING), "illegal padding"}, + {ERR_REASON(SSL_R_ILLEGAL_SUITEB_DIGEST), "illegal Suite B digest"}, {ERR_REASON(SSL_R_INAPPROPRIATE_FALLBACK), "inappropriate fallback"}, {ERR_REASON(SSL_R_INCONSISTENT_COMPRESSION), "inconsistent compression"}, {ERR_REASON(SSL_R_INVALID_CHALLENGE_LENGTH), "invalid challenge length"}, {ERR_REASON(SSL_R_INVALID_COMMAND), "invalid command"}, {ERR_REASON(SSL_R_INVALID_COMPRESSION_ALGORITHM), "invalid compression algorithm"}, + {ERR_REASON(SSL_R_INVALID_NULL_CMD_NAME), "invalid null cmd name"}, {ERR_REASON(SSL_R_INVALID_PURPOSE), "invalid purpose"}, + {ERR_REASON(SSL_R_INVALID_SERVERINFO_DATA), "invalid serverinfo data"}, {ERR_REASON(SSL_R_INVALID_SRP_USERNAME), "invalid srp username"}, {ERR_REASON(SSL_R_INVALID_STATUS_RESPONSE), "invalid status response"}, {ERR_REASON(SSL_R_INVALID_TICKET_KEYS_LENGTH), @@ -505,6 +528,9 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_MISSING_DH_KEY), "missing dh key"}, {ERR_REASON(SSL_R_MISSING_DH_RSA_CERT), "missing dh rsa cert"}, {ERR_REASON(SSL_R_MISSING_DSA_SIGNING_CERT), "missing dsa signing cert"}, + {ERR_REASON(SSL_R_MISSING_ECDH_CERT), "missing ecdh cert"}, + {ERR_REASON(SSL_R_MISSING_ECDSA_SIGNING_CERT), + "missing ecdsa signing cert"}, {ERR_REASON(SSL_R_MISSING_EXPORT_TMP_DH_KEY), "missing export tmp dh key"}, {ERR_REASON(SSL_R_MISSING_EXPORT_TMP_RSA_KEY), @@ -537,6 +563,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_NO_GOST_CERTIFICATE_SENT_BY_PEER), "Peer haven't sent GOST certificate, required for selected ciphersuite"}, {ERR_REASON(SSL_R_NO_METHOD_SPECIFIED), "no method specified"}, + {ERR_REASON(SSL_R_NO_PEM_EXTENSIONS), "no pem extensions"}, {ERR_REASON(SSL_R_NO_PRIVATEKEY), "no privatekey"}, {ERR_REASON(SSL_R_NO_PRIVATE_KEY_ASSIGNED), "no private key assigned"}, {ERR_REASON(SSL_R_NO_PROTOCOLS_AVAILABLE), "no protocols available"}, @@ -545,6 +572,8 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_NO_REQUIRED_DIGEST), "digest requred for handshake isn't computed"}, {ERR_REASON(SSL_R_NO_SHARED_CIPHER), "no shared cipher"}, + {ERR_REASON(SSL_R_NO_SHARED_SIGATURE_ALGORITHMS), + "no shared sigature algorithms"}, {ERR_REASON(SSL_R_NO_SRTP_PROFILES), "no srtp profiles"}, {ERR_REASON(SSL_R_NO_VERIFY_CALLBACK), "no verify callback"}, {ERR_REASON(SSL_R_NULL_SSL_CTX), "null ssl ctx"}, @@ -553,6 +582,10 @@ static ERR_STRING_DATA SSL_str_reasons[] = { "old session cipher not returned"}, {ERR_REASON(SSL_R_OLD_SESSION_COMPRESSION_ALGORITHM_NOT_RETURNED), "old session compression algorithm not returned"}, + {ERR_REASON(SSL_R_ONLY_DTLS_1_2_ALLOWED_IN_SUITEB_MODE), + "only DTLS 1.2 allowed in Suite B mode"}, + {ERR_REASON(SSL_R_ONLY_TLS_1_2_ALLOWED_IN_SUITEB_MODE), + "only TLS 1.2 allowed in Suite B mode"}, {ERR_REASON(SSL_R_ONLY_TLS_ALLOWED_IN_FIPS_MODE), "only tls allowed in fips mode"}, {ERR_REASON(SSL_R_OPAQUE_PRF_INPUT_TOO_LONG), @@ -569,6 +602,8 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_PEER_ERROR_NO_CIPHER), "peer error no cipher"}, {ERR_REASON(SSL_R_PEER_ERROR_UNSUPPORTED_CERTIFICATE_TYPE), "peer error unsupported certificate type"}, + {ERR_REASON(SSL_R_PEM_NAME_BAD_PREFIX), "pem name bad prefix"}, + {ERR_REASON(SSL_R_PEM_NAME_TOO_SHORT), "pem name too short"}, {ERR_REASON(SSL_R_PRE_MAC_LENGTH_TOO_LONG), "pre mac length too long"}, {ERR_REASON(SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIONS), "problems mapping cipher functions"}, @@ -739,6 +774,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { 
{ERR_REASON(SSL_R_UNKNOWN_CERTIFICATE_TYPE), "unknown certificate type"}, {ERR_REASON(SSL_R_UNKNOWN_CIPHER_RETURNED), "unknown cipher returned"}, {ERR_REASON(SSL_R_UNKNOWN_CIPHER_TYPE), "unknown cipher type"}, + {ERR_REASON(SSL_R_UNKNOWN_CMD_NAME), "unknown cmd name"}, {ERR_REASON(SSL_R_UNKNOWN_DIGEST), "unknown digest"}, {ERR_REASON(SSL_R_UNKNOWN_KEY_EXCHANGE_TYPE), "unknown key exchange type"}, @@ -761,7 +797,9 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_UNSUPPORTED_STATUS_TYPE), "unsupported status type"}, {ERR_REASON(SSL_R_USE_SRTP_NOT_NEGOTIATED), "use srtp not negotiated"}, {ERR_REASON(SSL_R_WRITE_BIO_NOT_SET), "write bio not set"}, + {ERR_REASON(SSL_R_WRONG_CERTIFICATE_TYPE), "wrong certificate type"}, {ERR_REASON(SSL_R_WRONG_CIPHER_RETURNED), "wrong cipher returned"}, + {ERR_REASON(SSL_R_WRONG_CURVE), "wrong curve"}, {ERR_REASON(SSL_R_WRONG_MESSAGE_TYPE), "wrong message type"}, {ERR_REASON(SSL_R_WRONG_NUMBER_OF_KEY_BITS), "wrong number of key bits"}, {ERR_REASON(SSL_R_WRONG_SIGNATURE_LENGTH), "wrong signature length"}, diff --git a/deps/openssl/openssl/ssl/ssl_lib.c b/deps/openssl/openssl/ssl/ssl_lib.c index dead126184a84f..e9ad2bc81beb67 100644 --- a/deps/openssl/openssl/ssl/ssl_lib.c +++ b/deps/openssl/openssl/ssl/ssl_lib.c @@ -273,7 +273,7 @@ int SSL_CTX_set_ssl_version(SSL_CTX *ctx, const SSL_METHOD *meth) &(ctx->cipher_list_by_id), meth->version == SSL2_VERSION ? "SSLv2" : - SSL_DEFAULT_CIPHER_LIST); + SSL_DEFAULT_CIPHER_LIST, ctx->cert); if ((sk == NULL) || (sk_SSL_CIPHER_num(sk) <= 0)) { SSLerr(SSL_F_SSL_CTX_SET_SSL_VERSION, SSL_R_SSL_LIBRARY_HAS_NO_CIPHERS); @@ -363,9 +363,39 @@ SSL *SSL_new(SSL_CTX *ctx) s->tlsext_ocsp_resplen = -1; CRYPTO_add(&ctx->references, 1, CRYPTO_LOCK_SSL_CTX); s->initial_ctx = ctx; +# ifndef OPENSSL_NO_EC + if (ctx->tlsext_ecpointformatlist) { + s->tlsext_ecpointformatlist = + BUF_memdup(ctx->tlsext_ecpointformatlist, + ctx->tlsext_ecpointformatlist_length); + if (!s->tlsext_ecpointformatlist) + goto err; + s->tlsext_ecpointformatlist_length = + ctx->tlsext_ecpointformatlist_length; + } + if (ctx->tlsext_ellipticcurvelist) { + s->tlsext_ellipticcurvelist = + BUF_memdup(ctx->tlsext_ellipticcurvelist, + ctx->tlsext_ellipticcurvelist_length); + if (!s->tlsext_ellipticcurvelist) + goto err; + s->tlsext_ellipticcurvelist_length = + ctx->tlsext_ellipticcurvelist_length; + } +# endif # ifndef OPENSSL_NO_NEXTPROTONEG s->next_proto_negotiated = NULL; # endif + + if (s->ctx->alpn_client_proto_list) { + s->alpn_client_proto_list = + OPENSSL_malloc(s->ctx->alpn_client_proto_list_len); + if (s->alpn_client_proto_list == NULL) + goto err; + memcpy(s->alpn_client_proto_list, s->ctx->alpn_client_proto_list, + s->ctx->alpn_client_proto_list_len); + s->alpn_client_proto_list_len = s->ctx->alpn_client_proto_list_len; + } #endif s->verify_result = X509_V_OK; @@ -505,6 +535,21 @@ int SSL_set1_param(SSL *ssl, X509_VERIFY_PARAM *vpm) return X509_VERIFY_PARAM_set1(ssl->param, vpm); } +X509_VERIFY_PARAM *SSL_CTX_get0_param(SSL_CTX *ctx) +{ + return ctx->param; +} + +X509_VERIFY_PARAM *SSL_get0_param(SSL *ssl) +{ + return ssl->param; +} + +void SSL_certs_clear(SSL *s) +{ + ssl_cert_clear_certs(s->cert); +} + void SSL_free(SSL *s) { int i; @@ -585,6 +630,8 @@ void SSL_free(SSL *s) sk_OCSP_RESPID_pop_free(s->tlsext_ocsp_ids, OCSP_RESPID_free); if (s->tlsext_ocsp_resp) OPENSSL_free(s->tlsext_ocsp_resp); + if (s->alpn_client_proto_list) + OPENSSL_free(s->alpn_client_proto_list); #endif if (s->client_CA != NULL) @@ -1088,6 +1135,19 @@ long SSL_ctrl(SSL *s, 
int cmd, long larg, void *parg) return s->s3->send_connection_binding; else return 0; + case SSL_CTRL_CERT_FLAGS: + return (s->cert->cert_flags |= larg); + case SSL_CTRL_CLEAR_CERT_FLAGS: + return (s->cert->cert_flags &= ~larg); + + case SSL_CTRL_GET_RAW_CIPHERLIST: + if (parg) { + if (s->cert->ciphers_raw == NULL) + return 0; + *(unsigned char **)parg = s->cert->ciphers_raw; + return (int)s->cert->ciphers_rawlen; + } else + return ssl_put_cipher_by_char(s, NULL, NULL); default: return (s->method->ssl_ctrl(s, cmd, larg, parg)); } @@ -1116,6 +1176,20 @@ LHASH_OF(SSL_SESSION) *SSL_CTX_sessions(SSL_CTX *ctx) long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) { long l; + /* For some cases with ctx == NULL perform syntax checks */ + if (ctx == NULL) { + switch (cmd) { +#ifndef OPENSSL_NO_EC + case SSL_CTRL_SET_CURVES_LIST: + return tls1_set_curves_list(NULL, NULL, parg); +#endif + case SSL_CTRL_SET_SIGALGS_LIST: + case SSL_CTRL_SET_CLIENT_SIGALGS_LIST: + return tls1_set_sigalgs_list(NULL, parg, 0); + default: + return 0; + } + } switch (cmd) { case SSL_CTRL_GET_READ_AHEAD: @@ -1186,6 +1260,10 @@ long SSL_CTX_ctrl(SSL_CTX *ctx, int cmd, long larg, void *parg) return 0; ctx->max_send_fragment = larg; return 1; + case SSL_CTRL_CERT_FLAGS: + return (ctx->cert->cert_flags |= larg); + case SSL_CTRL_CLEAR_CERT_FLAGS: + return (ctx->cert->cert_flags &= ~larg); default: return (ctx->method->ssl_ctx_ctrl(ctx, cmd, larg, parg)); } @@ -1280,7 +1358,7 @@ int SSL_CTX_set_cipher_list(SSL_CTX *ctx, const char *str) STACK_OF(SSL_CIPHER) *sk; sk = ssl_create_cipher_list(ctx->method, &ctx->cipher_list, - &ctx->cipher_list_by_id, str); + &ctx->cipher_list_by_id, str, ctx->cert); /* * ssl_create_cipher_list may return an empty stack if it was unable to * find a cipher matching the given rule string (for example if the rule @@ -1303,7 +1381,7 @@ int SSL_set_cipher_list(SSL *s, const char *str) STACK_OF(SSL_CIPHER) *sk; sk = ssl_create_cipher_list(s->ctx->method, &s->cipher_list, - &s->cipher_list_by_id, str); + &s->cipher_list_by_id, str, s->cert); /* see comment in SSL_CTX_set_cipher_list */ if (sk == NULL) return 0; @@ -1358,10 +1436,11 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk, { int i, j = 0; SSL_CIPHER *c; + CERT *ct = s->cert; unsigned char *q; -#ifndef OPENSSL_NO_KRB5 - int nokrb5 = !kssl_tgt_is_available(s->kssl_ctx); -#endif /* OPENSSL_NO_KRB5 */ + int empty_reneg_info_scsv = !s->renegotiate; + /* Set disabled masks for this session */ + ssl_set_client_disabled(s); if (sk == NULL) return (0); @@ -1371,26 +1450,18 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk, for (i = 0; i < sk_SSL_CIPHER_num(sk); i++) { c = sk_SSL_CIPHER_value(sk, i); - /* Skip TLS v1.2 only ciphersuites if lower than v1.2 */ - if ((c->algorithm_ssl & SSL_TLSV1_2) && - (TLS1_get_client_version(s) < TLS1_2_VERSION)) - continue; -#ifndef OPENSSL_NO_KRB5 - if (((c->algorithm_mkey & SSL_kKRB5) - || (c->algorithm_auth & SSL_aKRB5)) && nokrb5) + /* Skip disabled ciphers */ + if (c->algorithm_ssl & ct->mask_ssl || + c->algorithm_mkey & ct->mask_k || c->algorithm_auth & ct->mask_a) continue; -#endif /* OPENSSL_NO_KRB5 */ -#ifndef OPENSSL_NO_PSK - /* with PSK there must be client callback set */ - if (((c->algorithm_mkey & SSL_kPSK) || (c->algorithm_auth & SSL_aPSK)) - && s->psk_client_callback == NULL) - continue; -#endif /* OPENSSL_NO_PSK */ -#ifndef OPENSSL_NO_SRP - if (((c->algorithm_mkey & SSL_kSRP) || (c->algorithm_auth & SSL_aSRP)) - && !(s->srp_ctx.srp_Mask & SSL_kSRP)) - continue; 
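The new SSL_CTX_ctrl() cases above, including the ctx == NULL syntax-check path for curve and signature-algorithm lists, back the SSL_CTX_set1_curves_list() and SSL_CTX_set1_sigalgs_list() macros; the related SSL_CTX_set_ecdh_auto() control is handled in the version-specific ctrl code, which is not shown here. A server-side sketch, assuming an EC-enabled build, with example lists:

#include <openssl/ssl.h>

/* EC configuration on a server context: automatic ECDHE parameter
 * selection plus restricted curve and signature-algorithm lists.
 * The lists themselves are only examples. */
static int configure_ec(SSL_CTX *ctx)
{
    if (SSL_CTX_set_ecdh_auto(ctx, 1) != 1)
        return 0;
    if (SSL_CTX_set1_curves_list(ctx, "P-256:P-384") != 1)
        return 0;
    if (SSL_CTX_set1_sigalgs_list(ctx, "ECDSA+SHA256:RSA+SHA256") != 1)
        return 0;
    return 1;
}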
-#endif /* OPENSSL_NO_SRP */ +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + if (c->id == SSL3_CK_SCSV) { + if (!empty_reneg_info_scsv) + continue; + else + empty_reneg_info_scsv = 0; + } +#endif j = put_cb(c, p); p += j; } @@ -1399,7 +1470,7 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk, * applicable SCSVs. */ if (p != q) { - if (!s->renegotiate) { + if (empty_reneg_info_scsv) { static SSL_CIPHER scsv = { 0, NULL, SSL3_CK_SCSV, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -1410,7 +1481,6 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk, "TLS_EMPTY_RENEGOTIATION_INFO_SCSV sent by client\n"); #endif } - if (s->mode & SSL_MODE_SEND_FALLBACK_SCSV) { static SSL_CIPHER scsv = { 0, NULL, SSL3_CK_FALLBACK_SCSV, 0, 0, 0, 0, 0, 0, 0, 0, 0 @@ -1447,6 +1517,15 @@ STACK_OF(SSL_CIPHER) *ssl_bytes_to_cipher_list(SSL *s, unsigned char *p, sk_SSL_CIPHER_zero(sk); } + if (s->cert->ciphers_raw) + OPENSSL_free(s->cert->ciphers_raw); + s->cert->ciphers_raw = BUF_memdup(p, num); + if (s->cert->ciphers_raw == NULL) { + SSLerr(SSL_F_SSL_BYTES_TO_CIPHER_LIST, ERR_R_MALLOC_FAILURE); + goto err; + } + s->cert->ciphers_rawlen = (size_t)num; + for (i = 0; i < num; i += n) { /* Check for TLS_EMPTY_RENEGOTIATION_INFO_SCSV */ if (s->s3 && (n != 3 || !p[0]) && @@ -1530,7 +1609,6 @@ int SSL_get_servername_type(const SSL *s) return -1; } -# ifndef OPENSSL_NO_NEXTPROTONEG /* * SSL_select_next_proto implements the standard protocol selection. It is * expected that this function is called from the callback set by @@ -1590,6 +1668,7 @@ int SSL_select_next_proto(unsigned char **out, unsigned char *outlen, return status; } +# ifndef OPENSSL_NO_NEXTPROTONEG /* * SSL_get0_next_proto_negotiated sets *data and *len to point to the * client's requested protocol for this connection and returns 0. If the @@ -1651,7 +1730,83 @@ void SSL_CTX_set_next_proto_select_cb(SSL_CTX *ctx, ctx->next_proto_select_cb_arg = arg; } # endif -#endif + +/* + * SSL_CTX_set_alpn_protos sets the ALPN protocol list on |ctx| to |protos|. + * |protos| must be in wire-format (i.e. a series of non-empty, 8-bit + * length-prefixed strings). Returns 0 on success. + */ +int SSL_CTX_set_alpn_protos(SSL_CTX *ctx, const unsigned char *protos, + unsigned protos_len) +{ + if (ctx->alpn_client_proto_list) + OPENSSL_free(ctx->alpn_client_proto_list); + + ctx->alpn_client_proto_list = OPENSSL_malloc(protos_len); + if (!ctx->alpn_client_proto_list) + return 1; + memcpy(ctx->alpn_client_proto_list, protos, protos_len); + ctx->alpn_client_proto_list_len = protos_len; + + return 0; +} + +/* + * SSL_set_alpn_protos sets the ALPN protocol list on |ssl| to |protos|. + * |protos| must be in wire-format (i.e. a series of non-empty, 8-bit + * length-prefixed strings). Returns 0 on success. + */ +int SSL_set_alpn_protos(SSL *ssl, const unsigned char *protos, + unsigned protos_len) +{ + if (ssl->alpn_client_proto_list) + OPENSSL_free(ssl->alpn_client_proto_list); + + ssl->alpn_client_proto_list = OPENSSL_malloc(protos_len); + if (!ssl->alpn_client_proto_list) + return 1; + memcpy(ssl->alpn_client_proto_list, protos, protos_len); + ssl->alpn_client_proto_list_len = protos_len; + + return 0; +} + +/* + * SSL_CTX_set_alpn_select_cb sets a callback function on |ctx| that is + * called during ClientHello processing in order to select an ALPN protocol + * from the client's list of offered protocols. 
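The ALPN entry points added here (SSL_CTX_set_alpn_protos, SSL_set_alpn_protos, SSL_CTX_set_alpn_select_cb, SSL_get0_alpn_selected) fit together roughly as sketched below; the protocol names and helper functions are illustrative, and SSL_select_next_proto() is reused on the server side to pick the first mutually supported protocol.

#include <stdio.h>
#include <openssl/ssl.h>

/* ALPN wire format: each protocol name is prefixed by a one-byte length. */
static const unsigned char alpn_protos[] = {
    2, 'h', '2',
    8, 'h', 't', 't', 'p', '/', '1', '.', '1'
};

/* Server: choose the first of our protocols that the client also offered. */
static int alpn_select_cb(SSL *ssl, const unsigned char **out,
                          unsigned char *outlen, const unsigned char *in,
                          unsigned int inlen, void *arg)
{
    unsigned char *selected;

    (void)ssl;
    (void)arg;
    if (SSL_select_next_proto(&selected, outlen, alpn_protos,
                              sizeof(alpn_protos), in,
                              inlen) != OPENSSL_NPN_NEGOTIATED)
        return SSL_TLSEXT_ERR_NOACK;    /* no overlap: proceed without ALPN */
    *out = selected;                    /* copied by the library on return */
    return SSL_TLSEXT_ERR_OK;
}

static int setup_alpn(SSL_CTX *client_ctx, SSL_CTX *server_ctx)
{
    /* Client side: note the 0-on-success convention documented above. */
    if (SSL_CTX_set_alpn_protos(client_ctx, alpn_protos,
                                sizeof(alpn_protos)) != 0)
        return 0;
    /* Server side. */
    SSL_CTX_set_alpn_select_cb(server_ctx, alpn_select_cb, NULL);
    return 1;
}

/* After the handshake either side can query the negotiated protocol. */
static void print_alpn(SSL *ssl)
{
    const unsigned char *proto = NULL;
    unsigned int len = 0;

    SSL_get0_alpn_selected(ssl, &proto, &len);
    if (len > 0)
        printf("ALPN protocol: %.*s\n", (int)len, (const char *)proto);
}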
+ */ +void SSL_CTX_set_alpn_select_cb(SSL_CTX *ctx, + int (*cb) (SSL *ssl, + const unsigned char **out, + unsigned char *outlen, + const unsigned char *in, + unsigned int inlen, + void *arg), void *arg) +{ + ctx->alpn_select_cb = cb; + ctx->alpn_select_cb_arg = arg; +} + +/* + * SSL_get0_alpn_selected gets the selected ALPN protocol (if any) from + * |ssl|. On return it sets |*data| to point to |*len| bytes of protocol name + * (not including the leading length-prefix byte). If the server didn't + * respond with a negotiated protocol then |*len| will be zero. + */ +void SSL_get0_alpn_selected(const SSL *ssl, const unsigned char **data, + unsigned *len) +{ + *data = NULL; + if (ssl->s3) + *data = ssl->s3->alpn_selected; + if (*data == NULL) + *len = 0; + else + *len = ssl->s3->alpn_selected_len; +} + +#endif /* !OPENSSL_NO_TLSEXT */ int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, const char *label, size_t llen, @@ -1790,7 +1945,8 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth) ssl_create_cipher_list(ret->method, &ret->cipher_list, &ret->cipher_list_by_id, meth->version == - SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST); + SSL2_VERSION ? "SSLv2" : SSL_DEFAULT_CIPHER_LIST, + ret->cert); if (ret->cipher_list == NULL || sk_SSL_CIPHER_num(ret->cipher_list) <= 0) { SSLerr(SSL_F_SSL_CTX_NEW, SSL_R_LIBRARY_HAS_NO_CIPHERS); goto err2; @@ -2003,6 +2159,16 @@ void SSL_CTX_free(SSL_CTX *a) if (a->rbuf_freelist) ssl_buf_freelist_free(a->rbuf_freelist); #endif +#ifndef OPENSSL_NO_TLSEXT +# ifndef OPENSSL_NO_EC + if (a->tlsext_ecpointformatlist) + OPENSSL_free(a->tlsext_ecpointformatlist); + if (a->tlsext_ellipticcurvelist) + OPENSSL_free(a->tlsext_ellipticcurvelist); +# endif /* OPENSSL_NO_EC */ + if (a->alpn_client_proto_list != NULL) + OPENSSL_free(a->alpn_client_proto_list); +#endif OPENSSL_free(a); } @@ -2037,6 +2203,17 @@ void SSL_CTX_set_verify_depth(SSL_CTX *ctx, int depth) X509_VERIFY_PARAM_set_depth(ctx->param, depth); } +void SSL_CTX_set_cert_cb(SSL_CTX *c, int (*cb) (SSL *ssl, void *arg), + void *arg) +{ + ssl_cert_set_cert_cb(c->cert, cb, arg); +} + +void SSL_set_cert_cb(SSL *s, int (*cb) (SSL *ssl, void *arg), void *arg) +{ + ssl_cert_set_cert_cb(s->cert, cb, arg); +} + void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) { CERT_PKEY *cpk; @@ -2076,25 +2253,25 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) #endif #ifndef OPENSSL_NO_ECDH - have_ecdh_tmp = (c->ecdh_tmp != NULL || c->ecdh_tmp_cb != NULL); + have_ecdh_tmp = (c->ecdh_tmp || c->ecdh_tmp_cb || c->ecdh_tmp_auto); #endif cpk = &(c->pkeys[SSL_PKEY_RSA_ENC]); - rsa_enc = (cpk->x509 != NULL && cpk->privatekey != NULL); + rsa_enc = cpk->valid_flags & CERT_PKEY_VALID; rsa_enc_export = (rsa_enc && EVP_PKEY_size(cpk->privatekey) * 8 <= kl); cpk = &(c->pkeys[SSL_PKEY_RSA_SIGN]); - rsa_sign = (cpk->x509 != NULL && cpk->privatekey != NULL); + rsa_sign = cpk->valid_flags & CERT_PKEY_SIGN; cpk = &(c->pkeys[SSL_PKEY_DSA_SIGN]); - dsa_sign = (cpk->x509 != NULL && cpk->privatekey != NULL); + dsa_sign = cpk->valid_flags & CERT_PKEY_SIGN; cpk = &(c->pkeys[SSL_PKEY_DH_RSA]); - dh_rsa = (cpk->x509 != NULL && cpk->privatekey != NULL); + dh_rsa = cpk->valid_flags & CERT_PKEY_VALID; dh_rsa_export = (dh_rsa && EVP_PKEY_size(cpk->privatekey) * 8 <= kl); cpk = &(c->pkeys[SSL_PKEY_DH_DSA]); /* FIX THIS EAY EAY EAY */ - dh_dsa = (cpk->x509 != NULL && cpk->privatekey != NULL); + dh_dsa = cpk->valid_flags & CERT_PKEY_VALID; dh_dsa_export = (dh_dsa && EVP_PKEY_size(cpk->privatekey) * 8 <= kl); cpk = 
&(c->pkeys[SSL_PKEY_ECC]); #ifndef OPENSSL_NO_EC - have_ecc_cert = (cpk->x509 != NULL && cpk->privatekey != NULL); + have_ecc_cert = cpk->valid_flags & CERT_PKEY_VALID; #endif mask_k = 0; mask_a = 0; @@ -2149,6 +2326,9 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) if (dh_dsa_export) emask_k |= SSL_kDHd; + if (emask_k & (SSL_kDHr | SSL_kDHd)) + mask_a |= SSL_aDH; + if (rsa_enc || rsa_sign) { mask_a |= SSL_aRSA; emask_a |= SSL_aRSA; @@ -2175,13 +2355,18 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) */ #ifndef OPENSSL_NO_EC if (have_ecc_cert) { + cpk = &c->pkeys[SSL_PKEY_ECC]; + x = cpk->x509; /* This call populates extension flags (ex_flags) */ - x = (c->pkeys[SSL_PKEY_ECC]).x509; X509_check_purpose(x, -1, 0); +# ifndef OPENSSL_NO_ECDH ecdh_ok = (x->ex_flags & EXFLAG_KUSAGE) ? (x->ex_kusage & X509v3_KU_KEY_AGREEMENT) : 1; +# endif ecdsa_ok = (x->ex_flags & EXFLAG_KUSAGE) ? (x->ex_kusage & X509v3_KU_DIGITAL_SIGNATURE) : 1; + if (!(cpk->valid_flags & CERT_PKEY_SIGN)) + ecdsa_ok = 0; ecc_pkey = X509_get_pubkey(x); ecc_pkey_size = (ecc_pkey != NULL) ? EVP_PKEY_bits(ecc_pkey) : 0; EVP_PKEY_free(ecc_pkey); @@ -2189,7 +2374,7 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) signature_nid = OBJ_obj2nid(x->sig_alg->algorithm); OBJ_find_sigid_algs(signature_nid, &md_nid, &pk_nid); } -#ifndef OPENSSL_NO_ECDH +# ifndef OPENSSL_NO_ECDH if (ecdh_ok) { if (pk_nid == NID_rsaEncryption || pk_nid == NID_rsa) { @@ -2210,15 +2395,16 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) } } } -#endif -#ifndef OPENSSL_NO_ECDSA +# endif +# ifndef OPENSSL_NO_ECDSA if (ecdsa_ok) { mask_a |= SSL_aECDSA; emask_a |= SSL_aECDSA; } -#endif +# endif } #endif + #ifndef OPENSSL_NO_ECDH if (have_ecdh_tmp) { mask_k |= SSL_kEECDH; @@ -2313,65 +2499,44 @@ int ssl_check_srvr_ecc_cert_and_alg(X509 *x, SSL *s) #endif -/* THIS NEEDS CLEANING UP */ +static int ssl_get_server_cert_index(const SSL *s) +{ + int idx; + idx = ssl_cipher_get_cert_index(s->s3->tmp.new_cipher); + if (idx == SSL_PKEY_RSA_ENC && !s->cert->pkeys[SSL_PKEY_RSA_ENC].x509) + idx = SSL_PKEY_RSA_SIGN; + if (idx == -1) + SSLerr(SSL_F_SSL_GET_SERVER_CERT_INDEX, ERR_R_INTERNAL_ERROR); + return idx; +} + CERT_PKEY *ssl_get_server_send_pkey(const SSL *s) { - unsigned long alg_k, alg_a; CERT *c; int i; c = s->cert; + if (!s->s3 || !s->s3->tmp.new_cipher) + return NULL; ssl_set_cert_masks(c, s->s3->tmp.new_cipher); - alg_k = s->s3->tmp.new_cipher->algorithm_mkey; - alg_a = s->s3->tmp.new_cipher->algorithm_auth; - - if (alg_k & (SSL_kECDHr | SSL_kECDHe)) { - /* - * we don't need to look at SSL_kEECDH since no certificate is needed - * for anon ECDH and for authenticated EECDH, the check for the auth - * algorithm will set i correctly NOTE: For ECDH-RSA, we need an ECC - * not an RSA cert but for EECDH-RSA we need an RSA cert. Placing the - * checks for SSL_kECDH before RSA checks ensures the correct cert is - * chosen. - */ - i = SSL_PKEY_ECC; - } else if (alg_a & SSL_aECDSA) { - i = SSL_PKEY_ECC; - } else if (alg_k & SSL_kDHr) - i = SSL_PKEY_DH_RSA; - else if (alg_k & SSL_kDHd) - i = SSL_PKEY_DH_DSA; - else if (alg_a & SSL_aDSS) - i = SSL_PKEY_DSA_SIGN; - else if (alg_a & SSL_aRSA) { - if (c->pkeys[SSL_PKEY_RSA_ENC].x509 == NULL) - i = SSL_PKEY_RSA_SIGN; - else - i = SSL_PKEY_RSA_ENC; - } else if (alg_a & SSL_aKRB5) { - /* VRS something else here? 
*/ - return (NULL); - } else if (alg_a & SSL_aGOST94) - i = SSL_PKEY_GOST94; - else if (alg_a & SSL_aGOST01) - i = SSL_PKEY_GOST01; - else { /* if (alg_a & SSL_aNULL) */ - - SSLerr(SSL_F_SSL_GET_SERVER_SEND_PKEY, ERR_R_INTERNAL_ERROR); - return (NULL); - } +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + /* + * Broken protocol test: return last used certificate: which may mismatch + * the one expected. + */ + if (c->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) + return c->key; +#endif - return c->pkeys + i; -} + i = ssl_get_server_cert_index(s); -X509 *ssl_get_server_send_cert(const SSL *s) -{ - CERT_PKEY *cpk; - cpk = ssl_get_server_send_pkey(s); - if (!cpk) + /* This may or may not be an error. */ + if (i < 0) return NULL; - return cpk->x509; + + /* May be NULL. */ + return &c->pkeys[i]; } EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *cipher, @@ -2384,8 +2549,18 @@ EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *cipher, alg_a = cipher->algorithm_auth; c = s->cert; +#ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + /* + * Broken protocol test: use last key: which may mismatch the one + * expected. + */ + if (c->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) + idx = c->key - c->pkeys; + else +#endif + if ((alg_a & SSL_aDSS) && - (c->pkeys[SSL_PKEY_DSA_SIGN].privatekey != NULL)) + (c->pkeys[SSL_PKEY_DSA_SIGN].privatekey != NULL)) idx = SSL_PKEY_DSA_SIGN; else if (alg_a & SSL_aRSA) { if (c->pkeys[SSL_PKEY_RSA_SIGN].privatekey != NULL) @@ -2404,6 +2579,28 @@ EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *cipher, return c->pkeys[idx].privatekey; } +#ifndef OPENSSL_NO_TLSEXT +int ssl_get_server_cert_serverinfo(SSL *s, const unsigned char **serverinfo, + size_t *serverinfo_length) +{ + CERT *c = NULL; + int i = 0; + *serverinfo_length = 0; + + c = s->cert; + i = ssl_get_server_cert_index(s); + + if (i == -1) + return 0; + if (c->pkeys[i].serverinfo == NULL) + return 0; + + *serverinfo = c->pkeys[i].serverinfo; + *serverinfo_length = c->pkeys[i].serverinfo_length; + return 1; +} +#endif + void ssl_update_cache(SSL *s, int mode) { int i; @@ -2435,6 +2632,11 @@ void ssl_update_cache(SSL *s, int mode) } } +const SSL_METHOD *SSL_CTX_get_ssl_method(SSL_CTX *ctx) +{ + return ctx->method; +} + const SSL_METHOD *SSL_get_ssl_method(SSL *s) { return (s->method); @@ -2780,7 +2982,6 @@ void ssl_clear_cipher_ctx(SSL *s) #endif } -/* Fix this function so that it takes an optional type parameter */ X509 *SSL_get_certificate(const SSL *s) { if (s->cert != NULL) @@ -2789,8 +2990,7 @@ X509 *SSL_get_certificate(const SSL *s) return (NULL); } -/* Fix this function so that it takes an optional type parameter */ -EVP_PKEY *SSL_get_privatekey(SSL *s) +EVP_PKEY *SSL_get_privatekey(const SSL *s) { if (s->cert != NULL) return (s->cert->key->privatekey); @@ -2798,6 +2998,22 @@ EVP_PKEY *SSL_get_privatekey(SSL *s) return (NULL); } +X509 *SSL_CTX_get0_certificate(const SSL_CTX *ctx) +{ + if (ctx->cert != NULL) + return ctx->cert->key->x509; + else + return NULL; +} + +EVP_PKEY *SSL_CTX_get0_privatekey(const SSL_CTX *ctx) +{ + if (ctx->cert != NULL) + return ctx->cert->key->privatekey; + else + return NULL; +} + const SSL_CIPHER *SSL_get_current_cipher(const SSL *s) { if ((s->session != NULL) && (s->session->cipher != NULL)) @@ -2929,13 +3145,15 @@ SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX *ctx) ctx = ssl->initial_ctx; #endif ssl->cert = ssl_cert_dup(ctx->cert); - if (ocert != NULL) { - int i; - /* Copy negotiated digests from original */ - for (i = 0; i < SSL_PKEY_NUM; i++) { - CERT_PKEY *cpk = ocert->pkeys + i; - 
CERT_PKEY *rpk = ssl->cert->pkeys + i; - rpk->digest = cpk->digest; + if (ocert) { + /* Preserve any already negotiated parameters */ + if (ssl->server) { + ssl->cert->peer_sigalgs = ocert->peer_sigalgs; + ssl->cert->peer_sigalgslen = ocert->peer_sigalgslen; + ocert->peer_sigalgs = NULL; + ssl->cert->ciphers_raw = ocert->ciphers_raw; + ssl->cert->ciphers_rawlen = ocert->ciphers_rawlen; + ocert->ciphers_raw = NULL; } ssl_cert_free(ocert); } @@ -3302,6 +3520,11 @@ int SSL_cache_hit(SSL *s) return s->hit; } +int SSL_is_server(SSL *s) +{ + return s->server; +} + #if defined(_WINDLL) && defined(OPENSSL_SYS_WIN16) # include "../crypto/bio/bss_file.c" #endif diff --git a/deps/openssl/openssl/ssl/ssl_locl.h b/deps/openssl/openssl/ssl/ssl_locl.h index aff3b65d1702ba..79b85b9ed947de 100644 --- a/deps/openssl/openssl/ssl/ssl_locl.h +++ b/deps/openssl/openssl/ssl/ssl_locl.h @@ -291,13 +291,13 @@ /* RSA key exchange */ # define SSL_kRSA 0x00000001L /* DH cert, RSA CA cert */ -/* no such ciphersuites supported! */ # define SSL_kDHr 0x00000002L /* DH cert, DSA CA cert */ -/* no such ciphersuite supported! */ # define SSL_kDHd 0x00000004L /* tmp DH key no DH cert */ # define SSL_kEDH 0x00000008L +/* forward-compatible synonym */ +# define SSL_kDHE SSL_kEDH /* Kerberos5 key exchange */ # define SSL_kKRB5 0x00000010L /* ECDH cert, RSA CA cert */ @@ -306,6 +306,8 @@ # define SSL_kECDHe 0x00000040L /* ephemeral ECDH */ # define SSL_kEECDH 0x00000080L +/* forward-compatible synonym */ +# define SSL_kECDHE SSL_kEECDH /* PSK */ # define SSL_kPSK 0x00000100L /* GOST key exchange */ @@ -321,7 +323,6 @@ /* no auth (i.e. use ADH or AECDH) */ # define SSL_aNULL 0x00000004L /* Fixed DH auth (kDHd or kDHr) */ -/* no such ciphersuites supported! */ # define SSL_aDH 0x00000008L /* Fixed ECDH auth (kECDHe or kECDHr) */ # define SSL_aECDH 0x00000010L @@ -465,6 +466,31 @@ (c)->algo_strength) # define SSL_C_EXPORT_PKEYLENGTH(c) SSL_EXPORT_PKEYLENGTH((c)->algo_strength) +/* Check if an SSL structure is using DTLS */ +# define SSL_IS_DTLS(s) (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_DTLS) +/* See if we need explicit IV */ +# define SSL_USE_EXPLICIT_IV(s) \ + (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_EXPLICIT_IV) +/* + * See if we use signature algorithms extension and signature algorithm + * before signatures. + */ +# define SSL_USE_SIGALGS(s) \ + (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_SIGALGS) +/* + * Allow TLS 1.2 ciphersuites: applies to DTLS 1.2 as well as TLS 1.2: may + * apply to others in future. + */ +# define SSL_USE_TLS1_2_CIPHERS(s) \ + (s->method->ssl3_enc->enc_flags & SSL_ENC_FLAG_TLS1_2_CIPHERS) +/* + * Determine if a client can use TLS 1.2 ciphersuites: can't rely on method + * flags because it may not be set to correct version yet. + */ +# define SSL_CLIENT_USE_TLS1_2_CIPHERS(s) \ + ((SSL_IS_DTLS(s) && s->client_version <= DTLS1_2_VERSION) || \ + (!SSL_IS_DTLS(s) && s->client_version >= TLS1_2_VERSION)) + /* Mostly for SSLv3 */ # define SSL_PKEY_RSA_ENC 0 # define SSL_PKEY_RSA_SIGN 1 @@ -505,7 +531,63 @@ typedef struct cert_pkey_st { EVP_PKEY *privatekey; /* Digest to use when signing */ const EVP_MD *digest; + /* Chain for this certificate */ + STACK_OF(X509) *chain; +# ifndef OPENSSL_NO_TLSEXT + /*- + * serverinfo data for this certificate. 
The data is in TLS Extension + * wire format, specifically it's a series of records like: + * uint16_t extension_type; // (RFC 5246, 7.4.1.4, Extension) + * uint16_t length; + * uint8_t data[length]; + */ + unsigned char *serverinfo; + size_t serverinfo_length; +# endif + /* + * Set if CERT_PKEY can be used with current SSL session: e.g. + * appropriate curve, signature algorithms etc. If zero it can't be used + * at all. + */ + int valid_flags; } CERT_PKEY; +/* Retrieve Suite B flags */ +# define tls1_suiteb(s) (s->cert->cert_flags & SSL_CERT_FLAG_SUITEB_128_LOS) +/* Uses to check strict mode: suite B modes are always strict */ +# define SSL_CERT_FLAGS_CHECK_TLS_STRICT \ + (SSL_CERT_FLAG_SUITEB_128_LOS|SSL_CERT_FLAG_TLS_STRICT) + +typedef struct { + unsigned short ext_type; + /* + * Per-connection flags relating to this extension type: not used if + * part of an SSL_CTX structure. + */ + unsigned short ext_flags; + custom_ext_add_cb add_cb; + custom_ext_free_cb free_cb; + void *add_arg; + custom_ext_parse_cb parse_cb; + void *parse_arg; +} custom_ext_method; + +/* ext_flags values */ + +/* + * Indicates an extension has been received. Used to check for unsolicited or + * duplicate extensions. + */ +# define SSL_EXT_FLAG_RECEIVED 0x1 +/* + * Indicates an extension has been sent: used to enable sending of + * corresponding ServerHello extension. + */ +# define SSL_EXT_FLAG_SENT 0x2 + +typedef struct { + custom_ext_method *meths; + size_t meths_count; +} custom_ext_methods; typedef struct cert_st { /* Current active set */ @@ -516,14 +598,17 @@ typedef struct cert_st { */ CERT_PKEY *key; /* - * The following masks are for the key and auth algorithms that are - * supported by the certs below + * For servers the following masks are for the key and auth algorithms + * that are supported by the certs below. For clients they are masks of + * *disabled* algorithms based on the current session. */ int valid; unsigned long mask_k; unsigned long mask_a; unsigned long export_mask_k; unsigned long export_mask_a; + /* Client only */ + unsigned long mask_ssl; # ifndef OPENSSL_NO_RSA RSA *rsa_tmp; RSA *(*rsa_tmp_cb) (SSL *ssl, int is_export, int keysize); @@ -536,8 +621,71 @@ typedef struct cert_st { EC_KEY *ecdh_tmp; /* Callback for generating ephemeral ECDH keys */ EC_KEY *(*ecdh_tmp_cb) (SSL *ssl, int is_export, int keysize); + /* Select ECDH parameters automatically */ + int ecdh_tmp_auto; # endif + /* Flags related to certificates */ + unsigned int cert_flags; CERT_PKEY pkeys[SSL_PKEY_NUM]; + /* + * Certificate types (received or sent) in certificate request message. + * On receive this is only set if number of certificate types exceeds + * SSL3_CT_NUMBER. + */ + unsigned char *ctypes; + size_t ctype_num; + /* + * signature algorithms peer reports: e.g. supported signature algorithms + * extension for server or as part of a certificate request for client. + */ + unsigned char *peer_sigalgs; + /* Size of above array */ + size_t peer_sigalgslen; + /* + * suppported signature algorithms. When set on a client this is sent in + * the client hello as the supported signature algorithms extension. For + * servers it represents the signature algorithms we are willing to use. + */ + unsigned char *conf_sigalgs; + /* Size of above array */ + size_t conf_sigalgslen; + /* + * Client authentication signature algorithms, if not set then uses + * conf_sigalgs. On servers these will be the signature algorithms sent + * to the client in a cerificate request for TLS 1.2. 
On a client this + * represents the signature algortithms we are willing to use for client + * authentication. + */ + unsigned char *client_sigalgs; + /* Size of above array */ + size_t client_sigalgslen; + /* + * Signature algorithms shared by client and server: cached because these + * are used most often. + */ + TLS_SIGALGS *shared_sigalgs; + size_t shared_sigalgslen; + /* + * Certificate setup callback: if set is called whenever a certificate + * may be required (client or server). the callback can then examine any + * appropriate parameters and setup any certificates required. This + * allows advanced applications to select certificates on the fly: for + * example based on supported signature algorithms or curves. + */ + int (*cert_cb) (SSL *ssl, void *arg); + void *cert_cb_arg; + /* + * Optional X509_STORE for chain building or certificate validation If + * NULL the parent SSL_CTX store is used instead. + */ + X509_STORE *chain_store; + X509_STORE *verify_store; + /* Raw values of the cipher list from a client */ + unsigned char *ciphers_raw; + size_t ciphers_rawlen; + /* Custom extension methods for server and client */ + custom_ext_methods cli_ext; + custom_ext_methods srv_ext; int references; /* >1 only if SSL_copy_session_id is used */ } CERT; @@ -563,6 +711,18 @@ typedef struct sess_cert_st { # endif int references; /* actually always 1 at the moment */ } SESS_CERT; +/* Structure containing decoded values of signature algorithms extension */ +struct tls_sigalgs_st { + /* NID of hash algorithm */ + int hash_nid; + /* NID of signature algorithm */ + int sign_nid; + /* Combined hash and signature NID */ + int signandhash_nid; + /* Raw values used in extension */ + unsigned char rsign; + unsigned char rhash; +}; /* * #define MAC_DEBUG @@ -596,8 +756,6 @@ typedef struct sess_cert_st { # define FP_ICC (int (*)(const void *,const void *)) # define ssl_put_cipher_by_char(ssl,ciph,ptr) \ ((ssl)->method->put_cipher_by_char((ciph),(ptr))) -# define ssl_get_cipher_by_char(ssl,ptr) \ - ((ssl)->method->get_cipher_by_char(ptr)) /* * This is for the SSLv3/TLSv1.0 differences in crypto/hash stuff It is a bit @@ -622,8 +780,39 @@ typedef struct ssl3_enc_method { const char *, size_t, const unsigned char *, size_t, int use_context); + /* Various flags indicating protocol version requirements */ + unsigned int enc_flags; + /* Handshake header length */ + unsigned int hhlen; + /* Set the handshake header */ + void (*set_handshake_header) (SSL *s, int type, unsigned long len); + /* Write out handshake message */ + int (*do_write) (SSL *s); } SSL3_ENC_METHOD; +# define SSL_HM_HEADER_LENGTH(s) s->method->ssl3_enc->hhlen +# define ssl_handshake_start(s) \ + (((unsigned char *)s->init_buf->data) + s->method->ssl3_enc->hhlen) +# define ssl_set_handshake_header(s, htype, len) \ + s->method->ssl3_enc->set_handshake_header(s, htype, len) +# define ssl_do_write(s) s->method->ssl3_enc->do_write(s) + +/* Values for enc_flags */ + +/* Uses explicit IV for CBC mode */ +# define SSL_ENC_FLAG_EXPLICIT_IV 0x1 +/* Uses signature algorithms extension */ +# define SSL_ENC_FLAG_SIGALGS 0x2 +/* Uses SHA256 default PRF */ +# define SSL_ENC_FLAG_SHA256_PRF 0x4 +/* Is DTLS */ +# define SSL_ENC_FLAG_DTLS 0x8 +/* + * Allow TLS 1.2 ciphersuites: applies to DTLS 1.2 as well as TLS 1.2: may + * apply to others in future. 
+ */ +# define SSL_ENC_FLAG_TLS1_2_CIPHERS 0x10 + # ifndef OPENSSL_NO_COMP /* Used for holding the relevant compression methods loaded into SSL_CTX */ typedef struct ssl3_comp_st { @@ -653,13 +842,14 @@ OPENSSL_EXTERN SSL_CIPHER ssl3_ciphers[]; SSL_METHOD *ssl_bad_method(int ver); extern SSL3_ENC_METHOD TLSv1_enc_data; +extern SSL3_ENC_METHOD TLSv1_1_enc_data; +extern SSL3_ENC_METHOD TLSv1_2_enc_data; extern SSL3_ENC_METHOD SSLv3_enc_data; extern SSL3_ENC_METHOD DTLSv1_enc_data; - -# define SSL_IS_DTLS(s) (s->method->version == DTLS1_VERSION) +extern SSL3_ENC_METHOD DTLSv1_2_enc_data; # define IMPLEMENT_tls_meth_func(version, func_name, s_accept, s_connect, \ - s_get_meth) \ + s_get_meth, enc_data) \ const SSL_METHOD *func_name(void) \ { \ static const SSL_METHOD func_name##_data= { \ @@ -688,7 +878,7 @@ const SSL_METHOD *func_name(void) \ ssl3_get_cipher, \ s_get_meth, \ tls1_default_timeout, \ - &TLSv1_enc_data, \ + &enc_data, \ ssl_undefined_void_function, \ ssl3_callback_ctrl, \ ssl3_ctx_callback_ctrl, \ @@ -762,7 +952,7 @@ const SSL_METHOD *func_name(void) \ ssl23_get_cipher, \ s_get_meth, \ ssl23_default_timeout, \ - &ssl3_undef_enc_method, \ + &TLSv1_2_enc_data, \ ssl_undefined_void_function, \ ssl3_callback_ctrl, \ ssl3_ctx_callback_ctrl, \ @@ -807,11 +997,12 @@ const SSL_METHOD *func_name(void) \ return &func_name##_data; \ } -# define IMPLEMENT_dtls1_meth_func(func_name, s_accept, s_connect, s_get_meth) \ +# define IMPLEMENT_dtls1_meth_func(version, func_name, s_accept, s_connect, \ + s_get_meth, enc_data) \ const SSL_METHOD *func_name(void) \ { \ static const SSL_METHOD func_name##_data= { \ - DTLS1_VERSION, \ + version, \ dtls1_new, \ dtls1_clear, \ dtls1_free, \ @@ -836,7 +1027,7 @@ const SSL_METHOD *func_name(void) \ dtls1_get_cipher, \ s_get_meth, \ dtls1_default_timeout, \ - &DTLSv1_enc_data, \ + &enc_data, \ ssl_undefined_void_function, \ ssl3_callback_ctrl, \ ssl3_ctx_callback_ctrl, \ @@ -857,7 +1048,9 @@ void ssl_clear_cipher_ctx(SSL *s); int ssl_clear_bad_session(SSL *s); CERT *ssl_cert_new(void); CERT *ssl_cert_dup(CERT *cert); +void ssl_cert_set_default_md(CERT *cert); int ssl_cert_inst(CERT **o); +void ssl_cert_clear_certs(CERT *c); void ssl_cert_free(CERT *c); SESS_CERT *ssl_sess_cert_new(void); void ssl_sess_cert_free(SESS_CERT *sc); @@ -879,18 +1072,36 @@ int ssl_cipher_list_to_bytes(SSL *s, STACK_OF(SSL_CIPHER) *sk, STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_METHOD *meth, STACK_OF(SSL_CIPHER) **pref, STACK_OF(SSL_CIPHER) **sorted, - const char *rule_str); + const char *rule_str, CERT *c); void ssl_update_cache(SSL *s, int mode); int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size, SSL_COMP **comp); int ssl_get_handshake_digest(int i, long *mask, const EVP_MD **md); +int ssl_cipher_get_cert_index(const SSL_CIPHER *c); +const SSL_CIPHER *ssl_get_cipher_by_char(SSL *ssl, const unsigned char *ptr); +int ssl_cert_set0_chain(CERT *c, STACK_OF(X509) *chain); +int ssl_cert_set1_chain(CERT *c, STACK_OF(X509) *chain); +int ssl_cert_add0_chain_cert(CERT *c, X509 *x); +int ssl_cert_add1_chain_cert(CERT *c, X509 *x); +int ssl_cert_select_current(CERT *c, X509 *x); +int ssl_cert_set_current(CERT *c, long arg); +X509 *ssl_cert_get0_next_certificate(CERT *c, int first); +void ssl_cert_set_cert_cb(CERT *c, int (*cb) (SSL *ssl, void *arg), + void *arg); + int ssl_verify_cert_chain(SSL *s, STACK_OF(X509) *sk); +int ssl_add_cert_chain(SSL *s, CERT_PKEY *cpk, unsigned long *l); +int 
ssl_build_cert_chain(CERT *c, X509_STORE *chain_store, int flags); +int ssl_cert_set_cert_store(CERT *c, X509_STORE *store, int chain, int ref); int ssl_undefined_function(SSL *s); int ssl_undefined_void_function(void); int ssl_undefined_const_function(const SSL *s); CERT_PKEY *ssl_get_server_send_pkey(const SSL *s); -X509 *ssl_get_server_send_cert(const SSL *); +# ifndef OPENSSL_NO_TLSEXT +int ssl_get_server_cert_serverinfo(SSL *s, const unsigned char **serverinfo, + size_t *serverinfo_length); +# endif EVP_PKEY *ssl_get_sign_pkey(SSL *s, const SSL_CIPHER *c, const EVP_MD **pmd); int ssl_cert_type(X509 *x, EVP_PKEY *pkey); void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher); @@ -961,7 +1172,7 @@ void ssl3_finish_mac(SSL *s, const unsigned char *buf, int len); int ssl3_enc(SSL *s, int send_data); int n_ssl3_mac(SSL *ssl, unsigned char *md, int send_data); void ssl3_free_digest_list(SSL *s); -unsigned long ssl3_output_cert_chain(SSL *s, X509 *x); +unsigned long ssl3_output_cert_chain(SSL *s, CERT_PKEY *cpk); SSL_CIPHER *ssl3_choose_cipher(SSL *ssl, STACK_OF(SSL_CIPHER) *clnt, STACK_OF(SSL_CIPHER) *srvr); int ssl3_setup_buffers(SSL *s); @@ -989,6 +1200,9 @@ void ssl3_record_sequence_update(unsigned char *seq); int ssl3_do_change_cipher_spec(SSL *ssl); long ssl3_default_timeout(void); +void ssl3_set_handshake_header(SSL *s, int htype, unsigned long len); +int ssl3_handshake_write(SSL *s); + int ssl23_num_ciphers(void); const SSL_CIPHER *ssl23_get_cipher(unsigned int u); int ssl23_read(SSL *s, void *buf, int len); @@ -1017,7 +1231,6 @@ int dtls1_write_bytes(SSL *s, int type, const void *buf, int len); int dtls1_send_change_cipher_spec(SSL *s, int a, int b); int dtls1_send_finished(SSL *s, int a, int b, const char *sender, int slen); -unsigned long dtls1_output_cert_chain(SSL *s, X509 *x); int dtls1_read_failed(SSL *s, int code); int dtls1_buffer_message(SSL *s, int ccs); int dtls1_retransmit_message(SSL *s, unsigned short seq, @@ -1064,9 +1277,6 @@ int ssl3_send_next_proto(SSL *s); # endif int dtls1_client_hello(SSL *s); -int dtls1_send_client_certificate(SSL *s); -int dtls1_send_client_key_exchange(SSL *s); -int dtls1_send_client_verify(SSL *s); /* some server-only functions */ int ssl3_get_client_hello(SSL *s); @@ -1075,7 +1285,6 @@ int ssl3_send_hello_request(SSL *s); int ssl3_send_server_key_exchange(SSL *s); int ssl3_send_certificate_request(SSL *s); int ssl3_send_server_done(SSL *s); -int ssl3_check_client_hello(SSL *s); int ssl3_get_client_certificate(SSL *s); int ssl3_get_client_key_exchange(SSL *s); int ssl3_get_cert_verify(SSL *s); @@ -1083,13 +1292,6 @@ int ssl3_get_cert_verify(SSL *s); int ssl3_get_next_proto(SSL *s); # endif -int dtls1_send_hello_request(SSL *s); -int dtls1_send_server_hello(SSL *s); -int dtls1_send_server_certificate(SSL *s); -int dtls1_send_server_key_exchange(SSL *s); -int dtls1_send_certificate_request(SSL *s); -int dtls1_send_server_done(SSL *s); - int ssl23_accept(SSL *s); int ssl23_connect(SSL *s); int ssl23_read_bytes(SSL *s, int n); @@ -1114,7 +1316,6 @@ int dtls1_get_record(SSL *s); int do_dtls1_write(SSL *s, int type, const unsigned char *buf, unsigned int len, int create_empty_fragement); int dtls1_dispatch_alert(SSL *s); -int dtls1_enc(SSL *s, int snd); int ssl_init_wbio_buffer(SSL *s, int push); void ssl_free_wbio_buffer(SSL *s); @@ -1145,22 +1346,33 @@ SSL_COMP *ssl3_comp_find(STACK_OF(SSL_COMP) *sk, int n); # ifndef OPENSSL_NO_EC int tls1_ec_curve_id2nid(int curve_id); int tls1_ec_nid2curve_id(int nid); +int tls1_check_curve(SSL *s, const 
unsigned char *p, size_t len); +int tls1_shared_curve(SSL *s, int nmatch); +int tls1_set_curves(unsigned char **pext, size_t *pextlen, + int *curves, size_t ncurves); +int tls1_set_curves_list(unsigned char **pext, size_t *pextlen, + const char *str); +# ifndef OPENSSL_NO_ECDH +int tls1_check_ec_tmp_key(SSL *s, unsigned long id); +# endif /* OPENSSL_NO_ECDH */ # endif /* OPENSSL_NO_EC */ # ifndef OPENSSL_NO_TLSEXT +int tls1_shared_list(SSL *s, + const unsigned char *l1, size_t l1len, + const unsigned char *l2, size_t l2len, int nmatch); unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, - unsigned char *limit); + unsigned char *limit, int *al); unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, - unsigned char *limit); + unsigned char *limit, int *al); int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **data, - unsigned char *d, int n, int *al); + unsigned char *d, int n); +int tls1_set_server_sigalgs(SSL *s); +int ssl_check_clienthello_tlsext_late(SSL *s); int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **data, - unsigned char *d, int n, int *al); + unsigned char *d, int n); int ssl_prepare_clienthello_tlsext(SSL *s); int ssl_prepare_serverhello_tlsext(SSL *s); -int ssl_check_clienthello_tlsext_early(SSL *s); -int ssl_check_clienthello_tlsext_late(SSL *s); -int ssl_check_serverhello_tlsext(SSL *s); # ifndef OPENSSL_NO_HEARTBEATS int tls1_heartbeat(SSL *s); @@ -1182,6 +1394,12 @@ int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk, int tls12_get_sigid(const EVP_PKEY *pk); const EVP_MD *tls12_get_hash(unsigned char hash_alg); +int tls1_set_sigalgs_list(CERT *c, const char *str, int client); +int tls1_set_sigalgs(CERT *c, const int *salg, size_t salglen, int client); +int tls1_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain, + int idx); +void tls1_set_cert_validity(SSL *s); + # endif EVP_MD_CTX *ssl_replace_hash(EVP_MD_CTX **hash, const EVP_MD *md); void ssl_clear_hash_ctx(EVP_MD_CTX **hash); @@ -1194,8 +1412,12 @@ int ssl_add_clienthello_renegotiate_ext(SSL *s, unsigned char *p, int *len, int ssl_parse_clienthello_renegotiate_ext(SSL *s, unsigned char *d, int len, int *al); long ssl_get_algorithm2(SSL *s); -int tls1_process_sigalgs(SSL *s, const unsigned char *data, int dsize); -int tls12_get_req_sig_algs(SSL *s, unsigned char *p); +int tls1_save_sigalgs(SSL *s, const unsigned char *data, int dsize); +int tls1_process_sigalgs(SSL *s); +size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs); +int tls12_check_peer_sigalg(const EVP_MD **pmd, SSL *s, + const unsigned char *sig, EVP_PKEY *pkey); +void ssl_set_client_disabled(SSL *s); int ssl_add_clienthello_use_srtp_ext(SSL *s, unsigned char *p, int *len, int maxlen); @@ -1233,6 +1455,19 @@ void tls_fips_digest_extra(const EVP_CIPHER_CTX *cipher_ctx, int srp_verify_server_param(SSL *s, int *al); +/* t1_ext.c */ + +void custom_ext_init(custom_ext_methods *meths); + +int custom_ext_parse(SSL *s, int server, + unsigned int ext_type, + const unsigned char *ext_data, size_t ext_size, int *al); +int custom_ext_add(SSL *s, int server, + unsigned char **pret, unsigned char *limit, int *al); + +int custom_exts_copy(custom_ext_methods *dst, const custom_ext_methods *src); +void custom_exts_free(custom_ext_methods *exts); + # else # define ssl_init_wbio_buffer SSL_test_functions()->p_ssl_init_wbio_buffer diff --git a/deps/openssl/openssl/ssl/ssl_rsa.c b/deps/openssl/openssl/ssl/ssl_rsa.c index daf15dd97e1cf1..b1b2318350a52a 100644 --- a/deps/openssl/openssl/ssl/ssl_rsa.c 
+++ b/deps/openssl/openssl/ssl/ssl_rsa.c @@ -171,8 +171,22 @@ int SSL_use_RSAPrivateKey(SSL *ssl, RSA *rsa) static int ssl_set_pkey(CERT *c, EVP_PKEY *pkey) { int i; - - i = ssl_cert_type(NULL, pkey); + /* + * Special case for DH: check two DH certificate types for a match. This + * means for DH certificates we must set the certificate first. + */ + if (pkey->type == EVP_PKEY_DH) { + X509 *x; + i = -1; + x = c->pkeys[SSL_PKEY_DH_RSA].x509; + if (x && X509_check_private_key(x, pkey)) + i = SSL_PKEY_DH_RSA; + x = c->pkeys[SSL_PKEY_DH_DSA].x509; + if (i == -1 && x && X509_check_private_key(x, pkey)) + i = SSL_PKEY_DH_DSA; + ERR_clear_error(); + } else + i = ssl_cert_type(NULL, pkey); if (i < 0) { SSLerr(SSL_F_SSL_SET_PKEY, SSL_R_UNKNOWN_CERTIFICATE_TYPE); return (0); @@ -690,16 +704,13 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file) int r; unsigned long err; - if (ctx->extra_certs != NULL) { - sk_X509_pop_free(ctx->extra_certs, X509_free); - ctx->extra_certs = NULL; - } + SSL_CTX_clear_chain_certs(ctx); while ((ca = PEM_read_bio_X509(in, NULL, ctx->default_passwd_callback, ctx->default_passwd_callback_userdata)) != NULL) { - r = SSL_CTX_add_extra_chain_cert(ctx, ca); + r = SSL_CTX_add0_chain_cert(ctx, ca); if (!r) { X509_free(ca); ret = 0; @@ -728,3 +739,270 @@ int SSL_CTX_use_certificate_chain_file(SSL_CTX *ctx, const char *file) return (ret); } #endif + +#ifndef OPENSSL_NO_TLSEXT +static int serverinfo_find_extension(const unsigned char *serverinfo, + size_t serverinfo_length, + unsigned int extension_type, + const unsigned char **extension_data, + size_t *extension_length) +{ + *extension_data = NULL; + *extension_length = 0; + if (serverinfo == NULL || serverinfo_length == 0) + return 0; + for (;;) { + unsigned int type = 0; + size_t len = 0; + + /* end of serverinfo */ + if (serverinfo_length == 0) + return -1; /* Extension not found */ + + /* read 2-byte type field */ + if (serverinfo_length < 2) + return 0; /* Error */ + type = (serverinfo[0] << 8) + serverinfo[1]; + serverinfo += 2; + serverinfo_length -= 2; + + /* read 2-byte len field */ + if (serverinfo_length < 2) + return 0; /* Error */ + len = (serverinfo[0] << 8) + serverinfo[1]; + serverinfo += 2; + serverinfo_length -= 2; + + if (len > serverinfo_length) + return 0; /* Error */ + + if (type == extension_type) { + *extension_data = serverinfo; + *extension_length = len; + return 1; /* Success */ + } + + serverinfo += len; + serverinfo_length -= len; + } + return 0; /* Error */ +} + +static int serverinfo_srv_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + + if (inlen != 0) { + *al = SSL_AD_DECODE_ERROR; + return 0; + } + + return 1; +} + +static int serverinfo_srv_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, size_t *outlen, + int *al, void *arg) +{ + const unsigned char *serverinfo = NULL; + size_t serverinfo_length = 0; + + /* Is there serverinfo data for the chosen server cert? 
*/ + if ((ssl_get_server_cert_serverinfo(s, &serverinfo, + &serverinfo_length)) != 0) { + /* Find the relevant extension from the serverinfo */ + int retval = serverinfo_find_extension(serverinfo, serverinfo_length, + ext_type, out, outlen); + if (retval == 0) + return 0; /* Error */ + if (retval == -1) + return -1; /* No extension found, don't send extension */ + return 1; /* Send extension */ + } + return -1; /* No serverinfo data found, don't send + * extension */ +} + +/* + * With a NULL context, this function just checks that the serverinfo data + * parses correctly. With a non-NULL context, it registers callbacks for + * the included extensions. + */ +static int serverinfo_process_buffer(const unsigned char *serverinfo, + size_t serverinfo_length, SSL_CTX *ctx) +{ + if (serverinfo == NULL || serverinfo_length == 0) + return 0; + for (;;) { + unsigned int ext_type = 0; + size_t len = 0; + + /* end of serverinfo */ + if (serverinfo_length == 0) + return 1; + + /* read 2-byte type field */ + if (serverinfo_length < 2) + return 0; + /* FIXME: check for types we understand explicitly? */ + + /* Register callbacks for extensions */ + ext_type = (serverinfo[0] << 8) + serverinfo[1]; + if (ctx && !SSL_CTX_add_server_custom_ext(ctx, ext_type, + serverinfo_srv_add_cb, + NULL, NULL, + serverinfo_srv_parse_cb, + NULL)) + return 0; + + serverinfo += 2; + serverinfo_length -= 2; + + /* read 2-byte len field */ + if (serverinfo_length < 2) + return 0; + len = (serverinfo[0] << 8) + serverinfo[1]; + serverinfo += 2; + serverinfo_length -= 2; + + if (len > serverinfo_length) + return 0; + + serverinfo += len; + serverinfo_length -= len; + } +} + +int SSL_CTX_use_serverinfo(SSL_CTX *ctx, const unsigned char *serverinfo, + size_t serverinfo_length) +{ + if (ctx == NULL || serverinfo == NULL || serverinfo_length == 0) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_PASSED_NULL_PARAMETER); + return 0; + } + if (!serverinfo_process_buffer(serverinfo, serverinfo_length, NULL)) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, SSL_R_INVALID_SERVERINFO_DATA); + return 0; + } + if (!ssl_cert_inst(&ctx->cert)) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_MALLOC_FAILURE); + return 0; + } + if (ctx->cert->key == NULL) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_INTERNAL_ERROR); + return 0; + } + ctx->cert->key->serverinfo = OPENSSL_realloc(ctx->cert->key->serverinfo, + serverinfo_length); + if (ctx->cert->key->serverinfo == NULL) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, ERR_R_MALLOC_FAILURE); + return 0; + } + memcpy(ctx->cert->key->serverinfo, serverinfo, serverinfo_length); + ctx->cert->key->serverinfo_length = serverinfo_length; + + /* + * Now that the serverinfo is validated and stored, go ahead and + * register callbacks. 
+ */ + if (!serverinfo_process_buffer(serverinfo, serverinfo_length, ctx)) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO, SSL_R_INVALID_SERVERINFO_DATA); + return 0; + } + return 1; +} + +# ifndef OPENSSL_NO_STDIO +int SSL_CTX_use_serverinfo_file(SSL_CTX *ctx, const char *file) +{ + unsigned char *serverinfo = NULL; + size_t serverinfo_length = 0; + unsigned char *extension = 0; + long extension_length = 0; + char *name = NULL; + char *header = NULL; + char namePrefix[] = "SERVERINFO FOR "; + int ret = 0; + BIO *bin = NULL; + size_t num_extensions = 0; + + if (ctx == NULL || file == NULL) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, + ERR_R_PASSED_NULL_PARAMETER); + goto end; + } + + bin = BIO_new(BIO_s_file_internal()); + if (bin == NULL) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_BUF_LIB); + goto end; + } + if (BIO_read_filename(bin, file) <= 0) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_SYS_LIB); + goto end; + } + + for (num_extensions = 0;; num_extensions++) { + if (PEM_read_bio(bin, &name, &header, &extension, &extension_length) + == 0) { + /* + * There must be at least one extension in this file + */ + if (num_extensions == 0) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, + SSL_R_NO_PEM_EXTENSIONS); + goto end; + } else /* End of file, we're done */ + break; + } + /* Check that PEM name starts with "BEGIN SERVERINFO FOR " */ + if (strlen(name) < strlen(namePrefix)) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, + SSL_R_PEM_NAME_TOO_SHORT); + goto end; + } + if (strncmp(name, namePrefix, strlen(namePrefix)) != 0) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, + SSL_R_PEM_NAME_BAD_PREFIX); + goto end; + } + /* + * Check that the decoded PEM data is plausible (valid length field) + */ + if (extension_length < 4 + || (extension[2] << 8) + extension[3] != extension_length - 4) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, SSL_R_BAD_DATA); + goto end; + } + /* Append the decoded extension to the serverinfo buffer */ + serverinfo = + OPENSSL_realloc(serverinfo, serverinfo_length + extension_length); + if (serverinfo == NULL) { + SSLerr(SSL_F_SSL_CTX_USE_SERVERINFO_FILE, ERR_R_MALLOC_FAILURE); + goto end; + } + memcpy(serverinfo + serverinfo_length, extension, extension_length); + serverinfo_length += extension_length; + + OPENSSL_free(name); + name = NULL; + OPENSSL_free(header); + header = NULL; + OPENSSL_free(extension); + extension = NULL; + } + + ret = SSL_CTX_use_serverinfo(ctx, serverinfo, serverinfo_length); + end: + /* SSL_CTX_use_serverinfo makes a local copy of the serverinfo. 
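 * (Editor's illustration, not part of the upstream diff: the file parsed by
 * SSL_CTX_use_serverinfo_file() above is expected to hold one or more PEM
 * blocks of the form
 *
 *   -----BEGIN SERVERINFO FOR <name>-----
 *   <base64 of: 2-byte extension_type, 2-byte length, 'length' bytes of data>
 *   -----END SERVERINFO FOR <name>-----
 *
 * where <name> is arbitrary text after the "SERVERINFO FOR " prefix; that
 * layout is why each decoded block must be at least 4 bytes long and carry a
 * length field equal to extension_length - 4.)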
*/ + OPENSSL_free(name); + OPENSSL_free(header); + OPENSSL_free(extension); + OPENSSL_free(serverinfo); + if (bin != NULL) + BIO_free(bin); + return ret; +} +# endif /* OPENSSL_NO_STDIO */ +#endif /* OPENSSL_NO_TLSEXT */ diff --git a/deps/openssl/openssl/ssl/ssl_sess.c b/deps/openssl/openssl/ssl/ssl_sess.c index fb4e8c52598f4d..1e1002fc8540db 100644 --- a/deps/openssl/openssl/ssl/ssl_sess.c +++ b/deps/openssl/openssl/ssl/ssl_sess.c @@ -320,6 +320,9 @@ int ssl_get_new_session(SSL *s, int session) } else if (s->version == DTLS1_VERSION) { ss->ssl_version = DTLS1_VERSION; ss->session_id_length = SSL3_SSL_SESSION_ID_LENGTH; + } else if (s->version == DTLS1_2_VERSION) { + ss->ssl_version = DTLS1_2_VERSION; + ss->session_id_length = SSL3_SSL_SESSION_ID_LENGTH; } else { SSLerr(SSL_F_SSL_GET_NEW_SESSION, SSL_R_UNSUPPORTED_SSL_VERSION); SSL_SESSION_free(ss); @@ -395,38 +398,6 @@ int ssl_get_new_session(SSL *s, int session) return 0; } } -# ifndef OPENSSL_NO_EC - if (s->tlsext_ecpointformatlist) { - if (ss->tlsext_ecpointformatlist != NULL) - OPENSSL_free(ss->tlsext_ecpointformatlist); - if ((ss->tlsext_ecpointformatlist = - OPENSSL_malloc(s->tlsext_ecpointformatlist_length)) == - NULL) { - SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE); - SSL_SESSION_free(ss); - return 0; - } - ss->tlsext_ecpointformatlist_length = - s->tlsext_ecpointformatlist_length; - memcpy(ss->tlsext_ecpointformatlist, s->tlsext_ecpointformatlist, - s->tlsext_ecpointformatlist_length); - } - if (s->tlsext_ellipticcurvelist) { - if (ss->tlsext_ellipticcurvelist != NULL) - OPENSSL_free(ss->tlsext_ellipticcurvelist); - if ((ss->tlsext_ellipticcurvelist = - OPENSSL_malloc(s->tlsext_ellipticcurvelist_length)) == - NULL) { - SSLerr(SSL_F_SSL_GET_NEW_SESSION, ERR_R_MALLOC_FAILURE); - SSL_SESSION_free(ss); - return 0; - } - ss->tlsext_ellipticcurvelist_length = - s->tlsext_ellipticcurvelist_length; - memcpy(ss->tlsext_ellipticcurvelist, s->tlsext_ellipticcurvelist, - s->tlsext_ellipticcurvelist_length); - } -# endif #endif } else { ss->session_id_length = 0; diff --git a/deps/openssl/openssl/ssl/ssl_txt.c b/deps/openssl/openssl/ssl/ssl_txt.c index bd67dc70a951bd..45308d8b658785 100644 --- a/deps/openssl/openssl/ssl/ssl_txt.c +++ b/deps/openssl/openssl/ssl/ssl_txt.c @@ -124,6 +124,8 @@ int SSL_SESSION_print(BIO *bp, const SSL_SESSION *x) s = "TLSv1"; else if (x->ssl_version == DTLS1_VERSION) s = "DTLSv1"; + else if (x->ssl_version == DTLS1_2_VERSION) + s = "DTLSv1.2"; else if (x->ssl_version == DTLS1_BAD_VER) s = "DTLSv1-bad"; else diff --git a/deps/openssl/openssl/ssl/ssltest.c b/deps/openssl/openssl/ssl/ssltest.c index 8efbff738e81b4..9f5d5862479801 100644 --- a/deps/openssl/openssl/ssl/ssltest.c +++ b/deps/openssl/openssl/ssl/ssltest.c @@ -298,6 +298,362 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg) static BIO *bio_err = NULL; static BIO *bio_stdout = NULL; +static const char *alpn_client; +static const char *alpn_server; +static const char *alpn_expected; +static unsigned char *alpn_selected; + +/*- + * next_protos_parse parses a comma separated list of strings into a string + * in a format suitable for passing to SSL_CTX_set_next_protos_advertised. + * outlen: (output) set to the length of the resulting buffer on success. + * err: (maybe NULL) on failure, an error message line is written to this BIO. + * in: a NUL terminated string like "abc,def,ghi" + * + * returns: a malloced buffer or NULL on failure. 
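 * Worked example (editor's illustration, not part of the upstream diff):
 *   next_protos_parse(&outlen, "abc,de") yields the 7-byte buffer
 *   { 3,'a','b','c', 2,'d','e' } and sets outlen to 7; each protocol name is
 *   preceded by its one-byte length, which is the ALPN/NPN protocol-list
 *   wire format expected by SSL_CTX_set_alpn_protos() and
 *   SSL_CTX_set_next_protos_advertised().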
+ */ +static unsigned char *next_protos_parse(unsigned short *outlen, + const char *in) +{ + size_t len; + unsigned char *out; + size_t i, start = 0; + + len = strlen(in); + if (len >= 65535) + return NULL; + + out = OPENSSL_malloc(strlen(in) + 1); + if (!out) + return NULL; + + for (i = 0; i <= len; ++i) { + if (i == len || in[i] == ',') { + if (i - start > 255) { + OPENSSL_free(out); + return NULL; + } + out[start] = i - start; + start = i + 1; + } else + out[i + 1] = in[i]; + } + + *outlen = len + 1; + return out; +} + +static int cb_server_alpn(SSL *s, const unsigned char **out, + unsigned char *outlen, const unsigned char *in, + unsigned int inlen, void *arg) +{ + unsigned char *protos; + unsigned short protos_len; + + protos = next_protos_parse(&protos_len, alpn_server); + if (protos == NULL) { + fprintf(stderr, "failed to parser ALPN server protocol string: %s\n", + alpn_server); + abort(); + } + + if (SSL_select_next_proto + ((unsigned char **)out, outlen, protos, protos_len, in, + inlen) != OPENSSL_NPN_NEGOTIATED) { + OPENSSL_free(protos); + return SSL_TLSEXT_ERR_NOACK; + } + + /* + * Make a copy of the selected protocol which will be freed in + * verify_alpn. + */ + alpn_selected = OPENSSL_malloc(*outlen); + memcpy(alpn_selected, *out, *outlen); + *out = alpn_selected; + + OPENSSL_free(protos); + return SSL_TLSEXT_ERR_OK; +} + +static int verify_alpn(SSL *client, SSL *server) +{ + const unsigned char *client_proto, *server_proto; + unsigned int client_proto_len = 0, server_proto_len = 0; + SSL_get0_alpn_selected(client, &client_proto, &client_proto_len); + SSL_get0_alpn_selected(server, &server_proto, &server_proto_len); + + if (alpn_selected != NULL) { + OPENSSL_free(alpn_selected); + alpn_selected = NULL; + } + + if (client_proto_len != server_proto_len || + memcmp(client_proto, server_proto, client_proto_len) != 0) { + BIO_printf(bio_stdout, "ALPN selected protocols differ!\n"); + goto err; + } + + if (client_proto_len > 0 && alpn_expected == NULL) { + BIO_printf(bio_stdout, "ALPN unexpectedly negotiated\n"); + goto err; + } + + if (alpn_expected != NULL && + (client_proto_len != strlen(alpn_expected) || + memcmp(client_proto, alpn_expected, client_proto_len) != 0)) { + BIO_printf(bio_stdout, + "ALPN selected protocols not equal to expected protocol: %s\n", + alpn_expected); + goto err; + } + + return 0; + + err: + BIO_printf(bio_stdout, "ALPN results: client: '"); + BIO_write(bio_stdout, client_proto, client_proto_len); + BIO_printf(bio_stdout, "', server: '"); + BIO_write(bio_stdout, server_proto, server_proto_len); + BIO_printf(bio_stdout, "'\n"); + BIO_printf(bio_stdout, "ALPN configured: client: '%s', server: '%s'\n", + alpn_client, alpn_server); + return -1; +} + +#define SCT_EXT_TYPE 18 + +/* + * WARNING : below extension types are *NOT* IETF assigned, and could + * conflict if these types are reassigned and handled specially by OpenSSL + * in the future + */ +#define TACK_EXT_TYPE 62208 +#define CUSTOM_EXT_TYPE_0 1000 +#define CUSTOM_EXT_TYPE_1 1001 +#define CUSTOM_EXT_TYPE_2 1002 +#define CUSTOM_EXT_TYPE_3 1003 + +const char custom_ext_cli_string[] = "abc"; +const char custom_ext_srv_string[] = "defg"; + +/* These set from cmdline */ +char *serverinfo_file = NULL; +int serverinfo_sct = 0; +int serverinfo_tack = 0; + +/* These set based on extension callbacks */ +int serverinfo_sct_seen = 0; +int serverinfo_tack_seen = 0; +int serverinfo_other_seen = 0; + +/* This set from cmdline */ +int custom_ext = 0; + +/* This set based on extension callbacks */ +int 
custom_ext_error = 0; + +static int serverinfo_cli_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, size_t inlen, + int *al, void *arg) +{ + if (ext_type == SCT_EXT_TYPE) + serverinfo_sct_seen++; + else if (ext_type == TACK_EXT_TYPE) + serverinfo_tack_seen++; + else + serverinfo_other_seen++; + return 1; +} + +static int verify_serverinfo() +{ + if (serverinfo_sct != serverinfo_sct_seen) + return -1; + if (serverinfo_tack != serverinfo_tack_seen) + return -1; + if (serverinfo_other_seen) + return -1; + return 0; +} + +/*- + * Four test cases for custom extensions: + * 0 - no ClientHello extension or ServerHello response + * 1 - ClientHello with "abc", no response + * 2 - ClientHello with "abc", empty response + * 3 - ClientHello with "abc", "defg" response + */ + +static int custom_ext_0_cli_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_0) + custom_ext_error = 1; + return 0; /* Don't send an extension */ +} + +static int custom_ext_0_cli_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + return 1; +} + +static int custom_ext_1_cli_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_1) + custom_ext_error = 1; + *out = (const unsigned char *)custom_ext_cli_string; + *outlen = strlen(custom_ext_cli_string); + return 1; /* Send "abc" */ +} + +static int custom_ext_1_cli_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + return 1; +} + +static int custom_ext_2_cli_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_2) + custom_ext_error = 1; + *out = (const unsigned char *)custom_ext_cli_string; + *outlen = strlen(custom_ext_cli_string); + return 1; /* Send "abc" */ +} + +static int custom_ext_2_cli_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_2) + custom_ext_error = 1; + if (inlen != 0) + custom_ext_error = 1; /* Should be empty response */ + return 1; +} + +static int custom_ext_3_cli_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_3) + custom_ext_error = 1; + *out = (const unsigned char *)custom_ext_cli_string; + *outlen = strlen(custom_ext_cli_string); + return 1; /* Send "abc" */ +} + +static int custom_ext_3_cli_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_3) + custom_ext_error = 1; + if (inlen != strlen(custom_ext_srv_string)) + custom_ext_error = 1; + if (memcmp(custom_ext_srv_string, in, inlen) != 0) + custom_ext_error = 1; /* Check for "defg" */ + return 1; +} + +/* + * custom_ext_0_cli_add_cb returns 0 - the server won't receive a callback + * for this extension + */ +static int custom_ext_0_srv_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + custom_ext_error = 1; + return 1; +} + +/* 'add' callbacks are only called if the 'parse' callback is called */ +static int custom_ext_0_srv_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + /* Error: should not have been called */ + 
custom_ext_error = 1; + return 0; /* Don't send an extension */ +} + +static int custom_ext_1_srv_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_1) + custom_ext_error = 1; + /* Check for "abc" */ + if (inlen != strlen(custom_ext_cli_string)) + custom_ext_error = 1; + if (memcmp(in, custom_ext_cli_string, inlen) != 0) + custom_ext_error = 1; + return 1; +} + +static int custom_ext_1_srv_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + return 0; /* Don't send an extension */ +} + +static int custom_ext_2_srv_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_2) + custom_ext_error = 1; + /* Check for "abc" */ + if (inlen != strlen(custom_ext_cli_string)) + custom_ext_error = 1; + if (memcmp(in, custom_ext_cli_string, inlen) != 0) + custom_ext_error = 1; + return 1; +} + +static int custom_ext_2_srv_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + *out = NULL; + *outlen = 0; + return 1; /* Send empty extension */ +} + +static int custom_ext_3_srv_parse_cb(SSL *s, unsigned int ext_type, + const unsigned char *in, + size_t inlen, int *al, void *arg) +{ + if (ext_type != CUSTOM_EXT_TYPE_3) + custom_ext_error = 1; + /* Check for "abc" */ + if (inlen != strlen(custom_ext_cli_string)) + custom_ext_error = 1; + if (memcmp(in, custom_ext_cli_string, inlen) != 0) + custom_ext_error = 1; + return 1; +} + +static int custom_ext_3_srv_add_cb(SSL *s, unsigned int ext_type, + const unsigned char **out, + size_t *outlen, int *al, void *arg) +{ + *out = (const unsigned char *)custom_ext_srv_string; + *outlen = strlen(custom_ext_srv_string); + return 1; /* Send "defg" */ +} + static char *cipher = NULL; static int verbose = 0; static int debug = 0; @@ -327,7 +683,7 @@ static void sv_usage(void) fprintf(stderr, " -proxy - allow proxy certificates\n"); fprintf(stderr, " -proxy_auth - set proxy policy rights\n"); fprintf(stderr, - " -proxy_cond - experssion to test proxy policy rights\n"); + " -proxy_cond - expression to test proxy policy rights\n"); fprintf(stderr, " -v - more output\n"); fprintf(stderr, " -d - debug output\n"); fprintf(stderr, " -reuse - use session-id reuse\n"); @@ -385,6 +741,16 @@ static void sv_usage(void) " -test_cipherlist - Verifies the order of the ssl cipher lists.\n" " When this option is requested, the cipherlist\n" " tests are run instead of handshake tests.\n"); + fprintf(stderr, " -serverinfo_file file - have server use this file\n"); + fprintf(stderr, " -serverinfo_sct - have client offer and expect SCT\n"); + fprintf(stderr, + " -serverinfo_tack - have client offer and expect TACK\n"); + fprintf(stderr, + " -custom_ext - try various custom extension callbacks\n"); + fprintf(stderr, " -alpn_client - have client side offer ALPN\n"); + fprintf(stderr, " -alpn_server - have server side offer ALPN\n"); + fprintf(stderr, + " -alpn_expected - the ALPN protocol that should be negotiated\n"); } static void print_details(SSL *c_ssl, const char *prefix) @@ -547,8 +913,8 @@ int main(int argc, char *argv[]) int no_psk = 0; int print_time = 0; clock_t s_time = 0, c_time = 0; - int comp = 0; #ifndef OPENSSL_NO_COMP + int comp = 0; COMP_METHOD *cm = NULL; STACK_OF(SSL_COMP) *ssl_comp_methods = NULL; #endif @@ -590,7 +956,7 @@ int main(int argc, char *argv[]) fips_mode = 1; #else fprintf(stderr, - 
"not compiled with FIPS support, so exitting without running.\n"); + "not compiled with FIPS support, so exiting without running.\n"); EXIT(0); #endif } else if (strcmp(*argv, "-server_auth") == 0) @@ -730,11 +1096,15 @@ int main(int argc, char *argv[]) force = 1; } else if (strcmp(*argv, "-time") == 0) { print_time = 1; - } else if (strcmp(*argv, "-zlib") == 0) { + } +#ifndef OPENSSL_NO_COMP + else if (strcmp(*argv, "-zlib") == 0) { comp = COMP_ZLIB; } else if (strcmp(*argv, "-rle") == 0) { comp = COMP_RLE; - } else if (strcmp(*argv, "-named_curve") == 0) { + } +#endif + else if (strcmp(*argv, "-named_curve") == 0) { if (--argc < 1) goto bad; #ifndef OPENSSL_NO_ECDH @@ -750,6 +1120,28 @@ int main(int argc, char *argv[]) app_verify_arg.allow_proxy_certs = 1; } else if (strcmp(*argv, "-test_cipherlist") == 0) { test_cipherlist = 1; + } else if (strcmp(*argv, "-serverinfo_sct") == 0) { + serverinfo_sct = 1; + } else if (strcmp(*argv, "-serverinfo_tack") == 0) { + serverinfo_tack = 1; + } else if (strcmp(*argv, "-serverinfo_file") == 0) { + if (--argc < 1) + goto bad; + serverinfo_file = *(++argv); + } else if (strcmp(*argv, "-custom_ext") == 0) { + custom_ext = 1; + } else if (strcmp(*argv, "-alpn_client") == 0) { + if (--argc < 1) + goto bad; + alpn_client = *(++argv); + } else if (strcmp(*argv, "-alpn_server") == 0) { + if (--argc < 1) + goto bad; + alpn_server = *(++argv); + } else if (strcmp(*argv, "-alpn_expected") == 0) { + if (--argc < 1) + goto bad; + alpn_expected = *(++argv); } else { fprintf(stderr, "unknown option %s\n", *argv); badop = 1; @@ -1053,6 +1445,72 @@ int main(int argc, char *argv[]) } #endif + if (serverinfo_sct) + SSL_CTX_add_client_custom_ext(c_ctx, SCT_EXT_TYPE, + NULL, NULL, NULL, + serverinfo_cli_parse_cb, NULL); + if (serverinfo_tack) + SSL_CTX_add_client_custom_ext(c_ctx, TACK_EXT_TYPE, + NULL, NULL, NULL, + serverinfo_cli_parse_cb, NULL); + + if (serverinfo_file) + if (!SSL_CTX_use_serverinfo_file(s_ctx, serverinfo_file)) { + BIO_printf(bio_err, "missing serverinfo file\n"); + goto end; + } + + if (custom_ext) { + SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_0, + custom_ext_0_cli_add_cb, + NULL, NULL, + custom_ext_0_cli_parse_cb, NULL); + SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_1, + custom_ext_1_cli_add_cb, + NULL, NULL, + custom_ext_1_cli_parse_cb, NULL); + SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_2, + custom_ext_2_cli_add_cb, + NULL, NULL, + custom_ext_2_cli_parse_cb, NULL); + SSL_CTX_add_client_custom_ext(c_ctx, CUSTOM_EXT_TYPE_3, + custom_ext_3_cli_add_cb, + NULL, NULL, + custom_ext_3_cli_parse_cb, NULL); + + SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_0, + custom_ext_0_srv_add_cb, + NULL, NULL, + custom_ext_0_srv_parse_cb, NULL); + SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_1, + custom_ext_1_srv_add_cb, + NULL, NULL, + custom_ext_1_srv_parse_cb, NULL); + SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_2, + custom_ext_2_srv_add_cb, + NULL, NULL, + custom_ext_2_srv_parse_cb, NULL); + SSL_CTX_add_server_custom_ext(s_ctx, CUSTOM_EXT_TYPE_3, + custom_ext_3_srv_add_cb, + NULL, NULL, + custom_ext_3_srv_parse_cb, NULL); + } + + if (alpn_server) + SSL_CTX_set_alpn_select_cb(s_ctx, cb_server_alpn, NULL); + + if (alpn_client) { + unsigned short alpn_len; + unsigned char *alpn = next_protos_parse(&alpn_len, alpn_client); + + if (alpn == NULL) { + BIO_printf(bio_err, "Error parsing -alpn_client argument\n"); + goto end; + } + SSL_CTX_set_alpn_protos(c_ctx, alpn, alpn_len); + OPENSSL_free(alpn); + } + c_ssl = 
SSL_new(c_ctx); s_ssl = SSL_new(s_ctx); @@ -1477,6 +1935,21 @@ int doit_biopair(SSL *s_ssl, SSL *c_ssl, long count, if (verbose) print_details(c_ssl, "DONE via BIO pair: "); + + if (verify_serverinfo() < 0) { + ret = 1; + goto err; + } + if (verify_alpn(c_ssl, s_ssl) < 0) { + ret = 1; + goto err; + } + + if (custom_ext_error) { + ret = 1; + goto err; + } + end: ret = 0; @@ -1506,7 +1979,8 @@ int doit_biopair(SSL *s_ssl, SSL *c_ssl, long count, int doit(SSL *s_ssl, SSL *c_ssl, long count) { - MS_STATIC char cbuf[1024 * 8], sbuf[1024 * 8]; + char *cbuf = NULL, *sbuf = NULL; + long bufsiz; long cw_num = count, cr_num = count; long sw_num = count, sr_num = count; int ret = 1; @@ -1519,9 +1993,17 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) int done = 0; int c_write, s_write; int do_server = 0, do_client = 0; + int max_frag = 5 * 1024; - memset(cbuf, 0, sizeof(cbuf)); - memset(sbuf, 0, sizeof(sbuf)); + bufsiz = count > 40 * 1024 ? 40 * 1024 : count; + + if ((cbuf = OPENSSL_malloc(bufsiz)) == NULL) + goto err; + if ((sbuf = OPENSSL_malloc(bufsiz)) == NULL) + goto err; + + memset(cbuf, 0, bufsiz); + memset(sbuf, 0, bufsiz); c_to_s = BIO_new(BIO_s_mem()); s_to_c = BIO_new(BIO_s_mem()); @@ -1539,10 +2021,12 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) SSL_set_connect_state(c_ssl); SSL_set_bio(c_ssl, s_to_c, c_to_s); + SSL_set_max_send_fragment(c_ssl, max_frag); BIO_set_ssl(c_bio, c_ssl, BIO_NOCLOSE); SSL_set_accept_state(s_ssl); SSL_set_bio(s_ssl, c_to_s, s_to_c); + SSL_set_max_send_fragment(s_ssl, max_frag); BIO_set_ssl(s_bio, s_ssl, BIO_NOCLOSE); c_r = 0; @@ -1593,8 +2077,7 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) } if (do_client && !(done & C_DONE)) { if (c_write) { - j = (cw_num > (long)sizeof(cbuf)) ? - (int)sizeof(cbuf) : (int)cw_num; + j = (cw_num > bufsiz) ? (int)bufsiz : (int)cw_num; i = BIO_write(c_bio, cbuf, j); if (i < 0) { c_r = 0; @@ -1619,9 +2102,11 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) s_r = 1; c_write = 0; cw_num -= i; + if (max_frag > 1029) + SSL_set_max_send_fragment(c_ssl, max_frag -= 5); } } else { - i = BIO_read(c_bio, cbuf, sizeof(cbuf)); + i = BIO_read(c_bio, cbuf, bufsiz); if (i < 0) { c_r = 0; c_w = 0; @@ -1657,7 +2142,7 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) if (do_server && !(done & S_DONE)) { if (!s_write) { - i = BIO_read(s_bio, sbuf, sizeof(cbuf)); + i = BIO_read(s_bio, sbuf, bufsiz); if (i < 0) { s_r = 0; s_w = 0; @@ -1691,8 +2176,7 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) } } } else { - j = (sw_num > (long)sizeof(sbuf)) ? - (int)sizeof(sbuf) : (int)sw_num; + j = (sw_num > bufsiz) ? 
(int)bufsiz : (int)sw_num; i = BIO_write(s_bio, sbuf, j); if (i < 0) { s_r = 0; @@ -1720,6 +2204,8 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) c_r = 1; if (sw_num <= 0) done |= S_DONE; + if (max_frag > 1029) + SSL_set_max_send_fragment(s_ssl, max_frag -= 5); } } } @@ -1730,6 +2216,14 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) if (verbose) print_details(c_ssl, "DONE: "); + if (verify_serverinfo() < 0) { + ret = 1; + goto err; + } + if (custom_ext_error) { + ret = 1; + goto err; + } ret = 0; err: /* @@ -1757,6 +2251,12 @@ int doit(SSL *s_ssl, SSL *c_ssl, long count) BIO_free_all(c_bio); if (s_bio != NULL) BIO_free_all(s_bio); + + if (cbuf) + OPENSSL_free(cbuf); + if (sbuf) + OPENSSL_free(sbuf); + return (ret); } diff --git a/deps/openssl/openssl/ssl/t1_clnt.c b/deps/openssl/openssl/ssl/t1_clnt.c index 05c7f200f30f29..746b4e6b7a4bef 100644 --- a/deps/openssl/openssl/ssl/t1_clnt.c +++ b/deps/openssl/openssl/ssl/t1_clnt.c @@ -77,12 +77,14 @@ static const SSL_METHOD *tls1_get_client_method(int ver) IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_client_method, ssl_undefined_function, - ssl3_connect, tls1_get_client_method) + ssl3_connect, + tls1_get_client_method, TLSv1_2_enc_data) IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_client_method, ssl_undefined_function, - ssl3_connect, tls1_get_client_method) + ssl3_connect, + tls1_get_client_method, TLSv1_1_enc_data) IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_client_method, ssl_undefined_function, - ssl3_connect, tls1_get_client_method) + ssl3_connect, tls1_get_client_method, TLSv1_enc_data) diff --git a/deps/openssl/openssl/ssl/t1_enc.c b/deps/openssl/openssl/ssl/t1_enc.c index 4e2845fa6ec908..577885fe081e80 100644 --- a/deps/openssl/openssl/ssl/t1_enc.c +++ b/deps/openssl/openssl/ssl/t1_enc.c @@ -160,7 +160,7 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec, { int chunk; size_t j; - EVP_MD_CTX ctx, ctx_tmp; + EVP_MD_CTX ctx, ctx_tmp, ctx_init; EVP_PKEY *mac_key; unsigned char A1[EVP_MAX_MD_SIZE]; size_t A1_len; @@ -171,14 +171,14 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec, EVP_MD_CTX_init(&ctx); EVP_MD_CTX_init(&ctx_tmp); - EVP_MD_CTX_set_flags(&ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - EVP_MD_CTX_set_flags(&ctx_tmp, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); + EVP_MD_CTX_init(&ctx_init); + EVP_MD_CTX_set_flags(&ctx_init, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); mac_key = EVP_PKEY_new_mac_key(EVP_PKEY_HMAC, NULL, sec, sec_len); if (!mac_key) goto err; - if (!EVP_DigestSignInit(&ctx, NULL, md, NULL, mac_key)) + if (!EVP_DigestSignInit(&ctx_init, NULL, md, NULL, mac_key)) goto err; - if (!EVP_DigestSignInit(&ctx_tmp, NULL, md, NULL, mac_key)) + if (!EVP_MD_CTX_copy_ex(&ctx, &ctx_init)) goto err; if (seed1 && !EVP_DigestSignUpdate(&ctx, seed1, seed1_len)) goto err; @@ -195,13 +195,11 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec, for (;;) { /* Reinit mac contexts */ - if (!EVP_DigestSignInit(&ctx, NULL, md, NULL, mac_key)) - goto err; - if (!EVP_DigestSignInit(&ctx_tmp, NULL, md, NULL, mac_key)) + if (!EVP_MD_CTX_copy_ex(&ctx, &ctx_init)) goto err; if (!EVP_DigestSignUpdate(&ctx, A1, A1_len)) goto err; - if (!EVP_DigestSignUpdate(&ctx_tmp, A1, A1_len)) + if (olen > chunk && !EVP_MD_CTX_copy_ex(&ctx_tmp, &ctx)) goto err; if (seed1 && !EVP_DigestSignUpdate(&ctx, seed1, seed1_len)) goto err; @@ -235,6 +233,7 @@ static int tls1_P_hash(const EVP_MD *md, const unsigned char *sec, EVP_PKEY_free(mac_key); EVP_MD_CTX_cleanup(&ctx); EVP_MD_CTX_cleanup(&ctx_tmp); + EVP_MD_CTX_cleanup(&ctx_init); 
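/*
 * Editor's sketch, not part of the upstream diff: tls1_P_hash() above
 * implements the P_hash expansion of RFC 5246, section 5:
 *
 *   A(0) = seed,  A(i) = HMAC_hash(secret, A(i-1))
 *   P_hash(secret, seed) = HMAC_hash(secret, A(1) + seed) +
 *                          HMAC_hash(secret, A(2) + seed) + ...
 *
 * The rewritten loop clones one pre-initialised template context (ctx_init)
 * with EVP_MD_CTX_copy_ex() instead of calling EVP_DigestSignInit() twice
 * per output chunk, and only prepares ctx_tmp while more output remains
 * (olen > chunk).
 */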
OPENSSL_cleanse(A1, sizeof(A1)); return ret; } @@ -574,6 +573,25 @@ int tls1_change_cipher_state(SSL *s, int which) SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE, ERR_R_INTERNAL_ERROR); goto err2; } +#ifdef OPENSSL_SSL_TRACE_CRYPTO + if (s->msg_callback) { + int wh = which & SSL3_CC_WRITE ? TLS1_RT_CRYPTO_WRITE : 0; + if (*mac_secret_size) + s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_MAC, + mac_secret, *mac_secret_size, + s, s->msg_callback_arg); + if (c->key_len) + s->msg_callback(2, s->version, wh | TLS1_RT_CRYPTO_KEY, + key, c->key_len, s, s->msg_callback_arg); + if (k) { + if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) + wh |= TLS1_RT_CRYPTO_FIXED_IV; + else + wh |= TLS1_RT_CRYPTO_IV; + s->msg_callback(2, s->version, wh, iv, k, s, s->msg_callback_arg); + } + } +#endif #ifdef TLS_DEBUG printf("which = %04X\nkey=", which); @@ -741,7 +759,7 @@ int tls1_enc(SSL *s, int send) int ivlen; enc = EVP_CIPHER_CTX_cipher(s->enc_write_ctx); /* For TLSv1.1 and later explicit IV */ - if (s->version >= TLS1_1_VERSION + if (SSL_USE_EXPLICIT_IV(s) && EVP_CIPHER_mode(enc) == EVP_CIPH_CBC_MODE) ivlen = EVP_CIPHER_iv_length(enc); else @@ -789,7 +807,7 @@ int tls1_enc(SSL *s, int send) seq = send ? s->s3->write_sequence : s->s3->read_sequence; - if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(s)) { unsigned char dtlsseq[9], *p = dtlsseq; s2n(send ? s->d1->w_epoch : s->d1->r_epoch, p); @@ -1007,7 +1025,7 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) mac_ctx = &hmac; } - if (ssl->version == DTLS1_VERSION || ssl->version == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(ssl)) { unsigned char dtlsseq[8], *p = dtlsseq; s2n(send ? ssl->d1->w_epoch : ssl->d1->r_epoch, p); @@ -1075,7 +1093,7 @@ int tls1_mac(SSL *ssl, unsigned char *md, int send) } #endif - if (ssl->version != DTLS1_VERSION && ssl->version != DTLS1_BAD_VER) { + if (!SSL_IS_DTLS(ssl)) { for (i = 7; i >= 0; i--) { ++seq[i]; if (seq[i] != 0) @@ -1141,6 +1159,22 @@ int tls1_generate_master_secret(SSL *s, unsigned char *out, unsigned char *p, SSL3_MASTER_SECRET_SIZE); #endif +#ifdef OPENSSL_SSL_TRACE_CRYPTO + if (s->msg_callback) { + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_PREMASTER, + p, len, s, s->msg_callback_arg); + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_CLIENT_RANDOM, + s->s3->client_random, SSL3_RANDOM_SIZE, + s, s->msg_callback_arg); + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_SERVER_RANDOM, + s->s3->server_random, SSL3_RANDOM_SIZE, + s, s->msg_callback_arg); + s->msg_callback(2, s->version, TLS1_RT_CRYPTO_MASTER, + s->session->master_key, + SSL3_MASTER_SECRET_SIZE, s, s->msg_callback_arg); + } +#endif + #ifdef KSSL_DEBUG fprintf(stderr, "tls1_generate_master_secret() complete\n"); #endif /* KSSL_DEBUG */ diff --git a/deps/openssl/openssl/ssl/t1_ext.c b/deps/openssl/openssl/ssl/t1_ext.c new file mode 100644 index 00000000000000..724ddf76acb3df --- /dev/null +++ b/deps/openssl/openssl/ssl/t1_ext.c @@ -0,0 +1,298 @@ +/* ssl/t1_ext.c */ +/* ==================================================================== + * Copyright (c) 2014 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + +/* Custom extension utility functions */ + +#include "ssl_locl.h" + +#ifndef OPENSSL_NO_TLSEXT + +/* Find a custom extension from the list. */ +static custom_ext_method *custom_ext_find(custom_ext_methods *exts, + unsigned int ext_type) +{ + size_t i; + custom_ext_method *meth = exts->meths; + for (i = 0; i < exts->meths_count; i++, meth++) { + if (ext_type == meth->ext_type) + return meth; + } + return NULL; +} + +/* + * Initialise custom extensions flags to indicate neither sent nor received. + */ +void custom_ext_init(custom_ext_methods *exts) +{ + size_t i; + custom_ext_method *meth = exts->meths; + for (i = 0; i < exts->meths_count; i++, meth++) + meth->ext_flags = 0; +} + +/* Pass received custom extension data to the application for parsing. */ +int custom_ext_parse(SSL *s, int server, + unsigned int ext_type, + const unsigned char *ext_data, size_t ext_size, int *al) +{ + custom_ext_methods *exts = server ? &s->cert->srv_ext : &s->cert->cli_ext; + custom_ext_method *meth; + meth = custom_ext_find(exts, ext_type); + /* If not found return success */ + if (!meth) + return 1; + if (!server) { + /* + * If it's ServerHello we can't have any extensions not sent in + * ClientHello. 
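 * (Editor's note, not part of the upstream diff: RFC 5246, section 7.4.1.4
 * forbids a server from echoing an extension type that did not appear in the
 * ClientHello, so an unsolicited custom extension in the ServerHello is
 * rejected below with an unsupported_extension alert.)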
+ */ + if (!(meth->ext_flags & SSL_EXT_FLAG_SENT)) { + *al = TLS1_AD_UNSUPPORTED_EXTENSION; + return 0; + } + } + /* If already present it's a duplicate */ + if (meth->ext_flags & SSL_EXT_FLAG_RECEIVED) { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + meth->ext_flags |= SSL_EXT_FLAG_RECEIVED; + /* If no parse function set return success */ + if (!meth->parse_cb) + return 1; + + return meth->parse_cb(s, ext_type, ext_data, ext_size, al, + meth->parse_arg); +} + +/* + * Request custom extension data from the application and add to the return + * buffer. + */ +int custom_ext_add(SSL *s, int server, + unsigned char **pret, unsigned char *limit, int *al) +{ + custom_ext_methods *exts = server ? &s->cert->srv_ext : &s->cert->cli_ext; + custom_ext_method *meth; + unsigned char *ret = *pret; + size_t i; + + for (i = 0; i < exts->meths_count; i++) { + const unsigned char *out = NULL; + size_t outlen = 0; + meth = exts->meths + i; + + if (server) { + /* + * For ServerHello only send extensions present in ClientHello. + */ + if (!(meth->ext_flags & SSL_EXT_FLAG_RECEIVED)) + continue; + /* If callback absent for server skip it */ + if (!meth->add_cb) + continue; + } + if (meth->add_cb) { + int cb_retval = 0; + cb_retval = meth->add_cb(s, meth->ext_type, + &out, &outlen, al, meth->add_arg); + if (cb_retval < 0) + return 0; /* error */ + if (cb_retval == 0) + continue; /* skip this extension */ + } + if (4 > limit - ret || outlen > (size_t)(limit - ret - 4)) + return 0; + s2n(meth->ext_type, ret); + s2n(outlen, ret); + if (outlen) { + memcpy(ret, out, outlen); + ret += outlen; + } + /* + * We can't send duplicates: code logic should prevent this. + */ + OPENSSL_assert(!(meth->ext_flags & SSL_EXT_FLAG_SENT)); + /* + * Indicate extension has been sent: this is both a sanity check to + * ensure we don't send duplicate extensions and indicates that it is + * not an error if the extension is present in ServerHello. + */ + meth->ext_flags |= SSL_EXT_FLAG_SENT; + if (meth->free_cb) + meth->free_cb(s, meth->ext_type, out, meth->add_arg); + } + *pret = ret; + return 1; +} + +/* Copy table of custom extensions */ +int custom_exts_copy(custom_ext_methods *dst, const custom_ext_methods *src) +{ + if (src->meths_count) { + dst->meths = + BUF_memdup(src->meths, + sizeof(custom_ext_method) * src->meths_count); + if (dst->meths == NULL) + return 0; + dst->meths_count = src->meths_count; + } + return 1; +} + +void custom_exts_free(custom_ext_methods *exts) +{ + if (exts->meths) + OPENSSL_free(exts->meths); +} + +/* Set callbacks for a custom extension. */ +static int custom_ext_meth_add(custom_ext_methods *exts, + unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, void *parse_arg) +{ + custom_ext_method *meth; + /* + * Check application error: if add_cb is not set free_cb will never be + * called. + */ + if (!add_cb && free_cb) + return 0; + /* Don't add if extension supported internally. 
*/ + if (SSL_extension_supported(ext_type)) + return 0; + /* Extension type must fit in 16 bits */ + if (ext_type > 0xffff) + return 0; + /* Search for duplicate */ + if (custom_ext_find(exts, ext_type)) + return 0; + exts->meths = OPENSSL_realloc(exts->meths, + (exts->meths_count + + 1) * sizeof(custom_ext_method)); + + if (!exts->meths) { + exts->meths_count = 0; + return 0; + } + + meth = exts->meths + exts->meths_count; + memset(meth, 0, sizeof(custom_ext_method)); + meth->parse_cb = parse_cb; + meth->add_cb = add_cb; + meth->free_cb = free_cb; + meth->ext_type = ext_type; + meth->add_arg = add_arg; + meth->parse_arg = parse_arg; + exts->meths_count++; + return 1; +} + +/* Application level functions to add custom extension callbacks */ +int SSL_CTX_add_client_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg) +{ + return custom_ext_meth_add(&ctx->cert->cli_ext, ext_type, + add_cb, free_cb, add_arg, parse_cb, parse_arg); +} + +int SSL_CTX_add_server_custom_ext(SSL_CTX *ctx, unsigned int ext_type, + custom_ext_add_cb add_cb, + custom_ext_free_cb free_cb, + void *add_arg, + custom_ext_parse_cb parse_cb, + void *parse_arg) +{ + return custom_ext_meth_add(&ctx->cert->srv_ext, ext_type, + add_cb, free_cb, add_arg, parse_cb, parse_arg); +} + +int SSL_extension_supported(unsigned int ext_type) +{ + switch (ext_type) { + /* Internally supported extensions. */ + case TLSEXT_TYPE_application_layer_protocol_negotiation: + case TLSEXT_TYPE_ec_point_formats: + case TLSEXT_TYPE_elliptic_curves: + case TLSEXT_TYPE_heartbeat: + case TLSEXT_TYPE_next_proto_neg: + case TLSEXT_TYPE_padding: + case TLSEXT_TYPE_renegotiate: + case TLSEXT_TYPE_server_name: + case TLSEXT_TYPE_session_ticket: + case TLSEXT_TYPE_signature_algorithms: + case TLSEXT_TYPE_srp: + case TLSEXT_TYPE_status_request: + case TLSEXT_TYPE_use_srtp: +# ifdef TLSEXT_TYPE_opaque_prf_input + case TLSEXT_TYPE_opaque_prf_input: +# endif +# ifdef TLSEXT_TYPE_encrypt_then_mac + case TLSEXT_TYPE_encrypt_then_mac: +# endif + return 1; + default: + return 0; + } +} +#endif diff --git a/deps/openssl/openssl/ssl/t1_lib.c b/deps/openssl/openssl/ssl/t1_lib.c index 243a70f7a60865..d85d26e596d935 100644 --- a/deps/openssl/openssl/ssl/t1_lib.c +++ b/deps/openssl/openssl/ssl/t1_lib.c @@ -113,6 +113,9 @@ #include #include #include +#ifdef OPENSSL_NO_EC2M +# include +#endif #include #include #include "ssl_locl.h" @@ -123,6 +126,8 @@ const char tls1_version_str[] = "TLSv1" OPENSSL_VERSION_PTEXT; static int tls_decrypt_ticket(SSL *s, const unsigned char *tick, int ticklen, const unsigned char *sess_id, int sesslen, SSL_SESSION **psess); +static int ssl_check_clienthello_tlsext_early(SSL *s); +int ssl_check_serverhello_tlsext(SSL *s); #endif SSL3_ENC_METHOD TLSv1_enc_data = { @@ -138,6 +143,49 @@ SSL3_ENC_METHOD TLSv1_enc_data = { TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE, tls1_alert_code, tls1_export_keying_material, + 0, + SSL3_HM_HEADER_LENGTH, + ssl3_set_handshake_header, + ssl3_handshake_write +}; + +SSL3_ENC_METHOD TLSv1_1_enc_data = { + tls1_enc, + tls1_mac, + tls1_setup_key_block, + tls1_generate_master_secret, + tls1_change_cipher_state, + tls1_final_finish_mac, + TLS1_FINISH_MAC_LENGTH, + tls1_cert_verify_mac, + TLS_MD_CLIENT_FINISH_CONST, TLS_MD_CLIENT_FINISH_CONST_SIZE, + TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE, + tls1_alert_code, + tls1_export_keying_material, + SSL_ENC_FLAG_EXPLICIT_IV, + 
SSL3_HM_HEADER_LENGTH, + ssl3_set_handshake_header, + ssl3_handshake_write +}; + +SSL3_ENC_METHOD TLSv1_2_enc_data = { + tls1_enc, + tls1_mac, + tls1_setup_key_block, + tls1_generate_master_secret, + tls1_change_cipher_state, + tls1_final_finish_mac, + TLS1_FINISH_MAC_LENGTH, + tls1_cert_verify_mac, + TLS_MD_CLIENT_FINISH_CONST, TLS_MD_CLIENT_FINISH_CONST_SIZE, + TLS_MD_SERVER_FINISH_CONST, TLS_MD_SERVER_FINISH_CONST_SIZE, + tls1_alert_code, + tls1_export_keying_material, + SSL_ENC_FLAG_EXPLICIT_IV | SSL_ENC_FLAG_SIGALGS | SSL_ENC_FLAG_SHA256_PRF + | SSL_ENC_FLAG_TLS1_2_CIPHERS, + SSL3_HM_HEADER_LENGTH, + ssl3_set_handshake_header, + ssl3_handshake_write }; long tls1_default_timeout(void) @@ -200,52 +248,112 @@ static int nid_list[] = { NID_secp256k1, /* secp256k1 (22) */ NID_X9_62_prime256v1, /* secp256r1 (23) */ NID_secp384r1, /* secp384r1 (24) */ - NID_secp521r1 /* secp521r1 (25) */ + NID_secp521r1, /* secp521r1 (25) */ + NID_brainpoolP256r1, /* brainpoolP256r1 (26) */ + NID_brainpoolP384r1, /* brainpoolP384r1 (27) */ + NID_brainpoolP512r1 /* brainpool512r1 (28) */ +}; + +static const unsigned char ecformats_default[] = { + TLSEXT_ECPOINTFORMAT_uncompressed, + TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime, + TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2 }; -static int pref_list[] = { +static const unsigned char eccurves_default[] = { # ifndef OPENSSL_NO_EC2M - NID_sect571r1, /* sect571r1 (14) */ - NID_sect571k1, /* sect571k1 (13) */ + 0, 14, /* sect571r1 (14) */ + 0, 13, /* sect571k1 (13) */ # endif - NID_secp521r1, /* secp521r1 (25) */ + 0, 25, /* secp521r1 (25) */ + 0, 28, /* brainpool512r1 (28) */ # ifndef OPENSSL_NO_EC2M - NID_sect409k1, /* sect409k1 (11) */ - NID_sect409r1, /* sect409r1 (12) */ + 0, 11, /* sect409k1 (11) */ + 0, 12, /* sect409r1 (12) */ # endif - NID_secp384r1, /* secp384r1 (24) */ + 0, 27, /* brainpoolP384r1 (27) */ + 0, 24, /* secp384r1 (24) */ # ifndef OPENSSL_NO_EC2M - NID_sect283k1, /* sect283k1 (9) */ - NID_sect283r1, /* sect283r1 (10) */ + 0, 9, /* sect283k1 (9) */ + 0, 10, /* sect283r1 (10) */ # endif - NID_secp256k1, /* secp256k1 (22) */ - NID_X9_62_prime256v1, /* secp256r1 (23) */ + 0, 26, /* brainpoolP256r1 (26) */ + 0, 22, /* secp256k1 (22) */ + 0, 23, /* secp256r1 (23) */ # ifndef OPENSSL_NO_EC2M - NID_sect239k1, /* sect239k1 (8) */ - NID_sect233k1, /* sect233k1 (6) */ - NID_sect233r1, /* sect233r1 (7) */ + 0, 8, /* sect239k1 (8) */ + 0, 6, /* sect233k1 (6) */ + 0, 7, /* sect233r1 (7) */ # endif - NID_secp224k1, /* secp224k1 (20) */ - NID_secp224r1, /* secp224r1 (21) */ + 0, 20, /* secp224k1 (20) */ + 0, 21, /* secp224r1 (21) */ # ifndef OPENSSL_NO_EC2M - NID_sect193r1, /* sect193r1 (4) */ - NID_sect193r2, /* sect193r2 (5) */ + 0, 4, /* sect193r1 (4) */ + 0, 5, /* sect193r2 (5) */ # endif - NID_secp192k1, /* secp192k1 (18) */ - NID_X9_62_prime192v1, /* secp192r1 (19) */ + 0, 18, /* secp192k1 (18) */ + 0, 19, /* secp192r1 (19) */ # ifndef OPENSSL_NO_EC2M - NID_sect163k1, /* sect163k1 (1) */ - NID_sect163r1, /* sect163r1 (2) */ - NID_sect163r2, /* sect163r2 (3) */ + 0, 1, /* sect163k1 (1) */ + 0, 2, /* sect163r1 (2) */ + 0, 3, /* sect163r2 (3) */ # endif - NID_secp160k1, /* secp160k1 (15) */ - NID_secp160r1, /* secp160r1 (16) */ - NID_secp160r2, /* secp160r2 (17) */ + 0, 15, /* secp160k1 (15) */ + 0, 16, /* secp160r1 (16) */ + 0, 17, /* secp160r2 (17) */ }; +static const unsigned char suiteb_curves[] = { + 0, TLSEXT_curve_P_256, + 0, TLSEXT_curve_P_384 +}; + +# ifdef OPENSSL_FIPS +/* Brainpool not allowed in FIPS mode */ +static const unsigned char 
fips_curves_default[] = { +# ifndef OPENSSL_NO_EC2M + 0, 14, /* sect571r1 (14) */ + 0, 13, /* sect571k1 (13) */ +# endif + 0, 25, /* secp521r1 (25) */ +# ifndef OPENSSL_NO_EC2M + 0, 11, /* sect409k1 (11) */ + 0, 12, /* sect409r1 (12) */ +# endif + 0, 24, /* secp384r1 (24) */ +# ifndef OPENSSL_NO_EC2M + 0, 9, /* sect283k1 (9) */ + 0, 10, /* sect283r1 (10) */ +# endif + 0, 22, /* secp256k1 (22) */ + 0, 23, /* secp256r1 (23) */ +# ifndef OPENSSL_NO_EC2M + 0, 8, /* sect239k1 (8) */ + 0, 6, /* sect233k1 (6) */ + 0, 7, /* sect233r1 (7) */ +# endif + 0, 20, /* secp224k1 (20) */ + 0, 21, /* secp224r1 (21) */ +# ifndef OPENSSL_NO_EC2M + 0, 4, /* sect193r1 (4) */ + 0, 5, /* sect193r2 (5) */ +# endif + 0, 18, /* secp192k1 (18) */ + 0, 19, /* secp192r1 (19) */ +# ifndef OPENSSL_NO_EC2M + 0, 1, /* sect163k1 (1) */ + 0, 2, /* sect163r1 (2) */ + 0, 3, /* sect163r2 (3) */ +# endif + 0, 15, /* secp160k1 (15) */ + 0, 16, /* secp160r1 (16) */ + 0, 17, /* secp160r2 (17) */ +}; +# endif + int tls1_ec_curve_id2nid(int curve_id) { - /* ECC curves from RFC 4492 */ + /* ECC curves from RFC 4492 and RFC 7027 */ if ((curve_id < 1) || ((unsigned int)curve_id > sizeof(nid_list) / sizeof(nid_list[0]))) return 0; @@ -254,7 +362,7 @@ int tls1_ec_curve_id2nid(int curve_id) int tls1_ec_nid2curve_id(int nid) { - /* ECC curves from RFC 4492 */ + /* ECC curves from RFC 4492 and RFC 7027 */ switch (nid) { case NID_sect163k1: /* sect163k1 (1) */ return 1; @@ -306,10 +414,519 @@ int tls1_ec_nid2curve_id(int nid) return 24; case NID_secp521r1: /* secp521r1 (25) */ return 25; + case NID_brainpoolP256r1: /* brainpoolP256r1 (26) */ + return 26; + case NID_brainpoolP384r1: /* brainpoolP384r1 (27) */ + return 27; + case NID_brainpoolP512r1: /* brainpool512r1 (28) */ + return 28; default: return 0; } } + +/* + * Get curves list, if "sess" is set return client curves otherwise + * preferred list. + * Sets |num_curves| to the number of curves in the list, i.e., + * the length of |pcurves| is 2 * num_curves. + * Returns 1 on success and 0 if the client curves list has invalid format. + * The latter indicates an internal error: we should not be accepting such + * lists in the first place. + * TODO(emilia): we should really be storing the curves list in explicitly + * parsed form instead. (However, this would affect binary compatibility + * so cannot happen in the 1.0.x series.) + */ +static int tls1_get_curvelist(SSL *s, int sess, + const unsigned char **pcurves, + size_t *num_curves) +{ + size_t pcurveslen = 0; + if (sess) { + *pcurves = s->session->tlsext_ellipticcurvelist; + pcurveslen = s->session->tlsext_ellipticcurvelist_length; + } else { + /* For Suite B mode only include P-256, P-384 */ + switch (tls1_suiteb(s)) { + case SSL_CERT_FLAG_SUITEB_128_LOS: + *pcurves = suiteb_curves; + pcurveslen = sizeof(suiteb_curves); + break; + + case SSL_CERT_FLAG_SUITEB_128_LOS_ONLY: + *pcurves = suiteb_curves; + pcurveslen = 2; + break; + + case SSL_CERT_FLAG_SUITEB_192_LOS: + *pcurves = suiteb_curves + 2; + pcurveslen = 2; + break; + default: + *pcurves = s->tlsext_ellipticcurvelist; + pcurveslen = s->tlsext_ellipticcurvelist_length; + } + if (!*pcurves) { +# ifdef OPENSSL_FIPS + if (FIPS_mode()) { + *pcurves = fips_curves_default; + pcurveslen = sizeof(fips_curves_default); + } else +# endif + { + *pcurves = eccurves_default; + pcurveslen = sizeof(eccurves_default); + } + } + } + /* We do not allow odd length arrays to enter the system. 
*/ + if (pcurveslen & 1) { + SSLerr(SSL_F_TLS1_GET_CURVELIST, ERR_R_INTERNAL_ERROR); + *num_curves = 0; + return 0; + } else { + *num_curves = pcurveslen / 2; + return 1; + } +} + +/* Check a curve is one of our preferences */ +int tls1_check_curve(SSL *s, const unsigned char *p, size_t len) +{ + const unsigned char *curves; + size_t num_curves, i; + unsigned int suiteb_flags = tls1_suiteb(s); + if (len != 3 || p[0] != NAMED_CURVE_TYPE) + return 0; + /* Check curve matches Suite B preferences */ + if (suiteb_flags) { + unsigned long cid = s->s3->tmp.new_cipher->id; + if (p[1]) + return 0; + if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256) { + if (p[2] != TLSEXT_curve_P_256) + return 0; + } else if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384) { + if (p[2] != TLSEXT_curve_P_384) + return 0; + } else /* Should never happen */ + return 0; + } + if (!tls1_get_curvelist(s, 0, &curves, &num_curves)) + return 0; + for (i = 0; i < num_curves; i++, curves += 2) { + if (p[1] == curves[0] && p[2] == curves[1]) + return 1; + } + return 0; +} + +/*- + * Return |nmatch|th shared curve or NID_undef if there is no match. + * For nmatch == -1, return number of matches + * For nmatch == -2, return the NID of the curve to use for + * an EC tmp key, or NID_undef if there is no match. + */ +int tls1_shared_curve(SSL *s, int nmatch) +{ + const unsigned char *pref, *supp; + size_t num_pref, num_supp, i, j; + int k; + /* Can't do anything on client side */ + if (s->server == 0) + return -1; + if (nmatch == -2) { + if (tls1_suiteb(s)) { + /* + * For Suite B ciphersuite determines curve: we already know + * these are acceptable due to previous checks. + */ + unsigned long cid = s->s3->tmp.new_cipher->id; + if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256) + return NID_X9_62_prime256v1; /* P-256 */ + if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384) + return NID_secp384r1; /* P-384 */ + /* Should never happen */ + return NID_undef; + } + /* If not Suite B just return first preference shared curve */ + nmatch = 0; + } + /* + * Avoid truncation. tls1_get_curvelist takes an int + * but s->options is a long... + */ + if (!tls1_get_curvelist + (s, (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) != 0, &supp, + &num_supp)) + /* In practice, NID_undef == 0 but let's be precise. */ + return nmatch == -1 ? 0 : NID_undef; + if (!tls1_get_curvelist + (s, !(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE), &pref, + &num_pref)) + return nmatch == -1 ? 0 : NID_undef; + k = 0; + for (i = 0; i < num_pref; i++, pref += 2) { + const unsigned char *tsupp = supp; + for (j = 0; j < num_supp; j++, tsupp += 2) { + if (pref[0] == tsupp[0] && pref[1] == tsupp[1]) { + if (nmatch == k) { + int id = (pref[0] << 8) | pref[1]; + return tls1_ec_curve_id2nid(id); + } + k++; + } + } + } + if (nmatch == -1) + return k; + /* Out of range (nmatch > k). 
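tls1_shared_curve() walks one peer's list in preference order and returns the first curve the other peer also offers; SSL_OP_CIPHER_SERVER_PREFERENCE decides whose order is authoritative, and the ecdh_tmp_auto flag makes the server use the result for its ephemeral ECDH key. A hedged sketch of the corresponding application-side setup, assuming the SSL_CTX_set_ecdh_auto() control that sets ecdh_tmp_auto is available in this release:

    #include <openssl/ssl.h>

    /* Prefer the server's own curve order and let the library pick the
     * ephemeral ECDH curve via the shared-curve logic above, instead of
     * pinning a single EC_KEY with SSL_CTX_set_tmp_ecdh(). */
    static void configure_server_ecdh(SSL_CTX *ctx)
    {
        SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
        SSL_CTX_set_ecdh_auto(ctx, 1);
    }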
*/ + return NID_undef; +} + +int tls1_set_curves(unsigned char **pext, size_t *pextlen, + int *curves, size_t ncurves) +{ + unsigned char *clist, *p; + size_t i; + /* + * Bitmap of curves included to detect duplicates: only works while curve + * ids < 32 + */ + unsigned long dup_list = 0; +# ifdef OPENSSL_NO_EC2M + EC_GROUP *curve; +# endif + + clist = OPENSSL_malloc(ncurves * 2); + if (!clist) + return 0; + for (i = 0, p = clist; i < ncurves; i++) { + unsigned long idmask; + int id; + id = tls1_ec_nid2curve_id(curves[i]); +# ifdef OPENSSL_FIPS + /* NB: 25 is last curve ID supported by FIPS module */ + if (FIPS_mode() && id > 25) { + OPENSSL_free(clist); + return 0; + } +# endif +# ifdef OPENSSL_NO_EC2M + curve = EC_GROUP_new_by_curve_name(curves[i]); + if (!curve || EC_METHOD_get_field_type(EC_GROUP_method_of(curve)) + == NID_X9_62_characteristic_two_field) { + if (curve) + EC_GROUP_free(curve); + OPENSSL_free(clist); + return 0; + } else + EC_GROUP_free(curve); +# endif + idmask = 1L << id; + if (!id || (dup_list & idmask)) { + OPENSSL_free(clist); + return 0; + } + dup_list |= idmask; + s2n(id, p); + } + if (*pext) + OPENSSL_free(*pext); + *pext = clist; + *pextlen = ncurves * 2; + return 1; +} + +# define MAX_CURVELIST 28 + +typedef struct { + size_t nidcnt; + int nid_arr[MAX_CURVELIST]; +} nid_cb_st; + +static int nid_cb(const char *elem, int len, void *arg) +{ + nid_cb_st *narg = arg; + size_t i; + int nid; + char etmp[20]; + if (elem == NULL) + return 0; + if (narg->nidcnt == MAX_CURVELIST) + return 0; + if (len > (int)(sizeof(etmp) - 1)) + return 0; + memcpy(etmp, elem, len); + etmp[len] = 0; + nid = EC_curve_nist2nid(etmp); + if (nid == NID_undef) + nid = OBJ_sn2nid(etmp); + if (nid == NID_undef) + nid = OBJ_ln2nid(etmp); + if (nid == NID_undef) + return 0; + for (i = 0; i < narg->nidcnt; i++) + if (narg->nid_arr[i] == nid) + return 0; + narg->nid_arr[narg->nidcnt++] = nid; + return 1; +} + +/* Set curves based on a colon separate list */ +int tls1_set_curves_list(unsigned char **pext, size_t *pextlen, + const char *str) +{ + nid_cb_st ncb; + ncb.nidcnt = 0; + if (!CONF_parse_list(str, ':', 1, nid_cb, &ncb)) + return 0; + if (pext == NULL) + return 1; + return tls1_set_curves(pext, pextlen, ncb.nid_arr, ncb.nidcnt); +} + +/* For an EC key set TLS id and required compression based on parameters */ +static int tls1_set_ec_id(unsigned char *curve_id, unsigned char *comp_id, + EC_KEY *ec) +{ + int is_prime, id; + const EC_GROUP *grp; + const EC_METHOD *meth; + if (!ec) + return 0; + /* Determine if it is a prime field */ + grp = EC_KEY_get0_group(ec); + if (!grp) + return 0; + meth = EC_GROUP_method_of(grp); + if (!meth) + return 0; + if (EC_METHOD_get_field_type(meth) == NID_X9_62_prime_field) + is_prime = 1; + else + is_prime = 0; + /* Determine curve ID */ + id = EC_GROUP_get_curve_name(grp); + id = tls1_ec_nid2curve_id(id); + /* If we have an ID set it, otherwise set arbitrary explicit curve */ + if (id) { + curve_id[0] = 0; + curve_id[1] = (unsigned char)id; + } else { + curve_id[0] = 0xff; + if (is_prime) + curve_id[1] = 0x01; + else + curve_id[1] = 0x02; + } + if (comp_id) { + if (EC_KEY_get0_public_key(ec) == NULL) + return 0; + if (EC_KEY_get_conv_form(ec) == POINT_CONVERSION_COMPRESSED) { + if (is_prime) + *comp_id = TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime; + else + *comp_id = TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2; + } else + *comp_id = TLSEXT_ECPOINTFORMAT_uncompressed; + } + return 1; +} + +/* Check an EC key is compatible with extensions */ +static int 
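nid_cb() resolves each element of the colon-separated string first as a NIST alias via EC_curve_nist2nid() and then as an OBJ short/long name, so either spelling works. A sketch of how an application would feed tls1_set_curves_list(), assuming the SSL_CTX_set1_curves_list() wrapper is wired up elsewhere in this release:

    #include <openssl/ssl.h>

    /* Offer (client) or accept (server) only these curves, in this order.
     * "P-256"/"P-384" are NIST aliases; brainpoolP256r1 resolves through its
     * object short name, matching the lookups in nid_cb(). */
    static int restrict_curves(SSL_CTX *ctx)
    {
        return SSL_CTX_set1_curves_list(ctx, "P-256:P-384:brainpoolP256r1");
    }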
tls1_check_ec_key(SSL *s, + unsigned char *curve_id, unsigned char *comp_id) +{ + const unsigned char *pformats, *pcurves; + size_t num_formats, num_curves, i; + int j; + /* + * If point formats extension present check it, otherwise everything is + * supported (see RFC4492). + */ + if (comp_id && s->session->tlsext_ecpointformatlist) { + pformats = s->session->tlsext_ecpointformatlist; + num_formats = s->session->tlsext_ecpointformatlist_length; + for (i = 0; i < num_formats; i++, pformats++) { + if (*comp_id == *pformats) + break; + } + if (i == num_formats) + return 0; + } + if (!curve_id) + return 1; + /* Check curve is consistent with client and server preferences */ + for (j = 0; j <= 1; j++) { + if (!tls1_get_curvelist(s, j, &pcurves, &num_curves)) + return 0; + for (i = 0; i < num_curves; i++, pcurves += 2) { + if (pcurves[0] == curve_id[0] && pcurves[1] == curve_id[1]) + break; + } + if (i == num_curves) + return 0; + /* For clients can only check sent curve list */ + if (!s->server) + return 1; + } + return 1; +} + +static void tls1_get_formatlist(SSL *s, const unsigned char **pformats, + size_t *num_formats) +{ + /* + * If we have a custom point format list use it otherwise use default + */ + if (s->tlsext_ecpointformatlist) { + *pformats = s->tlsext_ecpointformatlist; + *num_formats = s->tlsext_ecpointformatlist_length; + } else { + *pformats = ecformats_default; + /* For Suite B we don't support char2 fields */ + if (tls1_suiteb(s)) + *num_formats = sizeof(ecformats_default) - 1; + else + *num_formats = sizeof(ecformats_default); + } +} + +/* + * Check cert parameters compatible with extensions: currently just checks EC + * certificates have compatible curves and compression. + */ +static int tls1_check_cert_param(SSL *s, X509 *x, int set_ee_md) +{ + unsigned char comp_id, curve_id[2]; + EVP_PKEY *pkey; + int rv; + pkey = X509_get_pubkey(x); + if (!pkey) + return 0; + /* If not EC nothing to do */ + if (pkey->type != EVP_PKEY_EC) { + EVP_PKEY_free(pkey); + return 1; + } + rv = tls1_set_ec_id(curve_id, &comp_id, pkey->pkey.ec); + EVP_PKEY_free(pkey); + if (!rv) + return 0; + /* + * Can't check curve_id for client certs as we don't have a supported + * curves extension. + */ + rv = tls1_check_ec_key(s, s->server ? curve_id : NULL, &comp_id); + if (!rv) + return 0; + /* + * Special case for suite B. We *MUST* sign using SHA256+P-256 or + * SHA384+P-384, adjust digest if necessary. 
+ */ + if (set_ee_md && tls1_suiteb(s)) { + int check_md; + size_t i; + CERT *c = s->cert; + if (curve_id[0]) + return 0; + /* Check to see we have necessary signing algorithm */ + if (curve_id[1] == TLSEXT_curve_P_256) + check_md = NID_ecdsa_with_SHA256; + else if (curve_id[1] == TLSEXT_curve_P_384) + check_md = NID_ecdsa_with_SHA384; + else + return 0; /* Should never happen */ + for (i = 0; i < c->shared_sigalgslen; i++) + if (check_md == c->shared_sigalgs[i].signandhash_nid) + break; + if (i == c->shared_sigalgslen) + return 0; + if (set_ee_md == 2) { + if (check_md == NID_ecdsa_with_SHA256) + c->pkeys[SSL_PKEY_ECC].digest = EVP_sha256(); + else + c->pkeys[SSL_PKEY_ECC].digest = EVP_sha384(); + } + } + return rv; +} + +# ifndef OPENSSL_NO_ECDH +/* Check EC temporary key is compatible with client extensions */ +int tls1_check_ec_tmp_key(SSL *s, unsigned long cid) +{ + unsigned char curve_id[2]; + EC_KEY *ec = s->cert->ecdh_tmp; +# ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + /* Allow any curve: not just those peer supports */ + if (s->cert->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) + return 1; +# endif + /* + * If Suite B, AES128 MUST use P-256 and AES256 MUST use P-384, no other + * curves permitted. + */ + if (tls1_suiteb(s)) { + /* Curve to check determined by ciphersuite */ + if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256) + curve_id[1] = TLSEXT_curve_P_256; + else if (cid == TLS1_CK_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384) + curve_id[1] = TLSEXT_curve_P_384; + else + return 0; + curve_id[0] = 0; + /* Check this curve is acceptable */ + if (!tls1_check_ec_key(s, curve_id, NULL)) + return 0; + /* If auto or setting curve from callback assume OK */ + if (s->cert->ecdh_tmp_auto || s->cert->ecdh_tmp_cb) + return 1; + /* Otherwise check curve is acceptable */ + else { + unsigned char curve_tmp[2]; + if (!ec) + return 0; + if (!tls1_set_ec_id(curve_tmp, NULL, ec)) + return 0; + if (!curve_tmp[0] || curve_tmp[1] == curve_id[1]) + return 1; + return 0; + } + + } + if (s->cert->ecdh_tmp_auto) { + /* Need a shared curve */ + if (tls1_shared_curve(s, 0)) + return 1; + else + return 0; + } + if (!ec) { + if (s->cert->ecdh_tmp_cb) + return 1; + else + return 0; + } + if (!tls1_set_ec_id(curve_id, NULL, ec)) + return 0; +/* Set this to allow use of invalid curves for testing */ +# if 0 + return 1; +# else + return tls1_check_ec_key(s, curve_id, NULL); +# endif +} +# endif /* OPENSSL_NO_ECDH */ + +#else + +static int tls1_check_cert_param(SSL *s, X509 *x, int set_ee_md) +{ + return 1; +} + #endif /* OPENSSL_NO_EC */ #ifndef OPENSSL_NO_TLSEXT @@ -331,46 +948,257 @@ int tls1_ec_nid2curve_id(int nid) # define tlsext_sigalg_dsa(md) md, TLSEXT_signature_dsa, # endif -# ifdef OPENSSL_NO_ECDSA -# define tlsext_sigalg_ecdsa(md) - /* */ -# else -# define tlsext_sigalg_ecdsa(md) md, TLSEXT_signature_ecdsa, +# ifdef OPENSSL_NO_ECDSA +# define tlsext_sigalg_ecdsa(md) + /* */ +# else +# define tlsext_sigalg_ecdsa(md) md, TLSEXT_signature_ecdsa, +# endif + +# define tlsext_sigalg(md) \ + tlsext_sigalg_rsa(md) \ + tlsext_sigalg_dsa(md) \ + tlsext_sigalg_ecdsa(md) + +static unsigned char tls12_sigalgs[] = { +# ifndef OPENSSL_NO_SHA512 + tlsext_sigalg(TLSEXT_hash_sha512) + tlsext_sigalg(TLSEXT_hash_sha384) +# endif +# ifndef OPENSSL_NO_SHA256 + tlsext_sigalg(TLSEXT_hash_sha256) + tlsext_sigalg(TLSEXT_hash_sha224) +# endif +# ifndef OPENSSL_NO_SHA + tlsext_sigalg(TLSEXT_hash_sha1) +# endif +}; + +# ifndef OPENSSL_NO_ECDSA +static unsigned char suiteb_sigalgs[] = { + tlsext_sigalg_ecdsa(TLSEXT_hash_sha256) + 
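tls1_check_cert_param() and tls1_check_ec_tmp_key() enforce the Suite B pairings (P-256 with SHA-256, P-384 with SHA-384) whenever tls1_suiteb(s) reports a Suite B certificate flag. One way an application might opt in — an unverified sketch that assumes SSL_CTX_set_cert_flags() and SSL_CERT_FLAG_SUITEB_128_LOS are exposed in this branch:

    #include <openssl/ssl.h>

    /* Request the Suite B 128-bit level of security: the checks above then
     * constrain the ECDSA certificate, the ECDHE curve and the digest. */
    static void enable_suiteb(SSL_CTX *ctx)
    {
        SSL_CTX_set_cert_flags(ctx, SSL_CERT_FLAG_SUITEB_128_LOS);
    }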
tlsext_sigalg_ecdsa(TLSEXT_hash_sha384) +}; +# endif +size_t tls12_get_psigalgs(SSL *s, const unsigned char **psigs) +{ + /* + * If Suite B mode use Suite B sigalgs only, ignore any other + * preferences. + */ +# ifndef OPENSSL_NO_EC + switch (tls1_suiteb(s)) { + case SSL_CERT_FLAG_SUITEB_128_LOS: + *psigs = suiteb_sigalgs; + return sizeof(suiteb_sigalgs); + + case SSL_CERT_FLAG_SUITEB_128_LOS_ONLY: + *psigs = suiteb_sigalgs; + return 2; + + case SSL_CERT_FLAG_SUITEB_192_LOS: + *psigs = suiteb_sigalgs + 2; + return 2; + } +# endif + /* If server use client authentication sigalgs if not NULL */ + if (s->server && s->cert->client_sigalgs) { + *psigs = s->cert->client_sigalgs; + return s->cert->client_sigalgslen; + } else if (s->cert->conf_sigalgs) { + *psigs = s->cert->conf_sigalgs; + return s->cert->conf_sigalgslen; + } else { + *psigs = tls12_sigalgs; + return sizeof(tls12_sigalgs); + } +} + +/* + * Check signature algorithm is consistent with sent supported signature + * algorithms and if so return relevant digest. + */ +int tls12_check_peer_sigalg(const EVP_MD **pmd, SSL *s, + const unsigned char *sig, EVP_PKEY *pkey) +{ + const unsigned char *sent_sigs; + size_t sent_sigslen, i; + int sigalg = tls12_get_sigid(pkey); + /* Should never happen */ + if (sigalg == -1) + return -1; + /* Check key type is consistent with signature */ + if (sigalg != (int)sig[1]) { + SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_WRONG_SIGNATURE_TYPE); + return 0; + } +# ifndef OPENSSL_NO_EC + if (pkey->type == EVP_PKEY_EC) { + unsigned char curve_id[2], comp_id; + /* Check compression and curve matches extensions */ + if (!tls1_set_ec_id(curve_id, &comp_id, pkey->pkey.ec)) + return 0; + if (!s->server && !tls1_check_ec_key(s, curve_id, &comp_id)) { + SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_WRONG_CURVE); + return 0; + } + /* If Suite B only P-384+SHA384 or P-256+SHA-256 allowed */ + if (tls1_suiteb(s)) { + if (curve_id[0]) + return 0; + if (curve_id[1] == TLSEXT_curve_P_256) { + if (sig[0] != TLSEXT_hash_sha256) { + SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, + SSL_R_ILLEGAL_SUITEB_DIGEST); + return 0; + } + } else if (curve_id[1] == TLSEXT_curve_P_384) { + if (sig[0] != TLSEXT_hash_sha384) { + SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, + SSL_R_ILLEGAL_SUITEB_DIGEST); + return 0; + } + } else + return 0; + } + } else if (tls1_suiteb(s)) + return 0; +# endif + + /* Check signature matches a type we sent */ + sent_sigslen = tls12_get_psigalgs(s, &sent_sigs); + for (i = 0; i < sent_sigslen; i += 2, sent_sigs += 2) { + if (sig[0] == sent_sigs[0] && sig[1] == sent_sigs[1]) + break; + } + /* Allow fallback to SHA1 if not strict mode */ + if (i == sent_sigslen + && (sig[0] != TLSEXT_hash_sha1 + || s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT)) { + SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_WRONG_SIGNATURE_TYPE); + return 0; + } + *pmd = tls12_get_hash(sig[0]); + if (*pmd == NULL) { + SSLerr(SSL_F_TLS12_CHECK_PEER_SIGALG, SSL_R_UNKNOWN_DIGEST); + return 0; + } + /* + * Store the digest used so applications can retrieve it if they wish. + */ + if (s->session && s->session->sess_cert) + s->session->sess_cert->peer_key->digest = *pmd; + return 1; +} + +/* + * Get a mask of disabled algorithms: an algorithm is disabled if it isn't + * supported or doesn't appear in supported signature algorithms. Unlike + * ssl_cipher_get_disabled this applies to a specific session and not global + * settings. 
+ */ +void ssl_set_client_disabled(SSL *s) +{ + CERT *c = s->cert; + const unsigned char *sigalgs; + size_t i, sigalgslen; + int have_rsa = 0, have_dsa = 0, have_ecdsa = 0; + c->mask_a = 0; + c->mask_k = 0; + /* Don't allow TLS 1.2 only ciphers if we don't suppport them */ + if (!SSL_CLIENT_USE_TLS1_2_CIPHERS(s)) + c->mask_ssl = SSL_TLSV1_2; + else + c->mask_ssl = 0; + /* + * Now go through all signature algorithms seeing if we support any for + * RSA, DSA, ECDSA. Do this for all versions not just TLS 1.2. + */ + sigalgslen = tls12_get_psigalgs(s, &sigalgs); + for (i = 0; i < sigalgslen; i += 2, sigalgs += 2) { + switch (sigalgs[1]) { +# ifndef OPENSSL_NO_RSA + case TLSEXT_signature_rsa: + have_rsa = 1; + break; # endif - -# define tlsext_sigalg(md) \ - tlsext_sigalg_rsa(md) \ - tlsext_sigalg_dsa(md) \ - tlsext_sigalg_ecdsa(md) - -static unsigned char tls12_sigalgs[] = { -# ifndef OPENSSL_NO_SHA512 - tlsext_sigalg(TLSEXT_hash_sha512) - tlsext_sigalg(TLSEXT_hash_sha384) +# ifndef OPENSSL_NO_DSA + case TLSEXT_signature_dsa: + have_dsa = 1; + break; # endif -# ifndef OPENSSL_NO_SHA256 - tlsext_sigalg(TLSEXT_hash_sha256) - tlsext_sigalg(TLSEXT_hash_sha224) +# ifndef OPENSSL_NO_ECDSA + case TLSEXT_signature_ecdsa: + have_ecdsa = 1; + break; # endif -# ifndef OPENSSL_NO_SHA - tlsext_sigalg(TLSEXT_hash_sha1) + } + } + /* + * Disable auth and static DH if we don't include any appropriate + * signature algorithms. + */ + if (!have_rsa) { + c->mask_a |= SSL_aRSA; + c->mask_k |= SSL_kDHr | SSL_kECDHr; + } + if (!have_dsa) { + c->mask_a |= SSL_aDSS; + c->mask_k |= SSL_kDHd; + } + if (!have_ecdsa) { + c->mask_a |= SSL_aECDSA; + c->mask_k |= SSL_kECDHe; + } +# ifndef OPENSSL_NO_KRB5 + if (!kssl_tgt_is_available(s->kssl_ctx)) { + c->mask_a |= SSL_aKRB5; + c->mask_k |= SSL_kKRB5; + } # endif -}; - -int tls12_get_req_sig_algs(SSL *s, unsigned char *p) -{ - size_t slen = sizeof(tls12_sigalgs); - if (p) - memcpy(p, tls12_sigalgs, slen); - return (int)slen; +# ifndef OPENSSL_NO_PSK + /* with PSK there must be client callback set */ + if (!s->psk_client_callback) { + c->mask_a |= SSL_aPSK; + c->mask_k |= SSL_kPSK; + } +# endif /* OPENSSL_NO_PSK */ +# ifndef OPENSSL_NO_SRP + if (!(s->srp_ctx.srp_Mask & SSL_kSRP)) { + c->mask_a |= SSL_aSRP; + c->mask_k |= SSL_kSRP; + } +# endif + c->valid = 1; } unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, - unsigned char *limit) + unsigned char *limit, int *al) { int extdatalen = 0; unsigned char *orig = buf; unsigned char *ret = buf; +# ifndef OPENSSL_NO_EC + /* See if we support any ECC ciphersuites */ + int using_ecc = 0; + if (s->version >= TLS1_VERSION || SSL_IS_DTLS(s)) { + int i; + unsigned long alg_k, alg_a; + STACK_OF(SSL_CIPHER) *cipher_stack = SSL_get_ciphers(s); + + for (i = 0; i < sk_SSL_CIPHER_num(cipher_stack); i++) { + SSL_CIPHER *c = sk_SSL_CIPHER_value(cipher_stack, i); + + alg_k = c->algorithm_mkey; + alg_a = c->algorithm_auth; + if ((alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe) + || (alg_a & SSL_aECDSA))) { + using_ecc = 1; + break; + } + } + } +# endif /* don't add extensions for SSLv3 unless doing secure renegotiation */ if (s->client_version == SSL3_VERSION && !s->s3->send_connection_binding) @@ -466,50 +1294,53 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, # endif # ifndef OPENSSL_NO_EC - if (s->tlsext_ecpointformatlist != NULL) { + if (using_ecc) { /* * Add TLS extension ECPointFormats to the ClientHello message */ long lenmax; + const unsigned char *pcurves, *pformats; + size_t num_curves, 
num_formats, curves_list_len; + + tls1_get_formatlist(s, &pformats, &num_formats); if ((lenmax = limit - ret - 5) < 0) return NULL; - if (s->tlsext_ecpointformatlist_length > (unsigned long)lenmax) + if (num_formats > (size_t)lenmax) return NULL; - if (s->tlsext_ecpointformatlist_length > 255) { + if (num_formats > 255) { SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; } s2n(TLSEXT_TYPE_ec_point_formats, ret); - s2n(s->tlsext_ecpointformatlist_length + 1, ret); - *(ret++) = (unsigned char)s->tlsext_ecpointformatlist_length; - memcpy(ret, s->tlsext_ecpointformatlist, - s->tlsext_ecpointformatlist_length); - ret += s->tlsext_ecpointformatlist_length; - } - if (s->tlsext_ellipticcurvelist != NULL) { + /* The point format list has 1-byte length. */ + s2n(num_formats + 1, ret); + *(ret++) = (unsigned char)num_formats; + memcpy(ret, pformats, num_formats); + ret += num_formats; + /* * Add TLS extension EllipticCurves to the ClientHello message */ - long lenmax; + pcurves = s->tlsext_ellipticcurvelist; + if (!tls1_get_curvelist(s, 0, &pcurves, &num_curves)) + return NULL; if ((lenmax = limit - ret - 6) < 0) return NULL; - if (s->tlsext_ellipticcurvelist_length > (unsigned long)lenmax) + if (num_curves > (size_t)lenmax / 2) return NULL; - if (s->tlsext_ellipticcurvelist_length > 65532) { + if (num_curves > 65532 / 2) { SSLerr(SSL_F_SSL_ADD_CLIENTHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; } - + curves_list_len = 2 * num_curves; s2n(TLSEXT_TYPE_elliptic_curves, ret); - s2n(s->tlsext_ellipticcurvelist_length + 2, ret); - - s2n(s->tlsext_ellipticcurvelist_length, ret); - memcpy(ret, s->tlsext_ellipticcurvelist, - s->tlsext_ellipticcurvelist_length); - ret += s->tlsext_ellipticcurvelist_length; + s2n(curves_list_len + 2, ret); + s2n(curves_list_len, ret); + memcpy(ret, pcurves, curves_list_len); + ret += curves_list_len; } # endif /* OPENSSL_NO_EC */ @@ -546,17 +1377,20 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, } skip_ext: - if (TLS1_get_client_version(s) >= TLS1_2_VERSION) { - if ((size_t)(limit - ret) < sizeof(tls12_sigalgs) + 6) + if (SSL_USE_SIGALGS(s)) { + size_t salglen; + const unsigned char *salg; + salglen = tls12_get_psigalgs(s, &salg); + if ((size_t)(limit - ret) < salglen + 6) return NULL; s2n(TLSEXT_TYPE_signature_algorithms, ret); - s2n(sizeof(tls12_sigalgs) + 2, ret); - s2n(sizeof(tls12_sigalgs), ret); - memcpy(ret, tls12_sigalgs, sizeof(tls12_sigalgs)); - ret += sizeof(tls12_sigalgs); + s2n(salglen + 2, ret); + s2n(salglen, ret); + memcpy(ret, salg, salglen); + ret += salglen; } # ifdef TLSEXT_TYPE_opaque_prf_input - if (s->s3->client_opaque_prf_input != NULL && s->version != DTLS1_VERSION) { + if (s->s3->client_opaque_prf_input != NULL) { size_t col = s->s3->client_opaque_prf_input_len; if ((long)(limit - ret - 6 - col < 0)) @@ -572,8 +1406,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, } # endif - if (s->tlsext_status_type == TLSEXT_STATUSTYPE_ocsp && - s->version != DTLS1_VERSION) { + if (s->tlsext_status_type == TLSEXT_STATUSTYPE_ocsp) { int i; long extlen, idlen, itmp; OCSP_RESPID *id; @@ -646,6 +1479,15 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, } # endif + if (s->alpn_client_proto_list && !s->s3->tmp.finish_md_len) { + if ((size_t)(limit - ret) < 6 + s->alpn_client_proto_list_len) + return NULL; + s2n(TLSEXT_TYPE_application_layer_protocol_negotiation, ret); + s2n(2 + s->alpn_client_proto_list_len, ret); + s2n(s->alpn_client_proto_list_len, ret); + 
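The length arithmetic just above nests two length fields: the 2-byte extension_data length written as curves_list_len + 2, and inside it the 2-byte curve-list length followed by one 2-byte id per curve. A standalone sketch of the same framing (plain buffer handling, no OpenSSL calls; curve ids are assumed to already be in wire form, e.g. 0x00,0x17 for secp256r1):

    #include <stddef.h>
    #include <string.h>

    /* Emit: 2-byte type (elliptic_curves = 10), 2-byte extension length,
     * 2-byte curve list length, then the 2-byte ids. Returns bytes written,
     * or 0 if the output buffer is too small. */
    static size_t build_curves_ext(unsigned char *out, size_t outlen,
                                   const unsigned char *curves, size_t ncurves)
    {
        size_t body = 2 * ncurves;
        if (outlen < 6 + body)
            return 0;
        out[0] = 0x00; out[1] = 0x0a;               /* extension type 10 */
        out[2] = (unsigned char)((body + 2) >> 8);  /* extension_data length */
        out[3] = (unsigned char)(body + 2);
        out[4] = (unsigned char)(body >> 8);        /* NamedCurve list length */
        out[5] = (unsigned char)body;
        memcpy(out + 6, curves, body);
        return 6 + body;
    }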
memcpy(ret, s->alpn_client_proto_list, s->alpn_client_proto_list_len); + ret += s->alpn_client_proto_list_len; + } # ifndef OPENSSL_NO_SRTP if (SSL_IS_DTLS(s) && SSL_get_srtp_profiles(s)) { int el; @@ -665,6 +1507,11 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, ret += el; } # endif + custom_ext_init(&s->cert->cli_ext); + /* Add custom TLS Extensions to ClientHello */ + if (!custom_ext_add(s, 0, &ret, limit, al)) + return NULL; + /* * Add padding to workaround bugs in F5 terminators. See * https://tools.ietf.org/html/draft-agl-tls-padding-03 NB: because this @@ -702,7 +1549,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, } unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, - unsigned char *limit) + unsigned char *limit, int *al) { int extdatalen = 0; unsigned char *orig = buf; @@ -710,7 +1557,13 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, # ifndef OPENSSL_NO_NEXTPROTONEG int next_proto_neg_seen; # endif - +# ifndef OPENSSL_NO_EC + unsigned long alg_k = s->s3->tmp.new_cipher->algorithm_mkey; + unsigned long alg_a = s->s3->tmp.new_cipher->algorithm_auth; + int using_ecc = (alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe)) + || (alg_a & SSL_aECDSA); + using_ecc = using_ecc && (s->session->tlsext_ecpointformatlist != NULL); +# endif /* * don't add extensions for SSLv3, unless doing secure renegotiation */ @@ -752,27 +1605,30 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, ret += el; } # ifndef OPENSSL_NO_EC - if (s->tlsext_ecpointformatlist != NULL) { + if (using_ecc) { + const unsigned char *plist; + size_t plistlen; /* * Add TLS extension ECPointFormats to the ServerHello message */ long lenmax; + tls1_get_formatlist(s, &plist, &plistlen); + if ((lenmax = limit - ret - 5) < 0) return NULL; - if (s->tlsext_ecpointformatlist_length > (unsigned long)lenmax) + if (plistlen > (size_t)lenmax) return NULL; - if (s->tlsext_ecpointformatlist_length > 255) { + if (plistlen > 255) { SSLerr(SSL_F_SSL_ADD_SERVERHELLO_TLSEXT, ERR_R_INTERNAL_ERROR); return NULL; } s2n(TLSEXT_TYPE_ec_point_formats, ret); - s2n(s->tlsext_ecpointformatlist_length + 1, ret); - *(ret++) = (unsigned char)s->tlsext_ecpointformatlist_length; - memcpy(ret, s->tlsext_ecpointformatlist, - s->tlsext_ecpointformatlist_length); - ret += s->tlsext_ecpointformatlist_length; + s2n(plistlen + 1, ret); + *(ret++) = (unsigned char)plistlen; + memcpy(ret, plist, plistlen); + ret += plistlen; } /* @@ -795,7 +1651,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, s2n(0, ret); } # ifdef TLSEXT_TYPE_opaque_prf_input - if (s->s3->server_opaque_prf_input != NULL && s->version != DTLS1_VERSION) { + if (s->s3->server_opaque_prf_input != NULL) { size_t sol = s->s3->server_opaque_prf_input_len; if ((long)(limit - ret - 6 - sol) < 0) @@ -890,6 +1746,22 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, } } # endif + if (!custom_ext_add(s, 1, &ret, limit, al)) + return NULL; + + if (s->s3->alpn_selected) { + const unsigned char *selected = s->s3->alpn_selected; + unsigned len = s->s3->alpn_selected_len; + + if ((long)(limit - ret - 4 - 2 - 1 - len) < 0) + return NULL; + s2n(TLSEXT_TYPE_application_layer_protocol_negotiation, ret); + s2n(3 + len, ret); + s2n(1 + len, ret); + *ret++ = len; + memcpy(ret, selected, len); + ret += len; + } if ((extdatalen = ret - orig - 2) == 0) return orig; @@ -981,15 +1853,82 @@ static void ssl_check_for_safari(SSL *s, const unsigned char *data, } # endif 
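The ClientHello code above copies alpn_client_proto_list verbatim after a 2-byte list length, so the buffer the application registers must already be in ALPN wire format: each protocol name prefixed by its own 1-byte length. A sketch using the public setter; the protocol strings are arbitrary examples:

    #include <openssl/ssl.h>

    /* ALPN protocol list in wire format; the library prepends the 2-byte
     * list length when it builds the extension. */
    static const unsigned char alpn_protos[] = {
        2, 'h', '2',
        8, 'h', 't', 't', 'p', '/', '1', '.', '1'
    };

    static int enable_client_alpn(SSL_CTX *ctx)
    {
        /* SSL_CTX_set_alpn_protos() returns 0 on success. */
        return SSL_CTX_set_alpn_protos(ctx, alpn_protos,
                                       sizeof(alpn_protos)) == 0;
    }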
/* !OPENSSL_NO_EC */ -int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, - int n, int *al) +/* + * tls1_alpn_handle_client_hello is called to process the ALPN extension in a + * ClientHello. data: the contents of the extension, not including the type + * and length. data_len: the number of bytes in |data| al: a pointer to the + * alert value to send in the event of a non-zero return. returns: 0 on + * success. + */ +static int tls1_alpn_handle_client_hello(SSL *s, const unsigned char *data, + unsigned data_len, int *al) +{ + unsigned i; + unsigned proto_len; + const unsigned char *selected; + unsigned char selected_len; + int r; + + if (s->ctx->alpn_select_cb == NULL) + return 0; + + if (data_len < 2) + goto parse_error; + + /* + * data should contain a uint16 length followed by a series of 8-bit, + * length-prefixed strings. + */ + i = ((unsigned)data[0]) << 8 | ((unsigned)data[1]); + data_len -= 2; + data += 2; + if (data_len != i) + goto parse_error; + + if (data_len < 2) + goto parse_error; + + for (i = 0; i < data_len;) { + proto_len = data[i]; + i++; + + if (proto_len == 0) + goto parse_error; + + if (i + proto_len < i || i + proto_len > data_len) + goto parse_error; + + i += proto_len; + } + + r = s->ctx->alpn_select_cb(s, &selected, &selected_len, data, data_len, + s->ctx->alpn_select_cb_arg); + if (r == SSL_TLSEXT_ERR_OK) { + if (s->s3->alpn_selected) + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = OPENSSL_malloc(selected_len); + if (!s->s3->alpn_selected) { + *al = SSL_AD_INTERNAL_ERROR; + return -1; + } + memcpy(s->s3->alpn_selected, selected, selected_len); + s->s3->alpn_selected_len = selected_len; + } + return 0; + + parse_error: + *al = SSL_AD_DECODE_ERROR; + return -1; +} + +static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, + unsigned char *d, int n, int *al) { unsigned short type; unsigned short size; unsigned short len; unsigned char *data = *p; int renegotiate_seen = 0; - int sigalg_seen = 0; s->servername_done = 0; s->tlsext_status_type = -1; @@ -997,6 +1936,10 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, s->s3->next_proto_neg_seen = 0; # endif + if (s->s3->alpn_selected) { + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = NULL; + } # ifndef OPENSSL_NO_HEARTBEATS s->tlsext_heartbeat &= ~(SSL_TLSEXT_HB_ENABLED | SSL_TLSEXT_HB_DONT_SEND_REQUESTS); @@ -1007,6 +1950,11 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, ssl_check_for_safari(s, data, d, n); # endif /* !OPENSSL_NO_EC */ + /* Clear any signature algorithms extension received */ + if (s->cert->peer_sigalgs) { + OPENSSL_free(s->cert->peer_sigalgs); + s->cert->peer_sigalgs = NULL; + } # ifndef OPENSSL_NO_SRP if (s->srp_ctx.login != NULL) { OPENSSL_free(s->srp_ctx.login); @@ -1159,7 +2107,8 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, unsigned char *sdata = data; int ecpointformatlist_length = *(sdata++); - if (ecpointformatlist_length != size - 1) { + if (ecpointformatlist_length != size - 1 || + ecpointformatlist_length < 1) { *al = TLS1_AD_DECODE_ERROR; return 0; } @@ -1228,8 +2177,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, } # endif /* OPENSSL_NO_EC */ # ifdef TLSEXT_TYPE_opaque_prf_input - else if (type == TLSEXT_TYPE_opaque_prf_input && - s->version != DTLS1_VERSION) { + else if (type == TLSEXT_TYPE_opaque_prf_input) { unsigned char *sdata = data; if (size < 2) { @@ -1273,23 +2221,21 @@ int 
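tls1_alpn_handle_client_hello() hands the callback the client's list with the outer 2-byte length already stripped, and copies the callback's choice into s3->alpn_selected when it returns SSL_TLSEXT_ERR_OK. A sketch of such a callback; SSL_select_next_proto() is the NPN helper reused here to pick the first server-preferred protocol the client also offers (assumed available when NPN support is compiled in):

    #include <openssl/ssl.h>

    static const unsigned char server_protos[] = {
        2, 'h', '2',
        8, 'h', 't', 't', 'p', '/', '1', '.', '1'
    };

    /* in/inlen is the client's length-prefixed list, exactly as the parser
     * above passes it.  Returning SSL_TLSEXT_ERR_OK makes the library store
     * *out and echo it in the ServerHello. */
    static int alpn_select(SSL *ssl, const unsigned char **out,
                           unsigned char *outlen, const unsigned char *in,
                           unsigned int inlen, void *arg)
    {
        unsigned char *sel;

        if (SSL_select_next_proto(&sel, outlen,
                                  server_protos, sizeof(server_protos),
                                  in, inlen) != OPENSSL_NPN_NEGOTIATED)
            return SSL_TLSEXT_ERR_NOACK;
        *out = sel;
        return SSL_TLSEXT_ERR_OK;
    }

    static void enable_server_alpn(SSL_CTX *ctx)
    {
        SSL_CTX_set_alpn_select_cb(ctx, alpn_select, NULL);
    }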
ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, renegotiate_seen = 1; } else if (type == TLSEXT_TYPE_signature_algorithms) { int dsize; - if (sigalg_seen || size < 2) { + if (s->cert->peer_sigalgs || size < 2) { *al = SSL_AD_DECODE_ERROR; return 0; } - sigalg_seen = 1; n2s(data, dsize); size -= 2; - if (dsize != size || dsize & 1) { + if (dsize != size || dsize & 1 || !dsize) { *al = SSL_AD_DECODE_ERROR; return 0; } - if (!tls1_process_sigalgs(s, data, dsize)) { + if (!tls1_save_sigalgs(s, data, dsize)) { *al = SSL_AD_DECODE_ERROR; return 0; } - } else if (type == TLSEXT_TYPE_status_request && - s->version != DTLS1_VERSION) { + } else if (type == TLSEXT_TYPE_status_request) { if (size < 5) { *al = SSL_AD_DECODE_ERROR; @@ -1398,7 +2344,8 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, # endif # ifndef OPENSSL_NO_NEXTPROTONEG else if (type == TLSEXT_TYPE_next_proto_neg && - s->s3->tmp.finish_md_len == 0) { + s->s3->tmp.finish_md_len == 0 && + s->s3->alpn_selected == NULL) { /*- * We shouldn't accept this extension on a * renegotiation. @@ -1420,6 +2367,16 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, } # endif + else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation && + s->ctx->alpn_select_cb && s->s3->tmp.finish_md_len == 0) { + if (tls1_alpn_handle_client_hello(s, data, size, al) != 0) + return 0; +# ifndef OPENSSL_NO_NEXTPROTONEG + /* ALPN takes precedence over NPN. */ + s->s3->next_proto_neg_seen = 0; +# endif + } + /* session ticket processed earlier */ # ifndef OPENSSL_NO_SRTP else if (SSL_IS_DTLS(s) && SSL_get_srtp_profiles(s) @@ -1441,7 +2398,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, if (!renegotiate_seen && s->renegotiate && !(s->options & SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)) { *al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT, + SSLerr(SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT, SSL_R_UNSAFE_LEGACY_RENEGOTIATION_DISABLED); return 0; } @@ -1449,6 +2406,73 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, return 1; } +/* + * Parse any custom extensions found. "data" is the start of the extension data + * and "limit" is the end of the record. TODO: add strict syntax checking. + */ + +static int ssl_scan_clienthello_custom_tlsext(SSL *s, + const unsigned char *data, + const unsigned char *limit, + int *al) +{ + unsigned short type, size, len; + /* If resumed session or no custom extensions nothing to do */ + if (s->hit || s->cert->srv_ext.meths_count == 0) + return 1; + + if (data >= limit - 2) + return 1; + n2s(data, len); + + if (data > limit - len) + return 1; + + while (data <= limit - 4) { + n2s(data, type); + n2s(data, size); + + if (data + size > limit) + return 1; + if (custom_ext_parse(s, 1 /* server */ , type, data, size, al) <= 0) + return 0; + + data += size; + } + + return 1; +} + +int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, + int n) +{ + int al = -1; + unsigned char *ptmp = *p; + /* + * Internally supported extensions are parsed first so SNI can be handled + * before custom extensions. An application processing SNI will typically + * switch the parent context using SSL_set_SSL_CTX and custom extensions + * need to be handled by the new SSL_CTX structure. 
+ */ + if (ssl_scan_clienthello_tlsext(s, p, d, n, &al) <= 0) { + ssl3_send_alert(s, SSL3_AL_FATAL, al); + return 0; + } + + if (ssl_check_clienthello_tlsext_early(s) <= 0) { + SSLerr(SSL_F_SSL_PARSE_CLIENTHELLO_TLSEXT, SSL_R_CLIENTHELLO_TLSEXT); + return 0; + } + + custom_ext_init(&s->cert->srv_ext); + if (ssl_scan_clienthello_custom_tlsext(s, ptmp, d + n, &al) <= 0) { + ssl3_send_alert(s, SSL3_AL_FATAL, al); + return 0; + } + + return 1; +} + # ifndef OPENSSL_NO_NEXTPROTONEG /* * ssl_next_proto_validate validates a Next Protocol Negotiation block. No @@ -1470,8 +2494,8 @@ static char ssl_next_proto_validate(unsigned char *d, unsigned len) } # endif -int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, - int n, int *al) +static int ssl_scan_serverhello_tlsext(SSL *s, unsigned char **p, + unsigned char *d, int n, int *al) { unsigned short length; unsigned short type; @@ -1485,6 +2509,10 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, # endif s->tlsext_ticket_expected = 0; + if (s->s3->alpn_selected) { + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = NULL; + } # ifndef OPENSSL_NO_HEARTBEATS s->tlsext_heartbeat &= ~(SSL_TLSEXT_HB_ENABLED | SSL_TLSEXT_HB_DONT_SEND_REQUESTS); @@ -1521,8 +2549,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, unsigned char *sdata = data; int ecpointformatlist_length = *(sdata++); - if (ecpointformatlist_length != size - 1 || - ecpointformatlist_length < 1) { + if (ecpointformatlist_length != size - 1) { *al = TLS1_AD_DECODE_ERROR; return 0; } @@ -1567,8 +2594,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, s->tlsext_ticket_expected = 1; } # ifdef TLSEXT_TYPE_opaque_prf_input - else if (type == TLSEXT_TYPE_opaque_prf_input && - s->version != DTLS1_VERSION) { + else if (type == TLSEXT_TYPE_opaque_prf_input) { unsigned char *sdata = data; if (size < 2) { @@ -1599,8 +2625,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, } } # endif - else if (type == TLSEXT_TYPE_status_request && - s->version != DTLS1_VERSION) { + else if (type == TLSEXT_TYPE_status_request) { /* * MUST be empty and only sent if we've requested a status * request message. @@ -1646,6 +2671,48 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, s->s3->next_proto_neg_seen = 1; } # endif + + else if (type == TLSEXT_TYPE_application_layer_protocol_negotiation) { + unsigned len; + + /* We must have requested it. 
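Because ssl_parse_clienthello_tlsext() runs the internal extensions (and therefore the servername callback) before scanning custom extensions, a server that switches contexts on SNI must register its custom-extension callbacks on the SSL_CTX it switches to. An illustrative sketch; vhost_ctx, the type value 1000 and my_parse_cb are hypothetical placeholders:

    #include <openssl/ssl.h>

    extern int my_parse_cb(SSL *s, unsigned int ext_type,
                           const unsigned char *in, size_t inlen,
                           int *al, void *parse_arg);

    static SSL_CTX *vhost_ctx;      /* assumed to be set up elsewhere */

    /* The servername callback swaps in the per-vhost context; custom
     * extensions in the same ClientHello are parsed afterwards, so they are
     * dispatched against the srv_ext table of the context selected here. */
    static int servername_cb(SSL *s, int *al, void *arg)
    {
        const char *name = SSL_get_servername(s, TLSEXT_NAMETYPE_host_name);
        if (name != NULL && vhost_ctx != NULL)
            SSL_set_SSL_CTX(s, vhost_ctx);
        return SSL_TLSEXT_ERR_OK;
    }

    static int setup_sni_and_custom_ext(SSL_CTX *default_ctx)
    {
        SSL_CTX_set_tlsext_servername_callback(default_ctx, servername_cb);
        /* Register the custom extension on the context selected by SNI. */
        return SSL_CTX_add_server_custom_ext(vhost_ctx, 1000,
                                             NULL, NULL, NULL,
                                             my_parse_cb, NULL);
    }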
*/ + if (s->alpn_client_proto_list == NULL) { + *al = TLS1_AD_UNSUPPORTED_EXTENSION; + return 0; + } + if (size < 4) { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + /*- + * The extension data consists of: + * uint16 list_length + * uint8 proto_length; + * uint8 proto[proto_length]; + */ + len = data[0]; + len <<= 8; + len |= data[1]; + if (len != (unsigned)size - 2) { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + len = data[2]; + if (len != (unsigned)size - 3) { + *al = TLS1_AD_DECODE_ERROR; + return 0; + } + if (s->s3->alpn_selected) + OPENSSL_free(s->s3->alpn_selected); + s->s3->alpn_selected = OPENSSL_malloc(len); + if (!s->s3->alpn_selected) { + *al = TLS1_AD_INTERNAL_ERROR; + return 0; + } + memcpy(s->s3->alpn_selected, data + 3, len); + s->s3->alpn_selected_len = len; + } + else if (type == TLSEXT_TYPE_renegotiate) { if (!ssl_parse_serverhello_renegotiate_ext(s, data, size, al)) return 0; @@ -1673,6 +2740,12 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, return 0; } # endif + /* + * If this extension type was not otherwise handled, but matches a + * custom_cli_ext_record, then send it to the c callback + */ + else if (custom_ext_parse(s, 0, type, data, size, al) <= 0) + return 0; data += size; } @@ -1712,7 +2785,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, if (!renegotiate_seen && !(s->options & SSL_OP_LEGACY_SERVER_CONNECT) && !(s->options & SSL_OP_ALLOW_UNSAFE_LEGACY_RENEGOTIATION)) { *al = SSL_AD_HANDSHAKE_FAILURE; - SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT, + SSLerr(SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT, SSL_R_UNSAFE_LEGACY_RENEGOTIATION_DISABLED); return 0; } @@ -1722,63 +2795,6 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, int ssl_prepare_clienthello_tlsext(SSL *s) { -# ifndef OPENSSL_NO_EC - /* - * If we are client and using an elliptic curve cryptography cipher - * suite, send the point formats and elliptic curves we support. 
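After the handshake, the protocol the server echoed back — stored in s3->alpn_selected by the ServerHello parsing above — is exposed to applications. A small sketch of reading it on the client:

    #include <stdio.h>
    #include <openssl/ssl.h>

    static void print_negotiated_alpn(SSL *ssl)
    {
        const unsigned char *proto = NULL;
        unsigned int len = 0;

        SSL_get0_alpn_selected(ssl, &proto, &len);
        if (len == 0)
            printf("no ALPN protocol negotiated\n");
        else
            printf("ALPN: %.*s\n", (int)len, (const char *)proto);
    }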
- */ - int using_ecc = 0; - int i; - unsigned char *j; - unsigned long alg_k, alg_a; - STACK_OF(SSL_CIPHER) *cipher_stack = SSL_get_ciphers(s); - - for (i = 0; i < sk_SSL_CIPHER_num(cipher_stack); i++) { - SSL_CIPHER *c = sk_SSL_CIPHER_value(cipher_stack, i); - - alg_k = c->algorithm_mkey; - alg_a = c->algorithm_auth; - if ((alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe) - || (alg_a & SSL_aECDSA))) { - using_ecc = 1; - break; - } - } - using_ecc = using_ecc && (s->version >= TLS1_VERSION); - if (using_ecc) { - if (s->tlsext_ecpointformatlist != NULL) - OPENSSL_free(s->tlsext_ecpointformatlist); - if ((s->tlsext_ecpointformatlist = OPENSSL_malloc(3)) == NULL) { - SSLerr(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT, - ERR_R_MALLOC_FAILURE); - return -1; - } - s->tlsext_ecpointformatlist_length = 3; - s->tlsext_ecpointformatlist[0] = TLSEXT_ECPOINTFORMAT_uncompressed; - s->tlsext_ecpointformatlist[1] = - TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime; - s->tlsext_ecpointformatlist[2] = - TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2; - - /* we support all named elliptic curves in RFC 4492 */ - if (s->tlsext_ellipticcurvelist != NULL) - OPENSSL_free(s->tlsext_ellipticcurvelist); - s->tlsext_ellipticcurvelist_length = - sizeof(pref_list) / sizeof(pref_list[0]) * 2; - if ((s->tlsext_ellipticcurvelist = - OPENSSL_malloc(s->tlsext_ellipticcurvelist_length)) == NULL) { - s->tlsext_ellipticcurvelist_length = 0; - SSLerr(SSL_F_SSL_PREPARE_CLIENTHELLO_TLSEXT, - ERR_R_MALLOC_FAILURE); - return -1; - } - for (i = 0, j = s->tlsext_ellipticcurvelist; (unsigned int)i < - sizeof(pref_list) / sizeof(pref_list[0]); i++) { - int id = tls1_ec_nid2curve_id(pref_list[i]); - s2n(id, j); - } - } -# endif /* OPENSSL_NO_EC */ # ifdef TLSEXT_TYPE_opaque_prf_input { @@ -1830,40 +2846,10 @@ int ssl_prepare_clienthello_tlsext(SSL *s) int ssl_prepare_serverhello_tlsext(SSL *s) { -# ifndef OPENSSL_NO_EC - /* - * If we are server and using an ECC cipher suite, send the point formats - * we support if the client sent us an ECPointsFormat extension. Note - * that the server is not supposed to send an EllipticCurves extension. 
- */ - - unsigned long alg_k = s->s3->tmp.new_cipher->algorithm_mkey; - unsigned long alg_a = s->s3->tmp.new_cipher->algorithm_auth; - int using_ecc = (alg_k & (SSL_kEECDH | SSL_kECDHr | SSL_kECDHe)) - || (alg_a & SSL_aECDSA); - using_ecc = using_ecc && (s->session->tlsext_ecpointformatlist != NULL); - - if (using_ecc) { - if (s->tlsext_ecpointformatlist != NULL) - OPENSSL_free(s->tlsext_ecpointformatlist); - if ((s->tlsext_ecpointformatlist = OPENSSL_malloc(3)) == NULL) { - SSLerr(SSL_F_SSL_PREPARE_SERVERHELLO_TLSEXT, - ERR_R_MALLOC_FAILURE); - return -1; - } - s->tlsext_ecpointformatlist_length = 3; - s->tlsext_ecpointformatlist[0] = TLSEXT_ECPOINTFORMAT_uncompressed; - s->tlsext_ecpointformatlist[1] = - TLSEXT_ECPOINTFORMAT_ansiX962_compressed_prime; - s->tlsext_ecpointformatlist[2] = - TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2; - } -# endif /* OPENSSL_NO_EC */ - return 1; } -int ssl_check_clienthello_tlsext_early(SSL *s) +static int ssl_check_clienthello_tlsext_early(SSL *s) { int ret = SSL_TLSEXT_ERR_NOACK; int al = SSL_AD_UNRECOGNIZED_NAME; @@ -1973,6 +2959,44 @@ int ssl_check_clienthello_tlsext_early(SSL *s) } } +int tls1_set_server_sigalgs(SSL *s) +{ + int al; + size_t i; + /* Clear any shared sigtnature algorithms */ + if (s->cert->shared_sigalgs) { + OPENSSL_free(s->cert->shared_sigalgs); + s->cert->shared_sigalgs = NULL; + s->cert->shared_sigalgslen = 0; + } + /* Clear certificate digests and validity flags */ + for (i = 0; i < SSL_PKEY_NUM; i++) { + s->cert->pkeys[i].digest = NULL; + s->cert->pkeys[i].valid_flags = 0; + } + + /* If sigalgs received process it. */ + if (s->cert->peer_sigalgs) { + if (!tls1_process_sigalgs(s)) { + SSLerr(SSL_F_TLS1_SET_SERVER_SIGALGS, ERR_R_MALLOC_FAILURE); + al = SSL_AD_INTERNAL_ERROR; + goto err; + } + /* Fatal error is no shared signature algorithms */ + if (!s->cert->shared_sigalgs) { + SSLerr(SSL_F_TLS1_SET_SERVER_SIGALGS, + SSL_R_NO_SHARED_SIGATURE_ALGORITHMS); + al = SSL_AD_ILLEGAL_PARAMETER; + goto err; + } + } else + ssl_cert_set_default_md(s->cert); + return 1; + err: + ssl3_send_alert(s, SSL3_AL_FATAL, al); + return 0; +} + int ssl_check_clienthello_tlsext_late(SSL *s) { int ret = SSL_TLSEXT_ERR_OK; @@ -1980,9 +3004,9 @@ int ssl_check_clienthello_tlsext_late(SSL *s) /* * If status request then ask callback what to do. 
Note: this must be - * called after servername callbacks in case the certificate has - * changed, and must be called after the cipher has been chosen because - * this may influence which certificate is sent + * called after servername callbacks in case the certificate has changed, + * and must be called after the cipher has been chosen because this may + * influence which certificate is sent */ if ((s->tlsext_status_type != -1) && s->ctx && s->ctx->tlsext_status_cb) { int r; @@ -2154,6 +3178,24 @@ int ssl_check_serverhello_tlsext(SSL *s) } } +int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, + int n) +{ + int al = -1; + if (s->version < SSL3_VERSION) + return 1; + if (ssl_scan_serverhello_tlsext(s, p, d, n, &al) <= 0) { + ssl3_send_alert(s, SSL3_AL_FATAL, al); + return 0; + } + + if (ssl_check_serverhello_tlsext(s) <= 0) { + SSLerr(SSL_F_SSL_PARSE_SERVERHELLO_TLSEXT, SSL_R_SERVERHELLO_TLSEXT); + return 0; + } + return 1; +} + /*- * Since the server cache lookup is done early on in the processing of the * ClientHello, and other operations depend on the result, we need to handle @@ -2209,7 +3251,7 @@ int tls1_process_ticket(SSL *s, unsigned char *session_id, int len, if (p >= limit) return -1; /* Skip past DTLS cookie */ - if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) { + if (SSL_IS_DTLS(s)) { i = *(p++); p += i; if (p >= limit) @@ -2396,32 +3438,18 @@ typedef struct { } tls12_lookup; static tls12_lookup tls12_md[] = { -# ifndef OPENSSL_NO_MD5 {NID_md5, TLSEXT_hash_md5}, -# endif -# ifndef OPENSSL_NO_SHA {NID_sha1, TLSEXT_hash_sha1}, -# endif -# ifndef OPENSSL_NO_SHA256 {NID_sha224, TLSEXT_hash_sha224}, {NID_sha256, TLSEXT_hash_sha256}, -# endif -# ifndef OPENSSL_NO_SHA512 {NID_sha384, TLSEXT_hash_sha384}, {NID_sha512, TLSEXT_hash_sha512} -# endif }; static tls12_lookup tls12_sig[] = { -# ifndef OPENSSL_NO_RSA {EVP_PKEY_RSA, TLSEXT_signature_rsa}, -# endif -# ifndef OPENSSL_NO_DSA {EVP_PKEY_DSA, TLSEXT_signature_dsa}, -# endif -# ifndef OPENSSL_NO_ECDSA {EVP_PKEY_EC, TLSEXT_signature_ecdsa} -# endif }; static int tls12_find_id(int nid, tls12_lookup *table, size_t tlen) @@ -2434,17 +3462,15 @@ static int tls12_find_id(int nid, tls12_lookup *table, size_t tlen) return -1; } -# if 0 static int tls12_find_nid(int id, tls12_lookup *table, size_t tlen) { size_t i; for (i = 0; i < tlen; i++) { - if (table[i].id == id) + if ((table[i].id) == id) return table[i].nid; } - return -1; + return NID_undef; } -# endif int tls12_get_sigandhash(unsigned char *p, const EVP_PKEY *pk, const EVP_MD *md) @@ -2473,6 +3499,14 @@ int tls12_get_sigid(const EVP_PKEY *pk) const EVP_MD *tls12_get_hash(unsigned char hash_alg) { switch (hash_alg) { +# ifndef OPENSSL_NO_MD5 + case TLSEXT_hash_md5: +# ifdef OPENSSL_FIPS + if (FIPS_mode()) + return NULL; +# endif + return EVP_md5(); +# endif # ifndef OPENSSL_NO_SHA case TLSEXT_hash_sha1: return EVP_sha1(); @@ -2497,83 +3531,274 @@ const EVP_MD *tls12_get_hash(unsigned char hash_alg) } } +static int tls12_get_pkey_idx(unsigned char sig_alg) +{ + switch (sig_alg) { +# ifndef OPENSSL_NO_RSA + case TLSEXT_signature_rsa: + return SSL_PKEY_RSA_SIGN; +# endif +# ifndef OPENSSL_NO_DSA + case TLSEXT_signature_dsa: + return SSL_PKEY_DSA_SIGN; +# endif +# ifndef OPENSSL_NO_ECDSA + case TLSEXT_signature_ecdsa: + return SSL_PKEY_ECC; +# endif + } + return -1; +} + +/* Convert TLS 1.2 signature algorithm extension values into NIDs */ +static void tls1_lookup_sigalg(int *phash_nid, int *psign_nid, + int *psignhash_nid, const unsigned char 
*data) +{ + int sign_nid = 0, hash_nid = 0; + if (!phash_nid && !psign_nid && !psignhash_nid) + return; + if (phash_nid || psignhash_nid) { + hash_nid = tls12_find_nid(data[0], tls12_md, + sizeof(tls12_md) / sizeof(tls12_lookup)); + if (phash_nid) + *phash_nid = hash_nid; + } + if (psign_nid || psignhash_nid) { + sign_nid = tls12_find_nid(data[1], tls12_sig, + sizeof(tls12_sig) / sizeof(tls12_lookup)); + if (psign_nid) + *psign_nid = sign_nid; + } + if (psignhash_nid) { + if (sign_nid && hash_nid) + OBJ_find_sigid_by_algs(psignhash_nid, hash_nid, sign_nid); + else + *psignhash_nid = NID_undef; + } +} + +/* Given preference and allowed sigalgs set shared sigalgs */ +static int tls12_do_shared_sigalgs(TLS_SIGALGS *shsig, + const unsigned char *pref, size_t preflen, + const unsigned char *allow, + size_t allowlen) +{ + const unsigned char *ptmp, *atmp; + size_t i, j, nmatch = 0; + for (i = 0, ptmp = pref; i < preflen; i += 2, ptmp += 2) { + /* Skip disabled hashes or signature algorithms */ + if (tls12_get_hash(ptmp[0]) == NULL) + continue; + if (tls12_get_pkey_idx(ptmp[1]) == -1) + continue; + for (j = 0, atmp = allow; j < allowlen; j += 2, atmp += 2) { + if (ptmp[0] == atmp[0] && ptmp[1] == atmp[1]) { + nmatch++; + if (shsig) { + shsig->rhash = ptmp[0]; + shsig->rsign = ptmp[1]; + tls1_lookup_sigalg(&shsig->hash_nid, + &shsig->sign_nid, + &shsig->signandhash_nid, ptmp); + shsig++; + } + break; + } + } + } + return nmatch; +} + +/* Set shared signature algorithms for SSL structures */ +static int tls1_set_shared_sigalgs(SSL *s) +{ + const unsigned char *pref, *allow, *conf; + size_t preflen, allowlen, conflen; + size_t nmatch; + TLS_SIGALGS *salgs = NULL; + CERT *c = s->cert; + unsigned int is_suiteb = tls1_suiteb(s); + if (c->shared_sigalgs) { + OPENSSL_free(c->shared_sigalgs); + c->shared_sigalgs = NULL; + c->shared_sigalgslen = 0; + } + /* If client use client signature algorithms if not NULL */ + if (!s->server && c->client_sigalgs && !is_suiteb) { + conf = c->client_sigalgs; + conflen = c->client_sigalgslen; + } else if (c->conf_sigalgs && !is_suiteb) { + conf = c->conf_sigalgs; + conflen = c->conf_sigalgslen; + } else + conflen = tls12_get_psigalgs(s, &conf); + if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || is_suiteb) { + pref = conf; + preflen = conflen; + allow = c->peer_sigalgs; + allowlen = c->peer_sigalgslen; + } else { + allow = conf; + allowlen = conflen; + pref = c->peer_sigalgs; + preflen = c->peer_sigalgslen; + } + nmatch = tls12_do_shared_sigalgs(NULL, pref, preflen, allow, allowlen); + if (nmatch) { + salgs = OPENSSL_malloc(nmatch * sizeof(TLS_SIGALGS)); + if (!salgs) + return 0; + nmatch = tls12_do_shared_sigalgs(salgs, pref, preflen, allow, allowlen); + } else { + salgs = NULL; + } + c->shared_sigalgs = salgs; + c->shared_sigalgslen = nmatch; + return 1; +} + /* Set preferred digest for each key type */ -int tls1_process_sigalgs(SSL *s, const unsigned char *data, int dsize) +int tls1_save_sigalgs(SSL *s, const unsigned char *data, int dsize) { - int i, idx; - const EVP_MD *md; CERT *c = s->cert; - /* Extension ignored for TLS versions below 1.2 */ - if (TLS1_get_version(s) < TLS1_2_VERSION) + /* Extension ignored for inappropriate versions */ + if (!SSL_USE_SIGALGS(s)) return 1; /* Should never happen */ if (!c) return 0; - c->pkeys[SSL_PKEY_DSA_SIGN].digest = NULL; - c->pkeys[SSL_PKEY_RSA_SIGN].digest = NULL; - c->pkeys[SSL_PKEY_RSA_ENC].digest = NULL; - c->pkeys[SSL_PKEY_ECC].digest = NULL; + if (c->peer_sigalgs) + OPENSSL_free(c->peer_sigalgs); + c->peer_sigalgs = 
OPENSSL_malloc(dsize); + if (!c->peer_sigalgs) + return 0; + c->peer_sigalgslen = dsize; + memcpy(c->peer_sigalgs, data, dsize); + return 1; +} - for (i = 0; i < dsize; i += 2) { - unsigned char hash_alg = data[i], sig_alg = data[i + 1]; +int tls1_process_sigalgs(SSL *s) +{ + int idx; + size_t i; + const EVP_MD *md; + CERT *c = s->cert; + TLS_SIGALGS *sigptr; + if (!tls1_set_shared_sigalgs(s)) + return 0; - switch (sig_alg) { -# ifndef OPENSSL_NO_RSA - case TLSEXT_signature_rsa: - idx = SSL_PKEY_RSA_SIGN; - break; -# endif -# ifndef OPENSSL_NO_DSA - case TLSEXT_signature_dsa: - idx = SSL_PKEY_DSA_SIGN; - break; -# endif -# ifndef OPENSSL_NO_ECDSA - case TLSEXT_signature_ecdsa: - idx = SSL_PKEY_ECC; - break; -# endif - default: - continue; +# ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + if (s->cert->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) { + /* + * Use first set signature preference to force message digest, + * ignoring any peer preferences. + */ + const unsigned char *sigs = NULL; + if (s->server) + sigs = c->conf_sigalgs; + else + sigs = c->client_sigalgs; + if (sigs) { + idx = tls12_get_pkey_idx(sigs[1]); + md = tls12_get_hash(sigs[0]); + c->pkeys[idx].digest = md; + c->pkeys[idx].valid_flags = CERT_PKEY_EXPLICIT_SIGN; + if (idx == SSL_PKEY_RSA_SIGN) { + c->pkeys[SSL_PKEY_RSA_ENC].valid_flags = + CERT_PKEY_EXPLICIT_SIGN; + c->pkeys[SSL_PKEY_RSA_ENC].digest = md; + } } + } +# endif - if (c->pkeys[idx].digest == NULL) { - md = tls12_get_hash(hash_alg); - if (md) { - c->pkeys[idx].digest = md; - if (idx == SSL_PKEY_RSA_SIGN) - c->pkeys[SSL_PKEY_RSA_ENC].digest = md; + for (i = 0, sigptr = c->shared_sigalgs; + i < c->shared_sigalgslen; i++, sigptr++) { + idx = tls12_get_pkey_idx(sigptr->rsign); + if (idx > 0 && c->pkeys[idx].digest == NULL) { + md = tls12_get_hash(sigptr->rhash); + c->pkeys[idx].digest = md; + c->pkeys[idx].valid_flags = CERT_PKEY_EXPLICIT_SIGN; + if (idx == SSL_PKEY_RSA_SIGN) { + c->pkeys[SSL_PKEY_RSA_ENC].valid_flags = + CERT_PKEY_EXPLICIT_SIGN; + c->pkeys[SSL_PKEY_RSA_ENC].digest = md; } } } - /* - * Set any remaining keys to default values. NOTE: if alg is not - * supported it stays as NULL. + * In strict mode leave unset digests as NULL to indicate we can't use + * the certificate for signing. */ + if (!(s->cert->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT)) { + /* + * Set any remaining keys to default values. NOTE: if alg is not + * supported it stays as NULL. 
+ */ # ifndef OPENSSL_NO_DSA - if (!c->pkeys[SSL_PKEY_DSA_SIGN].digest) - c->pkeys[SSL_PKEY_DSA_SIGN].digest = EVP_sha1(); + if (!c->pkeys[SSL_PKEY_DSA_SIGN].digest) + c->pkeys[SSL_PKEY_DSA_SIGN].digest = EVP_sha1(); # endif # ifndef OPENSSL_NO_RSA - if (!c->pkeys[SSL_PKEY_RSA_SIGN].digest) { - c->pkeys[SSL_PKEY_RSA_SIGN].digest = EVP_sha1(); - c->pkeys[SSL_PKEY_RSA_ENC].digest = EVP_sha1(); - } + if (!c->pkeys[SSL_PKEY_RSA_SIGN].digest) { + c->pkeys[SSL_PKEY_RSA_SIGN].digest = EVP_sha1(); + c->pkeys[SSL_PKEY_RSA_ENC].digest = EVP_sha1(); + } # endif # ifndef OPENSSL_NO_ECDSA - if (!c->pkeys[SSL_PKEY_ECC].digest) - c->pkeys[SSL_PKEY_ECC].digest = EVP_sha1(); + if (!c->pkeys[SSL_PKEY_ECC].digest) + c->pkeys[SSL_PKEY_ECC].digest = EVP_sha1(); # endif + } return 1; } -#endif +int SSL_get_sigalgs(SSL *s, int idx, + int *psign, int *phash, int *psignhash, + unsigned char *rsig, unsigned char *rhash) +{ + const unsigned char *psig = s->cert->peer_sigalgs; + if (psig == NULL) + return 0; + if (idx >= 0) { + idx <<= 1; + if (idx >= (int)s->cert->peer_sigalgslen) + return 0; + psig += idx; + if (rhash) + *rhash = psig[0]; + if (rsig) + *rsig = psig[1]; + tls1_lookup_sigalg(phash, psign, psignhash, psig); + } + return s->cert->peer_sigalgslen / 2; +} + +int SSL_get_shared_sigalgs(SSL *s, int idx, + int *psign, int *phash, int *psignhash, + unsigned char *rsig, unsigned char *rhash) +{ + TLS_SIGALGS *shsigalgs = s->cert->shared_sigalgs; + if (!shsigalgs || idx >= (int)s->cert->shared_sigalgslen) + return 0; + shsigalgs += idx; + if (phash) + *phash = shsigalgs->hash_nid; + if (psign) + *psign = shsigalgs->sign_nid; + if (psignhash) + *psignhash = shsigalgs->signandhash_nid; + if (rsig) + *rsig = shsigalgs->rsign; + if (rhash) + *rhash = shsigalgs->rhash; + return s->cert->shared_sigalgslen; +} -#ifndef OPENSSL_NO_HEARTBEATS +# ifndef OPENSSL_NO_HEARTBEATS int tls1_process_heartbeat(SSL *s) { unsigned char *p = &s->s3->rrec.data[0], *pl; @@ -2714,4 +3939,426 @@ int tls1_heartbeat(SSL *s) return ret; } +# endif + +# define MAX_SIGALGLEN (TLSEXT_hash_num * TLSEXT_signature_num * 2) + +typedef struct { + size_t sigalgcnt; + int sigalgs[MAX_SIGALGLEN]; +} sig_cb_st; + +static int sig_cb(const char *elem, int len, void *arg) +{ + sig_cb_st *sarg = arg; + size_t i; + char etmp[20], *p; + int sig_alg, hash_alg; + if (elem == NULL) + return 0; + if (sarg->sigalgcnt == MAX_SIGALGLEN) + return 0; + if (len > (int)(sizeof(etmp) - 1)) + return 0; + memcpy(etmp, elem, len); + etmp[len] = 0; + p = strchr(etmp, '+'); + if (!p) + return 0; + *p = 0; + p++; + if (!*p) + return 0; + + if (!strcmp(etmp, "RSA")) + sig_alg = EVP_PKEY_RSA; + else if (!strcmp(etmp, "DSA")) + sig_alg = EVP_PKEY_DSA; + else if (!strcmp(etmp, "ECDSA")) + sig_alg = EVP_PKEY_EC; + else + return 0; + + hash_alg = OBJ_sn2nid(p); + if (hash_alg == NID_undef) + hash_alg = OBJ_ln2nid(p); + if (hash_alg == NID_undef) + return 0; + + for (i = 0; i < sarg->sigalgcnt; i += 2) { + if (sarg->sigalgs[i] == sig_alg && sarg->sigalgs[i + 1] == hash_alg) + return 0; + } + sarg->sigalgs[sarg->sigalgcnt++] = hash_alg; + sarg->sigalgs[sarg->sigalgcnt++] = sig_alg; + return 1; +} + +/* + * Set suppored signature algorithms based on a colon separated list of the + * form sig+hash e.g. 
RSA+SHA512:DSA+SHA512 + */ +int tls1_set_sigalgs_list(CERT *c, const char *str, int client) +{ + sig_cb_st sig; + sig.sigalgcnt = 0; + if (!CONF_parse_list(str, ':', 1, sig_cb, &sig)) + return 0; + if (c == NULL) + return 1; + return tls1_set_sigalgs(c, sig.sigalgs, sig.sigalgcnt, client); +} + +int tls1_set_sigalgs(CERT *c, const int *psig_nids, size_t salglen, + int client) +{ + unsigned char *sigalgs, *sptr; + int rhash, rsign; + size_t i; + if (salglen & 1) + return 0; + sigalgs = OPENSSL_malloc(salglen); + if (sigalgs == NULL) + return 0; + for (i = 0, sptr = sigalgs; i < salglen; i += 2) { + rhash = tls12_find_id(*psig_nids++, tls12_md, + sizeof(tls12_md) / sizeof(tls12_lookup)); + rsign = tls12_find_id(*psig_nids++, tls12_sig, + sizeof(tls12_sig) / sizeof(tls12_lookup)); + + if (rhash == -1 || rsign == -1) + goto err; + *sptr++ = rhash; + *sptr++ = rsign; + } + + if (client) { + if (c->client_sigalgs) + OPENSSL_free(c->client_sigalgs); + c->client_sigalgs = sigalgs; + c->client_sigalgslen = salglen; + } else { + if (c->conf_sigalgs) + OPENSSL_free(c->conf_sigalgs); + c->conf_sigalgs = sigalgs; + c->conf_sigalgslen = salglen; + } + + return 1; + + err: + OPENSSL_free(sigalgs); + return 0; +} + +static int tls1_check_sig_alg(CERT *c, X509 *x, int default_nid) +{ + int sig_nid; + size_t i; + if (default_nid == -1) + return 1; + sig_nid = X509_get_signature_nid(x); + if (default_nid) + return sig_nid == default_nid ? 1 : 0; + for (i = 0; i < c->shared_sigalgslen; i++) + if (sig_nid == c->shared_sigalgs[i].signandhash_nid) + return 1; + return 0; +} + +/* Check to see if a certificate issuer name matches list of CA names */ +static int ssl_check_ca_name(STACK_OF(X509_NAME) *names, X509 *x) +{ + X509_NAME *nm; + int i; + nm = X509_get_issuer_name(x); + for (i = 0; i < sk_X509_NAME_num(names); i++) { + if (!X509_NAME_cmp(nm, sk_X509_NAME_value(names, i))) + return 1; + } + return 0; +} + +/* + * Check certificate chain is consistent with TLS extensions and is usable by + * server. This servers two purposes: it allows users to check chains before + * passing them to the server and it allows the server to check chains before + * attempting to use them. 
+ */ + +/* Flags which need to be set for a certificate when stict mode not set */ + +# define CERT_PKEY_VALID_FLAGS \ + (CERT_PKEY_EE_SIGNATURE|CERT_PKEY_EE_PARAM) +/* Strict mode flags */ +# define CERT_PKEY_STRICT_FLAGS \ + (CERT_PKEY_VALID_FLAGS|CERT_PKEY_CA_SIGNATURE|CERT_PKEY_CA_PARAM \ + | CERT_PKEY_ISSUER_NAME|CERT_PKEY_CERT_TYPE) + +int tls1_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain, + int idx) +{ + int i; + int rv = 0; + int check_flags = 0, strict_mode; + CERT_PKEY *cpk = NULL; + CERT *c = s->cert; + unsigned int suiteb_flags = tls1_suiteb(s); + /* idx == -1 means checking server chains */ + if (idx != -1) { + /* idx == -2 means checking client certificate chains */ + if (idx == -2) { + cpk = c->key; + idx = cpk - c->pkeys; + } else + cpk = c->pkeys + idx; + x = cpk->x509; + pk = cpk->privatekey; + chain = cpk->chain; + strict_mode = c->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT; + /* If no cert or key, forget it */ + if (!x || !pk) + goto end; +# ifdef OPENSSL_SSL_DEBUG_BROKEN_PROTOCOL + /* Allow any certificate to pass test */ + if (s->cert->cert_flags & SSL_CERT_FLAG_BROKEN_PROTOCOL) { + rv = CERT_PKEY_STRICT_FLAGS | CERT_PKEY_EXPLICIT_SIGN | + CERT_PKEY_VALID | CERT_PKEY_SIGN; + cpk->valid_flags = rv; + return rv; + } +# endif + } else { + if (!x || !pk) + return 0; + idx = ssl_cert_type(x, pk); + if (idx == -1) + return 0; + cpk = c->pkeys + idx; + if (c->cert_flags & SSL_CERT_FLAGS_CHECK_TLS_STRICT) + check_flags = CERT_PKEY_STRICT_FLAGS; + else + check_flags = CERT_PKEY_VALID_FLAGS; + strict_mode = 1; + } + + if (suiteb_flags) { + int ok; + if (check_flags) + check_flags |= CERT_PKEY_SUITEB; + ok = X509_chain_check_suiteb(NULL, x, chain, suiteb_flags); + if (ok == X509_V_OK) + rv |= CERT_PKEY_SUITEB; + else if (!check_flags) + goto end; + } + + /* + * Check all signature algorithms are consistent with signature + * algorithms extension if TLS 1.2 or later and strict mode. + */ + if (TLS1_get_version(s) >= TLS1_2_VERSION && strict_mode) { + int default_nid; + unsigned char rsign = 0; + if (c->peer_sigalgs) + default_nid = 0; + /* If no sigalgs extension use defaults from RFC5246 */ + else { + switch (idx) { + case SSL_PKEY_RSA_ENC: + case SSL_PKEY_RSA_SIGN: + case SSL_PKEY_DH_RSA: + rsign = TLSEXT_signature_rsa; + default_nid = NID_sha1WithRSAEncryption; + break; + + case SSL_PKEY_DSA_SIGN: + case SSL_PKEY_DH_DSA: + rsign = TLSEXT_signature_dsa; + default_nid = NID_dsaWithSHA1; + break; + + case SSL_PKEY_ECC: + rsign = TLSEXT_signature_ecdsa; + default_nid = NID_ecdsa_with_SHA1; + break; + + default: + default_nid = -1; + break; + } + } + /* + * If peer sent no signature algorithms extension and we have set + * preferred signature algorithms check we support sha1. 
+ */ + if (default_nid > 0 && c->conf_sigalgs) { + size_t j; + const unsigned char *p = c->conf_sigalgs; + for (j = 0; j < c->conf_sigalgslen; j += 2, p += 2) { + if (p[0] == TLSEXT_hash_sha1 && p[1] == rsign) + break; + } + if (j == c->conf_sigalgslen) { + if (check_flags) + goto skip_sigs; + else + goto end; + } + } + /* Check signature algorithm of each cert in chain */ + if (!tls1_check_sig_alg(c, x, default_nid)) { + if (!check_flags) + goto end; + } else + rv |= CERT_PKEY_EE_SIGNATURE; + rv |= CERT_PKEY_CA_SIGNATURE; + for (i = 0; i < sk_X509_num(chain); i++) { + if (!tls1_check_sig_alg(c, sk_X509_value(chain, i), default_nid)) { + if (check_flags) { + rv &= ~CERT_PKEY_CA_SIGNATURE; + break; + } else + goto end; + } + } + } + /* Else not TLS 1.2, so mark EE and CA signing algorithms OK */ + else if (check_flags) + rv |= CERT_PKEY_EE_SIGNATURE | CERT_PKEY_CA_SIGNATURE; + skip_sigs: + /* Check cert parameters are consistent */ + if (tls1_check_cert_param(s, x, check_flags ? 1 : 2)) + rv |= CERT_PKEY_EE_PARAM; + else if (!check_flags) + goto end; + if (!s->server) + rv |= CERT_PKEY_CA_PARAM; + /* In strict mode check rest of chain too */ + else if (strict_mode) { + rv |= CERT_PKEY_CA_PARAM; + for (i = 0; i < sk_X509_num(chain); i++) { + X509 *ca = sk_X509_value(chain, i); + if (!tls1_check_cert_param(s, ca, 0)) { + if (check_flags) { + rv &= ~CERT_PKEY_CA_PARAM; + break; + } else + goto end; + } + } + } + if (!s->server && strict_mode) { + STACK_OF(X509_NAME) *ca_dn; + int check_type = 0; + switch (pk->type) { + case EVP_PKEY_RSA: + check_type = TLS_CT_RSA_SIGN; + break; + case EVP_PKEY_DSA: + check_type = TLS_CT_DSS_SIGN; + break; + case EVP_PKEY_EC: + check_type = TLS_CT_ECDSA_SIGN; + break; + case EVP_PKEY_DH: + case EVP_PKEY_DHX: + { + int cert_type = X509_certificate_type(x, pk); + if (cert_type & EVP_PKS_RSA) + check_type = TLS_CT_RSA_FIXED_DH; + if (cert_type & EVP_PKS_DSA) + check_type = TLS_CT_DSS_FIXED_DH; + } + } + if (check_type) { + const unsigned char *ctypes; + int ctypelen; + if (c->ctypes) { + ctypes = c->ctypes; + ctypelen = (int)c->ctype_num; + } else { + ctypes = (unsigned char *)s->s3->tmp.ctype; + ctypelen = s->s3->tmp.ctype_num; + } + for (i = 0; i < ctypelen; i++) { + if (ctypes[i] == check_type) { + rv |= CERT_PKEY_CERT_TYPE; + break; + } + } + if (!(rv & CERT_PKEY_CERT_TYPE) && !check_flags) + goto end; + } else + rv |= CERT_PKEY_CERT_TYPE; + + ca_dn = s->s3->tmp.ca_names; + + if (!sk_X509_NAME_num(ca_dn)) + rv |= CERT_PKEY_ISSUER_NAME; + + if (!(rv & CERT_PKEY_ISSUER_NAME)) { + if (ssl_check_ca_name(ca_dn, x)) + rv |= CERT_PKEY_ISSUER_NAME; + } + if (!(rv & CERT_PKEY_ISSUER_NAME)) { + for (i = 0; i < sk_X509_num(chain); i++) { + X509 *xtmp = sk_X509_value(chain, i); + if (ssl_check_ca_name(ca_dn, xtmp)) { + rv |= CERT_PKEY_ISSUER_NAME; + break; + } + } + } + if (!check_flags && !(rv & CERT_PKEY_ISSUER_NAME)) + goto end; + } else + rv |= CERT_PKEY_ISSUER_NAME | CERT_PKEY_CERT_TYPE; + + if (!check_flags || (rv & check_flags) == check_flags) + rv |= CERT_PKEY_VALID; + + end: + + if (TLS1_get_version(s) >= TLS1_2_VERSION) { + if (cpk->valid_flags & CERT_PKEY_EXPLICIT_SIGN) + rv |= CERT_PKEY_EXPLICIT_SIGN | CERT_PKEY_SIGN; + else if (cpk->digest) + rv |= CERT_PKEY_SIGN; + } else + rv |= CERT_PKEY_SIGN | CERT_PKEY_EXPLICIT_SIGN; + + /* + * When checking a CERT_PKEY structure all flags are irrelevant if the + * chain is invalid. 
+ */ + if (!check_flags) { + if (rv & CERT_PKEY_VALID) + cpk->valid_flags = rv; + else { + /* Preserve explicit sign flag, clear rest */ + cpk->valid_flags &= CERT_PKEY_EXPLICIT_SIGN; + return 0; + } + } + return rv; +} + +/* Set validity of certificates in an SSL structure */ +void tls1_set_cert_validity(SSL *s) +{ + tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_RSA_ENC); + tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_RSA_SIGN); + tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_DSA_SIGN); + tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_DH_RSA); + tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_DH_DSA); + tls1_check_chain(s, NULL, NULL, NULL, SSL_PKEY_ECC); +} + +/* User level utiity function to check a chain is suitable */ +int SSL_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain) +{ + return tls1_check_chain(s, x, pk, chain, -1); +} + #endif diff --git a/deps/openssl/openssl/ssl/t1_meth.c b/deps/openssl/openssl/ssl/t1_meth.c index 4a1b0529b8827c..335d57b530f734 100644 --- a/deps/openssl/openssl/ssl/t1_meth.c +++ b/deps/openssl/openssl/ssl/t1_meth.c @@ -72,10 +72,13 @@ static const SSL_METHOD *tls1_get_method(int ver) } IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_method, - ssl3_accept, ssl3_connect, tls1_get_method) + ssl3_accept, + ssl3_connect, tls1_get_method, TLSv1_2_enc_data) IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_method, - ssl3_accept, ssl3_connect, tls1_get_method) + ssl3_accept, + ssl3_connect, tls1_get_method, TLSv1_1_enc_data) IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_method, - ssl3_accept, ssl3_connect, tls1_get_method) + ssl3_accept, + ssl3_connect, tls1_get_method, TLSv1_enc_data) diff --git a/deps/openssl/openssl/ssl/t1_srvr.c b/deps/openssl/openssl/ssl/t1_srvr.c index 076ec86e89a4bb..8c6b3dff2f559d 100644 --- a/deps/openssl/openssl/ssl/t1_srvr.c +++ b/deps/openssl/openssl/ssl/t1_srvr.c @@ -78,12 +78,15 @@ static const SSL_METHOD *tls1_get_server_method(int ver) IMPLEMENT_tls_meth_func(TLS1_2_VERSION, TLSv1_2_server_method, ssl3_accept, - ssl_undefined_function, tls1_get_server_method) + ssl_undefined_function, + tls1_get_server_method, TLSv1_2_enc_data) IMPLEMENT_tls_meth_func(TLS1_1_VERSION, TLSv1_1_server_method, ssl3_accept, - ssl_undefined_function, tls1_get_server_method) + ssl_undefined_function, + tls1_get_server_method, TLSv1_1_enc_data) IMPLEMENT_tls_meth_func(TLS1_VERSION, TLSv1_server_method, ssl3_accept, - ssl_undefined_function, tls1_get_server_method) + ssl_undefined_function, + tls1_get_server_method, TLSv1_enc_data) diff --git a/deps/openssl/openssl/ssl/t1_trce.c b/deps/openssl/openssl/ssl/t1_trce.c new file mode 100644 index 00000000000000..c5e21df77a6b9d --- /dev/null +++ b/deps/openssl/openssl/ssl/t1_trce.c @@ -0,0 +1,1266 @@ +/* ssl/t1_trce.c */ +/* + * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL + * project. + */ +/* ==================================================================== + * Copyright (c) 2012 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * licensing@OpenSSL.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + */ + +#include "ssl_locl.h" + +#ifndef OPENSSL_NO_SSL_TRACE + +/* Packet trace support for OpenSSL */ + +typedef struct { + int num; + const char *name; +} ssl_trace_tbl; + +# define ssl_trace_str(val, tbl) \ + do_ssl_trace_str(val, tbl, sizeof(tbl)/sizeof(ssl_trace_tbl)) + +# define ssl_trace_list(bio, indent, msg, msglen, value, table) \ + do_ssl_trace_list(bio, indent, msg, msglen, value, \ + table, sizeof(table)/sizeof(ssl_trace_tbl)) + +static const char *do_ssl_trace_str(int val, ssl_trace_tbl *tbl, size_t ntbl) +{ + size_t i; + for (i = 0; i < ntbl; i++, tbl++) { + if (tbl->num == val) + return tbl->name; + } + return "UNKNOWN"; +} + +static int do_ssl_trace_list(BIO *bio, int indent, + const unsigned char *msg, size_t msglen, + size_t vlen, ssl_trace_tbl *tbl, size_t ntbl) +{ + int val; + if (msglen % vlen) + return 0; + while (msglen) { + val = msg[0]; + if (vlen == 2) + val = (val << 8) | msg[1]; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "%s (%d)\n", do_ssl_trace_str(val, tbl, ntbl), val); + msg += vlen; + msglen -= vlen; + } + return 1; +} + +/* Version number */ + +static ssl_trace_tbl ssl_version_tbl[] = { + {SSL2_VERSION, "SSL 2.0"}, + {SSL3_VERSION, "SSL 3.0"}, + {TLS1_VERSION, "TLS 1.0"}, + {TLS1_1_VERSION, "TLS 1.1"}, + {TLS1_2_VERSION, "TLS 1.2"}, + {DTLS1_VERSION, "DTLS 1.0"}, + {DTLS1_2_VERSION, "DTLS 1.2"}, + {DTLS1_BAD_VER, "DTLS 1.0 (bad)"} +}; + +static ssl_trace_tbl ssl_content_tbl[] = { + {SSL3_RT_CHANGE_CIPHER_SPEC, "ChangeCipherSpec"}, + {SSL3_RT_ALERT, "Alert"}, + {SSL3_RT_HANDSHAKE, "Handshake"}, + {SSL3_RT_APPLICATION_DATA, "ApplicationData"}, + {TLS1_RT_HEARTBEAT, "HeartBeat"} +}; + +/* Handshake types */ +static ssl_trace_tbl ssl_handshake_tbl[] = { + {SSL3_MT_HELLO_REQUEST, "HelloRequest"}, + 
{SSL3_MT_CLIENT_HELLO, "ClientHello"}, + {SSL3_MT_SERVER_HELLO, "ServerHello"}, + {DTLS1_MT_HELLO_VERIFY_REQUEST, "HelloVerifyRequest"}, + {SSL3_MT_NEWSESSION_TICKET, "NewSessionTicket"}, + {SSL3_MT_CERTIFICATE, "Certificate"}, + {SSL3_MT_SERVER_KEY_EXCHANGE, "ServerKeyExchange"}, + {SSL3_MT_CERTIFICATE_REQUEST, "CertificateRequest"}, + {SSL3_MT_CLIENT_KEY_EXCHANGE, "ClientKeyExchange"}, + {SSL3_MT_CERTIFICATE_STATUS, "CertificateStatus"}, + {SSL3_MT_SERVER_DONE, "ServerHelloDone"}, + {SSL3_MT_CERTIFICATE_VERIFY, "CertificateVerify"}, + {SSL3_MT_CLIENT_KEY_EXCHANGE, "ClientKeyExchange"}, + {SSL3_MT_FINISHED, "Finished"}, + {SSL3_MT_CERTIFICATE_STATUS, "CertificateStatus"} +}; + +/* Cipher suites */ +static ssl_trace_tbl ssl_ciphers_tbl[] = { + {0x0000, "SSL_NULL_WITH_NULL_NULL"}, + {0x0001, "SSL_RSA_WITH_NULL_MD5"}, + {0x0002, "SSL_RSA_WITH_NULL_SHA"}, + {0x0003, "SSL_RSA_EXPORT_WITH_RC4_40_MD5"}, + {0x0004, "SSL_RSA_WITH_RC4_128_MD5"}, + {0x0005, "SSL_RSA_WITH_RC4_128_SHA"}, + {0x0006, "SSL_RSA_EXPORT_WITH_RC2_CBC_40_MD5"}, + {0x0007, "SSL_RSA_WITH_IDEA_CBC_SHA"}, + {0x0008, "SSL_RSA_EXPORT_WITH_DES40_CBC_SHA"}, + {0x0009, "SSL_RSA_WITH_DES_CBC_SHA"}, + {0x000A, "SSL_RSA_WITH_3DES_EDE_CBC_SHA"}, + {0x000B, "SSL_DH_DSS_EXPORT_WITH_DES40_CBC_SHA"}, + {0x000C, "SSL_DH_DSS_WITH_DES_CBC_SHA"}, + {0x000D, "SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA"}, + {0x000E, "SSL_DH_RSA_EXPORT_WITH_DES40_CBC_SHA"}, + {0x000F, "SSL_DH_RSA_WITH_DES_CBC_SHA"}, + {0x0010, "SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA"}, + {0x0011, "SSL_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA"}, + {0x0012, "SSL_DHE_DSS_WITH_DES_CBC_SHA"}, + {0x0013, "SSL_DHE_DSS_WITH_3DES_EDE_CBC_SHA"}, + {0x0014, "SSL_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA"}, + {0x0015, "SSL_DHE_RSA_WITH_DES_CBC_SHA"}, + {0x0016, "SSL_DHE_RSA_WITH_3DES_EDE_CBC_SHA"}, + {0x0017, "SSL_DH_anon_EXPORT_WITH_RC4_40_MD5"}, + {0x0018, "SSL_DH_anon_WITH_RC4_128_MD5"}, + {0x0019, "SSL_DH_anon_EXPORT_WITH_DES40_CBC_SHA"}, + {0x001A, "SSL_DH_anon_WITH_DES_CBC_SHA"}, + {0x001B, "SSL_DH_anon_WITH_3DES_EDE_CBC_SHA"}, + {0x001D, "SSL_FORTEZZA_KEA_WITH_FORTEZZA_CBC_SHA"}, + {0x001E, "SSL_FORTEZZA_KEA_WITH_RC4_128_SHA"}, + {0x001F, "TLS_KRB5_WITH_3DES_EDE_CBC_SHA"}, + {0x0020, "TLS_KRB5_WITH_RC4_128_SHA"}, + {0x0021, "TLS_KRB5_WITH_IDEA_CBC_SHA"}, + {0x0022, "TLS_KRB5_WITH_DES_CBC_MD5"}, + {0x0023, "TLS_KRB5_WITH_3DES_EDE_CBC_MD5"}, + {0x0024, "TLS_KRB5_WITH_RC4_128_MD5"}, + {0x0025, "TLS_KRB5_WITH_IDEA_CBC_MD5"}, + {0x0026, "TLS_KRB5_EXPORT_WITH_DES_CBC_40_SHA"}, + {0x0027, "TLS_KRB5_EXPORT_WITH_RC2_CBC_40_SHA"}, + {0x0028, "TLS_KRB5_EXPORT_WITH_RC4_40_SHA"}, + {0x0029, "TLS_KRB5_EXPORT_WITH_DES_CBC_40_MD5"}, + {0x002A, "TLS_KRB5_EXPORT_WITH_RC2_CBC_40_MD5"}, + {0x002B, "TLS_KRB5_EXPORT_WITH_RC4_40_MD5"}, + {0x002F, "TLS_RSA_WITH_AES_128_CBC_SHA"}, + {0x0030, "TLS_DH_DSS_WITH_AES_128_CBC_SHA"}, + {0x0031, "TLS_DH_RSA_WITH_AES_128_CBC_SHA"}, + {0x0032, "TLS_DHE_DSS_WITH_AES_128_CBC_SHA"}, + {0x0033, "TLS_DHE_RSA_WITH_AES_128_CBC_SHA"}, + {0x0034, "TLS_DH_anon_WITH_AES_128_CBC_SHA"}, + {0x0035, "TLS_RSA_WITH_AES_256_CBC_SHA"}, + {0x0036, "TLS_DH_DSS_WITH_AES_256_CBC_SHA"}, + {0x0037, "TLS_DH_RSA_WITH_AES_256_CBC_SHA"}, + {0x0038, "TLS_DHE_DSS_WITH_AES_256_CBC_SHA"}, + {0x0039, "TLS_DHE_RSA_WITH_AES_256_CBC_SHA"}, + {0x003A, "TLS_DH_anon_WITH_AES_256_CBC_SHA"}, + {0x003B, "TLS_RSA_WITH_NULL_SHA256"}, + {0x003C, "TLS_RSA_WITH_AES_128_CBC_SHA256"}, + {0x003D, "TLS_RSA_WITH_AES_256_CBC_SHA256"}, + {0x003E, "TLS_DH_DSS_WITH_AES_128_CBC_SHA256"}, + {0x003F, "TLS_DH_RSA_WITH_AES_128_CBC_SHA256"}, + {0x0040, 
"TLS_DHE_DSS_WITH_AES_128_CBC_SHA256"}, + {0x0041, "TLS_RSA_WITH_CAMELLIA_128_CBC_SHA"}, + {0x0042, "TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA"}, + {0x0043, "TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA"}, + {0x0044, "TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA"}, + {0x0045, "TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA"}, + {0x0046, "TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA"}, + {0x0067, "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256"}, + {0x0068, "TLS_DH_DSS_WITH_AES_256_CBC_SHA256"}, + {0x0069, "TLS_DH_RSA_WITH_AES_256_CBC_SHA256"}, + {0x006A, "TLS_DHE_DSS_WITH_AES_256_CBC_SHA256"}, + {0x006B, "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"}, + {0x006C, "TLS_DH_anon_WITH_AES_128_CBC_SHA256"}, + {0x006D, "TLS_DH_anon_WITH_AES_256_CBC_SHA256"}, + {0x0084, "TLS_RSA_WITH_CAMELLIA_256_CBC_SHA"}, + {0x0085, "TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA"}, + {0x0086, "TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA"}, + {0x0087, "TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA"}, + {0x0088, "TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA"}, + {0x0089, "TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA"}, + {0x008A, "TLS_PSK_WITH_RC4_128_SHA"}, + {0x008B, "TLS_PSK_WITH_3DES_EDE_CBC_SHA"}, + {0x008C, "TLS_PSK_WITH_AES_128_CBC_SHA"}, + {0x008D, "TLS_PSK_WITH_AES_256_CBC_SHA"}, + {0x008E, "TLS_DHE_PSK_WITH_RC4_128_SHA"}, + {0x008F, "TLS_DHE_PSK_WITH_3DES_EDE_CBC_SHA"}, + {0x0090, "TLS_DHE_PSK_WITH_AES_128_CBC_SHA"}, + {0x0091, "TLS_DHE_PSK_WITH_AES_256_CBC_SHA"}, + {0x0092, "TLS_RSA_PSK_WITH_RC4_128_SHA"}, + {0x0093, "TLS_RSA_PSK_WITH_3DES_EDE_CBC_SHA"}, + {0x0094, "TLS_RSA_PSK_WITH_AES_128_CBC_SHA"}, + {0x0095, "TLS_RSA_PSK_WITH_AES_256_CBC_SHA"}, + {0x0096, "TLS_RSA_WITH_SEED_CBC_SHA"}, + {0x0097, "TLS_DH_DSS_WITH_SEED_CBC_SHA"}, + {0x0098, "TLS_DH_RSA_WITH_SEED_CBC_SHA"}, + {0x0099, "TLS_DHE_DSS_WITH_SEED_CBC_SHA"}, + {0x009A, "TLS_DHE_RSA_WITH_SEED_CBC_SHA"}, + {0x009B, "TLS_DH_anon_WITH_SEED_CBC_SHA"}, + {0x009C, "TLS_RSA_WITH_AES_128_GCM_SHA256"}, + {0x009D, "TLS_RSA_WITH_AES_256_GCM_SHA384"}, + {0x009E, "TLS_DHE_RSA_WITH_AES_128_GCM_SHA256"}, + {0x009F, "TLS_DHE_RSA_WITH_AES_256_GCM_SHA384"}, + {0x00A0, "TLS_DH_RSA_WITH_AES_128_GCM_SHA256"}, + {0x00A1, "TLS_DH_RSA_WITH_AES_256_GCM_SHA384"}, + {0x00A2, "TLS_DHE_DSS_WITH_AES_128_GCM_SHA256"}, + {0x00A3, "TLS_DHE_DSS_WITH_AES_256_GCM_SHA384"}, + {0x00A4, "TLS_DH_DSS_WITH_AES_128_GCM_SHA256"}, + {0x00A5, "TLS_DH_DSS_WITH_AES_256_GCM_SHA384"}, + {0x00A6, "TLS_DH_anon_WITH_AES_128_GCM_SHA256"}, + {0x00A7, "TLS_DH_anon_WITH_AES_256_GCM_SHA384"}, + {0x00A8, "TLS_PSK_WITH_AES_128_GCM_SHA256"}, + {0x00A9, "TLS_PSK_WITH_AES_256_GCM_SHA384"}, + {0x00AA, "TLS_DHE_PSK_WITH_AES_128_GCM_SHA256"}, + {0x00AB, "TLS_DHE_PSK_WITH_AES_256_GCM_SHA384"}, + {0x00AC, "TLS_RSA_PSK_WITH_AES_128_GCM_SHA256"}, + {0x00AD, "TLS_RSA_PSK_WITH_AES_256_GCM_SHA384"}, + {0x00AE, "TLS_PSK_WITH_AES_128_CBC_SHA256"}, + {0x00AF, "TLS_PSK_WITH_AES_256_CBC_SHA384"}, + {0x00B0, "TLS_PSK_WITH_NULL_SHA256"}, + {0x00B1, "TLS_PSK_WITH_NULL_SHA384"}, + {0x00B2, "TLS_DHE_PSK_WITH_AES_128_CBC_SHA256"}, + {0x00B3, "TLS_DHE_PSK_WITH_AES_256_CBC_SHA384"}, + {0x00B4, "TLS_DHE_PSK_WITH_NULL_SHA256"}, + {0x00B5, "TLS_DHE_PSK_WITH_NULL_SHA384"}, + {0x00B6, "TLS_RSA_PSK_WITH_AES_128_CBC_SHA256"}, + {0x00B7, "TLS_RSA_PSK_WITH_AES_256_CBC_SHA384"}, + {0x00B8, "TLS_RSA_PSK_WITH_NULL_SHA256"}, + {0x00B9, "TLS_RSA_PSK_WITH_NULL_SHA384"}, + {0x00BA, "TLS_RSA_WITH_CAMELLIA_128_CBC_SHA256"}, + {0x00BB, "TLS_DH_DSS_WITH_CAMELLIA_128_CBC_SHA256"}, + {0x00BC, "TLS_DH_RSA_WITH_CAMELLIA_128_CBC_SHA256"}, + {0x00BD, "TLS_DHE_DSS_WITH_CAMELLIA_128_CBC_SHA256"}, + {0x00BE, 
"TLS_DHE_RSA_WITH_CAMELLIA_128_CBC_SHA256"}, + {0x00BF, "TLS_DH_anon_WITH_CAMELLIA_128_CBC_SHA256"}, + {0x00C0, "TLS_RSA_WITH_CAMELLIA_256_CBC_SHA256"}, + {0x00C1, "TLS_DH_DSS_WITH_CAMELLIA_256_CBC_SHA256"}, + {0x00C2, "TLS_DH_RSA_WITH_CAMELLIA_256_CBC_SHA256"}, + {0x00C3, "TLS_DHE_DSS_WITH_CAMELLIA_256_CBC_SHA256"}, + {0x00C4, "TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA256"}, + {0x00C5, "TLS_DH_anon_WITH_CAMELLIA_256_CBC_SHA256"}, + {0x00FF, "TLS_EMPTY_RENEGOTIATION_INFO_SCSV"}, + {0xC001, "TLS_ECDH_ECDSA_WITH_NULL_SHA"}, + {0xC002, "TLS_ECDH_ECDSA_WITH_RC4_128_SHA"}, + {0xC003, "TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA"}, + {0xC004, "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA"}, + {0xC005, "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA"}, + {0xC006, "TLS_ECDHE_ECDSA_WITH_NULL_SHA"}, + {0xC007, "TLS_ECDHE_ECDSA_WITH_RC4_128_SHA"}, + {0xC008, "TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA"}, + {0xC009, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA"}, + {0xC00A, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA"}, + {0xC00B, "TLS_ECDH_RSA_WITH_NULL_SHA"}, + {0xC00C, "TLS_ECDH_RSA_WITH_RC4_128_SHA"}, + {0xC00D, "TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA"}, + {0xC00E, "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA"}, + {0xC00F, "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA"}, + {0xC010, "TLS_ECDHE_RSA_WITH_NULL_SHA"}, + {0xC011, "TLS_ECDHE_RSA_WITH_RC4_128_SHA"}, + {0xC012, "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA"}, + {0xC013, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA"}, + {0xC014, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA"}, + {0xC015, "TLS_ECDH_anon_WITH_NULL_SHA"}, + {0xC016, "TLS_ECDH_anon_WITH_RC4_128_SHA"}, + {0xC017, "TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA"}, + {0xC018, "TLS_ECDH_anon_WITH_AES_128_CBC_SHA"}, + {0xC019, "TLS_ECDH_anon_WITH_AES_256_CBC_SHA"}, + {0xC01A, "TLS_SRP_SHA_WITH_3DES_EDE_CBC_SHA"}, + {0xC01B, "TLS_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA"}, + {0xC01C, "TLS_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA"}, + {0xC01D, "TLS_SRP_SHA_WITH_AES_128_CBC_SHA"}, + {0xC01E, "TLS_SRP_SHA_RSA_WITH_AES_128_CBC_SHA"}, + {0xC01F, "TLS_SRP_SHA_DSS_WITH_AES_128_CBC_SHA"}, + {0xC020, "TLS_SRP_SHA_WITH_AES_256_CBC_SHA"}, + {0xC021, "TLS_SRP_SHA_RSA_WITH_AES_256_CBC_SHA"}, + {0xC022, "TLS_SRP_SHA_DSS_WITH_AES_256_CBC_SHA"}, + {0xC023, "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256"}, + {0xC024, "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384"}, + {0xC025, "TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256"}, + {0xC026, "TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384"}, + {0xC027, "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256"}, + {0xC028, "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384"}, + {0xC029, "TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256"}, + {0xC02A, "TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384"}, + {0xC02B, "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256"}, + {0xC02C, "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384"}, + {0xC02D, "TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256"}, + {0xC02E, "TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384"}, + {0xC02F, "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"}, + {0xC030, "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384"}, + {0xC031, "TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256"}, + {0xC032, "TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384"}, + {0xFEFE, "SSL_RSA_FIPS_WITH_DES_CBC_SHA"}, + {0xFEFF, "SSL_RSA_FIPS_WITH_3DES_EDE_CBC_SHA"}, +}; + +/* Compression methods */ +static ssl_trace_tbl ssl_comp_tbl[] = { + {0x0000, "No Compression"}, + {0x0001, "Zlib Compression"} +}; + +/* Extensions */ +static ssl_trace_tbl ssl_exts_tbl[] = { + {TLSEXT_TYPE_server_name, "server_name"}, + {TLSEXT_TYPE_max_fragment_length, "max_fragment_length"}, + {TLSEXT_TYPE_client_certificate_url, "client_certificate_url"}, + {TLSEXT_TYPE_trusted_ca_keys, "trusted_ca_keys"}, + {TLSEXT_TYPE_truncated_hmac, 
"truncated_hmac"}, + {TLSEXT_TYPE_status_request, "status_request"}, + {TLSEXT_TYPE_user_mapping, "user_mapping"}, + {TLSEXT_TYPE_client_authz, "client_authz"}, + {TLSEXT_TYPE_server_authz, "server_authz"}, + {TLSEXT_TYPE_cert_type, "cert_type"}, + {TLSEXT_TYPE_elliptic_curves, "elliptic_curves"}, + {TLSEXT_TYPE_ec_point_formats, "ec_point_formats"}, + {TLSEXT_TYPE_srp, "srp"}, + {TLSEXT_TYPE_signature_algorithms, "signature_algorithms"}, + {TLSEXT_TYPE_use_srtp, "use_srtp"}, + {TLSEXT_TYPE_heartbeat, "heartbeat"}, + {TLSEXT_TYPE_session_ticket, "session_ticket"}, +# ifdef TLSEXT_TYPE_opaque_prf_input + {TLSEXT_TYPE_opaque_prf_input, "opaque_prf_input"}, +# endif + {TLSEXT_TYPE_renegotiate, "renegotiate"}, + {TLSEXT_TYPE_next_proto_neg, "next_proto_neg"}, + {TLSEXT_TYPE_padding, "padding"} +}; + +static ssl_trace_tbl ssl_curve_tbl[] = { + {1, "sect163k1 (K-163)"}, + {2, "sect163r1"}, + {3, "sect163r2 (B-163)"}, + {4, "sect193r1"}, + {5, "sect193r2"}, + {6, "sect233k1 (K-233)"}, + {7, "sect233r1 (B-233)"}, + {8, "sect239k1"}, + {9, "sect283k1 (K-283)"}, + {10, "sect283r1 (B-283)"}, + {11, "sect409k1 (K-409)"}, + {12, "sect409r1 (B-409)"}, + {13, "sect571k1 (K-571)"}, + {14, "sect571r1 (B-571)"}, + {15, "secp160k1"}, + {16, "secp160r1"}, + {17, "secp160r2"}, + {18, "secp192k1"}, + {19, "secp192r1 (P-192)"}, + {20, "secp224k1"}, + {21, "secp224r1 (P-224)"}, + {22, "secp256k1"}, + {23, "secp256r1 (P-256)"}, + {24, "secp384r1 (P-384)"}, + {25, "secp521r1 (P-521)"}, + {26, "brainpoolP256r1"}, + {27, "brainpoolP384r1"}, + {28, "brainpoolP512r1"}, + {0xFF01, "arbitrary_explicit_prime_curves"}, + {0xFF02, "arbitrary_explicit_char2_curves"} +}; + +static ssl_trace_tbl ssl_point_tbl[] = { + {0, "uncompressed"}, + {1, "ansiX962_compressed_prime"}, + {2, "ansiX962_compressed_char2"} +}; + +static ssl_trace_tbl ssl_md_tbl[] = { + {0, "none"}, + {1, "md5"}, + {2, "sha1"}, + {3, "sha224"}, + {4, "sha256"}, + {5, "sha384"}, + {6, "sha512"} +}; + +static ssl_trace_tbl ssl_sig_tbl[] = { + {0, "anonymous"}, + {1, "rsa"}, + {2, "dsa"}, + {3, "ecdsa"} +}; + +static ssl_trace_tbl ssl_hb_tbl[] = { + {1, "peer_allowed_to_send"}, + {2, "peer_not_allowed_to_send"} +}; + +static ssl_trace_tbl ssl_hb_type_tbl[] = { + {1, "heartbeat_request"}, + {2, "heartbeat_response"} +}; + +static ssl_trace_tbl ssl_ctype_tbl[] = { + {1, "rsa_sign"}, + {2, "dss_sign"}, + {3, "rsa_fixed_dh"}, + {4, "dss_fixed_dh"}, + {5, "rsa_ephemeral_dh"}, + {6, "dss_ephemeral_dh"}, + {20, "fortezza_dms"}, + {64, "ecdsa_sign"}, + {65, "rsa_fixed_ecdh"}, + {66, "ecdsa_fixed_ecdh"} +}; + +static ssl_trace_tbl ssl_crypto_tbl[] = { + {TLS1_RT_CRYPTO_PREMASTER, "Premaster Secret"}, + {TLS1_RT_CRYPTO_CLIENT_RANDOM, "Client Random"}, + {TLS1_RT_CRYPTO_SERVER_RANDOM, "Server Random"}, + {TLS1_RT_CRYPTO_MASTER, "Master Secret"}, + {TLS1_RT_CRYPTO_MAC | TLS1_RT_CRYPTO_WRITE, "Write Mac Secret"}, + {TLS1_RT_CRYPTO_MAC | TLS1_RT_CRYPTO_READ, "Read Mac Secret"}, + {TLS1_RT_CRYPTO_KEY | TLS1_RT_CRYPTO_WRITE, "Write Key"}, + {TLS1_RT_CRYPTO_KEY | TLS1_RT_CRYPTO_READ, "Read Key"}, + {TLS1_RT_CRYPTO_IV | TLS1_RT_CRYPTO_WRITE, "Write IV"}, + {TLS1_RT_CRYPTO_IV | TLS1_RT_CRYPTO_READ, "Read IV"}, + {TLS1_RT_CRYPTO_FIXED_IV | TLS1_RT_CRYPTO_WRITE, "Write IV (fixed part)"}, + {TLS1_RT_CRYPTO_FIXED_IV | TLS1_RT_CRYPTO_READ, "Read IV (fixed part)"} +}; + +static void ssl_print_hex(BIO *bio, int indent, const char *name, + const unsigned char *msg, size_t msglen) +{ + size_t i; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "%s (len=%d): ", name, (int)msglen); + for (i = 
0; i < msglen; i++) + BIO_printf(bio, "%02X", msg[i]); + BIO_puts(bio, "\n"); +} + +static int ssl_print_hexbuf(BIO *bio, int indent, + const char *name, size_t nlen, + const unsigned char **pmsg, size_t *pmsglen) +{ + size_t blen; + const unsigned char *p = *pmsg; + if (*pmsglen < nlen) + return 0; + blen = p[0]; + if (nlen > 1) + blen = (blen << 8) | p[1]; + if (*pmsglen < nlen + blen) + return 0; + p += nlen; + ssl_print_hex(bio, indent, name, p, blen); + *pmsg += blen + nlen; + *pmsglen -= blen + nlen; + return 1; +} + +static int ssl_print_version(BIO *bio, int indent, const char *name, + const unsigned char **pmsg, size_t *pmsglen) +{ + int vers; + if (*pmsglen < 2) + return 0; + vers = ((*pmsg)[0] << 8) | (*pmsg)[1]; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "%s=0x%x (%s)\n", + name, vers, ssl_trace_str(vers, ssl_version_tbl)); + *pmsg += 2; + *pmsglen -= 2; + return 1; +} + +static int ssl_print_random(BIO *bio, int indent, + const unsigned char **pmsg, size_t *pmsglen) +{ + unsigned int tm; + const unsigned char *p = *pmsg; + if (*pmsglen < 32) + return 0; + tm = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; + p += 4; + BIO_indent(bio, indent, 80); + BIO_puts(bio, "Random:\n"); + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "gmt_unix_time=0x%08X\n", tm); + ssl_print_hex(bio, indent + 2, "random_bytes", p, 28); + *pmsg += 32; + *pmsglen -= 32; + return 1; +} + +static int ssl_print_signature(BIO *bio, int indent, SSL *s, + const unsigned char **pmsg, size_t *pmsglen) +{ + if (*pmsglen < 2) + return 0; + if (SSL_USE_SIGALGS(s)) { + const unsigned char *p = *pmsg; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "Signature Algorithm %s+%s (%d+%d)\n", + ssl_trace_str(p[0], ssl_md_tbl), + ssl_trace_str(p[1], ssl_sig_tbl), p[0], p[1]); + *pmsg += 2; + *pmsglen -= 2; + } + return ssl_print_hexbuf(bio, indent, "Signature", 2, pmsg, pmsglen); +} + +static int ssl_print_extension(BIO *bio, int indent, int server, int extype, + const unsigned char *ext, size_t extlen) +{ + size_t xlen; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "extension_type=%s(%d), length=%d\n", + ssl_trace_str(extype, ssl_exts_tbl), extype, (int)extlen); + switch (extype) { + case TLSEXT_TYPE_ec_point_formats: + if (extlen < 1) + return 0; + xlen = ext[0]; + if (extlen != xlen + 1) + return 0; + return ssl_trace_list(bio, indent + 2, + ext + 1, xlen, 1, ssl_point_tbl); + + case TLSEXT_TYPE_elliptic_curves: + if (extlen < 2) + return 0; + xlen = (ext[0] << 8) | ext[1]; + if (extlen != xlen + 2) + return 0; + return ssl_trace_list(bio, indent + 2, + ext + 2, xlen, 2, ssl_curve_tbl); + + case TLSEXT_TYPE_signature_algorithms: + + if (extlen < 2) + return 0; + xlen = (ext[0] << 8) | ext[1]; + if (extlen != xlen + 2) + return 0; + if (xlen & 1) + return 0; + ext += 2; + while (xlen > 0) { + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "%s+%s (%d+%d)\n", + ssl_trace_str(ext[0], ssl_md_tbl), + ssl_trace_str(ext[1], ssl_sig_tbl), ext[0], ext[1]); + xlen -= 2; + ext += 2; + } + break; + + case TLSEXT_TYPE_renegotiate: + if (extlen < 1) + return 0; + xlen = ext[0]; + if (xlen + 1 != extlen) + return 0; + ext++; + if (xlen) { + if (server) { + if (xlen & 1) + return 0; + xlen >>= 1; + } + ssl_print_hex(bio, indent + 4, "client_verify_data", ext, xlen); + if (server) { + ext += xlen; + ssl_print_hex(bio, indent + 4, + "server_verify_data", ext, xlen); + } + } else { + BIO_indent(bio, indent + 4, 80); + BIO_puts(bio, "\n"); + } + break; + + case TLSEXT_TYPE_heartbeat: + if (extlen != 1) + return 0; + 
BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "HeartbeatMode: %s\n", + ssl_trace_str(ext[0], ssl_hb_tbl)); + break; + + case TLSEXT_TYPE_session_ticket: + if (extlen != 0) + ssl_print_hex(bio, indent + 4, "ticket", ext, extlen); + break; + + default: + BIO_dump_indent(bio, (char *)ext, extlen, indent + 2); + } + return 1; +} + +static int ssl_print_extensions(BIO *bio, int indent, int server, + const unsigned char *msg, size_t msglen) +{ + size_t extslen; + BIO_indent(bio, indent, 80); + if (msglen == 0) { + BIO_puts(bio, "No Extensions\n"); + return 1; + } + extslen = (msg[0] << 8) | msg[1]; + if (extslen != msglen - 2) + return 0; + msg += 2; + msglen = extslen; + BIO_printf(bio, "extensions, length = %d\n", (int)msglen); + while (msglen > 0) { + int extype; + size_t extlen; + if (msglen < 4) + return 0; + extype = (msg[0] << 8) | msg[1]; + extlen = (msg[2] << 8) | msg[3]; + if (msglen < extlen + 4) + return 0; + msg += 4; + if (!ssl_print_extension(bio, indent + 2, server, + extype, msg, extlen)) + return 0; + msg += extlen; + msglen -= extlen + 4; + } + return 1; +} + +static int ssl_print_client_hello(BIO *bio, SSL *ssl, int indent, + const unsigned char *msg, size_t msglen) +{ + size_t len; + unsigned int cs; + if (!ssl_print_version(bio, indent, "client_version", &msg, &msglen)) + return 0; + if (!ssl_print_random(bio, indent, &msg, &msglen)) + return 0; + if (!ssl_print_hexbuf(bio, indent, "session_id", 1, &msg, &msglen)) + return 0; + if (SSL_IS_DTLS(ssl)) { + if (!ssl_print_hexbuf(bio, indent, "cookie", 1, &msg, &msglen)) + return 0; + } + if (msglen < 2) + return 0; + len = (msg[0] << 8) | msg[1]; + msg += 2; + msglen -= 2; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "cipher_suites (len=%d)\n", (int)len); + if (msglen < len || len & 1) + return 0; + while (len > 0) { + cs = (msg[0] << 8) | msg[1]; + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "{0x%02X, 0x%02X} %s\n", + msg[0], msg[1], ssl_trace_str(cs, ssl_ciphers_tbl)); + msg += 2; + msglen -= 2; + len -= 2; + } + if (msglen < 1) + return 0; + len = msg[0]; + msg++; + msglen--; + if (msglen < len) + return 0; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "compression_methods (len=%d)\n", (int)len); + while (len > 0) { + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "%s (0x%02X)\n", + ssl_trace_str(msg[0], ssl_comp_tbl), msg[0]); + msg++; + msglen--; + len--; + } + if (!ssl_print_extensions(bio, indent, 0, msg, msglen)) + return 0; + return 1; +} + +static int dtls_print_hello_vfyrequest(BIO *bio, int indent, + const unsigned char *msg, + size_t msglen) +{ + if (!ssl_print_version(bio, indent, "server_version", &msg, &msglen)) + return 0; + if (!ssl_print_hexbuf(bio, indent, "cookie", 1, &msg, &msglen)) + return 0; + return 1; +} + +static int ssl_print_server_hello(BIO *bio, int indent, + const unsigned char *msg, size_t msglen) +{ + unsigned int cs; + if (!ssl_print_version(bio, indent, "server_version", &msg, &msglen)) + return 0; + if (!ssl_print_random(bio, indent, &msg, &msglen)) + return 0; + if (!ssl_print_hexbuf(bio, indent, "session_id", 1, &msg, &msglen)) + return 0; + if (msglen < 2) + return 0; + cs = (msg[0] << 8) | msg[1]; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "cipher_suite {0x%02X, 0x%02X} %s\n", + msg[0], msg[1], ssl_trace_str(cs, ssl_ciphers_tbl)); + msg += 2; + msglen -= 2; + if (msglen < 1) + return 0; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "compression_method: %s (0x%02X)\n", + ssl_trace_str(msg[0], ssl_comp_tbl), msg[0]); + msg++; + msglen--; + if 
(!ssl_print_extensions(bio, indent, 1, msg, msglen)) + return 0; + return 1; +} + +static int ssl_get_keyex(const char **pname, SSL *ssl) +{ + unsigned long alg_k = ssl->s3->tmp.new_cipher->algorithm_mkey; + if (alg_k & SSL_kRSA) { + *pname = "rsa"; + return SSL_kRSA; + } + if (alg_k & SSL_kDHr) { + *pname = "dh_rsa"; + return SSL_kDHr; + } + if (alg_k & SSL_kDHd) { + *pname = "dh_dss"; + return SSL_kDHd; + } + if (alg_k & SSL_kKRB5) { + *pname = "krb5"; + return SSL_kKRB5; + } + if (alg_k & SSL_kEDH) { + *pname = "edh"; + return SSL_kEDH; + } + if (alg_k & SSL_kEECDH) { + *pname = "EECDH"; + return SSL_kEECDH; + } + if (alg_k & SSL_kECDHr) { + *pname = "ECDH RSA"; + return SSL_kECDHr; + } + if (alg_k & SSL_kECDHe) { + *pname = "ECDH ECDSA"; + return SSL_kECDHe; + } + if (alg_k & SSL_kPSK) { + *pname = "PSK"; + return SSL_kPSK; + } + if (alg_k & SSL_kSRP) { + *pname = "SRP"; + return SSL_kSRP; + } + if (alg_k & SSL_kGOST) { + *pname = "GOST"; + return SSL_kGOST; + } + *pname = "UNKNOWN"; + return 0; +} + +static int ssl_print_client_keyex(BIO *bio, int indent, SSL *ssl, + const unsigned char *msg, size_t msglen) +{ + const char *algname; + int id; + id = ssl_get_keyex(&algname, ssl); + BIO_indent(bio, indent, 80); + BIO_printf(bio, "KeyExchangeAlgorithm=%s\n", algname); + switch (id) { + + case SSL_kRSA: + if (TLS1_get_version(ssl) == SSL3_VERSION) { + ssl_print_hex(bio, indent + 2, + "EncyptedPreMasterSecret", msg, msglen); + } else { + if (!ssl_print_hexbuf(bio, indent + 2, + "EncyptedPreMasterSecret", 2, + &msg, &msglen)) + return 0; + } + break; + + /* Implicit parameters only allowed for static DH */ + case SSL_kDHd: + case SSL_kDHr: + if (msglen == 0) { + BIO_indent(bio, indent + 2, 80); + BIO_puts(bio, "implicit\n"); + break; + } + case SSL_kEDH: + if (!ssl_print_hexbuf(bio, indent + 2, "dh_Yc", 2, &msg, &msglen)) + return 0; + break; + + case SSL_kECDHr: + case SSL_kECDHe: + if (msglen == 0) { + BIO_indent(bio, indent + 2, 80); + BIO_puts(bio, "implicit\n"); + break; + } + case SSL_kEECDH: + if (!ssl_print_hexbuf(bio, indent + 2, "ecdh_Yc", 1, &msg, &msglen)) + return 0; + break; + } + + return 1; +} + +static int ssl_print_server_keyex(BIO *bio, int indent, SSL *ssl, + const unsigned char *msg, size_t msglen) +{ + const char *algname; + int id; + id = ssl_get_keyex(&algname, ssl); + BIO_indent(bio, indent, 80); + BIO_printf(bio, "KeyExchangeAlgorithm=%s\n", algname); + switch (id) { + /* Should never happen */ + case SSL_kDHd: + case SSL_kDHr: + case SSL_kECDHr: + case SSL_kECDHe: + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "Unexpected Message\n"); + break; + + case SSL_kRSA: + + if (!ssl_print_hexbuf(bio, indent + 2, "rsa_modulus", 2, + &msg, &msglen)) + return 0; + if (!ssl_print_hexbuf(bio, indent + 2, "rsa_exponent", 2, + &msg, &msglen)) + return 0; + break; + + case SSL_kEDH: + if (!ssl_print_hexbuf(bio, indent + 2, "dh_p", 2, &msg, &msglen)) + return 0; + if (!ssl_print_hexbuf(bio, indent + 2, "dh_g", 2, &msg, &msglen)) + return 0; + if (!ssl_print_hexbuf(bio, indent + 2, "dh_Ys", 2, &msg, &msglen)) + return 0; + break; + + case SSL_kEECDH: + if (msglen < 1) + return 0; + BIO_indent(bio, indent + 2, 80); + if (msg[0] == EXPLICIT_PRIME_CURVE_TYPE) + BIO_puts(bio, "explicit_prime\n"); + else if (msg[0] == EXPLICIT_CHAR2_CURVE_TYPE) + BIO_puts(bio, "explicit_char2\n"); + else if (msg[0] == NAMED_CURVE_TYPE) { + int curve; + if (msglen < 3) + return 0; + curve = (msg[1] << 8) | msg[2]; + BIO_printf(bio, "named_curve: %s (%d)\n", + ssl_trace_str(curve, ssl_curve_tbl), 
curve); + msg += 3; + msglen -= 3; + if (!ssl_print_hexbuf(bio, indent + 2, "point", 1, &msg, &msglen)) + return 0; + } + break; + } + return ssl_print_signature(bio, indent, ssl, &msg, &msglen); +} + +static int ssl_print_certificate(BIO *bio, int indent, + const unsigned char **pmsg, size_t *pmsglen) +{ + size_t msglen = *pmsglen; + size_t clen; + X509 *x; + const unsigned char *p = *pmsg, *q; + if (msglen < 3) + return 0; + clen = (p[0] << 16) | (p[1] << 8) | p[2]; + if (msglen < clen + 3) + return 0; + q = p + 3; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "ASN.1Cert, length=%d", (int)clen); + x = d2i_X509(NULL, &q, clen); + if (!x) + BIO_puts(bio, "\n"); + else { + BIO_puts(bio, "\n------details-----\n"); + X509_print_ex(bio, x, XN_FLAG_ONELINE, 0); + PEM_write_bio_X509(bio, x); + /* Print certificate stuff */ + BIO_puts(bio, "------------------\n"); + X509_free(x); + } + if (q != p + 3 + clen) { + BIO_puts(bio, "\n"); + } + *pmsg += clen + 3; + *pmsglen -= clen + 3; + return 1; +} + +static int ssl_print_certificates(BIO *bio, int indent, + const unsigned char *msg, size_t msglen) +{ + size_t clen; + if (msglen < 3) + return 0; + clen = (msg[0] << 16) | (msg[1] << 8) | msg[2]; + if (msglen != clen + 3) + return 0; + msg += 3; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "certificate_list, length=%d\n", (int)clen); + while (clen > 0) { + if (!ssl_print_certificate(bio, indent + 2, &msg, &clen)) + return 0; + } + return 1; +} + +static int ssl_print_cert_request(BIO *bio, int indent, SSL *s, + const unsigned char *msg, size_t msglen) +{ + size_t xlen; + if (msglen < 1) + return 0; + xlen = msg[0]; + if (msglen < xlen + 1) + return 0; + msg++; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "certificate_types (len=%d)\n", (int)xlen); + if (!ssl_trace_list(bio, indent + 2, msg, xlen, 1, ssl_ctype_tbl)) + return 0; + msg += xlen; + msglen -= xlen + 1; + if (!SSL_USE_SIGALGS(s)) + goto skip_sig; + if (msglen < 2) + return 0; + xlen = (msg[0] << 8) | msg[1]; + if (msglen < xlen + 2 || (xlen & 1)) + return 0; + msg += 2; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "signature_algorithms (len=%d)\n", (int)xlen); + while (xlen > 0) { + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "%s+%s (%d+%d)\n", + ssl_trace_str(msg[0], ssl_md_tbl), + ssl_trace_str(msg[1], ssl_sig_tbl), msg[0], msg[1]); + xlen -= 2; + msg += 2; + } + msg += xlen; + msglen -= xlen + 2; + + skip_sig: + xlen = (msg[0] << 8) | msg[1]; + BIO_indent(bio, indent, 80); + if (msglen < xlen + 2) + return 0; + msg += 2; + msglen -= 2; + BIO_printf(bio, "certificate_authorities (len=%d)\n", (int)xlen); + while (xlen > 0) { + size_t dlen; + X509_NAME *nm; + const unsigned char *p; + if (xlen < 2) + return 0; + dlen = (msg[0] << 8) | msg[1]; + if (xlen < dlen + 2) + return 0; + msg += 2; + BIO_indent(bio, indent + 2, 80); + BIO_printf(bio, "DistinguishedName (len=%d): ", (int)dlen); + p = msg; + nm = d2i_X509_NAME(NULL, &p, dlen); + if (!nm) { + BIO_puts(bio, "\n"); + } else { + X509_NAME_print_ex(bio, nm, 0, XN_FLAG_ONELINE); + BIO_puts(bio, "\n"); + X509_NAME_free(nm); + } + xlen -= dlen + 2; + msg += dlen; + } + return 1; +} + +static int ssl_print_ticket(BIO *bio, int indent, + const unsigned char *msg, size_t msglen) +{ + unsigned int tick_life; + if (msglen == 0) { + BIO_indent(bio, indent + 2, 80); + BIO_puts(bio, "No Ticket\n"); + return 1; + } + if (msglen < 4) + return 0; + tick_life = (msg[0] << 24) | (msg[1] << 16) | (msg[2] << 8) | msg[3]; + msglen -= 4; + msg += 4; + BIO_indent(bio, indent + 2, 80); + 
BIO_printf(bio, "ticket_lifetime_hint=%u\n", tick_life); + if (!ssl_print_hexbuf(bio, indent + 2, "ticket", 2, &msg, &msglen)) + return 0; + if (msglen) + return 0; + return 1; +} + +static int ssl_print_handshake(BIO *bio, SSL *ssl, + const unsigned char *msg, size_t msglen, + int indent) +{ + size_t hlen; + unsigned char htype; + if (msglen < 4) + return 0; + htype = msg[0]; + hlen = (msg[1] << 16) | (msg[2] << 8) | msg[3]; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "%s, Length=%d\n", + ssl_trace_str(htype, ssl_handshake_tbl), (int)hlen); + msg += 4; + msglen -= 4; + if (SSL_IS_DTLS(ssl)) { + if (msglen < 8) + return 0; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "message_seq=%d, fragment_offset=%d, " + "fragment_length=%d\n", + (msg[0] << 8) | msg[1], + (msg[2] << 16) | (msg[3] << 8) | msg[4], + (msg[5] << 16) | (msg[6] << 8) | msg[7]); + msg += 8; + msglen -= 8; + } + if (msglen < hlen) + return 0; + switch (htype) { + case SSL3_MT_CLIENT_HELLO: + if (!ssl_print_client_hello(bio, ssl, indent + 2, msg, msglen)) + return 0; + break; + + case DTLS1_MT_HELLO_VERIFY_REQUEST: + if (!dtls_print_hello_vfyrequest(bio, indent + 2, msg, msglen)) + return 0; + break; + + case SSL3_MT_SERVER_HELLO: + if (!ssl_print_server_hello(bio, indent + 2, msg, msglen)) + return 0; + break; + + case SSL3_MT_SERVER_KEY_EXCHANGE: + if (!ssl_print_server_keyex(bio, indent + 2, ssl, msg, msglen)) + return 0; + break; + + case SSL3_MT_CLIENT_KEY_EXCHANGE: + if (!ssl_print_client_keyex(bio, indent + 2, ssl, msg, msglen)) + return 0; + break; + + case SSL3_MT_CERTIFICATE: + if (!ssl_print_certificates(bio, indent + 2, msg, msglen)) + return 0; + break; + + case SSL3_MT_CERTIFICATE_VERIFY: + if (!ssl_print_signature(bio, indent + 2, ssl, &msg, &msglen)) + return 0; + break; + + case SSL3_MT_CERTIFICATE_REQUEST: + if (!ssl_print_cert_request(bio, indent + 2, ssl, msg, msglen)) + return 0; + break; + + case SSL3_MT_FINISHED: + ssl_print_hex(bio, indent + 2, "verify_data", msg, msglen); + break; + + case SSL3_MT_SERVER_DONE: + if (msglen != 0) + ssl_print_hex(bio, indent + 2, "unexpected value", msg, msglen); + break; + + case SSL3_MT_NEWSESSION_TICKET: + if (!ssl_print_ticket(bio, indent + 2, msg, msglen)) + return 0; + break; + + default: + BIO_indent(bio, indent + 2, 80); + BIO_puts(bio, "Unsupported, hex dump follows:\n"); + BIO_dump_indent(bio, (char *)msg, msglen, indent + 4); + } + return 1; +} + +static int ssl_print_heartbeat(BIO *bio, int indent, + const unsigned char *msg, size_t msglen) +{ + if (msglen < 3) + return 0; + BIO_indent(bio, indent, 80); + BIO_printf(bio, "HeartBeatMessageType: %s\n", + ssl_trace_str(msg[0], ssl_hb_type_tbl)); + msg++; + msglen--; + if (!ssl_print_hexbuf(bio, indent, "payload", 2, &msg, &msglen)) + return 0; + ssl_print_hex(bio, indent, "padding", msg, msglen); + return 1; +} + +const char *SSL_CIPHER_standard_name(const SSL_CIPHER *c) +{ + if (c->algorithm_ssl & SSL_SSLV2) + return NULL; + return ssl_trace_str(c->id & 0xFFFF, ssl_ciphers_tbl); +} + +void SSL_trace(int write_p, int version, int content_type, + const void *buf, size_t msglen, SSL *ssl, void *arg) +{ + const unsigned char *msg = buf; + BIO *bio = arg; + + if (write_p == 2) { + BIO_puts(bio, "Session "); + ssl_print_hex(bio, 0, + ssl_trace_str(content_type, ssl_crypto_tbl), + msg, msglen); + return; + } + switch (content_type) { + case SSL3_RT_HEADER: + { + int hvers = msg[1] << 8 | msg[2]; + BIO_puts(bio, write_p ? 
"Sent" : "Received"); + BIO_printf(bio, " Record\nHeader:\n Version = %s (0x%x)\n", + ssl_trace_str(hvers, ssl_version_tbl), hvers); + if (SSL_IS_DTLS(ssl)) { + BIO_printf(bio, + " epoch=%d, sequence_number=%04x%04x%04x\n", + (msg[3] << 8 | msg[4]), + (msg[5] << 8 | msg[6]), + (msg[7] << 8 | msg[8]), (msg[9] << 8 | msg[10])); +# if 0 + /* + * Just print handshake type so we can see what is going on + * during fragmentation. + */ + BIO_printf(bio, "(%s)\n", + ssl_trace_str(msg[msglen], ssl_handshake_tbl)); +# endif + } + + BIO_printf(bio, " Content Type = %s (%d)\n Length = %d", + ssl_trace_str(msg[0], ssl_content_tbl), msg[0], + msg[msglen - 2] << 8 | msg[msglen - 1]); + } + break; + case SSL3_RT_HANDSHAKE: + if (!ssl_print_handshake(bio, ssl, msg, msglen, 4)) + BIO_printf(bio, "Message length parse error!\n"); + break; + + case SSL3_RT_CHANGE_CIPHER_SPEC: + if (msglen == 1 && msg[0] == 1) + BIO_puts(bio, " change_cipher_spec (1)\n"); + else + ssl_print_hex(bio, 4, "unknown value", msg, msglen); + break; + + case SSL3_RT_ALERT: + if (msglen != 2) + BIO_puts(bio, " Illegal Alert Length\n"); + else { + BIO_printf(bio, " Level=%s(%d), description=%s(%d)\n", + SSL_alert_type_string_long(msg[0] << 8), + msg[0], SSL_alert_desc_string_long(msg[1]), msg[1]); + } + case TLS1_RT_HEARTBEAT: + ssl_print_heartbeat(bio, 4, msg, msglen); + break; + + } + + BIO_puts(bio, "\n"); +} + +#endif diff --git a/deps/openssl/openssl/ssl/tls1.h b/deps/openssl/openssl/ssl/tls1.h index 69d8186a4e2441..5929607ff8b6ef 100644 --- a/deps/openssl/openssl/ssl/tls1.h +++ b/deps/openssl/openssl/ssl/tls1.h @@ -209,11 +209,9 @@ extern "C" { # define TLSEXT_TYPE_status_request 5 /* ExtensionType values from RFC4681 */ # define TLSEXT_TYPE_user_mapping 6 - /* ExtensionType values from RFC5878 */ # define TLSEXT_TYPE_client_authz 7 # define TLSEXT_TYPE_server_authz 8 - /* ExtensionType values from RFC6091 */ # define TLSEXT_TYPE_cert_type 9 @@ -233,6 +231,9 @@ extern "C" { /* ExtensionType value from RFC5620 */ # define TLSEXT_TYPE_heartbeat 15 +/* ExtensionType value from draft-ietf-tls-applayerprotoneg-00 */ +# define TLSEXT_TYPE_application_layer_protocol_negotiation 16 + /* * ExtensionType value for TLS padding extension. * http://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml @@ -250,7 +251,7 @@ extern "C" { * i.e. build with -DTLSEXT_TYPE_opaque_prf_input=38183 * using whatever extension number you'd like to try */ -# define TLSEXT_TYPE_opaque_prf_input ?? */ +# define TLSEXT_TYPE_opaque_prf_input ?? 
# endif /* Temporary extension type */ @@ -280,6 +281,9 @@ extern "C" { # define TLSEXT_signature_dsa 2 # define TLSEXT_signature_ecdsa 3 +/* Total number of different signature algorithms */ +# define TLSEXT_signature_num 4 + # define TLSEXT_hash_none 0 # define TLSEXT_hash_md5 1 # define TLSEXT_hash_sha1 2 @@ -288,6 +292,18 @@ extern "C" { # define TLSEXT_hash_sha384 5 # define TLSEXT_hash_sha512 6 +/* Total number of different digest algorithms */ + +# define TLSEXT_hash_num 7 + +/* Flag set for unrecognised algorithms */ +# define TLSEXT_nid_unknown 0x1000000 + +/* ECC curves */ + +# define TLSEXT_curve_P_256 23 +# define TLSEXT_curve_P_384 24 + # ifndef OPENSSL_NO_TLSEXT # define TLSEXT_MAXLEN_host_name 255 @@ -306,6 +322,16 @@ int SSL_export_keying_material(SSL *s, unsigned char *out, size_t olen, const unsigned char *p, size_t plen, int use_context); +int SSL_get_sigalgs(SSL *s, int idx, + int *psign, int *phash, int *psignandhash, + unsigned char *rsig, unsigned char *rhash); + +int SSL_get_shared_sigalgs(SSL *s, int idx, + int *psign, int *phash, int *psignandhash, + unsigned char *rsig, unsigned char *rhash); + +int SSL_check_chain(SSL *s, X509 *x, EVP_PKEY *pk, STACK_OF(X509) *chain); + # define SSL_set_tlsext_host_name(s,name) \ SSL_ctrl(s,SSL_CTRL_SET_TLSEXT_HOSTNAME,TLSEXT_NAMETYPE_host_name,(char *)name) @@ -541,11 +567,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) # define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 /* - * XXX Inconsistency alert: The OpenSSL names of ciphers with ephemeral DH - * here include the string "DHE", while elsewhere it has always been "EDH". - * (The alias for the list of all such ciphers also is "EDH".) The - * specifications speak of "EDH"; maybe we should allow both forms for - * everything. + * XXX * Backward compatibility alert: + * Older versions of OpenSSL gave + * some DHE ciphers names with "EDH" + * instead of "DHE". 
Going forward, we + * should be using DHE + * everywhere, though we may indefinitely maintain + * aliases for users + * or configurations that used "EDH" + */ # define TLS1_TXT_RSA_EXPORT1024_WITH_RC4_56_MD5 "EXP1024-RC4-MD5" # define TLS1_TXT_RSA_EXPORT1024_WITH_RC2_CBC_56_MD5 "EXP1024-RC2-CBC-MD5" diff --git a/deps/openssl/openssl/test/Makefile b/deps/openssl/openssl/test/Makefile index 9aa920de1bd6da..338867952fd43c 100644 --- a/deps/openssl/openssl/test/Makefile +++ b/deps/openssl/openssl/test/Makefile @@ -12,6 +12,7 @@ PERL= perl # KRB5 stuff KRB5_INCLUDES= LIBKRB5= +TEST= igetest.c PEX_LIBS= EX_LIBS= #-lnsl -lsocket @@ -63,6 +64,7 @@ EVPEXTRATEST=evp_extra_test IGETEST= igetest JPAKETEST= jpaketest SRPTEST= srptest +V3NAMETEST= v3nametest ASN1TEST= asn1test HEARTBEATTEST= heartbeat_test CONSTTIMETEST= constant_time_test @@ -77,7 +79,8 @@ EXE= $(BNTEST)$(EXE_EXT) $(ECTEST)$(EXE_EXT) $(ECDSATEST)$(EXE_EXT) $(ECDHTEST) $(RANDTEST)$(EXE_EXT) $(DHTEST)$(EXE_EXT) $(ENGINETEST)$(EXE_EXT) \ $(BFTEST)$(EXE_EXT) $(CASTTEST)$(EXE_EXT) $(SSLTEST)$(EXE_EXT) $(EXPTEST)$(EXE_EXT) $(DSATEST)$(EXE_EXT) $(RSATEST)$(EXE_EXT) \ $(EVPTEST)$(EXE_EXT) $(EVPEXTRATEST)$(EXE_EXT) $(IGETEST)$(EXE_EXT) $(JPAKETEST)$(EXE_EXT) $(SRPTEST)$(EXE_EXT) \ - $(ASN1TEST)$(EXE_EXT) $(HEARTBEATTEST)$(EXE_EXT) $(CONSTTIMETEST)$(EXE_EXT) + $(ASN1TEST)$(EXE_EXT) $(V3NAMETEST)$(EXE_EXT) $(HEARTBEATTEST)$(EXE_EXT) \ + $(CONSTTIMETEST)$(EXE_EXT) # $(METHTEST)$(EXE_EXT) @@ -89,7 +92,7 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(IDEATEST).o \ $(MDC2TEST).o $(RMDTEST).o \ $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ - $(EVPTEST).o $(EVPEXTRATEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o \ + $(EVPTEST).o $(EVPEXTRATEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(V3NAMETEST).o \ $(HEARTBEATTEST).o $(CONSTTIMETEST).o SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ @@ -100,10 +103,10 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ $(EVPTEST).c $(EVPEXTRATEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ - $(HEARTBEATTEST).c $(CONSTTIMETEST).c + $(V3NAMETEST).c $(HEARTBEATTEST).c $(CONSTTIMETEST).c EXHEADER= -HEADER= $(EXHEADER) +HEADER= testutil.h $(EXHEADER) ALL= $(GENERAL) $(SRC) $(HEADER) @@ -143,69 +146,70 @@ alltests: \ test_enc test_x509 test_rsa test_crl test_sid \ test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ test_ss test_ca test_engine test_evp test_evp_extra test_ssl test_tsa test_ige \ - test_jpake test_srp test_cms test_heartbeat test_constant_time + test_jpake test_srp test_cms test_ocsp test_v3name test_heartbeat \ + test_constant_time -test_evp: +test_evp: $(EVPTEST)$(EXE_EXT) evptests.txt ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt -test_evp_extra: +test_evp_extra: $(EVPEXTRATEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(EVPEXTRATEST) -test_des: +test_des: $(DESTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(DESTEST) -test_idea: +test_idea: $(IDEATEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(IDEATEST) -test_sha: +test_sha: $(SHATEST)$(EXE_EXT) $(SHA1TEST)$(EXE_EXT) $(SHA256TEST)$(EXE_EXT) $(SHA512TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(SHATEST) ../util/shlib_wrap.sh ./$(SHA1TEST) ../util/shlib_wrap.sh ./$(SHA256TEST) ../util/shlib_wrap.sh ./$(SHA512TEST) -test_mdc2: +test_mdc2: $(MDC2TEST)$(EXE_EXT) ../util/shlib_wrap.sh 
./$(MDC2TEST) -test_md5: +test_md5: $(MD5TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(MD5TEST) -test_md4: +test_md4: $(MD4TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(MD4TEST) -test_hmac: +test_hmac: $(HMACTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(HMACTEST) -test_wp: +test_wp: $(WPTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(WPTEST) -test_md2: +test_md2: $(MD2TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(MD2TEST) -test_rmd: +test_rmd: $(RMDTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(RMDTEST) -test_bf: +test_bf: $(BFTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(BFTEST) -test_cast: +test_cast: $(CASTTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(CASTTEST) -test_rc2: +test_rc2: $(RC2TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(RC2TEST) -test_rc4: +test_rc4: $(RC4TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(RC4TEST) -test_rc5: +test_rc5: $(RC5TEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(RC5TEST) -test_rand: +test_rand: $(RANDTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(RANDTEST) -test_enc: +test_enc: ../apps/openssl$(EXE_EXT) testenc @sh ./testenc -test_x509: +test_x509: ../apps/openssl$(EXE_EXT) tx509 testx509.pem v3-cert1.pem v3-cert2.pem echo test normal x509v1 certificate sh ./tx509 2>/dev/null echo test first x509v3 certificate @@ -213,76 +217,78 @@ test_x509: echo test second x509v3 certificate sh ./tx509 v3-cert2.pem 2>/dev/null -test_rsa: $(RSATEST)$(EXE_EXT) +test_rsa: $(RSATEST)$(EXE_EXT) ../apps/openssl$(EXE_EXT) trsa testrsa.pem @sh ./trsa 2>/dev/null ../util/shlib_wrap.sh ./$(RSATEST) -test_crl: +test_crl: ../apps/openssl$(EXE_EXT) tcrl testcrl.pem @sh ./tcrl 2>/dev/null -test_sid: +test_sid: ../apps/openssl$(EXE_EXT) tsid testsid.pem @sh ./tsid 2>/dev/null -test_req: +test_req: ../apps/openssl$(EXE_EXT) treq testreq.pem testreq2.pem @sh ./treq 2>/dev/null @sh ./treq testreq2.pem 2>/dev/null -test_pkcs7: +test_pkcs7: ../apps/openssl$(EXE_EXT) tpkcs7 tpkcs7d testp7.pem pkcs7-1.pem @sh ./tpkcs7 2>/dev/null @sh ./tpkcs7d 2>/dev/null -test_bn: +test_bn: $(BNTEST)$(EXE_EXT) $(EXPTEST)$(EXE_EXT) bctest @echo starting big number library test, could take a while... @../util/shlib_wrap.sh ./$(BNTEST) >tmp.bntest @echo quit >>tmp.bntest @echo "running bc" - @) {if (/^test (.*)/) {print STDERR "\nverify $$1";} elsif (!/^0\r?$$/) {die "\nFailed! bc: $$_";} else {print STDERR "."; $$i++;}} print STDERR "\n$$i tests passed\n"' + @) {if (/^test (.*)/) {print STDERR "\nverify $$1";} elsif (!/^0$$/) {die "\nFailed! 
bc: $$_";} else {print STDERR "."; $$i++;}} print STDERR "\n$$i tests passed\n"' @echo 'test a^b%c implementations' ../util/shlib_wrap.sh ./$(EXPTEST) -test_ec: +test_ec: $(ECTEST)$(EXE_EXT) @echo 'test elliptic curves' ../util/shlib_wrap.sh ./$(ECTEST) -test_ecdsa: +test_ecdsa: $(ECDSATEST)$(EXE_EXT) @echo 'test ecdsa' ../util/shlib_wrap.sh ./$(ECDSATEST) -test_ecdh: +test_ecdh: $(ECDHTEST)$(EXE_EXT) @echo 'test ecdh' ../util/shlib_wrap.sh ./$(ECDHTEST) -test_verify: +test_verify: ../apps/openssl$(EXE_EXT) @echo "The following command should have some OK's and some failures" @echo "There are definitly a few expired certificates" ../util/shlib_wrap.sh ../apps/openssl verify -CApath ../certs/demo ../certs/demo/*.pem -test_dh: +test_dh: $(DHTEST)$(EXE_EXT) @echo "Generate a set of DH parameters" ../util/shlib_wrap.sh ./$(DHTEST) -test_dsa: +test_dsa: $(DSATEST)$(EXE_EXT) @echo "Generate a set of DSA parameters" ../util/shlib_wrap.sh ./$(DSATEST) ../util/shlib_wrap.sh ./$(DSATEST) -app2_1 -test_gen: +test_gen testreq.pem: ../apps/openssl$(EXE_EXT) testgen test.cnf @echo "Generate and verify a certificate request" @sh ./testgen test_ss keyU.ss certU.ss certCA.ss certP1.ss keyP1.ss certP2.ss keyP2.ss \ - intP1.ss intP2.ss: testss + intP1.ss intP2.ss: testss CAss.cnf Uss.cnf P1ss.cnf P2ss.cnf \ + ../apps/openssl$(EXE_EXT) @echo "Generate and certify a test certificate" @sh ./testss @cat certCA.ss certU.ss > intP1.ss @cat certCA.ss certU.ss certP1.ss > intP2.ss -test_engine: +test_engine: $(ENGINETEST)$(EXE_EXT) @echo "Manipulate the ENGINE structures" ../util/shlib_wrap.sh ./$(ENGINETEST) test_ssl: keyU.ss certU.ss certCA.ss certP1.ss keyP1.ss certP2.ss keyP2.ss \ - intP1.ss intP2.ss + intP1.ss intP2.ss $(SSLTEST)$(EXE_EXT) testssl testsslproxy \ + ../apps/server2.pem serverinfo.pem @echo "test SSL protocol" @if [ -n "$(FIPSCANLIB)" ]; then \ sh ./testfipsssl keyU.ss certU.ss certCA.ss; \ @@ -292,7 +298,7 @@ test_ssl: keyU.ss certU.ss certCA.ss certP1.ss keyP1.ss certP2.ss keyP2.ss \ @sh ./testsslproxy keyP1.ss certP1.ss intP1.ss @sh ./testsslproxy keyP2.ss certP2.ss intP2.ss -test_ca: +test_ca: ../apps/openssl$(EXE_EXT) testca CAss.cnf Uss.cnf @if ../util/shlib_wrap.sh ../apps/openssl no-rsa; then \ echo "skipping CA.sh test -- requires RSA"; \ else \ @@ -304,7 +310,7 @@ test_aes: #$(AESTEST) # @echo "test Rijndael" # ../util/shlib_wrap.sh ./$(AESTEST) -test_tsa: +test_tsa: ../apps/openssl$(EXE_EXT) testtsa CAtsa.cnf ../util/shlib_wrap.sh @if ../util/shlib_wrap.sh ../apps/openssl no-rsa; then \ echo "skipping testtsa test -- requires RSA"; \ else \ @@ -319,7 +325,7 @@ test_jpake: $(JPAKETEST)$(EXE_EXT) @echo "Test JPAKE" ../util/shlib_wrap.sh ./$(JPAKETEST) -test_cms: +test_cms: ../apps/openssl$(EXE_EXT) cms-test.pl smcont.txt @echo "CMS consistency test" $(PERL) cms-test.pl @@ -327,6 +333,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) @echo "Test SRP" ../util/shlib_wrap.sh ./srptest +test_ocsp: ../apps/openssl$(EXE_EXT) tocsp + @echo "Test OCSP" + @sh ./tocsp + +test_v3name: $(V3NAMETEST)$(EXE_EXT) + @echo "Test X509v3_check_*" + ../util/shlib_wrap.sh ./$(V3NAMETEST) + test_heartbeat: $(HEARTBEATTEST)$(EXE_EXT) ../util/shlib_wrap.sh ./$(HEARTBEATTEST) @@ -495,6 +509,9 @@ $(ASN1TEST)$(EXE_EXT): $(ASN1TEST).o $(DLIBCRYPTO) $(SRPTEST)$(EXE_EXT): $(SRPTEST).o $(DLIBCRYPTO) @target=$(SRPTEST); $(BUILD_CMD) +$(V3NAMETEST)$(EXE_EXT): $(V3NAMETEST).o $(DLIBCRYPTO) + @target=$(V3NAMETEST); $(BUILD_CMD) + $(HEARTBEATTEST)$(EXE_EXT): $(HEARTBEATTEST).o $(DLIBCRYPTO) @target=$(HEARTBEATTEST); $(BUILD_CMD_STATIC) @@ 
-791,6 +808,19 @@ ssltest.o: ../include/openssl/ssl3.h ../include/openssl/stack.h ssltest.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h ssltest.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h ssltest.o: ../include/openssl/x509v3.h ssltest.c +v3nametest.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h +v3nametest.o: ../include/openssl/buffer.h ../include/openssl/conf.h +v3nametest.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h +v3nametest.o: ../include/openssl/ec.h ../include/openssl/ecdh.h +v3nametest.o: ../include/openssl/ecdsa.h ../include/openssl/evp.h +v3nametest.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h +v3nametest.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h +v3nametest.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h +v3nametest.o: ../include/openssl/pkcs7.h ../include/openssl/safestack.h +v3nametest.o: ../include/openssl/sha.h ../include/openssl/stack.h +v3nametest.o: ../include/openssl/symhacks.h ../include/openssl/x509.h +v3nametest.o: ../include/openssl/x509_vfy.h ../include/openssl/x509v3.h +v3nametest.o: v3nametest.c wp_test.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h wp_test.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h wp_test.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h diff --git a/deps/openssl/openssl/test/cms-test.pl b/deps/openssl/openssl/test/cms-test.pl index dfef799be2f4fa..51abeef2c9cb21 100644 --- a/deps/openssl/openssl/test/cms-test.pl +++ b/deps/openssl/openssl/test/cms-test.pl @@ -58,19 +58,32 @@ # Make VMS work if ( $^O eq "VMS" && -f "OSSLX:openssl.exe" ) { $ossl_path = "pipe mcr OSSLX:openssl"; + $null_path = "NL:"; + # On VMS, the lowest 3 bits of the exit code indicates severity + # 1 is success (perl translates it to 0 for $?), 2 is error + # (perl doesn't translate it) + $failure_code = 512; # 2 << 8 = 512 } # Make MSYS work elsif ( $^O eq "MSWin32" && -f "../apps/openssl.exe" ) { $ossl_path = "cmd /c ..\\apps\\openssl"; + $null_path = "NUL"; + $failure_code = 256; } elsif ( -f "../apps/openssl$ENV{EXE_EXT}" ) { $ossl_path = "../util/shlib_wrap.sh ../apps/openssl"; + $null_path = "/dev/null"; + $failure_code = 256; } elsif ( -f "..\\out32dll\\openssl.exe" ) { $ossl_path = "..\\out32dll\\openssl.exe"; + $null_path = "NUL"; + $failure_code = 256; } elsif ( -f "..\\out32\\openssl.exe" ) { $ossl_path = "..\\out32\\openssl.exe"; + $null_path = "NUL"; + $failure_code = 256; } else { die "Can't find OpenSSL executable"; @@ -82,8 +95,53 @@ my $halt_err = 1; my $badcmd = 0; +my $no_ec; +my $no_ec2m; +my $no_ecdh; my $ossl8 = `$ossl_path version -v` =~ /0\.9\.8/; +system ("$ossl_path no-ec > $null_path"); +if ($? == 0) + { + $no_ec = 1; + } +elsif ($? == $failure_code) + { + $no_ec = 0; + } +else + { + die "Error checking for EC support\n"; + } + +system ("$ossl_path no-ec2m > $null_path"); +if ($? == 0) + { + $no_ec2m = 1; + } +elsif ($? == $failure_code) + { + $no_ec2m = 0; + } +else + { + die "Error checking for EC2M support\n"; + } + +system ("$ossl_path no-ecdh > $null_path"); +if ($? == 0) + { + $no_ecdh = 1; + } +elsif ($? == $failure_code) + { + $no_ecdh = 0; + } +else + { + die "Error checking for ECDH support\n"; + } + my @smime_pkcs7_tests = ( [ @@ -341,6 +399,85 @@ ); +my @smime_cms_param_tests = ( + [ + "signed content test streaming PEM format, RSA keys, PSS signature", + "-sign -in smcont.txt -outform PEM -nodetach" + . " -signer $smdir/smrsa1.pem -keyopt rsa_padding_mode:pss" + . 
" -out test.cms", + "-verify -in test.cms -inform PEM " + . " \"-CAfile\" $smdir/smroot.pem -out smtst.txt" + ], + + [ + "signed content test streaming PEM format, RSA keys, PSS signature, no attributes", + "-sign -in smcont.txt -outform PEM -nodetach -noattr" + . " -signer $smdir/smrsa1.pem -keyopt rsa_padding_mode:pss" + . " -out test.cms", + "-verify -in test.cms -inform PEM " + . " \"-CAfile\" $smdir/smroot.pem -out smtst.txt" + ], + + [ + "signed content test streaming PEM format, RSA keys, PSS signature, SHA384 MGF1", + "-sign -in smcont.txt -outform PEM -nodetach" + . " -signer $smdir/smrsa1.pem -keyopt rsa_padding_mode:pss" + . " -keyopt rsa_mgf1_md:sha384 -out test.cms", + "-verify -in test.cms -inform PEM " + . " \"-CAfile\" $smdir/smroot.pem -out smtst.txt" + ], + + [ +"enveloped content test streaming S/MIME format, OAEP default parameters", + "-encrypt -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smrsa1.pem -keyopt rsa_padding_mode:oaep", + "-decrypt -recip $smdir/smrsa1.pem -in test.cms -out smtst.txt" + ], + + [ +"enveloped content test streaming S/MIME format, OAEP SHA256", + "-encrypt -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smrsa1.pem -keyopt rsa_padding_mode:oaep" + . " -keyopt rsa_oaep_md:sha256", + "-decrypt -recip $smdir/smrsa1.pem -in test.cms -out smtst.txt" + ], + + [ +"enveloped content test streaming S/MIME format, ECDH", + "-encrypt -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smec1.pem", + "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt" + ], + + [ +"enveloped content test streaming S/MIME format, ECDH, AES128, SHA256 KDF", + "-encrypt -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smec1.pem -aes128 -keyopt ecdh_kdf_md:sha256", + "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt" + ], + + [ +"enveloped content test streaming S/MIME format, ECDH, K-283, cofactor DH", + "-encrypt -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smec2.pem -aes128" + . " -keyopt ecdh_kdf_md:sha256 -keyopt ecdh_cofactor_mode:1", + "-decrypt -recip $smdir/smec2.pem -in test.cms -out smtst.txt" + ], + + [ +"enveloped content test streaming S/MIME format, X9.42 DH", + "-encrypt -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smdh.pem -aes128", + "-decrypt -recip $smdir/smdh.pem -in test.cms -out smtst.txt" + ] +); + print "CMS => PKCS#7 compatibility tests\n"; run_smime_tests( \$badcmd, \@smime_pkcs7_tests, $cmscmd, $pk7cmd ); @@ -354,6 +491,9 @@ run_smime_tests( \$badcmd, \@smime_pkcs7_tests, $cmscmd, $cmscmd ); run_smime_tests( \$badcmd, \@smime_cms_tests, $cmscmd, $cmscmd ); +print "CMS <=> CMS consistency tests, modified key parameters\n"; +run_smime_tests( \$badcmd, \@smime_cms_param_tests, $cmscmd, $cmscmd ); + if ( `$ossl_path version -f` =~ /ZLIB/ ) { run_smime_tests( \$badcmd, \@smime_cms_comp_tests, $cmscmd, $cmscmd ); } @@ -390,6 +530,21 @@ sub run_smime_tests { $rscmd =~ s/-stream//; $rvcmd =~ s/-stream//; } + if ($no_ec && $tnam =~ /ECDH/) + { + print "$tnam: skipped, EC disabled\n"; + next; + } + if ($no_ecdh && $tnam =~ /ECDH/) + { + print "$tnam: skipped, ECDH disabled\n"; + next; + } + if ($no_ec2m && $tnam =~ /K-283/) + { + print "$tnam: skipped, EC2M disabled\n"; + next; + } system("$scmd$rscmd$redir"); if ($?) 
{ print "$tnam: generation error\n"; diff --git a/deps/openssl/openssl/test/evptests.txt b/deps/openssl/openssl/test/evptests.txt index c273707c1444f5..4e9958b3b5bc07 100644 --- a/deps/openssl/openssl/test/evptests.txt +++ b/deps/openssl/openssl/test/evptests.txt @@ -1,4 +1,5 @@ #cipher:key:iv:plaintext:ciphertext:0/1(decrypt/encrypt) +#aadcipher:key:iv:plaintext:ciphertext:aad:tag:0/1(decrypt/encrypt) #digest:::input:output # SHA(1) tests (from shatest.c) @@ -332,3 +333,69 @@ SEED-ECB:00000000000000000000000000000000::000102030405060708090A0B0C0D0E0F:5EBA SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1 SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1 SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1 + +# AES CCM 256 bit key +aes-256-ccm:1bde3251d41a8b5ea013c195ae128b218b3e0306376357077ef1c1c78548b92e:5b8e40746f6b98e00f1d13ff41:53bd72a97089e312422bf72e242377b3c6ee3e2075389b999c4ef7f28bd2b80a:9a5fcccdb4cf04e7293d2775cc76a488f042382d949b43b7d6bb2b9864786726:c17a32514eb6103f3249e076d4c871dc97e04b286699e54491dc18f6d734d4c0:2024931d73bca480c24a24ece6b6c2bf + +# AES GCM test vectors from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-spec.pdf +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000::::58e2fccefa7e3061367f1d57a4e7455a +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78::ab6e47d42cec13bdf53a67b21257bddf +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985::4d5c2af327cd64a62cf35abd2ba6fab4 +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091:feedfacedeadbeeffeedfacedeadbeefabaddad2:5bc94fbc3221a5db94fae95ae7121a47 +aes-128-gcm:feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598:feedfacedeadbeeffeedfacedeadbeefabaddad2:3612d2e79e3b0785561be14aaca2fccb +aes-128-gcm:feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5:feedfacedeadbeeffeedfacedeadbeefabaddad2:619cc5aefffe0bfa462af43c1699d050 +aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000::::cd33b28ac773f74ba00ed1f312572435 
+aes-192-gcm:000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:98e7247c07f0fe411c267e4384b0f600::2ff58d80033927ab8ef4d4587514f0fb +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256::9924a7c8587336bfb118024db8674a14 +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710:feedfacedeadbeeffeedfacedeadbeefabaddad2:2519498e80f1478f37ba55bd6d27618c +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7:feedfacedeadbeeffeedfacedeadbeefabaddad2:65dcc57fcf623a24094fcca40d3533f8 +aes-192-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b:feedfacedeadbeeffeedfacedeadbeefabaddad2:dcf566ff291c25bbb8568fc3d376a6d9 +aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000::::530f8afbc74536b9a963b4f1c4cb738b +aes-256-gcm:0000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000:00000000000000000000000000000000:cea7403d4d606b6e074ec5d3baf39d18::d0d1c8a799996bf0265b98b5d48ab919 +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad::b094dac5d93471bdec1a502270e3cc6c +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbaddecaf888:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662:feedfacedeadbeeffeedfacedeadbeefabaddad2:76fc6ece0f4e1768cddf8853bb2d551b +aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:cafebabefacedbad:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f:feedfacedeadbeeffeedfacedeadbeefabaddad2:3a337dbf46a792c45e454913fe2ea8f2 
+aes-256-gcm:feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308:9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b:d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39:5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f:feedfacedeadbeeffeedfacedeadbeefabaddad2:a44a8266ee1c8eb0c8b5d4cf5ae9f19a +# local add-ons, primarily streaming ghash tests +# 128 bytes aad +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:::d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad:5fea793a2d6f974d37e68e0cb8ff9492 +# 48 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0::9dd0a376b08e40eb00c35f29f9ea61a4 +# 80 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d5270291::98885a3a22bd4742fe7b72172193b163 +# 128 bytes plaintext +aes-128-gcm:00000000000000000000000000000000:000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:0388dace60b6a392f328c2b971b2fe78f795aaab494b5923f7fd89ff948bc1e0200211214e7394da2089b6acd093abe0c94da219118e297d7b7ebcbcc9c388f28ade7d85a8ee35616f7124a9d527029195b84d1b96c690ff2f2de30bf2ec89e00253786e126504f0dab90c48a30321de3345e6b0461e7c9e6c6b7afedde83f40::cac45f60e31efd3b5a43b98a22ce1aa1 +# 192 bytes plaintext, iv is chosen so that initial counter LSB is 0xFF +aes-128-gcm:00000000000000000000000000000000:ffffffff000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000:56b3373ca9ef6e4a2b64fe1e9a17b61425f10d47a75a5fce13efc6bc784af24f4141bdd48cf7c770887afd573cca5418a9aeffcd7c5ceddfc6a78397b9a85b499da558257267caab2ad0b23ca476a53cb17fb41c4b8b475cb4f3f7165094c229c9e8c4dc0a2a5ff1903e501511221376a1cdb8364c5061a20cae74bc4acd76ceb0abc9fd3217ef9f8c90be402ddf6d8697f4f880dff15bfb7a6b28241ec8fe183c2d59e3f9dfff653c7126f0acb9e64211f42bae12af462b1070bef1ab5e3606::566f8ef683078bfdeeffa869d751a017 +# 80 bytes plaintext, submitted by Intel 
+aes-128-gcm:843ffcf5d2b72694d19ed01d01249412:dbcca32ebf9b804617c3aa9e:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f:6268c6fa2a80b2d137467f092f657ac04d89be2beaa623d61b5a868c8f03ff95d3dcee23ad2f1ab3a6c80eaf4b140eb05de3457f0fbc111a6b43d0763aa422a3013cf1dc37fe417d1fbfc449b75d4cc5:00000000000000000000000000000000101112131415161718191a1b1c1d1e1f:3b629ccfbc1119b7319e1dce2cd6fd6d + +# AES XTS test vectors from IEEE Std 1619-2007 +aes-128-xts:0000000000000000000000000000000000000000000000000000000000000000:00000000000000000000000000000000:0000000000000000000000000000000000000000000000000000000000000000:917cf69ebd68b2ec9b9fe9a3eadda692cd43d2f59598ed858c02c2652fbf922e +aes-128-xts:1111111111111111111111111111111122222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:c454185e6a16936e39334038acef838bfb186fff7480adc4289382ecd6d394f0 +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222:33333333330000000000000000000000:4444444444444444444444444444444444444444444444444444444444444444:af85336b597afc1a900b2eb21ec949d292df4c047e0b21532186a5971a227a89 +aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:00000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:01000000000000000000000000000000:27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:02000000000000000000000000000000:264d3ca8512194fec312c8c9891f279fefdd608d0c027b60483a3fa811d65ee59d52d9e40ec5672d81532b38b6b089ce951f0f9c35590b8b978d175213f329bb1c2fd30f2f7f30492a61a532a79f51d36f5e31a7c9a12c286082ff7d2394d18f783e1a8e72c722caaaa52d8f065657d2631fd25bfd8e5baad6e527d763517501c68c5edc3cdd55435c532d7125c8614deed9adaa3acade5888b87bef641c4c994c8091b5bcd387f3963fb5bc37aa922fbfe3df4e5b915e6eb514717bdd2a74079a5073f5c4bfd46adf7d282e7a393a52579d11a028da4d9cd9c77124f9648ee383b1ac763930e7162a8d37f350b2f74b8472cf09902063c6b32e8c2d9290cefbd7346d1c779a0df50edcde4531da07b099c638e83a755944df2aef1aa31752fd323dcb710fb4bfbb9d22b925bc3577e1b8949e729a90bbafeacf7f7879e7b1147e28ba0bae940db795a61b15ecf4df8db07b824bb062802cc98a9545bb2aaeed77cb3fc6db15dcd7d80d7d5bc406c4970a3478ada8899b329198eb61c193fb6275aa8ca340344a75a862aebe92eee1ce032fd950b47d7704a3876923b4ad62844bf4a09c4dbe8b4397184b7471360c9564880aedddb9baa4af2e75394b08cd32ff479c57a07d3eab5d54de5f9738b8d27f27a9f0ab11799d7b7ffefb2704c95c6ad12c39f1e867a4b7b1d7818a4b753dfd2a89ccb45e001a03a867b187f225dd:fa762a3680b76007928ed4a4f49a9456031b704782e65e16cecb54ed7d017b5e18abd67b338e81078f21edb7868d901ebe9c731a7c18b5e6dec1d6a72e078ac9a4262f860beefa14f4e821018272e411a951502b6e79066e84252c3346f3aa62344351a291d4bedc7a07618bdea2af63145cc7a4b8d4070691ae890cd65733e7946e9021a1dffc4c59f159425ee6d50ca9b135fa6162cea18a939838dc000fb386fad086acce5ac07cb2ece7fd580b00cfa5e98589631dc25e8e2a3daf2ffdec26531659912c9d8f7a15e5865ea8fb5816d6207052bd7128cd743c12c8118791a4736811935eb982a532349e31dd401e0b660a568cb1a4711f552f55ded59f1f15bf7196b3ca12a91e488ef59d64f3a02bf45239499ac6176ae321c4a211ec545365971c5d3f4f09d4eb139bfdf2073d33180b21002b65cc9865e76cb24cd92c874c24c18350399a936ab3637079295d76c417776b94efce3a0ef7206b15110519655c956cbd8b2489405ee2b09a6b6eebe0c53790a12a8998378b33a5b71159625f4ba49d2a2fdba59fbf0897bc7aabd8d707dc140a80f0f309f835d3da54ab584e501dfa0ee977fec543f74186a802b9a37adb3e8291eca04d66520d229e60401e7282bef486ae059aa70696e0e305d777140a7a883ecdcb69b9ff938e8a4231864c69ca2c2043bed007ff3e605e014bcf518138dc3a25c5e236171a2d01d6 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fd000000000000000000000000000000:8e41b78c390b5af9d758bb214a67e9f6bf7727b09ac6124084c37611398fa45daad94868600ed391fb1acd4857a95b466e62ef9f4b377244d1c152e7b30d731aad30c716d214b707aed99eb5b5e580b3e887cf7497465651d4b60e6042051da3693c3b78c14489543be8b6ad0ba629565bba202313ba7b0d0c94a3252b676f46cc02ce0f8a7d34c0ed229129673c1f61aed579d08a9203a25aac3a77e9db60267996db38df637356d9dcd1632e369939f2a29d89345c66e05066f1a3677aef18dea4113faeb629e46721a66d0a7e785d3e29af2594eb67dfa982affe0aac058f6e15864269b135418261fc3afb089472cf68c45dd7f231c6249ba0255e1e033833fc4d00a3fe02132d7bc3873614b8aee34273581ea0325c81f0270affa13641d052d36f0757d484014354d02d6883ca15c24d8c3956b1bd027bcf41f151fd8023c5340e5606f37e90fdb87c86fb4fa634b3718a30bace06a66eaf8f63c4aa3b637826a87fe8cfa44282e92cb1615af3a28e53bc74c7cba1a0977be9065d0c1a5dec6c54ae38d37f37aa35283e048e5530a85c4e7a29d7b92ec0c3169cdf2a805c7604bce60049b9fb7b8eaac10f51ae23794ceba68bb58112e293b9b692ca721b37c662f8574ed4dba6f88e170881c82cddc1034a0ca7e284bf0962b6b26292d836fa9f73c1ac770eef0f2d3a1eaf61d3e03555fd424eedd67e18a18094f888:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:fe000000000000000000000000000000:d55f684f81f4426e9fde92a5ff02df2ac896af63962888a97910c1379e20b0a3b1db613fb7fe2e07004329ea5c22bfd33e3dbe4cf58cc608c2c26c19a2e2fe22f98732c2b5cb844cc6c0702d91e1d50fc4382a7eba5635cd602432a2306ac4ce82f8d70c8d9bc15f918fe71e74c622d5cf71178bf6e0b9cc9f2b41dd8dbe441c41cd0c73a6dc47a348f6702f9d0e9b1b1431e948e299b9ec2272ab2c5f0c7be86affa5dec87a0bee81d3d50007edaa2bcfccb35605155ff36ed8edd4a40dcd4b243acd11b2b987bdbfaf91a7cac27e9c5aea525ee53de7b2d3332c8644402b823e94a7db26276d2d23aa07180f76b4fd29b9c0823099c9d62c519880aee7e9697617c1497d47bf3e571950311421b6b734d38b0db91eb85331b91ea9f61530f54512a5a52a4bad589eb69781d537f23297bb459bdad2948a29e1550bf4787e0be95bb173cf5fab17dab7a13a052a63453d97ccec1a321954886b7a1299faaeecae35c6eaaca753b041b5e5f093bf83397fd21dd6b3012066fcc058cc32c3b09d7562dee29509b5839392c9ff05f51f3166aaac4ac5f238038a3045e6f72e48ef0fe8bc675e82c318a268e43970271bf119b81bf6a982746554f84e72b9f00280a320a08142923c23c883423ff949827f29bbacdc1ccdb04938ce6098c95ba6b32528f4ef78eed778b2e122ddfd1cbdd11d1c0a6783e011fc536d63d053260637:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a 
+aes-128-xts:2718281828459045235360287471352631415926535897932384626433832795:ff000000000000000000000000000000:72efc1ebfe1ee25975a6eb3aa8589dda2b261f1c85bdab442a9e5b2dd1d7c3957a16fc08e526d4b1223f1b1232a11af274c3d70dac57f83e0983c498f1a6f1aecb021c3e70085a1e527f1ce41ee5911a82020161529cd82773762daf5459de94a0a82adae7e1703c808543c29ed6fb32d9e004327c1355180c995a07741493a09c21ba01a387882da4f62534b87bb15d60d197201c0fd3bf30c1500a3ecfecdd66d8721f90bcc4c17ee925c61b0a03727a9c0d5f5ca462fbfa0af1c2513a9d9d4b5345bd27a5f6e653f751693e6b6a2b8ead57d511e00e58c45b7b8d005af79288f5c7c22fd4f1bf7a898b03a5634c6a1ae3f9fae5de4f296a2896b23e7ed43ed14fa5a2803f4d28f0d3ffcf24757677aebdb47bb388378708948a8d4126ed1839e0da29a537a8c198b3c66ab00712dd261674bf45a73d67f76914f830ca014b65596f27e4cf62de66125a5566df9975155628b400fbfb3a29040ed50faffdbb18aece7c5c44693260aab386c0a37b11b114f1c415aebb653be468179428d43a4d8bc3ec38813eca30a13cf1bb18d524f1992d44d8b1a42ea30b22e6c95b199d8d182f8840b09d059585c31ad691fa0619ff038aca2c39a943421157361717c49d322028a74648113bd8c9d7ec77cf3c89c1ec8718ceff8516d96b34c3c614f10699c9abc4ed0411506223bea16af35c883accdbe1104eef0cfdb54e12fb230a:3260ae8dad1f4a32c5cafe3ab0eb95549d461a67ceb9e5aa2d3afb62dece0553193ba50c75be251e08d1d08f1088576c7efdfaaf3f459559571e12511753b07af073f35da06af0ce0bbf6b8f5ccc5cea500ec1b211bd51f63b606bf6528796ca12173ba39b8935ee44ccce646f90a45bf9ccc567f0ace13dc2d53ebeedc81f58b2e41179dddf0d5a5c42f5d8506c1a5d2f8f59f3ea873cbcd0eec19acbf325423bd3dcb8c2b1bf1d1eaed0eba7f0698e4314fbeb2f1566d1b9253008cbccf45a2b0d9c5c9c21474f4076e02be26050b99dee4fd68a4cf890e496e4fcae7b70f94ea5a9062da0daeba1993d2ccd1dd3c244b8428801495a58b216547e7e847c46d1d756377b6242d2e5fb83bf752b54e0df71e889f3a2bb0f4c10805bf3c590376e3c24e22ff57f7fa965577375325cea5d920db94b9c336b455f6e894c01866fe9fbb8c8d3f70a2957285f6dfb5dcd8cbf54782f8fe7766d4723819913ac773421e3a31095866bad22c86a6036b2518b2059b4229d18c8c2ccbdf906c6cc6e82464ee57bddb0bebcb1dc645325bfb3e665ef7251082c88ebb1cf203bd779fdd38675713c8daadd17e1cabee432b09787b6ddf3304e38b731b45df5df51b78fcfb3d32466028d0ba36555e7e11ab0ee0666061d1645d962444bc47a38188930a84b4d561395c73c087021927ca638b7afc8a8679ccb84c26555440ec7f10445cd + 
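
For the XTS groups, the key column is double length (the data key and the tweak key concatenated) and the iv column carries the 16-byte tweak; the short aes-128-xts entries near the end of this block have lengths that are not a multiple of the block size, which XTS covers via ciphertext stealing. The sketch below, again only an illustration that assumes the key length alone selects the cipher and that hex decoding happens elsewhere, runs one such vector through the EVP interface.

    #include <openssl/evp.h>

    /* Encrypt one XTS vector: key holds both AES keys back to back, tweak is
     * the 16-byte iv column, and inlen may be any length of 16 bytes or more. */
    static int xts_encrypt_vector(const unsigned char *key, size_t keylen,
                                  const unsigned char tweak[16],
                                  const unsigned char *in, int inlen,
                                  unsigned char *out)
    {
        const EVP_CIPHER *cipher = keylen == 32 ? EVP_aes_128_xts()
                                                : EVP_aes_256_xts();
        EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
        int len, ok = 0;

        if (ctx == NULL)
            return 0;
        if (EVP_EncryptInit_ex(ctx, cipher, NULL, key, tweak)
            /* The whole buffer goes through a single Update call. */
            && EVP_EncryptUpdate(ctx, out, &len, in, inlen)
            && EVP_EncryptFinal_ex(ctx, out + len, &len))
            ok = 1;
        EVP_CIPHER_CTX_free(ctx);
        return ok;
    }
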
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ff000000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:1c3b3a102f770386e4836c99e370cf9bea00803f5e482357a4ae12d414a3e63b5d31e276f8fe4a8d66b317f9ac683f44680a86ac35adfc3345befecb4bb188fd5776926c49a3095eb108fd1098baec70aaa66999a72a82f27d848b21d4a741b0c5cd4d5fff9dac89aeba122961d03a757123e9870f8acf1000020887891429ca2a3e7a7d7df7b10355165c8b9a6d0a7de8b062c4500dc4cd120c0f7418dae3d0b5781c34803fa75421c790dfe1de1834f280d7667b327f6c8cd7557e12ac3a0f93ec05c52e0493ef31a12d3d9260f79a289d6a379bc70c50841473d1a8cc81ec583e9645e07b8d9670655ba5bbcfecc6dc3966380ad8fecb17b6ba02469a020a84e18e8f84252070c13e9f1f289be54fbc481457778f616015e1327a02b140f1505eb309326d68378f8374595c849d84f4c333ec4423885143cb47bd71c5edae9be69a2ffeceb1bec9de244fbe15992b11b77c040f12bd8f6a975a44a0f90c29a9abc3d4d893927284c58754cce294529f8614dcd2aba991925fedc4ae74ffac6e333b93eb4aff0479da9a410e4450e0dd7ae4c6e2910900575da401fc07059f645e8b7e9bfdef33943054ff84011493c27b3429eaedb4ed5376441a77ed43851ad77f16f541dfd269d50d6a5f14fb0aab1cbb4c1550be97f7ab4066193c4caa773dad38014bd2092fa755c824bb5e54c4f36ffda9fcea70b9c6e693e148c151 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffff0000000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:77a31251618a15e6b92d1d66dffe7b50b50bad552305ba0217a610688eff7e11e1d0225438e093242d6db274fde801d4cae06f2092c728b2478559df58e837c2469ee4a4fa794e4bbc7f39bc026e3cb72c33b0888f25b4acf56a2a9804f1ce6d3d6e1dc6ca181d4b546179d55544aa7760c40d06741539c7e3cd9d2f6650b2013fd0eeb8c2b8e3d8d240ccae2d4c98320a7442e1c8d75a42d6e6cfa4c2eca1798d158c7aecdf82490f24bb9b38e108bcda12c3faf9a21141c3613b58367f922aaa26cd22f23d708dae699ad7cb40a8ad0b6e2784973dcb605684c08b8d6998c69aac049921871ebb65301a4619ca80ecb485a31d744223ce8ddc2394828d6a80470c092f5ba413c3378fa6054255c6f9df4495862bbb3287681f931b687c888abf844dfc8fc28331e579928cd12bd2390ae123cf03818d14dedde5c0c24c8ab018bfca75ca096f2d531f3d1619e785f1ada437cab92e980558b3dce1474afb75bfedbf8ff54cb2618e0244c9ac0d3c66fb51598cd2db11f9be39791abe447c63094f7c453b7ff87cb5bb36b7c79efb0872d17058b83b15ab0866ad8a58656c5a7e20dbdf308b2461d97c0ec0024a2715055249cf3b478ddd4740de654f75ca686e0d7345c69ed50cdc2a8b332b1f8824108ac937eb050585608ee734097fc09054fbff89eeaeea791f4a7ab1f9868294a4f9e27b42af8100cb9d59cef9645803 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffff00000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:e387aaa58ba483afa7e8eb469778317ecf4cf573aa9d4eac23f2cdf914e4e200a8b490e42ee646802dc6ee2b471b278195d60918ececb44bf79966f83faba0499298ebc699c0c8634715a320bb4f075d622e74c8c932004f25b41e361025b5a87815391f6108fc4afa6a05d9303c6ba68a128a55705d415985832fdeaae6c8e19110e84d1b1f199a2692119edc96132658f09da7c623efcec712537a3d94c0bf5d7e352ec94ae5797fdb377dc1551150721adf15bd26a8efc2fcaad56881fa9e62462c28f30ae1ceaca93c345cf243b73f542e2074a705bd2643bb9f7cc79bb6e7091ea6e232df0f9ad0d6cf502327876d82207abf2115cdacf6d5a48f6c1879a65b115f0f8b3cb3c59d15dd8c769bc014795a1837f3901b5845eb491adfefe097b1fa30a12fc1f65ba22905031539971a10f2f36c321bb51331cdefb39e3964c7ef079994f5b69b2edd83a71ef549971ee93f44eac3938fcdd61d01fa71799da3a8091c4c48aa9ed263ff0749df95d44fef6a0bb578ec69456aa5408ae32c7af08ad7ba8921287e3bbee31b767be06a0e705c864a769137df28292283ea81a2480241b44d9921cdbec1bc28dc1fda114bd8e5217ac9d8ebafa720e9da4f9ace231cc949e5b96fe76ffc21063fddc83a6b8679c00d35e09576a875305bed5f36ed242c8900dd1fa965bc950dfce09b132263a1eef52dd6888c309f5a7d712826 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffff000000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:bf53d2dade78e822a4d949a9bc6766b01b06a8ef70d26748c6a7fc36d80ae4c5520f7c4ab0ac8544424fa405162fef5a6b7f229498063618d39f0003cb5fb8d1c86b643497da1ff945c8d3bedeca4f479702a7a735f043ddb1d6aaade3c4a0ac7ca7f3fa5279bef56f82cd7a2f38672e824814e10700300a055e1630b8f1cb0e919f5e942010a416e2bf48cb46993d3cb6a51c19bacf864785a00bc2ecff15d350875b246ed53e68be6f55bd7e05cfc2b2ed6432198a6444b6d8c247fab941f569768b5c429366f1d3f00f0345b96123d56204c01c63b22ce78baf116e525ed90fdea39fa469494d3866c31e05f295ff21fea8d4e6e13d67e47ce722e9698a1c1048d68ebcde76b86fcf976eab8aa9790268b7068e017a8b9b749409514f1053027fd16c3786ea1bac5f15cb79711ee2abe82f5cf8b13ae73030ef5b9e4457e75d1304f988d62dd6fc4b94ed38ba831da4b7634971b6cd8ec325d9c61c00f1df73627ed3745a5e8489f3a95c69639c32cd6e1d537a85f75cc844726e8a72fc0077ad22000f1d5078f6b866318c668f1ad03d5a5fced5219f2eabbd0aa5c0f460d183f04404a0d6f469558e81fab24a167905ab4c7878502ad3e38fdbe62a41556cec37325759533ce8f25f367c87bb5578d667ae93f9e2fd99bcbc5f2fbba88cf6516139420fcff3b7361d86322c4bd84c82f335abb152c4a93411373aaa8220 
+aes-256-xts:27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592:ffffffffff0000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:64497e5a831e4a932c09be3e5393376daa599548b816031d224bbf50a818ed2350eae7e96087c8a0db51ad290bd00c1ac1620857635bf246c176ab463be30b808da548081ac847b158e1264be25bb0910bbc92647108089415d45fab1b3d2604e8a8eff1ae4020cfa39936b66827b23f371b92200be90251e6d73c5f86de5fd4a950781933d79a28272b782a2ec313efdfcc0628f43d744c2dc2ff3dcb66999b50c7ca895b0c64791eeaa5f29499fb1c026f84ce5b5c72ba1083cddb5ce45434631665c333b60b11593fb253c5179a2c8db813782a004856a1653011e93fb6d876c18366dd8683f53412c0c180f9c848592d593f8609ca736317d356e13e2bff3a9f59cd9aeb19cd482593d8c46128bb32423b37a9adfb482b99453fbe25a41bf6feb4aa0bef5ed24bf73c762978025482c13115e4015aac992e5613a3b5c2f685b84795cb6e9b2656d8c88157e52c42f978d8634c43d06fea928f2822e465aa6576e9bf419384506cc3ce3c54ac1a6f67dc66f3b30191e698380bc999b05abce19dc0c6dcc2dd001ec535ba18deb2df1a101023108318c75dc98611a09dc48a0acdec676fabdf222f07e026f059b672b56e5cbc8e1d21bbd867dd927212054681d70ea737134cdfce93b6f82ae22423274e58a0821cc5502e2d0ab4585e94de6975be5e0b4efce51cd3e70c25a1fbbbd609d273ad5b0d59631c531f6a0a57b9 + +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10:6c1625db4671522d3d7599601de7ca09ed +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f1011:d069444b7a7e0cab09e24447d24deb1fedbf +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f101112:e5df1351c0544ba1350b3363cd8ef4beedbf9d +aes-128-xts:fffefdfcfbfaf9f8f7f6f5f4f3f2f1f0bfbebdbcbbbab9b8b7b6b5b4b3b2b1b0:9a785634120000000000000000000000:000102030405060708090a0b0c0d0e0f10111213:9d84c813f719aa2c7be3f66171c7c5c2edbf9dac 
+aes-128-xts:e0e1e2e3e4e5e6e7e8e9eaebecedeeefc0c1c2c3c4c5c6c7c8c9cacbcccdcecf:21436587a90000000000000000000000:000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff:38b45812ef43a05bd957e545907e223b954ab4aaf088303ad910eadf14b42be68b2461149d8c8ba85f992be970bc621f1b06573f63e867bf5875acafa04e42ccbd7bd3c2a0fb1fff791ec5ec36c66ae4ac1e806d81fbf709dbe29e471fad38549c8e66f5345d7c1eb94f405d1ec785cc6f6a68f6254dd8339f9d84057e01a17741990482999516b5611a38f41bb6478e6f173f320805dd71b1932fc333cb9ee39936beea9ad96fa10fb4112b901734ddad40bc1878995f8e11aee7d141a2f5d48b7a4e1e7f0b2c04830e69a4fd1378411c2f287edf48c6c4e5c247a19680f7fe41cefbd49b582106e3616cbbe4dfb2344b2ae9519391f3e0fb4922254b1d6d2d19c6d4d537b3a26f3bcc51588b32f3eca0829b6a5ac72578fb814fb43cf80d64a233e3f997a3f02683342f2b33d25b492536b93becb2f5e1a8b82f5b883342729e8ae09d16938841a21a97fb543eea3bbff59f13c1a18449e398701c1ad51648346cbc04c27bb2da3b93a1372ccae548fb53bee476f9e9c91773b1bb19828394d55d3e1a20ed69113a860b6829ffa847224604435070221b257e8dff783615d2cae4803a93aa4334ab482a0afac9c0aeda70b45a481df5dec5df8cc0f423c77a5fd46cd312021d4b438862419a791be03bb4d97c0e59578542531ba466a83baf92cefc151b5cc1611a167893819b63fb8a6b18e86de60290fa72b797b0ce59f3 +# AES wrap tests from RFC3394 +id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5 +id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7 +id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2 +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1 +id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21 diff --git a/deps/openssl/openssl/test/maketests.com b/deps/openssl/openssl/test/maketests.com index 97e0c3bd396fae..7e2d939bbea631 100644 --- a/deps/openssl/openssl/test/maketests.com +++ b/deps/openssl/openssl/test/maketests.com @@ -148,7 +148,8 @@ $ TEST_FILES = "BNTEST,ECTEST,ECDSATEST,ECDHTEST,IDEATEST,"+ - 
"RANDTEST,DHTEST,ENGINETEST,"+ - "BFTEST,CASTTEST,SSLTEST,EXPTEST,DSATEST,RSA_TEST,"+ - "EVP_TEST,IGETEST,JPAKETEST,SRPTEST,"+ - - "ASN1TEST,HEARTBEAT_TEST,CONSTANT_TIME_TEST" + "ASN1TEST,V3NAMETEST,HEARTBEAT_TEST,"+ - + "CONSTANT_TIME_TEST" $! Should we add MTTEST,PQ_TEST,LH_TEST,DIVTEST,TABTEST as well? $! $! Additional directory information. @@ -185,6 +186,7 @@ $ T_D_EVP_TEST := [-.crypto.evp] $ T_D_IGETEST := [-.test] $ T_D_JPAKETEST := [-.crypto.jpake] $ T_D_SRPTEST := [-.crypto.srp] +$ T_D_V3NAMETEST := [-.crypto.x509v3] $ T_D_ASN1TEST := [-.test] $ T_D_HEARTBEAT_TEST := [-.ssl] $ T_D_CONSTANT_TIME_TEST := [-.crypto] diff --git a/deps/openssl/openssl/test/ocsp-tests/D1.ors b/deps/openssl/openssl/test/ocsp-tests/D1.ors new file mode 100644 index 00000000000000..3fa4a11de25cd2 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRf2uQDFpGg +Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMjMxMDI1MzZaMHUwczBLMAkGBSsOAwIaBQAE +FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDIzMDcwMDAwWqARGA8yMDEyMTAzMDA4 +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQAJU3hXN7NApN50/vlZTG2p8+QQJp4uaod3 +wyBQ0Ux3DoQZQ9RG6/7Mm4qpOLCCSTh/lJjZ0fD+9eB3gcp/JupN1JrU+dgTyv/Y +9MOctJz7y+VoU9I+qB8knV4sQCwohAVm8GmA9s4p/rHq5Oymci0SuG/QCfkVxOub +rI1bWjbHLvvXyvF3PoGMORVHG3SA+jJ9VkHWJyi6brHxY+QR/iYxer8lJsBtpyc7 +q2itFgvax/OHwne3lxsck9q0QgKpmEdJu2LuGyWFIhrEwR3b7ASEu1G/nKClv3dR +vyOXMm1XIwuUhCjAcpNEKiOMorFwnLS1F8LhfqFWTAFG0JbWpAi8oIID+DCCA/Qw +ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV +BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV +BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK +qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi +M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s +LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm +UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi +9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw +HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP +Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3 +tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+ +snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL +PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG +5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p +4J2l diff --git a/deps/openssl/openssl/test/ocsp-tests/D1_Cert_EE.pem b/deps/openssl/openssl/test/ocsp-tests/D1_Cert_EE.pem new file mode 100644 index 00000000000000..c5b993c0ad4eca --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D1_Cert_EE.pem @@ -0,0 +1,38 @@ +-----BEGIN CERTIFICATE----- +MIIGujCCBaKgAwIBAgISESG8vx4IzALnkqQG05AvM+2bMA0GCSqGSIb3DQEBBQUA +MFkxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMS8wLQYD +VQQDEyZHbG9iYWxTaWduIEV4dGVuZGVkIFZhbGlkYXRpb24gQ0EgLSBHMjAeFw0x +MjA4MTQxMjM1MDJaFw0xMzA4MTUxMDMxMjlaMIIBCjEdMBsGA1UEDwwUUHJpdmF0 +ZSBPcmdhbml6YXRpb24xDzANBgNVBAUTBjU3ODYxMTETMBEGCysGAQQBgjc8AgED 
+EwJVUzEeMBwGCysGAQQBgjc8AgECEw1OZXcgSGFtcHNoaXJlMQswCQYDVQQGEwJV +UzEWMBQGA1UECAwNTmV3IEhhbXBzaGlyZTETMBEGA1UEBwwKUG9ydHNtb3V0aDEg +MB4GA1UECRMXVHdvIEludGVybmF0aW9uYWwgRHJpdmUxDTALBgNVBAsMBC5DT00x +GzAZBgNVBAoMEkdNTyBHbG9iYWxTaWduIEluYzEbMBkGA1UEAwwSd3d3Lmdsb2Jh +bHNpZ24uY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqx/nHBP4 +6s5KKMDlfZS4qFDiAWsoPSRn6WO4nrUF/G2S3I/AdJ0IcSDOHb48/3APj5alqbgo +o4IzdG6KLAbENpHMl0L3pHBq/5tJPTi02SbiYUHfp2fhueMauRo8spfEk6fNRnDn +QpyMFRkYd7Jz+KMerTO1xAcOH+xp0KkcP0i2jFTEuM3LwR0yTms1rry+RryjDDt5 +7W0DLnNFWhyGd6YymzNkCPeL6weV8uk2uYRKKf2XOAzgIpNo3zU6iakZOzlQB9h9 +qRuIks2AU/cZ89cBkDjHua0ezX5rG3/Url33jAT9cR5zCXHWtj7VzlOjDXXnn16b +L9/AWsvGMNkYHQIDAQABo4ICxzCCAsMwDgYDVR0PAQH/BAQDAgWgMEwGA1UdIARF +MEMwQQYJKwYBBAGgMgEBMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2Jh +bHNpZ24uY29tL3JlcG9zaXRvcnkvMIIBKwYDVR0RBIIBIjCCAR6CEnd3dy5nbG9i +YWxzaWduLmNvbYIVc3RhdHVzLmdsb2JhbHNpZ24uY29tghF0aC5nbG9iYWxzaWdu +LmNvbYISZGV2Lmdsb2JhbHNpZ24uY29tghNpbmZvLmdsb2JhbHNpZ24uY29tghZh +cmNoaXZlLmdsb2JhbHNpZ24uY29tghZzdGF0aWMxLmdsb2JhbHNpZ24uY29tghZz +dGF0aWMyLmdsb2JhbHNpZ24uY29tghNibG9nLmdsb2JhbHNpZ24uY29tghdzc2xj +aGVjay5nbG9iYWxzaWduLmNvbYIVc3lzdGVtLmdsb2JhbHNpZ24uY29tghhvcGVy +YXRpb24uZ2xvYmFsc2lnbi5jb22CDmdsb2JhbHNpZ24uY29tMAkGA1UdEwQCMAAw +HQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMD8GA1UdHwQ4MDYwNKAyoDCG +Lmh0dHA6Ly9jcmwuZ2xvYmFsc2lnbi5jb20vZ3MvZ3NleHRlbmR2YWxnMi5jcmww +gYgGCCsGAQUFBwEBBHwwejBBBggrBgEFBQcwAoY1aHR0cDovL3NlY3VyZS5nbG9i +YWxzaWduLmNvbS9jYWNlcnQvZ3NleHRlbmR2YWxnMi5jcnQwNQYIKwYBBQUHMAGG +KWh0dHA6Ly9vY3NwMi5nbG9iYWxzaWduLmNvbS9nc2V4dGVuZHZhbGcyMB0GA1Ud +DgQWBBSvMoTDlFB0aVgVrNkkS1QSmYfx1zAfBgNVHSMEGDAWgBSwsEr9HHUo+Bxh +qhP2+sGQPWsWozANBgkqhkiG9w0BAQUFAAOCAQEAgnohm8IRw1ukfc0GmArK3ZLC +DLGpsefwWMvNrclqwrgtVrBx4pfe5xGAjqyQ2QI8V8a8a1ytVMCSC1AMWiWxawvW +fw48fHunqtpTYNDyEe1Q+7tTGZ0SQ3HljYY9toVEjAMDhiM0Szl6ERRO5S7BTCen +mDpWZF8w3ScRRY2UJc8xwWFiYyGWDNzNL1O8R2Y95QIkHUgQpSD3cjl4YvF/Xx/o +hBEzl884uNAggIyQRu0ImLEetEtHWB2w0pZG3nTAqjOAAAyH2Q8IHoJtjQzvg6fy +IQEO1C5GoQ7isiKIjKBXVYOm+gKSQXlzwj1BlU/OW6kEe24IiERhAN9ILA24wA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/D1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/D1_Issuer_ICA.pem new file mode 100644 index 00000000000000..b650f38688d3af --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D1_Issuer_ICA.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE----- +MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G +A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp +Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz +MTAwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z +YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g +RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDNoUbMUpq4pbR/WNnN +2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb +AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl +VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs +Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r +wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb +wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB +/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8 +BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t +L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs +c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB +hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud 
+JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW +gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa +pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF +o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb +LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq +iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG +qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl +TercGL7FG81kwA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/D2.ors b/deps/openssl/openssl/test/ocsp-tests/D2.ors new file mode 100644 index 00000000000000..dcbd4d4b8e591a --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D2.ors @@ -0,0 +1,32 @@ +MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB +yVdbHxANRLCFYj1mqBgPMjAxMjEwMjMxMDI1MzZaMG4wbDBEMAkGBSsOAwIaBQAE +FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL +BgkqhkiG9w0BAQUDggEBAEJN4FuPQPnizPIwEj4Q8Ht765gI6QqMNrvj3UykxYeu +qUajKcqA+V1zaDHTaz+eCQthtmCNKC9T+zVkjGelVsd7Kn2fVKWqp+5wVPI8dVkm +6Gs/IGZ16HDnQ/siTrY3ILWCRz4Hf6lnHpIErQuQRQyjlGKNcE7RYmjGw4w0bxx8 +vHN/baCMApBL0D0zeBqlpJCMUZqJJ3D1+87HxHYR1MkMZDC9rOPIhlpEP4yL17gx +ckrPf+w+A/3kC++jVeA3b8Xtr+MaWOFH4xVn6BTxopczZKVl18tSYqgwITlx5/cL +LpYEdllC0l83E8GRzsOp0SvFxo0NBotgFNZQQujpOzagggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID +MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi +oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL +866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J +ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5 +MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw +yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec +TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7 +8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm +uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb +4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa +YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC +fBIRKjnK621vAWvc7UR+0hqnZ+U= diff --git a/deps/openssl/openssl/test/ocsp-tests/D2_Cert_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/D2_Cert_ICA.pem new file mode 100644 index 00000000000000..459f98e34c7c5b --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D2_Cert_ICA.pem @@ -0,0 +1,26 @@ +-----BEGIN CERTIFICATE----- +MIIEdzCCA1+gAwIBAgILBAAAAAABL07hRxAwDQYJKoZIhvcNAQEFBQAwVzELMAkG +A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv +b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw0wNjEyMTUwODAw +MDBaFw0yODAxMjgxMjAwMDBaMEwxIDAeBgNVBAsTF0dsb2JhbFNpZ24gUm9vdCBD +QSAtIFIyMRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAps8kDr4ubyiZRULEqz4h 
+VJsL03+EcPoSs8u/h1/Gf4bTsjBc1v2t8Xvc5fhglgmSEPXQU977e35ziKxSiHtK +pspJpl6op4xaEbx6guu+jOmzrJYlB5dKmSoHL7Qed7+KD7UCfBuWuMW5Oiy81hK5 +61l94tAGhl9eSWq1OV6INOy8eAwImIRsqM1LtKB9DHlN8LgtyyHK1WxbfeGgKYSh ++dOUScskYpEgvN0L1dnM+eonCitzkcadG6zIy+jgoPQvkItN+7A2G/YZeoXgbfJh +E4hcn+CTClGXilrOr6vV96oJqmC93Nlf33KpYBNeAAHJSvo/pOoHAyECjoLKA8Kb +jwIDAQABo4IBTTCCAUkwDgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8w +HQYDVR0OBBYEFJviB1dnHB7AagbeWbSaLd/cGYYuMEcGA1UdIARAMD4wPAYEVR0g +ADA0MDIGCCsGAQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBv +c2l0b3J5LzAzBgNVHR8ELDAqMCigJqAkhiJodHRwOi8vY3JsLmdsb2JhbHNpZ24u +bmV0L3Jvb3QuY3JsMD0GCCsGAQUFBwEBBDEwLzAtBggrBgEFBQcwAYYhaHR0cDov +L29jc3AuZ2xvYmFsc2lnbi5jb20vcm9vdHIxMCkGA1UdJQQiMCAGCCsGAQUFBwMB +BggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAWgBRge2YaRQ2XyolQL30E +zTSo//z9SzANBgkqhkiG9w0BAQUFAAOCAQEAOg/NJk04MAioxvxc2Ah67/ocKgPO +Mq5EluFSA5UKUtZnr1uWfN0ZizBbNjprbqAVxoKhyzlmAFeLAqJuhfusVVq4FVAa +kN4JSOyo9lccGDG9xn3IvevCpzlRbaL/HHjeHCcE4c8klegO5NUfsPn7UMrLbp5i +JniG9cT1eI/dcq9uLtWe3c48y7jHLVRg1+WcAkuGRPBXUSvNCps8sfU6TB2KxfAw +PmWHxA5fbkqsiqge5/rkM4AVhFZlJZv7njCIy5EWwQXDqSTsIdLVsPy3I0annff3 +xlMSeDe0E3OPN5deBJv5mYuTPiZCl5/9HrXVy4hINKJmoPqsco/dRy+CdA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/D2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/D2_Issuer_Root.pem new file mode 100644 index 00000000000000..f4ce4ca43dc0a0 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D2_Issuer_Root.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG +A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv +b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw +MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i +YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT +aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ +jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp +xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp +1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG +snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ +U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8 +9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B +AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz +yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE +38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP +AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad +DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME +HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/D3.ors b/deps/openssl/openssl/test/ocsp-tests/D3.ors new file mode 100644 index 00000000000000..d66439b3473b76 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAyMzEwMzkzMFowZjBkMDwwCQYF +Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMjMwOTU5MTJaoBEYDzIwMTIxMDI1MTAzOTMwWjAN +BgkqhkiG9w0BAQUFAAOCAQEAYaaAzW26JQGFRyawj9ROtnSdJ9QPJ6B/wfpJif8e +QU9lmKx0zIDdTum3Mc5tfxML71W025UW9jzowAfQ5bZbqa4nwZlWX5Py3hKebeYo 
+WiND4pvhS4BRkheSkycEok0bj1FJYWYiJVpnTqKAPnOKrlL4qvGC2IOHk2toS/Je +iLyoUwxrPtqaXt4Caoa3I70HE3H1QqvPIGIY6V4bxV7Km/xv99QOutkbfANGiNsx +W7EDB3TRNhldzMnjEwG58X5Pe3xwEVqjCiBL+wQ8JALn08bJzFn9E04aYrqCGc8s +gw1dgaBoZt+0vbQUN71KEocwMj5mzJqottOyqNwo7FZnBaCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl +cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD +VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz +uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/pag== diff --git a/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem b/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem new file mode 100644 index 00000000000000..f371ed1f17d995 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFZDCCA0ygAwIBAgIDC7PGMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jv +b3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZ +Q0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9y +dEBjYWNlcnQub3JnMB4XDTEyMDUwNjE4NDY0MVoXDTE0MDUwNjE4NDY0MVowWzEL +MAkGA1UEBhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYD +VQQKEwtDQWNlcnQgSW5jLjEXMBUGA1UEAxMOd3d3LmNhY2VydC5vcmcwggEiMA0G +CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDeNSAxSFtymeN6rQD69eXIJEnCCP7Z +24/fdOgxRDSBhfQDUVhdmsuDOvuziOoWGqRxZPcWdMEMRcJ5SrA2aHIstvnaLhUl +xp2fuaeXx9XMCJ9ZmzHZbH4wqLaU+UlhcSsdkPzapf3N3HaUAW8kT4bHEGzObYVC +UBxxhpY01EoGRQmnFojzLNF3+0O1npQzXg5MeIWHW/Z+9jE+6odL6IXgg1bvrP4d +FgoveTcG6BmJu+50RwHaUad7hQuNeS+pNsVzCiDdMF2qoCQXtAGhnEQ9/KHpBD2z +ISBVIyEbYxdyU/WxnkaOof63Mf/TAgMNzVN9duqEtFyvvMrQY1XkBBwfAgMBAAGj +ggERMIIBDTAMBgNVHRMBAf8EAjAAMDQGA1UdJQQtMCsGCCsGAQUFBwMCBggrBgEF +BQcDAQYJYIZIAYb4QgQBBgorBgEEAYI3CgMDMAsGA1UdDwQEAwIFoDAzBggrBgEF +BQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmNhY2VydC5vcmcvMIGE +BgNVHREEfTB7gg53d3cuY2FjZXJ0Lm9yZ4IRc2VjdXJlLmNhY2VydC5vcmeCEnd3 +d21haWwuY2FjZXJ0Lm9yZ4IKY2FjZXJ0Lm9yZ4IOd3d3LmNhY2VydC5uZXSCCmNh +Y2VydC5uZXSCDnd3dy5jYWNlcnQuY29tggpjYWNlcnQuY29tMA0GCSqGSIb3DQEB +BQUAA4ICAQA2+uCGX18kZD8gyfj44TlwV4TXJ5BrT0M9qogg2k5u057i+X2ePy3D +iE2REyLkU+i5ekH5gvTl74uSJKtpSf/hMyJEByyPyIULhlXCl46z2Z60drYzO4ig 
+apCdkm0JthVGvk6/hjdaxgBGhUvSTEP5nLNkDa+uYVHJI58wfX2oh9gqxf8VnMJ8 +/A8Zi6mYCWUlFUobNd/ozyDZ6WVntrLib85sAFhds93nkoUYxgx1N9Xg/I31/jcL +6bqmpRAZcbPtvEom0RyqPLM+AOgySWiYbg1Nl8nKx25C2AuXk63NN4CVwkXpdFF3 +q5qk1izPruvJ68jNW0pG7nrMQsiY2BCesfGyEzY8vfrMjeR5MLNv5r+obeYFnC1j +uYp6JBt+thW+xPFzHYLjohKPwo/NbMOjIUM9gv/Pq3rVRPgWru4/8yYWhrmEK370 +rtlYBUSGRUdR8xed1Jvs+4qJ3s9t41mLSXvUfwyPsT7eoloUAfw3RhdwOzXoC2P6 +ftmniyu/b/HuYH1AWK+HFtFi9CHiMIqOJMhj/LnzL9udrQOpir7bVej/mlb3kSRo +2lZymKOvuMymMpJkvBvUU/QEbCxWZAkTyqL2qlcQhHv7W366DOFjxDqpthaTRD69 +T8i/2AnsBDjYFxa47DisIvR57rLmE+fILjSvd94N/IpGs3lSOS5JeA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/D3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/D3_Issuer_Root.pem new file mode 100644 index 00000000000000..3ccc18e8d9c403 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/D3_Issuer_Root.pem @@ -0,0 +1,83 @@ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO +BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q +laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w +ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T +SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- 
+MIIHWTCCBUGgAwIBAgIDCkGKMA0GCSqGSIb3DQEBCwUAMHkxEDAOBgNVBAoTB1Jv +b3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZ +Q0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9y +dEBjYWNlcnQub3JnMB4XDTExMDUyMzE3NDgwMloXDTIxMDUyMDE3NDgwMlowVDEU +MBIGA1UEChMLQ0FjZXJ0IEluYy4xHjAcBgNVBAsTFWh0dHA6Ly93d3cuQ0FjZXJ0 +Lm9yZzEcMBoGA1UEAxMTQ0FjZXJ0IENsYXNzIDMgUm9vdDCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBAKtJNRFIfNImflOUz0Op3SjXQiqL84d4GVh8D57a +iX3h++tykA10oZZkq5+gJJlz2uJVdscXe/UErEa4w75/ZI0QbCTzYZzA8pD6Ueb1 +aQFjww9W4kpCz+JEjCUoqMV5CX1GuYrz6fM0KQhF5Byfy5QEHIGoFLOYZcRD7E6C +jQnRvapbjZLQ7N6QxX8KwuPr5jFaXnQ+lzNZ6MMDPWAzv/fRb0fEze5ig1JuLgia +pNkVGJGmhZJHsK5I6223IeyFGmhyNav/8BBdwPSUp2rVO5J+TJAFfpPBLIukjmJ0 +FXFuC3ED6q8VOJrU0gVyb4z5K+taciX5OUbjchs+BMNkJyIQKopPWKcDrb60LhPt +XapI19V91Cp7XPpGBFDkzA5CW4zt2/LP/JaT4NsRNlRiNDiPDGCbO5dWOK3z0luL +oFvqTpa4fNfVoIZwQNORKbeiPK31jLvPGpKK5DR7wNhsX+kKwsOnIJpa3yxdUly6 +R9Wb7yQocDggL9V/KcCyQQNokszgnMyXS0XvOhAKq3A6mJVwrTWx6oUrpByAITGp +rmB6gCZIALgBwJNjVSKRPFbnr9s6JfOPMVTqJouBWfmh0VMRxXudA/Z0EeBtsSw/ +LIaRmXGapneLNGDRFLQsrJ2vjBDTn8Rq+G8T/HNZ92ZCdB6K4/jc0m+YnMtHmJVA +BfvpAgMBAAGjggINMIICCTAdBgNVHQ4EFgQUdahxYEyIE/B42Yl3tW3Fid+8sXow +gaMGA1UdIwSBmzCBmIAUFrUyG9TH8+DmjvO90rA67rI5GNGhfaR7MHkxEDAOBgNV +BAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAG +A1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYS +c3VwcG9ydEBjYWNlcnQub3JnggEAMA8GA1UdEwEB/wQFMAMBAf8wXQYIKwYBBQUH +AQEEUTBPMCMGCCsGAQUFBzABhhdodHRwOi8vb2NzcC5DQWNlcnQub3JnLzAoBggr +BgEFBQcwAoYcaHR0cDovL3d3dy5DQWNlcnQub3JnL2NhLmNydDBKBgNVHSAEQzBB +MD8GCCsGAQQBgZBKMDMwMQYIKwYBBQUHAgEWJWh0dHA6Ly93d3cuQ0FjZXJ0Lm9y +Zy9pbmRleC5waHA/aWQ9MTAwNAYJYIZIAYb4QgEIBCcWJWh0dHA6Ly93d3cuQ0Fj +ZXJ0Lm9yZy9pbmRleC5waHA/aWQ9MTAwUAYJYIZIAYb4QgENBEMWQVRvIGdldCB5 +b3VyIG93biBjZXJ0aWZpY2F0ZSBmb3IgRlJFRSwgZ28gdG8gaHR0cDovL3d3dy5D +QWNlcnQub3JnMA0GCSqGSIb3DQEBCwUAA4ICAQApKIWuRKm5r6R5E/CooyuXYPNc +7uMvwfbiZqARrjY3OnYVBFPqQvX56sAV2KaC2eRhrnILKVyQQ+hBsuF32wITRHhH +Va9Y/MyY9kW50SD42CEH/m2qc9SzxgfpCYXMO/K2viwcJdVxjDm1Luq+GIG6sJO4 +D+Pm1yaMMVpyA4RS5qb1MyJFCsgLDYq4Nm+QCaGrvdfVTi5xotSu+qdUK+s1jVq3 +VIgv7nSf7UgWyg1I0JTTrKSi9iTfkuO960NAkW4cGI5WtIIS86mTn9S8nK2cde5a +lxuV53QtHA+wLJef+6kzOXrnAzqSjiL2jA3k2X4Ndhj3AfnvlpaiVXPAPHG0HRpW +Q7fDCo1y/OIQCQtBzoyUoPkD/XFzS4pXM+WOdH4VAQDmzEoc53+VGS3FpQyLu7Xt +hbNc09+4ufLKxw0BFKxwWMWMjTPUnWajGlCVI/xI4AZDEtnNp4Y5LzZyo4AQ5OHz +0ctbGsDkgJp8E3MGT9ujayQKurMcvEp4u+XjdTilSKeiHq921F73OIZWWonO1sOn +ebJSoMbxhbQljPI/lrMQ2Y1sVzufb4Y6GIIiNsiwkTjbKqGTqoQ/9SdlrnPVyNXT +d+pLncdBu8fA46A/5H2kjXPmEkvfoXNzczqA6NXLji/L6hOn1kGLrPo8idck9U60 +4GGSt/M3mMS+lqO3ig== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D1.ors b/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D1.ors new file mode 100644 index 00000000000000..66a60a27f6daee --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBSpTXftIZX0 +lLT9zwVSQC5Jfp3pqhgPMjAxMjEwMTAxNDU0NDNaMHUwczBLMAkGBSsOAwIaBQAE +FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDEwMTMwMDAwWqARGA8yMDEyMTAxNzEz +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQBw5Z+0ggEddRTIq7cXlMoxG9Nrx4HtutsH +itIUoZp/rlLoxHsJTo/VmdZvTTGIc7Ok9XuoH61lY/x9glAKsGRjz4Myc9+5rx0O +675lwmOS+uaf3/hRkicVrVr7Pt2ug3R7OXm2MJrohjNKP8lqtLJ0hHP88a8rotKA +r9uz/qHm7K4Uh7dRt/Pnu9MPG74tZeFNN4M1ONMEiRdG39FqzFDXWxwQ3NmyC0Wo +DQn+NklZMknr8mm7IBWpzgU1fTD9R0yv0zdhUZGiEXxvdhm7GJrTET5jS30Ksm5j +o+n39YVu/vGbjyyYx3+WdeQLEyipaGvldSuJpT+R684/RuFWNetcoIID+DCCA/Qw 
+ggPwMIIC2KADAgECAhIRIcYjwu4UNkR1VGrDbSdFei8wDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV +BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDAzMVoXDTEyMTIxOTA4NDAzMVowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDExFzAVBgNV +BAUTDjIwMTIwOTE5MDkzOTAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAxkkb6QhDH3sEDj4zaysjVzYelq9lZ1cso4R2IyQxaoPaG6GkaCmHA4sz6KP+ +m3ADqplibEUBa/mzCxHW8/oy3NhGMFdbezduZrnRFLbzakOTeIo8VEIM3JPfgREv +CX8nj6Xu7ERD6JO/ZQ9Xr7YVzKKN+3cVZlcMHoGBnOPcO2Sz0AcYyk5m5IsGBRoT +T86j6Cr9PhOPTVwXL6Wxy1KVHsUZXUwnRacV0O4SHWQ4zM9Sablus9fTbh1CgIqW +sKDyzVB4yECXkBVeUlA+cuCaRRVHRiR+jPDSgbU62nnNudEpGG7dyoop6IOvXv2O +ydncWzaukxIVvQ/Ij85kHqs7HQIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw +HQYDVR0OBBYEFKlNd+0hlfSUtP3PBVJALkl+nemqMB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCe4rZg61Dmwygl/Uae +BJZog64/FvuB1sfCqKLJTjKOfLcugSTX1TT7bLJbzXRGPQuorI3TIZEOwldIw01d +DTLlsOCHrfHd+bpxgijxPkUuaA4NYnpvqTEMJqPKOC8QYfKupNjAPSuHvwqvqCfO +RCe3jY6xQDO0WCTZ8/xMsOkw+J/YEYqALETf2Ug7k5eRL/TvfLd8Sgi7vPfmUeiW +ptlsbhMOWQoQc+JA3vCI01rrjNq+0kIZ/r8nPGvablRr0Aakk6eDuS2dcReaPwuK +0xE136pJYiXdQ3SA7uwmlorjxmejavyoPCr23TU74DQEt6hhc6uIcabsa4Y8KvJy +RI4F diff --git a/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D2.ors b/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D2.ors new file mode 100644 index 00000000000000..664c8d25faf1c7 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D2.ors @@ -0,0 +1,32 @@ +MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB +yVdbHxANRLCFYj1mqBgPMjAxMjEwMTEwOTE1MzNaMG4wbDBEMAkGBSsOAwIaBQAE +FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL +BgkqhkiG9w0BAQUDggEBAF/9ByrCS+pCCK4qovqUAH/yoWckmpLFCzKJGHkErJeY +FlUbAJuu/Gs0IdLmLp+2VbStjsL4vLtDU2Q4e417C1fm8+ixh+kP7qPRd8cxyMBx +cmD2m1v0CgbrflCZEC71cTrrWpcW+6jg623lI4Ug3A4zlizbT/f9IrxuV9VB9/G5 +6kPI5dYOVZM0ColIxmJsafuxfr6ONQLPHKTlZJK3SyWebs25006OmrSyfBi0j26j +WU5d6B2NJZBKqvDVMXxZ0q6QOgKxOs8WD+6DaA1d1f7gTOl45XJZWz5KnRePyRxM +Fp0ak6XYbE1y2vHE2RWp1w4lcVJ0BUQXWxx+g86F5W2gggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID +MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi +oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL +866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J +ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5 +MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw +yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec +TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7 +8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm 
+uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb +4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa +YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC +fBIRKjnK621vAWvc7UR+0hqnZ+Y= diff --git a/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D3.ors b/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D3.ors new file mode 100644 index 00000000000000..ac2bb25de24c85 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISDOSC_D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTEwMTAyMVowZjBkMDwwCQYF +Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMTEwOTUyNDJaoBEYDzIwMTIxMDEzMTAxMDIxWjAN +BgkqhkiG9w0BAQUFAAOCAQEAWX7faLDXkmIdOv/IKBh7awhPmGUhFPVSrMI4dc9/ +fcPDOYhFwWr9evKT/QdXRGpZY493mfa4Z6eEDxRDTexOloaiaJzVpSeV9hoJUxoS +8NEWDyi33bDlIJH6zru4kk1LpuSMiSWsvLaeoRhHmW3EPDeadpCa5tYX2yNW5hdP +iCfphDJ34/hWHHwHP6mLd1wEO1Rw6nymqeDbuLk1FviD/ZWXMGzK8Sv++tmsQ0Tg +7XrkIPcSrozPKOTCf/1iJVF5KeQVIb0Ju1PvGUKtGaVTX8IZQmer2WQ1D6OOUcsS +cWA6NSpWmScX/0/uBpXdSDX0AnGUS9SNrPNEolz6rA5OUaCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl +cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD +VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz +uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/paw== diff --git a/deps/openssl/openssl/test/ocsp-tests/ISIC_D1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/ISIC_D1_Issuer_ICA.pem new file mode 100644 index 00000000000000..b88477532e75f1 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISIC_D1_Issuer_ICA.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE----- +MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G +A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp +Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz +MTAwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z 
+YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g +RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDNoUbMUpq4pbR/WNnN +2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb +AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl +VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs +Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r +wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb +wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB +/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8 +BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t +L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs +c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB +hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud +JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW +gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa +pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF +o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb +LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq +iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG +qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl +TercGL7FG81kwQ== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISIC_D2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/ISIC_D2_Issuer_Root.pem new file mode 100644 index 00000000000000..22f34cbbce08a3 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISIC_D2_Issuer_Root.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG +A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv +b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw +MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i +YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT +aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ +jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp +xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp +1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG +snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ +U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8 +9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B +AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz +yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE +38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP +AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad +DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME +HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4Q== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISIC_D3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/ISIC_D3_Issuer_Root.pem new file mode 100644 index 00000000000000..c1752e6b5d2053 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISIC_D3_Issuer_Root.pem @@ -0,0 +1,41 @@ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO 
+BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q +laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w +ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T +SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVE +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISIC_ND1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/ISIC_ND1_Issuer_ICA.pem new file mode 100644 index 00000000000000..25bb90ffffdd03 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISIC_ND1_Issuer_ICA.pem @@ -0,0 +1,29 @@ +-----BEGIN CERTIFICATE----- +MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw +MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp +b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi +7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK +HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy +hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS +b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f 
+BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY +5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq +fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1 +MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv +bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v +Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm +MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU +cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv +Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9 +P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40 +TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj +hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs +tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl +9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf7 +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISIC_ND2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/ISIC_ND2_Issuer_Root.pem new file mode 100644 index 00000000000000..129eb4b28de445 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISIC_ND2_Issuer_Root.pem @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw +MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3 +UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI +2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8 +Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp ++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+ +DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O +nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w +DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD +ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8 +t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X +HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl +Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi +pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug +R1uUq27UlTMdphVx8fiUylQ5PsI= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISIC_ND3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/ISIC_ND3_Issuer_Root.pem new file mode 100644 index 00000000000000..4904a3224a2d20 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISIC_ND3_Issuer_Root.pem @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU +MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs +IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290 +MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux +FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h +bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt +H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9 
+uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX +mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX +a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN +E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0 +WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD +VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0 +Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU +cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx +IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN +AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH +YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5 +6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC +Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX +c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a +mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgU= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ISOP_D1.ors b/deps/openssl/openssl/test/ocsp-tests/ISOP_D1.ors new file mode 100644 index 00000000000000..d28c9ec3666965 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISOP_D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBSpTXftIZX0 +lLT9zwVSQC5Jfp3pqhgPMjAxMjEwMTAxMTU1NDVaMHUwczBLMAkGBSsOAwIaBQAE +FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDEwMTAwMDAwWqARGA8yMDEyMTAxNzEw +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQCaiUf6TuPaSmZR2i3hUwqdEfhjcZkcCXPu +9diWuDZbaL6ubthfeTwx6OsZ0eM3Q+WPhBNlYQ9Sm8PDUQsQiq3YvuYu+QUisChx +PN6BUEwFQZAGz+FX2h5+kAmK1M/xZeXMBCXJWJCClagiw5hOJfeV0ue7RUZRVuZv +am0ZjyIeLsxsIrxghlcaJRosFmYNoM++euu5lvclutv1UQ5yyNxlYy0T/jA9gS07 +WJ/i38+zxnXTuAPOm67p5N1IkEAEg/7OPRIG17Ig1C38NctN74vAOdTU1d/ay05V +Bz4ZiI9PffkUkPgW2QRQCEjv50i80wYkKH5pIbT/mTk4t53DUK1UoIID+DCCA/Qw +ggPwMIIC2KADAgECAhIRIcYjwu4UNkR1VGrDbSdFei8wDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV +BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDAzMVoXDTEyMTIxOTA4NDAzMVowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDExFzAVBgNV +BAUTDjIwMTIwOTE5MDkzOTAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAxkkb6QhDH3sEDj4zaysjVzYelq9lZ1cso4R2IyQxaoPaG6GkaCmHA4sz6KP+ +m3ADqplibEUBa/mzCxHW8/oy3NhGMFdbezduZrnRFLbzakOTeIo8VEIM3JPfgREv +CX8nj6Xu7ERD6JO/ZQ9Xr7YVzKKN+3cVZlcMHoGBnOPcO2Sz0AcYyk5m5IsGBRoT +T86j6Cr9PhOPTVwXL6Wxy1KVHsUZXUwnRacV0O4SHWQ4zM9Sablus9fTbh1CgIqW +sKDyzVB4yECXkBVeUlA+cuCaRRVHRiR+jPDSgbU62nnNudEpGG7dyoop6IOvXv2O +ydncWzaukxIVvQ/Ij85kHqs7HQIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw +HQYDVR0OBBYEFKlNd+0hlfSUtP3PBVJALkl+nemqMB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCe4rZg61Dmwygl/Uae +BJZog64/FvuB1sfCqKLJTjKOfLcugSTX1TT7bLJbzXRGPQuorI3TIZEOwldIw01d +DTLlsOCHrfHd+bpxgijxPkUuaA4NYnpvqTEMJqPKOC8QYfKupNjAPSuHvwqvqCfO +RCe3jY6xQDO0WCTZ8/xMsOkw+J/YEYqALETf2Ug7k5eRL/TvfLd8Sgi7vPfmUeiW +ptlsbhMOWQoQc+JA3vCI01rrjNq+0kIZ/r8nPGvablRr0Aakk6eDuS2dcReaPwuK +0xE136pJYiXdQ3SA7uwmlorjxmejavyoPCr23TU74DQEt6hhc6uIcabsa4Y8KvJy +RI4G diff --git a/deps/openssl/openssl/test/ocsp-tests/ISOP_D2.ors b/deps/openssl/openssl/test/ocsp-tests/ISOP_D2.ors new file mode 100644 index 00000000000000..b6c541df7dbc3e --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISOP_D2.ors @@ -0,0 +1,32 @@ 
+MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBT0zghPr/K8 +jV5hpjGMML9Q+DwzShgPMjAxMjEwMTAxMjA5NTlaMG4wbDBEMAkGBSsOAwIaBQAE +FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL +BgkqhkiG9w0BAQUDggEBAGZY28eFWl169g7puLnKSeEzi6Ma5/rErOveFRp052ck +785B83HWkNmW/Bgw7Ws6Y7jBJce6ZQ5TMhwgNP34HuG/mVyn2ZjtCe4KKFBVnZV7 +mHGx93jgKkQvdp4pbNKxZ504eZDp8UOlR9+uwWOWHVObn7o+2N8iWKErSbZ2uX54 +Ajk8Hg/XN5wI4RUtcK3QpZSf3Ren5iit4NInwCpmTOkDz/IVK96BWaEQICq4VlHG +ziD0H0SlBQCdcSPzZndGoCtIhNyJEL3O2y3Grg4X1XH7VeeyGesuTLEIAEMHJPJD +TOVNoe5YPRK9Tqb+6jsubw8X/1b72kw3xVgb6MfC0tqgggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXoveHMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMQY/h5DSRT24n +mMtD19lrn8WZzOoIl+Z9qOsrLLjEQeTMDlL7JPZh5pLaHHb6kSWT+O/RcEwpw6Dq +H9jtAgDOsGoN7gCK7wJbIvn4MdmkXZqVBcVl3uLuII3v1CPnlc/zoz5d9qXcZKb6 +YuzseyzhDPecQ+7l2NVAUOFUj8GXOZi//bIveMsm+/zSLMfriIC84Uym2QY649SC +aFNbtF/tR6upvLCLe0b2D1g+OBfGqZasi3QI5uX6lT0gHbCnPhRo3uxG2+S4KL3M +9sndMByrR5K6QuVf7UqA1vt0CfbA2OUXwcH5x3/TsHxtXDj2F/fWnC9QBBSN5n4I +G8K7ZpYtAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBT0zghP +r/K8jV5hpjGMML9Q+DwzSjBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAGU9HIQImzhTHkQLyA178dUdnF5E3DdzmNtwVV3cxGrFOLMpciMQLioQ/xp5 +t6j5Mshlp59imFylqowRRxRy4aN5TtMCufNh7yHIxI2Dt4O6qpPM946t5CJkMy+k +63pXz2xFIxaJDzAmzpWzu70OY0jrh3dZa8NR4AvhtoZ8zFE6suva6ZGK7JIoINaA +j5uyZ0qU+7vFwV1awdReNV6494z/HRjs1n956mNbalB9mKp9XXyfZlix/nN5mTJd +NlJqz7QjnCzZRM/Gfamzk8L3/CPS3XmSblFyn6SeZ92Vms4PNqZiEUNa2TMKXQR1 +EMiDRMkyfIIMI80VgRvvzCiOt0c= diff --git a/deps/openssl/openssl/test/ocsp-tests/ISOP_D3.ors b/deps/openssl/openssl/test/ocsp-tests/ISOP_D3.ors new file mode 100644 index 00000000000000..a8a2f9122f4897 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISOP_D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMDEzMjE1OVowZjBkMDwwCQYF +Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMTAxMzA1MjBaoBEYDzIwMTIxMDEyMTMyMTU5WjAN +BgkqhkiG9w0BAQUFAAOCAQEAH1auyXFf1fOdfShSnAFkg5JsRUvajrilUioTkPIn +IGYV//huaPNZwZGCC2haZIdUuKB6G2OCXeZVskBTXPjt8/6JmoHgsZeI3x5xKXxZ +vddLC0PgYp0cA3FqjXR2UCpdBF+GK37rnfZsdW2vD9JaEBXxTV4+ICDAg15ZphJW +lLGmdP3mQqPURIwamcYam8tntARimgEpA0KgfVue2A+izjcxC7qk9BQYG72Fh3hC +ZFxi5u6xKNUQ2EBF9KXZyP9d2i/bYCZAUeUSRtir+fsOXHlihYRih9npKyAPwpHd +NqhwK9NhKed8gmkX3cSaK0arBx7ev7avhM4Dqem+BzppjKCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl +cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD 
+VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz +uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/pag== diff --git a/deps/openssl/openssl/test/ocsp-tests/ISOP_ND1.ors b/deps/openssl/openssl/test/ocsp-tests/ISOP_ND1.ors new file mode 100644 index 00000000000000..b2306224495367 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISOP_ND1.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp +Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTAwODU0NDVaMHMwcTBJMAkGBSsOAwIaBQAE +FEi2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz +IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMDA4NTQ0NVqgERgPMjAxMjEwMTQwODU0 +NDVaMA0GCSqGSIb3DQEBBQUAA4IBAQDHKDxWTbAHRXY7HapfhE99T+OSa/AfRYqX +H9yIeMRa5VftXMyvBFuvVm/qLRwK6mxhkiVIvF/Pk5yxMjbm7xPO26D+WHOdQML4 ++M4OX9BO76FjZRin5x+4b0Xo5SuSU1ulqfvSZnx+nG+hMbt/3Y7ODCEUWCYFoXNp +U+TXTbv2mwJ9AL8Q/zjL4P8NJHzFJBKjEs+AAVRxTY/5RHHKU9dcm7ux/gsWoDUM +w677Xxzn6icd8mqn72/HmzPnMrLHKKJFe2escbJn7JlV6qbZ9EWbrr+3OH0IJy5I +E3LcPIsNZ//QEc6vS6J+j8ljV8Xne6rS1EmiOwV9NgubvYwDCm4R diff --git a/deps/openssl/openssl/test/ocsp-tests/ISOP_ND2.ors b/deps/openssl/openssl/test/ocsp-tests/ISOP_ND2.ors new file mode 100644 index 00000000000000..d14efb953286ac --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISOP_ND2.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV +N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAwMDI1NTdaMHMwcTBJMAkGBSsOAwIaBQAE +FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0 +0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDAwMjU1N1qgERgPMjAxMjEwMTQwMDI1 +NTdaMA0GCSqGSIb3DQEBBQUAA4IBAQCJRXcrz4wJe7bqWBHULu/QDXVz74OhSNlu +swI0J4h+UmzJuW1GpdhTwJcTG3ARVwCLKz3evvpvHSumcsop0G3NolryNLP/oGD0 +Vf6PbLrJ8v+NxUNugPbtWM985Ti/B2a+XjbzYlH2vS3KOTL4X1zWSL07IQFNXc2h +yHBscKpYgt0mZcFZFxN3NTCNpT6IjJzZzTG9xTYZ3hZdMQQ3DYO+/Hv4J+U1/Ybq +CjuMWRak/0R/BiBDJdGhbThlvV7bNUxYY7DVaOiLER8ptpmhnzlB/vsTAxZqX48J +mJdv2bxoTby98Pm/BMydEA9qcFqyP1XvqhzIY35ngoS/1XREyW7t diff --git a/deps/openssl/openssl/test/ocsp-tests/ISOP_ND3.ors b/deps/openssl/openssl/test/ocsp-tests/ISOP_ND3.ors new file mode 100644 index 00000000000000..3ee7158cdcde57 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ISOP_ND3.ors @@ -0,0 +1,10 @@ +MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm +9/rEJlTvA73gJMtUGhgPMjAxMjEwMDkxNjAxNTNaMHQwcjBKMAkGBSsOAwIaBQAE +FHyxZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN 
+bJWejX5BTb8DmevkCauAABgPMjAxMjEwMDkxNjAxNTNaoBEYDzIwMTIxMDEzMTYw +MTUzWjANBgkqhkiG9w0BAQUFAAOCAQEAFnJAzuT8P4KKyTI6sdj5HkQ352qEu5CN +K9M2kU/eg9kPfwLv8z3yArobwgx+/IDRajbVAKrk8UPCGUqkDc0OiU5c0+jpn+nT +20VVCtWsBSWDfzKqYln/NGrblhv+/iuFZJpyfud5nWguW5nogPC8IAfgt9FMDMl6 +wlQWLSWEkgAJWvhNR3nzgvyMnuDuMIVQgB9/+vAIxA7nlpEEh6KTswyGqE9+u1yC +kvrz4PwKZQMT6r1eRCLs6NaagOZT84QHhZ6TAA+QHjfK406KL8F9mFgbGKbW+st2 +QHm+giUhrgZMv+1Yaxe34BjDS439LCPjdZ29On8FeZr3F55T+s3VzA== diff --git a/deps/openssl/openssl/test/ocsp-tests/ND1.ors b/deps/openssl/openssl/test/ocsp-tests/ND1.ors new file mode 100644 index 00000000000000..74527413f29014 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND1.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp +Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE +FEi2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz +IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx +MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk +8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs +RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V +eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv +mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc +087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32 diff --git a/deps/openssl/openssl/test/ocsp-tests/ND1_Cert_EE.pem b/deps/openssl/openssl/test/ocsp-tests/ND1_Cert_EE.pem new file mode 100644 index 00000000000000..e646162440df77 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND1_Cert_EE.pem @@ -0,0 +1,36 @@ +-----BEGIN CERTIFICATE----- +MIIGTTCCBTWgAwIBAgIQIuEzIiCgSN8psr+aMcKbBzANBgkqhkiG9w0BAQUFADCB +jjELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxNDAyBgNV +BAMTK0NPTU9ETyBFeHRlbmRlZCBWYWxpZGF0aW9uIFNlY3VyZSBTZXJ2ZXIgQ0Ew +HhcNMTEwMzMxMDAwMDAwWhcNMTMwNjI3MjM1OTU5WjCCAT8xETAPBgNVBAUTCDA0 +MDU4NjkwMRMwEQYLKwYBBAGCNzwCAQMTAkdCMR0wGwYDVQQPExRQcml2YXRlIE9y +Z2FuaXphdGlvbjELMAkGA1UEBhMCR0IxDzANBgNVBBETBk01IDNFUTEbMBkGA1UE +CBMSR3JlYXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRYwFAYDVQQJ +Ew1UcmFmZm9yZCBSb2FkMRYwFAYDVQQJEw1FeGNoYW5nZSBRdWF5MSUwIwYDVQQJ +ExwzcmQgRmxvb3IsIDI2IE9mZmljZSBWaWxsYWdlMRowGAYDVQQKExFDT01PRE8g +Q0EgTGltaXRlZDEaMBgGA1UECxMRQ29tb2RvIEVWIFNHQyBTU0wxGjAYBgNVBAMT +EXNlY3VyZS5jb21vZG8uY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEA168izw0zK6cChTGFuAwNARwTu1Ky/z+dXHkSmB0tQrAk3bq7mnUPtmQ+td8r +G2hlhQPd+YXQVYEW3RuopydmdB9wMlEGCCfU2ZqohsC9uut+HenCVbYvn4sSB0KJ +VdOXLPCEnfdk/FmcNWcYv73HmoJXZjT0THNQmnfpo6mMGAOerenMgNuCpq1buZ8c +fFUeUY18ZGLZKZyRNM6GPgVA37Dm8Ru+9Cf8/rm7NSIoVWH4BDztM3Y1BZvZ0d4G +49jRA4MXbhsDEMYzaSCDmaRHSFhCtrGkN2S4A1ZxoSoxQVCLcnnInVd+J0X8J6pa +Efio/aD6UQBQq29HyTsWVe6BewIDAQABo4IB8TCCAe0wHwYDVR0jBBgwFoAUiERR +/1AqaV4tiPQhutkM8s7L6nwwHQYDVR0OBBYEFKvAXKp4bYRmxU4SlM8k8FbWiXiL +MA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8EAjAAMDQGA1UdJQQtMCsGCCsGAQUF +BwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAwYJYIZIAYb4QgQBMEYGA1UdIAQ/MD0w +OwYMKwYBBAGyMQECAQUBMCswKQYIKwYBBQUHAgEWHWh0dHBzOi8vc2VjdXJlLmNv +bW9kby5jb20vQ1BTMFMGA1UdHwRMMEowSKBGoESGQmh0dHA6Ly9jcmwuY29tb2Rv +Y2EuY29tL0NPTU9ET0V4dGVuZGVkVmFsaWRhdGlvblNlY3VyZVNlcnZlckNBLmNy +bDCBhAYIKwYBBQUHAQEEeDB2ME4GCCsGAQUFBzAChkJodHRwOi8vY3J0LmNvbW9k +b2NhLmNvbS9DT01PRE9FeHRlbmRlZFZhbGlkYXRpb25TZWN1cmVTZXJ2ZXJDQS5j +cnQwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmNvbW9kb2NhLmNvbTAzBgNVHREE +LDAqghFzZWN1cmUuY29tb2RvLmNvbYIVd3d3LnNlY3VyZS5jb21vZG8uY29tMA0G 
+CSqGSIb3DQEBBQUAA4IBAQC9SoVG+B40khDWAzlz+G0WDBM3OuqK5n8vY/XxdPS5 +qyv6K05S4VRGR/6PQa1UVzMbnhfLh54OWrpnalRGabpTmKDu8Pa912pzDSzMxg4U +Rff4/hVLd1n/58q+riLxdtkIigLUjtFfwUrE1H89QODOCb4nw7f9BQaDoug+ovM3 +KO9rxVZ/3TshaxW0mPVM/cMbX+6RrQ7+d1y5fdX/fksCZhOW+P25+FPlaorQEWNa +s0UZNQ6qVuxB7CPmnLqmLBfAKTbeKcQFxx//0eyyZqCkzIvYUNjeRR0Q7DnxXq4C +Pj1Y6VcPJDmZOeogte5/vNIdU8Wq55IJJ1G/uKXztwVT +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ND1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/ND1_Issuer_ICA.pem new file mode 100644 index 00000000000000..3260db30014e9d --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND1_Issuer_ICA.pem @@ -0,0 +1,29 @@ +-----BEGIN CERTIFICATE----- +MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw +MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp +b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi +7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK +HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy +hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS +b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f +BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY +5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq +fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1 +MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv +bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v +Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm +MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU +cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv +Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9 +P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40 +TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj +hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs +tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl +9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6 +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ND2.ors b/deps/openssl/openssl/test/ocsp-tests/ND2.ors new file mode 100644 index 00000000000000..24c1cb2da28ff6 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND2.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV +N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE +FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0 +0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz +MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD +sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA +PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz +oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC ++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a +vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA diff --git a/deps/openssl/openssl/test/ocsp-tests/ND2_Cert_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/ND2_Cert_ICA.pem new file mode 100644 index 
00000000000000..3260db30014e9d --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND2_Cert_ICA.pem @@ -0,0 +1,29 @@ +-----BEGIN CERTIFICATE----- +MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw +MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp +b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi +7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK +HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy +hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS +b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f +BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY +5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq +fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1 +MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv +bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v +Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm +MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU +cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv +Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9 +P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40 +TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj +hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs +tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl +9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6 +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ND2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/ND2_Issuer_Root.pem new file mode 100644 index 00000000000000..dc99810098d77b --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND2_Issuer_Root.pem @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw +MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3 +UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI +2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8 +Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp ++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+ +DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O +nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w +DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD +ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8 +t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X +HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl +Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi 
+pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug +R1uUq27UlTMdphVx8fiUylQ5PsE= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ND3.ors b/deps/openssl/openssl/test/ocsp-tests/ND3.ors new file mode 100644 index 00000000000000..20084180c5a12c --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND3.ors @@ -0,0 +1,10 @@ +MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm +9/rEJlTvA73gJMtUGhgPMjAxMjEwMTExMTM2NDdaMHQwcjBKMAkGBSsOAwIaBQAE +FHyxZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN +bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTExMTM2NDdaoBEYDzIwMTIxMDE1MTEz +NjQ3WjANBgkqhkiG9w0BAQUFAAOCAQEAfnj3nh6z+USW6VlDWRytWpNmC1ZRwWlg +P2+G4UF4HE8bMJkuiFLcZEVYTxlTYv+xAEpSFxdInFM2Q5C+O6pWOZ9NbikeR4oZ +FTI1kAZ0Uw+YMpVM4ztvKBIpUSqlbi69iNJ9WGF6qzxVeqobSOyrjjwtTsuglUbR ++mshp/SP7Br2IIK+KM1vgsmVExPfGPYANyk7ki/Q8uUnjqkreeSa9WC2iJLGcybW +YavDhYWALebUGukNeedkloYhdjPboPPxDkKNjakwIG8EkbJK7uXewMOHHOFvFTX3 +K388me8u5iQf4f3fj6ilEgs6f5Szzmb+vklPX0zIny/TVk2+Az7HmA== diff --git a/deps/openssl/openssl/test/ocsp-tests/ND3_Cert_EE.pem b/deps/openssl/openssl/test/ocsp-tests/ND3_Cert_EE.pem new file mode 100644 index 00000000000000..c6a76d661e4bbf --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND3_Cert_EE.pem @@ -0,0 +1,34 @@ +-----BEGIN CERTIFICATE----- +MIIF3TCCBMWgAwIBAgIRAKcNbJWejX5BTb8DmevkCaswDQYJKoZIhvcNAQEFBQAw +bzELMAkGA1UEBhMCU0UxFDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1B +ZGRUcnVzdCBFeHRlcm5hbCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3Qg +RXh0ZXJuYWwgQ0EgUm9vdDAeFw0xMDA1MDQwMDAwMDBaFw0xNTA1MDQyMzU5NTla +MIIBCjELMAkGA1UEBhMCR0IxDzANBgNVBBETBk01IDNFUTEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRYwFAYDVQQJEw1UcmFm +Zm9yZCBSb2FkMRYwFAYDVQQJEw1FeGNoYW5nZSBRdWF5MSUwIwYDVQQJExwzcmQg +Rmxvb3IsIDI2IE9mZmljZSBWaWxsYWdlMRowGAYDVQQKExFDT01PRE8gQ0EgTGlt +aXRlZDEaMBgGA1UECxMRQ29tb2RvIFByZW1pdW1TU0wxLDAqBgNVBAMTI2FkZHRy +dXN0ZXh0ZXJuYWxjYXJvb3QuY29tb2RvY2EuY29tMIIBIjANBgkqhkiG9w0BAQEF +AAOCAQ8AMIIBCgKCAQEAz5MM/mco91yFJNtF3t9c0x/bGds+zGAqJlHBXCR43og+ +3vgsBkCcn5M3PAqmL6XxilpsrEfS6RqtNcLfxwDyl7rr3qpJSM537Km1ZGOTHs0C +i0JA4YBZFOxBwPO2nHQGD+t9kJx3auFdBLnjJc5Q3jFUmnyJ8D2h3P9BrHgOoIbO +KYOUc/3zcqE6NttdbiuUMzlad8guhnXlWPCh2NJtNtMLDQxG7DWWDEm/Kt+CdKAR +jko6kEp7nqBKyujjJoGD2nEtEnuuqiB9n6sgSXR1NGtecJrW8IqIS7hkcsxhGTI9 +jnY73+NiMV3nglejkNseTUdcEi6L94EdifXuVLgEAwIDAQABo4IB1TCCAdEwHwYD +VR0jBBgwFoAUrb2YejS0Jvf6xCZU7wO94CTLVBowHQYDVR0OBBYEFDXpt6NocCrd +7XZ2MLUa116TIesKMA4GA1UdDwEB/wQEAwIFoDAMBgNVHRMBAf8EAjAAMB0GA1Ud +JQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjBGBgNVHSAEPzA9MDsGDCsGAQQBsjEB +AgEDBDArMCkGCCsGAQUFBwIBFh1odHRwczovL3NlY3VyZS5jb21vZG8ubmV0L0NQ +UzB7BgNVHR8EdDByMDigNqA0hjJodHRwOi8vY3JsLmNvbW9kb2NhLmNvbS9BZGRU +cnVzdEV4dGVybmFsQ0FSb290LmNybDA2oDSgMoYwaHR0cDovL2NybC5jb21vZG8u +bmV0L0FkZFRydXN0RXh0ZXJuYWxDQVJvb3QuY3JsMDQGCCsGAQUFBwEBBCgwJjAk +BggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2RvY2EuY29tMFcGA1UdEQRQME6C +I2FkZHRydXN0ZXh0ZXJuYWxjYXJvb3QuY29tb2RvY2EuY29tgid3d3cuYWRkdHJ1 +c3RleHRlcm5hbGNhcm9vdC5jb21vZG9jYS5jb20wDQYJKoZIhvcNAQEFBQADggEB +AF2TF6xg8ZoBICoiQvjD2Z0SKcJRw1Dhj3HpGzV9F+Y0e/MxCXhYA+340JZxnC2P +VA968QKFrNwDWiS9Klc+cs4k3HIeiZp3uHw1ezElqXXNa+S1CrSS03FqWeeugSrB +xpuXCWDJSfD4DJq835hlEuXgxmAjsbuRUjaq1lxwSWnNoBkfMCCAgVlHtFljTlqq +nwfBZcnj73+yiERgTvhN4gEL59ZzjFliKEUuXHZoe8klhn73cnY+XoRV0e7wU+Xj +PzLoAhjGkS35hfDQTHdCwNBaN3iI2Q+HBjhfffAYFdK+Jo3kSXq12s7CJD7utAho +xxRhA0l1ziJgrEubLi6ItNg= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/ND3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/ND3_Issuer_Root.pem new file 
mode 100644 index 00000000000000..20585f1c01e187 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/ND3_Issuer_Root.pem @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU +MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs +IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290 +MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux +FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h +bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt +H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9 +uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX +mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX +a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN +E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0 +WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD +VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0 +Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU +cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx +IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN +AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH +YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5 +6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC +Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX +c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a +mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WIKH_D1.ors b/deps/openssl/openssl/test/ocsp-tests/WIKH_D1.ors new file mode 100644 index 00000000000000..c8ce8d87019abd --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WIKH_D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRf2uQDFpGg +Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMTExMzI5NDJaMHUwczBLMAkGBSsOAwIaBQAE +FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSxsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDExMTAwMDAwWqARGA8yMDEyMTAxODEw +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQCX3gEX+JVfxuYmxBBxC9sNCi3o76ODIicr +XMvm0DTO9VSyDBl7LDsMMgNMIDtO3flQSlBNZ2B9ikwyckXOSWXiXzybZVMdA/uq +NchgkM9aChrlhG0AHZyYe/+dJSmEBFXkIomy+S6YQ7Mcs2s6WxCeWU7gB4XOy1zO +/CvWjv0WQV1J2lZZ6pkvtECKAEjrVP275LA38HInFbYvVPXWzl4sDcX2TAxwUa4S +xAJAfwl+B+oZSerZWGRo6KjZuB/OB31cB5n/lABmRez6Obi27D0UUCRv/eSbwOF4 +Ofaa/XzJt7sF7WpVgoR41HI88W7aN4vtcw1zcVsBmfRMUNYZSqtfoIID+DCCA/Qw +ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV +BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV +BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK +qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi +M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s +LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm +UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi +9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw 
+HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP +Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3 +tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+ +snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL +PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG +5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p +4J2l diff --git a/deps/openssl/openssl/test/ocsp-tests/WIKH_D2.ors b/deps/openssl/openssl/test/ocsp-tests/WIKH_D2.ors new file mode 100644 index 00000000000000..1d562fad0ae59c --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WIKH_D2.ors @@ -0,0 +1,32 @@ +MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB +yVdbHxANRLCFYj1mqBgPMjAxMjEwMTExMzMwMTBaMG4wbDBEMAkGBSsOAwIaBQAE +FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRhe2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL +BgkqhkiG9w0BAQUDggEBAA0H7bvcULg1GayFtQVrYDyW0feOEMNGLmgaGuwRdrY3 +KuWyNJLUUJKQZnOkdT8A4RpVX8xD4EgVyOqRACUahgdgp0g3QOn+vf2Zyf+NJIgW +woF5qaJgCOeIOw5O6F4r1vUhp8NvqXHotswgG58Nzz6UMD+uyIgq5o8uzOjryEm6 +wO2X+KvN9sMzkeZhNvAHkgBQL8CG4CggWnzn7At1DmhhsizfhDrosigM4Zr6Sm6z +v1YfSPznD0b3TQ7RzvpbJPofF2aJXMIMxdKR5pemuevTDR2+JCXjVPsD/ZODFykc +rsQeqx2vTOIg84PRKboXjCAwHn4rIN7JJtQqebLtD9egggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID +MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi +oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL +866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J +ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5 +MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw +yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec +TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7 +8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm +uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb +4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa +YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC +fBIRKjnK621vAWvc7UR+0hqnZ+U= diff --git a/deps/openssl/openssl/test/ocsp-tests/WIKH_D3.ors b/deps/openssl/openssl/test/ocsp-tests/WIKH_D3.ors new file mode 100644 index 00000000000000..cbac8e8f1b9153 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WIKH_D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTE0MDYzNlowZjBkMDwwCQYF +Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBe1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMTExMzU4MTBaoBEYDzIwMTIxMDEzMTQwNjM2WjAN +BgkqhkiG9w0BAQUFAAOCAQEAjcryO6FUK5+TcPBxJKixVt9q07Xy3qv1e/VFuJ0f 
+tnYDcu83Q5yCta49PXaA13nFDFZ445wCDivDBLolS6JKSh+JrLpAxSBzak7Ps8wz +DPNAtexZz9/hPPzHnGOMlRtew07jk+NX5ZgCxDZGmBHIHOGyab2WoqmpRTll0oP4 +b/DzI3mzrur5lm2NAT3ZJ8bVaWsAJBVTfUye3S4GRWlfGSRVAMk0QHnCkYP42okc +psIKbvdIoS2gxo6kBTMevxciPV2lPIiSrIWH0IGm7AqGM5+Vz7IdbD6fOQd1I3uw +O+1NugMYfScB6jCvSW2uESeRZ+qW/HMXQbU1eiH+x88UIKCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl +cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD +VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz +uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/pag== diff --git a/deps/openssl/openssl/test/ocsp-tests/WIKH_ND1.ors b/deps/openssl/openssl/test/ocsp-tests/WIKH_ND1.ors new file mode 100644 index 00000000000000..a16476fb8436fb --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WIKH_ND1.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp +Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE +FEi2DTgjjfhFbk7lhD6jlBEYApefBBSJRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz +IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx +MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk +8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs +RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V +eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv +mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc +087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32 diff --git a/deps/openssl/openssl/test/ocsp-tests/WIKH_ND2.ors b/deps/openssl/openssl/test/ocsp-tests/WIKH_ND2.ors new file mode 100644 index 00000000000000..5aff2abf39a4b9 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WIKH_ND2.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV +N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE +FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQMWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0 +0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz +MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD +sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA 
+PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz +oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC ++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a +vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA diff --git a/deps/openssl/openssl/test/ocsp-tests/WIKH_ND3.ors b/deps/openssl/openssl/test/ocsp-tests/WIKH_ND3.ors new file mode 100644 index 00000000000000..4f8a6eaba32426 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WIKH_ND3.ors @@ -0,0 +1,10 @@ +MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm +9/rEJlTvA73gJMtUGhgPMjAxMjEwMTExMTM2NDdaMHQwcjBKMAkGBSsOAwIaBQAE +FHyxZlScq9tE7mImFq30ZXv3etWUBBSuvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN +bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTExMTM2NDdaoBEYDzIwMTIxMDE1MTEz +NjQ3WjANBgkqhkiG9w0BAQUFAAOCAQEAfnj3nh6z+USW6VlDWRytWpNmC1ZRwWlg +P2+G4UF4HE8bMJkuiFLcZEVYTxlTYv+xAEpSFxdInFM2Q5C+O6pWOZ9NbikeR4oZ +FTI1kAZ0Uw+YMpVM4ztvKBIpUSqlbi69iNJ9WGF6qzxVeqobSOyrjjwtTsuglUbR ++mshp/SP7Br2IIK+KM1vgsmVExPfGPYANyk7ki/Q8uUnjqkreeSa9WC2iJLGcybW +YavDhYWALebUGukNeedkloYhdjPboPPxDkKNjakwIG8EkbJK7uXewMOHHOFvFTX3 +K388me8u5iQf4f3fj6ilEgs6f5Szzmb+vklPX0zIny/TVk2+Az7HmA== diff --git a/deps/openssl/openssl/test/ocsp-tests/WINH_D1.ors b/deps/openssl/openssl/test/ocsp-tests/WINH_D1.ors new file mode 100644 index 00000000000000..ed627ba66bc12c --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WINH_D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRf2uQDFpGg +Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMTExMzI5NDJaMHUwczBLMAkGBSsOAwIaBQAE +FKFyDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDExMTAwMDAwWqARGA8yMDEyMTAxODEw +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQCX3gEX+JVfxuYmxBBxC9sNCi3o76ODIicr +XMvm0DTO9VSyDBl7LDsMMgNMIDtO3flQSlBNZ2B9ikwyckXOSWXiXzybZVMdA/uq +NchgkM9aChrlhG0AHZyYe/+dJSmEBFXkIomy+S6YQ7Mcs2s6WxCeWU7gB4XOy1zO +/CvWjv0WQV1J2lZZ6pkvtECKAEjrVP275LA38HInFbYvVPXWzl4sDcX2TAxwUa4S +xAJAfwl+B+oZSerZWGRo6KjZuB/OB31cB5n/lABmRez6Obi27D0UUCRv/eSbwOF4 +Ofaa/XzJt7sF7WpVgoR41HI88W7aN4vtcw1zcVsBmfRMUNYZSqtfoIID+DCCA/Qw +ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV +BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV +BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK +qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi +M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s +LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm +UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi +9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw +HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP +Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3 +tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+ +snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL +PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG +5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p +4J2l diff --git a/deps/openssl/openssl/test/ocsp-tests/WINH_D2.ors 
b/deps/openssl/openssl/test/ocsp-tests/WINH_D2.ors new file mode 100644 index 00000000000000..b89fcf8a9a7a91 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WINH_D2.ors @@ -0,0 +1,32 @@ +MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB +yVdbHxANRLCFYj1mqBgPMjAxMjEwMTExMzMwMTBaMG4wbDBEMAkGBSsOAwIaBQAE +FLhXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL +BgkqhkiG9w0BAQUDggEBAA0H7bvcULg1GayFtQVrYDyW0feOEMNGLmgaGuwRdrY3 +KuWyNJLUUJKQZnOkdT8A4RpVX8xD4EgVyOqRACUahgdgp0g3QOn+vf2Zyf+NJIgW +woF5qaJgCOeIOw5O6F4r1vUhp8NvqXHotswgG58Nzz6UMD+uyIgq5o8uzOjryEm6 +wO2X+KvN9sMzkeZhNvAHkgBQL8CG4CggWnzn7At1DmhhsizfhDrosigM4Zr6Sm6z +v1YfSPznD0b3TQ7RzvpbJPofF2aJXMIMxdKR5pemuevTDR2+JCXjVPsD/ZODFykc +rsQeqx2vTOIg84PRKboXjCAwHn4rIN7JJtQqebLtD9egggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID +MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi +oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL +866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J +ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5 +MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw +yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec +TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7 +8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm +uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb +4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa +YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC +fBIRKjnK621vAWvc7UR+0hqnZ+U= diff --git a/deps/openssl/openssl/test/ocsp-tests/WINH_D3.ors b/deps/openssl/openssl/test/ocsp-tests/WINH_D3.ors new file mode 100644 index 00000000000000..c3d7c946565e5f --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WINH_D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTE0MzkxOFowZjBkMDwwCQYF +Kw4DAhoFAAQUjKTJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMTExNDIzMjVaoBEYDzIwMTIxMDEzMTQzOTE4WjAN +BgkqhkiG9w0BAQUFAAOCAQEAgdrf+v+BwEhG0ghTLMVmuxWprJr/9VFtpKpxQrTo +egSoW+5JOPCUAStfw3R3u7QM8sJf9bnPorgoCoY1hPKcWNLhvf1Ng3QlVkNa6NcO +EonbuI4KE9Rhoflpf//pD/3AFKzU+ecRs04KtYezKrUvC1RayGabd7bgtIpdFss4 +ZCZ22riqjFtqD3+2//AHg7VaqiJMKlRt05CMmGe+HKn5PEN9HaeI52nsTf+L1Jeh +ItnaDPfV76vFHHXyUhR3iIgnqQDCig0q3yj7BQqH50+K+myiMAY+p8cuVqebno1i +BzXxxpZl/fw1KnTFdEa7p2jtmXw3KZiHAWAddwg1F1tHTaCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl 
+cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD +VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz +uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/pag== diff --git a/deps/openssl/openssl/test/ocsp-tests/WINH_ND1.ors b/deps/openssl/openssl/test/ocsp-tests/WINH_ND1.ors new file mode 100644 index 00000000000000..af4755260c1a53 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WINH_ND1.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSIRFH/UCpp +Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE +FEm2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz +IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx +MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk +8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs +RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V +eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv +mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc +087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32 diff --git a/deps/openssl/openssl/test/ocsp-tests/WINH_ND2.ors b/deps/openssl/openssl/test/ocsp-tests/WINH_ND2.ors new file mode 100644 index 00000000000000..99417f792e178e --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WINH_ND2.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQLWOWLxkwV +N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE +FO2+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0 +0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz +MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD +sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA +PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz +oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC ++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a +vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA diff --git a/deps/openssl/openssl/test/ocsp-tests/WINH_ND3.ors b/deps/openssl/openssl/test/ocsp-tests/WINH_ND3.ors new file mode 100644 index 00000000000000..73dc42de299730 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WINH_ND3.ors @@ -0,0 +1,10 @@ 
+MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBStvZh6NLQm +9/rEJlTvA73gJMtUGhgPMjAxMjEwMTExMTM2NDdaMHQwcjBKMAkGBSsOAwIaBQAE +FH2xZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN +bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTExMTM2NDdaoBEYDzIwMTIxMDE1MTEz +NjQ3WjANBgkqhkiG9w0BAQUFAAOCAQEAfnj3nh6z+USW6VlDWRytWpNmC1ZRwWlg +P2+G4UF4HE8bMJkuiFLcZEVYTxlTYv+xAEpSFxdInFM2Q5C+O6pWOZ9NbikeR4oZ +FTI1kAZ0Uw+YMpVM4ztvKBIpUSqlbi69iNJ9WGF6qzxVeqobSOyrjjwtTsuglUbR ++mshp/SP7Br2IIK+KM1vgsmVExPfGPYANyk7ki/Q8uUnjqkreeSa9WC2iJLGcybW +YavDhYWALebUGukNeedkloYhdjPboPPxDkKNjakwIG8EkbJK7uXewMOHHOFvFTX3 +K388me8u5iQf4f3fj6ilEgs6f5Szzmb+vklPX0zIny/TVk2+Az7HmA== diff --git a/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D1.ors b/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D1.ors new file mode 100644 index 00000000000000..d7566cf1bd5230 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBSpTXftIZX0 +lLT9zwVSQC5Jfp3pqhgPMjAxMjEwMTAxNDU0NDNaMHUwczBLMAkGBSsOAwIaBQAE +FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDEwMTMwMDAwWqARGA8yMDEyMTAxNzEz +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQBw5Z+0ggEddRTIq7cXlMoxG9Nrx4HtutsH +itIUoZp/rlLoxHsJTo/VmdZvTTGIc7Ok9XuoH61lY/x9glAKsGRjz4Myc9+5rx0O +675lwmOS+uaf3/hRkicVrVr7Pt2ug3R7OXm2MJrohjNKP8lqtLJ0hHP88a8rotKA +r9uz/qHm7K4Uh7dRt/Pnu9MPG74tZeFNN4M1ONMEiRdG39FqzFDXWxwQ3NmyC0Wo +DQn+NklZMknr8mm7IBWpzgU1fTD9R0yv0zdhUZGiEXxvdhm7GJrTET5jS30Ksm5j +o+n39YVu/vGbjyyYx3+WdeQLEyipaGvldSuJpT+R684/RuFWNetcoIID+DCCA/Qw +ggPwMIIC2KADAgECAhIRIcYjwu4UNkR1VGrDbSdFei8wDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV +BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDAzMVoXDTEyMTIxOTA4NDAzMVowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDExFzAVBgNV +BAUTDjIwMTIwOTE5MDkzOTAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAx0kb6QhDH3sEDj4zaysjVzYelq9lZ1cso4R2IyQxaoPaG6GkaCmHA4sz6KP+ +m3ADqplibEUBa/mzCxHW8/oy3NhGMFdbezduZrnRFLbzakOTeIo8VEIM3JPfgREv +CX8nj6Xu7ERD6JO/ZQ9Xr7YVzKKN+3cVZlcMHoGBnOPcO2Sz0AcYyk5m5IsGBRoT +T86j6Cr9PhOPTVwXL6Wxy1KVHsUZXUwnRacV0O4SHWQ4zM9Sablus9fTbh1CgIqW +sKDyzVB4yECXkBVeUlA+cuCaRRVHRiR+jPDSgbU62nnNudEpGG7dyoop6IOvXv2O +ydncWzaukxIVvQ/Ij85kHqs7HQIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw +HQYDVR0OBBYEFKlNd+0hlfSUtP3PBVJALkl+nemqMB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCe4rZg61Dmwygl/Uae +BJZog64/FvuB1sfCqKLJTjKOfLcugSTX1TT7bLJbzXRGPQuorI3TIZEOwldIw01d +DTLlsOCHrfHd+bpxgijxPkUuaA4NYnpvqTEMJqPKOC8QYfKupNjAPSuHvwqvqCfO +RCe3jY6xQDO0WCTZ8/xMsOkw+J/YEYqALETf2Ug7k5eRL/TvfLd8Sgi7vPfmUeiW +ptlsbhMOWQoQc+JA3vCI01rrjNq+0kIZ/r8nPGvablRr0Aakk6eDuS2dcReaPwuK +0xE136pJYiXdQ3SA7uwmlorjxmejavyoPCr23TU74DQEt6hhc6uIcabsa4Y8KvJy +RI4G diff --git a/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D2.ors b/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D2.ors new file mode 100644 index 00000000000000..757db75427ed23 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D2.ors @@ -0,0 +1,32 @@ +MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTqlwecTarB +yVdbHxANRLCFYj1mqBgPMjAxMjEwMTAxNDU0NDhaMG4wbDBEMAkGBSsOAwIaBQAE +FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL 
+BgkqhkiG9w0BAQUDggEBACkGyoGefA2WuktIerofBoPgeyT8Mry57DxF7IEvX8dI +Adk+MZRo5suYIE2AJty8bohYYiIxS7sZ5nsUM+iyu5cIdmsIwt/YifYsSdHc6DKz +l3Yh4bS27QX05/Vuok3HmEMsRBmensKATMfvGP+TOwhuFeHWAK8KHSCmUbGZFP3A +WKtrhRh/qC4qetMt07z/OKZcqHUYegEpO3xqRJ4MdqRJpV1urjdL/852US0mWAOL +/EPoexWiHiKJmsNy7HAEKFQ+daqdZYM1BTGbS2aj3go/BVqf0xEhRLT0fsdof4Is +1Cy2ZHGbaVEyOQpXsxUEAqEdJcFRcLFGhdgnUjcQ9lqgggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDQ2QF8p0+Fb7ID +MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi +oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL +866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J +ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5 +MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw +yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec +TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7 +8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm +uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb +4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa +YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC +fBIRKjnK621vAWvc7UR+0hqnZ+U= diff --git a/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D3.ors b/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D3.ors new file mode 100644 index 00000000000000..c33179c379ca14 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKDOSC_D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAkFVMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMDE1MTkzOVowZjBkMDwwCQYF +Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMTAxNDU2MTdaoBEYDzIwMTIxMDEyMTUxOTM5WjAN +BgkqhkiG9w0BAQUFAAOCAQEAH1Bs3glJoAvCHhgVtN4F/avlKA1St74v7yuD1DIu +cBf/4YRJdxZATXMI8I0TPjSl8L+rRAiUTVd8sPhWQ9XD9WaYKkTEjuQSPp851/81 +zDihz9Kj5Rzo5PYpFsbSps/ALMQSRkrtuX4DCm9fbK7xC+adpbhQDnWW/GXM1+Ob +lv3pHDQXLh2GQbRsaJBgLeSUxIIE7RWJv1N+Ugi5zF8rja5qnJ9DnkilEqMeXQp8 +SThaI+TOe+KHK+7wTp5QkFNIE5l/uKgvSNIOwLe9HDevlSl1wYF6e+mAz3uoQyJa +Ucx8FIoV6CIr+wUd+P8CmNXiQ7M59I8gm3FCDiEvWDQGEaCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl +cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD +VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCdxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz 
+uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/pag== diff --git a/deps/openssl/openssl/test/ocsp-tests/WKIC_D1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/WKIC_D1_Issuer_ICA.pem new file mode 100644 index 00000000000000..93fb70d51b6815 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKIC_D1_Issuer_ICA.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE----- +MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G +A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp +Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz +MTAwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z +YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g +RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDOoUbMUpq4pbR/WNnN +2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb +AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl +VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs +Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r +wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb +wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB +/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8 +BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t +L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs +c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB +hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud +JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW +gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa +pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF +o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb +LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq +iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG +qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl +TercGL7FG81kwA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WKIC_D2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WKIC_D2_Issuer_Root.pem new file mode 100644 index 00000000000000..61db7ae3e363a2 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKIC_D2_Issuer_Root.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG +A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv +b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw +MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i 
+YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT +aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDbDuaZ +jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp +xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp +1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG +snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ +U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8 +9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B +AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz +yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE +38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP +AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad +DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME +HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WKIC_D3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WKIC_D3_Issuer_Root.pem new file mode 100644 index 00000000000000..f03432b414dd60 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKIC_D3_Issuer_Root.pem @@ -0,0 +1,41 @@ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO +BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAzyLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q +laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w +ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T 
+SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WKIC_ND1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/WKIC_ND1_Issuer_ICA.pem new file mode 100644 index 00000000000000..f0d981131272bf --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKIC_ND1_Issuer_ICA.pem @@ -0,0 +1,29 @@ +-----BEGIN CERTIFICATE----- +MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw +MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp +b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAM1KljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi +7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK +HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy +hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS +b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f +BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY +5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq +fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1 +MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv +bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v +Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm +MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU +cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv +Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9 +P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40 +TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj +hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs +tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl +9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6 +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WKIC_ND2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WKIC_ND2_Issuer_Root.pem new file mode 100644 index 00000000000000..14d35cf1cc3d3b --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKIC_ND2_Issuer_Root.pem @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw +MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJHQjEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0UCLi3LjkRv3 +UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI +2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8 +Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp 
++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+ +DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O +nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w +DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD +ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8 +t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X +HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl +Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi +pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug +R1uUq27UlTMdphVx8fiUylQ5PsE= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WKIC_ND3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WKIC_ND3_Issuer_Root.pem new file mode 100644 index 00000000000000..ba7fb8dcee6a4b --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WKIC_ND3_Issuer_Root.pem @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU +MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs +IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290 +MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCU0Ux +FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h +bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALj3GjPm8gAELTngTlvt +H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9 +uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX +mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX +a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN +E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0 +WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD +VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0 +Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU +cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx +IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN +AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH +YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5 +6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC +Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX +c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a +mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WRID_D1.ors b/deps/openssl/openssl/test/ocsp-tests/WRID_D1.ors new file mode 100644 index 00000000000000..6589782194a090 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WRID_D1.ors @@ -0,0 +1,32 @@ +MIIFzwoBAKCCBcgwggXEBgkrBgEFBQcwAQEEggW1MIIFsTCBoKIWBBRg2uQDFpGg +Ywh4P1y2H9bZ2/BQNBgPMjAxMjEwMTExMTI1MjJaMHUwczBLMAkGBSsOAwIaBQAE +FKByDqBqfGICVPKo9Z3Se6Tzty+kBBSwsEr9HHUo+BxhqhP2+sGQPWsWowISESG8 +vx4IzALnkqQG05AvM+2bgAAYDzIwMTIxMDExMTAwMDAwWqARGA8yMDEyMTAxODEw +MDAwMFowCwYJKoZIhvcNAQEFA4IBAQAHQBPHdHWNzaFs5bfBvQcvxBWsDnsCFXNs +a1fECiWDFNt6Nz4MCBY4rC7n0nhQfvg4m1woNcTAZVO8lacYomwUU/5/XpeFM6yc +NeFcVbfVXA48GWPANitNQCwyRL5hGfIqNy1I9T1BHlBqYusmJKy65r2iqpmld/hD +7S1dsCd4fXhjBQQORPmBqhKvWEU08Dh5aoaDAuaZoxRH8B1q+mUs0ODOIu34L84y +JcxTKccd/HCwI8oxwLoBtyXSHb+dCzc7zSjFvQhbT5dOCvJNNe/fk6+EhMtQ6ybC +D7p9EShCvU5jAdw54bZWk5wIQSvsWk9axUmYFFLYI3hAaoybpFVroIID+DCCA/Qw +ggPwMIIC2KADAgECAhIRISdENsrz1CSWG3VIBwfQERQwDQYJKoZIhvcNAQEFBQAw +WTELMAkGA1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExLzAtBgNV 
+BAMTJkdsb2JhbFNpZ24gRXh0ZW5kZWQgVmFsaWRhdGlvbiBDQSAtIEcyMB4XDTEy +MDkxOTA3NDA1MFoXDTEyMTIxOTA4NDA1MFowgYUxCzAJBgNVBAYTAkJFMRkwFwYD +VQQKExBHbG9iYWxTaWduIG52LXNhMUIwQAYDVQQDEzlHbG9iYWxTaWduIEV4dGVu +ZGVkIFZhbGlkYXRpb24gQ0EgLSBHMiBPQ1NQIHJlc3BvbmRlciAtIDIxFzAVBgNV +BAUTDjIwMTIwOTE5MDk0MDAwMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC +AQEAnCgMsBO+IxIqCnXCOfXJoIC3wj+f0s4DV9h2gJBzisWXkaJD2DfNrd0kHUXK +qVVPUxnA4G5iZu0Z385/KiOt1/P6vQ/Z2/AsEh/8Z/hIyeZCHL31wrSZW4yLeZwi +M76wPiBHJxPun681HQlVs/OGKSHnbHc1XJAIeA/M8u+lLWqIKB+AJ82TrOqUMj1s +LjGhQNs84xPliONN5K7DrEy+Y65X/rFxN77Smw+UtcH1GgH2NgaHH8dpt1m25sgm +UxZWhdx66opB/lbRQwWdGt7MC0kJFaWHDZq64DTuYoekFYSxAFu0nd0EekEHEJEi +9mquB9cv/96SuEJl8BcUWU/1LwIDAQABo4GEMIGBMAkGA1UdEwQCMAAwDgYDVR0P +AQH/BAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMA8GCSsGAQUFBzABBQQCBQAw +HQYDVR0OBBYEFF/a5AMWkaBjCHg/XLYf1tnb8FA0MB8GA1UdIwQYMBaAFLCwSv0c +dSj4HGGqE/b6wZA9axajMA0GCSqGSIb3DQEBBQUAA4IBAQCKRl1iXFmOQtLseDWP +Y5icDDBGiRi17CGgvIzGJi/ha0PhbO+X0TmQIEnRX3Mu0Er/Mm4RZSjMtJ2iZRh3 +tGf4Dn+jKgKOmgXC3oOG/l8RPHLf0yaPSdn/z0TXtA30vTFBLlFeWnhbfhovea4+ +snPdBxLqWZdtxmiwojgqA7YATCWwavizrBr09YRyDwzgtpZ2BwMruGuFuV9FsEwL +PCM53yFlrM32oFghyfyE5kYjgnnueKM+pw1kA0jgb1CnVJRrMEN1TXuXDAZLtHKG +5X/drah1JtkoZhCzxzZ3bYdVDQJ90OHFqM58lwGD6z3XuPKrHDKZKt+CPIsl5g7p +4J2l diff --git a/deps/openssl/openssl/test/ocsp-tests/WRID_D2.ors b/deps/openssl/openssl/test/ocsp-tests/WRID_D2.ors new file mode 100644 index 00000000000000..4e11e4b27e8e80 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WRID_D2.ors @@ -0,0 +1,32 @@ +MIIF4AoBAKCCBdkwggXVBgkrBgEFBQcwAQEEggXGMIIFwjCBmaIWBBTrlwecTarB +yVdbHxANRLCFYj1mqBgPMjAxMjEwMTExMTI1MjVaMG4wbDBEMAkGBSsOAwIaBQAE +FLdXtbacB/gWIxOOkMkqDr4yAaoxBBRge2YaRQ2XyolQL30EzTSo//z9SwILBAAA +AAABL07hRxCAABgPMjAxMjEwMDEwNjAwMDBaoBEYDzIwMTMwNDE1MDYwMDAwWjAL +BgkqhkiG9w0BAQUDggEBAHThkPoy6eA7qX9y5C5b1ElRSwdjzsd15OJSqP2yjQbS +Ol1K8DWtX0UhTfRH+CrIPoWL40g2HjXtIVeMD6s3hakYimZUenIJ/IRRSVWp+EXU +MewgTVPz/wJN/9dJIkSbOI/BmpIGlaaBaLwcb39nJjZMq0sXj8jRI5i0isotOAFz +Zc0R20viBEH099KuGktB2fKKEpVbbWPljTxKzkIBs9SXZBIqd/X2MWzQWcLKzhL0 +oynkvqxTFqNVjjZKcKSXPS/XEUufLrv/E3xQZYAfTJr778kFkyA8JzrXiH6W5DX6 +UbqsnO5DaPZvMDfvlQWETkoS1j+Qgu2mIWzdiw7sPrOgggQQMIIEDDCCBAgwggLw +oAMCAQICCwQAAAAAAThXovYBMA0GCSqGSIb3DQEBBQUAMFcxCzAJBgNVBAYTAkJF +MRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRsw +GQYDVQQDExJHbG9iYWxTaWduIFJvb3QgQ0EwHhcNMTIwNzA1MTgwMDAwWhcNMTMw +NzA1MTgwMDAwWjBZMQswCQYDVQQGEwJCRTEZMBcGA1UEChMQR2xvYmFsU2lnbiBu +di1zYTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBPQ1NQIGZvciBSb290IFIxIC0gQnJh +bmNoIDEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDP2QF8p0+Fb7ID +MwwD1gEr2oazjqbW28EZr3YEyMPk+7VFaGePSO1xjBGIE48Q7m7d6p6ZXCzlBZEi +oudrHSr3WDqdIVKLDrZIDkgEgdjJE72Hq6Pf5CEGXyebbODm4sV96EfewSvOOYLL +866g3aoVhLDK02ny+Q5OsokW7nhnmGMMh10tZqR5VmdQTiw8MgeqUxBEaEO4WH2J +ltgSsgNJBNBYuDgnn5ryzVqhvmCJvYZMYeN6qZFKy1MgHcR+wEpGLPlRL4ttu6e5 +MJrVta7dVFobHUHoFog97LtQT1PY0Ubaihswjge5O04bYeCrgSSjr1e4xH/KDxRw +yyhoscaFAgMBAAGjgdIwgc8wDgYDVR0PAQH/BAQDAgeAMB0GA1UdDgQWBBTqlwec +TarByVdbHxANRLCFYj1mqDBMBgNVHSAERTBDMEEGCSsGAQQBoDIBXzA0MDIGCCsG +AQUFBwIBFiZodHRwczovL3d3dy5nbG9iYWxzaWduLmNvbS9yZXBvc2l0b3J5LzAJ +BgNVHRMEAjAAMBMGA1UdJQQMMAoGCCsGAQUFBwMJMB8GA1UdIwQYMBaAFGB7ZhpF +DZfKiVAvfQTNNKj//P1LMA8GCSsGAQUFBzABBQQCBQAwDQYJKoZIhvcNAQEFBQAD +ggEBAHiC6N1uF29d7CmiVapA8Nr1xLSVeIkBd4A8yHsUTQ7ATI7bwT14QUV4awe7 +8cvmO5ZND8YG1ViwN162WFm9ivSoWBzvWDbU2JhQFb+XzrzCcdn0YbNiTxJh/vYm +uDuxto00dpBgujSOAQv8B90iDEJ+sZpYRzDRj62qStRey0zpq5eX+pA+gdppMUFb +4QvJf0El8TbLCWLN4TjrFe6ju7ZaN9zmgVYGQ2fMHKIGNScLuIA950nYwzRkIfHa 
+YW6HqP1rCR1EiYmstEeCQyDxJx+RUlh+q8L1BKzaMYhS6s63MZzQuGseYStaCmbC +fBIRKjnK621vAWvc7UR+0hqnZ+U= diff --git a/deps/openssl/openssl/test/ocsp-tests/WRID_D3.ors b/deps/openssl/openssl/test/ocsp-tests/WRID_D3.ors new file mode 100644 index 00000000000000..61e2d09685ff5f --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WRID_D3.ors @@ -0,0 +1,38 @@ +MIIG8AoBAKCCBukwggblBgkrBgEFBQcwAQEEggbWMIIG0jCB+aF+MHwxCzAJBgNV +BAYTAlVTMQwwCgYDVQQIEwNOU1cxDzANBgNVBAcTBlN5ZG5leTEUMBIGA1UEChML +Q0FjZXJ0IEluYy4xHjAcBgNVBAsTFVNlcnZlciBBZG1pbmlzdHJhdGlvbjEYMBYG +A1UEAxMPb2NzcC5jYWNlcnQub3JnGA8yMDEyMTAxMTEzMjE0MVowZjBkMDwwCQYF +Kw4DAhoFAAQUi6TJyxcpGUU+u45zCZG5JfKDImUEFBa1MhvUx/Pg5o7zvdKwOu6y +ORjRAgMLs8aAABgPMjAxMjEwMTExMjQyMTZaoBEYDzIwMTIxMDEzMTMyMTQxWjAN +BgkqhkiG9w0BAQUFAAOCAQEAEWd9kKEfaurOXDV98OVtU27TmK4L4MeGEPdkg1i+ +fbPMe1mouWlVm23W6yaM7mM2NMXLW+hTNzqfyMPM7rByXNaFAAniCPTXNO3eJRIA +Zf0F10OSdBQ/ln4igHQCVZCnXR30/aP5/PMb4u3/LTuC9aW6K7mLXcuCvJztGnXO +v3r64q/qTGG/b4eS65exykV9riSFuGp1rzLAy5fSYTBWTOBQ679PFjQnL60GkrZA +Egtxw2ozEDwo+X0WamEouxN8mjX/VQlMdEbykUFDuPD3vZydZ04BV9f18RJZOU9j +gCwMzd9gb4jUL4ykdWiLmO+YPDWFyNSYEIfnGgk1VvPHuaCCBL4wggS6MIIEtjCC +Ap6gAwIBAgIDCpvzMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jvb3QgQ0Ex +HjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZQ0EgQ2Vy +dCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9ydEBjYWNl +cnQub3JnMB4XDTExMDgyMzAwMDI1NloXDTEzMDgyMjAwMDI1NlowfDELMAkGA1UE +BhMCQVUxDDAKBgNVBAgTA05TVzEPMA0GA1UEBxMGU3lkbmV5MRQwEgYDVQQKEwtD +QWNlcnQgSW5jLjEeMBwGA1UECxMVU2VydmVyIEFkbWluaXN0cmF0aW9uMRgwFgYD +VQQDEw9vY3NwLmNhY2VydC5vcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCcxtRv5CPHw3BLdR/k/K72YsRgodbP+UdAONmvBvWzhwm6B8h6O+M64sFr +2w6be7SYBECIyOQgNJ1flK4MoAWhdBA/H5NtxaDOKbAqA27tO9GaevcPp7c518O0 +3hVnlPLvsN1f48nY0jQOXUTfv5nYXmD0OSSK/V3IRo0KsWB6T9UnMGCeEwb4Oqqz +uzM0b4SBflzMEony/m6Tg/qL7qs2TLZAqe77+BZaVdFkDUnaBN7RyMruXySxeXiz +mogT3WhROeloMa/X+E01bWBYBEK7VZIY9pgBpXQ7vDbbIGgYuIXUi20wh03WMy16 +VDYdV0IUXHpidNUeK9W/BPP/7APBAgMBAAGjRDBCMAwGA1UdEwEB/wQCMAAwJwYD +VR0lBCAwHgYIKwYBBQUHAwIGCCsGAQUFBwMBBggrBgEFBQcDCTAJBgNVHREEAjAA +MA0GCSqGSIb3DQEBBQUAA4ICAQAoT6p5f3cGprAcgrnzdenfTmDe9LCW7k2VnazA +MAzpsD6gXcSlo4+3hoHem/SpKRH2tqi34DmImCiv/S6fxsKM4Gfn5rlkAFviuTvS +r5Zrwh4ZKSfaoWv4bmbzmcAxvuxdMWHf/5PbjegjzFTbBMekVPZY/abYtD6kdHQZ +VNgzwZVfTBfYhfa+Rg72I2zjKpMsjxMqWfTmUzW6wfK6LFudZqu0U1NnJw+IlnVU +6WtjL885ebQrmcRqWz3nMhVLIu5L3w/s+VTLvm7If6jcMDNUjz8s2BPcJeCXg3TE +STsyl6tvk17RRz2+9JskxVOk11xIn96xR4FCERIid2ek9z1xi7oYOajQF50i/9Gj +ReDEfRSyb4/LzoKDOY+h4Q6jryeHh7WIHFiK5qrBN2y8qOoRJ/OqQnqci/BJBNpe +g9Q9PJRgGSzRndTXNHiYRbeLpq7eGo3sPqlR9qBQ3rd98XGOU0RCMnzjKhENC3qo +5PkSF2xs8RmjWktFSTDwjYo0qf1teo7CGHjgaPjQ7JE8Q4ysFOQndSWmLpqwDcI9 +HfIvPwUIWArQrJRh9LCNSyvHVgLqY9kw8NW4TlMxV2WqaYCkiKi3XVRrSFR3ahS1 +VBvRZ8KpplrV7rhXjVSSqqfLk1sX3l72Ck2F9ON+qbNFmvhgNjSiBY9neMgo804a +wG/pag== diff --git a/deps/openssl/openssl/test/ocsp-tests/WRID_ND1.ors b/deps/openssl/openssl/test/ocsp-tests/WRID_ND1.ors new file mode 100644 index 00000000000000..b6fadc5e9ccaa4 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WRID_ND1.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBSJRFH/UCpp +Xi2I9CG62QzyzsvqfBgPMjAxMjEwMTEwODQxMTNaMHMwcTBJMAkGBSsOAwIaBQAE +FEi2DTgjjfhFbk7lhD6jlBEYApefBBSIRFH/UCppXi2I9CG62QzyzsvqfAIQIuEz +IiCgSN8psr+aMcKbB4AAGA8yMDEyMTAxMTA4NDExM1qgERgPMjAxMjEwMTUwODQx +MTNaMA0GCSqGSIb3DQEBBQUAA4IBAQCNnhlBMxxh9z5AKfzAxiKs90CfxUsqfYfk +8XlyF9VIfWRfEwzS6MF1pEzLnghRxTAmjrFgK+sxD9wk+S5Mdgw3nbED9DVFH2Hs +RGKm/t9wkvrYOX6yRQqw6uRvU/5cibMjcyzKB/VQMwk4p4FwSUgBv88A5sTkKr2V 
+eYdEm34hg2TZVkipPMBiyTyBLXs8D/9oALtnczg4xlTRSjDUvqoXL5haqY4QK2Pv +mNwna6ACkwLmSuMe29UQ8IX2PUB4R5Etni5czyiKGxZLm+4NAhuEwWFNEzCyImPc +087gHGU1zx+qVSlajqMJ/9ZXYjbt7WiWdhOTGEv4VMn8dHhRUs32 diff --git a/deps/openssl/openssl/test/ocsp-tests/WRID_ND2.ors b/deps/openssl/openssl/test/ocsp-tests/WRID_ND2.ors new file mode 100644 index 00000000000000..251f0df7cb75c0 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WRID_ND2.ors @@ -0,0 +1,10 @@ +MIIB0woBAKCCAcwwggHIBgkrBgEFBQcwAQEEggG5MIIBtTCBnqIWBBQMWOWLxkwV +N6RAqTCpIb5HNlpW/xgPMjAxMjEwMTAyMzAzMTlaMHMwcTBJMAkGBSsOAwIaBQAE +FOy+ZAvtiWulchtVZmfKU1ZI9ewTBBQLWOWLxkwVN6RAqTCpIb5HNlpW/wIQEaO0 +0OyNt3+doM1dLVEvQoAAGA8yMDEyMTAxMDIzMDMxOVqgERgPMjAxMjEwMTQyMzAz +MTlaMA0GCSqGSIb3DQEBBQUAA4IBAQCHn2nGfEUX/EJruMkTgh7GgB0u9cpAepaD +sPv9gtl3KLUZyR+NbGMIa5/bpoJp0yg1z5VL6CLMusy3AF6Cn2fyaioDxG+yc+gA +PcPFdEqiIMr+TP8s7qcEiE6WZddSSCqCn90VZSCWkpDhnCjDRwJLBBPU3803fdMz +oguvyr7y6Koxik8X/iUe8EpSzAvmm4GZL3veTI+x7IezJSrhCS9zM0ZHjySjoDxC ++ljGH0EuWPTmFEqZVGIq3cuahIYzKItUbYnXU6ipi/2p42qbsFeok7eEN0EYsY1a +vRATHGRmU7Q5HLCq4rQtZC1cis52Mvc9x1W4z/Gt5A3FtgElXXNA diff --git a/deps/openssl/openssl/test/ocsp-tests/WRID_ND3.ors b/deps/openssl/openssl/test/ocsp-tests/WRID_ND3.ors new file mode 100644 index 00000000000000..19641f53346143 --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WRID_ND3.ors @@ -0,0 +1,10 @@ +MIIB1AoBAKCCAc0wggHJBgkrBgEFBQcwAQEEggG6MIIBtjCBn6IWBBSuvZh6NLQm +9/rEJlTvA73gJMtUGhgPMjAxMjEwMTAxMzA3NDZaMHQwcjBKMAkGBSsOAwIaBQAE +FHyxZlScq9tE7mImFq30ZXv3etWUBBStvZh6NLQm9/rEJlTvA73gJMtUGgIRAKcN +bJWejX5BTb8DmevkCauAABgPMjAxMjEwMTAxMzA3NDZaoBEYDzIwMTIxMDE0MTMw +NzQ2WjANBgkqhkiG9w0BAQUFAAOCAQEAA70+GYJoFuUBwIN9KHMqmOOtnmoLBBlm +HL2Su70ZEqSmL4zTt3iHY3m2YaNYSPphgDlQ4lY8zGAkCSrZ3ulpJun3RRy+gD29 +0ks155tChMbYNZrFm46vKWabBjh2p+623daymlcbgizi5Z+P4oJL68VrOqh+DArE +MpHH16BTGaF+bAjzTRSbS90xUReqwnnEpRBrmcQVo4uKpSkbyrx7iMLqsJ2vGpgh +xqj1kNPT9g3+gegmdU9QpFV0l9ZV8X/f0uz5nT4I0NL81d/KDHGx2rd+bftLODeL +ZAWAzFbr5B5EMqPGoh/SQXpcuVOqMHjh8fi8PBXBcitlIFzdDKXDvA== diff --git a/deps/openssl/openssl/test/ocsp-tests/WSNIC_D1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/WSNIC_D1_Issuer_ICA.pem new file mode 100644 index 00000000000000..3f1c05377362fc --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WSNIC_D1_Issuer_ICA.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE----- +MIIEhjCCA26gAwIBAgILBAAAAAABL07hXdQwDQYJKoZIhvcNAQEFBQAwTDEgMB4G +A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp +Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTEwNDEzMTAwMDAwWhcNMjIwNDEz +MTAwMDAwWjBZMQswCQYDVQQGEwJVUzEZMBcGA1UEChMQR2xvYmFsU2lnbiBudi1z +YTEvMC0GA1UEAxMmR2xvYmFsU2lnbiBFeHRlbmRlZCBWYWxpZGF0aW9uIENBIC0g +RzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDNoUbMUpq4pbR/WNnN +2EugcgyXW6aIIMO5PUbc0FxSMPb6WU+FX7DbiLSpXysjSKyr9ZJ4FLYyD/tcaoVb +AJDgu2X1WvlPZ37HbCnsk8ArysRe2LDb1r4/mwvAj6ldrvcAAqT8umYROHf+IyAl +VRDFvYK5TLFoxuJwe4NcE2fBofN8C6iZmtDimyUxyCuNQPZSY7GgrVou9Xk2bTUs +Dt0F5NDiB0i3KF4r1VjVbNAMoQFGAVqPxq9kx1UBXeHRxmxQJaAFrQCrDI1la93r +wnJUyQ88ABeHIu/buYZ4FlGud9mmKE3zWI2DZ7k0JZscUYBR84OSaqOuR5rW5Isb +wO2xAgMBAAGjggFaMIIBVjAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB +/wIBADAdBgNVHQ4EFgQUsLBK/Rx1KPgcYaoT9vrBkD1rFqMwRwYDVR0gBEAwPjA8 +BgRVHSAAMDQwMgYIKwYBBQUHAgEWJmh0dHBzOi8vd3d3Lmdsb2JhbHNpZ24uY29t +L3JlcG9zaXRvcnkvMDYGA1UdHwQvMC0wK6ApoCeGJWh0dHA6Ly9jcmwuZ2xvYmFs +c2lnbi5uZXQvcm9vdC1yMi5jcmwwRAYIKwYBBQUHAQEEODA2MDQGCCsGAQUFBzAB +hihodHRwOi8vb2NzcC5nbG9iYWxzaWduLmNvbS9FeHRlbmRlZFNTTENBMCkGA1Ud +JQQiMCAGCCsGAQUFBwMBBggrBgEFBQcDAgYKKwYBBAGCNwoDAzAfBgNVHSMEGDAW 
+gBSb4gdXZxwewGoG3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAL0m28rZa +pJWrnlrpK4KbzJBrfHRFIOde2Mcj7ig1sTVlKqVR4FU/9oNntOQ2KbDa7JeVqYoF +o0X+Iy5SiLQfEICt0oufo1+oxetz3nmIQZgz7qdgGLFGyUAQB5yPClLJExoGbqCb +LTr2rk/no1E1KlsYBRLlUdy2NmLz4aQP++TPw5S/EauhWTEB8MxT7I9j12yW00gq +iiPtRVaoZkHqAblH7qFHDBTxI+Egc8p9UHxkOFejj0qcm+ltRc9Ea01gIEBxJbVG +qmwIft/I+shWKpLLg7h5CZctXqEBzgbttJfJBNxB7+BPNk3kQHNG7BESfIhbNCYl +TercGL7FG81kwA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WSNIC_D2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WSNIC_D2_Issuer_Root.pem new file mode 100644 index 00000000000000..af1b8b02a9223c --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WSNIC_D2_Issuer_Root.pem @@ -0,0 +1,21 @@ +-----BEGIN CERTIFICATE----- +MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG +A1UEBhMCVVMxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv +b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw +MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAlVTMRkwFwYDVQQKExBHbG9i +YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT +aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ +jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp +xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp +1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG +snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ +U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8 +9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B +AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz +yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE +38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP +AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad +DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME +HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WSNIC_D3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WSNIC_D3_Issuer_Root.pem new file mode 100644 index 00000000000000..764797a277724c --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WSNIC_D3_Issuer_Root.pem @@ -0,0 +1,41 @@ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdUZXN0 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO +BgNVBAoTB1Rlc3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q +laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w 
+ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T +SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND1_Issuer_ICA.pem b/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND1_Issuer_ICA.pem new file mode 100644 index 00000000000000..06b69087298a7d --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND1_Issuer_ICA.pem @@ -0,0 +1,29 @@ +-----BEGIN CERTIFICATE----- +MIIFBjCCA+6gAwIBAgIQEaO00OyNt3+doM1dLVEvQjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMDA1MjQwMDAw +MDBaFw0yMDA1MzAxMDQ4MzhaMIGOMQswCQYDVQQGEwJVUzEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDE0MDIGA1UEAxMrQ09NT0RPIEV4dGVuZGVkIFZhbGlkYXRp +b24gU2VjdXJlIFNlcnZlciBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAMxKljPNJY1n7iiWN4dG8PYEooR/U6qW5h+xAhxu7X0h1Nc8HqLYaS+ot/Wi +7WRYZOFEZTZJQSABjTsT4gjzDPJXOZM3txyTRIOOvy3xoQV12m7ue28b6naDKHRK +HCvT9cQDcpOvhs4JjDx11MkKL3Lzrb0OMDyEoXMfAyUUpY/D1vS15N2GevUZumjy +hVSiMBHK0ZLLO3QGEqA3q2rYVBHfbJoWlLm0p2XGdC0x801S6VVRn8s+oo12mHDS +b6ZlRS8bhbtbbfnywARmE4R6nc4n2PREnr+svpnba0/bWCGwiSe0jzLWS15ykV7f +BZ3ZSS/0tm9QH3XLgJ3m0+TR8tMCAwEAAaOCAWkwggFlMB8GA1UdIwQYMBaAFAtY +5YvGTBU3pECpMKkhvkc2Wlb/MB0GA1UdDgQWBBSIRFH/UCppXi2I9CG62Qzyzsvq +fDAOBgNVHQ8BAf8EBAMCAQYwEgYDVR0TAQH/BAgwBgEB/wIBADA+BgNVHSAENzA1 +MDMGBFUdIAAwKzApBggrBgEFBQcCARYdaHR0cHM6Ly9zZWN1cmUuY29tb2RvLmNv +bS9DUFMwSQYDVR0fBEIwQDA+oDygOoY4aHR0cDovL2NybC5jb21vZG9jYS5jb20v +Q09NT0RPQ2VydGlmaWNhdGlvbkF1dGhvcml0eS5jcmwwdAYIKwYBBQUHAQEEaDBm +MD4GCCsGAQUFBzAChjJodHRwOi8vY3J0LmNvbW9kb2NhLmNvbS9DT01PRE9BZGRU +cnVzdFNlcnZlckNBLmNydDAkBggrBgEFBQcwAYYYaHR0cDovL29jc3AuY29tb2Rv +Y2EuY29tMA0GCSqGSIb3DQEBBQUAA4IBAQCaQ7+vpHJezX1vf/T8PYy7cOYe3QT9 +P9ydn7+JdpvyhjH8f7PtKpFTLOKqsOPILHH3FYojHPFpLoH7sbxiC6saVBzZIl40 +TKX2Iw9dej3bQ81pfhc3Us1TocIR1FN4J2TViUFNFlW7kMvw2OTd3dMJZEgo/zIj +hC+Me1UvzymINzR4DzOq/7fylqSbRIC1vmxWVKukgZ4lGChUOn8sY89ZIIwYazgs +tN3t40DeDDYlV5rA0WCeXgNol64aO+pF11GZSe5EWVYLXrGPaOqKnsrSyaADfnAl +9DLJTlCDh6I0SD1PNXf82Ijq9n0ezkO21cJqfjhmY03n7jLvDyToKmf6 +-----END CERTIFICATE----- diff --git 
a/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND2_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND2_Issuer_Root.pem new file mode 100644 index 00000000000000..1b46fcfb6e7d8a --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND2_Issuer_Root.pem @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIID0DCCArigAwIBAgIQIKTEf93f4cdTYwcTiHdgEjANBgkqhkiG9w0BAQUFADCB +gTELMAkGA1UEBhMCR0IxGzAZBgNVBAgTEkdyZWF0ZXIgTWFuY2hlc3RlcjEQMA4G +A1UEBxMHU2FsZm9yZDEaMBgGA1UEChMRQ09NT0RPIENBIExpbWl0ZWQxJzAlBgNV +BAMTHkNPTU9ETyBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0xMTAxMDEwMDAw +MDBaFw0zMDEyMzEyMzU5NTlaMIGBMQswCQYDVQQGEwJVUzEbMBkGA1UECBMSR3Jl +YXRlciBNYW5jaGVzdGVyMRAwDgYDVQQHEwdTYWxmb3JkMRowGAYDVQQKExFDT01P +RE8gQ0EgTGltaXRlZDEnMCUGA1UEAxMeQ09NT0RPIENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0ECLi3LjkRv3 +UcEbVASY06m/weaKXTuH+7uIzg3jLz8GlvCiKVCZrts7oVewdFFxze1CkU1B/qnI +2GqGd0S7WWaXUF601CxwRM/aN5VCaTwwxHGzUvAhTaHYujl8HJ6jJJ3ygxaYqhZ8 +Q5sVW7euNJH+1GImGEaaP+vB+fGQV+useg2L23IwambV4EajcNxo2f8ESIl33rXp ++2dtQem8Ob0y2WIC8bGoPW43nOIv4tOiJovGuFVDiOEjPqXSJDlqR6sA1KGzqSX+ +DT+nHbrTUcELpNqsOO9VUCQFZUaTNE8tja3G1CEZ0o7KBWFxB3NH5YoZEr0ETc5O +nKVIrLsm9wIDAQABo0IwQDAdBgNVHQ4EFgQUC1jli8ZMFTekQKkwqSG+RzZaVv8w +DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEFBQAD +ggEBAC/JxBwHO89hAgCx2SFRdXIDMLDEFh9sAIsQrK/xR9SuEDwMGvjUk2ysEDd8 +t6aDZK3N3w6HM503sMZ7OHKx8xoOo/lVem0DZgMXlUrxsXrfViEGQo+x06iF3u6X +HWLrp+cxEmbDD6ZLLkGC9/3JG6gbr+48zuOcrigHoSybJMIPIyaDMouGDx8rEkYl +Fo92kANr3ryqImhrjKGsKxE5pttwwn1y6TPn/CbxdFqR5p2ErPioBhlG5qfpqjQi +pKGfeq23sqSaM4hxAjwu1nqyH6LKwN0vEJT9s4yEIHlG1QXUEOTS22RPuFvuG8Ug +R1uUq27UlTMdphVx8fiUylQ5PsE= +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND3_Issuer_Root.pem b/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND3_Issuer_Root.pem new file mode 100644 index 00000000000000..4d1f45479c7f4f --- /dev/null +++ b/deps/openssl/openssl/test/ocsp-tests/WSNIC_ND3_Issuer_Root.pem @@ -0,0 +1,25 @@ +-----BEGIN CERTIFICATE----- +MIIENjCCAx6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBvMQswCQYDVQQGEwJTRTEU +MBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFkZFRydXN0IEV4dGVybmFs +IFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBFeHRlcm5hbCBDQSBSb290 +MB4XDTAwMDUzMDEwNDgzOFoXDTIwMDUzMDEwNDgzOFowbzELMAkGA1UEBhMCVVMx +FDASBgNVBAoTC0FkZFRydXN0IEFCMSYwJAYDVQQLEx1BZGRUcnVzdCBFeHRlcm5h +bCBUVFAgTmV0d29yazEiMCAGA1UEAxMZQWRkVHJ1c3QgRXh0ZXJuYWwgQ0EgUm9v +dDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALf3GjPm8gAELTngTlvt +H7xsD821+iO2zt6bETOXpClMfZOfvUq8k+0DGuOPz+VtUFrWlymUWoCwSXrbLpX9 +uMq/NzgtHj6RQa1wVsfwTz/oMp50ysiQVOnGXw94nZpAPA6sYapeFI+eh6FqUNzX +mk6vBbOmcZSccbNQYArHE504B4YCqOmoaSYYkKtMsE8jqzpPhNjfzp/haW+710LX +a0Tkx63ubUFfclpxCDezeWWkWaCUN/cALw3CknLa0Dhy2xSoRcRdKn23tNbE7qzN +E0S3ySvdQwAl+mG5aWpYIxG3pzOPVnVZ9c0p10a3CitlttNCbxWyuHv77+ldU9U0 +WicCAwEAAaOB3DCB2TAdBgNVHQ4EFgQUrb2YejS0Jvf6xCZU7wO94CTLVBowCwYD +VR0PBAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wgZkGA1UdIwSBkTCBjoAUrb2YejS0 +Jvf6xCZU7wO94CTLVBqhc6RxMG8xCzAJBgNVBAYTAlNFMRQwEgYDVQQKEwtBZGRU +cnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0ZXJuYWwgVFRQIE5ldHdvcmsx +IjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENBIFJvb3SCAQEwDQYJKoZIhvcN +AQEFBQADggEBALCb4IUlwtYj4g+WBpKdQZic2YR5gdkeWxQHIzZlj7DYd7usQWxH +YINRsPkyPef89iYTx4AWpb9a/IfPeHmJIZriTAcKhjW88t5RxNKWt9x+Tu5w/Rw5 +6wwCURQtjr0W4MHfRnXnJK3s9EK0hZNwEGe6nQY1ShjTK3rMUUKhemPR5ruhxSvC +Nr4TDea9Y355e6cJDUCrat2PisP29owaQgVR1EX1n6diIWgVIEM8med8vSTYqZEX +c4g/VhsxOBi0cQ+azcgOno4uG+GMmIPLHzHxREzGBHNJdmAPx/i9F4BrLunMTA5a +mnkPIAou1Z5jJh5VkpTYghdae9C8x49OhgQ= +-----END CERTIFICATE----- diff --git 
a/deps/openssl/openssl/test/serverinfo.pem b/deps/openssl/openssl/test/serverinfo.pem new file mode 100644 index 00000000000000..cd3020e3b8c464 --- /dev/null +++ b/deps/openssl/openssl/test/serverinfo.pem @@ -0,0 +1,16 @@ +-----BEGIN SERVERINFO FOR CT----- +ABIAZMevsj4TC5rgwjZNciLGwh15YXoIK9t5aypGJIG4QzyMowmwwDdqxudkUcGa +DvuqlYL7psO5j4/BIHTe677CAZBBH3Ho2NOM5q1zub4AbfUMlKeufuQgeQ2Tj1oe +LJLRzrwDnPs= +-----END SERVERINFO FOR CT----- + +-----BEGIN SERVERINFO FOR TACK----- +8wABTwFMh1Dz+3W6zULWJKjav5TNaFEXL1h98YtCXeyZnORYg4mbKpxH5CMbjpgx +To3amSqUPF4Ntjc/i9+poutxebYkbgAAAkMcxb8+RaM9YEywaJEGViKJJmpYG/gJ +HgfGaefI9kKbXSDmP9ntg8dLvDzuyYw14ktM2850Q9WvBiltpekilZxVuT2bFtfs +cmS++SAK9YOM8RrKhL1TLmrktoBEJZ6z5GTukYdQ8/t1us1C1iSo2r+UzWhRFy9Y +ffGLQl3smZzkWIOJmyqcR+QjG46YMU6N2pkqlDxeDbY3P4vfqaLrcXm2JG4AAAGN +xXQJPbdniI9rEydVXb1Cu1yT/t7FBEx6hLxuoypXjCI1wCGpXsd8zEnloR0Ank5h +VO/874E/BZlItzSPpcmDKl5Def6BrAJTErQlE9npo52S05YWORxJw1+VYBdqQ09A +x3wA +-----END SERVERINFO FOR TACK----- diff --git a/deps/openssl/openssl/test/smime-certs/ca.cnf b/deps/openssl/openssl/test/smime-certs/ca.cnf new file mode 100644 index 00000000000000..5e8b108654b331 --- /dev/null +++ b/deps/openssl/openssl/test/smime-certs/ca.cnf @@ -0,0 +1,66 @@ +# +# OpenSSL example configuration file for automated certificate creation. +# + +# This definition stops the following lines choking if HOME or CN +# is undefined. +HOME = . +RANDFILE = $ENV::HOME/.rnd +CN = "Not Defined" +default_ca = ca + +#################################################################### +[ req ] +default_bits = 2048 +default_keyfile = privkey.pem +# Don't prompt for fields: use those in section directly +prompt = no +distinguished_name = req_distinguished_name +x509_extensions = v3_ca # The extentions to add to the self signed cert +string_mask = utf8only + +# req_extensions = v3_req # The extensions to add to a certificate request + +[ req_distinguished_name ] +countryName = UK + +organizationName = OpenSSL Group +# Take CN from environment so it can come from a script. +commonName = $ENV::CN + +[ usr_cert ] + +# These extensions are added when 'ca' signs a request for an end entity +# certificate + +basicConstraints=critical, CA:FALSE +keyUsage=critical, nonRepudiation, digitalSignature, keyEncipherment + +# PKIX recommendations harmless if included in all certificates. +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid + +[ dh_cert ] + +# These extensions are added when 'ca' signs a request for an end entity +# DH certificate + +basicConstraints=critical, CA:FALSE +keyUsage=critical, keyAgreement + +# PKIX recommendations harmless if included in all certificates. +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid + +[ v3_ca ] + + +# Extensions for a typical CA + +# PKIX recommendation. 
+ +subjectKeyIdentifier=hash +authorityKeyIdentifier=keyid:always +basicConstraints = critical,CA:true +keyUsage = critical, cRLSign, keyCertSign + diff --git a/deps/openssl/openssl/test/smime-certs/mksmime-certs.sh b/deps/openssl/openssl/test/smime-certs/mksmime-certs.sh new file mode 100644 index 00000000000000..f01f66427c3741 --- /dev/null +++ b/deps/openssl/openssl/test/smime-certs/mksmime-certs.sh @@ -0,0 +1,74 @@ +#!/bin/sh + +# Utility to recreate S/MIME certificates + +OPENSSL=../../apps/openssl +OPENSSL_CONF=./ca.cnf +export OPENSSL_CONF + +# Root CA: create certificate directly +CN="Test S/MIME RSA Root" $OPENSSL req -config ca.cnf -x509 -nodes \ + -keyout smroot.pem -out smroot.pem -newkey rsa:2048 -days 3650 + +# EE RSA certificates: create request first +CN="Test S/MIME EE RSA #1" $OPENSSL req -config ca.cnf -nodes \ + -keyout smrsa1.pem -out req.pem -newkey rsa:2048 +# Sign request: end entity extensions +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smrsa1.pem + +CN="Test S/MIME EE RSA #2" $OPENSSL req -config ca.cnf -nodes \ + -keyout smrsa2.pem -out req.pem -newkey rsa:2048 +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smrsa2.pem + +CN="Test S/MIME EE RSA #3" $OPENSSL req -config ca.cnf -nodes \ + -keyout smrsa3.pem -out req.pem -newkey rsa:2048 +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smrsa3.pem + +# Create DSA parameters + +$OPENSSL dsaparam -out dsap.pem 2048 + +CN="Test S/MIME EE DSA #1" $OPENSSL req -config ca.cnf -nodes \ + -keyout smdsa1.pem -out req.pem -newkey dsa:dsap.pem +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdsa1.pem +CN="Test S/MIME EE DSA #2" $OPENSSL req -config ca.cnf -nodes \ + -keyout smdsa2.pem -out req.pem -newkey dsa:dsap.pem +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdsa2.pem +CN="Test S/MIME EE DSA #3" $OPENSSL req -config ca.cnf -nodes \ + -keyout smdsa3.pem -out req.pem -newkey dsa:dsap.pem +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdsa3.pem + +# Create EC parameters + +$OPENSSL ecparam -out ecp.pem -name P-256 +$OPENSSL ecparam -out ecp2.pem -name K-283 + +CN="Test S/MIME EE EC #1" $OPENSSL req -config ca.cnf -nodes \ + -keyout smec1.pem -out req.pem -newkey ec:ecp.pem +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smec1.pem +CN="Test S/MIME EE EC #2" $OPENSSL req -config ca.cnf -nodes \ + -keyout smec2.pem -out req.pem -newkey ec:ecp2.pem +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smec2.pem +# Create X9.42 DH parameters. +$OPENSSL genpkey -genparam -algorithm DH -pkeyopt dh_paramgen_type:2 \ + -out dhp.pem +# Generate X9.42 DH key. +$OPENSSL genpkey -paramfile dhp.pem -out smdh.pem +$OPENSSL pkey -pubout -in smdh.pem -out dhpub.pem +# Generate dummy request. 
+CN="Test S/MIME EE DH #1" $OPENSSL req -config ca.cnf -nodes \ + -keyout smtmp.pem -out req.pem -newkey rsa:2048 +# Sign request but force public key to DH +$OPENSSL x509 -req -in req.pem -CA smroot.pem -days 3600 \ + -force_pubkey dhpub.pem \ + -extfile ca.cnf -extensions usr_cert -CAcreateserial >>smdh.pem +# Remove temp files. +rm -f req.pem ecp.pem ecp2.pem dsap.pem dhp.pem dhpub.pem smtmp.pem smroot.srl diff --git a/deps/openssl/openssl/test/smime-certs/smdh.pem b/deps/openssl/openssl/test/smime-certs/smdh.pem new file mode 100644 index 00000000000000..f831b0713b9588 --- /dev/null +++ b/deps/openssl/openssl/test/smime-certs/smdh.pem @@ -0,0 +1,33 @@ +-----BEGIN PRIVATE KEY----- +MIIBSgIBADCCASsGByqGSM4+AgEwggEeAoGBANQMSgwEcnEZ31kZxa9Ef8qOK/AJ +9dMlsXMWVYnf/QevGdN/0Aei/j9a8QHG+CvvTm0DOEKhN9QUtABKsYZag865CA7B +mSdHjQuFqILtzA25sDJ+3+jk9vbss+56ETRll/wasJVLGbmmHNkBMvc1fC1d/sGF +cEn4zJnQvvFaeMgDAoGAaQD9ZvL8FYsJuNxN6qp5VfnfRqYvyi2PWSqtRKPGGC+V +thYg49PRjwPOcXzvOsdEOQ7iH9jTiSvnUdwSSEwYTZkSBuQXAgOMJAWOpoXyaRvh +atziBDoBnWS+/kX5RBhxvS0+em9yfRqAQleuGG+R1mEDihyJc8dWQQPT+O1l4oUC +FQCJlKsQZ0VBrWPGcUCNa54ZW6TH9QQWAhRR2NMZrQSfWthXDO8Lj5WZ34zQrA== +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIID/zCCAuegAwIBAgIJANv1TSKgememMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA4MDIxNDQ5MjlaFw0yMzA2MTExNDQ5MjlaMEQx +CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU +ZXN0IFMvTUlNRSBFRSBESCAjMTCCAbYwggErBgcqhkjOPgIBMIIBHgKBgQDUDEoM +BHJxGd9ZGcWvRH/KjivwCfXTJbFzFlWJ3/0HrxnTf9AHov4/WvEBxvgr705tAzhC +oTfUFLQASrGGWoPOuQgOwZknR40LhaiC7cwNubAyft/o5Pb27LPuehE0ZZf8GrCV +Sxm5phzZATL3NXwtXf7BhXBJ+MyZ0L7xWnjIAwKBgGkA/Wby/BWLCbjcTeqqeVX5 +30amL8otj1kqrUSjxhgvlbYWIOPT0Y8DznF87zrHRDkO4h/Y04kr51HcEkhMGE2Z +EgbkFwIDjCQFjqaF8mkb4Wrc4gQ6AZ1kvv5F+UQYcb0tPnpvcn0agEJXrhhvkdZh +A4ociXPHVkED0/jtZeKFAhUAiZSrEGdFQa1jxnFAjWueGVukx/UDgYQAAoGAL1ve +cgI2awBeJH8ULBhSQpdL224VUDxFPiXzt8Vu5VLnxPv0pfA5En+8VByTuV7u6RSw +3/78NuTyr/sTyN8YlB1AuXHdTJynA1ICte1xgD4j2ijlq+dv8goOAFt9xkvXx7LD +umJ/cCignXETcNGfMi8+0s0bpMZyoHRdce8DQ26jYDBeMAwGA1UdEwEB/wQCMAAw +DgYDVR0PAQH/BAQDAgXgMB0GA1UdDgQWBBQLWk1ffSXH8p3Bqrdjgi/6jzLnwDAf +BgNVHSMEGDAWgBTffl6IBSQzCN0igQKXzJq3sTMnMDANBgkqhkiG9w0BAQUFAAOC +AQEAWvJj79MW1/Wq3RIANgAhonsI1jufYqxTH+1M0RU0ZXHulgem77Le2Ls1bizi +0SbvfpTiiFGkbKonKtO2wvfqwwuptSg3omMI5IjAGxYbyv2KBzIpp1O1LTDk9RbD +48JMMF01gByi2+NLUQ1MYF+5RqyoRqcyp5x2+Om1GeIM4Q/GRuI4p4dybWy8iC+d +LeXQfR7HXfh+tAum+WzjfLJwbnWbHmPhTbKB01U4lBp6+r8BGHAtNdPjEHqap4/z +vVZVXti9ThZ20EhM+VFU3y2wyapeQjhQvw/A2YRES0Ik7BSj3hHfWH/CTbLVQnhu +Uj6tw18ExOYxqoEGixNLPA5qsQ== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smdsa1.pem b/deps/openssl/openssl/test/smime-certs/smdsa1.pem index d5677dbfbec48e..b424f6704ed948 100644 --- a/deps/openssl/openssl/test/smime-certs/smdsa1.pem +++ b/deps/openssl/openssl/test/smime-certs/smdsa1.pem @@ -1,34 +1,47 @@ ------BEGIN DSA PRIVATE KEY----- -MIIBuwIBAAKBgQDFJfsIPOIawMO5biw+AoYUhNVxReBOLQosU3Qv4B8krac0BNr3 -OjSGLh1wZxHqhlAE0QmasTaKojuk20nNWeFnczSz6vDl0IVJEhS8VYor5kt9gLqt -GcoAgsf4gRDIutJyQDaNn3IVY89uXUVIoexvQeLQDBCgQPC5O8rJdqBwtwIVAK2J -jt+dqk07eQUE59koYUEKyNorAoGBAI4IEpusf8G14kCHmRtnHXM2tG5EWJDmW6Qt -wjqvWp1GKUx5WFy1tVWR9nl5rL0Di+kNdENo+SkKj7h3uDulGOI6T0mQYbV2h1IK -+FMOGnOqvZ8eNTE2n4PGTo5puZ63LBm+QYrQsrNiUY4vakLFQ2rEK/SLwdsDFK4Z -SJCBQw5zAoGATQlPPF+OeU8nu3rsdXGDiZdJzOkuCce3KQfTABA9C+Dk4CVcvBdd -YRLGpnykumkNTO1sTO+4/Gphsuje1ujK9td4UEhdYqylCe5QjEMrszDlJtelDQF9 
-C0yhdjKGTP0kxofLhsGckcuQvcKEKffT2pDDKJIy4vWQO0UyJl1vjLcCFG2uiGGx -9fMUZq1v0ePD4Wo0Xkxo ------END DSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIICZQIBADCCAjkGByqGSM44BAEwggIsAoIBAQCQfLlNdehPnTrGIMhw4rk0uua6 +k1nCG3zcyfXli17BdB2k0HBPaTA3a3ZHfOt1Awy0Uu0wZ3gdPr9z0I64hnJXIGou +zIanZ7nYRImHtX5JMFbXeyxo1Owd2Zs3oEk9nQUoUsMxvmYC/ghPL5Zx1pPxcHCO +wzWxoG4yZMjimXOc1/W7zvK/4/g/Cz9fItD3zdcydfgM/hK0/CeYQ21xfhqf4mjK +v9plnCcWgToGI+7H8VK80MFbkO2QKRz3vP1/TjK6PRm9sEeB5b10+SvGv2j2w+CC +0fXL4s6n7PtBlm/bww8xL1/Az8kwejUcII1Dc8uNwwISwGbwaGBvl7IHpm21AiEA +rodZi+nCKZdTL8IgCjX3n0DuhPRkVQPjz/B6VweLW9MCggEAfimkUNwnsGFp7mKM +zJKhHoQkMB1qJzyIHjDzQ/J1xjfoF6i27afw1/WKboND5eseZhlhA2TO5ZJB6nGx +DOE9lVQxYVml++cQj6foHh1TVJAgGl4mWuveW/Rz+NEhpK4zVeEsfMrbkBypPByy +xzF1Z49t568xdIo+e8jLI8FjEdXOIUg4ehB3NY6SL8r4oJ49j/sJWfHcDoWH/LK9 +ZaBF8NpflJe3F40S8RDvM8j2HC+y2Q4QyKk1DXGiH+7yQLGWzr3M73kC3UBnnH0h +Hxb7ISDCT7dCw/lH1nCbVFBOM0ASI26SSsFSXQrvD2kryRcTZ0KkyyhhoPODWpU+ +TQMsxQQjAiEAkolGvb/76X3vm5Ov09ezqyBYt9cdj/FLH7DyMkxO7X0= +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIIDpDCCAw2gAwIBAgIJAMtotfHYdEsWMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx +MIIFkDCCBHigAwIBAgIJANk5lu6mSyBDMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEUx CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU -ZXN0IFMvTUlNRSBFRSBEU0EgIzEwggG3MIIBLAYHKoZIzjgEATCCAR8CgYEAxSX7 -CDziGsDDuW4sPgKGFITVcUXgTi0KLFN0L+AfJK2nNATa9zo0hi4dcGcR6oZQBNEJ -mrE2iqI7pNtJzVnhZ3M0s+rw5dCFSRIUvFWKK+ZLfYC6rRnKAILH+IEQyLrSckA2 -jZ9yFWPPbl1FSKHsb0Hi0AwQoEDwuTvKyXagcLcCFQCtiY7fnapNO3kFBOfZKGFB -CsjaKwKBgQCOCBKbrH/BteJAh5kbZx1zNrRuRFiQ5lukLcI6r1qdRilMeVhctbVV -kfZ5eay9A4vpDXRDaPkpCo+4d7g7pRjiOk9JkGG1dodSCvhTDhpzqr2fHjUxNp+D -xk6OabmetywZvkGK0LKzYlGOL2pCxUNqxCv0i8HbAxSuGUiQgUMOcwOBhAACgYBN -CU88X455Tye7eux1cYOJl0nM6S4Jx7cpB9MAED0L4OTgJVy8F11hEsamfKS6aQ1M -7WxM77j8amGy6N7W6Mr213hQSF1irKUJ7lCMQyuzMOUm16UNAX0LTKF2MoZM/STG -h8uGwZyRy5C9woQp99PakMMokjLi9ZA7RTImXW+Mt6OBgzCBgDAdBgNVHQ4EFgQU -4Qfbhpi5yqXaXuCLXj427mR25MkwHwYDVR0jBBgwFoAUE89Lp7uJLrM4Vxd2xput -aFvl7RcwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBsAwIAYDVR0RBBkwF4EV -c21pbWVkc2ExQG9wZW5zc2wub3JnMA0GCSqGSIb3DQEBBQUAA4GBAFrdUzKK1pWO -kd02S423KUBc4GWWyiGlVoEO7WxVhHLJ8sm67X7OtJOwe0UGt+Nc5qLtyJYSirw8 -phjiTdNpQCTJ8+Kc56tWkJ6H7NAI4vTJtPL5BM/EmeYrVSU9JI9xhqpyKw9IBD+n -hRJ79W9FaiJRvaAOX+TkyTukJrxAWRyv +ZXN0IFMvTUlNRSBFRSBEU0EgIzEwggNGMIICOQYHKoZIzjgEATCCAiwCggEBAJB8 +uU116E+dOsYgyHDiuTS65rqTWcIbfNzJ9eWLXsF0HaTQcE9pMDdrdkd863UDDLRS +7TBneB0+v3PQjriGclcgai7MhqdnudhEiYe1fkkwVtd7LGjU7B3ZmzegST2dBShS +wzG+ZgL+CE8vlnHWk/FwcI7DNbGgbjJkyOKZc5zX9bvO8r/j+D8LP18i0PfN1zJ1 ++Az+ErT8J5hDbXF+Gp/iaMq/2mWcJxaBOgYj7sfxUrzQwVuQ7ZApHPe8/X9OMro9 +Gb2wR4HlvXT5K8a/aPbD4ILR9cvizqfs+0GWb9vDDzEvX8DPyTB6NRwgjUNzy43D +AhLAZvBoYG+XsgembbUCIQCuh1mL6cIpl1MvwiAKNfefQO6E9GRVA+PP8HpXB4tb +0wKCAQB+KaRQ3CewYWnuYozMkqEehCQwHWonPIgeMPND8nXGN+gXqLbtp/DX9Ypu +g0Pl6x5mGWEDZM7lkkHqcbEM4T2VVDFhWaX75xCPp+geHVNUkCAaXiZa695b9HP4 +0SGkrjNV4Sx8ytuQHKk8HLLHMXVnj23nrzF0ij57yMsjwWMR1c4hSDh6EHc1jpIv +yvignj2P+wlZ8dwOhYf8sr1loEXw2l+Ul7cXjRLxEO8zyPYcL7LZDhDIqTUNcaIf +7vJAsZbOvczveQLdQGecfSEfFvshIMJPt0LD+UfWcJtUUE4zQBIjbpJKwVJdCu8P +aSvJFxNnQqTLKGGg84NalT5NAyzFA4IBBQACggEAGXSQADbuRIZBjiQ6NikwZl+x +EDEffIE0RWbvwf1tfWxw4ZvanO/djyz5FePO0AIJDBCLUjr9D32nkmIG1Hu3dWgV +86knQsM6uFiMSzY9nkJGZOlH3w4NHLE78pk75xR1sg1MEZr4x/t+a/ea9Y4AXklE 
+DCcaHtpMGeAx3ZAqSKec+zQOOA73JWP1/gYHGdYyTQpQtwRTsh0Gi5mOOdpoJ0vp +O83xYbFCZ+ZZKX1RWOjJe2OQBRtw739q1nRga1VMLAT/LFSQsSE3IOp8hiWbjnit +1SE6q3II2a/aHZH/x4OzszfmtQfmerty3eQSq3bgajfxCsccnRjSbLeNiazRSKNg +MF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwHQYDVR0OBBYEFNHQYTOO +xaZ/N68OpxqjHKuatw6sMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZs +MA0GCSqGSIb3DQEBBQUAA4IBAQAAiLociMMXcLkO/uKjAjCIQMrsghrOrxn4ZGBx +d/mCTeqPxhcrX2UorwxVCKI2+Dmz5dTC2xKprtvkiIadJamJmxYYzeF1pgRriFN3 +MkmMMkTbe/ekSvSeMtHQ2nHDCAJIaA/k9akWfA0+26Ec25/JKMrl3LttllsJMK1z +Xj7TcQpAIWORKWSNxY/ezM34+9ABHDZB2waubFqS+irlZsn38aZRuUI0K67fuuIt +17vMUBqQpe2hfNAjpZ8dIpEdAGjQ6izV2uwP1lXbiaK9U4dvUqmwyCIPniX7Hpaf +0VnX0mEViXMT6vWZTjLBUv0oKmO7xBkWHIaaX6oyF32pK5AO -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smdsa2.pem b/deps/openssl/openssl/test/smime-certs/smdsa2.pem index ef86c115d7f986..648447fc89a1b2 100644 --- a/deps/openssl/openssl/test/smime-certs/smdsa2.pem +++ b/deps/openssl/openssl/test/smime-certs/smdsa2.pem @@ -1,34 +1,47 @@ ------BEGIN DSA PRIVATE KEY----- -MIIBvAIBAAKBgQDFJfsIPOIawMO5biw+AoYUhNVxReBOLQosU3Qv4B8krac0BNr3 -OjSGLh1wZxHqhlAE0QmasTaKojuk20nNWeFnczSz6vDl0IVJEhS8VYor5kt9gLqt -GcoAgsf4gRDIutJyQDaNn3IVY89uXUVIoexvQeLQDBCgQPC5O8rJdqBwtwIVAK2J -jt+dqk07eQUE59koYUEKyNorAoGBAI4IEpusf8G14kCHmRtnHXM2tG5EWJDmW6Qt -wjqvWp1GKUx5WFy1tVWR9nl5rL0Di+kNdENo+SkKj7h3uDulGOI6T0mQYbV2h1IK -+FMOGnOqvZ8eNTE2n4PGTo5puZ63LBm+QYrQsrNiUY4vakLFQ2rEK/SLwdsDFK4Z -SJCBQw5zAoGBAIPmO8BtJ+Yac58trrPwq9b/6VW3jQTWzTLWSH84/QQdqQa+Pz3v -It/+hHM0daNF5uls8ICsPL1aLXmRx0pHvIyb0aAzYae4T4Jv/COPDMTdKbA1uitJ -VbkGZrm+LIrs7I9lOkb4T0vI6kL/XdOCXY1469zsqCgJ/O2ibn6mq0nWAhR716o2 -Nf8SimTZYB0/CKje6M5ufA== ------END DSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIICZAIBADCCAjkGByqGSM44BAEwggIsAoIBAQCQfLlNdehPnTrGIMhw4rk0uua6 +k1nCG3zcyfXli17BdB2k0HBPaTA3a3ZHfOt1Awy0Uu0wZ3gdPr9z0I64hnJXIGou +zIanZ7nYRImHtX5JMFbXeyxo1Owd2Zs3oEk9nQUoUsMxvmYC/ghPL5Zx1pPxcHCO +wzWxoG4yZMjimXOc1/W7zvK/4/g/Cz9fItD3zdcydfgM/hK0/CeYQ21xfhqf4mjK +v9plnCcWgToGI+7H8VK80MFbkO2QKRz3vP1/TjK6PRm9sEeB5b10+SvGv2j2w+CC +0fXL4s6n7PtBlm/bww8xL1/Az8kwejUcII1Dc8uNwwISwGbwaGBvl7IHpm21AiEA +rodZi+nCKZdTL8IgCjX3n0DuhPRkVQPjz/B6VweLW9MCggEAfimkUNwnsGFp7mKM +zJKhHoQkMB1qJzyIHjDzQ/J1xjfoF6i27afw1/WKboND5eseZhlhA2TO5ZJB6nGx +DOE9lVQxYVml++cQj6foHh1TVJAgGl4mWuveW/Rz+NEhpK4zVeEsfMrbkBypPByy +xzF1Z49t568xdIo+e8jLI8FjEdXOIUg4ehB3NY6SL8r4oJ49j/sJWfHcDoWH/LK9 +ZaBF8NpflJe3F40S8RDvM8j2HC+y2Q4QyKk1DXGiH+7yQLGWzr3M73kC3UBnnH0h +Hxb7ISDCT7dCw/lH1nCbVFBOM0ASI26SSsFSXQrvD2kryRcTZ0KkyyhhoPODWpU+ +TQMsxQQiAiAdCUJ5n2Q9hIynN8BMpnRcdfH696BKejGx+2Mr2kfnnA== +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIIDpTCCAw6gAwIBAgIJAMtotfHYdEsXMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx +MIIFkDCCBHigAwIBAgIJANk5lu6mSyBEMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEUx CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU -ZXN0IFMvTUlNRSBFRSBEU0EgIzIwggG4MIIBLAYHKoZIzjgEATCCAR8CgYEAxSX7 -CDziGsDDuW4sPgKGFITVcUXgTi0KLFN0L+AfJK2nNATa9zo0hi4dcGcR6oZQBNEJ -mrE2iqI7pNtJzVnhZ3M0s+rw5dCFSRIUvFWKK+ZLfYC6rRnKAILH+IEQyLrSckA2 -jZ9yFWPPbl1FSKHsb0Hi0AwQoEDwuTvKyXagcLcCFQCtiY7fnapNO3kFBOfZKGFB -CsjaKwKBgQCOCBKbrH/BteJAh5kbZx1zNrRuRFiQ5lukLcI6r1qdRilMeVhctbVV -kfZ5eay9A4vpDXRDaPkpCo+4d7g7pRjiOk9JkGG1dodSCvhTDhpzqr2fHjUxNp+D 
-xk6OabmetywZvkGK0LKzYlGOL2pCxUNqxCv0i8HbAxSuGUiQgUMOcwOBhQACgYEA -g+Y7wG0n5hpzny2us/Cr1v/pVbeNBNbNMtZIfzj9BB2pBr4/Pe8i3/6EczR1o0Xm -6WzwgKw8vVoteZHHSke8jJvRoDNhp7hPgm/8I48MxN0psDW6K0lVuQZmub4siuzs -j2U6RvhPS8jqQv9d04JdjXjr3OyoKAn87aJufqarSdajgYMwgYAwHQYDVR0OBBYE -FHsAGNfVltSYUq4hC+YVYwsYtA+dMB8GA1UdIwQYMBaAFBPPS6e7iS6zOFcXdsab -rWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgbAMCAGA1UdEQQZMBeB -FXNtaW1lZHNhMkBvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQCx9BtCbaYF -FXjLClkuKXbESaDZA1biPgY25i00FsUzARuhCpqD2v+0tu5c33ZzIhL6xlvBRU5l -6Atw/xpZhae+hdBEtxPJoGekLLrHOau7Md3XwDjV4lFgcEJkWZoaSOOIK+4D5jF0 -jZWtHjnwEzuLYlo7ScHSsbcQfjH0M1TP5A== +ZXN0IFMvTUlNRSBFRSBEU0EgIzIwggNGMIICOQYHKoZIzjgEATCCAiwCggEBAJB8 +uU116E+dOsYgyHDiuTS65rqTWcIbfNzJ9eWLXsF0HaTQcE9pMDdrdkd863UDDLRS +7TBneB0+v3PQjriGclcgai7MhqdnudhEiYe1fkkwVtd7LGjU7B3ZmzegST2dBShS +wzG+ZgL+CE8vlnHWk/FwcI7DNbGgbjJkyOKZc5zX9bvO8r/j+D8LP18i0PfN1zJ1 ++Az+ErT8J5hDbXF+Gp/iaMq/2mWcJxaBOgYj7sfxUrzQwVuQ7ZApHPe8/X9OMro9 +Gb2wR4HlvXT5K8a/aPbD4ILR9cvizqfs+0GWb9vDDzEvX8DPyTB6NRwgjUNzy43D +AhLAZvBoYG+XsgembbUCIQCuh1mL6cIpl1MvwiAKNfefQO6E9GRVA+PP8HpXB4tb +0wKCAQB+KaRQ3CewYWnuYozMkqEehCQwHWonPIgeMPND8nXGN+gXqLbtp/DX9Ypu +g0Pl6x5mGWEDZM7lkkHqcbEM4T2VVDFhWaX75xCPp+geHVNUkCAaXiZa695b9HP4 +0SGkrjNV4Sx8ytuQHKk8HLLHMXVnj23nrzF0ij57yMsjwWMR1c4hSDh6EHc1jpIv +yvignj2P+wlZ8dwOhYf8sr1loEXw2l+Ul7cXjRLxEO8zyPYcL7LZDhDIqTUNcaIf +7vJAsZbOvczveQLdQGecfSEfFvshIMJPt0LD+UfWcJtUUE4zQBIjbpJKwVJdCu8P +aSvJFxNnQqTLKGGg84NalT5NAyzFA4IBBQACggEAItQlFu0t7Mw1HHROuuwKLS+E +h2WNNZP96MLQTygOVlqgaJY+1mJLzvl/51LLH6YezX0t89Z2Dm/3SOJEdNrdbIEt +tbu5rzymXxFhc8uaIYZFhST38oQwJOjM8wFitAQESe6/9HZjkexMqSqx/r5aEKTa +LBinqA1BJRI72So1/1dv8P99FavPADdj8V7fAccReKEQKnfnwA7mrnD+OlIqFKFn +3wCGk8Sw7tSJ9g6jgCI+zFwrKn2w+w+iot/Ogxl9yMAtKmAd689IAZr5GPPvV2y0 +KOogCiUYgSTSawZhr+rjyFavfI5dBWzMq4tKx/zAi6MJ+6hGJjJ8jHoT9JAPmaNg +MF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwHQYDVR0OBBYEFGaxw04k +qpufeGZC+TTBq8oMnXyrMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZs +MA0GCSqGSIb3DQEBBQUAA4IBAQCk2Xob1ICsdHYx/YsBzY6E1eEwcI4RZbZ3hEXp +VA72/Mbz60gjv1OwE5Ay4j+xG7IpTio6y2A9ZNepGpzidYcsL/Lx9Sv1LlN0Ukzb +uk6Czd2sZJp+PFMTTrgCd5rXKnZs/0D84Vci611vGMA1hnUnbAnBBmgLXe9pDNRV +6mhmCLLjJ4GOr5Wxt/hhknr7V2e1VMx3Q47GZhc0o/gExfhxXA8+gicM0nEYNakD +2A1F0qDhQGakjuofANHhjdUDqKJ1sxurAy80fqb0ddzJt2el89iXKN+aXx/zEX96 +GI5ON7z/bkVwIi549lUOpWb2Mved61NBzCLKVP7HSuEIsC/I -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smdsa3.pem b/deps/openssl/openssl/test/smime-certs/smdsa3.pem index eeb848dabc50b4..77acc5e46ffc30 100644 --- a/deps/openssl/openssl/test/smime-certs/smdsa3.pem +++ b/deps/openssl/openssl/test/smime-certs/smdsa3.pem @@ -1,34 +1,47 @@ ------BEGIN DSA PRIVATE KEY----- -MIIBvAIBAAKBgQDFJfsIPOIawMO5biw+AoYUhNVxReBOLQosU3Qv4B8krac0BNr3 -OjSGLh1wZxHqhlAE0QmasTaKojuk20nNWeFnczSz6vDl0IVJEhS8VYor5kt9gLqt -GcoAgsf4gRDIutJyQDaNn3IVY89uXUVIoexvQeLQDBCgQPC5O8rJdqBwtwIVAK2J -jt+dqk07eQUE59koYUEKyNorAoGBAI4IEpusf8G14kCHmRtnHXM2tG5EWJDmW6Qt -wjqvWp1GKUx5WFy1tVWR9nl5rL0Di+kNdENo+SkKj7h3uDulGOI6T0mQYbV2h1IK -+FMOGnOqvZ8eNTE2n4PGTo5puZ63LBm+QYrQsrNiUY4vakLFQ2rEK/SLwdsDFK4Z -SJCBQw5zAoGAYzOpPmh8Je1IDauEXhgaLz14wqYUHHcrj2VWVJ6fRm8GhdQFJSI7 -GUk08pgKZSKic2lNqxuzW7/vFxKQ/nvzfytY16b+2i+BR4Q6yvMzCebE1hHVg0Ju -TwfUMwoFEOhYP6ZwHSUiQl9IBMH9TNJCMwYMxfY+VOrURFsjGTRUgpwCFQCIGt5g -Y+XZd0Sv69CatDIRYWvaIA== ------END DSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIICZQIBADCCAjkGByqGSM44BAEwggIsAoIBAQCQfLlNdehPnTrGIMhw4rk0uua6 +k1nCG3zcyfXli17BdB2k0HBPaTA3a3ZHfOt1Awy0Uu0wZ3gdPr9z0I64hnJXIGou +zIanZ7nYRImHtX5JMFbXeyxo1Owd2Zs3oEk9nQUoUsMxvmYC/ghPL5Zx1pPxcHCO 
+wzWxoG4yZMjimXOc1/W7zvK/4/g/Cz9fItD3zdcydfgM/hK0/CeYQ21xfhqf4mjK +v9plnCcWgToGI+7H8VK80MFbkO2QKRz3vP1/TjK6PRm9sEeB5b10+SvGv2j2w+CC +0fXL4s6n7PtBlm/bww8xL1/Az8kwejUcII1Dc8uNwwISwGbwaGBvl7IHpm21AiEA +rodZi+nCKZdTL8IgCjX3n0DuhPRkVQPjz/B6VweLW9MCggEAfimkUNwnsGFp7mKM +zJKhHoQkMB1qJzyIHjDzQ/J1xjfoF6i27afw1/WKboND5eseZhlhA2TO5ZJB6nGx +DOE9lVQxYVml++cQj6foHh1TVJAgGl4mWuveW/Rz+NEhpK4zVeEsfMrbkBypPByy +xzF1Z49t568xdIo+e8jLI8FjEdXOIUg4ehB3NY6SL8r4oJ49j/sJWfHcDoWH/LK9 +ZaBF8NpflJe3F40S8RDvM8j2HC+y2Q4QyKk1DXGiH+7yQLGWzr3M73kC3UBnnH0h +Hxb7ISDCT7dCw/lH1nCbVFBOM0ASI26SSsFSXQrvD2kryRcTZ0KkyyhhoPODWpU+ +TQMsxQQjAiEArJr6p2zTbhRppQurHGTdmdYHqrDdZH4MCsD9tQCw1xY= +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIIDpDCCAw2gAwIBAgIJAMtotfHYdEsYMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx +MIIFkDCCBHigAwIBAgIJANk5lu6mSyBFMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEUx CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU -ZXN0IFMvTUlNRSBFRSBEU0EgIzMwggG3MIIBLAYHKoZIzjgEATCCAR8CgYEAxSX7 -CDziGsDDuW4sPgKGFITVcUXgTi0KLFN0L+AfJK2nNATa9zo0hi4dcGcR6oZQBNEJ -mrE2iqI7pNtJzVnhZ3M0s+rw5dCFSRIUvFWKK+ZLfYC6rRnKAILH+IEQyLrSckA2 -jZ9yFWPPbl1FSKHsb0Hi0AwQoEDwuTvKyXagcLcCFQCtiY7fnapNO3kFBOfZKGFB -CsjaKwKBgQCOCBKbrH/BteJAh5kbZx1zNrRuRFiQ5lukLcI6r1qdRilMeVhctbVV -kfZ5eay9A4vpDXRDaPkpCo+4d7g7pRjiOk9JkGG1dodSCvhTDhpzqr2fHjUxNp+D -xk6OabmetywZvkGK0LKzYlGOL2pCxUNqxCv0i8HbAxSuGUiQgUMOcwOBhAACgYBj -M6k+aHwl7UgNq4ReGBovPXjCphQcdyuPZVZUnp9GbwaF1AUlIjsZSTTymAplIqJz -aU2rG7Nbv+8XEpD+e/N/K1jXpv7aL4FHhDrK8zMJ5sTWEdWDQm5PB9QzCgUQ6Fg/ -pnAdJSJCX0gEwf1M0kIzBgzF9j5U6tREWyMZNFSCnKOBgzCBgDAdBgNVHQ4EFgQU -VhpVXqQ/EzUMdxLvP7o9EhJ8h70wHwYDVR0jBBgwFoAUE89Lp7uJLrM4Vxd2xput -aFvl7RcwDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBsAwIAYDVR0RBBkwF4EV -c21pbWVkc2EzQG9wZW5zc2wub3JnMA0GCSqGSIb3DQEBBQUAA4GBACM9e75EQa8m -k/AZkH/tROqf3yeqijULl9x8FjFatqoY+29OM6oMGM425IqSkKd2ipz7OxO0SShu -rE0O3edS7DvYBwvhWPviRaYBMyZ4iFJVup+fOzoYK/j/bASxS3BHQBwb2r4rhe25 -OlTyyFEk7DJyW18YFOG97S1P52oQ5f5x +ZXN0IFMvTUlNRSBFRSBEU0EgIzMwggNGMIICOQYHKoZIzjgEATCCAiwCggEBAJB8 +uU116E+dOsYgyHDiuTS65rqTWcIbfNzJ9eWLXsF0HaTQcE9pMDdrdkd863UDDLRS +7TBneB0+v3PQjriGclcgai7MhqdnudhEiYe1fkkwVtd7LGjU7B3ZmzegST2dBShS +wzG+ZgL+CE8vlnHWk/FwcI7DNbGgbjJkyOKZc5zX9bvO8r/j+D8LP18i0PfN1zJ1 ++Az+ErT8J5hDbXF+Gp/iaMq/2mWcJxaBOgYj7sfxUrzQwVuQ7ZApHPe8/X9OMro9 +Gb2wR4HlvXT5K8a/aPbD4ILR9cvizqfs+0GWb9vDDzEvX8DPyTB6NRwgjUNzy43D +AhLAZvBoYG+XsgembbUCIQCuh1mL6cIpl1MvwiAKNfefQO6E9GRVA+PP8HpXB4tb +0wKCAQB+KaRQ3CewYWnuYozMkqEehCQwHWonPIgeMPND8nXGN+gXqLbtp/DX9Ypu +g0Pl6x5mGWEDZM7lkkHqcbEM4T2VVDFhWaX75xCPp+geHVNUkCAaXiZa695b9HP4 +0SGkrjNV4Sx8ytuQHKk8HLLHMXVnj23nrzF0ij57yMsjwWMR1c4hSDh6EHc1jpIv +yvignj2P+wlZ8dwOhYf8sr1loEXw2l+Ul7cXjRLxEO8zyPYcL7LZDhDIqTUNcaIf +7vJAsZbOvczveQLdQGecfSEfFvshIMJPt0LD+UfWcJtUUE4zQBIjbpJKwVJdCu8P +aSvJFxNnQqTLKGGg84NalT5NAyzFA4IBBQACggEAcXvtfiJfIZ0wgGpN72ZeGrJ9 +msUXOxow7w3fDbP8r8nfVkBNbfha8rx0eY6fURFVZzIOd8EHGKypcH1gS6eZNucf +zgsH1g5r5cRahMZmgGXBEBsWrh2IaDG7VSKt+9ghz27EKgjAQCzyHQL5FCJgR2p7 +cv0V4SRqgiAGYlJ191k2WtLOsVd8kX//jj1l8TUgE7TqpuSEpaSyQ4nzJROpZWZp +N1RwFmCURReykABU/Nzin/+rZnvZrp8WoXSXEqxeB4mShRSaH57xFnJCpRwKJ4qS +2uhATzJaKH7vu63k3DjftbSBVh+32YXwtHc+BGjs8S2aDtCW3FtDA7Z6J8BIxaNg +MF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8EBAMCBeAwHQYDVR0OBBYEFMJxatDE +FCEFGl4uoiQQ1050Ju9RMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZs 
+MA0GCSqGSIb3DQEBBQUAA4IBAQBGZD1JnMep39KMOhD0iBTmyjhtcnRemckvRask +pS/CqPwo+M+lPNdxpLU2w9b0QhPnj0yAS/BS1yBjsLGY4DP156k4Q3QOhwsrTmrK +YOxg0w7DOpkv5g11YLJpHsjSOwg5uIMoefL8mjQK6XOFOmQXHJrUtGulu+fs6FlM +khGJcW4xYVPK0x/mHvTT8tQaTTkgTdVHObHF5Dyx/F9NMpB3RFguQPk2kT4lJc4i +Up8T9mLzaxz6xc4wwh8h70Zw81lkGYhX+LRk3sfd/REq9x4QXQNP9t9qU1CgrBzv +4orzt9cda4r+rleSg2XjWnXzMydE6DuwPVPZlqnLbSYUy660 -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smec1.pem b/deps/openssl/openssl/test/smime-certs/smec1.pem new file mode 100644 index 00000000000000..75a862666b2544 --- /dev/null +++ b/deps/openssl/openssl/test/smime-certs/smec1.pem @@ -0,0 +1,22 @@ +-----BEGIN PRIVATE KEY----- +MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgXzBRX9Z5Ib4LAVAS +DMlYvkj0SmLmYvWULe2LfyXRmpWhRANCAAS+SIj2FY2DouPRuNDp9WVpsqef58tV +3gIwV0EOV/xyYTzZhufZi/aBcXugWR1x758x4nHus2uEuEFi3Mr3K3+x +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICoDCCAYigAwIBAgIJANk5lu6mSyBGMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEQx +CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU +ZXN0IFMvTUlNRSBFRSBFQyAjMTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABL5I +iPYVjYOi49G40On1ZWmyp5/ny1XeAjBXQQ5X/HJhPNmG59mL9oFxe6BZHXHvnzHi +ce6za4S4QWLcyvcrf7GjYDBeMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXg +MB0GA1UdDgQWBBR/ybxC2DI+Jydhx1FMgPbMTmLzRzAfBgNVHSMEGDAWgBTJkVMK +Y3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEAdk9si83JjtgHHHGy +WcgWDfM0jzlWBsgFNQ9DwAuB7gJd/LG+5Ocajg5XdA5FXAdKkfwI6be3PdcVs3Bt +7f/fdKfBxfr9/SvFHnK7PVAX2x1wwS4HglX1lfoyq1boSvsiJOnAX3jsqXJ9TJiV +FlgRVnhnrw6zz3Xs/9ZDMTENUrqDHPNsDkKEi+9SqIsqDXpMCrGHP4ic+S8Rov1y +S+0XioMxVyXDp6XcL4PQ/NgHbw5/+UcS0me0atZ6pW68C0vi6xeU5vxojyuZxMI1 +DXXwMhOXWaKff7KNhXDUN0g58iWlnyaCz4XQwFsbbFs88TQ1+e/aj3bbwTxUeyN7 +qtcHJA== +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smec2.pem b/deps/openssl/openssl/test/smime-certs/smec2.pem new file mode 100644 index 00000000000000..457297a760f153 --- /dev/null +++ b/deps/openssl/openssl/test/smime-certs/smec2.pem @@ -0,0 +1,23 @@ +-----BEGIN PRIVATE KEY----- +MIGPAgEAMBAGByqGSM49AgEGBSuBBAAQBHgwdgIBAQQjhHaq507MOBznelrLG/pl +brnnJi/iEJUUp+Pm3PEiteXqckmhTANKAAQF2zs6vobmoT+M+P2+9LZ7asvFBNi7 +uCzLYF/8j1Scn/spczoC9vNzVhNw+Lg7dnjNL4EDIyYZLl7E0v69luzbvy+q44/8 +6bQ= +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICpTCCAY2gAwIBAgIJANk5lu6mSyBHMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzFaFw0yMzA1MjYxNzI4MzFaMEQx +CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU +ZXN0IFMvTUlNRSBFRSBFQyAjMjBeMBAGByqGSM49AgEGBSuBBAAQA0oABAXbOzq+ +huahP4z4/b70tntqy8UE2Lu4LMtgX/yPVJyf+ylzOgL283NWE3D4uDt2eM0vgQMj +JhkuXsTS/r2W7Nu/L6rjj/zptKNgMF4wDAYDVR0TAQH/BAIwADAOBgNVHQ8BAf8E +BAMCBeAwHQYDVR0OBBYEFGf+QSQlkN20PsNN7x+jmQIJBDcXMB8GA1UdIwQYMBaA +FMmRUwpjexZbi71E8HaIqSTm5bZsMA0GCSqGSIb3DQEBBQUAA4IBAQBaBBryl2Ez +ftBrGENXMKQP3bBEw4n9ely6HvYQi9IC7HyK0ktz7B2FcJ4z96q38JN3cLxV0DhK +xT/72pFmQwZVJngvRaol0k1B+bdmM03llxCw/uNNZejixDjHUI9gEfbigehd7QY0 +uYDu4k4O35/z/XPQ6O5Kzw+J2vdzU8GXlMBbWeZWAmEfLGbk3Ux0ouITnSz0ty5P +rkHTo0uprlFcZAsrsNY5v5iuomYT7ZXAR3sqGZL1zPOKBnyfXeNFUfnKsZW7Fnlq +IlYBQIjqR1HGxxgCSy66f1oplhxSch4PUpk5tqrs6LeOqc2+xROy1T5YrB3yjVs0 +4ZdCllHZkhop +-----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smroot.pem b/deps/openssl/openssl/test/smime-certs/smroot.pem index a59eb2684ca455..d1a253f40958b2 
100644 --- a/deps/openssl/openssl/test/smime-certs/smroot.pem +++ b/deps/openssl/openssl/test/smime-certs/smroot.pem @@ -1,30 +1,49 @@ ------BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQDBV1Z/Q5gPF7lojc8pKUdyz5+Jf2B3vs4he6egekugWnoJduki -9Lnae/JchB/soIX0co3nLc11NuFFlnAWJNMDJr08l5AHAJLYNHevF5l/f9oDQwvZ -speKh1xpIAJNqCTzVeQ/ZLx6/GccIXV/xDuKIiovqJTPgR5WPkYKaw++lQIDAQAB -AoGALXnUj5SflJU4+B2652ydMKUjWl0KnL/VjkyejgGV/j6py8Ybaixz9q8Gv7oY -JDlRqMC1HfZJCFQDQrHy5VJ+CywA/H9WrqKo/Ch9U4tJAZtkig1Cmay/BAYixVu0 -xBeim10aKF6hxHH4Chg9We+OCuzWBWJhqveNjuDedL/i7JUCQQDlejovcwBUCbhJ -U12qKOwlaboolWbl7yF3XdckTJZg7+1UqQHZH5jYZlLZyZxiaC92SNV0SyTLJZnS -Jh5CO+VDAkEA16/pPcuVtMMz/R6SSPpRSIAa1stLs0mFSs3NpR4pdm0n42mu05pO -1tJEt3a1g7zkreQBf53+Dwb+lA841EkjRwJBAIFmt0DifKDnCkBu/jZh9SfzwsH3 -3Zpzik+hXxxdA7+ODCrdUul449vDd5zQD5t+XKU61QNLDGhxv5e9XvrCg7kCQH/a -3ldsVF0oDaxxL+QkxoREtCQ5tLEd1u7F2q6Tl56FDE0pe6Ih6bQ8RtG+g9EI60IN -U7oTrOO5kLWx5E0q4ccCQAZVgoenn9MhRU1agKOCuM6LT2DxReTu4XztJzynej+8 -0J93n3ebanB1MlRpn1XJwhQ7gAC8ImaQKLJK5jdJzFc= ------END RSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCyyQXED5HyVWwq +nXyzmY317yMUJrIfsKvREG2C691dJNHgNg+oq5sjt/fzkyS84AvdOiicAsao4cYL +DulthaLpbC7msEBhvwAil0FNb5g3ERupe1KuTdUV1UuD/i6S2VoaNXUBBn1rD9Wc +BBc0lnx/4Wt92eQTI6925pt7ZHPQw2Olp7TQDElyi5qPxCem4uT0g3zbZsWqmmsI +MXbu+K3dEprzqA1ucKXbxUmZNkMwVs2XCmlLxrRUj8C3/zENtH17HWCznhR/IVcV +kgIuklkeiDsEhbWvUQumVXR7oPh/CPZAbjGqq5mVueHSHrp7brBVZKHZvoUka28Q +LWitq1W5AgMBAAECggEASkRnOMKfBeOmQy2Yl6K57eeg0sYgSDnDpd0FINWJ5x9c +b58FcjOXBodtYKlHIY6QXx3BsM0WaSEge4d+QBi7S+u8r+eXVwNYswXSArDQsk9R +Bl5MQkvisGciL3pvLmFLpIeASyS/BLJXMbAhU58PqK+jT2wr6idwxBuXivJ3ichu +ISdT1s2aMmnD86ulCD2DruZ4g0mmk5ffV+Cdj+WWkyvEaJW2GRYov2qdaqwSOxV4 +Yve9qStvEIWAf2cISQjbnw2Ww6Z5ebrqlOz9etkmwIly6DTbrIneBnoqJlFFWGlF +ghuzc5RE2w1GbcKSOt0qXH44MTf/j0r86dlu7UIxgQKBgQDq0pEaiZuXHi9OQAOp +PsDEIznCU1bcTDJewANHag5DPEnMKLltTNyLaBRulMypI+CrDbou0nDr29VOzfXx +mNvi/c7RttOBOx7kXKvu0JUFKe2oIWRsg0KsyMX7UFMVaHFgrW+8DhQc7HK7URiw +nitOnA7YwIHRF9BMmcWcLFEYBQKBgQDC6LPbXV8COKO0YCfGXPnE7EZGD/p0Q92Z +8CoSefphEScSdO1IpxFXG7fOZ4x2GQb9q7D3IvaeKAqNjUjkuyxdB30lIWDBwSWw +fFgsa2SZwD5P60G/ar50YJr6LiF333aUMDVmC9swFfZERAEmGUz2NTrPWQdIx/lu +PyDtUR75JQKBgHaoCCJ8vl5SJl1IA5GV4Bo8IoeLTSzsY9d09zMy6BoZcMD1Ix2T +5S2cXhayoegl9PT6bsYSGHVWFCdJ86ktMI826TcXRzDaCvYhzc9THroJQcnfdbtP +aHWezkv7fsAmkoPjn75K7ubeo+r7Q5qbkg6a1PW58N8TRXIvkackzaVxAoGBALAq +qh3U+AHG9dgbrPeyo6KkuCOtX39ks8/mbfCDRZYkbb9V5f5r2tVz3R93IlK/7jyr +yWimtmde46Lrl33922w+T5OW5qBZllo9GWkUrDn3s5qClcuQjJIdmxYTSfbSCJiK +NkmE39lHkG5FVRB9f71tgTlWS6ox7TYDYxx83NTtAoGAUJPAkGt4yGAN4Pdebv53 +bSEpAAULBHntiqDEOu3lVColHuZIucml/gbTpQDruE4ww4wE7dOhY8Q4wEBVYbRI +vHkSiWpJUvZCuKG8Foh5pm9hU0qb+rbQV7NhLJ02qn1AMGO3F/WKrHPPY8/b9YhQ +KfvPCYimQwBjVrEnSntLPR0= +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIICaTCCAdKgAwIBAgIJAP6VN47boiXRMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDdaFw0xNjA1MTExMzUzMDdaMEQx -CzAJBgNVBAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRU -ZXN0IFMvTUlNRSBSU0EgUm9vdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA -wVdWf0OYDxe5aI3PKSlHcs+fiX9gd77OIXunoHpLoFp6CXbpIvS52nvyXIQf7KCF -9HKN5y3NdTbhRZZwFiTTAya9PJeQBwCS2DR3rxeZf3/aA0ML2bKXiodcaSACTagk -81XkP2S8evxnHCF1f8Q7iiIqL6iUz4EeVj5GCmsPvpUCAwEAAaNjMGEwHQYDVR0O -BBYEFBPPS6e7iS6zOFcXdsabrWhb5e0XMB8GA1UdIwQYMBaAFBPPS6e7iS6zOFcX -dsabrWhb5e0XMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMA0GCSqG -SIb3DQEBBQUAA4GBAIECprq5viDvnDbkyOaiSr9ubMUmWqvycfAJMdPZRKcOZczS 
-l+L9R9lF3JSqbt3knOe9u6bGDBOTY2285PdCCuHRVMk2Af1f6El1fqAlRUwNqipp -r68sWFuRqrcRNtk6QQvXfkOhrqQBuDa7te/OVQLa2lGN9Dr2mQsD8ijctatG +MIIDbjCCAlagAwIBAgIJAMc+8VKBJ/S9MA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MjlaFw0yMzA3MTUxNzI4MjlaMEQx +CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRU +ZXN0IFMvTUlNRSBSU0EgUm9vdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBALLJBcQPkfJVbCqdfLOZjfXvIxQmsh+wq9EQbYLr3V0k0eA2D6irmyO39/OT +JLzgC906KJwCxqjhxgsO6W2FoulsLuawQGG/ACKXQU1vmDcRG6l7Uq5N1RXVS4P+ +LpLZWho1dQEGfWsP1ZwEFzSWfH/ha33Z5BMjr3bmm3tkc9DDY6WntNAMSXKLmo/E +J6bi5PSDfNtmxaqaawgxdu74rd0SmvOoDW5wpdvFSZk2QzBWzZcKaUvGtFSPwLf/ +MQ20fXsdYLOeFH8hVxWSAi6SWR6IOwSFta9RC6ZVdHug+H8I9kBuMaqrmZW54dIe +untusFVkodm+hSRrbxAtaK2rVbkCAwEAAaNjMGEwHQYDVR0OBBYEFMmRUwpjexZb +i71E8HaIqSTm5bZsMB8GA1UdIwQYMBaAFMmRUwpjexZbi71E8HaIqSTm5bZsMA8G +A1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMA0GCSqGSIb3DQEBBQUAA4IB +AQAwpIVWQey2u/XoQSMSu0jd0EZvU+lhLaFrDy/AHQeG3yX1+SAOM6f6w+efPvyb +Op1NPI9UkMPb4PCg9YC7jgYokBkvAcI7J4FcuDKMVhyCD3cljp0ouuKruvEf4FBl +zyQ9pLqA97TuG8g1hLTl8G90NzTRcmKpmhs18BmCxiqHcTfoIpb3QvPkDX8R7LVt +9BUGgPY+8ELCgw868TuHh/Cnc67gBtRjBp0sCYVzGZmKsO5f1XdHrAZKYN5mEp0C +7/OqcDoFqORTquLeycg1At/9GqhDEgxNrqA+YEsPbLGAfsNuXUsXs2ubpGsOZxKt +Emsny2ah6fU2z7PztrUy/A80 -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smrsa1.pem b/deps/openssl/openssl/test/smime-certs/smrsa1.pem index 2cf3148e334bc3..d0d0b9e66b01c1 100644 --- a/deps/openssl/openssl/test/smime-certs/smrsa1.pem +++ b/deps/openssl/openssl/test/smime-certs/smrsa1.pem @@ -1,31 +1,49 @@ ------BEGIN RSA PRIVATE KEY----- -MIICXgIBAAKBgQC6A978j4pmPgUtUQqF+bjh6vdhwGOGZSD7xXgFTMjm88twfv+E -ixkq2KXSDjD0ZXoQbdOaSbvGRQrIJpG2NGiKAFdYNrP025kCCdh5wF/aEI7KLEm7 -JlHwXpQsuj4wkMgmkFjL3Ty4Z55aNH+2pPQIa0k+ENJXm2gDuhqgBmduAwIDAQAB -AoGBAJMuYu51aO2THyeHGwt81uOytcCbqGP7eoib62ZOJhxPRGYjpmuqX+R9/V5i -KiwGavm63JYUx0WO9YP+uIZxm1BUATzkgkS74u5LP6ajhkZh6/Bck1oIYYkbVOXl -JVrdENuH6U7nupznsyYgONByo+ykFPVUGmutgiaC7NMVo/MxAkEA6KLejWXdCIEn -xr7hGph9NlvY9xuRIMexRV/WrddcFfCdjI1PciIupgrIkR65M9yr7atm1iU6/aRf -KOr8rLZsSQJBAMyyXN71NsDNx4BP6rtJ/LJMP0BylznWkA7zWfGCbAYn9VhZVlSY -Eu9Gyr7quD1ix7G3kInKVYOEEOpockBLz+sCQQCedyMmKjcQLfpMVYW8uhbAynvW -h36qV5yXZxszO7nMcCTBsxhk5IfmLv5EbCs3+p9avCDGyoGOeUMg+kC33WORAkAg -oUIarH4o5+SoeJTTfCzTA0KF9H5U0vYt2+73h7HOnWoHxl3zqDZEfEVvf50U8/0f -QELDJETTbScBJtsnkq43AkEA38etvoZ2i4FJvvo7R/9gWBHVEcrGzcsCBYrNnIR1 -SZLRwHEGaiOK1wxMsWzqp7PJwL9z/M8A8DyOFBx3GPOniA== ------END RSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDXr9uzB/20QXKC +xhkfNnJvl2xl1hzdOcrQmAqo+AAAcA/D49ImuJDVQRaK2bcj54XB26i1kXuOrxID +3/etUb8yudfx8OAVwh8G0xVA4zhr8uXW85W2tBr4v0Lt+W6lSd6Hmfrk4GmE9LTU +/vzl9HUPW6SZShN1G0nY6oeUXvLi0vasEUKv3a51T6JFYg4c7qt5RCk/w8kwrQ0D +orQwCdkOPEIiC4b+nPStF12SVm5bx8rbYzioxuY/PdSebvt0APeqgRxSpCxqYnHs +CoNeHzSrGXcP0COzFeUOz2tdrhmH09JLbGZs4nbojPxMkjpJSv3/ekDG2CHYxXSH +XxpJstxZAgMBAAECggEASY4xsJaTEPwY3zxLqPdag2/yibBBW7ivz/9p80HQTlXp +KnbxXj8nNXLjCytAZ8A3P2t316PrrTdLP4ML5lGwkM4MNPhek00GY79syhozTa0i +cPHVJt+5Kwee/aVI9JmCiGAczh0yHyOM3+6ttIZvvXMVaSl4BUHvJ0ikQBc5YdzL +s6VM2gCOR6K6n+39QHDI/T7WwO9FFSNnpWFOCHwAWtyBMlleVj+xeZX8OZ/aT+35 +27yjsGNBftWKku29VDineiQC+o+fZGJs6w4JZHoBSP8TfxP8fRCFVNA281G78Xak +cEnKXwZ54bpoSa3ThKl+56J6NHkkfRGb8Rgt/ipJYQKBgQD5DKb82mLw85iReqsT +8bkp408nPOBGz7KYnQsZqAVNGfehM02+dcN5z+w0jOj6GMPLPg5whlEo/O+rt9ze +j6c2+8/+B4Bt5oqCKoOCIndH68jl65+oUxFkcHYxa3zYKGC9Uvb+x2BtBmYgvDRG 
+ew6I2Q3Zyd2ThZhJygUZpsjsbQKBgQDdtNiGTkgWOm+WuqBI1LT5cQfoPfgI7/da +ZA+37NBUQRe0cM7ddEcNqx7E3uUa1JJOoOYv65VyGI33Ul+evI8h5WE5bupcCEFk +LolzbMc4YQUlsySY9eUXM8jQtfVtaWhuQaABt97l+9oADkrhA+YNdEu2yiz3T6W+ +msI5AnvkHQKBgDEjuPMdF/aY6dqSjJzjzfgg3KZOUaZHJuML4XvPdjRPUlfhKo7Q +55/qUZ3Qy8tFBaTderXjGrJurc+A+LiFOaYUq2ZhDosguOWUA9yydjyfnkUXZ6or +sbvSoM+BeOGhnezdKNT+e90nLRF6cQoTD7war6vwM6L+8hxlGvqDuRNFAoGAD4K8 +d0D4yB1Uez4ZQp8m/iCLRhM3zCBFtNw1QU/fD1Xye5w8zL96zRkAsRNLAgKHLdsR +355iuTXAkOIBcJCOjveGQsdgvAmT0Zdz5FBi663V91o+IDlryqDD1t40CnCKbtRG +hng/ruVczg4x7OYh7SUKuwIP/UlkNh6LogNreX0CgYBQF9troLex6X94VTi1V5hu +iCwzDT6AJj63cS3VRO2ait3ZiLdpKdSNNW2WrlZs8FZr/mVutGEcWho8BugGMWST +1iZkYwly9Xfjnpd0I00ZIlr2/B3+ZsK8w5cOW5Lpb7frol6+BkDnBjbNZI5kQndn +zQpuMJliRlrq/5JkIbH6SA== +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIICizCCAfSgAwIBAgIJAMtotfHYdEsTMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDhaFw0xNjA1MTAxMzUzMDhaMEUx +MIIDbDCCAlSgAwIBAgIJANk5lu6mSyBAMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzBaFw0yMzA1MjYxNzI4MzBaMEUx CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU -ZXN0IFMvTUlNRSBFRSBSU0EgIzEwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB -ALoD3vyPimY+BS1RCoX5uOHq92HAY4ZlIPvFeAVMyObzy3B+/4SLGSrYpdIOMPRl -ehBt05pJu8ZFCsgmkbY0aIoAV1g2s/TbmQIJ2HnAX9oQjsosSbsmUfBelCy6PjCQ -yCaQWMvdPLhnnlo0f7ak9AhrST4Q0lebaAO6GqAGZ24DAgMBAAGjgYMwgYAwHQYD -VR0OBBYEFE2vMvKz5jrC7Lbdg68XwZ95iL/QMB8GA1UdIwQYMBaAFBPPS6e7iS6z -OFcXdsabrWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXgMCAGA1Ud -EQQZMBeBFXNtaW1lcnNhMUBvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQAi -O3GOkUl646oLnOimc36i9wxZ1tejsqs8vMjJ0Pym6Uq9FE2JoGzJ6OhB1GOsEVmj -9cQ5UNQcRYL3cqOFtl6f4Dpu/lhzfbaqgmLjv29G1mS0uuTZrixhlyCXjwcbOkNC -I/+wvHHENYIK5+T/79M9LaZ2Qk4F9MNE1VMljdz9Qw== +ZXN0IFMvTUlNRSBFRSBSU0EgIzEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQDXr9uzB/20QXKCxhkfNnJvl2xl1hzdOcrQmAqo+AAAcA/D49ImuJDVQRaK +2bcj54XB26i1kXuOrxID3/etUb8yudfx8OAVwh8G0xVA4zhr8uXW85W2tBr4v0Lt ++W6lSd6Hmfrk4GmE9LTU/vzl9HUPW6SZShN1G0nY6oeUXvLi0vasEUKv3a51T6JF +Yg4c7qt5RCk/w8kwrQ0DorQwCdkOPEIiC4b+nPStF12SVm5bx8rbYzioxuY/PdSe +bvt0APeqgRxSpCxqYnHsCoNeHzSrGXcP0COzFeUOz2tdrhmH09JLbGZs4nbojPxM +kjpJSv3/ekDG2CHYxXSHXxpJstxZAgMBAAGjYDBeMAwGA1UdEwEB/wQCMAAwDgYD +VR0PAQH/BAQDAgXgMB0GA1UdDgQWBBTmjc+lrTQuYx/VBOBGjMvufajvhDAfBgNV +HSMEGDAWgBTJkVMKY3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEA +dr2IRXcFtlF16kKWs1VTaFIHHNQrfSVHBkhKblPX3f/0s/i3eXgwKUu7Hnb6T3/o +E8L+e4ioQNhahTLt9ruJNHWA/QDwOfkqM3tshCs2xOD1Cpy7Bd3Dn0YBrHKyNXRK +WelGp+HetSXJGW4IZJP7iES7Um0DGktLabhZbe25EnthRDBjNnaAmcofHECWESZp +lEHczGZfS9tRbzOCofxvgLbF64H7wYSyjAe6R8aain0VRbIusiD4tCHX/lOMh9xT +GNBW8zTL+tV9H1unjPMORLnT0YQ3oAyEND0jCu0ACA1qGl+rzxhF6bQcTUNEbRMu +9Hjq6s316fk4Ne0EUF3PbA== -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smrsa2.pem b/deps/openssl/openssl/test/smime-certs/smrsa2.pem index d41f69c82f67fc..2f17cb2978f49c 100644 --- a/deps/openssl/openssl/test/smime-certs/smrsa2.pem +++ b/deps/openssl/openssl/test/smime-certs/smrsa2.pem @@ -1,31 +1,49 @@ ------BEGIN RSA PRIVATE KEY----- -MIICWwIBAAKBgQCwBfryW4Vu5U9wNIDKspJO/N9YF4CcTlrCUyzVlKgb+8urHlSe -59i5verR9IOCCXkemjOzZ/3nALTGqYZlnEvHp0Rjk+KdKXnKBIB+SRPpeu3LcXMT -WPgsThPa0UQxedNKG0g6aG+kLhsDlFBCoxd09jJtSpb9jmroJOq0ZYEHLwIDAQAB -AoGAKa/w4677Je1W5+r3SYoLDnvi5TkDs4D3C6ipKJgBTEdQz+DqB4w/DpZE4551 -+rkFn1LDxcxuHGRVa+tAMhZW97fwq9YUbjVZEyOz79qrX+BMyl/NbHkf1lIKDo3q 
-dWalzQvop7nbzeLC+VmmviwZfLQUbA61AQl3jm4dswT4XykCQQDloDadEv/28NTx -bvvywvyGuvJkCkEIycm4JrIInvwsd76h/chZ3oymrqzc7hkEtK6kThqlS5y+WXl6 -QzPruTKTAkEAxD2ro/VUoN+scIVaLmn0RBmZ67+9Pdn6pNSfjlK3s0T0EM6/iUWS -M06l6L9wFS3/ceu1tIifsh9BeqOGTa+udQJARIFnybTBaIqw/NZ/lA1YCVn8tpvY -iyaoZ6gjtS65TQrsdKeh/i3HCHNUXxUpoZ3F/H7QtD+6o49ODou+EbVOwQJAVmex -A2gp8wuJKaINqxIL81AybZLnCCzKJ3lXJ5tUNyLNM/lUbGStktm2Q1zHRQwTxV07 -jFn7trn8YrtNjzcjYQJAUKIJRt38A8Jw3HoPT+D0WS2IgxjVL0eYGsZX1lyeammG -6rfnQ3u5uP7mEK2EH2o8mDUpAE0gclWBU9UkKxJsGA== ------END RSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDcYC4tS2Uvn1Z2 +iDgtfkJA5tAqgbN6X4yK02RtVH5xekV9+6+eTt/9S+iFAzAnwqR/UB1R67ETrsWq +V8u9xLg5fHIwIkmu9/6P31UU9cghO7J1lcrhHvooHaFpcXepPWQacpuBq2VvcKRD +lDfVmdM5z6eS3dSZPTOMMP/xk4nhZB8mcw27qiccPieS0PZ9EZB63T1gmwaK1Rd5 +U94Pl0+zpDqhViuXmBfiIDWjjz0BzHnHSz5Rg4S3oXF1NcojhptIWyI0r7dgn5J3 +NxC4kgKdjzysxo6iWd0nLgz7h0jUdj79EOis4fg9G4f0EFWyQf7iDxGaA93Y9ePB +Jv5iFZVZAgMBAAECggEBAILIPX856EHb0KclbhlpfY4grFcdg9LS04grrcTISQW1 +J3p9nBpZ+snKe6I8Yx6lf5PiipPsSLlCliHiWpIzJZVQCkAQiSPiHttpEYgP2IYI +dH8dtznkdVbLRthZs0bnnPmpHCpW+iqpcYJ9eqkz0cvUNUGOjjWmwWmoRqwp/8CW +3S1qbkQiCh0Mk2fQeGar76R06kXQ9MKDEj14zyS3rJX+cokjEoMSlH8Sbmdh2mJz +XlNZcvqmeGJZwQWgbVVHOMUuZaKJiFa+lqvOdppbqSx0AsCRq6vjmjEYQEoOefYK +3IJM9IvqW5UNx0Cy4kQdjhZFFwMO/ALD3QyF21iP4gECgYEA+isQiaWdaY4UYxwK +Dg+pnSCKD7UGZUaCUIv9ds3CbntMOONFe0FxPsgcc4jRYQYj1rpQiFB8F11+qXGa +P/IHcnjr2+mTrNY4I9Bt1Lg+pHSS8QCgzeueFybYMLaSsXUo7tGwpvw6UUb6/YWI +LNCzZbrCLg1KZjGODhhxtvN45ZkCgYEA4YNSe+GMZlxgsvxbLs86WOm6DzJUPvxN +bWmni0+Oe0cbevgGEUjDVc895uMFnpvlgO49/C0AYJ+VVbStjIMgAeMnWj6OZoSX +q49rI8KmKUxKgORZiiaMqGWQ7Rxv68+4S8WANsjFxoUrE6dNV3uYDIUsiSLbZeI8 +38KVTcLohcECgYEAiOdyWHGq0G4xl/9rPUCzCMsa4velNV09yYiiwBZgVgfhsawm +hQpOSBZJA60XMGqkyEkT81VgY4UF4QLLcD0qeCnWoXWVHFvrQyY4RNZDacpl87/t +QGO2E2NtolL3umesa+2TJ/8Whw46Iu2llSjtVDm9NGiPk5eA7xPPf1iEi9kCgYAb +0EmVE91wJoaarLtGS7LDkpgrFacEWbPnAbfzW62UENIX2Y1OBm5pH/Vfi7J+vHWS +8E9e0eIRCL2vY2hgQy/oa67H151SkZnvQ/IP6Ar8Xvd1bDSK8HQ6tMQqKm63Y9g0 +KDjHCP4znOsSMnk8h/bZ3HcAtvbeWwftBR/LBnYNQQKBgA1leIXLLHRoX0VtS/7e +y7Xmn7gepj+gDbSuCs5wGtgw0RB/1z/S3QoS2TCbZzKPBo20+ivoRP7gcuFhduFR +hT8V87esr/QzLVpjLedQDW8Xb7GiO3BsU/gVC9VcngenbL7JObl3NgvdreIYo6+n +yrLyf+8hjm6H6zkjqiOkHAl+ +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIICizCCAfSgAwIBAgIJAMtotfHYdEsUMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDhaFw0xNjA1MTAxMzUzMDhaMEUx +MIIDbDCCAlSgAwIBAgIJANk5lu6mSyBBMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzBaFw0yMzA1MjYxNzI4MzBaMEUx CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU -ZXN0IFMvTUlNRSBFRSBSU0EgIzIwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB -ALAF+vJbhW7lT3A0gMqykk7831gXgJxOWsJTLNWUqBv7y6seVJ7n2Lm96tH0g4IJ -eR6aM7Nn/ecAtMaphmWcS8enRGOT4p0pecoEgH5JE+l67ctxcxNY+CxOE9rRRDF5 -00obSDpob6QuGwOUUEKjF3T2Mm1Klv2Oaugk6rRlgQcvAgMBAAGjgYMwgYAwHQYD -VR0OBBYEFIL/u+mEvaw7RuKLRuElfVkxSQjYMB8GA1UdIwQYMBaAFBPPS6e7iS6z -OFcXdsabrWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXgMCAGA1Ud -EQQZMBeBFXNtaW1lcnNhMkBvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQC2 -rXR5bm/9RtOMQPleNpd3y6uUX3oy+0CafK5Yl3PMnItjjnKJ0l1/DbLbDj2twehe -ewaB8CROcBCA3AMLSmGvPKgUCFMGtWam3328M4fBHzon5ka7qDXzM+imkAly/Yx2 -YNdR/aNOug+5sXygHmTSKqiCpQjOIClzXoPVVeEVHw== +ZXN0IFMvTUlNRSBFRSBSU0EgIzIwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQDcYC4tS2Uvn1Z2iDgtfkJA5tAqgbN6X4yK02RtVH5xekV9+6+eTt/9S+iF 
+AzAnwqR/UB1R67ETrsWqV8u9xLg5fHIwIkmu9/6P31UU9cghO7J1lcrhHvooHaFp +cXepPWQacpuBq2VvcKRDlDfVmdM5z6eS3dSZPTOMMP/xk4nhZB8mcw27qiccPieS +0PZ9EZB63T1gmwaK1Rd5U94Pl0+zpDqhViuXmBfiIDWjjz0BzHnHSz5Rg4S3oXF1 +NcojhptIWyI0r7dgn5J3NxC4kgKdjzysxo6iWd0nLgz7h0jUdj79EOis4fg9G4f0 +EFWyQf7iDxGaA93Y9ePBJv5iFZVZAgMBAAGjYDBeMAwGA1UdEwEB/wQCMAAwDgYD +VR0PAQH/BAQDAgXgMB0GA1UdDgQWBBT0arpyYMHXDPVL7MvzE+lx71L7sjAfBgNV +HSMEGDAWgBTJkVMKY3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEA +I8nM42am3aImkZyrw8iGkaGhKyi/dfajSWx6B9izBUh+3FleBnUxxOA+mn7M8C47 +Ne18iaaWK8vEux9KYTIY8BzXQZL1AuZ896cXEc6bGKsME37JSsocfuB5BIGWlYLv +/ON5/SJ0iVFj4fAp8z7Vn5qxRJj9BhZDxaO1Raa6cz6pm0imJy9v8y01TI6HsK8c +XJQLs7/U4Qb91K+IDNX/lgW3hzWjifNpIpT5JyY3DUgbkD595LFV5DDMZd0UOqcv +6cyN42zkX8a0TWr3i5wu7pw4k1oD19RbUyljyleEp0DBauIct4GARdBGgi5y1H2i +NzYzLAPBkHCMY0Is3KKIBw== -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/smrsa3.pem b/deps/openssl/openssl/test/smime-certs/smrsa3.pem index c8cbe55151ef2d..14c27f64aa9001 100644 --- a/deps/openssl/openssl/test/smime-certs/smrsa3.pem +++ b/deps/openssl/openssl/test/smime-certs/smrsa3.pem @@ -1,31 +1,49 @@ ------BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQC6syTZtZNe1hRScFc4PUVyVLsr7+C1HDIZnOHmwFoLayX6RHwy -ep/TkdwiPHnemVLuwvpSjLMLZkXy/J764kSHJrNeVl3UvmCVCOm40hAtK1+F39pM -h8phkbPPD7i+hwq4/Vs79o46nzwbVKmzgoZBJhZ+codujUSYM3LjJ4aq+wIDAQAB -AoGAE1Zixrnr3bLGwBMqtYSDIOhtyos59whImCaLr17U9MHQWS+mvYO98if1aQZi -iQ/QazJ+wvYXxWJ+dEB+JvYwqrGeuAU6He/rAb4OShG4FPVU2D19gzRnaButWMeT -/1lgXV08hegGBL7RQNaN7b0viFYMcKnSghleMP0/q+Y/oaECQQDkXEwDYJW13X9p -ijS20ykWdY5lLknjkHRhhOYux0rlhOqsyMZjoUmwI2m0qj9yrIysKhrk4MZaM/uC -hy0xp3hdAkEA0Uv/UY0Kwsgc+W6YxeypECtg1qCE6FBib8n4iFy/6VcWqhvE5xrs -OdhKv9/p6aLjLneGd1sU+F8eS9LGyKIbNwJBAJPgbNzXA7uUZriqZb5qeTXxBDfj -RLfXSHYKAKEULxz3+JvRHB9SR4yHMiFrCdExiZrHXUkPgYLSHLGG5a4824UCQD6T -9XvhquUARkGCAuWy0/3Eqoihp/t6BWSdQ9Upviu7YUhtUxsyXo0REZB7F4pGrJx5 -GlhXgFaewgUzuUHFzlMCQCzJMMWslWpoLntnR6sMhBMhBFHSw+Y5CbxBmFrdtSkd -VdtNO1VuDCTxjjW7W3Khj7LX4KZ1ye/5jfAgnnnXisc= ------END RSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCyK+BTAOJKJjji +OhY60NeZjzGGZxEBfCm62n0mwkzusW/V/e63uwj6uOVCFoVBz5doMf3M6QIS2jL3 +Aw6Qs5+vcuLA0gHrqIwjYQz1UZ5ETLKLKbQw6YOIVfsFSTxytUVpfcByrubWiLKX +63theG1/IVokDK/9/k52Kyt+wcCjuRb7AJQFj2OLDRuWm/gavozkK103gQ+dUq4H +XamZMtTq1EhQOfc0IUeCOEL6xz4jzlHHfzLdkvb7Enhav2sXDfOmZp/DYf9IqS7l +vFkkINPVbYFBTexaPZlFwmpGRjkmoyH/w+Jlcpzs+w6p1diWRpaSn62bbkRN49j6 +L2dVb+DfAgMBAAECggEAciwDl6zdVT6g/PbT/+SMA+7qgYHSN+1koEQaJpgjzGEP +lUUfj8TewCtzXaIoyj9IepBuXryBg6snNXpT/w3bqgYon/7zFBvxkUpDj4A5tvKf +BuY2fZFlpBvUu1Ju1eKrFCptBBBoA9mc+BUB/ze4ktrAdJFcxZoMlVScjqGB3GdR +OHw2x9BdWGCJBhiu9VHhAAb/LVWi6xgDumYSWZwN2yovg+7J91t5bsENeBRHycK+ +i5dNFh1umIK9N0SH6bpHPnLHrCRchrQ6ZRRxL4ZBKA9jFRDeI7OOsJuCvhGyJ1se +snsLjr/Ahg00aiHCcC1SPQ6pmXAVBCG7hf4AX82V4QKBgQDaFDE+Fcpv84mFo4s9 +wn4CZ8ymoNIaf5zPl/gpH7MGots4NT5+Ns+6zzJQ6TEpDjTPx+vDaabP7QGXwVZn +8NAHYvCQK37b+u9HrOt256YYRDOmnJFSbsJdmqzMEzpTNmQ8GuI37cZCS9CmSMv+ +ab/plcwuv0cJRSC83NN2AFyu1QKBgQDRJzKIBQlpprF9rA0D5ZjLVW4OH18A0Mmm +oanw7qVutBaM4taFN4M851WnNIROyYIlkk2fNgW57Y4M8LER4zLrjU5HY4lB0BMX +LQWDbyz4Y7L4lVnnEKfQxWFt9avNZwiCxCxEKy/n/icmVCzc91j9uwKcupdzrN6E +yzPd1s5y4wKBgQCkJvzmAdsOp9/Fg1RFWcgmIWHvrzBXl+U+ceLveZf1j9K5nYJ7 +2OBGer4iH1XM1I+2M4No5XcWHg3L4FEdDixY0wXHT6Y/CcThS+015Kqmq3fBmyrc +RNjzQoF9X5/QkSmkAIx1kvpgXtcgw70htRIrToGSUpKzDKDW6NYXhbA+PQKBgDJK +KH5IJ8E9kYPUMLT1Kc4KVpISvPcnPLVSPdhuqVx69MkfadFSTb4BKbkwiXegQCjk +isFzbeEM25EE9q6EYKP+sAm+RyyJ6W0zKBY4TynSXyAiWSGUAaXTL+AOqCaVVZiL 
+rtEdSUGQ/LzclIT0/HLV2oTw4KWxtTdc3LXEhpNdAoGBAM3LckiHENqtoeK2gVNw +IPeEuruEqoN4n+XltbEEv6Ymhxrs6T6HSKsEsLhqsUiIvIzH43KMm45SNYTn5eZh +yzYMXLmervN7c1jJe2Y2MYv6hE+Ypj1xGW4w7s8WNKmVzLv97beisD9AZrS7sXfF +RvOAi5wVkYylDxV4238MAZIq +-----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- -MIICizCCAfSgAwIBAgIJAMtotfHYdEsVMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV -BAYTAlVLMRYwFAYDVQQKEw1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDExRUZXN0IFMv -TUlNRSBSU0EgUm9vdDAeFw0wODAyMjIxMzUzMDlaFw0xNjA1MTAxMzUzMDlaMEUx +MIIDbDCCAlSgAwIBAgIJANk5lu6mSyBCMA0GCSqGSIb3DQEBBQUAMEQxCzAJBgNV +BAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR0wGwYDVQQDDBRUZXN0IFMv +TUlNRSBSU0EgUm9vdDAeFw0xMzA3MTcxNzI4MzBaFw0yMzA1MjYxNzI4MzBaMEUx CzAJBgNVBAYTAlVLMRYwFAYDVQQKDA1PcGVuU1NMIEdyb3VwMR4wHAYDVQQDDBVU -ZXN0IFMvTUlNRSBFRSBSU0EgIzMwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGB -ALqzJNm1k17WFFJwVzg9RXJUuyvv4LUcMhmc4ebAWgtrJfpEfDJ6n9OR3CI8ed6Z -Uu7C+lKMswtmRfL8nvriRIcms15WXdS+YJUI6bjSEC0rX4Xf2kyHymGRs88PuL6H -Crj9Wzv2jjqfPBtUqbOChkEmFn5yh26NRJgzcuMnhqr7AgMBAAGjgYMwgYAwHQYD -VR0OBBYEFDsSFjNtYZzd0tTHafNS7tneQQj6MB8GA1UdIwQYMBaAFBPPS6e7iS6z -OFcXdsabrWhb5e0XMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/BAQDAgXgMCAGA1Ud -EQQZMBeBFXNtaW1lcnNhM0BvcGVuc3NsLm9yZzANBgkqhkiG9w0BAQUFAAOBgQBE -tUDB+1Dqigu4p1xtdq7JRK6S+gfA7RWmhz0j2scb2zhpS12h37JLHsidGeKAzZYq -jUjOrH/j3xcV5AnuJoqImJaN23nzzxtR4qGGX2mrq6EtObzdEGgCUaizsGM+0slJ -PYxcy8KeY/63B1BpYhj2RjGkL6HrvuAaxVORa3acoA== +ZXN0IFMvTUlNRSBFRSBSU0EgIzMwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK +AoIBAQCyK+BTAOJKJjjiOhY60NeZjzGGZxEBfCm62n0mwkzusW/V/e63uwj6uOVC +FoVBz5doMf3M6QIS2jL3Aw6Qs5+vcuLA0gHrqIwjYQz1UZ5ETLKLKbQw6YOIVfsF +STxytUVpfcByrubWiLKX63theG1/IVokDK/9/k52Kyt+wcCjuRb7AJQFj2OLDRuW +m/gavozkK103gQ+dUq4HXamZMtTq1EhQOfc0IUeCOEL6xz4jzlHHfzLdkvb7Enha +v2sXDfOmZp/DYf9IqS7lvFkkINPVbYFBTexaPZlFwmpGRjkmoyH/w+Jlcpzs+w6p +1diWRpaSn62bbkRN49j6L2dVb+DfAgMBAAGjYDBeMAwGA1UdEwEB/wQCMAAwDgYD +VR0PAQH/BAQDAgXgMB0GA1UdDgQWBBQ6CkW5sa6HrBsWvuPOvMjyL5AnsDAfBgNV +HSMEGDAWgBTJkVMKY3sWW4u9RPB2iKkk5uW2bDANBgkqhkiG9w0BAQUFAAOCAQEA +JhcrD7AKafVzlncA3cZ6epAruj1xwcfiE+EbuAaeWEGjoSltmevcjgoIxvijRVcp +sCbNmHJZ/siQlqzWjjf3yoERvLDqngJZZpQeocMIbLRQf4wgLAuiBcvT52wTE+sa +VexeETDy5J1OW3wE4A3rkdBp6hLaymlijFNnd5z/bP6w3AcIMWm45yPm0skM8RVr +O3UstEFYD/iy+p+Y/YZDoxYQSW5Vl+NkpGmc5bzet8gQz4JeXtH3z5zUGoDM4XK7 +tXP3yUi2eecCbyjh/wgaQiVdylr1Kv3mxXcTl+cFO22asDkh0R/y72nTCu5fSILY +CscFo2Z2pYROGtZDmYqhRw== -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/tcrl b/deps/openssl/openssl/test/tcrl index 055269eab8ddc8..1075a4f3f502e3 100644 --- a/deps/openssl/openssl/test/tcrl +++ b/deps/openssl/openssl/test/tcrl @@ -9,70 +9,70 @@ else fi echo testing crl conversions -cp $t fff.p +cp $t crl-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in crl-fff.p -inform p -outform d >crl-f.d if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in fff.p -inform p -outform t >f.t +#$cmd -in crl-fff.p -inform p -outform t >crl-f.t #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in crl-fff.p -inform p -outform p >crl-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -in f.d -inform d -outform d >ff.d1 +$cmd -in crl-f.d -inform d -outform d >crl-ff.d1 if [ $? != 0 ]; then exit 1; fi #echo "t -> d" -#$cmd -in f.t -inform t -outform d >ff.d2 +#$cmd -in crl-f.t -inform t -outform d >crl-ff.d2 #if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -in f.p -inform p -outform d >ff.d3 +$cmd -in crl-f.p -inform p -outform d >crl-ff.d3 if [ $? 
!= 0 ]; then exit 1; fi #echo "d -> t" -#$cmd -in f.d -inform d -outform t >ff.t1 +#$cmd -in crl-f.d -inform d -outform t >crl-ff.t1 #if [ $? != 0 ]; then exit 1; fi #echo "t -> t" -#$cmd -in f.t -inform t -outform t >ff.t2 +#$cmd -in crl-f.t -inform t -outform t >crl-ff.t2 #if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in f.p -inform p -outform t >ff.t3 +#$cmd -in crl-f.p -inform p -outform t >crl-ff.t3 #if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in crl-f.d -inform d -outform p >crl-ff.p1 if [ $? != 0 ]; then exit 1; fi #echo "t -> p" -#$cmd -in f.t -inform t -outform p >ff.p2 +#$cmd -in crl-f.t -inform t -outform p >crl-ff.p2 #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in crl-f.p -inform p -outform p >crl-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp fff.p f.p +cmp crl-fff.p crl-f.p if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p1 +cmp crl-fff.p crl-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp fff.p ff.p2 +#cmp crl-fff.p crl-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p3 +cmp crl-fff.p crl-ff.p3 if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t1 +#cmp crl-f.t crl-ff.t1 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t2 +#cmp crl-f.t crl-ff.t2 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t3 +#cmp crl-f.t crl-ff.t3 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp crl-f.p crl-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp f.p ff.p2 +#cmp crl-f.p crl-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp crl-f.p crl-ff.p3 if [ $? != 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f crl-f.* crl-ff.* crl-fff.* exit 0 diff --git a/deps/openssl/openssl/test/testenc b/deps/openssl/openssl/test/testenc index f5ce7c0c4573b3..d9fd52e533195a 100644 --- a/deps/openssl/openssl/test/testenc +++ b/deps/openssl/openssl/test/testenc @@ -1,6 +1,6 @@ #!/bin/sh -testsrc=Makefile +testsrc=testenc test=./p cmd="../util/shlib_wrap.sh ../apps/openssl" diff --git a/deps/openssl/openssl/test/tests.com b/deps/openssl/openssl/test/tests.com index 39a7bfa60779cf..6e1c818e7ac4bc 100644 --- a/deps/openssl/openssl/test/tests.com +++ b/deps/openssl/openssl/test/tests.com @@ -56,7 +56,8 @@ $ tests := - test_enc,test_x509,test_rsa,test_crl,test_sid,- test_gen,test_req,test_pkcs7,test_verify,test_dh,test_dsa,- test_ss,test_ca,test_engine,test_evp,test_ssl,test_tsa,test_ige,- - test_jpake,test_srp,test_cms,test_heartbeat,test_constant_time + test_jpake,test_srp,test_cms,test_ocsp,test_v3name,test_heartbeat,- + test_constant_time $ endif $ tests = f$edit(tests,"COLLAPSE") $ @@ -94,9 +95,10 @@ $ EVPTEST := evp_test $ IGETEST := igetest $ JPAKETEST := jpaketest $ SRPTEST := srptest +$ V3NAMETEST := v3nametest $ ASN1TEST := asn1test -$ HEARTBEATTEST := heartbeat_test -$ CONSTTIMETEST := constant_time_test +$ HEARTBEATTEST := heartbeat_test +$ CONSTTIMETEST := constant_time_test $! 
$ tests_i = 0 $ loop_tests: @@ -368,6 +370,14 @@ $ test_srp: $ write sys$output "Test SRP" $ mcr 'texe_dir''srptest' $ return +$ test_ocsp: +$ write sys$output "Test OCSP" +$ @tocsp.com +$ return +$ test_v3name: +$ write sys$output "Test V3NAME" +$ mcr 'texe_dir''v3nametest' +$ return $ test_heartbeat: $ write sys$output "Test HEARTBEAT" $ mcr 'texe_dir''heartbeattest' diff --git a/deps/openssl/openssl/test/testssl b/deps/openssl/openssl/test/testssl index 9fb89a3ddde4f7..e3b342bfd4c060 100644 --- a/deps/openssl/openssl/test/testssl +++ b/deps/openssl/openssl/test/testssl @@ -30,6 +30,8 @@ else extra="$4" fi +serverinfo="./serverinfo.pem" + ############################################################################# echo test sslv2 @@ -165,6 +167,35 @@ $ssltest -tls1 -cipher PSK -psk abc123 $extra || exit 1 echo test tls1 with PSK via BIO pair $ssltest -bio_pair -tls1 -cipher PSK -psk abc123 $extra || exit 1 +############################################################################# +# Custom Extension tests + +echo test tls1 with custom extensions +$ssltest -bio_pair -tls1 -custom_ext || exit 1 + +############################################################################# +# Serverinfo tests + +echo test tls1 with serverinfo +$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo || exit 1 +$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo -serverinfo_sct || exit 1 +$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo -serverinfo_tack || exit 1 +$ssltest -bio_pair -tls1 -serverinfo_file $serverinfo -serverinfo_sct -serverinfo_tack || exit 1 +$ssltest -bio_pair -tls1 -custom_ext -serverinfo_file $serverinfo -serverinfo_sct -serverinfo_tack || exit 1 + + +############################################################################# +# ALPN tests + +$ssltest -bio_pair -tls1 -alpn_client foo -alpn_server bar || exit 1 +$ssltest -bio_pair -tls1 -alpn_client foo -alpn_server foo -alpn_expected foo || exit 1 +$ssltest -bio_pair -tls1 -alpn_client foo,bar -alpn_server foo -alpn_expected foo || exit 1 +$ssltest -bio_pair -tls1 -alpn_client bar,foo -alpn_server foo -alpn_expected foo || exit 1 +$ssltest -bio_pair -tls1 -alpn_client bar,foo -alpn_server foo,bar -alpn_expected foo || exit 1 +$ssltest -bio_pair -tls1 -alpn_client bar,foo -alpn_server bar,foo -alpn_expected bar || exit 1 +$ssltest -bio_pair -tls1 -alpn_client foo,bar -alpn_server bar,foo -alpn_expected bar || exit 1 +$ssltest -bio_pair -tls1 -alpn_client baz -alpn_server bar,foo || exit 1 + if ../util/shlib_wrap.sh ../apps/openssl no-srp; then echo skipping SRP tests else @@ -181,4 +212,12 @@ else $ssltest -bio_pair -tls1 -cipher aSRP -srpuser test -srppass abc123 fi +############################################################################# +# Multi-buffer tests + +if [ -z "$extra" -a `uname -m` = "x86_64" ]; then + $ssltest -cipher AES128-SHA -bytes 8m || exit 1 + $ssltest -cipher AES128-SHA256 -bytes 8m || exit 1 +fi + exit 0 diff --git a/deps/openssl/openssl/test/tocsp b/deps/openssl/openssl/test/tocsp new file mode 100644 index 00000000000000..5fc291ca6eeb72 --- /dev/null +++ b/deps/openssl/openssl/test/tocsp @@ -0,0 +1,147 @@ +#!/bin/sh + +cmd='../util/shlib_wrap.sh ../apps/openssl' +ocspdir="ocsp-tests" +# 17 December 2012 so we don't get certificate expiry errors. +check_time="-attime 1355875200" + +test_ocsp () { + + $cmd base64 -d -in $ocspdir/$1 | \ + $cmd ocsp -respin - -partial_chain $check_time \ + -CAfile $ocspdir/$2 -verify_other $ocspdir/$2 -CApath /dev/null + [ $? 
!= $3 ] && exit 1 +} + + +echo "=== VALID OCSP RESPONSES ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp ND1.ors ND1_Issuer_ICA.pem 0 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp ND2.ors ND2_Issuer_Root.pem 0 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp ND3.ors ND3_Issuer_Root.pem 0 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp D1.ors D1_Issuer_ICA.pem 0 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp D2.ors D2_Issuer_Root.pem 0 +echo "DELEGATED; Root CA -> EE" +test_ocsp D3.ors D3_Issuer_Root.pem 0 + +echo "=== INVALID SIGNATURE on the OCSP RESPONSE ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp ISOP_ND1.ors ND1_Issuer_ICA.pem 1 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp ISOP_ND2.ors ND2_Issuer_Root.pem 1 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp ISOP_ND3.ors ND3_Issuer_Root.pem 1 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp ISOP_D1.ors D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp ISOP_D2.ors D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp ISOP_D3.ors D3_Issuer_Root.pem 1 + +echo "=== WRONG RESPONDERID in the OCSP RESPONSE ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp WRID_ND1.ors ND1_Issuer_ICA.pem 1 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp WRID_ND2.ors ND2_Issuer_Root.pem 1 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp WRID_ND3.ors ND3_Issuer_Root.pem 1 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp WRID_D1.ors D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp WRID_D2.ors D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp WRID_D3.ors D3_Issuer_Root.pem 1 + +echo "=== WRONG ISSUERNAMEHASH in the OCSP RESPONSE ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp WINH_ND1.ors ND1_Issuer_ICA.pem 1 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp WINH_ND2.ors ND2_Issuer_Root.pem 1 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp WINH_ND3.ors ND3_Issuer_Root.pem 1 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp WINH_D1.ors D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp WINH_D2.ors D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp WINH_D3.ors D3_Issuer_Root.pem 1 + +echo "=== WRONG ISSUERKEYHASH in the OCSP RESPONSE ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp WIKH_ND1.ors ND1_Issuer_ICA.pem 1 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp WIKH_ND2.ors ND2_Issuer_Root.pem 1 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp WIKH_ND3.ors ND3_Issuer_Root.pem 1 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp WIKH_D1.ors D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp WIKH_D2.ors D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp WIKH_D3.ors D3_Issuer_Root.pem 1 + +echo "=== WRONG KEY in the DELEGATED OCSP SIGNING CERTIFICATE ===" +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp WKDOSC_D1.ors D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp WKDOSC_D2.ors D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp WKDOSC_D3.ors D3_Issuer_Root.pem 1 + +echo "=== INVALID SIGNATURE on the DELEGATED OCSP SIGNING CERTIFICATE ===" +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp ISDOSC_D1.ors D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp ISDOSC_D2.ors D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp ISDOSC_D3.ors D3_Issuer_Root.pem 1 + +echo "=== WRONG SUBJECT 
NAME in the ISSUER CERTIFICATE ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp ND1.ors WSNIC_ND1_Issuer_ICA.pem 1 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp ND2.ors WSNIC_ND2_Issuer_Root.pem 1 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp ND3.ors WSNIC_ND3_Issuer_Root.pem 1 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp D1.ors WSNIC_D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp D2.ors WSNIC_D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp D3.ors WSNIC_D3_Issuer_Root.pem 1 + +echo "=== WRONG KEY in the ISSUER CERTIFICATE ===" +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp ND1.ors WKIC_ND1_Issuer_ICA.pem 1 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp ND2.ors WKIC_ND2_Issuer_Root.pem 1 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp ND3.ors WKIC_ND3_Issuer_Root.pem 1 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp D1.ors WKIC_D1_Issuer_ICA.pem 1 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp D2.ors WKIC_D2_Issuer_Root.pem 1 +echo "DELEGATED; Root CA -> EE" +test_ocsp D3.ors WKIC_D3_Issuer_Root.pem 1 + +echo "=== INVALID SIGNATURE on the ISSUER CERTIFICATE ===" +# Expect success, because we're explicitly trusting the issuer certificate. +echo "NON-DELEGATED; Intermediate CA -> EE" +test_ocsp ND1.ors ISIC_ND1_Issuer_ICA.pem 0 +echo "NON-DELEGATED; Root CA -> Intermediate CA" +test_ocsp ND2.ors ISIC_ND2_Issuer_Root.pem 0 +echo "NON-DELEGATED; Root CA -> EE" +test_ocsp ND3.ors ISIC_ND3_Issuer_Root.pem 0 +echo "DELEGATED; Intermediate CA -> EE" +test_ocsp D1.ors ISIC_D1_Issuer_ICA.pem 0 +echo "DELEGATED; Root CA -> Intermediate CA" +test_ocsp D2.ors ISIC_D2_Issuer_Root.pem 0 +echo "DELEGATED; Root CA -> EE" +test_ocsp D3.ors ISIC_D3_Issuer_Root.pem 0 + +echo "ALL OCSP TESTS SUCCESSFUL" +exit 0 diff --git a/deps/openssl/openssl/test/tocsp.com b/deps/openssl/openssl/test/tocsp.com new file mode 100644 index 00000000000000..3b974c97f1339c --- /dev/null +++ b/deps/openssl/openssl/test/tocsp.com @@ -0,0 +1,152 @@ +$! TOCSP.COM +$ +$ cmd = "mcr ''exe_dir'openssl" +$ ocspdir = "ocsp-tests" +$ ! 17 December 2012 so we don't get certificate expiry errors. +$ check_time = "-attime 1355875200" +$ +$ test_ocsp: subroutine +$ set noon +$ 'cmd' base64 -d -in [.'ocspdir']'p1' -out f.d +$ 'cmd' ocsp -respin f.d -partial_chain 'check_time' - + "-CAfile" [.'ocspdir']'p2' -verify_other [.'ocspdir']'p2' - + "-CApath" nul: +$ ! when ocsp exits with 0, VMS severity becomes 1 +$ ! when ocsp exits with 1, VMS severity becomes 2 +$ ! See the definition of EXIT(n) in the VMS sextion in e_os.h +$ if $severity .ne. 'p3'+1 then exit 2 ! 
severity error +$ exit 1 +$ endsubroutine +$ +$ on error then exit 2 +$ write sys$output "=== VALID OCSP RESPONSES ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ND1.ors ND1_Issuer_ICA.pem 0 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ND2.ors ND2_Issuer_Root.pem 0 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp ND3.ors ND3_Issuer_Root.pem 0 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp D1.ors D1_Issuer_ICA.pem 0 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp D2.ors D2_Issuer_Root.pem 0 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp D3.ors D3_Issuer_Root.pem 0 +$ +$ write sys$output "=== INVALID SIGNATURE on the OCSP RESPONSE ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ISOP_ND1.ors ND1_Issuer_ICA.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ISOP_ND2.ors ND2_Issuer_Root.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp ISOP_ND3.ors ND3_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ISOP_D1.ors D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ISOP_D2.ors D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp ISOP_D3.ors D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== WRONG RESPONDERID in the OCSP RESPONSE ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WRID_ND1.ors ND1_Issuer_ICA.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WRID_ND2.ors ND2_Issuer_Root.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp WRID_ND3.ors ND3_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WRID_D1.ors D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WRID_D2.ors D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp WRID_D3.ors D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== WRONG ISSUERNAMEHASH in the OCSP RESPONSE ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WINH_ND1.ors ND1_Issuer_ICA.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WINH_ND2.ors ND2_Issuer_Root.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp WINH_ND3.ors ND3_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WINH_D1.ors D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WINH_D2.ors D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp WINH_D3.ors D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== WRONG ISSUERKEYHASH in the OCSP RESPONSE ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WIKH_ND1.ors ND1_Issuer_ICA.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WIKH_ND2.ors ND2_Issuer_Root.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp WIKH_ND3.ors ND3_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WIKH_D1.ors D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WIKH_D2.ors D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA 
-> EE" +$ call test_ocsp WIKH_D3.ors D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== WRONG KEY in the DELEGATED OCSP SIGNING CERTIFICATE ===" +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp WKDOSC_D1.ors D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp WKDOSC_D2.ors D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp WKDOSC_D3.ors D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== INVALID SIGNATURE on the DELEGATED OCSP SIGNING CERTIFICATE ===" +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ISDOSC_D1.ors D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ISDOSC_D2.ors D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp ISDOSC_D3.ors D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== WRONG SUBJECT NAME in the ISSUER CERTIFICATE ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ND1.ors WSNIC_ND1_Issuer_ICA.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ND2.ors WSNIC_ND2_Issuer_Root.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp ND3.ors WSNIC_ND3_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp D1.ors WSNIC_D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp D2.ors WSNIC_D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp D3.ors WSNIC_D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== WRONG KEY in the ISSUER CERTIFICATE ===" +$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ND1.ors WKIC_ND1_Issuer_ICA.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ND2.ors WKIC_ND2_Issuer_Root.pem 1 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp ND3.ors WKIC_ND3_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp D1.ors WKIC_D1_Issuer_ICA.pem 1 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp D2.ors WKIC_D2_Issuer_Root.pem 1 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp D3.ors WKIC_D3_Issuer_Root.pem 1 +$ +$ write sys$output "=== INVALID SIGNATURE on the ISSUER CERTIFICATE ===" +$ !# Expect success, because we're explicitly trusting the issuer certificate. 
+$ write sys$output "NON-DELEGATED; Intermediate CA -> EE" +$ call test_ocsp ND1.ors ISIC_ND1_Issuer_ICA.pem 0 +$ write sys$output "NON-DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp ND2.ors ISIC_ND2_Issuer_Root.pem 0 +$ write sys$output "NON-DELEGATED; Root CA -> EE" +$ call test_ocsp ND3.ors ISIC_ND3_Issuer_Root.pem 0 +$ write sys$output "DELEGATED; Intermediate CA -> EE" +$ call test_ocsp D1.ors ISIC_D1_Issuer_ICA.pem 0 +$ write sys$output "DELEGATED; Root CA -> Intermediate CA" +$ call test_ocsp D2.ors ISIC_D2_Issuer_Root.pem 0 +$ write sys$output "DELEGATED; Root CA -> EE" +$ call test_ocsp D3.ors ISIC_D3_Issuer_Root.pem 0 +$ +$ write sys$output "ALL OCSP TESTS SUCCESSFUL" +$ exit 1 diff --git a/deps/openssl/openssl/test/tpkcs7 b/deps/openssl/openssl/test/tpkcs7 index 3e435ffbf9fd83..d7029a03268b3f 100644 --- a/deps/openssl/openssl/test/tpkcs7 +++ b/deps/openssl/openssl/test/tpkcs7 @@ -9,40 +9,40 @@ else fi echo testing pkcs7 conversions -cp $t fff.p +cp $t p7-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in p7-fff.p -inform p -outform d >p7-f.d if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in p7-fff.p -inform p -outform p >p7-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -in f.d -inform d -outform d >ff.d1 +$cmd -in p7-f.d -inform d -outform d >p7-ff.d1 if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -in f.p -inform p -outform d >ff.d3 +$cmd -in p7-f.p -inform p -outform d >p7-ff.d3 if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in p7-f.d -inform d -outform p >p7-ff.p1 if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in p7-f.p -inform p -outform p >p7-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp fff.p f.p +cmp p7-fff.p p7-f.p if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p1 +cmp p7-fff.p p7-ff.p1 if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p3 +cmp p7-fff.p p7-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp p7-f.p p7-ff.p1 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp p7-f.p p7-ff.p3 if [ $? != 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f p7-f.* p7-ff.* p7-fff.* exit 0 diff --git a/deps/openssl/openssl/test/tpkcs7d b/deps/openssl/openssl/test/tpkcs7d index 64fc28e88f0b3c..d4bfbdf1cd6d64 100644 --- a/deps/openssl/openssl/test/tpkcs7d +++ b/deps/openssl/openssl/test/tpkcs7d @@ -9,33 +9,33 @@ else fi echo "testing pkcs7 conversions (2)" -cp $t fff.p +cp $t p7d-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in p7d-fff.p -inform p -outform d >p7d-f.d if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in p7d-fff.p -inform p -outform p >p7d-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -in f.d -inform d -outform d >ff.d1 +$cmd -in p7d-f.d -inform d -outform d >p7d-ff.d1 if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -in f.p -inform p -outform d >ff.d3 +$cmd -in p7d-f.p -inform p -outform d >p7d-ff.d3 if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in p7d-f.d -inform d -outform p >p7d-ff.p1 if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in p7d-f.p -inform p -outform p >p7d-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp p7d-f.p p7d-ff.p1 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp p7d-f.p p7d-ff.p3 if [ $? 
!= 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f p7d-f.* p7d-ff.* p7d-fff.* exit 0 diff --git a/deps/openssl/openssl/test/treq b/deps/openssl/openssl/test/treq index 77f37dcf3a9a54..420d25e16899f9 100644 --- a/deps/openssl/openssl/test/treq +++ b/deps/openssl/openssl/test/treq @@ -14,70 +14,70 @@ if $cmd -in $t -inform p -noout -text 2>&1 | fgrep -i 'Unknown Public Key'; then fi echo testing req conversions -cp $t fff.p +cp $t req-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in req-fff.p -inform p -outform d >req-f.d if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in fff.p -inform p -outform t >f.t +#$cmd -in req-fff.p -inform p -outform t >req-f.t #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in req-fff.p -inform p -outform p >req-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -verify -in f.d -inform d -outform d >ff.d1 +$cmd -verify -in req-f.d -inform d -outform d >req-ff.d1 if [ $? != 0 ]; then exit 1; fi #echo "t -> d" -#$cmd -in f.t -inform t -outform d >ff.d2 +#$cmd -in req-f.t -inform t -outform d >req-ff.d2 #if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -verify -in f.p -inform p -outform d >ff.d3 +$cmd -verify -in req-f.p -inform p -outform d >req-ff.d3 if [ $? != 0 ]; then exit 1; fi #echo "d -> t" -#$cmd -in f.d -inform d -outform t >ff.t1 +#$cmd -in req-f.d -inform d -outform t >req-ff.t1 #if [ $? != 0 ]; then exit 1; fi #echo "t -> t" -#$cmd -in f.t -inform t -outform t >ff.t2 +#$cmd -in req-f.t -inform t -outform t >req-ff.t2 #if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in f.p -inform p -outform t >ff.t3 +#$cmd -in req-f.p -inform p -outform t >req-ff.t3 #if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in req-f.d -inform d -outform p >req-ff.p1 if [ $? != 0 ]; then exit 1; fi #echo "t -> p" -#$cmd -in f.t -inform t -outform p >ff.p2 +#$cmd -in req-f.t -inform t -outform p >req-ff.p2 #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in req-f.p -inform p -outform p >req-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp fff.p f.p +cmp req-fff.p req-f.p if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p1 +cmp req-fff.p req-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp fff.p ff.p2 +#cmp req-fff.p req-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p3 +cmp req-fff.p req-ff.p3 if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t1 +#cmp req-f.t req-ff.t1 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t2 +#cmp req-f.t req-ff.t2 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t3 +#cmp req-f.t req-ff.t3 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp req-f.p req-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp f.p ff.p2 +#cmp req-f.p req-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp req-f.p req-ff.p3 if [ $? != 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f req-f.* req-ff.* req-fff.* exit 0 diff --git a/deps/openssl/openssl/test/trsa b/deps/openssl/openssl/test/trsa index 249ac1ddcc6b4d..5a2290f8cc15db 100644 --- a/deps/openssl/openssl/test/trsa +++ b/deps/openssl/openssl/test/trsa @@ -14,70 +14,70 @@ else fi echo testing rsa conversions -cp $t fff.p +cp $t rsa-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in rsa-fff.p -inform p -outform d >rsa-f.d if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in fff.p -inform p -outform t >f.t +#$cmd -in rsa-fff.p -inform p -outform t >rsa-f.t #if [ $? 
!= 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in rsa-fff.p -inform p -outform p >rsa-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -in f.d -inform d -outform d >ff.d1 +$cmd -in rsa-f.d -inform d -outform d >rsa-ff.d1 if [ $? != 0 ]; then exit 1; fi #echo "t -> d" -#$cmd -in f.t -inform t -outform d >ff.d2 +#$cmd -in rsa-f.t -inform t -outform d >rsa-ff.d2 #if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -in f.p -inform p -outform d >ff.d3 +$cmd -in rsa-f.p -inform p -outform d >rsa-ff.d3 if [ $? != 0 ]; then exit 1; fi #echo "d -> t" -#$cmd -in f.d -inform d -outform t >ff.t1 +#$cmd -in rsa-f.d -inform d -outform t >rsa-ff.t1 #if [ $? != 0 ]; then exit 1; fi #echo "t -> t" -#$cmd -in f.t -inform t -outform t >ff.t2 +#$cmd -in rsa-f.t -inform t -outform t >rsa-ff.t2 #if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in f.p -inform p -outform t >ff.t3 +#$cmd -in rsa-f.p -inform p -outform t >rsa-ff.t3 #if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in rsa-f.d -inform d -outform p >rsa-ff.p1 if [ $? != 0 ]; then exit 1; fi #echo "t -> p" -#$cmd -in f.t -inform t -outform p >ff.p2 +#$cmd -in rsa-f.t -inform t -outform p >rsa-ff.p2 #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in rsa-f.p -inform p -outform p >rsa-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp fff.p f.p +cmp rsa-fff.p rsa-f.p if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p1 +cmp rsa-fff.p rsa-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp fff.p ff.p2 +#cmp rsa-fff.p rsa-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p3 +cmp rsa-fff.p rsa-ff.p3 if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t1 +#cmp rsa-f.t rsa-ff.t1 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t2 +#cmp rsa-f.t rsa-ff.t2 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t3 +#cmp rsa-f.t rsa-ff.t3 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp rsa-f.p rsa-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp f.p ff.p2 +#cmp rsa-f.p rsa-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp rsa-f.p rsa-ff.p3 if [ $? != 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f rsa-f.* rsa-ff.* rsa-fff.* exit 0 diff --git a/deps/openssl/openssl/test/tsid b/deps/openssl/openssl/test/tsid index 6adbd531ce0abf..e1eb503f085d6e 100644 --- a/deps/openssl/openssl/test/tsid +++ b/deps/openssl/openssl/test/tsid @@ -9,70 +9,70 @@ else fi echo testing session-id conversions -cp $t fff.p +cp $t sid-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in sid-fff.p -inform p -outform d >sid-f.d if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in fff.p -inform p -outform t >f.t +#$cmd -in sid-fff.p -inform p -outform t >sid-f.t #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in sid-fff.p -inform p -outform p >sid-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -in f.d -inform d -outform d >ff.d1 +$cmd -in sid-f.d -inform d -outform d >sid-ff.d1 if [ $? != 0 ]; then exit 1; fi #echo "t -> d" -#$cmd -in f.t -inform t -outform d >ff.d2 +#$cmd -in sid-f.t -inform t -outform d >sid-ff.d2 #if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -in f.p -inform p -outform d >ff.d3 +$cmd -in sid-f.p -inform p -outform d >sid-ff.d3 if [ $? != 0 ]; then exit 1; fi #echo "d -> t" -#$cmd -in f.d -inform d -outform t >ff.t1 +#$cmd -in sid-f.d -inform d -outform t >sid-ff.t1 #if [ $? 
!= 0 ]; then exit 1; fi #echo "t -> t" -#$cmd -in f.t -inform t -outform t >ff.t2 +#$cmd -in sid-f.t -inform t -outform t >sid-ff.t2 #if [ $? != 0 ]; then exit 1; fi #echo "p -> t" -#$cmd -in f.p -inform p -outform t >ff.t3 +#$cmd -in sid-f.p -inform p -outform t >sid-ff.t3 #if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in sid-f.d -inform d -outform p >sid-ff.p1 if [ $? != 0 ]; then exit 1; fi #echo "t -> p" -#$cmd -in f.t -inform t -outform p >ff.p2 +#$cmd -in sid-f.t -inform t -outform p >sid-ff.p2 #if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in sid-f.p -inform p -outform p >sid-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp fff.p f.p +cmp sid-fff.p sid-f.p if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p1 +cmp sid-fff.p sid-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp fff.p ff.p2 +#cmp sid-fff.p sid-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p3 +cmp sid-fff.p sid-ff.p3 if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t1 +#cmp sid-f.t sid-ff.t1 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t2 +#cmp sid-f.t sid-ff.t2 #if [ $? != 0 ]; then exit 1; fi -#cmp f.t ff.t3 +#cmp sid-f.t sid-ff.t3 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp sid-f.p sid-ff.p1 if [ $? != 0 ]; then exit 1; fi -#cmp f.p ff.p2 +#cmp sid-f.p sid-ff.p2 #if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp sid-f.p sid-ff.p3 if [ $? != 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f sid-f.* sid-ff.* sid-fff.* exit 0 diff --git a/deps/openssl/openssl/test/tx509 b/deps/openssl/openssl/test/tx509 index 4a15b98d17d8ac..0ce3b5223c5bcf 100644 --- a/deps/openssl/openssl/test/tx509 +++ b/deps/openssl/openssl/test/tx509 @@ -9,70 +9,70 @@ else fi echo testing X509 conversions -cp $t fff.p +cp $t x509-fff.p echo "p -> d" -$cmd -in fff.p -inform p -outform d >f.d +$cmd -in x509-fff.p -inform p -outform d >x509-f.d if [ $? != 0 ]; then exit 1; fi echo "p -> n" -$cmd -in fff.p -inform p -outform n >f.n +$cmd -in x509-fff.p -inform p -outform n >x509-f.n if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in fff.p -inform p -outform p >f.p +$cmd -in x509-fff.p -inform p -outform p >x509-f.p if [ $? != 0 ]; then exit 1; fi echo "d -> d" -$cmd -in f.d -inform d -outform d >ff.d1 +$cmd -in x509-f.d -inform d -outform d >x509-ff.d1 if [ $? != 0 ]; then exit 1; fi echo "n -> d" -$cmd -in f.n -inform n -outform d >ff.d2 +$cmd -in x509-f.n -inform n -outform d >x509-ff.d2 if [ $? != 0 ]; then exit 1; fi echo "p -> d" -$cmd -in f.p -inform p -outform d >ff.d3 +$cmd -in x509-f.p -inform p -outform d >x509-ff.d3 if [ $? != 0 ]; then exit 1; fi echo "d -> n" -$cmd -in f.d -inform d -outform n >ff.n1 +$cmd -in x509-f.d -inform d -outform n >x509-ff.n1 if [ $? != 0 ]; then exit 1; fi echo "n -> n" -$cmd -in f.n -inform n -outform n >ff.n2 +$cmd -in x509-f.n -inform n -outform n >x509-ff.n2 if [ $? != 0 ]; then exit 1; fi echo "p -> n" -$cmd -in f.p -inform p -outform n >ff.n3 +$cmd -in x509-f.p -inform p -outform n >x509-ff.n3 if [ $? != 0 ]; then exit 1; fi echo "d -> p" -$cmd -in f.d -inform d -outform p >ff.p1 +$cmd -in x509-f.d -inform d -outform p >x509-ff.p1 if [ $? != 0 ]; then exit 1; fi echo "n -> p" -$cmd -in f.n -inform n -outform p >ff.p2 +$cmd -in x509-f.n -inform n -outform p >x509-ff.p2 if [ $? != 0 ]; then exit 1; fi echo "p -> p" -$cmd -in f.p -inform p -outform p >ff.p3 +$cmd -in x509-f.p -inform p -outform p >x509-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp fff.p f.p +cmp x509-fff.p x509-f.p if [ $? 
!= 0 ]; then exit 1; fi -cmp fff.p ff.p1 +cmp x509-fff.p x509-ff.p1 if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p2 +cmp x509-fff.p x509-ff.p2 if [ $? != 0 ]; then exit 1; fi -cmp fff.p ff.p3 +cmp x509-fff.p x509-ff.p3 if [ $? != 0 ]; then exit 1; fi -cmp f.n ff.n1 +cmp x509-f.n x509-ff.n1 if [ $? != 0 ]; then exit 1; fi -cmp f.n ff.n2 +cmp x509-f.n x509-ff.n2 if [ $? != 0 ]; then exit 1; fi -cmp f.n ff.n3 +cmp x509-f.n x509-ff.n3 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p1 +cmp x509-f.p x509-ff.p1 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p2 +cmp x509-f.p x509-ff.p2 if [ $? != 0 ]; then exit 1; fi -cmp f.p ff.p3 +cmp x509-f.p x509-ff.p3 if [ $? != 0 ]; then exit 1; fi -/bin/rm -f f.* ff.* fff.* +/bin/rm -f x509-f.* x509-ff.* x509-fff.* exit 0 diff --git a/deps/openssl/openssl/test/v3nametest.c b/deps/openssl/openssl/test/v3nametest.c new file mode 120000 index 00000000000000..1d209eb96323d0 --- /dev/null +++ b/deps/openssl/openssl/test/v3nametest.c @@ -0,0 +1 @@ +../crypto/x509v3/v3nametest.c \ No newline at end of file diff --git a/deps/openssl/openssl/tools/c_rehash b/deps/openssl/openssl/tools/c_rehash index 6a20011a4c010c..4a0f0e10eb466a 100644 --- a/deps/openssl/openssl/tools/c_rehash +++ b/deps/openssl/openssl/tools/c_rehash @@ -1,31 +1,58 @@ #!/usr/bin/perl - # Perl c_rehash script, scan all files in a directory # and add symbolic links to their hash values. -my $openssl; - my $dir = "/usr/local/ssl"; my $prefix = "/usr/local/ssl"; -if(defined $ENV{OPENSSL}) { - $openssl = $ENV{OPENSSL}; -} else { - $openssl = "openssl"; - $ENV{OPENSSL} = $openssl; +my $openssl = $ENV{OPENSSL} || "openssl"; +my $pwd; +my $x509hash = "-subject_hash"; +my $crlhash = "-hash"; +my $verbose = 0; +my $symlink_exists=eval {symlink("",""); 1}; +my $removelinks = 1; + +## Parse flags. +while ( $ARGV[0] =~ '-.*' ) { + my $flag = shift @ARGV; + last if ( $flag eq '--'); + if ( $flag =~ /-old/) { + $x509hash = "-subject_hash_old"; + $crlhash = "-hash_old"; + } elsif ( $flag =~ /-h/) { + help(); + } elsif ( $flag eq '-n' ) { + $removelinks = 0; + } elsif ( $flag eq '-v' ) { + $verbose++; + } + else { + print STDERR "Usage error; try -help.\n"; + exit 1; + } +} + +sub help { + print "Usage: c_rehash [-old] [-h] [-v] [dirs...]\n"; + print " -old use old-style digest\n"; + print " -h print this help text\n"; + print " -v print files removed and linked\n"; + exit 0; } -my $pwd; eval "require Cwd"; if (defined(&Cwd::getcwd)) { $pwd=Cwd::getcwd(); } else { - $pwd=`pwd`; chomp($pwd); + $pwd=`pwd`; + chomp($pwd); } -my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; # DOS/Win32 or Unix delimiter? -$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); # prefix our path +# DOS/Win32 or Unix delimiter? Prefix our installdir, then search. +my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; +$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); if(! -x $openssl) { my $found = 0; @@ -68,14 +95,17 @@ sub hash_dir { chdir $_[0]; opendir(DIR, "."); my @flist = readdir(DIR); - # Delete any existing symbolic links - foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) { - if(-l $_) { - unlink $_; + closedir DIR; + if ( $removelinks ) { + # Delete any existing symbolic links + foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) { + if(-l $_) { + unlink $_; + print "unlink $_" if $verbose; + } } } - closedir DIR; - FILE: foreach $fname (grep {/\.pem$/} @flist) { + FILE: foreach $fname (grep {/\.(pem)|(crt)|(cer)|(crl)$/} @flist) { # Check to see if certificates and/or CRLs present. 
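Note: with the changes above, c_rehash grows a small option parser (-old, -n, -v, -h) and now hashes .crt, .cer and .crl files as well as .pem. Rough usage, with a hypothetical certificate directory; the hash file names are whatever -subject_hash / -hash print for the actual files:

```sh
# Sketch: rebuilding a hashed CA directory with the extended c_rehash.
# /etc/ssl/mycerts is only illustrative.
perl tools/c_rehash -v /etc/ssl/mycerts
#   each certificate ends up as a <subject_hash>.N symlink,
#   each CRL as a <crl_hash>.rN symlink (plain copies where symlink() is unavailable)
perl tools/c_rehash -old /etc/ssl/mycerts   # pre-1.0.0 digests (-subject_hash_old / -hash_old)
perl tools/c_rehash -n   /etc/ssl/mycerts   # keep existing links instead of unlinking them first
```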
my ($cert, $crl) = check_file($fname); if(!$cert && !$crl) { @@ -117,7 +147,7 @@ sub check_file { sub link_hash_cert { my $fname = $_[0]; $fname =~ s/'/'\\''/g; - my ($hash, $fprint) = `"$openssl" x509 -hash -fingerprint -noout -in "$fname"`; + my ($hash, $fprint) = `"$openssl" x509 $x509hash -fingerprint -noout -in "$fname"`; chomp $hash; chomp $fprint; $fprint =~ s/^.*=//; @@ -133,16 +163,16 @@ sub link_hash_cert { $suffix++; } $hash .= ".$suffix"; - print "$fname => $hash\n"; - $symlink_exists=eval {symlink("",""); 1}; if ($symlink_exists) { symlink $fname, $hash; + print "link $fname -> $hash\n" if $verbose; } else { open IN,"<$fname" or die "can't open $fname for read"; open OUT,">$hash" or die "can't open $hash for write"; print OUT ; # does the job for small text files close OUT; close IN; + print "copy $fname -> $hash\n" if $verbose; } $hashlist{$hash} = $fprint; } @@ -152,7 +182,7 @@ sub link_hash_cert { sub link_hash_crl { my $fname = $_[0]; $fname =~ s/'/'\\''/g; - my ($hash, $fprint) = `"$openssl" crl -hash -fingerprint -noout -in '$fname'`; + my ($hash, $fprint) = `"$openssl" crl $crlhash -fingerprint -noout -in '$fname'`; chomp $hash; chomp $fprint; $fprint =~ s/^.*=//; @@ -168,12 +198,12 @@ sub link_hash_crl { $suffix++; } $hash .= ".r$suffix"; - print "$fname => $hash\n"; - $symlink_exists=eval {symlink("",""); 1}; if ($symlink_exists) { symlink $fname, $hash; + print "link $fname -> $hash\n" if $verbose; } else { system ("cp", $fname, $hash); + print "cp $fname -> $hash\n" if $verbose; } $hashlist{$hash} = $fprint; } diff --git a/deps/openssl/openssl/tools/c_rehash.in b/deps/openssl/openssl/tools/c_rehash.in index bfc4a69ed4bcd8..887e9271254e49 100644 --- a/deps/openssl/openssl/tools/c_rehash.in +++ b/deps/openssl/openssl/tools/c_rehash.in @@ -1,31 +1,58 @@ #!/usr/local/bin/perl - # Perl c_rehash script, scan all files in a directory # and add symbolic links to their hash values. -my $openssl; - my $dir; my $prefix; -if(defined $ENV{OPENSSL}) { - $openssl = $ENV{OPENSSL}; -} else { - $openssl = "openssl"; - $ENV{OPENSSL} = $openssl; +my $openssl = $ENV{OPENSSL} || "openssl"; +my $pwd; +my $x509hash = "-subject_hash"; +my $crlhash = "-hash"; +my $verbose = 0; +my $symlink_exists=eval {symlink("",""); 1}; +my $removelinks = 1; + +## Parse flags. +while ( $ARGV[0] =~ '-.*' ) { + my $flag = shift @ARGV; + last if ( $flag eq '--'); + if ( $flag =~ /-old/) { + $x509hash = "-subject_hash_old"; + $crlhash = "-hash_old"; + } elsif ( $flag =~ /-h/) { + help(); + } elsif ( $flag eq '-n' ) { + $removelinks = 0; + } elsif ( $flag eq '-v' ) { + $verbose++; + } + else { + print STDERR "Usage error; try -help.\n"; + exit 1; + } +} + +sub help { + print "Usage: c_rehash [-old] [-h] [-v] [dirs...]\n"; + print " -old use old-style digest\n"; + print " -h print this help text\n"; + print " -v print files removed and linked\n"; + exit 0; } -my $pwd; eval "require Cwd"; if (defined(&Cwd::getcwd)) { $pwd=Cwd::getcwd(); } else { - $pwd=`pwd`; chomp($pwd); + $pwd=`pwd`; + chomp($pwd); } -my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; # DOS/Win32 or Unix delimiter? -$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); # prefix our path +# DOS/Win32 or Unix delimiter? Prefix our installdir, then search. +my $path_delim = ($pwd =~ /^[a-z]\:/i) ? ';' : ':'; +$ENV{PATH} = "$prefix/bin" . ($ENV{PATH} ? $path_delim . $ENV{PATH} : ""); if(! 
-x $openssl) { my $found = 0; @@ -68,14 +95,17 @@ sub hash_dir { chdir $_[0]; opendir(DIR, "."); my @flist = readdir(DIR); - # Delete any existing symbolic links - foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) { - if(-l $_) { - unlink $_; + closedir DIR; + if ( $removelinks ) { + # Delete any existing symbolic links + foreach (grep {/^[\da-f]+\.r{0,1}\d+$/} @flist) { + if(-l $_) { + unlink $_; + print "unlink $_" if $verbose; + } } } - closedir DIR; - FILE: foreach $fname (grep {/\.pem$/} @flist) { + FILE: foreach $fname (grep {/\.(pem)|(crt)|(cer)|(crl)$/} @flist) { # Check to see if certificates and/or CRLs present. my ($cert, $crl) = check_file($fname); if(!$cert && !$crl) { @@ -117,7 +147,7 @@ sub check_file { sub link_hash_cert { my $fname = $_[0]; $fname =~ s/'/'\\''/g; - my ($hash, $fprint) = `"$openssl" x509 -hash -fingerprint -noout -in "$fname"`; + my ($hash, $fprint) = `"$openssl" x509 $x509hash -fingerprint -noout -in "$fname"`; chomp $hash; chomp $fprint; $fprint =~ s/^.*=//; @@ -133,16 +163,16 @@ sub link_hash_cert { $suffix++; } $hash .= ".$suffix"; - print "$fname => $hash\n"; - $symlink_exists=eval {symlink("",""); 1}; if ($symlink_exists) { symlink $fname, $hash; + print "link $fname -> $hash\n" if $verbose; } else { open IN,"<$fname" or die "can't open $fname for read"; open OUT,">$hash" or die "can't open $hash for write"; print OUT ; # does the job for small text files close OUT; close IN; + print "copy $fname -> $hash\n" if $verbose; } $hashlist{$hash} = $fprint; } @@ -152,7 +182,7 @@ sub link_hash_cert { sub link_hash_crl { my $fname = $_[0]; $fname =~ s/'/'\\''/g; - my ($hash, $fprint) = `"$openssl" crl -hash -fingerprint -noout -in '$fname'`; + my ($hash, $fprint) = `"$openssl" crl $crlhash -fingerprint -noout -in '$fname'`; chomp $hash; chomp $fprint; $fprint =~ s/^.*=//; @@ -168,12 +198,12 @@ sub link_hash_crl { $suffix++; } $hash .= ".r$suffix"; - print "$fname => $hash\n"; - $symlink_exists=eval {symlink("",""); 1}; if ($symlink_exists) { symlink $fname, $hash; + print "link $fname -> $hash\n" if $verbose; } else { system ("cp", $fname, $hash); + print "cp $fname -> $hash\n" if $verbose; } $hashlist{$hash} = $fprint; } diff --git a/deps/openssl/openssl/util/copy-if-different.pl b/deps/openssl/openssl/util/copy-if-different.pl new file mode 100644 index 00000000000000..ec99e084b56a90 --- /dev/null +++ b/deps/openssl/openssl/util/copy-if-different.pl @@ -0,0 +1,78 @@ +#!/usr/local/bin/perl + +use strict; + +use Fcntl; + +# copy-if-different.pl + +# Copy to the destination if the source is not the same as it. + +my @filelist; + +foreach my $arg (@ARGV) { + $arg =~ s|\\|/|g; # compensate for bug/feature in cygwin glob... + foreach (glob $arg) + { + push @filelist, $_; + } +} + +my $fnum = @filelist; + +if ($fnum <= 1) + { + die "Need at least two filenames"; + } + +my $dest = pop @filelist; + +if ($fnum > 2 && ! 
-d $dest) + { + die "Destination must be a directory"; + } + +foreach (@filelist) + { + my $dfile; + if (-d $dest) + { + $dfile = $_; + $dfile =~ s|^.*[/\\]([^/\\]*)$|$1|; + $dfile = "$dest/$dfile"; + } + else + { + $dfile = $dest; + } + + my $buf; + if (-f $dfile) + { + sysopen(IN, $_, O_RDONLY|O_BINARY) || die "Can't Open $_"; + sysopen(OUT, $dfile, O_RDONLY|O_BINARY) + || die "Can't Open $dfile"; + while (sysread IN, $buf, 10240) + { + my $b2; + goto copy if !sysread(OUT, $b2, 10240) || $buf ne $b2; + } + goto copy if sysread(OUT, $buf, 1); + close(IN); + close(OUT); + print "NOT copying: $_ to $dfile\n"; + next; + } + copy: + sysopen(IN, $_, O_RDONLY|O_BINARY) || die "Can't Open $_"; + sysopen(OUT, $dfile, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY) + || die "Can't Open $dfile"; + while (sysread IN, $buf, 10240) + { + syswrite(OUT, $buf, length($buf)); + } + close(IN); + close(OUT); + print "Copying: $_ to $dfile\n"; + } + diff --git a/deps/openssl/openssl/util/files.pl b/deps/openssl/openssl/util/files.pl index 41f033e3b9aa40..b15407f0c90562 100755 --- a/deps/openssl/openssl/util/files.pl +++ b/deps/openssl/openssl/util/files.pl @@ -4,6 +4,12 @@ # It is basically a list of all variables from the passed makefile # +while ($ARGV[0] =~ /^(\S+)\s*=(.*)$/) + { + $sym{$1} = $2; + shift; + } + $s=""; while (<>) { @@ -33,7 +39,7 @@ $o =~ s/\s+/ /g; $o =~ s/\$[({]([^)}]+)[)}]/$sym{$1}/g; - $sym{$s}=$o; + $sym{$s}=$o if !exists $sym{$s}; } } diff --git a/deps/openssl/openssl/util/libeay.num b/deps/openssl/openssl/util/libeay.num index b594caf2cf7925..b977e4e4b22127 100755 --- a/deps/openssl/openssl/util/libeay.num +++ b/deps/openssl/openssl/util/libeay.num @@ -4282,7 +4282,7 @@ CRYPTO_ccm128_decrypt 4648 EXIST::FUNCTION: CRYPTO_ccm128_aad 4649 EXIST::FUNCTION: CRYPTO_gcm128_init 4650 EXIST::FUNCTION: CRYPTO_gcm128_decrypt 4651 EXIST::FUNCTION: -ENGINE_load_rsax 4652 EXIST::FUNCTION:ENGINE +ENGINE_load_rsax 4652 NOEXIST::FUNCTION: CRYPTO_gcm128_decrypt_ctr32 4653 EXIST::FUNCTION: CRYPTO_gcm128_encrypt_ctr32 4654 EXIST::FUNCTION: CRYPTO_gcm128_finish 4655 EXIST::FUNCTION: @@ -4314,3 +4314,102 @@ BIO_dgram_sctp_wait_for_dry 4679 EXIST::FUNCTION:SCTP BIO_s_datagram_sctp 4680 EXIST::FUNCTION:DGRAM,SCTP BIO_dgram_is_sctp 4681 EXIST::FUNCTION:SCTP BIO_dgram_sctp_notification_cb 4682 EXIST::FUNCTION:SCTP +i2d_DHxparams 4683 EXIST::FUNCTION:DH +EC_curve_nist2nid 4684 EXIST::FUNCTION:EC +DH_get_1024_160 4685 EXIST::FUNCTION:DH +PEM_write_DHxparams 4686 EXIST:!WIN16:FUNCTION:DH +d2i_DHxparams 4687 EXIST::FUNCTION:DH +EC_curve_nid2nist 4688 EXIST::FUNCTION:EC +DH_get_2048_256 4689 EXIST::FUNCTION:DH +PEM_write_bio_DHxparams 4690 EXIST::FUNCTION:DH +DH_get_2048_224 4691 EXIST::FUNCTION:DH +X509_chain_check_suiteb 4692 EXIST::FUNCTION: +X509_chain_up_ref 4693 EXIST::FUNCTION: +X509_VERIFY_PARAM_set1_ip_asc 4694 EXIST::FUNCTION: +X509_CRL_check_suiteb 4695 EXIST::FUNCTION: +X509_VERIFY_PARAM_set1_email 4696 EXIST::FUNCTION: +X509_check_email 4697 EXIST::FUNCTION: +X509_check_host 4698 EXIST::FUNCTION: +X509_check_ip_asc 4699 EXIST::FUNCTION: +X509_get0_signature 4700 EXIST::FUNCTION: +X509_get_signature_nid 4701 EXIST::FUNCTION: +X509_VERIFY_PARAM_set1_host 4702 EXIST::FUNCTION: +X509_VERIFY_PARAM_set1_ip 4703 EXIST::FUNCTION: +X509_check_ip 4704 EXIST::FUNCTION: +X509_STORE_set_lookup_crls_cb 4705 EXIST::FUNCTION: +X509_CRL_diff 4706 EXIST::FUNCTION: +X509_CRL_http_nbio 4707 EXIST::FUNCTION:EVP +OCSP_REQ_CTX_i2d 4708 EXIST::FUNCTION: +OCSP_REQ_CTX_get0_mem_bio 4709 EXIST::FUNCTION: +X509_STORE_CTX_get0_store 4710 
EXIST::FUNCTION: +X509_REVOKED_dup 4711 EXIST::FUNCTION: +CMS_RecipientInfo_encrypt 4712 EXIST::FUNCTION:CMS +OCSP_REQ_CTX_http 4713 EXIST::FUNCTION: +OCSP_REQ_CTX_nbio 4714 EXIST::FUNCTION: +X509_http_nbio 4715 EXIST::FUNCTION:EVP +OCSP_set_max_response_length 4716 EXIST::FUNCTION: +OCSP_REQ_CTX_new 4717 EXIST::FUNCTION: +OCSP_REQ_CTX_nbio_d2i 4718 EXIST::FUNCTION: +EVP_aes_256_wrap 4719 EXIST::FUNCTION:AES +CRYPTO_128_wrap 4720 EXIST::FUNCTION: +RSA_OAEP_PARAMS_new 4721 EXIST::FUNCTION:RSA +CRYPTO_128_unwrap 4722 EXIST::FUNCTION: +ECDSA_METHOD_set_name 4723 EXIST::FUNCTION:ECDSA +CMS_RecipientInfo_kari_decrypt 4724 EXIST::FUNCTION:CMS +CMS_SignerInfo_get0_pkey_ctx 4725 EXIST::FUNCTION:CMS +ECDSA_METHOD_set_flags 4726 EXIST::FUNCTION:ECDSA +ECDSA_METHOD_set_sign_setup 4727 EXIST::FUNCTION:ECDSA +CMS_RecipientInfo_kari_orig_id_cmp 4728 EXIST:!VMS:FUNCTION:CMS +CMS_RecipInfo_kari_orig_id_cmp 4728 EXIST:VMS:FUNCTION:CMS +CMS_RecipientInfo_kari_get0_alg 4729 EXIST::FUNCTION:CMS +EVP_aes_192_wrap 4730 EXIST::FUNCTION:AES +EVP_aes_128_cbc_hmac_sha256 4731 EXIST::FUNCTION:AES,SHA256 +DH_compute_key_padded 4732 EXIST::FUNCTION:DH +ECDSA_METHOD_set_sign 4733 EXIST::FUNCTION:ECDSA +CMS_RecipientEncryptedKey_cert_cmp 4734 EXIST:!VMS:FUNCTION:CMS +CMS_RecipEncryptedKey_cert_cmp 4734 EXIST:VMS:FUNCTION:CMS +DH_KDF_X9_42 4735 EXIST::FUNCTION:DH +RSA_OAEP_PARAMS_free 4736 EXIST::FUNCTION:RSA +EVP_des_ede3_wrap 4737 EXIST::FUNCTION:DES +RSA_OAEP_PARAMS_it 4738 EXIST:!EXPORT_VAR_AS_FUNCTION:VARIABLE:RSA +RSA_OAEP_PARAMS_it 4738 EXIST:EXPORT_VAR_AS_FUNCTION:FUNCTION:RSA +ASN1_TIME_diff 4739 EXIST::FUNCTION: +EVP_aes_256_cbc_hmac_sha256 4740 EXIST::FUNCTION:AES,SHA256 +CMS_SignerInfo_get0_signature 4741 EXIST::FUNCTION:CMS +CMS_RecipientInfo_kari_get0_reks 4742 EXIST:!VMS:FUNCTION:CMS +CMS_RecipInfo_kari_get0_reks 4742 EXIST:VMS:FUNCTION:CMS +EVP_aes_128_wrap 4743 EXIST::FUNCTION:AES +CMS_SignerInfo_get0_md_ctx 4744 EXIST::FUNCTION:CMS +OPENSSL_gmtime_diff 4745 EXIST::FUNCTION: +CMS_RecipientInfo_kari_set0_pkey 4746 EXIST:!VMS:FUNCTION:CMS +CMS_RecipInfo_kari_set0_pkey 4746 EXIST:VMS:FUNCTION:CMS +i2d_RSA_OAEP_PARAMS 4747 EXIST::FUNCTION:RSA +d2i_RSA_OAEP_PARAMS 4748 EXIST::FUNCTION:RSA +ECDH_KDF_X9_62 4749 EXIST::FUNCTION:ECDH +CMS_RecipientInfo_kari_get0_ctx 4750 EXIST::FUNCTION:CMS +ECDSA_METHOD_new 4751 EXIST::FUNCTION:ECDSA +CMS_RecipientInfo_get0_pkey_ctx 4752 EXIST::FUNCTION:CMS +CMS_RecipientEncryptedKey_get0_id 4753 EXIST:!VMS:FUNCTION:CMS +CMS_RecipEncryptedKey_get0_id 4753 EXIST:VMS:FUNCTION:CMS +RSA_padding_check_PKCS1_OAEP_mgf1 4754 EXIST:!VMS:FUNCTION:RSA +RSA_pad_check_PKCS1_OAEP_mgf1 4754 EXIST:VMS:FUNCTION:RSA +ECDSA_METHOD_set_verify 4755 EXIST::FUNCTION:ECDSA +CMS_SharedInfo_encode 4756 EXIST::FUNCTION:CMS +RSA_padding_add_PKCS1_OAEP_mgf1 4757 EXIST::FUNCTION:RSA +CMS_RecipientInfo_kari_get0_orig_id 4758 EXIST:!VMS:FUNCTION:CMS +CMS_RecipInfo_kari_get0_orig_id 4758 EXIST:VMS:FUNCTION:CMS +ECDSA_METHOD_free 4759 EXIST::FUNCTION:ECDSA +X509_VERIFY_PARAM_get_count 4760 EXIST::FUNCTION: +X509_VERIFY_PARAM_get0_name 4761 EXIST::FUNCTION: +X509_VERIFY_PARAM_get0 4762 EXIST::FUNCTION: +X509V3_EXT_free 4763 EXIST::FUNCTION: +BIO_hex_string 4764 EXIST::FUNCTION: +X509_VERIFY_PARAM_set_hostflags 4765 EXIST::FUNCTION: +BUF_strnlen 4766 EXIST::FUNCTION: +X509_VERIFY_PARAM_get0_peername 4767 EXIST::FUNCTION: +ECDSA_METHOD_set_app_data 4768 EXIST::FUNCTION:ECDSA +sk_deep_copy 4769 EXIST::FUNCTION: +ECDSA_METHOD_get_app_data 4770 EXIST::FUNCTION:ECDSA +X509_VERIFY_PARAM_add1_host 4771 EXIST::FUNCTION: 
+EC_GROUP_get_mont_data 4772 EXIST::FUNCTION:EC +i2d_re_X509_tbs 4773 EXIST::FUNCTION: diff --git a/deps/openssl/openssl/util/mk1mf.pl b/deps/openssl/openssl/util/mk1mf.pl index 550ef9f6853476..7d4491faeb6603 100755 --- a/deps/openssl/openssl/util/mk1mf.pl +++ b/deps/openssl/openssl/util/mk1mf.pl @@ -2,8 +2,12 @@ # A bit of an evil hack but it post processes the file ../MINFO which # is generated by `make files` in the top directory. # This script outputs one mega makefile that has no shell stuff or any -# funny stuff -# +# funny stuff (if the target is not "copy"). +# If the target is "copy", then it tries to create a makefile that can be +# safely used with the -j flag and that is compatible with the top-level +# Makefile, in the sense that it uses the same options and assembler files etc. + +use Cwd; $INSTALLTOP="/usr/local/ssl"; $OPENSSLDIR="/usr/local/ssl"; @@ -28,6 +32,7 @@ INSTALLTOP => \$INSTALLTOP, OPENSSLDIR => \$OPENSSLDIR, PLATFORM => \$mf_platform, + CC => \$mf_cc, CFLAG => \$mf_cflag, DEPFLAG => \$mf_depflag, CPUID_OBJ => \$mf_cpuid_asm, @@ -43,16 +48,18 @@ RMD160_ASM_OBJ => \$mf_rmd_asm, WP_ASM_OBJ => \$mf_wp_asm, CMLL_ENC => \$mf_cm_asm, + MODES_ASM_OBJ => \$mf_modes_asm, + ENGINES_ASM_OBJ=> \$mf_engines_asm, BASEADDR => \$baseaddr, FIPSDIR => \$fipsdir, + EC_ASM => \$mf_ec_asm, ); - open(IN,") { my ($mf_opt, $mf_ref); while (($mf_opt, $mf_ref) = each %mf_import) { - if (/^$mf_opt\s*=\s*(.*)$/) { + if (/^$mf_opt\s*=\s*(.*)$/ && !defined($$mfref)) { $$mf_ref = $1; } } @@ -83,7 +90,8 @@ "netware-libc", "CodeWarrior for NetWare - LibC - with WinSock Sockets", "netware-libc-bsdsock", "CodeWarrior for NetWare - LibC - with BSD Sockets", "default","cc under unix", - "auto", "auto detect from top level Makefile" + "auto", "auto detect from top level Makefile", + "copy", "copy from top level Makefile" ); $platform=""; @@ -162,7 +170,7 @@ $ranlib="echo ranlib"; $cc=(defined($VARS{'CC'}))?$VARS{'CC'}:'cc'; -$src_dir=(defined($VARS{'SRC'}))?$VARS{'SRC'}:'.'; +$src_dir=(defined($VARS{'SRC'}))?$VARS{'SRC'}: $platform eq 'copy' ? getcwd() : '.'; $bin_dir=(defined($VARS{'BIN'}))?$VARS{'BIN'}:''; # $bin_dir.=$o causes a core dump on my sparc :-( @@ -172,7 +180,8 @@ push(@INC,"util/pl","pl"); -if ($platform eq "auto") { +if ($platform eq "auto" || $platform eq 'copy') { + $orig_platform = $platform; $platform = $mf_platform; print STDERR "Imported platform $mf_platform\n"; } @@ -300,6 +309,11 @@ ##else { $cflags="$c_flags$cflags" if ($c_flags ne ""); } +if ($orig_platform eq 'copy') { + $cflags = $mf_cflag; + $cc = $mf_cc; +} + $ex_libs="$l_flags$ex_libs" if ($l_flags ne ""); @@ -391,6 +405,14 @@ } close(IN); +if ($orig_platform eq 'copy') + { + # Remove opensslconf.h so it doesn't get updated if we configure a + # different branch. + $exheader =~ s/[^ ]+\/opensslconf.h//; + $header =~ s/[^ ]+\/opensslconf.h//; + } + if ($shlib) { $extra_install= <<"EOF"; @@ -422,6 +444,7 @@ } $defs= <<"EOF"; +# N.B. You MUST use -j on FreeBSD. # This makefile has been automatically generated from the OpenSSL distribution. 
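Note: the mk1mf.pl changes below add a "copy" pseudo-platform that imports CC, CFLAG and the assembler object lists straight from the top-level Makefile, emits rules that are safe under -j, and copies headers with the new util/copy-if-different.pl so untouched files keep their timestamps. Roughly how it is meant to be driven (invocation sketched; the configure target and job count are placeholders):

```sh
# Sketch: building OpenSSL through the generated 'copy' makefile.
./Configure linux-x86_64          # any normal top-level configuration
make files                        # writes MINFO, which mk1mf.pl post-processes
perl util/mk1mf.pl copy > copy.mak
make -f copy.mak -j8 init         # one-off: create TMP/LIB/INC/BIN dirs, copy headers
make -f copy.mak -j8              # parallel build with the imported flags and asm lists
make -f copy.mak test             # run the test targets harvested from test/Makefile
```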
# This single makefile will build the complete OpenSSL distribution and # by default leave the 'intertesting' output files in .${o}out and the stuff @@ -463,7 +486,7 @@ LFLAGS=$lflags RSC=$rsc -# The output directory for everything intersting +# The output directory for everything interesting OUT_D=$out_dir # The output directory for all the temporary muck TMP_D=$tmp_dir @@ -482,13 +505,14 @@ # FIPS validated module and support file locations +E_PREMAIN_DSO=fips_premain_dso + FIPSDIR=$fipsdir BASEADDR=$baseaddr FIPSLIB_D=\$(FIPSDIR)${o}lib FIPS_PREMAIN_SRC=\$(FIPSLIB_D)${o}fips_premain.c O_FIPSCANISTER=\$(FIPSLIB_D)${o}fipscanister.lib FIPS_SHA1_EXE=\$(FIPSDIR)${o}bin${o}fips_standalone_sha1${exep} -E_PREMAIN_DSO=fips_premain_dso PREMAIN_DSO_EXE=\$(BIN_D)${o}fips_premain_dso$exep FIPSLINK=\$(PERL) \$(FIPSDIR)${o}bin${o}fipslink.pl @@ -563,8 +587,12 @@ \$(INC_D): \$(MKDIR) \"\$(INC_D)\" +# This needs to be invoked once, when the makefile is first constructed, or +# after cleaning. +init: \$(TMP_D) \$(LIB_D) \$(INC_D) \$(INCO_D) \$(BIN_D) \$(TEST_D) headers + \$(PERL) \$(SRC_D)/util/copy-if-different.pl "\$(SRC_D)/crypto/opensslconf.h" "\$(INCO_D)/opensslconf.h" + headers: \$(HEADER) \$(EXHEADER) - @ lib: \$(LIBS_DEP) \$(E_SHLIB) @@ -582,11 +610,6 @@ \$(CP) apps${o}openssl.cnf \"\$(OPENSSLDIR)\" $extra_install - -test: \$(T_EXE) - cd \$(BIN_D) - ..${o}ms${o}test - clean: \$(RM) \$(TMP_D)$o*.* @@ -594,8 +617,25 @@ \$(RM) \$(TMP_D)$o*.* \$(RM) \$(OUT_D)$o*.* +reallyclean: + \$(RM) -rf \$(TMP_D) + \$(RM) -rf \$(BIN_D) + \$(RM) -rf \$(TEST_D) + \$(RM) -rf \$(LIB_D) + \$(RM) -rf \$(INC_D) + +EOF + +if ($orig_platform ne 'copy') + { + $rules .= <<"EOF"; +test: \$(T_EXE) + cd \$(BIN_D) + ..${o}ms${o}test + EOF - + } + my $platform_cpp_symbol = "MK1MF_PLATFORM_$platform"; $platform_cpp_symbol =~ s/-/_/g; if (open(IN,"crypto/buildinf.h")) @@ -632,7 +672,7 @@ printf OUT "#endif\n"; close(OUT); -# Strip of trailing ' ' +# Strip off trailing ' ' foreach (keys %lib_obj) { $lib_obj{$_}=&clean_up_ws($lib_obj{$_}); } $test=&clean_up_ws($test); $e_exe=&clean_up_ws($e_exe); @@ -662,10 +702,38 @@ { $rules.=&cc_compile_target("\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj", "\$(FIPS_PREMAIN_SRC)", - "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(SHLIB_CFLAGS)", ""); + "-DFINGERPRINT_PREMAIN_DSO_LOAD \$(APP_CFLAGS)", ""); $rules.=&do_link_rule("\$(PREMAIN_DSO_EXE)","\$(OBJ_D)${o}\$(E_PREMAIN_DSO)$obj \$(CRYPTOOBJ) \$(O_FIPSCANISTER)","","\$(EX_LIBS)", 1); } +sub fix_asm + { + my($asm, $dir) = @_; + + return '' if $asm eq ''; + + $asm = " $asm"; + $asm =~ s/\s+/ $dir\//g; + $asm =~ s/\.o//g; + $asm =~ s/^ //; + + return $asm . ' '; + } + +if ($orig_platform eq 'copy') { + $lib_obj{CRYPTO} .= fix_asm($mf_md5_asm, 'crypto/md5'); + $lib_obj{CRYPTO} .= fix_asm($mf_bn_asm, 'crypto/bn'); + # cpuid is included by the crypto dir + #$lib_obj{CRYPTO} .= fix_asm($mf_cpuid_asm, 'crypto'); + # AES asm files DON'T end up included by the aes dir itself + $lib_obj{CRYPTO} .= fix_asm($mf_aes_asm, 'crypto/aes'); + $lib_obj{CRYPTO} .= fix_asm($mf_sha_asm, 'crypto/sha'); + $lib_obj{CRYPTO} .= fix_asm($mf_engines_asm, 'engines'); + $lib_obj{CRYPTO} .= fix_asm($mf_rc4_asm, 'crypto/rc4'); + $lib_obj{CRYPTO} .= fix_asm($mf_modes_asm, 'crypto/modes'); + $lib_obj{CRYPTO} .= fix_asm($mf_ec_asm, 'crypto/ec'); +} + foreach (values %lib_nam) { $lib_obj=$lib_obj{$_}; @@ -741,6 +809,8 @@ $rules.=&do_link_rule("\$(BIN_D)$o\$(E_EXE)$exep","\$(E_OBJ)","\$(LIBS_DEP)","\$(L_LIBS) \$(EX_LIBS)", ($fips && !$shlib) ? 
2 : 0); +$rules .= get_tests('test/Makefile') if $orig_platform eq 'copy'; + print $defs; if ($platform eq "linux-elf") { @@ -958,6 +1028,11 @@ sub do_compile_rule { $ret.=&Sasm_compile_target("$to${o}$n$obj",$s,$n); } + elsif (defined &special_compile_target and + ($s=special_compile_target($_))) + { + $ret.=$s; + } else { die "no rule for $_"; } } return($ret); @@ -968,6 +1043,10 @@ sub do_compile_rule sub perlasm_compile_target { my($target,$source,$bname)=@_; + + return platform_perlasm_compile_target($target, $source, $bname) + if defined &platform_perlasm_compile_target; + my($ret); $bname =~ s/(.*)\.[^\.]$/$1/; @@ -999,9 +1078,13 @@ sub cc_compile_target $ex_flags.=" -DMK1MF_BUILD -D$platform_cpp_symbol" if ($source =~ /cversion/); $target =~ s/\//$o/g if $o ne "/"; $source =~ s/\//$o/g if $o ne "/"; - $srcd = "\$(SRC_D)$o" unless defined $srcd; + $srcd = "\$(SRC_D)$o" unless defined $srcd && $platform ne 'copy'; $ret ="$target: $srcd$source\n\t"; - $ret.="\$(CC) ${ofile}$target $ex_flags -c $srcd$source\n\n"; + $ret.="\$(CC)"; + $ret.= " -MMD" if $orig_platform eq "copy"; + $ret.= " ${ofile}$target $ex_flags -c $srcd$source\n\n"; + $target =~ s/\.o$/.d/; + $ret.=".sinclude \"$target\"\n\n" if $orig_platform eq "copy"; return($ret); } @@ -1066,7 +1149,7 @@ sub do_copy_rule if ($n =~ /bss_file/) { $pp=".c"; } else { $pp=$p; } - $ret.="$to${o}$n$pp: \$(SRC_D)$o$_$pp\n\t\$(CP) \"\$(SRC_D)$o$_$pp\" \"$to${o}$n$pp\"\n\n"; + $ret.="$to${o}$n$pp: \$(SRC_D)$o$_$pp\n\t\$(PERL) \$(SRC_D)${o}util${o}copy-if-different.pl \"\$(SRC_D)$o$_$pp\" \"$to${o}$n$pp\"\n\n"; } return($ret); } @@ -1119,8 +1202,8 @@ sub read_options "no-tlsext" => \$no_tlsext, "no-srp" => \$no_srp, "no-cms" => \$no_cms, - "no-ec2m" => \$no_ec2m, "no-jpake" => \$no_jpake, + "no-ec2m" => \$no_ec2m, "no-ec_nistp_64_gcc_128" => 0, "no-err" => \$no_err, "no-sock" => \$no_sock, @@ -1151,9 +1234,12 @@ sub read_options "no-montasm" => 0, "no-shared" => 0, "no-store" => 0, - "no-unit-test" => 0, "no-zlib" => 0, "no-zlib-dynamic" => 0, + "no-ssl-trace" => 0, + "no-unit-test" => 0, + "no-libunbound" => 0, + "no-multiblock" => 0, "fips" => \$fips ); diff --git a/deps/openssl/openssl/util/mkdef.pl b/deps/openssl/openssl/util/mkdef.pl index 894f0529fc475e..c57c7f748eda47 100755 --- a/deps/openssl/openssl/util/mkdef.pl +++ b/deps/openssl/openssl/util/mkdef.pl @@ -121,8 +121,10 @@ "SCTP", # SRTP "SRTP", + # SSL TRACE + "SSL_TRACE", # Unit testing - "UNIT_TEST"); + "UNIT_TEST"); my $options=""; open(IN,"&1` =~ /Version ([0-9]+)\./ && $1>=14) { + $base_cflags.=$shlib?' /MD':' /MT'; + } else { + $base_cflags.=' /MC'; + } + $opt_cflags=' /O1i'; # optimize for space, but with intrinsics... + $dbg_cflags=' /Od -DDEBUG -D_DEBUG'; $lflags="/nologo /opt:ref $wcelflag"; } else # Win32 { $base_cflags= " $mf_cflag"; my $f = $shlib || $fips ?' /MD':' /MT'; - $lib_cflag='/Zl' if (!$shlib); # remove /DEFAULTLIBs from static lib $ff = "/fixed"; $opt_cflags=$f.' 
/Ox /O2 /Ob2'; $dbg_cflags=$f.'d /Od -DDEBUG -D_DEBUG'; $lflags="/nologo /subsystem:console /opt:ref"; } +$lib_cflag='/Zl' if (!$shlib); # remove /DEFAULTLIBs from static lib $mlflags=''; $out_def ="out32"; $out_def.="dll" if ($shlib); @@ -165,14 +169,26 @@ $efile="/out:"; $exep='.exe'; if ($no_sock) { $ex_libs=''; } -elsif ($FLAVOR =~ /CE/) { $ex_libs='winsock.lib'; } +elsif ($FLAVOR =~ /CE/) { $ex_libs='ws2.lib'; } else { $ex_libs='ws2_32.lib'; } if ($FLAVOR =~ /CE/) { - $ex_libs.=' $(WCECOMPAT)/lib/wcecompatex.lib' if (defined($ENV{'WCECOMPAT'})); + $ex_libs.=' crypt32.lib'; # for e_capi.c + if (defined($ENV{WCECOMPAT})) + { + $ex_libs.= ' $(WCECOMPAT)/lib'; + if (-f "$ENV{WCECOMPAT}/lib/$ENV{TARGETCPU}/wcecompatex.lib") + { + $ex_libs.='/$(TARGETCPU)/wcecompatex.lib'; + } + else + { + $ex_libs.='/wcecompatex.lib'; + } + } $ex_libs.=' $(PORTSDK_LIBPATH)/portlib.lib' if (defined($ENV{'PORTSDK_LIBPATH'})); - $ex_libs.=' /nodefaultlib:oldnames.lib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86"); + $ex_libs.=' /nodefaultlib coredll.lib corelibc.lib' if ($ENV{'TARGETCPU'} eq "X86"); } else { diff --git a/deps/openssl/openssl/util/pl/unix.pl b/deps/openssl/openssl/util/pl/unix.pl index 146611ad99586e..1d4e9dc5df1908 100644 --- a/deps/openssl/openssl/util/pl/unix.pl +++ b/deps/openssl/openssl/util/pl/unix.pl @@ -26,11 +26,12 @@ { $cflags="-O"; } } $obj='.o'; +$asm_suffix='.s'; $ofile='-o '; # EXE linking stuff $link='${CC}'; -$lflags='${CFLAGS}'; +$lflags='${CFLAG}'; $efile='-o '; $exep=''; $ex_libs=""; @@ -53,6 +54,93 @@ $bf_enc_obj=""; $bf_enc_src=""; +%perl1 = ( + 'md5-x86_64' => 'crypto/md5', + 'x86_64-mont' => 'crypto/bn', + 'x86_64-mont5' => 'crypto/bn', + 'x86_64-gf2m' => 'crypto/bn', + 'aes-x86_64' => 'crypto/aes', + 'vpaes-x86_64' => 'crypto/aes', + 'bsaes-x86_64' => 'crypto/aes', + 'aesni-x86_64' => 'crypto/aes', + 'aesni-sha1-x86_64' => 'crypto/aes', + 'sha1-x86_64' => 'crypto/sha', + 'e_padlock-x86_64' => 'engines', + 'rc4-x86_64' => 'crypto/rc4', + 'rc4-md5-x86_64' => 'crypto/rc4', + 'ghash-x86_64' => 'crypto/modes', + 'aesni-gcm-x86_64' => 'crypto/modes', + 'aesni-sha256-x86_64' => 'crypto/aes', + 'rsaz-x86_64' => 'crypto/bn', + 'rsaz-avx2' => 'crypto/bn', + 'aesni-mb-x86_64' => 'crypto/aes', + 'sha1-mb-x86_64' => 'crypto/sha', + 'sha256-mb-x86_64' => 'crypto/sha', + 'ecp_nistz256-x86_64' => 'crypto/ec', + ); + +# If I were feeling more clever, these could probably be extracted +# from makefiles. 
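Note: the %perl1 table above maps each x86_64 perlasm generator to its source directory, and platform_perlasm_compile_target below turns each entry into a rule that runs the generator with $(PERLASM_SCHEME) and assembles the result. Spelled out by hand, one of those rules amounts to the following (tmp/ and obj/ directories and the "elf" scheme are placeholders for a typical Linux x86_64 build):

```sh
# Sketch: one perlasm rule from the table above, expanded.
perl crypto/sha/asm/sha1-x86_64.pl elf > tmp/sha1-x86_64.s
cc -c -o obj/sha1-x86_64.o tmp/sha1-x86_64.s

# sha256-x86_64.s and sha512-x86_64.s share one generator; the special cases
# below therefore call sha512-x86_64.pl and let it write the file named in $@
# (the script picks SHA-256 vs SHA-512 code from the output file name)
# instead of redirecting stdout.
perl crypto/sha/asm/sha512-x86_64.pl elf tmp/sha256-x86_64.s
```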
+sub platform_perlasm_compile_target + { + local($target, $source, $bname) = @_; + + for $p (keys %perl1) + { + if ($target eq "\$(OBJ_D)/$p.o") + { + return << "EOF"; +\$(TMP_D)/$p.s: $perl1{$p}/asm/$p.pl + \$(PERL) $perl1{$p}/asm/$p.pl \$(PERLASM_SCHEME) > \$@ +EOF + } + } + if ($target eq '$(OBJ_D)/x86_64cpuid.o') + { + return << 'EOF'; +$(TMP_D)/x86_64cpuid.s: crypto/x86_64cpuid.pl + $(PERL) crypto/x86_64cpuid.pl $(PERLASM_SCHEME) > $@ +EOF + } + elsif ($target eq '$(OBJ_D)/sha256-x86_64.o') + { + return << 'EOF'; +$(TMP_D)/sha256-x86_64.s: crypto/sha/asm/sha512-x86_64.pl + $(PERL) crypto/sha/asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ +EOF + } + elsif ($target eq '$(OBJ_D)/sha512-x86_64.o') + { + return << 'EOF'; +$(TMP_D)/sha512-x86_64.s: crypto/sha/asm/sha512-x86_64.pl + $(PERL) crypto/sha/asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ +EOF + } + elsif ($target eq '$(OBJ_D)/sha512-x86_64.o') + { + return << 'EOF'; +$(TMP_D)/sha512-x86_64.s: crypto/sha/asm/sha512-x86_64.pl + $(PERL) crypto/sha/asm/sha512-x86_64.pl $(PERLASM_SCHEME) $@ +EOF + } + + die $target; + } + +sub special_compile_target + { + local($target) = @_; + + if ($target eq 'crypto/bn/x86_64-gcc') + { + return << "EOF"; +\$(TMP_D)/x86_64-gcc.o: crypto/bn/asm/x86_64-gcc.c + \$(CC) \$(CFLAGS) -c -o \$@ crypto/bn/asm/x86_64-gcc.c +EOF + } + return undef; + } + sub do_lib_rule { local($obj,$target,$name,$shlib)=@_; @@ -72,7 +160,7 @@ sub do_link_rule { local($target,$files,$dep_libs,$libs)=@_; local($ret,$_); - + $file =~ s/\//$o/g if $o ne '/'; $n=&bname($target); $ret.="$target: $files $dep_libs\n"; @@ -93,4 +181,262 @@ sub which } } +sub fixtests + { + my ($str, $tests) = @_; + + foreach my $t (keys %$tests) + { + $str =~ s/(\.\/)?\$\($t\)/\$(TEST_D)\/$tests->{$t}/g; + } + + return $str; + } + +sub fixdeps + { + my ($str, $fakes) = @_; + + my @t = split(/\s+/, $str); + $str = ''; + foreach my $t (@t) + { + $str .= ' ' if $str ne ''; + if (exists($fakes->{$t})) + { + $str .= $fakes->{$t}; + next; + } + if ($t =~ /^[^\/]+$/) + { + $str .= '$(TEST_D)/' . $t; + } + else + { + $str .= $t; + } + } + + return $str; + } + +sub fixrules + { + my ($str) = @_; + + # Compatible with -j... + $str =~ s/^(\s+@?)/$1cd \$(TEST_D) && /; + return $str; + + # Compatible with not -j. + my @t = split("\n", $str); + $str = ''; + my $prev; + foreach my $t (@t) + { + $t =~ s/^\s+//; + if (!$prev) + { + if ($t =~ /^@/) + { + $t =~ s/^@/\@cd \$(TEST_D) && /; + } + elsif ($t !~ /^\s*#/) + { + $t = 'cd $(TEST_D) && ' . $t; + } + } + $str .= "\t$t\n"; + $prev = $t =~/\\$/; + } + return $str; +} + +sub copy_scripts + { + my ($sed, $src, @targets) = @_; + + my $s = ''; + foreach my $t (@targets) + { + # Copy first so we get file modes... + $s .= "\$(TEST_D)/$t: \$(SRC_D)/$src/$t\n\tcp \$(SRC_D)/$src/$t \$(TEST_D)/$t\n"; + $s .= "\tsed -e 's/\\.\\.\\/apps/..\\/\$(OUT_D)/' -e 's/\\.\\.\\/util/..\\/\$(TEST_D)/' < \$(SRC_D)/$src/$t > \$(TEST_D)/$t\n" if $sed; + $s .= "\n"; + } + return $s; + } + +sub get_tests + { + my ($makefile) = @_; + + open(M, $makefile) || die "Can't open $makefile: $!"; + my %targets; + my %deps; + my %tests; + my %alltests; + my %fakes; + while (my $line = ) + { + chomp $line; + while ($line =~ /^(.*)\\$/) + { + $line = $1 . 
+            }
+
+        if ($line =~ /^alltests:(.*)$/)
+            {
+            my @t = split(/\s+/, $1);
+            foreach my $t (@t)
+                {
+                $targets{$t} = '';
+                $alltests{$t} = undef;
+                }
+            }
+
+        if (($line =~ /^(?<t>\S+):(?<d>.*)$/ && exists $targets{$1})
+            || $line =~ /^(?<t>test_(ss|gen) .*):(?<d>.*)/)
+            {
+            my $t = $+{t};
+            my $d = $+{d};
+            # If there are multiple targets stupid FreeBSD make runs the
+            # rules once for each dependency that matches one of the
+            # targets. Running the same rule twice concurrently causes
+            # breakage, so replace with a fake target.
+            if ($t =~ /\s/)
+                {
+                ++$fake;
+                my @targets = split /\s+/, $t;
+                $t = "_fake$fake";
+                foreach my $f (@targets)
+                    {
+                    $fakes{$f} = $t;
+                    }
+                }
+            $deps{$t} = $d;
+            $deps{$t} =~ s/#.*$//;
+            for (;;)
+                {
+                $line = <M>;
+                chomp $line;
+                last if $line eq '';
+                $targets{$t} .= "$line\n";
+                }
+            next;
+            }
+
+        if ($line =~ /^(\S+TEST)=\s*(\S+)$/)
+            {
+            $tests{$1} = $2;
+            next;
+            }
+        }
+
+    delete $alltests{test_jpake} if $no_jpake;
+    delete $targets{test_ige} if $no_ige;
+    delete $alltests{test_md2} if $no_md2;
+    delete $alltests{test_rc5} if $no_rc5;
+
+    my $tests;
+    foreach my $t (keys %tests)
+        {
+        $tests .= "$t = $tests{$t}\n";
+        }
+
+    my $each;
+    foreach my $t (keys %targets)
+        {
+        next if $t eq '';
+
+        my $d = $deps{$t};
+        $d =~ s/\.\.\/apps/\$(BIN_D)/g;
+        $d =~ s/\.\.\/util/\$(TEST_D)/g;
+        $d = fixtests($d, \%tests);
+        $d = fixdeps($d, \%fakes);
+
+        my $r = $targets{$t};
+        $r =~ s/\.\.\/apps/..\/\$(BIN_D)/g;
+        $r =~ s/\.\.\/util/..\/\$(TEST_D)/g;
+        $r =~ s/\.\.\/(\S+)/\$(SRC_D)\/$1/g;
+        $r = fixrules($r);
+
+        next if $r eq '';
+
+        $t =~ s/\s+/ \$(TEST_D)\//g;
+
+        $each .= "$t: test_scripts $d\n\t\@echo '$t test started'\n$r\t\@echo '$t test done'\n\n";
+        }
+
+    # FIXME: Might be a clever way to figure out what needs copying
+    my @copies = ( 'bctest',
+                   'testgen',
+                   'cms-test.pl',
+                   'tx509',
+                   'test.cnf',
+                   'testenc',
+                   'tocsp',
+                   'testca',
+                   'CAss.cnf',
+                   'testtsa',
+                   'CAtsa.cnf',
+                   'Uss.cnf',
+                   'P1ss.cnf',
+                   'P2ss.cnf',
+                   'tcrl',
+                   'tsid',
+                   'treq',
+                   'tpkcs7',
+                   'tpkcs7d',
+                   'testcrl.pem',
+                   'testx509.pem',
+                   'v3-cert1.pem',
+                   'v3-cert2.pem',
+                   'testreq2.pem',
+                   'testp7.pem',
+                   'pkcs7-1.pem',
+                   'trsa',
+                   'testrsa.pem',
+                   'testsid.pem',
+                   'testss',
+                   'testssl',
+                   'testsslproxy',
+                   'serverinfo.pem',
+                 );
+    my $copies = copy_scripts(1, 'test', @copies);
+    $copies .= copy_scripts(0, 'test', ('smcont.txt'));
+
+    my @utils = ( 'shlib_wrap.sh',
+                  'opensslwrap.sh',
+                );
+    $copies .= copy_scripts(1, 'util', @utils);
+
+    my @apps = ( 'CA.sh',
+                 'openssl.cnf',
+                 'server2.pem',
+               );
+    $copies .= copy_scripts(1, 'apps', @apps);
+
+    $copies .= copy_scripts(1, 'crypto/evp', ('evptests.txt'));
+
+    $scripts = "test_scripts: \$(TEST_D)/CA.sh \$(TEST_D)/opensslwrap.sh \$(TEST_D)/openssl.cnf \$(TEST_D)/shlib_wrap.sh ocsp smime\n";
+    $scripts .= "\nocsp:\n\tcp -R test/ocsp-tests \$(TEST_D)\n";
+    $scripts .= "\smime:\n\tcp -R test/smime-certs \$(TEST_D)\n";
+
+    my $all = 'test:';
+    foreach my $t (keys %alltests)
+        {
+        if (exists($fakes{$t}))
+            {
+            $all .= " $fakes{$t}";
+            }
+        else
+            {
+            $all .= " $t";
+            }
+        }
+
+    return "$scripts\n$copies\n$tests\n$all\n\n$each";
+    }
+
 1;
diff --git a/deps/openssl/openssl/util/ssleay.num b/deps/openssl/openssl/util/ssleay.num
index dd1c5e88239e3a..5a8991350c50fb 100755
--- a/deps/openssl/openssl/util/ssleay.num
+++ b/deps/openssl/openssl/util/ssleay.num
@@ -316,8 +316,55 @@ SSL_CTX_set_next_protos_adv_cb 355 EXIST:VMS:FUNCTION:NEXTPROTONEG
 SSL_get0_next_proto_negotiated 356 EXIST::FUNCTION:NEXTPROTONEG
 SSL_get_selected_srtp_profile 357 EXIST::FUNCTION:SRTP
 SSL_CTX_set_tlsext_use_srtp 358 EXIST::FUNCTION:SRTP
-SSL_select_next_proto 359 EXIST::FUNCTION:NEXTPROTONEG
+SSL_select_next_proto 359 EXIST::FUNCTION:TLSEXT
 SSL_get_srtp_profiles 360 EXIST::FUNCTION:SRTP
 SSL_CTX_set_next_proto_select_cb 361 EXIST:!VMS:FUNCTION:NEXTPROTONEG
 SSL_CTX_set_next_proto_sel_cb 361 EXIST:VMS:FUNCTION:NEXTPROTONEG
 SSL_SESSION_get_compress_id 362 EXIST::FUNCTION:
+SSL_get0_param 363 EXIST::FUNCTION:
+SSL_CTX_get0_privatekey 364 EXIST::FUNCTION:
+SSL_get_shared_sigalgs 365 EXIST::FUNCTION:TLSEXT
+SSL_CONF_CTX_finish 366 EXIST::FUNCTION:
+DTLS_method 367 EXIST::FUNCTION:
+DTLS_client_method 368 EXIST::FUNCTION:
+SSL_CIPHER_standard_name 369 EXIST::FUNCTION:SSL_TRACE
+SSL_set_alpn_protos 370 EXIST::FUNCTION:
+SSL_CTX_set_srv_supp_data 371 NOEXIST::FUNCTION:
+SSL_CONF_cmd_argv 372 EXIST::FUNCTION:
+DTLSv1_2_server_method 373 EXIST::FUNCTION:
+SSL_COMP_set0_compression_methods 374 EXIST:!VMS:FUNCTION:COMP
+SSL_COMP_set0_compress_methods 374 EXIST:VMS:FUNCTION:COMP
+SSL_CTX_set_cert_cb 375 EXIST::FUNCTION:
+SSL_CTX_add_client_custom_ext 376 EXIST::FUNCTION:TLSEXT
+SSL_is_server 377 EXIST::FUNCTION:
+SSL_CTX_get0_param 378 EXIST::FUNCTION:
+SSL_CONF_cmd 379 EXIST::FUNCTION:
+SSL_CTX_get_ssl_method 380 EXIST::FUNCTION:
+SSL_CONF_CTX_set_ssl_ctx 381 EXIST::FUNCTION:
+SSL_CIPHER_find 382 EXIST::FUNCTION:
+SSL_CTX_use_serverinfo 383 EXIST::FUNCTION:TLSEXT
+DTLSv1_2_client_method 384 EXIST::FUNCTION:
+SSL_get0_alpn_selected 385 EXIST::FUNCTION:
+SSL_CONF_CTX_clear_flags 386 EXIST::FUNCTION:
+SSL_CTX_set_alpn_protos 387 EXIST::FUNCTION:
+SSL_CTX_add_server_custom_ext 389 EXIST::FUNCTION:TLSEXT
+SSL_CTX_get0_certificate 390 EXIST::FUNCTION:
+SSL_CTX_set_alpn_select_cb 391 EXIST::FUNCTION:
+SSL_CONF_cmd_value_type 392 EXIST::FUNCTION:
+SSL_set_cert_cb 393 EXIST::FUNCTION:
+SSL_get_sigalgs 394 EXIST::FUNCTION:TLSEXT
+SSL_CONF_CTX_set1_prefix 395 EXIST::FUNCTION:
+SSL_CONF_CTX_new 396 EXIST::FUNCTION:
+SSL_CONF_CTX_set_flags 397 EXIST::FUNCTION:
+SSL_CONF_CTX_set_ssl 398 EXIST::FUNCTION:
+SSL_check_chain 399 EXIST::FUNCTION:TLSEXT
+SSL_certs_clear 400 EXIST::FUNCTION:
+SSL_CONF_CTX_free 401 EXIST::FUNCTION:
+SSL_trace 402 EXIST::FUNCTION:SSL_TRACE
+SSL_CTX_set_cli_supp_data 403 NOEXIST::FUNCTION:
+DTLSv1_2_method 404 EXIST::FUNCTION:
+DTLS_server_method 405 EXIST::FUNCTION:
+SSL_CTX_use_serverinfo_file 406 EXIST::FUNCTION:STDIO,TLSEXT
+SSL_COMP_free_compression_methods 407 EXIST:!VMS:FUNCTION:COMP
+SSL_COMP_free_compress_methods 407 EXIST:VMS:FUNCTION:COMP
+SSL_extension_supported 409 EXIST::FUNCTION:TLSEXT
diff --git a/test/parallel/test-tls-no-sslv3.js b/test/parallel/test-tls-no-sslv3.js
index dbe19957306017..0fc0dcc3ae3fea 100644
--- a/test/parallel/test-tls-no-sslv3.js
+++ b/test/parallel/test-tls-no-sslv3.js
@@ -36,5 +36,5 @@ server.listen(common.PORT, '127.0.0.1', function() {
 });
 
 server.once('clientError', common.mustCall(function(err, conn) {
-  assert(/SSL3_GET_CLIENT_HELLO:wrong version number/.test(err.message));
+  assert(/:wrong version number/.test(err.message));
 }));